From a1e78722f6743330f32d8ab18393a0ddc2a4ef62 Mon Sep 17 00:00:00 2001 From: Packit Service Date: Dec 12 2020 02:31:11 +0000 Subject: ucx-1.8.0 base --- diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..148e8de --- /dev/null +++ b/AUTHORS @@ -0,0 +1,101 @@ +Akshay Venkatesh +Alex Margolin +Alex Mikheev +Alina Sklarevich +Andrey Maslennikov +Artem Polyakov +Artemy Kovalyov +Aurelien Bouteiller +Brad Benton +Corey J. Nolet +Devendar Bureddy +Dmitry Gladkov +Doug Jacobsen +Elad Persiko +Eugene Voronov +Evgeny Leksikov +Gilbert Lee +Gilles Gouaillardet +Graham Lopez +Guy Shattah +Hiroyuki Sato +Howard Pritchard +Igor Ivanov +Ilya Nelkenbaum +Jason Gunthorpe +Jeff Daily +John Snyder +Keisuke Fukuda +Ken Raffenetti +Khaled Hamidouche +Konstantin Belousov +Luis E. Pena +Manjunath Gorentla Venkata +Marek Schimara +Matthew Baker +Mike Dubman +Mikhail Brinskiy +Nathan Hjelm +Netanel Yosephian +Pak Lui +Pavan Balaji +Pavel Shamis (Pasha) +Peter Andreas Entschev +Peter Rudenko +Qiang Yu +Rohit Zambre +Sasha Kotchubievsky +Scott Saulters +Sergey Lebedev +Sergey Oblomov +Sergey Shalnov +Serguei Sagalovitch +Sheng Yang +Sourav Chakraborty +Stephen Richmond +Swen Boehm +Tony Curtis +Valentin Petrov +Wenbin Lu +Xin Zhao +Yossi Itigin + +In addition we would like to acknowledge the following members of UCX community +for their participation in annual face-to-face meeting, design discussions, and +code reviews: + +Amith Mamidala +Barney Maccabe +Brad Benton +Brandon Potter +Bronis R. de Supinski +Christophe Harle +Christopher Lamb +Craig Stunkel +Cydney Ewald Stevens +Davide Rossetti +Donald Becker +Duncan Poole +Edgar A. 
Leon +Eric Van Hensbergen +Geoffrey Blake +George Bosilca +Gilad Shainer +Jeff Kuehn +Joshua Ladd +Ken Raffenetti +Lena Oden +Liran Liss +Nathaniel Graham +Oscar Hernandez +Pavan Balaji +Richard Graham +Ron Brightwell +Sameer Kumar +Sameh Sharkawi +Serguei Sagalovitch +Sreeram Potluri +Steve Poole +Sylvain Jeaugey +Tommy Janjusic +Yiftah Shahar diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8d43ffb --- /dev/null +++ b/LICENSE @@ -0,0 +1,35 @@ +Copyright (c) 2014-2015 UT-Battelle, LLC. All rights reserved. +Copyright (C) 2014-2015 Mellanox Technologies Ltd. All rights reserved. +Copyright (C) 2014-2015 The University of Houston System. All rights reserved. +Copyright (C) 2015 The University of Tennessee and The University + of Tennessee Research Foundation. All rights reserved. +Copyright (C) 2016 ARM Ltd. All rights reserved. +Copyright (c) 2016 Los Alamos National Security, LLC. All rights reserved. +Copyright (C) 2016-2017 Advanced Micro Devices, Inc. All rights reserved. +Copyright (C) 2019 UChicago Argonne, LLC. All rights reserved. +Copyright (c) 2018-2019 NVIDIA CORPORATION. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..57b459e
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,112 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
+# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
+# Copyright (C) The University of Tennessee and The University
+#               of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+#
+
+# Build . before src so that our all-local and clean-local hooks kick in at
+# the right time.
+
+# Starts empty; entries are appended with += further down.
+EXTRA_DIST =
+ACLOCAL_AMFLAGS = -I config/m4
+
+# UCT API headers; they are also prerequisites of the doxygen tag file rule.
+noinst_HEADERS = src/uct/api/uct.h src/uct/api/uct_def.h src/uct/api/tl.h
+doxygen_doc_files = $(noinst_HEADERS)
+
+doc_dir = $(pkgdatadir)/doc
+
+if !DOCS_ONLY
+# These files are installed into $(perftest_dir). Because that variable name
+# ends in "_dir", the matching automake variable has a double underscore.
+perftest_dir = $(pkgdatadir)/perftest
+dist_perftest__DATA = contrib/ucx_perftest_config/msg_pow2 \
+                      contrib/ucx_perftest_config/msg_pow2_large \
+                      contrib/ucx_perftest_config/README \
+                      contrib/ucx_perftest_config/test_types_uct \
+                      contrib/ucx_perftest_config/test_types_ucp \
+                      contrib/ucx_perftest_config/transports
+
+SUBDIRS = \
+	src/ucm \
+	src/ucs \
+	src/uct \
+	src/ucp \
+	src/tools/info \
+	src/tools/perf \
+	src/tools/profile \
+	bindings/java \
+	test/apps \
+	test/examples
+
+if HAVE_GTEST
+SUBDIRS += test/gtest
+endif
+
+if HAVE_MPICC
+SUBDIRS += test/mpi
+endif
+
+EXTRA_DIST += contrib/configure-devel
+EXTRA_DIST += contrib/configure-release
+EXTRA_DIST += contrib/configure-prof
+EXTRA_DIST += contrib/buildrpm.sh
+EXTRA_DIST += contrib/ucx_perftest_config/msg_pow2
+EXTRA_DIST += contrib/ucx_perftest_config/README
+EXTRA_DIST += contrib/ucx_perftest_config/test_types_uct
+EXTRA_DIST += contrib/ucx_perftest_config/test_types_ucp
+EXTRA_DIST += contrib/ucx_perftest_config/transports
+EXTRA_DIST += debian
+EXTRA_DIST += ucx.pc.in
+EXTRA_DIST += LICENSE
+endif #!DOCS_ONLY
+EXTRA_DIST += docs/uml/uct.dot
+
+include $(srcdir)/docs/doxygen/doxygen.am
+
+.PHONY: docs docs-clean gtest
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = ucx.pc
+
+# Prerequisites of the "docs" target; more are appended when HAVE_DOT is set.
+DOCLIST = docs/doxygen/doxygen-doc/ucx.tag
+
+FORMAT = pdf
+DOT_CLEANFILES =
+
+if HAVE_GTEST
+# Run the "test" target of test/gtest. $(MAKE) is used rather than a literal
+# "make" so that the make program in use and its flags (jobserver, -n, -k)
+# carry over to the sub-make.
+gtest:
+	@$(MAKE) $(AM_MAKEFLAGS) -C test/gtest test
+endif
+
+if HAVE_DOT
+DOCLIST += docs/uml/uml.tag docs/uml/uct.$(FORMAT) docs/uml/ucp.$(FORMAT)
+DOT_CLEANFILES += docs/uml/uml.tag docs/uml/uct.$(FORMAT) docs/uml/ucp.$(FORMAT)
+endif
+
+docs: $(DOCLIST)
+
+docs-clean:
+	$(RM) $(DX_CLEANFILES)
+	$(RM) $(DOT_CLEANFILES)
+
+docs/doxygen/doxygen-doc/ucx.tag: $(doxygen_doc_files) doxygen-doc
+
+# Timestamp file: holds the output of date(1) from the moment the rule ran.
+docs/uml/uml.tag:
+	$(MKDIR_P) $(@D)
+	date > $@
+
+# Suffix rule: convert a .dot file to a .pdf with Graphviz dot.
+.dot.pdf:
+	dot -T pdf -o $@ $<
+
+MOSTLYCLEANFILES = $(DX_CLEANFILES) $(DOT_CLEANFILES)
diff --git a/Makefile.in b/Makefile.in
new file mode 100644
index 0000000..2ab8065
--- /dev/null
+++ b/Makefile.in
@@ -0,0 +1,1235 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
+# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
+# Copyright (C) The University of Tennessee and The University
+#               of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+#
+
+# Build . before src so that our all-local and clean-local hooks kick in at
+# the right time.
+
+# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+# ----- begin aminclude.am -------------------------------------
+#
+
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?)
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@DOCS_ONLY_FALSE@@HAVE_GTEST_TRUE@am__append_1 = test/gtest +@DOCS_ONLY_FALSE@@HAVE_MPICC_TRUE@am__append_2 = test/mpi +@DOCS_ONLY_FALSE@am__append_3 = contrib/configure-devel \ +@DOCS_ONLY_FALSE@ 
contrib/configure-release \ +@DOCS_ONLY_FALSE@ contrib/configure-prof contrib/buildrpm.sh \ +@DOCS_ONLY_FALSE@ contrib/ucx_perftest_config/msg_pow2 \ +@DOCS_ONLY_FALSE@ contrib/ucx_perftest_config/README \ +@DOCS_ONLY_FALSE@ contrib/ucx_perftest_config/test_types_uct \ +@DOCS_ONLY_FALSE@ contrib/ucx_perftest_config/test_types_ucp \ +@DOCS_ONLY_FALSE@ contrib/ucx_perftest_config/transports debian \ +@DOCS_ONLY_FALSE@ ucx.pc.in LICENSE +@HAVE_DOT_TRUE@am__append_4 = docs/uml/uml.tag docs/uml/uct.$(FORMAT) docs/uml/ucp.$(FORMAT) +@HAVE_DOT_TRUE@am__append_5 = docs/uml/uml.tag docs/uml/uct.$(FORMAT) docs/uml/ucp.$(FORMAT) +subdir = . +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + 
$(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ + $(am__configure_deps) $(am__dist_perftest__DATA_DIST) \ + $(noinst_HEADERS) $(am__DIST_COMMON) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = config.h +CONFIG_CLEAN_FILES = docs/doxygen/header.tex src/uct/api/version.h \ + ucx.spec ucx.pc contrib/rpmdef.sh debian/rules debian/control \ + debian/changelog src/ucp/api/ucp_version.h \ + src/ucp/core/ucp_version.c +CONFIG_CLEAN_VPATH_FILES = debian/compat debian/copyright \ + debian/ucx.prerm +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + 
install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__dist_perftest__DATA_DIST = contrib/ucx_perftest_config/msg_pow2 \ + contrib/ucx_perftest_config/msg_pow2_large \ + contrib/ucx_perftest_config/README \ + contrib/ucx_perftest_config/test_types_uct \ + contrib/ucx_perftest_config/test_types_ucp \ + contrib/ucx_perftest_config/transports +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(perftest_dir)" \ + "$(DESTDIR)$(pkgconfigdir)" +DATA = $(dist_perftest__DATA) $(pkgconfig_DATA) +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + cscope distdir distdir-am dist dist-all distcheck +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \ + $(LISP)config.h.in +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +CSCOPE = cscope +DIST_SUBDIRS = src/ucm src/ucs src/uct src/ucp src/tools/info \ + src/tools/perf src/tools/profile bindings/java test/apps \ + test/examples test/gtest test/mpi +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/config.h.in \ + $(srcdir)/docs/doxygen/doxygen.am $(srcdir)/ucx.pc.in \ + $(srcdir)/ucx.spec.in $(top_srcdir)/contrib/rpmdef.sh.in \ + $(top_srcdir)/debian/changelog.in $(top_srcdir)/debian/compat \ + $(top_srcdir)/debian/control.in $(top_srcdir)/debian/copyright \ + $(top_srcdir)/debian/rules.in $(top_srcdir)/debian/ucx.prerm \ + $(top_srcdir)/docs/doxygen/header.tex.in \ + $(top_srcdir)/src/ucp/api/ucp_version.h.in \ + $(top_srcdir)/src/ucp/core/ucp_version.c.in \ + $(top_srcdir)/src/uct/api/version.h.in AUTHORS NEWS README \ + compile config.guess config.sub install-sh ltmain.sh missing +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + if test -d "$(distdir)"; then \ + find "$(distdir)" -type d ! 
-perm -200 -exec chmod u+w {} ';' \ + && rm -rf "$(distdir)" \ + || { sleep 5 && rm -rf "$(distdir)"; }; \ + else :; fi +am__post_remove_distdir = $(am__remove_distdir) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +DIST_ARCHIVES = $(distdir).tar.gz +GZIP_ENV = --best +DIST_TARGETS = dist-gzip +distuninstallcheck_listfiles = find . -type f -print +am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ + | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' +distcleancheck_listfiles = find . 
-type f -print +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ 
+IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = 
@VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +EXTRA_DIST = $(am__append_3) docs/uml/uct.dot +ACLOCAL_AMFLAGS = -I config/m4 +noinst_HEADERS = src/uct/api/uct.h src/uct/api/uct_def.h src/uct/api/tl.h +doxygen_doc_files = $(noinst_HEADERS) +doc_dir = $(pkgdatadir)/doc +@DOCS_ONLY_FALSE@perftest_dir = $(pkgdatadir)/perftest 
+@DOCS_ONLY_FALSE@dist_perftest__DATA = contrib/ucx_perftest_config/msg_pow2 \ +@DOCS_ONLY_FALSE@ contrib/ucx_perftest_config/msg_pow2_large \ +@DOCS_ONLY_FALSE@ contrib/ucx_perftest_config/README \ +@DOCS_ONLY_FALSE@ contrib/ucx_perftest_config/test_types_uct \ +@DOCS_ONLY_FALSE@ contrib/ucx_perftest_config/test_types_ucp \ +@DOCS_ONLY_FALSE@ contrib/ucx_perftest_config/transports + +@DOCS_ONLY_FALSE@SUBDIRS = src/ucm src/ucs src/uct src/ucp \ +@DOCS_ONLY_FALSE@ src/tools/info src/tools/perf \ +@DOCS_ONLY_FALSE@ src/tools/profile bindings/java test/apps \ +@DOCS_ONLY_FALSE@ test/examples $(am__append_1) $(am__append_2) +@DX_COND_doc_TRUE@@DX_COND_html_TRUE@DX_CLEAN_HTML = @DX_DOCDIR@/html +@DX_COND_chm_TRUE@@DX_COND_doc_TRUE@DX_CLEAN_CHM = @DX_DOCDIR@/chm +@DX_COND_chi_TRUE@@DX_COND_chm_TRUE@@DX_COND_doc_TRUE@DX_CLEAN_CHI = @DX_DOCDIR@/@PACKAGE@.chi +@DX_COND_doc_TRUE@@DX_COND_man_TRUE@DX_CLEAN_MAN = @DX_DOCDIR@/man +@DX_COND_doc_TRUE@@DX_COND_rtf_TRUE@DX_CLEAN_RTF = @DX_DOCDIR@/rtf +@DX_COND_doc_TRUE@@DX_COND_xml_TRUE@DX_CLEAN_XML = @DX_DOCDIR@/xml +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@DX_CLEAN_PS = @DX_DOCDIR@/@PACKAGE@.ps +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@DX_PS_GOAL = doxygen-ps +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@DX_CLEAN_PDF = @DX_DOCDIR@/@PACKAGE@.pdf +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@DX_PDF_GOAL = doxygen-pdf +@DX_COND_doc_TRUE@@DX_COND_latex_TRUE@DX_CLEAN_LATEX = @DX_DOCDIR@/latex +@DX_COND_doc_TRUE@DX_CLEANFILES = \ +@DX_COND_doc_TRUE@ @DX_DOCDIR@/@PACKAGE@.tag \ +@DX_COND_doc_TRUE@ -r \ +@DX_COND_doc_TRUE@ $(DX_CLEAN_HTML) \ +@DX_COND_doc_TRUE@ $(DX_CLEAN_CHM) \ +@DX_COND_doc_TRUE@ $(DX_CLEAN_CHI) \ +@DX_COND_doc_TRUE@ $(DX_CLEAN_MAN) \ +@DX_COND_doc_TRUE@ $(DX_CLEAN_RTF) \ +@DX_COND_doc_TRUE@ $(DX_CLEAN_XML) \ +@DX_COND_doc_TRUE@ $(DX_CLEAN_PS) \ +@DX_COND_doc_TRUE@ $(DX_CLEAN_PDF) \ +@DX_COND_doc_TRUE@ $(DX_CLEAN_LATEX) + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = ucx.pc +DOCLIST = docs/doxygen/doxygen-doc/ucx.tag $(am__append_4) +FORMAT 
= pdf +DOT_CLEANFILES = $(am__append_5) +MOSTLYCLEANFILES = $(DX_CLEANFILES) $(DOT_CLEANFILES) +all: config.h + $(MAKE) $(AM_MAKEFLAGS) all-recursive + +.SUFFIXES: +.SUFFIXES: .dot .pdf +am--refresh: Makefile + @: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/docs/doxygen/doxygen.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \ + esac; +$(srcdir)/docs/doxygen/doxygen.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + $(am__cd) $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +$(am__aclocal_m4_deps): + +config.h: stamp-h1 + @test -f $@ || rm -f stamp-h1 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1 + +stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status + @rm -f stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status config.h +$(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) + rm -f stamp-h1 + touch $@ + +distclean-hdr: + -rm -f config.h stamp-h1 +docs/doxygen/header.tex: $(top_builddir)/config.status 
$(top_srcdir)/docs/doxygen/header.tex.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +src/uct/api/version.h: $(top_builddir)/config.status $(top_srcdir)/src/uct/api/version.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +ucx.spec: $(top_builddir)/config.status $(srcdir)/ucx.spec.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +ucx.pc: $(top_builddir)/config.status $(srcdir)/ucx.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +contrib/rpmdef.sh: $(top_builddir)/config.status $(top_srcdir)/contrib/rpmdef.sh.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +debian/rules: $(top_builddir)/config.status $(top_srcdir)/debian/rules.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +debian/control: $(top_builddir)/config.status $(top_srcdir)/debian/control.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +debian/changelog: $(top_builddir)/config.status $(top_srcdir)/debian/changelog.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +src/ucp/api/ucp_version.h: $(top_builddir)/config.status $(top_srcdir)/src/ucp/api/ucp_version.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +src/ucp/core/ucp_version.c: $(top_builddir)/config.status $(top_srcdir)/src/ucp/core/ucp_version.c.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool config.lt +install-dist_perftest_DATA: $(dist_perftest__DATA) + @$(NORMAL_INSTALL) + @list='$(dist_perftest__DATA)'; test -n "$(perftest_dir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(perftest_dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(perftest_dir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(perftest_dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(perftest_dir)" || exit 
$$?; \ + done + +uninstall-dist_perftest_DATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_perftest__DATA)'; test -n "$(perftest_dir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(perftest_dir)'; $(am__uninstall_files_from_dir) +install-pkgconfigDATA: $(pkgconfig_DATA) + @$(NORMAL_INSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ + done + +uninstall-pkgconfigDATA: + @$(NORMAL_UNINSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+# Run the target named by $@ (with its -recursive suffix stripped) in every subdirectory: $(DIST_SUBDIRS) for distclean-* and maintainer-clean-*, $(SUBDIRS) otherwise. For the "." entry the "<target>-am" variant is run in place; if "." is not in the list it is run last. When $(am__make_keepgoing) succeeds a failing subdirectory only sets 'fail' and the loop continues, otherwise the recipe exits at the first failure. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +# Run $(ETAGS) over this directory's tagged files, adding an include option for each subdirectory TAGS file that exists. The include option spelling is chosen by probing whether $(ETAGS) accepts --etags-include. +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test !
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscope: cscope.files + test ! -s cscope.files \ + || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) +clean-cscope: + -rm -f cscope.files +cscope.files: clean-cscope cscopelist +cscopelist: cscopelist-recursive + +# Append one path per tagged file to $(top_builddir)/cscope.files: the build-tree path when the file exists there, otherwise the path under the source directory. +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +# Recreate $(distdir) and copy every entry of $(DISTFILES) into it (directories with cp -fpR, files with cp -p), then run 'distdir' in each $(DIST_SUBDIRS) entry with relativized distdir/top_distdir, and finally fix permissions unless am__skip_mode_fix is set. +distdir-am: $(DISTFILES) + $(am__remove_distdir) + test -d "$(distdir)" || mkdir "$(distdir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case
$$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done + -test -n "$(am__skip_mode_fix)" \ + || find "$(distdir)" -type d ! -perm -755 \ + -exec chmod u+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d !
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r "$(distdir)" +# dist-<format> targets: archive $(distdir) in the named format, then run $(am__post_remove_distdir). +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz + $(am__post_remove_distdir) + +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 + $(am__post_remove_distdir) + +dist-lzip: distdir + tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz + $(am__post_remove_distdir) + +dist-xz: distdir + tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz + $(am__post_remove_distdir) + +dist-tarZ: distdir + @echo WARNING: "Support for distribution archives compressed with" \ + "legacy program 'compress' is deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__post_remove_distdir) + +dist-shar: distdir + @echo WARNING: "Support for shar distribution archives is" \ + "deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz + $(am__post_remove_distdir) + +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__post_remove_distdir) + +dist dist-all: + $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' + $(am__post_remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile.
+distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lz*) \ + lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ + *.tar.xz*) \ + xz -dc $(distdir).tar.xz | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + esac + chmod -R a-w $(distdir) + chmod u+w $(distdir) + mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst + chmod a-w $(distdir) + test -d $(distdir)/_build || exit 0; \ + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && am__cwd=`pwd` \ + && $(am__cd) $(distdir)/_build/sub \ + && ../../configure \ + $(AM_DISTCHECK_CONFIGURE_FLAGS) \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + --srcdir=../.. --prefix="$$dc_install_base" \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) dvi \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../..
&& umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ + && cd "$$am__cwd" \ + || exit 1 + $(am__post_remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +# Fail unless $(distuninstallcheck_dir) is set, can be entered, and the file listing produced there is empty; on failure the leftover files are printed to stderr. +distuninstallcheck: + @test -n '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: trying to run $@ with an empty' \ + '$$(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + $(am__cd) '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +# After distclean, fail if $(distcleancheck_listfiles) still reports files; refuses to run when srcdir is "." (i.e. not a VPATH build). +distcleancheck: distclean + @if test '$(srcdir)' = .
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am +check: check-recursive +all-am: Makefile $(DATA) $(HEADERS) config.h +installdirs: installdirs-recursive +# Create the two data install directories used by this Makefile (perftest_dir and pkgconfigdir, each under DESTDIR). +installdirs-am: + for dir in "$(DESTDIR)$(perftest_dir)" "$(DESTDIR)$(pkgconfigdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +# Re-run 'install' with INSTALL_PROGRAM and install_sh_PROGRAM replaced by $(INSTALL_STRIP_PROGRAM); when $(STRIP) is non-empty it is additionally passed through INSTALL_PROGRAM_ENV as STRIPPROG. +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES) + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-hdr \ + distclean-libtool distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-dist_perftest_DATA install-pkgconfigDATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-dist_perftest_DATA uninstall-pkgconfigDATA + +.MAKE: $(am__recursive_targets) all install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--refresh check check-am clean clean-cscope clean-generic \ + clean-libtool cscope cscopelist-am ctags ctags-am dist \ + dist-all dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ \ + dist-xz dist-zip distcheck distclean distclean-generic \ + distclean-hdr distclean-libtool distclean-tags distcleancheck \ + distdir distuninstallcheck dvi dvi-am html html-am info \ + info-am install install-am install-data install-data-am \ + install-dist_perftest_DATA install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-pkgconfigDATA
install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-dist_perftest_DATA uninstall-pkgconfigDATA + +.PRECIOUS: Makefile + + +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@doxygen-ps: @DX_DOCDIR@/@PACKAGE@.ps + +# Build the PostScript manual in the doxygen latex directory: run LaTeX, makeindex and BibTeX on refman, re-run LaTeX up to 5 more times while refman.log asks for a rerun, then convert refman.dvi with dvips. makeindex is invoked through $(DX_MAKEINDEX), the same variable the PDF rule uses. +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@@DX_DOCDIR@/@PACKAGE@.ps: @DX_DOCDIR@/@PACKAGE@.tag +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ cd @DX_DOCDIR@/latex; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ $(DX_LATEX) refman.tex; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ $(DX_MAKEINDEX) refman.idx; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ $(DX_BIBTEX) refman; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ $(DX_LATEX) refman.tex; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ $(DX_LATEX) refman.tex; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ countdown=5; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ refman.log > /dev/null 2>&1 \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ && test $$countdown -gt 0; do \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ $(DX_LATEX) refman.tex; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ countdown=`expr $$countdown - 1`; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ done; \ +@DX_COND_doc_TRUE@@DX_COND_ps_TRUE@ $(DX_DVIPS) -o ../@PACKAGE@.ps refman.dvi + +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@doxygen-pdf: @DX_DOCDIR@/@PACKAGE@.pdf + +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@@DX_DOCDIR@/@PACKAGE@.pdf: @DX_DOCDIR@/@PACKAGE@.tag +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ cd @DX_DOCDIR@/latex; \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ $(DX_PDFLATEX) refman.tex; \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ $(DX_MAKEINDEX) refman.idx; \
+@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ $(DX_BIBTEX) refman; \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ $(DX_PDFLATEX) refman.tex; \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ $(DX_PDFLATEX) refman.tex; \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ countdown=5; \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ refman.log > /dev/null 2>&1 \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ && test $$countdown -gt 0; do \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ $(DX_PDFLATEX) refman.tex; \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ countdown=`expr $$countdown - 1`; \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ done; \ +@DX_COND_doc_TRUE@@DX_COND_pdf_TRUE@ mv refman.pdf ../@PACKAGE@.pdf + +@DX_COND_doc_TRUE@.INTERMEDIATE: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL) + +@DX_COND_doc_TRUE@doxygen-run: @DX_DOCDIR@/@PACKAGE@.tag + +@DX_COND_doc_TRUE@doxygen-doc: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL) + +# Regenerate the doxygen output from scratch (the output directory is removed first) and write the .tag timestamp file that the ps/pdf rules depend on. +@DX_COND_doc_TRUE@@DX_DOCDIR@/@PACKAGE@.tag: $(DX_CONFIG) $(doxygen_doc_files) +@DX_COND_doc_TRUE@ rm -rf @DX_DOCDIR@ +@DX_COND_doc_TRUE@ mkdir -p @DX_DOCDIR@ +@DX_COND_doc_TRUE@ $(DX_ENV) $(DX_DOXYGEN) $(srcdir)/$(DX_CONFIG) +@DX_COND_doc_TRUE@ echo Timestamp >$@ + +.PHONY: doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) + +# ----- end aminclude.am --------------------------------------- +# +# LICENSE +# +# Copyright (c) 2009 Oren Ben-Kiki +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty.
+ +.PHONY: docs docs-clean + +# Run the gtest suite in test/gtest. The sub-make is started through $(MAKE) so that the jobserver (-j), dry-run (-n) and command-line variable overrides are passed down to it. +@HAVE_GTEST_TRUE@gtest: +@HAVE_GTEST_TRUE@ @$(MAKE) -C test/gtest test + +docs: $(DOCLIST) + +docs-clean: + $(RM) $(DX_CLEANFILES) + $(RM) $(DOT_CLEANFILES) + +docs/doxygen/doxygen-doc/ucx.tag: $(doxygen_doc_files) doxygen-doc + +docs/uml/uml.tag: + mkdir -p docs/uml + echo `date` > $@ + +.dot.pdf: + dot -T pdf -o $@ $< + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..6726077 --- /dev/null +++ b/NEWS @@ -0,0 +1,347 @@ +# +## Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. +## Copyright (C) UT-Battelle, LLC. 2014-2019. ALL RIGHTS RESERVED. +## Copyright (C) ARM Ltd. 2017-2020. ALL RIGHTS RESERVED. +## +## See file LICENSE for terms. +## +# + +## 1.8.0 (April 3, 2020) +### Features: +#### UCX Core +- Improved detection for DEVX support +- Improved TCP scalability +- Added support for ROCM to perftest +- Added support for different source and target memory types to perftest +- Added optimized memcpy for ROCM devices +- Added hardware tag-matching for CUDA buffers +- Added support for CUDA and ROCM managed memories +- Added support for client/server disconnect protocol over rdma connection manager +- Added support for striding receive queue for hardware tag-matching +- Added XPMEM-based rendezvous protocol for shared memory +- Added support shared memory communication between containers on same machine +- Added support for multi-threaded RDMA memory registration for large regions +- Added new test cases to Azure CI + +#### UCX Java (API Preview) +- Added APIs for stream send/recv, tag probe, and connect request handle +- Added Java package (automatically published) to Maven central + +### Bugfixes: +- Multiple fixes in JUCX +- Fixes in UCP thread safety +- Fixes for most recent versions GCC, PGI, and ICC +- Fixes for CPU affinity on Azure instances +- Fixes in
XPMEM support on PPC64 +- Performance fixes in CUDA IPC +- Fixes in RDMA CM flows +- Multiple fixes in TCP transport +- Multiple fixes in documentation +- Fixes in transport lane selection logic +- Fixes in Java jar build +- Fixes in socket connection manager for Nvidia DGX-2 platform + +## 1.7.0 (January 19, 2020) +### Features: +- Added support for multiple listening transports +- Added UCT socket-based connection manager transport +- Updated API for UCT component management +- Added API to retrieve the listening port +- Added UCP active message API +- Removed deprecated API for querying UCT memory domains +- Refactored server/client examples +- Added support for dlopen interception in UCM +- Added support for PCIe atomics +- Updated Java API: added support for most of UCP layer operations +- Updated support for Mellanox DevX API +- Added multiple UCT/TCP transport performance optimizations +- Optimized memcpy() for Intel platforms +- Added protection from non-UCX socket based app connections +- Improved search time for PKEY object +- Enable gtest over IPv6 interfaces +- Updated Mellanox and Bull device IDs +- Added support for CUDA_VISIBLE_DEVICES +- Increased limits for CUDA IPC registration + +### Bugfixes: +- Multiple fixes in UCP, UCT, UCM libraries +- Multiple fixes for BSD and Mac OS systems +- Fixes for Clang compiler +- Fixes for CUDA IPC +- Fix CPU optimization configuration options +- Fix JUCX build on GPU nodes +- Fix in Azure release pipeline flow +- Fix in CUDA memory hooks management +- Fix in GPU memory peer direct gtest +- Fix in TCP connection establishment flow +- Fix in GPU IPC check +- Fix in CUDA Jenkins test flow +- Multiple fixes in CUDA IPC flow +- Fix adding missing header files +- Fix to prevent failures in presence of VPN enabled Ethernet interfaces + +## 1.6.1 (September 23, 2019) +### Features: +- Added Bull Atos HCA device IDs +- Added Azure Pipelines testing + +### Bugfixes: +- Multiple static checker fixes +- Remove pkg.m4 
dependency +- Multiple clang static checker fixes +- Fix mem type support with generic datatype + +## 1.6.0 (July 17, 2019) +### Features: +- Modular architecture for UCT transports +- ROCm transport re-design: support for managed memory, direct copy, ROCm GDR +- Random scheduling policy for DC transport +- Optimized out-of-box settings for multi-rail +- Added support for OmniPath (using Verbs) +- Support for PCI atomics with IB transports +- Reduced UCP address size for homogeneous environments + +### Bugfixes: +- Multiple stability and performance improvements in TCP transport +- Multiple stability fixes in Verbs and MLX5 transports +- Multiple stability fixes in UCM memory hooks +- Multiple stability fixes in UGNI transport +- RPM Spec file cleanup +- Fixing compilation issues with most recent clang and gcc compilers +- Fixing the wrong name of aliases +- Fix data race in UCP wireup +- Fix segfault when libuct.so is reloaded - issue #3558 +- Include Java sources in distribution +- Handle EADDRNOTAVAIL in rdma_cm connection manager +- Disable ibcm on RHEL7+ by default +- Fix data race in UCP proxy endpoint +- Static checker fixes +- Fallback to ibv_create_cq() if ibv_create_cq_ex() returns ENOSYS +- Fix malloc hooks test +- Fix checking return status in ucp_client_server example +- Fix gdrcopy libdir config value +- Fix printing atomic capabilities in ucx_info +- Fix perftest warmup iterations to be non-zero +- Fixing default values for configure logic +- Fix race condition updating fired_events from multiple threads +- Fix madvise() hook + +### Tested configurations: +- RDMA: MLNX_OFED 4.5, distribution inbox drivers, rdma-core 22.1 +- CUDA: gdrcopy 1.3.2, cuda 9.2, ROCm 2.2 +- XPMEM: 2.6.2 +- KNEM: 1.1.3 + +## 1.5.1 (April 1, 2019) +### Bugfixes: +- Fix dc_mlx5 transport support check for inbox libmlx5 drivers - issue #3301 +- Fix compilation warnings with gcc9 and clang +- ROCm - reduce log level of device-not-found message + +## 1.5.0 (February 14, 2019) +### 
Features: +- New emulation mode enabling full UCX functionality (Atomic, Put, Get) + over TCP and RDMA-CORE interconnects that don't implement full RDMA semantics +- Non-blocking API for all one-sided operations. All blocking communication APIs are marked + as deprecated +- New client/server connection establishment API, which allows connected handover between workers +- Support for rdma-core direct-verbs (DEVX) and DC with mlx5 transports +- GPU - Support for stream API and receive side pipelining +- Malloc hooks using binary instrumentation instead of symbol override +- Statistics for UCT tag API +- GPU-to-Infiniband HCA affinity support based on locality/distance (PCIe) + +### Bugfixes: +- Fix overflow in RC/DC flush operations +- Update description in SPEC file and README +- Fix RoCE source port for dc_mlx5 flow control +- Improve ucx_info help message +- Fix segfault in UCP, due to int truncation in count_one_bits() +- Multiple other bugfixes (full list on github) + +### Tested configurations: +- InfiniBand: MLNX_OFED 4.4-4.5, distribution inbox drivers, rdma-core +- CUDA: gdrcopy 1.2, cuda 9.1.85 +- XPMEM: 2.6.2 +- KNEM: 1.1.2 + +## 1.4.0-rc2 (October 23, 2018) +### Features: +- Improved support for installation with latest ROCm +- Improved support for latest rdma-core +- Added support for CUDA IPC for intra-node GPU +- Added support for CUDA memory allocation cache for mem-type detection +- Added support for latest Mellanox devices +- Added support for Nvidia GPU managed memory +- Added support for multiple connections between the same pair of workers +- Added support for large worker address for client/server connection establishment + and INADDR_ANY +- Added support for bitwise atomics operations + +### Bugfixes: +- Performance fixes for rendezvous protocol +- Memory hook fixes +- Clang support fixes +- Self tl multi-rail fix +- Thread safety fixes in IB/RDMA transport +- Compilation fixes with upstream rdma-core +- Multiple minor bugfixes (full list on github) +-
Segfault fix for a code generated by armclang compiler +- UCP memory-domain index fix for zero-copy active messages + +### Tested configurations: +- InfiniBand: MLNX_OFED 4.2-4.4, distribution inbox drivers, rdma-core +- CUDA: gdrcopy 1.2, cuda 9.1.85 +- XPMEM: 2.6.2 +- KNEM: 1.1.2 +- Multiple bugfixes (full list on github) + +### Known issues: +#2919 - Segfault in CUDA support when KNEM not present and CMA is active +intra-node RMA transport. As a workaround user can disable CMA support at +compile time: --disable-cma. Alternatively user can remove CMA from UCX_TLS +list, for example: UCX_TLS=mm,rc,cuda_copy,cuda_ipc,gdr_copy. + +## 1.3.1 (August 20, 2018) +### Bugfixes: +- Prevent potential out-of-order sending in shared memory active messages +- CUDA: Include cudamem.h in source tarball, pass cudaFree memory size +- Registration cache: fix large range lookup, handle shmat(REMAP)/mmap(FIXED) +- Limit IB CQE size for specific ARM boards +- RPM: explicitly set gcc-c++ as requirement +- Multiple bugfixes (full list on github) + +### Tested configurations: +- InfiniBand: MLNX_OFED 4.2, inbox OFED drivers. 
+- CUDA: gdrcopy 1.2, cuda 9.1.85 +- XPMEM: 2.6.2 +- KNEM: 1.1.2 + +## 1.3.0 (February 15, 2018) +### Features: +- Added stream-based communication API to UCP +- Added support for GPU platforms: Nvidia CUDA and AMD ROCm software stacks +- Added API for client/server based connection establishment +- Added support for TCP transport +- Support for InfiniBand tag-matching offload for DC and accelerated transports +- Multi-rail support for eager and rendezvous protocols +- Added support for tag-matching communications with CUDA buffers +- Added ucp_rkey_ptr() to obtain pointer for shared memory region +- Avoid progress overhead on unused transports +- Improved scalability of software tag-matching by using a hash table +- Added transparent huge-pages allocator +- Added non-blocking flush and disconnect for UCP +- Support fixed-address memory allocation via ucp_mem_map() +- Added ucp_tag_send_nbr() API to avoid send request allocation +- Support global addressing in all IB transports +- Add support for external epoll fd and edge-triggered events +- Added registration cache for knem +- Initial support for Java bindings + +### Bugfixes: +- Multiple bugfixes (full list on github) + +### Tested configurations: +- InfiniBand: MLNX_OFED 4.2, inbox OFED drivers. 
+- CUDA: gdrcopy 1.2, cuda 9.1.85 +- XPMEM: 2.6.2 +- KNEM: 1.1.2 + +### Known issues: +#2047 - UCP: ucp_do_am_bcopy_multi drops data on UCS_ERROR_NO_RESOURCE +#2047 - failure in ud/uct_flush_test.am_zcopy_flush_ep_nb/1 +#1977 - failure in shm/test_ucp_rma.blocking_small/0 +#1926 - Timeout in mpi_test_suite with HW TM +#1920 - transport retry count exceeded in many-to-one tests +#1689 - Segmentation fault on memory hooks test in jenkins + +## 1.2.2 (January 4, 2018) +### Main: +- Support including UCX API headers from C++ code +- UD transport to handle unicast flood on RoCE fabric +- Compilation fixes for gcc 7.1.1, clang 3.6, clang 5 + +### Details: +- When UD transport is used with RoCE, packets intended for other peers may + arrive on different adapters (as a result of unicast flooding). +- This change adds packet filtering based on destination GIDs. Now the packet + is silently dropped, if its destination GID does not match the local GID. +- Added a new device ID for InfiniBand HCA +- [packaging] Move `examples/` and `perftest/` into doc +- [packaging] Update spec to work on old distros while staying compliant with Fedora + guidelines +- [cleanup] Removed unused ptmalloc version (2.83) +- [cleanup] Fixup license headers + +## 1.2.1 (August 28, 2017) +### Bugfixes: +- Compilation fixes for gcc 7.1 +- Spec file cleanups +- Versioning cleanups + +## 1.2.0 (June 15, 2017) +### Supported platforms +- Shared memory: KNEM, CMA, XPMEM, SYSV, Posix +- VERBs over InfiniBand and RoCE. + VERBS over other RDMA interconnects (iWarp, OmniPath, etc.)
is available + for community evaluation and has not been tested in context of this release +- Cray Gemini and Aries +- Architectures: x86_64, ARMv8 (64bit), Power64 + +### Features: +- Added support for InfiniBand DC and UD transports, including accelerated verbs for Mellanox devices +- Full support for PGAS/SHMEM interfaces, blocking and non-blocking APIs +- Support for MPI tag matching, both in software and offload mode +- Zero copy protocols and rendezvous, registration cache +- Handling transport errors +- Flow control for DC/RC +- Datatypes support: contiguous, IOV, generic +- Multi-threading support +- Support for ARMv8 64bit architecture +- A new API for efficient memory polling +- Support for malloc-hooks and memory registration caching + +### Bugfixes: + - Multiple bugfixes improving overall stability of the library + +### Known issues: +#1604 - Failure in ud/test_ud_slow_timer.retransmit1/1 with valgrind bug +#1588 - Fix reading cpuinfo timebase for ppc bug portability training +#1579 - Ud/test_ud.ca_md test takes too long to complete bug +#1576 - Failure in ud/test_ud_slow_timer.retransmit1/0 with valgrind bug +#1569 - Send completion with error with dc_verbs bug +#1566 - Segfault in malloc_hook.fork on arm bug +#1565 - Hang in udrc/test_ucp_rma.nonblocking_stream_get_nbi_flush_worker bug +#1534 - Wireup.c:473 Fatal: endpoint reconfiguration not supported yet bug +#1533 - Stack overflow under Valgrind 'rc_mlx5/uct_p2p_err_test.local_access_error/0' bug +#1513 - Hang in MPI_Finalize with UCX_TLS=rc[_x],sm on the bsend2 test bug +#1504 - Failure in cm/uct_p2p_am_test.am_bcopy/1 bug +#1492 - Hang when using polling fd bug +#1489 - Hang on the osu_fop_latency test with RoCE bug +#1005 - ROcE problem with OMPI direct modex - UD assertion + +## 1.1.0 (September 1, 2015) +### Workarounds: +### Features: +- Added support for AM based on FIFO in `mm` shared memory transport +- Added support for UCT `knem` shared memory transport (http://knem.gforge.inria.fr) +-
Added support for UCT `mm/xpmem` shared memory transport (https://github.com/hjelmn/xpmem) + +## 1.0.0 (July 22, 2015) +### Features: +- Added support for UCT `cma` shared memory transport (Cross-Memory Attach) +- Added support for UCT `mm` shared memory transport with mmap/sysv APIs +- Added support for UCT `rc` transport based on Infiniband/RC with verbs +- Added support for UCT `mlx5_rc` transport based on Infiniband/RC with accelerated verbs +- Added support for UCT `cm` transport based on Infiniband/SIDR (Service ID Resolution) +- Added support for UCT `ugni` transport based on Cray/UGNI +- Added support for Doxygen based documentation generation +- Added support for UCP basic protocol layer to fit PGAS paradigm (RMA, AMO) +- Added ucx_perftest utility to exercise major UCX flows and provide performance metrics +- Added test script for jenkins (contrib/test_jenkins.sh) +- Added packaging for RPM/DEB based linux distributions (see contrib/buildrpm.sh) +- Added Unit-tests infrastructure for UCX functionality based on Google Test framework (see test/gtest/) +- Added initial integration for OpenMPI with UCX for PGAS/SHMEM API + (see: https://github.com/openucx/ompi-mirror/pull/1) +- Added end-to-end testing infrastructure based on MTT (see contrib/mtt/README_MTT) diff --git a/README b/README new file mode 100644 index 0000000..ed19037 --- /dev/null +++ b/README @@ -0,0 +1,182 @@ +
+ +
+ + + +
+ + + +
+ + * [Unified Communication X](#unified-communication-x) + * [Using UCX](#using-ucx) + * [Building and Running Internal Unit Tests](#building-and-running-internal-unit-tests) + * [UCX Performance Test](#ucx-performance-test) + * [Our Community](#our-community) + * [Licenses](#licenses) + * [Contributor Agreement and Guidelines](#contributor-agreement-and-guidelines) + * [UCX Publications](#ucx-publications) + * [UCX Architecture](#ucx-architecture) + * [Supported Transports](#supported-transports) + * [Supported CPU Architectures](#supported-cpu-architectures) + +
+ +# Unified Communication X + +Unified Communication X (UCX) provides an optimized communication +layer for Message Passing ([MPI](https://www.mpi-forum.org/)), +[PGAS](http://www.pgas.org/)/[OpenSHMEM](http://www.openshmem.org/) +libraries and RPC/data-centric applications. + +UCX utilizes high-speed networks for inter-node communication, and +shared memory mechanisms for efficient intra-node communication. + +## Using UCX + +### Release Builds + +Building UCX is typically a combination of running "configure" and "make". +Execute the following commands to install the UCX system from within the +directory at the top of the tree: + +```sh +$ ./autogen.sh +$ ./contrib/configure-release --prefix=/where/to/install +$ make -j8 +$ make install +``` + +NOTE: Compiling support for various networks or other specific hardware may +require additional command line flags when running configure. + +### Developer Builds + +```bash +$ ./autogen.sh +$ ./contrib/configure-devel --prefix=$PWD/install-debug +``` + +*** NOTE: Developer builds of UCX typically include a large performance +penalty at run-time because of extra debugging code. + +### Running internal unit tests + +```sh +$ make -C test/gtest test +``` + +### Build RPM package +```bash +$ contrib/buildrpm.sh -s -b +``` + +### Build DEB package +```bash +$ dpkg-buildpackage -us -uc +``` + +### Build Doxygen documentation +```bash +$ make docs +``` + +### OpenMPI and OpenSHMEM installation with UCX +[Wiki page](http://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX) + +### MPICH installation with UCX +[Wiki page](http://github.com/openucx/ucx/wiki/MPICH-installation-with-UCX) + +### UCX Performance Test + +Start server: + +```sh +$ ./src/tools/perf/ucx_perftest -c 0 +``` + +Connect client: + +```sh +$ ./src/tools/perf/ucx_perftest -t tag_lat -c 1 +``` +Note: the `-c` flag sets CPU affinity. If running both commands on same host, make sure you set the affinity to different CPU cores. 
+ +## Our Community + +* [Project Website](http://www.openucx.org/) +* [Github](http://www.github.com/openucx/ucx/) +* [Software Releases](http://www.github.com/openucx/ucx/releases) +* [Mailing List](https://elist.ornl.gov/mailman/listinfo/ucx-group) +* [Twitter](https://twitter.com/openucx) + +## Licenses + +UCX is licensed as: + +* [BSD3](LICENSE) + +## Contributor Agreement and Guidelines + +In order to contribute to UCX, please sign up with an appropriate +[Contributor Agreement](http://www.openucx.org/license/). + +Follow these +[instructions](https://github.com/openucx/ucx/wiki/Guidance-for-contributors) +when submitting contributions and changes. + +## UCX Publications + +To reference UCX in a publication, please use the following entry: + +```bibtex +@inproceedings{shamis2015ucx, + title={UCX: an open source framework for HPC network APIs and beyond}, + author={Shamis, Pavel and Venkata, Manjunath Gorentla and Lopez, M Graham and Baker, Matthew B and Hernandez, Oscar and Itigin, Yossi and Dubman, Mike and Shainer, Gilad and Graham, Richard L and Liss, Liran and others}, + booktitle={2015 IEEE 23rd Annual Symposium on High-Performance Interconnects}, + pages={40--43}, + year={2015}, + organization={IEEE} +} +``` + +To reference the UCX website: + +```bibtex +@misc{openucx-website, + title = {{The Unified Communication X Library}}, + key = {{The Unified Communication X Library}}, + howpublished = {{\url{http://www.openucx.org}}} +} +``` + +## UCX Architecture + +![](docs/doxygen/Architecture.png) + +| Component | Role | Description | +| :---: | :---: | --- | +| UCP | Protocol | Implements high-level abstractions such as tag-matching, streams, connection negotiation and establishment, multi-rail, and handling different memory types | +| UCT | Transport | Implements low-level communication primitives such as active messages, remote memory access, and atomic operations | +| UCS | Services | A collection of data structures, algorithms, and system utilities for 
common use | +| UCM | Memory | Intercepts memory allocation and release events, used by the memory registration cache | + +## Supported Transports + +* [Infiniband](https://www.infinibandta.org/) +* [Omni-Path](https://www.intel.com/content/www/us/en/high-performance-computing-fabrics/omni-path-driving-exascale-computing.html) +* [RoCE](http://www.roceinitiative.org/) +* [Cray Gemini and Aries](https://www.cray.com/) +* [CUDA](https://developer.nvidia.com/cuda-zone) +* [ROCm](https://rocm.github.io/) +* Shared Memory + * posix, sysv, [cma](https://dl.acm.org/citation.cfm?id=2616532), [knem](http://knem.gforge.inria.fr/), and [xpmem](https://github.com/hjelmn/xpmem) +* TCP/IP + +## Supported CPU Architectures + +* [x86_64](https://en.wikipedia.org/wiki/X86-64) +* [Power8/9](https://www.ibm.com/support/knowledgecenter/en/POWER9/p9hdx/POWER9welcome.htm) +* [Arm v8](https://www.arm.com/products/silicon-ip-cpu) diff --git a/aclocal.m4 b/aclocal.m4 new file mode 100644 index 0000000..31045ed --- /dev/null +++ b/aclocal.m4 @@ -0,0 +1,1234 @@ +# generated automatically by aclocal 1.16.1 -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, +[m4_warning([this file was generated for autoconf 2.69. +You have another version of autoconf. 
It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely. +To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# Copyright (C) 2002-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.16' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.16.1], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. +m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.16.1])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# Figure out how to run the assembler. -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_AS +# ---------- +AC_DEFUN([AM_PROG_AS], +[# By default we simply use the C compiler to build assembly code. +AC_REQUIRE([AC_PROG_CC]) +test "${CCAS+set}" = set || CCAS=$CC +test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS +AC_ARG_VAR([CCAS], [assembler compiler command (defaults to CC)]) +AC_ARG_VAR([CCASFLAGS], [assembler compiler flags (defaults to CFLAGS)]) +_AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl +]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason of the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. 
In an in-source build this is usually +# harmless because $srcdir is '.', but things will break when you +# start a VPATH build or use an absolute $srcdir. +# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` +]) + +# AM_COND_IF -*- Autoconf -*- + +# Copyright (C) 2008-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_COND_IF +# _AM_COND_ELSE +# _AM_COND_ENDIF +# -------------- +# These macros are only used for tracing. +m4_define([_AM_COND_IF]) +m4_define([_AM_COND_ELSE]) +m4_define([_AM_COND_ENDIF]) + +# AM_COND_IF(COND, [IF-TRUE], [IF-FALSE]) +# --------------------------------------- +# If the shell condition COND is true, execute IF-TRUE, otherwise execute +# IF-FALSE. Allow automake to learn about conditional instantiating macros +# (the AC_CONFIG_FOOS). 
+AC_DEFUN([AM_COND_IF], +[m4_ifndef([_AM_COND_VALUE_$1], + [m4_fatal([$0: no such condition "$1"])])dnl +_AM_COND_IF([$1])dnl +if test -z "$$1_TRUE"; then : + m4_n([$2])[]dnl +m4_ifval([$3], +[_AM_COND_ELSE([$1])dnl +else + $3 +])dnl +_AM_COND_ENDIF([$1])dnl +fi[]dnl +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. +Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all its +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. 
+# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GCJ". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. 
+ mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. 
These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES. 
+AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. + AS_CASE([$CONFIG_FILES], + [*\'*], [eval set x "$CONFIG_FILES"], + [*], [set x $CONFIG_FILES]) + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. 
+ # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know understand at least 4000. + sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`AS_DIRNAME(["$am_mf"])` + am_filepart=`AS_BASENAME(["$am_mf"])` + AM_RUN_LOG([cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles]) || am_rc=$? + done + if test $am_rc -ne 0; then + AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. Try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking).]) + fi + AS_UNSET([am_dirpart]) + AS_UNSET([am_filepart]) + AS_UNSET([am_mf]) + AS_UNSET([am_rc]) + rm -f conftest-deps.mk +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking is enabled. +# This creates each '.Po' and '.Plo' makefile fragment that we'll need in +# order to bootstrap the dependency handling code. +AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. 
+m4_define([AC_PROG_CC], +m4_defn([AC_PROG_CC]) +[_AM_PROG_CC_C_O +]) + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. +dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. 
+m4_if( + m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. 
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. 
This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) + fi +fi +dnl The trailing newline in this macro's definition is deliberate, for +dnl backward compatibility and to allow trailing 'dnl'-style comments +dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841. +]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + +# When config.status generates a header, we must update the stamp-h file. +# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. 
+_am_arg=$1 +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi +AC_SUBST([install_sh])]) + +# Copyright (C) 2003-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Add --enable-maintainer-mode option to configure. -*- Autoconf -*- +# From Jim Meyering + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAINTAINER_MODE([DEFAULT-MODE]) +# ---------------------------------- +# Control maintainer-specific portions of Makefiles. 
+# Default is to disable them, unless 'enable' is passed literally.
+# For symmetry, 'disable' may be passed as well.  Anyway, the user
+# can override the default with the --enable/--disable switch.
+AC_DEFUN([AM_MAINTAINER_MODE],
+[m4_case(m4_default([$1], [disable]),
+       [enable], [m4_define([am_maintainer_other], [disable])],
+       [disable], [m4_define([am_maintainer_other], [enable])],
+       [m4_define([am_maintainer_other], [enable])
+        m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])])
+AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
+  dnl maintainer-mode's default is 'disable' unless 'enable' is passed
+  AC_ARG_ENABLE([maintainer-mode],
+    [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode],
+      am_maintainer_other[ make rules and dependencies not useful
+      (and sometimes confusing) to the casual installer])],
+    [USE_MAINTAINER_MODE=$enableval],
+    [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes]))
+  AC_MSG_RESULT([$USE_MAINTAINER_MODE])
+  AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes])
+  MAINT=$MAINTAINER_MODE_TRUE
+  AC_SUBST([MAINT])dnl
+]
+)
+
+# Check to see how 'make' treats includes. -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MAKE_INCLUDE()
+# -----------------
+# Check whether make has an 'include' directive that can support all
+# the idioms we need for our automatic dependency tracking code (sets and substitutes am__include and am__quote).
+AC_DEFUN([AM_MAKE_INCLUDE],
+[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive])
+cat > confinc.mk << 'END'
+am__doit:
+	@echo this is the am__doit target >confinc.out
+.PHONY: am__doit
+END
+am__include="#"
+am__quote=
+# BSD make does it like this.
+echo '.include "confinc.mk" # ignored' > confmf.BSD
+# Other make implementations (GNU, Solaris 10, AIX) do it like this.
+echo 'include confinc.mk # ignored' > confmf.GNU
+_am_result=no
+for s in GNU BSD; do
+  AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out])
+  AS_CASE([$?:`cat confinc.out 2>/dev/null`],
+      ['0:this is the am__doit target'],
+      [AS_CASE([$s],
+          [BSD], [am__include='.include' am__quote='"'],
+          [am__include='include' am__quote=''])])
+  if test "$am__include" != "#"; then
+    _am_result="yes ($s style)"
+    break
+  fi
+done
+rm -f confinc.* confmf.*
+AC_MSG_RESULT([${_am_result}])
+AC_SUBST([am__include])dnl
+AC_SUBST([am__quote])])
+
+# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
+
+# Copyright (C) 1997-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_MISSING_PROG(NAME, PROGRAM)
+# ------------------------------
+AC_DEFUN([AM_MISSING_PROG],
+[AC_REQUIRE([AM_MISSING_HAS_RUN])
+$1=${$1-"${am_missing_run}$2"}
+AC_SUBST($1)])
+
+# AM_MISSING_HAS_RUN
+# ------------------
+# Define MISSING if not defined so far and test if it is modern enough.
+# If it is, set am_missing_run to use it, otherwise, to nothing.
+AC_DEFUN([AM_MISSING_HAS_RUN],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([missing])dnl
+if test x"${MISSING+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\	*)
+    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
+  *)
+    MISSING="\${SHELL} $am_aux_dir/missing" ;;
+  esac
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  AC_MSG_WARN(['missing' script is too old or missing])
+fi
+])
+
+# Helper functions for option handling. -*- Autoconf -*-
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_MANGLE_OPTION(NAME)
+# -----------------------
+AC_DEFUN([_AM_MANGLE_OPTION],
+[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
+
+# _AM_SET_OPTION(NAME)
+# --------------------
+# Set option NAME.  Presently that only means defining a flag for this option.
+AC_DEFUN([_AM_SET_OPTION],
+[m4_define(_AM_MANGLE_OPTION([$1]), [1])])
+
+# _AM_SET_OPTIONS(OPTIONS)
+# ------------------------
+# OPTIONS is a space-separated list of Automake options.
+AC_DEFUN([_AM_SET_OPTIONS],
+[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
+
+# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
+# -------------------------------------------
+# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
+AC_DEFUN([_AM_IF_OPTION],
+[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
+
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_CC_C_O
+# ---------------
+# Like AC_PROG_CC_C_O, but changed for automake.  We rewrite AC_PROG_CC
+# to automatically call this.
+AC_DEFUN([_AM_PROG_CC_C_O],
+[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
+AC_REQUIRE_AUX_FILE([compile])dnl
+AC_LANG_PUSH([C])dnl
+AC_CACHE_CHECK(
+  [whether $CC understands -c and -o together],
+  [am_cv_prog_cc_c_o],
+  [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])])
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i])
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+AC_LANG_POP([C])])
+
+# For backward compatibility.
+AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_RUN_LOG(COMMAND)
+# -------------------
+# Run COMMAND, save the exit status in ac_status, and log it.
+# (This has been adapted from Autoconf's _AC_RUN_LOG macro.)
+AC_DEFUN([AM_RUN_LOG],
+[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD
+   ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   (exit $ac_status); }])
+
+# Check to make sure that the build environment is sane. -*- Autoconf -*-
+
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SANITY_CHECK
+# ---------------
+AC_DEFUN([AM_SANITY_CHECK],
+[AC_MSG_CHECKING([whether build environment is sane])
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[[\\\"\#\$\&\'\`$am_lf]]*)
+    AC_MSG_ERROR([unsafe absolute working directory name]);;
+esac
+case $srcdir in
+  *[[\\\"\#\$\&\'\`$am_lf\ \	]]*)
+    AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$[*]" = "X"; then
+        # -L didn't work.
+        set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$[*]" != "X $srcdir/configure conftest.file" \
+        && test "$[*]" != "X conftest.file $srcdir/configure"; then
+
+        # If neither matched, then we have a broken ls.  This can happen
+        # if, for instance, CONFIG_SHELL is bash and it inherits a
+        # broken ls alias from the environment.  This has actually
+        # happened.  Such a system could not be considered "sane".
+        AC_MSG_ERROR([ls -t appears to fail.  Make sure there is not a broken
+  alias in your environment])
+     fi
+     if test "$[2]" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$[2]" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   AC_MSG_ERROR([newly created file is older than distributed files!
+Check your system clock])
+fi
+AC_MSG_RESULT([yes])
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) &
+  am_sleep_pid=$!
+fi
+AC_CONFIG_COMMANDS_PRE(
+  [AC_MSG_CHECKING([that generated files are newer than configure])
+   if test -n "$am_sleep_pid"; then
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null
+   fi
+   AC_MSG_RESULT([done])])
+rm -f conftest.file
+])
+
+# Copyright (C) 2009-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_SILENT_RULES([DEFAULT])
+# --------------------------
+# Enable less verbose build rules; with the default set to DEFAULT
+# ("yes" being less verbose, "no" or empty being verbose).
+AC_DEFUN([AM_SILENT_RULES],
+[AC_ARG_ENABLE([silent-rules], [dnl
+AS_HELP_STRING(
+  [--enable-silent-rules],
+  [less verbose build output (undo: "make V=1")])
+AS_HELP_STRING(
+  [--disable-silent-rules],
+  [verbose build output (undo: "make V=0")])dnl
+])
+case $enable_silent_rules in @%:@ (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);;
+esac
+dnl
+dnl A few 'make' implementations (e.g., NonStop OS and NextStep)
+dnl do not support nested variable expansions.
+dnl See automake bug#9928 and bug#10237.
+am_make=${MAKE-make}
+AC_CACHE_CHECK([whether $am_make supports nested variables],
+   [am_cv_make_support_nested_variables],
+   [if AS_ECHO([['TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+	@$(TRUE)
+.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi])
+if test $am_cv_make_support_nested_variables = yes; then
+  dnl Using '$V' instead of '$(V)' breaks IRIX make.
+  AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AC_SUBST([AM_V])dnl
+AM_SUBST_NOTMAKE([AM_V])dnl
+AC_SUBST([AM_DEFAULT_V])dnl
+AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl
+AC_SUBST([AM_DEFAULT_VERBOSITY])dnl
+AM_BACKSLASH='\'
+AC_SUBST([AM_BACKSLASH])dnl
+_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
+])
+
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_INSTALL_STRIP
+# ---------------------
+# One issue with vendor 'install' (even GNU) is that you can't
+# specify the program used to strip binaries.  This is especially
+# annoying in cross-compiling environments, where the build's strip
+# is unlikely to handle the host's binaries.
+# Fortunately install-sh will honor a STRIPPROG variable, so we
+# always use install-sh in "make install-strip", and initialize
+# STRIPPROG with the value of the STRIP variable (set by the user).
+AC_DEFUN([AM_PROG_INSTALL_STRIP],
+[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
+# Installed binaries are usually stripped using 'strip' when the user
+# run "make install-strip".  However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'.
+if test "$cross_compiling" != no; then
+  AC_CHECK_TOOL([STRIP], [strip], :)
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+AC_SUBST([INSTALL_STRIP_PROGRAM])])
+
+# Copyright (C) 2006-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_SUBST_NOTMAKE(VARIABLE)
+# ---------------------------
+# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
+# This macro is traced by Automake.
+AC_DEFUN([_AM_SUBST_NOTMAKE])
+
+# AM_SUBST_NOTMAKE(VARIABLE)
+# --------------------------
+# Public sister of _AM_SUBST_NOTMAKE.
+AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
+
+# Check how to create a tarball. -*- Autoconf -*-
+
+# Copyright (C) 2004-2018 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# _AM_PROG_TAR(FORMAT)
+# --------------------
+# Check how to create a tarball in format FORMAT.
+# FORMAT should be one of 'v7', 'ustar', or 'pax'.
+#
+# Substitute a variable $(am__tar) that is a command
+# writing to stdout a FORMAT-tarball containing the directory
+# $tardir.
+#     tardir=directory && $(am__tar) > result.tar
+#
+# Substitute a variable $(am__untar) that extract such
+# a tarball read from stdin.
+#     $(am__untar) < result.tar
+#
+AC_DEFUN([_AM_PROG_TAR],
+[# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
+
+m4_if([$1], [v7],
+  [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
+
+  [m4_case([$1],
+    [ustar],
+     [# The POSIX 1988 'ustar' format is defined with fixed-size fields.
+      # There is notably a 21 bits limit for the UID and the GID.  In fact,
+      # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343
+      # and bug#13588).
+      am_max_uid=2097151 # 2^21 - 1
+      am_max_gid=$am_max_uid
+      # The $UID and $GID variables are not portable, so we need to resort
+      # to the POSIX-mandated id(1) utility.  Errors in the 'id' calls
+      # below are definitely unexpected, so allow the users to see them
+      # (that is, avoid stderr redirection).
+      am_uid=`id -u || echo unknown`
+      am_gid=`id -g || echo unknown`
+      AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format])
+      if test $am_uid -le $am_max_uid; then
+         AC_MSG_RESULT([yes])
+      else
+         AC_MSG_RESULT([no])
+         _am_tools=none
+      fi
+      AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format])
+      if test $am_gid -le $am_max_gid; then
+         AC_MSG_RESULT([yes])
+      else
+        AC_MSG_RESULT([no])
+        _am_tools=none
+      fi],
+
+  [pax],
+    [],
+
+  [m4_fatal([Unknown tar format])])
+
+  AC_MSG_CHECKING([how to create a $1 tar archive])
+
+  # Go ahead even if we have the value already cached.  We do so because we
+  # need to set the values for the 'am__tar' and 'am__untar' variables.
+  _am_tools=${am_cv_prog_tar_$1-$_am_tools}
+
+  for _am_tool in $_am_tools; do
+    case $_am_tool in
+    gnutar)
+      for _am_tar in tar gnutar gtar; do
+        AM_RUN_LOG([$_am_tar --version]) && break
+      done
+      am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
+      am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
+      am__untar="$_am_tar -xf -"
+      ;;
+    plaintar)
+      # Must skip GNU tar: if it does not support --format= it doesn't create
+      # ustar tarball either.
+      (tar --version) >/dev/null 2>&1 && continue
+      am__tar='tar chf - "$$tardir"'
+      am__tar_='tar chf - "$tardir"'
+      am__untar='tar xf -'
+      ;;
+    pax)
+      am__tar='pax -L -x $1 -w "$$tardir"'
+      am__tar_='pax -L -x $1 -w "$tardir"'
+      am__untar='pax -r'
+      ;;
+    cpio)
+      am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
+      am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
+      am__untar='cpio -i -H $1 -d'
+      ;;
+    none)
+      am__tar=false
+      am__tar_=false
+      am__untar=false
+      ;;
+    esac
+
+    # If the value was cached, stop now.  We just wanted to have am__tar
+    # and am__untar set.
+    test -n "${am_cv_prog_tar_$1}" && break
+
+    # tar/untar a dummy directory, and stop if the command works.
+    rm -rf conftest.dir
+    mkdir conftest.dir
+    echo GrepMe > conftest.dir/file
+    AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
+    rm -rf conftest.dir
+    if test -s conftest.tar; then
+      AM_RUN_LOG([$am__untar <conftest.tar])
+      AM_RUN_LOG([cat conftest.dir/file])
+      grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
+    fi
+  done
+  rm -rf conftest.dir
+
+  AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
+  AC_MSG_RESULT([$am_cv_prog_tar_$1])])
+
+AC_SUBST([am__tar])
+AC_SUBST([am__untar])
+]) # _AM_PROG_TAR
+
+m4_include([config/m4/gtest.m4])
+m4_include([config/m4/libtool.m4])
+m4_include([config/m4/ltoptions.m4])
+m4_include([config/m4/ltsugar.m4])
+m4_include([config/m4/ltversion.m4])
+m4_include([config/m4/lt~obsolete.m4])
diff --git a/bindings/java/Makefile.am b/bindings/java/Makefile.am
new file mode 100644
index 0000000..c849b13
--- /dev/null
+++ b/bindings/java/Makefile.am
@@ -0,0 +1,18 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+#
+
+EXTRA_DIST = \
+	src/main/java \
+	src/test \
+	checkstyle.xml \
+	pom.xml.in \
+	README.md
+
+SUBDIRS = \
+	src/main/native
+
+clean-local:
+	-rm -rf resources
\ No newline at end of file
diff --git a/bindings/java/Makefile.in b/bindings/java/Makefile.in
new file mode 100644
index 0000000..c085165
--- /dev/null
+++ b/bindings/java/Makefile.in
@@ -0,0 +1,780 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+#
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = bindings/java
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \
+	$(top_srcdir)/config/m4/libtool.m4 \
+	$(top_srcdir)/config/m4/ltoptions.m4 \
+	$(top_srcdir)/config/m4/ltsugar.m4 \
+	$(top_srcdir)/config/m4/ltversion.m4 \
+	$(top_srcdir)/config/m4/lt~obsolete.m4 \
+	$(top_srcdir)/config/m4/ax_prog_doxygen.m4 \
+	$(top_srcdir)/config/m4/graphviz.m4 \
+	$(top_srcdir)/config/m4/compiler.m4 \
+	$(top_srcdir)/config/m4/sysdep.m4 \
+	$(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \
+	$(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \
+	$(top_srcdir)/config/m4/java.m4 \
+	$(top_srcdir)/config/m4/cuda.m4 \
+	$(top_srcdir)/config/m4/rocm.m4 \
+	$(top_srcdir)/config/m4/gdrcopy.m4 \
+	$(top_srcdir)/src/ucm/configure.m4 \
+	$(top_srcdir)/src/ucm/cuda/configure.m4 \
+	$(top_srcdir)/src/ucm/rocm/configure.m4 \
+	$(top_srcdir)/src/uct/configure.m4 \
+	$(top_srcdir)/src/uct/cuda/configure.m4 \
+	$(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \
+	$(top_srcdir)/src/uct/ib/configure.m4 \
+	$(top_srcdir)/src/uct/ib/cm/configure.m4 \
+	$(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \
+	$(top_srcdir)/src/uct/rocm/configure.m4 \
+	$(top_srcdir)/src/uct/rocm/gdr/configure.m4 \
+	$(top_srcdir)/src/uct/sm/configure.m4 \
+	$(top_srcdir)/src/uct/sm/cma/configure.m4 \
+	$(top_srcdir)/src/uct/sm/knem/configure.m4 \
+	$(top_srcdir)/src/uct/sm/mm/configure.m4 \
+	$(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \
+	$(top_srcdir)/src/uct/ugni/configure.m4 \
+	$(top_srcdir)/src/tools/perf/configure.m4 \
+	$(top_srcdir)/src/tools/perf/lib/configure.m4 \
+	$(top_srcdir)/src/tools/perf/cuda/configure.m4 \
+	$(top_srcdir)/src/tools/perf/rocm/configure.m4 \
+	$(top_srcdir)/test/gtest/configure.m4 \
+	$(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \
+	$(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \
+	$(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES = pom.xml
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+	ctags-recursive dvi-recursive html-recursive info-recursive \
+	install-data-recursive install-dvi-recursive \
+	install-exec-recursive install-html-recursive \
+	install-info-recursive install-pdf-recursive \
+	install-ps-recursive install-recursive installcheck-recursive \
+	installdirs-recursive pdf-recursive ps-recursive \
+	tags-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
+  distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+  $(RECURSIVE_TARGETS) \
+  $(RECURSIVE_CLEAN_TARGETS) \
+  $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+	distdir distdir-am
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
+am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/pom.xml.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BASE_CFLAGS = @BASE_CFLAGS@
+BASE_CPPFLAGS = @BASE_CPPFLAGS@
+BASE_CXXFLAGS = @BASE_CXXFLAGS@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@
+CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@
+CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@
+CUDA_CPPFLAGS = @CUDA_CPPFLAGS@
+CUDA_LDFLAGS = @CUDA_LDFLAGS@
+CXX = @CXX@
+CXX11FLAGS = @CXX11FLAGS@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+DX_BIBTEX = @DX_BIBTEX@
+DX_CONFIG = @DX_CONFIG@
+DX_DOCDIR = @DX_DOCDIR@
+DX_DOT = @DX_DOT@
+DX_DOXYGEN = @DX_DOXYGEN@
+DX_DVIPS = @DX_DVIPS@
+DX_EGREP = @DX_EGREP@
+DX_ENV = @DX_ENV@
+DX_FLAG_chi = @DX_FLAG_chi@
+DX_FLAG_chm = @DX_FLAG_chm@
+DX_FLAG_doc = @DX_FLAG_doc@
+DX_FLAG_dot = @DX_FLAG_dot@
+DX_FLAG_html = @DX_FLAG_html@
+DX_FLAG_man = @DX_FLAG_man@
+DX_FLAG_pdf = @DX_FLAG_pdf@
+DX_FLAG_ps = @DX_FLAG_ps@
+DX_FLAG_rtf = @DX_FLAG_rtf@
+DX_FLAG_xml = @DX_FLAG_xml@
+DX_HHC = @DX_HHC@
+DX_LATEX = @DX_LATEX@
+DX_MAKEINDEX = @DX_MAKEINDEX@
+DX_PDFLATEX = @DX_PDFLATEX@
+DX_PERL = @DX_PERL@
+DX_PROJECT = @DX_PROJECT@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@
+GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@
+GITBIN = @GITBIN@
+GRAPHVIZ_DOT = @GRAPHVIZ_DOT@
+GREP = @GREP@
+GTEST_CXXFLAGS = @GTEST_CXXFLAGS@
+HIP_CPPFLAGS = @HIP_CPPFLAGS@
+HIP_CXXFLAGS = @HIP_CXXFLAGS@
+HIP_LDFLAGS = @HIP_LDFLAGS@
+HIP_LIBS = @HIP_LIBS@
+IBCM_LIBS = @IBCM_LIBS@
+IBVERBS_CFLAGS = @IBVERBS_CFLAGS@
+IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@
+IBVERBS_DIR = @IBVERBS_DIR@
+IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JAVA = @JAVA@
+JAVABIN = @JAVABIN@
+JAVA_HOME = @JAVA_HOME@
+JDK = @JDK@
+KNEM_CPPFLAGS = @KNEM_CPPFLAGS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBM = @LIBM@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_MLX5 = @LIB_MLX5@
+LIPO = @LIPO@
+LN_RS = @LN_RS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAINT = @MAINT@
+MAJOR_VERSION = @MAJOR_VERSION@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MINOR_VERSION = @MINOR_VERSION@
+MKDIR_P = @MKDIR_P@
+MPICC = @MPICC@
+MPIRUN = @MPIRUN@
+MVN = @MVN@
+MVNBIN = @MVNBIN@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NUMA_LIBS = @NUMA_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OPENMP_CFLAGS = @OPENMP_CFLAGS@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATCH_VERSION = @PATCH_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@
+PKG_CONFIG = @PKG_CONFIG@
+RANLIB = @RANLIB@
+RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@
+RDMACM_LDFLAGS = @RDMACM_LDFLAGS@
+RDMACM_LIBS = @RDMACM_LIBS@
+READLINK = @READLINK@
+ROCM_CPPFLAGS = @ROCM_CPPFLAGS@
+ROCM_LDFLAGS = @ROCM_LDFLAGS@
+ROCM_LIBS = @ROCM_LIBS@
+RTE_CPPFLAGS = @RTE_CPPFLAGS@
+RTE_LDFLAGS = @RTE_LDFLAGS@
+SCM_BRANCH = @SCM_BRANCH@
+SCM_VERSION = @SCM_VERSION@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SOVERSION = @SOVERSION@
+STRIP = @STRIP@
+UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@
+UCX_PERFTEST_CC = @UCX_PERFTEST_CC@
+VALGRIND_LIBPATH = @VALGRIND_LIBPATH@
+VERSION = @VERSION@
+XPMEM_CFLAGS = @XPMEM_CFLAGS@
+XPMEM_LIBS = @XPMEM_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_bindings = @build_bindings@
+build_cpu = @build_cpu@
+build_modules = @build_modules@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localmoduledir = @localmoduledir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+moduledir = @moduledir@
+modulesubdir = @modulesubdir@
+objdir = @objdir@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+shrext = @shrext@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+top_top_srcdir = @top_top_srcdir@
+EXTRA_DIST = \
+	src/main/java \
+	src/test \
+	checkstyle.xml \
+	pom.xml.in \
+	README.md
+
+SUBDIRS = \
+	src/main/native
+
+all: all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign bindings/java/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign bindings/java/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+pom.xml: $(top_builddir)/config.status $(srcdir)/pom.xml.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+# (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-local mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool clean-local \ + cscopelist-am ctags ctags-am distclean distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + 
install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + +.PRECIOUS: Makefile + + +clean-local: + -rm -rf resources + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/bindings/java/README.md b/bindings/java/README.md new file mode 100644 index 0000000..ac02aa1 --- /dev/null +++ b/bindings/java/README.md @@ -0,0 +1,10 @@ +

JUCX

+ +JUCX is a Java API over UCP (UCX protocol).
+See more about UCX at: https://github.com/openucx/ucx + +# Building JUCX +Building the source requires [Apache Maven](http://maven.apache.org/), [GNU Autotools](http://www.gnu.org/software/autoconf/autoconf.html), and Java version 8 or higher.
+Java binding will be built by default, but it is recommended to execute the following steps: +1. export JAVA_HOME=\. +2. When running UCX's "configure" add "--with-java" flag, i.e. "shell$ ./configure --with-java". diff --git a/bindings/java/checkstyle.xml b/bindings/java/checkstyle.xml new file mode 100644 index 0000000..907d7af --- /dev/null +++ b/bindings/java/checkstyle.xml @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/bindings/java/pom.xml.in b/bindings/java/pom.xml.in new file mode 100644 index 0000000..872a089 --- /dev/null +++ b/bindings/java/pom.xml.in @@ -0,0 +1,365 @@ + + + + 4.0.0 + org.openucx + jucx + @VERSION@ + jar + jucx + https://github.com/openucx/ucx + Java binding to ucx high performance communication library + + + + UCX group + https://elist.ornl.gov/mailman/listinfo/ucx-group + + + + + + BSD 3 Clause License + http://www.openucx.org/license/ + repo + + + + + scm:git:git://github.com/openucx/ucx.git + scm:git:ssh://git@github.com/openucx/ucx.git + HEAD + https://github.com/openucx/ucx.git + + + + false + ${env.GPG_PASSPHRASE} + UTF-8 + @abs_top_srcdir@/src + ${ucx.src.dir}/../bindings/java + @abs_top_builddir@ + ${ucx.build.dir}/bindings/java/src/main/native + ${ucx.build.dir}/src/ucm/.libs + ${ucx.build.dir}/src/ucs/.libs + ${ucx.build.dir}/src/uct/.libs + ${ucx.build.dir}/src/ucp/.libs + 4.12 + **/jucx/** + false + + + + Github + https://github.com/openucx/ucx/issues + + + + + Peter Rudenko + peterr@mellanox.com + Mellanox Technologies + + + Yossi Itigin + yosefe@mellanox.com + Mellanox Technologies + + + + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + + ossrh + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + + gcs-maven-central-mirror + + GCS Maven Central mirror + https://maven-central.storage-download.googleapis.com/repos/central/data/ + + true + + + false + + + + + central + Maven 
Repository + https://repo.maven.apache.org/maven2 + + true + + + false + + + + + + gcs-maven-central-mirror + + GCS Maven Central mirror + https://maven-central.storage-download.googleapis.com/repos/central/data/ + + true + + + false + + + + central + https://repo.maven.apache.org/maven2 + + true + + + false + + + + + + + junit + junit + ${junit.version} + test + + + + + ${jucx.src.dir}/src/main/java + ${jucx.src.dir}/src/test/java + ${native.dir}/build-java + + + resources + + **/* + + + + + + resources + + libjucx.so + + + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.1.0 + + + attach-sources + + jar-no-fork + + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 1.6 + + + --pinentry-mode + loopback + + + + + sign-artifacts + deploy + + sign + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + + -h + ${native.dir} + + 1.8 + 1.8 + + ${sources} + + + + + default-testCompile + test-compile + + testCompile + + + + + + + maven-clean-plugin + 3.0.0 + + + + resources + + **/*.so + + + + + + + + maven-resources-plugin + 2.7 + + + copy-dynamic-libs + generate-resources + + copy-resources + + + true + ${skipCopy} + ${basedir}/resources + + + ${native.dir}/.libs + + **/*.so + + + + ${ucm.lib.path} + + **/*.so + + + + ${ucs.lib.path} + + **/*.so + + + + ${uct.lib.path} + + **/*.so + + + + ${ucp.lib.path} + + **/*.so + + + + + + + + + + org.apache.maven.plugins + maven-checkstyle-plugin + 3.1.0 + + + com.puppycrawl.tools + checkstyle + 8.29 + + + + + validate + validate + + ${jucx.src.dir}/checkstyle.xml + UTF-8 + true + true + false + true + warning + + + check + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.0.1 + + true + all,-missing + + + + attach-javadocs + + jar + + + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.8 + true + + ossrh + https://oss.sonatype.org/ + true + + + + + + diff --git a/bindings/java/src/main/java/org/openucx/jucx/NativeLibs.java 
b/bindings/java/src/main/java/org/openucx/jucx/NativeLibs.java
new file mode 100644
index 0000000..b45fd4c
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/NativeLibs.java
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+package org.openucx.jucx;
+
+import java.io.*;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.FileSystem;
+import java.nio.file.FileSystems;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Collections;
+import java.util.stream.Stream;
+
+public class NativeLibs {
+    private static final String UCM  = "libucm.so";
+    private static final String UCS  = "libucs.so";
+    private static final String UCT  = "libuct.so";
+    private static final String UCP  = "libucp.so";
+    private static final String JUCX = "libjucx.so";
+    private static ClassLoader loader = NativeLibs.class.getClassLoader();
+    private static String errorMessage = null;
+
+    static {
+        extractUCTLibs();  // UCT Transport
+        loadLibrary(UCM);  // UCM library
+        loadLibrary(UCS);  // UCS library
+        loadLibrary(UCT);  // UCT library
+        loadLibrary(UCP);  // UCP library
+        loadLibrary(JUCX); // JUCX native library
+    }
+
+    /**
+     * Triggers the static initializer of this class (which extracts and loads
+     * the native libraries) and reports any failure recorded during it.
+     *
+     * @throws UnsatisfiedLinkError if a library failed to extract or load
+     */
+    public static void load() {
+        if (errorMessage != null) {
+            throw new UnsatisfiedLinkError(errorMessage);
+        }
+    }
+
+    /**
+     * Tries to load the library, by extracting it from the current class jar.
+     * On failure the error is recorded and reported later by {@link #load()};
+     * there is no fallback to {@link System#loadLibrary(String)}.
+     *
+     * @param resourceName - library name to be extracted and loaded from the current jar.
+     */
+    private static void loadLibrary(String resourceName) {
+        // Search shared object on java classpath
+        URL url = loader.getResource(resourceName);
+        if (url == null) {
+            // Not on the classpath: record it instead of failing with an NPE
+            errorMessage = "Native code library not found on classpath: " + resourceName;
+            return;
+        }
+        File file = null;
+        try { // Extract shared object's content to a generated temp file
+            file = extractResource(url);
+        } catch (IOException ex) {
+            errorMessage = "Native code library failed to extract URL: " + url;
+            return;
+        }
+
+        if (file != null && file.exists()) {
+            String filename = file.getAbsolutePath();
+            try { // Load shared object to JVM
+                System.load(filename);
+            } catch (UnsatisfiedLinkError ex) {
+                errorMessage = "Native code library failed to load: "
+                    + resourceName;
+            }
+
+            file.deleteOnExit();
+        }
+    }
+
+    /**
+     * Extracts the shared UCT transport libraries (*.so files under the "ucx"
+     * resource folder) into a "ucx" sub-directory of {@link #tempDir}.
+     */
+    private static void extractUCTLibs() {
+        Path ucxTempFolder, ucxFolder;
+        Stream<Path> uctLibs;
+        final FileSystem fileSystem;
+        try {
+            createTempDir();
+            ucxTempFolder = Files.createDirectory(Paths.get(tempDir.getPath(), "ucx"));
+            ucxTempFolder.toFile().deleteOnExit();
+            URL ucxUrl = NativeLibs.class.getClassLoader().getResource("ucx");
+            if (ucxUrl == null) {
+                errorMessage = "Failed to find ucx resources";
+                return;
+            }
+            URI uri = ucxUrl.toURI();
+            if ("jar".equals(uri.getScheme())) {
+                fileSystem = FileSystems.newFileSystem(uri, Collections.emptyMap(), null);
+                ucxFolder = fileSystem.getPath("ucx");
+            } else {
+                ucxFolder = Paths.get(uri);
+                fileSystem = null;
+            }
+            uctLibs = Files.walk(ucxFolder, 1);
+        } catch (IOException ex) {
+            errorMessage = "Failed to create temp directory";
+            return;
+        } catch (URISyntaxException e) {
+            errorMessage = "Failed to find ucx resources";
+            return;
+        }
+
+        try {
+            uctLibs.forEach(filePath -> {
+                if (!filePath.getFileName().toString().contains(".so")) {
+                    return;
+                }
+                FileOutputStream os = null;
+                InputStream is = null;
+                File out = new File(ucxTempFolder.toAbsolutePath().toString(),
+                                    filePath.getFileName().toString());
+                out.deleteOnExit();
+                try {
+                    // Read through the entry's own file system so that both the
+                    // jar and the plain-directory layouts are handled the same way.
+                    is = Files.newInputStream(filePath);
+                    os = new FileOutputStream(out);
+                    copy(is, os);
+                } catch (IOException ex) {
+                    errorMessage = "Failed to copy UCT lib: " + ex.getLocalizedMessage();
+                    return;
+                } finally {
+                    closeQuietly(os);
+                    closeQuietly(is);
+                }
+            });
+        } finally {
+            uctLibs.close(); // Files.walk holds open directory handles until closed
+            closeQuietly(fileSystem);
+        }
+    }
+
+    /**
+     * Extracts a resource into a temp directory.
+     *
+     * @param resourceURL - the URL of the resource to extract, must not be null
+     * @return the extracted file, or null if the temp directory could not be created
+     * @throws IOException if fails to extract resource properly
+     */
+    private static File extractResource(URL resourceURL) throws IOException {
+        // Create the directory first so that no stream is left open on failure
+        try {
+            createTempDir();
+        } catch (IOException ex) {
+            errorMessage = "Failed to create temp directory";
+            return null;
+        }
+
+        File file = new File(tempDir,
+            new File(resourceURL.getPath()).getName());
+        file.deleteOnExit();
+        InputStream is = null;
+        FileOutputStream os = null;
+        try {
+            is = resourceURL.openStream();
+            os = new FileOutputStream(file);
+            copy(is, os);
+        } finally {
+            closeQuietly(os);
+            closeQuietly(is);
+        }
+        return file;
+    }
+
+    /**
+     * Temporary directory set and returned by {@link #createTempDir()}.
+     */
+    static File tempDir = null;
+
+    /**
+     * Creates a new temp directory in default temp files directory.
+     * Directory will be represented by {@link #tempDir}.
+     */
+    private static void createTempDir() throws IOException {
+        if (tempDir == null) {
+            Path tmp = Files.createTempDirectory("jucx");
+            tempDir = tmp.toFile();
+            tempDir.deleteOnExit();
+        }
+    }
+
+    /**
+     * Helper function to copy an InputStream into an OutputStream.
+     */
+    private static void copy(InputStream is, OutputStream os)
+        throws IOException {
+        if (is == null || os == null) {
+            return;
+        }
+        byte[] buffer = new byte[1024];
+        int length = 0;
+        while ((length = is.read(buffer)) != -1) {
+            os.write(buffer, 0, length);
+        }
+    }
+
+    /**
+     * Helper function to close InputStream or OutputStream in a quiet way
+     * which hides the exceptions.
+     */
+    private static void closeQuietly(Closeable closable) {
+        if (closable == null) {
+            return;
+        }
+        try {
+            closable.close();
+        } catch (IOException ex) {
+            // No logging in this 'Quiet Close' method
+        }
+    }
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/UcxCallback.java b/bindings/java/src/main/java/org/openucx/jucx/UcxCallback.java
new file mode 100644
index 0000000..a75cb76
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/UcxCallback.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+package org.openucx.jucx;
+
+import org.openucx.jucx.ucp.UcpRequest;
+
+/**
+ * Callback wrapper to notify success or failure events from JNI.
+ */
+
+public class UcxCallback {
+    public void onSuccess(UcpRequest request) {}
+
+    public void onError(int ucsStatus, String errorMsg) {
+        throw new UcxException(errorMsg);
+    }
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/UcxException.java b/bindings/java/src/main/java/org/openucx/jucx/UcxException.java
new file mode 100644
index 0000000..8fb3554
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/UcxException.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+package org.openucx.jucx;
+
+/**
+ * Exception to be thrown from JNI and all UCX routines.
+ */ +public class UcxException extends RuntimeException { + + public UcxException() { + super(); + } + + public UcxException(String message) { + super(message); + } +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/UcxNativeStruct.java b/bindings/java/src/main/java/org/openucx/jucx/UcxNativeStruct.java new file mode 100644 index 0000000..2fd71cb --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/UcxNativeStruct.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +package org.openucx.jucx; + +/** + * Base class for wrappers around a native ucx struct; holds its pointer address. + */ +public abstract class UcxNativeStruct { + private Long nativeId; + + /** + * Getter for native pointer as a boxed long (null if no pointer is set). + * @return long integer representing native pointer + */ + public Long getNativeId() { + return nativeId; + } + + protected void setNativeId(Long nativeId) { + if (nativeId != null && nativeId < 0) { // null is accepted, negative values are rejected + throw new UcxException("UcxNativeStruct.setNativeId: invalid native pointer: " + + nativeId); + } + this.nativeId = nativeId; + } +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/UcxParams.java b/bindings/java/src/main/java/org/openucx/jucx/UcxParams.java new file mode 100644 index 0000000..e72a9ba --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/UcxParams.java @@ -0,0 +1,25 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +package org.openucx.jucx; + +/** + * Common base class for parameter objects used to instantiate ucx objects. + */ +public abstract class UcxParams { + /** + * Mask of valid fields in this structure. + * Fields not specified in this mask would be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + protected long fieldMask; + /** + * Reset state of parameters. 
+     */
+    public UcxParams clear() {
+        fieldMask = 0L;
+        return this;
+    }
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/UcxUtils.java b/bindings/java/src/main/java/org/openucx/jucx/UcxUtils.java
new file mode 100644
index 0000000..8f43bf0
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/UcxUtils.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+package org.openucx.jucx;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.nio.ByteBuffer;
+
+public class UcxUtils {
+
+    private static final Constructor directBufferConstructor;
+
+    static {
+        try {
+            Class classDirectByteBuffer = Class.forName("java.nio.DirectByteBuffer");
+            directBufferConstructor = classDirectByteBuffer.getDeclaredConstructor(long.class,
+                int.class);
+            directBufferConstructor.setAccessible(true);
+        } catch (Exception e) {
+            throw new UcxException(e.getMessage());
+        }
+    }
+
+    /**
+     * Returns view of underlying memory region as a ByteBuffer.
+     * @param address - address of start of memory region
+     */
+    public static ByteBuffer getByteBufferView(long address, int length)
+        throws IllegalAccessException, InvocationTargetException, InstantiationException {
+        return (ByteBuffer)directBufferConstructor.newInstance(address, length);
+    }
+
+    /**
+     * Returns native address of the current position of a direct byte buffer.
+     */
+    public static long getAddress(ByteBuffer buffer) {
+        return ((sun.nio.ch.DirectBuffer) buffer).address() + buffer.position();
+    }
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/examples/UcxBenchmark.java b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxBenchmark.java
new file mode 100644
index 0000000..26636e7
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxBenchmark.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+package org.openucx.jucx.examples;
+
+import org.openucx.jucx.ucp.*;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Stack;
+
+public abstract class UcxBenchmark {
+
+    protected static Map<String, String> argsMap = new HashMap<>();
+
+    // Stack of closable resources (context, worker, etc.) to be closed at the end.
+    protected static Stack<Closeable> resources = new Stack<>();
+
+    protected static UcpContext context;
+
+    protected static UcpWorker worker;
+
+    protected static int serverPort;
+
+    protected static int numIterations;
+
+    protected static long totalSize;
+
+    protected static UcpMemMapParams allocationParams;
+
+    private static String DESCRIPTION = "JUCX benchmark.\n" +
+        "Run: \n" +
+        "java -cp jucx.jar org.openucx.jucx.examples.UcxReadBWBenchmarkReceiver " +
+        "[s=host] [p=port] [n=number of iterations]\n" +
+        "java -cp jucx.jar org.openucx.jucx.examples.UcxReadBWBenchmarkSender " +
+        "[s=receiver host] [p=receiver port] [t=total size to transfer]\n\n" +
+        "Parameters:\n" +
+        "h - print help\n" +
+        "s - IP address to bind sender listener (default: 0.0.0.0)\n" +
+        "p - port to bind sender listener (default: 54321)\n" +
+        "t - total size in bytes to transfer from sender to receiver (default 10000)\n" +
+        "o - on demand registration (default: false) \n" +
+        "n - number of iterations (default 5)\n";
+
+    static {
+        argsMap.put("s", "0.0.0.0");
+        argsMap.put("p", "54321");
+        argsMap.put("t", "10000");
+        argsMap.put("o", "false");
+        argsMap.put("n", "5");
+    }
+
+    /**
+     * Initializes common variables from command line arguments.
+     * Every argument must have the form key=value; a bare "h" (or any other
+     * argument without a value) prints the usage text instead.
+     *
+     * @param args command line arguments
+     * @return true if the benchmark can proceed, false if usage was printed
+     */
+    protected static boolean initializeArguments(String[] args) {
+        for (String arg: args) {
+            // Split on the first '=' only, so that values may themselves contain '='
+            String[] parts = arg.split("=", 2);
+            if (parts.length != 2 || parts[0].equals("h")) {
+                System.out.println(DESCRIPTION);
+                return false;
+            }
+            argsMap.put(parts[0], parts[1]);
+        }
+        try {
+            serverPort = Integer.parseInt(argsMap.get("p"));
+            numIterations = Integer.parseInt(argsMap.get("n"));
+            totalSize = Long.parseLong(argsMap.get("t"));
+            allocationParams = new UcpMemMapParams().allocate().setLength(totalSize);
+            if (argsMap.get("o").compareToIgnoreCase("true") == 0) {
+                allocationParams.nonBlocking();
+            }
+        } catch (NumberFormatException ex) {
+            System.out.println(DESCRIPTION);
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Creates the UCP context (wakeup, RMA and tag features requested) and a
+     * worker, pushing both onto {@link #resources} for later cleanup.
+     */
+    protected static void createContextAndWorker() {
+        context = new UcpContext(new UcpParams().requestWakeupFeature()
+            .requestRmaFeature().requestTagFeature());
+        resources.push(context);
+
+        worker = context.newWorker(new UcpWorkerParams());
+        resources.push(worker);
+    }
+
+    /**
+     * Computes bandwidth in gigabits per second.
+     *
+     * @param nanoTimeDelta elapsed time in nanoseconds
+     * @param size number of bytes transferred
+     * @return size * 8 / 1e9 divided by the elapsed seconds
+     */
+    protected static double getBandwithGbits(long nanoTimeDelta, long size) {
+        double sizeInGigabits = (double)size * 8.0 / 1e9;
+        double secondsElapsed = nanoTimeDelta / 1e9;
+        return sizeInGigabits / secondsElapsed;
+    }
+
+    /**
+     * Pops and closes every registered resource, most recently pushed first.
+     */
+    protected static void closeResources() throws IOException {
+        while (!resources.empty()) {
+            resources.pop().close();
+        }
+    }
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkReceiver.java b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkReceiver.java
new file mode 100644
index 0000000..96a6a1a
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkReceiver.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+package org.openucx.jucx.examples;
+
+import org.openucx.jucx.UcxCallback;
+import org.openucx.jucx.ucp.UcpRequest;
+import org.openucx.jucx.UcxUtils;
+import org.openucx.jucx.ucp.*;
+
+
+import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
+import java.util.concurrent.atomic.AtomicReference;
+/** Receiver side: reads the sender's memory via RMA get and prints the bandwidth. */
+public class UcxReadBWBenchmarkReceiver extends UcxBenchmark {
+
+    public static void main(String[] args) throws Exception {
+        if (!initializeArguments(args)) {
+            return;
+        }
+
+        createContextAndWorker();
+
+        String serverHost = argsMap.get("s");
+        InetSocketAddress sockaddr = new InetSocketAddress(serverHost, serverPort);
+        AtomicReference<UcpConnectionRequest> connRequest = new AtomicReference<>(null);
+        UcpListener listener = worker.newListener(
+            new UcpListenerParams()
+                .setConnectionHandler(connRequest::set)
+                .setSockAddr(sockaddr));
+        resources.push(listener);
+        System.out.println("Waiting for connections on " + sockaddr + " ...");
+
+        while (connRequest.get() == null) {
+            worker.progress();
+        }
+
+        UcpEndpoint endpoint = worker.newEndpoint(new UcpEndpointParams()
+            .setConnectionRequest(connRequest.get())
+            .setPeerErrorHadnlingMode());
+
+        // Temporary workaround until new connection establishment protocol in UCX.
+        for (int i = 0; i < 10; i++) {
+            worker.progress();
+            try {
+                Thread.sleep(10);
+            } catch (Exception ignored) { }
+        }
+
+        ByteBuffer recvBuffer = ByteBuffer.allocateDirect(4096);
+        UcpRequest recvRequest = worker.recvTaggedNonBlocking(recvBuffer, null);
+
+        worker.progressRequest(recvRequest);
+
+        long remoteAddress = recvBuffer.getLong(); // layout: address, size, rkey size, rkey, hash
+        long remoteSize = recvBuffer.getLong();
+        int remoteKeySize = recvBuffer.getInt();
+        int rkeyBufferOffset = recvBuffer.position();
+
+        recvBuffer.position(rkeyBufferOffset + remoteKeySize);
+        int remoteHashCode = recvBuffer.getInt();
+        System.out.printf("Received connection. Will read %d bytes from remote address %d%n",
+            remoteSize, remoteAddress);
+
+        recvBuffer.position(rkeyBufferOffset);
+        UcpRemoteKey remoteKey = endpoint.unpackRemoteKey(recvBuffer);
+        resources.push(remoteKey);
+
+        UcpMemory recvMemory = context.memoryMap(allocationParams);
+        resources.push(recvMemory);
+        ByteBuffer data = UcxUtils.getByteBufferView(recvMemory.getAddress(),
+            (int)Math.min(Integer.MAX_VALUE, totalSize));
+        for (int i = 0; i < numIterations; i++) {
+            final int iterNum = i;
+            UcpRequest getRequest = endpoint.getNonBlocking(remoteAddress, remoteKey,
+                recvMemory.getAddress(), totalSize,
+                new UcxCallback() {
+                    long startTime = System.nanoTime(); // taken when the callback is created
+
+                    @Override
+                    public void onSuccess(UcpRequest request) {
+                        long finishTime = System.nanoTime();
+                        data.clear();
+                        assert data.hashCode() == remoteHashCode;
+                        double bw = getBandwithGbits(finishTime - startTime, remoteSize);
+                        System.out.printf("Iteration %d, bandwidth: %.4f Gb/s%n", iterNum, bw);
+                    }
+                });
+
+            worker.progressRequest(getRequest);
+            // To make sure we receive correct data each time to compare hashCodes
+            data.put(0, (byte)1);
+        }
+
+        ByteBuffer sendBuffer = ByteBuffer.allocateDirect(100);
+        sendBuffer.asCharBuffer().put("DONE");
+
+        UcpRequest sent = endpoint.sendTaggedNonBlocking(sendBuffer, null);
+        worker.progressRequest(sent);
+
+        UcpRequest closeRequest = endpoint.closeNonBlockingFlush();
+        worker.progressRequest(closeRequest);
+        // Close request won't be returned to the pool automatically, since there's no callback.
+        resources.push(closeRequest);
+
+        closeResources();
+    }
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkSender.java b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkSender.java
new file mode 100644
index 0000000..c79cf6b
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/examples/UcxReadBWBenchmarkSender.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019.
ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+package org.openucx.jucx.examples;
+
+import org.openucx.jucx.UcxCallback;
+import org.openucx.jucx.ucp.UcpRequest;
+import org.openucx.jucx.UcxUtils;
+import org.openucx.jucx.ucp.UcpEndpoint;
+import org.openucx.jucx.ucp.UcpEndpointParams;
+import org.openucx.jucx.ucp.UcpMemory;
+
+import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
+
+
+public class UcxReadBWBenchmarkSender extends UcxBenchmark {
+
+    public static void main(String[] args) throws Exception {
+        if (!initializeArguments(args)) {
+            return;
+        }
+
+        createContextAndWorker();
+
+        String serverHost = argsMap.get("s");
+        UcpEndpoint endpoint = worker.newEndpoint(new UcpEndpointParams()
+            .setPeerErrorHadnlingMode()
+            .setSocketAddress(new InetSocketAddress(serverHost, serverPort)));
+
+        UcpMemory memory = context.memoryMap(allocationParams);
+        resources.push(memory);
+        ByteBuffer data = UcxUtils.getByteBufferView(memory.getAddress(),
+            (int)Math.min(Integer.MAX_VALUE, totalSize));
+
+        // Packed remote key of the mapped memory; it is sent to the receiver below.
+        ByteBuffer rkeyBuffer = memory.getRemoteKeyBuffer();
+
+        // 24 bytes = 8 (buffer address) + 8 (buffer size) + 4 (rkeyBuffer size) + 4 (hashCode)
+        ByteBuffer sendData = ByteBuffer.allocateDirect(24 + rkeyBuffer.capacity());
+        sendData.putLong(memory.getAddress());
+        sendData.putLong(totalSize);
+        sendData.putInt(rkeyBuffer.capacity());
+        sendData.put(rkeyBuffer);
+        sendData.putInt(data.hashCode());
+        sendData.clear();
+
+        // Send the memory metadata, then wait for a tagged message from the receiver.
+        endpoint.sendTaggedNonBlocking(sendData, null);
+        ByteBuffer recvBuffer = ByteBuffer.allocateDirect(4096);
+        UcpRequest recvRequest = worker.recvTaggedNonBlocking(recvBuffer,
+            new UcxCallback() {
+                @Override
+                public void onSuccess(UcpRequest request) {
+                    System.out.println("Received a message:");
+                    System.out.println(recvBuffer.asCharBuffer().toString().trim());
+                }
+            });
+
+        worker.progressRequest(recvRequest);
+
+        UcpRequest closeRequest = endpoint.closeNonBlockingFlush();
+        worker.progressRequest(closeRequest);
+        resources.push(closeRequest);
+
+        closeResources();
+    }
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConnectionRequest.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConnectionRequest.java
new file mode 100644
index 0000000..f0e7529
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConnectionRequest.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+package org.openucx.jucx.ucp;
+
+import org.openucx.jucx.UcxNativeStruct;
+
+/**
+ * A server-side handle to an incoming connection request. Can be used to create an
+ * endpoint which connects back to the client.
+ */
+public class UcpConnectionRequest extends UcxNativeStruct {
+
+    private UcpConnectionRequest(long nativeId) {
+        setNativeId(nativeId);
+    }
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConstants.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConstants.java
new file mode 100644
index 0000000..e47a25a
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpConstants.java
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms. 
+ */
+
+package org.openucx.jucx.ucp;
+
+import org.openucx.jucx.NativeLibs;
+import org.openucx.jucx.UcxCallback;
+
+public class UcpConstants {
+    static {
+        NativeLibs.load();
+        loadConstants();
+    }
+
+    /**
+     * UCP context parameters field mask.
+     *
+     * <p>The enumeration allows specifying which fields in {@link UcpParams} are
+     * present. It is used for the enablement of backward compatibility support.
+     */
+    static long UCP_PARAM_FIELD_FEATURES;
+    static long UCP_PARAM_FIELD_TAG_SENDER_MASK;
+    static long UCP_PARAM_FIELD_MT_WORKERS_SHARED;
+    static long UCP_PARAM_FIELD_ESTIMATED_NUM_EPS;
+
+    /**
+     * UCP configuration features
+     *
+     * <p>The enumeration list describes the features supported by UCP.
+     * An application can request the features using "UCP parameters"
+     * during "UCP initialization" process.
+     */
+    static long UCP_FEATURE_TAG;
+    static long UCP_FEATURE_RMA;
+    static long UCP_FEATURE_AMO32;
+    static long UCP_FEATURE_AMO64;
+    static long UCP_FEATURE_WAKEUP;
+    static long UCP_FEATURE_STREAM;
+
+    /**
+     * UCP worker parameters field mask.
+     *
+     * <p>The enumeration allows specifying which fields in {@link UcpWorker} are
+     * present. It is used for the enablement of backward compatibility support.
+     */
+    static long UCP_WORKER_PARAM_FIELD_THREAD_MODE;
+    static long UCP_WORKER_PARAM_FIELD_CPU_MASK;
+    static long UCP_WORKER_PARAM_FIELD_EVENTS;
+    static long UCP_WORKER_PARAM_FIELD_USER_DATA;
+    static long UCP_WORKER_PARAM_FIELD_EVENT_FD;
+
+    /**
+     * Mask of events which are expected on wakeup.
+     * If it is not set, all types of events will trigger on
+     * wakeup.
+     */
+    static long UCP_WAKEUP_RMA;
+    static long UCP_WAKEUP_AMO;
+    static long UCP_WAKEUP_TAG_SEND;
+    static long UCP_WAKEUP_TAG_RECV;
+    static long UCP_WAKEUP_TX;
+    static long UCP_WAKEUP_RX;
+    static long UCP_WAKEUP_EDGE;
+
+    /**
+     * UCP listener parameters field mask.
+     */
+    static long UCP_LISTENER_PARAM_FIELD_SOCK_ADDR;
+    static long UCP_LISTENER_PARAM_FIELD_ACCEPT_HANDLER;
+    static long UCP_LISTENER_PARAM_FIELD_CONN_HANDLER;
+
+    /**
+     * UCP endpoint parameters field mask.
+     */
+    static long UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
+    static long UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE;
+    static long UCP_EP_PARAM_FIELD_ERR_HANDLER;
+    static long UCP_EP_PARAM_FIELD_USER_DATA;
+    static long UCP_EP_PARAM_FIELD_SOCK_ADDR;
+    static long UCP_EP_PARAM_FIELD_FLAGS;
+    static long UCP_EP_PARAM_FIELD_CONN_REQUEST;
+
+    /**
+     * UCP error handling mode.
+     */
+    static int UCP_ERR_HANDLING_MODE_PEER;
+
+    /**
+     * The enumeration list describes the endpoint's parameters flags.
+     */
+    static long UCP_EP_PARAMS_FLAGS_CLIENT_SERVER;
+    static long UCP_EP_PARAMS_FLAGS_NO_LOOPBACK;
+
+    /**
+     * The enumeration is used to specify the behavior of UcpEndpoint closeNonBlocking.
+     */
+    static int UCP_EP_CLOSE_MODE_FORCE;
+    static int UCP_EP_CLOSE_MODE_FLUSH;
+
+    /**
+     * UCP memory mapping parameters field mask. 
+ */ + static long UCP_MEM_MAP_PARAM_FIELD_ADDRESS; + static long UCP_MEM_MAP_PARAM_FIELD_LENGTH; + static long UCP_MEM_MAP_PARAM_FIELD_FLAGS; + + /** + * The enumeration list describes the memory mapping flags. + */ + static long UCP_MEM_MAP_NONBLOCK; + static long UCP_MEM_MAP_ALLOCATE; + static long UCP_MEM_MAP_FIXED; + + /** + * The enumeration defines behavior of + * {@link UcpEndpoint#recvStreamNonBlocking(long, long, long, UcxCallback)} function. + */ + public static long UCP_STREAM_RECV_FLAG_WAITALL; + + private static native void loadConstants(); +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpContext.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpContext.java new file mode 100644 index 0000000..50cf4de --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpContext.java @@ -0,0 +1,89 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +package org.openucx.jucx.ucp; + +import java.io.Closeable; +import java.nio.ByteBuffer; + +import org.openucx.jucx.NativeLibs; +import org.openucx.jucx.UcxException; +import org.openucx.jucx.UcxNativeStruct; +import org.openucx.jucx.UcxUtils; + +/** + * UCP application context (or just a context) is an opaque handle that holds a + * UCP communication instance's global information. It represents a single UCP + * communication instance. The communication instance could be an OS process + * (an application) that uses UCP library. This global information includes + * communication resources, endpoints, memory, temporary file storage, and + * other communication information directly associated with a specific UCP + * instance. The context also acts as an isolation mechanism, allowing + * resources associated with the context to manage multiple concurrent + * communication instances. For example, users can isolate their communication + * by allocating and using separate contexts. 
Alternatively, users can share the + * communication resources (memory, network resource context, etc.) between + * them by using the same application context. A message sent or a RMA + * operation performed in one application context cannot be received in any + * other application context. + */ +public class UcpContext extends UcxNativeStruct implements Closeable { + static { + NativeLibs.load(); + } + + public UcpContext(UcpParams params) { + setNativeId(createContextNative(params)); + } + + @Override + public void close() { + cleanupContextNative(getNativeId()); + this.setNativeId(null); + } + + /** + * Creates new UcpWorker on current context. + */ + public UcpWorker newWorker(UcpWorkerParams params) { + return new UcpWorker(this, params); + } + + /** + * Associates memory allocated/mapped region with communication operations. + * The network stack associated with an application context + * can typically send and receive data from the mapped memory without + * CPU intervention; some devices and associated network stacks + * require the memory to be registered to send and receive data. + */ + public UcpMemory registerMemory(ByteBuffer buf) { + if (!buf.isDirect()) { + throw new UcxException("Registered buffer must be direct"); + } + UcpMemMapParams params = new UcpMemMapParams().setAddress(UcxUtils.getAddress(buf)) + .setLength(buf.remaining()); + UcpMemory result = memoryMapNative(getNativeId(), params); + + result.setByteBufferReference(buf); + return result; + } + + /** + * Associates memory allocated/mapped region with communication operations. + * The network stack associated with an application context + * can typically send and receive data from the mapped memory without + * CPU intervention; some devices and associated network stacks + * require the memory to be registered to send and receive data. 
+ */ + public UcpMemory memoryMap(UcpMemMapParams params) { + return memoryMapNative(getNativeId(), params); + } + + private static native long createContextNative(UcpParams params); + + private static native void cleanupContextNative(long contextId); + + private native UcpMemory memoryMapNative(long conetxtId, UcpMemMapParams params); +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpoint.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpoint.java new file mode 100644 index 0000000..72f7ea2 --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpoint.java @@ -0,0 +1,288 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ +package org.openucx.jucx.ucp; + +import org.openucx.jucx.*; + +import java.io.Closeable; +import java.nio.ByteBuffer; + +public class UcpEndpoint extends UcxNativeStruct implements Closeable { + + public UcpEndpoint(UcpWorker worker, UcpEndpointParams params) { + setNativeId(createEndpointNative(params, worker.getNativeId())); + } + + @Override + public void close() { + destroyEndpointNative(getNativeId()); + setNativeId(null); + } + + /** + * This routine unpacks the remote key (RKEY) object into the local memory + * such that it can be accessed and used by UCP routines. + * @param rkeyBuffer - Packed remote key buffer + * (see {@link UcpMemory#getRemoteKeyBuffer()}). + */ + public UcpRemoteKey unpackRemoteKey(ByteBuffer rkeyBuffer) { + return unpackRemoteKey(getNativeId(), + UcxUtils.getAddress(rkeyBuffer)); + } + + private void checkRemoteAccessParams(ByteBuffer buf, UcpRemoteKey remoteKey) { + if (!buf.isDirect()) { + throw new UcxException("Data buffer must be direct."); + } + if (remoteKey.getNativeId() == null) { + throw new UcxException("Remote key is null."); + } + } + + /** + * Non-blocking remote memory put operation. 
+ * This routine initiates a storage of contiguous block of data that is + * described by the local {@code src} buffer, starting of it's {@code src.position()} + * and size {@code src.remaining()} in the remote contiguous memory + * region described by {@code remoteAddress} address and the {@code remoteKey} "memory + * handle". The routine returns immediately and does not + * guarantee re-usability of the source {@code data} buffer. + * {@code callback} is invoked on completion of this operation. + */ + public UcpRequest putNonBlocking(ByteBuffer src, long remoteAddress, UcpRemoteKey remoteKey, + UcxCallback callback) { + + checkRemoteAccessParams(src, remoteKey); + + return putNonBlocking(UcxUtils.getAddress(src), src.remaining(), remoteAddress, + remoteKey, callback); + } + + public UcpRequest putNonBlocking(long localAddress, long size, + long remoteAddress, UcpRemoteKey remoteKey, + UcxCallback callback) { + + return putNonBlockingNative(getNativeId(), localAddress, + size, remoteAddress, remoteKey.getNativeId(), callback); + } + + /** + * This routine initiates a storage of contiguous block of data that is + * described by the local {@code buffer} in the remote contiguous memory + * region described by {@code remoteAddress} and the {@code remoteKey} + * "memory handle". The routine returns immediately and does not + * guarantee re-usability of the source {@code src} buffer. + */ + public void putNonBlockingImplicit(ByteBuffer src, long remoteAddress, + UcpRemoteKey remoteKey) { + checkRemoteAccessParams(src, remoteKey); + + putNonBlockingImplicit(UcxUtils.getAddress(src), src.remaining(), remoteAddress, + remoteKey); + } + + /** + * This routine initiates a storage of contiguous block of data that is + * described by the local {@code localAddress} in the remote contiguous memory + * region described by {@code remoteAddress} and the {@code remoteKey} + * "memory handle". 
The routine returns immediately and does not + * guarantee re-usability of the source {@code localAddress} address. + */ + public void putNonBlockingImplicit(long localAddress, long size, + long remoteAddress, UcpRemoteKey remoteKey) { + putNonBlockingImplicitNative(getNativeId(), localAddress, size, remoteAddress, + remoteKey.getNativeId()); + } + + /** + * Non-blocking remote memory get operation. + * This routine initiates a load of a contiguous block of data that is + * described by the remote memory address {@code remoteAddress} and the + * {@code remoteKey} "memory handle". The routine returns immediately and does + * not guarantee that remote data is loaded and stored under the local {@code dst} buffer + * starting of it's {@code dst.position()} and size {@code dst.remaining()}. + * {@code callback} is invoked on completion of this operation. + * @return {@link UcpRequest} object that can be monitored for completion. + */ + public UcpRequest getNonBlocking(long remoteAddress, UcpRemoteKey remoteKey, + ByteBuffer dst, UcxCallback callback) { + + checkRemoteAccessParams(dst, remoteKey); + + return getNonBlocking(remoteAddress, remoteKey, UcxUtils.getAddress(dst), + dst.remaining(), callback); + } + + public UcpRequest getNonBlocking(long remoteAddress, UcpRemoteKey remoteKey, + long localAddress, long size, UcxCallback callback) { + + return getNonBlockingNative(getNativeId(), remoteAddress, remoteKey.getNativeId(), + localAddress, size, callback); + } + + /** + * Non-blocking implicit remote memory get operation. + * This routine initiate a load of contiguous block of data that is described + * by the remote memory address {@code remoteAddress} and the + * {@code remoteKey} "memory handle" in the local contiguous memory region described + * by {@code dst} buffer. The routine returns immediately and does not guarantee that + * remote data is loaded and stored under the local buffer. 
+ */ + public void getNonBlockingImplicit(long remoteAddress, UcpRemoteKey remoteKey, + ByteBuffer dst) { + checkRemoteAccessParams(dst, remoteKey); + + getNonBlockingImplicit(remoteAddress, remoteKey, UcxUtils.getAddress(dst), + dst.remaining()); + } + + /** + * Non-blocking implicit remote memory get operation. + * This routine initiate a load of contiguous block of data that is described + * by the remote memory address {@code remoteAddress} and the + * {@code remoteKey} "memory handle" in the local contiguous memory region described + * by {@code localAddress} the local address. The routine returns immediately + * and does not guarantee that remote data is loaded and stored under the local buffer. + */ + public void getNonBlockingImplicit(long remoteAddress, UcpRemoteKey remoteKey, + long localAddress, long size) { + + getNonBlockingImplicitNative(getNativeId(), remoteAddress, remoteKey.getNativeId(), + localAddress, size); + } + + /** + * Non-blocking tagged-send operations + * This routine sends a messages that is described by the local buffer {@code sendBuffer}, + * starting of it's {@code sendBuffer.position()} and size {@code sendBuffer.remaining()}. + * to the destination endpoint. Each message is associated with a {@code tag} value + * that is used for message matching on the + * {@link UcpWorker#recvTaggedNonBlocking(ByteBuffer, long, long, UcxCallback)} + * "receiver". The routine is non-blocking and therefore returns immediately, + * however the actual send operation may be delayed. + * The send operation is considered completed when it is safe to reuse the source + * {@code data} buffer. {@code callback} is invoked on completion of this operation. 
+ */ + public UcpRequest sendTaggedNonBlocking(ByteBuffer sendBuffer, long tag, UcxCallback callback) { + if (!sendBuffer.isDirect()) { + throw new UcxException("Send buffer must be direct."); + } + return sendTaggedNonBlocking(UcxUtils.getAddress(sendBuffer), + sendBuffer.remaining(), tag, callback); + } + + public UcpRequest sendTaggedNonBlocking(long localAddress, long size, + long tag, UcxCallback callback) { + + return sendTaggedNonBlockingNative(getNativeId(), + localAddress, size, tag, callback); + } + + /** + * Non blocking send operation. Invokes + * {@link UcpEndpoint#sendTaggedNonBlocking(ByteBuffer, long, UcxCallback)} with default 0 tag. + */ + public UcpRequest sendTaggedNonBlocking(ByteBuffer sendBuffer, UcxCallback callback) { + return sendTaggedNonBlocking(sendBuffer, 0, callback); + } + + /** + * This routine sends data that is described by the local address to the destination endpoint. + * The routine is non-blocking and therefore returns immediately, however the actual send + * operation may be delayed. The send operation is considered completed when it is safe + * to reuse the source buffer. The UCP library will schedule invocation of the call-back upon + * completion of the send operation. + */ + public UcpRequest sendStreamNonBlocking(long localAddress, long size, UcxCallback callback) { + return sendStreamNonBlockingNative(getNativeId(), localAddress, size, callback); + } + + public UcpRequest sendStreamNonBlocking(ByteBuffer buffer, UcxCallback callback) { + return sendStreamNonBlockingNative(getNativeId(), UcxUtils.getAddress(buffer), + buffer.remaining(), callback); + } + + /** + * This routine receives data that is described by the local address and a size on the endpoint. + * The routine is non-blocking and therefore returns immediately. The receive operation is + * considered complete when the message is delivered to the buffer. 
+ * In order to notify the application about completion of a scheduled receive operation, + * the UCP library will invoke the call-back when data is in the receive buffer + * and ready for application access. + */ + public UcpRequest recvStreamNonBlocking(long localAddress, long size, long flags, + UcxCallback callback) { + return recvStreamNonBlockingNative(getNativeId(), localAddress, size, flags, callback); + } + + public UcpRequest recvStreamNonBlocking(ByteBuffer buffer, long flags, UcxCallback callback) { + return recvStreamNonBlocking(UcxUtils.getAddress(buffer), buffer.remaining(), flags, + callback); + } + + /** + * This routine flushes all outstanding AMO and RMA communications on this endpoint. + * All the AMO and RMA operations issued on this endpoint prior to this call + * are completed both at the origin and at the target. + */ + public UcpRequest flushNonBlocking(UcxCallback callback) { + return flushNonBlockingNative(getNativeId(), callback); + } + + /** + * Releases the endpoint without any confirmation from the peer. All + * outstanding requests will be completed with UCS_ERR_CANCELED error. + * This mode may cause transport level errors on remote side, so it requires set + * {@link UcpEndpointParams#setPeerErrorHadnlingMode()} for all endpoints created on + * both (local and remote) sides to avoid undefined behavior. + */ + public UcpRequest closeNonBlockingForce() { + return closeNonBlockingNative(getNativeId(), UcpConstants.UCP_EP_CLOSE_MODE_FORCE); + } + + /** + * Releases the endpoint by scheduling flushes on all outstanding operations. 
+ */ + public UcpRequest closeNonBlockingFlush() { + return closeNonBlockingNative(getNativeId(), UcpConstants.UCP_EP_CLOSE_MODE_FLUSH); + } + + private static native long createEndpointNative(UcpEndpointParams params, long workerId); + + private static native void destroyEndpointNative(long epId); + + private static native UcpRemoteKey unpackRemoteKey(long epId, long rkeyAddress); + + private static native UcpRequest putNonBlockingNative(long enpointId, long localAddress, + long size, long remoteAddr, + long ucpRkeyId, UcxCallback callback); + + private static native void putNonBlockingImplicitNative(long enpointId, long localAddress, + long size, long remoteAddr, + long ucpRkeyId); + + private static native UcpRequest getNonBlockingNative(long enpointId, long remoteAddress, + long ucpRkeyId, long localAddress, + long size, UcxCallback callback); + + private static native void getNonBlockingImplicitNative(long enpointId, long remoteAddress, + long ucpRkeyId, long localAddress, + long size); + + private static native UcpRequest sendTaggedNonBlockingNative(long enpointId, long localAddress, + long size, long tag, + UcxCallback callback); + + private static native UcpRequest sendStreamNonBlockingNative(long enpointId, long localAddress, + long size, UcxCallback callback); + + private static native UcpRequest recvStreamNonBlockingNative(long enpointId, long localAddress, + long size, long flags, + UcxCallback callback); + + private static native UcpRequest flushNonBlockingNative(long enpointId, UcxCallback callback); + + private static native UcpRequest closeNonBlockingNative(long endpointId, int mode); +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointParams.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointParams.java new file mode 100644 index 0000000..7907f05 --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpEndpointParams.java @@ -0,0 +1,105 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. 
ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+package org.openucx.jucx.ucp;
+
+import org.openucx.jucx.UcxException;
+import org.openucx.jucx.UcxParams;
+
+import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
+
+/**
+ * Tuning parameters for the UCP endpoint. Every setter returns {@code this} for chaining.
+ */
+public class UcpEndpointParams extends UcxParams {
+
+    @Override
+    public UcpEndpointParams clear() {
+        super.clear();
+        ucpAddress = null;
+        errorHandlingMode = 0;
+        userData = null;
+        flags = 0;
+        socketAddress = null;
+        connectionRequest = 0;
+        return this;
+    }
+
+    private ByteBuffer ucpAddress;
+
+    private int errorHandlingMode;
+
+    private ByteBuffer userData;
+
+    private long flags;
+
+    private InetSocketAddress socketAddress;
+
+    private long connectionRequest;
+
+    /**
+     * Destination address in the form of a worker address.
+     */
+    public UcpEndpointParams setUcpAddress(ByteBuffer ucpAddress) {
+        this.fieldMask |= UcpConstants.UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
+        this.ucpAddress = ucpAddress;
+        return this;
+    }
+
+    /**
+     * Guarantees that send requests are always completed (successfully or with an error) even in
+     * case of remote failure, disables protocols and APIs which may cause a hang or undefined
+     * behavior in case of peer failure, may affect performance and memory footprint.
+     */
+    public UcpEndpointParams setPeerErrorHadnlingMode() {
+        this.fieldMask |= UcpConstants.UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE;
+        this.errorHandlingMode = UcpConstants.UCP_ERR_HANDLING_MODE_PEER;
+        return this;
+    }
+
+    /**
+     * User data associated with an endpoint. Throws {@link UcxException} if it is not direct.
+     */
+    public UcpEndpointParams setUserData(ByteBuffer userData) {
+        if (!userData.isDirect()) {
+            throw new UcxException("User data must be of type DirectByteBuffer.");
+        }
+        this.fieldMask |= UcpConstants.UCP_EP_PARAM_FIELD_USER_DATA;
+        this.userData = userData;
+        return this;
+    }
+
+    /**
+     * Destination address in form of InetSocketAddress. Also sets the client-server flag.
+     */
+    public UcpEndpointParams setSocketAddress(InetSocketAddress socketAddress) {
+        this.fieldMask |= UcpConstants.UCP_EP_PARAM_FIELD_SOCK_ADDR |
+            UcpConstants.UCP_EP_PARAM_FIELD_FLAGS;
+        this.flags |= UcpConstants.UCP_EP_PARAMS_FLAGS_CLIENT_SERVER;
+        this.socketAddress = socketAddress;
+        return this;
+    }
+
+    /**
+     * Avoid connecting the endpoint to itself when connecting the endpoint
+     * to the same worker it was created on. Affects protocols which send to a particular
+     * remote endpoint, for example stream.
+     */
+    public UcpEndpointParams setNoLoopbackMode() {
+        this.fieldMask |= UcpConstants.UCP_EP_PARAM_FIELD_FLAGS;
+        this.flags |= UcpConstants.UCP_EP_PARAMS_FLAGS_NO_LOOPBACK;
+        return this;
+    }
+
+    /**
+     * Connection request from client; its native id is stored in these parameters.
+     */
+    public UcpEndpointParams setConnectionRequest(UcpConnectionRequest connectionRequest) {
+        this.fieldMask |= UcpConstants.UCP_EP_PARAM_FIELD_CONN_REQUEST;
+        this.connectionRequest = connectionRequest.getNativeId();
+        return this;
+    }
+}
diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListener.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListener.java
new file mode 100644
index 0000000..63c0ac0
--- /dev/null
+++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListener.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+package org.openucx.jucx.ucp;
+
+import org.openucx.jucx.UcxException;
+import org.openucx.jucx.UcxNativeStruct;
+
+import java.io.Closeable;
+import java.net.InetSocketAddress;
+
+/**
+ * The listener handle is an opaque object that is used for listening on a
+ * specific address and accepting connections from clients. 
+ */ +public class UcpListener extends UcxNativeStruct implements Closeable { + + private InetSocketAddress address; + + public UcpListener(UcpWorker worker, UcpListenerParams params) { + if (params.getSockAddr() == null) { + throw new UcxException("UcpListenerParams.sockAddr must be non-null."); + } + setNativeId(createUcpListener(params, worker.getNativeId())); + address = params.getSockAddr(); + } + + /** + * Returns a socket address of this listener. + */ + public InetSocketAddress getAddress() { + return address; + } + + @Override + public void close() { + destroyUcpListenerNative(getNativeId()); + setNativeId(null); + } + + private static native long createUcpListener(UcpListenerParams params, long workerId); + + private static native void destroyUcpListenerNative(long listenerId); +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListenerConnectionHandler.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListenerConnectionHandler.java new file mode 100644 index 0000000..4a5d84a --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListenerConnectionHandler.java @@ -0,0 +1,20 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ +package org.openucx.jucx.ucp; + +/** + * A server-side handle to incoming connection request. Can be used to create an + * endpoint which connects back to the client. + */ +public interface UcpListenerConnectionHandler { + /** + * This callback routine is invoked on the server side to handle incoming + * connections from remote clients. 
+ * @param connectionRequest - native pointer to connection request, that could be used + * in {@link UcpEndpointParams#setConnectionRequest( + * UcpConnectionRequest connectionRequest)} + */ + void onConnectionRequest(UcpConnectionRequest connectionRequest); +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListenerParams.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListenerParams.java new file mode 100644 index 0000000..28153a0 --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpListenerParams.java @@ -0,0 +1,45 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ +package org.openucx.jucx.ucp; + +import java.net.InetSocketAddress; + +import org.openucx.jucx.UcxParams; + +public class UcpListenerParams extends UcxParams { + + @Override + public UcpListenerParams clear() { + super.clear(); + sockAddr = null; + return this; + } + + private InetSocketAddress sockAddr; + + private UcpListenerConnectionHandler connectionHandler; + + /** + * An address, on which {@link UcpListener} would bind. + */ + public UcpListenerParams setSockAddr(InetSocketAddress sockAddr) { + this.sockAddr = sockAddr; + this.fieldMask |= UcpConstants.UCP_LISTENER_PARAM_FIELD_SOCK_ADDR; + return this; + } + + public InetSocketAddress getSockAddr() { + return sockAddr; + } + + /** + * Handler of an incoming connection request in a client-server connection flow. 
+ */ + public UcpListenerParams setConnectionHandler(UcpListenerConnectionHandler handler) { + this.connectionHandler = handler; + this.fieldMask |= UcpConstants.UCP_LISTENER_PARAM_FIELD_CONN_HANDLER; + return this; + } +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemMapParams.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemMapParams.java new file mode 100644 index 0000000..9ce96b9 --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemMapParams.java @@ -0,0 +1,72 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ +package org.openucx.jucx.ucp; + +import org.openucx.jucx.UcxParams; + +public class UcpMemMapParams extends UcxParams { + private long flags; + private long address; + private long length; + + @Override + public UcpMemMapParams clear() { + super.clear(); + address = 0; + length = 0; + flags = 0; + return this; + } + + public UcpMemMapParams setAddress(long address) { + this.fieldMask |= UcpConstants.UCP_MEM_MAP_PARAM_FIELD_ADDRESS; + this.address = address; + return this; + } + + public long getAddress() { + return address; + } + + public UcpMemMapParams setLength(long length) { + this.fieldMask |= UcpConstants.UCP_MEM_MAP_PARAM_FIELD_LENGTH; + this.length = length; + return this; + } + + public long getLength() { + return length; + } + + /** + * Identify requirement for allocation, if passed address is not a null-pointer + * then it will be used as a hint or direct address for allocation. + */ + public UcpMemMapParams allocate() { + this.fieldMask |= UcpConstants.UCP_MEM_MAP_PARAM_FIELD_FLAGS; + flags |= UcpConstants.UCP_MEM_MAP_ALLOCATE; + return this; + } + + /** + * Complete the registration faster, possibly by not populating the pages up-front, + * and mapping them later when they are accessed by communication routines. 
+ */ + public UcpMemMapParams nonBlocking() { + this.fieldMask |= UcpConstants.UCP_MEM_MAP_PARAM_FIELD_FLAGS; + flags |= UcpConstants.UCP_MEM_MAP_NONBLOCK; + return this; + } + + /** + * Don't interpret address as a hint: place the mapping at exactly that + * address. The address must be a multiple of the page size. + */ + public UcpMemMapParams fixed() { + this.fieldMask |= UcpConstants.UCP_MEM_MAP_PARAM_FIELD_FLAGS; + flags |= UcpConstants.UCP_MEM_MAP_FIXED; + return this; + } +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemory.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemory.java new file mode 100644 index 0000000..360b33f --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpMemory.java @@ -0,0 +1,109 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +package org.openucx.jucx.ucp; + +import org.openucx.jucx.UcxNativeStruct; + +import java.io.Closeable; +import java.nio.ByteBuffer; + +/** + * Memory handle is an opaque object representing a memory region allocated + * through UCP library, which is optimized for remote memory access + * operations (zero-copy operations). The memory could be registered + * to one or multiple network resources that are supported by UCP, + * such as InfiniBand, Gemini, and others. + */ +public class UcpMemory extends UcxNativeStruct implements Closeable { + + private UcpContext context; + + private ByteBuffer data; + + private long address; + + private long length; + + /** + * To prevent construct outside of JNI. + */ + private UcpMemory(long nativeId) { + setNativeId(nativeId); + } + + /** + * This routine unmaps a user specified memory segment. + * When the function returns, the {@code data} and associated + * "remote key" will be invalid and cannot be used with any UCP routine. 
+ * Another well-known term for the "unmap" operation that is typically + * used in the context of networking is memory "de-registration". The UCP + * library de-registers the memory from the available hardware so it can be returned + * back to the operating system. + */ + public void deregister() { + unmapMemoryNative(context.getNativeId(), getNativeId()); + setNativeId(null); + data = null; + } + + /** + * This routine allocates a memory buffer and packs into the buffer + * a remote access key (RKEY) object. RKEY is an opaque object that provides + * the information that is necessary for remote memory access. + * This routine packs the RKEY object in a portable format such that the + * object can be "unpacked" on any platform supported by the + * UCP library. + * RKEYs for InfiniBand and Cray Aries networks typically include + * InfiniBand and Aries keys. + * In order to enable remote direct memory access to the memory associated + * with the memory handle the application is responsible for sharing the RKEY with + * the peers that will initiate the access. + */ + public ByteBuffer getRemoteKeyBuffer() { + ByteBuffer rKeyBuffer = getRkeyBufferNative(context.getNativeId(), getNativeId()); + // 1. Allocating java native ByteBuffer (managed by java's reference count cleaner). + ByteBuffer result = ByteBuffer.allocateDirect(rKeyBuffer.capacity()); + // 2. Copy content of the native rkey buffer to java's buffer. + result.put(rKeyBuffer); + result.clear(); + // 3. Release the native rkey buffer. Memory allocated in JNI must be freed by JNI. + releaseRkeyBufferNative(rKeyBuffer); + return result; + } + + /** + * To keep reference to user's ByteBuffer so it won't be cleaned by refCount cleaner. + * @param data + */ + void setByteBufferReference(ByteBuffer data) { + this.data = data; + } + + /** + * Address of registered memory.
+ */ + public long getAddress() { + return address; + } + + /** + * Length of registered memory + */ + public long getLength() { + return length; + } + + private static native void unmapMemoryNative(long contextId, long memoryId); + + private static native ByteBuffer getRkeyBufferNative(long contextId, long memoryId); + + private static native void releaseRkeyBufferNative(ByteBuffer rkey); + + @Override + public void close() { + deregister(); + } +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpParams.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpParams.java new file mode 100644 index 0000000..bf5b2ba --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpParams.java @@ -0,0 +1,143 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +package org.openucx.jucx.ucp; + +import org.openucx.jucx.UcxParams; + +/** + * Tuning parameters for UCP library. + * The structure defines the parameters that are used for + * UCP library tuning during UCP library {@link UcpContext} "initialization". + * + *

UCP library implementation uses the "features" + * parameter to optimize the library functionality that minimize memory + * footprint. For example, if the application does not require send/receive + * semantics UCP library may avoid allocation of expensive resources associated with + * send/receive queues. + */ +public class UcpParams extends UcxParams { + + /** + * UCP ucp_feature "features" that are used for library + * initialization. It is recommended for applications only to request + * the features that are required for an optimal functionality + * This field must be specified. + */ + private long features; + + private long tagSenderMask; + + private boolean mtWorkersShared; + + private long estimatedNumEps; + + @Override + public UcpParams clear() { + super.clear(); + features = 0L; + tagSenderMask = 0L; + mtWorkersShared = false; + estimatedNumEps = 0L; + return this; + } + + /** + * Mask which specifies particular bits of the tag which can uniquely + * identify the sender (UCP endpoint) in tagged operations. + * This field defaults to 0 if not specified. + */ + public UcpParams setTagSenderMask(long tagSenderMask) { + this.tagSenderMask = tagSenderMask; + this.fieldMask |= UcpConstants.UCP_PARAM_FIELD_TAG_SENDER_MASK; + return this; + } + + /** + * Indicates if this context is shared by multiple workers + * from different threads. If so, this context needs thread safety + * support; otherwise, the context does not need to provide thread + * safety. + * For example, if the context is used by single worker, and that + * worker is shared by multiple threads, this context does not need + * thread safety; if the context is used by worker 1 and worker 2, + * and worker 1 is used by thread 1 and worker 2 is used by thread 2, + * then this context needs thread safety. + * Note that actual thread mode may be different from mode passed + * to {@link UcpContext}. 
+ */ + public UcpParams setMtWorkersShared(boolean mtWorkersShared) { + this.mtWorkersShared = mtWorkersShared; + this.fieldMask |= UcpConstants.UCP_PARAM_FIELD_MT_WORKERS_SHARED; + return this; + } + + /** + * An optimization hint of how many endpoints would be created on this context. + * Does not affect semantics, but only transport selection criteria and the + * resulting performance. + * The value can be also set by UCX_NUM_EPS environment variable. In such case + * it will override the number of endpoints set by {@link #setEstimatedNumEps}. + */ + public UcpParams setEstimatedNumEps(long estimatedNumEps) { + this.estimatedNumEps = estimatedNumEps; + this.fieldMask |= UcpConstants.UCP_PARAM_FIELD_ESTIMATED_NUM_EPS; + return this; + } + + /** + * Request tag matching support. + */ + public UcpParams requestTagFeature() { + this.fieldMask |= UcpConstants.UCP_PARAM_FIELD_FEATURES; + this.features |= UcpConstants.UCP_FEATURE_TAG; + return this; + } + + /** + * Request remote memory access support. + */ + public UcpParams requestRmaFeature() { + this.fieldMask |= UcpConstants.UCP_PARAM_FIELD_FEATURES; + this.features |= UcpConstants.UCP_FEATURE_RMA; + return this; + } + + /** + * Request 32-bit atomic operations support. + */ + public UcpParams requestAtomic32BitFeature() { + this.fieldMask |= UcpConstants.UCP_PARAM_FIELD_FEATURES; + this.features |= UcpConstants.UCP_FEATURE_AMO32; + return this; + } + + /** + * Request 64-bit atomic operations support. + */ + public UcpParams requestAtomic64BitFeature() { + this.fieldMask |= UcpConstants.UCP_PARAM_FIELD_FEATURES; + this.features |= UcpConstants.UCP_FEATURE_AMO64; + return this; + } + + /** + * Request interrupt notification support. + */ + public UcpParams requestWakeupFeature() { + this.fieldMask |= UcpConstants.UCP_PARAM_FIELD_FEATURES; + this.features |= UcpConstants.UCP_FEATURE_WAKEUP; + return this; + } + + /** + * Request stream support. 
+ */ + public UcpParams requestStreamFeature() { + this.fieldMask |= UcpConstants.UCP_PARAM_FIELD_FEATURES; + this.features |= UcpConstants.UCP_FEATURE_STREAM; + return this; + } +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpRemoteKey.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpRemoteKey.java new file mode 100644 index 0000000..fb66bf6 --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpRemoteKey.java @@ -0,0 +1,38 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ +package org.openucx.jucx.ucp; + +import org.openucx.jucx.UcxNativeStruct; + +import java.io.Closeable; + +/** + * Remote memory handle is an opaque object representing remote memory access + * information. Typically, the handle includes a memory access key and other + * network hardware specific information, which are input to remote memory + * access operations, such as PUT, GET, and ATOMIC. The object is + * communicated to remote peers to enable an access to the memory region. + */ +public class UcpRemoteKey extends UcxNativeStruct implements Closeable { + + /** + * Private constructor to construct from JNI only. + */ + private UcpRemoteKey() { + + } + + private UcpRemoteKey(long nativeRkeyPtr) { + setNativeId(nativeRkeyPtr); + } + + @Override + public void close() { + rkeyDestroy(getNativeId()); + setNativeId(null); + } + + private static native void rkeyDestroy(long ucpRkeyId); +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpRequest.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpRequest.java new file mode 100644 index 0000000..87abf91 --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpRequest.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +package org.openucx.jucx.ucp; + +import org.openucx.jucx.UcxCallback; +import org.openucx.jucx.UcxNativeStruct; + +import java.io.Closeable; +import java.nio.ByteBuffer; + +/** + * Request object that is returned by UCP operations (GET, PUT, SEND, etc.). + * Call {@link UcpRequest#isCompleted()} to monitor completion of request. + */ +public class UcpRequest extends UcxNativeStruct implements Closeable { + + private long recvSize; + + private UcpRequest(long nativeId) { + setNativeId(nativeId); + } + + /** + * The size of the received data in bytes, valid only for recv requests, e.g.: + * {@link UcpWorker#recvTaggedNonBlocking(ByteBuffer buffer, UcxCallback clb)} + */ + public long getRecvSize() { + return recvSize; + } + + /** + * @return whether this request is completed (always true when no native id is set). + */ + public boolean isCompleted() { + return (getNativeId() == null) || isCompletedNative(getNativeId()); + } + + /** + * This routine releases the non-blocking request back to the library, regardless + * of its current state. Communication operations associated with this request + * will make progress internally, however no further notifications or callbacks + * will be invoked for this request. + */ + @Override + public void close() { + if (getNativeId() != null) { + closeRequestNative(getNativeId()); + } + } + + private static native boolean isCompletedNative(long ucpRequest); + + private static native void closeRequestNative(long ucpRequest); +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpWorker.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpWorker.java new file mode 100644 index 0000000..e1bf5c2 --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpWorker.java @@ -0,0 +1,202 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms.
+ */ + +package org.openucx.jucx.ucp; + +import java.io.Closeable; +import java.nio.ByteBuffer; + +import org.openucx.jucx.*; + +/** + * UCP worker is an opaque object representing the communication context. The + * worker represents an instance of a local communication resource and the + * progress engine associated with it. The progress engine is a construct that + * is responsible for asynchronous and independent progress of communication + * directives. The progress engine could be implemented in hardware or software. + * The worker object abstracts an instance of network resources such as a host + * channel adapter port, network interface, or multiple resources such as + * multiple network interfaces or communication ports. It could also represent + * virtual communication resources that are defined across multiple devices. + * Although the worker can represent multiple network resources, it is + * associated with a single {@link UcpContext} "UCX application context". + * All communication functions require a context to perform the operation on + * the dedicated hardware resource(s) and an "endpoint" to address the + * destination. + * + *

Workers are parallel "threading points" that an upper layer may use to + * optimize concurrent communications. + */ +public class UcpWorker extends UcxNativeStruct implements Closeable { + + public UcpWorker(UcpContext context, UcpWorkerParams params) { + setNativeId(createWorkerNative(params, context.getNativeId())); + } + + /** + * Creates a new UcpEndpoint on the current worker. + */ + public UcpEndpoint newEndpoint(UcpEndpointParams params) { + return new UcpEndpoint(this, params); + } + + /** + * Creates a new UcpListener on the current worker. + */ + public UcpListener newListener(UcpListenerParams params) { + return new UcpListener(this, params); + } + + @Override + public void close() { + releaseWorkerNative(getNativeId()); + setNativeId(null); + } + + /** + * This routine explicitly progresses all communication operations on a worker. + * @return Non-zero if any communication was progressed, zero otherwise. + */ + public int progress() { + return progressWorkerNative(getNativeId()); + } + + /** + * Blocks, repeatedly progressing this worker, until the given request is completed. + */ + public void progressRequest(UcpRequest request) { + while (!request.isCompleted()) { + progress(); + } + } + + /** + * This routine flushes all outstanding AMO and RMA communications on + * this worker. All the AMO and RMA operations issued on this worker prior to this call + * are completed both at the origin and at the target when this call returns. + */ + public UcpRequest flushNonBlocking(UcxCallback callback) { + return flushNonBlockingNative(getNativeId(), callback); + } + + /** + * This routine waits (blocking) until an event has happened, as part of the + * wake-up mechanism. + * + * This function is guaranteed to return only if new communication events occur + * on the worker. Therefore one must drain all existing events before waiting + * on the file descriptor. This can be achieved by calling + * {@link UcpWorker#progress()} repeatedly until it returns 0.
+ */ + public void waitForEvents() { + waitWorkerNative(getNativeId()); + } + + /** + * This routine signals that the event has happened, as part of the wake-up + * mechanism. This function causes a blocking call to {@link UcpWorker#waitForEvents()} + * to return, even if no event from the underlying interfaces has taken place. + * + * It is safe to use this routine from any thread, even if UCX is compiled + * without multi-threading support and/or initialized without + * {@link UcpWorkerParams#requestThreadSafety()}. However {@link UcpContext} has to be + * created with {@link UcpParams#requestWakeupFeature()}. + */ + public void signal() { + signalWorkerNative(getNativeId()); + } + + /** + * Non-blocking tagged-receive operation. + * This routine receives a message that is described by the local {@code recvBuffer} + * buffer on the current worker. The tag value of the received message has to match + * the {@code tag} of the sent message. The routine is non-blocking and therefore returns + * immediately. The receive operation is considered completed when the message is delivered + * to the {@code recvBuffer} at position {@code recvBuffer.position()} and size + * {@code recvBuffer.remaining()}. + * In order to notify the application about completion of the receive + * operation the UCP library will invoke the call-back {@code callback} when the received + * message is in the receive buffer and ready for application access. + * + * @param tagMask - bit mask that indicates the bits that are used for the matching of the + * incoming tag against the expected tag.
+ */ + public UcpRequest recvTaggedNonBlocking(ByteBuffer recvBuffer, long tag, long tagMask, + UcxCallback callback) { + if (!recvBuffer.isDirect()) { + throw new UcxException("Recv buffer must be direct."); + } + return recvTaggedNonBlockingNative(getNativeId(), UcxUtils.getAddress(recvBuffer), + recvBuffer.remaining(), tag, tagMask, callback); + } + + public UcpRequest recvTaggedNonBlocking(long localAddress, long size, long tag, long tagMask, + UcxCallback callback) { + return recvTaggedNonBlockingNative(getNativeId(), localAddress, size, + tag, tagMask, callback); + } + + /** + * Non-blocking receive operation. Invokes + * {@link UcpWorker#recvTaggedNonBlocking(ByteBuffer, long, long, UcxCallback)} + * with default tag=0 and tagMask=0. + */ + public UcpRequest recvTaggedNonBlocking(ByteBuffer recvBuffer, UcxCallback callback) { + return recvTaggedNonBlocking(recvBuffer, 0, 0, callback); + } + + /** + * This routine tries to cancel an outstanding communication request. After + * calling this routine, the request will be in a completed or canceled (but + * not both) state regardless of the status of the target endpoint associated + * with the communication request. If the request is completed successfully, + * the "send" or the "receive" completion callbacks (based on the type of the request) will be + * called with the status argument of the callback set to UCS_OK, and in + * case it is canceled the status argument is set to UCS_ERR_CANCELED. + */ + public void cancelRequest(UcpRequest request) { + cancelRequestNative(getNativeId(), request.getNativeId()); + } + + /** + * This routine returns the address of the worker object. This address can be + * passed to remote instances of the UCP library in order to connect to this + * worker. The UCP worker address is an opaque object that is used as an + * identifier for a {@link UcpWorker} instance. + */ + public ByteBuffer getAddress() { + ByteBuffer nativeUcpAddress = workerGetAddressNative(getNativeId()); + // 1.
Allocating java native ByteBuffer (managed by java's reference count cleaner). + ByteBuffer result = ByteBuffer.allocateDirect(nativeUcpAddress.capacity()); + // 2. Copy content of native ucp address to java's buffer. + result.put(nativeUcpAddress); + result.clear(); + // 3. Release an address of the worker object. Memory allocated in JNI must be freed by JNI. + releaseAddressNative(getNativeId(), nativeUcpAddress); + return result; + } + + private static native long createWorkerNative(UcpWorkerParams params, long ucpContextId); + + private static native void releaseWorkerNative(long workerId); + + private static native ByteBuffer workerGetAddressNative(long workerId); + + private static native void releaseAddressNative(long workerId, ByteBuffer addressId); + + private static native int progressWorkerNative(long workerId); + + private static native UcpRequest flushNonBlockingNative(long workerId, UcxCallback callback); + + private static native void waitWorkerNative(long workerId); + + private static native void signalWorkerNative(long workerId); + + private static native UcpRequest recvTaggedNonBlockingNative(long workerId, long localAddress, + long size, long tag, long tagMask, + UcxCallback callback); + + private static native void cancelRequestNative(long workerId, long requestId); +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpWorkerParams.java b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpWorkerParams.java new file mode 100644 index 0000000..a1ea736 --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucp/UcpWorkerParams.java @@ -0,0 +1,162 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +package org.openucx.jucx.ucp; + +import java.nio.ByteBuffer; +import java.util.BitSet; + +import org.openucx.jucx.ucs.UcsConstants; +import org.openucx.jucx.UcxException; +import org.openucx.jucx.UcxParams; + +public class UcpWorkerParams extends UcxParams { + + private int threadMode; + + private BitSet cpuMask = new BitSet(); + + private long events; + + private ByteBuffer userData; + + private int eventFD; + + @Override + public UcpWorkerParams clear() { + super.clear(); + threadMode = 0; + cpuMask = new BitSet(); + events = 0; + userData = null; + eventFD = 0; + return this; + } + + /** + * Requests the thread safety mode which worker and the associated resources + * should be created with. + * When thread safety requested, the + * {@link UcpWorker#UcpWorker(UcpContext, UcpWorkerParams)} + * attempts to create worker where multiple threads can access concurrently. + * The thread mode with which worker is created can differ from the + * suggested mode. + */ + public UcpWorkerParams requestThreadSafety() { + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_THREAD_MODE; + this.threadMode = UcsConstants.ThreadMode.UCS_THREAD_MODE_MULTI; + return this; + } + + /** + * Mask of which CPUs worker resources should preferably be allocated on. + * This value is optional. + * If it's not set, resources are allocated according to system's default policy. + */ + public UcpWorkerParams setCpu(int cpuNum) { + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_CPU_MASK; + this.cpuMask.set(cpuNum); + return this; + } + + /** + * Remote memory access send completion. + */ + public UcpWorkerParams requestWakeupRMA() { + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_EVENTS; + this.events |= UcpConstants.UCP_WAKEUP_RMA; + return this; + } + + /** + * Atomic operation send completion. 
+ */ + public UcpWorkerParams requestWakeupAMO() { + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_EVENTS; + this.events |= UcpConstants.UCP_WAKEUP_AMO; + return this; + } + + /** + * Tag send completion. + */ + public UcpWorkerParams requestWakeupTagSend() { + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_EVENTS; + this.events |= UcpConstants.UCP_WAKEUP_TAG_SEND; + return this; + } + + /** + * Tag receive completion. + */ + public UcpWorkerParams requestWakeupTagRecv() { + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_EVENTS; + this.events |= UcpConstants.UCP_WAKEUP_TAG_RECV; + return this; + } + + /** + * This event type will generate an event on completion of any + * outgoing operation (complete or partial, according to the + * underlying protocol) for any type of transfer (send, atomic, or RMA). + */ + public UcpWorkerParams requestWakeupTX() { + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_EVENTS; + this.events |= UcpConstants.UCP_WAKEUP_TX; + return this; + } + + /** + * This event type will generate an event on completion of any receive + * operation (complete or partial, according to the underlying protocol). + */ + public UcpWorkerParams requestWakeupRX() { + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_EVENTS; + this.events |= UcpConstants.UCP_WAKEUP_RX; + return this; + } + + /** + * Use edge-triggered wakeup. The event file descriptor will be signaled only + * for new events, rather than existing ones. + */ + public UcpWorkerParams requestWakeupEdge() { + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_EVENTS; + this.events |= UcpConstants.UCP_WAKEUP_EDGE; + return this; + } + + /** + * User data associated with the current worker. 
+ */ + public UcpWorkerParams setUserData(ByteBuffer userData) { + if (!userData.isDirect()) { + throw new UcxException("User data must be of type DirectByteBuffer."); + } + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_USER_DATA; + this.userData = userData; + return this; + } + + /** + * External event file descriptor. + * + *

Events on the worker will be reported on the provided event file descriptor. + * The provided file descriptor must be capable of aggregating notifications + * for arbitrary events, for example epoll(7) on Linux systems. + * + *

{@code userData} will be used as the event user-data on systems which + * support it. For example, on Linux, it will be placed in + * epoll_data_t::ptr, when returned from epoll_wait(2).

+ * + *

Otherwise, events would be reported to the event file descriptor returned + * from ucp_worker_get_efd().

+ */ + public UcpWorkerParams setEventFD(int eventFD) { + this.fieldMask |= UcpConstants.UCP_WORKER_PARAM_FIELD_EVENT_FD; + this.eventFD = eventFD; + return this; + } +} diff --git a/bindings/java/src/main/java/org/openucx/jucx/ucs/UcsConstants.java b/bindings/java/src/main/java/org/openucx/jucx/ucs/UcsConstants.java new file mode 100644 index 0000000..b22f0b1 --- /dev/null +++ b/bindings/java/src/main/java/org/openucx/jucx/ucs/UcsConstants.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +package org.openucx.jucx.ucs; + +import org.openucx.jucx.NativeLibs; + +public class UcsConstants { + static { + load(); + } + + public static class ThreadMode { + static { + load(); + } + /** + * Multiple threads can access concurrently + */ + public static int UCS_THREAD_MODE_MULTI; + } + + private static void load() { + NativeLibs.load(); + loadConstants(); + } + + private static native void loadConstants(); +} diff --git a/bindings/java/src/main/native/Makefile.am b/bindings/java/src/main/native/Makefile.am new file mode 100644 index 0000000..32ec308 --- /dev/null +++ b/bindings/java/src/main/native/Makefile.am @@ -0,0 +1,109 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + +if HAVE_JAVA + +jardir = $(libdir) +topdir = $(abs_top_builddir) +java_build_dir = $(builddir)/build-java +jarfile = $(java_build_dir)/jucx-@VERSION@.jar +javadir = $(top_srcdir)/bindings/java + +MVNCMD = $(MVN) -B -T 1C -f \ + $(topdir)/bindings/java/pom.xml \ + -Dmaven.repo.local=$(java_build_dir)/.deps \ + -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn + +JUCX_GENERATED_H_FILES = org_openucx_jucx_ucp_UcpConstants.h \ + org_openucx_jucx_ucp_UcpContext.h \ + org_openucx_jucx_ucp_UcpEndpoint.h \ + org_openucx_jucx_ucp_UcpListener.h \ + org_openucx_jucx_ucp_UcpMemory.h \ + org_openucx_jucx_ucp_UcpRequest.h \ + org_openucx_jucx_ucp_UcpRemoteKey.h \ + org_openucx_jucx_ucp_UcpWorker.h \ + org_openucx_jucx_ucs_UcsConstants_ThreadMode.h \ + org_openucx_jucx_ucs_UcsConstants.h + +BUILT_SOURCES = $(JUCX_GENERATED_H_FILES) + +STAMP_FILE = native_headers.stamp + +MOSTLYCLEANFILES = $(JUCX_GENERATED_H_FILES) $(STAMP_FILE) + +# +# Create a timestamp file to avoid regenerating header files every time +# See https://www.gnu.org/software/automake/manual/html_node/Multiple-Outputs.html +# +$(STAMP_FILE): \ + $(javadir)/src/main/java/org/openucx/jucx/ucs/*.java \ + $(javadir)/src/main/java/org/openucx/jucx/ucp/*.java + $(MVNCMD) compile + touch $(STAMP_FILE) + +$(JUCX_GENERATED_H_FILES): $(STAMP_FILE) + +lib_LTLIBRARIES = libjucx.la + +libjucx_la_CPPFLAGS = -I$(JDK)/include -I$(JDK)/include/linux \ + -I$(topdir)/src -I$(top_srcdir)/src + +noinst_HEADERS = jucx_common_def.h + +libjucx_la_SOURCES = context.cc \ + endpoint.cc \ + jucx_common_def.cc \ + listener.cc \ + memory.cc \ + request.cc \ + ucp_constants.cc \ + ucs_constants.cc \ + worker.cc + +libjucx_la_CXXFLAGS = -fPIC -DPIC -Werror + +libjucx_la_LIBADD = $(topdir)/src/ucs/libucs.la \ + $(topdir)/src/uct/libuct.la \ + $(topdir)/src/ucm/libucm.la \ + $(topdir)/src/ucp/libucp.la + +libjucx_la_DEPENDENCIES = Makefile.am Makefile.in Makefile + +# Compile Java source code and pack 
to jar +$(jarfile): + $(MVNCMD) package -DskipTests + +package : $(jarfile) + +.PHONY: package set-version publish-snapshot publish-release publish test docs + +# Maven install phase +jar_DATA = $(jarfile) + +# Remove all compiled Java files +clean-local: + -rm -rf $(java_build_dir) + +set-version: + $(MVNCMD) versions:set -DnewVersion=${JUCX_VERSION} + +# Publish JUCX jar to maven central; sub-makes go through $(MAKE) so flags and jobserver propagate +publish-snapshot: + @$(MAKE) set-version JUCX_VERSION=@VERSION@-SNAPSHOT + @$(MAKE) publish + +publish-release: + @$(MAKE) set-version JUCX_VERSION=${JUCX_VERSION} + @$(MAKE) publish + +publish: + $(MVNCMD) deploy -DskipTests ${ARGS} + +test: + $(MVNCMD) test -DargLine="-XX:OnError='cat hs_err_pid%p.log'" +docs: + $(MVNCMD) javadoc:javadoc + +endif diff --git a/bindings/java/src/main/native/Makefile.in b/bindings/java/src/main/native/Makefile.in new file mode 100644 index 0000000..bc59b28 --- /dev/null +++ b/bindings/java/src/main/native/Makefile.in @@ -0,0 +1,1030 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?)
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = bindings/java/src/main/native +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 
\ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) 
-d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(jardir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +am__libjucx_la_SOURCES_DIST = context.cc endpoint.cc \ + jucx_common_def.cc listener.cc memory.cc request.cc \ + ucp_constants.cc ucs_constants.cc worker.cc +@HAVE_JAVA_TRUE@am_libjucx_la_OBJECTS = libjucx_la-context.lo \ +@HAVE_JAVA_TRUE@ libjucx_la-endpoint.lo \ +@HAVE_JAVA_TRUE@ libjucx_la-jucx_common_def.lo \ +@HAVE_JAVA_TRUE@ libjucx_la-listener.lo libjucx_la-memory.lo \ +@HAVE_JAVA_TRUE@ libjucx_la-request.lo \ +@HAVE_JAVA_TRUE@ libjucx_la-ucp_constants.lo \ +@HAVE_JAVA_TRUE@ libjucx_la-ucs_constants.lo \ +@HAVE_JAVA_TRUE@ libjucx_la-worker.lo +libjucx_la_OBJECTS = $(am_libjucx_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libjucx_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(libjucx_la_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +@HAVE_JAVA_TRUE@am_libjucx_la_rpath = -rpath $(libdir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libjucx_la-context.Plo \ + ./$(DEPDIR)/libjucx_la-endpoint.Plo \ + ./$(DEPDIR)/libjucx_la-jucx_common_def.Plo \ + ./$(DEPDIR)/libjucx_la-listener.Plo \ + ./$(DEPDIR)/libjucx_la-memory.Plo \ + ./$(DEPDIR)/libjucx_la-request.Plo \ + ./$(DEPDIR)/libjucx_la-ucp_constants.Plo \ + ./$(DEPDIR)/libjucx_la-ucs_constants.Plo \ + 
./$(DEPDIR)/libjucx_la-worker.Plo +am__mv = mv -f +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +SOURCES = $(libjucx_la_SOURCES) +DIST_SOURCES = $(am__libjucx_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +DATA = $(jar_DATA) +am__noinst_HEADERS_DIST = jucx_common_def.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ 
+EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = 
@ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ 
+top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_JAVA_TRUE@jardir = $(libdir) +@HAVE_JAVA_TRUE@topdir = $(abs_top_builddir) +@HAVE_JAVA_TRUE@java_build_dir = $(builddir)/build-java +@HAVE_JAVA_TRUE@jarfile = $(java_build_dir)/jucx-@VERSION@.jar +@HAVE_JAVA_TRUE@javadir = $(top_srcdir)/bindings/java +@HAVE_JAVA_TRUE@MVNCMD = $(MVN) -B -T 1C -f \ +@HAVE_JAVA_TRUE@ $(topdir)/bindings/java/pom.xml \ +@HAVE_JAVA_TRUE@ -Dmaven.repo.local=$(java_build_dir)/.deps \ +@HAVE_JAVA_TRUE@ -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn + +@HAVE_JAVA_TRUE@JUCX_GENERATED_H_FILES = org_openucx_jucx_ucp_UcpConstants.h \ +@HAVE_JAVA_TRUE@ org_openucx_jucx_ucp_UcpContext.h \ +@HAVE_JAVA_TRUE@ org_openucx_jucx_ucp_UcpEndpoint.h \ +@HAVE_JAVA_TRUE@ org_openucx_jucx_ucp_UcpListener.h \ +@HAVE_JAVA_TRUE@ org_openucx_jucx_ucp_UcpMemory.h \ +@HAVE_JAVA_TRUE@ org_openucx_jucx_ucp_UcpRequest.h \ +@HAVE_JAVA_TRUE@ org_openucx_jucx_ucp_UcpRemoteKey.h \ +@HAVE_JAVA_TRUE@ org_openucx_jucx_ucp_UcpWorker.h \ +@HAVE_JAVA_TRUE@ org_openucx_jucx_ucs_UcsConstants_ThreadMode.h \ +@HAVE_JAVA_TRUE@ org_openucx_jucx_ucs_UcsConstants.h + +@HAVE_JAVA_TRUE@BUILT_SOURCES = $(JUCX_GENERATED_H_FILES) +@HAVE_JAVA_TRUE@STAMP_FILE = native_headers.stamp +@HAVE_JAVA_TRUE@MOSTLYCLEANFILES = $(JUCX_GENERATED_H_FILES) $(STAMP_FILE) +@HAVE_JAVA_TRUE@lib_LTLIBRARIES = libjucx.la +@HAVE_JAVA_TRUE@libjucx_la_CPPFLAGS = -I$(JDK)/include -I$(JDK)/include/linux \ +@HAVE_JAVA_TRUE@ -I$(topdir)/src -I$(top_srcdir)/src + +@HAVE_JAVA_TRUE@noinst_HEADERS = jucx_common_def.h +@HAVE_JAVA_TRUE@libjucx_la_SOURCES = context.cc \ +@HAVE_JAVA_TRUE@ endpoint.cc \ +@HAVE_JAVA_TRUE@ jucx_common_def.cc \ +@HAVE_JAVA_TRUE@ listener.cc \ +@HAVE_JAVA_TRUE@ memory.cc \ +@HAVE_JAVA_TRUE@ request.cc \ +@HAVE_JAVA_TRUE@ ucp_constants.cc \ +@HAVE_JAVA_TRUE@ ucs_constants.cc \ +@HAVE_JAVA_TRUE@ worker.cc + +@HAVE_JAVA_TRUE@libjucx_la_CXXFLAGS = -fPIC -DPIC -Werror 
+@HAVE_JAVA_TRUE@libjucx_la_LIBADD = $(topdir)/src/ucs/libucs.la \ +@HAVE_JAVA_TRUE@ $(topdir)/src/uct/libuct.la \ +@HAVE_JAVA_TRUE@ $(topdir)/src/ucm/libucm.la \ +@HAVE_JAVA_TRUE@ $(topdir)/src/ucp/libucp.la + +@HAVE_JAVA_TRUE@libjucx_la_DEPENDENCIES = Makefile.am Makefile.in Makefile + +# Maven install phase +@HAVE_JAVA_TRUE@jar_DATA = $(jarfile) +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .cc .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign bindings/java/src/main/native/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign bindings/java/src/main/native/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + 
echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libjucx.la: $(libjucx_la_OBJECTS) $(libjucx_la_DEPENDENCIES) $(EXTRA_libjucx_la_DEPENDENCIES) + $(AM_V_CXXLD)$(libjucx_la_LINK) $(am_libjucx_la_rpath) $(libjucx_la_OBJECTS) $(libjucx_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libjucx_la-context.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libjucx_la-endpoint.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libjucx_la-jucx_common_def.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libjucx_la-listener.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libjucx_la-memory.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libjucx_la-request.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libjucx_la-ucp_constants.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libjucx_la-ucs_constants.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libjucx_la-worker.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.cc.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ 
$(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cc.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +libjucx_la-context.lo: context.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -MT libjucx_la-context.lo -MD -MP -MF $(DEPDIR)/libjucx_la-context.Tpo -c -o libjucx_la-context.lo `test -f 'context.cc' || echo '$(srcdir)/'`context.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libjucx_la-context.Tpo $(DEPDIR)/libjucx_la-context.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='context.cc' object='libjucx_la-context.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) 
$(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -c -o libjucx_la-context.lo `test -f 'context.cc' || echo '$(srcdir)/'`context.cc + +libjucx_la-endpoint.lo: endpoint.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -MT libjucx_la-endpoint.lo -MD -MP -MF $(DEPDIR)/libjucx_la-endpoint.Tpo -c -o libjucx_la-endpoint.lo `test -f 'endpoint.cc' || echo '$(srcdir)/'`endpoint.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libjucx_la-endpoint.Tpo $(DEPDIR)/libjucx_la-endpoint.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='endpoint.cc' object='libjucx_la-endpoint.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -c -o libjucx_la-endpoint.lo `test -f 'endpoint.cc' || echo '$(srcdir)/'`endpoint.cc + +libjucx_la-jucx_common_def.lo: jucx_common_def.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -MT libjucx_la-jucx_common_def.lo -MD -MP -MF $(DEPDIR)/libjucx_la-jucx_common_def.Tpo -c -o libjucx_la-jucx_common_def.lo `test -f 'jucx_common_def.cc' || echo '$(srcdir)/'`jucx_common_def.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libjucx_la-jucx_common_def.Tpo $(DEPDIR)/libjucx_la-jucx_common_def.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='jucx_common_def.cc' object='libjucx_la-jucx_common_def.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -c -o libjucx_la-jucx_common_def.lo `test -f 'jucx_common_def.cc' || echo '$(srcdir)/'`jucx_common_def.cc + +libjucx_la-listener.lo: listener.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -MT libjucx_la-listener.lo -MD -MP -MF $(DEPDIR)/libjucx_la-listener.Tpo -c -o libjucx_la-listener.lo `test -f 'listener.cc' || echo '$(srcdir)/'`listener.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libjucx_la-listener.Tpo $(DEPDIR)/libjucx_la-listener.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='listener.cc' object='libjucx_la-listener.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -c -o libjucx_la-listener.lo `test -f 'listener.cc' || echo '$(srcdir)/'`listener.cc + +libjucx_la-memory.lo: memory.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -MT libjucx_la-memory.lo -MD -MP -MF $(DEPDIR)/libjucx_la-memory.Tpo -c -o libjucx_la-memory.lo `test -f 'memory.cc' || echo '$(srcdir)/'`memory.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/libjucx_la-memory.Tpo $(DEPDIR)/libjucx_la-memory.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='memory.cc' object='libjucx_la-memory.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -c -o libjucx_la-memory.lo `test -f 'memory.cc' || echo '$(srcdir)/'`memory.cc + +libjucx_la-request.lo: request.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -MT libjucx_la-request.lo -MD -MP -MF $(DEPDIR)/libjucx_la-request.Tpo -c -o libjucx_la-request.lo `test -f 'request.cc' || echo '$(srcdir)/'`request.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libjucx_la-request.Tpo $(DEPDIR)/libjucx_la-request.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='request.cc' object='libjucx_la-request.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -c -o libjucx_la-request.lo `test -f 'request.cc' || echo '$(srcdir)/'`request.cc + +libjucx_la-ucp_constants.lo: ucp_constants.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -MT 
libjucx_la-ucp_constants.lo -MD -MP -MF $(DEPDIR)/libjucx_la-ucp_constants.Tpo -c -o libjucx_la-ucp_constants.lo `test -f 'ucp_constants.cc' || echo '$(srcdir)/'`ucp_constants.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libjucx_la-ucp_constants.Tpo $(DEPDIR)/libjucx_la-ucp_constants.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp_constants.cc' object='libjucx_la-ucp_constants.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -c -o libjucx_la-ucp_constants.lo `test -f 'ucp_constants.cc' || echo '$(srcdir)/'`ucp_constants.cc + +libjucx_la-ucs_constants.lo: ucs_constants.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -MT libjucx_la-ucs_constants.lo -MD -MP -MF $(DEPDIR)/libjucx_la-ucs_constants.Tpo -c -o libjucx_la-ucs_constants.lo `test -f 'ucs_constants.cc' || echo '$(srcdir)/'`ucs_constants.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libjucx_la-ucs_constants.Tpo $(DEPDIR)/libjucx_la-ucs_constants.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs_constants.cc' object='libjucx_la-ucs_constants.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -c -o libjucx_la-ucs_constants.lo `test -f 
'ucs_constants.cc' || echo '$(srcdir)/'`ucs_constants.cc + +libjucx_la-worker.lo: worker.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -MT libjucx_la-worker.lo -MD -MP -MF $(DEPDIR)/libjucx_la-worker.Tpo -c -o libjucx_la-worker.lo `test -f 'worker.cc' || echo '$(srcdir)/'`worker.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libjucx_la-worker.Tpo $(DEPDIR)/libjucx_la-worker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='worker.cc' object='libjucx_la-worker.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libjucx_la_CPPFLAGS) $(CPPFLAGS) $(libjucx_la_CXXFLAGS) $(CXXFLAGS) -c -o libjucx_la-worker.lo `test -f 'worker.cc' || echo '$(srcdir)/'`worker.cc + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-jarDATA: $(jar_DATA) + @$(NORMAL_INSTALL) + @list='$(jar_DATA)'; test -n "$(jardir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(jardir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(jardir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(jardir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(jardir)" || exit $$?; \ + done + +uninstall-jarDATA: + @$(NORMAL_UNINSTALL) + @list='$(jar_DATA)'; test -n "$(jardir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(jardir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + 
	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am  # user-facing alias; the work is done by tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)  # runs $(ETAGS) over the tagged files
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)  # runs $(CTAGS) unless there is nothing to tag
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)  # appends this directory's tagged files to $(top_builddir)/cscope.files
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)  # copies every file in $(DISTFILES) into $(distdir)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file"
+ | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) check-am
+all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS)
+installdirs:  # creates the library and jar install directories under $(DESTDIR)
+	for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(jardir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: $(BUILT_SOURCES)
+	$(MAKE) $(AM_MAKEFLAGS) install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:  # re-runs "install" with INSTALL_STRIP_PROGRAM substituted for the install program
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+	-test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES)
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+	-test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
+@HAVE_JAVA_FALSE@clean-local:
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool clean-local \
+	mostlyclean-am
+
+distclean: distclean-am  # additionally removes the per-object dependency files and the Makefile
+		-rm -f ./$(DEPDIR)/libjucx_la-context.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-endpoint.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-jucx_common_def.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-listener.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-memory.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-request.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-ucp_constants.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-ucs_constants.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-worker.Plo
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-jarDATA
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+		-rm -f ./$(DEPDIR)/libjucx_la-context.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-endpoint.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-jucx_common_def.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-listener.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-memory.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-request.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-ucp_constants.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-ucs_constants.Plo
+	-rm -f ./$(DEPDIR)/libjucx_la-worker.Plo
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-jarDATA uninstall-libLTLIBRARIES
+
+.MAKE: all check install install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+	clean-generic clean-libLTLIBRARIES clean-libtool clean-local \
+	cscopelist-am ctags ctags-am distclean distclean-compile \
+	distclean-generic distclean-libtool distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-jarDATA \
+	install-libLTLIBRARIES install-man install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-jarDATA \
+	uninstall-libLTLIBRARIES
+
+.PRECIOUS: Makefile
+
+
+#
+# Create a timestamp file to avoid regenerating header files every time
+# See https://www.gnu.org/software/automake/manual/html_node/Multiple-Outputs.html
+#
+@HAVE_JAVA_TRUE@$(STAMP_FILE): \
+@HAVE_JAVA_TRUE@		$(javadir)/src/main/java/org/openucx/jucx/ucs/*.java \
+@HAVE_JAVA_TRUE@		$(javadir)/src/main/java/org/openucx/jucx/ucp/*.java
+@HAVE_JAVA_TRUE@	$(MVNCMD) compile
+@HAVE_JAVA_TRUE@	touch $(STAMP_FILE)
+
+@HAVE_JAVA_TRUE@$(JUCX_GENERATED_H_FILES): $(STAMP_FILE)
+
+# Compile Java source code and pack to jar
+@HAVE_JAVA_TRUE@$(jarfile):
+@HAVE_JAVA_TRUE@	$(MVNCMD) package -DskipTests
+
+@HAVE_JAVA_TRUE@package : $(jarfile)
+
+# None of these targets names a file this Makefile produces; declaring them
+# phony keeps a stray file or directory called e.g. "test" or "docs" from
+# silently disabling the command.
+@HAVE_JAVA_TRUE@.PHONY: package set-version publish-snapshot publish-release publish test docs
+
+# Remove all compiled Java files
+@HAVE_JAVA_TRUE@clean-local:
+@HAVE_JAVA_TRUE@	-rm -rf $(java_build_dir)
+
+# Set the maven project version to $(JUCX_VERSION)
+@HAVE_JAVA_TRUE@set-version:
+@HAVE_JAVA_TRUE@	$(MVNCMD) versions:set -DnewVersion=${JUCX_VERSION}
+
+# Publish JUCX jar to maven central
+# Sub-makes go through $(MAKE) so that -n, -j/jobserver and command-line
+# variables are propagated (a literal "make" drops them).
+@HAVE_JAVA_TRUE@publish-snapshot:
+@HAVE_JAVA_TRUE@	@$(MAKE) $(AM_MAKEFLAGS) set-version JUCX_VERSION=@VERSION@-SNAPSHOT
+@HAVE_JAVA_TRUE@	@$(MAKE) $(AM_MAKEFLAGS) publish
+
+@HAVE_JAVA_TRUE@publish-release:
+@HAVE_JAVA_TRUE@	@$(MAKE) $(AM_MAKEFLAGS) set-version JUCX_VERSION=${JUCX_VERSION}
+@HAVE_JAVA_TRUE@	@$(MAKE) $(AM_MAKEFLAGS) publish
+
+@HAVE_JAVA_TRUE@publish:
+@HAVE_JAVA_TRUE@	$(MVNCMD) deploy -DskipTests ${ARGS}
+
+@HAVE_JAVA_TRUE@test:
+@HAVE_JAVA_TRUE@	$(MVNCMD) test -DargLine="-XX:OnError='cat hs_err_pid%p.log'"
+@HAVE_JAVA_TRUE@docs:
+@HAVE_JAVA_TRUE@	$(MVNCMD) javadoc:javadoc
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/bindings/java/src/main/native/context.cc b/bindings/java/src/main/native/context.cc
new file mode 100644
index 0000000..91b79cb
--- /dev/null
+++ b/bindings/java/src/main/native/context.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "jucx_common_def.h"
+#include "org_openucx_jucx_ucp_UcpContext.h"
+extern "C" {
+/* NOTE(review): the header name was stripped from this directive in the text
+ * this file was recovered from; ucp_mm.h is inferred from the memh->address /
+ * memh->length accesses below - confirm against upstream. */
+#include <ucp/core/ucp_mm.h>
+}
+
+/**
+ * Bridge method for creating ucp_context from java.
+ *
+ * Copies the fields selected by the Java object's fieldMask into a
+ * ucp_params_t, always adds the JUCX per-request context (size + initializer)
+ * and calls ucp_init().
+ *
+ * @return the native ucp_context_h as a jlong; 0 with a pending UcxException
+ *         when ucp_init() fails.
+ */
+JNIEXPORT jlong JNICALL
+Java_org_openucx_jucx_ucp_UcpContext_createContextNative(JNIEnv *env, jclass cls,
+                                                         jobject jucx_ctx_params)
+{
+    ucp_params_t ucp_params = { 0 };
+    ucp_context_h ucp_context = NULL;
+    jfieldID field;
+
+    jclass jucx_param_class = env->GetObjectClass(jucx_ctx_params);
+    field = env->GetFieldID(jucx_param_class, "fieldMask", "J");
+    ucp_params.field_mask = env->GetLongField(jucx_ctx_params, field);
+
+    if (ucp_params.field_mask & UCP_PARAM_FIELD_FEATURES) {
+        field = env->GetFieldID(jucx_param_class, "features", "J");
+        ucp_params.features = env->GetLongField(jucx_ctx_params, field);
+    }
+
+    if (ucp_params.field_mask & UCP_PARAM_FIELD_MT_WORKERS_SHARED) {
+        field = env->GetFieldID(jucx_param_class, "mtWorkersShared", "Z");
+        ucp_params.mt_workers_shared = env->GetBooleanField(jucx_ctx_params,
+                                                            field);
+    }
+
+    if (ucp_params.field_mask & UCP_PARAM_FIELD_ESTIMATED_NUM_EPS) {
+        field = env->GetFieldID(jucx_param_class, "estimatedNumEps", "J");
+        ucp_params.estimated_num_eps = env->GetLongField(jucx_ctx_params,
+                                                         field);
+    }
+
+    if (ucp_params.field_mask & UCP_PARAM_FIELD_TAG_SENDER_MASK) {
+        field = env->GetFieldID(jucx_param_class, "tagSenderMask", "J");
+        // Was stored into estimated_num_eps, which both lost the mask and
+        // clobbered the endpoint estimate read just above.
+        ucp_params.tag_sender_mask = env->GetLongField(jucx_ctx_params,
+                                                       field);
+    }
+
+    ucp_params.field_mask |= UCP_PARAM_FIELD_REQUEST_INIT |
+                             UCP_PARAM_FIELD_REQUEST_SIZE;
+    ucp_params.request_size = sizeof(struct jucx_context);
+    ucp_params.request_init = jucx_request_init;
+
+    ucs_status_t status = ucp_init(&ucp_params, NULL, &ucp_context);
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+        // ucp_context was never written; do not hand garbage back to Java.
+        return 0;
+    }
+    return (native_ptr)ucp_context;
+}
+
+
+/**
+ * Releases the native context created by createContextNative.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpContext_cleanupContextNative(JNIEnv *env, jclass cls,
+                                                          jlong ucp_context_ptr)
+{
+    ucp_cleanup((ucp_context_h)ucp_context_ptr);
+}
+
+
+/**
+ * Maps (registers) memory as described by the Java parameters object and
+ * wraps the resulting handle in a new org.openucx.jucx.ucp.UcpMemory whose
+ * context, address and length fields are filled in.
+ *
+ * @return the UcpMemory object; NULL with a pending UcxException when
+ *         ucp_mem_map() fails.
+ */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpContext_memoryMapNative(JNIEnv *env, jobject ctx,
+                                                     jlong ucp_context_ptr,
+                                                     jobject jucx_mmap_params)
+{
+    ucp_mem_map_params_t params = {0};
+    ucp_mem_h memh;
+    jfieldID field;
+
+    jclass jucx_mmap_class = env->GetObjectClass(jucx_mmap_params);
+    field = env->GetFieldID(jucx_mmap_class, "fieldMask", "J");
+    params.field_mask = env->GetLongField(jucx_mmap_params, field);
+
+    if (params.field_mask & UCP_MEM_MAP_PARAM_FIELD_ADDRESS) {
+        field = env->GetFieldID(jucx_mmap_class, "address", "J");
+        params.address = (void *)env->GetLongField(jucx_mmap_params, field);
+    }
+
+    if (params.field_mask & UCP_MEM_MAP_PARAM_FIELD_LENGTH) {
+        field = env->GetFieldID(jucx_mmap_class, "length", "J");
+        params.length = env->GetLongField(jucx_mmap_params, field);
+    }
+
+    if (params.field_mask & UCP_MEM_MAP_PARAM_FIELD_FLAGS) {
+        field = env->GetFieldID(jucx_mmap_class, "flags", "J");
+        params.flags = env->GetLongField(jucx_mmap_params, field);
+    }
+
+    ucs_status_t status = ucp_mem_map((ucp_context_h)ucp_context_ptr, &params, &memh);
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+        // memh is uninitialized on failure; the code below dereferences it.
+        return NULL;
+    }
+
+    // Construct UcpMemory class
+    jclass jucx_mem_cls = env->FindClass("org/openucx/jucx/ucp/UcpMemory");
+    jmethodID constructor = env->GetMethodID(jucx_mem_cls, "<init>", "(J)V");
+    jobject jucx_mem = env->NewObject(jucx_mem_cls, constructor, (native_ptr)memh);
+
+    // Set UcpContext pointer
+    field = env->GetFieldID(jucx_mem_cls, "context", "Lorg/openucx/jucx/ucp/UcpContext;");
+    env->SetObjectField(jucx_mem, field, ctx);
+
+    // Set address
+    field = env->GetFieldID(jucx_mem_cls, "address", "J");
+    env->SetLongField(jucx_mem, field, (native_ptr)memh->address);
+
+    // Set length
+    field = env->GetFieldID(jucx_mem_cls, "length", "J");
+    env->SetLongField(jucx_mem, field, memh->length);
+
+    /* Coverity thinks that memh is a leaked object here,
+     * but it's stored in a UcpMemory object */
+    /* coverity[leaked_storage] */
+    return jucx_mem;
+}
diff --git a/bindings/java/src/main/native/endpoint.cc b/bindings/java/src/main/native/endpoint.cc
new file mode 100644
index 0000000..0398752
--- /dev/null
+++ b/bindings/java/src/main/native/endpoint.cc
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "jucx_common_def.h"
+#include "org_openucx_jucx_ucp_UcpEndpoint.h"
+
+#include <string.h>    /* memset */
+
+/* NOTE(review): header name was stripped from the recovered text; ucp_ep.inl
+ * is presumed because it provides ucp_ep_peer_name - confirm against upstream. */
+#include <ucp/core/ucp_ep.inl> /* ucp_ep_peer_name */
+
+
+/**
+ * Endpoint error callback registered with every endpoint: raises a
+ * UcxException for the reported status on the calling thread and logs it.
+ */
+static void error_handler(void *arg, ucp_ep_h ep, ucs_status_t status)
+{
+    JNIEnv* env = get_jni_env();
+    JNU_ThrowExceptionByStatus(env, status);
+    ucs_error("JUCX: endpoint error handler: %s", ucs_status_string(status));
+}
+
+/**
+ * Creates a UCP endpoint on the given worker from the Java parameters object.
+ * Only fields selected by fieldMask are read; the JUCX error handler is
+ * always installed.
+ *
+ * @return the native ucp_ep_h as a jlong; 0 with a pending UcxException when
+ *         the socket address cannot be converted or ucp_ep_create() fails.
+ */
+JNIEXPORT jlong JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_createEndpointNative(JNIEnv *env, jclass cls,
+                                                           jobject ucp_ep_params,
+                                                           jlong worker_ptr)
+{
+    ucp_ep_params_t ep_params;
+    jfieldID field;
+    ucp_worker_h ucp_worker = (ucp_worker_h)worker_ptr;
+    ucp_ep_h endpoint;
+    // Declared at function scope: ep_params.sockaddr.addr points at this
+    // storage and is read by ucp_ep_create() below. It used to live inside the
+    // SOCK_ADDR branch and was out of scope by the time it was consumed.
+    struct sockaddr_storage worker_addr;
+    socklen_t addrlen;
+
+    // Get field mask
+    jclass ucp_ep_params_class = env->GetObjectClass(ucp_ep_params);
+    field = env->GetFieldID(ucp_ep_params_class, "fieldMask", "J");
+    ep_params.field_mask = env->GetLongField(ucp_ep_params, field);
+
+    if (ep_params.field_mask & UCP_EP_PARAM_FIELD_REMOTE_ADDRESS) {
+        field = env->GetFieldID(ucp_ep_params_class, "ucpAddress", "Ljava/nio/ByteBuffer;");
+        jobject buf = env->GetObjectField(ucp_ep_params, field);
+        ep_params.address = static_cast<const ucp_address_t *>(env->GetDirectBufferAddress(buf));
+    }
+
+    if (ep_params.field_mask & UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE) {
+        field = env->GetFieldID(ucp_ep_params_class, "errorHandlingMode", "I");
+        ep_params.err_mode = static_cast<ucp_err_handling_mode_t>(env->GetIntField(ucp_ep_params, field));
+    }
+
+    if (ep_params.field_mask & UCP_EP_PARAM_FIELD_USER_DATA) {
+        field = env->GetFieldID(ucp_ep_params_class, "userData", "Ljava/nio/ByteBuffer;");
+        jobject user_data = env->GetObjectField(ucp_ep_params, field);
+        ep_params.user_data = env->GetDirectBufferAddress(user_data);
+    }
+
+    if (ep_params.field_mask & UCP_EP_PARAM_FIELD_FLAGS) {
+        field = env->GetFieldID(ucp_ep_params_class, "flags", "J");
+        ep_params.flags = env->GetLongField(ucp_ep_params, field);
+    }
+
+    if (ep_params.field_mask & UCP_EP_PARAM_FIELD_SOCK_ADDR) {
+        memset(&worker_addr, 0, sizeof(struct sockaddr_storage));
+
+        field = env->GetFieldID(ucp_ep_params_class,
+                                "socketAddress", "Ljava/net/InetSocketAddress;");
+        jobject sock_addr = env->GetObjectField(ucp_ep_params, field);
+
+        if (!j2cInetSockAddr(env, sock_addr, worker_addr, addrlen)) {
+            // j2cInetSockAddr already raised the exception; creating the
+            // endpoint with SOCK_ADDR set but no address would read garbage.
+            return 0;
+        }
+        ep_params.sockaddr.addr    = (const struct sockaddr*)&worker_addr;
+        ep_params.sockaddr.addrlen = addrlen;
+    }
+
+    if (ep_params.field_mask & UCP_EP_PARAM_FIELD_CONN_REQUEST) {
+        field = env->GetFieldID(ucp_ep_params_class, "connectionRequest", "J");
+        ep_params.conn_request = reinterpret_cast<ucp_conn_request_h>(env->GetLongField(ucp_ep_params, field));
+    }
+
+    ep_params.field_mask     |= UCP_EP_PARAM_FIELD_ERR_HANDLER;
+    ep_params.err_handler.cb  = error_handler;
+
+    ucs_status_t status = ucp_ep_create(ucp_worker, &ep_params, &endpoint);
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+        // endpoint was never written on failure
+        return 0;
+    }
+
+    return (native_ptr)endpoint;
+}
+
+/** Destroys the native endpoint. */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_destroyEndpointNative(JNIEnv *env, jclass cls,
+                                                            jlong ep_ptr)
+{
+    ucp_ep_destroy((ucp_ep_h)ep_ptr);
+}
+
+/** Starts a non-blocking close in the given mode; returns the tracking UcpRequest. */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_closeNonBlockingNative(JNIEnv *env, jclass cls,
+                                                             jlong ep_ptr, jint mode)
+{
+    ucs_status_ptr_t request = ucp_ep_close_nb((ucp_ep_h)ep_ptr, mode);
+
+    return process_request(request, NULL);
+}
+
+/**
+ * Unpacks a packed remote key located at addr for this endpoint.
+ *
+ * @return a new UcpRemoteKey; NULL with a pending UcxException on failure.
+ */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_unpackRemoteKey(JNIEnv *env, jclass cls,
+                                                      jlong ep_ptr, jlong addr)
+{
+    ucp_rkey_h rkey;
+
+    ucs_status_t status = ucp_ep_rkey_unpack((ucp_ep_h)ep_ptr, (void *)addr, &rkey);
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+        // rkey is uninitialized here; do not wrap it in a Java object.
+        return NULL;
+    }
+
+    jobject result = new_rkey_instance(env, rkey);
+
+    /* Coverity thinks that rkey is a leaked object here,
+     * but it's stored in a UcpRemoteKey object */
+    /* coverity[leaked_storage] */
+    return result;
+}
+
+/** Non-blocking RMA put of size bytes from laddr to remote raddr. */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_putNonBlockingNative(JNIEnv *env, jclass cls,
+                                                           jlong ep_ptr, jlong laddr,
+                                                           jlong size, jlong raddr,
+                                                           jlong rkey_ptr, jobject callback)
+{
+    ucs_status_ptr_t request = ucp_put_nb((ucp_ep_h)ep_ptr, (void *)laddr, size, raddr,
+                                          (ucp_rkey_h)rkey_ptr, jucx_request_callback);
+
+    ucs_trace_req("JUCX: put_nb request %p to %s, of size: %zu, raddr: %zu",
+                  request, ucp_ep_peer_name((ucp_ep_h)ep_ptr), size, raddr);
+    return process_request(request, callback);
+}
+
+/** Implicit (request-less) RMA put; throws UcxException on an error status. */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_putNonBlockingImplicitNative(JNIEnv *env, jclass cls,
+                                                                   jlong ep_ptr, jlong laddr,
+                                                                   jlong size, jlong raddr,
+                                                                   jlong rkey_ptr)
+{
+    ucs_status_t status = ucp_put_nbi((ucp_ep_h)ep_ptr, (void *)laddr, size, raddr,
+                                      (ucp_rkey_h)rkey_ptr);
+
+    if (UCS_STATUS_IS_ERR(status)) {
+        JNU_ThrowExceptionByStatus(env, status);
+    }
+}
+
+/** Non-blocking RMA get of size bytes from remote raddr into laddr. */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_getNonBlockingNative(JNIEnv *env, jclass cls,
+                                                           jlong ep_ptr, jlong raddr,
+                                                           jlong rkey_ptr, jlong laddr,
+                                                           jlong size, jobject callback)
+{
+    ucs_status_ptr_t request = ucp_get_nb((ucp_ep_h)ep_ptr, (void *)laddr, size,
+                                          raddr, (ucp_rkey_h)rkey_ptr, jucx_request_callback);
+
+    ucs_trace_req("JUCX: get_nb request %p to %s, raddr: %zu, size: %zu, result address: %zu",
+                  request, ucp_ep_peer_name((ucp_ep_h)ep_ptr), raddr, size, laddr);
+    return process_request(request, callback);
+}
+
+/** Implicit (request-less) RMA get; throws UcxException on an error status. */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_getNonBlockingImplicitNative(JNIEnv *env, jclass cls,
+                                                                   jlong ep_ptr, jlong raddr,
+                                                                   jlong rkey_ptr, jlong laddr,
+                                                                   jlong size)
+{
+    ucs_status_t status = ucp_get_nbi((ucp_ep_h)ep_ptr, (void *)laddr, size, raddr,
+                                      (ucp_rkey_h)rkey_ptr);
+
+    if (UCS_STATUS_IS_ERR(status)) {
+        JNU_ThrowExceptionByStatus(env, status);
+    }
+}
+
+/** Non-blocking tagged send of size contiguous bytes at addr. */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_sendTaggedNonBlockingNative(JNIEnv *env, jclass cls,
+                                                                  jlong ep_ptr, jlong addr,
+                                                                  jlong size, jlong tag,
+                                                                  jobject callback)
+{
+    ucs_status_ptr_t request = ucp_tag_send_nb((ucp_ep_h)ep_ptr, (void *)addr, size,
+                                               ucp_dt_make_contig(1), tag, jucx_request_callback);
+
+    ucs_trace_req("JUCX: send_tag_nb request %p to %s, size: %zu, tag: %ld",
+                  request, ucp_ep_peer_name((ucp_ep_h)ep_ptr), size, tag);
+    return process_request(request, callback);
+}
+
+/** Non-blocking stream send of size contiguous bytes at addr. */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_sendStreamNonBlockingNative(JNIEnv *env, jclass cls,
+                                                                  jlong ep_ptr, jlong addr,
+                                                                  jlong size, jobject callback)
+{
+    ucs_status_ptr_t request = ucp_stream_send_nb((ucp_ep_h)ep_ptr, (void *)addr, size,
+                                                  ucp_dt_make_contig(1), jucx_request_callback, 0);
+
+    ucs_trace_req("JUCX: send_stream_nb request %p to %s, size: %zu",
+                  request, ucp_ep_peer_name((ucp_ep_h)ep_ptr), size);
+    return process_request(request, callback);
+}
+
+/**
+ * Non-blocking stream receive into addr. When UCX completes the receive
+ * inline (NULL request) the received length is reported through
+ * process_completed_stream_recv, because no UCX callback fires in that case.
+ */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_recvStreamNonBlockingNative(JNIEnv *env, jclass cls,
+                                                                  jlong ep_ptr, jlong addr,
+                                                                  jlong size, jlong flags,
+                                                                  jobject callback)
+{
+    size_t rlength;
+    ucs_status_ptr_t request = ucp_stream_recv_nb((ucp_ep_h)ep_ptr, (void *)addr, size,
+                                                  ucp_dt_make_contig(1), stream_recv_callback,
+                                                  &rlength, flags);
+
+    ucs_trace_req("JUCX: recv_stream_nb request %p to %s, size: %zu",
+                  request, ucp_ep_peer_name((ucp_ep_h)ep_ptr), size);
+
+    if (request == NULL) {
+        // If request completed immediately.
+        return process_completed_stream_recv(rlength, callback);
+    }
+
+    return process_request(request, callback);
+}
+
+/** Starts a non-blocking flush of the endpoint. */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpEndpoint_flushNonBlockingNative(JNIEnv *env, jclass cls,
+                                                             jlong ep_ptr,
+                                                             jobject callback)
+{
+    ucs_status_ptr_t request = ucp_ep_flush_nb((ucp_ep_h)ep_ptr, 0, jucx_request_callback);
+
+    return process_request(request, callback);
+}
diff --git a/bindings/java/src/main/native/jucx_common_def.cc b/bindings/java/src/main/native/jucx_common_def.cc
new file mode 100644
index 0000000..bb32564
--- /dev/null
+++ b/bindings/java/src/main/native/jucx_common_def.cc
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "jucx_common_def.h"
+extern "C" {
+  /* NOTE(review): the three header names in this block were stripped from the
+   * recovered text. assert.h / debug.h are inferred from ucs_assert_always and
+   * ucs_debug_disable_signals; the third is a guess - confirm against upstream. */
+  #include <ucs/arch/cpu.h>
+  #include <ucs/debug/assert.h>
+  #include <ucs/debug/debug.h>
+}
+
+#include <string.h>    /* memset */
+#include <arpa/inet.h> /* inet_addr */
+#include <pthread.h>   /* pthread_yield */
+
+
+/* Cached at JNI_OnLoad so completion callbacks need no class lookups. */
+static JavaVM *jvm_global;
+static jclass jucx_request_cls;
+static jfieldID native_id_field;
+static jfieldID recv_size_field;
+static jmethodID on_success;
+static jmethodID jucx_request_constructor;
+static jclass ucp_rkey_cls;
+static jmethodID ucp_rkey_cls_constructor;
+
+/**
+ * Library load hook: remembers the JavaVM and caches the classes, fields and
+ * methods used by the request machinery. Returns JNI_ERR (leaving the lookup
+ * exception pending) if a required class cannot be resolved.
+ */
+extern "C" JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *jvm, void* reserved) {
+    ucs_debug_disable_signals();
+    jvm_global = jvm;
+    JNIEnv* env;
+    if (jvm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_1) != JNI_OK) {
+       return JNI_ERR;
+    }
+
+    jclass jucx_request_cls_local = env->FindClass("org/openucx/jucx/ucp/UcpRequest");
+    if (jucx_request_cls_local == NULL) {
+        return JNI_ERR;
+    }
+    jucx_request_cls = (jclass) env->NewGlobalRef(jucx_request_cls_local);
+    jclass jucx_callback_cls = env->FindClass("org/openucx/jucx/UcxCallback");
+    if (jucx_callback_cls == NULL) {
+        return JNI_ERR;
+    }
+    native_id_field = env->GetFieldID(jucx_request_cls, "nativeId", "Ljava/lang/Long;");
+    recv_size_field = env->GetFieldID(jucx_request_cls, "recvSize", "J");
+    on_success = env->GetMethodID(jucx_callback_cls, "onSuccess",
+                                  "(Lorg/openucx/jucx/ucp/UcpRequest;)V");
+    jucx_request_constructor = env->GetMethodID(jucx_request_cls, "<init>", "(J)V");
+
+    jclass ucp_rkey_cls_local = env->FindClass("org/openucx/jucx/ucp/UcpRemoteKey");
+    if (ucp_rkey_cls_local == NULL) {
+        return JNI_ERR;
+    }
+    ucp_rkey_cls = (jclass) env->NewGlobalRef(ucp_rkey_cls_local);
+    ucp_rkey_cls_constructor = env->GetMethodID(ucp_rkey_cls, "<init>", "(J)V");
+    return JNI_VERSION_1_1;
+}
+
+/**
+ * Library unload hook: releases every global class reference taken in
+ * JNI_OnLoad.
+ */
+extern "C" JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *jvm, void *reserved) {
+    JNIEnv* env;
+    if (jvm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_1) != JNI_OK) {
+        return;
+    }
+
+    if (jucx_request_cls != NULL) {
+        env->DeleteGlobalRef(jucx_request_cls);
+        jucx_request_cls = NULL;
+    }
+
+    // The rkey class reference was previously never released.
+    if (ucp_rkey_cls != NULL) {
+        env->DeleteGlobalRef(ucp_rkey_cls);
+        ucp_rkey_cls = NULL;
+    }
+}
+
+/* See jucx_common_def.h. On failure a UcxException is raised, ss is zeroed
+ * and sa_len is 0. */
+bool j2cInetSockAddr(JNIEnv *env, jobject sock_addr, sockaddr_storage& ss,  socklen_t& sa_len)
+{
+    jfieldID field;
+    memset(&ss, 0, sizeof(ss));
+    sa_len = 0;
+
+    if (sock_addr == NULL) {
+        JNU_ThrowException(env, "j2cInetSockAddr: InetSocketAddr is null");
+        return false;
+    }
+
+    jclass inetsockaddr_cls = env->GetObjectClass(sock_addr);
+
+    // Get sockAddr->port
+    jmethodID getPort = env->GetMethodID(inetsockaddr_cls, "getPort", "()I");
+    jint port = env->CallIntMethod(sock_addr, getPort);
+
+    // Get sockAddr->getAddress (InetAddress)
+    jmethodID getAddress = env->GetMethodID(inetsockaddr_cls, "getAddress",
+                                            "()Ljava/net/InetAddress;");
+    jobject inet_address = env->CallObjectMethod(sock_addr, getAddress);
+
+    if (inet_address == NULL) {
+        JNU_ThrowException(env, "j2cInetSockAddr: InetSocketAddr.getAddress is null");
+        return false;
+    }
+
+    jclass inetaddr_cls = env->GetObjectClass(inet_address);
+
+    // Get address family. In Java IPv4 has addressFamily = 1, IPv6 = 2.
+    field = env->GetFieldID(inetaddr_cls, "holder",
+                            "Ljava/net/InetAddress$InetAddressHolder;");
+    jobject inet_addr_holder = env->GetObjectField(inet_address, field);
+    jclass inet_addr_holder_cls = env->GetObjectClass(inet_addr_holder);
+    field = env->GetFieldID(inet_addr_holder_cls, "family", "I");
+    jint family = env->GetIntField(inet_addr_holder, field);
+
+    field = env->GetStaticFieldID(inetaddr_cls, "IPv4", "I");
+    const int JAVA_IPV4_FAMILY = env->GetStaticIntField(inetaddr_cls, field);
+    field = env->GetStaticFieldID(inetaddr_cls, "IPv6", "I");
+    const int JAVA_IPV6_FAMILY = env->GetStaticIntField(inetaddr_cls, field);
+
+    // Get the byte array that stores the IP address bytes in the InetAddress.
+    jmethodID get_addr_bytes = env->GetMethodID(inetaddr_cls, "getAddress", "()[B");
+    jobject ip_byte_array = env->CallObjectMethod(inet_address, get_addr_bytes);
+
+    if (ip_byte_array == NULL) {
+        JNU_ThrowException(env, "j2cInetSockAddr: InetAddr.getAddress.getAddress is null");
+        return false;
+    }
+
+    jbyteArray addressBytes = static_cast<jbyteArray>(ip_byte_array);
+
+    if (family == JAVA_IPV4_FAMILY) {
+        // Deal with Inet4Address instances.
+        // We should represent this Inet4Address as an IPv4 sockaddr_in.
+        ss.ss_family = AF_INET;
+        sockaddr_in &sin = reinterpret_cast<sockaddr_in &>(ss);
+        sin.sin_port = htons(port);
+        jbyte *dst = reinterpret_cast<jbyte *>(&sin.sin_addr.s_addr);
+        env->GetByteArrayRegion(addressBytes, 0, 4, dst);
+        sa_len = sizeof(sockaddr_in);
+        return true;
+    } else if (family == JAVA_IPV6_FAMILY) {
+        jclass inet6_addr_cls = env->FindClass("java/net/Inet6Address");
+        ss.ss_family = AF_INET6;
+        sockaddr_in6& sin6 = reinterpret_cast<sockaddr_in6&>(ss);
+        sin6.sin6_port = htons(port);
+        // IPv6 address. Copy the bytes...
+        jbyte *dst = reinterpret_cast<jbyte *>(&sin6.sin6_addr.s6_addr);
+        env->GetByteArrayRegion(addressBytes, 0, 16, dst);
+        // ...and set the scope id...
+        jmethodID getScopeId = env->GetMethodID(inet6_addr_cls, "getScopeId", "()I");
+        sin6.sin6_scope_id = env->CallIntMethod(inet_address, getScopeId);
+        sa_len = sizeof(sockaddr_in6);
+        return true;
+    }
+    JNU_ThrowException(env, "Unknown InetAddress family");
+    return false;
+}
+
+/* Puts a request context back into its "nothing attached, in progress" state. */
+static inline void jucx_context_reset(struct jucx_context* ctx)
+{
+    ctx->callback = NULL;
+    ctx->jucx_request = NULL;
+    ctx->status = UCS_INPROGRESS;
+    ctx->length = 0;
+}
+
+/* Request initializer handed to ucp_init() via ucp_params.request_init. */
+void jucx_request_init(void *request)
+{
+     struct jucx_context *ctx = (struct jucx_context *)request;
+     jucx_context_reset(ctx);
+     ucs_spinlock_init(&ctx->lock);
+}
+
+JNIEnv* get_jni_env()
+{
+    void *env;
+    jint rs = jvm_global->AttachCurrentThread(&env, NULL);
+    ucs_assert_always(rs == JNI_OK);
+    return (JNIEnv*)env;
+}
+
+/* Marks the Java request completed: clears nativeId and, when the context
+ * carries a non-zero length, stores it in recvSize. */
+static inline void set_jucx_request_completed(JNIEnv *env, jobject jucx_request,
+                                              struct jucx_context *ctx)
+{
+    env->SetObjectField(jucx_request, native_id_field, NULL);
+    if ((ctx != NULL) && (ctx->length > 0)) {
+        env->SetLongField(jucx_request, recv_size_field, ctx->length);
+    }
+}
+
+/* Invokes callback.onSuccess(request). */
+static inline void call_on_success(jobject callback, jobject request)
+{
+    JNIEnv *env = get_jni_env();
+    env->CallVoidMethod(callback, on_success, request);
+}
+
+/* Logs the failure and invokes callback.onError(status, message). */
+static inline void call_on_error(jobject callback, ucs_status_t status)
+{
+    if (status == UCS_ERR_CANCELED) {
+        ucs_debug("JUCX: Request canceled");
+    } else {
+        ucs_error("JUCX: request error: %s", ucs_status_string(status));
+    }
+
+    JNIEnv *env = get_jni_env();
+    jclass callback_cls = env->GetObjectClass(callback);
+    jmethodID on_error = env->GetMethodID(callback_cls, "onError", "(ILjava/lang/String;)V");
+    jstring error_msg = env->NewStringUTF(ucs_status_string(status));
+    env->CallVoidMethod(callback, on_error, status, error_msg);
+}
+
+/* Dispatches to onSuccess / onError depending on status. */
+static inline void jucx_call_callback(jobject callback, jobject jucx_request,
+                                      ucs_status_t status)
+{
+    if (status == UCS_OK) {
+        UCS_PROFILE_CALL_VOID(call_on_success, callback, jucx_request);
+    } else {
+        call_on_error(callback, status);
+    }
+}
+
+UCS_PROFILE_FUNC_VOID(jucx_request_callback, (request, status), void *request, ucs_status_t status)
+{
+    struct jucx_context *ctx = (struct jucx_context *)request;
+    ucs_spin_lock(&ctx->lock);
+    if (ctx->jucx_request == NULL) {
+        // here because 1 of 2 reasons:
+        // 1. progress is in another thread and got here earlier then process_request happened.
+        // 2. this callback is inside ucp_tag_recv_nb function.
+        ctx->status = status;
+        ucs_spin_unlock(&ctx->lock);
+        return;
+    }
+
+    JNIEnv *env = get_jni_env();
+    set_jucx_request_completed(env, ctx->jucx_request, ctx);
+
+    if (ctx->callback != NULL) {
+        jucx_call_callback(ctx->callback, ctx->jucx_request, status);
+        env->DeleteGlobalRef(ctx->callback);
+    }
+
+    env->DeleteGlobalRef(ctx->jucx_request);
+    jucx_context_reset(ctx);
+    // Unlock before releasing: the lock lives inside the request memory, so it
+    // must not be touched once the request has been handed back to UCX.
+    ucs_spin_unlock(&ctx->lock);
+    ucp_request_free(request);
+}
+
+void recv_callback(void *request, ucs_status_t status, ucp_tag_recv_info_t *info)
+{
+    struct jucx_context *ctx = (struct jucx_context *)request;
+    ctx->length = info->length;
+    jucx_request_callback(request, status);
+}
+
+void stream_recv_callback(void *request, ucs_status_t status, size_t length)
+{
+    struct jucx_context *ctx = (struct jucx_context *)request;
+    ctx->length = length;
+    jucx_request_callback(request, status);
+}
+
+UCS_PROFILE_FUNC(jobject, process_request, (request, callback), void *request, jobject callback)
+{
+    JNIEnv *env = get_jni_env();
+    jobject jucx_request = env->NewObject(jucx_request_cls, jucx_request_constructor,
+                                          (native_ptr)request);
+
+    if (UCS_PTR_IS_PTR(request)) {
+        struct jucx_context *ctx = (struct jucx_context *)request;
+        bool completed;
+
+        ucs_spin_lock(&ctx->lock);
+        completed = (ctx->status != UCS_INPROGRESS);
+        if (!completed) {
+            // request not completed yet, install user callback
+            if (callback != NULL) {
+                ctx->callback = env->NewGlobalRef(callback);
+            }
+            ctx->jucx_request = env->NewGlobalRef(jucx_request);
+        } else {
+            // request was completed whether by progress in other thread or inside
+            // ucp_tag_recv_nb function call.
+            set_jucx_request_completed(env, jucx_request, ctx);
+            if (callback != NULL) {
+                jucx_call_callback(callback, jucx_request, ctx->status);
+            }
+            jucx_context_reset(ctx);
+        }
+        ucs_spin_unlock(&ctx->lock);
+        if (completed) {
+            // Released only after the unlock: the lock is part of the request.
+            ucp_request_free(request);
+        }
+    } else {
+        set_jucx_request_completed(env, jucx_request, NULL);
+        if (UCS_PTR_IS_ERR(request)) {
+            JNU_ThrowExceptionByStatus(env, UCS_PTR_STATUS(request));
+            if (callback != NULL) {
+                call_on_error(callback, UCS_PTR_STATUS(request));
+            }
+        } else if (callback != NULL) {
+            call_on_success(callback, jucx_request);
+        }
+    }
+    return jucx_request;
+}
+
+jobject process_completed_stream_recv(size_t length, jobject callback)
+{
+    JNIEnv *env = get_jni_env();
+    // The constructor takes a Java long ("(J)V"); pass a real 64-bit zero
+    // through the varargs call rather than NULL.
+    jobject jucx_request = env->NewObject(jucx_request_cls, jucx_request_constructor,
+                                          (jlong)0);
+    env->SetObjectField(jucx_request, native_id_field, NULL);
+    env->SetLongField(jucx_request, recv_size_field, length);
+    if (callback != NULL) {
+        jucx_call_callback(callback, jucx_request, UCS_OK);
+    }
+    return jucx_request;
+}
+
+/**
+ * Listener connection callback: wraps conn_request in a UcpConnectionRequest
+ * and passes it to the Java handler whose global reference arrives in arg.
+ */
+void jucx_connection_handler(ucp_conn_request_h conn_request, void *arg)
+{
+    jobject jucx_conn_handler = reinterpret_cast<jobject>(arg);
+
+    JNIEnv *env = get_jni_env();
+
+    // Construct connection request class instance
+    jclass conn_request_cls = env->FindClass("org/openucx/jucx/ucp/UcpConnectionRequest");
+    jmethodID conn_request_constructor = env->GetMethodID(conn_request_cls, "<init>", "(J)V");
+    jobject jucx_conn_request = env->NewObject(conn_request_cls, conn_request_constructor,
+                                               (native_ptr)conn_request);
+
+    // Call onConnectionRequest method
+    jclass jucx_conn_hndl_cls = env->FindClass("org/openucx/jucx/ucp/UcpListenerConnectionHandler");
+    jmethodID on_conn_request = env->GetMethodID(jucx_conn_hndl_cls, "onConnectionRequest",
+                                                 "(Lorg/openucx/jucx/ucp/UcpConnectionRequest;)V");
+    env->CallVoidMethod(jucx_conn_handler, on_conn_request, jucx_conn_request);
+    // The handler reference is intentionally NOT deleted here: this callback
+    // runs once per incoming connection, and deleting the global ref after the
+    // first one left every later connection calling through a dead reference.
+    // NOTE(review): the ref now lives as long as the process unless the
+    // listener teardown path releases it - confirm where that should happen.
+}
+
+
+jobject new_rkey_instance(JNIEnv *env, ucp_rkey_h rkey)
+{
+    return env->NewObject(ucp_rkey_cls, ucp_rkey_cls_constructor, (native_ptr)rkey);
+}
diff --git a/bindings/java/src/main/native/jucx_common_def.h b/bindings/java/src/main/native/jucx_common_def.h
new file mode 100644
index 0000000..7b31c5d
--- /dev/null
+++ b/bindings/java/src/main/native/jucx_common_def.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+#ifndef HELPER_H_
+#define HELPER_H_
+
+/* NOTE(review): the header names below were stripped from the recovered text.
+ * They are inferred from what this header uses (ucp types, ucs_error,
+ * UCS_PROFILE_* in the .cc files, ucs_spinlock_t, JNI types) - confirm
+ * against upstream. */
+#include <ucp/api/ucp.h>
+#include <ucs/debug/log.h>
+#include <ucs/profile/profile.h>
+#include <ucs/type/spinlock.h>
+
+#include <jni.h>
+
+
+/* Integer type used to carry native pointers through Java long fields. */
+typedef uintptr_t native_ptr;
+
+/* Stores the value of the C constant _name into the static long field of the
+ * same name on `cls`, if that field exists. Expects `env` and `cls` in scope. */
+#define JUCX_DEFINE_LONG_CONSTANT(_name) do { \
+    jfieldID field = env->GetStaticFieldID(cls, #_name, "J"); \
+    if (field != NULL) { \
+        env->SetStaticLongField(cls, field, _name); \
+    } \
+} while(0)
+
+/* Same as JUCX_DEFINE_LONG_CONSTANT, for static int fields. */
+#define JUCX_DEFINE_INT_CONSTANT(_name) do { \
+    jfieldID field = env->GetStaticFieldID(cls, #_name, "I"); \
+    if (field != NULL) { \
+        env->SetStaticIntField(cls, field, _name); \
+    } \
+} while(0)
+
+/**
+ * Throw a Java exception by name. Similar to SignalError.
+ */
+#define JNU_ThrowException(_env, _msg) do { \
+    jclass _cls = _env->FindClass("org/openucx/jucx/UcxException"); \
+    ucs_error("JUCX: %s", _msg); \
+    if (_cls != 0) { /* Otherwise an exception has already been thrown */ \
+        _env->ThrowNew(_cls, _msg); \
+    } \
+} while(0)
+
+/* Throws a UcxException whose message is the textual form of _status. */
+#define JNU_ThrowExceptionByStatus(_env, _status) do { \
+    JNU_ThrowException(_env, ucs_status_string(_status)); \
+} while(0)
+
+/**
+ * @brief Utility to convert Java InetSocketAddress class (corresponds to the Network Layer 4
+ * and consists of an IP address and a port number) to corresponding sockaddr_storage struct.
+ * Supports IPv4 and IPv6.
+ */
+bool j2cInetSockAddr(JNIEnv *env, jobject sock_addr, sockaddr_storage& ss, socklen_t& sa_len);
+
+/* Per-request state placed by UCX in front of every request (see
+ * ucp_params.request_size / request_init in context.cc). */
+struct jucx_context {
+    jobject callback;               /* global ref to the user callback, or NULL */
+    volatile jobject jucx_request;  /* global ref to the Java UcpRequest, or NULL */
+    ucs_status_t status;            /* UCS_INPROGRESS until the operation completes */
+    ucs_spinlock_t lock;            /* guards the fields above */
+    size_t length;                  /* received length for recv operations */
+};
+
+void jucx_request_init(void *request);
+
+/**
+ * @brief Get the jni env object. To be able to call java methods from ucx async callbacks.
+ */
+JNIEnv* get_jni_env();
+
+/**
+ * @brief Send callback used to invoke java callback class on completion of ucp operations.
+ */
+void jucx_request_callback(void *request, ucs_status_t status);
+
+/**
+ * @brief Recv callback used to invoke java callback class on completion of ucp tag_recv_nb operation.
+ */
+void recv_callback(void *request, ucs_status_t status, ucp_tag_recv_info_t *info);
+
+/**
+ * @brief Recv callback used to invoke java callback class on completion of ucp stream_recv_nb operation.
+ */
+void stream_recv_callback(void *request, ucs_status_t status, size_t length);
+
+/**
+ * @brief Utility to process request logic: if request is pointer - set callback to request context.
+ * If request is status - call callback directly.
+ * Returns jucx_request object, that could be monitored on completion.
+ */
+jobject process_request(void *request, jobject callback);
+
+/**
+ * @brief Call java callback on completed stream recv operation, that didn't invoke callback.
+ */
+jobject process_completed_stream_recv(size_t length, jobject callback);
+
+/**
+ * @brief Connection handler passed to ucp_listener_create; arg is a JNI global
+ * reference to the Java UcpListenerConnectionHandler.
+ */
+void jucx_connection_handler(ucp_conn_request_h conn_request, void *arg);
+
+/**
+ * @brief Creates new jucx rkey class.
+ */
+jobject new_rkey_instance(JNIEnv *env, ucp_rkey_h rkey);
+
+#endif
diff --git a/bindings/java/src/main/native/listener.cc b/bindings/java/src/main/native/listener.cc
new file mode 100644
index 0000000..3114e71
--- /dev/null
+++ b/bindings/java/src/main/native/listener.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "jucx_common_def.h"
+#include "org_openucx_jucx_ucp_UcpListener.h"
+
+#include <string.h>    /* memset */
+
+
+/**
+ * Creates a UCP listener on the given worker, bound to the socket address
+ * held by the Java parameters object. When the field mask selects a
+ * connection handler, a global reference to it is handed to UCX as the
+ * callback argument.
+ *
+ * @return the native ucp_listener_h as a jlong; -1 with a pending
+ *         UcxException when the address cannot be converted or
+ *         ucp_listener_create() fails.
+ */
+JNIEXPORT jlong JNICALL
+Java_org_openucx_jucx_ucp_UcpListener_createUcpListener(JNIEnv *env, jclass cls,
+                                                        jobject ucp_listener_params,
+                                                        jlong worker_ptr)
+{
+    ucp_listener_params_t params;
+    ucp_listener_h listener;
+    jfieldID field;
+    jobject conn_handler_ref = NULL;
+    ucp_worker_h ucp_worker = (ucp_worker_h)worker_ptr;
+
+    // Get field mask
+    jclass jucx_listener_param_class = env->GetObjectClass(ucp_listener_params);
+    field = env->GetFieldID(jucx_listener_param_class, "fieldMask", "J");
+    params.field_mask = env->GetLongField(ucp_listener_params, field);
+
+    // Get sockAddr
+    field = env->GetFieldID(jucx_listener_param_class,
+                            "sockAddr", "Ljava/net/InetSocketAddress;");
+    jobject sock_addr = env->GetObjectField(ucp_listener_params, field);
+
+    struct sockaddr_storage listen_addr;
+    socklen_t addrlen;
+    memset(&listen_addr, 0, sizeof(struct sockaddr_storage));
+
+    if (!j2cInetSockAddr(env, sock_addr, listen_addr, addrlen)) {
+        return -1;
+    }
+
+    params.sockaddr.addr = (const struct sockaddr*)&listen_addr;
+    params.sockaddr.addrlen = addrlen;
+
+    if (params.field_mask & UCP_LISTENER_PARAM_FIELD_CONN_HANDLER) {
+        field = env->GetFieldID(jucx_listener_param_class,
+                                "connectionHandler", "Lorg/openucx/jucx/ucp/UcpListenerConnectionHandler;");
+        jobject jucx_conn_handler = env->GetObjectField(ucp_listener_params, field);
+        conn_handler_ref = env->NewGlobalRef(jucx_conn_handler);
+        params.conn_handler.arg = conn_handler_ref;
+        params.conn_handler.cb = jucx_connection_handler;
+    }
+
+    ucs_status_t status = ucp_listener_create(ucp_worker, &params, &listener);
+    if (status != UCS_OK) {
+        // No listener exists, so the handler can never be invoked: drop the
+        // reference taken above instead of leaking it.
+        if (conn_handler_ref != NULL) {
+            env->DeleteGlobalRef(conn_handler_ref);
+        }
+        JNU_ThrowExceptionByStatus(env, status);
+        // listener was never written; return the same failure value as above.
+        return -1;
+    }
+
+    return (native_ptr)listener;
+}
+
+/** Destroys the native listener. */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpListener_destroyUcpListenerNative(JNIEnv *env,
+                                                               jclass cls,
+                                                               jlong listener_ptr)
+{
+    ucp_listener_destroy((ucp_listener_h)listener_ptr);
+}
diff --git a/bindings/java/src/main/native/memory.cc
b/bindings/java/src/main/native/memory.cc
new file mode 100644
index 0000000..8627aca
--- /dev/null
+++ b/bindings/java/src/main/native/memory.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+#include "jucx_common_def.h"
+#include "org_openucx_jucx_ucp_UcpMemory.h"
+#include "org_openucx_jucx_ucp_UcpRemoteKey.h"
+
+
+/**
+ * @brief Unmaps a memory handle; raises a java exception when ucp_mem_unmap
+ *        does not return UCS_OK.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpMemory_unmapMemoryNative(JNIEnv *env, jclass cls,
+                                                      jlong context_ptr, jlong mem_ptr)
+{
+    ucs_status_t status = ucp_mem_unmap((ucp_context_h)context_ptr, (ucp_mem_h)mem_ptr);
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+    }
+}
+
+/**
+ * @brief Packs the remote key of a memory handle and exposes the packed
+ *        buffer to java as a direct ByteBuffer.
+ *
+ * @return direct ByteBuffer over the buffer produced by ucp_rkey_pack, or NULL
+ *         (with a java exception pending) when packing failed.
+ */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpMemory_getRkeyBufferNative(JNIEnv *env, jclass cls,
+                                                        jlong context_ptr, jlong mem_ptr)
+{
+    void *rkey_buffer;
+    size_t rkey_size;
+
+    ucs_status_t status = ucp_rkey_pack((ucp_context_h)context_ptr, (ucp_mem_h)mem_ptr,
+                                        &rkey_buffer, &rkey_size);
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+        // Raising the exception does not leave this function. Without the
+        // return, an uninitialized pointer and size would be wrapped below.
+        return NULL;
+    }
+    return env->NewDirectByteBuffer(rkey_buffer, rkey_size);
+}
+
+/**
+ * @brief Releases a packed rkey buffer; rkey_buf is the direct ByteBuffer
+ *        whose address is passed to ucp_rkey_buffer_release.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpMemory_releaseRkeyBufferNative(JNIEnv *env, jclass cls, jobject rkey_buf)
+{
+    ucp_rkey_buffer_release(env->GetDirectBufferAddress(rkey_buf));
+}
+
+/**
+ * @brief Destroys the unpacked remote key behind the given handle.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpRemoteKey_rkeyDestroy(JNIEnv *env, jclass cls, jlong rkey_ptr)
+{
+    ucp_rkey_destroy((ucp_rkey_h) rkey_ptr);
+}
diff --git a/bindings/java/src/main/native/request.cc b/bindings/java/src/main/native/request.cc
new file mode 100644
index 0000000..d65619b
--- /dev/null
+++ b/bindings/java/src/main/native/request.cc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "org_openucx_jucx_ucp_UcpRequest.h"
+
+#include <ucp/api/ucp.h>
+#include <ucs/type/status.h>
+
+/**
+ * @brief Tells whether a request has left the UCS_INPROGRESS state, as
+ *        reported by ucp_request_check_status.
+ */
+JNIEXPORT jboolean JNICALL
+Java_org_openucx_jucx_ucp_UcpRequest_isCompletedNative(JNIEnv *env, jclass cls,
+                                                       jlong ucp_req_ptr)
+{
+    return ucp_request_check_status((void *)ucp_req_ptr) != UCS_INPROGRESS;
+}
+
+/**
+ * @brief Hands the request behind the given handle to ucp_request_free.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpRequest_closeRequestNative(JNIEnv *env, jclass cls,
+                                                        jlong ucp_req_ptr)
+{
+    ucp_request_free((void *)ucp_req_ptr);
+}
diff --git a/bindings/java/src/main/native/ucp_constants.cc b/bindings/java/src/main/native/ucp_constants.cc
new file mode 100644
index 0000000..c156aae
--- /dev/null
+++ b/bindings/java/src/main/native/ucp_constants.cc
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "org_openucx_jucx_ucp_UcpConstants.h"
+#include "jucx_common_def.h"
+
+#include <ucp/api/ucp.h>
+
+
+/**
+ * @brief Routine to set UCX constants in java
+ *
+ * Every constant is registered exactly once through the JUCX_DEFINE_*_CONSTANT
+ * macros, grouped by the UCP enumeration it belongs to.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpConstants_loadConstants(JNIEnv *env, jclass cls)
+{
+    // UCP context parameters
+    JUCX_DEFINE_LONG_CONSTANT(UCP_PARAM_FIELD_FEATURES);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_PARAM_FIELD_TAG_SENDER_MASK);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_PARAM_FIELD_MT_WORKERS_SHARED);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_PARAM_FIELD_ESTIMATED_NUM_EPS);
+
+    // UCP configuration features
+    JUCX_DEFINE_LONG_CONSTANT(UCP_FEATURE_TAG);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_FEATURE_RMA);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_FEATURE_AMO32);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_FEATURE_AMO64);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_FEATURE_WAKEUP);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_FEATURE_STREAM);
+
+    // UCP worker parameters
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WORKER_PARAM_FIELD_THREAD_MODE);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WORKER_PARAM_FIELD_CPU_MASK);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WORKER_PARAM_FIELD_EVENTS);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WORKER_PARAM_FIELD_USER_DATA);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WORKER_PARAM_FIELD_EVENT_FD);
+
+    // UCP worker wakeup events
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WAKEUP_RMA);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WAKEUP_AMO);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WAKEUP_TAG_SEND);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WAKEUP_TAG_RECV);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WAKEUP_TX);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WAKEUP_RX);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_WAKEUP_EDGE);
+
+    // UCP listener parameters field mask
+    JUCX_DEFINE_LONG_CONSTANT(UCP_LISTENER_PARAM_FIELD_SOCK_ADDR);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_LISTENER_PARAM_FIELD_ACCEPT_HANDLER);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_LISTENER_PARAM_FIELD_CONN_HANDLER);
+
+    // UCP endpoint parameters field mask
+    JUCX_DEFINE_LONG_CONSTANT(UCP_EP_PARAM_FIELD_REMOTE_ADDRESS);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_EP_PARAM_FIELD_ERR_HANDLER);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_EP_PARAM_FIELD_USER_DATA);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_EP_PARAM_FIELD_SOCK_ADDR);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_EP_PARAM_FIELD_FLAGS);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_EP_PARAM_FIELD_CONN_REQUEST);
+
+    // UCP error handling mode
+    JUCX_DEFINE_INT_CONSTANT(UCP_ERR_HANDLING_MODE_PEER);
+
+    // UCP endpoint close non blocking mode.
+    JUCX_DEFINE_INT_CONSTANT(UCP_EP_CLOSE_MODE_FORCE);
+    JUCX_DEFINE_INT_CONSTANT(UCP_EP_CLOSE_MODE_FLUSH);
+
+    // The enumeration list describes the endpoint's parameters flags
+    JUCX_DEFINE_LONG_CONSTANT(UCP_EP_PARAMS_FLAGS_CLIENT_SERVER);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_EP_PARAMS_FLAGS_NO_LOOPBACK);
+
+    // UCP memory mapping parameters field mask
+    JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PARAM_FIELD_ADDRESS);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PARAM_FIELD_LENGTH);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_PARAM_FIELD_FLAGS);
+
+    // The enumeration list describes the memory mapping flags
+    JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_NONBLOCK);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_ALLOCATE);
+    JUCX_DEFINE_LONG_CONSTANT(UCP_MEM_MAP_FIXED);
+
+    // The enumeration defines behavior of @ref ucp_stream_recv_nb function
+    JUCX_DEFINE_LONG_CONSTANT(UCP_STREAM_RECV_FLAG_WAITALL);
+}
diff --git a/bindings/java/src/main/native/ucs_constants.cc b/bindings/java/src/main/native/ucs_constants.cc
new file mode 100644
index 0000000..28507b0
--- /dev/null
+++ b/bindings/java/src/main/native/ucs_constants.cc
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "org_openucx_jucx_ucs_UcsConstants.h"
+#include "jucx_common_def.h"
+
+#include <ucs/type/thread_mode.h>
+
+/**
+ * @brief Copies UCS_THREAD_MODE_MULTI into the static int field of the same
+ *        name in UcsConstants.ThreadMode.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucs_UcsConstants_loadConstants(JNIEnv *env, jclass cls)
+{
+    jclass thread_mode = env->FindClass("org/openucx/jucx/ucs/UcsConstants$ThreadMode");
+    if (thread_mode == NULL) {
+        // FindClass left a java exception pending; let the caller see it.
+        return;
+    }
+
+    jfieldID field = env->GetStaticFieldID(thread_mode, "UCS_THREAD_MODE_MULTI", "I");
+    if (field == NULL) {
+        // Same for a failed field lookup: a null field id must not be used.
+        return;
+    }
+
+    env->SetStaticIntField(thread_mode, field, UCS_THREAD_MODE_MULTI);
+}
diff --git a/bindings/java/src/main/native/worker.cc b/bindings/java/src/main/native/worker.cc
new file mode 100644
index 0000000..6e36a62
--- /dev/null
+++ b/bindings/java/src/main/native/worker.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "jucx_common_def.h"
+#include "org_openucx_jucx_ucp_UcpWorker.h"
+
+/**
+ * Bridge method for creating ucp_worker from java
+ *
+ * Copies the members selected by fieldMask from the java parameters object
+ * into ucp_worker_params_t and calls ucp_worker_create.
+ *
+ * @return native worker handle, or -1 with a java exception pending when
+ *         ucp_worker_create failed.
+ */
+JNIEXPORT jlong JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_createWorkerNative(JNIEnv *env, jclass cls,
+                                                       jobject jucx_worker_params,
+                                                       jlong context_ptr)
+{
+    ucp_worker_params_t worker_params = { 0 };
+    ucp_worker_h ucp_worker;
+    ucp_context_h ucp_context = (ucp_context_h)context_ptr;
+    jfieldID field;
+
+    jclass jucx_param_class = env->GetObjectClass(jucx_worker_params);
+    field = env->GetFieldID(jucx_param_class, "fieldMask", "J");
+    worker_params.field_mask = env->GetLongField(jucx_worker_params, field);
+
+    if (worker_params.field_mask & UCP_WORKER_PARAM_FIELD_THREAD_MODE) {
+        field = env->GetFieldID(jucx_param_class, "threadMode", "I");
+        worker_params.thread_mode = static_cast<ucs_thread_mode_t>(
+            env->GetIntField(jucx_worker_params, field));
+    }
+
+    if (worker_params.field_mask & UCP_WORKER_PARAM_FIELD_CPU_MASK) {
+        ucs_cpu_set_t cpu_mask;
+        UCS_CPU_ZERO(&cpu_mask);
+        field = env->GetFieldID(jucx_param_class, "cpuMask", "Ljava/util/BitSet;");
+        jobject cpu_mask_bitset = env->GetObjectField(jucx_worker_params, field);
+        jclass bitset_class = env->FindClass("java/util/BitSet");
+        jmethodID next_set_bit = env->GetMethodID(bitset_class, "nextSetBit", "(I)I");
+        // Walk the set bits of the java BitSet; nextSetBit returns a negative
+        // value once there are no more bits.
+        for (jint bit_index = env->CallIntMethod(cpu_mask_bitset, next_set_bit, 0); bit_index >= 0;
+             bit_index = env->CallIntMethod(cpu_mask_bitset, next_set_bit, bit_index + 1)) {
+            UCS_CPU_SET(bit_index, &cpu_mask);
+        }
+        worker_params.cpu_mask = cpu_mask;
+    }
+
+
+    if (worker_params.field_mask & UCP_WORKER_PARAM_FIELD_EVENTS) {
+        field = env->GetFieldID(jucx_param_class, "events", "J");
+        worker_params.events = env->GetLongField(jucx_worker_params, field);
+    }
+
+    if (worker_params.field_mask & UCP_WORKER_PARAM_FIELD_USER_DATA) {
+        field = env->GetFieldID(jucx_param_class, "userData", "Ljava/nio/ByteBuffer;");
+        jobject user_data = env->GetObjectField(jucx_worker_params, field);
+        worker_params.user_data = env->GetDirectBufferAddress(user_data);
+    }
+
+    if (worker_params.field_mask & UCP_WORKER_PARAM_FIELD_EVENT_FD) {
+        field = env->GetFieldID(jucx_param_class, "eventFD", "I");
+        worker_params.event_fd = env->GetIntField(jucx_worker_params, field);
+    }
+
+    ucs_status_t status = ucp_worker_create(ucp_context, &worker_params, &ucp_worker);
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+        // Raising a java exception does not unwind native code; return
+        // explicitly instead of handing back the never-assigned handle.
+        return -1;
+    }
+    return (native_ptr)ucp_worker;
+}
+
+/**
+ * Destroys the native worker behind the given handle.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_releaseWorkerNative(JNIEnv *env, jclass cls,
+                                                        jlong ucp_worker_ptr)
+{
+    ucp_worker_destroy((ucp_worker_h)ucp_worker_ptr);
+}
+
+
+/**
+ * Returns the worker address as a direct ByteBuffer over the memory obtained
+ * from ucp_worker_get_address, or NULL with a java exception pending on error.
+ */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_workerGetAddressNative(JNIEnv *env, jclass cls,
+                                                           jlong ucp_worker_ptr)
+{
+    ucp_address_t *addr;
+    size_t len;
+    ucs_status_t status;
+
+    status = ucp_worker_get_address((ucp_worker_h)ucp_worker_ptr, &addr, &len);
+
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+        return NULL;
+    }
+
+    return env->NewDirectByteBuffer(addr, len);
+}
+
+/**
+ * Releases an address buffer; ucp_address is the direct ByteBuffer whose
+ * address is passed to ucp_worker_release_address.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_releaseAddressNative(JNIEnv *env, jclass cls,
+                                                         jlong ucp_worker_ptr,
+                                                         jobject ucp_address)
+{
+
+    ucp_worker_release_address((ucp_worker_h)ucp_worker_ptr,
+                               (ucp_address_t *)env->GetDirectBufferAddress(ucp_address));
+}
+
+/**
+ * Runs ucp_worker_progress once and returns its result.
+ */
+JNIEXPORT jint JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_progressWorkerNative(JNIEnv *env, jclass cls, jlong ucp_worker_ptr)
+{
+    return ucp_worker_progress((ucp_worker_h)ucp_worker_ptr);
+}
+
+/**
+ * Starts a non-blocking worker flush; the result of ucp_worker_flush_nb is
+ * turned into a java request object by process_request.
+ */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_flushNonBlockingNative(JNIEnv *env, jclass cls,
+                                                           jlong ucp_worker_ptr,
+                                                           jobject callback)
+{
+    ucs_status_ptr_t request = ucp_worker_flush_nb((ucp_worker_h)ucp_worker_ptr, 0,
+                                                   jucx_request_callback);
+
+    return process_request(request, callback);
+}
+
+/**
+ * Calls ucp_worker_wait; raises a java exception on a non-OK status.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_waitWorkerNative(JNIEnv *env, jclass cls, jlong ucp_worker_ptr)
+{
+    ucs_status_t status = ucp_worker_wait((ucp_worker_h)ucp_worker_ptr);
+
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+    }
+}
+
+/**
+ * Calls ucp_worker_signal; raises a java exception on a non-OK status.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_signalWorkerNative(JNIEnv *env, jclass cls, jlong ucp_worker_ptr)
+{
+    ucs_status_t status = ucp_worker_signal((ucp_worker_h)ucp_worker_ptr);
+
+    if (status != UCS_OK) {
+        JNU_ThrowExceptionByStatus(env, status);
+    }
+}
+
+/**
+ * Posts a tagged receive of `size` contiguous bytes into the memory at `laddr`
+ * and returns the java request object produced by process_request.
+ */
+JNIEXPORT jobject JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_recvTaggedNonBlockingNative(JNIEnv *env, jclass cls,
+                                                                jlong ucp_worker_ptr,
+                                                                jlong laddr, jlong size,
+                                                                jlong tag, jlong tagMask,
+                                                                jobject callback)
+{
+    ucs_status_ptr_t request = ucp_tag_recv_nb((ucp_worker_h)ucp_worker_ptr,
+                                               (void *)laddr, size,
+                                               ucp_dt_make_contig(1), tag, tagMask,
+                                               recv_callback);
+
+    ucs_trace_req("JUCX: recv_nb request %p, msg size: %zu, tag: %ld", request, size, tag);
+
+    return process_request(request, callback);
+}
+
+/**
+ * Passes the given request to ucp_request_cancel on the given worker.
+ */
+JNIEXPORT void JNICALL
+Java_org_openucx_jucx_ucp_UcpWorker_cancelRequestNative(JNIEnv *env, jclass cls,
+                                                        jlong ucp_worker_ptr,
+                                                        jlong ucp_request_ptr)
+{
+    ucp_request_cancel((ucp_worker_h)ucp_worker_ptr, (void *)ucp_request_ptr);
+}
diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpContextTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpContextTest.java
new file mode 100644
index 0000000..8f41202
--- /dev/null
+++ b/bindings/java/src/test/java/org/openucx/jucx/UcpContextTest.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+package org.openucx.jucx;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import org.openucx.jucx.ucp.UcpContext;
+import org.openucx.jucx.ucp.UcpParams;
+
+/**
+ * Creation and disposal tests for {@link UcpContext}. The two static helpers
+ * bundle the assertions made on every create/close.
+ */
+public class UcpContextTest {
+
+    /** Creates a context and checks that it exposes a positive native id. */
+    public static UcpContext createContext(UcpParams contextParams) {
+        UcpContext context = new UcpContext(contextParams);
+        assertTrue(context.getNativeId() > 0);
+        return context;
+    }
+
+    /** Closes a context and checks that its native id is cleared. */
+    public static void closeContext(UcpContext context) {
+        context.close();
+        // assertNull states the intent directly; the previous assertEquals
+        // call passed the actual value in the "expected" position.
+        assertNull(context.getNativeId());
+    }
+
+    @Test
+    public void testCreateSimpleUcpContext() {
+        UcpParams contextParams = new UcpParams().requestTagFeature();
+        UcpContext context = createContext(contextParams);
+        closeContext(context);
+    }
+
+    @Test
+    public void testCreateUcpContextRdma() {
+        UcpParams contextParams = new UcpParams().requestTagFeature().requestRmaFeature()
+            .setEstimatedNumEps(10).setMtWorkersShared(false).setTagSenderMask(0L);
+        UcpContext context = createContext(contextParams);
+        closeContext(context);
+    }
+
+    @Test(expected = NullPointerException.class)
+    public void testCatchJVMSignal() {
+        UcpParams contextParams = new UcpParams().requestTagFeature();
+        UcpContext context = createContext(contextParams);
+        closeContext(context);
+        // closeContext asserted that the id is null, so unboxing it into a
+        // primitive long is what raises the expected NullPointerException.
+        long nullPointer = context.getNativeId();
+        nullPointer += 2;
+    }
+}
diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java
new file mode 100644
index 0000000..6c65d01
--- /dev/null
+++ b/bindings/java/src/test/java/org/openucx/jucx/UcpEndpointTest.java
@@ -0,0 +1,457 @@
+/*
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */ + +package org.openucx.jucx; + +import org.junit.Test; +import org.openucx.jucx.ucp.*; + +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.Assert.*; + +public class UcpEndpointTest extends UcxTest { + @Test + public void testConnectToListenerByWorkerAddr() { + UcpContext context = new UcpContext(new UcpParams().requestStreamFeature()); + UcpWorker worker = context.newWorker(new UcpWorkerParams()); + UcpEndpointParams epParams = new UcpEndpointParams().setUcpAddress(worker.getAddress()) + .setPeerErrorHadnlingMode().setNoLoopbackMode(); + UcpEndpoint endpoint = worker.newEndpoint(epParams); + assertNotNull(endpoint.getNativeId()); + + Collections.addAll(resources, context, worker, endpoint); + closeResources(); + } + + @Test + public void testGetNB() { + // Crerate 2 contexts + 2 workers + UcpParams params = new UcpParams().requestRmaFeature(); + UcpWorkerParams rdmaWorkerParams = new UcpWorkerParams().requestWakeupRMA(); + UcpContext context1 = new UcpContext(params); + UcpContext context2 = new UcpContext(params); + UcpWorker worker1 = context1.newWorker(rdmaWorkerParams); + UcpWorker worker2 = context2.newWorker(rdmaWorkerParams); + + // Create endpoint worker1 -> worker2 + UcpEndpointParams epParams = new UcpEndpointParams().setPeerErrorHadnlingMode() + .setUcpAddress(worker2.getAddress()); + UcpEndpoint endpoint = worker1.newEndpoint(epParams); + + // Allocate 2 source and 2 destination buffers, to perform 2 RDMA Read operations + ByteBuffer src1 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + ByteBuffer src2 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + ByteBuffer dst1 = 
ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + ByteBuffer dst2 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + src1.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT); + src2.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT + UcpMemoryTest.RANDOM_TEXT); + + // Register source buffers on context2 + UcpMemory memory1 = context2.registerMemory(src1); + UcpMemory memory2 = context2.registerMemory(src2); + + UcpRemoteKey rkey1 = endpoint.unpackRemoteKey(memory1.getRemoteKeyBuffer()); + UcpRemoteKey rkey2 = endpoint.unpackRemoteKey(memory2.getRemoteKeyBuffer()); + + AtomicInteger numCompletedRequests = new AtomicInteger(0); + HashMap requestToData = new HashMap<>(); + UcxCallback callback = new UcxCallback() { + @Override + public void onSuccess(UcpRequest request) { + // Here thread safety is guaranteed since worker progress is called after + // request added to map. In multithreaded environment could be an issue that + // callback is called, but request wasn't added yet to map. + if (requestToData.get(request) == dst1) { + assertEquals(UcpMemoryTest.RANDOM_TEXT, dst1.asCharBuffer().toString().trim()); + memory1.deregister(); + } else { + assertEquals(UcpMemoryTest.RANDOM_TEXT + UcpMemoryTest.RANDOM_TEXT, + dst2.asCharBuffer().toString().trim()); + memory2.deregister(); + } + numCompletedRequests.incrementAndGet(); + } + }; + + // Submit 2 get requests + UcpRequest request1 = endpoint.getNonBlocking(memory1.getAddress(), rkey1, dst1, callback); + UcpRequest request2 = endpoint.getNonBlocking(memory2.getAddress(), rkey2, dst2, callback); + + // Map each request to corresponding data buffer. 
+        requestToData.put(request1, dst1);
+        requestToData.put(request2, dst2);
+
+        // Wait for 2 get operations to complete
+        while (numCompletedRequests.get() != 2) {
+            worker1.progress();
+        }
+
+        assertTrue(request1.isCompleted() && request2.isCompleted());
+
+        Collections.addAll(resources, context2, context1, worker2, worker1, endpoint, rkey2,
+            rkey1);
+        closeResources();
+    }
+
+    /**
+     * Writes a text from a local buffer into memory registered on the second
+     * context and checks that the destination buffer holds the same text.
+     */
+    @Test
+    public void testPutNB() {
+        // Create 2 contexts + 2 workers
+        UcpParams params = new UcpParams().requestRmaFeature();
+        UcpWorkerParams rdmaWorkerParams = new UcpWorkerParams().requestWakeupRMA();
+        UcpContext context1 = new UcpContext(params);
+        UcpContext context2 = new UcpContext(params);
+        UcpWorker worker1 = context1.newWorker(rdmaWorkerParams);
+        UcpWorker worker2 = context2.newWorker(rdmaWorkerParams);
+
+        ByteBuffer src = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
+        ByteBuffer dst = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
+        src.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT);
+
+        // Register destination buffer on context2
+        UcpMemory memory = context2.registerMemory(dst);
+        UcpEndpoint ep =
+            worker1.newEndpoint(new UcpEndpointParams().setUcpAddress(worker2.getAddress()));
+
+        UcpRemoteKey rkey = ep.unpackRemoteKey(memory.getRemoteKeyBuffer());
+        UcpRequest request = ep.putNonBlocking(src, memory.getAddress(), rkey,
+            new UcxCallback() {
+                @Override
+                public void onSuccess(UcpRequest request) {
+                    rkey.close();
+                    memory.deregister();
+                }
+            });
+
+        worker1.progressRequest(request);
+
+        assertEquals(dst.asCharBuffer().toString().trim(), UcpMemoryTest.RANDOM_TEXT);
+
+        Collections.addAll(resources, context2, context1, worker2, worker1, ep);
+        closeResources();
+    }
+
+    /**
+     * Sends two tagged messages with different payloads and checks that each
+     * receive buffer ends up equal to the buffer it was sent from.
+     */
+    @Test
+    public void testSendRecv() throws Exception {
+        // Create 2 contexts + 2 workers
+        UcpParams params = new UcpParams().requestRmaFeature().requestTagFeature();
+        UcpWorkerParams rdmaWorkerParams = new UcpWorkerParams().requestWakeupRMA();
+        UcpContext context1 = new UcpContext(params);
+        UcpContext context2 = new UcpContext(params);
+        UcpWorker worker1 = context1.newWorker(rdmaWorkerParams);
+        UcpWorker worker2 = context2.newWorker(rdmaWorkerParams);
+
+        // Allocate 2 source and 2 destination buffers for the 2 tagged messages
+        UcpMemMapParams allocationParams = new UcpMemMapParams().allocate()
+            .setLength(UcpMemoryTest.MEM_SIZE);
+        UcpMemory memory1 = context1.memoryMap(allocationParams);
+        UcpMemory memory2 = context1.memoryMap(allocationParams);
+        ByteBuffer src1 = UcxUtils.getByteBufferView(memory1.getAddress(), UcpMemoryTest.MEM_SIZE);
+        // The second source must view the second allocation. Viewing memory1
+        // again made src1 and src2 the same bytes, so the two messages could
+        // not be told apart and memory2 was never exercised.
+        ByteBuffer src2 = UcxUtils.getByteBufferView(memory2.getAddress(), UcpMemoryTest.MEM_SIZE);
+        ByteBuffer dst1 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
+        ByteBuffer dst2 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
+        src1.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT);
+        src2.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT + UcpMemoryTest.RANDOM_TEXT);
+
+        AtomicInteger receivedMessages = new AtomicInteger(0);
+        worker2.recvTaggedNonBlocking(dst1, 0, 0, new UcxCallback() {
+            @Override
+            public void onSuccess(UcpRequest request) {
+                assertEquals(dst1, src1);
+                receivedMessages.incrementAndGet();
+            }
+        });
+
+        worker2.recvTaggedNonBlocking(dst2, 1, -1, new UcxCallback() {
+            @Override
+            public void onSuccess(UcpRequest request) {
+                assertEquals(dst2, src2);
+                receivedMessages.incrementAndGet();
+            }
+        });
+
+        UcpEndpoint ep = worker1.newEndpoint(new UcpEndpointParams()
+            .setUcpAddress(worker2.getAddress()));
+
+        ep.sendTaggedNonBlocking(src1, 0, null);
+        ep.sendTaggedNonBlocking(src2, 1, null);
+
+        while (receivedMessages.get() != 2) {
+            worker1.progress();
+            worker2.progress();
+        }
+
+        Collections.addAll(resources, context2, context1, worker2, worker1, memory2, memory1, ep);
+        closeResources();
+    }
+
+    @Test
+    public void testRecvAfterSend() {
+        // Create 2 contexts + 2 workers
+        UcpParams params = new UcpParams().requestRmaFeature().requestTagFeature()
+            .setMtWorkersShared(true); +
UcpWorkerParams rdmaWorkerParams = new UcpWorkerParams().requestWakeupRMA() + .requestThreadSafety(); + UcpContext context1 = new UcpContext(params); + UcpContext context2 = new UcpContext(params); + UcpWorker worker1 = context1.newWorker(rdmaWorkerParams); + UcpWorker worker2 = context2.newWorker(rdmaWorkerParams); + + UcpEndpoint ep = worker1.newEndpoint(new UcpEndpointParams() + .setPeerErrorHadnlingMode() + .setUcpAddress(worker2.getAddress())); + + ByteBuffer src1 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + ByteBuffer dst1 = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + + ep.sendTaggedNonBlocking(src1, 0, null); + + Thread progressThread = new Thread() { + @Override + public void run() { + while (!isInterrupted()) { + worker1.progress(); + worker2.progress(); + } + } + }; + + progressThread.setDaemon(true); + progressThread.start(); + + try { + Thread.sleep(5); + } catch (InterruptedException ignored) { } + + AtomicBoolean success = new AtomicBoolean(false); + + worker2.recvTaggedNonBlocking(dst1, 0, -1, new UcxCallback() { + @Override + public void onSuccess(UcpRequest request) { + assertEquals(UcpMemoryTest.MEM_SIZE, request.getRecvSize()); + success.set(true); + } + }); + + try { + int count = 0; + while ((++count < 100) && !success.get()) { + Thread.sleep(50); + } + } catch (InterruptedException ignored) { } + + assertTrue(success.get()); + UcpRequest closeRequest = ep.closeNonBlockingForce(); + + while (!closeRequest.isCompleted()) { + try { + // Wait until progress thread will close the endpoint. 
+ Thread.sleep(10); + } catch (InterruptedException e) { + e.printStackTrace(); + } finally { + closeRequest.close(); + } + } + + progressThread.interrupt(); + try { + progressThread.join(); + } catch (InterruptedException ignored) { } + + Collections.addAll(resources, context1, context2, worker1, worker2); + closeResources(); + } + + @Test + public void testBufferOffset() { + int msgSize = 200; + int offset = 100; + // Crerate 2 contexts + 2 workers + UcpParams params = new UcpParams().requestTagFeature(); + UcpWorkerParams rdmaWorkerParams = new UcpWorkerParams().requestWakeupRMA(); + UcpContext context1 = new UcpContext(params); + UcpContext context2 = new UcpContext(params); + UcpWorker worker1 = context1.newWorker(rdmaWorkerParams); + UcpWorker worker2 = context2.newWorker(rdmaWorkerParams); + + ByteBuffer bigRecvBuffer = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + ByteBuffer bigSendBuffer = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + + bigRecvBuffer.position(offset).limit(offset + msgSize); + UcpRequest recv = worker1.recvTaggedNonBlocking(bigRecvBuffer, 0, + 0, null); + + UcpEndpoint ep = worker2.newEndpoint(new UcpEndpointParams() + .setUcpAddress(worker1.getAddress())); + + byte[] msg = new byte[msgSize]; + for (int i = 0; i < msgSize; i++) { + msg[i] = (byte)i; + } + + bigSendBuffer.position(offset).limit(offset + msgSize); + bigSendBuffer.put(msg); + bigSendBuffer.position(offset); + + UcpRequest sent = ep.sendTaggedNonBlocking(bigSendBuffer, 0, null); + + while (!sent.isCompleted() || !recv.isCompleted()) { + worker1.progress(); + worker2.progress(); + } + + bigSendBuffer.position(offset).limit(offset + msgSize); + bigRecvBuffer.position(offset).limit(offset + msgSize); + final ByteBuffer sendData = bigSendBuffer.slice(); + final ByteBuffer recvData = bigRecvBuffer.slice(); + assertEquals("Send buffer not equals to recv buffer", sendData, recvData); + + Collections.addAll(resources, context2, context1, worker2, worker1, ep); + 
closeResources();
+    }
+
+    /**
+     * Issues several implicit get operations, flushes the endpoint and checks
+     * in the flush callback that the destination buffer holds the source text.
+     */
+    @Test
+    public void testFlushEp() {
+        int numRequests = 10;
+        // Create 2 contexts + 2 workers
+        UcpParams params = new UcpParams().requestRmaFeature();
+        UcpWorkerParams rdmaWorkerParams = new UcpWorkerParams().requestWakeupRMA();
+        UcpContext context1 = new UcpContext(params);
+        UcpContext context2 = new UcpContext(params);
+
+        ByteBuffer src = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
+        src.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT);
+        ByteBuffer dst = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE);
+        UcpMemory memory = context2.registerMemory(src);
+
+        UcpWorker worker1 = context1.newWorker(rdmaWorkerParams);
+        UcpWorker worker2 = context2.newWorker(rdmaWorkerParams);
+
+        UcpEndpoint ep = worker1.newEndpoint(new UcpEndpointParams()
+            .setUcpAddress(worker2.getAddress()).setPeerErrorHadnlingMode());
+        UcpRemoteKey rkey = ep.unpackRemoteKey(memory.getRemoteKeyBuffer());
+
+        int blockSize = UcpMemoryTest.MEM_SIZE / numRequests;
+        for (int i = 0; i < numRequests; i++) {
+            ep.getNonBlockingImplicit(memory.getAddress() + i * blockSize, rkey,
+                UcxUtils.getAddress(dst) + i * blockSize, blockSize);
+        }
+
+        UcpRequest request = ep.flushNonBlocking(new UcxCallback() {
+            @Override
+            public void onSuccess(UcpRequest request) {
+                rkey.close();
+                memory.deregister();
+                assertEquals(dst.asCharBuffer().toString().trim(), UcpMemoryTest.RANDOM_TEXT);
+            }
+        });
+
+        // Progress until the flush has completed. The condition used to be
+        // inverted: a pending flush was never progressed, so the resources
+        // were closed under it, and an already completed one looped forever.
+        while (!request.isCompleted()) {
+            worker1.progress();
+            worker2.progress();
+        }
+
+        Collections.addAll(resources, context2, context1, worker2, worker1, ep);
+        closeResources();
+    }
+
+    @Test
+    public void testRecvSize() {
+        UcpContext context1 = new UcpContext(new UcpParams().requestTagFeature());
+        UcpContext context2 = new UcpContext(new UcpParams().requestTagFeature());
+
+        UcpWorker worker1 = context1.newWorker(new UcpWorkerParams());
+        UcpWorker worker2 = context2.newWorker(new UcpWorkerParams());
+
+        UcpEndpoint ep = worker1.newEndpoint(
+            new
UcpEndpointParams().setUcpAddress(worker2.getAddress())); + + ByteBuffer sendBuffer = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + ByteBuffer recvBuffer = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + + sendBuffer.limit(UcpMemoryTest.MEM_SIZE / 2); + + UcpRequest send = ep.sendTaggedNonBlocking(sendBuffer, null); + UcpRequest recv = worker2.recvTaggedNonBlocking(recvBuffer, null); + + while (!send.isCompleted() || !recv.isCompleted()) { + worker1.progress(); + worker2.progress(); + } + + assertEquals(UcpMemoryTest.MEM_SIZE / 2, recv.getRecvSize()); + + Collections.addAll(resources, context1, context2, worker1, worker2, ep); + closeResources(); + } + + @Test + public void testStreamingAPI() { + UcpParams params = new UcpParams().requestStreamFeature().requestRmaFeature(); + UcpContext context1 = new UcpContext(params); + UcpContext context2 = new UcpContext(params); + + UcpWorker worker1 = context1.newWorker(new UcpWorkerParams()); + UcpWorker worker2 = context2.newWorker(new UcpWorkerParams()); + + UcpEndpoint clientToServer = worker1.newEndpoint( + new UcpEndpointParams().setUcpAddress(worker2.getAddress())); + + UcpEndpoint serverToClient = worker2.newEndpoint( + new UcpEndpointParams().setUcpAddress(worker1.getAddress())); + + ByteBuffer sendBuffer = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + sendBuffer.put(0, (byte)1); + ByteBuffer recvBuffer = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE * 2); + + UcpRequest[] sends = new UcpRequest[2]; + + sends[0] = clientToServer.sendStreamNonBlocking(sendBuffer, new UcxCallback() { + @Override + public void onSuccess(UcpRequest request) { + sendBuffer.put(0, (byte)2); + sends[1] = clientToServer.sendStreamNonBlocking(sendBuffer, null); + } + }); + + while (sends[1] == null || !sends[1].isCompleted()) { + worker1.progress(); + worker2.progress(); + } + + AtomicBoolean received = new AtomicBoolean(false); + serverToClient.recvStreamNonBlocking( + UcxUtils.getAddress(recvBuffer), 
UcpMemoryTest.MEM_SIZE * 2, + UcpConstants.UCP_STREAM_RECV_FLAG_WAITALL, + new UcxCallback() { + @Override + public void onSuccess(UcpRequest request) { + assertEquals(request.getRecvSize(), UcpMemoryTest.MEM_SIZE * 2); + assertEquals((byte)1, recvBuffer.get(0)); + assertEquals((byte)2, recvBuffer.get(UcpMemoryTest.MEM_SIZE)); + received.set(true); + } + }); + + while (!received.get()) { + worker1.progress(); + worker2.progress(); + } + + Collections.addAll(resources, context1, context2, worker1, worker2, clientToServer, + serverToClient); + closeResources(); + } +} diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpListenerTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpListenerTest.java new file mode 100644 index 0000000..658a601 --- /dev/null +++ b/bindings/java/src/test/java/org/openucx/jucx/UcpListenerTest.java @@ -0,0 +1,154 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ +package org.openucx.jucx; + +import org.junit.Test; +import org.openucx.jucx.ucp.*; + +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.junit.Assert.*; + +public class UcpListenerTest extends UcxTest { + static final int port = Integer.parseInt( + System.getenv().getOrDefault("JUCX_TEST_PORT", "55321")); + + @Test + public void testCreateUcpListener() { + UcpContext context = new UcpContext(new UcpParams().requestStreamFeature()); + UcpWorker worker = context.newWorker(new UcpWorkerParams()); + InetSocketAddress ipv4 = new InetSocketAddress("0.0.0.0", port); + try { + UcpListener ipv4Listener = worker.newListener( + new UcpListenerParams().setSockAddr(ipv4)); + + 
assertNotNull(ipv4Listener); + ipv4Listener.close(); + } catch (UcxException ignored) { } + + try { + InetSocketAddress ipv6 = new InetSocketAddress("::", port); + UcpListener ipv6Listener = worker.newListener( + new UcpListenerParams().setSockAddr(ipv6)); + + assertNotNull(ipv6Listener); + ipv6Listener.close(); + } catch (UcxException ignored) { } + + worker.close(); + context.close(); + } + + static Stream getInterfaces() { + try { + return Collections.list(NetworkInterface.getNetworkInterfaces()).stream() + .filter(iface -> { + try { + return iface.isUp() && !iface.isLoopback(); + } catch (SocketException e) { + return false; + } + }); + } catch (SocketException e) { + return Stream.empty(); + } + } + + /** + * Iterates over network interfaces and tries to bind and create listener + * on a specific socket address. + */ + static UcpListener tryBindListener(UcpWorker worker, UcpListenerParams params) { + UcpListener result = null; + List addresses = getInterfaces().flatMap(iface -> + Collections.list(iface.getInetAddresses()).stream()) + .collect(Collectors.toList()); + for (InetAddress address : addresses) { + try { + result = worker.newListener( + params.setSockAddr(new InetSocketAddress(address, port))); + break; + } catch (UcxException ignored) { } + } + assertNotNull("Could not find socket address to start UcpListener", result); + return result; + } + + @Test + public void testConnectionHandler() { + UcpContext context1 = new UcpContext(new UcpParams().requestStreamFeature() + .requestRmaFeature()); + UcpContext context2 = new UcpContext(new UcpParams().requestStreamFeature() + .requestRmaFeature()); + UcpWorker serverWorker1 = context1.newWorker(new UcpWorkerParams()); + UcpWorker serverWorker2 = context1.newWorker(new UcpWorkerParams()); + UcpWorker clientWorker = context2.newWorker(new UcpWorkerParams()); + + AtomicReference conRequest = new AtomicReference<>(null); + + // Create listener and set connection handler + UcpListenerParams listenerParams = new 
UcpListenerParams() + .setConnectionHandler(conRequest::set); + UcpListener listener = tryBindListener(serverWorker1, listenerParams); + + UcpEndpoint clientToServer = clientWorker.newEndpoint(new UcpEndpointParams() + .setSocketAddress(listener.getAddress())); + + while (conRequest.get() == null) { + serverWorker1.progress(); + clientWorker.progress(); + } + + // Create endpoint from another worker from pool. + UcpEndpoint serverToClient = serverWorker2.newEndpoint( + new UcpEndpointParams().setConnectionRequest(conRequest.get())); + + // Temporary workaround until new connection establishment protocol in UCX. + for (int i = 0; i < 10; i++) { + serverWorker1.progress(); + serverWorker2.progress(); + clientWorker.progress(); + try { + Thread.sleep(10); + } catch (Exception ignored) { } + } + + UcpRequest sent = serverToClient.sendStreamNonBlocking( + ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE), null); + + // Progress all workers to make sure recv request will complete immediately + for (int i = 0; i < 10; i++) { + serverWorker1.progress(); + serverWorker2.progress(); + clientWorker.progress(); + try { + Thread.sleep(2); + } catch (Exception ignored) { } + } + + UcpRequest recv = clientToServer.recvStreamNonBlocking( + ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE), 0, null); + + while (!sent.isCompleted() || !recv.isCompleted()) { + serverWorker1.progress(); + clientWorker.progress(); + } + + assertEquals(UcpMemoryTest.MEM_SIZE, recv.getRecvSize()); + + Collections.addAll(resources, context2, context1, clientWorker, serverWorker1, + serverWorker2, listener, serverToClient, clientToServer); + closeResources(); + } +} diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpMemoryTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpMemoryTest.java new file mode 100644 index 0000000..01668d0 --- /dev/null +++ b/bindings/java/src/test/java/org/openucx/jucx/UcpMemoryTest.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 
2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ +package org.openucx.jucx; + +import org.junit.Test; + +import org.openucx.jucx.ucp.*; + +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.UUID; + +import static java.nio.file.StandardOpenOption.*; +import static org.junit.Assert.*; + +public class UcpMemoryTest { + static int MEM_SIZE = 4096; + static String RANDOM_TEXT = UUID.randomUUID().toString(); + + @Test + public void testMmapFile() throws Exception { + UcpContext context = new UcpContext(new UcpParams().requestTagFeature()); + Path tempFile = Files.createTempFile("jucx", "test"); + // 1. Create FileChannel to file in tmp directory. + FileChannel fileChannel = FileChannel.open(tempFile, CREATE, WRITE, READ, DELETE_ON_CLOSE); + MappedByteBuffer buf = fileChannel.map(FileChannel.MapMode.READ_WRITE, 0, MEM_SIZE); + buf.asCharBuffer().put(RANDOM_TEXT); + buf.force(); + // 2. Register mmap buffer with ODP + UcpMemory mmapedMemory = context.memoryMap(new UcpMemMapParams() + .setAddress(UcxUtils.getAddress(buf)).setLength(MEM_SIZE).nonBlocking()); + + assertEquals(mmapedMemory.getAddress(), UcxUtils.getAddress(buf)); + + // 3. 
Test allocation + UcpMemory allocatedMemory = context.memoryMap(new UcpMemMapParams() + .allocate().setLength(MEM_SIZE).nonBlocking()); + assertEquals(allocatedMemory.getLength(), MEM_SIZE); + + allocatedMemory.deregister(); + mmapedMemory.deregister(); + fileChannel.close(); + context.close(); + } + + @Test + public void testGetRkey() { + UcpContext context = new UcpContext(new UcpParams().requestRmaFeature()); + ByteBuffer buf = ByteBuffer.allocateDirect(MEM_SIZE); + UcpMemory mem = context.registerMemory(buf); + ByteBuffer rkeyBuffer = mem.getRemoteKeyBuffer(); + assertTrue(rkeyBuffer.capacity() > 0); + assertTrue(mem.getAddress() > 0); + mem.deregister(); + context.close(); + } + + @Test + public void testRemoteKeyUnpack() { + UcpContext context = new UcpContext(new UcpParams().requestRmaFeature()); + UcpWorker worker1 = new UcpWorker(context, new UcpWorkerParams()); + UcpWorker worker2 = new UcpWorker(context, new UcpWorkerParams()); + UcpEndpoint endpoint = new UcpEndpoint(worker1, + new UcpEndpointParams().setUcpAddress(worker2.getAddress())); + ByteBuffer buf = ByteBuffer.allocateDirect(MEM_SIZE); + UcpMemory mem = context.registerMemory(buf); + UcpRemoteKey rkey = endpoint.unpackRemoteKey(mem.getRemoteKeyBuffer()); + assertNotNull(rkey.getNativeId()); + rkey.close(); + mem.deregister(); + endpoint.close(); + worker1.close(); + worker2.close(); + context.close(); + } +} diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpRequestTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpRequestTest.java new file mode 100644 index 0000000..0ac1fc6 --- /dev/null +++ b/bindings/java/src/test/java/org/openucx/jucx/UcpRequestTest.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ +package org.openucx.jucx; + +import org.junit.Test; +import org.openucx.jucx.ucp.*; + +import java.nio.ByteBuffer; +import static org.junit.Assert.*; + +public class UcpRequestTest { + @Test + public void testCancelRequest() { + UcpContext context = new UcpContext(new UcpParams().requestTagFeature()); + UcpWorker worker = context.newWorker(new UcpWorkerParams()); + UcpRequest recv = worker.recvTaggedNonBlocking(ByteBuffer.allocateDirect(100), null); + worker.cancelRequest(recv); + + while (!recv.isCompleted()) { + worker.progress(); + } + + assertTrue(recv.isCompleted()); + assertNull(recv.getNativeId()); + + worker.close(); + context.close(); + } +} diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcpWorkerTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcpWorkerTest.java new file mode 100644 index 0000000..81fce4c --- /dev/null +++ b/bindings/java/src/test/java/org/openucx/jucx/UcpWorkerTest.java @@ -0,0 +1,169 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +package org.openucx.jucx; + +import org.junit.Test; +import org.openucx.jucx.ucp.*; +import org.openucx.jucx.ucs.UcsConstants; + +import java.nio.ByteBuffer; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.junit.Assert.*; + +public class UcpWorkerTest { + private static int numWorkers = Runtime.getRuntime().availableProcessors(); + + @Test + public void testSingleWorker() { + UcpContext context = new UcpContext(new UcpParams().requestTagFeature()); + assertEquals(2, UcsConstants.ThreadMode.UCS_THREAD_MODE_MULTI); + assertNotEquals(context.getNativeId(), null); + UcpWorker worker = context.newWorker(new UcpWorkerParams()); + assertNotNull(worker.getNativeId()); + assertEquals(0, worker.progress()); // No communications was submitted. 
+ worker.close(); + assertNull(worker.getNativeId()); + context.close(); + } + + @Test + public void testMultipleWorkersWithinSameContext() { + UcpContext context = new UcpContext(new UcpParams().requestTagFeature()); + assertNotEquals(context.getNativeId(), null); + UcpWorker[] workers = new UcpWorker[numWorkers]; + UcpWorkerParams workerParam = new UcpWorkerParams(); + for (int i = 0; i < numWorkers; i++) { + workerParam.clear().setCpu(i).requestThreadSafety(); + workers[i] = context.newWorker(workerParam); + assertNotNull(workers[i].getNativeId()); + } + for (int i = 0; i < numWorkers; i++) { + workers[i].close(); + } + context.close(); + } + + @Test + public void testMultipleWorkersFromMultipleContexts() { + UcpContext tcpContext = new UcpContext(new UcpParams().requestTagFeature()); + UcpContext rdmaContext = new UcpContext(new UcpParams().requestRmaFeature() + .requestAtomic64BitFeature().requestAtomic32BitFeature()); + UcpWorker[] workers = new UcpWorker[numWorkers]; + UcpWorkerParams workerParams = new UcpWorkerParams(); + for (int i = 0; i < numWorkers; i++) { + ByteBuffer userData = ByteBuffer.allocateDirect(100); + workerParams.clear(); + if (i % 2 == 0) { + userData.asCharBuffer().put("TCPWorker" + i); + workerParams.requestWakeupRX().setUserData(userData); + workers[i] = tcpContext.newWorker(workerParams); + } else { + userData.asCharBuffer().put("RDMAWorker" + i); + workerParams.requestWakeupRMA().setCpu(i).setUserData(userData) + .requestThreadSafety(); + workers[i] = rdmaContext.newWorker(workerParams); + } + } + for (int i = 0; i < numWorkers; i++) { + workers[i].close(); + } + tcpContext.close(); + rdmaContext.close(); + } + + @Test + public void testGetWorkerAddress() { + UcpContext context = new UcpContext(new UcpParams().requestTagFeature()); + UcpWorker worker = context.newWorker(new UcpWorkerParams()); + ByteBuffer workerAddress = worker.getAddress(); + assertNotNull(workerAddress); + assertTrue(workerAddress.capacity() > 0); + 
worker.close(); + context.close(); + } + + @Test + public void testWorkerSleepWakeup() throws InterruptedException { + UcpContext context = new UcpContext(new UcpParams() + .requestRmaFeature().requestWakeupFeature()); + UcpWorker worker = context.newWorker( + new UcpWorkerParams().requestWakeupRMA()); + + AtomicBoolean success = new AtomicBoolean(false); + Thread workerProgressThread = new Thread() { + @Override + public void run() { + while (!isInterrupted()) { + if (worker.progress() == 0) { + worker.waitForEvents(); + } + } + success.set(true); + } + }; + + workerProgressThread.start(); + + workerProgressThread.interrupt(); + worker.signal(); + + workerProgressThread.join(); + assertTrue(success.get()); + + worker.close(); + context.close(); + } + + @Test + public void testFlushWorker() { + int numRequests = 10; + // Crerate 2 contexts + 2 workers + UcpParams params = new UcpParams().requestRmaFeature(); + UcpWorkerParams rdmaWorkerParams = new UcpWorkerParams().requestWakeupRMA(); + UcpContext context1 = new UcpContext(params); + UcpContext context2 = new UcpContext(params); + + ByteBuffer src = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + ByteBuffer dst = ByteBuffer.allocateDirect(UcpMemoryTest.MEM_SIZE); + dst.asCharBuffer().put(UcpMemoryTest.RANDOM_TEXT); + UcpMemory memory = context2.registerMemory(src); + + UcpWorker worker1 = context1.newWorker(rdmaWorkerParams); + UcpWorker worker2 = context2.newWorker(rdmaWorkerParams); + + UcpEndpoint ep = worker1.newEndpoint( new UcpEndpointParams() + .setUcpAddress(worker2.getAddress()).setPeerErrorHadnlingMode()); + UcpRemoteKey rkey = ep.unpackRemoteKey(memory.getRemoteKeyBuffer()); + + int blockSize = UcpMemoryTest.MEM_SIZE / numRequests; + for (int i = 0; i < numRequests; i++) { + ep.putNonBlockingImplicit(UcxUtils.getAddress(dst) + i * blockSize, + blockSize, memory.getAddress() + i * blockSize, rkey); + } + + UcpRequest request = worker1.flushNonBlocking(new UcxCallback() { + @Override + public void 
onSuccess(UcpRequest request) { + rkey.close(); + memory.deregister(); + assertEquals(dst.asCharBuffer().toString().trim(), UcpMemoryTest.RANDOM_TEXT); + } + }); + + while (!request.isCompleted()) { + worker1.progress(); + worker2.progress(); + } + + assertTrue(request.isCompleted()); + ep.close(); + worker1.close(); + worker2.close(); + context1.close(); + context2.close(); + } +} diff --git a/bindings/java/src/test/java/org/openucx/jucx/UcxTest.java b/bindings/java/src/test/java/org/openucx/jucx/UcxTest.java new file mode 100644 index 0000000..5d40f2d --- /dev/null +++ b/bindings/java/src/test/java/org/openucx/jucx/UcxTest.java @@ -0,0 +1,25 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +package org.openucx.jucx; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Stack; + +abstract class UcxTest { + // Stack of closable resources (context, worker, etc.) to be closed at the end. + protected static Stack resources = new Stack<>(); + + protected void closeResources() { + while (!resources.empty()) { + try { + resources.pop().close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } +} diff --git a/compile b/compile new file mode 100755 index 0000000..99e5052 --- /dev/null +++ b/compile @@ -0,0 +1,348 @@ +#! /bin/sh +# Wrapper for compilers which do not understand '-c -o'. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. +# Written by Tom Tromey . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +nl=' +' + +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent tools from complaining about whitespace usage. +IFS=" "" $nl" + +file_conv= + +# func_file_conv build_file lazy +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. If the determined conversion +# type is listed in (the comma separated) LAZY, no conversion will +# take place. +func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv/,$2, in + *,$file_conv,*) + ;; + mingw/*) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin/*) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine/*) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_cl_dashL linkdir +# Make cl look for libraries in LINKDIR +func_cl_dashL () +{ + func_file_conv "$1" + if test -z "$lib_path"; then + lib_path=$file + else + lib_path="$lib_path;$file" + fi + linker_opts="$linker_opts -LIBPATH:$file" +} + +# func_cl_dashl library +# Do a library search-path lookup for cl +func_cl_dashl () +{ + lib=$1 + found=no + save_IFS=$IFS + IFS=';' + for dir in $lib_path $LIB + do + IFS=$save_IFS + if $shared && test -f 
"$dir/$lib.dll.lib"; then + found=yes + lib=$dir/$lib.dll.lib + break + fi + if test -f "$dir/$lib.lib"; then + found=yes + lib=$dir/$lib.lib + break + fi + if test -f "$dir/lib$lib.a"; then + found=yes + lib=$dir/lib$lib.a + break + fi + done + IFS=$save_IFS + + if test "$found" != yes; then + lib=$lib.lib + fi +} + +# func_cl_wrapper cl arg... +# Adjust compile command to suit cl +func_cl_wrapper () +{ + # Assume a capable shell + lib_path= + shared=: + linker_opts= + for arg + do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + eat=1 + case $2 in + *.o | *.[oO][bB][jJ]) + func_file_conv "$2" + set x "$@" -Fo"$file" + shift + ;; + *) + func_file_conv "$2" + set x "$@" -Fe"$file" + shift + ;; + esac + ;; + -I) + eat=1 + func_file_conv "$2" mingw + set x "$@" -I"$file" + shift + ;; + -I*) + func_file_conv "${1#-I}" mingw + set x "$@" -I"$file" + shift + ;; + -l) + eat=1 + func_cl_dashl "$2" + set x "$@" "$lib" + shift + ;; + -l*) + func_cl_dashl "${1#-l}" + set x "$@" "$lib" + shift + ;; + -L) + eat=1 + func_cl_dashL "$2" + ;; + -L*) + func_cl_dashL "${1#-L}" + ;; + -static) + shared=false + ;; + -Wl,*) + arg=${1#-Wl,} + save_ifs="$IFS"; IFS=',' + for flag in $arg; do + IFS="$save_ifs" + linker_opts="$linker_opts $flag" + done + IFS="$save_ifs" + ;; + -Xlinker) + eat=1 + linker_opts="$linker_opts $2" + ;; + -*) + set x "$@" "$1" + shift + ;; + *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) + func_file_conv "$1" + set x "$@" -Tp"$file" + shift + ;; + *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) + func_file_conv "$1" mingw + set x "$@" "$file" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift + done + if test -n "$linker_opts"; then + linker_opts="-link$linker_opts" + fi + exec "$@" $linker_opts + exit 1 +} + +eat= + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 
1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: compile [--help] [--version] PROGRAM [ARGS] + +Wrapper for compilers which do not understand '-c -o'. +Remove '-o dest.o' from ARGS, run PROGRAM with the remaining +arguments, and rename the output as expected. + +If you are trying to build a whole package this is not the +right script to run: please start by reading the file 'INSTALL'. + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "compile $scriptversion" + exit $? + ;; + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \ + icl | *[/\\]icl | icl.exe | *[/\\]icl.exe ) + func_cl_wrapper "$@" # Doesn't return... + ;; +esac + +ofile= +cfile= + +for arg +do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + # So we strip '-o arg' only if arg is an object. + eat=1 + case $2 in + *.o | *.obj) + ofile=$2 + ;; + *) + set x "$@" -o "$2" + shift + ;; + esac + ;; + *.c) + cfile=$1 + set x "$@" "$1" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift +done + +if test -z "$ofile" || test -z "$cfile"; then + # If no '-o' option was seen then we might have been invoked from a + # pattern rule where we don't need one. That is ok -- this is a + # normal compilation that the losing compiler can handle. If no + # '.c' file was seen then we are probably linking. That is also + # ok. + exec "$@" +fi + +# Name of file we expect compiler to create. +cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` + +# Create the lock directory. +# Note: use '[/\\:.-]' here to ensure that we don't use the same name +# that we are using for the .o file. Also, base the name on the expected +# object file name, since that is what matters with a parallel build. +lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d +while true; do + if mkdir "$lockdir" >/dev/null 2>&1; then + break + fi + sleep 1 +done +# FIXME: race condition here if user kills between mkdir and trap. 
+trap "rmdir '$lockdir'; exit 1" 1 2 15 + +# Run the compile. +"$@" +ret=$? + +if test -f "$cofile"; then + test "$cofile" = "$ofile" || mv "$cofile" "$ofile" +elif test -f "${cofile}bj"; then + test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" +fi + +rmdir "$lockdir" +exit $ret + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/config.guess b/config.guess new file mode 100755 index 0000000..b33c9e8 --- /dev/null +++ b/config.guess @@ -0,0 +1,1486 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright 1992-2018 Free Software Foundation, Inc. + +timestamp='2018-08-29' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). +# +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. 
+# +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# +# Please send patches to . + + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright 1992-2018 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. 
+ +tmp= +# shellcheck disable=SC2172 +trap 'test -z "$tmp" || rm -fr "$tmp"' 1 2 13 15 +trap 'exitcode=$?; test -z "$tmp" || rm -fr "$tmp"; exit $exitcode' 0 + +set_cc_for_build() { + : "${TMPDIR=/tmp}" + # shellcheck disable=SC2039 + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } + dummy=$tmp/dummy + case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in + ,,) echo "int x;" > "$dummy.c" + for driver in cc gcc c89 c99 ; do + if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then + CC_FOR_BUILD="$driver" + break + fi + done + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; + esac +} + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if test -f /.attbin/uname ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +case "$UNAME_SYSTEM" in +Linux|GNU|GNU/*) + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. + LIBC=gnu + + set_cc_for_build + cat <<-EOF > "$dummy.c" + #include + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #else + LIBC=gnu + #endif + EOF + eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`" + + # If ldd exists, use it to detect musl libc. 
+ if command -v ldd >/dev/null && \ + ldd --version 2>&1 | grep -q ^musl + then + LIBC=musl + fi + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + "/sbin/$sysctl" 2>/dev/null || \ + "/usr/sbin/$sysctl" 2>/dev/null || \ + echo unknown)` + case "$UNAME_MACHINE_ARCH" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine="${arch}${endian}"-unknown + ;; + *) machine="$UNAME_MACHINE_ARCH"-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently (or will in the future) and ABI. + case "$UNAME_MACHINE_ARCH" in + earm*) + os=netbsdelf + ;; + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # Determine ABI tags. 
+ case "$UNAME_MACHINE_ARCH" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "$UNAME_VERSION" in + Debian*) + release='-gnu' + ;; + *) + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "$machine-${os}${release}${abi-}" + exit ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" + exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" + exit ;; + *:MidnightBSD:*:*) + echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" + exit ;; + *:ekkoBSD:*:*) + echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" + exit ;; + *:SolidBSD:*:*) + echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:MirBSD:*:*) + echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" + exit ;; + *:Sortix:*:*) + echo "$UNAME_MACHINE"-unknown-sortix + exit ;; + *:Redox:*:*) + echo "$UNAME_MACHINE"-unknown-redox + exit ;; + mips:OSF1:*.*) + echo mips-dec-osf1 + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has 
been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE=alpha ;; + "EV4.5 (21064)") + UNAME_MACHINE=alpha ;; + "LCA4 (21066/21068)") + UNAME_MACHINE=alpha ;; + "EV5 (21164)") + UNAME_MACHINE=alphaev5 ;; + "EV5.6 (21164A)") + UNAME_MACHINE=alphaev56 ;; + "EV5.6 (21164PC)") + UNAME_MACHINE=alphapca56 ;; + "EV5.7 (21164PC)") + UNAME_MACHINE=alphapca57 ;; + "EV6 (21264)") + UNAME_MACHINE=alphaev6 ;; + "EV6.7 (21264A)") + UNAME_MACHINE=alphaev67 ;; + "EV6.8CB (21264C)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8AL (21264B)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8CX (21264D)") + UNAME_MACHINE=alphaev68 ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE=alphaev69 ;; + "EV7 (21364)") + UNAME_MACHINE=alphaev7 ;; + "EV7.9 (21364A)") + UNAME_MACHINE=alphaev79 ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo "$UNAME_MACHINE"-dec-osf"`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`" + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? 
+ trap '' 0 + exit $exitcode ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo "$UNAME_MACHINE"-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo "$UNAME_MACHINE"-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix"$UNAME_RELEASE" + exit ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo "$UNAME_MACHINE"-ibm-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2"`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux"$UNAME_RELEASE" + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + UNAME_REL="`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`" + case `isainfo -b` in + 32) + echo i386-pc-solaris2"$UNAME_REL" + ;; + 64) + echo x86_64-pc-solaris2"$UNAME_REL" + ;; + esac + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. 
Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos"`echo "$UNAME_RELEASE"|sed -e 's/-/_/'`" + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos"$UNAME_RELEASE" + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos"$UNAME_RELEASE" + ;; + sun4) + echo sparc-sun-sunos"$UNAME_RELEASE" + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos"$UNAME_RELEASE" + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint"$UNAME_RELEASE" + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint"$UNAME_RELEASE" + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint"$UNAME_RELEASE" + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint"$UNAME_RELEASE" + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten"$UNAME_RELEASE" + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten"$UNAME_RELEASE" + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix"$UNAME_RELEASE" + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix"$UNAME_RELEASE" + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix"$UNAME_RELEASE" + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`"$dummy" "$dummyarg"` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos"$UNAME_RELEASE" + exit ;; + Motorola:PowerMAX_OS:*:*) + echo 
powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ "$UNAME_PROCESSOR" = mc88100 ] || [ "$UNAME_PROCESSOR" = mc88110 ] + then + if [ "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx ] || \ + [ "$TARGET_BINARY_INTERFACE"x = x ] + then + echo m88k-dg-dgux"$UNAME_RELEASE" + else + echo m88k-dg-dguxbcs"$UNAME_RELEASE" + fi + else + echo i586-dg-dgux"$UNAME_RELEASE" + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix"`echo "$UNAME_RELEASE"|sed -e 's/-/_/g'`" + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + fi + echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #include <sys/systemcfg.h> + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + else + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + fi + echo "$IBM_ARCH"-ibm-aix"$IBM_REV" + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + case "$UNAME_MACHINE" in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) 
HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "$sc_cpu_version" in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "$sc_kernel_bits" in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "$HP_ARCH" = "" ]; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + + #define _HPUX_SOURCE + #include <stdlib.h> + #include <unistd.h> + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ "$HP_ARCH" = hppa2.0w ] + then + set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH=hppa2.0w + else + HP_ARCH=hppa64 + fi + fi + echo "$HP_ARCH"-hp-hpux"$HPUX_REV" + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux"$HPUX_REV" + exit ;; + 3050*:HI-UX:*:*) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #include <unistd.h> + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo "$UNAME_MACHINE"-unknown-osf1mk + else + echo "$UNAME_MACHINE"-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + C2*:ConvexOS:*:* | 
convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi"$UNAME_RELEASE" + exit ;; + *:BSD/OS:*:*) + echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" + exit ;; + 
arm:FreeBSD:*:*) + UNAME_PROCESSOR=`uname -p` + set_cc_for_build + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo "${UNAME_PROCESSOR}"-unknown-freebsd"`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`"-gnueabi + else + echo "${UNAME_PROCESSOR}"-unknown-freebsd"`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`"-gnueabihf + fi + exit ;; + *:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case "$UNAME_PROCESSOR" in + amd64) + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; + esac + echo "$UNAME_PROCESSOR"-unknown-freebsd"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" + exit ;; + i*:CYGWIN*:*) + echo "$UNAME_MACHINE"-pc-cygwin + exit ;; + *:MINGW64*:*) + echo "$UNAME_MACHINE"-pc-mingw64 + exit ;; + *:MINGW*:*) + echo "$UNAME_MACHINE"-pc-mingw32 + exit ;; + *:MSYS*:*) + echo "$UNAME_MACHINE"-pc-msys + exit ;; + i*:PW*:*) + echo "$UNAME_MACHINE"-pc-pw32 + exit ;; + *:Interix*:*) + case "$UNAME_MACHINE" in + x86) + echo i586-pc-interix"$UNAME_RELEASE" + exit ;; + authenticamd | genuineintel | EM64T) + echo x86_64-unknown-interix"$UNAME_RELEASE" + exit ;; + IA64) + echo ia64-unknown-interix"$UNAME_RELEASE" + exit ;; + esac ;; + i*:UWIN*:*) + echo "$UNAME_MACHINE"-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2"`echo "$UNAME_RELEASE"|sed -e 's/[^.]*//'`" + exit ;; + *:GNU:*:*) + # the GNU system + echo "`echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,'`-unknown-$LIBC`echo "$UNAME_RELEASE"|sed -e 's,/.*$,,'`" + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo "$UNAME_MACHINE-unknown-`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`-$LIBC" + exit ;; + *:Minix:*:*) + echo "$UNAME_MACHINE"-unknown-minix + exit ;; + aarch64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be 
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arc:Linux:*:* | arceb:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + arm*:Linux:*:*) + set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi + else + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf + fi + fi + exit ;; + avr32*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + cris:Linux:*:*) + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + crisv32:Linux:*:*) + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" + exit ;; + e2k:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + frv:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + hexagon:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + i*86:Linux:*:*) + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + exit ;; + ia64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + k1om:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + m32r*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + m68*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #undef CPU + #undef ${UNAME_MACHINE} + #undef 
${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU'`" + test "x$CPU" != x && { echo "$CPU-unknown-linux-$LIBC"; exit; } + ;; + mips64el:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + openrisc*:Linux:*:*) + echo or1k-unknown-linux-"$LIBC" + exit ;; + or32:Linux:*:* | or1k*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-"$LIBC" + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-"$LIBC" + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; + PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; + *) echo hppa-unknown-linux-"$LIBC" ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-"$LIBC" + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-"$LIBC" + exit ;; + ppc64le:Linux:*:*) + echo powerpc64le-unknown-linux-"$LIBC" + exit ;; + ppcle:Linux:*:*) + echo powerpcle-unknown-linux-"$LIBC" + exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" + exit ;; + sh64*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + sh*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + tile*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + vax:Linux:*:*) + echo "$UNAME_MACHINE"-dec-linux-"$LIBC" + exit ;; + x86_64:Linux:*:*) + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" + 
exit ;; + xtensa*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo "$UNAME_MACHINE"-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo "$UNAME_MACHINE"-unknown-stop + exit ;; + i*86:atheos:*:*) + echo "$UNAME_MACHINE"-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo "$UNAME_MACHINE"-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + echo i386-unknown-lynxos"$UNAME_RELEASE" + exit ;; + i*86:*DOS:*:*) + echo "$UNAME_MACHINE"-pc-msdosdjgpp + exit ;; + i*86:*:4.*:*) + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" + else + echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. 
+ case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}" + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name` + echo "$UNAME_MACHINE"-pc-isc"$UNAME_REL" + elif /bin/uname -X 2>/dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL" + else + echo "$UNAME_MACHINE"-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configure will decide that + # this is a cross-build. + echo i586-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. 
+ echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos"$UNAME_RELEASE" + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos"$UNAME_RELEASE" + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos"$UNAME_RELEASE" + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos"$UNAME_RELEASE" + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv"$UNAME_RELEASE" + exit ;; + 
RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo "$UNAME_MACHINE"-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo "$UNAME_MACHINE"-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux"$UNAME_RELEASE" + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv"$UNAME_RELEASE" + else + echo mips-unknown-sysv"$UNAME_RELEASE" + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ echo i586-pc-haiku + exit ;; + x86_64:Haiku:*:*) + echo x86_64-unknown-haiku + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux"$UNAME_RELEASE" + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux"$UNAME_RELEASE" + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux"$UNAME_RELEASE" + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux"$UNAME_RELEASE" + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux"$UNAME_RELEASE" + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux"$UNAME_RELEASE" + exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux"$UNAME_RELEASE" + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody"$UNAME_RELEASE" + exit ;; + *:Rhapsody:*:*) + echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + set_cc_for_build + if test "$UNAME_PROCESSOR" = unknown ; then + UNAME_PROCESSOR=powerpc + fi + if test "`echo "$UNAME_RELEASE" | sed -e 's/\..*//'`" -le 10 ; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi + fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # Avoid executing cc on OS X 10.9, as it ships with a stub + # that puts up a graphical alert prompting to install + # developer tools. Any system running Mac OS X 10.7 or + # later (Darwin 11 and later) is required to have a 64-bit + # processor. This is not true of the ARM version of Darwin + # that Apple uses in portable devices. 
+ UNAME_PROCESSOR=x86_64 + fi + echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = x86; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NEO-*:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSE-*:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSR-*:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSV-*:NONSTOP_KERNEL:*:*) + echo nsv-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk"$UNAME_RELEASE" + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ # shellcheck disable=SC2154 + if test "$cputype" = 386; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo "$UNAME_MACHINE"-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux"$UNAME_RELEASE" + exit ;; + *:DragonFly:*:*) + echo "$UNAME_MACHINE"-unknown-dragonfly"`echo "$UNAME_RELEASE"|sed -e 's/[-(].*//'`" + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "$UNAME_MACHINE" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo "$UNAME_MACHINE"-pc-skyos"`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`" + exit ;; + i*86:rdos:*:*) + echo "$UNAME_MACHINE"-pc-rdos + exit ;; + i*86:AROS:*:*) + echo "$UNAME_MACHINE"-pc-aros + exit ;; + x86_64:VMkernel:*:*) + echo "$UNAME_MACHINE"-unknown-esx + exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; +esac + +echo "$0: unable to guess system type" >&2 + +case "$UNAME_MACHINE:$UNAME_SYSTEM" in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 </dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/config.h.in b/config.h.in new file mode 100644 index 0000000..e6e035e --- /dev/null +++ b/config.h.in @@ -0,0 +1,942 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + + +#ifndef UCX_CONFIG_H +#define UCX_CONFIG_H + + +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ +#undef CRAY_STACKSEG_END + +/* Define to 1 if using `alloca.c'. 
*/ +#undef C_ALLOCA + +/* Enable assertions */ +#undef ENABLE_ASSERT + +/* Enable builtin memcpy */ +#undef ENABLE_BUILTIN_MEMCPY + +/* Enable collecting data */ +#undef ENABLE_DEBUG_DATA + +/* Enable fault injection code */ +#undef ENABLE_FAULT_INJECTION + +/* Enable memory tracking */ +#undef ENABLE_MEMTRACK + +/* Enable thread support in UCP and UCT */ +#undef ENABLE_MT + +/* Enable checking user parameters */ +#undef ENABLE_PARAMS_CHECK + +/* Enable statistics */ +#undef ENABLE_STATS + +/* Enable symbol override */ +#undef ENABLE_SYMBOL_OVERRIDE + +/* Enable tuning */ +#undef ENABLE_TUNING + +/* Huawei Kunpeng 920 */ +#undef HAVE_AARCH64_HI1620 + +/* Cavium ThunderX1 */ +#undef HAVE_AARCH64_THUNDERX1 + +/* Cavium ThunderX2 */ +#undef HAVE_AARCH64_THUNDERX2 + +/* Define to 1 if you have `alloca', as a function or macro. */ +#undef HAVE_ALLOCA + +/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). + */ +#undef HAVE_ALLOCA_H + +/* Check attribute [optimize] */ +#undef HAVE_ATTRIBUTE_NOOPTIMIZE + +/* user defined cache line size */ +#undef HAVE_CACHE_LINE_SIZE + +/* Define to 1 if the system has the type `cap_user_data_t'. */ +#undef HAVE_CAP_USER_DATA_T + +/* Define to 1 if the system has the type `cap_user_header_t'. */ +#undef HAVE_CAP_USER_HEADER_T + +/* Define to 1 if you have the `clearenv' function. */ +#undef HAVE_CLEARENV + +/* Define to 1 if you have the `cplus_demangle' function. */ +#undef HAVE_CPLUS_DEMANGLE + +/* Define to 1 if you have the `cpuset_getaffinity' function. */ +#undef HAVE_CPUSET_GETAFFINITY + +/* Define to 1 if you have the `cpuset_setaffinity' function. */ +#undef HAVE_CPUSET_SETAFFINITY + +/* Define to 1 if the system has the type `cpuset_t'. */ +#undef HAVE_CPUSET_T + +/* Define to 1 if the system has the type `cpu_set_t'. */ +#undef HAVE_CPU_SET_T + +/* Enable CUDA support */ +#undef HAVE_CUDA + +/* Define to 1 if you have the <cuda.h> header file. */ +#undef HAVE_CUDA_H + +/* Define to 1 if you have the <cuda_runtime.h> header file. 
*/ +#undef HAVE_CUDA_RUNTIME_H + +/* DC DV support */ +#undef HAVE_DC_DV + +/* DC EXP support */ +#undef HAVE_DC_EXP + +/* Define to 1 if you have the declaration of `asprintf', and to 0 if you + don't. */ +#undef HAVE_DECL_ASPRINTF + +/* Define to 1 if you have the declaration of `basename', and to 0 if you + don't. */ +#undef HAVE_DECL_BASENAME + +/* Define to 1 if you have the declaration of `CPU_ISSET', and to 0 if you + don't. */ +#undef HAVE_DECL_CPU_ISSET + +/* Define to 1 if you have the declaration of `CPU_ZERO', and to 0 if you + don't. */ +#undef HAVE_DECL_CPU_ZERO + +/* Define to 1 if you have the declaration of `ethtool_cmd_speed', and to 0 if + you don't. */ +#undef HAVE_DECL_ETHTOOL_CMD_SPEED + +/* Define to 1 if you have the declaration of `fmemopen', and to 0 if you + don't. */ +#undef HAVE_DECL_FMEMOPEN + +/* Define to 1 if you have the declaration of `F_SETOWN_EX', and to 0 if you + don't. */ +#undef HAVE_DECL_F_SETOWN_EX + +/* Define to 1 if you have the declaration of `gdr_copy_to_mapping', and to 0 + if you don't. */ +#undef HAVE_DECL_GDR_COPY_TO_MAPPING + +/* Define to 1 if you have the declaration of `IBV_ACCESS_ON_DEMAND', and to 0 + if you don't. */ +#undef HAVE_DECL_IBV_ACCESS_ON_DEMAND + +/* Define to 1 if you have the declaration of `ibv_advise_mr', and to 0 if you + don't. */ +#undef HAVE_DECL_IBV_ADVISE_MR + +/* Define to 1 if you have the declaration of `ibv_alloc_dm', and to 0 if you + don't. */ +#undef HAVE_DECL_IBV_ALLOC_DM + +/* Define to 1 if you have the declaration of `ibv_alloc_td', and to 0 if you + don't. */ +#undef HAVE_DECL_IBV_ALLOC_TD + +/* Define to 1 if you have the declaration of `ibv_cmd_modify_qp', and to 0 if + you don't. */ +#undef HAVE_DECL_IBV_CMD_MODIFY_QP + +/* Define to 1 if you have the declaration of + `IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN', and to 0 if you don't. 
*/ +#undef HAVE_DECL_IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN + +/* Define to 1 if you have the declaration of `ibv_create_qp_ex', and to 0 if + you don't. */ +#undef HAVE_DECL_IBV_CREATE_QP_EX + +/* Define to 1 if you have the declaration of `ibv_create_srq', and to 0 if + you don't. */ +#undef HAVE_DECL_IBV_CREATE_SRQ + +/* Define to 1 if you have the declaration of `ibv_create_srq_ex', and to 0 if + you don't. */ +#undef HAVE_DECL_IBV_CREATE_SRQ_EX + +/* Define to 1 if you have the declaration of `IBV_EVENT_GID_CHANGE', and to 0 + if you don't. */ +#undef HAVE_DECL_IBV_EVENT_GID_CHANGE + +/* Define to 1 if you have the declaration of `ibv_event_type_str', and to 0 + if you don't. */ +#undef HAVE_DECL_IBV_EVENT_TYPE_STR + +/* Define to 1 if you have the declaration of `IBV_EXP_ACCESS_ALLOCATE_MR', + and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_ACCESS_ALLOCATE_MR + +/* Define to 1 if you have the declaration of `IBV_EXP_ACCESS_ON_DEMAND', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_ACCESS_ON_DEMAND + +/* Define to 1 if you have the declaration of `ibv_exp_alloc_dm', and to 0 if + you don't. */ +#undef HAVE_DECL_IBV_EXP_ALLOC_DM + +/* Define to 1 if you have the declaration of `IBV_EXP_ATOMIC_HCA_REPLY_BE', + and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE + +/* Define to 1 if you have the declaration of `IBV_EXP_CQ_IGNORE_OVERRUN', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_CQ_IGNORE_OVERRUN + +/* Define to 1 if you have the declaration of `IBV_EXP_CQ_MODERATION', and to + 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_CQ_MODERATION + +/* Define to 1 if you have the declaration of `ibv_exp_create_qp', and to 0 if + you don't. */ +#undef HAVE_DECL_IBV_EXP_CREATE_QP + +/* Define to 1 if you have the declaration of `ibv_exp_create_res_domain', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_CREATE_RES_DOMAIN + +/* Define to 1 if you have the declaration of `ibv_exp_create_srq', and to 0 + if you don't. 
*/ +#undef HAVE_DECL_IBV_EXP_CREATE_SRQ + +/* Define to 1 if you have the declaration of + `IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT', and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT + +/* Define to 1 if you have the declaration of `ibv_exp_destroy_res_domain', + and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_DESTROY_RES_DOMAIN + +/* Define to 1 if you have the declaration of + `IBV_EXP_DEVICE_ATTR_PCI_ATOMIC_CAPS', and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_DEVICE_ATTR_PCI_ATOMIC_CAPS + +/* Define to 1 if you have the declaration of + `IBV_EXP_DEVICE_ATTR_RESERVED_2', and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_DEVICE_ATTR_RESERVED_2 + +/* Define to 1 if you have the declaration of `IBV_EXP_DEVICE_DC_TRANSPORT', + and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_DEVICE_DC_TRANSPORT + +/* Define to 1 if you have the declaration of `IBV_EXP_DEVICE_MR_ALLOCATE', + and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_DEVICE_MR_ALLOCATE + +/* Define to 1 if you have the declaration of `IBV_EXP_MR_FIXED_BUFFER_SIZE', + and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_MR_FIXED_BUFFER_SIZE + +/* Define to 1 if you have the declaration of `IBV_EXP_MR_INDIRECT_KLMS', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_MR_INDIRECT_KLMS + +/* Define to 1 if you have the declaration of `IBV_EXP_ODP_SUPPORT_IMPLICIT', + and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_ODP_SUPPORT_IMPLICIT + +/* Define to 1 if you have the declaration of `ibv_exp_post_send', and to 0 if + you don't. */ +#undef HAVE_DECL_IBV_EXP_POST_SEND + +/* Define to 1 if you have the declaration of `ibv_exp_prefetch_mr', and to 0 + if you don't. */ +#undef HAVE_DECL_IBV_EXP_PREFETCH_MR + +/* Define to 1 if you have the declaration of `IBV_EXP_PREFETCH_WRITE_ACCESS', + and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_PREFETCH_WRITE_ACCESS + +/* Define to 1 if you have the declaration of `IBV_EXP_QPT_DC_INI', and to 0 + if you don't. 
*/ +#undef HAVE_DECL_IBV_EXP_QPT_DC_INI + +/* Define to 1 if you have the declaration of `IBV_EXP_QP_CREATE_UMR', and to + 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_QP_CREATE_UMR + +/* Define to 1 if you have the declaration of + `IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG', and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG + +/* Define to 1 if you have the declaration of + `IBV_EXP_QP_INIT_ATTR_RES_DOMAIN', and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_QP_INIT_ATTR_RES_DOMAIN + +/* Define to 1 if you have the declaration of + `IBV_EXP_QP_OOO_RW_DATA_PLACEMENT', and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT + +/* Define to 1 if you have the declaration of `ibv_exp_query_device', and to 0 + if you don't. */ +#undef HAVE_DECL_IBV_EXP_QUERY_DEVICE + +/* Define to 1 if you have the declaration of `ibv_exp_query_gid_attr', and to + 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_QUERY_GID_ATTR + +/* Define to 1 if you have the declaration of `ibv_exp_reg_mr', and to 0 if + you don't. */ +#undef HAVE_DECL_IBV_EXP_REG_MR + +/* Define to 1 if you have the declaration of + `IBV_EXP_RES_DOMAIN_THREAD_MODEL', and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_RES_DOMAIN_THREAD_MODEL + +/* Define to 1 if you have the declaration of + `IBV_EXP_SEND_EXT_ATOMIC_INLINE', and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_SEND_EXT_ATOMIC_INLINE + +/* Define to 1 if you have the declaration of `ibv_exp_setenv', and to 0 if + you don't. */ +#undef HAVE_DECL_IBV_EXP_SETENV + +/* Define to 1 if you have the declaration of + `IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP', and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP + +/* Define to 1 if you have the declaration of + `IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD', and to 0 if you don't. */ +#undef HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD + +/* Define to 1 if you have the declaration of `IBV_EXP_WR_NOP', and to 0 if + you don't. 
*/ +#undef HAVE_DECL_IBV_EXP_WR_NOP + +/* Define to 1 if you have the declaration of `ibv_get_async_event', and to 0 + if you don't. */ +#undef HAVE_DECL_IBV_GET_ASYNC_EVENT + +/* Define to 1 if you have the declaration of `ibv_get_device_name', and to 0 + if you don't. */ +#undef HAVE_DECL_IBV_GET_DEVICE_NAME + +/* Define to 1 if you have the declaration of `IBV_LINK_LAYER_ETHERNET', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_LINK_LAYER_ETHERNET + +/* Define to 1 if you have the declaration of `IBV_LINK_LAYER_INFINIBAND', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_LINK_LAYER_INFINIBAND + +/* Define to 1 if you have the declaration of `ibv_mlx5_exp_get_cq_info', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_MLX5_EXP_GET_CQ_INFO + +/* Define to 1 if you have the declaration of `ibv_mlx5_exp_get_qp_info', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_MLX5_EXP_GET_QP_INFO + +/* Define to 1 if you have the declaration of `ibv_mlx5_exp_get_srq_info', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_MLX5_EXP_GET_SRQ_INFO + +/* Define to 1 if you have the declaration of `ibv_mlx5_exp_update_cq_ci', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_MLX5_EXP_UPDATE_CQ_CI + +/* Define to 1 if you have the declaration of `IBV_ODP_SUPPORT_IMPLICIT', and + to 0 if you don't. */ +#undef HAVE_DECL_IBV_ODP_SUPPORT_IMPLICIT + +/* have upstream ibv_query_device_ex */ +#undef HAVE_DECL_IBV_QUERY_DEVICE_EX + +/* Define to 1 if you have the declaration of `ibv_query_gid', and to 0 if you + don't. */ +#undef HAVE_DECL_IBV_QUERY_GID + +/* Define to 1 if you have the declaration of `ibv_wc_status_str', and to 0 if + you don't. */ +#undef HAVE_DECL_IBV_WC_STATUS_STR + +/* Define to 1 if you have the declaration of `MADV_FREE', and to 0 if you + don't. */ +#undef HAVE_DECL_MADV_FREE + +/* Define to 1 if you have the declaration of `MADV_REMOVE', and to 0 if you + don't. 
*/ +#undef HAVE_DECL_MADV_REMOVE + +/* Define to 1 if you have the declaration of + `MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE', and to 0 if you don't. */ +#undef HAVE_DECL_MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE + +/* Define to 1 if you have the declaration of `mlx5dv_create_qp', and to 0 if + you don't. */ +#undef HAVE_DECL_MLX5DV_CREATE_QP + +/* Define to 1 if you have the declaration of `MLX5DV_DCTYPE_DCT', and to 0 if + you don't. */ +#undef HAVE_DECL_MLX5DV_DCTYPE_DCT + +/* Define to 1 if you have the declaration of `mlx5dv_init_obj', and to 0 if + you don't. */ +#undef HAVE_DECL_MLX5DV_INIT_OBJ + +/* Define to 1 if you have the declaration of `mlx5dv_is_supported', and to 0 + if you don't. */ +#undef HAVE_DECL_MLX5DV_IS_SUPPORTED + +/* Define to 1 if you have the declaration of `MLX5DV_OBJ_AH', and to 0 if you + don't. */ +#undef HAVE_DECL_MLX5DV_OBJ_AH + +/* Define to 1 if you have the declaration of + `MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE', and to 0 if you don't. */ +#undef HAVE_DECL_MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE + +/* Define to 1 if you have the declaration of `MLX5_WQE_CTRL_SOLICITED', and + to 0 if you don't. */ +#undef HAVE_DECL_MLX5_WQE_CTRL_SOLICITED + +/* Define to 1 if you have the declaration of `POSIX_MADV_DONTNEED', and to 0 + if you don't. */ +#undef HAVE_DECL_POSIX_MADV_DONTNEED + +/* Define to 1 if you have the declaration of `PR_SET_PTRACER', and to 0 if + you don't. */ +#undef HAVE_DECL_PR_SET_PTRACER + +/* Define to 1 if you have the declaration of `rdma_establish', and to 0 if + you don't. */ +#undef HAVE_DECL_RDMA_ESTABLISH + +/* Define to 1 if you have the declaration of `rdma_init_qp_attr', and to 0 if + you don't. */ +#undef HAVE_DECL_RDMA_INIT_QP_ATTR + +/* Define to 1 if you have the declaration of `SPEED_UNKNOWN', and to 0 if you + don't. */ +#undef HAVE_DECL_SPEED_UNKNOWN + +/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you + don't. 
*/ +#undef HAVE_DECL_STRERROR_R + +/* Define to 1 if you have the declaration of `SYS_brk', and to 0 if you + don't. */ +#undef HAVE_DECL_SYS_BRK + +/* Define to 1 if you have the declaration of `SYS_ipc', and to 0 if you + don't. */ +#undef HAVE_DECL_SYS_IPC + +/* Define to 1 if you have the declaration of `SYS_madvise', and to 0 if you + don't. */ +#undef HAVE_DECL_SYS_MADVISE + +/* Define to 1 if you have the declaration of `SYS_mmap', and to 0 if you + don't. */ +#undef HAVE_DECL_SYS_MMAP + +/* Define to 1 if you have the declaration of `SYS_mremap', and to 0 if you + don't. */ +#undef HAVE_DECL_SYS_MREMAP + +/* Define to 1 if you have the declaration of `SYS_munmap', and to 0 if you + don't. */ +#undef HAVE_DECL_SYS_MUNMAP + +/* Define to 1 if you have the declaration of `SYS_shmat', and to 0 if you + don't. */ +#undef HAVE_DECL_SYS_SHMAT + +/* Define to 1 if you have the declaration of `SYS_shmdt', and to 0 if you + don't. */ +#undef HAVE_DECL_SYS_SHMDT + +/* Define to 1 if you have the declaration of `__ppc_get_timebase_freq', and + to 0 if you don't. */ +#undef HAVE_DECL___PPC_GET_TIMEBASE_FREQ + +/* Enable detailed backtrace */ +#undef HAVE_DETAILED_BACKTRACE + +/* DEVX support */ +#undef HAVE_DEVX + +/* Define to 1 if you have the header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_ENDIAN_H_ + +/* IB UMR support */ +#undef HAVE_EXP_UMR + +/* IB UMR KSM support */ +#undef HAVE_EXP_UMR_KSM + +/* Define to 1 if you have the header file. */ +#undef HAVE_GDRAPI_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_HIP_RUNTIME_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_HSA_EXT_AMD_H + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_HSA_H + +/* high-resolution hardware timer disabled */ +#undef HAVE_HW_TIMER + +/* IB support */ +#undef HAVE_IB + +/* Device Memory support */ +#undef HAVE_IBV_DM + +/* Device Memory support (EXP) */ +#undef HAVE_IBV_EXP_DM + +/* IB QP Create UMR support */ +#undef HAVE_IBV_EXP_QP_CREATE_UMR + +/* Support UMR max caps v2 */ +#undef HAVE_IBV_EXP_QP_CREATE_UMR_CAPS + +/* IB resource domain */ +#undef HAVE_IBV_EXP_RES_DOMAIN + +/* IB extended atomics support */ +#undef HAVE_IB_EXT_ATOMICS + +/* struct in6_addr has s6_addr32 member */ +#undef HAVE_IN6_ADDR_S6_ADDR32 + +/* struct in6_addr is BSD-style */ +#undef HAVE_IN6_ADDR_U6_ADDR32 + +/* Define to 1 if you have the header file. */ +#undef HAVE_INFINIBAND_MLX5DV_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_INFINIBAND_MLX5_HW_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_INFINIBAND_TM_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* struct iphdr has daddr member */ +#undef HAVE_IPHDR_DADDR + +/* struct ip has ip_dst member */ +#undef HAVE_IP_IP_DST + +/* Define to 1 if you have the header file. */ +#undef HAVE_JNI_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_JNI_MD_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_LIBGEN_H + +/* Define to 1 if you have the `rt' library (-lrt). */ +#undef HAVE_LIBRT + +/* Define to 1 if you have the header file. */ +#undef HAVE_LINUX_FUTEX_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_LINUX_IP_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_LINUX_MMAN_H + +/* Define to 1 if you have the `malloc_get_state' function. */ +#undef HAVE_MALLOC_GET_STATE + +/* Define to 1 if you have the header file. */ +#undef HAVE_MALLOC_H + +/* malloc hooks support */ +#undef HAVE_MALLOC_HOOK + +/* Define to 1 if you have the header file. */ +#undef HAVE_MALLOC_NP_H + +/* Define to 1 if you have the `malloc_set_state' function. 
*/ +#undef HAVE_MALLOC_SET_STATE + +/* Define to 1 if you have the `malloc_trim' function. */ +#undef HAVE_MALLOC_TRIM + +/* have masked atomic endianness */ +#undef HAVE_MASKED_ATOMICS_ENDIANNESS + +/* Define to 1 if you have the `memalign' function. */ +#undef HAVE_MEMALIGN + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* mlx5 bare-metal support */ +#undef HAVE_MLX5_HW + +/* mlx5 UD bare-metal support */ +#undef HAVE_MLX5_HW_UD + +/* MPI support */ +#undef HAVE_MPI + +/* Define to 1 if you have the `mremap' function. */ +#undef HAVE_MREMAP + +/* Define to 1 if you have the header file. */ +#undef HAVE_NETINET_IP_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NET_ETHERNET_H + +/* Define to 1 to enable NUMA support */ +#undef HAVE_NUMA + +/* Define to 1 if you have the header file. */ +#undef HAVE_NUMAIF_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NUMA_H + +/* ODP support */ +#undef HAVE_ODP + +/* Implicit ODP support */ +#undef HAVE_ODP_IMPLICIT + +/* Define to 1 if you have the `posix_memalign' function. */ +#undef HAVE_POSIX_MEMALIGN + +/* Prefetch support */ +#undef HAVE_PREFETCH + +/* Enable profiling */ +#undef HAVE_PROFILING + +/* Define to 1 if you have the header file. */ +#undef HAVE_PTHREAD_NP_H + +/* RDMACM QP less support */ +#undef HAVE_RDMACM_QP_LESS + +/* RTE support */ +#undef HAVE_RTE + +/* Define to 1 if you have the `sched_getaffinity' function. */ +#undef HAVE_SCHED_GETAFFINITY + +/* Define to 1 if you have the `sched_setaffinity' function. */ +#undef HAVE_SCHED_SETAFFINITY + +/* struct sigaction has sa_restorer member */ +#undef HAVE_SIGACTION_SA_RESTORER + +/* struct sigevent has sigev_notify_thread_id */ +#undef HAVE_SIGEVENT_SIGEV_NOTIFY_THREAD_ID + +/* struct sigevent has _sigev_un._tid */ +#undef HAVE_SIGEVENT_SIGEV_UN_TID + +/* Define to 1 if the system has the type `sighandler_t'. */ +#undef HAVE_SIGHANDLER_T + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strerror_r' function. */ +#undef HAVE_STRERROR_R + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if the system has the type `struct bitmask'. */ +#undef HAVE_STRUCT_BITMASK + +/* Define to 1 if the system has the type `struct dl_phdr_info'. */ +#undef HAVE_STRUCT_DL_PHDR_INFO + +/* Define to 1 if `element.dct' is a member of `struct ibv_async_event'. */ +#undef HAVE_STRUCT_IBV_ASYNC_EVENT_ELEMENT_DCT + +/* Define to 1 if `dc_offload_params' is a member of `struct + ibv_exp_create_srq_attr'. */ +#undef HAVE_STRUCT_IBV_EXP_CREATE_SRQ_ATTR_DC_OFFLOAD_PARAMS + +/* Define to 1 if `exp_device_cap_flags' is a member of `struct + ibv_exp_device_attr'. */ +#undef HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_EXP_DEVICE_CAP_FLAGS + +/* Define to 1 if `odp_caps' is a member of `struct ibv_exp_device_attr'. */ +#undef HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_CAPS + +/* Define to 1 if `odp_caps.per_transport_caps.dc_odp_caps' is a member of + `struct ibv_exp_device_attr'. */ +#undef HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_CAPS_PER_TRANSPORT_CAPS_DC_ODP_CAPS + +/* Define to 1 if `odp_mr_max_size' is a member of `struct + ibv_exp_device_attr'. */ +#undef HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_MR_MAX_SIZE + +/* Define to 1 if `max_inl_recv' is a member of `struct ibv_exp_qp_init_attr'. + */ +#undef HAVE_STRUCT_IBV_EXP_QP_INIT_ATTR_MAX_INL_RECV + +/* Define to 1 if `umr_caps' is a member of `struct ibv_exp_qp_init_attr'. */ +#undef HAVE_STRUCT_IBV_EXP_QP_INIT_ATTR_UMR_CAPS + +/* Define to 1 if `bf.need_lock' is a member of `struct ibv_mlx5_qp_info'. */ +#undef HAVE_STRUCT_IBV_MLX5_QP_INFO_BF_NEED_LOCK + +/* Define to 1 if `flags' is a member of `struct ibv_tm_caps'. 
*/ +#undef HAVE_STRUCT_IBV_TM_CAPS_FLAGS + +/* Define to 1 if `cq_uar' is a member of `struct mlx5dv_cq'. */ +#undef HAVE_STRUCT_MLX5DV_CQ_CQ_UAR + +/* Define to 1 if `ibv_ah' is a member of `struct mlx5_ah'. */ +#undef HAVE_STRUCT_MLX5_AH_IBV_AH + +/* Define to 1 if `ib_stride_index' is a member of `struct mlx5_cqe64'. */ +#undef HAVE_STRUCT_MLX5_CQE64_IB_STRIDE_INDEX + +/* Define to 1 if `rmac' is a member of `struct mlx5_grh_av'. */ +#undef HAVE_STRUCT_MLX5_GRH_AV_RMAC + +/* Define to 1 if `cmd_qp' is a member of `struct mlx5_srq'. */ +#undef HAVE_STRUCT_MLX5_SRQ_CMD_QP + +/* Define to 1 if `base' is a member of `struct mlx5_wqe_av'. */ +#undef HAVE_STRUCT_MLX5_WQE_AV_BASE + +/* Linux capability API support */ +#undef HAVE_SYS_CAPABILITY_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_CPUSET_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_ENDIAN_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_EPOLL_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_EVENTFD_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_EVENT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_PLATFORM_PPC_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_THR_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_UIO_H + +/* DC transport support */ +#undef HAVE_TL_DC + +/* RC transport support */ +#undef HAVE_TL_RC + +/* UD transport support */ +#undef HAVE_TL_UD + +/* Defined if UGNI transport exists */ +#undef HAVE_TL_UGNI + +/* Use ptmalloc-2.8.6 version */ +#undef HAVE_UCM_PTMALLOC286 + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* IB experimental verbs */ +#undef HAVE_VERBS_EXP_H + +/* Define to 1 if you have the `__aarch64_sync_cache_range' function. 
*/ +#undef HAVE___AARCH64_SYNC_CACHE_RANGE + +/* Define to 1 if you have the `__clear_cache' function. */ +#undef HAVE___CLEAR_CACHE + +/* Define to 1 if you have the `__curbrk' function. */ +#undef HAVE___CURBRK + +/* Define to 1 if the system has the type `__sighandler_t'. */ +#undef HAVE___SIGHANDLER_T + +/* IB Tag Matching support */ +#undef IBV_HW_TM + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#undef LT_OBJDIR + +/* Define to 1 to disable Valgrind annotations. */ +#undef NVALGRIND + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at runtime. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +#undef STACK_DIRECTION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if strerror_r returns char *. 
*/ +#undef STRERROR_R_CHAR_P + +/* Enable BISTRO hooks */ +#undef UCM_BISTRO_HOOKS + +/* Highest log level */ +#undef UCS_MAX_LOG_LEVEL + +/* Enable packet header inspection/rewriting in UCT/UD */ +#undef UCT_UD_EP_DEBUG_HOOKS + +/* Set alignment assumption for compiler */ +#undef UCX_ALLOC_ALIGN + +/* UCX configure flags */ +#undef UCX_CONFIGURE_FLAGS + +/* UCX module sub-directory */ +#undef UCX_MODULE_SUBDIR + +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# undef _ALL_SOURCE +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# undef _POSIX_PTHREAD_SEMANTICS +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# undef _TANDEM_SOURCE +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# undef __EXTENSIONS__ +#endif + + +/* Version number of package */ +#undef VERSION + +/* Define to 1 if on MINIX. */ +#undef _MINIX + +/* Define to 2 if the system does not provide POSIX.1 features except with + this defined. */ +#undef _POSIX_1_SOURCE + +/* Define to 1 if you need to in order for `stat' and other things to work. */ +#undef _POSIX_SOURCE + +/* Define to the equivalent of the C99 'restrict' keyword, or to + nothing if this is not supported. Do not define if restrict is + supported directly. */ +#undef restrict +/* Work around a bug in Sun C++: it does not support _Restrict or + __restrict__, even though the corresponding Sun C compiler ends up with + "#define restrict _Restrict" or "#define restrict __restrict__" in the + previous line. Perhaps some future version of Sun C++ will work with + restrict; if so, hopefully it defines __RESTRICT like Sun C does. */ +#if defined __SUNPRO_CC && !defined __RESTRICT +# define _Restrict +# define __restrict__ +#endif + +/* Define to `unsigned int' if <sys/types.h> does not define.
*/ +#undef size_t + +/* Test loadable modules */ +#undef test_MODULES + +/* UCM loadable modules */ +#undef ucm_MODULES + +/* UCT loadable modules */ +#undef uct_MODULES + +/* CUDA loadable modules */ +#undef uct_cuda_MODULES + +/* IB loadable modules */ +#undef uct_ib_MODULES + +/* ROCM loadable modules */ +#undef uct_rocm_MODULES + +/* Perftest loadable modules */ +#undef ucx_perftest_MODULES + + +#endif /* UCX_CONFIG_H */ + diff --git a/config.sub b/config.sub new file mode 100755 index 0000000..b51fb8c --- /dev/null +++ b/config.sub @@ -0,0 +1,1790 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2018 Free Software Foundation, Inc. + +timestamp='2018-08-29' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/>. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). + + +# Please send patches to <config-patches@gnu.org>. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed. + +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS + +Canonicalize a configuration name. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.sub ($timestamp) + +Copyright 1992-2018 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types.
+ echo "$1" + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Split fields of configuration type +IFS="-" read -r field1 field2 field3 field4 <<EOF +$1 +EOF + +# Separate into logical components for further validation +case $1 in + *-*-*-*-*) + echo Invalid configuration \`"$1"\': more than four components >&2 + exit 1 + ;; + *-*-*-*) + basic_machine=$field1-$field2 + os=$field3-$field4 + ;; + *-*-*) + # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two + # parts + maybe_os=$field2-$field3 + case $maybe_os in + nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc \ + | linux-newlib* | linux-musl* | linux-uclibc* | uclinux-uclibc* \ + | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ + | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ + | storm-chaos* | os2-emx* | rtmk-nova*) + basic_machine=$field1 + os=$maybe_os + ;; + android-linux) + basic_machine=$field1-unknown + os=linux-android + ;; + *) + basic_machine=$field1-$field2 + os=$field3 + ;; + esac + ;; + *-*) + # A lone config we happen to match not fitting any pattern + case $field1-$field2 in + decstation-3100) + basic_machine=mips-dec + os= + ;; + *-*) + # Second component is usually, but not always the OS + case $field2 in + # Prevent following clause from handling this valid os + sun*os*) + basic_machine=$field1 + os=$field2 + ;; + # Manufacturers + dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ + | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ + | unicom* | ibm* | next | hp | isi* | apollo | altos* \ + | convergent* | ncr* | news | 32* | 3600* | 3100* \ + | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ + | ultra | tti* | harris | dolphin | highlevel | gould \ + | cbm | ns | masscomp | apple | axis | knuth | cray \ + | microblaze* | sim | cisco \ + | oki | wec | wrs | winbond) + basic_machine=$field1-$field2 + os= + ;; + *) + basic_machine=$field1 + os=$field2 + ;; + esac + ;; + esac + ;; + *) + # Convert single-component short-hands not valid as part of + #
multi-component configurations. + case $field1 in + 386bsd) + basic_machine=i386-pc + os=bsd + ;; + a29khif) + basic_machine=a29k-amd + os=udi + ;; + adobe68k) + basic_machine=m68010-adobe + os=scout + ;; + alliant) + basic_machine=fx80-alliant + os= + ;; + altos | altos3068) + basic_machine=m68k-altos + os= + ;; + am29k) + basic_machine=a29k-none + os=bsd + ;; + amdahl) + basic_machine=580-amdahl + os=sysv + ;; + amiga) + basic_machine=m68k-unknown + os= + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=bsd + ;; + aros) + basic_machine=i386-pc + os=aros + ;; + aux) + basic_machine=m68k-apple + os=aux + ;; + balance) + basic_machine=ns32k-sequent + os=dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=linux + ;; + cegcc) + basic_machine=arm-unknown + os=cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=bsd + ;; + convex-c2) + basic_machine=c2-convex + os=bsd + ;; + convex-c32) + basic_machine=c32-convex + os=bsd + ;; + convex-c34) + basic_machine=c34-convex + os=bsd + ;; + convex-c38) + basic_machine=c38-convex + os=bsd + ;; + cray) + basic_machine=j90-cray + os=unicos + ;; + crds | unos) + basic_machine=m68k-crds + os= + ;; + da30) + basic_machine=m68k-da30 + os= + ;; + decstation | pmax | pmin | dec3100 | decstatn) + basic_machine=mips-dec + os= + ;; + delta88) + basic_machine=m88k-motorola + os=sysv3 + ;; + dicos) + basic_machine=i686-pc + os=dicos + ;; + djgpp) + basic_machine=i586-pc + os=msdosdjgpp + ;; + ebmon29k) + basic_machine=a29k-amd + os=ebmon + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=ose + ;; + gmicro) + basic_machine=tron-gmicro + os=sysv + ;; + go32) + basic_machine=i386-pc + os=go32 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=xray + ;; + h8500hms) 
+ basic_machine=h8500-hitachi + os=hms + ;; + harris) + basic_machine=m88k-harris + os=sysv3 + ;; + hp300) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=hpux + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=proelf + ;; + i386mach) + basic_machine=i386-mach + os=mach + ;; + vsta) + basic_machine=i386-pc + os=vsta + ;; + isi68 | isi) + basic_machine=m68k-isi + os=sysv + ;; + m68knommu) + basic_machine=m68k-unknown + os=linux + ;; + magnum | m3230) + basic_machine=mips-mips + os=sysv + ;; + merlin) + basic_machine=ns32k-utek + os=sysv + ;; + mingw64) + basic_machine=x86_64-pc + os=mingw64 + ;; + mingw32) + basic_machine=i686-pc + os=mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=mingw32ce + ;; + monitor) + basic_machine=m68k-rom68k + os=coff + ;; + morphos) + basic_machine=powerpc-unknown + os=morphos + ;; + moxiebox) + basic_machine=moxie-unknown + os=moxiebox + ;; + msdos) + basic_machine=i386-pc + os=msdos + ;; + msys) + basic_machine=i686-pc + os=msys + ;; + mvs) + basic_machine=i370-ibm + os=mvs + ;; + nacl) + basic_machine=le32-unknown + os=nacl + ;; + ncr3000) + basic_machine=i486-ncr + os=sysv4 + ;; + netbsd386) + basic_machine=i386-pc + os=netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=newsos + ;; + news1000) + basic_machine=m68030-sony + os=newsos + ;; + necv70) + basic_machine=v70-nec + os=sysv + ;; + nh3000) + basic_machine=m68k-harris + os=cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=cxux + ;; + nindy960) + basic_machine=i960-intel + os=nindy + ;; + mon960) + basic_machine=i960-intel + os=mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=nonstopux + ;; + os400) + basic_machine=powerpc-ibm + os=os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=ose + ;; + os68k) + basic_machine=m68k-none + 
os=os68k + ;; + paragon) + basic_machine=i860-intel + os=osf + ;; + parisc) + basic_machine=hppa-unknown + os=linux + ;; + pw32) + basic_machine=i586-unknown + os=pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + os=rdos + ;; + rdos32) + basic_machine=i386-pc + os=rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=coff + ;; + sa29200) + basic_machine=a29k-amd + os=udi + ;; + sei) + basic_machine=mips-sei + os=seiux + ;; + sequent) + basic_machine=i386-sequent + os= + ;; + sps7) + basic_machine=m68k-bull + os=sysv2 + ;; + st2000) + basic_machine=m68k-tandem + os= + ;; + stratus) + basic_machine=i860-stratus + os=sysv4 + ;; + sun2) + basic_machine=m68000-sun + os= + ;; + sun2os3) + basic_machine=m68000-sun + os=sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=sunos4 + ;; + sun3) + basic_machine=m68k-sun + os= + ;; + sun3os3) + basic_machine=m68k-sun + os=sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=sunos4 + ;; + sun4) + basic_machine=sparc-sun + os= + ;; + sun4os3) + basic_machine=sparc-sun + os=sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=solaris2 + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + os= + ;; + sv1) + basic_machine=sv1-cray + os=unicos + ;; + symmetry) + basic_machine=i386-sequent + os=dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=unicos + ;; + t90) + basic_machine=t90-cray + os=unicos + ;; + toad1) + basic_machine=pdp10-xkl + os=tops20 + ;; + tpf) + basic_machine=s390x-ibm + os=tpf + ;; + udi29k) + basic_machine=a29k-amd + os=udi + ;; + ultra3) + basic_machine=a29k-nyu + os=sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=none + ;; + vaxv) + basic_machine=vax-dec + os=sysv + ;; + vms) + basic_machine=vax-dec + os=vms + ;; + vxworks960) + basic_machine=i960-wrs + os=vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=vxworks + ;; + xbox) + basic_machine=i686-pc + os=mingw32 + ;; 
+ ymp) + basic_machine=ymp-cray + os=unicos + ;; + *) + basic_machine=$1 + os= + ;; + esac + ;; +esac + +# Decode 1-component or ad-hoc basic machines +case $basic_machine in + # Here we handle the default manufacturer of certain CPU types. It is in + # some cases the only manufacturer, in others, it is the most popular. + w89k) + cpu=hppa1.1 + vendor=winbond + ;; + op50n) + cpu=hppa1.1 + vendor=oki + ;; + op60c) + cpu=hppa1.1 + vendor=oki + ;; + ibm*) + cpu=i370 + vendor=ibm + ;; + orion105) + cpu=clipper + vendor=highlevel + ;; + mac | mpw | mac-mpw) + cpu=m68k + vendor=apple + ;; + pmac | pmac-mpw) + cpu=powerpc + vendor=apple + ;; + + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + cpu=m68000 + vendor=att + ;; + 3b*) + cpu=we32k + vendor=att + ;; + bluegene*) + cpu=powerpc + vendor=ibm + os=cnk + ;; + decsystem10* | dec10*) + cpu=pdp10 + vendor=dec + os=tops10 + ;; + decsystem20* | dec20*) + cpu=pdp10 + vendor=dec + os=tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + cpu=m68k + vendor=motorola + ;; + dpx2*) + cpu=m68k + vendor=bull + os=sysv3 + ;; + encore | umax | mmax) + cpu=ns32k + vendor=encore + ;; + elxsi) + cpu=elxsi + vendor=elxsi + os=${os:-bsd} + ;; + fx2800) + cpu=i860 + vendor=alliant + ;; + genix) + cpu=ns32k + vendor=ns + ;; + h3050r* | hiux*) + cpu=hppa1.1 + vendor=hitachi + os=hiuxwe2 + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + cpu=m68000 + vendor=hp + ;; + hp9k3[2-9][0-9]) + cpu=m68k + vendor=hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 
| hp893) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + i*86v32) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + os=sysv32 + ;; + i*86v4*) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + os=sysv4 + ;; + i*86v) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + os=sysv + ;; + i*86sol2) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + os=solaris2 + ;; + j90 | j90-cray) + cpu=j90 + vendor=cray + os=${os:-unicos} + ;; + iris | iris4d) + cpu=mips + vendor=sgi + case $os in + irix*) + ;; + *) + os=irix4 + ;; + esac + ;; + miniframe) + cpu=m68000 + vendor=convergent + ;; + *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) + cpu=m68k + vendor=atari + os=mint + ;; + news-3600 | risc-news) + cpu=mips + vendor=sony + os=newsos + ;; + next | m*-next) + cpu=m68k + vendor=next + case $os in + nextstep* ) + ;; + ns2*) + os=nextstep2 + ;; + *) + os=nextstep3 + ;; + esac + ;; + np1) + cpu=np1 + vendor=gould + ;; + op50n-* | op60c-*) + cpu=hppa1.1 + vendor=oki + os=proelf + ;; + pa-hitachi) + cpu=hppa1.1 + vendor=hitachi + os=hiuxwe2 + ;; + pbd) + cpu=sparc + vendor=tti + ;; + pbb) + cpu=m68k + vendor=tti + ;; + pc532) + cpu=ns32k + vendor=pc532 + ;; + pn) + cpu=pn + vendor=gould + ;; + power) + cpu=power + vendor=ibm + ;; + ps2) + cpu=i386 + vendor=ibm + ;; + rm[46]00) + cpu=mips + vendor=siemens + ;; + rtpc | rtpc-*) + cpu=romp + vendor=ibm + ;; + sde) + cpu=mipsisa32 + vendor=sde + os=${os:-elf} + ;; + simso-wrs) + cpu=sparclite + vendor=wrs + os=vxworks + ;; + tower | tower-32) + cpu=m68k + vendor=ncr + ;; + vpp*|vx|vx-*) + cpu=f301 + vendor=fujitsu + ;; + w65) + cpu=w65 + vendor=wdc + ;; + w89k-*) + cpu=hppa1.1 + vendor=winbond + os=proelf + ;; + none) + cpu=none + vendor=none + ;; + leon|leon[3-9]) + cpu=sparc + vendor=$basic_machine + ;; + leon-*|leon[3-9]-*) + cpu=sparc + vendor=`echo "$basic_machine" | sed 
's/-.*//'` + ;; + + *-*) + IFS="-" read -r cpu vendor <&2 + exit 1 + ;; + esac + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $vendor in + digital*) + vendor=dec + ;; + commodore*) + vendor=cbm + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x$os != x ] +then +case $os in + # First match some system type aliases that might get confused + # with valid system types. + # solaris* is a basic system type, with this one exception. + auroraux) + os=auroraux + ;; + bluegene*) + os=cnk + ;; + solaris1 | solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + solaris) + os=solaris2 + ;; + unixware*) + os=sysv4.2uw + ;; + gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # es1800 is here to avoid being matched by es* (a different OS) + es1800*) + os=ose + ;; + # Some version numbers need modification + chorusos*) + os=chorusos + ;; + isc) + os=isc2.2 + ;; + sco6) + os=sco5v6 + ;; + sco5) + os=sco3.2v5 + ;; + sco4) + os=sco3.2v4 + ;; + sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + ;; + sco3.2v[4-9]* | sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + ;; + scout) + # Don't match below + ;; + sco*) + os=sco3.2v2 + ;; + psos*) + os=psos + ;; + # Now accept the basic system types. + # The portable systems comes first. + # Each alternative MUST end in a * to match a version number. + # sysv* is not here because it comes later, after sysvr4. 
+ gnu* | bsd* | mach* | minix* | genix* | ultrix* | irix* \ + | *vms* | esix* | aix* | cnk* | sunos | sunos[34]*\ + | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \ + | sym* | kopensolaris* | plan9* \ + | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \ + | aos* | aros* | cloudabi* | sortix* \ + | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \ + | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \ + | knetbsd* | mirbsd* | netbsd* \ + | bitrig* | openbsd* | solidbsd* | libertybsd* \ + | ekkobsd* | kfreebsd* | freebsd* | riscix* | lynxos* \ + | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \ + | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \ + | udi* | eabi* | lites* | ieee* | go32* | aux* | hcos* \ + | chorusrdb* | cegcc* | glidix* \ + | cygwin* | msys* | pe* | moss* | proelf* | rtems* \ + | midipix* | mingw32* | mingw64* | linux-gnu* | linux-android* \ + | linux-newlib* | linux-musl* | linux-uclibc* \ + | uxpv* | beos* | mpeix* | udk* | moxiebox* \ + | interix* | uwin* | mks* | rhapsody* | darwin* \ + | openstep* | oskit* | conix* | pw32* | nonstopux* \ + | storm-chaos* | tops10* | tenex* | tops20* | its* \ + | os2* | vos* | palmos* | uclinux* | nucleus* \ + | morphos* | superux* | rtmk* | windiss* \ + | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \ + | skyos* | haiku* | rdos* | toppers* | drops* | es* \ + | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \ + | midnightbsd*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + qnx*) + case $cpu in + x86 | i*86) + ;; + *) + os=nto-$os + ;; + esac + ;; + hiux*) + os=hiuxwe2 + ;; + nto-qnx*) + ;; + nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + sim | xray | os68k* | v88r* \ + | windows* | osx | abug | netware* | os9* \ + | macos* | mpw* | magic* | mmixware* | mon960* | lnews*) + ;; + linux-dietlibc) + os=linux-dietlibc + ;; + linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + lynx*178) + os=lynxos178 + ;; + lynx*5) + os=lynxos5 + ;; + lynx*) + os=lynxos + ;; + mac*) + os=`echo "$os" | sed -e 's|mac|macos|'` + ;; + opened*) + os=openedition + ;; + os400*) + os=os400 + ;; + sunos5*) + os=`echo "$os" | sed -e 's|sunos5|solaris2|'` + ;; + sunos6*) + os=`echo "$os" | sed -e 's|sunos6|solaris3|'` + ;; + wince*) + os=wince + ;; + utek*) + os=bsd + ;; + dynix*) + os=bsd + ;; + acis*) + os=aos + ;; + atheos*) + os=atheos + ;; + syllable*) + os=syllable + ;; + 386bsd) + os=bsd + ;; + ctix* | uts*) + os=sysv + ;; + nova*) + os=rtmk-nova + ;; + ns2) + os=nextstep2 + ;; + nsk*) + os=nsk + ;; + # Preserve the version number of sinix5. + sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + sinix*) + os=sysv4 + ;; + tpf*) + os=tpf + ;; + triton*) + os=sysv3 + ;; + oss*) + os=sysv3 + ;; + svr4*) + os=sysv4 + ;; + svr3) + os=sysv3 + ;; + sysvr4) + os=sysv4 + ;; + # This must come after sysvr4. + sysv*) + ;; + ose*) + os=ose + ;; + *mint | mint[0-9]* | *MiNT | MiNT[0-9]*) + os=mint + ;; + zvmoe) + os=zvmoe + ;; + dicos*) + os=dicos + ;; + pikeos*) + # Until real need of OS specific support for + # particular features comes up, bare metal + # configurations are quite functional. + case $cpu in + arm*) + os=eabi + ;; + *) + os=elf + ;; + esac + ;; + nacl*) + ;; + ios) + ;; + none) + ;; + *-eabi) + ;; + *) + echo Invalid configuration \`"$1"\': system \`"$os"\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. 
+# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $cpu-$vendor in + score-*) + os=elf + ;; + spu-*) + os=elf + ;; + *-acorn) + os=riscix1.2 + ;; + arm*-rebel) + os=linux + ;; + arm*-semi) + os=aout + ;; + c4x-* | tic4x-*) + os=coff + ;; + c8051-*) + os=elf + ;; + clipper-intergraph) + os=clix + ;; + hexagon-*) + os=elf + ;; + tic54x-*) + os=coff + ;; + tic55x-*) + os=coff + ;; + tic6x-*) + os=coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=tops20 + ;; + pdp11-*) + os=none + ;; + *-dec | vax-*) + os=ultrix4.2 + ;; + m68*-apollo) + os=domain + ;; + i386-sun) + os=sunos4.0.2 + ;; + m68000-sun) + os=sunos3 + ;; + m68*-cisco) + os=aout + ;; + mep-*) + os=elf + ;; + mips*-cisco) + os=elf + ;; + mips*-*) + os=elf + ;; + or32-*) + os=coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=sysv3 + ;; + sparc-* | *-sun) + os=sunos4.1.1 + ;; + pru-*) + os=elf + ;; + *-be) + os=beos + ;; + *-ibm) + os=aix + ;; + *-knuth) + os=mmixware + ;; + *-wec) + os=proelf + ;; + *-winbond) + os=proelf + ;; + *-oki) + os=proelf + ;; + *-hp) + os=hpux + ;; + *-hitachi) + os=hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=sysv + ;; + *-cbm) + os=amigaos + ;; + *-dg) + os=dgux + ;; + *-dolphin) + os=sysv3 + ;; + m68k-ccur) + os=rtu + ;; + m88k-omron*) + os=luna + ;; + *-next) + os=nextstep + ;; + *-sequent) + os=ptx + ;; + *-crds) + os=unos + ;; + *-ns) + os=genix + ;; + i370-*) + os=mvs + ;; + *-gould) + os=sysv + ;; + *-highlevel) + os=bsd + ;; + *-encore) + os=bsd + ;; + *-sgi) + os=irix + ;; + *-siemens) + os=sysv4 + ;; + *-masscomp) + os=rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=uxpv + ;; + *-rom68k) + os=coff + ;; + *-*bug) + os=coff + ;; + *-apple) + os=macos + ;; + *-atari*) + os=mint + ;; + *-wrs) + os=vxworks + ;; + *) + os=none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+case $vendor in + unknown) + case $os in + riscix*) + vendor=acorn + ;; + sunos*) + vendor=sun + ;; + cnk* | aix*) + vendor=ibm + ;; + beos*) + vendor=be + ;; + hpux*) + vendor=hp + ;; + mpeix*) + vendor=hp + ;; + hiux*) + vendor=hitachi + ;; + unos*) + vendor=crds + ;; + dgux*) + vendor=dg + ;; + luna*) + vendor=omron + ;; + genix*) + vendor=ns + ;; + clix*) + vendor=intergraph + ;; + mvs* | opened*) + vendor=ibm + ;; + os400*) + vendor=ibm + ;; + ptx*) + vendor=sequent + ;; + tpf*) + vendor=ibm + ;; + vxsim* | vxworks* | windiss*) + vendor=wrs + ;; + aux*) + vendor=apple + ;; + hms*) + vendor=hitachi + ;; + mpw* | macos*) + vendor=apple + ;; + *mint | mint[0-9]* | *MiNT | MiNT[0-9]*) + vendor=atari + ;; + vos*) + vendor=stratus + ;; + esac + ;; +esac + +echo "$cpu-$vendor-$os" +exit + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/config/m4/ax_prog_doxygen.m4 b/config/m4/ax_prog_doxygen.m4 new file mode 100644 index 0000000..7b87c88 --- /dev/null +++ b/config/m4/ax_prog_doxygen.m4 @@ -0,0 +1,555 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_prog_doxygen.html +# =========================================================================== +# +# SYNOPSIS +# +# DX_INIT_DOXYGEN(PROJECT-NAME, DOXYFILE-PATH, [OUTPUT-DIR]) +# DX_DOXYGEN_FEATURE(ON|OFF) +# DX_DOT_FEATURE(ON|OFF) +# DX_HTML_FEATURE(ON|OFF) +# DX_CHM_FEATURE(ON|OFF) +# DX_CHI_FEATURE(ON|OFF) +# DX_MAN_FEATURE(ON|OFF) +# DX_RTF_FEATURE(ON|OFF) +# DX_XML_FEATURE(ON|OFF) +# DX_PDF_FEATURE(ON|OFF) +# DX_PS_FEATURE(ON|OFF) +# +# DESCRIPTION +# +# The DX_*_FEATURE macros control the default setting for the given +# Doxygen feature.
Supported features are 'DOXYGEN' itself, 'DOT' for +# generating graphics, 'HTML' for plain HTML, 'CHM' for compressed HTML +# help (for MS users), 'CHI' for generating a separate .chi file by the +# .chm file, and 'MAN', 'RTF', 'XML', 'PDF' and 'PS' for the appropriate +# output formats. The environment variable DOXYGEN_PAPER_SIZE may be +# specified to override the default 'a4wide' paper size. +# +# By default, HTML, PDF and PS documentation is generated as this seems to +# be the most popular and portable combination. MAN pages created by +# Doxygen are usually problematic, though by picking an appropriate subset +# and doing some massaging they might be better than nothing. CHM and RTF +# are specific for MS (note that you can't generate both HTML and CHM at +# the same time). The XML is rather useless unless you apply specialized +# post-processing to it. +# +# The macros mainly control the default state of the feature. The user can +# override the default by specifying --enable or --disable. The macros +# ensure that contradictory flags are not given (e.g., +# --enable-doxygen-html and --enable-doxygen-chm, +# --enable-doxygen-anything with --disable-doxygen, etc.) Finally, each +# feature will be automatically disabled (with a warning) if the required +# programs are missing. +# +# Once all the feature defaults have been specified, call DX_INIT_DOXYGEN +# with the following parameters: a one-word name for the project for use +# as a filename base etc., an optional configuration file name (the +# default is 'Doxyfile', the same as Doxygen's default), and an optional +# output directory name (the default is 'doxygen-doc'). +# +# Automake Support +# +# The following is a template aminclude.am file for use with Automake. +# Make targets and variables values are controlled by the various +# DX_COND_* conditionals set by autoconf. +# +# The provided targets are: +# +# doxygen-doc: Generate all doxygen documentation.
+# +# doxygen-run: Run doxygen, which will generate some of the +# documentation (HTML, CHM, CHI, MAN, RTF, XML) +# but will not do the post processing required +# for the rest of it (PS, PDF, and some MAN). +# +# doxygen-man: Rename some doxygen generated man pages. +# +# doxygen-ps: Generate doxygen PostScript documentation. +# +# doxygen-pdf: Generate doxygen PDF documentation. +# +# Note that by default these are not integrated into the automake targets. +# If doxygen is used to generate man pages, you can achieve this +# integration by setting man3_MANS to the list of man pages generated and +# then adding the dependency: +# +# $(man3_MANS): doxygen-doc +# +# This will cause make to run doxygen and generate all the documentation. +# +# The following variable is intended for use in Makefile.am: +# +# DX_CLEANFILES = everything to clean. +# +# Then add this variable to MOSTLYCLEANFILES. +# +# ----- begin aminclude.am ------------------------------------- +# +# ## --------------------------------- ## +# ## Format-independent Doxygen rules. ## +# ## --------------------------------- ## +# +# if DX_COND_doc +# +# ## ------------------------------- ## +# ## Rules specific for HTML output. ## +# ## ------------------------------- ## +# +# if DX_COND_html +# +# DX_CLEAN_HTML = @DX_DOCDIR@/html +# +# endif DX_COND_html +# +# ## ------------------------------ ## +# ## Rules specific for CHM output. ## +# ## ------------------------------ ## +# +# if DX_COND_chm +# +# DX_CLEAN_CHM = @DX_DOCDIR@/chm +# +# if DX_COND_chi +# +# DX_CLEAN_CHI = @DX_DOCDIR@/@PACKAGE@.chi +# +# endif DX_COND_chi +# +# endif DX_COND_chm +# +# ## ------------------------------ ## +# ## Rules specific for MAN output. ## +# ## ------------------------------ ## +# +# if DX_COND_man +# +# DX_CLEAN_MAN = @DX_DOCDIR@/man +# +# endif DX_COND_man +# +# ## ------------------------------ ## +# ## Rules specific for RTF output. 
## +# ## ------------------------------ ## +# +# if DX_COND_rtf +# +# DX_CLEAN_RTF = @DX_DOCDIR@/rtf +# +# endif DX_COND_rtf +# +# ## ------------------------------ ## +# ## Rules specific for XML output. ## +# ## ------------------------------ ## +# +# if DX_COND_xml +# +# DX_CLEAN_XML = @DX_DOCDIR@/xml +# +# endif DX_COND_xml +# +# ## ----------------------------- ## +# ## Rules specific for PS output. ## +# ## ----------------------------- ## +# +# if DX_COND_ps +# +# DX_CLEAN_PS = @DX_DOCDIR@/@PACKAGE@.ps +# +# DX_PS_GOAL = doxygen-ps +# +# doxygen-ps: @DX_DOCDIR@/@PACKAGE@.ps +# +# @DX_DOCDIR@/@PACKAGE@.ps: @DX_DOCDIR@/@PACKAGE@.tag +# cd @DX_DOCDIR@/latex; \ +# rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \ +# $(DX_LATEX) refman.tex; \ +# $(MAKEINDEX_PATH) refman.idx; \ +# $(DX_LATEX) refman.tex; \ +# countdown=5; \ +# while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \ +# refman.log > /dev/null 2>&1 \ +# && test $$countdown -gt 0; do \ +# $(DX_LATEX) refman.tex; \ +# countdown=`expr $$countdown - 1`; \ +# done; \ +# $(DX_DVIPS) -o ../@PACKAGE@.ps refman.dvi +# +# endif DX_COND_ps +# +# ## ------------------------------ ## +# ## Rules specific for PDF output. 
## +# ## ------------------------------ ## +# +# if DX_COND_pdf +# +# DX_CLEAN_PDF = @DX_DOCDIR@/@PACKAGE@.pdf +# +# DX_PDF_GOAL = doxygen-pdf +# +# doxygen-pdf: @DX_DOCDIR@/@PACKAGE@.pdf +# +# @DX_DOCDIR@/@PACKAGE@.pdf: @DX_DOCDIR@/@PACKAGE@.tag +# cd @DX_DOCDIR@/latex; \ +# rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \ +# $(DX_PDFLATEX) refman.tex; \ +# $(DX_MAKEINDEX) refman.idx; \ +# $(DX_PDFLATEX) refman.tex; \ +# countdown=5; \ +# while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \ +# refman.log > /dev/null 2>&1 \ +# && test $$countdown -gt 0; do \ +# $(DX_PDFLATEX) refman.tex; \ +# countdown=`expr $$countdown - 1`; \ +# done; \ +# mv refman.pdf ../@PACKAGE@.pdf +# +# endif DX_COND_pdf +# +# ## ------------------------------------------------- ## +# ## Rules specific for LaTeX (shared for PS and PDF). ## +# ## ------------------------------------------------- ## +# +# if DX_COND_latex +# +# DX_CLEAN_LATEX = @DX_DOCDIR@/latex +# +# endif DX_COND_latex +# +# .PHONY: doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) +# +# .INTERMEDIATE: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL) +# +# doxygen-run: @DX_DOCDIR@/@PACKAGE@.tag +# +# doxygen-doc: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL) +# +# @DX_DOCDIR@/@PACKAGE@.tag: $(DX_CONFIG) $(pkginclude_HEADERS) +# rm -rf @DX_DOCDIR@ +# $(DX_ENV) $(DX_DOXYGEN) $(srcdir)/$(DX_CONFIG) +# echo Timestamp >$@ +# +# DX_CLEANFILES = \ +# @DX_DOCDIR@/@PACKAGE@.tag \ +# -r \ +# $(DX_CLEAN_HTML) \ +# $(DX_CLEAN_CHM) \ +# $(DX_CLEAN_CHI) \ +# $(DX_CLEAN_MAN) \ +# $(DX_CLEAN_RTF) \ +# $(DX_CLEAN_XML) \ +# $(DX_CLEAN_PS) \ +# $(DX_CLEAN_PDF) \ +# $(DX_CLEAN_LATEX) +# +# endif DX_COND_doc +# +# ----- end aminclude.am --------------------------------------- +# +# LICENSE +# +# Copyright (c) 2009 Oren Ben-Kiki +# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. 
+# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 13 + +## ----------## +## Defaults. ## +## ----------## + +DX_ENV="" +DX_MIN_VERSION="1.8.6" + +AC_DEFUN([DX_FEATURE_doc], ON) +AC_DEFUN([DX_FEATURE_dot], OFF) +AC_DEFUN([DX_FEATURE_man], OFF) +AC_DEFUN([DX_FEATURE_html], ON) +AC_DEFUN([DX_FEATURE_chm], OFF) +AC_DEFUN([DX_FEATURE_chi], OFF) +AC_DEFUN([DX_FEATURE_rtf], OFF) +AC_DEFUN([DX_FEATURE_xml], OFF) +AC_DEFUN([DX_FEATURE_pdf], ON) +AC_DEFUN([DX_FEATURE_ps], ON) + +## --------------- ## +## Private macros. ## +## --------------- ## + +# DX_ENV_APPEND(VARIABLE, VALUE) +# ------------------------------ +# Append VARIABLE="VALUE" to DX_ENV for invoking doxygen. +AC_DEFUN([DX_ENV_APPEND], [AC_SUBST([DX_ENV], ["$DX_ENV $1='$2'"])]) + +# DX_DIRNAME_EXPR +# --------------- +# Expand into a shell expression prints the directory part of a path. +AC_DEFUN([DX_DIRNAME_EXPR], + [[expr ".$1" : '\(\.\)[^/]*$' \| "x$1" : 'x\(.*\)/[^/]*$']]) + +# DX_IF_FEATURE(FEATURE, IF-ON, IF-OFF) +# ------------------------------------- +# Expands according to the M4 (static) status of the feature. +AC_DEFUN([DX_IF_FEATURE], [ifelse(DX_FEATURE_$1, ON, [$2], [$3])]) + +# DX_REQUIRE_PROG(VARIABLE, PROGRAM) +# ---------------------------------- +# Require the specified program to be found for the DX_CURRENT_FEATURE to work. +AC_DEFUN([DX_REQUIRE_PROG], [ +AC_PATH_TOOL([$1], [$2]) +if test "$DX_FLAG_[]DX_CURRENT_FEATURE$$1" = 1; then + AC_MSG_WARN([$2 not found - will not DX_CURRENT_DESCRIPTION]) + AC_SUBST(DX_FLAG_[]DX_CURRENT_FEATURE, 0) +fi +]) + +# DX_REQUIRE_PROG_WITH_VERSION(VARIABLE, PROGRAM, VERSIONCMD, MINVERSION) +# ---------------------------------- +# Require the specified program to be found for the DX_CURRENT_FEATURE to work. 
+AC_DEFUN([DX_REQUIRE_PROG_WITH_VERSION], [ +AC_PATH_TOOL([$1], [$2]) +if test "$DX_FLAG_[]DX_CURRENT_FEATURE$$1" = 1; then + AC_MSG_WARN([$2 not found - will not DX_CURRENT_DESCRIPTION]) + AC_SUBST(DX_FLAG_[]DX_CURRENT_FEATURE, 0) +fi +version=`$3` +AS_VERSION_COMPARE($version, $4, + [AC_MSG_WARN([$2 version $version is bad. Required version: $4 and above]) + AC_SUBST(DX_FLAG_[]DX_CURRENT_FEATURE, 0) + ],[],[]) +]) + +# DX_TEST_FEATURE(FEATURE) +# ------------------------ +# Expand to a shell expression testing whether the feature is active. +AC_DEFUN([DX_TEST_FEATURE], [test "$DX_FLAG_$1" = 1]) + +# DX_CHECK_DEPEND(REQUIRED_FEATURE, REQUIRED_STATE) +# ------------------------------------------------- +# Verify that a required feature has the right state before trying to turn on +# the DX_CURRENT_FEATURE. +AC_DEFUN([DX_CHECK_DEPEND], [ +test "$DX_FLAG_$1" = "$2" \ +|| AC_MSG_ERROR([doxygen-DX_CURRENT_FEATURE ifelse([$2], 1, + requires, contradicts) doxygen-$1]) +]) + +# DX_CLEAR_DEPEND(REQUIRED_FEATURE, REQUIRED_STATE) +# ------------------------------------------------- +# Turn off the DX_CURRENT_FEATURE if the required feature is not in the required state. +AC_DEFUN([DX_CLEAR_DEPEND], [ +test "$DX_FLAG_$1" = "$2" || AC_SUBST(DX_FLAG_[]DX_CURRENT_FEATURE, 0) +]) + +# DX_ARG_ABLE(FEATURE, DESCRIPTION, +# CHECK_DEPEND, CLEAR_DEPEND, +# REQUIRE, DO-IF-ON, DO-IF-OFF) +# -------------------------------------------- +# Parse the command-line option controlling a feature. CHECK_DEPEND is called +# if the user explicitly turns the feature on (and invokes DX_CHECK_DEPEND), +# otherwise CLEAR_DEPEND is called to turn off the default state if a required +# feature is disabled (using DX_CLEAR_DEPEND). REQUIRE performs additional +# requirement tests (DX_REQUIRE_PROG). Finally, an automake flag is set and +# DO-IF-ON or DO-IF-OFF are called according to the final state of the feature.
+AC_DEFUN([DX_ARG_ABLE], [ + AC_DEFUN([DX_CURRENT_FEATURE], [$1]) + AC_DEFUN([DX_CURRENT_DESCRIPTION], [$2]) + AC_ARG_ENABLE(doxygen-$1, + [AS_HELP_STRING(DX_IF_FEATURE([$1], [--disable-doxygen-$1], + [--enable-doxygen-$1]), + DX_IF_FEATURE([$1], [don't $2], [$2]))], + [ +case "$enableval" in +#( +y|Y|yes|Yes|YES) + AC_SUBST([DX_FLAG_$1], 1) + $3 +;; #( +n|N|no|No|NO) + AC_SUBST([DX_FLAG_$1], 0) +;; #( +*) + AC_MSG_ERROR([invalid value '$enableval' given to doxygen-$1]) +;; +esac +], [ +AC_SUBST([DX_FLAG_$1], [DX_IF_FEATURE([$1], 1, 0)]) +$4 +]) +if DX_TEST_FEATURE([$1]); then + $5 + : +fi +AM_CONDITIONAL(DX_COND_$1, DX_TEST_FEATURE([$1])) +if DX_TEST_FEATURE([$1]); then + $6 + : +else + $7 + : +fi +]) + +## -------------- ## +## Public macros. ## +## -------------- ## + +# DX_XXX_FEATURE(DEFAULT_STATE) +# ----------------------------- +AC_DEFUN([DX_DOXYGEN_FEATURE], [AC_DEFUN([DX_FEATURE_doc], [$1])]) +AC_DEFUN([DX_DOT_FEATURE], [AC_DEFUN([DX_FEATURE_dot], [$1])]) +AC_DEFUN([DX_MAN_FEATURE], [AC_DEFUN([DX_FEATURE_man], [$1])]) +AC_DEFUN([DX_HTML_FEATURE], [AC_DEFUN([DX_FEATURE_html], [$1])]) +AC_DEFUN([DX_CHM_FEATURE], [AC_DEFUN([DX_FEATURE_chm], [$1])]) +AC_DEFUN([DX_CHI_FEATURE], [AC_DEFUN([DX_FEATURE_chi], [$1])]) +AC_DEFUN([DX_RTF_FEATURE], [AC_DEFUN([DX_FEATURE_rtf], [$1])]) +AC_DEFUN([DX_XML_FEATURE], [AC_DEFUN([DX_FEATURE_xml], [$1])]) +AC_DEFUN([DX_XML_FEATURE], [AC_DEFUN([DX_FEATURE_xml], [$1])]) +AC_DEFUN([DX_PDF_FEATURE], [AC_DEFUN([DX_FEATURE_pdf], [$1])]) +AC_DEFUN([DX_PS_FEATURE], [AC_DEFUN([DX_FEATURE_ps], [$1])]) + +# DX_INIT_DOXYGEN(PROJECT, [CONFIG-FILE], [OUTPUT-DOC-DIR]) +# --------------------------------------------------------- +# PROJECT also serves as the base name for the documentation files. +# The default CONFIG-FILE is "Doxyfile" and OUTPUT-DOC-DIR is "doxygen-doc". 
+AC_DEFUN([DX_INIT_DOXYGEN], [ + +# Files: +AC_SUBST([DX_PROJECT], [$1]) +AC_SUBST([DX_CONFIG], [ifelse([$2], [], Doxyfile, [$2])]) +AC_SUBST([DX_DOCDIR], [ifelse([$3], [], doxygen-doc, [$3])]) + +# Environment variables used inside doxygen.cfg: +DX_ENV_APPEND(SRCDIR, $srcdir) +DX_ENV_APPEND(PROJECT, $DX_PROJECT) +DX_ENV_APPEND(DOCDIR, $DX_DOCDIR) +DX_ENV_APPEND(VERSION, $PACKAGE_VERSION) + +# Doxygen itself: +DX_ARG_ABLE(doc, [generate any doxygen documentation], + [], + [], + [DX_REQUIRE_PROG_WITH_VERSION([DX_DOXYGEN], doxygen,[doxygen --version], + $DX_MIN_VERSION) + DX_REQUIRE_PROG([DX_PERL], perl)], + [DX_ENV_APPEND(PERL_PATH, $DX_PERL)]) + +# Dot for graphics: +DX_ARG_ABLE(dot, [generate graphics for doxygen documentation], + [DX_CHECK_DEPEND(doc, 1)], + [DX_CLEAR_DEPEND(doc, 1)], + [DX_REQUIRE_PROG([DX_DOT], dot)], + [DX_ENV_APPEND(HAVE_DOT, YES) + DX_ENV_APPEND(DOT_PATH, [`DX_DIRNAME_EXPR($DX_DOT)`])], + [DX_ENV_APPEND(HAVE_DOT, NO)]) + +# Man pages generation: +DX_ARG_ABLE(man, [generate doxygen manual pages], + [DX_CHECK_DEPEND(doc, 1)], + [DX_CLEAR_DEPEND(doc, 1)], + [], + [DX_ENV_APPEND(GENERATE_MAN, YES)], + [DX_ENV_APPEND(GENERATE_MAN, NO)]) + +# RTF file generation: +DX_ARG_ABLE(rtf, [generate doxygen RTF documentation], + [DX_CHECK_DEPEND(doc, 1)], + [DX_CLEAR_DEPEND(doc, 1)], + [], + [DX_ENV_APPEND(GENERATE_RTF, YES)], + [DX_ENV_APPEND(GENERATE_RTF, NO)]) + +# XML file generation: +DX_ARG_ABLE(xml, [generate doxygen XML documentation], + [DX_CHECK_DEPEND(doc, 1)], + [DX_CLEAR_DEPEND(doc, 1)], + [], + [DX_ENV_APPEND(GENERATE_XML, YES)], + [DX_ENV_APPEND(GENERATE_XML, NO)]) + +# (Compressed) HTML help generation: +DX_ARG_ABLE(chm, [generate doxygen compressed HTML help documentation], + [DX_CHECK_DEPEND(doc, 1)], + [DX_CLEAR_DEPEND(doc, 1)], + [DX_REQUIRE_PROG([DX_HHC], hhc)], + [DX_ENV_APPEND(HHC_PATH, $DX_HHC) + DX_ENV_APPEND(GENERATE_HTML, YES) + DX_ENV_APPEND(GENERATE_HTMLHELP, YES)], + [DX_ENV_APPEND(GENERATE_HTMLHELP, NO)]) + +# Seperate CHI 
file generation. +DX_ARG_ABLE(chi, [generate doxygen seperate compressed HTML help index file], + [DX_CHECK_DEPEND(chm, 1)], + [DX_CLEAR_DEPEND(chm, 1)], + [], + [DX_ENV_APPEND(GENERATE_CHI, YES)], + [DX_ENV_APPEND(GENERATE_CHI, NO)]) + +# Plain HTML pages generation: +DX_ARG_ABLE(html, [generate doxygen plain HTML documentation], + [DX_CHECK_DEPEND(doc, 1) DX_CHECK_DEPEND(chm, 0)], + [DX_CLEAR_DEPEND(doc, 1) DX_CLEAR_DEPEND(chm, 0)], + [], + [DX_ENV_APPEND(GENERATE_HTML, YES)], + [DX_TEST_FEATURE(chm) || DX_ENV_APPEND(GENERATE_HTML, NO)]) + +# PostScript file generation: +DX_ARG_ABLE(ps, [generate doxygen PostScript documentation], + [DX_CHECK_DEPEND(doc, 1)], + [DX_CLEAR_DEPEND(doc, 1)], + [DX_REQUIRE_PROG([DX_LATEX], latex) + DX_REQUIRE_PROG([DX_MAKEINDEX], makeindex) + DX_REQUIRE_PROG([DX_BIBTEX], bibtex) + DX_REQUIRE_PROG([DX_DVIPS], dvips) + DX_REQUIRE_PROG([DX_EGREP], egrep)]) + +# PDF file generation: +DX_ARG_ABLE(pdf, [generate doxygen PDF documentation], + [DX_CHECK_DEPEND(doc, 1)], + [DX_CLEAR_DEPEND(doc, 1)], + [DX_REQUIRE_PROG([DX_PDFLATEX], pdflatex) + DX_REQUIRE_PROG([DX_MAKEINDEX], makeindex) + DX_REQUIRE_PROG([DX_BIBTEX], bibtex) + DX_REQUIRE_PROG([DX_EGREP], egrep)]) + +# LaTeX generation for PS and/or PDF: +AM_CONDITIONAL(DX_COND_latex, DX_TEST_FEATURE(ps) || DX_TEST_FEATURE(pdf)) +if DX_TEST_FEATURE(ps) || DX_TEST_FEATURE(pdf); then + DX_ENV_APPEND(GENERATE_LATEX, YES) +else + DX_ENV_APPEND(GENERATE_LATEX, NO) +fi + +# Paper size for PS and/or PDF: +AC_ARG_VAR(DOXYGEN_PAPER_SIZE, + [a4wide (default), a4, letter, legal or executive]) +case "$DOXYGEN_PAPER_SIZE" in +#( +"") + AC_SUBST(DOXYGEN_PAPER_SIZE, "") +;; #( +a4wide|a4|letter|legal|executive) + DX_ENV_APPEND(PAPER_SIZE, $DOXYGEN_PAPER_SIZE) +;; #( +*) + AC_MSG_ERROR([unknown DOXYGEN_PAPER_SIZE='$DOXYGEN_PAPER_SIZE']) +;; +esac + +#For debugging: +#echo DX_FLAG_doc=$DX_FLAG_doc +#echo DX_FLAG_dot=$DX_FLAG_dot +#echo DX_FLAG_man=$DX_FLAG_man +#echo DX_FLAG_html=$DX_FLAG_html +#echo 
DX_FLAG_chm=$DX_FLAG_chm +#echo DX_FLAG_chi=$DX_FLAG_chi +#echo DX_FLAG_rtf=$DX_FLAG_rtf +#echo DX_FLAG_xml=$DX_FLAG_xml +#echo DX_FLAG_pdf=$DX_FLAG_pdf +#echo DX_FLAG_ps=$DX_FLAG_ps +#echo DX_ENV=$DX_ENV +]) diff --git a/config/m4/compiler.m4 b/config/m4/compiler.m4 new file mode 100644 index 0000000..82bdc3d --- /dev/null +++ b/config/m4/compiler.m4 @@ -0,0 +1,454 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED. +# Copyright (C) ARM Ltd. 2016-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Initialize CFLAGS +# +BASE_CFLAGS="-g -Wall -Werror" + +# +# Debug mode +# +AC_ARG_ENABLE(debug, + AC_HELP_STRING([--enable-debug], [Enable debug mode build]), + [], + [enable_debug=no]) +AS_IF([test "x$enable_debug" = xyes], + [BASE_CFLAGS="-D_DEBUG $BASE_CFLAGS"], + []) + +# +# Optimization level +# +AC_ARG_ENABLE(compiler-opt, + AC_HELP_STRING([--enable-compiler-opt], [Set optimization level [0-3]]), + [], + [enable_compiler_opt="none"]) +AS_IF([test "x$enable_compiler_opt" = "xyes"], [BASE_CFLAGS="-O3 $BASE_CFLAGS"], + [test "x$enable_compiler_opt" = "xnone"], + [AS_IF([test "x$enable_debug" = xyes], + [BASE_CFLAGS="-O0 $BASE_CFLAGS"], + [BASE_CFLAGS="-O3 $BASE_CFLAGS"])], + [test "x$enable_compiler_opt" = "xno"], [], + [BASE_CFLAGS="-O$enable_compiler_opt $BASE_CFLAGS"]) + + +# +# CHECK_CROSS_COMP (program, true-action, false-action) +# +# The macro checks if it can run the program; it executes +# true action if the program can be executed, otherwise +# false action is executed. +# For cross-platform compilation we only check +# if we can compile and link the program. 
+AC_DEFUN([CHECK_CROSS_COMP], [ + AC_RUN_IFELSE([$1], [$2], [$3], + [AC_LINK_IFELSE([$1], [$2], [$3])]) +]) + +# +# Check for one specific attribute by compiling with C +# Usage: CHECK_SPECIFIC_ATTRIBUTE([name], [define-suffix], [program]) +# Defines HAVE_ATTRIBUTE_<define-suffix> to 1 if the program compiles, 0 otherwise +# +AC_DEFUN([CHECK_SPECIFIC_ATTRIBUTE], [ + AC_CACHE_VAL(ucx_cv_attribute_[$1], [ + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS" + # + # Try to compile using the C compiler + # + AC_TRY_COMPILE([$3],[], + [ucx_cv_attribute_[$1]=1], + [ucx_cv_attribute_[$1]=0]) + CFLAGS="$SAVE_CFLAGS" + ]) + AC_MSG_CHECKING([for __attribute__([$1])]) + AC_MSG_RESULT([$ucx_cv_attribute_[$1]]) + AC_DEFINE_UNQUOTED([HAVE_ATTRIBUTE_[$2]], [$ucx_cv_attribute_[$1]], [Check attribute [$1]]) +]) + + +# +# Enable/disable turning on machine-specific optimizations +# +AC_ARG_ENABLE(optimizations, + AC_HELP_STRING([--enable-optimizations], + [Enable non-portable machine-specific CPU optimizations, default: NO]), + [], + [enable_optimizations=no]) + + +# +# Check if compiler supports a given CPU optimization flag, and if yes - add it +# to BASE_CFLAGS substitution, and OPT_CFLAGS C define. +# +# Usage: COMPILER_CPU_OPTIMIZATION([name], [doc], [flag], [program]) +# +AC_DEFUN([COMPILER_CPU_OPTIMIZATION], +[ + AC_ARG_WITH([$1], + [AC_HELP_STRING([--with-$1], [Use $2 compiler option.])], + [], + [with_$1=$enable_optimizations]) + + AS_IF([test "x$with_$1" != "xno"], + [SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS $3" + AC_MSG_CHECKING([$3]) + CHECK_CROSS_COMP([AC_LANG_SOURCE([$4])], + [AC_MSG_RESULT([yes]) + # TODO: Add CPU UARCH detector and validator in UCX init. + # As for now we will avoid passing this information to + # library.
+ BASE_CFLAGS="$BASE_CFLAGS $3" + AS_IF([test "x$1" != "xmcpu" -a "x$1" != "xmarch"], + [OPT_CFLAGS="$OPT_CFLAGS|$1"])], + [AC_MSG_RESULT([no])]) + CFLAGS="$SAVE_CFLAGS"]) +]) + + +# +# Check platform uarch and apply micro-architecture specific optimizations +# +AC_DEFUN([DETECT_UARCH], +[ + cpuimpl=`grep 'CPU implementer' /proc/cpuinfo 2> /dev/null | cut -d: -f2 | tr -d " " | head -n 1` + cpuarch=`grep 'CPU architecture' /proc/cpuinfo 2> /dev/null | cut -d: -f2 | tr -d " " | head -n 1` + cpuvar=`grep 'CPU variant' /proc/cpuinfo 2> /dev/null | cut -d: -f2 | tr -d " " | head -n 1` + cpupart=`grep 'CPU part' /proc/cpuinfo 2> /dev/null | cut -d: -f2 | tr -d " " | head -n 1` + + ax_cpu="" + ax_arch="" + + AC_MSG_NOTICE(Detected CPU implementation: ${cpuimpl}) + AC_MSG_NOTICE(Detected CPU architecture: ${cpuarch}) + AC_MSG_NOTICE(Detected CPU variant: ${cpuvar}) + AC_MSG_NOTICE(Detected CPU part: ${cpupart}) + + case $cpuimpl in + 0x42) case $cpupart in + 0x516 | 0x0516) + AC_DEFINE([HAVE_AARCH64_THUNDERX2], 1, [Cavium ThunderX2]) + ax_cpu="thunderx2t99" + ax_arch="armv8.1-a+lse" ;; + 0xaf | 0x0af) + AC_DEFINE([HAVE_AARCH64_THUNDERX2], 1, [Cavium ThunderX2]) + ax_cpu="thunderx2t99" + ax_arch="armv8.1-a+lse" ;; + esac + ;; + 0x43) case $cpupart in + 0x516 | 0x0516) + AC_DEFINE([HAVE_AARCH64_THUNDERX2], 1, [Cavium ThunderX2]) + ax_cpu="thunderx2t99" + ax_arch="armv8.1-a+lse" ;; + 0xaf | 0x0af) + AC_DEFINE([HAVE_AARCH64_THUNDERX2], 1, [Cavium ThunderX2]) + ax_cpu="thunderx2t99" + ax_arch="armv8.1-a+lse" ;; + 0xa1 | 0x0a1) + AC_DEFINE([HAVE_AARCH64_THUNDERX1], 1, [Cavium ThunderX1]) + ax_cpu="thunderxt88" ;; + esac + ;; + 0x48) case $cpupart in + 0xd01 | 0x0d01) + AC_DEFINE([HAVE_AARCH64_HI1620], 1, [Huawei Kunpeng 920]) + ax_cpu="tsv110" + ax_arch="armv8.2-a" ;; + esac + ;; + *) + ;; + esac +]) + + +# +# CHECK_COMPILER_FLAG +# Usage: CHECK_COMPILER_FLAG([name], [flag], [program], [if-true], [if-false]) +# +# The macro checks if program may be compiled using specified flag 
+# +AC_DEFUN([CHECK_COMPILER_FLAG], +[ + AC_MSG_CHECKING([compiler flag $1]) + # Save CFLAGS and CXXFLAGS separately so each one is restored to its own value + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CXXFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS $2" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS $2" + AC_COMPILE_IFELSE([$3], + [AC_MSG_RESULT([yes]) + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + $4], + [AC_MSG_RESULT([no]) + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + $5]) +]) + +# +# ADD_COMPILER_FLAG_IF_SUPPORTED +# Usage: ADD_COMPILER_FLAG_IF_SUPPORTED([name], [flag], [program], [if-true], [if-false]) +# +# The macro checks if program may be compiled using specified flag and adds +# this flag to BASE_CFLAGS if it is supported +# +AC_DEFUN([ADD_COMPILER_FLAG_IF_SUPPORTED], +[ + CHECK_COMPILER_FLAG([$1], [$2], [$3], + [BASE_CFLAGS="$BASE_CFLAGS $2" + $4], + [$5]) +]) + +# +# CHECK_DEPRECATED_DECL_FLAG (flag, variable) +# +# The macro checks if the given compiler flag enables using deprecated declarations. +# If yes, it appends the flag to "variable". +# +AC_DEFUN([CHECK_DEPRECATED_DECL_FLAG], +[ + AC_MSG_CHECKING([whether $1 overrides deprecated declarations]) + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS $1" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + int __attribute__ ((__deprecated__)) f() { return 0; } + int main() { return f(); } + ]])], + [AC_MSG_RESULT([yes]) + $2="${$2} $1"], + [AC_MSG_RESULT([no])]) + CFLAGS="$SAVE_CFLAGS" +]) + + +# +# Force ICC to treat command line warnings as errors.
+# This evaluation should be called prior to all other compiler flags evals +# +CHECK_COMPILER_FLAG([-diag-error 10006], [-diag-error 10006], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [BASE_CFLAGS="$BASE_CFLAGS -diag-error 10006" + BASE_CXXFLAGS="$BASE_CXXFLAGS -diag-error 10006"], + []) + + +CHECK_DEPRECATED_DECL_FLAG([-diag-disable 1478], CFLAGS_NO_DEPRECATED) # icc +CHECK_DEPRECATED_DECL_FLAG([-Wno-deprecated-declarations], CFLAGS_NO_DEPRECATED) # gcc +AC_SUBST([CFLAGS_NO_DEPRECATED], [$CFLAGS_NO_DEPRECATED]) + + +# +# Disable format-string warning on ICC +# The test program needs stdlib.h for NULL/free and stdio.h for scanf +# +ADD_COMPILER_FLAG_IF_SUPPORTED([-diag-disable 269], + [-diag-disable 269], + [AC_LANG_SOURCE([[#include <stdlib.h> + #include <stdio.h> + int main() { + char *p = NULL; + scanf("%m[^.]", &p); + free(p); + return 0; + }]])], + [], + []) + + +# +# Set default datatype alignment to 16 bytes. +# Some compilers (LLVM based, clang) expect allocation of datatypes by 32 bytes +# to optimize operations memset/memcpy/etc using vectorized processor instructions +# which requires alignment of memory buffer by 32 or higher bytes. Default malloc method +# guarantees alignment for 16 bytes only. Force using compiler 16-bytes alignment +# by default if option is supported.
+# +UCX_ALLOC_ALIGN=16 +ADD_COMPILER_FLAG_IF_SUPPORTED([-fmax-type-align=$UCX_ALLOC_ALIGN], + [-fmax-type-align=$UCX_ALLOC_ALIGN], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [AC_DEFINE_UNQUOTED([UCX_ALLOC_ALIGN], $UCX_ALLOC_ALIGN, [Set alignment assumption for compiler])], + []) + + +# +# SSE/AVX +# Each probe includes the intrinsics header that declares the function it calls; +# SSE probes are skipped only when AVX was explicitly requested. +# +COMPILER_CPU_OPTIMIZATION([avx], [AVX], [-mavx], + [#include <immintrin.h> + int main() { + return _mm256_testz_si256(_mm256_set1_epi32(1), _mm256_set1_epi32(3)); + } + ]) +AS_IF([test "x$with_avx" != xyes], + [COMPILER_CPU_OPTIMIZATION([sse41], [SSE 4.1], [-msse4.1], + [#include <smmintrin.h> + int main() { + return _mm_testz_si128(_mm_set1_epi32(1), _mm_set1_epi32(3)); + } + ]) + COMPILER_CPU_OPTIMIZATION([sse42], [SSE 4.2], [-msse4.2], + [#include <popcntintrin.h> + int main() { return _mm_popcnt_u32(0x101) - 2; + }]) + ]) + + +DETECT_UARCH() + + +# +# CPU tuning +# +AS_IF([test "x$ax_cpu" != "x"], + [COMPILER_CPU_OPTIMIZATION([mcpu], [CPU Model], [-mcpu=$ax_cpu], + [int main() { return 0;}]) + ]) + + +# +# Architecture tuning +# +AS_IF([test "x$ax_arch" != "x"], + [COMPILER_CPU_OPTIMIZATION([march], [architecture tuning], [-march=$ax_arch], + [int main() { return 0;}]) + ]) + + +# +# Check for compiler attribute which disables optimizations per-function. +# +CHECK_SPECIFIC_ATTRIBUTE([optimize], [NOOPTIMIZE], + [int foo (int arg) __attribute__ ((optimize("O0")));]) + + +# +# Compile code with frame pointer. Optimizations usually omit the frame pointer, +# but if we are profiling the code with callgraph we need it. +# This option may affect performance so it is off by default.
+# +AC_ARG_ENABLE([frame-pointer], + AS_HELP_STRING([--enable-frame-pointer], + [Compile with frame pointer, useful for profiling, default: NO]), + [], + [enable_frame_pointer=no]) +AS_IF([test "x$enable_frame_pointer" = xyes], + [ADD_COMPILER_FLAG_IF_SUPPORTED([-fno-omit-frame-pointer], + [-fno-omit-frame-pointer], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [AS_MESSAGE([compiling with frame pointer])], + [AS_MESSAGE([compiling with frame pointer is not supported])])], + [:]) + + +# +# Check for C++11 support +# The probe uses std::to_string which is declared in the string header +# +AC_MSG_CHECKING([c++11 support]) +AC_LANG_PUSH([C++]) +SAVE_CXXFLAGS="$CXXFLAGS" +CXX11FLAGS="-std=c++11" +CXXFLAGS="$CXXFLAGS $CXX11FLAGS" +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#include <iostream> + #include <string> + int main() { + std::to_string(1); + return 0; + } ]])], + [AC_MSG_RESULT([yes]) + AC_SUBST([CXX11FLAGS]) + cxx11_happy=yes], + [AC_MSG_RESULT([no]) + cxx11_happy=no]) +CXXFLAGS="$SAVE_CXXFLAGS" +AC_LANG_POP +AM_CONDITIONAL([HAVE_CXX11], [test "x$cxx11_happy" != xno]) + + +# +# Check for GNU++11 support +# NOTE(review): CXX11FLAGS is reassigned below even if this probe fails - confirm that is intended +# +AC_MSG_CHECKING([gnu++11 support]) +AC_LANG_PUSH([C++]) +SAVE_CXXFLAGS="$CXXFLAGS" +CXX11FLAGS="-std=gnu++11" +CXXFLAGS="$CXXFLAGS $CXX11FLAGS" +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#include <iostream> + #include <string> + int main() { + int a; + typeof(a) b = 0; + std::to_string(1); + return 0; + } ]])], + [AC_MSG_RESULT([yes]) + AC_SUBST([CXX11FLAGS]) + gnuxx11_happy=yes], + [AC_MSG_RESULT([no]) + gnuxx11_happy=no]) +CXXFLAGS="$SAVE_CXXFLAGS" +AC_LANG_POP +AM_CONDITIONAL([HAVE_GNUXX11], [test "x$gnuxx11_happy" != xno]) + + +# +# PGI specific switches +# +ADD_COMPILER_FLAG_IF_SUPPORTED([--display_error_number], + [--display_error_number], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [], + []) + +# Suppress incorrect printf format for PGI18 compiler.
TODO: remove it after compiler fix +ADD_COMPILER_FLAG_IF_SUPPORTED([--diag_suppress 181], + [--diag_suppress 181], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [], + []) + +# Suppress deprecated API warning for PGI18 compiler +ADD_COMPILER_FLAG_IF_SUPPORTED([--diag_suppress 1215], + [--diag_suppress 1215], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [], + []) + +# Use of a const variable in a constant expression is nonstandard in C +ADD_COMPILER_FLAG_IF_SUPPORTED([--diag_suppress 1901], + [--diag_suppress 1901], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [], + []) + +# Check if "-pedantic" flag is supported +CHECK_COMPILER_FLAG([-pedantic], [-pedantic], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [CFLAGS_PEDANTIC="$CFLAGS_PEDANTIC -pedantic"], + []) + + +# +# Set C++ optimization/debug flags to be the same as for C +# +BASE_CXXFLAGS="$BASE_CFLAGS" +AC_SUBST([BASE_CFLAGS], [$BASE_CFLAGS]) +AC_SUBST([BASE_CXXFLAGS], [$BASE_CXXFLAGS]) +AC_SUBST([CFLAGS_PEDANTIC], [$CFLAGS_PEDANTIC]) + +# +# Set common preprocessor flags +# +BASE_CPPFLAGS="-DCPU_FLAGS=\"$OPT_CFLAGS\"" +BASE_CPPFLAGS="$BASE_CPPFLAGS -I\${abs_top_srcdir}/src" +BASE_CPPFLAGS="$BASE_CPPFLAGS -I\${abs_top_builddir}" +BASE_CPPFLAGS="$BASE_CPPFLAGS -I\${abs_top_builddir}/src" +AC_SUBST([BASE_CPPFLAGS], [$BASE_CPPFLAGS]) diff --git a/config/m4/cuda.m4 b/config/m4/cuda.m4 new file mode 100644 index 0000000..1862eb6 --- /dev/null +++ b/config/m4/cuda.m4 @@ -0,0 +1,67 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +AC_DEFUN([UCX_CHECK_CUDA],[ + +AS_IF([test "x$cuda_checked" != "xyes"], + [ + AC_ARG_WITH([cuda], + [AS_HELP_STRING([--with-cuda=(DIR)], [Enable the use of CUDA (default is guess).])], + [], [with_cuda=guess]) + + AS_IF([test "x$with_cuda" = "xno"], + [cuda_happy=no], + [ + save_CPPFLAGS="$CPPFLAGS" + save_LDFLAGS="$LDFLAGS" + + CUDA_CPPFLAGS="" + CUDA_LDFLAGS="" + + AS_IF([test ! 
-z "$with_cuda" -a "x$with_cuda" != "xyes" -a "x$with_cuda" != "xguess"], + [ucx_check_cuda_dir="$with_cuda" + AS_IF([test -d "$with_cuda/lib64"], [libsuff="64"], [libsuff=""]) + ucx_check_cuda_libdir="$with_cuda/lib$libsuff" + CUDA_CPPFLAGS="-I$with_cuda/include" + CUDA_LDFLAGS="-L$ucx_check_cuda_libdir -L$ucx_check_cuda_libdir/stubs"]) + + AS_IF([test ! -z "$with_cuda_libdir" -a "x$with_cuda_libdir" != "xyes"], + [ucx_check_cuda_libdir="$with_cuda_libdir" + CUDA_LDFLAGS="-L$ucx_check_cuda_libdir -L$ucx_check_cuda_libdir/stubs"]) + + CPPFLAGS="$CPPFLAGS $CUDA_CPPFLAGS" + LDFLAGS="$LDFLAGS $CUDA_LDFLAGS" + + # Check cuda header files + AC_CHECK_HEADERS([cuda.h cuda_runtime.h], + [cuda_happy="yes"], [cuda_happy="no"]) + + # Check cuda libraries + AS_IF([test "x$cuda_happy" = "xyes"], + [AC_CHECK_LIB([cuda], [cuDeviceGetUuid], + [CUDA_LDFLAGS="$CUDA_LDFLAGS -lcuda"], [cuda_happy="no"])]) + AS_IF([test "x$cuda_happy" = "xyes"], + [AC_CHECK_LIB([cudart], [cudaGetDeviceCount], + [CUDA_LDFLAGS="$CUDA_LDFLAGS -lcudart"], [cuda_happy="no"])]) + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + + AS_IF([test "x$cuda_happy" = "xyes"], + [AC_SUBST([CUDA_CPPFLAGS], ["$CUDA_CPPFLAGS"]) + AC_SUBST([CUDA_LDFLAGS], ["$CUDA_LDFLAGS"]) + AC_DEFINE([HAVE_CUDA], 1, [Enable CUDA support])], + [AS_IF([test "x$with_cuda" != "xguess"], + [AC_MSG_ERROR([CUDA support is requested but cuda packages cannot be found])], + [AC_MSG_WARN([CUDA not found])])]) + + ]) # "x$with_cuda" = "xno" + + cuda_checked=yes + AM_CONDITIONAL([HAVE_CUDA], [test "x$cuda_happy" != xno]) + + ]) # "x$cuda_checked" != "xyes" + +]) # UCX_CHECK_CUDA diff --git a/config/m4/gdrcopy.m4 b/config/m4/gdrcopy.m4 new file mode 100644 index 0000000..537338c --- /dev/null +++ b/config/m4/gdrcopy.m4 @@ -0,0 +1,67 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + +AC_DEFUN([UCX_CHECK_GDRCOPY],[ + +AS_IF([test "x$gdrcopy_checked" != "xyes"],[ + +gdrcopy_happy="no" + +AC_ARG_WITH([gdrcopy], + [AS_HELP_STRING([--with-gdrcopy=(DIR)], [Enable the use of GDR_COPY (default is guess).])], + [], [with_gdrcopy=guess]) + +AS_IF([test "x$with_gdrcopy" != "xno"], + [save_CPPFLAGS="$CPPFLAGS" + save_CFLAGS="$CFLAGS" + save_LDFLAGS="$LDFLAGS" + + AS_IF([test ! -z "$with_gdrcopy" -a "x$with_gdrcopy" != "xyes" -a "x$with_gdrcopy" != "xguess"], + [ + ucx_check_gdrcopy_dir="$with_gdrcopy" + AS_IF([test -d "$with_gdrcopy/lib64"],[libsuff="64"],[libsuff=""]) + ucx_check_gdrcopy_libdir="$with_gdrcopy/lib$libsuff" + CPPFLAGS="-I$with_gdrcopy/include $save_CPPFLAGS" + LDFLAGS="-L$ucx_check_gdrcopy_libdir $save_LDFLAGS" + ]) + AS_IF([test ! -z "$with_gdrcopy_libdir" -a "x$with_gdrcopy_libdir" != "xyes"], + [ucx_check_gdrcopy_libdir="$with_gdrcopy_libdir" + LDFLAGS="-L$ucx_check_gdrcopy_libdir $save_LDFLAGS"]) + + AC_CHECK_HEADERS([gdrapi.h], + [AC_CHECK_LIB([gdrapi] , [gdr_pin_buffer], + [gdrcopy_happy="yes"], + [AC_MSG_WARN([GDR_COPY runtime not detected. Disable.]) + gdrcopy_happy="no"]) + ], [gdrcopy_happy="no"]) + + AS_IF([test "x$gdrcopy_happy" = "xyes"], + [AC_CHECK_DECLS([gdr_copy_to_mapping], [], [], [#include "gdrapi.h"])]) + + CFLAGS="$save_CFLAGS" + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + + AS_IF([test "x$gdrcopy_happy" = "xyes"], + [ + # Emit the same include and library directories that were used for detection above. + # Nothing is emitted for a path that was never set - this is the guess mode with system-wide install. + AC_SUBST(GDR_COPY_CPPFLAGS, "${ucx_check_gdrcopy_dir:+-I$ucx_check_gdrcopy_dir/include/}") + AC_SUBST(GDR_COPY_LDFLAGS, "-lgdrapi ${ucx_check_gdrcopy_libdir:+-L$ucx_check_gdrcopy_libdir}") + ], + [ + AS_IF([test "x$with_gdrcopy" != "xguess"], + [AC_MSG_ERROR([gdrcopy support is requested but gdrcopy packages cannot be found])], + [AC_MSG_WARN([GDR_COPY not found])]) + ]) + + ], + [AC_MSG_WARN([GDR_COPY was explicitly disabled])]) + +gdrcopy_checked=yes +AM_CONDITIONAL([HAVE_GDR_COPY], [test "x$gdrcopy_happy" != xno]) + +]) + +]) diff --git a/config/m4/graphviz.m4 b/config/m4/graphviz.m4 new file mode 100644 index 0000000..777f667 --- /dev/null +++ b/config/m4/graphviz.m4 @@ -0,0 +1,8 @@ +# +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +AC_DEFUN([AC_PROG_GRAPHVIZ_DOT], [AC_CHECK_PROG([GRAPHVIZ_DOT],[dot],[yes])]) +AC_PROG_GRAPHVIZ_DOT +AM_CONDITIONAL([HAVE_DOT], [test "x$GRAPHVIZ_DOT" = xyes]) diff --git a/config/m4/gtest.m4 b/config/m4/gtest.m4 new file mode 100644 index 0000000..4852d78 --- /dev/null +++ b/config/m4/gtest.m4 @@ -0,0 +1,23 @@ +# Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +dnl GTEST_LIB_CHECK([minimum version [, +dnl action if found [,action if not found]]]) +dnl +dnl Google C++ Testing Framework is a part of project source code. +dnl So ignore version check and just process if gtest is enabled. +AC_DEFUN([GTEST_LIB_CHECK], +[ +dnl Provide a flag to enable or disable Google Test usage. +AC_ARG_ENABLE([gtest], + [AS_HELP_STRING([--enable-gtest], + [Enable tests using the Google C++ Testing Framework.
+ (Default is disabled.)])], + [enable_gtest=$enableval], + [enable_gtest=no]) +AC_MSG_CHECKING([for using Google C++ Testing Framework]) +AC_MSG_RESULT([$enable_gtest]) +AM_CONDITIONAL([HAVE_GTEST],[test "x$enable_gtest" = "xyes"]) +]) diff --git a/config/m4/java.m4 b/config/m4/java.m4 new file mode 100644 index 0000000..3d9c9b7 --- /dev/null +++ b/config/m4/java.m4 @@ -0,0 +1,82 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# +# +# +# Check for Java support +# +# +java_happy="no" +AC_ARG_WITH([java], + [AC_HELP_STRING([--with-java=(PATH)], + [Compile Java UCX (default is guess).]) + ], [], [with_java=guess]) + +AS_IF([test "x$with_java" != xno], + [ + AC_CHECK_PROG(MVNBIN, mvn, yes) + AC_CHECK_PROG(JAVABIN, java, yes) + AS_IF([test "x${MVNBIN}" = "xyes" -a "x${JAVABIN}" = "xyes"], + [ + AS_IF([test -n "$with_java" -a "x$with_java" != "xyes" -a "x$with_java" != "xguess"], + [java_dir=$with_java], + [ + AS_IF([test -n "$JAVA_HOME"], + [], + [ + AC_CHECK_PROG(READLINK, readlink, yes) + AS_IF([test "x${READLINK}" = xyes], + [ + AC_SUBST([JAVA], [$(readlink -f $(type -P java))]) + AC_SUBST([JAVA_HOME], [${JAVA%*/jre*}]) + AC_MSG_WARN([Please set JAVA_HOME=$JAVA_HOME]) + ], + [ + AS_IF( + [test "x$with_java" = "xguess"], + [AC_MSG_WARN([For Java support please install readlink or set JAVA_HOME=])], + [AC_MSG_ERROR([Java support requested, but couldn't find path; please set JAVA_HOME=])] + ) + ] + ) + ] + ) + java_dir=$JAVA_HOME + ] + ) + save_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="-I$java_dir/include/linux -I$java_dir/include $CPPFLAGS" + AC_CHECK_HEADERS([jni_md.h jni.h], + [ + java_happy="yes" + ], + [ + AS_IF([test "x$with_java" = "xguess"], + [AC_MSG_WARN([Couldn't find jni headers.])], + [AC_MSG_ERROR([Java support requested, but couldn't find jni headers in $java_dir])] + ) + ] + ) + + CPPFLAGS="$save_CPPFLAGS" + ], + [ + AS_IF([test "x$with_java" = "xguess"], + [AC_MSG_WARN([Disabling Java support 
- java or mvn not in path.])], + [AC_MSG_ERROR([Java support was explicitly requested, but java or mvn not in path.])] + ) + ] + ) + ], + [AC_MSG_WARN([Java support was explicitly disabled.])] + ) + +AC_SUBST([JDK], [${java_dir}]) +AM_CONDITIONAL([HAVE_JAVA], [test "x$java_happy" != "xno"]) +#Set MVN according to whether user has Java and Maven or not +AM_COND_IF([HAVE_JAVA], + [AC_SUBST([MVN], ["mvn"]) + build_bindings="${build_bindings}:java"] + ) diff --git a/config/m4/libtool.m4 b/config/m4/libtool.m4 new file mode 100644 index 0000000..a644432 --- /dev/null +++ b/config/m4/libtool.m4 @@ -0,0 +1,8372 @@ +# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- +# +# Copyright (C) 1996-2001, 2003-2015 Free Software Foundation, Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +m4_define([_LT_COPYING], [dnl +# Copyright (C) 2014 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program or library that is built +# using GNU Libtool, you may include this file under the same +# distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +]) + +# serial 58 LT_INIT + + +# LT_PREREQ(VERSION) +# ------------------ +# Complain and exit if this libtool version is less that VERSION. +m4_defun([LT_PREREQ], +[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, + [m4_default([$3], + [m4_fatal([Libtool version $1 or higher is required], + 63)])], + [$2])]) + + +# _LT_CHECK_BUILDDIR +# ------------------ +# Complain if the absolute build directory name contains unusual characters +m4_defun([_LT_CHECK_BUILDDIR], +[case `pwd` in + *\ * | *\ *) + AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; +esac +]) + + +# LT_INIT([OPTIONS]) +# ------------------ +AC_DEFUN([LT_INIT], +[AC_PREREQ([2.62])dnl We use AC_PATH_PROGS_FEATURE_CHECK +AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +AC_BEFORE([$0], [LT_LANG])dnl +AC_BEFORE([$0], [LT_OUTPUT])dnl +AC_BEFORE([$0], [LTDL_INIT])dnl +m4_require([_LT_CHECK_BUILDDIR])dnl + +dnl Autoconf doesn't catch unexpanded LT_ macros by default: +m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl +m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl +dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 +dnl unless we require an AC_DEFUNed macro: +AC_REQUIRE([LTOPTIONS_VERSION])dnl +AC_REQUIRE([LTSUGAR_VERSION])dnl +AC_REQUIRE([LTVERSION_VERSION])dnl +AC_REQUIRE([LTOBSOLETE_VERSION])dnl +m4_require([_LT_PROG_LTMAIN])dnl + +_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) + +dnl Parse OPTIONS +_LT_SET_OPTIONS([$0], [$1]) + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS=$ltmain + +# Always use our own libtool. 
+LIBTOOL='$(SHELL) $(top_builddir)/libtool' +AC_SUBST(LIBTOOL)dnl + +_LT_SETUP + +# Only expand once: +m4_define([LT_INIT]) +])# LT_INIT + +# Old names: +AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) +AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PROG_LIBTOOL], []) +dnl AC_DEFUN([AM_PROG_LIBTOOL], []) + + +# _LT_PREPARE_CC_BASENAME +# ----------------------- +m4_defun([_LT_PREPARE_CC_BASENAME], [ +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. +func_cc_basename () +{ + for cc_temp in @S|@*""; do + case $cc_temp in + compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; + distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} +])# _LT_PREPARE_CC_BASENAME + + +# _LT_CC_BASENAME(CC) +# ------------------- +# It would be clearer to call AC_REQUIREs from _LT_PREPARE_CC_BASENAME, +# but that macro is also expanded into generated libtool script, which +# arranges for $SED and $ECHO to be set by different means. +m4_defun([_LT_CC_BASENAME], +[m4_require([_LT_PREPARE_CC_BASENAME])dnl +AC_REQUIRE([_LT_DECL_SED])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl +func_cc_basename $1 +cc_basename=$func_cc_basename_result +]) + + +# _LT_FILEUTILS_DEFAULTS +# ---------------------- +# It is okay to use these file commands and assume they have been set +# sensibly after 'm4_require([_LT_FILEUTILS_DEFAULTS])'. 
+m4_defun([_LT_FILEUTILS_DEFAULTS], +[: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} +])# _LT_FILEUTILS_DEFAULTS + + +# _LT_SETUP +# --------- +m4_defun([_LT_SETUP], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl + +_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl +dnl +_LT_DECL([], [host_alias], [0], [The host system])dnl +_LT_DECL([], [host], [0])dnl +_LT_DECL([], [host_os], [0])dnl +dnl +_LT_DECL([], [build_alias], [0], [The build system])dnl +_LT_DECL([], [build], [0])dnl +_LT_DECL([], [build_os], [0])dnl +dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +dnl +AC_REQUIRE([AC_PROG_LN_S])dnl +test -z "$LN_S" && LN_S="ln -s" +_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl +dnl +AC_REQUIRE([LT_CMD_MAX_LEN])dnl +_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl +_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl +dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl +m4_require([_LT_CMD_RELOAD])dnl +m4_require([_LT_CHECK_MAGIC_METHOD])dnl +m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl +m4_require([_LT_CMD_OLD_ARCHIVE])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_WITH_SYSROOT])dnl +m4_require([_LT_CMD_TRUNCATE])dnl + +_LT_CONFIG_LIBTOOL_INIT([ +# See if we are running on zsh, and set the options that allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi +]) +if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + +_LT_CHECK_OBJDIR + +m4_require([_LT_TAG_COMPILER])dnl + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. 
For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a '.a' archive for static linking (except MSVC, +# which needs '.lib'). +libext=a + +with_gnu_ld=$lt_cv_prog_gnu_ld + +old_CC=$CC +old_CFLAGS=$CFLAGS + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +_LT_CC_BASENAME([$compiler]) + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + _LT_PATH_MAGIC + fi + ;; +esac + +# Use C for the default configuration in the libtool script +LT_SUPPORTED_TAG([CC]) +_LT_LANG_C_CONFIG +_LT_LANG_DEFAULT_CONFIG +_LT_CONFIG_COMMANDS +])# _LT_SETUP + + +# _LT_PREPARE_SED_QUOTE_VARS +# -------------------------- +# Define a few sed substitution that help us do robust quoting. +m4_defun([_LT_PREPARE_SED_QUOTE_VARS], +[# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\([["`\\]]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. 
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' +]) + +# _LT_PROG_LTMAIN +# --------------- +# Note that this code is called both from 'configure', and 'config.status' +# now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, +# 'config.status' has no value for ac_aux_dir unless we are using Automake, +# so we pass a copy along to make sure it has a sensible value anyway. +m4_defun([_LT_PROG_LTMAIN], +[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl +_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) +ltmain=$ac_aux_dir/ltmain.sh +])# _LT_PROG_LTMAIN + + +## ------------------------------------- ## +## Accumulate code for creating libtool. ## +## ------------------------------------- ## + +# So that we can recreate a full libtool script including additional +# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS +# in macros and then make a single call at the end using the 'libtool' +# label. + + +# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) +# ---------------------------------------- +# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL_INIT], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_INIT], + [$1 +])])]) + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_INIT]) + + +# _LT_CONFIG_LIBTOOL([COMMANDS]) +# ------------------------------ +# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], + [$1 +])])]) + +# Initialize. 
+m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) + + +# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) +# ----------------------------------------------------- +m4_defun([_LT_CONFIG_SAVE_COMMANDS], +[_LT_CONFIG_LIBTOOL([$1]) +_LT_CONFIG_LIBTOOL_INIT([$2]) +]) + + +# _LT_FORMAT_COMMENT([COMMENT]) +# ----------------------------- +# Add leading comment marks to the start of each line, and a trailing +# full-stop to the whole comment if one is not present already. +m4_define([_LT_FORMAT_COMMENT], +[m4_ifval([$1], [ +m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], + [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) +)]) + + + +## ------------------------ ## +## FIXME: Eliminate VARNAME ## +## ------------------------ ## + + +# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) +# ------------------------------------------------------------------- +# CONFIGNAME is the name given to the value in the libtool script. +# VARNAME is the (base) name used in the configure script. +# VALUE may be 0, 1 or 2 for a computed quote escaped value based on +# VARNAME. Any other value will be used directly. 
+m4_define([_LT_DECL], +[lt_if_append_uniq([lt_decl_varnames], [$2], [, ], + [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], + [m4_ifval([$1], [$1], [$2])]) + lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) + m4_ifval([$4], + [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) + lt_dict_add_subkey([lt_decl_dict], [$2], + [tagged?], [m4_ifval([$5], [yes], [no])])]) +]) + + +# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) +# -------------------------------------------------------- +m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) + + +# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_tag_varnames], +[_lt_decl_filter([tagged?], [yes], $@)]) + + +# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) +# --------------------------------------------------------- +m4_define([_lt_decl_filter], +[m4_case([$#], + [0], [m4_fatal([$0: too few arguments: $#])], + [1], [m4_fatal([$0: too few arguments: $#: $1])], + [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], + [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], + [lt_dict_filter([lt_decl_dict], $@)])[]dnl +]) + + +# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) +# -------------------------------------------------- +m4_define([lt_decl_quote_varnames], +[_lt_decl_filter([value], [1], $@)]) + + +# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_dquote_varnames], +[_lt_decl_filter([value], [2], $@)]) + + +# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_varnames_tagged], +[m4_assert([$# <= 2])dnl +_$0(m4_quote(m4_default([$1], [[, ]])), + m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), + m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) 
+m4_define([_lt_decl_varnames_tagged], +[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) + + +# lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +# SEPARATOR defaults to a comma followed by a space. When no VARNAME is +# given, the complete lt_decl_varnames list is used instead. +m4_define([lt_decl_all_varnames], +[_$0(m4_quote(m4_default([$1], [[, ]])), + m4_if([$2], [], + m4_quote(lt_decl_varnames), + m4_quote(m4_shift($@))))[]dnl +]) +# Helper for lt_decl_all_varnames: hands lt_join the separator and names it +# received, followed by lt_decl_varnames_tagged applied to the subset of +# those names that lt_decl_tag_varnames selects. +m4_define([_lt_decl_all_varnames], +[lt_join($@, lt_decl_varnames_tagged([$1], + lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl +]) + + +# _LT_CONFIG_STATUS_DECLARE([VARNAME]) +# ------------------------------------ +# Quote a variable value, and forward it to 'config.status' so that its +# declaration there will have the same value as in 'configure'. VARNAME +# must have a single quote delimited value for this to work. +m4_define([_LT_CONFIG_STATUS_DECLARE], +[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) + + +# _LT_CONFIG_STATUS_DECLARATIONS +# ------------------------------ +# We delimit libtool config variables with single quotes, so when +# we write them to config.status, we have to be sure to quote all +# embedded single quotes properly. In configure, this macro expands +# each variable declared with _LT_DECL (and _LT_TAGDECL) into: +# +# VARNAME='`$ECHO "$VARNAME" | $SED "$delay_single_quote_subst"`' +m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], +[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), + [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAGS +# ---------------- +# Output comment and list of tags supported by the script +m4_defun([_LT_LIBTOOL_TAGS], +[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl +available_tags='_LT_TAGS'dnl +]) + + +# _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) +# ----------------------------------- +# Extract the dictionary values for VARNAME (optionally with TAG) and +# expand to a commented shell variable setting: +# +# # Some comment about what VAR is for. 
+# visible_name=$lt_internal_name +m4_define([_LT_LIBTOOL_DECLARE], +[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], + [description])))[]dnl +m4_pushdef([_libtool_name], + m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl +m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), + [0], [_libtool_name=[$]$1], + [1], [_libtool_name=$lt_[]$1], + [2], [_libtool_name=$lt_[]$1], + [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl +m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl +]) + + +# _LT_LIBTOOL_CONFIG_VARS +# ----------------------- +# Produce commented declarations of non-tagged libtool config variables +# suitable for insertion in the LIBTOOL CONFIG section of the 'libtool' +# script. Tagged libtool config variables (even for the LIBTOOL CONFIG +# section) are produced by _LT_LIBTOOL_TAG_VARS. +m4_defun([_LT_LIBTOOL_CONFIG_VARS], +[m4_foreach([_lt_var], + m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAG_VARS(TAG) +# ------------------------- +m4_define([_LT_LIBTOOL_TAG_VARS], +[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) + + +# _LT_TAGVAR(VARNAME, [TAGNAME]) +# ------------------------------ +m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) + + +# _LT_CONFIG_COMMANDS +# ------------------- +# Send accumulated output to $CONFIG_STATUS. Thanks to the lists of +# variables for single and double quote escaping we saved from calls +# to _LT_DECL, we can put quote escaped variables declarations +# into 'config.status', and then the shell code to quote escape them in +# for loops in 'config.status'. Finally, any additional code accumulated +# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. 
+m4_defun([_LT_CONFIG_COMMANDS], +[AC_PROVIDE_IFELSE([LT_OUTPUT], + dnl If the libtool generation code has been placed in $CONFIG_LT, + dnl instead of duplicating it all over again into config.status, + dnl then we will have config.status run $CONFIG_LT later, so it + dnl needs to know what name is stored there: + [AC_CONFIG_COMMANDS([libtool], + [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], + dnl If the libtool generation code is destined for config.status, + dnl expand the accumulated commands and init code now: + [AC_CONFIG_COMMANDS([libtool], + [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) +])#_LT_CONFIG_COMMANDS + + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], +[ + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +_LT_CONFIG_STATUS_DECLARATIONS +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$[]1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in lt_decl_all_varnames([[ \ +]], lt_decl_quote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. 
+for var in lt_decl_all_varnames([[ \ +]], lt_decl_dquote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +_LT_OUTPUT_LIBTOOL_INIT +]) + +# _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) +# ------------------------------------ +# Generate a child script FILE with all initialization necessary to +# reuse the environment learned by the parent script, and make the +# file executable. If COMMENT is supplied, it is inserted after the +# '#!' sequence but before initialization text begins. After this +# macro, additional text can be appended to FILE to form the body of +# the child script. The macro ends with non-zero status if the +# file could not be fully written (such as if the disk is full). +m4_ifdef([AS_INIT_GENERATED], +[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], +[m4_defun([_LT_GENERATED_FILE_INIT], +[m4_require([AS_PREPARE])]dnl +[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl +[lt_write_fail=0 +cat >$1 <<_ASEOF || lt_write_fail=1 +#! $SHELL +# Generated by $as_me. +$2 +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$1 <<\_ASEOF || lt_write_fail=1 +AS_SHELL_SANITIZE +_AS_PREPARE +exec AS_MESSAGE_FD>&1 +_ASEOF +test 0 = "$lt_write_fail" && chmod +x $1[]dnl +m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT + +# LT_OUTPUT +# --------- +# This macro allows early generation of the libtool script (before +# AC_OUTPUT is called), in case it is used in configure for compilation +# tests. 
+AC_DEFUN([LT_OUTPUT], +[: ${CONFIG_LT=./config.lt} +AC_MSG_NOTICE([creating $CONFIG_LT]) +_LT_GENERATED_FILE_INIT(["$CONFIG_LT"], +[# Run this file to recreate a libtool stub with the current configuration.]) + +# The quoted here-document below is appended verbatim to $CONFIG_LT. It +# defines the help and version texts shown to the user and the option loop. +cat >>"$CONFIG_LT" <<\_LTEOF +lt_cl_silent=false +exec AS_MESSAGE_LOG_FD>>config.log +{ + echo + AS_BOX([Running $as_me.]) +} >&AS_MESSAGE_LOG_FD + +lt_cl_help="\ +'$as_me' creates a local libtool stub from the current configuration, +for use in further configure time tests before the real libtool is +generated. + +Usage: $[0] [[OPTIONS]] + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + +Report bugs to bug-libtool@gnu.org." + +lt_cl_version="\ +m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl +m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) +configured by $[0], generated by m4_PACKAGE_STRING. + +Copyright (C) 2011 Free Software Foundation, Inc. +This config.lt script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." 
+ +while test 0 != $[#] +do + case $[1] in + --version | --v* | -V ) + echo "$lt_cl_version"; exit 0 ;; + --help | --h* | -h ) + echo "$lt_cl_help"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --quiet | --q* | --silent | --s* | -q ) + lt_cl_silent=: ;; + + -*) AC_MSG_ERROR([unrecognized option: $[1] +Try '$[0] --help' for more information.]) ;; + + *) AC_MSG_ERROR([unrecognized argument: $[1] +Try '$[0] --help' for more information.]) ;; + esac + shift +done + +if $lt_cl_silent; then + exec AS_MESSAGE_FD>/dev/null +fi +_LTEOF + +cat >>"$CONFIG_LT" <<_LTEOF +_LT_OUTPUT_LIBTOOL_COMMANDS_INIT +_LTEOF + +cat >>"$CONFIG_LT" <<\_LTEOF +AC_MSG_NOTICE([creating $ofile]) +_LT_OUTPUT_LIBTOOL_COMMANDS +AS_EXIT(0) +_LTEOF +chmod +x "$CONFIG_LT" + +# configure is writing to config.log, but config.lt does its own redirection, +# appending to config.log, which fails on DOS, as config.log is still kept +# open by configure. Here we exec the FD to /dev/null, effectively closing +# config.log, so it can be properly (re)opened and appended to by config.lt. +lt_cl_success=: +test yes = "$silent" && + lt_config_lt_args="$lt_config_lt_args --quiet" +exec AS_MESSAGE_LOG_FD>/dev/null +$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false +exec AS_MESSAGE_LOG_FD>>config.log +$lt_cl_success || AS_EXIT(1) +])# LT_OUTPUT + + +# _LT_CONFIG(TAG) +# --------------- +# If TAG is the built-in tag, create an initial libtool script with a +# default configuration from the untagged config vars. Otherwise add code +# to config.status for appending the configuration named by TAG from the +# matching tagged config vars. +m4_defun([_LT_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_CONFIG_SAVE_COMMANDS([ + m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl + m4_if(_LT_TAG, [C], [ + # See if we are running on zsh, and set the options that allow our + # commands through without removal of \ escapes. 
+ if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST + fi + + cfgfile=${ofile}T + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL +# Generated automatically by $as_me ($PACKAGE) $VERSION +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: +# NOTE: Changes made to this file will be lost: look at ltmain.sh. + +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit, 1996 + +_LT_COPYING +_LT_LIBTOOL_TAGS + +# Configured defaults for sys_lib_dlsearch_path munging. +: \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} + +# ### BEGIN LIBTOOL CONFIG +_LT_LIBTOOL_CONFIG_VARS +_LT_LIBTOOL_TAG_VARS +# ### END LIBTOOL CONFIG + +_LT_EOF + + cat <<'_LT_EOF' >> "$cfgfile" + +# ### BEGIN FUNCTIONS SHARED WITH CONFIGURE + +_LT_PREPARE_MUNGE_PATH_LIST +_LT_PREPARE_CC_BASENAME + +# ### END FUNCTIONS SHARED WITH CONFIGURE + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + _LT_PROG_LTMAIN + + # We use sed instead of cat because bash on DJGPP gets confused if + # it finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + sed '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" +], +[cat <<_LT_EOF >> "$ofile" + +dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded +dnl in a comment (ie after a #). 
+# ### BEGIN LIBTOOL TAG CONFIG: $1 +_LT_LIBTOOL_TAG_VARS(_LT_TAG) +# ### END LIBTOOL TAG CONFIG: $1 +_LT_EOF +])dnl /m4_if +], +[m4_if([$1], [], [ + PACKAGE='$PACKAGE' + VERSION='$VERSION' + RM='$RM' + ofile='$ofile'], []) +])dnl /_LT_CONFIG_SAVE_COMMANDS +])# _LT_CONFIG + + +# LT_SUPPORTED_TAG(TAG) +# --------------------- +# Trace this macro to discover what tags are supported by the libtool +# --tag option, using: +# autoconf --trace 'LT_SUPPORTED_TAG:$1' +# The macro itself expands to nothing; it exists only to be traced. +AC_DEFUN([LT_SUPPORTED_TAG], []) + + +# C support is built-in for now +# _LT_LANG_C_enabled is predefined so that _LT_LANG(C) expands to nothing, +# and _LT_TAGS starts out as the empty list. +m4_define([_LT_LANG_C_enabled], []) +m4_define([_LT_TAGS], []) + + +# LT_LANG(LANG) +# ------------- +# Enable libtool support for the given language if not already enabled. +# LANG is one of the display names C, C++, Go, Java, Fortran 77, Fortran or +# Windows Resource, each mapped to its tag name below. Any other LANG is +# accepted only when a macro named _LT_LANG_<LANG>_CONFIG is defined; +# otherwise m4_fatal aborts with an unsupported language message. +AC_DEFUN([LT_LANG], +[AC_BEFORE([$0], [LT_OUTPUT])dnl +m4_case([$1], + [C], [_LT_LANG(C)], + [C++], [_LT_LANG(CXX)], + [Go], [_LT_LANG(GO)], + [Java], [_LT_LANG(GCJ)], + [Fortran 77], [_LT_LANG(F77)], + [Fortran], [_LT_LANG(FC)], + [Windows Resource], [_LT_LANG(RC)], + [m4_ifdef([_LT_LANG_]$1[_CONFIG], + [_LT_LANG($1)], + [m4_fatal([$0: unsupported language: "$1"])])])dnl +])# LT_LANG + + +# _LT_LANG(LANGNAME) +# ------------------ +# Unless _LT_LANG_<LANGNAME>_enabled is already defined: announce the tag +# through LT_SUPPORTED_TAG, append LANGNAME to _LT_TAGS, define the enabled +# marker, and expand _LT_LANG_<LANGNAME>_CONFIG. A repeated call for the +# same LANGNAME therefore expands to nothing. +m4_defun([_LT_LANG], +[m4_ifdef([_LT_LANG_]$1[_enabled], [], + [LT_SUPPORTED_TAG([$1])dnl + m4_append([_LT_TAGS], [$1 ])dnl + m4_define([_LT_LANG_]$1[_enabled], [])dnl + _LT_LANG_$1_CONFIG($1)])dnl +])# _LT_LANG + + +m4_ifndef([AC_PROG_GO], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_GO. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. 
# +############################################################ +m4_defun([AC_PROG_GO], +[AC_LANG_PUSH(Go)dnl +AC_ARG_VAR([GOC], [Go compiler command])dnl +AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl +_AC_ARG_VAR_LDFLAGS()dnl +AC_CHECK_TOOL(GOC, gccgo) +if test -z "$GOC"; then + if test -n "$ac_tool_prefix"; then + AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo]) + fi +fi +if test -z "$GOC"; then + AC_CHECK_PROG(GOC, gccgo, gccgo, false) +fi +])#m4_defun +])#m4_ifndef + + +# _LT_LANG_DEFAULT_CONFIG +# ----------------------- +m4_defun([_LT_LANG_DEFAULT_CONFIG], +[AC_PROVIDE_IFELSE([AC_PROG_CXX], + [LT_LANG(CXX)], + [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) + +AC_PROVIDE_IFELSE([AC_PROG_F77], + [LT_LANG(F77)], + [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) + +AC_PROVIDE_IFELSE([AC_PROG_FC], + [LT_LANG(FC)], + [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) + +dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal +dnl pulling things in needlessly. 
+AC_PROVIDE_IFELSE([AC_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([LT_PROG_GCJ], + [LT_LANG(GCJ)], + [m4_ifdef([AC_PROG_GCJ], + [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([A][M_PROG_GCJ], + [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([LT_PROG_GCJ], + [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) + +AC_PROVIDE_IFELSE([AC_PROG_GO], + [LT_LANG(GO)], + [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])]) + +AC_PROVIDE_IFELSE([LT_PROG_RC], + [LT_LANG(RC)], + [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) +])# _LT_LANG_DEFAULT_CONFIG + +# Obsolete macros: +AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) +AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) +AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) +AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) +AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_CXX], []) +dnl AC_DEFUN([AC_LIBTOOL_F77], []) +dnl AC_DEFUN([AC_LIBTOOL_FC], []) +dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) +dnl AC_DEFUN([AC_LIBTOOL_RC], []) + + +# _LT_TAG_COMPILER +# ---------------- +m4_defun([_LT_TAG_COMPILER], +[AC_REQUIRE([AC_PROG_CC])dnl + +_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl +_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl +_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl +_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC +])# _LT_TAG_COMPILER + + +# _LT_COMPILER_BOILERPLATE +# ------------------------ +# Check for compiler boilerplate output or warnings with +# the simple compiler test code. 
+m4_defun([_LT_COMPILER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* +])# _LT_COMPILER_BOILERPLATE + + +# _LT_LINKER_BOILERPLATE +# ---------------------- +# Check for linker boilerplate output or warnings with +# the simple link test code. +m4_defun([_LT_LINKER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* +])# _LT_LINKER_BOILERPLATE + +# _LT_REQUIRED_DARWIN_CHECKS +# ------------------------- +m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ + case $host_os in + rhapsody* | darwin*) + AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) + AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) + AC_CHECK_TOOL([LIPO], [lipo], [:]) + AC_CHECK_TOOL([OTOOL], [otool], [:]) + AC_CHECK_TOOL([OTOOL64], [otool64], [:]) + _LT_DECL([], [DSYMUTIL], [1], + [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) + _LT_DECL([], [NMEDIT], [1], + [Tool to change global to local symbols on Mac OS X]) + _LT_DECL([], [LIPO], [1], + [Tool to manipulate fat objects and archives on Mac OS X]) + _LT_DECL([], [OTOOL], [1], + [ldd/readelf like tool for Mach-O binaries on Mac OS X]) + _LT_DECL([], [OTOOL64], [1], + [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) + + AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], + [lt_cv_apple_cc_single_mod=no + if test -z "$LT_MULTI_MODULE"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. 
+ rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test 0 = "$_lt_result"; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi]) + + AC_CACHE_CHECK([for -exported_symbols_list linker flag], + [lt_cv_ld_exported_symbols_list], + [lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [lt_cv_ld_exported_symbols_list=yes], + [lt_cv_ld_exported_symbols_list=no]) + LDFLAGS=$save_LDFLAGS + ]) + + AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], + [lt_cv_ld_force_load=no + # Use prototyped definitions, as the -single_module probe does, so that + # strict-prototype warning flags in LTCFLAGS cannot fail this probe. + cat > conftest.c << _LT_EOF +int forced_loaded(void) { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD + # Honor a user-supplied AR_FLAGS like the other archive commands in this + # file; fall back to the historical "cru" when it is unset. + echo "$AR ${AR_FLAGS-cru} libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD + $AR ${AR_FLAGS-cru} libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD + echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD + $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD + cat > conftest.c << _LT_EOF +int main(void) { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c 
-Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? + if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + ]) + case $host_os in + rhapsody* | darwin1.[[012]]) + _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + darwin*) # darwin 5.x on + # if running on 10.5 or later, the deployment target defaults + # to the OS version, if on x86, and 10.4, the deployment + # target defaults to 10.4. Don't you love it? + # Hosts from darwin20 on, and deployment targets outside the 10.x + # series, must select dynamic_lookup as well. The first arm therefore + # also accepts two-digit kernel versions starting with 2 through 9, and + # the last arm is a catch-all so the variable is never left unset. + case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in + 10.0,*86*-darwin8*|10.0,*-darwin[[91]]*|10.0,*-darwin[[2-9]][[0-9]]*) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + 10.[[012]][[,.]]*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + *) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test yes = "$lt_cv_apple_cc_single_mod"; then + _lt_dar_single_mod='$single_module' + fi + if test yes = "$lt_cv_ld_exported_symbols_list"; then + _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' + fi + if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac +]) + + +# _LT_DARWIN_LINKER_FEATURES([TAG]) +# --------------------------------- +# Checks for linker and compiler features on darwin +m4_defun([_LT_DARWIN_LINKER_FEATURES], +[ + m4_require([_LT_REQUIRED_DARWIN_CHECKS]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_automatic, $1)=yes + 
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + if test yes = "$lt_cv_ld_force_load"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes], + [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes]) + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='' + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + m4_if([$1], [CXX], +[ if test yes != "$lt_cv_apple_cc_single_mod"; then + _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's|^|_|' < 
\$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil" + fi +],[]) + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi +]) + +# _LT_SYS_MODULE_PATH_AIX([TAGNAME]) +# ---------------------------------- +# Links a minimal program and checks the executable +# for the system default hardcoded library path. In most cases, +# this is /usr/lib:/lib, but when the MPI compilers are used +# the location of the communication and MPI libs are included too. +# If we don't find anything, use the default library path according +# to the aix ld manual. +# Store the results from the different compilers for each TAGNAME. +# Allow to override them for all tags through lt_cv_aix_libpath. +m4_defun([_LT_SYS_MODULE_PATH_AIX], +[m4_require([_LT_DECL_SED])dnl +if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], + [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ + lt_aix_libpath_sed='[ + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }]' + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. 
+ if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi],[]) + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=/usr/lib:/lib + fi + ]) + aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) +fi +])# _LT_SYS_MODULE_PATH_AIX + + +# _LT_SHELL_INIT(ARG) +# ------------------- +m4_define([_LT_SHELL_INIT], +[m4_divert_text([M4SH-INIT], [$1 +])])# _LT_SHELL_INIT + + + +# _LT_PROG_ECHO_BACKSLASH +# ----------------------- +# Find how we can fake an echo command that does not interpret backslash. +# In particular, with Autoconf 2.60 or later we add some code to the start +# of the generated configure script that will find a shell with a builtin +# printf (that we can use as an echo command). +m4_defun([_LT_PROG_ECHO_BACKSLASH], +[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +AC_MSG_CHECKING([how to print strings]) +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$[]1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. 
+func_echo_all () +{ + $ECHO "$*" +} + +case $ECHO in + printf*) AC_MSG_RESULT([printf]) ;; + print*) AC_MSG_RESULT([print -r]) ;; + *) AC_MSG_RESULT([cat]) ;; +esac + +m4_ifdef([_AS_DETECT_SUGGESTED], +[_AS_DETECT_SUGGESTED([ + test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test "X`printf %s $ECHO`" = "X$ECHO" \ + || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) + +_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) +_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) +])# _LT_PROG_ECHO_BACKSLASH + + +# _LT_WITH_SYSROOT +# ---------------- +AC_DEFUN([_LT_WITH_SYSROOT], +[AC_MSG_CHECKING([for sysroot]) +AC_ARG_WITH([sysroot], +[AS_HELP_STRING([--with-sysroot@<:@=DIR@:>@], + [Search for dependent libraries within DIR (or the compiler's sysroot + if not specified).])], +[], [with_sysroot=no]) + +dnl lt_sysroot will always be passed unquoted. We quote it here +dnl in case the user passed a directory name. 
+lt_sysroot= +case $with_sysroot in #( + yes) + if test yes = "$GCC"; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + AC_MSG_RESULT([$with_sysroot]) + AC_MSG_ERROR([The sysroot must be an absolute path.]) + ;; +esac + + AC_MSG_RESULT([${lt_sysroot:-no}]) +_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl +[dependent libraries, and where our libraries should be installed.])]) + +# _LT_ENABLE_LOCK +# --------------- +m4_defun([_LT_ENABLE_LOCK], +[AC_ARG_ENABLE([libtool-lock], + [AS_HELP_STRING([--disable-libtool-lock], + [avoid locking (might break parallel builds)])]) +test no = "$enable_libtool_lock" || enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out what ABI is being produced by ac_compile, and set mode + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE=32 + ;; + *ELF-64*) + HPUX_IA64_MODE=64 + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + if test yes = "$lt_cv_prog_gnu_ld"; then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +mips64*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. 
+ echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + emul=elf + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + emul="${emul}32" + ;; + *64-bit*) + emul="${emul}64" + ;; + esac + case `/usr/bin/file conftest.$ac_objext` in + *MSB*) + emul="${emul}btsmip" + ;; + *LSB*) + emul="${emul}ltsmip" + ;; + esac + case `/usr/bin/file conftest.$ac_objext` in + *N32*) + emul="${emul}n32" + ;; + esac + LD="${LD-ld} -m $emul" + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. Note that the listed cases only cover the + # situations where additional linker options are needed (such as when + # doing 32-bit compilation for a host where ld defaults to 64-bit, or + # vice versa); the common cases where no linker options are needed do + # not appear in the list. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + case `/usr/bin/file conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac + ;; + powerpc64le-*linux*) + LD="${LD-ld} -m elf32lppclinux" + ;; + powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + powerpcle-*linux*) + LD="${LD-ld} -m elf64lppc" + ;; + powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + 
+*-*-sco3.2v5*)
+  # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+  SAVE_CFLAGS=$CFLAGS
+  CFLAGS="$CFLAGS -belf"
+  AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf,
+    [AC_LANG_PUSH(C)
+     AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no])
+     AC_LANG_POP])
+  if test yes != "$lt_cv_cc_needs_belf"; then
+    # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+    CFLAGS=$SAVE_CFLAGS
+  fi
+  ;;
+*-*solaris*)
+  # Find out what ABI is being produced by ac_compile, and set linker
+  # options accordingly.
+  echo 'int i;' > conftest.$ac_ext
+  if AC_TRY_EVAL(ac_compile); then
+    case `/usr/bin/file conftest.o` in
+    *64-bit*)
+      case $lt_cv_prog_gnu_ld in
+      yes*)
+        case $host in
+        i?86-*-solaris*|x86_64-*-solaris*)
+          LD="${LD-ld} -m elf_x86_64"
+          ;;
+        sparc*-*-solaris*)
+          LD="${LD-ld} -m elf64_sparc"
+          ;;
+        esac
+        # GNU ld 2.21 introduced _sol2 emulations. Use them if available.
+        if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+          LD=${LD-ld}_sol2
+        fi
+        ;;
+      *)
+        if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
+          LD="${LD-ld} -64"
+        fi
+        ;;
+      esac
+      ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+esac
+
+need_locks=$enable_libtool_lock
+])# _LT_ENABLE_LOCK
+
+
+# _LT_PROG_AR
+# -----------
+# Find the archiver, give AR and AR_FLAGS defaults that the user may
+# override from the environment, and detect whether the archiver accepts
+# a file holding the list of members (result in lt_cv_ar_at_file).
+m4_defun([_LT_PROG_AR],
+[AC_CHECK_TOOLS(AR, [ar], false)
+: ${AR=ar}
+# Default to plain create-and-replace.  The u modifier is left out because
+# GNU ar running in deterministic mode ignores it and prints a warning.
+: ${AR_FLAGS=cr}
+_LT_DECL([], [AR], [1], [The archiver])
+_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive])
+
+AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file],
+  [lt_cv_ar_at_file=no
+   AC_COMPILE_IFELSE([AC_LANG_PROGRAM],
+     [echo conftest.$ac_objext > conftest.lst
+      lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD'
+      AC_TRY_EVAL([lt_ar_try])
+      if test 0 -eq "$ac_status"; then
+        # Ensure the archiver fails upon bogus file names.
+        rm -f conftest.$ac_objext libconftest.a
+        AC_TRY_EVAL([lt_ar_try])
+        if test 0 -ne "$ac_status"; then
+          lt_cv_ar_at_file=@
+        fi
+      fi
+      rm -f conftest.* libconftest.a
+     ])
+  ])
+
+if test no = "$lt_cv_ar_at_file"; then
+  archiver_list_spec=
+else
+  archiver_list_spec=$lt_cv_ar_at_file
+fi
+_LT_DECL([], [archiver_list_spec], [1],
+  [How to feed a file listing to the archiver])
+])# _LT_PROG_AR
+
+
+# _LT_CMD_OLD_ARCHIVE
+# -------------------
+# Locate STRIP and RANLIB (each falls back to the no-op ":") and compose the
+# command lists that build, post-install and post-uninstall a static archive.
+# When RANLIB is non-empty it is appended to both the build and the
+# post-install lists; the post-install call passes -t on bitrig and openbsd.
+# Also records whether old archive extraction must be locked (yes on darwin).
+m4_defun([_LT_CMD_OLD_ARCHIVE],
+[_LT_PROG_AR
+
+AC_CHECK_TOOL(STRIP, strip, :)
+test -z "$STRIP" && STRIP=:
+_LT_DECL([], [STRIP], [1], [A symbol stripping program])
+
+AC_CHECK_TOOL(RANLIB, ranlib, :)
+test -z "$RANLIB" && RANLIB=:
+_LT_DECL([], [RANLIB], [1],
+    [Commands used to install an old-style archive])
+
+# Determine commands to create old-style static archives.
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+
+if test -n "$RANLIB"; then
+  case $host_os in
+  bitrig* | openbsd*)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
+    ;;
+  *)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
+    ;;
+  esac
+  old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
+fi
+
+case $host_os in
+  darwin*)
+    lock_old_archive_extraction=yes ;;
+  *)
+    lock_old_archive_extraction=no ;;
+esac
+_LT_DECL([], [old_postinstall_cmds], [2])
+_LT_DECL([], [old_postuninstall_cmds], [2])
+_LT_TAGDECL([], [old_archive_cmds], [2],
+    [Commands used to build an old-style archive])
+_LT_DECL([], [lock_old_archive_extraction], [0],
+    [Whether to use a lock for old archive extraction])
+])# _LT_CMD_OLD_ARCHIVE
+
+
+# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
+#                     [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE])
+# ----------------------------------------------------------------
+# Check whether the given compiler option works
+# The verdict is cached in VARIABLE-NAME as yes or no.  FLAGS is spliced into
+# the ac_compile command line; the option counts as working only when the
+# compile succeeds, the output file (OUTPUT-FILE, or conftest.$ac_objext when
+# that argument is empty) is non-empty, and stderr, once blank lines and
+# shell trace lines are removed, is empty or equal to the compiler
+# boilerplate.  ACTION-SUCCESS or ACTION-FAILURE then runs; an empty action
+# becomes the no-op ":".
+AC_DEFUN([_LT_COMPILER_OPTION],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_SED])dnl
+AC_CACHE_CHECK([$1], [$2],
+  [$2=no
+   m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4])
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="$3" ## exclude from sc_useless_quotes_in_assignment
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&AS_MESSAGE_LOG_FD
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+       $2=yes
+     fi
+   fi
+   $RM conftest*
+])
+
+if test yes = "[$]$2"; then
+    m4_if([$5], , :, [$5])
+else
+    m4_if([$6], , :, [$6])
+fi
+])# _LT_COMPILER_OPTION
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], [])
+
+
+# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
+#                  [ACTION-SUCCESS], [ACTION-FAILURE])
+# ----------------------------------------------------
+# Check whether the given linker option works
+# FLAGS is appended to LDFLAGS for the duration of the test link and the
+# saved LDFLAGS value is put back afterwards.  VARIABLE-NAME is cached as yes
+# when the link yields a non-empty executable and stderr is either empty or,
+# once filtered, equal to the linker boilerplate; otherwise it stays no.
+AC_DEFUN([_LT_LINKER_OPTION],
+[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_SED])dnl
+AC_CACHE_CHECK([$1], [$2],
+  [$2=no
+   save_LDFLAGS=$LDFLAGS
+   LDFLAGS="$LDFLAGS $3"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&AS_MESSAGE_LOG_FD
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         $2=yes
+       fi
+     else
+       $2=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS=$save_LDFLAGS
+])
+
+if test yes = "[$]$2"; then
+    m4_if([$4], , :, [$4])
+else
+    m4_if([$5], , :, [$5])
+fi
+])# _LT_LINKER_OPTION
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], [])
+
+
+# LT_CMD_MAX_LEN
+#---------------
+# Compute lt_cv_sys_max_cmd_len, the longest command line libtool may build,
+# and publish it as max_cmd_len.  Several build systems get a fixed value or
+# one read from sysctl, sysconfig or the stune file; every other system asks
+# getconf for ARG_MAX and, failing that, keeps doubling a test string until
+# echoing it through env no longer round-trips.  A safety margin is applied
+# to the measured values.
+AC_DEFUN([LT_CMD_MAX_LEN],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+# find the maximum length of command line arguments
+AC_MSG_CHECKING([the maximum length of command line arguments])
+AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
+  i=0
+  teststring=ABCD
+
+  case $build_os in
+  msdosdjgpp*)
+    # On DJGPP, this test can blow up pretty badly due to problems in libc
+    # (any single argument exceeding 2000 bytes causes a buffer overrun
+    # during glob expansion). Even if it were fixed, the result of this
+    # check would be larger than it should be.
+    lt_cv_sys_max_cmd_len=12288; # 12K is about right
+    ;;
+
+  gnu*)
+    # Under GNU Hurd, this test is not required because there is
+    # no limit to the length of command line arguments.
+    # Libtool will interpret -1 as no limit whatsoever
+    lt_cv_sys_max_cmd_len=-1;
+    ;;
+
+  cygwin* | mingw* | cegcc*)
+    # On Win9x/ME, this test blows up -- it succeeds, but takes
+    # about 5 minutes as the teststring grows exponentially.
+    # Worse, since 9x/ME are not pre-emptively multitasking,
+    # you end up with a "frozen" computer, even though with patience
+    # the test eventually succeeds (with a max line length of 256k).
+    # Instead, let's just punt: use the minimum linelength reported by
+    # all of the supported platforms: 8192 (on NT/2K/XP).
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  mint*)
+    # On MiNT this can take a long time and run out of memory.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  amigaos*)
+    # On AmigaOS with pdksh, this test takes hours, literally.
+    # So we just punt and use a minimum line length of 8192.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  bitrig* | darwin* | dragonfly* | freebsd* | netbsd* | openbsd*)
+    # This has been around since 386BSD, at least. Likely further.
+    if test -x /sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
+    elif test -x /usr/sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
+    else
+      lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs
+    fi
+    # And add a safety zone
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    ;;
+
+  interix*)
+    # We know the value 262144 and hardcode it with a safety zone (like BSD)
+    lt_cv_sys_max_cmd_len=196608
+    ;;
+
+  os2*)
+    # The test takes a long time on OS/2.
+    lt_cv_sys_max_cmd_len=8192
+    ;;
+
+  osf*)
+    # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
+    # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
+    # nice to cause kernel panics so lets avoid the loop below.
+    # First set a reasonable default.
+    lt_cv_sys_max_cmd_len=16384
+    #
+    if test -x /sbin/sysconfig; then
+      case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
+        *1*) lt_cv_sys_max_cmd_len=-1 ;;
+      esac
+    fi
+    ;;
+  sco3.2v5*)
+    lt_cv_sys_max_cmd_len=102400
+    ;;
+  sysv5* | sco5v6* | sysv4.2uw2*)
+    kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
+    if test -n "$kargmax"; then
+      lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'`
+    else
+      lt_cv_sys_max_cmd_len=32768
+    fi
+    ;;
+  *)
+    lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
+    if test -n "$lt_cv_sys_max_cmd_len" && \
+       test undefined != "$lt_cv_sys_max_cmd_len"; then
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    else
+      # Make teststring a little bigger before we do anything with it.
+      # a 1K string should be a reasonable start.
+      for i in 1 2 3 4 5 6 7 8; do
+        teststring=$teststring$teststring
+      done
+      SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
+      # If test is not a shell built-in, we'll probably end up computing a
+      # maximum length that is only half of the actual maximum length, but
+      # we can't tell.
+      while { test X`env echo "$teststring$teststring" 2>/dev/null` \
+                 = "X$teststring$teststring"; } >/dev/null 2>&1 &&
+              test 17 != "$i" # 1/2 MB should be enough
+      do
+        i=`expr $i + 1`
+        teststring=$teststring$teststring
+      done
+      # Only check the string length outside the loop.
+      lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1`
+      teststring=
+      # Add a significant safety factor because C++ compilers can tack on
+      # massive amounts of additional arguments before passing them to the
+      # linker. It appears as though 1/2 is a usable value.
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
+    fi
+    ;;
+  esac
+])
+if test -n "$lt_cv_sys_max_cmd_len"; then
+  AC_MSG_RESULT($lt_cv_sys_max_cmd_len)
+else
+  AC_MSG_RESULT(none)
+fi
+max_cmd_len=$lt_cv_sys_max_cmd_len
+_LT_DECL([], [max_cmd_len], [0],
+    [What is the maximum length of a command?])
+])# LT_CMD_MAX_LEN
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], [])
+
+
+# _LT_HEADER_DLFCN
+# ----------------
+# Check for dlfcn.h using the default includes.
+m4_defun([_LT_HEADER_DLFCN],
+[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl
+])# _LT_HEADER_DLFCN
+
+
+# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE,
+#                      ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING)
+# ----------------------------------------------------------------
+# Unless cross compiling (then ACTION-IF-CROSS-COMPILING runs), link and run
+# a probe that dlopens itself and looks up its own symbol, first as "fnord"
+# and then as "_fnord".  The exit status of the probe selects ACTION-IF-TRUE,
+# ACTION-IF-TRUE-W-USCORE or ACTION-IF-FALSE; a failed link also selects
+# ACTION-IF-FALSE.  The probe source must include dlfcn.h (guarded by
+# HAVE_DLFCN_H) for the dl* calls and stdio.h for puts.
+m4_defun([_LT_TRY_DLOPEN_SELF],
+[m4_require([_LT_HEADER_DLFCN])dnl
+if test yes = "$cross_compiling"; then :
+  [$4]
+else
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<_LT_EOF
+[#line $LINENO "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+# define LT_DLGLOBAL RTLD_GLOBAL
+#else
+# ifdef DL_GLOBAL
+# define LT_DLGLOBAL DL_GLOBAL
+# else
+# define LT_DLGLOBAL 0
+# endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+# ifdef RTLD_LAZY
+# define LT_DLLAZY_OR_NOW RTLD_LAZY
+# else
+# ifdef DL_LAZY
+# define LT_DLLAZY_OR_NOW DL_LAZY
+# else
+# ifdef RTLD_NOW
+# define LT_DLLAZY_OR_NOW RTLD_NOW
+# else
+# ifdef DL_NOW
+# define LT_DLLAZY_OR_NOW DL_NOW
+# else
+# define LT_DLLAZY_OR_NOW 0
+# endif
+# endif
+# endif
+# endif
+#endif
+
+/* When -fvisibility=hidden is used, assume the code has been annotated
+   correspondingly for the symbols needed.
*/
+#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
+int fnord () __attribute__((visibility("default")));
+#endif
+
+int fnord () { return 42; }
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord")) status = $lt_dlno_uscore;
+      else
+        {
+          if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore;
+          else puts (dlerror ());
+        }
+      /* dlclose (self); */
+    }
+  else
+    puts (dlerror ());
+
+  return status;
+}]
+_LT_EOF
+  if AC_TRY_EVAL(ac_link) && test -s "conftest$ac_exeext" 2>/dev/null; then
+    (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null
+    lt_status=$?
+    case x$lt_status in
+      x$lt_dlno_uscore) $1 ;;
+      x$lt_dlneed_uscore) $2 ;;
+      x$lt_dlunknown|x*) $3 ;;
+    esac
+  else :
+    # compilation failed
+    $3
+  fi
+fi
+rm -fr conftest*
+])# _LT_TRY_DLOPEN_SELF
+
+
+# LT_SYS_DLOPEN_SELF
+# ------------------
+# When enable_dlopen is yes, pick the dlopen-style entry point and the
+# library providing it for the host (lt_cv_dlopen, lt_cv_dlopen_libs) and,
+# if that entry point is dlopen, test whether a program, and then a
+# statically linked program, can dlopen itself.  CPPFLAGS, LDFLAGS and LIBS
+# are saved before those tests and restored after them.  When enable_dlopen
+# is anything else, all three enable_dlopen* results are set to unknown.
+AC_DEFUN([LT_SYS_DLOPEN_SELF],
+[m4_require([_LT_HEADER_DLFCN])dnl
+if test yes != "$enable_dlopen"; then
+  enable_dlopen=unknown
+  enable_dlopen_self=unknown
+  enable_dlopen_self_static=unknown
+else
+  lt_cv_dlopen=no
+  lt_cv_dlopen_libs=
+
+  case $host_os in
+  beos*)
+    lt_cv_dlopen=load_add_on
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ;;
+
+  mingw* | pw32* | cegcc*)
+    lt_cv_dlopen=LoadLibrary
+    lt_cv_dlopen_libs=
+    ;;
+
+  cygwin*)
+    lt_cv_dlopen=dlopen
+    lt_cv_dlopen_libs=
+    ;;
+
+  darwin*)
+    # if libdl is installed we need to link against it
+    AC_CHECK_LIB([dl], [dlopen],
+                [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl],[
+    lt_cv_dlopen=dyld
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ])
+    ;;
+
+  tpf*)
+    # Don't try to run any link tests for TPF. We know it's impossible
+    # because TPF is a cross-compiler, and we know how we open DSOs.
+    lt_cv_dlopen=dlopen
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=no
+    ;;
+
+  *)
+    AC_CHECK_FUNC([shl_load],
+          [lt_cv_dlopen=shl_load],
+      [AC_CHECK_LIB([dld], [shl_load],
+            [lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld],
+        [AC_CHECK_FUNC([dlopen],
+              [lt_cv_dlopen=dlopen],
+          [AC_CHECK_LIB([dl], [dlopen],
+                [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl],
+            [AC_CHECK_LIB([svld], [dlopen],
+                  [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld],
+              [AC_CHECK_LIB([dld], [dld_link],
+                    [lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld])
+              ])
+            ])
+          ])
+        ])
+      ])
+    ;;
+  esac
+
+  if test no = "$lt_cv_dlopen"; then
+    enable_dlopen=no
+  else
+    enable_dlopen=yes
+  fi
+
+  case $lt_cv_dlopen in
+  dlopen)
+    save_CPPFLAGS=$CPPFLAGS
+    test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+
+    save_LDFLAGS=$LDFLAGS
+    wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
+
+    save_LIBS=$LIBS
+    LIBS="$lt_cv_dlopen_libs $LIBS"
+
+    AC_CACHE_CHECK([whether a program can dlopen itself],
+          lt_cv_dlopen_self, [dnl
+          _LT_TRY_DLOPEN_SELF(
+            lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes,
+            lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross)
+    ])
+
+    if test yes = "$lt_cv_dlopen_self"; then
+      wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
+      AC_CACHE_CHECK([whether a statically linked program can dlopen itself],
+          lt_cv_dlopen_self_static, [dnl
+          _LT_TRY_DLOPEN_SELF(
+            lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes,
+            lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross)
+      ])
+    fi
+
+    CPPFLAGS=$save_CPPFLAGS
+    LDFLAGS=$save_LDFLAGS
+    LIBS=$save_LIBS
+    ;;
+  esac
+
+  case $lt_cv_dlopen_self in
+  yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+  *) enable_dlopen_self=unknown ;;
+  esac
+
+  case $lt_cv_dlopen_self_static in
+  yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+  *) enable_dlopen_self_static=unknown ;;
+  esac
+fi
+_LT_DECL([dlopen_support], [enable_dlopen], [0],
+         [Whether dlopen is supported])
+_LT_DECL([dlopen_self], [enable_dlopen_self], [0],
+         [Whether dlopen of programs is supported])
+_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0],
+         [Whether dlopen of statically linked programs is supported])
+])# LT_SYS_DLOPEN_SELF
+
+# Old name:
+AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], [])
+
+
+# _LT_COMPILER_C_O([TAGNAME])
+# ---------------------------
+# Check to see if options -c and -o are simultaneously supported by compiler.
+# This macro does not hard code the compiler like AC_PROG_CC_C_O.
+# The probe runs inside a scratch directory named conftest and compiles to
+# out/conftest2.$ac_objext; the answer is yes only when that file is
+# non-empty and stderr, once filtered, is empty or equal to the compiler
+# boilerplate.  The scratch directory is removed afterwards.
+m4_defun([_LT_COMPILER_C_O],
+[m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_TAG_COMPILER])dnl
+AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext],
+  [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)],
+  [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&AS_MESSAGE_LOG_FD
+   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+       _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
+     fi
+   fi
+   chmod u+w . 2>&AS_MESSAGE_LOG_FD
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+])
+_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1],
+        [Does compiler simultaneously support -c and -o options?])
+])# _LT_COMPILER_C_O
+
+
+# _LT_COMPILER_FILE_LOCKS([TAGNAME])
+# ----------------------------------
+# Check to see if we can do hard links to lock some files if needed
+# The hard link probe only runs when the compiler lacks -c -o support and
+# need_locks is not already no.  It expects ln to fail before conftest.a
+# exists, to succeed once it has been created, and to fail again when the
+# link target already exists.  need_locks becomes warn if the probe fails,
+# keeps its value if the probe passes, and becomes no in every other case.
+m4_defun([_LT_COMPILER_FILE_LOCKS],
+[m4_require([_LT_ENABLE_LOCK])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+_LT_COMPILER_C_O([$1])
+
+hard_links=nottested
+if test no = "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" && test no != "$need_locks"; then
+  # do not overwrite the value of need_locks provided by the user
+  AC_MSG_CHECKING([if we can lock with hard links])
+  hard_links=yes
+  $RM conftest*
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  ln conftest.a conftest.b 2>&5 || hard_links=no
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  AC_MSG_RESULT([$hard_links])
+  if test no = "$hard_links"; then
+    AC_MSG_WARN(['$CC' does not support '-c -o', so 'make -j' may be unsafe])
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?])
+])# _LT_COMPILER_FILE_LOCKS
+
+
+# _LT_CHECK_OBJDIR
+# ----------------
+# Choose the directory for temporary libtool files: .libs when a directory
+# of that name can be created, _libs otherwise.  The choice is exported as
+# objdir and, with a trailing slash, as the LT_OBJDIR preprocessor define.
+m4_defun([_LT_CHECK_OBJDIR],
+[AC_CACHE_CHECK([for objdir], [lt_cv_objdir],
+[rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+if test -d .libs; then
+  lt_cv_objdir=.libs
+else
+  # MS-DOS does not allow filenames that begin with a dot.
+  lt_cv_objdir=_libs
+fi
+rmdir .libs 2>/dev/null])
+objdir=$lt_cv_objdir
+_LT_DECL([], [objdir], [0],
+         [The name of the directory that contains temporary libtool files])dnl
+m4_pattern_allow([LT_OBJDIR])dnl
+AC_DEFINE_UNQUOTED([LT_OBJDIR], "$lt_cv_objdir/",
+  [Define to the sub-directory where libtool stores uninstalled libraries.])
+])# _LT_CHECK_OBJDIR
+
+
+# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME])
+# --------------------------------------
+# Check hardcoding attributes.
+# Sets the per-tag hardcode_action to relink, immediate or unsupported from
+# the per-tag hardcode_* settings, and may lower enable_fast_install to no
+# (relinking or inherited rpath) or to needless (shlibpath overrides the
+# runpath, or shared libraries are disabled).
+m4_defun([_LT_LINKER_HARDCODE_LIBPATH],
+[AC_MSG_CHECKING([how to hardcode library paths into programs])
+_LT_TAGVAR(hardcode_action, $1)=
+if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" ||
+   test -n "$_LT_TAGVAR(runpath_var, $1)" ||
+   test yes = "$_LT_TAGVAR(hardcode_automatic, $1)"; then
+
+  # We can hardcode non-existent directories.
+  if test no != "$_LT_TAGVAR(hardcode_direct, $1)" &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" &&
+     test no != "$_LT_TAGVAR(hardcode_minus_L, $1)"; then
+    # Linking always hardcodes the temporary library directory.
+    _LT_TAGVAR(hardcode_action, $1)=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    _LT_TAGVAR(hardcode_action, $1)=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  _LT_TAGVAR(hardcode_action, $1)=unsupported
+fi
+AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)])
+
+if test relink = "$_LT_TAGVAR(hardcode_action, $1)" ||
+   test yes = "$_LT_TAGVAR(inherit_rpath, $1)"; then
+  # Fast installation is not supported
+  enable_fast_install=no
+elif test yes = "$shlibpath_overrides_runpath" ||
+     test no = "$enable_shared"; then
+  # Fast installation is not necessary
+  enable_fast_install=needless
+fi
+_LT_TAGDECL([], [hardcode_action], [0],
+    [How to hardcode a shared library path into an executable])
+])# _LT_LINKER_HARDCODE_LIBPATH
+
+
+# _LT_CMD_STRIPLIB
+# ----------------
+# Set striplib and old_striplib, the commands that strip shared and static
+# libraries.  A STRIP that identifies itself as GNU strip is used with
+# --strip-unneeded and --strip-debug; on darwin any STRIP is used with -x
+# and -S; everywhere else both variables stay empty.
+m4_defun([_LT_CMD_STRIPLIB],
+[m4_require([_LT_DECL_EGREP])
+striplib=
+old_striplib=
+AC_MSG_CHECKING([whether stripping libraries is possible])
+if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
+  test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
+  test -z "$striplib" && striplib="$STRIP --strip-unneeded"
+  AC_MSG_RESULT([yes])
+else
+# FIXME - insert some real tests, host_os isn't really good enough
+  case $host_os in
+  darwin*)
+    if test -n "$STRIP"; then
+      striplib="$STRIP -x"
+      old_striplib="$STRIP -S"
+      AC_MSG_RESULT([yes])
+    else
+      AC_MSG_RESULT([no])
+    fi
+    ;;
+  *)
+    AC_MSG_RESULT([no])
+    ;;
+  esac
+fi
+_LT_DECL([], [old_striplib], [1], [Commands to strip libraries])
+_LT_DECL([], [striplib], [1])
+])# _LT_CMD_STRIPLIB
+
+
+# _LT_PREPARE_MUNGE_PATH_LIST
+# ---------------------------
+# Make sure func_munge_path_list() is defined correctly.
+m4_defun([_LT_PREPARE_MUNGE_PATH_LIST],
+[[# func_munge_path_list VARIABLE PATH
+# -----------------------------------
+# VARIABLE is name of variable containing _space_ separated list of
+# directories to be munged by the contents of PATH, which is string
+# having a format:
+# "DIR[:DIR]:"
+#       string "DIR[ DIR]" will be prepended to VARIABLE
+# ":DIR[:DIR]"
+#       string "DIR[ DIR]" will be appended to VARIABLE
+# "DIRP[:DIRP]::[DIRA:]DIRA"
+#       string "DIRP[ DIRP]" will be prepended to VARIABLE and string
+#       "DIRA[ DIRA]" will be appended to VARIABLE
+# "DIR[:DIR]"
+#       VARIABLE will be replaced by "DIR[ DIR]"
+func_munge_path_list ()
+{
+    case x@S|@2 in
+    x)
+        ;;
+    *:)
+        eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'` \@S|@@S|@1\"
+        ;;
+    x:*)
+        eval @S|@1=\"\@S|@@S|@1 `$ECHO @S|@2 | $SED 's/:/ /g'`\"
+        ;;
+    *::*)
+        eval @S|@1=\"\@S|@@S|@1\ `$ECHO @S|@2 | $SED -e 's/.*:://' -e 's/:/ /g'`\"
+        eval @S|@1=\"`$ECHO @S|@2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \@S|@@S|@1\"
+        ;;
+    *)
+        eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'`\"
+        ;;
+    esac
+}
+]])# _LT_PREPARE_MUNGE_PATH_LIST
+
+
+# _LT_SYS_DYNAMIC_LINKER([TAG])
+# -----------------------------
+# PORTME Fill in your ld.so characteristics
+m4_defun([_LT_SYS_DYNAMIC_LINKER],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+m4_require([_LT_DECL_EGREP])dnl
+m4_require([_LT_FILEUTILS_DEFAULTS])dnl
+m4_require([_LT_DECL_OBJDUMP])dnl
+m4_require([_LT_DECL_SED])dnl
+m4_require([_LT_CHECK_SHELL_FEATURES])dnl
+m4_require([_LT_PREPARE_MUNGE_PATH_LIST])dnl
+AC_MSG_CHECKING([dynamic linker characteristics])
+m4_if([$1],
+        [], [
+if test yes = "$GCC"; then
+  case $host_os in
+    darwin*) lt_awk_arg='/^libraries:/,/LR/' ;;
+    *) lt_awk_arg='/^libraries:/' ;;
+  esac
+  case $host_os in
+    mingw* | cegcc*) lt_sed_strip_eq='s|=\([[A-Za-z]]:\)|\1|g' ;;
+    *) lt_sed_strip_eq='s|=/|/|g' ;;
+  esac
+  lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq`
+  case $lt_search_path_spec in
+  *\;*)
+    # if the path contains
";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary... + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + # ...but if some path component already ends with the multilib dir we assume + # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). + case "$lt_multi_os_dir; $lt_search_path_spec " in + "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) + lt_multi_os_dir= + ;; + esac + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" + elif test -n "$lt_multi_os_dir"; then + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS = " "; FS = "/|\n";} { + lt_foo = ""; + lt_count = 0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo = "/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[[lt_foo]]++; } + if (lt_freq[[lt_foo]] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's|/\([[A-Za-z]]:\)|\1|g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi]) +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +AC_ARG_VAR([LT_SYS_LIBRARY_PATH], +[User-defined run-time library search path.]) + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='$libname$release$shared_ext$major' + ;; + +aix[[4-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[[01]] | aix4.[[01]].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. + # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. 
+ aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a[(]lib.so.V[)]' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)]" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)], lib.a[(]lib.so.V[)]" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a[(]lib.so.V[)], lib.so.V[(]$shared_archive_member_spec.o[)]" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[[45]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | sed -e 's/^lib/cyg/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | sed -e 's/^lib/pw/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[[23]].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[[01]]* | freebsdelf3.[[01]]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ + freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. 
+ version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[[3-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. 
+ need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # Don't embed -rpath directories since the linker doesn't support them. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], + [lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ + LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], + [lt_cv_shlibpath_overrides_runpath=yes])]) + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + ]) + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Add ABI-specific directories to the system library path. 
+ sys_lib_dlsearch_path_spec="/lib64 /usr/lib64 /lib /usr/lib" + + # Ideally, we could use ldconfig to report *all* directories which are + # searched for libraries, however this is still not possible. Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use.
+ dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd* | bitrig*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. 
+ soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix 
$libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +AC_MSG_RESULT([$dynamic_linker]) +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... 
but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + +_LT_DECL([], [variables_saved_for_relink], [1], + [Variables whose values should be saved in libtool wrapper scripts and + restored at link time]) +_LT_DECL([], [need_lib_prefix], [0], + [Do we need the "lib" prefix for modules?]) +_LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) +_LT_DECL([], [version_type], [0], [Library versioning type]) +_LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) +_LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) +_LT_DECL([], [shlibpath_overrides_runpath], [0], + [Is shlibpath searched before the hard-coded library search path?]) +_LT_DECL([], [libname_spec], [1], [Format of library name prefix]) +_LT_DECL([], [library_names_spec], [1], + [[List of archive names. First name is the real one, the rest are links. 
+ The last name is the one that the linker finds with -lNAME]]) +_LT_DECL([], [soname_spec], [1], + [[The coded name of the library, if different from the real name]]) +_LT_DECL([], [install_override_mode], [1], + [Permission mode override for installation of shared libraries]) +_LT_DECL([], [postinstall_cmds], [2], + [Command to use after installation of a shared archive]) +_LT_DECL([], [postuninstall_cmds], [2], + [Command to use after uninstallation of a shared archive]) +_LT_DECL([], [finish_cmds], [2], + [Commands used to finish a libtool library installation in a directory]) +_LT_DECL([], [finish_eval], [1], + [[As "finish_cmds", except a single script fragment to be evaled but + not shown]]) +_LT_DECL([], [hardcode_into_libs], [0], + [Whether we should hardcode library paths into libraries]) +_LT_DECL([], [sys_lib_search_path_spec], [2], + [Compile-time system search path for libraries]) +_LT_DECL([sys_lib_dlsearch_path_spec], [configure_time_dlsearch_path], [2], + [Detected run-time system search path for libraries]) +_LT_DECL([], [configure_time_lt_sys_library_path], [2], + [Explicit LT_SYS_LIBRARY_PATH set during ./configure time]) +])# _LT_SYS_DYNAMIC_LINKER
+
+
+# _LT_PATH_TOOL_PREFIX(TOOL)
+# --------------------------
+# find a file program that can recognize shared library
+#
+# Looks for a regular file named TOOL in each directory of the optional
+# second argument (split on PATH_SEPARATOR; PATH is used when it is empty)
+# and caches the first hit in lt_cv_path_MAGIC_CMD.  When MAGIC_CMD already
+# holds an absolute path, POSIX or DOS drive style, that value is cached
+# instead and no search is done.  Afterwards MAGIC_CMD is set from the cache,
+# the result is reported, and MAGIC_CMD is declared for the libtool script.
+AC_DEFUN([_LT_PATH_TOOL_PREFIX],
+[m4_require([_LT_DECL_EGREP])dnl
+AC_MSG_CHECKING([for $1])
+AC_CACHE_VAL(lt_cv_path_MAGIC_CMD,
+[case $MAGIC_CMD in
+dnl The bracket expressions are doubled so that one pair survives m4 quote
+dnl removal and the shell sees a character class followed by a star, the
+dnl same form LT_PATH_LD uses for its absolute path test.
+[[\\/]]* | ?:[[\\/]]*)
+  lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path.
+  ;;
+*)
+  # No absolute override: walk the directory list and keep the first match.
+  lt_save_MAGIC_CMD=$MAGIC_CMD
+  lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word. This closes a longstanding sh security hole.
+  ac_dummy="m4_if([$2], , $PATH, [$2])"
+  for ac_dir in $ac_dummy; do
+    # IFS is only needed for splitting the list; restore it for the body.
+    IFS=$lt_save_ifs
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$1"; then
+      lt_cv_path_MAGIC_CMD=$ac_dir/"$1"
+      if test -n "$file_magic_test_file"; then
+        # A reference file is configured: warn when the output of the
+        # candidate for it does not match the file_magic regex.  The
+        # candidate is kept either way.
+        case $deplibs_check_method in
+        "file_magic "*)
+          file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+          # Presumably file_magic_cmd can expand MAGIC_CMD, so point it at
+          # the candidate for the probe; it is restored after the loop.
+          MAGIC_CMD=$lt_cv_path_MAGIC_CMD
+          if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+            $EGREP "$file_magic_regex" > /dev/null; then
+            :
+          else
+            cat <<_LT_EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such. This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem. Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+_LT_EOF
+          fi ;;
+        esac
+      fi
+      break
+    fi
+  done
+  IFS=$lt_save_ifs
+  MAGIC_CMD=$lt_save_MAGIC_CMD
+  ;;
+esac])
+MAGIC_CMD=$lt_cv_path_MAGIC_CMD
+if test -n "$MAGIC_CMD"; then
+  AC_MSG_RESULT($MAGIC_CMD)
+else
+  AC_MSG_RESULT(no)
+fi
+_LT_DECL([], [MAGIC_CMD], [0],
+  [Used to examine libraries when file_magic_cmd begins with "file"])dnl
+])# _LT_PATH_TOOL_PREFIX
+
+# Old name:
+AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX])
+dnl aclocal-1.4 backwards compatibility:
+dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], [])
+
+
+# _LT_PATH_MAGIC
+# --------------
+# find a file program that can recognize a shared library
+#
+# Tries the host-prefixed name (ac_tool_prefix followed by file) first and,
+# when nothing was cached and a tool prefix is set, plain file.  Both
+# searches look in /usr/bin before PATH.  With no tool prefix and no hit,
+# MAGIC_CMD becomes the no-op command ":".
+m4_defun([_LT_PATH_MAGIC],
+[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH)
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+  if test -n "$ac_tool_prefix"; then
+    _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH)
+  else
+    MAGIC_CMD=:
+  fi
+fi
+])# _LT_PATH_MAGIC
+
+
+# LT_PATH_LD +# ---------- +# find the pathname to the GNU or non-GNU linker +AC_DEFUN([LT_PATH_LD], +[AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PROG_ECHO_BACKSLASH])dnl + +AC_ARG_WITH([gnu-ld], + [AS_HELP_STRING([--with-gnu-ld], + [assume the C compiler uses GNU ld @<:@default=no@:>@])], + [test no = "$withval" || with_gnu_ld=yes], + [with_gnu_ld=no])dnl + +ac_prog=ld +if test yes = "$GCC"; then + # Check if gcc -print-prog-name=ld gives a path. + AC_MSG_CHECKING([for ld used by $CC]) + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return, which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [[\\/]]* | ?:[[\\/]]*) + re_direlt='/[[^/]][[^/]]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD=$ac_prog + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test yes = "$with_gnu_ld"; then + AC_MSG_CHECKING([for GNU ld]) +else + AC_MSG_CHECKING([for non-GNU ld]) +fi +AC_CACHE_VAL(lt_cv_path_LD, +[if test -z "$LD"; then + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD=$ac_dir/$ac_prog + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. 
+ case `"$lt_cv_path_LD" -v 2>&1 &1 conftest.i +cat conftest.i conftest.i >conftest2.i +: ${lt_DD:=$DD} +AC_PATH_PROGS_FEATURE_CHECK([lt_DD], [dd], +[if "$ac_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=: +fi]) +rm -f conftest.i conftest2.i conftest.out]) +])# _LT_PATH_DD
+
+
+# _LT_CMD_TRUNCATE
+# ----------------
+# find command to truncate a binary pipe
+#
+# Caches the answer in lt_cv_truncate_bin and declares it to libtool as
+# lt_truncate_bin.  The dd program found by _LT_PATH_DD is used, with
+# bs=4096 count=1, when it copies exactly the first 32 bytes of a 64-byte
+# probe file; otherwise the fallback is the sed program with -e 4q.
+m4_defun([_LT_CMD_TRUNCATE],
+[m4_require([_LT_PATH_DD])
+AC_CACHE_CHECK([how to truncate binary pipes], [lt_cv_truncate_bin],
+[printf 0123456789abcdef0123456789abcdef >conftest.i
+cat conftest.i conftest.i >conftest2.i
+lt_cv_truncate_bin=
+# dd has to read the doubled file on stdin and write its first block to
+# conftest.out, which must then equal the 32-byte original.
+if "$ac_cv_path_lt_DD" bs=32 count=1 <conftest2.i >conftest.out 2>/dev/null; then
+  cmp -s conftest.i conftest.out \
+  && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1"
+fi
+rm -f conftest.i conftest2.i conftest.out
+test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q"])
+_LT_DECL([lt_truncate_bin], [lt_cv_truncate_bin], [1],
+  [Command to truncate a binary pipe])
+])# _LT_CMD_TRUNCATE
+
+
+# _LT_CHECK_MAGIC_METHOD +# ---------------------- +# how to check for library dependencies +# -- PORTME fill in with the dynamic library characteristics +m4_defun([_LT_CHECK_MAGIC_METHOD], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +AC_CACHE_CHECK([how to recognize dependent libraries], +lt_cv_deplibs_check_method, +[lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# 'unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program.
+# 'file_magic [[regex]]' -- check by looking for files in library path +# that responds to the $file_magic_cmd with a given extended regex. +# If you have 'file' or equivalent on your system and you're not sure +# whether 'pass_all' will *always* work, you probably want this one. + +case $host_os in +aix[[4-9]]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[[45]]*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='/usr/bin/file -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + if ( file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=/usr/bin/file + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[[3-9]]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd* | bitrig*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +os2*) 
+ lt_cv_deplibs_check_method=pass_all + ;; +esac +]) + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + +_LT_DECL([], [deplibs_check_method], [1], + [Method to check whether dependent libraries are shared objects]) +_LT_DECL([], [file_magic_cmd], [1], + [Command to use when deplibs_check_method = "file_magic"]) +_LT_DECL([], [file_magic_glob], [1], + [How to find potential files when deplibs_check_method = "file_magic"]) +_LT_DECL([], [want_nocaseglob], [1], + [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) +])# _LT_CHECK_MAGIC_METHOD + + +# LT_PATH_NM +# ---------- +# find the pathname to a BSD- or MS-compatible name lister +AC_DEFUN([LT_PATH_NM], +[AC_REQUIRE([AC_PROG_CC])dnl +AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, +[if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM=$NM +else + lt_nm_to_check=${ac_tool_prefix}nm + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + tmp_nm=$ac_dir/$lt_tmp_nm + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the 'sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty + case $build_os in + mingw*) lt_bad_file=conftest.nm/nofile ;; + *) lt_bad_file=/dev/null ;; + esac + case `"$tmp_nm" -B $lt_bad_file 2>&1 | sed '1q'` in + *$lt_bad_file* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break 2 + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break 2 + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS=$lt_save_ifs + done + : ${lt_cv_path_NM=no} +fi]) +if test no != "$lt_cv_path_NM"; then + NM=$lt_cv_path_NM +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. 
+ else + AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | sed '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols -headers" + ;; + *) + DUMPBIN=: + ;; + esac + fi + AC_SUBST([DUMPBIN]) + if test : != "$DUMPBIN"; then + NM=$DUMPBIN + fi +fi +test -z "$NM" && NM=nm +AC_SUBST([NM]) +_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl + +AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], + [lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) + cat conftest.out >&AS_MESSAGE_LOG_FD + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest*]) +])# LT_PATH_NM + +# Old names: +AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) +AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_PROG_NM], []) +dnl AC_DEFUN([AC_PROG_NM], []) + +# _LT_CHECK_SHAREDLIB_FROM_LINKLIB +# -------------------------------- +# how to determine the name of the shared library +# associated with a specific link library. 
+# -- PORTME fill in with the dynamic library characteristics +# The chosen command is cached in lt_cv_sharedlib_from_linklib_cmd and +# declared as sharedlib_from_linklib_cmd. +m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +m4_require([_LT_DECL_DLLTOOL]) +AC_CACHE_CHECK([how to associate runtime and link libraries], +lt_cv_sharedlib_from_linklib_cmd, +[lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh; + # decide which one to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd=$ECHO + ;; +esac +]) +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +# Never leave the command empty: default to $ECHO. +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + +_LT_DECL([], [sharedlib_from_linklib_cmd], [1], + [Command to associate shared and link libraries]) +])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB + + +# _LT_PATH_MANIFEST_TOOL +# ---------------------- +# Locate the manifest tool (mt). MANIFEST_TOOL is reset to : unless running +# it with -? prints output matching Manifest Tool on stdout. +# NOTE(review): the cache variable is spelled lt_cv_path_mainfest_tool in every +# use below; the spelling is self-consistent but looks like a typo of manifest. +m4_defun([_LT_PATH_MANIFEST_TOOL], +[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], + [lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD + $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out + cat conftest.err >&AS_MESSAGE_LOG_FD + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest*]) +if test yes != "$lt_cv_path_mainfest_tool"; then + MANIFEST_TOOL=: +fi +_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl +])# _LT_PATH_MANIFEST_TOOL + + +# _LT_DLL_DEF_P([FILE]) +# --------------------- +# True iff FILE is a Windows DLL '.def' file. 
+# Keep in sync with func_dll_def_p in the libtool script +AC_DEFUN([_LT_DLL_DEF_P], +[dnl + test DEF = "`$SED -n dnl + -e '\''s/^[[ ]]*//'\'' dnl Strip leading whitespace + -e '\''/^\(;.*\)*$/d'\'' dnl Delete empty lines and comments + -e '\''s/^\(EXPORTS\|LIBRARY\)\([[ ]].*\)*$/DEF/p'\'' dnl + -e q dnl Only consider the first "real" line + $1`" dnl +])# _LT_DLL_DEF_P + + +# LT_LIB_M +# -------- +# Check for the math library. Sets and substitutes LIBM: left empty on the +# hosts listed in the first case arm, built from -lmw and -lm on NCR sysv4.3, +# and set to -lm elsewhere; each flag is added through an AC_CHECK_LIB probe. +AC_DEFUN([LT_LIB_M], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +LIBM= +case $host in +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) + # These systems don't have libm, or don't need it + ;; +*-ncr-sysv4.3*) + AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM=-lmw) + AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") + ;; +*) + AC_CHECK_LIB(m, cos, LIBM=-lm) + ;; +esac +AC_SUBST([LIBM]) +])# LT_LIB_M + +# Old name: +AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_CHECK_LIBM], []) + + +# _LT_COMPILER_NO_RTTI([TAGNAME]) +# ------------------------------- +# Compute the no_builtin_flag tag variable for TAGNAME. It stays empty unless +# $GCC is yes; then it is -fno-builtin (passed through -Xcompiler for nvcc), +# and the _LT_COMPILER_OPTION probe may append -fno-rtti -fno-exceptions. +m4_defun([_LT_COMPILER_NO_RTTI], +[m4_require([_LT_TAG_COMPILER])dnl + +_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + +if test yes = "$GCC"; then + case $cc_basename in + nvcc*) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; + *) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; + esac + + _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], + lt_cv_prog_compiler_rtti_exceptions, + [-fno-rtti -fno-exceptions], [], + [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) +fi +_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], + [Compiler flag to turn off builtin functions]) +])# _LT_COMPILER_NO_RTTI + + +# _LT_CMD_GLOBAL_SYMBOLS +# ---------------------- +m4_defun([_LT_CMD_GLOBAL_SYMBOLS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_PROG_CC])dnl 
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([LT_PATH_NM])dnl +AC_REQUIRE([LT_PATH_LD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_TAG_COMPILER])dnl + +# Check for command to grab the raw symbol name followed by C symbol from nm. +AC_MSG_CHECKING([command to parse $NM output from $compiler object]) +AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], +[ +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[[BCDEGRST]]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[[BCDT]]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[[ABCDGISTW]]' + ;; +hpux*) + if test ia64 = "$host_cpu"; then + symcode='[[ABCDEGRST]]' + fi + ;; +irix* | nonstopux*) + symcode='[[BCDEGRST]]' + ;; +osf*) + symcode='[[BCDEGQRST]]' + ;; +solaris*) + symcode='[[BDRT]]' + ;; +sco3.2v5*) + symcode='[[DT]]' + ;; +sysv4.2uw2*) + symcode='[[DT]]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[[ABDT]]' + ;; +sysv4) + symcode='[[DFNSTU]]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[[ABCDGIRSTW]]' ;; +esac + +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Gets list of data symbols to import. + lt_cv_sys_global_symbol_to_import="sed -n -e 's/^I .* \(.*\)$/\1/p'" + # Adjust the below global symbol transforms to fixup imported variables. + lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" + lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" + lt_c_name_lib_hook="\ + -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ + -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" +else + # Disable hooks by default. 
+ lt_cv_sys_global_symbol_to_import= + lt_cdecl_hook= + lt_c_name_hook= + lt_c_name_lib_hook= +fi + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="sed -n"\ +$lt_cdecl_hook\ +" -e 's/^T .* \(.*\)$/extern int \1();/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="sed -n"\ +$lt_c_name_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" + +# Transform an extracted symbol line into symbol name with lib prefix and +# symbol address. +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n"\ +$lt_c_name_lib_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function, + # D for any global variable and I for any imported variable. + # Also find C++ and __fastcall symbols from MSVC++, + # which start with @ or ?. 
+ lt_cv_sys_global_symbol_pipe="$AWK ['"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ +" /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ +" /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ +" {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ +" s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx]" + else + lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if AC_TRY_EVAL(ac_compile); then + # Now try to grab the symbols. + nlist=conftest.nm + if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. 
*/ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT@&t@_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT@&t@_DLSYM_CONST +#else +# define LT@&t@_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT@&t@_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[[]] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. 
+ mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS=conftstm.$ac_objext + CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" + if AC_TRY_EVAL(ac_link) && test -s conftest$ac_exeext; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD + fi + else + echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test yes = "$pipe_works"; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done +]) +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + AC_MSG_RESULT(failed) +else + AC_MSG_RESULT(ok) +fi + +# Response file support. 
+if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + +_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], + [Take the output of nm and produce a listing of raw symbols and C names]) +_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], + [Transform the output of nm in a proper C declaration]) +_LT_DECL([global_symbol_to_import], [lt_cv_sys_global_symbol_to_import], [1], + [Transform the output of nm into a list of symbols to manually relocate]) +_LT_DECL([global_symbol_to_c_name_address], + [lt_cv_sys_global_symbol_to_c_name_address], [1], + [Transform the output of nm in a C name address pair]) +_LT_DECL([global_symbol_to_c_name_address_lib_prefix], + [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], + [Transform the output of nm in a C name address pair when lib prefix is needed]) +_LT_DECL([nm_interface], [lt_cv_nm_interface], [1], + [The name lister interface]) +_LT_DECL([], [nm_file_list_spec], [1], + [Specify filename containing input files for $NM]) +]) # _LT_CMD_GLOBAL_SYMBOLS + + +# _LT_COMPILER_PIC([TAGNAME]) +# --------------------------- +m4_defun([_LT_COMPILER_PIC], +[m4_require([_LT_TAG_COMPILER])dnl +_LT_TAGVAR(lt_prog_compiler_wl, $1)= +_LT_TAGVAR(lt_prog_compiler_pic, $1)= +_LT_TAGVAR(lt_prog_compiler_static, $1)= + +m4_if([$1], [CXX], [ + # C++ specific cases for pic, static, wl, etc. + if test yes = "$GXX"; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. 
+ if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. 
On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + else + case $host_os in + aix[[4-9]]*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). 
+ m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + dgux*) + case $cc_basename in + ec++*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | dragonfly*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + if test ia64 != "$host_cpu"; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + fi + ;; + aCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? + ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + # CC pic flag -KPIC is the default. + ;; + *) + ;; + esac + ;; + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # KAI C++ Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + ecpc* ) + # old Intel C++ for x86_64, which still supported -KPIC. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + icpc* ) + # Intel C++, used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. 
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) + # IBM XL 8.0, 9.0 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + esac + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd*) + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + cxx*) + # Digital/Compaq C++ + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + lcc*) + # Lucid + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + *) + ;; + esac + ;; + vxworks*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +], +[ + if test yes = "$GCC"; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' + if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + case $cc_basename in + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + + hpux9* | hpux10* | hpux11*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. 
+ case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC (with -KPIC) is the default. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64, which still supported -KPIC. + ecc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' + _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' + ;; + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + ccc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All Alpha code is PIC. 
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='' + ;; + *Sun\ F* | *Sun*Fortran*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + ;; + *Intel*\ [[CF]]*Compiler*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + *Portland\ Group*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All OSF/1 code is PIC. 
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + rdos*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + solaris*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; + *) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; + esac + ;; + + sunos4*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + unicos*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + + uts4*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +]) +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" + ;; +esac + +AC_CACHE_CHECK([for $compiler option to produce PIC], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) +_LT_TAGVAR(lt_prog_compiler_pic, 
$1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], + [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], + [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], + [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in + "" | " "*) ;; + *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; + esac], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) +fi +_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], + [Additional compiler flags for building library objects]) + +_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], + [How to pass a linker flag through the compiler]) +# +# Check to make sure the static flag actually works. +# +wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" +_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], + _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), + $lt_tmp_static_flag, + [], + [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) +_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], + [Compiler flag to prevent dynamic linking]) +])# _LT_COMPILER_PIC + + +# _LT_LINKER_SHLIBS([TAGNAME]) +# ---------------------------- +# See if the linker supports building shared libraries. 
+m4_defun([_LT_LINKER_SHLIBS], +[AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) +m4_if([$1], [CXX], [ + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + case $host_os in + aix[[4-9]]*) + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. 
+ if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + _LT_TAGVAR(export_symbols_cmds, $1)=$ltdll_cmds + ;; + cygwin* | mingw* | cegcc*) + case $cc_basename in + cl*) + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + ;; + esac + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac +], [ + runpath_var= + _LT_TAGVAR(allow_undefined_flag, $1)= + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(archive_cmds, $1)= + _LT_TAGVAR(archive_expsym_cmds, $1)= + _LT_TAGVAR(compiler_needs_object, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq 
> $export_symbols' + _LT_TAGVAR(hardcode_automatic, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(hardcode_libdir_separator, $1)= + _LT_TAGVAR(hardcode_minus_L, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + _LT_TAGVAR(inherit_rpath, $1)=no + _LT_TAGVAR(link_all_deplibs, $1)=unknown + _LT_TAGVAR(module_cmds, $1)= + _LT_TAGVAR(module_expsym_cmds, $1)= + _LT_TAGVAR(old_archive_from_new_cmds, $1)= + _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= + _LT_TAGVAR(thread_safe_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + _LT_TAGVAR(include_expsyms, $1)= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ' (' and ')$', so one must not match beginning or + # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', + # as well as any symbol that contains 'd'. + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. +dnl Note also adjust exclude_expsyms for C++ above. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. 
+ if test yes != "$GCC"; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++) + with_gnu_ld=yes + ;; + openbsd* | bitrig*) + with_gnu_ld=no + ;; + esac + + _LT_TAGVAR(ld_shlibs, $1)=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test yes = "$with_gnu_ld"; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; + *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test yes = "$lt_use_gnu_ld_interface"; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='$wl' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + supports_anon_versioning=no + case `$LD -v | $SED -e 's/([[^)]]\+)\s\+//' 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... 
+ *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[[3-9]]*) + # On AIX/PPC, the GNU linker is very broken + if test ia64 != "$host_cpu"; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. + +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. 
FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... 
+ _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags 
$output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test linux-dietlibc = "$host_os"; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test no = "$tmp_diet" + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && 
new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + _LT_TAGVAR(whole_archive_flag_spec, $1)= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + tmp_sharedflag='-Wl,-shared' ;; + xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) + tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + ;; + esac + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) # Sun C 5.9 + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + + if test yes = "$supports_anon_versioning"; then + 
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + tcc*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-rdynamic' + ;; + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test yes = "$supports_anon_versioning"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. 
Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. 
+ if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + sunos4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + + if test no = "$_LT_TAGVAR(ld_shlibs, $1)"; then + runpath_var= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + _LT_TAGVAR(hardcode_direct, $1)=unsupported + fi + ;; + + aix[[4-9]]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. 
+ # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then + aix_use_runtimelinking=yes + break + fi + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # traditional, no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. 
+ _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + ;; + esac + + if test yes = "$GCC"; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + ;; + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag="$shared_flag "'$wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. 
+ _LT_TAGVAR(always_export_symbols, $1)=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. 
+ _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared libraries. + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! 
$soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + bsdi[[45]]*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. 
+ case $cc_basename in + cl*) + # Native MSVC + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC wrapper + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + # FIXME: Should let the user specify the lib program. 
+ _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + esac + ;; + + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + dgux*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
+ freebsd* | dragonfly*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + hpux9*) + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + ;; + + hpux10*) + if test yes,no = "$GCC,$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + fi + ;; + + hpux11*) + if test yes,no = "$GCC,$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + m4_if($1, [], [ + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + _LT_LINKER_OPTION([if $CC understands -b], + _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], + [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) + ;; + esac + fi + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], + [lt_cv_irix_exported_symbol], + [save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" + AC_LINK_IFELSE( + [AC_LANG_SOURCE( + [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], + [C++], [[int foo (void) { return 0; }]], + [Fortran 77], [[ + subroutine foo + end]], + [Fortran], [[ + subroutine foo + end]])])], + [lt_cv_irix_exported_symbol=yes], + [lt_cv_irix_exported_symbol=no]) + LDFLAGS=$save_LDFLAGS]) + if test yes = "$lt_cv_irix_exported_symbol"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' + fi + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, 
$1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + linux*) + case $cc_basename in + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + _LT_TAGVAR(ld_shlibs, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + newsos6) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *nto* | *qnx*) + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + fi + else + _LT_TAGVAR(ld_shlibs, 
$1)=no + fi + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + + osf3*) + if test yes = "$GCC"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else +
_LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test yes = "$GCC"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + solaris*) + _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' + if test yes = "$GCC"; then + wlarc='$wl' + 
_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + _LT_TAGVAR(archive_cmds, $1)='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='$wl' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. GCC discards it without '$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) 
+ if test yes = "$GCC"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + fi + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + sunos4*) + if test sequent = "$host_vendor"; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4) + case $host_vendor in + sni) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. 
+ _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' + _LT_TAGVAR(hardcode_direct, $1)=no + ;; + motorola) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4.3*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + _LT_TAGVAR(ld_shlibs, $1)=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. 
If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + if test sni = "$host_vendor"; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Blargedynsym' + ;; + esac + fi + fi +]) +AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) +test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no + +_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld + +_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl +_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl +_LT_DECL([], [extract_expsyms_cmds], [2], + [The commands to extract the exported symbol list from a shared archive]) + +# +# Do we need to explicitly link libc? 
+# +case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in +x|xyes) + # Assume -lc should be added + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $_LT_TAGVAR(archive_cmds, $1) in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + AC_CACHE_CHECK([whether -lc should be explicitly linked in], + [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), + [$RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if AC_TRY_EVAL(ac_compile) 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) + pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) + _LT_TAGVAR(allow_undefined_flag, $1)= + if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) + then + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no + else + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes + fi + _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + ]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) + ;; + esac + fi + ;; +esac + +_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], + [Whether or not to add -lc for building shared libraries]) +_LT_TAGDECL([allow_libtool_libs_with_static_runtimes], + [enable_shared_with_static_runtimes], [0], + [Whether or not to disallow shared libs when runtime libs are static]) +_LT_TAGDECL([], [export_dynamic_flag_spec], [1], + [Compiler flag to allow reflexive dlopens]) +_LT_TAGDECL([], [whole_archive_flag_spec], [1], + 
[Compiler flag to generate shared objects directly from archives]) +_LT_TAGDECL([], [compiler_needs_object], [1], + [Whether the compiler copes with passing no objects directly]) +_LT_TAGDECL([], [old_archive_from_new_cmds], [2], + [Create an old-style archive from a shared archive]) +_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], + [Create a temporary old-style archive to link instead of a shared archive]) +_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) +_LT_TAGDECL([], [archive_expsym_cmds], [2]) +_LT_TAGDECL([], [module_cmds], [2], + [Commands used to build a loadable module if different from building + a shared archive.]) +_LT_TAGDECL([], [module_expsym_cmds], [2]) +_LT_TAGDECL([], [with_gnu_ld], [1], + [Whether we are building with GNU ld or not]) +_LT_TAGDECL([], [allow_undefined_flag], [1], + [Flag that allows shared libraries with undefined symbols to be built]) +_LT_TAGDECL([], [no_undefined_flag], [1], + [Flag that enforces no undefined symbols]) +_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], + [Flag to hardcode $libdir into a binary during linking. 
+ This must work even if $libdir does not exist]) +_LT_TAGDECL([], [hardcode_libdir_separator], [1], + [Whether we need a single "-rpath" flag with a separated argument]) +_LT_TAGDECL([], [hardcode_direct], [0], + [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes + DIR into the resulting binary]) +_LT_TAGDECL([], [hardcode_direct_absolute], [0], + [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes + DIR into the resulting binary and the resulting library dependency is + "absolute", i.e impossible to change by setting $shlibpath_var if the + library is relocated]) +_LT_TAGDECL([], [hardcode_minus_L], [0], + [Set to "yes" if using the -LDIR flag during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_shlibpath_var], [0], + [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_automatic], [0], + [Set to "yes" if building a shared library automatically hardcodes DIR + into the library and all subsequent libraries and executables linked + against it]) +_LT_TAGDECL([], [inherit_rpath], [0], + [Set to yes if linker adds runtime paths of dependent libraries + to runtime path list]) +_LT_TAGDECL([], [link_all_deplibs], [0], + [Whether libtool must link a program against all its dependency libraries]) +_LT_TAGDECL([], [always_export_symbols], [0], + [Set to "yes" if exported symbols are required]) +_LT_TAGDECL([], [export_symbols_cmds], [2], + [The commands to list exported symbols]) +_LT_TAGDECL([], [exclude_expsyms], [1], + [Symbols that should not be listed in the preloaded symbols]) +_LT_TAGDECL([], [include_expsyms], [1], + [Symbols that must always be exported]) +_LT_TAGDECL([], [prelink_cmds], [2], + [Commands necessary for linking programs (against libraries) with templates]) +_LT_TAGDECL([], [postlink_cmds], [2], + [Commands necessary for finishing linking programs]) +_LT_TAGDECL([], [file_list_spec], [1], + [Specify 
filename containing input files])
+dnl FIXME: Not yet implemented
+dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1],
+dnl [Compiler flag to generate thread safe objects])
+])# _LT_LINKER_SHLIBS
+
+
+# _LT_LANG_C_CONFIG([TAG])
+# ------------------------
+# Ensure that the configuration variables for a C compiler are suitably
+# defined. These variables are subsequently used by _LT_CONFIG to write
+# the compiler configuration to 'libtool'.
+#
+# TAG is forwarded as $1 to every per-tag macro called in the body.
+# The body saves $CC on entry and restores it on exit, and is bracketed
+# by AC_LANG_PUSH(C) / AC_LANG_POP. The feature checks, the shared/static
+# decision and the final _LT_CONFIG call only run when $compiler is
+# non-empty.
+m4_defun([_LT_LANG_C_CONFIG],
+[m4_require([_LT_DECL_EGREP])dnl
+# Remember the incoming CC; it is put back at the very end of this macro.
+lt_save_CC=$CC
+AC_LANG_PUSH(C)
+
+# Source file extension for C test sources.
+ac_ext=c
+
+# Object file extension for compiled C test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(){return(0);}'
+
+_LT_TAG_COMPILER
+# Save the default compiler, since it gets overwritten when the other
+# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
+compiler_DEFAULT=$CC
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+# NOTE(review): $compiler is not assigned in this macro; presumably it is
+# set by the tag-compiler step above -- confirm.
+if test -n "$compiler"; then
+  _LT_COMPILER_NO_RTTI($1)
+  _LT_COMPILER_PIC($1)
+  _LT_COMPILER_C_O($1)
+  _LT_COMPILER_FILE_LOCKS($1)
+  _LT_LINKER_SHLIBS($1)
+  _LT_SYS_DYNAMIC_LINKER($1)
+  _LT_LINKER_HARDCODE_LIBPATH($1)
+  LT_SYS_DLOPEN_SELF
+  _LT_CMD_STRIPLIB
+
+  # Report what library types will actually be built
+  AC_MSG_CHECKING([if libtool supports shared libraries])
+  AC_MSG_RESULT([$can_build_shared])
+
+  AC_MSG_CHECKING([whether to build shared libraries])
+  # A requested shared build is dropped when can_build_shared is "no".
+  test no = "$can_build_shared" && enable_shared=no
+
+  # On AIX, shared libraries and static libraries use the same namespace, and
+  # are all built from PIC.
+  case $host_os in
+  aix3*)
+    # A shared build switches the static build off. When RANLIB is set, a
+    # RANLIB step is appended to archive_cmds after a '~' and the same
+    # command is stored in postinstall_cmds.
+    test yes = "$enable_shared" && enable_static=no
+    if test -n "$RANLIB"; then
+      archive_cmds="$archive_cmds~\$RANLIB \$lib"
+      postinstall_cmds='$RANLIB $lib'
+    fi
+    ;;
+
+  # m4 strips one level of the doubled square brackets below, so the shell
+  # sees an ordinary bracket glob covering the digits 4 through 9.
+  aix[[4-9]]*)
+    # Skipped on ia64. Otherwise only the last arm changes anything: a shared
+    # build not matched by the first two arms disables the static build.
+    if test ia64 != "$host_cpu"; then
+      case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in
+      yes,aix,yes) ;; # shared object as lib.so file only
+      yes,svr4,*) ;; # shared object as lib.so archive member only
+      yes,*) enable_static=no ;; # shared object in lib.a archive as well
+      esac
+    fi
+    ;;
+  esac
+  AC_MSG_RESULT([$enable_shared])
+
+  AC_MSG_CHECKING([whether to build static libraries])
+  # Make sure either enable_shared or enable_static is yes.
+  test yes = "$enable_shared" || enable_static=yes
+  AC_MSG_RESULT([$enable_static])
+
+  # Hand the settings collected for this tag to the configuration writer.
+  _LT_CONFIG($1)
+fi
+# Undo the language push and the CC save made at the top of the macro.
+AC_LANG_POP
+CC=$lt_save_CC
+])# _LT_LANG_C_CONFIG
+
+
+# _LT_LANG_CXX_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for a C++ compiler are suitably
+# defined. These variables are subsequently used by _LT_CONFIG to write
+# the compiler configuration to 'libtool'.
+m4_defun([_LT_LANG_CXX_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +if test -n "$CXX" && ( test no != "$CXX" && + ( (test g++ = "$CXX" && `g++ -v >/dev/null 2>&1` ) || + (test g++ != "$CXX"))); then + AC_PROG_CXXCPP +else + _lt_caught_CXX_error=yes +fi + +AC_LANG_PUSH(C++) +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(compiler_needs_object, $1)=no +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for C++ test sources. +ac_ext=cpp + +# Object file extension for compiled C++ test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the CXX compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. 
+if test yes != "$_lt_caught_CXX_error"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="int some_variable = 0;" + + # Code to be used in simple link tests + lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_CFLAGS=$CFLAGS + lt_save_LD=$LD + lt_save_GCC=$GCC + GCC=$GXX + lt_save_with_gnu_ld=$with_gnu_ld + lt_save_path_LD=$lt_cv_path_LD + if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then + lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx + else + $as_unset lt_cv_prog_gnu_ld + fi + if test -n "${lt_cv_path_LDCXX+set}"; then + lt_cv_path_LD=$lt_cv_path_LDCXX + else + $as_unset lt_cv_path_LD + fi + test -z "${LDCXX+set}" || LD=$LDCXX + CC=${CXX-"c++"} + CFLAGS=$CXXFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + # We don't want -fno-exception when compiling C++ code, so set the + # no_builtin_flag separately + if test yes = "$GXX"; then + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' + else + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + fi + + if test yes = "$GXX"; then + # Set up default GNU C++ configuration + + LT_PATH_LD + + # Check if GNU C++ uses GNU ld as the underlying linker, since the + # archiving commands below assume that GNU ld is being used. 
+ if test yes = "$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + + # If archive_cmds runs LD, not CC, wlarc should be empty + # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to + # investigate it a little bit more. (MM) + wlarc='$wl' + + # ancient GNU ld didn't support --whole-archive et. al. + if eval "`$CC -print-prog-name=ld` --help 2>&1" | + $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + with_gnu_ld=no + wlarc= + + # A generic and very simple default shared library creation + # command for GNU C++ for the case where it uses the native + # linker, instead of GNU ld. If possible, this setting should + # overridden to take advantage of the native linker features on + # the platform it is being used on. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + fi + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + GXX=no + with_gnu_ld=no + wlarc= + fi + + # PORTME: fill in a description of your system's C++ link characteristics + AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) + _LT_TAGVAR(ld_shlibs, $1)=yes + case $host_os in + aix3*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aix[[4-9]]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. + # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. 
+ aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + ;; + esac + + if test yes = "$GXX"; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. 
Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag=$shared_flag' $wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to + # export. + _LT_TAGVAR(always_export_symbols, $1)=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + # The "-G" linker flag allows undefined symbols. + _LT_TAGVAR(no_undefined_flag, $1)='-bernotok' + # Determine the default libpath from the value encoded in an empty + # executable. 
+ _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. 
+ _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared + # libraries. Need -bnortl late, we may have -brtl in LDFLAGS. + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! 
$soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + cygwin* | mingw* | pw32* | cegcc*) + case $GXX,$cc_basename in + ,cl* | no,cl*) + # Native MSVC + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + func_to_tool_file "$lt_outputfile"~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # g++ + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY 
${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + freebsd2.*) + # C++ shared libraries reported to be fairly broken before + # switch to ELF + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + freebsd-elf*) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + ;; + + freebsd* | dragonfly*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + hpux9*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of 
the library. + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + hpux10*|hpux11*) + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the 
search PATH, + # but as the default + # location of the library. + ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` -o $lib' + fi + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + esac + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib $wl-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc* | ecpc* ) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
+ case `$CC -V 2>&1` in + *"Version 7."*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + case `$CC -V` in + *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) + _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ + compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' + _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ + $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ + $RANLIB $oldlib' + _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname 
$wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 6 and above use weak symbols + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl--rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + ;; + cxx*) + # Compaq C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib $wl-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' + ;; + xl* | mpixl* | bgxl*) + # IBM XL 8.0 on PPC, with GNU ld + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + if test yes = "$supports_anon_versioning"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file $wl$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all 
\"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; + esac + ;; + + lynxos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + m88k*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + + *nto* | *qnx*) + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`"; 
then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file,$export_symbols -o $lib' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + fi + output_verbose_link_cmd=func_echo_all + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. 
+ case $host in + osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; + *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; + esac + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + cxx*) + case $host in + osf3*) + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $soname `test -n "$verstring" && func_echo_all "$wl-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + ;; + *) + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname $wl-input $wl$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~ + $RM $lib.exp' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes,no = "$GXX,$with_gnu_ld"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + case $host in + osf3*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + psos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + lcc*) + # Lucid + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(archive_cmds_need_lc,$1)=yes + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag $wl-M $wl$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. + # Supported since Solaris 2.6 (maybe 2.5.1?) + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + + # The C++ compiler must be used to create the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test yes,no = "$GXX,$with_gnu_ld"; then + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-z ${wl}defs' + if $CC --version | $GREP -v '^2\.7' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + else + # g++ 2.7 appears to require '-G' NOT '-shared' on this + # platform. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + fi + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $wl$libdir' + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + ;; + esac + fi + ;; + esac + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just 
about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ + '"$_LT_TAGVAR(old_archive_cmds, $1)" + _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ + '"$_LT_TAGVAR(reload_cmds, $1)" + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + vxworks*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) + test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no + + _LT_TAGVAR(GCC, $1)=$GXX + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the 
following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS + LDCXX=$LD + LD=$lt_save_LD + GCC=$lt_save_GCC + with_gnu_ld=$lt_save_with_gnu_ld + lt_cv_path_LDCXX=$lt_cv_path_LD + lt_cv_path_LD=$lt_save_path_LD + lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld + lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +fi # test yes != "$_lt_caught_CXX_error" + +AC_LANG_POP +])# _LT_LANG_CXX_CONFIG + + +# _LT_FUNC_STRIPNAME_CNF +# ---------------------- +# func_stripname_cnf prefix suffix name +# strip PREFIX and SUFFIX off of NAME. +# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). +# +# This function is identical to the (non-XSI) version of func_stripname, +# except this one can be used by m4 code that may be executed by configure, +# rather than the libtool script. +m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl +AC_REQUIRE([_LT_DECL_SED]) +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) +func_stripname_cnf () +{ + case @S|@2 in + .*) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%\\\\@S|@2\$%%"`;; + *) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%@S|@2\$%%"`;; + esac +} # func_stripname_cnf +])# _LT_FUNC_STRIPNAME_CNF + + +# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) +# --------------------------------- +# Figure out "hidden" library dependencies from verbose +# compiler output when linking a shared library. +# Parse the compiler output and extract the necessary +# objects, libraries and library flags. 
+m4_defun([_LT_SYS_HIDDEN_LIBDEPS], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl +# Dependencies to place before and after the object being linked: +_LT_TAGVAR(predep_objects, $1)= +_LT_TAGVAR(postdep_objects, $1)= +_LT_TAGVAR(predeps, $1)= +_LT_TAGVAR(postdeps, $1)= +_LT_TAGVAR(compiler_lib_search_path, $1)= + +dnl we can't use the lt_simple_compile_test_code here, +dnl because it contains code intended for an executable, +dnl not a library. It's possible we should let each +dnl tag define a new lt_????_link_test_code variable, +dnl but it's only used here... +m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF +int a; +void foo (void) { a = 0; } +_LT_EOF +], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF +class Foo +{ +public: + Foo (void) { a = 0; } +private: + int a; +}; +_LT_EOF +], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer*4 a + a=0 + return + end +_LT_EOF +], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer a + a=0 + return + end +_LT_EOF +], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF +public class foo { + private int a; + public void bar (void) { + a = 0; + } +}; +_LT_EOF +], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF +package foo +func foo() { +} +_LT_EOF +]) + +_lt_libdeps_save_CFLAGS=$CFLAGS +case "$CC $CFLAGS " in #( +*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; +*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; +*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; +esac + +dnl Parse the compiler output and extract the necessary +dnl objects, libraries and library flags. +if AC_TRY_EVAL(ac_compile); then + # Parse the compiler output and extract the necessary + # objects, libraries and library flags. + + # Sentinel used to keep track of whether or not we are before + # the conftest object file. 
+ pre_test_object_deps_done=no + + for p in `eval "$output_verbose_link_cmd"`; do + case $prev$p in + + -L* | -R* | -l*) + # Some compilers place space between "-{L,R}" and the path. + # Remove the space: remember the bare flag in prev and join it to the next word. + if test x-L = "x$p" || + test x-R = "x$p"; then + prev=$p + continue + fi + + # Expand the sysroot to ease extracting the directories later. + if test -z "$prev"; then + case $p in + -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; + -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; + -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; + esac + fi + case $p in + =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; + esac + if test no = "$pre_test_object_deps_done"; then + case $prev in + -L | -R) + # Internal compiler library paths should come after those + # provided by the user. The postdeps already come after the + # user supplied libs so there is no need to process them. + if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then + _LT_TAGVAR(compiler_lib_search_path, $1)=$prev$p + else + _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} $prev$p" + fi + ;; + # The "-l" case would never come before the object being + # linked, so don't bother handling this case. + esac + else + if test -z "$_LT_TAGVAR(postdeps, $1)"; then + _LT_TAGVAR(postdeps, $1)=$prev$p + else + _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} $prev$p" + fi + fi + prev= + ;; + + *.lto.$objext) ;; # Ignore GCC LTO objects + *.$objext) + # This assumes that the test object file only shows up + # once in the compiler output. 
+ if test "$p" = "conftest.$objext"; then + pre_test_object_deps_done=yes + continue + fi + + if test no = "$pre_test_object_deps_done"; then + if test -z "$_LT_TAGVAR(predep_objects, $1)"; then + _LT_TAGVAR(predep_objects, $1)=$p + else + _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" + fi + else + if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then + _LT_TAGVAR(postdep_objects, $1)=$p + else + _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" + fi + fi + ;; + + *) ;; # Ignore the rest. + + esac + done + + # Clean up. + rm -f a.out a.exe +else + echo "libtool.m4: error: problem compiling $1 test program" +fi + +$RM -f conftest.$objext +CFLAGS=$_lt_libdeps_save_CFLAGS + +# PORTME: override above test on systems where it is broken +m4_if([$1], [CXX], +[case $host_os in +interix[[3-9]]*) + # Interix 3.5 installs completely hosed .la files for C++, so rather than + # hack all around it, let's just trust "g++" to DTRT. + _LT_TAGVAR(predep_objects,$1)= + _LT_TAGVAR(postdep_objects,$1)= + _LT_TAGVAR(postdeps,$1)= + ;; +esac +]) + +case " $_LT_TAGVAR(postdeps, $1) " in +*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; +esac + _LT_TAGVAR(compiler_lib_search_dirs, $1)= +if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then + _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | $SED -e 's! -L! 
!g' -e 's!^ !!'` +fi +_LT_TAGDECL([], [compiler_lib_search_dirs], [1], + [The directories searched by this compiler when creating a shared library]) +_LT_TAGDECL([], [predep_objects], [1], + [Dependencies to place before and after the objects being linked to + create a shared library]) +_LT_TAGDECL([], [postdep_objects], [1]) +_LT_TAGDECL([], [predeps], [1]) +_LT_TAGDECL([], [postdeps], [1]) +_LT_TAGDECL([], [compiler_lib_search_path], [1], + [The library search path used internally by the compiler when linking + a shared library]) +])# _LT_SYS_HIDDEN_LIBDEPS + + +# _LT_LANG_F77_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a Fortran 77 compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_F77_CONFIG], +[AC_LANG_PUSH(Fortran 77) +if test -z "$F77" || test no = "$F77"; then + _lt_disable_F77=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for f77 test sources. +ac_ext=f + +# Object file extension for compiled f77 test sources. 
+objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the F77 compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_disable_F77"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${F77-"f77"} + CFLAGS=$FFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + GCC=$G77 + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)=$G77 + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test yes != "$_lt_disable_F77" + +AC_LANG_POP +])# _LT_LANG_F77_CONFIG + + +# _LT_LANG_FC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for a Fortran compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. 
+m4_defun([_LT_LANG_FC_CONFIG], +[AC_LANG_PUSH(Fortran) + +if test -z "$FC" || test no = "$FC"; then + _lt_disable_FC=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for fc test sources. +ac_ext=${ac_fc_srcext-f} + +# Object file extension for compiled fc test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the FC compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_disable_FC"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments.
+ lt_save_CC=$CC + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${FC-"f95"} + CFLAGS=$FCFLAGS + compiler=$CC + GCC=$ac_cv_fc_compiler_gnu + + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)=$ac_cv_fc_compiler_gnu + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing...
+ _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test yes != "$_lt_disable_FC" + +AC_LANG_POP +])# _LT_LANG_FC_CONFIG + + +# _LT_LANG_GCJ_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Java Compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_GCJ_CONFIG], +[AC_REQUIRE([LT_PROG_GCJ])dnl +AC_LANG_SAVE + +# Source file extension for Java test sources. +ac_ext=java + +# Object file extension for compiled Java test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="class foo {}" + +# Code to be used in simple link tests +lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GCJ-"gcj"} +CFLAGS=$GCJFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)=$LD +_LT_CC_BASENAME([$compiler]) + +# GCJ did not exist at the time GCC didn't implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GCJ_CONFIG + + +# _LT_LANG_GO_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for the GNU Go compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_GO_CONFIG], +[AC_REQUIRE([LT_PROG_GO])dnl +AC_LANG_SAVE + +# Source file extension for Go test sources. +ac_ext=go + +# Object file extension for compiled Go test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="package main; func main() { }" + +# Code to be used in simple link tests +lt_simple_link_test_code='package main; func main() { }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GOC-"gccgo"} +CFLAGS=$GOFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)=$LD +_LT_CC_BASENAME([$compiler]) + +# Go did not exist at the time GCC didn't implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GO_CONFIG + + +# _LT_LANG_RC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for the Windows resource compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_RC_CONFIG], +[AC_REQUIRE([LT_PROG_RC])dnl +AC_LANG_SAVE + +# Source file extension for RC test sources. +ac_ext=rc + +# Object file extension for compiled RC test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' + +# Code to be used in simple link tests +lt_simple_link_test_code=$lt_simple_compile_test_code + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments.
+lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC= +CC=${RC-"windres"} +CFLAGS= +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_CC_BASENAME([$compiler]) +_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + +if test -n "$compiler"; then + : + _LT_CONFIG($1) +fi + +GCC=$lt_save_GCC +AC_LANG_RESTORE +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_RC_CONFIG + + +# LT_PROG_GCJ +# ----------- +AC_DEFUN([LT_PROG_GCJ], +[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], + [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], + [AC_CHECK_TOOL(GCJ, gcj,) + test set = "${GCJFLAGS+set}" || GCJFLAGS="-g -O2" + AC_SUBST(GCJFLAGS)])])[]dnl +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_GCJ], []) + + +# LT_PROG_GO +# ---------- +AC_DEFUN([LT_PROG_GO], +[AC_CHECK_TOOL(GOC, gccgo,) +]) + + +# LT_PROG_RC +# ---------- +AC_DEFUN([LT_PROG_RC], +[AC_CHECK_TOOL(RC, windres,) +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_RC], []) + + +# _LT_DECL_EGREP +# -------------- +# If we don't have a new enough Autoconf to choose the best grep +# available, choose the one first in the user's PATH. +m4_defun([_LT_DECL_EGREP], +[AC_REQUIRE([AC_PROG_EGREP])dnl +AC_REQUIRE([AC_PROG_FGREP])dnl +test -z "$GREP" && GREP=grep +_LT_DECL([], [GREP], [1], [A grep program that handles long lines]) +_LT_DECL([], [EGREP], [1], [An ERE matcher]) +_LT_DECL([], [FGREP], [1], [A literal string matcher]) +dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too +AC_SUBST([GREP]) +]) + + +# _LT_DECL_OBJDUMP +# ---------------- +# If we don't have a new enough Autoconf to choose the best objdump +# available, choose the one first in the user's PATH.
+m4_defun([_LT_DECL_OBJDUMP], +[AC_CHECK_TOOL(OBJDUMP, objdump, false) +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) +AC_SUBST([OBJDUMP]) +]) + +# _LT_DECL_DLLTOOL +# ---------------- +# Ensure DLLTOOL variable is set. +m4_defun([_LT_DECL_DLLTOOL], +[AC_CHECK_TOOL(DLLTOOL, dlltool, false) +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program]) +AC_SUBST([DLLTOOL]) +]) + +# _LT_DECL_SED +# ------------ +# Check for a fully-functional sed program, that truncates +# as few characters as possible. Prefer GNU sed if found. +m4_defun([_LT_DECL_SED], +[AC_PROG_SED +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" +_LT_DECL([], [SED], [1], [A sed program that does not truncate output]) +_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], + [Sed that helps us avoid accidentally triggering echo(1) options like -n]) +])# _LT_DECL_SED + +m4_ifndef([AC_PROG_SED], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_SED. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. # +############################################################ + +m4_defun([AC_PROG_SED], +[AC_MSG_CHECKING([for a sed that does not truncate output]) +AC_CACHE_VAL(lt_cv_path_SED, +[# Loop through the user's path and test for sed and gsed. +# Then use that list of sed's as ones to test for truncation. +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=.
+ for lt_ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then + lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" + fi + done + done +done +IFS=$as_save_IFS +lt_ac_max=0 +lt_ac_count=0 +# Add /usr/xpg4/bin/sed as it is typically found on Solaris +# along with /bin/sed that truncates output. +for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do + test ! -f "$lt_ac_sed" && continue + cat /dev/null > conftest.in + lt_ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >conftest.in + # Check for GNU sed and select it if it is found. + if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then + lt_cv_path_SED=$lt_ac_sed + break + fi + while true; do + cat conftest.in conftest.in >conftest.tmp + mv conftest.tmp conftest.in + cp conftest.in conftest.nl + echo >>conftest.nl + $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break + cmp -s conftest.out conftest.nl || break + # Input starts at 10 chars and doubles each pass; at most a dozen passes is more than enough + test 10 -lt "$lt_ac_count" && break + lt_ac_count=`expr $lt_ac_count + 1` + if test "$lt_ac_count" -gt "$lt_ac_max"; then + lt_ac_max=$lt_ac_count + lt_cv_path_SED=$lt_ac_sed + fi + done +done +]) +SED=$lt_cv_path_SED +AC_SUBST([SED]) +AC_MSG_RESULT([$SED]) +])#AC_PROG_SED +])#m4_ifndef + +# Old name: +AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_SED], []) + + +# _LT_CHECK_SHELL_FEATURES +# ------------------------ +# Find out whether the shell understands "unset", and whether tr +# needs ASCII or EBCDIC escapes to convert between spaces and newlines.
+m4_defun([_LT_CHECK_SHELL_FEATURES], +[if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi +_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac +_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl +_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl +])# _LT_CHECK_SHELL_FEATURES + + +# _LT_PATH_CONVERSION_FUNCTIONS +# ----------------------------- +# Determine what file name conversion functions should be used by +# func_to_host_file (and, implicitly, by func_to_host_path). These are needed +# for certain cross-compile configurations and native mingw. +m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_MSG_CHECKING([how to convert $build file names to $host format]) +AC_CACHE_VAL(lt_cv_to_host_file_cmd, +[case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac +]) +to_host_file_cmd=$lt_cv_to_host_file_cmd
+AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) +_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], + [0], [convert $build file names to $host format])dnl + +AC_MSG_CHECKING([how to convert $build file names to toolchain format]) +AC_CACHE_VAL(lt_cv_to_tool_file_cmd, +[# Assume ordinary cross tools, or a native build. +lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac +]) +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) +_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], + [0], [convert $build files to toolchain format])dnl +])# _LT_PATH_CONVERSION_FUNCTIONS diff --git a/config/m4/ltoptions.m4 b/config/m4/ltoptions.m4 new file mode 100644 index 0000000..94b0829 --- /dev/null +++ b/config/m4/ltoptions.m4 @@ -0,0 +1,437 @@ +# Helper functions for option handling. -*- Autoconf -*- +# +# Copyright (C) 2004-2005, 2007-2009, 2011-2015 Free Software +# Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 8 ltoptions.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) + + +# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) +# ------------------------------------------ +m4_define([_LT_MANGLE_OPTION], +[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) + + +# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) +# --------------------------------------- +# Record option OPTION-NAME as set for macro MACRO-NAME, and if there is a +# matching handler defined, dispatch to it. OPTION-NAMEs without a +# handler produce an m4 warning.
+m4_define([_LT_SET_OPTION], +[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl +m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), + _LT_MANGLE_DEFUN([$1], [$2]), + [m4_warning([Unknown $1 option '$2'])])[]dnl +]) + + +# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) +# ------------------------------------------------------------ +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +m4_define([_LT_IF_OPTION], +[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) + + +# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) +# ------------------------------------------------------- +# Execute IF-NOT-SET unless at least one option in OPTION-LIST for MACRO-NAME +# is set. +m4_define([_LT_UNLESS_OPTIONS], +[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), + [m4_define([$0_found])])])[]dnl +m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 +])[]dnl +]) + + +# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) +# ---------------------------------------- +# OPTION-LIST is a space-separated list of Libtool options associated +# with MACRO-NAME. If any OPTION has a matching handler declared with +# LT_OPTION_DEFINE, dispatch to that macro; otherwise warn about +# the unknown option. +m4_defun([_LT_SET_OPTIONS], +[# Set options +m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [_LT_SET_OPTION([$1], _LT_Option)]) + +m4_if([$1],[LT_INIT],[ + dnl + dnl Simply set some default values (i.e. off) if boolean options were not + dnl specified: + _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no + ]) + _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no + ]) + dnl + dnl If no reference was made to various pairs of opposing options, then + dnl we run the default mode handler for the pair.
For example, if neither + dnl 'shared' nor 'disable-shared' was passed, we enable building of shared + dnl archives by default: + _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) + _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], + [_LT_ENABLE_FAST_INSTALL]) + _LT_UNLESS_OPTIONS([LT_INIT], [aix-soname=aix aix-soname=both aix-soname=svr4], + [_LT_WITH_AIX_SONAME([aix])]) + ]) +])# _LT_SET_OPTIONS + + +## --------------------------------- ## +## Macros to handle LT_INIT options. ## +## --------------------------------- ## + +# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) +# ----------------------------------------- +m4_define([_LT_MANGLE_DEFUN], +[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) + + +# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) +# ----------------------------------------------- +m4_define([LT_OPTION_DEFINE], +[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl +])# LT_OPTION_DEFINE + + +# dlopen +# ------ +LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes +]) + +AU_DEFUN([AC_LIBTOOL_DLOPEN], +[_LT_SET_OPTION([LT_INIT], [dlopen]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'dlopen' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) + + +# win32-dll +# --------- +# Declare package support for building win32 dll's.
+LT_OPTION_DEFINE([LT_INIT], [win32-dll], +[enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) + AC_CHECK_TOOL(AS, as, false) + AC_CHECK_TOOL(DLLTOOL, dlltool, false) + AC_CHECK_TOOL(OBJDUMP, objdump, false) + ;; +esac + +test -z "$AS" && AS=as +_LT_DECL([], [AS], [1], [Assembler program])dnl + +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl + +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl +])# win32-dll + +AU_DEFUN([AC_LIBTOOL_WIN32_DLL], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +_LT_SET_OPTION([LT_INIT], [win32-dll]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'win32-dll' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) + + +# _LT_ENABLE_SHARED([DEFAULT]) +# ---------------------------- +# implement the --enable-shared flag, and support the 'shared' and +# 'disable-shared' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_SHARED], +[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([shared], + [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], + [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) + + _LT_DECL([build_libtool_libs], [enable_shared], [0], + [Whether or not to build shared libraries]) +])# _LT_ENABLE_SHARED + +LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) +]) + +AC_DEFUN([AC_DISABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], [disable-shared]) +]) + +AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) +AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_SHARED], []) +dnl AC_DEFUN([AM_DISABLE_SHARED], []) + + + +# _LT_ENABLE_STATIC([DEFAULT]) +# ---------------------------- +# implement the --enable-static flag, and support the 'static' and +# 'disable-static' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_STATIC], +[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([static], + [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], + [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_static=]_LT_ENABLE_STATIC_DEFAULT) + + _LT_DECL([build_old_libs], [enable_static], [0], + [Whether or not to build static libraries]) +])# _LT_ENABLE_STATIC + +LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) +]) + +AC_DEFUN([AC_DISABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], [disable-static]) +]) + +AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) +AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_STATIC], []) +dnl AC_DEFUN([AM_DISABLE_STATIC], []) + + + +# _LT_ENABLE_FAST_INSTALL([DEFAULT]) +# ---------------------------------- +# implement the --enable-fast-install flag, and support the 'fast-install' +# and 'disable-fast-install' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_FAST_INSTALL], +[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([fast-install], + [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], + [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) + +_LT_DECL([fast_install], [enable_fast_install], [0], + [Whether or not to optimize for fast installation])dnl +])# _LT_ENABLE_FAST_INSTALL + +LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) + +# Old names: +AU_DEFUN([AC_ENABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the 'fast-install' option into LT_INIT's first parameter.]) +]) + +AU_DEFUN([AC_DISABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], [disable-fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the 'disable-fast-install' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) +dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) + + +# _LT_WITH_AIX_SONAME([DEFAULT]) +# ------------------------------ +# implement the --with-aix-soname flag, and support the `aix-soname=aix' +# and `aix-soname=both' and `aix-soname=svr4' LT_INIT options. DEFAULT +# is either `aix', `both' or `svr4'. If omitted, it defaults to `aix'.
+m4_define([_LT_WITH_AIX_SONAME], +[m4_define([_LT_WITH_AIX_SONAME_DEFAULT], [m4_if($1, svr4, svr4, m4_if($1, both, both, aix))])dnl +shared_archive_member_spec= +case $host,$enable_shared in +power*-*-aix[[5-9]]*,yes) + AC_MSG_CHECKING([which variant of shared library versioning to provide]) + AC_ARG_WITH([aix-soname], + [AS_HELP_STRING([--with-aix-soname=aix|svr4|both], + [shared library versioning (aka "SONAME") variant to provide on AIX, @<:@default=]_LT_WITH_AIX_SONAME_DEFAULT[@:>@.])], + [case $withval in + aix|svr4|both) + ;; + *) + AC_MSG_ERROR([Unknown argument to --with-aix-soname]) + ;; + esac + lt_cv_with_aix_soname=$with_aix_soname], + [AC_CACHE_VAL([lt_cv_with_aix_soname], + [lt_cv_with_aix_soname=]_LT_WITH_AIX_SONAME_DEFAULT) + with_aix_soname=$lt_cv_with_aix_soname]) + AC_MSG_RESULT([$with_aix_soname]) + if test aix != "$with_aix_soname"; then + # For the AIX way of multilib, we name the shared archive member + # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', + # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. + # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, + # the AIX toolchain works better with OBJECT_MODE set (default 32). + if test 64 = "${OBJECT_MODE-32}"; then + shared_archive_member_spec=shr_64 + else + shared_archive_member_spec=shr + fi + fi + ;; +*) + with_aix_soname=aix + ;; +esac + +_LT_DECL([], [shared_archive_member_spec], [0], + [Shared archive member basename, for filename based shared library versioning on AIX])dnl +])# _LT_WITH_AIX_SONAME + +LT_OPTION_DEFINE([LT_INIT], [aix-soname=aix], [_LT_WITH_AIX_SONAME([aix])]) +LT_OPTION_DEFINE([LT_INIT], [aix-soname=both], [_LT_WITH_AIX_SONAME([both])]) +LT_OPTION_DEFINE([LT_INIT], [aix-soname=svr4], [_LT_WITH_AIX_SONAME([svr4])]) + + +# _LT_WITH_PIC([MODE]) +# -------------------- +# implement the --with-pic flag, and support the 'pic-only' and 'no-pic' +# LT_INIT options. +# MODE is either 'yes' or 'no'.
If omitted, pic_mode is set to 'default' (use both PIC and non-PIC objects). +m4_define([_LT_WITH_PIC], +[AC_ARG_WITH([pic], + [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], + [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], + [lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for lt_pkg in $withval; do + IFS=$lt_save_ifs + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [pic_mode=m4_default([$1], [default])]) + +_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl +])# _LT_WITH_PIC + +LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) + +# Old name: +AU_DEFUN([AC_LIBTOOL_PICMODE], +[_LT_SET_OPTION([LT_INIT], [pic-only]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'pic-only' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) + +## ----------------- ## +## LTDL_INIT Options ## +## ----------------- ## + +m4_define([_LTDL_MODE], []) +LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], + [m4_define([_LTDL_MODE], [nonrecursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [recursive], + [m4_define([_LTDL_MODE], [recursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [subproject], + [m4_define([_LTDL_MODE], [subproject])]) + +m4_define([_LTDL_TYPE], []) +LT_OPTION_DEFINE([LTDL_INIT], [installable], + [m4_define([_LTDL_TYPE], [installable])]) +LT_OPTION_DEFINE([LTDL_INIT], [convenience], + [m4_define([_LTDL_TYPE], [convenience])]) diff --git a/config/m4/ltsugar.m4 b/config/m4/ltsugar.m4 new file mode 100644 index 0000000..48bc934 --- /dev/null +++ b/config/m4/ltsugar.m4 @@ -0,0 +1,124 @@ +# ltsugar.m4 -- libtool m4 base layer.
-*-Autoconf-*- +# +# Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software +# Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 6 ltsugar.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) + + +# lt_join(SEP, ARG1, [ARG2...]) +# ----------------------------- +# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their +# associated separator. +# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier +# versions in m4sugar had bugs. +m4_define([lt_join], +[m4_if([$#], [1], [], + [$#], [2], [[$2]], + [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) +m4_define([_lt_join], +[m4_if([$#$2], [2], [], + [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) + + +# lt_car(LIST) +# lt_cdr(LIST) +# ------------ +# Manipulate m4 lists. +# These macros are necessary as long as we still need to support +# Autoconf-2.59, which quotes differently. +m4_define([lt_car], [[$1]]) +m4_define([lt_cdr], +[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], + [$#], 1, [], + [m4_dquote(m4_shift($@))])]) +m4_define([lt_unquote], $1) + + +# lt_append(MACRO-NAME, STRING, [SEPARATOR]) +# ------------------------------------------ +# Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'. +# Note that neither SEPARATOR nor STRING are expanded; they are appended +# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). +# No SEPARATOR is output if MACRO-NAME was previously undefined (different +# than defined and empty). +# +# This macro is needed until we can rely on Autoconf 2.62, since earlier +# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
+m4_define([lt_append], +[m4_define([$1], + m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) + + + +# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) +# ---------------------------------------------------------- +# Produce a SEP delimited list of all paired combinations of elements of +# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list +# has the form PREFIXmINFIXSUFFIXn. +# Needed until we can rely on m4_combine added in Autoconf 2.62. +m4_define([lt_combine], +[m4_if(m4_eval([$# > 3]), [1], + [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl +[[m4_foreach([_Lt_prefix], [$2], + [m4_foreach([_Lt_suffix], + ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, + [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) + + +# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) +# ----------------------------------------------------------------------- +# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited +# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
+m4_define([lt_if_append_uniq], +[m4_ifdef([$1], + [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], + [lt_append([$1], [$2], [$3])$4], + [$5])], + [lt_append([$1], [$2], [$3])$4])]) + + +# lt_dict_add(DICT, KEY, VALUE) +# ----------------------------- +m4_define([lt_dict_add], +[m4_define([$1($2)], [$3])]) + + +# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) +# -------------------------------------------- +m4_define([lt_dict_add_subkey], +[m4_define([$1($2:$3)], [$4])]) + + +# lt_dict_fetch(DICT, KEY, [SUBKEY]) +# ---------------------------------- +m4_define([lt_dict_fetch], +[m4_ifval([$3], + m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), + m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) + + +# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) +# ----------------------------------------------------------------- +m4_define([lt_if_dict_fetch], +[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], + [$5], + [$6])]) + + +# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) +# -------------------------------------------------------------- +m4_define([lt_dict_filter], +[m4_if([$5], [], [], + [lt_join(m4_quote(m4_default([$4], [[, ]])), + lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), + [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl +]) diff --git a/config/m4/ltversion.m4 b/config/m4/ltversion.m4 new file mode 100644 index 0000000..fa04b52 --- /dev/null +++ b/config/m4/ltversion.m4 @@ -0,0 +1,23 @@ +# ltversion.m4 -- version numbers -*- Autoconf -*- +# +# Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc. +# Written by Scott James Remnant, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. 
+ +# @configure_input@ + +# serial 4179 ltversion.m4 +# This file is part of GNU Libtool + +m4_define([LT_PACKAGE_VERSION], [2.4.6]) +m4_define([LT_PACKAGE_REVISION], [2.4.6]) + +AC_DEFUN([LTVERSION_VERSION], +[macro_version='2.4.6' +macro_revision='2.4.6' +_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) +_LT_DECL(, macro_revision, 0) +]) diff --git a/config/m4/lt~obsolete.m4 b/config/m4/lt~obsolete.m4 new file mode 100644 index 0000000..c6b26f8 --- /dev/null +++ b/config/m4/lt~obsolete.m4 @@ -0,0 +1,99 @@ +# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- +# +# Copyright (C) 2004-2005, 2007, 2009, 2011-2015 Free Software +# Foundation, Inc. +# Written by Scott James Remnant, 2004. +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 5 lt~obsolete.m4 + +# These exist entirely to fool aclocal when bootstrapping libtool. +# +# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN), +# which have later been changed to m4_define as they aren't part of the +# exported API, or moved to Autoconf or Automake where they belong. +# +# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN +# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us +# using a macro with the same name in our local m4/libtool.m4 it'll +# pull the old libtool.m4 in (it doesn't see our shiny new m4_define +# and doesn't know about Autoconf macros at all.) +# +# So we provide this file, which has a silly filename so it's always +# included after everything else. This provides aclocal with the +# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything +# because those macros already exist, or will be overwritten later. +# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 
+# +# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. +# Yes, that means every name once taken will need to remain here until +# we give up compatibility with versions before 1.7, at which point +# we need to keep only those names which we still refer to. + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) + +m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) +m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) +m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) +m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) +m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) +m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) +m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) +m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) +m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) +m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) +m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) +m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) +m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) +m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) +m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) +m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) +m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) +m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) +m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) +m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) +m4_ifndef([AC_DEPLIBS_CHECK_METHOD], 
[AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) +m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) +m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) +m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) +m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) +m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) +m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) +m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) +m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) +m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) +m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) +m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) +m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) +m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) +m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) +m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) +m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) +m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) +m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) +m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) +m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) 
+m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) +m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) +m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) +m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) +m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) +m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) +m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) +m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) +m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) +m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) diff --git a/config/m4/mpi.m4 b/config/m4/mpi.m4 new file mode 100644 index 0000000..f7822c9 --- /dev/null +++ b/config/m4/mpi.m4 @@ -0,0 +1,41 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + +# +# Enable compiling tests with MPI +# +AC_ARG_WITH([mpi], + [AS_HELP_STRING([--with-mpi@<:@=MPIHOME@:>@], [Compile MPI tests (default is NO).])],[:],[with_mpi=no]) + + AS_IF([test "x$with_mpi" != xyes && test "x$with_mpi" != xno], + [ + AS_IF([test -d "$with_mpi/bin"],[with_mpi="$with_mpi/bin"],[:]) + mpi_path=$with_mpi;with_mpi=yes + ], + mpi_path=$PATH) + +# +# Search for mpicc and mpirun in the given path. 
+# +AS_IF([test "x$with_mpi" = xyes], + [ + AC_ARG_VAR(MPICC,[MPI C compiler command]) + AC_PATH_PROGS(MPICC,mpicc mpiicc,"",$mpi_path) + AC_ARG_VAR(MPIRUN,[MPI launch command]) + AC_PATH_PROGS(MPIRUN,mpirun mpiexec aprun orterun,"",$mpi_path) + AS_IF([test -z "$MPIRUN"], + AC_MSG_ERROR([--with-mpi was requested but MPI was not found in the PATH in $mpi_path]),[:]) + ],[:]) + +AS_IF([test -n "$MPICC"], + [AC_DEFINE([HAVE_MPI], [1], [MPI support]) + mpi_enable=enabled], + [mpi_enable=disabled]) +AM_CONDITIONAL([HAVE_MPI], [test -n "$MPIRUN"]) +AM_CONDITIONAL([HAVE_MPICC], [test -n "$MPICC"]) +AM_CONDITIONAL([HAVE_MPIRUN], [test -n "$MPIRUN"]) diff --git a/config/m4/rocm.m4 b/config/m4/rocm.m4 new file mode 100644 index 0000000..ab58b6d --- /dev/null +++ b/config/m4/rocm.m4 @@ -0,0 +1,136 @@ +# +# Copyright (C) Advanced Micro Devices, Inc. 2016 - 2018. ALL RIGHTS RESERVED. +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# ROCM_PARSE_FLAGS(ARG, VAR_LIBS, VAR_LDFLAGS, VAR_CPPFLAGS) +# ---------------------------------------------------------- +# Parse whitespace-separated ARG into appropriate LIBS, LDFLAGS, and +# CPPFLAGS variables. +AC_DEFUN([ROCM_PARSE_FLAGS], +[for arg in $$1 ; do + AS_CASE([$arg], + [yes], [], + [no], [], + [-l*|*.a|*.so], [$2="$$2 $arg"], + [-L*|-WL*|-Wl*], [$3="$$3 $arg"], + [-I*], [$4="$$4 $arg"], + [*lib|*lib/|*lib64|*lib64/],[AS_IF([test -d $arg], [$3="$$3 -L$arg"], + [AC_MSG_WARN([$arg of $1 not parsed])])], + [*include|*include/],[AS_IF([test -d $arg], [$4="$$4 -I$arg"], + [AC_MSG_WARN([$arg of $1 not parsed])])], + [AC_MSG_WARN([$arg of $1 not parsed])]) +done]) + +# ROCM_BUILD_FLAGS(ARG, VAR_LIBS, VAR_LDFLAGS, VAR_CPPFLAGS) +# ---------------------------------------------------------- +# Parse value of ARG into appropriate LIBS, LDFLAGS, and +# CPPFLAGS variables. 
+AC_DEFUN([ROCM_BUILD_FLAGS], + [$4="-I$1/include/hsa -I$1/include" + $3="-L$1/hsa/lib -L$1/lib" + $2="-lhsa-runtime64"] +) + +# HIP_BUILD_FLAGS(ARG, VAR_LIBS, VAR_LDFLAGS, VAR_CPPFLAGS) +# ---------------------------------------------------------- +# Parse value of ARG into appropriate LIBS, LDFLAGS, and +# CPPFLAGS variables. +AC_DEFUN([HIP_BUILD_FLAGS], + [$4="-D__HIP_PLATFORM_HCC__ -I$1/include/hip -I$1/include" + $3="-L$1/hip/lib -L$1/lib" + $2="-lhip_hcc"] +) + +# +# Check for ROCm support +# +AC_DEFUN([UCX_CHECK_ROCM],[ + +AS_IF([test "x$rocm_checked" != "xyes"],[ + +AC_ARG_WITH([rocm], + [AS_HELP_STRING([--with-rocm=(DIR)], + [Enable the use of ROCm (default is autodetect).])], + [], + [with_rocm=guess]) + +rocm_happy=no +hip_happy=no +AS_IF([test "x$with_rocm" != "xno"], + [AS_CASE(["x$with_rocm"], + [x|xguess|xyes], + [AC_MSG_NOTICE([ROCm path was not specified. Guessing ...]) + with_rocm="/opt/rocm" + ROCM_BUILD_FLAGS([$with_rocm], + [ROCM_LIBS], [ROCM_LDFLAGS], [ROCM_CPPFLAGS])], + [x/*], + [AC_MSG_NOTICE([ROCm path given as $with_rocm ...]) + ROCM_BUILD_FLAGS([$with_rocm], + [ROCM_LIBS], [ROCM_LDFLAGS], [ROCM_CPPFLAGS])], + [AC_MSG_NOTICE([ROCm flags given ...]) + ROCM_PARSE_FLAGS([with_rocm], + [ROCM_LIBS], [ROCM_LDFLAGS], [ROCM_CPPFLAGS])]) + + SAVE_CPPFLAGS="$CPPFLAGS" + SAVE_LDFLAGS="$LDFLAGS" + SAVE_LIBS="$LIBS" + + CPPFLAGS="$ROCM_CPPFLAGS $CPPFLAGS" + LDFLAGS="$ROCM_LDFLAGS $LDFLAGS" + LIBS="$ROCM_LIBS $LIBS" + + rocm_happy=yes + AS_IF([test "x$rocm_happy" = xyes], + [AC_CHECK_HEADERS([hsa.h], [rocm_happy=yes], [rocm_happy=no])]) + AS_IF([test "x$rocm_happy" = xyes], + [AC_CHECK_HEADERS([hsa_ext_amd.h], [rocm_happy=yes], [rocm_happy=no])]) + AS_IF([test "x$rocm_happy" = xyes], + [AC_CHECK_LIB([hsa-runtime64], [hsa_init], [rocm_happy=yes], [rocm_happy=no])]) + + AS_IF([test "x$rocm_happy" = "xyes"], + [AC_SUBST([ROCM_CPPFLAGS]) + AC_SUBST([ROCM_LDFLAGS]) + AC_SUBST([ROCM_LIBS])], + [AC_MSG_WARN([ROCm not found])]) + + CPPFLAGS="$SAVE_CPPFLAGS" +
LDFLAGS="$SAVE_LDFLAGS" + LIBS="$SAVE_LIBS" + + HIP_BUILD_FLAGS([$with_rocm], [HIP_LIBS], [HIP_LDFLAGS], [HIP_CPPFLAGS]) + + CPPFLAGS="$HIP_CPPFLAGS $CPPFLAGS" + LDFLAGS="$HIP_LDFLAGS $LDFLAGS" + LIBS="$HIP_LIBS $LIBS" + + hip_happy=yes + AS_IF([test "x$hip_happy" = xyes], + [AC_CHECK_HEADERS([hip_runtime.h], [hip_happy=yes], [hip_happy=no])]) + AS_IF([test "x$hip_happy" = xyes], + [AC_CHECK_LIB([hip_hcc], [hipFree], [hip_happy=yes], [hip_happy=no])]) + AS_IF([test "x$hip_happy" = xyes], [HIP_CXXFLAGS="--std=gnu++11"], []) + + CPPFLAGS="$SAVE_CPPFLAGS" + LDFLAGS="$SAVE_LDFLAGS" + LIBS="$SAVE_LIBS" + + AS_IF([test "x$hip_happy" = "xyes"], + [AC_SUBST([HIP_CPPFLAGS]) + AC_SUBST([HIP_CXXFLAGS]) + AC_SUBST([HIP_LDFLAGS]) + AC_SUBST([HIP_LIBS])], + [AC_MSG_WARN([HIP Runtime not found])]) + + ], + [AC_MSG_WARN([ROCm was explicitly disabled])] +) + +rocm_checked=yes +AM_CONDITIONAL([HAVE_ROCM], [test "x$rocm_happy" != xno]) +AM_CONDITIONAL([HAVE_HIP], [test "x$hip_happy" != xno]) + +]) + +]) diff --git a/config/m4/rte.m4 b/config/m4/rte.m4 new file mode 100644 index 0000000..860926e --- /dev/null +++ b/config/m4/rte.m4 @@ -0,0 +1,16 @@ +AC_ARG_WITH([rte], + [AC_HELP_STRING([--with-rte(=DIR)], + [Where to find the RTE libraries and header + files] + )], [], [with_rte=no]) + +AS_IF([test "x$with_rte" != xno], + [ + AC_CHECK_HEADERS([$with_rte/include/rte.h], [rte_happy="yes"], [rte_happy="no"]) + AS_IF([test "x$rte_happy" = xyes], + [ + AC_SUBST(RTE_CPPFLAGS, "-I$with_rte/include") + AC_SUBST(RTE_LDFLAGS, "-L$with_rte/lib -lrte") + AC_DEFINE([HAVE_RTE], [1], [RTE support]) + ], [])], + []) diff --git a/config/m4/sysdep.m4 b/config/m4/sysdep.m4 new file mode 100644 index 0000000..b1e5485 --- /dev/null +++ b/config/m4/sysdep.m4 @@ -0,0 +1,291 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + + +AC_FUNC_ALLOCA + + +# +# POSIX shared memory and timers (librt) +# +#IPC_INFO +AC_CHECK_LIB([rt], [shm_open], [], [AC_MSG_ERROR([librt not found])]) +AC_CHECK_LIB([rt], [timer_create], [], [AC_MSG_ERROR([librt not found])]) + + +# +# Extended string functions +# +AC_CHECK_HEADERS([libgen.h]) +AC_CHECK_DECLS([asprintf, basename, fmemopen], [], + [AC_MSG_ERROR([GNU string extensions not found])], + [#define _GNU_SOURCE 1 + #include <string.h> + #include <stdio.h> + #ifdef HAVE_LIBGEN_H + #include <libgen.h> + #endif + ]) + + +# +# CPU-sets +# +AC_CHECK_HEADERS([sys/cpuset.h]) +AC_CHECK_DECLS([CPU_ZERO, CPU_ISSET], [], + [AC_MSG_ERROR([CPU_ZERO/CPU_ISSET not found])], + [#define _GNU_SOURCE 1 + #include <sys/types.h> + #include <sched.h> + #ifdef HAVE_SYS_CPUSET_H + #include <sys/cpuset.h> + #endif + ]) +AC_CHECK_TYPES([cpu_set_t, cpuset_t], [], [], + [#define _GNU_SOURCE 1 + #include <sys/types.h> + #include <sched.h> + #ifdef HAVE_SYS_CPUSET_H + #include <sys/cpuset.h> + #endif]) + + +# +# Type for sighandler +# +AC_CHECK_TYPES([sighandler_t, __sighandler_t], [], [], + [#define _GNU_SOURCE 1 + #include <signal.h>]) + + +# +# pthread +# +AC_CHECK_HEADERS([pthread_np.h]) +AC_SEARCH_LIBS(pthread_create, pthread) +AC_SEARCH_LIBS(pthread_atfork, pthread) + + +# +# Misc.
Linux-specific functions +# +AC_CHECK_FUNCS([clearenv]) +AC_CHECK_FUNCS([malloc_trim]) +AC_CHECK_FUNCS([memalign]) +AC_CHECK_FUNCS([posix_memalign]) +AC_CHECK_FUNCS([mremap]) +AC_CHECK_FUNCS([sched_setaffinity sched_getaffinity]) +AC_CHECK_FUNCS([cpuset_setaffinity cpuset_getaffinity]) + + +# +# Route file descriptor signal to specific thread +# +AC_CHECK_DECLS([F_SETOWN_EX], [], [], [#define _GNU_SOURCE 1 +#include <fcntl.h>]) + + +# +# Ethtool definitions +# +AC_CHECK_DECLS([ethtool_cmd_speed, SPEED_UNKNOWN], [], [], + [#include <linux/ethtool.h>]) + + +# +# PowerPC query for TB frequency +# +AC_CHECK_DECLS([__ppc_get_timebase_freq], [], [], [#include <sys/platform/ppc.h>]) +AC_CHECK_HEADERS([sys/platform/ppc.h]) + + +# +# Google Testing framework +# +GTEST_LIB_CHECK([1.5.0], [true], [true]) + + +# +# Valgrind support +# +AC_ARG_WITH([valgrind], + AS_HELP_STRING([--with-valgrind], + [Enable Valgrind annotations (small runtime overhead, default NO)]), + [], + [with_valgrind=no] +) +AS_IF([test "x$with_valgrind" = xno], + [AC_DEFINE([NVALGRIND], 1, [Define to 1 to disable Valgrind annotations.])], + [AS_IF([test ! -d "$with_valgrind"], + [AC_MSG_NOTICE([Valgrind path was not defined, guessing ...]) + with_valgrind=/usr], [:]) + AC_CHECK_HEADER([$with_valgrind/include/valgrind/memcheck.h], [], + [AC_MSG_ERROR([Valgrind memcheck support requested, but not found, install valgrind-devel rpm.])]) + CPPFLAGS="$CPPFLAGS -I$with_valgrind/include" + ] +) + + +# +# NUMA support +# +AC_ARG_ENABLE([numa], + AS_HELP_STRING([--disable-numa], [Disable NUMA support]), + [ + AC_MSG_NOTICE([NUMA support is disabled]) + ], + [ + AC_DEFUN([NUMA_W1], [not found. Please reconfigure with --disable-numa. ]) + AC_DEFUN([NUMA_W2], [Warning: this may have negative impact on library performance.
It is better to install]) + AC_CHECK_HEADERS([numa.h numaif.h], [], + [AC_MSG_ERROR([NUMA headers NUMA_W1 NUMA_W2 libnuma-devel package])]) + AC_CHECK_LIB(numa, mbind, + [AC_SUBST(NUMA_LIBS, [-lnuma])], + [AC_MSG_ERROR([NUMA library NUMA_W1 NUMA_W2 libnuma package])]) + AC_DEFINE([HAVE_NUMA], 1, [Define to 1 to enable NUMA support]) + AC_CHECK_TYPES([struct bitmask], [], [], [[#include <numa.h>]]) + ] +) + + +# +# Malloc hooks +# +AC_MSG_CHECKING([malloc hooks]) +SAVE_CFLAGS=$CFLAGS +CFLAGS="$CFLAGS $CFLAGS_NO_DEPRECATED" +CHECK_CROSS_COMP([AC_LANG_SOURCE([#include <malloc.h> + static int rc = 1; + void *ptr; + void *myhook(size_t size, const void *caller) { + rc = 0; + return NULL; + } + int main() { + __malloc_hook = myhook; + ptr = malloc(1); + return rc; + }])], + [AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_MALLOC_HOOK], 1, [malloc hooks support])], + [AC_MSG_RESULT([no]) + AC_MSG_WARN([malloc hooks are not supported])] + ) +CFLAGS=$SAVE_CFLAGS + + +# +# Check for capability.h header (usually comes from libcap-devel package) and +# make sure it defines the types we need +# +AC_CHECK_HEADERS([sys/capability.h], + [AC_CHECK_TYPES([cap_user_header_t, cap_user_data_t], [], + [AC_DEFINE([HAVE_SYS_CAPABILITY_H], [0], [Linux capability API support])], + [[#include <sys/capability.h>]])] + ) + +# +# Check for PR_SET_PTRACER +# +AC_CHECK_DECLS([PR_SET_PTRACER], [], [], [#include <sys/prctl.h>]) + + +# +# ipv6 s6_addr32/__u6_addr32 shortcuts for in6_addr +# ip header structure layout name +# +AC_CHECK_MEMBER(struct in6_addr.s6_addr32, + [AC_DEFINE([HAVE_IN6_ADDR_S6_ADDR32], [1], + [struct in6_addr has s6_addr32 member])], + [], + [#include <netinet/in.h>]) +AC_CHECK_MEMBER(struct in6_addr.__u6_addr.__u6_addr32, + [AC_DEFINE([HAVE_IN6_ADDR_U6_ADDR32], [1], + [struct in6_addr is BSD-style])], + [], + [#include <netinet/in.h>]) +AC_CHECK_MEMBER(struct iphdr.daddr.s_addr, + [AC_DEFINE([HAVE_IPHDR_DADDR], [1], + [struct iphdr has daddr member])], + [], + [#include <linux/ip.h>]) +AC_CHECK_MEMBER(struct ip.ip_dst.s_addr, + [AC_DEFINE([HAVE_IP_IP_DST], [1], + [struct ip
has ip_dst member])], + [], + [#include <sys/types.h> + #include <netinet/in.h> + #include <netinet/ip.h>]) + + +# +# struct sigevent reporting thread id +# +AC_CHECK_MEMBER(struct sigevent._sigev_un._tid, + [AC_DEFINE([HAVE_SIGEVENT_SIGEV_UN_TID], [1], + [struct sigevent has _sigev_un._tid])], + [], + [#include <signal.h>]) +AC_CHECK_MEMBER(struct sigevent.sigev_notify_thread_id, + [AC_DEFINE([HAVE_SIGEVENT_SIGEV_NOTIFY_THREAD_ID], [1], + [struct sigevent has sigev_notify_thread_id])], + [], + [#include <signal.h>]) + + +# +# sa_restorer is something that only Linux has +# +AC_CHECK_MEMBER(struct sigaction.sa_restorer, + [AC_DEFINE([HAVE_SIGACTION_SA_RESTORER], [1], + [struct sigaction has sa_restorer member])], + [], + [#include <signal.h>]) + + +# +# epoll vs. kqueue +# +AC_CHECK_HEADERS([sys/epoll.h]) +AC_CHECK_HEADERS([sys/eventfd.h]) +AC_CHECK_HEADERS([sys/event.h]) + + +# +# FreeBSD-specific threading functions +# +AC_CHECK_HEADERS([sys/thr.h]) + + +# +# malloc headers are Linux-specific +# +AC_CHECK_HEADERS([malloc.h]) +AC_CHECK_HEADERS([malloc_np.h]) + + +# +# endianness +# +AC_CHECK_HEADERS([endian.h sys/endian.h]) + + +# +# Linux-only headers +# +AC_CHECK_HEADERS([linux/mman.h]) +AC_CHECK_HEADERS([linux/ip.h]) +AC_CHECK_HEADERS([linux/futex.h]) + + +# +# Networking headers +# +AC_CHECK_HEADERS([net/ethernet.h], [], [], + [#include <sys/types.h>]) +AC_CHECK_HEADERS([netinet/ip.h], [], [], + [#include <sys/types.h> + #include <netinet/in.h>]) diff --git a/config/m4/ucm.m4 b/config/m4/ucm.m4 new file mode 100644 index 0000000..9c7c820 --- /dev/null +++ b/config/m4/ucm.m4 @@ -0,0 +1,109 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms.
+# + + +# +# Enable overriding library symbols +# +AC_ARG_ENABLE([symbol-override], + AS_HELP_STRING([--disable-symbol-override], [Disable overriding library symbols, default: NO]), + [], + [enable_symbol_override=yes]) + +AS_IF([test "x$enable_symbol_override" = xyes], + [AC_DEFINE([ENABLE_SYMBOL_OVERRIDE], [1], [Enable symbol override])], + [:] +) + +# +# Memory allocator selection +# +AC_ARG_WITH([allocator], + [AS_HELP_STRING([--with-allocator=NAME], + [Build UCX with predefined memory allocator. The supported values are: + ptmalloc286. Default: ptmalloc286])], + [], + [with_allocator=ptmalloc286]) + +case ${with_allocator} in + ptmalloc286) + AC_MSG_NOTICE(Memory allocator is ptmalloc-2.8.6 version) + AC_DEFINE([HAVE_UCM_PTMALLOC286], 1, [Use ptmalloc-2.8.6 version]) + HAVE_UCM_PTMALLOC286=yes + ;; + *) + AC_MSG_ERROR(Cannot continue. Unsupported memory allocator name + in --with-allocator=[$with_allocator]) + ;; +esac + +AM_CONDITIONAL([HAVE_UCM_PTMALLOC286],[test "x$HAVE_UCM_PTMALLOC286" = "xyes"]) + +AC_CHECK_FUNCS([malloc_get_state malloc_set_state], + [], + [], + [#include <malloc.h>]) + + +# +# Madvise flags +# +AC_CHECK_DECLS([MADV_FREE, + MADV_REMOVE, + POSIX_MADV_DONTNEED], + [], + [], + [#include <sys/mman.h>]) + + +# BISTRO hooks infrastructure +# +# SYS_xxx macro +# +mmap_hooks_happy=yes +AC_CHECK_DECLS([SYS_mmap, + SYS_munmap, + SYS_mremap, + SYS_brk, + SYS_madvise], + [], + [mmap_hooks_happy=no], dnl mmap syscalls are not defined + [#include <sys/syscall.h>]) + +shm_hooks_happy=yes +AC_CHECK_DECLS([SYS_shmat, + SYS_shmdt], + [], + [shm_hooks_happy=no], + [#include <sys/syscall.h>]) + +ipc_hooks_happy=yes +AC_CHECK_DECLS([SYS_ipc], + [], + [ipc_hooks_happy=no], + [#include <sys/syscall.h>]) + +AS_IF([test "x$mmap_hooks_happy" = "xyes"], + [AS_IF([test "x$ipc_hooks_happy" = "xyes" || test "x$shm_hooks_happy" = "xyes"], + [bistro_hooks_happy=yes])]) + +AS_IF([test "x$bistro_hooks_happy" = "xyes"], + [AC_DEFINE([UCM_BISTRO_HOOKS], [1], [Enable BISTRO hooks])], + [AC_DEFINE([UCM_BISTRO_HOOKS], [0], [Enable BISTRO hooks]) +
AC_MSG_WARN([Some of required syscalls could not be found]) + AC_MSG_WARN([BISTRO mmap hook mode is disabled])]) + +AC_CHECK_FUNCS([__curbrk], [], [], []) + +# +# tcmalloc library - for testing only +# +SAVE_LDFLAGS="$LDFLAGS" +AC_CHECK_LIB([tcmalloc], [tc_malloc], + [have_tcmalloc=yes + TCMALLOC_LIB="-ltcmalloc"], + [have_tcmalloc=no]) +AM_CONDITIONAL([HAVE_TCMALLOC],[test "x$have_tcmalloc" = "xyes"]) diff --git a/config/m4/ucs.m4 b/config/m4/ucs.m4 new file mode 100644 index 0000000..0537145 --- /dev/null +++ b/config/m4/ucs.m4 @@ -0,0 +1,220 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +# Copyright (C) ARM, Ltd. 2016. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + +# +# Internal profiling support. +# This option may affect performance so it is off by default. +# +AC_ARG_ENABLE([profiling], + AS_HELP_STRING([--enable-profiling], [Enable profiling support, default: NO]), + [], + [enable_profiling=no]) + +AS_IF([test "x$enable_profiling" = xyes], + [AS_MESSAGE([enabling profiling]) + AC_DEFINE([HAVE_PROFILING], [1], [Enable profiling]) + HAVE_PROFILING=yes], + [:] +) +AM_CONDITIONAL([HAVE_PROFILING],[test "x$HAVE_PROFILING" = "xyes"]) + + +# +# Detailed backtrace with debug information. +# This option requires binutils-devel package.
+# +AC_ARG_ENABLE([backtrace-detail], + AS_HELP_STRING([--disable-backtrace-detail], [Disable detailed backtrace support, default: NO]), + [], + [enable_backtrace_detail=yes]) + +AS_IF([test "x$enable_backtrace_detail" = xyes], + [ + BT=1 + AC_CHECK_HEADER([bfd.h], [], [AC_MSG_WARN([binutils headers not found])]; BT=0) + AC_CHECK_LIB(bfd, bfd_openr, LIBS="$LIBS -lbfd", [AC_MSG_WARN([bfd library not found])];BT=0) + AC_CHECK_LIB(dl, dlopen, LIBS="$LIBS -ldl", [AC_MSG_WARN([dl library not found])];BT=0) + AC_CHECK_LIB(intl, main, LIBS="$LIBS -lintl", [AC_MSG_WARN([intl library not found])]) + AC_CHECK_TYPES([struct dl_phdr_info], [], [AC_MSG_WARN([struct dl_phdr_info not defined])];BT=0, + [#define _GNU_SOURCE 1 + #include <link.h>]) + if test "x$BT" = "x1"; then + AC_CHECK_FUNCS([cplus_demangle]) + AC_DEFINE([HAVE_DETAILED_BACKTRACE], 1, [Enable detailed backtrace]) + case ${host} in + aarch64*) CFLAGS="$CFLAGS -funwind-tables" ;; + esac + else + AC_MSG_WARN([detailed backtrace is not supported]) + fi + ] +) + + +# +# Enable statistics and counters +# +AC_ARG_ENABLE([stats], + AS_HELP_STRING([--enable-stats], + [Enable statistics, useful for profiling, default: NO]), + [], + [enable_stats=no]) + +AS_IF([test "x$enable_stats" = xyes], + [AS_MESSAGE([enabling statistics]) + AC_DEFINE([ENABLE_STATS], [1], [Enable statistics]) + HAVE_STATS=yes], + [:] + ) +AM_CONDITIONAL([HAVE_STATS],[test "x$HAVE_STATS" = "xyes"]) + + +# +# Enable tuning params at runtime +# +AC_ARG_ENABLE([tuning], + AS_HELP_STRING([--enable-tuning], + [Enable parameter tuning in run-time, default: NO]), + [], + [enable_tuning=no]) + +AS_IF([test "x$enable_tuning" = xyes], + [AS_MESSAGE([enabling tuning]) + AC_DEFINE([ENABLE_TUNING], [1], [Enable tuning]) + HAVE_TUNING=yes], + [:] + ) +AM_CONDITIONAL([HAVE_TUNING],[test "x$HAVE_TUNING" = "xyes"]) + + +# +# Enable memory tracking +# +AC_ARG_ENABLE([memtrack], + AS_HELP_STRING([--enable-memtrack], + [Enable memory tracking, useful for profiling, default: NO]),
+ [], + [enable_memtrack=no]) + +AS_IF([test "x$enable_memtrack" = xyes], + [AS_MESSAGE([enabling memory tracking]) + AC_DEFINE([ENABLE_MEMTRACK], [1], [Enable memory tracking]) + HAVE_MEMTRACK=yes], + [:] + ) +AM_CONDITIONAL([HAVE_MEMTRACK],[test "x$HAVE_MEMTRACK" = "xyes"]) + + +# +# Disable logging levels below INFO +# +AC_ARG_ENABLE([logging], + AS_HELP_STRING([--enable-logging], + [Enable debug logging, default: YES]) + ) + +AS_IF([test "x$enable_logging" != xno], + [AC_DEFINE([UCS_MAX_LOG_LEVEL], [UCS_LOG_LEVEL_TRACE_POLL], [Highest log level])], + [AC_DEFINE([UCS_MAX_LOG_LEVEL], [UCS_LOG_LEVEL_INFO], [Highest log level])] + ) + + +# +# Disable assertions +# +AC_ARG_ENABLE([assertions], + AS_HELP_STRING([--disable-assertions], + [Disable code assertions, default: NO]) + ) + +AS_IF([test "x$enable_assertions" != xno], + AC_DEFINE([ENABLE_ASSERT], [1], [Enable assertions]) + ) + +# +# Check if __attribute__((constructor)) works +# +AC_MSG_CHECKING([__attribute__((constructor))]) +CHECK_CROSS_COMP([AC_LANG_SOURCE([static int rc = 1; + static void constructor_test() __attribute__((constructor)); + static void constructor_test() { rc = 0; } + int main() { return rc; }])], + [AC_MSG_RESULT([yes])], + [AC_MSG_ERROR([Cannot continue. Please use compiler that + supports __attribute__((constructor))])] + ) + + +# +# Manual configuration of cacheline size +# +AC_ARG_WITH([cache-line-size], + [AC_HELP_STRING([--with-cache-line-size=SIZE], + [Build UCX with cache line size defined by user. This parameter + overwrites default cache line sizes defines in + UCX (x86-64: 64, Power: 128, ARMv8: 64/128). 
The supported values are: 64, 128])], + [], + [with_cache_line_size=no]) + +AS_IF([test "x$with_cache_line_size" != xno],[ + case ${with_cache_line_size} in + 64) + AC_MSG_RESULT(The cache line size is set to 64B) + AC_DEFINE([HAVE_CACHE_LINE_SIZE], 64, [user defined cache line size]) + ;; + 128) + AC_MSG_RESULT(The cache line size is set to 128B) + AC_DEFINE([HAVE_CACHE_LINE_SIZE], 128, [user defined cache line size]) + ;; + @<:@0-9@:>@*) + AC_MSG_WARN(Unusual cache line size was specified: [$with_cache_line_size]) + AC_DEFINE_UNQUOTED([HAVE_CACHE_LINE_SIZE], [$with_cache_line_size], [user defined cache line size]) + ;; + *) + AC_MSG_ERROR(Cannot continue. Unsupported cache line size [$with_cache_line_size].) + ;; + esac], + []) + + +# +# Architecture specific checks +# +case ${host} in + aarch64*) + AC_MSG_CHECKING([support for CNTVCT_EL0 on aarch64]) + AC_RUN_IFELSE([AC_LANG_PROGRAM( + [[#include <stdint.h>]], + [[uint64_t tmp; asm volatile("mrs %0, cntvct_el0" : "=r" (tmp));]])], + [AC_MSG_RESULT([yes])] + [AC_DEFINE([HAVE_HW_TIMER], [1], [high-resolution hardware timer enabled])], + [AC_MSG_RESULT([no])] + [AC_DEFINE([HAVE_HW_TIMER], [0], [high-resolution hardware timer disabled])] + );; + *) + # HW timer is supported for all other architectures + AC_DEFINE([HAVE_HW_TIMER], [1], [high-resolution hardware timer enabled]) +esac + +# +# Enable built-in memcpy +# +AC_ARG_ENABLE([builtin-memcpy], + AS_HELP_STRING([--enable-builtin-memcpy], + [Enable builtin memcpy routine, default: YES]), + [], + [enable_builtin_memcpy=yes]) + +AS_IF([test "x$enable_builtin_memcpy" != xno], + [AS_MESSAGE([enabling builtin memcpy]) + AC_DEFINE([ENABLE_BUILTIN_MEMCPY], [1], [Enable builtin memcpy])], + [AC_DEFINE([ENABLE_BUILTIN_MEMCPY], [0], [Enable builtin memcpy])] + ) + +AC_CHECK_FUNCS([__clear_cache], [], []) +AC_CHECK_FUNCS([__aarch64_sync_cache_range], [], []) diff --git a/config/module.am b/config/module.am new file mode 100644 index 0000000..bcc28aa --- /dev/null +++
b/config/module.am @@ -0,0 +1,26 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +# Automake silent rules +AM_V_LN = $(AM_V_LN_@AM_V@) +AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +AM_V_LN_0 = echo " LN " +AM_V_LN_1 = true + +local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) + +all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to create proper makefile dependencies +$(local_la_modules): $(module_LTLIBRARIES) + $(AM_V_at)$(MKDIR_P) $(localmoduledir) + $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ + (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ + done + @for lib in *.la $(objdir)/*$(shrext)*; do \ + $(AM_V_LN) $$lib; \ + done diff --git a/configure b/configure new file mode 100755 index 0000000..ddebe9a --- /dev/null +++ b/configure @@ -0,0 +1,32518 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for ucx 1.8. +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. 
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. 
+ { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. 
+fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1 + + test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\ + || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. 
+ as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, +$0: including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." 
+ fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. 
+as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. 
+as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + +SHELL=${CONFIG_SHELL-/bin/sh} + +as_awk_strverscmp=' + # Use only awk features that work with 7th edition Unix awk (1978). + # My, what an old awk you have, Mr. Solaris! + END { + while (length(v1) && length(v2)) { + # Set d1 to be the next thing to compare from v1, and likewise for d2. + # Normally this is a single character, but if v1 and v2 contain digits, + # compare them as integers and fractions as strverscmp does. + if (v1 ~ /^[0-9]/ && v2 ~ /^[0-9]/) { + # Split v1 and v2 into their leading digit string components d1 and d2, + # and advance v1 and v2 past the leading digit strings. + for (len1 = 1; substr(v1, len1 + 1) ~ /^[0-9]/; len1++) continue + for (len2 = 1; substr(v2, len2 + 1) ~ /^[0-9]/; len2++) continue + d1 = substr(v1, 1, len1); v1 = substr(v1, len1 + 1) + d2 = substr(v2, 1, len2); v2 = substr(v2, len2 + 1) + if (d1 ~ /^0/) { + if (d2 ~ /^0/) { + # Compare two fractions. + while (d1 ~ /^0/ && d2 ~ /^0/) { + d1 = substr(d1, 2); len1-- + d2 = substr(d2, 2); len2-- + } + if (len1 != len2 && ! (len1 && len2 && substr(d1, 1, 1) == substr(d2, 1, 1))) { + # The two components differ in length, and the common prefix + # contains only leading zeros. Consider the longer to be less. + d1 = -len1 + d2 = -len2 + } else { + # Otherwise, compare as strings. + d1 = "x" d1 + d2 = "x" d2 + } + } else { + # A fraction is less than an integer. + exit 1 + } + } else { + if (d2 ~ /^0/) { + # An integer is greater than a fraction. + exit 2 + } else { + # Compare two integers. 
+ d1 += 0 + d2 += 0 + } + } + } else { + # The normal case, without worrying about digits. + d1 = substr(v1, 1, 1); v1 = substr(v1, 2) + d2 = substr(v2, 1, 1); v2 = substr(v2, 2) + } + if (d1 < d2) exit 1 + if (d1 > d2) exit 2 + } + # Beware Solaris /usr/xgp4/bin/awk (at least through Solaris 10), + # which mishandles some comparisons of empty strings to integers. + if (length(v2)) exit 1 + if (length(v1)) exit 2 + } +' + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='ucx' +PACKAGE_TARNAME='ucx' +PACKAGE_VERSION='1.8' +PACKAGE_STRING='ucx 1.8' +PACKAGE_BUGREPORT='' +PACKAGE_URL='' + +# Factoring default headers for most tests. 
+ac_includes_default="\
+#include <stdio.h>
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+# endif
+#endif
+#ifdef HAVE_STRING_H
+# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+#  include <memory.h>
+# endif
+# include <string.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_default_prefix=/usr
+ac_subst_vars='am__EXEEXT_FALSE
+am__EXEEXT_TRUE
+LTLIBOBJS
+LIBOBJS
+build_bindings
+build_modules
+VALGRIND_LIBPATH
+GTEST_CXXFLAGS
+UCX_PERFTEST_CC
+PERF_LIB_CXXFLAGS
+CRAY_UGNI_LIBS
+CRAY_UGNI_CFLAGS
+XPMEM_LIBS
+XPMEM_CFLAGS
+KNEM_CPPFLAGS
+RDMACM_LIBS
+RDMACM_LDFLAGS
+RDMACM_CPPFLAGS
+IBCM_LIBS
+LIB_MLX5
+IBVERBS_CFLAGS
+IBVERBS_CPPFLAGS
+IBVERBS_DIR
+IBVERBS_LDFLAGS
+GDR_COPY_LDFLAGS
+GDR_COPY_CPPFLAGS
+HIP_LIBS
+HIP_LDFLAGS
+HIP_CXXFLAGS
+HIP_CPPFLAGS
+ROCM_LIBS
+ROCM_LDFLAGS
+ROCM_CPPFLAGS
+CUDA_LDFLAGS
+CUDA_CPPFLAGS
+UCM_MODULE_LDFLAGS
+MVN
+JDK
+JAVA_HOME
+JAVA
+READLINK
+JAVABIN
+MVNBIN
+RTE_LDFLAGS
+RTE_CPPFLAGS
+MPIRUN
+MPICC
+NUMA_LIBS
+ALLOCA
+BASE_CPPFLAGS
+CFLAGS_PEDANTIC
+BASE_CXXFLAGS
+BASE_CFLAGS
+CXX11FLAGS
+CFLAGS_NO_DEPRECATED
+HAVE_EXAMPLES_FALSE
+HAVE_EXAMPLES_TRUE
+INSTALL_DEVEL_HEADERS_FALSE
+INSTALL_DEVEL_HEADERS_TRUE
+ENABLE_EXPERIMENTAL_API_FALSE
+ENABLE_EXPERIMENTAL_API_TRUE
+HAVE_TCMALLOC_FALSE
+HAVE_TCMALLOC_TRUE
+HAVE_GNUXX11_FALSE
+HAVE_GNUXX11_TRUE
+HAVE_CXX11_FALSE
+HAVE_CXX11_TRUE
+HAVE_JAVA_FALSE
+HAVE_JAVA_TRUE
+HAVE_UCM_PTMALLOC286_FALSE
+HAVE_UCM_PTMALLOC286_TRUE
+HAVE_PROFILING_FALSE
+HAVE_PROFILING_TRUE
+HAVE_MPICC_FALSE
+HAVE_MPICC_TRUE
+HAVE_MPIRUN_FALSE
+HAVE_MPIRUN_TRUE
+HAVE_MPI_FALSE
+HAVE_MPI_TRUE
+HAVE_RDMACM_QP_LESS_FALSE
+HAVE_RDMACM_QP_LESS_TRUE
+HAVE_RDMACM_FALSE
+HAVE_RDMACM_TRUE
+HAVE_KNEM_FALSE
+HAVE_KNEM_TRUE
+HAVE_CMA_FALSE
+HAVE_CMA_TRUE
+HAVE_XPMEM_FALSE
+HAVE_XPMEM_TRUE
+HAVE_HIP_FALSE +HAVE_HIP_TRUE +HAVE_ROCM_FALSE +HAVE_ROCM_TRUE +HAVE_GDR_COPY_FALSE +HAVE_GDR_COPY_TRUE +HAVE_CUDA_FALSE +HAVE_CUDA_TRUE +HAVE_CRAY_UGNI_FALSE +HAVE_CRAY_UGNI_TRUE +HAVE_TL_CM_FALSE +HAVE_TL_CM_TRUE +HAVE_TL_UD_FALSE +HAVE_TL_UD_TRUE +HAVE_DC_EXP_FALSE +HAVE_DC_EXP_TRUE +HAVE_DC_DV_FALSE +HAVE_DC_DV_TRUE +HAVE_TL_DC_FALSE +HAVE_TL_DC_TRUE +HAVE_TL_RC_FALSE +HAVE_TL_RC_TRUE +HAVE_EXP_FALSE +HAVE_EXP_TRUE +HAVE_DEVX_FALSE +HAVE_DEVX_TRUE +HAVE_MLX5_DV_FALSE +HAVE_MLX5_DV_TRUE +HAVE_MLX5_HW_UD_FALSE +HAVE_MLX5_HW_UD_TRUE +HAVE_MLX5_HW_FALSE +HAVE_MLX5_HW_TRUE +HAVE_IB_FALSE +HAVE_IB_TRUE +HAVE_MEMTRACK_FALSE +HAVE_MEMTRACK_TRUE +HAVE_TUNING_FALSE +HAVE_TUNING_TRUE +HAVE_STATS_FALSE +HAVE_STATS_TRUE +HAVE_GTEST_FALSE +HAVE_GTEST_TRUE +DOCS_ONLY_FALSE +DOCS_ONLY_TRUE +DOXYGEN_PAPER_SIZE +DX_COND_latex_FALSE +DX_COND_latex_TRUE +DX_COND_pdf_FALSE +DX_COND_pdf_TRUE +DX_PDFLATEX +DX_FLAG_pdf +DX_COND_ps_FALSE +DX_COND_ps_TRUE +DX_EGREP +DX_DVIPS +DX_BIBTEX +DX_MAKEINDEX +DX_LATEX +DX_FLAG_ps +DX_COND_html_FALSE +DX_COND_html_TRUE +DX_FLAG_html +DX_COND_chi_FALSE +DX_COND_chi_TRUE +DX_FLAG_chi +DX_COND_chm_FALSE +DX_COND_chm_TRUE +DX_HHC +DX_FLAG_chm +DX_COND_xml_FALSE +DX_COND_xml_TRUE +DX_FLAG_xml +DX_COND_rtf_FALSE +DX_COND_rtf_TRUE +DX_FLAG_rtf +DX_COND_man_FALSE +DX_COND_man_TRUE +DX_FLAG_man +DX_COND_dot_FALSE +DX_COND_dot_TRUE +DX_DOT +DX_FLAG_dot +DX_COND_doc_FALSE +DX_COND_doc_TRUE +DX_PERL +DX_DOXYGEN +DX_FLAG_doc +DX_ENV +DX_DOCDIR +DX_CONFIG +DX_PROJECT +HAVE_DOT_FALSE +HAVE_DOT_TRUE +GRAPHVIZ_DOT +shrext +objdir +localmoduledir +moduledir +modulesubdir +LN_RS +PKG_CONFIG +LIBM +CXXCPP +LT_SYS_LIBRARY_PATH +OTOOL64 +OTOOL +LIPO +NMEDIT +DSYMUTIL +MANIFEST_TOOL +RANLIB +ac_ct_AR +AR +DLLTOOL +OBJDUMP +NM +ac_ct_DUMPBIN +DUMPBIN +LD +FGREP +host_os +host_vendor +host_cpu +host +build_os +build_vendor +build_cpu +build +LIBTOOL +SED +LN_S +am__fastdepCCAS_FALSE +am__fastdepCCAS_TRUE +CCASDEPMODE +CCASFLAGS +CCAS +OPENMP_CFLAGS +am__fastdepCXX_FALSE 
+am__fastdepCXX_TRUE +CXXDEPMODE +ac_ct_CXX +CXXFLAGS +CXX +SOVERSION +PATCH_VERSION +MINOR_VERSION +MAJOR_VERSION +top_top_srcdir +MAINT +MAINTAINER_MODE_FALSE +MAINTAINER_MODE_TRUE +AM_BACKSLASH +AM_DEFAULT_VERBOSITY +AM_DEFAULT_V +AM_V +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +am__nodep +AMDEPBACKSLASH +AMDEP_FALSE +AMDEP_TRUE +am__include +DEPDIR +am__untar +am__tar +AMTAR +am__leading_dot +SET_MAKE +AWK +mkdir_p +MKDIR_P +INSTALL_STRIP_PROGRAM +STRIP +install_sh +MAKEINFO +AUTOHEADER +AUTOMAKE +AUTOCONF +ACLOCAL +VERSION +PACKAGE +CYGPATH_W +am__isrc +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +SCM_VERSION +SCM_BRANCH +GITBIN +EGREP +GREP +CPP +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL +am__quote' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_dependency_tracking +enable_silent_rules +enable_maintainer_mode +enable_openmp +enable_shared +enable_static +with_pic +enable_fast_install +with_aix_soname +with_gnu_ld +with_sysroot +enable_libtool_lock +with_docs_only +enable_doxygen_doc +enable_doxygen_dot +enable_doxygen_man +enable_doxygen_rtf +enable_doxygen_xml +enable_doxygen_chm +enable_doxygen_chi +enable_doxygen_html +enable_doxygen_ps +enable_doxygen_pdf +enable_debug +enable_compiler_opt +enable_optimizations +with_avx +with_sse41 +with_sse42 +with_mcpu +with_march +enable_frame_pointer +enable_gtest +with_valgrind +enable_numa +enable_profiling +enable_backtrace_detail +enable_stats +enable_tuning +enable_memtrack +enable_logging +enable_assertions +with_cache_line_size 
+enable_builtin_memcpy +enable_symbol_override +with_allocator +with_mpi +with_rte +with_java +with_cuda +with_rocm +with_gdrcopy +with_verbs +with_rc +with_ud +with_dc +with_mlx5_dv +with_ib_hw_tm +with_dm +with_devx +with_cm +with_rdmacm +enable_cma +with_knem +with_xpmem +with_ugni +enable_fault_injection +enable_params_check +enable_debug_data +enable_mt +enable_experimental_api +enable_devel_headers +enable_examples +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +CPP +CXX +CXXFLAGS +CCC +CCAS +CCASFLAGS +LT_SYS_LIBRARY_PATH +CXXCPP +DOXYGEN_PAPER_SIZE +MPICC +MPIRUN' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) 
+bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. + + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; 
+ -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir 
| --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. 
+ with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | 
--program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* 
| --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? "missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures ucx 1.8 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... + +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. 
+ +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/ucx] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of ucx 1.8:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up 
one-time build + --enable-silent-rules less verbose build output (undo: "make V=1") + --disable-silent-rules verbose build output (undo: "make V=0") + --enable-maintainer-mode + enable make rules and dependencies not useful (and + sometimes confusing) to the casual installer + --disable-openmp do not use OpenMP + --enable-shared[=PKGS] build shared libraries [default=yes] + --enable-static[=PKGS] build static libraries [default=yes] + --enable-fast-install[=PKGS] + optimize for fast installation [default=yes] + --disable-libtool-lock avoid locking (might break parallel builds) + --disable-doxygen-doc don't generate any doxygen documentation + --enable-doxygen-dot generate graphics for doxygen documentation + --disable-doxygen-man don't generate doxygen manual pages + --enable-doxygen-rtf generate doxygen RTF documentation + --enable-doxygen-xml generate doxygen XML documentation + --enable-doxygen-chm generate doxygen compressed HTML help documentation + --enable-doxygen-chi generate doxygen separate compressed HTML help index + file + --disable-doxygen-html don't generate doxygen plain HTML documentation + --enable-doxygen-ps generate doxygen PostScript documentation + --disable-doxygen-pdf don't generate doxygen PDF documentation + --enable-debug Enable debug mode build + --enable-compiler-opt Set optimization level [0-3] + --enable-optimizations Enable non-portable machine-specific CPU + optimizations, default: NO + --enable-frame-pointer Compile with frame pointer, useful for profiling, + default: NO + --enable-gtest Enable tests using the Google C++ Testing Framework. + (Default is disabled.)
+ --disable-numa Disable NUMA support + --enable-profiling Enable profiling support, default: NO + --disable-backtrace-detail + Disable detailed backtrace support, default: NO + --enable-stats Enable statistics, useful for profiling, default: NO + --enable-tuning Enable parameter tuning in run-time, default: NO + --enable-memtrack Enable memory tracking, useful for profiling, + default: NO + --enable-logging Enable debug logging, default: YES + + --disable-assertions Disable code assertions, default: NO + + --enable-builtin-memcpy Enable builtin memcpy routine, default: YES + --disable-symbol-override + Disable overriding library symbols, default: NO + --enable-cma Enable Cross Memory Attach + --enable-fault-injection + Enable fault injection code, default: NO + --disable-params-check Disable checking user parameters passed to API, + default: NO + --enable-debug-data Enable collecting data to ease debugging, default: + NO + --enable-mt Enable thread support in UCP and UCT, default: NO + --enable-experimental-api + Enable installing experimental APIs, default: NO + --enable-devel-headers Enable installing development headers, default: NO + --enable-examples Enable examples build, default: NO + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use + both] + --with-aix-soname=aix|svr4|both + shared library versioning (aka "SONAME") variant to + provide on AIX, [default=aix]. + --with-gnu-ld assume the C compiler uses GNU ld [default=no] + --with-sysroot[=DIR] Search for dependent libraries within DIR (or the + compiler's sysroot if not specified). + --with-docs-only Compile only the docs and not the rest of UCX. + [default=NO] + --with-avx Use AVX compiler option. + --with-sse41 Use SSE 4.1 compiler option. + --with-sse42 Use SSE 4.2 compiler option. + --with-mcpu Use CPU Model compiler option. 
+ --with-march Use architecture tuning compiler option. + --with-valgrind Enable Valgrind annotations (small runtime overhead, + default NO) + --with-cache-line-size=SIZE + Build UCX with cache line size defined by user. This + parameter overwrites default cache line sizes + defined in UCX (x86-64: 64, Power: 128, ARMv8: + 64/128). The supported values are: 64, 128 + --with-allocator=NAME Build UCX with predefined memory allocator. The + supported values are: ptmalloc286. Default: + ptmalloc286 + --with-mpi[=MPIHOME] Compile MPI tests (default is NO). + --with-rte(=DIR) Where to find the RTE libraries and header files + --with-java=(PATH) Compile Java UCX (default is guess). + + --with-cuda=(DIR) Enable the use of CUDA (default is guess). + --with-rocm=(DIR) Enable the use of ROCm (default is autodetect). + --with-gdrcopy=(DIR) Enable the use of GDR_COPY (default is guess). + --with-verbs(=DIR) Build OpenFabrics support, adding DIR/include, + DIR/lib, and DIR/lib64 to the search path for + headers and libraries + --with-rc Compile with IB Reliable Connection support + --with-ud Compile with IB Unreliable Datagram support + --with-dc Compile with IB Dynamic Connection support + --with-mlx5-dv Compile with mlx5 Direct Verbs support. Direct Verbs + (DV) support provides additional acceleration + capabilities that are not available in a regular + mode. + --with-ib-hw-tm Compile with IB Tag Matching support + --with-dm Compile with Device Memory support + + --with-cm Compile with IB Connection Manager support + --with-rdmacm=(DIR) Enable the use of RDMACM (default is guess). + --with-knem=(DIR) Enable the use of KNEM (default is guess). + --with-xpmem=(DIR) Enable the use of XPMEM (default is guess). + --with-ugni(=DIR) Build Cray UGNI support + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a + nonstandard directory <lib dir> + LIBS libraries to pass to the linker, e.g. 
-l<library> + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if + you have headers in a nonstandard directory <include dir> + CPP C preprocessor + CXX C++ compiler command + CXXFLAGS C++ compiler flags + CCAS assembler compiler command (defaults to CC) + CCASFLAGS assembler compiler flags (defaults to CFLAGS) + LT_SYS_LIBRARY_PATH + User-defined run-time library search path. + CXXCPP C++ preprocessor + DOXYGEN_PAPER_SIZE + a4wide (default), a4, letter, legal or executive + MPICC MPI C compiler command + MPIRUN MPI launch command + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to the package provider. +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. 
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +ucx configure 1.8 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! 
-s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. +ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... 
" >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" 
>&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_cxx_try_compile LINENO +# ---------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? 
+ if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. 
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly (yes if a program calling FUNC links, no otherwise; an already-set VAR is kept and reported as cached) +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case <limits.h> declares $2. + For example, HP-UX 11i <limits.h> declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer <limits.h> to <assert.h> if __STDC__ is defined, since + <limits.h> exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include <limits.h> +#else +# include <assert.h> +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_cxx_try_cpp LINENO +# ------------------------ +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_cpp + +# ac_fn_cxx_try_link LINENO +# ------------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_cxx_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_link + +# ac_fn_c_check_decl LINENO SYMBOL VAR INCLUDES +# --------------------------------------------- +# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR +# accordingly. +ac_fn_c_check_decl () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + as_decl_name=`echo $2|sed 's/ *(.*//'` + as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'` + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5 +$as_echo_n "checking whether $as_decl_name is declared... 
" >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +#ifndef $as_decl_name +#ifdef __cplusplus + (void) $as_decl_use; +#else + (void) $as_decl_name; +#endif +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_decl + +# ac_fn_c_check_type LINENO TYPE VAR INCLUDES +# ------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. +ac_fn_c_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main () +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type + +# ac_fn_c_check_member LINENO AGGR MEMBER VAR INCLUDES +# ---------------------------------------------------- +# Tries to find if the field MEMBER exists in type AGGR, after including +# INCLUDES, setting cache variable VAR accordingly. +ac_fn_c_check_member () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5 +$as_echo_n "checking for $2.$3... " >&6; } +if eval \${$4+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$5 +int +main () +{ +static $2 ac_aggr; +if (ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$4=yes" +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$5 +int +main () +{ +static $2 ac_aggr; +if (sizeof ac_aggr.$3) +return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$4=yes" +else + eval "$4=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$4 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_member +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. 
+ +It was created by ucx $as_me 1.8, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +: ${CPPFLAGS=""} +: ${CFLAGS=""} +: ${CXXFLAGS=""} +config_flags="$*" +valgrind_libpath="" + +ac_aux_dir= +for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. 
+ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? 
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... 
" >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. 
*/ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +$as_echo_n "checking whether $CC understands -c and -o together... " >&6; } +if ${am_cv_prog_cc_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +$as_echo "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. 
+ # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... 
" >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + ac_fn_c_check_header_mongrel "$LINENO" "minix/config.h" "ac_cv_header_minix_config_h" "$ac_includes_default" +if test "x$ac_cv_header_minix_config_h" = xyes; then : + MINIX=yes +else + MINIX= +fi + + + if test "$MINIX" = yes; then + +$as_echo "#define _POSIX_SOURCE 1" >>confdefs.h + + +$as_echo "#define _POSIX_1_SOURCE 2" >>confdefs.h + + +$as_echo "#define _MINIX 1" >>confdefs.h + + fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether it is safe to define __EXTENSIONS__" >&5 +$as_echo_n "checking whether it is safe to define __EXTENSIONS__... 
" >&6; } +if ${ac_cv_safe_to_define___extensions__+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +# define __EXTENSIONS__ 1 + $ac_includes_default +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_safe_to_define___extensions__=yes +else + ac_cv_safe_to_define___extensions__=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_safe_to_define___extensions__" >&5 +$as_echo "$ac_cv_safe_to_define___extensions__" >&6; } + test $ac_cv_safe_to_define___extensions__ = yes && + $as_echo "#define __EXTENSIONS__ 1" >>confdefs.h + + $as_echo "#define _ALL_SOURCE 1" >>confdefs.h + + $as_echo "#define _GNU_SOURCE 1" >>confdefs.h + + $as_echo "#define _POSIX_PTHREAD_SEMANTICS 1" >>confdefs.h + + $as_echo "#define _TANDEM_SOURCE 1" >>confdefs.h + + + +ac_config_headers="$ac_config_headers config.h" + + +# Extract the first word of "git", so it can be a program name with args. +set dummy git; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_GITBIN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$GITBIN"; then + ac_cv_prog_GITBIN="$GITBIN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_GITBIN="yes" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +GITBIN=$ac_cv_prog_GITBIN +if test -n "$GITBIN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GITBIN" >&5 +$as_echo "$GITBIN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +if test x"${GITBIN}" = x"yes"; then : + + SCM_VERSION=c30b7da + + +else + SCM_BRANCH="" + + SCM_VERSION="0000000" + +fi + + + + + +am__api_version='1.16' + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +$as_echo_n "checking for a BSD-compatible install... " >&6; } +if test -z "$INSTALL"; then +if ${ac_cv_path_install+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. 
+case $as_dir/ in #(( + ./ | .// | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +$as_echo "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. 
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +$as_echo_n "checking whether build environment is sane... " >&6; } +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + as_fn_error $? "ls -t appears to fail. Make sure there is not a broken + alias in your environment" "$LINENO" 5 + fi + if test "$2" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$2" = conftest.file + ) +then + # Ok. + : +else + as_fn_error $? 
"newly created file is older than distributed files! +Check your system clock" "$LINENO" 5 +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi + +rm -f conftest.file + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. +# By default was `s,x,x', remove it if useless. +ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` + +if test x"${MISSING+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; + *) + MISSING="\${SHELL} $am_aux_dir/missing" ;; + esac +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} +fi + +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. 
+if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5 +$as_echo_n "checking for a thread-safe mkdir -p... " >&6; } +if test -z "$MKDIR_P"; then + if ${ac_cv_path_mkdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue + case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir (GNU coreutils) '* | \ + 'mkdir (coreutils) '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext + break 3;; + esac + done + done + done +IFS=$as_save_IFS + +fi + + test -d ./--version && rmdir ./--version + if test "${ac_cv_path_mkdir+set}" = set; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use the slow shell script. 
Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + MKDIR_P="$ac_install_sh -d" + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +$as_echo "$MKDIR_P" >&6; } + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AWK+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +$as_echo "$AWK" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. 
+case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + SET_MAKE= +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 +$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; } +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5 + (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } + case $?:`cat confinc.out 2>/dev/null` in #( + '0:this is the am__doit target') : + case $s in #( + BSD) : + am__include='.include' am__quote='"' ;; #( + *) : + am__include='include' am__quote='' ;; +esac ;; #( + *) : + ;; +esac + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 +$as_echo "${_am_result}" >&6; } + +# Check whether --enable-dependency-tracking was given. +if test "${enable_dependency_tracking+set}" = set; then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + +# Check whether --enable-silent-rules was given. +if test "${enable_silent_rules+set}" = set; then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +$as_echo_n "checking whether $am_make supports nested variables... 
" >&6; } +if ${am_cv_make_support_nested_variables+:} false; then : + $as_echo_n "(cached) " >&6 +else + if $as_echo 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +$as_echo "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='ucx' + VERSION='1.8' + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE "$PACKAGE" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define VERSION "$VERSION" +_ACEOF + +# Some tools Automake needs. + +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. 
For more background, see: +# +# +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar plaintar pax cpio none' + +# The POSIX 1988 'ustar' format is defined with fixed-size fields. + # There is notably a 21 bits limit for the UID and the GID. In fact, + # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 + # and bug#13588). + am_max_uid=2097151 # 2^21 - 1 + am_max_gid=$am_max_uid + # The $UID and $GID variables are not portable, so we need to resort + # to the POSIX-mandated id(1) utility. Errors in the 'id' calls + # below are definitely unexpected, so allow the users to see them + # (that is, avoid stderr redirection). + am_uid=`id -u || echo unknown` + am_gid=`id -g || echo unknown` + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether UID '$am_uid' is supported by ustar format" >&5 +$as_echo_n "checking whether UID '$am_uid' is supported by ustar format... " >&6; } + if test $am_uid -le $am_max_uid; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + _am_tools=none + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether GID '$am_gid' is supported by ustar format" >&5 +$as_echo_n "checking whether GID '$am_gid' is supported by ustar format... 
" >&6; } + if test $am_gid -le $am_max_gid; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + _am_tools=none + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to create a ustar tar archive" >&5 +$as_echo_n "checking how to create a ustar tar archive... " >&6; } + + # Go ahead even if we have the value already cached. We do so because we + # need to set the values for the 'am__tar' and 'am__untar' variables. + _am_tools=${am_cv_prog_tar_ustar-$_am_tools} + + for _am_tool in $_am_tools; do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; do + { echo "$as_me:$LINENO: $_am_tar --version" >&5 + ($_am_tar --version) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && break + done + am__tar="$_am_tar --format=ustar -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=ustar -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. + (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x ustar -w "$$tardir"' + am__tar_='pax -L -x ustar -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H ustar -L' + am__tar_='find "$tardir" -print | cpio -o -H ustar -L' + am__untar='cpio -i -H ustar -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_ustar}" && break + + # tar/untar a dummy directory, and stop if the command works. 
+ rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + { echo "$as_me:$LINENO: tardir=conftest.dir && eval $am__tar_ >conftest.tar" >&5 + (tardir=conftest.dir && eval $am__tar_ >conftest.tar) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + rm -rf conftest.dir + if test -s conftest.tar; then + { echo "$as_me:$LINENO: $am__untar &5 + ($am__untar &5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + { echo "$as_me:$LINENO: cat conftest.dir/file" >&5 + (cat conftest.dir/file) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + grep GrepMe conftest.dir/file >/dev/null 2>&1 && break + fi + done + rm -rf conftest.dir + + if ${am_cv_prog_tar_ustar+:} false; then : + $as_echo_n "(cached) " >&6 +else + am_cv_prog_tar_ustar=$_am_tool +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_tar_ustar" >&5 +$as_echo "$am_cv_prog_tar_ustar" >&6; } + + + + + +depcc="$CC" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CC_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. 
For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. 
+ if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . 
+ +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 + fi +fi + +# Check whether --enable-silent-rules was given. +if test "${enable_silent_rules+set}" = set; then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=0;; +esac +am_make=${MAKE-make} +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +$as_echo_n "checking whether $am_make supports nested variables... " >&6; } +if ${am_cv_make_support_nested_variables+:} false; then : + $as_echo_n "(cached) " >&6 +else + if $as_echo 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +$as_echo "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 +$as_echo_n "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } + # Check whether --enable-maintainer-mode was given. 
+if test "${enable_maintainer_mode+set}" = set; then : + enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval +else + USE_MAINTAINER_MODE=no +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 +$as_echo "$USE_MAINTAINER_MODE" >&6; } + if test $USE_MAINTAINER_MODE = yes; then + MAINTAINER_MODE_TRUE= + MAINTAINER_MODE_FALSE='#' +else + MAINTAINER_MODE_TRUE='#' + MAINTAINER_MODE_FALSE= +fi + + MAINT=$MAINTAINER_MODE_TRUE + + + + + + + +top_top_srcdir=$srcdir + + +MAJOR_VERSION=1 +MINOR_VERSION=8 +PATCH_VERSION=0 +VERSION=$MAJOR_VERSION.$MINOR_VERSION.$PATCH_VERSION +SOVERSION=0:0:0 + + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... " >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <stdarg.h> +#include <stdio.h> +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ?
1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +$as_echo_n "checking whether $CC understands -c and -o together... 
" >&6; } +if ${am_cv_prog_cc_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +$as_echo "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. 
+ # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -z "$CXX"; then + if test -n "$CCC"; then + CXX=$CCC + else + if test -n "$ac_tool_prefix"; then + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 +$as_echo "$CXX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CXX" && break + done +fi +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 +$as_echo "$ac_ct_CXX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CXX" && break +done + + if test "x$ac_ct_CXX" = x; then + CXX="g++" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CXX=$ac_ct_CXX + fi +fi + + fi +fi +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5 +$as_echo_n "checking whether we are using the GNU C++ compiler... 
" >&6; } +if ${ac_cv_cxx_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 +$as_echo "$ac_cv_cxx_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GXX=yes +else + GXX= +fi +ac_test_CXXFLAGS=${CXXFLAGS+set} +ac_save_CXXFLAGS=$CXXFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 +$as_echo_n "checking whether $CXX accepts -g... " >&6; } +if ${ac_cv_prog_cxx_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_cxx_werror_flag=$ac_cxx_werror_flag + ac_cxx_werror_flag=yes + ac_cv_prog_cxx_g=no + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_prog_cxx_g=yes +else + CXXFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + +else + ac_cxx_werror_flag=$ac_save_cxx_werror_flag + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_prog_cxx_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cxx_werror_flag=$ac_save_cxx_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 +$as_echo "$ac_cv_prog_cxx_g" >&6; } +if test "$ac_test_CXXFLAGS" = set; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" + else + CXXFLAGS="-g" + fi +else + if test "$GXX" = yes; then + CXXFLAGS="-O2" + else + CXXFLAGS= + fi +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +depcc="$CXX" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CXX_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. 
For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CXX_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. 
+ if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CXX_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CXX_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CXX_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CXX_dependencies_compiler_type" >&6; } +CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then + am__fastdepCXX_TRUE= + am__fastdepCXX_FALSE='#' +else + am__fastdepCXX_TRUE='#' + am__fastdepCXX_FALSE= +fi + + + + OPENMP_CFLAGS= + # Check whether --enable-openmp was given. +if test "${enable_openmp+set}" = set; then : + enableval=$enable_openmp; +fi + + if test "$enable_openmp" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5 +$as_echo_n "checking for $CC option to support OpenMP... " >&6; } +if ${ac_cv_prog_c_openmp+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENMP + choke me +#endif +#include <omp.h> +int main () { return omp_get_num_threads (); } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_prog_c_openmp='none needed' +else + ac_cv_prog_c_openmp='unsupported' + for ac_option in -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ + -Popenmp --openmp; do + ac_save_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $ac_option" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h.
*/ + +#ifndef _OPENMP + choke me +#endif +#include <omp.h> +int main () { return omp_get_num_threads (); } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_prog_c_openmp=$ac_option +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS=$ac_save_CFLAGS + if test "$ac_cv_prog_c_openmp" != unsupported; then + break + fi + done +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5 +$as_echo "$ac_cv_prog_c_openmp" >&6; } + case $ac_cv_prog_c_openmp in #( + "none needed" | unsupported) + ;; #( + *) + OPENMP_CFLAGS=$ac_cv_prog_c_openmp ;; + esac + fi + + +# By default we simply use the C compiler to build assembly code. + +test "${CCAS+set}" = set || CCAS=$CC +test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS + + + +depcc="$CCAS" am_compiler_list= + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +$as_echo_n "checking dependency style of $depcc... " >&6; } +if ${am_cv_CCAS_dependencies_compiler_type+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes.
For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CCAS_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. 
These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CCAS_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. + rm -rf conftest.dir +else + am_cv_CCAS_dependencies_compiler_type=none +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CCAS_dependencies_compiler_type" >&5 +$as_echo "$am_cv_CCAS_dependencies_compiler_type" >&6; } +CCASDEPMODE=depmode=$am_cv_CCAS_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CCAS_dependencies_compiler_type" = gcc3; then + am__fastdepCCAS_TRUE= + am__fastdepCCAS_FALSE='#' +else + am__fastdepCCAS_TRUE='#' + am__fastdepCCAS_FALSE= +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 +$as_echo_n "checking whether ln -s works... 
" >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 +$as_echo "no, using $LN_S" >&6; } +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 +$as_echo_n "checking for a sed that does not truncate output... " >&6; } +if ${ac_cv_path_SED+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ + for ac_i in 1 2 3 4 5 6 7; do + ac_script="$ac_script$as_nl$ac_script" + done + echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed + { ac_script=; unset ac_script;} + if test -z "$SED"; then + ac_path_SED_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_SED="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_SED" || continue +# Check for GNU ac_path_SED and select it if it is found. 
+ # Check for GNU $ac_path_SED +case `"$ac_path_SED" --version 2>&1` in +*GNU*) + ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo '' >> "conftest.nl" + "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_SED_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_SED="$ac_path_SED" + ac_path_SED_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_SED_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_SED"; then + as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5 + fi +else + ac_cv_path_SED=$SED +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 +$as_echo "$ac_cv_path_SED" >&6; } + SED="$ac_cv_path_SED" + rm -f conftest.sed + + +case `pwd` in + *\ * | *\ *) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5 +$as_echo "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;; +esac + + + +macro_version='2.4.6' +macro_revision='2.4.6' + + + + + + + + + + + + + +ltmain=$ac_aux_dir/ltmain.sh + +# Make sure we can run config.sub. +$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5 + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +$as_echo_n "checking build system type... 
" >&6; } +if ${ac_cv_build+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` +test "x$ac_build_alias" = x && + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 +ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5 + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +$as_echo "$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +$as_echo_n "checking host system type... " >&6; } +if ${ac_cv_host+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || + as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5 +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +$as_echo "$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + +# Backslashify metacharacters that are still active within +# double-quoted strings. 
+sed_quote_subst='s/\(["`$\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. +delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' + +ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5 +$as_echo_n "checking how to print strings... " >&6; } +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "" +} + +case $ECHO in + printf*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: printf" >&5 +$as_echo "printf" >&6; } ;; + print*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: print -r" >&5 +$as_echo "print -r" >&6; } ;; + *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: cat" >&5 +$as_echo "cat" >&6; } ;; +esac + + + + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 +$as_echo_n "checking for a sed that does not truncate output... 
" >&6; } +if ${ac_cv_path_SED+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ + for ac_i in 1 2 3 4 5 6 7; do + ac_script="$ac_script$as_nl$ac_script" + done + echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed + { ac_script=; unset ac_script;} + if test -z "$SED"; then + ac_path_SED_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_SED="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_SED" || continue +# Check for GNU ac_path_SED and select it if it is found. + # Check for GNU $ac_path_SED +case `"$ac_path_SED" --version 2>&1` in +*GNU*) + ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo '' >> "conftest.nl" + "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_SED_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_SED="$ac_path_SED" + ac_path_SED_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_SED_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_SED"; then + as_fn_error $? 
"no acceptable sed could be found in \$PATH" "$LINENO" 5 + fi +else + ac_cv_path_SED=$SED +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 +$as_echo "$ac_cv_path_SED" >&6; } + SED="$ac_cv_path_SED" + rm -f conftest.sed + +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 +$as_echo_n "checking for fgrep... " >&6; } +if ${ac_cv_path_FGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 + then ac_cv_path_FGREP="$GREP -F" + else + if test -z "$FGREP"; then + ac_path_FGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in fgrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_FGREP" || continue +# Check for GNU ac_path_FGREP and select it if it is found. 
+ # Check for GNU $ac_path_FGREP +case `"$ac_path_FGREP" --version 2>&1` in +*GNU*) + ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'FGREP' >> "conftest.nl" + "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_FGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_FGREP="$ac_path_FGREP" + ac_path_FGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_FGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_FGREP"; then + as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_FGREP=$FGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 +$as_echo "$ac_cv_path_FGREP" >&6; } + FGREP="$ac_cv_path_FGREP" + + +test -z "$GREP" && GREP=grep + + + + + + + + + + + + + + + + + + + +# Check whether --with-gnu-ld was given. +if test "${with_gnu_ld+set}" = set; then : + withval=$with_gnu_ld; test no = "$withval" || with_gnu_ld=yes +else + with_gnu_ld=no +fi + +ac_prog=ld +if test yes = "$GCC"; then + # Check if gcc -print-prog-name=ld gives a path. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 +$as_echo_n "checking for ld used by $CC... 
" >&6; } + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return, which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [\\/]* | ?:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD=$ac_prog + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test yes = "$with_gnu_ld"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 +$as_echo_n "checking for GNU ld... " >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 +$as_echo_n "checking for non-GNU ld... " >&6; } +fi +if ${lt_cv_path_LD+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$LD"; then + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD=$ac_dir/$ac_prog + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 &5 +$as_echo "$LD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 +$as_echo_n "checking if the linker ($LD) is GNU ld... 
" >&6; } +if ${lt_cv_prog_gnu_ld+:} false; then : + $as_echo_n "(cached) " >&6 +else + # I'd rather use --version here, but apparently some GNU lds only accept -v. +case `$LD -v 2>&1 &5 +$as_echo "$lt_cv_prog_gnu_ld" >&6; } +with_gnu_ld=$lt_cv_prog_gnu_ld + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5 +$as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; } +if ${lt_cv_path_NM+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM=$NM +else + lt_nm_to_check=${ac_tool_prefix}nm + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + tmp_nm=$ac_dir/$lt_tmp_nm + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the 'sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty + case $build_os in + mingw*) lt_bad_file=conftest.nm/nofile ;; + *) lt_bad_file=/dev/null ;; + esac + case `"$tmp_nm" -B $lt_bad_file 2>&1 | sed '1q'` in + *$lt_bad_file* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break 2 + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break 2 + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS=$lt_save_ifs + done + : ${lt_cv_path_NM=no} +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 +$as_echo "$lt_cv_path_NM" >&6; } +if test no != "$lt_cv_path_NM"; then + NM=$lt_cv_path_NM +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. + else + if test -n "$ac_tool_prefix"; then + for ac_prog in dumpbin "link -dump" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DUMPBIN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DUMPBIN"; then + ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DUMPBIN=$ac_cv_prog_DUMPBIN +if test -n "$DUMPBIN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5 +$as_echo "$DUMPBIN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$DUMPBIN" && break + done +fi +if test -z "$DUMPBIN"; then + ac_ct_DUMPBIN=$DUMPBIN + for ac_prog in dumpbin "link -dump" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DUMPBIN"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN +if test -n "$ac_ct_DUMPBIN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5 +$as_echo "$ac_ct_DUMPBIN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_DUMPBIN" && break +done + + if test "x$ac_ct_DUMPBIN" = x; then + DUMPBIN=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DUMPBIN=$ac_ct_DUMPBIN + fi +fi + + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | sed '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols -headers" + ;; + *) + DUMPBIN=: + ;; + esac + fi + + if test : != "$DUMPBIN"; then + NM=$DUMPBIN + fi +fi +test -z "$NM" && NM=nm + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5 +$as_echo_n "checking the name lister ($NM) interface... 
" >&6; } +if ${lt_cv_nm_interface+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: output\"" >&5) + cat conftest.out >&5 + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 +$as_echo "$lt_cv_nm_interface" >&6; } + +# find the maximum length of command line arguments +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5 +$as_echo_n "checking the maximum length of command line arguments... " >&6; } +if ${lt_cv_sys_max_cmd_len+:} false; then : + $as_echo_n "(cached) " >&6 +else + i=0 + teststring=ABCD + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. 
+ # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). + lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + bitrig* | darwin* | dragonfly* | freebsd* | netbsd* | openbsd*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. 
+ lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[ ]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test X`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test 17 != "$i" # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac + +fi + +if test -n "$lt_cv_sys_max_cmd_len"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5 +$as_echo "$lt_cv_sys_max_cmd_len" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5 +$as_echo "none" >&6; } +fi +max_cmd_len=$lt_cv_sys_max_cmd_len + + + + + + +: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} + +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi + + + + + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5 +$as_echo_n "checking how to convert $build file names to $host format... 
" >&6; } +if ${lt_cv_to_host_file_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac + +fi + +to_host_file_cmd=$lt_cv_to_host_file_cmd +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5 +$as_echo "$lt_cv_to_host_file_cmd" >&6; } + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5 +$as_echo_n "checking how to convert $build file names to toolchain format... " >&6; } +if ${lt_cv_to_tool_file_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + #assume ordinary cross tools, or native build. +lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac + +fi + +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5 +$as_echo "$lt_cv_to_tool_file_cmd" >&6; } + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5 +$as_echo_n "checking for $LD option to reload object files... 
" >&6; } +if ${lt_cv_ld_reload_flag+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_reload_flag='-r' +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 +$as_echo "$lt_cv_ld_reload_flag" >&6; } +reload_flag=$lt_cv_ld_reload_flag +case $reload_flag in +"" | " "*) ;; +*) reload_flag=" $reload_flag" ;; +esac +reload_cmds='$LD$reload_flag -o $output$reload_objs' +case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + if test yes != "$GCC"; then + reload_cmds=false + fi + ;; + darwin*) + if test yes = "$GCC"; then + reload_cmds='$LTCC $LTCFLAGS -nostdlib $wl-r -o $output$reload_objs' + else + reload_cmds='$LD$reload_flag -o $output$reload_objs' + fi + ;; +esac + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OBJDUMP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +$as_echo "$OBJDUMP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +$as_echo "$ac_ct_OBJDUMP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5 +$as_echo_n "checking how to recognize dependent libraries... " >&6; } +if ${lt_cv_deplibs_check_method+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# 'unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. +# 'file_magic [[regex]]' -- check by looking for files in library path +# that responds to the $file_magic_cmd with a given extended regex. +# If you have 'file' or equivalent on your system and you're not sure +# whether 'pass_all' will *always* work, you probably want this one. 
+ +case $host_os in +aix[4-9]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[45]*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='/usr/bin/file -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + if ( file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=/usr/bin/file + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]' + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[3-9]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd* | bitrig*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +os2*) + lt_cv_deplibs_check_method=pass_all + ;; +esac 
+ +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 +$as_echo "$lt_cv_deplibs_check_method" >&6; } + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + + + + + + + + + + + + + + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DLLTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +$as_echo "$DLLTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. 
+set dummy dlltool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +$as_echo "$ac_ct_DLLTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5 +$as_echo_n "checking how to associate runtime and link libraries... 
" >&6; } +if ${lt_cv_sharedlib_from_linklib_cmd+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh; + # decide which one to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd=$ECHO + ;; +esac + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 +$as_echo "$lt_cv_sharedlib_from_linklib_cmd" >&6; } +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + + + + + + + +if test -n "$ac_tool_prefix"; then + for ac_prog in ar + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_AR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +$as_echo "$AR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_AR+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AR="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +$as_echo "$ac_ct_AR" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} +: ${AR_FLAGS=cru} + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5 +$as_echo_n "checking for archiver @FILE support... " >&6; } +if ${lt_cv_ar_at_file+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ar_at_file=no + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test 0 -eq "$ac_status"; then + # Ensure the archiver fails upon bogus file names. + rm -f conftest.$ac_objext libconftest.a + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + if test 0 -ne "$ac_status"; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 +$as_echo "$lt_cv_ar_at_file" >&6; } + +if test no = "$lt_cv_ar_at_file"; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +$as_echo "$STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_ac_ct_STRIP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +$as_echo "$ac_ct_STRIP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +test -z "$STRIP" && STRIP=: + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +$as_echo "$RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +$as_echo "$ac_ct_RANLIB" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +test -z "$RANLIB" && RANLIB=: + + + + + + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + bitrig* | openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + + +# Check for command to grab the raw symbol name followed by C symbol from nm. 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5 +$as_echo_n "checking command to parse $NM output from $compiler object... " >&6; } +if ${lt_cv_sys_global_symbol_pipe+:} false; then : + $as_echo_n "(cached) " >&6 +else + +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[BCDEGRST]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([_A-Za-z][_A-Za-z0-9]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[BCDT]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[ABCDGISTW]' + ;; +hpux*) + if test ia64 = "$host_cpu"; then + symcode='[ABCDEGRST]' + fi + ;; +irix* | nonstopux*) + symcode='[BCDEGRST]' + ;; +osf*) + symcode='[BCDEGQRST]' + ;; +solaris*) + symcode='[BDRT]' + ;; +sco3.2v5*) + symcode='[DT]' + ;; +sysv4.2uw2*) + symcode='[DT]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[ABDT]' + ;; +sysv4) + symcode='[DFNSTU]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[ABCDGIRSTW]' ;; +esac + +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Gets list of data symbols to import. + lt_cv_sys_global_symbol_to_import="sed -n -e 's/^I .* \(.*\)$/\1/p'" + # Adjust the below global symbol transforms to fixup imported variables. + lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" + lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" + lt_c_name_lib_hook="\ + -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ + -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" +else + # Disable hooks by default. + lt_cv_sys_global_symbol_to_import= + lt_cdecl_hook= + lt_c_name_hook= + lt_c_name_lib_hook= +fi + +# Transform an extracted symbol line into a proper C declaration. 
+# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="sed -n"\ +$lt_cdecl_hook\ +" -e 's/^T .* \(.*\)$/extern int \1();/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="sed -n"\ +$lt_c_name_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" + +# Transform an extracted symbol line into symbol name with lib prefix and +# symbol address. +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n"\ +$lt_c_name_lib_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function, + # D for any global variable and I for any imported variable. + # Also find C++ and __fastcall symbols from MSVC++, + # which start with @ or ?. 
+ lt_cv_sys_global_symbol_pipe="$AWK '"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ +" /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ +" /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ +" {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ +" s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx" + else + lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + # Now try to grab the symbols. + nlist=conftest.nm + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5 + (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "$nlist"; then + # Try sorting and uniquifying the output. 
+ if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS=conftstm.$ac_objext + CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } && test -s conftest$ac_exeext; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&5 + fi + else + echo "cannot find nm_test_var in $nlist" >&5 + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 + fi + else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test yes = "$pipe_works"; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done + +fi + +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: failed" >&5 +$as_echo "failed" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5 +$as_echo "ok" >&6; } +fi + +# Response file support. +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5 +$as_echo_n "checking for sysroot... " >&6; } + +# Check whether --with-sysroot was given. +if test "${with_sysroot+set}" = set; then : + withval=$with_sysroot; +else + with_sysroot=no +fi + + +lt_sysroot= +case $with_sysroot in #( + yes) + if test yes = "$GCC"; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_sysroot" >&5 +$as_echo "$with_sysroot" >&6; } + as_fn_error $? "The sysroot must be an absolute path." 
"$LINENO" 5 + ;; +esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5 +$as_echo "${lt_sysroot:-no}" >&6; } + + + + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a working dd" >&5 +$as_echo_n "checking for a working dd... " >&6; } +if ${ac_cv_path_lt_DD+:} false; then : + $as_echo_n "(cached) " >&6 +else + printf 0123456789abcdef0123456789abcdef >conftest.i +cat conftest.i conftest.i >conftest2.i +: ${lt_DD:=$DD} +if test -z "$lt_DD"; then + ac_path_lt_DD_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in dd; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_lt_DD="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_lt_DD" || continue +if "$ac_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=: +fi + $ac_path_lt_DD_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_lt_DD"; then + : + fi +else + ac_cv_path_lt_DD=$lt_DD +fi + +rm -f conftest.i conftest2.i conftest.out +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_lt_DD" >&5 +$as_echo "$ac_cv_path_lt_DD" >&6; } + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to truncate binary pipes" >&5 +$as_echo_n "checking how to truncate binary pipes... 
" >&6; } +if ${lt_cv_truncate_bin+:} false; then : + $as_echo_n "(cached) " >&6 +else + printf 0123456789abcdef0123456789abcdef >conftest.i +cat conftest.i conftest.i >conftest2.i +lt_cv_truncate_bin= +if "$ac_cv_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1" +fi +rm -f conftest.i conftest2.i conftest.out +test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_truncate_bin" >&5 +$as_echo "$lt_cv_truncate_bin" >&6; } + + + + + + + +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. +func_cc_basename () +{ + for cc_temp in $*""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} + +# Check whether --enable-libtool-lock was given. +if test "${enable_libtool_lock+set}" = set; then : + enableval=$enable_libtool_lock; +fi + +test no = "$enable_libtool_lock" || enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out what ABI is being produced by ac_compile, and set mode + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE=32 + ;; + *ELF-64*) + HPUX_IA64_MODE=64 + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. 
+ echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + if test yes = "$lt_cv_prog_gnu_ld"; then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +mips64*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + emul=elf + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + emul="${emul}32" + ;; + *64-bit*) + emul="${emul}64" + ;; + esac + case `/usr/bin/file conftest.$ac_objext` in + *MSB*) + emul="${emul}btsmip" + ;; + *LSB*) + emul="${emul}ltsmip" + ;; + esac + case `/usr/bin/file conftest.$ac_objext` in + *N32*) + emul="${emul}n32" + ;; + esac + LD="${LD-ld} -m $emul" + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. Note that the listed cases only cover the + # situations where additional linker options are needed (such as when + # doing 32-bit compilation for a host where ld defaults to 64-bit, or + # vice versa); the common cases where no linker options are needed do + # not appear in the list. 
+ echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + case `/usr/bin/file conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac + ;; + powerpc64le-*linux*) + LD="${LD-ld} -m elf32lppclinux" + ;; + powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + powerpcle-*linux*) + LD="${LD-ld} -m elf64lppc" + ;; + powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -belf" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5 +$as_echo_n "checking whether the C compiler needs -belf... " >&6; } +if ${lt_cv_cc_needs_belf+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_cc_needs_belf=yes +else + lt_cv_cc_needs_belf=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 +$as_echo "$lt_cv_cc_needs_belf" >&6; } + if test yes != "$lt_cv_cc_needs_belf"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS=$SAVE_CFLAGS + fi + ;; +*-*solaris*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `/usr/bin/file conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*|x86_64-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. + if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD=${LD-ld}_sol2 + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks=$enable_libtool_lock + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args. +set dummy ${ac_tool_prefix}mt; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_prog_MANIFEST_TOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$MANIFEST_TOOL"; then + ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL +if test -n "$MANIFEST_TOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5 +$as_echo "$MANIFEST_TOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_MANIFEST_TOOL"; then + ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL + # Extract the first word of "mt", so it can be a program name with args. +set dummy mt; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_MANIFEST_TOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_MANIFEST_TOOL"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="mt" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL +if test -n "$ac_ct_MANIFEST_TOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5 +$as_echo "$ac_ct_MANIFEST_TOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_MANIFEST_TOOL" = x; then + MANIFEST_TOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL + fi +else + MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL" +fi + +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 +$as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } +if ${lt_cv_path_mainfest_tool+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 + $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out + cat conftest.err >&5 + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5 +$as_echo "$lt_cv_path_mainfest_tool" >&6; } +if test yes != "$lt_cv_path_mainfest_tool"; then + MANIFEST_TOOL=: +fi + + + + + + + case $host_os in + rhapsody* | darwin*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args. 
+set dummy ${ac_tool_prefix}dsymutil; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_DSYMUTIL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$DSYMUTIL"; then + ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DSYMUTIL=$ac_cv_prog_DSYMUTIL +if test -n "$DSYMUTIL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5 +$as_echo "$DSYMUTIL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DSYMUTIL"; then + ac_ct_DSYMUTIL=$DSYMUTIL + # Extract the first word of "dsymutil", so it can be a program name with args. +set dummy dsymutil; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_DSYMUTIL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_DSYMUTIL"; then + ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DSYMUTIL="dsymutil" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL +if test -n "$ac_ct_DSYMUTIL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5 +$as_echo "$ac_ct_DSYMUTIL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_DSYMUTIL" = x; then + DSYMUTIL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DSYMUTIL=$ac_ct_DSYMUTIL + fi +else + DSYMUTIL="$ac_cv_prog_DSYMUTIL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args. +set dummy ${ac_tool_prefix}nmedit; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_NMEDIT+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$NMEDIT"; then + ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +NMEDIT=$ac_cv_prog_NMEDIT +if test -n "$NMEDIT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5 +$as_echo "$NMEDIT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_NMEDIT"; then + ac_ct_NMEDIT=$NMEDIT + # Extract the first word of "nmedit", so it can be a program name with args. +set dummy nmedit; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_NMEDIT+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_NMEDIT"; then + ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_NMEDIT="nmedit" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT +if test -n "$ac_ct_NMEDIT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5 +$as_echo "$ac_ct_NMEDIT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_NMEDIT" = x; then + NMEDIT=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + NMEDIT=$ac_ct_NMEDIT + fi +else + NMEDIT="$ac_cv_prog_NMEDIT" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args. +set dummy ${ac_tool_prefix}lipo; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_LIPO+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$LIPO"; then + ac_cv_prog_LIPO="$LIPO" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_LIPO="${ac_tool_prefix}lipo" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +LIPO=$ac_cv_prog_LIPO +if test -n "$LIPO"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5 +$as_echo "$LIPO" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_LIPO"; then + ac_ct_LIPO=$LIPO + # Extract the first word of "lipo", so it can be a program name with args. +set dummy lipo; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_LIPO+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_LIPO"; then + ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_LIPO="lipo" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO +if test -n "$ac_ct_LIPO"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5 +$as_echo "$ac_ct_LIPO" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_LIPO" = x; then + LIPO=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + LIPO=$ac_ct_LIPO + fi +else + LIPO="$ac_cv_prog_LIPO" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL"; then + ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL="${ac_tool_prefix}otool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL=$ac_cv_prog_OTOOL +if test -n "$OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 +$as_echo "$OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL"; then + ac_ct_OTOOL=$OTOOL + # Extract the first word of "otool", so it can be a program name with args. +set dummy otool; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OTOOL+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OTOOL"; then + ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL="otool" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL +if test -n "$ac_ct_OTOOL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5 +$as_echo "$ac_ct_OTOOL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OTOOL" = x; then + OTOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL=$ac_ct_OTOOL + fi +else + OTOOL="$ac_cv_prog_OTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool64; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_OTOOL64+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$OTOOL64"; then + ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL64=$ac_cv_prog_OTOOL64 +if test -n "$OTOOL64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5 +$as_echo "$OTOOL64" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL64"; then + ac_ct_OTOOL64=$OTOOL64 + # Extract the first word of "otool64", so it can be a program name with args. +set dummy otool64; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_OTOOL64+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_OTOOL64"; then + ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL64="otool64" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 +if test -n "$ac_ct_OTOOL64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5 +$as_echo "$ac_ct_OTOOL64" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_OTOOL64" = x; then + OTOOL64=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL64=$ac_ct_OTOOL64 + fi +else + OTOOL64="$ac_cv_prog_OTOOL64" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5 +$as_echo_n "checking for -single_module linker flag... " >&6; } +if ${lt_cv_apple_cc_single_mod+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_apple_cc_single_mod=no + if test -z "$LT_MULTI_MODULE"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. + rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? 
+ # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&5 + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test 0 = "$_lt_result"; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&5 + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 +$as_echo "$lt_cv_apple_cc_single_mod" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5 +$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; } +if ${lt_cv_ld_exported_symbols_list+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_ld_exported_symbols_list=yes +else + lt_cv_ld_exported_symbols_list=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 +$as_echo "$lt_cv_ld_exported_symbols_list" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5 +$as_echo_n "checking for -force_load linker flag... 
" >&6; } +if ${lt_cv_ld_force_load+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 + echo "$AR cru libconftest.a conftest.o" >&5 + $AR cru libconftest.a conftest.o 2>&5 + echo "$RANLIB libconftest.a" >&5 + $RANLIB libconftest.a 2>&5 + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? + if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&5 + elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&5 + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 +$as_echo "$lt_cv_ld_force_load" >&6; } + case $host_os in + rhapsody* | darwin1.[012]) + _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + darwin*) # darwin 5.x on + # if running on 10.5 or later, the deployment target defaults + # to the OS version, if on x86, and 10.4, the deployment + # target defaults to 10.4. Don't you love it? 
+ case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in + 10.0,*86*-darwin8*|10.0,*-darwin[91]*) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + 10.[012][,.]*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + 10.*) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test yes = "$lt_cv_apple_cc_single_mod"; then + _lt_dar_single_mod='$single_module' + fi + if test yes = "$lt_cv_ld_exported_symbols_list"; then + _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' + fi + if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac + +# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list () +{ + case x$2 in + x) + ;; + *:) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'` \$$1\" + ;; + x:*) + eval $1=\"\$$1 `$ECHO $2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval $1=\"\$$1\ `$ECHO $2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval $1=\"`$ECHO $2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \$$1\" + ;; + *) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'`\" + ;; + esac +} + +for ac_header in dlfcn.h +do : + ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default +" +if test "x$ac_cv_header_dlfcn_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_DLFCN_H 
1 +_ACEOF + +fi + +done + + + + +func_stripname_cnf () +{ + case $2 in + .*) func_stripname_result=`$ECHO "$3" | $SED "s%^$1%%; s%\\\\$2\$%%"`;; + *) func_stripname_result=`$ECHO "$3" | $SED "s%^$1%%; s%$2\$%%"`;; + esac +} # func_stripname_cnf + + + + + +# Set options + + + + enable_dlopen=no + + + enable_win32_dll=no + + + # Check whether --enable-shared was given. +if test "${enable_shared+set}" = set; then : + enableval=$enable_shared; p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else + enable_shared=yes +fi + + + + + + + + + + # Check whether --enable-static was given. +if test "${enable_static+set}" = set; then : + enableval=$enable_static; p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else + enable_static=yes +fi + + + + + + + + + + +# Check whether --with-pic was given. +if test "${with_pic+set}" = set; then : + withval=$with_pic; lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for lt_pkg in $withval; do + IFS=$lt_save_ifs + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else + pic_mode=default +fi + + + + + + + + + # Check whether --enable-fast-install was given. 
+if test "${enable_fast_install+set}" = set; then : + enableval=$enable_fast_install; p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + # The value is split on the current IFS characters plus $PATH_SEPARATOR and ','; + # fast install is turned on only if one item equals $p ($PACKAGE, or the + # literal 'default' when PACKAGE is unset). + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else + # enable_fast_install is unset: default to yes. + enable_fast_install=yes +fi + + + + + + + + +# Only a power*-*-aix[5-9]* host with enable_shared=yes is offered a choice of +# soname variant; every other combination gets with_aix_soname=aix and leaves +# shared_archive_member_spec empty. + shared_archive_member_spec= +case $host,$enable_shared in +power*-*-aix[5-9]*,yes) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking which variant of shared library versioning to provide" >&5 +$as_echo_n "checking which variant of shared library versioning to provide... " >&6; } + +# Check whether --with-aix-soname was given. +# Accepted values are aix, svr4 and both; anything else is handed to +# as_fn_error (defined outside this chunk; presumably aborts configure). +if test "${with_aix_soname+set}" = set; then : + withval=$with_aix_soname; case $withval in + aix|svr4|both) + ;; + *) + as_fn_error $? "Unknown argument to --with-aix-soname" "$LINENO" 5 + ;; + esac + lt_cv_with_aix_soname=$with_aix_soname +else + # with_aix_soname is unset: reuse a cached lt_cv_with_aix_soname when there + # is one, otherwise default to aix. + if ${lt_cv_with_aix_soname+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_with_aix_soname=aix +fi + + with_aix_soname=$lt_cv_with_aix_soname +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_aix_soname" >&5 +$as_echo "$with_aix_soname" >&6; } + if test aix != "$with_aix_soname"; then + # For the AIX way of multilib, we name the shared archive member + # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', + # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. + # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, + # the AIX toolchain works better with OBJECT_MODE set (default 32). + # The member name chosen below is shr_64 when OBJECT_MODE is 64 and shr + # otherwise (an unset OBJECT_MODE is read as 32).
+ if test 64 = "${OBJECT_MODE-32}"; then + shared_archive_member_spec=shr_64 + else + shared_archive_member_spec=shr + fi + fi + ;; +*) + with_aix_soname=aix + ;; +esac + + + + + + + + + + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS=$ltmain + +# Always use our own libtool. +# The single quotes keep $(SHELL) and $(top_builddir) unexpanded here; +# presumably make expands them later - TODO confirm. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# Fall back to 'ln -s' when LN_S is empty. +test -z "$LN_S" && LN_S="ln -s" + + + + + + + + + + + + + + +# When ZSH_VERSION is set, switch the GLOB_SUBST option off. +if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 +$as_echo_n "checking for objdir... " >&6; } +if ${lt_cv_objdir+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Probe for the object directory name: remove any plain file called .libs, + # try to create a .libs directory, and use .libs if it exists afterwards, + # _libs otherwise. The probe directory is removed again with rmdir. + rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. + lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 +$as_echo "$lt_cv_objdir" >&6; } +objdir=$lt_cv_objdir + + + + + +# Append the LT_OBJDIR macro (the chosen directory plus a trailing slash) to confdefs.h. +cat >>confdefs.h <<_ACEOF +#define LT_OBJDIR "$lt_cv_objdir/" +_ACEOF + + + + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + # COLLECT_NAMES is exported as an empty value, and only when it is not already set. + if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a '.a' archive for static linking (except MSVC, +# which needs '.lib').
+libext=a + +with_gnu_ld=$lt_cv_prog_gnu_ld + +old_CC=$CC +old_CFLAGS=$CFLAGS + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +func_cc_basename $compiler +cc_basename=$func_cc_basename_result + + +# Only perform the check for file, if the check method requires it +# MAGIC_CMD defaults to 'file'. The search below runs only when +# deplibs_check_method starts with file_magic and file_magic_cmd is the literal +# string $MAGIC_CMD. It looks for ${ac_tool_prefix}file in /usr/bin and then +# $PATH and caches the hit in lt_cv_path_MAGIC_CMD. If nothing is found, plain +# 'file' is searched the same way when ac_tool_prefix is non-empty; otherwise +# MAGIC_CMD is set to ':'. +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5 +$as_echo_n "checking for ${ac_tool_prefix}file... " >&6; } +if ${lt_cv_path_MAGIC_CMD+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $MAGIC_CMD in + # A value starting with '/' or '\', or with a drive prefix such as 'C:/', is + # a user-supplied path and is used as-is. The pattern used to be '[\\/*]', + # a bracket expression matching one single character, so an absolute path + # never reached this branch and the override was silently ignored. + # NOTE(review): configure is generated; the pattern presumably originates in + # libtool.m4 and has to be corrected there as well to survive regeneration. +[\\/]* | ?:[\\/]*) + lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD=$MAGIC_CMD + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/${ac_tool_prefix}file"; then + lt_cv_path_MAGIC_CMD=$ac_dir/"${ac_tool_prefix}file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD=$lt_cv_path_MAGIC_CMD + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem.
Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS=$lt_save_ifs + MAGIC_CMD=$lt_save_MAGIC_CMD + ;; +esac +fi + +MAGIC_CMD=$lt_cv_path_MAGIC_CMD +if test -n "$MAGIC_CMD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +$as_echo "$MAGIC_CMD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + + + +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5 +$as_echo_n "checking for file... " >&6; } +if ${lt_cv_path_MAGIC_CMD+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $MAGIC_CMD in + # Same user-override test as in the prefixed search; the pattern is kept + # identical to it ('[\\/]*' instead of the single-character '[\\/*]'). +[\\/]* | ?:[\\/]*) + lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD=$MAGIC_CMD + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/file"; then + lt_cv_path_MAGIC_CMD=$ac_dir/"file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD=$lt_cv_path_MAGIC_CMD + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem.
Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS=$lt_save_ifs + MAGIC_CMD=$lt_save_MAGIC_CMD + ;; +esac +fi + +MAGIC_CMD=$lt_cv_path_MAGIC_CMD +if test -n "$MAGIC_CMD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +$as_echo "$MAGIC_CMD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + else + MAGIC_CMD=: + fi +fi + + fi + ;; +esac + +# Use C for the default configuration in the libtool script + +lt_save_CC=$CC +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. +objext=o +objext=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
+compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + +lt_prog_compiler_no_builtin_flag= + +if test yes = "$GCC"; then + case $cc_basename in + nvcc*) + lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;; + *) + lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;; + esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 +$as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; } +if ${lt_cv_prog_compiler_rtti_exceptions+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_rtti_exceptions=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="-fno-rtti -fno-exceptions" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. 
+ lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_rtti_exceptions=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 +$as_echo "$lt_cv_prog_compiler_rtti_exceptions" >&6; } + +if test yes = "$lt_cv_prog_compiler_rtti_exceptions"; then + lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions" +else + : +fi + +fi + + + + + + + lt_prog_compiler_wl= +lt_prog_compiler_pic= +lt_prog_compiler_static= + + + if test yes = "$GCC"; then + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_static='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + fi + lt_prog_compiler_pic='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. 
+ lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static='$wl-static' + ;; + esac + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + lt_prog_compiler_static= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + ;; + + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + lt_prog_compiler_can_build_shared=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ lt_prog_compiler_pic='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic=-Kconform_pic + fi + ;; + + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + lt_prog_compiler_wl='-Xlinker ' + if test -n "$lt_prog_compiler_pic"; then + lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + lt_prog_compiler_wl='-Wl,' + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + else + lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + case $cc_basename in + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + esac + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static='$wl-static' + ;; + esac + ;; + + hpux9* | hpux10* | hpux11*) + lt_prog_compiler_wl='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + lt_prog_compiler_static='$wl-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + lt_prog_compiler_wl='-Wl,' + # PIC (with -KPIC) is the default. 
+ lt_prog_compiler_static='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64, which still supported -KPIC. + ecc*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='--shared' + lt_prog_compiler_static='--static' + ;; + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + ccc*) + lt_prog_compiler_wl='-Wl,' + # All Alpha code is PIC. 
+ lt_prog_compiler_static='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-qpic' + lt_prog_compiler_static='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='' + ;; + *Sun\ F* | *Sun*Fortran*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Wl,' + ;; + *Intel*\ [CF]*Compiler*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + *Portland\ Group*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + lt_prog_compiler_wl='-Wl,' + # All OSF/1 code is PIC. 
+ lt_prog_compiler_static='-non_shared' + ;; + + rdos*) + lt_prog_compiler_static='-non_shared' + ;; + + solaris*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + lt_prog_compiler_wl='-Qoption ld ';; + *) + lt_prog_compiler_wl='-Wl,';; + esac + ;; + + sunos4*) + lt_prog_compiler_wl='-Qoption ld ' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic='-Kconform_pic' + lt_prog_compiler_static='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + unicos*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_can_build_shared=no + ;; + + uts4*) + lt_prog_compiler_pic='-pic' + lt_prog_compiler_static='-Bstatic' + ;; + + *) + lt_prog_compiler_can_build_shared=no + ;; + esac + fi + +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic= + ;; + *) + lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC" + ;; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +$as_echo_n "checking for $compiler option to produce PIC... " >&6; } +if ${lt_cv_prog_compiler_pic+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic=$lt_prog_compiler_pic +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 +$as_echo "$lt_cv_prog_compiler_pic" >&6; } +lt_prog_compiler_pic=$lt_cv_prog_compiler_pic + +# +# Check to make sure the PIC flag actually works. 
+# +if test -n "$lt_prog_compiler_pic"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5 +$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; } +if ${lt_cv_prog_compiler_pic_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic_works=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic -DPIC" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 +$as_echo "$lt_cv_prog_compiler_pic_works" >&6; } + +if test yes = "$lt_cv_prog_compiler_pic_works"; then + case $lt_prog_compiler_pic in + "" | " "*) ;; + *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;; + esac +else + lt_prog_compiler_pic= + lt_prog_compiler_can_build_shared=no +fi + +fi + + + + + + + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if ${lt_cv_prog_compiler_static_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_static_works=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works=yes + fi + else + lt_cv_prog_compiler_static_works=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 +$as_echo "$lt_cv_prog_compiler_static_works" >&6; } + +if test yes = "$lt_cv_prog_compiler_static_works"; then + : +else + lt_prog_compiler_static= +fi + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +$as_echo "$lt_cv_prog_compiler_c_o" >&6; } + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +$as_echo "$lt_cv_prog_compiler_c_o" >&6; } + + + + +hard_links=nottested +if test no = "$lt_cv_prog_compiler_c_o" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +$as_echo_n "checking if we can lock with hard links... " >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +$as_echo "$hard_links" >&6; } + if test no = "$hard_links"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 +$as_echo "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... 
" >&6; } + + runpath_var= + allow_undefined_flag= + always_export_symbols=no + archive_cmds= + archive_expsym_cmds= + compiler_needs_object=no + enable_shared_with_static_runtimes=no + export_dynamic_flag_spec= + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + hardcode_automatic=no + hardcode_direct=no + hardcode_direct_absolute=no + hardcode_libdir_flag_spec= + hardcode_libdir_separator= + hardcode_minus_L=no + hardcode_shlibpath_var=unsupported + inherit_rpath=no + link_all_deplibs=unknown + module_cmds= + module_expsym_cmds= + old_archive_from_new_cmds= + old_archive_from_expsyms_cmds= + thread_safe_flag_spec= + whole_archive_flag_spec= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + include_expsyms= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ' (' and ')$', so one must not match beginning or + # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', + # as well as any symbol that contains 'd'. + exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. 
+ if test yes != "$GCC"; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++) + with_gnu_ld=yes + ;; + openbsd* | bitrig*) + with_gnu_ld=no + ;; + esac + + ld_shlibs=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test yes = "$with_gnu_ld"; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; + *\ \(GNU\ Binutils\)\ [3-9]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test yes = "$lt_use_gnu_ld_interface"; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='$wl' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + export_dynamic_flag_spec='$wl--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + whole_archive_flag_spec= + fi + supports_anon_versioning=no + # Strip the parenthesized vendor tag, e.g. "(GNU Binutils) ", from the + # linker banner so that only the version number itself is matched by the + # patterns below. The bracket expression [^)] is required here: without + # it the expression matches a literal "(^)" and removes nothing. + case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... 
+ *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[3-9]*) + # On AIX/PPC, the GNU linker is very broken + if test ia64 != "$host_cpu"; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. + +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + ld_shlibs=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, + # as there is no search path for DLLs. 
+ hardcode_libdir_flag_spec='-L$libdir' + export_dynamic_flag_spec='$wl--export-all-symbols' + allow_undefined_flag=unsupported + always_export_symbols=no + enable_shared_with_static_runtimes=yes + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... + archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs=no + fi + ;; + + haiku*) + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + link_all_deplibs=yes + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + shrext_cmds=.dll + archive_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> 
$output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + # NOTE(review): "old_archive_From_new_cmds" (capital F) is not the + # "old_archive_from_new_cmds" variable that is reset to empty earlier in + # this script, so this assignment sets a different variable - confirm + # whether that is intentional before renaming it. + old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes=yes + ;; + + interix[3-9]*) + hardcode_direct=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + export_dynamic_flag_spec='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test linux-dietlibc = "$host_os"; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test no = "$tmp_diet" + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + whole_archive_flag_spec= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + tmp_sharedflag='-Wl,-shared' ;; + xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) + 
tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object=yes + ;; + esac + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) # Sun C 5.9 + whole_archive_flag_spec='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + tcc*) + export_dynamic_flag_spec='-rdynamic' + ;; + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script 
$output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + ld_shlibs=no + fi + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. 
+ +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + + if test no = "$ld_shlibs"; then + runpath_var= + hardcode_libdir_flag_spec= + export_dynamic_flag_spec= + whole_archive_flag_spec= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag=unsupported + always_export_symbols=yes + archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ hardcode_minus_L=yes + if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct=unsupported + fi + ;; + + aix[4-9]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. 
+ # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then + aix_use_runtimelinking=yes + break + fi + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + archive_cmds='' + hardcode_direct=yes + hardcode_direct_absolute=yes + hardcode_libdir_separator=':' + link_all_deplibs=yes + file_list_spec='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # traditional, no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. 
+ hardcode_direct=no + hardcode_direct_absolute=no + ;; + esac + + if test yes = "$GCC"; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + ;; + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag="$shared_flag "'$wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + export_dynamic_flag_spec='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + always_export_symbols=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ allow_undefined_flag='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath_+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='$wl-blibpath:$libdir:'"$aix_libpath" + archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + hardcode_libdir_flag_spec='$wl-R $libdir:/usr/lib:/lib' + allow_undefined_flag="-z nodefs" + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. 
+ if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath_+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag=' $wl-bernotok' + allow_undefined_flag=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec='$convenience' + fi + archive_cmds_need_lc=yes + archive_expsym_cmds='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared libraries. 
+ archive_expsym_cmds="$archive_expsym_cmds"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + archive_expsym_cmds="$archive_expsym_cmds"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + archive_expsym_cmds="$archive_expsym_cmds"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + archive_expsym_cmds="$archive_expsym_cmds"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + 
bsdi[45]*) + export_dynamic_flag_spec=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl*) + # Native MSVC + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + always_export_symbols=yes + file_list_spec='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, )='true' + enable_shared_with_static_runtimes=yes + exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + old_postinstall_cmds='chmod 644 $oldlib' + postlink_cmds='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC wrapper + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds='true' + # FIXME: Should let the user specify the lib program. 
+ old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' + enable_shared_with_static_runtimes=yes + ;; + esac + ;; + + darwin* | rhapsody*) + + + archive_cmds_need_lc=no + hardcode_direct=no + hardcode_automatic=yes + hardcode_shlibpath_var=unsupported + if test yes = "$lt_cv_ld_force_load"; then + whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + + else + whole_archive_flag_spec='' + fi + link_all_deplibs=yes + allow_undefined_flag=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + archive_expsym_cmds="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + module_expsym_cmds="sed -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + + else + ld_shlibs=no + fi + + ;; + + dgux*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. 
Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | dragonfly*) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + hpux9*) + if test yes = "$GCC"; then + archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + fi + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + export_dynamic_flag_spec='$wl-E' + ;; + + hpux10*) + if test yes,no = "$GCC,$with_gnu_ld"; then + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='$wl-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L=yes + fi + ;; + + hpux11*) + if test yes,no = "$GCC,$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + archive_cmds='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + archive_cmds='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5 +$as_echo_n "checking if $CC understands -b... 
" >&6; } +if ${lt_cv_prog_compiler__b+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler__b=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -b" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. + cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler__b=yes + fi + else + lt_cv_prog_compiler__b=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 +$as_echo "$lt_cv_prog_compiler__b" >&6; } + +if test yes = "$lt_cv_prog_compiler__b"; then + archive_cmds='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' +else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' +fi + + ;; + esac + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct=no + hardcode_shlibpath_var=no + ;; + *) + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='$wl-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test yes = "$GCC"; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 +$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; } +if ${lt_cv_irix_exported_symbol+:} false; then : + $as_echo_n "(cached) " >&6 +else + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int foo (void) { return 0; } +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + lt_cv_irix_exported_symbol=yes +else + lt_cv_irix_exported_symbol=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 +$as_echo "$lt_cv_irix_exported_symbol" >&6; } + if test yes = "$lt_cv_irix_exported_symbol"; then + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' + fi + else + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + inherit_rpath=yes + link_all_deplibs=yes + ;; + + linux*) + case $cc_basename in + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + ld_shlibs=yes + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + newsos6) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + 
hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + hardcode_shlibpath_var=no + ;; + + *nto* | *qnx*) + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + hardcode_direct=yes + hardcode_shlibpath_var=no + hardcode_direct_absolute=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + export_dynamic_flag_spec='$wl-E' + else + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + fi + else + ld_shlibs=no + fi + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + shrext_cmds=.dll + archive_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + 
$CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes=yes + ;; + + osf3*) + if test yes = "$GCC"; then + allow_undefined_flag=' $wl-expect_unresolved $wl\*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test yes = "$GCC"; then + allow_undefined_flag=' $wl-expect_unresolved $wl\*' + archive_cmds='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs 
$deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec='-rpath $libdir' + fi + archive_cmds_need_lc='no' + hardcode_libdir_separator=: + ;; + + solaris*) + no_undefined_flag=' -z defs' + if test yes = "$GCC"; then + wlarc='$wl' + archive_cmds='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + archive_cmds='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='$wl' + archive_cmds='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_shlibpath_var=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. GCC discards it without '$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) 
+ if test yes = "$GCC"; then + whole_archive_flag_spec='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + else + whole_archive_flag_spec='-z allextract$convenience -z defaultextract' + fi + ;; + esac + link_all_deplibs=yes + ;; + + sunos4*) + if test sequent = "$host_vendor"; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + archive_cmds='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec='-L$libdir' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. + archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds='$CC -r -o $output$reload_objs' + hardcode_direct=no + ;; + motorola) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + sysv4.3*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + export_dynamic_flag_spec='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag='$wl-z,text' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds='$CC -shared $wl-h,$soname -o 
$lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + no_undefined_flag='$wl-z,text' + allow_undefined_flag='$wl-z,nodefs' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='$wl-R,$libdir' + hardcode_libdir_separator=':' + link_all_deplibs=yes + export_dynamic_flag_spec='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + *) + ld_shlibs=no + ;; + esac + + if test sni = "$host_vendor"; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + export_dynamic_flag_spec='$wl-Blargedynsym' + ;; + esac + fi + fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5 +$as_echo "$ld_shlibs" >&6; } +test no = "$ld_shlibs" && 
can_build_shared=no + +with_gnu_ld=$with_gnu_ld + + + + + + + + + + + + + + + +# +# Do we need to explicitly link libc? +# +case "x$archive_cmds_need_lc" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $archive_cmds in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; } +if ${lt_cv_archive_cmds_need_lc+:} false; then : + $as_echo_n "(cached) " >&6 +else + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl + pic_flag=$lt_prog_compiler_pic + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag + allow_undefined_flag= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc=no + else + lt_cv_archive_cmds_need_lc=yes + fi + allow_undefined_flag=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 +$as_echo "$lt_cv_archive_cmds_need_lc" >&6; } + archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +$as_echo_n "checking dynamic linker characteristics... " >&6; } + +if test yes = "$GCC"; then + case $host_os in + darwin*) lt_awk_arg='/^libraries:/,/LR/' ;; + *) lt_awk_arg='/^libraries:/' ;; + esac + case $host_os in + mingw* | cegcc*) lt_sed_strip_eq='s|=\([A-Za-z]:\)|\1|g' ;; + *) lt_sed_strip_eq='s|=/|/|g' ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary... 
+ lt_tmp_lt_search_path_spec= + lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + # ...but if some path component already ends with the multilib dir we assume + # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). + case "$lt_multi_os_dir; $lt_search_path_spec " in + "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) + lt_multi_os_dir= + ;; + esac + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" + elif test -n "$lt_multi_os_dir"; then + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS = " "; FS = "/|\n";} { + lt_foo = ""; + lt_count = 0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo = "/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[lt_foo]++; } + if (lt_freq[lt_foo] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's|/\([A-Za-z]:\)|\1|g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + + + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='$libname$release$shared_ext$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. + # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. 
+ aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a(lib.so.V)' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | sed -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | sed -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. 
+ need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # Don't embed -rpath directories since the linker doesn't support them. + hardcode_libdir_flag_spec='-L$libdir' + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if ${lt_cv_shlibpath_overrides_runpath+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\"" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. 
+ # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Add ABI-specific directories to the system library path. + sys_lib_dlsearch_path_spec="/lib64 /usr/lib64 /lib /usr/lib" + + # Ideally, we could use ldconfig to report *all* directories which are + # searched for libraries, however this is still not possible. Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. 
+ dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd* | bitrig*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. 
+ soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix 
$libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +$as_echo "$dynamic_linker" >&6; } +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... 
but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +$as_echo_n "checking how to hardcode library paths into programs... " >&6; } +hardcode_action= +if test -n "$hardcode_libdir_flag_spec" || + test -n "$runpath_var" || + test yes = "$hardcode_automatic"; then + + # We can hardcode non-existent directories. + if test no != "$hardcode_direct" && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, )" && + test no != "$hardcode_minus_L"; then + # Linking always hardcodes the temporary library directory. + hardcode_action=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. 
+ hardcode_action=unsupported +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5 +$as_echo "$hardcode_action" >&6; } + +if test relink = "$hardcode_action" || + test yes = "$inherit_rpath"; then + # Fast installation is not supported + enable_fast_install=no +elif test yes = "$shlibpath_overrides_runpath" || + test no = "$enable_shared"; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + if test yes != "$enable_dlopen"; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen=load_add_on + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen=LoadLibrary + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +$as_echo_n "checking for dlopen in -ldl... " >&6; } +if ${ac_cv_lib_dl_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dl_dlopen=yes +else + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +$as_echo "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes; then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl +else + + lt_cv_dlopen=dyld + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + +fi + + ;; + + tpf*) + # Don't try to run any link tests for TPF. We know it's impossible + # because TPF is a cross-compiler, and we know how we open DSOs. + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + lt_cv_dlopen_self=no + ;; + + *) + ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load" +if test "x$ac_cv_func_shl_load" = xyes; then : + lt_cv_dlopen=shl_load +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 +$as_echo_n "checking for shl_load in -ldld... " >&6; } +if ${ac_cv_lib_dld_shl_load+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char shl_load (); +int +main () +{ +return shl_load (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dld_shl_load=yes +else + ac_cv_lib_dld_shl_load=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 +$as_echo "$ac_cv_lib_dld_shl_load" >&6; } +if test "x$ac_cv_lib_dld_shl_load" = xyes; then : + lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld +else + ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" +if test "x$ac_cv_func_dlopen" = xyes; then : + lt_cv_dlopen=dlopen +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +$as_echo_n "checking for dlopen in -ldl... " >&6; } +if ${ac_cv_lib_dl_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dl_dlopen=yes +else + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +$as_echo "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes; then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 +$as_echo_n "checking for dlopen in -lsvld... 
" >&6; } +if ${ac_cv_lib_svld_dlopen+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsvld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_svld_dlopen=yes +else + ac_cv_lib_svld_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 +$as_echo "$ac_cv_lib_svld_dlopen" >&6; } +if test "x$ac_cv_lib_svld_dlopen" = xyes; then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 +$as_echo_n "checking for dld_link in -ldld... " >&6; } +if ${ac_cv_lib_dld_dld_link+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char dld_link (); +int +main () +{ +return dld_link (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_dld_dld_link=yes +else + ac_cv_lib_dld_dld_link=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 +$as_echo "$ac_cv_lib_dld_dld_link" >&6; } +if test "x$ac_cv_lib_dld_dld_link" = xyes; then : + lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld +fi + + +fi + + +fi + + +fi + + +fi + + +fi + + ;; + esac + + if test no = "$lt_cv_dlopen"; then + enable_dlopen=no + else + enable_dlopen=yes + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS=$CPPFLAGS + test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS=$LDFLAGS + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS=$LIBS + LIBS="$lt_cv_dlopen_libs $LIBS" + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5 +$as_echo_n "checking whether a program can dlopen itself... " >&6; } +if ${lt_cv_dlopen_self+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test yes = "$cross_compiling"; then : + lt_cv_dlopen_self=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform.
*/ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self=no + fi +fi +rm -fr conftest* + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 +$as_echo "$lt_cv_dlopen_self" >&6; } + + if test yes = "$lt_cv_dlopen_self"; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5 +$as_echo_n "checking whether a statically linked program can dlopen itself...
" >&6; } +if ${lt_cv_dlopen_self_static+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test yes = "$cross_compiling"; then : + lt_cv_dlopen_self_static=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif + +#include <stdio.h> + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$?
+ case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self_static=no + fi +fi +rm -fr conftest* + + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 +$as_echo "$lt_cv_dlopen_self_static" >&6; } + fi + + CPPFLAGS=$save_CPPFLAGS + LDFLAGS=$save_LDFLAGS + LIBS=$save_LIBS + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi + + + + + + + + + + + + + + + + + +striplib= +old_striplib= +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 +$as_echo_n "checking whether stripping libraries is possible... " >&6; } +if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +else +# FIXME - insert some real tests, host_os isn't really good enough + case $host_os in + darwin*) + if test -n "$STRIP"; then + striplib="$STRIP -x" + old_striplib="$STRIP -S" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi + ;; + *) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + ;; + esac +fi + + + + + + + + + + + + + # Report what library types will actually be built + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 +$as_echo_n "checking if libtool supports shared libraries... 
" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 +$as_echo "$can_build_shared" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 +$as_echo_n "checking whether to build shared libraries... " >&6; } + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[4-9]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 +$as_echo "$enable_shared" >&6; } + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 +$as_echo_n "checking whether to build static libraries... " >&6; } + # Make sure either enable_shared or enable_static is yes. 
+ test yes = "$enable_shared" || enable_static=yes + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 +$as_echo "$enable_static" >&6; } + + + + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +CC=$lt_save_CC + + if test -n "$CXX" && ( test no != "$CXX" && + ( (test g++ = "$CXX" && `g++ -v >/dev/null 2>&1` ) || + (test g++ != "$CXX"))); then + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5 +$as_echo_n "checking how to run the C++ preprocessor... " >&6; } +if test -z "$CXXCPP"; then + if ${ac_cv_prog_CXXCPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CXXCPP needs to be expanded + for CXXCPP in "$CXX -E" "/lib/cpp" + do + ac_preproc_ok=false +for ac_cxx_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CXXCPP=$CXXCPP + +fi + CXXCPP=$ac_cv_prog_CXXCPP +else + ac_cv_prog_CXXCPP=$CXXCPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXXCPP" >&5 +$as_echo "$CXXCPP" >&6; } +ac_preproc_ok=false +for ac_cxx_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_cxx_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? 
"C++ preprocessor \"$CXXCPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +else + _lt_caught_CXX_error=yes +fi + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +archive_cmds_need_lc_CXX=no +allow_undefined_flag_CXX= +always_export_symbols_CXX=no +archive_expsym_cmds_CXX= +compiler_needs_object_CXX=no +export_dynamic_flag_spec_CXX= +hardcode_direct_CXX=no +hardcode_direct_absolute_CXX=no +hardcode_libdir_flag_spec_CXX= +hardcode_libdir_separator_CXX= +hardcode_minus_L_CXX=no +hardcode_shlibpath_var_CXX=unsupported +hardcode_automatic_CXX=no +inherit_rpath_CXX=no +module_cmds_CXX= +module_expsym_cmds_CXX= +link_all_deplibs_CXX=unknown +old_archive_cmds_CXX=$old_archive_cmds +reload_flag_CXX=$reload_flag +reload_cmds_CXX=$reload_cmds +no_undefined_flag_CXX= +whole_archive_flag_spec_CXX= +enable_shared_with_static_runtimes_CXX=no + +# Source file extension for C++ test sources. +ac_ext=cpp + +# Object file extension for compiled C++ test sources. +objext=o +objext_CXX=$objext + +# No sense in running all these tests if we already determined that +# the CXX compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. 
+if test yes != "$_lt_caught_CXX_error"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="int some_variable = 0;" + + # Code to be used in simple link tests + lt_simple_link_test_code='int main(int, char *[]) { return(0); }' + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + + + # save warnings/boilerplate of simple test code + ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + + ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + + # Allow CC to be a program name with arguments. 
+ lt_save_CC=$CC + lt_save_CFLAGS=$CFLAGS + lt_save_LD=$LD + lt_save_GCC=$GCC + GCC=$GXX + lt_save_with_gnu_ld=$with_gnu_ld + lt_save_path_LD=$lt_cv_path_LD + if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then + lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx + else + $as_unset lt_cv_prog_gnu_ld + fi + if test -n "${lt_cv_path_LDCXX+set}"; then + lt_cv_path_LD=$lt_cv_path_LDCXX + else + $as_unset lt_cv_path_LD + fi + test -z "${LDCXX+set}" || LD=$LDCXX + CC=${CXX-"c++"} + CFLAGS=$CXXFLAGS + compiler=$CC + compiler_CXX=$CC + func_cc_basename $compiler +cc_basename=$func_cc_basename_result + + + if test -n "$compiler"; then + # We don't want -fno-exception when compiling C++ code, so set the + # no_builtin_flag separately + if test yes = "$GXX"; then + lt_prog_compiler_no_builtin_flag_CXX=' -fno-builtin' + else + lt_prog_compiler_no_builtin_flag_CXX= + fi + + if test yes = "$GXX"; then + # Set up default GNU C++ configuration + + + +# Check whether --with-gnu-ld was given. +if test "${with_gnu_ld+set}" = set; then : + withval=$with_gnu_ld; test no = "$withval" || with_gnu_ld=yes +else + with_gnu_ld=no +fi + +ac_prog=ld +if test yes = "$GCC"; then + # Check if gcc -print-prog-name=ld gives a path. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 +$as_echo_n "checking for ld used by $CC... " >&6; } + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return, which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [\\/]* | ?:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD=$ac_prog + ;; + "") + # If it fails, then pretend we aren't using GCC. 
+ ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test yes = "$with_gnu_ld"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 +$as_echo_n "checking for GNU ld... " >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 +$as_echo_n "checking for non-GNU ld... " >&6; } +fi +if ${lt_cv_path_LD+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$LD"; then + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD=$ac_dir/$ac_prog + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in + *GNU* | *'with BFD'*) + test no != "$with_gnu_ld" && break + ;; + *) + test yes != "$with_gnu_ld" && break + ;; + esac + fi + done + IFS=$lt_save_ifs +else + lt_cv_path_LD=$LD # Let the user override the test with a path. +fi +fi + +LD=$lt_cv_path_LD +if test -n "$LD"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LD" >&5 +$as_echo "$LD" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 +$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; } +if ${lt_cv_prog_gnu_ld+:} false; then : + $as_echo_n "(cached) " >&6 +else + # I'd rather use --version here, but apparently some GNU lds only accept -v. +case `$LD -v 2>&1 </dev/null` in +*GNU* | *'with BFD'*) + lt_cv_prog_gnu_ld=yes + ;; +*) + lt_cv_prog_gnu_ld=no + ;; +esac +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_gnu_ld" >&5 +$as_echo "$lt_cv_prog_gnu_ld" >&6; } +with_gnu_ld=$lt_cv_prog_gnu_ld + + + + + + + + # Check if GNU C++ uses GNU ld as the underlying linker, since the + # archiving commands below assume that GNU ld is being used.
+ if test yes = "$with_gnu_ld"; then + archive_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + + hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' + export_dynamic_flag_spec_CXX='$wl--export-dynamic' + + # If archive_cmds runs LD, not CC, wlarc should be empty + # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to + # investigate it a little bit more. (MM) + wlarc='$wl' + + # ancient GNU ld didn't support --whole-archive et. al. + if eval "`$CC -print-prog-name=ld` --help 2>&1" | + $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec_CXX=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + whole_archive_flag_spec_CXX= + fi + else + with_gnu_ld=no + wlarc= + + # A generic and very simple default shared library creation + # command for GNU C++ for the case where it uses the native + # linker, instead of GNU ld. If possible, this setting should + # overridden to take advantage of the native linker features on + # the platform it is being used on. + archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + fi + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + GXX=no + with_gnu_ld=no + wlarc= + fi + + # PORTME: fill in a description of your system's C++ link characteristics + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + ld_shlibs_CXX=yes + case $host_os in + aix3*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + aix[4-9]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. + # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. 
+ # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + archive_cmds_CXX='' + hardcode_direct_CXX=yes + hardcode_direct_absolute_CXX=yes + hardcode_libdir_separator_CXX=':' + link_all_deplibs_CXX=yes + file_list_spec_CXX='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. + hardcode_direct_CXX=no + hardcode_direct_absolute_CXX=no + ;; + esac + + if test yes = "$GXX"; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct_CXX=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L_CXX=yes + hardcode_libdir_flag_spec_CXX='-L$libdir' + hardcode_libdir_separator_CXX= + fi + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag=$shared_flag' $wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. 
+ shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + export_dynamic_flag_spec_CXX='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to + # export. + always_export_symbols_CXX=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + # The "-G" linker flag allows undefined symbols. + no_undefined_flag_CXX='-bernotok' + # Determine the default libpath from the value encoded in an empty + # executable. + if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath__CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. 
+ if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__CXX +fi + + hardcode_libdir_flag_spec_CXX='$wl-blibpath:$libdir:'"$aix_libpath" + + archive_expsym_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + hardcode_libdir_flag_spec_CXX='$wl-R $libdir:/usr/lib:/lib' + allow_undefined_flag_CXX="-z nodefs" + archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if ${lt_cv_aix_libpath__CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath__CXX=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. 
+ if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath__CXX"; then + lt_cv_aix_libpath__CXX=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath__CXX +fi + + hardcode_libdir_flag_spec_CXX='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag_CXX=' $wl-bernotok' + allow_undefined_flag_CXX=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec_CXX='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec_CXX='$convenience' + fi + archive_cmds_need_lc_CXX=yes + archive_expsym_cmds_CXX='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared + # libraries. Need -bnortl late, we may have -brtl in LDFLAGS. 
+ archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + archive_expsym_cmds_CXX="$archive_expsym_cmds_CXX"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag_CXX=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. 
FIXME + archive_cmds_CXX='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + ld_shlibs_CXX=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + cygwin* | mingw* | pw32* | cegcc*) + case $GXX,$cc_basename in + ,cl* | no,cl*) + # Native MSVC + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + hardcode_libdir_flag_spec_CXX=' ' + allow_undefined_flag_CXX=unsupported + always_export_symbols_CXX=yes + file_list_spec_CXX='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds_CXX='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + archive_expsym_cmds_CXX='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, CXX)='true' + enable_shared_with_static_runtimes_CXX=yes + # Don't use ranlib + old_postinstall_cmds_CXX='chmod 644 $oldlib' + postlink_cmds_CXX='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + func_to_tool_file "$lt_outputfile"~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # g++ + # _LT_TAGVAR(hardcode_libdir_flag_spec, CXX) is actually meaningless, + # as there is no search path for DLLs. + hardcode_libdir_flag_spec_CXX='-L$libdir' + export_dynamic_flag_spec_CXX='$wl--export-all-symbols' + allow_undefined_flag_CXX=unsupported + always_export_symbols_CXX=no + enable_shared_with_static_runtimes_CXX=yes + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... 
+ archive_expsym_cmds_CXX='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs_CXX=no + fi + ;; + esac + ;; + darwin* | rhapsody*) + + + archive_cmds_need_lc_CXX=no + hardcode_direct_CXX=no + hardcode_automatic_CXX=yes + hardcode_shlibpath_var_CXX=unsupported + if test yes = "$lt_cv_ld_force_load"; then + whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + + else + whole_archive_flag_spec_CXX='' + fi + link_all_deplibs_CXX=yes + allow_undefined_flag_CXX=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + archive_cmds_CXX="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + module_cmds_CXX="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + archive_expsym_cmds_CXX="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + module_expsym_cmds_CXX="sed -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC 
\$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + if test yes != "$lt_cv_apple_cc_single_mod"; then + archive_cmds_CXX="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" + archive_expsym_cmds_CXX="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil" + fi + + else + ld_shlibs_CXX=no + fi + + ;; + + os2*) + hardcode_libdir_flag_spec_CXX='-L$libdir' + hardcode_minus_L_CXX=yes + allow_undefined_flag_CXX=unsupported + shrext_cmds=.dll + archive_cmds_CXX='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds_CXX='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC 
-Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_from_new_cmds_CXX='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes_CXX=yes + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + freebsd2.*) + # C++ shared libraries reported to be fairly broken before + # switch to ELF + ld_shlibs_CXX=no + ;; + + freebsd-elf*) + archive_cmds_need_lc_CXX=no + ;; + + freebsd* | dragonfly*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + ld_shlibs_CXX=yes + ;; + + haiku*) + archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + link_all_deplibs_CXX=yes + ;; + + hpux9*) + hardcode_libdir_flag_spec_CXX='$wl+b $wl$libdir' + hardcode_libdir_separator_CXX=: + export_dynamic_flag_spec_CXX='$wl-E' + hardcode_direct_CXX=yes + hardcode_minus_L_CXX=yes # Not in the search PATH, + # but as the default + # location of the library. + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + aCC*) + archive_cmds_CXX='$RM $output_objdir/$soname~$CC -b $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library.
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + archive_cmds_CXX='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + fi + ;; + esac + ;; + + hpux10*|hpux11*) + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec_CXX='$wl+b $wl$libdir' + hardcode_libdir_separator_CXX=: + + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + export_dynamic_flag_spec_CXX='$wl-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct_CXX=no + hardcode_shlibpath_var_CXX=no + ;; + *) + hardcode_direct_CXX=yes + hardcode_direct_absolute_CXX=yes + hardcode_minus_L_CXX=yes # Not in the search PATH, + # but as the default + # location of the library. 
+ ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + archive_cmds_CXX='$CC -b $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + archive_cmds_CXX='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + archive_cmds_CXX='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + archive_cmds_CXX='$CC -shared -nostdlib -fPIC $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + archive_cmds_CXX='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + archive_cmds_CXX='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + fi + ;; + esac + ;; + + interix[3-9]*) + hardcode_direct_CXX=no + 
hardcode_shlibpath_var_CXX=no + hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' + export_dynamic_flag_spec_CXX='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + archive_cmds_CXX='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds_CXX='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + archive_cmds_CXX='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ old_archive_cmds_CXX='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` -o $lib' + fi + fi + link_all_deplibs_CXX=yes + ;; + esac + hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' + hardcode_libdir_separator_CXX=: + inherit_rpath_CXX=yes + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + archive_expsym_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib $wl-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + + hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' + export_dynamic_flag_spec_CXX='$wl--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc* | ecpc* ) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
+ case `$CC -V 2>&1` in + *"Version 7."*) + archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + archive_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + archive_cmds_need_lc_CXX=no + hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' + export_dynamic_flag_spec_CXX='$wl--export-dynamic' + whole_archive_flag_spec_CXX='$wl--whole-archive$convenience $wl--no-whole-archive' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + case `$CC -V` in + *pgCC\ [1-5].* | *pgcpp\ [1-5].*) + prelink_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ + compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' + old_archive_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ + $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ + $RANLIB $oldlib' + archive_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir 
$predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 6 and above use weak symbols + archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + + hardcode_libdir_flag_spec_CXX='$wl--rpath $wl$libdir' + export_dynamic_flag_spec_CXX='$wl--export-dynamic' + whole_archive_flag_spec_CXX='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + ;; + cxx*) + # Compaq C++ + archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib $wl-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec_CXX='-rpath $libdir' + hardcode_libdir_separator_CXX=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' + ;; + xl* | mpixl* | bgxl*) + # IBM XL 8.0 on PPC, with GNU ld + hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' + export_dynamic_flag_spec_CXX='$wl--export-dynamic' + archive_cmds_CXX='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds_CXX='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + no_undefined_flag_CXX=' -zdefs' + archive_cmds_CXX='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + archive_expsym_cmds_CXX='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file $wl$export_symbols' + hardcode_libdir_flag_spec_CXX='-R$libdir' + whole_archive_flag_spec_CXX='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object_CXX=yes + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. 
+ output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; + esac + ;; + + lynxos*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + m88k*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds_CXX='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + hardcode_libdir_flag_spec_CXX='-R$libdir' + hardcode_direct_CXX=yes + hardcode_shlibpath_var_CXX=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + + *nto* | *qnx*) + ld_shlibs_CXX=yes + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + hardcode_direct_CXX=yes + hardcode_shlibpath_var_CXX=no + hardcode_direct_absolute_CXX=yes + archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`"; then + archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file,$export_symbols -o $lib' + export_dynamic_flag_spec_CXX='$wl-E' + whole_archive_flag_spec_CXX=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + fi + output_verbose_link_cmd=func_echo_all + else + ld_shlibs_CXX=no + fi + ;; + + osf3* | osf4* 
| osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + hardcode_libdir_flag_spec_CXX='$wl-rpath,$libdir' + hardcode_libdir_separator_CXX=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. + case $host in + osf3*) old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;; + *) old_archive_cmds_CXX='$CC -o $oldlib $oldobjs' ;; + esac + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + cxx*) + case $host in + osf3*) + allow_undefined_flag_CXX=' $wl-expect_unresolved $wl\*' + archive_cmds_CXX='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $soname `test -n "$verstring" && func_echo_all "$wl-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' + ;; + *) + allow_undefined_flag_CXX=' -expect_unresolved \*' + archive_cmds_CXX='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds_CXX='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname $wl-input 
$wl$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~ + $RM $lib.exp' + hardcode_libdir_flag_spec_CXX='-rpath $libdir' + ;; + esac + + hardcode_libdir_separator_CXX=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes,no = "$GXX,$with_gnu_ld"; then + allow_undefined_flag_CXX=' $wl-expect_unresolved $wl\*' + case $host in + osf3*) + archive_cmds_CXX='$CC -shared -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + *) + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + esac + + hardcode_libdir_flag_spec_CXX='$wl-rpath $wl$libdir' + hardcode_libdir_separator_CXX=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + + else + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + fi + ;; + esac + ;; + + psos*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + lcc*) + # Lucid + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + archive_cmds_need_lc_CXX=yes + no_undefined_flag_CXX=' -zdefs' + archive_cmds_CXX='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag $wl-M $wl$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + hardcode_libdir_flag_spec_CXX='-R$libdir' + hardcode_shlibpath_var_CXX=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. + # Supported since Solaris 2.6 (maybe 2.5.1?) + whole_archive_flag_spec_CXX='-z allextract$convenience -z defaultextract' + ;; + esac + link_all_deplibs_CXX=yes + + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' + ;; + gcx*) + # Green Hills C++ Compiler + archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + + # The C++ compiler must be used to create the archive. + old_archive_cmds_CXX='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test yes,no = "$GXX,$with_gnu_ld"; then + no_undefined_flag_CXX=' $wl-z ${wl}defs' + if $CC --version | $GREP -v '^2\.7' > /dev/null; then + archive_cmds_CXX='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + else + # g++ 2.7 appears to require '-G' NOT '-shared' on this + # platform. + archive_cmds_CXX='$CC -G -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + archive_expsym_cmds_CXX='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' + fi + + hardcode_libdir_flag_spec_CXX='$wl-R $wl$libdir' + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + whole_archive_flag_spec_CXX='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + ;; + esac + fi + ;; + esac + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag_CXX='$wl-z,text' + archive_cmds_need_lc_CXX=no + hardcode_shlibpath_var_CXX=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + archive_cmds_CXX='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds_CXX='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. 
+ no_undefined_flag_CXX='$wl-z,text' + allow_undefined_flag_CXX='$wl-z,nodefs' + archive_cmds_need_lc_CXX=no + hardcode_shlibpath_var_CXX=no + hardcode_libdir_flag_spec_CXX='$wl-R,$libdir' + hardcode_libdir_separator_CXX=':' + link_all_deplibs_CXX=yes + export_dynamic_flag_spec_CXX='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + archive_cmds_CXX='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + old_archive_cmds_CXX='$CC -Tprelink_objects $oldobjs~ + '"$old_archive_cmds_CXX" + reload_cmds_CXX='$CC -Tprelink_objects $reload_objs~ + '"$reload_cmds_CXX" + ;; + *) + archive_cmds_CXX='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds_CXX='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + ;; + + vxworks*) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + + *) + # FIXME: insert proper C++ library support + ld_shlibs_CXX=no + ;; + esac + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5 +$as_echo "$ld_shlibs_CXX" >&6; } + test no = "$ld_shlibs_CXX" && can_build_shared=no + + GCC_CXX=$GXX + LD_CXX=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... 
+    # Dependencies to place before and after the object being linked:
+    #
+    # Compile a trivial C++ class with $ac_compile, run
+    # $output_verbose_link_cmd, and sort every word of its output into:
+    #   predep_objects_CXX / postdep_objects_CXX  - object files seen
+    #       before / after conftest.$objext
+    #   postdeps_CXX                 - -L/-R/-l words seen after it
+    #   compiler_lib_search_path_CXX - -L/-R words seen before it
+    # (predeps_CXX is only reset here; nothing below assigns it.)
+predep_objects_CXX=
+postdep_objects_CXX=
+predeps_CXX=
+postdeps_CXX=
+compiler_lib_search_path_CXX=
+
+cat > conftest.$ac_ext <<_LT_EOF
+class Foo
+{
+public:
+  Foo (void) { a = 0; }
+private:
+  int a;
+};
+_LT_EOF
+
+
+# When an LTO-style option is present in "$CC $CFLAGS", append the matching
+# -fno-* option for this probe only; CFLAGS is restored further down.
+_lt_libdeps_save_CFLAGS=$CFLAGS
+case "$CC $CFLAGS " in #(
+*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
+*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
+esac
+
+if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  # Parse the compiler output and extract the necessary
+  # objects, libraries and library flags.
+
+  # Sentinel used to keep track of whether or not we are before
+  # the conftest object file.
+  pre_test_object_deps_done=no
+
+  # 'prev' is read on the very first iteration; start from a known-empty
+  # value so a variable of that name inherited from the environment
+  # cannot be glued onto the first word.
+  prev=
+
+  for p in `eval "$output_verbose_link_cmd"`; do
+    case $prev$p in
+
+    -L* | -R* | -l*)
+       # Some compilers place space between "-{L,R}" and the path.
+       # Remove the space: remember the bare flag and join it with the
+       # next word.  Both operands carry the "x" guard; with the guard on
+       # one side only the comparison could never match a bare flag.
+       if test x-L = "x$p" ||
+          test x-R = "x$p"; then
+         prev=$p
+         continue
+       fi
+
+       # Expand the sysroot to ease extracting the directories later.
+       if test -z "$prev"; then
+         case $p in
+         -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;;
+         -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;;
+         -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;;
+         esac
+       fi
+       case $p in
+       =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;;
+       esac
+       if test no = "$pre_test_object_deps_done"; then
+         case $prev in
+         -L | -R)
+           # Internal compiler library paths should come after those
+           # provided by the user.  The postdeps already come after the
+           # user supplied libs so there is no need to process them.
+           if test -z "$compiler_lib_search_path_CXX"; then
+             compiler_lib_search_path_CXX=$prev$p
+           else
+             compiler_lib_search_path_CXX="${compiler_lib_search_path_CXX} $prev$p"
+           fi
+           ;;
+         # The "-l" case would never come before the object being
+         # linked, so don't bother handling this case.
+         esac
+       else
+         if test -z "$postdeps_CXX"; then
+           postdeps_CXX=$prev$p
+         else
+           postdeps_CXX="${postdeps_CXX} $prev$p"
+         fi
+       fi
+       prev=
+       ;;
+
+    *.lto.$objext) ;; # Ignore GCC LTO objects
+    *.$objext)
+       # This assumes that the test object file only shows up
+       # once in the compiler output.
+       if test "$p" = "conftest.$objext"; then
+         pre_test_object_deps_done=yes
+         continue
+       fi
+
+       if test no = "$pre_test_object_deps_done"; then
+         if test -z "$predep_objects_CXX"; then
+           predep_objects_CXX=$p
+         else
+           predep_objects_CXX="$predep_objects_CXX $p"
+         fi
+       else
+         if test -z "$postdep_objects_CXX"; then
+           postdep_objects_CXX=$p
+         else
+           postdep_objects_CXX="$postdep_objects_CXX $p"
+         fi
+       fi
+       ;;
+
+    *) ;; # Ignore the rest.
+
+    esac
+  done
+
+  # Clean up.
+  rm -f a.out a.exe
+else
+  echo "libtool.m4: error: problem compiling CXX test program"
+fi
+
+# Remove the probe object.  The file produced above is conftest.$objext.
+$RM -f conftest.$objext
+CFLAGS=$_lt_libdeps_save_CFLAGS
+
+# PORTME: override above test on systems where it is broken
+case $host_os in
+interix[3-9]*)
+  # Interix 3.5 installs completely hosed .la files for C++, so rather than
+  # hack all around it, let's just trust "g++" to DTRT.
+  predep_objects_CXX=
+  postdep_objects_CXX=
+  postdeps_CXX=
+  ;;
+esac
+
+
+# If the compiler already lists -lc among its post-dependencies, libtool
+# must not add it a second time.
+case " $postdeps_CXX " in
+*" -lc "*) archive_cmds_need_lc_CXX=no ;;
+esac
+ compiler_lib_search_dirs_CXX=
+if test -n "${compiler_lib_search_path_CXX}"; then
+ # Same list as the search path, with each " -L" prefix replaced by a space.
+ compiler_lib_search_dirs_CXX=`echo " ${compiler_lib_search_path_CXX}" | $SED -e 's! -L! !g' -e 's!^ !!'`
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    lt_prog_compiler_wl_CXX=
+lt_prog_compiler_pic_CXX=
+lt_prog_compiler_static_CXX=
+
+
+  # C++ specific cases for pic, static, wl, etc.
+ if test yes = "$GXX"; then + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_CXX='-Bstatic' + fi + lt_prog_compiler_pic_CXX='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic_CXX='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. + lt_prog_compiler_pic_CXX='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic_CXX='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static_CXX='$wl-static' + ;; + esac + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic_CXX='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + lt_prog_compiler_pic_CXX= + ;; + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + lt_prog_compiler_static_CXX= + ;; + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic_CXX=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. 
On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + ;; + *) + lt_prog_compiler_pic_CXX='-fPIC' + ;; + esac + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic_CXX='-fPIC -shared' + ;; + *) + lt_prog_compiler_pic_CXX='-fPIC' + ;; + esac + else + case $host_os in + aix[4-9]*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static_CXX='-Bstatic' + else + lt_prog_compiler_static_CXX='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_TAGVAR(lt_prog_compiler_static, CXX)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic_CXX='-DDLL_EXPORT' + ;; + dgux*) + case $cc_basename in + ec++*) + lt_prog_compiler_pic_CXX='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + lt_prog_compiler_pic_CXX='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | dragonfly*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='$wl-a ${wl}archive' + if test ia64 != "$host_cpu"; then + lt_prog_compiler_pic_CXX='+Z' + fi + ;; + aCC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='$wl-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic_CXX='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? 
+ ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_static_CXX='-non_shared' + # CC pic flag -KPIC is the default. + ;; + *) + ;; + esac + ;; + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # KAI C++ Compiler + lt_prog_compiler_wl_CXX='--backend -Wl,' + lt_prog_compiler_pic_CXX='-fPIC' + ;; + ecpc* ) + # old Intel C++ for x86_64, which still supported -KPIC. + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-static' + ;; + icpc* ) + # Intel C++, used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-fPIC' + lt_prog_compiler_static_CXX='-static' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-fpic' + lt_prog_compiler_static_CXX='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + lt_prog_compiler_pic_CXX= + lt_prog_compiler_static_CXX='-non_shared' + ;; + xlc* | xlC* | bgxl[cC]* | mpixl[cC]*) + # IBM XL 8.0, 9.0 on PPC and BlueGene + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-qpic' + lt_prog_compiler_static_CXX='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-Bstatic' + lt_prog_compiler_wl_CXX='-Qoption ld ' + ;; + esac + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + lt_prog_compiler_pic_CXX='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd*) + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ lt_prog_compiler_pic_CXX='-fPIC -shared' + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + lt_prog_compiler_wl_CXX='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + lt_prog_compiler_pic_CXX='-pic' + ;; + cxx*) + # Digital/Compaq C++ + lt_prog_compiler_wl_CXX='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + lt_prog_compiler_pic_CXX= + lt_prog_compiler_static_CXX='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-Bstatic' + lt_prog_compiler_wl_CXX='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + lt_prog_compiler_pic_CXX='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + lt_prog_compiler_pic_CXX='-pic' + lt_prog_compiler_static_CXX='-Bstatic' + ;; + lcc*) + # Lucid + lt_prog_compiler_pic_CXX='-pic' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + lt_prog_compiler_wl_CXX='-Wl,' + lt_prog_compiler_pic_CXX='-KPIC' + lt_prog_compiler_static_CXX='-Bstatic' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + lt_prog_compiler_pic_CXX='-KPIC' + ;; + *) + ;; + esac + ;; + vxworks*) + ;; + *) + lt_prog_compiler_can_build_shared_CXX=no + ;; + esac + fi + +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic_CXX= + ;; + *) + lt_prog_compiler_pic_CXX="$lt_prog_compiler_pic_CXX -DPIC" + ;; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +$as_echo_n "checking for $compiler option to produce PIC... 
" >&6; } +if ${lt_cv_prog_compiler_pic_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic_CXX=$lt_prog_compiler_pic_CXX +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_CXX" >&5 +$as_echo "$lt_cv_prog_compiler_pic_CXX" >&6; } +lt_prog_compiler_pic_CXX=$lt_cv_prog_compiler_pic_CXX + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$lt_prog_compiler_pic_CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5 +$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... " >&6; } +if ${lt_cv_prog_compiler_pic_works_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_pic_works_CXX=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic_CXX -DPIC" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works_CXX=yes + fi + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works_CXX" >&5 +$as_echo "$lt_cv_prog_compiler_pic_works_CXX" >&6; } + +if test yes = "$lt_cv_prog_compiler_pic_works_CXX"; then + case $lt_prog_compiler_pic_CXX in + "" | " "*) ;; + *) lt_prog_compiler_pic_CXX=" $lt_prog_compiler_pic_CXX" ;; + esac +else + lt_prog_compiler_pic_CXX= + lt_prog_compiler_can_build_shared_CXX=no +fi + +fi + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl_CXX eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if ${lt_cv_prog_compiler_static_works_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_static_works_CXX=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works_CXX=yes + fi + else + lt_cv_prog_compiler_static_works_CXX=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works_CXX" >&5 +$as_echo "$lt_cv_prog_compiler_static_works_CXX" >&6; } + +if test yes = "$lt_cv_prog_compiler_static_works_CXX"; then + : +else + lt_prog_compiler_static_CXX= +fi + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o_CXX=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_CXX=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5 +$as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; } + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if ${lt_cv_prog_compiler_c_o_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_prog_compiler_c_o_CXX=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o_CXX=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o_CXX" >&5 +$as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; } + + + + +hard_links=nottested +if test no = "$lt_cv_prog_compiler_c_o_CXX" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +$as_echo_n "checking if we can lock with hard links... " >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +$as_echo "$hard_links" >&6; } + if test no = "$hard_links"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 +$as_echo "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +$as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... 
" >&6; } + + export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms_CXX='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + case $host_os in + aix[4-9]*) + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds_CXX='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + export_symbols_cmds_CXX=$ltdll_cmds + ;; + cygwin* | mingw* | cegcc*) + case $cc_basename in + cl*) + exclude_expsyms_CXX='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + ;; + *) + export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + 
exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + ;; + esac + ;; + *) + export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs_CXX" >&5 +$as_echo "$ld_shlibs_CXX" >&6; } +test no = "$ld_shlibs_CXX" && can_build_shared=no + +with_gnu_ld_CXX=$with_gnu_ld + + + + + + +# +# Do we need to explicitly link libc? +# +case "x$archive_cmds_need_lc_CXX" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc_CXX=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $archive_cmds_CXX in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; } +if ${lt_cv_archive_cmds_need_lc_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl_CXX + pic_flag=$lt_prog_compiler_pic_CXX + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. 
+ libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag_CXX + allow_undefined_flag_CXX= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds_CXX 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc_CXX=no + else + lt_cv_archive_cmds_need_lc_CXX=yes + fi + allow_undefined_flag_CXX=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc_CXX" >&5 +$as_echo "$lt_cv_archive_cmds_need_lc_CXX" >&6; } + archive_cmds_need_lc_CXX=$lt_cv_archive_cmds_need_lc_CXX + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +$as_echo_n "checking dynamic linker characteristics... " >&6; } + +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + + + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. 
+ soname_spec='$libname$release$shared_ext$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. + case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. 
+ # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a(lib.so.V)' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. + finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. 
$dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | sed -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | sed -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl*) + # Native MSVC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. + if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + 
hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... + postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 
"|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # Don't embed -rpath directories since the linker doesn't support them. + hardcode_libdir_flag_spec_CXX='-L$libdir' + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if ${lt_cv_shlibpath_overrides_runpath+:} false; then : + $as_echo_n "(cached) " >&6 +else + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl_CXX\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec_CXX\"" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Add ABI-specific directories to the system library path. + sys_lib_dlsearch_path_spec="/lib64 /usr/lib64 /lib /usr/lib" + + # Ideally, we could use ldconfig to report *all* directories which are + # searched for libraries, however this is still not possible. Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. 
Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + 
soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd* | bitrig*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. + soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; 
then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +$as_echo "$dynamic_linker" >&6; } +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +$as_echo_n "checking how to hardcode library paths into programs... 
" >&6; } +hardcode_action_CXX= +if test -n "$hardcode_libdir_flag_spec_CXX" || + test -n "$runpath_var_CXX" || + test yes = "$hardcode_automatic_CXX"; then + + # We can hardcode non-existent directories. + if test no != "$hardcode_direct_CXX" && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, CXX)" && + test no != "$hardcode_minus_L_CXX"; then + # Linking always hardcodes the temporary library directory. + hardcode_action_CXX=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action_CXX=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + hardcode_action_CXX=unsupported +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action_CXX" >&5 +$as_echo "$hardcode_action_CXX" >&6; } + +if test relink = "$hardcode_action_CXX" || + test yes = "$inherit_rpath_CXX"; then + # Fast installation is not supported + enable_fast_install=no +elif test yes = "$shlibpath_overrides_runpath" || + test no = "$enable_shared"; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + + fi # test -n "$compiler" + + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS + LDCXX=$LD + LD=$lt_save_LD + GCC=$lt_save_GCC + with_gnu_ld=$lt_save_with_gnu_ld + lt_cv_path_LDCXX=$lt_cv_path_LD + lt_cv_path_LD=$lt_save_path_LD + lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld + lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +fi # test yes != "$_lt_caught_CXX_error" + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + + + + + + + + + + ac_config_commands="$ac_config_commands libtool" + + + + +# Only 
expand once: + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +LIBM= +case $host in +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) + # These system don't have libm, or don't need it + ;; +*-ncr-sysv4.3*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mwvalidcheckl in -lmw" >&5 +$as_echo_n "checking for _mwvalidcheckl in -lmw... " >&6; } +if ${ac_cv_lib_mw__mwvalidcheckl+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lmw $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char _mwvalidcheckl (); +int +main () +{ +return _mwvalidcheckl (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_mw__mwvalidcheckl=yes +else + ac_cv_lib_mw__mwvalidcheckl=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mw__mwvalidcheckl" >&5 +$as_echo "$ac_cv_lib_mw__mwvalidcheckl" >&6; } +if test "x$ac_cv_lib_mw__mwvalidcheckl" = xyes; then : + LIBM=-lmw +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5 +$as_echo_n "checking for cos in -lm... " >&6; } +if ${ac_cv_lib_m_cos+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char cos (); +int +main () +{ +return cos (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_m_cos=yes +else + ac_cv_lib_m_cos=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5 +$as_echo "$ac_cv_lib_m_cos" >&6; } +if test "x$ac_cv_lib_m_cos" = xyes; then : + LIBM="$LIBM -lm" +fi + + ;; +*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5 +$as_echo_n "checking for cos in -lm... " >&6; } +if ${ac_cv_lib_m_cos+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char cos (); +int +main () +{ +return cos (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_m_cos=yes +else + ac_cv_lib_m_cos=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5 +$as_echo "$ac_cv_lib_m_cos" >&6; } +if test "x$ac_cv_lib_m_cos" = xyes; then : + LIBM=-lm +fi + + ;; +esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C/C++ restrict keyword" >&5 +$as_echo_n "checking for C/C++ restrict keyword... " >&6; } +if ${ac_cv_c_restrict+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_c_restrict=no + # The order here caters to the fact that C++ does not require restrict. + for ac_kw in __restrict __restrict__ _Restrict restrict; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +typedef int * int_ptr; + int foo (int_ptr $ac_kw ip) { + return ip[0]; + } +int +main () +{ +int s[1]; + int * $ac_kw t = s; + t[0] = 0; + return foo(t) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_c_restrict=$ac_kw +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test "$ac_cv_c_restrict" != no && break + done + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_restrict" >&5 +$as_echo "$ac_cv_c_restrict" >&6; } + + case $ac_cv_c_restrict in + restrict) ;; + no) $as_echo "#define restrict /**/" >>confdefs.h + ;; + *) cat >>confdefs.h <<_ACEOF +#define restrict $ac_cv_c_restrict +_ACEOF + ;; + esac + +ac_fn_c_check_decl "$LINENO" "strerror_r" "ac_cv_have_decl_strerror_r" "$ac_includes_default" +if test "x$ac_cv_have_decl_strerror_r" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_STRERROR_R $ac_have_decl +_ACEOF + +for ac_func in strerror_r +do : + ac_fn_c_check_func "$LINENO" "strerror_r" "ac_cv_func_strerror_r" +if test "x$ac_cv_func_strerror_r" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_STRERROR_R 1 +_ACEOF + +fi +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether strerror_r returns char *" >&5 +$as_echo_n "checking whether strerror_r returns char *... " >&6; } +if ${ac_cv_func_strerror_r_char_p+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ac_cv_func_strerror_r_char_p=no + if test $ac_cv_have_decl_strerror_r = yes; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ + + char buf[100]; + char x = *strerror_r (0, buf, sizeof buf); + char *p = strerror_r (0, buf, sizeof buf); + return !p || x; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_func_strerror_r_char_p=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + else + # strerror_r is not declared. 
Choose between + # systems that have relatively inaccessible declarations for the + # function. BeOS and DEC UNIX 4.0 fall in this category, but the + # former has a strerror_r that returns char*, while the latter + # has a strerror_r that returns `int'. + # This test should segfault on the DEC system. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default + extern char *strerror_r (); +int +main () +{ +char buf[100]; + char x = *strerror_r (0, buf, sizeof buf); + return ! isalpha (x); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_func_strerror_r_char_p=yes +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_strerror_r_char_p" >&5 +$as_echo "$ac_cv_func_strerror_r_char_p" >&6; } +if test $ac_cv_func_strerror_r_char_p = yes; then + +$as_echo "#define STRERROR_R_CHAR_P 1" >>confdefs.h + +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. +set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_PKG_CONFIG+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +PKG_CONFIG=$ac_cv_path_PKG_CONFIG +if test -n "$PKG_CONFIG"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 +$as_echo "$PKG_CONFIG" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_PKG_CONFIG"; then + ac_pt_PKG_CONFIG=$PKG_CONFIG + # Extract the first word of "pkg-config", so it can be a program name with args. +set dummy pkg-config; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_PKG_CONFIG+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG +if test -n "$ac_pt_PKG_CONFIG"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5 +$as_echo "$ac_pt_PKG_CONFIG" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_PKG_CONFIG" = x; then + PKG_CONFIG="pkg-config" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + PKG_CONFIG=$ac_pt_PKG_CONFIG + fi +else + PKG_CONFIG="$ac_cv_path_PKG_CONFIG" +fi + + + +# +# Force link_all_deplibs=yes for libtool, otherwise it will not +# link against dependency libs +# +link_all_deplibs=yes + + +# +# Check if 'ln' supports creating relative links +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if ${LN_S} supports --relative" >&5 +$as_echo_n "checking if ${LN_S} supports --relative... " >&6; } +if ${LN_S} --relative symlinktest 2>/dev/null; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + LN_RS="${LN_S} --relative" + + rm symlinktest +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + LN_RS=${LN_S} + +fi + + +# +# Save config flags for version dump tool +# + +cat >>confdefs.h <<_ACEOF +#define UCX_CONFIGURE_FLAGS "$config_flags" +_ACEOF + + + +# +# Provide the functionality of AS_VAR_APPEND if Autoconf does not have it. 
+# + + + +# +# Paths for loadable modules +# +modulesubdir=${PACKAGE_NAME} + # module directory names +moduledir=${libdir}/${modulesubdir} + # module installation directory +localmoduledir='$(abs_top_builddir)/modules' + # local directory for module symlinks + # (single-quoted, so the $(abs_top_builddir) reference is stored literally; presumably expanded later by make -- confirm) +objdir=${objdir} + # libtool objects dir, usually .libs + # NOTE(review): this assigns objdir to itself, a no-op at this point; presumably objdir was set by the libtool checks earlier -- confirm +shrext=${shrext_cmds} + # libtool shared library extension + +# Record the module sub-directory name in confdefs.h as UCX_MODULE_SUBDIR. +cat >>confdefs.h <<_ACEOF +#define UCX_MODULE_SUBDIR "${modulesubdir}" +_ACEOF + + + +# +# Additional m4 files +# +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_prog_doxygen.html +# =========================================================================== +# +# SYNOPSIS +# +# DX_INIT_DOXYGEN(PROJECT-NAME, DOXYFILE-PATH, [OUTPUT-DIR]) +# DX_DOXYGEN_FEATURE(ON|OFF) +# DX_DOT_FEATURE(ON|OFF) +# DX_HTML_FEATURE(ON|OFF) +# DX_CHM_FEATURE(ON|OFF) +# DX_CHI_FEATURE(ON|OFF) +# DX_MAN_FEATURE(ON|OFF) +# DX_RTF_FEATURE(ON|OFF) +# DX_XML_FEATURE(ON|OFF) +# DX_PDF_FEATURE(ON|OFF) +# DX_PS_FEATURE(ON|OFF) +# +# DESCRIPTION +# +# The DX_*_FEATURE macros control the default setting for the given +# Doxygen feature. Supported features are 'DOXYGEN' itself, 'DOT' for +# generating graphics, 'HTML' for plain HTML, 'CHM' for compressed HTML +# help (for MS users), 'CHI' for generating a separate .chi file by the +# .chm file, and 'MAN', 'RTF', 'XML', 'PDF' and 'PS' for the appropriate +# output formats. The environment variable DOXYGEN_PAPER_SIZE may be +# specified to override the default 'a4wide' paper size. +# +# By default, HTML, PDF and PS documentation is generated as this seems to +# be the most popular and portable combination. MAN pages created by +# Doxygen are usually problematic, though by picking an appropriate subset +# and doing some massaging they might be better than nothing. CHM and RTF +# are specific for MS (note that you can't generate both HTML and CHM at +# the same time).
The XML is rather useless unless you apply specialized +# post-processing to it. +# +# The macros mainly control the default state of the feature. The user can +# override the default by specifying --enable or --disable. The macros +# ensure that contradictory flags are not given (e.g., +# --enable-doxygen-html and --enable-doxygen-chm, +# --enable-doxygen-anything with --disable-doxygen, etc.) Finally, each +# feature will be automatically disabled (with a warning) if the required +# programs are missing. +# +# Once all the feature defaults have been specified, call DX_INIT_DOXYGEN +# with the following parameters: a one-word name for the project for use +# as a filename base etc., an optional configuration file name (the +# default is 'Doxyfile', the same as Doxygen's default), and an optional +# output directory name (the default is 'doxygen-doc'). +# +# Automake Support +# +# The following is a template aminclude.am file for use with Automake. +# Make targets and variable values are controlled by the various +# DX_COND_* conditionals set by autoconf. +# +# The provided targets are: +# +# doxygen-doc: Generate all doxygen documentation. +# +# doxygen-run: Run doxygen, which will generate some of the +# documentation (HTML, CHM, CHI, MAN, RTF, XML) +# but will not do the post processing required +# for the rest of it (PS, PDF, and some MAN). +# +# doxygen-man: Rename some doxygen generated man pages. +# +# doxygen-ps: Generate doxygen PostScript documentation. +# +# doxygen-pdf: Generate doxygen PDF documentation. +# +# Note that by default these are not integrated into the automake targets. +# If doxygen is used to generate man pages, you can achieve this +# integration by setting man3_MANS to the list of man pages generated and +# then adding the dependency: +# +# $(man3_MANS): doxygen-doc +# +# This will cause make to run doxygen and generate all the documentation.
+# +# The following variable is intended for use in Makefile.am: +# +# DX_CLEANFILES = everything to clean. +# +# Then add this variable to MOSTLYCLEANFILES. +# +# ----- begin aminclude.am ------------------------------------- +# +# ## --------------------------------- ## +# ## Format-independent Doxygen rules. ## +# ## --------------------------------- ## +# +# if DX_COND_doc +# +# ## ------------------------------- ## +# ## Rules specific for HTML output. ## +# ## ------------------------------- ## +# +# if DX_COND_html +# +# DX_CLEAN_HTML = @DX_DOCDIR@/html +# +# endif DX_COND_html +# +# ## ------------------------------ ## +# ## Rules specific for CHM output. ## +# ## ------------------------------ ## +# +# if DX_COND_chm +# +# DX_CLEAN_CHM = @DX_DOCDIR@/chm +# +# if DX_COND_chi +# +# DX_CLEAN_CHI = @DX_DOCDIR@/@PACKAGE@.chi +# +# endif DX_COND_chi +# +# endif DX_COND_chm +# +# ## ------------------------------ ## +# ## Rules specific for MAN output. ## +# ## ------------------------------ ## +# +# if DX_COND_man +# +# DX_CLEAN_MAN = @DX_DOCDIR@/man +# +# endif DX_COND_man +# +# ## ------------------------------ ## +# ## Rules specific for RTF output. ## +# ## ------------------------------ ## +# +# if DX_COND_rtf +# +# DX_CLEAN_RTF = @DX_DOCDIR@/rtf +# +# endif DX_COND_rtf +# +# ## ------------------------------ ## +# ## Rules specific for XML output. ## +# ## ------------------------------ ## +# +# if DX_COND_xml +# +# DX_CLEAN_XML = @DX_DOCDIR@/xml +# +# endif DX_COND_xml +# +# ## ----------------------------- ## +# ## Rules specific for PS output. 
## +# ## ----------------------------- ## +# +# if DX_COND_ps +# +# DX_CLEAN_PS = @DX_DOCDIR@/@PACKAGE@.ps +# +# DX_PS_GOAL = doxygen-ps +# +# doxygen-ps: @DX_DOCDIR@/@PACKAGE@.ps +# +# @DX_DOCDIR@/@PACKAGE@.ps: @DX_DOCDIR@/@PACKAGE@.tag +# cd @DX_DOCDIR@/latex; \ +# rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \ +# $(DX_LATEX) refman.tex; \ +# $(MAKEINDEX_PATH) refman.idx; \ +# $(DX_LATEX) refman.tex; \ +# countdown=5; \ +# while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \ +# refman.log > /dev/null 2>&1 \ +# && test $$countdown -gt 0; do \ +# $(DX_LATEX) refman.tex; \ +# countdown=`expr $$countdown - 1`; \ +# done; \ +# $(DX_DVIPS) -o ../@PACKAGE@.ps refman.dvi +# +# endif DX_COND_ps +# +# ## ------------------------------ ## +# ## Rules specific for PDF output. ## +# ## ------------------------------ ## +# +# if DX_COND_pdf +# +# DX_CLEAN_PDF = @DX_DOCDIR@/@PACKAGE@.pdf +# +# DX_PDF_GOAL = doxygen-pdf +# +# doxygen-pdf: @DX_DOCDIR@/@PACKAGE@.pdf +# +# @DX_DOCDIR@/@PACKAGE@.pdf: @DX_DOCDIR@/@PACKAGE@.tag +# cd @DX_DOCDIR@/latex; \ +# rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \ +# $(DX_PDFLATEX) refman.tex; \ +# $(DX_MAKEINDEX) refman.idx; \ +# $(DX_PDFLATEX) refman.tex; \ +# countdown=5; \ +# while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \ +# refman.log > /dev/null 2>&1 \ +# && test $$countdown -gt 0; do \ +# $(DX_PDFLATEX) refman.tex; \ +# countdown=`expr $$countdown - 1`; \ +# done; \ +# mv refman.pdf ../@PACKAGE@.pdf +# +# endif DX_COND_pdf +# +# ## ------------------------------------------------- ## +# ## Rules specific for LaTeX (shared for PS and PDF). 
## +# ## ------------------------------------------------- ## +# +# if DX_COND_latex +# +# DX_CLEAN_LATEX = @DX_DOCDIR@/latex +# +# endif DX_COND_latex +# +# .PHONY: doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) +# +# .INTERMEDIATE: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL) +# +# doxygen-run: @DX_DOCDIR@/@PACKAGE@.tag +# +# doxygen-doc: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL) +# +# @DX_DOCDIR@/@PACKAGE@.tag: $(DX_CONFIG) $(pkginclude_HEADERS) +# rm -rf @DX_DOCDIR@ +# $(DX_ENV) $(DX_DOXYGEN) $(srcdir)/$(DX_CONFIG) +# echo Timestamp >$@ +# +# DX_CLEANFILES = \ +# @DX_DOCDIR@/@PACKAGE@.tag \ +# -r \ +# $(DX_CLEAN_HTML) \ +# $(DX_CLEAN_CHM) \ +# $(DX_CLEAN_CHI) \ +# $(DX_CLEAN_MAN) \ +# $(DX_CLEAN_RTF) \ +# $(DX_CLEAN_XML) \ +# $(DX_CLEAN_PS) \ +# $(DX_CLEAN_PDF) \ +# $(DX_CLEAN_LATEX) +# +# endif DX_COND_doc +# +# ----- end aminclude.am --------------------------------------- +# +# LICENSE +# +# Copyright (c) 2009 Oren Ben-Kiki +# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 13 + +## ----------## +## Defaults. ## +## ----------## + +# DX_ENV starts empty and is extended further down with VAR='value' pairs; +# DX_MIN_VERSION is the doxygen version that the version check further down compares against. +DX_ENV="" +DX_MIN_VERSION="1.8.6" + + + + + + + + + + + + +## --------------- ## +## Private macros. ## +## --------------- ## + +# DX_ENV_APPEND(VARIABLE, VALUE) +# ------------------------------ +# Append VARIABLE="VALUE" to DX_ENV for invoking doxygen. + + +# DX_DIRNAME_EXPR +# --------------- +# Expand into a shell expression that prints the directory part of a path. + + +# DX_IF_FEATURE(FEATURE, IF-ON, IF-OFF) +# ------------------------------------- +# Expands according to the M4 (static) status of the feature.
+ + +# DX_REQUIRE_PROG(VARIABLE, PROGRAM) +# ---------------------------------- +# Require the specified program to be found for the DX_CURRENT_FEATURE to work. + + +# DX_REQUIRE_PROG_WITH_VERSION(VARIABLE, PROGRAM, VERSIONCMD, MINVERSION) +# ---------------------------------- +# Require the specified program to be found for the DX_CURRENT_FEATURE to work. + + +# DX_TEST_FEATURE(FEATURE) +# ------------------------ +# Expand to a shell expression testing whether the feature is active. + + +# DX_CHECK_DEPEND(REQUIRED_FEATURE, REQUIRED_STATE) +# ------------------------------------------------- +# Verify that a required feature has the right state before trying to turn on +# the DX_CURRENT_FEATURE. + + +# DX_CLEAR_DEPEND(FEATURE, REQUIRED_FEATURE, REQUIRED_STATE) +# ---------------------------------------------------------- +# Turn off the DX_CURRENT_FEATURE if the required feature is off. + + +# DX_FEATURE_ARG(FEATURE, DESCRIPTION, +# CHECK_DEPEND, CLEAR_DEPEND, +# REQUIRE, DO-IF-ON, DO-IF-OFF) +# -------------------------------------------- +# Parse the command-line option controlling a feature. CHECK_DEPEND is called +# if the user explicitly turns the feature on (and invokes DX_CHECK_DEPEND), +# otherwise CLEAR_DEPEND is called to turn off the default state if a required +# feature is disabled (using DX_CLEAR_DEPEND). REQUIRE performs additional +# requirement tests (DX_REQUIRE_PROG). Finally, an automake flag is set and +# DO-IF-ON or DO-IF-OFF are called according to the final state of the feature. + + +## -------------- ## +## Public macros. ## +## -------------- ## + +# DX_XXX_FEATURE(DEFAULT_STATE) +# ----------------------------- + + + + + + + + + + + + +# DX_INIT_DOXYGEN(PROJECT, [CONFIG-FILE], [OUTPUT-DOC-DIR]) +# --------------------------------------------------------- +# PROJECT also serves as the base name for the documentation files. +# The default CONFIG-FILE is "Doxyfile" and OUTPUT-DOC-DIR is "doxygen-doc".
+ + +# +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + +# Extract the first word of "dot", so it can be a program name with args. +set dummy dot; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_GRAPHVIZ_DOT+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$GRAPHVIZ_DOT"; then + ac_cv_prog_GRAPHVIZ_DOT="$GRAPHVIZ_DOT" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_GRAPHVIZ_DOT="yes" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +GRAPHVIZ_DOT=$ac_cv_prog_GRAPHVIZ_DOT +if test -n "$GRAPHVIZ_DOT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $GRAPHVIZ_DOT" >&5 +$as_echo "$GRAPHVIZ_DOT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +# HAVE_DOT_TRUE/HAVE_DOT_FALSE: exactly one of the pair is left empty and the other is set to '#'. +# The TRUE side is empty only when GRAPHVIZ_DOT is exactly "yes" (the value the PATH search above stores); +# a different value preset by the user therefore selects the FALSE side. + if test "x$GRAPHVIZ_DOT" = xyes; then + HAVE_DOT_TRUE= + HAVE_DOT_FALSE='#' +else + HAVE_DOT_TRUE='#' + HAVE_DOT_FALSE= +fi + + + +# Check whether --with-docs_only was given. +# (The value is only copied to withval; nothing else is done with it in this stanza.) +if test "${with_docs_only+set}" = set; then : + withval=$with_docs_only; +else + : +fi + + + + +# +# Doxygen options +# + + + + + + +# Files: +DX_PROJECT=UCX + +DX_CONFIG=docs/doxygen/ucxdox + +DX_DOCDIR=docs/doxygen-doc + + +# Environment variables used inside doxygen.cfg: +DX_ENV="$DX_ENV SRCDIR='$srcdir'" + +DX_ENV="$DX_ENV PROJECT='$DX_PROJECT'" + +DX_ENV="$DX_ENV DOCDIR='$DX_DOCDIR'" + +DX_ENV="$DX_ENV VERSION='$PACKAGE_VERSION'" + + +# Doxygen itself: + + + + # Check whether --enable-doxygen-doc was given.
+if test "${enable_doxygen_doc+set}" = set; then : + enableval=$enable_doxygen_doc; +case "$enableval" in +#( +y|Y|yes|Yes|YES) + DX_FLAG_doc=1 + + +;; #( +n|N|no|No|NO) + DX_FLAG_doc=0 + +;; #( +*) + as_fn_error $? "invalid value '$enableval' given to doxygen-doc" "$LINENO" 5 +;; +esac + +else + +DX_FLAG_doc=1 + + + +fi + +if test "$DX_FLAG_doc" = 1; then + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}doxygen", so it can be a program name with args. +set dummy ${ac_tool_prefix}doxygen; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_DOXYGEN+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_DOXYGEN in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_DOXYGEN="$DX_DOXYGEN" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_DOXYGEN="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_DOXYGEN=$ac_cv_path_DX_DOXYGEN +if test -n "$DX_DOXYGEN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_DOXYGEN" >&5 +$as_echo "$DX_DOXYGEN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_DOXYGEN"; then + ac_pt_DX_DOXYGEN=$DX_DOXYGEN + # Extract the first word of "doxygen", so it can be a program name with args. +set dummy doxygen; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_path_ac_pt_DX_DOXYGEN+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_DOXYGEN in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_DOXYGEN="$ac_pt_DX_DOXYGEN" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_DOXYGEN="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_DOXYGEN=$ac_cv_path_ac_pt_DX_DOXYGEN +if test -n "$ac_pt_DX_DOXYGEN"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_DOXYGEN" >&5 +$as_echo "$ac_pt_DX_DOXYGEN" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_DOXYGEN" = x; then + DX_DOXYGEN="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_DOXYGEN=$ac_pt_DX_DOXYGEN + fi +else + DX_DOXYGEN="$ac_cv_path_DX_DOXYGEN" +fi + +if test "$DX_FLAG_doc$DX_DOXYGEN" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: doxygen not found - will not generate any doxygen documentation" >&5 +$as_echo "$as_me: WARNING: doxygen not found - will not generate any doxygen documentation" >&2;} + DX_FLAG_doc=0 + +fi +version=`doxygen --version` +as_arg_v1=$version +as_arg_v2=$DX_MIN_VERSION +awk "$as_awk_strverscmp" v1="$as_arg_v1" v2="$as_arg_v2" /dev/null +case $? in #( + 1) : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: doxygen version $version is bad. 
Required version: $DX_MIN_VERSION and above" >&5 +$as_echo "$as_me: WARNING: doxygen version $version is bad. Required version: $DX_MIN_VERSION and above" >&2;} + DX_FLAG_doc=0 + + ;; #( + 0) : + ;; #( + 2) : + ;; #( + *) : + ;; +esac + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}perl", so it can be a program name with args. +set dummy ${ac_tool_prefix}perl; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_PERL+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_PERL in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_PERL="$DX_PERL" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_PERL="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_PERL=$ac_cv_path_DX_PERL +if test -n "$DX_PERL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_PERL" >&5 +$as_echo "$DX_PERL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_PERL"; then + ac_pt_DX_PERL=$DX_PERL + # Extract the first word of "perl", so it can be a program name with args. +set dummy perl; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_PERL+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_PERL in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_PERL="$ac_pt_DX_PERL" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_PERL="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_PERL=$ac_cv_path_ac_pt_DX_PERL +if test -n "$ac_pt_DX_PERL"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_PERL" >&5 +$as_echo "$ac_pt_DX_PERL" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_PERL" = x; then + DX_PERL="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_PERL=$ac_pt_DX_PERL + fi +else + DX_PERL="$ac_cv_path_DX_PERL" +fi + +if test "$DX_FLAG_doc$DX_PERL" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: perl not found - will not generate any doxygen documentation" >&5 +$as_echo "$as_me: WARNING: perl not found - will not generate any doxygen documentation" >&2;} + DX_FLAG_doc=0 + +fi + + : +fi + if test "$DX_FLAG_doc" = 1; then + DX_COND_doc_TRUE= + DX_COND_doc_FALSE='#' +else + DX_COND_doc_TRUE='#' + DX_COND_doc_FALSE= +fi + +if test "$DX_FLAG_doc" = 1; then + DX_ENV="$DX_ENV PERL_PATH='$DX_PERL'" + + : +else + + : +fi + + +# Dot for graphics: + + + + # Check whether --enable-doxygen-dot was given. +if test "${enable_doxygen_dot+set}" = set; then : + enableval=$enable_doxygen_dot; +case "$enableval" in +#( +y|Y|yes|Yes|YES) + DX_FLAG_dot=1 + + +test "$DX_FLAG_doc" = "1" \ +|| as_fn_error $? 
"doxygen-dot requires doxygen-doc" "$LINENO" 5 + +;; #( +n|N|no|No|NO) + DX_FLAG_dot=0 + +;; #( +*) + as_fn_error $? "invalid value '$enableval' given to doxygen-dot" "$LINENO" 5 +;; +esac + +else + +DX_FLAG_dot=0 + + +test "$DX_FLAG_doc" = "1" || DX_FLAG_dot=0 + + + +fi + +if test "$DX_FLAG_dot" = 1; then + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dot", so it can be a program name with args. +set dummy ${ac_tool_prefix}dot; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_DOT+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_DOT in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_DOT="$DX_DOT" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_DOT="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_DOT=$ac_cv_path_DX_DOT +if test -n "$DX_DOT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_DOT" >&5 +$as_echo "$DX_DOT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_DOT"; then + ac_pt_DX_DOT=$DX_DOT + # Extract the first word of "dot", so it can be a program name with args. +set dummy dot; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_DOT+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_DOT in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_DOT="$ac_pt_DX_DOT" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_DOT="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_DOT=$ac_cv_path_ac_pt_DX_DOT +if test -n "$ac_pt_DX_DOT"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_DOT" >&5 +$as_echo "$ac_pt_DX_DOT" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_DOT" = x; then + DX_DOT="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_DOT=$ac_pt_DX_DOT + fi +else + DX_DOT="$ac_cv_path_DX_DOT" +fi + +if test "$DX_FLAG_dot$DX_DOT" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: dot not found - will not generate graphics for doxygen documentation" >&5 +$as_echo "$as_me: WARNING: dot not found - will not generate graphics for doxygen documentation" >&2;} + DX_FLAG_dot=0 + +fi + + : +fi + if test "$DX_FLAG_dot" = 1; then + DX_COND_dot_TRUE= + DX_COND_dot_FALSE='#' +else + DX_COND_dot_TRUE='#' + DX_COND_dot_FALSE= +fi + +if test "$DX_FLAG_dot" = 1; then + DX_ENV="$DX_ENV HAVE_DOT='YES'" + + DX_ENV="$DX_ENV DOT_PATH='`expr ".$DX_DOT" : '\(\.\)[^/]*$' \| "x$DX_DOT" : 'x\(.*\)/[^/]*$'`'" + + : +else + DX_ENV="$DX_ENV HAVE_DOT='NO'" + + : +fi + + +# Man pages generation: + + + + # Check whether --enable-doxygen-man was given. 
+if test "${enable_doxygen_man+set}" = set; then : + enableval=$enable_doxygen_man; +case "$enableval" in +#( +y|Y|yes|Yes|YES) + DX_FLAG_man=1 + + +test "$DX_FLAG_doc" = "1" \ +|| as_fn_error $? "doxygen-man requires doxygen-doc" "$LINENO" 5 + +;; #( +n|N|no|No|NO) + DX_FLAG_man=0 + +;; #( +*) + as_fn_error $? "invalid value '$enableval' given to doxygen-man" "$LINENO" 5 +;; +esac + +else + +DX_FLAG_man=1 + + +test "$DX_FLAG_doc" = "1" || DX_FLAG_man=0 + + + +fi + +if test "$DX_FLAG_man" = 1; then + + : +fi + if test "$DX_FLAG_man" = 1; then + DX_COND_man_TRUE= + DX_COND_man_FALSE='#' +else + DX_COND_man_TRUE='#' + DX_COND_man_FALSE= +fi + +if test "$DX_FLAG_man" = 1; then + DX_ENV="$DX_ENV GENERATE_MAN='YES'" + + : +else + DX_ENV="$DX_ENV GENERATE_MAN='NO'" + + : +fi + + +# RTF file generation: + + + + # Check whether --enable-doxygen-rtf was given. +if test "${enable_doxygen_rtf+set}" = set; then : + enableval=$enable_doxygen_rtf; +case "$enableval" in +#( +y|Y|yes|Yes|YES) + DX_FLAG_rtf=1 + + +test "$DX_FLAG_doc" = "1" \ +|| as_fn_error $? "doxygen-rtf requires doxygen-doc" "$LINENO" 5 + +;; #( +n|N|no|No|NO) + DX_FLAG_rtf=0 + +;; #( +*) + as_fn_error $? "invalid value '$enableval' given to doxygen-rtf" "$LINENO" 5 +;; +esac + +else + +DX_FLAG_rtf=0 + + +test "$DX_FLAG_doc" = "1" || DX_FLAG_rtf=0 + + + +fi + +if test "$DX_FLAG_rtf" = 1; then + + : +fi + if test "$DX_FLAG_rtf" = 1; then + DX_COND_rtf_TRUE= + DX_COND_rtf_FALSE='#' +else + DX_COND_rtf_TRUE='#' + DX_COND_rtf_FALSE= +fi + +if test "$DX_FLAG_rtf" = 1; then + DX_ENV="$DX_ENV GENERATE_RTF='YES'" + + : +else + DX_ENV="$DX_ENV GENERATE_RTF='NO'" + + : +fi + + +# XML file generation: + + + + # Check whether --enable-doxygen-xml was given. +if test "${enable_doxygen_xml+set}" = set; then : + enableval=$enable_doxygen_xml; +case "$enableval" in +#( +y|Y|yes|Yes|YES) + DX_FLAG_xml=1 + + +test "$DX_FLAG_doc" = "1" \ +|| as_fn_error $? 
"doxygen-xml requires doxygen-doc" "$LINENO" 5 + +;; #( +n|N|no|No|NO) + DX_FLAG_xml=0 + +;; #( +*) + as_fn_error $? "invalid value '$enableval' given to doxygen-xml" "$LINENO" 5 +;; +esac + +else + +DX_FLAG_xml=0 + + +test "$DX_FLAG_doc" = "1" || DX_FLAG_xml=0 + + + +fi + +if test "$DX_FLAG_xml" = 1; then + + : +fi + if test "$DX_FLAG_xml" = 1; then + DX_COND_xml_TRUE= + DX_COND_xml_FALSE='#' +else + DX_COND_xml_TRUE='#' + DX_COND_xml_FALSE= +fi + +if test "$DX_FLAG_xml" = 1; then + DX_ENV="$DX_ENV GENERATE_XML='YES'" + + : +else + DX_ENV="$DX_ENV GENERATE_XML='NO'" + + : +fi + + +# (Compressed) HTML help generation: + + + + # Check whether --enable-doxygen-chm was given. +if test "${enable_doxygen_chm+set}" = set; then : + enableval=$enable_doxygen_chm; +case "$enableval" in +#( +y|Y|yes|Yes|YES) + DX_FLAG_chm=1 + + +test "$DX_FLAG_doc" = "1" \ +|| as_fn_error $? "doxygen-chm requires doxygen-doc" "$LINENO" 5 + +;; #( +n|N|no|No|NO) + DX_FLAG_chm=0 + +;; #( +*) + as_fn_error $? "invalid value '$enableval' given to doxygen-chm" "$LINENO" 5 +;; +esac + +else + +DX_FLAG_chm=0 + + +test "$DX_FLAG_doc" = "1" || DX_FLAG_chm=0 + + + +fi + +if test "$DX_FLAG_chm" = 1; then + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}hhc", so it can be a program name with args. +set dummy ${ac_tool_prefix}hhc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_HHC+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_HHC in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_HHC="$DX_HHC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_HHC="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_HHC=$ac_cv_path_DX_HHC +if test -n "$DX_HHC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_HHC" >&5 +$as_echo "$DX_HHC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_HHC"; then + ac_pt_DX_HHC=$DX_HHC + # Extract the first word of "hhc", so it can be a program name with args. +set dummy hhc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_HHC+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_HHC in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_HHC="$ac_pt_DX_HHC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_HHC="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_HHC=$ac_cv_path_ac_pt_DX_HHC +if test -n "$ac_pt_DX_HHC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_HHC" >&5 +$as_echo "$ac_pt_DX_HHC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_HHC" = x; then + DX_HHC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_HHC=$ac_pt_DX_HHC + fi +else + DX_HHC="$ac_cv_path_DX_HHC" +fi + +if test "$DX_FLAG_chm$DX_HHC" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: hhc not found - will not generate doxygen compressed HTML help documentation" >&5 +$as_echo "$as_me: WARNING: hhc not found - will not generate doxygen compressed HTML help documentation" >&2;} + DX_FLAG_chm=0 + +fi + + : +fi + if test "$DX_FLAG_chm" = 1; then + DX_COND_chm_TRUE= + DX_COND_chm_FALSE='#' +else + DX_COND_chm_TRUE='#' + DX_COND_chm_FALSE= +fi + +if test "$DX_FLAG_chm" = 1; then + DX_ENV="$DX_ENV HHC_PATH='$DX_HHC'" + + DX_ENV="$DX_ENV GENERATE_HTML='YES'" + + DX_ENV="$DX_ENV GENERATE_HTMLHELP='YES'" + + : +else + DX_ENV="$DX_ENV GENERATE_HTMLHELP='NO'" + + : +fi + + +# Separate CHI file generation. + + + + # Check whether --enable-doxygen-chi was given. +if test "${enable_doxygen_chi+set}" = set; then : + enableval=$enable_doxygen_chi; +case "$enableval" in +#( +y|Y|yes|Yes|YES) + DX_FLAG_chi=1 + + +test "$DX_FLAG_chm" = "1" \ +|| as_fn_error $? 
"doxygen-chi requires doxygen-chm" "$LINENO" 5 + +;; #( +n|N|no|No|NO) + DX_FLAG_chi=0 + +;; #( +*) + as_fn_error $? "invalid value '$enableval' given to doxygen-chi" "$LINENO" 5 +;; +esac + +else + +DX_FLAG_chi=0 + + +test "$DX_FLAG_chm" = "1" || DX_FLAG_chi=0 + + + +fi + +if test "$DX_FLAG_chi" = 1; then + + : +fi + if test "$DX_FLAG_chi" = 1; then + DX_COND_chi_TRUE= + DX_COND_chi_FALSE='#' +else + DX_COND_chi_TRUE='#' + DX_COND_chi_FALSE= +fi + +if test "$DX_FLAG_chi" = 1; then + DX_ENV="$DX_ENV GENERATE_CHI='YES'" + + : +else + DX_ENV="$DX_ENV GENERATE_CHI='NO'" + + : +fi + + +# Plain HTML pages generation: + + + + # Check whether --enable-doxygen-html was given. +if test "${enable_doxygen_html+set}" = set; then : + enableval=$enable_doxygen_html; +case "$enableval" in +#( +y|Y|yes|Yes|YES) + DX_FLAG_html=1 + + +test "$DX_FLAG_doc" = "1" \ +|| as_fn_error $? "doxygen-html requires doxygen-doc" "$LINENO" 5 + +test "$DX_FLAG_chm" = "0" \ +|| as_fn_error $? "doxygen-html contradicts doxygen-chm" "$LINENO" 5 + +;; #( +n|N|no|No|NO) + DX_FLAG_html=0 + +;; #( +*) + as_fn_error $? "invalid value '$enableval' given to doxygen-html" "$LINENO" 5 +;; +esac + +else + +DX_FLAG_html=1 + + +test "$DX_FLAG_doc" = "1" || DX_FLAG_html=0 + + +test "$DX_FLAG_chm" = "0" || DX_FLAG_html=0 + + + +fi + +if test "$DX_FLAG_html" = 1; then + + : +fi + if test "$DX_FLAG_html" = 1; then + DX_COND_html_TRUE= + DX_COND_html_FALSE='#' +else + DX_COND_html_TRUE='#' + DX_COND_html_FALSE= +fi + +if test "$DX_FLAG_html" = 1; then + DX_ENV="$DX_ENV GENERATE_HTML='YES'" + + : +else + test "$DX_FLAG_chm" = 1 || DX_ENV="$DX_ENV GENERATE_HTML='NO'" + + : +fi + + +# PostScript file generation: + + + + # Check whether --enable-doxygen-ps was given. +if test "${enable_doxygen_ps+set}" = set; then : + enableval=$enable_doxygen_ps; +case "$enableval" in +#( +y|Y|yes|Yes|YES) + DX_FLAG_ps=1 + + +test "$DX_FLAG_doc" = "1" \ +|| as_fn_error $? 
"doxygen-ps requires doxygen-doc" "$LINENO" 5 + +;; #( +n|N|no|No|NO) + DX_FLAG_ps=0 + +;; #( +*) + as_fn_error $? "invalid value '$enableval' given to doxygen-ps" "$LINENO" 5 +;; +esac + +else + +DX_FLAG_ps=0 + + +test "$DX_FLAG_doc" = "1" || DX_FLAG_ps=0 + + + +fi + +if test "$DX_FLAG_ps" = 1; then + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}latex", so it can be a program name with args. +set dummy ${ac_tool_prefix}latex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_LATEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_LATEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_LATEX="$DX_LATEX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_LATEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_LATEX=$ac_cv_path_DX_LATEX +if test -n "$DX_LATEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_LATEX" >&5 +$as_echo "$DX_LATEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_LATEX"; then + ac_pt_DX_LATEX=$DX_LATEX + # Extract the first word of "latex", so it can be a program name with args. +set dummy latex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_path_ac_pt_DX_LATEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_LATEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_LATEX="$ac_pt_DX_LATEX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_LATEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_LATEX=$ac_cv_path_ac_pt_DX_LATEX +if test -n "$ac_pt_DX_LATEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_LATEX" >&5 +$as_echo "$ac_pt_DX_LATEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_LATEX" = x; then + DX_LATEX="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_LATEX=$ac_pt_DX_LATEX + fi +else + DX_LATEX="$ac_cv_path_DX_LATEX" +fi + +if test "$DX_FLAG_ps$DX_LATEX" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: latex not found - will not generate doxygen PostScript documentation" >&5 +$as_echo "$as_me: WARNING: latex not found - will not generate doxygen PostScript documentation" >&2;} + DX_FLAG_ps=0 + +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}makeindex", so it can be a program name with args. +set dummy ${ac_tool_prefix}makeindex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... 
" >&6; } +if ${ac_cv_path_DX_MAKEINDEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_MAKEINDEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_MAKEINDEX="$DX_MAKEINDEX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_MAKEINDEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_MAKEINDEX=$ac_cv_path_DX_MAKEINDEX +if test -n "$DX_MAKEINDEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_MAKEINDEX" >&5 +$as_echo "$DX_MAKEINDEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_MAKEINDEX"; then + ac_pt_DX_MAKEINDEX=$DX_MAKEINDEX + # Extract the first word of "makeindex", so it can be a program name with args. +set dummy makeindex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_MAKEINDEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_MAKEINDEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_MAKEINDEX="$ac_pt_DX_MAKEINDEX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_MAKEINDEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_MAKEINDEX=$ac_cv_path_ac_pt_DX_MAKEINDEX +if test -n "$ac_pt_DX_MAKEINDEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_MAKEINDEX" >&5 +$as_echo "$ac_pt_DX_MAKEINDEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_MAKEINDEX" = x; then + DX_MAKEINDEX="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_MAKEINDEX=$ac_pt_DX_MAKEINDEX + fi +else + DX_MAKEINDEX="$ac_cv_path_DX_MAKEINDEX" +fi + +if test "$DX_FLAG_ps$DX_MAKEINDEX" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: makeindex not found - will not generate doxygen PostScript documentation" >&5 +$as_echo "$as_me: WARNING: makeindex not found - will not generate doxygen PostScript documentation" >&2;} + DX_FLAG_ps=0 + +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}bibtex", so it can be a program name with args. +set dummy ${ac_tool_prefix}bibtex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_BIBTEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_BIBTEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_BIBTEX="$DX_BIBTEX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_BIBTEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_BIBTEX=$ac_cv_path_DX_BIBTEX +if test -n "$DX_BIBTEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_BIBTEX" >&5 +$as_echo "$DX_BIBTEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_BIBTEX"; then + ac_pt_DX_BIBTEX=$DX_BIBTEX + # Extract the first word of "bibtex", so it can be a program name with args. +set dummy bibtex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_BIBTEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_BIBTEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_BIBTEX="$ac_pt_DX_BIBTEX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_BIBTEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_BIBTEX=$ac_cv_path_ac_pt_DX_BIBTEX +if test -n "$ac_pt_DX_BIBTEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_BIBTEX" >&5 +$as_echo "$ac_pt_DX_BIBTEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_BIBTEX" = x; then + DX_BIBTEX="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_BIBTEX=$ac_pt_DX_BIBTEX + fi +else + DX_BIBTEX="$ac_cv_path_DX_BIBTEX" +fi + +if test "$DX_FLAG_ps$DX_BIBTEX" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: bibtex not found - will not generate doxygen PostScript documentation" >&5 +$as_echo "$as_me: WARNING: bibtex not found - will not generate doxygen PostScript documentation" >&2;} + DX_FLAG_ps=0 + +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dvips", so it can be a program name with args. +set dummy ${ac_tool_prefix}dvips; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_DVIPS+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_DVIPS in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_DVIPS="$DX_DVIPS" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_DVIPS="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_DVIPS=$ac_cv_path_DX_DVIPS +if test -n "$DX_DVIPS"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_DVIPS" >&5 +$as_echo "$DX_DVIPS" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_DVIPS"; then + ac_pt_DX_DVIPS=$DX_DVIPS + # Extract the first word of "dvips", so it can be a program name with args. +set dummy dvips; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_DVIPS+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_DVIPS in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_DVIPS="$ac_pt_DX_DVIPS" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_DVIPS="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_DVIPS=$ac_cv_path_ac_pt_DX_DVIPS +if test -n "$ac_pt_DX_DVIPS"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_DVIPS" >&5 +$as_echo "$ac_pt_DX_DVIPS" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_DVIPS" = x; then + DX_DVIPS="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_DVIPS=$ac_pt_DX_DVIPS + fi +else + DX_DVIPS="$ac_cv_path_DX_DVIPS" +fi + +if test "$DX_FLAG_ps$DX_DVIPS" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: dvips not found - will not generate doxygen PostScript documentation" >&5 +$as_echo "$as_me: WARNING: dvips not found - will not generate doxygen PostScript documentation" >&2;} + DX_FLAG_ps=0 + +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}egrep", so it can be a program name with args. +set dummy ${ac_tool_prefix}egrep; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_EGREP in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_EGREP="$DX_EGREP" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_EGREP="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_EGREP=$ac_cv_path_DX_EGREP +if test -n "$DX_EGREP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_EGREP" >&5 +$as_echo "$DX_EGREP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_EGREP"; then + ac_pt_DX_EGREP=$DX_EGREP + # Extract the first word of "egrep", so it can be a program name with args. +set dummy egrep; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_EGREP in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_EGREP="$ac_pt_DX_EGREP" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_EGREP="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_EGREP=$ac_cv_path_ac_pt_DX_EGREP +if test -n "$ac_pt_DX_EGREP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_EGREP" >&5 +$as_echo "$ac_pt_DX_EGREP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_EGREP" = x; then + DX_EGREP="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_EGREP=$ac_pt_DX_EGREP + fi +else + DX_EGREP="$ac_cv_path_DX_EGREP" +fi + +if test "$DX_FLAG_ps$DX_EGREP" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: egrep not found - will not generate doxygen PostScript documentation" >&5 +$as_echo "$as_me: WARNING: egrep not found - will not generate doxygen PostScript documentation" >&2;} + DX_FLAG_ps=0 + +fi + + : +fi + if test "$DX_FLAG_ps" = 1; then + DX_COND_ps_TRUE= + DX_COND_ps_FALSE='#' +else + DX_COND_ps_TRUE='#' + DX_COND_ps_FALSE= +fi + +if test "$DX_FLAG_ps" = 1; then + + : +else + + : +fi + + +# PDF file generation: + + + + # Check whether --enable-doxygen-pdf was given. +if test "${enable_doxygen_pdf+set}" = set; then : + enableval=$enable_doxygen_pdf; +case "$enableval" in +#( +y|Y|yes|Yes|YES) + DX_FLAG_pdf=1 + + +test "$DX_FLAG_doc" = "1" \ +|| as_fn_error $? "doxygen-pdf requires doxygen-doc" "$LINENO" 5 + +;; #( +n|N|no|No|NO) + DX_FLAG_pdf=0 + +;; #( +*) + as_fn_error $? 
"invalid value '$enableval' given to doxygen-pdf" "$LINENO" 5 +;; +esac + +else + +DX_FLAG_pdf=1 + + +test "$DX_FLAG_doc" = "1" || DX_FLAG_pdf=0 + + + +fi + +if test "$DX_FLAG_pdf" = 1; then + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}pdflatex", so it can be a program name with args. +set dummy ${ac_tool_prefix}pdflatex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_PDFLATEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_PDFLATEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_PDFLATEX="$DX_PDFLATEX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_PDFLATEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_PDFLATEX=$ac_cv_path_DX_PDFLATEX +if test -n "$DX_PDFLATEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_PDFLATEX" >&5 +$as_echo "$DX_PDFLATEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_PDFLATEX"; then + ac_pt_DX_PDFLATEX=$DX_PDFLATEX + # Extract the first word of "pdflatex", so it can be a program name with args. +set dummy pdflatex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_PDFLATEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_PDFLATEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_PDFLATEX="$ac_pt_DX_PDFLATEX" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_PDFLATEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_PDFLATEX=$ac_cv_path_ac_pt_DX_PDFLATEX +if test -n "$ac_pt_DX_PDFLATEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_PDFLATEX" >&5 +$as_echo "$ac_pt_DX_PDFLATEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_PDFLATEX" = x; then + DX_PDFLATEX="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_PDFLATEX=$ac_pt_DX_PDFLATEX + fi +else + DX_PDFLATEX="$ac_cv_path_DX_PDFLATEX" +fi + +if test "$DX_FLAG_pdf$DX_PDFLATEX" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: pdflatex not found - will not generate doxygen PDF documentation" >&5 +$as_echo "$as_me: WARNING: pdflatex not found - will not generate doxygen PDF documentation" >&2;} + DX_FLAG_pdf=0 + +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}makeindex", so it can be a program name with args. +set dummy ${ac_tool_prefix}makeindex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_MAKEINDEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_MAKEINDEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_MAKEINDEX="$DX_MAKEINDEX" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_MAKEINDEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_MAKEINDEX=$ac_cv_path_DX_MAKEINDEX +if test -n "$DX_MAKEINDEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_MAKEINDEX" >&5 +$as_echo "$DX_MAKEINDEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_MAKEINDEX"; then + ac_pt_DX_MAKEINDEX=$DX_MAKEINDEX + # Extract the first word of "makeindex", so it can be a program name with args. +set dummy makeindex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_MAKEINDEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_MAKEINDEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_MAKEINDEX="$ac_pt_DX_MAKEINDEX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_MAKEINDEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_MAKEINDEX=$ac_cv_path_ac_pt_DX_MAKEINDEX +if test -n "$ac_pt_DX_MAKEINDEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_MAKEINDEX" >&5 +$as_echo "$ac_pt_DX_MAKEINDEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_MAKEINDEX" = x; then + DX_MAKEINDEX="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_MAKEINDEX=$ac_pt_DX_MAKEINDEX + fi +else + DX_MAKEINDEX="$ac_cv_path_DX_MAKEINDEX" +fi + +if test "$DX_FLAG_pdf$DX_MAKEINDEX" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: makeindex not found - will not generate doxygen PDF documentation" >&5 +$as_echo "$as_me: WARNING: makeindex not found - will not generate doxygen PDF documentation" >&2;} + DX_FLAG_pdf=0 + +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}bibtex", so it can be a program name with args. +set dummy ${ac_tool_prefix}bibtex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_BIBTEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_BIBTEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_BIBTEX="$DX_BIBTEX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_BIBTEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_BIBTEX=$ac_cv_path_DX_BIBTEX +if test -n "$DX_BIBTEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_BIBTEX" >&5 +$as_echo "$DX_BIBTEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_BIBTEX"; then + ac_pt_DX_BIBTEX=$DX_BIBTEX + # Extract the first word of "bibtex", so it can be a program name with args. +set dummy bibtex; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_BIBTEX+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_BIBTEX in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_BIBTEX="$ac_pt_DX_BIBTEX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_BIBTEX="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_BIBTEX=$ac_cv_path_ac_pt_DX_BIBTEX +if test -n "$ac_pt_DX_BIBTEX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_BIBTEX" >&5 +$as_echo "$ac_pt_DX_BIBTEX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_BIBTEX" = x; then + DX_BIBTEX="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_BIBTEX=$ac_pt_DX_BIBTEX + fi +else + DX_BIBTEX="$ac_cv_path_DX_BIBTEX" +fi + +if test "$DX_FLAG_pdf$DX_BIBTEX" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: bibtex not found - will not generate doxygen PDF documentation" >&5 +$as_echo "$as_me: WARNING: bibtex not found - will not generate doxygen PDF documentation" >&2;} + DX_FLAG_pdf=0 + +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}egrep", so it can be a program name with args. +set dummy ${ac_tool_prefix}egrep; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_DX_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $DX_EGREP in + [\\/]* | ?:[\\/]*) + ac_cv_path_DX_EGREP="$DX_EGREP" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_DX_EGREP="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +DX_EGREP=$ac_cv_path_DX_EGREP +if test -n "$DX_EGREP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DX_EGREP" >&5 +$as_echo "$DX_EGREP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_DX_EGREP"; then + ac_pt_DX_EGREP=$DX_EGREP + # Extract the first word of "egrep", so it can be a program name with args. +set dummy egrep; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_ac_pt_DX_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $ac_pt_DX_EGREP in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_DX_EGREP="$ac_pt_DX_EGREP" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_DX_EGREP="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_DX_EGREP=$ac_cv_path_ac_pt_DX_EGREP +if test -n "$ac_pt_DX_EGREP"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_DX_EGREP" >&5 +$as_echo "$ac_pt_DX_EGREP" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_pt_DX_EGREP" = x; then + DX_EGREP="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DX_EGREP=$ac_pt_DX_EGREP + fi +else + DX_EGREP="$ac_cv_path_DX_EGREP" +fi + +if test "$DX_FLAG_pdf$DX_EGREP" = 1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: egrep not found - will not generate doxygen PDF documentation" >&5 +$as_echo "$as_me: WARNING: egrep not found - will not generate doxygen PDF documentation" >&2;} + DX_FLAG_pdf=0 + +fi + + : +fi + if test "$DX_FLAG_pdf" = 1; then + DX_COND_pdf_TRUE= + DX_COND_pdf_FALSE='#' +else + DX_COND_pdf_TRUE='#' + DX_COND_pdf_FALSE= +fi + +if test "$DX_FLAG_pdf" = 1; then + + : +else + + : +fi + + +# LaTeX generation for PS and/or PDF: + if test "$DX_FLAG_ps" = 1 || test "$DX_FLAG_pdf" = 1; then + DX_COND_latex_TRUE= + DX_COND_latex_FALSE='#' +else + DX_COND_latex_TRUE='#' + DX_COND_latex_FALSE= +fi + +if test "$DX_FLAG_ps" = 1 || test "$DX_FLAG_pdf" = 1; then + DX_ENV="$DX_ENV GENERATE_LATEX='YES'" + +else + DX_ENV="$DX_ENV GENERATE_LATEX='NO'" + +fi + +# Paper size for PS and/or PDF: + +case "$DOXYGEN_PAPER_SIZE" in +#( +"") + DOXYGEN_PAPER_SIZE="" + +;; #( +a4wide|a4|letter|legal|executive) + 
DX_ENV="$DX_ENV PAPER_SIZE='$DOXYGEN_PAPER_SIZE'" + +;; #( +*) + as_fn_error $? "unknown DOXYGEN_PAPER_SIZE='$DOXYGEN_PAPER_SIZE'" "$LINENO" 5 +;; +esac + +#For debugging: +#echo DX_FLAG_doc=$DX_FLAG_doc +#echo DX_FLAG_dot=$DX_FLAG_dot +#echo DX_FLAG_man=$DX_FLAG_man +#echo DX_FLAG_html=$DX_FLAG_html +#echo DX_FLAG_chm=$DX_FLAG_chm +#echo DX_FLAG_chi=$DX_FLAG_chi +#echo DX_FLAG_rtf=$DX_FLAG_rtf +#echo DX_FLAG_xml=$DX_FLAG_xml +#echo DX_FLAG_pdf=$DX_FLAG_pdf +#echo DX_FLAG_ps=$DX_FLAG_ps +#echo DX_ENV=$DX_ENV + + +ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" +if test "x$ac_cv_type_size_t" = xyes; then : + +else + +cat >>confdefs.h <<_ACEOF +#define size_t unsigned int +_ACEOF + +fi + +if test "x$with_docs_only" = xyes; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: Documents only requested" >&5 +$as_echo "$as_me: Documents only requested" >&6;} + if test "$DX_FLAG_doc" = 1; then : + +else + as_fn_error $? "--with-docs-only was specified, but doxygen was not found" "$LINENO" 5 +fi + if test "$DX_FLAG_html" = 1; then : + +else + if test "x$enable_doxygen_html" = xyes; then : + as_fn_error $? "--enable-doxygen-html was specified, but html tools were not found" "$LINENO" 5 +fi +fi + if test "$DX_FLAG_man" = 1; then : + +else + if test "x$enable_doxygen_man" = xyes; then : + as_fn_error $? "--enable-doxygen-man was specified, but man tools were not found" "$LINENO" 5 +fi +fi + if test "$DX_FLAG_pdf" = 1; then : + +else + if test "x$enable_doxygen_pdf" = xyes; then : + as_fn_error $?
"--enable-doxygen-pdf was specified, but pdf tools were not found" "$LINENO" 5 +fi +fi + if true; then + DOCS_ONLY_TRUE= + DOCS_ONLY_FALSE='#' +else + DOCS_ONLY_TRUE='#' + DOCS_ONLY_FALSE= +fi + + if false; then + HAVE_GTEST_TRUE= + HAVE_GTEST_FALSE='#' +else + HAVE_GTEST_TRUE='#' + HAVE_GTEST_FALSE= +fi + + if false; then + HAVE_STATS_TRUE= + HAVE_STATS_FALSE='#' +else + HAVE_STATS_TRUE='#' + HAVE_STATS_FALSE= +fi + + if false; then + HAVE_TUNING_TRUE= + HAVE_TUNING_FALSE='#' +else + HAVE_TUNING_TRUE='#' + HAVE_TUNING_FALSE= +fi + + if false; then + HAVE_MEMTRACK_TRUE= + HAVE_MEMTRACK_FALSE='#' +else + HAVE_MEMTRACK_TRUE='#' + HAVE_MEMTRACK_FALSE= +fi + + if false; then + HAVE_IB_TRUE= + HAVE_IB_FALSE='#' +else + HAVE_IB_TRUE='#' + HAVE_IB_FALSE= +fi + + if false; then + HAVE_MLX5_HW_TRUE= + HAVE_MLX5_HW_FALSE='#' +else + HAVE_MLX5_HW_TRUE='#' + HAVE_MLX5_HW_FALSE= +fi + + if false; then + HAVE_MLX5_HW_UD_TRUE= + HAVE_MLX5_HW_UD_FALSE='#' +else + HAVE_MLX5_HW_UD_TRUE='#' + HAVE_MLX5_HW_UD_FALSE= +fi + + if false; then + HAVE_MLX5_DV_TRUE= + HAVE_MLX5_DV_FALSE='#' +else + HAVE_MLX5_DV_TRUE='#' + HAVE_MLX5_DV_FALSE= +fi + + if false; then + HAVE_DEVX_TRUE= + HAVE_DEVX_FALSE='#' +else + HAVE_DEVX_TRUE='#' + HAVE_DEVX_FALSE= +fi + + if false; then + HAVE_EXP_TRUE= + HAVE_EXP_FALSE='#' +else + HAVE_EXP_TRUE='#' + HAVE_EXP_FALSE= +fi + + if false; then + HAVE_TL_RC_TRUE= + HAVE_TL_RC_FALSE='#' +else + HAVE_TL_RC_TRUE='#' + HAVE_TL_RC_FALSE= +fi + + if false; then + HAVE_TL_DC_TRUE= + HAVE_TL_DC_FALSE='#' +else + HAVE_TL_DC_TRUE='#' + HAVE_TL_DC_FALSE= +fi + + if false; then + HAVE_DC_DV_TRUE= + HAVE_DC_DV_FALSE='#' +else + HAVE_DC_DV_TRUE='#' + HAVE_DC_DV_FALSE= +fi + + if false; then + HAVE_DC_EXP_TRUE= + HAVE_DC_EXP_FALSE='#' +else + HAVE_DC_EXP_TRUE='#' + HAVE_DC_EXP_FALSE= +fi + + if false; then + HAVE_TL_UD_TRUE= + HAVE_TL_UD_FALSE='#' +else + HAVE_TL_UD_TRUE='#' + HAVE_TL_UD_FALSE= +fi + + if false; then + HAVE_TL_CM_TRUE= + HAVE_TL_CM_FALSE='#' +else + 
HAVE_TL_CM_TRUE='#' + HAVE_TL_CM_FALSE= +fi + + if false; then + HAVE_CRAY_UGNI_TRUE= + HAVE_CRAY_UGNI_FALSE='#' +else + HAVE_CRAY_UGNI_TRUE='#' + HAVE_CRAY_UGNI_FALSE= +fi + + if false; then + HAVE_CUDA_TRUE= + HAVE_CUDA_FALSE='#' +else + HAVE_CUDA_TRUE='#' + HAVE_CUDA_FALSE= +fi + + if false; then + HAVE_GDR_COPY_TRUE= + HAVE_GDR_COPY_FALSE='#' +else + HAVE_GDR_COPY_TRUE='#' + HAVE_GDR_COPY_FALSE= +fi + + if false; then + HAVE_ROCM_TRUE= + HAVE_ROCM_FALSE='#' +else + HAVE_ROCM_TRUE='#' + HAVE_ROCM_FALSE= +fi + + if false; then + HAVE_HIP_TRUE= + HAVE_HIP_FALSE='#' +else + HAVE_HIP_TRUE='#' + HAVE_HIP_FALSE= +fi + + if false; then + HAVE_XPMEM_TRUE= + HAVE_XPMEM_FALSE='#' +else + HAVE_XPMEM_TRUE='#' + HAVE_XPMEM_FALSE= +fi + + if false; then + HAVE_CMA_TRUE= + HAVE_CMA_FALSE='#' +else + HAVE_CMA_TRUE='#' + HAVE_CMA_FALSE= +fi + + if false; then + HAVE_KNEM_TRUE= + HAVE_KNEM_FALSE='#' +else + HAVE_KNEM_TRUE='#' + HAVE_KNEM_FALSE= +fi + + if false; then + HAVE_RDMACM_TRUE= + HAVE_RDMACM_FALSE='#' +else + HAVE_RDMACM_TRUE='#' + HAVE_RDMACM_FALSE= +fi + + if false; then + HAVE_RDMACM_QP_LESS_TRUE= + HAVE_RDMACM_QP_LESS_FALSE='#' +else + HAVE_RDMACM_QP_LESS_TRUE='#' + HAVE_RDMACM_QP_LESS_FALSE= +fi + + if false; then + HAVE_MPI_TRUE= + HAVE_MPI_FALSE='#' +else + HAVE_MPI_TRUE='#' + HAVE_MPI_FALSE= +fi + + if false; then + HAVE_MPIRUN_TRUE= + HAVE_MPIRUN_FALSE='#' +else + HAVE_MPIRUN_TRUE='#' + HAVE_MPIRUN_FALSE= +fi + + if false; then + HAVE_MPICC_TRUE= + HAVE_MPICC_FALSE='#' +else + HAVE_MPICC_TRUE='#' + HAVE_MPICC_FALSE= +fi + + if false; then + HAVE_PROFILING_TRUE= + HAVE_PROFILING_FALSE='#' +else + HAVE_PROFILING_TRUE='#' + HAVE_PROFILING_FALSE= +fi + + if false; then + HAVE_UCM_PTMALLOC286_TRUE= + HAVE_UCM_PTMALLOC286_FALSE='#' +else + HAVE_UCM_PTMALLOC286_TRUE='#' + HAVE_UCM_PTMALLOC286_FALSE= +fi + + if false; then + HAVE_JAVA_TRUE= + HAVE_JAVA_FALSE='#' +else + HAVE_JAVA_TRUE='#' + HAVE_JAVA_FALSE= +fi + + if false; then + HAVE_CXX11_TRUE= + 
HAVE_CXX11_FALSE='#' +else + HAVE_CXX11_TRUE='#' + HAVE_CXX11_FALSE= +fi + + if false; then + HAVE_GNUXX11_TRUE= + HAVE_GNUXX11_FALSE='#' +else + HAVE_GNUXX11_TRUE='#' + HAVE_GNUXX11_FALSE= +fi + + if false; then + HAVE_TCMALLOC_TRUE= + HAVE_TCMALLOC_FALSE='#' +else + HAVE_TCMALLOC_TRUE='#' + HAVE_TCMALLOC_FALSE= +fi + + if false; then + ENABLE_EXPERIMENTAL_API_TRUE= + ENABLE_EXPERIMENTAL_API_FALSE='#' +else + ENABLE_EXPERIMENTAL_API_TRUE='#' + ENABLE_EXPERIMENTAL_API_FALSE= +fi + + if false; then + INSTALL_DEVEL_HEADERS_TRUE= + INSTALL_DEVEL_HEADERS_FALSE='#' +else + INSTALL_DEVEL_HEADERS_TRUE='#' + INSTALL_DEVEL_HEADERS_FALSE= +fi + + if false; then + HAVE_EXAMPLES_TRUE= + HAVE_EXAMPLES_FALSE='#' +else + HAVE_EXAMPLES_TRUE='#' + HAVE_EXAMPLES_FALSE= +fi + + +else + + if false; then + DOCS_ONLY_TRUE= + DOCS_ONLY_FALSE='#' +else + DOCS_ONLY_TRUE='#' + DOCS_ONLY_FALSE= +fi + + # +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED. +# Copyright (C) ARM Ltd. 2016-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Initialize CFLAGS +# +BASE_CFLAGS="-g -Wall -Werror" + +# +# Debug mode +# +# Check whether --enable-debug was given. +if test "${enable_debug+set}" = set; then : + enableval=$enable_debug; +else + enable_debug=no +fi + +if test "x$enable_debug" = xyes; then : + BASE_CFLAGS="-D_DEBUG $BASE_CFLAGS" +fi + +# +# Optimization level +# +# Check whether --enable-compiler-opt was given. 
+if test "${enable_compiler_opt+set}" = set; then : + enableval=$enable_compiler_opt; +else + enable_compiler_opt="none" +fi + +if test "x$enable_compiler_opt" = "xyes"; then : + BASE_CFLAGS="-O3 $BASE_CFLAGS" +elif test "x$enable_compiler_opt" = "xnone"; then : + if test "x$enable_debug" = xyes; then : + BASE_CFLAGS="-O0 $BASE_CFLAGS" +else + BASE_CFLAGS="-O3 $BASE_CFLAGS" +fi +elif test "x$enable_compiler_opt" = "xno"; then : + +else + BASE_CFLAGS="-O$enable_compiler_opt $BASE_CFLAGS" +fi + + +# +# CHECK_CROSS_COMP (program, true-action, false-action) +# +# The macro checks if it can run the program; it executes +# true action if the program can be executed, otherwise +# false action is executed. +# For cross-platform compilation we only check +# if we can compile and link the program. + + +# +# Check for one specific attribute by compiling with C +# Usage: CHECK_SPECIFIC_ATTRIBUTE([name], [doc], [program]) +# + + + +# +# Enable/disable turning on machine-specific optimizations +# +# Check whether --enable-optimizations was given. +if test "${enable_optimizations+set}" = set; then : + enableval=$enable_optimizations; +else + enable_optimizations=no +fi + + + +# +# Check if compiler supports a given CPU optimization flag, and if yes - add it +# to BASE_CFLAGS substitution, and OPT_CFLAGS C define. 
+# +# Usage: COMPILER_CPU_OPTIMIZATION([name], [doc], [flag], [program]) +# + + + +# +# Check platform uarch and apply micro-architecture specific optimizations +# + + + +# +# CHECK_COMPILER_FLAG +# Usage: CHECK_COMPILER_FLAG([name], [flag], [program], [if-true], [if-false]) +# +# The macro checks if program may be compiled using specified flag +# + + +# +# ADD_COMPILER_FLAG_IF_SUPPORTED +# Usage: ADD_COMPILER_FLAG_IF_SUPPORTED([name], [flag], [program], [if-true], [if-false]) +# +# The macro checks if program may be compiled using specified flag and adds +# this flag if it is supported +# + + +# +# CHECK_DEPRECATED_DECL_FLAG (flag, variable) +# +# The macro checks if the given compiler flag enables usig deprecated declarations. +# If yes, it appends the flags to "variable". +# + + + +# +# Force ICC treat command line warnings as errors. +# This evaluation should be called prior to all other compiler flags evals +# + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag -diag-error 10006" >&5 +$as_echo_n "checking compiler flag -diag-error 10006... " >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -diag-error 10006" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS -diag-error 10006" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int main(){return 0;} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + BASE_CFLAGS="$BASE_CFLAGS -diag-error 10006" + BASE_CXXFLAGS="$BASE_CXXFLAGS -diag-error 10006" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -diag-disable 1478 overrides deprecated declarations" >&5 +$as_echo_n "checking whether -diag-disable 1478 overrides deprecated declarations... " >&6; } + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -diag-disable 1478" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + int __attribute__ ((__deprecated__)) f() { return 0; } + int main() { return f(); } + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS_NO_DEPRECATED="${CFLAGS_NO_DEPRECATED} -diag-disable 1478" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS="$SAVE_CFLAGS" + # icc + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -Wno-deprecated-declarations overrides deprecated declarations" >&5 +$as_echo_n "checking whether -Wno-deprecated-declarations overrides deprecated declarations... " >&6; } + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -Wno-deprecated-declarations" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + int __attribute__ ((__deprecated__)) f() { return 0; } + int main() { return f(); } + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS_NO_DEPRECATED="${CFLAGS_NO_DEPRECATED} -Wno-deprecated-declarations" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS="$SAVE_CFLAGS" + # gcc +CFLAGS_NO_DEPRECATED=$CFLAGS_NO_DEPRECATED + + + +# +# Disable format-string warning on ICC +# + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag -diag-disable 269" >&5 +$as_echo_n "checking compiler flag -diag-disable 269... " >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CXXFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -diag-disable 269" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS -diag-disable 269" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <stdlib.h> + #include <stdio.h> + int main() { + char *p = NULL; + scanf("%m[^.]", &p); + free(p); + return 0; + } +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + BASE_CFLAGS="$BASE_CFLAGS -diag-disable 269" + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + + + +# +# Set default datatype alignment to 16 bytes. +# Some compilers (LLVM based, clang) expect allocation of datatypes by 32 bytes +# to optimize operations memset/memcpy/etc using vectorized processor instructions +# which requires alignment of memory buffer by 32 or higher bytes. Default malloc method +# guarantees alignment for 16 bytes only. Force using compiler 16-bytes alignment +# by default if option is supported.
+# +UCX_ALLOC_ALIGN=16 + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag -fmax-type-align=$UCX_ALLOC_ALIGN" >&5 +$as_echo_n "checking compiler flag -fmax-type-align=$UCX_ALLOC_ALIGN... " >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CXXFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -fmax-type-align=$UCX_ALLOC_ALIGN" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS -fmax-type-align=$UCX_ALLOC_ALIGN" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main(){return 0;} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + BASE_CFLAGS="$BASE_CFLAGS -fmax-type-align=$UCX_ALLOC_ALIGN" + +cat >>confdefs.h <<_ACEOF +#define UCX_ALLOC_ALIGN $UCX_ALLOC_ALIGN +_ACEOF + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + + + +# +# SSE/AVX +# + + +# Check whether --with-avx was given. +if test "${with_avx+set}" = set; then : + withval=$with_avx; +else + with_avx=$enable_optimizations +fi + + + if test "x$with_avx" != "xno"; then : + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -mavx" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking -mavx" >&5 +$as_echo_n "checking -mavx... " >&6; } + + if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <immintrin.h> + int main() { + return _mm256_testz_si256(_mm256_set1_epi32(1), _mm256_set1_epi32(3)); + } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + # TODO: Add CPU UARCH detector and validator in UCX init. + # As for now we will avoid passing this information to + # library.
+ BASE_CFLAGS="$BASE_CFLAGS -mavx" + if test "xavx" != "xmcpu" -a "xavx" != "xmarch"; then : + OPT_CFLAGS="$OPT_CFLAGS|avx" +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <immintrin.h> + int main() { + return _mm256_testz_si256(_mm256_set1_epi32(1), _mm256_set1_epi32(3)); + } + +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + # TODO: Add CPU UARCH detector and validator in UCX init. + # As for now we will avoid passing this information to + # library. + BASE_CFLAGS="$BASE_CFLAGS -mavx" + if test "xavx" != "xmcpu" -a "xavx" != "xmarch"; then : + OPT_CFLAGS="$OPT_CFLAGS|avx" +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + + CFLAGS="$SAVE_CFLAGS" +fi + +if test "x$with_avx" != xyes; then : + + +# Check whether --with-sse41 was given. +if test "${with_sse41+set}" = set; then : + withval=$with_sse41; +else + with_sse41=$enable_optimizations +fi + + + if test "x$with_sse41" != "xno"; then : + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -msse4.1" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking -msse4.1" >&5 +$as_echo_n "checking -msse4.1... " >&6; } + + if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <smmintrin.h> + int main() { + return _mm_testz_si128(_mm_set1_epi32(1), _mm_set1_epi32(3)); + } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + # TODO: Add CPU UARCH detector and validator in UCX init.
+ # As for now we will avoid passing this information to + # library. + BASE_CFLAGS="$BASE_CFLAGS -msse4.1" + if test "xsse41" != "xmcpu" -a "xsse41" != "xmarch"; then : + OPT_CFLAGS="$OPT_CFLAGS|sse41" +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <smmintrin.h> + int main() { + return _mm_testz_si128(_mm_set1_epi32(1), _mm_set1_epi32(3)); + } + +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + # TODO: Add CPU UARCH detector and validator in UCX init. + # As for now we will avoid passing this information to + # library. + BASE_CFLAGS="$BASE_CFLAGS -msse4.1" + if test "xsse41" != "xmcpu" -a "xsse41" != "xmarch"; then : + OPT_CFLAGS="$OPT_CFLAGS|sse41" +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + + CFLAGS="$SAVE_CFLAGS" +fi + + + +# Check whether --with-sse42 was given. +if test "${with_sse42+set}" = set; then : + withval=$with_sse42; +else + with_sse42=$enable_optimizations +fi + + + if test "x$with_sse42" != "xno"; then : + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -msse4.2" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking -msse4.2" >&5 +$as_echo_n "checking -msse4.2... " >&6; } + + if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <popcntintrin.h> + int main() { return _mm_popcnt_u32(0x101) - 2; + } +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + # TODO: Add CPU UARCH detector and validator in UCX init.
+ # As for now we will avoid passing this information to + # library. + BASE_CFLAGS="$BASE_CFLAGS -msse4.2" + if test "xsse42" != "xmcpu" -a "xsse42" != "xmarch"; then : + OPT_CFLAGS="$OPT_CFLAGS|sse42" +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <popcntintrin.h> + int main() { return _mm_popcnt_u32(0x101) - 2; + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + # TODO: Add CPU UARCH detector and validator in UCX init. + # As for now we will avoid passing this information to + # library. + BASE_CFLAGS="$BASE_CFLAGS -msse4.2" + if test "xsse42" != "xmcpu" -a "xsse42" != "xmarch"; then : + OPT_CFLAGS="$OPT_CFLAGS|sse42" +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + + CFLAGS="$SAVE_CFLAGS" +fi + + +fi + + + + cpuimpl=`grep 'CPU implementer' /proc/cpuinfo 2> /dev/null | cut -d: -f2 | tr -d " " | head -n 1` + cpuarch=`grep 'CPU architecture' /proc/cpuinfo 2> /dev/null | cut -d: -f2 | tr -d " " | head -n 1` + cpuvar=`grep 'CPU variant' /proc/cpuinfo 2> /dev/null | cut -d: -f2 | tr -d " " | head -n 1` + cpupart=`grep 'CPU part' /proc/cpuinfo 2> /dev/null | cut -d: -f2 | tr -d " " | head -n 1` + + ax_cpu="" + ax_arch="" + + { $as_echo "$as_me:${as_lineno-$LINENO}: Detected CPU implementation: ${cpuimpl}" >&5 +$as_echo "$as_me: Detected CPU implementation: ${cpuimpl}" >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: Detected CPU architecture: ${cpuarch}" >&5 +$as_echo "$as_me: Detected CPU architecture: ${cpuarch}" >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: Detected CPU variant: ${cpuvar}" >&5 +$as_echo
"$as_me: Detected CPU variant: ${cpuvar}" >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: Detected CPU part: ${cpupart}" >&5 +$as_echo "$as_me: Detected CPU part: ${cpupart}" >&6;} + + case $cpuimpl in + 0x42) case $cpupart in + 0x516 | 0x0516) + +$as_echo "#define HAVE_AARCH64_THUNDERX2 1" >>confdefs.h + + ax_cpu="thunderx2t99" + ax_arch="armv8.1-a+lse" ;; + 0xaf | 0x0af) + +$as_echo "#define HAVE_AARCH64_THUNDERX2 1" >>confdefs.h + + ax_cpu="thunderx2t99" + ax_arch="armv8.1-a+lse" ;; + esac + ;; + 0x43) case $cpupart in + 0x516 | 0x0516) + +$as_echo "#define HAVE_AARCH64_THUNDERX2 1" >>confdefs.h + + ax_cpu="thunderx2t99" + ax_arch="armv8.1-a+lse" ;; + 0xaf | 0x0af) + +$as_echo "#define HAVE_AARCH64_THUNDERX2 1" >>confdefs.h + + ax_cpu="thunderx2t99" + ax_arch="armv8.1-a+lse" ;; + 0xa1 | 0x0a1) + +$as_echo "#define HAVE_AARCH64_THUNDERX1 1" >>confdefs.h + + ax_cpu="thunderxt88" ;; + esac + ;; + 0x48) case $cpupart in + 0xd01 | 0x0d01) + +$as_echo "#define HAVE_AARCH64_HI1620 1" >>confdefs.h + + ax_cpu="tsv110" + ax_arch="armv8.2-a" ;; + esac + ;; + *) + ;; + esac + + + +# +# CPU tuning +# +if test "x$ax_cpu" != "x"; then : + + +# Check whether --with-mcpu was given. +if test "${with_mcpu+set}" = set; then : + withval=$with_mcpu; +else + with_mcpu=$enable_optimizations +fi + + + if test "x$with_mcpu" != "xno"; then : + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -mcpu=$ax_cpu" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking -mcpu=$ax_cpu" >&5 +$as_echo_n "checking -mcpu=$ax_cpu... " >&6; } + + if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main() { return 0;} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + # TODO: Add CPU UARCH detector and validator in UCX init. + # As for now we will avoid passing this information to + # library. 
+ BASE_CFLAGS="$BASE_CFLAGS -mcpu=$ax_cpu" + if test "xmcpu" != "xmcpu" -a "xmcpu" != "xmarch"; then : + OPT_CFLAGS="$OPT_CFLAGS|mcpu" +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main() { return 0;} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + # TODO: Add CPU UARCH detector and validator in UCX init. + # As for now we will avoid passing this information to + # library. + BASE_CFLAGS="$BASE_CFLAGS -mcpu=$ax_cpu" + if test "xmcpu" != "xmcpu" -a "xmcpu" != "xmarch"; then : + OPT_CFLAGS="$OPT_CFLAGS|mcpu" +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + + CFLAGS="$SAVE_CFLAGS" +fi + + +fi + + +# +# Architecture tuning +# +if test "x$ax_arch" != "x"; then : + + +# Check whether --with-march was given. +if test "${with_march+set}" = set; then : + withval=$with_march; +else + with_march=$enable_optimizations +fi + + + if test "x$with_march" != "xno"; then : + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -march=$ax_arch" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking -march=$ax_arch" >&5 +$as_echo_n "checking -march=$ax_arch... " >&6; } + + if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main() { return 0;} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + # TODO: Add CPU UARCH detector and validator in UCX init. + # As for now we will avoid passing this information to + # library. 
+ BASE_CFLAGS="$BASE_CFLAGS -march=$ax_arch" + if test "xmarch" != "xmcpu" -a "xmarch" != "xmarch"; then : + OPT_CFLAGS="$OPT_CFLAGS|march" +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main() { return 0;} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + # TODO: Add CPU UARCH detector and validator in UCX init. + # As for now we will avoid passing this information to + # library. + BASE_CFLAGS="$BASE_CFLAGS -march=$ax_arch" + if test "xmarch" != "xmcpu" -a "xmarch" != "xmarch"; then : + OPT_CFLAGS="$OPT_CFLAGS|march" +fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + + CFLAGS="$SAVE_CFLAGS" +fi + + +fi + + +# +# Check for compiler attribute which disables optimizations per-function. +# + + if ${ucx_cv_attribute_optimize+:} false; then : + $as_echo_n "(cached) " >&6 +else + + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS" + # + # Try to compile using the C compiler + # + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int foo (int arg) __attribute__ ((optimize("O0"))); +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ucx_cv_attribute_optimize=1 +else + ucx_cv_attribute_optimize=0 +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS="$SAVE_CFLAGS" + +fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __attribute__(optimize)" >&5 +$as_echo_n "checking for __attribute__(optimize)... 
" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ucx_cv_attribute_optimize" >&5 +$as_echo "$ucx_cv_attribute_optimize" >&6; } + +cat >>confdefs.h <<_ACEOF +#define HAVE_ATTRIBUTE_NOOPTIMIZE $ucx_cv_attribute_optimize +_ACEOF + + + + +# +# Compile code with frame pointer. Optimizations usually omit the frame pointer, +# but if we are profiling the code with callgraph we need it. +# This option may affect performance so it is off by default. +# +# Check whether --enable-frame-pointer was given. +if test "${enable_frame_pointer+set}" = set; then : + enableval=$enable_frame_pointer; +else + enable_frame_pointer=no +fi + +if test "x$enable_frame_pointer" = xyes; then : + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag -fno-omit-frame-pointer" >&5 +$as_echo_n "checking compiler flag -fno-omit-frame-pointer... " >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CXXFLAGS" # keep the C++ flags (not CFLAGS) for the restore below + CFLAGS="$BASE_CFLAGS $CFLAGS -fno-omit-frame-pointer" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS -fno-omit-frame-pointer" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int main(){return 0;} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + BASE_CFLAGS="$BASE_CFLAGS -fno-omit-frame-pointer" + { $as_echo "$as_me:${as_lineno-$LINENO}: compiling with frame pointer" >&5 +$as_echo "$as_me: compiling with frame pointer" >&6;} +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + { $as_echo "$as_me:${as_lineno-$LINENO}: compiling with frame pointer is not supported" >&5 +$as_echo "$as_me: compiling with frame pointer is not supported" >&6;} +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + +else + : +fi + + +# +# Check for C++11 support +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking c++11 support" >&5 +$as_echo_n "checking c++11 support... " >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +SAVE_CXXFLAGS="$CXXFLAGS" +CXX11FLAGS="-std=c++11" +CXXFLAGS="$CXXFLAGS $CXX11FLAGS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + #include + int main() { + std::to_string(1); + return 0; + } +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + + cxx11_happy=yes +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + cxx11_happy=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +CXXFLAGS="$SAVE_CXXFLAGS" +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test "x$cxx11_happy" != xno; then + HAVE_CXX11_TRUE= + HAVE_CXX11_FALSE='#' +else + HAVE_CXX11_TRUE='#' + HAVE_CXX11_FALSE= +fi + + + +# +# Check for GNU++11 support +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking gnu++11 support" >&5 +$as_echo_n "checking gnu++11 support... " >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +SAVE_CXXFLAGS="$CXXFLAGS" +CXX11FLAGS="-std=gnu++11" +CXXFLAGS="$CXXFLAGS $CXX11FLAGS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + #include + int main() { + int a; + typeof(a) b = 0; + std::to_string(1); + return 0; + } +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + + gnuxx11_happy=yes +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + gnuxx11_happy=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +CXXFLAGS="$SAVE_CXXFLAGS" +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test "x$gnuxx11_happy" != xno; then + HAVE_GNUXX11_TRUE= + HAVE_GNUXX11_FALSE='#' +else + HAVE_GNUXX11_TRUE='#' + HAVE_GNUXX11_FALSE= +fi + + + +# +# PGI specific switches +# + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag --display_error_number" >&5 +$as_echo_n "checking compiler flag --display_error_number... " >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CXXFLAGS" # keep the C++ flags (not CFLAGS) for the restore below + CFLAGS="$BASE_CFLAGS $CFLAGS --display_error_number" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS --display_error_number" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main(){return 0;} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + BASE_CFLAGS="$BASE_CFLAGS --display_error_number" + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + + +# Suppress incorrect printf format for PGI18 compiler. TODO: remove it after compiler fix + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag --diag_suppress 181" >&5 +$as_echo_n "checking compiler flag --diag_suppress 181... 
" >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CXXFLAGS" # keep the C++ flags (not CFLAGS) for the restore below + CFLAGS="$BASE_CFLAGS $CFLAGS --diag_suppress 181" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS --diag_suppress 181" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main(){return 0;} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + BASE_CFLAGS="$BASE_CFLAGS --diag_suppress 181" + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + + +# Suppress deprecated API warning for PGI18 compiler + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag --diag_suppress 1215" >&5 +$as_echo_n "checking compiler flag --diag_suppress 1215... " >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CXXFLAGS" # keep the C++ flags (not CFLAGS) for the restore below + CFLAGS="$BASE_CFLAGS $CFLAGS --diag_suppress 1215" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS --diag_suppress 1215" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main(){return 0;} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + BASE_CFLAGS="$BASE_CFLAGS --diag_suppress 1215" + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + + +# Use of a const variable in a constant expression is nonstandard in C + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag --diag_suppress 1901" >&5 +$as_echo_n "checking compiler flag --diag_suppress 1901... 
" >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CXXFLAGS" # keep the C++ flags (not CFLAGS) for the restore below + CFLAGS="$BASE_CFLAGS $CFLAGS --diag_suppress 1901" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS --diag_suppress 1901" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main(){return 0;} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + BASE_CFLAGS="$BASE_CFLAGS --diag_suppress 1901" + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + + +# Check if "-pedantic" flag is supported + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag -pedantic" >&5 +$as_echo_n "checking compiler flag -pedantic... " >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CXXFLAGS" # keep the C++ flags (not CFLAGS) for the restore below + CFLAGS="$BASE_CFLAGS $CFLAGS -pedantic" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS -pedantic" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int main(){return 0;} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + CFLAGS_PEDANTIC="$CFLAGS_PEDANTIC -pedantic" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + + +# +# Set C++ optimization/debug flags to be the same as for C +# +BASE_CXXFLAGS="$BASE_CFLAGS" +BASE_CFLAGS=$BASE_CFLAGS + +BASE_CXXFLAGS=$BASE_CXXFLAGS + +CFLAGS_PEDANTIC=$CFLAGS_PEDANTIC + + +# +# Set common preprocessor flags +# +BASE_CPPFLAGS="-DCPU_FLAGS=\"$OPT_CFLAGS\"" +BASE_CPPFLAGS="$BASE_CPPFLAGS -I\${abs_top_srcdir}/src" +BASE_CPPFLAGS="$BASE_CPPFLAGS -I\${abs_top_builddir}" +BASE_CPPFLAGS="$BASE_CPPFLAGS -I\${abs_top_builddir}/src" +BASE_CPPFLAGS=$BASE_CPPFLAGS + + + # +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + +# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works +# for constant arguments. Useless! +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5 +$as_echo_n "checking for working alloca.h... " >&6; } +if ${ac_cv_working_alloca_h+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ +char *p = (char *) alloca (2 * sizeof (int)); + if (p) return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_working_alloca_h=yes +else + ac_cv_working_alloca_h=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_working_alloca_h" >&5 +$as_echo "$ac_cv_working_alloca_h" >&6; } +if test $ac_cv_working_alloca_h = yes; then + +$as_echo "#define HAVE_ALLOCA_H 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for alloca" >&5 +$as_echo_n "checking for alloca... " >&6; } +if ${ac_cv_func_alloca_works+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __GNUC__ +# define alloca __builtin_alloca +#else +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# ifdef HAVE_ALLOCA_H +# include +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +void *alloca (size_t); +# endif +# endif +# endif +# endif +#endif + +int +main () +{ +char *p = (char *) alloca (1); + if (p) return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_func_alloca_works=yes +else + ac_cv_func_alloca_works=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_alloca_works" >&5 +$as_echo "$ac_cv_func_alloca_works" >&6; } + +if test $ac_cv_func_alloca_works = yes; then + +$as_echo "#define HAVE_ALLOCA 1" >>confdefs.h + +else + # The SVR3 libPW and SVR4 libucb both contain incompatible functions +# that cause trouble. Some versions do not even contain alloca or +# contain a buggy version. If you still want to use their alloca, +# use ar to extract alloca.o from them instead of compiling alloca.c. 
+ +ALLOCA=\${LIBOBJDIR}alloca.$ac_objext + +$as_echo "#define C_ALLOCA 1" >>confdefs.h + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether \`alloca.c' needs Cray hooks" >&5 +$as_echo_n "checking whether \`alloca.c' needs Cray hooks... " >&6; } +if ${ac_cv_os_cray+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#if defined CRAY && ! defined CRAY2 +webecray +#else +wenotbecray +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "webecray" >/dev/null 2>&1; then : + ac_cv_os_cray=yes +else + ac_cv_os_cray=no +fi +rm -f conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_os_cray" >&5 +$as_echo "$ac_cv_os_cray" >&6; } +if test $ac_cv_os_cray = yes; then + for ac_func in _getb67 GETB67 getb67; do + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + +cat >>confdefs.h <<_ACEOF +#define CRAY_STACKSEG_END $ac_func +_ACEOF + + break +fi + + done +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking stack direction for C alloca" >&5 +$as_echo_n "checking stack direction for C alloca... " >&6; } +if ${ac_cv_c_stack_direction+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ac_cv_c_stack_direction=0 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default +int +find_stack_direction (int *addr, int depth) +{ + int dir, dummy = 0; + if (! addr) + addr = &dummy; + *addr = addr < &dummy ? 1 : addr == &dummy ? 0 : -1; + dir = depth ? 
find_stack_direction (addr, depth - 1) : 0; + return dir + dummy; +} + +int +main (int argc, char **argv) +{ + return find_stack_direction (0, argc + !argv + 20) < 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_c_stack_direction=1 +else + ac_cv_c_stack_direction=-1 +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_stack_direction" >&5 +$as_echo "$ac_cv_c_stack_direction" >&6; } +cat >>confdefs.h <<_ACEOF +#define STACK_DIRECTION $ac_cv_c_stack_direction +_ACEOF + + +fi + + + +# +# SystemV shared memory +# +#IPC_INFO +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for shm_open in -lrt" >&5 +$as_echo_n "checking for shm_open in -lrt... " >&6; } +if ${ac_cv_lib_rt_shm_open+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lrt $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char shm_open (); +int +main () +{ +return shm_open (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_rt_shm_open=yes +else + ac_cv_lib_rt_shm_open=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_shm_open" >&5 +$as_echo "$ac_cv_lib_rt_shm_open" >&6; } +if test "x$ac_cv_lib_rt_shm_open" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBRT 1 +_ACEOF + + LIBS="-lrt $LIBS" + +else + as_fn_error $? "librt not found" "$LINENO" 5 +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for timer_create in -lrt" >&5 +$as_echo_n "checking for timer_create in -lrt... 
" >&6; } +if ${ac_cv_lib_rt_timer_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lrt $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char timer_create (); +int +main () +{ +return timer_create (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_rt_timer_create=yes +else + ac_cv_lib_rt_timer_create=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_timer_create" >&5 +$as_echo "$ac_cv_lib_rt_timer_create" >&6; } +if test "x$ac_cv_lib_rt_timer_create" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBRT 1 +_ACEOF + + LIBS="-lrt $LIBS" + +else + as_fn_error $? "librt not found" "$LINENO" 5 +fi + + + +# +# Extended string functions +# +for ac_header in libgen.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "libgen.h" "ac_cv_header_libgen_h" "$ac_includes_default" +if test "x$ac_cv_header_libgen_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBGEN_H 1 +_ACEOF + +fi + +done + +ac_fn_c_check_decl "$LINENO" "asprintf" "ac_cv_have_decl_asprintf" "#define _GNU_SOURCE 1 + #include + #include + #ifdef HAVE_LIBGEN_H + #include + #endif + +" +if test "x$ac_cv_have_decl_asprintf" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_ASPRINTF $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + as_fn_error $? 
"GNU string extensions not found" "$LINENO" 5 +fi +ac_fn_c_check_decl "$LINENO" "basename" "ac_cv_have_decl_basename" "#define _GNU_SOURCE 1 + #include + #include + #ifdef HAVE_LIBGEN_H + #include + #endif + +" +if test "x$ac_cv_have_decl_basename" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_BASENAME $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + as_fn_error $? "GNU string extensions not found" "$LINENO" 5 +fi +ac_fn_c_check_decl "$LINENO" "fmemopen" "ac_cv_have_decl_fmemopen" "#define _GNU_SOURCE 1 + #include + #include + #ifdef HAVE_LIBGEN_H + #include + #endif + +" +if test "x$ac_cv_have_decl_fmemopen" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_FMEMOPEN $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + as_fn_error $? "GNU string extensions not found" "$LINENO" 5 +fi + + + +# +# CPU-sets +# +for ac_header in sys/cpuset.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "sys/cpuset.h" "ac_cv_header_sys_cpuset_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_cpuset_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_CPUSET_H 1 +_ACEOF + +fi + +done + +ac_fn_c_check_decl "$LINENO" "CPU_ZERO" "ac_cv_have_decl_CPU_ZERO" "#define _GNU_SOURCE 1 + #include + #include + #ifdef HAVE_SYS_CPUSET_H + #include + #endif + +" +if test "x$ac_cv_have_decl_CPU_ZERO" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_CPU_ZERO $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + as_fn_error $? 
"CPU_ZERO/CPU_ISSET not found" "$LINENO" 5 +fi +ac_fn_c_check_decl "$LINENO" "CPU_ISSET" "ac_cv_have_decl_CPU_ISSET" "#define _GNU_SOURCE 1 + #include + #include + #ifdef HAVE_SYS_CPUSET_H + #include + #endif + +" +if test "x$ac_cv_have_decl_CPU_ISSET" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_CPU_ISSET $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + as_fn_error $? "CPU_ZERO/CPU_ISSET not found" "$LINENO" 5 +fi + +ac_fn_c_check_type "$LINENO" "cpu_set_t" "ac_cv_type_cpu_set_t" "#define _GNU_SOURCE 1 + #include + #include + #ifdef HAVE_SYS_CPUSET_H + #include + #endif +" +if test "x$ac_cv_type_cpu_set_t" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_CPU_SET_T 1 +_ACEOF + + +fi +ac_fn_c_check_type "$LINENO" "cpuset_t" "ac_cv_type_cpuset_t" "#define _GNU_SOURCE 1 + #include + #include + #ifdef HAVE_SYS_CPUSET_H + #include + #endif +" +if test "x$ac_cv_type_cpuset_t" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_CPUSET_T 1 +_ACEOF + + +fi + + + +# +# Type for sighandler +# +ac_fn_c_check_type "$LINENO" "sighandler_t" "ac_cv_type_sighandler_t" "#define _GNU_SOURCE 1 + #include +" +if test "x$ac_cv_type_sighandler_t" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_SIGHANDLER_T 1 +_ACEOF + + +fi +ac_fn_c_check_type "$LINENO" "__sighandler_t" "ac_cv_type___sighandler_t" "#define _GNU_SOURCE 1 + #include +" +if test "x$ac_cv_type___sighandler_t" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE___SIGHANDLER_T 1 +_ACEOF + + +fi + + + +# +# pthread +# +for ac_header in pthread_np.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "pthread_np.h" "ac_cv_header_pthread_np_h" "$ac_includes_default" +if test "x$ac_cv_header_pthread_np_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_PTHREAD_NP_H 1 +_ACEOF + +fi + +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing pthread_create" >&5 +$as_echo_n "checking for library 
containing pthread_create... " >&6; } +if ${ac_cv_search_pthread_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pthread_create (); +int +main () +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +for ac_lib in '' pthread; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_pthread_create=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_pthread_create+:} false; then : + break +fi +done +if ${ac_cv_search_pthread_create+:} false; then : + +else + ac_cv_search_pthread_create=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_pthread_create" >&5 +$as_echo "$ac_cv_search_pthread_create" >&6; } +ac_res=$ac_cv_search_pthread_create +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing pthread_atfork" >&5 +$as_echo_n "checking for library containing pthread_atfork... " >&6; } +if ${ac_cv_search_pthread_atfork+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pthread_atfork (); +int +main () +{ +return pthread_atfork (); + ; + return 0; +} +_ACEOF +for ac_lib in '' pthread; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_pthread_atfork=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_pthread_atfork+:} false; then : + break +fi +done +if ${ac_cv_search_pthread_atfork+:} false; then : + +else + ac_cv_search_pthread_atfork=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_pthread_atfork" >&5 +$as_echo "$ac_cv_search_pthread_atfork" >&6; } +ac_res=$ac_cv_search_pthread_atfork +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + + + +# +# Misc. Linux-specific functions +# +for ac_func in clearenv +do : + ac_fn_c_check_func "$LINENO" "clearenv" "ac_cv_func_clearenv" +if test "x$ac_cv_func_clearenv" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_CLEARENV 1 +_ACEOF + +fi +done + +for ac_func in malloc_trim +do : + ac_fn_c_check_func "$LINENO" "malloc_trim" "ac_cv_func_malloc_trim" +if test "x$ac_cv_func_malloc_trim" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_MALLOC_TRIM 1 +_ACEOF + +fi +done + +for ac_func in memalign +do : + ac_fn_c_check_func "$LINENO" "memalign" "ac_cv_func_memalign" +if test "x$ac_cv_func_memalign" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_MEMALIGN 1 +_ACEOF + +fi +done + +for ac_func in posix_memalign +do : + ac_fn_c_check_func "$LINENO" "posix_memalign" "ac_cv_func_posix_memalign" +if test "x$ac_cv_func_posix_memalign" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_POSIX_MEMALIGN 1 +_ACEOF + +fi +done + +for ac_func in mremap +do : + ac_fn_c_check_func "$LINENO" "mremap" "ac_cv_func_mremap" +if test 
"x$ac_cv_func_mremap" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_MREMAP 1 +_ACEOF + +fi +done + +for ac_func in sched_setaffinity sched_getaffinity +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + +for ac_func in cpuset_setaffinity cpuset_getaffinity +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + +# +# Route file descriptor signal to specific thread +# +ac_fn_c_check_decl "$LINENO" "F_SETOWN_EX" "ac_cv_have_decl_F_SETOWN_EX" "#define _GNU_SOURCE 1 +#include +" +if test "x$ac_cv_have_decl_F_SETOWN_EX" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_F_SETOWN_EX $ac_have_decl +_ACEOF + + + +# +# Ethtool definitions +# +ac_fn_c_check_decl "$LINENO" "ethtool_cmd_speed" "ac_cv_have_decl_ethtool_cmd_speed" "#include +" +if test "x$ac_cv_have_decl_ethtool_cmd_speed" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_ETHTOOL_CMD_SPEED $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "SPEED_UNKNOWN" "ac_cv_have_decl_SPEED_UNKNOWN" "#include +" +if test "x$ac_cv_have_decl_SPEED_UNKNOWN" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_SPEED_UNKNOWN $ac_have_decl +_ACEOF + + + +# +# PowerPC query for TB frequency +# +ac_fn_c_check_decl "$LINENO" "__ppc_get_timebase_freq" "ac_cv_have_decl___ppc_get_timebase_freq" "#include +" +if test "x$ac_cv_have_decl___ppc_get_timebase_freq" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat 
>>confdefs.h <<_ACEOF +#define HAVE_DECL___PPC_GET_TIMEBASE_FREQ $ac_have_decl +_ACEOF + +for ac_header in sys/platform/ppc.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "sys/platform/ppc.h" "ac_cv_header_sys_platform_ppc_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_platform_ppc_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_PLATFORM_PPC_H 1 +_ACEOF + +fi + +done + + + +# +# Google Testing framework +# + +# Check whether --enable-gtest was given. +if test "${enable_gtest+set}" = set; then : + enableval=$enable_gtest; enable_gtest=$enableval +else + enable_gtest=no +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for using Google C++ Testing Framework" >&5 +$as_echo_n "checking for using Google C++ Testing Framework... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_gtest" >&5 +$as_echo "$enable_gtest" >&6; } + if test "x$enable_gtest" = "xyes"; then + HAVE_GTEST_TRUE= + HAVE_GTEST_FALSE='#' +else + HAVE_GTEST_TRUE='#' + HAVE_GTEST_FALSE= +fi + + + + +# +# Valgrind support +# + +# Check whether --with-valgrind was given. +if test "${with_valgrind+set}" = set; then : + withval=$with_valgrind; +else + with_valgrind=no + +fi + +if test "x$with_valgrind" = xno; then : + +$as_echo "#define NVALGRIND 1" >>confdefs.h + +else + if test ! -d $with_valgrind; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: Valgrind path was not defined, guessing ..." >&5 +$as_echo "$as_me: Valgrind path was not defined, guessing ..." >&6;} + with_valgrind=/usr +else + : +fi + as_ac_Header=`$as_echo "ac_cv_header_$with_valgrind/include/valgrind/memcheck.h" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$with_valgrind/include/valgrind/memcheck.h" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + +else + as_fn_error $? "Valgrind memcheck support requested, but not found, install valgrind-devel rpm." 
"$LINENO" 5 +fi + + + CPPFLAGS="$CPPFLAGS -I$with_valgrind/include" + + +fi + + +# +# NUMA support +# +# Check whether --enable-numa was given. +if test "${enable_numa+set}" = set; then : + enableval=$enable_numa; + { $as_echo "$as_me:${as_lineno-$LINENO}: NUMA support is disabled" >&5 +$as_echo "$as_me: NUMA support is disabled" >&6;} + +else + + + + for ac_header in numa.h numaif.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +else + as_fn_error $? "NUMA headers not found. Please reconfigure with --disable-numa. Warning: this may have negative impact on library performance. It is better to install libnuma-devel package" "$LINENO" 5 +fi + +done + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mbind in -lnuma" >&5 +$as_echo_n "checking for mbind in -lnuma... " >&6; } +if ${ac_cv_lib_numa_mbind+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lnuma $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char mbind (); +int +main () +{ +return mbind (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_numa_mbind=yes +else + ac_cv_lib_numa_mbind=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_numa_mbind" >&5 +$as_echo "$ac_cv_lib_numa_mbind" >&6; } +if test "x$ac_cv_lib_numa_mbind" = xyes; then : + NUMA_LIBS=-lnuma + +else + as_fn_error $? 
"NUMA library not found. Please reconfigure with --disable-numa. Warning: this may have negative impact on library performance. It is better to install libnuma package" "$LINENO" 5 +fi + + +$as_echo "#define HAVE_NUMA 1" >>confdefs.h + + ac_fn_c_check_type "$LINENO" "struct bitmask" "ac_cv_type_struct_bitmask" "#include +" +if test "x$ac_cv_type_struct_bitmask" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_BITMASK 1 +_ACEOF + + +fi + + + +fi + + + +# +# Malloc hooks +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking malloc hooks" >&5 +$as_echo_n "checking malloc hooks... " >&6; } +SAVE_CFLAGS=$CFLAGS +CFLAGS="$CFLAGS $CFLAGS_NO_DEPRECATED" + + if test "$cross_compiling" = yes; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + static int rc = 1; + void *ptr; + void *myhook(size_t size, const void *caller) { + rc = 0; + return NULL; + } + int main() { + __malloc_hook = myhook; + ptr = malloc(1); + return rc; + } +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +$as_echo "#define HAVE_MALLOC_HOOK 1" >>confdefs.h + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: malloc hooks are not supported" >&5 +$as_echo "$as_me: WARNING: malloc hooks are not supported" >&2;} + +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include + static int rc = 1; + void *ptr; + void *myhook(size_t size, const void *caller) { + rc = 0; + return NULL; + } + int main() { + __malloc_hook = myhook; + ptr = malloc(1); + return rc; + } +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +$as_echo "#define HAVE_MALLOC_HOOK 1" >>confdefs.h + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: malloc hooks are not supported" >&5 +$as_echo "$as_me: WARNING: malloc hooks are not supported" >&2;} + +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +CFLAGS=$SAVE_CFLAGS + + +# +# Check for capability.h header (usually comes from libcap-devel package) and +# make sure it defines the types we need +# +for ac_header in sys/capability.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "sys/capability.h" "ac_cv_header_sys_capability_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_capability_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_CAPABILITY_H 1 +_ACEOF + ac_fn_c_check_type "$LINENO" "cap_user_header_t" "ac_cv_type_cap_user_header_t" "#include +" +if test "x$ac_cv_type_cap_user_header_t" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_CAP_USER_HEADER_T 1 +_ACEOF + + +else + +$as_echo "#define HAVE_SYS_CAPABILITY_H 0" >>confdefs.h + +fi +ac_fn_c_check_type "$LINENO" "cap_user_data_t" "ac_cv_type_cap_user_data_t" "#include +" +if test "x$ac_cv_type_cap_user_data_t" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_CAP_USER_DATA_T 1 +_ACEOF + + +else + +$as_echo "#define HAVE_SYS_CAPABILITY_H 0" >>confdefs.h + +fi + + +fi + +done + + +# +# Check for PR_SET_PTRACER +# +ac_fn_c_check_decl "$LINENO" "PR_SET_PTRACER" "ac_cv_have_decl_PR_SET_PTRACER" "#include +" +if test "x$ac_cv_have_decl_PR_SET_PTRACER" = xyes; then : + 
ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_PR_SET_PTRACER $ac_have_decl +_ACEOF + + + +# +# ipv6 s6_addr32/__u6_addr32 shortcuts for in6_addr +# ip header structure layout name +# +ac_fn_c_check_member "$LINENO" "struct in6_addr" "s6_addr32" "ac_cv_member_struct_in6_addr_s6_addr32" "#include +" +if test "x$ac_cv_member_struct_in6_addr_s6_addr32" = xyes; then : + +$as_echo "#define HAVE_IN6_ADDR_S6_ADDR32 1" >>confdefs.h + +fi + +ac_fn_c_check_member "$LINENO" "struct in6_addr" "__u6_addr.__u6_addr32" "ac_cv_member_struct_in6_addr___u6_addr___u6_addr32" "#include +" +if test "x$ac_cv_member_struct_in6_addr___u6_addr___u6_addr32" = xyes; then : + +$as_echo "#define HAVE_IN6_ADDR_U6_ADDR32 1" >>confdefs.h + +fi + +ac_fn_c_check_member "$LINENO" "struct iphdr" "daddr.s_addr" "ac_cv_member_struct_iphdr_daddr_s_addr" "#include +" +if test "x$ac_cv_member_struct_iphdr_daddr_s_addr" = xyes; then : + +$as_echo "#define HAVE_IPHDR_DADDR 1" >>confdefs.h + +fi + +ac_fn_c_check_member "$LINENO" "struct ip" "ip_dst.s_addr" "ac_cv_member_struct_ip_ip_dst_s_addr" "#include + #include + #include +" +if test "x$ac_cv_member_struct_ip_ip_dst_s_addr" = xyes; then : + +$as_echo "#define HAVE_IP_IP_DST 1" >>confdefs.h + +fi + + + +# +# struct sigevent reporting thread id +# +ac_fn_c_check_member "$LINENO" "struct sigevent" "_sigev_un._tid" "ac_cv_member_struct_sigevent__sigev_un__tid" "#include +" +if test "x$ac_cv_member_struct_sigevent__sigev_un__tid" = xyes; then : + +$as_echo "#define HAVE_SIGEVENT_SIGEV_UN_TID 1" >>confdefs.h + +fi + +ac_fn_c_check_member "$LINENO" "struct sigevent" "sigev_notify_thread_id" "ac_cv_member_struct_sigevent_sigev_notify_thread_id" "#include +" +if test "x$ac_cv_member_struct_sigevent_sigev_notify_thread_id" = xyes; then : + +$as_echo "#define HAVE_SIGEVENT_SIGEV_NOTIFY_THREAD_ID 1" >>confdefs.h + +fi + + + +# +# sa_restorer is something that only Linux has +# +ac_fn_c_check_member "$LINENO" "struct 
sigaction" "sa_restorer" "ac_cv_member_struct_sigaction_sa_restorer" "#include +" +if test "x$ac_cv_member_struct_sigaction_sa_restorer" = xyes; then : + +$as_echo "#define HAVE_SIGACTION_SA_RESTORER 1" >>confdefs.h + +fi + + + +# +# epoll vs. kqueue +# +for ac_header in sys/epoll.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "sys/epoll.h" "ac_cv_header_sys_epoll_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_epoll_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_EPOLL_H 1 +_ACEOF + +fi + +done + +for ac_header in sys/eventfd.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "sys/eventfd.h" "ac_cv_header_sys_eventfd_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_eventfd_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_EVENTFD_H 1 +_ACEOF + +fi + +done + +for ac_header in sys/event.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "sys/event.h" "ac_cv_header_sys_event_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_event_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_EVENT_H 1 +_ACEOF + +fi + +done + + + +# +# FreeBSD-specific threading functions +# +for ac_header in sys/thr.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "sys/thr.h" "ac_cv_header_sys_thr_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_thr_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_THR_H 1 +_ACEOF + +fi + +done + + + +# +# malloc headers are Linux-specific +# +for ac_header in malloc.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "malloc.h" "ac_cv_header_malloc_h" "$ac_includes_default" +if test "x$ac_cv_header_malloc_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_MALLOC_H 1 +_ACEOF + +fi + +done + +for ac_header in malloc_np.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "malloc_np.h" "ac_cv_header_malloc_np_h" "$ac_includes_default" +if test "x$ac_cv_header_malloc_np_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_MALLOC_NP_H 1 +_ACEOF + +fi + +done + + + +# +# endianess 
+# NOTE: the header list below is whitespace-separated; a comma would become part of the header name and the probe would always fail
+for ac_header in endian.h sys/endian.h
+do :
+ as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+#
+# Linux-only headers
+#
+for ac_header in linux/mman.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "linux/mman.h" "ac_cv_header_linux_mman_h" "$ac_includes_default"
+if test "x$ac_cv_header_linux_mman_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_LINUX_MMAN_H 1
+_ACEOF
+
+fi
+
+done
+
+for ac_header in linux/ip.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "linux/ip.h" "ac_cv_header_linux_ip_h" "$ac_includes_default"
+if test "x$ac_cv_header_linux_ip_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_LINUX_IP_H 1
+_ACEOF
+
+fi
+
+done
+
+for ac_header in linux/futex.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "linux/futex.h" "ac_cv_header_linux_futex_h" "$ac_includes_default"
+if test "x$ac_cv_header_linux_futex_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_LINUX_FUTEX_H 1
+_ACEOF
+
+fi
+
+done
+
+
+
+#
+# Networking headers (NOTE(review): prerequisite include names were missing; restored as sys/types.h and netinet/in.h - confirm against the generating macro)
+#
+for ac_header in net/ethernet.h
+do :
+ ac_fn_c_check_header_compile "$LINENO" "net/ethernet.h" "ac_cv_header_net_ethernet_h" "#include <sys/types.h>
+"
+if test "x$ac_cv_header_net_ethernet_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_NET_ETHERNET_H 1
+_ACEOF
+
+fi
+
+done
+
+for ac_header in netinet/ip.h
+do :
+ ac_fn_c_check_header_compile "$LINENO" "netinet/ip.h" "ac_cv_header_netinet_ip_h" "#include <sys/types.h>
+ #include <netinet/in.h>
+"
+if test "x$ac_cv_header_netinet_ip_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_NETINET_IP_H 1
+_ACEOF
+
+fi
+
+done
+
+
+ #
+# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+# Copyright (C) ARM, Ltd. 2016.
ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+
+
+#
+# Internal profiling support.
+# This option may affect performance so it is off by default.
+# HAVE_PROFILING is assigned on both branches so a value inherited from the environment cannot flip the conditional.
+# Check whether --enable-profiling was given.
+if test "${enable_profiling+set}" = set; then :
+ enableval=$enable_profiling;
+else
+ enable_profiling=no
+fi
+
+
+if test "x$enable_profiling" = xyes; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: enabling profiling" >&5
+$as_echo "$as_me: enabling profiling" >&6;}
+
+$as_echo "#define HAVE_PROFILING 1" >>confdefs.h
+
+ HAVE_PROFILING=yes
+else
+ HAVE_PROFILING=no
+fi
+ if test "x$HAVE_PROFILING" = "xyes"; then
+ HAVE_PROFILING_TRUE=
+ HAVE_PROFILING_FALSE='#'
+else
+ HAVE_PROFILING_TRUE='#'
+ HAVE_PROFILING_FALSE=
+fi
+
+
+
+#
+# Detailed backtrace with debug information.
+# This option requires binutils-devel package.
+# NOTE(review): the dl_phdr_info probe below had no header name; <link.h> is restored as the header that declares the type - confirm.
+# Check whether --enable-backtrace-detail was given.
+if test "${enable_backtrace_detail+set}" = set; then :
+ enableval=$enable_backtrace_detail;
+else
+ enable_backtrace_detail=yes
+fi
+
+
+if test "x$enable_backtrace_detail" = xyes; then :
+
+ BT=1
+ ac_fn_c_check_header_mongrel "$LINENO" "bfd.h" "ac_cv_header_bfd_h" "$ac_includes_default"
+if test "x$ac_cv_header_bfd_h" = xyes; then :
+
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: binutils headers not found" >&5
+$as_echo "$as_me: WARNING: binutils headers not found" >&2;}; BT=0
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for bfd_openr in -lbfd" >&5
+$as_echo_n "checking for bfd_openr in -lbfd... " >&6; }
+if ${ac_cv_lib_bfd_bfd_openr+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lbfd $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char bfd_openr ();
+int
+main ()
+{
+return bfd_openr ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_bfd_bfd_openr=yes
+else
+ ac_cv_lib_bfd_bfd_openr=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_bfd_bfd_openr" >&5
+$as_echo "$ac_cv_lib_bfd_bfd_openr" >&6; }
+if test "x$ac_cv_lib_bfd_bfd_openr" = xyes; then :
+ LIBS="$LIBS -lbfd"
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: bfd library not found" >&5
+$as_echo "$as_me: WARNING: bfd library not found" >&2;};BT=0
+fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
+$as_echo_n "checking for dlopen in -ldl... " >&6; }
+if ${ac_cv_lib_dl_dlopen+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen ();
+int
+main ()
+{
+return dlopen ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_dl_dlopen=yes
+else
+ ac_cv_lib_dl_dlopen=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
+$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
+if test "x$ac_cv_lib_dl_dlopen" = xyes; then :
+ LIBS="$LIBS -ldl"
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: dl library not found" >&5
+$as_echo "$as_me: WARNING: dl library not found" >&2;};BT=0
+fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lintl" >&5
+$as_echo_n "checking for main in -lintl... " >&6; }
+if ${ac_cv_lib_intl_main+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lintl $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+return main ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_intl_main=yes
+else
+ ac_cv_lib_intl_main=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_intl_main" >&5
+$as_echo "$ac_cv_lib_intl_main" >&6; }
+if test "x$ac_cv_lib_intl_main" = xyes; then :
+ LIBS="$LIBS -lintl"
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: intl library not found" >&5
+$as_echo "$as_me: WARNING: intl library not found" >&2;}
+fi
+
+ ac_fn_c_check_type "$LINENO" "struct dl_phdr_info" "ac_cv_type_struct_dl_phdr_info" "#define _GNU_SOURCE 1
+ #include <link.h>
+"
+if test "x$ac_cv_type_struct_dl_phdr_info" = xyes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_DL_PHDR_INFO 1
+_ACEOF
+
+
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: struct dl_phdr_info not defined" >&5
+$as_echo "$as_me: WARNING: struct dl_phdr_info not defined" >&2;};BT=0
+fi
+
+ if test "x$BT" = "x1"; then
+ for ac_func in cplus_demangle
+do :
+ ac_fn_c_check_func "$LINENO" "cplus_demangle" "ac_cv_func_cplus_demangle"
+if test "x$ac_cv_func_cplus_demangle" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_CPLUS_DEMANGLE 1
+_ACEOF
+
+fi
+done
+
+
+$as_echo "#define HAVE_DETAILED_BACKTRACE 1" >>confdefs.h
+
+ case ${host} in
+ aarch64*) CFLAGS="$CFLAGS -funwind-tables" ;;
+ esac
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: detailed backtrace is not supported" >&5
+$as_echo "$as_me: WARNING: detailed backtrace is not supported" >&2;}
+ fi
+
+
+fi
+
+
+#
+# Enable statistics and counters
+# HAVE_STATS is assigned on both branches so a value inherited from the environment cannot flip the conditional.
+# Check whether --enable-stats was given.
+if test "${enable_stats+set}" = set; then :
+ enableval=$enable_stats;
+else
+ enable_stats=no
+fi
+
+
+if test "x$enable_stats" = xyes; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: enabling statistics" >&5
+$as_echo "$as_me: enabling statistics" >&6;}
+
+$as_echo "#define ENABLE_STATS 1" >>confdefs.h
+
+ HAVE_STATS=yes
+else
+ HAVE_STATS=no
+
+fi
+ if test "x$HAVE_STATS" = "xyes"; then
+ HAVE_STATS_TRUE=
+ HAVE_STATS_FALSE='#'
+else
+ HAVE_STATS_TRUE='#'
+ HAVE_STATS_FALSE=
+fi
+
+
+
+#
+# Enable tuning params at runtime
+# HAVE_TUNING is assigned on both branches so a value inherited from the environment cannot flip the conditional.
+# Check whether --enable-tuning was given.
+if test "${enable_tuning+set}" = set; then :
+ enableval=$enable_tuning;
+else
+ enable_tuning=no
+fi
+
+
+if test "x$enable_tuning" = xyes; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: enabling tuning" >&5
+$as_echo "$as_me: enabling tuning" >&6;}
+
+$as_echo "#define ENABLE_TUNING 1" >>confdefs.h
+
+ HAVE_TUNING=yes
+else
+ HAVE_TUNING=no
+
+fi
+ if test "x$HAVE_TUNING" = "xyes"; then
+ HAVE_TUNING_TRUE=
+ HAVE_TUNING_FALSE='#'
+else
+ HAVE_TUNING_TRUE='#'
+ HAVE_TUNING_FALSE=
+fi
+
+
+
+#
+# Enable memory tracking
+#
+# Check whether --enable-memtrack was given.
+if test "${enable_memtrack+set}" = set; then :
+ enableval=$enable_memtrack;
+else
+ enable_memtrack=no
+fi
+
+
+if test "x$enable_memtrack" = xyes; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: enabling memory tracking" >&5
+$as_echo "$as_me: enabling memory tracking" >&6;}
+
+$as_echo "#define ENABLE_MEMTRACK 1" >>confdefs.h
+
+ HAVE_MEMTRACK=yes
+else
+ HAVE_MEMTRACK=no
+
+fi
+ if test "x$HAVE_MEMTRACK" = "xyes"; then
+ HAVE_MEMTRACK_TRUE=
+ HAVE_MEMTRACK_FALSE='#'
+else
+ HAVE_MEMTRACK_TRUE='#'
+ HAVE_MEMTRACK_FALSE=
+fi
+
+
+
+#
+# Disable logging levels below INFO
+#
+# Check whether --enable-logging was given.
+if test "${enable_logging+set}" = set; then :
+ enableval=$enable_logging;
+fi
+
+
+if test "x$enable_logging" != xno; then :
+
+$as_echo "#define UCS_MAX_LOG_LEVEL UCS_LOG_LEVEL_TRACE_POLL" >>confdefs.h
+
+else
+
+$as_echo "#define UCS_MAX_LOG_LEVEL UCS_LOG_LEVEL_INFO" >>confdefs.h
+
+
+fi
+
+
+#
+# Disable assertions
+#
+# Check whether --enable-assertions was given.
+if test "${enable_assertions+set}" = set; then :
+ enableval=$enable_assertions;
+fi
+
+
+if test "x$enable_assertions" != xno; then :
+
+$as_echo "#define ENABLE_ASSERT 1" >>confdefs.h
+
+
+fi
+
+#
+# Check if __attribute__((constructor)) works
+# (link-only test when cross compiling, run test otherwise; failure aborts configure)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking __attribute__((constructor))" >&5
+$as_echo_n "checking __attribute__((constructor))... " >&6; }
+
+ if test "$cross_compiling" = yes; then :
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+static int rc = 1;
+ static void constructor_test() __attribute__((constructor));
+ static void constructor_test() { rc = 0; }
+ int main() { return rc; }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ as_fn_error $? "Cannot continue. Please use compiler that
+ supports __attribute__((constructor))" "$LINENO" 5
+
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+static int rc = 1;
+ static void constructor_test() __attribute__((constructor));
+ static void constructor_test() { rc = 0; }
+ int main() { return rc; }
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ as_fn_error $? "Cannot continue. Please use compiler that
+ supports __attribute__((constructor))" "$LINENO" 5
+
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+
+
+
+#
+# Manual configuration of cacheline size
+#
+
+# Check whether --with-cache-line-size was given.
+if test "${with_cache_line_size+set}" = set; then :
+ withval=$with_cache_line_size;
+else
+ with_cache_line_size=no
+fi
+
+
+if test "x$with_cache_line_size" != xno; then :
+
+ case ${with_cache_line_size} in
+ 64)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: The cache line size is set to 64B" >&5
+$as_echo "The cache line size is set to 64B" >&6; }
+
+$as_echo "#define HAVE_CACHE_LINE_SIZE 64" >>confdefs.h
+
+ ;;
+ 128)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: The cache line size is set to 128B" >&5
+$as_echo "The cache line size is set to 128B" >&6; }
+
+$as_echo "#define HAVE_CACHE_LINE_SIZE 128" >>confdefs.h
+
+ ;;
+ [0-9]*)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unusual cache cache line size was specified: $with_cache_line_size" >&5
+$as_echo "$as_me: WARNING: Unusual cache cache line size was specified: $with_cache_line_size" >&2;}
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_CACHE_LINE_SIZE $with_cache_line_size
+_ACEOF
+
+ ;;
+ *)
+ as_fn_error $? "Cannot continue. Unsupported cache line size $with_cache_line_size." "$LINENO" 5
+ ;;
+ esac
+fi
+
+
+#
+# Architecture specific checks
+# NOTE(review): the aarch64 test program uses uint64_t but its include had no header name; <stdint.h> is restored - confirm.
+case ${host} in
+ aarch64*)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking support for CNTVCT_EL0 on aarch64" >&5
+$as_echo_n "checking support for CNTVCT_EL0 on aarch64... " >&6; }
+ if test "$cross_compiling" = yes; then :
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run test program while cross compiling
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdint.h>
+int
+main ()
+{
+uint64_t tmp; asm volatile("mrs %0, cntvct_el0" : "=r" (tmp));
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_HW_TIMER 1" >>confdefs.h
+
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+$as_echo "#define HAVE_HW_TIMER 0" >>confdefs.h
+
+
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+;;
+ *)
+ # HW timer is supported for all other architectures
+
+$as_echo "#define HAVE_HW_TIMER 1" >>confdefs.h
+
+esac
+
+#
+# Enable built-in memcpy
+#
+# Check whether --enable-builtin-memcpy was given.
+if test "${enable_builtin_memcpy+set}" = set; then :
+ enableval=$enable_builtin_memcpy;
+else
+ enable_builtin_memcpy=yes
+fi
+
+
+if test "x$enable_builtin_memcpy" != xno; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: enabling builtin memcpy" >&5
+$as_echo "$as_me: enabling builtin memcpy" >&6;}
+
+$as_echo "#define ENABLE_BUILTIN_MEMCPY 1" >>confdefs.h
+
+else
+
+$as_echo "#define ENABLE_BUILTIN_MEMCPY 0" >>confdefs.h
+
+
+fi
+
+for ac_func in __clear_cache
+do :
+ ac_fn_c_check_func "$LINENO" "__clear_cache" "ac_cv_func___clear_cache"
+if test "x$ac_cv_func___clear_cache" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE___CLEAR_CACHE 1
+_ACEOF
+
+fi
+done
+
+for ac_func in __aarch64_sync_cache_range
+do :
+ ac_fn_c_check_func "$LINENO" "__aarch64_sync_cache_range" "ac_cv_func___aarch64_sync_cache_range"
+if test "x$ac_cv_func___aarch64_sync_cache_range" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE___AARCH64_SYNC_CACHE_RANGE 1
+_ACEOF
+
+fi
+done
+
+
+ #
+# Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+#
+
+
+#
+# Enable overriding library symbols
+#
+# Check whether --enable-symbol-override was given.
+if test "${enable_symbol_override+set}" = set; then :
+ enableval=$enable_symbol_override;
+else
+ enable_symbol_override=yes
+fi
+
+
+if test "x$enable_symbol_override" = xyes; then :
+
+$as_echo "#define ENABLE_SYMBOL_OVERRIDE 1" >>confdefs.h
+
+ :
+
+fi
+
+#
+# Memory allocator selection
+#
+
+# Check whether --with-allocator was given.
+if test "${with_allocator+set}" = set; then :
+ withval=$with_allocator;
+else
+ with_allocator=ptmalloc286
+fi
+
+
+case ${with_allocator} in
+ ptmalloc286)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: Memory allocator is ptmalloc-2.8.6 version" >&5
+$as_echo "$as_me: Memory allocator is ptmalloc-2.8.6 version" >&6;}
+
+$as_echo "#define HAVE_UCM_PTMALLOC286 1" >>confdefs.h
+
+ HAVE_UCM_PTMALLOC286=yes
+ ;;
+ *)
+ as_fn_error $? "Cannot continue. Unsupported memory allocator name
+ in --with-allocator=$with_allocator" "$LINENO" 5
+ ;;
+esac
+
+ if test "x$HAVE_UCM_PTMALLOC286" = "xyes"; then
+ HAVE_UCM_PTMALLOC286_TRUE=
+ HAVE_UCM_PTMALLOC286_FALSE='#'
+else
+ HAVE_UCM_PTMALLOC286_TRUE='#'
+ HAVE_UCM_PTMALLOC286_FALSE=
+fi
+
+
+for ac_func in malloc_get_state malloc_set_state
+do :
+ as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
+ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
+if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+#
+# Madvise flags (NOTE(review): the include had no header name; <sys/mman.h> is restored as the header declaring the MADV_* constants - confirm)
+#
+ac_fn_c_check_decl "$LINENO" "MADV_FREE" "ac_cv_have_decl_MADV_FREE" "#include <sys/mman.h>
+"
+if test "x$ac_cv_have_decl_MADV_FREE" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_MADV_FREE $ac_have_decl
+_ACEOF
+ac_fn_c_check_decl "$LINENO" "MADV_REMOVE" "ac_cv_have_decl_MADV_REMOVE" "#include <sys/mman.h>
+"
+if test "x$ac_cv_have_decl_MADV_REMOVE" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_MADV_REMOVE $ac_have_decl
+_ACEOF
+ac_fn_c_check_decl "$LINENO" "POSIX_MADV_DONTNEED" "ac_cv_have_decl_POSIX_MADV_DONTNEED" "#include <sys/mman.h>
+"
+if test "x$ac_cv_have_decl_POSIX_MADV_DONTNEED" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_POSIX_MADV_DONTNEED $ac_have_decl
+_ACEOF
+
+
+
+# BISTRO hooks infrastructure
+#
+# SYS_xxx macro (NOTE(review): the includes had no header name; <sys/syscall.h> is restored as the header defining SYS_* numbers - confirm)
+#
+mmap_hooks_happy=yes
+ac_fn_c_check_decl "$LINENO" "SYS_mmap" "ac_cv_have_decl_SYS_mmap" " #include <sys/syscall.h>
+"
+if test "x$ac_cv_have_decl_SYS_mmap" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_SYS_MMAP $ac_have_decl
+_ACEOF
+if test $ac_have_decl = 1; then :
+
+else
+ mmap_hooks_happy=no
+fi
+ac_fn_c_check_decl "$LINENO" "SYS_munmap" "ac_cv_have_decl_SYS_munmap" " #include <sys/syscall.h>
+"
+if test "x$ac_cv_have_decl_SYS_munmap" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_SYS_MUNMAP $ac_have_decl
+_ACEOF
+if test $ac_have_decl = 1; then :
+
+else
+ mmap_hooks_happy=no
+fi
+ac_fn_c_check_decl "$LINENO" "SYS_mremap" "ac_cv_have_decl_SYS_mremap" " #include <sys/syscall.h>
+"
+if test "x$ac_cv_have_decl_SYS_mremap" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_SYS_MREMAP $ac_have_decl
+_ACEOF
+if test $ac_have_decl = 1; then :
+
+else
+ mmap_hooks_happy=no
+fi
+ac_fn_c_check_decl "$LINENO" "SYS_brk" "ac_cv_have_decl_SYS_brk" " #include <sys/syscall.h>
+"
+if test "x$ac_cv_have_decl_SYS_brk" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_SYS_BRK $ac_have_decl
+_ACEOF
+if test $ac_have_decl = 1; then :
+
+else
+ mmap_hooks_happy=no
+fi
+ac_fn_c_check_decl "$LINENO" "SYS_madvise" "ac_cv_have_decl_SYS_madvise" " #include <sys/syscall.h>
+"
+if test "x$ac_cv_have_decl_SYS_madvise" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_SYS_MADVISE $ac_have_decl
+_ACEOF
+if test $ac_have_decl = 1; then :
+
+else
+ mmap_hooks_happy=no
+fi
+
+
+shm_hooks_happy=yes
+ac_fn_c_check_decl "$LINENO" "SYS_shmat" "ac_cv_have_decl_SYS_shmat" "#include <sys/syscall.h>
+"
+if test "x$ac_cv_have_decl_SYS_shmat" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_SYS_SHMAT $ac_have_decl
+_ACEOF
+if test $ac_have_decl = 1; then :
+
+else
+ shm_hooks_happy=no
+fi
+ac_fn_c_check_decl "$LINENO" "SYS_shmdt" "ac_cv_have_decl_SYS_shmdt" "#include <sys/syscall.h>
+"
+if test "x$ac_cv_have_decl_SYS_shmdt" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_SYS_SHMDT $ac_have_decl
+_ACEOF
+if test $ac_have_decl = 1; then :
+
+else
+ shm_hooks_happy=no
+fi
+
+
+ipc_hooks_happy=yes
+ac_fn_c_check_decl "$LINENO" "SYS_ipc" "ac_cv_have_decl_SYS_ipc" "#include <sys/syscall.h>
+"
+if test "x$ac_cv_have_decl_SYS_ipc" = xyes; then :
+ ac_have_decl=1
+else
+ ac_have_decl=0
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_SYS_IPC $ac_have_decl
+_ACEOF
+if test $ac_have_decl = 1; then :
+
+else
+ ipc_hooks_happy=no
+fi
+
+bistro_hooks_happy=no
+if test "x$mmap_hooks_happy" = "xyes"; then :
+ if test "x$ipc_hooks_happy" = "xyes" || test "x$shm_hooks_happy" = "xyes"; then :
+ bistro_hooks_happy=yes
+fi
+fi
+
+if test "x$bistro_hooks_happy" = "xyes"; then :
+
+$as_echo "#define UCM_BISTRO_HOOKS 1" >>confdefs.h
+
+else
+
+$as_echo "#define UCM_BISTRO_HOOKS 0" >>confdefs.h
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Some of required syscalls could not be found" >&5
+$as_echo "$as_me: WARNING: Some of required syscalls could not be found" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: BISTRO mmap hook mode is disabled" >&5
+$as_echo "$as_me: WARNING: BISTRO mmap hook mode is disabled" >&2;}
+fi
+
+for ac_func in __curbrk
+do :
+ ac_fn_c_check_func "$LINENO" "__curbrk" "ac_cv_func___curbrk"
+if test "x$ac_cv_func___curbrk" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE___CURBRK 1
+_ACEOF
+
+fi
+done
+
+
+#
+# tcmalloc library - for testing only
+#
+SAVE_LDFLAGS="$LDFLAGS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for tc_malloc in -ltcmalloc" >&5
+$as_echo_n "checking for tc_malloc in -ltcmalloc... " >&6; }
+if ${ac_cv_lib_tcmalloc_tc_malloc+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-ltcmalloc $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char tc_malloc ();
+int
+main ()
+{
+return tc_malloc ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_tcmalloc_tc_malloc=yes
+else
+ ac_cv_lib_tcmalloc_tc_malloc=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_tcmalloc_tc_malloc" >&5
+$as_echo "$ac_cv_lib_tcmalloc_tc_malloc" >&6; }
+if test "x$ac_cv_lib_tcmalloc_tc_malloc" = xyes; then :
+ have_tcmalloc=yes
+ TCMALLOC_LIB="-ltcmalloc"
+else
+ have_tcmalloc=no
+fi
+
+ if test "x$have_tcmalloc" = "xyes"; then
+ HAVE_TCMALLOC_TRUE=
+ HAVE_TCMALLOC_FALSE='#'
+else
+ HAVE_TCMALLOC_TRUE='#'
+ HAVE_TCMALLOC_FALSE=
+fi
+
+
+ #
+# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+#
+# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+
+
+#
+# Enable compiling tests with MPI
+#
+
+# Check whether --with-mpi was given.
+if test "${with_mpi+set}" = set; then :
+ withval=$with_mpi; :
+else
+ with_mpi=no
+fi
+
+
+ if test "x$with_mpi" != xyes && test "x$with_mpi" != xno; then :
+
+ if test -d "$with_mpi/bin"; then :
+ with_mpi="$with_mpi/bin"
+else
+ :
+fi
+ mpi_path=$with_mpi;with_mpi=yes
+
+else
+ mpi_path=$PATH
+fi
+
+#
+# Search for mpicc and mpirun in the given path.
+#
+if test "x$with_mpi" = xyes; then :
+
+
+ for ac_prog in mpicc mpiicc
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_MPICC+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $MPICC in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_MPICC="$MPICC" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $mpi_path
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_MPICC="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+MPICC=$ac_cv_path_MPICC
+if test -n "$MPICC"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MPICC" >&5
+$as_echo "$MPICC" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$MPICC" && break
+done
+test -n "$MPICC" || MPICC=""""
+
+
+ for ac_prog in mpirun mpiexec aprun orterun
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_MPIRUN+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ case $MPIRUN in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_MPIRUN="$MPIRUN" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $mpi_path
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_path_MPIRUN="$as_dir/$ac_word$ac_exec_ext"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+MPIRUN=$ac_cv_path_MPIRUN
+if test -n "$MPIRUN"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MPIRUN" >&5
+$as_echo "$MPIRUN" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$MPIRUN" && break
+done
+test -n "$MPIRUN" || MPIRUN=""""
+
+ if test -z "$MPIRUN"; then :
+ as_fn_error $? "--with-mpi was requested but MPI was not found in the PATH in $mpi_path" "$LINENO" 5
+else
+ :
+fi
+
+else
+ :
+fi
+
+if test -n "$MPICC"; then :
+
+$as_echo "#define HAVE_MPI 1" >>confdefs.h
+
+ mpi_enable=enabled
+else
+ mpi_enable=disabled
+fi
+ if test -n "$MPIRUN"; then
+ HAVE_MPI_TRUE=
+ HAVE_MPI_FALSE='#'
+else
+ HAVE_MPI_TRUE='#'
+ HAVE_MPI_FALSE=
+fi
+
+ if test -n "$MPICC"; then
+ HAVE_MPICC_TRUE=
+ HAVE_MPICC_FALSE='#'
+else
+ HAVE_MPICC_TRUE='#'
+ HAVE_MPICC_FALSE=
+fi
+
+ if test -n "$MPIRUN"; then
+ HAVE_MPIRUN_TRUE=
+ HAVE_MPIRUN_FALSE='#'
+else
+ HAVE_MPIRUN_TRUE='#'
+ HAVE_MPIRUN_FALSE=
+fi
+
+
+
+# Check whether --with-rte was given.
if test "${with_rte+set}" = set; then :
  withval=$with_rte;
else
  with_rte=no
fi

#
# RTE support: when --with-rte=DIR is given, probe DIR/include/rte.h and, if it
# is usable, publish RTE_CPPFLAGS / RTE_LDFLAGS and define HAVE_RTE.
#
if test "x$with_rte" != xno; then :

  for ac_header in $with_rte/include/rte.h
  do :
    as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
    ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
    if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
      cat >>confdefs.h <<_ACEOF
#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
_ACEOF
      rte_happy="yes"
    else
      rte_happy="no"
    fi

  done

  if test "x$rte_happy" = xyes; then :

    RTE_CPPFLAGS="-I$with_rte/include"

    RTE_LDFLAGS="-L$with_rte/lib -lrte"

    $as_echo "#define HAVE_RTE 1" >>confdefs.h

  fi
fi

#
# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#
#
#
# Check for Java support
#
# Outcome: java_happy (yes/no), java_dir/JDK (JDK root used for the JNI
# headers), the HAVE_JAVA conditional, and MVN/build_bindings when enabled.
#
java_happy="no"

# Check whether --with-java was given.
if test "${with_java+set}" = set; then :
  withval=$with_java;
else
  with_java=guess
fi


if test "x$with_java" != xno; then :

  # Extract the first word of "mvn", so it can be a program name with args.
  set dummy mvn; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
    $as_echo_n "checking for $ac_word... " >&6; }
  if ${ac_cv_prog_MVNBIN+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$MVNBIN"; then
      ac_cv_prog_MVNBIN="$MVNBIN" # Let the user override the test.
    else
      as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
      for as_dir in $PATH
      do
        IFS=$as_save_IFS
        test -z "$as_dir" && as_dir=.
        for ac_exec_ext in '' $ac_executable_extensions; do
          if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
            ac_cv_prog_MVNBIN="yes"
            $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
            break 2
          fi
        done
      done
      IFS=$as_save_IFS

    fi
  fi
  MVNBIN=$ac_cv_prog_MVNBIN
  if test -n "$MVNBIN"; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MVNBIN" >&5
      $as_echo "$MVNBIN" >&6; }
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
      $as_echo "no" >&6; }
  fi


  # Extract the first word of "java", so it can be a program name with args.
  set dummy java; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
    $as_echo_n "checking for $ac_word... " >&6; }
  if ${ac_cv_prog_JAVABIN+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$JAVABIN"; then
      ac_cv_prog_JAVABIN="$JAVABIN" # Let the user override the test.
    else
      as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
      for as_dir in $PATH
      do
        IFS=$as_save_IFS
        test -z "$as_dir" && as_dir=.
        for ac_exec_ext in '' $ac_executable_extensions; do
          if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
            ac_cv_prog_JAVABIN="yes"
            $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
            break 2
          fi
        done
      done
      IFS=$as_save_IFS

    fi
  fi
  JAVABIN=$ac_cv_prog_JAVABIN
  if test -n "$JAVABIN"; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JAVABIN" >&5
      $as_echo "$JAVABIN" >&6; }
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
      $as_echo "no" >&6; }
  fi


  # Both tools must have been located on PATH (the lookups above store "yes").
  if test "x${MVNBIN}" = "xyes" && test "x${JAVABIN}" = "xyes"; then :

    # An explicit --with-java=DIR wins; otherwise use JAVA_HOME, deriving it
    # from the java binary when it is not set.
    if test -n "$with_java" && test "x$with_java" != "xyes" && test "x$with_java" != "xguess"; then :
      java_dir=$with_java
    else

      if test -z "$JAVA_HOME"; then :

        # Extract the first word of "readlink", so it can be a program name with args.
        set dummy readlink; ac_word=$2
        { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
          $as_echo_n "checking for $ac_word... " >&6; }
        if ${ac_cv_prog_READLINK+:} false; then :
          $as_echo_n "(cached) " >&6
        else
          if test -n "$READLINK"; then
            ac_cv_prog_READLINK="$READLINK" # Let the user override the test.
          else
            as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
            for as_dir in $PATH
            do
              IFS=$as_save_IFS
              test -z "$as_dir" && as_dir=.
              for ac_exec_ext in '' $ac_executable_extensions; do
                if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
                  ac_cv_prog_READLINK="yes"
                  $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
                  break 2
                fi
              done
            done
            IFS=$as_save_IFS

          fi
        fi
        READLINK=$ac_cv_prog_READLINK
        if test -n "$READLINK"; then
          { $as_echo "$as_me:${as_lineno-$LINENO}: result: $READLINK" >&5
            $as_echo "$READLINK" >&6; }
        else
          { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
            $as_echo "no" >&6; }
        fi


        if test "x${READLINK}" = xyes; then :

          # `command -v` is the POSIX way to resolve a program path; the
          # previous `type -P` is a bash extension and misbehaves when
          # configure runs under another /bin/sh.
          JAVA=$(readlink -f "$(command -v java)")

          # Drop a ".../jre/..." tail first, then a plain "/bin/java" tail, so
          # installations without a jre/ directory also resolve to their root
          # instead of to the java binary itself.
          JAVA_HOME=${JAVA%*/jre*}
          JAVA_HOME=${JAVA_HOME%/bin/java}

          { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Please set JAVA_HOME=$JAVA_HOME" >&5
            $as_echo "$as_me: WARNING: Please set JAVA_HOME=$JAVA_HOME" >&2;}

        else

          if test "x$with_java" = "xguess"; then :
            { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: For Java support please install readlink or set JAVA_HOME=" >&5
              $as_echo "$as_me: WARNING: For Java support please install readlink or set JAVA_HOME=" >&2;}
          else
            as_fn_error $? "Java support requested, but couldn't find path; please set JAVA_HOME=" "$LINENO" 5

          fi


        fi


      fi
      java_dir=$JAVA_HOME


    fi

    # Probe the JNI headers with the JDK include directories prepended.
    save_CPPFLAGS="$CPPFLAGS"
    CPPFLAGS="-I$java_dir/include/linux -I$java_dir/include $CPPFLAGS"
    # Java is enabled only when every header is usable; previously a single
    # hit was enough to set java_happy=yes even if the other header was missing.
    java_headers_ok="yes"
    for ac_header in jni_md.h jni.h
    do :
      as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
      ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
      if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
        cat >>confdefs.h <<_ACEOF
#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
_ACEOF

      else

        java_headers_ok="no"
        if test "x$with_java" = "xguess"; then :
          { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Couldn't find jni headers." >&5
            $as_echo "$as_me: WARNING: Couldn't find jni headers." >&2;}
        else
          as_fn_error $? "Java support requested, but couldn't find jni headers in $java_dir" "$LINENO" 5

        fi


      fi

    done
    java_happy=$java_headers_ok


    CPPFLAGS="$save_CPPFLAGS"

  else

    if test "x$with_java" = "xguess"; then :
      { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Disabling Java support - java or mvn not in path." >&5
        $as_echo "$as_me: WARNING: Disabling Java support - java or mvn not in path." >&2;}
    else
      as_fn_error $? "Java support was explicitly requested, but java or mvn not in path." "$LINENO" 5

    fi


  fi

else
  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Java support was explicitly disabled." >&5
    $as_echo "$as_me: WARNING: Java support was explicitly disabled." >&2;}

fi

JDK=${java_dir}

if test "x$java_happy" != "xno"; then
  HAVE_JAVA_TRUE=
  HAVE_JAVA_FALSE='#'
else
  HAVE_JAVA_TRUE='#'
  HAVE_JAVA_FALSE=
fi

#Set MVN according to whether user has Java and Maven or not
if test -z "$HAVE_JAVA_TRUE"; then :
  MVN="mvn"

  build_bindings="${build_bindings}:java"

fi

#
# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#

# UCX_CHECK_CUDA

#
# Copyright (C) Advanced Micro Devices, Inc.
# 2016 - 2018. ALL RIGHTS RESERVED.
# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#

# ROCM_PARSE_FLAGS(ARG, VAR_LIBS, VAR_LDFLAGS, VAR_CPPFLAGS)
# ----------------------------------------------------------
# Parse whitespace-separated ARG into appropriate LIBS, LDFLAGS, and
# CPPFLAGS variables.


# ROCM_BUILD_FLAGS(ARG, VAR_LIBS, VAR_LDFLAGS, VAR_CPPFLAGS)
# ----------------------------------------------------------
# Parse value of ARG into appropriate LIBS, LDFLAGS, and
# CPPFLAGS variables.


# HIP_BUILD_FLAGS(ARG, VAR_LIBS, VAR_LDFLAGS, VAR_CPPFLAGS)
# ----------------------------------------------------------
# Parse value of ARG into appropriate LIBS, LDFLAGS, and
# CPPFLAGS variables.


#
# Check for ROCm support
#


#
# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#



#
# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

UCM_MODULE_LDFLAGS="-Xlinker -z -Xlinker interpose -Xlinker --no-as-needed"


ucm_modules=""
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#


#
# CUDA detection. Runs at most once per configure run (guarded by
# cuda_checked) and leaves its verdict in cuda_happy, the flags in
# CUDA_CPPFLAGS / CUDA_LDFLAGS, and the HAVE_CUDA conditional.
#
if test "x$cuda_checked" != "xyes"; then :


  # Check whether --with-cuda was given.
  if test "${with_cuda+set}" = set; then :
    withval=$with_cuda;
  else
    with_cuda=guess
  fi


  if test "x$with_cuda" = "xno"; then :
    cuda_happy=no
  else

    save_CPPFLAGS="$CPPFLAGS"
    save_LDFLAGS="$LDFLAGS"

    CUDA_CPPFLAGS=""
    CUDA_LDFLAGS=""

    # An explicit --with-cuda=DIR supplies the include dir and lib/lib64 dir.
    if test -n "$with_cuda" && test "x$with_cuda" != "xyes" && test "x$with_cuda" != "xguess"; then :
      ucx_check_cuda_dir="$with_cuda"
      if test -d "$with_cuda/lib64"; then :
        libsuff="64"
      else
        libsuff=""
      fi
      ucx_check_cuda_libdir="$with_cuda/lib$libsuff"
      CUDA_CPPFLAGS="-I$with_cuda/include"
      CUDA_LDFLAGS="-L$ucx_check_cuda_libdir -L$ucx_check_cuda_libdir/stubs"
    fi

    # with_cuda_libdir, when set, overrides the library directory.
    if test -n "$with_cuda_libdir" && test "x$with_cuda_libdir" != "xyes"; then :
      ucx_check_cuda_libdir="$with_cuda_libdir"
      CUDA_LDFLAGS="-L$ucx_check_cuda_libdir -L$ucx_check_cuda_libdir/stubs"
    fi

    CPPFLAGS="$CPPFLAGS $CUDA_CPPFLAGS"
    LDFLAGS="$LDFLAGS $CUDA_LDFLAGS"

    # Check cuda header files
    # Every header must be usable: start from "yes" and only ever downgrade,
    # so a missing cuda.h is no longer masked by a present cuda_runtime.h.
    cuda_happy="yes"
    for ac_header in cuda.h cuda_runtime.h
    do :
      as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
      ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
      if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
        cat >>confdefs.h <<_ACEOF
#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
_ACEOF
      else
        cuda_happy="no"
      fi

    done


    # Check cuda libraries
    if test "x$cuda_happy" = "xyes"; then :
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cuDeviceGetUuid in -lcuda" >&5
        $as_echo_n "checking for cuDeviceGetUuid in -lcuda... " >&6; }
      if ${ac_cv_lib_cuda_cuDeviceGetUuid+:} false; then :
        $as_echo_n "(cached) " >&6
      else
        ac_check_lib_save_LIBS=$LIBS
        LIBS="-lcuda $LIBS"
        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h.  */

/* Override any GCC internal prototype to avoid an error.
   Use char because int might match the return type of a GCC
   builtin and then its argument prototype would still apply.  */
#ifdef __cplusplus
extern "C"
#endif
char cuDeviceGetUuid ();
int
main ()
{
return cuDeviceGetUuid ();
  ;
  return 0;
}
_ACEOF
        if ac_fn_c_try_link "$LINENO"; then :
          ac_cv_lib_cuda_cuDeviceGetUuid=yes
        else
          ac_cv_lib_cuda_cuDeviceGetUuid=no
        fi
        rm -f core conftest.err conftest.$ac_objext \
            conftest$ac_exeext conftest.$ac_ext
        LIBS=$ac_check_lib_save_LIBS
      fi
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cuda_cuDeviceGetUuid" >&5
        $as_echo "$ac_cv_lib_cuda_cuDeviceGetUuid" >&6; }
      if test "x$ac_cv_lib_cuda_cuDeviceGetUuid" = xyes; then :
        CUDA_LDFLAGS="$CUDA_LDFLAGS -lcuda"
      else
        cuda_happy="no"
      fi

    fi
    if test "x$cuda_happy" = "xyes"; then :
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cudaGetDeviceCount in -lcudart" >&5
        $as_echo_n "checking for cudaGetDeviceCount in -lcudart... " >&6; }
      if ${ac_cv_lib_cudart_cudaGetDeviceCount+:} false; then :
        $as_echo_n "(cached) " >&6
      else
        ac_check_lib_save_LIBS=$LIBS
        LIBS="-lcudart $LIBS"
        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h.  */

/* Override any GCC internal prototype to avoid an error.
   Use char because int might match the return type of a GCC
   builtin and then its argument prototype would still apply.  */
#ifdef __cplusplus
extern "C"
#endif
char cudaGetDeviceCount ();
int
main ()
{
return cudaGetDeviceCount ();
  ;
  return 0;
}
_ACEOF
        if ac_fn_c_try_link "$LINENO"; then :
          ac_cv_lib_cudart_cudaGetDeviceCount=yes
        else
          ac_cv_lib_cudart_cudaGetDeviceCount=no
        fi
        rm -f core conftest.err conftest.$ac_objext \
            conftest$ac_exeext conftest.$ac_ext
        LIBS=$ac_check_lib_save_LIBS
      fi
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_cudaGetDeviceCount" >&5
        $as_echo "$ac_cv_lib_cudart_cudaGetDeviceCount" >&6; }
      if test "x$ac_cv_lib_cudart_cudaGetDeviceCount" = xyes; then :
        CUDA_LDFLAGS="$CUDA_LDFLAGS -lcudart"
      else
        cuda_happy="no"
      fi

    fi

    CPPFLAGS="$save_CPPFLAGS"
    LDFLAGS="$save_LDFLAGS"

    if test "x$cuda_happy" = "xyes"; then :
      CUDA_CPPFLAGS="$CUDA_CPPFLAGS"

      CUDA_LDFLAGS="$CUDA_LDFLAGS"

      $as_echo "#define HAVE_CUDA 1" >>confdefs.h

    else
      # Only an explicit request turns a failed probe into a hard error.
      if test "x$with_cuda" != "xguess"; then :
        as_fn_error $? "CUDA support is requested but cuda packages cannot be found" "$LINENO" 5
      else
        { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: CUDA not found" >&5
          $as_echo "$as_me: WARNING: CUDA not found" >&2;}
      fi
    fi


  fi # "x$with_cuda" = "xno"

  cuda_checked=yes
  if test "x$cuda_happy" != xno; then
    HAVE_CUDA_TRUE=
    HAVE_CUDA_FALSE='#'
  else
    HAVE_CUDA_TRUE='#'
    HAVE_CUDA_FALSE=
  fi



fi # "x$cuda_checked" != "xyes"


if test "x$cuda_happy" = "xyes"; then :
  ucm_modules="${ucm_modules}:cuda"
fi
ac_config_files="$ac_config_files src/ucm/cuda/Makefile"


#
# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#


#
# ROCm / HIP detection. Runs at most once (guarded by rocm_checked) and leaves
# its verdicts in rocm_happy / hip_happy plus the HAVE_ROCM / HAVE_HIP
# conditionals.
#
if test "x$rocm_checked" != "xyes"; then :



  # Check whether --with-rocm was given.
  if test "${with_rocm+set}" = set; then :
    withval=$with_rocm;
  else
    with_rocm=guess
  fi


  rocm_happy=no
  hip_happy=no
  if test "x$with_rocm" != "xno"; then :
    # --with-rocm may be empty/yes/guess (use /opt/rocm), an absolute path, or
    # a whitespace-separated list of compiler/linker flags.
    case "x$with_rocm" in #(
    x|xguess|xyes) :
      { $as_echo "$as_me:${as_lineno-$LINENO}: ROCm path was not specified. Guessing ..." >&5
        $as_echo "$as_me: ROCm path was not specified. Guessing ..." >&6;}
      with_rocm="/opt/rocm"
      ROCM_CPPFLAGS="-I$with_rocm/include/hsa -I$with_rocm/include"
      ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib"
      ROCM_LIBS="-lhsa-runtime64"
      ;; #(
    x/*) :
      { $as_echo "$as_me:${as_lineno-$LINENO}: ROCm path given as $with_rocm ..." >&5
        $as_echo "$as_me: ROCm path given as $with_rocm ..." >&6;}
      ROCM_CPPFLAGS="-I$with_rocm/include/hsa -I$with_rocm/include"
      ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib"
      ROCM_LIBS="-lhsa-runtime64"
      ;; #(
    *) :
      { $as_echo "$as_me:${as_lineno-$LINENO}: ROCm flags given ..." >&5
        $as_echo "$as_me: ROCm flags given ..." >&6;}
      # Iterate over the words of $with_rocm. The previous "$$with_rocm"
      # expanded "$$" to the shell's PID, so the user's flags were never seen.
      for arg in $with_rocm ; do
        case $arg in #(
        yes) :
          ;; #(
        no) :
          ;; #(
        -l*|*.a|*.so) :
          ROCM_LIBS="$ROCM_LIBS $arg" ;; #(
        -L*|-WL*|-Wl*) :
          ROCM_LDFLAGS="$ROCM_LDFLAGS $arg" ;; #(
        -I*) :
          ROCM_CPPFLAGS="$ROCM_CPPFLAGS $arg" ;; #(
        *lib|*lib/|*lib64|*lib64/) :
          if test -d $arg; then :
            ROCM_LDFLAGS="$ROCM_LDFLAGS -L$arg"
          else
            { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $arg of $with_rocm not parsed" >&5
              $as_echo "$as_me: WARNING: $arg of $with_rocm not parsed" >&2;}
          fi ;; #(
        *include|*include/) :
          if test -d $arg; then :
            ROCM_CPPFLAGS="$ROCM_CPPFLAGS -I$arg"
          else
            { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $arg of $with_rocm not parsed" >&5
              $as_echo "$as_me: WARNING: $arg of $with_rocm not parsed" >&2;}
          fi ;; #(
        *) :
          { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $arg of $with_rocm not parsed" >&5
            $as_echo "$as_me: WARNING: $arg of $with_rocm not parsed" >&2;} ;;
        esac
      done ;;
    esac

    SAVE_CPPFLAGS="$CPPFLAGS"
    SAVE_LDFLAGS="$LDFLAGS"
    SAVE_LIBS="$LIBS"

    CPPFLAGS="$ROCM_CPPFLAGS $CPPFLAGS"
    LDFLAGS="$ROCM_LDFLAGS $LDFLAGS"
    LIBS="$ROCM_LIBS $LIBS"

    # Each probe below only runs while rocm_happy is still "yes".
    rocm_happy=yes
    if test "x$rocm_happy" = xyes; then :
      for ac_header in hsa.h
      do :
        ac_fn_c_check_header_mongrel "$LINENO" "hsa.h" "ac_cv_header_hsa_h" "$ac_includes_default"
        if test "x$ac_cv_header_hsa_h" = xyes; then :
          cat >>confdefs.h <<_ACEOF
#define HAVE_HSA_H 1
_ACEOF
          rocm_happy=yes
        else
          rocm_happy=no
        fi

      done

    fi
    if test "x$rocm_happy" = xyes; then :
      for ac_header in hsa_ext_amd.h
      do :
        ac_fn_c_check_header_mongrel "$LINENO" "hsa_ext_amd.h" "ac_cv_header_hsa_ext_amd_h" "$ac_includes_default"
        if test "x$ac_cv_header_hsa_ext_amd_h" = xyes; then :
          cat >>confdefs.h <<_ACEOF
#define HAVE_HSA_EXT_AMD_H 1
_ACEOF
          rocm_happy=yes
        else
          rocm_happy=no
        fi

      done

    fi
    if test "x$rocm_happy" = xyes; then :
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for hsa_init in -lhsa-runtime64" >&5
        $as_echo_n "checking for hsa_init in -lhsa-runtime64... " >&6; }
      if ${ac_cv_lib_hsa_runtime64_hsa_init+:} false; then :
        $as_echo_n "(cached) " >&6
      else
        ac_check_lib_save_LIBS=$LIBS
        LIBS="-lhsa-runtime64 $LIBS"
        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h.  */

/* Override any GCC internal prototype to avoid an error.
   Use char because int might match the return type of a GCC
   builtin and then its argument prototype would still apply.  */
#ifdef __cplusplus
extern "C"
#endif
char hsa_init ();
int
main ()
{
return hsa_init ();
  ;
  return 0;
}
_ACEOF
        if ac_fn_c_try_link "$LINENO"; then :
          ac_cv_lib_hsa_runtime64_hsa_init=yes
        else
          ac_cv_lib_hsa_runtime64_hsa_init=no
        fi
        rm -f core conftest.err conftest.$ac_objext \
            conftest$ac_exeext conftest.$ac_ext
        LIBS=$ac_check_lib_save_LIBS
      fi
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hsa_runtime64_hsa_init" >&5
        $as_echo "$ac_cv_lib_hsa_runtime64_hsa_init" >&6; }
      if test "x$ac_cv_lib_hsa_runtime64_hsa_init" = xyes; then :
        rocm_happy=yes
      else
        rocm_happy=no
      fi

    fi

    if test "x$rocm_happy" = "xyes"; then :



    else
      { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: ROCm not found" >&5
        $as_echo "$as_me: WARNING: ROCm not found" >&2;}
    fi

    CPPFLAGS="$SAVE_CPPFLAGS"
    LDFLAGS="$SAVE_LDFLAGS"
    LIBS="$SAVE_LIBS"

    # NOTE(review): the HIP flags always treat $with_rocm as a directory, even
    # when it held a flag list above - confirm whether that case is intended.
    HIP_CPPFLAGS="-D__HIP_PLATFORM_HCC__ -I$with_rocm/include/hip -I$with_rocm/include"
    HIP_LDFLAGS="-L$with_rocm/hip/lib -L$with_rocm/lib"
    HIP_LIBS="-lhip_hcc"


    CPPFLAGS="$HIP_CPPFLAGS $CPPFLAGS"
    LDFLAGS="$HIP_LDFLAGS $LDFLAGS"
    LIBS="$HIP_LIBS $LIBS"

    hip_happy=yes
    if test "x$hip_happy" = xyes; then :
      for ac_header in hip_runtime.h
      do :
        ac_fn_c_check_header_mongrel "$LINENO" "hip_runtime.h" "ac_cv_header_hip_runtime_h" "$ac_includes_default"
        if test "x$ac_cv_header_hip_runtime_h" = xyes; then :
          cat >>confdefs.h <<_ACEOF
#define HAVE_HIP_RUNTIME_H 1
_ACEOF
          hip_happy=yes
        else
          hip_happy=no
        fi

      done

    fi
    if test "x$hip_happy" = xyes; then :
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for hipFree in -lhip_hcc" >&5
        $as_echo_n "checking for hipFree in -lhip_hcc... " >&6; }
      if ${ac_cv_lib_hip_hcc_hipFree+:} false; then :
        $as_echo_n "(cached) " >&6
      else
        ac_check_lib_save_LIBS=$LIBS
        LIBS="-lhip_hcc $LIBS"
        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h.  */

/* Override any GCC internal prototype to avoid an error.
   Use char because int might match the return type of a GCC
   builtin and then its argument prototype would still apply.  */
#ifdef __cplusplus
extern "C"
#endif
char hipFree ();
int
main ()
{
return hipFree ();
  ;
  return 0;
}
_ACEOF
        if ac_fn_c_try_link "$LINENO"; then :
          ac_cv_lib_hip_hcc_hipFree=yes
        else
          ac_cv_lib_hip_hcc_hipFree=no
        fi
        rm -f core conftest.err conftest.$ac_objext \
            conftest$ac_exeext conftest.$ac_ext
        LIBS=$ac_check_lib_save_LIBS
      fi
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hip_hcc_hipFree" >&5
        $as_echo "$ac_cv_lib_hip_hcc_hipFree" >&6; }
      if test "x$ac_cv_lib_hip_hcc_hipFree" = xyes; then :
        hip_happy=yes
      else
        hip_happy=no
      fi

    fi
    if test "x$hip_happy" = xyes; then :
      HIP_CXXFLAGS="--std=gnu++11"
    fi

    CPPFLAGS="$SAVE_CPPFLAGS"
    LDFLAGS="$SAVE_LDFLAGS"
    LIBS="$SAVE_LIBS"

    if test "x$hip_happy" = "xyes"; then :




    else
      { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: HIP Runtime not found" >&5
        $as_echo "$as_me: WARNING: HIP Runtime not found" >&2;}
    fi


  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: ROCm was explicitly disabled" >&5
      $as_echo "$as_me: WARNING: ROCm was explicitly disabled" >&2;}

  fi

  rocm_checked=yes
  if test "x$rocm_happy" != xno; then
    HAVE_ROCM_TRUE=
    HAVE_ROCM_FALSE='#'
  else
    HAVE_ROCM_TRUE='#'
    HAVE_ROCM_FALSE=
  fi

  if test "x$hip_happy" != xno; then
    HAVE_HIP_TRUE=
    HAVE_HIP_FALSE='#'
  else
    HAVE_HIP_TRUE='#'
    HAVE_HIP_FALSE=
  fi



fi


if test "x$rocm_happy" = "xyes"; then :
  ucm_modules="${ucm_modules}:rocm"
fi
ac_config_files="$ac_config_files src/ucm/rocm/Makefile"



cat >>confdefs.h <<_ACEOF
#define ucm_MODULES "${ucm_modules}"
_ACEOF


ac_config_files="$ac_config_files src/ucm/Makefile"


#
# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#

uct_modules=""
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#


#
# CUDA detection. Runs at most once per configure run (guarded by
# cuda_checked) and leaves its verdict in cuda_happy, the flags in
# CUDA_CPPFLAGS / CUDA_LDFLAGS, and the HAVE_CUDA conditional.
#
if test "x$cuda_checked" != "xyes"; then :


  # Check whether --with-cuda was given.
  if test "${with_cuda+set}" = set; then :
    withval=$with_cuda;
  else
    with_cuda=guess
  fi


  if test "x$with_cuda" = "xno"; then :
    cuda_happy=no
  else

    save_CPPFLAGS="$CPPFLAGS"
    save_LDFLAGS="$LDFLAGS"

    CUDA_CPPFLAGS=""
    CUDA_LDFLAGS=""

    # An explicit --with-cuda=DIR supplies the include dir and lib/lib64 dir.
    if test -n "$with_cuda" && test "x$with_cuda" != "xyes" && test "x$with_cuda" != "xguess"; then :
      ucx_check_cuda_dir="$with_cuda"
      if test -d "$with_cuda/lib64"; then :
        libsuff="64"
      else
        libsuff=""
      fi
      ucx_check_cuda_libdir="$with_cuda/lib$libsuff"
      CUDA_CPPFLAGS="-I$with_cuda/include"
      CUDA_LDFLAGS="-L$ucx_check_cuda_libdir -L$ucx_check_cuda_libdir/stubs"
    fi

    # with_cuda_libdir, when set, overrides the library directory.
    if test -n "$with_cuda_libdir" && test "x$with_cuda_libdir" != "xyes"; then :
      ucx_check_cuda_libdir="$with_cuda_libdir"
      CUDA_LDFLAGS="-L$ucx_check_cuda_libdir -L$ucx_check_cuda_libdir/stubs"
    fi

    CPPFLAGS="$CPPFLAGS $CUDA_CPPFLAGS"
    LDFLAGS="$LDFLAGS $CUDA_LDFLAGS"

    # Check cuda header files
    # Every header must be usable: start from "yes" and only ever downgrade,
    # so a missing cuda.h is no longer masked by a present cuda_runtime.h.
    cuda_happy="yes"
    for ac_header in cuda.h cuda_runtime.h
    do :
      as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
      ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
      if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
        cat >>confdefs.h <<_ACEOF
#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
_ACEOF
      else
        cuda_happy="no"
      fi

    done


    # Check cuda libraries
    if test "x$cuda_happy" = "xyes"; then :
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cuDeviceGetUuid in -lcuda" >&5
        $as_echo_n "checking for cuDeviceGetUuid in -lcuda... " >&6; }
      if ${ac_cv_lib_cuda_cuDeviceGetUuid+:} false; then :
        $as_echo_n "(cached) " >&6
      else
        ac_check_lib_save_LIBS=$LIBS
        LIBS="-lcuda $LIBS"
        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h.  */

/* Override any GCC internal prototype to avoid an error.
   Use char because int might match the return type of a GCC
   builtin and then its argument prototype would still apply.  */
#ifdef __cplusplus
extern "C"
#endif
char cuDeviceGetUuid ();
int
main ()
{
return cuDeviceGetUuid ();
  ;
  return 0;
}
_ACEOF
        if ac_fn_c_try_link "$LINENO"; then :
          ac_cv_lib_cuda_cuDeviceGetUuid=yes
        else
          ac_cv_lib_cuda_cuDeviceGetUuid=no
        fi
        rm -f core conftest.err conftest.$ac_objext \
            conftest$ac_exeext conftest.$ac_ext
        LIBS=$ac_check_lib_save_LIBS
      fi
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cuda_cuDeviceGetUuid" >&5
        $as_echo "$ac_cv_lib_cuda_cuDeviceGetUuid" >&6; }
      if test "x$ac_cv_lib_cuda_cuDeviceGetUuid" = xyes; then :
        CUDA_LDFLAGS="$CUDA_LDFLAGS -lcuda"
      else
        cuda_happy="no"
      fi

    fi
    if test "x$cuda_happy" = "xyes"; then :
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cudaGetDeviceCount in -lcudart" >&5
        $as_echo_n "checking for cudaGetDeviceCount in -lcudart... " >&6; }
      if ${ac_cv_lib_cudart_cudaGetDeviceCount+:} false; then :
        $as_echo_n "(cached) " >&6
      else
        ac_check_lib_save_LIBS=$LIBS
        LIBS="-lcudart $LIBS"
        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h.  */

/* Override any GCC internal prototype to avoid an error.
   Use char because int might match the return type of a GCC
   builtin and then its argument prototype would still apply.  */
#ifdef __cplusplus
extern "C"
#endif
char cudaGetDeviceCount ();
int
main ()
{
return cudaGetDeviceCount ();
  ;
  return 0;
}
_ACEOF
        if ac_fn_c_try_link "$LINENO"; then :
          ac_cv_lib_cudart_cudaGetDeviceCount=yes
        else
          ac_cv_lib_cudart_cudaGetDeviceCount=no
        fi
        rm -f core conftest.err conftest.$ac_objext \
            conftest$ac_exeext conftest.$ac_ext
        LIBS=$ac_check_lib_save_LIBS
      fi
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_cudaGetDeviceCount" >&5
        $as_echo "$ac_cv_lib_cudart_cudaGetDeviceCount" >&6; }
      if test "x$ac_cv_lib_cudart_cudaGetDeviceCount" = xyes; then :
        CUDA_LDFLAGS="$CUDA_LDFLAGS -lcudart"
      else
        cuda_happy="no"
      fi

    fi

    CPPFLAGS="$save_CPPFLAGS"
    LDFLAGS="$save_LDFLAGS"

    if test "x$cuda_happy" = "xyes"; then :
      CUDA_CPPFLAGS="$CUDA_CPPFLAGS"

      CUDA_LDFLAGS="$CUDA_LDFLAGS"

      $as_echo "#define HAVE_CUDA 1" >>confdefs.h

    else
      # Only an explicit request turns a failed probe into a hard error.
      if test "x$with_cuda" != "xguess"; then :
        as_fn_error $? "CUDA support is requested but cuda packages cannot be found" "$LINENO" 5
      else
        { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: CUDA not found" >&5
          $as_echo "$as_me: WARNING: CUDA not found" >&2;}
      fi
    fi


  fi # "x$with_cuda" = "xno"

  cuda_checked=yes
  if test "x$cuda_happy" != xno; then
    HAVE_CUDA_TRUE=
    HAVE_CUDA_FALSE='#'
  else
    HAVE_CUDA_TRUE='#'
    HAVE_CUDA_FALSE=
  fi



fi # "x$cuda_checked" != "xyes"



if test "x$cuda_happy" = "xyes"; then :
  uct_modules="${uct_modules}:cuda"
fi
uct_cuda_modules=""
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
# See file LICENSE for terms.
#


#
# gdrcopy detection. Runs at most once (guarded by gdrcopy_checked) and leaves
# its verdict in gdrcopy_happy, the flags in GDR_COPY_CPPFLAGS /
# GDR_COPY_LDFLAGS, and the HAVE_GDR_COPY conditional.
#
if test "x$gdrcopy_checked" != "xyes"; then :


  gdrcopy_happy="no"


  # Check whether --with-gdrcopy was given.
  if test "${with_gdrcopy+set}" = set; then :
    withval=$with_gdrcopy;
  else
    with_gdrcopy=guess
  fi


  if test "x$with_gdrcopy" != "xno"; then :
    save_CPPFLAGS="$CPPFLAGS"
    save_CFLAGS="$CFLAGS"
    save_LDFLAGS="$LDFLAGS"

    # An explicit --with-gdrcopy=DIR supplies the include and lib/lib64 dirs.
    if test -n "$with_gdrcopy" && test "x$with_gdrcopy" != "xyes" && test "x$with_gdrcopy" != "xguess"; then :

      ucx_check_gdrcopy_dir="$with_gdrcopy"
      if test -d "$with_gdrcopy/lib64"; then :
        libsuff="64"
      else
        libsuff=""
      fi
      ucx_check_gdrcopy_libdir="$with_gdrcopy/lib$libsuff"
      CPPFLAGS="-I$with_gdrcopy/include $save_CPPFLAGS"
      LDFLAGS="-L$ucx_check_gdrcopy_libdir $save_LDFLAGS"

    fi
    # with_gdrcopy_libdir, when set, overrides the library directory.
    if test -n "$with_gdrcopy_libdir" && test "x$with_gdrcopy_libdir" != "xyes"; then :
      ucx_check_gdrcopy_libdir="$with_gdrcopy_libdir"
      LDFLAGS="-L$ucx_check_gdrcopy_libdir $save_LDFLAGS"
    fi

    for ac_header in gdrapi.h
    do :
      ac_fn_c_check_header_mongrel "$LINENO" "gdrapi.h" "ac_cv_header_gdrapi_h" "$ac_includes_default"
      if test "x$ac_cv_header_gdrapi_h" = xyes; then :
        cat >>confdefs.h <<_ACEOF
#define HAVE_GDRAPI_H 1
_ACEOF
        as_ac_Lib=`$as_echo "ac_cv_lib_gdrapi ''_gdr_pin_buffer" | $as_tr_sh`
        { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gdr_pin_buffer in -lgdrapi " >&5
          $as_echo_n "checking for gdr_pin_buffer in -lgdrapi ... " >&6; }
        if eval \${$as_ac_Lib+:} false; then :
          $as_echo_n "(cached) " >&6
        else
          ac_check_lib_save_LIBS=$LIBS
          LIBS="-lgdrapi $LIBS"
          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h.  */

/* Override any GCC internal prototype to avoid an error.
   Use char because int might match the return type of a GCC
   builtin and then its argument prototype would still apply.  */
#ifdef __cplusplus
extern "C"
#endif
char gdr_pin_buffer ();
int
main ()
{
return gdr_pin_buffer ();
  ;
  return 0;
}
_ACEOF
          if ac_fn_c_try_link "$LINENO"; then :
            eval "$as_ac_Lib=yes"
          else
            eval "$as_ac_Lib=no"
          fi
          rm -f core conftest.err conftest.$ac_objext \
              conftest$ac_exeext conftest.$ac_ext
          LIBS=$ac_check_lib_save_LIBS
        fi
        eval ac_res=\$$as_ac_Lib
        { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
          $as_echo "$ac_res" >&6; }
        if eval test \"x\$"$as_ac_Lib"\" = x"yes"; then :
          gdrcopy_happy="yes"
        else
          { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: GDR_COPY runtime not detected. Disable." >&5
            $as_echo "$as_me: WARNING: GDR_COPY runtime not detected. Disable." >&2;}
          gdrcopy_happy="no"
        fi


      else
        gdrcopy_happy="no"
      fi

    done


    if test "x$gdrcopy_happy" = "xyes"; then :
      ac_fn_c_check_decl "$LINENO" "gdr_copy_to_mapping" "ac_cv_have_decl_gdr_copy_to_mapping" "#include \"gdrapi.h\"
"
      if test "x$ac_cv_have_decl_gdr_copy_to_mapping" = xyes; then :
        ac_have_decl=1
      else
        ac_have_decl=0
      fi

      cat >>confdefs.h <<_ACEOF
#define HAVE_DECL_GDR_COPY_TO_MAPPING $ac_have_decl
_ACEOF

    fi

    CFLAGS="$save_CFLAGS"
    CPPFLAGS="$save_CPPFLAGS"
    LDFLAGS="$save_LDFLAGS"

    if test "x$gdrcopy_happy" = "xyes"; then :

      # Publish the directories that were actually probed. Previously the
      # link flag hard-coded "$dir/lib64" (ignoring both the lib/lib64
      # detection and with_gdrcopy_libdir), and in guess mode the unset
      # directory produced "-I/include/" and "-L/lib64".
      if test -n "$ucx_check_gdrcopy_dir"; then
        GDR_COPY_CPPFLAGS="-I$ucx_check_gdrcopy_dir/include/ "
      else
        GDR_COPY_CPPFLAGS=""
      fi

      if test -n "$ucx_check_gdrcopy_libdir"; then
        GDR_COPY_LDFLAGS="-lgdrapi -L$ucx_check_gdrcopy_libdir"
      else
        GDR_COPY_LDFLAGS="-lgdrapi"
      fi

    else

      # Only an explicit request turns a failed probe into a hard error.
      if test "x$with_gdrcopy" != "xguess"; then :
        as_fn_error $? "gdrcopy support is requested but gdrcopy packages cannot be found" "$LINENO" 5
      else
        { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: GDR_COPY not found" >&5
          $as_echo "$as_me: WARNING: GDR_COPY not found" >&2;}
      fi

    fi


  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: GDR_COPY was explicitly disabled" >&5
      $as_echo "$as_me: WARNING: GDR_COPY was explicitly disabled" >&2;}
  fi

  gdrcopy_checked=yes
  if test "x$gdrcopy_happy" != xno; then
    HAVE_GDR_COPY_TRUE=
    HAVE_GDR_COPY_FALSE='#'
  else
    HAVE_GDR_COPY_TRUE='#'
    HAVE_GDR_COPY_FALSE=
  fi



fi



if test "x$gdrcopy_happy" = "xyes"; then :
  uct_cuda_modules="${uct_cuda_modules}:gdrcopy"
fi
ac_config_files="$ac_config_files src/uct/cuda/gdr_copy/Makefile"



cat >>confdefs.h <<_ACEOF
#define uct_cuda_MODULES "${uct_cuda_modules}"
_ACEOF

ac_config_files="$ac_config_files src/uct/cuda/Makefile"


#
# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#



# Check whether --with-verbs was given.
if test "${with_verbs+set}" = set; then :
  withval=$with_verbs;
else
  with_verbs=/usr
fi


# A bare --with-verbs means the default prefix; IB support is attempted only
# when the chosen prefix is an existing directory.
if test "x$with_verbs" = "xyes"; then :
  with_verbs=/usr
fi
if test -d "$with_verbs"; then :
  with_ib=yes; str="with verbs support from $with_verbs"
else
  with_ib=no; str="without verbs support"
fi
if test -d "$with_verbs/lib64"; then :
  libsuff="64"
else
  libsuff=""
fi

{ $as_echo "$as_me:${as_lineno-$LINENO}: Compiling $str" >&5
  $as_echo "$as_me: Compiling $str" >&6;}

#
# RC Support
#

# Check whether --with-rc was given.
if test "${with_rc+set}" = set; then :
  withval=$with_rc;
else
  with_rc=yes
fi



#
# UD Support
#

# Check whether --with-ud was given.
+if test "${with_ud+set}" = set; then : + withval=$with_ud; +else + with_ud=yes +fi + + + +# +# DC Support +# + +# Check whether --with-dc was given. +if test "${with_dc+set}" = set; then : + withval=$with_dc; +else + with_dc=yes +fi + + + +# +# mlx5 DV support +# + +# Check whether --with-mlx5-dv was given. +if test "${with_mlx5_dv+set}" = set; then : + withval=$with_mlx5_dv; +fi + + +# +# TM (IB Tag Matching) Support +# + +# Check whether --with-ib-hw-tm was given. +if test "${with_ib_hw_tm+set}" = set; then : + withval=$with_ib_hw_tm; +else + with_ib_hw_tm=yes +fi + + + +# +# DM Support +# + +# Check whether --with-dm was given. +if test "${with_dm+set}" = set; then : + withval=$with_dm; +else + with_dm=yes +fi + + +# +# DEVX Support +# + +# Check whether --with-devx was given. +if test "${with_devx+set}" = set; then : + withval=$with_devx; +else + with_devx=check +fi + + +# +# Check basic IB support: User wanted at least one IB transport, and we found +# verbs header file and library. +# +if test "x$with_ib" = "xyes"; then : + + save_LDFLAGS="$LDFLAGS" + save_CFLAGS="$CFLAGS" + save_CPPFLAGS="$CPPFLAGS" + if test "x/usr" = "x$with_verbs"; then : + +else + verbs_incl="-I$with_verbs/include" + verbs_libs="-L$with_verbs/lib$libsuff" +fi + LDFLAGS="$verbs_libs $LDFLAGS" + CFLAGS="$verbs_incl $CFLAGS" + CPPFLAGS="$verbs_incl $CPPFLAGS" + ac_fn_c_check_header_mongrel "$LINENO" "infiniband/verbs.h" "ac_cv_header_infiniband_verbs_h" "$ac_includes_default" +if test "x$ac_cv_header_infiniband_verbs_h" = xyes; then : + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: ibverbs header files not found" >&5 +$as_echo "$as_me: WARNING: ibverbs header files not found" >&2;}; with_ib=no +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ibv_get_device_list in -libverbs" >&5 +$as_echo_n "checking for ibv_get_device_list in -libverbs... 
" >&6; } +if ${ac_cv_lib_ibverbs_ibv_get_device_list+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-libverbs $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char ibv_get_device_list (); +int +main () +{ +return ibv_get_device_list (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_ibverbs_ibv_get_device_list=yes +else + ac_cv_lib_ibverbs_ibv_get_device_list=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ibverbs_ibv_get_device_list" >&5 +$as_echo "$ac_cv_lib_ibverbs_ibv_get_device_list" >&6; } +if test "x$ac_cv_lib_ibverbs_ibv_get_device_list" = xyes; then : + + IBVERBS_LDFLAGS="$verbs_libs -libverbs" + + IBVERBS_DIR="$with_verbs" + + IBVERBS_CPPFLAGS="$verbs_incl" + + IBVERBS_CFLAGS="$verbs_incl" + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: libibverbs not found" >&5 +$as_echo "$as_me: WARNING: libibverbs not found" >&2;}; with_ib=no +fi + + + have_ib_funcs=yes + LDFLAGS="$LDFLAGS $IBVERBS_LDFLAGS" + ac_fn_c_check_decl "$LINENO" "ibv_wc_status_str" "ac_cv_have_decl_ibv_wc_status_str" "#include +" +if test "x$ac_cv_have_decl_ibv_wc_status_str" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_WC_STATUS_STR $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ib_funcs=no +fi +ac_fn_c_check_decl "$LINENO" "ibv_event_type_str" "ac_cv_have_decl_ibv_event_type_str" "#include +" +if test "x$ac_cv_have_decl_ibv_event_type_str" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF 
+#define HAVE_DECL_IBV_EVENT_TYPE_STR $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ib_funcs=no +fi +ac_fn_c_check_decl "$LINENO" "ibv_query_gid" "ac_cv_have_decl_ibv_query_gid" "#include +" +if test "x$ac_cv_have_decl_ibv_query_gid" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_QUERY_GID $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ib_funcs=no +fi +ac_fn_c_check_decl "$LINENO" "ibv_get_device_name" "ac_cv_have_decl_ibv_get_device_name" "#include +" +if test "x$ac_cv_have_decl_ibv_get_device_name" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_GET_DEVICE_NAME $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ib_funcs=no +fi +ac_fn_c_check_decl "$LINENO" "ibv_create_srq" "ac_cv_have_decl_ibv_create_srq" "#include +" +if test "x$ac_cv_have_decl_ibv_create_srq" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_CREATE_SRQ $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ib_funcs=no +fi +ac_fn_c_check_decl "$LINENO" "ibv_get_async_event" "ac_cv_have_decl_ibv_get_async_event" "#include +" +if test "x$ac_cv_have_decl_ibv_get_async_event" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_GET_ASYNC_EVENT $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ib_funcs=no +fi + + if test "x$have_ib_funcs" != xyes; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Some IB verbs are not found. Please make sure OFED version is 1.5 or above." >&5 +$as_echo "$as_me: WARNING: Some IB verbs are not found. Please make sure OFED version is 1.5 or above." 
>&2;} + with_ib=no +fi + + LDFLAGS="$save_LDFLAGS" + CFLAGS="$save_CFLAGS" + CPPFLAGS="$save_CPPFLAGS" + +else + : +fi + +if test "x$with_ib" = "xyes"; then : + + save_LDFLAGS="$LDFLAGS" + save_CFLAGS="$CFLAGS" + save_CPPFLAGS="$CPPFLAGS" + LDFLAGS="$IBVERBS_LDFLAGS $LDFLAGS" + CFLAGS="$IBVERBS_CFLAGS $CFLAGS" + CPPFLAGS="$IBVERBS_CPPFLAGS $CPPFLAGS" + ac_fn_c_check_header_mongrel "$LINENO" "infiniband/verbs_exp.h" "ac_cv_header_infiniband_verbs_exp_h" "$ac_includes_default" +if test "x$ac_cv_header_infiniband_verbs_exp_h" = xyes; then : + +$as_echo "#define HAVE_VERBS_EXP_H 1" >>confdefs.h + + verbs_exp=yes +else + verbs_exp=no +fi + + + + ac_fn_c_check_member "$LINENO" "struct ibv_exp_device_attr" "exp_device_cap_flags" "ac_cv_member_struct_ibv_exp_device_attr_exp_device_cap_flags" "#include +" +if test "x$ac_cv_member_struct_ibv_exp_device_attr_exp_device_cap_flags" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_EXP_DEVICE_CAP_FLAGS 1 +_ACEOF + + +fi +ac_fn_c_check_member "$LINENO" "struct ibv_exp_device_attr" "odp_caps" "ac_cv_member_struct_ibv_exp_device_attr_odp_caps" "#include +" +if test "x$ac_cv_member_struct_ibv_exp_device_attr_odp_caps" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_CAPS 1 +_ACEOF + + +fi +ac_fn_c_check_member "$LINENO" "struct ibv_exp_device_attr" "odp_caps.per_transport_caps.dc_odp_caps" "ac_cv_member_struct_ibv_exp_device_attr_odp_caps_per_transport_caps_dc_odp_caps" "#include +" +if test "x$ac_cv_member_struct_ibv_exp_device_attr_odp_caps_per_transport_caps_dc_odp_caps" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_CAPS_PER_TRANSPORT_CAPS_DC_ODP_CAPS 1 +_ACEOF + + +fi +ac_fn_c_check_member "$LINENO" "struct ibv_exp_device_attr" "odp_mr_max_size" "ac_cv_member_struct_ibv_exp_device_attr_odp_mr_max_size" "#include +" +if test "x$ac_cv_member_struct_ibv_exp_device_attr_odp_mr_max_size" = xyes; then : + +cat 
>>confdefs.h <<_ACEOF +#define HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_MR_MAX_SIZE 1 +_ACEOF + + +fi +ac_fn_c_check_member "$LINENO" "struct ibv_exp_qp_init_attr" "max_inl_recv" "ac_cv_member_struct_ibv_exp_qp_init_attr_max_inl_recv" "#include +" +if test "x$ac_cv_member_struct_ibv_exp_qp_init_attr_max_inl_recv" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_IBV_EXP_QP_INIT_ATTR_MAX_INL_RECV 1 +_ACEOF + + +fi +ac_fn_c_check_member "$LINENO" "struct ibv_async_event" "element.dct" "ac_cv_member_struct_ibv_async_event_element_dct" "#include +" +if test "x$ac_cv_member_struct_ibv_async_event_element_dct" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_IBV_ASYNC_EVENT_ELEMENT_DCT 1 +_ACEOF + + +fi + + + ac_fn_c_check_decl "$LINENO" "IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN" "ac_cv_have_decl_IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN" "#include +" +if test "x$ac_cv_have_decl_IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + have_cq_io=yes +fi + + + ac_fn_c_check_decl "$LINENO" "IBV_EXP_CQ_IGNORE_OVERRUN" "ac_cv_have_decl_IBV_EXP_CQ_IGNORE_OVERRUN" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_CQ_IGNORE_OVERRUN" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_CQ_IGNORE_OVERRUN $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + have_cq_io=yes +fi + + + if test "x$with_mlx5_dv" != xno; then : + + { $as_echo "$as_me:${as_lineno-$LINENO}: Checking for legacy bare-metal support" >&5 +$as_echo "$as_me: Checking for legacy bare-metal support" >&6;} + for ac_header in infiniband/mlx5_hw.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "infiniband/mlx5_hw.h" "ac_cv_header_infiniband_mlx5_hw_h" "$ac_includes_default" +if test "x$ac_cv_header_infiniband_mlx5_hw_h" = xyes; then : + cat >>confdefs.h <<_ACEOF 
+#define HAVE_INFINIBAND_MLX5_HW_H 1 +_ACEOF + with_mlx5_hw=yes + mlx5_include=mlx5_hw.h + ac_fn_c_check_decl "$LINENO" "ibv_mlx5_exp_get_qp_info" "ac_cv_have_decl_ibv_mlx5_exp_get_qp_info" "#include +" +if test "x$ac_cv_have_decl_ibv_mlx5_exp_get_qp_info" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_MLX5_EXP_GET_QP_INFO $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_mlx5_exp_get_cq_info" "ac_cv_have_decl_ibv_mlx5_exp_get_cq_info" "#include +" +if test "x$ac_cv_have_decl_ibv_mlx5_exp_get_cq_info" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_MLX5_EXP_GET_CQ_INFO $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_mlx5_exp_get_srq_info" "ac_cv_have_decl_ibv_mlx5_exp_get_srq_info" "#include +" +if test "x$ac_cv_have_decl_ibv_mlx5_exp_get_srq_info" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_MLX5_EXP_GET_SRQ_INFO $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_mlx5_exp_update_cq_ci" "ac_cv_have_decl_ibv_mlx5_exp_update_cq_ci" "#include +" +if test "x$ac_cv_have_decl_ibv_mlx5_exp_update_cq_ci" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_MLX5_EXP_UPDATE_CQ_CI $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "MLX5_WQE_CTRL_SOLICITED" "ac_cv_have_decl_MLX5_WQE_CTRL_SOLICITED" "#include +" +if test "x$ac_cv_have_decl_MLX5_WQE_CTRL_SOLICITED" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_MLX5_WQE_CTRL_SOLICITED $ac_have_decl +_ACEOF + + ac_fn_c_check_member "$LINENO" "struct mlx5_srq" "cmd_qp" "ac_cv_member_struct_mlx5_srq_cmd_qp" "#include +" +if test "x$ac_cv_member_struct_mlx5_srq_cmd_qp" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_MLX5_SRQ_CMD_QP 1 +_ACEOF + + +else + with_ib_hw_tm=no +fi + + 
ac_fn_c_check_member "$LINENO" "struct mlx5_ah" "ibv_ah" "ac_cv_member_struct_mlx5_ah_ibv_ah" "#include +" +if test "x$ac_cv_member_struct_mlx5_ah_ibv_ah" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_MLX5_AH_IBV_AH 1 +_ACEOF + +has_get_av=yes +fi + + ac_fn_c_check_member "$LINENO" "struct ibv_mlx5_qp_info" "bf.need_lock" "ac_cv_member_struct_ibv_mlx5_qp_info_bf_need_lock" "#include +" +if test "x$ac_cv_member_struct_ibv_mlx5_qp_info_bf_need_lock" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_IBV_MLX5_QP_INFO_BF_NEED_LOCK 1 +_ACEOF + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Cannot use mlx5 QP because it assumes dedicated BF" >&5 +$as_echo "$as_me: WARNING: Cannot use mlx5 QP because it assumes dedicated BF" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Please upgrade MellanoxOFED to 3.0 or above" >&5 +$as_echo "$as_me: WARNING: Please upgrade MellanoxOFED to 3.0 or above" >&2;} + with_mlx5_hw=no +fi + + ac_fn_c_check_decl "$LINENO" "IBV_EXP_QP_INIT_ATTR_RES_DOMAIN" "ac_cv_have_decl_IBV_EXP_QP_INIT_ATTR_RES_DOMAIN" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_QP_INIT_ATTR_RES_DOMAIN" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_QP_INIT_ATTR_RES_DOMAIN $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +$as_echo "#define HAVE_IBV_EXP_RES_DOMAIN 1" >>confdefs.h + + has_res_domain=yes +fi +ac_fn_c_check_decl "$LINENO" "IBV_EXP_RES_DOMAIN_THREAD_MODEL" "ac_cv_have_decl_IBV_EXP_RES_DOMAIN_THREAD_MODEL" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_RES_DOMAIN_THREAD_MODEL" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_RES_DOMAIN_THREAD_MODEL $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +$as_echo "#define HAVE_IBV_EXP_RES_DOMAIN 1" >>confdefs.h + + has_res_domain=yes +fi +ac_fn_c_check_decl "$LINENO" "ibv_exp_create_res_domain" 
"ac_cv_have_decl_ibv_exp_create_res_domain" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_create_res_domain" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_CREATE_RES_DOMAIN $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +$as_echo "#define HAVE_IBV_EXP_RES_DOMAIN 1" >>confdefs.h + + has_res_domain=yes +fi +ac_fn_c_check_decl "$LINENO" "ibv_exp_destroy_res_domain" "ac_cv_have_decl_ibv_exp_destroy_res_domain" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_destroy_res_domain" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_DESTROY_RES_DOMAIN $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +$as_echo "#define HAVE_IBV_EXP_RES_DOMAIN 1" >>confdefs.h + + has_res_domain=yes +fi + + +else + with_mlx5_hw=no +fi + +done + + + { $as_echo "$as_me:${as_lineno-$LINENO}: Checking for DV bare-metal support" >&5 +$as_echo "$as_me: Checking for DV bare-metal support" >&6;} + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mlx5dv_query_device in -lmlx5-rdmav2" >&5 +$as_echo_n "checking for mlx5dv_query_device in -lmlx5-rdmav2... " >&6; } +if ${ac_cv_lib_mlx5_rdmav2_mlx5dv_query_device+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lmlx5-rdmav2 -libverbs $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char mlx5dv_query_device (); +int +main () +{ +return mlx5dv_query_device (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_mlx5_rdmav2_mlx5dv_query_device=yes +else + ac_cv_lib_mlx5_rdmav2_mlx5dv_query_device=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mlx5_rdmav2_mlx5dv_query_device" >&5 +$as_echo "$ac_cv_lib_mlx5_rdmav2_mlx5dv_query_device" >&6; } +if test "x$ac_cv_lib_mlx5_rdmav2_mlx5dv_query_device" = xyes; then : + LIB_MLX5=-lmlx5-rdmav2 + +else + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mlx5dv_query_device in -lmlx5" >&5 +$as_echo_n "checking for mlx5dv_query_device in -lmlx5... " >&6; } +if ${ac_cv_lib_mlx5_mlx5dv_query_device+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lmlx5 -libverbs $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char mlx5dv_query_device (); +int +main () +{ +return mlx5dv_query_device (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_mlx5_mlx5dv_query_device=yes +else + ac_cv_lib_mlx5_mlx5dv_query_device=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mlx5_mlx5dv_query_device" >&5 +$as_echo "$ac_cv_lib_mlx5_mlx5dv_query_device" >&6; } +if test "x$ac_cv_lib_mlx5_mlx5dv_query_device" = xyes; then : + LIB_MLX5=-lmlx5 + +else + with_mlx5_dv=no +fi + +fi + + + if test "x$with_mlx5_dv" != xno; then : + + for ac_header in infiniband/mlx5dv.h +do : + ac_fn_c_check_header_compile "$LINENO" "infiniband/mlx5dv.h" "ac_cv_header_infiniband_mlx5dv_h" " +" +if test "x$ac_cv_header_infiniband_mlx5dv_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_INFINIBAND_MLX5DV_H 1 +_ACEOF + with_mlx5_hw=yes + with_mlx5_dv=yes + mlx5_include=mlx5dv.h +fi + +done + +fi + + if test "x$with_mlx5_dv" = "xyes" -a "x$have_cq_io" = "xyes" ; then : + + ac_fn_c_check_decl "$LINENO" "mlx5dv_init_obj" "ac_cv_have_decl_mlx5dv_init_obj" "#include +" +if test "x$ac_cv_have_decl_mlx5dv_init_obj" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_MLX5DV_INIT_OBJ $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "mlx5dv_create_qp" "ac_cv_have_decl_mlx5dv_create_qp" "#include +" +if test "x$ac_cv_have_decl_mlx5dv_create_qp" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_MLX5DV_CREATE_QP $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "mlx5dv_is_supported" "ac_cv_have_decl_mlx5dv_is_supported" "#include +" +if test "x$ac_cv_have_decl_mlx5dv_is_supported" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_MLX5DV_IS_SUPPORTED 
$ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE" "ac_cv_have_decl_MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE" "#include +" +if test "x$ac_cv_have_decl_MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE" "ac_cv_have_decl_MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE" "#include +" +if test "x$ac_cv_have_decl_MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE $ac_have_decl +_ACEOF + + ac_fn_c_check_member "$LINENO" "struct mlx5dv_cq" "cq_uar" "ac_cv_member_struct_mlx5dv_cq_cq_uar" "#include +" +if test "x$ac_cv_member_struct_mlx5dv_cq_cq_uar" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_MLX5DV_CQ_CQ_UAR 1 +_ACEOF + + +fi + + ac_fn_c_check_decl "$LINENO" "MLX5DV_OBJ_AH" "ac_cv_have_decl_MLX5DV_OBJ_AH" "#include +" +if test "x$ac_cv_have_decl_MLX5DV_OBJ_AH" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_MLX5DV_OBJ_AH $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + has_get_av=yes +fi + + ac_fn_c_check_decl "$LINENO" "MLX5DV_DCTYPE_DCT" "ac_cv_have_decl_MLX5DV_DCTYPE_DCT" "#include +" +if test "x$ac_cv_have_decl_MLX5DV_DCTYPE_DCT" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_MLX5DV_DCTYPE_DCT $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + have_dc_dv=yes +fi + + ac_fn_c_check_decl "$LINENO" "ibv_alloc_td" "ac_cv_have_decl_ibv_alloc_td" "#include +" +if test "x$ac_cv_have_decl_ibv_alloc_td" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_ALLOC_TD $ac_have_decl +_ACEOF +if test 
$ac_have_decl = 1; then : + has_res_domain=yes +fi + +fi + + ac_fn_c_check_decl "$LINENO" "ibv_alloc_td" "ac_cv_have_decl_ibv_alloc_td" "#include +" +if test "x$ac_cv_have_decl_ibv_alloc_td" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_ALLOC_TD $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + has_res_domain=yes +fi + +fi + + if test "x$with_devx" != xno; then : + + ac_fn_c_check_decl "$LINENO" "MLX5DV_CONTEXT_FLAGS_DEVX" "ac_cv_have_decl_MLX5DV_CONTEXT_FLAGS_DEVX" "#include +" +if test "x$ac_cv_have_decl_MLX5DV_CONTEXT_FLAGS_DEVX" = xyes; then : + + +$as_echo "#define HAVE_DEVX 1" >>confdefs.h + + have_devx=yes + +else + + if test "x$with_devx" != xcheck; then : + as_fn_error $? "devx requested but not found" "$LINENO" 5 +fi + +fi + +fi + + if test "x$has_res_domain" = "xyes" -a "x$have_cq_io" = "xyes" ; then : + +else + + with_mlx5_hw=no +fi + + if test "x$with_mlx5_hw" = "xyes"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling with mlx5 bare-metal support" >&5 +$as_echo "$as_me: Compiling with mlx5 bare-metal support" >&6;} + +$as_echo "#define HAVE_MLX5_HW 1" >>confdefs.h + + if test "x$has_get_av" = "xyes"; then : + +$as_echo "#define HAVE_MLX5_HW_UD 1" >>confdefs.h + +fi +fi + + ac_fn_c_check_decl "$LINENO" "IBV_LINK_LAYER_INFINIBAND" "ac_cv_have_decl_IBV_LINK_LAYER_INFINIBAND" "#include +" +if test "x$ac_cv_have_decl_IBV_LINK_LAYER_INFINIBAND" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_LINK_LAYER_INFINIBAND $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_LINK_LAYER_ETHERNET" "ac_cv_have_decl_IBV_LINK_LAYER_ETHERNET" "#include +" +if test "x$ac_cv_have_decl_IBV_LINK_LAYER_ETHERNET" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_LINK_LAYER_ETHERNET $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EVENT_GID_CHANGE" 
"ac_cv_have_decl_IBV_EVENT_GID_CHANGE" "#include +" +if test "x$ac_cv_have_decl_IBV_EVENT_GID_CHANGE" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EVENT_GID_CHANGE $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_create_qp_ex" "ac_cv_have_decl_ibv_create_qp_ex" "#include +" +if test "x$ac_cv_have_decl_ibv_create_qp_ex" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_CREATE_QP_EX $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_create_srq_ex" "ac_cv_have_decl_ibv_create_srq_ex" "#include +" +if test "x$ac_cv_have_decl_ibv_create_srq_ex" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_CREATE_SRQ_EX $ac_have_decl +_ACEOF + + + # We shouldn't confuse upstream ibv_query_device_ex with + # legacy MOFED one, distinguish by arguments number + ac_fn_c_check_decl "$LINENO" "ibv_query_device_ex" "ac_cv_have_decl_ibv_query_device_ex" "#include +" +if test "x$ac_cv_have_decl_ibv_query_device_ex" = xyes; then : + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main () +{ +ibv_query_device_ex(NULL, NULL, NULL) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +$as_echo "#define HAVE_DECL_IBV_QUERY_DEVICE_EX 1" >>confdefs.h + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + + + ac_fn_c_check_decl "$LINENO" "IBV_EXP_ACCESS_ALLOCATE_MR" "ac_cv_have_decl_IBV_EXP_ACCESS_ALLOCATE_MR" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_ACCESS_ALLOCATE_MR" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_ACCESS_ALLOCATE_MR $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EXP_ACCESS_ON_DEMAND" "ac_cv_have_decl_IBV_EXP_ACCESS_ON_DEMAND" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_ACCESS_ON_DEMAND" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_ACCESS_ON_DEMAND $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EXP_DEVICE_MR_ALLOCATE" "ac_cv_have_decl_IBV_EXP_DEVICE_MR_ALLOCATE" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_DEVICE_MR_ALLOCATE" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_DEVICE_MR_ALLOCATE $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EXP_WR_NOP" "ac_cv_have_decl_IBV_EXP_WR_NOP" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_WR_NOP" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_WR_NOP $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EXP_DEVICE_DC_TRANSPORT" "ac_cv_have_decl_IBV_EXP_DEVICE_DC_TRANSPORT" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_DEVICE_DC_TRANSPORT" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_DEVICE_DC_TRANSPORT $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EXP_ATOMIC_HCA_REPLY_BE" "ac_cv_have_decl_IBV_EXP_ATOMIC_HCA_REPLY_BE" "#include +" +if 
test "x$ac_cv_have_decl_IBV_EXP_ATOMIC_HCA_REPLY_BE" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EXP_PREFETCH_WRITE_ACCESS" "ac_cv_have_decl_IBV_EXP_PREFETCH_WRITE_ACCESS" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_PREFETCH_WRITE_ACCESS" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_PREFETCH_WRITE_ACCESS $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EXP_QP_OOO_RW_DATA_PLACEMENT" "ac_cv_have_decl_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT" "ac_cv_have_decl_IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EXP_CQ_MODERATION" "ac_cv_have_decl_IBV_EXP_CQ_MODERATION" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_CQ_MODERATION" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_CQ_MODERATION $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "IBV_EXP_DEVICE_ATTR_PCI_ATOMIC_CAPS" "ac_cv_have_decl_IBV_EXP_DEVICE_ATTR_PCI_ATOMIC_CAPS" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_DEVICE_ATTR_PCI_ATOMIC_CAPS" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_DEVICE_ATTR_PCI_ATOMIC_CAPS $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_exp_reg_mr" 
"ac_cv_have_decl_ibv_exp_reg_mr" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_reg_mr" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_REG_MR $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_exp_create_qp" "ac_cv_have_decl_ibv_exp_create_qp" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_create_qp" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_CREATE_QP $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_exp_prefetch_mr" "ac_cv_have_decl_ibv_exp_prefetch_mr" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_prefetch_mr" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_PREFETCH_MR $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_exp_create_srq" "ac_cv_have_decl_ibv_exp_create_srq" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_create_srq" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_CREATE_SRQ $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_exp_setenv" "ac_cv_have_decl_ibv_exp_setenv" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_setenv" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_SETENV $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_exp_query_gid_attr" "ac_cv_have_decl_ibv_exp_query_gid_attr" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_query_gid_attr" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_QUERY_GID_ATTR $ac_have_decl +_ACEOF +ac_fn_c_check_decl "$LINENO" "ibv_exp_query_device" "ac_cv_have_decl_ibv_exp_query_device" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_query_device" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define 
HAVE_DECL_IBV_EXP_QUERY_DEVICE $ac_have_decl +_ACEOF + + + ac_fn_c_check_decl "$LINENO" "ibv_exp_post_send" "ac_cv_have_decl_ibv_exp_post_send" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_post_send" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_POST_SEND $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ext_atomics=no +fi +ac_fn_c_check_decl "$LINENO" "IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP" "ac_cv_have_decl_IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ext_atomics=no +fi +ac_fn_c_check_decl "$LINENO" "IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD" "ac_cv_have_decl_IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ext_atomics=no +fi +ac_fn_c_check_decl "$LINENO" "IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG" "ac_cv_have_decl_IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ext_atomics=no +fi +ac_fn_c_check_decl "$LINENO" "IBV_EXP_SEND_EXT_ATOMIC_INLINE" "ac_cv_have_decl_IBV_EXP_SEND_EXT_ATOMIC_INLINE" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_SEND_EXT_ATOMIC_INLINE" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 
+fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_SEND_EXT_ATOMIC_INLINE $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +else + have_ext_atomics=no +fi + + + ac_fn_c_check_decl "$LINENO" "IBV_EXP_DEVICE_ATTR_RESERVED_2" "ac_cv_have_decl_IBV_EXP_DEVICE_ATTR_RESERVED_2" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_DEVICE_ATTR_RESERVED_2" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_DEVICE_ATTR_RESERVED_2 $ac_have_decl +_ACEOF + + + # UMR support + ac_fn_c_check_decl "$LINENO" "IBV_EXP_MR_INDIRECT_KLMS" "ac_cv_have_decl_IBV_EXP_MR_INDIRECT_KLMS" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_MR_INDIRECT_KLMS" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_MR_INDIRECT_KLMS $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +$as_echo "#define HAVE_EXP_UMR 1" >>confdefs.h + +fi + + + ac_fn_c_check_decl "$LINENO" "IBV_EXP_QP_CREATE_UMR" "ac_cv_have_decl_IBV_EXP_QP_CREATE_UMR" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_QP_CREATE_UMR" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_QP_CREATE_UMR $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +$as_echo "#define HAVE_IBV_EXP_QP_CREATE_UMR 1" >>confdefs.h + +fi + + + ac_fn_c_check_member "$LINENO" "struct ibv_exp_qp_init_attr" "umr_caps" "ac_cv_member_struct_ibv_exp_qp_init_attr_umr_caps" "#include +" +if test "x$ac_cv_member_struct_ibv_exp_qp_init_attr_umr_caps" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_IBV_EXP_QP_INIT_ATTR_UMR_CAPS 1 +_ACEOF + + +$as_echo "#define HAVE_IBV_EXP_QP_CREATE_UMR_CAPS 1" >>confdefs.h + +fi + + + ac_fn_c_check_decl "$LINENO" "IBV_EXP_MR_FIXED_BUFFER_SIZE" "ac_cv_have_decl_IBV_EXP_MR_FIXED_BUFFER_SIZE" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_MR_FIXED_BUFFER_SIZE" = xyes; then : + ac_have_decl=1 +else + 
ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_MR_FIXED_BUFFER_SIZE $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +$as_echo "#define HAVE_EXP_UMR_KSM 1" >>confdefs.h + +fi + + + # Extended atomics + if test "x$have_ext_atomics" != xno; then : + +$as_echo "#define HAVE_IB_EXT_ATOMICS 1" >>confdefs.h + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Compiling without extended atomics support" >&5 +$as_echo "$as_me: WARNING: Compiling without extended atomics support" >&2;} +fi + + # Check for driver which exposes masked atomics endianity per size + ac_fn_c_check_member "$LINENO" "struct ibv_exp_masked_atomic_params" "masked_log_atomic_arg_sizes_network_endianness" "ac_cv_member_struct_ibv_exp_masked_atomic_params_masked_log_atomic_arg_sizes_network_endianness" "#include +" +if test "x$ac_cv_member_struct_ibv_exp_masked_atomic_params_masked_log_atomic_arg_sizes_network_endianness" = xyes; then : + +$as_echo "#define HAVE_MASKED_ATOMICS_ENDIANNESS 1" >>confdefs.h + +fi + + + ac_fn_c_check_decl "$LINENO" "IBV_EXP_ODP_SUPPORT_IMPLICIT" "ac_cv_have_decl_IBV_EXP_ODP_SUPPORT_IMPLICIT" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_ODP_SUPPORT_IMPLICIT" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_ODP_SUPPORT_IMPLICIT $ac_have_decl +_ACEOF + + + ac_fn_c_check_decl "$LINENO" "IBV_EXP_ACCESS_ON_DEMAND" "ac_cv_have_decl_IBV_EXP_ACCESS_ON_DEMAND" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_ACCESS_ON_DEMAND" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_ACCESS_ON_DEMAND $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + with_odp=yes +fi + + + ac_fn_c_check_decl "$LINENO" "IBV_ACCESS_ON_DEMAND" "ac_cv_have_decl_IBV_ACCESS_ON_DEMAND" "#include +" +if test "x$ac_cv_have_decl_IBV_ACCESS_ON_DEMAND" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h 
<<_ACEOF +#define HAVE_DECL_IBV_ACCESS_ON_DEMAND $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + with_odp=yes +fi + + + if test "x$with_odp" = "xyes" ; then : + + +$as_echo "#define HAVE_ODP 1" >>confdefs.h + + + ac_fn_c_check_decl "$LINENO" "IBV_EXP_ODP_SUPPORT_IMPLICIT" "ac_cv_have_decl_IBV_EXP_ODP_SUPPORT_IMPLICIT" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_ODP_SUPPORT_IMPLICIT" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_ODP_SUPPORT_IMPLICIT $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + with_odp_i=yes +fi + + + ac_fn_c_check_decl "$LINENO" "IBV_ODP_SUPPORT_IMPLICIT" "ac_cv_have_decl_IBV_ODP_SUPPORT_IMPLICIT" "#include +" +if test "x$ac_cv_have_decl_IBV_ODP_SUPPORT_IMPLICIT" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_ODP_SUPPORT_IMPLICIT $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + with_odp_i=yes +fi + + + if test "x$with_odp_i" = "xyes" ; then : + + +$as_echo "#define HAVE_ODP_IMPLICIT 1" >>confdefs.h + +fi +fi + + ac_fn_c_check_decl "$LINENO" "ibv_exp_prefetch_mr" "ac_cv_have_decl_ibv_exp_prefetch_mr" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_prefetch_mr" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_PREFETCH_MR $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + with_prefetch=yes +fi + + + ac_fn_c_check_decl "$LINENO" "ibv_advise_mr" "ac_cv_have_decl_ibv_advise_mr" "#include +" +if test "x$ac_cv_have_decl_ibv_advise_mr" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_ADVISE_MR $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + with_prefetch=yes +fi + + + if test "x$with_prefetch" = "xyes" ; then : + + +$as_echo "#define HAVE_PREFETCH 1" >>confdefs.h + +fi + + ac_fn_c_check_member "$LINENO" "struct mlx5_wqe_av" "base" 
"ac_cv_member_struct_mlx5_wqe_av_base" "#include +" +if test "x$ac_cv_member_struct_mlx5_wqe_av_base" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_MLX5_WQE_AV_BASE 1 +_ACEOF + + +fi +ac_fn_c_check_member "$LINENO" "struct mlx5_grh_av" "rmac" "ac_cv_member_struct_mlx5_grh_av_rmac" "#include +" +if test "x$ac_cv_member_struct_mlx5_grh_av_rmac" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_MLX5_GRH_AV_RMAC 1 +_ACEOF + + +fi + + + ac_fn_c_check_member "$LINENO" "struct mlx5_cqe64" "ib_stride_index" "ac_cv_member_struct_mlx5_cqe64_ib_stride_index" "#include +" +if test "x$ac_cv_member_struct_mlx5_cqe64_ib_stride_index" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_MLX5_CQE64_IB_STRIDE_INDEX 1 +_ACEOF + + +fi + + + +$as_echo "#define HAVE_IB 1" >>confdefs.h + + + ac_fn_c_check_decl "$LINENO" "IBV_EXP_QPT_DC_INI" "ac_cv_have_decl_IBV_EXP_QPT_DC_INI" "#include +" +if test "x$ac_cv_have_decl_IBV_EXP_QPT_DC_INI" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_QPT_DC_INI $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + have_dc_exp=yes +fi + + + if test "x$with_dc" != xno -a \( "x$have_dc_exp" = xyes -o "x$have_dc_dv" = xyes \) -a "x$with_mlx5_hw" = "xyes"; then : + + +$as_echo "#define HAVE_TL_DC 1" >>confdefs.h + + if test -n "$have_dc_dv"; then : + +$as_echo "#define HAVE_DC_DV 1" >>confdefs.h + +else + + if test -n "$have_dc_exp"; then : + +$as_echo "#define HAVE_DC_EXP 1" >>confdefs.h + +fi +fi +else + with_dc=no +fi + + if test "x$with_rc" != xno; then : + +$as_echo "#define HAVE_TL_RC 1" >>confdefs.h + +fi + + if test "x$with_ud" != xno; then : + +$as_echo "#define HAVE_TL_UD 1" >>confdefs.h + +fi + + # XRQ with Tag Matching support + if test "x$with_ib_hw_tm" != xno; then : + for ac_header in infiniband/tm_types.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "infiniband/tm_types.h" "ac_cv_header_infiniband_tm_types_h" 
"$ac_includes_default" +if test "x$ac_cv_header_infiniband_tm_types_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_INFINIBAND_TM_TYPES_H 1 +_ACEOF + +fi + +done + + ac_fn_c_check_member "$LINENO" "struct ibv_exp_tmh" "tag" "ac_cv_member_struct_ibv_exp_tmh_tag" "#include +" +if test "x$ac_cv_member_struct_ibv_exp_tmh_tag" = xyes; then : + with_ib_hw_tm=exp +fi + + ac_fn_c_check_member "$LINENO" "struct ibv_tmh" "tag" "ac_cv_member_struct_ibv_tmh_tag" "#include +" +if test "x$ac_cv_member_struct_ibv_tmh_tag" = xyes; then : + with_ib_hw_tm=upstream +fi + + +fi + if test "x$with_ib_hw_tm" = xexp; then : + ac_fn_c_check_member "$LINENO" "struct ibv_exp_create_srq_attr" "dc_offload_params" "ac_cv_member_struct_ibv_exp_create_srq_attr_dc_offload_params" "#include +" +if test "x$ac_cv_member_struct_ibv_exp_create_srq_attr_dc_offload_params" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_IBV_EXP_CREATE_SRQ_ATTR_DC_OFFLOAD_PARAMS 1 +_ACEOF + + + +$as_echo "#define IBV_HW_TM 1" >>confdefs.h + + +fi + + +fi + if test "x$with_ib_hw_tm" = xupstream; then : + +$as_echo "#define IBV_HW_TM 1" >>confdefs.h + + ac_fn_c_check_member "$LINENO" "struct ibv_tm_caps" "flags" "ac_cv_member_struct_ibv_tm_caps_flags" "#include +" +if test "x$ac_cv_member_struct_ibv_tm_caps_flags" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE_STRUCT_IBV_TM_CAPS_FLAGS 1 +_ACEOF + + +fi + +fi + + # Device Memory support + if test "x$with_dm" != xno; then : + + ac_fn_c_check_decl "$LINENO" "ibv_exp_alloc_dm" "ac_cv_have_decl_ibv_exp_alloc_dm" "#include +" +if test "x$ac_cv_have_decl_ibv_exp_alloc_dm" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_EXP_ALLOC_DM $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +$as_echo "#define HAVE_IBV_DM 1" >>confdefs.h + + +$as_echo "#define HAVE_IBV_EXP_DM 1" >>confdefs.h + +fi + + ac_fn_c_check_decl "$LINENO" "ibv_alloc_dm" "ac_cv_have_decl_ibv_alloc_dm" 
"#include +" +if test "x$ac_cv_have_decl_ibv_alloc_dm" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_ALLOC_DM $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + +$as_echo "#define HAVE_IBV_DM 1" >>confdefs.h + +fi + +fi + + ac_fn_c_check_decl "$LINENO" "ibv_cmd_modify_qp" "ac_cv_have_decl_ibv_cmd_modify_qp" "#include +" +if test "x$ac_cv_have_decl_ibv_cmd_modify_qp" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_IBV_CMD_MODIFY_QP $ac_have_decl +_ACEOF + + + mlnx_valg_libdir=$with_verbs/lib${libsuff}/mlnx_ofed/valgrind + { $as_echo "$as_me:${as_lineno-$LINENO}: Checking OFED valgrind libs $mlnx_valg_libdir" >&5 +$as_echo "$as_me: Checking OFED valgrind libs $mlnx_valg_libdir" >&6;} + + if test -d "$mlnx_valg_libdir"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: Added $mlnx_valg_libdir to valgrind LD_LIBRARY_PATH" >&5 +$as_echo "$as_me: Added $mlnx_valg_libdir to valgrind LD_LIBRARY_PATH" >&6;} + valgrind_libpath="$mlnx_valg_libdir:$valgrind_libpath" +fi + LDFLAGS="$save_LDFLAGS" + CFLAGS="$save_CFLAGS" + CPPFLAGS="$save_CPPFLAGS" + + uct_modules="${uct_modules}:ib" + +else + + with_dc=no + with_rc=no + with_ud=no + with_mlx5_hw=no + with_mlx5_dv=no + +fi + +# +# For automake +# + if test "x$with_ib" != xno; then + HAVE_IB_TRUE= + HAVE_IB_FALSE='#' +else + HAVE_IB_TRUE='#' + HAVE_IB_FALSE= +fi + + if test "x$with_rc" != xno; then + HAVE_TL_RC_TRUE= + HAVE_TL_RC_FALSE='#' +else + HAVE_TL_RC_TRUE='#' + HAVE_TL_RC_FALSE= +fi + + if test "x$with_dc" != xno; then + HAVE_TL_DC_TRUE= + HAVE_TL_DC_FALSE='#' +else + HAVE_TL_DC_TRUE='#' + HAVE_TL_DC_FALSE= +fi + + if test -n "$have_dc_dv"; then + HAVE_DC_DV_TRUE= + HAVE_DC_DV_FALSE='#' +else + HAVE_DC_DV_TRUE='#' + HAVE_DC_DV_FALSE= +fi + + if test -n "$have_dc_exp"; then + HAVE_DC_EXP_TRUE= + HAVE_DC_EXP_FALSE='#' +else + HAVE_DC_EXP_TRUE='#' + HAVE_DC_EXP_FALSE= +fi + + if test "x$with_ud" 
!= xno; then + HAVE_TL_UD_TRUE= + HAVE_TL_UD_FALSE='#' +else + HAVE_TL_UD_TRUE='#' + HAVE_TL_UD_FALSE= +fi + + if test "x$with_mlx5_hw" != xno; then + HAVE_MLX5_HW_TRUE= + HAVE_MLX5_HW_FALSE='#' +else + HAVE_MLX5_HW_TRUE='#' + HAVE_MLX5_HW_FALSE= +fi + + if test "x$with_mlx5_dv" = xyes; then + HAVE_MLX5_DV_TRUE= + HAVE_MLX5_DV_FALSE='#' +else + HAVE_MLX5_DV_TRUE='#' + HAVE_MLX5_DV_FALSE= +fi + + if test -n "$have_devx"; then + HAVE_DEVX_TRUE= + HAVE_DEVX_FALSE='#' +else + HAVE_DEVX_TRUE='#' + HAVE_DEVX_FALSE= +fi + + if test "x$verbs_exp" != xno; then + HAVE_EXP_TRUE= + HAVE_EXP_FALSE='#' +else + HAVE_EXP_TRUE='#' + HAVE_EXP_FALSE= +fi + + if test "x$with_mlx5_hw" != xno -a "x$has_get_av" != xno; then + HAVE_MLX5_HW_UD_TRUE= + HAVE_MLX5_HW_UD_FALSE='#' +else + HAVE_MLX5_HW_UD_TRUE='#' + HAVE_MLX5_HW_UD_FALSE= +fi + + +uct_ib_modules="" +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +# +# CM (IB connection manager) Support +# +cm_happy="no" + + +# Check whether --with-cm was given. +if test "${with_cm+set}" = set; then : + withval=$with_cm; +else + with_cm=guess +fi + + +if test "x$with_cm" != xno; then : + save_LIBS="$LIBS" + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ib_cm_send_req in -libcm" >&5 +$as_echo_n "checking for ib_cm_send_req in -libcm... " >&6; } +if ${ac_cv_lib_ibcm_ib_cm_send_req+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-libcm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char ib_cm_send_req (); +int +main () +{ +return ib_cm_send_req (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_ibcm_ib_cm_send_req=yes +else + ac_cv_lib_ibcm_ib_cm_send_req=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ibcm_ib_cm_send_req" >&5 +$as_echo "$ac_cv_lib_ibcm_ib_cm_send_req" >&6; } +if test "x$ac_cv_lib_ibcm_ib_cm_send_req" = xyes; then : + IBCM_LIBS=-libcm + + uct_ib_modules="${uct_ib_modules}:cm" + cm_happy="yes" +else + if test "x$with_cm" = xyes; then : + as_fn_error $? "CM requested but lib ibcm not found" "$LINENO" 5 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: CM support not found, skipping" >&5 +$as_echo "$as_me: WARNING: CM support not found, skipping" >&2;} + +fi + +fi + + LIBS="$save_LIBS" +fi + + if test "x$cm_happy" != xno; then + HAVE_TL_CM_TRUE= + HAVE_TL_CM_FALSE='#' +else + HAVE_TL_CM_TRUE='#' + HAVE_TL_CM_FALSE= +fi + +ac_config_files="$ac_config_files src/uct/ib/cm/Makefile" + + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +# +# Check for RDMACM support +# +rdmacm_happy="no" +rdmacm_qp_less_happy="no" + +# Check whether --with-rdmacm was given. 
+if test "${with_rdmacm+set}" = set; then : + withval=$with_rdmacm; +else + with_rdmacm=guess +fi + + +if test "x$with_rdmacm" != xno; then : + if test "x$with_rdmacm" = xguess -o "x$with_rdmacm" = xyes -o "x$with_rdmacm" = x; then : + ucx_check_rdmacm_dir=/usr +else + ucx_check_rdmacm_dir=$with_rdmacm +fi + + if test -d "$ucx_check_rdmacm_dir/lib64"; then : + libsuff="64" +else + libsuff="" +fi + save_LDFLAGS="$LDFLAGS" + save_CPPFLAGS="$CPPFLAGS" + + if test "$ucx_check_rdmacm_dir" != /usr; then : + + LDFLAGS="-L$ucx_check_rdmacm_dir/lib$libsuff $LDFLAGS" + CPPFLAGS="-I$ucx_check_rdmacm_dir/include $CPPFLAGS" +fi + + as_ac_Header=`$as_echo "ac_cv_header_$ucx_check_rdmacm_dir/include/rdma/rdma_cma.h" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ucx_check_rdmacm_dir/include/rdma/rdma_cma.h" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for rdma_create_id in -lrdmacm" >&5 +$as_echo_n "checking for rdma_create_id in -lrdmacm... " >&6; } +if ${ac_cv_lib_rdmacm_rdma_create_id+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lrdmacm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char rdma_create_id (); +int +main () +{ +return rdma_create_id (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_rdmacm_rdma_create_id=yes +else + ac_cv_lib_rdmacm_rdma_create_id=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rdmacm_rdma_create_id" >&5 +$as_echo "$ac_cv_lib_rdmacm_rdma_create_id" >&6; } +if test "x$ac_cv_lib_rdmacm_rdma_create_id" = xyes; then : + uct_modules="${uct_modules}:rdmacm" + rdmacm_happy="yes" + if test "$ucx_check_rdmacm_dir" != /usr; then : + + RDMACM_CPPFLAGS="-I$ucx_check_rdmacm_dir/include" + + RDMACM_LDFLAGS="-L$ucx_check_rdmacm_dir/lib$libsuff" + +fi + RDMACM_LIBS=-lrdmacm + + # QP less support + ac_fn_c_check_decl "$LINENO" "rdma_establish" "ac_cv_have_decl_rdma_establish" "#include <$ucx_check_rdmacm_dir/include/rdma/rdma_cma.h> +" +if test "x$ac_cv_have_decl_rdma_establish" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_RDMA_ESTABLISH $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + rdmacm_qp_less_happy="yes" + +$as_echo "#define HAVE_RDMACM_QP_LESS 1" >>confdefs.h + +fi +ac_fn_c_check_decl "$LINENO" "rdma_init_qp_attr" "ac_cv_have_decl_rdma_init_qp_attr" "#include <$ucx_check_rdmacm_dir/include/rdma/rdma_cma.h> +" +if test "x$ac_cv_have_decl_rdma_init_qp_attr" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_RDMA_INIT_QP_ATTR $ac_have_decl +_ACEOF +if test $ac_have_decl = 1; then : + rdmacm_qp_less_happy="yes" + +$as_echo "#define HAVE_RDMACM_QP_LESS 1" >>confdefs.h + +fi + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: RDMACM requested but librdmacm is not found" >&5 +$as_echo "$as_me: WARNING: RDMACM requested but librdmacm is not found" >&2;} + as_fn_error $? 
"Please install librdmacm and librdmacm-devel or disable rdmacm support" "$LINENO" 5 + +fi + + +else + + if test "x$with_rdmacm" != xguess; then : + as_fn_error $? "RDMACM requested but required file (rdma/rdma_cma.h) could not be found in $ucx_check_rdmacm_dir" "$LINENO" 5 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: RDMACM requested but required file (rdma/rdma_cma.h) could not be found in $ucx_check_rdmacm_dir" >&5 +$as_echo "$as_me: WARNING: RDMACM requested but required file (rdma/rdma_cma.h) could not be found in $ucx_check_rdmacm_dir" >&2;} +fi + +fi + + + + LDFLAGS="$save_LDFLAGS" + CPPFLAGS="$save_CPPFLAGS" + + +fi + + if test "x$rdmacm_happy" != xno; then + HAVE_RDMACM_TRUE= + HAVE_RDMACM_FALSE='#' +else + HAVE_RDMACM_TRUE='#' + HAVE_RDMACM_FALSE= +fi + + if test "x$rdmacm_qp_less_happy" != xno; then + HAVE_RDMACM_QP_LESS_TRUE= + HAVE_RDMACM_QP_LESS_FALSE='#' +else + HAVE_RDMACM_QP_LESS_TRUE='#' + HAVE_RDMACM_QP_LESS_FALSE= +fi + +ac_config_files="$ac_config_files src/uct/ib/rdmacm/Makefile" + + + +cat >>confdefs.h <<_ACEOF +#define uct_ib_MODULES "${uct_ib_modules}" +_ACEOF + +ac_config_files="$ac_config_files src/uct/ib/Makefile" + + +# +# Copyright (C) Advanced Micro Devices, Inc. 2016 - 2018. ALL RIGHTS RESERVED. +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + + +if test "x$rocm_checked" != "xyes"; then : + + + +# Check whether --with-rocm was given. +if test "${with_rocm+set}" = set; then : + withval=$with_rocm; +else + with_rocm=guess +fi + + +rocm_happy=no +hip_happy=no +if test "x$with_rocm" != "xno"; then : + case "x$with_rocm" in #( + x|xguess|xyes) : + { $as_echo "$as_me:${as_lineno-$LINENO}: ROCm path was not specified. Guessing ..." >&5 +$as_echo "$as_me: ROCm path was not specified. Guessing ..." 
>&6;} + with_rocm="/opt/rocm" + ROCM_CPPFLAGS="-I$with_rocm/include/hsa -I$with_rocm/include" + ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib" + ROCM_LIBS="-lhsa-runtime64" + ;; #( + x/*) : + { $as_echo "$as_me:${as_lineno-$LINENO}: ROCm path given as $with_rocm ..." >&5 +$as_echo "$as_me: ROCm path given as $with_rocm ..." >&6;} + ROCM_CPPFLAGS="-I$with_rocm/include/hsa -I$with_rocm/include" + ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib" + ROCM_LIBS="-lhsa-runtime64" + ;; #( + *) : + { $as_echo "$as_me:${as_lineno-$LINENO}: ROCm flags given ..." >&5 +$as_echo "$as_me: ROCm flags given ..." >&6;} + for arg in $with_rocm ; do # word-split the user-supplied flag list (a doubled dollar sign here would expand to the shell PID) + case $arg in #( + yes) : + ;; #( + no) : + ;; #( + -l*|*.a|*.so) : + ROCM_LIBS="$ROCM_LIBS $arg" ;; #( + -L*|-WL*|-Wl*) : + ROCM_LDFLAGS="$ROCM_LDFLAGS $arg" ;; #( + -I*) : + ROCM_CPPFLAGS="$ROCM_CPPFLAGS $arg" ;; #( + *lib|*lib/|*lib64|*lib64/) : + if test -d $arg; then : + ROCM_LDFLAGS="$ROCM_LDFLAGS -L$arg" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $arg of $with_rocm not parsed" >&5 +$as_echo "$as_me: WARNING: $arg of $with_rocm not parsed" >&2;} +fi ;; #( + *include|*include/) : + if test -d $arg; then : + ROCM_CPPFLAGS="$ROCM_CPPFLAGS -I$arg" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $arg of $with_rocm not parsed" >&5 +$as_echo "$as_me: WARNING: $arg of $with_rocm not parsed" >&2;} +fi ;; #( + *) : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $arg of $with_rocm not parsed" >&5 +$as_echo "$as_me: WARNING: $arg of $with_rocm not parsed" >&2;} ;; +esac +done ;; +esac + + SAVE_CPPFLAGS="$CPPFLAGS" + SAVE_LDFLAGS="$LDFLAGS" + SAVE_LIBS="$LIBS" + + CPPFLAGS="$ROCM_CPPFLAGS $CPPFLAGS" + LDFLAGS="$ROCM_LDFLAGS $LDFLAGS" + LIBS="$ROCM_LIBS $LIBS" + + rocm_happy=yes + if test "x$rocm_happy" = xyes; then : + for ac_header in hsa.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "hsa.h" "ac_cv_header_hsa_h" "$ac_includes_default" +if test "x$ac_cv_header_hsa_h" = xyes; then : + cat >>confdefs.h 
<<_ACEOF +#define HAVE_HSA_H 1 +_ACEOF + rocm_happy=yes +else + rocm_happy=no +fi + +done + +fi + if test "x$rocm_happy" = xyes; then : + for ac_header in hsa_ext_amd.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "hsa_ext_amd.h" "ac_cv_header_hsa_ext_amd_h" "$ac_includes_default" +if test "x$ac_cv_header_hsa_ext_amd_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_HSA_EXT_AMD_H 1 +_ACEOF + rocm_happy=yes +else + rocm_happy=no +fi + +done + +fi + if test "x$rocm_happy" = xyes; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for hsa_init in -lhsa-runtime64" >&5 +$as_echo_n "checking for hsa_init in -lhsa-runtime64... " >&6; } +if ${ac_cv_lib_hsa_runtime64_hsa_init+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lhsa-runtime64 $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char hsa_init (); +int +main () +{ +return hsa_init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_hsa_runtime64_hsa_init=yes +else + ac_cv_lib_hsa_runtime64_hsa_init=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hsa_runtime64_hsa_init" >&5 +$as_echo "$ac_cv_lib_hsa_runtime64_hsa_init" >&6; } +if test "x$ac_cv_lib_hsa_runtime64_hsa_init" = xyes; then : + rocm_happy=yes +else + rocm_happy=no +fi + +fi + + if test "x$rocm_happy" = "xyes"; then : + + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: ROCm not found" >&5 +$as_echo "$as_me: WARNING: ROCm not found" >&2;} +fi + + CPPFLAGS="$SAVE_CPPFLAGS" + LDFLAGS="$SAVE_LDFLAGS" + LIBS="$SAVE_LIBS" + + HIP_CPPFLAGS="-D__HIP_PLATFORM_HCC__ -I$with_rocm/include/hip -I$with_rocm/include" + HIP_LDFLAGS="-L$with_rocm/hip/lib -L$with_rocm/lib" + HIP_LIBS="-lhip_hcc" + + + CPPFLAGS="$HIP_CPPFLAGS $CPPFLAGS" + LDFLAGS="$HIP_LDFLAGS $LDFLAGS" + LIBS="$HIP_LIBS $LIBS" + + hip_happy=yes + if test "x$hip_happy" = xyes; then : + for ac_header in hip_runtime.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "hip_runtime.h" "ac_cv_header_hip_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_hip_runtime_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_HIP_RUNTIME_H 1 +_ACEOF + hip_happy=yes +else + hip_happy=no +fi + +done + +fi + if test "x$hip_happy" = xyes; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for hipFree in -lhip_hcc" >&5 +$as_echo_n "checking for hipFree in -lhip_hcc... " >&6; } +if ${ac_cv_lib_hip_hcc_hipFree+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lhip_hcc $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char hipFree (); +int +main () +{ +return hipFree (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_hip_hcc_hipFree=yes +else + ac_cv_lib_hip_hcc_hipFree=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hip_hcc_hipFree" >&5 +$as_echo "$ac_cv_lib_hip_hcc_hipFree" >&6; } +if test "x$ac_cv_lib_hip_hcc_hipFree" = xyes; then : + hip_happy=yes +else + hip_happy=no +fi + +fi + if test "x$hip_happy" = xyes; then : + HIP_CXXFLAGS="--std=gnu++11" +fi + + CPPFLAGS="$SAVE_CPPFLAGS" + LDFLAGS="$SAVE_LDFLAGS" + LIBS="$SAVE_LIBS" + + if test "x$hip_happy" = "xyes"; then : + + + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: HIP Runtime not found" >&5 +$as_echo "$as_me: WARNING: HIP Runtime not found" >&2;} +fi + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: ROCm was explicitly disabled" >&5 +$as_echo "$as_me: WARNING: ROCm was explicitly disabled" >&2;} + +fi + +rocm_checked=yes + if test "x$rocm_happy" != xno; then + HAVE_ROCM_TRUE= + HAVE_ROCM_FALSE='#' +else + HAVE_ROCM_TRUE='#' + HAVE_ROCM_FALSE= +fi + + if test "x$hip_happy" != xno; then + HAVE_HIP_TRUE= + HAVE_HIP_FALSE='#' +else + HAVE_HIP_TRUE='#' + HAVE_HIP_FALSE= +fi + + + +fi + + + +if test "x$rocm_happy" = "xyes"; then : + uct_modules="${uct_modules}:rocm" +fi +uct_rocm_modules="" +# +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + + +if test "x$gdrcopy_checked" != "xyes"; then : + + +gdrcopy_happy="no" + + +# Check whether --with-gdrcopy was given. 
+if test "${with_gdrcopy+set}" = set; then : + withval=$with_gdrcopy; +else + with_gdrcopy=guess +fi + + +if test "x$with_gdrcopy" != "xno"; then : + save_CPPFLAGS="$CPPFLAGS" + save_CFLAGS="$CFLAGS" + save_LDFLAGS="$LDFLAGS" + + if test ! -z "$with_gdrcopy" -a "x$with_gdrcopy" != "xyes" -a "x$with_gdrcopy" != "xguess"; then : + + ucx_check_gdrcopy_dir="$with_gdrcopy" + if test -d "$with_gdrcopy/lib64"; then : + libsuff="64" +else + libsuff="" +fi + ucx_check_gdrcopy_libdir="$with_gdrcopy/lib$libsuff" + CPPFLAGS="-I$with_gdrcopy/include $save_CPPFLAGS" + LDFLAGS="-L$ucx_check_gdrcopy_libdir $save_LDFLAGS" + +fi + if test ! -z "$with_gdrcopy_libdir" -a "x$with_gdrcopy_libdir" != "xyes"; then : + ucx_check_gdrcopy_libdir="$with_gdrcopy_libdir" + LDFLAGS="-L$ucx_check_gdrcopy_libdir $save_LDFLAGS" +fi + + for ac_header in gdrapi.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "gdrapi.h" "ac_cv_header_gdrapi_h" "$ac_includes_default" +if test "x$ac_cv_header_gdrapi_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_GDRAPI_H 1 +_ACEOF + as_ac_Lib=`$as_echo "ac_cv_lib_gdrapi ''_gdr_pin_buffer" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for gdr_pin_buffer in -lgdrapi " >&5 +$as_echo_n "checking for gdr_pin_buffer in -lgdrapi ... " >&6; } +if eval \${$as_ac_Lib+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgdrapi $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char gdr_pin_buffer (); +int +main () +{ +return gdr_pin_buffer (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$as_ac_Lib=yes" +else + eval "$as_ac_Lib=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +eval ac_res=\$$as_ac_Lib + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +if eval test \"x\$"$as_ac_Lib"\" = x"yes"; then : + gdrcopy_happy="yes" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: GDR_COPY runtime not detected. Disable." >&5 +$as_echo "$as_me: WARNING: GDR_COPY runtime not detected. Disable." >&2;} + gdrcopy_happy="no" +fi + + +else + gdrcopy_happy="no" +fi + +done + + + if test "x$gdrcopy_happy" = "xyes"; then : + ac_fn_c_check_decl "$LINENO" "gdr_copy_to_mapping" "ac_cv_have_decl_gdr_copy_to_mapping" "#include \"gdrapi.h\" +" +if test "x$ac_cv_have_decl_gdr_copy_to_mapping" = xyes; then : + ac_have_decl=1 +else + ac_have_decl=0 +fi + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_GDR_COPY_TO_MAPPING $ac_have_decl +_ACEOF + +fi + + CFLAGS="$save_CFLAGS" + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + + if test "x$gdrcopy_happy" = "xyes"; then : + + GDR_COPY_CPPFLAGS="-I$ucx_check_gdrcopy_dir/include/ " + + GDR_COPY_LDFLAGS="-lgdrapi -L${ucx_check_gdrcopy_libdir:-$ucx_check_gdrcopy_dir/lib64}" # use the library dir that was actually probed; fall back to <prefix>/lib64 when none was + + +else + + if test "x$with_gdrcopy" != "xguess"; then : + as_fn_error $? 
"gdrcopy support is requested but gdrcopy packages cannot be found" "$LINENO" 5 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: GDR_COPY not found" >&5 +$as_echo "$as_me: WARNING: GDR_COPY not found" >&2;} +fi + +fi + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: GDR_COPY was explicitly disabled" >&5 +$as_echo "$as_me: WARNING: GDR_COPY was explicitly disabled" >&2;} +fi + +gdrcopy_checked=yes + if test "x$gdrcopy_happy" != xno; then + HAVE_GDR_COPY_TRUE= + HAVE_GDR_COPY_FALSE='#' +else + HAVE_GDR_COPY_TRUE='#' + HAVE_GDR_COPY_FALSE= +fi + + + +fi + + + +if test "x$gdrcopy_happy" = "xyes" && test "x$rocm_happy" = "xyes"; then : + uct_rocm_modules="${uct_rocm_modules}:gdr" +fi +ac_config_files="$ac_config_files src/uct/rocm/gdr/Makefile" + + + +cat >>confdefs.h <<_ACEOF +#define uct_rocm_MODULES "${uct_rocm_modules}" +_ACEOF + +ac_config_files="$ac_config_files src/uct/rocm/Makefile" + + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +cma_happy="no" +# Check whether --enable-cma was given. 
+if test "${enable_cma+set}" = set; then : + enableval=$enable_cma; +else + enable_cma=yes +fi + + +if test "x$enable_cma" != xno; then : + for ac_header in sys/uio.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "sys/uio.h" "ac_cv_header_sys_uio_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_uio_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_SYS_UIO_H 1 +_ACEOF + ac_fn_c_check_func "$LINENO" "process_vm_readv" "ac_cv_func_process_vm_readv" +if test "x$ac_cv_func_process_vm_readv" = xyes; then : + cma_happy="yes" +else + cma_happy="no" +fi + + if test "x$cma_happy" = "xyes"; then : + uct_modules="${uct_modules}:cma" +fi + +fi + +done + + + +fi + + if test "x$cma_happy" != xno; then + HAVE_CMA_TRUE= + HAVE_CMA_FALSE='#' +else + HAVE_CMA_TRUE='#' + HAVE_CMA_FALSE= +fi + +ac_config_files="$ac_config_files src/uct/sm/cma/Makefile" + + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +knem_happy="no" + + +# Check whether --with-knem was given. +if test "${with_knem+set}" = set; then : + withval=$with_knem; +else + with_knem=guess +fi + + +if test "x$with_knem" != xno; then : + if test "x$with_knem" = "xguess" -o "x$with_knem" = xyes -o "x$with_knem" = "x"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: KNEM path was not found, guessing ..." >&5 +$as_echo "$as_me: KNEM path was not found, guessing ..." >&6;} + ucx_check_knem_include_dir=$(${PKG_CONFIG:-pkg-config} --cflags knem) # use the pkg-config binary configure selected, as the XPMEM/UGNI checks do +else + ucx_check_knem_include_dir=-I$with_knem/include +fi + + save_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$ucx_check_knem_include_dir $CPPFLAGS" + + ac_fn_c_check_decl "$LINENO" "KNEM_CMD_GET_INFO" "ac_cv_have_decl_KNEM_CMD_GET_INFO" "#include <knem_io.h> +" +if test "x$ac_cv_have_decl_KNEM_CMD_GET_INFO" = xyes; then : + KNEM_CPPFLAGS=$ucx_check_knem_include_dir + + uct_modules="${uct_modules}:knem" + knem_happy="yes" +else + if test "x$with_knem" != xguess; then : + as_fn_error $? 
"KNEM requested but required file (knem_io.h) could not be found" "$LINENO" 5 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: KNEM requested but required file (knem_io.h) could not be found" >&5 +$as_echo "$as_me: WARNING: KNEM requested but required file (knem_io.h) could not be found" >&2;} +fi +fi + + + CPPFLAGS="$save_CPPFLAGS" + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: KNEM was explicitly disabled" >&5 +$as_echo "$as_me: WARNING: KNEM was explicitly disabled" >&2;} + +fi + + if test "x$knem_happy" != xno; then + HAVE_KNEM_TRUE= + HAVE_KNEM_FALSE='#' +else + HAVE_KNEM_TRUE='#' + HAVE_KNEM_FALSE= +fi + +ac_config_files="$ac_config_files src/uct/sm/knem/Makefile" + + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +xpmem_happy="no" + +# Check whether --with-xpmem was given. +if test "${with_xpmem+set}" = set; then : + withval=$with_xpmem; +else + with_xpmem=guess +fi + + +if test "x$with_xpmem" != "xno"; then : + if test ! -d "$with_xpmem"; then : + + { $as_echo "$as_me:${as_lineno-$LINENO}: XPMEM - failed to open the requested location ($with_xpmem), guessing ..." >&5 +$as_echo "$as_me: XPMEM - failed to open the requested location ($with_xpmem), guessing ..." 
>&6;} + if $PKG_CONFIG --exists cray-xpmem; then : + + xpmem_happy=yes + XPMEM_CFLAGS=`$PKG_CONFIG --cflags cray-xpmem` + + XPMEM_LIBS=`$PKG_CONFIG --libs cray-xpmem` + + +else + + # If cray-xpmem module not found in pkg-config, try to search + xpmem_header=$(find /opt/xpmem /usr/local/include /usr/local/xpmem -name xpmem.h 2>/dev/null|head -1) + if test -f "$xpmem_header"; then : + with_xpmem=$(dirname $xpmem_header | head -1 | sed -e s,/include,,g) +fi + +fi + +fi + +fi + +# Verify XPMEM header file +if test "x$xpmem_happy" = "xno" -a -d "$with_xpmem"; then : + as_ac_Header=`$as_echo "ac_cv_header_$with_xpmem/include/xpmem.h" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$with_xpmem/include/xpmem.h" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + XPMEM_CFLAGS="-I$with_xpmem/include" + + XPMEM_LIBS="-L$with_xpmem/lib -lxpmem" + + xpmem_happy="yes" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cray-xpmem header was not found in $with_xpmem" >&5 +$as_echo "$as_me: WARNING: cray-xpmem header was not found in $with_xpmem" >&2;} +fi + + + +fi + +if test "x$xpmem_happy" = "xyes"; then : + uct_modules="${uct_modules}:xpmem" +fi + if test "x$xpmem_happy" != "xno"; then + HAVE_XPMEM_TRUE= + HAVE_XPMEM_FALSE='#' +else + HAVE_XPMEM_TRUE='#' + HAVE_XPMEM_FALSE= +fi + +ac_config_files="$ac_config_files src/uct/sm/mm/xpmem/Makefile" + + + +ac_config_files="$ac_config_files src/uct/sm/mm/Makefile" + + + +ac_config_files="$ac_config_files src/uct/sm/Makefile" + + +# +# Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +cray_ugni_supported=no + + +# Check whether --with-ugni was given. 
+if test "${with_ugni+set}" = set; then : + withval=$with_ugni; +else + with_ugni=guess +fi + + +if test "x$with_ugni" != "xno"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking cray-ugni" >&5 +$as_echo_n "checking cray-ugni... " >&6; } + if $PKG_CONFIG --exists cray-ugni cray-pmi; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CRAY_UGNI_CFLAGS=`$PKG_CONFIG --cflags cray-ugni cray-pmi` + + CRAY_UGNI_LIBS=`$PKG_CONFIG --libs cray-ugni cray-pmi` + + uct_modules="${uct_modules}:ugni" + cray_ugni_supported=yes + +$as_echo "#define HAVE_TL_UGNI 1" >>confdefs.h + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + if test "x$with_ugni" != "xguess"; then : + as_fn_error $? "UGNI support was requested but cray-ugni and cray-pmi packages cannot be found" "$LINENO" 5 +fi + +fi + +fi + + + if test "x$cray_ugni_supported" = xyes; then + HAVE_CRAY_UGNI_TRUE= + HAVE_CRAY_UGNI_FALSE='#' +else + HAVE_CRAY_UGNI_TRUE='#' + HAVE_CRAY_UGNI_FALSE= +fi + +ac_config_files="$ac_config_files src/uct/ugni/Makefile" + + + + +cat >>confdefs.h <<_ACEOF +#define uct_MODULES "${uct_modules}" +_ACEOF + + +ac_config_files="$ac_config_files src/uct/Makefile" + + + # +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +ucx_perftest_modules="" +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag -fno-exceptions" >&5 +$as_echo_n "checking compiler flag -fno-exceptions... 
" >&6; } + SAVE_CFLAGS="$CFLAGS" + # Save the C++ flags themselves: SAVE_CXXFLAGS is assigned back to CXXFLAGS after the probe, so saving CFLAGS here would overwrite the user's CXXFLAGS. + SAVE_CXXFLAGS="$CXXFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -fno-exceptions" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS -fno-exceptions" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main(){return 0;} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + PERF_LIB_CXXFLAGS="$PERF_LIB_CXXFLAGS -fno-exceptions" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag -fno-rtti" >&5 +$as_echo_n "checking compiler flag -fno-rtti... " >&6; } + SAVE_CFLAGS="$CFLAGS" + # Save CXXFLAGS (not CFLAGS) for the restore after the probe. + SAVE_CXXFLAGS="$CXXFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -fno-rtti" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS -fno-rtti" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main(){return 0;} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + PERF_LIB_CXXFLAGS="$PERF_LIB_CXXFLAGS -fno-rtti" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag --no_exceptions" >&5 +$as_echo_n "checking compiler flag --no_exceptions... " >&6; } + SAVE_CFLAGS="$CFLAGS" + # Save CXXFLAGS (not CFLAGS) for the restore after the probe. 
+ SAVE_CXXFLAGS="$CXXFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS --no_exceptions" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS --no_exceptions" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int main(){return 0;} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + PERF_LIB_CXXFLAGS="$PERF_LIB_CXXFLAGS --no_exceptions" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +PERF_LIB_CXXFLAGS=$PERF_LIB_CXXFLAGS + + +ac_config_files="$ac_config_files src/tools/perf/lib/Makefile" + + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + + +if test "x$cuda_checked" != "xyes"; then : + + +# Check whether --with-cuda was given. +if test "${with_cuda+set}" = set; then : + withval=$with_cuda; +else + with_cuda=guess +fi + + + if test "x$with_cuda" = "xno"; then : + cuda_happy=no +else + + save_CPPFLAGS="$CPPFLAGS" + save_LDFLAGS="$LDFLAGS" + + CUDA_CPPFLAGS="" + CUDA_LDFLAGS="" + + if test ! -z "$with_cuda" -a "x$with_cuda" != "xyes" -a "x$with_cuda" != "xguess"; then : + ucx_check_cuda_dir="$with_cuda" + if test -d "$with_cuda/lib64"; then : + libsuff="64" +else + libsuff="" +fi + ucx_check_cuda_libdir="$with_cuda/lib$libsuff" + CUDA_CPPFLAGS="-I$with_cuda/include" + CUDA_LDFLAGS="-L$ucx_check_cuda_libdir -L$ucx_check_cuda_libdir/stubs" +fi + + if test ! 
-z "$with_cuda_libdir" -a "x$with_cuda_libdir" != "xyes"; then : + ucx_check_cuda_libdir="$with_cuda_libdir" + CUDA_LDFLAGS="-L$ucx_check_cuda_libdir -L$ucx_check_cuda_libdir/stubs" +fi + + CPPFLAGS="$CPPFLAGS $CUDA_CPPFLAGS" + LDFLAGS="$LDFLAGS $CUDA_LDFLAGS" + + # Check cuda header files + for ac_header in cuda.h cuda_runtime.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + cuda_happy="yes" +else + cuda_happy="no" +fi + +done + + + # Check cuda libraries + if test "x$cuda_happy" = "xyes"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cuDeviceGetUuid in -lcuda" >&5 +$as_echo_n "checking for cuDeviceGetUuid in -lcuda... " >&6; } +if ${ac_cv_lib_cuda_cuDeviceGetUuid+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcuda $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char cuDeviceGetUuid (); +int +main () +{ +return cuDeviceGetUuid (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_cuda_cuDeviceGetUuid=yes +else + ac_cv_lib_cuda_cuDeviceGetUuid=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cuda_cuDeviceGetUuid" >&5 +$as_echo "$ac_cv_lib_cuda_cuDeviceGetUuid" >&6; } +if test "x$ac_cv_lib_cuda_cuDeviceGetUuid" = xyes; then : + CUDA_LDFLAGS="$CUDA_LDFLAGS -lcuda" +else + cuda_happy="no" +fi + +fi + if test "x$cuda_happy" = "xyes"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cudaGetDeviceCount in -lcudart" >&5 +$as_echo_n "checking for cudaGetDeviceCount in -lcudart... " >&6; } +if ${ac_cv_lib_cudart_cudaGetDeviceCount+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lcudart $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char cudaGetDeviceCount (); +int +main () +{ +return cudaGetDeviceCount (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_cudart_cudaGetDeviceCount=yes +else + ac_cv_lib_cudart_cudaGetDeviceCount=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cudart_cudaGetDeviceCount" >&5 +$as_echo "$ac_cv_lib_cudart_cudaGetDeviceCount" >&6; } +if test "x$ac_cv_lib_cudart_cudaGetDeviceCount" = xyes; then : + CUDA_LDFLAGS="$CUDA_LDFLAGS -lcudart" +else + cuda_happy="no" +fi + +fi + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + + if test "x$cuda_happy" = "xyes"; then : + CUDA_CPPFLAGS="$CUDA_CPPFLAGS" + + CUDA_LDFLAGS="$CUDA_LDFLAGS" + + +$as_echo "#define HAVE_CUDA 1" >>confdefs.h + +else + if test "x$with_cuda" != "xguess"; then : + as_fn_error $? "CUDA support is requested but cuda packages cannot be found" "$LINENO" 5 +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: CUDA not found" >&5 +$as_echo "$as_me: WARNING: CUDA not found" >&2;} +fi +fi + + +fi # "x$with_cuda" = "xno" + + cuda_checked=yes + if test "x$cuda_happy" != xno; then + HAVE_CUDA_TRUE= + HAVE_CUDA_FALSE='#' +else + HAVE_CUDA_TRUE='#' + HAVE_CUDA_FALSE= +fi + + + +fi # "x$cuda_checked" != "xyes" + + + +if test "x$cuda_happy" = "xyes"; then : + ucx_perftest_modules="${ucx_perftest_modules}:cuda" +fi + +ac_config_files="$ac_config_files src/tools/perf/cuda/Makefile" + + +# +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + + +if test "x$rocm_checked" != "xyes"; then : + + + +# Check whether --with-rocm was given. 
+if test "${with_rocm+set}" = set; then : + withval=$with_rocm; +else + with_rocm=guess +fi + + +rocm_happy=no +hip_happy=no +if test "x$with_rocm" != "xno"; then : + case "x$with_rocm" in #( + x|xguess|xyes) : + { $as_echo "$as_me:${as_lineno-$LINENO}: ROCm path was not specified. Guessing ..." >&5 +$as_echo "$as_me: ROCm path was not specified. Guessing ..." >&6;} + with_rocm="/opt/rocm" + ROCM_CPPFLAGS="-I$with_rocm/include/hsa -I$with_rocm/include" + ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib" + ROCM_LIBS="-lhsa-runtime64" + ;; #( + x/*) : + { $as_echo "$as_me:${as_lineno-$LINENO}: ROCm path given as $with_rocm ..." >&5 +$as_echo "$as_me: ROCm path given as $with_rocm ..." >&6;} + ROCM_CPPFLAGS="-I$with_rocm/include/hsa -I$with_rocm/include" + ROCM_LDFLAGS="-L$with_rocm/hsa/lib -L$with_rocm/lib" + ROCM_LIBS="-lhsa-runtime64" + ;; #( + *) : + { $as_echo "$as_me:${as_lineno-$LINENO}: ROCm flags given ..." >&5 +$as_echo "$as_me: ROCm flags given ..." >&6;} + # Split the --with-rocm value into words and sort each into ROCM_LIBS / ROCM_LDFLAGS / ROCM_CPPFLAGS. Must be $with_rocm: a doubled dollar sign is the shell PID and would discard the user's flags. 
+ for arg in $with_rocm ; do + case $arg in #( + yes) : + ;; #( + no) : + ;; #( + -l*|*.a|*.so) : + ROCM_LIBS="$ROCM_LIBS $arg" ;; #( + -L*|-WL*|-Wl*) : + ROCM_LDFLAGS="$ROCM_LDFLAGS $arg" ;; #( + -I*) : + ROCM_CPPFLAGS="$ROCM_CPPFLAGS $arg" ;; #( + *lib|*lib/|*lib64|*lib64/) : + if test -d $arg; then : + ROCM_LDFLAGS="$ROCM_LDFLAGS -L$arg" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $arg of $with_rocm not parsed" >&5 +$as_echo "$as_me: WARNING: $arg of $with_rocm not parsed" >&2;} +fi ;; #( + *include|*include/) : + if test -d $arg; then : + ROCM_CPPFLAGS="$ROCM_CPPFLAGS -I$arg" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $arg of $with_rocm not parsed" >&5 +$as_echo "$as_me: WARNING: $arg of $with_rocm not parsed" >&2;} +fi ;; #( + *) : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $arg of $with_rocm not parsed" >&5 +$as_echo "$as_me: WARNING: $arg of $with_rocm not parsed" >&2;} ;; +esac +done ;; +esac + + SAVE_CPPFLAGS="$CPPFLAGS" + SAVE_LDFLAGS="$LDFLAGS" + 
SAVE_LIBS="$LIBS" + + CPPFLAGS="$ROCM_CPPFLAGS $CPPFLAGS" + LDFLAGS="$ROCM_LDFLAGS $LDFLAGS" + LIBS="$ROCM_LIBS $LIBS" + + rocm_happy=yes + if test "x$rocm_happy" = xyes; then : + for ac_header in hsa.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "hsa.h" "ac_cv_header_hsa_h" "$ac_includes_default" +if test "x$ac_cv_header_hsa_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_HSA_H 1 +_ACEOF + rocm_happy=yes +else + rocm_happy=no +fi + +done + +fi + if test "x$rocm_happy" = xyes; then : + for ac_header in hsa_ext_amd.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "hsa_ext_amd.h" "ac_cv_header_hsa_ext_amd_h" "$ac_includes_default" +if test "x$ac_cv_header_hsa_ext_amd_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_HSA_EXT_AMD_H 1 +_ACEOF + rocm_happy=yes +else + rocm_happy=no +fi + +done + +fi + if test "x$rocm_happy" = xyes; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for hsa_init in -lhsa-runtime64" >&5 +$as_echo_n "checking for hsa_init in -lhsa-runtime64... " >&6; } +if ${ac_cv_lib_hsa_runtime64_hsa_init+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lhsa-runtime64 $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char hsa_init (); +int +main () +{ +return hsa_init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_hsa_runtime64_hsa_init=yes +else + ac_cv_lib_hsa_runtime64_hsa_init=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hsa_runtime64_hsa_init" >&5 +$as_echo "$ac_cv_lib_hsa_runtime64_hsa_init" >&6; } +if test "x$ac_cv_lib_hsa_runtime64_hsa_init" = xyes; then : + rocm_happy=yes +else + rocm_happy=no +fi + +fi + + if test "x$rocm_happy" = "xyes"; then : + + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: ROCm not found" >&5 +$as_echo "$as_me: WARNING: ROCm not found" >&2;} +fi + + CPPFLAGS="$SAVE_CPPFLAGS" + LDFLAGS="$SAVE_LDFLAGS" + LIBS="$SAVE_LIBS" + + HIP_CPPFLAGS="-D__HIP_PLATFORM_HCC__ -I$with_rocm/include/hip -I$with_rocm/include" + HIP_LDFLAGS="-L$with_rocm/hip/lib -L$with_rocm/lib" + HIP_LIBS="-lhip_hcc" + + + CPPFLAGS="$HIP_CPPFLAGS $CPPFLAGS" + LDFLAGS="$HIP_LDFLAGS $LDFLAGS" + LIBS="$HIP_LIBS $LIBS" + + hip_happy=yes + if test "x$hip_happy" = xyes; then : + for ac_header in hip_runtime.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "hip_runtime.h" "ac_cv_header_hip_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_hip_runtime_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_HIP_RUNTIME_H 1 +_ACEOF + hip_happy=yes +else + hip_happy=no +fi + +done + +fi + if test "x$hip_happy" = xyes; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for hipFree in -lhip_hcc" >&5 +$as_echo_n "checking for hipFree in -lhip_hcc... " >&6; } +if ${ac_cv_lib_hip_hcc_hipFree+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lhip_hcc $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char hipFree (); +int +main () +{ +return hipFree (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_hip_hcc_hipFree=yes +else + ac_cv_lib_hip_hcc_hipFree=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_hip_hcc_hipFree" >&5 +$as_echo "$ac_cv_lib_hip_hcc_hipFree" >&6; } +if test "x$ac_cv_lib_hip_hcc_hipFree" = xyes; then : + hip_happy=yes +else + hip_happy=no +fi + +fi + if test "x$hip_happy" = xyes; then : + HIP_CXXFLAGS="--std=gnu++11" +fi + + CPPFLAGS="$SAVE_CPPFLAGS" + LDFLAGS="$SAVE_LDFLAGS" + LIBS="$SAVE_LIBS" + + if test "x$hip_happy" = "xyes"; then : + + + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: HIP Runtime not found" >&5 +$as_echo "$as_me: WARNING: HIP Runtime not found" >&2;} +fi + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: ROCm was explicitly disabled" >&5 +$as_echo "$as_me: WARNING: ROCm was explicitly disabled" >&2;} + +fi + +rocm_checked=yes + if test "x$rocm_happy" != xno; then + HAVE_ROCM_TRUE= + HAVE_ROCM_FALSE='#' +else + HAVE_ROCM_TRUE='#' + HAVE_ROCM_FALSE= +fi + + if test "x$hip_happy" != xno; then + HAVE_HIP_TRUE= + HAVE_HIP_FALSE='#' +else + HAVE_HIP_TRUE='#' + HAVE_HIP_FALSE= +fi + + + +fi + + + +if test "x$rocm_happy" = "xyes"; then : + ucx_perftest_modules="${ucx_perftest_modules}:rocm" +fi + +ac_config_files="$ac_config_files src/tools/perf/rocm/Makefile" + + + +cat >>confdefs.h <<_ACEOF +#define ucx_perftest_MODULES "${ucx_perftest_modules}" +_ACEOF + + +# TODO build RTE support (MPI/librte) as loadable modules +if test -n "$MPICC"; then : + UCX_PERFTEST_CC=$MPICC + +else + UCX_PERFTEST_CC=$CC + +fi + +ac_config_files="$ac_config_files src/tools/perf/Makefile" + + + # +# 
Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag -fno-tree-vectorize" >&5 +$as_echo_n "checking compiler flag -fno-tree-vectorize... " >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS -fno-tree-vectorize" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS -fno-tree-vectorize" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int main(){return 0;} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + GTEST_CXXFLAGS="$GTEST_CXXFLAGS -fno-tree-vectorize" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + +# error #236: controlling expression is constant + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking compiler flag --diag_suppress 236" >&5 +$as_echo_n "checking compiler flag --diag_suppress 236... " >&6; } + SAVE_CFLAGS="$CFLAGS" + SAVE_CXXFLAGS="$CFLAGS" + CFLAGS="$BASE_CFLAGS $CFLAGS --diag_suppress 236" + CXXFLAGS="$BASE_CXXFLAGS $CXXFLAGS --diag_suppress 236" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int main(){return 0;} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + GTEST_CXXFLAGS="$GTEST_CXXFLAGS --diag_suppress 236" +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + CFLAGS="$SAVE_CFLAGS" + CXXFLAGS="$SAVE_CXXFLAGS" + +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +GTEST_CXXFLAGS=$GTEST_CXXFLAGS + + +test_modules="" +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +ac_config_files="$ac_config_files test/gtest/ucm/test_dlopen/Makefile" + + +# +# Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +ac_config_files="$ac_config_files test/gtest/ucm/test_dlopen/rpath-subdir/Makefile" + + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019 ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +test_modules="${test_modules}:module" +ac_config_files="$ac_config_files test/gtest/ucs/test_module/Makefile" + + + +cat >>confdefs.h <<_ACEOF +#define test_MODULES "${test_modules}" +_ACEOF + +ac_config_files="$ac_config_files test/gtest/Makefile" + + + + # + # Enable fault injection code + # + # Check whether --enable-fault-injection was given. 
+if test "${enable_fault_injection+set}" = set; then : + enableval=$enable_fault_injection; +else + enable_fault_injection=no +fi + + if test "x$enable_fault_injection" = xyes; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: enabling with fault injection code" >&5 +$as_echo "$as_me: enabling with fault injection code" >&6;} + +$as_echo "#define ENABLE_FAULT_INJECTION 1" >>confdefs.h + +else + : +fi + + + # + # Disable checking user parameters + # + # Check whether --enable-params-check was given. +if test "${enable_params_check+set}" = set; then : + enableval=$enable_params_check; +else + enable_params_check=yes +fi + + if test "x$enable_params_check" = xyes; then : + +$as_echo "#define ENABLE_PARAMS_CHECK 1" >>confdefs.h + +else + $as_echo "#define ENABLE_PARAMS_CHECK 0" >>confdefs.h + +fi + + # + # Enable collecting data to ease debugging + # + # Check whether --enable-debug-data was given. +if test "${enable_debug_data+set}" = set; then : + enableval=$enable_debug_data; +else + enable_debug_data=no +fi + + if test "x$enable_debug_data" = xyes; then : + +$as_echo "#define ENABLE_DEBUG_DATA 1" >>confdefs.h + + +$as_echo "#define UCT_UD_EP_DEBUG_HOOKS 1" >>confdefs.h + +else + $as_echo "#define ENABLE_DEBUG_DATA 0" >>confdefs.h + + $as_echo "#define UCT_UD_EP_DEBUG_HOOKS 0" >>confdefs.h + +fi + + + # + # Enable multithreading support + # + # Check whether --enable-mt was given. +if test "${enable_mt+set}" = set; then : + enableval=$enable_mt; +else + enable_mt=no +fi + + if test "x$enable_mt" = xyes; then : + +$as_echo "#define ENABLE_MT 1" >>confdefs.h + + mt_enable=enabled +else + $as_echo "#define ENABLE_MT 0" >>confdefs.h + + mt_enable=disabled +fi + + + # + # Enable experimental header + # + # Check whether --enable-experimental-api was given. 
+if test "${enable_experimental_api+set}" = set; then : + enableval=$enable_experimental_api; +else + enable_experimental_api=no +fi + + if test "x$enable_experimental_api" = "xyes"; then + ENABLE_EXPERIMENTAL_API_TRUE= + ENABLE_EXPERIMENTAL_API_FALSE='#' +else + ENABLE_EXPERIMENTAL_API_TRUE='#' + ENABLE_EXPERIMENTAL_API_FALSE= +fi + + + + # + # Install development headers + # + # Check whether --enable-devel-headers was given. +if test "${enable_devel_headers+set}" = set; then : + enableval=$enable_devel_headers; +else + enable_devel_headers=no +fi + + if test "x$enable_devel_headers" = "xyes"; then + INSTALL_DEVEL_HEADERS_TRUE= + INSTALL_DEVEL_HEADERS_FALSE='#' +else + INSTALL_DEVEL_HEADERS_TRUE='#' + INSTALL_DEVEL_HEADERS_FALSE= +fi + + + + # + # Path for valgrind-enabled libraries + # + VALGRIND_LIBPATH=${valgrind_libpath} + + + + # + # Enable examples build + # + # Check whether --enable-examples was given. +if test "${enable_examples+set}" = set; then : + enableval=$enable_examples; +else + enable_examples=no +fi + + if test "x$enable_examples" = "xyes"; then + HAVE_EXAMPLES_TRUE= + HAVE_EXAMPLES_FALSE='#' +else + HAVE_EXAMPLES_TRUE='#' + HAVE_EXAMPLES_FALSE= +fi + + +fi # Docs only + +# +# Print which transports are built +# +build_modules="${uct_modules}" +build_modules="${build_modules}${uct_ib_modules}" +build_modules="${build_modules}${uct_cuda_modules}" +build_modules="${build_modules}${ucm_modules}" +build_modules="${build_modules}${ucx_perftest_modules}" +build_modules="${build_modules}${uct_rocm_modules}" +build_modules=${build_modules} + +build_bindings=${build_bindings} + + +# +# Final output +# +ac_config_files="$ac_config_files Makefile docs/doxygen/header.tex src/uct/api/version.h" + +if test "x$with_docs_only" = xyes; then : + +else + +ac_config_links="$ac_config_links debian/compat:debian/compat debian/copyright:debian/copyright debian/ucx.prerm:debian/ucx.prerm" + +ac_config_files="$ac_config_files ucx.spec ucx.pc contrib/rpmdef.sh 
debian/rules debian/control debian/changelog src/ucs/Makefile src/ucp/Makefile src/ucp/api/ucp_version.h src/ucp/core/ucp_version.c src/tools/info/Makefile src/tools/profile/Makefile test/apps/Makefile test/apps/sockaddr/Makefile test/examples/Makefile test/mpi/Makefile bindings/java/Makefile bindings/java/pom.xml bindings/java/src/main/native/Makefile" + + +ac_config_files="$ac_config_files test/mpi/run_mpi.sh" + + +fi + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. 
+( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! -f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. 
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +$as_echo_n "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: done" >&5 +$as_echo "done" >&6; } +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + +if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then + as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCXX\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${am__fastdepCCAS_TRUE}" && test -z "${am__fastdepCCAS_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCCAS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_DOT_TRUE}" && test -z "${HAVE_DOT_FALSE}"; then + as_fn_error $? "conditional \"HAVE_DOT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DX_COND_doc_TRUE}" && test -z "${DX_COND_doc_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_doc\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DX_COND_dot_TRUE}" && test -z "${DX_COND_dot_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_dot\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DX_COND_man_TRUE}" && test -z "${DX_COND_man_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_man\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DX_COND_rtf_TRUE}" && test -z "${DX_COND_rtf_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_rtf\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DX_COND_xml_TRUE}" && test -z "${DX_COND_xml_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_xml\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DX_COND_chm_TRUE}" && test -z "${DX_COND_chm_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_chm\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DX_COND_chi_TRUE}" && test -z "${DX_COND_chi_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_chi\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${DX_COND_html_TRUE}" && test -z "${DX_COND_html_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_html\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DX_COND_ps_TRUE}" && test -z "${DX_COND_ps_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_ps\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DX_COND_pdf_TRUE}" && test -z "${DX_COND_pdf_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_pdf\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DX_COND_latex_TRUE}" && test -z "${DX_COND_latex_FALSE}"; then + as_fn_error $? "conditional \"DX_COND_latex\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DOCS_ONLY_TRUE}" && test -z "${DOCS_ONLY_FALSE}"; then + as_fn_error $? "conditional \"DOCS_ONLY\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_GTEST_TRUE}" && test -z "${HAVE_GTEST_FALSE}"; then + as_fn_error $? "conditional \"HAVE_GTEST\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_STATS_TRUE}" && test -z "${HAVE_STATS_FALSE}"; then + as_fn_error $? "conditional \"HAVE_STATS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TUNING_TRUE}" && test -z "${HAVE_TUNING_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TUNING\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MEMTRACK_TRUE}" && test -z "${HAVE_MEMTRACK_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MEMTRACK\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${HAVE_IB_TRUE}" && test -z "${HAVE_IB_FALSE}"; then + as_fn_error $? "conditional \"HAVE_IB\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MLX5_HW_TRUE}" && test -z "${HAVE_MLX5_HW_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MLX5_HW\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MLX5_HW_UD_TRUE}" && test -z "${HAVE_MLX5_HW_UD_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MLX5_HW_UD\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MLX5_DV_TRUE}" && test -z "${HAVE_MLX5_DV_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MLX5_DV\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_DEVX_TRUE}" && test -z "${HAVE_DEVX_FALSE}"; then + as_fn_error $? "conditional \"HAVE_DEVX\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_EXP_TRUE}" && test -z "${HAVE_EXP_FALSE}"; then + as_fn_error $? "conditional \"HAVE_EXP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TL_RC_TRUE}" && test -z "${HAVE_TL_RC_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TL_RC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TL_DC_TRUE}" && test -z "${HAVE_TL_DC_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TL_DC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_DC_DV_TRUE}" && test -z "${HAVE_DC_DV_FALSE}"; then + as_fn_error $? "conditional \"HAVE_DC_DV\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${HAVE_DC_EXP_TRUE}" && test -z "${HAVE_DC_EXP_FALSE}"; then + as_fn_error $? "conditional \"HAVE_DC_EXP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TL_UD_TRUE}" && test -z "${HAVE_TL_UD_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TL_UD\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TL_CM_TRUE}" && test -z "${HAVE_TL_CM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TL_CM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_CRAY_UGNI_TRUE}" && test -z "${HAVE_CRAY_UGNI_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CRAY_UGNI\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_CUDA_TRUE}" && test -z "${HAVE_CUDA_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CUDA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_GDR_COPY_TRUE}" && test -z "${HAVE_GDR_COPY_FALSE}"; then + as_fn_error $? "conditional \"HAVE_GDR_COPY\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_ROCM_TRUE}" && test -z "${HAVE_ROCM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_ROCM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_HIP_TRUE}" && test -z "${HAVE_HIP_FALSE}"; then + as_fn_error $? "conditional \"HAVE_HIP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_XPMEM_TRUE}" && test -z "${HAVE_XPMEM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_XPMEM\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${HAVE_CMA_TRUE}" && test -z "${HAVE_CMA_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CMA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_KNEM_TRUE}" && test -z "${HAVE_KNEM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_KNEM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_RDMACM_TRUE}" && test -z "${HAVE_RDMACM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_RDMACM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_RDMACM_QP_LESS_TRUE}" && test -z "${HAVE_RDMACM_QP_LESS_FALSE}"; then + as_fn_error $? "conditional \"HAVE_RDMACM_QP_LESS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MPI_TRUE}" && test -z "${HAVE_MPI_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MPI\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MPIRUN_TRUE}" && test -z "${HAVE_MPIRUN_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MPIRUN\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MPICC_TRUE}" && test -z "${HAVE_MPICC_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MPICC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_PROFILING_TRUE}" && test -z "${HAVE_PROFILING_FALSE}"; then + as_fn_error $? "conditional \"HAVE_PROFILING\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_UCM_PTMALLOC286_TRUE}" && test -z "${HAVE_UCM_PTMALLOC286_FALSE}"; then + as_fn_error $? "conditional \"HAVE_UCM_PTMALLOC286\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${HAVE_JAVA_TRUE}" && test -z "${HAVE_JAVA_FALSE}"; then + as_fn_error $? "conditional \"HAVE_JAVA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_CXX11_TRUE}" && test -z "${HAVE_CXX11_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CXX11\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_GNUXX11_TRUE}" && test -z "${HAVE_GNUXX11_FALSE}"; then + as_fn_error $? "conditional \"HAVE_GNUXX11\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TCMALLOC_TRUE}" && test -z "${HAVE_TCMALLOC_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TCMALLOC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${ENABLE_EXPERIMENTAL_API_TRUE}" && test -z "${ENABLE_EXPERIMENTAL_API_FALSE}"; then + as_fn_error $? "conditional \"ENABLE_EXPERIMENTAL_API\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${INSTALL_DEVEL_HEADERS_TRUE}" && test -z "${INSTALL_DEVEL_HEADERS_FALSE}"; then + as_fn_error $? "conditional \"INSTALL_DEVEL_HEADERS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_EXAMPLES_TRUE}" && test -z "${HAVE_EXAMPLES_FALSE}"; then + as_fn_error $? "conditional \"HAVE_EXAMPLES\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${DOCS_ONLY_TRUE}" && test -z "${DOCS_ONLY_FALSE}"; then + as_fn_error $? "conditional \"DOCS_ONLY\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_CXX11_TRUE}" && test -z "${HAVE_CXX11_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CXX11\" was never defined. 
+Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_GNUXX11_TRUE}" && test -z "${HAVE_GNUXX11_FALSE}"; then + as_fn_error $? "conditional \"HAVE_GNUXX11\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_GTEST_TRUE}" && test -z "${HAVE_GTEST_FALSE}"; then + as_fn_error $? "conditional \"HAVE_GTEST\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_PROFILING_TRUE}" && test -z "${HAVE_PROFILING_FALSE}"; then + as_fn_error $? "conditional \"HAVE_PROFILING\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_STATS_TRUE}" && test -z "${HAVE_STATS_FALSE}"; then + as_fn_error $? "conditional \"HAVE_STATS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TUNING_TRUE}" && test -z "${HAVE_TUNING_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TUNING\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MEMTRACK_TRUE}" && test -z "${HAVE_MEMTRACK_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MEMTRACK\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_UCM_PTMALLOC286_TRUE}" && test -z "${HAVE_UCM_PTMALLOC286_FALSE}"; then + as_fn_error $? "conditional \"HAVE_UCM_PTMALLOC286\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TCMALLOC_TRUE}" && test -z "${HAVE_TCMALLOC_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TCMALLOC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MPI_TRUE}" && test -z "${HAVE_MPI_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MPI\" was never defined. 
+Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MPICC_TRUE}" && test -z "${HAVE_MPICC_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MPICC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MPIRUN_TRUE}" && test -z "${HAVE_MPIRUN_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MPIRUN\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_JAVA_TRUE}" && test -z "${HAVE_JAVA_FALSE}"; then + as_fn_error $? "conditional \"HAVE_JAVA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_CUDA_TRUE}" && test -z "${HAVE_CUDA_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CUDA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_ROCM_TRUE}" && test -z "${HAVE_ROCM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_ROCM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_HIP_TRUE}" && test -z "${HAVE_HIP_FALSE}"; then + as_fn_error $? "conditional \"HAVE_HIP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_CUDA_TRUE}" && test -z "${HAVE_CUDA_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CUDA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_GDR_COPY_TRUE}" && test -z "${HAVE_GDR_COPY_FALSE}"; then + as_fn_error $? "conditional \"HAVE_GDR_COPY\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_IB_TRUE}" && test -z "${HAVE_IB_FALSE}"; then + as_fn_error $? "conditional \"HAVE_IB\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${HAVE_TL_RC_TRUE}" && test -z "${HAVE_TL_RC_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TL_RC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TL_DC_TRUE}" && test -z "${HAVE_TL_DC_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TL_DC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_DC_DV_TRUE}" && test -z "${HAVE_DC_DV_FALSE}"; then + as_fn_error $? "conditional \"HAVE_DC_DV\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_DC_EXP_TRUE}" && test -z "${HAVE_DC_EXP_FALSE}"; then + as_fn_error $? "conditional \"HAVE_DC_EXP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TL_UD_TRUE}" && test -z "${HAVE_TL_UD_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TL_UD\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MLX5_HW_TRUE}" && test -z "${HAVE_MLX5_HW_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MLX5_HW\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_MLX5_DV_TRUE}" && test -z "${HAVE_MLX5_DV_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MLX5_DV\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_DEVX_TRUE}" && test -z "${HAVE_DEVX_FALSE}"; then + as_fn_error $? "conditional \"HAVE_DEVX\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_EXP_TRUE}" && test -z "${HAVE_EXP_FALSE}"; then + as_fn_error $? "conditional \"HAVE_EXP\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${HAVE_MLX5_HW_UD_TRUE}" && test -z "${HAVE_MLX5_HW_UD_FALSE}"; then + as_fn_error $? "conditional \"HAVE_MLX5_HW_UD\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_TL_CM_TRUE}" && test -z "${HAVE_TL_CM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_TL_CM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_RDMACM_TRUE}" && test -z "${HAVE_RDMACM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_RDMACM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_RDMACM_QP_LESS_TRUE}" && test -z "${HAVE_RDMACM_QP_LESS_FALSE}"; then + as_fn_error $? "conditional \"HAVE_RDMACM_QP_LESS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_ROCM_TRUE}" && test -z "${HAVE_ROCM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_ROCM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_HIP_TRUE}" && test -z "${HAVE_HIP_FALSE}"; then + as_fn_error $? "conditional \"HAVE_HIP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_GDR_COPY_TRUE}" && test -z "${HAVE_GDR_COPY_FALSE}"; then + as_fn_error $? "conditional \"HAVE_GDR_COPY\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_CMA_TRUE}" && test -z "${HAVE_CMA_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CMA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_KNEM_TRUE}" && test -z "${HAVE_KNEM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_KNEM\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${HAVE_XPMEM_TRUE}" && test -z "${HAVE_XPMEM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_XPMEM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_CRAY_UGNI_TRUE}" && test -z "${HAVE_CRAY_UGNI_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CRAY_UGNI\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_CUDA_TRUE}" && test -z "${HAVE_CUDA_FALSE}"; then + as_fn_error $? "conditional \"HAVE_CUDA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_ROCM_TRUE}" && test -z "${HAVE_ROCM_FALSE}"; then + as_fn_error $? "conditional \"HAVE_ROCM\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_HIP_TRUE}" && test -z "${HAVE_HIP_FALSE}"; then + as_fn_error $? "conditional \"HAVE_HIP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${ENABLE_EXPERIMENTAL_API_TRUE}" && test -z "${ENABLE_EXPERIMENTAL_API_FALSE}"; then + as_fn_error $? "conditional \"ENABLE_EXPERIMENTAL_API\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${INSTALL_DEVEL_HEADERS_TRUE}" && test -z "${INSTALL_DEVEL_HEADERS_FALSE}"; then + as_fn_error $? "conditional \"INSTALL_DEVEL_HEADERS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${HAVE_EXAMPLES_TRUE}" && test -z "${HAVE_EXAMPLES_FALSE}"; then + as_fn_error $? "conditional \"HAVE_EXAMPLES\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. 
+if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! 
-f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. 
Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
+ xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by ucx $as_me 1.8, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. 
+config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_links="$ac_config_links" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration links: +$config_links + +Configuration commands: +$config_commands + +Report bugs to the package provider." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +ucx config.status 1.8 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. 
+ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." 
;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}" + + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' +macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' +enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' +enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' +pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' +enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' +shared_archive_member_spec='`$ECHO "$shared_archive_member_spec" | $SED "$delay_single_quote_subst"`' +SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`' +ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`' +PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`' +host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`' +host='`$ECHO 
"$host" | $SED "$delay_single_quote_subst"`' +host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`' +build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`' +build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`' +build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`' +SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`' +Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`' +GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`' +EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`' +FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`' +LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`' +NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`' +LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`' +max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`' +ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`' +exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`' +lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`' +lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`' +lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`' +lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`' +lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' +reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' +reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' +OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`' +deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' +file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' +file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`' +want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' +DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`' 
+sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' +AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' +AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' +archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' +STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' +RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`' +old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`' +old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`' +lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`' +CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`' +CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`' +compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`' +GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_import='`$ECHO "$lt_cv_sys_global_symbol_to_import" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`' +lt_cv_nm_interface='`$ECHO "$lt_cv_nm_interface" | $SED "$delay_single_quote_subst"`' +nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`' +lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`' +lt_cv_truncate_bin='`$ECHO "$lt_cv_truncate_bin" | $SED 
"$delay_single_quote_subst"`' +objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`' +MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`' +need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`' +MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`' +DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`' +NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`' +LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`' +OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`' +OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`' +libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`' +shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`' +extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`' +compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds='`$ECHO 
"$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`' +module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`' +with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`' +no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`' +hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`' +hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`' +inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`' +link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`' +always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`' +exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`' +include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`' +prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`' +postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`' +file_list_spec='`$ECHO "$file_list_spec" | $SED 
"$delay_single_quote_subst"`' +variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`' +need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`' +need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`' +version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`' +runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`' +libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`' +library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`' +soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`' +install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`' +postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`' +postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`' +finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`' +hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`' +sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`' +configure_time_dlsearch_path='`$ECHO "$configure_time_dlsearch_path" | $SED "$delay_single_quote_subst"`' +configure_time_lt_sys_library_path='`$ECHO "$configure_time_lt_sys_library_path" | $SED "$delay_single_quote_subst"`' +hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`' +enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED 
"$delay_single_quote_subst"`' +old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`' +striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_dirs='`$ECHO "$compiler_lib_search_dirs" | $SED "$delay_single_quote_subst"`' +predep_objects='`$ECHO "$predep_objects" | $SED "$delay_single_quote_subst"`' +postdep_objects='`$ECHO "$postdep_objects" | $SED "$delay_single_quote_subst"`' +predeps='`$ECHO "$predeps" | $SED "$delay_single_quote_subst"`' +postdeps='`$ECHO "$postdeps" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_path='`$ECHO "$compiler_lib_search_path" | $SED "$delay_single_quote_subst"`' +LD_CXX='`$ECHO "$LD_CXX" | $SED "$delay_single_quote_subst"`' +reload_flag_CXX='`$ECHO "$reload_flag_CXX" | $SED "$delay_single_quote_subst"`' +reload_cmds_CXX='`$ECHO "$reload_cmds_CXX" | $SED "$delay_single_quote_subst"`' +old_archive_cmds_CXX='`$ECHO "$old_archive_cmds_CXX" | $SED "$delay_single_quote_subst"`' +compiler_CXX='`$ECHO "$compiler_CXX" | $SED "$delay_single_quote_subst"`' +GCC_CXX='`$ECHO "$GCC_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag_CXX='`$ECHO "$lt_prog_compiler_no_builtin_flag_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic_CXX='`$ECHO "$lt_prog_compiler_pic_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl_CXX='`$ECHO "$lt_prog_compiler_wl_CXX" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static_CXX='`$ECHO "$lt_prog_compiler_static_CXX" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o_CXX='`$ECHO "$lt_cv_prog_compiler_c_o_CXX" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc_CXX='`$ECHO "$archive_cmds_need_lc_CXX" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes_CXX='`$ECHO "$enable_shared_with_static_runtimes_CXX" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec_CXX='`$ECHO "$export_dynamic_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' 
+whole_archive_flag_spec_CXX='`$ECHO "$whole_archive_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' +compiler_needs_object_CXX='`$ECHO "$compiler_needs_object_CXX" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds_CXX='`$ECHO "$old_archive_from_new_cmds_CXX" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds_CXX='`$ECHO "$old_archive_from_expsyms_cmds_CXX" | $SED "$delay_single_quote_subst"`' +archive_cmds_CXX='`$ECHO "$archive_cmds_CXX" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds_CXX='`$ECHO "$archive_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`' +module_cmds_CXX='`$ECHO "$module_cmds_CXX" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds_CXX='`$ECHO "$module_expsym_cmds_CXX" | $SED "$delay_single_quote_subst"`' +with_gnu_ld_CXX='`$ECHO "$with_gnu_ld_CXX" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`' +no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L_CXX='`$ECHO "$hardcode_minus_L_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var_CXX='`$ECHO "$hardcode_shlibpath_var_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_automatic_CXX='`$ECHO "$hardcode_automatic_CXX" | $SED "$delay_single_quote_subst"`' +inherit_rpath_CXX='`$ECHO "$inherit_rpath_CXX" | $SED "$delay_single_quote_subst"`' +link_all_deplibs_CXX='`$ECHO "$link_all_deplibs_CXX" | $SED "$delay_single_quote_subst"`' +always_export_symbols_CXX='`$ECHO 
"$always_export_symbols_CXX" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds_CXX='`$ECHO "$export_symbols_cmds_CXX" | $SED "$delay_single_quote_subst"`' +exclude_expsyms_CXX='`$ECHO "$exclude_expsyms_CXX" | $SED "$delay_single_quote_subst"`' +include_expsyms_CXX='`$ECHO "$include_expsyms_CXX" | $SED "$delay_single_quote_subst"`' +prelink_cmds_CXX='`$ECHO "$prelink_cmds_CXX" | $SED "$delay_single_quote_subst"`' +postlink_cmds_CXX='`$ECHO "$postlink_cmds_CXX" | $SED "$delay_single_quote_subst"`' +file_list_spec_CXX='`$ECHO "$file_list_spec_CXX" | $SED "$delay_single_quote_subst"`' +hardcode_action_CXX='`$ECHO "$hardcode_action_CXX" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_dirs_CXX='`$ECHO "$compiler_lib_search_dirs_CXX" | $SED "$delay_single_quote_subst"`' +predep_objects_CXX='`$ECHO "$predep_objects_CXX" | $SED "$delay_single_quote_subst"`' +postdep_objects_CXX='`$ECHO "$postdep_objects_CXX" | $SED "$delay_single_quote_subst"`' +predeps_CXX='`$ECHO "$predeps_CXX" | $SED "$delay_single_quote_subst"`' +postdeps_CXX='`$ECHO "$postdeps_CXX" | $SED "$delay_single_quote_subst"`' +compiler_lib_search_path_CXX='`$ECHO "$compiler_lib_search_path_CXX" | $SED "$delay_single_quote_subst"`' + +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + +# Quote evaled strings. 
+for var in SHELL \ +ECHO \ +PATH_SEPARATOR \ +SED \ +GREP \ +EGREP \ +FGREP \ +LD \ +NM \ +LN_S \ +lt_SP2NL \ +lt_NL2SP \ +reload_flag \ +OBJDUMP \ +deplibs_check_method \ +file_magic_cmd \ +file_magic_glob \ +want_nocaseglob \ +DLLTOOL \ +sharedlib_from_linklib_cmd \ +AR \ +AR_FLAGS \ +archiver_list_spec \ +STRIP \ +RANLIB \ +CC \ +CFLAGS \ +compiler \ +lt_cv_sys_global_symbol_pipe \ +lt_cv_sys_global_symbol_to_cdecl \ +lt_cv_sys_global_symbol_to_import \ +lt_cv_sys_global_symbol_to_c_name_address \ +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \ +lt_cv_nm_interface \ +nm_file_list_spec \ +lt_cv_truncate_bin \ +lt_prog_compiler_no_builtin_flag \ +lt_prog_compiler_pic \ +lt_prog_compiler_wl \ +lt_prog_compiler_static \ +lt_cv_prog_compiler_c_o \ +need_locks \ +MANIFEST_TOOL \ +DSYMUTIL \ +NMEDIT \ +LIPO \ +OTOOL \ +OTOOL64 \ +shrext_cmds \ +export_dynamic_flag_spec \ +whole_archive_flag_spec \ +compiler_needs_object \ +with_gnu_ld \ +allow_undefined_flag \ +no_undefined_flag \ +hardcode_libdir_flag_spec \ +hardcode_libdir_separator \ +exclude_expsyms \ +include_expsyms \ +file_list_spec \ +variables_saved_for_relink \ +libname_spec \ +library_names_spec \ +soname_spec \ +install_override_mode \ +finish_eval \ +old_striplib \ +striplib \ +compiler_lib_search_dirs \ +predep_objects \ +postdep_objects \ +predeps \ +postdeps \ +compiler_lib_search_path \ +LD_CXX \ +reload_flag_CXX \ +compiler_CXX \ +lt_prog_compiler_no_builtin_flag_CXX \ +lt_prog_compiler_pic_CXX \ +lt_prog_compiler_wl_CXX \ +lt_prog_compiler_static_CXX \ +lt_cv_prog_compiler_c_o_CXX \ +export_dynamic_flag_spec_CXX \ +whole_archive_flag_spec_CXX \ +compiler_needs_object_CXX \ +with_gnu_ld_CXX \ +allow_undefined_flag_CXX \ +no_undefined_flag_CXX \ +hardcode_libdir_flag_spec_CXX \ +hardcode_libdir_separator_CXX \ +exclude_expsyms_CXX \ +include_expsyms_CXX \ +file_list_spec_CXX \ +compiler_lib_search_dirs_CXX \ +predep_objects_CXX \ +postdep_objects_CXX \ +predeps_CXX \ +postdeps_CXX \ 
+compiler_lib_search_path_CXX; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. +for var in reload_cmds \ +old_postinstall_cmds \ +old_postuninstall_cmds \ +old_archive_cmds \ +extract_expsyms_cmds \ +old_archive_from_new_cmds \ +old_archive_from_expsyms_cmds \ +archive_cmds \ +archive_expsym_cmds \ +module_cmds \ +module_expsym_cmds \ +export_symbols_cmds \ +prelink_cmds \ +postlink_cmds \ +postinstall_cmds \ +postuninstall_cmds \ +finish_cmds \ +sys_lib_search_path_spec \ +configure_time_dlsearch_path \ +configure_time_lt_sys_library_path \ +reload_cmds_CXX \ +old_archive_cmds_CXX \ +old_archive_from_new_cmds_CXX \ +old_archive_from_expsyms_cmds_CXX \ +archive_cmds_CXX \ +archive_expsym_cmds_CXX \ +module_cmds_CXX \ +module_expsym_cmds_CXX \ +export_symbols_cmds_CXX \ +prelink_cmds_CXX \ +postlink_cmds_CXX; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +ac_aux_dir='$ac_aux_dir' + +# See if we are running on zsh, and set the options that allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + + + PACKAGE='$PACKAGE' + VERSION='$VERSION' + RM='$RM' + ofile='$ofile' + + + + + + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. 
+for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "src/ucm/cuda/Makefile") CONFIG_FILES="$CONFIG_FILES src/ucm/cuda/Makefile" ;; + "src/ucm/rocm/Makefile") CONFIG_FILES="$CONFIG_FILES src/ucm/rocm/Makefile" ;; + "src/ucm/Makefile") CONFIG_FILES="$CONFIG_FILES src/ucm/Makefile" ;; + "src/uct/cuda/gdr_copy/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/cuda/gdr_copy/Makefile" ;; + "src/uct/cuda/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/cuda/Makefile" ;; + "src/uct/ib/cm/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/ib/cm/Makefile" ;; + "src/uct/ib/rdmacm/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/ib/rdmacm/Makefile" ;; + "src/uct/ib/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/ib/Makefile" ;; + "src/uct/rocm/gdr/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/rocm/gdr/Makefile" ;; + "src/uct/rocm/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/rocm/Makefile" ;; + "src/uct/sm/cma/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/sm/cma/Makefile" ;; + "src/uct/sm/knem/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/sm/knem/Makefile" ;; + "src/uct/sm/mm/xpmem/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/sm/mm/xpmem/Makefile" ;; + "src/uct/sm/mm/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/sm/mm/Makefile" ;; + "src/uct/sm/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/sm/Makefile" ;; + "src/uct/ugni/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/ugni/Makefile" ;; + "src/uct/Makefile") CONFIG_FILES="$CONFIG_FILES src/uct/Makefile" ;; + "src/tools/perf/lib/Makefile") CONFIG_FILES="$CONFIG_FILES src/tools/perf/lib/Makefile" ;; + "src/tools/perf/cuda/Makefile") CONFIG_FILES="$CONFIG_FILES src/tools/perf/cuda/Makefile" ;; + "src/tools/perf/rocm/Makefile") CONFIG_FILES="$CONFIG_FILES src/tools/perf/rocm/Makefile" ;; + "src/tools/perf/Makefile") 
CONFIG_FILES="$CONFIG_FILES src/tools/perf/Makefile" ;; + "test/gtest/ucm/test_dlopen/Makefile") CONFIG_FILES="$CONFIG_FILES test/gtest/ucm/test_dlopen/Makefile" ;; + "test/gtest/ucm/test_dlopen/rpath-subdir/Makefile") CONFIG_FILES="$CONFIG_FILES test/gtest/ucm/test_dlopen/rpath-subdir/Makefile" ;; + "test/gtest/ucs/test_module/Makefile") CONFIG_FILES="$CONFIG_FILES test/gtest/ucs/test_module/Makefile" ;; + "test/gtest/Makefile") CONFIG_FILES="$CONFIG_FILES test/gtest/Makefile" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "docs/doxygen/header.tex") CONFIG_FILES="$CONFIG_FILES docs/doxygen/header.tex" ;; + "src/uct/api/version.h") CONFIG_FILES="$CONFIG_FILES src/uct/api/version.h" ;; + "debian/compat") CONFIG_LINKS="$CONFIG_LINKS debian/compat:debian/compat" ;; + "debian/copyright") CONFIG_LINKS="$CONFIG_LINKS debian/copyright:debian/copyright" ;; + "debian/ucx.prerm") CONFIG_LINKS="$CONFIG_LINKS debian/ucx.prerm:debian/ucx.prerm" ;; + "ucx.spec") CONFIG_FILES="$CONFIG_FILES ucx.spec" ;; + "ucx.pc") CONFIG_FILES="$CONFIG_FILES ucx.pc" ;; + "contrib/rpmdef.sh") CONFIG_FILES="$CONFIG_FILES contrib/rpmdef.sh" ;; + "debian/rules") CONFIG_FILES="$CONFIG_FILES debian/rules" ;; + "debian/control") CONFIG_FILES="$CONFIG_FILES debian/control" ;; + "debian/changelog") CONFIG_FILES="$CONFIG_FILES debian/changelog" ;; + "src/ucs/Makefile") CONFIG_FILES="$CONFIG_FILES src/ucs/Makefile" ;; + "src/ucp/Makefile") CONFIG_FILES="$CONFIG_FILES src/ucp/Makefile" ;; + "src/ucp/api/ucp_version.h") CONFIG_FILES="$CONFIG_FILES src/ucp/api/ucp_version.h" ;; + "src/ucp/core/ucp_version.c") CONFIG_FILES="$CONFIG_FILES src/ucp/core/ucp_version.c" ;; + "src/tools/info/Makefile") CONFIG_FILES="$CONFIG_FILES src/tools/info/Makefile" ;; + "src/tools/profile/Makefile") CONFIG_FILES="$CONFIG_FILES src/tools/profile/Makefile" ;; + "test/apps/Makefile") CONFIG_FILES="$CONFIG_FILES test/apps/Makefile" ;; + "test/apps/sockaddr/Makefile") CONFIG_FILES="$CONFIG_FILES 
test/apps/sockaddr/Makefile" ;; + "test/examples/Makefile") CONFIG_FILES="$CONFIG_FILES test/examples/Makefile" ;; + "test/mpi/Makefile") CONFIG_FILES="$CONFIG_FILES test/mpi/Makefile" ;; + "bindings/java/Makefile") CONFIG_FILES="$CONFIG_FILES bindings/java/Makefile" ;; + "bindings/java/pom.xml") CONFIG_FILES="$CONFIG_FILES bindings/java/pom.xml" ;; + "bindings/java/src/main/native/Makefile") CONFIG_FILES="$CONFIG_FILES bindings/java/src/main/native/Makefile" ;; + "test/mpi/run_mpi.sh") CONFIG_FILES="$CONFIG_FILES test/mpi/run_mpi.sh" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_LINKS+set}" = set || CONFIG_LINKS=$config_links + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. 
+ +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :L $CONFIG_LINKS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi +# Compute "$ac_file"'s index in $config_headers. 
+_am_arg="$ac_file" +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || +$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$_am_arg" : 'X\(//\)[^/]' \| \ + X"$_am_arg" : 'X\(//\)$' \| \ + X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$_am_arg" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + :L) + # + # CONFIG_LINK + # + + if test "$ac_source" = "$ac_file" && test "$srcdir" = '.'; then + : + else + # Prefer the file from the source tree if names are identical. + if test "$ac_source" = "$ac_file" || test ! -r "$ac_source"; then + ac_source=$srcdir/$ac_source + fi + + { $as_echo "$as_me:${as_lineno-$LINENO}: linking $ac_source to $ac_file" >&5 +$as_echo "$as_me: linking $ac_source to $ac_file" >&6;} + + if test ! -r "$ac_source"; then + as_fn_error $? "$ac_source: file not found" "$LINENO" 5 + fi + rm -f "$ac_file" + + # Try a relative symlink, then a hard link, then a copy. + case $ac_source in + [\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;; + *) ac_rel_source=$ac_top_build_prefix$ac_source ;; + esac + ln -s "$ac_rel_source" "$ac_file" 2>/dev/null || + ln "$ac_source" "$ac_file" 2>/dev/null || + cp -p "$ac_source" "$ac_file" || + as_fn_error $? "cannot link or copy $ac_source to $ac_file" "$LINENO" 5 + fi + ;; + :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +$as_echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. 
Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. + case $CONFIG_FILES in #( + *\'*) : + eval set x "$CONFIG_FILES" ;; #( + *) : + set x $CONFIG_FILES ;; #( + *) : + ;; +esac + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know understand at least 4000. + sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`$as_dirname -- "$am_mf" || +$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$am_mf" : 'X\(//\)[^/]' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$am_mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + am_filepart=`$as_basename -- "$am_mf" || +$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$am_mf" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { echo "$as_me:$LINENO: cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles" >&5 + (cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } || am_rc=$? 
+ done + if test $am_rc -ne 0; then + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. Try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking). +See \`config.log' for more details" "$LINENO" 5; } + fi + { am_dirpart=; unset am_dirpart;} + { am_filepart=; unset am_filepart;} + { am_mf=; unset am_mf;} + { am_rc=; unset am_rc;} + rm -f conftest-deps.mk +} + ;; + "libtool":C) + + # See if we are running on zsh, and set the options that allow our + # commands through without removal of \ escapes. + if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST + fi + + cfgfile=${ofile}T + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL +# Generated automatically by $as_me ($PACKAGE) $VERSION +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: +# NOTE: Changes made to this file will be lost: look at ltmain.sh. + +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit, 1996 + +# Copyright (C) 2014 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of of the License, or +# (at your option) any later version. 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program or library that is built +# using GNU Libtool, you may include this file under the same +# distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +# The names of the tagged configurations supported by this script. +available_tags='CXX ' + +# Configured defaults for sys_lib_dlsearch_path munging. +: \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} + +# ### BEGIN LIBTOOL CONFIG + +# Which release of libtool.m4 was used? +macro_version=$macro_version +macro_revision=$macro_revision + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Whether or not to build static libraries. +build_old_libs=$enable_static + +# What type of objects to build. +pic_mode=$pic_mode + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# Shared archive member basename,for filename based shared library versioning on AIX. +shared_archive_member_spec=$shared_archive_member_spec + +# Shell to use when invoking shell scripts. +SHELL=$lt_SHELL + +# An echo program that protects backslashes. +ECHO=$lt_ECHO + +# The PATH separator for the build system. +PATH_SEPARATOR=$lt_PATH_SEPARATOR + +# The host system. +host_alias=$host_alias +host=$host +host_os=$host_os + +# The build system. +build_alias=$build_alias +build=$build +build_os=$build_os + +# A sed program that does not truncate output. +SED=$lt_SED + +# Sed that helps us avoid accidentally triggering echo(1) options like -n. 
+Xsed="\$SED -e 1s/^X//" + +# A grep program that handles long lines. +GREP=$lt_GREP + +# An ERE matcher. +EGREP=$lt_EGREP + +# A literal string matcher. +FGREP=$lt_FGREP + +# A BSD- or MS-compatible name lister. +NM=$lt_NM + +# Whether we need soft or hard links. +LN_S=$lt_LN_S + +# What is the maximum length of a command? +max_cmd_len=$max_cmd_len + +# Object file suffix (normally "o"). +objext=$ac_objext + +# Executable file suffix (normally ""). +exeext=$exeext + +# whether the shell understands "unset". +lt_unset=$lt_unset + +# turn spaces into newlines. +SP2NL=$lt_lt_SP2NL + +# turn newlines into spaces. +NL2SP=$lt_lt_NL2SP + +# convert \$build file names to \$host format. +to_host_file_cmd=$lt_cv_to_host_file_cmd + +# convert \$build files to toolchain format. +to_tool_file_cmd=$lt_cv_to_tool_file_cmd + +# An object symbol dumper. +OBJDUMP=$lt_OBJDUMP + +# Method to check whether dependent libraries are shared objects. +deplibs_check_method=$lt_deplibs_check_method + +# Command to use when deplibs_check_method = "file_magic". +file_magic_cmd=$lt_file_magic_cmd + +# How to find potential files when deplibs_check_method = "file_magic". +file_magic_glob=$lt_file_magic_glob + +# Find potential files using nocaseglob when deplibs_check_method = "file_magic". +want_nocaseglob=$lt_want_nocaseglob + +# DLL creation program. +DLLTOOL=$lt_DLLTOOL + +# Command to associate shared and link libraries. +sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd + +# The archiver. +AR=$lt_AR + +# Flags to create an archive. +AR_FLAGS=$lt_AR_FLAGS + +# How to feed a file listing to the archiver. +archiver_list_spec=$lt_archiver_list_spec + +# A symbol stripping program. +STRIP=$lt_STRIP + +# Commands used to install an old-style archive. +RANLIB=$lt_RANLIB +old_postinstall_cmds=$lt_old_postinstall_cmds +old_postuninstall_cmds=$lt_old_postuninstall_cmds + +# Whether to use a lock for old archive extraction. 
+lock_old_archive_extraction=$lock_old_archive_extraction + +# A C compiler. +LTCC=$lt_CC + +# LTCC compiler flags. +LTCFLAGS=$lt_CFLAGS + +# Take the output of nm and produce a listing of raw symbols and C names. +global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe + +# Transform the output of nm in a proper C declaration. +global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl + +# Transform the output of nm into a list of symbols to manually relocate. +global_symbol_to_import=$lt_lt_cv_sys_global_symbol_to_import + +# Transform the output of nm in a C name address pair. +global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address + +# Transform the output of nm in a C name address pair when lib prefix is needed. +global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix + +# The name lister interface. +nm_interface=$lt_lt_cv_nm_interface + +# Specify filename containing input files for \$NM. +nm_file_list_spec=$lt_nm_file_list_spec + +# The root where to search for dependent libraries,and where our libraries should be installed. +lt_sysroot=$lt_sysroot + +# Command to truncate a binary pipe. +lt_truncate_bin=$lt_lt_cv_truncate_bin + +# The name of the directory that contains temporary libtool files. +objdir=$objdir + +# Used to examine libraries when file_magic_cmd begins with "file". +MAGIC_CMD=$MAGIC_CMD + +# Must we lock files when doing compilation? +need_locks=$lt_need_locks + +# Manifest tool. +MANIFEST_TOOL=$lt_MANIFEST_TOOL + +# Tool to manipulate archived DWARF debug symbol files on Mac OS X. +DSYMUTIL=$lt_DSYMUTIL + +# Tool to change global to local symbols on Mac OS X. +NMEDIT=$lt_NMEDIT + +# Tool to manipulate fat objects and archives on Mac OS X. +LIPO=$lt_LIPO + +# ldd/readelf like tool for Mach-O binaries on Mac OS X. +OTOOL=$lt_OTOOL + +# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4. +OTOOL64=$lt_OTOOL64 + +# Old archive suffix (normally "a"). 
+libext=$libext + +# Shared library suffix (normally ".so"). +shrext_cmds=$lt_shrext_cmds + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$lt_extract_expsyms_cmds + +# Variables whose values should be saved in libtool wrapper scripts and +# restored at link time. +variables_saved_for_relink=$lt_variables_saved_for_relink + +# Do we need the "lib" prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Library versioning type. +version_type=$version_type + +# Shared library runtime path variable. +runpath_var=$runpath_var + +# Shared library path variable. +shlibpath_var=$shlibpath_var + +# Is shlibpath searched before the hard-coded library search path? +shlibpath_overrides_runpath=$shlibpath_overrides_runpath + +# Format of library name prefix. +libname_spec=$lt_libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME +library_names_spec=$lt_library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$lt_soname_spec + +# Permission mode override for installation of shared libraries. +install_override_mode=$lt_install_override_mode + +# Command to use after installation of a shared archive. +postinstall_cmds=$lt_postinstall_cmds + +# Command to use after uninstallation of a shared archive. +postuninstall_cmds=$lt_postuninstall_cmds + +# Commands used to finish a libtool library installation in a directory. +finish_cmds=$lt_finish_cmds + +# As "finish_cmds", except a single script fragment to be evaled but +# not shown. +finish_eval=$lt_finish_eval + +# Whether we should hardcode library paths into libraries. +hardcode_into_libs=$hardcode_into_libs + +# Compile-time system search path for libraries. +sys_lib_search_path_spec=$lt_sys_lib_search_path_spec + +# Detected run-time system search path for libraries. 
+sys_lib_dlsearch_path_spec=$lt_configure_time_dlsearch_path + +# Explicit LT_SYS_LIBRARY_PATH set during ./configure time. +configure_time_lt_sys_library_path=$lt_configure_time_lt_sys_library_path + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Commands to strip libraries. +old_striplib=$lt_old_striplib +striplib=$lt_striplib + + +# The linker used to build libraries. +LD=$lt_LD + +# How to create reloadable object files. +reload_flag=$lt_reload_flag +reload_cmds=$lt_reload_cmds + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds + +# A language specific compiler. +CC=$lt_compiler + +# Is the compiler the GNU compiler? +with_gcc=$GCC + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec + +# Whether the compiler copes with passing no objects directly. 
+compiler_needs_object=$lt_compiler_needs_object + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds +archive_expsym_cmds=$lt_archive_expsym_cmds + +# Commands used to build a loadable module if different from building +# a shared archive. +module_cmds=$lt_module_cmds +module_expsym_cmds=$lt_module_expsym_cmds + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary. +hardcode_direct=$hardcode_direct + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \$shlibpath_var if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. 
+hardcode_shlibpath_var=$hardcode_shlibpath_var + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action + +# The directories searched by this compiler when creating a shared library. +compiler_lib_search_dirs=$lt_compiler_lib_search_dirs + +# Dependencies to place before and after the objects being linked to +# create a shared library. +predep_objects=$lt_predep_objects +postdep_objects=$lt_postdep_objects +predeps=$lt_predeps +postdeps=$lt_postdeps + +# The library search path used internally by the compiler when linking +# a shared library. 
+compiler_lib_search_path=$lt_compiler_lib_search_path + +# ### END LIBTOOL CONFIG + +_LT_EOF + + cat <<'_LT_EOF' >> "$cfgfile" + +# ### BEGIN FUNCTIONS SHARED WITH CONFIGURE + +# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list () +{ + case x$2 in + x) + ;; + *:) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'` \$$1\" + ;; + x:*) + eval $1=\"\$$1 `$ECHO $2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval $1=\"\$$1\ `$ECHO $2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval $1=\"`$ECHO $2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \$$1\" + ;; + *) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'`\" + ;; + esac +} + + +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. +func_cc_basename () +{ + for cc_temp in $*""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} + + +# ### END FUNCTIONS SHARED WITH CONFIGURE + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. 
+if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + +ltmain=$ac_aux_dir/ltmain.sh + + + # We use sed instead of cat because bash on DJGPP gets confused if + # it finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + sed '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" + + + cat <<_LT_EOF >> "$ofile" + +# ### BEGIN LIBTOOL TAG CONFIG: CXX + +# The linker used to build libraries. +LD=$lt_LD_CXX + +# How to create reloadable object files. +reload_flag=$lt_reload_flag_CXX +reload_cmds=$lt_reload_cmds_CXX + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds_CXX + +# A language specific compiler. +CC=$lt_compiler_CXX + +# Is the compiler the GNU compiler? +with_gcc=$GCC_CXX + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_CXX + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic_CXX + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl_CXX + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static_CXX + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o_CXX + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc_CXX + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_CXX + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_CXX + +# Compiler flag to generate shared objects directly from archives. 
+whole_archive_flag_spec=$lt_whole_archive_flag_spec_CXX + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object_CXX + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_CXX + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_CXX + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds_CXX +archive_expsym_cmds=$lt_archive_expsym_cmds_CXX + +# Commands used to build a loadable module if different from building +# a shared archive. +module_cmds=$lt_module_cmds_CXX +module_expsym_cmds=$lt_module_expsym_cmds_CXX + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld_CXX + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag_CXX + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag_CXX + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary. +hardcode_direct=$hardcode_direct_CXX + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \$shlibpath_var if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute_CXX + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. 
+hardcode_minus_L=$hardcode_minus_L_CXX + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var_CXX + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic_CXX + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath_CXX + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs_CXX + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols_CXX + +# The commands to list exported symbols. +export_symbols_cmds=$lt_export_symbols_cmds_CXX + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms_CXX + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms_CXX + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds_CXX + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds_CXX + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec_CXX + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action_CXX + +# The directories searched by this compiler when creating a shared library. +compiler_lib_search_dirs=$lt_compiler_lib_search_dirs_CXX + +# Dependencies to place before and after the objects being linked to +# create a shared library. +predep_objects=$lt_predep_objects_CXX +postdep_objects=$lt_postdep_objects_CXX +predeps=$lt_predeps_CXX +postdeps=$lt_postdeps_CXX + +# The library search path used internally by the compiler when linking +# a shared library. 
+compiler_lib_search_path=$lt_compiler_lib_search_path_CXX + +# ### END LIBTOOL TAG CONFIG: CXX +_LT_EOF + + ;; + "test/mpi/run_mpi.sh":F) chmod a+x test/mpi/run_mpi.sh ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. 
+ $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + + + +# +# Print build condiguration +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: =========================================================" >&5 +$as_echo "$as_me: =========================================================" >&6;} +if test "x$with_docs_only" = xyes; then : + +{ $as_echo "$as_me:${as_lineno-$LINENO}: Building documents only" >&5 +$as_echo "$as_me: Building documents only" >&6;} + +else + +{ $as_echo "$as_me:${as_lineno-$LINENO}: UCX build configuration:" >&5 +$as_echo "$as_me: UCX build configuration:" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: Build prefix: ${prefix}" >&5 +$as_echo "$as_me: Build prefix: ${prefix}" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: Preprocessor flags: ${BASE_CPPFLAGS}" >&5 +$as_echo "$as_me: Preprocessor flags: ${BASE_CPPFLAGS}" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: C compiler: ${CC} ${BASE_CFLAGS}" >&5 +$as_echo "$as_me: C compiler: ${CC} ${BASE_CFLAGS}" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: C++ compiler: ${CXX} ${BASE_CXXFLAGS}" >&5 +$as_echo "$as_me: C++ compiler: ${CXX} ${BASE_CXXFLAGS}" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: Multi-thread: ${mt_enable}" >&5 +$as_echo "$as_me: Multi-thread: ${mt_enable}" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: MPI tests: ${mpi_enable}" >&5 +$as_echo "$as_me: MPI tests: ${mpi_enable}" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: Devel headers: ${enable_devel_headers}" >&5 +$as_echo "$as_me: Devel headers: ${enable_devel_headers}" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: Bindings: <$(echo ${build_bindings}|tr ':' ' ') >" >&5 +$as_echo "$as_me: Bindings: <$(echo ${build_bindings}|tr ':' ' ') >" >&6;} +{ $as_echo 
"$as_me:${as_lineno-$LINENO}: UCT modules: <$(echo ${uct_modules}|tr ':' ' ') >" >&5 +$as_echo "$as_me: UCT modules: <$(echo ${uct_modules}|tr ':' ' ') >" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: CUDA modules: <$(echo ${uct_cuda_modules}|tr ':' ' ') >" >&5 +$as_echo "$as_me: CUDA modules: <$(echo ${uct_cuda_modules}|tr ':' ' ') >" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: ROCM modules: <$(echo ${uct_rocm_modules}|tr ':' ' ') >" >&5 +$as_echo "$as_me: ROCM modules: <$(echo ${uct_rocm_modules}|tr ':' ' ') >" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: IB modules: <$(echo ${uct_ib_modules}|tr ':' ' ') >" >&5 +$as_echo "$as_me: IB modules: <$(echo ${uct_ib_modules}|tr ':' ' ') >" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: UCM modules: <$(echo ${ucm_modules}|tr ':' ' ') >" >&5 +$as_echo "$as_me: UCM modules: <$(echo ${ucm_modules}|tr ':' ' ') >" >&6;} +{ $as_echo "$as_me:${as_lineno-$LINENO}: Perf modules: <$(echo ${ucx_perftest_modules}|tr ':' ' ') >" >&5 +$as_echo "$as_me: Perf modules: <$(echo ${ucx_perftest_modules}|tr ':' ' ') >" >&6;} + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: =========================================================" >&5 +$as_echo "$as_me: =========================================================" >&6;} diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..7e84728 --- /dev/null +++ b/configure.ac @@ -0,0 +1,398 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. +# Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +# Copyright (C) The University of Tennessee and The University +# of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +# Copyright (C) ARM Ltd. 2016-2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# +AC_PREREQ([2.63]) + +define([ucx_ver_major], 1) +define([ucx_ver_minor], 8) +define([ucx_ver_patch], 0) +define([ts], esyscmd([sh -c "date +%Y%m%d%H%M%S"])) + +# This is the API version (see libtool library versioning) +# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html +# current:rev:age +define([libucx_so_version], 0:0:0) + +AC_INIT([ucx], [ucx_ver_major.ucx_ver_minor]) +: ${CPPFLAGS=""} +: ${CFLAGS=""} +: ${CXXFLAGS=""} +config_flags="$*" +valgrind_libpath="" + +AC_USE_SYSTEM_EXTENSIONS +AC_GNU_SOURCE +AC_CONFIG_HEADERS([config.h]) + +AC_CHECK_PROG(GITBIN,git,yes) +AS_IF([test x"${GITBIN}" = x"yes"], + [AC_SUBST(SCM_BRANCH, esyscmd([sh -c 'git symbolic-ref --short HEAD'])) + AC_SUBST(SCM_VERSION, esyscmd([sh -c 'git rev-parse --short=7 HEAD']))], + [AC_SUBST(SCM_BRANCH, "") + AC_SUBST(SCM_VERSION, "0000000")]) + +AH_TOP([ +#ifndef UCX_CONFIG_H +#define UCX_CONFIG_H +]) + +AH_BOTTOM([ +#endif /* UCX_CONFIG_H */ +]) + +AM_INIT_AUTOMAKE([1.10 foreign tar-ustar subdir-objects]) +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) +AM_MAINTAINER_MODE +AC_CONFIG_MACRO_DIR([config/m4]) + +define([ucx_prefix], [/usr]) +AC_PREFIX_DEFAULT([ucx_prefix]) + +top_top_srcdir=$srcdir +AC_SUBST(top_top_srcdir) + +MAJOR_VERSION=ucx_ver_major +MINOR_VERSION=ucx_ver_minor +PATCH_VERSION=ucx_ver_patch +VERSION=$MAJOR_VERSION.$MINOR_VERSION.$PATCH_VERSION +SOVERSION=libucx_so_version +AC_SUBST(MAJOR_VERSION) +AC_SUBST(MINOR_VERSION) +AC_SUBST(PATCH_VERSION) +AC_SUBST(SCM_VERSION) +AC_SUBST(SOVERSION) + +AC_PROG_CC +AC_PROG_CXX +AC_OPENMP +AM_PROG_AS +AC_PROG_LN_S +AC_PROG_MKDIR_P +AC_PROG_SED +AC_PROG_INSTALL +AC_PROG_LIBTOOL +AC_HEADER_STDC +LT_LIB_M +AC_C_RESTRICT +AC_FUNC_STRERROR_R + +AC_PATH_TOOL([PKG_CONFIG], [pkg-config], [pkg-config]) + + +# +# Force link_all_deplibs=yes for libtool, otherwise it will not +# link against dependency libs +# +link_all_deplibs=yes + + +# +# Check if 'ln' supports creating relative links +# 
+AC_MSG_CHECKING([if ${LN_S} supports --relative]) +AS_IF([${LN_S} --relative symlinktest 2>/dev/null], + [AC_MSG_RESULT([yes]) + AC_SUBST([LN_RS], ["${LN_S} --relative"]) + rm symlinktest], + [AC_MSG_RESULT([no]) + AC_SUBST([LN_RS], [${LN_S}])]) + + +# +# Save config flags for version dump tool +# +AC_DEFINE_UNQUOTED([UCX_CONFIGURE_FLAGS], ["$config_flags"], [UCX configure flags]) + + +# +# Provide the functionality of AS_VAR_APPEND if Autoconf does not have it. +# +m4_ifdef([AS_VAR_APPEND], +[m4_copy([AS_VAR_APPEND], [ucx_AS_VAR_APPEND])], +[m4_define([ucx_AS_VAR_APPEND], +[AS_VAR_SET([$1], [AS_VAR_GET([$1])$2])])]) + + +# +# Paths for loadable modules +# +AC_SUBST([modulesubdir], [${PACKAGE_NAME}]) # module directory names +AC_SUBST([moduledir], [${libdir}/${modulesubdir}]) # module installation directory +AC_SUBST([localmoduledir], ['$(abs_top_builddir)/modules']) # local directory for module symlinks +AC_SUBST([objdir], [${objdir}]) # libtool objects dir, usually .libs +AC_SUBST([shrext], [${shrext_cmds}]) # libtool shared library extension +AC_DEFINE_UNQUOTED([UCX_MODULE_SUBDIR], ["${modulesubdir}"], [UCX module sub-directory]) + + +# +# Additional m4 files +# +m4_include([config/m4/ax_prog_doxygen.m4]) +m4_include([config/m4/graphviz.m4]) +AC_ARG_WITH([docs_only], + AS_HELP_STRING([--with-docs-only], + [Compile only the docs and not the rest of UCX. 
[default=NO]]), + [:],[with_docs_only=no]) + +AC_DEFUN([UCX_DX_ENABLE_CHECK], + [AS_IF([DX_TEST_FEATURE($1)], + [], + [AS_IF([test "x$enable_doxygen_$1" = xyes], + [AC_MSG_ERROR([--enable-doxygen-$1 was specified, but $1 tools were not found])], + [])])]) + +# +# Doxygen options +# +DX_PS_FEATURE(OFF) +DX_HTML_FEATURE(ON) +DX_MAN_FEATURE(ON) +DX_PDF_FEATURE(ON) +DX_INIT_DOXYGEN([UCX],[docs/doxygen/ucxdox],[docs/doxygen-doc]) + +AS_IF([test "x$with_docs_only" = xyes], + [AS_MESSAGE([Documents only requested]) + AS_IF([DX_TEST_FEATURE(doc)], + [], + [AC_MSG_ERROR([--with-docs-only was specified, but doxygen was not found])]) + UCX_DX_ENABLE_CHECK([html]) + UCX_DX_ENABLE_CHECK([man]) + UCX_DX_ENABLE_CHECK([pdf]) + AM_CONDITIONAL([DOCS_ONLY], [true]) + AM_CONDITIONAL([HAVE_GTEST], [false]) + AM_CONDITIONAL([HAVE_STATS], [false]) + AM_CONDITIONAL([HAVE_TUNING], [false]) + AM_CONDITIONAL([HAVE_MEMTRACK], [false]) + AM_CONDITIONAL([HAVE_IB], [false]) + AM_CONDITIONAL([HAVE_MLX5_HW], [false]) + AM_CONDITIONAL([HAVE_MLX5_HW_UD], [false]) + AM_CONDITIONAL([HAVE_MLX5_DV], [false]) + AM_CONDITIONAL([HAVE_DEVX], [false]) + AM_CONDITIONAL([HAVE_EXP], [false]) + AM_CONDITIONAL([HAVE_TL_RC], [false]) + AM_CONDITIONAL([HAVE_TL_DC], [false]) + AM_CONDITIONAL([HAVE_DC_DV], [false]) + AM_CONDITIONAL([HAVE_DC_EXP], [false]) + AM_CONDITIONAL([HAVE_TL_UD], [false]) + AM_CONDITIONAL([HAVE_TL_CM], [false]) + AM_CONDITIONAL([HAVE_CRAY_UGNI], [false]) + AM_CONDITIONAL([HAVE_CUDA], [false]) + AM_CONDITIONAL([HAVE_GDR_COPY], [false]) + AM_CONDITIONAL([HAVE_ROCM], [false]) + AM_CONDITIONAL([HAVE_HIP], [false]) + AM_CONDITIONAL([HAVE_XPMEM], [false]) + AM_CONDITIONAL([HAVE_CMA], [false]) + AM_CONDITIONAL([HAVE_KNEM], [false]) + AM_CONDITIONAL([HAVE_RDMACM], [false]) + AM_CONDITIONAL([HAVE_RDMACM_QP_LESS], [false]) + AM_CONDITIONAL([HAVE_MPI], [false]) + AM_CONDITIONAL([HAVE_MPIRUN], [false]) + AM_CONDITIONAL([HAVE_MPICC], [false]) + AM_CONDITIONAL([HAVE_PROFILING], [false]) +
AM_CONDITIONAL([HAVE_UCM_PTMALLOC286], [false]) + AM_CONDITIONAL([HAVE_JAVA], [false]) + AM_CONDITIONAL([HAVE_CXX11], [false]) + AM_CONDITIONAL([HAVE_GNUXX11], [false]) + AM_CONDITIONAL([HAVE_TCMALLOC], [false]) + AM_CONDITIONAL([ENABLE_EXPERIMENTAL_API], [false]) + AM_CONDITIONAL([INSTALL_DEVEL_HEADERS], [false]) + AM_CONDITIONAL([HAVE_EXAMPLES], [false]) + ], + [ + AM_CONDITIONAL([DOCS_ONLY], [false]) + m4_include([config/m4/compiler.m4]) + m4_include([config/m4/sysdep.m4]) + m4_include([config/m4/ucs.m4]) + m4_include([config/m4/ucm.m4]) + m4_include([config/m4/mpi.m4]) + m4_include([config/m4/rte.m4]) + m4_include([config/m4/java.m4]) + m4_include([config/m4/cuda.m4]) + m4_include([config/m4/rocm.m4]) + m4_include([config/m4/gdrcopy.m4]) + m4_include([src/ucm/configure.m4]) + m4_include([src/uct/configure.m4]) + m4_include([src/tools/perf/configure.m4]) + m4_include([test/gtest/configure.m4]) + + # + # Enable fault injection code + # + AC_ARG_ENABLE([fault-injection], + AS_HELP_STRING([--enable-fault-injection], + [Enable fault injection code, default: NO]), + [], + [enable_fault_injection=no]) + AS_IF([test "x$enable_fault_injection" = xyes], + [AS_MESSAGE([enabling with fault injection code]) + AC_DEFINE([ENABLE_FAULT_INJECTION], [1], [Enable fault injection code])], + [:]) + + + # + # Disable checking user parameters + # + AC_ARG_ENABLE([params-check], + AS_HELP_STRING([--disable-params-check], + [Disable checking user parameters passed to API, default: NO]), + [], + [enable_params_check=yes]) + AS_IF([test "x$enable_params_check" = xyes], + [AC_DEFINE([ENABLE_PARAMS_CHECK], [1], [Enable checking user parameters])], + [AC_DEFINE([ENABLE_PARAMS_CHECK], [0])]) + + # + # Enable collecting data to ease debugging + # + AC_ARG_ENABLE([debug-data], + AS_HELP_STRING([--enable-debug-data], + [Enable collecting data to ease debugging, default: NO]), + [], + [enable_debug_data=no]) + AS_IF([test "x$enable_debug_data" = xyes], + [AC_DEFINE([ENABLE_DEBUG_DATA], [1], 
[Enable collecting data]) + AC_DEFINE([UCT_UD_EP_DEBUG_HOOKS], [1], + [Enable packet header inspection/rewriting in UCT/UD])], + [AC_DEFINE([ENABLE_DEBUG_DATA], [0]) + AC_DEFINE([UCT_UD_EP_DEBUG_HOOKS], [0])]) + + + # + # Enable multithreading support + # + AC_ARG_ENABLE([mt], + AS_HELP_STRING([--enable-mt], + [Enable thread support in UCP and UCT, default: NO]), + [], + [enable_mt=no]) + AS_IF([test "x$enable_mt" = xyes], + [AC_DEFINE([ENABLE_MT], [1], [Enable thread support in UCP and UCT]) + mt_enable=enabled], + [AC_DEFINE([ENABLE_MT], [0]) + mt_enable=disabled]) + + + # + # Enable experimental header + # + AC_ARG_ENABLE([experimental-api], + AS_HELP_STRING([--enable-experimental-api], + [Enable installing experimental APIs, default: NO]), + [], + [enable_experimental_api=no]) + AM_CONDITIONAL([ENABLE_EXPERIMENTAL_API], [test "x$enable_experimental_api" = "xyes"]) + + + # + # Install development headers + # + AC_ARG_ENABLE([devel-headers], + AS_HELP_STRING([--enable-devel-headers], + [Enable installing development headers, default: NO]), + [], + [enable_devel_headers=no]) + AM_CONDITIONAL([INSTALL_DEVEL_HEADERS], [test "x$enable_devel_headers" = "xyes"]) + + + # + # Path for valgrind-enabled libraries + # + AC_SUBST([VALGRIND_LIBPATH], [${valgrind_libpath}]) + + + # + # Enable examples build + # + AC_ARG_ENABLE([examples], + AS_HELP_STRING([--enable-examples], + [Enable examples build, default: NO]), + [], + [enable_examples=no]) + AM_CONDITIONAL([HAVE_EXAMPLES], [test "x$enable_examples" = "xyes"]) + ]) # Docs only + +# +# Print which transports are built +# +build_modules="${uct_modules}" +build_modules="${build_modules}${uct_ib_modules}" +build_modules="${build_modules}${uct_cuda_modules}" +build_modules="${build_modules}${ucm_modules}" +build_modules="${build_modules}${ucx_perftest_modules}" +build_modules="${build_modules}${uct_rocm_modules}" +AC_SUBST([build_modules], [${build_modules}]) +AC_SUBST([build_bindings], [${build_bindings}]) + +# +# Final 
output +# +AC_CONFIG_FILES([Makefile + docs/doxygen/header.tex + src/uct/api/version.h + ]) +AS_IF([test "x$with_docs_only" = xyes], [], [ +AC_CONFIG_LINKS([ + debian/compat:debian/compat + debian/copyright:debian/copyright + debian/ucx.prerm:debian/ucx.prerm + ]) +AC_CONFIG_FILES([ + ucx.spec + ucx.pc + contrib/rpmdef.sh + debian/rules + debian/control + debian/changelog + src/ucs/Makefile + src/ucp/Makefile + src/ucp/api/ucp_version.h + src/ucp/core/ucp_version.c + src/tools/info/Makefile + src/tools/profile/Makefile + test/apps/Makefile + test/apps/sockaddr/Makefile + test/examples/Makefile + test/mpi/Makefile + bindings/java/Makefile + bindings/java/pom.xml + bindings/java/src/main/native/Makefile + ]) + +AC_CONFIG_FILES([test/mpi/run_mpi.sh], [chmod a+x test/mpi/run_mpi.sh]) +]) + + +AC_OUTPUT + + +# +# Print build configuration +# +AC_MSG_NOTICE([=========================================================]) +AS_IF([test "x$with_docs_only" = xyes], +[ +AC_MSG_NOTICE([Building documents only]) +], +[ +AC_MSG_NOTICE([UCX build configuration:]) +AC_MSG_NOTICE([ Build prefix: ${prefix}]) +AC_MSG_NOTICE([Preprocessor flags: ${BASE_CPPFLAGS}]) +AC_MSG_NOTICE([ C compiler: ${CC} ${BASE_CFLAGS}]) +AC_MSG_NOTICE([ C++ compiler: ${CXX} ${BASE_CXXFLAGS}]) +AC_MSG_NOTICE([ Multi-thread: ${mt_enable}]) +AC_MSG_NOTICE([ MPI tests: ${mpi_enable}]) +AC_MSG_NOTICE([ Devel headers: ${enable_devel_headers}]) +AC_MSG_NOTICE([ Bindings: <$(echo ${build_bindings}|tr ':' ' ') >]) +AC_MSG_NOTICE([ UCT modules: <$(echo ${uct_modules}|tr ':' ' ') >]) +AC_MSG_NOTICE([ CUDA modules: <$(echo ${uct_cuda_modules}|tr ':' ' ') >]) +AC_MSG_NOTICE([ ROCM modules: <$(echo ${uct_rocm_modules}|tr ':' ' ') >]) +AC_MSG_NOTICE([ IB modules: <$(echo ${uct_ib_modules}|tr ':' ' ') >]) +AC_MSG_NOTICE([ UCM modules: <$(echo ${ucm_modules}|tr ':' ' ') >]) +AC_MSG_NOTICE([ Perf modules: <$(echo ${ucx_perftest_modules}|tr ':' ' ') >]) +])
+AC_MSG_NOTICE([=========================================================]) diff --git a/contrib/buildrpm.sh b/contrib/buildrpm.sh new file mode 100755 index 0000000..7b95819 --- /dev/null +++ b/contrib/buildrpm.sh @@ -0,0 +1,105 @@ +#!/bin/bash -eE + + +PACKAGE=ucx +WS=$PWD +rpmspec=${PACKAGE}.spec +rpmmacros="--define='_rpmdir ${WS}/rpm-dist' --define='_srcrpmdir ${WS}/rpm-dist' --define='_sourcedir ${WS}' --define='_specdir ${WS}' --define='_builddir ${WS}'" +rpmopts="--buildroot='${WS}/_rpm'" + + + +opt_tarball=0 +opt_srcrpm=0 +opt_binrpm=0 +opt_no_dist=0 +opt_no_deps=0 +defines="" + +while test "$1" != ""; do + case $1 in + --tarball|-t) opt_tarball=1 ;; + --srcrpm|-s) opt_srcrpm=1 ;; + --binrpm|-b) opt_binrpm=1 ;; + --no-dist) opt_no_dist=1 ;; + --nodeps) opt_no_deps=1 ;; + --define|-d) defines="$defines --define '$2'"; shift ;; + *) + cat <<EOF + +Usage: buildrpm.sh [ options ] + +Options: + -t|--tarball Create tarball + -s|--srcrpm Create source RPM + -b|--binrpm Create binary RPM + --no-dist Undefine the dist tag + --nodeps Ignore build-time dependencies + -d|--define <arg> Add a define to rpmbuild + + +EOF + exit 1 + ;; + esac + shift +done + +if [ $opt_no_dist -eq 1 ]; then + rpmmacros="$rpmmacros '--undefine=dist'" +fi + +if [ $opt_no_deps -eq 1 ]; then + rpmopts="$rpmopts --nodeps" +fi + +mkdir -p rpm-dist + +if [ $opt_tarball -eq 1 ]; then + make dist +fi + +# Version includes revision, while tarball in Source doesn't have it since +# it uses GitHub standard name v<version>.tar.gz, so make: +# ucx-1.3.0.6a61458.tar.gz --> v1.3.0.tar.gz for rpmbuild +tgz=(ucx*.tar.gz) +tarball=${tgz[0]} +link_tarball=$(perl -e '$fname=$ARGV[0]; ($new_name=$fname)=~s/^.+-(\d+\.\d+\.\d+)/v$1/; print $new_name' $tarball) +rm -f $link_tarball +ln -s $tarball $link_tarball + +if [ $opt_srcrpm -eq 1 ]; then + echo rpmbuild -bs $rpmmacros $rpmopts $rpmspec $defines | bash -eEx +fi + +if [ $opt_binrpm -eq 1 ]; then + # read build configuration + source contrib/rpmdef.sh || exit 1 + + with_arg() { + module=$1 + with_arg=${2:-$module} + if (echo ${build_modules} | tr ':' '\n' | grep -q "^${module}$") || + (echo ${build_bindings} | tr ':' '\n' | grep -q "^${module}$") + then + echo "--with ${with_arg}" + else + echo
"--without ${with_arg}" + fi + } + + with_args="" + with_args+=" $(with_arg cma)" + with_args+=" $(with_arg cuda)" + with_args+=" $(with_arg gdrcopy)" + with_args+=" $(with_arg ib)" + with_args+=" $(with_arg cm ib_cm)" + with_args+=" $(with_arg knem)" + with_args+=" $(with_arg rdmacm)" + with_args+=" $(with_arg rocm)" + with_args+=" $(with_arg ugni)" + with_args+=" $(with_arg xpmem)" + with_args+=" $(with_arg java)" + + echo rpmbuild -bb $rpmmacros $rpmopts $rpmspec $defines $with_args | bash -eEx +fi diff --git a/contrib/configure-devel b/contrib/configure-devel new file mode 100755 index 0000000..7525e93 --- /dev/null +++ b/contrib/configure-devel @@ -0,0 +1,25 @@ +#!/bin/sh +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# UCX build for development. +# Full logging and an runtime checks. +# + +basedir=$(cd $(dirname $0) && pwd) +$basedir/../configure \ + --enable-gtest \ + --enable-examples \ + --with-valgrind \ + --enable-profiling \ + --enable-frame-pointer \ + --enable-stats \ + --enable-memtrack \ + --enable-fault-injection \ + --enable-debug-data \ + --enable-mt \ + "$@" diff --git a/contrib/configure-prof b/contrib/configure-prof new file mode 100755 index 0000000..14a306d --- /dev/null +++ b/contrib/configure-prof @@ -0,0 +1,24 @@ +#!/bin/sh +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +# +# UCX build for profiling purposes. +# Some extra code to ease performance debugging. 
+# + +basedir=$(cd $(dirname $0) && pwd) +$basedir/../configure \ + --disable-logging \ + --disable-debug \ + --disable-assertions \ + --disable-params-check \ + --enable-backtrace-detail \ + --enable-profiling \ + --enable-frame-pointer \ + --enable-stats \ + --enable-memtrack \ + "$@" diff --git a/contrib/configure-release b/contrib/configure-release new file mode 100755 index 0000000..cae1421 --- /dev/null +++ b/contrib/configure-release @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +# +# UCX build for maximal performance while maintaining portability. +# No extra debugging or profiling code. +# + +basedir=$(cd $(dirname $0) && pwd) +$basedir/../configure \ + --disable-logging \ + --disable-debug \ + --disable-assertions \ + --disable-params-check \ + "$@" diff --git a/contrib/rpmdef.sh.in b/contrib/rpmdef.sh.in new file mode 100644 index 0000000..50925e5 --- /dev/null +++ b/contrib/rpmdef.sh.in @@ -0,0 +1,2 @@ +build_modules="@build_modules@" +build_bindings="@build_bindings@" diff --git a/contrib/ucx_perftest_config/README b/contrib/ucx_perftest_config/README new file mode 100644 index 0000000..906e54a --- /dev/null +++ b/contrib/ucx_perftest_config/README @@ -0,0 +1,3 @@ +This is an example of the "batch" configuration files for ucx_perftest. 
+The files are passed as an input parameter to the ucx_perftest benchmark: +ucx_perftest -b msg_pow2 -b test_types_uct -b transports <...> diff --git a/contrib/ucx_perftest_config/msg_pow2 b/contrib/ucx_perftest_config/msg_pow2 new file mode 100644 index 0000000..df68de2 --- /dev/null +++ b/contrib/ucx_perftest_config/msg_pow2 @@ -0,0 +1,35 @@ + 1 -s 1 -n 2000000 + 2 -s 2 -n 2000000 + 4 -s 4 -n 2000000 + 8 -s 8 -n 2000000 + 12 -s 12 -n 2000000 + 16 -s 16 -n 2000000 + 24 -s 24 -n 2000000 + 32 -s 32 -n 2000000 + 40 -s 40 -n 2000000 + 48 -s 48 -n 2000000 + 64 -s 64 -n 2000000 + 80 -s 80 -n 2000000 + 96 -s 96 -n 2000000 + 128 -s 128 -n 1400000 + 256 -s 256 -n 700000 + 300 -s 300 -n 700000 + 512 -s 512 -n 300000 + 1024 -s 1024 -n 200000 + 2048 -s 2048 -n 100000 + 3000 -s 3000 -n 100000 + 4096 -s 4096 -n 100000 + 6000 -s 6000 -n 100000 + 8192 -s 8192 -n 80000 + 10000 -s 10000 -n 80000 + 16384 -s 16384 -n 40000 + 25000 -s 25000 -n 40000 + 32768 -s 32768 -n 20000 + 45000 -s 45000 -n 20000 + 65536 -s 65536 -n 10000 + 100000 -s 100000 -n 10000 + 131072 -s 131072 -n 5000 + 262144 -s 262144 -n 2500 + 524288 -s 524288 -n 1200 +1048576 -s 1048576 -n 600 +2097152 -s 2097152 -n 300 diff --git a/contrib/ucx_perftest_config/msg_pow2_large b/contrib/ucx_perftest_config/msg_pow2_large new file mode 100644 index 0000000..114dba8 --- /dev/null +++ b/contrib/ucx_perftest_config/msg_pow2_large @@ -0,0 +1,10 @@ +4194304 -s 4194304 -n 100 +8388608 -s 8388608 -n 100 +16777216 -s 16777216 -n 100 +33554432 -s 33554432 -n 100 +67108864 -s 67108864 -n 10 +134217728 -s 134217728 -n 10 +268435456 -s 268435456 -n 10 +536870912 -s 536870912 -n 10 +1073741824 -s 1073741824 -n 10 +2147483648 -s 2147483648 -n 10 diff --git a/contrib/ucx_perftest_config/test_types_ucp b/contrib/ucx_perftest_config/test_types_ucp new file mode 100644 index 0000000..2a9ecfa --- /dev/null +++ b/contrib/ucx_perftest_config/test_types_ucp @@ -0,0 +1,36 @@ +# UCP +ucp_iov_contig_tag_lat -t tag_lat -D iov,contig
+ucp_iov_iov_tag_lat -t tag_lat -D iov,iov +ucp_contig_contig_tag_lat -t tag_lat -D contig,contig +#IOV with RNDV is not yet supported +#ucp_contig_iov_tag_lat -t tag_lat -D contig,iov +ucp_iov_contig_tag_bw -t tag_bw -D iov,contig +ucp_iov_iov_tag_bw -t tag_bw -D iov,iov +ucp_contig_contig_tag_bw -t tag_bw -D contig,contig +#IOV with RNDV is not yet supported +#ucp_contig_iov_tag_bw -t tag_bw -D contig,iov +ucp_sync_tag_lat -t tag_sync_lat +ucp_unexp_tag_lat -t tag_lat -U +ucp_wild_tag_lat -t tag_lat -C +ucp_contig_stream_bw -t stream_bw -r recv_data +ucp_contig_stream_lat -t stream_lat -r recv_data +ucp_contig_stream_bw -t stream_bw -r recv +ucp_contig_stream_lat -t stream_lat -r recv +#CUDA +ucp_contig_contig_cuda_tag_lat -t tag_lat -D contig,contig -m cuda,cuda +ucp_contig_contig_cuda_tag_lat -t tag_lat -D contig,contig -m cuda,host +ucp_contig_contig_cuda_tag_lat -t tag_lat -D contig,contig -m host,cuda +ucp_contig_contig_cuda_tag_bw -t tag_bw -D contig,contig -m cuda,cuda +ucp_contig_contig_cuda_tag_bw -t tag_bw -D contig,contig -m cuda,host +ucp_contig_contig_cuda_tag_bw -t tag_bw -D contig,contig -m host,cuda +ucp_contig_cuda_stream_bw -t stream_bw -r recv_data -m cuda +ucp_contig_cuda_stream_lat -t stream_lat -r recv_data -m cuda +ucp_contig_cuda_stream_bw -t stream_bw -r recv -m cuda +ucp_contig_cuda_stream_lat -t stream_lat -r recv -m cuda +ucp_contig_contig_cuda_mng_tag_lat -t tag_lat -D contig,contig -m cuda-managed +ucp_contig_contig_cuda_mng_tag_bw -t tag_bw -D contig,contig -m cuda-managed +ucp_contig_cuda_mng_stream_bw -t stream_bw -r recv_data -m cuda-managed +ucp_contig_cuda_mng_stream_lat -t stream_lat -r recv_data -m cuda-managed +ucp_contig_cuda_mng_stream_bw -t stream_bw -r recv -m cuda-managed +ucp_contig_cuda_mng_stream_lat -t stream_lat -r recv -m cuda-managed + diff --git a/contrib/ucx_perftest_config/test_types_uct b/contrib/ucx_perftest_config/test_types_uct new file mode 100644 index 0000000..2769ee4 --- /dev/null +++ 
b/contrib/ucx_perftest_config/test_types_uct @@ -0,0 +1,23 @@ +# PUT +put_short_bw -t put_bw -D short +put_bcopy_bw -t put_bw -D bcopy +put_zcopy_bw -t put_bw -D zcopy +put_short_lat -t put_lat -D short +put_bcopy_lat -t put_lat -D bcopy +put_zcopy_lat -t put_lat -D zcopy +# AM +am_short_lat -t am_lat -D short +am_bcopy_lat -t am_lat -D bcopy +am_zcopy_lat -t am_lat -D zcopy +am_short_bw -t am_bw -D short +am_bcopy_bw -t am_bw -D bcopy +am_zcopy_bw -t am_bw -D zcopy +# GET +get_bcopy -t get -D bcopy +get_zcopy -t get -D zcopy +# ATOMICS +add_lat -t add_lat +add_mr -t add_mr +fadd -t fadd +swap -t swap +cswap -t cswap diff --git a/contrib/ucx_perftest_config/transports b/contrib/ucx_perftest_config/transports new file mode 100644 index 0000000..5fbbd7d --- /dev/null +++ b/contrib/ucx_perftest_config/transports @@ -0,0 +1,2 @@ +regular_verbs -x rc_verbs +accel_verbs -x rc_mlx5 diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..7ce2c19 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,5 @@ +ucx (1.8.c30b7da) unstable; urgency=low + + * Initial release (Closes: #nnnn) + + -- Mellanox Ltd. Wed, 11 Sep 2013 15:24:22 +0300 diff --git a/debian/changelog.in b/debian/changelog.in new file mode 100644 index 0000000..1f1dab1 --- /dev/null +++ b/debian/changelog.in @@ -0,0 +1,5 @@ +ucx (@MAJOR_VERSION@.@MINOR_VERSION@.@SCM_VERSION@) unstable; urgency=low + + * Initial release (Closes: #nnnn) + + -- Mellanox Ltd. 
Wed, 11 Sep 2013 15:24:22 +0300 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..7ed6ff8 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +5 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..8e27d46 --- /dev/null +++ b/debian/control @@ -0,0 +1,20 @@ +Source: ucx +Section: libs +Priority: extra +Maintainer: ucx-group@elist.ornl.gov +Build-Depends: +Standards-Version: 1.8 +Homepage: http://www.openucx.org + +#Package: ucx-dev +#Section: libdevel +#Architecture: any +#Depends: ucx (= ${binary:Version}) +#Description: Header files for UCX library + +Package: ucx +Section: libs +Depends: ${shlibs:Depends}, ${misc:Depends} +Architecture: any +Description: Unified Communication X + UCX is a communication library implementing high-performance messaging. diff --git a/debian/control.in b/debian/control.in new file mode 100644 index 0000000..411e752 --- /dev/null +++ b/debian/control.in @@ -0,0 +1,20 @@ +Source: @PACKAGE@ +Section: libs +Priority: extra +Maintainer: ucx-group@elist.ornl.gov +Build-Depends: +Standards-Version: @MAJOR_VERSION@.@MINOR_VERSION@ +Homepage: http://www.openucx.org + +#Package: ucx-dev +#Section: libdevel +#Architecture: any +#Depends: ucx (= ${binary:Version}) +#Description: Header files for UCX library + +Package: @PACKAGE@ +Section: libs +Depends: ${shlibs:Depends}, ${misc:Depends} +Architecture: any +Description: Unified Communication X + UCX is a communication library implementing high-performance messaging. diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..453aaf6 --- /dev/null +++ b/debian/copyright @@ -0,0 +1,32 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: UCX +Source: http://www.openucx.org + +Files: * +Copyright (c) 2014-2015 UT-Battelle, LLC. All rights reserved. +Copyright (C) 2014-2015 Mellanox Technologies Ltd. All rights reserved. 
+License: BSD + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/debian/rules b/debian/rules new file mode 100644 index 0000000..42f460c --- /dev/null +++ b/debian/rules @@ -0,0 +1,22 @@ +#!/usr/bin/make -f +# -*- makefile -*- +# Sample debian/rules that uses debhelper. +# This file was originally written by Joey Hess and Craig Small. +# As a special exception, when this file is copied by dh-make into a +# dh-make output file, you may use that output file without restriction. +# This special exception was added by Craig Small in version 0.37 of dh-make. 
+ +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 + +%: + dh $@ + +override_dh_auto_configure: + ./contrib/configure-release --prefix=/usr + chmod +x debian/rules + +override_dh_shlibdeps: + dh_shlibdeps --dpkg-shlibdeps-params=--ignore-missing-info + +override_dh_auto_clean: diff --git a/debian/rules.in b/debian/rules.in new file mode 100755 index 0000000..03f8670 --- /dev/null +++ b/debian/rules.in @@ -0,0 +1,22 @@ +#!/usr/bin/make -f +# -*- makefile -*- +# Sample debian/rules that uses debhelper. +# This file was originally written by Joey Hess and Craig Small. +# As a special exception, when this file is copied by dh-make into a +# dh-make output file, you may use that output file without restriction. +# This special exception was added by Craig Small in version 0.37 of dh-make. + +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 + +%: + dh $@ + +override_dh_auto_configure: + @top_top_srcdir@/contrib/configure-release --prefix=/usr + chmod +x debian/rules + +override_dh_shlibdeps: + dh_shlibdeps --dpkg-shlibdeps-params=--ignore-missing-info + +override_dh_auto_clean: diff --git a/debian/source/format b/debian/source/format new file mode 100644 index 0000000..89ae9db --- /dev/null +++ b/debian/source/format @@ -0,0 +1 @@ +3.0 (native) diff --git a/debian/ucx.prerm b/debian/ucx.prerm new file mode 100755 index 0000000..87db440 --- /dev/null +++ b/debian/ucx.prerm @@ -0,0 +1,7 @@ +#!/bin/sh + +PCF=/usr/lib/pkgconfig/ucx.pc + +if [ -f $PCF ];then + rm -f $PCF +fi diff --git a/depcomp b/depcomp new file mode 100755 index 0000000..65cbf70 --- /dev/null +++ b/depcomp @@ -0,0 +1,791 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1999-2018 Free Software Foundation, Inc. 
+ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>. + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by 'PROGRAMS ARGS'. + object Object file output by 'PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to <bug-automake@gnu.org>. +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +# Get the directory component of the given path, and save it in the +# global variable '$dir'. Note that this directory component will +# be either empty or ending with a '/' character. This is deliberate.
+set_dir_from () +{ + case $1 in + */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; + *) dir=;; + esac +} + +# Get the suffix-stripped basename of the given path, and save it in the +# global variable '$base'. +set_base_from () +{ + base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` +} + +# If no dependency file was actually created by the compiler invocation, +# we still have to create a dummy depfile, to avoid errors with the +# Makefile "include basename.Plo" scheme. +make_dummy_depfile () +{ + echo "#dummy" > "$depfile" +} + +# Factor out some common post-processing of the generated depfile. +# Requires the auxiliary global variable '$tmpdepfile' to be set. +aix_post_process_depfile () +{ + # If the compiler actually managed to produce a dependency file, + # post-process it. + if test -f "$tmpdepfile"; then + # Each line is of the form 'foo.o: dependency.h'. + # Do two passes, one to just change these to + # $object: dependency.h + # and one to simply output + # dependency.h: + # which is needed to avoid the deleted-header problem. + { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" + sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" + } > "$depfile" + rm -f "$tmpdepfile" + else + make_dummy_depfile + fi +} + +# A tabulation character. +tab=' ' +# A newline character. +nl=' +' +# Character ranges might be problematic outside the C locale. +# These definitions help. +upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ +lower=abcdefghijklmnopqrstuvwxyz +digits=0123456789 +alpha=${upper}${lower} + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
+depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Avoid interferences from the environment. +gccflag= dashmflag= + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +cygpath_u="cygpath -u -f -" +if test "$depmode" = msvcmsys; then + # This is just like msvisualcpp but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvisualcpp +fi + +if test "$depmode" = msvc7msys; then + # This is just like msvc7 but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvc7 +fi + +if test "$depmode" = xlc; then + # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. + gccflag=-qmakedep=gcc,-MF + depmode=gcc +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. +## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. 
+ for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## Note that this doesn't just cater to obsolete pre-3.x GCC compilers, +## but also to in-use compilers like IBM xlc/xlC and the HP C compiler. +## (see the conditional assignment to $gccflag above). +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). Also, it might not be +## supported by the other compilers which use the 'gcc' depmode. +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The second -e expression handles DOS-style file names with drive + # letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the "deleted header file" problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. +## Some versions of gcc put a space before the ':'.
On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like '#:fec' to the end of the + # dependency line. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ + | tr "$nl" ' ' >> "$depfile" + echo >> "$depfile" + # The second pass generates a dummy entry for each header file. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" + ;; + +xlc) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. 
+ exit 1 + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts '$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + aix_post_process_depfile + ;; + +tcc) + # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 + # FIXME: That version still under development at the moment of writing. + # Make that this statement remains true also for stable, released + # versions. + # It will wrap lines (doesn't matter whether long or short) with a + # trailing '\', as in: + # + # foo.o : \ + # foo.c \ + # foo.h \ + # + # It will put a trailing '\' even on the last line, and will use leading + # spaces rather than leading tabs (at least since its commit 0394caf7 + # "Emit spaces for -MD"). + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. + # We have to change lines of the first kind to '$object: \'. + sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" + # And for each line of the second kind, we have to emit a 'dep.h:' + # dummy dependency, to avoid the deleted-header problem. 
+ sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" + rm -f "$tmpdepfile" + ;; + +## The order of this option in the case statement is important, since the +## shell code in configure will try each of these formats in the order +## listed in this file. A plain '-MD' option would be understood by many +## compilers, so we must ensure this comes after the gcc and icc options. +pgcc) + # Portland's C compiler understands '-MD'. + # Will always output deps to 'file.d' where file is the root name of the + # source file under compilation, even if file resides in a subdirectory. + # The object file name does not affect the name of the '.d' file. + # pgcc 10.2 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using '\' : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + set_dir_from "$object" + # Use the source, not the object, to determine the base name, since + # that's sadly what pgcc will do too. + set_base_from "$source" + tmpdepfile=$base.d + + # For projects that build the same source file twice into different object + # files, the pgcc approach of using the *source* file root name can cause + # problems in parallel builds. Use a locking strategy to avoid stomping on + # the same $tmpdepfile. + lockdir=$base.d-lock + trap " + echo '$0: caught signal, cleaning up...' >&2 + rmdir '$lockdir' + exit 1 + " 1 2 13 15 + numtries=100 + i=$numtries + while test $i -gt 0; do + # mkdir is a portable test-and-set. + if mkdir "$lockdir" 2>/dev/null; then + # This process acquired the lock. + "$@" -MD + stat=$? + # Release the lock. + rmdir "$lockdir" + break + else + # If the lock is being held by a different process, wait + # until the winning process is done or we timeout. 
+ while test -d "$lockdir" && test $i -gt 0; do + sleep 1 + i=`expr $i - 1` + done + fi + i=`expr $i - 1` + done + trap - 1 2 13 15 + if test $i -le 0; then + echo "$0: failed to acquire lock after $numtries attempts" >&2 + echo "$0: check lockdir '$lockdir'" >&2 + exit 1 + fi + + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" + # Add 'dependent.h:' lines. 
+ sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in 'foo.d' instead, so we check for that too. + # Subdirectories are respected. + set_dir_from "$object" + set_base_from "$object" + + if test "$libtool" = yes; then + # Libtool generates 2 separate objects for the 2 libraries. These + # two compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir$base.o.d # libtool 1.5 + tmpdepfile2=$dir.libs/$base.o.d # Likewise. + tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + # Same post-processing that is required for AIX mode. + aix_post_process_depfile + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? 
+ grep -v '^Note: including file: ' "$tmpdepfile" + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/'"$tab"'\1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/'"$tab"'/ + G + p +}' >> "$depfile" + echo >> "$depfile" # make sure the fragment doesn't end with a backslash + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for ':' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. 
+ "$@" $dashmflag | + sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this sed invocation + # correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. + sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process the last invocation + # correctly. Breaking it into two sed invocations is a workaround. 
+ sed '1,2d' "$tmpdepfile" \ + | tr ' ' "$nl" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E \ + | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + | sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" + echo "$tab" >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. 
It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/docs/doxygen/doxygen.am b/docs/doxygen/doxygen.am new file mode 100644 index 0000000..9109a43 --- /dev/null +++ b/docs/doxygen/doxygen.am @@ -0,0 +1,177 @@ +# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# ----- begin aminclude.am ------------------------------------- +# +## --------------------------------- ## +## Format-independent Doxygen rules. ## +## --------------------------------- ## + +if DX_COND_doc + +## ------------------------------- ## +## Rules specific for HTML output. ## +## ------------------------------- ## + +if DX_COND_html + +DX_CLEAN_HTML = @DX_DOCDIR@/html + +endif DX_COND_html + +## ------------------------------ ## +## Rules specific for CHM output. ## +## ------------------------------ ## + +if DX_COND_chm + +DX_CLEAN_CHM = @DX_DOCDIR@/chm + +if DX_COND_chi + +DX_CLEAN_CHI = @DX_DOCDIR@/@PACKAGE@.chi + +endif DX_COND_chi + +endif DX_COND_chm + +## ------------------------------ ## +## Rules specific for MAN output. ## +## ------------------------------ ## + +if DX_COND_man + +DX_CLEAN_MAN = @DX_DOCDIR@/man + +endif DX_COND_man + +## ------------------------------ ## +## Rules specific for RTF output. ## +## ------------------------------ ## + +if DX_COND_rtf + +DX_CLEAN_RTF = @DX_DOCDIR@/rtf + +endif DX_COND_rtf + +## ------------------------------ ## +## Rules specific for XML output. 
## +## ------------------------------ ## + +if DX_COND_xml + +DX_CLEAN_XML = @DX_DOCDIR@/xml + +endif DX_COND_xml + +## ----------------------------- ## +## Rules specific for PS output. ## +## ----------------------------- ## + +if DX_COND_ps + +DX_CLEAN_PS = @DX_DOCDIR@/@PACKAGE@.ps + +DX_PS_GOAL = doxygen-ps + +doxygen-ps: @DX_DOCDIR@/@PACKAGE@.ps + +@DX_DOCDIR@/@PACKAGE@.ps: @DX_DOCDIR@/@PACKAGE@.tag + cd @DX_DOCDIR@/latex; \ + rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \ + $(DX_LATEX) refman.tex; \ + $(DX_MAKEINDEX) refman.idx; \ + $(DX_BIBTEX) refman; \ + $(DX_LATEX) refman.tex; \ + $(DX_LATEX) refman.tex; \ + countdown=5; \ + while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \ + refman.log > /dev/null 2>&1 \ + && test $$countdown -gt 0; do \ + $(DX_LATEX) refman.tex; \ + countdown=`expr $$countdown - 1`; \ + done; \ + $(DX_DVIPS) -o ../@PACKAGE@.ps refman.dvi + +endif DX_COND_ps + +## ------------------------------ ## +## Rules specific for PDF output. ## +## ------------------------------ ## + +if DX_COND_pdf + +DX_CLEAN_PDF = @DX_DOCDIR@/@PACKAGE@.pdf + +DX_PDF_GOAL = doxygen-pdf + +doxygen-pdf: @DX_DOCDIR@/@PACKAGE@.pdf + +@DX_DOCDIR@/@PACKAGE@.pdf: @DX_DOCDIR@/@PACKAGE@.tag + cd @DX_DOCDIR@/latex; \ + rm -f *.aux *.toc *.idx *.ind *.ilg *.log *.out; \ + $(DX_PDFLATEX) refman.tex; \ + $(DX_MAKEINDEX) refman.idx; \ + $(DX_BIBTEX) refman; \ + $(DX_PDFLATEX) refman.tex; \ + $(DX_PDFLATEX) refman.tex; \ + countdown=5; \ + while $(DX_EGREP) 'Rerun (LaTeX|to get cross-references right)' \ + refman.log > /dev/null 2>&1 \ + && test $$countdown -gt 0; do \ + $(DX_PDFLATEX) refman.tex; \ + countdown=`expr $$countdown - 1`; \ + done; \ + mv refman.pdf ../@PACKAGE@.pdf + +endif DX_COND_pdf + +## ------------------------------------------------- ## +## Rules specific for LaTeX (shared for PS and PDF).
## +## ------------------------------------------------- ## + +if DX_COND_latex + +DX_CLEAN_LATEX = @DX_DOCDIR@/latex + +endif DX_COND_latex + +.INTERMEDIATE: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL) + +doxygen-run: @DX_DOCDIR@/@PACKAGE@.tag + +doxygen-doc: doxygen-run $(DX_PS_GOAL) $(DX_PDF_GOAL) + +@DX_DOCDIR@/@PACKAGE@.tag: $(DX_CONFIG) $(doxygen_doc_files) + rm -rf @DX_DOCDIR@ + mkdir -p @DX_DOCDIR@ + $(DX_ENV) $(DX_DOXYGEN) $(srcdir)/$(DX_CONFIG) + echo Timestamp >$@ + +DX_CLEANFILES = \ + @DX_DOCDIR@/@PACKAGE@.tag \ + -r \ + $(DX_CLEAN_HTML) \ + $(DX_CLEAN_CHM) \ + $(DX_CLEAN_CHI) \ + $(DX_CLEAN_MAN) \ + $(DX_CLEAN_RTF) \ + $(DX_CLEAN_XML) \ + $(DX_CLEAN_PS) \ + $(DX_CLEAN_PDF) \ + $(DX_CLEAN_LATEX) + +endif DX_COND_doc + +.PHONY: doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) + +# ----- end aminclude.am --------------------------------------- +# +# LICENSE +# +# Copyright (c) 2009 Oren Ben-Kiki +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
diff --git a/docs/doxygen/header.tex.in b/docs/doxygen/header.tex.in new file mode 100644 index 0000000..086619b --- /dev/null +++ b/docs/doxygen/header.tex.in @@ -0,0 +1,142 @@ +% Latex header for doxygen 1.8.9.1 +\documentclass[twoside]{book} + +% Packages required by doxygen +\usepackage{fixltx2e} +\usepackage{calc} +\usepackage{doxygen} +\usepackage{graphicx} +\usepackage[utf8]{inputenc} +\usepackage{makeidx} +\usepackage{multicol} +\usepackage{multirow} +\PassOptionsToPackage{warn}{textcomp} +\usepackage{textcomp} +\usepackage[nointegrals]{wasysym} +\usepackage[table]{xcolor} + +% Font selection +\usepackage[T1]{fontenc} +\usepackage[scaled=.90]{helvet} +\usepackage{courier} +\usepackage{amssymb} +\usepackage{sectsty} +\renewcommand{\familydefault}{\sfdefault} +\allsectionsfont{% + \fontseries{bc}\selectfont% + \color{darkgray}% +} +\renewcommand{\DoxyLabelFont}{% + \fontseries{bc}\selectfont% + \color{darkgray}% +} +\newcommand{\+}{\discretionary{\mbox{\scriptsize$\hookleftarrow$}}{}{}} + +% Page & text layout +\usepackage{geometry} +\geometry{% + a4paper,% + top=2.5cm,% + bottom=2.5cm,% + left=2.5cm,% + right=2.5cm% +} +\tolerance=750 +\hfuzz=15pt +\hbadness=750 +\setlength{\emergencystretch}{15pt} +\setlength{\parindent}{0cm} +\setlength{\parskip}{0.2cm} +\makeatletter +\renewcommand{\paragraph}{% + \@startsection{paragraph}{4}{0ex}{-1.0ex}{1.0ex}{% + \normalfont\normalsize\bfseries\SS@parafont% + }% +} +\renewcommand{\subparagraph}{% + \@startsection{subparagraph}{5}{0ex}{-1.0ex}{1.0ex}{% + \normalfont\normalsize\bfseries\SS@subparafont% + }% +} +\makeatother + +% Headers & footers +\usepackage{fancyhdr} +\pagestyle{fancyplain} +\fancyhead[LE]{\fancyplain{}{\bfseries\thepage}} +\fancyhead[CE]{\fancyplain{}{}} +\fancyhead[RE]{\fancyplain{}{\bfseries\leftmark}} +\fancyhead[LO]{\fancyplain{}{\bfseries\rightmark}} +\fancyhead[CO]{\fancyplain{}{}} +\fancyhead[RO]{\fancyplain{}{\bfseries\thepage}} +\fancyfoot[LE]{\fancyplain{}{}} +\fancyfoot[CE]{\fancyplain{}{}} 
+\fancyfoot[RE]{\fancyplain{}{\bfseries\scriptsize \textcircled{c} \the\year\space Unified Communication X (UCX). All rights reserved. }} +\fancyfoot[LO]{\fancyplain{}{\bfseries\scriptsize \textcircled{c} \the\year\space Unified Communication X (UCX). All rights reserved. }} +\fancyfoot[CO]{\fancyplain{}{}} +\fancyfoot[RO]{\fancyplain{}{}} +\renewcommand{\footrulewidth}{0.4pt} +\renewcommand{\chaptermark}[1]{% + \markboth{#1}{}% +} +\renewcommand{\sectionmark}[1]{% + \markright{\thesection\ #1}% +} + +% Indices & bibliography +\usepackage{natbib} +\usepackage[titles]{tocloft} +\setcounter{tocdepth}{3} +\setcounter{secnumdepth}{5} +\makeindex + +% Hyperlinks (required, but should be loaded last) +\usepackage{ifpdf} +\ifpdf + \usepackage[pdftex,pagebackref=true]{hyperref} +\else + \usepackage[ps2pdf,pagebackref=true]{hyperref} +\fi +\hypersetup{% + colorlinks=true,% + linkcolor=blue,% + citecolor=blue,% + unicode% +} + +% Custom commands +\newcommand{\clearemptydoublepage}{% + \newpage{\pagestyle{empty}\cleardoublepage}% +} + + +%===== C O N T E N T S ===== + +\begin{document} + +% Titlepage & ToC +\hypersetup{pageanchor=false, + bookmarks=true, + bookmarksnumbered=true, + pdfencoding=unicode + } +\pagenumbering{roman} +\begin{titlepage} +\vspace*{7cm} +\begin{center}% + {\Large Unified Communication X (UCX)}\\ +\vspace*{0.5cm} +{\large API Standard}\\ +{\small Version @MAJOR_VERSION@.@MINOR_VERSION@}\\ +\vspace*{0.5cm} +\includegraphics[width=6cm]{UCX_Logo_930x933.png} +\vspace*{0.5cm} +\end{center} +\end{titlepage} +\clearemptydoublepage +\tableofcontents +\clearemptydoublepage +\pagenumbering{arabic} +\hypersetup{pageanchor=true} + +%--- Begin generated contents --- diff --git a/docs/uml/uct.dot b/docs/uml/uct.dot new file mode 100644 index 0000000..d0adb18 --- /dev/null +++ b/docs/uml/uct.dot @@ -0,0 +1,43 @@ +digraph G { + fontname = "Bitstream Vera Sans" + fontsize = 8 + + node [ + fontname = "Bitstream Vera Sans" + fontsize = 8 + shape = "record" + ] + + edge 
[ + fontname = "Bitstream Vera Sans" + fontsize = 8 + ] + + uct_iface [ + label = "{UCT Iface||+ Open() : void\l}" + ] + + uct_worker [ + label = "{UCT Worker||+ Open() : void\l}" + ] + + uct_pd [ + label = "{UCT PD||+ Open() : void\l}" + ] + + uct_endpoint [ + label = "{UCT Endpoint||+ Open() : void\l}" + ] + + edge [ + arrowhead = "none" + + taillabel= "1" + headlabel = "0..*" + ] + + + uct_pd -> uct_iface + uct_worker -> uct_iface + uct_iface -> uct_endpoint +} diff --git a/install-sh b/install-sh new file mode 100755 index 0000000..8175c64 --- /dev/null +++ b/install-sh @@ -0,0 +1,518 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2018-03-11.20; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +tab=' ' +nl=' +' +IFS=" $tab$nl" + +# Set DOITPROG to "echo" to test this script. + +doit=${DOITPROG-} +doit_exec=${doit:-exec} + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. + +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +is_target_a_directory=possibly + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve the last data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. 
+ -s $stripprog installed files. + -t DIRECTORY install into DIRECTORY. + -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -s) stripcmd=$stripprog;; + + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) is_target_a_directory=never;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." >&2 + exit 1 + fi +fi + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. 
+ case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." >&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename. 
+ if test -d "$dst"; then + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dstbase=`basename "$src"` + case $dst in + */) dst=$dst$dstbase;; + *) dst=$dst/$dstbase;; + esac + dstdir_status=0 + else + dstdir=`dirname "$dst"` + test -d "$dstdir" + dstdir_status=$? + fi + fi + + case $dstdir in + */) dstdirslash=$dstdir;; + *) dstdirslash=$dstdir/;; + esac + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. + *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + # Note that $RANDOM variable is not portable (e.g. dash); Use it + # here however when possible just to lower collision chance. + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + + trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0 + + # Because "mkdir -p" follows existing symlinks and we likely work + # directly in world-writeable /tmp, make sure that the '$tmpdir' + # directory is successfully created first before we actually test + # 'mkdir -p' feature. 
+ if (umask $mkdir_umask && + $mkdirprog $mkdir_mode "$tmpdir" && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + test_tmpdir="$tmpdir/a" + ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + fi + trap '' 0;; + esac;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # The umask is ridiculous, or mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + oIFS=$IFS + IFS=/ + set -f + set fnord $dstdir + shift + set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + # Retry under the restricted umask: call the umask builtin here, + # a plain "umask=..." would only assign an unrelated variable. + (umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently.
+ test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. + (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=${dstdirslash}_inst.$$_ + rmtmp=${dstdirslash}_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. 
+ if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + set +f && + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/ltmain.sh b/ltmain.sh new file mode 100644 index 0000000..7f3523d --- /dev/null +++ b/ltmain.sh @@ -0,0 +1,11149 @@ +#! /bin/sh +## DO NOT EDIT - This file generated from ./build-aux/ltmain.in +## by inline-source v2014-01-03.01 + +# libtool (GNU libtool) 2.4.6 +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit , 1996 + +# Copyright (C) 1996-2015 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. 
There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +PROGRAM=libtool +PACKAGE=libtool +VERSION=2.4.6 +package_revision=2.4.6 + + +## ------ ## +## Usage. ## +## ------ ## + +# Run './libtool --help' for help with using this script from the +# command line. + + +## ------------------------------- ## +## User overridable command paths. ## +## ------------------------------- ## + +# After configure completes, it has a better idea of some of the +# shell tools we need than the defaults used by the functions shared +# with bootstrap, so set those here where they can still be over- +# ridden by the user, but otherwise take precedence. + +: ${AUTOCONF="autoconf"} +: ${AUTOMAKE="automake"} + + +## -------------------------- ## +## Source external libraries. ## +## -------------------------- ## + +# Much of our low-level functionality needs to be sourced from external +# libraries, which are installed to $pkgauxdir. + +# Set a version string for this script. +scriptversion=2015-01-20.17; # UTC + +# General shell script boiler plate, and helper functions.
+# Written by Gary V. Vaughan, 2004 + +# Copyright (C) 2004-2015 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. + +# As a special exception to the GNU General Public License, if you distribute +# this file as part of a program or library that is built using GNU Libtool, +# you may include this file under the same distribution terms that you use +# for the rest of that program. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# Please report bugs or propose patches to gary@gnu.org. + + +## ------ ## +## Usage. ## +## ------ ## + +# Evaluate this file near the top of your script to gain access to +# the functions and variables defined here: +# +# . `echo "$0" | ${SED-sed} 's|[^/]*$||'`/build-aux/funclib.sh +# +# If you need to override any of the default environment variable +# settings, do that before evaluating this file. + + +## -------------------- ## +## Shell normalisation. ## +## -------------------- ## + +# Some shells need a little help to be as Bourne compatible as possible. +# Before doing anything else, make sure all that help has been provided!
+ +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac +fi + +# NLS nuisances: We save the old values in case they are required later. +_G_user_locale= +_G_safe_locale= +for _G_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES +do + eval "if test set = \"\${$_G_var+set}\"; then + save_$_G_var=\$$_G_var + $_G_var=C + export $_G_var + _G_user_locale=\"$_G_var=\\\$save_\$_G_var; \$_G_user_locale\" + _G_safe_locale=\"$_G_var=C; \$_G_safe_locale\" + fi" +done + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Make sure IFS has a sensible default +sp=' ' +nl=' +' +IFS="$sp $nl" + +# There are apparently some retarded systems that use ';' as a PATH separator! +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + + +## ------------------------- ## +## Locate command utilities. ## +## ------------------------- ## + + +# func_executable_p FILE +# ---------------------- +# Check that FILE is an executable regular file. +func_executable_p () +{ + test -f "$1" && test -x "$1" +} + + +# func_path_progs PROGS_LIST CHECK_FUNC [PATH] +# -------------------------------------------- +# Search for either a program that responds to --version with output +# containing "GNU", or else returned by CHECK_FUNC otherwise, by +# trying all the directories in PATH with each of the elements of +# PROGS_LIST. +# +# CHECK_FUNC should accept the path to a candidate program, and +# set $func_check_prog_result if it truncates its output less than +# $_G_path_prog_max characters. 
+func_path_progs () +{ + _G_progs_list=$1 + _G_check_func=$2 + _G_PATH=${3-"$PATH"} + + # Start from a clean slate so that a result left over from an earlier + # search (e.g. the one for sed) is never mistaken for this one. + func_path_progs_result= + func_check_prog_result= + + _G_path_prog_max=0 + _G_path_prog_found=false + _G_save_IFS=$IFS; IFS=${PATH_SEPARATOR-:} + for _G_dir in $_G_PATH; do + IFS=$_G_save_IFS + test -z "$_G_dir" && _G_dir=. + for _G_prog_name in $_G_progs_list; do + for _exeext in '' .EXE; do + _G_path_prog=$_G_dir/$_G_prog_name$_exeext + func_executable_p "$_G_path_prog" || continue + # A GNU implementation is accepted at once; anything else is rated + # by CHECK_FUNC, which records its best candidate so far in + # $func_check_prog_result. The path is quoted so that directories + # containing whitespace survive. + case `"$_G_path_prog" --version 2>&1` in + *GNU*) func_path_progs_result=$_G_path_prog _G_path_prog_found=: ;; + *) $_G_check_func "$_G_path_prog" + func_path_progs_result=$func_check_prog_result + ;; + esac + $_G_path_prog_found && break 3 + done + done + done + IFS=$_G_save_IFS + # Name the programs that were actually searched for, not always sed. + test -z "$func_path_progs_result" && { + echo "no acceptable program ($_G_progs_list) could be found in \$PATH" >&2 + exit 1 + } +} + + +# We want to be able to use the functions in this file before configure +# has figured out where the best binaries are kept, which means we have +# to search for them ourselves - except when the results are already set +# where we skip the searches. + +# Unless the user overrides by setting SED, search the path for either GNU +# sed, or the sed that truncates its output the least.
+test -z "$SED" && { + _G_sed_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ + for _G_i in 1 2 3 4 5 6 7; do + _G_sed_script=$_G_sed_script$nl$_G_sed_script + done + echo "$_G_sed_script" 2>/dev/null | sed 99q >conftest.sed + _G_sed_script= + + # func_check_prog_sed PROG + # Feed PROG ever longer single-line inputs and remember in + # $func_check_prog_result the candidate that copes with the longest one. + func_check_prog_sed () + { + _G_path_prog=$1 + + _G_count=0 + printf 0123456789 >conftest.in + while : + do + cat conftest.in conftest.in >conftest.tmp + mv conftest.tmp conftest.in + cp conftest.in conftest.nl + echo '' >> conftest.nl + # The candidate must read conftest.nl on stdin and reproduce it + # unchanged in conftest.out. + "$_G_path_prog" -f conftest.sed <conftest.nl >conftest.out 2>/dev/null || break + diff conftest.out conftest.nl >/dev/null 2>&1 || break + _G_count=`expr $_G_count + 1` + if test "$_G_count" -gt "$_G_path_prog_max"; then + # Best one so far, save it but keep looking for a better one + func_check_prog_result=$_G_path_prog + _G_path_prog_max=$_G_count + fi + # 10*(2^10) chars as input seems more than enough + test 10 -lt "$_G_count" && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out + } + + # Quote the search path so that it stays a single argument. + func_path_progs "sed gsed" func_check_prog_sed "$PATH:/usr/xpg4/bin" + rm -f conftest.sed + SED=$func_path_progs_result +} + + +# Unless the user overrides by setting GREP, search the path for either GNU +# grep, or the grep that truncates its output the least.
+test -z "$GREP" && { + func_check_prog_grep () + { + _G_path_prog=$1 + + _G_count=0 + _G_path_prog_max=0 + printf 0123456789 >conftest.in + while : + do + cat conftest.in conftest.in >conftest.tmp + mv conftest.tmp conftest.in + cp conftest.in conftest.nl + echo 'GREP' >> conftest.nl + "$_G_path_prog" -e 'GREP$' -e '-(cannot match)-' conftest.out 2>/dev/null || break + diff conftest.out conftest.nl >/dev/null 2>&1 || break + _G_count=`expr $_G_count + 1` + if test "$_G_count" -gt "$_G_path_prog_max"; then + # Best one so far, save it but keep looking for a better one + func_check_prog_result=$_G_path_prog + _G_path_prog_max=$_G_count + fi + # 10*(2^10) chars as input seems more than enough + test 10 -lt "$_G_count" && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out + } + + func_path_progs "grep ggrep" func_check_prog_grep $PATH:/usr/xpg4/bin + GREP=$func_path_progs_result +} + + +## ------------------------------- ## +## User overridable command paths. ## +## ------------------------------- ## + +# All uppercase variable names are used for environment variables. These +# variables can be overridden by the user before calling a script that +# uses them if a suitable command of that name is not already available +# in the command search PATH. + +: ${CP="cp -f"} +: ${ECHO="printf %s\n"} +: ${EGREP="$GREP -E"} +: ${FGREP="$GREP -F"} +: ${LN_S="ln -s"} +: ${MAKE="make"} +: ${MKDIR="mkdir"} +: ${MV="mv -f"} +: ${RM="rm -f"} +: ${SHELL="${CONFIG_SHELL-/bin/sh}"} + + +## -------------------- ## +## Useful sed snippets. ## +## -------------------- ## + +sed_dirname='s|/[^/]*$||' +sed_basename='s|^.*/||' + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +sed_quote_subst='s|\([`"$\\]\)|\\\1|g' + +# Same as above, but do not quote variable references. 
+sed_double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution that turns a string into a regex matching for the +# string literally. +sed_make_literal_regex='s|[].[^$\\*\/]|\\&|g' + +# Sed substitution that converts a w32 file name or path +# that contains forward slashes, into one that contains +# (escaped) backslashes. A very naive implementation. +sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g' + +# Re-'\' parameter expansions in output of sed_double_quote_subst that +# were '\'-ed in input to the same. If an odd number of '\' preceded a +# '$' in input to sed_double_quote_subst, that '$' was protected from +# expansion. Since each input '\' is now two '\'s, look for any number +# of runs of four '\'s followed by two '\'s and then a '$'. '\' that '$'. +_G_bs='\\' +_G_bs2='\\\\' +_G_bs4='\\\\\\\\' +_G_dollar='\$' +sed_double_backslash="\ + s/$_G_bs4/&\\ +/g + s/^$_G_bs2$_G_dollar/$_G_bs&/ + s/\\([^$_G_bs]\\)$_G_bs2$_G_dollar/\\1$_G_bs2$_G_bs$_G_dollar/g + s/\n//g" + + +## ----------------- ## +## Global variables. ## +## ----------------- ## + +# Except for the global variables explicitly listed below, the following +# functions in the '^func_' namespace, and the '^require_' namespace +# variables initialised in the 'Resource management' section, sourcing +# this file will not pollute your global namespace with anything +# else. There's no portable way to scope variables in Bourne shell +# though, so actually running these functions will sometimes place +# results into a variable named after the function, and often use +# temporary variables in the '^_G_' namespace. If you are careful to +# avoid using those namespaces casually in your sourcing script, things +# should continue to work as you expect. And, of course, you can freely +# overwrite any of the functions or variables defined here before +# calling anything to customize them. + +EXIT_SUCCESS=0 +EXIT_FAILURE=1 +EXIT_MISMATCH=63 # $? = 63 is used to indicate version mismatch to missing. 
+EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake. + +# Allow overriding, eg assuming that you follow the convention of +# putting '$debug_cmd' at the start of all your functions, you can get +# bash to show function call trace with: +# +# debug_cmd='eval echo "${FUNCNAME[0]} $*" >&2' bash your-script-name +debug_cmd=${debug_cmd-":"} +exit_cmd=: + +# By convention, finish your script with: +# +# exit $exit_status +# +# so that you can set exit_status to non-zero if you want to indicate +# something went wrong during execution without actually bailing out at +# the point of failure. +exit_status=$EXIT_SUCCESS + +# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh +# is ksh but when the shell is invoked as "sh" and the current value of +# the _XPG environment variable is not equal to 1 (one), the special +# positional parameter $0, within a function call, is the name of the +# function. +progpath=$0 + +# The name of this program. +progname=`$ECHO "$progpath" |$SED "$sed_basename"` + +# Make sure we have an absolute progpath for reexecution: +case $progpath in + [\\/]*|[A-Za-z]:\\*) ;; + *[\\/]*) + progdir=`$ECHO "$progpath" |$SED "$sed_dirname"` + progdir=`cd "$progdir" && pwd` + progpath=$progdir/$progname + ;; + *) + _G_IFS=$IFS + IFS=${PATH_SEPARATOR-:} + for progdir in $PATH; do + IFS=$_G_IFS + test -x "$progdir/$progname" && break + done + IFS=$_G_IFS + test -n "$progdir" || progdir=`pwd` + progpath=$progdir/$progname + ;; +esac + + +## ----------------- ## +## Standard options. ## +## ----------------- ## + +# The following options affect the operation of the functions defined +# below, and should be set appropriately depending on run-time para- +# meters passed on the command line. + +opt_dry_run=false +opt_quiet=false +opt_verbose=false + +# Categories 'all' and 'none' are always available. Append any others +# you will pass as the first argument to func_warning from your own +# code. 
+warning_categories= + +# By default, display warnings according to 'opt_warning_types'. Set +# 'warning_func' to ':' to elide all warnings, or func_fatal_error to +# treat the next displayed warning as a fatal error. +warning_func=func_warn_and_continue + +# Set to 'all' to display all warnings, 'none' to suppress all +# warnings, or a space delimited list of some subset of +# 'warning_categories' to display only the listed warnings. +opt_warning_types=all + + +## -------------------- ## +## Resource management. ## +## -------------------- ## + +# This section contains definitions for functions that each ensure a +# particular resource (a file, or a non-empty configuration variable for +# example) is available, and if appropriate to extract default values +# from pertinent package files. Call them using their associated +# 'require_*' variable to ensure that they are executed, at most, once. +# +# It's entirely deliberate that calling these functions can set +# variables that don't obey the namespace limitations obeyed by the rest +# of this file, in order that they be as useful as possible to +# callers. + + +# require_term_colors +# ------------------- +# Allow display of bold text on terminals that support it. +require_term_colors=func_require_term_colors +func_require_term_colors () +{ + $debug_cmd + + test -t 1 && { + # COLORTERM and USE_ANSI_COLORS environment variables take + # precedence, because most terminfo databases neglect to describe + # whether color sequences are supported. + test -n "${COLORTERM+set}" && : ${USE_ANSI_COLORS="1"} + + if test 1 = "$USE_ANSI_COLORS"; then + # Standard ANSI escape sequences. The ESC byte is produced once + # with printf so that no raw control character is needed here. + _G_esc=`printf '\033'` + tc_reset="${_G_esc}[0m" + tc_bold="${_G_esc}[1m"; tc_standout="${_G_esc}[7m" + tc_red="${_G_esc}[31m"; tc_green="${_G_esc}[32m" + tc_blue="${_G_esc}[34m"; tc_cyan="${_G_esc}[36m" + else + # Otherwise trust the terminfo database after all.
+ test -n "`tput sgr0 2>/dev/null`" && { + tc_reset=`tput sgr0` + test -n "`tput bold 2>/dev/null`" && tc_bold=`tput bold` + tc_standout=$tc_bold + test -n "`tput smso 2>/dev/null`" && tc_standout=`tput smso` + test -n "`tput setaf 1 2>/dev/null`" && tc_red=`tput setaf 1` + test -n "`tput setaf 2 2>/dev/null`" && tc_green=`tput setaf 2` + test -n "`tput setaf 4 2>/dev/null`" && tc_blue=`tput setaf 4` + test -n "`tput setaf 5 2>/dev/null`" && tc_cyan=`tput setaf 5` + } + fi + } + + require_term_colors=: +} + + +## ----------------- ## +## Function library. ## +## ----------------- ## + +# This section contains a variety of useful functions to call in your +# scripts. Take note of the portable wrappers for features provided by +# some modern shells, which will fall back to slower equivalents on +# less featureful shells. + + +# func_append VAR VALUE +# --------------------- +# Append VALUE onto the existing contents of VAR. + + # We should try to minimise forks, especially on Windows where they are + # unreasonably slow, so skip the feature probes when bash or zsh are + # being used: + if test set = "${BASH_VERSION+set}${ZSH_VERSION+set}"; then + : ${_G_HAVE_ARITH_OP="yes"} + : ${_G_HAVE_XSI_OPS="yes"} + # The += operator was introduced in bash 3.1 + case $BASH_VERSION in + [12].* | 3.0 | 3.0*) ;; + *) + : ${_G_HAVE_PLUSEQ_OP="yes"} + ;; + esac + fi + + # _G_HAVE_PLUSEQ_OP + # Can be empty, in which case the shell is probed, "yes" if += is + # useable or anything else if it does not work. + test -z "$_G_HAVE_PLUSEQ_OP" \ + && (eval 'x=a; x+=" b"; test "a b" = "$x"') 2>/dev/null \ + && _G_HAVE_PLUSEQ_OP=yes + +if test yes = "$_G_HAVE_PLUSEQ_OP" +then + # This is an XSI compatible shell, allowing a faster implementation... + eval 'func_append () + { + $debug_cmd + + eval "$1+=\$2" + }' +else + # ...otherwise fall back to using expr, which is often a shell builtin. 
+ func_append () + { + $debug_cmd + + eval "$1=\$$1\$2" + } +fi + + +# func_append_quoted VAR VALUE +# ---------------------------- +# Quote VALUE and append to the end of shell variable VAR, separated +# by a space. +if test yes = "$_G_HAVE_PLUSEQ_OP"; then + eval 'func_append_quoted () + { + $debug_cmd + + func_quote_for_eval "$2" + eval "$1+=\\ \$func_quote_for_eval_result" + }' +else + func_append_quoted () + { + $debug_cmd + + func_quote_for_eval "$2" + eval "$1=\$$1\\ \$func_quote_for_eval_result" + } +fi + + +# func_append_uniq VAR VALUE +# -------------------------- +# Append unique VALUE onto the existing contents of VAR, assuming +# entries are delimited by the first character of VALUE. For example: +# +# func_append_uniq options " --another-option option-argument" +# +# will only append to $options if " --another-option option-argument " +# is not already present somewhere in $options already (note spaces at +# each end implied by leading space in second argument). +func_append_uniq () +{ + $debug_cmd + + eval _G_current_value='`$ECHO $'$1'`' + _G_delim=`expr "$2" : '\(.\)'` + + case $_G_delim$_G_current_value$_G_delim in + *"$2$_G_delim"*) ;; + *) func_append "$@" ;; + esac +} + + +# func_arith TERM... +# ------------------ +# Set func_arith_result to the result of evaluating TERMs. + test -z "$_G_HAVE_ARITH_OP" \ + && (eval 'test 2 = $(( 1 + 1 ))') 2>/dev/null \ + && _G_HAVE_ARITH_OP=yes + +if test yes = "$_G_HAVE_ARITH_OP"; then + eval 'func_arith () + { + $debug_cmd + + func_arith_result=$(( $* )) + }' +else + func_arith () + { + $debug_cmd + + func_arith_result=`expr "$@"` + } +fi + + +# func_basename FILE +# ------------------ +# Set func_basename_result to FILE with everything up to and including +# the last / stripped. +if test yes = "$_G_HAVE_XSI_OPS"; then + # If this shell supports suffix pattern removal, then use it to avoid + # forking. Hide the definitions single quotes in case the shell chokes + # on unsupported syntax... 
+ _b='func_basename_result=${1##*/}' + _d='case $1 in + */*) func_dirname_result=${1%/*}$2 ;; + * ) func_dirname_result=$3 ;; + esac' + +else + # ...otherwise fall back to using sed. + _b='func_basename_result=`$ECHO "$1" |$SED "$sed_basename"`' + _d='func_dirname_result=`$ECHO "$1" |$SED "$sed_dirname"` + if test "X$func_dirname_result" = "X$1"; then + func_dirname_result=$3 + else + func_append func_dirname_result "$2" + fi' +fi + +eval 'func_basename () +{ + $debug_cmd + + '"$_b"' +}' + + +# func_dirname FILE APPEND NONDIR_REPLACEMENT +# ------------------------------------------- +# Compute the dirname of FILE. If nonempty, add APPEND to the result, +# otherwise set result to NONDIR_REPLACEMENT. +eval 'func_dirname () +{ + $debug_cmd + + '"$_d"' +}' + + +# func_dirname_and_basename FILE APPEND NONDIR_REPLACEMENT +# -------------------------------------------------------- +# Perform func_basename and func_dirname in a single function +# call: +# dirname: Compute the dirname of FILE. If nonempty, +# add APPEND to the result, otherwise set result +# to NONDIR_REPLACEMENT. +# value returned in "$func_dirname_result" +# basename: Compute filename of FILE. +# value retuned in "$func_basename_result" +# For efficiency, we do not delegate to the functions above but instead +# duplicate the functionality here. +eval 'func_dirname_and_basename () +{ + $debug_cmd + + '"$_b"' + '"$_d"' +}' + + +# func_echo ARG... +# ---------------- +# Echo program name prefixed message. +func_echo () +{ + $debug_cmd + + _G_message=$* + + func_echo_IFS=$IFS + IFS=$nl + for _G_line in $_G_message; do + IFS=$func_echo_IFS + $ECHO "$progname: $_G_line" + done + IFS=$func_echo_IFS +} + + +# func_echo_all ARG... +# -------------------- +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "$*" +} + + +# func_echo_infix_1 INFIX ARG... +# ------------------------------ +# Echo program name, followed by INFIX on the first line, with any +# additional lines not showing INFIX. 
func_echo_infix_1 ()
{
    $debug_cmd

    $require_term_colors

    _G_infix=$1; shift
    _G_indent=$_G_infix
    _G_prefix="$progname: $_G_infix: "
    _G_message=$*

    # Strip color escape sequences before counting printable length
    for _G_tc in "$tc_reset" "$tc_bold" "$tc_standout" "$tc_red" "$tc_green" "$tc_blue" "$tc_cyan"
    do
      test -n "$_G_tc" && {
        _G_esc_tc=`$ECHO "$_G_tc" | $SED "$sed_make_literal_regex"`
        _G_indent=`$ECHO "$_G_indent" | $SED "s|$_G_esc_tc||g"`
      }
    done
    # Continuation lines get blanks in place of INFIX.  The two trailing
    # blanks stand in for the ': ' that ends _G_prefix, so the message
    # text starts in the same column on every line.
    _G_indent="$progname: "`$ECHO "$_G_indent" | $SED 's|.| |g'`"  " ## exclude from sc_prohibit_nested_quotes

    # Split the message on newlines only; the first line carries the
    # INFIX prefix, every later line the blank indent computed above.
    func_echo_infix_1_IFS=$IFS
    IFS=$nl
    for _G_line in $_G_message; do
      IFS=$func_echo_infix_1_IFS
      $ECHO "$_G_prefix$tc_bold$_G_line$tc_reset" >&2
      _G_prefix=$_G_indent
    done
    IFS=$func_echo_infix_1_IFS
}


# func_error ARG...
# -----------------
# Echo program name prefixed message to standard error.
func_error ()
{
    $debug_cmd

    $require_term_colors

    # Two leading blanks pad 'error' to the width of 'warning', so both
    # kinds of message line up.
    func_echo_infix_1 "  $tc_standout${tc_red}error$tc_reset" "$*" >&2
}


# func_fatal_error ARG...
# -----------------------
# Echo program name prefixed message to standard error, and exit.
func_fatal_error ()
{
    $debug_cmd

    func_error "$*"
    exit $EXIT_FAILURE
}


# func_grep EXPRESSION FILENAME
# -----------------------------
# Check whether EXPRESSION matches any line of FILENAME, without output.
# The exit status is that of $GREP.
func_grep ()
{
    $debug_cmd

    $GREP "$1" "$2" >/dev/null 2>&1
}


# func_len STRING
# ---------------
# Set func_len_result to the length of STRING.  STRING may not
# start with a hyphen.
+ test -z "$_G_HAVE_XSI_OPS" \ + && (eval 'x=a/b/c; + test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \ + && _G_HAVE_XSI_OPS=yes + +if test yes = "$_G_HAVE_XSI_OPS"; then + eval 'func_len () + { + $debug_cmd + + func_len_result=${#1} + }' +else + func_len () + { + $debug_cmd + + func_len_result=`expr "$1" : ".*" 2>/dev/null || echo $max_cmd_len` + } +fi + + +# func_mkdir_p DIRECTORY-PATH +# --------------------------- +# Make sure the entire path to DIRECTORY-PATH is available. +func_mkdir_p () +{ + $debug_cmd + + _G_directory_path=$1 + _G_dir_list= + + if test -n "$_G_directory_path" && test : != "$opt_dry_run"; then + + # Protect directory names starting with '-' + case $_G_directory_path in + -*) _G_directory_path=./$_G_directory_path ;; + esac + + # While some portion of DIR does not yet exist... + while test ! -d "$_G_directory_path"; do + # ...make a list in topmost first order. Use a colon delimited + # list incase some portion of path contains whitespace. + _G_dir_list=$_G_directory_path:$_G_dir_list + + # If the last portion added has no slash in it, the list is done + case $_G_directory_path in */*) ;; *) break ;; esac + + # ...otherwise throw away the child directory and loop + _G_directory_path=`$ECHO "$_G_directory_path" | $SED -e "$sed_dirname"` + done + _G_dir_list=`$ECHO "$_G_dir_list" | $SED 's|:*$||'` + + func_mkdir_p_IFS=$IFS; IFS=: + for _G_dir in $_G_dir_list; do + IFS=$func_mkdir_p_IFS + # mkdir can fail with a 'File exist' error if two processes + # try to create one of the directories concurrently. Don't + # stop in that case! + $MKDIR "$_G_dir" 2>/dev/null || : + done + IFS=$func_mkdir_p_IFS + + # Bail out if we (or some other process) failed to create a directory. 
+ test -d "$_G_directory_path" || \ + func_fatal_error "Failed to create '$1'" + fi +} + + +# func_mktempdir [BASENAME] +# ------------------------- +# Make a temporary directory that won't clash with other running +# libtool processes, and avoids race conditions if possible. If +# given, BASENAME is the basename for that directory. +func_mktempdir () +{ + $debug_cmd + + _G_template=${TMPDIR-/tmp}/${1-$progname} + + if test : = "$opt_dry_run"; then + # Return a directory name, but don't create it in dry-run mode + _G_tmpdir=$_G_template-$$ + else + + # If mktemp works, use that first and foremost + _G_tmpdir=`mktemp -d "$_G_template-XXXXXXXX" 2>/dev/null` + + if test ! -d "$_G_tmpdir"; then + # Failing that, at least try and use $RANDOM to avoid a race + _G_tmpdir=$_G_template-${RANDOM-0}$$ + + func_mktempdir_umask=`umask` + umask 0077 + $MKDIR "$_G_tmpdir" + umask $func_mktempdir_umask + fi + + # If we're not in dry-run mode, bomb out on failure + test -d "$_G_tmpdir" || \ + func_fatal_error "cannot create temporary directory '$_G_tmpdir'" + fi + + $ECHO "$_G_tmpdir" +} + + +# func_normal_abspath PATH +# ------------------------ +# Remove doubled-up and trailing slashes, "." path components, +# and cancel out any ".." path components in PATH after making +# it an absolute path. +func_normal_abspath () +{ + $debug_cmd + + # These SED scripts presuppose an absolute path with a trailing slash. + _G_pathcar='s|^/\([^/]*\).*$|\1|' + _G_pathcdr='s|^/[^/]*||' + _G_removedotparts=':dotsl + s|/\./|/|g + t dotsl + s|/\.$|/|' + _G_collapseslashes='s|/\{1,\}|/|g' + _G_finalslash='s|/*$|/|' + + # Start from root dir and reassemble the path. + func_normal_abspath_result= + func_normal_abspath_tpath=$1 + func_normal_abspath_altnamespace= + case $func_normal_abspath_tpath in + "") + # Empty path, that just means $cwd. 
+ func_stripname '' '/' "`pwd`" + func_normal_abspath_result=$func_stripname_result + return + ;; + # The next three entries are used to spot a run of precisely + # two leading slashes without using negated character classes; + # we take advantage of case's first-match behaviour. + ///*) + # Unusual form of absolute path, do nothing. + ;; + //*) + # Not necessarily an ordinary path; POSIX reserves leading '//' + # and for example Cygwin uses it to access remote file shares + # over CIFS/SMB, so we conserve a leading double slash if found. + func_normal_abspath_altnamespace=/ + ;; + /*) + # Absolute path, do nothing. + ;; + *) + # Relative path, prepend $cwd. + func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath + ;; + esac + + # Cancel out all the simple stuff to save iterations. We also want + # the path to end with a slash for ease of parsing, so make sure + # there is one (and only one) here. + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$_G_removedotparts" -e "$_G_collapseslashes" -e "$_G_finalslash"` + while :; do + # Processed it all yet? + if test / = "$func_normal_abspath_tpath"; then + # If we ascended to the root using ".." the result may be empty now. + if test -z "$func_normal_abspath_result"; then + func_normal_abspath_result=/ + fi + break + fi + func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$_G_pathcar"` + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$_G_pathcdr"` + # Figure out what to do with it + case $func_normal_abspath_tcomponent in + "") + # Trailing empty path component, ignore it. + ;; + ..) + # Parent dir; strip last assembled component from result. + func_dirname "$func_normal_abspath_result" + func_normal_abspath_result=$func_dirname_result + ;; + *) + # Actual path component, append it. 
+ func_append func_normal_abspath_result "/$func_normal_abspath_tcomponent" + ;; + esac + done + # Restore leading double-slash if one was found on entry. + func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result +} + + +# func_notquiet ARG... +# -------------------- +# Echo program name prefixed message only when not in quiet mode. +func_notquiet () +{ + $debug_cmd + + $opt_quiet || func_echo ${1+"$@"} + + # A bug in bash halts the script if the last line of a function + # fails when set -e is in force, so we need another command to + # work around that: + : +} + + +# func_relative_path SRCDIR DSTDIR +# -------------------------------- +# Set func_relative_path_result to the relative path from SRCDIR to DSTDIR. +func_relative_path () +{ + $debug_cmd + + func_relative_path_result= + func_normal_abspath "$1" + func_relative_path_tlibdir=$func_normal_abspath_result + func_normal_abspath "$2" + func_relative_path_tbindir=$func_normal_abspath_result + + # Ascend the tree starting from libdir + while :; do + # check if we have found a prefix of bindir + case $func_relative_path_tbindir in + $func_relative_path_tlibdir) + # found an exact match + func_relative_path_tcancelled= + break + ;; + $func_relative_path_tlibdir*) + # found a matching prefix + func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir" + func_relative_path_tcancelled=$func_stripname_result + if test -z "$func_relative_path_result"; then + func_relative_path_result=. + fi + break + ;; + *) + func_dirname $func_relative_path_tlibdir + func_relative_path_tlibdir=$func_dirname_result + if test -z "$func_relative_path_tlibdir"; then + # Have to descend all the way to the root! + func_relative_path_result=../$func_relative_path_result + func_relative_path_tcancelled=$func_relative_path_tbindir + break + fi + func_relative_path_result=../$func_relative_path_result + ;; + esac + done + + # Now calculate path; take care to avoid doubling-up slashes. 
+ func_stripname '' '/' "$func_relative_path_result" + func_relative_path_result=$func_stripname_result + func_stripname '/' '/' "$func_relative_path_tcancelled" + if test -n "$func_stripname_result"; then + func_append func_relative_path_result "/$func_stripname_result" + fi + + # Normalisation. If bindir is libdir, return '.' else relative path. + if test -n "$func_relative_path_result"; then + func_stripname './' '' "$func_relative_path_result" + func_relative_path_result=$func_stripname_result + fi + + test -n "$func_relative_path_result" || func_relative_path_result=. + + : +} + + +# func_quote_for_eval ARG... +# -------------------------- +# Aesthetically quote ARGs to be evaled later. +# This function returns two values: +# i) func_quote_for_eval_result +# double-quoted, suitable for a subsequent eval +# ii) func_quote_for_eval_unquoted_result +# has all characters that are still active within double +# quotes backslashified. +func_quote_for_eval () +{ + $debug_cmd + + func_quote_for_eval_unquoted_result= + func_quote_for_eval_result= + while test 0 -lt $#; do + case $1 in + *[\\\`\"\$]*) + _G_unquoted_arg=`printf '%s\n' "$1" |$SED "$sed_quote_subst"` ;; + *) + _G_unquoted_arg=$1 ;; + esac + if test -n "$func_quote_for_eval_unquoted_result"; then + func_append func_quote_for_eval_unquoted_result " $_G_unquoted_arg" + else + func_append func_quote_for_eval_unquoted_result "$_G_unquoted_arg" + fi + + case $_G_unquoted_arg in + # Double-quote args containing shell metacharacters to delay + # word splitting, command substitution and variable expansion + # for a subsequent eval. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. 
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + _G_quoted_arg=\"$_G_unquoted_arg\" + ;; + *) + _G_quoted_arg=$_G_unquoted_arg + ;; + esac + + if test -n "$func_quote_for_eval_result"; then + func_append func_quote_for_eval_result " $_G_quoted_arg" + else + func_append func_quote_for_eval_result "$_G_quoted_arg" + fi + shift + done +} + + +# func_quote_for_expand ARG +# ------------------------- +# Aesthetically quote ARG to be evaled later; same as above, +# but do not quote variable references. +func_quote_for_expand () +{ + $debug_cmd + + case $1 in + *[\\\`\"]*) + _G_arg=`$ECHO "$1" | $SED \ + -e "$sed_double_quote_subst" -e "$sed_double_backslash"` ;; + *) + _G_arg=$1 ;; + esac + + case $_G_arg in + # Double-quote args containing shell metacharacters to delay + # word splitting and command substitution for a subsequent eval. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + _G_arg=\"$_G_arg\" + ;; + esac + + func_quote_for_expand_result=$_G_arg +} + + +# func_stripname PREFIX SUFFIX NAME +# --------------------------------- +# strip PREFIX and SUFFIX from NAME, and store in func_stripname_result. +# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). +if test yes = "$_G_HAVE_XSI_OPS"; then + eval 'func_stripname () + { + $debug_cmd + + # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are + # positional parameters, so assign one to ordinary variable first. 
+ func_stripname_result=$3 + func_stripname_result=${func_stripname_result#"$1"} + func_stripname_result=${func_stripname_result%"$2"} + }' +else + func_stripname () + { + $debug_cmd + + case $2 in + .*) func_stripname_result=`$ECHO "$3" | $SED -e "s%^$1%%" -e "s%\\\\$2\$%%"`;; + *) func_stripname_result=`$ECHO "$3" | $SED -e "s%^$1%%" -e "s%$2\$%%"`;; + esac + } +fi + + +# func_show_eval CMD [FAIL_EXP] +# ----------------------------- +# Unless opt_quiet is true, then output CMD. Then, if opt_dryrun is +# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP +# is given, then evaluate it. +func_show_eval () +{ + $debug_cmd + + _G_cmd=$1 + _G_fail_exp=${2-':'} + + func_quote_for_expand "$_G_cmd" + eval "func_notquiet $func_quote_for_expand_result" + + $opt_dry_run || { + eval "$_G_cmd" + _G_status=$? + if test 0 -ne "$_G_status"; then + eval "(exit $_G_status); $_G_fail_exp" + fi + } +} + + +# func_show_eval_locale CMD [FAIL_EXP] +# ------------------------------------ +# Unless opt_quiet is true, then output CMD. Then, if opt_dryrun is +# not true, evaluate CMD. If the evaluation of CMD fails, and FAIL_EXP +# is given, then evaluate it. Use the saved locale for evaluation. +func_show_eval_locale () +{ + $debug_cmd + + _G_cmd=$1 + _G_fail_exp=${2-':'} + + $opt_quiet || { + func_quote_for_expand "$_G_cmd" + eval "func_echo $func_quote_for_expand_result" + } + + $opt_dry_run || { + eval "$_G_user_locale + $_G_cmd" + _G_status=$? + eval "$_G_safe_locale" + if test 0 -ne "$_G_status"; then + eval "(exit $_G_status); $_G_fail_exp" + fi + } +} + + +# func_tr_sh +# ---------- +# Turn $1 into a string suitable for a shell variable name. +# Result is stored in $func_tr_sh_result. All characters +# not in the set a-zA-Z0-9_ are replaced with '_'. Further, +# if $1 begins with a digit, a '_' is prepended as well. 
func_tr_sh ()
{
    $debug_cmd

    case $1 in
      [0-9]* | *[!a-zA-Z0-9_]*)
        # Only fork sed when $1 actually needs rewriting.
        func_tr_sh_result=`$ECHO "$1" | $SED -e 's/^\([0-9]\)/_\1/' -e 's/[^a-zA-Z0-9_]/_/g'`
        ;;
      * )
        func_tr_sh_result=$1
        ;;
    esac
}


# func_verbose ARG...
# -------------------
# Echo program name prefixed message in verbose mode only.
func_verbose ()
{
    $debug_cmd

    $opt_verbose && func_echo "$*"

    # Keep the function's exit status at zero when not verbose.
    :
}


# func_warn_and_continue ARG...
# -----------------------------
# Echo program name prefixed warning message to standard error.
func_warn_and_continue ()
{
    $debug_cmd

    $require_term_colors

    func_echo_infix_1 "${tc_red}warning$tc_reset" "$*" >&2
}


# func_warning CATEGORY ARG...
# ----------------------------
# Echo program name prefixed warning message to standard error.  Warning
# messages can be filtered according to CATEGORY, where this function
# elides messages where CATEGORY is not listed in the global variable
# 'opt_warning_types'.
func_warning ()
{
    $debug_cmd

    # CATEGORY must be in the warning_categories list!
    case " $warning_categories " in
      *" $1 "*) ;;
      *) func_internal_error "invalid warning category '$1'" ;;
    esac

    _G_category=$1
    shift

    # Dispatch through $warning_func so callers can silence warnings or
    # make them fatal.
    case " $opt_warning_types " in
      *" $_G_category "*) $warning_func ${1+"$@"} ;;
    esac
}


# func_sort_ver VER1 VER2
# -----------------------
# 'sort -V' is not generally available.
# Note this deviates from the version comparison in automake
# in that it treats 1.5 < 1.5.0, and treats 1.4.4a < 1.4-p3a
# but this should suffice as we won't be specifying old
# version formats or redundant trailing .0 in bootstrap.conf.
# If we did want full compatibility then we should probably
# use m4_version_compare from autoconf.
# The two versions are written to standard output, one per line, in
# ascending order.
func_sort_ver ()
{
    $debug_cmd

    printf '%s\n%s\n' "$1" "$2" \
      | sort -t. -k 1,1n -k 2,2n -k 3,3n -k 4,4n -k 5,5n -k 6,6n -k 7,7n -k 8,8n -k 9,9n
}

# func_lt_ver PREV CURR
# ---------------------
# Return true if PREV and CURR are in the correct order according to
# func_sort_ver, otherwise false.  Use it like this:
#
#   func_lt_ver "$prev_ver" "$proposed_ver" || func_fatal_error "..."
func_lt_ver ()
{
    $debug_cmd

    # The substitution is quoted so that a version string containing
    # whitespace still reaches 'test' as a single operand.
    test "x$1" = "x`func_sort_ver "$1" "$2" | $SED 1q`"
}


# Local variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-pattern: "10/scriptversion=%:y-%02m-%02d.%02H; # UTC"
# time-stamp-time-zone: "UTC"
# End:
#! /bin/sh

# Set a version string for this script.
scriptversion=2014-01-07.03; # UTC

# A portable, pluggable option parser for Bourne shell.
# Written by Gary V. Vaughan, 2010

# Copyright (C) 2010-2015 Free Software Foundation, Inc.
# This is free software; see the source for copying conditions.  There is NO
# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Please report bugs or propose patches to gary@gnu.org.


## ------ ##
## Usage. ##
## ------ ##

# This file is a library for parsing options in your shell scripts along
# with assorted other useful supporting features that you can make use
# of too.
#
# For the simplest scripts you might need only:
#
#   #!/bin/sh
#   . relative/path/to/funclib.sh
#   . relative/path/to/options-parser
#   scriptversion=1.0
#   func_options ${1+"$@"}
#   eval set dummy "$func_options_result"; shift
#   ...rest of your script...
#
# In order for the '--version' option to work, you will need to have a
# suitably formatted comment like the one at the top of this file
# starting with '# Written by ' and ending with '# warranty; '.
#
# For '-h' and '--help' to work, you will also need a one line
# description of your script's purpose in a comment directly above the
# '# Written by ' line, like the one at the top of this file.
#
# The default options also support '--debug', which will turn on shell
# execution tracing (see the comment above debug_cmd below for another
# use), and '--verbose' and the func_verbose function to allow your script
# to display verbose messages only when your user has specified
# '--verbose'.
#
# After sourcing this file, you can plug processing for additional
# options by amending the variables from the 'Configuration' section
# below, and following the instructions in the 'Option parsing'
# section further down.

## -------------- ##
## Configuration. ##
## -------------- ##

# You should override these variables in your script after sourcing this
# file so that they reflect the customisations you have added to the
# option parser.

# The usage line for option parsing errors and the start of '-h' and
# '--help' output messages.  You can embed shell variables for delayed
# expansion at the time the message is displayed, but you will need to
# quote other shell meta-characters carefully to prevent them being
# expanded when the contents are evaled.
usage='$progpath [OPTION]...'

# Short help message in response to '-h' and '--help'.  Add to this or
# override it after sourcing this library to reflect the full set of
# options your script accepts.
usage_message="\
       --debug        enable verbose shell tracing
   -W, --warnings=CATEGORY
                      report the warnings falling in CATEGORY [all]
   -v, --verbose      verbosely report processing
       --version      print version information and exit
   -h, --help         print short or long help message and exit
"

# Additional text appended to 'usage_message' in response to '--help'.
long_help_message="
Warning categories include:
       'all'          show all warnings
       'none'         turn off all the warnings
       'error'        warnings are treated as fatal errors"

# Help message printed before fatal option parsing errors.
fatal_help="Try '\$progname --help' for more information."



## ------------------------- ##
## Hook function management. ##
## ------------------------- ##

# This section contains functions for adding, removing, and running hooks
# to the main code.  A hook is just a named list of functions, that can
# be run in order later on.

# func_hookable FUNC_NAME
# -----------------------
# Declare that FUNC_NAME will run hooks added with
# 'func_add_hook FUNC_NAME ...'.
func_hookable ()
{
    $debug_cmd

    func_append hookable_fns " $1"
}


# func_add_hook FUNC_NAME HOOK_FUNC
# ---------------------------------
# Request that FUNC_NAME call HOOK_FUNC before it returns.  FUNC_NAME must
# first have been declared "hookable" by a call to 'func_hookable'.
func_add_hook ()
{
    $debug_cmd

    case " $hookable_fns " in
      *" $1 "*) ;;
      *) func_fatal_error "'$1' does not accept hook functions." ;;
    esac

    # The hooks of FUNC_NAME live in the variable FUNC_NAME_hooks.
    eval func_append ${1}_hooks '" $2"'
}


# func_remove_hook FUNC_NAME HOOK_FUNC
# ------------------------------------
# Remove HOOK_FUNC from the list of functions called by FUNC_NAME.
func_remove_hook ()
{
    $debug_cmd

    eval ${1}_hooks='`$ECHO "\$'$1'_hooks" |$SED "s| '$2'||"`'
}


# func_run_hooks FUNC_NAME [ARG]...
# ---------------------------------
# Run all hook functions registered to FUNC_NAME.
# It is assumed that the list of hook functions contains nothing more
# than a whitespace-delimited list of legal shell function names, and
# no effort is wasted trying to catch shell meta-characters or preserve
# whitespace.
# The argument list left over after the last hook is stored, quoted for
# eval, in func_run_hooks_result.
func_run_hooks ()
{
    $debug_cmd

    case " $hookable_fns " in
      *" $1 "*) ;;
      *) func_fatal_error "'$1' does not support hook functions." ;;
    esac

    eval _G_hook_fns=\$$1_hooks; shift

    for _G_hook in $_G_hook_fns; do
      eval $_G_hook '"$@"'

      # store returned options list back into positional
      # parameters for next 'cmd' execution.
      eval _G_hook_result=\$${_G_hook}_result
      eval set dummy "$_G_hook_result"; shift
    done

    func_quote_for_eval ${1+"$@"}
    func_run_hooks_result=$func_quote_for_eval_result
}



## --------------- ##
## Option parsing. ##
## --------------- ##

# In order to add your own option parsing hooks, you must accept the
# full positional parameter list in your hook function, remove any
# options that you action, and then pass back the remaining unprocessed
# options in '<hook_function_name>_result', escaped suitably for
# 'eval'.  Like this:
#
#    my_options_prep ()
#    {
#        $debug_cmd
#
#        # Extend the existing usage message.
#        usage_message=$usage_message'
#      -s, --silent       don'\''t print informational messages
#    '
#
#        func_quote_for_eval ${1+"$@"}
#        my_options_prep_result=$func_quote_for_eval_result
#    }
#    func_add_hook func_options_prep my_options_prep
#
#
#    my_silent_option ()
#    {
#        $debug_cmd
#
#        # Note that for efficiency, we parse as many options as we can
#        # recognise in a loop before passing the remainder back to the
#        # caller on the first unrecognised argument we encounter.
#        while test $# -gt 0; do
#          _G_opt=$1; shift
#          case $_G_opt in
#            --silent|-s) opt_silent=: ;;
#            # Separate non-argument short options:
#            -s*)         func_split_short_opt "$_G_opt"
#                         set dummy "$func_split_short_opt_name" \
#                             "-$func_split_short_opt_arg" ${1+"$@"}
#                         shift
#                         ;;
#            *)           set dummy "$_G_opt" ${1+"$@"}; shift; break ;;
#          esac
#        done
#
#        func_quote_for_eval ${1+"$@"}
#        my_silent_option_result=$func_quote_for_eval_result
#    }
#    func_add_hook func_parse_options my_silent_option
#
#
#    my_option_validation ()
#    {
#        $debug_cmd
#
#        $opt_silent && $opt_verbose && func_fatal_help "\
#    '--silent' and '--verbose' options are mutually exclusive."
#
#        func_quote_for_eval ${1+"$@"}
#        my_option_validation_result=$func_quote_for_eval_result
#    }
#    func_add_hook func_validate_options my_option_validation
#
# You'll also need to manually amend $usage_message to reflect the extra
# options you parse.  It's preferable to append if you can, so that
# multiple option parsing hooks can be added safely.


# func_options [ARG]...
# ---------------------
# All the functions called inside func_options are hookable. See the
# individual implementations for details.
# The remaining arguments are stored, quoted for eval, in
# func_options_result.
func_hookable func_options
func_options ()
{
    $debug_cmd

    # Each stage hands its surviving arguments to the next one through
    # its '*_result' variable, which is re-split by eval.
    func_options_prep ${1+"$@"}
    eval func_parse_options \
        ${func_options_prep_result+"$func_options_prep_result"}
    eval func_validate_options \
        ${func_parse_options_result+"$func_parse_options_result"}

    eval func_run_hooks func_options \
        ${func_validate_options_result+"$func_validate_options_result"}

    # save modified positional parameters for caller
    func_options_result=$func_run_hooks_result
}


# func_options_prep [ARG]...
# --------------------------
# All initialisations required before starting the option parse loop.
# Note that when calling hook functions, we pass through the list of
# positional parameters.
If a hook function modifies that list, and +# needs to propogate that back to rest of this script, then the complete +# modified list must be put in 'func_run_hooks_result' before +# returning. +func_hookable func_options_prep +func_options_prep () +{ + $debug_cmd + + # Option defaults: + opt_verbose=false + opt_warning_types= + + func_run_hooks func_options_prep ${1+"$@"} + + # save modified positional parameters for caller + func_options_prep_result=$func_run_hooks_result +} + + +# func_parse_options [ARG]... +# --------------------------- +# The main option parsing loop. +func_hookable func_parse_options +func_parse_options () +{ + $debug_cmd + + func_parse_options_result= + + # this just eases exit handling + while test $# -gt 0; do + # Defer to hook functions for initial option parsing, so they + # get priority in the event of reusing an option name. + func_run_hooks func_parse_options ${1+"$@"} + + # Adjust func_parse_options positional parameters to match + eval set dummy "$func_run_hooks_result"; shift + + # Break out of the loop if we already parsed every option. 
+ test $# -gt 0 || break + + _G_opt=$1 + shift + case $_G_opt in + --debug|-x) debug_cmd='set -x' + func_echo "enabling shell trace mode" + $debug_cmd + ;; + + --no-warnings|--no-warning|--no-warn) + set dummy --warnings none ${1+"$@"} + shift + ;; + + --warnings|--warning|-W) + test $# = 0 && func_missing_arg $_G_opt && break + case " $warning_categories $1" in + *" $1 "*) + # trailing space prevents matching last $1 above + func_append_uniq opt_warning_types " $1" + ;; + *all) + opt_warning_types=$warning_categories + ;; + *none) + opt_warning_types=none + warning_func=: + ;; + *error) + opt_warning_types=$warning_categories + warning_func=func_fatal_error + ;; + *) + func_fatal_error \ + "unsupported warning category: '$1'" + ;; + esac + shift + ;; + + --verbose|-v) opt_verbose=: ;; + --version) func_version ;; + -\?|-h) func_usage ;; + --help) func_help ;; + + # Separate optargs to long options (plugins may need this): + --*=*) func_split_equals "$_G_opt" + set dummy "$func_split_equals_lhs" \ + "$func_split_equals_rhs" ${1+"$@"} + shift + ;; + + # Separate optargs to short options: + -W*) + func_split_short_opt "$_G_opt" + set dummy "$func_split_short_opt_name" \ + "$func_split_short_opt_arg" ${1+"$@"} + shift + ;; + + # Separate non-argument short options: + -\?*|-h*|-v*|-x*) + func_split_short_opt "$_G_opt" + set dummy "$func_split_short_opt_name" \ + "-$func_split_short_opt_arg" ${1+"$@"} + shift + ;; + + --) break ;; + -*) func_fatal_help "unrecognised option: '$_G_opt'" ;; + *) set dummy "$_G_opt" ${1+"$@"}; shift; break ;; + esac + done + + # save modified positional parameters for caller + func_quote_for_eval ${1+"$@"} + func_parse_options_result=$func_quote_for_eval_result +} + + +# func_validate_options [ARG]... +# ------------------------------ +# Perform any sanity checks on option settings and/or unconsumed +# arguments. +func_hookable func_validate_options +func_validate_options () +{ + $debug_cmd + + # Display all warnings if -W was not given. 
+ test -n "$opt_warning_types" || opt_warning_types=" $warning_categories" + + func_run_hooks func_validate_options ${1+"$@"} + + # Bail if the options were screwed! + $exit_cmd $EXIT_FAILURE + + # save modified positional parameters for caller + func_validate_options_result=$func_run_hooks_result +} + + + +## ----------------- ## +## Helper functions. ## +## ----------------- ## + +# This section contains the helper functions used by the rest of the +# hookable option parser framework in ascii-betical order. + + +# func_fatal_help ARG... +# ---------------------- +# Echo program name prefixed message to standard error, followed by +# a help hint, and exit. +func_fatal_help () +{ + $debug_cmd + + eval \$ECHO \""Usage: $usage"\" + eval \$ECHO \""$fatal_help"\" + func_error ${1+"$@"} + exit $EXIT_FAILURE +} + + +# func_help +# --------- +# Echo long help message to standard output and exit. +func_help () +{ + $debug_cmd + + func_usage_message + $ECHO "$long_help_message" + exit 0 +} + + +# func_missing_arg ARGNAME +# ------------------------ +# Echo program name prefixed message to standard error and set global +# exit_cmd. +func_missing_arg () +{ + $debug_cmd + + func_error "Missing argument for '$1'." + exit_cmd=exit +} + + +# func_split_equals STRING +# ------------------------ +# Set func_split_equals_lhs and func_split_equals_rhs shell variables after +# splitting STRING at the '=' sign. +test -z "$_G_HAVE_XSI_OPS" \ + && (eval 'x=a/b/c; + test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \ + && _G_HAVE_XSI_OPS=yes + +if test yes = "$_G_HAVE_XSI_OPS" +then + # This is an XSI compatible shell, allowing a faster implementation... + eval 'func_split_equals () + { + $debug_cmd + + func_split_equals_lhs=${1%%=*} + func_split_equals_rhs=${1#*=} + test "x$func_split_equals_lhs" = "x$1" \ + && func_split_equals_rhs= + }' +else + # ...otherwise fall back to using expr, which is often a shell builtin. 
+ func_split_equals () + { + $debug_cmd + + func_split_equals_lhs=`expr "x$1" : 'x\([^=]*\)'` + func_split_equals_rhs= + test "x$func_split_equals_lhs" = "x$1" \ + || func_split_equals_rhs=`expr "x$1" : 'x[^=]*=\(.*\)$'` + } +fi #func_split_equals + + +# func_split_short_opt SHORTOPT +# ----------------------------- +# Set func_split_short_opt_name and func_split_short_opt_arg shell +# variables after splitting SHORTOPT after the 2nd character. +if test yes = "$_G_HAVE_XSI_OPS" +then + # This is an XSI compatible shell, allowing a faster implementation... + eval 'func_split_short_opt () + { + $debug_cmd + + func_split_short_opt_arg=${1#??} + func_split_short_opt_name=${1%"$func_split_short_opt_arg"} + }' +else + # ...otherwise fall back to using expr, which is often a shell builtin. + func_split_short_opt () + { + $debug_cmd + + func_split_short_opt_name=`expr "x$1" : 'x-\(.\)'` + func_split_short_opt_arg=`expr "x$1" : 'x-.\(.*\)$'` + } +fi #func_split_short_opt + + +# func_usage +# ---------- +# Echo short help message to standard output and exit. +func_usage () +{ + $debug_cmd + + func_usage_message + $ECHO "Run '$progname --help |${PAGER-more}' for full usage" + exit 0 +} + + +# func_usage_message +# ------------------ +# Echo short help message to standard output. +func_usage_message () +{ + $debug_cmd + + eval \$ECHO \""Usage: $usage"\" + echo + $SED -n 's|^# || + /^Written by/{ + x;p;x + } + h + /^Written by/q' < "$progpath" + echo + eval \$ECHO \""$usage_message"\" +} + + +# func_version +# ------------ +# Echo version message to standard output and exit. +func_version () +{ + $debug_cmd + + printf '%s\n' "$progname $scriptversion" + $SED -n ' + /(C)/!b go + :more + /\./!{ + N + s|\n# | | + b more + } + :go + /^# Written by /,/# warranty; / { + s|^# || + s|^# *$|| + s|\((C)\)[ 0-9,-]*[ ,-]\([1-9][0-9]* \)|\1 \2| + p + } + /^# Written by / { + s|^# || + p + } + /^warranty; /q' < "$progpath" + + exit $? 
+} + + +# Local variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-pattern: "10/scriptversion=%:y-%02m-%02d.%02H; # UTC" +# time-stamp-time-zone: "UTC" +# End: + +# Set a version string. +scriptversion='(GNU libtool) 2.4.6' + + +# func_echo ARG... +# ---------------- +# Libtool also displays the current mode in messages, so override +# funclib.sh func_echo with this custom definition. +func_echo () +{ + $debug_cmd + + _G_message=$* + + func_echo_IFS=$IFS + IFS=$nl + for _G_line in $_G_message; do + IFS=$func_echo_IFS + $ECHO "$progname${opt_mode+: $opt_mode}: $_G_line" + done + IFS=$func_echo_IFS +} + + +# func_warning ARG... +# ------------------- +# Libtool warnings are not categorized, so override funclib.sh +# func_warning with this simpler definition. +func_warning () +{ + $debug_cmd + + $warning_func ${1+"$@"} +} + + +## ---------------- ## +## Options parsing. ## +## ---------------- ## + +# Hook in the functions to make sure our own options are parsed during +# the option parsing loop. + +usage='$progpath [OPTION]... [MODE-ARG]...' + +# Short help message in response to '-h'. +usage_message="Options: + --config show all configuration variables + --debug enable verbose shell tracing + -n, --dry-run display commands without modifying any files + --features display basic configuration information and exit + --mode=MODE use operation mode MODE + --no-warnings equivalent to '-Wnone' + --preserve-dup-deps don't remove duplicate dependency libraries + --quiet, --silent don't print informational messages + --tag=TAG use configuration variables from tag TAG + -v, --verbose print more informational messages than default + --version print version information + -W, --warnings=CATEGORY report the warnings falling in CATEGORY [all] + -h, --help, --help-all print short, long, or detailed help message +" + +# Additional text appended to 'usage_message' in response to '--help'. 
+func_help () +{ + $debug_cmd + + func_usage_message + $ECHO "$long_help_message + +MODE must be one of the following: + + clean remove files from the build directory + compile compile a source file into a libtool object + execute automatically set library path, then run a program + finish complete the installation of libtool libraries + install install libraries or executables + link create a library or an executable + uninstall remove libraries from an installed directory + +MODE-ARGS vary depending on the MODE. When passed as first option, +'--mode=MODE' may be abbreviated as 'MODE' or a unique abbreviation of that. +Try '$progname --help --mode=MODE' for a more detailed description of MODE. + +When reporting a bug, please describe a test case to reproduce it and +include the following information: + + host-triplet: $host + shell: $SHELL + compiler: $LTCC + compiler flags: $LTCFLAGS + linker: $LD (gnu? $with_gnu_ld) + version: $progname (GNU libtool) 2.4.6 + automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q` + autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q` + +Report bugs to . +GNU libtool home page: . +General help using GNU software: ." + exit 0 +} + + +# func_lo2o OBJECT-NAME +# --------------------- +# Transform OBJECT-NAME from a '.lo' suffix to the platform specific +# object suffix. + +lo2o=s/\\.lo\$/.$objext/ +o2lo=s/\\.$objext\$/.lo/ + +if test yes = "$_G_HAVE_XSI_OPS"; then + eval 'func_lo2o () + { + case $1 in + *.lo) func_lo2o_result=${1%.lo}.$objext ;; + * ) func_lo2o_result=$1 ;; + esac + }' + + # func_xform LIBOBJ-OR-SOURCE + # --------------------------- + # Transform LIBOBJ-OR-SOURCE from a '.o' or '.c' (or otherwise) + # suffix to a '.lo' libtool-object suffix. + eval 'func_xform () + { + func_xform_result=${1%.*}.lo + }' +else + # ...otherwise fall back to using sed. 
+ func_lo2o () + { + func_lo2o_result=`$ECHO "$1" | $SED "$lo2o"` + } + + func_xform () + { + func_xform_result=`$ECHO "$1" | $SED 's|\.[^.]*$|.lo|'` + } +fi + + +# func_fatal_configuration ARG... +# ------------------------------- +# Echo program name prefixed message to standard error, followed by +# a configuration failure hint, and exit. +func_fatal_configuration () +{ + func_fatal_error ${1+"$@"} \ + "See the $PACKAGE documentation for more information." \ + "Fatal configuration error." +} + + +# func_config +# ----------- +# Display the configuration for all the tags in this script. +func_config () +{ + re_begincf='^# ### BEGIN LIBTOOL' + re_endcf='^# ### END LIBTOOL' + + # Default configuration. + $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath" + + # Now print the configurations for the tags. + for tagname in $taglist; do + $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath" + done + + exit $? +} + + +# func_features +# ------------- +# Display the features supported by this script. +func_features () +{ + echo "host: $host" + if test yes = "$build_libtool_libs"; then + echo "enable shared libraries" + else + echo "disable shared libraries" + fi + if test yes = "$build_old_libs"; then + echo "enable static libraries" + else + echo "disable static libraries" + fi + + exit $? +} + + +# func_enable_tag TAGNAME +# ----------------------- +# Verify that TAGNAME is valid, and either flag an error and exit, or +# enable the TAGNAME tag. We also add TAGNAME to the global $taglist +# variable here. +func_enable_tag () +{ + # Global variable: + tagname=$1 + + re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$" + re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$" + sed_extractcf=/$re_begincf/,/$re_endcf/p + + # Validate tagname. 
+ case $tagname in + *[!-_A-Za-z0-9,/]*) + func_fatal_error "invalid tag name: $tagname" + ;; + esac + + # Don't test for the "default" C tag, as we know it's + # there but not specially marked. + case $tagname in + CC) ;; + *) + if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then + taglist="$taglist $tagname" + + # Evaluate the configuration. Be careful to quote the path + # and the sed script, to avoid splitting on whitespace, but + # also don't use non-portable quotes within backquotes within + # quotes we have to do it in 2 steps: + extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"` + eval "$extractedcf" + else + func_error "ignoring unknown tag $tagname" + fi + ;; + esac +} + + +# func_check_version_match +# ------------------------ +# Ensure that we are using m4 macros, and libtool script from the same +# release of libtool. +func_check_version_match () +{ + if test "$package_revision" != "$macro_revision"; then + if test "$VERSION" != "$macro_version"; then + if test -z "$macro_version"; then + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from an older release. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, but the +$progname: definition of this LT_INIT comes from $PACKAGE $macro_version. +$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION +$progname: and run autoconf again. +_LT_EOF + fi + else + cat >&2 <<_LT_EOF +$progname: Version mismatch error. This is $PACKAGE $VERSION, revision $package_revision, +$progname: but the definition of this LT_INIT comes from revision $macro_revision. +$progname: You should recreate aclocal.m4 with macros from revision $package_revision +$progname: of $PACKAGE $VERSION and run autoconf again. 
+_LT_EOF + fi + + exit $EXIT_MISMATCH + fi +} + + +# libtool_options_prep [ARG]... +# ----------------------------- +# Preparation for options parsed by libtool. +libtool_options_prep () +{ + $debug_mode + + # Option defaults: + opt_config=false + opt_dlopen= + opt_dry_run=false + opt_help=false + opt_mode= + opt_preserve_dup_deps=false + opt_quiet=false + + nonopt= + preserve_args= + + # Shorthand for --mode=foo, only valid as the first argument + case $1 in + clean|clea|cle|cl) + shift; set dummy --mode clean ${1+"$@"}; shift + ;; + compile|compil|compi|comp|com|co|c) + shift; set dummy --mode compile ${1+"$@"}; shift + ;; + execute|execut|execu|exec|exe|ex|e) + shift; set dummy --mode execute ${1+"$@"}; shift + ;; + finish|finis|fini|fin|fi|f) + shift; set dummy --mode finish ${1+"$@"}; shift + ;; + install|instal|insta|inst|ins|in|i) + shift; set dummy --mode install ${1+"$@"}; shift + ;; + link|lin|li|l) + shift; set dummy --mode link ${1+"$@"}; shift + ;; + uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u) + shift; set dummy --mode uninstall ${1+"$@"}; shift + ;; + esac + + # Pass back the list of options. + func_quote_for_eval ${1+"$@"} + libtool_options_prep_result=$func_quote_for_eval_result +} +func_add_hook func_options_prep libtool_options_prep + + +# libtool_parse_options [ARG]... +# --------------------------------- +# Provide handling for libtool specific options. +libtool_parse_options () +{ + $debug_cmd + + # Perform our own loop to consume as many options as possible in + # each iteration. 
+ while test $# -gt 0; do + _G_opt=$1 + shift + case $_G_opt in + --dry-run|--dryrun|-n) + opt_dry_run=: + ;; + + --config) func_config ;; + + --dlopen|-dlopen) + opt_dlopen="${opt_dlopen+$opt_dlopen +}$1" + shift + ;; + + --preserve-dup-deps) + opt_preserve_dup_deps=: ;; + + --features) func_features ;; + + --finish) set dummy --mode finish ${1+"$@"}; shift ;; + + --help) opt_help=: ;; + + --help-all) opt_help=': help-all' ;; + + --mode) test $# = 0 && func_missing_arg $_G_opt && break + opt_mode=$1 + case $1 in + # Valid mode arguments: + clean|compile|execute|finish|install|link|relink|uninstall) ;; + + # Catch anything else as an error + *) func_error "invalid argument for $_G_opt" + exit_cmd=exit + break + ;; + esac + shift + ;; + + --no-silent|--no-quiet) + opt_quiet=false + func_append preserve_args " $_G_opt" + ;; + + --no-warnings|--no-warning|--no-warn) + opt_warning=false + func_append preserve_args " $_G_opt" + ;; + + --no-verbose) + opt_verbose=false + func_append preserve_args " $_G_opt" + ;; + + --silent|--quiet) + opt_quiet=: + opt_verbose=false + func_append preserve_args " $_G_opt" + ;; + + --tag) test $# = 0 && func_missing_arg $_G_opt && break + opt_tag=$1 + func_append preserve_args " $_G_opt $1" + func_enable_tag "$1" + shift + ;; + + --verbose|-v) opt_quiet=false + opt_verbose=: + func_append preserve_args " $_G_opt" + ;; + + # An option not handled by this hook function: + *) set dummy "$_G_opt" ${1+"$@"}; shift; break ;; + esac + done + + + # save modified positional parameters for caller + func_quote_for_eval ${1+"$@"} + libtool_parse_options_result=$func_quote_for_eval_result +} +func_add_hook func_parse_options libtool_parse_options + + + +# libtool_validate_options [ARG]... +# --------------------------------- +# Perform any sanity checks on option settings and/or unconsumed +# arguments. 
+libtool_validate_options () +{ + # save first non-option argument + if test 0 -lt $#; then + nonopt=$1 + shift + fi + + # preserve --debug + test : = "$debug_cmd" || func_append preserve_args " --debug" + + case $host in + # Solaris2 added to fix http://debbugs.gnu.org/cgi/bugreport.cgi?bug=16452 + # see also: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59788 + *cygwin* | *mingw* | *pw32* | *cegcc* | *solaris2* | *os2*) + # don't eliminate duplications in $postdeps and $predeps + opt_duplicate_compiler_generated_deps=: + ;; + *) + opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps + ;; + esac + + $opt_help || { + # Sanity checks first: + func_check_version_match + + test yes != "$build_libtool_libs" \ + && test yes != "$build_old_libs" \ + && func_fatal_configuration "not configured to build any kind of library" + + # Darwin sucks + eval std_shrext=\"$shrext_cmds\" + + # Only execute mode is allowed to have -dlopen flags. + if test -n "$opt_dlopen" && test execute != "$opt_mode"; then + func_error "unrecognized option '-dlopen'" + $ECHO "$help" 1>&2 + exit $EXIT_FAILURE + fi + + # Change the help message to a mode-specific one. + generic_help=$help + help="Try '$progname --help --mode=$opt_mode' for more information." + } + + # Pass back the unparsed argument list + func_quote_for_eval ${1+"$@"} + libtool_validate_options_result=$func_quote_for_eval_result +} +func_add_hook func_validate_options libtool_validate_options + + +# Process options as early as possible so that --help and --version +# can return quickly. +func_options ${1+"$@"} +eval set dummy "$func_options_result"; shift + + + +## ----------- ## +## Main. ## +## ----------- ## + +magic='%%%MAGIC variable%%%' +magic_exe='%%%MAGIC EXE variable%%%' + +# Global variables. +extracted_archives= +extracted_serial=0 + +# If this variable is set in any of the actions, the command in it +# will be execed at the end. This prevents here-documents from being +# left over by shells. 
+exec_cmd= + + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' +} + +# func_generated_by_libtool +# True iff stdin has been generated by Libtool. This function is only +# a basic sanity check; it will hardly flush out determined imposters. +func_generated_by_libtool_p () +{ + $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1 +} + +# func_lalib_p file +# True iff FILE is a libtool '.la' library or '.lo' object file. +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_lalib_p () +{ + test -f "$1" && + $SED -e 4q "$1" 2>/dev/null | func_generated_by_libtool_p +} + +# func_lalib_unsafe_p file +# True iff FILE is a libtool '.la' library or '.lo' object file. +# This function implements the same check as func_lalib_p without +# resorting to external programs. To this end, it redirects stdin and +# closes it afterwards, without saving the original file descriptor. +# As a safety measure, use it only where a negative result would be +# fatal anyway. Works if 'file' does not exist. +func_lalib_unsafe_p () +{ + lalib_p=no + if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then + for lalib_p_l in 1 2 3 4 + do + read lalib_p_line + case $lalib_p_line in + \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;; + esac + done + exec 0<&5 5<&- + fi + test yes = "$lalib_p" +} + +# func_ltwrapper_script_p file +# True iff FILE is a libtool wrapper script +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_script_p () +{ + test -f "$1" && + $lt_truncate_bin < "$1" 2>/dev/null | func_generated_by_libtool_p +} + +# func_ltwrapper_executable_p file +# True iff FILE is a libtool wrapper executable +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. 
+func_ltwrapper_executable_p () +{ + func_ltwrapper_exec_suffix= + case $1 in + *.exe) ;; + *) func_ltwrapper_exec_suffix=.exe ;; + esac + $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1 +} + +# func_ltwrapper_scriptname file +# Assumes file is an ltwrapper_executable +# uses $file to determine the appropriate filename for a +# temporary ltwrapper_script. +func_ltwrapper_scriptname () +{ + func_dirname_and_basename "$1" "" "." + func_stripname '' '.exe' "$func_basename_result" + func_ltwrapper_scriptname_result=$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper +} + +# func_ltwrapper_p file +# True iff FILE is a libtool wrapper script or wrapper executable +# This function is only a basic sanity check; it will hardly flush out +# determined imposters. +func_ltwrapper_p () +{ + func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1" +} + + +# func_execute_cmds commands fail_cmd +# Execute tilde-delimited COMMANDS. +# If FAIL_CMD is given, eval that upon failure. +# FAIL_CMD may read-access the current command in variable CMD! +func_execute_cmds () +{ + $debug_cmd + + save_ifs=$IFS; IFS='~' + for cmd in $1; do + IFS=$sp$nl + eval cmd=\"$cmd\" + IFS=$save_ifs + func_show_eval "$cmd" "${2-:}" + done + IFS=$save_ifs +} + + +# func_source file +# Source FILE, adding directory component if necessary. +# Note that it is not necessary on cygwin/mingw to append a dot to +# FILE even if both FILE and FILE.exe exist: automatic-append-.exe +# behavior happens only for exec(3), not for open(2)! Also, sourcing +# 'FILE.' does not work on cygwin managed mounts. +func_source () +{ + $debug_cmd + + case $1 in + */* | *\\*) . "$1" ;; + *) . "./$1" ;; + esac +} + + +# func_resolve_sysroot PATH +# Replace a leading = in PATH with a sysroot. 
Store the result into +# func_resolve_sysroot_result +func_resolve_sysroot () +{ + func_resolve_sysroot_result=$1 + case $func_resolve_sysroot_result in + =*) + func_stripname '=' '' "$func_resolve_sysroot_result" + func_resolve_sysroot_result=$lt_sysroot$func_stripname_result + ;; + esac +} + +# func_replace_sysroot PATH +# If PATH begins with the sysroot, replace it with = and +# store the result into func_replace_sysroot_result. +func_replace_sysroot () +{ + case $lt_sysroot:$1 in + ?*:"$lt_sysroot"*) + func_stripname "$lt_sysroot" '' "$1" + func_replace_sysroot_result='='$func_stripname_result + ;; + *) + # Including no sysroot. + func_replace_sysroot_result=$1 + ;; + esac +} + +# func_infer_tag arg +# Infer tagged configuration to use if any are available and +# if one wasn't chosen via the "--tag" command line option. +# Only attempt this if the compiler in the base compile +# command doesn't match the default compiler. +# arg is usually of the form 'gcc ...' +func_infer_tag () +{ + $debug_cmd + + if test -n "$available_tags" && test -z "$tagname"; then + CC_quoted= + for arg in $CC; do + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case $@ in + # Blanks in the command may have been stripped by the calling shell, + # but not from the CC environment variable when configure was run. + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;; + # Blanks at the start of $base_compile will cause this to fail + # if we don't check for them as well. + *) + for z in $available_tags; do + if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then + # Evaluate the configuration. 
+ eval "`$SED -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" + CC_quoted= + for arg in $CC; do + # Double-quote args containing other shell metacharacters. + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case "$@ " in + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) + # The compiler in the base compile command matches + # the one in the tagged configuration. + # Assume this is the tagged configuration we want. + tagname=$z + break + ;; + esac + fi + done + # If $tagname still isn't set, then no tagged configuration + # was found and let the user know that the "--tag" command + # line option must be used. + if test -z "$tagname"; then + func_echo "unable to infer tagged configuration" + func_fatal_error "specify a tag with '--tag'" +# else +# func_verbose "using $tagname tagged configuration" + fi + ;; + esac + fi +} + + + +# func_write_libtool_object output_name pic_name nonpic_name +# Create a libtool object file (analogous to a ".la" file), +# but don't create it if we're doing a dry run. +func_write_libtool_object () +{ + write_libobj=$1 + if test yes = "$build_libtool_libs"; then + write_lobj=\'$2\' + else + write_lobj=none + fi + + if test yes = "$build_old_libs"; then + write_oldobj=\'$3\' + else + write_oldobj=none + fi + + $opt_dry_run || { + cat >${write_libobj}T </dev/null` + if test "$?" 
-eq 0 && test -n "$func_convert_core_file_wine_to_w32_tmp"; then + func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" | + $SED -e "$sed_naive_backslashify"` + else + func_convert_core_file_wine_to_w32_result= + fi + fi +} +# end: func_convert_core_file_wine_to_w32 + + +# func_convert_core_path_wine_to_w32 ARG +# Helper function used by path conversion functions when $build is *nix, and +# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly +# configured wine environment available, with the winepath program in $build's +# $PATH. Assumes ARG has no leading or trailing path separator characters. +# +# ARG is path to be converted from $build format to win32. +# Result is available in $func_convert_core_path_wine_to_w32_result. +# Unconvertible file (directory) names in ARG are skipped; if no directory names +# are convertible, then the result may be empty. +func_convert_core_path_wine_to_w32 () +{ + $debug_cmd + + # unfortunately, winepath doesn't convert paths, only file names + func_convert_core_path_wine_to_w32_result= + if test -n "$1"; then + oldIFS=$IFS + IFS=: + for func_convert_core_path_wine_to_w32_f in $1; do + IFS=$oldIFS + func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f" + if test -n "$func_convert_core_file_wine_to_w32_result"; then + if test -z "$func_convert_core_path_wine_to_w32_result"; then + func_convert_core_path_wine_to_w32_result=$func_convert_core_file_wine_to_w32_result + else + func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result" + fi + fi + done + IFS=$oldIFS + fi +} +# end: func_convert_core_path_wine_to_w32 + + +# func_cygpath ARGS... +# Wrapper around calling the cygpath program via LT_CYGPATH. This is used when +# when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2) +# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. 
In case (1) or +# (2), returns the Cygwin file name or path in func_cygpath_result (input +# file name or path is assumed to be in w32 format, as previously converted +# from $build's *nix or MSYS format). In case (3), returns the w32 file name +# or path in func_cygpath_result (input file name or path is assumed to be in +# Cygwin format). Returns an empty string on error. +# +# ARGS are passed to cygpath, with the last one being the file name or path to +# be converted. +# +# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH +# environment variable; do not put it in $PATH. +func_cygpath () +{ + $debug_cmd + + if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then + func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` + if test "$?" -ne 0; then + # on failure, ensure result is empty + func_cygpath_result= + fi + else + func_cygpath_result= + func_error "LT_CYGPATH is empty or specifies non-existent file: '$LT_CYGPATH'" + fi +} +#end: func_cygpath + + +# func_convert_core_msys_to_w32 ARG +# Convert file name or path ARG from MSYS format to w32 format. Return +# result in func_convert_core_msys_to_w32_result. +func_convert_core_msys_to_w32 () +{ + $debug_cmd + + # awkward: cmd appends spaces to result + func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null | + $SED -e 's/[ ]*$//' -e "$sed_naive_backslashify"` +} +#end: func_convert_core_msys_to_w32 + + +# func_convert_file_check ARG1 ARG2 +# Verify that ARG1 (a file name in $build format) was converted to $host +# format in ARG2. Otherwise, emit an error message, but continue (resetting +# func_to_host_file_result to ARG1). +func_convert_file_check () +{ + $debug_cmd + + if test -z "$2" && test -n "$1"; then + func_error "Could not determine host file name corresponding to" + func_error " '$1'" + func_error "Continuing, but uninstalled executables may not work." 
+ # Fallback: + func_to_host_file_result=$1 + fi +} +# end func_convert_file_check + + +# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH +# Verify that FROM_PATH (a path in $build format) was converted to $host +# format in TO_PATH. Otherwise, emit an error message, but continue, resetting +# func_to_host_file_result to a simplistic fallback value (see below). +func_convert_path_check () +{ + $debug_cmd + + if test -z "$4" && test -n "$3"; then + func_error "Could not determine the host path corresponding to" + func_error " '$3'" + func_error "Continuing, but uninstalled executables may not work." + # Fallback. This is a deliberately simplistic "conversion" and + # should not be "improved". See libtool.info. + if test "x$1" != "x$2"; then + lt_replace_pathsep_chars="s|$1|$2|g" + func_to_host_path_result=`echo "$3" | + $SED -e "$lt_replace_pathsep_chars"` + else + func_to_host_path_result=$3 + fi + fi +} +# end func_convert_path_check + + +# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG +# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT +# and appending REPL if ORIG matches BACKPAT. +func_convert_path_front_back_pathsep () +{ + $debug_cmd + + case $4 in + $1 ) func_to_host_path_result=$3$func_to_host_path_result + ;; + esac + case $4 in + $2 ) func_append func_to_host_path_result "$3" + ;; + esac +} +# end func_convert_path_front_back_pathsep + + +################################################## +# $build to $host FILE NAME CONVERSION FUNCTIONS # +################################################## +# invoked via '$to_host_file_cmd ARG' +# +# In each case, ARG is the path to be converted from $build to $host format. +# Result will be available in $func_to_host_file_result. + + +# func_to_host_file ARG +# Converts the file name ARG from $build format to $host format. Return result +# in func_to_host_file_result. 
+func_to_host_file () +{ + $debug_cmd + + $to_host_file_cmd "$1" +} +# end func_to_host_file + + +# func_to_tool_file ARG LAZY +# converts the file name ARG from $build format to toolchain format. Return +# result in func_to_tool_file_result. If the conversion in use is listed +# in (the comma separated) LAZY, no conversion takes place. +func_to_tool_file () +{ + $debug_cmd + + case ,$2, in + *,"$to_tool_file_cmd",*) + func_to_tool_file_result=$1 + ;; + *) + $to_tool_file_cmd "$1" + func_to_tool_file_result=$func_to_host_file_result + ;; + esac +} +# end func_to_tool_file + + +# func_convert_file_noop ARG +# Copy ARG to func_to_host_file_result. +func_convert_file_noop () +{ + func_to_host_file_result=$1 +} +# end func_convert_file_noop + + +# func_convert_file_msys_to_w32 ARG +# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic +# conversion to w32 is not available inside the cwrapper. Returns result in +# func_to_host_file_result. +func_convert_file_msys_to_w32 () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + func_convert_core_msys_to_w32 "$1" + func_to_host_file_result=$func_convert_core_msys_to_w32_result + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_msys_to_w32 + + +# func_convert_file_cygwin_to_w32 ARG +# Convert file name ARG from Cygwin to w32 format. Returns result in +# func_to_host_file_result. +func_convert_file_cygwin_to_w32 () +{ + $debug_cmd + + func_to_host_file_result=$1 + if test -n "$1"; then + # because $build is cygwin, we call "the" cygpath in $PATH; no need to use + # LT_CYGPATH in this case. + func_to_host_file_result=`cygpath -m "$1"` + fi + func_convert_file_check "$1" "$func_to_host_file_result" +} +# end func_convert_file_cygwin_to_w32 + + +# func_convert_file_nix_to_w32 ARG +# Convert file name ARG from *nix to w32 format. Requires a wine environment +# and a working winepath. Returns result in func_to_host_file_result. 
+func_convert_file_nix_to_w32 ()
+{
+  $debug_cmd
+
+  # Start from ARG itself so an empty ARG yields an empty result.
+  func_to_host_file_result=$1
+  if test -n "$1"; then
+    func_convert_core_file_wine_to_w32 "$1"
+    func_to_host_file_result=$func_convert_core_file_wine_to_w32_result
+  fi
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_nix_to_w32
+
+
+# func_convert_file_msys_to_cygwin ARG
+# Convert file name ARG from MSYS to Cygwin format. Requires LT_CYGPATH set.
+# Returns result in func_to_host_file_result.
+func_convert_file_msys_to_cygwin ()
+{
+  $debug_cmd
+
+  func_to_host_file_result=$1
+  if test -n "$1"; then
+    # Two steps: MSYS -> w32, then w32 -> Cygwin through func_cygpath.
+    func_convert_core_msys_to_w32 "$1"
+    func_cygpath -u "$func_convert_core_msys_to_w32_result"
+    func_to_host_file_result=$func_cygpath_result
+  fi
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_msys_to_cygwin
+
+
+# func_convert_file_nix_to_cygwin ARG
+# Convert file name ARG from *nix to Cygwin format. Requires Cygwin installed
+# in a wine environment, working winepath, and LT_CYGPATH set. Returns result
+# in func_to_host_file_result.
+func_convert_file_nix_to_cygwin ()
+{
+  $debug_cmd
+
+  func_to_host_file_result=$1
+  if test -n "$1"; then
+    # convert from *nix to w32, then use cygpath to convert from w32 to cygwin.
+    func_convert_core_file_wine_to_w32 "$1"
+    func_cygpath -u "$func_convert_core_file_wine_to_w32_result"
+    func_to_host_file_result=$func_cygpath_result
+  fi
+  func_convert_file_check "$1" "$func_to_host_file_result"
+}
+# end func_convert_file_nix_to_cygwin
+
+
+#############################################
+# $build to $host PATH CONVERSION FUNCTIONS #
+#############################################
+# invoked via '$to_host_path_cmd ARG'
+#
+# In each case, ARG is the path to be converted from $build to $host format.
+# The result will be available in $func_to_host_path_result.
+#
+# Path separators are also converted from $build format to $host format. If
+# ARG begins or ends with a path separator character, it is preserved (but
+# converted to $host format) on output.
+#
+# All path conversion functions are named using the following convention:
+# file name conversion function : func_convert_file_X_to_Y ()
+# path conversion function : func_convert_path_X_to_Y ()
+# where, for any given $build/$host combination the 'X_to_Y' value is the
+# same. If conversion functions are added for new $build/$host combinations,
+# the two new functions must follow this pattern, or func_init_to_host_path_cmd
+# will break.
+
+
+# func_init_to_host_path_cmd
+# Ensures that function "pointer" variable $to_host_path_cmd is set to the
+# appropriate value, based on the value of $to_host_file_cmd.
+to_host_path_cmd=
+func_init_to_host_path_cmd ()
+{
+  $debug_cmd
+
+  # Computed once: later calls find $to_host_path_cmd non-empty and skip this.
+  if test -z "$to_host_path_cmd"; then
+    # Take the 'X_to_Y' part of the file converter's name and attach it to
+    # the 'func_convert_path_' prefix (see the naming convention above).
+    func_stripname 'func_convert_file_' '' "$to_host_file_cmd"
+    to_host_path_cmd=func_convert_path_$func_stripname_result
+  fi
+}
+
+
+# func_to_host_path ARG
+# Converts the path ARG from $build format to $host format. Return result
+# in func_to_host_path_result.
+func_to_host_path ()
+{
+  $debug_cmd
+
+  func_init_to_host_path_cmd
+  $to_host_path_cmd "$1"
+}
+# end func_to_host_path
+
+
+# func_convert_path_noop ARG
+# Copy ARG to func_to_host_path_result.
+func_convert_path_noop ()
+{
+  func_to_host_path_result=$1
+}
+# end func_convert_path_noop
+
+
+# func_convert_path_msys_to_w32 ARG
+# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic
+# conversion to w32 is not available inside the cwrapper. Returns result in
+# func_to_host_path_result.
+func_convert_path_msys_to_w32 ()
+{
+  $debug_cmd
+
+  func_to_host_path_result=$1
+  if test -n "$1"; then
+    # Remove leading and trailing path separator characters from ARG. MSYS
+    # behavior is inconsistent here; cygpath turns them into '.;' and ';.';
+    # and winepath ignores them completely.
+    func_stripname : : "$1"
+    func_to_host_path_tmp1=$func_stripname_result
+    func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
+    func_to_host_path_result=$func_convert_core_msys_to_w32_result
+    func_convert_path_check : ";" \
+      "$func_to_host_path_tmp1" "$func_to_host_path_result"
+    # Put back, as ';', any separator the original ARG had at either end.
+    func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
+  fi
+}
+# end func_convert_path_msys_to_w32
+
+
+# func_convert_path_cygwin_to_w32 ARG
+# Convert path ARG from Cygwin to w32 format. Returns result in
+# func_to_host_path_result.
+func_convert_path_cygwin_to_w32 ()
+{
+  $debug_cmd
+
+  func_to_host_path_result=$1
+  if test -n "$1"; then
+    # See func_convert_path_msys_to_w32:
+    func_stripname : : "$1"
+    func_to_host_path_tmp1=$func_stripname_result
+    func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"`
+    func_convert_path_check : ";" \
+      "$func_to_host_path_tmp1" "$func_to_host_path_result"
+    func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
+  fi
+}
+# end func_convert_path_cygwin_to_w32
+
+
+# func_convert_path_nix_to_w32 ARG
+# Convert path ARG from *nix to w32 format. Requires a wine environment and
+# a working winepath. Returns result in func_to_host_path_result.
+func_convert_path_nix_to_w32 ()
+{
+  $debug_cmd
+
+  func_to_host_path_result=$1
+  if test -n "$1"; then
+    # See func_convert_path_msys_to_w32:
+    func_stripname : : "$1"
+    func_to_host_path_tmp1=$func_stripname_result
+    func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
+    func_to_host_path_result=$func_convert_core_path_wine_to_w32_result
+    func_convert_path_check : ";" \
+      "$func_to_host_path_tmp1" "$func_to_host_path_result"
+    func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
+  fi
+}
+# end func_convert_path_nix_to_w32
+
+
+# func_convert_path_msys_to_cygwin ARG
+# Convert path ARG from MSYS to Cygwin format. Requires LT_CYGPATH set.
+# Returns result in func_to_host_path_result.
+func_convert_path_msys_to_cygwin ()
+{
+  $debug_cmd
+
+  func_to_host_path_result=$1
+  if test -n "$1"; then
+    # See func_convert_path_msys_to_w32:
+    func_stripname : : "$1"
+    func_to_host_path_tmp1=$func_stripname_result
+    # Two steps: MSYS -> w32, then w32 -> Cygwin through func_cygpath.
+    func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
+    func_cygpath -u -p "$func_convert_core_msys_to_w32_result"
+    func_to_host_path_result=$func_cygpath_result
+    # Both sides use ':' as the separator here.
+    func_convert_path_check : : \
+      "$func_to_host_path_tmp1" "$func_to_host_path_result"
+    func_convert_path_front_back_pathsep ":*" "*:" : "$1"
+  fi
+}
+# end func_convert_path_msys_to_cygwin
+
+
+# func_convert_path_nix_to_cygwin ARG
+# Convert path ARG from *nix to Cygwin format. Requires Cygwin installed in
+# a wine environment, working winepath, and LT_CYGPATH set. Returns result in
+# func_to_host_path_result.
+func_convert_path_nix_to_cygwin ()
+{
+  $debug_cmd
+
+  func_to_host_path_result=$1
+  if test -n "$1"; then
+    # Remove leading and trailing path separator characters from
+    # ARG. msys behavior is inconsistent here, cygpath turns them
+    # into '.;' and ';.', and winepath ignores them completely.
+    func_stripname : : "$1"
+    func_to_host_path_tmp1=$func_stripname_result
+    func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
+    func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result"
+    func_to_host_path_result=$func_cygpath_result
+    func_convert_path_check : : \
+      "$func_to_host_path_tmp1" "$func_to_host_path_result"
+    func_convert_path_front_back_pathsep ":*" "*:" : "$1"
+  fi
+}
+# end func_convert_path_nix_to_cygwin
+
+
+# func_dll_def_p FILE
+# True iff FILE is a Windows DLL '.def' file.
+# Keep in sync with _LT_DLL_DEF_P in libtool.m4
+func_dll_def_p ()
+{
+  $debug_cmd
+
+  # The sed program, in order:
+  #   1. strips leading blanks from the line;
+  #   2. deletes lines that are empty or hold only ';' comments;
+  #   3. prints 'DEF' if the line is an EXPORTS or LIBRARY statement;
+  #   4. quits, so only the first line surviving step 2 is ever examined.
+  func_dll_def_p_tmp=`$SED -n \
+    -e 's/^[ ]*//' \
+    -e '/^\(;.*\)*$/d' \
+    -e 's/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p' \
+    -e q \
+    "$1"`
+  # The exit status of this test is the function's return value.
+  test DEF = "$func_dll_def_p_tmp"
+}
+
+
+# func_mode_compile arg...
+func_mode_compile ()
+{
+  $debug_cmd
+
+  # Overview: split the arguments into the compiler command (base_compile),
+  # the source file (srcfile) and the requested output (libobj); then run the
+  # compiler once for the PIC object (when build_libtool_libs is yes) and/or
+  # once for the non-PIC object (when build_old_libs is yes), write the
+  # libtool object with func_write_libtool_object, and exit.
+
+  # Get the compilation command and the source file.
+  base_compile=
+  srcfile=$nonopt # always keep a non-empty value in "srcfile"
+  suppress_opt=yes
+  suppress_output=
+  arg_mode=normal
+  libobj=
+  later=
+  pie_flag=
+
+  for arg
+  do
+    case $arg_mode in
+    arg )
+      # do not "continue". Instead, add this to base_compile
+      lastarg=$arg
+      arg_mode=normal
+      ;;
+
+    target )
+      libobj=$arg
+      arg_mode=normal
+      continue
+      ;;
+
+    normal )
+      # Accept any command-line options.
+      case $arg in
+      -o)
+        test -n "$libobj" && \
+          func_fatal_error "you cannot specify '-o' more than once"
+        arg_mode=target
+        continue
+        ;;
+
+      -pie | -fpie | -fPIE)
+        func_append pie_flag " $arg"
+        continue
+        ;;
+
+      -shared | -static | -prefer-pic | -prefer-non-pic)
+        func_append later " $arg"
+        continue
+        ;;
+
+      -no-suppress)
+        suppress_opt=no
+        continue
+        ;;
+
+      -Xcompiler)
+        arg_mode=arg # the next one goes into the "base_compile" arg list
+        continue # The current "srcfile" will either be retained or
+        ;; # replaced later. I would guess that would be a bug.
+
+      -Wc,*)
+        func_stripname '-Wc,' '' "$arg"
+        args=$func_stripname_result
+        lastarg=
+        # Split the comma separated flag list; IFS is restored inside the
+        # loop so the helper runs with the normal field separators.
+        save_ifs=$IFS; IFS=,
+        for arg in $args; do
+          IFS=$save_ifs
+          func_append_quoted lastarg "$arg"
+        done
+        IFS=$save_ifs
+        func_stripname ' ' '' "$lastarg"
+        lastarg=$func_stripname_result
+
+        # Add the arguments to base_compile.
+        func_append base_compile " $lastarg"
+        continue
+        ;;
+
+      *)
+        # Accept the current argument as the source file.
+        # The previous "srcfile" becomes the current argument.
+        #
+        lastarg=$srcfile
+        srcfile=$arg
+        ;;
+      esac # case $arg
+      ;;
+    esac # case $arg_mode
+
+    # Aesthetically quote the previous argument.
+    func_append_quoted base_compile "$lastarg"
+  done # for arg
+
+  case $arg_mode in
+  arg)
+    # The option parsed above is spelled '-Xcompiler'.
+    func_fatal_error "you must specify an argument for -Xcompiler"
+    ;;
+  target)
+    func_fatal_error "you must specify a target with '-o'"
+    ;;
+  *)
+    # Get the name of the library object.
+    test -z "$libobj" && {
+      func_basename "$srcfile"
+      libobj=$func_basename_result
+    }
+    ;;
+  esac
+
+  # Recognize several different file suffixes.
+  # If the user specifies -o file.o, it is replaced with file.lo
+  case $libobj in
+  *.[cCFSifmso] | \
+  *.ada | *.adb | *.ads | *.asm | \
+  *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \
+  *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup)
+    func_xform "$libobj"
+    libobj=$func_xform_result
+    ;;
+  esac
+
+  case $libobj in
+  *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;;
+  *)
+    func_fatal_error "cannot determine name of library object from '$libobj'"
+    ;;
+  esac
+
+  func_infer_tag $base_compile
+
+  # Apply the options that were set aside during argument parsing.
+  for arg in $later; do
+    case $arg in
+    -shared)
+      test yes = "$build_libtool_libs" \
+        || func_fatal_configuration "cannot build a shared library"
+      build_old_libs=no
+      continue
+      ;;
+
+    -static)
+      build_libtool_libs=no
+      build_old_libs=yes
+      continue
+      ;;
+
+    -prefer-pic)
+      pic_mode=yes
+      continue
+      ;;
+
+    -prefer-non-pic)
+      pic_mode=no
+      continue
+      ;;
+    esac
+  done
+
+  func_quote_for_eval "$libobj"
+  test "X$libobj" != "X$func_quote_for_eval_result" \
+    && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \
+    && func_warning "libobj name '$libobj' may not contain shell special characters."
+  func_dirname_and_basename "$obj" "/" ""
+  objname=$func_basename_result
+  xdir=$func_dirname_result
+  lobj=$xdir$objdir/$objname
+
+  test -z "$base_compile" && \
+    func_fatal_help "you must specify a compilation command"
+
+  # Delete any leftover library objects.
+  if test yes = "$build_old_libs"; then
+    removelist="$obj $lobj $libobj ${libobj}T"
+  else
+    removelist="$lobj $libobj ${libobj}T"
+  fi
+
+  # On Cygwin there's no "real" PIC flag so we must build both object types
+  case $host_os in
+  cygwin* | mingw* | pw32* | os2* | cegcc*)
+    pic_mode=default
+    ;;
+  esac
+  if test no = "$pic_mode" && test pass_all != "$deplibs_check_method"; then
+    # non-PIC code in shared libraries is not supported
+    pic_mode=default
+  fi
+
+  # Calculate the filename of the output object if compiler does
+  # not support -o with -c
+  if test no = "$compiler_c_o"; then
+    output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.$objext
+    lockfile=$output_obj.lock
+  else
+    output_obj=
+    need_locks=no
+    lockfile=
+  fi
+
+  # Lock this critical section if it is needed
+  # We use this script file to make the link, it avoids creating a new file
+  if test yes = "$need_locks"; then
+    until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do
+      func_echo "Waiting for $lockfile to be removed"
+      sleep 2
+    done
+  elif test warn = "$need_locks"; then
+    if test -f "$lockfile"; then
+      $ECHO "\
+*** ERROR, $lockfile exists and contains:
+`cat $lockfile 2>/dev/null`
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support '-c' and '-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+      $opt_dry_run || $RM $removelist
+      exit $EXIT_FAILURE
+    fi
+    func_append removelist " $output_obj"
+    $ECHO "$srcfile" > "$lockfile"
+  fi
+
+  $opt_dry_run || $RM $removelist
+  func_append removelist " $lockfile"
+  trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15
+
+  func_to_tool_file "$srcfile" func_convert_file_msys_to_w32
+  srcfile=$func_to_tool_file_result
+  func_quote_for_eval "$srcfile"
+  qsrcfile=$func_quote_for_eval_result
+
+  # Only build a PIC object if we are building libtool libraries.
+  if test yes = "$build_libtool_libs"; then
+    # Without this assignment, base_compile gets emptied.
+    fbsd_hideous_sh_bug=$base_compile
+
+    if test no != "$pic_mode"; then
+      command="$base_compile $qsrcfile $pic_flag"
+    else
+      # Don't build PIC code
+      command="$base_compile $qsrcfile"
+    fi
+
+    func_mkdir_p "$xdir$objdir"
+
+    if test -z "$output_obj"; then
+      # Place PIC objects in $objdir
+      func_append command " -o $lobj"
+    fi
+
+    func_show_eval_locale "$command" \
+      'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE'
+
+    if test warn = "$need_locks" &&
+       test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
+      $ECHO "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support '-c' and '-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+      $opt_dry_run || $RM $removelist
+      exit $EXIT_FAILURE
+    fi
+
+    # Just move the object if needed, then go on to compile the next one
+    if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then
+      func_show_eval '$MV "$output_obj" "$lobj"' \
+        'error=$?; $opt_dry_run || $RM $removelist; exit $error'
+    fi
+
+    # Allow error messages only from the first compilation.
+    if test yes = "$suppress_opt"; then
+      suppress_output=' >/dev/null 2>&1'
+    fi
+  fi
+
+  # Only build a position-dependent object if we build old libraries.
+  if test yes = "$build_old_libs"; then
+    if test yes != "$pic_mode"; then
+      # Don't build PIC code
+      command="$base_compile $qsrcfile$pie_flag"
+    else
+      command="$base_compile $qsrcfile $pic_flag"
+    fi
+    if test yes = "$compiler_c_o"; then
+      func_append command " -o $obj"
+    fi
+
+    # Suppress compiler output if we already did a PIC compilation.
+    func_append command "$suppress_output"
+    func_show_eval_locale "$command" \
+      '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE'
+
+    if test warn = "$need_locks" &&
+       test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
+      $ECHO "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support '-c' and '-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+      $opt_dry_run || $RM $removelist
+      exit $EXIT_FAILURE
+    fi
+
+    # Just move the object if needed
+    if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then
+      func_show_eval '$MV "$output_obj" "$obj"' \
+        'error=$?; $opt_dry_run || $RM $removelist; exit $error'
+    fi
+  fi
+
+  $opt_dry_run || {
+    func_write_libtool_object "$libobj" "$objdir/$objname" "$objname"
+
+    # Unlock the critical section if it was locked
+    if test no != "$need_locks"; then
+      removelist=$lockfile
+      $RM "$lockfile"
+    fi
+  }
+
+  exit $EXIT_SUCCESS
+}
+
+$opt_help || {
+  test compile = "$opt_mode" && func_mode_compile ${1+"$@"}
+}
+
+# func_mode_help
+# Print the usage text for the mode named in $opt_mode (the generic help when
+# $opt_mode is empty), followed by a pointer to '--help'. An unknown mode is
+# reported through func_fatal_help.
+func_mode_help ()
+{
+  # We need to display help for each of the modes.
+  case $opt_mode in
+  "")
+    # Generic help is extracted from the usage comments
+    # at the start of this file.
+    func_help
+    ;;
+
+  clean)
+    $ECHO \
+"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE...
+
+Remove files from the build directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically '/bin/rm'). RM-OPTIONS are options (such as '-f') to be passed
+to RM.
+
+If FILE is a libtool library, object or program, all the files associated
+with it are deleted. Otherwise, only FILE itself is deleted using RM."
+    ;;
+
+  compile)
+    $ECHO \
+"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE
+
+Compile a source file into a libtool library object.
+
+This mode accepts the following additional options:
+
+ -o OUTPUT-FILE set the output file name to OUTPUT-FILE
+ -no-suppress do not suppress compiler output for multiple passes
+ -prefer-pic try to build PIC objects only
+ -prefer-non-pic try to build non-PIC objects only
+ -shared do not build a '.o' file suitable for static linking
+ -static only build a '.o' file suitable for static linking
+ -Wc,FLAG pass FLAG directly to the compiler
+
+COMPILE-COMMAND is a command to be used in creating a 'standard' object file
+from the given SOURCEFILE.
+
+The output file name is determined by removing the directory component from
+SOURCEFILE, then substituting the C source code suffix '.c' with the
+library object suffix, '.lo'."
+    ;;
+
+  execute)
+    $ECHO \
+"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]...
+
+Automatically set library path, then run a program.
+
+This mode accepts the following additional options:
+
+ -dlopen FILE add the directory containing FILE to the library path
+
+This mode sets the library path environment variable according to '-dlopen'
+flags.
+
+If any of the ARGS are libtool executable wrappers, then they are translated
+into their corresponding uninstalled binary, and any of their required library
+directories are added to the library path.
+
+Then, COMMAND is executed, with ARGS as arguments."
+    ;;
+
+  finish)
+    $ECHO \
+"Usage: $progname [OPTION]... --mode=finish [LIBDIR]...
+
+Complete the installation of libtool libraries.
+
+Each LIBDIR is a directory that contains libtool libraries.
+
+The commands that this mode executes may require superuser privileges. Use
+the '--dry-run' option if you just want to see what would be executed."
+    ;;
+
+  install)
+    $ECHO \
+"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND...
+
+Install executables or libraries.
+
+INSTALL-COMMAND is the installation command. The first component should be
+either the 'install' or 'cp' program.
+
+The following components of INSTALL-COMMAND are treated specially:
+
+ -inst-prefix-dir PREFIX-DIR Use PREFIX-DIR as a staging area for installation
+
+The rest of the components are interpreted as arguments to that command (only
+BSD-compatible install options are recognized)."
+    ;;
+
+  link)
+    $ECHO \
+"Usage: $progname [OPTION]... --mode=link LINK-COMMAND...
+
+Link object files or libraries together to form another library, or to
+create an executable program.
+
+LINK-COMMAND is a command using the C compiler that you would use to create
+a program from several object files.
+
+The following components of LINK-COMMAND are treated specially:
+
+ -all-static do not do any dynamic linking at all
+ -avoid-version do not add a version suffix if possible
+ -bindir BINDIR specify path to binaries directory (for systems where
+ libraries must be found in the PATH setting at runtime)
+ -dlopen FILE '-dlpreopen' FILE if it cannot be dlopened at runtime
+ -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols
+ -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
+ -export-symbols SYMFILE
+ try to export only the symbols listed in SYMFILE
+ -export-symbols-regex REGEX
+ try to export only the symbols matching REGEX
+ -LLIBDIR search LIBDIR for required installed libraries
+ -lNAME OUTPUT-FILE requires the installed library libNAME
+ -module build a library that can be dlopened
+ -no-fast-install disable the fast-install mode
+ -no-install link a not-installable executable
+ -no-undefined declare that a library does not refer to external symbols
+ -o OUTPUT-FILE create OUTPUT-FILE from the specified objects
+ -objectlist FILE use a list of object files found in FILE to specify objects
+ -os2dllname NAME force a short DLL name on OS/2 (no effect on other OSes)
+ -precious-files-regex REGEX
+ don't remove output files matching REGEX
+ -release RELEASE specify package release information
+ -rpath LIBDIR the created library will eventually be installed in LIBDIR
+ -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries
+ -shared only do dynamic linking of libtool libraries
+ -shrext SUFFIX override the standard shared library file extension
+ -static do not do any dynamic linking of uninstalled libtool libraries
+ -static-libtool-libs
+ do not do any dynamic linking of libtool libraries
+ -version-info CURRENT[:REVISION[:AGE]]
+ specify library version info [each variable defaults to 0]
+ -weak LIBNAME declare that the target provides the LIBNAME interface
+ -Wc,FLAG
+ -Xcompiler FLAG pass compiler-specific FLAG directly to the compiler
+ -Wl,FLAG
+ -Xlinker FLAG pass linker-specific FLAG directly to the linker
+ -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC)
+
+All other options (arguments beginning with '-') are ignored.
+
+Every other argument is treated as a filename. Files ending in '.la' are
+treated as uninstalled libtool libraries, other files are standard or library
+object files.
+
+If the OUTPUT-FILE ends in '.la', then a libtool library is created,
+only library objects ('.lo' files) may be specified, and '-rpath' is
+required, except when creating a convenience library.
+
+If OUTPUT-FILE ends in '.a' or '.lib', then a standard library is created
+using 'ar' and 'ranlib', or on Windows using 'lib'.
+
+If OUTPUT-FILE ends in '.lo' or '.$objext', then a reloadable object file
+is created, otherwise an executable program is created."
+    ;;
+
+  uninstall)
+    $ECHO \
+"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...
+
+Remove libraries from an installation directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically '/bin/rm'). RM-OPTIONS are options (such as '-f') to be passed
+to RM.
+
+If FILE is a libtool library, all the files associated with it are deleted.
+Otherwise, only FILE itself is deleted using RM."
+    ;;
+
+  *)
+    func_fatal_help "invalid operation mode '$opt_mode'"
+    ;;
+  esac
+
+  echo
+  $ECHO "Try '$progname --help' for more information about other modes."
+}
+
+# Now that we've collected a possible --mode arg, show help if necessary
+if $opt_help; then
+  if test : = "$opt_help"; then
+    func_mode_help
+  else
+    # First pass: the generic help plus every mode's 'Usage:' line, with the
+    # second and later 'Usage:' prefixes rewritten by sed.
+    {
+      func_help noexit
+      for opt_mode in compile link execute install finish uninstall clean; do
+        func_mode_help
+      done
+    } | $SED -n '1p; 2,$s/^Usage:/ or: /p'
+    # Second pass: the per-mode descriptions, post-processed by sed.
+    {
+      func_help noexit
+      for opt_mode in compile link execute install finish uninstall clean; do
+        echo
+        func_mode_help
+      done
+    } |
+    $SED '1d
+ /^When reporting/,/^Report/{
+ H
+ d
+ }
+ $x
+ /information about other modes/d
+ /more detailed .*MODE/d
+ s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/'
+  fi
+  exit $?
+fi
+
+
+# func_mode_execute arg...
+# Add the directories of the '-dlopen'ed libtool files to the variable named
+# by $shlibpath_var, replace wrapper arguments with the programs they wrap,
+# then either print the command (dry run) or store it in exec_cmd.
+func_mode_execute ()
+{
+  $debug_cmd
+
+  # The first argument is the command name.
+  cmd=$nonopt
+  test -z "$cmd" && \
+    func_fatal_help "you must specify a COMMAND"
+
+  # Handle -dlopen flags immediately.
+  for file in $opt_dlopen; do
+    test -f "$file" \
+      || func_fatal_help "'$file' is not a file"
+
+    dir=
+    case $file in
+    *.la)
+      func_resolve_sysroot "$file"
+      file=$func_resolve_sysroot_result
+
+      # Check to see that this really is a libtool archive.
+      # Report $file: it is the name being checked, and no 'lib' variable
+      # is assigned anywhere in this function.
+      func_lalib_unsafe_p "$file" \
+        || func_fatal_help "'$file' is not a valid libtool archive"
+
+      # Read the libtool library.
+      dlname=
+      library_names=
+      func_source "$file"
+
+      # Skip this library if it cannot be dlopened.
+      if test -z "$dlname"; then
+        # Warn if it was a shared library.
+        test -n "$library_names" && \
+          func_warning "'$file' was not linked with '-export-dynamic'"
+        continue
+      fi
+
+      func_dirname "$file" "" "."
+      dir=$func_dirname_result
+
+      if test -f "$dir/$objdir/$dlname"; then
+        func_append dir "/$objdir"
+      else
+        if test ! -f "$dir/$dlname"; then
+          func_fatal_error "cannot find '$dlname' in '$dir' or '$dir/$objdir'"
+        fi
+      fi
+      ;;
+
+    *.lo)
+      # Just add the directory containing the .lo file.
+      func_dirname "$file" "" "."
+      dir=$func_dirname_result
+      ;;
+
+    *)
+      func_warning "'-dlopen' is ignored for non-libtool libraries and objects"
+      continue
+      ;;
+    esac
+
+    # Get the absolute pathname.
+    absdir=`cd "$dir" && pwd`
+    test -n "$absdir" && dir=$absdir
+
+    # Now add the directory to shlibpath_var.
+    if eval "test -z \"\$$shlibpath_var\""; then
+      eval "$shlibpath_var=\"\$dir\""
+    else
+      eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\""
+    fi
+  done
+
+  # This variable tells wrapper scripts just to set shlibpath_var
+  # rather than running their programs.
+  libtool_execute_magic=$magic
+
+  # Check if any of the arguments is a wrapper script.
+  args=
+  for file
+  do
+    case $file in
+    -* | *.la | *.lo ) ;;
+    *)
+      # Do a test to see if this is really a libtool program.
+      if func_ltwrapper_script_p "$file"; then
+        func_source "$file"
+        # Transform arg to wrapped name.
+        file=$progdir/$program
+      elif func_ltwrapper_executable_p "$file"; then
+        func_ltwrapper_scriptname "$file"
+        func_source "$func_ltwrapper_scriptname_result"
+        # Transform arg to wrapped name.
+        file=$progdir/$program
+      fi
+      ;;
+    esac
+    # Quote arguments (to preserve shell metacharacters).
+    func_append_quoted args "$file"
+  done
+
+  if $opt_dry_run; then
+    # Display what would be done.
+    if test -n "$shlibpath_var"; then
+      eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\""
+      echo "export $shlibpath_var"
+    fi
+    $ECHO "$cmd$args"
+    exit $EXIT_SUCCESS
+  else
+    if test -n "$shlibpath_var"; then
+      # Export the shlibpath_var.
+      eval "export $shlibpath_var"
+    fi
+
+    # Restore saved environment variables
+    for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
+    do
+      eval "if test \"\${save_$lt_var+set}\" = set; then
+ $lt_var=\$save_$lt_var; export $lt_var
+ else
+ $lt_unset $lt_var
+ fi"
+    done
+
+    # Now prepare to actually exec the command.
+    exec_cmd=\$cmd$args
+  fi
+}
+
+test execute = "$opt_mode" && func_mode_execute ${1+"$@"}
+
+
+# func_mode_finish arg...
+func_mode_finish ()
+{
+  $debug_cmd
+
+  # Overview: sort the arguments into library directories (libdirs) and
+  # libtool archives (libs), strip sysroot references from the archives,
+  # run $finish_cmds / $finish_eval for each directory, then (unless
+  # $opt_quiet) print advice on linking against the installed libraries.
+  libs=
+  libdirs=
+  admincmds=
+
+  for opt in "$nonopt" ${1+"$@"}
+  do
+    if test -d "$opt"; then
+      func_append libdirs " $opt"
+
+    elif test -f "$opt"; then
+      if func_lalib_unsafe_p "$opt"; then
+        func_append libs " $opt"
+      else
+        func_warning "'$opt' is not a valid libtool archive"
+      fi
+
+    else
+      func_fatal_error "invalid argument '$opt'"
+    fi
+  done
+
+  if test -n "$libs"; then
+    if test -n "$lt_sysroot"; then
+      sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"`
+      sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;"
+    else
+      sysroot_cmd=
+    fi
+
+    # Remove sysroot references
+    if $opt_dry_run; then
+      for lib in $libs; do
+        echo "removing references to $lt_sysroot and '=' prefixes from $lib"
+      done
+    else
+      tmpdir=`func_mktempdir`
+      for lib in $libs; do
+        # Rewrite into a scratch file first, then move it over the archive.
+        # The file names are quoted, matching the quoted "$tmpdir" below.
+        $SED -e "$sysroot_cmd s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" "$lib" \
+          > "$tmpdir/tmp-la"
+        mv -f "$tmpdir/tmp-la" "$lib"
+      done
+      ${RM}r "$tmpdir"
+    fi
+  fi
+
+  if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
+    for libdir in $libdirs; do
+      if test -n "$finish_cmds"; then
+        # Do each command in the finish commands.
+        func_execute_cmds "$finish_cmds" 'admincmds="$admincmds
+'"$cmd"'"'
+      fi
+      if test -n "$finish_eval"; then
+        # Do the single finish_eval.
+        eval cmds=\"$finish_eval\"
+        $opt_dry_run || eval "$cmds" || func_append admincmds "
+ $cmds"
+      fi
+    done
+  fi
+
+  # Exit here if they wanted silent mode.
+  $opt_quiet && exit $EXIT_SUCCESS
+
+  if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
+    echo "----------------------------------------------------------------------"
+    echo "Libraries have been installed in:"
+    for libdir in $libdirs; do
+      $ECHO " $libdir"
+    done
+    echo
+    echo "If you ever happen to want to link against installed libraries"
+    echo "in a given directory, LIBDIR, you must either use libtool, and"
+    echo "specify the full pathname of the library, or use the '-LLIBDIR'"
+    echo "flag during linking and do at least one of the following:"
+    if test -n "$shlibpath_var"; then
+      echo " - add LIBDIR to the '$shlibpath_var' environment variable"
+      echo " during execution"
+    fi
+    if test -n "$runpath_var"; then
+      echo " - add LIBDIR to the '$runpath_var' environment variable"
+      echo " during linking"
+    fi
+    if test -n "$hardcode_libdir_flag_spec"; then
+      # Expand the flag template with the literal word LIBDIR as the directory.
+      libdir=LIBDIR
+      eval flag=\"$hardcode_libdir_flag_spec\"
+
+      $ECHO " - use the '$flag' linker flag"
+    fi
+    if test -n "$admincmds"; then
+      $ECHO " - have your system administrator run these commands:$admincmds"
+    fi
+    if test -f /etc/ld.so.conf; then
+      echo " - have your system administrator add LIBDIR to '/etc/ld.so.conf'"
+    fi
+    echo
+
+    echo "See any operating system documentation about shared libraries for"
+    # Match on the OS field: these patterns are not anchored with a leading
+    # '*', so they cannot match a full cpu-vendor-os host triplet.
+    case $host_os in
+    solaris2.[6789]|solaris2.1[0-9])
+      echo "more information, such as the ld(1), crle(1) and ld.so(8) manual"
+      echo "pages."
+      ;;
+    *)
+      echo "more information, such as the ld(1) and ld.so(8) manual pages."
+      ;;
+    esac
+    echo "----------------------------------------------------------------------"
+  fi
+  exit $EXIT_SUCCESS
+}
+
+test finish = "$opt_mode" && func_mode_finish ${1+"$@"}
+
+
+# func_mode_install arg...
+func_mode_install ()
+{
+  $debug_cmd
+
+  # There may be an optional sh(1) argument at the beginning of
+  # install_prog (especially on Windows NT).
+ if test "$SHELL" = "$nonopt" || test /bin/sh = "$nonopt" || + # Allow the use of GNU shtool's install command. + case $nonopt in *shtool*) :;; *) false;; esac + then + # Aesthetically quote it. + func_quote_for_eval "$nonopt" + install_prog="$func_quote_for_eval_result " + arg=$1 + shift + else + install_prog= + arg=$nonopt + fi + + # The real first argument should be the name of the installation program. + # Aesthetically quote it. + func_quote_for_eval "$arg" + func_append install_prog "$func_quote_for_eval_result" + install_shared_prog=$install_prog + case " $install_prog " in + *[\\\ /]cp\ *) install_cp=: ;; + *) install_cp=false ;; + esac + + # We need to accept at least all the BSD install flags. + dest= + files= + opts= + prev= + install_type= + isdir=false + stripme= + no_mode=: + for arg + do + arg2= + if test -n "$dest"; then + func_append files " $dest" + dest=$arg + continue + fi + + case $arg in + -d) isdir=: ;; + -f) + if $install_cp; then :; else + prev=$arg + fi + ;; + -g | -m | -o) + prev=$arg + ;; + -s) + stripme=" -s" + continue + ;; + -*) + ;; + *) + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + if test X-m = "X$prev" && test -n "$install_override_mode"; then + arg2=$install_override_mode + no_mode=false + fi + prev= + else + dest=$arg + continue + fi + ;; + esac + + # Aesthetically quote the argument. 
+ func_quote_for_eval "$arg" + func_append install_prog " $func_quote_for_eval_result" + if test -n "$arg2"; then + func_quote_for_eval "$arg2" + fi + func_append install_shared_prog " $func_quote_for_eval_result" + done + + test -z "$install_prog" && \ + func_fatal_help "you must specify an install program" + + test -n "$prev" && \ + func_fatal_help "the '$prev' option requires an argument" + + if test -n "$install_override_mode" && $no_mode; then + if $install_cp; then :; else + func_quote_for_eval "$install_override_mode" + func_append install_shared_prog " -m $func_quote_for_eval_result" + fi + fi + + if test -z "$files"; then + if test -z "$dest"; then + func_fatal_help "no file or destination specified" + else + func_fatal_help "you must specify a destination" + fi + fi + + # Strip any trailing slash from the destination. + func_stripname '' '/' "$dest" + dest=$func_stripname_result + + # Check to see that the destination is a directory. + test -d "$dest" && isdir=: + if $isdir; then + destdir=$dest + destname= + else + func_dirname_and_basename "$dest" "" "." + destdir=$func_dirname_result + destname=$func_basename_result + + # Not a directory, so check to see that there is only one file specified. + set dummy $files; shift + test "$#" -gt 1 && \ + func_fatal_help "'$dest' is not a directory" + fi + case $destdir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + for file in $files; do + case $file in + *.lo) ;; + *) + func_fatal_help "'$destdir' must be an absolute directory name" + ;; + esac + done + ;; + esac + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic=$magic + + staticlibs= + future_libdirs= + current_libdirs= + for file in $files; do + + # Do each installation. + case $file in + *.$libext) + # Do the static libraries later. 
+ func_append staticlibs " $file" + ;; + + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "'$file' is not a valid libtool archive" + + library_names= + old_library= + relink_command= + func_source "$file" + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) func_append current_libdirs " $libdir" ;; + esac + else + # Note the libdir as a future libdir. + case "$future_libdirs " in + *" $libdir "*) ;; + *) func_append future_libdirs " $libdir" ;; + esac + fi + + func_dirname "$file" "/" "" + dir=$func_dirname_result + func_append dir "$objdir" + + if test -n "$relink_command"; then + # Determine the prefix the user has applied to our future dir. + inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"` + + # Don't allow the user to place us outside of our expected + # location b/c this prevents finding dependent libraries that + # are installed to the same prefix. + # At present, this check doesn't affect windows .dll's that + # are installed into $libdir/../bin (currently, that works fine) + # but it's something to keep an eye on. + test "$inst_prefix_dir" = "$destdir" && \ + func_fatal_error "error: cannot install '$file' to a directory not ending in $libdir" + + if test -n "$inst_prefix_dir"; then + # Stick the inst_prefix_dir data into the link command. + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"` + else + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"` + fi + + func_warning "relinking '$file'" + func_show_eval "$relink_command" \ + 'func_fatal_error "error: relink '\''$file'\'' with the above command before installing it"' + fi + + # See the names of the shared library. 
+ set dummy $library_names; shift + if test -n "$1"; then + realname=$1 + shift + + srcname=$realname + test -n "$relink_command" && srcname=${realname}T + + # Install the shared library and build the symlinks. + func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \ + 'exit $?' + tstripme=$stripme + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + case $realname in + *.dll.a) + tstripme= + ;; + esac + ;; + os2*) + case $realname in + *_dll.a) + tstripme= + ;; + esac + ;; + esac + if test -n "$tstripme" && test -n "$striplib"; then + func_show_eval "$striplib $destdir/$realname" 'exit $?' + fi + + if test "$#" -gt 0; then + # Delete the old symlinks, and create new ones. + # Try 'ln -sf' first, because the 'ln' binary might depend on + # the symlink we replace! Solaris /bin/ln does not understand -f, + # so we also need to try rm && ln -s. + for linkname + do + test "$linkname" != "$realname" \ + && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })" + done + fi + + # Do each command in the postinstall commands. + lib=$destdir/$realname + func_execute_cmds "$postinstall_cmds" 'exit $?' + fi + + # Install the pseudo-library for information purposes. + func_basename "$file" + name=$func_basename_result + instname=$dir/${name}i + func_show_eval "$install_prog $instname $destdir/$name" 'exit $?' + + # Maybe install the static library, too. + test -n "$old_library" && func_append staticlibs " $dir/$old_library" + ;; + + *.lo) + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile=$destdir/$destname + else + func_basename "$file" + destfile=$func_basename_result + destfile=$destdir/$destfile + fi + + # Deduce the name of the destination old-style object file. 
+ case $destfile in + *.lo) + func_lo2o "$destfile" + staticdest=$func_lo2o_result + ;; + *.$objext) + staticdest=$destfile + destfile= + ;; + *) + func_fatal_help "cannot copy a libtool object to '$destfile'" + ;; + esac + + # Install the libtool object if requested. + test -n "$destfile" && \ + func_show_eval "$install_prog $file $destfile" 'exit $?' + + # Install the old object if enabled. + if test yes = "$build_old_libs"; then + # Deduce the name of the old-style object file. + func_lo2o "$file" + staticobj=$func_lo2o_result + func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?' + fi + exit $EXIT_SUCCESS + ;; + + *) + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile=$destdir/$destname + else + func_basename "$file" + destfile=$func_basename_result + destfile=$destdir/$destfile + fi + + # If the file is missing, and there is a .exe on the end, strip it + # because it is most likely a libtool script we actually want to + # install + stripped_ext= + case $file in + *.exe) + if test ! -f "$file"; then + func_stripname '' '.exe' "$file" + file=$func_stripname_result + stripped_ext=.exe + fi + ;; + esac + + # Do a test to see if this is really a libtool program. + case $host in + *cygwin* | *mingw*) + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + wrapper=$func_ltwrapper_scriptname_result + else + func_stripname '' '.exe' "$file" + wrapper=$func_stripname_result + fi + ;; + *) + wrapper=$file + ;; + esac + if func_ltwrapper_script_p "$wrapper"; then + notinst_deplibs= + relink_command= + + func_source "$wrapper" + + # Check the variables that should have been set. + test -z "$generated_by_libtool_version" && \ + func_fatal_error "invalid libtool wrapper script '$wrapper'" + + finalize=: + for lib in $notinst_deplibs; do + # Check to see that each library is installed. 
+ libdir= + if test -f "$lib"; then + func_source "$lib" + fi + libfile=$libdir/`$ECHO "$lib" | $SED 's%^.*/%%g'` + if test -n "$libdir" && test ! -f "$libfile"; then + func_warning "'$lib' has not been installed in '$libdir'" + finalize=false + fi + done + + relink_command= + func_source "$wrapper" + + outputname= + if test no = "$fast_install" && test -n "$relink_command"; then + $opt_dry_run || { + if $finalize; then + tmpdir=`func_mktempdir` + func_basename "$file$stripped_ext" + file=$func_basename_result + outputname=$tmpdir/$file + # Replace the output file specification. + relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'` + + $opt_quiet || { + func_quote_for_expand "$relink_command" + eval "func_echo $func_quote_for_expand_result" + } + if eval "$relink_command"; then : + else + func_error "error: relink '$file' with the above command before installing it" + $opt_dry_run || ${RM}r "$tmpdir" + continue + fi + file=$outputname + else + func_warning "cannot relink '$file'" + fi + } + else + # Install the binary that we compiled earlier. + file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"` + fi + fi + + # remove .exe since cygwin /usr/bin/install will append another + # one anyway + case $install_prog,$host in + */usr/bin/install*,*cygwin*) + case $file:$destfile in + *.exe:*.exe) + # this is ok + ;; + *.exe:*) + destfile=$destfile.exe + ;; + *:*.exe) + func_stripname '' '.exe' "$destfile" + destfile=$func_stripname_result + ;; + esac + ;; + esac + func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?' + $opt_dry_run || if test -n "$outputname"; then + ${RM}r "$tmpdir" + fi + ;; + esac + done + + for file in $staticlibs; do + func_basename "$file" + name=$func_basename_result + + # Set up the ranlib parameters. + oldlib=$destdir/$name + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + + func_show_eval "$install_prog \$file \$oldlib" 'exit $?' 
+
+      if test -n "$stripme" && test -n "$old_striplib"; then
+        func_show_eval "$old_striplib $tool_oldlib" 'exit $?'
+      fi
+
+      # Do each command in the postinstall commands.
+      func_execute_cmds "$old_postinstall_cmds" 'exit $?'
+    done
+
+    test -n "$future_libdirs" && \
+      func_warning "remember to run '$progname --finish$future_libdirs'"
+
+    if test -n "$current_libdirs"; then
+      # Maybe just do a dry run.
+      $opt_dry_run && current_libdirs=" -n$current_libdirs"
+      exec_cmd='$SHELL "$progpath" $preserve_args --finish$current_libdirs'
+    else
+      exit $EXIT_SUCCESS
+    fi
+}
+
+test install = "$opt_mode" && func_mode_install ${1+"$@"}
+
+
+# func_generate_dlsyms outputname originator pic_p
+# Extract symbols from dlprefiles and create ${outputname}S.o with
+# a dlpreopen symbol table.
+#
+#   outputname  base name of the generated ${outputname}S.c source and of
+#               the $output_objdir/${outputname}.nm symbol list
+#   originator  name stored in the first table entry; with every
+#               non-alphanumeric character turned into '_' it also forms
+#               the lt_<prefix>_LTX_preloaded_symbols array name
+#   pic_p       command evaluated as a boolean (default 'false'); when it
+#               succeeds $pic_flag is used to compile the symbol table on
+#               hosts that have no special case below
+#
+# On return, @SYMFILE@ in $compile_command and $finalize_command has been
+# replaced by the symbol table object, or removed when no table is built.
+func_generate_dlsyms ()
+{
+    $debug_cmd
+
+    my_outputname=$1
+    my_originator=$2
+    my_pic_p=${3-false}
+    my_prefix=`$ECHO "$my_originator" | $SED 's%[^a-zA-Z0-9]%_%g'`
+    my_dlsyms=
+
+    # A symbol table is only generated when something is dlpreopened and
+    # both $NM and $global_symbol_pipe are configured.
+    if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then
+      if test -n "$NM" && test -n "$global_symbol_pipe"; then
+        my_dlsyms=${my_outputname}S.c
+      else
+        func_error "not configured to extract global symbols from dlpreopened files"
+      fi
+    fi
+
+    if test -n "$my_dlsyms"; then
+      case $my_dlsyms in
+      "") ;;
+      *.c)
+        # Discover the nlist of each of the dlfiles.
+        nlist=$output_objdir/$my_outputname.nm
+
+        func_show_eval "$RM $nlist ${nlist}S ${nlist}T"
+
+        # Parse the name list into a source file.
+        func_verbose "creating $output_objdir/$my_dlsyms"
+
+        $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\
+/* $my_dlsyms - symbol resolution table for '$my_outputname' dlsym emulation. */
+/* Generated by $PROGRAM (GNU $PACKAGE) $VERSION */
+
+#ifdef __cplusplus
+extern \"C\" {
+#endif
+
+#if defined __GNUC__ && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4))
+#pragma GCC diagnostic ignored \"-Wstrict-prototypes\"
+#endif
+
+/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */
+#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE
+/* DATA imports from DLLs on WIN32 can't be const, because runtime
+   relocations are performed -- see ld's documentation on pseudo-relocs. */
+# define LT_DLSYM_CONST
+#elif defined __osf__
+/* This system does not cope well with relocations in const data. */
+# define LT_DLSYM_CONST
+#else
+# define LT_DLSYM_CONST const
+#endif
+
+#define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0)
+
+/* External symbol declarations for the compiler. */\
+"
+
+        if test yes = "$dlself"; then
+          func_verbose "generating symbol list for '$output'"
+
+          $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist"
+
+          # Add our own program objects to the symbol list.
+          progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP`
+          for progfile in $progfiles; do
+            func_to_tool_file "$progfile" func_convert_file_msys_to_w32
+            func_verbose "extracting global C symbols from '$func_to_tool_file_result'"
+            $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'"
+          done
+
+          if test -n "$exclude_expsyms"; then
+            $opt_dry_run || {
+              eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
+              eval '$MV "$nlist"T "$nlist"'
+            }
+          fi
+
+          if test -n "$export_symbols_regex"; then
+            $opt_dry_run || {
+              eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T'
+              eval '$MV "$nlist"T "$nlist"'
+            }
+          fi
+
+          # Prepare the list of exported symbols
+          if test -z "$export_symbols"; then
+            export_symbols=$output_objdir/$outputname.exp
+            $opt_dry_run || {
+              $RM $export_symbols
+              eval "$SED -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
+              case $host in
+              *cygwin* | *mingw* | *cegcc* )
+                eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
+                eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"'
+                ;;
+              esac
+            }
+          else
+            $opt_dry_run || {
+              eval "$SED -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"'
+              eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T'
+              eval '$MV "$nlist"T "$nlist"'
+              case $host in
+              *cygwin* | *mingw* | *cegcc* )
+                eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
+                eval 'cat "$nlist" >> "$output_objdir/$outputname.def"'
+                ;;
+              esac
+            }
+          fi
+        fi
+
+        for dlprefile in $dlprefiles; do
+          func_verbose "extracting global C symbols from '$dlprefile'"
+          func_basename "$dlprefile"
+          name=$func_basename_result
+          case $host in
+            *cygwin* | *mingw* | *cegcc* )
+              # if an import library, we need to obtain dlname
+              if func_win32_import_lib_p "$dlprefile"; then
+                func_tr_sh "$dlprefile"
+                eval "curr_lafile=\$libfile_$func_tr_sh_result"
+                dlprefile_dlbasename=
+                if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then
+                  # Use subshell, to avoid clobbering current variable values.
+                  # The file is read with func_source instead of 'source',
+                  # which is not a POSIX shell builtin.
+                  dlprefile_dlname=`func_source "$curr_lafile" && echo "$dlname"`
+                  if test -n "$dlprefile_dlname"; then
+                    func_basename "$dlprefile_dlname"
+                    dlprefile_dlbasename=$func_basename_result
+                  else
+                    # no lafile. user explicitly requested -dlpreopen <import library>.
+                    $sharedlib_from_linklib_cmd "$dlprefile"
+                    dlprefile_dlbasename=$sharedlib_from_linklib_result
+                  fi
+                fi
+                $opt_dry_run || {
+                  if test -n "$dlprefile_dlbasename"; then
+                    eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"'
+                  else
+                    func_warning "Could not compute DLL name from $name"
+                    eval '$ECHO ": $name " >> "$nlist"'
+                  fi
+                  func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
+                  eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe |
+                    $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'"
+                }
+              else # not an import lib
+                $opt_dry_run || {
+                  eval '$ECHO ": $name " >> "$nlist"'
+                  func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
+                  eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'"
+                }
+              fi
+            ;;
+            *)
+              $opt_dry_run || {
+                eval '$ECHO ": $name " >> "$nlist"'
+                func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32
+                eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'"
+              }
+            ;;
+          esac
+        done
+
+        $opt_dry_run || {
+          # Make sure we have at least an empty file.
+          test -f "$nlist" || : > "$nlist"
+
+          if test -n "$exclude_expsyms"; then
+            $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
+            $MV "$nlist"T "$nlist"
+          fi
+
+          # Try sorting and uniquifying the output.
+          # The inner 'if' only probes whether sort accepts the '-k' key
+          # syntax: the probe reads from and writes to /dev/null so that it
+          # neither consumes the piped symbol list nor prints anything.
+          # The obsolete '+2' form is the fallback.
+          if $GREP -v "^: " < "$nlist" |
+              if sort -k 3 </dev/null >/dev/null 2>&1; then
+                sort -k 3
+              else
+                sort +2
+              fi |
+              uniq > "$nlist"S; then
+            :
+          else
+            $GREP -v "^: " < "$nlist" > "$nlist"S
+          fi
+
+          if test -f "$nlist"S; then
+            eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"'
+          else
+            echo '/* NONE */' >> "$output_objdir/$my_dlsyms"
+          fi
+
+          func_show_eval '$RM "${nlist}I"'
+          if test -n "$global_symbol_to_import"; then
+            eval "$global_symbol_to_import"' < "$nlist"S > "$nlist"I'
+          fi
+
+          echo >> "$output_objdir/$my_dlsyms" "\
+
+/* The mapping between symbol names and symbols. */
+typedef struct {
+  const char *name;
+  void *address;
+} lt_dlsymlist;
+extern LT_DLSYM_CONST lt_dlsymlist
+lt_${my_prefix}_LTX_preloaded_symbols[];\
+"
+
+          # A non-empty import list gets an lt_syminit() function that
+          # fills in the address of each listed symbol by name.
+          if test -s "$nlist"I; then
+            echo >> "$output_objdir/$my_dlsyms" "\
+static void lt_syminit(void)
+{
+  LT_DLSYM_CONST lt_dlsymlist *symbol = lt_${my_prefix}_LTX_preloaded_symbols;
+  for (; symbol->name; ++symbol)
+    {"
+            $SED 's/.*/      if (STREQ (symbol->name, \"&\")) symbol->address = (void *) \&&;/' < "$nlist"I >> "$output_objdir/$my_dlsyms"
+            echo >> "$output_objdir/$my_dlsyms" "\
+    }
+}"
+          fi
+          echo >> "$output_objdir/$my_dlsyms" "\
+LT_DLSYM_CONST lt_dlsymlist
+lt_${my_prefix}_LTX_preloaded_symbols[] =
+{ {\"$my_originator\", (void *) 0},"
+
+          # Register lt_syminit (emitted above under the same condition)
+          # in the table under the \"@INIT@\" key.
+          if test -s "$nlist"I; then
+            echo >> "$output_objdir/$my_dlsyms" "\
+  {\"@INIT@\", (void *) &lt_syminit},"
+          fi
+
+          case $need_lib_prefix in
+          no)
+            eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms"
+            ;;
+          *)
+            eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms"
+            ;;
+          esac
+          echo >> "$output_objdir/$my_dlsyms" "\
+  {0, (void *) 0}
+};
+
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+  return lt_${my_prefix}_LTX_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif\
+"
+        } # !$opt_dry_run
+
+        pic_flag_for_symtable=
+        case "$compile_command " in
+        *" -static "*) ;;
+        *)
+          case $host in
+          # compiling the symbol table file with pic_flag works around
+          # a FreeBSD bug that causes programs to crash when -lm is
+          # linked before any other PIC object. But we must not use
+          # pic_flag when linking with -static. The problem exists in
+          # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
+          *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
+            pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;;
+          *-*-hpux*)
+            pic_flag_for_symtable=" $pic_flag" ;;
+          *)
+            $my_pic_p && pic_flag_for_symtable=" $pic_flag"
+            ;;
+          esac
+          ;;
+        esac
+        # Compile the symbol table without any -pie/-fpie/-fPIE flag
+        # that may be present in $LTCFLAGS.
+        symtab_cflags=
+        for arg in $LTCFLAGS; do
+          case $arg in
+          -pie | -fpie | -fPIE) ;;
+          *) func_append symtab_cflags " $arg" ;;
+          esac
+        done
+
+        # Now compile the dynamic symbol file.
+        func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?'
+
+        # Clean up the generated files.
+        func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T" "${nlist}I"'
+
+        # Transform the symbol file into the correct name.
+        symfileobj=$output_objdir/${my_outputname}S.$objext
+        case $host in
+        *cygwin* | *mingw* | *cegcc* )
+          if test -f "$output_objdir/$my_outputname.def"; then
+            compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"`
+            finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"`
+          else
+            compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+            finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+          fi
+          ;;
+        *)
+          compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+          finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"`
+          ;;
+        esac
+        ;;
+      *)
+        func_fatal_error "unknown suffix for '$my_dlsyms'"
+        ;;
+      esac
+    else
+      # We keep going just in case the user didn't refer to
+      # lt_preloaded_symbols. The linker will fail if global_symbol_pipe
+      # really was required.
+
+      # Nullify the symbol file.
+      compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"`
+      finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"`
+    fi
+}
+
+# func_cygming_gnu_implib_p ARG
+# This predicate returns with zero status (TRUE) if
+# ARG is a GNU/binutils-style import library. Returns
+# with nonzero status (FALSE) otherwise.
+func_cygming_gnu_implib_p ()
+{
+  $debug_cmd
+
+  func_to_tool_file "$1" func_convert_file_msys_to_w32
+  func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'`
+  test -n "$func_cygming_gnu_implib_tmp"
+}
+
+# func_cygming_ms_implib_p ARG
+# This predicate returns with zero status (TRUE) if
+# ARG is an MS-style import library. Returns
+# with nonzero status (FALSE) otherwise.
+func_cygming_ms_implib_p ()
+{
+  $debug_cmd
+
+  # Run $NM on ARG (after func_to_tool_file has processed the name with
+  # the msys-to-w32 converter), filter the listing through
+  # $global_symbol_pipe and keep the lines that mention
+  # '_NULL_IMPORT_DESCRIPTOR'. Any such line makes the predicate TRUE.
+  func_to_tool_file "$1" func_convert_file_msys_to_w32
+  func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'`
+  test -n "$func_cygming_ms_implib_tmp"
+}
+
+# func_win32_libid arg
+# return the library type of file 'arg'
+#
+# Need a lot of goo to handle *both* DLLs and import libs
+# Has to be a shell function in order to 'eat' the argument
+# that is supplied when $file_magic_command is called.
+# Despite the name, also deal with 64 bit binaries.
+#
+# The type is stored in $win32_libid_type and echoed on stdout. It is
+# one of 'unknown', 'x86 archive import', 'x86 archive static' or
+# 'x86 DLL'.
+func_win32_libid ()
+{
+  $debug_cmd
+
+  win32_libid_type=unknown
+  # First classification comes from the text printed by 'file -L'.
+  win32_fileres=`file -L $1 2>/dev/null`
+  case $win32_fileres in
+  *ar\ archive\ import\ library*) # definitely import
+    win32_libid_type="x86 archive import"
+    ;;
+  *ar\ archive*) # could be an import, or static
+    # Only archives whose objdump header (first 10 lines) reports one of
+    # the PE formats below are classified; others stay 'unknown'.
+    # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD.
+    if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null |
+      $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then
+      case $nm_interface in
+      "MS dumpbin")
+        # With the dumpbin interface, rely on the two import-library
+        # predicates instead of parsing symbol types.
+        if func_cygming_ms_implib_p "$1" ||
+           func_cygming_gnu_implib_p "$1"
+        then
+          win32_nmres=import
+        else
+          win32_nmres=
+        fi
+        ;;
+      *)
+        # Scan at most the first 100 lines of the posix-format nm
+        # listing; the first line containing ' I ' yields 'import' and
+        # ends the scan.
+        func_to_tool_file "$1" func_convert_file_msys_to_w32
+        win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" |
+          $SED -n -e '
+            1,100{
+              / I /{
+                s|.*|import|
+                p
+                q
+              }
+            }'`
+        ;;
+      esac
+      case $win32_nmres in
+      import*) win32_libid_type="x86 archive import";;
+      *) win32_libid_type="x86 archive static";;
+      esac
+    fi
+    ;;
+  *DLL*)
+    win32_libid_type="x86 DLL"
+    ;;
+  *executable*) # but shell scripts are "executable" too...
+    case $win32_fileres in
+    *MS\ Windows\ PE\ Intel*)
+      win32_libid_type="x86 DLL"
+      ;;
+    esac
+    ;;
+  esac
+  $ECHO "$win32_libid_type"
+}
+
+# func_cygming_dll_for_implib ARG
+#
+# Platform-specific function to extract the
+# name of the DLL associated with the specified
+# import library ARG.
+# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib () +{ + $debug_cmd + + sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"` +} + +# func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs +# +# The is the core of a fallback implementation of a +# platform-specific function to extract the name of the +# DLL associated with the specified import library LIBNAME. +# +# SECTION_NAME is either .idata$6 or .idata$7, depending +# on the platform and compiler that created the implib. +# +# Echos the name of the DLL associated with the +# specified import library. +func_cygming_dll_for_implib_fallback_core () +{ + $debug_cmd + + match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"` + $OBJDUMP -s --section "$1" "$2" 2>/dev/null | + $SED '/^Contents of section '"$match_literal"':/{ + # Place marker at beginning of archive member dllname section + s/.*/====MARK====/ + p + d + } + # These lines can sometimes be longer than 43 characters, but + # are always uninteresting + /:[ ]*file format pe[i]\{,1\}-/d + /^In archive [^:]*:/d + # Ensure marker is printed + /^====MARK====/p + # Remove all lines with less than 43 characters + /^.\{43\}/!d + # From remaining lines, remove first 43 characters + s/^.\{43\}//' | + $SED -n ' + # Join marker and all lines until next marker into a single line + /^====MARK====/ b para + H + $ b para + b + :para + x + s/\n//g + # Remove the marker + s/^====MARK====// + # Remove trailing dots and whitespace + s/[\. \t]*$// + # Print + /./p' | + # we now have a list, one entry per line, of the stringified + # contents of the appropriate section of all members of the + # archive that possess that section. Heuristic: eliminate + # all those that have a first or second character that is + # a '.' (that is, objdump's representation of an unprintable + # character.) 
This should work for all archives with less than + # 0x302f exports -- but will fail for DLLs whose name actually + # begins with a literal '.' or a single character followed by + # a '.'. + # + # Of those that remain, print the first one. + $SED -e '/^\./d;/^.\./d;q' +} + +# func_cygming_dll_for_implib_fallback ARG +# Platform-specific function to extract the +# name of the DLL associated with the specified +# import library ARG. +# +# This fallback implementation is for use when $DLLTOOL +# does not support the --identify-strict option. +# Invoked by eval'ing the libtool variable +# $sharedlib_from_linklib_cmd +# Result is available in the variable +# $sharedlib_from_linklib_result +func_cygming_dll_for_implib_fallback () +{ + $debug_cmd + + if func_cygming_gnu_implib_p "$1"; then + # binutils import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"` + elif func_cygming_ms_implib_p "$1"; then + # ms-generated import library + sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"` + else + # unknown + sharedlib_from_linklib_result= + fi +} + + +# func_extract_an_archive dir oldlib +func_extract_an_archive () +{ + $debug_cmd + + f_ex_an_ar_dir=$1; shift + f_ex_an_ar_oldlib=$1 + if test yes = "$lock_old_archive_extraction"; then + lockfile=$f_ex_an_ar_oldlib.lock + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + fi + func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \ + 'stat=$?; rm -f "$lockfile"; exit $stat' + if test yes = "$lock_old_archive_extraction"; then + $opt_dry_run || rm -f "$lockfile" + fi + if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then + : + else + func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" + fi +} + + +# func_extract_archives gentop oldlib ... 
+func_extract_archives ()
+{
+    $debug_cmd
+
+    # Extract each OLDLIB into its own subdirectory of GENTOP and return,
+    # in func_extract_archives_result, the space-separated list of the
+    # *.$objext and *.lo files found there.
+    my_gentop=$1; shift
+    my_oldlibs=${1+"$@"}
+    my_oldobjs=
+    my_xlib=
+    my_xabs=
+    my_xdir=
+
+    for my_xlib in $my_oldlibs; do
+      # Extract the objects.
+      case $my_xlib in
+        [\\/]* | [A-Za-z]:[\\/]*) my_xabs=$my_xlib ;;
+        *) my_xabs=`pwd`"/$my_xlib" ;;
+      esac
+      func_basename "$my_xlib"
+      my_xlib=$func_basename_result
+      my_xlib_u=$my_xlib
+      # Choose a directory name that is not yet in $extracted_archives;
+      # on a clash, prefix the base name with lt<serial>-.
+      while :; do
+        case " $extracted_archives " in
+        *" $my_xlib_u "*)
+          func_arith $extracted_serial + 1
+          extracted_serial=$func_arith_result
+          my_xlib_u=lt$extracted_serial-$my_xlib ;;
+        *) break ;;
+        esac
+      done
+      extracted_archives="$extracted_archives $my_xlib_u"
+      my_xdir=$my_gentop/$my_xlib_u
+
+      func_mkdir_p "$my_xdir"
+
+      case $host in
+      *-darwin*)
+        func_verbose "Extracting $my_xabs"
+        # Do not bother doing anything if just a dry run
+        $opt_dry_run || {
+          darwin_orig_dir=`pwd`
+          cd "$my_xdir" || exit $?
+          darwin_archive=$my_xabs
+          darwin_curdir=`pwd`
+          func_basename "$darwin_archive"
+          darwin_base_archive=$func_basename_result
+          darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true`
+          if test -n "$darwin_arches"; then
+            # Multi-architecture archive: thin it once per architecture,
+            # extract every thin copy, then merge same-named objects.
+            darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'`
+            darwin_arch=
+            func_verbose "$darwin_base_archive has multiple architectures $darwin_arches"
+            for darwin_arch in $darwin_arches; do
+              func_mkdir_p "unfat-$$/$darwin_base_archive-$darwin_arch"
+              $LIPO -thin $darwin_arch -output "unfat-$$/$darwin_base_archive-$darwin_arch/$darwin_base_archive" "$darwin_archive"
+              # Stop instead of extracting into the wrong directory.
+              cd "unfat-$$/$darwin_base_archive-$darwin_arch" || exit $?
+              func_extract_an_archive "`pwd`" "$darwin_base_archive"
+              cd "$darwin_curdir"
+              $RM "unfat-$$/$darwin_base_archive-$darwin_arch/$darwin_base_archive"
+            done # $darwin_arches
+            ## Okay now we've a bunch of thin objects, gotta fatten them up :)
+            darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$sed_basename" | sort -u`
+            darwin_file=
+            darwin_files=
+            for darwin_file in $darwin_filelist; do
+              darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP`
+              $LIPO -create -output "$darwin_file" $darwin_files
+            done # $darwin_filelist
+            $RM -rf unfat-$$
+            cd "$darwin_orig_dir"
+          else
+            cd "$darwin_orig_dir"
+            func_extract_an_archive "$my_xdir" "$my_xabs"
+          fi # $darwin_arches
+        } # !$opt_dry_run
+        ;;
+      *)
+        func_extract_an_archive "$my_xdir" "$my_xabs"
+        ;;
+      esac
+      my_oldobjs="$my_oldobjs "`find "$my_xdir" -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP`
+    done
+
+    func_extract_archives_result=$my_oldobjs
+}
+
+
+# func_emit_wrapper [arg=no]
+#
+# Emit a libtool wrapper script on stdout.
+# Don't directly open a file because we may want to
+# incorporate the script contents within a cygwin/mingw
+# wrapper executable. Must ONLY be called from within
+# func_mode_link because it depends on a number of variables
+# set therein.
+#
+# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR
+# variable will take. If 'yes', then the emitted script
+# will assume that the directory where it is stored is
+# the $objdir directory. This is a cygwin/mingw-specific
+# behavior.
+func_emit_wrapper ()
+{
+        func_emit_wrapper_arg1=${1-no}
+
+        $ECHO "\
+#! $SHELL
+
+# $output - temporary wrapper script for $objdir/$outputname
+# Generated by $PROGRAM (GNU $PACKAGE) $VERSION
+#
+# The $output program cannot be directly executed until all the libtool
+# libraries that it depends on are installed.
+#
+# This wrapper script should never be moved out of the build directory.
+# If it is, it will not operate correctly.
+
+# Sed substitution that helps us do robust quoting. It backslashifies
+# metacharacters that are still active within double-quoted strings.
+sed_quote_subst='$sed_quote_subst'
+
+# Be Bourne compatible
+if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which
+  # is contrary to our usage.
Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command=\"$relink_command\" + +# This environment variable determines our operation mode. +if test \"\$libtool_install_magic\" = \"$magic\"; then + # install mode needs the following variables: + generated_by_libtool_version='$macro_version' + notinst_deplibs='$notinst_deplibs' +else + # When we are sourced in execute mode, \$file and \$ECHO are already set. + if test \"\$libtool_execute_magic\" != \"$magic\"; then + file=\"\$0\"" + + qECHO=`$ECHO "$ECHO" | $SED "$sed_quote_subst"` + $ECHO "\ + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + ECHO=\"$qECHO\" + fi + +# Very basic option parsing. These options are (a) specific to +# the libtool wrapper, (b) are identical between the wrapper +# /script/ and the wrapper /executable/ that is used only on +# windows platforms, and (c) all begin with the string "--lt-" +# (application programs are unlikely to have options that match +# this pattern). +# +# There are only two supported options: --lt-debug and +# --lt-dump-script. There is, deliberately, no --lt-help. +# +# The first argument to this parsing function should be the +# script's $0 value, followed by "$@". +lt_option_debug= +func_parse_lt_options () +{ + lt_script_arg0=\$0 + shift + for lt_opt + do + case \"\$lt_opt\" in + --lt-debug) lt_option_debug=1 ;; + --lt-dump-script) + lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\` + test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=. 
+ lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\` + cat \"\$lt_dump_D/\$lt_dump_F\" + exit 0 + ;; + --lt-*) + \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2 + exit 1 + ;; + esac + done + + # Print the debug banner immediately: + if test -n \"\$lt_option_debug\"; then + echo \"$outputname:$output:\$LINENO: libtool wrapper (GNU $PACKAGE) $VERSION\" 1>&2 + fi +} + +# Used when --lt-debug. Prints its arguments to stdout +# (redirection is the responsibility of the caller) +func_lt_dump_args () +{ + lt_dump_args_N=1; + for lt_arg + do + \$ECHO \"$outputname:$output:\$LINENO: newargv[\$lt_dump_args_N]: \$lt_arg\" + lt_dump_args_N=\`expr \$lt_dump_args_N + 1\` + done +} + +# Core function for launching the target application +func_exec_program_core () +{ +" + case $host in + # Backslashes separate directories on plain windows + *-*-mingw | *-*-os2* | *-cegcc*) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir\\\\\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} +" + ;; + + *) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir/\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir/\$program\" \${1+\"\$@\"} +" + ;; + esac + $ECHO "\ + \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2 + exit 1 +} + +# A function to encapsulate launching the target application +# Strips options in the --lt-* namespace from \$@ and +# launches target application with the remaining arguments. +func_exec_program () +{ + case \" \$* \" in + *\\ --lt-*) + for lt_wr_arg + do + case \$lt_wr_arg in + --lt-*) ;; + *) set x \"\$@\" \"\$lt_wr_arg\"; shift;; + esac + shift + done ;; + esac + func_exec_program_core \${1+\"\$@\"} +} + + # Parse options + func_parse_lt_options \"\$0\" \${1+\"\$@\"} + + # Find the directory that this script lives in. 
+ thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\` + test \"x\$thisdir\" = \"x\$file\" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\` + while test -n \"\$file\"; do + destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\` + + # If there was a directory component, then change thisdir. + if test \"x\$destdir\" != \"x\$file\"; then + case \"\$destdir\" in + [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; + *) thisdir=\"\$thisdir/\$destdir\" ;; + esac + fi + + file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\` + file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` + done + + # Usually 'no', except on cygwin/mingw when embedded into + # the cwrapper. + WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 + if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then + # special case for '.' + if test \"\$thisdir\" = \".\"; then + thisdir=\`pwd\` + fi + # remove .libs from thisdir + case \"\$thisdir\" in + *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;; + $objdir ) thisdir=. ;; + esac + fi + + # Try to get the absolute directory name. + absdir=\`cd \"\$thisdir\" && pwd\` + test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test yes = "$fast_install"; then + $ECHO "\ + program=lt-'$outputname'$exeext + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | $SED 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + + if test ! 
-d \"\$progdir\"; then + $MKDIR \"\$progdir\" + else + $RM \"\$progdir/\$file\" + fi" + + $ECHO "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if relink_command_output=\`eval \$relink_command 2>&1\`; then : + else + \$ECHO \"\$relink_command_output\" >&2 + $RM \"\$progdir/\$file\" + exit 1 + fi + fi + + $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $RM \"\$progdir/\$program\"; + $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $RM \"\$progdir/\$file\" + fi" + else + $ECHO "\ + program='$outputname' + progdir=\"\$thisdir/$objdir\" +" + fi + + $ECHO "\ + + if test -f \"\$progdir/\$program\"; then" + + # fixup the dll searchpath if we need to. + # + # Fix the DLL searchpath if we need to. Do this before prepending + # to shlibpath, because on Windows, both are PATH and uninstalled + # libraries must come first. + if test -n "$dllsearchpath"; then + $ECHO "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi + + # Export our shlibpath_var if we have one. + if test yes = "$shlibpath_overrides_runpath" && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $ECHO "\ + # Add our own library path to $shlibpath_var + $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" + + # Some systems cannot cope with colon-terminated $shlibpath_var + # The second colon is a workaround for a bug in BeOS R4 sed + $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\` + + export $shlibpath_var +" + fi + + $ECHO "\ + if test \"\$libtool_execute_magic\" != \"$magic\"; then + # Run the actual program with our arguments. + func_exec_program \${1+\"\$@\"} + fi + else + # The program doesn't exist. 
+ \$ECHO \"\$0: error: '\$progdir/\$program' does not exist\" 1>&2 + \$ECHO \"This script is just a wrapper for \$program.\" 1>&2 + \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2 + exit 1 + fi +fi\ +" +} + + +# func_emit_cwrapperexe_src +# emit the source code for a wrapper executable on stdout +# Must ONLY be called from within func_mode_link because +# it depends on a number of variable set therein. +func_emit_cwrapperexe_src () +{ + cat < +#include +#ifdef _MSC_VER +# include +# include +# include +#else +# include +# include +# ifdef __CYGWIN__ +# include +# endif +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0) + +/* declarations of non-ANSI functions */ +#if defined __MINGW32__ +# ifdef __STRICT_ANSI__ +int _putenv (const char *); +# endif +#elif defined __CYGWIN__ +# ifdef __STRICT_ANSI__ +char *realpath (const char *, char *); +int putenv (char *); +int setenv (const char *, const char *, int); +# endif +/* #elif defined other_platform || defined ... */ +#endif + +/* portability defines, excluding path handling macros */ +#if defined _MSC_VER +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +# define S_IXUSR _S_IEXEC +#elif defined __MINGW32__ +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +#elif defined __CYGWIN__ +# define HAVE_SETENV +# define FOPEN_WB "wb" +/* #elif defined other platforms ... 
*/ +#endif + +#if defined PATH_MAX +# define LT_PATHMAX PATH_MAX +#elif defined MAXPATHLEN +# define LT_PATHMAX MAXPATHLEN +#else +# define LT_PATHMAX 1024 +#endif + +#ifndef S_IXOTH +# define S_IXOTH 0 +#endif +#ifndef S_IXGRP +# define S_IXGRP 0 +#endif + +/* path handling portability macros */ +#ifndef DIR_SEPARATOR +# define DIR_SEPARATOR '/' +# define PATH_SEPARATOR ':' +#endif + +#if defined _WIN32 || defined __MSDOS__ || defined __DJGPP__ || \ + defined __OS2__ +# define HAVE_DOS_BASED_FILE_SYSTEM +# define FOPEN_WB "wb" +# ifndef DIR_SEPARATOR_2 +# define DIR_SEPARATOR_2 '\\' +# endif +# ifndef PATH_SEPARATOR_2 +# define PATH_SEPARATOR_2 ';' +# endif +#endif + +#ifndef DIR_SEPARATOR_2 +# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) +#else /* DIR_SEPARATOR_2 */ +# define IS_DIR_SEPARATOR(ch) \ + (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) +#endif /* DIR_SEPARATOR_2 */ + +#ifndef PATH_SEPARATOR_2 +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) +#else /* PATH_SEPARATOR_2 */ +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) +#endif /* PATH_SEPARATOR_2 */ + +#ifndef FOPEN_WB +# define FOPEN_WB "w" +#endif +#ifndef _O_BINARY +# define _O_BINARY 0 +#endif + +#define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) +#define XFREE(stale) do { \ + if (stale) { free (stale); stale = 0; } \ +} while (0) + +#if defined LT_DEBUGWRAPPER +static int lt_debug = 1; +#else +static int lt_debug = 0; +#endif + +const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */ + +void *xmalloc (size_t num); +char *xstrdup (const char *string); +const char *base_name (const char *name); +char *find_executable (const char *wrapper); +char *chase_symlinks (const char *pathspec); +int make_executable (const char *path); +int check_executable (const char *path); +char *strendzap (char *str, const char *pat); +void lt_debugprintf (const char *file, int line, const char *fmt, ...); +void lt_fatal (const char *file, int line, const 
char *message, ...); +static const char *nonnull (const char *s); +static const char *nonempty (const char *s); +void lt_setenv (const char *name, const char *value); +char *lt_extend_str (const char *orig_value, const char *add, int to_end); +void lt_update_exe_path (const char *name, const char *value); +void lt_update_lib_path (const char *name, const char *value); +char **prepare_spawn (char **argv); +void lt_dump_script (FILE *f); +EOF + + cat <= 0) + && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) + return 1; + else + return 0; +} + +int +make_executable (const char *path) +{ + int rval = 0; + struct stat st; + + lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n", + nonempty (path)); + if ((!path) || (!*path)) + return 0; + + if (stat (path, &st) >= 0) + { + rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR); + } + return rval; +} + +/* Searches for the full path of the wrapper. Returns + newly allocated full path name if found, NULL otherwise + Does not chase symlinks, even on platforms that support them. +*/ +char * +find_executable (const char *wrapper) +{ + int has_slash = 0; + const char *p; + const char *p_next; + /* static buffer for getcwd */ + char tmp[LT_PATHMAX + 1]; + size_t tmp_len; + char *concat_name; + + lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n", + nonempty (wrapper)); + + if ((wrapper == NULL) || (*wrapper == '\0')) + return NULL; + + /* Absolute path? 
*/ +#if defined HAVE_DOS_BASED_FILE_SYSTEM + if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':') + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + else + { +#endif + if (IS_DIR_SEPARATOR (wrapper[0])) + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } +#if defined HAVE_DOS_BASED_FILE_SYSTEM + } +#endif + + for (p = wrapper; *p; p++) + if (*p == '/') + { + has_slash = 1; + break; + } + if (!has_slash) + { + /* no slashes; search PATH */ + const char *path = getenv ("PATH"); + if (path != NULL) + { + for (p = path; *p; p = p_next) + { + const char *q; + size_t p_len; + for (q = p; *q; q++) + if (IS_PATH_SEPARATOR (*q)) + break; + p_len = (size_t) (q - p); + p_next = (*q == '\0' ? q : q + 1); + if (p_len == 0) + { + /* empty path: current directory */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = + XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + } + else + { + concat_name = + XMALLOC (char, p_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, p, p_len); + concat_name[p_len] = '/'; + strcpy (concat_name + p_len + 1, wrapper); + } + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + } + /* not found in PATH; assume curdir */ + } + /* Relative path | not found in path: prepend cwd */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + + if 
(check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + return NULL; +} + +char * +chase_symlinks (const char *pathspec) +{ +#ifndef S_ISLNK + return xstrdup (pathspec); +#else + char buf[LT_PATHMAX]; + struct stat s; + char *tmp_pathspec = xstrdup (pathspec); + char *p; + int has_symlinks = 0; + while (strlen (tmp_pathspec) && !has_symlinks) + { + lt_debugprintf (__FILE__, __LINE__, + "checking path component for symlinks: %s\n", + tmp_pathspec); + if (lstat (tmp_pathspec, &s) == 0) + { + if (S_ISLNK (s.st_mode) != 0) + { + has_symlinks = 1; + break; + } + + /* search backwards for last DIR_SEPARATOR */ + p = tmp_pathspec + strlen (tmp_pathspec) - 1; + while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + p--; + if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + { + /* no more DIR_SEPARATORS left */ + break; + } + *p = '\0'; + } + else + { + lt_fatal (__FILE__, __LINE__, + "error accessing file \"%s\": %s", + tmp_pathspec, nonnull (strerror (errno))); + } + } + XFREE (tmp_pathspec); + + if (!has_symlinks) + { + return xstrdup (pathspec); + } + + tmp_pathspec = realpath (pathspec, buf); + if (tmp_pathspec == 0) + { + lt_fatal (__FILE__, __LINE__, + "could not follow symlinks for %s", pathspec); + } + return xstrdup (tmp_pathspec); +#endif +} + +char * +strendzap (char *str, const char *pat) +{ + size_t len, patlen; + + assert (str != NULL); + assert (pat != NULL); + + len = strlen (str); + patlen = strlen (pat); + + if (patlen <= len) + { + str += len - patlen; + if (STREQ (str, pat)) + *str = '\0'; + } + return str; +} + +void +lt_debugprintf (const char *file, int line, const char *fmt, ...) 
+{ + va_list args; + if (lt_debug) + { + (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line); + va_start (args, fmt); + (void) vfprintf (stderr, fmt, args); + va_end (args); + } +} + +static void +lt_error_core (int exit_status, const char *file, + int line, const char *mode, + const char *message, va_list ap) +{ + fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode); + vfprintf (stderr, message, ap); + fprintf (stderr, ".\n"); + + if (exit_status >= 0) + exit (exit_status); +} + +void +lt_fatal (const char *file, int line, const char *message, ...) +{ + va_list ap; + va_start (ap, message); + lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap); + va_end (ap); +} + +static const char * +nonnull (const char *s) +{ + return s ? s : "(null)"; +} + +static const char * +nonempty (const char *s) +{ + return (s && !*s) ? "(empty)" : nonnull (s); +} + +void +lt_setenv (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_setenv) setting '%s' to '%s'\n", + nonnull (name), nonnull (value)); + { +#ifdef HAVE_SETENV + /* always make a copy, for consistency with !HAVE_SETENV */ + char *str = xstrdup (value); + setenv (name, str, 1); +#else + size_t len = strlen (name) + 1 + strlen (value) + 1; + char *str = XMALLOC (char, len); + sprintf (str, "%s=%s", name, value); + if (putenv (str) != EXIT_SUCCESS) + { + XFREE (str); + } +#endif + } +} + +char * +lt_extend_str (const char *orig_value, const char *add, int to_end) +{ + char *new_value; + if (orig_value && *orig_value) + { + size_t orig_value_len = strlen (orig_value); + size_t add_len = strlen (add); + new_value = XMALLOC (char, add_len + orig_value_len + 1); + if (to_end) + { + strcpy (new_value, orig_value); + strcpy (new_value + orig_value_len, add); + } + else + { + strcpy (new_value, add); + strcpy (new_value + add_len, orig_value); + } + } + else + { + new_value = xstrdup (add); + } + return new_value; +} + +void +lt_update_exe_path (const char *name, 
const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_exe_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + /* some systems can't cope with a ':'-terminated path #' */ + size_t len = strlen (new_value); + while ((len > 0) && IS_PATH_SEPARATOR (new_value[len-1])) + { + new_value[--len] = '\0'; + } + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +void +lt_update_lib_path (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_lib_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +EOF + case $host_os in + mingw*) + cat <<"EOF" + +/* Prepares an argument vector before calling spawn(). + Note that spawn() does not by itself call the command interpreter + (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") : + ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&v); + v.dwPlatformId == VER_PLATFORM_WIN32_NT; + }) ? "cmd.exe" : "command.com"). + Instead it simply concatenates the arguments, separated by ' ', and calls + CreateProcess(). We must quote the arguments since Win32 CreateProcess() + interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a + special way: + - Space and tab are interpreted as delimiters. They are not treated as + delimiters if they are surrounded by double quotes: "...". + - Unescaped double quotes are removed from the input. Their only effect is + that within double quotes, space and tab are treated like normal + characters. + - Backslashes not followed by double quotes are not special. 
+ - But 2*n+1 backslashes followed by a double quote become + n backslashes followed by a double quote (n >= 0): + \" -> " + \\\" -> \" + \\\\\" -> \\" + */ +#define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +#define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +char ** +prepare_spawn (char **argv) +{ + size_t argc; + char **new_argv; + size_t i; + + /* Count number of arguments. */ + for (argc = 0; argv[argc] != NULL; argc++) + ; + + /* Allocate new argument vector. */ + new_argv = XMALLOC (char *, argc + 1); + + /* Put quoted arguments into the new argument vector. */ + for (i = 0; i < argc; i++) + { + const char *string = argv[i]; + + if (string[0] == '\0') + new_argv[i] = xstrdup ("\"\""); + else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL) + { + int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL); + size_t length; + unsigned int backslashes; + const char *s; + char *quoted_string; + char *p; + + length = 0; + backslashes = 0; + if (quote_around) + length++; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + length += backslashes + 1; + length++; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + length += backslashes + 1; + + quoted_string = XMALLOC (char, length + 1); + + p = quoted_string; + backslashes = 0; + if (quote_around) + *p++ = '"'; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + { + unsigned int j; + for (j = backslashes + 1; j > 0; j--) + *p++ = '\\'; + } + *p++ = c; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + { + unsigned int j; + for (j = backslashes; j > 0; j--) + *p++ = '\\'; + *p++ = '"'; + } + *p = '\0'; + + new_argv[i] = quoted_string; + } + else + new_argv[i] = (char *) string; + } + 
new_argv[argc] = NULL; + + return new_argv; +} +EOF + ;; + esac + + cat <<"EOF" +void lt_dump_script (FILE* f) +{ +EOF + func_emit_wrapper yes | + $SED -n -e ' +s/^\(.\{79\}\)\(..*\)/\1\ +\2/ +h +s/\([\\"]\)/\\\1/g +s/$/\\n/ +s/\([^\n]*\).*/ fputs ("\1", f);/p +g +D' + cat <<"EOF" +} +EOF +} +# end: func_emit_cwrapperexe_src + +# func_win32_import_lib_p ARG +# True if ARG is an import lib, as indicated by $file_magic_cmd +func_win32_import_lib_p () +{ + $debug_cmd + + case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in + *import*) : ;; + *) false ;; + esac +} + +# func_suncc_cstd_abi +# !!ONLY CALL THIS FOR SUN CC AFTER $compile_command IS FULLY EXPANDED!! +# Several compiler flags select an ABI that is incompatible with the +# Cstd library. Avoid specifying it if any are in CXXFLAGS. +func_suncc_cstd_abi () +{ + $debug_cmd + + case " $compile_command " in + *" -compat=g "*|*\ -std=c++[0-9][0-9]\ *|*" -library=stdcxx4 "*|*" -library=stlport4 "*) + suncc_use_cstd_abi=no + ;; + *) + suncc_use_cstd_abi=yes + ;; + esac +} + +# func_mode_link arg... +func_mode_link () +{ + $debug_cmd + + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + # It is impossible to link a dll without this setting, and + # we shouldn't force the makefile maintainer to figure out + # what system we are compiling for in order to pass an extra + # flag for every libtool invocation. + # allow_undefined=no + + # FIXME: Unfortunately, there are problems with the above when trying + # to make a dll that has undefined symbols, in which case not + # even a static library is built. For now, we need to specify + # -no-undefined on the libtool link line when we can be certain + # that all symbols are satisfied, otherwise we get a static library. 
+ allow_undefined=yes + ;; + *) + allow_undefined=yes + ;; + esac + libtool_args=$nonopt + base_compile="$nonopt $@" + compile_command=$nonopt + finalize_command=$nonopt + + compile_rpath= + finalize_rpath= + compile_shlibpath= + finalize_shlibpath= + convenience= + old_convenience= + deplibs= + old_deplibs= + compiler_flags= + linker_flags= + dllsearchpath= + lib_search_path=`pwd` + inst_prefix_dir= + new_inherited_linker_flags= + + avoid_version=no + bindir= + dlfiles= + dlprefiles= + dlself=no + export_dynamic=no + export_symbols= + export_symbols_regex= + generated= + libobjs= + ltlibs= + module=no + no_install=no + objs= + os2dllname= + non_pic_objects= + precious_files_regex= + prefer_static_libs=no + preload=false + prev= + prevarg= + release= + rpath= + xrpath= + perm_rpath= + temp_rpath= + thread_safe=no + vinfo= + vinfo_number=no + weak_libs= + single_module=$wl-single_module + func_infer_tag $base_compile + + # We need to know -static, to get the right output filenames. + for arg + do + case $arg in + -shared) + test yes != "$build_libtool_libs" \ + && func_fatal_configuration "cannot build a shared library" + build_old_libs=no + break + ;; + -all-static | -static | -static-libtool-libs) + case $arg in + -all-static) + if test yes = "$build_libtool_libs" && test -z "$link_static_flag"; then + func_warning "complete static linking is impossible in this configuration" + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + -static) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=built + ;; + -static-libtool-libs) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + esac + build_libtool_libs=no + build_old_libs=yes + break + ;; + esac + done + + # See if our shared archives depend on static archives. 
+ test -n "$old_archive_from_new_cmds" && build_old_libs=yes + + # Go through the arguments, transforming them on the way. + while test "$#" -gt 0; do + arg=$1 + shift + func_quote_for_eval "$arg" + qarg=$func_quote_for_eval_unquoted_result + func_append libtool_args " $func_quote_for_eval_result" + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case $prev in + output) + func_append compile_command " @OUTPUT@" + func_append finalize_command " @OUTPUT@" + ;; + esac + + case $prev in + bindir) + bindir=$arg + prev= + continue + ;; + dlfiles|dlprefiles) + $preload || { + # Add the symbol object into the linking commands. + func_append compile_command " @SYMFILE@" + func_append finalize_command " @SYMFILE@" + preload=: + } + case $arg in + *.la | *.lo) ;; # We handle these cases below. + force) + if test no = "$dlself"; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test dlprefiles = "$prev"; then + dlself=yes + elif test dlfiles = "$prev" && test yes != "$dlopen_self"; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test dlfiles = "$prev"; then + func_append dlfiles " $arg" + else + func_append dlprefiles " $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols=$arg + test -f "$arg" \ + || func_fatal_error "symbol file '$arg' does not exist" + prev= + continue + ;; + expsyms_regex) + export_symbols_regex=$arg + prev= + continue + ;; + framework) + case $host in + *-*-darwin*) + case "$deplibs " in + *" $qarg.ltframework "*) ;; + *) func_append deplibs " $qarg.ltframework" # this is fixed later + ;; + esac + ;; + esac + prev= + continue + ;; + inst_prefix) + inst_prefix_dir=$arg + prev= + continue + ;; + mllvm) + # Clang does not use LLVM to link, so we can simply discard any + # '-mllvm $arg' options when doing the link step. 
+ prev= + continue + ;; + objectlist) + if test -f "$arg"; then + save_arg=$arg + moreargs= + for fil in `cat "$save_arg"` + do +# func_append moreargs " $fil" + arg=$fil + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test none = "$pic_object" && + test none = "$non_pic_object"; then + func_fatal_error "cannot find name of object for '$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + if test none != "$pic_object"; then + # Prepend the subdirectory the object is found in. + pic_object=$xdir$pic_object + + if test dlfiles = "$prev"; then + if test yes = "$build_libtool_libs" && test yes = "$dlopen_support"; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test dlprefiles = "$prev"; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg=$pic_object + fi + + # Non-PIC object. + if test none != "$non_pic_object"; then + # Prepend the subdirectory the object is found in. + non_pic_object=$xdir$non_pic_object + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test none = "$pic_object"; then + arg=$non_pic_object + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object=$pic_object + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. 
+ func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "'$arg' is not a valid libtool object" + fi + fi + done + else + func_fatal_error "link input file '$arg' does not exist" + fi + arg=$save_arg + prev= + continue + ;; + os2dllname) + os2dllname=$arg + prev= + continue + ;; + precious_regex) + precious_files_regex=$arg + prev= + continue + ;; + release) + release=-$arg + prev= + continue + ;; + rpath | xrpath) + # We need an absolute path. + case $arg in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + if test rpath = "$prev"; then + case "$rpath " in + *" $arg "*) ;; + *) func_append rpath " $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) func_append xrpath " $arg" ;; + esac + fi + prev= + continue + ;; + shrext) + shrext_cmds=$arg + prev= + continue + ;; + weak) + func_append weak_libs " $arg" + prev= + continue + ;; + xcclinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xcompiler) + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xlinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $wl$qarg" + prev= + func_append compile_command " $wl$qarg" + func_append finalize_command " $wl$qarg" + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi # test -n "$prev" + + prevarg=$arg + + case $arg in + -all-static) + if test -n "$link_static_flag"; then + # See comment for -static flag below, for more details. 
+ func_append compile_command " $link_static_flag" + func_append finalize_command " $link_static_flag" + fi + continue + ;; + + -allow-undefined) + # FIXME: remove this flag sometime in the future. + func_fatal_error "'-allow-undefined' must not be used because it is the default" + ;; + + -avoid-version) + avoid_version=yes + continue + ;; + + -bindir) + prev=bindir + continue + ;; + + -dlopen) + prev=dlfiles + continue + ;; + + -dlpreopen) + prev=dlprefiles + continue + ;; + + -export-dynamic) + export_dynamic=yes + continue + ;; + + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + func_fatal_error "more than one -exported-symbols argument is not allowed" + fi + if test X-export-symbols = "X$arg"; then + prev=expsyms + else + prev=expsyms_regex + fi + continue + ;; + + -framework) + prev=framework + continue + ;; + + -inst-prefix-dir) + prev=inst_prefix + continue + ;; + + # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* + # so, if we see these flags be careful not to treat them like -L + -L[A-Z][A-Z]*:*) + case $with_gcc/$host in + no/*-*-irix* | /*-*-irix*) + func_append compile_command " $arg" + func_append finalize_command " $arg" + ;; + esac + continue + ;; + + -L*) + func_stripname "-L" '' "$arg" + if test -z "$func_stripname_result"; then + if test "$#" -gt 0; then + func_fatal_error "require no space between '-L' and '$1'" + else + func_fatal_error "need path for '-L' option" + fi + fi + func_resolve_sysroot "$func_stripname_result" + dir=$func_resolve_sysroot_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + test -z "$absdir" && \ + func_fatal_error "cannot determine absolute directory name of '$dir'" + dir=$absdir + ;; + esac + case "$deplibs " in + *" -L$dir "* | *" $arg "*) + # Will only happen for absolute or sysroot arguments + ;; + *) + # Preserve sysroot, but never include relative directories + case $dir in + [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;; + *) func_append deplibs " -L$dir" ;; + esac + func_append lib_search_path " $dir" + ;; + esac + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$dir:"*) ;; + ::) dllsearchpath=$dir;; + *) func_append dllsearchpath ":$dir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + continue + ;; + + -l*) + if test X-lc = "X$arg" || test X-lm = "X$arg"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) + # These systems don't actually have a C or math library (as such) + continue + ;; + *-*-os2*) + # These systems don't actually have a C library (as such) + test X-lc = "X$arg" && continue + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig*) + # Do not include libc due to us having libc/libc_r. 
+ test X-lc = "X$arg" && continue + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C and math libraries are in the System framework + func_append deplibs " System.ltframework" + continue + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + test X-lc = "X$arg" && continue + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + test X-lc = "X$arg" && continue + ;; + esac + elif test X-lc_r = "X$arg"; then + case $host in + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig*) + # Do not include libc_r directly, use -pthread flag. + continue + ;; + esac + fi + func_append deplibs " $arg" + continue + ;; + + -mllvm) + prev=mllvm + continue + ;; + + -module) + module=yes + continue + ;; + + # Tru64 UNIX uses -model [arg] to determine the layout of C++ + # classes, name mangling, and exception handling. + # Darwin uses the -arch flag to determine output architecture. + -model|-arch|-isysroot|--sysroot) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + prev=xcompiler + continue + ;; + + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + continue + ;; + + -multi_module) + single_module=$wl-multi_module + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) + # The PATH hackery in wrapper scripts is required on Windows + # and Darwin in order for the loader to find any dlls it needs. 
+ func_warning "'-no-install' is ignored for $host" + func_warning "assuming '-no-fast-install' instead" + fast_install=no + ;; + *) no_install=yes ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -objectlist) + prev=objectlist + continue + ;; + + -os2dllname) + prev=os2dllname + continue + ;; + + -o) prev=output ;; + + -precious-files-regex) + prev=precious_regex + continue + ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + func_stripname '-R' '' "$arg" + dir=$func_stripname_result + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + =*) + func_stripname '=' '' "$dir" + dir=$lt_sysroot$func_stripname_result + ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + continue + ;; + + -shared) + # The effects of -shared are defined in a previous loop. + continue + ;; + + -shrext) + prev=shrext + continue + ;; + + -static | -static-libtool-libs) + # The effects of -static are defined in a previous loop. + # We used to do the same as -all-static on platforms that + # didn't have a PIC flag, but the assumption that the effects + # would be equivalent was wrong. It would break on at least + # Digital Unix and AIX. 
+ continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + + -version-number) + prev=vinfo + vinfo_number=yes + continue + ;; + + -weak) + prev=weak + continue + ;; + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs=$IFS; IFS=, + for flag in $args; do + IFS=$save_ifs + func_quote_for_eval "$flag" + func_append arg " $func_quote_for_eval_result" + func_append compiler_flags " $func_quote_for_eval_result" + done + IFS=$save_ifs + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Wl,*) + func_stripname '-Wl,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs=$IFS; IFS=, + for flag in $args; do + IFS=$save_ifs + func_quote_for_eval "$flag" + func_append arg " $wl$func_quote_for_eval_result" + func_append compiler_flags " $wl$func_quote_for_eval_result" + func_append linker_flags " $func_quote_for_eval_result" + done + IFS=$save_ifs + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + -XCClinker) + prev=xcclinker + continue + ;; + + # -msg_* for osf cc + -msg_*) + func_quote_for_eval "$arg" + arg=$func_quote_for_eval_result + ;; + + # Flags to be passed through unchanged, with rationale: + # -64, -mips[0-9] enable 64-bit mode for the SGI compiler + # -r[0-9][0-9]* specify processor for the SGI compiler + # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler + # +DA*, +DD* enable 64-bit mode for the HP compiler + # -q* compiler args for the IBM compiler + # -m*, -t[45]*, -txscale* architecture-specific flags for GCC + # -F/path path to uninstalled frameworks, gcc on darwin + # -p, -pg, --coverage, -fprofile-* profiling flags for GCC + # -fstack-protector* stack protector flags for GCC + # @file GCC response files + # -tp=* Portland pgcc target processor selection + # --sysroot=* for sysroot support + # -O*, -g*, -flto*, -fwhopr*, 
-fuse-linker-plugin GCC link-time optimization + # -specs=* GCC specs files + # -stdlib=* select c++ std lib with clang + -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ + -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ + -O*|-g*|-flto*|-fwhopr*|-fuse-linker-plugin|-fstack-protector*|-stdlib=*| \ + -specs=*) + func_quote_for_eval "$arg" + arg=$func_quote_for_eval_result + func_append compile_command " $arg" + func_append finalize_command " $arg" + func_append compiler_flags " $arg" + continue + ;; + + -Z*) + if test os2 = "`expr $host : '.*\(os2\)'`"; then + # OS/2 uses -Zxxx to specify OS/2-specific options + compiler_flags="$compiler_flags $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case $arg in + -Zlinker | -Zstack) + prev=xcompiler + ;; + esac + continue + else + # Otherwise treat like 'Some other compiler flag' below + func_quote_for_eval "$arg" + arg=$func_quote_for_eval_result + fi + ;; + + # Some other compiler flag. + -* | +*) + func_quote_for_eval "$arg" + arg=$func_quote_for_eval_result + ;; + + *.$objext) + # A standard object. + func_append objs " $arg" + ;; + + *.lo) + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test none = "$pic_object" && + test none = "$non_pic_object"; then + func_fatal_error "cannot find name of object for '$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + test none = "$pic_object" || { + # Prepend the subdirectory the object is found in. 
+ pic_object=$xdir$pic_object + + if test dlfiles = "$prev"; then + if test yes = "$build_libtool_libs" && test yes = "$dlopen_support"; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test dlprefiles = "$prev"; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg=$pic_object + } + + # Non-PIC object. + if test none != "$non_pic_object"; then + # Prepend the subdirectory the object is found in. + non_pic_object=$xdir$non_pic_object + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test none = "$pic_object"; then + arg=$non_pic_object + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object=$pic_object + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "'$arg' is not a valid libtool object" + fi + fi + ;; + + *.$libext) + # An archive. + func_append deplibs " $arg" + func_append old_deplibs " $arg" + continue + ;; + + *.la) + # A libtool-controlled library. + + func_resolve_sysroot "$arg" + if test dlfiles = "$prev"; then + # This library was specified with -dlopen. + func_append dlfiles " $func_resolve_sysroot_result" + prev= + elif test dlprefiles = "$prev"; then + # The library was specified with -dlpreopen. 
+ func_append dlprefiles " $func_resolve_sysroot_result" + prev= + else + func_append deplibs " $func_resolve_sysroot_result" + fi + continue + ;; + + # Some other compiler argument. + *) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + func_quote_for_eval "$arg" + arg=$func_quote_for_eval_result + ;; + esac # arg + + # Now actually substitute the argument into the commands. + if test -n "$arg"; then + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + done # argument parsing loop + + test -n "$prev" && \ + func_fatal_help "the '$prevarg' option requires an argument" + + if test yes = "$export_dynamic" && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + + oldlibs= + # calculate the name of the file, without its directory + func_basename "$output" + outputname=$func_basename_result + libobjs_save=$libobjs + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$ECHO \"\$$shlibpath_var\" \| \$SED \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + + # Definition is injected by LT_CONFIG during libtool generation. + func_munge_path_list sys_lib_dlsearch_path "$LT_SYS_LIBRARY_PATH" + + func_dirname "$output" "/" "" + output_objdir=$func_dirname_result$objdir + func_to_tool_file "$output_objdir/" + tool_output_objdir=$func_to_tool_file_result + # Create the object directory. + func_mkdir_p "$output_objdir" + + # Determine the type of output + case $output in + "") + func_fatal_help "you must specify an output file" + ;; + *.$libext) linkmode=oldlib ;; + *.lo | *.$objext) linkmode=obj ;; + *.la) linkmode=lib ;; + *) linkmode=prog ;; # Anything else should be a program. 
+ esac + + specialdeplibs= + + libs= + # Find all interdependent deplibs by searching for libraries + # that are linked more than once (e.g. -la -lb -la) + for deplib in $deplibs; do + if $opt_preserve_dup_deps; then + case "$libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append libs " $deplib" + done + + if test lib = "$linkmode"; then + libs="$predeps $libs $compiler_lib_search_path $postdeps" + + # Compute libraries that are listed more than once in $predeps + # $postdeps and mark them as special (i.e., whose duplicates are + # not to be eliminated). + pre_post_deps= + if $opt_duplicate_compiler_generated_deps; then + for pre_post_dep in $predeps $postdeps; do + case "$pre_post_deps " in + *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;; + esac + func_append pre_post_deps " $pre_post_dep" + done + fi + pre_post_deps= + fi + + deplibs= + newdependency_libs= + newlib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + notinst_deplibs= # not-installed libtool libraries + notinst_path= # paths that contain not-installed libtool libraries + + case $linkmode in + lib) + passes="conv dlpreopen link" + for file in $dlfiles $dlprefiles; do + case $file in + *.la) ;; + *) + func_fatal_help "libraries can '-dlopen' only libtool libraries: $file" + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=false + newdlfiles= + newdlprefiles= + passes="conv scan dlopen dlpreopen link" + ;; + *) passes="conv" + ;; + esac + + for pass in $passes; do + # The preopen pass in lib mode reverses $deplibs; put it back here + # so that -L comes before libs that need it for instance... 
+ if test lib,link = "$linkmode,$pass"; then + ## FIXME: Find the place where the list is rebuilt in the wrong + ## order, and fix it there properly + tmp_deplibs= + for deplib in $deplibs; do + tmp_deplibs="$deplib $tmp_deplibs" + done + deplibs=$tmp_deplibs + fi + + if test lib,link = "$linkmode,$pass" || + test prog,scan = "$linkmode,$pass"; then + libs=$deplibs + deplibs= + fi + if test prog = "$linkmode"; then + case $pass in + dlopen) libs=$dlfiles ;; + dlpreopen) libs=$dlprefiles ;; + link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; + esac + fi + if test lib,dlpreopen = "$linkmode,$pass"; then + # Collect and forward deplibs of preopened libtool libs + for lib in $dlprefiles; do + # Ignore non-libtool-libs + dependency_libs= + func_resolve_sysroot "$lib" + case $lib in + *.la) func_source "$func_resolve_sysroot_result" ;; + esac + + # Collect preopened libtool deplibs, except any this library + # has declared as weak libs + for deplib in $dependency_libs; do + func_basename "$deplib" + deplib_base=$func_basename_result + case " $weak_libs " in + *" $deplib_base "*) ;; + *) func_append deplibs " $deplib" ;; + esac + done + done + libs=$dlprefiles + fi + if test dlopen = "$pass"; then + # Collect dlpreopened libraries + save_deplibs=$deplibs + deplibs= + fi + + for deplib in $libs; do + lib= + found=false + case $deplib in + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append compiler_flags " $deplib" + if test lib = "$linkmode"; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -l*) + if test lib != "$linkmode" && test prog != "$linkmode"; then + func_warning "'-l' is ignored for archives/objects" + continue + fi + 
func_stripname '-l' '' "$deplib" + name=$func_stripname_result + if test lib = "$linkmode"; then + searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path" + else + searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path" + fi + for searchdir in $searchdirs; do + for search_ext in .la $std_shrext .so .a; do + # Search the libtool library + lib=$searchdir/lib$name$search_ext + if test -f "$lib"; then + if test .la = "$search_ext"; then + found=: + else + found=false + fi + break 2 + fi + done + done + if $found; then + # deplib is a libtool library + # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, + # We need to do some special things here, and not later. + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + case " $predeps $postdeps " in + *" $deplib "*) + if func_lalib_p "$lib"; then + library_names= + old_library= + func_source "$lib" + for l in $old_library $library_names; do + ll=$l + done + if test "X$ll" = "X$old_library"; then # only static version available + found=false + func_dirname "$lib" "" "." 
+ ladir=$func_dirname_result + lib=$ladir/$old_library + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test lib = "$linkmode" && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + fi + ;; + *) ;; + esac + fi + else + # deplib doesn't seem to be a libtool library + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test lib = "$linkmode" && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + ;; # -l + *.ltframework) + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + if test lib = "$linkmode"; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + test conv = "$pass" && continue + newdependency_libs="$deplib $newdependency_libs" + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + prog) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + continue + fi + if test scan = "$pass"; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + *) + func_warning "'-L' is ignored for archives/objects" + ;; + esac # linkmode + continue + ;; # -L + -R*) + if test link = "$pass"; then + func_stripname '-R' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + 
dir=$func_resolve_sysroot_result + # Make sure the xrpath contains only unique directories. + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + fi + deplibs="$deplib $deplibs" + continue + ;; + *.la) + func_resolve_sysroot "$deplib" + lib=$func_resolve_sysroot_result + ;; + *.$libext) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + continue + fi + case $linkmode in + lib) + # Linking convenience modules into shared libraries is allowed, + # but linking other static libraries is non-portable. + case " $dlpreconveniencelibs " in + *" $deplib "*) ;; + *) + valid_a_lib=false + case $deplibs_check_method in + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \ + | $EGREP "$match_pattern_regex" > /dev/null; then + valid_a_lib=: + fi + ;; + pass_all) + valid_a_lib=: + ;; + esac + if $valid_a_lib; then + echo + $ECHO "*** Warning: Linking the shared library $output against the" + $ECHO "*** static library $deplib is not portable!" + deplibs="$deplib $deplibs" + else + echo + $ECHO "*** Warning: Trying to link with static lib archive $deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because the file extensions .$libext of this argument makes me believe" + echo "*** that it is just a static archive that I should not use here." 
+ fi + ;; + esac + continue + ;; + prog) + if test link != "$pass"; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac # linkmode + ;; # *.$libext + *.lo | *.$objext) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + elif test prog = "$linkmode"; then + if test dlpreopen = "$pass" || test yes != "$dlopen_support" || test no = "$build_libtool_libs"; then + # If there is no dlopen support or we're linking statically, + # we need to preload. + func_append newdlprefiles " $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append newdlfiles " $deplib" + fi + fi + continue + ;; + %DEPLIBS%) + alldeplibs=: + continue + ;; + esac # case $deplib + + $found || test -f "$lib" \ + || func_fatal_error "cannot find the library '$lib' or unhandled argument '$deplib'" + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$lib" \ + || func_fatal_error "'$lib' is not a valid libtool archive" + + func_dirname "$lib" "" "." 
+ ladir=$func_dirname_result + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + inherited_linker_flags= + # If the library was installed with an old release of libtool, + # it will not redefine variables installed, or shouldnotlink + installed=yes + shouldnotlink=no + avoidtemprpath= + + + # Read the .la file + func_source "$lib" + + # Convert "-framework foo" to "foo.ltframework" + if test -n "$inherited_linker_flags"; then + tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` + for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do + case " $new_inherited_linker_flags " in + *" $tmp_inherited_linker_flag "*) ;; + *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";; + esac + done + fi + dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + if test lib,link = "$linkmode,$pass" || + test prog,scan = "$linkmode,$pass" || + { test prog != "$linkmode" && test lib != "$linkmode"; }; then + test -n "$dlopen" && func_append dlfiles " $dlopen" + test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen" + fi + + if test conv = "$pass"; then + # Only check for convenience libraries + deplibs="$lib $deplibs" + if test -z "$libdir"; then + if test -z "$old_library"; then + func_fatal_error "cannot find name of link library for '$lib'" + fi + # It is a libtool convenience library, so add in its objects. 
+ func_append convenience " $ladir/$objdir/$old_library" + func_append old_convenience " $ladir/$objdir/$old_library" + elif test prog != "$linkmode" && test lib != "$linkmode"; then + func_fatal_error "'$lib' is not a convenience library" + fi + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done + continue + fi # $pass = conv + + + # Get the name of the library we link against. + linklib= + if test -n "$old_library" && + { test yes = "$prefer_static_libs" || + test built,no = "$prefer_static_libs,$installed"; }; then + linklib=$old_library + else + for l in $old_library $library_names; do + linklib=$l + done + fi + if test -z "$linklib"; then + func_fatal_error "cannot find name of link library for '$lib'" + fi + + # This library was specified with -dlopen. + if test dlopen = "$pass"; then + test -z "$libdir" \ + && func_fatal_error "cannot -dlopen a convenience library: '$lib'" + if test -z "$dlname" || + test yes != "$dlopen_support" || + test no = "$build_libtool_libs" + then + # If there is no dlname, no dlopen support or we're linking + # statically, we need to preload. We also need to preload any + # dependent libraries so libltdl's deplib preloader doesn't + # bomb out in the load deplibs phase. + func_append dlprefiles " $lib $dependency_libs" + else + func_append newdlfiles " $lib" + fi + continue + fi # $pass = dlopen + + # We need an absolute path. 
+ case $ladir in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir=$ladir ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + func_warning "cannot determine absolute directory name of '$ladir'" + func_warning "passing it literally to the linker, although it might fail" + abs_ladir=$ladir + fi + ;; + esac + func_basename "$lib" + laname=$func_basename_result + + # Find the relevant object directory and library name. + if test yes = "$installed"; then + if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + func_warning "library '$lib' was moved." + dir=$ladir + absdir=$abs_ladir + libdir=$abs_ladir + else + dir=$lt_sysroot$libdir + absdir=$lt_sysroot$libdir + fi + test yes = "$hardcode_automatic" && avoidtemprpath=yes + else + if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then + dir=$ladir + absdir=$abs_ladir + # Remove this search path later + func_append notinst_path " $abs_ladir" + else + dir=$ladir/$objdir + absdir=$abs_ladir/$objdir + # Remove this search path later + func_append notinst_path " $abs_ladir" + fi + fi # $installed = yes + func_stripname 'lib' '.la' "$laname" + name=$func_stripname_result + + # This library was specified with -dlpreopen. + if test dlpreopen = "$pass"; then + if test -z "$libdir" && test prog = "$linkmode"; then + func_fatal_error "only libraries may -dlpreopen a convenience library: '$lib'" + fi + case $host in + # special handling for platforms with PE-DLLs. + *cygwin* | *mingw* | *cegcc* ) + # Linker will automatically link against shared library if both + # static and shared are present. Therefore, ensure we extract + # symbols from the import library if a shared library is present + # (otherwise, the dlopen module name will be incorrect). We do + # this by putting the import library name into $newdlprefiles. 
+ # We recover the dlopen module name by 'saving' the la file + # name in a special purpose variable, and (later) extracting the + # dlname from the la file. + if test -n "$dlname"; then + func_tr_sh "$dir/$linklib" + eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname" + func_append newdlprefiles " $dir/$linklib" + else + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + fi + ;; + * ) + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). + if test -n "$old_library"; then + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + # Otherwise, use the dlname, so that lt_dlopen finds it. 
+ elif test -n "$dlname"; then + func_append newdlprefiles " $dir/$dlname" + else + func_append newdlprefiles " $dir/$linklib" + fi + ;; + esac + fi # $pass = dlpreopen + + if test -z "$libdir"; then + # Link the convenience library + if test lib = "$linkmode"; then + deplibs="$dir/$old_library $deplibs" + elif test prog,link = "$linkmode,$pass"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + else + deplibs="$lib $deplibs" # used for prog,scan pass + fi + continue + fi + + + if test prog = "$linkmode" && test link != "$pass"; then + func_append newlib_search_path " $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=false + if test no != "$link_all_deplibs" || test -z "$library_names" || + test no = "$build_libtool_libs"; then + linkalldeplibs=: + fi + + tmp_libs= + for deplib in $dependency_libs; do + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + esac + # Need to link against all dependency_libs? + if $linkalldeplibs; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done # for deplib + continue + fi # $linkmode = prog... + + if test prog,link = "$linkmode,$pass"; then + if test -n "$library_names" && + { { test no = "$prefer_static_libs" || + test built,yes = "$prefer_static_libs,$installed"; } || + test -z "$old_library"; }; then + # We need to hardcode the library path + if test -n "$shlibpath_var" && test -z "$avoidtemprpath"; then + # Make sure the rpath contains only unique directories. 
+ case $temp_rpath: in + *"$absdir:"*) ;; + *) func_append temp_rpath "$absdir:" ;; + esac + fi + + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi # $linkmode,$pass = prog,link... + + if $alldeplibs && + { test pass_all = "$deplibs_check_method" || + { test yes = "$build_libtool_libs" && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + fi + + link_static=no # Whether the deplib will be linked statically + use_static_libs=$prefer_static_libs + if test built = "$use_static_libs" && test yes = "$installed"; then + use_static_libs=no + fi + if test -n "$library_names" && + { test no = "$use_static_libs" || test -z "$old_library"; }; then + case $host in + *cygwin* | *mingw* | *cegcc* | *os2*) + # No point in relinking DLLs because paths are not encoded + func_append notinst_deplibs " $lib" + need_relink=no + ;; + *) + if test no = "$installed"; then + func_append notinst_deplibs " $lib" + need_relink=yes + fi + ;; + esac + # This is a shared library + + # Warn about portability, can't link against -module's on some + # systems (darwin). Don't bleat about dlopened modules though! 
+ dlopenmodule= + for dlpremoduletest in $dlprefiles; do + if test "X$dlpremoduletest" = "X$lib"; then + dlopenmodule=$dlpremoduletest + break + fi + done + if test -z "$dlopenmodule" && test yes = "$shouldnotlink" && test link = "$pass"; then + echo + if test prog = "$linkmode"; then + $ECHO "*** Warning: Linking the executable $output against the loadable module" + else + $ECHO "*** Warning: Linking the shared library $output against the loadable module" + fi + $ECHO "*** $linklib is not portable!" + fi + if test lib = "$linkmode" && + test yes = "$hardcode_into_libs"; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + shift + realname=$1 + shift + libname=`eval "\\$ECHO \"$libname_spec\""` + # use dlname if we got it. it's perfectly good, no? 
+ if test -n "$dlname"; then + soname=$dlname + elif test -n "$soname_spec"; then + # bleh windows + case $host in + *cygwin* | mingw* | *cegcc* | *os2*) + func_arith $current - $age + major=$func_arith_result + versuffix=-$major + ;; + esac + eval soname=\"$soname_spec\" + else + soname=$realname + fi + + # Make a new name for the extract_expsyms_cmds to use + soroot=$soname + func_basename "$soroot" + soname=$func_basename_result + func_stripname 'lib' '.dll' "$soname" + newlib=libimp-$func_stripname_result.a + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + func_verbose "extracting exported symbol list from '$soname'" + func_execute_cmds "$extract_expsyms_cmds" 'exit $?' + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + func_verbose "generating import library for '$soname'" + func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?' + fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi # test -n "$old_archive_from_expsyms_cmds" + + if test prog = "$linkmode" || test relink != "$opt_mode"; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case $hardcode_action in + immediate | unsupported) + if test no = "$hardcode_direct"; then + add=$dir/$linklib + case $host in + *-*-sco3.2v5.0.[024]*) add_dir=-L$dir ;; + *-*-sysv4*uw2*) add_dir=-L$dir ;; + *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ + *-*-unixware7*) add_dir=-L$dir ;; + *-*-darwin* ) + # if the lib is a (non-dlopened) module then we cannot + # link against it, someone is ignoring the earlier warnings + if /usr/bin/file -L $add 2> /dev/null | + $GREP ": [^:]* bundle" >/dev/null; then + if test "X$dlopenmodule" != "X$lib"; then + $ECHO "*** Warning: lib $linklib is a module, not a shared library" + if test -z "$old_library"; then + echo + echo "*** And there doesn't seem to be a static archive available" + echo "*** The link 
will probably fail, sorry" + else + add=$dir/$old_library + fi + elif test -n "$old_library"; then + add=$dir/$old_library + fi + fi + esac + elif test no = "$hardcode_minus_L"; then + case $host in + *-*-sunos*) add_shlibpath=$dir ;; + esac + add_dir=-L$dir + add=-l$name + elif test no = "$hardcode_shlibpath_var"; then + add_shlibpath=$dir + add=-l$name + else + lib_linked=no + fi + ;; + relink) + if test yes = "$hardcode_direct" && + test no = "$hardcode_direct_absolute"; then + add=$dir/$linklib + elif test yes = "$hardcode_minus_L"; then + add_dir=-L$absdir + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add=-l$name + elif test yes = "$hardcode_shlibpath_var"; then + add_shlibpath=$dir + add=-l$name + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test yes != "$lib_linked"; then + func_fatal_configuration "unsupported hardcode properties" + fi + + if test -n "$add_shlibpath"; then + case :$compile_shlibpath: in + *":$add_shlibpath:"*) ;; + *) func_append compile_shlibpath "$add_shlibpath:" ;; + esac + fi + if test prog = "$linkmode"; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test yes != "$hardcode_direct" && + test yes != "$hardcode_minus_L" && + test yes = "$hardcode_shlibpath_var"; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + fi + fi + fi + + if test prog = "$linkmode" || test relink = "$opt_mode"; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. 
+ if test yes = "$hardcode_direct" && + test no = "$hardcode_direct_absolute"; then + add=$libdir/$linklib + elif test yes = "$hardcode_minus_L"; then + add_dir=-L$libdir + add=-l$name + elif test yes = "$hardcode_shlibpath_var"; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + add=-l$name + elif test yes = "$hardcode_automatic"; then + if test -n "$inst_prefix_dir" && + test -f "$inst_prefix_dir$libdir/$linklib"; then + add=$inst_prefix_dir$libdir/$linklib + else + add=$libdir/$linklib + fi + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir=-L$libdir + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add=-l$name + fi + + if test prog = "$linkmode"; then + test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" + test -n "$add" && finalize_deplibs="$add $finalize_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + fi + fi + elif test prog = "$linkmode"; then + # Here we assume that one of hardcode_direct or hardcode_minus_L + # is not unsupported. This is valid on all known static and + # shared platforms. + if test unsupported != "$hardcode_direct"; then + test -n "$old_library" && linklib=$old_library + compile_deplibs="$dir/$linklib $compile_deplibs" + finalize_deplibs="$dir/$linklib $finalize_deplibs" + else + compile_deplibs="-l$name -L$dir $compile_deplibs" + finalize_deplibs="-l$name -L$dir $finalize_deplibs" + fi + elif test yes = "$build_libtool_libs"; then + # Not a shared library + if test pass_all != "$deplibs_check_method"; then + # We're trying link a shared library against a static one + # but the system doesn't support it. 
+ + # Just print a warning and add the library to dependency_libs so + # that the program can be linked against the static library. + echo + $ECHO "*** Warning: This system cannot link to static lib archive $lib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + if test yes = "$module"; then + echo "*** But as you try to build a module library, libtool will still create " + echo "*** a static module, that should work as long as the dlopening application" + echo "*** is linked with the -dlopen flag to resolve symbols at runtime." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using 'nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** 'nm' from GNU binutils and a full rebuild may help." + fi + if test no = "$build_old_libs"; then + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + else + deplibs="$dir/$old_library $deplibs" + link_static=yes + fi + fi # link shared/static library? 
+ + if test lib = "$linkmode"; then + if test -n "$dependency_libs" && + { test yes != "$hardcode_into_libs" || + test yes = "$build_old_libs" || + test yes = "$link_static"; }; then + # Extract -R from dependency_libs + temp_deplibs= + for libdir in $dependency_libs; do + case $libdir in + -R*) func_stripname '-R' '' "$libdir" + temp_xrpath=$func_stripname_result + case " $xrpath " in + *" $temp_xrpath "*) ;; + *) func_append xrpath " $temp_xrpath";; + esac;; + *) func_append temp_deplibs " $libdir";; + esac + done + dependency_libs=$temp_deplibs + fi + + func_append newlib_search_path " $absdir" + # Link against this library + test no = "$link_static" && newdependency_libs="$abs_ladir/$laname $newdependency_libs" + # ... and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result";; + *) func_resolve_sysroot "$deplib" ;; + esac + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $func_resolve_sysroot_result "*) + func_append specialdeplibs " $func_resolve_sysroot_result" ;; + esac + fi + func_append tmp_libs " $func_resolve_sysroot_result" + done + + if test no != "$link_all_deplibs"; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + path= + case $deplib in + -L*) path=$deplib ;; + *.la) + func_resolve_sysroot "$deplib" + deplib=$func_resolve_sysroot_result + func_dirname "$deplib" "" "." + dir=$func_dirname_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) absdir=$dir ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + func_warning "cannot determine absolute directory name of '$dir'" + absdir=$dir + fi + ;; + esac + if $GREP "^installed=no" $deplib > /dev/null; then + case $host in + *-*-darwin*) + depdepl= + eval deplibrary_names=`$SED -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` + if test -n "$deplibrary_names"; then + for tmp in $deplibrary_names; do + depdepl=$tmp + done + if test -f "$absdir/$objdir/$depdepl"; then + depdepl=$absdir/$objdir/$depdepl + darwin_install_name=`$OTOOL -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + if test -z "$darwin_install_name"; then + darwin_install_name=`$OTOOL64 -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + fi + func_append compiler_flags " $wl-dylib_file $wl$darwin_install_name:$depdepl" + func_append linker_flags " -dylib_file $darwin_install_name:$depdepl" + path= + fi + fi + ;; + *) + path=-L$absdir/$objdir + ;; + esac + else + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + test -z "$libdir" && \ + func_fatal_error "'$deplib' is not a valid libtool archive" + test "$absdir" != "$libdir" && \ + func_warning "'$deplib' seems to be moved" + + path=-L$absdir + fi + ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$path $deplibs" ;; + esac + done + fi # link_all_deplibs != no + fi # linkmode = lib + done # for deplib in $libs + if test link = "$pass"; then + if test prog = "$linkmode"; then + compile_deplibs="$new_inherited_linker_flags $compile_deplibs" + finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs" + else + compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + fi + fi + dependency_libs=$newdependency_libs + if test dlpreopen = "$pass"; then + # Link the dlpreopened libraries before other libraries + for deplib in $save_deplibs; do + deplibs="$deplib $deplibs" + done + fi + if test 
dlopen != "$pass"; then + test conv = "$pass" || { + # Make sure lib_search_path contains only unique directories. + lib_search_path= + for dir in $newlib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) func_append lib_search_path " $dir" ;; + esac + done + newlib_search_path= + } + + if test prog,link = "$linkmode,$pass"; then + vars="compile_deplibs finalize_deplibs" + else + vars=deplibs + fi + for var in $vars dependency_libs; do + # Add libraries to $var in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + # FIXME: Pedantically, this is the right thing to do, so + # that some nasty dependency loop isn't accidentally + # broken: + #new_libs="$deplib $new_libs" + # Pragmatically, this seems to cause very few problems in + # practice: + case $deplib in + -L*) new_libs="$deplib $new_libs" ;; + -R*) ;; + *) + # And here is the reason: when a library appears more + # than once as an explicit dependence of a library, or + # is implicitly linked in more than once by the + # compiler, it is considered special, and multiple + # occurrences thereof are not removed. Compare this + # with having the same library being listed as a + # dependency of multiple other libraries: in this case, + # we know (pedantically, we assume) the library does not + # need to be listed more than once, so we keep only the + # last copy. This is not always right, but it is rare + # enough that we require users that really mean to play + # such unportable linking tricks to link the library + # using -Wl,-lname, so that libtool does not consider it + # for duplicate removal. 
+ case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case $deplib in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) func_append tmp_libs " $deplib" ;; + esac + ;; + *) func_append tmp_libs " $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done # for var + fi + + # Add Sun CC postdeps if required: + test CXX = "$tagname" && { + case $host_os in + linux*) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) # Sun C++ 5.9 + func_suncc_cstd_abi + + if test no != "$suncc_use_cstd_abi"; then + func_append postdeps ' -library=Cstd -library=Crun' + fi + ;; + esac + ;; + + solaris*) + func_cc_basename "$CC" + case $func_cc_basename_result in + CC* | sunCC*) + func_suncc_cstd_abi + + if test no != "$suncc_use_cstd_abi"; then + func_append postdeps ' -library=Cstd -library=Crun' + fi + ;; + esac + ;; + esac + } + + # Last step: remove runtime libs from dependency_libs + # (they stay in deplibs) + tmp_libs= + for i in $dependency_libs; do + case " $predeps $postdeps $compiler_lib_search_path " in + *" $i "*) + i= + ;; + esac + if test -n "$i"; then + func_append tmp_libs " $i" + fi + done + dependency_libs=$tmp_libs + done # for pass + if test prog = "$linkmode"; then + dlfiles=$newdlfiles + fi + if test prog = "$linkmode" || test lib = "$linkmode"; then + dlprefiles=$newdlprefiles + fi + + case $linkmode in + oldlib) + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + func_warning "'-dlopen' is ignored for archives" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "'-l' and '-L' are ignored for archives" ;; + esac + + test -n "$rpath" && \ + func_warning "'-rpath' is ignored for archives" + + test -n "$xrpath" && \ + func_warning "'-R' is ignored for archives" + + test -n "$vinfo" && \ + func_warning "'-version-info/-version-number' is ignored for 
archives" + + test -n "$release" && \ + func_warning "'-release' is ignored for archives" + + test -n "$export_symbols$export_symbols_regex" && \ + func_warning "'-export-symbols' is ignored for archives" + + # Now set the variables for building old libraries. + build_libtool_libs=no + oldlibs=$output + func_append objs "$old_deplibs" + ;; + + lib) + # Make sure we only generate libraries of the form 'libNAME.la'. + case $outputname in + lib*) + func_stripname 'lib' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + ;; + *) + test no = "$module" \ + && func_fatal_help "libtool library '$output' must begin with 'lib'" + + if test no != "$need_lib_prefix"; then + # Add the "lib" prefix for modules if required + func_stripname '' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + else + func_stripname '' '.la' "$outputname" + libname=$func_stripname_result + fi + ;; + esac + + if test -n "$objs"; then + if test pass_all != "$deplibs_check_method"; then + func_fatal_error "cannot build libtool library '$output' from non-libtool objects on this host:$objs" + else + echo + $ECHO "*** Warning: Linking the shared library $output against the non-libtool" + $ECHO "*** objects $objs is not portable!" + func_append libobjs " $objs" + fi + fi + + test no = "$dlself" \ + || func_warning "'-dlopen self' is ignored for libtool libraries" + + set dummy $rpath + shift + test 1 -lt "$#" \ + && func_warning "ignoring multiple '-rpath's for a libtool library" + + install_libdir=$1 + + oldlibs= + if test -z "$rpath"; then + if test yes = "$build_libtool_libs"; then + # Building a libtool convenience library. + # Some compilers have problems with a '.al' extension so + # convenience libraries should have the same extension an + # archive normally would. 
+ oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + test -n "$vinfo" && \ + func_warning "'-version-info/-version-number' is ignored for convenience libraries" + + test -n "$release" && \ + func_warning "'-release' is ignored for convenience libraries" + else + + # Parse the version information argument. + save_ifs=$IFS; IFS=: + set dummy $vinfo 0 0 0 + shift + IFS=$save_ifs + + test -n "$7" && \ + func_fatal_help "too many parameters to '-version-info'" + + # convert absolute version numbers to libtool ages + # this retains compatibility with .la files and attempts + # to make the code below a bit more comprehensible + + case $vinfo_number in + yes) + number_major=$1 + number_minor=$2 + number_revision=$3 + # + # There are really only two kinds -- those that + # use the current revision as the major version + # and those that subtract age and use age as + # a minor version. But, then there is irix + # that has an extra 1 added just for fun + # + case $version_type in + # correct linux to gnu/linux during the next big refactor + darwin|freebsd-elf|linux|osf|windows|none) + func_arith $number_major + $number_minor + current=$func_arith_result + age=$number_minor + revision=$number_revision + ;; + freebsd-aout|qnx|sunos) + current=$number_major + revision=$number_minor + age=0 + ;; + irix|nonstopux) + func_arith $number_major + $number_minor + current=$func_arith_result + age=$number_minor + revision=$number_minor + lt_irix_increment=no + ;; + esac + ;; + no) + current=$1 + revision=$2 + age=$3 + ;; + esac + + # Check that each of the things are valid numbers. 
+ case $current in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "CURRENT '$current' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + case $revision in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "REVISION '$revision' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + case $age in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "AGE '$age' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + if test "$age" -gt "$current"; then + func_error "AGE '$age' is greater than the current interface number '$current'" + func_fatal_error "'$vinfo' is not valid version information" + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case $version_type in + none) ;; + + darwin) + # Like Linux, but with the current version available in + # verstring for coding it into the library header + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + # Darwin ld doesn't like 0 for these options... 
+ func_arith $current + 1 + minor_current=$func_arith_result + xlcverstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + # On Darwin other compilers + case $CC in + nagfor*) + verstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" + ;; + *) + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + ;; + esac + ;; + + freebsd-aout) + major=.$current + versuffix=.$current.$revision + ;; + + freebsd-elf) + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + ;; + + irix | nonstopux) + if test no = "$lt_irix_increment"; then + func_arith $current - $age + else + func_arith $current - $age + 1 + fi + major=$func_arith_result + + case $version_type in + nonstopux) verstring_prefix=nonstopux ;; + *) verstring_prefix=sgi ;; + esac + verstring=$verstring_prefix$major.$revision + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test 0 -ne "$loop"; do + func_arith $revision - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring=$verstring_prefix$major.$iface:$verstring + done + + # Before this point, $major must not contain '.'. + major=.$major + versuffix=$major.$revision + ;; + + linux) # correct to gnu/linux during the next big refactor + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + ;; + + osf) + func_arith $current - $age + major=.$func_arith_result + versuffix=.$current.$age.$revision + verstring=$current.$age.$revision + + # Add in all the interfaces that we are compatible with. 
+ loop=$age + while test 0 -ne "$loop"; do + func_arith $current - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring=$verstring:$iface.0 + done + + # Make executables depend on our current version. + func_append verstring ":$current.0" + ;; + + qnx) + major=.$current + versuffix=.$current + ;; + + sco) + major=.$current + versuffix=.$current + ;; + + sunos) + major=.$current + versuffix=.$current.$revision + ;; + + windows) + # Use '-' rather than '.', since we only want one + # extension on DOS 8.3 file systems. + func_arith $current - $age + major=$func_arith_result + versuffix=-$major + ;; + + *) + func_fatal_configuration "unknown library version type '$version_type'" + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. + if test -z "$vinfo" && test -n "$release"; then + major= + case $version_type in + darwin) + # we can't check for "0.0" in archive_cmds due to quoting + # problems, so we reset it completely + verstring= + ;; + *) + verstring=0.0 + ;; + esac + if test no = "$need_version"; then + versuffix= + else + versuffix=.0.0 + fi + fi + + # Remove version info from name if versioning should be avoided + if test yes,no = "$avoid_version,$need_version"; then + major= + versuffix= + verstring= + fi + + # Check to see if the archive will have undefined symbols. + if test yes = "$allow_undefined"; then + if test unsupported = "$allow_undefined_flag"; then + if test yes = "$build_old_libs"; then + func_warning "undefined symbols not allowed in $host shared libraries; building static only" + build_libtool_libs=no + else + func_fatal_error "can't build $host shared library unless -no-undefined is specified" + fi + fi + else + # Don't allow undefined symbols. 
+ allow_undefined_flag=$no_undefined_flag + fi + + fi + + func_generate_dlsyms "$libname" "$libname" : + func_append libobjs " $symfileobj" + test " " = "$libobjs" && libobjs= + + if test relink != "$opt_mode"; then + # Remove our outputs, but don't remove object files since they + # may have been created when compiling PIC objects. + removelist= + tempremovelist=`$ECHO "$output_objdir/*"` + for p in $tempremovelist; do + case $p in + *.$objext | *.gcno) + ;; + $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/$libname$release.*) + if test -n "$precious_files_regex"; then + if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 + then + continue + fi + fi + func_append removelist " $p" + ;; + *) ;; + esac + done + test -n "$removelist" && \ + func_show_eval "${RM}r \$removelist" + fi + + # Now set the variables for building old libraries. + if test yes = "$build_old_libs" && test convenience != "$build_libtool_libs"; then + func_append oldlibs " $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.$libext$/d; $lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + #for path in $notinst_path; do + # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"` + # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"` + # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"` + #done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. 
+ temp_xrpath= + for libdir in $xrpath; do + func_replace_sysroot "$libdir" + func_append temp_xrpath " -R$func_replace_sysroot_result" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + if test yes != "$hardcode_into_libs" || test yes = "$build_old_libs"; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles=$dlfiles + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) func_append dlfiles " $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles=$dlprefiles + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) func_append dlprefiles " $lib" ;; + esac + done + + if test yes = "$build_libtool_libs"; then + if test -n "$rpath"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) + # these systems don't actually have a c library (as such)! + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C library is in the System framework + func_append deplibs " System.ltframework" + ;; + *-*-netbsd*) + # Don't link with libc until the a.out ld.so is fixed. + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc due to us having libc/libc_r. + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + ;; + *) + # Add libc to deplibs on all other systems if necessary. + if test yes = "$build_libtool_need_lc"; then + func_append deplibs " -lc" + fi + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. 
+ name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly. I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release= + versuffix= + major= + newdeplibs= + droppeddeps=no + case $deplibs_check_method in + pass_all) + # Don't check for shared/static. Everything works. + # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behavior. + newdeplibs=$deplibs + ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. + $opt_dry_run || $RM conftest.c + cat > conftest.c </dev/null` + $nocaseglob + else + potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` + fi + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null | + $GREP " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. + # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? 
+ potlib=$potent_lib + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | $SED 's/.* -> //'` + case $potliblink in + [\\/]* | [A-Za-z]:[\\/]*) potlib=$potliblink;; + *) potlib=`$ECHO "$potlib" | $SED 's|[^/]*$||'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null | + $SED -e 10q | + $EGREP "$file_magic_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib= + break 2 + fi + done + done + fi + if test -n "$a_deplib"; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib"; then + $ECHO "*** with $libname but no candidates were found. (...for file magic test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a file magic. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + for a_deplib in $deplibs; do + case $a_deplib in + -l*) + func_stripname -l '' "$a_deplib" + name=$func_stripname_result + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + case " $predeps $postdeps " in + *" $a_deplib "*) + func_append newdeplibs " $a_deplib" + a_deplib= + ;; + esac + fi + if test -n "$a_deplib"; then + libname=`eval "\\$ECHO \"$libname_spec\""` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + potlib=$potent_lib # see symlink-check above in file_magic test + if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \ + $EGREP "$match_pattern_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib= + break 2 + fi + done + done + fi + if test -n "$a_deplib"; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib"; then + $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a regex pattern. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + none | unknown | *) + newdeplibs= + tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'` + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + for i in $predeps $postdeps; do + # can't use Xsed below, because $i might contain '/' + tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s|$i||"` + done + fi + case $tmp_deplibs in + *[!\ \ ]*) + echo + if test none = "$deplibs_check_method"; then + echo "*** Warning: inter-library dependencies are not supported in this platform." + else + echo "*** Warning: inter-library dependencies are not known to be supported." + fi + echo "*** All declared inter-library dependencies are being dropped." + droppeddeps=yes + ;; + esac + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library with the System framework + newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + if test yes = "$droppeddeps"; then + if test yes = "$module"; then + echo + echo "*** Warning: libtool could not satisfy all declared inter-library" + $ECHO "*** dependencies of module $libname. Therefore, libtool will create" + echo "*** a static module, that should work as long as the dlopening" + echo "*** application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using 'nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** 'nm' from GNU binutils and a full rebuild may help." 
+ fi + if test no = "$build_old_libs"; then + oldlibs=$output_objdir/$libname.$libext + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + echo "*** The inter-library dependencies that have been dropped here will be" + echo "*** automatically added whenever a program is linked with this library" + echo "*** or is declared to -dlopen it." + + if test no = "$allow_undefined"; then + echo + echo "*** Since this library must not contain undefined symbols," + echo "*** because either the platform does not support them or" + echo "*** it was explicitly requested with -no-undefined," + echo "*** libtool will only create a static version of it." + if test no = "$build_old_libs"; then + oldlibs=$output_objdir/$libname.$libext + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + case $host in + *-*-darwin*) + newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + deplibs=$new_libs + + # All the library-specific variables (install_libdir is set above). 
+ library_names= + old_library= + dlname= + + # Test again, we may have decided not to build it any more + if test yes = "$build_libtool_libs"; then + # Remove $wl instances when linking with ld. + # FIXME: should test the right _cmds variable. + case $archive_cmds in + *\$LD\ *) wl= ;; + esac + if test yes = "$hardcode_into_libs"; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath=$finalize_rpath + test relink = "$opt_mode" || rpath=$compile_rpath$rpath + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + func_replace_sysroot "$libdir" + libdir=$func_replace_sysroot_result + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append dep_rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval "dep_rpath=\"$hardcode_libdir_flag_spec\"" + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. 
+ rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi + + shlibpath=$finalize_shlibpath + test relink = "$opt_mode" || shlibpath=$compile_shlibpath$shlibpath + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi + + # Get the real and link names of the library. + eval shared_ext=\"$shrext_cmds\" + eval library_names=\"$library_names_spec\" + set dummy $library_names + shift + realname=$1 + shift + + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname=$realname + fi + if test -z "$dlname"; then + dlname=$soname + fi + + lib=$output_objdir/$realname + linknames= + for link + do + func_append linknames " $link" + done + + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP` + test "X$libobjs" = "X " && libobjs= + + delfiles= + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp" + export_symbols=$output_objdir/$libname.uexp + func_append delfiles " $export_symbols" + fi + + orig_export_symbols= + case $host_os in + cygwin* | mingw* | cegcc*) + if test -n "$export_symbols" && test -z "$export_symbols_regex"; then + # exporting using user supplied symfile + func_dll_def_p "$export_symbols" || { + # and it's NOT already a .def file. Must figure out + # which of the given symbols are data symbols and tag + # them as such. So, trigger use of export_symbols_cmds. + # export_symbols gets reassigned inside the "prepare + # the list of exported symbols" if statement, so the + # include_expsyms logic still works. 
+ orig_export_symbols=$export_symbols + export_symbols= + always_export_symbols=yes + } + fi + ;; + esac + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test yes = "$always_export_symbols" || test -n "$export_symbols_regex"; then + func_verbose "generating symbol list for '$libname.la'" + export_symbols=$output_objdir/$libname.exp + $opt_dry_run || $RM $export_symbols + cmds=$export_symbols_cmds + save_ifs=$IFS; IFS='~' + for cmd1 in $cmds; do + IFS=$save_ifs + # Take the normal branch if the nm_file_list_spec branch + # doesn't work or if tool conversion is not needed. + case $nm_file_list_spec~$to_tool_file_cmd in + *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) + try_normal_branch=yes + eval cmd=\"$cmd1\" + func_len " $cmd" + len=$func_len_result + ;; + *) + try_normal_branch=no + ;; + esac + if test yes = "$try_normal_branch" \ + && { test "$len" -lt "$max_cmd_len" \ + || test "$max_cmd_len" -le -1; } + then + func_show_eval "$cmd" 'exit $?' + skipped_export=false + elif test -n "$nm_file_list_spec"; then + func_basename "$output" + output_la=$func_basename_result + save_libobjs=$libobjs + save_output=$output + output=$output_objdir/$output_la.nm + func_to_tool_file "$output" + libobjs=$nm_file_list_spec$func_to_tool_file_result + func_append delfiles " $output" + func_verbose "creating $NM input file list: $output" + for obj in $save_libobjs; do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > "$output" + eval cmd=\"$cmd1\" + func_show_eval "$cmd" 'exit $?' + output=$save_output + libobjs=$save_libobjs + skipped_export=false + else + # The command line is too long to execute in one step. + func_verbose "using reloadable object file for export list..." + skipped_export=: + # Break out early, otherwise skipped_export may be + # set to false by a later but shorter cmd. 
+ break + fi + done + IFS=$save_ifs + if test -n "$export_symbols_regex" && test : != "$skipped_export"; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + fi + + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols=$export_symbols + test -n "$orig_export_symbols" && tmp_export_symbols=$orig_export_symbols + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test : != "$skipped_export" && test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for '$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands, which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + + tmp_deplibs= + for test_deplib in $deplibs; do + case " $convenience " in + *" $test_deplib "*) ;; + *) + func_append tmp_deplibs " $test_deplib" + ;; + esac + done + deplibs=$tmp_deplibs + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec" && + test yes = "$compiler_needs_object" && + test -z "$libobjs"; then + # extract the archives, so we have objects to list. + # TODO: could optimize this to just extract one archive. 
+ whole_archive_flag_spec= + fi + if test -n "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + else + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + fi + + if test yes = "$thread_safe" && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + func_append linker_flags " $flag" + fi + + # Make a backup of the uninstalled library when relinking + if test relink = "$opt_mode"; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $? + fi + + # Do each of the archive commands. + if test yes = "$module" && test -n "$module_cmds"; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + eval test_cmds=\"$module_expsym_cmds\" + cmds=$module_expsym_cmds + else + eval test_cmds=\"$module_cmds\" + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval test_cmds=\"$archive_expsym_cmds\" + cmds=$archive_expsym_cmds + else + eval test_cmds=\"$archive_cmds\" + cmds=$archive_cmds + fi + fi + + if test : != "$skipped_export" && + func_len " $test_cmds" && + len=$func_len_result && + test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + : + else + # The command line is too long to link in one step, link piecewise + # or, if using GNU ld and skipped_export is not :, use a linker + # script. + + # Save the value of $output and $libobjs because we want to + # use them later. If we have whole_archive_flag_spec, we + # want to use save_libobjs as it was before + # whole_archive_flag_spec was expanded, because we can't + # assume the linker understands whole_archive_flag_spec. 
+ # This may have to be revisited, in case too many + # convenience libraries get linked in and end up exceeding + # the spec. + if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + fi + save_output=$output + func_basename "$output" + output_la=$func_basename_result + + # Clear the reloadable object creation command queue and + # initialize k to one. + test_cmds= + concat_cmds= + objlist= + last_robj= + k=1 + + if test -n "$save_libobjs" && test : != "$skipped_export" && test yes = "$with_gnu_ld"; then + output=$output_objdir/$output_la.lnkscript + func_verbose "creating GNU ld script: $output" + echo 'INPUT (' > $output + for obj in $save_libobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + echo ')' >> $output + func_append delfiles " $output" + func_to_tool_file "$output" + output=$func_to_tool_file_result + elif test -n "$save_libobjs" && test : != "$skipped_export" && test -n "$file_list_spec"; then + output=$output_objdir/$output_la.lnk + func_verbose "creating linker input file list: $output" + : > $output + set x $save_libobjs + shift + firstobj= + if test yes = "$compiler_needs_object"; then + firstobj="$1 " + shift + fi + for obj + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + func_append delfiles " $output" + func_to_tool_file "$output" + output=$firstobj\"$file_list_spec$func_to_tool_file_result\" + else + if test -n "$save_libobjs"; then + func_verbose "creating reloadable object files..." + output=$output_objdir/$output_la-$k.$objext + eval test_cmds=\"$reload_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + + # Loop over the list of objects to be linked. 
+ for obj in $save_libobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + if test -z "$objlist" || + test "$len" -lt "$max_cmd_len"; then + func_append objlist " $obj" + else + # The command $test_cmds is almost too long, add a + # command to the queue. + if test 1 -eq "$k"; then + # The first file doesn't have a previous command to add. + reload_objs=$objlist + eval concat_cmds=\"$reload_cmds\" + else + # All subsequent reloadable object files will link in + # the last one created. + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\" + fi + last_robj=$output_objdir/$output_la-$k.$objext + func_arith $k + 1 + k=$func_arith_result + output=$output_objdir/$output_la-$k.$objext + objlist=" $obj" + func_len " $last_robj" + func_arith $len0 + $func_len_result + len=$func_arith_result + fi + done + # Handle the remaining objects by creating one last + # reloadable object file. All subsequent reloadable object + # files will link in the last one created. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds$reload_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + func_append delfiles " $output" + + else + output= + fi + + ${skipped_export-false} && { + func_verbose "generating symbol list for '$libname.la'" + export_symbols=$output_objdir/$libname.exp + $opt_dry_run || $RM $export_symbols + libobjs=$output + # Append the command to create the export file. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + } + + test -n "$save_libobjs" && + func_verbose "creating a temporary reloadable object file: $output" + + # Loop through the commands generated above and execute them. 
+ save_ifs=$IFS; IFS='~' + for cmd in $concat_cmds; do + IFS=$save_ifs + $opt_quiet || { + func_quote_for_expand "$cmd" + eval "func_echo $func_quote_for_expand_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS=$save_ifs + + if test -n "$export_symbols_regex" && ${skipped_export-false}; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + + ${skipped_export-false} && { + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols=$export_symbols + test -n "$orig_export_symbols" && tmp_export_symbols=$orig_export_symbols + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for '$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands, which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + } + + libobjs=$output + # Restore the value of output. 
+ output=$save_output + + if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + fi + # Expand the library linking commands again to reset the + # value of $libobjs for piecewise linking. + + # Do each of the archive commands. + if test yes = "$module" && test -n "$module_cmds"; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + cmds=$module_expsym_cmds + else + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + cmds=$archive_expsym_cmds + else + cmds=$archive_cmds + fi + fi + fi + + if test -n "$delfiles"; then + # Append the command to remove temporary files to $cmds. + eval cmds=\"\$cmds~\$RM $delfiles\" + fi + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + + save_ifs=$IFS; IFS='~' + for cmd in $cmds; do + IFS=$sp$nl + eval cmd=\"$cmd\" + IFS=$save_ifs + $opt_quiet || { + func_quote_for_expand "$cmd" + eval "func_echo $func_quote_for_expand_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS=$save_ifs + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $? 
+ + if test -n "$convenience"; then + if test -z "$whole_archive_flag_spec"; then + func_show_eval '${RM}r "$gentop"' + fi + fi + + exit $EXIT_SUCCESS + fi + + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?' + fi + done + + # If -module or -export-dynamic was specified, set the dlname. + if test yes = "$module" || test yes = "$export_dynamic"; then + # On all known operating systems, these are identical. + dlname=$soname + fi + fi + ;; + + obj) + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + func_warning "'-dlopen' is ignored for objects" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "'-l' and '-L' are ignored for objects" ;; + esac + + test -n "$rpath" && \ + func_warning "'-rpath' is ignored for objects" + + test -n "$xrpath" && \ + func_warning "'-R' is ignored for objects" + + test -n "$vinfo" && \ + func_warning "'-version-info' is ignored for objects" + + test -n "$release" && \ + func_warning "'-release' is ignored for objects" + + case $output in + *.lo) + test -n "$objs$old_deplibs" && \ + func_fatal_error "cannot build library object '$output' from non-libtool objects" + + libobj=$output + func_lo2o "$libobj" + obj=$func_lo2o_result + ;; + *) + libobj= + obj=$output + ;; + esac + + # Delete the old objects. + $opt_dry_run || $RM $obj $libobj + + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, then we'll have to duplicate + # the extraction. + reload_conv_objs= + gentop= + # if reload_cmds runs $LD directly, get rid of -Wl from + # whole_archive_flag_spec and hope we can get by with turning comma + # into space. 
+ case $reload_cmds in + *\$LD[\ \$]*) wl= ;; + esac + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" + test -n "$wl" || tmp_whole_archive_flags=`$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'` + reload_conv_objs=$reload_objs\ $tmp_whole_archive_flags + else + gentop=$output_objdir/${obj}x + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + reload_conv_objs="$reload_objs $func_extract_archives_result" + fi + fi + + # If we're not building shared, we need to use non_pic_objs + test yes = "$build_libtool_libs" || libobjs=$non_pic_objects + + # Create the old-style object. + reload_objs=$objs$old_deplibs' '`$ECHO "$libobjs" | $SP2NL | $SED "/\.$libext$/d; /\.lib$/d; $lo2o" | $NL2SP`' '$reload_conv_objs + + output=$obj + func_execute_cmds "$reload_cmds" 'exit $?' + + # Exit if we aren't doing a library object file. + if test -z "$libobj"; then + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + fi + + test yes = "$build_libtool_libs" || { + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + # Create an invalid libtool object if no PIC, so that we don't + # accidentally link it into a program. + # $show "echo timestamp > $libobj" + # $opt_dry_run || eval "echo timestamp > $libobj" || exit $? + exit $EXIT_SUCCESS + } + + if test -n "$pic_flag" || test default != "$pic_mode"; then + # Only do commands if we really have different PIC objects. + reload_objs="$libobjs $reload_conv_objs" + output=$libobj + func_execute_cmds "$reload_cmds" 'exit $?' 
+ fi + + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + ;; + + prog) + case $host in + *cygwin*) func_stripname '' '.exe' "$output" + output=$func_stripname_result.exe;; + esac + test -n "$vinfo" && \ + func_warning "'-version-info' is ignored for programs" + + test -n "$release" && \ + func_warning "'-release' is ignored for programs" + + $preload \ + && test unknown,unknown,unknown = "$dlopen_support,$dlopen_self,$dlopen_self_static" \ + && func_warning "'LT_INIT([dlopen])' not used. Assuming no dlopen support." + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library with the System framework + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + case $host in + *-*-darwin*) + # Don't allow lazy linking, it breaks C++ global constructors + # But is supposedly fixed on 10.4 or later (yay!). 
+ if test CXX = "$tagname"; then + case ${MACOSX_DEPLOYMENT_TARGET-10.0} in + 10.[0123]) + func_append compile_command " $wl-bind_at_load" + func_append finalize_command " $wl-bind_at_load" + ;; + esac + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $compile_deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $compile_deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + compile_deplibs=$new_libs + + + func_append compile_command " $compile_deplibs" + func_append finalize_command " $finalize_deplibs" + + if test -n "$rpath$xrpath"; then + # If the user specified any rpath flags, then add them. + for libdir in $rpath $xrpath; do + # This is the magic to use -rpath. + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + fi + + # Now hardcode the library paths + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. 
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$libdir" | $SED -e 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$libdir:"*) ;; + ::) dllsearchpath=$libdir;; + *) func_append dllsearchpath ":$libdir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + compile_rpath=$rpath + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) func_append finalize_perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. 
+ if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + finalize_rpath=$rpath + + if test -n "$libobjs" && test yes = "$build_old_libs"; then + # Transform all the library objects into standard objects. + compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + fi + + func_generate_dlsyms "$outputname" "@PROGRAM@" false + + # template prelinking step + if test -n "$prelink_cmds"; then + func_execute_cmds "$prelink_cmds" 'exit $?' + fi + + wrappers_required=: + case $host in + *cegcc* | *mingw32ce*) + # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway. + wrappers_required=false + ;; + *cygwin* | *mingw* ) + test yes = "$build_libtool_libs" || wrappers_required=false + ;; + *) + if test no = "$need_relink" || test yes != "$build_libtool_libs"; then + wrappers_required=false + fi + ;; + esac + $wrappers_required || { + # Replace the output file specification. + compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + link_command=$compile_command$compile_rpath + + # We have no uninstalled library dependencies, so finalize right now. + exit_status=0 + func_show_eval "$link_command" 'exit_status=$?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Delete the generated files. 
+ if test -f "$output_objdir/${outputname}S.$objext"; then + func_show_eval '$RM "$output_objdir/${outputname}S.$objext"' + fi + + exit $exit_status + } + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" + fi + if test -n "$finalize_shlibpath"; then + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + func_append rpath "$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + fi + + if test yes = "$no_install"; then + # We don't need to create a wrapper script. + link_command=$compile_var$compile_command$compile_rpath + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. + $opt_dry_run || $RM $output + # Link the executable and exit + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' 
+ fi + + exit $EXIT_SUCCESS + fi + + case $hardcode_action,$fast_install in + relink,*) + # Fast installation is not supported + link_command=$compile_var$compile_command$compile_rpath + relink_command=$finalize_var$finalize_command$finalize_rpath + + func_warning "this platform does not like uninstalled shared libraries" + func_warning "'$output' will be relinked during installation" + ;; + *,yes) + link_command=$finalize_var$compile_command$finalize_rpath + relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'` + ;; + *,no) + link_command=$compile_var$compile_command$compile_rpath + relink_command=$finalize_var$finalize_command$finalize_rpath + ;; + *,needless) + link_command=$finalize_var$compile_command$finalize_rpath + relink_command= + ;; + esac + + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` + + # Delete the old output files. + $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname + + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output_objdir/$outputname" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Now create the wrapper script. + func_verbose "creating $output" + + # Quote the relink command for shipping. 
+ if test -n "$relink_command"; then + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_for_eval "$var_value" + relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" + fi + done + relink_command="(cd `pwd`; $relink_command)" + relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` + fi + + # Only actually do things if not in dry run mode. + $opt_dry_run || { + # win32 will think the script is a binary if it has + # a .exe suffix, so we strip it off here. + case $output in + *.exe) func_stripname '' '.exe' "$output" + output=$func_stripname_result ;; + esac + # test for cygwin because mv fails w/o .exe extensions + case $host in + *cygwin*) + exeext=.exe + func_stripname '' '.exe' "$outputname" + outputname=$func_stripname_result ;; + *) exeext= ;; + esac + case $host in + *cygwin* | *mingw* ) + func_dirname_and_basename "$output" "" "." + output_name=$func_basename_result + output_path=$func_dirname_result + cwrappersource=$output_path/$objdir/lt-$output_name.c + cwrapper=$output_path/$output_name.exe + $RM $cwrappersource $cwrapper + trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 + + func_emit_cwrapperexe_src > $cwrappersource + + # The wrapper executable is built using the $host compiler, + # because it contains $host paths and files. If cross- + # compiling, it, like the target executable, must be + # executed on the $host or under an emulation environment. 
+ $opt_dry_run || { + $LTCC $LTCFLAGS -o $cwrapper $cwrappersource + $STRIP $cwrapper + } + + # Now, create the wrapper script for func_source use: + func_ltwrapper_scriptname $cwrapper + $RM $func_ltwrapper_scriptname_result + trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15 + $opt_dry_run || { + # note: this script will not be executed, so do not chmod. + if test "x$build" = "x$host"; then + $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result + else + func_emit_wrapper no > $func_ltwrapper_scriptname_result + fi + } + ;; + * ) + $RM $output + trap "$RM $output; exit $EXIT_FAILURE" 1 2 15 + + func_emit_wrapper no > $output + chmod +x $output + ;; + esac + } + exit $EXIT_SUCCESS + ;; + esac + + # See if we need to build an old-fashioned archive. + for oldlib in $oldlibs; do + + case $build_libtool_libs in + convenience) + oldobjs="$libobjs_save $symfileobj" + addlibs=$convenience + build_libtool_libs=no + ;; + module) + oldobjs=$libobjs_save + addlibs=$old_convenience + build_libtool_libs=no + ;; + *) + oldobjs="$old_deplibs $non_pic_objects" + $preload && test -f "$symfileobj" \ + && func_append oldobjs " $symfileobj" + addlibs=$old_convenience + ;; + esac + + if test -n "$addlibs"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $addlibs + func_append oldobjs " $func_extract_archives_result" + fi + + # Do each command in the archive commands. + if test -n "$old_archive_from_new_cmds" && test yes = "$build_libtool_libs"; then + cmds=$old_archive_from_new_cmds + else + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append oldobjs " $func_extract_archives_result" + fi + + # POSIX demands no paths to be encoded in archives. 
We have + # to avoid creating archives with duplicate basenames if we + # might have to extract them afterwards, e.g., when creating a + # static archive out of a convenience library, or when linking + # the entirety of a libtool archive into another (currently + # not supported by libtool). + if (for obj in $oldobjs + do + func_basename "$obj" + $ECHO "$func_basename_result" + done | sort | sort -uc >/dev/null 2>&1); then + : + else + echo "copying selected object files to avoid basename conflicts..." + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + func_mkdir_p "$gentop" + save_oldobjs=$oldobjs + oldobjs= + counter=1 + for obj in $save_oldobjs + do + func_basename "$obj" + objbase=$func_basename_result + case " $oldobjs " in + " ") oldobjs=$obj ;; + *[\ /]"$objbase "*) + while :; do + # Make sure we don't pick an alternate name that also + # overlaps. + newobj=lt$counter-$objbase + func_arith $counter + 1 + counter=$func_arith_result + case " $oldobjs " in + *[\ /]"$newobj "*) ;; + *) if test ! -f "$gentop/$newobj"; then break; fi ;; + esac + done + func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" + func_append oldobjs " $gentop/$newobj" + ;; + *) func_append oldobjs " $obj" ;; + esac + done + fi + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + eval cmds=\"$old_archive_cmds\" + + func_len " $cmds" + len=$func_len_result + if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + cmds=$old_archive_cmds + elif test -n "$archiver_list_spec"; then + func_verbose "using command file archive linking..." 
+ for obj in $oldobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > $output_objdir/$libname.libcmd + func_to_tool_file "$output_objdir/$libname.libcmd" + oldobjs=" $archiver_list_spec$func_to_tool_file_result" + cmds=$old_archive_cmds + else + # the command line is too long to link in one step, link in parts + func_verbose "using piecewise archive linking..." + save_RANLIB=$RANLIB + RANLIB=: + objlist= + concat_cmds= + save_oldobjs=$oldobjs + oldobjs= + # Is there a better way of finding the last object in the list? + for obj in $save_oldobjs + do + last_oldobj=$obj + done + eval test_cmds=\"$old_archive_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + for obj in $save_oldobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + func_append objlist " $obj" + if test "$len" -lt "$max_cmd_len"; then + : + else + # the above command should be used before it gets too long + oldobjs=$objlist + if test "$obj" = "$last_oldobj"; then + RANLIB=$save_RANLIB + fi + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$old_archive_cmds\" + objlist= + len=$len0 + fi + done + RANLIB=$save_RANLIB + oldobjs=$objlist + if test -z "$oldobjs"; then + eval cmds=\"\$concat_cmds\" + else + eval cmds=\"\$concat_cmds~\$old_archive_cmds\" + fi + fi + fi + func_execute_cmds "$cmds" 'exit $?' + done + + test -n "$generated" && \ + func_show_eval "${RM}r$generated" + + # Now create the libtool archive. 
+ case $output in + *.la) + old_library= + test yes = "$build_old_libs" && old_library=$libname.$libext + func_verbose "creating $output" + + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_for_eval "$var_value" + relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" + fi + done + # Quote the link command for shipping. + relink_command="(cd `pwd`; $SHELL \"$progpath\" $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" + relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` + if test yes = "$hardcode_automatic"; then + relink_command= + fi + + # Only create the output if not a dry run. + $opt_dry_run || { + for installed in no yes; do + if test yes = "$installed"; then + if test -z "$install_libdir"; then + break + fi + output=$output_objdir/${outputname}i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case $deplib in + *.la) + func_basename "$deplib" + name=$func_basename_result + func_resolve_sysroot "$deplib" + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result` + test -z "$libdir" && \ + func_fatal_error "'$deplib' is not a valid libtool archive" + func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name" + ;; + -L*) + func_stripname -L '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -L$func_replace_sysroot_result" + ;; + -R*) + func_stripname -R '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -R$func_replace_sysroot_result" + ;; + *) func_append newdependency_libs " 
$deplib" ;; + esac + done + dependency_libs=$newdependency_libs + newdlfiles= + + for lib in $dlfiles; do + case $lib in + *.la) + func_basename "$lib" + name=$func_basename_result + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "'$lib' is not a valid libtool archive" + func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name" + ;; + *) func_append newdlfiles " $lib" ;; + esac + done + dlfiles=$newdlfiles + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + *.la) + # Only pass preopened files to the pseudo-archive (for + # eventual linking with the app. that links it) if we + # didn't already link the preopened objects directly into + # the library: + func_basename "$lib" + name=$func_basename_result + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "'$lib' is not a valid libtool archive" + func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name" + ;; + esac + done + dlprefiles=$newdlprefiles + else + newdlfiles= + for lib in $dlfiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs=$lib ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlfiles " $abs" + done + dlfiles=$newdlfiles + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs=$lib ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlprefiles " $abs" + done + dlprefiles=$newdlprefiles + fi + $RM $output + # place dlname in correct position for cygwin + # In fact, it would be nice if we could use this code for all target + # systems that can't hard-code library paths into their executables + # and that have no shared library path variable independent of PATH, + # but it turns out we can't easily determine that from inspecting + # libtool variables, so we have to hard-code the OSs to which it + # applies here; at the moment, that means platforms that use the PE + # object format with DLL files. 
See the long comment at the top of + # tests/bindir.at for full details. + tdlname=$dlname + case $host,$output,$installed,$module,$dlname in + *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) + # If a -bindir argument was supplied, place the dll there. + if test -n "$bindir"; then + func_relative_path "$install_libdir" "$bindir" + tdlname=$func_relative_path_result/$dlname + else + # Otherwise fall back on heuristic. + tdlname=../bin/$dlname + fi + ;; + esac + $ECHO > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM (GNU $PACKAGE) $VERSION +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='$tdlname' + +# Names of this library. +library_names='$library_names' + +# The name of the static archive. +old_library='$old_library' + +# Linker flags that cannot go in dependency_libs. +inherited_linker_flags='$new_inherited_linker_flags' + +# Libraries that this one depends upon. +dependency_libs='$dependency_libs' + +# Names of additional weak libraries provided by this library +weak_library_names='$weak_libs' + +# Version information for $libname. +current=$current +age=$age +revision=$revision + +# Is this an already installed library? +installed=$installed + +# Should we warn about portability when linking against -modules? +shouldnotlink=$module + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + +# Directory that this library needs to be installed in: +libdir='$install_libdir'" + if test no,yes = "$installed,$need_relink"; then + $ECHO >> $output "\ +relink_command=\"$relink_command\"" + fi + done + } + + # Do a symbolic link so that the libtool archive can be found in + # LD_LIBRARY_PATH before the program is installed. + func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?' 
+ ;; + esac + exit $EXIT_SUCCESS +} + +if test link = "$opt_mode" || test relink = "$opt_mode"; then + func_mode_link ${1+"$@"} +fi + + +# func_mode_uninstall arg... +# Implements the 'uninstall' and 'clean' modes. The removal program is +# taken from $nonopt; every ARG starting with '-' is appended to it as an +# option ('-f' also sets rmforce), and every other ARG is a file to remove. +# For each file, the companion files recorded in a libtool archive (.la), +# a libtool object (.lo) or, in clean mode, a program wrapper are removed +# as well. Exits with $exit_status, which is set to 1 when a named file +# is a directory; 'exit_status=1' is also passed as the extra argument of +# the removal and postuninstall commands (presumably run when they fail). +func_mode_uninstall () +{ + $debug_cmd + + RM=$nonopt + files= + rmforce=false + exit_status=0 + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic=$magic + + # Split the arguments into options for RM and files to remove. + for arg + do + case $arg in + -f) func_append RM " $arg"; rmforce=: ;; + -*) func_append RM " $arg" ;; + *) func_append files " $arg" ;; + esac + done + + test -z "$RM" && \ + func_fatal_help "you must specify an RM program" + + rmdirs= + + for file in $files; do + func_dirname "$file" "" "." + dir=$func_dirname_result + if test . = "$dir"; then + odir=$objdir + else + odir=$dir/$objdir + fi + func_basename "$file" + name=$func_basename_result + # In uninstall mode, use the file's own directory instead of $objdir. + test uninstall = "$opt_mode" && odir=$dir + + # Remember odir for removal later, being careful to avoid duplicates + if test clean = "$opt_mode"; then + case " $rmdirs " in + *" $odir "*) ;; + *) func_append rmdirs " $odir" ;; + esac + fi + + # Don't error if the file doesn't exist and rm -f was used. + if { test -L "$file"; } >/dev/null 2>&1 || + { test -h "$file"; } >/dev/null 2>&1 || + test -f "$file"; then + : + elif test -d "$file"; then + # A directory is never removed here: record the failure and skip it. + exit_status=1 + continue + elif $rmforce; then + continue + fi + + rmfiles=$file + + case $name in + *.la) + # Possibly a libtool archive, so verify it. + if func_lalib_p "$file"; then + func_source $dir/$name + + # Delete the libtool libraries and symlinks. 
+ for n in $library_names; do + func_append rmfiles " $odir/$n" + done + test -n "$old_library" && func_append rmfiles " $odir/$old_library" + + case $opt_mode in + clean) + # Add the dlname unless it is already one of the library names. + case " $library_names " in + *" $dlname "*) ;; + *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;; + esac + # When libdir is set, also add $name and ${name}i from $odir. + test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i" + ;; + uninstall) + if test -n "$library_names"; then + # Do each command in the postuninstall commands. + func_execute_cmds "$postuninstall_cmds" '$rmforce || exit_status=1' + fi + + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + func_execute_cmds "$old_postuninstall_cmds" '$rmforce || exit_status=1' + fi + # FIXME: should reinstall the best remaining shared library. + ;; + esac + fi + ;; + + *.lo) + # Possibly a libtool object, so verify it. + if func_lalib_p "$file"; then + + # Read the .lo file + func_source $dir/$name + + # Add PIC object to the list of files to remove. + if test -n "$pic_object" && test none != "$pic_object"; then + func_append rmfiles " $dir/$pic_object" + fi + + # Add non-PIC object to the list of files to remove. + if test -n "$non_pic_object" && test none != "$non_pic_object"; then + func_append rmfiles " $dir/$non_pic_object" + fi + fi + ;; + + *) + if test clean = "$opt_mode"; then + noexename=$name + case $file in + *.exe) + func_stripname '' '.exe' "$file" + file=$func_stripname_result + func_stripname '' '.exe' "$name" + noexename=$func_stripname_result + # $file with .exe has already been added to rmfiles, + # add $file without .exe + func_append rmfiles " $file" + ;; + esac + # Do a test to see if this is a libtool program. 
+ if func_ltwrapper_p "$file"; then + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + relink_command= + func_source $func_ltwrapper_scriptname_result + func_append rmfiles " $func_ltwrapper_scriptname_result" + else + relink_command= + func_source $dir/$noexename + fi + + # note $name still contains .exe if it was in $file originally + # as does the version of $file that was added into $rmfiles + func_append rmfiles " $odir/$name $odir/${name}S.$objext" + if test yes = "$fast_install" && test -n "$relink_command"; then + func_append rmfiles " $odir/lt-$name" + fi + if test "X$noexename" != "X$name"; then + func_append rmfiles " $odir/lt-$noexename.c" + fi + fi + fi + ;; + esac + # Remove everything collected for this file. + func_show_eval "$RM $rmfiles" 'exit_status=1' + done + + # Try to remove the $objdir's in the directories where we deleted files + for dir in $rmdirs; do + if test -d "$dir"; then + func_show_eval "rmdir $dir >/dev/null 2>&1" + fi + done + + exit $exit_status +} + +if test uninstall = "$opt_mode" || test clean = "$opt_mode"; then + func_mode_uninstall ${1+"$@"} +fi + +test -z "$opt_mode" && { + help=$generic_help + func_fatal_help "you must specify a MODE" +} + +test -z "$exec_cmd" && \ + func_fatal_help "invalid operation mode '$opt_mode'" + +# Replace this shell with the prepared command; the exit below is only +# reached if exec does not take over. +if test -n "$exec_cmd"; then + eval exec "$exec_cmd" + exit $EXIT_FAILURE +fi + +exit $exit_status + + +# The TAGs below are defined such that we never get into a situation +# where we disable both kinds of libraries. Given conflicting +# choices, we go for a static library, that is the most portable, +# since we can't tell whether shared libraries were disabled because +# the user asked for that or because the platform doesn't support +# them. This is particularly important on AIX, because we don't +# support having both static and shared libraries enabled at the same +# time on that platform, so we default to a shared-only configuration. 
+# If a disable-shared tag is given, we'll fall back to a static-only +# configuration. But we'll never go from static-only to shared-only. + +# ### BEGIN LIBTOOL TAG CONFIG: disable-shared +build_libtool_libs=no +build_old_libs=yes +# ### END LIBTOOL TAG CONFIG: disable-shared + +# ### BEGIN LIBTOOL TAG CONFIG: disable-static +build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` +# ### END LIBTOOL TAG CONFIG: disable-static + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: diff --git a/missing b/missing new file mode 100755 index 0000000..625aeb1 --- /dev/null +++ b/missing @@ -0,0 +1,215 @@ +#! /bin/sh +# Common wrapper for a few potentially missing GNU programs. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1996-2018 Free Software Foundation, Inc. +# Originally written by Fran,cois Pinard, 1996. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# At least one argument (an option or the program to run) is required. +if test $# -eq 0; then + echo 1>&2 "Try '$0 --help' for more information" + exit 1 +fi + +# Handle the wrapper's own options; any other first argument falls +# through and is run as the program below. +case $1 in + + --is-lightweight) + # Used by our autoconf macros to check whether the available missing + # script is modern enough. 
+ exit 0 + ;; + + --run) + # Back-compat with the calling convention used by older automake. + shift + ;; + + -h|--h|--he|--hel|--help) + echo "\ +$0 [OPTION]... PROGRAM [ARGUMENT]... + +Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due +to PROGRAM being missing or too old. + +Options: + -h, --help display this help and exit + -v, --version output version information and exit + +Supported PROGRAM values: + aclocal autoconf autoheader autom4te automake makeinfo + bison yacc flex lex help2man + +Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and +'g' are ignored when checking the name. + +Send bug reports to ." + exit $? + ;; + + -v|--v|--ve|--ver|--vers|--versi|--versio|--version) + echo "missing $scriptversion (GNU Automake)" + exit $? + ;; + + -*) + echo 1>&2 "$0: unknown '$1' option" + echo 1>&2 "Try '$0 --help' for more information" + exit 1 + ;; + +esac + +# Run the given program, remember its exit status. +"$@"; st=$? + +# If it succeeded, we are done. +test $st -eq 0 && exit 0 + +# Also exit now if it failed (or wasn't found), and '--version' or +# '--help' was passed; such an option is passed most likely to detect +# whether the program is present and works. +case $2 in --version|--help) exit $st;; esac + +# Exit code 63 means version mismatch. This often happens when the user +# tries to use an ancient version of a tool on a file that requires a +# minimum version. +if test $st -eq 63; then + msg="probably too old" +elif test $st -eq 127; then + # Program was missing. + msg="missing on your system" +else + # Program was found and executed, but failed. Give up. 
+ exit $st +fi + +perl_URL=https://www.perl.org/ +flex_URL=https://github.com/westes/flex +gnu_software_URL=https://www.gnu.org/software + +# program_details PROGRAM +# Print which package provides PROGRAM and which other tools it requires, +# with their URLs. Prints nothing unless PROGRAM is one of aclocal, +# automake, autoconf, autom4te or autoheader. +program_details () +{ + case $1 in + aclocal|automake) + echo "The '$1' program is part of the GNU Automake package:" + echo "<$gnu_software_URL/automake>" + echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/autoconf>" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + autoconf|autom4te|autoheader) + echo "The '$1' program is part of the GNU Autoconf package:" + echo "<$gnu_software_URL/autoconf/>" + echo "It also requires GNU m4 and Perl in order to run:" + echo "<$gnu_software_URL/m4/>" + echo "<$perl_URL>" + ;; + esac +} + +# give_advice PROGRAM +# Print "'PROGRAM' is $msg." followed by a hint about when the program is +# needed and where to obtain it. At most one of the prefixes 'gnu-', +# 'gnu' and 'g' is stripped from PROGRAM before it is matched. +give_advice () +{ + # Normalize program name to check for. + normalized_program=`echo "$1" | sed ' + s/^gnu-//; t + s/^gnu//; t + s/^g//; t'` + + printf '%s\n' "'$1' is $msg." + + configure_deps="'configure.ac' or m4 files included by 'configure.ac'" + case $normalized_program in + autoconf*) + echo "You should only need it if you modified 'configure.ac'," + echo "or m4 files included by it." + program_details 'autoconf' + ;; + autoheader*) + echo "You should only need it if you modified 'acconfig.h' or" + echo "$configure_deps." + program_details 'autoheader' + ;; + automake*) + echo "You should only need it if you modified 'Makefile.am' or" + echo "$configure_deps." + program_details 'automake' + ;; + aclocal*) + echo "You should only need it if you modified 'acinclude.m4' or" + echo "$configure_deps." + program_details 'aclocal' + ;; + autom4te*) + echo "You might have modified some maintainer files that require" + echo "the 'autom4te' program to be rebuilt." + program_details 'autom4te' + ;; + bison*|yacc*) + echo "You should only need it if you modified a '.y' file." + echo "You may want to install the GNU Bison package:" + echo "<$gnu_software_URL/bison/>" + ;; + lex*|flex*) + echo "You should only need it if you modified a '.l' file." 
+ echo "You may want to install the Fast Lexical Analyzer package:" + echo "<$flex_URL>" + ;; + help2man*) + echo "You should only need it if you modified a dependency" \ + "of a man page." + echo "You may want to install the GNU Help2man package:" + echo "<$gnu_software_URL/help2man/>" + ;; + makeinfo*) + echo "You should only need it if you modified a '.texi' file, or" + echo "any other file indirectly affecting the aspect of the manual." + echo "You might want to install the Texinfo package:" + echo "<$gnu_software_URL/texinfo/>" + echo "The spurious makeinfo call might also be the consequence of" + echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" + echo "want to install GNU make:" + echo "<$gnu_software_URL/make/>" + ;; + *) + echo "You might have modified some files without having the proper" + echo "tools for further handling them. Check the 'README' file, it" + echo "often tells you about the needed prerequisites for installing" + echo "this package. You may also peek at any GNU archive site, in" + echo "case some other package contains this missing '$1' program." + ;; + esac +} + +# Send the advice to stderr: the first line gets a 'WARNING: ' prefix and +# every following line is indented. +give_advice "$1" | sed -e '1s/^/WARNING: /' \ + -e '2,$s/^/ /' >&2 + +# Propagate the correct exit status (expected to be 127 for a program +# not found, 63 for a program that failed due to version mismatch). +exit $st + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/src/tools/info/Makefile.am b/src/tools/info/Makefile.am new file mode 100644 index 0000000..7bcf886 --- /dev/null +++ b/src/tools/info/Makefile.am @@ -0,0 +1,46 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + +bin_PROGRAMS = ucx_info + +BUILT_SOURCES = build_config.h +DISTCLEANFILES = build_config.h build_config.h.tmp + +# +# Produce a C header file which contains all defined variables from config.h: +# every '#define NAME [VALUE]' line becomes one +# {"NAME", UCS_PP_MAKE_STRING(VALUE)}, entry, where VALUE is a single word, +# a double-quoted string, or empty. +# The output goes to a temporary file that is renamed into place only when +# sed succeeds, so a failed run cannot leave a truncated header that make +# would treat as up to date. +# NOTE: the expression relies on sed extensions (-r, \s, \w) that POSIX sed +# does not provide. +# +build_config.h: $(top_builddir)/config.h Makefile + $(SED) -nr 's:\s*#define\s+(\w+)(\s+(\w+)|\s+(".*")|\s*)$$:{"\1", UCS_PP_MAKE_STRING(\3\4)},:p' <$(top_builddir)/config.h >$@.tmp && mv -f $@.tmp $@ + +ucx_info_CPPFLAGS = $(BASE_CPPFLAGS) +ucx_info_CFLAGS = $(BASE_CFLAGS) +if HAVE_IB +ucx_info_CPPFLAGS += $(IBVERBS_CPPFLAGS) +endif +if HAVE_CRAY_UGNI +ucx_info_CFLAGS += $(CRAY_UGNI_CFLAGS) +endif + +ucx_info_SOURCES = \ + build_info.c \ + proto_info.c \ + sys_info.c \ + tl_info.c \ + type_info.c \ + ucx_info.c + +noinst_HEADERS = \ + ucx_info.h + +nodist_ucx_info_SOURCES = \ + build_config.h + +ucx_info_LDADD = \ + $(abs_top_builddir)/src/uct/libuct.la \ + $(abs_top_builddir)/src/ucp/libucp.la \ + $(abs_top_builddir)/src/ucs/libucs.la \ + $(abs_top_builddir)/src/ucm/libucm.la diff --git a/src/tools/info/Makefile.in b/src/tools/info/Makefile.in new file mode 100644 index 0000000..005eec1 --- /dev/null +++ b/src/tools/info/Makefile.in @@ -0,0 +1,938 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : 
+NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +bin_PROGRAMS = ucx_info$(EXEEXT) +@HAVE_IB_TRUE@am__append_1 = $(IBVERBS_CPPFLAGS) +@HAVE_CRAY_UGNI_TRUE@am__append_2 = $(CRAY_UGNI_CFLAGS) +subdir = src/tools/info +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + 
$(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +PROGRAMS = $(bin_PROGRAMS) +am_ucx_info_OBJECTS = ucx_info-build_info.$(OBJEXT) \ + ucx_info-proto_info.$(OBJEXT) ucx_info-sys_info.$(OBJEXT) \ + ucx_info-tl_info.$(OBJEXT) ucx_info-type_info.$(OBJEXT) \ + ucx_info-ucx_info.$(OBJEXT) +nodist_ucx_info_OBJECTS = +ucx_info_OBJECTS = $(am_ucx_info_OBJECTS) $(nodist_ucx_info_OBJECTS) +ucx_info_DEPENDENCIES = $(abs_top_builddir)/src/uct/libuct.la \ + $(abs_top_builddir)/src/ucp/libucp.la \ + $(abs_top_builddir)/src/ucs/libucs.la \ + $(abs_top_builddir)/src/ucm/libucm.la +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +ucx_info_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(ucx_info_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/ucx_info-build_info.Po \ + ./$(DEPDIR)/ucx_info-proto_info.Po \ + ./$(DEPDIR)/ucx_info-sys_info.Po \ + 
./$(DEPDIR)/ucx_info-tl_info.Po \ + ./$(DEPDIR)/ucx_info-type_info.Po \ + ./$(DEPDIR)/ucx_info-ucx_info.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(ucx_info_SOURCES) $(nodist_ucx_info_SOURCES) +DIST_SOURCES = $(ucx_info_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ 
+EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = 
@ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ 
+top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +BUILT_SOURCES = build_config.h +DISTCLEANFILES = build_config.h +ucx_info_CPPFLAGS = $(BASE_CPPFLAGS) $(am__append_1) +ucx_info_CFLAGS = $(BASE_CFLAGS) $(am__append_2) +ucx_info_SOURCES = \ + build_info.c \ + proto_info.c \ + sys_info.c \ + tl_info.c \ + type_info.c \ + ucx_info.c + +noinst_HEADERS = \ + ucx_info.h + +nodist_ucx_info_SOURCES = \ + build_config.h + +ucx_info_LDADD = \ + $(abs_top_builddir)/src/uct/libuct.la \ + $(abs_top_builddir)/src/ucp/libucp.la \ + $(abs_top_builddir)/src/ucs/libucs.la \ + $(abs_top_builddir)/src/ucm/libucm.la + +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/tools/info/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/tools/info/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + 
} \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +ucx_info$(EXEEXT): $(ucx_info_OBJECTS) $(ucx_info_DEPENDENCIES) $(EXTRA_ucx_info_DEPENDENCIES) + @rm -f ucx_info$(EXEEXT) + $(AM_V_CCLD)$(ucx_info_LINK) $(ucx_info_OBJECTS) $(ucx_info_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucx_info-build_info.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucx_info-proto_info.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucx_info-sys_info.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucx_info-tl_info.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucx_info-type_info.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucx_info-ucx_info.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +ucx_info-build_info.o: build_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-build_info.o -MD -MP -MF $(DEPDIR)/ucx_info-build_info.Tpo -c -o ucx_info-build_info.o `test -f 'build_info.c' || echo '$(srcdir)/'`build_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-build_info.Tpo $(DEPDIR)/ucx_info-build_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='build_info.c' object='ucx_info-build_info.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-build_info.o `test -f 'build_info.c' || echo '$(srcdir)/'`build_info.c + +ucx_info-build_info.obj: build_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-build_info.obj -MD -MP -MF $(DEPDIR)/ucx_info-build_info.Tpo -c -o ucx_info-build_info.obj `if test -f 'build_info.c'; then $(CYGPATH_W) 'build_info.c'; else $(CYGPATH_W) '$(srcdir)/build_info.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-build_info.Tpo $(DEPDIR)/ucx_info-build_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='build_info.c' object='ucx_info-build_info.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-build_info.obj `if test -f 'build_info.c'; then $(CYGPATH_W) 'build_info.c'; else $(CYGPATH_W) '$(srcdir)/build_info.c'; fi` + +ucx_info-proto_info.o: proto_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-proto_info.o -MD -MP -MF $(DEPDIR)/ucx_info-proto_info.Tpo -c -o ucx_info-proto_info.o `test -f 'proto_info.c' || echo '$(srcdir)/'`proto_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-proto_info.Tpo $(DEPDIR)/ucx_info-proto_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='proto_info.c' object='ucx_info-proto_info.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) 
$(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-proto_info.o `test -f 'proto_info.c' || echo '$(srcdir)/'`proto_info.c + +ucx_info-proto_info.obj: proto_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-proto_info.obj -MD -MP -MF $(DEPDIR)/ucx_info-proto_info.Tpo -c -o ucx_info-proto_info.obj `if test -f 'proto_info.c'; then $(CYGPATH_W) 'proto_info.c'; else $(CYGPATH_W) '$(srcdir)/proto_info.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-proto_info.Tpo $(DEPDIR)/ucx_info-proto_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='proto_info.c' object='ucx_info-proto_info.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-proto_info.obj `if test -f 'proto_info.c'; then $(CYGPATH_W) 'proto_info.c'; else $(CYGPATH_W) '$(srcdir)/proto_info.c'; fi` + +ucx_info-sys_info.o: sys_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-sys_info.o -MD -MP -MF $(DEPDIR)/ucx_info-sys_info.Tpo -c -o ucx_info-sys_info.o `test -f 'sys_info.c' || echo '$(srcdir)/'`sys_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-sys_info.Tpo $(DEPDIR)/ucx_info-sys_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys_info.c' object='ucx_info-sys_info.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-sys_info.o `test -f 'sys_info.c' || echo 
'$(srcdir)/'`sys_info.c + +ucx_info-sys_info.obj: sys_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-sys_info.obj -MD -MP -MF $(DEPDIR)/ucx_info-sys_info.Tpo -c -o ucx_info-sys_info.obj `if test -f 'sys_info.c'; then $(CYGPATH_W) 'sys_info.c'; else $(CYGPATH_W) '$(srcdir)/sys_info.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-sys_info.Tpo $(DEPDIR)/ucx_info-sys_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys_info.c' object='ucx_info-sys_info.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-sys_info.obj `if test -f 'sys_info.c'; then $(CYGPATH_W) 'sys_info.c'; else $(CYGPATH_W) '$(srcdir)/sys_info.c'; fi` + +ucx_info-tl_info.o: tl_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-tl_info.o -MD -MP -MF $(DEPDIR)/ucx_info-tl_info.Tpo -c -o ucx_info-tl_info.o `test -f 'tl_info.c' || echo '$(srcdir)/'`tl_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-tl_info.Tpo $(DEPDIR)/ucx_info-tl_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tl_info.c' object='ucx_info-tl_info.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-tl_info.o `test -f 'tl_info.c' || echo '$(srcdir)/'`tl_info.c + +ucx_info-tl_info.obj: tl_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-tl_info.obj -MD -MP -MF $(DEPDIR)/ucx_info-tl_info.Tpo -c -o ucx_info-tl_info.obj `if test -f 'tl_info.c'; then $(CYGPATH_W) 'tl_info.c'; else $(CYGPATH_W) '$(srcdir)/tl_info.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-tl_info.Tpo $(DEPDIR)/ucx_info-tl_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tl_info.c' object='ucx_info-tl_info.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-tl_info.obj `if test -f 'tl_info.c'; then $(CYGPATH_W) 'tl_info.c'; else $(CYGPATH_W) '$(srcdir)/tl_info.c'; fi` + +ucx_info-type_info.o: type_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-type_info.o -MD -MP -MF $(DEPDIR)/ucx_info-type_info.Tpo -c -o ucx_info-type_info.o `test -f 'type_info.c' || echo '$(srcdir)/'`type_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-type_info.Tpo $(DEPDIR)/ucx_info-type_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='type_info.c' object='ucx_info-type_info.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-type_info.o `test -f 'type_info.c' || echo '$(srcdir)/'`type_info.c + +ucx_info-type_info.obj: type_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-type_info.obj -MD -MP -MF $(DEPDIR)/ucx_info-type_info.Tpo -c 
-o ucx_info-type_info.obj `if test -f 'type_info.c'; then $(CYGPATH_W) 'type_info.c'; else $(CYGPATH_W) '$(srcdir)/type_info.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-type_info.Tpo $(DEPDIR)/ucx_info-type_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='type_info.c' object='ucx_info-type_info.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-type_info.obj `if test -f 'type_info.c'; then $(CYGPATH_W) 'type_info.c'; else $(CYGPATH_W) '$(srcdir)/type_info.c'; fi` + +ucx_info-ucx_info.o: ucx_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-ucx_info.o -MD -MP -MF $(DEPDIR)/ucx_info-ucx_info.Tpo -c -o ucx_info-ucx_info.o `test -f 'ucx_info.c' || echo '$(srcdir)/'`ucx_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-ucx_info.Tpo $(DEPDIR)/ucx_info-ucx_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ucx_info.c' object='ucx_info-ucx_info.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-ucx_info.o `test -f 'ucx_info.c' || echo '$(srcdir)/'`ucx_info.c + +ucx_info-ucx_info.obj: ucx_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -MT ucx_info-ucx_info.obj -MD -MP -MF $(DEPDIR)/ucx_info-ucx_info.Tpo -c -o ucx_info-ucx_info.obj `if test -f 'ucx_info.c'; then $(CYGPATH_W) 'ucx_info.c'; else $(CYGPATH_W) '$(srcdir)/ucx_info.c'; 
fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_info-ucx_info.Tpo $(DEPDIR)/ucx_info-ucx_info.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ucx_info.c' object='ucx_info-ucx_info.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_info_CPPFLAGS) $(CPPFLAGS) $(ucx_info_CFLAGS) $(CFLAGS) -c -o ucx_info-ucx_info.obj `if test -f 'ucx_info.c'; then $(CYGPATH_W) 'ucx_info.c'; else $(CYGPATH_W) '$(srcdir)/ucx_info.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS 
GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(PROGRAMS) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/ucx_info-build_info.Po + -rm -f ./$(DEPDIR)/ucx_info-proto_info.Po + -rm -f ./$(DEPDIR)/ucx_info-sys_info.Po + -rm -f ./$(DEPDIR)/ucx_info-tl_info.Po + -rm -f ./$(DEPDIR)/ucx_info-type_info.Po + -rm -f ./$(DEPDIR)/ucx_info-ucx_info.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/ucx_info-build_info.Po + -rm -f ./$(DEPDIR)/ucx_info-proto_info.Po + -rm -f ./$(DEPDIR)/ucx_info-sys_info.Po + -rm -f ./$(DEPDIR)/ucx_info-tl_info.Po + -rm -f ./$(DEPDIR)/ucx_info-type_info.Po + -rm -f ./$(DEPDIR)/ucx_info-ucx_info.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS + +.MAKE: all check install install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-binPROGRAMS \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec 
install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-binPROGRAMS + +.PRECIOUS: Makefile + + +# +# Produce a C header file which contains all defined variables from config.h +# +build_config.h: $(top_builddir)/config.h Makefile + $(SED) -nr 's:\s*#define\s+(\w+)(\s+(\w+)|\s+(".*")|\s*)$$:{"\1", UCS_PP_MAKE_STRING(\3\4)},:p' <$(top_builddir)/config.h >$@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/tools/info/build_info.c b/src/tools/info/build_info.c new file mode 100644 index 0000000..f851e1f --- /dev/null +++ b/src/tools/info/build_info.c @@ -0,0 +1,37 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucx_info.h" + +#include + + +void print_version() +{ + printf("# UCT version=%s revision %s\n", UCT_VERNO_STRING, UCT_SCM_VERSION); + printf("# configured with: %s\n", UCX_CONFIGURE_FLAGS); +} + +void print_build_config() +{ + typedef struct { + const char *name; + const char *value; + } config_var_t; + static config_var_t config_vars[] = { + #include + {NULL, NULL} + }; + config_var_t *var; + + for (var = config_vars; var->name != NULL; ++var) { + printf("#define %-25s %s\n", var->name, var->value); + } +} diff --git a/src/tools/info/proto_info.c b/src/tools/info/proto_info.c new file mode 100644 index 0000000..4c74cd0 --- /dev/null +++ b/src/tools/info/proto_info.c @@ -0,0 +1,211 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. 
+ * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucx_info.h" + +#include +#include +#include +#include +#include +#include +#include + + +typedef struct { + ucs_time_t time; + long memory; + int num_fds; +} resource_usage_t; + + +static int get_num_fds() +{ + static const char *fds_dir = "/proc/self/fd"; + struct dirent *entry; + int num_fds; + DIR *dir; + + dir = opendir(fds_dir); + if (dir == NULL) { + return -1; + } + + num_fds = 0; + for (;;) { + errno = 0; + entry = readdir(dir); + if (entry == NULL) { + closedir(dir); + if (errno == 0) { + return num_fds; + } else { + return -1; + } + } + + if (strncmp(entry->d_name, ".", 1)) { + ++num_fds; + } + } +} + +static void get_resource_usage(resource_usage_t *usage) +{ + struct rusage rusage; + int ret; + + usage->time = ucs_get_time(); + + ret = getrusage(RUSAGE_SELF, &rusage); + if (ret == 0) { + usage->memory = rusage.ru_maxrss * 1024; + } else { + usage->memory = -1; + } + + usage->num_fds = get_num_fds(); +} + +static void print_resource_usage(const resource_usage_t *usage_before, + const char *title) +{ + resource_usage_t usage_after; + + get_resource_usage(&usage_after); + + if ((usage_after.memory != -1) && (usage_before->memory != -1) && + (usage_after.num_fds != -1) && (usage_before->num_fds != -1)) + { + printf("# memory: %.2fMB, file descriptors: %d\n", + (usage_after.memory - usage_before->memory) / (1024.0 * 1024.0), + (usage_after.num_fds - usage_before->num_fds)); + } + printf("# create time: %.3f ms\n", + ucs_time_to_msec(usage_after.time - usage_before->time)); + printf("#\n"); +} + +void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags, + uint64_t ctx_features, const ucp_ep_params_t *base_ep_params, + size_t estimated_num_eps, size_t estimated_num_ppn, + unsigned dev_type_bitmap, const char *mem_size) +{ + ucp_config_t *config; + ucs_status_t status; + ucs_status_ptr_t status_ptr; + ucp_context_h context; + ucp_worker_h 
worker; + ucp_params_t params; + ucp_worker_params_t worker_params; + ucp_ep_params_t ep_params; + ucp_address_t *address; + size_t address_length; + resource_usage_t usage; + ucp_ep_h ep; + + status = ucp_config_read(NULL, NULL, &config); + if (status != UCS_OK) { + return; + } + + memset(¶ms, 0, sizeof(params)); + params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_ESTIMATED_NUM_EPS | + UCP_PARAM_FIELD_ESTIMATED_NUM_PPN; + params.features = ctx_features; + params.estimated_num_eps = estimated_num_eps; + params.estimated_num_ppn = estimated_num_ppn; + + get_resource_usage(&usage); + + if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_SELF))) { + ucp_config_modify(config, "SELF_DEVICES", ""); + } + if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_SHM))) { + ucp_config_modify(config, "SHM_DEVICES", ""); + } + if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_NET))) { + ucp_config_modify(config, "NET_DEVICES", ""); + } + + status = ucp_init(¶ms, config, &context); + if (status != UCS_OK) { + printf("\n"); + goto out_release_config; + } + + if ((print_opts & PRINT_MEM_MAP) && (mem_size != NULL)) { + ucp_mem_print_info(mem_size, context, stdout); + } + + if (print_opts & PRINT_UCP_CONTEXT) { + ucp_context_print_info(context, stdout); + print_resource_usage(&usage, "UCP context"); + } + + if (!(print_opts & (PRINT_UCP_WORKER|PRINT_UCP_EP))) { + goto out_cleanup_context; + } + + worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; + worker_params.thread_mode = UCS_THREAD_MODE_MULTI; + + get_resource_usage(&usage); + + status = ucp_worker_create(context, &worker_params, &worker); + if (status != UCS_OK) { + printf("\n"); + goto out_cleanup_context; + } + + if (print_opts & PRINT_UCP_WORKER) { + ucp_worker_print_info(worker, stdout); + print_resource_usage(&usage, "UCP worker"); + } + + if (print_opts & PRINT_UCP_EP) { + status = ucp_worker_get_address(worker, &address, &address_length); + if (status != UCS_OK) { + printf("\n"); + goto out_destroy_worker; 
+ } + + ep_params = *base_ep_params; + + ep_params.field_mask |= UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; + ep_params.address = address; + + status = ucp_ep_create(worker, &ep_params, &ep); + ucp_worker_release_address(worker, address); + if (status != UCS_OK) { + printf("\n"); + goto out_destroy_worker; + } + + ucp_ep_print_info(ep, stdout); + + status_ptr = ucp_disconnect_nb(ep); + if (UCS_PTR_IS_PTR(status_ptr)) { + do { + ucp_worker_progress(worker); + status = ucp_request_test(status_ptr, NULL); + } while (status == UCS_INPROGRESS); + ucp_request_release(status_ptr); + } + } + +out_destroy_worker: + ucp_worker_destroy(worker); +out_cleanup_context: + ucp_cleanup(context); +out_release_config: + ucp_config_release(config); +} diff --git a/src/tools/info/sys_info.c b/src/tools/info/sys_info.c new file mode 100644 index 0000000..435e5a6 --- /dev/null +++ b/src/tools/info/sys_info.c @@ -0,0 +1,94 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucx_info.h" + +#include +#include +#include +#include +#include +#include + + +static const char* cpu_model_names[] = { + [UCS_CPU_MODEL_UNKNOWN] = "unknown", + [UCS_CPU_MODEL_INTEL_IVYBRIDGE] = "IvyBridge", + [UCS_CPU_MODEL_INTEL_SANDYBRIDGE] = "SandyBridge", + [UCS_CPU_MODEL_INTEL_NEHALEM] = "Nehalem", + [UCS_CPU_MODEL_INTEL_WESTMERE] = "Westmere", + [UCS_CPU_MODEL_INTEL_HASWELL] = "Haswell", + [UCS_CPU_MODEL_INTEL_BROADWELL] = "Broadwell", + [UCS_CPU_MODEL_INTEL_SKYLAKE] = "Skylake", + [UCS_CPU_MODEL_ARM_AARCH64] = "ARM 64-bit", + [UCS_CPU_MODEL_AMD_NAPLES] = "Naples", + [UCS_CPU_MODEL_AMD_ROME] = "Rome" +}; + +static const char* cpu_vendor_names[] = { + [UCS_CPU_VENDOR_UNKNOWN] = "unknown", + [UCS_CPU_VENDOR_INTEL] = "Intel", + [UCS_CPU_VENDOR_AMD] = "AMD", + [UCS_CPU_VENDOR_GENERIC_ARM] = "Generic ARM", + [UCS_CPU_VENDOR_GENERIC_PPC] = "Generic PPC" +}; + +/* + * Repeatedly copy 'size' bytes between two anonymous private mappings until + * ucs_time_from_sec(0.5) has elapsed, and return the bytes copied divided by + * the elapsed time as converted by ucs_time_to_sec(). Returns 0.0 when either + * mapping cannot be created. + */ +static double measure_memcpy_bandwidth(size_t size) +{ + ucs_time_t start_time, end_time; + void *src, *dst; + double result = 0.0; + int iter; + + src = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (src == MAP_FAILED) { + goto out; + } + + dst = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (dst == MAP_FAILED) { + goto out_unmap_src; + } + + /* Touch both buffers and do one untimed copy before measuring */ + memset(dst, 0, size); + memset(src, 0, size); + memcpy(dst, src, size); + + iter = 0; + start_time = ucs_get_time(); + do { + ucs_memcpy_relaxed(dst, src, size); + end_time = ucs_get_time(); + ++iter; + } while (end_time < start_time + ucs_time_from_sec(0.5)); + + /* Multiply in double: size * iter in size_t can wrap where size_t is 32 bits */ + result = (double)size * iter / ucs_time_to_sec(end_time - start_time); + + munmap(dst, size); +out_unmap_src: + munmap(src, size); +out: + return result; +} + +void print_sys_info() +{ + size_t size; + + printf("# Timer frequency: %.3f MHz\n", ucs_get_cpu_clocks_per_sec() / 1e6); + printf("# CPU vendor: %s\n", cpu_vendor_names[ucs_arch_get_cpu_vendor()]); + printf("# CPU model: 
%s\n", cpu_model_names[ucs_arch_get_cpu_model()]); + ucs_arch_print_memcpy_limits(&ucs_global_opts.arch); + printf("# Memcpy bandwidth:\n"); + for (size = 4096; size <= 256 * UCS_MBYTE; size *= 2) { + printf("# %10zu bytes: %.3f MB/s\n", size, + measure_memcpy_bandwidth(size) / UCS_MBYTE); + } +} diff --git a/src/tools/info/tl_info.c b/src/tools/info/tl_info.c new file mode 100644 index 0000000..eee0380 --- /dev/null +++ b/src/tools/info/tl_info.c @@ -0,0 +1,597 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucx_info.h" + +#include +#include +#include +#include +#include +#include + + +#define PRINT_CAP(_name, _cap_flags, _max) \ + if ((_cap_flags) & (UCT_IFACE_FLAG_##_name)) { \ + char *s = strduplower(#_name); \ + printf("# %15s: %s\n", s, size_limit_to_str(0, _max)); \ + free(s); \ + } + +#define PRINT_ZCAP_NO_CHECK(_name, _min, _max, _max_iov) \ + { \ + char *s = strduplower(#_name); \ + printf("# %15s: %s, up to %zu iov\n", s, \ + size_limit_to_str((_min), (_max)), (_max_iov)); \ + free(s); \ + } + +#define PRINT_ZCAP(_name, _cap_flags, _min, _max, _max_iov) \ + if ((_cap_flags) & (UCT_IFACE_FLAG_##_name)) { \ + PRINT_ZCAP_NO_CHECK(_name, _min, _max, _max_iov) \ + } + +#define PRINT_ATOMIC_POST(_name, _cap) \ + print_atomic_info(UCT_ATOMIC_OP_##_name, #_name, "", \ + _cap.atomic32.op_flags, _cap.atomic64.op_flags); + +#define PRINT_ATOMIC_FETCH(_name, _cap, _suffix) \ + print_atomic_info(UCT_ATOMIC_OP_##_name, #_name, _suffix, \ + _cap.atomic32.fop_flags, _cap.atomic64.fop_flags); + +static char *strduplower(const char *str) +{ + char *s, *p; + + s = strdup(str); + for (p = s; *p; ++p) { + *p = tolower(*p); + } + return s; +} + +static void print_atomic_info(uct_atomic_op_t opcode, const char *name, + const char *suffix, uint64_t op32, uint64_t op64) +{ + char amo[256] = "atomic_"; + char *s; + + if ((op32 & 
UCS_BIT(opcode)) || (op64 & UCS_BIT(opcode))) { + s = strduplower(name); + strncat(amo, suffix, sizeof(amo) - strlen(amo) - 1); + strncat(amo, s, sizeof(amo) - strlen(amo) - 1); + free(s); + + if ((op32 & UCS_BIT(opcode)) && (op64 & UCS_BIT(opcode))) { + printf("# %12s: 32, 64 bit\n", amo); + } else { + printf("# %12s: %d bit\n", amo, + (op32 & UCS_BIT(opcode)) ? 32 : 64); + } + } +} + +static const char *size_limit_to_str(size_t min_size, size_t max_size) +{ + static char buf[128]; + char *ptr, *end; + + ptr = buf; + end = buf + sizeof(buf); + + if ((min_size == 0) && (max_size == SIZE_MAX)) { + snprintf(ptr, end - ptr, "unlimited"); + } else { + if (min_size == 0) { + snprintf(ptr, end - ptr, "<= "); + ptr += strlen(ptr); + } else { + ucs_memunits_to_str(min_size, ptr, end - ptr); + ptr += strlen(ptr); + + snprintf(ptr, end - ptr, ".."); + ptr += strlen(ptr); + } + ucs_memunits_to_str(max_size, ptr, end - ptr); + } + + return buf; +} + +static void print_iface_info(uct_worker_h worker, uct_md_h md, + uct_tl_resource_desc_t *resource) +{ + uct_iface_config_t *iface_config; + uct_iface_attr_t iface_attr; + ucs_status_t status; + uct_iface_h iface; + char buf[200] = {0}; + uct_iface_params_t iface_params = { + .field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_DEVICE | + UCT_IFACE_PARAM_FIELD_STATS_ROOT | + UCT_IFACE_PARAM_FIELD_RX_HEADROOM | + UCT_IFACE_PARAM_FIELD_CPU_MASK, + .open_mode = UCT_IFACE_OPEN_MODE_DEVICE, + .mode.device.tl_name = resource->tl_name, + .mode.device.dev_name = resource->dev_name, + .stats_root = ucs_stats_get_root(), + .rx_headroom = 0 + }; + + UCS_CPU_ZERO(&iface_params.cpu_mask); + status = uct_md_iface_config_read(md, resource->tl_name, NULL, NULL, &iface_config); + if (status != UCS_OK) { + return; + } + + printf("# Transport: %s\n", resource->tl_name); + printf("# Device: %s\n", resource->dev_name); + + status = uct_iface_open(md, worker, &iface_params, iface_config, &iface); + uct_config_release(iface_config); + 
+ if (status != UCS_OK) { + printf("# < failed to open interface >\n"); + /* coverity[leaked_storage] */ + return; + } + + printf("#\n"); + printf("# capabilities:\n"); + status = uct_iface_query(iface, &iface_attr); + if (status != UCS_OK) { + printf("# < failed to query interface >\n"); + } else { + printf("# bandwidth: %-.2f/ppn + %-.2f MB/sec\n", + iface_attr.bandwidth.shared / UCS_MBYTE, + iface_attr.bandwidth.dedicated / UCS_MBYTE); + printf("# latency: %-.0f nsec", iface_attr.latency.overhead * 1e9); + if (iface_attr.latency.growth > 0) { + printf(" + %.0f * N\n", iface_attr.latency.growth * 1e9); + } else { + printf("\n"); + } + printf("# overhead: %-.0f nsec\n", iface_attr.overhead * 1e9); + + PRINT_CAP(PUT_SHORT, iface_attr.cap.flags, iface_attr.cap.put.max_short); + PRINT_CAP(PUT_BCOPY, iface_attr.cap.flags, iface_attr.cap.put.max_bcopy); + PRINT_ZCAP(PUT_ZCOPY, iface_attr.cap.flags, iface_attr.cap.put.min_zcopy, + iface_attr.cap.put.max_zcopy, iface_attr.cap.put.max_iov); + + if (iface_attr.cap.flags & UCT_IFACE_FLAG_PUT_ZCOPY) { + printf("# put_opt_zcopy_align: %s\n", + size_limit_to_str(0, iface_attr.cap.put.opt_zcopy_align)); + printf("# put_align_mtu: %s\n", + size_limit_to_str(0, iface_attr.cap.put.align_mtu)); + } + + PRINT_CAP(GET_SHORT, iface_attr.cap.flags, iface_attr.cap.get.max_short); + PRINT_CAP(GET_BCOPY, iface_attr.cap.flags, iface_attr.cap.get.max_bcopy); + PRINT_ZCAP(GET_ZCOPY, iface_attr.cap.flags, iface_attr.cap.get.min_zcopy, + iface_attr.cap.get.max_zcopy, iface_attr.cap.get.max_iov); + if (iface_attr.cap.flags & UCT_IFACE_FLAG_GET_ZCOPY) { + printf("# get_opt_zcopy_align: %s\n", + size_limit_to_str(0, iface_attr.cap.get.opt_zcopy_align)); + printf("# get_align_mtu: %s\n", + size_limit_to_str(0, iface_attr.cap.get.align_mtu)); + } + + PRINT_CAP(AM_SHORT, iface_attr.cap.flags, iface_attr.cap.am.max_short); + PRINT_CAP(AM_BCOPY, iface_attr.cap.flags, iface_attr.cap.am.max_bcopy); + PRINT_ZCAP(AM_ZCOPY, iface_attr.cap.flags, 
iface_attr.cap.am.min_zcopy, + iface_attr.cap.am.max_zcopy, iface_attr.cap.am.max_iov); + if (iface_attr.cap.flags & UCT_IFACE_FLAG_AM_ZCOPY) { + printf("# am_opt_zcopy_align: %s\n", + size_limit_to_str(0, iface_attr.cap.am.opt_zcopy_align)); + printf("# am_align_mtu: %s\n", + size_limit_to_str(0, iface_attr.cap.am.align_mtu)); + printf("# am header: %s\n", + size_limit_to_str(0, iface_attr.cap.am.max_hdr)); + } + + PRINT_CAP(TAG_EAGER_SHORT, iface_attr.cap.flags, + iface_attr.cap.tag.eager.max_short); + PRINT_CAP(TAG_EAGER_BCOPY, iface_attr.cap.flags, + iface_attr.cap.tag.eager.max_bcopy); + PRINT_ZCAP(TAG_EAGER_ZCOPY, iface_attr.cap.flags, 0, + iface_attr.cap.tag.eager.max_zcopy, + iface_attr.cap.tag.eager.max_iov); + + if (iface_attr.cap.flags & UCT_IFACE_FLAG_TAG_RNDV_ZCOPY) { + PRINT_ZCAP_NO_CHECK(TAG_RNDV_ZCOPY, 0, + iface_attr.cap.tag.rndv.max_zcopy, + iface_attr.cap.tag.rndv.max_iov); + printf("# rndv private header: %s\n", + size_limit_to_str(0, iface_attr.cap.tag.rndv.max_hdr)); + } + + if (iface_attr.cap.flags & (UCT_IFACE_FLAG_TAG_EAGER_SHORT | + UCT_IFACE_FLAG_TAG_EAGER_BCOPY | + UCT_IFACE_FLAG_TAG_EAGER_ZCOPY | + UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)) { + PRINT_ZCAP_NO_CHECK(TAG_RECV, iface_attr.cap.tag.recv.min_recv, + iface_attr.cap.tag.recv.max_zcopy, + iface_attr.cap.tag.recv.max_iov); + printf("# tag_max_outstanding: %s\n", + size_limit_to_str(0, iface_attr.cap.tag.recv.max_outstanding)); + } + + if (iface_attr.cap.atomic32.op_flags || + iface_attr.cap.atomic64.op_flags || + iface_attr.cap.atomic32.fop_flags || + iface_attr.cap.atomic64.fop_flags) { + if (iface_attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_DEVICE) { + printf("# domain: device\n"); + } + if (iface_attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_CPU) { + printf("# domain: cpu\n"); + } + + PRINT_ATOMIC_POST(ADD, iface_attr.cap); + PRINT_ATOMIC_POST(AND, iface_attr.cap); + PRINT_ATOMIC_POST(OR, iface_attr.cap); + PRINT_ATOMIC_POST(XOR, iface_attr.cap); + + PRINT_ATOMIC_FETCH(ADD, iface_attr.cap, "f"); + 
PRINT_ATOMIC_FETCH(AND, iface_attr.cap, "f"); + PRINT_ATOMIC_FETCH(OR, iface_attr.cap, "f"); + PRINT_ATOMIC_FETCH(XOR, iface_attr.cap, "f"); + PRINT_ATOMIC_FETCH(SWAP , iface_attr.cap, ""); + PRINT_ATOMIC_FETCH(CSWAP, iface_attr.cap, ""); + } + + buf[0] = '\0'; + if (iface_attr.cap.flags & (UCT_IFACE_FLAG_CONNECT_TO_EP | + UCT_IFACE_FLAG_CONNECT_TO_IFACE)) { + if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { + strncat(buf, " to ep,", sizeof(buf) - strlen(buf) - 1); + } + if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + strncat(buf, " to iface,", sizeof(buf) - strlen(buf) - 1); + } + buf[strlen(buf) - 1] = '\0'; + } else { + strncat(buf, " none", sizeof(buf) - strlen(buf) - 1); + } + printf("# connection:%s\n", buf); + + printf("# priority: %d\n", iface_attr.priority); + + printf("# device address: %zu bytes\n", iface_attr.device_addr_len); + if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + printf("# iface address: %zu bytes\n", iface_attr.iface_addr_len); + } + if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { + printf("# ep address: %zu bytes\n", iface_attr.ep_addr_len); + } + + buf[0] = '\0'; + if (iface_attr.cap.flags & (UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF | + UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF | + UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF | + UCT_IFACE_FLAG_ERRHANDLE_AM_ID | + UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM | + UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) { + + if (iface_attr.cap.flags & (UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF | + UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF | + UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF)) { + strncat(buf, " buffer (", sizeof(buf) - strlen(buf) - 1); + if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF) { + strncat(buf, "short,", sizeof(buf) - strlen(buf) - 1); + } + if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF) { + strncat(buf, "bcopy,", sizeof(buf) - strlen(buf) - 1); + } + if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF) { + strncat(buf, "zcopy,", sizeof(buf) - 
strlen(buf) - 1); + } + buf[strlen(buf) - 1] = '\0'; + strncat(buf, "),", sizeof(buf) - strlen(buf) - 1); + } + if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_AM_ID) { + strncat(buf, " active-message id,", sizeof(buf) - strlen(buf) - 1); + } + if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM) { + strncat(buf, " remote access,", sizeof(buf) - strlen(buf) - 1); + } + if (iface_attr.cap.flags & UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE) { + strncat(buf, " peer failure,", sizeof(buf) - strlen(buf) - 1); + } + buf[strlen(buf) - 1] = '\0'; + } else { + strncat(buf, " none", sizeof(buf) - strlen(buf) - 1); + } + printf("# error handling:%s\n", buf); + } + + uct_iface_close(iface); + printf("#\n"); +} + +static ucs_status_t print_tl_info(uct_md_h md, const char *tl_name, + uct_tl_resource_desc_t *resources, + unsigned num_resources, + int print_opts, + ucs_config_print_flags_t print_flags) +{ + ucs_async_context_t async; + uct_worker_h worker; + ucs_status_t status; + unsigned i; + + status = ucs_async_context_init(&async, UCS_ASYNC_THREAD_LOCK_TYPE); + if (status != UCS_OK) { + return status; + } + + /* coverity[alloc_arg] */ + status = uct_worker_create(&async, UCS_THREAD_MODE_SINGLE, &worker); + if (status != UCS_OK) { + goto out; + } + + printf("#\n"); + + if (num_resources == 0) { + printf("# (No supported devices found)\n"); + } + for (i = 0; i < num_resources; ++i) { + ucs_assert(!strcmp(tl_name, resources[i].tl_name)); + print_iface_info(worker, md, &resources[i]); + } + + uct_worker_destroy(worker); +out: + ucs_async_context_cleanup(&async); + return status; +} + +static void print_md_info(uct_component_h component, + const uct_component_attr_t *component_attr, + const char *md_name, int print_opts, + ucs_config_print_flags_t print_flags, + const char *req_tl_name) +{ + uct_tl_resource_desc_t *resources, tmp; + unsigned resource_index, j, num_resources, count; + ucs_status_t status; + const char *tl_name; + uct_md_config_t *md_config; + 
uct_md_attr_t md_attr; + uct_md_h md; + + status = uct_md_config_read(component, NULL, NULL, &md_config); + if (status != UCS_OK) { + goto out; + } + + status = uct_md_open(component, md_name, md_config, &md); + uct_config_release(md_config); + if (status != UCS_OK) { + printf("# < failed to open memory domain %s >\n", md_name); + goto out; + } + + status = uct_md_query_tl_resources(md, &resources, &num_resources); + if (status != UCS_OK) { + printf("# < failed to query memory domain resources >\n"); + goto out_close_md; + } + + if (req_tl_name != NULL) { + resource_index = 0; + while (resource_index < num_resources) { + if (!strcmp(resources[resource_index].tl_name, req_tl_name)) { + break; + } + ++resource_index; + } + if (resource_index == num_resources) { + /* no selected transport on the MD */ + goto out_free_list; + } + } + + status = uct_md_query(md, &md_attr); + if (status != UCS_OK) { + printf("# < failed to query memory domain >\n"); + goto out_free_list; + } else { + printf("#\n"); + printf("# Memory domain: %s\n", md_name); + printf("# Component: %s\n", component_attr->name); + if (md_attr.cap.flags & UCT_MD_FLAG_ALLOC) { + printf("# allocate: %s\n", + size_limit_to_str(0, md_attr.cap.max_alloc)); + } + if (md_attr.cap.flags & UCT_MD_FLAG_REG) { + printf("# register: %s, cost: %.0f", + size_limit_to_str(0, md_attr.cap.max_reg), + md_attr.reg_cost.overhead * 1e9); + if (md_attr.reg_cost.growth * 1e9 > 1e-3) { + printf("+(%.3f*)", md_attr.reg_cost.growth * 1e9); + } + printf(" nsec\n"); + } + if (md_attr.cap.flags & UCT_MD_FLAG_NEED_RKEY) { + printf("# remote key: %zu bytes\n", md_attr.rkey_packed_size); + } + if (md_attr.cap.flags & UCT_MD_FLAG_NEED_MEMH) { + printf("# local memory handle is required for zcopy\n"); + } + if (md_attr.cap.flags & UCT_MD_FLAG_RKEY_PTR) { + printf("# rkey_ptr is supported\n"); + } + if (md_attr.cap.flags & UCT_MD_FLAG_SOCKADDR) { + printf("# supports client-server connection establishment via sockaddr\n"); + } + } + + if 
(num_resources == 0) { + printf("# < no supported devices found >\n"); + goto out_free_list; + } + + resource_index = 0; + while (resource_index < num_resources) { + /* Gather all resources for this transport */ + tl_name = resources[resource_index].tl_name; + count = 1; + for (j = resource_index + 1; j < num_resources; ++j) { + if (!strcmp(tl_name, resources[j].tl_name)) { + tmp = resources[count + resource_index]; + resources[count + resource_index] = resources[j]; + resources[j] = tmp; + ++count; + } + } + + if ((req_tl_name == NULL) || !strcmp(tl_name, req_tl_name)) { + print_tl_info(md, tl_name, &resources[resource_index], count, + print_opts, print_flags); + } + + resource_index += count; + } + +out_free_list: + uct_release_tl_resource_list(resources); +out_close_md: + uct_md_close(md); +out: + ; +} + +static void print_cm_attr(uct_worker_h worker, uct_component_h component, + const char *comp_name) +{ + uct_cm_config_t *cm_config; + uct_cm_attr_t cm_attr; + ucs_status_t status; + uct_cm_h cm; + + status = uct_cm_config_read(component, NULL, NULL, &cm_config); + if (status != UCS_OK) { + printf("# < failed to read the %s connection manager configuration >\n", + comp_name); + return; + } + + status = uct_cm_open(component, worker, cm_config, &cm); + uct_config_release(cm_config); + if (status != UCS_OK) { + printf("# < failed to open connection manager %s >\n", comp_name); + /* coverity[leaked_storage] */ + return; + } + + cm_attr.field_mask = UCT_CM_ATTR_FIELD_MAX_CONN_PRIV; + status = uct_cm_query(cm, &cm_attr); + if (status != UCS_OK) { + printf("# < failed to query connection manager >\n"); + } else { + printf("#\n"); + printf("# Connection manager: %s\n", comp_name); + printf("# max_conn_priv: %zu bytes\n", cm_attr.max_conn_priv); + } + + uct_cm_close(cm); +} + +static void print_cm_info(uct_component_h component, + const uct_component_attr_t *component_attr) +{ + ucs_async_context_t *async; + uct_worker_h worker; + ucs_status_t status; + + status = 
ucs_async_context_create(UCS_ASYNC_MODE_THREAD_SPINLOCK, &async); + if (status != UCS_OK) { + printf("# < failed to create asynchronous context >\n"); + return; + } + + status = uct_worker_create(async, UCS_THREAD_MODE_SINGLE, &worker); + if (status != UCS_OK) { + printf("# < failed to create uct worker >\n"); + goto out_async_ctx_destroy; + } + + print_cm_attr(worker, component, component_attr->name); + + uct_worker_destroy(worker); + +out_async_ctx_destroy: + ucs_async_context_destroy(async); +} + +static void print_uct_component_info(uct_component_h component, + int print_opts, + ucs_config_print_flags_t print_flags, + const char *req_tl_name) +{ + uct_component_attr_t component_attr; + ucs_status_t status; + unsigned i; + + component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME | + UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT | + UCT_COMPONENT_ATTR_FIELD_FLAGS; + status = uct_component_query(component, &component_attr); + if (status != UCS_OK) { + printf("# < failed to query component >\n"); + return; + } + + component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES; + component_attr.md_resources = alloca(sizeof(*component_attr.md_resources) * + component_attr.md_resource_count); + status = uct_component_query(component, &component_attr); + if (status != UCS_OK) { + printf("# < failed to query component md resources >\n"); + return; + } + + for (i = 0; i < component_attr.md_resource_count; ++i) { + print_md_info(component, &component_attr, + component_attr.md_resources[i].md_name, + print_opts, print_flags, req_tl_name); + } + + if (component_attr.flags & UCT_COMPONENT_FLAG_CM) { + print_cm_info(component, &component_attr); + } +} + +void print_uct_info(int print_opts, ucs_config_print_flags_t print_flags, + const char *req_tl_name) +{ + uct_component_h *components; + unsigned i, num_components; + ucs_status_t status; + + status = uct_query_components(&components, &num_components); + if (status != UCS_OK) { + printf("# < failed to query UCT components 
>\n"); + return; + } + + if (print_opts & PRINT_DEVICES) { + for (i = 0; i < num_components; ++i) { + print_uct_component_info(components[i], print_opts, print_flags, + req_tl_name); + } + } + + uct_release_component_list(components); +} + diff --git a/src/tools/info/type_info.c b/src/tools/info/type_info.c new file mode 100644 index 0000000..8d23a30 --- /dev/null +++ b/src/tools/info/type_info.c @@ -0,0 +1,273 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucx_info.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_IB +# include +# include +#endif + +#if HAVE_TL_RC +# include +# include +# include +# if HAVE_MLX5_HW +# include +# endif +#endif + +#if HAVE_TL_DC +# include +# include +#endif + +#if HAVE_TL_UD +# include +# include +# if HAVE_MLX5_HW_UD +# include +# endif +#endif + + +#if HAVE_TL_UGNI +# include +# include +# include +# include +#endif + + +/* + * Print one " sizeof(<name>) = ..... <size>" row; dots pad the label part + * to 40 columns so the sizes line up. + */ +static void print_size(const char *name, size_t size) +{ + int i; + + /* printf() returns the number of characters written, which replaces the + * "%n" conversion (rejected by some hardened C libraries) */ + i = printf(" sizeof(%s)", name); + printf(" = "); + while (i++ < 40) { + printf("."); + } + /* size_t takes "%zu"; "%lu" mismatches where size_t is not unsigned long */ + printf(" %-6zu\n", size); +} + +#define PRINT_SIZE(type) print_size(UCS_PP_QUOTE(type), sizeof(type)) + + +void print_type_info(const char * tl_name) +{ + if (tl_name == NULL) { + printf("UCS:\n"); + PRINT_SIZE(ucs_mpool_t); + PRINT_SIZE(ucs_mpool_chunk_t); + PRINT_SIZE(ucs_mpool_elem_t); + PRINT_SIZE(ucs_async_context_t); + PRINT_SIZE(ucs_async_handler_t); + PRINT_SIZE(ucs_async_ops_t); + PRINT_SIZE(ucs_async_pipe_t); + PRINT_SIZE(ucs_async_signal_context_t); + PRINT_SIZE(ucs_async_thread_context_t); + PRINT_SIZE(ucs_class_t); + PRINT_SIZE(ucs_config_field_t); + PRINT_SIZE(ucs_config_parser_t); + PRINT_SIZE(ucs_frag_list_t); + 
PRINT_SIZE(ucs_frag_list_elem_t); + PRINT_SIZE(ucs_frag_list_head_t); + PRINT_SIZE(ucs_ib_port_spec_t); + PRINT_SIZE(ucs_list_link_t); + PRINT_SIZE(ucs_memtrack_entry_t); + PRINT_SIZE(ucs_mpmc_queue_t); + PRINT_SIZE(ucs_callbackq_t); + PRINT_SIZE(ucs_callbackq_elem_t); + PRINT_SIZE(ucs_ptr_array_t); + PRINT_SIZE(ucs_queue_elem_t); + PRINT_SIZE(ucs_queue_head_t); + PRINT_SIZE(ucs_spinlock_t); + PRINT_SIZE(ucs_timer_t); + PRINT_SIZE(ucs_timer_queue_t); + PRINT_SIZE(ucs_twheel_t); + PRINT_SIZE(ucs_wtimer_t); + PRINT_SIZE(ucs_arbiter_t); + PRINT_SIZE(ucs_arbiter_group_t); + PRINT_SIZE(ucs_arbiter_elem_t); + PRINT_SIZE(ucs_pgtable_t); + PRINT_SIZE(ucs_pgt_entry_t); + PRINT_SIZE(ucs_pgt_dir_t); + PRINT_SIZE(ucs_pgt_region_t); + PRINT_SIZE(ucs_rcache_t); + PRINT_SIZE(ucs_rcache_region_t); + + printf("\nUCT:\n"); + PRINT_SIZE(uct_am_handler_t); + PRINT_SIZE(uct_base_iface_t); + PRINT_SIZE(uct_completion_t); + PRINT_SIZE(uct_ep_t); + PRINT_SIZE(uct_mem_h); + PRINT_SIZE(uct_rkey_t); + PRINT_SIZE(uct_iface_t); + PRINT_SIZE(uct_iface_attr_t); + PRINT_SIZE(uct_iface_config_t); + PRINT_SIZE(uct_iface_mpool_config_t); + PRINT_SIZE(uct_md_config_t); + PRINT_SIZE(uct_iface_ops_t); + PRINT_SIZE(uct_md_t); + PRINT_SIZE(uct_md_attr_t); + PRINT_SIZE(uct_md_ops_t); + PRINT_SIZE(uct_tl_resource_desc_t); + PRINT_SIZE(uct_rkey_bundle_t); + PRINT_SIZE(uct_tcp_ep_t); + PRINT_SIZE(uct_self_ep_t); + +#if HAVE_TL_UGNI + PRINT_SIZE(uct_sockaddr_ugni_t); + PRINT_SIZE(uct_sockaddr_smsg_ugni_t); + PRINT_SIZE(uct_devaddr_ugni_t); +#endif + +#if HAVE_IB + printf("\nIB:\n"); + PRINT_SIZE(uct_ib_address_t); + PRINT_SIZE(uct_ib_device_t); + PRINT_SIZE(uct_ib_md_t); + PRINT_SIZE(uct_ib_mem_t); + PRINT_SIZE(uct_ib_rcache_region_t); + PRINT_SIZE(uct_ib_iface_t); + PRINT_SIZE(uct_ib_iface_config_t); + PRINT_SIZE(uct_ib_iface_recv_desc_t); + PRINT_SIZE(uct_ib_recv_wr_t); +#endif + printf("\n"); + } + +#if HAVE_TL_RC + if (tl_name == NULL || !strcasecmp(tl_name, "rc_verbs") || + !strcasecmp(tl_name, 
"rc_mlx5")) + { + printf("RC:\n"); + PRINT_SIZE(uct_rc_am_short_hdr_t); + PRINT_SIZE(uct_rc_ep_t); + PRINT_SIZE(uct_rc_hdr_t); + PRINT_SIZE(uct_rc_iface_t); + PRINT_SIZE(uct_rc_iface_config_t); + PRINT_SIZE(uct_rc_iface_send_op_t); + PRINT_SIZE(uct_rc_iface_send_desc_t); + + PRINT_SIZE(uct_rc_iface_send_desc_t); + if (tl_name == NULL || !strcasecmp(tl_name, "rc_verbs")) { + PRINT_SIZE(uct_rc_verbs_ep_t); + PRINT_SIZE(uct_rc_verbs_iface_config_t); + PRINT_SIZE(uct_rc_verbs_iface_t); + } + +#if HAVE_MLX5_HW + if (tl_name == NULL || !strcasecmp(tl_name, "rc_mlx5")) { + PRINT_SIZE(uct_rc_mlx5_am_short_hdr_t); + PRINT_SIZE(uct_rc_mlx5_ep_t); + PRINT_SIZE(uct_rc_mlx5_hdr_t); + PRINT_SIZE(uct_rc_mlx5_iface_common_config_t); + PRINT_SIZE(uct_rc_mlx5_iface_common_t); + } +#endif + printf("\n"); + } +#endif + +#if HAVE_TL_DC + if (tl_name == NULL || !strcasecmp(tl_name, "dc_mlx5")) + { + printf("DC:\n"); + PRINT_SIZE(uct_dc_mlx5_ep_t); + PRINT_SIZE(uct_dc_mlx5_iface_t); + PRINT_SIZE(uct_dc_mlx5_iface_config_t); + printf("\n"); + } +#endif + +#if HAVE_TL_UD + if (tl_name == NULL || !strcasecmp(tl_name, "ud_verbs") || + !strcasecmp(tl_name, "ud_mlx5")) + { + printf("UD:\n"); + PRINT_SIZE(uct_ud_ep_t); + PRINT_SIZE(uct_ud_neth_t); + PRINT_SIZE(uct_ud_iface_t); + PRINT_SIZE(uct_ud_iface_config_t); + PRINT_SIZE(uct_ud_ep_pending_op_t); + PRINT_SIZE(uct_ud_send_skb_t); + PRINT_SIZE(uct_ud_send_skb_inl_t); + PRINT_SIZE(uct_ud_recv_skb_t); + + PRINT_SIZE(uct_rc_iface_send_desc_t); + if (tl_name == NULL || !strcasecmp(tl_name, "ud_verbs")) { + PRINT_SIZE(uct_ud_verbs_ep_t); + PRINT_SIZE(uct_ud_verbs_iface_t); + } + +#if HAVE_MLX5_HW_UD + if (tl_name == NULL || !strcasecmp(tl_name, "ud_mlx5")) { + PRINT_SIZE(uct_ud_mlx5_ep_t); + PRINT_SIZE(uct_ud_mlx5_iface_t); + } +#endif + printf("\n"); + } +#endif + +#if HAVE_TL_UGNI + if (tl_name == NULL || !strcasecmp(tl_name, "ugni")) { + printf("UGNI:\n"); + PRINT_SIZE(uct_ugni_device_t); + PRINT_SIZE(uct_ugni_ep_t); + 
PRINT_SIZE(uct_ugni_iface_t); + PRINT_SIZE(uct_ugni_md_t); + PRINT_SIZE(uct_ugni_compact_smsg_attr_t); + + printf("\n"); + } +#endif + + printf("\nUCP:\n"); + PRINT_SIZE(ucp_context_t); + PRINT_SIZE(ucp_worker_t); + PRINT_SIZE(ucp_ep_t); + PRINT_SIZE(ucp_ep_ext_gen_t); + PRINT_SIZE(ucp_ep_ext_proto_t); + PRINT_SIZE(ucp_ep_match_entry_t); + PRINT_SIZE(ucp_ep_config_key_t); + PRINT_SIZE(ucp_ep_config_t); + PRINT_SIZE(ucp_request_t); + PRINT_SIZE(ucp_recv_desc_t); + PRINT_SIZE(ucp_tag_recv_info_t); + PRINT_SIZE(ucp_mem_t); + PRINT_SIZE(ucp_rkey_t); + PRINT_SIZE(ucp_wireup_msg_t); + +} diff --git a/src/tools/info/ucx_info.c b/src/tools/info/ucx_info.c new file mode 100644 index 0000000..b70c4be --- /dev/null +++ b/src/tools/info/ucx_info.c @@ -0,0 +1,219 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucx_info.h" + +#include +#include +#include +#include +#include +#include +#include + + +static void usage() { + printf("Usage: ucx_info [options]\n"); + printf("At least one of the following options has to be set:\n"); + printf(" -v Show version information\n"); + printf(" -d Show devices and transports\n"); + printf(" -b Show build configuration\n"); + printf(" -y Show type and structures information\n"); + printf(" -s Show system information\n"); + printf(" -c Show UCX configuration\n"); + printf(" -a Show also hidden configuration\n"); + printf(" -f Display fully decorated output\n"); + printf("\nUCP information (-u is required):\n"); + printf(" -p Show UCP context information\n"); + printf(" -w Show UCP worker information\n"); + printf(" -e Show UCP endpoint configuration\n"); + printf(" -m Show UCP memory allocation method for a given size\n"); + printf(" -u UCP context features to use. 
String of one or more of:\n"); + printf(" 'a' : atomic operations\n"); + printf(" 'r' : remote memory access\n"); + printf(" 't' : tag matching \n"); + printf(" 'w' : wakeup\n"); + printf(" Modifiers to use in combination with above features:\n"); + printf(" 'e' : error handling\n"); + printf("\nOther settings:\n"); + printf(" -t Filter devices information using specified transport (requires -d)\n"); + printf(" -n Estimated UCP endpoint count (for ucp_init)\n"); + printf(" -N Estimated UCP endpoint count per node (for ucp_init)\n"); + printf(" -D Set which device types to use when creating UCP context:\n"); + printf(" 'all' : all possible devices (default)\n"); + printf(" 'shm' : shared memory devices only\n"); + printf(" 'net' : network devices only\n"); + printf(" 'self' : self transport only\n"); + printf(" -h Show this help message\n"); + printf("\n"); +} + +int main(int argc, char **argv) +{ + ucs_config_print_flags_t print_flags; + ucp_ep_params_t ucp_ep_params; + unsigned dev_type_bitmap; + uint64_t ucp_features; + size_t ucp_num_eps; + size_t ucp_num_ppn; + unsigned print_opts; + char *tl_name, *mem_size; + const char *f; + int c; + + print_opts = 0; + print_flags = (ucs_config_print_flags_t)0; + tl_name = NULL; + ucp_features = 0; + ucp_num_eps = 1; + ucp_num_ppn = 1; + mem_size = NULL; + dev_type_bitmap = UINT_MAX; + ucp_ep_params.field_mask = 0; + while ((c = getopt(argc, argv, "fahvcydbswpet:n:u:D:m:N:")) != -1) { + switch (c) { + case 'f': + print_flags |= UCS_CONFIG_PRINT_CONFIG | UCS_CONFIG_PRINT_HEADER | UCS_CONFIG_PRINT_DOC; + break; + case 'a': + print_flags |= UCS_CONFIG_PRINT_HIDDEN; + break; + case 'c': + print_flags |= UCS_CONFIG_PRINT_CONFIG; + break; + case 'v': + print_opts |= PRINT_VERSION; + break; + case 'd': + print_opts |= PRINT_DEVICES; + break; + case 'b': + print_opts |= PRINT_BUILD_CONFIG; + break; + case 'y': + print_opts |= PRINT_TYPES; + break; + case 's': + print_opts |= PRINT_SYS_INFO; + break; + case 'p': + print_opts |= 
PRINT_UCP_CONTEXT; + break; + case 'w': + print_opts |= PRINT_UCP_WORKER; + break; + case 'e': + print_opts |= PRINT_UCP_EP; + break; + case 'm': + print_opts |= PRINT_MEM_MAP; + mem_size = optarg; + break; + case 't': + tl_name = optarg; + break; + case 'n': + ucp_num_eps = atol(optarg); + break; + case 'N': + ucp_num_ppn = atol(optarg); + break; + case 'u': + for (f = optarg; *f; ++f) { + switch (*f) { + case 'a': + ucp_features |= UCP_FEATURE_AMO32|UCP_FEATURE_AMO64; + break; + case 'r': + ucp_features |= UCP_FEATURE_RMA; + break; + case 't': + ucp_features |= UCP_FEATURE_TAG; + break; + case 'w': + ucp_features |= UCP_FEATURE_WAKEUP; + break; + case 'e': + ucp_ep_params.field_mask |= UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE; + ucp_ep_params.err_mode = UCP_ERR_HANDLING_MODE_PEER; + break; + default: + usage(); + return -1; + } + } + break; + case 'D': + if (!strcasecmp(optarg, "net")) { + dev_type_bitmap = UCS_BIT(UCT_DEVICE_TYPE_NET); + } else if (!strcasecmp(optarg, "shm")) { + dev_type_bitmap = UCS_BIT(UCT_DEVICE_TYPE_SHM); + } else if (!strcasecmp(optarg, "self")) { + dev_type_bitmap = UCS_BIT(UCT_DEVICE_TYPE_SELF); + } else if (!strcasecmp(optarg, "all")) { + dev_type_bitmap = UINT_MAX; + } else { + usage(); + return -1; + } + break; + case 'h': + usage(); + return 0; + default: + usage(); + return -1; + } + } + + if ((print_opts == 0) && (print_flags == 0)) { + usage(); + return -2; + } + + if (print_opts & PRINT_VERSION) { + print_version(); + } + + if (print_opts & PRINT_SYS_INFO) { + print_sys_info(); + } + + if (print_opts & PRINT_BUILD_CONFIG) { + print_build_config(); + } + + if (print_opts & PRINT_TYPES) { + print_type_info(tl_name); + } + + if ((print_opts & PRINT_DEVICES) || (print_flags & UCS_CONFIG_PRINT_CONFIG)) { + /* if UCS_CONFIG_PRINT_CONFIG is ON, trigger loading UCT modules by + * calling print_uct_info()->uct_component_query() + */ + print_uct_info(print_opts, print_flags, tl_name); + } + + if (print_flags & UCS_CONFIG_PRINT_CONFIG) { + 
ucs_config_parser_print_all_opts(stdout, print_flags); + } + + if (print_opts & (PRINT_UCP_CONTEXT|PRINT_UCP_WORKER|PRINT_UCP_EP|PRINT_MEM_MAP)) { + if (ucp_features == 0) { + printf("Please select UCP features using -u switch: a|r|t|w\n"); + usage(); + return -1; + } + print_ucp_info(print_opts, print_flags, ucp_features, &ucp_ep_params, + ucp_num_eps, ucp_num_ppn, dev_type_bitmap, mem_size); + } + + return 0; +} diff --git a/src/tools/info/ucx_info.h b/src/tools/info/ucx_info.h new file mode 100644 index 0000000..037de53 --- /dev/null +++ b/src/tools/info/ucx_info.h @@ -0,0 +1,43 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCX_INFO_H +#define UCX_INFO_H + +#include +#include + + +/* Bit flags combined into the print_opts argument of the print_* functions */ +enum { + PRINT_VERSION = UCS_BIT(0), + PRINT_SYS_INFO = UCS_BIT(1), + PRINT_BUILD_CONFIG = UCS_BIT(2), + PRINT_TYPES = UCS_BIT(3), + PRINT_DEVICES = UCS_BIT(4), + PRINT_UCP_CONTEXT = UCS_BIT(5), + PRINT_UCP_WORKER = UCS_BIT(6), + PRINT_UCP_EP = UCS_BIT(7), + PRINT_MEM_MAP = UCS_BIT(8) +}; + + +/* "(void)" makes these real prototypes so that calls with arguments are rejected */ +void print_version(void); + +void print_sys_info(void); + +void print_build_config(void); + +void print_uct_info(int print_opts, ucs_config_print_flags_t print_flags, + const char *req_tl_name); + +void print_type_info(const char * tl_name); + +void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags, + uint64_t ctx_features, const ucp_ep_params_t *base_ep_params, + size_t estimated_num_eps, size_t estimated_num_ppn, + unsigned dev_type_bitmap, const char *mem_size); + +#endif diff --git a/src/tools/perf/Makefile.am b/src/tools/perf/Makefile.am new file mode 100644 index 0000000..bc68f81 --- /dev/null +++ b/src/tools/perf/Makefile.am @@ -0,0 +1,57 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED. +# Copyright (C) The University of Tennessee and The University +# of Tennessee Research Foundation. 2016. 
ALL RIGHTS RESERVED. +# Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +SUBDIRS = cuda rocm lib +CC = $(UCX_PERFTEST_CC) + +noinst_HEADERS = api/libperf.h +bin_PROGRAMS = ucx_perftest +ucx_perftest_SOURCES = perftest.c +ucx_perftest_CPPFLAGS = $(BASE_CPPFLAGS) $(RTE_CPPFLAGS) +ucx_perftest_CFLAGS = $(BASE_CFLAGS) $(OPENMP_CFLAGS) +ucx_perftest_LDFLAGS = $(RTE_LDFLAGS) -Wl,-dynamic-list-data +ucx_perftest_LDADD = \ + $(abs_top_builddir)/src/uct/libuct.la \ + $(abs_top_builddir)/src/ucp/libucp.la \ + $(abs_top_builddir)/src/ucs/libucs.la \ + lib/libucxperf.la + +perftestdir = $(pkgdatadir)/perftest +dist_perftest_DATA = \ + $(top_srcdir)/contrib/ucx_perftest_config/msg_pow2 \ + $(top_srcdir)/contrib/ucx_perftest_config/msg_pow2_large \ + $(top_srcdir)/contrib/ucx_perftest_config/README \ + $(top_srcdir)/contrib/ucx_perftest_config/test_types_uct \ + $(top_srcdir)/contrib/ucx_perftest_config/test_types_ucp \ + $(top_srcdir)/contrib/ucx_perftest_config/transports + +if HAVE_MPIRUN +.PHONY: ucx test help + +MPI_EXTRA = +MPI_ARGS = -n 2 -map-by node -mca pml ob1 -mca btl self,tcp,sm $(MPI_EXTRA) + +ucx: # convenience target, defined only under HAVE_MPIRUN: re-runs make at the top of the build tree + $(MAKE) -C $(top_builddir) + +test: ucx ucx_perftest # launches the freshly built ucx_perftest through MPIRUN with MPI_ARGS, then TEST_ARGS + $(MPIRUN) $(MPI_ARGS) $(abs_builddir)/ucx_perftest$(EXEEXT) $(TEST_ARGS) + +help: # prints the target list and the current values of MPI_ARGS and TEST_ARGS + @echo + @echo "Targets:" + @echo " test : Run performance test." + @echo + @echo "Environment variables:" + @echo " MPI_ARGS : Arguments for mpirun (\"$(MPI_ARGS)\")" + @echo " TEST_ARGS : Arguments for performance test (\"$(TEST_ARGS)\")" + @echo + +endif diff --git a/src/tools/perf/Makefile.in b/src/tools/perf/Makefile.in new file mode 100644 index 0000000..91fc71c --- /dev/null +++ b/src/tools/perf/Makefile.in @@ -0,0 +1,1018 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED. +# Copyright (C) The University of Tennessee and The University +# of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +# Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +bin_PROGRAMS = ucx_perftest$(EXEEXT) +subdir = src/tools/perf +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + 
$(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(dist_perftest_DATA) \ + $(noinst_HEADERS) 
$(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(perftestdir)" +PROGRAMS = $(bin_PROGRAMS) +am_ucx_perftest_OBJECTS = ucx_perftest-perftest.$(OBJEXT) +ucx_perftest_OBJECTS = $(am_ucx_perftest_OBJECTS) +ucx_perftest_DEPENDENCIES = $(abs_top_builddir)/src/uct/libuct.la \ + $(abs_top_builddir)/src/ucp/libucp.la \ + $(abs_top_builddir)/src/ucs/libucs.la lib/libucxperf.la +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +ucx_perftest_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(ucx_perftest_CFLAGS) \ + $(CFLAGS) $(ucx_perftest_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/ucx_perftest-perftest.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = 
$(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(ucx_perftest_SOURCES) +DIST_SOURCES = $(ucx_perftest_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +DATA = $(dist_perftest_DATA) +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = $(UCX_PERFTEST_CC) +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = 
@DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ 
+MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ 
+host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +SUBDIRS = cuda rocm lib +noinst_HEADERS = api/libperf.h +ucx_perftest_SOURCES = perftest.c +ucx_perftest_CPPFLAGS = $(BASE_CPPFLAGS) $(RTE_CPPFLAGS) +ucx_perftest_CFLAGS = $(BASE_CFLAGS) $(OPENMP_CFLAGS) +ucx_perftest_LDFLAGS = $(RTE_LDFLAGS) -Wl,-dynamic-list-data +ucx_perftest_LDADD = \ + $(abs_top_builddir)/src/uct/libuct.la \ + $(abs_top_builddir)/src/ucp/libucp.la \ + $(abs_top_builddir)/src/ucs/libucs.la \ + lib/libucxperf.la + +perftestdir = $(pkgdatadir)/perftest +dist_perftest_DATA = \ + $(top_srcdir)/contrib/ucx_perftest_config/msg_pow2 \ + $(top_srcdir)/contrib/ucx_perftest_config/msg_pow2_large \ + $(top_srcdir)/contrib/ucx_perftest_config/README \ + $(top_srcdir)/contrib/ucx_perftest_config/test_types_uct \ + $(top_srcdir)/contrib/ucx_perftest_config/test_types_ucp \ + $(top_srcdir)/contrib/ucx_perftest_config/transports + +@HAVE_MPIRUN_TRUE@MPI_EXTRA = +@HAVE_MPIRUN_TRUE@MPI_ARGS = -n 2 -map-by node -mca pml ob1 -mca btl self,tcp,sm $(MPI_EXTRA) +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + 
case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/tools/perf/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/tools/perf/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, 
files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +ucx_perftest$(EXEEXT): $(ucx_perftest_OBJECTS) $(ucx_perftest_DEPENDENCIES) $(EXTRA_ucx_perftest_DEPENDENCIES) + @rm -f ucx_perftest$(EXEEXT) + $(AM_V_CCLD)$(ucx_perftest_LINK) $(ucx_perftest_OBJECTS) $(ucx_perftest_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucx_perftest-perftest.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' 
libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +ucx_perftest-perftest.o: perftest.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_perftest_CPPFLAGS) $(CPPFLAGS) $(ucx_perftest_CFLAGS) $(CFLAGS) -MT ucx_perftest-perftest.o -MD -MP -MF $(DEPDIR)/ucx_perftest-perftest.Tpo -c -o ucx_perftest-perftest.o `test -f 'perftest.c' || echo '$(srcdir)/'`perftest.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_perftest-perftest.Tpo $(DEPDIR)/ucx_perftest-perftest.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='perftest.c' object='ucx_perftest-perftest.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(ucx_perftest_CPPFLAGS) $(CPPFLAGS) $(ucx_perftest_CFLAGS) $(CFLAGS) -c -o ucx_perftest-perftest.o `test -f 'perftest.c' || echo '$(srcdir)/'`perftest.c + +ucx_perftest-perftest.obj: perftest.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_perftest_CPPFLAGS) $(CPPFLAGS) $(ucx_perftest_CFLAGS) $(CFLAGS) -MT ucx_perftest-perftest.obj -MD -MP -MF $(DEPDIR)/ucx_perftest-perftest.Tpo -c -o ucx_perftest-perftest.obj `if test -f 'perftest.c'; then $(CYGPATH_W) 'perftest.c'; else $(CYGPATH_W) '$(srcdir)/perftest.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_perftest-perftest.Tpo $(DEPDIR)/ucx_perftest-perftest.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='perftest.c' object='ucx_perftest-perftest.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_perftest_CPPFLAGS) $(CPPFLAGS) $(ucx_perftest_CFLAGS) $(CFLAGS) -c -o ucx_perftest-perftest.obj `if test -f 'perftest.c'; then $(CYGPATH_W) 'perftest.c'; else $(CYGPATH_W) '$(srcdir)/perftest.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-dist_perftestDATA: $(dist_perftest_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_perftest_DATA)'; test -n "$(perftestdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(perftestdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(perftestdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(perftestdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(perftestdir)" || exit $$?; \ + done + +uninstall-dist_perftestDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_perftest_DATA)'; test -n "$(perftestdir)" || list=; \ + files=`for p in 
$$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(perftestdir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(PROGRAMS) $(DATA) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(perftestdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/ucx_perftest-perftest.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-dist_perftestDATA + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/ucx_perftest-perftest.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-dist_perftestDATA + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS 
GTAGS TAGS all all-am \ + am--depfiles check check-am clean clean-binPROGRAMS \ + clean-generic clean-libtool cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-binPROGRAMS \ + install-data install-data-am install-dist_perftestDATA \ + install-dvi install-dvi-am install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-binPROGRAMS \ + uninstall-dist_perftestDATA + +.PRECIOUS: Makefile + + +@HAVE_MPIRUN_TRUE@.PHONY: ucx test help + +@HAVE_MPIRUN_TRUE@ucx: +@HAVE_MPIRUN_TRUE@ $(MAKE) -C $(top_builddir) + +@HAVE_MPIRUN_TRUE@test: ucx ucx_perftest +@HAVE_MPIRUN_TRUE@ $(MPIRUN) $(MPI_ARGS) $(abs_builddir)/ucx_perftest$(EXEEXT) $(TEST_ARGS) + +@HAVE_MPIRUN_TRUE@help: +@HAVE_MPIRUN_TRUE@ @echo +@HAVE_MPIRUN_TRUE@ @echo "Targets:" +@HAVE_MPIRUN_TRUE@ @echo " test : Run performance test." +@HAVE_MPIRUN_TRUE@ @echo +@HAVE_MPIRUN_TRUE@ @echo "Environment variables:" +@HAVE_MPIRUN_TRUE@ @echo " MPI_ARGS : Arguments for mpirun (\"$(MPI_ARGS)\")" +@HAVE_MPIRUN_TRUE@ @echo " TEST_ARGS : Arguments for performance test (\"$(TEST_ARGS)\")" +@HAVE_MPIRUN_TRUE@ @echo + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/tools/perf/api/libperf.h b/src/tools/perf/api/libperf.h new file mode 100644 index 0000000..984df70 --- /dev/null +++ b/src/tools/perf/api/libperf.h @@ -0,0 +1,230 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. 
 +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* Copyright (C) The University of Tennessee and The University +* of Tennessee Research Foundation. 2015. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#ifndef UCX_LIBPERF_H +#define UCX_LIBPERF_H + +#include + +BEGIN_C_DECLS + +/** @file libperf.h */ + +#include +#include +#include +#include +#include +#include + + +typedef enum { + UCX_PERF_API_UCT, + UCX_PERF_API_UCP, + UCX_PERF_API_LAST +} ucx_perf_api_t; + + +typedef enum { + UCX_PERF_CMD_AM, + UCX_PERF_CMD_PUT, + UCX_PERF_CMD_GET, + UCX_PERF_CMD_ADD, + UCX_PERF_CMD_FADD, + UCX_PERF_CMD_SWAP, + UCX_PERF_CMD_CSWAP, + UCX_PERF_CMD_TAG, + UCX_PERF_CMD_TAG_SYNC, + UCX_PERF_CMD_STREAM, + UCX_PERF_CMD_LAST +} ucx_perf_cmd_t; + + +typedef enum { + UCX_PERF_TEST_TYPE_PINGPONG, /* Ping-pong mode */ + UCX_PERF_TEST_TYPE_STREAM_UNI, /* Unidirectional stream */ + UCX_PERF_TEST_TYPE_STREAM_BI, /* Bidirectional stream */ + UCX_PERF_TEST_TYPE_LAST +} ucx_perf_test_type_t; + + +typedef enum { + UCP_PERF_DATATYPE_CONTIG, + UCP_PERF_DATATYPE_IOV, +} ucp_perf_datatype_t; + + +typedef enum { + UCT_PERF_DATA_LAYOUT_SHORT, + UCT_PERF_DATA_LAYOUT_BCOPY, + UCT_PERF_DATA_LAYOUT_ZCOPY, + UCT_PERF_DATA_LAYOUT_LAST +} uct_perf_data_layout_t; + + +typedef enum { + UCX_PERF_WAIT_MODE_PROGRESS, /* Repeatedly call progress */ + UCX_PERF_WAIT_MODE_SLEEP, /* Go to sleep */ + UCX_PERF_WAIT_MODE_SPIN, /* Spin without calling progress */ + UCX_PERF_WAIT_MODE_LAST +} ucx_perf_wait_mode_t; + + +enum ucx_perf_test_flags { + UCX_PERF_TEST_FLAG_VALIDATE = UCS_BIT(1), /* Validate data. Affects performance. */ + UCX_PERF_TEST_FLAG_ONE_SIDED = UCS_BIT(2), /* For tests which involve only one side, + the responder should not call progress().
*/ + UCX_PERF_TEST_FLAG_MAP_NONBLOCK = UCS_BIT(3), /* Map memory in non-blocking mode */ + UCX_PERF_TEST_FLAG_TAG_WILDCARD = UCS_BIT(4), /* For tag tests, use wildcard mask */ + UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE = UCS_BIT(5), /* For tag tests, use probe to get unexpected receive */ + UCX_PERF_TEST_FLAG_VERBOSE = UCS_BIT(7), /* Print error messages */ + UCX_PERF_TEST_FLAG_STREAM_RECV_DATA = UCS_BIT(8) /* For stream tests, use recv data API */ +}; + + +enum { + UCT_PERF_TEST_MAX_FC_WINDOW = 127 /* Maximal flow-control window */ +}; + +/** + * Performance counter type. + */ +typedef uint64_t ucx_perf_counter_t; + + +/* + * Performance test result. + * + * Time values are in seconds. + * Size values are in bytes. + */ +typedef struct ucx_perf_result { + ucx_perf_counter_t iters; + double elapsed_time; + ucx_perf_counter_t bytes; + struct { + double typical; + double moment_average; /* Average since last report */ + double total_average; /* Average of the whole test */ + } + latency, bandwidth, msgrate; +} ucx_perf_result_t; + + +typedef void (*ucx_perf_rte_progress_cb_t)(void *arg); + +typedef unsigned (*ucx_perf_rte_group_size_func_t)(void *rte_group); +typedef unsigned (*ucx_perf_rte_group_index_func_t)(void *rte_group); +typedef void (*ucx_perf_rte_barrier_func_t)(void *rte_group, + ucx_perf_rte_progress_cb_t progress, + void *arg); +typedef void (*ucx_perf_rte_post_vec_func_t)(void *rte_group, + const struct iovec *iovec, + int iovcnt, void **req); +typedef void (*ucx_perf_rte_recv_func_t)(void *rte_group, unsigned src, + void *buffer, size_t max, void *req); +typedef void (*ucx_perf_rte_exchange_vec_func_t)(void *rte_group, void *req); +typedef void (*ucx_perf_rte_report_func_t)(void *rte_group, + const ucx_perf_result_t *result, + void *arg, int is_final); + +/** + * RTE used to bring-up the test + */ +typedef struct ucx_perf_rte { + /* @return Group size */ + ucx_perf_rte_group_size_func_t group_size; + + /* @return My index within the group */ + 
ucx_perf_rte_group_index_func_t group_index; + + /* Barrier */ + ucx_perf_rte_barrier_func_t barrier; + + /* Direct modex */ + ucx_perf_rte_post_vec_func_t post_vec; + ucx_perf_rte_recv_func_t recv; + ucx_perf_rte_exchange_vec_func_t exchange_vec; + + /* Handle results */ + ucx_perf_rte_report_func_t report; + +} ucx_perf_rte_t; + + +/** + * Describes a performance test. + */ +typedef struct ucx_perf_params { + ucx_perf_api_t api; /* Which API to test */ + ucx_perf_cmd_t command; /* Command to perform */ + ucx_perf_test_type_t test_type; /* Test communication type */ + ucs_thread_mode_t thread_mode; /* Thread mode for communication objects */ + unsigned thread_count; /* Number of threads in the test program */ + ucs_async_mode_t async_mode; /* how async progress and locking is done */ + ucx_perf_wait_mode_t wait_mode; /* How to wait */ + ucs_memory_type_t send_mem_type; /* Send memory type */ + ucs_memory_type_t recv_mem_type; /* Recv memory type */ + unsigned flags; /* See ucx_perf_test_flags. */ + + size_t *msg_size_list; /* Test message sizes list. The size + of the array is in msg_size_cnt */ + size_t msg_size_cnt; /* Number of message sizes in + message sizes list */ + size_t iov_stride; /* Distance between starting address + of consecutive IOV entries. 
 It is + similar to UCT uct_iov_t type stride */ + size_t am_hdr_size; /* Active message header size (included in message size) */ + size_t alignment; /* Message buffer alignment */ + unsigned max_outstanding; /* Maximal number of outstanding sends */ + ucx_perf_counter_t warmup_iter; /* Number of warm-up iterations */ + ucx_perf_counter_t max_iter; /* Iterations limit, 0 - unlimited */ + double max_time; /* Time limit (seconds), 0 - unlimited */ + double report_interval; /* Interval at which to call the report callback */ + + void *rte_group; /* Opaque RTE group handle */ + ucx_perf_rte_t *rte; /* RTE functions used to exchange data */ + void *report_arg; /* Custom argument for report function */ + + struct { + char dev_name[UCT_DEVICE_NAME_MAX]; /* Device name to use */ + char tl_name[UCT_TL_NAME_MAX]; /* Transport to use */ + char md_name[UCT_MD_NAME_MAX]; /* Memory domain name to use */ + uct_perf_data_layout_t data_layout; /* Data layout to use */ + unsigned fc_window; /* Window size for flow control <= UCT_PERF_TEST_MAX_FC_WINDOW */ + } uct; + + struct { + unsigned nonblocking_mode; /* TBD */ + ucp_perf_datatype_t send_datatype; + ucp_perf_datatype_t recv_datatype; + } ucp; + +} ucx_perf_params_t; + + +/* Allocators for each memory type */ +typedef struct ucx_perf_allocator ucx_perf_allocator_t; +extern const ucx_perf_allocator_t* ucx_perf_mem_type_allocators[]; + + +/** + * Initialize performance testing framework. May be called multiple times. + */ +void ucx_perf_global_init(); + + +/** + * Run a performance test using the API selected in params->api (UCT or UCP). + */ +ucs_status_t ucx_perf_run(ucx_perf_params_t *params, ucx_perf_result_t *result); + + +END_C_DECLS + +#endif /* UCX_LIBPERF_H */ diff --git a/src/tools/perf/configure.m4 b/src/tools/perf/configure.m4 new file mode 100644 index 0000000..509c383 --- /dev/null +++ b/src/tools/perf/configure.m4 @@ -0,0 +1,19 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms.
+# + +ucx_perftest_modules="" +m4_include([src/tools/perf/lib/configure.m4]) +m4_include([src/tools/perf/cuda/configure.m4]) +m4_include([src/tools/perf/rocm/configure.m4]) +AC_DEFINE_UNQUOTED([ucx_perftest_MODULES], ["${ucx_perftest_modules}"], + [Perftest loadable modules]) + +# TODO build RTE support (MPI/librte) as loadable modules +AS_IF([test -n "$MPICC"], + [AC_SUBST([UCX_PERFTEST_CC], [$MPICC])], + [AC_SUBST([UCX_PERFTEST_CC], [$CC])]) + +AC_CONFIG_FILES([src/tools/perf/Makefile]) diff --git a/src/tools/perf/cuda/Makefile.am b/src/tools/perf/cuda/Makefile.am new file mode 100644 index 0000000..aa6cb37 --- /dev/null +++ b/src/tools/perf/cuda/Makefile.am @@ -0,0 +1,17 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +if HAVE_CUDA + +module_LTLIBRARIES = libucx_perftest_cuda.la +libucx_perftest_cuda_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS) +libucx_perftest_cuda_la_CFLAGS = $(BASE_CFLAGS) $(CUDA_CFLAGS) +libucx_perftest_cuda_la_LDFLAGS = $(CUDA_LDFLAGS) -version-info $(SOVERSION) +libucx_perftest_cuda_la_SOURCES = cuda_alloc.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/tools/perf/cuda/Makefile.in b/src/tools/perf/cuda/Makefile.in new file mode 100644 index 0000000..1d89412 --- /dev/null +++ b/src/tools/perf/cuda/Makefile.in @@ -0,0 +1,847 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd 
+install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/tools/perf/cuda +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + 
$(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +libucx_perftest_cuda_la_LIBADD = +am__libucx_perftest_cuda_la_SOURCES_DIST = cuda_alloc.c +@HAVE_CUDA_TRUE@am_libucx_perftest_cuda_la_OBJECTS = \ +@HAVE_CUDA_TRUE@ libucx_perftest_cuda_la-cuda_alloc.lo +libucx_perftest_cuda_la_OBJECTS = \ + $(am_libucx_perftest_cuda_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libucx_perftest_cuda_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libucx_perftest_cuda_la_CFLAGS) $(CFLAGS) \ + $(libucx_perftest_cuda_la_LDFLAGS) $(LDFLAGS) -o $@ +@HAVE_CUDA_TRUE@am_libucx_perftest_cuda_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = \ + ./$(DEPDIR)/libucx_perftest_cuda_la-cuda_alloc.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + 
$(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libucx_perftest_cuda_la_SOURCES) +DIST_SOURCES = $(am__libucx_perftest_cuda_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = 
@CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ 
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = 
@build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_CUDA_TRUE@module_LTLIBRARIES = libucx_perftest_cuda.la +@HAVE_CUDA_TRUE@libucx_perftest_cuda_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS) +@HAVE_CUDA_TRUE@libucx_perftest_cuda_la_CFLAGS = $(BASE_CFLAGS) $(CUDA_CFLAGS) +@HAVE_CUDA_TRUE@libucx_perftest_cuda_la_LDFLAGS = $(CUDA_LDFLAGS) -version-info $(SOVERSION) +@HAVE_CUDA_TRUE@libucx_perftest_cuda_la_SOURCES = cuda_alloc.c + +# Automake silent rules +@HAVE_CUDA_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_CUDA_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_CUDA_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_CUDA_TRUE@AM_V_LN_1 = true +@HAVE_CUDA_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ 
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/tools/perf/cuda/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/tools/perf/cuda/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + 
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libucx_perftest_cuda.la: $(libucx_perftest_cuda_la_OBJECTS) $(libucx_perftest_cuda_la_DEPENDENCIES) $(EXTRA_libucx_perftest_cuda_la_DEPENDENCIES) + $(AM_V_CCLD)$(libucx_perftest_cuda_la_LINK) $(am_libucx_perftest_cuda_la_rpath) $(libucx_perftest_cuda_la_OBJECTS) $(libucx_perftest_cuda_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libucx_perftest_cuda_la-cuda_alloc.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libucx_perftest_cuda_la-cuda_alloc.lo: cuda_alloc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucx_perftest_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libucx_perftest_cuda_la_CFLAGS) $(CFLAGS) -MT libucx_perftest_cuda_la-cuda_alloc.lo -MD -MP -MF $(DEPDIR)/libucx_perftest_cuda_la-cuda_alloc.Tpo -c -o libucx_perftest_cuda_la-cuda_alloc.lo `test -f 'cuda_alloc.c' || echo '$(srcdir)/'`cuda_alloc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libucx_perftest_cuda_la-cuda_alloc.Tpo $(DEPDIR)/libucx_perftest_cuda_la-cuda_alloc.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cuda_alloc.c' object='libucx_perftest_cuda_la-cuda_alloc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucx_perftest_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libucx_perftest_cuda_la_CFLAGS) $(CFLAGS) -c -o libucx_perftest_cuda_la-cuda_alloc.lo `test -f 'cuda_alloc.c' || echo '$(srcdir)/'`cuda_alloc.c 
+ +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file 
|| test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_CUDA_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libucx_perftest_cuda_la-cuda_alloc.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libucx_perftest_cuda_la-cuda_alloc.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + 
+.PRECIOUS: Makefile
+
+
+@HAVE_CUDA_TRUE@all-local: $(local_la_modules)
+
+# Create symbolic links for the built modules under $(localmoduledir)
+# Link also *.la files to create proper makefile dependencies
+@HAVE_CUDA_TRUE@$(local_la_modules): $(module_LTLIBRARIES)
+@HAVE_CUDA_TRUE@	$(AM_V_at)$(MKDIR_P) $(localmoduledir)
+@HAVE_CUDA_TRUE@	$(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \
+@HAVE_CUDA_TRUE@		(cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \
+@HAVE_CUDA_TRUE@	done
+@HAVE_CUDA_TRUE@	@for lib in *.la $(objdir)/*$(shrext)*; do \
+@HAVE_CUDA_TRUE@		$(AM_V_LN) $$lib; \
+@HAVE_CUDA_TRUE@	done
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/tools/perf/cuda/configure.m4 b/src/tools/perf/cuda/configure.m4
new file mode 100644
index 0000000..f2e5cfe
--- /dev/null
+++ b/src/tools/perf/cuda/configure.m4
@@ -0,0 +1,11 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+#
+
+UCX_CHECK_CUDA
+
+AS_IF([test "x$cuda_happy" = "xyes"], [ucx_perftest_modules="${ucx_perftest_modules}:cuda"])
+
+AC_CONFIG_FILES([src/tools/perf/cuda/Makefile])
diff --git a/src/tools/perf/cuda/cuda_alloc.c b/src/tools/perf/cuda/cuda_alloc.c
new file mode 100644
index 0000000..2fd20c1
--- /dev/null
+++ b/src/tools/perf/cuda/cuda_alloc.c
@@ -0,0 +1,265 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+/* NOTE(review): the header names below were missing from this copy of the
+ * patch and are reconstructed from the symbols used in this file - confirm. */
+#include <tools/perf/lib/libperf_int.h>
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <ucs/sys/compiler.h>
+
+
+/**
+ * Select the CUDA device used by the caller.
+ *
+ * The device index is the RTE group index modulo the number of devices
+ * reported by cudaGetDeviceCount().
+ *
+ * @param [in] perf  Perftest context; used to query the RTE group index.
+ *
+ * @return UCS_OK on success, UCS_ERR_NO_DEVICE if the device count cannot be
+ *         obtained, is not positive, or cudaSetDevice() fails.
+ */
+static ucs_status_t ucx_perf_cuda_init(ucx_perf_context_t *perf)
+{
+    cudaError_t cerr;
+    unsigned group_index;
+    int num_gpus;
+    int gpu_index;
+
+    group_index = rte_call(perf, group_index);
+
+    /* num_gpus is the divisor below, so a non-positive count is rejected too */
+    cerr = cudaGetDeviceCount(&num_gpus);
+    if ((cerr != cudaSuccess) || (num_gpus <= 0)) {
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    gpu_index = group_index % num_gpus;
+
+    cerr = cudaSetDevice(gpu_index);
+    if (cerr != cudaSuccess) {
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    return UCS_OK;
+}
+
+/**
+ * Allocate CUDA device memory or CUDA managed memory.
+ *
+ * @param [in]  length     Number of bytes to allocate.
+ * @param [in]  mem_type   UCS_MEMORY_TYPE_CUDA selects cudaMalloc(),
+ *                         UCS_MEMORY_TYPE_CUDA_MANAGED selects
+ *                         cudaMallocManaged() with cudaMemAttachGlobal.
+ * @param [out] address_p  Receives the allocated address.
+ *
+ * @return UCS_OK on success, UCS_ERR_NO_MEMORY if the CUDA call fails.
+ */
+static inline ucs_status_t ucx_perf_cuda_alloc(size_t length,
+                                               ucs_memory_type_t mem_type,
+                                               void **address_p)
+{
+    cudaError_t cerr;
+
+    ucs_assert((mem_type == UCS_MEMORY_TYPE_CUDA) ||
+               (mem_type == UCS_MEMORY_TYPE_CUDA_MANAGED));
+
+    cerr = ((mem_type == UCS_MEMORY_TYPE_CUDA) ?
+            cudaMalloc(address_p, length) :
+            cudaMallocManaged(address_p, length, cudaMemAttachGlobal));
+    if (cerr != cudaSuccess) {
+        /* Include the CUDA reason, as the memcpy/memset wrappers do */
+        ucs_error("failed to allocate memory: %s", cudaGetErrorString(cerr));
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    return UCS_OK;
+}
+
+/**
+ * UCP allocation hook for UCS_MEMORY_TYPE_CUDA buffers.
+ *
+ * Only @a length and @a address_p are used; @a perf, @a memh_p and
+ * @a non_blk_flag are ignored.
+ */
+static ucs_status_t ucp_perf_cuda_alloc(const ucx_perf_context_t *perf, size_t length,
+                                        void **address_p, ucp_mem_h *memh_p,
+                                        int non_blk_flag)
+{
+    return ucx_perf_cuda_alloc(length, UCS_MEMORY_TYPE_CUDA, address_p);
+}
+
+/**
+ * UCP allocation hook for UCS_MEMORY_TYPE_CUDA_MANAGED buffers.
+ *
+ * Only @a length and @a address_p are used; @a perf, @a memh_p and
+ * @a non_blk_flag are ignored.
+ */
+static ucs_status_t ucp_perf_cuda_alloc_managed(const ucx_perf_context_t *perf,
+                                                size_t length, void **address_p,
+                                                ucp_mem_h *memh_p, int non_blk_flag)
+{
+    return ucx_perf_cuda_alloc(length, UCS_MEMORY_TYPE_CUDA_MANAGED, address_p);
+}
+
+/**
+ * UCP free hook: releases @a address with cudaFree(); @a perf and @a memh are
+ * ignored.
+ */
+static void ucp_perf_cuda_free(const ucx_perf_context_t *perf,
+                               void *address, ucp_mem_h memh)
+{
+    cudaFree(address);
+}
+
+/**
+ * Allocate CUDA memory of the given type and register it with the UCT memory
+ * domain perf->uct.md.
+ *
+ * @param [in]  perf       Perftest context providing perf->uct.md.
+ * @param [in]  length     Number of bytes to allocate and register.
+ * @param [in]  mem_type   UCS_MEMORY_TYPE_CUDA or UCS_MEMORY_TYPE_CUDA_MANAGED.
+ * @param [in]  flags      Passed through to uct_md_mem_reg().
+ * @param [out] alloc_mem  On success holds address, memh, mem_type and md.
+ *
+ * @return UCS_OK, or the status of the failing allocation or registration.
+ *         When registration fails the memory is freed before returning.
+ */
+static inline ucs_status_t
+uct_perf_cuda_alloc_reg_mem(const ucx_perf_context_t *perf,
+                            size_t length,
+                            ucs_memory_type_t mem_type,
+                            unsigned flags,
+                            uct_allocated_memory_t *alloc_mem)
+{
+    ucs_status_t status;
+
+    status = ucx_perf_cuda_alloc(length, mem_type, &alloc_mem->address);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    status = uct_md_mem_reg(perf->uct.md, alloc_mem->address,
+                            length, flags, &alloc_mem->memh);
+    if (status != UCS_OK) {
+        cudaFree(alloc_mem->address);
+        ucs_error("failed to register memory");
+        return status;
+    }
+
+    alloc_mem->mem_type = mem_type;
+    alloc_mem->md       = perf->uct.md;
+
+    return UCS_OK;
+}
+
+/** UCT allocation hook for UCS_MEMORY_TYPE_CUDA buffers. */
+static ucs_status_t uct_perf_cuda_alloc(const ucx_perf_context_t *perf,
+                                        size_t length, unsigned flags,
+                                        uct_allocated_memory_t *alloc_mem)
+{
+    return uct_perf_cuda_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_CUDA,
+                                       flags, alloc_mem);
+}
+
+/** UCT allocation hook for UCS_MEMORY_TYPE_CUDA_MANAGED buffers. */
+static ucs_status_t uct_perf_cuda_managed_alloc(const ucx_perf_context_t *perf,
+                                                size_t length, unsigned flags,
+                                                uct_allocated_memory_t *alloc_mem)
+{
+    return uct_perf_cuda_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_CUDA_MANAGED,
+                                       flags, alloc_mem);
+}
+
+/**
+ * UCT free hook: deregisters alloc_mem->memh from perf->uct.md, then releases
+ * alloc_mem->address with cudaFree(). A deregistration failure is logged and
+ * the memory is still freed.
+ */
+static void uct_perf_cuda_free(const ucx_perf_context_t *perf,
+                               uct_allocated_memory_t *alloc_mem)
+{
+    ucs_status_t status;
+
+    ucs_assert(alloc_mem->md == perf->uct.md);
+
+    status = uct_md_mem_dereg(perf->uct.md, alloc_mem->memh);
+    if (status != UCS_OK) {
+        ucs_error("failed to deregister memory");
+    }
+
+    cudaFree(alloc_mem->address);
+}
+
+/**
+ * Copy @a count bytes from @a src to @a dst with cudaMemcpy() and
+ * cudaMemcpyDefault. The memory type arguments are not consulted. A failure
+ * is logged only.
+ */
+static void ucx_perf_cuda_memcpy(void *dst, ucs_memory_type_t dst_mem_type,
+                                 const void *src, ucs_memory_type_t src_mem_type,
+                                 size_t count)
+{
+    cudaError_t cerr;
+
+    cerr = cudaMemcpy(dst, src, count, cudaMemcpyDefault);
+    if (cerr != cudaSuccess) {
+        ucs_error("failed to copy memory: %s", cudaGetErrorString(cerr));
+    }
+}
+
+/**
+ * Fill @a count bytes at @a dst with @a value using cudaMemset(). A failure is
+ * logged only.
+ *
+ * @return @a dst, whether or not the fill succeeded.
+ */
+static void* ucx_perf_cuda_memset(void *dst, int value, size_t count)
+{
+    cudaError_t cerr;
+
+    cerr = cudaMemset(dst, value, count);
+    if (cerr != cudaSuccess) {
+        ucs_error("failed to set memory: %s", cudaGetErrorString(cerr));
+    }
+
+    return dst;
+}
+
+/* Install the CUDA and CUDA-managed allocators in ucx_perf_mem_type_allocators */
+UCS_STATIC_INIT {
+    static ucx_perf_allocator_t cuda_allocator = {
+        .mem_type  = UCS_MEMORY_TYPE_CUDA,
+        .init      = ucx_perf_cuda_init,
+        .ucp_alloc = ucp_perf_cuda_alloc,
+        .ucp_free  = ucp_perf_cuda_free,
+        .uct_alloc = uct_perf_cuda_alloc,
+        .uct_free  = uct_perf_cuda_free,
+        .memcpy    = ucx_perf_cuda_memcpy,
+        .memset    = ucx_perf_cuda_memset
+    };
+    static ucx_perf_allocator_t cuda_managed_allocator = {
+        .mem_type  = UCS_MEMORY_TYPE_CUDA_MANAGED,
+        .init      = ucx_perf_cuda_init,
+        .ucp_alloc = ucp_perf_cuda_alloc_managed,
+        .ucp_free  = ucp_perf_cuda_free,
+        .uct_alloc = uct_perf_cuda_managed_alloc,
+        .uct_free  = uct_perf_cuda_free,
+        .memcpy    = ucx_perf_cuda_memcpy,
+        .memset    = ucx_perf_cuda_memset
+    };
+
+    ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA]         = &cuda_allocator;
+    ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] = &cuda_managed_allocator;
+}
+
+/* Clear the two table entries set by the static initializer */
+UCS_STATIC_CLEANUP {
+    ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA]         = NULL;
+    ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] = NULL;
+}
diff --git a/src/tools/perf/lib/Makefile.am b/src/tools/perf/lib/Makefile.am
new file mode 100644
index 0000000..9844c7f
--- /dev/null
+++ b/src/tools/perf/lib/Makefile.am
@@ -0,0 +1,30 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+# Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED.
+# Copyright (C) The University of Tennessee and The University
+#               of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
+# Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+#
+
+# Libtool convenience library holding the perftest engine; the noinst_ prefix
+# means the library and its private header are built but not installed
+noinst_LTLIBRARIES = libucxperf.la
+noinst_HEADERS     = libperf_int.h
+
+# Per-target flags; the OpenMP flags are given to the compilers and the linker
+libucxperf_la_CPPFLAGS = $(BASE_CPPFLAGS) $(RTE_CPPFLAGS)
+libucxperf_la_CFLAGS   = $(BASE_CFLAGS) $(OPENMP_CFLAGS)
+libucxperf_la_CXXFLAGS = $(BASE_CXXFLAGS) $(OPENMP_CFLAGS)
+libucxperf_la_LDFLAGS  = $(RTE_LDFLAGS) $(OPENMP_CFLAGS)
+
+# UCX libraries from the build tree that this library is linked against
+libucxperf_la_LIBADD  = $(abs_top_builddir)/src/uct/libuct.la
+libucxperf_la_LIBADD += $(abs_top_builddir)/src/ucp/libucp.la
+libucxperf_la_LIBADD += $(abs_top_builddir)/src/ucs/libucs.la
+
+# C-linkable C++ code - must override any inherited CXXFLAGS
+CXXFLAGS += -nostdlib $(PERF_LIB_CXXFLAGS)
+
+libucxperf_la_SOURCES = libperf.c uct_tests.cc ucp_tests.cc
diff --git a/src/tools/perf/lib/Makefile.in b/src/tools/perf/lib/Makefile.in
new file mode 100644
index 0000000..514c6ee
--- /dev/null
+++ b/src/tools/perf/lib/Makefile.in
@@ -0,0 +1,845 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+# Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED.
+# Copyright (C) The University of Tennessee and The University
+#               of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
+# Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : 
+NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/tools/perf/lib +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + 
$(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libucxperf_la_DEPENDENCIES = $(abs_top_builddir)/src/uct/libuct.la \ + $(abs_top_builddir)/src/ucp/libucp.la \ + $(abs_top_builddir)/src/ucs/libucs.la +am_libucxperf_la_OBJECTS = libucxperf_la-libperf.lo \ + libucxperf_la-uct_tests.lo libucxperf_la-ucp_tests.lo +libucxperf_la_OBJECTS = $(am_libucxperf_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libucxperf_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(libucxperf_la_CXXFLAGS) $(CXXFLAGS) $(libucxperf_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libucxperf_la-libperf.Plo \ + ./$(DEPDIR)/libucxperf_la-ucp_tests.Plo \ + ./$(DEPDIR)/libucxperf_la-uct_tests.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +SOURCES = $(libucxperf_la_SOURCES) +DIST_SOURCES = $(libucxperf_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. 
This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ + +# C-linkable C++ code - must override any inherited CXXFLAGS +CXXFLAGS = @CXXFLAGS@ -nostdlib $(PERF_LIB_CXXFLAGS) +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ 
+DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = 
@PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ 
+psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +noinst_LTLIBRARIES = libucxperf.la +libucxperf_la_CPPFLAGS = $(BASE_CPPFLAGS) $(RTE_CPPFLAGS) +libucxperf_la_CFLAGS = $(BASE_CFLAGS) $(OPENMP_CFLAGS) +libucxperf_la_CXXFLAGS = $(BASE_CXXFLAGS) $(OPENMP_CFLAGS) +libucxperf_la_LDFLAGS = $(RTE_LDFLAGS) $(OPENMP_CFLAGS) +libucxperf_la_LIBADD = \ + $(abs_top_builddir)/src/uct/libuct.la \ + $(abs_top_builddir)/src/ucp/libucp.la \ + $(abs_top_builddir)/src/ucs/libucs.la + +noinst_HEADERS = \ + libperf_int.h + +libucxperf_la_SOURCES = \ + libperf.c \ + uct_tests.cc \ + ucp_tests.cc + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cc .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/tools/perf/lib/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/tools/perf/lib/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libucxperf.la: $(libucxperf_la_OBJECTS) $(libucxperf_la_DEPENDENCIES) $(EXTRA_libucxperf_la_DEPENDENCIES) + $(AM_V_CXXLD)$(libucxperf_la_LINK) $(libucxperf_la_OBJECTS) $(libucxperf_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libucxperf_la-libperf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libucxperf_la-ucp_tests.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libucxperf_la-uct_tests.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ 
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libucxperf_la-libperf.lo: libperf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucxperf_la_CPPFLAGS) $(CPPFLAGS) $(libucxperf_la_CFLAGS) $(CFLAGS) -MT libucxperf_la-libperf.lo -MD -MP -MF $(DEPDIR)/libucxperf_la-libperf.Tpo -c -o libucxperf_la-libperf.lo `test -f 'libperf.c' || echo '$(srcdir)/'`libperf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libucxperf_la-libperf.Tpo $(DEPDIR)/libucxperf_la-libperf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='libperf.c' 
object='libucxperf_la-libperf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucxperf_la_CPPFLAGS) $(CPPFLAGS) $(libucxperf_la_CFLAGS) $(CFLAGS) -c -o libucxperf_la-libperf.lo `test -f 'libperf.c' || echo '$(srcdir)/'`libperf.c + +.cc.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cc.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +libucxperf_la-uct_tests.lo: uct_tests.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucxperf_la_CPPFLAGS) $(CPPFLAGS) $(libucxperf_la_CXXFLAGS) $(CXXFLAGS) -MT libucxperf_la-uct_tests.lo -MD -MP -MF $(DEPDIR)/libucxperf_la-uct_tests.Tpo -c -o libucxperf_la-uct_tests.lo `test -f 'uct_tests.cc' || echo '$(srcdir)/'`uct_tests.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libucxperf_la-uct_tests.Tpo $(DEPDIR)/libucxperf_la-uct_tests.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct_tests.cc' object='libucxperf_la-uct_tests.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucxperf_la_CPPFLAGS) $(CPPFLAGS) $(libucxperf_la_CXXFLAGS) $(CXXFLAGS) -c -o libucxperf_la-uct_tests.lo `test -f 'uct_tests.cc' || echo '$(srcdir)/'`uct_tests.cc + +libucxperf_la-ucp_tests.lo: ucp_tests.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucxperf_la_CPPFLAGS) $(CPPFLAGS) $(libucxperf_la_CXXFLAGS) $(CXXFLAGS) -MT libucxperf_la-ucp_tests.lo -MD -MP -MF $(DEPDIR)/libucxperf_la-ucp_tests.Tpo -c -o libucxperf_la-ucp_tests.lo `test -f 'ucp_tests.cc' || echo '$(srcdir)/'`ucp_tests.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libucxperf_la-ucp_tests.Tpo $(DEPDIR)/libucxperf_la-ucp_tests.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp_tests.cc' object='libucxperf_la-ucp_tests.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) 
$(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucxperf_la_CPPFLAGS) $(CPPFLAGS) $(libucxperf_la_CXXFLAGS) $(CXXFLAGS) -c -o libucxperf_la-ucp_tests.lo `test -f 'ucp_tests.cc' || echo '$(srcdir)/'`ucp_tests.cc + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 
's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libucxperf_la-libperf.Plo + -rm -f ./$(DEPDIR)/libucxperf_la-ucp_tests.Plo + -rm -f ./$(DEPDIR)/libucxperf_la-uct_tests.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libucxperf_la-libperf.Plo + -rm -f ./$(DEPDIR)/libucxperf_la-ucp_tests.Plo + -rm -f ./$(DEPDIR)/libucxperf_la-uct_tests.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstLTLIBRARIES \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man 
install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/tools/perf/lib/configure.m4 b/src/tools/perf/lib/configure.m4 new file mode 100644 index 0000000..6ba7737 --- /dev/null +++ b/src/tools/perf/lib/configure.m4 @@ -0,0 +1,28 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +AC_LANG_PUSH([C++]) + +CHECK_COMPILER_FLAG([-fno-exceptions], [-fno-exceptions], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [PERF_LIB_CXXFLAGS="$PERF_LIB_CXXFLAGS -fno-exceptions"], + []) + +CHECK_COMPILER_FLAG([-fno-rtti], [-fno-rtti], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [PERF_LIB_CXXFLAGS="$PERF_LIB_CXXFLAGS -fno-rtti"], + []) + +CHECK_COMPILER_FLAG([--no_exceptions], [--no_exceptions], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [PERF_LIB_CXXFLAGS="$PERF_LIB_CXXFLAGS --no_exceptions"], + []) + +AC_LANG_POP([C++]) + +AC_SUBST([PERF_LIB_CXXFLAGS], [$PERF_LIB_CXXFLAGS]) + +AC_CONFIG_FILES([src/tools/perf/lib/Makefile]) diff --git a/src/tools/perf/lib/libperf.c b/src/tools/perf/lib/libperf.c new file mode 100644 index 0000000..3c2fe3b --- /dev/null +++ b/src/tools/perf/lib/libperf.c @@ -0,0 +1,1811 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* Copyright (C) The University of Tennessee and The University +* of Tennessee Research Foundation. 2015-2016. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2017. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +#if _OPENMP +#include +#endif /* _OPENMP */ + +#define ATOMIC_OP_CONFIG(_size, _op32, _op64, _op, _msg, _params, _status) \ + _status = __get_atomic_flag((_size), (_op32), (_op64), (_op)); \ + if (_status != UCS_OK) { \ + ucs_error("%s/%s does not support atomic %s for message size %zu bytes", \ + (_params)->uct.tl_name, (_params)->uct.dev_name, \ + (_msg)[_op], (_size)); \ + return _status; \ + } + +#define ATOMIC_OP_CHECK(_size, _attr, _required, _params, _msg) \ + if (!ucs_test_all_flags(_attr, _required)) { \ + if ((_params)->flags & UCX_PERF_TEST_FLAG_VERBOSE) { \ + ucs_error("%s/%s does not support required "#_size"-bit atomic: %s", \ + (_params)->uct.tl_name, (_params)->uct.dev_name, \ + (_msg)[ucs_ffs64(~(_attr) & (_required))]); \ + } \ + return UCS_ERR_UNSUPPORTED; \ + } + +typedef struct { + union { + struct { + size_t dev_addr_len; + size_t iface_addr_len; + size_t ep_addr_len; + } uct; + struct { + size_t addr_len; + } ucp; + }; + size_t rkey_size; + unsigned long recv_buffer; +} ucx_perf_ep_info_t; + + +const ucx_perf_allocator_t* ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_LAST]; + +static const char *perf_iface_ops[] = { + [ucs_ilog2(UCT_IFACE_FLAG_AM_SHORT)] = "am short", + [ucs_ilog2(UCT_IFACE_FLAG_AM_BCOPY)] = "am bcopy", + [ucs_ilog2(UCT_IFACE_FLAG_AM_ZCOPY)] = "am zcopy", + [ucs_ilog2(UCT_IFACE_FLAG_PUT_SHORT)] = "put short", + [ucs_ilog2(UCT_IFACE_FLAG_PUT_BCOPY)] = "put bcopy", + [ucs_ilog2(UCT_IFACE_FLAG_PUT_ZCOPY)] = "put zcopy", + [ucs_ilog2(UCT_IFACE_FLAG_GET_SHORT)] = "get short", + [ucs_ilog2(UCT_IFACE_FLAG_GET_BCOPY)] = "get bcopy", + [ucs_ilog2(UCT_IFACE_FLAG_GET_ZCOPY)] = "get zcopy", + [ucs_ilog2(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)] = "peer failure handler", + [ucs_ilog2(UCT_IFACE_FLAG_CONNECT_TO_IFACE)] = "connect to iface", + [ucs_ilog2(UCT_IFACE_FLAG_CONNECT_TO_EP)] = "connect to ep", + 
[ucs_ilog2(UCT_IFACE_FLAG_AM_DUP)] = "full reliability", + [ucs_ilog2(UCT_IFACE_FLAG_CB_SYNC)] = "sync callback", + [ucs_ilog2(UCT_IFACE_FLAG_CB_ASYNC)] = "async callback", + [ucs_ilog2(UCT_IFACE_FLAG_EVENT_SEND_COMP)] = "send completion event", + [ucs_ilog2(UCT_IFACE_FLAG_EVENT_RECV)] = "tag or active message event", + [ucs_ilog2(UCT_IFACE_FLAG_EVENT_RECV_SIG)] = "signaled message event", + [ucs_ilog2(UCT_IFACE_FLAG_PENDING)] = "pending", + [ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_SHORT)] = "tag eager short", + [ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)] = "tag eager bcopy", + [ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_ZCOPY)] = "tag eager zcopy", + [ucs_ilog2(UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)] = "tag rndv zcopy" +}; + +static const char *perf_atomic_op[] = { + [UCT_ATOMIC_OP_ADD] = "add", + [UCT_ATOMIC_OP_AND] = "and", + [UCT_ATOMIC_OP_OR] = "or" , + [UCT_ATOMIC_OP_XOR] = "xor" +}; + +static const char *perf_atomic_fop[] = { + [UCT_ATOMIC_OP_ADD] = "fetch-add", + [UCT_ATOMIC_OP_AND] = "fetch-and", + [UCT_ATOMIC_OP_OR] = "fetch-or", + [UCT_ATOMIC_OP_XOR] = "fetch-xor", + [UCT_ATOMIC_OP_SWAP] = "swap", + [UCT_ATOMIC_OP_CSWAP] = "cswap" +}; + +/* + * This Quickselect routine is based on the algorithm described in + * "Numerical recipes in C", Second Edition, + * Cambridge University Press, 1992, Section 8.5, ISBN 0-521-43108-5 + * This code by Nicolas Devillard - 1998. Public domain. 
+ */ +static ucs_time_t __find_median_quick_select(ucs_time_t arr[], int n) +{ + int low, high ; + int median; + int middle, ll, hh; + +#define ELEM_SWAP(a,b) { register ucs_time_t t=(a);(a)=(b);(b)=t; } + + low = 0 ; high = n-1 ; median = (low + high) / 2; + for (;;) { + if (high <= low) /* One element only */ + return arr[median] ; + + if (high == low + 1) { /* Two elements only */ + if (arr[low] > arr[high]) + ELEM_SWAP(arr[low], arr[high]) ; + return arr[median] ; + } + + /* Find median of low, middle and high items; swap into position low */ + middle = (low + high) / 2; + if (arr[middle] > arr[high]) ELEM_SWAP(arr[middle], arr[high]) ; + if (arr[low] > arr[high]) ELEM_SWAP(arr[low], arr[high]) ; + if (arr[middle] > arr[low]) ELEM_SWAP(arr[middle], arr[low]) ; + + /* Swap low item (now in position middle) into position (low+1) */ + ELEM_SWAP(arr[middle], arr[low+1]) ; + + /* Nibble from each end towards middle, swapping items when stuck */ + ll = low + 1; + hh = high; + for (;;) { + do ll++; while (arr[low] > arr[ll]) ; + do hh--; while (arr[hh] > arr[low]) ; + + if (hh < ll) + break; + + ELEM_SWAP(arr[ll], arr[hh]) ; + } + + /* Swap middle item (in position low) back into correct position */ + ELEM_SWAP(arr[low], arr[hh]) ; + + /* Re-set active partition */ + if (hh <= median) + low = ll; + if (hh >= median) + high = hh - 1; + } +} + +static ucs_status_t +uct_perf_test_alloc_host(const ucx_perf_context_t *perf, size_t length, + unsigned flags, uct_allocated_memory_t *alloc_mem) +{ + ucs_status_t status; + + status = uct_iface_mem_alloc(perf->uct.iface, length, + flags, "perftest", alloc_mem); + if (status != UCS_OK) { + ucs_free(alloc_mem); + ucs_error("failed to allocate memory: %s", ucs_status_string(status)); + return status; + } + + ucs_assert(alloc_mem->md == perf->uct.md); + + return UCS_OK; +} + +static void uct_perf_test_free_host(const ucx_perf_context_t *perf, + uct_allocated_memory_t *alloc_mem) +{ + uct_iface_mem_free(alloc_mem); +} + +static void 
ucx_perf_test_memcpy_host(void *dst, ucs_memory_type_t dst_mem_type, + const void *src, ucs_memory_type_t src_mem_type, + size_t count) +{ + if ((dst_mem_type != UCS_MEMORY_TYPE_HOST) || + (src_mem_type != UCS_MEMORY_TYPE_HOST)) { + ucs_error("wrong memory type passed src - %d, dst - %d", + src_mem_type, dst_mem_type); + } else { + memcpy(dst, src, count); + } +} + +static ucs_status_t uct_perf_test_alloc_mem(ucx_perf_context_t *perf) +{ + ucx_perf_params_t *params = &perf->params; + ucs_status_t status; + unsigned flags; + size_t buffer_size; + + if ((UCT_PERF_DATA_LAYOUT_ZCOPY == params->uct.data_layout) && params->iov_stride) { + buffer_size = params->msg_size_cnt * params->iov_stride; + } else { + buffer_size = ucx_perf_get_message_size(params); + } + + /* TODO use params->alignment */ + + flags = (params->flags & UCX_PERF_TEST_FLAG_MAP_NONBLOCK) ? + UCT_MD_MEM_FLAG_NONBLOCK : 0; + flags |= UCT_MD_MEM_ACCESS_ALL; + + /* Allocate send buffer memory */ + status = perf->allocator->uct_alloc(perf, buffer_size * params->thread_count, + flags, &perf->uct.send_mem); + if (status != UCS_OK) { + goto err; + } + + perf->send_buffer = perf->uct.send_mem.address; + + /* Allocate receive buffer memory */ + status = perf->allocator->uct_alloc(perf, buffer_size * params->thread_count, + flags, &perf->uct.recv_mem); + if (status != UCS_OK) { + goto err_free_send; + } + + perf->recv_buffer = perf->uct.recv_mem.address; + + /* Allocate IOV datatype memory */ + perf->params.msg_size_cnt = params->msg_size_cnt; + perf->uct.iov = malloc(sizeof(*perf->uct.iov) * + perf->params.msg_size_cnt * + params->thread_count); + if (NULL == perf->uct.iov) { + status = UCS_ERR_NO_MEMORY; + ucs_error("Failed allocate send IOV(%lu) buffer: %s", + perf->params.msg_size_cnt, ucs_status_string(status)); + goto err_free_recv; + } + + perf->offset = 0; + + ucs_debug("allocated memory. 
Send buffer %p, Recv buffer %p", + perf->send_buffer, perf->recv_buffer); + return UCS_OK; + +err_free_recv: + perf->allocator->uct_free(perf, &perf->uct.recv_mem); +err_free_send: + perf->allocator->uct_free(perf, &perf->uct.send_mem); +err: + return status; +} + +static void uct_perf_test_free_mem(ucx_perf_context_t *perf) +{ + perf->allocator->uct_free(perf, &perf->uct.send_mem); + perf->allocator->uct_free(perf, &perf->uct.recv_mem); + free(perf->uct.iov); +} + +void ucx_perf_test_start_clock(ucx_perf_context_t *perf) +{ + ucs_time_t start_time = ucs_get_time(); + + perf->start_time_acc = ucs_get_accurate_time(); + perf->end_time = (perf->params.max_time == 0.0) ? UINT64_MAX : + ucs_time_from_sec(perf->params.max_time) + start_time; + perf->prev_time = start_time; + perf->prev.time = start_time; + perf->prev.time_acc = perf->start_time_acc; + perf->current.time_acc = perf->start_time_acc; +} + +/* Initialize/reset all parameters that could be modified by the warm-up run */ +static void ucx_perf_test_prepare_new_run(ucx_perf_context_t *perf, + ucx_perf_params_t *params) +{ + unsigned i; + + perf->max_iter = (perf->params.max_iter == 0) ? 
UINT64_MAX : + perf->params.max_iter; + perf->report_interval = ucs_time_from_sec(perf->params.report_interval); + perf->current.time = 0; + perf->current.msgs = 0; + perf->current.bytes = 0; + perf->current.iters = 0; + perf->prev.msgs = 0; + perf->prev.bytes = 0; + perf->prev.iters = 0; + perf->timing_queue_head = 0; + + for (i = 0; i < TIMING_QUEUE_SIZE; ++i) { + perf->timing_queue[i] = 0; + } + ucx_perf_test_start_clock(perf); +} + +static void ucx_perf_test_init(ucx_perf_context_t *perf, + ucx_perf_params_t *params) +{ + unsigned group_index; + + perf->params = *params; + perf->offset = 0; + group_index = rte_call(perf, group_index); + + if (0 == group_index) { + perf->allocator = ucx_perf_mem_type_allocators[params->send_mem_type]; + } else { + perf->allocator = ucx_perf_mem_type_allocators[params->recv_mem_type]; + } + + ucx_perf_test_prepare_new_run(perf, params); +} + +void ucx_perf_calc_result(ucx_perf_context_t *perf, ucx_perf_result_t *result) +{ + ucs_time_t median; + double factor; + + if (perf->params.test_type == UCX_PERF_TEST_TYPE_PINGPONG) { + factor = 2.0; + } else { + factor = 1.0; + } + + result->iters = perf->current.iters; + result->bytes = perf->current.bytes; + result->elapsed_time = perf->current.time_acc - perf->start_time_acc; + + /* Latency */ + median = __find_median_quick_select(perf->timing_queue, TIMING_QUEUE_SIZE); + result->latency.typical = ucs_time_to_sec(median) / factor; + + result->latency.moment_average = + (perf->current.time_acc - perf->prev.time_acc) + / (perf->current.iters - perf->prev.iters) + / factor; + + result->latency.total_average = + (perf->current.time_acc - perf->start_time_acc) + / perf->current.iters + / factor; + + + /* Bandwidth */ + + result->bandwidth.typical = 0.0; // Undefined + + result->bandwidth.moment_average = + (perf->current.bytes - perf->prev.bytes) / + (perf->current.time_acc - perf->prev.time_acc) * factor; + + result->bandwidth.total_average = + perf->current.bytes / + 
(perf->current.time_acc - perf->start_time_acc) * factor; + + + /* Packet rate */ + + result->msgrate.typical = 0.0; // Undefined + + result->msgrate.moment_average = + (perf->current.msgs - perf->prev.msgs) / + (perf->current.time_acc - perf->prev.time_acc) * factor; + + result->msgrate.total_average = + perf->current.msgs / + (perf->current.time_acc - perf->start_time_acc) * factor; + +} + +static ucs_status_t ucx_perf_test_check_params(ucx_perf_params_t *params) +{ + size_t it; + + /* check if zero-size messages are requested and supported */ + if ((/* they are not supported by: */ + /* - UCT tests, except UCT AM Short/Bcopy */ + (params->api == UCX_PERF_API_UCT) || + (/* - UCP RMA and AMO tests */ + (params->api == UCX_PERF_API_UCP) && + (params->command != UCX_PERF_CMD_AM) && + (params->command != UCX_PERF_CMD_TAG) && + (params->command != UCX_PERF_CMD_TAG_SYNC) && + (params->command != UCX_PERF_CMD_STREAM))) && + ucx_perf_get_message_size(params) < 1) { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("Message size too small, need to be at least 1"); + } + return UCS_ERR_INVALID_PARAM; + } + + if ((params->api == UCX_PERF_API_UCP) && + ((params->send_mem_type != UCS_MEMORY_TYPE_HOST) || + (params->recv_mem_type != UCS_MEMORY_TYPE_HOST)) && + ((params->command == UCX_PERF_CMD_PUT) || + (params->command == UCX_PERF_CMD_GET) || + (params->command == UCX_PERF_CMD_ADD) || + (params->command == UCX_PERF_CMD_FADD) || + (params->command == UCX_PERF_CMD_SWAP) || + (params->command == UCX_PERF_CMD_CSWAP))) { + /* TODO: remove when support for non-HOST memory types will be added */ + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("UCP doesn't support RMA/AMO for \"%s\"<->\"%s\" memory types", + ucs_memory_type_names[params->send_mem_type], + ucs_memory_type_names[params->recv_mem_type]); + } + return UCS_ERR_INVALID_PARAM; + } + + if (params->max_outstanding < 1) { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + 
ucs_error("max_outstanding, need to be at least 1"); + } + return UCS_ERR_INVALID_PARAM; + } + + /* check if particular message size fit into stride size */ + if (params->iov_stride) { + for (it = 0; it < params->msg_size_cnt; ++it) { + if (params->msg_size_list[it] > params->iov_stride) { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("Buffer size %lu bigger than stride %lu", + params->msg_size_list[it], params->iov_stride); + } + return UCS_ERR_INVALID_PARAM; + } + } + } + + return UCS_OK; +} + +void uct_perf_iface_flush_b(ucx_perf_context_t *perf) +{ + ucs_status_t status; + + do { + status = uct_iface_flush(perf->uct.iface, 0, NULL); + uct_worker_progress(perf->uct.worker); + } while (status == UCS_INPROGRESS); +} + +static inline uint64_t __get_flag(uct_perf_data_layout_t layout, uint64_t short_f, + uint64_t bcopy_f, uint64_t zcopy_f) +{ + return (layout == UCT_PERF_DATA_LAYOUT_SHORT) ? short_f : + (layout == UCT_PERF_DATA_LAYOUT_BCOPY) ? bcopy_f : + (layout == UCT_PERF_DATA_LAYOUT_ZCOPY) ? zcopy_f : + 0; +} + +static inline ucs_status_t __get_atomic_flag(size_t size, uint64_t *op32, + uint64_t *op64, uint64_t op) +{ + if (size == sizeof(uint32_t)) { + *op32 = UCS_BIT(op); + return UCS_OK; + } else if (size == sizeof(uint64_t)) { + *op64 = UCS_BIT(op); + return UCS_OK; + } + return UCS_ERR_UNSUPPORTED; +} + +static inline size_t __get_max_size(uct_perf_data_layout_t layout, size_t short_m, + size_t bcopy_m, uint64_t zcopy_m) +{ + return (layout == UCT_PERF_DATA_LAYOUT_SHORT) ? short_m : + (layout == UCT_PERF_DATA_LAYOUT_BCOPY) ? bcopy_m : + (layout == UCT_PERF_DATA_LAYOUT_ZCOPY) ? 
zcopy_m : + 0; +} + +static ucs_status_t uct_perf_test_check_md_support(ucx_perf_params_t *params, + ucs_memory_type_t mem_type, + uct_md_attr_t *md_attr) +{ + if (!(md_attr->cap.access_mem_type == mem_type) && + !(md_attr->cap.reg_mem_types & UCS_BIT(mem_type))) { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("Unsupported memory type %s by %s/%s", + ucs_memory_type_names[mem_type], + params->uct.tl_name, params->uct.dev_name); + return UCS_ERR_INVALID_PARAM; + } + } + return UCS_OK; +} + +static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params, + uct_iface_h iface, uct_md_h md) +{ + uint64_t required_flags = 0; + uint64_t atomic_op32 = 0; + uint64_t atomic_op64 = 0; + uint64_t atomic_fop32 = 0; + uint64_t atomic_fop64 = 0; + uct_md_attr_t md_attr; + uct_iface_attr_t attr; + ucs_status_t status; + size_t min_size, max_size, max_iov, message_size; + + status = uct_md_query(md, &md_attr); + if (status != UCS_OK) { + ucs_error("uct_md_query(%s) failed: %s", + params->uct.md_name, ucs_status_string(status)); + return status; + } + + status = uct_iface_query(iface, &attr); + if (status != UCS_OK) { + ucs_error("uct_iface_query(%s/%s) failed: %s", + params->uct.tl_name, params->uct.dev_name, + ucs_status_string(status)); + return status; + } + + min_size = 0; + max_iov = 1; + message_size = ucx_perf_get_message_size(params); + switch (params->command) { + case UCX_PERF_CMD_AM: + required_flags = __get_flag(params->uct.data_layout, UCT_IFACE_FLAG_AM_SHORT, + UCT_IFACE_FLAG_AM_BCOPY, UCT_IFACE_FLAG_AM_ZCOPY); + required_flags |= UCT_IFACE_FLAG_CB_SYNC; + min_size = __get_max_size(params->uct.data_layout, 0, 0, + attr.cap.am.min_zcopy); + max_size = __get_max_size(params->uct.data_layout, attr.cap.am.max_short, + attr.cap.am.max_bcopy, attr.cap.am.max_zcopy); + max_iov = attr.cap.am.max_iov; + break; + case UCX_PERF_CMD_PUT: + required_flags = __get_flag(params->uct.data_layout, UCT_IFACE_FLAG_PUT_SHORT, + UCT_IFACE_FLAG_PUT_BCOPY, 
UCT_IFACE_FLAG_PUT_ZCOPY); + min_size = __get_max_size(params->uct.data_layout, 0, 0, + attr.cap.put.min_zcopy); + max_size = __get_max_size(params->uct.data_layout, attr.cap.put.max_short, + attr.cap.put.max_bcopy, attr.cap.put.max_zcopy); + max_iov = attr.cap.put.max_iov; + break; + case UCX_PERF_CMD_GET: + required_flags = __get_flag(params->uct.data_layout, UCT_IFACE_FLAG_GET_SHORT, + UCT_IFACE_FLAG_GET_BCOPY, UCT_IFACE_FLAG_GET_ZCOPY); + min_size = __get_max_size(params->uct.data_layout, 0, 0, + attr.cap.get.min_zcopy); + max_size = __get_max_size(params->uct.data_layout, attr.cap.get.max_short, + attr.cap.get.max_bcopy, attr.cap.get.max_zcopy); + max_iov = attr.cap.get.max_iov; + break; + case UCX_PERF_CMD_ADD: + ATOMIC_OP_CONFIG(message_size, &atomic_op32, &atomic_op64, UCT_ATOMIC_OP_ADD, + perf_atomic_op, params, status); + max_size = 8; + break; + case UCX_PERF_CMD_FADD: + ATOMIC_OP_CONFIG(message_size, &atomic_fop32, &atomic_fop64, UCT_ATOMIC_OP_ADD, + perf_atomic_fop, params, status); + max_size = 8; + break; + case UCX_PERF_CMD_SWAP: + ATOMIC_OP_CONFIG(message_size, &atomic_fop32, &atomic_fop64, UCT_ATOMIC_OP_SWAP, + perf_atomic_fop, params, status); + max_size = 8; + break; + case UCX_PERF_CMD_CSWAP: + ATOMIC_OP_CONFIG(message_size, &atomic_fop32, &atomic_fop64, UCT_ATOMIC_OP_CSWAP, + perf_atomic_fop, params, status); + max_size = 8; + break; + default: + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("Invalid test command"); + } + return UCS_ERR_INVALID_PARAM; + } + + status = ucx_perf_test_check_params(params); + if (status != UCS_OK) { + return status; + } + + /* check atomics first */ + ATOMIC_OP_CHECK(32, attr.cap.atomic32.op_flags, atomic_op32, params, perf_atomic_op); + ATOMIC_OP_CHECK(64, attr.cap.atomic64.op_flags, atomic_op64, params, perf_atomic_op); + ATOMIC_OP_CHECK(32, attr.cap.atomic32.fop_flags, atomic_fop32, params, perf_atomic_fop); + ATOMIC_OP_CHECK(64, attr.cap.atomic64.fop_flags, atomic_fop64, params, 
perf_atomic_fop); + + /* check iface flags */ + if (!(atomic_op32 | atomic_op64 | atomic_fop32 | atomic_fop64) && + (!ucs_test_all_flags(attr.cap.flags, required_flags) || !required_flags)) { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("%s/%s does not support operation %s", + params->uct.tl_name, params->uct.dev_name, + perf_iface_ops[ucs_ffs64(~attr.cap.flags & required_flags)]); + } + return UCS_ERR_UNSUPPORTED; + } + + if (message_size < min_size) { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("Message size (%zu) is smaller than min supported (%zu)", + message_size, min_size); + } + return UCS_ERR_UNSUPPORTED; + } + + if (message_size > max_size) { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("Message size (%zu) is larger than max supported (%zu)", + message_size, max_size); + } + return UCS_ERR_UNSUPPORTED; + } + + if (params->command == UCX_PERF_CMD_AM) { + if ((params->uct.data_layout == UCT_PERF_DATA_LAYOUT_SHORT) && + (params->am_hdr_size != sizeof(uint64_t))) + { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("Short AM header size must be 8 bytes"); + } + return UCS_ERR_INVALID_PARAM; + } + + if ((params->uct.data_layout == UCT_PERF_DATA_LAYOUT_ZCOPY) && + (params->am_hdr_size > attr.cap.am.max_hdr)) + { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("AM header size (%zu) is larger than max supported (%zu)", + params->am_hdr_size, attr.cap.am.max_hdr); + } + return UCS_ERR_UNSUPPORTED; + } + + if (params->am_hdr_size > message_size) { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("AM header size (%zu) is larger than message size (%zu)", + params->am_hdr_size, message_size); + } + return UCS_ERR_INVALID_PARAM; + } + + if (params->uct.fc_window > UCT_PERF_TEST_MAX_FC_WINDOW) { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("AM flow-control window (%d) too large (should be <= %d)", + params->uct.fc_window, 
UCT_PERF_TEST_MAX_FC_WINDOW); + } + return UCS_ERR_INVALID_PARAM; + } + + if ((params->flags & UCX_PERF_TEST_FLAG_ONE_SIDED) && + (params->flags & UCX_PERF_TEST_FLAG_VERBOSE)) + { + ucs_warn("Running active-message test with on-sided progress"); + } + } + + if (UCT_PERF_DATA_LAYOUT_ZCOPY == params->uct.data_layout) { + if (params->msg_size_cnt > max_iov) { + if ((params->flags & UCX_PERF_TEST_FLAG_VERBOSE) || + !params->msg_size_cnt) { + ucs_error("Wrong number of IOV entries. Requested is %lu, " + "should be in the range 1...%lu", params->msg_size_cnt, + max_iov); + } + return UCS_ERR_UNSUPPORTED; + } + /* if msg_size_cnt == 1 the message size checked above */ + if ((UCX_PERF_CMD_AM == params->command) && (params->msg_size_cnt > 1)) { + if (params->am_hdr_size > params->msg_size_list[0]) { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("AM header size (%lu) larger than the first IOV " + "message size (%lu)", params->am_hdr_size, + params->msg_size_list[0]); + } + return UCS_ERR_INVALID_PARAM; + } + } + } + + status = uct_perf_test_check_md_support(params, params->send_mem_type, &md_attr); + if (status != UCS_OK) { + return status; + } + + status = uct_perf_test_check_md_support(params, params->recv_mem_type, &md_attr); + if (status != UCS_OK) { + return status; + } + + return UCS_OK; +} + +static ucs_status_t uct_perf_test_setup_endpoints(ucx_perf_context_t *perf) +{ + const size_t buffer_size = 2048; + ucx_perf_ep_info_t info, *remote_info; + unsigned group_size, i, group_index; + uct_device_addr_t *dev_addr; + uct_iface_addr_t *iface_addr; + uct_ep_addr_t *ep_addr; + uct_iface_attr_t iface_attr; + uct_md_attr_t md_attr; + uct_ep_params_t ep_params; + void *rkey_buffer; + ucs_status_t status; + struct iovec vec[5]; + void *buffer; + void *req; + + buffer = malloc(buffer_size); + if (buffer == NULL) { + ucs_error("Failed to allocate RTE buffer"); + status = UCS_ERR_NO_MEMORY; + goto err; + } + + status = uct_iface_query(perf->uct.iface, 
&iface_attr); + if (status != UCS_OK) { + ucs_error("Failed to uct_iface_query: %s", ucs_status_string(status)); + goto err_free; + } + + status = uct_md_query(perf->uct.md, &md_attr); + if (status != UCS_OK) { + ucs_error("Failed to uct_md_query: %s", ucs_status_string(status)); + goto err_free; + } + + if (md_attr.cap.flags & (UCT_MD_FLAG_ALLOC|UCT_MD_FLAG_REG)) { + info.rkey_size = md_attr.rkey_packed_size; + } else { + info.rkey_size = 0; + } + info.uct.dev_addr_len = iface_attr.device_addr_len; + info.uct.iface_addr_len = iface_attr.iface_addr_len; + info.uct.ep_addr_len = iface_attr.ep_addr_len; + info.recv_buffer = (uintptr_t)perf->recv_buffer; + + rkey_buffer = buffer; + dev_addr = UCS_PTR_BYTE_OFFSET(rkey_buffer, info.rkey_size); + iface_addr = UCS_PTR_BYTE_OFFSET(dev_addr, info.uct.dev_addr_len); + ep_addr = UCS_PTR_BYTE_OFFSET(iface_addr, info.uct.iface_addr_len); + ucs_assert_always(UCS_PTR_BYTE_OFFSET(ep_addr, info.uct.ep_addr_len) <= + UCS_PTR_BYTE_OFFSET(buffer, buffer_size)); + + status = uct_iface_get_device_address(perf->uct.iface, dev_addr); + if (status != UCS_OK) { + ucs_error("Failed to uct_iface_get_device_address: %s", + ucs_status_string(status)); + goto err_free; + } + + status = uct_iface_get_address(perf->uct.iface, iface_addr); + if (status != UCS_OK) { + ucs_error("Failed to uct_iface_get_address: %s", ucs_status_string(status)); + goto err_free; + } + + if (info.rkey_size > 0) { + memset(rkey_buffer, 0, info.rkey_size); + status = uct_md_mkey_pack(perf->uct.md, perf->uct.recv_mem.memh, rkey_buffer); + if (status != UCS_OK) { + ucs_error("Failed to uct_rkey_pack: %s", ucs_status_string(status)); + goto err_free; + } + } + + group_size = rte_call(perf, group_size); + group_index = rte_call(perf, group_index); + + perf->uct.peers = calloc(group_size, sizeof(*perf->uct.peers)); + if (perf->uct.peers == NULL) { + goto err_free; + } + + ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + ep_params.iface = perf->uct.iface; + if 
(iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { + for (i = 0; i < group_size; ++i) { + if (i == group_index) { + continue; + } + + status = uct_ep_create(&ep_params, &perf->uct.peers[i].ep); + if (status != UCS_OK) { + ucs_error("Failed to uct_ep_create: %s", ucs_status_string(status)); + goto err_destroy_eps; + } + status = uct_ep_get_address(perf->uct.peers[i].ep, ep_addr); + if (status != UCS_OK) { + ucs_error("Failed to uct_ep_get_address: %s", ucs_status_string(status)); + goto err_destroy_eps; + } + } + } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + ep_params.field_mask |= UCT_EP_PARAM_FIELD_DEV_ADDR | + UCT_EP_PARAM_FIELD_IFACE_ADDR; + } + + vec[0].iov_base = &info; + vec[0].iov_len = sizeof(info); + vec[1].iov_base = buffer; + vec[1].iov_len = info.rkey_size + info.uct.dev_addr_len + + info.uct.iface_addr_len + info.uct.ep_addr_len; + + rte_call(perf, post_vec, vec, 2, &req); + rte_call(perf, exchange_vec, req); + + for (i = 0; i < group_size; ++i) { + if (i == group_index) { + continue; + } + + rte_call(perf, recv, i, buffer, buffer_size, req); + + remote_info = buffer; + rkey_buffer = remote_info + 1; + dev_addr = UCS_PTR_BYTE_OFFSET(rkey_buffer, remote_info->rkey_size); + iface_addr = UCS_PTR_BYTE_OFFSET(dev_addr, remote_info->uct.dev_addr_len); + ep_addr = UCS_PTR_BYTE_OFFSET(iface_addr, remote_info->uct.iface_addr_len); + perf->uct.peers[i].remote_addr = remote_info->recv_buffer; + + if (!uct_iface_is_reachable(perf->uct.iface, dev_addr, + remote_info->uct.iface_addr_len ? 
+ iface_addr : NULL)) { + ucs_error("Destination is unreachable"); + status = UCS_ERR_UNREACHABLE; + goto err_destroy_eps; + } + + if (remote_info->rkey_size > 0) { + status = uct_rkey_unpack(perf->uct.cmpt, rkey_buffer, + &perf->uct.peers[i].rkey); + if (status != UCS_OK) { + ucs_error("Failed to uct_rkey_unpack: %s", ucs_status_string(status)); + goto err_destroy_eps; + } + } else { + perf->uct.peers[i].rkey.handle = NULL; + perf->uct.peers[i].rkey.rkey = UCT_INVALID_RKEY; + } + + if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { + status = uct_ep_connect_to_ep(perf->uct.peers[i].ep, dev_addr, ep_addr); + } else if (iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + ep_params.dev_addr = dev_addr; + ep_params.iface_addr = iface_addr; + status = uct_ep_create(&ep_params, &perf->uct.peers[i].ep); + } else { + status = UCS_ERR_UNSUPPORTED; + } + if (status != UCS_OK) { + ucs_error("Failed to connect endpoint: %s", ucs_status_string(status)); + goto err_destroy_eps; + } + } + uct_perf_iface_flush_b(perf); + + free(buffer); + uct_perf_barrier(perf); + return UCS_OK; + +err_destroy_eps: + for (i = 0; i < group_size; ++i) { + if (perf->uct.peers[i].rkey.rkey != UCT_INVALID_RKEY) { + uct_rkey_release(perf->uct.cmpt, &perf->uct.peers[i].rkey); + } + if (perf->uct.peers[i].ep != NULL) { + uct_ep_destroy(perf->uct.peers[i].ep); + } + } + free(perf->uct.peers); +err_free: + free(buffer); +err: + return status; +} + +static void uct_perf_test_cleanup_endpoints(ucx_perf_context_t *perf) +{ + unsigned group_size, group_index, i; + + uct_perf_barrier(perf); + + uct_iface_set_am_handler(perf->uct.iface, UCT_PERF_TEST_AM_ID, NULL, NULL, 0); + + group_size = rte_call(perf, group_size); + group_index = rte_call(perf, group_index); + + for (i = 0; i < group_size; ++i) { + if (i != group_index) { + if (perf->uct.peers[i].rkey.rkey != UCT_INVALID_RKEY) { + uct_rkey_release(perf->uct.cmpt, &perf->uct.peers[i].rkey); + } + if (perf->uct.peers[i].ep) { + 
uct_ep_destroy(perf->uct.peers[i].ep); + } + } + } + free(perf->uct.peers); +} + +static ucs_status_t ucp_perf_test_fill_params(ucx_perf_params_t *params, + ucp_params_t *ucp_params) +{ + ucs_status_t status; + size_t message_size; + + message_size = ucx_perf_get_message_size(params); + switch (params->command) { + case UCX_PERF_CMD_PUT: + case UCX_PERF_CMD_GET: + ucp_params->features |= UCP_FEATURE_RMA; + break; + case UCX_PERF_CMD_ADD: + case UCX_PERF_CMD_FADD: + case UCX_PERF_CMD_SWAP: + case UCX_PERF_CMD_CSWAP: + if (message_size == sizeof(uint32_t)) { + ucp_params->features |= UCP_FEATURE_AMO32; + } else if (message_size == sizeof(uint64_t)) { + ucp_params->features |= UCP_FEATURE_AMO64; + } else { + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("Atomic size should be either 32 or 64 bit"); + } + return UCS_ERR_INVALID_PARAM; + } + + break; + case UCX_PERF_CMD_TAG: + case UCX_PERF_CMD_TAG_SYNC: + ucp_params->features |= UCP_FEATURE_TAG; + ucp_params->field_mask |= UCP_PARAM_FIELD_REQUEST_SIZE; + ucp_params->request_size = sizeof(ucp_perf_request_t); + break; + case UCX_PERF_CMD_STREAM: + ucp_params->features |= UCP_FEATURE_STREAM; + ucp_params->field_mask |= UCP_PARAM_FIELD_REQUEST_SIZE; + ucp_params->request_size = sizeof(ucp_perf_request_t); + break; + default: + if (params->flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("Invalid test command"); + } + return UCS_ERR_INVALID_PARAM; + } + + status = ucx_perf_test_check_params(params); + if (status != UCS_OK) { + return status; + } + + return UCS_OK; +} + +static ucs_status_t ucp_perf_test_alloc_iov_mem(ucp_perf_datatype_t datatype, + size_t iovcnt, unsigned thread_count, + ucp_dt_iov_t **iov_p) +{ + ucp_dt_iov_t *iov; + + if (UCP_PERF_DATATYPE_IOV == datatype) { + iov = malloc(sizeof(*iov) * iovcnt * thread_count); + if (NULL == iov) { + ucs_error("Failed allocate IOV buffer with iovcnt=%lu", iovcnt); + return UCS_ERR_NO_MEMORY; + } + *iov_p = iov; + } + + return UCS_OK; +} + +static 
ucs_status_t +ucp_perf_test_alloc_host(const ucx_perf_context_t *perf, size_t length, + void **address_p, ucp_mem_h *memh, int non_blk_flag) +{ + ucp_mem_map_params_t mem_map_params; + ucp_mem_attr_t mem_attr; + ucs_status_t status; + + mem_map_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + mem_map_params.address = *address_p; + mem_map_params.length = length; + mem_map_params.flags = UCP_MEM_MAP_ALLOCATE; + if (perf->params.flags & UCX_PERF_TEST_FLAG_MAP_NONBLOCK) { + mem_map_params.flags |= non_blk_flag; + } + + status = ucp_mem_map(perf->ucp.context, &mem_map_params, memh); + if (status != UCS_OK) { + goto err; + } + + mem_attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS; + status = ucp_mem_query(*memh, &mem_attr); + if (status != UCS_OK) { + goto err; + } + + *address_p = mem_attr.address; + return UCS_OK; + +err: + return status; +} + +static void ucp_perf_test_free_host(const ucx_perf_context_t *perf, + void *address, ucp_mem_h memh) +{ + ucs_status_t status; + + status = ucp_mem_unmap(perf->ucp.context, memh); + if (status != UCS_OK) { + ucs_warn("ucp_mem_unmap() failed: %s", ucs_status_string(status)); + } +} + +static ucs_status_t ucp_perf_test_alloc_mem(ucx_perf_context_t *perf) +{ + ucx_perf_params_t *params = &perf->params; + ucs_status_t status; + size_t buffer_size; + + if (params->iov_stride) { + buffer_size = params->msg_size_cnt * params->iov_stride; + } else { + buffer_size = ucx_perf_get_message_size(params); + } + + /* Allocate send buffer memory */ + perf->send_buffer = NULL; + status = perf->allocator->ucp_alloc(perf, buffer_size * params->thread_count, + &perf->send_buffer, &perf->ucp.send_memh, + UCP_MEM_MAP_NONBLOCK); + if (status != UCS_OK) { + goto err; + } + + /* Allocate receive buffer memory */ + perf->recv_buffer = NULL; + status = perf->allocator->ucp_alloc(perf, buffer_size * params->thread_count, + &perf->recv_buffer, &perf->ucp.recv_memh, + 0); + if (status 
!= UCS_OK) { + goto err_free_send_buffer; + } + + /* Allocate IOV datatype memory */ + perf->ucp.send_iov = NULL; + status = ucp_perf_test_alloc_iov_mem(params->ucp.send_datatype, + perf->params.msg_size_cnt, + params->thread_count, + &perf->ucp.send_iov); + if (UCS_OK != status) { + goto err_free_buffers; + } + + perf->ucp.recv_iov = NULL; + status = ucp_perf_test_alloc_iov_mem(params->ucp.recv_datatype, + perf->params.msg_size_cnt, + params->thread_count, + &perf->ucp.recv_iov); + if (UCS_OK != status) { + goto err_free_send_iov_buffers; + } + + return UCS_OK; + +err_free_send_iov_buffers: + free(perf->ucp.send_iov); +err_free_buffers: + perf->allocator->ucp_free(perf, perf->recv_buffer, perf->ucp.recv_memh); +err_free_send_buffer: + perf->allocator->ucp_free(perf, perf->send_buffer, perf->ucp.send_memh); +err: + return UCS_ERR_NO_MEMORY; +} + +static void ucp_perf_test_free_mem(ucx_perf_context_t *perf) +{ + free(perf->ucp.recv_iov); + free(perf->ucp.send_iov); + perf->allocator->ucp_free(perf, perf->recv_buffer, perf->ucp.recv_memh); + perf->allocator->ucp_free(perf, perf->send_buffer, perf->ucp.send_memh); +} + +static void ucp_perf_test_destroy_eps(ucx_perf_context_t* perf, + unsigned group_size) +{ + ucs_status_ptr_t *reqs; + ucp_tag_recv_info_t info; + ucs_status_t status; + unsigned i; + + reqs = calloc(sizeof(*reqs), group_size); + + for (i = 0; i < group_size; ++i) { + if (perf->ucp.peers[i].rkey != NULL) { + ucp_rkey_destroy(perf->ucp.peers[i].rkey); + } + if (perf->ucp.peers[i].ep != NULL) { + reqs[i] = ucp_disconnect_nb(perf->ucp.peers[i].ep); + } + } + + for (i = 0; i < group_size; ++i) { + if (!UCS_PTR_IS_PTR(reqs[i])) { + continue; + } + + do { + ucp_worker_progress(perf->ucp.worker); + status = ucp_request_test(reqs[i], &info); + } while (status == UCS_INPROGRESS); + ucp_request_release(reqs[i]); + } + + free(reqs); + free(perf->ucp.peers); +} + +static ucs_status_t ucp_perf_test_exchange_status(ucx_perf_context_t *perf, + ucs_status_t status) +{ 
+ unsigned group_size = rte_call(perf, group_size); + ucs_status_t collective_status = status; + struct iovec vec; + void *req = NULL; + unsigned i; + + vec.iov_base = &status; + vec.iov_len = sizeof(status); + + rte_call(perf, post_vec, &vec, 1, &req); + rte_call(perf, exchange_vec, req); + for (i = 0; i < group_size; ++i) { + rte_call(perf, recv, i, &status, sizeof(status), req); + if (status != UCS_OK) { + collective_status = status; + } + } + return collective_status; +} + +static ucs_status_t ucp_perf_test_setup_endpoints(ucx_perf_context_t *perf, + uint64_t features) +{ + const size_t buffer_size = 2048; + ucx_perf_ep_info_t info, *remote_info; + unsigned group_size, i, group_index; + ucp_address_t *address; + size_t address_length = 0; + ucp_ep_params_t ep_params; + ucs_status_t status; + struct iovec vec[3]; + void *rkey_buffer; + void *req = NULL; + void *buffer; + + group_size = rte_call(perf, group_size); + group_index = rte_call(perf, group_index); + + status = ucp_worker_get_address(perf->ucp.worker, &address, &address_length); + if (status != UCS_OK) { + if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("ucp_worker_get_address() failed: %s", ucs_status_string(status)); + } + goto err; + } + + info.ucp.addr_len = address_length; + info.recv_buffer = (uintptr_t)perf->recv_buffer; + + vec[0].iov_base = &info; + vec[0].iov_len = sizeof(info); + vec[1].iov_base = address; + vec[1].iov_len = address_length; + + if (features & (UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64)) { + status = ucp_rkey_pack(perf->ucp.context, perf->ucp.recv_memh, + &rkey_buffer, &info.rkey_size); + if (status != UCS_OK) { + if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("ucp_rkey_pack() failed: %s", ucs_status_string(status)); + } + ucp_worker_release_address(perf->ucp.worker, address); + goto err; + } + + vec[2].iov_base = rkey_buffer; + vec[2].iov_len = info.rkey_size; + rte_call(perf, post_vec, vec, 3, &req); + 
ucp_rkey_buffer_release(rkey_buffer); + } else { + info.rkey_size = 0; + rte_call(perf, post_vec, vec, 2, &req); + } + + ucp_worker_release_address(perf->ucp.worker, address); + rte_call(perf, exchange_vec, req); + + perf->ucp.peers = calloc(group_size, sizeof(*perf->ucp.peers)); + if (perf->ucp.peers == NULL) { + goto err; + } + + buffer = malloc(buffer_size); + if (buffer == NULL) { + ucs_error("Failed to allocate RTE receive buffer"); + status = UCS_ERR_NO_MEMORY; + goto err_destroy_eps; + } + + for (i = 0; i < group_size; ++i) { + if (i == group_index) { + continue; + } + + rte_call(perf, recv, i, buffer, buffer_size, req); + + remote_info = buffer; + address = (ucp_address_t*)(remote_info + 1); + rkey_buffer = UCS_PTR_BYTE_OFFSET(address, remote_info->ucp.addr_len); + perf->ucp.peers[i].remote_addr = remote_info->recv_buffer; + + ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; + ep_params.address = address; + + status = ucp_ep_create(perf->ucp.worker, &ep_params, &perf->ucp.peers[i].ep); + if (status != UCS_OK) { + if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("ucp_ep_create() failed: %s", ucs_status_string(status)); + } + goto err_free_buffer; + } + + if (remote_info->rkey_size > 0) { + status = ucp_ep_rkey_unpack(perf->ucp.peers[i].ep, rkey_buffer, + &perf->ucp.peers[i].rkey); + if (status != UCS_OK) { + if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_fatal("ucp_rkey_unpack() failed: %s", ucs_status_string(status)); + } + goto err_free_buffer; + } + } else { + perf->ucp.peers[i].rkey = NULL; + } + } + + free(buffer); + + status = ucp_perf_test_exchange_status(perf, UCS_OK); + if (status != UCS_OK) { + ucp_perf_test_destroy_eps(perf, group_size); + } + + /* force wireup completion */ + status = ucp_worker_flush(perf->ucp.worker); + if (status != UCS_OK) { + ucs_warn("ucp_worker_flush() failed: %s", ucs_status_string(status)); + } + + return status; + +err_free_buffer: + free(buffer); +err_destroy_eps: + 
ucp_perf_test_destroy_eps(perf, group_size); +err: + (void)ucp_perf_test_exchange_status(perf, status); + return status; +} + +static void ucp_perf_test_cleanup_endpoints(ucx_perf_context_t *perf) +{ + unsigned group_size; + + ucp_perf_barrier(perf); + + group_size = rte_call(perf, group_size); + + ucp_perf_test_destroy_eps(perf, group_size); +} + +static void ucx_perf_set_warmup(ucx_perf_context_t* perf, ucx_perf_params_t* params) +{ + perf->max_iter = ucs_min(params->warmup_iter, ucs_div_round_up(params->max_iter, 10)); + perf->report_interval = ULONG_MAX; +} + +static ucs_status_t uct_perf_create_md(ucx_perf_context_t *perf) +{ + uct_component_h *uct_components; + uct_component_attr_t component_attr; + uct_tl_resource_desc_t *tl_resources; + unsigned md_index, num_components; + unsigned tl_index, num_tl_resources; + unsigned cmpt_index; + ucs_status_t status; + uct_md_h md; + uct_md_config_t *md_config; + + + status = uct_query_components(&uct_components, &num_components); + if (status != UCS_OK) { + goto out; + } + + for (cmpt_index = 0; cmpt_index < num_components; ++cmpt_index) { + + component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT; + status = uct_component_query(uct_components[cmpt_index], &component_attr); + if (status != UCS_OK) { + goto out_release_components_list; + } + + component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES; + component_attr.md_resources = alloca(sizeof(*component_attr.md_resources) * + component_attr.md_resource_count); + status = uct_component_query(uct_components[cmpt_index], &component_attr); + if (status != UCS_OK) { + goto out_release_components_list; + } + + for (md_index = 0; md_index < component_attr.md_resource_count; ++md_index) { + status = uct_md_config_read(uct_components[cmpt_index], NULL, NULL, + &md_config); + if (status != UCS_OK) { + goto out_release_components_list; + } + + ucs_strncpy_zero(perf->params.uct.md_name, + component_attr.md_resources[md_index].md_name, + UCT_MD_NAME_MAX); 
+ + status = uct_md_open(uct_components[cmpt_index], + component_attr.md_resources[md_index].md_name, + md_config, &md); + uct_config_release(md_config); + if (status != UCS_OK) { + goto out_release_components_list; + } + + status = uct_md_query_tl_resources(md, &tl_resources, &num_tl_resources); + if (status != UCS_OK) { + uct_md_close(md); + goto out_release_components_list; + } + + for (tl_index = 0; tl_index < num_tl_resources; ++tl_index) { + if (!strcmp(perf->params.uct.tl_name, tl_resources[tl_index].tl_name) && + !strcmp(perf->params.uct.dev_name, tl_resources[tl_index].dev_name)) + { + uct_release_tl_resource_list(tl_resources); + perf->uct.cmpt = uct_components[cmpt_index]; + perf->uct.md = md; + status = UCS_OK; + goto out_release_components_list; + } + } + + uct_md_close(md); + uct_release_tl_resource_list(tl_resources); + } + } + + ucs_error("Cannot use transport %s on device %s", perf->params.uct.tl_name, + perf->params.uct.dev_name); + status = UCS_ERR_NO_DEVICE; + +out_release_components_list: + uct_release_component_list(uct_components); +out: + return status; +} + +void uct_perf_barrier(ucx_perf_context_t *perf) +{ + rte_call(perf, barrier, (void(*)(void*))uct_worker_progress, + (void*)perf->uct.worker); +} + +void ucp_perf_barrier(ucx_perf_context_t *perf) +{ + rte_call(perf, barrier, (void(*)(void*))ucp_worker_progress, + (void*)perf->ucp.worker); +} + +static ucs_status_t uct_perf_setup(ucx_perf_context_t *perf) +{ + ucx_perf_params_t *params = &perf->params; + uct_iface_config_t *iface_config; + ucs_status_t status; + uct_iface_params_t iface_params = { + .field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_STATS_ROOT | + UCT_IFACE_PARAM_FIELD_RX_HEADROOM | + UCT_IFACE_PARAM_FIELD_CPU_MASK, + .open_mode = UCT_IFACE_OPEN_MODE_DEVICE, + .mode.device.tl_name = params->uct.tl_name, + .mode.device.dev_name = params->uct.dev_name, + .stats_root = ucs_stats_get_root(), + .rx_headroom = 0 + }; + UCS_CPU_ZERO(&iface_params.cpu_mask); 
+ + status = ucs_async_context_init(&perf->uct.async, params->async_mode); + if (status != UCS_OK) { + goto out; + } + + status = uct_worker_create(&perf->uct.async, params->thread_mode, + &perf->uct.worker); + if (status != UCS_OK) { + goto out_cleanup_async; + } + + status = uct_perf_create_md(perf); + if (status != UCS_OK) { + goto out_destroy_worker; + } + + status = uct_md_iface_config_read(perf->uct.md, params->uct.tl_name, NULL, + NULL, &iface_config); + if (status != UCS_OK) { + goto out_destroy_md; + } + + status = uct_iface_open(perf->uct.md, perf->uct.worker, &iface_params, + iface_config, &perf->uct.iface); + uct_config_release(iface_config); + if (status != UCS_OK) { + ucs_error("Failed to open iface: %s", ucs_status_string(status)); + goto out_destroy_md; + } + + status = uct_perf_test_check_capabilities(params, perf->uct.iface, + perf->uct.md); + /* sync status across all processes */ + status = ucp_perf_test_exchange_status(perf, status); + if (status != UCS_OK) { + goto out_iface_close; + } + + status = uct_perf_test_alloc_mem(perf); + if (status != UCS_OK) { + goto out_iface_close; + } + + /* Enable progress before `uct_iface_flush` and `uct_worker_progress` called + * to give a chance to finish connection for some tranports (ib/ud, tcp). 
+ * They may return UCS_INPROGRESS from `uct_iface_flush` when connections are + * in progress */ + uct_iface_progress_enable(perf->uct.iface, + UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); + + status = uct_perf_test_setup_endpoints(perf); + if (status != UCS_OK) { + ucs_error("Failed to setup endpoints: %s", ucs_status_string(status)); + goto out_free_mem; + } + + return UCS_OK; + +out_free_mem: + uct_perf_test_free_mem(perf); +out_iface_close: + uct_iface_close(perf->uct.iface); +out_destroy_md: + uct_md_close(perf->uct.md); +out_destroy_worker: + uct_worker_destroy(perf->uct.worker); +out_cleanup_async: + ucs_async_context_cleanup(&perf->uct.async); +out: + return status; +} + +static void uct_perf_cleanup(ucx_perf_context_t *perf) +{ + uct_perf_test_cleanup_endpoints(perf); + uct_perf_test_free_mem(perf); + uct_iface_close(perf->uct.iface); + uct_md_close(perf->uct.md); + uct_worker_destroy(perf->uct.worker); + ucs_async_context_cleanup(&perf->uct.async); +} + +static ucs_status_t ucp_perf_setup(ucx_perf_context_t *perf) +{ + ucp_params_t ucp_params; + ucp_worker_params_t worker_params; + ucp_config_t *config; + ucs_status_t status; + + ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES; + ucp_params.features = 0; + + status = ucp_perf_test_fill_params(&perf->params, &ucp_params); + if (status != UCS_OK) { + goto err; + } + + status = ucp_config_read(NULL, NULL, &config); + if (status != UCS_OK) { + goto err; + } + + status = ucp_init(&ucp_params, config, &perf->ucp.context); + ucp_config_release(config); + if (status != UCS_OK) { + goto err; + } + + worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; + worker_params.thread_mode = perf->params.thread_mode; + + status = ucp_worker_create(perf->ucp.context, &worker_params, + &perf->ucp.worker); + if (status != UCS_OK) { + goto err_cleanup; + } + + status = ucp_perf_test_alloc_mem(perf); + if (status != UCS_OK) { + ucs_warn("ucp test failed to alocate memory"); + goto err_destroy_worker; + } + + status = 
ucp_perf_test_setup_endpoints(perf, ucp_params.features); + if (status != UCS_OK) { + if (perf->params.flags & UCX_PERF_TEST_FLAG_VERBOSE) { + ucs_error("Failed to setup endpoints: %s", ucs_status_string(status)); + } + goto err_free_mem; + } + + return UCS_OK; + +err_free_mem: + ucp_perf_test_free_mem(perf); +err_destroy_worker: + ucp_worker_destroy(perf->ucp.worker); +err_cleanup: + ucp_cleanup(perf->ucp.context); +err: + return status; +} + +static void ucp_perf_cleanup(ucx_perf_context_t *perf) +{ + ucp_perf_test_cleanup_endpoints(perf); + ucp_perf_barrier(perf); + ucp_perf_test_free_mem(perf); + ucp_worker_destroy(perf->ucp.worker); + ucp_cleanup(perf->ucp.context); +} + +static struct { + ucs_status_t (*setup)(ucx_perf_context_t *perf); + void (*cleanup)(ucx_perf_context_t *perf); + ucs_status_t (*run)(ucx_perf_context_t *perf); + void (*barrier)(ucx_perf_context_t *perf); +} ucx_perf_funcs[] = { + [UCX_PERF_API_UCT] = {uct_perf_setup, uct_perf_cleanup, + uct_perf_test_dispatch, uct_perf_barrier}, + [UCX_PERF_API_UCP] = {ucp_perf_setup, ucp_perf_cleanup, + ucp_perf_test_dispatch, ucp_perf_barrier} +}; + +static ucs_status_t ucx_perf_thread_spawn(ucx_perf_context_t *perf, + ucx_perf_result_t* result); + +ucs_status_t ucx_perf_run(ucx_perf_params_t *params, ucx_perf_result_t *result) +{ + ucx_perf_context_t *perf; + ucs_status_t status; + + ucx_perf_global_init(); + + if (params->command == UCX_PERF_CMD_LAST) { + ucs_error("Test is not selected"); + status = UCS_ERR_INVALID_PARAM; + goto out; + } + + if ((params->api != UCX_PERF_API_UCT) && (params->api != UCX_PERF_API_UCP)) { + ucs_error("Invalid test API parameter (should be UCT or UCP)"); + status = UCS_ERR_INVALID_PARAM; + goto out; + } + + perf = malloc(sizeof(*perf)); + if (perf == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out; + } + + ucx_perf_test_init(perf, params); + + if (perf->allocator == NULL) { + ucs_error("Unsupported memory types %s<->%s", + ucs_memory_type_names[params->send_mem_type], + 
ucs_memory_type_names[params->recv_mem_type]); + status = UCS_ERR_UNSUPPORTED; + goto out_free; + } + + if ((params->api == UCX_PERF_API_UCT) && + (perf->allocator->mem_type != UCS_MEMORY_TYPE_HOST)) { + ucs_warn("UCT tests also copy 2-byte values from %s memory to " + "%s memory, which may impact performance results", + ucs_memory_type_names[perf->allocator->mem_type], + ucs_memory_type_names[UCS_MEMORY_TYPE_HOST]); + } + + status = perf->allocator->init(perf); + if (status != UCS_OK) { + goto out_free; + } + + status = ucx_perf_funcs[params->api].setup(perf); + if (status != UCS_OK) { + goto out_free; + } + + if (UCS_THREAD_MODE_SINGLE == params->thread_mode) { + if (params->warmup_iter > 0) { + ucx_perf_set_warmup(perf, params); + status = ucx_perf_funcs[params->api].run(perf); + if (status != UCS_OK) { + goto out_cleanup; + } + + ucx_perf_funcs[params->api].barrier(perf); + ucx_perf_test_prepare_new_run(perf, params); + } + + /* Run test */ + status = ucx_perf_funcs[params->api].run(perf); + ucx_perf_funcs[params->api].barrier(perf); + if (status == UCS_OK) { + ucx_perf_calc_result(perf, result); + rte_call(perf, report, result, perf->params.report_arg, 1); + } + } else { + status = ucx_perf_thread_spawn(perf, result); + } + +out_cleanup: + ucx_perf_funcs[params->api].cleanup(perf); +out_free: + free(perf); +out: + return status; +} + +#if _OPENMP +/* multiple threads sharing the same worker/iface */ + +typedef struct { + pthread_t pt; + int tid; + int ntid; + ucs_status_t* statuses; + ucx_perf_context_t perf; + ucx_perf_result_t result; +} ucx_perf_thread_context_t; + + +static void* ucx_perf_thread_run_test(void* arg) +{ + ucx_perf_thread_context_t* tctx = (ucx_perf_thread_context_t*) arg; + ucx_perf_result_t* result = &tctx->result; + ucx_perf_context_t* perf = &tctx->perf; + ucx_perf_params_t* params = &perf->params; + ucs_status_t* statuses = tctx->statuses; + int tid = tctx->tid; + int i; + + if (params->warmup_iter > 0) { + ucx_perf_set_warmup(perf, 
params); + statuses[tid] = ucx_perf_funcs[params->api].run(perf); + ucx_perf_funcs[params->api].barrier(perf); + for (i = 0; i < tctx->ntid; i++) { + if (UCS_OK != statuses[i]) { + goto out; + } + } + ucx_perf_test_prepare_new_run(perf, params); + } + + /* Run test */ +#pragma omp barrier + statuses[tid] = ucx_perf_funcs[params->api].run(perf); + ucx_perf_funcs[params->api].barrier(perf); + for (i = 0; i < tctx->ntid; i++) { + if (UCS_OK != statuses[i]) { + goto out; + } + } +#pragma omp master + { + /* Assuming all threads are fairly treated, reporting only tid==0 + TODO: aggregate reports */ + ucx_perf_calc_result(perf, result); + rte_call(perf, report, result, perf->params.report_arg, 1); + } + +out: + return &statuses[tid]; +} + +static ucs_status_t ucx_perf_thread_spawn(ucx_perf_context_t *perf, + ucx_perf_result_t* result) +{ + ucx_perf_thread_context_t* tctx; + ucs_status_t* statuses; + size_t message_size; + ucs_status_t status; + int ti, nti; + + message_size = ucx_perf_get_message_size(&perf->params); + omp_set_num_threads(perf->params.thread_count); + nti = perf->params.thread_count; + + tctx = calloc(nti, sizeof(ucx_perf_thread_context_t)); + statuses = calloc(nti, sizeof(ucs_status_t)); + if ((tctx == NULL) || (statuses == NULL)) { + status = UCS_ERR_NO_MEMORY; + goto out_free; + } + +#pragma omp parallel private(ti) +{ + ti = omp_get_thread_num(); + tctx[ti].tid = ti; + tctx[ti].ntid = nti; + tctx[ti].statuses = statuses; + tctx[ti].perf = *perf; + /* Doctor the src and dst buffers to make them thread specific */ + tctx[ti].perf.send_buffer = UCS_PTR_BYTE_OFFSET(tctx[ti].perf.send_buffer, + ti * message_size); + tctx[ti].perf.recv_buffer = UCS_PTR_BYTE_OFFSET(tctx[ti].perf.recv_buffer, + ti * message_size); + tctx[ti].perf.offset = ti * message_size; + ucx_perf_thread_run_test((void*)&tctx[ti]); +} + + status = UCS_OK; + for (ti = 0; ti < nti; ti++) { + if (UCS_OK != statuses[ti]) { + ucs_error("Thread %d failed to run test: %s", tctx[ti].tid, + 
ucs_status_string(statuses[ti])); + status = statuses[ti]; + } + } + +out_free: + free(statuses); + free(tctx); + return status; +} +#else +static ucs_status_t ucx_perf_thread_spawn(ucx_perf_context_t *perf, + ucx_perf_result_t* result) { + ucs_error("Invalid test parameter (thread mode requested without OpenMP capabilities)"); + return UCS_ERR_INVALID_PARAM; +} +#endif /* _OPENMP */ + +void ucx_perf_global_init() +{ + static ucx_perf_allocator_t host_allocator = { + .mem_type = UCS_MEMORY_TYPE_HOST, + .init = ucs_empty_function_return_success, + .ucp_alloc = ucp_perf_test_alloc_host, + .ucp_free = ucp_perf_test_free_host, + .uct_alloc = uct_perf_test_alloc_host, + .uct_free = uct_perf_test_free_host, + .memcpy = ucx_perf_test_memcpy_host, + .memset = memset + }; + UCS_MODULE_FRAMEWORK_DECLARE(ucx_perftest); + + ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_HOST] = &host_allocator; + + /* FIXME Memtype allocator modules must be loaded to global scope, otherwise + * alloc hooks, which are using dlsym() to get pointer to original function, + * do not work. Need to use bistro for memtype hooks to fix it. + */ + UCS_MODULE_FRAMEWORK_LOAD(ucx_perftest, UCS_MODULE_LOAD_FLAG_GLOBAL); +} diff --git a/src/tools/perf/lib/libperf_int.h b/src/tools/perf/lib/libperf_int.h new file mode 100644 index 0000000..a361371 --- /dev/null +++ b/src/tools/perf/lib/libperf_int.h @@ -0,0 +1,226 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (C) The University of Tennessee and The University +* of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
/**
 * Table of memory operations for one memory type.
 *
 * A perf context points at one of these through its `allocator` field.
 * ucx_perf_global_init() installs the host-memory instance.
 */
struct ucx_perf_allocator {
    /* Memory type served by this allocator */
    ucs_memory_type_t mem_type;
    /* Called once by ucx_perf_run() before the API-specific setup */
    ucs_status_t (*init)(ucx_perf_context_t *perf);
    /* Allocate / release a buffer for UCP tests; `memh` carries the memory
     * handle that belongs to the buffer */
    ucs_status_t (*ucp_alloc)(const ucx_perf_context_t *perf, size_t length,
                              void **address_p, ucp_mem_h *memh, int non_blk_flag);
    void         (*ucp_free)(const ucx_perf_context_t *perf, void *address,
                             ucp_mem_h memh);
    /* Allocate / release a buffer for UCT tests, described by `alloc_mem` */
    ucs_status_t (*uct_alloc)(const ucx_perf_context_t *perf, size_t length,
                              unsigned flags, uct_allocated_memory_t *alloc_mem);
    void         (*uct_free)(const ucx_perf_context_t *perf,
                             uct_allocated_memory_t *alloc_mem);
    /* Copy `count` bytes from `src` to `dst`; the memory type of each side
     * is passed explicitly */
    void         (*memcpy)(void *dst, ucs_memory_type_t dst_mem_type,
                           const void *src, ucs_memory_type_t src_mem_type,
                           size_t count);
    /* Same signature as the C library memset(); the host allocator uses
     * memset itself */
    void*        (*memset)(void *dst, int value, size_t count);
};
timing_queue[TIMING_QUEUE_SIZE]; + unsigned timing_queue_head; + const ucx_perf_allocator_t *allocator; + + union { + struct { + ucs_async_context_t async; + uct_component_h cmpt; + uct_md_h md; + uct_worker_h worker; + uct_iface_h iface; + uct_peer_t *peers; + uct_allocated_memory_t send_mem; + uct_allocated_memory_t recv_mem; + uct_iov_t *iov; + } uct; + + struct { + ucp_context_h context; + ucp_worker_h worker; + ucp_peer_t *peers; + ucp_mem_h send_memh; + ucp_mem_h recv_memh; + ucp_dt_iov_t *send_iov; + ucp_dt_iov_t *recv_iov; + } ucp; + }; +}; + + +struct uct_peer { + uct_ep_h ep; + unsigned long remote_addr; + uct_rkey_bundle_t rkey; +}; + + +struct ucp_peer { + ucp_ep_h ep; + unsigned long remote_addr; + ucp_rkey_h rkey; +}; + + +struct ucp_perf_request { + void *context; +}; + + +#define UCX_PERF_TEST_FOREACH(perf) \ + while (!ucx_perf_context_done(perf)) + +#define rte_call(_perf, _func, ...) \ + ((_perf)->params.rte->_func((_perf)->params.rte_group, ## __VA_ARGS__)) + + +void ucx_perf_test_start_clock(ucx_perf_context_t *perf); + + +void uct_perf_iface_flush_b(ucx_perf_context_t *perf); + + +ucs_status_t uct_perf_test_dispatch(ucx_perf_context_t *perf); + + +ucs_status_t ucp_perf_test_dispatch(ucx_perf_context_t *perf); + + +void ucx_perf_calc_result(ucx_perf_context_t *perf, ucx_perf_result_t *result); + + +void uct_perf_barrier(ucx_perf_context_t *perf); + + +void ucp_perf_barrier(ucx_perf_context_t *perf); + + +static UCS_F_ALWAYS_INLINE int ucx_perf_context_done(ucx_perf_context_t *perf) +{ + return ucs_unlikely((perf->current.iters >= perf->max_iter) || + (perf->current.time > perf->end_time)); +} + + +static inline void ucx_perf_get_time(ucx_perf_context_t *perf) +{ + perf->current.time_acc = ucs_get_accurate_time(); +} + +static inline void ucx_perf_update(ucx_perf_context_t *perf, + ucx_perf_counter_t iters, size_t bytes) +{ + ucx_perf_result_t result; + + perf->current.time = ucs_get_time(); + perf->current.iters += iters; + perf->current.bytes 
+= bytes; + perf->current.msgs += 1; + + perf->timing_queue[perf->timing_queue_head] = + perf->current.time - perf->prev_time; + ++perf->timing_queue_head; + if (perf->timing_queue_head == TIMING_QUEUE_SIZE) { + perf->timing_queue_head = 0; + } + + perf->prev_time = perf->current.time; + + if (perf->current.time - perf->prev.time >= perf->report_interval) { + ucx_perf_get_time(perf); + + /* Disable all other threads' report generation and output. + * The master clause cannot be used here as the unit test + * uct_test_perf runs on single pthreads with no parallel region, + * using that clause will result in undefined behavior. + */ +#if _OPENMP + if (omp_get_thread_num() == 0) +#endif /* _OPENMP */ + { + ucx_perf_calc_result(perf, &result); + rte_call(perf, report, &result, perf->params.report_arg, 0); + } + + perf->prev = perf->current; + } +} + + +/** + * Get the total length of the message size given by parameters + */ +static inline +size_t ucx_perf_get_message_size(const ucx_perf_params_t *params) +{ + size_t length, it; + + ucs_assert(params->msg_size_list != NULL); + + length = 0; + for (it = 0; it < params->msg_size_cnt; ++it) { + length += params->msg_size_list[it]; + } + + return length; +} + +END_C_DECLS + +#endif diff --git a/src/tools/perf/lib/ucp_tests.cc b/src/tools/perf/lib/ucp_tests.cc new file mode 100644 index 0000000..23312ed --- /dev/null +++ b/src/tools/perf/lib/ucp_tests.cc @@ -0,0 +1,558 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (C) The University of Tennessee and The University +* of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include + +extern "C" { +#include +#include +#include +} +#include + +#include + + +template +class ucp_perf_test_runner { +public: + static const ucp_tag_t TAG = 0x1337a880u; + static const ucp_tag_t TAG_MASK = (FLAGS & UCX_PERF_TEST_FLAG_TAG_WILDCARD) ? 
+ 0 : (ucp_tag_t)-1; + + typedef uint8_t psn_t; + + ucp_perf_test_runner(ucx_perf_context_t &perf) : + m_perf(perf), + m_outstanding(0), + m_max_outstanding(m_perf.params.max_outstanding) + + { + ucs_assert_always(m_max_outstanding > 0); + } + + void create_iov_buffer(ucp_dt_iov_t *iov, void *buffer) + { + size_t iov_length_it, iov_it; + const size_t iovcnt = m_perf.params.msg_size_cnt; + + ucs_assert(NULL != m_perf.params.msg_size_list); + ucs_assert(iovcnt > 0); + + iov_length_it = 0; + for (iov_it = 0; iov_it < iovcnt; ++iov_it) { + iov[iov_it].buffer = (char *)buffer + iov_length_it; + iov[iov_it].length = m_perf.params.msg_size_list[iov_it]; + + if (m_perf.params.iov_stride) { + iov_length_it += m_perf.params.iov_stride; + } else { + iov_length_it += iov[iov_it].length; + } + } + } + + ucp_datatype_t ucp_perf_test_get_datatype(ucp_perf_datatype_t datatype, ucp_dt_iov_t *iov, + size_t *length, void **buffer_p) + { + ucp_datatype_t type = ucp_dt_make_contig(1); + if (UCP_PERF_DATATYPE_IOV == datatype) { + *buffer_p = iov; + *length = m_perf.params.msg_size_cnt; + type = ucp_dt_make_iov(); + } + return type; + } + /** + * Make ucp_dt_iov_t iov[msg_size_cnt] array with pointer elements to + * original buffer + */ + void ucp_perf_test_prepare_iov_buffers() + { + if (UCP_PERF_DATATYPE_IOV == m_perf.params.ucp.send_datatype) { + create_iov_buffer(m_perf.ucp.send_iov, m_perf.send_buffer); + } + if (UCP_PERF_DATATYPE_IOV == m_perf.params.ucp.recv_datatype) { + create_iov_buffer(m_perf.ucp.recv_iov, m_perf.recv_buffer); + } + } + + void UCS_F_ALWAYS_INLINE progress_responder() { + if (!(FLAGS & UCX_PERF_TEST_FLAG_ONE_SIDED) && + !(m_perf.params.flags & UCX_PERF_TEST_FLAG_ONE_SIDED)) + { + ucp_worker_progress(m_perf.ucp.worker); + } + } + + void UCS_F_ALWAYS_INLINE progress_requestor() { + ucp_worker_progress(m_perf.ucp.worker); + } + + ucs_status_t UCS_F_ALWAYS_INLINE wait(void *request, bool is_requestor) + { + if (ucs_likely(!UCS_PTR_IS_PTR(request))) { + return 
UCS_PTR_STATUS(request); + } + + while (!ucp_request_is_completed(request)) { + if (is_requestor) { + progress_requestor(); + } else { + progress_responder(); + } + } + ucp_request_release(request); + return UCS_OK; + } + + ssize_t UCS_F_ALWAYS_INLINE wait_stream_recv(void *request) + { + size_t length; + ucs_status_t status; + + ucs_assert(UCS_PTR_IS_PTR(request)); + + while ((status = ucp_stream_recv_request_test(request, &length)) == + UCS_INPROGRESS) { + progress_responder(); + } + ucp_request_release(request); + + return ucs_likely(status == UCS_OK) ? length : status; + } + + static void send_cb(void *request, ucs_status_t status) + { + ucp_perf_request_t *r = reinterpret_cast(request); + ucp_perf_test_runner *sender = (ucp_perf_test_runner*)r->context; + + sender->send_completed(); + ucp_request_release(request); + } + + void UCS_F_ALWAYS_INLINE wait_window(unsigned n) + { + while (m_outstanding >= (m_max_outstanding - n + 1)) { + progress_requestor(); + } + } + + ucs_status_t UCS_F_ALWAYS_INLINE + send(ucp_ep_h ep, void *buffer, unsigned length, ucp_datatype_t datatype, + uint8_t sn, uint64_t remote_addr, ucp_rkey_h rkey) + { + void *request; + + /* coverity[switch_selector_expr_is_constant] */ + switch (CMD) { + case UCX_PERF_CMD_TAG: + case UCX_PERF_CMD_TAG_SYNC: + case UCX_PERF_CMD_STREAM: + wait_window(1); + /* coverity[switch_selector_expr_is_constant] */ + switch (CMD) { + case UCX_PERF_CMD_TAG: + request = ucp_tag_send_nb(ep, buffer, length, datatype, TAG, + send_cb); + break; + case UCX_PERF_CMD_TAG_SYNC: + request = ucp_tag_send_sync_nb(ep, buffer, length, datatype, TAG, + send_cb); + break; + case UCX_PERF_CMD_STREAM: + request = ucp_stream_send_nb(ep, buffer, length, datatype, + send_cb, 0); + break; + default: + request = UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM); + break; + } + if (ucs_likely(!UCS_PTR_IS_PTR(request))) { + return UCS_PTR_STATUS(request); + } + reinterpret_cast(request)->context = this; + send_started(); + return UCS_OK; + case 
    /**
     * Receiver-side half of one test iteration, selected at compile time by
     * the CMD / TYPE / FLAGS template constants.
     *
     * @param worker    Worker used for tag probe and tag receive.
     * @param ep        Endpoint used for stream receives.
     * @param buffer    Receive buffer.
     * @param length    Length passed to the receive call; for PUT ping-pong
     *                  the byte at offset length - 1 is polled.
     * @param datatype  Datatype passed to the receive call.
     * @param sn        Sequence number expected in a PUT ping-pong.
     *
     * @return UCS_OK, the status of the underlying receive, or
     *         UCS_ERR_INVALID_PARAM for an unsupported CMD / TYPE pair.
     */
    ucs_status_t UCS_F_ALWAYS_INLINE
    recv(ucp_worker_h worker, ucp_ep_h ep, void *buffer, unsigned length,
         ucp_datatype_t datatype, uint8_t sn)
    {
        volatile uint8_t *ptr;
        void *request;

        /* coverity[switch_selector_expr_is_constant] */
        switch (CMD) {
        case UCX_PERF_CMD_TAG:
        case UCX_PERF_CMD_TAG_SYNC:
            if (FLAGS & UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE) {
                /* Spin until a matching message can be probed, so that the
                 * receive below is posted after it */
                ucp_tag_recv_info_t tag_info;
                while (ucp_tag_probe_nb(worker, TAG, TAG_MASK, 0, &tag_info) == NULL) {
                    progress_responder();
                }
            }
            request = ucp_tag_recv_nb(worker, buffer, length, datatype, TAG, TAG_MASK,
                                      (ucp_tag_recv_callback_t)ucs_empty_function);
            return wait(request, false);
        case UCX_PERF_CMD_PUT:
            /* coverity[switch_selector_expr_is_constant] */
            switch (TYPE) {
            case UCX_PERF_TEST_TYPE_PINGPONG:
                /* The sender stores the sequence number in the last byte of
                 * the message; poll that byte until it shows up.
                 * NOTE(review): the byte is read through a plain pointer,
                 * which presumably requires host-accessible memory - confirm
                 * for non-host memory types. */
                ptr = (volatile uint8_t*)buffer + length - 1;
                while (*ptr != sn) {
                    progress_responder();
                }
                return UCS_OK;
            case UCX_PERF_TEST_TYPE_STREAM_UNI:
                /* nothing to wait for on the receiving side */
                return UCS_OK;
            default:
                return UCS_ERR_INVALID_PARAM;
            }
        case UCX_PERF_CMD_GET:
        case UCX_PERF_CMD_ADD:
        case UCX_PERF_CMD_FADD:
        case UCX_PERF_CMD_SWAP:
        case UCX_PERF_CMD_CSWAP:
            /* coverity[switch_selector_expr_is_constant] */
            switch (TYPE) {
            case UCX_PERF_TEST_TYPE_STREAM_UNI:
                /* the target only progresses (a no-op in one-sided mode) */
                progress_responder();
                return UCS_OK;
            default:
                return UCS_ERR_INVALID_PARAM;
            }
        case UCX_PERF_CMD_STREAM:
            /* Either fetch data chunks owned by the library, or receive
             * into the caller's buffer */
            if (FLAGS & UCX_PERF_TEST_FLAG_STREAM_RECV_DATA) {
                return recv_stream_data(ep, length, datatype);
            } else {
                return recv_stream(ep, buffer, length, datatype);
            }
        default:
            return UCS_ERR_INVALID_PARAM;
        }
    }
m_perf.offset; + rkey = m_perf.ucp.peers[1 - my_index].rkey; + sn = 0; + send_length = length; + recv_length = length; + send_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.send_datatype, + m_perf.ucp.send_iov, &send_length, + &send_buffer); + recv_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.recv_datatype, + m_perf.ucp.recv_iov, &recv_length, + &recv_buffer); + + if (my_index == 0) { + UCX_PERF_TEST_FOREACH(&m_perf) { + send(ep, send_buffer, send_length, send_datatype, sn, remote_addr, rkey); + recv(worker, ep, recv_buffer, recv_length, recv_datatype, sn); + ucx_perf_update(&m_perf, 1, length); + ++sn; + } + } else if (my_index == 1) { + UCX_PERF_TEST_FOREACH(&m_perf) { + recv(worker, ep, recv_buffer, recv_length, recv_datatype, sn); + send(ep, send_buffer, send_length, send_datatype, sn, remote_addr, rkey); + ucx_perf_update(&m_perf, 1, length); + ++sn; + } + } + + wait_window(m_max_outstanding); + ucp_worker_flush(m_perf.ucp.worker); + ucx_perf_get_time(&m_perf); + ucp_perf_barrier(&m_perf); + return UCS_OK; + } + + ucs_status_t run_stream_uni() + { + unsigned my_index; + ucp_worker_h worker; + ucp_ep_h ep; + void *send_buffer, *recv_buffer; + ucp_datatype_t send_datatype, recv_datatype; + uint64_t remote_addr; + ucp_rkey_h rkey; + size_t length, send_length, recv_length; + uint8_t sn; + + length = ucx_perf_get_message_size(&m_perf.params); + ucs_assert(length >= sizeof(psn_t)); + + ucp_perf_test_prepare_iov_buffers(); + + ucp_perf_barrier(&m_perf); + + my_index = rte_call(&m_perf, group_index); + + ucx_perf_test_start_clock(&m_perf); + + send_buffer = m_perf.send_buffer; + recv_buffer = m_perf.recv_buffer; + worker = m_perf.ucp.worker; + ep = m_perf.ucp.peers[1 - my_index].ep; + remote_addr = m_perf.ucp.peers[1 - my_index].remote_addr + m_perf.offset; + rkey = m_perf.ucp.peers[1 - my_index].rkey; + sn = 0; + send_length = length; + recv_length = length; + send_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.send_datatype, + 
m_perf.ucp.send_iov, &send_length, + &send_buffer); + recv_datatype = ucp_perf_test_get_datatype(m_perf.params.ucp.recv_datatype, + m_perf.ucp.recv_iov, &recv_length, + &recv_buffer); + + if (my_index == 0) { + UCX_PERF_TEST_FOREACH(&m_perf) { + recv(worker, ep, recv_buffer, recv_length, recv_datatype, sn); + ucx_perf_update(&m_perf, 1, length); + ++sn; + } + } else if (my_index == 1) { + UCX_PERF_TEST_FOREACH(&m_perf) { + send(ep, send_buffer, send_length, send_datatype, sn, + remote_addr, rkey); + ucx_perf_update(&m_perf, 1, length); + ++sn; + } + } + + wait_window(m_max_outstanding); + ucp_worker_flush(m_perf.ucp.worker); + ucx_perf_get_time(&m_perf); + + ucp_perf_barrier(&m_perf); + return UCS_OK; + } + + ucs_status_t run() + { + /* coverity[switch_selector_expr_is_constant] */ + switch (TYPE) { + case UCX_PERF_TEST_TYPE_PINGPONG: + return run_pingpong(); + case UCX_PERF_TEST_TYPE_STREAM_UNI: + return run_stream_uni(); + case UCX_PERF_TEST_TYPE_STREAM_BI: + default: + return UCS_ERR_INVALID_PARAM; + } + } + +private: + ucs_status_t UCS_F_ALWAYS_INLINE + recv_stream_data(ucp_ep_h ep, unsigned length, ucp_datatype_t datatype) + { + void *data; + size_t data_length; + size_t total = 0; + + do { + progress_responder(); + data = ucp_stream_recv_data_nb(ep, &data_length); + if (ucs_likely(UCS_PTR_IS_PTR(data))) { + total += data_length; + ucp_stream_data_release(ep, data); + } + } while ((total < length) && !UCS_PTR_IS_ERR(data)); + + return UCS_PTR_IS_ERR(data) ? 
UCS_PTR_STATUS(data) : UCS_OK; + } + + ucs_status_t UCS_F_ALWAYS_INLINE + recv_stream(ucp_ep_h ep, void *buf, unsigned length, ucp_datatype_t datatype) + { + ssize_t total = 0; + void *rreq; + size_t rlength; + ssize_t rlength_s; + + do { + rreq = ucp_stream_recv_nb(ep, (char *)buf + total, length - total, + datatype, + (ucp_stream_recv_callback_t)ucs_empty_function, + &rlength, 0); + if (ucs_likely(rreq == NULL)) { + total += rlength; + } else if (UCS_PTR_IS_PTR(rreq)) { + rlength_s = wait_stream_recv(rreq); + if (ucs_unlikely(rlength_s < 0)) { + return ucs_status_t(rlength_s); + } + total += rlength_s; + } else { + return UCS_PTR_STATUS(rreq); + } + } while (total < length); + + return UCS_OK; + } + + void UCS_F_ALWAYS_INLINE send_started() + { + ++m_outstanding; + } + + void UCS_F_ALWAYS_INLINE send_completed() + { + --m_outstanding; + } + + ucx_perf_context_t &m_perf; + unsigned m_outstanding; + const unsigned m_max_outstanding; +}; + + +#define TEST_CASE(_perf, _cmd, _type, _flags, _mask) \ + if (((_perf)->params.command == (_cmd)) && \ + ((_perf)->params.test_type == (_type)) && \ + (((_perf)->params.flags & (_mask)) == (_flags))) \ + { \ + ucp_perf_test_runner<_cmd, _type, _flags> r(*_perf); \ + return r.run(); \ + } + +#define TEST_CASE_ALL_STREAM(_perf, _case) \ + TEST_CASE(_perf, UCS_PP_TUPLE_0 _case, UCS_PP_TUPLE_1 _case, \ + 0, \ + UCX_PERF_TEST_FLAG_STREAM_RECV_DATA) \ + TEST_CASE(_perf, UCS_PP_TUPLE_0 _case, UCS_PP_TUPLE_1 _case, \ + UCX_PERF_TEST_FLAG_STREAM_RECV_DATA, \ + UCX_PERF_TEST_FLAG_STREAM_RECV_DATA) + +#define TEST_CASE_ALL_TAG(_perf, _case) \ + TEST_CASE(_perf, UCS_PP_TUPLE_0 _case, UCS_PP_TUPLE_1 _case, \ + 0, \ + UCX_PERF_TEST_FLAG_TAG_WILDCARD|UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE) \ + TEST_CASE(_perf, UCS_PP_TUPLE_0 _case, UCS_PP_TUPLE_1 _case, \ + UCX_PERF_TEST_FLAG_TAG_WILDCARD, \ + UCX_PERF_TEST_FLAG_TAG_WILDCARD|UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE) \ + TEST_CASE(_perf, UCS_PP_TUPLE_0 _case, UCS_PP_TUPLE_1 _case, \ + 
/**
 * Select and run the UCP test that matches the parameters in @a perf.
 *
 * Each UCS_PP_FOREACH below expands, for every (command, test type) pair
 * listed, into a series of TEST_CASE checks. A TEST_CASE compares
 * perf->params.command, perf->params.test_type and the masked
 * perf->params.flags with its own constants; on a match it instantiates
 * the ucp_perf_test_runner specialization, runs it and returns the result.
 *
 * @param perf  Perf context holding the test parameters.
 *
 * @return Status of the matching runner, or UCS_ERR_INVALID_PARAM when no
 *         listed combination matches.
 */
ucs_status_t ucp_perf_test_dispatch(ucx_perf_context_t *perf)
{
    /* Variants distinguished by UCX_PERF_TEST_FLAG_ONE_SIDED */
    UCS_PP_FOREACH(TEST_CASE_ALL_OSD, perf,
        (UCX_PERF_CMD_PUT,   UCX_PERF_TEST_TYPE_PINGPONG),
        (UCX_PERF_CMD_PUT,   UCX_PERF_TEST_TYPE_STREAM_UNI),
        (UCX_PERF_CMD_GET,   UCX_PERF_TEST_TYPE_STREAM_UNI),
        (UCX_PERF_CMD_ADD,   UCX_PERF_TEST_TYPE_STREAM_UNI),
        (UCX_PERF_CMD_FADD,  UCX_PERF_TEST_TYPE_STREAM_UNI),
        (UCX_PERF_CMD_SWAP,  UCX_PERF_TEST_TYPE_STREAM_UNI),
        (UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI)
        );

    /* Variants distinguished by the TAG_WILDCARD / TAG_UNEXP_PROBE flags */
    UCS_PP_FOREACH(TEST_CASE_ALL_TAG, perf,
        (UCX_PERF_CMD_TAG,      UCX_PERF_TEST_TYPE_PINGPONG),
        (UCX_PERF_CMD_TAG,      UCX_PERF_TEST_TYPE_STREAM_UNI),
        (UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_PINGPONG),
        (UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_STREAM_UNI)
        );

    /* Variants distinguished by UCX_PERF_TEST_FLAG_STREAM_RECV_DATA */
    UCS_PP_FOREACH(TEST_CASE_ALL_STREAM, perf,
        (UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI),
        (UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG)
        );

    /* Reached only when none of the TEST_CASE checks above returned */
    ucs_error("Invalid test case: %d/%d/0x%x",
              perf->params.command, perf->params.test_type,
              perf->params.flags);
    return UCS_ERR_INVALID_PARAM;
}
    /**
     * Build a runner on top of an already set-up perf context.
     *
     * Caches the send window size, resets the completion object and the
     * last received sequence number and, if the interface reports any
     * active-message capability, installs am_hander() for
     * UCT_PERF_TEST_AM_ID with the sequence-number field as its argument.
     *
     * @param perf  Perf context; perf.uct.iface must be usable.
     */
    uct_perf_test_runner(ucx_perf_context_t &perf) :
        m_perf(perf),
        m_max_outstanding(m_perf.params.max_outstanding),
        m_send_b_count(0)

    {
        /* a zero window would make wait_for_window() spin forever */
        ucs_assert_always(m_max_outstanding > 0);

        m_completion.count = 1;
        m_completion.func  = NULL;
        m_last_recvd_sn    = 0;

        ucs_status_t status;
        uct_iface_attr_t attr;
        status = uct_iface_query(m_perf.uct.iface, &attr);
        ucs_assert_always(status == UCS_OK);
        /* Register the AM handler only when the interface supports at
         * least one of the AM send flavors */
        if (attr.cap.flags & (UCT_IFACE_FLAG_AM_SHORT |
                              UCT_IFACE_FLAG_AM_BCOPY |
                              UCT_IFACE_FLAG_AM_ZCOPY)) {
            status = uct_iface_set_am_handler(m_perf.uct.iface,
                                              UCT_PERF_TEST_AM_ID, am_hander,
                                              (void*)&m_last_recvd_sn, 0);
            ucs_assert_always(status == UCS_OK);
        }
    }
perf->params.iov_stride - header_size; + } else { + iov_length_it += iov[iov_it].length; + } + + header_size = 0; /* should be zero for next iterations */ + } + + ucs_debug("IOV buffer filled by %lu slices with total length %lu", + iovcnt, iov_length_it); + } + + void uct_perf_test_prepare_iov_buffer() { + if (UCT_PERF_DATA_LAYOUT_ZCOPY == DATA) { + size_t start_iov_buffer_size = 0; + if (UCX_PERF_CMD_AM == CMD) { + start_iov_buffer_size = m_perf.params.am_hdr_size; + } + uct_perf_get_buffer_iov(m_perf.uct.iov, m_perf.send_buffer, + start_iov_buffer_size, + m_perf.uct.send_mem.memh, + &m_perf); + } + } + + /** + * Get the length between beginning of the IOV first buffer and the latest byte + * in the latest IOV buffer. + */ + size_t uct_perf_get_buffer_extent(const ucx_perf_params_t *params) + { + size_t length; + + if ((UCT_PERF_DATA_LAYOUT_ZCOPY == DATA) && params->iov_stride) { + length = ((params->msg_size_cnt - 1) * params->iov_stride) + + params->msg_size_list[params->msg_size_cnt - 1]; + } else { + length = ucx_perf_get_message_size(params); + } + + return length; + } + + inline void set_sn(void *dst_sn, + ucs_memory_type_t dst_mem_type, + const void *src_sn) const { + if (ucs_likely(m_perf.allocator->mem_type == UCS_MEMORY_TYPE_HOST)) { + ucs_assert(dst_mem_type == UCS_MEMORY_TYPE_HOST); + *reinterpret_cast(dst_sn) = *reinterpret_cast(src_sn); + } + + m_perf.allocator->memcpy(dst_sn, dst_mem_type, + src_sn, UCS_MEMORY_TYPE_HOST, + sizeof(psn_t)); + } + + inline psn_t get_sn(const volatile void *sn, + ucs_memory_type_t mem_type) const { + if (ucs_likely(mem_type == UCS_MEMORY_TYPE_HOST)) { + return *reinterpret_cast(sn); + } + + psn_t host_sn; + m_perf.allocator->memcpy(&host_sn, UCS_MEMORY_TYPE_HOST, + const_cast(sn), + mem_type, sizeof(psn_t)); + return host_sn; + } + + inline void set_recv_sn(void *recv_sn, + ucs_memory_type_t recv_mem_type, + const void *src_sn) const { + if (CMD == UCX_PERF_CMD_AM) { + ucs_assert(&m_last_recvd_sn == recv_sn); + 
    /**
     * Active-message handler installed by the constructor for
     * UCT_PERF_TEST_AM_ID.
     *
     * @param arg     Pointer to the psn_t that tracks the last received
     *                sequence number.
     * @param data    Message payload; its first psn_t is the sequence number.
     * @param length  Payload length (unused).
     * @param flags   Handler flags (unused).
     *
     * @return Always UCS_OK.
     */
    static ucs_status_t am_hander(void *arg, void *data, size_t length,
                                  unsigned flags)
    {
        /* we always assume that buffers provided by TLs are host memory */
        /* debug check: under the 8-bit circular comparison, the stored
         * number must not be ahead of the incoming one */
        ucs_assert(UCS_CIRCULAR_COMPARE8(*(psn_t*)arg, <=, *(psn_t*)data));
        *(psn_t*)arg = *(psn_t*)data;
        return UCS_OK;
    }
uct_rkey_t rkey, uct_completion_t *comp) + { + uint64_t am_short_hdr; + size_t header_size; + ssize_t packed_len; + + /* coverity[switch_selector_expr_is_constant] */ + switch (CMD) { + case UCX_PERF_CMD_AM: + /* coverity[switch_selector_expr_is_constant] */ + switch (DATA) { + case UCT_PERF_DATA_LAYOUT_SHORT: + am_short_hdr = sn; + return uct_ep_am_short(ep, UCT_PERF_TEST_AM_ID, am_short_hdr, + (char*)buffer + sizeof(am_short_hdr), + length - sizeof(am_short_hdr)); + case UCT_PERF_DATA_LAYOUT_BCOPY: + set_sn(buffer, m_perf.uct.send_mem.mem_type, &sn); + packed_len = uct_ep_am_bcopy(ep, UCT_PERF_TEST_AM_ID, pack_cb, + (void*)this, 0); + return (packed_len >= 0) ? UCS_OK : (ucs_status_t)packed_len; + case UCT_PERF_DATA_LAYOUT_ZCOPY: + set_sn(buffer, m_perf.uct.send_mem.mem_type, &sn); + header_size = m_perf.params.am_hdr_size; + return uct_ep_am_zcopy(ep, UCT_PERF_TEST_AM_ID, buffer, header_size, + m_perf.uct.iov, m_perf.params.msg_size_cnt, + 0, comp); + default: + return UCS_ERR_INVALID_PARAM; + } + case UCX_PERF_CMD_PUT: + if (TYPE == UCX_PERF_TEST_TYPE_PINGPONG) { + /* Put the control word at the latest byte of the IOV message */ + set_sn(UCS_PTR_BYTE_OFFSET(buffer, + uct_perf_get_buffer_extent(&m_perf.params) - 1), + m_perf.uct.send_mem.mem_type, &sn); + } + /* coverity[switch_selector_expr_is_constant] */ + switch (DATA) { + case UCT_PERF_DATA_LAYOUT_SHORT: + return uct_ep_put_short(ep, buffer, length, remote_addr, rkey); + case UCT_PERF_DATA_LAYOUT_BCOPY: + packed_len = uct_ep_put_bcopy(ep, pack_cb, (void*)this, remote_addr, rkey); + return (packed_len >= 0) ? 
UCS_OK : (ucs_status_t)packed_len; + case UCT_PERF_DATA_LAYOUT_ZCOPY: + return uct_ep_put_zcopy(ep, m_perf.uct.iov, m_perf.params.msg_size_cnt, + remote_addr, rkey, comp); + default: + return UCS_ERR_INVALID_PARAM; + } + case UCX_PERF_CMD_GET: + /* coverity[switch_selector_expr_is_constant] */ + switch (DATA) { + case UCT_PERF_DATA_LAYOUT_BCOPY: + return uct_ep_get_bcopy(ep, unpack_cb, (void*)this, + length, remote_addr, rkey, comp); + case UCT_PERF_DATA_LAYOUT_ZCOPY: + return uct_ep_get_zcopy(ep, m_perf.uct.iov, m_perf.params.msg_size_cnt, + remote_addr, rkey, comp); + default: + return UCS_ERR_INVALID_PARAM; + } + case UCX_PERF_CMD_ADD: + if (length == sizeof(uint32_t)) { + return uct_ep_atomic32_post(ep, UCT_ATOMIC_OP_ADD, sn - prev_sn, remote_addr, rkey); + } else if (length == sizeof(uint64_t)) { + return uct_ep_atomic64_post(ep, UCT_ATOMIC_OP_ADD, sn - prev_sn, remote_addr, rkey); + } else { + return UCS_ERR_INVALID_PARAM; + } + case UCX_PERF_CMD_FADD: + if (length == sizeof(uint32_t)) { + return uct_ep_atomic32_fetch(ep, UCT_ATOMIC_OP_ADD, sn - prev_sn, + (uint32_t*)buffer, remote_addr, rkey, comp); + } else if (length == sizeof(uint64_t)) { + return uct_ep_atomic64_fetch(ep, UCT_ATOMIC_OP_ADD, sn - prev_sn, + (uint64_t*)buffer, remote_addr, rkey, comp); + } else { + return UCS_ERR_INVALID_PARAM; + } + case UCX_PERF_CMD_SWAP: + if (length == sizeof(uint32_t)) { + return uct_ep_atomic32_fetch(ep, UCT_ATOMIC_OP_SWAP, sn, + (uint32_t*)buffer, remote_addr, rkey, comp); + } else if (length == sizeof(uint64_t)) { + return uct_ep_atomic64_fetch(ep, UCT_ATOMIC_OP_SWAP, sn, + (uint64_t*)buffer, remote_addr, rkey, comp); + } else { + return UCS_ERR_INVALID_PARAM; + } + case UCX_PERF_CMD_CSWAP: + if (length == sizeof(uint32_t)) { + return uct_ep_atomic_cswap32(ep, prev_sn, sn, remote_addr, rkey, + (uint32_t*)buffer, comp); + } else if (length == sizeof(uint64_t)) { + return uct_ep_atomic_cswap64(ep, prev_sn, sn, remote_addr, rkey, + (uint64_t*)buffer, comp); + } else { 
+ return UCS_ERR_INVALID_PARAM; + } + default: + return UCS_ERR_INVALID_PARAM; + } + } + + void UCS_F_ALWAYS_INLINE + send_b(uct_ep_h ep, psn_t sn, psn_t prev_sn, void *buffer, unsigned length, + uint64_t remote_addr, uct_rkey_t rkey, uct_completion_t *comp) + { + ucs_status_t status; + for (;;) { + status = send(ep, sn, prev_sn, buffer, length, remote_addr, rkey, comp); + if (ucs_likely(status == UCS_OK)) { + if ((m_send_b_count++ % N_SEND_B_PER_PROGRESS) == 0) { + progress_requestor(); + } + return; + } else if (status == UCS_INPROGRESS) { + ++m_completion.count; + progress_requestor(); + ucs_assert((comp == NULL) || (outstanding() <= m_max_outstanding)); + return; + } else if (status == UCS_ERR_NO_RESOURCE) { + progress_requestor(); + continue; + } else { + ucs_error("Failed to send: %s", ucs_status_string(status)); + return; + } + }; + } + + ucs_status_t run_pingpong() + { + psn_t send_sn, *recv_sn, sn; + unsigned my_index; + uct_ep_h ep; + uint64_t remote_addr; + uct_rkey_t rkey; + void *buffer; + size_t length; + + length = ucx_perf_get_message_size(&m_perf.params); + ucs_assert(length >= sizeof(psn_t)); + + /* coverity[switch_selector_expr_is_constant] */ + switch (CMD) { + case UCX_PERF_CMD_AM: + recv_sn = &m_last_recvd_sn; + break; + case UCX_PERF_CMD_ADD: + recv_sn = (psn_t*)m_perf.recv_buffer; + break; + case UCX_PERF_CMD_PUT: + /* since polling on data, must be end of the buffer */ + recv_sn = (psn_t*)m_perf.recv_buffer + length - 1; + break; + default: + ucs_error("Cannot run this test in ping-pong mode"); + return UCS_ERR_INVALID_PARAM; + } + + uct_perf_test_prepare_iov_buffer(); + + sn = std::numeric_limits::max(); + set_recv_sn(recv_sn, m_perf.uct.recv_mem.mem_type, &sn); + + uct_perf_barrier(&m_perf); + + my_index = rte_call(&m_perf, group_index); + + ucx_perf_test_start_clock(&m_perf); + + buffer = m_perf.send_buffer; + remote_addr = m_perf.uct.peers[1 - my_index].remote_addr + m_perf.offset; + rkey = m_perf.uct.peers[1 - my_index].rkey.rkey; + ep 
= m_perf.uct.peers[1 - my_index].ep; + + send_sn = 0; + if (my_index == 0) { + UCX_PERF_TEST_FOREACH(&m_perf) { + send_b(ep, send_sn, send_sn - 1, buffer, length, remote_addr, + rkey, NULL); + ucx_perf_update(&m_perf, 1, length); + + do { + progress_responder(); + sn = get_recv_sn(recv_sn, m_perf.uct.recv_mem.mem_type); + } while (sn != send_sn); + + ++send_sn; + } + } else if (my_index == 1) { + UCX_PERF_TEST_FOREACH(&m_perf) { + do { + progress_responder(); + sn = get_recv_sn(recv_sn, m_perf.uct.recv_mem.mem_type); + } while (sn != send_sn); + + send_b(ep, send_sn, send_sn - 1, buffer, length, remote_addr, + rkey, NULL); + ucx_perf_update(&m_perf, 1, length); + ++send_sn; + } + } + + uct_perf_iface_flush_b(&m_perf); + ucx_perf_get_time(&m_perf); + return UCS_OK; + } + + ucs_status_t run_stream_req_uni(bool flow_control, bool send_window, + bool direction_to_responder) + { + unsigned long remote_addr; + volatile psn_t *recv_sn; + psn_t sn, send_sn; + uct_rkey_t rkey; + void *buffer; + unsigned fc_window; + unsigned my_index; + unsigned length; + uct_ep_h ep; + + length = ucx_perf_get_message_size(&m_perf.params); + ucs_assert(length >= sizeof(psn_t)); + ucs_assert(m_perf.params.uct.fc_window <= ((psn_t)-1) / 2); + + m_perf.allocator->memset(m_perf.send_buffer, 0, length); + m_perf.allocator->memset(m_perf.recv_buffer, 0, length); + + uct_perf_test_prepare_iov_buffer(); + + recv_sn = (direction_to_responder ? + ((CMD == UCX_PERF_CMD_AM) ? 
+ &m_last_recvd_sn : + (psn_t*)m_perf.recv_buffer) : + (psn_t*)m_perf.send_buffer); + my_index = rte_call(&m_perf, group_index); + + uct_perf_barrier(&m_perf); + + ucx_perf_test_start_clock(&m_perf); + + ep = m_perf.uct.peers[1 - my_index].ep; + buffer = m_perf.send_buffer; + remote_addr = m_perf.uct.peers[1 - my_index].remote_addr + m_perf.offset; + rkey = m_perf.uct.peers[1 - my_index].rkey.rkey; + fc_window = m_perf.params.uct.fc_window; + + if (my_index == 1) { + /* send_sn is the next SN to send */ + if (flow_control) { + send_sn = 1; + } else{ + send_sn = 0; /* Remote buffer will remain 0 throughout the test */ + } + + set_sn(buffer, m_perf.uct.send_mem.mem_type, &send_sn); + + UCX_PERF_TEST_FOREACH(&m_perf) { + if (flow_control) { + /* Wait until getting ACK from responder */ + sn = get_recv_sn(recv_sn, m_perf.uct.recv_mem.mem_type); + ucs_assertv(UCS_CIRCULAR_COMPARE8(send_sn - 1, >=, sn), + "recv_sn=%d iters=%ld", sn, m_perf.current.iters); + + while (UCS_CIRCULAR_COMPARE8(send_sn, >, sn + fc_window)) { + progress_responder(); + sn = get_recv_sn(recv_sn, m_perf.uct.recv_mem.mem_type); + } + } + + /* Wait until we have enough sends completed, then take + * the next completion handle in the window. */ + wait_for_window(send_window); + + if (flow_control) { + send_b(ep, send_sn, send_sn - 1, buffer, length, remote_addr, + rkey, &m_completion); + ++send_sn; + } else { + send_b(ep, send_sn, send_sn, buffer, length, remote_addr, + rkey, &m_completion); + } + + ucx_perf_update(&m_perf, 1, length); + } + + if (!flow_control) { + sn = 2; + /* Send "sentinel" value */ + if (direction_to_responder) { + wait_for_window(send_window); + set_sn(buffer, m_perf.uct.send_mem.mem_type, &sn); + send_b(ep, 2, send_sn, buffer, length, remote_addr, rkey, + &m_completion); + } else { + set_sn(m_perf.recv_buffer, + m_perf.uct.recv_mem.mem_type, + &sn); + } + } else { + /* Wait for last ACK, to make sure no more messages will arrive. 
*/ + ucs_assert(direction_to_responder); + + do { + progress_responder(); + sn = get_recv_sn(recv_sn, m_perf.uct.recv_mem.mem_type); + } while (UCS_CIRCULAR_COMPARE8((psn_t)(send_sn - 1), >, sn)); + } + } else if (my_index == 0) { + if (flow_control) { + /* Since we're doing flow control, we can count exactly how + * many packets were received. + */ + send_sn = (psn_t)-1; /* Last SN we have sent (as acknowledgment) */ + ucs_assert(direction_to_responder); + UCX_PERF_TEST_FOREACH(&m_perf) { + progress_responder(); + sn = get_recv_sn(recv_sn, m_perf.uct.recv_mem.mem_type); + + if (UCS_CIRCULAR_COMPARE8(sn, >, (psn_t)(send_sn + (fc_window / 2)))) { + /* Send ACK every half-window */ + wait_for_window(send_window); + send_b(ep, sn, send_sn, buffer, length, remote_addr, + rkey, &m_completion); + send_sn = sn; + } + + /* Calculate number of iterations */ + m_perf.current.iters += + (psn_t)(sn - (psn_t)m_perf.current.iters); + } + + /* Send ACK for last packet */ + sn = get_recv_sn(recv_sn, m_perf.uct.recv_mem.mem_type); + if (UCS_CIRCULAR_COMPARE8(sn, >, send_sn)) { + wait_for_window(send_window); + sn = get_recv_sn(recv_sn, m_perf.uct.recv_mem.mem_type); + send_b(ep, sn, send_sn, buffer, length, remote_addr, + rkey, &m_completion); + } + } else { + /* Wait for "sentinel" value */ + ucs_time_t poll_time = ucs_get_time(); + + do { + progress_responder(); + sn = get_recv_sn(recv_sn, m_perf.uct.recv_mem.mem_type); + if (!direction_to_responder) { + if (ucs_get_time() > poll_time + ucs_time_from_msec(1.0)) { + wait_for_window(send_window); + send_b(ep, 0, 0, buffer, length, remote_addr, rkey, + &m_completion); + poll_time = ucs_get_time(); + } + } + } while (sn != 2); + } + } + + uct_perf_iface_flush_b(&m_perf); + ucx_perf_get_time(&m_perf); + ucs_assert(outstanding() == 0); + if (my_index == 1) { + ucx_perf_update(&m_perf, 0, 0); + } + + return UCS_OK; + } + + ucs_status_t run() + { + bool zcopy = (DATA == UCT_PERF_DATA_LAYOUT_ZCOPY); + + /* 
coverity[switch_selector_expr_is_constant] */ + switch (TYPE) { + case UCX_PERF_TEST_TYPE_PINGPONG: + return run_pingpong(); + case UCX_PERF_TEST_TYPE_STREAM_UNI: + /* coverity[switch_selector_expr_is_constant] */ + switch (CMD) { + case UCX_PERF_CMD_PUT: + return run_stream_req_uni(false, /* No need for flow control for RMA */ + zcopy, /* ZCOPY can return INPROGRESS */ + true /* data goes to responder */); + case UCX_PERF_CMD_ADD: + return run_stream_req_uni(false, /* No need for flow control for RMA */ + false, /* This atomic does not wait for reply */ + true /* Data goes to responder */); + case UCX_PERF_CMD_AM: + return run_stream_req_uni(true, /* Need flow control for active messages, + because they are handled in SW */ + zcopy, /* ZCOPY can return INPROGRESS */ + true /* data goes to responder */); + case UCX_PERF_CMD_GET: + return run_stream_req_uni(false, /* No flow control for RMA/AMO */ + true, /* Waiting for replies */ + false /* For GET, data is delivered to requester */ ); + case UCX_PERF_CMD_FADD: + case UCX_PERF_CMD_SWAP: + case UCX_PERF_CMD_CSWAP: + return run_stream_req_uni(false, /* No flow control for RMA/AMO */ + true, /* Waiting for replies */ + true /* For atomics, data goes both ways, but + the request is easier to predict */ ); + default: + return UCS_ERR_INVALID_PARAM; + } + case UCX_PERF_TEST_TYPE_STREAM_BI: + default: + return UCS_ERR_INVALID_PARAM; + } + } + +private: + inline unsigned outstanding() { + return m_completion.count - 1; + } + + ucx_perf_context_t &m_perf; + const unsigned m_max_outstanding; + uct_completion_t m_completion; + int m_send_b_count; + /* this is only valid for UCT AM tests */ + psn_t m_last_recvd_sn; + const static int N_SEND_B_PER_PROGRESS = 16; +}; + + +#define TEST_CASE(_perf, _cmd, _type, _data, _onesided) \ + if (((_perf)->params.command == (_cmd)) && \ + ((_perf)->params.test_type == (_type)) && \ + ((_perf)->params.uct.data_layout == (_data)) && \ + (!!((_perf)->params.flags & 
UCX_PERF_TEST_FLAG_ONE_SIDED) == !!(_onesided))) \ + { \ + uct_perf_test_runner<_cmd, _type, _data, _onesided> r(*_perf); \ + return r.run(); \ + } +#define TEST_CASE_ALL_OSD(_perf, _case, _data) \ + TEST_CASE(_perf, UCS_PP_TUPLE_0 _case, UCS_PP_TUPLE_1 _case, _data, true) \ + TEST_CASE(_perf, UCS_PP_TUPLE_0 _case, UCS_PP_TUPLE_1 _case, _data, false) +#define TEST_CASE_ALL_DATA(_perf, _case) \ + TEST_CASE_ALL_OSD(_perf, _case, UCT_PERF_DATA_LAYOUT_SHORT) \ + TEST_CASE_ALL_OSD(_perf, _case, UCT_PERF_DATA_LAYOUT_BCOPY) \ + TEST_CASE_ALL_OSD(_perf, _case, UCT_PERF_DATA_LAYOUT_ZCOPY) + +ucs_status_t uct_perf_test_dispatch(ucx_perf_context_t *perf) +{ + UCS_PP_FOREACH(TEST_CASE_ALL_DATA, perf, + (UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_PINGPONG), + (UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG), + (UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_PINGPONG), + (UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_STREAM_UNI), + (UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI), + (UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI), + (UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_STREAM_UNI), + (UCX_PERF_CMD_FADD, UCX_PERF_TEST_TYPE_STREAM_UNI), + (UCX_PERF_CMD_SWAP, UCX_PERF_TEST_TYPE_STREAM_UNI), + (UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI) + ); + + ucs_error("Invalid test case"); + return UCS_ERR_INVALID_PARAM; +} diff --git a/src/tools/perf/perftest.c b/src/tools/perf/perftest.c new file mode 100644 index 0000000..2f1a9ee --- /dev/null +++ b/src/tools/perf/perftest.c @@ -0,0 +1,1591 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) The University of Tennessee and The University +* of Tennessee Research Foundation. 2015. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "api/libperf.h" +#include "lib/libperf_int.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if HAVE_MPI +# include +#elif HAVE_RTE +# include +#endif + +#define MAX_BATCH_FILES 32 +#define TL_RESOURCE_NAME_NONE "" +#define TEST_PARAMS_ARGS "t:n:s:W:O:w:D:i:H:oSCqM:r:T:d:x:A:BUm:" + + +enum { + TEST_FLAG_PRINT_RESULTS = UCS_BIT(0), + TEST_FLAG_PRINT_TEST = UCS_BIT(1), + TEST_FLAG_SET_AFFINITY = UCS_BIT(8), + TEST_FLAG_NUMERIC_FMT = UCS_BIT(9), + TEST_FLAG_PRINT_FINAL = UCS_BIT(10), + TEST_FLAG_PRINT_CSV = UCS_BIT(11) +}; + +typedef struct sock_rte_group { + int is_server; + int connfd; +} sock_rte_group_t; + + +typedef struct test_type { + const char *name; + ucx_perf_api_t api; + ucx_perf_cmd_t command; + ucx_perf_test_type_t test_type; + const char *desc; +} test_type_t; + + +struct perftest_context { + ucx_perf_params_t params; + const char *server_addr; + int port; + int mpi; + unsigned cpu; + unsigned flags; + + unsigned num_batch_files; + char *batch_files[MAX_BATCH_FILES]; + char *test_names[MAX_BATCH_FILES]; + + sock_rte_group_t sock_rte_group; +}; + + +test_type_t tests[] = { + {"am_lat", UCX_PERF_API_UCT, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_PINGPONG, + "active message latency"}, + + {"put_lat", UCX_PERF_API_UCT, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG, + "put latency"}, + + {"add_lat", UCX_PERF_API_UCT, UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_PINGPONG, + "atomic add latency"}, + + {"get", UCX_PERF_API_UCT, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI, + "get latency / bandwidth / message rate"}, + + {"fadd", UCX_PERF_API_UCT, UCX_PERF_CMD_FADD, UCX_PERF_TEST_TYPE_STREAM_UNI, + "atomic fetch-and-add latency / rate"}, + + {"swap", UCX_PERF_API_UCT, UCX_PERF_CMD_SWAP, UCX_PERF_TEST_TYPE_STREAM_UNI, + "atomic swap latency / rate"}, + + {"cswap", UCX_PERF_API_UCT, 
UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI, + "atomic compare-and-swap latency / rate"}, + + {"am_bw", UCX_PERF_API_UCT, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_STREAM_UNI, + "active message bandwidth / message rate"}, + + {"put_bw", UCX_PERF_API_UCT, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI, + "put bandwidth / message rate"}, + + {"add_mr", UCX_PERF_API_UCT, UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_STREAM_UNI, + "atomic add message rate"}, + + {"tag_lat", UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG, + "tag match latency"}, + + {"tag_bw", UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI, + "tag match bandwidth"}, + + {"tag_sync_lat", UCX_PERF_API_UCP, UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_PINGPONG, + "tag sync match latency"}, + + {"tag_sync_bw", UCX_PERF_API_UCP, UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_STREAM_UNI, + "tag sync match bandwidth"}, + + {"ucp_put_lat", UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG, + "put latency"}, + + {"ucp_put_bw", UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI, + "put bandwidth"}, + + {"ucp_get", UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI, + "get latency / bandwidth / message rate"}, + + {"ucp_add", UCX_PERF_API_UCP, UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_STREAM_UNI, + "atomic add bandwidth / message rate"}, + + {"ucp_fadd", UCX_PERF_API_UCP, UCX_PERF_CMD_FADD, UCX_PERF_TEST_TYPE_STREAM_UNI, + "atomic fetch-and-add latency / bandwidth / rate"}, + + {"ucp_swap", UCX_PERF_API_UCP, UCX_PERF_CMD_SWAP, UCX_PERF_TEST_TYPE_STREAM_UNI, + "atomic swap latency / bandwidth / rate"}, + + {"ucp_cswap", UCX_PERF_API_UCP, UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI, + "atomic compare-and-swap latency / bandwidth / rate"}, + + {"stream_bw", UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI, + "stream bandwidth"}, + + {"stream_lat", UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG, + "stream 
latency"}, + + {NULL} +}; + +static int sock_io(int sock, ssize_t (*sock_call)(int, void *, size_t, int), + int poll_events, void *data, size_t size, + void (*progress)(void *arg), void *arg, const char *name) +{ + size_t total = 0; + struct pollfd pfd; + int ret; + + while (total < size) { + pfd.fd = sock; + pfd.events = poll_events; + pfd.revents = 0; + + ret = poll(&pfd, 1, 1); /* poll for 1ms */ + if (ret > 0) { + ucs_assert(ret == 1); + ucs_assert(pfd.revents & poll_events); + + ret = sock_call(sock, (char*)data + total, size - total, 0); + if (ret < 0) { + ucs_error("%s() failed: %m", name); + return -1; + } + total += ret; + } else if ((ret < 0) && (errno != EINTR)) { + ucs_error("poll(fd=%d) failed: %m", sock); + return -1; + } + + /* progress user context */ + if (progress != NULL) { + progress(arg); + } + } + return 0; +} + +static int safe_send(int sock, void *data, size_t size, + void (*progress)(void *arg), void *arg) +{ + typedef ssize_t (*sock_call)(int, void *, size_t, int); + + return sock_io(sock, (sock_call)send, POLLOUT, data, size, progress, arg, "send"); +} + +static int safe_recv(int sock, void *data, size_t size, + void (*progress)(void *arg), void *arg) +{ + return sock_io(sock, recv, POLLIN, data, size, progress, arg, "recv"); +} + +static void print_progress(char **test_names, unsigned num_names, + const ucx_perf_result_t *result, unsigned flags, + int final) +{ + static const char *fmt_csv = "%.0f,%.3f,%.3f,%.3f,%.2f,%.2f,%.0f,%.0f\n"; + static const char *fmt_numeric = "%'14.0f %9.3f %9.3f %9.3f %10.2f %10.2f %'11.0f %'11.0f\n"; + static const char *fmt_plain = "%14.0f %9.3f %9.3f %9.3f %10.2f %10.2f %11.0f %11.0f\n"; + unsigned i; + + if (!(flags & TEST_FLAG_PRINT_RESULTS) || + (!final && (flags & TEST_FLAG_PRINT_FINAL))) + { + return; + } + + if (flags & TEST_FLAG_PRINT_CSV) { + for (i = 0; i < num_names; ++i) { + printf("%s,", test_names[i]); + } + } + + printf((flags & TEST_FLAG_PRINT_CSV) ? 
fmt_csv : + (flags & TEST_FLAG_NUMERIC_FMT) ? fmt_numeric : + fmt_plain, + (double)result->iters, + result->latency.typical * 1000000.0, + result->latency.moment_average * 1000000.0, + result->latency.total_average * 1000000.0, + result->bandwidth.moment_average / (1024.0 * 1024.0), + result->bandwidth.total_average / (1024.0 * 1024.0), + result->msgrate.moment_average, + result->msgrate.total_average); + fflush(stdout); +} + +static void print_header(struct perftest_context *ctx) +{ + const char *test_api_str; + const char *test_data_str; + test_type_t *test; + unsigned i; + + if (ctx->flags & TEST_FLAG_PRINT_TEST) { + for (test = tests; test->name; ++test) { + if ((test->command == ctx->params.command) && (test->test_type == ctx->params.test_type)) { + break; + } + } + if (test->name != NULL) { + if (test->api == UCX_PERF_API_UCT) { + test_api_str = "transport layer"; + switch (ctx->params.uct.data_layout) { + case UCT_PERF_DATA_LAYOUT_SHORT: + test_data_str = "short"; + break; + case UCT_PERF_DATA_LAYOUT_BCOPY: + test_data_str = "bcopy"; + break; + case UCT_PERF_DATA_LAYOUT_ZCOPY: + test_data_str = "zcopy"; + break; + default: + test_data_str = "(undefined)"; + break; + } + } else if (test->api == UCX_PERF_API_UCP) { + test_api_str = "protocol layer"; + test_data_str = "(automatic)"; /* TODO contig/stride/stream */ + } else { + return; + } + + printf("+------------------------------------------------------------------------------------------+\n"); + printf("| API: %-60s |\n", test_api_str); + printf("| Test: %-60s |\n", test->desc); + printf("| Data layout: %-60s |\n", test_data_str); + printf("| Send memory: %-60s |\n", ucs_memory_type_names[ctx->params.send_mem_type]); + printf("| Recv memory: %-60s |\n", ucs_memory_type_names[ctx->params.recv_mem_type]); + printf("| Message size: %-60zu |\n", ucx_perf_get_message_size(&ctx->params)); + } + } + + if (ctx->flags & TEST_FLAG_PRINT_CSV) { + if (ctx->flags & TEST_FLAG_PRINT_RESULTS) { + for (i = 0; i < 
ctx->num_batch_files; ++i) { + printf("%s,", basename(ctx->batch_files[i])); + } + printf("iterations,typical_lat,avg_lat,overall_lat,avg_bw,overall_bw,avg_mr,overall_mr\n"); + } + } else { + if (ctx->flags & TEST_FLAG_PRINT_RESULTS) { + printf("+--------------+-----------------------------+---------------------+-----------------------+\n"); + printf("| | latency (usec) | bandwidth (MB/s) | message rate (msg/s) |\n"); + printf("+--------------+---------+---------+---------+----------+----------+-----------+-----------+\n"); + printf("| # iterations | typical | average | overall | average | overall | average | overall |\n"); + printf("+--------------+---------+---------+---------+----------+----------+-----------+-----------+\n"); + } else if (ctx->flags & TEST_FLAG_PRINT_TEST) { + printf("+------------------------------------------------------------------------------------------+\n"); + } + } +} + +/** + * Print a table separator line with the batch test names ('/'-separated) + * written over it, starting at the second character. Nothing is printed in + * CSV mode or when no batch files are used. Names which do not fit into the + * line buffer are truncated. + */ +static void print_test_name(struct perftest_context *ctx) +{ + /* zero-filled, so the line stays terminated even if the names overwrite + * the terminator written by strcpy() */ + char buf[200] = {0}; + unsigned i, pos; + size_t len; + + if (!(ctx->flags & TEST_FLAG_PRINT_CSV) && (ctx->num_batch_files > 0)) { + strcpy(buf, "+--------------+---------+---------+---------+----------+----------+-----------+-----------+"); + + pos = 1; + for (i = 0; i < ctx->num_batch_files; ++i) { + /* never write to the last byte, it holds the terminator */ + if ((i != 0) && (pos < (sizeof(buf) - 1))) { + buf[pos++] = '/'; + } + len = ucs_min(strlen(ctx->test_names[i]), sizeof(buf) - pos - 1); + memcpy(&buf[pos], ctx->test_names[i], len); + /* advance by the copied length only, so pos cannot pass the end + * of buf and the size computation above cannot underflow */ + pos += len; + } + + if (ctx->flags & TEST_FLAG_PRINT_RESULTS) { + printf("%s\n", buf); + } + } +} + +static void print_memory_type_usage(void) +{ + ucs_memory_type_t it; + for (it = UCS_MEMORY_TYPE_HOST; it < UCS_MEMORY_TYPE_LAST; it++) { + if (ucx_perf_mem_type_allocators[it] != NULL) { + printf(" %s - %s\n", + ucs_memory_type_names[it], + ucs_memory_type_descs[it]); + } + } +} + +static void usage(const struct perftest_context *ctx, const char *program) +{ + static const char* api_names[] = { + [UCX_PERF_API_UCT] = "UCT", + [UCX_PERF_API_UCP] = "UCP" + }; + 
test_type_t *test; + int UCS_V_UNUSED rank; + +#if HAVE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (ctx->mpi && (rank != 0)) { + return; + } +#endif + +#if HAVE_MPI + printf(" Note: test can be also launched as an MPI application\n"); + printf("\n"); +#elif HAVE_RTE + printf(" Note: this test can be also launched as an libRTE application\n"); + printf("\n"); +#endif + printf(" Usage: %s [ server-hostname ] [ options ]\n", program); + printf("\n"); + printf(" Common options:\n"); + printf(" -t test to run:\n"); + for (test = tests; test->name; ++test) { + printf(" %13s - %s %s\n", test->name, + api_names[test->api], test->desc); + } + printf("\n"); + printf(" -s list of scatter-gather sizes for single message (%zu)\n", + ctx->params.msg_size_list[0]); + printf(" for example: \"-s 16,48,8192,8192,14\"\n"); + printf(" -m [,]\n"); + printf(" memory type of message for sender and receiver (host)\n"); + print_memory_type_usage(); + printf(" -n number of iterations to run (%ld)\n", ctx->params.max_iter); + printf(" -w number of warm-up iterations (%zu)\n", + ctx->params.warmup_iter); + printf(" -c set affinity to this CPU (off)\n"); + printf(" -O maximal number of uncompleted outstanding sends (%u)\n", + ctx->params.max_outstanding); + printf(" -i distance between consecutive scatter-gather entries (%zu)\n", + ctx->params.iov_stride); + printf(" -T number of threads in the test (%d), if >1 implies \"-M multi\"\n", + ctx->params.thread_count); + printf(" -B register memory with NONBLOCK flag\n"); + printf(" -b read and execute tests from a batch file: every line in the\n"); + printf(" file is a test to run, first word is test name, the rest of\n"); + printf(" the line is command-line arguments for the test.\n"); + printf(" -p TCP port to use for data exchange (%d)\n", ctx->port); +#if HAVE_MPI + printf(" -P <0|1> disable/enable MPI mode (%d)\n", ctx->mpi); +#endif + printf(" -h show this help message\n"); + printf("\n"); + printf(" Output format:\n"); + printf(" -N 
use numeric formatting (thousands separator)\n"); + printf(" -f print only final numbers\n"); + printf(" -v print CSV-formatted output\n"); + printf("\n"); + printf(" UCT only:\n"); + printf(" -d device to use for testing\n"); + printf(" -x transport to use for testing\n"); + printf(" -D data layout for sender side:\n"); + printf(" short - short messages (default, cannot be used for get)\n"); + printf(" bcopy - copy-out (cannot be used for atomics)\n"); + printf(" zcopy - zero-copy (cannot be used for atomics)\n"); + printf(" iov - scatter-gather list (iovec)\n"); + printf(" -W flow control window size, for active messages (%u)\n", + ctx->params.uct.fc_window); + printf(" -H active message header size (%zu)\n", + ctx->params.am_hdr_size); + printf(" -A asynchronous progress mode (thread_spinlock)\n"); + printf(" thread_spinlock - separate progress thread with spin locking\n"); + printf(" thread_mutex - separate progress thread with mutex locking\n"); + printf(" signal - signal-based timer\n"); + printf("\n"); + printf(" UCP only:\n"); + printf(" -M thread support level for progress engine (single)\n"); + printf(" single - only the master thread can access\n"); + printf(" serialized - one thread can access at a time\n"); + printf(" multi - multiple threads can access\n"); + printf(" -D [,]\n"); + printf(" data layout for sender and receiver side (contig)\n"); + printf(" contig - Continuous datatype\n"); + printf(" iov - Scatter-gather list\n"); + printf(" -C use wild-card tag for tag tests\n"); + printf(" -U force unexpected flow by using tag probe\n"); + printf(" -r receive mode for stream tests (recv)\n"); + printf(" recv : Use ucp_stream_recv_nb\n"); + printf(" recv_data : Use ucp_stream_recv_data_nb\n"); + printf("\n"); + printf(" NOTE: When running UCP tests, transport and device should be specified by\n"); + printf(" environment variables: UCX_TLS and UCX_[SELF|SHM|NET]_DEVICES.\n"); + printf("\n"); +} + +static ucs_status_t parse_ucp_datatype_params(const 
char *optarg, + ucp_perf_datatype_t *datatype) +{ + const char *iov_type = "iov"; + const size_t iov_type_size = strlen("iov"); + const char *contig_type = "contig"; + const size_t contig_type_size = strlen("contig"); + + if (0 == strncmp(optarg, iov_type, iov_type_size)) { + *datatype = UCP_PERF_DATATYPE_IOV; + } else if (0 == strncmp(optarg, contig_type, contig_type_size)) { + *datatype = UCP_PERF_DATATYPE_CONTIG; + } else { + return UCS_ERR_INVALID_PARAM; + } + + return UCS_OK; +} + +static ucs_status_t parse_mem_type(const char *optarg, + ucs_memory_type_t *mem_type) +{ + ucs_memory_type_t it; + for (it = UCS_MEMORY_TYPE_HOST; it < UCS_MEMORY_TYPE_LAST; it++) { + if(!strcmp(optarg, ucs_memory_type_names[it]) && + (ucx_perf_mem_type_allocators[it] != NULL)) { + *mem_type = it; + return UCS_OK; + } + } + ucs_error("Unsupported memory type: \"%s\"", optarg); + return UCS_ERR_INVALID_PARAM; +} + +/* + * Parse the argument of the "-m" option: one memory type name, optionally + * followed by a comma and a second name. The first name selects the sender + * memory type; the second one, when present, selects the receiver memory + * type, otherwise the receiver uses the same type as the sender. + * Returns UCS_OK on success and UCS_ERR_INVALID_PARAM when a name is missing + * or rejected by parse_mem_type(). + * Note: strtok() writes into the string passed as optarg. + */ +static ucs_status_t parse_mem_type_params(const char *optarg, + ucs_memory_type_t *send_mem_type, + ucs_memory_type_t *recv_mem_type) +{ + const char *delim = ","; + char *token = strtok((char*)optarg, delim); + + /* strtok() yields NULL for an empty or delimiter-only argument; such a + * NULL must not reach strcmp() inside parse_mem_type() */ + if (NULL == token) { + ucs_error("Missing memory type name in option argument for -m"); + return UCS_ERR_INVALID_PARAM; + } + + if (UCS_OK != parse_mem_type(token, send_mem_type)) { + return UCS_ERR_INVALID_PARAM; + } + + token = strtok(NULL, delim); + if (NULL == token) { + *recv_mem_type = *send_mem_type; + return UCS_OK; + } else { + return parse_mem_type(token, recv_mem_type); + } +} + +static ucs_status_t parse_message_sizes_params(const char *optarg, + ucx_perf_params_t *params) +{ + const char delim = ','; + size_t *msg_size_list, token_num, token_it; + char *optarg_ptr, *optarg_ptr2; + + optarg_ptr = (char *)optarg; + token_num = 0; + /* count the number of given message sizes */ + while ((optarg_ptr = strchr(optarg_ptr, delim)) != NULL) { + ++optarg_ptr; + ++token_num; + } + ++token_num; + + msg_size_list = realloc(params->msg_size_list, + sizeof(*params->msg_size_list) * token_num); + if (NULL == msg_size_list) { + return UCS_ERR_NO_MEMORY; + } + + params->msg_size_list =
msg_size_list; + + optarg_ptr = (char *)optarg; + errno = 0; + for (token_it = 0; token_it < token_num; ++token_it) { + params->msg_size_list[token_it] = strtoul(optarg_ptr, &optarg_ptr2, 10); + if (((ERANGE == errno) && (ULONG_MAX == params->msg_size_list[token_it])) || + ((errno != 0) && (params->msg_size_list[token_it] == 0)) || + (optarg_ptr == optarg_ptr2)) { + free(params->msg_size_list); + params->msg_size_list = NULL; /* prevent double free */ + ucs_error("Invalid option substring argument at position %lu", token_it); + return UCS_ERR_INVALID_PARAM; + } + optarg_ptr = optarg_ptr2 + 1; + } + + params->msg_size_cnt = token_num; + return UCS_OK; +} + +static ucs_status_t init_test_params(ucx_perf_params_t *params) +{ + memset(params, 0, sizeof(*params)); + params->api = UCX_PERF_API_LAST; + params->command = UCX_PERF_CMD_LAST; + params->test_type = UCX_PERF_TEST_TYPE_LAST; + params->thread_mode = UCS_THREAD_MODE_SINGLE; + params->thread_count = 1; + params->async_mode = UCS_ASYNC_THREAD_LOCK_TYPE; + params->wait_mode = UCX_PERF_WAIT_MODE_LAST; + params->max_outstanding = 1; + params->warmup_iter = 10000; + params->am_hdr_size = 8; + params->alignment = ucs_get_page_size(); + params->max_iter = 1000000l; + params->max_time = 0.0; + params->report_interval = 1.0; + params->flags = UCX_PERF_TEST_FLAG_VERBOSE; + params->uct.fc_window = UCT_PERF_TEST_MAX_FC_WINDOW; + params->uct.data_layout = UCT_PERF_DATA_LAYOUT_SHORT; + params->send_mem_type = UCS_MEMORY_TYPE_HOST; + params->recv_mem_type = UCS_MEMORY_TYPE_HOST; + params->msg_size_cnt = 1; + params->iov_stride = 0; + params->ucp.send_datatype = UCP_PERF_DATATYPE_CONTIG; + params->ucp.recv_datatype = UCP_PERF_DATATYPE_CONTIG; + strcpy(params->uct.dev_name, TL_RESOURCE_NAME_NONE); + strcpy(params->uct.tl_name, TL_RESOURCE_NAME_NONE); + + params->msg_size_list = calloc(params->msg_size_cnt, + sizeof(*params->msg_size_list)); + if (params->msg_size_list == NULL) { + return UCS_ERR_NO_MEMORY; + } + + 
params->msg_size_list[0] = 8; + + return UCS_OK; +} + +static ucs_status_t parse_test_params(ucx_perf_params_t *params, char opt, const char *optarg) +{ + test_type_t *test; + char *optarg2 = NULL; + + switch (opt) { + case 'd': + ucs_snprintf_zero(params->uct.dev_name, sizeof(params->uct.dev_name), + "%s", optarg); + return UCS_OK; + case 'x': + ucs_snprintf_zero(params->uct.tl_name, sizeof(params->uct.tl_name), + "%s", optarg); + return UCS_OK; + case 't': + for (test = tests; test->name; ++test) { + if (!strcmp(optarg, test->name)) { + params->api = test->api; + params->command = test->command; + params->test_type = test->test_type; + break; + } + } + if (test->name == NULL) { + ucs_error("Invalid option argument for -t"); + return UCS_ERR_INVALID_PARAM; + } + return UCS_OK; + case 'D': + if (!strcmp(optarg, "short")) { + params->uct.data_layout = UCT_PERF_DATA_LAYOUT_SHORT; + } else if (!strcmp(optarg, "bcopy")) { + params->uct.data_layout = UCT_PERF_DATA_LAYOUT_BCOPY; + } else if (!strcmp(optarg, "zcopy")) { + params->uct.data_layout = UCT_PERF_DATA_LAYOUT_ZCOPY; + } else if (UCS_OK == parse_ucp_datatype_params(optarg, + ¶ms->ucp.send_datatype)) { + optarg2 = strchr(optarg, ','); + if (optarg2) { + if (UCS_OK != parse_ucp_datatype_params(optarg2 + 1, + ¶ms->ucp.recv_datatype)) { + return UCS_ERR_INVALID_PARAM; + } + } + } else { + ucs_error("Invalid option argument for -D"); + return UCS_ERR_INVALID_PARAM; + } + return UCS_OK; + case 'i': + params->iov_stride = atol(optarg); + return UCS_OK; + case 'n': + params->max_iter = atol(optarg); + return UCS_OK; + case 's': + return parse_message_sizes_params(optarg, params); + case 'H': + params->am_hdr_size = atol(optarg); + return UCS_OK; + case 'W': + params->uct.fc_window = atoi(optarg); + return UCS_OK; + case 'O': + params->max_outstanding = atoi(optarg); + return UCS_OK; + case 'w': + params->warmup_iter = atol(optarg); + return UCS_OK; + case 'o': + params->flags |= UCX_PERF_TEST_FLAG_ONE_SIDED; + return 
UCS_OK; + case 'B': + params->flags |= UCX_PERF_TEST_FLAG_MAP_NONBLOCK; + return UCS_OK; + case 'q': + params->flags &= ~UCX_PERF_TEST_FLAG_VERBOSE; + return UCS_OK; + case 'C': + params->flags |= UCX_PERF_TEST_FLAG_TAG_WILDCARD; + return UCS_OK; + case 'U': + params->flags |= UCX_PERF_TEST_FLAG_TAG_UNEXP_PROBE; + return UCS_OK; + case 'M': + if (!strcmp(optarg, "single")) { + params->thread_mode = UCS_THREAD_MODE_SINGLE; + return UCS_OK; + } else if (!strcmp(optarg, "serialized")) { + params->thread_mode = UCS_THREAD_MODE_SERIALIZED; + return UCS_OK; + } else if (!strcmp(optarg, "multi")) { + params->thread_mode = UCS_THREAD_MODE_MULTI; + return UCS_OK; + } else { + ucs_error("Invalid option argument for -M"); + return UCS_ERR_INVALID_PARAM; + } + case 'T': + params->thread_count = atoi(optarg); + params->thread_mode = UCS_THREAD_MODE_MULTI; + return UCS_OK; + case 'A': + if (!strcmp(optarg, "thread") || !strcmp(optarg, "thread_spinlock")) { + params->async_mode = UCS_ASYNC_MODE_THREAD_SPINLOCK; + return UCS_OK; + } else if (!strcmp(optarg, "thread_mutex")) { + params->async_mode = UCS_ASYNC_MODE_THREAD_MUTEX; + return UCS_OK; + } else if (!strcmp(optarg, "signal")) { + params->async_mode = UCS_ASYNC_MODE_SIGNAL; + return UCS_OK; + } else { + ucs_error("Invalid option argument for -A"); + return UCS_ERR_INVALID_PARAM; + } + case 'r': + if (!strcmp(optarg, "recv_data")) { + params->flags |= UCX_PERF_TEST_FLAG_STREAM_RECV_DATA; + return UCS_OK; + } else if (!strcmp(optarg, "recv")) { + params->flags &= ~UCX_PERF_TEST_FLAG_STREAM_RECV_DATA; + return UCS_OK; + } + return UCS_ERR_INVALID_PARAM; + case 'm': + if (UCS_OK != parse_mem_type_params(optarg, + ¶ms->send_mem_type, + ¶ms->recv_mem_type)) { + return UCS_ERR_INVALID_PARAM; + } + return UCS_OK; + default: + return UCS_ERR_INVALID_PARAM; + } +} + +static ucs_status_t read_batch_file(FILE *batch_file, const char *file_name, + int *line_num, ucx_perf_params_t *params, + char** test_name_p) +{ +#define MAX_SIZE 256 
+#define MAX_ARG_SIZE 2048 + ucs_status_t status; + char buf[MAX_ARG_SIZE]; + int argc; + char *argv[MAX_SIZE + 1]; + int c; + char *p; + + do { + if (fgets(buf, sizeof(buf) - 1, batch_file) == NULL) { + return UCS_ERR_NO_ELEM; + } + ++(*line_num); + + argc = 0; + p = strtok(buf, " \t\n\r"); + while (p && (argc < MAX_SIZE)) { + argv[argc++] = p; + p = strtok(NULL, " \t\n\r"); + } + argv[argc] = NULL; + } while ((argc == 0) || (argv[0][0] == '#')); + + optind = 1; + while ((c = getopt (argc, argv, TEST_PARAMS_ARGS)) != -1) { + status = parse_test_params(params, c, optarg); + if (status != UCS_OK) { + ucs_error("in batch file '%s' line %d: -%c %s: %s", + file_name, *line_num, c, optarg, ucs_status_string(status)); + return status; + } + } + + *test_name_p = strdup(argv[0]); + return UCS_OK; +} + +static ucs_status_t parse_opts(struct perftest_context *ctx, int mpi_initialized, + int argc, char **argv) +{ + ucs_status_t status; + int c; + + ucs_trace_func(""); + + ucx_perf_global_init(); /* initialize memory types */ + + status = init_test_params(&ctx->params); + if (status != UCS_OK) { + return status; + } + + ctx->server_addr = NULL; + ctx->num_batch_files = 0; + ctx->port = 13337; + ctx->flags = 0; + ctx->mpi = mpi_initialized; + + optind = 1; + while ((c = getopt (argc, argv, "p:b:Nfvc:P:h" TEST_PARAMS_ARGS)) != -1) { + switch (c) { + case 'p': + ctx->port = atoi(optarg); + break; + case 'b': + if (ctx->num_batch_files < MAX_BATCH_FILES) { + ctx->batch_files[ctx->num_batch_files++] = optarg; + } + break; + case 'N': + ctx->flags |= TEST_FLAG_NUMERIC_FMT; + break; + case 'f': + ctx->flags |= TEST_FLAG_PRINT_FINAL; + break; + case 'v': + ctx->flags |= TEST_FLAG_PRINT_CSV; + break; + case 'c': + ctx->flags |= TEST_FLAG_SET_AFFINITY; + ctx->cpu = atoi(optarg); + break; + case 'P': +#if HAVE_MPI + ctx->mpi = atoi(optarg) && mpi_initialized; + break; +#endif + case 'h': + usage(ctx, ucs_basename(argv[0])); + return UCS_ERR_CANCELED; + default: + status = 
parse_test_params(&ctx->params, c, optarg); + if (status != UCS_OK) { + usage(ctx, ucs_basename(argv[0])); + return status; + } + break; + } + } + + if (optind < argc) { + ctx->server_addr = argv[optind]; + } + + return UCS_OK; +} + +static unsigned sock_rte_group_size(void *rte_group) +{ + return 2; +} + +static unsigned sock_rte_group_index(void *rte_group) +{ + sock_rte_group_t *group = rte_group; + return group->is_server ? 0 : 1; +} + +static void sock_rte_barrier(void *rte_group, void (*progress)(void *arg), + void *arg) +{ +#pragma omp barrier + +#pragma omp master + { + sock_rte_group_t *group = rte_group; + const unsigned magic = 0xdeadbeef; + unsigned sync; + + sync = magic; + safe_send(group->connfd, &sync, sizeof(unsigned), progress, arg); + + sync = 0; + safe_recv(group->connfd, &sync, sizeof(unsigned), progress, arg); + + ucs_assert(sync == magic); + } +#pragma omp barrier +} + +static void sock_rte_post_vec(void *rte_group, const struct iovec *iovec, + int iovcnt, void **req) +{ + sock_rte_group_t *group = rte_group; + size_t size; + int i; + + size = 0; + for (i = 0; i < iovcnt; ++i) { + size += iovec[i].iov_len; + } + + safe_send(group->connfd, &size, sizeof(size), NULL, NULL); + for (i = 0; i < iovcnt; ++i) { + safe_send(group->connfd, iovec[i].iov_base, iovec[i].iov_len, NULL, + NULL); + } +} + +static void sock_rte_recv(void *rte_group, unsigned src, void *buffer, + size_t max, void *req) +{ + sock_rte_group_t *group = rte_group; + int group_index; + size_t size; + + group_index = sock_rte_group_index(rte_group); + if (src == group_index) { + return; + } + + ucs_assert_always(src == (1 - group_index)); + safe_recv(group->connfd, &size, sizeof(size), NULL, NULL); + ucs_assert_always(size <= max); + safe_recv(group->connfd, buffer, size, NULL, NULL); +} + +static void sock_rte_report(void *rte_group, const ucx_perf_result_t *result, + void *arg, int is_final) +{ + struct perftest_context *ctx = arg; + print_progress(ctx->test_names, 
ctx->num_batch_files, result, ctx->flags, + is_final); +} + +static ucx_perf_rte_t sock_rte = { + .group_size = sock_rte_group_size, + .group_index = sock_rte_group_index, + .barrier = sock_rte_barrier, + .post_vec = sock_rte_post_vec, + .recv = sock_rte_recv, + .exchange_vec = (ucx_perf_rte_exchange_vec_func_t)ucs_empty_function, + .report = sock_rte_report, +}; + +/* + * Set up the TCP socket run-time environment. + * When ctx->server_addr is NULL this process acts as the server: it listens on + * ctx->port, accepts one connection and receives the test parameters and the + * message size list from the peer. Otherwise it connects to ctx->server_addr + * and sends its own parameters and message size list. + * On success ctx->sock_rte_group and ctx->params.rte* are filled in and UCS_OK + * is returned; on failure the sockets opened here are closed. + */ +static ucs_status_t setup_sock_rte(struct perftest_context *ctx) +{ + struct sockaddr_in inaddr; + struct hostent *he; + ucs_status_t status; + int optval = 1; + int sockfd, connfd; + int ret; + + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) { + ucs_error("socket() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err; + } + + if (ctx->server_addr == NULL) { + optval = 1; + status = ucs_socket_setopt(sockfd, SOL_SOCKET, SO_REUSEADDR, + &optval, sizeof(optval)); + if (status != UCS_OK) { + goto err_close_sockfd; + } + + inaddr.sin_family = AF_INET; + inaddr.sin_port = htons(ctx->port); + inaddr.sin_addr.s_addr = INADDR_ANY; + memset(inaddr.sin_zero, 0, sizeof(inaddr.sin_zero)); + ret = bind(sockfd, (struct sockaddr*)&inaddr, sizeof(inaddr)); + if (ret < 0) { + ucs_error("bind() failed: %m"); + status = UCS_ERR_INVALID_ADDR; + goto err_close_sockfd; + } + + ret = listen(sockfd, 10); + if (ret < 0) { + ucs_error("listen() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err_close_sockfd; + } + + printf("Waiting for connection...\n"); + + /* Accept next connection */ + connfd = accept(sockfd, NULL, NULL); + if (connfd < 0) { + ucs_error("accept() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err_close_sockfd; + } + + close(sockfd); + + /* The parameters received from the peer overwrite ctx->params as a + * whole, including its msg_size_list pointer: release the default + * list allocated by init_test_params() first, so it is not leaked */ + free(ctx->params.msg_size_list); + ctx->params.msg_size_list = NULL; + + ret = safe_recv(connfd, &ctx->params, sizeof(ctx->params), NULL, NULL); + /* the received pointer value belongs to the peer process and must + * never be dereferenced or freed here */ + ctx->params.msg_size_list = NULL; + if (ret) { + status = UCS_ERR_IO_ERROR; + goto err_close_connfd; + } + + if (ctx->params.msg_size_cnt) { + ctx->params.msg_size_list = calloc(ctx->params.msg_size_cnt, + sizeof(*ctx->params.msg_size_list)); + if (NULL == ctx->params.msg_size_list) { + status = UCS_ERR_NO_MEMORY; +
goto err_close_connfd; + } + + ret = safe_recv(connfd, ctx->params.msg_size_list, + sizeof(*ctx->params.msg_size_list) * + ctx->params.msg_size_cnt, + NULL, NULL); + if (ret) { + status = UCS_ERR_IO_ERROR; + goto err_close_connfd; + } + } + + ctx->sock_rte_group.connfd = connfd; + ctx->sock_rte_group.is_server = 1; + } else { + he = gethostbyname(ctx->server_addr); + /* h_addr_list is a NULL-terminated array: make sure it holds at least + * one address before the first entry is copied below */ + if (he == NULL || he->h_addr_list == NULL || + he->h_addr_list[0] == NULL) { + ucs_error("host %s not found: %s", ctx->server_addr, + hstrerror(h_errno)); + status = UCS_ERR_INVALID_ADDR; + goto err_close_sockfd; + } + + inaddr.sin_family = he->h_addrtype; + inaddr.sin_port = htons(ctx->port); + ucs_assert(he->h_length == sizeof(inaddr.sin_addr)); + memcpy(&inaddr.sin_addr, he->h_addr_list[0], he->h_length); + memset(inaddr.sin_zero, 0, sizeof(inaddr.sin_zero)); + + ret = connect(sockfd, (struct sockaddr*)&inaddr, sizeof(inaddr)); + if (ret < 0) { + ucs_error("connect() failed: %m"); + status = UCS_ERR_UNREACHABLE; + goto err_close_sockfd; + } + + safe_send(sockfd, &ctx->params, sizeof(ctx->params), NULL, NULL); + if (ctx->params.msg_size_cnt) { + safe_send(sockfd, ctx->params.msg_size_list, + sizeof(*ctx->params.msg_size_list) * ctx->params.msg_size_cnt, + NULL, NULL); + } + + ctx->sock_rte_group.connfd = sockfd; + ctx->sock_rte_group.is_server = 0; + } + + if (ctx->sock_rte_group.is_server) { + ctx->flags |= TEST_FLAG_PRINT_TEST; + } else { + ctx->flags |= TEST_FLAG_PRINT_RESULTS; + } + + ctx->params.rte_group = &ctx->sock_rte_group; + ctx->params.rte = &sock_rte; + ctx->params.report_arg = ctx; + return UCS_OK; + +err_close_connfd: + close(connfd); + goto err; +err_close_sockfd: + close(sockfd); +err: + return status; +} + +static ucs_status_t cleanup_sock_rte(struct perftest_context *ctx) +{ + close(ctx->sock_rte_group.connfd); + return UCS_OK; +} + +#if HAVE_MPI +static unsigned mpi_rte_group_size(void *rte_group) +{ + int size; + MPI_Comm_size(MPI_COMM_WORLD, &size); + return size; +} + +static unsigned mpi_rte_group_index(void
*rte_group) +{ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + return rank; +} + +static void mpi_rte_barrier(void *rte_group, void (*progress)(void *arg), + void *arg) +{ + int group_size, my_rank, i; + MPI_Request *reqs; + int nreqs = 0; + int dummy; + int flag; + +#pragma omp barrier + +#pragma omp master + + /* + * Naive non-blocking barrier implementation over send/recv, to call user + * progress while waiting for completion. + * Not using MPI_Ibarrier to be compatible with MPI-1. + */ + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &group_size); + + /* allocate maximal possible number of requests */ + reqs = (MPI_Request*)alloca(sizeof(*reqs) * group_size); + + if (my_rank == 0) { + /* root gathers "ping" from all other ranks */ + for (i = 1; i < group_size; ++i) { + MPI_Irecv(&dummy, 0, MPI_INT, + i /* source */, + 1 /* tag */, + MPI_COMM_WORLD, + &reqs[nreqs++]); + } + } else { + /* every non-root rank sends "ping" and waits for "pong" */ + MPI_Send(&dummy, 0, MPI_INT, + 0 /* dest */, + 1 /* tag */, + MPI_COMM_WORLD); + MPI_Irecv(&dummy, 0, MPI_INT, + 0 /* source */, + 2 /* tag */, + MPI_COMM_WORLD, + &reqs[nreqs++]); + } + + /* Waiting for receive requests */ + do { + MPI_Testall(nreqs, reqs, &flag, MPI_STATUSES_IGNORE); + progress(arg); + } while (!flag); + + if (my_rank == 0) { + /* root sends "pong" to all ranks */ + for (i = 1; i < group_size; ++i) { + MPI_Send(&dummy, 0, MPI_INT, + i /* dest */, + 2 /* tag */, + MPI_COMM_WORLD); + } + } + +#pragma omp barrier +} + +static void mpi_rte_post_vec(void *rte_group, const struct iovec *iovec, + int iovcnt, void **req) +{ + int group_size; + int my_rank; + int dest, i; + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &group_size); + + for (dest = 0; dest < group_size; ++dest) { + if (dest == my_rank) { + continue; + } + + for (i = 0; i < iovcnt; ++i) { + MPI_Send(iovec[i].iov_base, iovec[i].iov_len, MPI_BYTE, dest, + i == (iovcnt - 1), /* Send 
last iov with tag == 1 */ + MPI_COMM_WORLD); + } + } + + *req = (void*)(uintptr_t)1; +} + +static void mpi_rte_recv(void *rte_group, unsigned src, void *buffer, size_t max, + void *req) +{ + MPI_Status status; + size_t offset; + int my_rank; + int count; + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + if (src == my_rank) { + return; + } + + offset = 0; + do { + ucs_assert_always(offset < max); + MPI_Recv(buffer + offset, max - offset, MPI_BYTE, src, MPI_ANY_TAG, + MPI_COMM_WORLD, &status); + MPI_Get_count(&status, MPI_BYTE, &count); + offset += count; + } while (status.MPI_TAG != 1); +} + +static void mpi_rte_report(void *rte_group, const ucx_perf_result_t *result, + void *arg, int is_final) +{ + struct perftest_context *ctx = arg; + print_progress(ctx->test_names, ctx->num_batch_files, result, ctx->flags, + is_final); +} + +static ucx_perf_rte_t mpi_rte = { + .group_size = mpi_rte_group_size, + .group_index = mpi_rte_group_index, + .barrier = mpi_rte_barrier, + .post_vec = mpi_rte_post_vec, + .recv = mpi_rte_recv, + .exchange_vec = (void*)ucs_empty_function, + .report = mpi_rte_report, +}; +#elif HAVE_RTE +static unsigned ext_rte_group_size(void *rte_group) +{ + rte_group_t group = (rte_group_t)rte_group; + return rte_group_size(group); +} + +static unsigned ext_rte_group_index(void *rte_group) +{ + rte_group_t group = (rte_group_t)rte_group; + return rte_group_rank(group); +} + +static void ext_rte_barrier(void *rte_group, void (*progress)(void *arg), + void *arg) +{ +#pragma omp barrier + +#pragma omp master + { + rte_group_t group = (rte_group_t)rte_group; + int rc; + + rc = rte_barrier(group); + if (RTE_SUCCESS != rc) { + ucs_error("Failed to rte_barrier"); + } + } +#pragma omp barrier +} + +static void ext_rte_post_vec(void *rte_group, const struct iovec* iovec, + int iovcnt, void **req) +{ + rte_group_t group = (rte_group_t)rte_group; + rte_srs_session_t session; + rte_iovec_t *r_vec; + int i, rc; + + rc = rte_srs_session_create(group, 0, &session); + if 
(RTE_SUCCESS != rc) { + ucs_error("Failed to rte_srs_session_create"); + } + + r_vec = calloc(iovcnt, sizeof(rte_iovec_t)); + if (r_vec == NULL) { + return; + } + for (i = 0; i < iovcnt; ++i) { + r_vec[i].iov_base = iovec[i].iov_base; + r_vec[i].type = rte_datatype_uint8_t; + r_vec[i].count = iovec[i].iov_len; + } + rc = rte_srs_set_data(session, "KEY_PERF", r_vec, iovcnt); + if (RTE_SUCCESS != rc) { + ucs_error("Failed to rte_srs_set_data"); + } + *req = session; + free(r_vec); +} + +static void ext_rte_recv(void *rte_group, unsigned src, void *buffer, + size_t max, void *req) +{ + rte_group_t group = (rte_group_t)rte_group; + rte_srs_session_t session = (rte_srs_session_t)req; + void *rte_buffer = NULL; + rte_iovec_t r_vec; + uint32_t offset; + int size; + int rc; + + rc = rte_srs_get_data(session, rte_group_index_to_ec(group, src), + "KEY_PERF", &rte_buffer, &size); + if (RTE_SUCCESS != rc) { + ucs_error("Failed to rte_srs_get_data"); + return; + } + + r_vec.iov_base = buffer; + r_vec.type = rte_datatype_uint8_t; + r_vec.count = max; + + offset = 0; + rte_unpack(&r_vec, rte_buffer, &offset); + + rc = rte_srs_session_destroy(session); + if (RTE_SUCCESS != rc) { + ucs_error("Failed to rte_srs_session_destroy"); + } + free(rte_buffer); +} + +static void ext_rte_exchange_vec(void *rte_group, void * req) +{ + rte_srs_session_t session = (rte_srs_session_t)req; + int rc; + + rc = rte_srs_exchange_data(session); + if (RTE_SUCCESS != rc) { + ucs_error("Failed to rte_srs_exchange_data"); + } +} + +static void ext_rte_report(void *rte_group, const ucx_perf_result_t *result, + void *arg, int is_final) +{ + struct perftest_context *ctx = arg; + print_progress(ctx->test_names, ctx->num_batch_files, result, ctx->flags, + is_final); +} + +static ucx_perf_rte_t ext_rte = { + .group_size = ext_rte_group_size, + .group_index = ext_rte_group_index, + .barrier = ext_rte_barrier, + .report = ext_rte_report, + .post_vec = ext_rte_post_vec, + .recv = ext_rte_recv, + .exchange_vec = 
ext_rte_exchange_vec, +}; +#endif + +static ucs_status_t setup_mpi_rte(struct perftest_context *ctx) +{ + ucs_trace_func(""); + +#if HAVE_MPI + int size, rank; + + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size != 2) { + ucs_error("This test should run with exactly 2 processes (actual: %d)", size); + return UCS_ERR_INVALID_PARAM; + } + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 1) { + ctx->flags |= TEST_FLAG_PRINT_RESULTS; + } + + ctx->params.rte_group = NULL; + ctx->params.rte = &mpi_rte; + ctx->params.report_arg = ctx; +#elif HAVE_RTE + rte_group_t group; + + rte_init(NULL, NULL, &group); + if (1 == rte_group_rank(group)) { + ctx->flags |= TEST_FLAG_PRINT_RESULTS; + } + + ctx->params.rte_group = group; + ctx->params.rte = &ext_rte; + ctx->params.report_arg = ctx; +#endif + return UCS_OK; +} + +static ucs_status_t cleanup_mpi_rte(struct perftest_context *ctx) +{ +#if HAVE_RTE + rte_finalize(); +#endif + return UCS_OK; +} + +/* + * Validate and apply the CPU affinity settings: with TEST_FLAG_SET_AFFINITY + * the process is bound to ctx->cpu, otherwise a warning is printed when the + * current affinity mask contains more than 2 CPUs. + */ +static ucs_status_t check_system(struct perftest_context *ctx) +{ + ucs_sys_cpuset_t cpuset; + unsigned i, count, nr_cpus; + int ret; + + ucs_trace_func(""); + + ret = sysconf(_SC_NPROCESSORS_CONF); + if (ret < 0) { + ucs_error("failed to get local cpu count: %m"); + return UCS_ERR_INVALID_PARAM; + } + nr_cpus = ret; + + memset(&cpuset, 0, sizeof(cpuset)); + if (ctx->flags & TEST_FLAG_SET_AFFINITY) { + if (ctx->cpu >= nr_cpus) { + ucs_error("cpu (%u) out of range (0..%u)", ctx->cpu, nr_cpus - 1); + return UCS_ERR_INVALID_PARAM; + } + CPU_SET(ctx->cpu, &cpuset); + + ret = ucs_sys_setaffinity(&cpuset); + if (ret) { + ucs_warn("sched_setaffinity() failed: %m"); + return UCS_ERR_INVALID_PARAM; + } + } else { + ret = ucs_sys_getaffinity(&cpuset); + if (ret) { + ucs_warn("sched_getaffinity() failed: %m"); + return UCS_ERR_INVALID_PARAM; + } + + count = 0; + for (i = 0; i < CPU_SETSIZE; ++i) { + if (CPU_ISSET(i, &cpuset)) { + ++count; + } + } + if (count > 2) { + ucs_warn("CPU affinity is not set (bound to %u cpus)."
+ " Performance may be impacted.", count); + } + } + + return UCS_OK; +} + +static ucs_status_t clone_params(ucx_perf_params_t *dest, + const ucx_perf_params_t *src) +{ + size_t msg_size_list_size; + + *dest = *src; + msg_size_list_size = dest->msg_size_cnt * sizeof(*dest->msg_size_list); + dest->msg_size_list = malloc(msg_size_list_size); + if (dest->msg_size_list == NULL) { + return ((msg_size_list_size != 0) ? UCS_ERR_NO_MEMORY : UCS_OK); + } + + memcpy(dest->msg_size_list, src->msg_size_list, msg_size_list_size); + return UCS_OK; +} + +/* + * Run the test described by parent_params. When depth is below + * ctx->num_batch_files, batch file number depth is read instead: each of its + * lines is applied to a private copy of parent_params and the function + * recurses with depth + 1. + */ +static ucs_status_t run_test_recurs(struct perftest_context *ctx, + ucx_perf_params_t *parent_params, + unsigned depth) +{ + ucx_perf_params_t params; + ucx_perf_result_t result; + ucs_status_t status; + FILE *batch_file; + int line_num; + + ucs_trace_func("depth=%u, num_files=%u", depth, ctx->num_batch_files); + + if (parent_params->api == UCX_PERF_API_UCP) { + if (strcmp(parent_params->uct.dev_name, TL_RESOURCE_NAME_NONE)) { + ucs_warn("-d '%s' ignored for UCP test; see NOTES section in help message", + parent_params->uct.dev_name); + } + if (strcmp(parent_params->uct.tl_name, TL_RESOURCE_NAME_NONE)) { + ucs_warn("-x '%s' ignored for UCP test; see NOTES section in help message", + parent_params->uct.tl_name); + } + } + + if (depth >= ctx->num_batch_files) { + print_test_name(ctx); + return ucx_perf_run(parent_params, &result); + } + + batch_file = fopen(ctx->batch_files[depth], "r"); + if (batch_file == NULL) { + ucs_error("Failed to open batch file '%s': %m", ctx->batch_files[depth]); + return UCS_ERR_IO_ERROR; + } + + status = clone_params(&params, parent_params); + if (status != UCS_OK) { + goto out; + } + + line_num = 0; + while ((status = read_batch_file(batch_file, ctx->batch_files[depth], + &line_num, &params, + &ctx->test_names[depth])) == UCS_OK) { + run_test_recurs(ctx, &params, depth + 1); + free(params.msg_size_list); + free(ctx->test_names[depth]); + ctx->test_names[depth] = NULL; + + /* start the next batch line from a fresh copy of the parent parameters */ + status = clone_params(&params, parent_params);
+ if (status != UCS_OK) { + goto out; + } + } + + if (status == UCS_ERR_NO_ELEM) { + status = UCS_OK; + } + + free(params.msg_size_list); +out: + fclose(batch_file); + return status; +} + +static ucs_status_t run_test(struct perftest_context *ctx) +{ + ucs_status_t status; + + ucs_trace_func(""); + + setlocale(LC_ALL, "en_US"); + + print_header(ctx); + + status = run_test_recurs(ctx, &ctx->params, 0); + if (status != UCS_OK) { + ucs_error("Failed to run test: %s", ucs_status_string(status)); + } + + return status; +} + +int main(int argc, char **argv) +{ + struct perftest_context ctx; + ucs_status_t status; + int mpi_initialized; + int mpi_rte; + int ret; + +#if HAVE_MPI + mpi_initialized = !isatty(0) && (MPI_Init(&argc, &argv) == 0); +#else + mpi_initialized = 0; +#endif + + /* Parse command line */ + status = parse_opts(&ctx, mpi_initialized, argc, argv); + if (status != UCS_OK) { + ret = (status == UCS_ERR_CANCELED) ? 0 : -127; + goto out; + } + +#ifdef __COVERITY__ + /* coverity[dont_call] */ + mpi_rte = rand(); /* Shut up deadcode error */ +#endif + + if (ctx.mpi) { + mpi_rte = 1; + } else { +#if HAVE_RTE + mpi_rte = 1; +#else + mpi_rte = 0; +#endif + } + + status = check_system(&ctx); + if (status != UCS_OK) { + ret = -1; + goto out; + } + + /* Create RTE */ + status = (mpi_rte) ? setup_mpi_rte(&ctx) : setup_sock_rte(&ctx); + if (status != UCS_OK) { + ret = -1; + goto out; + } + + /* Run the test */ + status = run_test(&ctx); + if (status != UCS_OK) { + ret = -1; + goto out_cleanup_rte; + } + + ret = 0; + +out_cleanup_rte: + (mpi_rte) ? 
cleanup_mpi_rte(&ctx) : cleanup_sock_rte(&ctx); +out: + if (ctx.params.msg_size_list) { + free(ctx.params.msg_size_list); + } + if (mpi_initialized) { +#if HAVE_MPI + MPI_Finalize(); +#endif + } + return ret; +} diff --git a/src/tools/perf/rocm/Makefile.am b/src/tools/perf/rocm/Makefile.am new file mode 100644 index 0000000..81757ed --- /dev/null +++ b/src/tools/perf/rocm/Makefile.am @@ -0,0 +1,17 @@ +# +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +if HAVE_HIP + +module_LTLIBRARIES = libucx_perftest_rocm.la +libucx_perftest_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(HIP_CPPFLAGS) +libucx_perftest_rocm_la_CFLAGS = $(BASE_CFLAGS) $(HIP_CFLAGS) +libucx_perftest_rocm_la_LDFLAGS = $(HIP_LDFLAGS) $(HIP_LIBS) -version-info $(SOVERSION) +libucx_perftest_rocm_la_SOURCES = rocm_alloc.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/tools/perf/rocm/Makefile.in b/src/tools/perf/rocm/Makefile.in new file mode 100644 index 0000000..c5f12be --- /dev/null +++ b/src/tools/perf/rocm/Makefile.in @@ -0,0 +1,847 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : 
+NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/tools/perf/rocm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + 
$(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +libucx_perftest_rocm_la_LIBADD = +am__libucx_perftest_rocm_la_SOURCES_DIST = rocm_alloc.c +@HAVE_HIP_TRUE@am_libucx_perftest_rocm_la_OBJECTS = \ +@HAVE_HIP_TRUE@ libucx_perftest_rocm_la-rocm_alloc.lo +libucx_perftest_rocm_la_OBJECTS = \ + $(am_libucx_perftest_rocm_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libucx_perftest_rocm_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libucx_perftest_rocm_la_CFLAGS) $(CFLAGS) \ + $(libucx_perftest_rocm_la_LDFLAGS) $(LDFLAGS) -o $@ +@HAVE_HIP_TRUE@am_libucx_perftest_rocm_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = \ + ./$(DEPDIR)/libucx_perftest_rocm_la-rocm_alloc.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + 
$(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libucx_perftest_rocm_la_SOURCES) +DIST_SOURCES = $(am__libucx_perftest_rocm_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = 
@CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ 
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = 
@build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_HIP_TRUE@module_LTLIBRARIES = libucx_perftest_rocm.la +@HAVE_HIP_TRUE@libucx_perftest_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(HIP_CPPFLAGS) +@HAVE_HIP_TRUE@libucx_perftest_rocm_la_CFLAGS = $(BASE_CFLAGS) $(HIP_CFLAGS) +@HAVE_HIP_TRUE@libucx_perftest_rocm_la_LDFLAGS = $(HIP_LDFLAGS) $(HIP_LIBS) -version-info $(SOVERSION) +@HAVE_HIP_TRUE@libucx_perftest_rocm_la_SOURCES = rocm_alloc.c + +# Automake silent rules +@HAVE_HIP_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_HIP_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_HIP_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_HIP_TRUE@AM_V_LN_1 = true +@HAVE_HIP_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + 
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/tools/perf/rocm/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/tools/perf/rocm/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo 
" $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libucx_perftest_rocm.la: $(libucx_perftest_rocm_la_OBJECTS) $(libucx_perftest_rocm_la_DEPENDENCIES) $(EXTRA_libucx_perftest_rocm_la_DEPENDENCIES) + $(AM_V_CCLD)$(libucx_perftest_rocm_la_LINK) $(am_libucx_perftest_rocm_la_rpath) $(libucx_perftest_rocm_la_OBJECTS) $(libucx_perftest_rocm_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libucx_perftest_rocm_la-rocm_alloc.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libucx_perftest_rocm_la-rocm_alloc.lo: rocm_alloc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucx_perftest_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libucx_perftest_rocm_la_CFLAGS) $(CFLAGS) -MT libucx_perftest_rocm_la-rocm_alloc.lo -MD -MP -MF $(DEPDIR)/libucx_perftest_rocm_la-rocm_alloc.Tpo -c -o libucx_perftest_rocm_la-rocm_alloc.lo `test -f 'rocm_alloc.c' || echo '$(srcdir)/'`rocm_alloc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libucx_perftest_rocm_la-rocm_alloc.Tpo $(DEPDIR)/libucx_perftest_rocm_la-rocm_alloc.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rocm_alloc.c' object='libucx_perftest_rocm_la-rocm_alloc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucx_perftest_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libucx_perftest_rocm_la_CFLAGS) $(CFLAGS) -c -o libucx_perftest_rocm_la-rocm_alloc.lo `test -f 'rocm_alloc.c' || echo '$(srcdir)/'`rocm_alloc.c 
+ +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file 
|| test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_HIP_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libucx_perftest_rocm_la-rocm_alloc.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libucx_perftest_rocm_la-rocm_alloc.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + 
+.PRECIOUS: Makefile
+
+
+@HAVE_HIP_TRUE@all-local: $(local_la_modules)
+
+# Create symbolic links for the built modules under $(localmoduledir)
+# Link also *.la files to create proper makefile dependencies
+@HAVE_HIP_TRUE@$(local_la_modules): $(module_LTLIBRARIES)
+@HAVE_HIP_TRUE@	$(AM_V_at)$(MKDIR_P) $(localmoduledir)
+@HAVE_HIP_TRUE@	$(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \
+@HAVE_HIP_TRUE@		(cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \
+@HAVE_HIP_TRUE@	done
+@HAVE_HIP_TRUE@	@for lib in *.la $(objdir)/*$(shrext)*; do \
+@HAVE_HIP_TRUE@		$(AM_V_LN) $$lib; \
+@HAVE_HIP_TRUE@	done
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/tools/perf/rocm/configure.m4 b/src/tools/perf/rocm/configure.m4
new file mode 100644
index 0000000..cb662a4
--- /dev/null
+++ b/src/tools/perf/rocm/configure.m4
@@ -0,0 +1,11 @@
+#
+# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+#
+
+UCX_CHECK_ROCM
+
+AS_IF([test "x$rocm_happy" = "xyes"], [ucx_perftest_modules="${ucx_perftest_modules}:rocm"])
+
+AC_CONFIG_FILES([src/tools/perf/rocm/Makefile])
diff --git a/src/tools/perf/rocm/rocm_alloc.c b/src/tools/perf/rocm/rocm_alloc.c
new file mode 100644
index 0000000..f8c0f2d
--- /dev/null
+++ b/src/tools/perf/rocm/rocm_alloc.c
@@ -0,0 +1,253 @@
+/**
+ * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#include <tools/perf/lib/libperf_int.h>
+
+#include <hip/hip_runtime.h>
+#include <ucs/sys/compiler.h>
+
+
+/**
+ * Select the HIP device used by this process.
+ *
+ * The device index is the RTE group index modulo the number of devices
+ * reported by hipGetDeviceCount().
+ *
+ * @param [in] perf  Perftest context; used to query the RTE group index.
+ *
+ * @return UCS_OK on success, UCS_ERR_NO_DEVICE if the devices cannot be
+ *         counted, none is reported, or the chosen device cannot be set.
+ */
+static ucs_status_t ucx_perf_rocm_init(ucx_perf_context_t *perf)
+{
+    hipError_t ret;
+    unsigned group_index;
+    int num_gpus;
+    int gpu_index;
+
+    group_index = rte_call(perf, group_index);
+
+    ret = hipGetDeviceCount(&num_gpus);
+    /* a count of zero would make the modulo below undefined behavior */
+    if ((ret != hipSuccess) || (num_gpus <= 0)) {
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    gpu_index = group_index % num_gpus;
+
+    ret = hipSetDevice(gpu_index);
+    if (ret != hipSuccess) {
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    return UCS_OK;
+}
+
+/**
+ * Allocate ROCm device memory (hipMalloc) or ROCm managed memory
+ * (hipMallocManaged), depending on @a mem_type.
+ *
+ * @param [in]  length     Size passed to the HIP allocation call.
+ * @param [in]  mem_type   UCS_MEMORY_TYPE_ROCM or UCS_MEMORY_TYPE_ROCM_MANAGED.
+ * @param [out] address_p  Filled with the allocated address.
+ *
+ * @return UCS_OK on success, UCS_ERR_NO_MEMORY if the HIP allocation fails.
+ */
+static inline ucs_status_t ucx_perf_rocm_alloc(size_t length,
+                                               ucs_memory_type_t mem_type,
+                                               void **address_p)
+{
+    hipError_t ret;
+
+    ucs_assert((mem_type == UCS_MEMORY_TYPE_ROCM) ||
+               (mem_type == UCS_MEMORY_TYPE_ROCM_MANAGED));
+
+    ret = ((mem_type == UCS_MEMORY_TYPE_ROCM) ?
+            hipMalloc(address_p, length) :
+            hipMallocManaged(address_p, length, hipMemAttachGlobal));
+    if (ret != hipSuccess) {
+        ucs_error("failed to allocate memory: %s", hipGetErrorString(ret));
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    return UCS_OK;
+}
+
+/**
+ * UCP allocation callback for UCS_MEMORY_TYPE_ROCM.
+ * @a perf, @a memh_p and @a non_blk_flag are not used.
+ */
+static ucs_status_t ucp_perf_rocm_alloc(const ucx_perf_context_t *perf, size_t length,
+                                        void **address_p, ucp_mem_h *memh_p,
+                                        int non_blk_flag)
+{
+    return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM, address_p);
+}
+
+/**
+ * UCP allocation callback for UCS_MEMORY_TYPE_ROCM_MANAGED.
+ * @a perf, @a memh_p and @a non_blk_flag are not used.
+ */
+static ucs_status_t ucp_perf_rocm_alloc_managed(const ucx_perf_context_t *perf,
+                                                size_t length, void **address_p,
+                                                ucp_mem_h *memh_p, int non_blk_flag)
+{
+    return ucx_perf_rocm_alloc(length, UCS_MEMORY_TYPE_ROCM_MANAGED, address_p);
+}
+
+/**
+ * UCP free callback: release @a address with hipFree(); the result is not
+ * checked. @a perf and @a memh are not used.
+ */
+static void ucp_perf_rocm_free(const ucx_perf_context_t *perf,
+                               void *address, ucp_mem_h memh)
+{
+    hipFree(address);
+}
+
+/**
+ * Allocate ROCm memory of the given type and register it with the UCT memory
+ * domain of @a perf. The allocation is released if registration fails.
+ *
+ * @param [in]  perf       Perftest context providing the memory domain.
+ * @param [in]  length     Size of the allocation and of the registration.
+ * @param [in]  mem_type   UCS_MEMORY_TYPE_ROCM or UCS_MEMORY_TYPE_ROCM_MANAGED.
+ * @param [in]  flags      Registration flags passed to uct_md_mem_reg().
+ * @param [out] alloc_mem  Filled with address, memh, mem_type and md.
+ *
+ * @return UCS_OK on success, otherwise the allocation or registration error.
+ */
+static inline ucs_status_t
+uct_perf_rocm_alloc_reg_mem(const ucx_perf_context_t *perf,
+                            size_t length,
+                            ucs_memory_type_t mem_type,
+                            unsigned flags,
+                            uct_allocated_memory_t *alloc_mem)
+{
+    ucs_status_t status;
+
+    status = ucx_perf_rocm_alloc(length, mem_type, &alloc_mem->address);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    status = uct_md_mem_reg(perf->uct.md, alloc_mem->address,
+                            length, flags, &alloc_mem->memh);
+    if (status != UCS_OK) {
+        hipFree(alloc_mem->address);
+        ucs_error("failed to register memory");
+        return status;
+    }
+
+    alloc_mem->mem_type = mem_type;
+    alloc_mem->md       = perf->uct.md;
+
+    return UCS_OK;
+}
+
+/** UCT allocation callback for UCS_MEMORY_TYPE_ROCM. */
+static ucs_status_t uct_perf_rocm_alloc(const ucx_perf_context_t *perf,
+                                        size_t length, unsigned flags,
+                                        uct_allocated_memory_t *alloc_mem)
+{
+    return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM,
+                                       flags, alloc_mem);
+}
+
+/** UCT allocation callback for UCS_MEMORY_TYPE_ROCM_MANAGED. */
+static ucs_status_t uct_perf_rocm_managed_alloc(const ucx_perf_context_t *perf,
+                                                size_t length, unsigned flags,
+                                                uct_allocated_memory_t *alloc_mem)
+{
+    return uct_perf_rocm_alloc_reg_mem(perf, length, UCS_MEMORY_TYPE_ROCM_MANAGED,
+                                       flags, alloc_mem);
+}
+
+/**
+ * UCT free callback: deregister @a alloc_mem from the memory domain of
+ * @a perf and release it with hipFree(). A deregistration failure is logged
+ * and the memory is still freed.
+ */
+static void uct_perf_rocm_free(const ucx_perf_context_t *perf,
+                               uct_allocated_memory_t *alloc_mem)
+{
+    ucs_status_t status;
+
+    ucs_assert(alloc_mem->md == perf->uct.md);
+
+    status = uct_md_mem_dereg(perf->uct.md, alloc_mem->memh);
+    if (status != UCS_OK) {
+        ucs_error("failed to deregister memory");
+    }
+
+    hipFree(alloc_mem->address);
+}
+
+/**
+ * Memory-copy callback. The copy is issued with hipMemcpyDefault, so
+ * @a dst_mem_type and @a src_mem_type are not consulted. A failure is
+ * logged; nothing is reported to the caller.
+ */
+static void ucx_perf_rocm_memcpy(void *dst, ucs_memory_type_t dst_mem_type,
+                                 const void *src, ucs_memory_type_t src_mem_type,
+                                 size_t count)
+{
+    hipError_t ret;
+
+    ret = hipMemcpy(dst, src, count, hipMemcpyDefault);
+    if (ret != hipSuccess) {
+        ucs_error("failed to copy memory: %s", hipGetErrorString(ret));
+    }
+}
+
+/**
+ * Memory-set callback built on hipMemset().
+ *
+ * @return @a dst, also when hipMemset() fails (the failure is only logged).
+ */
+static void* ucx_perf_rocm_memset(void *dst, int value, size_t count)
+{
+    hipError_t ret;
+
+    ret = hipMemset(dst, value, count);
+    if (ret != hipSuccess) {
+        ucs_error("failed to set memory: %s", hipGetErrorString(ret));
+    }
+
+    return dst;
+}
+
+/* Static initializer: publish the ROCm allocators in ucx_perf_mem_type_allocators */
+UCS_STATIC_INIT {
+    static ucx_perf_allocator_t rocm_allocator = {
+        .mem_type  = UCS_MEMORY_TYPE_ROCM,
+        .init      = ucx_perf_rocm_init,
+        .ucp_alloc = ucp_perf_rocm_alloc,
+        .ucp_free  = ucp_perf_rocm_free,
+        .uct_alloc = uct_perf_rocm_alloc,
+        .uct_free  = uct_perf_rocm_free,
+        .memcpy    = ucx_perf_rocm_memcpy,
+        .memset    = ucx_perf_rocm_memset
}; + static ucx_perf_allocator_t rocm_managed_allocator = { + .mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED, + .init = ucx_perf_rocm_init, + .ucp_alloc = ucp_perf_rocm_alloc_managed, + .ucp_free = ucp_perf_rocm_free, + .uct_alloc = uct_perf_rocm_managed_alloc, + .uct_free = uct_perf_rocm_free, + .memcpy = ucx_perf_rocm_memcpy, + .memset = ucx_perf_rocm_memset + }; + + ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = &rocm_allocator; + ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = &rocm_managed_allocator; +} +UCS_STATIC_CLEANUP { + ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] = NULL; + ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] = NULL; + +} diff --git a/src/tools/profile/Makefile.am b/src/tools/profile/Makefile.am new file mode 100644 index 0000000..cd963d4 --- /dev/null +++ b/src/tools/profile/Makefile.am @@ -0,0 +1,10 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +bin_PROGRAMS = ucx_read_profile +ucx_read_profile_CPPFLAGS = $(BASE_CPPFLAGS) +ucx_read_profile_CFLAGS = $(BASE_CFLAGS) +ucx_read_profile_SOURCES = read_profile.c diff --git a/src/tools/profile/Makefile.in b/src/tools/profile/Makefile.in new file mode 100644 index 0000000..e8f87b0 --- /dev/null +++ b/src/tools/profile/Makefile.in @@ -0,0 +1,804 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2016. 
ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) 
+NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +bin_PROGRAMS = ucx_read_profile$(EXEEXT) +subdir = src/tools/profile +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + 
$(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +PROGRAMS = $(bin_PROGRAMS) +am_ucx_read_profile_OBJECTS = ucx_read_profile-read_profile.$(OBJEXT) +ucx_read_profile_OBJECTS = $(am_ucx_read_profile_OBJECTS) +ucx_read_profile_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +ucx_read_profile_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(ucx_read_profile_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/ucx_read_profile-read_profile.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) 
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(ucx_read_profile_SOURCES) +DIST_SOURCES = $(ucx_read_profile_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ 
+EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = 
@ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ 
+top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +ucx_read_profile_CPPFLAGS = $(BASE_CPPFLAGS) +ucx_read_profile_CFLAGS = $(BASE_CFLAGS) +ucx_read_profile_SOURCES = read_profile.c +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/tools/profile/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/tools/profile/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 
'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +ucx_read_profile$(EXEEXT): $(ucx_read_profile_OBJECTS) $(ucx_read_profile_DEPENDENCIES) $(EXTRA_ucx_read_profile_DEPENDENCIES) + @rm -f ucx_read_profile$(EXEEXT) + $(AM_V_CCLD)$(ucx_read_profile_LINK) $(ucx_read_profile_OBJECTS) $(ucx_read_profile_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucx_read_profile-read_profile.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + 
@$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +ucx_read_profile-read_profile.o: read_profile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_read_profile_CPPFLAGS) $(CPPFLAGS) $(ucx_read_profile_CFLAGS) $(CFLAGS) -MT ucx_read_profile-read_profile.o -MD -MP -MF $(DEPDIR)/ucx_read_profile-read_profile.Tpo -c -o ucx_read_profile-read_profile.o `test -f 
'read_profile.c' || echo '$(srcdir)/'`read_profile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_read_profile-read_profile.Tpo $(DEPDIR)/ucx_read_profile-read_profile.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='read_profile.c' object='ucx_read_profile-read_profile.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_read_profile_CPPFLAGS) $(CPPFLAGS) $(ucx_read_profile_CFLAGS) $(CFLAGS) -c -o ucx_read_profile-read_profile.o `test -f 'read_profile.c' || echo '$(srcdir)/'`read_profile.c + +ucx_read_profile-read_profile.obj: read_profile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_read_profile_CPPFLAGS) $(CPPFLAGS) $(ucx_read_profile_CFLAGS) $(CFLAGS) -MT ucx_read_profile-read_profile.obj -MD -MP -MF $(DEPDIR)/ucx_read_profile-read_profile.Tpo -c -o ucx_read_profile-read_profile.obj `if test -f 'read_profile.c'; then $(CYGPATH_W) 'read_profile.c'; else $(CYGPATH_W) '$(srcdir)/read_profile.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_read_profile-read_profile.Tpo $(DEPDIR)/ucx_read_profile-read_profile.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='read_profile.c' object='ucx_read_profile-read_profile.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_read_profile_CPPFLAGS) $(CPPFLAGS) $(ucx_read_profile_CFLAGS) $(CFLAGS) -c -o ucx_read_profile-read_profile.obj `if test -f 'read_profile.c'; then $(CYGPATH_W) 'read_profile.c'; else $(CYGPATH_W) '$(srcdir)/read_profile.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am 
+TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then 
\ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/ucx_read_profile-read_profile.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/ucx_read_profile-read_profile.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-binPROGRAMS \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-binPROGRAMS + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not 
export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/tools/profile/read_profile.c b/src/tools/profile/read_profile.c new file mode 100644 index 0000000..4803270 --- /dev/null +++ b/src/tools/profile/read_profile.c @@ -0,0 +1,866 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define INDENT 4 +#define PAGER_LESS "less" +#define PAGER_LESS_CMD PAGER_LESS " -R" +#define FUNC_NAME_MAX_LEN 35 +#define MAX_THREADS 256 + +#define TERM_COLOR_CLEAR "\x1B[0m" +#define TERM_COLOR_RED "\x1B[31m" +#define TERM_COLOR_GREEN "\x1B[32m" +#define TERM_COLOR_YELLOW "\x1B[33m" +#define TERM_COLOR_BLUE "\x1B[34m" +#define TERM_COLOR_MAGENTA "\x1B[35m" +#define TERM_COLOR_CYAN "\x1B[36m" +#define TERM_COLOR_WHITE "\x1B[37m" +#define TERM_COLOR_GRAY "\x1B[90m" + +#define NAME_COLOR (opts->raw ? "" : TERM_COLOR_CYAN) +#define HEAD_COLOR (opts->raw ? "" : TERM_COLOR_RED) +#define TS_COLOR (opts->raw ? "" : TERM_COLOR_WHITE) +#define LOC_COLOR (opts->raw ? "" : TERM_COLOR_GRAY) +#define REQ_COLOR (opts->raw ? "" : TERM_COLOR_YELLOW) +#define CLEAR_COLOR (opts->raw ? "" : TERM_COLOR_CLEAR) + +#define print_error(_fmt, ...) 
\
+    fprintf(stderr, "Error: " _fmt "\n", ## __VA_ARGS__)
+
+
+typedef enum {
+    TIME_UNITS_NSEC,
+    TIME_UNITS_USEC,
+    TIME_UNITS_MSEC,
+    TIME_UNITS_SEC,
+    TIME_UNITS_LAST
+} time_units_t;
+
+
+typedef struct options {
+    const char                   *filename;
+    int                          raw;
+    time_units_t                 time_units;
+    int                          thread_list[MAX_THREADS + 1];
+} options_t;
+
+
+/* Per-thread pointers into the mapped profile file (set by read_profile_data) */
+typedef struct {
+    const ucs_profile_thread_header_t   *header;
+    const ucs_profile_thread_location_t *locations;
+    const ucs_profile_record_t          *records;
+} profile_thread_data_t;
+
+
+/* A mapped profile file together with pointers to its sections */
+typedef struct {
+    void                         *mem;
+    size_t                       length;
+    const ucs_profile_header_t   *header;
+    const ucs_profile_location_t *locations;
+    profile_thread_data_t        *threads;
+} profile_data_t;
+
+
+typedef struct {
+    uint64_t                     total_time;
+    size_t                       count;
+    unsigned                     location_idx;
+} profile_sorted_location_t;
+
+
+/* Used to redirect output to a "less" command */
+static int output_pipefds[2] = {-1, -1};
+
+
+static const char* time_units_str[] = {
+    [TIME_UNITS_NSEC] = "(nsec)",
+    [TIME_UNITS_USEC] = "(usec)",
+    [TIME_UNITS_MSEC] = "(msec)",
+    [TIME_UNITS_SEC]  = "(sec)",
+    [TIME_UNITS_LAST] = NULL
+};
+
+
+/**
+ * Map a profile file read-only and set up pointers to its sections.
+ *
+ * The file is walked sequentially: file header, global locations array, and
+ * then for every thread its header, per-thread locations and records.
+ *
+ * @param file_name  Path of the profile file to open.
+ * @param data       Filled with the mapping and the section pointers.
+ *
+ * @return 0 on success, in which case the file stays mapped and
+ *         data->threads is allocated (release_profile_data() undoes both).
+ *         A negative value on failure, in which case nothing stays mapped
+ *         or allocated.
+ */
+static int read_profile_data(const char *file_name, profile_data_t *data)
+{
+    uint32_t thread_idx;
+    struct stat stat;
+    const void *ptr;
+    int ret, fd;
+
+    fd = open(file_name, O_RDONLY);
+    if (fd < 0) {
+        print_error("failed to open %s: %m", file_name);
+        ret = fd;
+        goto out;
+    }
+
+    ret = fstat(fd, &stat);
+    if (ret < 0) {
+        print_error("fstat(%s) failed: %m", file_name);
+        goto out_close;
+    }
+
+    data->length = stat.st_size;
+    data->mem    = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (data->mem == MAP_FAILED) {
+        print_error("mmap(%s, length=%zu) failed: %m", file_name,
+                    data->length);
+        ret = -1;
+        goto out_close;
+    }
+
+    ptr          = data->mem;
+    data->header = ptr;
+    ptr          = data->header + 1;
+
+    if (data->header->version != UCS_PROFILE_FILE_VERSION) {
+        print_error("invalid file version, expected: %u, actual: %u",
+                    UCS_PROFILE_FILE_VERSION, data->header->version);
+        ret = -EINVAL;
+        goto err_munmap;
+    }
+
+    data->locations = ptr;
+    ptr             = data->locations + data->header->num_locations;
+
+    data->threads = calloc(data->header->num_threads, sizeof(*data->threads));
+    if (data->threads == NULL) {
+        print_error("failed to allocate threads array");
+        /* ret still holds 0 from fstat(); report the failure explicitly */
+        ret = -ENOMEM;
+        goto err_munmap;
+    }
+
+    for (thread_idx = 0; thread_idx < data->header->num_threads; ++thread_idx) {
+        profile_thread_data_t *thread = &data->threads[thread_idx];
+        thread->header    = ptr;
+        ptr               = thread->header + 1;
+        thread->locations = ptr;
+        ptr               = thread->locations + data->header->num_locations;
+        thread->records   = ptr;
+        ptr               = thread->records + thread->header->num_records;
+    }
+
+    ret = 0;
+
+out_close:
+    /* the descriptor is closed on every path once it was opened */
+    close(fd);
+out:
+    return ret;
+err_munmap:
+    munmap(data->mem, data->length);
+    goto out_close;
+}
+
+/* Release what read_profile_data() set up: the threads array and the mapping */
+static void release_profile_data(profile_data_t *data)
+{
+    free(data->threads);
+    munmap(data->mem, data->length);
+}
+
+/**
+ * Parse a comma-separated list of thread numbers into a -1 terminated array.
+ *
+ * @param thread_list  Output array; receives at most MAX_THREADS entries
+ *                     followed by the -1 terminator.
+ * @param str          Either "all" (case-insensitive), which produces an
+ *                     empty list, or a list of positive integers.
+ *
+ * @return 0 on success, -ENOMEM if the string could not be duplicated,
+ *         -EINVAL if the list is malformed, empty, too long, or contains a
+ *         non-positive number. On failure the terminator is not written.
+ */
+static int parse_thread_list(int *thread_list, const char *str)
+{
+    char *dup, *p, *saveptr, *tailptr;
+    int thread_idx;
+    unsigned index;
+    int ret;
+
+    dup = strdup(str);
+    if (dup == NULL) {
+        ret = -ENOMEM;
+        print_error("failed to duplicate thread list string");
+        goto out;
+    }
+
+    index = 0;
+
+    /* the special value 'all' will create an empty thread list, which means
+     * use all threads
+     */
+    if (!strcasecmp(dup, "all")) {
+        goto out_terminate;
+    }
+
+    p = strtok_r(dup, ",", &saveptr);
+    while (p != NULL) {
+        if (index >= MAX_THREADS) {
+            ret = -EINVAL;
+            print_error("up to %d threads are supported", MAX_THREADS);
+            goto out;
+        }
+
+        thread_idx = strtol(p, &tailptr, 0);
+        if (*tailptr != '\0') {
+            /* malformed number is an invalid argument, not an allocation failure */
+            ret = -EINVAL;
+            print_error("failed to parse thread number '%s'", p);
+            goto out;
+        }
+
+        if (thread_idx <= 0) {
+            ret = -EINVAL;
+            print_error("invalid thread index %d", thread_idx);
+            goto out;
+        }
+
+        thread_list[index++] = thread_idx;
+        p = strtok_r(NULL, ",", &saveptr);
+    }
+
+    if (index == 0) {
+        ret = -EINVAL;
+
print_error("empty thread list");
+        goto out;
+    }
+
+out_terminate:
+    ret = 0;
+    thread_list[index] = -1; /* terminator */
+out:
+    free(dup);
+    return ret;
+}
+
+/*
+ * Format a -1 terminated thread list as comma-separated numbers into buf
+ * (max bytes). When the list does not fit, the output ends with "...".
+ * Returns buf.
+ */
+static const char* thread_list_str(const int *thread_list, char *buf, size_t max)
+{
+    char *p, *endp;
+    const int *t;
+    int ret;
+
+    p    = buf;
+    endp = buf + max - 4; /* leave room for "...\0" */
+
+    for (t = thread_list; *t != -1; ++t) {
+        ret = snprintf(p, endp - p, "%d,", *t);
+        if (ret >= endp - p) {
+            /* truncated */
+            strcat(p, "...");
+            return buf;
+        }
+
+        p += strlen(p);
+    }
+
+    if (p > buf) {
+        /* drop the trailing comma */
+        *(p - 1) = '\0';
+    } else {
+        *buf = '\0';
+    }
+    return buf;
+}
+
+/*
+ * Scale a time value by the selected unit factor and divide it by the file
+ * header's one_second value.
+ */
+static double time_to_units(profile_data_t *data, options_t *opts, uint64_t time)
+{
+    static const double time_units_val[] = {
+        [TIME_UNITS_NSEC] = 1e9,
+        [TIME_UNITS_USEC] = 1e6,
+        [TIME_UNITS_MSEC] = 1e3,
+        [TIME_UNITS_SEC]  = 1e0
+    };
+
+    return time * time_units_val[opts->time_units] / data->header->one_second;
+}
+
+/*
+ * qsort() comparator for an array of profile_sorted_location_t, which is what
+ * show_profile_data_accum() sorts. Orders by total_time, largest first.
+ */
+static int compare_locations(const void *l1, const void *l2)
+{
+    const profile_sorted_location_t *loc1 = l1;
+    const profile_sorted_location_t *loc2 = l2;
+    return (loc1->total_time > loc2->total_time) ? -1 :
+           (loc1->total_time < loc2->total_time) ? +1 :
+           0;
+}
+
+static int show_profile_data_accum(profile_data_t *data, options_t *opts)
+{
+    typedef struct {
+        long overall_time; /* overall threads runtime */
+        int thread_list[MAX_THREADS + 1];
+        int *last;
+    } location_thread_info_t;
+
+    const uint32_t num_locations = data->header->num_locations;
+    profile_sorted_location_t *sorted_locations = NULL;
+    location_thread_info_t *locations_thread_info = NULL;
+    const ucs_profile_thread_location_t *thread_location;
+    location_thread_info_t *loc_thread_info;
+    profile_sorted_location_t *sorted_loc;
+    const profile_thread_data_t *thread;
+    const ucs_profile_location_t *loc;
+    unsigned location_idx, thread_idx;
+    char avg_buf[20], total_buf[20], overall_buf[20];
+    char thread_list_buf[20];
+    char *avg_str, *total_str, *overall_str;
+    int ret;
+    int *t;
+
+    sorted_locations = calloc(num_locations, sizeof(*sorted_locations));
+    locations_thread_info = calloc(num_locations, sizeof(*locations_thread_info));
+    if ((sorted_locations == NULL) || (locations_thread_info == NULL)) {
+        print_error("failed to allocate locations info");
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    /* Go over the list of threads provided by the user and accumulate the times
+     * and counts from all threads. In addition, track which calls were made from
+     * which threads.
+ */ + for (location_idx = 0; location_idx < num_locations; ++location_idx) { + sorted_loc = &sorted_locations[location_idx]; + loc_thread_info = &locations_thread_info[location_idx]; + sorted_loc->location_idx = location_idx; + loc_thread_info->thread_list[0] = -1; + loc_thread_info->last = loc_thread_info->thread_list; + loc_thread_info->overall_time = 0; + + for (t = opts->thread_list; *t != -1; ++t) { + thread_idx = *t - 1; + thread = &data->threads[thread_idx]; + thread_location = &thread->locations[location_idx]; + sorted_loc->count += thread_location->count; + sorted_loc->total_time += thread_location->total_time; + + if (thread_location->count > 0) { + loc_thread_info->overall_time += thread->header->end_time - + thread->header->start_time; + *(loc_thread_info->last++) = thread_idx + 1; + } + } + + *loc_thread_info->last = -1; + } + + /* Sort locations */ + qsort(sorted_locations, num_locations, sizeof(*sorted_locations), + compare_locations); + + /* Print locations */ + printf("%s%*s %6s %-6s %6s %-6s %13s %12s %18s%-6s %-*s %s%s\n", + HEAD_COLOR, + FUNC_NAME_MAX_LEN, + "NAME", + "AVG", time_units_str[opts->time_units], + "TOTAL", time_units_str[opts->time_units], + "%OVERALL", + "COUNT", + "FILE", + ":LINE", + FUNC_NAME_MAX_LEN, + "FUNCTION", + "THREADS", + CLEAR_COLOR); + + for (sorted_loc = sorted_locations; + sorted_loc < (sorted_locations + num_locations); ++sorted_loc) { + + if (sorted_loc->count == 0) { + continue; + } + + loc = &data->locations[sorted_loc->location_idx]; + loc_thread_info = &locations_thread_info[sorted_loc->location_idx]; + + switch (loc->type) { + case UCS_PROFILE_TYPE_SCOPE_END: + snprintf(avg_buf, sizeof(avg_buf) - 1, "%.3f", + time_to_units(data, opts, + sorted_loc->total_time / sorted_loc->count)); + snprintf(total_buf, sizeof(total_buf) - 1, "%.2f", + time_to_units(data, opts, sorted_loc->total_time)); + snprintf(overall_buf, sizeof(overall_buf) - 1, "%.3f", + sorted_loc->total_time * 100.0 / loc_thread_info->overall_time); 
+ + avg_str = avg_buf; + total_str = total_buf; + overall_str = overall_buf; + break; + case UCS_PROFILE_TYPE_SAMPLE: + case UCS_PROFILE_TYPE_REQUEST_EVENT: + avg_str = total_str = overall_str = "n/a"; + break; + default: + continue; + } + + printf("%s%*.*s%s %13s %13s %13s %12zu %s%18s:%-6d %-*s %-13s%s\n", + NAME_COLOR, FUNC_NAME_MAX_LEN, FUNC_NAME_MAX_LEN, loc->name, CLEAR_COLOR, + avg_str, + total_str, + overall_str, + sorted_loc->count, + LOC_COLOR, + loc->file, loc->line, + FUNC_NAME_MAX_LEN, loc->function, + thread_list_str(loc_thread_info->thread_list, thread_list_buf, + sizeof(thread_list_buf)), + CLEAR_COLOR); + } + + ret = 0; + +out: + free(locations_thread_info); + free(sorted_locations); + return ret; +} + +KHASH_MAP_INIT_INT64(request_ids, int) + +static void show_profile_data_log(profile_data_t *data, options_t *opts, + int thread_idx) +{ + profile_thread_data_t *thread = &data->threads[thread_idx]; + size_t num_recods = thread->header->num_records; + const ucs_profile_record_t **stack[UCS_PROFILE_STACK_MAX * 2]; + const ucs_profile_record_t **scope_ends; + const ucs_profile_location_t *loc; + const ucs_profile_record_t *rec, *se, **sep; + int nesting, min_nesting; + uint64_t prev_time; + const char *action; + char buf[256]; + khash_t(request_ids) reqids; + int hash_extra_status; + khiter_t hash_it; + int reqid, reqid_ctr = 1; + +#define RECORD_FMT "%s%10.3f%s%*s" +#define RECORD_ARG(_ts) TS_COLOR, time_to_units(data, opts, (_ts)), CLEAR_COLOR, \ + INDENT * nesting, "" +#define PRINT_RECORD() printf("%-*s %s%15s:%-4d %s()%s\n", \ + (int)(60 + strlen(NAME_COLOR) + \ + 2 * strlen(TS_COLOR) + \ + 3 * strlen(CLEAR_COLOR)), \ + buf, \ + LOC_COLOR, \ + basename(loc->file), loc->line, loc->function, \ + CLEAR_COLOR) + + scope_ends = calloc(1, sizeof(*scope_ends) * num_recods); + if (scope_ends == NULL) { + print_error("failed to allocate memory for scope ends"); + return; + } + + printf("\n"); + printf("%sThread %d (tid %d%s)%s\n", HEAD_COLOR, thread_idx + 
1, + thread->header->tid, + (thread->header->tid == data->header->pid) ? ", main" : "", + CLEAR_COLOR); + printf("\n"); + + memset(stack, 0, sizeof(stack)); + + /* Find the first record with minimal nesting level, which is the base of call stack */ + nesting = 0; + min_nesting = 0; + for (rec = thread->records; rec < thread->records + num_recods; ++rec) { + loc = &data->locations[rec->location]; + switch (loc->type) { + case UCS_PROFILE_TYPE_SCOPE_BEGIN: + stack[nesting + UCS_PROFILE_STACK_MAX] = &scope_ends[rec - thread->records]; + ++nesting; + break; + case UCS_PROFILE_TYPE_SCOPE_END: + --nesting; + if (nesting < min_nesting) { + min_nesting = nesting; + } + sep = stack[nesting + UCS_PROFILE_STACK_MAX]; + if (sep != NULL) { + *sep = rec; + } + break; + default: + break; + } + } + + if (num_recods > 0) { + prev_time = thread->records[0].timestamp; + } else { + prev_time = 0; + } + + kh_init_inplace(request_ids, &reqids); + + /* Display records */ + nesting = -min_nesting; + for (rec = thread->records; rec < thread->records + num_recods; ++rec) { + loc = &data->locations[rec->location]; + switch (loc->type) { + case UCS_PROFILE_TYPE_SCOPE_BEGIN: + se = scope_ends[rec - thread->records]; + if (se != NULL) { + snprintf(buf, sizeof(buf), RECORD_FMT" %s%s%s %s%.3f%s {", + RECORD_ARG(rec->timestamp - prev_time), + NAME_COLOR, data->locations[se->location].name, + CLEAR_COLOR, TS_COLOR, + time_to_units(data, opts, se->timestamp - rec->timestamp), + CLEAR_COLOR); + } else { + snprintf(buf, sizeof(buf), ""); + } + PRINT_RECORD(); + nesting++; + break; + case UCS_PROFILE_TYPE_SCOPE_END: + --nesting; + printf(RECORD_FMT" }\n", RECORD_ARG(rec->timestamp - prev_time)); + break; + case UCS_PROFILE_TYPE_SAMPLE: + snprintf(buf, sizeof(buf), RECORD_FMT" %s%s%s", + RECORD_ARG(rec->timestamp - prev_time), + NAME_COLOR, loc->name, CLEAR_COLOR); + PRINT_RECORD(); + break; + case UCS_PROFILE_TYPE_REQUEST_NEW: + case UCS_PROFILE_TYPE_REQUEST_EVENT: + case UCS_PROFILE_TYPE_REQUEST_FREE: 
+ if (loc->type == UCS_PROFILE_TYPE_REQUEST_NEW) { + hash_it = kh_put(request_ids, &reqids, rec->param64, + &hash_extra_status); + if (hash_it == kh_end(&reqids)) { + if (hash_extra_status == 0) { + /* old request was not released, replace it */ + hash_it = kh_get(request_ids, &reqids, rec->param64); + reqid = reqid_ctr++; + kh_value(&reqids, hash_it) = reqid; + } else { + reqid = 0; /* error inserting to hash */ + } + } else { + /* new request */ + reqid = reqid_ctr++; + kh_value(&reqids, hash_it) = reqid; + } + action = "NEW "; + } else { + assert(reqid_ctr > 1); + hash_it = kh_get(request_ids, &reqids, rec->param64); + if (hash_it == kh_end(&reqids)) { + reqid = 0; /* could not find request */ + } else { + reqid = kh_value(&reqids, hash_it); + if (loc->type == UCS_PROFILE_TYPE_REQUEST_FREE) { + kh_del(request_ids, &reqids, hash_it); + } + } + if (loc->type == UCS_PROFILE_TYPE_REQUEST_FREE) { + action = "FREE"; + } else { + action = ""; + } + } + snprintf(buf, sizeof(buf), RECORD_FMT" %s%s%s%s %s{%d}%s", + RECORD_ARG(rec->timestamp - prev_time), + REQ_COLOR, action, loc->name, CLEAR_COLOR, + REQ_COLOR, reqid, CLEAR_COLOR); + PRINT_RECORD(); + break; + default: + break; + } + prev_time = rec->timestamp; + } + + kh_destroy_inplace(request_ids, &reqids); + free(scope_ends); +} + +static void close_pipes() +{ + close(output_pipefds[0]); + close(output_pipefds[1]); +} + +static int redirect_output(const profile_data_t *data, options_t *opts) +{ + const char *shell_cmd = "sh"; + struct winsize wsz; + uint64_t num_lines; + const char *pager_cmd; + pid_t pid; + int ret; + int *t; + + ret = ioctl(STDOUT_FILENO, TIOCGWINSZ, &wsz); + if (ret < 0) { + print_error("ioctl(TIOCGWINSZ) failed: %m"); + return ret; + } + + num_lines = 6 + /* header */ + 1; /* footer */ + + if (data->header->mode & UCS_BIT(UCS_PROFILE_MODE_ACCUM)) { + num_lines += 1 + /* locations title */ + data->header->num_locations + /* locations data */ + 1; /* locations footer */ + } + + if 
(data->header->mode & UCS_BIT(UCS_PROFILE_MODE_LOG)) { + for (t = opts->thread_list; *t != -1; ++t) { + num_lines += 3 + /* thread header */ + data->threads[*t - 1].header->num_records; /* thread records */ + } + } + + if (num_lines <= wsz.ws_row) { + return 0; /* no need to use 'less' */ + } + + ret = pipe(output_pipefds); + if (ret < 0) { + print_error("pipe() failed: %m"); + return ret; + } + + pid = fork(); + if (pid < 0) { + print_error("fork() failed: %m"); + close_pipes(); + return pid; + } + + /* Parent replaces itself with 'less' + * Child continues to dump log + */ + if (pid == 0) { + /* redirect output to pipe */ + ret = dup2(output_pipefds[1], fileno(stdout)); + if (ret < 0) { + print_error("failed to redirect stdout: %m"); + close_pipes(); + return ret; + } + + close_pipes(); + return 0; + } else { + /* redirect input from pipe */ + ret = dup2(output_pipefds[0], fileno(stdin)); + if (ret < 0) { + print_error("failed to redirect stdin: %m"); + exit(ret); + } + + close_pipes(); + + /* If PAGER environment variable is set, use it. If it's not set, or it + * is equal to "less", use "less -R" to show colors. + */ + pager_cmd = getenv("PAGER"); + if ((pager_cmd == NULL) || !strcmp(pager_cmd, PAGER_LESS)) { + pager_cmd = PAGER_LESS_CMD; + } + + /* coverity[tainted_string] */ + ret = execlp(shell_cmd, shell_cmd, "-c", pager_cmd, NULL); + if (ret) { + print_error("failed to execute shell '%s': %m", shell_cmd); + } + return ret; + } +} + +static void show_header(profile_data_t *data, options_t *opts) +{ + char buf[80]; + + printf("\n"); + printf(" ucs lib : %s\n", data->header->ucs_path); + printf(" host : %s\n", data->header->hostname); + printf(" command : %s\n", data->header->cmdline); + printf(" pid : %d\n", data->header->pid); + printf(" threads : %-3d", data->header->num_threads); + if (opts->thread_list[0] != -1) { + printf("(showing %s", + (opts->thread_list[1] == -1) ? 
"thread" : "threads"); + printf(" %s)", thread_list_str(opts->thread_list, buf, sizeof(buf))); + } + printf("\n\n"); +} + +static int compare_int(const void *a, const void *b) +{ + return *(const int*)a - *(const int*)b; +} + +static int show_profile_data(profile_data_t *data, options_t *opts) +{ + unsigned i, thread_list_len; + int ret; + int *t; + + if (data->header->num_threads > MAX_THREADS) { + print_error("the profile contains %u threads, but only up to %d are " + "supported", data->header->num_threads, MAX_THREADS); + return -EINVAL; + } + + /* validate and count thread numbers */ + if (opts->thread_list[0] == -1) { + for (i = 0; i < data->header->num_threads; ++i) { + opts->thread_list[i] = i + 1; + } + opts->thread_list[i] = -1; + } else { + thread_list_len = 0; + for (t = opts->thread_list; *t != -1; ++t) { + if (*t > data->header->num_threads) { + print_error("thread number %d is out of range (1..%u)", + *t, data->header->num_threads); + return -EINVAL; + } + + ++thread_list_len; + } + assert(thread_list_len > 0); + + /* sort thread numbers and check for duplicates */ + qsort(opts->thread_list, thread_list_len, sizeof(int), compare_int); + for (t = opts->thread_list; *t != -1; ++t) { + if (t[0] == t[1]) { + print_error("duplicate thread number %d", t[0]); + return -EINVAL; + } + } + } + + /* redirect output if needed */ + if (!opts->raw) { + ret = redirect_output(data, opts); + if (ret < 0) { + return ret; + } + } + + show_header(data, opts); + + if (data->header->mode & UCS_BIT(UCS_PROFILE_MODE_ACCUM)) { + show_profile_data_accum(data, opts); + printf("\n"); + } + + if (data->header->mode & UCS_BIT(UCS_PROFILE_MODE_LOG)) { + for (t = opts->thread_list; *t != -1; ++t) { + show_profile_data_log(data, opts, *t - 1); + } + printf("\n"); + } + + return 0; +} + +static void usage() +{ + printf("Usage: ucx_read_profile [options] [profile-file]\n"); + printf("Options are:\n"); + printf(" -r Show raw output\n"); + printf(" -T Comma-separated list of threads to 
show, " + "e.g. \"1,2,3\", or \"all\" to show all threads\n"); + printf(" -t Select time units to use:\n"); + printf(" sec - seconds\n"); + printf(" msec - milliseconds\n"); + printf(" usec - microseconds (default)\n"); + printf(" nsec - nanoseconds\n"); + printf(" -h Show this help message\n"); +} + +static int parse_args(int argc, char **argv, options_t *opts) +{ + int ret, c; + + opts->raw = !isatty(fileno(stdout)); + opts->time_units = TIME_UNITS_USEC; + ret = parse_thread_list(opts->thread_list, "all"); + if (ret < 0) { + return ret; + } + + while ( (c = getopt(argc, argv, "rT:t:h")) != -1 ) { + switch (c) { + case 'r': + opts->raw = 1; + break; + case 'T': + ret = parse_thread_list(opts->thread_list, optarg); + if (ret < 0) { + return ret; + } + break; + case 't': + if (!strcasecmp(optarg, "sec")) { + opts->time_units = TIME_UNITS_SEC; + } else if (!strcasecmp(optarg, "msec")) { + opts->time_units = TIME_UNITS_MSEC; + } else if (!strcasecmp(optarg, "usec")) { + opts->time_units = TIME_UNITS_USEC; + } else if (!strcasecmp(optarg, "nsec")) { + opts->time_units = TIME_UNITS_NSEC; + } else { + print_error("invalid time units '%s'\n", optarg); + usage(); + return -1; + } + break; + case 'h': + usage(); + return -127; + default: + usage(); + return -1; + } + } + + if (optind >= argc) { + print_error("missing profile file argument\n"); + usage(); + return -1; + } + + opts->filename = argv[optind]; + return 0; +} + +int main(int argc, char **argv) +{ + profile_data_t data = {0}; + options_t opts; + int ret; + + ret = parse_args(argc, argv, &opts); + if (ret < 0) { + return (ret == -127) ? 
0 : ret; + } + + ret = read_profile_data(opts.filename, &data); + if (ret < 0) { + return ret; + } + + ret = show_profile_data(&data, &opts); + release_profile_data(&data); + return ret; +} + diff --git a/src/ucm/Makefile.am b/src/ucm/Makefile.am new file mode 100644 index 0000000..e53a30a --- /dev/null +++ b/src/ucm/Makefile.am @@ -0,0 +1,59 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +# Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +SUBDIRS = . cuda rocm + +lib_LTLIBRARIES = libucm.la +libucm_ladir = $(includedir)/ucm +libucm_la_LDFLAGS = $(UCM_MODULE_LDFLAGS) \ + -ldl -version-info $(SOVERSION) +libucm_la_CPPFLAGS = $(BASE_CPPFLAGS) -DUCM_MALLOC_PREFIX=ucm_dl +libucm_la_CFLAGS = $(BASE_CFLAGS) $(CFLAGS_NO_DEPRECATED) + +nobase_dist_libucm_la_HEADERS = \ + api/ucm.h + +noinst_HEADERS = \ + event/event.h \ + malloc/malloc_hook.h \ + malloc/allocator.h \ + mmap/mmap.h \ + util/replace.h \ + util/log.h \ + util/reloc.h \ + util/sys.h \ + bistro/bistro_int.h \ + bistro/bistro.h \ + bistro/bistro_x86_64.h \ + bistro/bistro_aarch64.h \ + bistro/bistro_ppc64.h + +libucm_la_SOURCES = \ + event/event.c \ + malloc/malloc_hook.c \ + mmap/install.c \ + util/replace.c \ + util/log.c \ + util/reloc.c \ + util/sys.c \ + bistro/bistro.c \ + bistro/bistro_x86_64.c \ + bistro/bistro_aarch64.c \ + bistro/bistro_ppc64.c + +if HAVE_UCM_PTMALLOC286 +libucm_la_CPPFLAGS += \ + -fno-strict-aliasing \ + -DUSE_LOCKS=1 \ + -DMALLINFO_FIELD_TYPE=int + +libucm_la_SOURCES += \ + ptmalloc286/malloc.c + +noinst_HEADERS += \ + ptmalloc286/malloc-2.8.6.h +endif diff --git a/src/ucm/Makefile.in b/src/ucm/Makefile.in new file mode 100644 index 0000000..37017ed --- /dev/null +++ b/src/ucm/Makefile.in @@ -0,0 +1,1214 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +# Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test 
$$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@HAVE_UCM_PTMALLOC286_TRUE@am__append_1 = \ +@HAVE_UCM_PTMALLOC286_TRUE@ -fno-strict-aliasing \ +@HAVE_UCM_PTMALLOC286_TRUE@ -DUSE_LOCKS=1 \ +@HAVE_UCM_PTMALLOC286_TRUE@ -DMALLINFO_FIELD_TYPE=int + +@HAVE_UCM_PTMALLOC286_TRUE@am__append_2 = \ +@HAVE_UCM_PTMALLOC286_TRUE@ ptmalloc286/malloc.c + +@HAVE_UCM_PTMALLOC286_TRUE@am__append_3 = \ +@HAVE_UCM_PTMALLOC286_TRUE@ ptmalloc286/malloc-2.8.6.h + +subdir = src/ucm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + 
$(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(nobase_dist_libucm_la_HEADERS) \ + $(am__noinst_HEADERS_DIST) $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p 
$$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libucm_ladir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libucm_la_LIBADD = +am__libucm_la_SOURCES_DIST = event/event.c malloc/malloc_hook.c \ + mmap/install.c util/replace.c util/log.c util/reloc.c \ + util/sys.c bistro/bistro.c bistro/bistro_x86_64.c \ + bistro/bistro_aarch64.c bistro/bistro_ppc64.c \ + ptmalloc286/malloc.c +am__dirstamp = $(am__leading_dot)dirstamp +@HAVE_UCM_PTMALLOC286_TRUE@am__objects_1 = \ +@HAVE_UCM_PTMALLOC286_TRUE@ ptmalloc286/libucm_la-malloc.lo +am_libucm_la_OBJECTS = event/libucm_la-event.lo \ + malloc/libucm_la-malloc_hook.lo mmap/libucm_la-install.lo \ + util/libucm_la-replace.lo util/libucm_la-log.lo \ + util/libucm_la-reloc.lo util/libucm_la-sys.lo \ + bistro/libucm_la-bistro.lo bistro/libucm_la-bistro_x86_64.lo \ + bistro/libucm_la-bistro_aarch64.lo \ + bistro/libucm_la-bistro_ppc64.lo $(am__objects_1) +libucm_la_OBJECTS = $(am_libucm_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libucm_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libucm_la_CFLAGS) \ + $(CFLAGS) $(libucm_la_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = 
$(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = bistro/$(DEPDIR)/libucm_la-bistro.Plo \ + bistro/$(DEPDIR)/libucm_la-bistro_aarch64.Plo \ + bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Plo \ + bistro/$(DEPDIR)/libucm_la-bistro_x86_64.Plo \ + event/$(DEPDIR)/libucm_la-event.Plo \ + malloc/$(DEPDIR)/libucm_la-malloc_hook.Plo \ + mmap/$(DEPDIR)/libucm_la-install.Plo \ + ptmalloc286/$(DEPDIR)/libucm_la-malloc.Plo \ + util/$(DEPDIR)/libucm_la-log.Plo \ + util/$(DEPDIR)/libucm_la-reloc.Plo \ + util/$(DEPDIR)/libucm_la-replace.Plo \ + util/$(DEPDIR)/libucm_la-sys.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libucm_la_SOURCES) +DIST_SOURCES = $(am__libucm_la_SOURCES_DIST) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + 
install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = event/event.h malloc/malloc_hook.h \ + malloc/allocator.h mmap/mmap.h util/replace.h util/log.h \ + util/reloc.h util/sys.h bistro/bistro_int.h bistro/bistro.h \ + bistro/bistro_x86_64.h bistro/bistro_aarch64.h \ + bistro/bistro_ppc64.h ptmalloc286/malloc-2.8.6.h +HEADERS = $(nobase_dist_libucm_la_HEADERS) $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ 
+DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = 
@MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = 
@host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +SUBDIRS = . cuda rocm +lib_LTLIBRARIES = libucm.la +libucm_ladir = $(includedir)/ucm +libucm_la_LDFLAGS = $(UCM_MODULE_LDFLAGS) \ + -ldl -version-info $(SOVERSION) + +libucm_la_CPPFLAGS = $(BASE_CPPFLAGS) -DUCM_MALLOC_PREFIX=ucm_dl \ + $(am__append_1) +libucm_la_CFLAGS = $(BASE_CFLAGS) $(CFLAGS_NO_DEPRECATED) +nobase_dist_libucm_la_HEADERS = \ + api/ucm.h + +noinst_HEADERS = event/event.h malloc/malloc_hook.h malloc/allocator.h \ + mmap/mmap.h util/replace.h util/log.h util/reloc.h util/sys.h \ + bistro/bistro_int.h bistro/bistro.h bistro/bistro_x86_64.h \ + bistro/bistro_aarch64.h bistro/bistro_ppc64.h $(am__append_3) +libucm_la_SOURCES = event/event.c malloc/malloc_hook.c mmap/install.c \ + util/replace.c util/log.c util/reloc.c util/sys.c \ + bistro/bistro.c bistro/bistro_x86_64.c bistro/bistro_aarch64.c \ + bistro/bistro_ppc64.c $(am__append_2) +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if 
test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/ucm/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/ucm/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f 
"$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +event/$(am__dirstamp): + @$(MKDIR_P) event + @: > event/$(am__dirstamp) +event/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) event/$(DEPDIR) + @: > event/$(DEPDIR)/$(am__dirstamp) +event/libucm_la-event.lo: event/$(am__dirstamp) \ + event/$(DEPDIR)/$(am__dirstamp) +malloc/$(am__dirstamp): + @$(MKDIR_P) malloc + @: > malloc/$(am__dirstamp) +malloc/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) malloc/$(DEPDIR) + @: > malloc/$(DEPDIR)/$(am__dirstamp) +malloc/libucm_la-malloc_hook.lo: malloc/$(am__dirstamp) \ + malloc/$(DEPDIR)/$(am__dirstamp) +mmap/$(am__dirstamp): + @$(MKDIR_P) mmap + @: > mmap/$(am__dirstamp) +mmap/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) mmap/$(DEPDIR) + @: > mmap/$(DEPDIR)/$(am__dirstamp) +mmap/libucm_la-install.lo: mmap/$(am__dirstamp) \ + mmap/$(DEPDIR)/$(am__dirstamp) +util/$(am__dirstamp): + @$(MKDIR_P) util + @: > util/$(am__dirstamp) +util/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) util/$(DEPDIR) + @: > util/$(DEPDIR)/$(am__dirstamp) +util/libucm_la-replace.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +util/libucm_la-log.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +util/libucm_la-reloc.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +util/libucm_la-sys.lo: util/$(am__dirstamp) \ + util/$(DEPDIR)/$(am__dirstamp) +bistro/$(am__dirstamp): + @$(MKDIR_P) bistro + @: > bistro/$(am__dirstamp) +bistro/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) bistro/$(DEPDIR) + @: > bistro/$(DEPDIR)/$(am__dirstamp) +bistro/libucm_la-bistro.lo: bistro/$(am__dirstamp) \ + bistro/$(DEPDIR)/$(am__dirstamp) +bistro/libucm_la-bistro_x86_64.lo: bistro/$(am__dirstamp) \ + 
bistro/$(DEPDIR)/$(am__dirstamp) +bistro/libucm_la-bistro_aarch64.lo: bistro/$(am__dirstamp) \ + bistro/$(DEPDIR)/$(am__dirstamp) +bistro/libucm_la-bistro_ppc64.lo: bistro/$(am__dirstamp) \ + bistro/$(DEPDIR)/$(am__dirstamp) +ptmalloc286/$(am__dirstamp): + @$(MKDIR_P) ptmalloc286 + @: > ptmalloc286/$(am__dirstamp) +ptmalloc286/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ptmalloc286/$(DEPDIR) + @: > ptmalloc286/$(DEPDIR)/$(am__dirstamp) +ptmalloc286/libucm_la-malloc.lo: ptmalloc286/$(am__dirstamp) \ + ptmalloc286/$(DEPDIR)/$(am__dirstamp) + +libucm.la: $(libucm_la_OBJECTS) $(libucm_la_DEPENDENCIES) $(EXTRA_libucm_la_DEPENDENCIES) + $(AM_V_CCLD)$(libucm_la_LINK) -rpath $(libdir) $(libucm_la_OBJECTS) $(libucm_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f bistro/*.$(OBJEXT) + -rm -f bistro/*.lo + -rm -f event/*.$(OBJEXT) + -rm -f event/*.lo + -rm -f malloc/*.$(OBJEXT) + -rm -f malloc/*.lo + -rm -f mmap/*.$(OBJEXT) + -rm -f mmap/*.lo + -rm -f ptmalloc286/*.$(OBJEXT) + -rm -f ptmalloc286/*.lo + -rm -f util/*.$(OBJEXT) + -rm -f util/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@bistro/$(DEPDIR)/libucm_la-bistro.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@bistro/$(DEPDIR)/libucm_la-bistro_aarch64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@bistro/$(DEPDIR)/libucm_la-bistro_x86_64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@event/$(DEPDIR)/libucm_la-event.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@malloc/$(DEPDIR)/libucm_la-malloc_hook.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@mmap/$(DEPDIR)/libucm_la-install.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ptmalloc286/$(DEPDIR)/libucm_la-malloc.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/libucm_la-log.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/libucm_la-reloc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/libucm_la-replace.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@util/$(DEPDIR)/libucm_la-sys.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +event/libucm_la-event.lo: event/event.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT event/libucm_la-event.lo -MD -MP -MF event/$(DEPDIR)/libucm_la-event.Tpo -c -o event/libucm_la-event.lo `test -f 'event/event.c' || echo '$(srcdir)/'`event/event.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) event/$(DEPDIR)/libucm_la-event.Tpo event/$(DEPDIR)/libucm_la-event.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='event/event.c' object='event/libucm_la-event.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o event/libucm_la-event.lo `test -f 'event/event.c' || echo '$(srcdir)/'`event/event.c + +malloc/libucm_la-malloc_hook.lo: malloc/malloc_hook.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT malloc/libucm_la-malloc_hook.lo -MD -MP -MF malloc/$(DEPDIR)/libucm_la-malloc_hook.Tpo -c -o malloc/libucm_la-malloc_hook.lo `test -f 'malloc/malloc_hook.c' || echo '$(srcdir)/'`malloc/malloc_hook.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) malloc/$(DEPDIR)/libucm_la-malloc_hook.Tpo malloc/$(DEPDIR)/libucm_la-malloc_hook.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='malloc/malloc_hook.c' object='malloc/libucm_la-malloc_hook.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o malloc/libucm_la-malloc_hook.lo `test -f 'malloc/malloc_hook.c' || echo '$(srcdir)/'`malloc/malloc_hook.c + +mmap/libucm_la-install.lo: mmap/install.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT mmap/libucm_la-install.lo -MD -MP -MF mmap/$(DEPDIR)/libucm_la-install.Tpo -c -o mmap/libucm_la-install.lo `test -f 'mmap/install.c' || echo '$(srcdir)/'`mmap/install.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mmap/$(DEPDIR)/libucm_la-install.Tpo mmap/$(DEPDIR)/libucm_la-install.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mmap/install.c' object='mmap/libucm_la-install.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o mmap/libucm_la-install.lo `test -f 'mmap/install.c' || echo '$(srcdir)/'`mmap/install.c + +util/libucm_la-replace.lo: util/replace.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT util/libucm_la-replace.lo -MD -MP -MF util/$(DEPDIR)/libucm_la-replace.Tpo -c -o util/libucm_la-replace.lo `test -f 'util/replace.c' || echo '$(srcdir)/'`util/replace.c 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) util/$(DEPDIR)/libucm_la-replace.Tpo util/$(DEPDIR)/libucm_la-replace.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='util/replace.c' object='util/libucm_la-replace.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o util/libucm_la-replace.lo `test -f 'util/replace.c' || echo '$(srcdir)/'`util/replace.c + +util/libucm_la-log.lo: util/log.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT util/libucm_la-log.lo -MD -MP -MF util/$(DEPDIR)/libucm_la-log.Tpo -c -o util/libucm_la-log.lo `test -f 'util/log.c' || echo '$(srcdir)/'`util/log.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) util/$(DEPDIR)/libucm_la-log.Tpo util/$(DEPDIR)/libucm_la-log.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='util/log.c' object='util/libucm_la-log.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o util/libucm_la-log.lo `test -f 'util/log.c' || echo '$(srcdir)/'`util/log.c + +util/libucm_la-reloc.lo: util/reloc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT 
util/libucm_la-reloc.lo -MD -MP -MF util/$(DEPDIR)/libucm_la-reloc.Tpo -c -o util/libucm_la-reloc.lo `test -f 'util/reloc.c' || echo '$(srcdir)/'`util/reloc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) util/$(DEPDIR)/libucm_la-reloc.Tpo util/$(DEPDIR)/libucm_la-reloc.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='util/reloc.c' object='util/libucm_la-reloc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o util/libucm_la-reloc.lo `test -f 'util/reloc.c' || echo '$(srcdir)/'`util/reloc.c + +util/libucm_la-sys.lo: util/sys.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT util/libucm_la-sys.lo -MD -MP -MF util/$(DEPDIR)/libucm_la-sys.Tpo -c -o util/libucm_la-sys.lo `test -f 'util/sys.c' || echo '$(srcdir)/'`util/sys.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) util/$(DEPDIR)/libucm_la-sys.Tpo util/$(DEPDIR)/libucm_la-sys.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='util/sys.c' object='util/libucm_la-sys.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o util/libucm_la-sys.lo `test -f 'util/sys.c' || echo '$(srcdir)/'`util/sys.c + +bistro/libucm_la-bistro.lo: bistro/bistro.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT bistro/libucm_la-bistro.lo -MD -MP -MF bistro/$(DEPDIR)/libucm_la-bistro.Tpo -c -o bistro/libucm_la-bistro.lo `test -f 'bistro/bistro.c' || echo '$(srcdir)/'`bistro/bistro.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) bistro/$(DEPDIR)/libucm_la-bistro.Tpo bistro/$(DEPDIR)/libucm_la-bistro.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bistro/bistro.c' object='bistro/libucm_la-bistro.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o bistro/libucm_la-bistro.lo `test -f 'bistro/bistro.c' || echo '$(srcdir)/'`bistro/bistro.c + +bistro/libucm_la-bistro_x86_64.lo: bistro/bistro_x86_64.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT bistro/libucm_la-bistro_x86_64.lo -MD -MP -MF bistro/$(DEPDIR)/libucm_la-bistro_x86_64.Tpo -c -o bistro/libucm_la-bistro_x86_64.lo `test -f 'bistro/bistro_x86_64.c' || echo '$(srcdir)/'`bistro/bistro_x86_64.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) bistro/$(DEPDIR)/libucm_la-bistro_x86_64.Tpo bistro/$(DEPDIR)/libucm_la-bistro_x86_64.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bistro/bistro_x86_64.c' object='bistro/libucm_la-bistro_x86_64.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o bistro/libucm_la-bistro_x86_64.lo `test -f 'bistro/bistro_x86_64.c' || echo '$(srcdir)/'`bistro/bistro_x86_64.c + +bistro/libucm_la-bistro_aarch64.lo: bistro/bistro_aarch64.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT bistro/libucm_la-bistro_aarch64.lo -MD -MP -MF bistro/$(DEPDIR)/libucm_la-bistro_aarch64.Tpo -c -o bistro/libucm_la-bistro_aarch64.lo `test -f 'bistro/bistro_aarch64.c' || echo '$(srcdir)/'`bistro/bistro_aarch64.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) bistro/$(DEPDIR)/libucm_la-bistro_aarch64.Tpo bistro/$(DEPDIR)/libucm_la-bistro_aarch64.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bistro/bistro_aarch64.c' object='bistro/libucm_la-bistro_aarch64.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o bistro/libucm_la-bistro_aarch64.lo `test -f 'bistro/bistro_aarch64.c' || echo '$(srcdir)/'`bistro/bistro_aarch64.c + +bistro/libucm_la-bistro_ppc64.lo: bistro/bistro_ppc64.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT bistro/libucm_la-bistro_ppc64.lo -MD -MP -MF bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Tpo -c -o bistro/libucm_la-bistro_ppc64.lo `test -f 'bistro/bistro_ppc64.c' || echo '$(srcdir)/'`bistro/bistro_ppc64.c +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Tpo bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bistro/bistro_ppc64.c' object='bistro/libucm_la-bistro_ppc64.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o bistro/libucm_la-bistro_ppc64.lo `test -f 'bistro/bistro_ppc64.c' || echo '$(srcdir)/'`bistro/bistro_ppc64.c + +ptmalloc286/libucm_la-malloc.lo: ptmalloc286/malloc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -MT ptmalloc286/libucm_la-malloc.lo -MD -MP -MF ptmalloc286/$(DEPDIR)/libucm_la-malloc.Tpo -c -o ptmalloc286/libucm_la-malloc.lo `test -f 'ptmalloc286/malloc.c' || echo '$(srcdir)/'`ptmalloc286/malloc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ptmalloc286/$(DEPDIR)/libucm_la-malloc.Tpo ptmalloc286/$(DEPDIR)/libucm_la-malloc.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ptmalloc286/malloc.c' object='ptmalloc286/libucm_la-malloc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_la_CFLAGS) $(CFLAGS) -c -o ptmalloc286/libucm_la-malloc.lo `test -f 'ptmalloc286/malloc.c' || echo '$(srcdir)/'`ptmalloc286/malloc.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf bistro/.libs 
bistro/_libs + -rm -rf event/.libs event/_libs + -rm -rf malloc/.libs malloc/_libs + -rm -rf mmap/.libs mmap/_libs + -rm -rf ptmalloc286/.libs ptmalloc286/_libs + -rm -rf util/.libs util/_libs +install-nobase_dist_libucm_laHEADERS: $(nobase_dist_libucm_la_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nobase_dist_libucm_la_HEADERS)'; test -n "$(libucm_ladir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(libucm_ladir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libucm_ladir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libucm_ladir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(libucm_ladir)/$$dir"; }; \ + echo " $(INSTALL_HEADER) $$xfiles '$(DESTDIR)$(libucm_ladir)/$$dir'"; \ + $(INSTALL_HEADER) $$xfiles "$(DESTDIR)$(libucm_ladir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_dist_libucm_laHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nobase_dist_libucm_la_HEADERS)'; test -n "$(libucm_ladir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(libucm_ladir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libucm_ladir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f bistro/$(DEPDIR)/$(am__dirstamp) + -rm -f bistro/$(am__dirstamp) + -rm -f event/$(DEPDIR)/$(am__dirstamp) + -rm -f event/$(am__dirstamp) + -rm -f malloc/$(DEPDIR)/$(am__dirstamp) + -rm -f malloc/$(am__dirstamp) + -rm -f mmap/$(DEPDIR)/$(am__dirstamp) + -rm -f mmap/$(am__dirstamp) + -rm -f ptmalloc286/$(DEPDIR)/$(am__dirstamp) + -rm -f ptmalloc286/$(am__dirstamp) + -rm -f util/$(DEPDIR)/$(am__dirstamp) + -rm -f util/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f bistro/$(DEPDIR)/libucm_la-bistro.Plo + -rm -f bistro/$(DEPDIR)/libucm_la-bistro_aarch64.Plo + -rm -f bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Plo + -rm -f bistro/$(DEPDIR)/libucm_la-bistro_x86_64.Plo + -rm -f event/$(DEPDIR)/libucm_la-event.Plo + -rm -f malloc/$(DEPDIR)/libucm_la-malloc_hook.Plo + -rm -f mmap/$(DEPDIR)/libucm_la-install.Plo + -rm -f ptmalloc286/$(DEPDIR)/libucm_la-malloc.Plo + -rm -f util/$(DEPDIR)/libucm_la-log.Plo + -rm -f util/$(DEPDIR)/libucm_la-reloc.Plo + -rm -f util/$(DEPDIR)/libucm_la-replace.Plo + -rm -f util/$(DEPDIR)/libucm_la-sys.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-nobase_dist_libucm_laHEADERS + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f bistro/$(DEPDIR)/libucm_la-bistro.Plo + -rm -f bistro/$(DEPDIR)/libucm_la-bistro_aarch64.Plo + -rm -f bistro/$(DEPDIR)/libucm_la-bistro_ppc64.Plo + -rm -f bistro/$(DEPDIR)/libucm_la-bistro_x86_64.Plo + -rm -f event/$(DEPDIR)/libucm_la-event.Plo + -rm -f malloc/$(DEPDIR)/libucm_la-malloc_hook.Plo + -rm -f mmap/$(DEPDIR)/libucm_la-install.Plo + -rm -f ptmalloc286/$(DEPDIR)/libucm_la-malloc.Plo + -rm -f util/$(DEPDIR)/libucm_la-log.Plo + -rm -f util/$(DEPDIR)/libucm_la-reloc.Plo + -rm -f util/$(DEPDIR)/libucm_la-replace.Plo + -rm -f util/$(DEPDIR)/libucm_la-sys.Plo + -rm -f Makefile 
+maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES \ + uninstall-nobase_dist_libucm_laHEADERS + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man \ + install-nobase_dist_libucm_laHEADERS install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-libLTLIBRARIES \ + uninstall-nobase_dist_libucm_laHEADERS + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/ucm/api/ucm.h b/src/ucm/api/ucm.h new file mode 100644 index 0000000..0e20010 --- /dev/null +++ b/src/ucm/api/ucm.h @@ -0,0 +1,454 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + + +#ifndef UCM_H_ +#define UCM_H_ + +#include + +BEGIN_C_DECLS + +#include +#include +#include + +#include +#include +#include +#include + + +/** + * @brief Memory event types + */ +typedef enum ucm_event_type { + /* Default initialization value */ + UCM_EVENT_NONE = 0, + /* Native events */ + UCM_EVENT_MMAP = UCS_BIT(0), + UCM_EVENT_MUNMAP = UCS_BIT(1), + UCM_EVENT_MREMAP = UCS_BIT(2), + UCM_EVENT_SHMAT = UCS_BIT(3), + UCM_EVENT_SHMDT = UCS_BIT(4), + UCM_EVENT_SBRK = UCS_BIT(5), + UCM_EVENT_MADVISE = UCS_BIT(6), + + /* Aggregate events */ + UCM_EVENT_VM_MAPPED = UCS_BIT(16), + UCM_EVENT_VM_UNMAPPED = UCS_BIT(17), + + /* Non-accessible memory alloc/free events */ + UCM_EVENT_MEM_TYPE_ALLOC = UCS_BIT(20), + UCM_EVENT_MEM_TYPE_FREE = UCS_BIT(21), + + /* Add event handler, but don't install new hooks */ + UCM_EVENT_FLAG_NO_INSTALL = UCS_BIT(24), + + /* When the event handler is added, generate approximated events for + * existing memory allocations. + * Currently implemented only for @ref UCM_EVENT_MEM_TYPE_ALLOC. + */ + UCM_EVENT_FLAG_EXISTING_ALLOC = UCS_BIT(25) + +} ucm_event_type_t; + + +/** + * @brief MMAP hook modes + */ +typedef enum ucm_mmap_hook_mode { + UCM_MMAP_HOOK_NONE, + UCM_MMAP_HOOK_RELOC, + UCM_MMAP_HOOK_BISTRO, + UCM_MMAP_HOOK_LAST +} ucm_mmap_hook_mode_t; + +/** + * @brief Memory event parameters and result. + */ +typedef union ucm_event { + /* + * UCM_EVENT_MMAP + * mmap() is called. + * callbacks: pre, post + */ + struct { + void *result; + void *address; + size_t size; + int prot; + int flags; + int fd; + off_t offset; + } mmap; + + /* + * UCM_EVENT_MUNMAP + * munmap() is called. + */ + struct { + int result; + void *address; + size_t size; + } munmap; + + /* + * UCM_EVENT_MREMAP + * mremap() is called. + */ + struct { + void *result; + void *address; + size_t old_size; + size_t new_size; + int flags; + } mremap; + + /* + * UCM_EVENT_SHMAT + * shmat() is called. 
+ */ + struct { + void *result; + int shmid; + const void *shmaddr; + int shmflg; + } shmat; + + /* + * UCM_EVENT_SHMDT + * shmdt() is called. + */ + struct { + int result; + const void *shmaddr; + } shmdt; + + /* + * UCM_EVENT_SBRK + * sbrk() is called. + */ + struct { + void *result; + intptr_t increment; + } sbrk; + + /* + * UCM_EVENT_MADVISE + * madvise() is called. + */ + struct { + int result; + void *addr; + size_t length; + int advice; + } madvise; + + /* + * UCM_EVENT_VM_MAPPED, UCM_EVENT_VM_UNMAPPED + * + * This is a "read-only" event which is called whenever memory is mapped + * or unmapped from process address space, in addition to the other events. + * It can return only UCM_EVENT_STATUS_NEXT. + * + * For UCM_EVENT_VM_MAPPED, callbacks are post + * For UCM_EVENT_VM_UNMAPPED, callbacks are pre + */ + struct { + void *address; + size_t size; + } vm_mapped, vm_unmapped; + + /* + * UCM_EVENT_MEM_TYPE_ALLOC, UCM_EVENT_MEM_TYPE_FREE + * + * Memory type allocation and deallocation event. + * If mem_type is @ref UCS_MEMORY_TYPE_LAST, the memory type is unknown, and + * further memory type detection is required. + */ + struct { + void *address; + size_t size; + ucs_memory_type_t mem_type; + } mem_type; + +} ucm_event_t; + + +/** + * @brief Global UCM configuration. + * + * Can be safely modified before using UCM functions. 
+ */ +typedef struct ucm_global_config { + ucs_log_level_t log_level; /* Logging level */ + int enable_events; /* Enable memory events */ + ucm_mmap_hook_mode_t mmap_hook_mode; /* MMAP hook mode */ + int enable_malloc_hooks; /* Enable installing malloc hooks */ + int enable_malloc_reloc; /* Enable installing malloc relocations */ + int enable_cuda_reloc; /* Enable installing CUDA relocations */ + int enable_dynamic_mmap_thresh; /* Enable adaptive mmap threshold */ + size_t alloc_alignment; /* Alignment for memory allocations */ + int dlopen_process_rpath; /* Process RPATH section in dlopen hook */ +} ucm_global_config_t; + + +/* Global UCM configuration */ +extern ucm_global_config_t ucm_global_opts; + + +/** + * @brief Memory event callback. + * + * This type describes a callback which handles memory events in the current process. + * + * @param [in] event_type Type of the event being fired. see @ref ucm_event_type_t. + * @param [inout] event Event information. This structure can be updated by + * this callback, as described below. + * @param [in] arg User-defined argument as passed to @ref ucm_set_event_handler. + * + * + * Events are dispatched in order of callback priority (low to high). + * + * The fields of the relevant part of the union are initialized as follows: + * - "result" - to an invalid erroneous return value (depends on the specific event). + * - the rest - to the input parameters of the event. + * + * The callback is allowed to modify the fields, and those modifications will + * be passed to the next callback. Also, the callback is allowed to modify the + * result, but **only if it's currently invalid**. A valid result indicates that + * a previous callback already performed the requested memory operation, so a + * callback should **refrain from actions with side-effects** in this case. 
+ * + * If the result is still invalid after all callbacks are called, the parameters, + * possibly modified by the callbacks, will be passed to the original handler. + * + * + * Important Note: The callback must not call any memory allocation routines, or + * anything which may trigger or wait for memory allocation, because it + * may lead to deadlock or infinite recursion. + * + * @todo describe use cases + * + */ +typedef void (*ucm_event_callback_t)(ucm_event_type_t event_type, + ucm_event_t *event, void *arg); + + +/** + * @brief Install a handler for memory events. + * + * @param [in] events Bit-mask of events to handle. + * @param [in] priority Priority value which defines the order in which event + * callbacks are called. + * < 0 - called before the original implementation, + * >= 0 - called after the original implementation. + * @param [in] cb Event-handling callback. + * @param [in] arg User-defined argument for the callback. + * + * @note If UCM_EVENT_FLAG_NO_INSTALL flag is passed in @a events argument, + * only @cb handler will be registered for @a events. No memory + * events/hooks will be installed. + * + * @return Status code. + */ +ucs_status_t ucm_set_event_handler(int events, int priority, + ucm_event_callback_t cb, void *arg); + + +/** + * @brief Remove a handler for memory events. + * + * @param [in] events Which events to remove. The handler is removed + * completely when all its events are removed. + * @param [in] cb Event-handling callback. + * @param [in] arg User-defined argument for the callback. + */ +void ucm_unset_event_handler(int events, ucm_event_callback_t cb, void *arg); + + +/** + * @brief Add memory events to the external events list. + * + * When the event is set to be external, it means that user is responsible for + * handling it. So, setting a handler for external event will not trigger + * installing of UCM memory hooks (if they were not installed before). 
In this + * case the corresponding UCM function needs to be invoked to trigger event + * handlers. + * Usage example is when the user disables UCM memory hooks (he may have its + * own hooks, like Open MPI), but it wants to use some UCM based functionality, + * e.g. IB registration cache. IB registration cache needs to be notified about + * UCM_EVENT_VM_UNMAPPED events, therefore it adds specific handler for it. + * In this case user needs to declare UCM_EVENT_VM_UNMAPPED event as external + * and explicitly call ucm_vm_munmap() when some memory release operation + * occurs. + * + * @param [in] events Bit-mask of events which are supposed to be handled + * externally. + * + * @note To take an effect, the event should be set external prior to adding + * event handlers for it. + */ + void ucm_set_external_event(int events); + + + /** + * @brief Remove memory events from the external events list. + * + * When the event is removed from the external events list, any subsequent call + * to ucm_set_event_handler() for that event will trigger installing of UCM + * memory hooks (if they are enabled and were not installed before). + * + * @param [in] events Which events to remove from the external events list. + */ + void ucm_unset_external_event(int events); + + + /** + * @brief Test event handlers + * + * This routine checks if event handlers are called when corresponding system API + * is invoked. + * + * @param [in] events Bit-mask of events whose handlers should be + * tested. + * + * @return Status code. + */ + ucs_status_t ucm_test_events(int events); + + + /** + * @brief Call the original implementation of @ref mmap without triggering events. + */ + void *ucm_orig_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset); + + + /** + * @brief Call the original implementation of @ref munmap without triggering events.
+ */ +int ucm_orig_munmap(void *addr, size_t length); + + +/** + * @brief Call the original implementation of @ref mremap without triggering events. + */ +void *ucm_orig_mremap(void *old_address, size_t old_size, size_t new_size, + int flags); + + +/** + * @brief Call the original implementation of @ref shmat without triggering events. + */ +void *ucm_orig_shmat(int shmid, const void *shmaddr, int shmflg); + + +/** + * @brief Call the original implementation of @ref shmdt without triggering events. + */ +int ucm_orig_shmdt(const void *shmaddr); + + +/** + * @brief Call the original implementation of @ref sbrk without triggering events. + */ +void *ucm_orig_sbrk(intptr_t increment); + + +/** + * @brief Call the original implementation of @ref brk without triggering events. + */ +int ucm_orig_brk(void *addr); + + +/** + * @brief Call the original implementation of @ref madvise without triggering events. + */ +int ucm_orig_madvise(void *addr, size_t length, int advice); + + +/** + * @brief Call the original implementation of @ref mmap and all handlers + * associated with it. + */ +void *ucm_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset); + + +/** + * @brief Call the original implementation of @ref munmap and all handlers + * associated with it. + */ +int ucm_munmap(void *addr, size_t length); + + +/** + * @brief Call the handlers registered for aggregated VM_MMAP event. + */ +void ucm_vm_mmap(void *addr, size_t length); + + +/** + * @brief Call the handlers registered for aggregated VM_MUNMAP event. + */ +void ucm_vm_munmap(void *addr, size_t length); + + +/** + * @brief Call the original implementation of @ref mremap and all handlers + * associated with it. + */ +void *ucm_mremap(void *old_address, size_t old_size, size_t new_size, int flags); + + +/** + * @brief Call the original implementation of @ref shmat and all handlers + * associated with it. 
+ */ +void *ucm_shmat(int shmid, const void *shmaddr, int shmflg); + + +/** + * @brief Call the original implementation of @ref shmdt and all handlers + * associated with it. + */ +int ucm_shmdt(const void *shmaddr); + + +/** + * @brief Call the original implementation of @ref sbrk and all handlers + * associated with it. + */ +void *ucm_sbrk(intptr_t increment); + + +/** + * @brief Call the original implementation of @ref brk and all handlers + * associated with it. + */ +int ucm_brk(void *addr); + + +/** + * @brief Call the original implementation of @ref madvise and all handlers + * associated with it. + */ +int ucm_madvise(void *addr, size_t length, int advice); + + +/** + * @brief Call the original implementation of @ref dlopen and all handlers + * associated with it. + */ +void *ucm_dlopen(const char *filename, int flag); + + +END_C_DECLS + +#endif diff --git a/src/ucm/bistro/bistro.c b/src/ucm/bistro/bistro.c new file mode 100644 index 0000000..1e53cb2 --- /dev/null +++ b/src/ucm/bistro/bistro.c @@ -0,0 +1,106 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include + +#include +#include + +ucs_status_t ucm_bistro_remove_restore_point(ucm_bistro_restore_point_t *rp) +{ + ucs_assert(rp != NULL); + free(rp); + return UCS_OK; +} + +static void *ucm_bistro_page_align_ptr(void *ptr) +{ + return (void*)ucs_align_down((uintptr_t)ptr, ucm_get_page_size()); +} + +static ucs_status_t ucm_bistro_protect(void *addr, size_t len, int prot) +{ + void *aligned = ucm_bistro_page_align_ptr(addr); + size_t size = UCS_PTR_BYTE_DIFF(aligned, addr) + len; + int res; + + res = mprotect(aligned, size, prot) ? 
UCS_ERR_INVALID_PARAM : UCS_OK; + if (res) { + ucm_error("Failed to change page protection: %m"); + return UCS_ERR_INVALID_PARAM; + } + + return UCS_OK; +} + +ucs_status_t ucm_bistro_apply_patch(void *dst, void *patch, size_t len) +{ + ucs_status_t status; + + status = ucm_bistro_protect(dst, len, UCM_PROT_READ_WRITE_EXEC); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + + memcpy(dst, patch, len); + + status = ucm_bistro_protect(dst, len, UCM_PROT_READ_EXEC); + if (!UCS_STATUS_IS_ERR(status)) { + ucs_clear_cache(dst, UCS_PTR_BYTE_OFFSET(dst, len)); + } + return status; +} + +#if defined(__x86_64__) || defined (__aarch64__) +struct ucm_bistro_restore_point { + void *addr; /* address of function to restore */ + ucm_bistro_patch_t patch; /* original function body */ +}; + +ucs_status_t ucm_bistro_create_restore_point(void *addr, ucm_bistro_restore_point_t **rp) +{ + ucm_bistro_restore_point_t *point; + + if (rp == NULL) { + /* restore point is not required */ + return UCS_OK; + } + + point = malloc(sizeof(*point)); + if (!point) { + return UCS_ERR_NO_MEMORY; + } + + point->addr = addr; + point->patch = *(ucm_bistro_patch_t*)addr; + *rp = point; + return UCS_OK; +} + +ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp) +{ + ucs_status_t status; + + status = ucm_bistro_apply_patch(rp->addr, &rp->patch, sizeof(rp->patch)); + if (!UCS_STATUS_IS_ERR(status)) { + ucm_bistro_remove_restore_point(rp); + } + + return status; +} + +void *ucm_bistro_restore_addr(ucm_bistro_restore_point_t *rp) +{ + ucs_assert(rp != NULL); + return rp->addr; +} + +#endif diff --git a/src/ucm/bistro/bistro.h b/src/ucm/bistro/bistro.h new file mode 100644 index 0000000..16e9887 --- /dev/null +++ b/src/ucm/bistro/bistro.h @@ -0,0 +1,58 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + + + #ifndef UCM_BISTRO_BISTRO_H_ + #define UCM_BISTRO_BISTRO_H_ + + #include + + #include + + typedef struct ucm_bistro_restore_point ucm_bistro_restore_point_t; + + #if defined(__powerpc64__) + # include "bistro_ppc64.h" + #elif defined(__aarch64__) + # include "bistro_aarch64.h" + #elif defined(__x86_64__) + # include "bistro_x86_64.h" + #else + # error "Unsupported architecture" + #endif + + + /** + * Restore original function body using restore point created + * by @ref ucm_bistro_patch + * + * @param rp restore point; it is removed after the operation + * completes successfully + * + * @return Error code as defined by @ref ucs_status_t + */ + ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp); + + /** + * Remove restore point created by @ref ucm_bistro_patch without + * restoring the original function body + * + * @param rp restore point + * + * @return Error code as defined by @ref ucs_status_t + */ + ucs_status_t ucm_bistro_remove_restore_point(ucm_bistro_restore_point_t *rp); + + /** + * Get patch address for restore point + * + * @param rp restore point + * + * @return Address of patched function body + */ + void *ucm_bistro_restore_addr(ucm_bistro_restore_point_t *rp); + + #endif diff --git a/src/ucm/bistro/bistro_aarch64.c b/src/ucm/bistro/bistro_aarch64.c new file mode 100644 index 0000000..2cf0927 --- /dev/null +++ b/src/ucm/bistro/bistro_aarch64.c @@ -0,0 +1,86 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + #ifdef HAVE_CONFIG_H + # include "config.h" + #endif + + /* ******************************************************* + * ARM processors family * + * ***************************************************** */ + #if defined(__aarch64__) + + #include + #include + #include + #include + + #include + #include + #include + #include + #include + #include + + + /* Register number used to store indirect jump address. + * r15 is the highest numbered temporary register, assuming this one is safe + * to use.
*/ +#define R15 15 + +#define _MOV(_reg, _shift, _val, _opcode) \ + (((_opcode) << 23) + ((uint32_t)(_shift) << 21) + ((uint32_t)((_val) & 0xffff) << 5) + (_reg)) + +/** + * @brief Generate a mov immediate instruction + * + * @param[in] _reg register number (0-31) + * @param[in] _shift shift amount (0-3) * 16-bits + * @param[in] _value immediate value + */ +#define MOVZ(_reg, _shift, _val) _MOV(_reg, _shift, _val, 0x1a5) + +/** + * @brief Generate a mov immediate with keep instruction + * + * @param[in] _reg register number (0-31) + * @param[in] _shift shift amount (0-3) * 16-bits + * @param[in] _value immediate value + */ +#define MOVK(_reg, _shift, _val) _MOV(_reg, _shift, _val, 0x1e5) + +/** + * @brief Branch to address stored in register + * + * @param[in] _reg register number (0-31) + */ +#define BR(_reg) ((0xd61f << 16) + ((_reg) << 5)) + +ucs_status_t ucm_bistro_patch(const char *symbol, void *hook, + ucm_bistro_restore_point_t **rp) +{ + void *func; + ucs_status_t status; + + ucm_bistro_patch_t patch = { + .reg3 = MOVZ(R15, 3, (uintptr_t)hook >> 48), + .reg2 = MOVK(R15, 2, (uintptr_t)hook >> 32), + .reg1 = MOVK(R15, 1, (uintptr_t)hook >> 16), + .reg0 = MOVK(R15, 0, (uintptr_t)hook), + .br = BR(R15) + }; + + UCM_LOOKUP_SYMBOL(func, symbol); + + status = ucm_bistro_create_restore_point(func, rp); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + + return ucm_bistro_apply_patch(func, &patch, sizeof(patch)); +} + +#endif diff --git a/src/ucm/bistro/bistro_aarch64.h b/src/ucm/bistro/bistro_aarch64.h new file mode 100644 index 0000000..487aa92 --- /dev/null +++ b/src/ucm/bistro/bistro_aarch64.h @@ -0,0 +1,41 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + + +#ifndef UCM_BISTRO_BISTRO_AARCH64_H_ +#define UCM_BISTRO_BISTRO_AARCH64_H_ + +#include + +#include +#include + +#define UCM_BISTRO_PROLOGUE +#define UCM_BISTRO_EPILOGUE + +typedef struct ucm_bistro_patch { + uint32_t reg3; /* movz x15, addr, lsl #48 */ + uint32_t reg2; /* movk x15, addr, lsl #32 */ + uint32_t reg1; /* movk x15, addr, lsl #16 */ + uint32_t reg0; /* movk x15, addr */ + uint32_t br; /* br x15 */ +} UCS_S_PACKED ucm_bistro_patch_t; + +/** + * Set library function call hook using Binary Instrumentation + * method (BISTRO): replace function body by user defined call + * + * @param symbol function name to replace + * @param hook user-defined function-replacer + * @param rp restore point used to restore original function, + * optional, may be NULL + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucm_bistro_patch(const char *symbol, void *hook, + ucm_bistro_restore_point_t **rp); + +#endif diff --git a/src/ucm/bistro/bistro_int.h b/src/ucm/bistro/bistro_int.h new file mode 100644 index 0000000..40c80d5 --- /dev/null +++ b/src/ucm/bistro/bistro_int.h @@ -0,0 +1,48 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef UCM_BISTRO_BISTRO_INT_H_ +#define UCM_BISTRO_BISTRO_INT_H_ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define UCM_PROT_READ_WRITE_EXEC (PROT_READ | PROT_WRITE | PROT_EXEC) +#define UCM_PROT_READ_EXEC (PROT_READ | PROT_EXEC) + +#define UCM_LOOKUP_SYMBOL(_func, _symbol) \ + _func = ucm_bistro_lookup(_symbol); \ + if (!_func) { \ + return UCS_ERR_NO_ELEM; \ + } + +ucs_status_t ucm_bistro_apply_patch(void *dst, void *patch, size_t len); + +ucs_status_t ucm_bistro_create_restore_point(void *addr, ucm_bistro_restore_point_t **rp); + +static inline void *ucm_bistro_lookup(const char *symbol) +{ + void *addr; + + ucs_assert(symbol != NULL); + + addr = dlsym(RTLD_NEXT, symbol); + if (!addr) { + addr = dlsym(RTLD_DEFAULT, symbol); + } + return addr; +} + +#endif diff --git a/src/ucm/bistro/bistro_ppc64.c b/src/ucm/bistro/bistro_ppc64.c new file mode 100644 index 0000000..4b14250 --- /dev/null +++ b/src/ucm/bistro/bistro_ppc64.c @@ -0,0 +1,209 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + + #ifdef HAVE_CONFIG_H + # include "config.h" + #endif + + /* ******************************************************* + * POWER-PC processors family * + * ***************************************************** */ + #if defined (__powerpc64__) + + #include + #include + #include + #include + + #include + #include + #include + #include + #include + #include + + /* PowerPC instructions used in patching */ + /* Reference: "PowerPC User Instruction Set Architecture" */ + + /* Use r11 register for jump address */ + #define R11 11 + + #define OPCODE(_rt, _rs, _op) \ + (((_op) << 26) + ((_rt) << 21) + ((_rs) << 16)) + + #define OP0(_rt, _rs, _ui, _op) \ + (OPCODE(_rt, _rs, _op) + ((_ui) & 0xffff)) + + #define MTSPR(_spr, _rs) \ + (OPCODE(_rs, (_spr) & 0x1f, 31) + (((_spr) & ~UCS_MASK(5)) << 6) + (467 << 1)) + + #define BCCTR(_bo, _bi, _bh) \ + (OPCODE(_bo, _bi, 19) + ((_bh) << 11) + (528<<1)) + + #define RLDICR(_rt, _rs, _sh, _mb) \ + (OPCODE(_rs, _rt, 30) + (((_sh) & UCS_MASK(5)) << 11) + (((_sh) & ~UCS_MASK(5)) >> 4) + \ + (((_mb) & UCS_MASK(5)) << 6) + ((_mb) & ~UCS_MASK(5)) + UCS_BIT(2)) /* MD-form: low 5 bits of sh/me in their fields, bit 5 of sh -> insn bit 1, bit 5 of me -> insn bit 5, XO=1, Rc=0; bitwise '&' is required - a logical '&&' would set the Rc bit and emit "rldicr." */ + + #define ADDIS(_rt, _rs, _ui) OP0(_rt, _rs, _ui, 15) + #define ORI(_rt, _rs, _ui) OP0(_rs, _rt, _ui, 24) + #define ORIS(_rt, _rs, _ui) OP0(_rs, _rt, _ui, 25) + + typedef struct ucm_bistro_base_patch { + uint32_t addis; /* lis r11,(addr >> 48) */ + uint32_t ori1; /* ori r11,r11,(addr >> 32) */ + uint32_t rldicr; /* rldicr r11,r11,32,31 */ + uint32_t oris; /* oris r11,r11,(addr >> 16) */ + uint32_t ori2; /* ori r11,r11,addr */ + } UCS_S_PACKED ucm_bistro_base_patch_t; + + typedef struct ucm_bistro_patch { + ucm_bistro_base_patch_t super; + uint32_t mtspr; /* mtspr r11 */ + uint32_t bcctr; /* bcctr */ + } UCS_S_PACKED ucm_bistro_patch_t; + + struct ucm_bistro_restore_point { + void *entry; + void *hook; + ucm_bistro_base_patch_t hook_patch; + void *func; + ucm_bistro_patch_t func_patch; + }; + + static void ucm_bistro_fill_base_patch(ucm_bistro_base_patch_t *patch, + uint32_t reg, uintptr_t value) + { +
ucs_assert(patch != NULL); + + patch->addis = ADDIS ( reg, 0, (value >> 48)); + patch->ori1 = ORI ( reg, reg, (value >> 32)); + patch->rldicr = RLDICR( reg, reg, 32, 31); + patch->oris = ORIS ( reg, reg, (value >> 16)); + patch->ori2 = ORI ( reg, reg, (value >> 0)); +} + +static void ucm_bistro_fill_patch(ucm_bistro_patch_t *patch, + uint32_t reg, uintptr_t value) +{ + ucs_assert(patch != NULL); + + ucm_bistro_fill_base_patch(&patch->super, reg, value); + + patch->mtspr = MTSPR(9, reg); /* 9 = CTR */ + patch->bcctr = BCCTR(20, 0, 0); /* 20 = always */ +} + +static ucs_status_t ucm_bistro_patch_hook(void *hook, ucm_bistro_restore_point_t *rp, + uint64_t toc) +{ + const uint32_t nop = 0x60000000; + uint32_t *toc_ptr; + ucm_bistro_base_patch_t *toc_patch; + ucm_bistro_base_patch_t patch; + + /* locate reserved code space in hook function */ + for (toc_ptr = hook;; toc_ptr++) { + toc_patch = (ucm_bistro_base_patch_t*)toc_ptr; + if ((toc_patch->addis == nop) && + (toc_patch->ori1 == nop) && + (toc_patch->rldicr == nop) && + (toc_patch->oris == nop) && + (toc_patch->ori2 == nop)) { + break; + } + } + + if (rp) { + rp->hook = toc_ptr; + rp->hook_patch = *toc_patch; + } + + ucm_bistro_fill_base_patch(&patch, 2, toc); + return ucm_bistro_apply_patch(toc_ptr, &patch, sizeof(patch)); +} + +static void *ucm_bistro_get_text_addr(void *addr) +{ +#if !defined (_CALL_ELF) || (_CALL_ELF != 2) + return addr ? 
*(void**)addr : 0; +#else + return addr; +#endif +} + +ucs_status_t ucm_bistro_patch_toc(const char *symbol, void *hook, + ucm_bistro_restore_point_t **rp, + uint64_t toc) +{ + ucs_status_t status; + void *func; + ucm_bistro_restore_point_t restore; + ucm_bistro_patch_t patch; + + UCM_LOOKUP_SYMBOL(func, symbol); + + restore.entry = func; + + func = ucm_bistro_get_text_addr(func); + hook = ucm_bistro_get_text_addr(hook); + + status = ucm_bistro_patch_hook(hook, &restore, toc); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + +#if defined(_CALL_ELF) && (_CALL_ELF == 2) + func += 8; + hook += 8; +#endif + + ucm_bistro_fill_patch(&patch, R11, (uintptr_t)hook); + + restore.func = func; + restore.func_patch = *(ucm_bistro_patch_t*)func; + + status = ucm_bistro_apply_patch(func, &patch, sizeof(patch)); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + + if (rp) { + *rp = malloc(sizeof(restore)); + if (!(*rp)) { + return UCS_ERR_NO_MEMORY; + } + **rp = restore; + } + + return UCS_OK; +} + +ucs_status_t ucm_bistro_restore(ucm_bistro_restore_point_t *rp) +{ + ucs_status_t status; + + ucs_assert(rp != NULL); + + status = ucm_bistro_apply_patch(rp->func, &rp->func_patch, sizeof(rp->func_patch)); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + + status = ucm_bistro_apply_patch(rp->hook, &rp->hook_patch, sizeof(rp->hook_patch)); + if (!UCS_STATUS_IS_ERR(status)) { + ucm_bistro_remove_restore_point(rp); + } + + return status; +} + +void *ucm_bistro_restore_addr(ucm_bistro_restore_point_t *rp) +{ + ucs_assert(rp != NULL); + return rp->entry; +} + +#endif diff --git a/src/ucm/bistro/bistro_ppc64.h b/src/ucm/bistro/bistro_ppc64.h new file mode 100644 index 0000000..7b5c3b4 --- /dev/null +++ b/src/ucm/bistro/bistro_ppc64.h @@ -0,0 +1,51 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + + + #ifndef UCM_BISTRO_BISTRO_PPC64_H_ + #define UCM_BISTRO_BISTRO_PPC64_H_ + + #include + + #include + + /* special processing for ppc64 to save and restore TOC (r2) + * Reference: "64-bit PowerPC ELF Application Binary Interface Supplement 1.9" */ + #define UCM_BISTRO_PROLOGUE \ + uint64_t toc_save; \ + asm volatile ("std 2, %0" : "=m" (toc_save)); \ + asm volatile ("nop; nop; nop; nop; nop"); + #define UCM_BISTRO_EPILOGUE \ + asm volatile ("ld 2, %0" : : "m" (toc_save)); + + + /** + * Set library function call hook using Binary Instrumentation + * method (BISTRO): replace function body by user defined call + * + * @param symbol function name to replace + * @param hook user-defined function-replacer + * @param rp restore point used to restore original function, + * optional, may be NULL + * + * @return Error code as defined by @ref ucs_status_t + */ + /* we have to use inline proxy call to save TOC register + * value - PPC is very sensitive to this register value */ + ucs_status_t ucm_bistro_patch_toc(const char *symbol, void *hook, + ucm_bistro_restore_point_t **rp, + uint64_t toc); + + static inline + ucs_status_t ucm_bistro_patch(const char *symbol, void *hook, + ucm_bistro_restore_point_t **rp) + { + uint64_t toc; + asm volatile ("std 2, %0" : "=m" (toc)); + return ucm_bistro_patch_toc(symbol, hook, rp, toc); + } + + #endif diff --git a/src/ucm/bistro/bistro_x86_64.c b/src/ucm/bistro/bistro_x86_64.c new file mode 100644 index 0000000..b2e57b0 --- /dev/null +++ b/src/ucm/bistro/bistro_x86_64.c @@ -0,0 +1,51 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms.
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +/* ******************************************************* + * x86 processors family * + * ***************************************************** */ +#if defined(__x86_64__) + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static const ucm_bistro_patch_t patch_tmpl = { + .mov_r11 = {0x49, 0xbb}, + .jmp_r11 = {0x41, 0xff, 0xe3} +}; + +ucs_status_t ucm_bistro_patch(const char *symbol, void *hook, + ucm_bistro_restore_point_t **rp) +{ + ucm_bistro_patch_t patch = patch_tmpl; + ucs_status_t status; + void *func; + + UCM_LOOKUP_SYMBOL(func, symbol); + + patch.ptr = hook; + + status = ucm_bistro_create_restore_point(func, rp); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + + return ucm_bistro_apply_patch(func, &patch, sizeof(patch)); +} +#endif diff --git a/src/ucm/bistro/bistro_x86_64.h b/src/ucm/bistro/bistro_x86_64.h new file mode 100644 index 0000000..bf8d5e9 --- /dev/null +++ b/src/ucm/bistro/bistro_x86_64.h @@ -0,0 +1,39 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + + +#ifndef UCM_BISTRO_BISTRO_X86_64_H_ +#define UCM_BISTRO_BISTRO_X86_64_H_ + +#include + +#include +#include + +#define UCM_BISTRO_PROLOGUE +#define UCM_BISTRO_EPILOGUE + +typedef struct ucm_bistro_patch { + uint8_t mov_r11[2]; /* mov %r11, addr */ + void *ptr; + uint8_t jmp_r11[3]; /* jmp r11 */ +} UCS_S_PACKED ucm_bistro_patch_t; + +/** + * Set library function call hook using Binary Instrumentation + * method (BISTRO): replace function body by user defined call + * + * @param symbol function name to replace + * @param hook user-defined function-replacer + * @param rp restore point used to restore original function, + * optional, may be NULL + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucm_bistro_patch(const char *symbol, void *hook, + ucm_bistro_restore_point_t **rp); + +#endif diff --git a/src/ucm/configure.m4 b/src/ucm/configure.m4 new file mode 100644 index 0000000..2a752b6 --- /dev/null +++ b/src/ucm/configure.m4 @@ -0,0 +1,15 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +AC_SUBST([UCM_MODULE_LDFLAGS], + ["-Xlinker -z -Xlinker interpose -Xlinker --no-as-needed"]) + +ucm_modules="" +m4_include([src/ucm/cuda/configure.m4]) +m4_include([src/ucm/rocm/configure.m4]) +AC_DEFINE_UNQUOTED([ucm_MODULES], ["${ucm_modules}"], [UCM loadable modules]) + +AC_CONFIG_FILES([src/ucm/Makefile]) diff --git a/src/ucm/cuda/Makefile.am b/src/ucm/cuda/Makefile.am new file mode 100644 index 0000000..438960e --- /dev/null +++ b/src/ucm/cuda/Makefile.am @@ -0,0 +1,25 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + +if HAVE_CUDA + +module_LTLIBRARIES = libucm_cuda.la +libucm_cuda_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS) +libucm_cuda_la_CFLAGS = $(BASE_CFLAGS) $(CUDA_CFLAGS) +libucm_cuda_la_LIBADD = ../libucm.la +libucm_cuda_la_LDFLAGS = $(UCM_MODULE_LDFLAGS) \ + $(patsubst %, -Xlinker %, $(CUDA_LDFLAGS)) \ + -version-info $(SOVERSION) + +noinst_HEADERS = \ + cudamem.h + +libucm_cuda_la_SOURCES = \ + cudamem.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/ucm/cuda/Makefile.in b/src/ucm/cuda/Makefile.in new file mode 100644 index 0000000..45c641e --- /dev/null +++ b/src/ucm/cuda/Makefile.in @@ -0,0 +1,857 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/ucm/cuda +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + 
$(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d 
+CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_CUDA_TRUE@libucm_cuda_la_DEPENDENCIES = ../libucm.la +am__libucm_cuda_la_SOURCES_DIST = cudamem.c +@HAVE_CUDA_TRUE@am_libucm_cuda_la_OBJECTS = libucm_cuda_la-cudamem.lo +libucm_cuda_la_OBJECTS = $(am_libucm_cuda_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libucm_cuda_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libucm_cuda_la_CFLAGS) $(CFLAGS) $(libucm_cuda_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@HAVE_CUDA_TRUE@am_libucm_cuda_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libucm_cuda_la-cudamem.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = 
$(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libucm_cuda_la_SOURCES) +DIST_SOURCES = $(am__libucm_cuda_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = cudamem.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = 
@CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = 
@LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = 
@build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_CUDA_TRUE@module_LTLIBRARIES = libucm_cuda.la +@HAVE_CUDA_TRUE@libucm_cuda_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS) +@HAVE_CUDA_TRUE@libucm_cuda_la_CFLAGS = $(BASE_CFLAGS) $(CUDA_CFLAGS) +@HAVE_CUDA_TRUE@libucm_cuda_la_LIBADD = ../libucm.la +@HAVE_CUDA_TRUE@libucm_cuda_la_LDFLAGS = $(UCM_MODULE_LDFLAGS) \ +@HAVE_CUDA_TRUE@ $(patsubst %, -Xlinker %, $(CUDA_LDFLAGS)) \ +@HAVE_CUDA_TRUE@ -version-info $(SOVERSION) + +@HAVE_CUDA_TRUE@noinst_HEADERS = \ +@HAVE_CUDA_TRUE@ cudamem.h + +@HAVE_CUDA_TRUE@libucm_cuda_la_SOURCES = \ +@HAVE_CUDA_TRUE@ cudamem.c + + +# Automake silent rules +@HAVE_CUDA_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_CUDA_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_CUDA_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_CUDA_TRUE@AM_V_LN_1 = true +@HAVE_CUDA_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: 
@MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/ucm/cuda/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/ucm/cuda/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + 
+uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libucm_cuda.la: $(libucm_cuda_la_OBJECTS) $(libucm_cuda_la_DEPENDENCIES) $(EXTRA_libucm_cuda_la_DEPENDENCIES) + $(AM_V_CCLD)$(libucm_cuda_la_LINK) $(am_libucm_cuda_la_rpath) $(libucm_cuda_la_OBJECTS) $(libucm_cuda_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libucm_cuda_la-cudamem.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ 
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libucm_cuda_la-cudamem.lo: cudamem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libucm_cuda_la_CFLAGS) $(CFLAGS) -MT libucm_cuda_la-cudamem.lo -MD -MP -MF $(DEPDIR)/libucm_cuda_la-cudamem.Tpo -c -o libucm_cuda_la-cudamem.lo `test -f 'cudamem.c' || echo '$(srcdir)/'`cudamem.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libucm_cuda_la-cudamem.Tpo $(DEPDIR)/libucm_cuda_la-cudamem.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cudamem.c' object='libucm_cuda_la-cudamem.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libucm_cuda_la_CFLAGS) $(CFLAGS) -c -o libucm_cuda_la-cudamem.lo `test -f 'cudamem.c' || echo '$(srcdir)/'`cudamem.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: 
+ -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + 
if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_CUDA_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libucm_cuda_la-cudamem.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libucm_cuda_la-cudamem.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + 
+@HAVE_CUDA_TRUE@all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to create proper makefile dependencies +@HAVE_CUDA_TRUE@$(local_la_modules): $(module_LTLIBRARIES) +@HAVE_CUDA_TRUE@ $(AM_V_at)$(MKDIR_P) $(localmoduledir) +@HAVE_CUDA_TRUE@ $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_CUDA_TRUE@ (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ +@HAVE_CUDA_TRUE@ done +@HAVE_CUDA_TRUE@ @for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_CUDA_TRUE@ $(AM_V_LN) $$lib; \ +@HAVE_CUDA_TRUE@ done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/ucm/cuda/configure.m4 b/src/ucm/cuda/configure.m4 new file mode 100644 index 0000000..626ea54 --- /dev/null +++ b/src/ucm/cuda/configure.m4 @@ -0,0 +1,9 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +UCX_CHECK_CUDA +AS_IF([test "x$cuda_happy" = "xyes"], [ucm_modules="${ucm_modules}:cuda"]) +AC_CONFIG_FILES([src/ucm/cuda/Makefile]) diff --git a/src/ucm/cuda/cudamem.c b/src/ucm/cuda/cudamem.c new file mode 100644 index 0000000..6b3460c --- /dev/null +++ b/src/ucm/cuda/cudamem.c @@ -0,0 +1,466 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemFree, CUresult, -1, CUdeviceptr) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemFreeHost, CUresult, -1, void *) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemAlloc, CUresult, -1, CUdeviceptr *, size_t) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemAllocManaged, CUresult, -1, CUdeviceptr *, + size_t, unsigned int) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemAllocPitch, CUresult, -1, CUdeviceptr *, size_t *, + size_t, size_t, unsigned int) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemHostGetDevicePointer, CUresult, -1, CUdeviceptr *, + void *, unsigned int) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cuMemHostUnregister, CUresult, -1, void *) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaFree, cudaError_t, -1, void*) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaFreeHost, cudaError_t, -1, void*) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaMalloc, cudaError_t, -1, void**, size_t) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaMallocManaged, cudaError_t, -1, void**, size_t, unsigned int) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaMallocPitch, cudaError_t, -1, void**, size_t *, + size_t, size_t) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaHostGetDevicePointer, cudaError_t, -1, void**, + void *, unsigned int) +UCM_DEFINE_REPLACE_DLSYM_FUNC(cudaHostUnregister, cudaError_t, -1, void*) + +#if ENABLE_SYMBOL_OVERRIDE +UCM_OVERRIDE_FUNC(cuMemFree, CUresult) +UCM_OVERRIDE_FUNC(cuMemFreeHost, CUresult) +UCM_OVERRIDE_FUNC(cuMemAlloc, CUresult) +UCM_OVERRIDE_FUNC(cuMemAllocManaged, CUresult) +UCM_OVERRIDE_FUNC(cuMemAllocPitch, CUresult) +UCM_OVERRIDE_FUNC(cuMemHostGetDevicePointer, CUresult) +UCM_OVERRIDE_FUNC(cuMemHostUnregister, CUresult) +UCM_OVERRIDE_FUNC(cudaFree, cudaError_t) +UCM_OVERRIDE_FUNC(cudaFreeHost, cudaError_t) +UCM_OVERRIDE_FUNC(cudaMalloc, cudaError_t) +UCM_OVERRIDE_FUNC(cudaMallocManaged, cudaError_t) +UCM_OVERRIDE_FUNC(cudaMallocPitch, cudaError_t) 
+UCM_OVERRIDE_FUNC(cudaHostGetDevicePointer, cudaError_t)
+UCM_OVERRIDE_FUNC(cudaHostUnregister, cudaError_t)
+#endif
+
+
+/*
+ * Set CU_POINTER_ATTRIBUTE_SYNC_MEMOPS on the allocation at 'dptr'.
+ * A NULL pointer is skipped, and a failure is only reported as a warning.
+ */
+static void ucm_cuda_set_ptr_attr(CUdeviceptr dptr)
+{
+    const char *cu_err_str = NULL;
+    unsigned int value     = 1;
+    CUresult ret;
+
+    if ((void*)dptr == NULL) {
+        ucm_trace("skipping cuPointerSetAttribute for null pointer");
+        return;
+    }
+
+    ret = cuPointerSetAttribute(&value, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dptr);
+    if (ret != CUDA_SUCCESS) {
+        /* cuGetErrorString() can fail and provide no string; never pass a
+         * NULL or uninitialized pointer to the "%s" conversion */
+        if ((cuGetErrorString(ret, &cu_err_str) != CUDA_SUCCESS) ||
+            (cu_err_str == NULL)) {
+            cu_err_str = "unknown error";
+        }
+        ucm_warn("cuPointerSetAttribute(%p) failed: %s", (void *) dptr, cu_err_str);
+    }
+}
+
+/* Dispatch UCM_EVENT_MEM_TYPE_ALLOC for the region [addr, addr + length) */
+static UCS_F_ALWAYS_INLINE void
+ucm_dispatch_mem_type_alloc(void *addr, size_t length, ucs_memory_type_t mem_type)
+{
+    ucm_event_t event;
+
+    event.mem_type.address  = addr;
+    event.mem_type.size     = length;
+    event.mem_type.mem_type = mem_type;
+    ucm_event_dispatch(UCM_EVENT_MEM_TYPE_ALLOC, &event);
+}
+
+/* Dispatch UCM_EVENT_MEM_TYPE_FREE for the region [addr, addr + length) */
+static UCS_F_ALWAYS_INLINE void
+ucm_dispatch_mem_type_free(void *addr, size_t length, ucs_memory_type_t mem_type)
+{
+    ucm_event_t event;
+
+    event.mem_type.address  = addr;
+    event.mem_type.size     = length;
+    event.mem_type.mem_type = mem_type;
+    ucm_event_dispatch(UCM_EVENT_MEM_TYPE_FREE, &event);
+}
+
+/*
+ * Dispatch a MEM_TYPE_FREE event for the device pointer which is about to be
+ * released. The length is taken from cuMemGetAddressRange(); if that query
+ * fails, a length of 1 is reported. A NULL pointer dispatches nothing.
+ */
+static void ucm_cudafree_dispatch_events(void *dptr)
+{
+    CUresult ret;
+    CUdeviceptr pbase;
+    size_t psize;
+
+    if (dptr == NULL) {
+        return;
+    }
+
+    ret = cuMemGetAddressRange(&pbase, &psize, (CUdeviceptr) dptr);
+    if (ret == CUDA_SUCCESS) {
+        ucs_assert(dptr == (void *)pbase);
+    } else {
+        ucm_debug("cuMemGetAddressRange(devPtr=%p) failed", (void *)dptr);
+        psize = 1; /* set minimum length */
+    }
+
+    ucm_dispatch_mem_type_free((void *)dptr, psize, UCS_MEMORY_TYPE_CUDA);
+}
+
+/* cuMemFree() hook: dispatch the free event, then call the original function */
+CUresult ucm_cuMemFree(CUdeviceptr dptr)
+{
+    CUresult ret;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_cuMemFree(dptr=%p)",(void *)dptr);
+
+    ucm_cudafree_dispatch_events((void *)dptr);
+
+    ret = ucm_orig_cuMemFree(dptr);
+
+    ucm_event_leave();
+    return ret;
+}
+
+/* cuMemFreeHost() hook: dispatch a VM_UNMAPPED event, then call the original */
+CUresult ucm_cuMemFreeHost(void *p)
+{
+    CUresult ret;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_cuMemFreeHost(ptr=%p)", p);
+
+    /* NOTE(review): the length is reported as 0 - presumably because the size
+     * of the host allocation is not known here; confirm with handlers */
+    ucm_dispatch_vm_munmap(p, 0);
+
+    ret = ucm_orig_cuMemFreeHost(p);
+
+    ucm_event_leave();
+    return ret;
+}
+
+/*
+ * cuMemAlloc() hook: on success, dispatch a MEM_TYPE_ALLOC event for the new
+ * region and set the sync-memops attribute on it.
+ */
+CUresult ucm_cuMemAlloc(CUdeviceptr *dptr, size_t size)
+{
+    CUresult ret;
+
+    ucm_event_enter();
+
+    ret = ucm_orig_cuMemAlloc(dptr, size);
+    if (ret == CUDA_SUCCESS) {
+        ucm_trace("ucm_cuMemAlloc(dptr=%p size:%lu)",(void *)*dptr, size);
+        ucm_dispatch_mem_type_alloc((void *)*dptr, size, UCS_MEMORY_TYPE_CUDA);
+        ucm_cuda_set_ptr_attr(*dptr);
+    }
+
+    ucm_event_leave();
+    return ret;
+}
+
+/*
+ * cuMemAllocManaged() hook: on success, dispatch a MEM_TYPE_ALLOC event with
+ * UCS_MEMORY_TYPE_CUDA_MANAGED. The pointer attribute is not set here.
+ */
+CUresult ucm_cuMemAllocManaged(CUdeviceptr *dptr, size_t size, unsigned int flags)
+{
+    CUresult ret;
+
+    ucm_event_enter();
+
+    ret = ucm_orig_cuMemAllocManaged(dptr, size, flags);
+    if (ret == CUDA_SUCCESS) {
+        ucm_trace("ucm_cuMemAllocManaged(dptr=%p size:%lu, flags:%d)",
+                  (void *)*dptr, size, flags);
+        ucm_dispatch_mem_type_alloc((void *)*dptr, size,
+                                    UCS_MEMORY_TYPE_CUDA_MANAGED);
+    }
+
+    ucm_event_leave();
+    return ret;
+}
+
+/*
+ * cuMemAllocPitch() hook: on success, dispatch a MEM_TYPE_ALLOC event and set
+ * the sync-memops attribute.
+ * NOTE(review): the reported size is WidthInBytes * Height, not
+ * (*pPitch) * Height - confirm whether the row padding should be included.
+ */
+CUresult ucm_cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch,
+                             size_t WidthInBytes, size_t Height,
+                             unsigned int ElementSizeBytes)
+{
+    CUresult ret;
+
+    ucm_event_enter();
+
+    ret = ucm_orig_cuMemAllocPitch(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes);
+    if (ret == CUDA_SUCCESS) {
+        ucm_trace("ucm_cuMemAllocPitch(dptr=%p size:%lu)",(void *)*dptr,
+                  (WidthInBytes * Height));
+        ucm_dispatch_mem_type_alloc((void *)*dptr, WidthInBytes * Height,
+                                    UCS_MEMORY_TYPE_CUDA);
+        ucm_cuda_set_ptr_attr(*dptr);
+    }
+
+    ucm_event_leave();
+    return ret;
+}
+
+/* cuMemHostGetDevicePointer() hook: only traces a successful call */
+CUresult ucm_cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags)
+{
+    CUresult ret;
+
+    ucm_event_enter();
+
+    ret = ucm_orig_cuMemHostGetDevicePointer(pdptr, p, Flags);
+    if (ret == CUDA_SUCCESS) {
+        ucm_trace("ucm_cuMemHostGetDevicePointer(pdptr=%p p=%p)",(void *)*pdptr, p);
+    }
+
+    ucm_event_leave();
+    return ret;
+}
+
+/* cuMemHostUnregister() hook: traces the call; no event is dispatched */
+CUresult ucm_cuMemHostUnregister(void *p)
+{
+    CUresult ret;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_cuMemHostUnregister(ptr=%p)", p);
+
+    ret = ucm_orig_cuMemHostUnregister(p);
+
+    ucm_event_leave();
+    return ret;
+}
+
+/* cudaFree() hook: dispatch the free event, then call the original function */
+cudaError_t ucm_cudaFree(void *devPtr)
+{
+    cudaError_t ret;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_cudaFree(devPtr=%p)", devPtr);
+
+    ucm_cudafree_dispatch_events((void *)devPtr);
+
+    ret = ucm_orig_cudaFree(devPtr);
+
+    ucm_event_leave();
+
+    return ret;
+}
+
+/* cudaFreeHost() hook: dispatch a VM_UNMAPPED event, then call the original */
+cudaError_t ucm_cudaFreeHost(void *ptr)
+{
+    cudaError_t ret;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_cudaFreeHost(ptr=%p)", ptr);
+
+    /* the length is reported as 0, same as in ucm_cuMemFreeHost() */
+    ucm_dispatch_vm_munmap(ptr, 0);
+
+    ret = ucm_orig_cudaFreeHost(ptr);
+
+    ucm_event_leave();
+    return ret;
+}
+
+/*
+ * cudaMalloc() hook: on success, dispatch a MEM_TYPE_ALLOC event for the new
+ * region and set the sync-memops attribute on it.
+ */
+cudaError_t ucm_cudaMalloc(void **devPtr, size_t size)
+{
+    cudaError_t ret;
+
+    ucm_event_enter();
+
+    ret = ucm_orig_cudaMalloc(devPtr, size);
+    if (ret == cudaSuccess) {
+        ucm_trace("ucm_cudaMalloc(devPtr=%p size:%lu)", *devPtr, size);
+        ucm_dispatch_mem_type_alloc(*devPtr, size, UCS_MEMORY_TYPE_CUDA);
+        ucm_cuda_set_ptr_attr((CUdeviceptr) *devPtr);
+    }
+
+    ucm_event_leave();
+
+    return ret;
+}
+
+/*
+ * cudaMallocManaged() hook: on success, dispatch a MEM_TYPE_ALLOC event with
+ * UCS_MEMORY_TYPE_CUDA_MANAGED. The pointer attribute is not set here.
+ */
+cudaError_t ucm_cudaMallocManaged(void **devPtr, size_t size, unsigned int flags)
+{
+    cudaError_t ret;
+
+    ucm_event_enter();
+
+    ret = ucm_orig_cudaMallocManaged(devPtr, size, flags);
+    if (ret == cudaSuccess) {
+        ucm_trace("ucm_cudaMallocManaged(devPtr=%p size:%lu flags:%d)",
+                  *devPtr, size, flags);
+        ucm_dispatch_mem_type_alloc(*devPtr, size, UCS_MEMORY_TYPE_CUDA_MANAGED);
+    }
+
+    ucm_event_leave();
+
+    return ret;
+}
+
+/*
+ * cudaMallocPitch() hook: on success, dispatch a MEM_TYPE_ALLOC event and set
+ * the sync-memops attribute.
+ * NOTE(review): the reported size is width * height, not (*pitch) * height -
+ * confirm whether the row padding should be included.
+ */
+cudaError_t ucm_cudaMallocPitch(void **devPtr, size_t *pitch,
+                                size_t width, size_t height)
+{
+    cudaError_t ret;
+
+    ucm_event_enter();
+
+    ret = ucm_orig_cudaMallocPitch(devPtr, pitch, width, height);
+    if (ret == cudaSuccess) {
+        ucm_trace("ucm_cudaMallocPitch(devPtr=%p size:%lu)",*devPtr, (width * height));
+        ucm_dispatch_mem_type_alloc(*devPtr, (width * height), UCS_MEMORY_TYPE_CUDA);
+        ucm_cuda_set_ptr_attr((CUdeviceptr) *devPtr);
+    }
+
+    ucm_event_leave();
+    return ret;
+}
+
+/* cudaHostGetDevicePointer() hook: only traces a successful call */
+cudaError_t ucm_cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags)
+{
+    cudaError_t ret;
+
+    ucm_event_enter();
+
+    ret = ucm_orig_cudaHostGetDevicePointer(pDevice, pHost, flags);
+    if (ret == cudaSuccess) {
+        /* print the returned device pointer (not the address of the output
+         * argument), same as ucm_cuMemHostGetDevicePointer() does */
+        ucm_trace("ucm_cudaHostGetDevicePointer(pDevice=%p pHost=%p)", *pDevice, pHost);
+    }
+
+    ucm_event_leave();
+    return ret;
+}
+
+/* cudaHostUnregister() hook: traces the call; no event is dispatched */
+cudaError_t ucm_cudaHostUnregister(void *ptr)
+{
+    cudaError_t ret;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_cudaHostUnregister(ptr=%p)", ptr);
+
+    ret = ucm_orig_cudaHostUnregister(ptr);
+
+    ucm_event_leave();
+    return ret;
+}
+
+/* Symbol name -> override function pairs, terminated by a {NULL, NULL} entry */
+static ucm_reloc_patch_t patches[] = {
+    {UCS_PP_MAKE_STRING(cuMemFree),                 ucm_override_cuMemFree},
+    {UCS_PP_MAKE_STRING(cuMemFreeHost),             ucm_override_cuMemFreeHost},
+    {UCS_PP_MAKE_STRING(cuMemAlloc),                ucm_override_cuMemAlloc},
+    {UCS_PP_MAKE_STRING(cuMemAllocManaged),         ucm_override_cuMemAllocManaged},
+    {UCS_PP_MAKE_STRING(cuMemAllocPitch),           ucm_override_cuMemAllocPitch},
+    {UCS_PP_MAKE_STRING(cuMemHostGetDevicePointer), ucm_override_cuMemHostGetDevicePointer},
+    {UCS_PP_MAKE_STRING(cuMemHostUnregister),       ucm_override_cuMemHostUnregister},
+    {UCS_PP_MAKE_STRING(cudaFree),                  ucm_override_cudaFree},
+    {UCS_PP_MAKE_STRING(cudaFreeHost),              ucm_override_cudaFreeHost},
+    {UCS_PP_MAKE_STRING(cudaMalloc),                ucm_override_cudaMalloc},
+    {UCS_PP_MAKE_STRING(cudaMallocManaged),         ucm_override_cudaMallocManaged},
+    {UCS_PP_MAKE_STRING(cudaMallocPitch),           ucm_override_cudaMallocPitch},
+    {UCS_PP_MAKE_STRING(cudaHostGetDevicePointer),  ucm_override_cudaHostGetDevicePointer},
+    {UCS_PP_MAKE_STRING(cudaHostUnregister),        ucm_override_cudaHostUnregister},
+    {NULL,                                          NULL}
+};
+
+/*
+ * Install the relocation patches listed in 'patches', at most once.
+ *
+ * Returns UCS_OK without installing anything if 'events' contains neither
+ * UCM_EVENT_MEM_TYPE_ALLOC nor UCM_EVENT_MEM_TYPE_FREE, or if the patches are
+ * already installed. Returns UCS_ERR_UNSUPPORTED if enable_cuda_reloc is off,
+ * or the status of the first ucm_reloc_modify() call which fails.
+ */
+static ucs_status_t ucm_cudamem_install(int events)
+{
+    static int ucm_cudamem_installed = 0;
+    static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER;
+    ucm_reloc_patch_t *patch;
+    ucs_status_t status = UCS_OK;
+
+    if (!(events & (UCM_EVENT_MEM_TYPE_ALLOC | UCM_EVENT_MEM_TYPE_FREE))) {
+        goto out;
+    }
+
+    if (!ucm_global_opts.enable_cuda_reloc) {
+        ucm_debug("installing cudamem relocations is disabled by configuration");
+        status = UCS_ERR_UNSUPPORTED;
+        goto out;
+    }
+
+    pthread_mutex_lock(&install_mutex);
+
+    if (ucm_cudamem_installed) {
+        goto out_unlock;
+    }
+
+    for (patch = patches; patch->symbol != NULL; ++patch) {
+        status = ucm_reloc_modify(patch);
+        if (status != UCS_OK) {
+            /* the installed flag stays 0, so a later call starts over */
+            ucm_warn("failed to install relocation table entry for '%s'", patch->symbol);
+            goto out_unlock;
+        }
+    }
+
+    ucm_debug("cudaFree hooks are ready");
+    ucm_cudamem_installed = 1;
+
+out_unlock:
+    pthread_mutex_unlock(&install_mutex);
+out:
+    return status;
+}
+
+/*
+ * Callback for ucm_parse_proc_self_maps(): report a mapped region to the
+ * handler passed in 'arg' as a MEM_TYPE_ALLOC event of unknown memory type.
+ * Always returns 0.
+ */
+static int ucm_cudamem_scan_regions_cb(void *arg, void *addr, size_t length,
+                                       int prot, const char *path)
+{
+    static const char *cuda_path_pattern = "/dev/nvidia";
+    ucm_event_handler_t *handler         = arg;
+    ucm_event_t event;
+
+    /* we are interested in blocks which don't have any access permissions, or
+     * mapped to nvidia device.
+     */
+    if ((prot & (PROT_READ|PROT_WRITE|PROT_EXEC)) &&
+        strncmp(path, cuda_path_pattern, strlen(cuda_path_pattern))) {
+        return 0;
+    }
+
+    ucm_debug("dispatching initial memtype allocation for %p..%p %s",
+              addr, UCS_PTR_BYTE_OFFSET(addr, length), path);
+
+    event.mem_type.address  = addr;
+    event.mem_type.size     = length;
+    event.mem_type.mem_type = UCS_MEMORY_TYPE_LAST; /* unknown memory type */
+
+    /* call only this handler, not the whole handlers list */
+    ucm_event_enter();
+    handler->cb(UCM_EVENT_MEM_TYPE_ALLOC, &event, handler->arg);
+    ucm_event_leave();
+
+    return 0;
+}
+
+/* Scan the process memory maps for 'handler' if it listens to MEM_TYPE_ALLOC */
+static void ucm_cudamem_get_existing_alloc(ucm_event_handler_t *handler)
+{
+    if (handler->events & UCM_EVENT_MEM_TYPE_ALLOC) {
+        ucm_parse_proc_self_maps(ucm_cudamem_scan_regions_cb, handler);
+    }
+}
+
+static ucm_event_installer_t ucm_cuda_initializer = {
+    .install            = ucm_cudamem_install,
+    .get_existing_alloc = ucm_cudamem_get_existing_alloc
+};
+
+/* Add the CUDA installer to the global list of event installers */
+UCS_STATIC_INIT {
+    ucs_list_add_tail(&ucm_event_installer_list, &ucm_cuda_initializer.list);
+}
+
+UCS_STATIC_CLEANUP {
+    ucs_list_del(&ucm_cuda_initializer.list);
+}
diff --git a/src/ucm/cuda/cudamem.h b/src/ucm/cuda/cudamem.h
new file mode 100644
index 0000000..0326823
--- /dev/null
+++ b/src/ucm/cuda/cudamem.h
@@ -0,0 +1,101 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCM_CUDAMEM_H_
+#define UCM_CUDAMEM_H_
+
+#include
+#include
+#include
+
+
+/*cuMemFree */
+CUresult ucm_override_cuMemFree(CUdeviceptr dptr);
+CUresult ucm_orig_cuMemFree(CUdeviceptr dptr);
+CUresult ucm_cuMemFree(CUdeviceptr dptr);
+
+/*cuMemFreeHost */
+CUresult ucm_override_cuMemFreeHost(void *p);
+CUresult ucm_orig_cuMemFreeHost(void *p);
+CUresult ucm_cuMemFreeHost(void *p);
+
+/*cuMemAlloc*/
+CUresult ucm_override_cuMemAlloc(CUdeviceptr *dptr, size_t size);
+CUresult ucm_orig_cuMemAlloc(CUdeviceptr *dptr, size_t size);
+CUresult ucm_cuMemAlloc(CUdeviceptr *dptr, size_t size);
+
+/*cuMemAllocManaged*/
+CUresult ucm_override_cuMemAllocManaged(CUdeviceptr *dptr, size_t size,
+                                        unsigned int flags);
+CUresult ucm_orig_cuMemAllocManaged(CUdeviceptr *dptr, size_t size, unsigned int flags);
+CUresult ucm_cuMemAllocManaged(CUdeviceptr *dptr, size_t size, unsigned int flags);
+
+/*cuMemAllocPitch*/
+CUresult ucm_override_cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch,
+                                      size_t WidthInBytes, size_t Height,
+                                      unsigned int ElementSizeBytes);
+CUresult ucm_orig_cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch,
+                                  size_t WidthInBytes, size_t Height,
+                                  unsigned int ElementSizeBytes);
+CUresult ucm_cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch,
+                             size_t WidthInBytes, size_t Height,
+                             unsigned int ElementSizeBytes);
+
+/*cuMemHostGetDevicePointer*/
+CUresult ucm_override_cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p,
+                                                unsigned int Flags);
+CUresult ucm_orig_cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p,
+                                            unsigned int Flags);
+CUresult ucm_cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
+
+/*cuMemHostUnregister */
+CUresult ucm_override_cuMemHostUnregister(void *p);
+CUresult ucm_orig_cuMemHostUnregister(void *p);
+CUresult ucm_cuMemHostUnregister(void *p);
+
+/*cudaFree*/
+cudaError_t ucm_override_cudaFree(void *devPtr);
+cudaError_t ucm_orig_cudaFree(void *devPtr);
+cudaError_t ucm_cudaFree(void *devPtr);
+
+/*cudaFreeHost*/
+cudaError_t ucm_override_cudaFreeHost(void *ptr);
+cudaError_t ucm_orig_cudaFreeHost(void *ptr);
+cudaError_t ucm_cudaFreeHost(void *ptr);
+
+/*cudaMalloc*/
+cudaError_t ucm_override_cudaMalloc(void **devPtr, size_t size);
+cudaError_t ucm_orig_cudaMalloc(void **devPtr, size_t size);
+cudaError_t ucm_cudaMalloc(void **devPtr, size_t size);
+
+/*cudaMallocManaged*/
+cudaError_t ucm_override_cudaMallocManaged(void **devPtr, size_t size,
+                                           unsigned int flags);
+cudaError_t ucm_orig_cudaMallocManaged(void **devPtr, size_t size, unsigned int flags);
+cudaError_t ucm_cudaMallocManaged(void **devPtr, size_t size, unsigned int flags);
+
+/*cudaMallocPitch*/
+cudaError_t ucm_override_cudaMallocPitch(void **devPtr, size_t *pitch,
+                                         size_t width, size_t height);
+cudaError_t ucm_orig_cudaMallocPitch(void **devPtr, size_t *pitch,
+                                     size_t width, size_t height);
+cudaError_t ucm_cudaMallocPitch(void **devPtr, size_t *pitch,
+                                size_t width, size_t height);
+
+/*cudaHostGetDevicePointer*/
+cudaError_t ucm_override_cudaHostGetDevicePointer(void **pDevice, void *pHost,
+                                                  unsigned int flags);
+cudaError_t ucm_orig_cudaHostGetDevicePointer(void **pDevice, void *pHost,
+                                              unsigned int flags);
+cudaError_t ucm_cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags);
+
+
+/*cudaHostUnregister*/
+cudaError_t ucm_override_cudaHostUnregister(void *ptr);
+cudaError_t ucm_orig_cudaHostUnregister(void *ptr);
+cudaError_t ucm_cudaHostUnregister(void *ptr);
+
+#endif
diff --git a/src/ucm/event/event.c b/src/ucm/event/event.c
new file mode 100644
index 0000000..33e2367
--- /dev/null
+++ b/src/ucm/event/event.c
@@ -0,0 +1,658 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "event.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+UCS_LIST_HEAD(ucm_event_installer_list);
+
+static ucs_spinlock_t ucm_kh_lock;
+#define ucm_ptr_hash(_ptr)  kh_int64_hash_func((uintptr_t)(_ptr))
+KHASH_INIT(ucm_ptr_size, const void*, size_t, 1, ucm_ptr_hash, kh_int64_hash_equal)
+
+static pthread_rwlock_t ucm_event_lock = PTHREAD_RWLOCK_INITIALIZER;
+static ucs_list_link_t ucm_event_handlers;
+static int ucm_external_events = 0;
+static khash_t(ucm_ptr_size) ucm_shmat_ptrs;
+
+/* Return the size of SysV segment 'shmid', or 0 if shmctl(IPC_STAT) fails */
+static size_t ucm_shm_size(int shmid)
+{
+    struct shmid_ds ds;
+    int ret;
+
+    ret = shmctl(shmid, IPC_STAT, &ds);
+    if (ret < 0) {
+        return 0;
+    }
+
+    return ds.shm_segsz;
+}
+
+/*
+ * Handler which invokes the original implementation of an intercepted call,
+ * but only if the event's result field still holds its failure value
+ * (MAP_FAILED or -1) when this handler runs.
+ */
+static void ucm_event_call_orig(ucm_event_type_t event_type, ucm_event_t *event,
+                                void *arg)
+{
+    switch (event_type) {
+    case UCM_EVENT_MMAP:
+        if (event->mmap.result == MAP_FAILED) {
+            event->mmap.result = ucm_orig_mmap(event->mmap.address,
+                                               event->mmap.size,
+                                               event->mmap.prot,
+                                               event->mmap.flags,
+                                               event->mmap.fd,
+                                               event->mmap.offset);
+        }
+        break;
+    case UCM_EVENT_MUNMAP:
+        if (event->munmap.result == -1) {
+            event->munmap.result = ucm_orig_munmap(event->munmap.address,
+                                                   event->munmap.size);
+        }
+        break;
+    case UCM_EVENT_MREMAP:
+        if (event->mremap.result == MAP_FAILED) {
+            event->mremap.result = ucm_orig_mremap(event->mremap.address,
+                                                   event->mremap.old_size,
+                                                   event->mremap.new_size,
+                                                   event->mremap.flags);
+        }
+        break;
+    case UCM_EVENT_SHMAT:
+        if (event->shmat.result == MAP_FAILED) {
+            event->shmat.result = ucm_orig_shmat(event->shmat.shmid,
+                                                 event->shmat.shmaddr,
+                                                 event->shmat.shmflg);
+        }
+        break;
+    case UCM_EVENT_SHMDT:
+        if (event->shmdt.result == -1) {
+            event->shmdt.result = ucm_orig_shmdt(event->shmdt.shmaddr);
+        }
+        break;
+    case UCM_EVENT_SBRK:
+        if (event->sbrk.result == MAP_FAILED) {
+            event->sbrk.result = ucm_orig_sbrk(event->sbrk.increment);
+        }
+        break;
+    case UCM_EVENT_MADVISE:
+        if (event->madvise.result == -1) {
+            event->madvise.result = ucm_orig_madvise(event->madvise.addr,
+                                                     event->madvise.length,
+                                                     event->madvise.advice);
+        }
+        break;
+    default:
+        ucm_warn("Got unknown event %d", event_type);
+        break;
+    }
+}
+
+/*
+ * Add a handler which calls the original implementation, and declare the callback
+ * list so that initially it will be the single element on that list.
+ */
+static ucm_event_handler_t ucm_event_orig_handler = {
+    .list     = UCS_LIST_INITIALIZER(&ucm_event_handlers, &ucm_event_handlers),
+    .events   = UCM_EVENT_MMAP | UCM_EVENT_MUNMAP | UCM_EVENT_MREMAP |
+                UCM_EVENT_SHMAT | UCM_EVENT_SHMDT | UCM_EVENT_SBRK |
+                UCM_EVENT_MADVISE, /* All events */
+    .priority = 0, /* Between negative and positive handlers */
+    .cb       = ucm_event_call_orig
+};
+static ucs_list_link_t ucm_event_handlers =
+    UCS_LIST_INITIALIZER(&ucm_event_orig_handler.list,
+                         &ucm_event_orig_handler.list);
+
+
+/* Call, in list order, every handler which is subscribed to 'event_type' */
+void ucm_event_dispatch(ucm_event_type_t event_type, ucm_event_t *event)
+{
+    ucm_event_handler_t *handler;
+
+    ucs_list_for_each(handler, &ucm_event_handlers, list) {
+        if (handler->events & event_type) {
+            handler->cb(event_type, event, handler->arg);
+        }
+    }
+}
+
+/* Take ucm_event_lock with '_lock_func', retrying on EAGAIN; fatal otherwise */
+#define ucm_event_lock(_lock_func) \
+    { \
+        int ret; \
+        do { \
+            ret = _lock_func(&ucm_event_lock); \
+        } while (ret == EAGAIN); \
+        if (ret != 0) { \
+            ucm_fatal("%s() failed: %s", #_lock_func, strerror(ret)); \
+        } \
+    }
+
+void ucm_event_enter()
+{
+    ucm_event_lock(pthread_rwlock_rdlock);
+}
+
+void ucm_event_enter_exclusive()
+{
+    ucm_event_lock(pthread_rwlock_wrlock);
+}
+
+void ucm_event_leave()
+{
+    pthread_rwlock_unlock(&ucm_event_lock);
+}
+
+void *ucm_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+    ucm_event_t event;
+
+    ucm_trace("ucm_mmap(addr=%p length=%lu prot=0x%x flags=0x%x fd=%d offset=%ld)",
+              addr, length, prot, flags, fd, offset);
+
+    ucm_event_enter();
+
+    if ((flags & MAP_FIXED) && (addr != NULL)) {
+        ucm_dispatch_vm_munmap(addr, length);
+    }
+
+    event.mmap.result  = MAP_FAILED;
+    event.mmap.address = addr;
+    event.mmap.size    = length;
+    event.mmap.prot    = prot;
+    event.mmap.flags   = flags;
+    event.mmap.fd      = fd;
+    event.mmap.offset  = offset;
+    ucm_event_dispatch(UCM_EVENT_MMAP, &event);
+
+    if (event.mmap.result != MAP_FAILED) {
+        /* Use original length */
+        ucm_dispatch_vm_mmap(event.mmap.result, length);
+    }
+
+    ucm_event_leave();
+
+    return event.mmap.result;
+}
+
+int ucm_munmap(void *addr, size_t length)
+{
+    ucm_event_t event;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_munmap(addr=%p length=%lu)", addr, length);
+
+    ucm_dispatch_vm_munmap(addr, length);
+
+    event.munmap.result  = -1;
+    event.munmap.address = addr;
+    event.munmap.size    = length;
+    ucm_event_dispatch(UCM_EVENT_MUNMAP, &event);
+
+    ucm_event_leave();
+
+    return event.munmap.result;
+}
+
+void ucm_vm_mmap(void *addr, size_t length)
+{
+    ucm_event_enter();
+
+    ucm_trace("ucm_vm_mmap(addr=%p length=%lu)", addr, length);
+    ucm_dispatch_vm_mmap(addr, length);
+
+    ucm_event_leave();
+}
+
+void ucm_vm_munmap(void *addr, size_t length)
+{
+    ucm_event_enter();
+
+    ucm_trace("ucm_vm_munmap(addr=%p length=%lu)", addr, length);
+    ucm_dispatch_vm_munmap(addr, length);
+
+    ucm_event_leave();
+}
+
+void *ucm_mremap(void *old_address, size_t old_size, size_t new_size, int flags)
+{
+    ucm_event_t event;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_mremap(old_address=%p old_size=%lu new_size=%ld flags=0x%x)",
+              old_address, old_size, new_size, flags);
+
+    ucm_dispatch_vm_munmap(old_address, old_size);
+
+    event.mremap.result   = MAP_FAILED;
+    event.mremap.address  = old_address;
+    event.mremap.old_size = old_size;
+    event.mremap.new_size = new_size;
+    event.mremap.flags    = flags;
+    ucm_event_dispatch(UCM_EVENT_MREMAP, &event);
+
+    if (event.mremap.result != MAP_FAILED) {
+        /* Use original new_size */
+        ucm_dispatch_vm_mmap(event.mremap.result, new_size);
+    }
+
+    ucm_event_leave();
+
+    return event.mremap.result;
+}
+
+void *ucm_shmat(int shmid, const void *shmaddr, int shmflg)
+{
+    uintptr_t attach_addr;
+    ucm_event_t event;
+    khiter_t iter;
+    size_t size;
+    int result;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_shmat(shmid=%d shmaddr=%p shmflg=0x%x)",
+              shmid, shmaddr, shmflg);
+
+    size = ucm_shm_size(shmid);
+
+    if ((shmflg & SHM_REMAP) && (shmaddr != NULL)) {
+        attach_addr = (uintptr_t)shmaddr;
+        if (shmflg & SHM_RND) {
+            attach_addr -= attach_addr % SHMLBA;
+        }
+        ucm_dispatch_vm_munmap((void*)attach_addr, size);
+    }
+
+    event.shmat.result  = MAP_FAILED;
+    event.shmat.shmid   = shmid;
+    event.shmat.shmaddr = shmaddr;
+    event.shmat.shmflg  = shmflg;
+    ucm_event_dispatch(UCM_EVENT_SHMAT, &event);
+
+    ucs_spin_lock(&ucm_kh_lock);
+    if (event.shmat.result != MAP_FAILED) {
+        /* Remember the segment size so that ucm_shmdt() can report it */
+        iter = kh_put(ucm_ptr_size, &ucm_shmat_ptrs, event.shmat.result, &result);
+        if (result != -1) {
+            kh_value(&ucm_shmat_ptrs, iter) = size;
+        }
+        ucs_spin_unlock(&ucm_kh_lock);
+        ucm_dispatch_vm_mmap(event.shmat.result, size);
+    } else {
+        ucs_spin_unlock(&ucm_kh_lock);
+    }
+
+    ucm_event_leave();
+
+    return event.shmat.result;
+}
+
+int ucm_shmdt(const void *shmaddr)
+{
+    ucm_event_t event;
+    khiter_t iter;
+    size_t size;
+
+    ucm_event_enter();
+
+    ucm_debug("ucm_shmdt(shmaddr=%p)", shmaddr);
+
+    ucs_spin_lock(&ucm_kh_lock);
+    iter = kh_get(ucm_ptr_size, &ucm_shmat_ptrs, shmaddr);
+    if (iter != kh_end(&ucm_shmat_ptrs)) {
+        size = kh_value(&ucm_shmat_ptrs, iter);
+        kh_del(ucm_ptr_size, &ucm_shmat_ptrs, iter);
+    } else {
+        size = ucm_get_shm_seg_size(shmaddr);
+    }
+    ucs_spin_unlock(&ucm_kh_lock);
+
+    ucm_dispatch_vm_munmap((void*)shmaddr, size);
+
+    event.shmdt.result  = -1;
+    event.shmdt.shmaddr = shmaddr;
+    ucm_event_dispatch(UCM_EVENT_SHMDT, &event);
+
+    ucm_event_leave();
+
+    return event.shmdt.result;
+}
+
+void *ucm_sbrk(intptr_t increment)
+{
+    ucm_event_t event;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_sbrk(increment=%+ld)", increment);
+
+    if (increment < 0) {
+        ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(ucm_orig_sbrk(0), increment),
+                               -increment);
+    }
+
+    event.sbrk.result    = MAP_FAILED;
+    event.sbrk.increment = increment;
+    ucm_event_dispatch(UCM_EVENT_SBRK, &event);
+
+    if ((increment > 0) && (event.sbrk.result != MAP_FAILED)) {
+        ucm_dispatch_vm_mmap(UCS_PTR_BYTE_OFFSET(ucm_orig_sbrk(0), -increment),
+                             increment);
+    }
+
+    ucm_event_leave();
+
+    return event.sbrk.result;
+}
+
+int ucm_brk(void *addr)
+{
+#if UCM_BISTRO_HOOKS
+    void *old_addr;
+    intptr_t increment;
+    ucm_event_t event;
+
+    old_addr  = ucm_brk_syscall(0);
+    /* in case if addr == NULL - it just returns current pointer */
+    increment = addr ? ((intptr_t)addr - (intptr_t)old_addr) : 0;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_brk(addr=%p)", addr);
+
+    if (increment < 0) {
+        ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(old_addr, increment),
+                               -increment);
+    }
+
+    event.sbrk.result    = (void*)-1;
+    event.sbrk.increment = increment;
+    ucm_event_dispatch(UCM_EVENT_SBRK, &event);
+
+    if ((increment > 0) && (event.sbrk.result != MAP_FAILED)) {
+        ucm_dispatch_vm_mmap(old_addr, increment);
+    }
+
+    ucm_event_leave();
+
+    return event.sbrk.result == MAP_FAILED ? -1 : 0;
+#else
+    return -1;
+#endif
+}
+
+int ucm_madvise(void *addr, size_t length, int advice)
+{
+    ucm_event_t event;
+
+    ucm_event_enter();
+
+    ucm_trace("ucm_madvise(addr=%p length=%zu advice=%d)", addr, length, advice);
+
+    /* madvise(MADV_DONTNEED) and madvise(MADV_FREE) are releasing pages */
+    if ((advice == MADV_DONTNEED)
+#if HAVE_DECL_MADV_REMOVE
+        || (advice == MADV_REMOVE)
+#endif
+#if HAVE_DECL_POSIX_MADV_DONTNEED
+        || (advice == POSIX_MADV_DONTNEED)
+#endif
+#if HAVE_DECL_MADV_FREE
+        || (advice == MADV_FREE)
+#endif
+        ) {
+        ucm_dispatch_vm_munmap(addr, length);
+    }
+
+    event.madvise.result = -1;
+    event.madvise.addr   = addr;
+    event.madvise.length = length;
+    event.madvise.advice = advice;
+    ucm_event_dispatch(UCM_EVENT_MADVISE, &event);
+
+    ucm_event_leave();
+
+    return event.madvise.result;
+}
+
+/* Insert 'handler' before the first list element which has a higher priority */
+void ucm_event_handler_add(ucm_event_handler_t *handler)
+{
+    ucm_event_handler_t *elem;
+
+    ucm_event_enter_exclusive();
+    ucs_list_for_each(elem, &ucm_event_handlers, list) {
+        if (handler->priority < elem->priority) {
+            ucs_list_insert_before(&elem->list, &handler->list);
+            ucm_event_leave();
+            return;
+        }
+    }
+
+    ucs_list_add_tail(&ucm_event_handlers, &handler->list);
+    ucm_event_leave();
+}
+
+void ucm_event_handler_remove(ucm_event_handler_t *handler)
+{
+    ucm_event_enter_exclusive();
+    ucs_list_del(&handler->list);
+    ucm_event_leave();
+}
+
+/* Drop VM and MEM_TYPE events from 'events', adding the VM native events */
+static int ucm_events_to_native_events(int events)
+{
+    int native_events;
+
+    native_events = events & ~(UCM_EVENT_VM_MAPPED | UCM_EVENT_VM_UNMAPPED |
+                               UCM_EVENT_MEM_TYPE_ALLOC | UCM_EVENT_MEM_TYPE_FREE);
+    if (events & UCM_EVENT_VM_MAPPED) {
+        native_events |= UCM_NATIVE_EVENT_VM_MAPPED;
+    }
+    if (events & UCM_EVENT_VM_UNMAPPED) {
+        native_events |= UCM_NATIVE_EVENT_VM_UNMAPPED;
+    }
+
+    return native_events;
+}
+
+static ucs_status_t ucm_event_install(int events)
+{
+    static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
+    UCS_MODULE_FRAMEWORK_DECLARE(ucm);
+    ucm_event_installer_t *event_installer;
+    int native_events, malloc_events;
+    ucs_status_t status;
+
+    UCS_INIT_ONCE(&init_once) {
+        ucm_prevent_dl_unload();
+    }
+
+    /* Replace aggregate events with the native events which make them */
+    native_events = ucm_events_to_native_events(events);
+
+    /* TODO lock */
+    status = ucm_mmap_install(native_events);
+    if (status != UCS_OK) {
+        ucm_debug("failed to install mmap events");
+        goto out_unlock;
+    }
+
+    ucm_debug("mmap hooks are ready");
+
+    malloc_events = events & ~(UCM_EVENT_MEM_TYPE_ALLOC |
+                               UCM_EVENT_MEM_TYPE_FREE);
+    status = ucm_malloc_install(malloc_events);
+    if (status != UCS_OK) {
+        ucm_debug("failed to install malloc events");
+        goto out_unlock;
+    }
+
+    ucm_debug("malloc hooks are ready");
+
+    /* Call extra event installers */
+    UCS_MODULE_FRAMEWORK_LOAD(ucm, UCS_MODULE_LOAD_FLAG_NODELETE);
+    ucs_list_for_each(event_installer, &ucm_event_installer_list, list) {
+        status = event_installer->install(events);
+        if (status != UCS_OK) {
+            goto out_unlock;
+        }
+    }
+
+    status = UCS_OK;
+
+out_unlock:
+    return status;
+
+}
+
+ucs_status_t ucm_set_event_handler(int events, int priority,
+                                   ucm_event_callback_t cb, void *arg)
+{
+    ucm_event_installer_t *event_installer;
+    ucm_event_handler_t *handler;
+    ucs_status_t status;
+    int flags;
+
+    if (events & ~(UCM_EVENT_MMAP|UCM_EVENT_MUNMAP|UCM_EVENT_MREMAP|
+                   UCM_EVENT_SHMAT|UCM_EVENT_SHMDT|
+                   UCM_EVENT_SBRK|
+                   UCM_EVENT_MADVISE|
+                   UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED|
+                   UCM_EVENT_MEM_TYPE_ALLOC|UCM_EVENT_MEM_TYPE_FREE|
+                   UCM_EVENT_FLAG_NO_INSTALL|
+                   UCM_EVENT_FLAG_EXISTING_ALLOC)) {
+        return UCS_ERR_INVALID_PARAM;
+    }
+
+    if (!ucm_global_opts.enable_events) {
+        return UCS_ERR_UNSUPPORTED;
+    }
+
+    /* separate event flags from real events */
+    flags   = events & (UCM_EVENT_FLAG_NO_INSTALL |
+                        UCM_EVENT_FLAG_EXISTING_ALLOC);
+    events &= ~flags;
+
+    if (!(flags & UCM_EVENT_FLAG_NO_INSTALL) && (events & ~ucm_external_events)) {
+        status = ucm_event_install(events & ~ucm_external_events);
+        if (status != UCS_OK) {
+            return status;
+        }
+    }
+
+    handler = malloc(sizeof(*handler));
+    if (handler == NULL) {
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    handler->events   = events;
+    handler->priority = priority;
+    handler->cb       = cb;
+    handler->arg      = arg;
+
+    ucm_event_handler_add(handler);
+
+    if (flags & UCM_EVENT_FLAG_EXISTING_ALLOC) {
+        ucs_list_for_each(event_installer, &ucm_event_installer_list, list) {
+            event_installer->get_existing_alloc(handler);
+        }
+    }
+
+    ucm_debug("added user handler (func=%p arg=%p) for events=0x%x prio=%d", cb,
+              arg, events, priority);
+    return UCS_OK;
+}
+
+void ucm_set_external_event(int events)
+{
+    ucm_event_enter_exclusive();
+    ucm_external_events |= events;
+    ucm_event_leave();
+}
+
+void ucm_unset_external_event(int events)
+{
+    ucm_event_enter_exclusive();
+    ucm_external_events &= ~events;
+    ucm_event_leave();
+}
+
+/* Clear 'events' on handlers matching (cb, arg); free those left with none */
+void ucm_unset_event_handler(int events, ucm_event_callback_t cb, void *arg)
+{
+    ucm_event_handler_t *elem, *tmp;
+    UCS_LIST_HEAD(gc_list);
+
+    ucm_event_enter_exclusive();
+    ucs_list_for_each_safe(elem, tmp, &ucm_event_handlers, list) {
+        if ((cb == elem->cb) && (arg == elem->arg)) {
+            elem->events &= ~events;
+            if (elem->events == 0) {
+                ucs_list_del(&elem->list);
+                ucs_list_add_tail(&gc_list, &elem->list);
+            }
+        }
+    }
+    ucm_event_leave();
+
+    /* Do not release memory while we hold event lock - may deadlock */
+    ucs_list_for_each_safe(elem, tmp, &gc_list, list) {
+        free(elem);
+    }
+}
+
+ucs_status_t ucm_test_events(int events)
+{
+    return ucm_mmap_test_installed_events(ucm_events_to_native_events(events));
+}
+
+UCS_STATIC_INIT {
+    ucs_spinlock_init(&ucm_kh_lock);
+    kh_init_inplace(ucm_ptr_size, &ucm_shmat_ptrs);
+}
+
+UCS_STATIC_CLEANUP {
+    ucs_status_t status;
+
+    kh_destroy_inplace(ucm_ptr_size, &ucm_shmat_ptrs);
+
+    status = ucs_spinlock_destroy(&ucm_kh_lock);
+    if (status != UCS_OK) {
+        ucm_warn("ucs_spinlock_destroy() failed (%d)", status);
+    }
+}
diff --git a/src/ucm/event/event.h b/src/ucm/event/event.h new file mode 100644 index 
0000000..763ac3b --- /dev/null +++ b/src/ucm/event/event.h @@ -0,0 +1,79 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCM_EVENT_H_ +#define UCM_EVENT_H_ + +#include +#include +#include +#include + +#define UCM_NATIVE_EVENT_VM_MAPPED (UCM_EVENT_MMAP | UCM_EVENT_MREMAP | \ + UCM_EVENT_SHMAT | UCM_EVENT_SBRK) + +#define UCM_NATIVE_EVENT_VM_UNMAPPED (UCM_EVENT_MMAP | UCM_EVENT_MUNMAP | \ + UCM_EVENT_MREMAP | UCM_EVENT_SHMDT | \ + UCM_EVENT_SHMAT | UCM_EVENT_SBRK | \ + UCM_EVENT_MADVISE) + + +typedef struct ucm_event_handler { + ucs_list_link_t list; + int events; + int priority; + ucm_event_callback_t cb; + void *arg; +} ucm_event_handler_t; + + +typedef struct ucm_event_installer { + ucs_status_t (*install)(int events); + void (*get_existing_alloc)(ucm_event_handler_t *handler); + ucs_list_link_t list; +} ucm_event_installer_t; + +extern ucs_list_link_t ucm_event_installer_list; + +ucs_status_t ucm_set_mmap_hooks(); + +void ucm_event_handler_add(ucm_event_handler_t *handler); + +void ucm_event_handler_remove(ucm_event_handler_t *handler); + +void ucm_event_dispatch(ucm_event_type_t event_type, ucm_event_t *event); + +void ucm_event_enter(); + +void ucm_event_enter_exclusive(); + +void ucm_event_leave(); + +static UCS_F_ALWAYS_INLINE void +ucm_dispatch_vm_mmap(void *addr, size_t length) +{ + ucm_event_t event; + + ucm_trace("vm_map addr=%p length=%zu", addr, length); + + event.vm_mapped.address = addr; + event.vm_mapped.size = length; + ucm_event_dispatch(UCM_EVENT_VM_MAPPED, &event); +} + +static UCS_F_ALWAYS_INLINE void +ucm_dispatch_vm_munmap(void *addr, size_t length) +{ + ucm_event_t event; + + ucm_trace("vm_unmap addr=%p length=%zu", addr, length); + + event.vm_unmapped.address = addr; + event.vm_unmapped.size = length; + ucm_event_dispatch(UCM_EVENT_VM_UNMAPPED, &event); +} + +#endif diff --git a/src/ucm/malloc/allocator.h b/src/ucm/malloc/allocator.h new file mode 100644 
index 0000000..9d8510f --- /dev/null +++ b/src/ucm/malloc/allocator.h @@ -0,0 +1,16 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCM_ALLOCATOR_H_ +#define UCM_ALLOCATOR_H_ + +#if HAVE_UCM_PTMALLOC286 +#include +#else +#error "No memory allocator is defined" +#endif + +#endif /* UCM_ALLOCATOR_H_ */ diff --git a/src/ucm/malloc/malloc_hook.c b/src/ucm/malloc/malloc_hook.c new file mode 100644 index 0000000..d971440 --- /dev/null +++ b/src/ucm/malloc/malloc_hook.c @@ -0,0 +1,911 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "malloc_hook.h" + +#ifdef HAVE_MALLOC_H +#include +#endif +#ifdef HAVE_MALLOC_NP_H +#include +#endif +#undef M_TRIM_THRESHOLD +#undef M_MMAP_THRESHOLD +#include "allocator.h" /* have to be included after malloc.h */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* make khash allocate memory directly from operating system */ +#define kmalloc ucm_sys_malloc +#define kcalloc ucm_sys_calloc +#define kfree ucm_sys_free +#define krealloc ucm_sys_realloc +#include + +#include +#include + + +/* Flags for install_state */ +#define UCM_MALLOC_INSTALLED_HOOKS UCS_BIT(0) /* Installed malloc hooks */ +#define UCM_MALLOC_INSTALLED_SBRK_EVH UCS_BIT(1) /* Installed sbrk event handler */ +#define UCM_MALLOC_INSTALLED_OPT_SYMS UCS_BIT(2) /* Installed optional symbols */ +#define UCM_MALLOC_INSTALLED_MALL_SYMS UCS_BIT(3) /* Installed malloc symbols */ + + +/* Mangled symbols of C++ allocators */ +#define UCM_OPERATOR_NEW_SYMBOL "_Znwm" +#define UCM_OPERATOR_DELETE_SYMBOL "_ZdlPv" +#define UCM_OPERATOR_VEC_NEW_SYMBOL "_Znam" +#define UCM_OPERATOR_VEC_DELETE_SYMBOL "_ZdaPv" + +/* Maximal size for mmap threshold - 32mb */ +#define UCM_DEFAULT_MMAP_THRESHOLD_MAX (4ul 
* 1024 * 1024 * sizeof(long)) + +/* Take out 12 LSB's, since they are the page-offset on most systems */ +#define ucm_mmap_addr_hash(_addr) \ + (khint32_t)((_addr >> 12) ^ (_addr & UCS_MASK(12))) + +#define ucm_mmap_ptr_hash(_p) ucm_mmap_addr_hash((uintptr_t)(_p)) +#define ucm_mmap_ptr_equal(_p1, _p2) ((_p1) == (_p2)) + +KHASH_INIT(mmap_ptrs, void*, char, 0, ucm_mmap_ptr_hash, ucm_mmap_ptr_equal) + + +/* Pointer to memory release function */ +typedef void (*ucm_release_func_t)(void *ptr); + +/* Pointer to get usable size function */ +typedef size_t (*ucm_usable_size_func_t)(void *ptr); + + +typedef struct ucm_malloc_hook_state { + /* + * State of hook installment + */ + pthread_mutex_t install_mutex; /* Protect hooks installation */ + int install_state; /* State of hook installation */ + int installed_events; /* Which events are working */ + int mmap_thresh_set; /* mmap threshold set by user */ + int trim_thresh_set; /* trim threshold set by user */ + int hook_called; /* Our malloc hook was called */ + size_t max_freed_size; /* Maximal size released so far */ + + ucm_usable_size_func_t usable_size; /* function pointer to get usable size */ + + ucm_release_func_t free; /* function pointer to release memory */ + + /* + * Track record of which pointers are ours + */ + ucs_spinlock_t lock; /* Protect heap counters. + Note: Cannot modify events when this lock + is held - may deadlock */ + /* Our heap address range. Used to identify whether a released pointer is ours, + * or was allocated by the previous heap manager. */ + void *heap_start; + void *heap_end; + + /* Save the pointers that we have allocated with mmap, so when they are + * released we would know they are ours, despite the fact they are not in the + * heap address range. 
*/ + khash_t(mmap_ptrs) ptrs; + + /** + * Save the environment strings we've allocated + */ + pthread_mutex_t env_lock; + char **env_strs; + unsigned num_env_strs; +} ucm_malloc_hook_state_t; + +/* The single global instance of the hook state. + * heap_start starts as (void*)-1, which ucm_malloc_sbrk() checks to detect + * that the start of the heap was not recorded yet. The 'lock' field is not + * initialized here; see UCS_STATIC_INIT at the end of this file. + */ +static ucm_malloc_hook_state_t ucm_malloc_hook_state = { + .install_mutex = PTHREAD_MUTEX_INITIALIZER, + .install_state = 0, + .installed_events = 0, + .mmap_thresh_set = 0, + .trim_thresh_set = 0, + .hook_called = 0, + .max_freed_size = 0, + .usable_size = NULL, + .free = NULL, + .heap_start = (void*)-1, + .heap_end = (void*)-1, + .ptrs = {0}, + .env_lock = PTHREAD_MUTEX_INITIALIZER, + .env_strs = NULL, + .num_env_strs = 0 +}; + +int ucm_dlmallopt_get(int); /* implemented in ptmalloc */ +/* Insert 'ptr' into the hash of pointers which are ours but lie outside the + * heap address range. Done under the state spinlock; asserts that the hash + * insertion succeeded. + */ +static void ucm_malloc_mmaped_ptr_add(void *ptr) +{ + int hash_extra_status; + khiter_t hash_it; + + ucs_spin_lock(&ucm_malloc_hook_state.lock); + + hash_it = kh_put(mmap_ptrs, &ucm_malloc_hook_state.ptrs, ptr, + &hash_extra_status); + ucs_assert_always(hash_extra_status >= 0); + ucs_assert_always(hash_it != kh_end(&ucm_malloc_hook_state.ptrs)); + + ucs_spin_unlock(&ucm_malloc_hook_state.lock); +} +/* Look up 'ptr' in the pointer hash and delete it if present. + * Returns 1 if the pointer was found (and removed), 0 otherwise. + */ +static int ucm_malloc_mmaped_ptr_remove_if_exists(void *ptr) +{ + khiter_t hash_it; + int found; + + ucs_spin_lock(&ucm_malloc_hook_state.lock); + + hash_it = kh_get(mmap_ptrs, &ucm_malloc_hook_state.ptrs, ptr); + if (hash_it == kh_end(&ucm_malloc_hook_state.ptrs)) { + found = 0; + } else { + found = 1; + kh_del(mmap_ptrs, &ucm_malloc_hook_state.ptrs, hash_it); + } + + ucs_spin_unlock(&ucm_malloc_hook_state.lock); + return found; +} +/* Returns non-zero if 'ptr' is inside [heap_start, heap_end). The range is + * read under the state spinlock. + */ +static int ucm_malloc_is_address_in_heap(void *ptr) +{ + int in_heap; + + ucs_spin_lock(&ucm_malloc_hook_state.lock); + in_heap = (ptr >= ucm_malloc_hook_state.heap_start) && + (ptr < ucm_malloc_hook_state.heap_end); + ucs_spin_unlock(&ucm_malloc_hook_state.lock); + return in_heap; +} +/* Decide whether 'ptr' is ours: either it is inside the heap range, or it is + * found in the pointer hash. In the latter case it is also REMOVED from the + * hash, so this must only be called when the pointer is about to be released + * or reallocated. Returns 1 if ours, 0 if foreign. 'debug_name' is used only + * in the trace message. + */ +static int ucm_malloc_address_remove_if_managed(void *ptr, const char *debug_name) +{ + int is_managed; + + if (ucm_malloc_is_address_in_heap(ptr)) { + is_managed = 1; + } 
else { + is_managed = ucm_malloc_mmaped_ptr_remove_if_exists(ptr); + } + + ucm_trace("%s(ptr=%p) - %s (heap [%p..%p])", debug_name, ptr, + is_managed ? "ours" : "foreign", + ucm_malloc_hook_state.heap_start, ucm_malloc_hook_state.heap_end); + return is_managed; +} + +static void ucm_malloc_allocated(void *ptr, size_t size, const char *debug_name) +{ + VALGRIND_MALLOCLIKE_BLOCK(ptr, size, 0, 0); + if (ucm_malloc_is_address_in_heap(ptr)) { + ucm_trace("%s(size=%zu)=%p, in heap [%p..%p]", debug_name, size, ptr, + ucm_malloc_hook_state.heap_start, ucm_malloc_hook_state.heap_end); + } else { + ucm_trace("%s(size=%zu)=%p, mmap'ed", debug_name, size, ptr); + ucm_malloc_mmaped_ptr_add(ptr); + } +} + +static void ucm_release_foreign_block(void *ptr, ucm_release_func_t orig_free, + const char *debug_name) +{ + if (RUNNING_ON_VALGRIND) { + /* We want to keep valgrind happy and release foreign memory as well. + * Otherwise, it's safer to do nothing. + */ + if (orig_free == NULL) { + ucm_fatal("%s(): foreign block release function is NULL", debug_name); + } + + ucm_trace("%s: release foreign block %p", debug_name, ptr); + orig_free(ptr); + } +} + +static void *ucm_malloc_impl(size_t size, const char *debug_name) +{ + void *ptr; + + ucm_malloc_hook_state.hook_called = 1; + if (ucm_global_opts.alloc_alignment > 1) { + ptr = ucm_dlmemalign(ucm_global_opts.alloc_alignment, size); + } else { + ptr = ucm_dlmalloc(size); + } + ucm_malloc_allocated(ptr, size, debug_name); + return ptr; +} + +static void ucm_malloc_adjust_thresholds(size_t size) +{ + int mmap_thresh; + + if (size > ucm_malloc_hook_state.max_freed_size) { + /* Valgrind limits the size of brk() segments to 8mb, so must use mmap + * for large allocations. 
+ */ + if (!RUNNING_ON_VALGRIND && + ucm_global_opts.enable_dynamic_mmap_thresh && + !ucm_malloc_hook_state.trim_thresh_set && + !ucm_malloc_hook_state.mmap_thresh_set) { + /* new mmap threshold is increased to the size of released block + * (but not above UCM_DEFAULT_MMAP_THRESHOLD_MAX), + * new trim threshold is twice that size. + */ + mmap_thresh = ucs_min(ucs_max(ucm_dlmallopt_get(M_MMAP_THRESHOLD), size), + UCM_DEFAULT_MMAP_THRESHOLD_MAX); + ucm_trace("adjust mmap threshold to %d", mmap_thresh); + ucm_dlmallopt(M_MMAP_THRESHOLD, mmap_thresh); + ucm_dlmallopt(M_TRIM_THRESHOLD, mmap_thresh * 2); + } + + /* avoid adjusting the threshold for every released block, do it only + * if the size is larger than ever before. + */ + ucm_malloc_hook_state.max_freed_size = size; + } +} +/* Release a block which is ours. 'size' is the size of the block, used for + * the valgrind client request and for threshold adjustment. + */ +static inline void ucm_mem_free(void *ptr, size_t size) +{ + VALGRIND_FREELIKE_BLOCK(ptr, 0); + VALGRIND_MAKE_MEM_UNDEFINED(ptr, size); /* Make memory accessible to ptmalloc3 */ + ucm_malloc_adjust_thresholds(size); + ucm_dlfree(ptr); +} +/* Common release path: NULL is ignored, a pointer of ours is released with + * our allocator, anything else is handed to ucm_release_foreign_block() + * together with 'orig_free'. + */ +static void ucm_free_impl(void *ptr, ucm_release_func_t orig_free, + const char *debug_name) +{ + ucm_malloc_hook_state.hook_called = 1; + + if (ptr == NULL) { + /* Ignore */ + } else if (ucm_malloc_address_remove_if_managed(ptr, debug_name)) { + ucm_mem_free(ptr, ucm_dlmalloc_usable_size(ptr)); + } else { + ucm_release_foreign_block(ptr, orig_free, debug_name); + } +} +/* Common aligned-allocation path. The effective alignment is the larger of + * 'alignment' and ucm_global_opts.alloc_alignment. + */ +static void *ucm_memalign_impl(size_t alignment, size_t size, const char *debug_name) +{ + void *ptr; + + ucm_malloc_hook_state.hook_called = 1; + ptr = ucm_dlmemalign(ucs_max(alignment, ucm_global_opts.alloc_alignment), size); + ucm_malloc_allocated(ptr, size, debug_name); + return ptr; +} +/* malloc() replacement; 'caller' is not used. */ +static void *ucm_malloc(size_t size, const void *caller) +{ + return ucm_malloc_impl(size, "malloc"); +} +/* Usable size of 'mem': asks the saved original function if 'foreign' is + * non-zero, and our allocator otherwise. + */ +static size_t ucm_malloc_usable_size_common(void *mem, int foreign) +{ + return foreign ? 
ucm_malloc_hook_state.usable_size(mem) : + dlmalloc_usable_size(mem); +} + +/* realloc() replacement; 'caller' is not used. + * A foreign old block (or any old block when running on valgrind) is handled + * by allocating a new block from our allocator, copying the data and + * releasing the old block. Otherwise the request goes to ucm_dlrealloc(). + * Returns the new pointer, or NULL if the new block could not be allocated; + * in the allocate-and-copy path the old block is then left untouched. + */ +static void *ucm_realloc(void *oldptr, size_t size, const void *caller) +{ + void *newptr; + size_t oldsz; + int foreign; + + ucm_malloc_hook_state.hook_called = 1; + if (oldptr != NULL) { + foreign = !ucm_malloc_address_remove_if_managed(oldptr, "realloc"); + if (RUNNING_ON_VALGRIND || foreign) { + /* If pointer was created by original malloc(), allocate the new pointer + * with the new heap, and copy out the data. Then, release the old pointer. + * We do the same if we are running with valgrind, so we could use client + * requests properly. + */ + newptr = ucm_dlmalloc(size); + if (newptr == NULL) { + /* Out of memory: do not copy into NULL. The old block stays + * valid, so if the lookup above removed it from the pointer + * hash, put it back before failing. + */ + if (!foreign && !ucm_malloc_is_address_in_heap(oldptr)) { + ucm_malloc_mmaped_ptr_add(oldptr); + } + return NULL; + } + ucm_malloc_allocated(newptr, size, "realloc"); + + oldsz = ucm_malloc_usable_size_common(oldptr, foreign); + memcpy(newptr, oldptr, ucs_min(size, oldsz)); + + if (foreign) { + ucm_release_foreign_block(oldptr, ucm_malloc_hook_state.free, "realloc"); + } else{ + ucm_mem_free(oldptr, oldsz); + } + return newptr; + } + } + + newptr = ucm_dlrealloc(oldptr, size); + ucm_malloc_allocated(newptr, size, "realloc"); + return newptr; +} + +/* free() replacement; 'caller' is not used. */ +static void ucm_free(void *ptr, const void *caller) +{ + ucm_free_impl(ptr, ucm_malloc_hook_state.free, "free"); +} + +/* memalign() replacement; 'caller' is not used. */ +static void *ucm_memalign(size_t alignment, size_t size, const void *caller) +{ + return ucm_memalign_impl(alignment, size, "memalign"); +} + +/* calloc() replacement. Returns zero-filled memory for 'nmemb' elements of + * 'size' bytes, or NULL (with errno set to ENOMEM) if nmemb * size does not + * fit in size_t. + */ +static void* ucm_calloc(size_t nmemb, size_t size) +{ + size_t total; + void *ptr; + + /* Without this check the product wraps around and the caller would get + * a block smaller than requested. + */ + if ((size != 0) && (nmemb > ((size_t)-1) / size)) { + ucm_malloc_hook_state.hook_called = 1; + errno = ENOMEM; + return NULL; + } + + total = nmemb * size; + ptr = ucm_malloc_impl(total, "calloc"); + if (ptr != NULL) { + memset(ptr, 0, total); + } + return ptr; +} + +/* valloc() replacement. valloc() is specified to return page-aligned memory, + * so allocate through the aligned path with the page size as alignment. + */ +static void* ucm_valloc(size_t size) +{ + return ucm_memalign_impl(ucm_get_page_size(), size, "valloc"); +} + +/* posix_memalign() replacement. Returns 0 and stores the pointer in *memptr + * on success, EINVAL if 'alignment' is not a power of 2, ENOMEM if the + * allocation failed. + */ +static int ucm_posix_memalign(void **memptr, size_t alignment, size_t size) +{ + void *ptr; + + if (!ucs_is_pow2(alignment)) { + return EINVAL; + } + + ptr = ucm_memalign_impl(alignment, size, "posix_memalign"); + if (ptr == NULL) { + return ENOMEM; + } + + *memptr = ptr; + return 0; +} + +/* Replacement for C++ operator new (see UCM_OPERATOR_NEW_SYMBOL). */ +static void* ucm_operator_new(size_t size) +{ + return 
ucm_malloc_impl(size, "operator new"); +} +/* Replacement for C++ operator delete. The original function is looked up + * with ucm_reloc_get_orig() on first use and cached in a function-local + * static; it is passed to ucm_free_impl() for releasing foreign blocks. + */ +static void ucm_operator_delete(void* ptr) +{ + static ucm_release_func_t orig_delete = NULL; + if (orig_delete == NULL) { + orig_delete = + (ucm_release_func_t)ucm_reloc_get_orig(UCM_OPERATOR_DELETE_SYMBOL, + ucm_operator_delete); + } + ucm_free_impl(ptr, orig_delete, "operator delete"); +} +/* Replacement for C++ operator new[] (see UCM_OPERATOR_VEC_NEW_SYMBOL). */ +static void* ucm_operator_vec_new(size_t size) +{ + return ucm_malloc_impl(size, "operator new[]"); +} +/* Replacement for C++ operator delete[]; same scheme as ucm_operator_delete(). */ +static void ucm_operator_vec_delete(void* ptr) +{ + static ucm_release_func_t orig_vec_delete = NULL; + if (orig_vec_delete == NULL) { + orig_vec_delete = + (ucm_release_func_t)ucm_reloc_get_orig(UCM_OPERATOR_VEC_DELETE_SYMBOL, + ucm_operator_vec_delete); + } + ucm_free_impl(ptr, orig_vec_delete, "operator delete[]"); +} +/* vasprintf() replacement which allocates the result with ucm_malloc(). + * The length is measured first on a copy of 'ap', then the string is + * formatted into a buffer of exactly that length plus the terminator. + * Returns the vsnprintf() result on success and stores the buffer in *strp; + * returns a negative value on failure, in which case *strp is not written. + */ +static int ucm_vasprintf(char **strp, const char *fmt, va_list ap) +{ + va_list ap_copy; + size_t length; + char *str; + int ret; + + va_copy(ap_copy, ap); + ret = vsnprintf(NULL, 0, fmt, ap_copy); + va_end(ap_copy); + if (ret < 0) { + return ret; + } + + length = ret + 1; + str = ucm_malloc(length, NULL); + if (str == NULL) { + return -1; + } + + ret = vsnprintf(str, length, fmt, ap); + if (ret < 0) { + ucm_free(str, NULL); + } else { + *strp = str; + } + return ret; +} +/* asprintf() replacement; forwards to ucm_vasprintf(). */ +static int ucm_asprintf(char **strp, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = ucm_vasprintf(strp, fmt, ap); + va_end(ap); + return ret; +} + +/* + * We remember the string we pass to putenv() so we would be able to release + * them if they are overwritten (and thus avoid leaks). 
+ */ +static int ucm_add_to_environ(char *env_str) +{ + char *saved_env_str; + unsigned index; + size_t len; + char *p; + + /* Get name length */ + p = strchr(env_str, '='); + if (p == NULL) { + len = strlen(env_str); /* Compare whole string */ + } else { + len = p + 1 - env_str; /* Compare up to and including the '=' character */ + } + + /* Check if we already have variable with same name */ + index = 0; + while (index < ucm_malloc_hook_state.num_env_strs) { + saved_env_str = ucm_malloc_hook_state.env_strs[index]; + if ((strlen(saved_env_str) >= len) && !strncmp(env_str, saved_env_str, len)) { + ucm_trace("replace `%s' with `%s'", saved_env_str, env_str); + ucm_free(saved_env_str, NULL); + goto out_insert; + } + ++index; + } + + /* Not found - enlarge array by one */ + index = ucm_malloc_hook_state.num_env_strs; + ++ucm_malloc_hook_state.num_env_strs; + ucm_malloc_hook_state.env_strs = + ucm_realloc(ucm_malloc_hook_state.env_strs, + sizeof(char*) * ucm_malloc_hook_state.num_env_strs, + NULL); + +out_insert: + ucm_malloc_hook_state.env_strs[index] = env_str; + return 0; +} + +/* + * We need to replace setenv() because glibc keeps a search tree of environment + * strings and releases it with *original* free() (in __tdestroy). + * If we always use putenv() instead of setenv() this search tree will not be used. 
+ * + * Returns 0 on success (including when the variable exists and 'overwrite' + * is zero). Returns -1 with errno set to EINVAL if 'name' is NULL, empty or + * contains '=', and -1 with errno set to ENOMEM if the string could not be + * allocated. A putenv() failure is returned as is. + */ +static int ucm_setenv(const char *name, const char *value, int overwrite) +{ + char *curr_value; + char *env_str; + int ret; + + /* Same name validation setenv() is specified to do; without it a NULL + * name is dereferenced and a name with '=' yields a malformed entry. + */ + if ((name == NULL) || (name[0] == '\0') || (strchr(name, '=') != NULL)) { + errno = EINVAL; + return -1; + } + + pthread_mutex_lock(&ucm_malloc_hook_state.env_lock); + curr_value = getenv(name); + if ((curr_value != NULL) && !overwrite) { + ret = 0; + goto out; + } + + env_str = ucm_malloc(strlen(name) + 1 + strlen(value) + 1, NULL); + if (env_str == NULL) { + errno = ENOMEM; + ret = -1; + goto out; + } + + sprintf(env_str, "%s=%s", name, value); + ret = putenv(env_str); + if (ret != 0) { + goto err_free; + } + + /* env_str now belongs to the environment; remember it for later release */ + ucm_add_to_environ(env_str); + ret = 0; + goto out; + +err_free: + ucm_free(env_str, NULL); +out: + pthread_mutex_unlock(&ucm_malloc_hook_state.env_lock); + return ret; +} + +/* Event handler for sbrk events: records the heap start on the first event + * and refreshes the heap end from the current program break, under the state + * spinlock. 'event_type' and 'arg' are not used. + */ +static void ucm_malloc_sbrk(ucm_event_type_t event_type, + ucm_event_t *event, void *arg) +{ + ucs_spin_lock(&ucm_malloc_hook_state.lock); + + /* Copy return value from call. We assume the event handler uses a lock. */ + if (ucm_malloc_hook_state.heap_start == (void*)-1) { + ucm_malloc_hook_state.heap_start = event->sbrk.result; /* sbrk() returns the previous break */ + } + ucm_malloc_hook_state.heap_end = ucm_orig_sbrk(0); + + ucm_trace("sbrk(%+ld)=%p - adjusting heap to [%p..%p]", + event->sbrk.increment, event->sbrk.result, + ucm_malloc_hook_state.heap_start, ucm_malloc_hook_state.heap_end); + + ucs_spin_unlock(&ucm_malloc_hook_state.lock); +} + +/* Returns non-zero if no further installation is needed for 'events'; + * 'title' is used only in the debug message. + */ +static int ucs_malloc_is_ready(int events, const char *title) +{ + /* + * In RELOC mode, if malloc hooks are installed - we're good here. + * Otherwise, we have to make sure all events are indeed working - because + * we can't be sure what the existing implementation is doing. + * The implication of this is that in some cases (e.g infinite mmap threshold) + * we will install our memory hooks, even though it may not be required. 
+ */ + ucm_debug("ucs_malloc_is_ready(%s): have 0x%x/0x%x events;" + " mmap_mode=%d hook_called=%d", + title, ucm_malloc_hook_state.installed_events, events, + ucm_mmap_hook_mode(), ucm_malloc_hook_state.hook_called); + + return ((ucm_mmap_hook_mode() == UCM_MMAP_HOOK_RELOC) && + ucm_malloc_hook_state.hook_called) || + ucs_test_all_flags(ucm_malloc_hook_state.installed_events, events); +} +/* Temporary event handler used by ucm_malloc_test(): 'arg' points to an int + * bitmap into which every received event type is OR-ed. + */ +static void ucm_malloc_event_test_callback(ucm_event_type_t event_type, + ucm_event_t *event, void *arg) +{ + int *out_events = arg; + + *out_events |= event_type; +} + +/* Has to be called with install_mutex held. + * Exercises the allocator (RELOC mode) or fires mmap events directly (other + * modes) while a temporary handler is installed, then adds the event types + * which were actually received to ucm_malloc_hook_state.installed_events. + */ +static void ucm_malloc_test(int events) +{ + static const size_t small_alloc_count = 128; + static const size_t small_alloc_size = 4096; + static const size_t large_alloc_size = 4 * UCS_MBYTE; + ucm_event_handler_t handler; + void *p[small_alloc_count]; + int out_events; + int i; + + ucm_debug("testing malloc..."); + + /* Install a temporary event handler which will add the supported event + * type to out_events bitmap. 
+ */ + handler.events = events; + handler.priority = -1; + handler.cb = ucm_malloc_event_test_callback; + handler.arg = &out_events; + out_events = 0; + + ucm_event_handler_add(&handler); + + if (ucm_mmap_hook_mode() == UCM_MMAP_HOOK_RELOC) { + /* Trigger both small and large allocations + * TODO check address / stop all threads */ + for (i = 0; i < small_alloc_count; ++i) { + p[i] = malloc(small_alloc_size); + } + for (i = 0; i < small_alloc_count; ++i) { + free(p[i]); + } + + p[0] = malloc(large_alloc_size); + p[0] = realloc(p[0], large_alloc_size * 2); + free(p[0]); + + /* trim only if the calls above actually reached our allocator */ + if (ucm_malloc_hook_state.hook_called) { + ucm_dlmalloc_trim(0); + } + } else { + /* in bistro mode we can't guarantee event fire on malloc calls, + * let's just try to call sbrk directly & catch it */ + ucm_fire_mmap_events(events); + } + + ucm_event_handler_remove(&handler); + + ucm_malloc_hook_state.installed_events |= out_events; + + ucm_debug("malloc test: have 0x%x out of 0x%x, malloc/free hooks were%s called", + ucm_malloc_hook_state.installed_events, events, + ucm_malloc_hook_state.hook_called ? "" : " not"); +} +/* Called by ucm_malloc_install() right before the malloc symbols are patched. */ +static void ucm_malloc_populate_glibc_cache() +{ + char hostname[HOST_NAME_MAX]; + + /* Trigger NSS initialization before we install malloc hooks. + * This is needed because NSS could allocate strings with our malloc(), but + * release them with the original free(). 
*/ + (void)getlogin(); + (void)gethostbyname("localhost"); + (void)gethostname(hostname, sizeof(hostname)); + (void)gethostbyname(hostname); +} + +static void ucm_malloc_install_symbols(ucm_reloc_patch_t *patches) +{ + ucm_reloc_patch_t *patch; + + for (patch = patches; patch->symbol != NULL; ++patch) { + ucm_reloc_modify(patch); + } +} + +static void* ucm_malloc_patchlist_prev_value(const ucm_reloc_patch_t *patches, + const char *symbol) +{ + const ucm_reloc_patch_t *patch; + for (patch = patches; patch->symbol != NULL; ++patch) { + if (!strcmp(patch->symbol, symbol)) { + ucm_debug("previous function pointer for '%s' is %p", symbol, + patch->prev_value); + if (patch->prev_value == NULL) { + goto not_found; + } + + return patch->prev_value; + } + } + +not_found: + ucm_fatal("could not find the previous value of '%s'", symbol); + return NULL; +} + +static int ucm_malloc_mallopt(int param_number, int value) +{ + int success; + + success = ucm_dlmallopt(param_number, value); + if (success) { + switch (param_number) { + case M_TRIM_THRESHOLD: + ucm_malloc_hook_state.trim_thresh_set = 1; + break; + case M_MMAP_THRESHOLD: + ucm_malloc_hook_state.mmap_thresh_set = 1; + break; + } + } + return success; +} + +static size_t ucm_malloc_usable_size(void *mem) +{ + return ucm_malloc_usable_size_common(mem, + !ucm_malloc_is_address_in_heap(mem)); +} + +static char *ucm_malloc_blacklist[] = { + "libnvidia-fatbinaryloader.so", + NULL +}; + +static ucm_reloc_patch_t ucm_malloc_symbol_patches[] = { + { .symbol = "free", .value = ucm_free, .blacklist = ucm_malloc_blacklist }, + { .symbol = "realloc", .value = ucm_realloc, .blacklist = ucm_malloc_blacklist }, + { .symbol = "malloc", .value = ucm_malloc, .blacklist = ucm_malloc_blacklist }, + { .symbol = "memalign", .value = ucm_memalign, .blacklist = ucm_malloc_blacklist }, + { .symbol = "calloc", .value = ucm_calloc, .blacklist = ucm_malloc_blacklist }, + { .symbol = "valloc", .value = ucm_valloc, .blacklist = ucm_malloc_blacklist }, 
+ { .symbol = "posix_memalign", .value = ucm_posix_memalign, .blacklist = ucm_malloc_blacklist }, + { .symbol = "setenv", .value = ucm_setenv, .blacklist = ucm_malloc_blacklist }, + { .symbol = UCM_OPERATOR_NEW_SYMBOL, .value = ucm_operator_new, .blacklist = ucm_malloc_blacklist }, + { .symbol = UCM_OPERATOR_DELETE_SYMBOL, .value = ucm_operator_delete, .blacklist = ucm_malloc_blacklist }, + { .symbol = UCM_OPERATOR_VEC_NEW_SYMBOL, .value = ucm_operator_vec_new, .blacklist = ucm_malloc_blacklist }, + { .symbol = UCM_OPERATOR_VEC_DELETE_SYMBOL, .value = ucm_operator_vec_delete, .blacklist = ucm_malloc_blacklist }, + { .symbol = "asprintf", .value = ucm_asprintf, .blacklist = ucm_malloc_blacklist }, + { .symbol = "__asprintf", .value = ucm_asprintf, .blacklist = ucm_malloc_blacklist }, + { .symbol = "vasprintf", .value = ucm_vasprintf, .blacklist = ucm_malloc_blacklist }, + { .symbol = NULL, .value = NULL } +}; +/* Symbols which are patched only after the main hooks were found to work + * (see ucm_malloc_install_optional_symbols()). Terminated by a NULL entry. + */ +static ucm_reloc_patch_t ucm_malloc_optional_symbol_patches[] = { + { "mallopt", ucm_malloc_mallopt }, + { "mallinfo", ucm_dlmallinfo }, + { "malloc_stats", ucm_dlmalloc_stats }, + { "malloc_trim", ucm_dlmalloc_trim }, + { "malloc_usable_size", ucm_malloc_usable_size }, + { NULL, NULL } +}; +/* Patch the optional symbols once (guarded by UCM_MALLOC_INSTALLED_OPT_SYMS) + * and save the previous malloc_usable_size() so it can be used for foreign + * pointers. + */ +static void ucm_malloc_install_optional_symbols() +{ + if (!(ucm_malloc_hook_state.install_state & UCM_MALLOC_INSTALLED_OPT_SYMS)) { + ucm_malloc_install_symbols(ucm_malloc_optional_symbol_patches); + ucm_malloc_hook_state.usable_size = + (ucm_usable_size_func_t)ucm_malloc_patchlist_prev_value( + ucm_malloc_optional_symbol_patches, + "malloc_usable_size"); + ucm_malloc_hook_state.install_state |= UCM_MALLOC_INSTALLED_OPT_SYMS; + } +} +/* Apply MALLOC_TRIM_THRESHOLD_ / MALLOC_MMAP_THRESHOLD_ from the environment + * to our allocator. The values are parsed with atoi(). + */ +static void ucm_malloc_set_env_mallopt() +{ + /* copy values of M_MMAP_THRESHOLD and M_TRIM_THRESHOLD + * if they were overridden by the user + */ + char *p; + + p = getenv("MALLOC_TRIM_THRESHOLD_"); + if (p) { + ucm_debug("set trim_thresh to %d", atoi(p)); + ucm_malloc_mallopt(M_TRIM_THRESHOLD, atoi(p)); + } + + p = 
getenv("MALLOC_MMAP_THRESHOLD_"); + if (p) { + ucm_debug("set mmap_thresh to %d", atoi(p)); + ucm_malloc_mallopt(M_MMAP_THRESHOLD, atoi(p)); + } +} +/* Fill the 'usable_size' and 'free' function pointers of the hook state with + * the currently visible malloc_usable_size() and free(), unless already set. + */ +static void ucm_malloc_init_orig_funcs() +{ + /* We cannot use global initializer for these variables; if we do it, + * GCC makes them part of .got, and patching .got actually changes the + * values of these global variables. As a workaround, we initialize + * them here. + * NOTE This also makes sure that libucm.so has a reference to these symbols, + * so patching the relocation tables would find their previous value by libucm + */ + if (ucm_malloc_hook_state.usable_size == NULL) { + ucm_malloc_hook_state.usable_size = (size_t (*)(void *))malloc_usable_size; + } + if ( ucm_malloc_hook_state.free == NULL) { + ucm_malloc_hook_state.free = free; + } +} +/* Make sure the memory events in 'events' are generated for heap operations. + * Steps, each followed by a test and skipped if already done: install the + * sbrk event handler, install glibc malloc hooks (if compiled in and + * enabled), patch the malloc symbols (if enabled). + * Returns UCS_OK if the events are working, UCS_ERR_UNSUPPORTED otherwise. + * The whole function runs under install_mutex. + */ +ucs_status_t ucm_malloc_install(int events) +{ + static ucm_event_handler_t sbrk_handler = { + .events = UCM_EVENT_SBRK, + .priority = 1000, + .cb = ucm_malloc_sbrk + }; + ucs_status_t status; + + pthread_mutex_lock(&ucm_malloc_hook_state.install_mutex); + + ucm_malloc_init_orig_funcs(); + + if (ucs_malloc_is_ready(events, "before test")) { + goto out_succ; + } + + ucm_malloc_test(events); + if (ucs_malloc_is_ready(events, "after test")) { + goto out_succ; + } + + if (!ucm_malloc_hook_state.hook_called) { +#ifdef HAVE_MALLOC_TRIM + /* Try to leak less memory from original malloc */ + malloc_trim(0); +#endif + } + + if (!(ucm_malloc_hook_state.install_state & UCM_MALLOC_INSTALLED_SBRK_EVH)) { + ucm_debug("installing malloc-sbrk event handler"); + ucm_event_handler_add(&sbrk_handler); + ucm_malloc_hook_state.install_state |= UCM_MALLOC_INSTALLED_SBRK_EVH; + } + + /* When running on valgrind, don't even try malloc hooks. + * We want to release original blocks to silence the leak check, so we must + * have a way to call the original free(), also these hooks don't work with + * valgrind anyway. 
+ */ +#if HAVE_MALLOC_HOOK + if (ucm_global_opts.enable_malloc_hooks) { + /* Install using malloc hooks. + * TODO detect glibc support in configure-time. + */ + if (!(ucm_malloc_hook_state.install_state & UCM_MALLOC_INSTALLED_HOOKS)) { + ucm_debug("installing malloc hooks"); + __free_hook = ucm_free; + __realloc_hook = ucm_realloc; + __malloc_hook = ucm_malloc; + __memalign_hook = ucm_memalign; + ucm_malloc_hook_state.install_state |= UCM_MALLOC_INSTALLED_HOOKS; + } + + /* Just installed the hooks, test again. */ + ucm_malloc_test(events); + if (ucm_malloc_hook_state.hook_called) { + goto out_install_opt_syms; + } + } else +#endif + { + ucm_debug("using malloc hooks is disabled by configuration"); + } + + /* Install using malloc symbols */ + if (ucm_global_opts.enable_malloc_reloc) { + if (!(ucm_malloc_hook_state.install_state & UCM_MALLOC_INSTALLED_MALL_SYMS)) { + ucm_debug("installing malloc relocations"); + ucm_malloc_populate_glibc_cache(); + ucm_malloc_install_symbols(ucm_malloc_symbol_patches); + /* from now on foreign blocks are released with the free() which was + * in place before patching */ + ucm_malloc_hook_state.free = + (ucm_release_func_t)ucm_malloc_patchlist_prev_value( + ucm_malloc_symbol_patches, "free"); + ucm_malloc_hook_state.install_state |= UCM_MALLOC_INSTALLED_MALL_SYMS; + } + } else { + ucm_debug("installing malloc relocations is disabled by configuration"); + } + + /* Just installed the symbols, test again */ + ucm_malloc_test(events); + if (ucs_malloc_is_ready(events, "after install")) { + goto out_install_opt_syms; + } + + status = UCS_ERR_UNSUPPORTED; + goto out_unlock; + +out_install_opt_syms: + ucm_malloc_install_optional_symbols(); + ucm_malloc_set_env_mallopt(); +out_succ: + status = UCS_OK; +out_unlock: + pthread_mutex_unlock(&ucm_malloc_hook_state.install_mutex); + return status; +} +/* Reset the dynamic threshold state: clears max_freed_size, sets the given + * default mmap/trim thresholds and then re-applies the values from the + * environment, if any. + */ +void ucm_malloc_state_reset(int default_mmap_thresh, int default_trim_thresh) +{ + ucm_malloc_hook_state.max_freed_size = 0; + ucm_dlmallopt(M_MMAP_THRESHOLD, default_mmap_thresh); + ucm_dlmallopt(M_TRIM_THRESHOLD, default_trim_thresh); + 
ucm_malloc_set_env_mallopt(); +} + +UCS_STATIC_INIT { + ucs_spinlock_init(&ucm_malloc_hook_state.lock); + kh_init_inplace(mmap_ptrs, &ucm_malloc_hook_state.ptrs); +} diff --git a/src/ucm/malloc/malloc_hook.h b/src/ucm/malloc/malloc_hook.h new file mode 100644 index 0000000..e35fd0e --- /dev/null +++ b/src/ucm/malloc/malloc_hook.h @@ -0,0 +1,16 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCM_MALLOC_HOOK_H_ +#define UCM_MALLOC_HOOK_H_ + +#include + +ucs_status_t ucm_malloc_install(int events); + +void ucm_malloc_state_reset(int default_mmap_thresh, int default_trim_thresh); + +#endif diff --git a/src/ucm/mmap/install.c b/src/ucm/mmap/install.c new file mode 100644 index 0000000..6824a62 --- /dev/null +++ b/src/ucm/mmap/install.c @@ -0,0 +1,311 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "mmap.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define UCM_IS_HOOK_ENABLED(_entry) \ + ((_entry)->hook_type & UCS_BIT(ucm_mmap_hook_mode())) + +#define UCM_HOOK_STR \ + ((ucm_mmap_hook_mode() == UCM_MMAP_HOOK_RELOC) ? 
"reloc" : "bistro") + +#define UCM_FIRE_EVENT(_event, _mask, _data, _call) \ + do { \ + int exp_events = (_event) & (_mask); \ + (_data)->fired_events = 0; \ + _call; \ + ucm_trace("after %s: got 0x%x/0x%x", UCS_PP_MAKE_STRING(_call), \ + (_data)->fired_events, exp_events); \ + (_data)->out_events &= ~exp_events | (_data)->fired_events; \ + } while(0) + +extern const char *ucm_mmap_hook_modes[]; + +typedef enum ucm_mmap_hook_type { + UCM_HOOK_RELOC = UCS_BIT(UCM_MMAP_HOOK_RELOC), + UCM_HOOK_BISTRO = UCS_BIT(UCM_MMAP_HOOK_BISTRO), + UCM_HOOK_BOTH = UCM_HOOK_RELOC | UCM_HOOK_BISTRO +} ucm_mmap_hook_type_t; + +typedef struct ucm_mmap_func { + ucm_reloc_patch_t patch; + ucm_event_type_t event_type; + ucm_event_type_t deps; + ucm_mmap_hook_type_t hook_type; +} ucm_mmap_func_t; + +typedef struct ucm_mmap_test_events_data { + uint32_t fired_events; + int out_events; +} ucm_mmap_test_events_data_t; + +static ucm_mmap_func_t ucm_mmap_funcs[] = { + { {"mmap", ucm_override_mmap}, UCM_EVENT_MMAP, UCM_EVENT_NONE, UCM_HOOK_BOTH}, + { {"munmap", ucm_override_munmap}, UCM_EVENT_MUNMAP, UCM_EVENT_NONE, UCM_HOOK_BOTH}, +#if HAVE_MREMAP + { {"mremap", ucm_override_mremap}, UCM_EVENT_MREMAP, UCM_EVENT_NONE, UCM_HOOK_BOTH}, +#endif + { {"shmat", ucm_override_shmat}, UCM_EVENT_SHMAT, UCM_EVENT_NONE, UCM_HOOK_BOTH}, + { {"shmdt", ucm_override_shmdt}, UCM_EVENT_SHMDT, UCM_EVENT_SHMAT, UCM_HOOK_BOTH}, + { {"sbrk", ucm_override_sbrk}, UCM_EVENT_SBRK, UCM_EVENT_NONE, UCM_HOOK_RELOC}, +#if UCM_BISTRO_HOOKS + { {"brk", ucm_override_brk}, UCM_EVENT_SBRK, UCM_EVENT_NONE, UCM_HOOK_BISTRO}, +#endif + { {"madvise", ucm_override_madvise}, UCM_EVENT_MADVISE, UCM_EVENT_NONE, UCM_HOOK_BOTH}, + { {NULL, NULL}, UCM_EVENT_NONE} +}; + +static pthread_mutex_t ucm_mmap_install_mutex = PTHREAD_MUTEX_INITIALIZER; +static int ucm_mmap_installed_events = 0; /* events that were reported as installed */ + +static void ucm_mmap_event_test_callback(ucm_event_type_t event_type, + ucm_event_t *event, void *arg) +{ + 
ucm_mmap_test_events_data_t *data = arg; + + /* This callback may be called from multiple threads, which are just calling + * memory allocations/release, and not testing mmap hooks at the moment. + * So in order to ensure the thread which tests events sees all fired + * events, use atomic OR operation. + */ + ucs_atomic_or32(&data->fired_events, event_type); +} + +/* Fire events with pre/post action. The problem is in call sequence: we + * can't just fire single event - most of the system calls require set of + * calls to eliminate resource leaks or data corruption, such sequence + * produces additional events which may affect to event handling. To + * exclude additional events from processing used pre/post actions where + * set of handled events is cleared and evaluated for every system call. + * + * 'events' selects which groups of system calls are issued. For every call, + * UCM_FIRE_EVENT clears data->fired_events before it, and afterwards removes + * from data->out_events the expected events which were not fired. */ +static void +ucm_fire_mmap_events_internal(int events, ucm_mmap_test_events_data_t *data) +{ + size_t sbrk_size; + int sbrk_mask; + int shmid; + void *p; + + if (events & (UCM_EVENT_MMAP|UCM_EVENT_MUNMAP|UCM_EVENT_MREMAP| + UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED)) { + UCM_FIRE_EVENT(events, UCM_EVENT_MMAP|UCM_EVENT_VM_MAPPED, + data, p = mmap(NULL, ucm_get_page_size(), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + /* Use the same condition as the "mremap" entry of ucm_mmap_funcs, so + * mremap events are expected only when the mremap hook can exist. */ +#if HAVE_MREMAP + /* generate MAP event */ + UCM_FIRE_EVENT(events, UCM_EVENT_MREMAP|UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED, + data, p = mremap(p, ucm_get_page_size(), + ucm_get_page_size() * 2, MREMAP_MAYMOVE)); + /* generate UNMAP event */ + UCM_FIRE_EVENT(events, UCM_EVENT_MREMAP|UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED, + data, p = mremap(p, ucm_get_page_size() * 2, ucm_get_page_size(), 0)); +#endif + /* generate UNMAP event */ + UCM_FIRE_EVENT(events, UCM_EVENT_MMAP|UCM_EVENT_VM_MAPPED, + data, p = mmap(p, ucm_get_page_size(), PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + UCM_FIRE_EVENT(events, UCM_EVENT_MUNMAP|UCM_EVENT_VM_UNMAPPED, + data, munmap(p, ucm_get_page_size())); + } + + if (events & 
(UCM_EVENT_SHMAT|UCM_EVENT_SHMDT|UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED)) { + shmid = shmget(IPC_PRIVATE, ucm_get_page_size(), IPC_CREAT | SHM_R | SHM_W); + if (shmid == -1) { + ucm_debug("shmget failed: %m"); + return; + } + + UCM_FIRE_EVENT(events, UCM_EVENT_SHMAT|UCM_EVENT_VM_MAPPED, + data, p = shmat(shmid, NULL, 0)); + UCM_FIRE_EVENT(events, UCM_EVENT_SHMAT|UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED, + data, p = shmat(shmid, p, SHM_REMAP)); + /* mark the segment for removal before detaching it */ + shmctl(shmid, IPC_RMID, NULL); + UCM_FIRE_EVENT(events, UCM_EVENT_SHMDT|UCM_EVENT_VM_UNMAPPED, + data, shmdt(p)); + } + + if (events & (UCM_EVENT_SBRK|UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED)) { + if (RUNNING_ON_VALGRIND) { + /* on valgrind, doing a non-trivial sbrk() causes heap corruption */ + sbrk_size = 0; + sbrk_mask = UCM_EVENT_SBRK; + } else { + sbrk_size = ucm_get_page_size(); + sbrk_mask = UCM_EVENT_SBRK|UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED; + } + UCM_FIRE_EVENT(events, (UCM_EVENT_SBRK|UCM_EVENT_VM_MAPPED) & sbrk_mask, + data, (void)sbrk(sbrk_size)); + UCM_FIRE_EVENT(events, (UCM_EVENT_SBRK|UCM_EVENT_VM_UNMAPPED) & sbrk_mask, + data, (void)sbrk(-sbrk_size)); + } + + if (events & (UCM_EVENT_MADVISE|UCM_EVENT_VM_UNMAPPED)) { + UCM_FIRE_EVENT(events, UCM_EVENT_MMAP|UCM_EVENT_VM_MAPPED, data, + p = mmap(NULL, ucm_get_page_size(), PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0)); + if (p != MAP_FAILED) { + UCM_FIRE_EVENT(events, UCM_EVENT_MADVISE, data, + madvise(p, ucm_get_page_size(), MADV_DONTNEED)); + UCM_FIRE_EVENT(events, UCM_EVENT_MUNMAP|UCM_EVENT_VM_UNMAPPED, data, + munmap(p, ucm_get_page_size())); + } else { + ucm_debug("mmap failed: %m"); + } + } +} + +/* Issue the system calls selected by 'events' so that installed handlers + * receive the corresponding events. The bookkeeping result is discarded. + */ +void ucm_fire_mmap_events(int events) +{ + ucm_mmap_test_events_data_t data; + + /* UCM_FIRE_EVENT does a read-modify-write of out_events, so it must not + * be left uninitialized even though the value is not used here. + */ + data.fired_events = 0; + data.out_events = events; + + ucm_fire_mmap_events_internal(events, &data); +} + +/* Called with lock held */ +static ucs_status_t ucm_mmap_test_events(int events) +{ + ucm_event_handler_t handler; + ucm_mmap_test_events_data_t data; + + handler.events = events; + handler.priority = -1; + 
handler.cb = ucm_mmap_event_test_callback; + handler.arg = &data; + data.out_events = events; /* start with all; unfired ones are cleared */ + + ucm_event_handler_add(&handler); + ucm_fire_mmap_events_internal(events, &data); + ucm_event_handler_remove(&handler); + + ucm_debug("mmap test: got 0x%x out of 0x%x", data.out_events, events); + + /* Return success if we caught all wanted events */ + if (!ucs_test_all_flags(data.out_events, events)) { + return UCS_ERR_UNSUPPORTED; + } + + return UCS_OK; +} +/* Test, under the install mutex, those of 'events' which were previously + * reported as installed by ucm_mmap_install(). + */ +ucs_status_t ucm_mmap_test_installed_events(int events) +{ + ucs_status_t status; + + /* + * return UCS_OK iff all installed events are actually working + * we don't check the status of events which were not successfully installed + */ + pthread_mutex_lock(&ucm_mmap_install_mutex); + status = ucm_mmap_test_events(events & ucm_mmap_installed_events); + pthread_mutex_unlock(&ucm_mmap_install_mutex); + + return status; +} + +/* Called with lock held. + * Install the hooks from ucm_mmap_funcs which are needed for 'events' (by + * event type or dependency), are enabled for the current hook mode and were + * not installed by a previous call. Stops at the first failure and returns + * its status. + */ +static ucs_status_t ucs_mmap_install_reloc(int events) +{ + static int installed_events = 0; + ucm_mmap_func_t *entry; + ucs_status_t status; + + if (ucm_mmap_hook_mode() == UCM_MMAP_HOOK_NONE) { + ucm_debug("installing mmap hooks is disabled by configuration"); + return UCS_ERR_UNSUPPORTED; + } + + for (entry = ucm_mmap_funcs; entry->patch.symbol != NULL; ++entry) { + if (!((entry->event_type|entry->deps) & events)) { + /* Not required */ + continue; + } + + if (entry->event_type & installed_events) { + /* Already installed */ + continue; + } + + if (UCM_IS_HOOK_ENABLED(entry)) { + ucm_debug("mmap: installing %s hook for %s = %p for event 0x%x", UCM_HOOK_STR, + entry->patch.symbol, entry->patch.value, entry->event_type); + + if (ucm_mmap_hook_mode() == UCM_MMAP_HOOK_RELOC) { + status = ucm_reloc_modify(&entry->patch); + } else { + ucs_assert(ucm_mmap_hook_mode() == UCM_MMAP_HOOK_BISTRO); + status = ucm_bistro_patch(entry->patch.symbol, entry->patch.value, NULL); + } + if (status != UCS_OK) { + ucm_warn("failed to install %s hook for '%s'", + UCM_HOOK_STR, 
entry->patch.symbol); + return status; + } + + installed_events |= entry->event_type; + } + } + + return UCS_OK; +} +/* Install and verify mmap hooks for 'events', under the install mutex. + * On success the events are added to ucm_mmap_installed_events and UCS_OK is + * returned; otherwise the status of the failed installation or test is + * returned. + */ +ucs_status_t ucm_mmap_install(int events) +{ + ucs_status_t status; + + pthread_mutex_lock(&ucm_mmap_install_mutex); + + if (ucs_test_all_flags(ucm_mmap_installed_events, events)) { + /* if we already installed these events, check that they are still + * working, and if not - reinstall them. + */ + status = ucm_mmap_test_events(events); + if (status == UCS_OK) { + goto out_unlock; + } + } + + status = ucs_mmap_install_reloc(events); + if (status != UCS_OK) { + ucm_debug("failed to install relocations for mmap"); + goto out_unlock; + } + + status = ucm_mmap_test_events(events); + if (status != UCS_OK) { + ucm_debug("failed to install mmap events"); + goto out_unlock; + } + + /* status == UCS_OK */ + ucm_mmap_installed_events |= events; + ucm_debug("mmap installed events = 0x%x", ucm_mmap_installed_events); + +out_unlock: + pthread_mutex_unlock(&ucm_mmap_install_mutex); + return status; +} diff --git a/src/ucm/mmap/mmap.h b/src/ucm/mmap/mmap.h new file mode 100644 index 0000000..58252de --- /dev/null +++ b/src/ucm/mmap/mmap.h @@ -0,0 +1,48 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef UCM_MMAP_H_ +#define UCM_MMAP_H_ + +#include +#include + +#define UCM_MMAP_HOOK_RELOC_STR "reloc" +#define UCM_MMAP_HOOK_BISTRO_STR "bistro" + +#if UCM_BISTRO_HOOKS +# define UCM_DEFAULT_HOOK_MODE UCM_MMAP_HOOK_BISTRO +# define UCM_DEFAULT_HOOK_MODE_STR UCM_MMAP_HOOK_BISTRO_STR +#else +# define UCM_DEFAULT_HOOK_MODE UCM_MMAP_HOOK_RELOC +# define UCM_DEFAULT_HOOK_MODE_STR UCM_MMAP_HOOK_RELOC_STR +#endif + +ucs_status_t ucm_mmap_install(int events); + +void *ucm_override_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); +int ucm_override_munmap(void *addr, size_t length); +void *ucm_override_mremap(void *old_address, size_t old_size, size_t new_size, int flags); +void *ucm_override_shmat(int shmid, const void *shmaddr, int shmflg); +int ucm_override_shmdt(const void *shmaddr); +void *ucm_override_sbrk(intptr_t increment); +void *ucm_sbrk_select(intptr_t increment); +int ucm_override_brk(void *addr); +void *ucm_brk_syscall(void *addr); +int ucm_override_madvise(void *addr, size_t length, int advice); +void ucm_fire_mmap_events(int events); +ucs_status_t ucm_mmap_test_installed_events(int events); + +static UCS_F_ALWAYS_INLINE ucm_mmap_hook_mode_t ucm_mmap_hook_mode(void) +{ + if (RUNNING_ON_VALGRIND && (ucm_global_opts.mmap_hook_mode == UCM_MMAP_HOOK_BISTRO)) { + return UCM_MMAP_HOOK_RELOC; + } + + return ucm_global_opts.mmap_hook_mode; +} + +#endif diff --git a/src/ucm/ptmalloc286/malloc-2.8.6.h b/src/ucm/ptmalloc286/malloc-2.8.6.h new file mode 100644 index 0000000..1b4495a --- /dev/null +++ b/src/ucm/ptmalloc286/malloc-2.8.6.h @@ -0,0 +1,622 @@ +/* + Default header file for malloc-2.8.x, written by Doug Lea + and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ + + This header is for ANSI C/C++ only. 
You can set any of + the following #defines before including: + + * If UCM_MALLOC_PREFIX is defined, it is assumed that malloc.c + was also compiled with this option, so all routines + have names starting with the value of UCM_MALLOC_PREFIX. + + * If HAVE_USR_INCLUDE_MALLOC_H is defined, it is assumed that this + file will be #included AFTER <malloc.h>. This is needed only if + your system defines a struct mallinfo that is incompatible with the + standard one declared here. Otherwise, you can include this file + INSTEAD of your system <malloc.h>. At least on ANSI, all + declarations should be compatible with system versions. + + * If MSPACES is defined, declarations for mspace versions are included. +*/ + +#ifndef MALLOC_280_H +#define MALLOC_280_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include /* for size_t */ + +#ifndef ONLY_MSPACES +#define ONLY_MSPACES 0 /* define to a value */ +#elif ONLY_MSPACES != 0 +#define ONLY_MSPACES 1 +#endif /* ONLY_MSPACES */ +#ifndef NO_MALLINFO +#define NO_MALLINFO 0 +#endif /* NO_MALLINFO */ + +#ifndef MSPACES +#if ONLY_MSPACES +#define MSPACES 1 +#else /* ONLY_MSPACES */ +#define MSPACES 0 +#endif /* ONLY_MSPACES */ +#endif /* MSPACES */ + +#include + +#if !ONLY_MSPACES + +#ifdef UCM_MALLOC_PREFIX +#define dlcalloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, calloc) +#define dlfree UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, free) +#define dlmalloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc) +#define dlmemalign UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, memalign) +#define dlposix_memalign UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, posix_memalign) +#define dlrealloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, realloc) +#define dlvalloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, valloc) +#define dlpvalloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, pvalloc) +#define dlmallinfo UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, mallinfo) +#define dlmallopt UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, mallopt) +#define dlmalloc_trim UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_trim) +#define dlmalloc_stats
UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_stats) +#define dlmalloc_usable_size UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_usable_size) +#define dlmalloc_footprint UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_footprint) +#define dlmalloc_max_footprint UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_max_footprint) +#define dlmalloc_footprint_limit UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_footprint_limit) +#define dlmalloc_set_footprint_limit UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_set_footprint_limit) +#define dlmalloc_inspect_all UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_inspect_all) +#define dlindependent_calloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, independent_calloc) +#define dlindependent_comalloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, independent_comalloc) +#define dlbulk_free UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, bulk_free) +#endif /* UCM_MALLOC_PREFIX */ + +#if !NO_MALLINFO +#ifndef HAVE_USR_INCLUDE_MALLOC_H +#ifndef _MALLOC_H +#ifndef MALLINFO_FIELD_TYPE +#define MALLINFO_FIELD_TYPE size_t +#endif /* MALLINFO_FIELD_TYPE */ +#ifndef STRUCT_MALLINFO_DECLARED +#define STRUCT_MALLINFO_DECLARED 1 +struct mallinfo { + MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ + MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ + MALLINFO_FIELD_TYPE smblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ + MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ + MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ + MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ + MALLINFO_FIELD_TYPE fordblks; /* total free space */ + MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ +}; +#endif /* STRUCT_MALLINFO_DECLARED */ +#endif /* _MALLOC_H */ +#endif /* HAVE_USR_INCLUDE_MALLOC_H */ +#endif /* !NO_MALLINFO */ + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or + null if no space is available, in which case 
errno is set to ENOMEM + on ANSI C systems. + + If n is zero, malloc returns a minimum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 32 bytes on 64bit + systems.) Note that size_t is an unsigned type, so calls with + arguments that would be negative if signed are interpreted as + requests for huge amounts of space, which will often fail. The + maximum supported value of n differs across systems, but is in all + cases less than the maximum representable value of a size_t. +*/ +void* dlmalloc(size_t); + +/* + free(void* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. If p was not malloced or already + freed, free(p) will by default cause the current program to abort. +*/ +void dlfree(void*); + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. +*/ +void* dlcalloc(size_t, size_t); + +/* + realloc(void* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. + + The returned pointer may or may not be the same as p. The algorithm + prefers extending p in most cases when possible, otherwise it + employs the equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. realloc with a size + argument of zero (re)allocates a minimum-sized chunk. + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported.
+*/ +void* dlrealloc(void*, size_t); + +/* + realloc_in_place(void* p, size_t n) + Resizes the space allocated for p to size n, only if this can be + done without moving p (i.e., only if there is adjacent space + available if n is greater than p's current allocated size, or n is + less than or equal to p's size). This may be used instead of plain + realloc if an alternative allocation strategy is needed upon failure + to expand space; for example, reallocation of a buffer that must be + memory-aligned or cleared. You can use realloc_in_place to trigger + these alternatives only when needed. + + Returns p if successful; otherwise null. +*/ +void* dlrealloc_in_place(void*, size_t); + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. +*/ +void* dlmemalign(size_t, size_t); + +/* + int posix_memalign(void** pp, size_t alignment, size_t n); + Allocates a chunk of n bytes, aligned in accord with the alignment + argument. Differs from memalign only in that it (1) assigns the + allocated memory to *pp rather than returning it, (2) fails and + returns EINVAL if the alignment is not a power of two (3) fails and + returns ENOMEM if memory cannot be allocated. +*/ +int dlposix_memalign(void**, size_t, size_t); + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +void* dlvalloc(size_t); + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters The format is to provide a + (parameter-number, parameter-value) pair. 
mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. None of these are used in this malloc, + so setting them has no effect. But this malloc also supports other + options in mallopt: + + Symbol param # default allowed param values + M_TRIM_THRESHOLD -1 2*1024*1024 any (-1U disables trimming) + M_GRANULARITY -2 page size any power of 2 >= page size + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) +*/ +int dlmallopt(int, int); + +#define M_TRIM_THRESHOLD (-1) +#define M_GRANULARITY (-2) +#define M_MMAP_THRESHOLD (-3) + + +/* + malloc_footprint(); + Returns the number of bytes obtained from the system. The total + number of bytes allocated by malloc, realloc etc., is less than this + value. Unlike mallinfo, this function returns only a precomputed + result, so can be called frequently to monitor memory consumption. + Even if locks are otherwise defined, this function does not use them, + so results might not be up to date. +*/ +size_t dlmalloc_footprint(void); + +/* + malloc_max_footprint(); + Returns the maximum number of bytes obtained from the system. This + value will be greater than current footprint if deallocated space + has been reclaimed by the system. The peak number of bytes allocated + by malloc, realloc etc., is less than this value. Unlike mallinfo, + this function returns only a precomputed result, so can be called + frequently to monitor memory consumption. Even if locks are + otherwise defined, this function does not use them, so results might + not be up to date. +*/ +size_t dlmalloc_max_footprint(void); + +/* + malloc_footprint_limit(); + Returns the number of bytes that the heap is allowed to obtain from + the system, returning the last value returned by + malloc_set_footprint_limit, or the maximum size_t value if + never set.
The returned value reflects a permission. There is no + guarantee that this number of bytes can actually be obtained from + the system. +*/ +size_t dlmalloc_footprint_limit(void); + +/* + malloc_set_footprint_limit(); + Sets the maximum number of bytes to obtain from the system, causing + failure returns from malloc and related functions upon attempts to + exceed this value. The argument value may be subject to page + rounding to an enforceable limit; this actual value is returned. + Using an argument of the maximum possible size_t effectively + disables checks. If the argument is less than or equal to the + current malloc_footprint, then all future allocations that require + additional system memory will fail. However, invocation cannot + retroactively deallocate existing used memory. +*/ +size_t dlmalloc_set_footprint_limit(size_t bytes); + +/* + malloc_inspect_all(void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg); + Traverses the heap and calls the given handler for each managed + region, skipping all bytes that are (or may be) used for bookkeeping + purposes. Traversal does not include chunks that have been + directly memory mapped. Each reported region begins at the start + address, and continues up to but not including the end address. The + first used_bytes of the region contain allocated data. If + used_bytes is zero, the region is unallocated. The handler is + invoked with the given callback argument. If locks are defined, they + are held during the entire traversal. It is a bad idea to invoke + other malloc functions from within the handler. + + For example, to count the number of in-use chunks with size greater + than 1000, you could write: + static int count = 0; + void count_chunks(void* start, void* end, size_t used, void* arg) { + if (used >= 1000) ++count; + } + then: + malloc_inspect_all(count_chunks, NULL); + + malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined.
+*/ +void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), + void* arg); + +#if !NO_MALLINFO +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: always zero. + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: always zero + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. +*/ + +struct mallinfo dlmallinfo(void); +#endif /* NO_MALLINFO */ + +/* + independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. 
+ + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be freed when it is no longer needed. This can be + done all at once using bulk_free. + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +void** dlindependent_calloc(size_t, size_t, void**); + +/* + independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. 
It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be freed when it is no longer needed. This can be + done all at once using bulk_free. + + independent_comalloc differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... } + struct Foot { ... } + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother. + + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +void** dlindependent_comalloc(size_t, size_t*, void**); + +/* + bulk_free(void* array[], size_t n_elements) + Frees and clears (sets to null) each non-null pointer in the given + array. This is likely to be faster than freeing them one-by-one. + If footers are used, pointers that have been allocated in different + mspaces are not freed or cleared, and the count of all such pointers + is returned.
For large arrays of pointers with poor locality, it + may be worthwhile to sort this array before calling bulk_free. +*/ +size_t dlbulk_free(void**, size_t n_elements); + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +void* dlpvalloc(size_t); + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative arguments + to sbrk) if there is unused memory at the `high' end of the malloc + pool or in unused MMAP segments. You can call this after freeing + large blocks of memory to potentially reduce the system-level memory + requirements of a program. However, it cannot guarantee to reduce + memory. Under some allocation patterns, some large free blocks of + memory will be locked between two used chunks, so they cannot be + given back to the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, only + the minimum amount of memory to maintain internal data structures + will be left. Non-zero arguments can be supplied to maintain enough + trailing space to service future expected allocations without having + to re-obtain memory from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. +*/ +int dlmalloc_trim(size_t); + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. 
+ + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. + + malloc_stats is not compiled if NO_MALLOC_STATS is defined. +*/ +void dlmalloc_stats(void); + +#endif /* !ONLY_MSPACES */ + +/* + malloc_usable_size(void* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); +*/ +size_t dlmalloc_usable_size(const void*); + +#if MSPACES + +/* + mspace is an opaque type representing an independent + region of space that supports mspace_malloc, etc. +*/ +typedef void* mspace; + +/* + create_mspace creates and returns a new independent space with the + given initial capacity, or, if 0, the default granularity size. It + returns null if there is no system memory available to create the + space. If argument locked is non-zero, the space uses a separate + lock to control access. The capacity of the space will grow + dynamically as needed to service mspace_malloc requests. You can + control the sizes of incremental increases of this space by + compiling with a different DEFAULT_GRANULARITY or dynamically + setting with mallopt(M_GRANULARITY, value). +*/ +mspace create_mspace(size_t capacity, int locked); + +/* + destroy_mspace destroys the given space, and attempts to return all + of its memory back to the system, returning the total number of + bytes freed. 
After destruction, the results of access to all memory + used by the space become undefined. +*/ +size_t destroy_mspace(mspace msp); + +/* + create_mspace_with_base uses the memory supplied as the initial base + of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this + space is used for bookkeeping, so the capacity must be at least this + large. (Otherwise 0 is returned.) When this initial space is + exhausted, additional memory will be obtained from the system. + Destroying this space will deallocate all additionally allocated + space (if possible) but not the initial base. +*/ +mspace create_mspace_with_base(void* base, size_t capacity, int locked); + +/* + mspace_track_large_chunks controls whether requests for large chunks + are allocated in their own untracked mmapped regions, separate from + others in this mspace. By default large chunks are not tracked, + which reduces fragmentation. However, such chunks are not + necessarily released to the system upon destroy_mspace. Enabling + tracking by setting to true may increase fragmentation, but avoids + leakage when relying on destroy_mspace to release all memory + allocated using this space. The function returns the previous + setting. +*/ +int mspace_track_large_chunks(mspace msp, int enable); + +#if !NO_MALLINFO +/* + mspace_mallinfo behaves as mallinfo, but reports properties of + the given space. +*/ +struct mallinfo mspace_mallinfo(mspace msp); +#endif /* NO_MALLINFO */ + +/* + An alias for mallopt. 
+*/ +int mspace_mallopt(int, int); + +/* + The following operate identically to their malloc counterparts + but operate only for the given mspace argument +*/ +void* mspace_malloc(mspace msp, size_t bytes); +void mspace_free(mspace msp, void* mem); +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); +void* mspace_realloc(mspace msp, void* mem, size_t newsize); +void* mspace_realloc_in_place(mspace msp, void* mem, size_t newsize); +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]); +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]); +size_t mspace_bulk_free(mspace msp, void**, size_t n_elements); +size_t mspace_usable_size(const void* mem); +void mspace_malloc_stats(mspace msp); +int mspace_trim(mspace msp, size_t pad); +size_t mspace_footprint(mspace msp); +size_t mspace_max_footprint(mspace msp); +size_t mspace_footprint_limit(mspace msp); +size_t mspace_set_footprint_limit(mspace msp, size_t bytes); +void mspace_inspect_all(mspace msp, + void(*handler)(void *, void *, size_t, void*), + void* arg); +#endif /* MSPACES */ + +#ifdef __cplusplus +}; /* end of extern "C" */ +#endif + +#endif /* MALLOC_280_H */ diff --git a/src/ucm/ptmalloc286/malloc.c b/src/ucm/ptmalloc286/malloc.c new file mode 100644 index 0000000..e177996 --- /dev/null +++ b/src/ucm/ptmalloc286/malloc.c @@ -0,0 +1,6307 @@ +/* + This is a version (aka dlmalloc) of malloc/free/realloc written by + Doug Lea and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ Send questions, + comments, complaints, performance data, etc to dl@cs.oswego.edu + +* Version 2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea + Note: There may be an updated version of this malloc obtainable at + ftp://gee.cs.oswego.edu/pub/misc/malloc.c + Check before installing! 
+ +* Quickstart + + This library is all in one file to simplify the most common usage: + ftp it, compile it (-O3), and link it into another program. All of + the compile-time options default to reasonable values for use on + most platforms. You might later want to step through various + compile-time and dynamic tuning options. + + For convenience, an include file for code using this malloc is at: + ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.6.h + You don't really need this .h file unless you call functions not + defined in your system include files. The .h file contains only the + excerpts from this file needed for using this malloc on ANSI C/C++ + systems, so long as you haven't changed compile-time options about + naming and tuning parameters. If you do, then you can create your + own malloc.h that does include all settings by cutting at the point + indicated below. Note that you may already by default be using a C + library containing a malloc that is based on some version of this + malloc (for example in linux). You might still want to use the one + in this file to customize settings or to avoid overheads associated + with library versions. + +* Vital statistics: + + Supported pointer/size_t representation: 4 or 8 bytes + size_t MUST be an unsigned type of the same width as + pointers. (If you are using an ancient system that declares + size_t as a signed type, or need it to be a different width + than pointers, you can use a previous release of this malloc + (e.g. 2.7.2) supporting these.) + + Alignment: 8 bytes (minimum) + This suffices for nearly all current machines and C compilers. + However, you can define MALLOC_ALIGNMENT to be wider than this + if necessary (up to 128bytes), at the expense of using more space. 
+ + Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) + 8 or 16 bytes (if 8byte sizes) + Each malloced chunk has a hidden word of overhead holding size + and status information, and additional cross-check word + if FOOTERS is defined. + + Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) + 8-byte ptrs: 32 bytes (including overhead) + + Even a request for zero bytes (i.e., malloc(0)) returns a + pointer to something of the minimum allocatable size. + The maximum overhead wastage (i.e., number of extra bytes + allocated than were requested in malloc) is less than or equal + to the minimum size, except for requests >= mmap_threshold that + are serviced via mmap(), where the worst case wastage is about + 32 bytes plus the remainder from a system page (the minimal + mmap unit); typically 4096 or 8192 bytes. + + Security: static-safe; optionally more or less + The "security" of malloc refers to the ability of malicious + code to accentuate the effects of errors (for example, freeing + space that is not currently malloc'ed or overwriting past the + ends of chunks) in code that calls malloc. This malloc + guarantees not to modify any memory locations below the base of + heap, i.e., static variables, even in the presence of usage + errors. The routines additionally detect most improper frees + and reallocs. All this holds as long as the static bookkeeping + for malloc itself is not corrupted by some other means. This + is only one aspect of security -- these checks do not, and + cannot, detect all possible programming errors. + + If FOOTERS is defined nonzero, then each allocated chunk + carries an additional check word to verify that it was malloced + from its space. These check words are the same within each + execution of a program using malloc, but differ across + executions, so externally crafted fake chunks cannot be + freed. 
This improves security by rejecting frees/reallocs that + could corrupt heap memory, in addition to the checks preventing + writes to statics that are always on. This may further improve + security at the expense of time and space overhead. (Note that + FOOTERS may also be worth using with MSPACES.) + + By default detected errors cause the program to abort (calling + "abort()"). You can override this to instead proceed past + errors by defining PROCEED_ON_ERROR. In this case, a bad free + has no effect, and a malloc that encounters a bad address + caused by user overwrites will ignore the bad address by + dropping pointers and indices to all known memory. This may + be appropriate for programs that should continue if at all + possible in the face of programming errors, although they may + run out of memory because dropped memory is never reclaimed. + + If you don't like either of these options, you can define + CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything + else. And if you are sure that your program using malloc has + no errors or vulnerabilities, you can define INSECURE to 1, + which might (or might not) provide a small performance improvement. + + It is also possible to limit the maximum total allocatable + space, using malloc_set_footprint_limit. This is not + designed as a security feature in itself (calls to set limits + are not screened or privileged), but may be useful as one + aspect of a secure implementation. + + Thread-safety: NOT thread-safe unless USE_LOCKS defined non-zero + When USE_LOCKS is defined, each public call to malloc, free, + etc is surrounded with a lock. By default, this uses a plain + pthread mutex, win32 critical section, or a spin-lock if + available for the platform and not disabled by setting + USE_SPIN_LOCKS=0. However, if USE_RECURSIVE_LOCKS is defined, + recursive versions are used instead (which are not required for + base functionality but may be needed in layered extensions).
+ Using a global lock is not especially fast, and can be a major + bottleneck. It is designed only to provide minimal protection + in concurrent environments, and to provide a basis for + extensions. If you are using malloc in a concurrent program, + consider instead using nedmalloc + (http://www.nedprod.com/programs/portable/nedmalloc/) or + ptmalloc (See http://www.malloc.de), which are derived from + versions of this malloc. + + System requirements: Any combination of MORECORE and/or MMAP/MUNMAP + This malloc can use unix sbrk or any emulation (invoked using + the CALL_MORECORE macro) and/or mmap/munmap or any emulation + (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system + memory. On most unix systems, it tends to work best if both + MORECORE and MMAP are enabled. On Win32, it uses emulations + based on VirtualAlloc. It also uses common C library functions + like memset. + + Compliance: I believe it is compliant with the Single Unix Specification + (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably + others as well. + +* Overview of algorithms + + This is not the fastest, most space-conserving, most portable, or + most tunable malloc ever written. However it is among the fastest + while also being among the most space-conserving, portable and + tunable. Consistent balance across these factors results in a good + general-purpose allocator for malloc-intensive programs. + + In most ways, this malloc is a best-fit allocator. Generally, it + chooses the best-fitting existing chunk for a request, with ties + broken in approximately least-recently-used order. (This strategy + normally maintains low fragmentation.) However, for requests less + than 256bytes, it deviates from best-fit when there is not an + exactly fitting available chunk by preferring to use space adjacent + to that used for the previous small request, as well as by breaking + ties in approximately most-recently-used order. 
(These enhance + locality of series of small allocations.) And for very large requests + (>= 256Kb by default), it relies on system memory mapping + facilities, if supported. (This helps avoid carrying around and + possibly fragmenting memory used only for large chunks.) + + All operations (except malloc_stats and mallinfo) have execution + times that are bounded by a constant factor of the number of bits in + a size_t, not counting any clearing in calloc or copying in realloc, + or actions surrounding MORECORE and MMAP that have times + proportional to the number of non-contiguous regions returned by + system allocation routines, which is often just 1. In real-time + applications, you can optionally suppress segment traversals using + NO_SEGMENT_TRAVERSAL, which assures bounded execution even when + system allocators return non-contiguous spaces, at the typical + expense of carrying around more memory and increased fragmentation. + + The implementation is not very modular and seriously overuses + macros. Perhaps someday all C compilers will do as good a job + inlining modular code as can now be done by brute-force expansion, + but now, enough of them seem not to. + + Some compilers issue a lot of warnings about code that is + dead/unreachable only on some platforms, and also about intentional + uses of negation on unsigned types. All known cases of each can be + ignored. + + For a longer but out of date high-level description, see + http://gee.cs.oswego.edu/dl/html/malloc.html + +* MSPACES + If MSPACES is defined, then in addition to malloc, free, etc., + this file also defines mspace_malloc, mspace_free, etc. These + are versions of malloc routines that take an "mspace" argument + obtained using create_mspace, to control all internal bookkeeping. + If ONLY_MSPACES is defined, only these versions are compiled. 
+ So if you would like to use this allocator for only some allocations, + and your system malloc for others, you can compile with + ONLY_MSPACES and then do something like... + static mspace mymspace = create_mspace(0,0); // for example + #define mymalloc(bytes) mspace_malloc(mymspace, bytes) + + (Note: If you only need one instance of an mspace, you can instead + use "UCM_MALLOC_PREFIX" to relabel the global malloc.) + + You can similarly create thread-local allocators by storing + mspaces as thread-locals. For example: + static __thread mspace tlms = 0; + void* tlmalloc(size_t bytes) { + if (tlms == 0) tlms = create_mspace(0, 0); + return mspace_malloc(tlms, bytes); + } + void tlfree(void* mem) { mspace_free(tlms, mem); } + + Unless FOOTERS is defined, each mspace is completely independent. + You cannot allocate from one and free to another (although + conformance is only weakly checked, so usage errors are not always + caught). If FOOTERS is defined, then each chunk carries around a tag + indicating its originating mspace, and frees are directed to their + originating spaces. Normally, this requires use of locks. + + ------------------------- Compile-time options --------------------------- + +Be careful in setting #define values for numerical constants of type +size_t. On some systems, literal values are not automatically extended +to size_t precision unless they are explicitly casted. You can also +use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. + +WIN32 default: defined if _WIN32 defined + Defining WIN32 sets up defaults for MS environment and compilers. + Otherwise defaults are for unix. Beware that there seem to be some + cases where this malloc might not be a pure drop-in replacement for + Win32 malloc: Random-looking failures from Win32 GDI API's (eg; + SetDIBits()) may be due to bugs in some video driver implementations + when pixel buffers are malloc()ed, and the region spans more than + one VirtualAlloc()ed region. 
Because dlmalloc uses a small (64Kb) + default granularity, pixel buffers may straddle virtual allocation + regions more often than when using the Microsoft allocator. You can + avoid this by using VirtualAlloc() and VirtualFree() for all pixel + buffers rather than using malloc(). If this is not possible, + recompile this malloc with a larger DEFAULT_GRANULARITY. Note: + in cases where MSC and gcc (cygwin) are known to differ on WIN32, + conditions use _MSC_VER to distinguish them. + +DLMALLOC_EXPORT default: extern + Defines how public APIs are declared. If you want to export via a + Windows DLL, you might define this as + #define DLMALLOC_EXPORT extern __declspec(dllexport) + If you want a POSIX ELF shared object, you might use + #define DLMALLOC_EXPORT extern __attribute__((visibility("default"))) + +MALLOC_ALIGNMENT default: (size_t)(2 * sizeof(void *)) + Controls the minimum alignment for malloc'ed chunks. It must be a + power of two and at least 8, even on machines for which smaller + alignments would suffice. It may be defined as larger than this + though. Note however that code and data structures are optimized for + the case of 8-byte alignment. + +MSPACES default: 0 (false) + If true, compile in support for independent allocation spaces. + This is only supported if HAVE_MMAP is true. + +ONLY_MSPACES default: 0 (false) + If true, only compile in mspace versions, not regular versions. + +USE_LOCKS default: 0 (false) + Causes each call to each public routine to be surrounded with + pthread or WIN32 mutex lock/unlock. (If set true, this can be + overridden on a per-mspace basis for mspace versions.) If set to a + non-zero value other than 1, locks are used, but their + implementation is left out, so lock functions must be supplied manually, + as described below. + +USE_SPIN_LOCKS default: 1 iff USE_LOCKS and spin locks available + If true, uses custom spin locks for locking. 
This is currently + supported only for gcc >= 4.1, older gccs on x86 platforms, and recent + MS compilers. Otherwise, posix locks or win32 critical sections are + used. + +USE_RECURSIVE_LOCKS default: not defined + If defined nonzero, uses recursive (aka reentrant) locks, otherwise + uses plain mutexes. This is not required for malloc proper, but may + be needed for layered allocators such as nedmalloc. + +LOCK_AT_FORK default: not defined + If defined nonzero, performs pthread_atfork upon initialization + to initialize child lock while holding parent lock. The implementation + assumes that pthread locks (not custom locks) are being used. In other + cases, you may need to customize the implementation. + +FOOTERS default: 0 + If true, provide extra checking and dispatching by placing + information in the footers of allocated chunks. This adds + space and time overhead. + +INSECURE default: 0 + If true, omit checks for usage errors and heap space overwrites. + +UCM_MALLOC_PREFIX default: NOT defined + If defined, its value is token-pasted onto all public routine names in + place of the default 'dl' prefix. This can be useful when you only want to + use this malloc in one part of a program, using your regular system malloc elsewhere. + +MALLOC_INSPECT_ALL default: NOT defined + If defined, compiles malloc_inspect_all and mspace_inspect_all, that + perform traversal of all heap space. Unless access to these + functions is otherwise restricted, you probably do not want to + include them in secure implementations. + +ABORT default: defined as abort() + Defines how to abort on failed checks. On most systems, a failed + check cannot die with an "assert" or even print an informative + message, because the underlying print routines in turn call malloc, + which will fail again. Generally, the best policy is to simply call + abort().
It's not very useful to do more than this because many + errors due to overwriting will show up as address faults (null, odd + addresses etc) rather than malloc-triggered checks, so will also + abort. Also, most compilers know that abort() does not return, so + can better optimize code conditionally calling it. + +PROCEED_ON_ERROR default: defined as 0 (false) + Controls whether detected bad addresses cause them to be bypassed + rather than aborting. If set, detected bad arguments to free and + realloc are ignored. And all bookkeeping information is zeroed out + upon a detected overwrite of freed heap space, thus losing the + ability to ever return it from malloc again, but enabling the + application to proceed. If PROCEED_ON_ERROR is defined, the + static variable malloc_corruption_error_count is compiled in + and can be examined to see if errors have occurred. This option + generates slower code than the default abort policy. + +DEBUG default: NOT defined + The DEBUG setting is mainly intended for people trying to modify + this code or diagnose problems when porting to new platforms. + However, it may also be able to better isolate user errors than just + using runtime checks. The assertions in the check routines spell + out in more detail the assumptions and invariants underlying the + algorithms. The checking is fairly extensive, and will slow down + execution noticeably. Calling malloc_stats or mallinfo with DEBUG + set will attempt to check every non-mmapped allocated and free chunk + in the course of computing the summaries. + +ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) + Debugging assertion failures can be nearly impossible if your + version of the assert macro causes malloc to be called, which will + lead to a cascade of further failures, blowing the runtime stack. + ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(), + which will usually make debugging easier.
+ +MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 + The action to take before "return 0" when malloc fails to be able to + return memory because there is none available. + +HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES + True if this system supports sbrk or an emulation of it. + +MORECORE default: sbrk + The name of the sbrk-style system routine to call to obtain more + memory. See below for guidance on writing custom MORECORE + functions. The type of the argument to sbrk/MORECORE varies across + systems. It cannot be size_t, because it supports negative + arguments, so it is normally the signed type of the same width as + size_t (sometimes declared as "intptr_t"). It doesn't much matter + though. Internally, we only call it with arguments less than half + the max value of a size_t, which should work across all reasonable + possibilities, although sometimes generating compiler warnings. + +MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE + If true, take advantage of fact that consecutive calls to MORECORE + with positive arguments always return contiguous increasing + addresses. This is true of unix sbrk. It does not hurt too much to + set it true anyway, since malloc copes with non-contiguities. + Setting it false when definitely non-contiguous saves time + and possibly wasted space it would take to discover this though. + +MORECORE_CANNOT_TRIM default: NOT defined + True if MORECORE cannot release space back to the system when given + negative arguments. This is generally necessary only if you are + using a hand-crafted MORECORE function that cannot handle negative + arguments. + +NO_SEGMENT_TRAVERSAL default: 0 + If non-zero, suppresses traversals of memory segments + returned by either MORECORE or CALL_MMAP. This disables + merging of segments that are contiguous, and selectively + releasing them to the OS if unused, but bounds execution times. 
+ +HAVE_MMAP default: 1 (true) + True if this system supports mmap or an emulation of it. If so, and + HAVE_MORECORE is not true, MMAP is used for all system + allocation. If set and HAVE_MORECORE is true as well, MMAP is + primarily used to directly allocate very large blocks. It is also + used as a backup strategy in cases where MORECORE fails to provide + space from the system. Note: A single call to MUNMAP is assumed to be + able to unmap memory that may have been allocated using multiple calls + to MMAP, so long as they are adjacent. + +HAVE_MREMAP default: 1 on linux, else 0 + If true, realloc() uses mremap() to re-allocate large blocks and + extend or shrink allocation spaces. + +MMAP_CLEARS default: 1 except on WINCE. + True if mmap clears memory so calloc doesn't need to. This is true + for standard unix mmap using /dev/zero and on WIN32 except for WINCE. + +USE_BUILTIN_FFS default: 0 (i.e., not used) + Causes malloc to use the builtin ffs() function to compute indices. + Some compilers may recognize and intrinsify ffs to be faster than the + supplied C version. Also, the case of x86 using gcc is special-cased + to an asm instruction, so is already as fast as it can be, and so + this setting has no effect. Similarly for Win32 under recent MS compilers. + (On most x86s, the asm version is only slightly faster than the C version.) + +malloc_getpagesize default: derive from system includes, or 4096. + The system page size. To the extent possible, this malloc manages + memory from the system in page-size units. This may be (and + usually is) a function rather than a constant. This is ignored + if WIN32, where page size is determined using getSystemInfo during + initialization. + +USE_DEV_RANDOM default: 0 (i.e., not used) + Causes malloc to use /dev/random to initialize secure magic seed for + stamping footers. Otherwise, the current time is used. + +NO_MALLINFO default: 0 + If defined, don't compile "mallinfo".
This can be a simple way + of dealing with mismatches between system declarations and + those in this file. + +MALLINFO_FIELD_TYPE default: size_t + The type of the fields in the mallinfo struct. This was originally + defined as "int" in SVID etc, but is more usefully defined as + size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set. + +NO_MALLOC_STATS default: 0 + If defined, don't compile "malloc_stats". This avoids calls to + fprintf and bringing in stdio dependencies you might not want. + +REALLOC_ZERO_BYTES_FREES default: not defined + This should be set if a call to realloc with zero bytes should + be the same as a call to free. Some people think it should. Otherwise, + since this malloc returns a unique pointer for malloc(0), so does + realloc(p, 0). + +LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H +LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H +LACKS_STDLIB_H LACKS_SCHED_H LACKS_TIME_H default: NOT defined unless on WIN32 + Define these if your system does not have these header files. + You might need to manually insert some of the declarations they provide. + +DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, + system_info.dwAllocationGranularity in WIN32, + otherwise 64K. + Also settable using mallopt(M_GRANULARITY, x) + The unit for allocating and deallocating memory from the system. On + most systems with contiguous MORECORE, there is no reason to + make this more than a page. However, systems with MMAP tend to + either require or encourage larger granularities. You can increase + this value to prevent system allocation functions from being called so + often, especially if they are slow. The value must be at least one + page and must be a power of two. Setting to 0 causes initialization + to either page size or win32 region size.
(Note: In previous + versions of malloc, the equivalent of this option was called + "TOP_PAD") + +DEFAULT_TRIM_THRESHOLD default: 2MB + Also settable using mallopt(M_TRIM_THRESHOLD, x) + The maximum amount of unused top-most memory to keep before + releasing via malloc_trim in free(). Automatic trimming is mainly + useful in long-lived programs using contiguous MORECORE. Because + trimming via sbrk can be slow on some systems, and can sometimes be + wasteful (in cases where programs immediately afterward allocate + more large chunks) the value should be high enough so that your + overall system performance would improve by releasing this much + memory. As a rough guide, you might set to a value close to the + average size of a process (program) running on your system. + Releasing this much memory would allow such a process to run in + memory. Generally, it is worth tuning trim thresholds when a + program undergoes phases where several large chunks are allocated + and released in ways that can reuse each other's storage, perhaps + mixed with phases where there are no such chunks at all. The trim + value must be greater than page size to have any useful effect. To + disable trimming completely, you can set to MAX_SIZE_T. Note that the trick + some people use of mallocing a huge space and then freeing it at + program startup, in an attempt to reserve system memory, doesn't + have the intended effect under automatic trimming, since that memory + will immediately be returned to the system. + +DEFAULT_MMAP_THRESHOLD default: 256K + Also settable using mallopt(M_MMAP_THRESHOLD, x) + The request size threshold for using MMAP to directly service a + request. Requests of at least this size that cannot be allocated + using already-existing space will be serviced via mmap. (If enough + normal freed space already exists it is used instead.) Using mmap + segregates relatively large chunks of memory so that they can be + individually obtained and released from the host system. 
A request + serviced through mmap is never reused by any other request (at least + not directly; the system may just so happen to remap successive + requests to the same locations). Segregating space in this way has + the benefits that: Mmapped space can always be individually released + back to the system, which helps keep the system level memory demands + of a long-lived program low. Also, mapped memory doesn't become + `locked' between other chunks, as can happen with normally allocated + chunks, which means that even trimming via malloc_trim would not + release them. However, it has the disadvantage that the space + cannot be reclaimed, consolidated, and then used to service later + requests, as happens with normal chunks. The advantages of mmap + nearly always outweigh disadvantages for "large" chunks, but the + value of "large" may vary across systems. The default is an + empirically derived value that works well in most systems. You can + disable mmap by setting to MAX_SIZE_T. + +MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP + The number of consolidated frees between checks to release + unused segments when freeing. When using non-contiguous segments, + especially with multiple mspaces, checking only for topmost space + doesn't always suffice to trigger trimming. To compensate for this, + free() will, with a period of MAX_RELEASE_CHECK_RATE (or the + current number of segments, if greater) try to release unused + segments to the OS when freeing chunks that result in + consolidation. The best value for this parameter is a compromise + between slowing down frees with relatively costly checks that + rarely trigger versus holding on to unused memory. To effectively + disable, set to MAX_SIZE_T. This may lead to a very slight speed + improvement at the expense of carrying around more memory. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +/* Version identifier to allow people to support multiple versions */ +#ifndef DLMALLOC_VERSION +#define DLMALLOC_VERSION 20806 +#endif /* DLMALLOC_VERSION */ + +#ifndef DLMALLOC_EXPORT +#define DLMALLOC_EXPORT extern +#endif + +#ifndef WIN32 +#ifdef _WIN32 +#define WIN32 1 +#endif /* _WIN32 */ +#ifdef _WIN32_WCE +#define LACKS_FCNTL_H +#define WIN32 1 +#endif /* _WIN32_WCE */ +#endif /* WIN32 */ +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#include +#define HAVE_MMAP 1 +#define HAVE_MORECORE 0 +#define LACKS_UNISTD_H +#define LACKS_SYS_PARAM_H +#define LACKS_SYS_MMAN_H +#define LACKS_STRING_H +#define LACKS_STRINGS_H +#define LACKS_SYS_TYPES_H +#define LACKS_ERRNO_H +#define LACKS_SCHED_H +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION +#endif /* MALLOC_FAILURE_ACTION */ +#ifndef MMAP_CLEARS +#ifdef _WIN32_WCE /* WINCE reportedly does not clear */ +#define MMAP_CLEARS 0 +#else +#define MMAP_CLEARS 1 +#endif /* _WIN32_WCE */ +#endif /*MMAP_CLEARS */ +#endif /* WIN32 */ + +#if defined(DARWIN) || defined(_DARWIN) +/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */ +#ifndef HAVE_MORECORE +#define HAVE_MORECORE 0 +#define HAVE_MMAP 1 +/* OSX allocators provide 16 byte alignment */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)16U) +#endif +#endif /* HAVE_MORECORE */ +#endif /* DARWIN */ + +#ifndef LACKS_SYS_TYPES_H +#include /* For size_t */ +#endif /* LACKS_SYS_TYPES_H */ + +/* The maximum possible size_t value has all bits set */ +#define MAX_SIZE_T (~(size_t)0) + +#ifndef USE_LOCKS /* ensure true if spin or recursive locks set */ +#define USE_LOCKS ((defined(USE_SPIN_LOCKS) && USE_SPIN_LOCKS != 0) || \ + (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0)) +#endif /* USE_LOCKS */ + +#if USE_LOCKS /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */ +#if ((defined(__GNUC__) && \ + ((__GNUC__ > 4 || (__GNUC__ == 4 && 
__GNUC_MINOR__ >= 1)) || \ + defined(__i386__) || defined(__x86_64__))) || \ + (defined(_MSC_VER) && _MSC_VER>=1310)) +#ifndef USE_SPIN_LOCKS +#define USE_SPIN_LOCKS 1 +#endif /* USE_SPIN_LOCKS */ +#elif USE_SPIN_LOCKS +#error "USE_SPIN_LOCKS defined without implementation" +#endif /* ... locks available... */ +#elif !defined(USE_SPIN_LOCKS) +#define USE_SPIN_LOCKS 0 +#endif /* USE_LOCKS */ + +#ifndef ONLY_MSPACES +#define ONLY_MSPACES 0 +#endif /* ONLY_MSPACES */ +#ifndef MSPACES +#if ONLY_MSPACES +#define MSPACES 1 +#else /* ONLY_MSPACES */ +#define MSPACES 0 +#endif /* ONLY_MSPACES */ +#endif /* MSPACES */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *))) +#endif /* MALLOC_ALIGNMENT */ +#ifndef FOOTERS +#define FOOTERS 0 +#endif /* FOOTERS */ +#ifndef ABORT +#define ABORT abort() +#endif /* ABORT */ +#ifndef ABORT_ON_ASSERT_FAILURE +#define ABORT_ON_ASSERT_FAILURE 1 +#endif /* ABORT_ON_ASSERT_FAILURE */ +#ifndef PROCEED_ON_ERROR +#define PROCEED_ON_ERROR 0 +#endif /* PROCEED_ON_ERROR */ + +#ifndef INSECURE +#define INSECURE 0 +#endif /* INSECURE */ +#ifndef MALLOC_INSPECT_ALL +#define MALLOC_INSPECT_ALL 0 +#endif /* MALLOC_INSPECT_ALL */ +#ifndef HAVE_MMAP +#define HAVE_MMAP 1 +#endif /* HAVE_MMAP */ +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 1 +#endif /* MMAP_CLEARS */ +#ifndef HAVE_MREMAP +#ifdef linux +#define HAVE_MREMAP 1 +#ifndef _GNU_SOURCE +#define _GNU_SOURCE /* Turns on mremap() definition */ +#endif /* _GNU_SOURCE */ +#else /* linux */ +#define HAVE_MREMAP 0 +#endif /* linux */ +#endif /* HAVE_MREMAP */ +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION errno = ENOMEM; +#endif /* MALLOC_FAILURE_ACTION */ +#ifndef HAVE_MORECORE +#if ONLY_MSPACES +#define HAVE_MORECORE 0 +#else /* ONLY_MSPACES */ +#define HAVE_MORECORE 1 +#endif /* ONLY_MSPACES */ +#endif /* HAVE_MORECORE */ +#if !HAVE_MORECORE +#define MORECORE_CONTIGUOUS 0 +#else /* !HAVE_MORECORE */ +#define MORECORE_DEFAULT sbrk +#ifndef MORECORE_CONTIGUOUS 
+#define MORECORE_CONTIGUOUS 1 +#endif /* MORECORE_CONTIGUOUS */ +#endif /* HAVE_MORECORE */ +#ifndef DEFAULT_GRANULARITY +#if (MORECORE_CONTIGUOUS || defined(WIN32)) +#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ +#else /* MORECORE_CONTIGUOUS */ +#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) +#endif /* MORECORE_CONTIGUOUS */ +#endif /* DEFAULT_GRANULARITY */ +#ifndef DEFAULT_TRIM_THRESHOLD +#ifndef MORECORE_CANNOT_TRIM +#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) +#else /* MORECORE_CANNOT_TRIM */ +#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T +#endif /* MORECORE_CANNOT_TRIM */ +#endif /* DEFAULT_TRIM_THRESHOLD */ +#ifndef DEFAULT_MMAP_THRESHOLD +#if HAVE_MMAP +#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) +#else /* HAVE_MMAP */ +#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* DEFAULT_MMAP_THRESHOLD */ +#ifndef MAX_RELEASE_CHECK_RATE +#if HAVE_MMAP +#define MAX_RELEASE_CHECK_RATE 4095 +#else +#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* MAX_RELEASE_CHECK_RATE */ +#ifndef USE_BUILTIN_FFS +#define USE_BUILTIN_FFS 0 +#endif /* USE_BUILTIN_FFS */ +#ifndef USE_DEV_RANDOM +#define USE_DEV_RANDOM 0 +#endif /* USE_DEV_RANDOM */ +#ifndef NO_MALLINFO +#define NO_MALLINFO 0 +#endif /* NO_MALLINFO */ +#ifndef MALLINFO_FIELD_TYPE +#define MALLINFO_FIELD_TYPE size_t +#endif /* MALLINFO_FIELD_TYPE */ +#ifndef NO_MALLOC_STATS +#define NO_MALLOC_STATS 0 +#endif /* NO_MALLOC_STATS */ +#ifndef NO_SEGMENT_TRAVERSAL +#define NO_SEGMENT_TRAVERSAL 0 +#endif /* NO_SEGMENT_TRAVERSAL */ + +/* + mallopt tuning options. SVID/XPG defines four standard parameter + numbers for mallopt, normally defined in malloc.h. None of these + are used in this malloc, so setting them has no effect. But this + malloc does support the following options. 
+*/ + +#define M_TRIM_THRESHOLD (-1) +#define M_GRANULARITY (-2) +#define M_MMAP_THRESHOLD (-3) + +/* ------------------------ Mallinfo declarations ------------------------ */ + +#if !NO_MALLINFO +/* + This version of malloc supports the standard SVID/XPG mallinfo + routine that returns a struct containing usage properties and + statistics. It should work on any system that has a + /usr/include/malloc.h defining struct mallinfo. The main + declaration needed is the mallinfo struct that is returned (by-copy) + by mallinfo(). The mallinfo struct contains a bunch of fields that + are not even meaningful in this version of malloc. These fields + are instead filled by mallinfo() with other numbers that might be of + interest. + + HAVE_USR_INCLUDE_MALLOC_H should be set if you have a + /usr/include/malloc.h file that includes a declaration of struct + mallinfo. If so, it is included; else a compliant version is + declared below. These must be precisely the same for mallinfo() to + work. The original SVID version of this struct, defined on most + systems with mallinfo, declares all fields as ints. But some others + define as unsigned long. If your system defines the fields using a + type of different width than listed here, you MUST #include your + system version and #define HAVE_USR_INCLUDE_MALLOC_H. +*/ + +/* #define HAVE_USR_INCLUDE_MALLOC_H */ + +#ifdef HAVE_USR_INCLUDE_MALLOC_H +#include "/usr/include/malloc.h" +#else /* HAVE_USR_INCLUDE_MALLOC_H */ +#ifndef STRUCT_MALLINFO_DECLARED +/* HP-UX (and others?)
redefines mallinfo unless _STRUCT_MALLINFO is defined */ +#define _STRUCT_MALLINFO +#define STRUCT_MALLINFO_DECLARED 1 +struct mallinfo { + MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ + MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ + MALLINFO_FIELD_TYPE smblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ + MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ + MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ + MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ + MALLINFO_FIELD_TYPE fordblks; /* total free space */ + MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ +}; +#endif /* STRUCT_MALLINFO_DECLARED */ +#endif /* HAVE_USR_INCLUDE_MALLOC_H */ +#endif /* NO_MALLINFO */ + +/* + Try to persuade compilers to inline. The most critical functions for + inlining are defined as macros, so these aren't used for them. +*/ + +#ifndef FORCEINLINE + #if defined(__GNUC__) +#define FORCEINLINE __inline __attribute__ ((always_inline)) + #elif defined(_MSC_VER) + #define FORCEINLINE __forceinline + #endif +#endif +#ifndef NOINLINE + #if defined(__GNUC__) + #define NOINLINE __attribute__ ((noinline)) + #elif defined(_MSC_VER) + #define NOINLINE __declspec(noinline) + #else + #define NOINLINE + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#ifndef FORCEINLINE + #define FORCEINLINE inline +#endif +#endif /* __cplusplus */ +#ifndef FORCEINLINE + #define FORCEINLINE +#endif + +#if !ONLY_MSPACES + +/* ------------------- Declarations of public routines ------------------- */ + +#ifdef UCM_MALLOC_PREFIX +#define dlcalloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, calloc) +#define dlfree UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, free) +#define dlmalloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc) +#define dlmemalign UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, memalign) +#define dlposix_memalign UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, posix_memalign) 
+#define dlrealloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, realloc) +#define dlrealloc_in_place UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, realloc_in_place) +#define dlvalloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, valloc) +#define dlpvalloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, pvalloc) +#define dlmallinfo UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, mallinfo) +#define dlmallopt UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, mallopt) +#define dlmalloc_trim UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_trim) +#define dlmalloc_stats UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_stats) +#define dlmalloc_usable_size UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_usable_size) +#define dlmalloc_footprint UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_footprint) +#define dlmalloc_max_footprint UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_max_footprint) +#define dlmalloc_footprint_limit UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_footprint_limit) +#define dlmalloc_set_footprint_limit UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_set_footprint_limit) +#define dlmalloc_inspect_all UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, malloc_inspect_all) +#define dlindependent_calloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, independent_calloc) +#define dlindependent_comalloc UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, independent_comalloc) +#define dlbulk_free UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, bulk_free) +#define dlmallopt_get UCS_PP_TOKENPASTE(UCM_MALLOC_PREFIX, mallopt_get) +#endif /* UCM_MALLOC_PREFIX */ + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or + null if no space is available, in which case errno is set to ENOMEM + on ANSI C systems. + + If n is zero, malloc returns a minimum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 32 bytes on 64bit + systems.) Note that size_t is an unsigned type, so calls with + arguments that would be negative if signed are interpreted as + requests for huge amounts of space, which will often fail. 
The + maximum supported value of n differs across systems, but is in all + cases less than the maximum representable value of a size_t. +*/ +DLMALLOC_EXPORT void* dlmalloc(size_t); + +/* + free(void* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. If p was not malloced or already + freed, free(p) will by default cause the current program to abort. +*/ +DLMALLOC_EXPORT void dlfree(void*); + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. +*/ +DLMALLOC_EXPORT void* dlcalloc(size_t, size_t); + +/* + realloc(void* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. + + The returned pointer may or may not be the same as p. The algorithm + prefers extending p in most cases when possible, otherwise it + employs the equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. realloc with a size + argument of zero (re)allocates a minimum-sized chunk. + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ +DLMALLOC_EXPORT void* dlrealloc(void*, size_t); + +/* + realloc_in_place(void* p, size_t n) + Resizes the space allocated for p to size n, only if this can be + done without moving p (i.e., only if there is adjacent space + available if n is greater than p's current allocated size, or n is + less than or equal to p's size). 
This may be used instead of plain + realloc if an alternative allocation strategy is needed upon failure + to expand space; for example, reallocation of a buffer that must be + memory-aligned or cleared. You can use realloc_in_place to trigger + these alternatives only when needed. + + Returns p if successful; otherwise null. +*/ +DLMALLOC_EXPORT void* dlrealloc_in_place(void*, size_t); + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. +*/ +DLMALLOC_EXPORT void* dlmemalign(size_t, size_t); + +/* + int posix_memalign(void** pp, size_t alignment, size_t n); + Allocates a chunk of n bytes, aligned in accord with the alignment + argument. Differs from memalign only in that it (1) assigns the + allocated memory to *pp rather than returning it, (2) fails and + returns EINVAL if the alignment is not a power of two (3) fails and + returns ENOMEM if memory cannot be allocated. +*/ +DLMALLOC_EXPORT int dlposix_memalign(void**, size_t, size_t); + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +DLMALLOC_EXPORT void* dlvalloc(size_t); + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. 
To work around the fact that mallopt is specified to use int, + not size_t parameters, the value -1 is specially treated as the + maximum unsigned size_t value. + + SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. None of these are used in this malloc, + so setting them has no effect. But this malloc also supports other + options in mallopt. See below for details. Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) + M_GRANULARITY -2 page size any power of 2 >= page size + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) +*/ +DLMALLOC_EXPORT int dlmallopt(int, int); + +/* + malloc_footprint(); + Returns the number of bytes obtained from the system. The total + number of bytes allocated by malloc, realloc etc., is less than this + value. Unlike mallinfo, this function returns only a precomputed + result, so can be called frequently to monitor memory consumption. + Even if locks are otherwise defined, this function does not use them, + so results might not be up to date. +*/ +DLMALLOC_EXPORT size_t dlmalloc_footprint(void); + +/* + malloc_max_footprint(); + Returns the maximum number of bytes obtained from the system. This + value will be greater than current footprint if deallocated space + has been reclaimed by the system. The peak number of bytes allocated + by malloc, realloc etc., is less than this value. Unlike mallinfo, + this function returns only a precomputed result, so can be called + frequently to monitor memory consumption. Even if locks are + otherwise defined, this function does not use them, so results might + not be up to date.
+*/ +DLMALLOC_EXPORT size_t dlmalloc_max_footprint(void); + +/* + malloc_footprint_limit(); + Returns the number of bytes that the heap is allowed to obtain from + the system, returning the last value returned by + malloc_set_footprint_limit, or the maximum size_t value if + never set. The returned value reflects a permission. There is no + guarantee that this number of bytes can actually be obtained from + the system. +*/ +DLMALLOC_EXPORT size_t dlmalloc_footprint_limit(void); + +/* + malloc_set_footprint_limit(); + Sets the maximum number of bytes to obtain from the system, causing + failure returns from malloc and related functions upon attempts to + exceed this value. The argument value may be subject to page + rounding to an enforceable limit; this actual value is returned. + Using an argument of the maximum possible size_t effectively + disables checks. If the argument is less than or equal to the + current malloc_footprint, then all future allocations that require + additional system memory will fail. However, invocation cannot + retroactively deallocate existing used memory. +*/ +DLMALLOC_EXPORT size_t dlmalloc_set_footprint_limit(size_t bytes); + +#if MALLOC_INSPECT_ALL +/* + malloc_inspect_all(void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg); + Traverses the heap and calls the given handler for each managed + region, skipping all bytes that are (or may be) used for bookkeeping + purposes. Traversal does not include chunks that have been + directly memory mapped. Each reported region begins at the start + address, and continues up to but not including the end address. The + first used_bytes of the region contain allocated data. If + used_bytes is zero, the region is unallocated. The handler is + invoked with the given callback argument. If locks are defined, they + are held during the entire traversal. It is a bad idea to invoke + other malloc functions from within the handler.
+ + For example, to count the number of in-use chunks with size greater + than 1000, you could write: + static int count = 0; + void count_chunks(void* start, void* end, size_t used, void* arg) { + if (used >= 1000) ++count; + } + then: + malloc_inspect_all(count_chunks, NULL); + + malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined. +*/ +DLMALLOC_EXPORT void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), + void* arg); + +#endif /* MALLOC_INSPECT_ALL */ + +#if !NO_MALLINFO +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: always zero. + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: always zero + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. +*/ +DLMALLOC_EXPORT struct mallinfo dlmallinfo(void); +#endif /* NO_MALLINFO */ + +/* + independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. 
The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. + + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be freed when it is no longer needed. This can be + done all at once using bulk_free. + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0)); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (int i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +DLMALLOC_EXPORT void** dlindependent_calloc(size_t, size_t, void**); + +/* + independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array.
It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be freed when it is no longer needed. This can be + done all at once using bulk_free. + + independent_comalloc differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... }; + struct Foot { ... }; + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother.
+ + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +DLMALLOC_EXPORT void** dlindependent_comalloc(size_t, size_t*, void**); + +/* + bulk_free(void* array[], size_t n_elements) + Frees and clears (sets to null) each non-null pointer in the given + array. This is likely to be faster than freeing them one-by-one. + If footers are used, pointers that have been allocated in different + mspaces are not freed or cleared, and the count of all such pointers + is returned. For large arrays of pointers with poor locality, it + may be worthwhile to sort this array before calling bulk_free. +*/ +DLMALLOC_EXPORT size_t dlbulk_free(void**, size_t n_elements); + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +DLMALLOC_EXPORT void* dlpvalloc(size_t); + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative arguments + to sbrk) if there is unused memory at the `high' end of the malloc + pool or in unused MMAP segments. You can call this after freeing + large blocks of memory to potentially reduce the system-level memory + requirements of a program. However, it cannot guarantee to reduce + memory. Under some allocation patterns, some large free blocks of + memory will be locked between two used chunks, so they cannot be + given back to the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, only + the minimum amount of memory to maintain internal data structures + will be left. Non-zero arguments can be supplied to maintain enough + trailing space to service future expected allocations without having + to re-obtain memory from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. 
+*/ +DLMALLOC_EXPORT int dlmalloc_trim(size_t); + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. +*/ +DLMALLOC_EXPORT void dlmalloc_stats(void); + +/* + malloc_usable_size(void* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); +*/ +size_t dlmalloc_usable_size(void*); + +#endif /* ONLY_MSPACES */ + +#if MSPACES + +/* + mspace is an opaque type representing an independent + region of space that supports mspace_malloc, etc. +*/ +typedef void* mspace; + +/* + create_mspace creates and returns a new independent space with the + given initial capacity, or, if 0, the default granularity size. It + returns null if there is no system memory available to create the + space. 
If argument locked is non-zero, the space uses a separate + lock to control access. The capacity of the space will grow + dynamically as needed to service mspace_malloc requests. You can + control the sizes of incremental increases of this space by + compiling with a different DEFAULT_GRANULARITY or dynamically + setting with mallopt(M_GRANULARITY, value). +*/ +DLMALLOC_EXPORT mspace create_mspace(size_t capacity, int locked); + +/* + destroy_mspace destroys the given space, and attempts to return all + of its memory back to the system, returning the total number of + bytes freed. After destruction, the results of access to all memory + used by the space become undefined. +*/ +DLMALLOC_EXPORT size_t destroy_mspace(mspace msp); + +/* + create_mspace_with_base uses the memory supplied as the initial base + of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this + space is used for bookkeeping, so the capacity must be at least this + large. (Otherwise 0 is returned.) When this initial space is + exhausted, additional memory will be obtained from the system. + Destroying this space will deallocate all additionally allocated + space (if possible) but not the initial base. +*/ +DLMALLOC_EXPORT mspace create_mspace_with_base(void* base, size_t capacity, int locked); + +/* + mspace_track_large_chunks controls whether requests for large chunks + are allocated in their own untracked mmapped regions, separate from + others in this mspace. By default large chunks are not tracked, + which reduces fragmentation. However, such chunks are not + necessarily released to the system upon destroy_mspace. Enabling + tracking by setting to true may increase fragmentation, but avoids + leakage when relying on destroy_mspace to release all memory + allocated using this space. The function returns the previous + setting. +*/ +DLMALLOC_EXPORT int mspace_track_large_chunks(mspace msp, int enable); + + +/* + mspace_malloc behaves as malloc, but operates within + the given space. 
+*/ +DLMALLOC_EXPORT void* mspace_malloc(mspace msp, size_t bytes); + +/* + mspace_free behaves as free, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_free is not actually needed. + free may be called instead of mspace_free because freed chunks from + any space are handled by their originating spaces. +*/ +DLMALLOC_EXPORT void mspace_free(mspace msp, void* mem); + +/* + mspace_realloc behaves as realloc, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_realloc is not actually + needed. realloc may be called instead of mspace_realloc because + realloced chunks from any space are handled by their originating + spaces. +*/ +DLMALLOC_EXPORT void* mspace_realloc(mspace msp, void* mem, size_t newsize); + +/* + mspace_calloc behaves as calloc, but operates within + the given space. +*/ +DLMALLOC_EXPORT void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); + +/* + mspace_memalign behaves as memalign, but operates within + the given space. +*/ +DLMALLOC_EXPORT void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); + +/* + mspace_independent_calloc behaves as independent_calloc, but + operates within the given space. +*/ +DLMALLOC_EXPORT void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]); + +/* + mspace_independent_comalloc behaves as independent_comalloc, but + operates within the given space. +*/ +DLMALLOC_EXPORT void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]); + +/* + mspace_footprint() returns the number of bytes obtained from the + system for this space. +*/ +DLMALLOC_EXPORT size_t mspace_footprint(mspace msp); + +/* + mspace_max_footprint() returns the peak number of bytes obtained from the + system for this space. 
+*/ +DLMALLOC_EXPORT size_t mspace_max_footprint(mspace msp); + + +#if !NO_MALLINFO +/* + mspace_mallinfo behaves as mallinfo, but reports properties of + the given space. +*/ +DLMALLOC_EXPORT struct mallinfo mspace_mallinfo(mspace msp); +#endif /* NO_MALLINFO */ + +/* + mspace_usable_size(const void* mem) behaves the same as malloc_usable_size. +*/ +DLMALLOC_EXPORT size_t mspace_usable_size(const void* mem); + +/* + mspace_malloc_stats behaves as malloc_stats, but reports + properties of the given space. +*/ +DLMALLOC_EXPORT void mspace_malloc_stats(mspace msp); + +/* + mspace_trim behaves as malloc_trim, but + operates within the given space. +*/ +DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad); + +/* + An alias for mallopt. +*/ +DLMALLOC_EXPORT int mspace_mallopt(int, int); + +#endif /* MSPACES */ + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif /* __cplusplus */ + +/* + ======================================================================== + To make a fully customizable malloc.h header file, cut everything + above this line, put into file malloc.h, edit to suit, and #include it + on the next line, as well as in programs that use this malloc.
+ ======================================================================== +*/ + +/* #include "malloc.h" */ + +/*------------------------------ internal #includes ---------------------- */ + +#ifdef _MSC_VER +#pragma warning( disable : 4146 ) /* no "unsigned" warnings */ +#endif /* _MSC_VER */ +#if !NO_MALLOC_STATS +#include <stdio.h> /* for printing in malloc_stats */ +#endif /* NO_MALLOC_STATS */ +#ifndef LACKS_ERRNO_H +#include <errno.h> /* for MALLOC_FAILURE_ACTION */ +#endif /* LACKS_ERRNO_H */ +#ifdef DEBUG +#if ABORT_ON_ASSERT_FAILURE +#undef assert +#define assert(x) if(!(x)) ABORT +#else /* ABORT_ON_ASSERT_FAILURE */ +#include <assert.h> +#endif /* ABORT_ON_ASSERT_FAILURE */ +#else /* DEBUG */ +#ifndef assert +#define assert(x) +#endif +#define DEBUG 0 +#endif /* DEBUG */ +#if !defined(WIN32) && !defined(LACKS_TIME_H) +#include <time.h> /* for magic initialization */ +#endif /* WIN32 */ +#ifndef LACKS_STDLIB_H +#include <stdlib.h> /* for abort() */ +#endif /* LACKS_STDLIB_H */ +#ifndef LACKS_STRING_H +#include <string.h> /* for memset etc */ +#endif /* LACKS_STRING_H */ +#if USE_BUILTIN_FFS +#ifndef LACKS_STRINGS_H +#include <strings.h> /* for ffs */ +#endif /* LACKS_STRINGS_H */ +#endif /* USE_BUILTIN_FFS */ +#if HAVE_MMAP +#ifndef LACKS_SYS_MMAN_H +/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */ +#if (defined(linux) && !defined(__USE_GNU)) +#define __USE_GNU 1 +#include <sys/mman.h> /* for mmap */ +#undef __USE_GNU +#else +#include <sys/mman.h> /* for mmap */ +#endif /* linux */ +#endif /* LACKS_SYS_MMAN_H */ +#ifndef LACKS_FCNTL_H +#include <fcntl.h> +#endif /* LACKS_FCNTL_H */ +#endif /* HAVE_MMAP */ +#ifndef LACKS_UNISTD_H +#include <unistd.h> /* for sbrk, sysconf */ +#else /* LACKS_UNISTD_H */ +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +extern void* sbrk(ptrdiff_t); +#endif /* FreeBSD etc */ +#endif /* LACKS_UNISTD_H */ + +/* Declarations for locking */ +#if USE_LOCKS +#ifndef WIN32 +#if defined (__SVR4) && defined (__sun) /* solaris */ +#include <thread.h> +#elif !defined(LACKS_SCHED_H) +#include <sched.h> +#endif /* solaris or
LACKS_SCHED_H */ +#if (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) || !USE_SPIN_LOCKS +#include <pthread.h> +#endif /* USE_RECURSIVE_LOCKS ... */ +#elif defined(_MSC_VER) +#ifndef _M_AMD64 +/* These are already defined on AMD64 builds */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp); +LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value); +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* _M_AMD64 */ +#pragma intrinsic (_InterlockedCompareExchange) +#pragma intrinsic (_InterlockedExchange) +#define interlockedcompareexchange _InterlockedCompareExchange +#define interlockedexchange _InterlockedExchange +#elif defined(WIN32) && defined(__GNUC__) +#define interlockedcompareexchange(a, b, c) __sync_val_compare_and_swap(a, c, b) +#define interlockedexchange __sync_lock_test_and_set +#endif /* Win32 */ +#else /* USE_LOCKS */ +#endif /* USE_LOCKS */ + +#ifndef LOCK_AT_FORK +#define LOCK_AT_FORK 0 +#endif + +/* Declarations for bit scanning on win32 */ +#if defined(_MSC_VER) && _MSC_VER>=1300 +#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +unsigned char _BitScanForward(unsigned long *index, unsigned long mask); +unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#define BitScanForward _BitScanForward +#define BitScanReverse _BitScanReverse +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse) +#endif /* BitScanForward */ +#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */ + +#ifndef WIN32 +#ifndef malloc_getpagesize +# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ +# ifndef _SC_PAGE_SIZE +# define _SC_PAGE_SIZE _SC_PAGESIZE +# endif +# endif +# ifdef _SC_PAGE_SIZE +# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) +# else +# if defined(BSD) || defined(DGUX) ||
defined(HAVE_GETPAGESIZE) + extern size_t getpagesize(); +# define malloc_getpagesize getpagesize() +# else +# ifdef WIN32 /* use supplied emulation of getpagesize */ +# define malloc_getpagesize getpagesize() +# else +# ifndef LACKS_SYS_PARAM_H +# include <sys/param.h> +# endif +# ifdef EXEC_PAGESIZE +# define malloc_getpagesize EXEC_PAGESIZE +# else +# ifdef NBPG +# ifndef CLSIZE +# define malloc_getpagesize NBPG +# else +# define malloc_getpagesize (NBPG * CLSIZE) +# endif +# else +# ifdef NBPC +# define malloc_getpagesize NBPC +# else +# ifdef PAGESIZE +# define malloc_getpagesize PAGESIZE +# else /* just guess */ +# define malloc_getpagesize ((size_t)4096U) +# endif +# endif +# endif +# endif +# endif +# endif +# endif +#endif +#endif + +/* ------------------- size_t and alignment properties -------------------- */ + +/* The byte and bit size of a size_t */ +#define SIZE_T_SIZE (sizeof(size_t)) +#define SIZE_T_BITSIZE (sizeof(size_t) << 3) + +/* Some constants coerced to size_t */ +/* Annoying but necessary to avoid errors on some platforms */ +#define SIZE_T_ZERO ((size_t)0) +#define SIZE_T_ONE ((size_t)1) +#define SIZE_T_TWO ((size_t)2) +#define SIZE_T_FOUR ((size_t)4) +#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) +#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) +#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) +#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) + +/* The bit mask value corresponding to MALLOC_ALIGNMENT */ +#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) + +/* True if address a has acceptable alignment */ +#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0) + +/* the number of bytes to offset an address to align it */ +#define align_offset(A)\ + ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)?
0 :\ + ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) + +/* -------------------------- MMAP preliminaries ------------------------- */ + +/* + If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and + checks to fail so compiler optimizer can delete code rather than + using so many "#if"s. +*/ + + +/* MORECORE and MMAP must return MFAIL on failure */ +#define MFAIL ((void*)(MAX_SIZE_T)) +#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ + +#if HAVE_MMAP + +#ifndef WIN32 +#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) +#define MMAP_PROT (PROT_READ|PROT_WRITE) +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif /* MAP_ANON */ +#ifdef MAP_ANONYMOUS +#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) +#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#else /* MAP_ANONYMOUS */ +/* + Nearly all versions of mmap support MAP_ANONYMOUS, so the following + is unlikely to be needed, but is supplied just in case. +*/ +#define MMAP_FLAGS (MAP_PRIVATE) +static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ +#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ + (dev_zero_fd = open("/dev/zero", O_RDWR), \ + mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ + mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) +#endif /* MAP_ANONYMOUS */ + +#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) + +#else /* WIN32 */ + +/* Win32 MMAP via VirtualAlloc */ +static FORCEINLINE void* win32mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + return (ptr != 0)? ptr: MFAIL; +} + +/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ +static FORCEINLINE void* win32direct_mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, + PAGE_READWRITE); + return (ptr != 0)? 
ptr: MFAIL; +} + +/* This function supports releasing coalesed segments */ +static FORCEINLINE int win32munmap(void* ptr, size_t size) { + MEMORY_BASIC_INFORMATION minfo; + char* cptr = (char*)ptr; + while (size) { + if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) + return -1; + if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || + minfo.State != MEM_COMMIT || minfo.RegionSize > size) + return -1; + if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) + return -1; + cptr += minfo.RegionSize; + size -= minfo.RegionSize; + } + return 0; +} + +#define MMAP_DEFAULT(s) win32mmap(s) +#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) +#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) +#endif /* WIN32 */ +#endif /* HAVE_MMAP */ + +#if HAVE_MREMAP +#ifndef WIN32 +#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) +#endif /* WIN32 */ +#endif /* HAVE_MREMAP */ + +/** + * Define CALL_MORECORE + */ +#if HAVE_MORECORE + #ifdef MORECORE + #define CALL_MORECORE(S) MORECORE(S) + #else /* MORECORE */ + #define CALL_MORECORE(S) MORECORE_DEFAULT(S) + #endif /* MORECORE */ +#else /* HAVE_MORECORE */ + #define CALL_MORECORE(S) MFAIL +#endif /* HAVE_MORECORE */ + +/** + * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP + */ +#if HAVE_MMAP + #define USE_MMAP_BIT (SIZE_T_ONE) + + #ifdef MMAP + #define CALL_MMAP(s) MMAP(s) + #else /* MMAP */ + #define CALL_MMAP(s) MMAP_DEFAULT(s) + #endif /* MMAP */ + #ifdef MUNMAP + #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) + #else /* MUNMAP */ + #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) + #endif /* MUNMAP */ + #ifdef DIRECT_MMAP + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #else /* DIRECT_MMAP */ + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) + #endif /* DIRECT_MMAP */ +#else /* HAVE_MMAP */ + #define USE_MMAP_BIT (SIZE_T_ZERO) + + #define MMAP(s) MFAIL + #define MUNMAP(a, s) (-1) + #define DIRECT_MMAP(s) MFAIL + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #define CALL_MMAP(s) MMAP(s) + #define 
CALL_MUNMAP(a, s) MUNMAP((a), (s)) +#endif /* HAVE_MMAP */ + +/** + * Define CALL_MREMAP + */ +#if HAVE_MMAP && HAVE_MREMAP + #ifdef MREMAP + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv)) + #else /* MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) + #endif /* MREMAP */ +#else /* HAVE_MMAP && HAVE_MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL +#endif /* HAVE_MMAP && HAVE_MREMAP */ + +/* mstate bit set if contiguous morecore disabled or failed */ +#define USE_NONCONTIGUOUS_BIT (4U) + +/* segment bit set in create_mspace_with_base */ +#define EXTERN_BIT (8U) + + +/* --------------------------- Lock preliminaries ------------------------ */ + +/* + When locks are defined, there is one global lock, plus + one per-mspace lock. + + The global lock ensures that mparams.magic and other unique + mparams values are initialized only once. It also protects + sequences of calls to MORECORE. In many cases sys_alloc requires + two calls, that should not be interleaved with calls by other + threads. This does not protect against direct calls to MORECORE + by other threads not using this lock, so there is still code to + cope as best we can with interference. + + Per-mspace locks surround calls to malloc, free, etc. + By default, locks are simple non-reentrant mutexes. + + Because lock-protected regions generally have bounded times, it is + OK to use the supplied simple spinlocks. Spinlocks are likely to + improve performance for lightly contended applications, but worsen + performance under heavy contention. + + If USE_LOCKS is > 1, the definitions of lock routines here are + bypassed, in which case you will need to define the type MLOCK_T, + and at least INITIAL_LOCK, DESTROY_LOCK, ACQUIRE_LOCK, RELEASE_LOCK + and TRY_LOCK. You must also declare a + static MLOCK_T malloc_global_mutex = { initialization values };.
+ +*/ + +#if !USE_LOCKS +#define USE_LOCK_BIT (0U) +#define INITIAL_LOCK(l) (0) +#define DESTROY_LOCK(l) (0) +#define ACQUIRE_MALLOC_GLOBAL_LOCK() +#define RELEASE_MALLOC_GLOBAL_LOCK() + +#else +#if USE_LOCKS > 1 +/* ----------------------- User-defined locks ------------------------ */ +/* Define your own lock implementation here */ +/* #define INITIAL_LOCK(lk) ... */ +/* #define DESTROY_LOCK(lk) ... */ +/* #define ACQUIRE_LOCK(lk) ... */ +/* #define RELEASE_LOCK(lk) ... */ +/* #define TRY_LOCK(lk) ... */ +/* static MLOCK_T malloc_global_mutex = ... */ + +#elif USE_SPIN_LOCKS + +/* First, define CAS_LOCK and CLEAR_LOCK on ints */ +/* Note CAS_LOCK defined to return 0 on success */ + +#if defined(__GNUC__)&& (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) +#define CAS_LOCK(sl) __sync_lock_test_and_set(sl, 1) +#define CLEAR_LOCK(sl) __sync_lock_release(sl) + +#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) +/* Custom spin locks for older gcc on x86 */ +static FORCEINLINE int x86_cas_lock(int *sl) { + int ret; + int val = 1; + int cmp = 0; + __asm__ __volatile__ ("lock; cmpxchgl %1, %2" + : "=a" (ret) + : "r" (val), "m" (*(sl)), "0"(cmp) + : "memory", "cc"); + return ret; +} + +static FORCEINLINE void x86_clear_lock(int* sl) { + assert(*sl != 0); + int prev = 0; + int ret; + __asm__ __volatile__ ("lock; xchgl %0, %1" + : "=r" (ret) + : "m" (*(sl)), "0"(prev) + : "memory"); +} + +#define CAS_LOCK(sl) x86_cas_lock(sl) +#define CLEAR_LOCK(sl) x86_clear_lock(sl) + +#else /* Win32 MSC */ +#define CAS_LOCK(sl) interlockedexchange(sl, (LONG)1) +#define CLEAR_LOCK(sl) interlockedexchange (sl, (LONG)0) + +#endif /* ... gcc spins locks ... 
*/ + +/* How to yield for a spin lock */ +#define SPINS_PER_YIELD 63 +#if defined(_MSC_VER) +#define SLEEP_EX_DURATION 50 /* delay for yield/sleep */ +#define SPIN_LOCK_YIELD SleepEx(SLEEP_EX_DURATION, FALSE) +#elif defined (__SVR4) && defined (__sun) /* solaris */ +#define SPIN_LOCK_YIELD thr_yield(); +#elif !defined(LACKS_SCHED_H) +#define SPIN_LOCK_YIELD sched_yield(); +#else +#define SPIN_LOCK_YIELD +#endif /* ... yield ... */ + +#if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0 +/* Plain spin locks use single word (embedded in malloc_states) */ +static int spin_acquire_lock(int *sl) { + int spins = 0; + while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) { + if ((++spins & SPINS_PER_YIELD) == 0) { + SPIN_LOCK_YIELD; + } + } + return 0; +} + +#define MLOCK_T int +#define TRY_LOCK(sl) !CAS_LOCK(sl) +#define RELEASE_LOCK(sl) CLEAR_LOCK(sl) +#define ACQUIRE_LOCK(sl) (CAS_LOCK(sl)? spin_acquire_lock(sl) : 0) +#define INITIAL_LOCK(sl) (*sl = 0) +#define DESTROY_LOCK(sl) (0) +static MLOCK_T malloc_global_mutex = 0; + +#else /* USE_RECURSIVE_LOCKS */ +/* types for lock owners */ +#ifdef WIN32 +#define THREAD_ID_T DWORD +#define CURRENT_THREAD GetCurrentThreadId() +#define EQ_OWNER(X,Y) ((X) == (Y)) +#else +/* + Note: the following assume that pthread_t is a type that can be + initialized to (casted) zero. If this is not the case, you will need to + somehow redefine these or not use spin locks. 
+*/ +#define THREAD_ID_T pthread_t +#define CURRENT_THREAD pthread_self() +#define EQ_OWNER(X,Y) pthread_equal(X, Y) +#endif + +struct malloc_recursive_lock { + int sl; + unsigned int c; + THREAD_ID_T threadid; +}; + +#define MLOCK_T struct malloc_recursive_lock +static MLOCK_T malloc_global_mutex = { 0, 0, (THREAD_ID_T)0}; + +static FORCEINLINE void recursive_release_lock(MLOCK_T *lk) { + assert(lk->sl != 0); + if (--lk->c == 0) { + CLEAR_LOCK(&lk->sl); + } +} + +static FORCEINLINE int recursive_acquire_lock(MLOCK_T *lk) { + THREAD_ID_T mythreadid = CURRENT_THREAD; + int spins = 0; + for (;;) { + if (*((volatile int *)(&lk->sl)) == 0) { + if (!CAS_LOCK(&lk->sl)) { + lk->threadid = mythreadid; + lk->c = 1; + return 0; + } + } + else if (EQ_OWNER(lk->threadid, mythreadid)) { + ++lk->c; + return 0; + } + if ((++spins & SPINS_PER_YIELD) == 0) { + SPIN_LOCK_YIELD; + } + } +} + +static FORCEINLINE int recursive_try_lock(MLOCK_T *lk) { + THREAD_ID_T mythreadid = CURRENT_THREAD; + if (*((volatile int *)(&lk->sl)) == 0) { + if (!CAS_LOCK(&lk->sl)) { + lk->threadid = mythreadid; + lk->c = 1; + return 1; + } + } + else if (EQ_OWNER(lk->threadid, mythreadid)) { + ++lk->c; + return 1; + } + return 0; +} + +#define RELEASE_LOCK(lk) recursive_release_lock(lk) +#define TRY_LOCK(lk) recursive_try_lock(lk) +#define ACQUIRE_LOCK(lk) recursive_acquire_lock(lk) +#define INITIAL_LOCK(lk) ((lk)->threadid = (THREAD_ID_T)0, (lk)->sl = 0, (lk)->c = 0) +#define DESTROY_LOCK(lk) (0) +#endif /* USE_RECURSIVE_LOCKS */ + +#elif defined(WIN32) /* Win32 critical sections */ +#define MLOCK_T CRITICAL_SECTION +#define ACQUIRE_LOCK(lk) (EnterCriticalSection(lk), 0) +#define RELEASE_LOCK(lk) LeaveCriticalSection(lk) +#define TRY_LOCK(lk) TryEnterCriticalSection(lk) +#define INITIAL_LOCK(lk) (!InitializeCriticalSectionAndSpinCount((lk), 0x80000000|4000)) +#define DESTROY_LOCK(lk) (DeleteCriticalSection(lk), 0) +#define NEED_GLOBAL_LOCK_INIT + +static MLOCK_T malloc_global_mutex; +static volatile LONG 
malloc_global_mutex_status; + +/* Use spin loop to initialize global lock. malloc_global_mutex_status is 0 before initialization, < 0 while one thread is initializing, and > 0 once the critical section is ready. */ +static void init_malloc_global_mutex() { + for (;;) { + long stat = malloc_global_mutex_status; + if (stat > 0) + return; + /* transition to < 0 while initializing, then to > 0 */ + if (stat == 0 && + interlockedcompareexchange(&malloc_global_mutex_status, (LONG)-1, (LONG)0) == 0) { + InitializeCriticalSection(&malloc_global_mutex); + interlockedexchange(&malloc_global_mutex_status, (LONG)1); + return; + } + SleepEx(0, FALSE); + } +} + +#else /* pthreads-based locks */ +#define MLOCK_T pthread_mutex_t +#define ACQUIRE_LOCK(lk) pthread_mutex_lock(lk) +#define RELEASE_LOCK(lk) pthread_mutex_unlock(lk) +#define TRY_LOCK(lk) (!pthread_mutex_trylock(lk)) +#define INITIAL_LOCK(lk) pthread_init_lock(lk) +#define DESTROY_LOCK(lk) pthread_mutex_destroy(lk) + +#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 && defined(linux) && !defined(PTHREAD_MUTEX_RECURSIVE) +/* Cope with old-style linux recursive lock initialization by adding */ +/* skipped internal declaration from pthread.h */ +extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr, + int __kind)); +#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP +#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y) +#endif /* USE_RECURSIVE_LOCKS ... */ + +static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* Initialize *lk (as a recursive mutex when USE_RECURSIVE_LOCKS is nonzero). Returns 0 on success and 1 on any failure. Once the attribute object has been initialized it is destroyed on every path, so a failing settype/init call does not leak it. */ +static int pthread_init_lock (MLOCK_T *lk) { + pthread_mutexattr_t attr; + int failed = 0; + if (pthread_mutexattr_init(&attr)) return 1; +#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 + if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) failed = 1; +#endif + if (!failed && pthread_mutex_init(lk, &attr)) failed = 1; + /* always release attr, even when an earlier step failed */ + if (pthread_mutexattr_destroy(&attr)) failed = 1; + return failed; +} + +#endif /* ... lock types ... 
*/ + +/* Common code for all lock types */ +#define USE_LOCK_BIT (2U) + +#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK +#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); +#endif + +#ifndef RELEASE_MALLOC_GLOBAL_LOCK +#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); +#endif + +#endif /* USE_LOCKS */ + +/* ----------------------- Chunk representations ------------------------ */ + +/* + (The following includes lightly edited explanations by Colin Plumb.) + + The malloc_chunk declaration below is misleading (but accurate and + necessary). It declares a "view" into memory allowing access to + necessary fields at known offsets from a given base. + + Chunks of memory are maintained using a `boundary tag' method as + originally described by Knuth. (See the paper by Paul Wilson + ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such + techniques.) Sizes of free chunks are stored both in the front of + each chunk and at the end. This makes consolidating fragmented + chunks into bigger chunks fast. The head fields also hold bits + representing whether chunks are free or in use. + + Here are some pictures to make it clearer. They are "exploded" to + show that the state of a chunk can be thought of as extending from + the high 31 bits of the head field of its header through the + prev_foot and PINUSE_BIT bit of the following chunk header. 
+ + A chunk that's in use looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk (if P = 0) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 1| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + +- -+ + | | + +- -+ + | : + +- size - sizeof(size_t) available payload bytes -+ + : | + chunk-> +- -+ + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| + | Size of next chunk (may or may not be in use) | +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + And if it's free, it looks like this: + + chunk-> +- -+ + | User payload (must be in use, or we would have merged!) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 0| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Prev pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- size - sizeof(struct chunk) unused bytes -+ + : | + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| + | Size of next chunk (must be in use, or we would have merged)| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- User payload -+ + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |0| + +-+ + Note that since we always merge adjacent free chunks, the chunks + adjacent to a free chunk must be in use. 
+ + Given a pointer to a chunk (which can be derived trivially from the + payload pointer) we can, in O(1) time, find out whether the adjacent + chunks are free, and if so, unlink them from the lists that they + are on and merge them with the current chunk. + + Chunks always begin on even word boundaries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + The P (PINUSE_BIT) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, preventing + access to non-existent (or non-owned) memory. If pinuse is set for + any given chunk, then you CANNOT determine the size of the + previous chunk, and might even get a memory addressing fault when + trying to do so. + + The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of + the chunk size redundantly records whether the current chunk is + inuse (unless the chunk is mmapped). This redundancy enables usage + checks within free and realloc, and reduces indirection when freeing + and consolidating chunks. + + Each freshly allocated chunk must have both cinuse and pinuse set. + That is, each allocated chunk borders either a previously allocated + and still in-use chunk, or the base of its memory arena. This is + ensured by making all allocations from the `lowest' part of any + found chunk. Further, no free chunk physically borders another one, + so each free chunk is known to be preceded and followed by either + inuse chunks or the ends of memory. + + Note that the `foot' of the current chunk is actually represented + as the prev_foot of the NEXT chunk. 
This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The exceptions to all this are + + 1. The special chunk `top' is the top-most available chunk (i.e., + the one bordering the end of available memory). It is treated + specially. Top is never included in any bin, is used only if + no other chunk is available, and is released back to the + system if it is very large (see M_TRIM_THRESHOLD). In effect, + the top chunk is treated as larger (and thus less well + fitting) than any other available chunk. The top chunk + doesn't update its trailing size field since there is no next + contiguous chunk that would have to index off it. However, + space is still allocated for it (TOP_FOOT_SIZE) to enable + separation or merging when space is extended. + + 2. Chunks allocated via mmap have both cinuse and pinuse bits + cleared in their head fields. Because they are allocated + one-by-one, each must carry its own prev_foot field, which is + also used to hold the offset this chunk has within its mmapped + region, which is needed to preserve alignment. Each mmapped + chunk is trailed by the first two fields of a fake next-chunk + for sake of usage checks. + +*/ + +struct malloc_chunk { + size_t prev_foot; /* Size of previous chunk (if free). */ + size_t head; /* Size and inuse bits. */ + struct malloc_chunk* fd; /* double links -- used only if free. 
*/ + struct malloc_chunk* bk; +}; + +typedef struct malloc_chunk mchunk; +typedef struct malloc_chunk* mchunkptr; +typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */ +typedef unsigned int bindex_t; /* Described below */ +typedef unsigned int binmap_t; /* Described below */ +typedef unsigned int flag_t; /* The type of various bit flag sets */ + +/* ------------------- Chunks sizes and alignments ----------------------- */ + +#define MCHUNK_SIZE (sizeof(mchunk)) + +#if FOOTERS +#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +#else /* FOOTERS */ +#define CHUNK_OVERHEAD (SIZE_T_SIZE) +#endif /* FOOTERS */ + +/* MMapped chunks need a second word of overhead ... */ +#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +/* ... and additional padding for fake next-chunk at foot */ +#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES) + +/* The smallest size we can malloc is an aligned minimal chunk */ +#define MIN_CHUNK_SIZE\ + ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* conversion from malloc headers to user pointers, and back */ +/* chunk2mem does the addition on an integer so that chunk2mem(0) involves no null-pointer arithmetic; the integer type is size_t rather than long because long is narrower than a pointer on LLP64 targets such as 64-bit Windows */ +#define chunk2mem(p) ((void*)((size_t)(p) + TWO_SIZE_T_SIZES)) +#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES)) +/* chunk associated with aligned address A (fully parenthesized so the result can be used inside a larger expression) */ +#define align_as_chunk(A) ((mchunkptr)((A) + align_offset(chunk2mem(A)))) + +/* Bounds on request (not chunk) sizes. */ +#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2) +#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) + +/* pad request bytes into a usable size */ +#define pad_request(req) \ + (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* pad request, checking for minimum (but not maximum) */ +#define request2size(req) \ + (((req) < MIN_REQUEST)? 
MIN_CHUNK_SIZE : pad_request(req)) + + +/* ------------------ Operations on head and foot fields ----------------- */ + +/* + The head field of a chunk is or'ed with PINUSE_BIT when previous + adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in + use, unless mmapped, in which case both bits are cleared. + + FLAG4_BIT is not used by this malloc, but might be useful in extensions. +*/ + +#define PINUSE_BIT (SIZE_T_ONE) +#define CINUSE_BIT (SIZE_T_TWO) +#define FLAG4_BIT (SIZE_T_FOUR) +#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) +#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT) + +/* Head value for fenceposts */ +#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) + +/* extraction of fields from head words */ +#define cinuse(p) ((p)->head & CINUSE_BIT) +#define pinuse(p) ((p)->head & PINUSE_BIT) +#define flag4inuse(p) ((p)->head & FLAG4_BIT) +#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT) +#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0) + +#define chunksize(p) ((p)->head & ~(FLAG_BITS)) + +#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) +#define set_flag4(p) ((p)->head |= FLAG4_BIT) +#define clear_flag4(p) ((p)->head &= ~FLAG4_BIT) + +/* Treat space at ptr +/- offset as a chunk */ +#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) +#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s))) + +/* Ptr to next or previous physical malloc_chunk. 
*/ +#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS))) +#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) )) + +/* extract next chunk's pinuse bit */ +#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) + +/* Get/set size at footer */ +#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot) +#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s)) + +/* Set size, pinuse bit, and foot: head becomes (s)|PINUSE_BIT and the prev_foot of the chunk s bytes past p becomes s. s is parenthesized so that an expression argument is or'ed as a whole. */ +#define set_size_and_pinuse_of_free_chunk(p, s)\ + ((p)->head = ((s)|PINUSE_BIT), set_foot(p, s)) + +/* Set size, pinuse bit, foot, and clear next pinuse */ +#define set_free_with_pinuse(p, s, n)\ + (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) + +/* Get the internal overhead associated with chunk p */ +#define overhead_for(p)\ + (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD) + +/* Return true if malloced space is not necessarily cleared */ +#if MMAP_CLEARS +#define calloc_must_clear(p) (!is_mmapped(p)) +#else /* MMAP_CLEARS */ +#define calloc_must_clear(p) (1) +#endif /* MMAP_CLEARS */ + +/* ---------------------- Overlaid data structures ----------------------- */ + +/* + When chunks are not in use, they are treated as nodes of either + lists or trees. + + "Small" chunks are stored in circular doubly-linked lists, and look + like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . 
| +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Larger chunks are kept in a form of bitwise digital trees (aka + tries) keyed on chunksizes. Because malloc_tree_chunks are only for + free chunks greater than 256 bytes, their size doesn't impose any + constraints on user chunk sizes. Each node looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to left child (child[0]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to right child (child[1]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to parent | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | bin index of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Each tree holding treenodes is a tree of unique chunk sizes. Chunks + of the same size are arranged in a circularly-linked list, with only + the oldest chunk (the next to be used, in our FIFO ordering) + actually in the tree. (Tree members are distinguished by a non-null + parent pointer.) 
If a chunk with the same size as an existing node + is inserted, it is linked off the existing node using pointers that + work in the same way as fd/bk pointers of small chunks. + + Each tree contains a power of 2 sized range of chunk sizes (the + smallest is 0x100 <= x < 0x180), which is divided in half at each + tree level, with the chunks in the smaller half of the range (0x100 + <= x < 0x140 for the top node) in the left subtree and the larger + half (0x140 <= x < 0x180) in the right subtree. This is, of course, + done by inspecting individual bits. + + Using these rules, each node's left subtree contains all smaller + sizes than its right subtree. However, the node at the root of each + subtree has no particular ordering relationship to either. (The + dividing line between the subtree sizes is based on trie relation.) + If we remove the last chunk of a given size from the interior of the + tree, we need to replace it with a leaf node. The tree ordering + rules permit a node to be replaced by any leaf below it. + + The smallest chunk in a tree (a common operation in a best-fit + allocator) can be found by walking a path to the leftmost leaf in + the tree. Unlike a usual binary tree, where we follow left child + pointers until we reach a null, here we follow the right child + pointer any time the left one is null, until we reach a leaf with + both child pointers null. The smallest chunk in the tree will be + somewhere along that path. + + The worst case number of steps to add, find, or remove a node is + bounded by the number of bits differentiating chunks within + bins. Under current bin calculations, this ranges from 6 up to 21 + (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case + is of course much better. 
+*/ + +struct malloc_tree_chunk { + /* The first four fields must be compatible with malloc_chunk */ + size_t prev_foot; + size_t head; + struct malloc_tree_chunk* fd; + struct malloc_tree_chunk* bk; + + struct malloc_tree_chunk* child[2]; + struct malloc_tree_chunk* parent; + bindex_t index; +}; + +typedef struct malloc_tree_chunk tchunk; +typedef struct malloc_tree_chunk* tchunkptr; +typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ + +/* A little helper macro for trees */ +#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) + +/* ----------------------------- Segments -------------------------------- */ + +/* + Each malloc space may include non-contiguous segments, held in a + list headed by an embedded malloc_segment record representing the + top-most space. Segments also include flags holding properties of + the space. Large chunks that are directly allocated by mmap are not + included in this list. They are instead independently created and + destroyed without otherwise keeping track of them. + + Segment management mainly comes into play for spaces allocated by + MMAP. Any call to MMAP might or might not return memory that is + adjacent to an existing segment. MORECORE normally contiguously + extends the current space, so this space is almost always adjacent, + which is simpler and faster to deal with. (This is why MORECORE is + used preferentially to MMAP when both are available -- see + sys_alloc.) When allocating using MMAP, we don't use any of the + hinting mechanisms (inconsistently) supported in various + implementations of unix mmap, or distinguish reserving from + committing memory. Instead, we just ask for space, and exploit + contiguity when we get it. It is probably possible to do + better than this on some systems, but no general scheme seems + to be significantly better. 
+ + Management entails a simpler variant of the consolidation scheme + used for chunks to reduce fragmentation -- new adjacent memory is + normally prepended or appended to an existing segment. However, + there are limitations compared to chunk consolidation that mostly + reflect the fact that segment processing is relatively infrequent + (occurring only when getting memory from system) and that we + don't expect to have huge numbers of segments: + + * Segments are not indexed, so traversal requires linear scans. (It + would be possible to index these, but is not worth the extra + overhead and complexity for most programs on most platforms.) + * New segments are only appended to old ones when holding top-most + memory; if they cannot be prepended to others, they are held in + different segments. + + Except for the top-most segment of an mstate, each segment record + is kept at the tail of its segment. Segments are added by pushing + segment records onto the list headed by &mstate.seg for the + containing mstate. + + Segment flags control allocation/merge/deallocation policies: + * If EXTERN_BIT set, then we did not allocate this segment, + and so should not try to deallocate or merge with others. + (This currently holds only for the initial segment passed + into create_mspace_with_base.) + * If USE_MMAP_BIT set, the segment may be merged with + other surrounding mmapped segments and trimmed/de-allocated + using munmap. + * If neither bit is set, then the segment was obtained using + MORECORE so can be merged with surrounding MORECORE'd segments + and deallocated/trimmed using MORECORE with negative arguments. 
+*/ + +struct malloc_segment { + char* base; /* base address */ + size_t size; /* allocated size */ + struct malloc_segment* next; /* ptr to next segment */ + flag_t sflags; /* mmap and extern flag */ +}; + +#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT) +#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT) + +typedef struct malloc_segment msegment; +typedef struct malloc_segment* msegmentptr; + +/* ---------------------------- malloc_state ----------------------------- */ + +/* + A malloc_state holds all of the bookkeeping for a space. + The main fields are: + + Top + The topmost chunk of the currently active segment. Its size is + cached in topsize. The actual size of topmost space is + topsize+TOP_FOOT_SIZE, which includes space reserved for adding + fenceposts and segment records if necessary when getting more + space from the system. The size at which to autotrim top is + cached from mparams in trim_check, except that it is disabled if + an autotrim fails. + + Designated victim (dv) + This is the preferred chunk for servicing small requests that + don't have exact fits. It is normally the chunk split off most + recently to service another small request. Its size is cached in + dvsize. The link fields of this chunk are not maintained since it + is not kept in a bin. + + SmallBins + An array of bin headers for free chunks. These bins hold chunks + with sizes less than MIN_LARGE_SIZE bytes. Each bin contains + chunks of all the same size, spaced 8 bytes apart. To simplify + use in double-linked lists, each bin header acts as a malloc_chunk + pointing to the real first node, if it exists (else pointing to + itself). This avoids special-casing for headers. But to avoid + waste, we allocate only the fd/bk pointers of bins, and then use + repositioning tricks to treat these as the fields of a chunk. + + TreeBins + Treebins are pointers to the roots of trees holding a range of + sizes. 
There are 2 equally spaced treebins for each power of two + from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything + larger. + + Bin maps + There is one bit map for small bins ("smallmap") and one for + treebins ("treemap"). Each bin sets its bit when non-empty, and + clears the bit when empty. Bit operations are then used to avoid + bin-by-bin searching -- nearly all "search" is done without ever + looking at bins that won't be selected. The bit maps + conservatively use 32 bits per map word, even on 64-bit systems. + For a good description of some of the bit-based techniques used + here, see Henry S. Warren Jr's book "Hacker's Delight" (and + supplement at http://hackersdelight.org/). Many of these are + intended to reduce the branchiness of paths through malloc etc, as + well as to reduce the number of memory locations read or written. + + Segments + A list of segments headed by an embedded malloc_segment record + representing the initial space. + + Address check support + The least_addr field is the least address ever obtained from + MORECORE or MMAP. Attempted frees and reallocs of any address less + than this are trapped (unless INSECURE is defined). + + Magic tag + A cross-check field that should always hold same value as mparams.magic. + + Max allowed footprint + The maximum allowed bytes to allocate from system (zero means no limit) + + Flags + Bits recording whether to use MMAP, locks, or contiguous MORECORE + + Statistics + Each space keeps track of current and maximum system memory + obtained via MORECORE or MMAP. + + Trim support + Fields holding the amount of unused topmost memory that should trigger + trimming, and a counter to force periodic scanning to release unused + non-topmost segments. + + Locking + If USE_LOCKS is defined, the "mutex" lock is acquired and released + around every public call using this mspace. + + Extension support + A void* pointer and a size_t field that can be used to help implement + extensions to this malloc. 
+*/ + +/* Bin types, widths and sizes */ +#define NSMALLBINS (32U) +#define NTREEBINS (32U) +#define SMALLBIN_SHIFT (3U) +#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) +#define TREEBIN_SHIFT (8U) +#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) +#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) +#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) + +struct malloc_state { + binmap_t smallmap; + binmap_t treemap; + size_t dvsize; + size_t topsize; + char* least_addr; + mchunkptr dv; + mchunkptr top; + size_t trim_check; + size_t release_checks; + size_t magic; + mchunkptr smallbins[(NSMALLBINS+1)*2]; + tbinptr treebins[NTREEBINS]; + size_t footprint; + size_t max_footprint; + size_t footprint_limit; /* zero means no limit */ + flag_t mflags; +#if USE_LOCKS + MLOCK_T mutex; /* locate lock among fields that rarely change */ +#endif /* USE_LOCKS */ + msegment seg; + void* extp; /* Unused but available for extensions */ + size_t exts; +}; + +typedef struct malloc_state* mstate; + +/* ------------- Global malloc_state and malloc_params ------------------- */ + +/* + malloc_params holds global properties, including those that can be + dynamically set using mallopt. There is a single instance, mparams, + initialized in init_mparams. Note that the non-zeroness of "magic" + also serves as an initialization flag. 
+*/ + +struct malloc_params { + size_t magic; + size_t page_size; + size_t granularity; + size_t mmap_threshold; + size_t trim_threshold; + flag_t default_mflags; +}; + +static struct malloc_params mparams; + +/* Ensure mparams initialized */ +#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams()) + +#if !ONLY_MSPACES + +/* The global malloc_state used for all non-"mspace" calls */ +static struct malloc_state _gm_; +#define gm (&_gm_) +#define is_global(M) ((M) == &_gm_) + +#endif /* !ONLY_MSPACES */ + +#define is_initialized(M) ((M)->top != 0) + +/* -------------------------- system alloc setup ------------------------- */ + +/* Operations on mflags */ + +#define use_lock(M) ((M)->mflags & USE_LOCK_BIT) +#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT) +#if USE_LOCKS +#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT) +#else +#define disable_lock(M) +#endif + +#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT) +#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT) +#if HAVE_MMAP +#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT) +#else +#define disable_mmap(M) +#endif + +#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT) +#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT) + +#define set_lock(M,L)\ + ((M)->mflags = (L)?\ + ((M)->mflags | USE_LOCK_BIT) :\ + ((M)->mflags & ~USE_LOCK_BIT)) + +/* page-align a size */ +#define page_align(S)\ + (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE)) + +/* granularity-align a size */ +#define granularity_align(S)\ + (((S) + (mparams.granularity - SIZE_T_ONE))\ + & ~(mparams.granularity - SIZE_T_ONE)) + + +/* For mmap, use granularity alignment on windows, else page-align */ +#ifdef WIN32 +#define mmap_align(S) granularity_align(S) +#else +#define mmap_align(S) page_align(S) +#endif + +/* For sys_alloc, enough padding to ensure can malloc request on success */ +#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT) + +#define 
is_page_aligned(S)\ + (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0) +#define is_granularity_aligned(S)\ + (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0) + +/* True if segment S holds address A */ +#define segment_holds(S, A)\ + ((char*)(A) >= S->base && (char*)(A) < S->base + S->size) + +/* Return segment holding given address */ +static msegmentptr segment_holding(mstate m, char* addr) { + msegmentptr sp = &m->seg; + for (;;) { + if (addr >= sp->base && addr < sp->base + sp->size) + return sp; + if ((sp = sp->next) == 0) + return 0; + } +} + +/* Return true if segment contains a segment link */ +static int has_segment_link(mstate m, msegmentptr ss) { + msegmentptr sp = &m->seg; + for (;;) { + if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size) + return 1; + if ((sp = sp->next) == 0) + return 0; + } +} + +#ifndef MORECORE_CANNOT_TRIM +#define should_trim(M,s) ((s) > (M)->trim_check) +#else /* MORECORE_CANNOT_TRIM */ +#define should_trim(M,s) (0) +#endif /* MORECORE_CANNOT_TRIM */ + +/* + TOP_FOOT_SIZE is padding at the end of a segment, including space + that may be needed to place segment records and fenceposts when new + noncontiguous segments are added. +*/ +#define TOP_FOOT_SIZE\ + (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + + +/* ------------------------------- Hooks -------------------------------- */ + +/* + PREACTION should be defined to return 0 on success, and nonzero on + failure. If you are not using locking, you can redefine these to do + anything you like. +*/ + +#if USE_LOCKS +#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0) +#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); } +#else /* USE_LOCKS */ + +#ifndef PREACTION +#define PREACTION(M) (0) +#endif /* PREACTION */ + +#ifndef POSTACTION +#define POSTACTION(M) +#endif /* POSTACTION */ + +#endif /* USE_LOCKS */ + +/* + CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses. 
+ USAGE_ERROR_ACTION is triggered on detected bad frees and + reallocs. The argument p is an address that might have triggered the + fault. It is ignored by the two predefined actions, but might be + useful in custom actions that try to help diagnose errors. +*/ + +#if PROCEED_ON_ERROR + +/* A count of the number of corruption errors causing resets */ +int malloc_corruption_error_count; + +/* default corruption action */ +static void reset_on_error(mstate m); + +#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m) +#define USAGE_ERROR_ACTION(m, p) + +#else /* PROCEED_ON_ERROR */ + +#ifndef CORRUPTION_ERROR_ACTION +#define CORRUPTION_ERROR_ACTION(m) ABORT +#endif /* CORRUPTION_ERROR_ACTION */ + +#ifndef USAGE_ERROR_ACTION +#define USAGE_ERROR_ACTION(m,p) ABORT +#endif /* USAGE_ERROR_ACTION */ + +#endif /* PROCEED_ON_ERROR */ + + +/* -------------------------- Debugging setup ---------------------------- */ + +#if ! DEBUG + +#define check_free_chunk(M,P) +#define check_inuse_chunk(M,P) +#define check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) +#define check_malloc_state(M) +#define check_top_chunk(M,P) + +#else /* DEBUG */ +#define check_free_chunk(M,P) do_check_free_chunk(M,P) +#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P) +#define check_top_chunk(M,P) do_check_top_chunk(M,P) +#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P) +#define check_malloc_state(M) do_check_malloc_state(M) + +static void do_check_any_chunk(mstate m, mchunkptr p); +static void do_check_top_chunk(mstate m, mchunkptr p); +static void do_check_mmapped_chunk(mstate m, mchunkptr p); +static void do_check_inuse_chunk(mstate m, mchunkptr p); +static void do_check_free_chunk(mstate m, mchunkptr p); +static void do_check_malloced_chunk(mstate m, void* mem, size_t s); +static void do_check_tree(mstate m, tchunkptr t); +static void do_check_treebin(mstate m, bindex_t i); +static void 
do_check_smallbin(mstate m, bindex_t i); +static void do_check_malloc_state(mstate m); +static int bin_find(mstate m, mchunkptr x); +static size_t traverse_and_check(mstate m); +#endif /* DEBUG */ + +/* ---------------------------- Indexing Bins ---------------------------- */ + +#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) +#define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT) +#define small_index2size(i) ((i) << SMALLBIN_SHIFT) +#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) + +/* addressing by index. See above about smallbin repositioning */ +#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1]))) +#define treebin_at(M,i) (&((M)->treebins[i])) + +/* assign tree index for size S to variable I. Use x86 asm if possible */ +/* X is kept as size_t (as in the other variants below): an unsigned int would drop the high bits of S >> TREEBIN_SHIFT when size_t is 64 bits wide, so a size of 2^40 or more could be given a small index instead of NTREEBINS-1. X is narrowed only after the 0xFFFF range check. */ +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K = (unsigned) sizeof(unsigned int)*__CHAR_BIT__ - 1 - (unsigned) __builtin_clz((unsigned int)X); \ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined (__INTEL_COMPILER) +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K = _bit_scan_reverse (X); \ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K;\ + _BitScanReverse((DWORD *) &K, (DWORD) X);\ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#else /* GNUC */ +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int Y
= (unsigned int)X;\ + unsigned int N = ((Y - 0x100) >> 16) & 8;\ + unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\ + N += K;\ + N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\ + K = 14 - N + ((Y <<= K) >> 15);\ + I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\ + }\ +} +#endif /* GNUC */ + +/* Bit representing maximum resolved size in a treebin at i. The expansion is fully parenthesized so the conditional cannot be split by operators around the macro call. */ +#define bit_for_tree_index(i) \ + (((i) == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)) + +/* Shift placing maximum resolved bit in a treebin at i as sign bit */ +#define leftshift_for_tree_index(i) \ + (((i) == NTREEBINS-1)? 0 : \ + ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) + +/* The size of the smallest chunk held in bin with index i */ +#define minsize_for_tree_index(i) \ + ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ + (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) + + +/* ------------------------ Operations on bin maps ----------------------- */ + +/* bit corresponding to given index */ +#define idx2bit(i) ((binmap_t)(1) << (i)) + +/* Mark/Clear bits with given index */ +#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) +#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) +#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) + +#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) +#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) +#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) + +/* isolate the least set bit of a bitmap */ +#define least_bit(x) ((x) & -(x)) + +/* mask with all bits to left of least bit of x on (x is parenthesized so a compound argument is shifted as a whole) */ +#define left_bits(x) (((x)<<1) | -((x)<<1)) + +/* mask with all bits to left of or equal to least bit of x on */ +#define same_or_left_bits(x) ((x) | -(x)) + +/* index corresponding to given bit.
Use x86 asm if possible */ + +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + J = __builtin_ctz(X); \ + I = (bindex_t)J;\ +} + +#elif defined (__INTEL_COMPILER) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + J = _bit_scan_forward (X); \ + I = (bindex_t)J;\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + _BitScanForward((DWORD *) &J, X);\ + I = (bindex_t)J;\ +} + +#elif USE_BUILTIN_FFS +#define compute_bit2idx(X, I) I = ffs(X)-1 + +#else +#define compute_bit2idx(X, I)\ +{\ + unsigned int Y = X - 1;\ + unsigned int K = Y >> (16-4) & 16;\ + unsigned int N = K; Y >>= K;\ + N += K = Y >> (8-3) & 8; Y >>= K;\ + N += K = Y >> (4-2) & 4; Y >>= K;\ + N += K = Y >> (2-1) & 2; Y >>= K;\ + N += K = Y >> (1-0) & 1; Y >>= K;\ + I = (bindex_t)(N + Y);\ +} +#endif /* GNUC */ + + +/* ----------------------- Runtime Check Support ------------------------- */ + +/* + For security, the main invariant is that malloc/free/etc never + writes to a static address other than malloc_state, unless static + malloc_state itself has been corrupted, which cannot occur via + malloc (because of these checks). In essence this means that we + believe all pointers, sizes, maps etc held in malloc_state, but + check all of those linked or offsetted from other embedded data + structures. These checks are interspersed with main code in a way + that tends to minimize their run-time cost. + + When FOOTERS is defined, in addition to range checking, we also + verify footer fields of inuse chunks, which can be used to guarantee + that the mstate controlling malloc/free is intact.
This is a + streamlined version of the approach described by William Robertson + et al in "Run-time Detection of Heap-based Overflows" LISA'03 + http://www.usenix.org/events/lisa03/tech/robertson.html The footer + of an inuse chunk holds the xor of its mstate and a random seed, + that is checked upon calls to free() and realloc(). This is + (probabilistically) unguessable from outside the program, but can be + computed by any code successfully malloc'ing any chunk, so does not + itself provide protection against code that has already broken + security through some other means. Unlike Robertson et al, we + always dynamically check addresses of all offset chunks (previous, + next, etc). This turns out to be cheaper than relying on hashes. +*/ + +#if !INSECURE +/* Check if address a is at least as high as any from MORECORE or MMAP */ +#define ok_address(M, a) ((char*)(a) >= (M)->least_addr) +/* Check if address of next chunk n is higher than base chunk p */ +#define ok_next(p, n) ((char*)(p) < (char*)(n)) +/* Check if p has inuse status */ +#define ok_inuse(p) is_inuse(p) +/* Check if p has its pinuse bit on */ +#define ok_pinuse(p) pinuse(p) + +#else /* !INSECURE */ +#define ok_address(M, a) (1) +#define ok_next(b, n) (1) +#define ok_inuse(p) (1) +#define ok_pinuse(p) (1) +#endif /* !INSECURE */ + +#if (FOOTERS && !INSECURE) +/* Check if (alleged) mstate m has expected magic field */ +#define ok_magic(M) ((M)->magic == mparams.magic) +#else /* (FOOTERS && !INSECURE) */ +#define ok_magic(M) (1) +#endif /* (FOOTERS && !INSECURE) */ + +/* In gcc, use __builtin_expect to minimize impact of checks */ +#if !INSECURE +#if defined(__GNUC__) && __GNUC__ >= 3 +#define RTCHECK(e) __builtin_expect(e, 1) +#else /* GNUC */ +#define RTCHECK(e) (e) +#endif /* GNUC */ +#else /* !INSECURE */ +#define RTCHECK(e) (1) +#endif /* !INSECURE */ + +/* macros to set up inuse chunks with or without footers */ + +#if !FOOTERS + +#define mark_inuse_foot(M,p,s) + +/* Macros for setting head/foot
of non-mmapped chunks */ + +/* Set cinuse bit and pinuse bit of next chunk */ +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set cinuse and pinuse of this chunk and pinuse of next chunk */ +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set size, cinuse and pinuse bit of this chunk */ +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) + +#else /* FOOTERS */ + +/* Set foot of inuse chunk to be xor of mstate and seed */ +#define mark_inuse_foot(M,p,s)\ + (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic)) + +#define get_mstate_for(p)\ + ((mstate)(((mchunkptr)((char*)(p) +\ + (chunksize(p))))->prev_foot ^ mparams.magic)) + +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \ + mark_inuse_foot(M,p,s)) + +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\ + mark_inuse_foot(M,p,s)) + +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + mark_inuse_foot(M, p, s)) + +#endif /* !FOOTERS */ + +/* ---------------------------- setting mparams -------------------------- */ + +#if LOCK_AT_FORK +static void pre_fork(void) { ACQUIRE_LOCK(&(gm)->mutex); } +static void post_fork_parent(void) { RELEASE_LOCK(&(gm)->mutex); } +static void post_fork_child(void) { INITIAL_LOCK(&(gm)->mutex); } +#endif /* LOCK_AT_FORK */ + +/* Initialize mparams */ +static int init_mparams(void) { +#ifdef NEED_GLOBAL_LOCK_INIT + if (malloc_global_mutex_status <= 0) + init_malloc_global_mutex(); +#endif + + ACQUIRE_MALLOC_GLOBAL_LOCK(); + if (mparams.magic == 0) { + size_t magic; + size_t psize; + size_t gsize; + +#ifndef 
WIN32 + psize = malloc_getpagesize; + gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize); +#else /* WIN32 */ + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + psize = system_info.dwPageSize; + gsize = ((DEFAULT_GRANULARITY != 0)? + DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); + } +#endif /* WIN32 */ + + /* Sanity-check configuration: + size_t must be unsigned and as wide as pointer type. + ints must be at least 4 bytes. + alignment must be at least 8. + Alignment, min chunk size, and page size must all be powers of 2. + */ + if ((sizeof(size_t) != sizeof(char*)) || + (MAX_SIZE_T < MIN_CHUNK_SIZE) || + (sizeof(int) < 4) || + (MALLOC_ALIGNMENT < (size_t)8U) || + ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || + ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || + ((gsize & (gsize-SIZE_T_ONE)) != 0) || + ((psize & (psize-SIZE_T_ONE)) != 0)) + ABORT; + mparams.granularity = gsize; + mparams.page_size = psize; + mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; + mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; +#if MORECORE_CONTIGUOUS + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; +#else /* MORECORE_CONTIGUOUS */ + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; +#endif /* MORECORE_CONTIGUOUS */ + +#if !ONLY_MSPACES + /* Set up lock for main malloc area */ + gm->mflags = mparams.default_mflags; + (void)INITIAL_LOCK(&gm->mutex); +#endif +#if LOCK_AT_FORK + pthread_atfork(&pre_fork, &post_fork_parent, &post_fork_child); +#endif + + { +#if USE_DEV_RANDOM + int fd; + int got_random = 0; + unsigned char buf[sizeof(size_t)]; + /* Try to use /dev/urandom, else fall back on using time */ + if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) { + got_random = (read(fd, buf, sizeof(buf)) == sizeof(buf)); + close(fd); /* always close: a failed or short read must not leak the descriptor */ + } + if (got_random) + magic = *((size_t *) buf); + else +#endif /* USE_DEV_RANDOM */ +#ifdef WIN32 + magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); +#elif defined(LACKS_TIME_H) + magic = (size_t)&magic ^ (size_t)0x55555555U;
+#else + magic = (size_t)(time(0) ^ (size_t)0x55555555U); +#endif + magic |= (size_t)8U; /* ensure nonzero */ + magic &= ~(size_t)7U; /* improve chances of fault for bad values */ + /* Until memory modes commonly available, use volatile-write */ + (*(volatile size_t *)(&(mparams.magic))) = magic; + } + } + + RELEASE_MALLOC_GLOBAL_LOCK(); + return 1; +} + +/* support for mallopt */ +static int change_mparam(int param_number, int value) { + size_t val; + ensure_initialization(); + val = (value == -1)? MAX_SIZE_T : (size_t)value; + switch(param_number) { + case M_TRIM_THRESHOLD: + mparams.trim_threshold = val; + return 1; + case M_GRANULARITY: + if (val >= mparams.page_size && ((val & (val-1)) == 0)) { + mparams.granularity = val; + return 1; + } + else + return 0; + case M_MMAP_THRESHOLD: + mparams.mmap_threshold = val; + return 1; + default: + return 0; + } +} + +#if DEBUG +/* ------------------------- Debugging Support --------------------------- */ + +/* Check properties of any chunk, whether free, inuse, mmapped etc */ +static void do_check_any_chunk(mstate m, mchunkptr p) { + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); +} + +/* Check properties of top chunk */ +static void do_check_top_chunk(mstate m, mchunkptr p) { + msegmentptr sp = segment_holding(m, (char*)p); + size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! 
*/ + assert(sp != 0); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(sz == m->topsize); + assert(sz > 0); + assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); + assert(pinuse(p)); + assert(!pinuse(chunk_plus_offset(p, sz))); +} + +/* Check properties of (inuse) mmapped chunks */ +static void do_check_mmapped_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD); + assert(is_mmapped(p)); + assert(use_mmap(m)); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(!is_small(sz)); + assert((len & (mparams.page_size-SIZE_T_ONE)) == 0); + assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD); + assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0); +} + +/* Check properties of inuse chunks */ +static void do_check_inuse_chunk(mstate m, mchunkptr p) { + do_check_any_chunk(m, p); + assert(is_inuse(p)); + assert(next_pinuse(p)); + /* If not pinuse and not mmapped, previous chunk has OK offset */ + assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); + if (is_mmapped(p)) + do_check_mmapped_chunk(m, p); +} + +/* Check properties of free chunks */ +static void do_check_free_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + mchunkptr next = chunk_plus_offset(p, sz); + do_check_any_chunk(m, p); + assert(!is_inuse(p)); + assert(!next_pinuse(p)); + assert (!is_mmapped(p)); + if (p != m->dv && p != m->top) { + if (sz >= MIN_CHUNK_SIZE) { + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(is_aligned(chunk2mem(p))); + assert(next->prev_foot == sz); + assert(pinuse(p)); + assert (next == m->top || is_inuse(next)); + assert(p->fd->bk == p); + assert(p->bk->fd == p); + } + else /* markers are always of size SIZE_T_SIZE */ + assert(sz == SIZE_T_SIZE); + } +} + +/* Check properties of malloced chunks at the point they are malloced */ +static void do_check_malloced_chunk(mstate 
m, void* mem, size_t s) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + size_t sz = p->head & ~INUSE_BITS; + do_check_inuse_chunk(m, p); + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(sz >= MIN_CHUNK_SIZE); + assert(sz >= s); + /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */ + assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE)); + } +} + +/* Check a tree and its subtrees. */ +static void do_check_tree(mstate m, tchunkptr t) { + tchunkptr head = 0; + tchunkptr u = t; + bindex_t tindex = t->index; + size_t tsize = chunksize(t); + bindex_t idx; + compute_tree_index(tsize, idx); + assert(tindex == idx); + assert(tsize >= MIN_LARGE_SIZE); + assert(tsize >= minsize_for_tree_index(idx)); + assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1)))); + + do { /* traverse through chain of same-sized nodes */ + do_check_any_chunk(m, ((mchunkptr)u)); + assert(u->index == tindex); + assert(chunksize(u) == tsize); + assert(!is_inuse(u)); + assert(!next_pinuse(u)); + assert(u->fd->bk == u); + assert(u->bk->fd == u); + if (u->parent == 0) { + assert(u->child[0] == 0); + assert(u->child[1] == 0); + } + else { + assert(head == 0); /* only one node on chain has parent */ + head = u; + assert(u->parent != u); + assert (u->parent->child[0] == u || + u->parent->child[1] == u || + *((tbinptr*)(u->parent)) == u); + if (u->child[0] != 0) { + assert(u->child[0]->parent == u); + assert(u->child[0] != u); + do_check_tree(m, u->child[0]); + } + if (u->child[1] != 0) { + assert(u->child[1]->parent == u); + assert(u->child[1] != u); + do_check_tree(m, u->child[1]); + } + if (u->child[0] != 0 && u->child[1] != 0) { + assert(chunksize(u->child[0]) < chunksize(u->child[1])); + } + } + u = u->fd; + } while (u != t); + assert(head != 0); +} + +/* Check all the chunks in a treebin. 
*/ +static void do_check_treebin(mstate m, bindex_t i) { + tbinptr* tb = treebin_at(m, i); + tchunkptr t = *tb; + int empty = (m->treemap & (1U << i)) == 0; + if (t == 0) + assert(empty); + if (!empty) + do_check_tree(m, t); +} + +/* Check all the chunks in a smallbin. */ +static void do_check_smallbin(mstate m, bindex_t i) { + sbinptr b = smallbin_at(m, i); + mchunkptr p = b->bk; + unsigned int empty = (m->smallmap & (1U << i)) == 0; + if (p == b) + assert(empty); + if (!empty) { + for (; p != b; p = p->bk) { + size_t size = chunksize(p); + mchunkptr q; + /* each chunk claims to be free */ + do_check_free_chunk(m, p); + /* chunk belongs in bin */ + assert(small_index(size) == i); + assert(p->bk == b || chunksize(p->bk) == chunksize(p)); + /* chunk is followed by an inuse chunk */ + q = next_chunk(p); + if (q->head != FENCEPOST_HEAD) + do_check_inuse_chunk(m, q); + } + } +} + +/* Find x in a bin. Used in other check functions. */ +static int bin_find(mstate m, mchunkptr x) { + size_t size = chunksize(x); + if (is_small(size)) { + bindex_t sidx = small_index(size); + sbinptr b = smallbin_at(m, sidx); + if (smallmap_is_marked(m, sidx)) { + mchunkptr p = b; + do { + if (p == x) + return 1; + } while ((p = p->fd) != b); + } + } + else { + bindex_t tidx; + compute_tree_index(size, tidx); + if (treemap_is_marked(m, tidx)) { + tchunkptr t = *treebin_at(m, tidx); + size_t sizebits = size << leftshift_for_tree_index(tidx); + while (t != 0 && chunksize(t) != size) { + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + sizebits <<= 1; + } + if (t != 0) { + tchunkptr u = t; + do { + if (u == (tchunkptr)x) + return 1; + } while ((u = u->fd) != t); + } + } + } + return 0; +} + +/* Traverse each chunk and check it; return total */ +static size_t traverse_and_check(mstate m) { + size_t sum = 0; + if (is_initialized(m)) { + msegmentptr s = &m->seg; + sum += m->topsize + TOP_FOOT_SIZE; + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + mchunkptr lastq = 0; + 
assert(pinuse(q)); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + sum += chunksize(q); + if (is_inuse(q)) { + assert(!bin_find(m, q)); + do_check_inuse_chunk(m, q); + } + else { + assert(q == m->dv || bin_find(m, q)); + assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */ + do_check_free_chunk(m, q); + } + lastq = q; + q = next_chunk(q); + } + s = s->next; + } + } + return sum; +} + + +/* Check all properties of malloc_state. */ +static void do_check_malloc_state(mstate m) { + bindex_t i; + size_t total; + /* check bins */ + for (i = 0; i < NSMALLBINS; ++i) + do_check_smallbin(m, i); + for (i = 0; i < NTREEBINS; ++i) + do_check_treebin(m, i); + + if (m->dvsize != 0) { /* check dv chunk */ + do_check_any_chunk(m, m->dv); + assert(m->dvsize == chunksize(m->dv)); + assert(m->dvsize >= MIN_CHUNK_SIZE); + assert(bin_find(m, m->dv) == 0); + } + + if (m->top != 0) { /* check top chunk */ + do_check_top_chunk(m, m->top); + /*assert(m->topsize == chunksize(m->top)); redundant */ + assert(m->topsize > 0); + assert(bin_find(m, m->top) == 0); + } + + total = traverse_and_check(m); + assert(total <= m->footprint); + assert(m->footprint <= m->max_footprint); +} +#endif /* DEBUG */ + +/* ----------------------------- statistics ------------------------------ */ + +#if !NO_MALLINFO +static struct mallinfo internal_mallinfo(mstate m) { + struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + ensure_initialization(); + if (!PREACTION(m)) { + check_malloc_state(m); + if (is_initialized(m)) { + size_t nfree = SIZE_T_ONE; /* top always free */ + size_t mfree = m->topsize + TOP_FOOT_SIZE; + size_t sum = mfree; + msegmentptr s = &m->seg; + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + size_t sz = chunksize(q); + sum += sz; + if (!is_inuse(q)) { + mfree += sz; + ++nfree; + } + q = next_chunk(q); + } + s = s->next; + } + + nm.arena = sum; + 
nm.ordblks = nfree; + nm.hblkhd = m->footprint - sum; + nm.usmblks = m->max_footprint; + nm.uordblks = m->footprint - mfree; + nm.fordblks = mfree; + nm.keepcost = m->topsize; + } + + POSTACTION(m); + } + return nm; +} +#endif /* !NO_MALLINFO */ + +#if !NO_MALLOC_STATS +static void internal_malloc_stats(mstate m) { + ensure_initialization(); + if (!PREACTION(m)) { + size_t maxfp = 0; + size_t fp = 0; + size_t used = 0; + check_malloc_state(m); + if (is_initialized(m)) { + msegmentptr s = &m->seg; + maxfp = m->max_footprint; + fp = m->footprint; + used = fp - (m->topsize + TOP_FOOT_SIZE); + + while (s != 0) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && + q != m->top && q->head != FENCEPOST_HEAD) { + if (!is_inuse(q)) + used -= chunksize(q); + q = next_chunk(q); + } + s = s->next; + } + } + POSTACTION(m); /* drop lock */ + fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp)); + fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp)); + fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used)); + } +} +#endif /* NO_MALLOC_STATS */ + +/* ----------------------- Operations on smallbins ----------------------- */ + +/* + Various forms of linking and unlinking are defined as macros. Even + the ones for trees, which are very long but have very short typical + paths. This is ugly but reduces reliance on inlining support of + compilers. 
+*/ + +/* Link a free chunk into a smallbin */ +#define insert_small_chunk(M, P, S) {\ + bindex_t I = small_index(S);\ + mchunkptr B = smallbin_at(M, I);\ + mchunkptr F = B;\ + assert(S >= MIN_CHUNK_SIZE);\ + if (!smallmap_is_marked(M, I))\ + mark_smallmap(M, I);\ + else if (RTCHECK(ok_address(M, B->fd)))\ + F = B->fd;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + B->fd = P;\ + F->bk = P;\ + P->fd = F;\ + P->bk = B;\ +} + +/* Unlink a chunk from a smallbin */ +#define unlink_small_chunk(M, P, S) {\ + mchunkptr F = P->fd;\ + mchunkptr B = P->bk;\ + bindex_t I = small_index(S);\ + assert(P != B);\ + assert(P != F);\ + assert(chunksize(P) == small_index2size(I));\ + if (RTCHECK(F == smallbin_at(M,I) || (ok_address(M, F) && F->bk == P))) { \ + if (B == F) {\ + clear_smallmap(M, I);\ + }\ + else if (RTCHECK(B == smallbin_at(M,I) ||\ + (ok_address(M, B) && B->fd == P))) {\ + F->bk = B;\ + B->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ +} + +/* Unlink the first chunk from a smallbin */ +#define unlink_first_small_chunk(M, B, P, I) {\ + mchunkptr F = P->fd;\ + assert(P != B);\ + assert(P != F);\ + assert(chunksize(P) == small_index2size(I));\ + if (B == F) {\ + clear_smallmap(M, I);\ + }\ + else if (RTCHECK(ok_address(M, F) && F->bk == P)) {\ + F->bk = B;\ + B->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ +} + +/* Replace dv node, binning the old one */ +/* Used only when dvsize known to be small */ +#define replace_dv(M, P, S) {\ + size_t DVS = M->dvsize;\ + assert(is_small(DVS));\ + if (DVS != 0) {\ + mchunkptr DV = M->dv;\ + insert_small_chunk(M, DV, DVS);\ + }\ + M->dvsize = S;\ + M->dv = P;\ +} + +/* ------------------------- Operations on trees ------------------------- */ + +/* Insert chunk into tree */ +#define insert_large_chunk(M, X, S) {\ + tbinptr* H;\ + bindex_t I;\ + compute_tree_index(S, I);\ + H = treebin_at(M, I);\ + X->index = I;\ + X->child[0] = X->child[1] = 0;\ + if 
(!treemap_is_marked(M, I)) {\ + mark_treemap(M, I);\ + *H = X;\ + X->parent = (tchunkptr)H;\ + X->fd = X->bk = X;\ + }\ + else {\ + tchunkptr T = *H;\ + size_t K = S << leftshift_for_tree_index(I);\ + for (;;) {\ + if (chunksize(T) != S) {\ + tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ + K <<= 1;\ + if (*C != 0)\ + T = *C;\ + else if (RTCHECK(ok_address(M, C))) {\ + *C = X;\ + X->parent = T;\ + X->fd = X->bk = X;\ + break;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + break;\ + }\ + }\ + else {\ + tchunkptr F = T->fd;\ + if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\ + T->fd = F->bk = X;\ + X->fd = F;\ + X->bk = T;\ + X->parent = 0;\ + break;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + break;\ + }\ + }\ + }\ + }\ +} + +/* + Unlink steps: + + 1. If x is a chained node, unlink it from its same-sized fd/bk links + and choose its bk node as its replacement. + 2. If x was the last node of its size, but not a leaf node, it must + be replaced with a leaf node (not merely one with an open left or + right), to make sure that lefts and rights of descendents + correspond properly to bit masks. We use the rightmost descendent + of x. We could use any other leaf, but this is easy to locate and + tends to counteract removal of leftmosts elsewhere, and so keeps + paths shorter than minimally guaranteed. This doesn't loop much + because on average a node in a tree is near the bottom. + 3. If x is the base of a chain (i.e., has parent links) relink + x's parent and children to x's replacement (or null if none). 
+*/ + +#define unlink_large_chunk(M, X) {\ + tchunkptr XP = X->parent;\ + tchunkptr R;\ + if (X->bk != X) {\ + tchunkptr F = X->fd;\ + R = X->bk;\ + if (RTCHECK(ok_address(M, F) && F->bk == X && R->fd == X)) {\ + F->bk = R;\ + R->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else {\ + tchunkptr* RP;\ + if (((R = *(RP = &(X->child[1]))) != 0) ||\ + ((R = *(RP = &(X->child[0]))) != 0)) {\ + tchunkptr* CP;\ + while ((*(CP = &(R->child[1])) != 0) ||\ + (*(CP = &(R->child[0])) != 0)) {\ + R = *(RP = CP);\ + }\ + if (RTCHECK(ok_address(M, RP)))\ + *RP = 0;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + }\ + if (XP != 0) {\ + tbinptr* H = treebin_at(M, X->index);\ + if (X == *H) {\ + if ((*H = R) == 0) \ + clear_treemap(M, X->index);\ + }\ + else if (RTCHECK(ok_address(M, XP))) {\ + if (XP->child[0] == X) \ + XP->child[0] = R;\ + else \ + XP->child[1] = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + if (R != 0) {\ + if (RTCHECK(ok_address(M, R))) {\ + tchunkptr C0, C1;\ + R->parent = XP;\ + if ((C0 = X->child[0]) != 0) {\ + if (RTCHECK(ok_address(M, C0))) {\ + R->child[0] = C0;\ + C0->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + if ((C1 = X->child[1]) != 0) {\ + if (RTCHECK(ok_address(M, C1))) {\ + R->child[1] = C1;\ + C1->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ +} + +/* Relays to large vs small bin operations */ + +#define insert_chunk(M, P, S)\ + if (is_small(S)) insert_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } + +#define unlink_chunk(M, P, S)\ + if (is_small(S)) unlink_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } + + +/* Relays to internal calls to malloc/free from realloc, memalign etc */ + +#if ONLY_MSPACES +#define internal_malloc(m, b) mspace_malloc(m, b) +#define internal_free(m, mem) mspace_free(m,mem); +#else /* ONLY_MSPACES */ +#if MSPACES 
+#define internal_malloc(m, b)\ + ((m == gm)? dlmalloc(b) : mspace_malloc(m, b)) +#define internal_free(m, mem)\ + if (m == gm) dlfree(mem); else mspace_free(m,mem); +#else /* MSPACES */ +#define internal_malloc(m, b) dlmalloc(b) +#define internal_free(m, mem) dlfree(mem) +#endif /* MSPACES */ +#endif /* ONLY_MSPACES */ + +/* ----------------------- Direct-mmapping chunks ----------------------- */ + +/* + Directly mmapped chunks are set up with an offset to the start of + the mmapped region stored in the prev_foot field of the chunk. This + allows reconstruction of the required argument to MUNMAP when freed, + and also allows adjustment of the returned chunk to meet alignment + requirements (especially in memalign). +*/ + +/* Malloc using mmap */ +static void* mmap_alloc(mstate m, size_t nb) { + size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + if (m->footprint_limit != 0) { + size_t fp = m->footprint + mmsize; + if (fp <= m->footprint || fp > m->footprint_limit) + return 0; + } + if (mmsize > nb) { /* Check for wrap around 0 */ + char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); + if (mm != CMFAIL) { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - MMAP_FOOT_PAD; + mchunkptr p = (mchunkptr)(mm + offset); + p->prev_foot = offset; + p->head = psize; + mark_inuse_foot(m, p, psize); + chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; + + if (m->least_addr == 0 || mm < m->least_addr) + m->least_addr = mm; + if ((m->footprint += mmsize) > m->max_footprint) + m->max_footprint = m->footprint; + assert(is_aligned(chunk2mem(p))); + check_mmapped_chunk(m, p); + return chunk2mem(p); + } + } + return 0; +} + +/* Realloc using mmap */ +static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb, int flags) { + size_t oldsize = chunksize(oldp); + (void)flags; /* placate people compiling -Wunused */ + if (is_small(nb)) /* Can't shrink mmap regions below small size */ + 
return 0; + /* Keep old chunk if big enough but not too big */ + if (oldsize >= nb + SIZE_T_SIZE && + (oldsize - nb) <= (mparams.granularity << 1)) + return oldp; + else { + size_t offset = oldp->prev_foot; + size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; + size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + char* cp = (char*)CALL_MREMAP((char*)oldp - offset, + oldmmsize, newmmsize, flags); + if (cp != CMFAIL) { + mchunkptr newp = (mchunkptr)(cp + offset); + size_t psize = newmmsize - offset - MMAP_FOOT_PAD; + newp->head = psize; + mark_inuse_foot(m, newp, psize); + chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; + + if (cp < m->least_addr) + m->least_addr = cp; + if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint) + m->max_footprint = m->footprint; + check_mmapped_chunk(m, newp); + return newp; + } + } + return 0; +} + + +/* -------------------------- mspace management -------------------------- */ + +/* Initialize top chunk and its size */ +static void init_top(mstate m, mchunkptr p, size_t psize) { + /* Ensure alignment */ + size_t offset = align_offset(chunk2mem(p)); + p = (mchunkptr)((char*)p + offset); + psize -= offset; + + m->top = p; + m->topsize = psize; + p->head = psize | PINUSE_BIT; + /* set size of fake trailing chunk holding overhead space only once */ + chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; + m->trim_check = mparams.trim_threshold; /* reset on each update */ +} + +/* Initialize bins for a new mstate that is otherwise zeroed out */ +static void init_bins(mstate m) { + /* Establish circular links for smallbins */ + bindex_t i; + for (i = 0; i < NSMALLBINS; ++i) { + sbinptr bin = smallbin_at(m,i); + bin->fd = bin->bk = bin; + } +} + +#if PROCEED_ON_ERROR + +/* default corruption action */ +static void reset_on_error(mstate m) { + int i; + ++malloc_corruption_error_count; + /* Reinitialize fields to forget about all memory */ + 
m->smallmap = m->treemap = 0; + m->dvsize = m->topsize = 0; + m->seg.base = 0; + m->seg.size = 0; + m->seg.next = 0; + m->top = m->dv = 0; + for (i = 0; i < NTREEBINS; ++i) + *treebin_at(m, i) = 0; + init_bins(m); +} +#endif /* PROCEED_ON_ERROR */ + +/* Allocate chunk and prepend remainder with chunk in successor base. */ +static void* prepend_alloc(mstate m, char* newbase, char* oldbase, + size_t nb) { + mchunkptr p = align_as_chunk(newbase); + mchunkptr oldfirst = align_as_chunk(oldbase); + size_t psize = (char*)oldfirst - (char*)p; + mchunkptr q = chunk_plus_offset(p, nb); + size_t qsize = psize - nb; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + + assert((char*)oldfirst > (char*)q); + assert(pinuse(oldfirst)); + assert(qsize >= MIN_CHUNK_SIZE); + + /* consolidate remainder with first chunk of old base */ + if (oldfirst == m->top) { + size_t tsize = m->topsize += qsize; + m->top = q; + q->head = tsize | PINUSE_BIT; + check_top_chunk(m, q); + } + else if (oldfirst == m->dv) { + size_t dsize = m->dvsize += qsize; + m->dv = q; + set_size_and_pinuse_of_free_chunk(q, dsize); + } + else { + if (!is_inuse(oldfirst)) { + size_t nsize = chunksize(oldfirst); + unlink_chunk(m, oldfirst, nsize); + oldfirst = chunk_plus_offset(oldfirst, nsize); + qsize += nsize; + } + set_free_with_pinuse(q, qsize, oldfirst); + insert_chunk(m, q, qsize); + check_free_chunk(m, q); + } + + check_malloced_chunk(m, chunk2mem(p), nb); + return chunk2mem(p); +} + +/* Add a segment to hold a new noncontiguous region */ +static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) { + /* Determine locations and sizes of segment, fenceposts, old top */ + char* old_top = (char*)m->top; + msegmentptr oldsp = segment_holding(m, old_top); + char* old_end = oldsp->base + oldsp->size; + size_t ssize = pad_request(sizeof(struct malloc_segment)); + char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + size_t offset = align_offset(chunk2mem(rawsp)); + char* asp = rawsp 
+ offset; + char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp; + mchunkptr sp = (mchunkptr)csp; + msegmentptr ss = (msegmentptr)(chunk2mem(sp)); + mchunkptr tnext = chunk_plus_offset(sp, ssize); + mchunkptr p = tnext; + int nfences = 0; + + /* reset top to new space */ + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + + /* Set up segment record */ + assert(is_aligned(ss)); + set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); + *ss = m->seg; /* Push current record */ + m->seg.base = tbase; + m->seg.size = tsize; + m->seg.sflags = mmapped; + m->seg.next = ss; + + /* Insert trailing fenceposts */ + for (;;) { + mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); + p->head = FENCEPOST_HEAD; + ++nfences; + if ((char*)(&(nextp->head)) < old_end) + p = nextp; + else + break; + } + assert(nfences >= 2); + + /* Insert the rest of old top into a bin as an ordinary free chunk */ + if (csp != old_top) { + mchunkptr q = (mchunkptr)old_top; + size_t psize = csp - old_top; + mchunkptr tn = chunk_plus_offset(q, psize); + set_free_with_pinuse(q, psize, tn); + insert_chunk(m, q, psize); + } + + check_top_chunk(m, m->top); +} + +/* -------------------------- System allocation -------------------------- */ + +/* Get memory from system using MORECORE or MMAP */ +static void* sys_alloc(mstate m, size_t nb) { + char* tbase = CMFAIL; + size_t tsize = 0; + flag_t mmap_flag = 0; + size_t asize; /* allocation size */ + + ensure_initialization(); + + /* Directly map large chunks, but only if already initialized */ + if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) { + void* mem = mmap_alloc(m, nb); + if (mem != 0) + return mem; + } + + asize = granularity_align(nb + SYS_ALLOC_PADDING); + if (asize <= nb) + return 0; /* wraparound */ + if (m->footprint_limit != 0) { + size_t fp = m->footprint + asize; + if (fp <= m->footprint || fp > m->footprint_limit) + return 0; + } + + /* + Try getting memory in any of three ways (in most-preferred to + 
least-preferred order): + 1. A call to MORECORE that can normally contiguously extend memory. + (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or + or main space is mmapped or a previous contiguous call failed) + 2. A call to MMAP new space (disabled if not HAVE_MMAP). + Note that under the default settings, if MORECORE is unable to + fulfill a request, and HAVE_MMAP is true, then mmap is + used as a noncontiguous system allocator. This is a useful backup + strategy for systems with holes in address spaces -- in this case + sbrk cannot contiguously expand the heap, but mmap may be able to + find space. + 3. A call to MORECORE that cannot usually contiguously extend memory. + (disabled if not HAVE_MORECORE) + + In all cases, we need to request enough bytes from system to ensure + we can malloc nb bytes upon success, so pad with enough space for + top_foot, plus alignment-pad to make sure we don't lose bytes if + not on boundary, and round this up to a granularity unit. + */ + + if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) { + char* br = CMFAIL; + size_t ssize = asize; /* sbrk call size */ + msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top); + ACQUIRE_MALLOC_GLOBAL_LOCK(); + + if (ss == 0) { /* First time through or recovery */ + char* base = (char*)CALL_MORECORE(0); + if (base != CMFAIL) { + size_t fp; + /* Adjust to end on a page boundary */ + if (!is_page_aligned(base)) + ssize += (page_align((size_t)base) - (size_t)base); + fp = m->footprint + ssize; /* recheck limits */ + if (ssize > nb && ssize < HALF_MAX_SIZE_T && + (m->footprint_limit == 0 || + (fp > m->footprint && fp <= m->footprint_limit)) && + (br = (char*)(CALL_MORECORE(ssize))) == base) { + tbase = base; + tsize = ssize; + } + } + } + else { + /* Subtract out existing available top space from MORECORE request. 
*/ + ssize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING); + /* Use mem here only if it did continuously extend old space */ + if (ssize < HALF_MAX_SIZE_T && + (br = (char*)(CALL_MORECORE(ssize))) == ss->base+ss->size) { + tbase = br; + tsize = ssize; + } + } + + if (tbase == CMFAIL) { /* Cope with partial failure */ + if (br != CMFAIL) { /* Try to use/extend the space we did get */ + if (ssize < HALF_MAX_SIZE_T && + ssize < nb + SYS_ALLOC_PADDING) { + size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - ssize); + if (esize < HALF_MAX_SIZE_T) { + char* end = (char*)CALL_MORECORE(esize); + if (end != CMFAIL) + ssize += esize; + else { /* Can't use; try to release */ + (void) CALL_MORECORE(-ssize); + br = CMFAIL; + } + } + } + } + if (br != CMFAIL) { /* Use the space we did get */ + tbase = br; + tsize = ssize; + } + else + disable_contiguous(m); /* Don't try contiguous path in the future */ + } + + RELEASE_MALLOC_GLOBAL_LOCK(); + } + + if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */ + char* mp = (char*)(CALL_MMAP(asize)); + if (mp != CMFAIL) { + tbase = mp; + tsize = asize; + mmap_flag = USE_MMAP_BIT; + } + } + + if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */ + if (asize < HALF_MAX_SIZE_T) { + char* br = CMFAIL; + char* end = CMFAIL; + ACQUIRE_MALLOC_GLOBAL_LOCK(); + br = (char*)(CALL_MORECORE(asize)); + end = (char*)(CALL_MORECORE(0)); + RELEASE_MALLOC_GLOBAL_LOCK(); + if (br != CMFAIL && end != CMFAIL && br < end) { + size_t ssize = end - br; + if (ssize > nb + TOP_FOOT_SIZE) { + tbase = br; + tsize = ssize; + } + } + } + } + + if (tbase != CMFAIL) { + + if ((m->footprint += tsize) > m->max_footprint) + m->max_footprint = m->footprint; + + if (!is_initialized(m)) { /* first-time initialization */ + if (m->least_addr == 0 || tbase < m->least_addr) + m->least_addr = tbase; + m->seg.base = tbase; + m->seg.size = tsize; + m->seg.sflags = mmap_flag; + m->magic = mparams.magic; + m->release_checks = MAX_RELEASE_CHECK_RATE; + 
init_bins(m); +#if !ONLY_MSPACES + if (is_global(m)) + init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + else +#endif + { + /* Offset top by embedded malloc_state */ + mchunkptr mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); + } + } + + else { + /* Try to merge with an existing segment */ + msegmentptr sp = &m->seg; + /* Only consider most recent segment if traversal suppressed */ + while (sp != 0 && tbase != sp->base + sp->size) + sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; + if (sp != 0 && + !is_extern_segment(sp) && + (sp->sflags & USE_MMAP_BIT) == mmap_flag && + segment_holds(sp, m->top)) { /* append */ + sp->size += tsize; + init_top(m, m->top, m->topsize + tsize); + } + else { + if (tbase < m->least_addr) + m->least_addr = tbase; + sp = &m->seg; + while (sp != 0 && sp->base != tbase + tsize) + sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; + if (sp != 0 && + !is_extern_segment(sp) && + (sp->sflags & USE_MMAP_BIT) == mmap_flag) { + char* oldbase = sp->base; + sp->base = tbase; + sp->size += tsize; + return prepend_alloc(m, tbase, oldbase, nb); + } + else + add_segment(m, tbase, tsize, mmap_flag); + } + } + + if (nb < m->topsize) { /* Allocate from new or extended top space */ + size_t rsize = m->topsize -= nb; + mchunkptr p = m->top; + mchunkptr r = m->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(m, p, nb); + check_top_chunk(m, m->top); + check_malloced_chunk(m, chunk2mem(p), nb); + return chunk2mem(p); + } + } + + MALLOC_FAILURE_ACTION; + return 0; +} + +/* ----------------------- system deallocation -------------------------- */ + +/* Unmap and unlink any mmapped segments that don't contain used chunks */ +static size_t release_unused_segments(mstate m) { + size_t released = 0; + int nsegs = 0; + msegmentptr pred = &m->seg; + msegmentptr sp = pred->next; + while (sp != 0) { + char* base = sp->base; + size_t size = sp->size; + msegmentptr next = 
sp->next; + ++nsegs; + if (is_mmapped_segment(sp) && !is_extern_segment(sp)) { + mchunkptr p = align_as_chunk(base); + size_t psize = chunksize(p); + /* Can unmap if first chunk holds entire segment and not pinned */ + if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { + tchunkptr tp = (tchunkptr)p; + assert(segment_holds(sp, (char*)sp)); + if (p == m->dv) { + m->dv = 0; + m->dvsize = 0; + } + else { + unlink_large_chunk(m, tp); + } + if (CALL_MUNMAP(base, size) == 0) { + released += size; + m->footprint -= size; + /* unlink obsoleted record */ + sp = pred; + sp->next = next; + } + else { /* back out if cannot unmap */ + insert_large_chunk(m, tp, psize); + } + } + } + if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */ + break; + pred = sp; + sp = next; + } + /* Reset check counter */ + m->release_checks = (((size_t) nsegs > (size_t) MAX_RELEASE_CHECK_RATE)? + (size_t) nsegs : (size_t) MAX_RELEASE_CHECK_RATE); + return released; +} + +static int sys_trim(mstate m, size_t pad) { + size_t released = 0; + ensure_initialization(); + if (pad < MAX_REQUEST && is_initialized(m)) { + pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ + + if (m->topsize > pad) { + /* Shrink top space in granularity-size units, keeping at least one */ + size_t unit = mparams.granularity; + size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - + SIZE_T_ONE) * unit; + msegmentptr sp = segment_holding(m, (char*)m->top); + + if (!is_extern_segment(sp)) { + if (is_mmapped_segment(sp)) { + if (HAVE_MMAP && + sp->size >= extra && + !has_segment_link(m, sp)) { /* can't shrink if pinned */ + size_t newsize = sp->size - extra; + (void)newsize; /* placate people compiling -Wunused-variable */ + /* Prefer mremap, fall back to munmap */ + /* coverity[offset_free] */ + if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) || + (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { + released = extra; + } + } + } + else if (HAVE_MORECORE) { + if (extra >= 
HALF_MAX_SIZE_T) /* Avoid wrapping negative */ + extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; + ACQUIRE_MALLOC_GLOBAL_LOCK(); + { + /* Make sure end of memory is where we last set it. */ + char* old_br = (char*)(CALL_MORECORE(0)); + if (old_br == sp->base + sp->size) { + char* rel_br = (char*)(CALL_MORECORE(-extra)); + char* new_br = (char*)(CALL_MORECORE(0)); + if (rel_br != CMFAIL && new_br < old_br) + released = old_br - new_br; + } + } + RELEASE_MALLOC_GLOBAL_LOCK(); + } + } + + if (released != 0) { + sp->size -= released; + m->footprint -= released; + init_top(m, m->top, m->topsize - released); + check_top_chunk(m, m->top); + } + } + + /* Unmap any unused mmapped segments */ + if (HAVE_MMAP) + released += release_unused_segments(m); + + /* On failure, disable autotrim to avoid repeated failed future calls */ + if (released == 0 && m->topsize > m->trim_check) + m->trim_check = MAX_SIZE_T; + } + + return (released != 0)? 1 : 0; +} + +/* Consolidate and bin a chunk. Differs from exported versions + of free mainly in that the chunk need not be marked as inuse. 
+*/ +static void dispose_chunk(mstate m, mchunkptr p, size_t psize) { + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + mchunkptr prev; + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + m->footprint -= psize; + return; + } + prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(m, prev))) { /* consolidate backward */ + if (p != m->dv) { + unlink_chunk(m, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + m->dvsize = psize; + set_free_with_pinuse(p, psize, next); + return; + } + } + else { + CORRUPTION_ERROR_ACTION(m); + return; + } + } + if (RTCHECK(ok_address(m, next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == m->top) { + size_t tsize = m->topsize += psize; + m->top = p; + p->head = tsize | PINUSE_BIT; + if (p == m->dv) { + m->dv = 0; + m->dvsize = 0; + } + return; + } + else if (next == m->dv) { + size_t dsize = m->dvsize += psize; + m->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + return; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(m, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == m->dv) { + m->dvsize = psize; + return; + } + } + } + else { + set_free_with_pinuse(p, psize, next); + } + insert_chunk(m, p, psize); + } + else { + CORRUPTION_ERROR_ACTION(m); + } +} + +/* ---------------------------- malloc --------------------------- */ + +/* allocate a large request from the best fitting chunk in a treebin */ +static void* tmalloc_large(mstate m, size_t nb) { + tchunkptr v = 0; + size_t rsize = -nb; /* Unsigned negation */ + tchunkptr t; + bindex_t idx; + compute_tree_index(nb, idx); + if ((t = *treebin_at(m, idx)) != 0) { + /* Traverse tree for this bin looking for node with size == nb */ + size_t sizebits = nb << leftshift_for_tree_index(idx); + tchunkptr rst = 0; /* The deepest untaken 
right subtree */ + for (;;) { + tchunkptr rt; + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + v = t; + if ((rsize = trem) == 0) + break; + } + rt = t->child[1]; + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + if (rt != 0 && rt != t) + rst = rt; + if (t == 0) { + t = rst; /* set t to least subtree holding sizes > nb */ + break; + } + sizebits <<= 1; + } + } + if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ + binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; + if (leftbits != 0) { + bindex_t i; + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + t = *treebin_at(m, i); + } + } + + while (t != 0) { /* find smallest of tree or subtree */ + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + t = leftmost_child(t); + } + + /* If dv is a better fit, return 0 so malloc will use it */ + if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { + if (RTCHECK(ok_address(m, v))) { /* split */ + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if (RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + insert_chunk(m, r, rsize); + } + return chunk2mem(v); + } + } + CORRUPTION_ERROR_ACTION(m); + } + return 0; +} + +/* allocate a small request from the best fitting chunk in a treebin */ +static void* tmalloc_small(mstate m, size_t nb) { + tchunkptr t, v; + size_t rsize; + bindex_t i; + binmap_t leastbit = least_bit(m->treemap); + compute_bit2idx(leastbit, i); + v = t = *treebin_at(m, i); + rsize = chunksize(t) - nb; + + while ((t = leftmost_child(t)) != 0) { + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + } + + if (RTCHECK(ok_address(m, v))) { + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if 
(RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(m, r, rsize); + } + return chunk2mem(v); + } + } + + CORRUPTION_ERROR_ACTION(m); + return 0; +} + +#if !ONLY_MSPACES + +void* dlmalloc(size_t bytes) { + /* + Basic algorithm: + If a small request (< 256 bytes minus per-chunk overhead): + 1. If one exists, use a remainderless chunk in associated smallbin. + (Remainderless means that there are too few excess bytes to + represent as a chunk.) + 2. If it is big enough, use the dv chunk, which is normally the + chunk adjacent to the one used for the most recent small request. + 3. If one exists, split the smallest available chunk in a bin, + saving remainder in dv. + 4. If it is big enough, use the top chunk. + 5. If available, get memory from system and use it + Otherwise, for a large request: + 1. Find the smallest available binned chunk that fits, and use it + if it is better fitting than dv chunk, splitting if necessary. + 2. If better fitting than any binned chunk, use the dv chunk. + 3. If it is big enough, use the top chunk. + 4. If request size >= mmap threshold, try to directly mmap this chunk. + 5. If available, get memory from system and use it + + The ugly goto's here ensure that postaction occurs along all paths. + */ + +#if USE_LOCKS + ensure_initialization(); /* initialize in sys_alloc if not using locks */ +#endif + + if (!PREACTION(gm)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = gm->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
*/ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(gm, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(gm, b, p, idx); + set_inuse_and_pinuse(gm, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb > gm->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(gm, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(gm, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(gm, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(gm, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + + if (nb <= gm->dvsize) { + size_t rsize = gm->dvsize - nb; + mchunkptr p = gm->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = gm->dv = chunk_plus_offset(p, nb); + gm->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + } + else { /* exhaust dv */ + size_t dvs = gm->dvsize; + gm->dvsize = 0; + gm->dv = 0; + set_inuse_and_pinuse(gm, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb < gm->topsize) { /* Split top */ + size_t rsize = gm->topsize -= nb; + mchunkptr p = gm->top; + mchunkptr r = gm->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + mem = chunk2mem(p); + check_top_chunk(gm, gm->top); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + mem = sys_alloc(gm, nb); + + postaction: + POSTACTION(gm); + return mem; + } + + return 0; +} + +/* ---------------------------- free --------------------------- */ + +void dlfree(void* mem) { + /* + Consolidate freed chunks with preceeding or succeeding bordering + free chunks, if they exist, and then place in a bin. Intermixed + with special cases for top, dv, mmapped chunks, and usage errors. 
+ */ + + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } +#else /* FOOTERS */ +#define fm gm +#endif /* FOOTERS */ + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if 
(--fm->release_checks == 0) + release_unused_segments(fm); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +#if !FOOTERS +#undef fm +#endif /* FOOTERS */ +} + +void* dlcalloc(size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = dlmalloc(req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +#endif /* !ONLY_MSPACES */ + +/* ------------ Internal support for realloc, memalign, etc -------------- */ + +/* Try to realloc; only in-place unless can_move true */ +static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb, + int can_move) { + mchunkptr newp = 0; + size_t oldsize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, oldsize); + if (RTCHECK(ok_address(m, p) && ok_inuse(p) && + ok_next(p, next) && ok_pinuse(next))) { + if (is_mmapped(p)) { + newp = mmap_resize(m, p, nb, can_move); + } + else if (oldsize >= nb) { /* already big enough */ + size_t rsize = oldsize - nb; + if (rsize >= MIN_CHUNK_SIZE) { /* split off remainder */ + mchunkptr r = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, r, rsize); + dispose_chunk(m, r, rsize); + } + newp = p; + } + else if (next == m->top) { /* extend into top */ + if (oldsize + m->topsize > nb) { + size_t newsize = oldsize + m->topsize; + size_t newtopsize = newsize - nb; + mchunkptr newtop = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + newtop->head = newtopsize |PINUSE_BIT; + m->top = newtop; + m->topsize = newtopsize; + newp = p; + } + } + else if (next == m->dv) { /* extend into dv */ + size_t dvs = m->dvsize; + if (oldsize + dvs >= nb) { + size_t dsize = oldsize + dvs - nb; + if (dsize >= MIN_CHUNK_SIZE) { + mchunkptr r = chunk_plus_offset(p, 
nb); + mchunkptr n = chunk_plus_offset(r, dsize); + set_inuse(m, p, nb); + set_size_and_pinuse_of_free_chunk(r, dsize); + clear_pinuse(n); + m->dvsize = dsize; + m->dv = r; + } + else { /* exhaust dv */ + size_t newsize = oldsize + dvs; + set_inuse(m, p, newsize); + m->dvsize = 0; + m->dv = 0; + } + newp = p; + } + } + else if (!cinuse(next)) { /* extend into next free chunk */ + size_t nextsize = chunksize(next); + if (oldsize + nextsize >= nb) { + size_t rsize = oldsize + nextsize - nb; + unlink_chunk(m, next, nextsize); + if (rsize < MIN_CHUNK_SIZE) { + size_t newsize = oldsize + nextsize; + set_inuse(m, p, newsize); + } + else { + mchunkptr r = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, r, rsize); + dispose_chunk(m, r, rsize); + } + newp = p; + } + } + } + else { + USAGE_ERROR_ACTION(m, chunk2mem(p)); + } + return newp; +} + +static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { + void* mem = 0; + if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ + alignment = MIN_CHUNK_SIZE; + if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ + size_t a = MALLOC_ALIGNMENT << 1; + while (a < alignment) a <<= 1; + alignment = a; + } + if (bytes >= MAX_REQUEST - alignment) { + if (m != 0) { /* Test isn't needed but avoids compiler warning */ + MALLOC_FAILURE_ACTION; + } + } + else { + size_t nb = request2size(bytes); + size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; + mem = internal_malloc(m, req); + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (PREACTION(m)) + return 0; + if ((((size_t)(mem)) & (alignment - 1)) != 0) { /* misaligned */ + /* + Find an aligned spot inside chunk. Since we need to give + back leading space in a chunk of at least MIN_CHUNK_SIZE, if + the first calculation places us at a spot with less than + MIN_CHUNK_SIZE leader, we can move to the next aligned spot. + We've allocated enough total room so that this is always + possible. 
+ */ + char* br = (char*)mem2chunk((size_t)(((size_t)((char*)mem + alignment - + SIZE_T_ONE)) & + -alignment)); + char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? + br : br+alignment; + mchunkptr newp = (mchunkptr)pos; + size_t leadsize = pos - (char*)(p); + size_t newsize = chunksize(p) - leadsize; + + if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ + newp->prev_foot = p->prev_foot + leadsize; + newp->head = newsize; + } + else { /* Otherwise, give back leader, use the rest */ + set_inuse(m, newp, newsize); + set_inuse(m, p, leadsize); + dispose_chunk(m, p, leadsize); + } + p = newp; + } + + /* Give back spare room at the end */ + if (!is_mmapped(p)) { + size_t size = chunksize(p); + if (size > nb + MIN_CHUNK_SIZE) { + size_t remainder_size = size - nb; + mchunkptr remainder = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, remainder, remainder_size); + dispose_chunk(m, remainder, remainder_size); + } + } + + mem = chunk2mem(p); + assert (chunksize(p) >= nb); + assert(((size_t)mem & (alignment - 1)) == 0); + check_inuse_chunk(m, p); + POSTACTION(m); + } + } + return mem; +} + +/* + Common support for independent_X routines, handling + all of the combinations that can result. 
+ The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed +*/ +static void** ialloc(mstate m, + size_t n_elements, + size_t* sizes, + int opts, + void* chunks[]) { + + size_t element_size; /* chunksize of each element, if all same */ + size_t contents_size; /* total size of elements */ + size_t array_size; /* request size of pointer array */ + void* mem; /* malloced aggregate space */ + mchunkptr p; /* corresponding chunk */ + size_t remainder_size; /* remaining bytes while splitting */ + void** marray; /* either "chunks" or malloced ptr array */ + mchunkptr array_chunk; /* chunk for malloced ptr array */ + flag_t was_enabled; /* to disable mmap */ + size_t size; + size_t i; + + ensure_initialization(); + /* compute array length, if needed */ + if (chunks != 0) { + if (n_elements == 0) + return chunks; /* nothing to do */ + marray = chunks; + array_size = 0; + } + else { + /* if empty req, must still return chunk representing empty array */ + if (n_elements == 0) + return (void**)internal_malloc(m, 0); + marray = 0; + array_size = request2size(n_elements * (sizeof(void*))); + } + + /* compute total element size */ + if (opts & 0x1) { /* all-same-size */ + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } + else { /* add up all the sizes */ + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) + contents_size += request2size(sizes[i]); + } + + size = contents_size + array_size; + + /* + Allocate the aggregate chunk. First disable direct-mmapping so + malloc won't use it, since we would not be able to later + free/realloc space internal to a segregated mmap region. 
+ */ + was_enabled = use_mmap(m); + disable_mmap(m); + mem = internal_malloc(m, size - CHUNK_OVERHEAD); + if (was_enabled) + enable_mmap(m); + if (mem == 0) + return 0; + + if (PREACTION(m)) return 0; + p = mem2chunk(mem); + remainder_size = chunksize(p); + + assert(!is_mmapped(p)); + + if (opts & 0x2) { /* optionally clear the elements */ + memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size); + } + + /* If not provided, allocate the pointer array as final part of chunk */ + if (marray == 0) { + size_t array_chunk_size; + array_chunk = chunk_plus_offset(p, contents_size); + array_chunk_size = remainder_size - contents_size; + marray = (void**) (chunk2mem(array_chunk)); + set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size); + remainder_size = contents_size; + } + + /* split out elements */ + for (i = 0; ; ++i) { + marray[i] = chunk2mem(p); + if (i != n_elements-1) { + if (element_size != 0) + size = element_size; + else + size = request2size(sizes[i]); + remainder_size -= size; + set_size_and_pinuse_of_inuse_chunk(m, p, size); + p = chunk_plus_offset(p, size); + } + else { /* the final element absorbs any overallocation slop */ + set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size); + break; + } + } + +#if DEBUG + if (marray != chunks) { + /* final element must have exactly exhausted chunk */ + if (element_size != 0) { + assert(remainder_size == element_size); + } + else { + assert(remainder_size == request2size(sizes[i])); + } + check_inuse_chunk(m, mem2chunk(marray)); + } + for (i = 0; i != n_elements; ++i) + check_inuse_chunk(m, mem2chunk(marray[i])); + +#endif /* DEBUG */ + + POSTACTION(m); + return marray; +} + +/* Try to free all pointers in the given array. 
+ Note: this could be made faster, by delaying consolidation, + at the price of disabling some user integrity checks. We + still optimize some consolidations by combining adjacent + chunks before freeing, which will occur often if allocated + with ialloc or the array is sorted. + Every entry that is processed is overwritten with 0. The return + value counts the entries that were skipped because, when FOOTERS + is enabled, their chunk belongs to a different mstate than m. +*/ +static size_t internal_bulk_free(mstate m, void* array[], size_t nelem) { + size_t unfreed = 0; + if (!PREACTION(m)) { + void** a; + void** fence = &(array[nelem]); + for (a = array; a != fence; ++a) { + void* mem = *a; + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + size_t psize = chunksize(p); +#if FOOTERS + if (get_mstate_for(p) != m) { + ++unfreed; + continue; + } +#endif + check_inuse_chunk(m, p); + *a = 0; + if (RTCHECK(ok_address(m, p) && ok_inuse(p))) { + void ** b = a + 1; /* try to merge with next chunk */ + mchunkptr next = next_chunk(p); + if (b != fence && *b == chunk2mem(next)) { + size_t newsize = chunksize(next) + psize; + set_inuse(m, p, newsize); + *b = chunk2mem(p); /* merged chunk is disposed of when slot b is visited */ + } + else + dispose_chunk(m, p, psize); + } + else { + CORRUPTION_ERROR_ACTION(m); + break; + } + } + } + if (should_trim(m, m->topsize)) + sys_trim(m, 0); + POSTACTION(m); + } + return unfreed; +} + +/* Traversal */ +#if MALLOC_INSPECT_ALL +static void internal_inspect_all(mstate m, + void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg) { + if (is_initialized(m)) { + mchunkptr top = m->top; + msegmentptr s; + for (s = &m->seg; s != 0; s = s->next) { + mchunkptr q = align_as_chunk(s->base); + while (segment_holds(s, q) && q->head != FENCEPOST_HEAD) { + mchunkptr next = next_chunk(q); + size_t sz = chunksize(q); + size_t used; + void* start; + if (is_inuse(q)) { + used = sz - CHUNK_OVERHEAD; /* must not be mmapped */ + start = chunk2mem(q); + } + else { + used = 0; + if (is_small(sz)) { /* offset by possible bookkeeping */ + start = (void*)((char*)q + sizeof(struct malloc_chunk)); + } + else { + start = (void*)((char*)q + sizeof(struct malloc_tree_chunk)); + 
} + } + if (start < (void*)next) /* skip if all space is bookkeeping */ + handler(start, next, used, arg); + if (q == top) + break; + q = next; + } + } + } +} +#endif /* MALLOC_INSPECT_ALL */ + +/* ------------------ Exported realloc, memalign, etc -------------------- */ + +#if !ONLY_MSPACES + +void* dlrealloc(void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem == 0) { + mem = dlmalloc(bytes); + } + else if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } +#ifdef REALLOC_ZERO_BYTES_FREES + else if (bytes == 0) { + dlfree(oldmem); + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! FOOTERS + mstate m = gm; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); + POSTACTION(m); + if (newp != 0) { + check_inuse_chunk(m, newp); + mem = chunk2mem(newp); + } + else { + mem = internal_malloc(m, bytes); + if (mem != 0) { + size_t oc = chunksize(oldp) - overhead_for(oldp); + memcpy(mem, oldmem, (oc < bytes)? oc : bytes); + internal_free(m, oldmem); + } + } + } + } + return mem; +} + +void* dlrealloc_in_place(void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem != 0) { + if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! 
FOOTERS + mstate m = gm; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); + POSTACTION(m); + if (newp == oldp) { + check_inuse_chunk(m, newp); + mem = oldmem; + } + } + } + } + return mem; +} + +void* dlmemalign(size_t alignment, size_t bytes) { + if (alignment <= MALLOC_ALIGNMENT) { + return dlmalloc(bytes); + } + return internal_memalign(gm, alignment, bytes); +} + +int dlposix_memalign(void** pp, size_t alignment, size_t bytes) { + void* mem = 0; + if (alignment == MALLOC_ALIGNMENT) + mem = dlmalloc(bytes); + else { + size_t d = alignment / sizeof(void*); + size_t r = alignment % sizeof(void*); + if (r != 0 || d == 0 || (d & (d-SIZE_T_ONE)) != 0) + return EINVAL; + else if (bytes <= MAX_REQUEST - alignment) { + if (alignment < MIN_CHUNK_SIZE) + alignment = MIN_CHUNK_SIZE; + mem = internal_memalign(gm, alignment, bytes); + } + } + if (mem == 0) + return ENOMEM; + else { + *pp = mem; + return 0; + } +} + +void* dlvalloc(size_t bytes) { + size_t pagesz; + ensure_initialization(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, bytes); +} + +void* dlpvalloc(size_t bytes) { + size_t pagesz; + ensure_initialization(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); +} + +void** dlindependent_calloc(size_t n_elements, size_t elem_size, + void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + /* coverity[callee_ptr_arith] */ + return ialloc(gm, n_elements, &sz, 3, chunks); +} + +void** dlindependent_comalloc(size_t n_elements, size_t sizes[], + void* chunks[]) { + return ialloc(gm, n_elements, sizes, 0, chunks); +} + +size_t dlbulk_free(void* array[], size_t nelem) { + return internal_bulk_free(gm, array, nelem); +} + +#if MALLOC_INSPECT_ALL +void dlmalloc_inspect_all(void(*handler)(void *start, + 
void *end, + size_t used_bytes, + void* callback_arg), + void* arg) { + ensure_initialization(); + if (!PREACTION(gm)) { + internal_inspect_all(gm, handler, arg); + POSTACTION(gm); + } +} +#endif /* MALLOC_INSPECT_ALL */ + +int dlmalloc_trim(size_t pad) { + int result = 0; + ensure_initialization(); + if (!PREACTION(gm)) { + result = sys_trim(gm, pad); + POSTACTION(gm); + } + return result; +} + +size_t dlmalloc_footprint(void) { + return gm->footprint; +} + +size_t dlmalloc_max_footprint(void) { + return gm->max_footprint; +} + +size_t dlmalloc_footprint_limit(void) { + size_t maf = gm->footprint_limit; + return maf == 0 ? MAX_SIZE_T : maf; +} + +size_t dlmalloc_set_footprint_limit(size_t bytes) { + size_t result; /* invert sense of 0 */ + if (bytes == 0) + result = granularity_align(1); /* Use minimal size */ + else if (bytes == MAX_SIZE_T) + result = 0; /* disable */ + else + result = granularity_align(bytes); + return gm->footprint_limit = result; +} + +#if !NO_MALLINFO +struct mallinfo dlmallinfo(void) { + return internal_mallinfo(gm); +} +#endif /* NO_MALLINFO */ + +#if !NO_MALLOC_STATS +void dlmalloc_stats() { + internal_malloc_stats(gm); +} +#endif /* NO_MALLOC_STATS */ + +int dlmallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +/* extension to use in testing */ +int dlmallopt_get(int param_number) { + + ensure_initialization(); + switch(param_number) { + case M_TRIM_THRESHOLD: + return mparams.trim_threshold; + case M_GRANULARITY: + return mparams.granularity; + case M_MMAP_THRESHOLD: + return mparams.mmap_threshold; + default: + return 0; + } +} + +size_t dlmalloc_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +#endif /* !ONLY_MSPACES */ + +/* ----------------------------- user mspaces ---------------------------- */ + +#if MSPACES + +static mstate init_user_mstate(char* tbase, size_t tsize) { + size_t msize = 
pad_request(sizeof(struct malloc_state)); + mchunkptr mn; + mchunkptr msp = align_as_chunk(tbase); + mstate m = (mstate)(chunk2mem(msp)); + memset(m, 0, msize); + (void)INITIAL_LOCK(&m->mutex); + msp->head = (msize|INUSE_BITS); + m->seg.base = m->least_addr = tbase; + m->seg.size = m->footprint = m->max_footprint = tsize; + m->magic = mparams.magic; + m->release_checks = MAX_RELEASE_CHECK_RATE; + m->mflags = mparams.default_mflags; + m->extp = 0; + m->exts = 0; + disable_contiguous(m); + init_bins(m); + mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); + check_top_chunk(m, m->top); + return m; +} + +mspace create_mspace(size_t capacity, int locked) { + mstate m = 0; + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); + if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + size_t rs = ((capacity == 0)? mparams.granularity : + (capacity + TOP_FOOT_SIZE + msize)); + size_t tsize = granularity_align(rs); + char* tbase = (char*)(CALL_MMAP(tsize)); + if (tbase != CMFAIL) { + m = init_user_mstate(tbase, tsize); + m->seg.sflags = USE_MMAP_BIT; + set_lock(m, locked); + } + } + return (mspace)m; +} + +mspace create_mspace_with_base(void* base, size_t capacity, int locked) { + mstate m = 0; + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); + if (capacity > msize + TOP_FOOT_SIZE && + capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + m = init_user_mstate((char*)base, capacity); + m->seg.sflags = EXTERN_BIT; + set_lock(m, locked); + } + return (mspace)m; +} + +int mspace_track_large_chunks(mspace msp, int enable) { + int ret = 0; + mstate ms = (mstate)msp; + if (!PREACTION(ms)) { + if (!use_mmap(ms)) { + ret = 1; + } + if (!enable) { + enable_mmap(ms); + } else { + disable_mmap(ms); + } + POSTACTION(ms); + } + return ret; +} + +size_t destroy_mspace(mspace msp) { + size_t freed = 0; + mstate ms = 
(mstate)msp; + if (ok_magic(ms)) { + msegmentptr sp = &ms->seg; + (void)DESTROY_LOCK(&ms->mutex); /* destroy before unmapped */ + while (sp != 0) { + char* base = sp->base; + size_t size = sp->size; + flag_t flag = sp->sflags; + (void)base; /* placate people compiling -Wunused-variable */ + sp = sp->next; + if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && + CALL_MUNMAP(base, size) == 0) + freed += size; + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return freed; +} + +/* + mspace versions of routines are near-clones of the global + versions. This is not so nice but better than the alternatives. +*/ + +void* mspace_malloc(mspace msp, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (!PREACTION(ms)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = ms->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
*/ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(ms, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(ms, b, p, idx); + set_inuse_and_pinuse(ms, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb > ms->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(ms, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(ms, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(ms, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(ms, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + + if (nb <= ms->dvsize) { + size_t rsize = ms->dvsize - nb; + mchunkptr p = ms->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = ms->dv = chunk_plus_offset(p, nb); + ms->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + } + else { /* exhaust dv */ + size_t dvs = ms->dvsize; + ms->dvsize = 0; + ms->dv = 0; + set_inuse_and_pinuse(ms, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb < ms->topsize) { /* Split top */ + size_t rsize = ms->topsize -= nb; + mchunkptr p = ms->top; + mchunkptr r = ms->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + mem = chunk2mem(p); + check_top_chunk(ms, ms->top); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + mem = sys_alloc(ms, nb); + + postaction: + POSTACTION(ms); + return mem; + } + + return 0; +} + +void mspace_free(mspace msp, void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + (void)msp; /* placate people compiling -Wunused */ +#else /* FOOTERS */ + mstate fm = (mstate)msp; +#endif /* FOOTERS */ + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if 
(RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if (--fm->release_checks == 0) + release_unused_segments(fm); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +} + +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = internal_malloc(ms, req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +void* 
mspace_realloc(mspace msp, void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem == 0) { + mem = mspace_malloc(msp, bytes); + } + else if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } +#ifdef REALLOC_ZERO_BYTES_FREES + else if (bytes == 0) { + mspace_free(msp, oldmem); + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! FOOTERS + mstate m = (mstate)msp; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); + POSTACTION(m); + if (newp != 0) { + check_inuse_chunk(m, newp); + mem = chunk2mem(newp); + } + else { + mem = mspace_malloc(m, bytes); + if (mem != 0) { + size_t oc = chunksize(oldp) - overhead_for(oldp); + memcpy(mem, oldmem, (oc < bytes)? oc : bytes); + mspace_free(m, oldmem); + } + } + } + } + return mem; +} + +void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) { + void* mem = 0; + if (oldmem != 0) { + if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + } + else { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! 
FOOTERS + mstate m = (mstate)msp; +#else /* FOOTERS */ + mstate m = get_mstate_for(oldp); + (void)msp; /* placate people compiling -Wunused */ + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + if (!PREACTION(m)) { + mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); + POSTACTION(m); + if (newp == oldp) { + check_inuse_chunk(m, newp); + mem = oldmem; + } + } + } + } + return mem; +} + +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (alignment <= MALLOC_ALIGNMENT) + return mspace_malloc(msp, bytes); + return internal_memalign(ms, alignment, bytes); +} + +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, &sz, 3, chunks); +} + +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, sizes, 0, chunks); +} + +size_t mspace_bulk_free(mspace msp, void* array[], size_t nelem) { + return internal_bulk_free((mstate)msp, array, nelem); +} + +#if MALLOC_INSPECT_ALL +void mspace_inspect_all(mspace msp, + void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg) { + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (!PREACTION(ms)) { + internal_inspect_all(ms, handler, arg); + POSTACTION(ms); + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } +} +#endif /* MALLOC_INSPECT_ALL */ + +int mspace_trim(mspace msp, size_t pad) { + int result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (!PREACTION(ms)) { + result = sys_trim(ms, pad); + POSTACTION(ms); + } + } + else { 
+ USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +#if !NO_MALLOC_STATS +void mspace_malloc_stats(mspace msp) { + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + internal_malloc_stats(ms); + } + else { + USAGE_ERROR_ACTION(ms,ms); + } +} +#endif /* NO_MALLOC_STATS */ + +size_t mspace_footprint(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->footprint; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +size_t mspace_max_footprint(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->max_footprint; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +size_t mspace_footprint_limit(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + size_t maf = ms->footprint_limit; + result = (maf == 0) ? MAX_SIZE_T : maf; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +/* + Set the footprint limit of the given mspace and return the value + stored in footprint_limit (0 if msp fails the ok_magic check). + bytes == 0 selects the minimal size, granularity_align(1); + bytes == MAX_SIZE_T stores 0, which disables the limit; any other + value is rounded with granularity_align. The three cases form one + if / else-if / else chain, as in dlmalloc_set_footprint_limit, so + the bytes == 0 result is not overwritten by the final branch. +*/ +size_t mspace_set_footprint_limit(mspace msp, size_t bytes) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (bytes == 0) + result = granularity_align(1); /* Use minimal size */ + else if (bytes == MAX_SIZE_T) + result = 0; /* disable */ + else + result = granularity_align(bytes); + ms->footprint_limit = result; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +#if !NO_MALLINFO +struct mallinfo mspace_mallinfo(mspace msp) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + } + return internal_mallinfo(ms); +} +#endif /* NO_MALLINFO */ + +size_t mspace_usable_size(const void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +int mspace_mallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +#endif /* MSPACES */ + + +/* -------------------- Alternative MORECORE functions ------------------- */ + +/* + Guidelines for creating a custom version 
of MORECORE: + + * For best performance, MORECORE should allocate in multiples of pagesize. + * MORECORE may allocate more memory than requested. (Or even less, + but this will usually result in a malloc failure.) + * MORECORE must not allocate memory when given argument zero, but + instead return one past the end address of memory from previous + nonzero call. + * For best performance, consecutive calls to MORECORE with positive + arguments should return increasing addresses, indicating that + space has been contiguously extended. + * Even though consecutive calls to MORECORE need not return contiguous + addresses, it must be OK for malloc'ed chunks to span multiple + regions in those cases where they do happen to be contiguous. + * MORECORE need not handle negative arguments -- it may instead + just return MFAIL when given negative arguments. + Negative arguments are always multiples of pagesize. MORECORE + must not misinterpret negative args as large positive unsigned + args. You can suppress all such calls from even occurring by defining + MORECORE_CANNOT_TRIM. + + As an example alternative MORECORE, here is a custom allocator + kindly contributed for pre-OSX macOS. It uses virtually but not + necessarily physically contiguous non-paged memory (locked in, + present and won't get swapped out). You can use it by uncommenting + this section, adding some #includes, and setting up the appropriate + defines above: + + #define MORECORE osMoreCore + + There is also a shutdown routine that should somehow be called for + cleanup upon program exit. 
+ + #define MAX_POOL_ENTRIES 100 + #define MINIMUM_MORECORE_SIZE (64 * 1024U) + static int next_os_pool; + void *our_os_pools[MAX_POOL_ENTRIES]; + + void *osMoreCore(int size) + { + void *ptr = 0; + static void *sbrk_top = 0; + + if (size > 0) + { + if (size < MINIMUM_MORECORE_SIZE) + size = MINIMUM_MORECORE_SIZE; + if (CurrentExecutionLevel() == kTaskLevel) + ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); + if (ptr == 0) + { + return (void *) MFAIL; + } + // save ptrs so they can be freed during cleanup + our_os_pools[next_os_pool] = ptr; + next_os_pool++; + ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); + sbrk_top = (char *) ptr + size; + return ptr; + } + else if (size < 0) + { + // we don't currently support shrink behavior + return (void *) MFAIL; + } + else + { + return sbrk_top; + } + } + + // cleanup any allocated memory pools + // called as last thing before shutting down driver + + void osCleanupMem(void) + { + void **ptr; + + for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) + if (*ptr) + { + PoolDeallocate(*ptr); + *ptr = 0; + } + } + +*/ + + +/* ----------------------------------------------------------------------- +History: + v2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea + * fix bad comparison in dlposix_memalign + * don't reuse adjusted asize in sys_alloc + * add LOCK_AT_FORK -- thanks to Kirill Artamonov for the suggestion + * reduce compiler warnings -- thanks to all who reported/suggested these + + v2.8.5 Sun May 22 10:26:02 2011 Doug Lea (dl at gee) + * Always perform unlink checks unless INSECURE + * Add posix_memalign. + * Improve realloc to expand in more cases; expose realloc_in_place. + Thanks to Peter Buhr for the suggestion. + * Add footprint_limit, inspect_all, bulk_free. Thanks + to Barry Hayes and others for the suggestions. + * Internal refactorings to avoid calls while holding locks + * Use non-reentrant locks by default. Thanks to Roland McGrath + for the suggestion. 
+ * Small fixes to mspace_destroy, reset_on_error. + * Various configuration extensions/changes. Thanks + to all who contributed these. + + V2.8.4a Thu Apr 28 14:39:43 2011 (dl at gee.cs.oswego.edu) + * Update Creative Commons URL + + V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) + * Use zeros instead of prev foot for is_mmapped + * Add mspace_track_large_chunks; thanks to Jean Brouwers + * Fix set_inuse in internal_realloc; thanks to Jean Brouwers + * Fix insufficient sys_alloc padding when using 16byte alignment + * Fix bad error check in mspace_footprint + * Adaptations for ptmalloc; thanks to Wolfram Gloger. + * Reentrant spin locks; thanks to Earl Chew and others + * Win32 improvements; thanks to Niall Douglas and Earl Chew + * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options + * Extension hook in malloc_state + * Various small adjustments to reduce warnings on some compilers + * Various configuration extensions/changes for more platforms. Thanks + to all who contributed these. + + V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) + * Add max_footprint functions + * Ensure all appropriate literals are size_t + * Fix conditional compilation problem for some #define settings + * Avoid concatenating segments with the one provided + in create_mspace_with_base + * Rename some variables to avoid compiler shadowing warnings + * Use explicit lock initialization. + * Better handling of sbrk interference. + * Simplify and fix segment insertion, trimming and mspace_destroy + * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x + * Thanks especially to Dennis Flanagan for help on these. + + V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee) + * Fix memalign brace error. 
+ + V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee) + * Fix improper #endif nesting in C++ + * Add explicit casts needed for C++ + + V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee) + * Use trees for large bins + * Support mspaces + * Use segments to unify sbrk-based and mmap-based system allocation, + removing need for emulation on most platforms without sbrk. + * Default safety checks + * Optional footer checks. Thanks to William Robertson for the idea. + * Internal code refactoring + * Incorporate suggestions and platform-specific changes. + Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas, + Aaron Bachmann, Emery Berger, and others. + * Speed up non-fastbin processing enough to remove fastbins. + * Remove useless cfree() to avoid conflicts with other apps. + * Remove internal memcpy, memset. Compilers handle builtins better. + * Remove some options that no one ever used and rename others. + + V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) + * Fix malloc_state bitmap array misdeclaration + + V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) + * Allow tuning of FIRST_SORTED_BIN_SIZE + * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. + * Better detection and support for non-contiguousness of MORECORE. + Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger + * Bypass most of malloc if no frees. Thanks to Emery Berger. + * Fix freeing of old top non-contiguous chunk in sysmalloc. + * Raised default trim and map thresholds to 256K. + * Fix mmap-related #defines. Thanks to Lubos Lunak. + * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. + * Branch-free bin calculation + * Default trim and mmap thresholds now 256K. + + V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) + * Introduce independent_comalloc and independent_calloc. + Thanks to Michael Pachos for motivation and help. + * Make optional .h file available + * Allow > 2GB requests on 32bit systems. 
+ * new WIN32 sbrk, mmap, munmap, lock code from . + Thanks also to Andreas Mueller , + and Anonymous. + * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for + helping test this.) + * memalign: check alignment arg + * realloc: don't try to shift chunks backwards, since this + leads to more fragmentation in some programs and doesn't + seem to help in any others. + * Collect all cases in malloc requiring system memory into sysmalloc + * Use mmap as backup to sbrk + * Place all internal state in malloc_state + * Introduce fastbins (although similar to 2.5.1) + * Many minor tunings and cosmetic improvements + * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK + * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS + Thanks to Tony E. Bennett and others. + * Include errno.h to support default failure action. + + V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) + * return null for negative arguments + * Added Several WIN32 cleanups from Martin C. Fong + * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' + (e.g. WIN32 platforms) + * Cleanup header file inclusion for WIN32 platforms + * Cleanup code to avoid Microsoft Visual C++ compiler complaints + * Add 'UCM_MALLOC_PREFIX' to quickly allow co-existence with existing + memory allocation routines + * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) + * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to + usage of 'assert' in non-WIN32 code + * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to + avoid infinite loop + * Always call 'fREe()' rather than 'free()' + + V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) + * Fixed ordering problem with boundary-stamping + + V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) + * Added pvalloc, as recommended by H.J. 
Liu + * Added 64bit pointer support mainly from Wolfram Gloger + * Added anonymously donated WIN32 sbrk emulation + * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen + * malloc_extend_top: fix mask error that caused wastage after + foreign sbrks + * Add linux mremap support code from HJ Liu + + V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) + * Integrated most documentation with the code. + * Add support for mmap, with help from + Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Use last_remainder in more cases. + * Pack bins using idea from colin@nyx10.cs.du.edu + * Use ordered bins instead of best-fit threshold + * Eliminate block-local decls to simplify tracing and debugging. + * Support another case of realloc via move into top + * Fix error occurring when initial sbrk_base not word-aligned. + * Rely on page size for units instead of SBRK_UNIT to + avoid surprises about sbrk alignment conventions. + * Add mallinfo, mallopt. Thanks to Raymond Nijssen + (raymond@es.ele.tue.nl) for the suggestion. + * Add `pad' argument to malloc_trim and top_pad mallopt parameter. + * More precautions for cases where other routines call sbrk, + courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Added macros etc., allowing use in linux libc from + H.J. Lu (hjl@gnu.ai.mit.edu) + * Inverted this history list + + V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) + * Re-tuned and fixed to behave more nicely with V2.6.0 changes. + * Removed all preallocation code since under current scheme + the work required to undo bad preallocations exceeds + the work saved in good cases for most test programs. + * No longer use return list or unconsolidated bins since + no scheme using them consistently outperforms those that don't + given above changes. + * Use best fit for very large chunks to prevent some worst-cases. + * Added some support for debugging + + V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) + * Removed footers when chunks are in use. 
Thanks to + Paul Wilson (wilson@cs.texas.edu) for the suggestion. + + V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) + * Added malloc_trim, with help from Wolfram Gloger + (wmglo@Dent.MED.Uni-Muenchen.DE). + + V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) + + V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) + * realloc: try to expand in both directions + * malloc: swap order of clean-bin strategy; + * realloc: only conditionally expand backwards + * Try not to scavenge used bins + * Use bin counts as a guide to preallocation + * Occasionally bin return list chunks in first scan + * Add a few optimizations from colin@nyx10.cs.du.edu + + V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) + * faster bin computation & slightly different binning + * merged all consolidations to one part of malloc proper + (eliminating old malloc_find_space & malloc_clean_bin) + * Scan 2 returns chunks (not just 1) + * Propagate failure in realloc if malloc returns 0 + * Add stuff to allow compilation on non-ANSI compilers + from kpv@research.att.com + + V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) + * removed potential for odd address access in prev_chunk + * removed dependency on getpagesize.h + * misc cosmetics and a bit more internal documentation + * anticosmetics: mangled names in macros to evade debugger strangeness + * tested on sparc, hp-700, dec-mips, rs6000 + with gcc & native cc (hp, dec only) allowing + Detlefs & Zorn comparison study (in SIGPLAN Notices.) + + Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) + * Based loosely on libg++-1.2X malloc. (It retains some of the overall + structure of old version, but most details differ.) + +*/ diff --git a/src/ucm/rocm/Makefile.am b/src/ucm/rocm/Makefile.am new file mode 100644 index 0000000..20c6528 --- /dev/null +++ b/src/ucm/rocm/Makefile.am @@ -0,0 +1,26 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# Copyright (C) Advanced Micro Devices, Inc. 
2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +if HAVE_ROCM + +module_LTLIBRARIES = libucm_rocm.la +libucm_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(ROCM_CPPFLAGS) +libucm_rocm_la_CFLAGS = $(BASE_CFLAGS) $(ROCM_CFLAGS) +libucm_rocm_la_LIBADD = ../libucm.la +libucm_rocm_la_LDFLAGS = $(UCM_MODULE_LDFLAGS) \ + $(patsubst %, -Xlinker %, $(ROCM_LDFLAGS)) \ + -version-info $(SOVERSION) + +noinst_HEADERS = \ + rocmmem.h + +libucm_rocm_la_SOURCES = \ + rocmmem.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/ucm/rocm/Makefile.in b/src/ucm/rocm/Makefile.in new file mode 100644 index 0000000..d6c33fe --- /dev/null +++ b/src/ucm/rocm/Makefile.in @@ -0,0 +1,858 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/ucm/rocm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + 
$(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d 
+CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_ROCM_TRUE@libucm_rocm_la_DEPENDENCIES = ../libucm.la +am__libucm_rocm_la_SOURCES_DIST = rocmmem.c +@HAVE_ROCM_TRUE@am_libucm_rocm_la_OBJECTS = libucm_rocm_la-rocmmem.lo +libucm_rocm_la_OBJECTS = $(am_libucm_rocm_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libucm_rocm_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libucm_rocm_la_CFLAGS) $(CFLAGS) $(libucm_rocm_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@HAVE_ROCM_TRUE@am_libucm_rocm_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libucm_rocm_la-rocmmem.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = 
$(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libucm_rocm_la_SOURCES) +DIST_SOURCES = $(am__libucm_rocm_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = rocmmem.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = 
@CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = 
@LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = 
@build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_ROCM_TRUE@module_LTLIBRARIES = libucm_rocm.la +@HAVE_ROCM_TRUE@libucm_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(ROCM_CPPFLAGS) +@HAVE_ROCM_TRUE@libucm_rocm_la_CFLAGS = $(BASE_CFLAGS) $(ROCM_CFLAGS) +@HAVE_ROCM_TRUE@libucm_rocm_la_LIBADD = ../libucm.la +@HAVE_ROCM_TRUE@libucm_rocm_la_LDFLAGS = $(UCM_MODULE_LDFLAGS) \ +@HAVE_ROCM_TRUE@ $(patsubst %, -Xlinker %, $(ROCM_LDFLAGS)) \ +@HAVE_ROCM_TRUE@ -version-info $(SOVERSION) + +@HAVE_ROCM_TRUE@noinst_HEADERS = \ +@HAVE_ROCM_TRUE@ rocmmem.h + +@HAVE_ROCM_TRUE@libucm_rocm_la_SOURCES = \ +@HAVE_ROCM_TRUE@ rocmmem.c + + +# Automake silent rules +@HAVE_ROCM_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_ROCM_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_ROCM_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_ROCM_TRUE@AM_V_LN_1 = true +@HAVE_ROCM_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: 
@MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/ucm/rocm/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/ucm/rocm/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + 
+uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libucm_rocm.la: $(libucm_rocm_la_OBJECTS) $(libucm_rocm_la_DEPENDENCIES) $(EXTRA_libucm_rocm_la_DEPENDENCIES) + $(AM_V_CCLD)$(libucm_rocm_la_LINK) $(am_libucm_rocm_la_rpath) $(libucm_rocm_la_OBJECTS) $(libucm_rocm_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libucm_rocm_la-rocmmem.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ 
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libucm_rocm_la-rocmmem.lo: rocmmem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_rocm_la_CFLAGS) $(CFLAGS) -MT libucm_rocm_la-rocmmem.lo -MD -MP -MF $(DEPDIR)/libucm_rocm_la-rocmmem.Tpo -c -o libucm_rocm_la-rocmmem.lo `test -f 'rocmmem.c' || echo '$(srcdir)/'`rocmmem.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libucm_rocm_la-rocmmem.Tpo $(DEPDIR)/libucm_rocm_la-rocmmem.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rocmmem.c' object='libucm_rocm_la-rocmmem.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucm_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libucm_rocm_la_CFLAGS) $(CFLAGS) -c -o libucm_rocm_la-rocmmem.lo `test -f 'rocmmem.c' || echo '$(srcdir)/'`rocmmem.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: 
+ -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + 
if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_ROCM_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libucm_rocm_la-rocmmem.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libucm_rocm_la-rocmmem.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + 
+@HAVE_ROCM_TRUE@all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to create proper makefile dependencies +@HAVE_ROCM_TRUE@$(local_la_modules): $(module_LTLIBRARIES) +@HAVE_ROCM_TRUE@ $(AM_V_at)$(MKDIR_P) $(localmoduledir) +@HAVE_ROCM_TRUE@ $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_ROCM_TRUE@ (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ +@HAVE_ROCM_TRUE@ done +@HAVE_ROCM_TRUE@ @for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_ROCM_TRUE@ $(AM_V_LN) $$lib; \ +@HAVE_ROCM_TRUE@ done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/ucm/rocm/configure.m4 b/src/ucm/rocm/configure.m4 new file mode 100644 index 0000000..02c520b --- /dev/null +++ b/src/ucm/rocm/configure.m4 @@ -0,0 +1,10 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +UCX_CHECK_ROCM +AS_IF([test "x$rocm_happy" = "xyes"], [ucm_modules="${ucm_modules}:rocm"]) +AC_CONFIG_FILES([src/ucm/rocm/Makefile]) diff --git a/src/ucm/rocm/rocmmem.c b/src/ucm/rocm/rocmmem.c new file mode 100644 index 0000000..bf441d6 --- /dev/null +++ b/src/ucm/rocm/rocmmem.c @@ -0,0 +1,199 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +UCM_DEFINE_REPLACE_DLSYM_FUNC(hsa_amd_memory_pool_allocate, hsa_status_t, + HSA_STATUS_ERROR, hsa_amd_memory_pool_t, + size_t, uint32_t, void**) +UCM_DEFINE_REPLACE_DLSYM_FUNC(hsa_amd_memory_pool_free, hsa_status_t, + HSA_STATUS_ERROR, void*) + +#if ENABLE_SYMBOL_OVERRIDE +UCM_OVERRIDE_FUNC(hsa_amd_memory_pool_allocate, hsa_status_t) +UCM_OVERRIDE_FUNC(hsa_amd_memory_pool_free, hsa_status_t) +#endif + +static UCS_F_ALWAYS_INLINE void +ucm_dispatch_mem_type_alloc(void *addr, size_t length, ucs_memory_type_t mem_type) +{ + ucm_event_t event; + + event.mem_type.address = addr; + event.mem_type.size = length; + event.mem_type.mem_type = mem_type; + ucm_event_dispatch(UCM_EVENT_MEM_TYPE_ALLOC, &event); +} + +static UCS_F_ALWAYS_INLINE void +ucm_dispatch_mem_type_free(void *addr, size_t length, ucs_memory_type_t mem_type) +{ + ucm_event_t event; + + event.mem_type.address = addr; + event.mem_type.size = length; + event.mem_type.mem_type = mem_type; + ucm_event_dispatch(UCM_EVENT_MEM_TYPE_FREE, &event); +} + +static void ucm_hsa_amd_memory_pool_free_dispatch_events(void *ptr) +{ + size_t size; + hsa_status_t status; + hsa_device_type_t dev_type; + ucs_memory_type_t mem_type = UCS_MEMORY_TYPE_ROCM; + hsa_amd_pointer_info_t info = { + .size = sizeof(hsa_amd_pointer_info_t), + }; + + if (ptr == NULL) { + return; + } + + status = hsa_amd_pointer_info(ptr, &info, NULL, NULL, NULL); + if (status != HSA_STATUS_SUCCESS) { + ucm_warn("hsa_amd_pointer_info(dptr=%p) failed", ptr); + size = 1; /* set minimum length */ + } + else { + size = info.sizeInBytes; + } + + status = hsa_agent_get_info(info.agentOwner, HSA_AGENT_INFO_DEVICE, &dev_type); + if (status == HSA_STATUS_SUCCESS) { + if (info.type != HSA_EXT_POINTER_TYPE_HSA) { + ucm_warn("ucm free non HSA managed memory %p", ptr); + return; + } + + 
if (dev_type != HSA_DEVICE_TYPE_GPU) { + mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED; + } + } + + ucm_dispatch_mem_type_free(ptr, size, mem_type); +} + +hsa_status_t ucm_hsa_amd_memory_pool_free(void* ptr) +{ + hsa_status_t status; + + ucm_event_enter(); + + ucm_trace("ucm_hsa_amd_memory_pool_free(ptr=%p)", ptr); + + ucm_hsa_amd_memory_pool_free_dispatch_events(ptr); + + status = ucm_orig_hsa_amd_memory_pool_free(ptr); + + ucm_event_leave(); + return status; +} + +hsa_status_t ucm_hsa_amd_memory_pool_allocate( + hsa_amd_memory_pool_t memory_pool, size_t size, + uint32_t flags, void** ptr) +{ + ucs_memory_type_t type = UCS_MEMORY_TYPE_ROCM; + uint32_t pool_flags = 0; + hsa_status_t status; + + status = hsa_amd_memory_pool_get_info(memory_pool, + HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, + &pool_flags); + if (status == HSA_STATUS_SUCCESS && + !(pool_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED)) { + type = UCS_MEMORY_TYPE_ROCM_MANAGED; + } + + ucm_event_enter(); + + status = ucm_orig_hsa_amd_memory_pool_allocate(memory_pool, size, flags, ptr); + if (status == HSA_STATUS_SUCCESS) { + ucm_trace("ucm_hsa_amd_memory_pool_allocate(ptr=%p size:%lu)", *ptr, size); + ucm_dispatch_mem_type_alloc(*ptr, size, type); + } + + ucm_event_leave(); + return status; +} + +static ucm_reloc_patch_t patches[] = { + {UCS_PP_MAKE_STRING(hsa_amd_memory_pool_allocate), + ucm_override_hsa_amd_memory_pool_allocate}, + {UCS_PP_MAKE_STRING(hsa_amd_memory_pool_free), + ucm_override_hsa_amd_memory_pool_free}, + {NULL, NULL} +}; + +static ucs_status_t ucm_rocmmem_install(int events) +{ + static int ucm_rocmmem_installed = 0; + static pthread_mutex_t install_mutex = PTHREAD_MUTEX_INITIALIZER; + ucm_reloc_patch_t *patch; + ucs_status_t status = UCS_OK; + + if (!(events & (UCM_EVENT_MEM_TYPE_ALLOC | UCM_EVENT_MEM_TYPE_FREE))) { + goto out; + } + + /* TODO: check mem reloc */ + + pthread_mutex_lock(&install_mutex); + + if (ucm_rocmmem_installed) { + goto out_unlock; + } + + for (patch = patches; 
patch->symbol != NULL; ++patch) { + status = ucm_reloc_modify(patch); + if (status != UCS_OK) { + ucm_warn("failed to install relocation table entry for '%s'", patch->symbol); + goto out_unlock; + } + } + + ucm_debug("rocm hooks are ready"); + ucm_rocmmem_installed = 1; + +out_unlock: + pthread_mutex_unlock(&install_mutex); +out: + return status; +} + +static void ucm_rocmmem_get_existing_alloc(ucm_event_handler_t *handler) +{ +} + +static ucm_event_installer_t ucm_rocm_initializer = { + .install = ucm_rocmmem_install, + .get_existing_alloc = ucm_rocmmem_get_existing_alloc +}; + +UCS_STATIC_INIT { + ucs_list_add_tail(&ucm_event_installer_list, &ucm_rocm_initializer.list); +} + +UCS_STATIC_CLEANUP { + ucs_list_del(&ucm_rocm_initializer.list); +} diff --git a/src/ucm/rocm/rocmmem.h b/src/ucm/rocm/rocmmem.h new file mode 100644 index 0000000..23bb3b5 --- /dev/null +++ b/src/ucm/rocm/rocmmem.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCM_ROCMMEM_H_ +#define UCM_ROCMMEM_H_ + +#include +#include + +/* hsa_amd_memory_pool_allocate */ +hsa_status_t ucm_override_hsa_amd_memory_pool_allocate( + hsa_amd_memory_pool_t memory_pool, size_t size, + uint32_t flags, void** ptr); +hsa_status_t ucm_orig_hsa_amd_memory_pool_allocate( + hsa_amd_memory_pool_t memory_pool, size_t size, + uint32_t flags, void** ptr); +hsa_status_t ucm_hsa_amd_memory_pool_allocate( + hsa_amd_memory_pool_t memory_pool, size_t size, + uint32_t flags, void** ptr); + +/* hsa_amd_memory_pool_free */ +hsa_status_t ucm_override_hsa_amd_memory_pool_free(void* ptr); +hsa_status_t ucm_orig_hsa_amd_memory_pool_free(void* ptr); +hsa_status_t ucm_hsa_amd_memory_pool_free(void* ptr); + +#endif diff --git a/src/ucm/util/log.c b/src/ucm/util/log.c new file mode 100644 index 0000000..a508f70 --- /dev/null +++ b/src/ucm/util/log.c @@ -0,0 +1,282 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. 
ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "log.h" +#include "sys.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define UCM_LOG_BUG_SIZE 256 + +static int ucm_log_fileno = 1; /* stdout */ +static char ucm_log_hostname[HOST_NAME_MAX] = {0}; + +const char *ucm_log_level_names[] = { + [UCS_LOG_LEVEL_FATAL] = "FATAL", + [UCS_LOG_LEVEL_ERROR] = "ERROR", + [UCS_LOG_LEVEL_WARN] = "WARN", + [UCS_LOG_LEVEL_INFO] = "INFO", + [UCS_LOG_LEVEL_DEBUG] = "DEBUG", + [UCS_LOG_LEVEL_TRACE] = "TRACE", + NULL +}; + +/* Flags for ucm_log_ltoa */ +#define UCM_LOG_LTOA_FLAG_SIGN UCS_BIT(0) /* print sign */ +#define UCM_LOG_LTOA_FLAG_UNSIGN UCS_BIT(1) /* unsigned number */ +#define UCM_LOG_LTOA_FLAG_LONG UCS_BIT(2) /* long number */ +#define UCM_LOG_LTOA_FLAG_PAD0 UCS_BIT(3) /* pad with zeroes */ +#define UCM_LOG_LTOA_PAD_LEFT UCS_BIT(4) /* pad to left */ + + +static char *ucm_log_add_padding(char *p, char *end, int pad, char fill) +{ + while ((pad > 0) && (p < end)) { + *(p++) = fill; + --pad; + } + return p; +} + +/* + * Convert a long integer to a string. + * @return Pointer to the end of the string (after last character written). + */ +static char *ucm_log_ltoa(char *p, char *end, long n, int base, int flags, + int pad) +{ + static const char digits[] = "0123456789abcdef"; + long div; + + if (((n < 0) || (flags & UCM_LOG_LTOA_FLAG_SIGN)) && (p < end)) { + *(p++) = (n < 0 ) ? '-' : '+'; + } + + if (n == 0) { + if (p < end) { + *(p++) = '0'; + } + goto out; + } + + n = labs(n); + + div = 1; + while ((n / div) != 0) { + div *= base; + --pad; + } + + if (!(flags & UCM_LOG_LTOA_PAD_LEFT)) { + p = ucm_log_add_padding(p, end, pad, + (flags & UCM_LOG_LTOA_FLAG_PAD0) ? 
'0' : ' '); + } + + div /= base; + while ((p < end) && (div > 0)) { + *(p++) = digits[(n / div + base) % base]; + div /= base; + } + + if (flags & UCM_LOG_LTOA_PAD_LEFT) { + p = ucm_log_add_padding(p, end, pad, ' '); + } + +out: + return p; +} + +/* + * Implement basic formatted print. + * We can't use snprintf() because it may potentially call malloc(). + * + * Supported format characters: + * %[-]?[0-9]?s + * %m + * %% + * %[+]?[0-9]?[l]?[dxup] + */ +static void ucm_log_vsnprintf(char *buf, size_t max, const char *fmt, va_list ap) +{ + const char *pf; + char *pb, *endb; + union { + char *s; + long d; + unsigned long u; + uintptr_t p; + } value; + int flags; + int pad; + int base; + int eno; + + pf = fmt; + pb = buf; + endb = buf + max - 1; + eno = errno; + + while ((pb < endb) && (*pf != '\0')) { + if (*pf != '%') { + *(pb++) = *(pf++); + continue; + } + + /* Data field */ + pad = 0; + flags = 0; + base = 10; + while (pb < endb) { + ++pf; + switch (*pf) { + /* The '%' character */ + case '%': + *(pb++) = '%'; + goto done; + + /* Error message */ + case 'm': + ucm_strerror(eno, pb, endb - pb); + pb += strlen(pb); + goto done; + + /* String */ + case 's': + value.s = va_arg(ap, char *); + if (!value.s) { + value.s = "(null)"; + } + pad -= strlen(value.s); + if (!(flags & UCM_LOG_LTOA_PAD_LEFT)) { + pb = ucm_log_add_padding(pb, endb, pad, ' '); + } + while ((pb < endb) && (*value.s != '\0')) { + *(pb++) = *(value.s++); + } + if (flags & UCM_LOG_LTOA_PAD_LEFT) { + pb = ucm_log_add_padding(pb, endb, pad, ' '); + } + goto done; + + /* Signed number */ + case 'd': + if (flags & UCM_LOG_LTOA_FLAG_LONG) { + value.d = va_arg(ap, long); + } else { + value.d = va_arg(ap, int); + } + pb = ucm_log_ltoa(pb, endb, value.d, base, flags, pad); + goto done; + + /* Hex number */ + case 'x': + base = 16; + /* Fall thru */ + + /* Unsigned number */ + case 'u': + if (flags & UCM_LOG_LTOA_FLAG_LONG) { + value.u = va_arg(ap, unsigned long); + } else { + value.u = va_arg(ap, unsigned); + 
} + flags |= UCM_LOG_LTOA_FLAG_UNSIGN; + pb = ucm_log_ltoa(pb, endb, value.u, base, flags, pad); + goto done; + + /* Pointer */ + case 'p': + value.p = va_arg(ap, uintptr_t); + if (pb < endb) { + *(pb++) = '0'; + } + if (pb < endb) { + *(pb++) = 'x'; + } + pb = ucm_log_ltoa(pb, endb, value.p, 16, flags, pad); + goto done; + + /* Flags and modifiers */ + case '+': + flags |= UCM_LOG_LTOA_FLAG_SIGN; + break; + case '-': + flags |= UCM_LOG_LTOA_PAD_LEFT; + break; + case 'l': + flags |= UCM_LOG_LTOA_FLAG_LONG; + break; + case '0': + if (pad == 0) { + flags |= UCM_LOG_LTOA_FLAG_PAD0; + } + /* Fall thru */ + default: + if (isdigit(*pf)) { + pad = (pad * 10) + (*pf - '0'); + } + break; + } + } +done: + ++pf; + } + *pb = '\0'; +} + +static void ucm_log_snprintf(char *buf, size_t max, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + ucm_log_vsnprintf(buf, max, fmt, ap); + va_end(ap); +} + +void __ucm_log(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, ...) 
+{ + char buf[UCM_LOG_BUG_SIZE]; + size_t length; + va_list ap; + struct timeval tv; + ssize_t nwrite; + + gettimeofday(&tv, NULL); + ucm_log_snprintf(buf, UCM_LOG_BUG_SIZE - 1, "[%lu.%06lu] [%s:%d] %18s:%-4d UCX %s ", + tv.tv_sec, tv.tv_usec, ucm_log_hostname, getpid(), + ucs_basename(file), line, ucm_log_level_names[level]); + buf[UCM_LOG_BUG_SIZE - 1] = '\0'; + + length = strlen(buf); + va_start(ap, message); + ucm_log_vsnprintf(buf + length, UCM_LOG_BUG_SIZE - length, message, ap); + va_end(ap); + strncat(buf, "\n", UCM_LOG_BUG_SIZE - 1); + + /* Use writev to avoid potential calls to malloc() in buffered IO functions */ + nwrite = write(ucm_log_fileno, buf, strlen(buf)); + (void)nwrite; + + if (level <= UCS_LOG_LEVEL_FATAL) { + abort(); + } +} + +UCS_STATIC_INIT { + gethostname(ucm_log_hostname, sizeof(ucm_log_hostname)); +} diff --git a/src/ucm/util/log.h b/src/ucm/util/log.h new file mode 100644 index 0000000..6ba8b46 --- /dev/null +++ b/src/ucm/util/log.h @@ -0,0 +1,38 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCM_LOG_H_ +#define UCM_LOG_H_ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include + + +#define ucm_log(_level, _message, ...) \ + if (((_level) <= UCS_MAX_LOG_LEVEL) && \ + ((_level) <= (int)ucm_global_opts.log_level)) { \ + __ucm_log(__FILE__, __LINE__, __FUNCTION__, (_level), _message, \ + ## __VA_ARGS__); \ + } + +#define ucm_fatal(_message, ...) ucm_log(UCS_LOG_LEVEL_FATAL, _message, ## __VA_ARGS__) +#define ucm_error(_message, ...) ucm_log(UCS_LOG_LEVEL_ERROR, _message, ## __VA_ARGS__) +#define ucm_warn(_message, ...) ucm_log(UCS_LOG_LEVEL_WARN, _message, ## __VA_ARGS__) +#define ucm_info(_message, ...) ucm_log(UCS_LOG_LEVEL_INFO, _message, ## __VA_ARGS__) +#define ucm_debug(_message, ...) ucm_log(UCS_LOG_LEVEL_DEBUG, _message, ## __VA_ARGS__) +#define ucm_trace(_message, ...) 
/*
 * Look up an entry in the dynamic section of a loaded object.
 *
 * @param [in] base   Load address of the object.
 * @param [in] dphdr  Program header whose p_vaddr, relative to 'base', locates
 *                    the array of ElfW(Dyn) entries.
 * @param [in] tag    Tag (DT_xxx) of the entry to look for.
 *
 * @return The d_un.d_val field of the first entry with a matching tag, or 0
 *         if the terminating entry is reached without finding it.
 */
static uintptr_t
ucm_reloc_get_entry(ElfW(Addr) base, const ElfW(Phdr) *dphdr, ElfW(Sxword) tag)
{
    ElfW(Dyn) *dyn = (void*)(base + dphdr->p_vaddr);

    /* The array is terminated by an entry whose tag is DT_NULL (0) */
    while (dyn->d_tag != DT_NULL) {
        if (dyn->d_tag == tag) {
            return dyn->d_un.d_val;
        }
        ++dyn;
    }

    return 0;
}
(ret < 0) { + ucm_warn("fcntl(fd=%d, F_SETLKW, l_type=%d) failed: %m", fd, l_type); + } +} + +static int ucm_reloc_get_aux_phsize() +{ +#define UCM_RELOC_AUXV_BUF_LEN 16 + static const char *proc_auxv_filename = "/proc/self/auxv"; + static int phsize = 0; + ucm_auxv_t buffer[UCM_RELOC_AUXV_BUF_LEN]; + ucm_auxv_t *auxv; + unsigned count; + ssize_t nread; + int found; + int fd; + + /* Can avoid lock here - worst case we'll read the file more than once */ + if (phsize == 0) { + fd = open(proc_auxv_filename, O_RDONLY); + if (fd < 0) { + ucm_error("failed to open '%s' for reading: %m", proc_auxv_filename); + return fd; + } + + if (RUNNING_ON_VALGRIND) { + /* Work around a bug caused by valgrind's fake /proc/self/auxv - + * every time this file is opened when running with valgrind, a + * a duplicate of the same fd is returned, so all share the same + * file offset. + */ + ucm_reloc_file_lock(fd, F_WRLCK); + lseek(fd, 0, SEEK_SET); + } + + /* Use small buffer on the stack, avoid using malloc() */ + found = 0; + do { + nread = read(fd, buffer, sizeof(buffer)); + if (nread < 0) { + ucm_error("failed to read %lu bytes from %s (ret=%ld): %m", + sizeof(buffer), proc_auxv_filename, nread); + break; + } + + count = nread / sizeof(buffer[0]); + for (auxv = buffer; (auxv < buffer + count) && (auxv->type != AT_NULL); + ++auxv) + { + if (auxv->type == AT_PHENT) { + found = 1; + phsize = auxv->value; + ucm_debug("read phent from %s: %d", proc_auxv_filename, phsize); + if (phsize == 0) { + ucm_error("phsize is 0"); + } + break; + } + } + } while ((count > 0) && (phsize == 0)); + + if (!found) { + ucm_error("AT_PHENT entry not found in %s", proc_auxv_filename); + } + + if (RUNNING_ON_VALGRIND) { + ucm_reloc_file_lock(fd, F_UNLCK); + } + close(fd); + } + return phsize; +} + +ElfW(Rela) *ucm_reloc_find_sym(void *table, size_t table_size, const char *symbol, + void *strtab, ElfW(Sym) *symtab) +{ + ElfW(Rela) *reloc; + char *elf_sym; + + for (reloc = table; + (void*)reloc < 
UCS_PTR_BYTE_OFFSET(table, table_size); + ++reloc) { + elf_sym = (char*)strtab + symtab[ELF64_R_SYM(reloc->r_info)].st_name; + if (!strcmp(symbol, elf_sym)) { + return reloc; + } + } + return NULL; +} + + +static ucs_status_t +ucm_reloc_modify_got(ElfW(Addr) base, const ElfW(Phdr) *phdr, const char UCS_V_UNUSED *phname, + int phnum, int phsize, + const ucm_reloc_dl_iter_context_t *ctx) +{ + const char *section_name; + ElfW(Phdr) *dphdr; + ElfW(Rela) *reloc; + ElfW(Sym) *symtab; + void *jmprel, *rela, *strtab; + size_t pltrelsz, relasz; + long page_size; + void **entry; + void *prev_value; + void *page; + int ret; + int i; + + page_size = ucm_get_page_size(); + + if (!strcmp(phname, "")) { + phname = "(empty)"; + } + + /* find PT_DYNAMIC */ + dphdr = NULL; + for (i = 0; i < phnum; ++i) { + dphdr = UCS_PTR_BYTE_OFFSET(phdr, phsize * i); + if (dphdr->p_type == PT_DYNAMIC) { + break; + } + } + if (dphdr == NULL) { + return UCS_ERR_NO_ELEM; + } + + /* Get ELF tables pointers */ + jmprel = (void*)ucm_reloc_get_entry(base, dphdr, DT_JMPREL); + symtab = (void*)ucm_reloc_get_entry(base, dphdr, DT_SYMTAB); + strtab = (void*)ucm_reloc_get_entry(base, dphdr, DT_STRTAB); + pltrelsz = ucm_reloc_get_entry(base, dphdr, DT_PLTRELSZ); + + if ((symtab == NULL) || (strtab == NULL)) { + /* no DT_SYMTAB or DT_STRTAB sections are defined */ + return UCS_OK; + } + + section_name = ".got.plt"; + reloc = ucm_reloc_find_sym(jmprel, pltrelsz, ctx->patch->symbol, + strtab, symtab); + if (reloc == NULL) { + /* if not found in .got.plt, search in .got */ + section_name = ".got"; + rela = (void*)ucm_reloc_get_entry(base, dphdr, DT_RELA); + relasz = ucm_reloc_get_entry(base, dphdr, DT_RELASZ); + reloc = ucm_reloc_find_sym(rela, relasz, ctx->patch->symbol, + strtab, symtab); + } + if (reloc == NULL) { + return UCS_OK; + } + + entry = (void *)(base + reloc->r_offset); + prev_value = *entry; + + if (prev_value == ctx->patch->value) { + ucm_trace("%s entry '%s' in %s at [%p] up-to-date", + 
section_name, ctx->patch->symbol, ucs_basename(phname), entry); + return UCS_OK; + } + + /* enable writing to the page */ + page = (void *)((intptr_t)entry & ~(page_size - 1)); + ret = mprotect(page, page_size, PROT_READ|PROT_WRITE); + if (ret < 0) { + ucm_error("failed to modify %s page %p to rw: %m", section_name, page); + return UCS_ERR_UNSUPPORTED; + } + + *entry = ctx->patch->value; + ucm_debug("%s entry '%s' in %s at [%p] modified from %p to %p", + section_name, ctx->patch->symbol, basename(phname), entry, + prev_value, ctx->patch->value); + + /* store default entry to prev_value to guarantee valid pointers + * throughout life time of the process */ + if (base == ctx->libucm_base_addr) { + ctx->patch->prev_value = prev_value; + ucm_debug("'%s' prev_value is %p'", ctx->patch->symbol, prev_value); + } + + return UCS_OK; +} + +static int ucm_reloc_phdr_iterator(struct dl_phdr_info *info, size_t size, + void *data) +{ + ucm_reloc_dl_iter_context_t *ctx = data; + int phsize; + int i; + + /* check if shared object is black-listed for this patch */ + if (ctx->patch->blacklist) { + for (i = 0; ctx->patch->blacklist[i]; i++) { + if (strstr(info->dlpi_name, ctx->patch->blacklist[i])) { + /* shared object is black-listed */ + return 0; + } + } + } + + phsize = ucm_reloc_get_aux_phsize(); + if (phsize <= 0) { + ucm_error("failed to read phent size"); + ctx->status = UCS_ERR_UNSUPPORTED; + return -1; + } + + ctx->status = ucm_reloc_modify_got(info->dlpi_addr, info->dlpi_phdr, + info->dlpi_name, info->dlpi_phnum, + phsize, ctx); + if (ctx->status != UCS_OK) { + return -1; /* stop iteration if got a real error */ + } + + /* Continue iteration and patch all remaining objects. 
*/ + return 0; +} + +/* called with lock held */ +static ucs_status_t ucm_reloc_apply_patch(ucm_reloc_patch_t *patch, + ElfW(Addr) libucm_base_addr) +{ + ucm_reloc_dl_iter_context_t ctx; + + ctx.patch = patch; + ctx.status = UCS_OK; + ctx.libucm_base_addr = libucm_base_addr; + + /* Avoid locks here because we don't modify ELF data structures. + * Worst case the same symbol will be written more than once. + */ + (void)dl_iterate_phdr(ucm_reloc_phdr_iterator, &ctx); + return ctx.status; +} + +/* read serinfo from 'module_path', result buffer must be destroyed + * by free() call */ +static Dl_serinfo *ucm_dlopen_load_serinfo(const char *module_path) +{ + Dl_serinfo *serinfo = NULL; + Dl_serinfo serinfo_size; + void *module; + int res; + + module = ucm_reloc_orig_dlopen(module_path, RTLD_LAZY); + if (module == NULL) { /* requested module can't be loaded */ + ucm_debug("failed to open %s: %s", module_path, dlerror()); + return NULL; + } + + /* try to get search info from requested module */ + res = dlinfo(module, RTLD_DI_SERINFOSIZE, &serinfo_size); + if (res) { + ucm_debug("dlinfo(RTLD_DI_SERINFOSIZE) failed"); + goto close_module; + } + + serinfo = malloc(serinfo_size.dls_size); + if (serinfo == NULL) { + ucm_error("failed to allocate %zu bytes for Dl_serinfo", + serinfo_size.dls_size); + goto close_module; + } + + *serinfo = serinfo_size; + res = dlinfo(module, RTLD_DI_SERINFO, serinfo); + if (res) { + ucm_debug("dlinfo(RTLD_DI_SERINFO) failed"); + free(serinfo); + serinfo = NULL; + } + +close_module: + dlclose(module); + return serinfo; +} + +void *ucm_dlopen(const char *filename, int flag) +{ + void *handle; + ucm_reloc_patch_t *patch; + Dl_serinfo *serinfo; + Dl_info dl_info; + int res; + int i; + char file_path[PATH_MAX]; + struct stat file_stat; + + ucm_debug("open module: %s, flag: %x", filename, flag); + + if (ucm_reloc_orig_dlopen == NULL) { + ucm_reloc_orig_dlopen = + (ucm_reloc_dlopen_func_t)ucm_reloc_get_orig(ucm_reloc_dlopen_patch.symbol, + 
ucm_reloc_dlopen_patch.value); + + if (ucm_reloc_orig_dlopen == NULL) { + ucm_fatal("ucm_reloc_orig_dlopen is NULL"); + } + } + + if (!ucm_global_opts.dlopen_process_rpath) { + goto fallback_load_lib; + } + + if (filename == NULL) { + /* return handle to main program */ + goto fallback_load_lib; + } + + /* failed to open module directly, try to use RPATH from from caller + * to locate requested module */ + if (filename[0] == '/') { /* absolute path - fallback to legacy mode */ + goto fallback_load_lib; + } + + /* try to get module info */ + res = dladdr(__builtin_return_address(0), &dl_info); + if (!res) { + ucm_debug("dladdr failed"); + goto fallback_load_lib; + } + + serinfo = ucm_dlopen_load_serinfo(dl_info.dli_fname); + if (serinfo == NULL) { + /* failed to load serinfo, try just dlopen */ + goto fallback_load_lib; + } + + for (i = 0; i < serinfo->dls_cnt; i++) { + ucm_concat_path(file_path, sizeof(file_path), + serinfo->dls_serpath[i].dls_name, filename); + ucm_debug("check for %s", file_path); + + res = stat(file_path, &file_stat); + if (res) { + continue; + } + + free(serinfo); + /* ok, file exists, let's try to load it */ + handle = ucm_reloc_orig_dlopen(file_path, flag); + if (handle == NULL) { + return NULL; + } + + goto out_apply_patches; + } + + free(serinfo); + /* ok, we can't lookup module in dirs listed in caller module, + * let's fallback to legacy mode */ +fallback_load_lib: + handle = ucm_reloc_orig_dlopen(filename, flag); + if (handle == NULL) { + return NULL; + } + +out_apply_patches: + /* + * Every time a new shared object is loaded, we must update its relocations + * with our list of patches (including dlopen itself). We have to go over + * the entire list of shared objects, since there more objects could be + * loaded due to dependencies. 
+ */ + + pthread_mutex_lock(&ucm_reloc_patch_list_lock); + ucs_list_for_each(patch, &ucm_reloc_patch_list, list) { + ucm_debug("in dlopen(%s), re-applying '%s' to %p", filename, + patch->symbol, patch->value); + ucm_reloc_apply_patch(patch, 0); + } + pthread_mutex_unlock(&ucm_reloc_patch_list_lock); + + return handle; +} + +/* called with lock held */ +static ucs_status_t ucm_reloc_install_dlopen() +{ + static int installed = 0; + ucs_status_t status; + + if (installed) { + return UCS_OK; + } + + ucm_reloc_orig_dlopen = + (ucm_reloc_dlopen_func_t)ucm_reloc_get_orig(ucm_reloc_dlopen_patch.symbol, + ucm_reloc_dlopen_patch.value); + + status = ucm_reloc_apply_patch(&ucm_reloc_dlopen_patch, 0); + if (status != UCS_OK) { + return status; + } + + ucs_list_add_tail(&ucm_reloc_patch_list, &ucm_reloc_dlopen_patch.list); + + installed = 1; + return UCS_OK; +} + +ucs_status_t ucm_reloc_modify(ucm_reloc_patch_t *patch) +{ + ucs_status_t status; + Dl_info dlinfo; + int ret; + + /* Take default symbol value from the current library */ + ret = dladdr(ucm_reloc_modify, &dlinfo); + if (!ret) { + ucm_error("dladdr() failed to query current library"); + return UCS_ERR_UNSUPPORTED; + } + + /* Take lock first to handle a possible race where dlopen() is called + * from another thread and we may end up not patching it. + */ + pthread_mutex_lock(&ucm_reloc_patch_list_lock); + + status = ucm_reloc_install_dlopen(); + if (status != UCS_OK) { + goto out_unlock; + } + + status = ucm_reloc_apply_patch(patch, (uintptr_t)dlinfo.dli_fbase); + if (status != UCS_OK) { + goto out_unlock; + } + + ucs_list_add_tail(&ucm_reloc_patch_list, &patch->list); + +out_unlock: + pthread_mutex_unlock(&ucm_reloc_patch_list_lock); + return status; +} + diff --git a/src/ucm/util/reloc.h b/src/ucm/util/reloc.h new file mode 100644 index 0000000..285cfaa --- /dev/null +++ b/src/ucm/util/reloc.h @@ -0,0 +1,75 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. 
+ * + * See file LICENSE for terms. + */ + + +#ifndef UCM_UTIL_RELOC_H_ +#define UCM_UTIL_RELOC_H_ + +#include +#include +#include +#include + + +/** + * Tracks which symbols need to be patched for currently loaded libraries and + * for libraries to be loaded in the future. We have the 'list' field so the + * library could put those on a list without extra memory allocations. + */ +typedef struct ucm_reloc_patch { + const char *symbol; + void *value; + void *prev_value; + ucs_list_link_t list; + char **blacklist; +} ucm_reloc_patch_t; + + +/** + * Modify process' relocation table. + * + * @param [in] patch What and how to modify. After this call, the structure + * will be owned by the library and the same patching will + * happen in all objects loaded subsequently. + */ +ucs_status_t ucm_reloc_modify(ucm_reloc_patch_t* patch); + + +/** + * Get the original implementation of 'symbol', which is not equal to 'replacement'. + * + * This function is static to make sure the symbol search is done from the context + * of the shared object which defines the replacement function. + * If the replacement function is defined in a loadbale module, the symbols it + * imports from other libraries may not be available in global scope. + * + * @param [in] symbol Symbol name. + * @param [in] replacement Symbol replacement, which should be ignored. + * + * @return Original function pointer for 'symbol'. + */ +static void* UCS_F_MAYBE_UNUSED +ucm_reloc_get_orig(const char *symbol, void *replacement) +{ + const char *error; + void *func_ptr; + + func_ptr = dlsym(RTLD_NEXT, symbol); + if (func_ptr == NULL) { + (void)dlerror(); + func_ptr = dlsym(RTLD_DEFAULT, symbol); + if (func_ptr == replacement) { + error = dlerror(); + ucm_fatal("could not find address of original %s(): %s", symbol, + error ? 
error : "Unknown error"); + } + } + + ucm_debug("original %s() is at %p", symbol, func_ptr); + return func_ptr; +} + +#endif diff --git a/src/ucm/util/replace.c b/src/ucm/util/replace.c new file mode 100644 index 0000000..6d8abae --- /dev/null +++ b/src/ucm/util/replace.c @@ -0,0 +1,165 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifndef MAP_FAILED +#define MAP_FAILED ((void*)-1) +#endif + +#ifdef PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP +pthread_mutex_t ucm_reloc_get_orig_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; +#else +pthread_mutex_t ucm_reloc_get_orig_lock; +static void ucm_reloc_get_orig_lock_init(void) __attribute__((constructor(101))); +static void ucm_reloc_get_orig_lock_init(void) +{ + pthread_mutexattr_t attr; + + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&ucm_reloc_get_orig_lock, &attr); +} +#endif +pthread_t volatile ucm_reloc_get_orig_thread = (pthread_t)-1; + +UCM_DEFINE_REPLACE_FUNC(mmap, void*, MAP_FAILED, void*, size_t, int, int, int, off_t) +UCM_DEFINE_REPLACE_FUNC(munmap, int, -1, void*, size_t) +#if HAVE_MREMAP +UCM_DEFINE_REPLACE_FUNC(mremap, void*, MAP_FAILED, void*, size_t, size_t, int) +#endif +UCM_DEFINE_REPLACE_FUNC(shmat, void*, MAP_FAILED, int, const void*, int) +UCM_DEFINE_REPLACE_FUNC(shmdt, int, -1, const void*) +UCM_DEFINE_REPLACE_FUNC(sbrk, void*, MAP_FAILED, intptr_t) +UCM_DEFINE_REPLACE_FUNC(brk, int, -1, void*) +UCM_DEFINE_REPLACE_FUNC(madvise, int, -1, void*, size_t, int) + +UCM_DEFINE_SELECT_FUNC(mmap, void*, MAP_FAILED, SYS_mmap, void*, size_t, int, int, int, off_t) +UCM_DEFINE_SELECT_FUNC(munmap, int, -1, SYS_munmap, void*, size_t) +#if HAVE_MREMAP +UCM_DEFINE_SELECT_FUNC(mremap, void*, MAP_FAILED, 
/*
 * Invoke the brk system call directly.
 *
 * @param [in] addr  Requested program break.
 *
 * @return Value returned by the system call, as a pointer.
 */
void *ucm_brk_syscall(void *addr)
{
    return (void*)syscall(SYS_brk, addr);
}

/*
 * Set the program break by a direct system call, without going through the
 * brk() function of the C library.
 *
 * @param [in] addr  Requested program break.
 *
 * @return 0 on success. -1 with errno set to ENOMEM if the address returned
 *         by the system call is lower than the requested one.
 */
int ucm_orig_brk(void *addr)
{
    void *result = ucm_brk_syscall(addr);

#if HAVE___CURBRK
    /* keep the break value stored in __curbrk in sync with the system call */
    __curbrk = result;
#endif

    if (result >= addr) {
        return 0;
    }

    errno = ENOMEM;
    return -1;
}
+ void *prev; + + if (ucm_mmap_hook_mode() == UCM_MMAP_HOOK_RELOC) { + return ucm_orig_dlsym_sbrk(increment); + } else { + prev = ucm_brk_syscall(0); + return ucm_orig_brk(UCS_PTR_BYTE_OFFSET(prev, increment)) ? (void*)-1 : prev; + } +} + +#else /* UCM_BISTRO_HOOKS */ + +UCM_DEFINE_DLSYM_FUNC(sbrk, void*, MAP_FAILED, intptr_t) +UCM_DEFINE_DLSYM_FUNC(shmat, void*, MAP_FAILED, int, const void*, int) +UCM_DEFINE_DLSYM_FUNC(shmdt, int, -1, const void*) + +#endif /* UCM_BISTRO_HOOKS */ diff --git a/src/ucm/util/replace.h b/src/ucm/util/replace.h new file mode 100644 index 0000000..4b91b03 --- /dev/null +++ b/src/ucm/util/replace.h @@ -0,0 +1,130 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCM_UTIL_REPLACE_H_ +#define UCM_UTIL_REPLACE_H_ + +#include +#include +#include +#include + +extern pthread_mutex_t ucm_reloc_get_orig_lock; +extern pthread_t volatile ucm_reloc_get_orig_thread; + +/** + * Define a replacement function to a memory-mapping function call, which calls + * the event handler, and if event handler returns error code - calls the original + * function. + */ + +/* Due to CUDA API redifinition we have to create proxy macro to eliminate + * redifinition of internal finction names */ +#define UCM_DEFINE_REPLACE_FUNC(_name, _rettype, _fail_val, ...) \ + _UCM_DEFINE_REPLACE_FUNC(ucm_override_##_name, ucm_##_name, _rettype, _fail_val, __VA_ARGS__) + +#define _UCM_DEFINE_REPLACE_FUNC(_over_name, _ucm_name, _rettype, _fail_val, ...) 
\ + \ + /* Define a symbol which goes to the replacement - in case we are loaded first */ \ + _rettype _over_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \ + { \ + _rettype res; \ + UCM_BISTRO_PROLOGUE; \ + ucm_trace("%s()", __FUNCTION__); \ + \ + if (ucs_unlikely(ucm_reloc_get_orig_thread == pthread_self())) { \ + return (_rettype)_fail_val; \ + } \ + res = _ucm_name(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \ + UCM_BISTRO_EPILOGUE; \ + return res; \ + } + +#define UCM_OVERRIDE_FUNC(_name, _rettype) \ + _rettype _name() __attribute__ ((alias (UCS_PP_QUOTE(ucm_override_##_name)))); \ + +#define UCM_DEFINE_DLSYM_FUNC(_name, _rettype, _fail_val, ...) \ + _UCM_DEFINE_DLSYM_FUNC(_name, ucm_orig_##_name, ucm_override_##_name, \ + _rettype, _fail_val, __VA_ARGS__) + +#define _UCM_DEFINE_DLSYM_FUNC(_name, _orig_name, _over_name, _rettype, _fail_val, ...) \ + _rettype _over_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)); \ + \ + /* Call the original function using dlsym(RTLD_NEXT) */ \ + _rettype _orig_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \ + { \ + typedef _rettype (*func_ptr_t) (__VA_ARGS__); \ + static func_ptr_t orig_func_ptr = NULL; \ + \ + ucm_trace("%s()", __FUNCTION__); \ + \ + if (ucs_unlikely(orig_func_ptr == NULL)) { \ + pthread_mutex_lock(&ucm_reloc_get_orig_lock); \ + ucm_reloc_get_orig_thread = pthread_self(); \ + orig_func_ptr = (func_ptr_t)ucm_reloc_get_orig(UCS_PP_QUOTE(_name), \ + _over_name); \ + ucm_reloc_get_orig_thread = (pthread_t)-1; \ + pthread_mutex_unlock(&ucm_reloc_get_orig_lock); \ + } \ + return orig_func_ptr(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \ + } + +#define UCM_DEFINE_REPLACE_DLSYM_FUNC(_name, _rettype, _fail_val, ...) \ + _UCM_DEFINE_DLSYM_FUNC(_name, ucm_orig_##_name, ucm_override_##_name, \ + _rettype, _fail_val, __VA_ARGS__) \ + _UCM_DEFINE_REPLACE_FUNC(ucm_override_##_name, ucm_##_name, \ + _rettype, _fail_val, __VA_ARGS__) + +#define UCM_DEFINE_SYSCALL_FUNC(_name, _rettype, _syscall_id, ...) 
\ + /* Call syscall */ \ + _rettype ucm_orig_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \ + { \ + return (_rettype)syscall(_syscall_id, UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \ + } + +#if UCM_BISTRO_HOOKS +# define UCM_DEFINE_SELECT_FUNC(_name, _rettype, _fail_val, _syscall_id, ...) \ + _UCM_DEFINE_DLSYM_FUNC(_name, ucm_orig_##_name##_dlsym, ucm_override_##_name, \ + _rettype, _fail_val, __VA_ARGS__) \ + UCM_DEFINE_SYSCALL_FUNC(_name##_syscall, _rettype, _syscall_id, __VA_ARGS__) \ + _rettype ucm_orig_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \ + { \ + return (ucm_mmap_hook_mode() == UCM_MMAP_HOOK_BISTRO) ? \ + ucm_orig_##_name##_syscall(UCM_FUNC_PASS_ARGS(__VA_ARGS__)) : \ + ucm_orig_##_name##_dlsym(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \ + } +#else +# define UCM_DEFINE_SELECT_FUNC(_name, _rettype, _fail_val, _syscall_id, ...) \ + UCM_DEFINE_DLSYM_FUNC(_name, _rettype, _fail_val, __VA_ARGS__) +#endif + +/* + * Define argument list with given types. + */ +#define UCM_FUNC_DEFINE_ARGS(...) \ + UCS_PP_FOREACH_SEP(_UCM_FUNC_ARG_DEFINE, _, \ + UCS_PP_ZIP((UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__))), \ + (__VA_ARGS__))) + +/* + * Pass auto-generated arguments to a function call. + */ +#define UCM_FUNC_PASS_ARGS(...) \ + UCS_PP_FOREACH_SEP(_UCM_FUNC_ARG_PASS, _, UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__))) + + +/* + * Helpers + */ +#define _UCM_FUNC_ARG_DEFINE(_, _bundle) \ + __UCM_FUNC_ARG_DEFINE(_, UCS_PP_TUPLE_0 _bundle, UCS_PP_TUPLE_1 _bundle) +#define __UCM_FUNC_ARG_DEFINE(_, _index, _type) \ + _type UCS_PP_TOKENPASTE(arg, _index) +#define _UCM_FUNC_ARG_PASS(_, _index) \ + UCS_PP_TOKENPASTE(arg, _index) + +#endif diff --git a/src/ucm/util/sys.c b/src/ucm/util/sys.c new file mode 100644 index 0000000..eebd58a --- /dev/null +++ b/src/ucm/util/sys.c @@ -0,0 +1,342 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE /* for dladdr */ +#endif + +#include "sys.h" + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define UCM_PROC_SELF_MAPS "/proc/self/maps" + +ucm_global_config_t ucm_global_opts = { + .log_level = UCS_LOG_LEVEL_WARN, + .enable_events = 1, + .mmap_hook_mode = UCM_DEFAULT_HOOK_MODE, + .enable_malloc_hooks = 1, + .enable_malloc_reloc = 0, + .enable_cuda_reloc = 1, + .enable_dynamic_mmap_thresh = 1, + .alloc_alignment = 16, + .dlopen_process_rpath = 1 +}; + +size_t ucm_get_page_size() +{ + static long page_size = -1; + long value; + + if (page_size == -1) { + value = sysconf(_SC_PAGESIZE); + if (value < 0) { + page_size = 4096; + } else { + page_size = value; + } + } + return page_size; +} + +static void *ucm_sys_complete_alloc(void *ptr, size_t size) +{ + *(size_t*)ptr = size; + return UCS_PTR_BYTE_OFFSET(ptr, sizeof(size_t)); +} + +void *ucm_sys_malloc(size_t size) +{ + size_t sys_size; + void *ptr; + + sys_size = ucs_align_up_pow2(size + sizeof(size_t), ucm_get_page_size()); + ptr = ucm_orig_mmap(NULL, sys_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + ucm_error("mmap(size=%zu) failed: %m", sys_size); + return NULL; + } + + return ucm_sys_complete_alloc(ptr, sys_size); +} + +/* + * Allocate zero-filled memory for an array of nmemb elements of size bytes + * each, using ucm_sys_malloc(). + * + * Returns the allocated pointer, or NULL if ucm_sys_malloc() fails or if + * nmemb * size does not fit in size_t (errno is set to ENOMEM in that case). + */ +void *ucm_sys_calloc(size_t nmemb, size_t size) +{ + size_t total_size; + void *ptr; + + /* A wrapped product would allocate less than the caller asked for, and + * the caller would then write past the end of the mapping. 
*/ + if ((size != 0) && (nmemb > (((size_t)-1) / size))) { + errno = ENOMEM; + return NULL; + } + + total_size = size * nmemb; + ptr = ucm_sys_malloc(total_size); + if (ptr == NULL) { + return NULL; + } + + memset(ptr, 0, total_size); + return ptr; +} + +void ucm_sys_free(void *ptr) +{ + size_t size; + + if (ptr == NULL) { + return; + } + + /* Do not use UCS_PTR_BYTE_OFFSET macro here due to coverity + * false positive. + * TODO: check for false positive on newer coverity. 
*/ + ptr = (char*)ptr - sizeof(size_t); + size = *(size_t*)ptr; + munmap(ptr, size); +} + +void *ucm_sys_realloc(void *ptr, size_t size) +{ + size_t oldsize, sys_size; + void *oldptr, *newptr; + + if (ptr == NULL) { + return ucm_sys_malloc(size); + } + + oldptr = UCS_PTR_BYTE_OFFSET(ptr, -sizeof(size_t)); + oldsize = *(size_t*)oldptr; + sys_size = ucs_align_up_pow2(size + sizeof(size_t), ucm_get_page_size()); + + if (sys_size == oldsize) { + return ptr; + } + + newptr = ucm_orig_mremap(oldptr, oldsize, sys_size, MREMAP_MAYMOVE); + if (newptr == MAP_FAILED) { + ucm_error("mremap(oldptr=%p oldsize=%zu, newsize=%zu) failed: %m", + oldptr, oldsize, sys_size); + return NULL; + } + + return ucm_sys_complete_alloc(newptr, sys_size); +} + +void ucm_parse_proc_self_maps(ucm_proc_maps_cb_t cb, void *arg) +{ + static char *buffer = MAP_FAILED; + static size_t buffer_size = 32768; + static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER; + ssize_t read_size, offset; + unsigned long start, end; + char prot_c[4]; + int line_num; + int prot; + char *ptr, *newline; + int maps_fd; + int ret; + int n; + + maps_fd = open(UCM_PROC_SELF_MAPS, O_RDONLY); + if (maps_fd < 0) { + ucm_fatal("cannot open %s for reading: %m", UCM_PROC_SELF_MAPS); + } + + /* read /proc/self/maps fully into the buffer */ + pthread_rwlock_wrlock(&lock); + + if (buffer == MAP_FAILED) { + buffer = ucm_orig_mmap(NULL, buffer_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (buffer == MAP_FAILED) { + ucm_fatal("failed to allocate maps buffer(size=%zu): %m", buffer_size); + } + } + + offset = 0; + for (;;) { + read_size = read(maps_fd, buffer + offset, buffer_size - offset); + if (read_size < 0) { + /* error */ + if (errno != EINTR) { + ucm_fatal("failed to read from %s: %m", UCM_PROC_SELF_MAPS); + } + } else if (read_size == buffer_size - offset) { + /* enlarge buffer */ + buffer = ucm_orig_mremap(buffer, buffer_size, buffer_size * 2, + MREMAP_MAYMOVE); + if (buffer == MAP_FAILED) { + 
ucm_fatal("failed to allocate maps buffer(size=%zu)", buffer_size); + } + buffer_size *= 2; + + /* read again from the beginning of the file */ + ret = lseek(maps_fd, 0, SEEK_SET); + if (ret < 0) { + ucm_fatal("failed to lseek(0): %m"); + } + offset = 0; + } else if (read_size == 0) { + /* finished reading */ + buffer[offset] = '\0'; + break; + } else { + /* more data could be available even if the buffer is not full */ + offset += read_size; + } + } + pthread_rwlock_unlock(&lock); + + close(maps_fd); + + pthread_rwlock_rdlock(&lock); + + ptr = buffer; + line_num = 1; + while ( (newline = strchr(ptr, '\n')) != NULL ) { + /* address perms offset dev inode pathname + * 00400000-0040b000 r-xp 00001a00 0a:0b 12345 /dev/mydev + */ + *newline = '\0'; + ret = sscanf(ptr, "%lx-%lx %4c %*x %*x:%*x %*d %n", + &start, &end, prot_c, + /* ignore offset, dev, inode */ + &n /* save number of chars before path begins */); + if (ret < 3) { + ucm_warn("failed to parse %s line %d: '%s'", + UCM_PROC_SELF_MAPS, line_num, ptr); + } else { + prot = 0; + if (prot_c[0] == 'r') { + prot |= PROT_READ; + } + if (prot_c[1] == 'w') { + prot |= PROT_WRITE; + } + if (prot_c[2] == 'x') { + prot |= PROT_EXEC; + } + + if (cb(arg, (void*)start, end - start, prot, ptr + n)) { + goto out; + } + } + + ptr = newline + 1; + ++line_num; + } + +out: + pthread_rwlock_unlock(&lock); +} + +typedef struct { + const void *shmaddr; + size_t seg_size; +} ucm_get_shm_seg_size_ctx_t; + +static int ucm_get_shm_seg_size_cb(void *arg, void *addr, size_t length, + int prot, const char *path) +{ + ucm_get_shm_seg_size_ctx_t *ctx = arg; + if (addr == ctx->shmaddr) { + ctx->seg_size = length; + return 1; + } + return 0; +} + +size_t ucm_get_shm_seg_size(const void *shmaddr) +{ + ucm_get_shm_seg_size_ctx_t ctx = { shmaddr, 0 }; + ucm_parse_proc_self_maps(ucm_get_shm_seg_size_cb, &ctx); + return ctx.seg_size; +} + +void ucm_strerror(int eno, char *buf, size_t max) +{ +#if STRERROR_R_CHAR_P + char *ret = strerror_r(eno, buf, 
+ max); + if (ret != buf) { + strncpy(buf, ret, max); + } +#else + (void)strerror_r(eno, buf, max); +#endif +} + +void ucm_prevent_dl_unload() +{ + Dl_info info; + void *dl; + int ret; + + /* Get the path to current library by current function pointer */ + (void)dlerror(); + ret = dladdr(ucm_prevent_dl_unload, &info); + if (ret == 0) { + ucm_warn("could not find address of current library: %s", dlerror()); + return; + } + + /* Load the current library with NODELETE flag, to prevent it from being + * unloaded. This will create extra reference to the library, but also add + * NODELETE flag to the dynamic link map. + */ + (void)dlerror(); + dl = dlopen(info.dli_fname, RTLD_LOCAL|RTLD_LAZY|RTLD_NODELETE); + if (dl == NULL) { + ucm_warn("failed to load '%s': %s", info.dli_fname, dlerror()); + return; + } + + ucm_debug("reloaded '%s' at %p with NODELETE flag", info.dli_fname, dl); + + /* Now we drop our reference to the lib, and it won't be unloaded anymore */ + dlclose(dl); +} + +/* + * Join dir and file into buffer as "dir/file", dropping trailing '/' from dir + * and leading '/' from file. At most max bytes of buffer are written. If there + * is no room left for '/' plus a terminator, only the (truncated) dir is + * stored. Returns buffer. + */ +char *ucm_concat_path(char *buffer, size_t max, const char *dir, const char *file) +{ + size_t len; + + len = strlen(dir); + while (len && (dir[len - 1] == '/')) { + len--; /* trim closing '/' */ + } + + len = ucs_min(len, max); + memcpy(buffer, dir, len); + max -= len; + if (max < 2) { /* buffer is shorter than dir - copy dir only */ + if (len > 0) { + buffer[len - 1] = '\0'; + } else if (max > 0) { + /* nothing was copied (empty or all-'/' dir): terminate in place + * instead of writing before the start of the buffer */ + buffer[0] = '\0'; + } + /* len == 0 and max == 0: zero-sized buffer, nothing can be written */ + return buffer; + } + + buffer[len] = '/'; + max--; + + while (file[0] == '/') { + file++; /* trim beginning '/' 
*/ + } + + strncpy(buffer + len + 1, file, max); + buffer[max + len] = '\0'; /* force close string */ + + return buffer; +} diff --git a/src/ucm/util/sys.h b/src/ucm/util/sys.h new file mode 100644 index 0000000..37a1d92 --- /dev/null +++ b/src/ucm/util/sys.h @@ -0,0 +1,91 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + + +#ifndef UCM_UTIL_SYS_H_ +#define UCM_UTIL_SYS_H_ + +#include + + +/* + * Substitutes for glibc memory allocation routines, which take memory + * directly from the operating system, and therefore are safe to use from + * malloc hooks. + */ +void *ucm_sys_malloc(size_t size); +void *ucm_sys_calloc(size_t nmemb, size_t size); +void ucm_sys_free(void *ptr); +void *ucm_sys_realloc(void *oldptr, size_t newsize); + + +/** + * Callback function for processing entries in /proc/self/maps. + * + * @param [in] arg User-defined argument. + * @param [in] addr Mapping start address. + * @param [in] length Mapping length. + * @param [in] prot Mapping memory protection flags (PROT_xx). + * @param [in] path Backing file path, or NULL for anonymous mapping. + * + * @return 0 to continue iteration, nonzero - stop iteration. + */ +typedef int (*ucm_proc_maps_cb_t)(void *arg, void *addr, size_t length, + int prot, const char *path); + + +/** + * @return Page size on the system. + */ +size_t ucm_get_page_size(); + + +/** + * Read and process entries from /proc/self/maps. + * + * @param [in] cb Callback function that would be called for each entry + * found in /proc/self/maps. + * @param [in] arg User-defined argument for the function. + */ +void ucm_parse_proc_self_maps(ucm_proc_maps_cb_t cb, void *arg); + + +/** + * @brief Get the size of a shared memory segment, attached with shmat() + * + * @param [in] shmaddr Segment pointer. + * @return Segment size, or 0 if not found. + */ +size_t ucm_get_shm_seg_size(const void *shmaddr); + + +/** + * @brief Convert a errno number to error string + * + * @param [in] en errno value + * @param [out] buf Buffer to put the error string in + * @param [in] max Size of the buffer + */ +void ucm_strerror(int eno, char *buf, size_t max); + + +void ucm_prevent_dl_unload(); + + +/* + * Concatenate directory and file names into full path. + * + * @param buffer Filled with the result path. + * @param max Maximal buffer size. 
+ * @param dir Directory name. + * @param file File name. + * + * @return Result buffer. + */ +char *ucm_concat_path(char *buffer, size_t max, const char *dir, const char *file); + + +#endif diff --git a/src/ucp/Makefile.am b/src/ucp/Makefile.am new file mode 100644 index 0000000..d540e83 --- /dev/null +++ b/src/ucp/Makefile.am @@ -0,0 +1,113 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED. +# Copyright (C) Los Alamos National Security, LLC. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +lib_LTLIBRARIES = libucp.la + +libucp_la_CFLAGS = $(BASE_CFLAGS) +libucp_la_LIBS = +libucp_la_CPPFLAGS = $(BASE_CPPFLAGS) +libucp_la_LDFLAGS = -ldl -version-info $(SOVERSION) +libucp_la_LIBADD = ../ucs/libucs.la ../uct/libuct.la +libucp_ladir = $(includedir)/ucp + +nobase_dist_libucp_la_HEADERS = \ + api/ucp_compat.h \ + api/ucp_def.h \ + api/ucp_version.h \ + api/ucp.h + +noinst_HEADERS = \ + core/ucp_am.h \ + core/ucp_am.inl \ + core/ucp_context.h \ + core/ucp_ep.h \ + core/ucp_ep.inl \ + core/ucp_listener.h \ + core/ucp_mm.h \ + core/ucp_proxy_ep.h \ + core/ucp_request.h \ + core/ucp_request.inl \ + core/ucp_worker.h \ + core/ucp_thread.h \ + core/ucp_types.h \ + dt/dt.h \ + dt/dt.inl \ + dt/dt_contig.h \ + dt/dt_iov.h \ + dt/dt_generic.h \ + proto/proto_am.h \ + proto/proto_am.inl \ + rma/rma.h \ + rma/rma.inl \ + tag/eager.h \ + tag/rndv.h \ + tag/tag_match.h \ + tag/tag_match.inl \ + tag/offload.h \ + wireup/address.h \ + wireup/ep_match.h \ + wireup/wireup_ep.h \ + wireup/wireup.h \ + wireup/wireup_cm.h \ + stream/stream.h + +devel_headers = \ + core/ucp_resource.h + +if INSTALL_DEVEL_HEADERS +nobase_dist_libucp_la_HEADERS += $(devel_headers) +else +noinst_HEADERS += $(devel_headers) +endif + +if ENABLE_EXPERIMENTAL_API +nobase_dist_libucp_la_HEADERS += api/ucpx.h +else +noinst_HEADERS += api/ucpx.h +endif + +libucp_la_SOURCES = \ + core/ucp_context.c \ + 
core/ucp_am.c \ + core/ucp_ep.c \ + core/ucp_listener.c \ + core/ucp_mm.c \ + core/ucp_proxy_ep.c \ + core/ucp_request.c \ + core/ucp_rkey.c \ + core/ucp_version.c \ + core/ucp_worker.c \ + dt/dt_contig.c \ + dt/dt_iov.c \ + dt/dt_generic.c \ + dt/dt.c \ + proto/proto_am.c \ + rma/amo_basic.c \ + rma/amo_send.c \ + rma/amo_sw.c \ + rma/rma_basic.c \ + rma/rma_send.c \ + rma/rma_sw.c \ + rma/flush.c \ + tag/eager_rcv.c \ + tag/eager_snd.c \ + tag/probe.c \ + tag/rndv.c \ + tag/tag_match.c \ + tag/tag_recv.c \ + tag/tag_send.c \ + tag/offload.c \ + wireup/address.c \ + wireup/ep_match.c \ + wireup/select.c \ + wireup/signaling_ep.c \ + wireup/wireup_ep.c \ + wireup/wireup.c \ + wireup/wireup_cm.c \ + stream/stream_send.c \ + stream/stream_recv.c + diff --git a/src/ucp/Makefile.in b/src/ucp/Makefile.in new file mode 100644 index 0000000..babd78a --- /dev/null +++ b/src/ucp/Makefile.in @@ -0,0 +1,1512 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED. +# Copyright (C) Los Alamos National Security, LLC. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : 
+NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@INSTALL_DEVEL_HEADERS_TRUE@am__append_1 = $(devel_headers) +@INSTALL_DEVEL_HEADERS_FALSE@am__append_2 = $(devel_headers) +@ENABLE_EXPERIMENTAL_API_TRUE@am__append_3 = api/ucpx.h +@ENABLE_EXPERIMENTAL_API_FALSE@am__append_4 = api/ucpx.h +subdir = src/ucp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + 
$(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am \ + $(am__nobase_dist_libucp_la_HEADERS_DIST) \ + $(am__noinst_HEADERS_DIST) $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libucp_ladir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libucp_la_DEPENDENCIES = ../ucs/libucs.la ../uct/libuct.la +am__dirstamp = $(am__leading_dot)dirstamp +am_libucp_la_OBJECTS = core/libucp_la-ucp_context.lo \ + core/libucp_la-ucp_am.lo core/libucp_la-ucp_ep.lo \ + core/libucp_la-ucp_listener.lo core/libucp_la-ucp_mm.lo \ + core/libucp_la-ucp_proxy_ep.lo core/libucp_la-ucp_request.lo \ + core/libucp_la-ucp_rkey.lo core/libucp_la-ucp_version.lo \ + core/libucp_la-ucp_worker.lo dt/libucp_la-dt_contig.lo \ + dt/libucp_la-dt_iov.lo dt/libucp_la-dt_generic.lo \ + dt/libucp_la-dt.lo proto/libucp_la-proto_am.lo \ + rma/libucp_la-amo_basic.lo rma/libucp_la-amo_send.lo \ + rma/libucp_la-amo_sw.lo rma/libucp_la-rma_basic.lo \ + rma/libucp_la-rma_send.lo rma/libucp_la-rma_sw.lo \ + rma/libucp_la-flush.lo tag/libucp_la-eager_rcv.lo \ + tag/libucp_la-eager_snd.lo tag/libucp_la-probe.lo \ + tag/libucp_la-rndv.lo tag/libucp_la-tag_match.lo \ + tag/libucp_la-tag_recv.lo tag/libucp_la-tag_send.lo \ + tag/libucp_la-offload.lo wireup/libucp_la-address.lo \ + wireup/libucp_la-ep_match.lo wireup/libucp_la-select.lo \ + wireup/libucp_la-signaling_ep.lo wireup/libucp_la-wireup_ep.lo \ + wireup/libucp_la-wireup.lo wireup/libucp_la-wireup_cm.lo \ + stream/libucp_la-stream_send.lo \ + stream/libucp_la-stream_recv.lo +libucp_la_OBJECTS = $(am_libucp_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libucp_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libucp_la_CFLAGS) \ + $(CFLAGS) $(libucp_la_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = 
@echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = core/$(DEPDIR)/libucp_la-ucp_am.Plo \ + core/$(DEPDIR)/libucp_la-ucp_context.Plo \ + core/$(DEPDIR)/libucp_la-ucp_ep.Plo \ + core/$(DEPDIR)/libucp_la-ucp_listener.Plo \ + core/$(DEPDIR)/libucp_la-ucp_mm.Plo \ + core/$(DEPDIR)/libucp_la-ucp_proxy_ep.Plo \ + core/$(DEPDIR)/libucp_la-ucp_request.Plo \ + core/$(DEPDIR)/libucp_la-ucp_rkey.Plo \ + core/$(DEPDIR)/libucp_la-ucp_version.Plo \ + core/$(DEPDIR)/libucp_la-ucp_worker.Plo \ + dt/$(DEPDIR)/libucp_la-dt.Plo \ + dt/$(DEPDIR)/libucp_la-dt_contig.Plo \ + dt/$(DEPDIR)/libucp_la-dt_generic.Plo \ + dt/$(DEPDIR)/libucp_la-dt_iov.Plo \ + proto/$(DEPDIR)/libucp_la-proto_am.Plo \ + rma/$(DEPDIR)/libucp_la-amo_basic.Plo \ + rma/$(DEPDIR)/libucp_la-amo_send.Plo \ + rma/$(DEPDIR)/libucp_la-amo_sw.Plo \ + rma/$(DEPDIR)/libucp_la-flush.Plo \ + rma/$(DEPDIR)/libucp_la-rma_basic.Plo \ + rma/$(DEPDIR)/libucp_la-rma_send.Plo \ + rma/$(DEPDIR)/libucp_la-rma_sw.Plo \ + stream/$(DEPDIR)/libucp_la-stream_recv.Plo \ + stream/$(DEPDIR)/libucp_la-stream_send.Plo \ + tag/$(DEPDIR)/libucp_la-eager_rcv.Plo \ + tag/$(DEPDIR)/libucp_la-eager_snd.Plo \ + tag/$(DEPDIR)/libucp_la-offload.Plo \ + tag/$(DEPDIR)/libucp_la-probe.Plo \ + tag/$(DEPDIR)/libucp_la-rndv.Plo \ + tag/$(DEPDIR)/libucp_la-tag_match.Plo \ + tag/$(DEPDIR)/libucp_la-tag_recv.Plo \ + tag/$(DEPDIR)/libucp_la-tag_send.Plo \ + wireup/$(DEPDIR)/libucp_la-address.Plo \ + wireup/$(DEPDIR)/libucp_la-ep_match.Plo \ + wireup/$(DEPDIR)/libucp_la-select.Plo \ + wireup/$(DEPDIR)/libucp_la-signaling_ep.Plo \ + wireup/$(DEPDIR)/libucp_la-wireup.Plo \ + wireup/$(DEPDIR)/libucp_la-wireup_cm.Plo \ + wireup/$(DEPDIR)/libucp_la-wireup_ep.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libucp_la_SOURCES) +DIST_SOURCES = $(libucp_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__nobase_dist_libucp_la_HEADERS_DIST = api/ucp_compat.h \ + api/ucp_def.h api/ucp_version.h api/ucp.h core/ucp_resource.h \ + api/ucpx.h +am__noinst_HEADERS_DIST = core/ucp_am.h core/ucp_am.inl \ + core/ucp_context.h core/ucp_ep.h core/ucp_ep.inl \ + core/ucp_listener.h core/ucp_mm.h core/ucp_proxy_ep.h \ + core/ucp_request.h core/ucp_request.inl core/ucp_worker.h \ + core/ucp_thread.h core/ucp_types.h dt/dt.h dt/dt.inl \ + dt/dt_contig.h dt/dt_iov.h dt/dt_generic.h proto/proto_am.h \ + proto/proto_am.inl rma/rma.h rma/rma.inl tag/eager.h \ + tag/rndv.h tag/tag_match.h tag/tag_match.inl tag/offload.h \ + wireup/address.h wireup/ep_match.h wireup/wireup_ep.h \ + wireup/wireup.h wireup/wireup_cm.h stream/stream.h \ + core/ucp_resource.h api/ucpx.h +HEADERS = $(nobase_dist_libucp_la_HEADERS) $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. 
+am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = 
@DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ 
+PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ 
+pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +lib_LTLIBRARIES = libucp.la +libucp_la_CFLAGS = $(BASE_CFLAGS) +libucp_la_LIBS = +libucp_la_CPPFLAGS = $(BASE_CPPFLAGS) +libucp_la_LDFLAGS = -ldl -version-info $(SOVERSION) +libucp_la_LIBADD = ../ucs/libucs.la ../uct/libuct.la +libucp_ladir = $(includedir)/ucp +nobase_dist_libucp_la_HEADERS = api/ucp_compat.h api/ucp_def.h \ + api/ucp_version.h api/ucp.h $(am__append_1) $(am__append_3) +noinst_HEADERS = core/ucp_am.h core/ucp_am.inl core/ucp_context.h \ + core/ucp_ep.h core/ucp_ep.inl core/ucp_listener.h \ + core/ucp_mm.h core/ucp_proxy_ep.h core/ucp_request.h \ + core/ucp_request.inl core/ucp_worker.h core/ucp_thread.h \ + core/ucp_types.h dt/dt.h dt/dt.inl dt/dt_contig.h dt/dt_iov.h \ + dt/dt_generic.h proto/proto_am.h proto/proto_am.inl rma/rma.h \ + rma/rma.inl tag/eager.h tag/rndv.h tag/tag_match.h \ + tag/tag_match.inl tag/offload.h wireup/address.h \ + wireup/ep_match.h wireup/wireup_ep.h wireup/wireup.h \ + wireup/wireup_cm.h stream/stream.h $(am__append_2) \ + $(am__append_4) +devel_headers = \ + core/ucp_resource.h + +libucp_la_SOURCES = \ + core/ucp_context.c \ + core/ucp_am.c \ + core/ucp_ep.c \ + core/ucp_listener.c \ + core/ucp_mm.c \ + core/ucp_proxy_ep.c \ + core/ucp_request.c \ + core/ucp_rkey.c \ + core/ucp_version.c \ + core/ucp_worker.c \ + dt/dt_contig.c \ + dt/dt_iov.c \ + dt/dt_generic.c \ + dt/dt.c \ + proto/proto_am.c \ + rma/amo_basic.c \ + rma/amo_send.c \ + rma/amo_sw.c \ + rma/rma_basic.c \ + rma/rma_send.c \ + rma/rma_sw.c \ + rma/flush.c \ + tag/eager_rcv.c \ + tag/eager_snd.c \ + tag/probe.c \ + tag/rndv.c \ + tag/tag_match.c \ + tag/tag_recv.c \ + 
tag/tag_send.c \ + tag/offload.c \ + wireup/address.c \ + wireup/ep_match.c \ + wireup/select.c \ + wireup/signaling_ep.c \ + wireup/wireup_ep.c \ + wireup/wireup.c \ + wireup/wireup_cm.c \ + stream/stream_send.c \ + stream/stream_recv.c + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/ucp/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/ucp/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install 
$(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +core/$(am__dirstamp): + @$(MKDIR_P) core + @: > core/$(am__dirstamp) +core/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) core/$(DEPDIR) + @: > core/$(DEPDIR)/$(am__dirstamp) +core/libucp_la-ucp_context.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/libucp_la-ucp_am.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/libucp_la-ucp_ep.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/libucp_la-ucp_listener.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/libucp_la-ucp_mm.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/libucp_la-ucp_proxy_ep.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/libucp_la-ucp_request.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/libucp_la-ucp_rkey.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/libucp_la-ucp_version.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +core/libucp_la-ucp_worker.lo: core/$(am__dirstamp) \ + core/$(DEPDIR)/$(am__dirstamp) +dt/$(am__dirstamp): + @$(MKDIR_P) dt + @: > dt/$(am__dirstamp) 
+dt/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) dt/$(DEPDIR) + @: > dt/$(DEPDIR)/$(am__dirstamp) +dt/libucp_la-dt_contig.lo: dt/$(am__dirstamp) \ + dt/$(DEPDIR)/$(am__dirstamp) +dt/libucp_la-dt_iov.lo: dt/$(am__dirstamp) \ + dt/$(DEPDIR)/$(am__dirstamp) +dt/libucp_la-dt_generic.lo: dt/$(am__dirstamp) \ + dt/$(DEPDIR)/$(am__dirstamp) +dt/libucp_la-dt.lo: dt/$(am__dirstamp) dt/$(DEPDIR)/$(am__dirstamp) +proto/$(am__dirstamp): + @$(MKDIR_P) proto + @: > proto/$(am__dirstamp) +proto/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) proto/$(DEPDIR) + @: > proto/$(DEPDIR)/$(am__dirstamp) +proto/libucp_la-proto_am.lo: proto/$(am__dirstamp) \ + proto/$(DEPDIR)/$(am__dirstamp) +rma/$(am__dirstamp): + @$(MKDIR_P) rma + @: > rma/$(am__dirstamp) +rma/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) rma/$(DEPDIR) + @: > rma/$(DEPDIR)/$(am__dirstamp) +rma/libucp_la-amo_basic.lo: rma/$(am__dirstamp) \ + rma/$(DEPDIR)/$(am__dirstamp) +rma/libucp_la-amo_send.lo: rma/$(am__dirstamp) \ + rma/$(DEPDIR)/$(am__dirstamp) +rma/libucp_la-amo_sw.lo: rma/$(am__dirstamp) \ + rma/$(DEPDIR)/$(am__dirstamp) +rma/libucp_la-rma_basic.lo: rma/$(am__dirstamp) \ + rma/$(DEPDIR)/$(am__dirstamp) +rma/libucp_la-rma_send.lo: rma/$(am__dirstamp) \ + rma/$(DEPDIR)/$(am__dirstamp) +rma/libucp_la-rma_sw.lo: rma/$(am__dirstamp) \ + rma/$(DEPDIR)/$(am__dirstamp) +rma/libucp_la-flush.lo: rma/$(am__dirstamp) \ + rma/$(DEPDIR)/$(am__dirstamp) +tag/$(am__dirstamp): + @$(MKDIR_P) tag + @: > tag/$(am__dirstamp) +tag/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) tag/$(DEPDIR) + @: > tag/$(DEPDIR)/$(am__dirstamp) +tag/libucp_la-eager_rcv.lo: tag/$(am__dirstamp) \ + tag/$(DEPDIR)/$(am__dirstamp) +tag/libucp_la-eager_snd.lo: tag/$(am__dirstamp) \ + tag/$(DEPDIR)/$(am__dirstamp) +tag/libucp_la-probe.lo: tag/$(am__dirstamp) \ + tag/$(DEPDIR)/$(am__dirstamp) +tag/libucp_la-rndv.lo: tag/$(am__dirstamp) \ + tag/$(DEPDIR)/$(am__dirstamp) +tag/libucp_la-tag_match.lo: tag/$(am__dirstamp) \ + tag/$(DEPDIR)/$(am__dirstamp) +tag/libucp_la-tag_recv.lo: 
tag/$(am__dirstamp) \ + tag/$(DEPDIR)/$(am__dirstamp) +tag/libucp_la-tag_send.lo: tag/$(am__dirstamp) \ + tag/$(DEPDIR)/$(am__dirstamp) +tag/libucp_la-offload.lo: tag/$(am__dirstamp) \ + tag/$(DEPDIR)/$(am__dirstamp) +wireup/$(am__dirstamp): + @$(MKDIR_P) wireup + @: > wireup/$(am__dirstamp) +wireup/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) wireup/$(DEPDIR) + @: > wireup/$(DEPDIR)/$(am__dirstamp) +wireup/libucp_la-address.lo: wireup/$(am__dirstamp) \ + wireup/$(DEPDIR)/$(am__dirstamp) +wireup/libucp_la-ep_match.lo: wireup/$(am__dirstamp) \ + wireup/$(DEPDIR)/$(am__dirstamp) +wireup/libucp_la-select.lo: wireup/$(am__dirstamp) \ + wireup/$(DEPDIR)/$(am__dirstamp) +wireup/libucp_la-signaling_ep.lo: wireup/$(am__dirstamp) \ + wireup/$(DEPDIR)/$(am__dirstamp) +wireup/libucp_la-wireup_ep.lo: wireup/$(am__dirstamp) \ + wireup/$(DEPDIR)/$(am__dirstamp) +wireup/libucp_la-wireup.lo: wireup/$(am__dirstamp) \ + wireup/$(DEPDIR)/$(am__dirstamp) +wireup/libucp_la-wireup_cm.lo: wireup/$(am__dirstamp) \ + wireup/$(DEPDIR)/$(am__dirstamp) +stream/$(am__dirstamp): + @$(MKDIR_P) stream + @: > stream/$(am__dirstamp) +stream/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) stream/$(DEPDIR) + @: > stream/$(DEPDIR)/$(am__dirstamp) +stream/libucp_la-stream_send.lo: stream/$(am__dirstamp) \ + stream/$(DEPDIR)/$(am__dirstamp) +stream/libucp_la-stream_recv.lo: stream/$(am__dirstamp) \ + stream/$(DEPDIR)/$(am__dirstamp) + +libucp.la: $(libucp_la_OBJECTS) $(libucp_la_DEPENDENCIES) $(EXTRA_libucp_la_DEPENDENCIES) + $(AM_V_CCLD)$(libucp_la_LINK) -rpath $(libdir) $(libucp_la_OBJECTS) $(libucp_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f core/*.$(OBJEXT) + -rm -f core/*.lo + -rm -f dt/*.$(OBJEXT) + -rm -f dt/*.lo + -rm -f proto/*.$(OBJEXT) + -rm -f proto/*.lo + -rm -f rma/*.$(OBJEXT) + -rm -f rma/*.lo + -rm -f stream/*.$(OBJEXT) + -rm -f stream/*.lo + -rm -f tag/*.$(OBJEXT) + -rm -f tag/*.lo + -rm -f wireup/*.$(OBJEXT) + -rm -f wireup/*.lo + +distclean-compile: + -rm -f *.tab.c 
+ +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/libucp_la-ucp_am.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/libucp_la-ucp_context.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/libucp_la-ucp_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/libucp_la-ucp_listener.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/libucp_la-ucp_mm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/libucp_la-ucp_proxy_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/libucp_la-ucp_request.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/libucp_la-ucp_rkey.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/libucp_la-ucp_version.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@core/$(DEPDIR)/libucp_la-ucp_worker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@dt/$(DEPDIR)/libucp_la-dt.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@dt/$(DEPDIR)/libucp_la-dt_contig.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@dt/$(DEPDIR)/libucp_la-dt_generic.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@dt/$(DEPDIR)/libucp_la-dt_iov.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@proto/$(DEPDIR)/libucp_la-proto_am.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@rma/$(DEPDIR)/libucp_la-amo_basic.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@rma/$(DEPDIR)/libucp_la-amo_send.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@rma/$(DEPDIR)/libucp_la-amo_sw.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@rma/$(DEPDIR)/libucp_la-flush.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@rma/$(DEPDIR)/libucp_la-rma_basic.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@rma/$(DEPDIR)/libucp_la-rma_send.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@rma/$(DEPDIR)/libucp_la-rma_sw.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@stream/$(DEPDIR)/libucp_la-stream_recv.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@stream/$(DEPDIR)/libucp_la-stream_send.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag/$(DEPDIR)/libucp_la-eager_rcv.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag/$(DEPDIR)/libucp_la-eager_snd.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag/$(DEPDIR)/libucp_la-offload.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag/$(DEPDIR)/libucp_la-probe.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag/$(DEPDIR)/libucp_la-rndv.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag/$(DEPDIR)/libucp_la-tag_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag/$(DEPDIR)/libucp_la-tag_recv.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tag/$(DEPDIR)/libucp_la-tag_send.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@wireup/$(DEPDIR)/libucp_la-address.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@wireup/$(DEPDIR)/libucp_la-ep_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@wireup/$(DEPDIR)/libucp_la-select.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@wireup/$(DEPDIR)/libucp_la-signaling_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@wireup/$(DEPDIR)/libucp_la-wireup.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@wireup/$(DEPDIR)/libucp_la-wireup_cm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@wireup/$(DEPDIR)/libucp_la-wireup_ep.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + 
+core/libucp_la-ucp_context.lo: core/ucp_context.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT core/libucp_la-ucp_context.lo -MD -MP -MF core/$(DEPDIR)/libucp_la-ucp_context.Tpo -c -o core/libucp_la-ucp_context.lo `test -f 'core/ucp_context.c' || echo '$(srcdir)/'`core/ucp_context.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) core/$(DEPDIR)/libucp_la-ucp_context.Tpo core/$(DEPDIR)/libucp_la-ucp_context.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='core/ucp_context.c' object='core/libucp_la-ucp_context.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o core/libucp_la-ucp_context.lo `test -f 'core/ucp_context.c' || echo '$(srcdir)/'`core/ucp_context.c + +core/libucp_la-ucp_am.lo: core/ucp_am.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT core/libucp_la-ucp_am.lo -MD -MP -MF core/$(DEPDIR)/libucp_la-ucp_am.Tpo -c -o core/libucp_la-ucp_am.lo `test -f 'core/ucp_am.c' || echo '$(srcdir)/'`core/ucp_am.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) core/$(DEPDIR)/libucp_la-ucp_am.Tpo core/$(DEPDIR)/libucp_la-ucp_am.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='core/ucp_am.c' object='core/libucp_la-ucp_am.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o core/libucp_la-ucp_am.lo `test -f 'core/ucp_am.c' || echo '$(srcdir)/'`core/ucp_am.c + +core/libucp_la-ucp_ep.lo: core/ucp_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT core/libucp_la-ucp_ep.lo -MD -MP -MF core/$(DEPDIR)/libucp_la-ucp_ep.Tpo -c -o core/libucp_la-ucp_ep.lo `test -f 'core/ucp_ep.c' || echo '$(srcdir)/'`core/ucp_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) core/$(DEPDIR)/libucp_la-ucp_ep.Tpo core/$(DEPDIR)/libucp_la-ucp_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='core/ucp_ep.c' object='core/libucp_la-ucp_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o core/libucp_la-ucp_ep.lo `test -f 'core/ucp_ep.c' || echo '$(srcdir)/'`core/ucp_ep.c + +core/libucp_la-ucp_listener.lo: core/ucp_listener.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT core/libucp_la-ucp_listener.lo -MD -MP -MF core/$(DEPDIR)/libucp_la-ucp_listener.Tpo -c -o core/libucp_la-ucp_listener.lo `test -f 'core/ucp_listener.c' || echo '$(srcdir)/'`core/ucp_listener.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) core/$(DEPDIR)/libucp_la-ucp_listener.Tpo core/$(DEPDIR)/libucp_la-ucp_listener.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='core/ucp_listener.c' object='core/libucp_la-ucp_listener.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o core/libucp_la-ucp_listener.lo `test -f 'core/ucp_listener.c' || echo '$(srcdir)/'`core/ucp_listener.c + +core/libucp_la-ucp_mm.lo: core/ucp_mm.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT core/libucp_la-ucp_mm.lo -MD -MP -MF core/$(DEPDIR)/libucp_la-ucp_mm.Tpo -c -o core/libucp_la-ucp_mm.lo `test -f 'core/ucp_mm.c' || echo '$(srcdir)/'`core/ucp_mm.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) core/$(DEPDIR)/libucp_la-ucp_mm.Tpo core/$(DEPDIR)/libucp_la-ucp_mm.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='core/ucp_mm.c' object='core/libucp_la-ucp_mm.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o core/libucp_la-ucp_mm.lo `test -f 'core/ucp_mm.c' || echo '$(srcdir)/'`core/ucp_mm.c + +core/libucp_la-ucp_proxy_ep.lo: core/ucp_proxy_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT core/libucp_la-ucp_proxy_ep.lo -MD -MP 
-MF core/$(DEPDIR)/libucp_la-ucp_proxy_ep.Tpo -c -o core/libucp_la-ucp_proxy_ep.lo `test -f 'core/ucp_proxy_ep.c' || echo '$(srcdir)/'`core/ucp_proxy_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) core/$(DEPDIR)/libucp_la-ucp_proxy_ep.Tpo core/$(DEPDIR)/libucp_la-ucp_proxy_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='core/ucp_proxy_ep.c' object='core/libucp_la-ucp_proxy_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o core/libucp_la-ucp_proxy_ep.lo `test -f 'core/ucp_proxy_ep.c' || echo '$(srcdir)/'`core/ucp_proxy_ep.c + +core/libucp_la-ucp_request.lo: core/ucp_request.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT core/libucp_la-ucp_request.lo -MD -MP -MF core/$(DEPDIR)/libucp_la-ucp_request.Tpo -c -o core/libucp_la-ucp_request.lo `test -f 'core/ucp_request.c' || echo '$(srcdir)/'`core/ucp_request.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) core/$(DEPDIR)/libucp_la-ucp_request.Tpo core/$(DEPDIR)/libucp_la-ucp_request.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='core/ucp_request.c' object='core/libucp_la-ucp_request.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o core/libucp_la-ucp_request.lo `test -f 'core/ucp_request.c' || echo 
'$(srcdir)/'`core/ucp_request.c + +core/libucp_la-ucp_rkey.lo: core/ucp_rkey.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT core/libucp_la-ucp_rkey.lo -MD -MP -MF core/$(DEPDIR)/libucp_la-ucp_rkey.Tpo -c -o core/libucp_la-ucp_rkey.lo `test -f 'core/ucp_rkey.c' || echo '$(srcdir)/'`core/ucp_rkey.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) core/$(DEPDIR)/libucp_la-ucp_rkey.Tpo core/$(DEPDIR)/libucp_la-ucp_rkey.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='core/ucp_rkey.c' object='core/libucp_la-ucp_rkey.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o core/libucp_la-ucp_rkey.lo `test -f 'core/ucp_rkey.c' || echo '$(srcdir)/'`core/ucp_rkey.c + +core/libucp_la-ucp_version.lo: core/ucp_version.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT core/libucp_la-ucp_version.lo -MD -MP -MF core/$(DEPDIR)/libucp_la-ucp_version.Tpo -c -o core/libucp_la-ucp_version.lo `test -f 'core/ucp_version.c' || echo '$(srcdir)/'`core/ucp_version.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) core/$(DEPDIR)/libucp_la-ucp_version.Tpo core/$(DEPDIR)/libucp_la-ucp_version.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='core/ucp_version.c' object='core/libucp_la-ucp_version.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o core/libucp_la-ucp_version.lo `test -f 'core/ucp_version.c' || echo '$(srcdir)/'`core/ucp_version.c + +core/libucp_la-ucp_worker.lo: core/ucp_worker.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT core/libucp_la-ucp_worker.lo -MD -MP -MF core/$(DEPDIR)/libucp_la-ucp_worker.Tpo -c -o core/libucp_la-ucp_worker.lo `test -f 'core/ucp_worker.c' || echo '$(srcdir)/'`core/ucp_worker.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) core/$(DEPDIR)/libucp_la-ucp_worker.Tpo core/$(DEPDIR)/libucp_la-ucp_worker.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='core/ucp_worker.c' object='core/libucp_la-ucp_worker.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o core/libucp_la-ucp_worker.lo `test -f 'core/ucp_worker.c' || echo '$(srcdir)/'`core/ucp_worker.c + +dt/libucp_la-dt_contig.lo: dt/dt_contig.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT dt/libucp_la-dt_contig.lo -MD -MP -MF dt/$(DEPDIR)/libucp_la-dt_contig.Tpo -c -o dt/libucp_la-dt_contig.lo `test -f 'dt/dt_contig.c' || echo '$(srcdir)/'`dt/dt_contig.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
dt/$(DEPDIR)/libucp_la-dt_contig.Tpo dt/$(DEPDIR)/libucp_la-dt_contig.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dt/dt_contig.c' object='dt/libucp_la-dt_contig.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o dt/libucp_la-dt_contig.lo `test -f 'dt/dt_contig.c' || echo '$(srcdir)/'`dt/dt_contig.c + +dt/libucp_la-dt_iov.lo: dt/dt_iov.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT dt/libucp_la-dt_iov.lo -MD -MP -MF dt/$(DEPDIR)/libucp_la-dt_iov.Tpo -c -o dt/libucp_la-dt_iov.lo `test -f 'dt/dt_iov.c' || echo '$(srcdir)/'`dt/dt_iov.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) dt/$(DEPDIR)/libucp_la-dt_iov.Tpo dt/$(DEPDIR)/libucp_la-dt_iov.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dt/dt_iov.c' object='dt/libucp_la-dt_iov.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o dt/libucp_la-dt_iov.lo `test -f 'dt/dt_iov.c' || echo '$(srcdir)/'`dt/dt_iov.c + +dt/libucp_la-dt_generic.lo: dt/dt_generic.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT dt/libucp_la-dt_generic.lo 
-MD -MP -MF dt/$(DEPDIR)/libucp_la-dt_generic.Tpo -c -o dt/libucp_la-dt_generic.lo `test -f 'dt/dt_generic.c' || echo '$(srcdir)/'`dt/dt_generic.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) dt/$(DEPDIR)/libucp_la-dt_generic.Tpo dt/$(DEPDIR)/libucp_la-dt_generic.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dt/dt_generic.c' object='dt/libucp_la-dt_generic.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o dt/libucp_la-dt_generic.lo `test -f 'dt/dt_generic.c' || echo '$(srcdir)/'`dt/dt_generic.c + +dt/libucp_la-dt.lo: dt/dt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT dt/libucp_la-dt.lo -MD -MP -MF dt/$(DEPDIR)/libucp_la-dt.Tpo -c -o dt/libucp_la-dt.lo `test -f 'dt/dt.c' || echo '$(srcdir)/'`dt/dt.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) dt/$(DEPDIR)/libucp_la-dt.Tpo dt/$(DEPDIR)/libucp_la-dt.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dt/dt.c' object='dt/libucp_la-dt.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o dt/libucp_la-dt.lo `test -f 'dt/dt.c' || echo '$(srcdir)/'`dt/dt.c + +proto/libucp_la-proto_am.lo: proto/proto_am.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT proto/libucp_la-proto_am.lo -MD -MP -MF proto/$(DEPDIR)/libucp_la-proto_am.Tpo -c -o proto/libucp_la-proto_am.lo `test -f 'proto/proto_am.c' || echo '$(srcdir)/'`proto/proto_am.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) proto/$(DEPDIR)/libucp_la-proto_am.Tpo proto/$(DEPDIR)/libucp_la-proto_am.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='proto/proto_am.c' object='proto/libucp_la-proto_am.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o proto/libucp_la-proto_am.lo `test -f 'proto/proto_am.c' || echo '$(srcdir)/'`proto/proto_am.c + +rma/libucp_la-amo_basic.lo: rma/amo_basic.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT rma/libucp_la-amo_basic.lo -MD -MP -MF rma/$(DEPDIR)/libucp_la-amo_basic.Tpo -c -o rma/libucp_la-amo_basic.lo `test -f 'rma/amo_basic.c' || echo '$(srcdir)/'`rma/amo_basic.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rma/$(DEPDIR)/libucp_la-amo_basic.Tpo rma/$(DEPDIR)/libucp_la-amo_basic.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rma/amo_basic.c' object='rma/libucp_la-amo_basic.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) 
$(libucp_la_CFLAGS) $(CFLAGS) -c -o rma/libucp_la-amo_basic.lo `test -f 'rma/amo_basic.c' || echo '$(srcdir)/'`rma/amo_basic.c + +rma/libucp_la-amo_send.lo: rma/amo_send.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT rma/libucp_la-amo_send.lo -MD -MP -MF rma/$(DEPDIR)/libucp_la-amo_send.Tpo -c -o rma/libucp_la-amo_send.lo `test -f 'rma/amo_send.c' || echo '$(srcdir)/'`rma/amo_send.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rma/$(DEPDIR)/libucp_la-amo_send.Tpo rma/$(DEPDIR)/libucp_la-amo_send.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rma/amo_send.c' object='rma/libucp_la-amo_send.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o rma/libucp_la-amo_send.lo `test -f 'rma/amo_send.c' || echo '$(srcdir)/'`rma/amo_send.c + +rma/libucp_la-amo_sw.lo: rma/amo_sw.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT rma/libucp_la-amo_sw.lo -MD -MP -MF rma/$(DEPDIR)/libucp_la-amo_sw.Tpo -c -o rma/libucp_la-amo_sw.lo `test -f 'rma/amo_sw.c' || echo '$(srcdir)/'`rma/amo_sw.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rma/$(DEPDIR)/libucp_la-amo_sw.Tpo rma/$(DEPDIR)/libucp_la-amo_sw.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rma/amo_sw.c' object='rma/libucp_la-amo_sw.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o rma/libucp_la-amo_sw.lo `test -f 'rma/amo_sw.c' || echo '$(srcdir)/'`rma/amo_sw.c + +rma/libucp_la-rma_basic.lo: rma/rma_basic.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT rma/libucp_la-rma_basic.lo -MD -MP -MF rma/$(DEPDIR)/libucp_la-rma_basic.Tpo -c -o rma/libucp_la-rma_basic.lo `test -f 'rma/rma_basic.c' || echo '$(srcdir)/'`rma/rma_basic.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rma/$(DEPDIR)/libucp_la-rma_basic.Tpo rma/$(DEPDIR)/libucp_la-rma_basic.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rma/rma_basic.c' object='rma/libucp_la-rma_basic.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o rma/libucp_la-rma_basic.lo `test -f 'rma/rma_basic.c' || echo '$(srcdir)/'`rma/rma_basic.c + +rma/libucp_la-rma_send.lo: rma/rma_send.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT rma/libucp_la-rma_send.lo -MD -MP -MF rma/$(DEPDIR)/libucp_la-rma_send.Tpo -c -o rma/libucp_la-rma_send.lo `test -f 'rma/rma_send.c' || echo '$(srcdir)/'`rma/rma_send.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rma/$(DEPDIR)/libucp_la-rma_send.Tpo 
rma/$(DEPDIR)/libucp_la-rma_send.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rma/rma_send.c' object='rma/libucp_la-rma_send.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o rma/libucp_la-rma_send.lo `test -f 'rma/rma_send.c' || echo '$(srcdir)/'`rma/rma_send.c + +rma/libucp_la-rma_sw.lo: rma/rma_sw.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT rma/libucp_la-rma_sw.lo -MD -MP -MF rma/$(DEPDIR)/libucp_la-rma_sw.Tpo -c -o rma/libucp_la-rma_sw.lo `test -f 'rma/rma_sw.c' || echo '$(srcdir)/'`rma/rma_sw.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rma/$(DEPDIR)/libucp_la-rma_sw.Tpo rma/$(DEPDIR)/libucp_la-rma_sw.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rma/rma_sw.c' object='rma/libucp_la-rma_sw.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o rma/libucp_la-rma_sw.lo `test -f 'rma/rma_sw.c' || echo '$(srcdir)/'`rma/rma_sw.c + +rma/libucp_la-flush.lo: rma/flush.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT rma/libucp_la-flush.lo -MD -MP -MF 
rma/$(DEPDIR)/libucp_la-flush.Tpo -c -o rma/libucp_la-flush.lo `test -f 'rma/flush.c' || echo '$(srcdir)/'`rma/flush.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rma/$(DEPDIR)/libucp_la-flush.Tpo rma/$(DEPDIR)/libucp_la-flush.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rma/flush.c' object='rma/libucp_la-flush.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o rma/libucp_la-flush.lo `test -f 'rma/flush.c' || echo '$(srcdir)/'`rma/flush.c + +tag/libucp_la-eager_rcv.lo: tag/eager_rcv.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT tag/libucp_la-eager_rcv.lo -MD -MP -MF tag/$(DEPDIR)/libucp_la-eager_rcv.Tpo -c -o tag/libucp_la-eager_rcv.lo `test -f 'tag/eager_rcv.c' || echo '$(srcdir)/'`tag/eager_rcv.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tag/$(DEPDIR)/libucp_la-eager_rcv.Tpo tag/$(DEPDIR)/libucp_la-eager_rcv.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tag/eager_rcv.c' object='tag/libucp_la-eager_rcv.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o tag/libucp_la-eager_rcv.lo `test -f 'tag/eager_rcv.c' || echo '$(srcdir)/'`tag/eager_rcv.c + +tag/libucp_la-eager_snd.lo: tag/eager_snd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) 
--tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT tag/libucp_la-eager_snd.lo -MD -MP -MF tag/$(DEPDIR)/libucp_la-eager_snd.Tpo -c -o tag/libucp_la-eager_snd.lo `test -f 'tag/eager_snd.c' || echo '$(srcdir)/'`tag/eager_snd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tag/$(DEPDIR)/libucp_la-eager_snd.Tpo tag/$(DEPDIR)/libucp_la-eager_snd.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tag/eager_snd.c' object='tag/libucp_la-eager_snd.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o tag/libucp_la-eager_snd.lo `test -f 'tag/eager_snd.c' || echo '$(srcdir)/'`tag/eager_snd.c + +tag/libucp_la-probe.lo: tag/probe.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT tag/libucp_la-probe.lo -MD -MP -MF tag/$(DEPDIR)/libucp_la-probe.Tpo -c -o tag/libucp_la-probe.lo `test -f 'tag/probe.c' || echo '$(srcdir)/'`tag/probe.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tag/$(DEPDIR)/libucp_la-probe.Tpo tag/$(DEPDIR)/libucp_la-probe.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tag/probe.c' object='tag/libucp_la-probe.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) 
$(CFLAGS) -c -o tag/libucp_la-probe.lo `test -f 'tag/probe.c' || echo '$(srcdir)/'`tag/probe.c + +tag/libucp_la-rndv.lo: tag/rndv.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT tag/libucp_la-rndv.lo -MD -MP -MF tag/$(DEPDIR)/libucp_la-rndv.Tpo -c -o tag/libucp_la-rndv.lo `test -f 'tag/rndv.c' || echo '$(srcdir)/'`tag/rndv.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tag/$(DEPDIR)/libucp_la-rndv.Tpo tag/$(DEPDIR)/libucp_la-rndv.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tag/rndv.c' object='tag/libucp_la-rndv.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o tag/libucp_la-rndv.lo `test -f 'tag/rndv.c' || echo '$(srcdir)/'`tag/rndv.c + +tag/libucp_la-tag_match.lo: tag/tag_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT tag/libucp_la-tag_match.lo -MD -MP -MF tag/$(DEPDIR)/libucp_la-tag_match.Tpo -c -o tag/libucp_la-tag_match.lo `test -f 'tag/tag_match.c' || echo '$(srcdir)/'`tag/tag_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tag/$(DEPDIR)/libucp_la-tag_match.Tpo tag/$(DEPDIR)/libucp_la-tag_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tag/tag_match.c' object='tag/libucp_la-tag_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o tag/libucp_la-tag_match.lo `test -f 'tag/tag_match.c' || echo '$(srcdir)/'`tag/tag_match.c + +tag/libucp_la-tag_recv.lo: tag/tag_recv.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT tag/libucp_la-tag_recv.lo -MD -MP -MF tag/$(DEPDIR)/libucp_la-tag_recv.Tpo -c -o tag/libucp_la-tag_recv.lo `test -f 'tag/tag_recv.c' || echo '$(srcdir)/'`tag/tag_recv.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tag/$(DEPDIR)/libucp_la-tag_recv.Tpo tag/$(DEPDIR)/libucp_la-tag_recv.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tag/tag_recv.c' object='tag/libucp_la-tag_recv.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o tag/libucp_la-tag_recv.lo `test -f 'tag/tag_recv.c' || echo '$(srcdir)/'`tag/tag_recv.c + +tag/libucp_la-tag_send.lo: tag/tag_send.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT tag/libucp_la-tag_send.lo -MD -MP -MF tag/$(DEPDIR)/libucp_la-tag_send.Tpo -c -o tag/libucp_la-tag_send.lo `test -f 'tag/tag_send.c' || echo '$(srcdir)/'`tag/tag_send.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tag/$(DEPDIR)/libucp_la-tag_send.Tpo tag/$(DEPDIR)/libucp_la-tag_send.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='tag/tag_send.c' object='tag/libucp_la-tag_send.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o tag/libucp_la-tag_send.lo `test -f 'tag/tag_send.c' || echo '$(srcdir)/'`tag/tag_send.c + +tag/libucp_la-offload.lo: tag/offload.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT tag/libucp_la-offload.lo -MD -MP -MF tag/$(DEPDIR)/libucp_la-offload.Tpo -c -o tag/libucp_la-offload.lo `test -f 'tag/offload.c' || echo '$(srcdir)/'`tag/offload.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tag/$(DEPDIR)/libucp_la-offload.Tpo tag/$(DEPDIR)/libucp_la-offload.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tag/offload.c' object='tag/libucp_la-offload.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o tag/libucp_la-offload.lo `test -f 'tag/offload.c' || echo '$(srcdir)/'`tag/offload.c + +wireup/libucp_la-address.lo: wireup/address.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT wireup/libucp_la-address.lo -MD -MP -MF wireup/$(DEPDIR)/libucp_la-address.Tpo -c -o 
wireup/libucp_la-address.lo `test -f 'wireup/address.c' || echo '$(srcdir)/'`wireup/address.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) wireup/$(DEPDIR)/libucp_la-address.Tpo wireup/$(DEPDIR)/libucp_la-address.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='wireup/address.c' object='wireup/libucp_la-address.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o wireup/libucp_la-address.lo `test -f 'wireup/address.c' || echo '$(srcdir)/'`wireup/address.c + +wireup/libucp_la-ep_match.lo: wireup/ep_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT wireup/libucp_la-ep_match.lo -MD -MP -MF wireup/$(DEPDIR)/libucp_la-ep_match.Tpo -c -o wireup/libucp_la-ep_match.lo `test -f 'wireup/ep_match.c' || echo '$(srcdir)/'`wireup/ep_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) wireup/$(DEPDIR)/libucp_la-ep_match.Tpo wireup/$(DEPDIR)/libucp_la-ep_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='wireup/ep_match.c' object='wireup/libucp_la-ep_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o wireup/libucp_la-ep_match.lo `test -f 'wireup/ep_match.c' || echo '$(srcdir)/'`wireup/ep_match.c + +wireup/libucp_la-select.lo: wireup/select.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT wireup/libucp_la-select.lo -MD -MP -MF wireup/$(DEPDIR)/libucp_la-select.Tpo -c -o wireup/libucp_la-select.lo `test -f 'wireup/select.c' || echo '$(srcdir)/'`wireup/select.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) wireup/$(DEPDIR)/libucp_la-select.Tpo wireup/$(DEPDIR)/libucp_la-select.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='wireup/select.c' object='wireup/libucp_la-select.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o wireup/libucp_la-select.lo `test -f 'wireup/select.c' || echo '$(srcdir)/'`wireup/select.c + +wireup/libucp_la-signaling_ep.lo: wireup/signaling_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT wireup/libucp_la-signaling_ep.lo -MD -MP -MF wireup/$(DEPDIR)/libucp_la-signaling_ep.Tpo -c -o wireup/libucp_la-signaling_ep.lo `test -f 'wireup/signaling_ep.c' || echo '$(srcdir)/'`wireup/signaling_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) wireup/$(DEPDIR)/libucp_la-signaling_ep.Tpo wireup/$(DEPDIR)/libucp_la-signaling_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='wireup/signaling_ep.c' object='wireup/libucp_la-signaling_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o wireup/libucp_la-signaling_ep.lo `test -f 'wireup/signaling_ep.c' || echo '$(srcdir)/'`wireup/signaling_ep.c + +wireup/libucp_la-wireup_ep.lo: wireup/wireup_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT wireup/libucp_la-wireup_ep.lo -MD -MP -MF wireup/$(DEPDIR)/libucp_la-wireup_ep.Tpo -c -o wireup/libucp_la-wireup_ep.lo `test -f 'wireup/wireup_ep.c' || echo '$(srcdir)/'`wireup/wireup_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) wireup/$(DEPDIR)/libucp_la-wireup_ep.Tpo wireup/$(DEPDIR)/libucp_la-wireup_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='wireup/wireup_ep.c' object='wireup/libucp_la-wireup_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o wireup/libucp_la-wireup_ep.lo `test -f 'wireup/wireup_ep.c' || echo '$(srcdir)/'`wireup/wireup_ep.c + +wireup/libucp_la-wireup.lo: wireup/wireup.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT wireup/libucp_la-wireup.lo -MD -MP -MF wireup/$(DEPDIR)/libucp_la-wireup.Tpo -c -o wireup/libucp_la-wireup.lo `test -f 'wireup/wireup.c' || echo '$(srcdir)/'`wireup/wireup.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) wireup/$(DEPDIR)/libucp_la-wireup.Tpo 
wireup/$(DEPDIR)/libucp_la-wireup.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='wireup/wireup.c' object='wireup/libucp_la-wireup.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o wireup/libucp_la-wireup.lo `test -f 'wireup/wireup.c' || echo '$(srcdir)/'`wireup/wireup.c + +wireup/libucp_la-wireup_cm.lo: wireup/wireup_cm.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT wireup/libucp_la-wireup_cm.lo -MD -MP -MF wireup/$(DEPDIR)/libucp_la-wireup_cm.Tpo -c -o wireup/libucp_la-wireup_cm.lo `test -f 'wireup/wireup_cm.c' || echo '$(srcdir)/'`wireup/wireup_cm.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) wireup/$(DEPDIR)/libucp_la-wireup_cm.Tpo wireup/$(DEPDIR)/libucp_la-wireup_cm.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='wireup/wireup_cm.c' object='wireup/libucp_la-wireup_cm.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o wireup/libucp_la-wireup_cm.lo `test -f 'wireup/wireup_cm.c' || echo '$(srcdir)/'`wireup/wireup_cm.c + +stream/libucp_la-stream_send.lo: stream/stream_send.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT stream/libucp_la-stream_send.lo -MD -MP -MF stream/$(DEPDIR)/libucp_la-stream_send.Tpo -c -o stream/libucp_la-stream_send.lo `test -f 'stream/stream_send.c' || echo '$(srcdir)/'`stream/stream_send.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) stream/$(DEPDIR)/libucp_la-stream_send.Tpo stream/$(DEPDIR)/libucp_la-stream_send.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stream/stream_send.c' object='stream/libucp_la-stream_send.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o stream/libucp_la-stream_send.lo `test -f 'stream/stream_send.c' || echo '$(srcdir)/'`stream/stream_send.c + +stream/libucp_la-stream_recv.lo: stream/stream_recv.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -MT stream/libucp_la-stream_recv.lo -MD -MP -MF stream/$(DEPDIR)/libucp_la-stream_recv.Tpo -c -o stream/libucp_la-stream_recv.lo `test -f 'stream/stream_recv.c' || echo '$(srcdir)/'`stream/stream_recv.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) stream/$(DEPDIR)/libucp_la-stream_recv.Tpo stream/$(DEPDIR)/libucp_la-stream_recv.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stream/stream_recv.c' object='stream/libucp_la-stream_recv.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(libucp_la_CPPFLAGS) $(CPPFLAGS) $(libucp_la_CFLAGS) $(CFLAGS) -c -o stream/libucp_la-stream_recv.lo `test -f 'stream/stream_recv.c' || echo '$(srcdir)/'`stream/stream_recv.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf core/.libs core/_libs + -rm -rf dt/.libs dt/_libs + -rm -rf proto/.libs proto/_libs + -rm -rf rma/.libs rma/_libs + -rm -rf stream/.libs stream/_libs + -rm -rf tag/.libs tag/_libs + -rm -rf wireup/.libs wireup/_libs +install-nobase_dist_libucp_laHEADERS: $(nobase_dist_libucp_la_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nobase_dist_libucp_la_HEADERS)'; test -n "$(libucp_ladir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(libucp_ladir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libucp_ladir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. 
|| { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libucp_ladir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(libucp_ladir)/$$dir"; }; \ + echo " $(INSTALL_HEADER) $$xfiles '$(DESTDIR)$(libucp_ladir)/$$dir'"; \ + $(INSTALL_HEADER) $$xfiles "$(DESTDIR)$(libucp_ladir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_dist_libucp_laHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nobase_dist_libucp_la_HEADERS)'; test -n "$(libucp_ladir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(libucp_ladir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo 
"$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libucp_ladir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + 
+clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f core/$(DEPDIR)/$(am__dirstamp) + -rm -f core/$(am__dirstamp) + -rm -f dt/$(DEPDIR)/$(am__dirstamp) + -rm -f dt/$(am__dirstamp) + -rm -f proto/$(DEPDIR)/$(am__dirstamp) + -rm -f proto/$(am__dirstamp) + -rm -f rma/$(DEPDIR)/$(am__dirstamp) + -rm -f rma/$(am__dirstamp) + -rm -f stream/$(DEPDIR)/$(am__dirstamp) + -rm -f stream/$(am__dirstamp) + -rm -f tag/$(DEPDIR)/$(am__dirstamp) + -rm -f tag/$(am__dirstamp) + -rm -f wireup/$(DEPDIR)/$(am__dirstamp) + -rm -f wireup/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -f core/$(DEPDIR)/libucp_la-ucp_am.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_context.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_ep.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_listener.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_mm.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_proxy_ep.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_request.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_rkey.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_version.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_worker.Plo + -rm -f dt/$(DEPDIR)/libucp_la-dt.Plo + -rm -f dt/$(DEPDIR)/libucp_la-dt_contig.Plo + -rm -f dt/$(DEPDIR)/libucp_la-dt_generic.Plo + -rm -f dt/$(DEPDIR)/libucp_la-dt_iov.Plo + -rm -f proto/$(DEPDIR)/libucp_la-proto_am.Plo + -rm -f rma/$(DEPDIR)/libucp_la-amo_basic.Plo + -rm -f rma/$(DEPDIR)/libucp_la-amo_send.Plo + -rm -f rma/$(DEPDIR)/libucp_la-amo_sw.Plo + -rm -f rma/$(DEPDIR)/libucp_la-flush.Plo + -rm -f rma/$(DEPDIR)/libucp_la-rma_basic.Plo + -rm -f rma/$(DEPDIR)/libucp_la-rma_send.Plo + -rm -f 
rma/$(DEPDIR)/libucp_la-rma_sw.Plo + -rm -f stream/$(DEPDIR)/libucp_la-stream_recv.Plo + -rm -f stream/$(DEPDIR)/libucp_la-stream_send.Plo + -rm -f tag/$(DEPDIR)/libucp_la-eager_rcv.Plo + -rm -f tag/$(DEPDIR)/libucp_la-eager_snd.Plo + -rm -f tag/$(DEPDIR)/libucp_la-offload.Plo + -rm -f tag/$(DEPDIR)/libucp_la-probe.Plo + -rm -f tag/$(DEPDIR)/libucp_la-rndv.Plo + -rm -f tag/$(DEPDIR)/libucp_la-tag_match.Plo + -rm -f tag/$(DEPDIR)/libucp_la-tag_recv.Plo + -rm -f tag/$(DEPDIR)/libucp_la-tag_send.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-address.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-ep_match.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-select.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-signaling_ep.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-wireup.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-wireup_cm.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-wireup_ep.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-nobase_dist_libucp_laHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f core/$(DEPDIR)/libucp_la-ucp_am.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_context.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_ep.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_listener.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_mm.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_proxy_ep.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_request.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_rkey.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_version.Plo + -rm -f core/$(DEPDIR)/libucp_la-ucp_worker.Plo + -rm -f dt/$(DEPDIR)/libucp_la-dt.Plo + -rm -f 
dt/$(DEPDIR)/libucp_la-dt_contig.Plo + -rm -f dt/$(DEPDIR)/libucp_la-dt_generic.Plo + -rm -f dt/$(DEPDIR)/libucp_la-dt_iov.Plo + -rm -f proto/$(DEPDIR)/libucp_la-proto_am.Plo + -rm -f rma/$(DEPDIR)/libucp_la-amo_basic.Plo + -rm -f rma/$(DEPDIR)/libucp_la-amo_send.Plo + -rm -f rma/$(DEPDIR)/libucp_la-amo_sw.Plo + -rm -f rma/$(DEPDIR)/libucp_la-flush.Plo + -rm -f rma/$(DEPDIR)/libucp_la-rma_basic.Plo + -rm -f rma/$(DEPDIR)/libucp_la-rma_send.Plo + -rm -f rma/$(DEPDIR)/libucp_la-rma_sw.Plo + -rm -f stream/$(DEPDIR)/libucp_la-stream_recv.Plo + -rm -f stream/$(DEPDIR)/libucp_la-stream_send.Plo + -rm -f tag/$(DEPDIR)/libucp_la-eager_rcv.Plo + -rm -f tag/$(DEPDIR)/libucp_la-eager_snd.Plo + -rm -f tag/$(DEPDIR)/libucp_la-offload.Plo + -rm -f tag/$(DEPDIR)/libucp_la-probe.Plo + -rm -f tag/$(DEPDIR)/libucp_la-rndv.Plo + -rm -f tag/$(DEPDIR)/libucp_la-tag_match.Plo + -rm -f tag/$(DEPDIR)/libucp_la-tag_recv.Plo + -rm -f tag/$(DEPDIR)/libucp_la-tag_send.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-address.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-ep_match.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-select.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-signaling_ep.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-wireup.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-wireup_cm.Plo + -rm -f wireup/$(DEPDIR)/libucp_la-wireup_ep.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES \ + uninstall-nobase_dist_libucp_laHEADERS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libLTLIBRARIES clean-libtool cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + 
install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man \ + install-nobase_dist_libucp_laHEADERS install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES \ + uninstall-nobase_dist_libucp_laHEADERS + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/ucp/api/ucp.h b/src/ucp/api/ucp.h new file mode 100644 index 0000000..18aa3f2 --- /dev/null +++ b/src/ucp/api/ucp.h @@ -0,0 +1,3145 @@ +/* +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +* Copyright (C) Los Alamos National Security, LLC. 2018 ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#ifndef UCP_H_ +#define UCP_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +BEGIN_C_DECLS + +/** + * @defgroup UCP_API Unified Communication Protocol (UCP) API + * @{ + * This section describes UCP API. + * @} + */ + +/** + * @defgroup UCP_CONTEXT UCP Application Context + * @ingroup UCP_API + * @{ + * Application context is a primary concept of UCP design which + * provides an isolation mechanism, allowing resources associated + * with the context to separate or share network communication context + * across multiple instances of applications. + * + * This section provides a detailed description of this concept and + * routines associated with it. 
+ * + * @} + */ + + + /** + * @defgroup UCP_WORKER UCP Worker + * @ingroup UCP_API + * @{ + * UCP Worker routines + * @} + */ + + + /** + * @defgroup UCP_MEM UCP Memory routines + * @ingroup UCP_API + * @{ + * UCP Memory routines + * @} + */ + + + /** + * @defgroup UCP_WAKEUP UCP Wake-up routines + * @ingroup UCP_API + * @{ + * UCP Wake-up routines + * @} + */ + + + /** + * @defgroup UCP_ENDPOINT UCP Endpoint + * @ingroup UCP_API + * @{ + * UCP Endpoint routines + * @} + */ + + + /** + * @defgroup UCP_COMM UCP Communication routines + * @ingroup UCP_API + * @{ + * UCP Communication routines + * @} + */ + + + /** + * @defgroup UCP_CONFIG UCP Configuration + * @ingroup UCP_API + * @{ + * This section describes routines for configuration + * of the UCP network layer + * @} + */ + + + /** + * @defgroup UCP_DATATYPE UCP Data type routines + * @ingroup UCP_API + * @{ + * UCP Data type routines + * @} + */ + + +/** + * @ingroup UCP_CONTEXT + * @brief UCP context parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_params_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_params_field { + UCP_PARAM_FIELD_FEATURES = UCS_BIT(0), /**< features */ + UCP_PARAM_FIELD_REQUEST_SIZE = UCS_BIT(1), /**< request_size */ + UCP_PARAM_FIELD_REQUEST_INIT = UCS_BIT(2), /**< request_init */ + UCP_PARAM_FIELD_REQUEST_CLEANUP = UCS_BIT(3), /**< request_cleanup */ + UCP_PARAM_FIELD_TAG_SENDER_MASK = UCS_BIT(4), /**< tag_sender_mask */ + UCP_PARAM_FIELD_MT_WORKERS_SHARED = UCS_BIT(5), /**< mt_workers_shared */ + UCP_PARAM_FIELD_ESTIMATED_NUM_EPS = UCS_BIT(6), /**< estimated_num_eps */ + UCP_PARAM_FIELD_ESTIMATED_NUM_PPN = UCS_BIT(7) /**< estimated_num_ppn */ +}; + + +/** + * @ingroup UCP_CONTEXT + * @brief UCP configuration features + * + * The enumeration list describes the features supported by UCP. 
An + * application can request the features using @ref ucp_params_t "UCP parameters" + * during @ref ucp_init "UCP initialization" process. + */ +enum ucp_feature { + UCP_FEATURE_TAG = UCS_BIT(0), /**< Request tag matching + support */ + UCP_FEATURE_RMA = UCS_BIT(1), /**< Request remote memory + access support */ + UCP_FEATURE_AMO32 = UCS_BIT(2), /**< Request 32-bit atomic + operations support */ + UCP_FEATURE_AMO64 = UCS_BIT(3), /**< Request 64-bit atomic + operations support */ + UCP_FEATURE_WAKEUP = UCS_BIT(4), /**< Request interrupt + notification support */ + UCP_FEATURE_STREAM = UCS_BIT(5), /**< Request stream support */ + UCP_FEATURE_AM = UCS_BIT(6) /**< Request Active Message + support */ +}; + + +/** + * @ingroup UCP_WORKER + * @brief UCP worker parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_worker_params_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_worker_params_field { + UCP_WORKER_PARAM_FIELD_THREAD_MODE = UCS_BIT(0), /**< UCP thread mode */ + UCP_WORKER_PARAM_FIELD_CPU_MASK = UCS_BIT(1), /**< Worker's CPU bitmap */ + UCP_WORKER_PARAM_FIELD_EVENTS = UCS_BIT(2), /**< Worker's events bitmap */ + UCP_WORKER_PARAM_FIELD_USER_DATA = UCS_BIT(3), /**< User data */ + UCP_WORKER_PARAM_FIELD_EVENT_FD = UCS_BIT(4) /**< External event file + descriptor */ +}; + + +/** + * @ingroup UCP_WORKER + * @brief UCP listener parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_listener_params_t + * are present. It is used to enable backward compatibility support. + */ +enum ucp_listener_params_field { + /** + * Sock address and length. + */ + UCP_LISTENER_PARAM_FIELD_SOCK_ADDR = UCS_BIT(0), + + /** + * User's callback and argument for handling the creation of an endpoint. + * */ + UCP_LISTENER_PARAM_FIELD_ACCEPT_HANDLER = UCS_BIT(1), + + /** User's callback and argument for handling the incoming connection + * request.
*/ + UCP_LISTENER_PARAM_FIELD_CONN_HANDLER = UCS_BIT(2) +}; + + +/** + * @ingroup UCP_WORKER + * @brief UCP worker address flags. + * + * The enumeration list describes possible UCP worker address flags, indicating + * what needs to be included in the worker address returned by + * @ref ucp_worker_query "ucp_worker_query()" routine. + */ +typedef enum { + + /** Pack addresses of network devices only. Using such shortened addresses + * for the remote node peers will reduce the amount of wireup data being + * exchanged during connection establishment phase. */ + UCP_WORKER_ADDRESS_FLAG_NET_ONLY = UCS_BIT(0) +} ucp_worker_address_flags_t; + + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP endpoint parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_ep_params_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_ep_params_field { + UCP_EP_PARAM_FIELD_REMOTE_ADDRESS = UCS_BIT(0), /**< Address of remote + peer */ + UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE = UCS_BIT(1), /**< Error handling mode. + @ref ucp_err_handling_mode_t */ + UCP_EP_PARAM_FIELD_ERR_HANDLER = UCS_BIT(2), /**< Handler to process + transport level errors */ + UCP_EP_PARAM_FIELD_USER_DATA = UCS_BIT(3), /**< User data pointer */ + UCP_EP_PARAM_FIELD_SOCK_ADDR = UCS_BIT(4), /**< Socket address field */ + UCP_EP_PARAM_FIELD_FLAGS = UCS_BIT(5), /**< Endpoint flags */ + UCP_EP_PARAM_FIELD_CONN_REQUEST = UCS_BIT(6) /**< Connection request field */ +}; + + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP endpoint parameters flags. + * + * The enumeration list describes the endpoint's parameters flags supported by + * @ref ucp_ep_create() function. + */ +enum ucp_ep_params_flags_field { + UCP_EP_PARAMS_FLAGS_CLIENT_SERVER = UCS_BIT(0), /**< Using a client-server + connection establishment + mechanism.
+ @ref ucs_sock_addr_t + sockaddr field + must be provided and + contain the address + of the remote peer */ + UCP_EP_PARAMS_FLAGS_NO_LOOPBACK = UCS_BIT(1) /**< Avoid connecting the + endpoint to itself when + connecting the endpoint + to the same worker it + was created on. + Affects protocols which + send to a particular + remote endpoint, for + example stream */ +}; + + +/** + * @ingroup UCP_ENDPOINT + * @brief Close UCP endpoint modes. + * + * The enumeration is used to specify the behavior of @ref ucp_ep_close_nb. + */ +enum ucp_ep_close_mode { + UCP_EP_CLOSE_MODE_FORCE = 0, /**< @ref ucp_ep_close_nb releases + the endpoint without any + confirmation from the peer. All + outstanding requests will be + completed with + @ref UCS_ERR_CANCELED error. + @note This mode may cause + transport level errors on remote + side, so it requires setting + @ref UCP_ERR_HANDLING_MODE_PEER + for all endpoints created on + both (local and remote) sides to + avoid undefined behavior. */ + UCP_EP_CLOSE_MODE_FLUSH = 1 /**< @ref ucp_ep_close_nb schedules + flushes on all outstanding + operations. */ +}; + + +/** + * @ingroup UCP_MEM + * @brief UCP memory mapping parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_mem_map_params_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_mem_map_params_field { + UCP_MEM_MAP_PARAM_FIELD_ADDRESS = UCS_BIT(0), /**< Address of the memory that + will be used in the + @ref ucp_mem_map routine. */ + UCP_MEM_MAP_PARAM_FIELD_LENGTH = UCS_BIT(1), /**< The size of memory that + will be allocated or + registered in the + @ref ucp_mem_map routine.*/ + UCP_MEM_MAP_PARAM_FIELD_FLAGS = UCS_BIT(2) /**< Allocation flags. */ +}; + +/** + * @ingroup UCP_MEM + * @brief UCP memory advice parameters field mask. + * + * The enumeration allows specifying which fields in @ref ucp_mem_advise_params_t are + * present. It is used to enable backward compatibility support.
+ */ +enum ucp_mem_advise_params_field { + UCP_MEM_ADVISE_PARAM_FIELD_ADDRESS = UCS_BIT(0), /**< Address of the memory */ + UCP_MEM_ADVISE_PARAM_FIELD_LENGTH = UCS_BIT(1), /**< The size of memory */ + UCP_MEM_ADVISE_PARAM_FIELD_ADVICE = UCS_BIT(2) /**< Advice on memory usage */ +}; + + +/** + * @ingroup UCP_CONTEXT + * @brief UCP context attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_context_attr_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_context_attr_field { + UCP_ATTR_FIELD_REQUEST_SIZE = UCS_BIT(0), /**< UCP request size */ + UCP_ATTR_FIELD_THREAD_MODE = UCS_BIT(1) /**< UCP context thread flag */ +}; + + +/** + * @ingroup UCP_WORKER + * @brief UCP worker attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_worker_attr_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_worker_attr_field { + UCP_WORKER_ATTR_FIELD_THREAD_MODE = UCS_BIT(0), /**< UCP thread mode */ + UCP_WORKER_ATTR_FIELD_ADDRESS = UCS_BIT(1), /**< UCP address */ + UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS = UCS_BIT(2) /**< UCP address flags */ +}; + + +/** + * @ingroup UCP_WORKER + * @brief UCP listener attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_listener_attr_t are + * present. It is used to enable backward compatibility support. + */ +enum ucp_listener_attr_field { + UCP_LISTENER_ATTR_FIELD_SOCKADDR = UCS_BIT(0) /**< Sockaddr used for listening */ +}; + + +/** + * @ingroup UCP_DATATYPE + * @brief UCP data type classification + * + * The enumeration list describes the datatypes supported by UCP. 
+ */ +enum ucp_dt_type { + UCP_DATATYPE_CONTIG = 0, /**< Contiguous datatype */ + UCP_DATATYPE_STRIDED = 1, /**< Strided datatype */ + UCP_DATATYPE_IOV = 2, /**< Scatter-gather list with multiple pointers */ + UCP_DATATYPE_GENERIC = 7, /**< Generic datatype with + user-defined pack/unpack routines */ + UCP_DATATYPE_SHIFT = 3, /**< Number of bits defining + the datatype classification */ + UCP_DATATYPE_CLASS_MASK = UCS_MASK(UCP_DATATYPE_SHIFT) /**< Data-type class + mask */ +}; + + +/** + * @ingroup UCP_MEM + * @brief UCP memory mapping flags. + * + * The enumeration list describes the memory mapping flags supported by @ref + * ucp_mem_map() function. + */ +enum { + UCP_MEM_MAP_NONBLOCK = UCS_BIT(0), /**< Complete the mapping faster, possibly by + not populating the pages in the mapping + up-front, and mapping them later when + they are accessed by communication + routines. */ + UCP_MEM_MAP_ALLOCATE = UCS_BIT(1), /**< Identify requirement for allocation, + if passed address is not a null-pointer + then it will be used as a hint or direct + address for allocation. */ + UCP_MEM_MAP_FIXED = UCS_BIT(2) /**< Don't interpret address as a hint: + place the mapping at exactly that + address. The address must be a multiple + of the page size. */ +}; + + +/** + * @ingroup UCP_WORKER + * @brief Flags for a UCP Active Message callback. + * + * Flags that indicate how to handle UCP Active Messages. + * Currently only UCP_AM_FLAG_WHOLE_MSG is supported, + * which indicates the entire message is handled in one + * callback. + */ +enum ucp_am_cb_flags { + UCP_AM_FLAG_WHOLE_MSG = UCS_BIT(0) +}; + + +/** + * @ingroup UCP_WORKER + * @brief Flags for sending a UCP Active Message. + * + * Flags dictate the behavior of ucp_am_send_nb. + * Currently the only flag tells UCP to pass in + * the sending endpoint to the + * callback so a reply can be defined.
+ */ +enum ucp_send_am_flags { + UCP_AM_SEND_REPLY = UCS_BIT(0) +}; + + +/** + * @ingroup UCP_ENDPOINT + * @brief Descriptor flags for Active Message callback. + * + * If flags is set to UCP_CB_PARAM_FLAG_DATA in + * a callback then data was allocated, so if UCS_INPROGRESS is + * returned from the callback, the data parameter will persist + * and the user has to call @ref ucp_am_data_release when data is + * no longer needed. + */ +enum ucp_cb_param_flags { + UCP_CB_PARAM_FLAG_DATA = UCS_BIT(0) +}; + + +/** + * @ingroup UCP_COMM + * @brief Atomic operation requested for ucp_atomic_post + * + * This enumeration defines which atomic memory operation should be + * performed by the ucp_atomic_post family of functions. All of these are + * non-fetching atomics and will not result in a request handle. + */ +typedef enum { + UCP_ATOMIC_POST_OP_ADD, /**< Atomic add */ + UCP_ATOMIC_POST_OP_AND, /**< Atomic and */ + UCP_ATOMIC_POST_OP_OR, /**< Atomic or */ + UCP_ATOMIC_POST_OP_XOR, /**< Atomic xor */ + UCP_ATOMIC_POST_OP_LAST +} ucp_atomic_post_op_t; + + +/** + * @ingroup UCP_COMM + * @brief Atomic operation requested for ucp_atomic_fetch + * + * This enumeration defines which atomic memory operation should be performed + * by the ucp_atomic_fetch family of functions. All of these functions + * will fetch data from the remote node. + */ +typedef enum { + UCP_ATOMIC_FETCH_OP_FADD, /**< Atomic Fetch and add */ + UCP_ATOMIC_FETCH_OP_SWAP, /**< Atomic swap */ + UCP_ATOMIC_FETCH_OP_CSWAP, /**< Atomic conditional swap */ + UCP_ATOMIC_FETCH_OP_FAND, /**< Atomic Fetch and and */ + UCP_ATOMIC_FETCH_OP_FOR, /**< Atomic Fetch and or */ + UCP_ATOMIC_FETCH_OP_FXOR, /**< Atomic Fetch and xor */ + UCP_ATOMIC_FETCH_OP_LAST +} ucp_atomic_fetch_op_t; + + +/** + * @ingroup UCP_COMM + * @brief Flags to define behavior of @ref ucp_stream_recv_nb function + * + * This enumeration defines behavior of @ref ucp_stream_recv_nb function.
+ */ +typedef enum { + UCP_STREAM_RECV_FLAG_WAITALL = UCS_BIT(0) /**< This flag requests that + operation will not be + completed until the full amount + of requested data is + received and placed in the + user buffer. */ +} ucp_stream_recv_flags_t; + + +/** + * @ingroup UCP_DATATYPE + * @brief Generate an identifier for contiguous data type. + * + * This macro creates an identifier for contiguous datatype that is defined by + * the size of the basic element. + * + * @param [in] _elem_size Size of the basic element of the type. + * + * @return Data-type identifier. + * + * @note In case of partial receive, the buffer will be filled with integral + * count of elements. + */ +#define ucp_dt_make_contig(_elem_size) \ + (((ucp_datatype_t)(_elem_size) << UCP_DATATYPE_SHIFT) | UCP_DATATYPE_CONTIG) + + +/** + * @ingroup UCP_DATATYPE + * @brief Generate an identifier for Scatter-gather IOV data type. + * + * This macro creates an identifier for datatype of scatter-gather list + * with multiple pointers + * + * @return Data-type identifier. + * + * @note In case of partial receive, @ref ucp_dt_iov_t::buffer can be filled + * with any number of bytes according to its @ref ucp_dt_iov_t::length. + */ +#define ucp_dt_make_iov() (UCP_DATATYPE_IOV) + + +/** + * @ingroup UCP_DATATYPE + * @brief Structure for scatter-gather I/O. + * + * This structure is used to specify a list of buffers which can be used + * within a single data transfer function call. + * + * @note If @a length is zero, the memory pointed to by @a buffer + * will not be accessed. Otherwise, @a buffer must point to valid memory. + */ +typedef struct ucp_dt_iov { + void *buffer; /**< Pointer to a data buffer */ + size_t length; /**< Length of the @a buffer in bytes */ +} ucp_dt_iov_t; + + +/** + * @ingroup UCP_DATATYPE + * @brief UCP generic data type descriptor + * + * This structure provides a generic datatype descriptor that + * is used for definition of application defined datatypes.
+ + * Typically, the descriptor is used for an integration with datatype + * engines implemented within MPI and SHMEM implementations. + * + * @note In case of partial receive, any amount of received data is acceptable + * which matches buffer size. + */ +typedef struct ucp_generic_dt_ops { + + /** + * @ingroup UCP_DATATYPE + * @brief Start a packing request. + * + * The pointer refers to application defined start-to-pack routine. It will + * be called from the @ref ucp_tag_send_nb routine. + * + * @param [in] context User-defined context. + * @param [in] buffer Buffer to pack. + * @param [in] count Number of elements to pack into the buffer. + * + * @return A custom state that is passed to the following + * @ref ucp_generic_dt_ops::pack "pack()" routine. + */ + void* (*start_pack)(void *context, const void *buffer, size_t count); + + /** + * @ingroup UCP_DATATYPE + * @brief Start an unpacking request. + * + * The pointer refers to application defined start-to-unpack routine. It will + * be called from the @ref ucp_tag_recv_nb routine. + * + * @param [in] context User-defined context. + * @param [in] buffer Buffer to unpack to. + * @param [in] count Number of elements to unpack in the buffer. + * + * @return A custom state that is passed later to the following + * @ref ucp_generic_dt_ops::unpack "unpack()" routine. + */ + void* (*start_unpack)(void *context, void *buffer, size_t count); + + /** + * @ingroup UCP_DATATYPE + * @brief Get the total size of packed data. + * + * The pointer refers to user defined routine that returns the size of data + * in a packed format. + * + * @param [in] state State as returned by + * @ref ucp_generic_dt_ops::start_pack + * "start_pack()" routine. + * + * @return The size of the data in a packed form. + */ + size_t (*packed_size)(void *state); + + /** + * @ingroup UCP_DATATYPE + * @brief Pack data. + * + * The pointer refers to application defined pack routine.
+ * + * @param [in] state State as returned by + * @ref ucp_generic_dt_ops::start_pack + * "start_pack()" routine. + * @param [in] offset Virtual offset in the output stream. + * @param [in] dest Destination to pack the data to. + * @param [in] max_length Maximal length to pack. + * + * @return The size of the data that was written to the destination buffer. + * Must be less than or equal to @e max_length. + */ + size_t (*pack) (void *state, size_t offset, void *dest, size_t max_length); + + /** + * @ingroup UCP_DATATYPE + * @brief Unpack data. + * + * The pointer refers to application defined unpack routine. + * + * @param [in] state State as returned by + * @ref ucp_generic_dt_ops::start_unpack + * "start_unpack()" routine. + * @param [in] offset Virtual offset in the input stream. + * @param [in] src Source to unpack the data from. + * @param [in] length Length to unpack. + * + * @return UCS_OK or an error if unpacking failed. + */ + ucs_status_t (*unpack)(void *state, size_t offset, const void *src, size_t length); + + /** + * @ingroup UCP_DATATYPE + * @brief Finish packing/unpacking. + * + * The pointer refers to application defined finish routine. + * + * @param [in] state State as returned by + * @ref ucp_generic_dt_ops::start_pack + * "start_pack()" + * and + * @ref ucp_generic_dt_ops::start_unpack + * "start_unpack()" + * routines. + */ + void (*finish)(void *state); +} ucp_generic_dt_ops_t; + + +/** + * @ingroup UCP_CONFIG + * @brief Tuning parameters for UCP library. + * + * The structure defines the parameters that are used for + * UCP library tuning during UCP library @ref ucp_init "initialization". + * + * @note UCP library implementation uses the @ref ucp_feature "features" + * parameter to optimize the library functionality that minimize memory + * footprint. For example, if the application does not require send/receive + * semantics UCP library may avoid allocation of expensive resources associated with + * send/receive queues. 
+ */ +typedef struct ucp_params { + /** + * Mask of valid fields in this structure, using bits from @ref ucp_params_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * UCP @ref ucp_feature "features" that are used for library + * initialization. It is recommended for applications only to request + * the features that are required for an optimal functionality. + * This field must be specified. + */ + uint64_t features; + + /** + * The size of a reserved space in non-blocking requests. Typically + * applications use this space for caching own structures in order to avoid + * costly memory allocations, pointer dereferences, and cache misses. + * For example, MPI implementation can use this memory for caching MPI + * descriptors. + * This field defaults to 0 if not specified. + */ + size_t request_size; + + /** + * Pointer to a routine that is used for the request initialization. + * This function will be called only on the very first time a request memory + * is initialized, and may not be called again if a request is reused. + * If a request should be reset before the next reuse, it can be done before + * calling @ref ucp_request_free. + * + * @e NULL can be used if no such function is required, which is also the + * default if this field is not specified by @ref field_mask. + */ + ucp_request_init_callback_t request_init; + + /** + * Pointer to a routine that is responsible for final cleanup of the memory + * associated with the request. This routine may not be called every time a + * request is released. For some implementations, the cleanup call may be + * delayed and only invoked at @ref ucp_worker_destroy. + * + * @e NULL can be used if no such function is required, which is also the + * default if this field is not specified by @ref field_mask.
+ */ + ucp_request_cleanup_callback_t request_cleanup; + + /** + * Mask which specifies particular bits of the tag which can uniquely + * identify the sender (UCP endpoint) in tagged operations. + * This field defaults to 0 if not specified. + */ + uint64_t tag_sender_mask; + + /** + * This flag indicates if this context is shared by multiple workers + * from different threads. If so, this context needs thread safety + * support; otherwise, the context does not need to provide thread + * safety. + * For example, if the context is used by single worker, and that + * worker is shared by multiple threads, this context does not need + * thread safety; if the context is used by worker 1 and worker 2, + * and worker 1 is used by thread 1 and worker 2 is used by thread 2, + * then this context needs thread safety. + * Note that actual thread mode may be different from mode passed + * to @ref ucp_init. To get actual thread mode use + * @ref ucp_context_query. + */ + int mt_workers_shared; + + /** + * An optimization hint of how many endpoints will be created on this context. + * For example, when used from MPI or SHMEM libraries, this number will specify + * the number of ranks (or processing elements) in the job. + * Does not affect semantics, but only transport selection criteria and the + * resulting performance. + * The value can be also set by UCX_NUM_EPS environment variable. In such case + * it will override the number of endpoints set by @e estimated_num_eps + */ + size_t estimated_num_eps; + + /** + * An optimization hint for a single node. For example, when used from MPI or + * OpenSHMEM libraries, this number will specify the number of Processes Per + * Node (PPN) in the job. Does not affect semantics, only transport selection + * criteria and the resulting performance. 
+ * The value can be also set by the UCX_NUM_PPN environment variable, which + * will override the number of processes per node set by @e estimated_num_ppn. + */ + size_t estimated_num_ppn; +} ucp_params_t; + + +/** + * @ingroup UCP_CONTEXT + * @brief Context attributes. + * + * The structure defines the attributes which characterize + * the particular context. + */ +typedef struct ucp_context_attr { + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_context_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Size of UCP non-blocking request. When pre-allocated request is used + * (e.g. in @ref ucp_tag_recv_nbr) it should have enough space to fit + * UCP request data, which is defined by this value. + */ + size_t request_size; + + /** + * Thread safe level of the context. For supported thread levels please + * see @ref ucs_thread_mode_t. + */ + ucs_thread_mode_t thread_mode; +} ucp_context_attr_t; + + +/** + * @ingroup UCP_WORKER + * @brief UCP worker attributes. + * + * The structure defines the attributes which characterize + * the particular worker. + */ +typedef struct ucp_worker_attr { + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_worker_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Thread safe level of the worker. + */ + ucs_thread_mode_t thread_mode; + + /** + * Flags indicating requested details of the worker address. + * If @ref UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS bit is set in the field_mask, + * this value should be set as well. Possible flags are specified + * in @ref ucp_worker_address_flags_t. @note This is an input attribute.
+ */ + uint32_t address_flags; + + /** + * Worker address, which can be passed to remote instances of the UCP library + * in order to connect to this worker. The memory for the address handle is + * allocated by @ref ucp_worker_query "ucp_worker_query()" routine, and + * must be released by using @ref ucp_worker_release_address + * "ucp_worker_release_address()" routine. + */ + ucp_address_t *address; + + /** + * Size of worker address in bytes. + */ + size_t address_length; +} ucp_worker_attr_t; + + +/** + * @ingroup UCP_WORKER + * @brief Tuning parameters for the UCP worker. + * + * The structure defines the parameters that are used for the + * UCP worker tuning during the UCP worker @ref ucp_worker_create "creation". + */ +typedef struct ucp_worker_params { + /** + * Mask of valid fields in this structure, using bits from @ref ucp_worker_params_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * The parameter thread_mode suggests the thread safety mode which worker + * and the associated resources should be created with. This is an + * optional parameter. The default value is UCS_THREAD_MODE_SINGLE and + * it is used when the value of the parameter is not set. When this + * parameter along with its corresponding bit in the + * field_mask - UCP_WORKER_PARAM_FIELD_THREAD_MODE is set, the + * @ref ucp_worker_create attempts to create worker with this thread mode. + * The thread mode with which worker is created can differ from the + * suggested mode. The actual thread mode of the worker should be obtained + * using the query interface @ref ucp_worker_query. + */ + ucs_thread_mode_t thread_mode; + + /** + * Mask of which CPUs worker resources should preferably be allocated on. + * This value is optional. 
+ * If it's not set (along with its corresponding bit in the field_mask - + * UCP_WORKER_PARAM_FIELD_CPU_MASK), resources are allocated according to + * system's default policy. + */ + ucs_cpu_set_t cpu_mask; + + /** + * Mask of events (@ref ucp_wakeup_event_t) which are expected on wakeup. + * This value is optional. + * If it's not set (along with its corresponding bit in the field_mask - + * UCP_WORKER_PARAM_FIELD_EVENTS), all types of events will trigger on + * wakeup. + */ + unsigned events; + + /** + * User data associated with the current worker. + * This value is optional. + * If it's not set (along with its corresponding bit in the field_mask - + * UCP_WORKER_PARAM_FIELD_USER_DATA), it will default to NULL. + */ + void *user_data; + + /** + * External event file descriptor. + * This value is optional. + * If @ref UCP_WORKER_PARAM_FIELD_EVENT_FD is set in the field_mask, events + * on the worker will be reported on the provided event file descriptor. In + * this case, calling @ref ucp_worker_get_efd will result in an error. + * The provided file descriptor must be capable of aggregating notifications + * for arbitrary events, for example @c epoll(7) on Linux systems. + * @ref user_data will be used as the event user-data on systems which + * support it. For example, on Linux, it will be placed in + * @c epoll_data_t::ptr, when returned from @c epoll_wait(2). + * + * Otherwise, events will be reported to the event file descriptor returned + * from @ref ucp_worker_get_efd(). + */ + int event_fd; + +} ucp_worker_params_t; + + +/** + * @ingroup UCP_WORKER + * @brief UCP listener attributes. + * + * The structure defines the attributes which characterize + * the particular listener. + */ +typedef struct ucp_listener_attr { + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_listener_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. 
+ */ + uint64_t field_mask; + + /** + * Sockaddr on which this listener is listening for incoming connection + * requests. + */ + struct sockaddr_storage sockaddr; +} ucp_listener_attr_t; + + +/** + * @ingroup UCP_WORKER + * @brief Parameters for a UCP listener object. + * + * This structure defines parameters for @ref ucp_listener_create, which is used to + * listen for incoming client/server connections. + */ +typedef struct ucp_listener_params { + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_listener_params_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * An address in the form of a sockaddr. + * This field is mandatory for filling (along with its corresponding bit + * in the field_mask - @ref UCP_LISTENER_PARAM_FIELD_SOCK_ADDR). + * The @ref ucp_listener_create routine will return with an error if sockaddr + * is not specified. + */ + ucs_sock_addr_t sockaddr; + + /** + * Handler to endpoint creation in a client-server connection flow. + * In order for the callback inside this handler to be invoked, the + * UCP_LISTENER_PARAM_FIELD_ACCEPT_HANDLER needs to be set in the + * field_mask. + */ + ucp_listener_accept_handler_t accept_handler; + + /** + * Handler of an incoming connection request in a client-server connection + * flow. In order for the callback inside this handler to be invoked, the + * @ref UCP_LISTENER_PARAM_FIELD_CONN_HANDLER needs to be set in the + * field_mask. + */ + ucp_listener_conn_handler_t conn_handler; +} ucp_listener_params_t; + + +/** + * @ingroup UCP_ENDPOINT + * @brief Output parameter of @ref ucp_stream_worker_poll function. + * + * The structure defines the endpoint and its user data. + */ +typedef struct ucp_stream_poll_ep { + /** + * Endpoint handle. + */ + ucp_ep_h ep; + + /** + * User data associated with an endpoint passed in + * @ref ucp_ep_params_t::user_data. 
+ */ + void *user_data; + + /** + * Reserved for future use. + */ + unsigned flags; + + /** + * Reserved for future use. + */ + uint8_t reserved[16]; +} ucp_stream_poll_ep_t; + + +/** + * @ingroup UCP_MEM + * @brief Tuning parameters for the UCP memory mapping. + * + * The structure defines the parameters that are used for the + * UCP memory mapping tuning during the @ref ucp_mem_map "ucp_mem_map" routine. + */ +typedef struct ucp_mem_map_params { + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_mem_map_params_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * If the address is not NULL, the routine maps (registers) the memory segment + * pointed to by this address. + * If the pointer is NULL, the library allocates mapped (registered) memory + * segment and returns its address in this argument. + * Therefore, this value is optional. + * If it's not set (along with its corresponding bit in the field_mask - + * @ref UCP_MEM_MAP_PARAM_FIELD_ADDRESS), the ucp_mem_map routine will consider + * address as set to NULL and will allocate memory. + */ + void *address; + + /** + * Length (in bytes) to allocate or map (register). + * This field is mandatory for filling (along with its corresponding bit + * in the field_mask - @ref UCP_MEM_MAP_PARAM_FIELD_LENGTH). + * The @ref ucp_mem_map routine will return with an error if the length isn't + * specified. + */ + size_t length; + + /** + * Allocation flags, e.g. @ref UCP_MEM_MAP_NONBLOCK. + * This value is optional. + * If it's not set (along with its corresponding bit in the field_mask - + * @ref UCP_MEM_MAP_PARAM_FIELD_FLAGS), the @ref ucp_mem_map routine will + * consider the flags as set to zero. 
+ */ + unsigned flags; +} ucp_mem_map_params_t; + + +/** + * @ingroup UCP_CONTEXT + * @brief UCP receive information descriptor + * + * The UCP receive information descriptor is allocated by application and filled + * in with the information about the received message by @ref ucp_tag_probe_nb + * or @ref ucp_tag_recv_request_test routines or + * @ref ucp_tag_recv_callback_t callback argument. + */ +struct ucp_tag_recv_info { + /** Sender tag */ + ucp_tag_t sender_tag; + /** The size of the received data */ + size_t length; +}; + + +/** + * @ingroup UCP_CONFIG + * @brief Read UCP configuration descriptor + * + * The routine fetches the information about UCP library configuration from + * the run-time environment. Then, the fetched descriptor is used for + * UCP library @ref ucp_init "initialization". The application can print out the + * descriptor using @ref ucp_config_print "print" routine. In addition + * the application is responsible for @ref ucp_config_release "releasing" the + * descriptor back to the UCP library. + * + * @param [in] env_prefix If non-NULL, the routine searches for the + * environment variables that start with + * @e UCX_\<env_prefix\>_ prefix. + * Otherwise, the routine searches for the + * environment variables that start with + * @e UCX_ prefix. + * @param [in] filename If non-NULL, read configuration from the file + * defined by @e filename. If the file does not + * exist, it will be ignored and no error reported + * to the application. + * @param [out] config_p Pointer to configuration descriptor as defined by + * @ref ucp_config_t "ucp_config_t". + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_config_read(const char *env_prefix, const char *filename, + ucp_config_t **config_p); + + +/** + * @ingroup UCP_CONFIG + * @brief Release configuration descriptor + * + * The routine releases the configuration descriptor that was allocated through + * @ref ucp_config_read "ucp_config_read()" routine.
+ * + * @param [in] config Configuration descriptor as defined by + * @ref ucp_config_t "ucp_config_t". + */ +void ucp_config_release(ucp_config_t *config); + + +/** + * @ingroup UCP_CONFIG + * @brief Modify context configuration. + * + * The routine changes one configuration setting stored in @ref ucp_config_t + * "configuration" descriptor. + * + * @param [in] config Configuration to modify. + * @param [in] name Configuration variable name. + * @param [in] value Value to set. + * + * @return Error code. + */ +ucs_status_t ucp_config_modify(ucp_config_t *config, const char *name, + const char *value); + + +/** + * @ingroup UCP_CONFIG + * @brief Print configuration information + * + * The routine prints the configuration information that is stored in + * @ref ucp_config_t "configuration" descriptor. + * + * @todo Expose ucs_config_print_flags_t + * + * @param [in] config @ref ucp_config_t "Configuration descriptor" + * to print. + * @param [in] stream Output stream to print the configuration to. + * @param [in] title Configuration title to print. + * @param [in] print_flags Flags that control various printing options. + */ +void ucp_config_print(const ucp_config_t *config, FILE *stream, + const char *title, ucs_config_print_flags_t print_flags); + + +/** + * @ingroup UCP_CONTEXT + * @brief Get UCP library version. + * + * This routine returns the UCP library version. + * + * @param [out] major_version Filled with library major version. + * @param [out] minor_version Filled with library minor version. + * @param [out] release_number Filled with library release number. + */ +void ucp_get_version(unsigned *major_version, unsigned *minor_version, + unsigned *release_number); + + +/** + * @ingroup UCP_CONTEXT + * @brief Get UCP library version as a string. + * + * This routine returns the UCP library version as a string which consists of: + * "major.minor.release".
+ */ +const char *ucp_get_version_string(void); + + +/** @cond PRIVATE_INTERFACE */ +/** + * @ingroup UCP_CONTEXT + * @brief UCP context initialization with particular API version. + * + * This is an internal routine used to check compatibility with a particular + * API version. @ref ucp_init should be used to create UCP context. + */ +ucs_status_t ucp_init_version(unsigned api_major_version, unsigned api_minor_version, + const ucp_params_t *params, const ucp_config_t *config, + ucp_context_h *context_p); +/** @endcond */ + + +/** + * @ingroup UCP_CONTEXT + * @brief UCP context initialization. + * + * This routine creates and initializes a @ref ucp_context_h + * "UCP application context". + * + * @warning This routine must be called before any other UCP function + * call in the application. + * + * This routine checks API version compatibility, then discovers the available + * network interfaces, and initializes the network resources required for + * discovering of the network and memory related devices. + * This routine is responsible for initializing all information required for + * a particular application scope, for example, MPI application, OpenSHMEM + * application, etc. + * + * @note + * @li Higher level protocols can add additional communication isolation, as + * MPI does with its communicator object. A single communication context may + * be used to support multiple MPI communicators. + * @li The context can be used to isolate the communication that corresponds to + * different protocols. For example, if MPI and OpenSHMEM are using UCP to + * isolate the MPI communication from the OpenSHMEM communication, users should + * use different application context for each of the communication libraries. + * + * @param [in] config UCP configuration descriptor allocated through + * @ref ucp_config_read "ucp_config_read()" routine. + * @param [in] params User defined @ref ucp_params_t configurations for the + * @ref ucp_context_h "UCP application context".
+ * @param [out] context_p Initialized @ref ucp_context_h + * "UCP application context". + * + * @return Error code as defined by @ref ucs_status_t + */ +static inline ucs_status_t ucp_init(const ucp_params_t *params, + const ucp_config_t *config, + ucp_context_h *context_p) +{ + return ucp_init_version(UCP_API_MAJOR, UCP_API_MINOR, params, config, + context_p); +} + + +/** + * @ingroup UCP_CONTEXT + * @brief Release UCP application context. + * + * This routine finalizes and releases the resources associated with a + * @ref ucp_context_h "UCP application context". + * + * @warning An application cannot call any UCP routine + * once the UCP application context released. + * + * The cleanup process releases and shuts down all resources associated with + * the application context. After calling this routine, calling any UCP + * routine without calling @ref ucp_init "UCP initialization routine" is invalid. + * + * @param [in] context_p Handle to @ref ucp_context_h + * "UCP application context". + */ +void ucp_cleanup(ucp_context_h context_p); + + +/** + * @ingroup UCP_CONTEXT + * @brief Get attributes specific to a particular context. + * + * This routine fetches information about the context. + * + * @param [in] context_p Handle to @ref ucp_context_h + * "UCP application context". + * + * @param [out] attr Filled with attributes of @p context_p context. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_context_query(ucp_context_h context_p, + ucp_context_attr_t *attr); + + +/** + * @ingroup UCP_CONTEXT + * @brief Print context information. + * + * This routine prints information about the context configuration: including + * memory domains, transport resources, and other useful information associated + * with the context. + * + * @param [in] context Print this context object's configuration. + * @param [in] stream Output stream on which to print the information. 
+ */ +void ucp_context_print_info(const ucp_context_h context, FILE *stream); + + +/** + * @ingroup UCP_WORKER + * @brief Create a worker object. + * + * This routine allocates and initializes a @ref ucp_worker_h "worker" object. + * Each worker is associated with one and only one @ref ucp_context_h + * "application" context. At the same time, an application context can create + * multiple @ref ucp_worker_h "workers" in order to enable concurrent access to + * communication resources. For example, application can allocate a dedicated + * worker for each application thread, where every worker can be progressed + * independently of others. + * + * @note The worker object is allocated within context of the calling thread + * + * @param [in] context Handle to @ref ucp_context_h + * "UCP application context". + * @param [in] params User defined @ref ucp_worker_params_t configurations for the + * @ref ucp_worker_h "UCP worker". + * @param [out] worker_p A pointer to the worker object allocated by the + * UCP library + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_worker_create(ucp_context_h context, + const ucp_worker_params_t *params, + ucp_worker_h *worker_p); + + +/** + * @ingroup UCP_WORKER + * @brief Destroy a worker object. + * + * This routine releases the resources associated with a + * @ref ucp_worker_h "UCP worker". + * + * @warning Once the UCP worker is destroyed, the worker handle cannot be used + * with any UCP routine. + * + * The destroy process releases and shuts down all resources associated with + * the @ref ucp_worker_h "worker". + * + * @param [in] worker Worker object to destroy. + */ +void ucp_worker_destroy(ucp_worker_h worker); + + +/** + * @ingroup UCP_WORKER + * @brief Get attributes specific to a particular worker. + * + * This routine fetches information about the worker. + * + * @param [in] worker Worker object to query. + * @param [out] attr Filled with attributes of worker.
+ * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_worker_query(ucp_worker_h worker, + ucp_worker_attr_t *attr); + + +/** + * @ingroup UCP_WORKER + * @brief Print information about the worker. + * + * This routine prints information about the protocols being used, thresholds, + * UCT transport methods, and other useful information associated with the worker. + * + * @param [in] worker Worker object to print information for. + * @param [in] stream Output stream to print the information to. + */ +void ucp_worker_print_info(ucp_worker_h worker, FILE *stream); + + +/** + * @ingroup UCP_WORKER + * @brief Get the address of the worker object. + * + * This routine returns the address of the worker object. This address can be + * passed to remote instances of the UCP library in order to connect to this + * worker. The memory for the address handle is allocated by this function, and + * must be released by using @ref ucp_worker_release_address + * "ucp_worker_release_address()" routine. + * + * @param [in] worker Worker object whose address to return. + * @param [out] address_p A pointer to the worker address. + * @param [out] address_length_p The size in bytes of the address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_worker_get_address(ucp_worker_h worker, + ucp_address_t **address_p, + size_t *address_length_p); + + +/** + * @ingroup UCP_WORKER + * @brief Release an address of the worker object. + * + * This routine releases an @ref ucp_address_t "address handle" associated with + * the @ref ucp_worker_h "worker" object. + * + * @warning Once the address is released, the address handle cannot be used + * with any UCP routine. + * + * @param [in] worker Worker object that is associated with the + * address object. + * @param [in] address Address to release; the address object has to + * be allocated using @ref ucp_worker_get_address + * "ucp_worker_get_address()" routine.
+ * + * @todo We should consider to change it to return int so we can catch the + * errors when worker != address + */ +void ucp_worker_release_address(ucp_worker_h worker, ucp_address_t *address); + + +/** + * @ingroup UCP_WORKER + * @brief Progress all communications on a specific worker. + * + * This routine explicitly progresses all communication operations on a worker. + * + * @note + * @li Typically, request wait and test routines call @ref + * ucp_worker_progress "this routine" to progress any outstanding operations. + * @li Transport layers, implementing asynchronous progress using threads, + * require callbacks and other user code to be thread safe. + * @li The state of communication can be advanced (progressed) by blocking + * routines. Nevertheless, the non-blocking routines can not be used for + * communication progress. + * + * @param [in] worker Worker to progress. + * + * @return Non-zero if any communication was progressed, zero otherwise. + */ +unsigned ucp_worker_progress(ucp_worker_h worker); + + +/** + * @ingroup UCP_WORKER + * @brief Poll for endpoints that are ready to consume streaming data. + * + * This non-blocking routine returns endpoints on a worker which are ready + * to consume streaming data. The ready endpoints are placed in @a poll_eps + * array, and the function return value indicates how many are there. + * + * @param [in] worker Worker to poll. + * @param [out] poll_eps Pointer to array of endpoints, should be + * allocated by user. + * @param [in] max_eps Maximal number of endpoints which should be filled + * in @a poll_eps. + * @param [in] flags Reserved for future use. + * + * @return Negative value indicates an error according to @ref ucs_status_t. + * On success, non-negative value (less or equal @a max_eps) indicates + * actual number of endpoints filled in @a poll_eps array. 
+ * + */ +ssize_t ucp_stream_worker_poll(ucp_worker_h worker, + ucp_stream_poll_ep_t *poll_eps, size_t max_eps, + unsigned flags); + + +/** + * @ingroup UCP_WAKEUP + * @brief Obtain an event file descriptor for event notification. + * + * This routine returns a valid file descriptor for polling functions. + * The file descriptor will get signaled when an event occurs, as part of the + * wake-up mechanism. Signaling means a call to poll() or select() with this + * file descriptor will return at this point, with this descriptor marked as the + * reason (or one of the reasons) the function has returned. The user does not + * need to release the obtained file descriptor. + * + * The wake-up mechanism exists to allow for the user process to register for + * notifications on events of the underlying interfaces, and wait until such + * occur. This is an alternative to repeated polling for request completion. + * The goal is to allow for waiting while consuming minimal resources from the + * system. This is recommended for cases where traffic is infrequent, and + * latency can be traded for lower resource consumption while waiting for it. + * + * There are two alternative ways to use the wakeup mechanism: the first is the + * file descriptor obtained per worker (this function) and the second is the + * @ref ucp_worker_wait function for waiting on the next event internally. + * + * @note UCP @ref ucp_feature "features" have to be triggered + * with @ref UCP_FEATURE_WAKEUP to select proper transport + * + * @param [in] worker Worker of notified events. + * @param [out] fd File descriptor. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_worker_get_efd(ucp_worker_h worker, int *fd); + + +/** + * @ingroup UCP_WAKEUP + * @brief Wait for an event of the worker. + * + * This routine waits (blocking) until an event has happened, as part of the + * wake-up mechanism. 
+ * + * This function is guaranteed to return only if new communication events occur + * on the @a worker. Therefore one must drain all existing events before waiting + * on the file descriptor. This can be achieved by calling + * @ref ucp_worker_progress repeatedly until it returns 0. + * + * There are two alternative ways to use the wakeup mechanism. The first is by + * polling on a per-worker file descriptor obtained from @ref ucp_worker_get_efd. + * The second is by using this function to perform an internal wait for the next + * event associated with the specified worker. + * + * @note During the blocking call the wake-up mechanism relies on other means of + * notification and may not progress some of the requests as it would when + * calling @ref ucp_worker_progress (which is not invoked in that duration). + * + * @note UCP @ref ucp_feature "features" have to be triggered + * with @ref UCP_FEATURE_WAKEUP to select proper transport + * + * @param [in] worker Worker to wait for events on. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_worker_wait(ucp_worker_h worker); + + +/** + * @ingroup UCP_WAKEUP + * @brief Wait for memory update on the address + * + * This routine waits for a memory update at the local memory @a address. This + * is a blocking routine. The routine returns when the memory address is + * updated ("write") or an event occurs in the system. + * + * This function is guaranteed to return only if new communication events occur + * on the worker or @a address is modified. Therefore one must drain all existing + * events before waiting on the file descriptor. This can be achieved by calling + * @ref ucp_worker_progress repeatedly until it returns 0. + * + * @note This routine can be used by an application that executes busy-waiting + * loop checking for a memory update. 
Instead of continuous busy-waiting on an + * address the application can use @a ucp_worker_wait_mem, which may suspend + * execution until the memory is updated. The goal of the routine is to provide + * an opportunity for energy savings for architectures that support this + * functionality. + * + * @param [in] worker Worker to wait for updates on. + * @param [in] address Local memory address + */ +void ucp_worker_wait_mem(ucp_worker_h worker, void *address); + + +/** + * @ingroup UCP_WAKEUP + * @brief Turn on event notification for the next event. + * + * This routine needs to be called before waiting on each notification on this + * worker, so will typically be called once the processing of the previous event + * is over, as part of the wake-up mechanism. + * + * The worker must be armed before waiting on an event (must be re-armed after + * it has been signaled for re-use) with @ref ucp_worker_arm. + * The events triggering a signal of the file descriptor from + * @ref ucp_worker_get_efd depend on the interfaces used by the worker and + * defined in the transport layer, and typically represent a request completion + * or newly available resources. It can also be triggered by calling + * @ref ucp_worker_signal . + * + * The file descriptor is guaranteed to become signaled only if new communication + * events occur on the @a worker. Therefore one must drain all existing events + * before waiting on the file descriptor. This can be achieved by calling + * @ref ucp_worker_progress repeatedly until it returns 0. + * + * @code {.c} + * void application_initialization() { + * // should be called once in application init flow and before + * // process_communication() is used + * ... + * status = ucp_worker_get_efd(worker, &fd); + * ... + * } + * + * void process_communication() { + * // should be called every time need to wait for some condition such as + * // ucp request completion in sleep mode.
+ * + * for (;;) { + * // check for stop condition as long as progress is made + * if (check_for_events()) { + * break; + * } else if (ucp_worker_progress(worker)) { + * continue; // some progress happened but condition not met + * } + * + * // arm the worker and clean-up fd + * status = ucp_worker_arm(worker); + * if (UCS_OK == status) { + * poll(&fds, nfds, timeout); // wait for events (sleep mode) + * } else if (UCS_ERR_BUSY == status) { + * continue; // could not arm, need to progress more + * } else { + * abort(); + * } + * } + * } + * @endcode + * + * @note UCP @ref ucp_feature "features" have to be triggered + * with @ref UCP_FEATURE_WAKEUP to select proper transport + * + * @param [in] worker Worker of notified events. + * + * @return ::UCS_OK The operation completed successfully. File descriptor + * will be signaled by new events. + * @return ::UCS_ERR_BUSY There are unprocessed events which prevent the + * file descriptor from being armed. These events should + * be removed by calling @ref ucp_worker_progress(). + * The operation is not completed. File descriptor + * will not be signaled by new events. + * @return @ref ucs_status_t "Other" different error codes in case of issues. + */ +ucs_status_t ucp_worker_arm(ucp_worker_h worker); + + +/** + * @ingroup UCP_WAKEUP + * @brief Cause an event of the worker. + * + * This routine signals that the event has happened, as part of the wake-up + * mechanism. This function causes a blocking call to @ref ucp_worker_wait or + * waiting on a file descriptor from @ref ucp_worker_get_efd to return, even + * if no event from the underlying interfaces has taken place. + * + * @note It's safe to use this routine from any thread, even if UCX is compiled + * without multi-threading support and/or initialized with any value of + * @ref ucp_params_t::mt_workers_shared and + * @ref ucp_worker_params_t::thread_mode parameters + * + * @param [in] worker Worker to signal.
+ * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_worker_signal(ucp_worker_h worker); + + +/** + * @ingroup UCP_WORKER + * @brief Accept connections on a local address of the worker object. + * + * This routine binds the worker object to a @ref ucs_sock_addr_t sockaddr + * which is set by the user. + * The worker will listen to incoming connection requests and upon receiving such + * a request from the remote peer, an endpoint to it will be created. + * The user's call-back will be invoked once the endpoint is created. + * + * @param [in] worker Worker object that is associated with the + * params object. + * @param [in] params User defined @ref ucp_listener_params_t + * configurations for the @ref ucp_listener_h. + * @param [out] listener_p A handle to the created listener, can be released + * by calling @ref ucp_listener_destroy + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_listener_create(ucp_worker_h worker, + const ucp_listener_params_t *params, + ucp_listener_h *listener_p); + + +/** + * @ingroup UCP_WORKER + * @brief Stop accepting connections on a local address of the worker object. + * + * This routine unbinds the worker from the given handle and stops + * listening for incoming connection requests on it. + * + * @param [in] listener A handle to the listener to stop listening on. + */ +void ucp_listener_destroy(ucp_listener_h listener); + + +/** + * @ingroup UCP_WORKER + * @brief Get attributes specific to a particular listener. + * + * This routine fetches information about the listener. + * + * @param [in] listener listener object to query. + * @param [out] attr Filled with attributes of the listener. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_listener_query(ucp_listener_h listener, ucp_listener_attr_t *attr); + + +/** + * @ingroup UCP_ENDPOINT + * @brief Create and connect an endpoint. 
+ * + * This routine creates and connects an @ref ucp_ep_h "endpoint" on a @ref + * ucp_worker_h "local worker" for a destination @ref ucp_address_t "address" + * that identifies the remote @ref ucp_worker_h "worker". This function is + * non-blocking, and communications may begin immediately after it returns. If + * the connection process is not completed, communications may be delayed. + * The created @ref ucp_ep_h "endpoint" is associated with one and only one + * @ref ucp_worker_h "worker". + * + * @param [in] worker Handle to the worker; the endpoint + * is associated with the worker. + * @param [in] params User defined @ref ucp_ep_params_t configurations + * for the @ref ucp_ep_h "UCP endpoint". + * @param [out] ep_p A handle to the created endpoint. + * + * @return Error code as defined by @ref ucs_status_t + * + * @note One of the following fields has to be specified: + * - ucp_ep_params_t::address + * - ucp_ep_params_t::sockaddr + * - ucp_ep_params_t::conn_request + + * @note By default, ucp_ep_create() will connect an endpoint to itself if + * the endpoint is destined to the same @a worker on which it was created, + * i.e. @a params.address belongs to @a worker. This behavior can be changed by + * passing the @ref UCP_EP_PARAMS_FLAGS_NO_LOOPBACK flag in @a params.flags. + * In that case, the endpoint will be connected to the *next* endpoint created + * in the same way on the same @a worker. + */ +ucs_status_t ucp_ep_create(ucp_worker_h worker, const ucp_ep_params_t *params, + ucp_ep_h *ep_p); + + +/** + * @ingroup UCP_ENDPOINT + * + * @brief Non-blocking @ref ucp_ep_h "endpoint" closure. + * + * This routine releases the @ref ucp_ep_h "endpoint". The endpoint closure + * process depends on the selected @a mode. + * + * @param [in] ep Handle to the endpoint to close. + * @param [in] mode One from @ref ucp_ep_close_mode value. + * + * @return UCS_OK - The endpoint is closed successfully. 
+ * @return UCS_PTR_IS_ERR(_ptr) - The closure failed and an error code indicates + * the transport level status. However, resources + * are released and the @a endpoint can no longer + * be used. + * @return otherwise - The closure process is started, and can be + * completed at any point in time. A request handle + * is returned to the application in order to track + * progress of the endpoint closure. The application + * is responsible for releasing the handle using the + * @ref ucp_request_free routine. + * + * @note @ref ucp_ep_close_nb replaces deprecated @ref ucp_disconnect_nb and + * @ref ucp_ep_destroy + */ +ucs_status_ptr_t ucp_ep_close_nb(ucp_ep_h ep, unsigned mode); + + +/** + * @ingroup UCP_WORKER + * + * @brief Reject an incoming connection request. + * + * Reject the incoming connection request and release associated resources. If + * the remote initiator endpoint has set an @ref ucp_ep_params_t::err_handler, + * it will be invoked with status @ref UCS_ERR_REJECTED. + * + * @param [in] listener Handle to the listener on which the connection + * request was received. + * @param [in] conn_request Handle to the connection request to reject. + * + * @return Error code as defined by @ref ucs_status_t + * + */ +ucs_status_t ucp_listener_reject(ucp_listener_h listener, + ucp_conn_request_h conn_request); + + +/** + * @ingroup UCP_ENDPOINT + * @brief Print endpoint information. + * + * This routine prints information about the endpoint transport methods, their + * thresholds, and other useful information associated with the endpoint. + * + * @param [in] ep Endpoint object whose configuration to print. + * @param [in] stream Output stream to print the information to. + */ +void ucp_ep_print_info(ucp_ep_h ep, FILE *stream); + + +/** + * @ingroup UCP_ENDPOINT + * + * @brief Non-blocking flush of outstanding AMO and RMA operations on the + * @ref ucp_ep_h "endpoint". 
+ * + * This routine flushes all outstanding AMO and RMA communications on the + * @ref ucp_ep_h "endpoint". All the AMO and RMA operations issued on the + * @a ep prior to this call are completed both at the origin and at the target + * @ref ucp_ep_h "endpoint" when this call returns. + * + * @param [in] ep UCP endpoint. + * @param [in] flags Flags for flush operation. Reserved for future use. + * @param [in] cb Callback which will be called when the flush operation + * completes. + * + * @return NULL - The flush operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. + * @return otherwise - Flush operation was scheduled and can be completed + * in any point in time. The request handle is returned + * to the application in order to track progress. The + * application is responsible for releasing the handle + * using @ref ucp_request_free "ucp_request_free()" + * routine. + * + * + * The following example demonstrates how blocking flush can be implemented + * using non-blocking flush: + * @code {.c} + * void empty_function(void *request, ucs_status_t status) + * { + * } + * + * ucs_status_t blocking_ep_flush(ucp_ep_h ep, ucp_worker_h worker) + * { + * void *request; + * + * request = ucp_ep_flush_nb(ep, 0, empty_function); + * if (request == NULL) { + * return UCS_OK; + * } else if (UCS_PTR_IS_ERR(request)) { + * return UCS_PTR_STATUS(request); + * } else { + * ucs_status_t status; + * do { + * ucp_worker_progress(worker); + * status = ucp_request_check_status(request); + * } while (status == UCS_INPROGRESS); + * ucp_request_free(request); + * return status; + * } + * } + * @endcode */ +ucs_status_ptr_t ucp_ep_flush_nb(ucp_ep_h ep, unsigned flags, + ucp_send_callback_t cb); + + +/** + * @ingroup UCP_MEM + * @brief Map or allocate memory for zero-copy operations. 
+ * + * This routine maps and/or allocates a user-specified memory segment with @ref + * ucp_context_h "UCP application context" and the network resources associated + * with it. If the application specifies NULL as an address for the memory + * segment, the routine allocates a mapped memory segment whose address can be + * obtained with @ref ucp_mem_query. The network stack associated with an + * application context can typically send and receive data from the mapped + * memory without CPU intervention; some devices and associated network stacks + * require the memory to be mapped to send and receive data. The @ref ucp_mem_h + * "memory handle" includes all information required to access the memory + * locally using UCP routines, while @ref ucp_rkey_h + * "remote registration handle" provides the information that is necessary for + * remote memory access. + * + * @note + * Another well-known term for the "map" operation that is typically + * used in the context of networking is memory "registration" or "pinning". The + * UCP library registers the memory with the available hardware so it can be + * accessed directly by the hardware. + * + * Memory mapping assumptions: + * @li A given memory segment can be mapped by several different communication + * stacks, if these are compatible. + * @li The @a memh_p handle returned may be used with any sub-region of the + * mapped memory. + * @li If a large segment is registered, and then segmented for subsequent use + * by a user, then the user is responsible for segmentation and subsequent + * management. + * + * + * + * + * + * + * + * + * + * + * + *
Matrix of behavior
parameter/flag @ref UCP_MEM_MAP_NONBLOCK "NONBLOCK"@ref UCP_MEM_MAP_ALLOCATE "ALLOCATE"@ref UCP_MEM_MAP_FIXED "FIXED"@ref ucp_mem_map_params.address "address"@b result + *
@b value 0/1 - the value\n only affects the\n register/map\n phase0 0 0 @ref anch_err "error" if length > 0 + *
1 0 0 @ref anch_alloc_reg "alloc+register" + *
0 1 0 @ref anch_err "error"
0 0 defined @ref anch_reg "register" + *
1 1 0 @ref anch_err "error"
1 0 defined @ref anch_alloc_hint_reg "alloc+register,hint" + *
0 1 defined @ref anch_err "error"
1 1 defined @ref anch_alloc_fixed_reg "alloc+register,fixed" + *
+ * + * @note + * @li \anchor anch_reg @b register means that the memory will be registered in + * corresponding transports for RMA/AMO operations. This case intends that + * the memory was allocated by user before. + * @li \anchor anch_alloc_reg @b alloc+register means that the memory will be allocated + * in the memory provided by the system and registered in corresponding + * transports for RMA/AMO operations. + * @li \anchor anch_alloc_hint_reg alloc+register,hint means that + * the memory will be allocated with using @ref ucp_mem_map_params.address + * as a hint and registered in corresponding transports for RMA/AMO operations. + * @li \anchor anch_alloc_fixed_reg alloc+register,fixed means that the memory + * will be allocated and registered in corresponding transports for RMA/AMO + * operations. + * @li \anchor anch_err @b error is an erroneous combination of the parameters. + * + * @param [in] context Application @ref ucp_context_h "context" to map + * (register) and allocate the memory on. + * @param [in] params User defined @ref ucp_mem_map_params_t configurations + * for the @ref ucp_mem_h "UCP memory handle". + * @param [out] memh_p UCP @ref ucp_mem_h "handle" for the allocated + * segment. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_mem_map(ucp_context_h context, const ucp_mem_map_params_t *params, + ucp_mem_h *memh_p); + + +/** + * @ingroup UCP_MEM + * @brief Unmap memory segment + * + * This routine unmaps a user specified memory segment, that was previously + * mapped using the @ref ucp_mem_map "ucp_mem_map()" routine. The unmap + * routine will also release the resources associated with the memory + * @ref ucp_mem_h "handle". When the function returns, the @ref ucp_mem_h + * and associated @ref ucp_rkey_h "remote key" will be invalid and cannot be + * used with any UCP routine. 
+ * + * @note + * Another well-known term for the "unmap" operation that is typically + * used in the context of networking is memory "de-registration". The UCP + * library de-registers the memory from the available hardware so it can be returned + * to the operating system. + * + * Error cases: + * @li Once memory is unmapped a network access to the region may cause a + * failure. + * + * @param [in] context Application @ref ucp_context_h "context" which was + * used to allocate/map the memory. + * @param [in] memh @ref ucp_mem_h "Handle" to memory region. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_mem_unmap(ucp_context_h context, ucp_mem_h memh); + + +/** + * @ingroup UCP_MEM + * @brief Query mapped memory segment. + * + * This routine returns the address and length of a memory segment mapped with + * the @ref ucp_mem_map "ucp_mem_map()" routine. + * + * @param [in] memh @ref ucp_mem_h "Handle" to memory region. + * @param [out] attr Filled with attributes of the @ref ucp_mem_h + * "UCP memory handle". + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_mem_query(const ucp_mem_h memh, ucp_mem_attr_t *attr); + + +/** + * @ingroup UCP_MEM + * @brief Print memory mapping information. + * + * This routine maps memory and prints information about the created memory handle, + * including the mapped memory length, the allocation method, and other useful + * information associated with the memory handle. + * + * @param [in] mem_size Size of the memory to map. + * @param [in] context The context on which the memory is mapped. + * @param [in] stream Output stream on which to print the information. + */ +void ucp_mem_print_info(const char *mem_size, ucp_context_h context, FILE *stream); + + +/** + * @ingroup UCP_MEM + * @brief List of UCP memory use advice. + * + * The enumeration list describes memory advice supported by the @ref + * ucp_mem_advise() function.
+ */ +typedef enum ucp_mem_advice { + UCP_MADV_NORMAL = 0, /**< No special treatment. */ + UCP_MADV_WILLNEED /**< Can be used on memory mapped with + @ref UCP_MEM_MAP_NONBLOCK to speed up memory + mapping and to avoid page faults when + the memory is accessed for the first time. */ +} ucp_mem_advice_t; + + +/** + * @ingroup UCP_MEM + * @brief Tuning parameters for the UCP memory advice. + * + * This structure defines the parameters that are passed to the + * @ref ucp_mem_advise "ucp_mem_advise()" routine to describe the memory + * range and the advice to apply to it. + */ +typedef struct ucp_mem_advise_params { + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_mem_advise_params_field. All fields are mandatory. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Base address of the memory range the advice applies to. + */ + void *address; + + /** + * Length (in bytes) of the memory range the advice applies to. + */ + size_t length; + + /** + * Memory use advice, one of the @ref ucp_mem_advice values. + */ + ucp_mem_advice_t advice; +} ucp_mem_advise_params_t; + + +/** + * @ingroup UCP_MEM + * @brief give advice about the use of memory + * + * This routine advises the UCP about how to handle memory range beginning at + * address and size of length bytes. This call does not influence the semantics + * of the application, but may influence its performance. The UCP may ignore + * the advice. + * + * @param [in] context Application @ref ucp_context_h "context" which was + * used to allocate/map the memory. + * @param [in] memh @ref ucp_mem_h "Handle" to memory region. + * @param [in] params Memory base address and length.
The advice field + * is used to pass memory use advice as defined in + * the @ref ucp_mem_advice list. + * The memory range must belong to the @a memh. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_mem_advise(ucp_context_h context, ucp_mem_h memh, + ucp_mem_advise_params_t *params); + + +/** + * @ingroup UCP_MEM + * @brief Pack memory region remote access key. + * + * This routine allocates a memory buffer and packs into the buffer + * a remote access key (RKEY) object. RKEY is an opaque object that provides + * the information that is necessary for remote memory access. + * This routine packs the RKEY object in a portable format such that the + * object can be @ref ucp_ep_rkey_unpack "unpacked" on any platform supported by the + * UCP library. In order to release the memory buffer allocated by this routine + * the application is responsible for calling the @ref ucp_rkey_buffer_release + * "ucp_rkey_buffer_release()" routine. + * + * + * @note + * @li RKEYs for InfiniBand and Cray Aries networks typically include + * InfiniBand and Aries keys. + * @li In order to enable remote direct memory access to the memory associated + * with the memory handle the application is responsible for sharing the RKEY with + * the peers that will initiate the access. + * + * @param [in] context Application @ref ucp_context_h "context" which was + * used to allocate/map the memory. + * @param [in] memh @ref ucp_mem_h "Handle" to memory region. + * @param [out] rkey_buffer_p Memory buffer allocated by the library. + * The buffer contains the packed RKEY. + * @param [out] size_p Size (in bytes) of the packed RKEY. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_rkey_pack(ucp_context_h context, ucp_mem_h memh, + void **rkey_buffer_p, size_t *size_p); + + +/** + * @ingroup UCP_MEM + * @brief Release packed remote key buffer.
+ * + * This routine releases the buffer that was allocated using @ref ucp_rkey_pack + * "ucp_rkey_pack()". + * + * @warning + * @li Once memory is released an access to the memory may cause a + * failure. + * @li If the input memory address was not allocated using + * @ref ucp_rkey_pack "ucp_rkey_pack()" routine the behaviour of this routine + * is undefined. + * + * @param [in] rkey_buffer Buffer to release. + */ +void ucp_rkey_buffer_release(void *rkey_buffer); + + +/** + * @ingroup UCP_MEM + * @brief Create remote access key from packed buffer. + * + * This routine unpacks the remote key (RKEY) object into the local memory + * such that it can be accessed and used by UCP routines. The RKEY object has + * to be packed using the @ref ucp_rkey_pack "ucp_rkey_pack()" routine. + * Application code should not make any changes to the content of the RKEY + * buffer. + * + * @note The application is responsible for releasing the RKEY object when + * it is no longer needed, by calling the @ref ucp_rkey_destroy + * "ucp_rkey_destroy()" routine. + * @note The remote key object can be used for communications only on the + * endpoint on which it was unpacked. + * + * @param [in] ep Endpoint to access using the remote key. + * @param [in] rkey_buffer Packed rkey. + * @param [out] rkey_p Remote key handle. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_ep_rkey_unpack(ucp_ep_h ep, const void *rkey_buffer, + ucp_rkey_h *rkey_p); + + +/** + * @ingroup UCP_MEM + * @brief Get a local pointer to remote memory. + * + * This routine returns a local pointer to the remote memory described + * by the rkey. + * + * @note This routine can return a valid pointer only for the endpoints + * that are reachable via shared memory. + * + * @param [in] rkey A remote key handle. + * @param [in] raddr A remote memory address within the memory area + * described by the rkey. 
+ * @param [out] addr_p A pointer that can be used for direct + * access to the remote memory. + * + * @return Error code as defined by @ref ucs_status_t if the remote memory + * cannot be accessed directly or the remote memory address is not valid. + */ +ucs_status_t ucp_rkey_ptr(ucp_rkey_h rkey, uint64_t raddr, void **addr_p); + + +/** + * @ingroup UCP_MEM + * @brief Destroy the remote key + * + * This routine destroys the RKEY object and the memory that was allocated + * using the @ref ucp_ep_rkey_unpack "ucp_ep_rkey_unpack()" routine. This + * routine also releases any resources that are associated with the RKEY + * object. + * + * @warning + * @li Once the RKEY object is released an access to the memory will cause an + * undefined failure. + * @li If the RKEY object was not created using + * @ref ucp_ep_rkey_unpack "ucp_ep_rkey_unpack()" routine the behavior of this + * routine is undefined. + * @li The RKEY object must be destroyed after all outstanding operations which + * are using it are flushed, and before the endpoint on which it was unpacked + * is destroyed. + * + * @param [in] rkey Remote key to destroy. + */ +void ucp_rkey_destroy(ucp_rkey_h rkey); + + +/** + * @ingroup UCP_WORKER + * @brief Add user defined callback for Active Message. + * + * This routine installs a user defined callback to handle incoming Active + * Messages with a specific id. This callback is called whenever an Active + * Message that was sent from the remote peer by @ref ucp_am_send_nb is + * received on this worker. + * + * @param [in] worker UCP worker on which to set the Active Message + * handler. + * @param [in] id Active Message id. + * @param [in] cb Active Message callback. NULL to clear. + * @param [in] arg Active Message argument, which will be passed + * in to every invocation of the callback as the + * arg argument. + * @param [in] flags Dictates how an Active Message is handled on the + * remote endpoint. 
Currently only + * UCP_AM_FLAG_WHOLE_MSG is supported, which + * indicates the callback will not be invoked + * until all data has arrived. + * + * @return error code if the worker does not support Active Messages or + * requested callback flags. + */ +ucs_status_t ucp_worker_set_am_handler(ucp_worker_h worker, uint16_t id, + ucp_am_callback_t cb, void *arg, + uint32_t flags); + + +/** + * @ingroup UCP_COMM + * @brief Send Active Message. + * + * This routine sends an Active Message to an ep. It does not support + * CUDA memory. + * + * @param [in] ep UCP endpoint where the Active Message will be run. + * @param [in] id Active Message id. Specifies which registered + * callback to run. + * @param [in] buffer Pointer to the data to be sent to the target node + * of the Active Message. + * @param [in] count Number of elements to send. + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] cb Callback that is invoked upon completion of the + * data transfer if it is not completed immediately. + * @param [in] flags For Future use. + * + * @return NULL Active Message was sent immediately. + * @return UCS_PTR_IS_ERR(_ptr) Error sending Active Message. + * @return otherwise Pointer to request, and Active Message is known + * to be completed after cb is run. + */ +ucs_status_ptr_t ucp_am_send_nb(ucp_ep_h ep, uint16_t id, + const void *buffer, size_t count, + ucp_datatype_t datatype, + ucp_send_callback_t cb, unsigned flags); + + +/** + * @ingroup UCP_COMM + * @brief Releases Active Message data. + * + * This routine releases data that persisted through an Active Message + * callback because that callback returned UCS_INPROGRESS. + * + * @param [in] worker Worker which received the Active Message. + * @param [in] data Pointer to data that was passed into + * the Active Message callback as the data + * parameter. 
+ */ +void ucp_am_data_release(ucp_worker_h worker, void *data); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking stream send operation. + * + * This routine sends data that is described by the local address @a buffer, + * size @a count, and @a datatype object to the destination endpoint @a ep. + * The routine is non-blocking and therefore returns immediately, however + * the actual send operation may be delayed. The send operation is considered + * completed when it is safe to reuse the source @e buffer. If the send + * operation is completed immediately the routine returns UCS_OK and the + * call-back function @a cb is @b not invoked. If the operation is + * @b not completed immediately and no error reported, then the UCP library will + * schedule invocation of the call-back @a cb upon completion of the send + * operation. In other words, the completion of the operation will be signaled + * either by the return code or by the call-back. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send. + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] cb Callback function that is invoked whenever the + * send operation is completed. It is important to note + * that the call-back is only invoked in a case when + * the operation cannot be completed in place. + * @param [in] flags Reserved for future use. + * + * @return NULL - The send operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed in any point in time. The request handle + * is returned to the application in order to track + * progress of the message. 
The application is + * responsible for releasing the handle using + * @ref ucp_request_free routine. + */ +ucs_status_ptr_t ucp_stream_send_nb(ucp_ep_h ep, const void *buffer, size_t count, + ucp_datatype_t datatype, ucp_send_callback_t cb, + unsigned flags); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-send operations + * + * This routine sends a message that is described by the local address @a + * buffer, size @a count, and @a datatype object to the destination endpoint + * @a ep. Each message is associated with a @a tag value that is used for + * message matching on the @ref ucp_tag_recv_nb "receiver". The routine is + * non-blocking and therefore returns immediately, however the actual send + * operation may be delayed. The send operation is considered completed when + * it is safe to reuse the source @e buffer. If the send operation is + * completed immediately the routine returns UCS_OK and the call-back function + * @a cb is @b not invoked. If the operation is @b not completed immediately + * and no error is reported, then the UCP library will schedule invocation of the + * call-back @a cb upon completion of the send operation. In other + * words, the completion of a message can be signaled by the return code or + * the call-back. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send. + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag. + * @param [in] cb Callback function that is invoked whenever the + * send operation is completed. It is important to note + * that the call-back is only invoked in a case when + * the operation cannot be completed in place. + * + * @return NULL - The send operation was completed immediately.
+ * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the message. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t ucp_tag_send_nb(ucp_ep_h ep, const void *buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, + ucp_send_callback_t cb); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-send operations with user provided request + * + * This routine provides a convenient and efficient way to implement a + * blocking send pattern. It also completes requests faster than + * @ref ucp_tag_send_nb() because: + * @li it always uses @ref uct_ep_am_bcopy() to send data up to the + * rendezvous threshold. + * @li its rendezvous threshold is higher than the one used by + * the @ref ucp_tag_send_nb(). The threshold is controlled by + * the @b UCX_SEND_NBR_RNDV_THRESH environment variable. + * @li its request handling is simpler. There is no callback and no need + * to allocate and free requests. In fact, the request can be allocated by + * the caller on the stack. + * + * This routine sends a message that is described by the local address @a + * buffer, size @a count, and @a datatype object to the destination endpoint + * @a ep. Each message is associated with a @a tag value that is used for + * message matching on the @ref ucp_tag_recv_nbr "receiver". + * + * The routine is non-blocking and therefore returns immediately, however + * the actual send operation may be delayed. The send operation is considered + * completed when it is safe to reuse the source @e buffer. If the send + * operation is completed immediately the routine returns UCS_OK.
+ * + * If the operation is @b not completed immediately and no error reported + * then the UCP library will fill a user provided @a req and + * return UCS_INPROGRESS status. In order to monitor completion of the + * operation @ref ucp_request_check_status() should be used. + * + * Following pseudo code implements a blocking send function: + * @code + * MPI_send(...) + * { + * char *request; + * ucs_status_t status; + * + * // allocate request on the stack + * // ucp_context_query() was used to get ucp_request_size + * request = alloca(ucp_request_size); + * + * // note: make sure that there is enough memory before the + * // request handle + * status = ucp_tag_send_nbr(ep, ..., request + ucp_request_size); + * if (status != UCS_INPROGRESS) { + * return status; + * } + * + * do { + * ucp_worker_progress(worker); + * status = ucp_request_check_status(request + ucp_request_size); + * } while (status == UCS_INPROGRESS); + * + * return status; + * } + * @endcode + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag. + * @param [in] req Request handle allocated by the user. There should + * be at least UCP request size bytes of available + * space before the @a req. The size of UCP request + * can be obtained by @ref ucp_context_query function. + * + * @return UCS_OK - The send operation was completed immediately. + * @return UCS_INPROGRESS - The send was not completed and is in progress. + * @ref ucp_request_check_status() should be used to + * monitor @a req status. 
+ * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_tag_send_nbr(ucp_ep_h ep, const void *buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, void *req); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking synchronous tagged-send operation. + * + * Same as @ref ucp_tag_send_nb, except the request completes only after there + * is a remote tag match on the message (which does not always mean the remote + * receive has been completed). This function never completes "in-place", and + * always returns a request handle. + * + * @note The user should not modify any part of the @a buffer after this + * operation is called, until the operation completes. + * @note Returns @ref UCS_ERR_UNSUPPORTED if @ref UCP_ERR_HANDLING_MODE_PEER is + * enabled. This is a temporary implementation-related constraint that + * will be addressed in future releases. + * + * @param [in] ep Destination endpoint handle. + * @param [in] buffer Pointer to the message buffer (payload). + * @param [in] count Number of elements to send + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag. + * @param [in] cb Callback function that is invoked whenever the + * send operation is completed. + * + * @return UCS_PTR_IS_ERR(_ptr) - The send operation failed. + * @return otherwise - Operation was scheduled for send and can be + * completed in any point in time. The request handle + * is returned to the application in order to track + * progress of the message. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t ucp_tag_send_sync_nb(ucp_ep_h ep, const void *buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, + ucp_send_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking stream receive operation of structured data into a + * user-supplied buffer. 
+ * + * This routine receives data that is described by the local address @a buffer, + * size @a count, and @a datatype object on the endpoint @a ep. The routine is + * non-blocking and therefore returns immediately. The receive operation is + * considered complete when the message is delivered to the buffer. If data is + * not immediately available, the operation will be scheduled for receive and + * a request handle will be returned. In order to notify the application about + * completion of a scheduled receive operation, the UCP library will invoke + * the call-back @a cb when data is in the receive buffer and ready for + * application access. If the receive operation cannot be started, the routine + * returns an error. + * + * @param [in] ep UCP endpoint that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data to. + * @param [in] count Number of elements to receive into @a buffer. + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] cb Callback function that is invoked whenever the + * receive operation is completed and the data is ready + * in the receive @a buffer. It is important to note + * that the call-back is only invoked in a case when + * the operation cannot be completed immediately. + * @param [out] length Size of the received data in bytes. The value is + * valid only if return code is UCS_OK. + * @note The amount of data received, in bytes, is always an + * integral multiple of the @a datatype size. + * @param [in] flags Flags defined in @ref ucp_stream_recv_flags_t. + * + * @return NULL - The receive operation was completed + * immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. A request + * handle is returned to the application in order + * to track progress of the operation. 
+ * The application is responsible for releasing + * the handle by calling the + * @ref ucp_request_free routine. + */ +ucs_status_ptr_t ucp_stream_recv_nb(ucp_ep_h ep, void *buffer, size_t count, + ucp_datatype_t datatype, + ucp_stream_recv_callback_t cb, + size_t *length, unsigned flags); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking stream receive operation of unstructured data into + * a UCP-supplied buffer. + * + * This routine receives any available data from endpoint @a ep. + * Unlike @ref ucp_stream_recv_nb, the returned data is unstructured and is + * treated as an array of bytes. If data is immediately available, + * UCS_STATUS_PTR(_ptr) is returned as a pointer to the data, and @a length + * is set to the size of the returned data buffer. The routine is non-blocking + * and therefore returns immediately. + * + * @param [in] ep UCP endpoint that is used for the receive + * operation. + * @param [out] length Length of received data. + * + * @return NULL - No received data available on the @a ep. + * @return UCS_PTR_IS_ERR(_ptr) - the receive operation failed and + * UCS_PTR_STATUS(_ptr) indicates an error. + * @return otherwise - The pointer to the data UCS_STATUS_PTR(_ptr) + * is returned to the application. After the data + * is processed, the application is responsible + * for releasing the data buffer by calling the + * @ref ucp_stream_data_release routine. + * + * @note This function returns packed data (equivalent to ucp_dt_make_contig(1)). + * @note This function returns a pointer to a UCP-supplied buffer, whereas + * @ref ucp_stream_recv_nb places the data into a user-provided buffer. + * In some cases, receiving data directly into a UCP-supplied buffer can + * be more optimal, for example by processing the incoming data in-place + * and thus avoiding extra memory copy operations. + */ +ucs_status_ptr_t ucp_stream_recv_data_nb(ucp_ep_h ep, size_t *length); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-receive operation. 
+ * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, and @a datatype object on the @a worker. The tag + * value of the receive message has to match the @a tag and @a tag_mask values, + * where the @a tag_mask indicates what bits of the tag have to be matched. The + * routine is non-blocking and therefore returns immediately. The receive + * operation is considered completed when the message is delivered to the @a + * buffer. In order to notify the application about completion of the receive + * operation the UCP library will invoke the call-back @a cb when the received + * message is in the receive buffer and ready for application access. If the + * receive operation cannot be started the routine returns an error. + * + * @note This routine cannot return UCS_OK. It always returns a request + * handle or an error. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data to. + * @param [in] count Number of elements to receive + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag to expect. + * @param [in] tag_mask Bit mask that indicates the bits that are used for + * the matching of the incoming tag + * against the expected tag. + * @param [in] cb Callback function that is invoked whenever the + * receive operation is completed and the data is ready + * in the receive @a buffer. + * + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. The request + * handle is returned to the application in order + * to track progress of the operation. The + * application is responsible for releasing the + * handle using @ref ucp_request_free + * "ucp_request_free()" routine.
+ */ +ucs_status_ptr_t ucp_tag_recv_nb(ucp_worker_h worker, void *buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, + ucp_tag_t tag_mask, ucp_tag_recv_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking tagged-receive operation. + * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, and @a datatype object on the @a worker. The tag + * value of the receive message has to match the @a tag and @a tag_mask values, + * where the @a tag_mask indicates what bits of the tag have to be matched. The + * routine is a non-blocking and therefore returns immediately. The receive + * operation is considered completed when the message is delivered to the @a + * buffer. In order to monitor completion of the operation + * @ref ucp_request_check_status or @ref ucp_tag_recv_request_test should be + * used. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data to. + * @param [in] count Number of elements to receive + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] tag Message tag to expect. + * @param [in] tag_mask Bit mask that indicates the bits that are used for + * the matching of the incoming tag + * against the expected tag. + * @param [in] req Request handle allocated by the user. There should + * be at least UCP request size bytes of available + * space before the @a req. The size of UCP request + * can be obtained by @ref ucp_context_query function. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_tag_recv_nbr(ucp_worker_h worker, void *buffer, size_t count, + ucp_datatype_t datatype, ucp_tag_t tag, + ucp_tag_t tag_mask, void *req); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking probe and return a message. 
+ * + * This routine probes (checks) if a message described by the @a tag and + * @a tag_mask was received (fully or partially) on the @a worker. The tag + * value of the received message has to match the @a tag and @a tag_mask + * values, where the @a tag_mask indicates what bits of the tag have to be + * matched. The function returns immediately and if the message is matched it + * returns a handle for the message. + * + * @param [in] worker UCP worker that is used for the probe operation. + * @param [in] tag Message tag to probe for. + * @param [in] tag_mask Bit mask that indicates the bits that are used for + * the matching of the incoming tag + * against the expected tag. + * @param [in] remove The flag indicates if the matched message has to + * be removed from UCP library. + * If true (1), the message handle is removed from + * the UCP library and the application is responsible + * for calling @ref ucp_tag_msg_recv_nb + * "ucp_tag_msg_recv_nb()" in order to receive the data + * and release the resources associated with the + * message handle. + * If false (0), the return value is merely an indication + * of whether a matching message is present, and it cannot + * be used in any other way, and in particular it cannot + * be passed to @ref ucp_tag_msg_recv_nb(). + * @param [out] info If the matching message is found the descriptor is + * filled with the details about the message. + * + * @return NULL - No match found. + * @return Message handle (not NULL) - If message is matched the message handle + * is returned. + * + * @note This function does not advance the communication state of the network. + * If this routine is used in busy-poll mode, the application needs to make sure + * @ref ucp_worker_progress() is called periodically to extract messages + * from the transport.
+ */ +ucp_tag_message_h ucp_tag_probe_nb(ucp_worker_h worker, ucp_tag_t tag, + ucp_tag_t tag_mask, int remove, + ucp_tag_recv_info_t *info); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking receive operation for a probed message. + * + * This routine receives a message that is described by the local address @a + * buffer, size @a count, @a message handle, and @a datatype object on the @a + * worker. The @a message handle can be obtained by calling the @ref + * ucp_tag_probe_nb "ucp_tag_probe_nb()" routine. @ref ucp_tag_msg_recv_nb + * "ucp_tag_msg_recv_nb()" routine is non-blocking and therefore returns + * immediately. The receive operation is considered completed when the message + * is delivered to the @a buffer. In order to notify the application about + * completion of the receive operation the UCP library will invoke the + * call-back @a cb when the received message is in the receive buffer and ready + * for application access. If the receive operation cannot be started the + * routine returns an error. + * + * @param [in] worker UCP worker that is used for the receive operation. + * @param [in] buffer Pointer to the buffer to receive the data to. + * @param [in] count Number of elements to receive + * @param [in] datatype Datatype descriptor for the elements in the buffer. + * @param [in] message Message handle. + * @param [in] cb Callback function that is invoked whenever the + * receive operation is completed and the data is ready + * in the receive @a buffer. + * + * @return UCS_PTR_IS_ERR(_ptr) - The receive operation failed. + * @return otherwise - Operation was scheduled for receive. The request + * handle is returned to the application in order + * to track progress of the operation. The + * application is responsible for releasing the + * handle using @ref ucp_request_free + * "ucp_request_free()" routine.
+ */ +ucs_status_ptr_t ucp_tag_msg_recv_nb(ucp_worker_h worker, void *buffer, + size_t count, ucp_datatype_t datatype, + ucp_tag_message_h message, + ucp_tag_recv_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking implicit remote memory put operation. + * + * This routine initiates a storage of contiguous block of data that is + * described by the local address @a buffer in the remote contiguous memory + * region described by @a remote_addr address and the @ref ucp_rkey_h "memory + * handle" @a rkey. The routine returns immediately and @b does @b not + * guarantee re-usability of the source address @e buffer. If the operation is + * completed immediately the routine return UCS_OK, otherwise UCS_INPROGRESS + * or an error is returned to user. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" + * in order to guarantee re-usability of the source address @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local source address. + * @param [in] length Length of the data (in bytes) stored under the + * source address. + * @param [in] remote_addr Pointer to the destination remote memory address + * to write to. + * @param [in] rkey Remote memory key associated with the + * remote memory address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_put_nbi(ucp_ep_h ep, const void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking remote memory put operation. + * + * This routine initiates a storage of contiguous block of data that is + * described by the local address @a buffer in the remote contiguous memory + * region described by @a remote_addr address and the @ref ucp_rkey_h "memory + * handle" @a rkey. The routine returns immediately and @b does @b not + * guarantee re-usability of the source address @e buffer. 
If the operation is + * completed immediately the routine return UCS_OK, otherwise UCS_INPROGRESS + * or an error is returned to user. If the put operation completes immediately, + * the routine returns UCS_OK and the call-back routine @a cb is @b not + * invoked. If the operation is @b not completed immediately and no error is + * reported, then the UCP library will schedule invocation of the call-back + * routine @a cb upon completion of the put operation. In other words, the + * completion of a put operation can be signaled by the return code or + * execution of the call-back. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" + * in order to guarantee re-usability of the source address @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local source address. + * @param [in] length Length of the data (in bytes) stored under the + * source address. + * @param [in] remote_addr Pointer to the destination remote memory address + * to write to. + * @param [in] rkey Remote memory key associated with the + * remote memory address. + * @param [in] cb Call-back function that is invoked whenever the + * put operation is completed and the local buffer + * can be modified. Does not guarantee remote + * completion. + * + * @return NULL - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t ucp_put_nb(ucp_ep_h ep, const void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey, + ucp_send_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Non-blocking implicit remote memory get operation. 
+ * + * This routine initiates a load of a contiguous block of data that is described + * by the remote memory address @a remote_addr and the @ref ucp_rkey_h "memory handle" + * @a rkey in the local contiguous memory region described by @a buffer + * address. The routine returns immediately and @b does @b not guarantee that + * remote data is loaded and stored under the local address @e buffer. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" in order + * to guarantee that remote data is loaded and stored under the local address + * @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local destination address. + * @param [in] length Length of the data (in bytes) stored under the + * destination address. + * @param [in] remote_addr Pointer to the source remote memory address + * to read from. + * @param [in] rkey Remote memory key associated with the + * remote memory address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey); + +/** + * @ingroup UCP_COMM + * @brief Non-blocking remote memory get operation. + * + * This routine initiates a load of a contiguous block of data that is + * described by the remote memory address @a remote_addr and the @ref ucp_rkey_h + * "memory handle" @a rkey in the local contiguous memory region described + * by @a buffer address. The routine returns immediately and @b does @b not + * guarantee that remote data is loaded and stored under the local address @e + * buffer. If the operation is completed immediately the routine returns UCS_OK, + * otherwise UCS_INPROGRESS or an error is returned to user. If the get + * operation completes immediately, the routine returns UCS_OK and the + * call-back routine @a cb is @b not invoked.
If the operation is @b not + * completed immediately and no error is reported, then the UCP library will + * schedule invocation of the call-back routine @a cb upon completion of the + * get operation. In other words, the completion of a get operation can be + * signaled by the return code or execution of the call-back. + * + * @note A user can use @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" + * in order to guarantee re-usability of the source address @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local destination address. + * @param [in] length Length of the data (in bytes) stored under the + * destination address. + * @param [in] remote_addr Pointer to the source remote memory address + * to read from. + * @param [in] rkey Remote memory key associated with the + * remote memory address. + * @param [in] cb Call-back function that is invoked whenever the + * get operation is completed and the data is + * visible to the local process. + * + * @return NULL - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t ucp_get_nb(ucp_ep_h ep, void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey, + ucp_send_callback_t cb); + +/** + * @ingroup UCP_COMM + * @brief Post an atomic memory operation. + * + * This routine posts an atomic memory operation to a remote value. + * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. + * Return from the function does not guarantee completion. 
A user must + * call @ref ucp_ep_flush_nb or @ref ucp_worker_flush_nb to guarantee that the + * remote value has been updated. + * + * @param [in] ep UCP endpoint. + * @param [in] opcode One of @ref ucp_atomic_post_op_t. + * @param [in] value Source operand for the atomic operation. + * @param [in] op_size Size of value in bytes + * @param [in] remote_addr Remote address to operate on. + * @param [in] rkey Remote key handle for the remote memory address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_atomic_post(ucp_ep_h ep, ucp_atomic_post_op_t opcode, uint64_t value, + size_t op_size, uint64_t remote_addr, ucp_rkey_h rkey); + + +/** + * @ingroup UCP_COMM + * @brief Post an atomic fetch operation. + * + * This routine will post an atomic fetch operation to remote memory. + * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. + * The routine is non-blocking and therefore returns immediately. However the + * actual atomic operation may be delayed. The atomic operation is not considered complete + * until the values in remote and local memory are completed. If the atomic operation + * completes immediately, the routine returns UCS_OK and the call-back routine + * @a cb is @b not invoked. If the operation is @b not completed immediately and no + * error is reported, then the UCP library will schedule invocation of the call-back + * routine @a cb upon completion of the atomic operation. In other words, the completion + * of an atomic operation can be signaled by the return code or execution of the call-back. + * + * @note The user should not modify any part of the @a result after this + * operation is called, until the operation completes. + * + * @param [in] ep UCP endpoint. + * @param [in] opcode One of @ref ucp_atomic_fetch_op_t. + * @param [in] value Source operand for atomic operation. 
In the case of CSWAP + * this is the conditional for the swap. For SWAP this is + * the value to be placed in remote memory. + * @param [inout] result Local memory address to store resulting fetch to. + * In the case of CSWAP the value in result will be + * swapped into the @a remote_addr if the condition + * is true. + * @param [in] op_size Size of value in bytes and pointer type for result + * @param [in] remote_addr Remote address to operate on. + * @param [in] rkey Remote key handle for the remote memory address. + * @param [in] cb Call-back function that is invoked whenever the + * send operation is completed. It is important to note + * that the call-back function is only invoked in a case when + * the operation cannot be completed in place. + * + * @return NULL - The operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The operation failed. + * @return otherwise - Operation was scheduled and can be + * completed at any point in time. The request handle + * is returned to the application in order to track + * progress of the operation. The application is + * responsible for releasing the handle using + * @ref ucp_request_free "ucp_request_free()" routine. + */ +ucs_status_ptr_t +ucp_atomic_fetch_nb(ucp_ep_h ep, ucp_atomic_fetch_op_t opcode, + uint64_t value, void *result, size_t op_size, + uint64_t remote_addr, ucp_rkey_h rkey, + ucp_send_callback_t cb); + + +/** + * @ingroup UCP_COMM + * @brief Check the status of non-blocking request. + * + * This routine checks the state of the request and returns its current status. + * Any value different from UCS_INPROGRESS means that request is in a completed + * state. + * + * @param [in] request Non-blocking request to check. 
+ * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_request_check_status(void *request); + + +/** + * @ingroup UCP_COMM + * @brief Check the status and currently available state of non-blocking request + * returned from @ref ucp_tag_recv_nb routine. + * + * This routine checks the state and returns current status of the request + * returned from @ref ucp_tag_recv_nb routine or the user allocated request + * for @ref ucp_tag_recv_nbr. Any value different from UCS_INPROGRESS means + * that the request is in a completed state. + * + * @param [in] request Non-blocking request to check. + * @param [out] info It is filled with the details about the message + * available at the moment of calling. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_tag_recv_request_test(void *request, ucp_tag_recv_info_t *info); + + +/** + * @ingroup UCP_COMM + * @brief Check the status and currently available state of non-blocking request + * returned from @ref ucp_stream_recv_nb routine. + * + * This routine checks the state and returns current status of the request + * returned from @ref ucp_stream_recv_nb routine. Any value different from + * UCS_INPROGRESS means that the request is in a completed state. + * + * @param [in] request Non-blocking request to check. + * @param [out] length_p The size of the received data in bytes. This value + * is only valid if the status is UCS_OK. If valid, it + * is always an integral multiple of the datatype size + * associated with the request. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_stream_recv_request_test(void *request, size_t *length_p); + +/** + * @ingroup UCP_COMM + * @brief Cancel an outstanding communications request. + * + * @param [in] worker UCP worker. + * @param [in] request Non-blocking request to cancel. + * + * This routine tries to cancel an outstanding communication request.
After + * calling this routine, the @a request will be in completed or canceled (but + * not both) state regardless of the status of the target endpoint associated + * with the communication request. If the request is completed successfully, + * the @ref ucp_send_callback_t "send" or @ref ucp_tag_recv_callback_t + * "receive" completion callbacks (based on the type of the request) will be + * called with the @a status argument of the callback set to UCS_OK, and in + * case it is canceled the @a status argument is set to UCS_ERR_CANCELED. It is + * important to note that in order to release the request back to the library + * the application is responsible for calling @ref ucp_request_free + * "ucp_request_free()". + */ +void ucp_request_cancel(ucp_worker_h worker, void *request); + + +/** + * @ingroup UCP_COMM + * @brief Release UCP data buffer returned by @ref ucp_stream_recv_data_nb. + * + * @param [in] ep Endpoint that @a data was received from. + * @param [in] data Data pointer to release, which was returned from + * @ref ucp_stream_recv_data_nb. + * + * This routine releases internal UCP data buffer returned by + * @ref ucp_stream_recv_data_nb when @a data is processed. The application can't + * use this buffer after calling this function. + */ +void ucp_stream_data_release(ucp_ep_h ep, void *data); + + +/** + * @ingroup UCP_COMM + * @brief Release a communications request. + * + * @param [in] request Non-blocking request to release. + * + * This routine releases the non-blocking request back to the library, regardless + * of its current state. Communications operations associated with this request + * will make progress internally, however no further notifications or callbacks + * will be invoked for this request. + */ +void ucp_request_free(void *request); + + +/** + * @ingroup UCP_DATATYPE + * @brief Create a generic datatype. + * + * This routine creates a generic datatype object.
+ * The generic datatype is described by the @a ops @ref ucp_generic_dt_ops_t + * "object" which provides a table of routines defining the operations for + * generic datatype manipulation. Typically, generic datatypes are used for + * integration with datatype engines provided with MPI implementations (MPICH, + * Open MPI, etc). + * The application is responsible for releasing the @a datatype_p object using + * @ref ucp_dt_destroy "ucp_dt_destroy()" routine. + * + * @param [in] ops Generic datatype function table as defined by + * @ref ucp_generic_dt_ops_t . + * @param [in] context Application defined context passed to this + * routine. The context is passed as a parameter + * to the routines in the @a ops table. + * @param [out] datatype_p A pointer to datatype object. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_dt_create_generic(const ucp_generic_dt_ops_t *ops, void *context, + ucp_datatype_t *datatype_p); + + +/** + * @ingroup UCP_DATATYPE + * @brief Destroy a datatype and release its resources. + * + * This routine destroys the @a datatype object and + * releases any resources that are associated with the object. + * The @a datatype object must be allocated using @ref ucp_dt_create_generic + * "ucp_dt_create_generic()" routine. + * + * @warning + * @li Once the @a datatype object is released an access to this object may + * cause an undefined failure. + * + * @param [in] datatype Datatype object to destroy. + */ +void ucp_dt_destroy(ucp_datatype_t datatype); + + +/** + * @ingroup UCP_WORKER + * + * @brief Assures ordering between non-blocking operations + * + * This routine ensures ordering of non-blocking communication operations on + * the @ref ucp_worker_h "UCP worker". Communication operations issued on the + * @a worker prior to this call are guaranteed to be completed before any + * subsequent communication operations to the same @ref ucp_worker_h "worker" + * which follow the call to @ref ucp_worker_fence "fence". 
+ * + * @note The primary difference between @ref ucp_worker_fence "ucp_worker_fence()" + * and the @ref ucp_worker_flush_nb "ucp_worker_flush_nb()" is the fact the fence + * routine does not guarantee completion of the operations on the call return but + * only ensures the order between communication operations. The + * @ref ucp_worker_flush_nb "flush" operation on return guarantees that all + * operations are completed and corresponding memory regions were updated. + * + * @param [in] worker UCP worker. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_worker_fence(ucp_worker_h worker); + + +/** + * @ingroup UCP_WORKER + * + * @brief Flush outstanding AMO and RMA operations on the @ref ucp_worker_h + * "worker" + * + * This routine flushes all outstanding AMO and RMA communications on the + * @ref ucp_worker_h "worker". All the AMO and RMA operations issued on the + * @a worker prior to this call are completed both at the origin and at the + * target when this call returns. + * + * @note For description of the differences between @ref ucp_worker_flush_nb + * "flush" and @ref ucp_worker_fence "fence" operations please see + * @ref ucp_worker_fence "ucp_worker_fence()" + * + * @param [in] worker UCP worker. + * @param [in] flags Flags for flush operation. Reserved for future use. + * @param [in] cb Callback which will be called when the flush operation + * completes. + * + * @return NULL - The flush operation was completed immediately. + * @return UCS_PTR_IS_ERR(_ptr) - The flush operation failed. + * @return otherwise - Flush operation was scheduled and can be completed + * in any point in time. The request handle is returned + * to the application in order to track progress. The + * application is responsible for releasing the handle + * using @ref ucp_request_free "ucp_request_free()" + * routine. 
+ */ +ucs_status_ptr_t ucp_worker_flush_nb(ucp_worker_h worker, unsigned flags, + ucp_send_callback_t cb); + + +/** + * @example ucp_hello_world.c + * UCP hello world client / server example utility. + */ + +END_C_DECLS + +#endif diff --git a/src/ucp/api/ucp_compat.h b/src/ucp/api/ucp_compat.h new file mode 100644 index 0000000..3c543ca --- /dev/null +++ b/src/ucp/api/ucp_compat.h @@ -0,0 +1,504 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCP_COMPAT_H_ +#define UCP_COMPAT_H_ + + +#include <ucp/api/ucp_def.h> +#include <ucs/sys/compiler_def.h> + +BEGIN_C_DECLS + +/** @file ucp_compat.h */ + +/** + * @ingroup UCP_WORKER + * @deprecated Replaced by @ref ucp_listener_conn_handler_t. + */ +typedef struct ucp_listener_accept_handler { + ucp_listener_accept_callback_t cb; /**< Endpoint creation callback */ + void *arg; /**< User defined argument for the + callback */ +} ucp_listener_accept_handler_t; + + +/** + * @ingroup UCP_COMM + * @deprecated Replaced by @ref ucp_request_test. + */ +int ucp_request_is_completed(void *request); + + +/** + * @ingroup UCP_ENDPOINT + * @deprecated Replaced by @ref ucp_request_free. + */ +void ucp_request_release(void *request); + + +/** + * @ingroup UCP_ENDPOINT + * @deprecated Replaced by @ref ucp_ep_close_nb. + */ +void ucp_ep_destroy(ucp_ep_h ep); + + +/** + * @ingroup UCP_ENDPOINT + * @deprecated Replaced by @ref ucp_ep_close_nb. + */ +ucs_status_ptr_t ucp_disconnect_nb(ucp_ep_h ep); + + +/** + * @ingroup UCP_ENDPOINT + * @deprecated Replaced by @ref ucp_tag_recv_request_test and + * @ref ucp_request_check_status, depending on the use case. + * + * @note Please use @ref ucp_request_check_status for cases that only need to + * check the completion status of an outstanding request. + * @ref ucp_request_check_status can be used for any type of request.
+ * @ref ucp_tag_recv_request_test should only be used for requests + * returned by @ref ucp_tag_recv_nb (or request allocated by user for + * @ref ucp_tag_recv_nbr) for which additional information + * (returned via the @a info pointer) is needed. + */ +ucs_status_t ucp_request_test(void *request, ucp_tag_recv_info_t *info); + + +/** + * @ingroup UCP_ENDPOINT + * @deprecated Replaced by @ref ucp_ep_flush_nb. + */ +ucs_status_t ucp_ep_flush(ucp_ep_h ep); + +/** + * @ingroup UCP_WORKER + * + * @brief Flush outstanding AMO and RMA operations on the @ref ucp_worker_h + * "worker" + * @deprecated Replaced by @ref ucp_worker_flush_nb. The following example + * implements the same functionality using @ref ucp_worker_flush_nb : + * @code + * ucs_status_t worker_flush(ucp_worker_h worker) + * { + * void *request = ucp_worker_flush_nb(worker); + * if (request == NULL) { + * return UCS_OK; + * } else if (UCS_PTR_IS_ERR(request)) { + * return UCS_PTR_STATUS(request); + * } else { + * ucs_status_t status; + * do { + * ucp_worker_progress(worker); + * status = ucp_request_check_status(request); + * } while (status == UCS_INPROGRESS); + * ucp_request_release(request); + * return status; + * } + * } + * @endcode + * + * + * This routine flushes all outstanding AMO and RMA communications on the + * @ref ucp_worker_h "worker". All the AMO and RMA operations issued on the + * @a worker prior to this call are completed both at the origin and at the + * target when this call returns. + * + * @note For description of the differences between @ref ucp_worker_flush + * "flush" and @ref ucp_worker_fence "fence" operations please see + * @ref ucp_worker_fence "ucp_worker_fence()" + * + * @param [in] worker UCP worker. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_worker_flush(ucp_worker_h worker); + + +/** + * @ingroup UCP_COMM + * @brief Blocking remote memory put operation. + * @deprecated Replaced by @ref ucp_put_nb. 
The following example implements + * the same functionality using @ref ucp_put_nb : + * @code + * void empty_callback(void *request, ucs_status_t status) + * { + * } + * + * ucs_status_t put(ucp_ep_h ep, const void *buffer, size_t length, + * uint64_t remote_addr, ucp_rkey_h rkey) + * { + * void *request = ucp_put_nb(ep, buffer, length, remote_addr, rkey, + * empty_callback); + * if (request == NULL) { + * return UCS_OK; + * } else if (UCS_PTR_IS_ERR(request)) { + * return UCS_PTR_STATUS(request); + * } else { + * ucs_status_t status; + * do { + * ucp_worker_progress(worker); + * status = ucp_request_check_status(request); + * } while (status == UCS_INPROGRESS); + * ucp_request_release(request); + * return status; + * } + * } + * @endcode + * + * This routine stores contiguous block of data that is described by the + * local address @a buffer in the remote contiguous memory region described by + * @a remote_addr address and the @ref ucp_rkey_h "memory handle" @a rkey. The + * routine returns when it is safe to reuse the source address @e buffer. + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local source address. + * @param [in] length Length of the data (in bytes) stored under the + * source address. + * @param [in] remote_addr Pointer to the destination remote address + * to write to. + * @param [in] rkey Remote memory key associated with the + * remote address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_put(ucp_ep_h ep, const void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey); + + +/** + * @ingroup UCP_COMM + * @brief Blocking remote memory get operation. + * @deprecated Replaced by @ref ucp_get_nb. @see ucp_put. + * + * This routine loads contiguous block of data that is described by the remote + * address @a remote_addr and the @ref ucp_rkey_h "memory handle" @a rkey in + * the local contiguous memory region described by @a buffer address. 
The + * routine returns when remote data is loaded and stored under the local address + * @e buffer. + * + * + * @param [in] ep Remote endpoint handle. + * @param [in] buffer Pointer to the local destination address. + * @param [in] length Length of the data (in bytes) stored under the + * destination address. + * @param [in] remote_addr Pointer to the source remote address + * to read from. + * @param [in] rkey Remote memory key associated with the + * remote address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_get(ucp_ep_h ep, void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey); + + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic add operation for 32 bit integers + * @deprecated Replaced by @ref ucp_atomic_post with opcode UCP_ATOMIC_POST_OP_ADD. + * @see ucp_put. + * + * This routine performs an add operation on a 32 bit integer value atomically. + * The remote integer value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a add value is the value that is used for the add operation. + * When the operation completes the sum of the original remote value and the + * operand value (@a add) is stored in remote memory. + * The call to the routine returns immediately, independent of operation + * completion. + * + * @note The remote address must be aligned to 32 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] add Value to add. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. 
+ * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_atomic_add32(ucp_ep_h ep, uint32_t add, + uint64_t remote_addr, ucp_rkey_h rkey); + + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic add operation for 64 bit integers + * @deprecated Replaced by @ref ucp_atomic_post with opcode UCP_ATOMIC_POST_OP_ADD. + * @see ucp_put. + * + * This routine performs an add operation on a 64 bit integer value atomically. + * The remote integer value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a add value is the value that is used for the add operation. + * When the operation completes the sum of the original remote value and the + * operand value (@a add) is stored in remote memory. + * The call to the routine returns immediately, independent of operation + * completion. + * + * @note The remote address must be aligned to 64 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] add Value to add. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_atomic_add64(ucp_ep_h ep, uint64_t add, + uint64_t remote_addr, ucp_rkey_h rkey); + + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic fetch and add operation for 32 bit integers + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_FADD. + * @see ucp_put. + * + * This routine performs an add operation on a 32 bit integer value atomically. + * The remote integer value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a add value is the value that is used for the add operation. 
+ * When the operation completes, the original remote value is stored in the + * local memory @a result, and the sum of the original remote value and the + * operand value is stored in remote memory. + * The call to the routine returns when the operation is completed and the + * @a result value is updated. + * + * @note The remote address must be aligned to 32 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] add Value to add. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_atomic_fadd32(ucp_ep_h ep, uint32_t add, uint64_t remote_addr, + ucp_rkey_h rkey, uint32_t *result); + + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic fetch and add operation for 64 bit integers + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_FADD. + * @see ucp_put. + * + * This routine performs an add operation on a 64 bit integer value atomically. + * The remote integer value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a add value is the value that is used for the add operation. + * When the operation completes, the original remote value is stored in the + * local memory @a result, and the sum of the original remote value and the + * operand value is stored in remote memory. + * The call to the routine returns when the operation is completed and the + * @a result value is updated. + * + * @note The remote address must be aligned to 64 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] add Value to add. 
+ * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_atomic_fadd64(ucp_ep_h ep, uint64_t add, uint64_t remote_addr, + ucp_rkey_h rkey, uint64_t *result); + + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic swap operation for 32 bit values + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_SWAP. + * @see ucp_put. + * + * This routine swaps a 32 bit value between local and remote memory. + * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a swap value is the value that is used for the swap operation. + * When the operation completes, the remote value is stored in the + * local memory @a result, and the operand value (@a swap) is stored in remote + * memory. The call to the routine returns when the operation is completed and + * the @a result value is updated. + * + * @note The remote address must be aligned to 32 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] swap Value to swap. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. 
+ * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_atomic_swap32(ucp_ep_h ep, uint32_t swap, uint64_t remote_addr, + ucp_rkey_h rkey, uint32_t *result); + + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic swap operation for 64 bit values + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_SWAP. + * @see ucp_put. + * + * This routine swaps a 64 bit value between local and remote memory. + * The remote value is described by the combination of the remote + * memory address @a remote_addr and the @ref ucp_rkey_h "remote memory handle" + * @a rkey. The @a swap value is the value that is used for the swap operation. + * When the operation completes, the remote value is stored in the + * local memory @a result, and the operand value (@a swap) is stored in remote + * memory. The call to the routine returns when the operation is completed and + * the @a result value is updated. + * + * @note The remote address must be aligned to 64 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] swap Value to swap. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_atomic_swap64(ucp_ep_h ep, uint64_t swap, uint64_t remote_addr, + ucp_rkey_h rkey, uint64_t *result); + + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic conditional swap (cswap) operation for 32 bit values. + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_CSWAP. + * @see ucp_put. 
+ * + * This routine conditionally swaps a 32 bit value between local and remote + * memory. The swap occurs only if the condition value (@a compare) is equal + * to the remote value, otherwise the remote memory is not modified. The + * remote value is described by the combination of the remote memory address @p + * remote_addr and the @ref ucp_rkey_h "remote memory handle" @a rkey. The @p + * swap value is the value that is used to update the remote memory if the + * condition is true. The call to the routine returns when the operation is + * completed and the @a result value is updated. + * + * @note The remote address must be aligned to 32 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] compare Value to compare to. + * @param [in] swap Value to swap. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_atomic_cswap32(ucp_ep_h ep, uint32_t compare, uint32_t swap, + uint64_t remote_addr, ucp_rkey_h rkey, + uint32_t *result); + + +/** + * @ingroup UCP_COMM + * @brief Blocking atomic conditional swap (cswap) operation for 64 bit values. + * @deprecated Replaced by @ref ucp_atomic_fetch_nb with opcode UCP_ATOMIC_FETCH_OP_CSWAP. + * @see ucp_put. + * + * This routine conditionally swaps a 64 bit value between local and remote + * memory. The swap occurs only if the condition value (@a compare) is equal + * to the remote value, otherwise the remote memory is not modified. The + * remote value is described by the combination of the remote memory address @p + * remote_addr and the @ref ucp_rkey_h "remote memory handle" @a rkey. 
The @p + * swap value is the value that is used to update the remote memory if the + * condition is true. The call to the routine returns when the operation is + * completed and the @a result value is updated. + * + * @note The remote address must be aligned to 64 bit. + * + * @param [in] ep Remote endpoint handle. + * @param [in] compare Value to compare to. + * @param [in] swap Value to swap. + * @param [in] remote_addr Pointer to the destination remote address + * of the atomic variable. + * @param [in] rkey Remote memory key associated with the + * remote address. + * @param [out] result Pointer to the address that is used to store + * the previous value of the atomic variable described + * by the @a remote_addr + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t ucp_atomic_cswap64(ucp_ep_h ep, uint64_t compare, uint64_t swap, + uint64_t remote_addr, ucp_rkey_h rkey, + uint64_t *result); + + +/** + * @ingroup UCP_ENDPOINT + * @brief Modify endpoint parameters. + * + * @deprecated Use @ref ucp_listener_conn_handler_t instead of @ref + * ucp_listener_accept_handler_t, if you have other use case please + * submit an issue on https://github.com/openucx/ucx or report to + * ucx-group@elist.ornl.gov + * + * This routine modifies @ref ucp_ep_h "endpoint" created by @ref ucp_ep_create + * or @ref ucp_listener_accept_callback_t. For example, this API can be used + * to setup custom parameters like @ref ucp_ep_params_t::user_data or + * @ref ucp_ep_params_t::err_handler to endpoint created by + * @ref ucp_listener_accept_callback_t. + * + * @param [in] ep A handle to the endpoint. + * @param [in] params User defined @ref ucp_ep_params_t configurations + * for the @ref ucp_ep_h "UCP endpoint". + * + * @return NULL - The endpoint is modified successfully. + * @return UCS_PTR_IS_ERR(_ptr) - The reconfiguration failed and an error code + * indicates the status. However, the @a endpoint + * is not modified and can be used further. 
+ * @return otherwise - The reconfiguration process is started, and can be + * completed at any point in time. A request handle + * is returned to the application in order to track + * progress of the endpoint modification. + * The application is responsible for releasing the + * handle using the @ref ucp_request_free routine. + * + * @note See the documentation of @ref ucp_ep_params_t for details, only some of + * the parameters can be modified. + */ +ucs_status_ptr_t ucp_ep_modify_nb(ucp_ep_h ep, const ucp_ep_params_t *params); + + +END_C_DECLS + +#endif diff --git a/src/ucp/api/ucp_def.h b/src/ucp/api/ucp_def.h new file mode 100644 index 0000000..97ea61c --- /dev/null +++ b/src/ucp/api/ucp_def.h @@ -0,0 +1,581 @@ +/* +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +* Copyright (C) IBM 2015. ALL RIGHTS RESERVED. +* Copyright (C) Los Alamos National Security, LLC. 2018. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCP_DEF_H_ +#define UCP_DEF_H_ + +#include +#include +#include +#include + + +/** + * @ingroup UCP_CONTEXT + * @brief UCP receive information descriptor + * + * The UCP receive information descriptor is allocated by application and filled + * in with the information about the received message by @ref ucp_tag_probe_nb + * or @ref ucp_tag_recv_request_test routines or + * @ref ucp_tag_recv_callback_t callback argument. + */ +typedef struct ucp_tag_recv_info ucp_tag_recv_info_t; + + +/** + * @ingroup UCP_CONTEXT + * @brief UCP Application Context + * + * UCP application context (or just a context) is an opaque handle that holds a + * UCP communication instance's global information. It represents a single UCP + * communication instance. The communication instance could be an OS process + * (an application) that uses UCP library. 
This global information includes + * communication resources, endpoints, memory, temporary file storage, and + * other communication information directly associated with a specific UCP + * instance. The context also acts as an isolation mechanism, allowing + * resources associated with the context to manage multiple concurrent + * communication instances. For example, users using both MPI and OpenSHMEM + * sessions simultaneously can isolate their communication by allocating and + * using separate contexts for each of them. Alternatively, users can share the + * communication resources (memory, network resource context, etc.) between + * them by using the same application context. A message sent or an RMA + * operation performed in one application context cannot be received in any + * other application context. + */ +typedef struct ucp_context *ucp_context_h; + + +/** + * @ingroup UCP_CONFIG + * @brief UCP configuration descriptor + * + * This descriptor defines the configuration for @ref ucp_context_h + * "UCP application context". The configuration is loaded from the run-time + * environment (using configuration files or environment variables) + * using @ref ucp_config_read "ucp_config_read" routine and can be printed + * using @ref ucp_config_print "ucp_config_print" routine. In addition, + * application is responsible to release the descriptor using + * @ref ucp_config_release "ucp_config_release" routine. + * + * @todo This structure will be modified through a dedicated function. + */ +typedef struct ucp_config ucp_config_t; + + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP Endpoint + * + * The endpoint handle is an opaque object that is used to address a remote + * @ref ucp_worker_h "worker". It typically provides a description of source, + * destination, or both. All UCP communication routines address a destination + * with the endpoint handle. The endpoint handle is associated with only one + * @ref ucp_context_h "UCP context". 
UCP provides the @ref ucp_ep_create + * "endpoint create" routine to create the endpoint handle and the @ref + * ucp_ep_destroy "destroy" routine to destroy the endpoint handle. + */ +typedef struct ucp_ep *ucp_ep_h; + + +/** + * @ingroup UCP_ENDPOINT + * @brief UCP connection request + * + * A server-side handle to incoming connection request. Can be used to create an + * endpoint which connects back to the client. + */ +typedef struct ucp_conn_request *ucp_conn_request_h; + + +/** + * @ingroup UCP_WORKER + * @brief UCP worker address + * + * The address handle is an opaque object that is used as an identifier for a + * @ref ucp_worker_h "worker" instance. + */ +typedef struct ucp_address ucp_address_t; + + +/** + * @ingroup UCP_ENDPOINT + * @brief Error handling mode for the UCP endpoint. + * + * Specifies error handling mode for the UCP endpoint. + */ +typedef enum { + UCP_ERR_HANDLING_MODE_NONE, /**< No guarantees about error + * reporting, imposes minimal + * overhead from a performance + * perspective. @note In this + * mode, any error reporting will + * not generate calls to @ref + * ucp_ep_params_t::err_handler. + */ + UCP_ERR_HANDLING_MODE_PEER /**< Guarantees that send requests + * are always completed + * (successfully or error) even in + * case of remote failure, disables + * protocols and APIs which may + * cause a hang or undefined + * behavior in case of peer failure, + * may affect performance and + * memory footprint */ +} ucp_err_handling_mode_t; + + +/** + * @ingroup UCP_MEM + * @brief UCP Remote memory handle + * + * Remote memory handle is an opaque object representing remote memory access + * information. Typically, the handle includes a memory access key and other + * network hardware specific information, which are input to remote memory + * access operations, such as PUT, GET, and ATOMIC. The object is + * communicated to remote peers to enable an access to the memory region. 
+ */ +typedef struct ucp_rkey *ucp_rkey_h; + + +/** + * @ingroup UCP_MEM + * @brief UCP Memory handle + * + * Memory handle is an opaque object representing a memory region allocated + * through UCP library, which is optimized for remote memory access + * operations (zero-copy operations). The memory handle is a self-contained + * object, which includes the information required to access the memory region + * locally, while @ref ucp_rkey_h "remote key" is used to access it + * remotely. The memory could be registered to one or multiple network resources + * that are supported by UCP, such as InfiniBand, Gemini, and others. + */ +typedef struct ucp_mem *ucp_mem_h; + + +/** + * @ingroup UCP_WORKER + * @brief UCP listen handle. + * + * The listener handle is an opaque object that is used for listening on a + * specific address and accepting connections from clients. + */ +typedef struct ucp_listener *ucp_listener_h; + + +/** + * @ingroup UCP_MEM + * @brief Attributes of the @ref ucp_mem_h "UCP Memory handle", filled by + * @ref ucp_mem_query function. + */ +typedef struct ucp_mem_attr { + /** + * Mask of valid fields in this structure, using bits from @ref ucp_mem_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Address of the memory segment. + */ + void *address; + + /** + * Size of the memory segment. + */ + size_t length; +} ucp_mem_attr_t; + + +/** + * @ingroup UCP_MEM + * @brief UCP Memory handle attributes field mask. + * + * The enumeration allows specifying which fields in @ref ucp_mem_attr_t are + * present. It is used to enable backward compatibility support. 
+ */ +enum ucp_mem_attr_field { + UCP_MEM_ATTR_FIELD_ADDRESS = UCS_BIT(0), /**< Virtual address */ + UCP_MEM_ATTR_FIELD_LENGTH = UCS_BIT(1) /**< The size of memory region */ +}; + + +/** + * @ingroup UCP_WORKER + * @brief UCP Worker + * + * UCP worker is an opaque object representing the communication context. The + * worker represents an instance of a local communication resource and the + * progress engine associated with it. The progress engine is a construct that + * is responsible for asynchronous and independent progress of communication + * directives. The progress engine could be implemented in hardware or software. + * The worker object abstracts an instance of network resources such as a host + * channel adapter port, network interface, or multiple resources such as + * multiple network interfaces or communication ports. It could also represent + * virtual communication resources that are defined across multiple devices. + * Although the worker can represent multiple network resources, it is + * associated with a single @ref ucp_context_h "UCX application context". + * All communication functions require a context to perform the operation on + * the dedicated hardware resource(s) and an @ref ucp_ep_h "endpoint" to address the + * destination. + * + * @note Worker are parallel "threading points" that an upper layer may use to + * optimize concurrent communications. + */ + typedef struct ucp_worker *ucp_worker_h; + + +/** + * @ingroup UCP_COMM + * @brief UCP Tag Identifier + * + * UCP tag identifier is a 64bit object used for message identification. + * UCP tag send and receive operations use the object for an implementation + * tag matching semantics (derivative of MPI tag matching semantics). + */ +typedef uint64_t ucp_tag_t; + + +/** + * @ingroup UCP_COMM + * @brief UCP Message descriptor. + * + * UCP Message descriptor is an opaque handle for a message returned by + * @ref ucp_tag_probe_nb. 
This handle can be passed to @ref ucp_tag_msg_recv_nb + * in order to receive the message data to a specific buffer. + */ +typedef struct ucp_recv_desc *ucp_tag_message_h; + + +/** + * @ingroup UCP_COMM + * @brief UCP Datatype Identifier + * + * UCP datatype identifier is a 64bit object used for datatype identification. + * Predefined UCP identifiers are defined by @ref ucp_dt_type. + */ +typedef uint64_t ucp_datatype_t; + + +/** + * @ingroup UCP_CONTEXT + * @brief Request initialization callback. + * + * This callback routine is responsible for the request initialization. + * + * @param [in] request Request handle to initialize. + */ +typedef void (*ucp_request_init_callback_t)(void *request); + + +/** + * @ingroup UCP_CONTEXT + * @brief Request cleanup callback. + * + * This callback routine is responsible for cleanup of the memory + * associated with the request. + * + * @param [in] request Request handle to cleanup. + */ +typedef void (*ucp_request_cleanup_callback_t)(void *request); + + +/** + * @ingroup UCP_COMM + * @brief Completion callback for non-blocking sends. + * + * This callback routine is invoked whenever the @ref ucp_tag_send_nb + * "send operation" is completed. It is important to note that the call-back is + * only invoked in a case when the operation cannot be completed in place. + * + * @param [in] request The completed send request. + * @param [in] status Completion status. If the send operation was completed + * successfully UCS_OK is returned. If send operation was + * canceled UCS_ERR_CANCELED is returned. + * Otherwise, an @ref ucs_status_t "error status" is + * returned. + */ +typedef void (*ucp_send_callback_t)(void *request, ucs_status_t status); + + + /** + * @ingroup UCP_COMM + * @brief Callback to process peer failure. + * + * This callback routine is invoked when transport level error detected. + * + * @param [in] arg User argument to be passed to the callback. + * @param [in] ep Endpoint to handle transport level error. 
Upon return + * from the callback, this @a ep is no longer usable and + * all subsequent operations on this @a ep will fail with + * the error code passed in @a status. + * @param [in] status @ref ucs_status_t "error status". + */ +typedef void (*ucp_err_handler_cb_t)(void *arg, ucp_ep_h ep, ucs_status_t status); + + + /** + * @ingroup UCP_COMM + * @brief UCP endpoint error handling context. + * + * This structure should be initialized in @ref ucp_ep_params_t to handle peer failure + */ +typedef struct ucp_err_handler { + ucp_err_handler_cb_t cb; /**< Error handler callback, if NULL, will + not be called. */ + void *arg; /**< User defined argument associated with + an endpoint, it will be overridden by + @ref ucp_ep_params_t::user_data if both + are set. */ +} ucp_err_handler_t; + + +/** + * @ingroup UCP_WORKER + * @brief A callback for accepting client/server connections on a listener + * @ref ucp_listener_h. + * + * This callback routine is invoked on the server side upon creating a connection + * to a remote client. The user can pass an argument to this callback. + * The user is responsible for releasing the @a ep handle using the + * @ref ucp_ep_destroy "ucp_ep_destroy()" routine. + * + * @param [in] ep Handle to a newly created endpoint which is connected + * to the remote peer which has initiated the connection. + * @param [in] arg User's argument for the callback. + */ +typedef void (*ucp_listener_accept_callback_t)(ucp_ep_h ep, void *arg); + + +/** + * @ingroup UCP_WORKER + * @brief A callback for handling of incoming connection request @a conn_request + * from a client. + * + * This callback routine is invoked on the server side to handle incoming + * connections from remote clients. The user can pass an argument to this + * callback. The @a conn_request handle has to be released, either by @ref + * ucp_ep_create or @ref ucp_listener_reject routine. + * + * @param [in] conn_request Connection request handle. 
+ * @param [in] arg User's argument for the callback. + */ +typedef void +(*ucp_listener_conn_callback_t)(ucp_conn_request_h conn_request, void *arg); + + +/** + * @ingroup UCP_WORKER + * @brief UCP callback to handle the connection request in a client-server + * connection establishment flow. + * + * This structure is used for handling an incoming connection request on + * the listener. Setting this type of handler allows creating an endpoint on + * any other worker and not limited to the worker on which the listener was + * created. + * @note + * - Other than communication progress routines, it is allowed to call all + * other communication routines from the callback in the struct. + * - The callback is thread safe with respect to the worker it is invoked on. + * - It is the user's responsibility to avoid potential dead lock accessing + * different worker. + */ +typedef struct ucp_listener_conn_handler { + ucp_listener_conn_callback_t cb; /**< Connection request callback */ + void *arg; /**< User defined argument for the + callback */ +} ucp_listener_conn_handler_t; + + +/** + * @ingroup UCP_COMM + * @brief Completion callback for non-blocking stream oriented receives. + * + * This callback routine is invoked whenever the @ref ucp_stream_recv_nb + * "receive operation" is completed and the data is ready in the receive buffer. + * + * @param [in] request The completed receive request. + * @param [in] status Completion status. If the receive operation was completed + * successfully UCS_OK is returned. Otherwise, + * an @ref ucs_status_t "error status" is returned. + * @param [in] length The size of the received data in bytes, always + * boundary of base datatype size. The value is valid + * only if the status is UCS_OK. + */ +typedef void (*ucp_stream_recv_callback_t)(void *request, ucs_status_t status, + size_t length); + + +/** + * @ingroup UCP_COMM + * @brief Completion callback for non-blocking tag receives. 
+ * + * This callback routine is invoked whenever the @ref ucp_tag_recv_nb + * "receive operation" is completed and the data is ready in the receive buffer. + * + * @param [in] request The completed receive request. + * @param [in] status Completion status. If the receive operation was completed + * successfully UCS_OK is returned. If receive operation was + * canceled UCS_ERR_CANCELED is returned. If the data can + * not fit into the receive buffer the + * @ref UCS_ERR_MESSAGE_TRUNCATED error code is returned. + * Otherwise, an @ref ucs_status_t "error status" is + * returned. + * @param [in] info @ref ucp_tag_recv_info_t "Completion information" + * The @a info descriptor is valid only if the status is + * UCS_OK. + */ +typedef void (*ucp_tag_recv_callback_t)(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info); + +/** + * @ingroup UCP_WORKER + * @brief UCP worker wakeup events mask. + * + * The enumeration allows specifying which events are expected on wakeup. Empty + * events are possible for any type of event except for @ref UCP_WAKEUP_TX and + * @ref UCP_WAKEUP_RX. + * + * @note Send completions are reported by POLLIN-like events (see poll man + * page). Since outgoing operations can be initiated at any time, UCP does not + * generate POLLOUT-like events, although it must be noted that outgoing + * operations may be queued depending upon resource availability. + */ +typedef enum ucp_wakeup_event_types { + UCP_WAKEUP_RMA = UCS_BIT(0), /**< Remote memory access send completion */ + UCP_WAKEUP_AMO = UCS_BIT(1), /**< Atomic operation send completion */ + UCP_WAKEUP_TAG_SEND = UCS_BIT(2), /**< Tag send completion */ + UCP_WAKEUP_TAG_RECV = UCS_BIT(3), /**< Tag receive completion */ + UCP_WAKEUP_TX = UCS_BIT(10),/**< This event type will generate an + event on completion of any + outgoing operation (complete or + partial, according to the + underlying protocol) for any type + of transfer (send, atomic, or + RMA). 
*/ + UCP_WAKEUP_RX = UCS_BIT(11),/**< This event type will generate an + event on completion of any receive + operation (complete or partial, + according to the underlying + protocol). */ + UCP_WAKEUP_EDGE = UCS_BIT(16) /**< Use edge-triggered wakeup. The event + file descriptor will be signaled only + for new events, rather than existing + ones. */ +} ucp_wakeup_event_t; + + +/** + * @ingroup UCP_ENDPOINT + * @brief Callback to process incoming Active Message. + * + * When the callback is called, @a flags indicates how @a data should be handled. + * + * @param [in] arg User-defined argument. + * @param [in] data Points to the received data. This data may + * persist after the callback returns and needs + * to be freed with @ref ucp_am_data_release. + * @param [in] length Length of data. + * @param [in] reply_ep If the Active Message is sent with the + * UCP_AM_SEND_REPLY flag, the sending ep + * will be passed in. If not, NULL will be passed. + * @param [in] flags If this flag is set to UCP_CB_PARAM_FLAG_DATA, + * the callback can return UCS_INPROGRESS and + * data will persist after the callback returns. + * + * @return UCS_OK @a data will not persist after the callback returns. + * + * @return UCS_INPROGRESS Can only be returned if flags is set to + * UCP_CB_PARAM_FLAG_DATA. If UCS_INPROGRESS + * is returned, data will persist after the + * callback has returned. To free the memory, + * a pointer to the data must be passed into + * @ref ucp_am_data_release. + * + * @note This callback should be set and released + * by @ref ucp_worker_set_am_handler function. + * + */ +typedef ucs_status_t (*ucp_am_callback_t)(void *arg, void *data, size_t length, + ucp_ep_h reply_ep, unsigned flags); + + +/** + * @ingroup UCP_ENDPOINT + * @brief Tuning parameters for the UCP endpoint. + * + * The structure defines the parameters that are used for the + * UCP endpoint tuning during the UCP ep @ref ucp_ep_create "creation".
+ */ +typedef struct ucp_ep_params { + /** + * Mask of valid fields in this structure, using bits from + * @ref ucp_ep_params_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** + * Destination address; this field should be set along with its + * corresponding bit in the field_mask - @ref + * UCP_EP_PARAM_FIELD_REMOTE_ADDRESS and must be obtained using @ref + * ucp_worker_get_address. + */ + const ucp_address_t *address; + + /** + * Desired error handling mode, optional parameter. Default value is + * @ref UCP_ERR_HANDLING_MODE_NONE. + */ + ucp_err_handling_mode_t err_mode; + + /** + * Handler to process transport level failure. + */ + ucp_err_handler_t err_handler; + + /** + * User data associated with an endpoint. See @ref ucp_stream_poll_ep_t and + * @ref ucp_err_handler_t + */ + void *user_data; + + /** + * Endpoint flags from @ref ucp_ep_params_flags_field. + * This value is optional. + * If it's not set (along with its corresponding bit in the field_mask - + * @ref UCP_EP_PARAM_FIELD_FLAGS), the @ref ucp_ep_create() routine will + * consider the flags as set to zero. + */ + unsigned flags; + + /** + * Destination address in the form of a sockaddr; this field should be set + * along with its corresponding bit in the field_mask - @ref + * UCP_EP_PARAM_FIELD_SOCK_ADDR and must be obtained from the user, it means + * that this type of the endpoint creation is possible only on client side + * in client-server connection establishment flow. + */ + ucs_sock_addr_t sockaddr; + + /** + * Connection request from client; this field should be set along with its + * corresponding bit in the field_mask - @ref + * UCP_EP_PARAM_FIELD_CONN_REQUEST and must be obtained from @ref + * ucp_listener_conn_callback_t, it means that this type of the endpoint + * creation is possible only on server side in client-server connection + * establishment flow. 
+ */ + ucp_conn_request_h conn_request; + +} ucp_ep_params_t; + + +#endif diff --git a/src/ucp/api/ucp_version.h b/src/ucp/api/ucp_version.h new file mode 100644 index 0000000..62732fe --- /dev/null +++ b/src/ucp/api/ucp_version.h @@ -0,0 +1,22 @@ +/* +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + + +/** + * Construct a UCP version identifier from major and minor version numbers. + */ +#define UCP_VERSION(_major, _minor) \ + (((_major) << UCP_VERSION_MAJOR_SHIFT) | \ + ((_minor) << UCP_VERSION_MINOR_SHIFT)) +#define UCP_VERSION_MAJOR_SHIFT 24 +#define UCP_VERSION_MINOR_SHIFT 16 + + +/** + * UCP API version is 1.8 + */ +#define UCP_API_MAJOR 1 +#define UCP_API_MINOR 8 +#define UCP_API_VERSION UCP_VERSION(1, 8) diff --git a/src/ucp/api/ucp_version.h.in b/src/ucp/api/ucp_version.h.in new file mode 100644 index 0000000..dc14a17 --- /dev/null +++ b/src/ucp/api/ucp_version.h.in @@ -0,0 +1,22 @@ +/* +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + + +/** + * Construct a UCP version identifier from major and minor version numbers. + */ +#define UCP_VERSION(_major, _minor) \ + (((_major) << UCP_VERSION_MAJOR_SHIFT) | \ + ((_minor) << UCP_VERSION_MINOR_SHIFT)) +#define UCP_VERSION_MAJOR_SHIFT 24 +#define UCP_VERSION_MINOR_SHIFT 16 + + +/** + * UCP API version is @MAJOR_VERSION@.@MINOR_VERSION@ + */ +#define UCP_API_MAJOR @MAJOR_VERSION@ +#define UCP_API_MINOR @MINOR_VERSION@ +#define UCP_API_VERSION UCP_VERSION(@MAJOR_VERSION@, @MINOR_VERSION@) diff --git a/src/ucp/api/ucpx.h b/src/ucp/api/ucpx.h new file mode 100644 index 0000000..c6cc8ed --- /dev/null +++ b/src/ucp/api/ucpx.h @@ -0,0 +1,26 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCPX_H_ +#define UCPX_H_ + +#include +#include + +/* + * This header file is for experimental UCP API. 
+ * APIs defined here are NOT stable and may be removed / changed without notice. + * By default, this header file is not installed. In order to install it, need + * to run ./configure --enable-experimental-api + */ + +BEGIN_C_DECLS + + + +END_C_DECLS + +#endif diff --git a/src/ucp/core/ucp_am.c b/src/ucp/core/ucp_am.c new file mode 100644 index 0000000..a8bdf1f --- /dev/null +++ b/src/ucp/core/ucp_am.c @@ -0,0 +1,717 @@ +/** +* Copyright (C) Los Alamos National Security, LLC. 2019 ALL RIGHTS RESERVED. +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucp_am.h" +#include "ucp_am.inl" + +#include +#include +#include +#include +#include +#include +#include + + +void ucp_am_ep_init(ucp_ep_h ep) +{ + ucp_ep_ext_proto_t *ep_ext = ucp_ep_ext_proto(ep); + + if (ep->worker->context->config.features & UCP_FEATURE_AM) { + ucs_list_head_init(&ep_ext->am.started_ams); + } +} + +/* Warn at endpoint cleanup if the list of started (not yet fully received) + * active messages is not empty. Nothing is released here. */ +void ucp_am_ep_cleanup(ucp_ep_h ep) +{ + ucp_ep_ext_proto_t *ep_ext = ucp_ep_ext_proto(ep); + + if (ep->worker->context->config.features & UCP_FEATURE_AM) { + if (ucs_unlikely(!ucs_list_is_empty(&ep_ext->am.started_ams))) { + ucs_warn("worker : %p not all UCP active messages have been " + "run to completion", ep->worker); + } + } +} + +UCS_PROFILE_FUNC_VOID(ucp_am_data_release, + (worker, data), + ucp_worker_h worker, void *data) +{ + ucp_recv_desc_t *rdesc = (ucp_recv_desc_t *)data - 1; + ucp_recv_desc_t *desc; + + if (rdesc->flags & UCP_RECV_DESC_FLAG_MALLOC) { + ucs_free(rdesc); + return; + } else if (rdesc->flags & UCP_RECV_DESC_FLAG_AM_HDR) { + desc = rdesc; + rdesc = UCS_PTR_BYTE_OFFSET(rdesc, -sizeof(ucp_am_hdr_t)); + *rdesc = *desc; + } else if (rdesc->flags & UCP_RECV_DESC_FLAG_AM_REPLY) { + desc = rdesc; + rdesc = UCS_PTR_BYTE_OFFSET(rdesc, -sizeof(ucp_am_reply_hdr_t)); + *rdesc = *desc; + } + ucp_recv_desc_release(rdesc); +} + +UCS_PROFILE_FUNC(ucs_status_t,
ucp_worker_set_am_handler, + (worker, id, cb, arg, flags), + ucp_worker_h worker, uint16_t id, + ucp_am_callback_t cb, void *arg, + uint32_t flags) +{ + ucp_worker_am_entry_t *am_cbs; + size_t num_entries; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(worker->context, UCP_FEATURE_AM, + return UCS_ERR_INVALID_PARAM); + + if (id >= worker->am_cb_array_len) { + /* Grow the callback array in UCP_AM_CB_BLOCK_SIZE steps so that + * index 'id' becomes valid */ + num_entries = ucs_align_up_pow2(id + 1, UCP_AM_CB_BLOCK_SIZE); + /* Reallocate through a temporary: on failure the existing array must + * stay reachable from the worker and must not be passed to memset */ + am_cbs = ucs_realloc(worker->am_cbs, num_entries * + sizeof(ucp_worker_am_entry_t), + "UCP AM callback array"); + if (ucs_unlikely(am_cbs == NULL)) { + ucs_error("failed to grow UCP AM callback array to %zu entries", + num_entries); + return UCS_ERR_NO_MEMORY; + } + + worker->am_cbs = am_cbs; + /* Zero only the newly added tail so unregistered ids have cb == NULL */ + memset(worker->am_cbs + worker->am_cb_array_len, + 0, (num_entries - worker->am_cb_array_len) + * sizeof(ucp_worker_am_entry_t)); + + worker->am_cb_array_len = num_entries; + } + + worker->am_cbs[id].cb = cb; + worker->am_cbs[id].context = arg; + worker->am_cbs[id].flags = flags; + + return UCS_OK; +} + +static size_t +ucp_am_bcopy_pack_args_single(void *dest, void *arg) +{ + ucp_am_hdr_t *hdr = dest; + ucp_request_t *req = arg; + size_t length; + + ucs_assert(req->send.state.dt.offset == 0); + + hdr->am_hdr.am_id = req->send.am.am_id; + hdr->am_hdr.length = req->send.length; + hdr->am_hdr.flags = req->send.am.flags; + + length = ucp_dt_pack(req->send.ep->worker, req->send.datatype, + UCS_MEMORY_TYPE_HOST, hdr + 1, req->send.buffer, + &req->send.state.dt, req->send.length); + ucs_assert(length == req->send.length); + + return sizeof(*hdr) + length; +} + +static size_t +ucp_am_bcopy_pack_args_single_reply(void *dest, void *arg) +{ + ucp_am_reply_hdr_t *reply_hdr = dest; + ucp_request_t *req = arg; + size_t length; + size_t hdr_size; + + ucs_assert(req->send.state.dt.offset == 0); + + reply_hdr->super.am_hdr.am_id = req->send.am.am_id; + reply_hdr->super.am_hdr.length = req->send.length; + reply_hdr->super.am_hdr.flags = req->send.am.flags; + reply_hdr->ep_ptr = ucp_request_get_dest_ep_ptr(req); + + length = ucp_dt_pack(req->send.ep->worker, req->send.datatype, + UCS_MEMORY_TYPE_HOST, reply_hdr + 1, + req->send.buffer, + &req->send.state.dt, req->send.length); + hdr_size
= sizeof(*reply_hdr); + ucs_assert(length == req->send.length); + + return hdr_size + length; +} + +static size_t +ucp_am_bcopy_pack_args_first(void *dest, void *arg) +{ + ucp_am_long_hdr_t *hdr = dest; + ucp_request_t *req = arg; + size_t length; + + length = ucp_ep_get_max_bcopy(req->send.ep, req->send.lane) - + sizeof(*hdr); + hdr->total_size = req->send.length; + hdr->am_id = req->send.am.am_id; + hdr->msg_id = req->send.am.message_id; + hdr->ep = ucp_request_get_dest_ep_ptr(req); + hdr->offset = req->send.state.dt.offset; + + ucs_assert(req->send.state.dt.offset == 0); + ucs_assert(req->send.length > length); + + return sizeof(*hdr) + ucp_dt_pack(req->send.ep->worker, + req->send.datatype, + UCS_MEMORY_TYPE_HOST, + hdr + 1, req->send.buffer, + &req->send.state.dt, length); +} + +static size_t +ucp_am_bcopy_pack_args_mid(void *dest, void *arg) +{ + ucp_am_long_hdr_t *hdr = dest; + ucp_request_t *req = arg; + size_t length; + size_t max_bcopy; + + max_bcopy = ucp_ep_get_max_bcopy(req->send.ep, req->send.lane); + length = ucs_min(max_bcopy - sizeof(*hdr), + req->send.length - req->send.state.dt.offset); + + hdr->msg_id = req->send.am.message_id; + hdr->offset = req->send.state.dt.offset; + hdr->ep = ucp_request_get_dest_ep_ptr(req); + hdr->am_id = req->send.am.am_id; + hdr->total_size = req->send.length; + + return sizeof(*hdr) + ucp_dt_pack(req->send.ep->worker, + req->send.datatype, + UCS_MEMORY_TYPE_HOST, + hdr + 1, req->send.buffer, + &req->send.state.dt, length); +} + +static ucs_status_t ucp_am_send_short(ucp_ep_h ep, uint16_t id, + const void *payload, size_t length) +{ + uct_ep_h am_ep = ucp_ep_get_am_uct_ep(ep); + ucp_am_hdr_t hdr; + + hdr.am_hdr.am_id = id; + hdr.am_hdr.length = length; + hdr.am_hdr.flags = 0; + ucs_assert(sizeof(ucp_am_hdr_t) == sizeof(uint64_t)); + + return uct_ep_am_short(am_ep, UCP_AM_ID_SINGLE, hdr.u64, + (void *)payload, length); +} + +static ucs_status_t ucp_am_contig_short(uct_pending_req_t *self) +{ + ucp_request_t *req = 
ucs_container_of(self, ucp_request_t, send.uct); + ucs_status_t status = ucp_am_send_short(req->send.ep, + req->send.am.am_id, + req->send.buffer, + req->send.length); + if (ucs_likely(status == UCS_OK)) { + ucp_request_complete_send(req, UCS_OK); + } + + return status; +} + +static ucs_status_t ucp_am_bcopy_single(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucs_status_t status; + + status = ucp_do_am_bcopy_single(self, UCP_AM_ID_SINGLE, + ucp_am_bcopy_pack_args_single); + if (status == UCS_OK) { + ucp_request_send_generic_dt_finish(req); + ucp_request_complete_send(req, UCS_OK); + } + + return status; +} + +static ucs_status_t ucp_am_bcopy_single_reply(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucs_status_t status; + + status = ucp_do_am_bcopy_single(self, UCP_AM_ID_SINGLE_REPLY, + ucp_am_bcopy_pack_args_single_reply); + if (status == UCS_OK) { + ucp_request_send_generic_dt_finish(req); + ucp_request_complete_send(req, UCS_OK); + } + + return status; +} + +static ucs_status_t ucp_am_bcopy_multi(uct_pending_req_t *self) +{ + ucs_status_t status = ucp_do_am_bcopy_multi(self, UCP_AM_ID_MULTI, + UCP_AM_ID_MULTI, + ucp_am_bcopy_pack_args_first, + ucp_am_bcopy_pack_args_mid, 0); + ucp_request_t *req; + + if (status == UCS_OK) { + req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_request_send_generic_dt_finish(req); + ucp_request_complete_send(req, UCS_OK); + } else if (status == UCP_STATUS_PENDING_SWITCH) { + status = UCS_OK; + } + + return status; +} + +static ucs_status_t ucp_am_bcopy_multi_reply(uct_pending_req_t *self) +{ + ucs_status_t status = ucp_do_am_bcopy_multi(self, UCP_AM_ID_MULTI_REPLY, + UCP_AM_ID_MULTI_REPLY, + ucp_am_bcopy_pack_args_first, + ucp_am_bcopy_pack_args_mid, 0); + ucp_request_t *req; + + if (status == UCS_OK) { + req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_request_send_generic_dt_finish(req); + 
ucp_request_complete_send(req, UCS_OK); + } else if (status == UCP_STATUS_PENDING_SWITCH) { + status = UCS_OK; + } + + return status; +} + +static ucs_status_t ucp_am_zcopy_single(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_am_hdr_t hdr; + + hdr.am_hdr.am_id = req->send.am.am_id; + hdr.am_hdr.length = req->send.length; + hdr.am_hdr.flags = req->send.am.flags; + + return ucp_do_am_zcopy_single(self, UCP_AM_ID_SINGLE, &hdr, + sizeof(hdr), ucp_proto_am_zcopy_req_complete); +} + +static ucs_status_t ucp_am_zcopy_single_reply(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_am_reply_hdr_t reply_hdr; + + reply_hdr.super.am_hdr.am_id = req->send.am.am_id; + reply_hdr.super.am_hdr.length = req->send.length; + reply_hdr.super.am_hdr.flags = req->send.am.flags; + reply_hdr.ep_ptr = ucp_request_get_dest_ep_ptr(req); + + return ucp_do_am_zcopy_single(self, UCP_AM_ID_SINGLE_REPLY, + &reply_hdr, sizeof(reply_hdr), + ucp_proto_am_zcopy_req_complete); +} + +static ucs_status_t ucp_am_zcopy_multi(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_am_long_hdr_t hdr; + + hdr.ep = ucp_request_get_dest_ep_ptr(req); + hdr.msg_id = req->send.am.message_id; + hdr.offset = req->send.state.dt.offset; + hdr.am_id = req->send.am.am_id; + hdr.total_size = req->send.length; + + return ucp_do_am_zcopy_multi(self, UCP_AM_ID_MULTI, + UCP_AM_ID_MULTI, + &hdr, sizeof(hdr), + &hdr, sizeof(hdr), + ucp_proto_am_zcopy_req_complete, 0); +} + +static ucs_status_t ucp_am_zcopy_multi_reply(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_am_long_hdr_t hdr; + + hdr.ep = ucp_request_get_dest_ep_ptr(req); + hdr.msg_id = req->send.am.message_id; + hdr.offset = req->send.state.dt.offset; + hdr.am_id = req->send.am.am_id; + hdr.total_size = req->send.length; + + return 
ucp_do_am_zcopy_multi(self, UCP_AM_ID_MULTI_REPLY, + UCP_AM_ID_MULTI_REPLY, + &hdr, sizeof(hdr), + &hdr, sizeof(hdr), + ucp_proto_am_zcopy_req_complete, 0); +} + +static void ucp_am_send_req_init(ucp_request_t *req, ucp_ep_h ep, + const void *buffer, uintptr_t datatype, + size_t count, uint16_t flags, + uint16_t am_id) +{ + req->flags = UCP_REQUEST_FLAG_SEND_AM; + req->send.ep = ep; + req->send.am.am_id = am_id; + req->send.am.flags = flags; + req->send.buffer = (void *) buffer; + req->send.datatype = datatype; + req->send.mem_type = UCS_MEMORY_TYPE_HOST; + req->send.lane = ep->am_lane; + + ucp_request_send_state_init(req, datatype, count); + req->send.length = ucp_dt_length(req->send.datatype, count, + req->send.buffer, + &req->send.state.dt); +} + +static UCS_F_ALWAYS_INLINE ucs_status_ptr_t +ucp_am_send_req(ucp_request_t *req, size_t count, + const ucp_ep_msg_config_t *msg_config, + ucp_send_callback_t cb, const ucp_request_send_proto_t *proto) +{ + + size_t zcopy_thresh = ucp_proto_get_zcopy_threshold(req, msg_config, + count, SIZE_MAX); + ssize_t max_short = ucp_am_get_short_max(req, msg_config); + ucs_status_t status; + + status = ucp_request_send_start(req, max_short, + zcopy_thresh, SIZE_MAX, + count, msg_config, + proto); + if (status != UCS_OK) { + return UCS_STATUS_PTR(status); + } + + /* Start the request. + * If it is completed immediately, release the request and return the status. + * Otherwise, return the request. 
+ */ + status = ucp_request_send(req, 0); + if (req->flags & UCP_REQUEST_FLAG_COMPLETED) { + ucs_trace_req("releasing send request %p, returning status %s", req, + ucs_status_string(status)); + ucp_request_put(req); + return UCS_STATUS_PTR(status); + } + + ucp_request_set_callback(req, send.cb, cb); + + return req + 1; +} + +UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_am_send_nb, + (ep, id, payload, count, datatype, cb, flags), + ucp_ep_h ep, uint16_t id, const void *payload, + size_t count, uintptr_t datatype, + ucp_send_callback_t cb, unsigned flags) +{ + ucs_status_t status; + ucs_status_ptr_t ret; + ucp_request_t *req; + size_t length; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(ep->worker->context, UCP_FEATURE_AM, + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); + + /* UCP_AM_SEND_REPLY is the only flag accepted here */ + if (ucs_unlikely((flags != 0) && !(flags & UCP_AM_SEND_REPLY))) { + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM); + } + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + /* Fast path: contiguous, non-reply payload that fits in a short send */ + if (ucs_likely(!(flags & UCP_AM_SEND_REPLY)) && + (ucs_likely(UCP_DT_IS_CONTIG(datatype)))) { + length = ucp_contig_dt_length(datatype, count); + + if (ucs_likely((ssize_t)length <= ucp_ep_config(ep)->am.max_short)) { + status = ucp_am_send_short(ep, id, payload, length); + if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) { + UCP_EP_STAT_TAG_OP(ep, EAGER); + ret = UCS_STATUS_PTR(status); + goto out; + } + } + } + + /* Resolve the destination endpoint before allocating the request, so + * that a failure here leaves no initialized request behind to leak */ + status = ucp_ep_resolve_dest_ep_ptr(ep, ep->am_lane); + if (ucs_unlikely(status != UCS_OK)) { + ret = UCS_STATUS_PTR(status); + goto out; + } + + req = ucp_request_get(ep->worker); + if (ucs_unlikely(req == NULL)) { + ret = UCS_STATUS_PTR(UCS_ERR_NO_MEMORY); + goto out; + } + + ucp_am_send_req_init(req, ep, payload, datatype, count, flags, id); + + if (flags & UCP_AM_SEND_REPLY) { + ret = ucp_am_send_req(req, count, &ucp_ep_config(ep)->am, cb, + ucp_ep_config(ep)->am_u.reply_proto); + } else { + ret = ucp_am_send_req(req, count, &ucp_ep_config(ep)->am, cb, + ucp_ep_config(ep)->am_u.proto); + } + +out: +
UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return ret; +} + +/* Common receive path for single-fragment active messages: wraps the payload + * that follows the header in a receive descriptor and invokes the callback + * registered for am_id. Messages with no registered callback are dropped + * with a warning and UCS_OK. */ +static ucs_status_t +ucp_am_handler_common(ucp_worker_h worker, void *hdr_end, + size_t hdr_size, size_t args_length, + ucp_ep_h reply_ep, uint16_t am_id, + uint16_t desc_flag, unsigned am_flags) +{ + ucp_recv_desc_t *desc = NULL; + uint16_t recv_flags = 0; + ucs_status_t status; + + if (ucs_unlikely((am_id >= worker->am_cb_array_len) || + (worker->am_cbs[am_id].cb == NULL))) { + ucs_warn("UCP Active Message was received with id : %u, but there " + "is no registered callback for that id", am_id); + return UCS_OK; + } + + if (ucs_unlikely(am_flags & UCT_CB_PARAM_FLAG_DESC)) { + recv_flags |= desc_flag; + } + + /* TODO find way to do this without rewriting header if + * UCT_CB_PARAM_FLAG_DESC flag is set + */ + status = ucp_recv_desc_init(worker, hdr_end, hdr_size + args_length, + 0, am_flags, 0, + recv_flags, 0, &desc); + if (ucs_unlikely(UCS_STATUS_IS_ERR(status))) { + ucs_error("worker %p could not allocate descriptor for active message " + "on callback : %u", worker, am_id); + return status; + } + + ucs_assert(desc != NULL); + + status = worker->am_cbs[am_id].cb(worker->am_cbs[am_id].context, + desc + 1, + args_length, + reply_ep, + UCP_CB_PARAM_FLAG_DATA); + if (ucs_unlikely(am_flags & UCT_CB_PARAM_FLAG_DESC)) { + return status; + } + + /* Without UCT_CB_PARAM_FLAG_DESC: release the descriptor unless the + * callback returned UCS_INPROGRESS to keep the data */ + if (status == UCS_OK) { + ucp_recv_desc_release(desc); + return UCS_OK; + } else if (status == UCS_INPROGRESS) { + return UCS_OK; + } + + return status; +} + +static ucs_status_t +ucp_am_handler_reply(void *am_arg, void *am_data, size_t am_length, + unsigned am_flags) +{ + ucp_am_reply_hdr_t *hdr = (ucp_am_reply_hdr_t *)am_data; + ucp_worker_h worker = (ucp_worker_h)am_arg; + uint16_t am_id = hdr->super.am_hdr.am_id; + ucp_ep_h reply_ep; + + reply_ep = ucp_worker_get_ep_by_ptr(worker, hdr->ep_ptr); + + return ucp_am_handler_common(worker, hdr + 1, sizeof(*hdr), + am_length - sizeof(*hdr), reply_ep, + am_id, UCP_RECV_DESC_FLAG_AM_REPLY, + am_flags); +} + +static
ucs_status_t +ucp_am_handler(void *am_arg, void *am_data, size_t am_length, + unsigned am_flags) +{ + ucp_worker_h worker = (ucp_worker_h)am_arg; + ucp_am_hdr_t *hdr = (ucp_am_hdr_t *)am_data; + uint16_t am_id = hdr->am_hdr.am_id; + + return ucp_am_handler_common(worker, hdr + 1, sizeof(*hdr), + am_length - sizeof(*hdr), NULL, + am_id, UCP_RECV_DESC_FLAG_AM_HDR, + am_flags); +} + +static ucp_am_unfinished_t * +ucp_am_find_unfinished(ucp_worker_h worker, ucp_ep_h ep, + ucp_ep_ext_proto_t *ep_ext, + ucp_am_long_hdr_t *hdr, size_t am_length) +{ + ucp_am_unfinished_t *unfinished; + /* TODO make this hash table for faster lookup */ + ucs_list_for_each(unfinished, &ep_ext->am.started_ams, list) { + if (unfinished->msg_id == hdr->msg_id) { + return unfinished; + } + } + + return NULL; +} + +static ucs_status_t +ucp_am_handle_unfinished(ucp_worker_h worker, + ucp_am_unfinished_t *unfinished, + ucp_am_long_hdr_t *long_hdr, + size_t am_length, ucp_ep_h reply_ep) +{ + uint16_t am_id; + ucs_status_t status; + + memcpy(UCS_PTR_BYTE_OFFSET(unfinished->all_data + 1, long_hdr->offset), + long_hdr + 1, am_length - sizeof(*long_hdr)); + unfinished->left -= am_length - sizeof(*long_hdr); + if (unfinished->left == 0) { + am_id = long_hdr->am_id; + status = worker->am_cbs[am_id].cb(worker->am_cbs[am_id].context, + unfinished->all_data + 1, + long_hdr->total_size, + reply_ep, + UCP_CB_PARAM_FLAG_DATA); + + if (status != UCS_INPROGRESS) { + ucs_free(unfinished->all_data); + } + + ucs_list_del(&unfinished->list); + ucs_free(unfinished); + } + + return UCS_OK; +} + +static ucs_status_t +ucp_am_long_handler_common(void *am_arg, void *am_data, size_t am_length, + unsigned am_flags, ucp_ep_h reply_ep) +{ + ucp_worker_h worker = (ucp_worker_h)am_arg; + ucp_am_long_hdr_t *long_hdr = (ucp_am_long_hdr_t *)am_data; + ucp_ep_h ep = ucp_worker_get_ep_by_ptr(worker, + long_hdr->ep); + ucp_ep_ext_proto_t *ep_ext = ucp_ep_ext_proto(ep); + ucp_recv_desc_t *all_data; + size_t left; + ucp_am_unfinished_t 
*unfinished; + + /* Drop fragments whose am_id has no registered callback */ + if (ucs_unlikely((long_hdr->am_id >= worker->am_cb_array_len) || + (worker->am_cbs[long_hdr->am_id].cb == NULL))) { + ucs_warn("UCP Active Message was received with id : %u, but there " + "is no registered callback for that id", long_hdr->am_id); + return UCS_OK; + } + + /* if there are multiple messages, + * we first check to see if any of the other messages + * have arrived. If any messages have arrived, + * we copy ourselves into the buffer and leave + */ + unfinished = ucp_am_find_unfinished(worker, ep, ep_ext, long_hdr, am_length); + + if (unfinished) { + return ucp_am_handle_unfinished(worker, unfinished, + long_hdr, am_length, + reply_ep); + } + + /* If I am first, I make the buffer for everyone to go into, + * copy myself in, and put myself on the list so people can find me + */ + all_data = ucs_malloc(long_hdr->total_size + sizeof(ucp_recv_desc_t), + "ucp recv desc for long AM"); + if (ucs_unlikely(all_data == NULL)) { + return UCS_ERR_NO_MEMORY; + } + + all_data->flags = UCP_RECV_DESC_FLAG_MALLOC; + + /* Bytes still expected after this fragment's payload is stored */ + left = long_hdr->total_size - (am_length - + sizeof(ucp_am_long_hdr_t)); + + memcpy(UCS_PTR_BYTE_OFFSET(all_data + 1, long_hdr->offset), + long_hdr + 1, am_length - sizeof(ucp_am_long_hdr_t)); + + /* Can't use a desc for this because of the buffer */ + unfinished = ucs_malloc(sizeof(ucp_am_unfinished_t), + "unfinished UCP AM"); + if (ucs_unlikely(unfinished == NULL)) { + ucs_free(all_data); + return UCS_ERR_NO_MEMORY; + } + + unfinished->all_data = all_data; + unfinished->left = left; + unfinished->msg_id = long_hdr->msg_id; + + ucs_list_add_head(&ep_ext->am.started_ams, &unfinished->list); + + return UCS_OK; +} + +static ucs_status_t +ucp_am_long_handler_reply(void *am_arg, void *am_data, size_t am_length, + unsigned am_flags) +{ + ucp_worker_h worker = (ucp_worker_h)am_arg; + ucp_am_long_hdr_t *long_hdr = (ucp_am_long_hdr_t *)am_data; + ucp_ep_h ep = ucp_worker_get_ep_by_ptr(worker, + long_hdr->ep); + + return
ucp_am_long_handler_common(am_arg, am_data, am_length, am_flags, + ep); +} + +static ucs_status_t +ucp_am_long_handler(void *am_arg, void *am_data, size_t am_length, + unsigned am_flags) +{ + return ucp_am_long_handler_common(am_arg, am_data, am_length, am_flags, + NULL); +} + +UCP_DEFINE_AM(UCP_FEATURE_AM, UCP_AM_ID_SINGLE, + ucp_am_handler, NULL, 0); +UCP_DEFINE_AM(UCP_FEATURE_AM, UCP_AM_ID_MULTI, + ucp_am_long_handler, NULL, 0); +UCP_DEFINE_AM(UCP_FEATURE_AM, UCP_AM_ID_SINGLE_REPLY, + ucp_am_handler_reply, NULL, 0); +UCP_DEFINE_AM(UCP_FEATURE_AM, UCP_AM_ID_MULTI_REPLY, + ucp_am_long_handler_reply, NULL, 0); + +const ucp_request_send_proto_t ucp_am_proto = { + .contig_short = ucp_am_contig_short, + .bcopy_single = ucp_am_bcopy_single, + .bcopy_multi = ucp_am_bcopy_multi, + .zcopy_single = ucp_am_zcopy_single, + .zcopy_multi = ucp_am_zcopy_multi, + .zcopy_completion = ucp_proto_am_zcopy_completion, + .only_hdr_size = sizeof(ucp_am_hdr_t) +}; + +const ucp_request_send_proto_t ucp_am_reply_proto = { + .contig_short = NULL, + .bcopy_single = ucp_am_bcopy_single_reply, + .bcopy_multi = ucp_am_bcopy_multi_reply, + .zcopy_single = ucp_am_zcopy_single_reply, + .zcopy_multi = ucp_am_zcopy_multi_reply, + .zcopy_completion = ucp_proto_am_zcopy_completion, + .only_hdr_size = sizeof(ucp_am_reply_hdr_t) +}; diff --git a/src/ucp/core/ucp_am.h b/src/ucp/core/ucp_am.h new file mode 100644 index 0000000..2eff288 --- /dev/null +++ b/src/ucp/core/ucp_am.h @@ -0,0 +1,53 @@ +/** + * Copyright (C) Los Alamos National Security, LLC. 2019 ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "ucp_ep.h" + +#define UCP_AM_CB_BLOCK_SIZE 16 + + +typedef union { + struct { + uint32_t length; /* length of an AM. Ideally it would be size_t + * but we want to keep this struct at 64 bits + * to fit in uct_ep_am_short header. 
MAX_SHORT + * or b/zcopy MTU + * should be much smaller than this anyway */ + uint16_t am_id; /* Index into callback array */ + uint16_t flags; /* currently unused in this header + because replies require long header + defined by @ref ucp_am_send_flags */ + } am_hdr; + + uint64_t u64; /* This is used to ensure the size of + the header is 64 bits and aligned */ +} ucp_am_hdr_t; + +typedef struct { + ucp_am_hdr_t super; + uintptr_t ep_ptr; +} UCS_S_PACKED ucp_am_reply_hdr_t; + +typedef struct { + size_t total_size; /* length of buffer needed for all data */ + uint64_t msg_id; /* method to match parts of the same AM */ + uintptr_t ep; /* end point ptr, used for maintaining list + of arrivals */ + size_t offset; /* how far this message goes into + the entire AM buffer */ + uint16_t am_id; /* index into callback array */ +} UCS_S_PACKED ucp_am_long_hdr_t; + +typedef struct { + ucs_list_link_t list; /* entry into list of unfinished AM's */ + ucp_recv_desc_t *all_data; /* buffer for all parts of the AM */ + uint64_t msg_id; /* way to match up all parts of AM */ + size_t left; /* bytes of the AM that have not arrived yet */ +} ucp_am_unfinished_t; + +void ucp_am_ep_init(ucp_ep_h ep); + +void ucp_am_ep_cleanup(ucp_ep_h ep); diff --git a/src/ucp/core/ucp_am.inl b/src/ucp/core/ucp_am.inl new file mode 100644 index 0000000..f271654 --- /dev/null +++ b/src/ucp/core/ucp_am.inl @@ -0,0 +1,19 @@ +/** +* Copyright (C) Los Alamos National Security, LLC. 2019 ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include + +static UCS_F_ALWAYS_INLINE ssize_t +ucp_am_get_short_max(const ucp_request_t *req, + const ucp_ep_msg_config_t *msg_config) +{ + return (!UCP_DT_IS_CONTIG(req->send.datatype) || + (req->flags & UCP_REQUEST_FLAG_SYNC) || + (!UCP_MEM_IS_ACCESSIBLE_FROM_CPU(req->send.mem_type))) || + ((req->flags & UCP_REQUEST_FLAG_SEND_AM) && + (req->send.am.flags & UCP_AM_SEND_REPLY)) ?
+ -1 : msg_config->max_short; +} diff --git a/src/ucp/core/ucp_context.c b/src/ucp/core/ucp_context.c new file mode 100644 index 0000000..a0cc64d --- /dev/null +++ b/src/ucp/core/ucp_context.c @@ -0,0 +1,1696 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. + * Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucp_context.h" +#include "ucp_request.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define UCP_RSC_CONFIG_ALL "all" + +ucp_am_handler_t ucp_am_handlers[UCP_AM_ID_LAST] = {{0, NULL, NULL}}; + +static const char *ucp_atomic_modes[] = { + [UCP_ATOMIC_MODE_CPU] = "cpu", + [UCP_ATOMIC_MODE_DEVICE] = "device", + [UCP_ATOMIC_MODE_GUESS] = "guess", + [UCP_ATOMIC_MODE_LAST] = NULL, +}; + +static const char * ucp_device_type_names[] = { + [UCT_DEVICE_TYPE_NET] = "network", + [UCT_DEVICE_TYPE_SHM] = "intra-node", + [UCT_DEVICE_TYPE_ACC] = "accelerator", + [UCT_DEVICE_TYPE_SELF] = "loopback", +}; + +static const char * ucp_rndv_modes[] = { + [UCP_RNDV_MODE_GET_ZCOPY] = "get_zcopy", + [UCP_RNDV_MODE_PUT_ZCOPY] = "put_zcopy", + [UCP_RNDV_MODE_AUTO] = "auto", + [UCP_RNDV_MODE_LAST] = NULL, +}; + +static ucs_config_field_t ucp_config_table[] = { + {"NET_DEVICES", UCP_RSC_CONFIG_ALL, + "Specifies which network device(s) to use. The order is not meaningful.\n" + "\"all\" would use all available devices.", + ucs_offsetof(ucp_config_t, devices[UCT_DEVICE_TYPE_NET]), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"SHM_DEVICES", UCP_RSC_CONFIG_ALL, + "Specifies which intra-node device(s) to use. 
The order is not meaningful.\n" + "\"all\" would use all available devices.", + ucs_offsetof(ucp_config_t, devices[UCT_DEVICE_TYPE_SHM]), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"ACC_DEVICES", UCP_RSC_CONFIG_ALL, + "Specifies which accelerator device(s) to use. The order is not meaningful.\n" + "\"all\" would use all available devices.", + ucs_offsetof(ucp_config_t, devices[UCT_DEVICE_TYPE_ACC]), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"SELF_DEVICES", UCP_RSC_CONFIG_ALL, + "Specifies which loop-back device(s) to use. The order is not meaningful.\n" + "\"all\" would use all available devices.", + ucs_offsetof(ucp_config_t, devices[UCT_DEVICE_TYPE_SELF]), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"TLS", UCP_RSC_CONFIG_ALL, + "Comma-separated list of transports to use. The order is not meaningful.\n" + " - all : use all the available transports.\n" + " - sm/shm : all shared memory transports (mm, cma, knem).\n" + " - mm : shared memory transports - only memory mappers.\n" + " - ugni : ugni_smsg and ugni_rdma (uses ugni_udt for bootstrap).\n" + " - ib : all infiniband transports (rc/rc_mlx5, ud/ud_mlx5, dc_mlx5).\n" + " - rc_v : rc verbs (uses ud for bootstrap).\n" + " - rc_x : rc with accelerated verbs (uses ud_mlx5 for bootstrap).\n" + " - rc : rc_v and rc_x (preferably if available).\n" + " - ud_v : ud verbs.\n" + " - ud_x : ud with accelerated verbs.\n" + " - ud : ud_v and ud_x (preferably if available).\n" + " - dc/dc_x : dc with accelerated verbs.\n" + " - tcp : sockets over TCP/IP.\n" + " - cuda : CUDA (NVIDIA GPU) memory support.\n" + " - rocm : ROCm (AMD GPU) memory support.\n" + " Using a \\ prefix before a transport name treats it as an explicit transport name\n" + " and disables aliasing.\n", + ucs_offsetof(ucp_config_t, tls), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"ALLOC_PRIO", "md:sysv,md:posix,huge,thp,md:*,mmap,heap", + "Priority of memory allocation methods. 
Each item in the list can be either\n" + "an allocation method (huge, thp, mmap, libc) or md: which means to use the\n" + "specified memory domain for allocation. NAME can be either a UCT component\n" + "name, or a wildcard - '*' - which is equivalent to all UCT components.", + ucs_offsetof(ucp_config_t, alloc_prio), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"SOCKADDR_TLS_PRIORITY", "rdmacm,sockcm", + "Priority of sockaddr transports for client/server connection establishment.\n" + "The '*' wildcard expands to all the available sockaddr transports.", + ucs_offsetof(ucp_config_t, sockaddr_cm_tls), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"SOCKADDR_AUX_TLS", "ud", + "Transports to use for exchanging additional address information while\n" + "establishing client/server connection. ", + ucs_offsetof(ucp_config_t, sockaddr_aux_tls), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"WARN_INVALID_CONFIG", "y", + "Issue a warning in case of invalid device and/or transport configuration.", + ucs_offsetof(ucp_config_t, warn_invalid_config), UCS_CONFIG_TYPE_BOOL}, + + {"BCOPY_THRESH", "0", + "Threshold for switching from short to bcopy protocol", + ucs_offsetof(ucp_config_t, ctx.bcopy_thresh), UCS_CONFIG_TYPE_MEMUNITS}, + + {"RNDV_THRESH", "auto", + "Threshold for switching from eager to rendezvous protocol", + ucs_offsetof(ucp_config_t, ctx.rndv_thresh), UCS_CONFIG_TYPE_MEMUNITS}, + + {"RNDV_SEND_NBR_THRESH", "256k", + "Threshold for switching from eager to rendezvous protocol in ucp_tag_send_nbr().\n" + "Relevant only if UCX_RNDV_THRESH is set to \"auto\".", + ucs_offsetof(ucp_config_t, ctx.rndv_send_nbr_thresh), UCS_CONFIG_TYPE_MEMUNITS}, + + {"RNDV_THRESH_FALLBACK", "inf", + "Message size to start using the rendezvous protocol in case the calculated threshold " + "is zero or negative", + ucs_offsetof(ucp_config_t, ctx.rndv_thresh_fallback), UCS_CONFIG_TYPE_MEMUNITS}, + + {"RNDV_PERF_DIFF", "1", + "The percentage allowed for performance difference between rendezvous and " + "the eager_zcopy 
protocol", + ucs_offsetof(ucp_config_t, ctx.rndv_perf_diff), UCS_CONFIG_TYPE_DOUBLE}, + + {"MAX_EAGER_LANES", NULL, "", + ucs_offsetof(ucp_config_t, ctx.max_eager_lanes), UCS_CONFIG_TYPE_UINT}, + + {"MAX_EAGER_RAILS", "1", + "Maximal number of devices on which an eager operation may be executed in parallel", + ucs_offsetof(ucp_config_t, ctx.max_eager_lanes), UCS_CONFIG_TYPE_UINT}, + + {"MAX_RNDV_LANES", NULL,"", + ucs_offsetof(ucp_config_t, ctx.max_rndv_lanes), UCS_CONFIG_TYPE_UINT}, + + {"MAX_RNDV_RAILS", "2", + "Maximal number of devices on which a rendezvous operation may be executed in parallel", + ucs_offsetof(ucp_config_t, ctx.max_rndv_lanes), UCS_CONFIG_TYPE_UINT}, + + {"RNDV_SCHEME", "auto", + "Communication scheme in RNDV protocol.\n" + " get_zcopy - use get_zcopy scheme in RNDV protocol.\n" + " put_zcopy - use put_zcopy scheme in RNDV protocol.\n" + " auto - runtime automatically chooses optimal scheme to use.\n", + ucs_offsetof(ucp_config_t, ctx.rndv_mode), UCS_CONFIG_TYPE_ENUM(ucp_rndv_modes)}, + + {"ZCOPY_THRESH", "auto", + "Threshold for switching from buffer copy to zero copy protocol", + ucs_offsetof(ucp_config_t, ctx.zcopy_thresh), UCS_CONFIG_TYPE_MEMUNITS}, + + {"BCOPY_BW", "auto", + "Estimation of buffer copy bandwidth", + ucs_offsetof(ucp_config_t, ctx.bcopy_bw), UCS_CONFIG_TYPE_BW}, + + {"ATOMIC_MODE", "guess", + "Atomic operations synchronization mode.\n" + " cpu - atomic operations are consistent with respect to the CPU.\n" + " device - atomic operations are performed on one of the transport devices,\n" + " and there is guarantee of consistency with respect to the CPU." + " guess - atomic operations mode is configured based on underlying\n" + " transport capabilities. 
If one of active transports supports\n" + " the DEVICE atomic mode, the DEVICE mode is selected.\n" + " Otherwise the CPU mode is selected.", + ucs_offsetof(ucp_config_t, ctx.atomic_mode), UCS_CONFIG_TYPE_ENUM(ucp_atomic_modes)}, + + {"MAX_WORKER_NAME", UCS_PP_MAKE_STRING(UCP_WORKER_NAME_MAX), + "Maximal length of worker name. " +#if ENABLE_DEBUG_DATA + "Sent to remote peer as part of worker address." +#else + "Not sent to remote peer per build configuration." +#endif + , + ucs_offsetof(ucp_config_t, ctx.max_worker_name), UCS_CONFIG_TYPE_UINT}, + + {"USE_MT_MUTEX", "n", "Use mutex for multithreading support in UCP.\n" + "n - Not use mutex for multithreading support in UCP (use spinlock by default).\n" + "y - Use mutex for multithreading support in UCP.\n", + ucs_offsetof(ucp_config_t, ctx.use_mt_mutex), UCS_CONFIG_TYPE_BOOL}, + + {"ADAPTIVE_PROGRESS", "y", + "Enable adaptive progress mechanism, which turns on polling only on active\n" + "transport interfaces.", + ucs_offsetof(ucp_config_t, ctx.adaptive_progress), UCS_CONFIG_TYPE_BOOL}, + + {"SEG_SIZE", "8192", + "Size of a segment in the worker preregistered memory pool.", + ucs_offsetof(ucp_config_t, ctx.seg_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {"TM_THRESH", "1024", /* TODO: calculate automatically */ + "Threshold for using tag matching offload capabilities.\n" + "Smaller buffers will not be posted to the transport.", + ucs_offsetof(ucp_config_t, ctx.tm_thresh), UCS_CONFIG_TYPE_MEMUNITS}, + + {"TM_MAX_BB_SIZE", "1024", /* TODO: calculate automatically */ + "Maximal size for posting \"bounce buffer\" (UCX internal preregistered memory) for\n" + "tag offload receives. When message arrives, it is copied into the user buffer (similar\n" + "to eager protocol). The size values has to be equal or less than segment size.\n" + "Also the value has to be bigger than UCX_TM_THRESH to take an effect." 
, + ucs_offsetof(ucp_config_t, ctx.tm_max_bb_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {"TM_FORCE_THRESH", "8192", /* TODO: calculate automatically */ + "Threshold for forcing tag matching offload mode. Every tag receive operation\n" + "with buffer bigger than this threshold would force offloading of all uncompleted\n" + "non-offloaded receive operations to the transport (e. g. operations with\n" + "buffers below the UCX_TM_THRESH value). Offloading may be unsuccessful in certain\n" + "cases (non-contig buffer, or sender wildcard).", + ucs_offsetof(ucp_config_t, ctx.tm_force_thresh), UCS_CONFIG_TYPE_MEMUNITS}, + + {"TM_SW_RNDV", "n", + "Use software rendezvous protocol with tag offload. If enabled, tag offload\n" + "mode will be used for messages sent with eager protocol only.", + ucs_offsetof(ucp_config_t, ctx.tm_sw_rndv), UCS_CONFIG_TYPE_BOOL}, + + {"NUM_EPS", "auto", + "An optimization hint of how many endpoints would be created on this context.\n" + "Does not affect semantics, but only transport selection criteria and the\n" + "resulting performance.\n" + " If set to a value different from \"auto\" it will override the value passed\n" + "to ucp_init()", + ucs_offsetof(ucp_config_t, ctx.estimated_num_eps), UCS_CONFIG_TYPE_ULUNITS}, + + {"NUM_PPN", "auto", + "An optimization hint for the number of processes expected to be launched\n" + "on a single node. Does not affect semantics, only transport selection criteria\n" + "and the resulting performance.\n", + ucs_offsetof(ucp_config_t, ctx.estimated_num_ppn), UCS_CONFIG_TYPE_ULUNITS}, + + {"RNDV_FRAG_SIZE", "512k", + "RNDV fragment size \n", + ucs_offsetof(ucp_config_t, ctx.rndv_frag_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {"MEMTYPE_CACHE", "y", + "Enable memory type (cuda/rocm) cache \n", + ucs_offsetof(ucp_config_t, ctx.enable_memtype_cache), UCS_CONFIG_TYPE_BOOL}, + + {"FLUSH_WORKER_EPS", "y", + "Enable flushing the worker by flushing its endpoints. 
Allows completing\n" + "the flush operation in a bounded time even if there are new requests on\n" + "another thread, or incoming active messages, but consumes more resources.", + ucs_offsetof(ucp_config_t, ctx.flush_worker_eps), UCS_CONFIG_TYPE_BOOL}, + + {"UNIFIED_MODE", "n", + "Enable various optimizations intended for homogeneous environment.\n" + "Enabling this mode implies that the local transport resources/devices\n" + "of all entities which connect to each other are the same.", + ucs_offsetof(ucp_config_t, ctx.unified_mode), UCS_CONFIG_TYPE_BOOL}, + + {"SOCKADDR_CM_ENABLE", "n" /* TODO: set try by default */, + "Enable alternative wireup protocol for sockaddr connected endpoints.\n" + "Enabling this mode changes underlying UCT mechanism for connection\n" + "establishment and enables synchronized close protocol which does not\n" + "require out of band synchronization before destroying UCP resources.", + ucs_offsetof(ucp_config_t, ctx.sockaddr_cm_enable), UCS_CONFIG_TYPE_TERNARY}, + + {NULL} +}; +UCS_CONFIG_REGISTER_TABLE(ucp_config_table, "UCP context", NULL, ucp_config_t) + + +static ucp_tl_alias_t ucp_tl_aliases[] = { + { "mm", { "posix", "sysv", "xpmem" } }, /* for backward compatibility */ + { "sm", { "posix", "sysv", "xpmem", "knem", "cma", "rdmacm", "sockcm", NULL } }, + { "shm", { "posix", "sysv", "xpmem", "knem", "cma", "rdmacm", "sockcm", NULL } }, + { "ib", { "rc_verbs", "ud_verbs", "rc_mlx5", "ud_mlx5", "dc_mlx5", "rdmacm", NULL } }, + { "ud_v", { "ud_verbs", "rdmacm", NULL } }, + { "ud_x", { "ud_mlx5", "rdmacm", NULL } }, + { "ud", { "ud_mlx5", "ud_verbs", "rdmacm", NULL } }, + { "rc_v", { "rc_verbs", "ud_verbs:aux", "rdmacm", NULL } }, + { "rc_x", { "rc_mlx5", "ud_mlx5:aux", "rdmacm", NULL } }, + { "rc", { "rc_mlx5", "ud_mlx5:aux", "rc_verbs", "ud_verbs:aux", "rdmacm", NULL } }, + { "dc", { "dc_mlx5", "rdmacm", NULL } }, + { "dc_x", { "dc_mlx5", "rdmacm", NULL } }, + { "ugni", { "ugni_smsg", "ugni_udt:aux", "ugni_rdma", NULL } }, + { "cuda", { 
"cuda_copy", "cuda_ipc", "gdr_copy", NULL } }, + { "rocm", { "rocm_copy", "rocm_ipc", "rocm_gdr", NULL } }, + { NULL } +}; + + +const char *ucp_feature_str[] = { + [ucs_ilog2(UCP_FEATURE_TAG)] = "UCP_FEATURE_TAG", + [ucs_ilog2(UCP_FEATURE_RMA)] = "UCP_FEATURE_RMA", + [ucs_ilog2(UCP_FEATURE_AMO32)] = "UCP_FEATURE_AMO32", + [ucs_ilog2(UCP_FEATURE_AMO64)] = "UCP_FEATURE_AMO64", + [ucs_ilog2(UCP_FEATURE_WAKEUP)] = "UCP_FEATURE_WAKEUP", + [ucs_ilog2(UCP_FEATURE_STREAM)] = "UCP_FEATURE_STREAM", + [ucs_ilog2(UCP_FEATURE_AM)] = "UCP_FEATURE_AM", + NULL +}; + + +ucs_status_t ucp_config_read(const char *env_prefix, const char *filename, + ucp_config_t **config_p) +{ + ucp_config_t *config; + ucs_status_t status; + + config = ucs_malloc(sizeof(*config), "ucp config"); + if (config == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + + status = ucs_config_parser_fill_opts(config, ucp_config_table, env_prefix, + NULL, 0); + if (status != UCS_OK) { + goto err_free; + } + + *config_p = config; + return UCS_OK; + +err_free: + ucs_free(config); +err: + return status; +} + +void ucp_config_release(ucp_config_t *config) +{ + ucs_config_parser_release_opts(config, ucp_config_table); + ucs_free(config); +} + +ucs_status_t ucp_config_modify(ucp_config_t *config, const char *name, + const char *value) +{ + return ucs_config_parser_set_value(config, ucp_config_table, name, value); +} + +void ucp_config_print(const ucp_config_t *config, FILE *stream, + const char *title, ucs_config_print_flags_t print_flags) +{ + ucs_config_parser_print_opts(stream, title, config, ucp_config_table, NULL, + print_flags); +} + +/* Search str in the array. If str_suffix is specified, search for + * 'str:str_suffix' string. + * @return bitmap of indexes in which the string appears in the array. 
+ */
+static uint64_t ucp_str_array_search(const char **array, unsigned array_len,
+                                     const char *str, const char *str_suffix)
+{
+    const size_t len = strlen(str);
+    uint64_t result;
+    const char *p;
+    unsigned i; /* same type as array_len: no signed/unsigned comparison */
+
+    result = 0;
+    for (i = 0; i < array_len; ++i) {
+        if (str_suffix == NULL) {
+            /* plain search: the whole element must equal str */
+            if (!strcmp(array[i], str)) {
+                result |= UCS_BIT(i);
+            }
+        } else if (!strncmp(array[i], str, len)) {
+            /* suffix search: element must be exactly "<str>:<str_suffix>" */
+            p = array[i] + len;
+            if ((*p == ':') && !strcmp(p + 1, str_suffix)) {
+                result |= UCS_BIT(i);
+            }
+        }
+    }
+
+    return result;
+}
+
+/* Number of transport names in alias->tls before the first NULL entry. */
+static unsigned ucp_tl_alias_count(ucp_tl_alias_t *alias)
+{
+    unsigned count;
+    for (count = 0; alias->tls[count] != NULL; ++count);
+    return count;
+}
+
+/* Look tl_name up in tls[0..count). A plain match ORs the matching positions
+ * into *tl_cfg_mask. If there is no plain match, a "<tl_name>:aux" match does
+ * the same and additionally sets UCP_TL_RSC_FLAG_AUX in *rsc_flags.
+ * 'info' is only appended to the trace message.
+ * @return 1 if found either way, 0 otherwise (outputs untouched). */
+static int ucp_tls_array_is_present(const char **tls, unsigned count,
+                                    const char *tl_name, const char *info,
+                                    uint8_t *rsc_flags, uint64_t *tl_cfg_mask)
+{
+    uint64_t mask;
+
+    if ((mask = ucp_str_array_search(tls, count, tl_name, NULL)) != 0) {
+        *tl_cfg_mask |= mask;
+        ucs_trace("enabling tl '%s'%s", tl_name, info);
+        return 1;
+    } else if ((mask = ucp_str_array_search(tls, count, tl_name, "aux")) != 0) {
+        /* Search for tl names with 'aux' suffix, such tls can be
+         * used for auxiliary wireup purposes only */
+        *rsc_flags |= UCP_TL_RSC_FLAG_AUX;
+        *tl_cfg_mask |= mask;
+        ucs_trace("enabling auxiliary tl '%s'%s", tl_name, info);
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+/* Decide whether tl_name is selected by the user's names list: as a strict
+ * "\name" entry (skipped when is_alias is set), as a plain entry, or through
+ * the "all" keyword. */
+static int ucp_config_is_tl_enabled(const char **names, unsigned count,
+                                    const char *tl_name, int is_alias,
+                                    uint8_t *rsc_flags, uint64_t *tl_cfg_mask)
+{
+    char strict_name[UCT_TL_NAME_MAX + 1];
+
+    snprintf(strict_name, sizeof(strict_name), "\\%s", tl_name);
+    return /* strict name, with leading \\ */
+           (!is_alias && ucp_tls_array_is_present(names, count, strict_name, "",
+                                                  rsc_flags, tl_cfg_mask)) ||
+           /* plain transport name */
+           ucp_tls_array_is_present(names, count, tl_name, "", rsc_flags,
+                                    tl_cfg_mask) ||
+           /* all available transports */
+           ucp_tls_array_is_present(names, count, UCP_RSC_CONFIG_ALL, "", rsc_flags,
+                                    tl_cfg_mask);
+}
+/* Check whether resource->dev_name, or else the keyword "all", appears in the
+ * user's device list for dev_type. Matching list positions are ORed into
+ * *dev_cfg_mask. Returns 1 on a match, 0 otherwise. */
+static int ucp_is_resource_in_device_list(const uct_tl_resource_desc_t *resource,
+                                          const ucs_config_names_array_t *devices,
+                                          uint64_t *dev_cfg_mask,
+                                          uct_device_type_t dev_type)
+{
+    uint64_t mask, exclusive_mask;
+
+    /* go over the device list from the user and check (against the available resources)
+     * which can be satisfied */
+    ucs_assert_always(devices[dev_type].count <= 64); /* Using uint64_t bitmap */
+    mask = ucp_str_array_search((const char**)devices[dev_type].names,
+                                devices[dev_type].count, resource->dev_name,
+                                NULL);
+    if (!mask) {
+        /* if the user's list is 'all', use all the available resources */
+        mask = ucp_str_array_search((const char**)devices[dev_type].names,
+                                    devices[dev_type].count, UCP_RSC_CONFIG_ALL,
+                                    NULL);
+    }
+
+    /* warn if we got new device which appears more than once */
+    exclusive_mask = mask & ~(*dev_cfg_mask); /* positions not seen before */
+    if (exclusive_mask && !ucs_is_pow2(exclusive_mask)) {
+        ucs_warn("device '%s' is specified multiple times",
+                 devices[dev_type].names[ucs_ilog2(exclusive_mask)]);
+    }
+
+    *dev_cfg_mask |= mask;
+    return !!mask;
+}
+/* Check whether tl_name is enabled by the names list, either directly or
+ * through an alias from ucp_tl_aliases that is itself enabled and lists
+ * tl_name as a member. On success the flags and list positions found are ORed
+ * into *rsc_flags and *tl_cfg_mask. Returns 1 if enabled, 0 otherwise. */
+static int ucp_is_resource_in_transports_list(const char *tl_name,
+                                              const char **names, unsigned count,
+                                              uint8_t *rsc_flags, uint64_t *tl_cfg_mask)
+{
+    uint64_t dummy_mask, tmp_tl_cfg_mask;
+    uint8_t tmp_rsc_flags;
+    ucp_tl_alias_t *alias;
+    int tl_enabled;
+    char info[32];
+    unsigned alias_arr_count;
+
+    ucs_assert(count > 0);
+    if (ucp_config_is_tl_enabled(names, count, tl_name, 0,
+                                 rsc_flags, tl_cfg_mask)) {
+        tl_enabled = 1;
+    } else {
+        tl_enabled = 0;
+
+        /* check aliases */
+        for (alias = ucp_tl_aliases; alias->alias != NULL; ++alias) {
+            /* If an alias is enabled, and the transport is part of this alias,
+             * enable the transport.
+             */
+            alias_arr_count = ucp_tl_alias_count(alias);
+            snprintf(info, sizeof(info), "for alias '%s'", alias->alias);
+            dummy_mask = 0;
+            tmp_rsc_flags = 0; /* temporaries: committed only if both checks pass */
+            tmp_tl_cfg_mask = 0;
+            if (ucp_config_is_tl_enabled(names, count, alias->alias, 1,
+                                         &tmp_rsc_flags, &tmp_tl_cfg_mask) &&
+                ucp_tls_array_is_present(alias->tls, alias_arr_count, tl_name,
+                                         info, &tmp_rsc_flags, &dummy_mask)) {
+                *rsc_flags |= tmp_rsc_flags;
+                *tl_cfg_mask |= tmp_tl_cfg_mask;
+                tl_enabled = 1;
+                break;
+            }
+        }
+    }
+
+    return tl_enabled;
+}
+/* A resource is enabled when both its device and its transport are selected
+ * by config. The device list is not consulted when *rsc_flags already has
+ * UCP_TL_RSC_FLAG_SOCKADDR set. */
+static int ucp_is_resource_enabled(const uct_tl_resource_desc_t *resource,
+                                   const ucp_config_t *config, uint8_t *rsc_flags,
+                                   uint64_t dev_cfg_masks[], uint64_t *tl_cfg_mask)
+{
+    int device_enabled, tl_enabled;
+
+    /* Find the enabled devices */
+    device_enabled = (*rsc_flags & UCP_TL_RSC_FLAG_SOCKADDR) ||
+                     ucp_is_resource_in_device_list(resource, config->devices,
+                                                    &dev_cfg_masks[resource->dev_type],
+                                                    resource->dev_type);
+
+
+    /* Find the enabled UCTs */
+    tl_enabled = ucp_is_resource_in_transports_list(resource->tl_name,
+                                                    (const char**)config->tls.names,
+                                                    config->tls.count, rsc_flags,
+                                                    tl_cfg_mask);
+
+    ucs_trace(UCT_TL_RESOURCE_DESC_FMT " is %sabled",
+              UCT_TL_RESOURCE_DESC_ARG(resource),
+              (device_enabled && tl_enabled) ? 
"en" : "dis");
+    return device_enabled && tl_enabled;
+}
+/* Append 'resource' to context->tl_rscs if it is enabled by config, and bump
+ * context->num_tls and *num_resources_p. No allocation happens here: the slot
+ * at index context->num_tls is written directly. */
+static void ucp_add_tl_resource_if_enabled(ucp_context_h context, ucp_tl_md_t *md,
+                                           ucp_rsc_index_t md_index,
+                                           const ucp_config_t *config,
+                                           const uct_tl_resource_desc_t *resource,
+                                           uint8_t rsc_flags, unsigned *num_resources_p,
+                                           uint64_t dev_cfg_masks[],
+                                           uint64_t *tl_cfg_mask)
+{
+    ucp_rsc_index_t dev_index, i;
+
+    if (ucp_is_resource_enabled(resource, config, &rsc_flags, dev_cfg_masks,
+                                tl_cfg_mask)) {
+        context->tl_rscs[context->num_tls].tl_rsc = *resource;
+        context->tl_rscs[context->num_tls].md_index = md_index;
+        context->tl_rscs[context->num_tls].tl_name_csum =
+                                  ucs_crc16_string(resource->tl_name);
+        context->tl_rscs[context->num_tls].flags = rsc_flags;
+
+        dev_index = 0;
+        for (i = 0; i < context->num_tls; ++i) {
+            if (!strcmp(context->tl_rscs[i].tl_rsc.dev_name, resource->dev_name)) {
+                dev_index = context->tl_rscs[i].dev_index; /* reuse index of same device */
+                break;
+            } else {
+                dev_index = ucs_max(context->tl_rscs[i].dev_index + 1, dev_index); /* else: one past the largest seen */
+            }
+        }
+        context->tl_rscs[context->num_tls].dev_index = dev_index;
+
+        ++context->num_tls;
+        ++(*num_resources_p);
+    }
+}
+/* Query the transport resources of the MD at md_index, grow context->tl_rscs
+ * to fit them, and add the ones enabled by config. *num_resources_p receives
+ * the number of resources added. Returns UCS_OK also when the MD exposes no
+ * resources; otherwise the query status or UCS_ERR_NO_MEMORY. */
+static ucs_status_t ucp_add_tl_resources(ucp_context_h context,
+                                         ucp_rsc_index_t md_index,
+                                         const ucp_config_t *config,
+                                         unsigned *num_resources_p,
+                                         ucs_string_set_t avail_devices[],
+                                         ucs_string_set_t *avail_tls,
+                                         uint64_t dev_cfg_masks[],
+                                         uint64_t *tl_cfg_mask)
+{
+    ucp_tl_md_t *md = &context->tl_mds[md_index];
+    uct_tl_resource_desc_t *tl_resources;
+    uct_tl_resource_desc_t sa_rsc;
+    ucp_tl_resource_desc_t *tmp;
+    unsigned num_tl_resources;
+    unsigned num_sa_resources;
+    ucs_status_t status;
+    ucp_rsc_index_t i;
+
+    *num_resources_p = 0;
+
+    /* check what are the available uct resources */
+    status = uct_md_query_tl_resources(md->md, &tl_resources, &num_tl_resources);
+    if (status != UCS_OK) {
+        ucs_error("Failed to query resources: %s", ucs_status_string(status));
+        goto err;
+    }
+
+    /* If the md supports client-server connection establishment via sockaddr,
+       add a new tl resource here for the client side iface. */
+    num_sa_resources = !!(md->attr.cap.flags & UCT_MD_FLAG_SOCKADDR);
+
+    if ((num_tl_resources == 0) && (!num_sa_resources)) {
+        ucs_debug("No tl resources found for md %s", md->rsc.md_name);
+        goto out_free_resources;
+    }
+
+    tmp = ucs_realloc(context->tl_rscs,
+                      sizeof(*context->tl_rscs) *
+                      (context->num_tls + num_tl_resources + num_sa_resources),
+                      "ucp resources");
+    if (tmp == NULL) {
+        ucs_error("Failed to allocate resources");
+        status = UCS_ERR_NO_MEMORY;
+        goto err_free_resources;
+    }
+
+    /* print configuration */
+    for (i = 0; i < config->tls.count; ++i) {
+        ucs_trace("allowed transport %d : '%s'", i, config->tls.names[i]);
+    }
+
+    /* copy only the resources enabled by user configuration */
+    context->tl_rscs = tmp;
+    for (i = 0; i < num_tl_resources; ++i) {
+        if (!(md->attr.cap.flags & UCT_MD_FLAG_SOCKADDR)) {
+            ucs_string_set_addf(&avail_devices[tl_resources[i].dev_type],
+                                "'%s'(%s)", tl_resources[i].dev_name,
+                                context->tl_cmpts[md->cmpt_index].attr.name);
+            ucs_string_set_add(avail_tls, tl_resources[i].tl_name);
+        }
+        ucp_add_tl_resource_if_enabled(context, md, md_index, config,
+                                       &tl_resources[i], 0, num_resources_p,
+                                       dev_cfg_masks, tl_cfg_mask);
+    }
+
+    /* add sockaddr dummy resource, if md supports it */
+    if (md->attr.cap.flags & UCT_MD_FLAG_SOCKADDR) {
+        sa_rsc.dev_type = UCT_DEVICE_TYPE_NET;
+        ucs_snprintf_zero(sa_rsc.tl_name, UCT_TL_NAME_MAX, "%s", md->rsc.md_name);
+        ucs_snprintf_zero(sa_rsc.dev_name, UCT_DEVICE_NAME_MAX, "sockaddr");
+        ucp_add_tl_resource_if_enabled(context, md, md_index, config, &sa_rsc,
+                                       UCP_TL_RSC_FLAG_SOCKADDR, num_resources_p,
+                                       dev_cfg_masks, tl_cfg_mask);
+    }
+
+out_free_resources:
+    uct_release_tl_resource_list(tl_resources);
+    return UCS_OK;
+
+err_free_resources:
+    uct_release_tl_resource_list(tl_resources);
+err:
+    return status;
+}
+/* Add to avail_tls the name of every alias in ucp_tl_aliases that has at
+ * least one member transport already present in the set. */
+static void ucp_get_aliases_set(ucs_string_set_t *avail_tls)
+{
+    ucp_tl_alias_t *alias;
+    const char 
**tl_name;
+
+    for (alias = ucp_tl_aliases; alias->alias != NULL; ++alias) {
+        for (tl_name = alias->tls; *tl_name != NULL; ++tl_name) {
+            if (ucs_string_set_contains(avail_tls, *tl_name)) {
+                ucs_string_set_add(avail_tls, alias->alias);
+                break; /* one available member is enough for this alias */
+            }
+        }
+    }
+}
+/* Warn about every name in cfg (other than "all") whose bit is not set in
+ * 'mask', and list the sorted contents of avail_names as alternatives.
+ * title1/title2 are concatenated to form the noun used in the message. */
+static void ucp_report_unavailable(const ucs_config_names_array_t* cfg,
+                                   uint64_t mask, const char *title1,
+                                   const char *title2,
+                                   const ucs_string_set_t *avail_names)
+{
+    ucs_string_buffer_t avail_strb, unavail_strb;
+    unsigned i;
+    int found;
+
+    ucs_string_buffer_init(&unavail_strb);
+
+    found = 0;
+    for (i = 0; i < cfg->count; i++) {
+        if (!(mask & UCS_BIT(i)) && strcmp(cfg->names[i], UCP_RSC_CONFIG_ALL)) {
+            ucs_string_buffer_appendf(&unavail_strb, "%s'%s'",
+                                      found ? "," : "",
+                                      cfg->names[i]);
+            ++found;
+        }
+    }
+
+    if (found) {
+        ucs_string_buffer_init(&avail_strb);
+        ucs_string_set_print_sorted(avail_names, &avail_strb, ", ");
+        ucs_warn("%s%s%s %s %s not available, please use one or more of: %s",
+                 title1, title2,
+                 (found > 1) ? "s" : "",
+                 ucs_string_buffer_cstr(&unavail_strb),
+                 (found > 1) ? "are" : "is",
+                 ucs_string_buffer_cstr(&avail_strb));
+        ucs_string_buffer_cleanup(&avail_strb);
+    }
+
+    ucs_string_buffer_cleanup(&unavail_strb);
+}
+/* Return the transport name of the first non-sockaddr resource in the context
+ * whose name checksum equals tl_name_csum, or NULL if there is none. */
+const char * ucp_find_tl_name_by_csum(ucp_context_t *context, uint16_t tl_name_csum)
+{
+    ucp_tl_resource_desc_t *rsc;
+
+    for (rsc = context->tl_rscs; rsc < context->tl_rscs + context->num_tls; ++rsc) {
+        if (!(rsc->flags & UCP_TL_RSC_FLAG_SOCKADDR) && (rsc->tl_name_csum == tl_name_csum)) {
+            return rsc->tl_rsc.tl_name;
+        }
+    }
+    return NULL;
+}
+/* Verify that transport name checksums are unique across the context. */
+static ucs_status_t ucp_check_tl_names(ucp_context_t *context)
+{
+    ucp_tl_resource_desc_t *rsc;
+    const char *tl_name;
+
+    /* Make sure we don't have two different transports with the same checksum. 
*/ + for (rsc = context->tl_rscs; rsc < context->tl_rscs + context->num_tls; ++rsc) { + tl_name = ucp_find_tl_name_by_csum(context, rsc->tl_name_csum); + if ((tl_name != NULL) && strcmp(rsc->tl_rsc.tl_name, tl_name)) { + ucs_error("Transports '%s' and '%s' have same checksum (0x%x), " + "please rename one of them to avoid collision", + rsc->tl_rsc.tl_name, tl_name, rsc->tl_name_csum); + return UCS_ERR_ALREADY_EXISTS; + } + } + return UCS_OK; +} + +const char* ucp_tl_bitmap_str(ucp_context_h context, uint64_t tl_bitmap, + char *str, size_t max_str_len) +{ + ucp_rsc_index_t i; + char *p, *endp; + + p = str; + endp = str + max_str_len; + + ucs_for_each_bit(i, tl_bitmap) { + ucs_snprintf_zero(p, endp - p, "%s ", + context->tl_rscs[i].tl_rsc.tl_name); + p += strlen(p); + } + + return str; +} + + +static void ucp_free_resources(ucp_context_t *context) +{ + ucp_rsc_index_t i; + + if (context->memtype_cache != NULL) { + ucs_memtype_cache_destroy(context->memtype_cache); + } + + ucs_free(context->tl_rscs); + for (i = 0; i < context->num_mds; ++i) { + uct_md_close(context->tl_mds[i].md); + } + ucs_free(context->tl_mds); + ucs_free(context->tl_cmpts); +} + +static ucs_status_t ucp_check_resource_config(const ucp_config_t *config) +{ + /* if we got here then num_resources > 0. + * if the user's device list is empty, there is no match */ + if ((0 == config->devices[UCT_DEVICE_TYPE_NET].count) && + (0 == config->devices[UCT_DEVICE_TYPE_SHM].count) && + (0 == config->devices[UCT_DEVICE_TYPE_ACC].count) && + (0 == config->devices[UCT_DEVICE_TYPE_SELF].count)) { + ucs_error("The device lists are empty. Please specify the devices you would like to use " + "or omit the UCX_*_DEVICES so that the default will be used."); + return UCS_ERR_NO_ELEM; + } + + /* if we got here then num_resources > 0. + * if the user's tls list is empty, there is no match */ + if (0 == config->tls.count) { + ucs_error("The TLs list is empty. 
Please specify the transports you would like to use "
+                   "or omit the UCX_TLS so that the default will be used.");
+         return UCS_ERR_NO_ELEM;
+     }
+
+     return UCS_OK;
+}
+/* Initialize *tl_md for the MD resource md_rsc of component cmpt_index: read
+ * the MD configuration, open the MD, and query its attributes. On any failure
+ * the status is returned and no MD is left open. */
+static ucs_status_t ucp_fill_tl_md(ucp_context_h context,
+                                   ucp_rsc_index_t cmpt_index,
+                                   const uct_md_resource_desc_t *md_rsc,
+                                   ucp_tl_md_t *tl_md)
+{
+    uct_md_config_t *md_config;
+    ucs_status_t status;
+
+    /* Initialize tl_md structure */
+    tl_md->cmpt_index = cmpt_index;
+    tl_md->rsc = *md_rsc;
+
+    /* Read MD configuration */
+    status = uct_md_config_read(context->tl_cmpts[cmpt_index].cmpt, NULL, NULL,
+                                &md_config);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    status = uct_md_open(context->tl_cmpts[cmpt_index].cmpt, md_rsc->md_name,
+                         md_config, &tl_md->md);
+    uct_config_release(md_config); /* released whether or not the open succeeded */
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    VALGRIND_MAKE_MEM_UNDEFINED(&tl_md->attr, sizeof(tl_md->attr));
+    /* Save MD attributes */
+    status = uct_md_query(tl_md->md, &tl_md->attr);
+    if (status != UCS_OK) {
+        uct_md_close(tl_md->md);
+        return status;
+    }
+
+    return UCS_OK;
+}
+/* Format a names array into buf (capacity max) as "<title>:a,b,c " - the
+ * "<title>:" part is omitted for an empty title. If the array contains the
+ * keyword "all", buf is set to the empty string instead. */
+static void ucp_resource_config_array_str(const ucs_config_names_array_t *array,
+                                          const char *title, char *buf, size_t max)
+{
+    char *p, *endp;
+    unsigned i;
+
+    if (ucp_str_array_search((const char**)array->names, array->count,
+                             UCP_RSC_CONFIG_ALL, NULL)) {
+        strncpy(buf, "", max);
+        return;
+    }
+
+    p = buf;
+    endp = buf + max;
+
+    if (strlen(title)) {
+        snprintf(p, endp - p, "%s:", title);
+        p += strlen(p);
+    }
+
+    for (i = 0; i < array->count; ++i) {
+        snprintf(p, endp - p, "%s%c", array->names[i],
+                  (i == array->count - 1) ? ' ' : ',');
+        p += strlen(p);
+    }
+}
+/* Describe the requested transports and devices in buf (capacity max): the
+ * transport list, then "on " if that list was non-empty, then one section per
+ * device type, or "all devices" if no device section produced any text. */
+static void ucp_resource_config_str(const ucp_config_t *config, char *buf,
+                                    size_t max)
+{
+    int dev_type_idx;
+    char *p, *endp, *devs_p;
+
+    p = buf;
+    endp = buf + max;
+
+    ucp_resource_config_array_str(&config->tls, "", p, endp - p);
+
+    if (strlen(p)) {
+        p += strlen(p);
+        snprintf(p, endp - p, "on ");
+        p += strlen(p);
+    }
+
+    devs_p = p; /* remember where the device part starts */
+    for (dev_type_idx = 0; dev_type_idx < UCT_DEVICE_TYPE_LAST; ++dev_type_idx) {
+        ucp_resource_config_array_str(&config->devices[dev_type_idx],
+                                      ucp_device_type_names[dev_type_idx], p,
+                                      endp - p);
+        p += strlen(p);
+    }
+
+    if (devs_p == p) {
+        snprintf(p, endp - p, "all devices");
+    }
+}
+/* Compute context->config.sockaddr_aux_rscs_bitmap: the subset of
+ * context->tl_bitmap whose transport names are selected by the
+ * sockaddr_aux_tls configuration list. */
+static void ucp_fill_sockaddr_aux_tls_config(ucp_context_h context,
+                                             const ucp_config_t *config)
+{
+    const char **tl_names = (const char**)config->sockaddr_aux_tls.aux_tls;
+    unsigned count = config->sockaddr_aux_tls.count;
+    uint8_t dummy_flags = 0;
+    uint64_t dummy_mask = 0;
+    ucp_rsc_index_t tl_id;
+
+    context->config.sockaddr_aux_rscs_bitmap = 0;
+
+    /* Check if any of the context's resources are present in the sockaddr
+     * auxiliary transports for the client-server flow */
+    ucs_for_each_bit(tl_id, context->tl_bitmap) {
+        if (ucp_is_resource_in_transports_list(context->tl_rscs[tl_id].tl_rsc.tl_name,
+                                               tl_names, count, &dummy_flags,
+                                               &dummy_mask)) {
+            context->config.sockaddr_aux_rscs_bitmap |= UCS_BIT(tl_id);
+        }
+    }
+}
+/* Fill context->config.sockaddr_tl_ids with the ids of the context's sockaddr
+ * transports, ordered by the given priority list of names. */
+static void ucp_fill_sockaddr_tls_prio_list(ucp_context_h context,
+                                            const char **sockaddr_tl_names,
+                                            ucp_rsc_index_t num_sockaddr_tls)
+{
+    uint64_t sa_tls_bitmap = 0;
+    ucp_rsc_index_t idx = 0;
+    ucp_tl_resource_desc_t *resource;
+    ucp_rsc_index_t tl_id;
+    ucp_tl_md_t *tl_md;
+    ucp_rsc_index_t j;
+
+    /* Set a bitmap of sockaddr transports */
+    for (j = 0; j < context->num_tls; ++j) {
+        resource = &context->tl_rscs[j];
+        tl_md = &context->tl_mds[resource->md_index];
+        if (tl_md->attr.cap.flags & UCT_MD_FLAG_SOCKADDR) {
+            sa_tls_bitmap |= UCS_BIT(j);
+        }
+    }
+
+    /* 
Parse the sockaddr transports priority list */
+    for (j = 0; j < num_sockaddr_tls; j++) {
+        /* go over the priority list and find the transport's tl_id in the
+         * sockaddr tls bitmap. save the tl_id's for the client/server usage
+         * later */
+        ucs_for_each_bit(tl_id, sa_tls_bitmap) {
+            resource = &context->tl_rscs[tl_id];
+
+            if (!strcmp(sockaddr_tl_names[j], "*") ||
+                !strncmp(sockaddr_tl_names[j], resource->tl_rsc.tl_name,
+                         UCT_TL_NAME_MAX)) {
+                context->config.sockaddr_tl_ids[idx] = tl_id;
+                idx++;
+                sa_tls_bitmap &= ~UCS_BIT(tl_id);
+            }
+        }
+    }
+
+    context->config.num_sockaddr_tls = idx;
+}
+
+/* Fill context->config.cm_cmpt_idxs with the indexes of the CM-capable
+ * components (context->config.cm_cmpts_bitmap), ordered by the priority list
+ * sockaddr_cm_names, and set context->config.num_cm_cmpts to the number of
+ * entries written. The "*" name matches every component not matched yet.
+ * When sockaddr_cm_enable is zero the list is left empty.
+ *
+ * Entries are stored densely in [0, num_cm_cmpts): a priority name that
+ * matches nothing leaves no gap, and a "*" that matches several components
+ * yields one entry per component. */
+static void ucp_fill_sockaddr_cms_prio_list(ucp_context_h context,
+                                            const char **sockaddr_cm_names,
+                                            ucp_rsc_index_t num_sockaddr_cms,
+                                            int sockaddr_cm_enable)
+{
+    uint64_t cm_cmpts_bitmap = context->config.cm_cmpts_bitmap;
+    uint64_t cm_cmpts_bitmap_safe;
+    ucp_rsc_index_t cmpt_idx, cm_idx;
+
+    memset(&context->config.cm_cmpt_idxs, UCP_NULL_RESOURCE, UCP_MAX_RESOURCES);
+    context->config.num_cm_cmpts = 0;
+
+    if (!sockaddr_cm_enable) {
+        return;
+    }
+
+    /* Parse the sockaddr CMs priority list */
+    for (cm_idx = 0; cm_idx < num_sockaddr_cms; ++cm_idx) {
+        /* go over the priority list and find the CM's cm_idx in the
+         * sockaddr CMs bitmap. Save the cmpt_idx for the client/server usage
+         * later */
+        cm_cmpts_bitmap_safe = cm_cmpts_bitmap; /* iterate a snapshot; bits are cleared below */
+        ucs_for_each_bit(cmpt_idx, cm_cmpts_bitmap_safe) {
+            if (!strcmp(sockaddr_cm_names[cm_idx], "*") ||
+                !strncmp(sockaddr_cm_names[cm_idx],
+                         context->tl_cmpts[cmpt_idx].attr.name,
+                         UCT_COMPONENT_NAME_MAX)) {
+                /* Append at the next free slot rather than at cm_idx, so the
+                 * slot index always agrees with num_cm_cmpts. Each component
+                 * is appended at most once because its bit is cleared. */
+                context->config.cm_cmpt_idxs[context->config.num_cm_cmpts] = cmpt_idx;
+                cm_cmpts_bitmap &= ~UCS_BIT(cmpt_idx);
+                ++context->config.num_cm_cmpts;
+            }
+        }
+    }
+}
+
+/* Build both sockaddr priority lists (transports and connection managers)
+ * from the same configured name list, truncated to UCP_MAX_RESOURCES entries
+ * with a warning if it is longer. */
+static void ucp_fill_sockaddr_prio_list(ucp_context_h context,
+                                        const ucp_config_t *config)
+{
+    const char **sockaddr_tl_names = (const char**)config->sockaddr_cm_tls.cm_tls;
+    unsigned num_sockaddr_tls = config->sockaddr_cm_tls.count;
+    int sockaddr_cm_enable = context->config.ext.sockaddr_cm_enable !=
+                             UCS_NO;
+
+    /* Check if a list of sockaddr transports/CMs has valid length */
+    if (num_sockaddr_tls > UCP_MAX_RESOURCES) {
+        ucs_warn("sockaddr transports or connection managers list is too long, "
+                 "only first %d entries will be used", UCP_MAX_RESOURCES);
+        num_sockaddr_tls = UCP_MAX_RESOURCES;
+    }
+
+    ucp_fill_sockaddr_tls_prio_list(context, sockaddr_tl_names,
+                                    num_sockaddr_tls);
+    ucp_fill_sockaddr_cms_prio_list(context, sockaddr_tl_names,
+                                    num_sockaddr_tls, sockaddr_cm_enable);
+}
+
+/* Sanity-check the collected resources: at least one transport that is
+ * neither auxiliary nor sockaddr, fewer than UCP_MAX_RESOURCES transports,
+ * and no transport-name checksum collisions. */
+static ucs_status_t ucp_check_resources(ucp_context_h context,
+                                        const ucp_config_t *config)
+{
+    char info_str[128];
+    ucp_rsc_index_t tl_id;
+    ucp_tl_resource_desc_t *resource;
+    unsigned num_usable_tls;
+
+    /* Error check: Make sure there is at least one transport that is not
+     * sockaddr or auxiliary */
+    num_usable_tls = 0;
+    for (tl_id = 0; tl_id < context->num_tls; ++tl_id) {
+        ucs_assert(context->tl_rscs != NULL);
+        resource = &context->tl_rscs[tl_id];
+        if (!(resource->flags & (UCP_TL_RSC_FLAG_AUX|UCP_TL_RSC_FLAG_SOCKADDR))) {
+            num_usable_tls++;
+        }
+    }
+
+    if (num_usable_tls == 0) {
+        ucp_resource_config_str(config, info_str, sizeof(info_str));
+        ucs_error("no usable transports/devices (asked %s)", info_str);
+        return 
UCS_ERR_NO_DEVICE;
+    }
+
+    /* Error check: Make sure there are not too many transports */
+    if (context->num_tls >= UCP_MAX_RESOURCES) {
+        ucs_error("exceeded transports/devices limit "
+                  "(%u requested, up to %d are supported)",
+                  context->num_tls, UCP_MAX_RESOURCES);
+        return UCS_ERR_EXCEEDS_LIMIT;
+    }
+
+    return ucp_check_tl_names(context);
+}
+/* Open the memory domains of component cmpt_index and add their enabled
+ * transport resources to the context. An MD that fails to open is skipped;
+ * an MD that ends up with no selected transport resources is closed again.
+ * MDs that can detect a memory type not covered so far are recorded in
+ * context->mem_type_detect_mds. */
+static ucs_status_t ucp_add_component_resources(ucp_context_h context,
+                                                ucp_rsc_index_t cmpt_index,
+                                                ucs_string_set_t avail_devices[],
+                                                ucs_string_set_t *avail_tls,
+                                                uint64_t dev_cfg_masks[],
+                                                uint64_t *tl_cfg_mask,
+                                                const ucp_config_t *config)
+{
+    const ucp_tl_cmpt_t *tl_cmpt = &context->tl_cmpts[cmpt_index];
+    uct_component_attr_t uct_component_attr;
+    unsigned num_tl_resources;
+    ucs_status_t status;
+    ucp_rsc_index_t i;
+    unsigned md_index;
+    uint64_t mem_type_mask;
+    uint64_t mem_type_bitmap;
+
+
+    /* List memory domain resources */
+    uct_component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES;
+    uct_component_attr.md_resources =
+                    ucs_alloca(tl_cmpt->attr.md_resource_count *
+                               sizeof(*uct_component_attr.md_resources));
+    status = uct_component_query(tl_cmpt->cmpt, &uct_component_attr);
+    if (status != UCS_OK) {
+        goto out;
+    }
+
+    /* Open all memory domains */
+    mem_type_mask = UCS_BIT(UCS_MEMORY_TYPE_HOST);
+    for (i = 0; i < tl_cmpt->attr.md_resource_count; ++i) {
+        md_index = context->num_mds; /* candidate slot; kept only if num_mds is bumped below */
+        status = ucp_fill_tl_md(context, cmpt_index,
+                                &uct_component_attr.md_resources[i],
+                                &context->tl_mds[md_index]);
+        if (status != UCS_OK) {
+            continue;
+        }
+
+        /* Add communication resources of each MD */
+        status = ucp_add_tl_resources(context, md_index, config,
+                                      &num_tl_resources, avail_devices,
+                                      avail_tls, dev_cfg_masks, tl_cfg_mask);
+        if (status != UCS_OK) {
+            uct_md_close(context->tl_mds[md_index].md);
+            goto out;
+        }
+
+        /* If the MD does not have transport resources (device or sockaddr),
+         * don't use it */
+        if (num_tl_resources > 0) {
+            /* List of memory type MDs */
+            mem_type_bitmap = context->tl_mds[md_index].attr.cap.detect_mem_types;
+            if (~mem_type_mask & mem_type_bitmap) {
+                context->mem_type_detect_mds[context->num_mem_type_detect_mds] = md_index;
+                ++context->num_mem_type_detect_mds;
+                mem_type_mask |= mem_type_bitmap;
+            }
+            ++context->num_mds;
+        } else {
+            ucs_debug("closing md %s because it has no selected transport resources",
+                      context->tl_mds[md_index].rsc.md_name);
+            uct_md_close(context->tl_mds[md_index].md);
+        }
+    }
+
+    status = UCS_OK;
+out:
+    return status;
+}
+
+static ucs_status_t ucp_fill_resources(ucp_context_h context,
+                                       const ucp_config_t *config)
+{
+    uint64_t dev_cfg_masks[UCT_DEVICE_TYPE_LAST] = {};
+    uint64_t tl_cfg_mask = 0;
+    ucs_string_set_t avail_devices[UCT_DEVICE_TYPE_LAST];
+    ucs_string_set_t avail_tls;
+    uct_component_h *uct_components;
+    unsigned i, num_uct_components;
+    uct_device_type_t dev_type;
+    ucs_status_t status;
+    unsigned max_mds;
+
+    context->tl_cmpts = NULL;
+    context->num_cmpts = 0;
+    context->tl_mds = NULL;
+    context->num_mds = 0;
+    context->tl_rscs = NULL;
+    context->num_tls = 0;
+    context->memtype_cache = NULL;
+    context->num_mem_type_detect_mds = 0;
+
+    for (i = 0; i < UCS_MEMORY_TYPE_LAST; ++i) {
+        context->mem_type_access_tls[i] = 0;
+    }
+
+    ucs_string_set_init(&avail_tls);
+    UCS_STATIC_ASSERT(UCT_DEVICE_TYPE_NET == 0);
+    for (dev_type = UCT_DEVICE_TYPE_NET; dev_type < UCT_DEVICE_TYPE_LAST; ++dev_type) {
+        ucs_string_set_init(&avail_devices[dev_type]);
+    }
+
+    status = ucp_check_resource_config(config);
+    if (status != UCS_OK) {
+        goto out_cleanup_avail_devices;
+    }
+
+    status = uct_query_components(&uct_components, &num_uct_components);
+    if (status != UCS_OK) {
+        goto out_cleanup_avail_devices;
+    }
+
+    if (num_uct_components > UCP_MAX_RESOURCES) {
+        ucs_error("too many components: %u, max: %u", num_uct_components,
+                  UCP_MAX_RESOURCES);
+        status = UCS_ERR_EXCEEDS_LIMIT;
+        goto out_release_components;
+    }
+
+    context->num_cmpts = num_uct_components;
+    context->tl_cmpts = 
ucs_calloc(context->num_cmpts, + sizeof(*context->tl_cmpts), "ucp_tl_cmpts"); + if (context->tl_cmpts == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out_release_components; + } + + context->config.cm_cmpts_bitmap = 0; + + max_mds = 0; + for (i = 0; i < context->num_cmpts; ++i) { + memset(&context->tl_cmpts[i].attr, 0, sizeof(context->tl_cmpts[i].attr)); + context->tl_cmpts[i].cmpt = uct_components[i]; + context->tl_cmpts[i].attr.field_mask = + UCT_COMPONENT_ATTR_FIELD_NAME | + UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT | + UCT_COMPONENT_ATTR_FIELD_FLAGS; + status = uct_component_query(context->tl_cmpts[i].cmpt, + &context->tl_cmpts[i].attr); + if (status != UCS_OK) { + goto err_free_resources; + } + + if (context->tl_cmpts[i].attr.flags & UCT_COMPONENT_FLAG_CM) { + context->config.cm_cmpts_bitmap |= UCS_BIT(i); + } + + max_mds += context->tl_cmpts[i].attr.md_resource_count; + } + + if ((context->config.ext.sockaddr_cm_enable == UCS_YES) && + (context->config.cm_cmpts_bitmap == 0)) { + ucs_error("there are no UCT components with CM capability"); + status = UCS_ERR_UNSUPPORTED; + goto err_free_resources; + } + + /* Allocate actual array of MDs */ + context->tl_mds = ucs_malloc(max_mds * sizeof(*context->tl_mds), + "ucp_tl_mds"); + if (context->tl_mds == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_free_resources; + } + + /* Collect resources of each component */ + for (i = 0; i < context->num_cmpts; ++i) { + status = ucp_add_component_resources(context, i, avail_devices, + &avail_tls, dev_cfg_masks, + &tl_cfg_mask, config); + if (status != UCS_OK) { + goto err_free_resources; + } + } + + /* Create memtype cache if we have memory type MDs, and it's enabled by + * configuration + */ + if (context->num_mem_type_detect_mds && context->config.ext.enable_memtype_cache) { + status = ucs_memtype_cache_create(&context->memtype_cache); + if (status != UCS_OK) { + ucs_debug("could not create memtype cache for mem_type allocations"); + goto err_free_resources; + } + } + + 
/* If unified mode is enabled, initialize tl_bitmap to 0. + * Then the worker will open all available transport resources and will + * select only the best ones for each particular device. + */ + context->tl_bitmap = config->ctx.unified_mode ? 0 : UCS_MASK(context->num_tls); + + /* Warn about devices and transports which were specified explicitly in the + * configuration, but are not available + */ + if (config->warn_invalid_config) { + UCS_STATIC_ASSERT(UCT_DEVICE_TYPE_NET == 0); + for (dev_type = UCT_DEVICE_TYPE_NET; dev_type < UCT_DEVICE_TYPE_LAST; ++dev_type) { + ucp_report_unavailable(&config->devices[dev_type], + dev_cfg_masks[dev_type], + ucp_device_type_names[dev_type], " device", + &avail_devices[dev_type]); + } + + ucp_get_aliases_set(&avail_tls); + ucp_report_unavailable(&config->tls, tl_cfg_mask, "", "transport", + &avail_tls); + } + + /* Validate context resources */ + status = ucp_check_resources(context, config); + if (status != UCS_OK) { + goto err_free_resources; + } + + ucp_fill_sockaddr_aux_tls_config(context, config); + ucp_fill_sockaddr_prio_list(context, config); + + ucs_assert(status == UCS_OK); + goto out_release_components; + +err_free_resources: + ucp_free_resources(context); +out_release_components: + uct_release_component_list(uct_components); +out_cleanup_avail_devices: + UCS_STATIC_ASSERT(UCT_DEVICE_TYPE_NET == 0); + for (dev_type = UCT_DEVICE_TYPE_NET; dev_type < UCT_DEVICE_TYPE_LAST; ++dev_type) { + ucs_string_set_cleanup(&avail_devices[dev_type]); + } + ucs_string_set_cleanup(&avail_tls); + return status; +} + +static void ucp_apply_params(ucp_context_h context, const ucp_params_t *params, + ucp_mt_type_t mt_type) +{ + if (params->field_mask & UCP_PARAM_FIELD_FEATURES) { + context->config.features = params->features; + } else { + context->config.features = 0; + } + if (!context->config.features) { + ucs_warn("empty features set passed to ucp context create"); + } + + if (params->field_mask & UCP_PARAM_FIELD_TAG_SENDER_MASK) { + 
context->config.tag_sender_mask = params->tag_sender_mask; + } else { + context->config.tag_sender_mask = 0; + } + + if (params->field_mask & UCP_PARAM_FIELD_REQUEST_SIZE) { + context->config.request.size = params->request_size; + } else { + context->config.request.size = 0; + } + + if (params->field_mask & UCP_PARAM_FIELD_REQUEST_INIT) { + context->config.request.init = params->request_init; + } else { + context->config.request.init = NULL; + } + + if (params->field_mask & UCP_PARAM_FIELD_REQUEST_CLEANUP) { + context->config.request.cleanup = params->request_cleanup; + } else { + context->config.request.cleanup = NULL; + } + + if (params->field_mask & UCP_PARAM_FIELD_ESTIMATED_NUM_EPS) { + context->config.est_num_eps = params->estimated_num_eps; + } else { + context->config.est_num_eps = 1; + } + + if (params->field_mask & UCP_PARAM_FIELD_ESTIMATED_NUM_PPN) { + context->config.est_num_ppn = params->estimated_num_ppn; + } else { + context->config.est_num_ppn = 1; + } + + if ((params->field_mask & UCP_PARAM_FIELD_MT_WORKERS_SHARED) && + params->mt_workers_shared) { + context->mt_lock.mt_type = mt_type; + } else { + context->mt_lock.mt_type = UCP_MT_TYPE_NONE; + } +} + +static ucs_status_t ucp_fill_config(ucp_context_h context, + const ucp_params_t *params, + const ucp_config_t *config) +{ + unsigned i, num_alloc_methods, method; + const char *method_name; + ucs_status_t status; + + ucp_apply_params(context, params, + config->ctx.use_mt_mutex ? UCP_MT_TYPE_MUTEX + : UCP_MT_TYPE_SPINLOCK); + + context->config.ext = config->ctx; + + if (context->config.ext.estimated_num_eps != UCS_ULUNITS_AUTO) { + /* num_eps was set via the env variable. Override current value */ + context->config.est_num_eps = context->config.ext.estimated_num_eps; + } + ucs_debug("estimated number of endpoints is %d", + context->config.est_num_eps); + + if (context->config.ext.estimated_num_ppn != UCS_ULUNITS_AUTO) { + /* num_ppn was set via the env variable. 
Override current value */ + context->config.est_num_ppn = context->config.ext.estimated_num_ppn; + } + ucs_debug("estimated number of endpoints per node is %d", + context->config.est_num_ppn); + + if (context->config.ext.bcopy_bw == UCS_BANDWIDTH_AUTO) { + /* bcopy_bw wasn't set via the env variable. Calculate the value */ + context->config.ext.bcopy_bw = ucs_cpu_get_memcpy_bw(); + } + ucs_debug("estimated bcopy bandwidth is %f", + context->config.ext.bcopy_bw); + + /* always init MT lock in context even though it is disabled by user, + * because we need to use context lock to protect ucp_mm_ and ucp_rkey_ + * routines */ + UCP_THREAD_LOCK_INIT(&context->mt_lock); + + /* Get allocation methods from configuration, make sure at least one is + * specified */ + if (config->alloc_prio.count == 0) { + ucs_error("No allocation methods specified - aborting"); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + num_alloc_methods = config->alloc_prio.count; + context->config.num_alloc_methods = num_alloc_methods; + + /* Allocate an array to hold the allocation methods configuration */ + context->config.alloc_methods = ucs_calloc(num_alloc_methods, + sizeof(*context->config.alloc_methods), + "ucp_alloc_methods"); + if (context->config.alloc_methods == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + + /* Parse the allocation methods specified in the configuration */ + for (i = 0; i < num_alloc_methods; ++i) { + method_name = config->alloc_prio.methods[i]; + if (!strncasecmp(method_name, "md:", 3)) { + /* If the method name begins with 'md:', treat it as memory domain + * component name. + */ + context->config.alloc_methods[i].method = UCT_ALLOC_METHOD_MD; + ucs_strncpy_zero(context->config.alloc_methods[i].cmpt_name, + method_name + 3, UCT_COMPONENT_NAME_MAX); + ucs_debug("allocation method[%d] is md '%s'", i, method_name + 3); + } else { + /* Otherwise, this is specific allocation method name. 
+ */ + context->config.alloc_methods[i].method = UCT_ALLOC_METHOD_LAST; + for (method = 0; method < UCT_ALLOC_METHOD_LAST; ++method) { + if ((method != UCT_ALLOC_METHOD_MD) && + !strcmp(method_name, uct_alloc_method_names[method])) + { + /* Found the allocation method in the internal name list */ + context->config.alloc_methods[i].method = (uct_alloc_method_t)method; + strcpy(context->config.alloc_methods[i].cmpt_name, ""); + ucs_debug("allocation method[%d] is '%s'", i, method_name); + break; + } + } + if (context->config.alloc_methods[i].method == UCT_ALLOC_METHOD_LAST) { + ucs_error("Invalid allocation method: %s", method_name); + status = UCS_ERR_INVALID_PARAM; + goto err_free; + } + } + } + + /* Need to check TM_SEG_SIZE value if it is enabled only */ + if (context->config.ext.tm_max_bb_size > context->config.ext.tm_thresh) { + if (context->config.ext.tm_max_bb_size < sizeof(ucp_request_hdr_t)) { + /* In case of expected SW RNDV message, the header (ucp_request_hdr_t) is + * scattered to UCP user buffer. Make sure that bounce buffer is used for + * messages which can not fit SW RNDV hdr. 
*/ + /* Log first: once the field is overwritten the configured value is lost */ + ucs_info("UCX_TM_MAX_BB_SIZE value: %zu, adjusted to: %zu", + context->config.ext.tm_max_bb_size, sizeof(ucp_request_hdr_t)); + context->config.ext.tm_max_bb_size = sizeof(ucp_request_hdr_t); + } + + if (context->config.ext.tm_max_bb_size > context->config.ext.seg_size) { + /* Clamp to seg_size; report the rejected value before overwriting it */ + ucs_info("Wrong UCX_TM_MAX_BB_SIZE value: %zu, adjusted to: %zu", + context->config.ext.tm_max_bb_size, + context->config.ext.seg_size); + context->config.ext.tm_max_bb_size = context->config.ext.seg_size; + } + } + + return UCS_OK; + +err_free: + ucs_free(context->config.alloc_methods); +err: + UCP_THREAD_LOCK_FINALIZE(&context->mt_lock); + return status; +} + +/* Release the allocation methods array held in context->config */ +static void ucp_free_config(ucp_context_h context) +{ + ucs_free(context->config.alloc_methods); +} + +ucs_status_t ucp_init_version(unsigned api_major_version, unsigned api_minor_version, + const ucp_params_t *params, const ucp_config_t *config, + ucp_context_h *context_p) +{ + unsigned major_version, minor_version, release_number; + ucp_config_t *dfl_config = NULL; + ucp_context_t *context; + ucs_status_t status; + ucs_debug_address_info_t addr_info; + + ucp_get_version(&major_version, &minor_version, &release_number); + + if ((api_major_version != major_version) || + ((api_major_version == major_version) && (api_minor_version > minor_version))) { + status = ucs_debug_lookup_address(ucp_init_version, &addr_info); + ucs_warn("UCP version is incompatible, required: %d.%d, actual: %d.%d (release %d %s)", + api_major_version, api_minor_version, + major_version, minor_version, release_number, + status == UCS_OK ? 
addr_info.file.path : ""); + } + + if (config == NULL) { + status = ucp_config_read(NULL, NULL, &dfl_config); + if (status != UCS_OK) { + goto err; + } + config = dfl_config; + } + + /* allocate a ucp context */ + context = ucs_calloc(1, sizeof(*context), "ucp context"); + if (context == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_release_config; + } + + status = ucp_fill_config(context, params, config); + if (status != UCS_OK) { + goto err_free_ctx; + } + + /* fill resources we should use */ + status = ucp_fill_resources(context, config); + if (status != UCS_OK) { + goto err_free_config; + } + + if (dfl_config != NULL) { + ucp_config_release(dfl_config); + } + + ucs_debug("created ucp context %p [%d mds %d tls] features 0x%lx tl bitmap 0x%lx", + context, context->num_mds, context->num_tls, + context->config.features, context->tl_bitmap); + + *context_p = context; + return UCS_OK; + +err_free_config: + ucp_free_config(context); + /* ucp_fill_config() initialized mt_lock; finalize it here, in the same + * order as ucp_cleanup(), so it is not leaked on this path */ + UCP_THREAD_LOCK_FINALIZE(&context->mt_lock); +err_free_ctx: + ucs_free(context); +err_release_config: + if (dfl_config != NULL) { + ucp_config_release(dfl_config); + } +err: + return status; +} + +void ucp_cleanup(ucp_context_h context) +{ + ucp_free_resources(context); + ucp_free_config(context); + UCP_THREAD_LOCK_FINALIZE(&context->mt_lock); + ucs_free(context); +} + +/* Append " : " followed by a hex dump of up to log_data_size bytes of 'data' + * to the string in 'buffer', never writing more than 'max' bytes in total */ +void ucp_dump_payload(ucp_context_h context, char *buffer, size_t max, + const void *data, size_t length) +{ + size_t data_size = ucs_global_opts.log_data_size; + char *p, *endp; + size_t offset; + + if (data_size == 0) { + return; + } + + /* Continue after any text already in the buffer; the scan is bounded by + * 'max' so an unterminated or zero-sized buffer is left untouched */ + p = buffer + strnlen(buffer, max); + endp = buffer + max; + + /* snprintf() counts the terminator in its size limit, whereas the limit of + * strncat() excludes it and could write one byte past the buffer end */ + snprintf(p, endp - p, " : "); + p += strnlen(p, endp - p); + + offset = 0; + while ((offset < length) && (offset < data_size) && (p < endp)) { + snprintf(p, endp - p, "%02x", ((const uint8_t*)data)[offset]); + p += strlen(p); + ++offset; + } +} + +void ucp_context_uct_atomic_iface_flags(ucp_context_h context, + ucp_tl_iface_atomic_flags_t *atomic) +{ + if (context->config.features & UCP_FEATURE_AMO32) { + atomic->atomic32.op_flags = 
UCP_ATOMIC_OP_MASK; + atomic->atomic32.fop_flags = UCP_ATOMIC_FOP_MASK; + } else { + atomic->atomic32.op_flags = 0; + atomic->atomic32.fop_flags = 0; + } + + if (context->config.features & UCP_FEATURE_AMO64) { + atomic->atomic64.op_flags = UCP_ATOMIC_OP_MASK; + atomic->atomic64.fop_flags = UCP_ATOMIC_FOP_MASK; + } else { + atomic->atomic64.op_flags = 0; + atomic->atomic64.fop_flags = 0; + } +} + +ucs_status_t ucp_context_query(ucp_context_h context, ucp_context_attr_t *attr) +{ + if (attr->field_mask & UCP_ATTR_FIELD_REQUEST_SIZE) { + attr->request_size = sizeof(ucp_request_t); + } + if (attr->field_mask & UCP_ATTR_FIELD_THREAD_MODE) { + if (UCP_THREAD_IS_REQUIRED(&context->mt_lock)) { + attr->thread_mode = UCS_THREAD_MODE_MULTI; + } else { + attr->thread_mode = UCS_THREAD_MODE_SINGLE; + } + } + + return UCS_OK; +} + +void ucp_context_print_info(ucp_context_h context, FILE *stream) +{ + ucp_rsc_index_t cmpt_index, md_index, rsc_index; + + fprintf(stream, "#\n"); + fprintf(stream, "# UCP context\n"); + fprintf(stream, "#\n"); + + for (cmpt_index = 0; cmpt_index < context->num_cmpts; ++cmpt_index) { + fprintf(stream, "# component %-2d : %s\n", + cmpt_index, context->tl_cmpts[cmpt_index].attr.name); + } + fprintf(stream, "#\n"); + + for (md_index = 0; md_index < context->num_mds; ++md_index) { + fprintf(stream, "# md %-2d : component %-2d %s \n", + md_index, context->tl_mds[md_index].cmpt_index, + context->tl_mds[md_index].rsc.md_name); + } + + fprintf(stream, "#\n"); + + for (rsc_index = 0; rsc_index < context->num_tls; ++rsc_index) { + ucp_tl_resource_desc_t *rsc = &context->tl_rscs[rsc_index]; + fprintf(stream, "# resource %-2d : md %-2d dev %-2d flags %c%c " + UCT_TL_RESOURCE_DESC_FMT"\n", + rsc_index, rsc->md_index, rsc->dev_index, + (rsc->flags & UCP_TL_RSC_FLAG_AUX) ? 'a' : '-', + (rsc->flags & UCP_TL_RSC_FLAG_SOCKADDR) ? 
's' : '-', + UCT_TL_RESOURCE_DESC_ARG(&rsc->tl_rsc)); + } + + fprintf(stream, "#\n"); +} + +uct_md_h ucp_context_find_tl_md(ucp_context_h context, const char *md_name) +{ + ucp_rsc_index_t rsc_index; + + for (rsc_index = 0; rsc_index < context->num_mds; ++rsc_index) { + if (strstr(context->tl_mds[rsc_index].rsc.md_name, md_name)) { + return context->tl_mds[rsc_index].md; + } + } + + return NULL; +} + +ucs_memory_type_t +ucp_memory_type_detect_mds(ucp_context_h context, const void *address, size_t size) +{ + ucs_memory_type_t mem_type; + unsigned i, md_index; + ucs_status_t status; + + for (i = 0; i < context->num_mem_type_detect_mds; ++i) { + md_index = context->mem_type_detect_mds[i]; + status = uct_md_detect_memory_type(context->tl_mds[md_index].md, + address, size, &mem_type); + if (status == UCS_OK) { + if (context->memtype_cache != NULL) { + ucs_memtype_cache_update(context->memtype_cache, address, size, + mem_type); + } + return mem_type; + } + } + + /* Memory type not detected by any memtype MD - assume it is host memory */ + return UCS_MEMORY_TYPE_HOST; +} + +uint64_t ucp_context_dev_tl_bitmap(ucp_context_h context, const char *dev_name) +{ + uint64_t tl_bitmap; + ucp_rsc_index_t tl_idx; + + tl_bitmap = 0; + + ucs_for_each_bit(tl_idx, context->tl_bitmap) { + if (strcmp(context->tl_rscs[tl_idx].tl_rsc.dev_name, dev_name)) { + continue; + } + + tl_bitmap |= UCS_BIT(tl_idx); + } + + return tl_bitmap; +} + +uint64_t ucp_context_dev_idx_tl_bitmap(ucp_context_h context, + ucp_rsc_index_t dev_idx) +{ + uint64_t tl_bitmap; + ucp_rsc_index_t tl_idx; + + tl_bitmap = 0; + + ucs_for_each_bit(tl_idx, context->tl_bitmap) { + if (context->tl_rscs[tl_idx].dev_index == dev_idx) { + tl_bitmap |= UCS_BIT(tl_idx); + } + } + + return tl_bitmap; +} diff --git a/src/ucp/core/ucp_context.h b/src/ucp/core/ucp_context.h new file mode 100644 index 0000000..9bc16b1 --- /dev/null +++ b/src/ucp/core/ucp_context.h @@ -0,0 +1,452 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 
2001-2015. ALL RIGHTS RESERVED. + * Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCP_CONTEXT_H_ +#define UCP_CONTEXT_H_ + +#include "ucp_types.h" +#include "ucp_thread.h" + +#include +#include +#include +#include +#include +#include +#include + + +enum { + /* The flag indicates that the resource may be used for auxiliary + * wireup communications only */ + UCP_TL_RSC_FLAG_AUX = UCS_BIT(0), + /* The flag indicates that the resource may be used for client-server + * connection establishment with a sockaddr */ + UCP_TL_RSC_FLAG_SOCKADDR = UCS_BIT(1) +}; + + +typedef struct ucp_context_config { + /** Threshold for switching UCP to buffered copy(bcopy) protocol */ + size_t bcopy_thresh; + /** Threshold for switching UCP to rendezvous protocol */ + size_t rndv_thresh; + /** Threshold for switching UCP to rendezvous protocol + * in ucp_tag_send_nbr() */ + size_t rndv_send_nbr_thresh; + /** Threshold for switching UCP to rendezvous protocol in case the calculated + * threshold is zero or negative */ + size_t rndv_thresh_fallback; + /** The percentage allowed for performance difference between rendezvous + * and the eager_zcopy protocol */ + double rndv_perf_diff; + /** Threshold for switching UCP to zero copy protocol */ + size_t zcopy_thresh; + /** Communication scheme in RNDV protocol */ + ucp_rndv_mode_t rndv_mode; + /** Estimation of bcopy bandwidth */ + double bcopy_bw; + /** Segment size in the worker pre-registered memory pool */ + size_t seg_size; + /** RNDV pipeline fragment size */ + size_t rndv_frag_size; + /** Threshold for using tag matching offload capabilities. Smaller buffers + * will not be posted to the transport. 
*/ + size_t tm_thresh; + /** Threshold for forcing tag matching offload capabilities */ + size_t tm_force_thresh; + /** Upper bound for posting tm offload receives with internal UCP + * preregistered bounce buffers. */ + size_t tm_max_bb_size; + /** Enabling SW rndv protocol with tag offload mode */ + int tm_sw_rndv; + /** Maximal size of worker name for debugging */ + unsigned max_worker_name; + /** Atomic mode */ + ucp_atomic_mode_t atomic_mode; + /** If use mutex for MT support or not */ + int use_mt_mutex; + /** On-demand progress */ + int adaptive_progress; + /** Eager-am multi-lane support */ + unsigned max_eager_lanes; + /** Rendezvous-get multi-lane support */ + unsigned max_rndv_lanes; + /** Estimated number of endpoints */ + size_t estimated_num_eps; + /** Estimated number of processes per node */ + size_t estimated_num_ppn; + /** Memtype cache */ + int enable_memtype_cache; + /** Enable flushing endpoints while flushing a worker */ + int flush_worker_eps; + /** Enable optimizations suitable for homogeneous systems */ + int unified_mode; + /** Enable cm wireup-and-close protocol for client-server connections */ + ucs_ternary_value_t sockaddr_cm_enable; +} ucp_context_config_t; + + +struct ucp_config { + /** Array of device lists names to use. 
+ * This array holds three lists - network devices, shared memory devices + * and acceleration devices */ + ucs_config_names_array_t devices[UCT_DEVICE_TYPE_LAST]; + /** Array of transport names to use */ + ucs_config_names_array_t tls; + /** Array of memory allocation methods */ + UCS_CONFIG_STRING_ARRAY_FIELD(methods) alloc_prio; + /** Array of transports for partial worker address to pack */ + UCS_CONFIG_STRING_ARRAY_FIELD(aux_tls) sockaddr_aux_tls; + /** Array of transports for client-server transports and port selection */ + UCS_CONFIG_STRING_ARRAY_FIELD(cm_tls) sockaddr_cm_tls; + /** Warn on invalid configuration */ + int warn_invalid_config; + /** Configuration saved directly in the context */ + ucp_context_config_t ctx; +}; + + +/** + * UCP communication resource descriptor + */ +typedef struct ucp_tl_resource_desc { + uct_tl_resource_desc_t tl_rsc; /* UCT resource descriptor */ + uint16_t tl_name_csum; /* Checksum of transport name */ + ucp_rsc_index_t md_index; /* Memory domain index (within the context) */ + ucp_rsc_index_t dev_index; /* Arbitrary device index. Resources + with same index have same device name. */ + uint8_t flags; /* Flags that describe resource specifics */ +} ucp_tl_resource_desc_t; + + +/** + * Transport aliases. + */ +typedef struct ucp_tl_alias { + const char *alias; /* Alias name */ + const char* tls[8]; /* Transports which are selected by the alias */ +} ucp_tl_alias_t; + + +/** + * UCT component + */ +typedef struct ucp_tl_cmpt { + uct_component_h cmpt; /* UCT component handle */ + uct_component_attr_t attr; /* UCT component attributes */ +} ucp_tl_cmpt_t; + + +/** + * Memory domain. 
+ */ +typedef struct ucp_tl_md { + uct_md_h md; /* Memory domain handle */ + ucp_rsc_index_t cmpt_index; /* Index of owning component */ + uct_md_resource_desc_t rsc; /* Memory domain resource */ + uct_md_attr_t attr; /* Memory domain attributes */ +} ucp_tl_md_t; + + +/** + * UCP context + */ +typedef struct ucp_context { + + ucp_tl_cmpt_t *tl_cmpts; /* UCT components */ + ucp_rsc_index_t num_cmpts; /* Number of UCT components */ + + ucp_tl_md_t *tl_mds; /* Memory domain resources */ + ucp_rsc_index_t num_mds; /* Number of memory domains */ + + /* List of MDs which detect non host memory type */ + ucp_rsc_index_t mem_type_detect_mds[UCS_MEMORY_TYPE_LAST]; + ucp_rsc_index_t num_mem_type_detect_mds; /* Number of mem type MDs */ + ucs_memtype_cache_t *memtype_cache; /* mem type allocation cache */ + + ucp_tl_resource_desc_t *tl_rscs; /* Array of communication resources */ + uint64_t tl_bitmap; /* Cached map of tl resources used by workers. + * Not all resources may be used if unified + * mode is enabled. 
*/ + ucp_rsc_index_t num_tls; /* Number of resources in the array */ + + /* Mask of memory type communication resources */ + uint64_t mem_type_access_tls[UCS_MEMORY_TYPE_LAST]; + + struct { + + /* Bitmap of features supported by the context */ + uint64_t features; + uint64_t tag_sender_mask; + + /* How many endpoints are expected to be created */ + int est_num_eps; + + /* How many endpoints are expected to be created on single node */ + int est_num_ppn; + + struct { + size_t size; /* Request size for user */ + ucp_request_init_callback_t init; /* Initialization user callback */ + ucp_request_cleanup_callback_t cleanup; /* Cleanup user callback */ + } request; + + /* Array of allocation methods, a mix of MD allocation methods and non-MD */ + struct { + /* Allocation method */ + uct_alloc_method_t method; + + /* Component name to use, if method is MD */ + char cmpt_name[UCT_COMPONENT_NAME_MAX]; + } *alloc_methods; + unsigned num_alloc_methods; + + /* Cached map of components which support CM capability */ + uint64_t cm_cmpts_bitmap; + + /* Bitmap of sockaddr auxiliary transports to pack for client/server flow */ + uint64_t sockaddr_aux_rscs_bitmap; + + /* Array of sockaddr transports indexes. + * The indexes appear in the configured priority order */ + ucp_rsc_index_t sockaddr_tl_ids[UCP_MAX_RESOURCES]; + ucp_rsc_index_t num_sockaddr_tls; + /* Array of CMs indexes. The indexes appear in the configured priority + * order. 
*/ + ucp_rsc_index_t cm_cmpt_idxs[UCP_MAX_RESOURCES]; + ucp_rsc_index_t num_cm_cmpts; + + /* Configuration supplied by the user */ + ucp_context_config_t ext; + + } config; + + /* All configurations about multithreading support */ + ucp_mt_lock_t mt_lock; + +} ucp_context_t; + + +typedef struct ucp_am_handler { + uint64_t features; + uct_am_callback_t cb; + ucp_am_tracer_t tracer; + uint32_t flags; + uct_am_callback_t proxy_cb; +} ucp_am_handler_t; + +typedef struct ucp_tl_iface_atomic_flags { + struct { + uint64_t op_flags; /**< Attributes for atomic-post operations */ + uint64_t fop_flags; /**< Attributes for atomic-fetch operations */ + } atomic32, atomic64; +} ucp_tl_iface_atomic_flags_t; + +#define UCP_ATOMIC_OP_MASK (UCS_BIT(UCT_ATOMIC_OP_ADD) | \ + UCS_BIT(UCT_ATOMIC_OP_AND) | \ + UCS_BIT(UCT_ATOMIC_OP_OR) | \ + UCS_BIT(UCT_ATOMIC_OP_XOR)) + +#define UCP_ATOMIC_FOP_MASK (UCS_BIT(UCT_ATOMIC_OP_ADD) | \ + UCS_BIT(UCT_ATOMIC_OP_AND) | \ + UCS_BIT(UCT_ATOMIC_OP_OR) | \ + UCS_BIT(UCT_ATOMIC_OP_XOR) | \ + UCS_BIT(UCT_ATOMIC_OP_SWAP) | \ + UCS_BIT(UCT_ATOMIC_OP_CSWAP)) + + +/* + * Define UCP active message handler. + */ +#define UCP_DEFINE_AM(_features, _id, _cb, _tracer, _flags) \ + UCS_STATIC_INIT { \ + ucp_am_handlers[_id].features = _features; \ + ucp_am_handlers[_id].cb = _cb; \ + ucp_am_handlers[_id].tracer = _tracer; \ + ucp_am_handlers[_id].flags = _flags; \ + } + + +/** + * Defines a proxy handler which counts received messages on ucp_worker_iface_t + * context. It's used to determine if there is activity on a transport interface. 
+ */ +#define UCP_DEFINE_AM_PROXY(_id) \ + \ + static ucs_status_t \ + ucp_am_##_id##_counting_proxy(void *arg, void *data, size_t length, \ + unsigned flags) \ + { \ + ucp_worker_iface_t *wiface = arg; \ + wiface->proxy_recv_count++; \ + return ucp_am_handlers[_id].cb(wiface->worker, data, length, flags); \ + } \ + \ + UCS_STATIC_INIT { \ + ucp_am_handlers[_id].proxy_cb = ucp_am_##_id##_counting_proxy; \ + } + + +#define UCP_CHECK_PARAM_NON_NULL(_param, _status, _action) \ + if ((_param) == NULL) { \ + ucs_error("the parameter %s must not be NULL", #_param); \ + (_status) = UCS_ERR_INVALID_PARAM; \ + _action; \ + }; + + +/** + * Check if at least one feature flag from @a _flags is initialized. + */ +#define UCP_CONTEXT_CHECK_FEATURE_FLAGS(_context, _flags, _action) \ + do { \ + if (ENABLE_PARAMS_CHECK && \ + ucs_unlikely(!((_context)->config.features & (_flags)))) { \ + size_t feature_list_str_max = 512; \ + char *feature_list_str = ucs_alloca(feature_list_str_max); \ + ucs_error("feature flags %s were not set for ucp_init()", \ + ucs_flags_str(feature_list_str, feature_list_str_max, \ + (_flags) & ~(_context)->config.features, \ + ucp_feature_str)); \ + _action; \ + } \ + } while (0) + + +#define UCP_PARAM_VALUE(_obj, _params, _name, _flag, _default) \ + (((_params)->field_mask & (UCP_##_obj##_PARAM_FIELD_##_flag)) ? 
\ + (_params)->_name : (_default)) + + +#define ucp_assert_memtype(_context, _buffer, _length, _mem_type) \ + ucs_assert(ucp_memory_type_detect(_context, _buffer, _length) == (_mem_type)) + + +extern ucp_am_handler_t ucp_am_handlers[]; +extern const char *ucp_feature_str[]; + +void ucp_dump_payload(ucp_context_h context, char *buffer, size_t max, + const void *data, size_t length); + +void ucp_context_tag_offload_enable(ucp_context_h context); + +void ucp_context_uct_atomic_iface_flags(ucp_context_h context, + ucp_tl_iface_atomic_flags_t *atomic); + +const char * ucp_find_tl_name_by_csum(ucp_context_t *context, uint16_t tl_name_csum); + +const char* ucp_tl_bitmap_str(ucp_context_h context, uint64_t tl_bitmap, + char *str, size_t max_str_len); + +const char* ucp_feature_flags_str(unsigned feature_flags, char *str, + size_t max_str_len); + +ucs_memory_type_t +ucp_memory_type_detect_mds(ucp_context_h context, const void *address, size_t length); + +/** + * Calculate a small value to overcome float imprecision + * between two float values + */ +static UCS_F_ALWAYS_INLINE +double ucp_calc_epsilon(double val1, double val2) +{ + return (val1 + val2) * (1e-6); +} + +/** + * Compare two scores and return: + * - `-1` if score1 < score2 + * - `0` if score1 == score2 + * - `1` if score1 > score2 + */ +static UCS_F_ALWAYS_INLINE +int ucp_score_cmp(double score1, double score2) +{ + double diff = score1 - score2; + return ((fabs(diff) < ucp_calc_epsilon(score1, score2)) ? + 0 : ucs_signum(diff)); +} + +/** + * Compare two scores taking into account priorities if scores are equal + */ +static UCS_F_ALWAYS_INLINE +int ucp_score_prio_cmp(double score1, int prio1, double score2, int prio2) +{ + int score_res = ucp_score_cmp(score1, score2); + + return score_res ? 
score_res : ucs_signum(prio1 - prio2); +} + +static UCS_F_ALWAYS_INLINE +int ucp_is_scalable_transport(ucp_context_h context, size_t max_num_eps) +{ + return (max_num_eps >= (size_t)context->config.est_num_eps); +} + +static UCS_F_ALWAYS_INLINE double +ucp_tl_iface_latency(ucp_context_h context, const uct_iface_attr_t *iface_attr) +{ + return iface_attr->latency.overhead + + (iface_attr->latency.growth * context->config.est_num_eps); +} + +static UCS_F_ALWAYS_INLINE double +ucp_tl_iface_bandwidth(ucp_context_h context, const uct_ppn_bandwidth_t *bandwidth) +{ + return bandwidth->dedicated + (bandwidth->shared / context->config.est_num_ppn); +} + +static UCS_F_ALWAYS_INLINE int ucp_memory_type_cache_is_empty(ucp_context_h context) +{ + return (context->memtype_cache && + !context->memtype_cache->pgtable.num_regions); +} + +static UCS_F_ALWAYS_INLINE ucs_memory_type_t +ucp_memory_type_detect(ucp_context_h context, const void *address, size_t length) +{ + ucs_memory_type_t mem_type; + ucs_status_t status; + + if (ucs_likely(context->num_mem_type_detect_mds == 0)) { + return UCS_MEMORY_TYPE_HOST; + } + + if (ucs_likely(context->memtype_cache != NULL)) { + if (!context->memtype_cache->pgtable.num_regions) { + return UCS_MEMORY_TYPE_HOST; + } + + status = ucs_memtype_cache_lookup(context->memtype_cache, address, + length, &mem_type); + if (status != UCS_OK) { + ucs_assert(status == UCS_ERR_NO_ELEM); + return UCS_MEMORY_TYPE_HOST; + } + + if (mem_type != UCS_MEMORY_TYPE_LAST) { + return mem_type; + } + + /* mem_type is UCS_MEMORY_TYPE_LAST: fall thru to memory detection by + * UCT memory domains */ + } + + return ucp_memory_type_detect_mds(context, address, length); +} + +uint64_t ucp_context_dev_tl_bitmap(ucp_context_h context, const char *dev_name); + +uint64_t ucp_context_dev_idx_tl_bitmap(ucp_context_h context, + ucp_rsc_index_t dev_idx); + +#endif diff --git a/src/ucp/core/ucp_ep.c b/src/ucp/core/ucp_ep.c new file mode 100644 index 0000000..f26230a --- /dev/null +++ 
b/src/ucp/core/ucp_ep.c
@@ -0,0 +1,2039 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+* Copyright (C) Los Alamos National Security, LLC. 2019 ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "ucp_ep.h"
+#include "ucp_worker.h"
+#include "ucp_am.h"
+#include "ucp_ep.inl"
+#include "ucp_request.inl"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Per-lane-set cost totals accumulated by ucp_ep_config_calc_params() and
+ * consumed by the rendezvous threshold calculation. */
+typedef struct {
+    double reg_growth;   /* sum of MD registration cost growth */
+    double reg_overhead; /* sum of MD registration cost overhead */
+    double overhead;     /* sum of interface overheads */
+    double latency;      /* sum of ucp_tl_iface_latency() values */
+    size_t bw;           /* sum of ucp_tl_iface_bandwidth() values */
+} ucp_ep_thresh_params_t;
+
+extern const ucp_request_send_proto_t ucp_stream_am_proto;
+extern const ucp_request_send_proto_t ucp_am_proto;
+extern const ucp_request_send_proto_t ucp_am_reply_proto;
+
+#if ENABLE_STATS
+static ucs_stats_class_t ucp_ep_stats_class = {
+    .name = "ucp_ep",
+    .num_counters = UCP_EP_STAT_LAST,
+    .counter_names = {
+        [UCP_EP_STAT_TAG_TX_EAGER] = "tx_eager",
+        [UCP_EP_STAT_TAG_TX_EAGER_SYNC] = "tx_eager_sync",
+        [UCP_EP_STAT_TAG_TX_RNDV] = "tx_rndv"
+    }
+};
+#endif
+
+/* Reset an endpoint configuration key to the "no lanes" state: the key is
+ * zeroed, every lane slot and every special lane index is set to its NULL
+ * marker, both MD maps are cleared, error handling is set to NONE and status
+ * to UCS_OK. The per-protocol lane arrays are filled with UCP_NULL_LANE via
+ * memset(), which presumably relies on a lane index being a single byte -
+ * TODO confirm against the ucp_lane_index_t definition. */
+void ucp_ep_config_key_reset(ucp_ep_config_key_t *key)
+{
+    ucp_lane_index_t i;
+    memset(key, 0, sizeof(*key));
+    key->num_lanes = 0;
+    for (i = 0; i < UCP_MAX_LANES; ++i) {
+        key->lanes[i].rsc_index = UCP_NULL_RESOURCE;
+        key->lanes[i].proxy_lane = UCP_NULL_LANE;
+        key->lanes[i].dst_md_index = UCP_MAX_MDS;
+    }
+    key->am_lane = UCP_NULL_LANE;
+    key->wireup_lane = UCP_NULL_LANE;
+    key->cm_lane = UCP_NULL_LANE;
+    key->tag_lane = UCP_NULL_LANE;
+    key->rma_bw_md_map = 0;
+    key->reachable_md_map = 0;
+    key->dst_md_cmpts = NULL;
+    key->err_mode = UCP_ERR_HANDLING_MODE_NONE;
+    key->status = UCS_OK;
+    memset(key->am_bw_lanes, UCP_NULL_LANE, sizeof(key->am_bw_lanes));
+    memset(key->rma_lanes, UCP_NULL_LANE, sizeof(key->rma_lanes));
+    memset(key->rma_bw_lanes, UCP_NULL_LANE, sizeof(key->rma_bw_lanes));
+
memset(key->amo_lanes, UCP_NULL_LANE, sizeof(key->amo_lanes));
+}
+/* Allocate and initialize a new UCP endpoint on the given worker.
+ *
+ * The endpoint is taken from worker->ep_alloc, bound to the configuration
+ * index of an empty (reset) configuration key, has all of its UCT lanes set
+ * to NULL, and is appended to worker->all_eps.
+ *
+ * peer_name is copied into the endpoint only when ENABLE_DEBUG_DATA is set;
+ * message is used only for the debug log line.
+ *
+ * Returns UCS_OK and stores the endpoint in *ep_p on success. Returns
+ * UCS_ERR_NO_MEMORY if allocation fails, or the error from configuration
+ * lookup / statistics node allocation; in the latter cases the endpoint is
+ * returned to the allocator before returning. */
+ucs_status_t ucp_ep_new(ucp_worker_h worker, const char *peer_name,
+                        const char *message, ucp_ep_h *ep_p)
+{
+    ucs_status_t status;
+    ucp_ep_config_key_t key;
+    ucp_lane_index_t lane;
+    ucp_ep_h ep;
+
+    ep = ucs_strided_alloc_get(&worker->ep_alloc, "ucp_ep");
+    if (ep == NULL) {
+        ucs_error("Failed to allocate ep");
+        status = UCS_ERR_NO_MEMORY;
+        goto err;
+    }
+
+    ucp_ep_config_key_reset(&key);
+
+    status = ucp_worker_get_ep_config(worker, &key, 0, &ep->cfg_index);
+    if (status != UCS_OK) {
+        goto err_free_ep;
+    }
+
+    ep->worker = worker;
+    ep->am_lane = UCP_NULL_LANE;
+    ep->flags = 0;
+    ep->conn_sn = (ucp_ep_conn_sn_t)-1;
+    ucp_ep_ext_gen(ep)->user_data = NULL;
+    ucp_ep_ext_gen(ep)->dest_ep_ptr = 0;
+    ucp_ep_ext_gen(ep)->err_cb = NULL;
+    UCS_STATIC_ASSERT(sizeof(ucp_ep_ext_gen(ep)->ep_match) >=
+                      sizeof(ucp_ep_ext_gen(ep)->listener));
+    UCS_STATIC_ASSERT(sizeof(ucp_ep_ext_gen(ep)->ep_match) >=
+                      sizeof(ucp_ep_ext_gen(ep)->flush_state));
+    memset(&ucp_ep_ext_gen(ep)->ep_match, 0,
+           sizeof(ucp_ep_ext_gen(ep)->ep_match)); /* asserted above to be at least as large as listener and flush_state */
+
+    ucp_stream_ep_init(ep);
+    ucp_am_ep_init(ep);
+
+    for (lane = 0; lane < UCP_MAX_LANES; ++lane) {
+        ep->uct_eps[lane] = NULL;
+    }
+
+#if ENABLE_DEBUG_DATA
+    ucs_snprintf_zero(ep->peer_name, UCP_WORKER_NAME_MAX, "%s", peer_name);
+#endif
+
+    /* Create statistics */
+    status = UCS_STATS_NODE_ALLOC(&ep->stats, &ucp_ep_stats_class,
+                                  worker->stats, "-%p", ep);
+    if (status != UCS_OK) {
+        goto err_free_ep;
+    }
+
+    ucs_list_add_tail(&worker->all_eps, &ucp_ep_ext_gen(ep)->ep_list);
+    *ep_p = ep;
+    ucs_debug("created ep %p to %s %s", ep, ucp_ep_peer_name(ep), message);
+    return UCS_OK;
+
+err_free_ep:
+    ucs_strided_alloc_put(&worker->ep_alloc, ep);
+err:
+    return status;
+}
+/* Release an endpoint object: drop progress-queue callbacks selected by
+ * ucp_wireup_msg_ack_cb_pred for this endpoint, free its statistics node,
+ * unlink it from the endpoint list and return it to the worker allocator.
+ * UCT lanes are not touched here. */
+void ucp_ep_delete(ucp_ep_h ep)
+{
+    ucs_callbackq_remove_if(&ep->worker->uct->progress_q,
+                            ucp_wireup_msg_ack_cb_pred, ep);
+    UCS_STATS_NODE_FREE(ep->stats);
+    ucs_list_del(&ucp_ep_ext_gen(ep)->ep_list);
+
ucs_strided_alloc_put(&ep->worker->ep_alloc, ep);
+}
+/* Create an endpoint whose lane 0 is a wireup endpoint connected through an
+ * auxiliary transport to remote_address.
+ *
+ * Steps: allocate the endpoint, create the wireup endpoint on lane 0, then
+ * connect its auxiliary endpoint. On failure everything created so far is
+ * undone and the error status is returned; on success *ep_p is set. */
+ucs_status_t
+ucp_ep_create_sockaddr_aux(ucp_worker_h worker, unsigned ep_init_flags,
+                           const ucp_unpacked_address_t *remote_address,
+                           ucp_ep_h *ep_p)
+{
+    ucp_wireup_ep_t *wireup_ep;
+    ucs_status_t status;
+    ucp_ep_h ep;
+
+    /* allocate endpoint */
+    status = ucp_ep_new(worker, remote_address->name, "listener", &ep);
+    if (status != UCS_OK) {
+        goto err;
+    }
+
+    status = ucp_ep_init_create_wireup(ep, ep_init_flags, &wireup_ep);
+    if (status != UCS_OK) {
+        goto err_delete;
+    }
+
+    status = ucp_wireup_ep_connect_aux(wireup_ep, ep_init_flags, remote_address);
+    if (status != UCS_OK) {
+        goto err_destroy_wireup_ep;
+    }
+
+    *ep_p = ep;
+    return status;
+
+err_destroy_wireup_ep:
+    uct_ep_destroy(ep->uct_eps[0]); /* lane 0 holds the wireup endpoint */
+err_delete:
+    ucp_ep_delete(ep);
+err:
+    return status;
+}
+/* Set key->err_mode to PEER when ep_init_flags carries
+ * UCP_EP_INIT_ERR_MODE_PEER_FAILURE, otherwise to NONE. */
+void ucp_ep_config_key_set_err_mode(ucp_ep_config_key_t *key,
+                                    unsigned ep_init_flags)
+{
+    key->err_mode = (ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE) ?
+                    UCP_ERR_HANDLING_MODE_PEER : UCP_ERR_HANDLING_MODE_NONE;
+}
+/* Returns nonzero when lane 0 of the endpoint has no resource assigned. */
+int ucp_ep_is_sockaddr_stub(ucp_ep_h ep)
+{
+    /* Only a sockaddr client-side endpoint may be created as a "stub" */
+    return ucp_ep_get_rsc_index(ep, 0) == UCP_NULL_RESOURCE;
+}
+/* Apply user-supplied parameters to an already existing endpoint.
+ * Returns UCS_ERR_UNSUPPORTED when an error handling mode is requested that
+ * differs from the endpoint's configured one; otherwise installs the error
+ * handler and/or user data selected by params->field_mask and returns
+ * UCS_OK. */
+static ucs_status_t
+ucp_ep_adjust_params(ucp_ep_h ep, const ucp_ep_params_t *params)
+{
+    /* handle a case where the existing endpoint is incomplete */
+
+    if (params->field_mask & UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE) {
+        if (ucp_ep_config(ep)->key.err_mode != params->err_mode) {
+            ucs_error("asymmetric endpoint configuration not supported, "
+                      "error handling level mismatch");
+            return UCS_ERR_UNSUPPORTED;
+        }
+    }
+
+    if (params->field_mask & UCP_EP_PARAM_FIELD_ERR_HANDLER) {
+        ucp_ep_ext_gen(ep)->user_data = params->err_handler.arg;
+        ucp_ep_ext_gen(ep)->err_cb = params->err_handler.cb;
+    }
+
+    if (params->field_mask & UCP_EP_PARAM_FIELD_USER_DATA) {
+        /* user_data overrides err_handler.arg */
+        ucp_ep_ext_gen(ep)->user_data = 
params->user_data;
+    }
+
+    return UCS_OK;
+}
+/* Create one internal loopback endpoint per memory type that is not
+ * CPU-accessible and has access transports configured in
+ * context->mem_type_access_tls[].
+ *
+ * For each such memory type the worker's own address is packed (restricted
+ * to those transports), unpacked again, and used to create an endpoint that
+ * is stored in worker->mem_type_ep[mem_type]. The temporary address buffers
+ * are freed on every path.
+ *
+ * On failure every non-NULL entry of worker->mem_type_ep[] is destroyed and
+ * the error is returned. NOTE(review): destroyed entries are not reset to
+ * NULL here - confirm that no caller reuses the array after a failure. */
+ucs_status_t ucp_worker_create_mem_type_endpoints(ucp_worker_h worker)
+{
+    ucp_context_h context = worker->context;
+    ucp_unpacked_address_t local_address;
+    unsigned i, mem_type;
+    ucs_status_t status;
+    void *address_buffer;
+    size_t address_length;
+
+    for (mem_type = 0; mem_type < UCS_MEMORY_TYPE_LAST; mem_type++) {
+        if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type) ||
+            !context->mem_type_access_tls[mem_type]) {
+            continue;
+        }
+
+        status = ucp_address_pack(worker, NULL,
+                                  context->mem_type_access_tls[mem_type],
+                                  UCP_ADDRESS_PACK_FLAG_ALL, NULL,
+                                  &address_length, &address_buffer);
+        if (status != UCS_OK) {
+            goto err_cleanup_eps;
+        }
+
+        status = ucp_address_unpack(worker, address_buffer, UINT64_MAX, &local_address);
+        if (status != UCS_OK) {
+            goto err_free_address_buffer;
+        }
+
+        status = ucp_ep_create_to_worker_addr(worker, UINT64_MAX,
+                                              &local_address,
+                                              UCP_EP_INIT_FLAG_MEM_TYPE,
+                                              "mem type",
+                                              &worker->mem_type_ep[mem_type]);
+        if (status != UCS_OK) {
+            goto err_free_address_list;
+        }
+
+        ucs_free(local_address.address_list);
+        ucs_free(address_buffer);
+    }
+
+    return UCS_OK;
+
+err_free_address_list:
+    ucs_free(local_address.address_list);
+err_free_address_buffer:
+    ucs_free(address_buffer);
+err_cleanup_eps:
+    for (i = 0; i < UCS_MEMORY_TYPE_LAST; i++) {
+        if (worker->mem_type_ep[i]) {
+            ucp_ep_destroy_internal(worker->mem_type_ep[i]);
+        }
+    }
+    return status;
+}
+/* Configure a freshly allocated endpoint with a single lane (lane 0) and
+ * create a wireup endpoint on it. Lane 0 is the AM lane and additionally the
+ * CM lane or the wireup lane, depending on whether the worker uses the CM
+ * sockaddr protocol. The endpoint is flagged UCP_EP_FLAG_CONNECT_REQ_QUEUED.
+ * On success *wireup_ep points at the wireup endpoint in ep->uct_eps[0]. */
+ucs_status_t ucp_ep_init_create_wireup(ucp_ep_h ep, unsigned ep_init_flags,
+                                       ucp_wireup_ep_t **wireup_ep)
+{
+    ucp_ep_config_key_t key;
+    ucs_status_t status;
+
+    ucp_ep_config_key_reset(&key);
+    ucp_ep_config_key_set_err_mode(&key, ep_init_flags);
+
+    key.num_lanes = 1;
+    /* all operations will use the first lane, which is a stub endpoint before
+     * reconfiguration */
+    key.am_lane = 0;
+    if (ucp_worker_sockaddr_is_cm_proto(ep->worker)) {
+        key.cm_lane = 0;
+    } else {
+        key.wireup_lane = 0;
+    }
+
+    status = 
ucp_worker_get_ep_config(ep->worker, &key, 0, &ep->cfg_index);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    ep->am_lane = key.am_lane;
+    ep->flags |= UCP_EP_FLAG_CONNECT_REQ_QUEUED;
+
+    status = ucp_wireup_ep_create(ep, &ep->uct_eps[0]);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    *wireup_ep = ucs_derived_of(ep->uct_eps[0], ucp_wireup_ep_t);
+    return UCS_OK;
+}
+/* Create an endpoint to a remote worker described by an unpacked address.
+ *
+ * The endpoint is allocated with ucp_ep_new() and its lanes are initialized
+ * by ucp_wireup_init_lanes(), restricted to the local transports in
+ * local_tl_bitmap. If lane initialization fails the endpoint is deleted and
+ * the error returned; on success *ep_p is set and UCS_OK returned. */
+ucs_status_t ucp_ep_create_to_worker_addr(ucp_worker_h worker,
+                                          uint64_t local_tl_bitmap,
+                                          const ucp_unpacked_address_t *remote_address,
+                                          unsigned ep_init_flags,
+                                          const char *message, ucp_ep_h *ep_p)
+{
+    unsigned addr_indices[UCP_MAX_LANES];
+    ucs_status_t status;
+    ucp_ep_h ep;
+
+    /* allocate endpoint */
+    status = ucp_ep_new(worker, remote_address->name, message, &ep);
+    if (status != UCS_OK) {
+        goto err;
+    }
+
+    /* initialize transport endpoints */
+    status = ucp_wireup_init_lanes(ep, ep_init_flags, local_tl_bitmap,
+                                   remote_address, addr_indices);
+    if (status != UCS_OK) {
+        goto err_delete;
+    }
+
+    ucs_assert(!(ucp_ep_get_tl_bitmap(ep) & ~local_tl_bitmap)); /* only transports from local_tl_bitmap may be in use */
+
+    *ep_p = ep;
+    return UCS_OK;
+
+err_delete:
+    ucp_ep_delete(ep);
+err:
+    return status;
+}
+/* Client side of a sockaddr connection: create an endpoint towards the
+ * socket address in params->sockaddr. Requires
+ * UCP_EP_PARAM_FIELD_SOCK_ADDR and a non-NULL address. After the wireup
+ * endpoint is created and user parameters applied, the connection is started
+ * through the CM protocol or through the wireup endpoint, depending on the
+ * worker's sockaddr mode. Any failure after allocation tears the endpoint
+ * down again. */
+static ucs_status_t ucp_ep_create_to_sock_addr(ucp_worker_h worker,
+                                               const ucp_ep_params_t *params,
+                                               ucp_ep_h *ep_p)
+{
+    char peer_name[UCS_SOCKADDR_STRING_LEN];
+    ucp_wireup_ep_t *wireup_ep;
+    ucs_status_t status;
+    ucp_ep_h ep;
+
+    if (!(params->field_mask & UCP_EP_PARAM_FIELD_SOCK_ADDR)) {
+        ucs_error("destination socket address is missing");
+        status = UCS_ERR_INVALID_PARAM;
+        goto err;
+    }
+
+    UCP_CHECK_PARAM_NON_NULL(params->sockaddr.addr, status, goto err);
+
+    /* allocate endpoint */
+    ucs_sockaddr_str(params->sockaddr.addr, peer_name, sizeof(peer_name));
+
+    status = ucp_ep_new(worker, peer_name, "from api call", &ep);
+    if (status != UCS_OK) {
+        goto err;
+    }
+
+    status = ucp_ep_init_create_wireup(ep, ucp_ep_init_flags(worker, params),
+                                       &wireup_ep);
+    if (status != UCS_OK) {
+        goto err_delete;
+    }
+
+    status = 
ucp_ep_adjust_params(ep, params);
+    if (status != UCS_OK) {
+        goto err_cleanup_lanes;
+    }
+
+    status = ucp_worker_sockaddr_is_cm_proto(ep->worker) ?
+             ucp_ep_client_cm_connect_start(ep, params) :
+             ucp_wireup_ep_connect_to_sockaddr(ep->uct_eps[0], params);
+    if (status != UCS_OK) {
+        goto err_cleanup_lanes;
+    }
+
+    *ep_p = ep;
+    return UCS_OK;
+
+err_cleanup_lanes:
+    ucp_ep_cleanup_lanes(ep); /* destroy the UCT lanes before freeing the ep */
+err_delete:
+    ucp_ep_delete(ep);
+err:
+    return status;
+}
+
+/**
+ * Create an endpoint on the server side connected to the client endpoint.
+ *
+ * The client's worker address is unpacked from the bytes that follow the
+ * sockaddr private data header inside conn_request, and the endpoint is
+ * created according to sa_data->addr_mode (full address, partial address
+ * through an auxiliary wireup endpoint, or CM address). An unknown address
+ * mode is fatal. The unpacked address list is freed before returning; the
+ * conn_request itself is not released here.
+ */
+ucs_status_t ucp_ep_create_server_accept(ucp_worker_h worker,
+                                         const ucp_conn_request_h conn_request,
+                                         ucp_ep_h *ep_p)
+{
+    const ucp_wireup_sockaddr_data_t *sa_data = &conn_request->sa_data;
+    unsigned ep_init_flags = 0;
+    ucp_unpacked_address_t remote_addr;
+    uint64_t addr_flags;
+    unsigned i;
+    ucs_status_t status;
+
+    if (sa_data->err_mode == UCP_ERR_HANDLING_MODE_PEER) {
+        ep_init_flags |= UCP_EP_INIT_ERR_MODE_PEER_FAILURE;
+    }
+
+    if (sa_data->addr_mode == UCP_WIREUP_SA_DATA_CM_ADDR) {
+        addr_flags = UCP_ADDRESS_PACK_FLAG_IFACE_ADDR |
+                     UCP_ADDRESS_PACK_FLAG_EP_ADDR |
+                     UCP_ADDRESS_PACK_FLAG_TRACE;
+    } else {
+        addr_flags = UINT64_MAX;
+    }
+
+    /* coverity[overrun-local] */
+    status = ucp_address_unpack(worker, sa_data + 1, addr_flags, &remote_addr); /* packed address starts right after the header */
+    if (status != UCS_OK) {
+        goto out;
+    }
+
+    switch (sa_data->addr_mode) {
+    case UCP_WIREUP_SA_DATA_FULL_ADDR:
+        /* create endpoint to the worker address we got in the private data */
+        status = ucp_ep_create_to_worker_addr(worker, UINT64_MAX, &remote_addr,
+                                              ep_init_flags |
+                                              UCP_EP_INIT_CREATE_AM_LANE,
+                                              "listener", ep_p);
+        if (status != UCS_OK) {
+            goto out_free_address;
+        }
+
+        ucs_assert(ucp_ep_config(*ep_p)->key.err_mode == sa_data->err_mode);
+        ucp_ep_flush_state_reset(*ep_p);
+        ucp_ep_update_dest_ep_ptr(*ep_p, sa_data->ep_ptr);
+        break;
+    case UCP_WIREUP_SA_DATA_PARTIAL_ADDR:
+        status = ucp_ep_create_sockaddr_aux(worker, ep_init_flags,
+                                            &remote_addr, ep_p);
+        if 
(status != UCS_OK) {
+            goto out_free_address;
+        }
+
+        ucp_ep_update_dest_ep_ptr(*ep_p, sa_data->ep_ptr);
+        /* the server's ep should be aware of the sent address from the client */
+        (*ep_p)->flags |= UCP_EP_FLAG_LISTENER;
+        /* NOTE: protect union */
+        ucs_assert(!((*ep_p)->flags & (UCP_EP_FLAG_ON_MATCH_CTX |
+                                       UCP_EP_FLAG_FLUSH_STATE_VALID)));
+        break;
+    case UCP_WIREUP_SA_DATA_CM_ADDR:
+        ucs_assert(ucp_worker_sockaddr_is_cm_proto(worker));
+        for (i = 0; i < remote_addr.address_count; ++i) {
+            remote_addr.address_list[i].dev_addr = conn_request->remote_dev_addr;
+            remote_addr.address_list[i].dev_index = conn_request->sa_data.dev_index;
+        }
+        status = ucp_ep_cm_server_create_connected(worker,
+                                                   ep_init_flags |
+                                                   UCP_EP_INIT_CM_WIREUP_SERVER,
+                                                   &remote_addr, conn_request,
+                                                   ep_p);
+        if (status != UCS_OK) {
+            goto out_free_address;
+        }
+
+        (*ep_p)->flags |= UCP_EP_FLAG_LISTENER;
+        ucp_ep_ext_gen(*ep_p)->listener = conn_request->listener;
+        break;
+    default:
+        ucs_fatal("client sockaddr data contains invalid address mode %d",
+                  sa_data->addr_mode);
+    }
+
+out_free_address:
+    ucs_free(remote_addr.address_list); /* reached on success as well as on failure */
+out:
+    return status;
+}
+/* Create an endpoint from a connection request passed through the public
+ * API (params->conn_request).
+ *
+ * The server-side endpoint is created and user parameters applied. With the
+ * CM protocol nothing more is sent from here; otherwise a wireup pre-request
+ * (listener endpoints) or a wireup request is sent to the client. */
+static ucs_status_t
+ucp_ep_create_api_conn_request(ucp_worker_h worker,
+                               const ucp_ep_params_t *params, ucp_ep_h *ep_p)
+{
+    ucp_conn_request_h conn_request = params->conn_request;
+    ucp_ep_h ep;
+    ucs_status_t status;
+
+    status = ucp_ep_create_server_accept(worker, conn_request, &ep);
+    if (status != UCS_OK) {
+        goto out;
+    }
+
+    status = ucp_ep_adjust_params(ep, params);
+    if (status != UCS_OK) {
+        goto out_ep_destroy;
+    }
+
+    if (ucp_worker_sockaddr_is_cm_proto(worker)) {
+        goto out;
+    }
+
+    if (ep->flags & UCP_EP_FLAG_LISTENER) {
+        status = ucp_wireup_send_pre_request(ep);
+    } else {
+        /* send wireup request message, to connect the client to the server's
+           new endpoint */
+        ucs_assert(!(ep->flags & UCP_EP_FLAG_CONNECT_REQ_QUEUED));
+        status = ucp_wireup_send_request(ep);
+    }
+
+    if (status == UCS_OK) {
+        goto out;
+    }
+
+out_ep_destroy:
+    ucp_ep_destroy_internal(ep);
+out:
+    /* Common exit: accept or reject the UCT-level request according to the
+     * final status, then release the connection request object in all
+     * cases. */
+    if (status == UCS_OK) {
+        if (!ucp_worker_sockaddr_is_cm_proto(worker)) {
+            status = uct_iface_accept(conn_request->uct.iface,
+                                      conn_request->uct_req);
+        }
+    } else {
+        if (ucp_worker_sockaddr_is_cm_proto(worker)) {
+            uct_listener_reject(conn_request->uct.listener,
+                                conn_request->uct_req);
+        } else {
+            uct_iface_reject(conn_request->uct.iface, conn_request->uct_req);
+        }
+    }
+
+    if (ucp_worker_sockaddr_is_cm_proto(worker)) {
+        ucs_free(conn_request->remote_dev_addr);
+    }
+
+    ucs_free(conn_request);
+
+    if (status == UCS_OK) {
+        *ep_p = ep;
+    }
+
+    return status;
+}
+
+/* Create (or reuse) an endpoint to the remote worker address given in
+ * params->address.
+ *
+ * Requires UCP_EP_PARAM_FIELD_REMOTE_ADDRESS and a non-NULL address.
+ *
+ * If the matching context already holds an unexpected endpoint for the same
+ * peer and connection sequence number, that endpoint is reused: the user
+ * parameters are applied and it is activated. If applying the parameters
+ * fails, the endpoint is destroyed and the error is returned without
+ * touching the endpoint again.
+ *
+ * Otherwise a new endpoint is created, connected to itself for an allowed
+ * loopback connection or inserted into the matching context as an expected
+ * endpoint, and an initial wireup request is sent unless the endpoint is
+ * already locally connected.
+ *
+ * The unpacked address list is freed on every path after unpacking. *ep_p is
+ * written only when UCS_OK is returned. */
+static ucs_status_t
+ucp_ep_create_api_to_worker_addr(ucp_worker_h worker,
+                                 const ucp_ep_params_t *params, ucp_ep_h *ep_p)
+{
+    ucp_unpacked_address_t remote_address;
+    ucp_ep_conn_sn_t conn_sn;
+    ucs_status_t status;
+    unsigned flags;
+    ucp_ep_h ep;
+
+    if (!(params->field_mask & UCP_EP_PARAM_FIELD_REMOTE_ADDRESS)) {
+        status = UCS_ERR_INVALID_PARAM;
+        ucs_error("remote worker address is missing");
+        goto out;
+    }
+
+    UCP_CHECK_PARAM_NON_NULL(params->address, status, goto out);
+
+    status = ucp_address_unpack(worker, params->address, UINT64_MAX, &remote_address);
+    if (status != UCS_OK) {
+        goto out;
+    }
+
+    /* Check if there is already an unconnected internal endpoint to the same
+     * destination address.
+     * In case of loopback connection, search the hash table for an endpoint with
+     * even/odd matching, so that every 2 endpoints connected to the local worker
+     * will be paired to each other.
+     * Note that if a loopback endpoint had the UCP_EP_PARAMS_FLAGS_NO_LOOPBACK
+     * flag set, it will not be added to ep_match as an unexpected ep. Because
+     * dest_ep_ptr will be initialized, a WIREUP_REQUEST (if sent) will have
+     * dst_ep != 0. So, ucp_wireup_request() will not create an unexpected ep
+     * in ep_match.
+     */
+    conn_sn = ucp_ep_match_get_next_sn(&worker->ep_match_ctx, remote_address.uuid);
+    ep = ucp_ep_match_retrieve_unexp(&worker->ep_match_ctx, remote_address.uuid,
+                                     conn_sn ^ (remote_address.uuid == worker->uuid));
+    if (ep != NULL) {
+        status = ucp_ep_adjust_params(ep, params);
+        if (status != UCS_OK) {
+            /* The endpoint is freed by the destroy call, so it must not be
+             * touched any more: leave immediately instead of falling
+             * through to the activation calls below. */
+            ucp_ep_destroy_internal(ep);
+            goto out_free_address;
+        }
+
+        ucp_ep_flush_state_reset(ep);
+        ucp_stream_ep_activate(ep);
+        goto out_free_address;
+    }
+
+    status = ucp_ep_create_to_worker_addr(worker, UINT64_MAX, &remote_address,
+                                          ucp_ep_init_flags(worker, params),
+                                          "from api call", &ep);
+    if (status != UCS_OK) {
+        goto out_free_address;
+    }
+
+    status = ucp_ep_adjust_params(ep, params);
+    if (status != UCS_OK) {
+        ucp_ep_destroy_internal(ep);
+        goto out_free_address;
+    }
+
+    ep->conn_sn = conn_sn;
+
+    /*
+     * If we are connecting to our own worker, and loopback is allowed, connect
+     * the endpoint to itself by updating dest_ep_ptr.
+     * Otherwise, add the new ep to the matching context as an expected endpoint,
+     * waiting for connection request from the peer endpoint
+     */
+    flags = UCP_PARAM_VALUE(EP, params, flags, FLAGS, 0);
+    if ((remote_address.uuid == worker->uuid) &&
+        !(flags & UCP_EP_PARAMS_FLAGS_NO_LOOPBACK)) {
+        ucp_ep_update_dest_ep_ptr(ep, (uintptr_t)ep);
+        ucp_ep_flush_state_reset(ep);
+    } else {
+        ucp_ep_match_insert_exp(&worker->ep_match_ctx, remote_address.uuid, ep);
+    }
+
+    /* if needed, send initial wireup message */
+    if (!(ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED)) {
+        ucs_assert(!(ep->flags & UCP_EP_FLAG_CONNECT_REQ_QUEUED));
+        status = ucp_wireup_send_request(ep);
+        if (status != UCS_OK) {
+            goto out_free_address;
+        }
+    }
+
+    status = UCS_OK;
+
+out_free_address:
+    ucs_free(remote_address.address_list);
+out:
+    if (status == UCS_OK) {
+        *ep_p = ep;
+    }
+    return status;
+}
+
+/* Public entry point for endpoint creation. With the worker's async context
+ * blocked, dispatches on the parameters: client-server flag -> socket
+ * address, connection request field -> server accept, remote address field
+ * -> worker address; anything else is UCS_ERR_INVALID_PARAM. On success the
+ * endpoint is marked UCP_EP_FLAG_USED and returned in *ep_p. */
+ucs_status_t ucp_ep_create(ucp_worker_h worker, const ucp_ep_params_t *params,
+                           ucp_ep_h *ep_p)
+{
+    ucs_status_t status;
+    unsigned flags;
+    ucp_ep_h ep = NULL;
+
+
UCS_ASYNC_BLOCK(&worker->async);
+
+    flags = UCP_PARAM_VALUE(EP, params, flags, FLAGS, 0);
+    if (flags & UCP_EP_PARAMS_FLAGS_CLIENT_SERVER) {
+        status = ucp_ep_create_to_sock_addr(worker, params, &ep);
+    } else if (params->field_mask & UCP_EP_PARAM_FIELD_CONN_REQUEST) {
+        status = ucp_ep_create_api_conn_request(worker, params, &ep);
+    } else if (params->field_mask & UCP_EP_PARAM_FIELD_REMOTE_ADDRESS) {
+        status = ucp_ep_create_api_to_worker_addr(worker, params, &ep);
+    } else {
+        status = UCS_ERR_INVALID_PARAM;
+    }
+
+    if (status == UCS_OK) {
+        ep->flags |= UCP_EP_FLAG_USED;
+        *ep_p = ep;
+    }
+
+    UCS_ASYNC_UNBLOCK(&worker->async);
+    return status;
+}
+/* Modify an existing endpoint. Changing the remote address, the socket
+ * address or the error handling mode is rejected with
+ * UCS_ERR_INVALID_PARAM; the remaining parameters are applied by
+ * ucp_ep_adjust_params() under the worker's async block. The resulting
+ * status is returned encoded as a status pointer. */
+ucs_status_ptr_t ucp_ep_modify_nb(ucp_ep_h ep, const ucp_ep_params_t *params)
+{
+    ucp_worker_h worker = ep->worker;
+    ucs_status_t status;
+
+    if (params->field_mask & (UCP_EP_PARAM_FIELD_REMOTE_ADDRESS |
+                              UCP_EP_PARAM_FIELD_SOCK_ADDR |
+                              UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE)) {
+        return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM);
+    }
+
+    UCS_ASYNC_BLOCK(&worker->async);
+
+    status = ucp_ep_adjust_params(ep, params);
+
+    UCS_ASYNC_UNBLOCK(&worker->async);
+
+    return UCS_STATUS_PTR(status);
+}
+/* Pending-purge callback: arg carries a status encoded as a pointer; the
+ * request that owns the pending element is fast-forwarded with it. */
+void ucp_ep_err_pending_purge(uct_pending_req_t *self, void *arg)
+{
+    ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct);
+    ucs_status_t status = UCS_PTR_STATUS(arg);
+
+    ucp_request_send_state_ff(req, status);
+}
+/* Pending-purge callback used while destroying an endpoint (arg is the
+ * endpoint): any pending request found at this point is reported as a bug. */
+static void ucp_destroyed_ep_pending_purge(uct_pending_req_t *self, void *arg)
+{
+    ucs_bug("pending request %p on ep %p should have been flushed", self, arg);
+}
+/* Destroy an endpoint: clean up all of its lanes, then delete the endpoint
+ * object itself. */
+void ucp_ep_destroy_internal(ucp_ep_h ep)
+{
+    ucs_debug("ep %p: destroy", ep);
+    ucp_ep_cleanup_lanes(ep);
+    ucp_ep_delete(ep);
+}
+/* Purge, destroy and reset every UCT endpoint of the given endpoint, in
+ * three separate passes over its lanes. */
+void ucp_ep_cleanup_lanes(ucp_ep_h ep)
+{
+    ucp_lane_index_t lane, proxy_lane;
+    uct_ep_h uct_ep;
+
+    ucs_debug("ep %p: cleanup lanes", ep);
+
+    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
+        uct_ep = ep->uct_eps[lane];
+        if (uct_ep != NULL) {
+            ucs_debug("ep %p: purge uct_ep[%d]=%p", ep, 
lane, uct_ep);
+            uct_ep_pending_purge(uct_ep, ucp_destroyed_ep_pending_purge, ep);
+        }
+    }
+
+    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
+        uct_ep = ep->uct_eps[lane];
+        if (uct_ep == NULL) {
+            continue;
+        }
+
+        proxy_lane = ucp_ep_get_proxy_lane(ep, lane);
+        if ((proxy_lane != UCP_NULL_LANE) && (proxy_lane != lane) &&
+            (ep->uct_eps[lane] == ep->uct_eps[proxy_lane]))
+        {
+            /* duplicate of another lane */
+            continue; /* the shared UCT endpoint is destroyed via the other lane */
+        }
+
+        ucs_debug("ep %p: destroy uct_ep[%d]=%p", ep, lane, uct_ep);
+        uct_ep_destroy(uct_ep);
+    }
+
+    for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) {
+        ep->uct_eps[lane] = NULL;
+    }
+}
+
+/* Must be called with async lock held.
+ * Handles a locally disconnected endpoint: removes slow-path callbacks that
+ * still reference it, cleans up stream and active-message state, and clears
+ * UCP_EP_FLAG_USED. Unless force is set, an endpoint that has a connect
+ * request queued or is remotely connected is kept alive; otherwise it is
+ * removed from the matching context and destroyed. */
+void ucp_ep_disconnected(ucp_ep_h ep, int force)
+{
+    /* remove pending slow-path progress in case it wasn't removed yet */
+    ucs_callbackq_remove_if(&ep->worker->uct->progress_q,
+                            ucp_worker_err_handle_remove_filter, ep);
+
+    /* remove pending slow-path function in case it wasn't removed yet */
+    ucs_callbackq_remove_if(&ep->worker->uct->progress_q,
+                            ucp_listener_accept_cb_remove_filter, ep);
+
+    ucp_ep_cm_slow_cbq_cleanup(ep);
+
+    ucp_stream_ep_cleanup(ep);
+    ucp_am_ep_cleanup(ep);
+
+    ep->flags &= ~UCP_EP_FLAG_USED;
+
+    if ((ep->flags & (UCP_EP_FLAG_CONNECT_REQ_QUEUED|UCP_EP_FLAG_REMOTE_CONNECTED))
+        && !force) {
+        /* Endpoints which have remote connection are destroyed only when the
+         * worker is destroyed, to enable remote endpoints keep sending
+         * TODO negotiate disconnect. 
+         */
+        ucs_trace("not destroying ep %p because of connection from remote", ep);
+        return;
+    }
+
+    ucp_ep_match_remove_ep(&ep->worker->ep_match_ctx, ep);
+    ucp_ep_destroy_internal(ep);
+}
+/* One-shot progress callback that finishes a local disconnect. arg is the
+ * flush/close request. The endpoint is disconnected under the async block
+ * (forced when the flush used UCT_FLUSH_FLAG_CANCEL) and the request is then
+ * completed with its own status. Always returns 0. */
+unsigned ucp_ep_local_disconnect_progress(void *arg)
+{
+    ucp_request_t *req = arg;
+    ucp_ep_h ep = req->send.ep;
+    ucs_async_context_t *async = &ep->worker->async; /* ep becomes invalid */
+
+    ucs_assert(!(req->flags & UCP_REQUEST_FLAG_COMPLETED));
+
+    UCS_ASYNC_BLOCK(async);
+    ucs_debug("ep %p: disconnected with request %p, %s", ep, req,
+              ucs_status_string(req->status));
+    ucp_ep_disconnected(ep, req->send.flush.uct_flags & UCT_FLUSH_FLAG_CANCEL);
+    UCS_ASYNC_UNBLOCK(async);
+
+    /* Complete send request from here, to avoid releasing the request while
+     * slow-path element is still pending */
+    ucp_request_complete_send(req, req->status);
+
+    return 0;
+}
+/* Attach a close request to the endpoint: the flush state is invalidated
+ * first, the request is stored in close_req and UCP_EP_FLAG_CLOSE_REQ_VALID
+ * is set. debug_msg is used for tracing only. */
+static void ucp_ep_set_close_request(ucp_ep_h ep, ucp_request_t *request,
+                                     const char *debug_msg)
+{
+    ucs_trace("ep %p: setting close request %p, %s", ep, request, debug_msg);
+
+    ucp_ep_flush_state_invalidate(ep);
+    ucp_ep_ext_gen(ep)->close_req.req = request;
+    ep->flags |= UCP_EP_FLAG_CLOSE_REQ_VALID;
+}
+/* Flush-completion callback used by ucp_ep_close_nb().
+ * Force close: go straight to scheduling the slow-path destroy.
+ * Otherwise, if the endpoint is still CM-locally-connected, the CM lane is
+ * disconnected; when the remote side is still connected the request is
+ * parked as the endpoint's close request and nothing is scheduled from
+ * here. In every other case the slow-path destroy callback is registered. */
+static void ucp_ep_close_flushed_callback(ucp_request_t *req)
+{
+    ucp_ep_h ep = req->send.ep;
+    ucs_async_context_t *async = &ep->worker->async;
+
+    /* in case of force close, schedule ucp_ep_local_disconnect_progress to
+     * destroy the ep and all its lanes */
+    if (req->send.flush.uct_flags & UCT_FLUSH_FLAG_CANCEL) {
+        goto out;
+    }
+
+    UCS_ASYNC_BLOCK(async);
+
+    ucs_debug("ep %p: flags 0x%x close flushed callback for request %p", ep,
+              ep->flags, req);
+
+    if (ucp_ep_is_cm_local_connected(ep)) {
+        /* Now, when close flush is completed and we are still locally connected,
+         * we have to notify remote side */
+        ucp_ep_cm_disconnect_cm_lane(ep);
+        if (ep->flags & UCP_EP_FLAG_REMOTE_CONNECTED) {
+            /* Wait disconnect notification from remote side to complete this
+             * request */
+            ucp_ep_set_close_request(ep, req, "close flushed callback");
+            UCS_ASYNC_UNBLOCK(async);
+            return;
+        }
+    }
+    UCS_ASYNC_UNBLOCK(async);
+
+out:
+    /* If a flush is completed from a pending/completion callback, we need to
+     * schedule slow-path callback to release the endpoint later, since a UCT
+     * endpoint cannot be released from pending/completion callback context.
+     */
+    ucs_trace("adding slow-path callback to destroy ep %p", ep);
+    req->send.disconnect.prog_id = UCS_CALLBACKQ_ID_NULL;
+    uct_worker_progress_register_safe(ep->worker->uct,
+                                      ucp_ep_local_disconnect_progress,
+                                      req, UCS_CALLBACKQ_FLAG_ONESHOT,
+                                      &req->send.disconnect.prog_id);
+}
+/* Validate the close mode for an endpoint. Returns UCS_OK for flush mode,
+ * for any mode when the endpoint has a CM lane, and for force mode when the
+ * endpoint uses peer error handling; UCS_ERR_INVALID_PARAM otherwise. */
+static ucs_status_t ucp_ep_close_nb_check_params(ucp_ep_h ep, unsigned mode)
+{
+    /* CM lane tracks remote state, so it can be used with any modes of close
+     * and error handling */
+    if ((mode == UCP_EP_CLOSE_MODE_FLUSH) ||
+        (ucp_ep_get_cm_lane(ep) != UCP_NULL_LANE)) {
+        return UCS_OK;
+    }
+
+    /* In case of close in force mode, remote peer failure detection mechanism
+     * should be enabled (CM lane is handled above) to prevent hang or any
+     * other undefined behavior */
+    if ((mode == UCP_EP_CLOSE_MODE_FORCE) &&
+        (ucp_ep_config(ep)->key.err_mode == UCP_ERR_HANDLING_MODE_PEER)) {
+        return UCS_OK;
+    }
+
+    return UCS_ERR_INVALID_PARAM;
+}
+/* Non-blocking endpoint close. After parameter validation the endpoint is
+ * marked UCP_EP_FLAG_CLOSED and flushed (local flush for flush mode, cancel
+ * otherwise) with ucp_ep_close_flushed_callback as completion callback. If
+ * the flush does not return a request pointer, the disconnect is carried out
+ * inline. Returns a request pointer, NULL or an error status pointer. */
+ucs_status_ptr_t ucp_ep_close_nb(ucp_ep_h ep, unsigned mode)
+{
+    ucp_worker_h worker = ep->worker;
+    void *request;
+    ucp_request_t *close_req;
+    ucs_status_t status;
+
+    status = ucp_ep_close_nb_check_params(ep, mode);
+    if (status != UCS_OK) {
+        return UCS_STATUS_PTR(status);
+    }
+
+    UCS_ASYNC_BLOCK(&worker->async);
+
+    ep->flags |= UCP_EP_FLAG_CLOSED;
+    request = ucp_ep_flush_internal(ep,
+                                    (mode == UCP_EP_CLOSE_MODE_FLUSH) ? 
+                                    UCT_FLUSH_FLAG_LOCAL : UCT_FLUSH_FLAG_CANCEL,
+                                    NULL, 0, NULL,
+                                    ucp_ep_close_flushed_callback, "close");
+
+    if (!UCS_PTR_IS_PTR(request)) {
+        if (ucp_ep_is_cm_local_connected(ep) &&
+            (mode == UCP_EP_CLOSE_MODE_FLUSH)) {
+            /* lanes already flushed, start disconnect on CM lane */
+            ucp_ep_cm_disconnect_cm_lane(ep);
+            close_req = ucp_ep_cm_close_request_get(ep);
+            if (close_req != NULL) {
+                request = close_req + 1; /* value handed back to the caller is the address just past the request object */
+                ucp_ep_set_close_request(ep, close_req, "close");
+            } else {
+                request = UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
+            }
+        } else {
+            ucp_ep_disconnected(ep, mode == UCP_EP_CLOSE_MODE_FORCE);
+        }
+    }
+
+    UCS_ASYNC_UNBLOCK(&worker->async);
+    return request;
+}
+/* Equivalent to ucp_ep_close_nb() with UCP_EP_CLOSE_MODE_FLUSH. */
+ucs_status_ptr_t ucp_disconnect_nb(ucp_ep_h ep)
+{
+    return ucp_ep_close_nb(ep, UCP_EP_CLOSE_MODE_FLUSH);
+}
+/* Blocking endpoint destroy: starts a flush-mode disconnect and, if it
+ * returns a request, progresses the worker until that request is no longer
+ * in progress, then releases it. A failed disconnect is only logged as a
+ * warning. */
+void ucp_ep_destroy(ucp_ep_h ep)
+{
+    ucp_worker_h worker = ep->worker;
+    ucs_status_ptr_t *request;
+    ucs_status_t status;
+
+    UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker);
+    request = ucp_disconnect_nb(ep);
+    if (request == NULL) {
+        goto out;
+    } else if (UCS_PTR_IS_ERR(request)) {
+        ucs_warn("disconnect failed: %s",
+                 ucs_status_string(UCS_PTR_STATUS(request)));
+        goto out;
+    } else {
+        do {
+            ucp_worker_progress(worker);
+            status = ucp_request_check_status(request);
+        } while (status == UCS_INPROGRESS);
+
+        ucp_request_release(request);
+    }
+
+out:
+    UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker);
+    return;
+}
+/* Compare two endpoint configuration keys. Returns 1 when they are equal and
+ * 0 otherwise. Scalar fields and the per-protocol lane arrays are compared
+ * first, then the per-lane entries for the first num_lanes lanes, and
+ * finally one dst_md_cmpts entry per bit set in reachable_md_map. */
+int ucp_ep_config_is_equal(const ucp_ep_config_key_t *key1,
+                           const ucp_ep_config_key_t *key2)
+{
+    ucp_lane_index_t lane;
+    int i;
+
+    if ((key1->num_lanes != key2->num_lanes) ||
+        memcmp(key1->rma_lanes, key2->rma_lanes, sizeof(key1->rma_lanes)) ||
+        memcmp(key1->am_bw_lanes, key2->am_bw_lanes, sizeof(key1->am_bw_lanes)) ||
+        memcmp(key1->rma_bw_lanes, key2->rma_bw_lanes, sizeof(key1->rma_bw_lanes)) ||
+        memcmp(key1->amo_lanes, key2->amo_lanes, sizeof(key1->amo_lanes)) ||
+        (key1->rma_bw_md_map != key2->rma_bw_md_map) ||
+        (key1->reachable_md_map != key2->reachable_md_map) 
||
+        (key1->am_lane != key2->am_lane) ||
+        (key1->tag_lane != key2->tag_lane) ||
+        (key1->wireup_lane != key2->wireup_lane) ||
+        (key1->cm_lane != key2->cm_lane) ||
+        (key1->err_mode != key2->err_mode) ||
+        (key1->status != key2->status))
+    {
+        return 0;
+    }
+
+    for (lane = 0; lane < key1->num_lanes; ++lane) {
+        if ((key1->lanes[lane].rsc_index != key2->lanes[lane].rsc_index) ||
+            (key1->lanes[lane].proxy_lane != key2->lanes[lane].proxy_lane) ||
+            (key1->lanes[lane].dst_md_index != key2->lanes[lane].dst_md_index))
+        {
+            return 0;
+        }
+    }
+
+    for (i = 0; i < ucs_popcount(key1->reachable_md_map); ++i) { /* maps are already known to be equal here */
+        if (key1->dst_md_cmpts[i] != key2->dst_md_cmpts[i]) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+/* Accumulate cost parameters over a list of lanes into *params.
+ *
+ * lanes is scanned until UCP_MAX_LANES entries or the first UCP_NULL_LANE.
+ * Registration cost, interface overhead and latency are added once per
+ * memory domain (tracked in md_map), and only for domains that have
+ * UCT_MD_FLAG_REG. Bandwidth is added for every lane. */
+static void ucp_ep_config_calc_params(ucp_worker_h worker,
+                                      const ucp_ep_config_t *config,
+                                      const ucp_lane_index_t *lanes,
+                                      ucp_ep_thresh_params_t *params)
+{
+    ucp_context_h context = worker->context;
+    ucp_md_map_t md_map = 0;
+    ucp_lane_index_t lane;
+    ucp_rsc_index_t rsc_index;
+    ucp_md_index_t md_index;
+    uct_md_attr_t *md_attr;
+    uct_iface_attr_t *iface_attr;
+    ucp_worker_iface_t *wiface;
+    int i;
+
+    memset(params, 0, sizeof(*params));
+
+    for (i = 0; (i < UCP_MAX_LANES) && (lanes[i] != UCP_NULL_LANE); i++) {
+        lane = lanes[i];
+        rsc_index = config->key.lanes[lane].rsc_index;
+        md_index = config->md_index[lane];
+        iface_attr = ucp_worker_iface_get_attr(worker, rsc_index);
+
+        if (!(md_map & UCS_BIT(md_index))) {
+            md_map |= UCS_BIT(md_index);
+            md_attr = &context->tl_mds[md_index].attr;
+            if (md_attr->cap.flags & UCT_MD_FLAG_REG) {
+                params->reg_growth += md_attr->reg_cost.growth;
+                params->reg_overhead += md_attr->reg_cost.overhead;
+                params->overhead += iface_attr->overhead;
+                params->latency += ucp_tl_iface_latency(context, iface_attr);
+            }
+        }
+        wiface = ucp_worker_iface(worker, rsc_index);
+        params->bw += ucp_tl_iface_bandwidth(context, &wiface->attr.bandwidth);
+    }
+}
+/* Estimate the message size above which the rendezvous protocol should be
+ * used instead of eager zcopy, from the accumulated costs of the eager and
+ * rendezvous lane sets. recv_reg_cost is used as a 0/1 multiplier adding the
+ * receiver-side registration cost. Falls back to
+ * context->config.ext.rndv_thresh_fallback when either lane set has zero
+ * bandwidth or the formula does not give a positive result. */
+static size_t ucp_ep_config_calc_rndv_thresh(ucp_worker_t *worker,
+                                             const 
ucp_ep_config_t *config,
+                                             const ucp_lane_index_t *eager_lanes,
+                                             const ucp_lane_index_t *rndv_lanes,
+                                             int recv_reg_cost)
+{
+    ucp_context_h context = worker->context;
+    double diff_percent = 1.0 - context->config.ext.rndv_perf_diff / 100.0;
+    ucp_ep_thresh_params_t eager_zcopy;
+    ucp_ep_thresh_params_t rndv;
+    double numerator, denumerator;
+    ucp_rsc_index_t eager_rsc_index;
+    uct_iface_attr_t *eager_iface_attr;
+    double rts_latency;
+
+    /* All formulas and descriptions are listed at
+     * https://github.com/openucx/ucx/wiki/Rendezvous-Protocol-threshold-for-multilane-mode */
+
+    ucp_ep_config_calc_params(worker, config, eager_lanes, &eager_zcopy);
+    ucp_ep_config_calc_params(worker, config, rndv_lanes, &rndv);
+
+    if (!eager_zcopy.bw || !rndv.bw) {
+        goto fallback; /* also keeps the divisions by bandwidth below safe */
+    }
+
+    eager_rsc_index = config->key.lanes[eager_lanes[0]].rsc_index;
+    eager_iface_attr = ucp_worker_iface_get_attr(worker, eager_rsc_index);
+
+    /* RTS/RTR latency is used from lanes[0] */
+    rts_latency = ucp_tl_iface_latency(context, eager_iface_attr);
+
+    numerator = diff_percent * (rndv.reg_overhead * (1 + recv_reg_cost) +
+                                (2 * rts_latency) + (2 * rndv.latency) +
+                                (2 * eager_zcopy.overhead) + rndv.overhead) -
+                eager_zcopy.reg_overhead - eager_zcopy.overhead;
+
+    denumerator = eager_zcopy.reg_growth +
+                  1.0 / ucs_min(eager_zcopy.bw, context->config.ext.bcopy_bw) -
+                  diff_percent *
+                  (rndv.reg_growth * (1 + recv_reg_cost) + 1.0 / rndv.bw);
+
+    if ((numerator > 0) && (denumerator > 0)) {
+        return ucs_max(numerator / denumerator, eager_iface_attr->cap.am.max_bcopy); /* never below the eager max_bcopy size */
+    }
+
+fallback:
+    return context->config.ext.rndv_thresh_fallback;
+}
+/* Clamp thresh_value into the range [min_value, max_value]. */
+static size_t ucp_ep_thresh(size_t thresh_value, size_t min_value,
+                            size_t max_value)
+{
+    size_t thresh;
+
+    ucs_assert(min_value <= max_value);
+
+    thresh = ucs_max(min_value, thresh_value);
+    thresh = ucs_min(max_value, thresh);
+
+    return thresh;
+}
+/* Lower *max_short so that it stays below thresh: the result is
+ * min(*max_short + 1, thresh) - 1, and is asserted to be at least -1. */
+static void ucp_ep_config_adjust_max_short(ssize_t *max_short,
+                                           size_t thresh)
+{
+    *max_short = 
ucs_min((size_t)(*max_short + 1), thresh) - 1;
+    ucs_assert(*max_short >= -1);
+}
+
+/* With tag offload, SW RNDV requests are temporarily stored in the receiver
+ * user buffer when matched. Thus, minimum message size allowed to be sent with
+ * RNDV protocol should be bigger than maximal possible SW RNDV request
+ * (i.e. header plus packed keys size). */
+size_t ucp_ep_tag_offload_min_rndv_thresh(ucp_ep_config_t *config)
+{
+    return sizeof(ucp_rndv_rts_hdr_t) + config->tag.rndv.rkey_size;
+}
+/* Set the active-message rendezvous thresholds (regular and send_nbr) in
+ * config->tag.
+ *
+ * Three cases: rendezvous unsupported -> both thresholds SIZE_MAX;
+ * rndv_thresh configured as "auto" -> the regular threshold is calculated
+ * from the AM bandwidth lanes and the send_nbr one is taken from
+ * rndv_send_nbr_thresh; explicit rndv_thresh -> used for both, and eager
+ * max_short is lowered accordingly. The results are clamped to
+ * [max(am.min_zcopy, min_rndv_thresh), max_rndv_thresh].
+ * md_attr is not used in the body visible here. */
+static void ucp_ep_config_set_am_rndv_thresh(ucp_worker_h worker,
+                                             uct_iface_attr_t *iface_attr,
+                                             uct_md_attr_t *md_attr,
+                                             ucp_ep_config_t *config,
+                                             size_t min_rndv_thresh,
+                                             size_t max_rndv_thresh)
+{
+    ucp_context_h context = worker->context;
+    size_t rndv_thresh, rndv_nbr_thresh, min_thresh;
+
+    ucs_assert(config->key.am_lane != UCP_NULL_LANE);
+    ucs_assert(config->key.lanes[config->key.am_lane].rsc_index != UCP_NULL_RESOURCE);
+
+    if (!ucp_ep_config_test_rndv_support(config)) {
+        /* Disable RNDV */
+        rndv_thresh = rndv_nbr_thresh = SIZE_MAX;
+    } else if (context->config.ext.rndv_thresh == UCS_MEMUNITS_AUTO) {
+        /* auto - Make UCX calculate the AM rndv threshold on its own.*/
+        rndv_thresh = ucp_ep_config_calc_rndv_thresh(worker, config,
+                                                     config->key.am_bw_lanes,
+                                                     config->key.am_bw_lanes,
+                                                     0);
+        rndv_nbr_thresh = context->config.ext.rndv_send_nbr_thresh;
+        ucs_trace("active message rendezvous threshold is %zu", rndv_thresh);
+    } else {
+        rndv_thresh = context->config.ext.rndv_thresh;
+        rndv_nbr_thresh = context->config.ext.rndv_thresh;
+
+        /* adjust max_short if rndv_thresh is set externally */
+        ucp_ep_config_adjust_max_short(&config->tag.eager.max_short,
+                                       rndv_thresh);
+    }
+
+    min_thresh = ucs_max(iface_attr->cap.am.min_zcopy, min_rndv_thresh);
+
+    config->tag.rndv.am_thresh = ucp_ep_thresh(rndv_thresh,
+                                               min_thresh,
+                                               max_rndv_thresh);
+
+    config->tag.rndv_send_nbr.am_thresh = ucp_ep_thresh(rndv_nbr_thresh,
+                                                        min_thresh,
+                                                        max_rndv_thresh);
+
+
ucs_trace("Active Message rndv threshold is %zu (send_nbr: %zu)", + config->tag.rndv.am_thresh, config->tag.rndv_send_nbr.am_thresh); +} + +static void ucp_ep_config_set_rndv_thresh(ucp_worker_t *worker, + ucp_ep_config_t *config, + ucp_lane_index_t *lanes, + size_t min_rndv_thresh, + size_t max_rndv_thresh) +{ + ucp_context_t *context = worker->context; + ucp_lane_index_t lane = lanes[0]; + ucp_rsc_index_t rsc_index; + size_t rndv_thresh, rndv_nbr_thresh, min_thresh; + uct_iface_attr_t *iface_attr; + + if (lane == UCP_NULL_LANE) { + ucs_debug("rendezvous (get_zcopy) protocol is not supported"); + return; + } + + rsc_index = config->key.lanes[lane].rsc_index; + if (rsc_index == UCP_NULL_RESOURCE) { + return; + } + + iface_attr = ucp_worker_iface_get_attr(worker, rsc_index); + + if (!ucp_ep_config_test_rndv_support(config)) { + /* Disable RNDV */ + rndv_thresh = rndv_nbr_thresh = SIZE_MAX; + } else if (context->config.ext.rndv_thresh == UCS_MEMUNITS_AUTO) { + /* auto - Make UCX calculate the RMA (get_zcopy) rndv threshold on its own.*/ + rndv_thresh = ucp_ep_config_calc_rndv_thresh(worker, config, + config->key.am_bw_lanes, + lanes, 1); + rndv_nbr_thresh = context->config.ext.rndv_send_nbr_thresh; + } else { + rndv_thresh = context->config.ext.rndv_thresh; + rndv_nbr_thresh = context->config.ext.rndv_thresh; + + /* adjust max_short if rndv_thresh is set externally */ + ucp_ep_config_adjust_max_short(&config->tag.eager.max_short, + rndv_thresh); + } + + min_thresh = ucs_max(iface_attr->cap.get.min_zcopy, min_rndv_thresh); + + /* TODO: need to check minimal PUT Zcopy */ + config->tag.rndv.rma_thresh = ucp_ep_thresh(rndv_thresh, + min_thresh, + max_rndv_thresh); + + config->tag.rndv_send_nbr.rma_thresh = ucp_ep_thresh(rndv_nbr_thresh, + min_thresh, + max_rndv_thresh); + + ucs_trace("rndv threshold is %zu (send_nbr: %zu)", + config->tag.rndv.rma_thresh, config->tag.rndv_send_nbr.rma_thresh); +} + +static void ucp_ep_config_set_memtype_thresh(ucp_memtype_thresh_t 
*max_eager_short, + ssize_t max_short, int num_mem_type_mds) +{ + if (!num_mem_type_mds) { + max_eager_short->memtype_off = max_short; + } + + max_eager_short->memtype_on = max_short; +} + +static void ucp_ep_config_init_attrs(ucp_worker_t *worker, ucp_rsc_index_t rsc_index, + ucp_ep_msg_config_t *config, size_t max_short, + size_t max_bcopy, size_t max_zcopy, + size_t max_iov, uint64_t short_flag, + uint64_t bcopy_flag, uint64_t zcopy_flag, + unsigned hdr_len, size_t adjust_min_val) +{ + ucp_context_t *context = worker->context; + const uct_md_attr_t *md_attr; + uct_iface_attr_t *iface_attr; + size_t it; + size_t zcopy_thresh; + int mem_type; + + iface_attr = ucp_worker_iface_get_attr(worker, rsc_index); + + if ((iface_attr->cap.flags & short_flag)) { + config->max_short = max_short - hdr_len; + } else { + config->max_short = -1; + } + + if (iface_attr->cap.flags & bcopy_flag) { + config->max_bcopy = max_bcopy; + } else { + config->max_bcopy = SIZE_MAX; + } + + md_attr = &context->tl_mds[context->tl_rscs[rsc_index].md_index].attr; + if (!(iface_attr->cap.flags & zcopy_flag) || + ((md_attr->cap.flags & UCT_MD_FLAG_NEED_MEMH) && + !(md_attr->cap.flags & UCT_MD_FLAG_REG))) { + return; + } + + config->max_zcopy = max_zcopy; + config->max_iov = ucs_min(UCP_MAX_IOV, max_iov); + + if (context->config.ext.zcopy_thresh == UCS_MEMUNITS_AUTO) { + config->zcopy_auto_thresh = 1; + for (it = 0; it < UCP_MAX_IOV; ++it) { + zcopy_thresh = ucp_ep_config_get_zcopy_auto_thresh(it + 1, + &md_attr->reg_cost, + context, + ucp_tl_iface_bandwidth(context, + &iface_attr->bandwidth)); + zcopy_thresh = ucs_min(zcopy_thresh, adjust_min_val); + config->sync_zcopy_thresh[it] = zcopy_thresh; + config->zcopy_thresh[it] = zcopy_thresh; + } + } else { + config->zcopy_auto_thresh = 0; + config->sync_zcopy_thresh[0] = config->zcopy_thresh[0] = + ucs_min(context->config.ext.zcopy_thresh, adjust_min_val); + + /* adjust max_short if zcopy_thresh is set externally */ + 
ucp_ep_config_adjust_max_short(&config->max_short, + config->zcopy_thresh[0]); + } + + for (mem_type = 0; mem_type < UCS_MEMORY_TYPE_LAST; mem_type++) { + if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type)) { + config->mem_type_zcopy_thresh[mem_type] = config->zcopy_thresh[0]; + } else if (md_attr->cap.reg_mem_types & UCS_BIT(mem_type)) { + config->mem_type_zcopy_thresh[mem_type] = 1; + } + } +} + +static ucs_status_t ucp_ep_config_key_copy(ucp_ep_config_key_t *dst, + const ucp_ep_config_key_t *src) +{ + *dst = *src; + dst->dst_md_cmpts = ucs_calloc(ucs_popcount(src->reachable_md_map), + sizeof(*dst->dst_md_cmpts), + "ucp_dst_md_cmpts"); + if (dst->dst_md_cmpts == NULL) { + ucs_error("failed to allocate ucp_ep dest component list"); + return UCS_ERR_NO_MEMORY; + } + + memcpy(dst->dst_md_cmpts, src->dst_md_cmpts, + ucs_popcount(src->reachable_md_map) * sizeof(*dst->dst_md_cmpts)); + return UCS_OK; +} + +ucs_status_t ucp_ep_config_init(ucp_worker_h worker, ucp_ep_config_t *config, + const ucp_ep_config_key_t *key) +{ + ucp_context_h context = worker->context; + ucp_lane_index_t tag_lanes[2] = {UCP_NULL_LANE, UCP_NULL_LANE}; + ucp_lane_index_t get_zcopy_lane_count; + ucp_lane_index_t put_zcopy_lane_count; + ucp_ep_rma_config_t *rma_config; + uct_iface_attr_t *iface_attr; + uct_md_attr_t *md_attr; + ucs_memory_type_t mem_type; + ucp_rsc_index_t rsc_index; + ucp_lane_index_t lane; + size_t it; + size_t max_rndv_thresh; + size_t max_am_rndv_thresh; + size_t min_rndv_thresh; + size_t min_am_rndv_thresh; + ucs_status_t status; + double rndv_max_bw; + int i; + + memset(config, 0, sizeof(*config)); + + status = ucp_ep_config_key_copy(&config->key, key); + if (status != UCS_OK) { + return status; + } + + /* Default settings */ + for (it = 0; it < UCP_MAX_IOV; ++it) { + config->am.zcopy_thresh[it] = SIZE_MAX; + config->am.sync_zcopy_thresh[it] = SIZE_MAX; + config->tag.eager.zcopy_thresh[it] = SIZE_MAX; + config->tag.eager.sync_zcopy_thresh[it] = SIZE_MAX; + } + + 
UCS_STATIC_ASSERT(UCS_MEMORY_TYPE_HOST == 0); + for (mem_type = UCS_MEMORY_TYPE_HOST; mem_type < UCS_MEMORY_TYPE_LAST; mem_type++) { + config->am.mem_type_zcopy_thresh[mem_type] = SIZE_MAX; + config->tag.eager.mem_type_zcopy_thresh[mem_type] = SIZE_MAX; + } + + config->tag.eager.zcopy_auto_thresh = 0; + config->am.zcopy_auto_thresh = 0; + config->p2p_lanes = 0; + config->bcopy_thresh = context->config.ext.bcopy_thresh; + config->tag.lane = UCP_NULL_LANE; + config->tag.proto = &ucp_tag_eager_proto; + config->tag.sync_proto = &ucp_tag_eager_sync_proto; + config->tag.rndv.rma_thresh = SIZE_MAX; + config->tag.rndv.min_get_zcopy = 0; + config->tag.rndv.max_get_zcopy = SIZE_MAX; + config->tag.rndv.min_put_zcopy = 0; + config->tag.rndv.max_put_zcopy = SIZE_MAX; + config->tag.rndv.am_thresh = SIZE_MAX; + config->tag.rndv_send_nbr.am_thresh = SIZE_MAX; + config->tag.rndv_send_nbr.rma_thresh = SIZE_MAX; + config->tag.rndv.rkey_size = ucp_rkey_packed_size(context, + config->key.rma_bw_md_map); + for (lane = 0; lane < UCP_MAX_LANES; ++lane) { + config->tag.rndv.get_zcopy_lanes[lane] = UCP_NULL_LANE; + config->tag.rndv.put_zcopy_lanes[lane] = UCP_NULL_LANE; + } + + config->tag.rndv.rkey_ptr_dst_mds = 0; + config->stream.proto = &ucp_stream_am_proto; + config->am_u.proto = &ucp_am_proto; + config->am_u.reply_proto = &ucp_am_reply_proto; + max_rndv_thresh = SIZE_MAX; + max_am_rndv_thresh = SIZE_MAX; + min_am_rndv_thresh = 0; + + config->tag.offload.max_eager_short.memtype_on = -1; + config->tag.offload.max_eager_short.memtype_off = -1; + config->tag.max_eager_short.memtype_on = -1; + config->tag.max_eager_short.memtype_off = -1; + + for (lane = 0; lane < config->key.num_lanes; ++lane) { + rsc_index = config->key.lanes[lane].rsc_index; + if (rsc_index != UCP_NULL_RESOURCE) { + config->md_index[lane] = context->tl_rscs[rsc_index].md_index; + if (ucp_worker_is_tl_p2p(worker, rsc_index)) { + config->p2p_lanes |= UCS_BIT(lane); + } + } else { + config->md_index[lane] = 
UCP_NULL_RESOURCE; + } + } + + /* configuration for rndv */ + get_zcopy_lane_count = 0; + put_zcopy_lane_count = 0; + rndv_max_bw = 0; + for (i = 0; (i < config->key.num_lanes) && + (config->key.rma_bw_lanes[i] != UCP_NULL_LANE); ++i) { + lane = config->key.rma_bw_lanes[i]; + rsc_index = config->key.lanes[lane].rsc_index; + + if (rsc_index != UCP_NULL_RESOURCE) { + iface_attr = ucp_worker_iface_get_attr(worker, rsc_index); + md_attr = &context->tl_mds[context->tl_rscs[rsc_index].md_index].attr; + + /* Rkey_ptr */ + if (md_attr->cap.flags & UCT_MD_FLAG_RKEY_PTR) { + config->tag.rndv.rkey_ptr_dst_mds |= + UCS_BIT(config->key.lanes[lane].dst_md_index); + } + + /* GET Zcopy */ + if (iface_attr->cap.flags & UCT_IFACE_FLAG_GET_ZCOPY) { + config->tag.rndv.min_get_zcopy = ucs_max(config->tag.rndv.min_get_zcopy, + iface_attr->cap.get.min_zcopy); + + config->tag.rndv.max_get_zcopy = ucs_min(config->tag.rndv.max_get_zcopy, + iface_attr->cap.get.max_zcopy); + ucs_assert(get_zcopy_lane_count < UCP_MAX_LANES); + config->tag.rndv.get_zcopy_lanes[get_zcopy_lane_count++] = lane; + } + + /* PUT Zcopy */ + if (iface_attr->cap.flags & UCT_IFACE_FLAG_PUT_ZCOPY) { + config->tag.rndv.min_put_zcopy = ucs_max(config->tag.rndv.min_put_zcopy, + iface_attr->cap.put.min_zcopy); + + config->tag.rndv.max_put_zcopy = ucs_min(config->tag.rndv.max_put_zcopy, + iface_attr->cap.put.max_zcopy); + ucs_assert(put_zcopy_lane_count < UCP_MAX_LANES); + config->tag.rndv.put_zcopy_lanes[put_zcopy_lane_count++] = lane; + } + + rndv_max_bw = ucs_max(rndv_max_bw, + ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth)); + } + } + + if (get_zcopy_lane_count == 0) { + /* if there are no RNDV RMA BW lanes that support GET Zcopy, reset + * min/max values to show that the scheme is unsupported */ + config->tag.rndv.min_get_zcopy = SIZE_MAX; + config->tag.rndv.max_get_zcopy = 0; + } + + if (put_zcopy_lane_count == 0) { + /* if there are no RNDV RMA BW lanes that support PUT Zcopy, reset + * min/max values to show 
that the scheme is unsupported */ + config->tag.rndv.min_put_zcopy = SIZE_MAX; + config->tag.rndv.max_put_zcopy = 0; + } + + if (rndv_max_bw > 0) { + for (i = 0; (i < config->key.num_lanes) && + (config->key.rma_bw_lanes[i] != UCP_NULL_LANE); ++i) { + lane = config->key.rma_bw_lanes[i]; + rsc_index = config->key.lanes[lane].rsc_index; + + if (rsc_index != UCP_NULL_RESOURCE) { + iface_attr = ucp_worker_iface_get_attr(worker, rsc_index); + config->tag.rndv.scale[lane] = + ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth) / + rndv_max_bw; + } + } + } + + /* Configuration for tag offload */ + if (config->key.tag_lane != UCP_NULL_LANE) { + lane = config->key.tag_lane; + rsc_index = config->key.lanes[lane].rsc_index; + if (rsc_index != UCP_NULL_RESOURCE) { + iface_attr = ucp_worker_iface_get_attr(worker, rsc_index); + ucp_ep_config_init_attrs(worker, rsc_index, &config->tag.eager, + iface_attr->cap.tag.eager.max_short, + iface_attr->cap.tag.eager.max_bcopy, + iface_attr->cap.tag.eager.max_zcopy, + iface_attr->cap.tag.eager.max_iov, + UCT_IFACE_FLAG_TAG_EAGER_SHORT, + UCT_IFACE_FLAG_TAG_EAGER_BCOPY, + UCT_IFACE_FLAG_TAG_EAGER_ZCOPY, 0, + iface_attr->cap.tag.eager.max_bcopy); + + config->tag.offload.max_rndv_iov = iface_attr->cap.tag.rndv.max_iov; + config->tag.offload.max_rndv_zcopy = iface_attr->cap.tag.rndv.max_zcopy; + config->tag.sync_proto = &ucp_tag_offload_sync_proto; + config->tag.proto = &ucp_tag_offload_proto; + config->tag.lane = lane; + max_rndv_thresh = iface_attr->cap.tag.eager.max_zcopy; + max_am_rndv_thresh = iface_attr->cap.tag.eager.max_bcopy; + min_rndv_thresh = ucp_ep_tag_offload_min_rndv_thresh(config); + min_am_rndv_thresh = min_rndv_thresh; + + ucs_assert_always(iface_attr->cap.tag.rndv.max_hdr >= + sizeof(ucp_tag_offload_unexp_rndv_hdr_t)); + + if (config->key.am_lane != UCP_NULL_LANE) { + /* Must have active messages for using rendezvous */ + tag_lanes[0] = lane; + ucp_ep_config_set_rndv_thresh(worker, config, tag_lanes, + min_rndv_thresh, 
max_rndv_thresh); + } + + /* Max Eager short has to be set after Zcopy and RNDV thresholds */ + ucp_ep_config_set_memtype_thresh(&config->tag.offload.max_eager_short, + config->tag.eager.max_short, + context->num_mem_type_detect_mds); + } + } + + /* Configuration for active messages */ + if (config->key.am_lane != UCP_NULL_LANE) { + lane = config->key.am_lane; + rsc_index = config->key.lanes[lane].rsc_index; + if (rsc_index != UCP_NULL_RESOURCE) { + iface_attr = ucp_worker_iface_get_attr(worker, rsc_index); + md_attr = &context->tl_mds[context->tl_rscs[rsc_index].md_index].attr; + ucp_ep_config_init_attrs(worker, rsc_index, &config->am, + iface_attr->cap.am.max_short, + iface_attr->cap.am.max_bcopy, + iface_attr->cap.am.max_zcopy, + iface_attr->cap.am.max_iov, + UCT_IFACE_FLAG_AM_SHORT, + UCT_IFACE_FLAG_AM_BCOPY, + UCT_IFACE_FLAG_AM_ZCOPY, + sizeof(ucp_eager_hdr_t), SIZE_MAX); + + /* All keys must fit in RNDV packet. + * TODO remove some MDs if they don't + */ + ucs_assert_always(config->tag.rndv.rkey_size <= config->am.max_bcopy); + + if (!ucp_ep_is_tag_offload_enabled(config)) { + /* Tag offload is disabled, AM will be used for all + * tag-matching protocols */ + /* TODO: set threshold level based on all available lanes */ + + config->tag.eager = config->am; + config->tag.lane = lane; + min_rndv_thresh = iface_attr->cap.get.min_zcopy; + min_am_rndv_thresh = iface_attr->cap.am.min_zcopy; + + ucp_ep_config_set_rndv_thresh(worker, config, + config->key.rma_bw_lanes, + min_rndv_thresh, + max_rndv_thresh); + + /* Max Eager short has to be set after Zcopy and RNDV thresholds */ + ucp_ep_config_set_memtype_thresh(&config->tag.max_eager_short, + config->tag.eager.max_short, + context->num_mem_type_detect_mds); + } + + /* Calculate rndv threshold for AM Rendezvous, which may be used by + * any tag-matching protocol (AM and offload). 
*/ + ucp_ep_config_set_am_rndv_thresh(worker, iface_attr, md_attr, config, + min_am_rndv_thresh, + max_am_rndv_thresh); + } else { + /* Stub endpoint */ + config->am.max_bcopy = UCP_MIN_BCOPY; + } + } + + memset(&config->rma, 0, sizeof(config->rma)); + + /* Configuration for remote memory access */ + for (lane = 0; lane < config->key.num_lanes; ++lane) { + rma_config = &config->rma[lane]; + rma_config->put_zcopy_thresh = SIZE_MAX; + rma_config->get_zcopy_thresh = SIZE_MAX; + rma_config->max_put_short = SIZE_MAX; + rma_config->max_get_short = SIZE_MAX; + rma_config->max_put_bcopy = SIZE_MAX; + rma_config->max_get_bcopy = SIZE_MAX; + + if (ucp_ep_config_get_multi_lane_prio(config->key.rma_lanes, lane) == -1) { + continue; + } + + rsc_index = config->key.lanes[lane].rsc_index; + + if (rsc_index != UCP_NULL_RESOURCE) { + iface_attr = ucp_worker_iface_get_attr(worker, rsc_index); + /* PUT */ + if (iface_attr->cap.flags & UCT_IFACE_FLAG_PUT_ZCOPY) { + rma_config->max_put_zcopy = iface_attr->cap.put.max_zcopy; + /* TODO: formula */ + if (context->config.ext.zcopy_thresh == UCS_MEMUNITS_AUTO) { + rma_config->put_zcopy_thresh = 16384; + } else { + rma_config->put_zcopy_thresh = context->config.ext.zcopy_thresh; + } + rma_config->put_zcopy_thresh = ucs_max(rma_config->put_zcopy_thresh, + iface_attr->cap.put.min_zcopy); + } + if (iface_attr->cap.flags & UCT_IFACE_FLAG_PUT_BCOPY) { + rma_config->max_put_bcopy = ucs_min(iface_attr->cap.put.max_bcopy, + rma_config->put_zcopy_thresh); + } + if (iface_attr->cap.flags & UCT_IFACE_FLAG_PUT_SHORT) { + rma_config->max_put_short = ucs_min(iface_attr->cap.put.max_short, + rma_config->max_put_bcopy); + } + + /* GET */ + if (iface_attr->cap.flags & UCT_IFACE_FLAG_GET_ZCOPY) { + /* TODO: formula */ + rma_config->max_get_zcopy = iface_attr->cap.get.max_zcopy; + if (context->config.ext.zcopy_thresh == UCS_MEMUNITS_AUTO) { + rma_config->get_zcopy_thresh = 16384; + } else { + rma_config->get_zcopy_thresh = context->config.ext.zcopy_thresh; + } 
+                rma_config->get_zcopy_thresh = ucs_max(rma_config->get_zcopy_thresh,
+                                                       iface_attr->cap.get.min_zcopy);
+            }
+            if (iface_attr->cap.flags & UCT_IFACE_FLAG_GET_BCOPY) {
+                rma_config->max_get_bcopy = ucs_min(iface_attr->cap.get.max_bcopy,
+                                                    rma_config->get_zcopy_thresh);
+            }
+            if (iface_attr->cap.flags & UCT_IFACE_FLAG_GET_SHORT) {
+                rma_config->max_get_short = ucs_min(iface_attr->cap.get.max_short,
+                                                    rma_config->max_get_bcopy);
+            }
+        } else {
+            rma_config->max_put_bcopy = UCP_MIN_BCOPY; /* Stub endpoint */
+        }
+    }
+
+    return UCS_OK;
+}
+
+/*
+ * Release the memory owned by an endpoint configuration.
+ *
+ * Frees config->key.dst_md_cmpts. The 'worker' argument is not used by this
+ * function.
+ */
+void ucp_ep_config_cleanup(ucp_worker_h worker, ucp_ep_config_t *config)
+{
+    ucs_free(config->key.dst_md_cmpts);
+}
+
+/*
+ * Check whether the "short" range ends below a given threshold.
+ *
+ * Returns non-zero when short is disabled (max_short < 0), or when the first
+ * size that does not fit in short (max_short + 1) is still smaller than
+ * 'thresh'. Returns zero otherwise.
+ */
+static int ucp_ep_is_short_lower_thresh(ssize_t max_short,
+                                        size_t thresh)
+{
+    return ((max_short < 0) ||
+            (((size_t)max_short + 1) < thresh));
+}
+
+/*
+ * Print one line describing the message-size ranges of a tag-send flavor.
+ *
+ * The line starts with "# <name>: 0" and ends with "(inf)\n". In between, a
+ * "...." separator and (when finite) a boundary value is emitted for each of
+ * the eager short, eager bcopy, eager zcopy and rendezvous ranges that applies.
+ *
+ * stream          - output stream.
+ * name            - label printed at the start of the line.
+ * max_eager_short - maximal eager short size; a negative value means short
+ *                   is not available and nothing is printed for it.
+ * zcopy_thresh    - threshold for switching to eager zcopy.
+ * rndv_rma_thresh - threshold for RMA based rendezvous.
+ * rndv_am_thresh  - threshold for AM based rendezvous.
+ */
+static void ucp_ep_config_print_tag_proto(FILE *stream, const char *name,
+                                          ssize_t max_eager_short,
+                                          size_t zcopy_thresh,
+                                          size_t rndv_rma_thresh,
+                                          size_t rndv_am_thresh)
+{
+    size_t max_bcopy, min_rndv, max_short;
+
+    /* rendezvous starts at the smaller of the two rndv thresholds, and bcopy
+     * ends wherever zcopy or rendezvous starts first */
+    min_rndv  = ucs_min(rndv_rma_thresh, rndv_am_thresh);
+    max_bcopy = ucs_min(zcopy_thresh, min_rndv);
+
+    fprintf(stream, "# %23s: 0", name);
+
+    /* print eager short */
+    if (max_eager_short > 0) {
+        max_short = max_eager_short;
+        ucs_assert(max_short <= SSIZE_MAX);
+        fprintf(stream, "....%zu" , max_short + 1);
+    } else if (!max_eager_short) {
+        /* max_eager_short is a signed ssize_t, so it must be printed with the
+         * signed 'z' conversion; the value is 0 here, the output is the same */
+        fprintf(stream, "....%zd" , max_eager_short);
+    }
+
+    /* print eager bcopy */
+    if (ucp_ep_is_short_lower_thresh(max_eager_short, max_bcopy) && max_bcopy) {
+        fprintf(stream, "....");
+        if (max_bcopy < SIZE_MAX) {
+            fprintf(stream, "%zu", max_bcopy);
+        }
+    }
+
+    /* print eager zcopy */
+    if (ucp_ep_is_short_lower_thresh(max_eager_short, min_rndv) &&
+        (zcopy_thresh < min_rndv)) {
+        fprintf(stream, "....");
+        if (min_rndv < SIZE_MAX) {
+            fprintf(stream, "%zu", min_rndv);
+        }
+    }
+
+    /* print rendezvous */
+    if (min_rndv < SIZE_MAX) {
+        fprintf(stream, "....");
+    }
+
+    fprintf(stream, "(inf)\n");
+}
+
+/*
+ * Print one line describing the size ranges of an RMA operation on a lane.
+ *
+ * The line starts with "# <name>[<lane>]: 0" and ends with "..(inf)\n".
+ * bcopy_thresh is printed only when it is non-zero and smaller than
+ * zcopy_thresh; zcopy_thresh is printed only when it is below SIZE_MAX.
+ */
+static void ucp_ep_config_print_rma_proto(FILE *stream, const char *name,
+                                          ucp_lane_index_t lane,
+                                          size_t bcopy_thresh, size_t zcopy_thresh)
+{
+
+    fprintf(stream, "# %20s[%d]: 0", name, lane);
+    if (bcopy_thresh > 0) {
+        fprintf(stream, "..");
+    }
+    if (bcopy_thresh < zcopy_thresh) {
+        if (bcopy_thresh > 0) {
+            fprintf(stream, "..%zu", bcopy_thresh);
+        }
+        fprintf(stream, "..");
+    }
+    if (zcopy_thresh < SIZE_MAX) {
+        fprintf(stream, "..%zu..", zcopy_thresh);
+    }
+    fprintf(stream, "..(inf)\n");
+}
+
+/*
+ * Find the position of a lane inside a lane array.
+ *
+ * Scans the first UCP_MAX_LANES entries of 'lanes' and returns the index of
+ * the first entry equal to 'lane', or -1 if there is none. The scan does not
+ * stop at UCP_NULL_LANE, so 'lanes' must hold at least UCP_MAX_LANES entries.
+ */
+int ucp_ep_config_get_multi_lane_prio(const ucp_lane_index_t *lanes,
+                                      ucp_lane_index_t lane)
+{
+    int prio;
+    for (prio = 0; prio < UCP_MAX_LANES; ++prio) {
+        if (lane == lanes[prio]) {
+            return prio;
+        }
+    }
+    return -1;
+}
+
+/*
+ * Format a human-readable description of one lane of a configuration key
+ * into 'buf', writing at most 'max' bytes.
+ *
+ * Each piece is appended with snprintf(p, endp - p, ...) followed by
+ * p += strlen(p), so the text is truncated rather than overflowing 'buf'.
+ */
+void ucp_ep_config_lane_info_str(ucp_context_h context,
+                                 const ucp_ep_config_key_t *key,
+                                 const unsigned *addr_indices,
+                                 ucp_lane_index_t lane,
+                                 ucp_rsc_index_t aux_rsc_index,
+                                 char *buf, size_t max)
+{
+    uct_tl_resource_desc_t *rsc;
+    ucp_rsc_index_t rsc_index;
+    ucp_lane_index_t proxy_lane;
+    ucp_md_index_t dst_md_index;
+    ucp_rsc_index_t cmpt_index;
+    char *p, *endp;
+    char *desc_str;
+    int prio;
+
+    p          = buf;
+    endp       = buf + max;
+    rsc_index  = key->lanes[lane].rsc_index;
+    proxy_lane = key->lanes[lane].proxy_lane;
+    /* NOTE(review): rsc_index is used as an array index without being compared
+     * to UCP_NULL_RESOURCE - confirm callers only pass lanes with a resource */
+    rsc        = &context->tl_rscs[rsc_index].tl_rsc;
+
+    /* the lane holds its own transport: either it is its own proxy, or it has
+     * no proxy at all */
+    if ((proxy_lane == lane) || (proxy_lane == UCP_NULL_LANE)) {
+        if (key->lanes[lane].proxy_lane == lane) {
+            desc_str = " ";
+        } else {
+            desc_str = "";
+        }
+        /* the %-*c argument pads with spaces, using a width of 20 minus the
+         * lengths of the device name, transport name and desc_str */
+        snprintf(p, endp - p, "lane[%d]: %2d:" UCT_TL_RESOURCE_DESC_FMT " md[%d]%s %-*c-> ",
+                 lane, rsc_index, UCT_TL_RESOURCE_DESC_ARG(rsc),
+                 context->tl_rscs[rsc_index].md_index, desc_str,
+                 20 - (int)(strlen(rsc->dev_name) + strlen(rsc->tl_name) + strlen(desc_str)),
+                 ' ');
+        p += strlen(p);
+
+        if (addr_indices != NULL) {
+            snprintf(p, endp - p, "addr[%d].", addr_indices[lane]);
+            p += strlen(p);
+        }
+
+    } else {
+        /* the real transport endpoint is stored in another lane */
+        snprintf(p, endp - p, "lane[%d]: proxy to lane[%d] %12c -> ", lane,
+                 proxy_lane, ' ');
+        p += strlen(p);
+    }
+
+    dst_md_index = key->lanes[lane].dst_md_index;
+    cmpt_index   = ucp_ep_config_get_dst_md_cmpt(key, dst_md_index);
+    snprintf(p, endp - p, "md[%d]/%-8s", dst_md_index,
+             context->tl_cmpts[cmpt_index].attr.name);
+    p += strlen(p);
+
+    prio = ucp_ep_config_get_multi_lane_prio(key->rma_lanes, lane);
+    if (prio != -1) {
+        snprintf(p, endp - p, " rma#%d", prio);
+        p += strlen(p);
+    }
+
+    prio = ucp_ep_config_get_multi_lane_prio(key->rma_bw_lanes, lane);
+    if (prio != -1) {
+        snprintf(p, endp - p, " rma_bw#%d", prio);
+        p += strlen(p);
+    }
+
+    prio = ucp_ep_config_get_multi_lane_prio(key->amo_lanes, lane);
+    if (prio != -1) {
+        snprintf(p, endp - p, " amo#%d", prio);
+        p += strlen(p);
+    }
+
+    if (key->am_lane == lane) {
+        snprintf(p, endp - p, " am");
+        p += strlen(p);
+    }
+
+    prio = ucp_ep_config_get_multi_lane_prio(key->am_bw_lanes, lane);
+    if (prio != -1) {
+        snprintf(p, endp - p, " am_bw#%d", prio);
+        p += strlen(p);
+    }
+
+    if (lane == key->tag_lane) {
+        snprintf(p, endp - p, " tag_offload");
+        p += strlen(p);
+    }
+
+    if (key->wireup_lane == lane) {
+        snprintf(p, endp - p, " wireup");
+        p += strlen(p);
+        if (aux_rsc_index != UCP_NULL_RESOURCE) {
+            snprintf(p, endp - p, "{" UCT_TL_RESOURCE_DESC_FMT "}",
+                     UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[aux_rsc_index].tl_rsc));
+        }
+    }
+}
+
+/*
+ * Print an endpoint configuration to 'stream' as '#'-prefixed text.
+ *
+ * Output, in order:
+ *  - one line per lane, formatted by ucp_ep_config_lane_info_str();
+ *  - if UCP_FEATURE_TAG is enabled: the size ranges of tag_send, tag_send_nbr
+ *    and tag_send_sync;
+ *  - if UCP_FEATURE_RMA is enabled: the put/get size ranges of every lane that
+ *    appears in key.rma_lanes;
+ *  - if TAG or RMA is enabled: the list of rma_bw memory domain indices,
+ *    followed on the same line by the packed rkey size when TAG is enabled.
+ *
+ * stream        - output stream.
+ * worker        - worker whose context provides the feature flags and
+ *                 resource tables.
+ * config        - configuration to print.
+ * addr_indices  - passed through to ucp_ep_config_lane_info_str(); may be
+ *                 NULL.
+ * aux_rsc_index - passed through to ucp_ep_config_lane_info_str().
+ */
+static void ucp_ep_config_print(FILE *stream, ucp_worker_h worker,
+                                const ucp_ep_config_t *config,
+                                const unsigned *addr_indices,
+                                ucp_rsc_index_t aux_rsc_index)
+{
+    ucp_context_h context = worker->context;
+    char lane_info[128]   = {0};
+    ucp_md_index_t md_index;
+    ucp_lane_index_t lane;
+
+    for (lane = 0; lane < config->key.num_lanes; ++lane) {
+        ucp_ep_config_lane_info_str(context, &config->key, addr_indices, lane,
+                                    aux_rsc_index, lane_info, sizeof(lane_info));
+        fprintf(stream, "# %s\n", lane_info);
+    }
+    fprintf(stream, "#\n");
+
+    if (context->config.features & UCP_FEATURE_TAG) {
+        ucp_ep_config_print_tag_proto(stream, "tag_send",
+                                      config->tag.eager.max_short,
+                                      config->tag.eager.zcopy_thresh[0],
+                                      config->tag.rndv.rma_thresh,
+                                      config->tag.rndv.am_thresh);
+        ucp_ep_config_print_tag_proto(stream, "tag_send_nbr",
+                                      config->tag.eager.max_short,
+                                      /* disable zcopy */
+                                      ucs_min(config->tag.rndv_send_nbr.rma_thresh,
+                                              config->tag.rndv_send_nbr.am_thresh),
+                                      config->tag.rndv_send_nbr.rma_thresh,
+                                      config->tag.rndv_send_nbr.am_thresh);
+        ucp_ep_config_print_tag_proto(stream, "tag_send_sync",
+                                      config->tag.eager.max_short,
+                                      config->tag.eager.sync_zcopy_thresh[0],
+                                      config->tag.rndv.rma_thresh,
+                                      config->tag.rndv.am_thresh);
+    }
+
+    if (context->config.features & UCP_FEATURE_RMA) {
+        for (lane = 0; lane < config->key.num_lanes; ++lane) {
+            /* only lanes which are used for RMA have an rma[] configuration
+             * worth printing */
+            if (ucp_ep_config_get_multi_lane_prio(config->key.rma_lanes, lane) == -1) {
+                continue;
+            }
+            ucp_ep_config_print_rma_proto(stream, "put", lane,
+                                          ucs_max(config->rma[lane].max_put_short + 1,
+                                                  config->bcopy_thresh),
+                                          config->rma[lane].put_zcopy_thresh);
+            ucp_ep_config_print_rma_proto(stream, "get", lane, 0,
+                                          config->rma[lane].get_zcopy_thresh);
+        }
+    }
+
+    if (context->config.features & (UCP_FEATURE_TAG|UCP_FEATURE_RMA)) {
+        fprintf(stream, "#\n");
+        fprintf(stream, "# %23s: mds ", "rma_bw");
+        ucs_for_each_bit(md_index, config->key.rma_bw_md_map) {
+            fprintf(stream, "[%d] ", md_index);
+        }
+
+        if (context->config.features & UCP_FEATURE_TAG) {
+            /* the rkey size shares the line with the md list and ends it */
+            fprintf(stream, "rndv_rkey_size %zu\n", config->tag.rndv.rkey_size);
+        } else {
+            /* RMA without TAG: nothing else is printed on this line, so
+             * terminate it here; otherwise the next line written by the caller
+             * would be appended to the md list */
+            fprintf(stream, "\n");
+        }
+    }
+}
+
+/*
+ * Print a description of an endpoint and its configuration to 'stream'.
+ * The output is produced between UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL and
+ * UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL on the endpoint's worker.
+ */
+void ucp_ep_print_info(ucp_ep_h ep, FILE *stream)
+{
+    ucp_rsc_index_t aux_rsc_index;
+    ucp_lane_index_t wireup_lane;
+    uct_ep_h wireup_ep;
+
+    UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker);
+
+    fprintf(stream, "#\n");
+    fprintf(stream, "# UCP endpoint\n");
+    fprintf(stream, "#\n");
+    fprintf(stream, "# peer: %s\n", ucp_ep_peer_name(ep));
+
+    /* if there is a wireup lane, set aux_rsc_index to the stub ep resource */
+    aux_rsc_index = UCP_NULL_RESOURCE;
+    wireup_lane   = ucp_ep_config(ep)->key.wireup_lane;
+    if (wireup_lane != UCP_NULL_LANE) {
+        wireup_ep =
ep->uct_eps[wireup_lane];
+        if (ucp_wireup_ep_test(wireup_ep)) {
+            aux_rsc_index = ucp_wireup_ep_get_aux_rsc_index(wireup_ep);
+        }
+    }
+
+    ucp_ep_config_print(stream, ep->worker, ucp_ep_config(ep), NULL,
+                        aux_rsc_index);
+
+    fprintf(stream, "#\n");
+
+    UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker);
+}
+
+/*
+ * Calculate the automatic zero-copy threshold for a given number of iov
+ * entries.
+ *
+ * The threshold is
+ *
+ *     (iovcnt * reg_cost->overhead) /
+ *     (1 / bcopy_bw - 1 / bandwidth - iovcnt * reg_cost->growth)
+ *
+ * where bcopy_bw is context->config.ext.bcopy_bw.
+ *
+ * iovcnt    - number of iov entries.
+ * reg_cost  - registration cost (overhead and growth terms).
+ * context   - context that provides the configured bcopy bandwidth.
+ * bandwidth - bandwidth of the transport.
+ *
+ * Returns the threshold converted to size_t, or SIZE_MAX when the quotient
+ * cannot be represented as a size_t: it is negative, it is not a number
+ * (0 / 0), or it is infinite or otherwise not below SIZE_MAX (for example when
+ * the denominator is zero).
+ */
+size_t ucp_ep_config_get_zcopy_auto_thresh(size_t iovcnt,
+                                           const uct_linear_growth_t *reg_cost,
+                                           const ucp_context_h context,
+                                           double bandwidth)
+{
+    double zcopy_thresh;
+    double bcopy_bw = context->config.ext.bcopy_bw;
+
+    zcopy_thresh = (iovcnt * reg_cost->overhead) /
+                   ((1.0 / bcopy_bw) - (1.0 / bandwidth) - (iovcnt * reg_cost->growth));
+
+    /* Converting a double to size_t is undefined unless the value fits, so
+     * every non-representable result is mapped to SIZE_MAX before the
+     * conversion. The first test is written as !(x >= 0) rather than (x < 0)
+     * so that it is also true for NaN. */
+    if (!(zcopy_thresh >= 0.0) || (zcopy_thresh >= (double)SIZE_MAX)) {
+        return SIZE_MAX;
+    }
+
+    return (size_t)zcopy_thresh;
+}
+
+/*
+ * Return the wireup endpoint stored on the CM lane of 'ep'.
+ *
+ * Returns NULL if the endpoint has no CM lane, or if the transport endpoint
+ * on that lane does not pass ucp_wireup_ep_test().
+ */
+ucp_wireup_ep_t * ucp_ep_get_cm_wireup_ep(ucp_ep_h ep)
+{
+    const ucp_lane_index_t lane = ucp_ep_get_cm_lane(ep);
+
+    if (lane == UCP_NULL_LANE) {
+        return NULL;
+    }
+
+    return ucp_wireup_ep_test(ep->uct_eps[lane]) ?
+           ucs_derived_of(ep->uct_eps[lane], ucp_wireup_ep_t) : NULL;
+}
+
+/*
+ * Return the transport endpoint used on the CM lane of 'ep', or NULL if the
+ * endpoint has no CM lane.
+ */
+uct_ep_h ucp_ep_get_cm_uct_ep(ucp_ep_h ep)
+{
+    ucp_lane_index_t lane;
+    ucp_wireup_ep_t *wireup_ep;
+
+    lane = ucp_ep_get_cm_lane(ep);
+    if (lane == UCP_NULL_LANE) {
+        return NULL;
+    }
+
+    wireup_ep = ucp_ep_get_cm_wireup_ep(ep);
+    return (wireup_ep == NULL) ?
ep->uct_eps[lane] : wireup_ep->super.uct_ep; +} + +int ucp_ep_is_cm_local_connected(ucp_ep_h ep) +{ + return (ucp_ep_get_cm_lane(ep) != UCP_NULL_LANE) && + (ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED); +} + +uint64_t ucp_ep_get_tl_bitmap(ucp_ep_h ep) +{ + uint64_t tl_bitmap = 0; + ucp_lane_index_t lane; + ucp_rsc_index_t rsc_idx; + + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (lane == ucp_ep_get_cm_lane(ep)) { + continue; + } + + rsc_idx = ucp_ep_get_rsc_index(ep, lane); + if (rsc_idx == UCP_NULL_RESOURCE) { + continue; + } + + tl_bitmap |= UCS_BIT(rsc_idx); + } + + return tl_bitmap; +} + +void ucp_ep_invoke_err_cb(ucp_ep_h ep, ucs_status_t status) +{ + /* Do not invoke error handler if it's not enabled */ + if ((ucp_ep_config(ep)->key.err_mode == UCP_ERR_HANDLING_MODE_NONE) || + /* error callback is not set */ + (ucp_ep_ext_gen(ep)->err_cb == NULL) || + /* the EP has been closed by user */ + (ep->flags & UCP_EP_FLAG_CLOSED)) { + return; + } + + ucs_assert(ep->flags & UCP_EP_FLAG_USED); + ucs_debug("ep %p: calling user error callback %p with arg %p and status %s", + ep, ucp_ep_ext_gen(ep)->err_cb, ucp_ep_ext_gen(ep)->user_data, + ucs_status_string(status)); + ucp_ep_ext_gen(ep)->err_cb(ucp_ep_ext_gen(ep)->user_data, ep, status); +} + +int ucp_ep_config_test_rndv_support(const ucp_ep_config_t *config) +{ + return (config->key.err_mode == UCP_ERR_HANDLING_MODE_NONE) || + (config->key.cm_lane != UCP_NULL_LANE); +} diff --git a/src/ucp/core/ucp_ep.h b/src/ucp/core/ucp_ep.h new file mode 100644 index 0000000..dbf48bf --- /dev/null +++ b/src/ucp/core/ucp_ep.h @@ -0,0 +1,507 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * Copyright (C) Los Alamos National Security, LLC. 2019 ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef UCP_EP_H_ +#define UCP_EP_H_ + +#include "ucp_types.h" + +#include +#include +#include +#include +#include +#include + + +#define UCP_MAX_IOV 16UL + + +/* Configuration */ +typedef uint16_t ucp_ep_cfg_index_t; + + +/* Endpoint flags type */ +#if ENABLE_DEBUG_DATA || UCS_ENABLE_ASSERT +typedef uint32_t ucp_ep_flags_t; +#else +typedef uint16_t ucp_ep_flags_t; +#endif + + +/** + * Endpoint flags + */ +enum { + UCP_EP_FLAG_LOCAL_CONNECTED = UCS_BIT(0), /* All local endpoints are connected */ + UCP_EP_FLAG_REMOTE_CONNECTED = UCS_BIT(1), /* All remote endpoints are connected */ + UCP_EP_FLAG_CONNECT_REQ_QUEUED = UCS_BIT(2), /* Connection request was queued */ + UCP_EP_FLAG_FAILED = UCS_BIT(3), /* EP is in failed state */ + UCP_EP_FLAG_USED = UCS_BIT(4), /* EP is in use by the user */ + UCP_EP_FLAG_STREAM_HAS_DATA = UCS_BIT(5), /* EP has data in the ext.stream.match_q */ + UCP_EP_FLAG_ON_MATCH_CTX = UCS_BIT(6), /* EP is on match queue */ + UCP_EP_FLAG_DEST_EP = UCS_BIT(7), /* dest_ep_ptr is valid */ + UCP_EP_FLAG_LISTENER = UCS_BIT(8), /* EP holds pointer to a listener + (on server side due to receiving partial + worker address from the client) */ + UCP_EP_FLAG_CONNECT_PRE_REQ_QUEUED = UCS_BIT(9), /* Pre-Connection request was queued */ + UCP_EP_FLAG_CLOSED = UCS_BIT(10),/* EP was closed */ + UCP_EP_FLAG_CLOSE_REQ_VALID = UCS_BIT(11),/* close protocol is started and + close_req is valid */ + + /* DEBUG bits */ + UCP_EP_FLAG_CONNECT_REQ_SENT = UCS_BIT(16),/* DEBUG: Connection request was sent */ + UCP_EP_FLAG_CONNECT_REP_SENT = UCS_BIT(17),/* DEBUG: Connection reply was sent */ + UCP_EP_FLAG_CONNECT_ACK_SENT = UCS_BIT(18),/* DEBUG: Connection ACK was sent */ + UCP_EP_FLAG_CONNECT_REQ_IGNORED = UCS_BIT(19),/* DEBUG: Connection request was ignored */ + UCP_EP_FLAG_CONNECT_PRE_REQ_SENT = UCS_BIT(20),/* DEBUG: Connection pre-request was sent */ + UCP_EP_FLAG_SOCKADDR_PARTIAL_ADDR = UCS_BIT(21),/* DEBUG: Partial worker address was sent + to the remote peer when 
starting + connection establishment on this EP */ + UCP_EP_FLAG_FLUSH_STATE_VALID = UCS_BIT(22) /* DEBUG: flush_state is valid */ +}; + + +/** + * UCP endpoint statistics counters + */ +enum { + UCP_EP_STAT_TAG_TX_EAGER, + UCP_EP_STAT_TAG_TX_EAGER_SYNC, + UCP_EP_STAT_TAG_TX_RNDV, + UCP_EP_STAT_LAST +}; + + +/** + * Endpoint init flags + */ +enum { + UCP_EP_INIT_FLAG_MEM_TYPE = UCS_BIT(0), /**< Endpoint for local mem type transfers */ + UCP_EP_INIT_CREATE_AM_LANE = UCS_BIT(1), /**< Endpoint requires an AM lane */ + UCP_EP_INIT_CM_WIREUP_CLIENT = UCS_BIT(2), /**< Endpoint wireup protocol is based on CM, + client side */ + UCP_EP_INIT_CM_WIREUP_SERVER = UCS_BIT(3), /**< Endpoint wireup protocol is based on CM, + server side */ + UCP_EP_INIT_ERR_MODE_PEER_FAILURE = UCS_BIT(4) /**< Endpoint requires an + @ref UCP_ERR_HANDLING_MODE_PEER */ +}; + + +#define UCP_EP_STAT_TAG_OP(_ep, _op) \ + UCS_STATS_UPDATE_COUNTER((_ep)->stats, UCP_EP_STAT_TAG_TX_##_op, 1); + + +/* + * Endpoint configuration key. + * This is filled by to the transport selection logic, according to the local + * resources and set of remote addresses. 
+ */ +typedef struct ucp_ep_config_key { + + ucp_lane_index_t num_lanes; /* Number of active lanes */ + + struct { + ucp_rsc_index_t rsc_index; /* Resource index */ + ucp_lane_index_t proxy_lane; /* UCP_NULL_LANE - no proxy + otherwise - in which lane the real + transport endpoint is stored */ + ucp_md_index_t dst_md_index; /* Destination memory domain index */ + } lanes[UCP_MAX_LANES]; + + ucp_lane_index_t am_lane; /* Lane for AM (can be NULL) */ + ucp_lane_index_t tag_lane; /* Lane for tag matching offload (can be NULL) */ + ucp_lane_index_t wireup_lane; /* Lane for wireup messages (can be NULL) */ + ucp_lane_index_t cm_lane; /* Lane for holding a CM connection */ + + /* Lanes for remote memory access, sorted by priority, highest first */ + ucp_lane_index_t rma_lanes[UCP_MAX_LANES]; + + /* Lanes for high-bw memory access, sorted by priority, highest first */ + ucp_lane_index_t rma_bw_lanes[UCP_MAX_LANES]; + + /* Lanes for atomic operations, sorted by priority, highest first */ + ucp_lane_index_t amo_lanes[UCP_MAX_LANES]; + + /* Lanes for high-bw active messages, sorted by priority, highest first */ + ucp_lane_index_t am_bw_lanes[UCP_MAX_LANES]; + + /* Local memory domains to send remote keys for in high-bw rma protocols + * NOTE: potentially it can be different than what is imposed by rma_bw_lanes, + * since these are the MDs used by remote side for accessing our memory. */ + ucp_md_map_t rma_bw_md_map; + + /* Bitmap of remote mds which are reachable from this endpoint (with any set + * of transports which could be selected in the future). 
+ */ + ucp_md_map_t reachable_md_map; + + /* Array with popcount(reachable_md_map) elements, each entry holds the local + * component index to be used for unpacking remote key from each set bit in + * reachable_md_map */ + ucp_rsc_index_t *dst_md_cmpts; + + /* Error handling mode */ + ucp_err_handling_mode_t err_mode; + ucs_status_t status; +} ucp_ep_config_key_t; + + +/* + * Configuration for RMA protocols + */ +typedef struct ucp_ep_rma_config { + size_t max_put_short; /* Maximal payload of put short */ + size_t max_put_bcopy; /* Maximal total size of put_bcopy */ + size_t max_put_zcopy; + size_t max_get_short; /* Maximal payload of get short */ + size_t max_get_bcopy; /* Maximal total size of get_bcopy */ + size_t max_get_zcopy; + size_t put_zcopy_thresh; + size_t get_zcopy_thresh; +} ucp_ep_rma_config_t; + + +/* + * Configuration for AM and tag offload protocols + */ +typedef struct ucp_ep_msg_config { + ssize_t max_short; + size_t max_bcopy; + size_t max_zcopy; + size_t max_iov; + + /* zero-copy threshold for operations which do not have to wait for remote side */ + size_t zcopy_thresh[UCP_MAX_IOV]; + + /* zero-copy threshold for mem type buffers */ + size_t mem_type_zcopy_thresh[UCS_MEMORY_TYPE_LAST]; + + /* zero-copy threshold for operations which anyways have to wait for remote side */ + size_t sync_zcopy_thresh[UCP_MAX_IOV]; + uint8_t zcopy_auto_thresh; /* if != 0 the zcopy enabled */ +} ucp_ep_msg_config_t; + + +/* + * Thresholds with and without non-host memory + */ +typedef struct ucp_memtype_thresh { + ssize_t memtype_on; + ssize_t memtype_off; +} ucp_memtype_thresh_t; + + +typedef struct ucp_ep_config { + + /* A key which uniquely defines the configuration, and all other fields of + * configuration (in the current worker) and defined only by it. + */ + ucp_ep_config_key_t key; + + /* Bitmap of which lanes are p2p; affects the behavior of connection + * establishment protocols. 
+ */ + ucp_lane_map_t p2p_lanes; + + /* Configuration for each lane that provides RMA */ + ucp_ep_rma_config_t rma[UCP_MAX_LANES]; + + /* Threshold for switching from put_short to put_bcopy */ + size_t bcopy_thresh; + + /* Configuration for AM lane */ + ucp_ep_msg_config_t am; + + /* MD index of each lane */ + ucp_md_index_t md_index[UCP_MAX_LANES]; + + struct { + /* Protocols used for tag matching operations + * (can be AM based or tag offload). */ + const ucp_request_send_proto_t *proto; + const ucp_request_send_proto_t *sync_proto; + + /* Lane used for tag matching operations. */ + ucp_lane_index_t lane; + + /* Maximal size for eager short. */ + ucp_memtype_thresh_t max_eager_short; + + /* Configuration of the lane used for eager protocols + * (can be AM or tag offload). */ + ucp_ep_msg_config_t eager; + + struct { + /* Maximal total size of rndv_get_zcopy */ + size_t max_get_zcopy; + /* Minimal size of rndv_get_zcopy */ + size_t min_get_zcopy; + /* Maximal total size of rndv_put_zcopy */ + size_t max_put_zcopy; + /* Minimal size of rndv_put_zcopy */ + size_t min_put_zcopy; + /* Threshold for switching from eager to RMA based rendezvous */ + size_t rma_thresh; + /* Threshold for switching from eager to AM based rendezvous */ + size_t am_thresh; + /* Total size of packed rkey, according to high-bw md_map */ + size_t rkey_size; + /* remote memory domains which support rkey_ptr */ + ucp_md_map_t rkey_ptr_dst_mds; + /* Lanes for GET zcopy */ + ucp_lane_index_t get_zcopy_lanes[UCP_MAX_LANES]; + /* Lanes for PUT zcopy */ + ucp_lane_index_t put_zcopy_lanes[UCP_MAX_LANES]; + /* BW based scale factor */ + double scale[UCP_MAX_LANES]; + } rndv; + + /* special thresholds for the ucp_tag_send_nbr() */ + struct { + /* Threshold for switching from eager to RMA based rendezvous */ + size_t rma_thresh; + /* Threshold for switching from eager to AM based rendezvous */ + size_t am_thresh; + } rndv_send_nbr; + + struct { + /* Maximal size for eager short. 
*/ + ucp_memtype_thresh_t max_eager_short; + + /* Maximal iov count for RNDV offload */ + size_t max_rndv_iov; + /* Maximal total size for RNDV offload */ + size_t max_rndv_zcopy; + } offload; + } tag; + + struct { + /* Protocols used for stream operations + * (currently it's only AM based). */ + const ucp_request_send_proto_t *proto; + } stream; + + struct { + /* Protocols used for am operations */ + const ucp_request_send_proto_t *proto; + const ucp_request_send_proto_t *reply_proto; + } am_u; + +} ucp_ep_config_t; + + +/** + * Protocol layer endpoint, represents a connection to a remote worker + */ +typedef struct ucp_ep { + ucp_worker_h worker; /* Worker this endpoint belongs to */ + + ucp_ep_cfg_index_t cfg_index; /* Configuration index */ + ucp_ep_conn_sn_t conn_sn; /* Sequence number for remote connection */ + ucp_lane_index_t am_lane; /* Cached value */ + ucp_ep_flags_t flags; /* Endpoint flags */ + + /* TODO allocate ep dynamically according to number of lanes */ + uct_ep_h uct_eps[UCP_MAX_LANES]; /* Transports for every lane */ + +#if ENABLE_DEBUG_DATA + char peer_name[UCP_WORKER_NAME_MAX]; +#endif + + UCS_STATS_NODE_DECLARE(stats) + +} ucp_ep_t; + + +/** + * Status of protocol-level remote completions + */ +typedef struct { + ucs_queue_head_t reqs; /* Queue of flush requests which + are waiting for remote completion */ + uint32_t send_sn; /* Sequence number of sent operations */ + uint32_t cmpl_sn; /* Sequence number of completions */ +} ucp_ep_flush_state_t; + + +/** + * Status of protocol-level remote completions + */ +typedef struct { + ucp_request_t *req; /* Flush request which is + used in close protocol */ +} ucp_ep_close_proto_req_t; + +/* + * Endpoint extension for generic non fast-path data + */ +typedef struct { + uintptr_t dest_ep_ptr; /* Remote EP pointer */ + void *user_data; /* User data associated with ep */ + ucs_list_link_t ep_list; /* List entry in worker's all eps list */ + ucp_err_handler_cb_t err_cb; /* Error handler */ + + /* 
Endpoint match context and remote completion status are mutually exclusive, + * since remote completions are counted only after the endpoint is already + * matched to a remote peer. + */ + union { + ucp_ep_match_t ep_match; /* Matching with remote endpoints */ + ucp_ep_flush_state_t flush_state; /* Remote completion status */ + ucp_listener_h listener; /* Listener that may be associated with ep */ + ucp_ep_close_proto_req_t close_req; /* Close protocol request */ + }; +} ucp_ep_ext_gen_t; + + +/* + * Endpoint extension for specific protocols + */ +typedef struct { + struct { + ucs_list_link_t ready_list; /* List entry in worker's EP list */ + ucs_queue_head_t match_q; /* Queue of receive data or requests, + depends on UCP_EP_FLAG_STREAM_HAS_DATA */ + } stream; + + struct { + ucs_list_link_t started_ams; + } am; +} ucp_ep_ext_proto_t; + + +enum { + UCP_WIREUP_SA_DATA_FULL_ADDR = 0, /* Sockaddr client data contains full + address. */ + UCP_WIREUP_SA_DATA_PARTIAL_ADDR, /* Sockaddr client data contains partial + address, wireup protocol requires + extra MSGs. */ + UCP_WIREUP_SA_DATA_CM_ADDR /* Sockaddr client data contains address + for CM based wireup: there is only + iface and ep address of transport + lanes, remote device address is + provided by CM and has to be added to + unpacked UCP address locally. 
*/ +}; + + +struct ucp_wireup_sockaddr_data { + uintptr_t ep_ptr; /**< Endpoint pointer */ + uint8_t err_mode; /**< Error handling mode */ + uint8_t addr_mode; /**< The attached address format + defined by + UCP_WIREUP_SA_DATA_xx */ + uint8_t dev_index; /**< Device address index used to + build remote address in + UCP_WIREUP_SA_DATA_CM_ADDR + mode */ + /* packed worker address follows */ +} UCS_S_PACKED; + + +typedef struct ucp_conn_request { + ucp_listener_h listener; + union { + uct_listener_h listener; + uct_iface_h iface; + } uct; + uct_conn_request_h uct_req; + char dev_name[UCT_DEVICE_NAME_MAX]; + uct_device_addr_t *remote_dev_addr; + ucp_wireup_sockaddr_data_t sa_data; + /* packed worker address follows */ +} ucp_conn_request_t; + + +void ucp_ep_config_key_reset(ucp_ep_config_key_t *key); + +void ucp_ep_config_lane_info_str(ucp_context_h context, + const ucp_ep_config_key_t *key, + const unsigned *addr_indices, + ucp_lane_index_t lane, + ucp_rsc_index_t aux_rsc_index, + char *buf, size_t max); + +ucs_status_t ucp_ep_new(ucp_worker_h worker, const char *peer_name, + const char *message, ucp_ep_h *ep_p); + +void ucp_ep_delete(ucp_ep_h ep); + +ucs_status_t ucp_ep_init_create_wireup(ucp_ep_h ep, unsigned ep_init_flags, + ucp_wireup_ep_t **wireup_ep); + +ucs_status_t ucp_ep_create_to_worker_addr(ucp_worker_h worker, + uint64_t local_tl_bitmap, + const ucp_unpacked_address_t *remote_address, + unsigned ep_init_flags, + const char *message, ucp_ep_h *ep_p); + +ucs_status_t ucp_ep_create_server_accept(ucp_worker_h worker, + const ucp_conn_request_h conn_request, + ucp_ep_h *ep_p); + +ucs_status_ptr_t ucp_ep_flush_internal(ucp_ep_h ep, unsigned uct_flags, + ucp_send_callback_t req_cb, + unsigned req_flags, + ucp_request_t *worker_req, + ucp_request_callback_t flushed_cb, + const char *debug_name); + +ucs_status_t +ucp_ep_create_sockaddr_aux(ucp_worker_h worker, unsigned ep_init_flags, + const ucp_unpacked_address_t *remote_address, + ucp_ep_h *ep_p); + +void 
ucp_ep_config_key_set_err_mode(ucp_ep_config_key_t *key, + unsigned ep_init_flags); + +void ucp_ep_err_pending_purge(uct_pending_req_t *self, void *arg); + +void ucp_ep_disconnected(ucp_ep_h ep, int force); + +void ucp_ep_destroy_internal(ucp_ep_h ep); + +void ucp_ep_cleanup_lanes(ucp_ep_h ep); + +int ucp_ep_is_sockaddr_stub(ucp_ep_h ep); + +ucs_status_t ucp_ep_config_init(ucp_worker_h worker, ucp_ep_config_t *config, + const ucp_ep_config_key_t *key); + +void ucp_ep_config_cleanup(ucp_worker_h worker, ucp_ep_config_t *config); + +int ucp_ep_config_is_equal(const ucp_ep_config_key_t *key1, + const ucp_ep_config_key_t *key2); + +int ucp_ep_config_get_multi_lane_prio(const ucp_lane_index_t *lanes, + ucp_lane_index_t lane); + +size_t ucp_ep_config_get_zcopy_auto_thresh(size_t iovcnt, + const uct_linear_growth_t *reg_cost, + const ucp_context_h context, + double bandwidth); + +ucs_status_t ucp_worker_create_mem_type_endpoints(ucp_worker_h worker); + +ucp_wireup_ep_t * ucp_ep_get_cm_wireup_ep(ucp_ep_h ep); + +uint64_t ucp_ep_get_tl_bitmap(ucp_ep_h ep); + +uct_ep_h ucp_ep_get_cm_uct_ep(ucp_ep_h ep); + +int ucp_ep_is_cm_local_connected(ucp_ep_h ep); + +unsigned ucp_ep_local_disconnect_progress(void *arg); + +size_t ucp_ep_tag_offload_min_rndv_thresh(ucp_ep_config_t *config); + +void ucp_ep_invoke_err_cb(ucp_ep_h ep, ucs_status_t status); + +int ucp_ep_config_test_rndv_support(const ucp_ep_config_t *config); + +#endif diff --git a/src/ucp/core/ucp_ep.inl b/src/ucp/core/ucp_ep.inl new file mode 100644 index 0000000..7b8b695 --- /dev/null +++ b/src/ucp/core/ucp_ep.inl @@ -0,0 +1,229 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + + +#ifndef UCP_EP_INL_ +#define UCP_EP_INL_ + +#include "ucp_ep.h" +#include "ucp_worker.h" +#include "ucp_context.h" + +#include +#include + + +static inline ucp_ep_config_t *ucp_ep_config(ucp_ep_h ep) +{ + return &ep->worker->ep_config[ep->cfg_index]; +} + +static inline ucp_lane_index_t ucp_ep_get_am_lane(ucp_ep_h ep) +{ + ucs_assert(ucp_ep_config(ep)->key.am_lane != UCP_NULL_LANE); + return ep->am_lane; +} + +static inline ucp_lane_index_t ucp_ep_get_wireup_msg_lane(ucp_ep_h ep) +{ + ucp_lane_index_t lane = ucp_ep_config(ep)->key.wireup_lane; + return (lane == UCP_NULL_LANE) ? ucp_ep_get_am_lane(ep) : lane; +} + +static inline ucp_lane_index_t ucp_ep_get_tag_lane(ucp_ep_h ep) +{ + ucs_assert(ucp_ep_config(ep)->key.tag_lane != UCP_NULL_LANE); + return ucp_ep_config(ep)->key.tag_lane; +} + +static inline int ucp_ep_is_tag_offload_enabled(ucp_ep_config_t *config) +{ + ucp_lane_index_t lane = config->key.tag_lane; + + if (lane != UCP_NULL_LANE) { + ucs_assert(config->key.lanes[lane].rsc_index != UCP_NULL_RESOURCE); + return 1; + } + return 0; +} + +static inline uct_ep_h ucp_ep_get_am_uct_ep(ucp_ep_h ep) +{ + return ep->uct_eps[ucp_ep_get_am_lane(ep)]; +} + +static inline uct_ep_h ucp_ep_get_tag_uct_ep(ucp_ep_h ep) +{ + return ep->uct_eps[ucp_ep_get_tag_lane(ep)]; +} + +static inline ucp_rsc_index_t ucp_ep_get_rsc_index(ucp_ep_h ep, ucp_lane_index_t lane) +{ + return ucp_ep_config(ep)->key.lanes[lane].rsc_index; +} + +static inline uct_iface_attr_t *ucp_ep_get_iface_attr(ucp_ep_h ep, ucp_lane_index_t lane) +{ + return ucp_worker_iface_get_attr(ep->worker, ucp_ep_get_rsc_index(ep, lane)); +} + +static inline size_t ucp_ep_get_max_bcopy(ucp_ep_h ep, ucp_lane_index_t lane) +{ + return ucp_ep_get_iface_attr(ep, lane)->cap.am.max_bcopy; +} + +static inline size_t ucp_ep_get_max_zcopy(ucp_ep_h ep, ucp_lane_index_t lane) +{ + return ucp_ep_get_iface_attr(ep, lane)->cap.am.max_zcopy; +} + +static inline size_t ucp_ep_get_max_iov(ucp_ep_h ep, ucp_lane_index_t lane) 
+{ + return ucp_ep_get_iface_attr(ep, lane)->cap.am.max_iov; +} + +static inline ucp_lane_index_t ucp_ep_num_lanes(ucp_ep_h ep) +{ + return ucp_ep_config(ep)->key.num_lanes; +} + +static inline int ucp_ep_is_lane_p2p(ucp_ep_h ep, ucp_lane_index_t lane) +{ + return ucp_ep_config(ep)->p2p_lanes & UCS_BIT(lane); +} + +static inline ucp_lane_index_t ucp_ep_get_proxy_lane(ucp_ep_h ep, + ucp_lane_index_t lane) +{ + return ucp_ep_config(ep)->key.lanes[lane].proxy_lane; +} + +static inline ucp_md_index_t ucp_ep_md_index(ucp_ep_h ep, ucp_lane_index_t lane) +{ + return ucp_ep_config(ep)->md_index[lane]; +} + +static inline const uct_md_attr_t* ucp_ep_md_attr(ucp_ep_h ep, ucp_lane_index_t lane) +{ + ucp_context_h context = ep->worker->context; + return &context->tl_mds[ucp_ep_md_index(ep, lane)].attr; +} + +static UCS_F_ALWAYS_INLINE ucp_ep_ext_gen_t* ucp_ep_ext_gen(ucp_ep_h ep) +{ + return (ucp_ep_ext_gen_t*)ucs_strided_elem_get(ep, 0, 1); +} + +static UCS_F_ALWAYS_INLINE ucp_ep_ext_proto_t* ucp_ep_ext_proto(ucp_ep_h ep) +{ + return (ucp_ep_ext_proto_t*)ucs_strided_elem_get(ep, 0, 2); +} + +static UCS_F_ALWAYS_INLINE ucp_ep_h ucp_ep_from_ext_gen(ucp_ep_ext_gen_t *ep_ext) +{ + return (ucp_ep_h)ucs_strided_elem_get(ep_ext, 1, 0); +} + +static UCS_F_ALWAYS_INLINE ucp_ep_h ucp_ep_from_ext_proto(ucp_ep_ext_proto_t *ep_ext) +{ + return (ucp_ep_h)ucs_strided_elem_get(ep_ext, 2, 0); +} + +static UCS_F_ALWAYS_INLINE ucp_ep_flush_state_t* ucp_ep_flush_state(ucp_ep_h ep) +{ + ucs_assert(ep->flags & UCP_EP_FLAG_FLUSH_STATE_VALID); + ucs_assert(!(ep->flags & UCP_EP_FLAG_ON_MATCH_CTX)); + ucs_assert(!(ep->flags & UCP_EP_FLAG_LISTENER)); + ucs_assert(!(ep->flags & UCP_EP_FLAG_CLOSE_REQ_VALID)); + return &ucp_ep_ext_gen(ep)->flush_state; +} + +static UCS_F_ALWAYS_INLINE uintptr_t ucp_ep_dest_ep_ptr(ucp_ep_h ep) +{ +#if UCS_ENABLE_ASSERT + if (!(ep->flags & UCP_EP_FLAG_DEST_EP)) { + return 0; /* Let remote side assert if it gets NULL pointer */ + } +#endif + return 
ucp_ep_ext_gen(ep)->dest_ep_ptr; +} + +/* + * Make sure we have a valid dest_ep_ptr value, so protocols which require a + * reply from remote side could be used. + */ +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_ep_resolve_dest_ep_ptr(ucp_ep_h ep, ucp_lane_index_t lane) +{ + if (ep->flags & UCP_EP_FLAG_DEST_EP) { + return UCS_OK; + } + + return ucp_wireup_connect_remote(ep, lane); +} + +static inline void ucp_ep_update_dest_ep_ptr(ucp_ep_h ep, uintptr_t ep_ptr) +{ + if (ep->flags & UCP_EP_FLAG_DEST_EP) { + ucs_assertv(ep_ptr == ucp_ep_ext_gen(ep)->dest_ep_ptr, + "ep=%p ep_ptr=0x%lx ep->dest_ep_ptr=0x%lx", + ep, ep_ptr, ucp_ep_ext_gen(ep)->dest_ep_ptr); + } + + ucs_assert(ep_ptr != 0); + ucs_trace("ep %p: set dest_ep_ptr to 0x%lx", ep, ep_ptr); + ep->flags |= UCP_EP_FLAG_DEST_EP; + ucp_ep_ext_gen(ep)->dest_ep_ptr = ep_ptr; +} + +static inline const char* ucp_ep_peer_name(ucp_ep_h ep) +{ +#if ENABLE_DEBUG_DATA + return ep->peer_name; +#else + return ""; +#endif +} + +static inline void ucp_ep_flush_state_reset(ucp_ep_h ep) +{ + ucp_ep_flush_state_t *flush_state = &ucp_ep_ext_gen(ep)->flush_state; + + ucs_assert(!(ep->flags & (UCP_EP_FLAG_ON_MATCH_CTX | + UCP_EP_FLAG_LISTENER))); + ucs_assert(!(ep->flags & UCP_EP_FLAG_FLUSH_STATE_VALID) || + ((flush_state->send_sn == 0) && + (flush_state->cmpl_sn == 0) && + ucs_queue_is_empty(&flush_state->reqs))); + + flush_state->send_sn = 0; + flush_state->cmpl_sn = 0; + ucs_queue_head_init(&flush_state->reqs); + ep->flags |= UCP_EP_FLAG_FLUSH_STATE_VALID; +} + +static inline void ucp_ep_flush_state_invalidate(ucp_ep_h ep) +{ + ucs_assert(ucs_queue_is_empty(&ucp_ep_flush_state(ep)->reqs)); + ep->flags &= ~UCP_EP_FLAG_FLUSH_STATE_VALID; +} + +/* get index of the local component which can reach a remote memory domain */ +static inline ucp_rsc_index_t +ucp_ep_config_get_dst_md_cmpt(const ucp_ep_config_key_t *key, + ucp_md_index_t dst_md_index) +{ + unsigned index = ucs_popcount(key->reachable_md_map & UCS_MASK(dst_md_index)); + + 
return key->dst_md_cmpts[index]; +} + +static inline ucp_lane_index_t ucp_ep_get_cm_lane(ucp_ep_h ep) +{ + return ucp_ep_config(ep)->key.cm_lane; +} + +#endif diff --git a/src/ucp/core/ucp_listener.c b/src/ucp/core/ucp_listener.c new file mode 100644 index 0000000..2d268ee --- /dev/null +++ b/src/ucp/core/ucp_listener.c @@ -0,0 +1,550 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucp_listener.h" +#include "uct/base/uct_cm.h" + +#include +#include +#include +#include +#include +#include +#include + + +static unsigned ucp_listener_accept_cb_progress(void *arg) +{ + ucp_ep_h ep = arg; + ucp_listener_h listener = ucp_ep_ext_gen(ep)->listener; + + /* NOTE: protect union */ + ucs_assert(!(ep->flags & (UCP_EP_FLAG_ON_MATCH_CTX | + UCP_EP_FLAG_FLUSH_STATE_VALID))); + ucs_assert(ep->flags & UCP_EP_FLAG_LISTENER); + + ep->flags &= ~UCP_EP_FLAG_LISTENER; + ep->flags |= UCP_EP_FLAG_USED; + ucp_stream_ep_activate(ep); + ucp_ep_flush_state_reset(ep); + + /* + * listener is NULL if the EP was created with UCP_EP_PARAM_FIELD_EP_ADDR + * and we are here because long address requires wireup protocol + */ + if (listener && listener->accept_cb) { + listener->accept_cb(ep, listener->arg); + } + + return 1; +} + +int ucp_listener_accept_cb_remove_filter(const ucs_callbackq_elem_t *elem, + void *arg) +{ + ucp_ep_h ep = elem->arg; + + return (elem->cb == ucp_listener_accept_cb_progress) && (ep == arg); +} + +void ucp_listener_schedule_accept_cb(ucp_ep_h ep) +{ + uct_worker_cb_id_t prog_id = UCS_CALLBACKQ_ID_NULL; + + uct_worker_progress_register_safe(ep->worker->uct, + ucp_listener_accept_cb_progress, + ep, UCS_CALLBACKQ_FLAG_ONESHOT, + &prog_id); +} + +static unsigned ucp_listener_conn_request_progress(void *arg) +{ + ucp_conn_request_h conn_request = arg; + ucp_listener_h listener = conn_request->listener; + ucp_worker_h worker = 
listener->worker; + ucp_ep_h ep; + ucs_status_t status; + + ucs_trace_func("listener=%p", listener); + + if (listener->conn_cb) { + listener->conn_cb(conn_request, listener->arg); + return 1; + } + + UCS_ASYNC_BLOCK(&worker->async); + status = ucp_ep_create_server_accept(worker, conn_request, &ep); + + if (status != UCS_OK) { + goto out; + } + + if (ep->flags & UCP_EP_FLAG_LISTENER) { + status = ucp_wireup_send_pre_request(ep); + } else { + /* send wireup request message, to connect the client to the server's + new endpoint */ + ucs_assert(!(ep->flags & UCP_EP_FLAG_CONNECT_REQ_QUEUED)); + status = ucp_wireup_send_request(ep); + } + + if (status != UCS_OK) { + goto out; + } + + status = uct_iface_accept(conn_request->uct.iface, conn_request->uct_req); + if (status != UCS_OK) { + ucp_ep_destroy_internal(ep); + goto out; + } + + if (listener->accept_cb != NULL) { + if (ep->flags & UCP_EP_FLAG_LISTENER) { + ucs_assert(!(ep->flags & UCP_EP_FLAG_USED)); + ucp_ep_ext_gen(ep)->listener = listener; + } else { + ep->flags |= UCP_EP_FLAG_USED; + listener->accept_cb(ep, listener->arg); + } + } + +out: + if (status != UCS_OK) { + ucs_error("connection request failed on listener %p with status %s", + listener, ucs_status_string(status)); + uct_iface_reject(conn_request->uct.iface, conn_request->uct_req); + } + + UCS_ASYNC_UNBLOCK(&worker->async); + ucs_free(conn_request); + return 1; +} + +static int ucp_listener_remove_filter(const ucs_callbackq_elem_t *elem, + void *arg) +{ + ucp_conn_request_h conn_request = elem->arg; /* the progress callback is queued with the connection request, not the listener */ + /* select pending requests owned by the listener passed in arg; NOTE(review): a removed request is neither rejected nor freed here - confirm whether the caller should do it */ + return (elem->cb == ucp_listener_conn_request_progress) && (conn_request->listener == arg); +} + +static void ucp_listener_conn_request_callback(uct_iface_h tl_iface, void *arg, + uct_conn_request_h uct_req, + const void *conn_priv_data, + size_t length) +{ + ucp_listener_h listener = arg; + uct_worker_cb_id_t prog_id = UCS_CALLBACKQ_ID_NULL; + ucp_conn_request_h conn_request; + + ucs_trace("listener %p: got connection request", listener); + + /* Defer wireup init and user's 
callback to be invoked from the main thread */ + conn_request = ucs_malloc(ucs_offsetof(ucp_conn_request_t, sa_data) + + length, "accept connection request"); + if (conn_request == NULL) { + ucs_error("failed to allocate connect request, " + "rejecting connection request %p on TL iface %p, reason %s", + uct_req, tl_iface, ucs_status_string(UCS_ERR_NO_MEMORY)); + uct_iface_reject(tl_iface, uct_req); + return; + } + + conn_request->listener = listener; + conn_request->uct_req = uct_req; + conn_request->uct.iface = tl_iface; + memcpy(&conn_request->sa_data, conn_priv_data, length); + + uct_worker_progress_register_safe(listener->worker->uct, + ucp_listener_conn_request_progress, + conn_request, UCS_CALLBACKQ_FLAG_ONESHOT, + &prog_id); + + /* If the worker supports the UCP_FEATURE_WAKEUP feature, signal the user so + * that he can wake-up on this event */ + ucp_worker_signal_internal(listener->worker); +} + +ucs_status_t ucp_listener_query(ucp_listener_h listener, + ucp_listener_attr_t *attr) +{ + ucs_status_t status; + + if (attr->field_mask & UCP_LISTENER_ATTR_FIELD_SOCKADDR) { + status = ucs_sockaddr_copy((struct sockaddr *)&attr->sockaddr, + (struct sockaddr *)&listener->sockaddr); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +static void ucp_listener_close_uct_listeners(ucp_listener_h listener) +{ + ucp_rsc_index_t i; + + ucs_assert_always(ucp_worker_sockaddr_is_cm_proto(listener->worker)); + + for (i = 0; i < listener->num_rscs; ++i) { + uct_listener_destroy(listener->listeners[i]); + } + + ucs_free(listener->listeners); + + listener->listeners = NULL; + listener->num_rscs = 0; +} + +static void ucp_listener_close_ifaces(ucp_listener_h listener) +{ + ucp_worker_h worker; + int i; + + ucs_assert_always(!ucp_worker_sockaddr_is_cm_proto(listener->worker)); + + for (i = 0; i < listener->num_rscs; i++) { + worker = listener->wifaces[i]->worker; + ucs_assert_always(worker == listener->worker); + /* remove pending slow-path progress in case 
it wasn't removed yet */ + ucs_callbackq_remove_if(&worker->uct->progress_q, + ucp_listener_remove_filter, listener); + ucp_worker_iface_cleanup(listener->wifaces[i]); + } + + ucs_free(listener->wifaces); +} + +static ucs_status_t +ucp_listen_on_cm(ucp_listener_h listener, const ucp_listener_params_t *params) +{ + ucp_worker_h worker = listener->worker; + const ucp_rsc_index_t num_cms = ucp_worker_num_cm_cmpts(worker); + struct sockaddr_storage addr_storage; + struct sockaddr *addr; + uct_listener_h *uct_listeners; + uct_listener_params_t uct_params; + uct_listener_attr_t uct_attr; + uint16_t port, uct_listen_port; + ucp_rsc_index_t i; + char addr_str[UCS_SOCKADDR_STRING_LEN]; + ucp_worker_cm_t *ucp_cm; + ucs_status_t status; + + addr = (struct sockaddr *)&addr_storage; + status = ucs_sockaddr_copy(addr, params->sockaddr.addr); + if (status != UCS_OK) { + return status; + } + + ucs_assert_always(num_cms > 0); + + uct_params.field_mask = UCT_LISTENER_PARAM_FIELD_CONN_REQUEST_CB | + UCT_LISTENER_PARAM_FIELD_USER_DATA; + uct_params.conn_request_cb = ucp_cm_server_conn_request_cb; + uct_params.user_data = listener; + + listener->num_rscs = 0; + uct_listeners = ucs_calloc(num_cms, sizeof(*uct_listeners), + "uct_listeners_arr"); + if (uct_listeners == NULL) { + ucs_error("Can't allocate memory for UCT listeners array"); + return UCS_ERR_NO_MEMORY; + } + + listener->listeners = uct_listeners; + + for (i = 0; i < num_cms; ++i) { + ucp_cm = &worker->cms[i]; + status = uct_listener_create(ucp_cm->cm, addr, + params->sockaddr.addrlen, &uct_params, + &uct_listeners[listener->num_rscs]); + if (status != UCS_OK) { + ucs_debug("failed to create UCT listener on CM %p (component %s) " + "with address %s status %s", ucp_cm->cm, + worker->context->tl_cmpts[ucp_cm->cmpt_idx].attr.name, + ucs_sockaddr_str(params->sockaddr.addr, addr_str, + UCS_SOCKADDR_STRING_LEN), + ucs_status_string(status)); + continue; + } + + ++listener->num_rscs; + + status = ucs_sockaddr_get_port(addr, &port); + 
if (status != UCS_OK) { + goto err_destroy_listeners; + } + + uct_attr.field_mask = UCT_LISTENER_ATTR_FIELD_SOCKADDR; + status = uct_listener_query(uct_listeners[listener->num_rscs - 1], + &uct_attr); + if (status != UCS_OK) { + goto err_destroy_listeners; + } + + status = ucs_sockaddr_get_port((struct sockaddr *)&uct_attr.sockaddr, + &uct_listen_port); + if (status != UCS_OK) { + goto err_destroy_listeners; + } + + if (port != uct_listen_port) { + ucs_assert(port == 0); + status = ucs_sockaddr_set_port(addr, uct_listen_port); + if (status != UCS_OK) { + goto err_destroy_listeners; + } + } + } + + if (listener->num_rscs > 0) { + status = ucs_sockaddr_copy((struct sockaddr *)&listener->sockaddr, + addr); + if (status != UCS_OK) { + goto err_destroy_listeners; + } + } + + /* if no listener was created, fall through to release the listeners array + and return the status of the last call of uct_listener_create */ + if (listener->num_rscs > 0) { return UCS_OK; } + +err_destroy_listeners: + ucp_listener_close_uct_listeners(listener); + return status; +} + +static ucs_status_t +ucp_listen_on_iface(ucp_listener_h listener, + const ucp_listener_params_t *params) +{ + ucp_worker_h worker = listener->worker; + ucp_context_h context = listener->worker->context; + int sockaddr_tls = 0; + char saddr_str[UCS_SOCKADDR_STRING_LEN]; + ucp_tl_resource_desc_t *resource; + uct_iface_params_t iface_params; + struct sockaddr_storage *listen_sock; + ucp_worker_iface_t **tmp; + ucp_rsc_index_t tl_id; + ucs_status_t status; + ucp_tl_md_t *tl_md; + uint16_t port; + int i; + + status = ucs_sockaddr_get_port(params->sockaddr.addr, &port); + if (status != UCS_OK) { + return status; + } + + /* Go through all the available resources and for each one, check if the given + * sockaddr is accessible from its md. Start listening on all the mds that + * satisfy this. + * If the given port is set to 0, i.e. 
use a random port, the first transport + * in the sockaddr priority list from the environment configuration will + * dictate the port to listen on for the other sockaddr transports in the list. + * */ + for (i = 0; i < context->config.num_sockaddr_tls; i++) { + tl_id = context->config.sockaddr_tl_ids[i]; + resource = &context->tl_rscs[tl_id]; + tl_md = &context->tl_mds[resource->md_index]; + + if (!uct_md_is_sockaddr_accessible(tl_md->md, &params->sockaddr, + UCT_SOCKADDR_ACC_LOCAL)) { + continue; + } + + tmp = ucs_realloc(listener->wifaces, + sizeof(*tmp) * (sockaddr_tls + 1), + "listener wifaces"); + if (tmp == NULL) { + ucs_error("failed to allocate listener wifaces"); + status = UCS_ERR_NO_MEMORY; + goto err_close_listener_wifaces; + } + + listener->wifaces = tmp; + + iface_params.field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_SOCKADDR; + iface_params.open_mode = UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER; + iface_params.mode.sockaddr.conn_request_cb = ucp_listener_conn_request_callback; + iface_params.mode.sockaddr.conn_request_arg = listener; + iface_params.mode.sockaddr.listen_sockaddr = params->sockaddr; + iface_params.mode.sockaddr.cb_flags = UCT_CB_FLAG_ASYNC; + + if (port) { + /* Set the port for the next sockaddr iface. 
This port was either + * obtained from the user or generated by the first created sockaddr + * iface if the port from the user was equal to zero */ + status = ucs_sockaddr_set_port( + (struct sockaddr *) + iface_params.mode.sockaddr.listen_sockaddr.addr, port); + if (status != UCS_OK) { + ucs_error("failed to set port parameter (%d) for creating %s iface", + port, resource->tl_rsc.tl_name); + goto err_close_listener_wifaces; + } + } + + status = ucp_worker_iface_open(worker, tl_id, &iface_params, + &listener->wifaces[sockaddr_tls]); + if (status != UCS_OK) { + ucs_error("failed to open listener on %s on md %s", + ucs_sockaddr_str( + iface_params.mode.sockaddr.listen_sockaddr.addr, + saddr_str, sizeof(saddr_str)), + tl_md->rsc.md_name); + goto err_close_listener_wifaces; + } + + status = ucp_worker_iface_init(worker, tl_id, + listener->wifaces[sockaddr_tls]); + if ((status != UCS_OK) || + ((context->config.features & UCP_FEATURE_WAKEUP) && + !(listener->wifaces[sockaddr_tls]->attr.cap.flags & + UCT_IFACE_FLAG_CB_ASYNC))) { + ucp_worker_iface_cleanup(listener->wifaces[sockaddr_tls]); + status = (status != UCS_OK) ? status : UCS_ERR_UNSUPPORTED; goto err_close_listener_wifaces; /* init may have succeeded on an iface without async callbacks: never report UCS_OK after closing the ifaces */ + } + + listen_sock = &listener->wifaces[sockaddr_tls]->attr.listen_sockaddr; + status = ucs_sockaddr_get_port((struct sockaddr *)listen_sock, &port); + if (status != UCS_OK) { + goto err_close_listener_wifaces; + } + + sockaddr_tls++; + listener->num_rscs = sockaddr_tls; + ucs_trace("listener %p: accepting connections on %s on %s", + listener, tl_md->rsc.md_name, + ucs_sockaddr_str(iface_params.mode.sockaddr.listen_sockaddr.addr, + saddr_str, sizeof(saddr_str))); + } + + if (!sockaddr_tls) { + ucs_error("none of the available transports can listen for connections on %s", + ucs_sockaddr_str(params->sockaddr.addr, saddr_str, + sizeof(saddr_str))); + listener->num_rscs = 0; + status = UCS_ERR_UNREACHABLE; + goto err_close_listener_wifaces; + } + + listen_sock = &listener->wifaces[sockaddr_tls - 1]->attr.listen_sockaddr; + status = 
ucs_sockaddr_copy((struct sockaddr *)&listener->sockaddr, + (struct sockaddr *)listen_sock); + if (status != UCS_OK) { + goto err_close_listener_wifaces; + } + + return UCS_OK; + +err_close_listener_wifaces: + ucp_listener_close_ifaces(listener); + return status; +} + +ucs_status_t ucp_listener_create(ucp_worker_h worker, + const ucp_listener_params_t *params, + ucp_listener_h *listener_p) +{ + ucp_listener_h listener; + ucs_status_t status; + + if (!(params->field_mask & UCP_LISTENER_PARAM_FIELD_SOCK_ADDR)) { + ucs_error("missing sockaddr for listener"); + return UCS_ERR_INVALID_PARAM; + } + + UCP_CHECK_PARAM_NON_NULL(params->sockaddr.addr, status, return status); + + if (ucs_test_all_flags(params->field_mask, + UCP_LISTENER_PARAM_FIELD_ACCEPT_HANDLER | + UCP_LISTENER_PARAM_FIELD_CONN_HANDLER)) { + ucs_error("only one accept handler should be provided"); + return UCS_ERR_INVALID_PARAM; + } + + listener = ucs_calloc(1, sizeof(*listener), "ucp_listener"); + if (listener == NULL) { + ucs_error("cannot allocate memory for UCP listener"); + return UCS_ERR_NO_MEMORY; + } + + UCS_ASYNC_BLOCK(&worker->async); + + listener->worker = worker; + + if (params->field_mask & UCP_LISTENER_PARAM_FIELD_ACCEPT_HANDLER) { + UCP_CHECK_PARAM_NON_NULL(params->accept_handler.cb, status, + goto err_free_listener); + listener->accept_cb = params->accept_handler.cb; + listener->arg = params->accept_handler.arg; + } else if (params->field_mask & UCP_LISTENER_PARAM_FIELD_CONN_HANDLER) { + UCP_CHECK_PARAM_NON_NULL(params->conn_handler.cb, status, + goto err_free_listener); + listener->conn_cb = params->conn_handler.cb; + listener->arg = params->conn_handler.arg; + } + + if (ucp_worker_sockaddr_is_cm_proto(worker)) { + status = ucp_listen_on_cm(listener, params); + } else { + status = ucp_listen_on_iface(listener, params); + } + + if (status == UCS_OK) { + *listener_p = listener; + goto out; + } + +err_free_listener: + ucs_free(listener); +out: + UCS_ASYNC_UNBLOCK(&worker->async); + return 
status; +} + +void ucp_listener_destroy(ucp_listener_h listener) +{ + ucs_trace("listener %p: destroying", listener); + + if (ucp_worker_sockaddr_is_cm_proto(listener->worker)) { + ucp_listener_close_uct_listeners(listener); + } else { + ucp_listener_close_ifaces(listener); + } + + ucs_free(listener); +} + +ucs_status_t ucp_listener_reject(ucp_listener_h listener, + ucp_conn_request_h conn_request) +{ + ucp_worker_h worker = listener->worker; + + UCS_ASYNC_BLOCK(&worker->async); + + if (ucp_worker_sockaddr_is_cm_proto(worker)) { + uct_listener_reject(conn_request->uct.listener, conn_request->uct_req); + ucs_free(conn_request->remote_dev_addr); + } else { + uct_iface_reject(conn_request->uct.iface, conn_request->uct_req); + } + + UCS_ASYNC_UNBLOCK(&worker->async); + + ucs_free(conn_request); + + return UCS_OK; +} diff --git a/src/ucp/core/ucp_listener.h b/src/ucp/core/ucp_listener.h new file mode 100644 index 0000000..5385a2e --- /dev/null +++ b/src/ucp/core/ucp_listener.h @@ -0,0 +1,48 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + + +#ifndef UCP_LISTENER_H_ +#define UCP_LISTENER_H_ + +#include "ucp_worker.h" + +/** + * UCP listener + */ +typedef struct ucp_listener { + ucp_worker_h worker; + + union { + ucp_worker_iface_t **wifaces; /* Array of UCT interface + pointers to listen on */ + uct_listener_h *listeners;/* Array of UCT listeners to + listen on */ + }; + + struct sockaddr_storage sockaddr; /* Listening sockaddr */ + ucp_rsc_index_t num_rscs; /* Number of UCT listening + resources (wifaces or + listeners) */ + ucp_listener_accept_callback_t accept_cb; /* Listen accept callback + which creates an endpoint + */ + ucp_listener_conn_callback_t conn_cb; /* Listen callback which + creates a handle to + connection request to the + remote endpoint */ + void *arg; /* User's arg for the accept + callback */ + uct_worker_cb_id_t prog_id; /* Slow-path callback */ +} ucp_listener_t; + + +void ucp_listener_schedule_accept_cb(ucp_ep_h ep); + +int ucp_listener_accept_cb_remove_filter(const ucs_callbackq_elem_t *elem, + void *arg); + +#endif diff --git a/src/ucp/core/ucp_mm.c b/src/ucp/core/ucp_mm.c new file mode 100644 index 0000000..86bb7cf --- /dev/null +++ b/src/ucp/core/ucp_mm.c @@ -0,0 +1,754 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucp_mm.h" +#include "ucp_context.h" +#include "ucp_worker.h" + +#include +#include +#include +#include +#include +#include +#include + + +static ucp_mem_t ucp_mem_dummy_handle = { + .address = NULL, + .length = 0, + .alloc_method = UCT_ALLOC_METHOD_LAST, + .alloc_md = NULL, + .md_map = 0 +}; + + +ucs_status_t ucp_mem_rereg_mds(ucp_context_h context, ucp_md_map_t reg_md_map, + void *address, size_t length, unsigned uct_flags, + uct_md_h alloc_md, ucs_memory_type_t mem_type, + uct_mem_h *alloc_md_memh_p, uct_mem_h *uct_memh, + ucp_md_map_t *md_map_p) +{ + unsigned memh_index, prev_memh_index; + uct_mem_h *prev_uct_memh; + ucp_md_map_t new_md_map; + const uct_md_attr_t *md_attr; + unsigned prev_num_memh; + unsigned md_index; + ucs_status_t status; + ucs_log_level_t level; + + if (reg_md_map == *md_map_p) { + return UCS_OK; /* shortcut - no changes required */ + } + + prev_num_memh = ucs_popcount(*md_map_p & reg_md_map); + prev_uct_memh = ucs_alloca(prev_num_memh * sizeof(*prev_uct_memh)); + + /* Go over previous handles, save only the ones we will need */ + memh_index = 0; + prev_memh_index = 0; + ucs_for_each_bit(md_index, *md_map_p) { + if (reg_md_map & UCS_BIT(md_index)) { + /* memh still needed, save it */ + ucs_assert(prev_memh_index < prev_num_memh); + prev_uct_memh[prev_memh_index++] = uct_memh[memh_index]; + } else if (alloc_md == context->tl_mds[md_index].md) { + /* memh not needed and allocated, return it */ + if (alloc_md_memh_p != NULL) { + *alloc_md_memh_p = uct_memh[memh_index]; + } + } else { + /* memh not needed and registered, deregister it */ + ucs_trace("de-registering memh[%d]=%p from md[%d]", memh_index, + uct_memh[memh_index], md_index); + status = uct_md_mem_dereg(context->tl_mds[md_index].md, + uct_memh[memh_index]); + if (status != UCS_OK) { + ucs_warn("failed to dereg from md[%d]=%s: %s", md_index, + context->tl_mds[md_index].rsc.md_name, + ucs_status_string(status)); + } 
+ } + + VALGRIND_MAKE_MEM_UNDEFINED(&uct_memh[memh_index], + sizeof(uct_memh[memh_index])); + ++memh_index; + } + + /* prev_uct_memh should contain the handles which should be reused */ + ucs_assert(prev_memh_index == prev_num_memh); + + /* Go over requested MD map, and use / register new handles */ + new_md_map = 0; + memh_index = 0; + prev_memh_index = 0; + ucs_for_each_bit(md_index, reg_md_map) { + md_attr = &context->tl_mds[md_index].attr; + if (*md_map_p & UCS_BIT(md_index)) { + /* already registered, use previous memh */ + ucs_assert(prev_memh_index < prev_num_memh); + uct_memh[memh_index++] = prev_uct_memh[prev_memh_index++]; + new_md_map |= UCS_BIT(md_index); + } else if (context->tl_mds[md_index].md == alloc_md) { + /* already allocated, add the memh we got from allocation */ + ucs_assert(alloc_md_memh_p != NULL); + uct_memh[memh_index++] = *alloc_md_memh_p; + new_md_map |= UCS_BIT(md_index); + } else if (!length) { + /* don't register zero-length regions */ + continue; + } else if (md_attr->cap.flags & UCT_MD_FLAG_REG) { + if (!(md_attr->cap.reg_mem_types & UCS_BIT(mem_type))) { + status = UCS_ERR_UNSUPPORTED; + } else { + ucs_assert(address && length); + + /* MD supports registration, register new memh on it */ + status = uct_md_mem_reg(context->tl_mds[md_index].md, address, + length, uct_flags, &uct_memh[memh_index]); + } + + if (status == UCS_OK) { + ucs_trace("registered address %p length %zu on md[%d] memh[%d]=%p", + address, length, md_index, memh_index, + uct_memh[memh_index]); + new_md_map |= UCS_BIT(md_index); + ++memh_index; + continue; + } + + level = (uct_flags & UCT_MD_MEM_FLAG_HIDE_ERRORS) ? 
+ UCS_LOG_LEVEL_DEBUG : UCS_LOG_LEVEL_ERROR; + + ucs_log(level, + "failed to register address %p mem_type bit 0x%lx length %zu on " + "md[%d]=%s: %s (md reg_mem_types 0x%lx)", + address, UCS_BIT(mem_type), length, md_index, + context->tl_mds[md_index].rsc.md_name, + ucs_status_string(status), + md_attr->cap.reg_mem_types); + + if (!(uct_flags & UCT_MD_MEM_FLAG_HIDE_ERRORS)) { + goto err_dereg; + } + } + } + + /* Update md_map, note that MDs which did not support registration will be + * missing from the map.*/ + *md_map_p = new_md_map; + return UCS_OK; + +err_dereg: + ucp_mem_rereg_mds(context, 0, NULL, 0, 0, alloc_md, mem_type, + alloc_md_memh_p, uct_memh, md_map_p); + return status; + +} + +/** + * @return Whether MD number 'md_index' is selected by the configuration as part + * of allocation method number 'config_method_index'. + */ +static int ucp_is_md_selected_by_config(ucp_context_h context, + unsigned config_method_index, + unsigned md_index) +{ + const char *cfg_cmpt_name; + const char *cmpt_name; + + cfg_cmpt_name = context->config.alloc_methods[config_method_index].cmpt_name; + cmpt_name = context->tl_mds[md_index].attr.component_name; + + return !strncmp(cfg_cmpt_name, "*", UCT_COMPONENT_NAME_MAX) || + !strncmp(cfg_cmpt_name, cmpt_name, UCT_COMPONENT_NAME_MAX); +} + +static ucs_status_t ucp_mem_alloc(ucp_context_h context, size_t length, + unsigned uct_flags, const char *name, ucp_mem_h memh) +{ + uct_allocated_memory_t mem; + uct_alloc_method_t method; + unsigned method_index, md_index, num_mds; + ucs_status_t status; + uct_md_h *mds; + + mds = ucs_calloc(context->num_mds, sizeof(*mds), "temp mds"); + if (mds == NULL) { + return UCS_ERR_NO_MEMORY; + } + + for (method_index = 0; method_index < context->config.num_alloc_methods; + ++method_index) + { + method = context->config.alloc_methods[method_index].method; + + /* If we are trying MD method, gather all MDs which match the component + * name specified in the configuration. 
+ */ + num_mds = 0; + if (method == UCT_ALLOC_METHOD_MD) { + for (md_index = 0; md_index < context->num_mds; ++md_index) { + if (ucp_is_md_selected_by_config(context, method_index, md_index)) { + mds[num_mds++] = context->tl_mds[md_index].md; + } + } + } + + status = uct_mem_alloc(memh->address, length, uct_flags, &method, 1, mds, + num_mds, name, &mem); + if (status == UCS_OK) { + goto allocated; + } + } + + status = UCS_ERR_NO_MEMORY; + goto out; + +allocated: + ucs_debug("allocated memory at %p with method %s, now registering it", + mem.address, uct_alloc_method_names[mem.method]); + memh->address = mem.address; + memh->length = mem.length; + memh->alloc_method = mem.method; + memh->mem_type = mem.mem_type; + memh->alloc_md = mem.md; + memh->md_map = 0; + status = ucp_mem_rereg_mds(context, UCS_MASK(context->num_mds), memh->address, + memh->length, uct_flags | UCT_MD_MEM_FLAG_HIDE_ERRORS, + memh->alloc_md, memh->mem_type, &mem.memh, + memh->uct, &memh->md_map); + if (status != UCS_OK) { + uct_mem_free(&mem); + } +out: + ucs_free(mds); + return status; +} + + +static inline unsigned +ucp_mem_map_params2uct_flags(ucp_mem_map_params_t *params) +{ + unsigned flags = 0; + + if (params->field_mask & UCP_MEM_MAP_PARAM_FIELD_FLAGS) { + if (params->flags & UCP_MEM_MAP_NONBLOCK) { + flags |= UCT_MD_MEM_FLAG_NONBLOCK; + } + + if (params->flags & UCP_MEM_MAP_FIXED) { + flags |= UCT_MD_MEM_FLAG_FIXED; + } + } + + flags |= UCT_MD_MEM_ACCESS_ALL; + /* TODO: disable atomic if ucp context does not have it */ + + return flags; +} + +/* Matrix of behavior + * |--------------------------------------------------------------------------------| + * | parameter | value | + * |-----------|--------------------------------------------------------------------| + * | ALLOCATE | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | + * | FIXED | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 1 | + * | addr | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | + * |-----------|--------|-----------|-----|-----|-----|-----------|-----|-----------| + * | 
result    | err if | alloc/reg | err | reg | err | alloc/reg | err | alloc/reg |
+ * |           | len >0 |           |     |     |     |  (hint)   |     |  (fixed)  |
+ * |--------------------------------------------------------------------------------|
+ *
+ * ucp_mem_map_check_and_adjust_params() enforces this matrix on a private copy
+ * of the user parameters: the LENGTH field is mandatory, a missing address
+ * defaults to NULL and missing flags default to 0 (the field_mask bits are set
+ * so later code can read both fields), UCP_MEM_MAP_FIXED needs a non-NULL
+ * page-aligned address, and every "err" column yields UCS_ERR_INVALID_PARAM.
+ */
+static inline ucs_status_t ucp_mem_map_check_and_adjust_params(ucp_mem_map_params_t *params)
+{
+    if (!(params->field_mask & UCP_MEM_MAP_PARAM_FIELD_LENGTH)) {
+        ucs_error("The length value for mapping memory isn't set: %s",
+                  ucs_status_string(UCS_ERR_INVALID_PARAM));
+        return UCS_ERR_INVALID_PARAM;
+    }
+
+    /* First of all, define all fields */
+    if (!(params->field_mask & UCP_MEM_MAP_PARAM_FIELD_ADDRESS)) {
+        params->field_mask |= UCP_MEM_MAP_PARAM_FIELD_ADDRESS;
+        params->address = NULL;
+    }
+
+    if (!(params->field_mask & UCP_MEM_MAP_PARAM_FIELD_FLAGS)) {
+        params->field_mask |= UCP_MEM_MAP_PARAM_FIELD_FLAGS;
+        params->flags = 0;
+    }
+
+    if ((params->flags & UCP_MEM_MAP_FIXED) &&
+        (!params->address ||
+         ((uintptr_t)params->address % ucs_get_page_size()))) {
+        ucs_error("UCP_MEM_MAP_FIXED flag requires page aligned address");
+        return UCS_ERR_INVALID_PARAM;
+    }
+
+    /* Now, lets check the rest of erroneous cases from the matrix */
+    if (params->address == NULL) {
+        if (!(params->flags & UCP_MEM_MAP_ALLOCATE) && (params->length > 0)) {
+            ucs_error("Undefined address with nonzero length requires "
+                      "UCP_MEM_MAP_ALLOCATE flag");
+            return UCS_ERR_INVALID_PARAM;
+        }
+    } else if (!(params->flags & UCP_MEM_MAP_ALLOCATE) &&
+               (params->flags & UCP_MEM_MAP_FIXED)) {
+        ucs_error("Wrong combination of flags when address is defined");
+        return UCS_ERR_INVALID_PARAM;
+    }
+
+    return UCS_OK;
+}
+/*
+ * Return non-zero when the parameters carry the FLAGS field and
+ * UCP_MEM_MAP_ALLOCATE is set in it.
+ */
+static inline int ucp_mem_map_is_allocate(ucp_mem_map_params_t *params)
+{
+    return (params->field_mask & UCP_MEM_MAP_PARAM_FIELD_FLAGS) &&
+           (params->flags & UCP_MEM_MAP_ALLOCATE);
+}
+/*
+ * Create a memory handle for a buffer, shared by ucp_mem_map() and the memory
+ * pool chunk allocator.
+ *
+ * The handle is allocated with room for one UCT memory handle per MD of the
+ * context. With is_allocate set the memory itself is obtained through
+ * ucp_mem_alloc() ('address' is then only the requested address); otherwise
+ * the memory type of the user buffer is detected and the buffer is registered
+ * on all MDs, hiding per-MD registration failures.
+ *
+ * On success the new handle is stored in *memh_p. On failure the handle is
+ * freed, *memh_p is left untouched and the error status is returned.
+ */
+static ucs_status_t ucp_mem_map_common(ucp_context_h context, void *address,
+                                       size_t length, unsigned uct_flags,
+                                       int is_allocate, ucp_mem_h *memh_p)
+{
+    ucs_status_t            status;
+    ucp_mem_h               memh;
+
+    /* Allocate the memory handle */
+    ucs_assert(context->num_mds > 0);
+    memh = ucs_malloc(sizeof(*memh) + context->num_mds * sizeof(memh->uct[0]),
+                      "ucp_memh");
+    if (memh == NULL) {
+        status = UCS_ERR_NO_MEMORY;
+        goto out;
+    }
+
+    memh->address = address;
+    memh->length  = length;
+
+    if (is_allocate) {
+        ucs_debug("allocation user memory at %p length %zu", address, length);
+        status = ucp_mem_alloc(context, length, uct_flags,
+                               "user allocation", memh);
+        if (status != UCS_OK) {
+            goto err_free_memh;
+        }
+    } else {
+        memh->mem_type     = ucp_memory_type_detect(context, address, length);
+        memh->alloc_method = UCT_ALLOC_METHOD_LAST; /* marks "not allocated by us" */
+        memh->alloc_md     = NULL;
+        memh->md_map       = 0;
+
+        ucs_debug("registering user memory at %p length %zu mem_type %s",
+                  address, length, ucs_memory_type_names[memh->mem_type]);
+        status = ucp_mem_rereg_mds(context, UCS_MASK(context->num_mds),
+                                   memh->address, memh->length,
+                                   uct_flags | UCT_MD_MEM_FLAG_HIDE_ERRORS,
+                                   NULL, memh->mem_type, NULL, memh->uct,
+                                   &memh->md_map);
+        if (status != UCS_OK) {
+            goto err_free_memh;
+        }
+    }
+
+    ucs_debug("%s buffer %p length %zu memh %p md_map 0x%lx",
+              (memh->alloc_method == UCT_ALLOC_METHOD_LAST) ? "mapped" : "allocated",
+              memh->address, memh->length, memh, memh->md_map);
+    *memh_p = memh;
+    status  = UCS_OK;
+    goto out;
+
+err_free_memh:
+    ucs_free(memh);
+out:
+    return status;
+}
+/*
+ * Undo ucp_mem_map_common(): deregister the buffer from every MD, release the
+ * memory if it was allocated by UCP (alloc_method is not
+ * UCT_ALLOC_METHOD_LAST) and free the handle.
+ *
+ * If deregistration or uct_mem_free() fails, the status is returned and the
+ * handle itself is NOT freed.
+ */
+static ucs_status_t ucp_mem_unmap_common(ucp_context_h context, ucp_mem_h memh)
+{
+    uct_allocated_memory_t mem;
+    uct_mem_h alloc_md_memh;
+    ucs_status_t status;
+
+    ucs_debug("unmapping buffer %p memh %p", memh->address, memh);
+
+    /* Unregister from all memory domains */
+    alloc_md_memh = UCT_MEM_HANDLE_NULL;
+    status = ucp_mem_rereg_mds(context, 0, NULL, 0, 0, memh->alloc_md, memh->mem_type,
+                               &alloc_md_memh, memh->uct, &memh->md_map);
+    if (status != UCS_OK) {
+        goto out;
+    }
+
+    /* If the memory was also allocated, release it */
+    if (memh->alloc_method != UCT_ALLOC_METHOD_LAST) {
+        mem.address = memh->address;
+        mem.length  = memh->length;
+        mem.method  = memh->alloc_method;
+        mem.md      = memh->alloc_md;  /* May be NULL if method is not MD */
+        mem.memh    = alloc_md_memh;   /* May be INVALID if method is not MD */
+
+        status = uct_mem_free(&mem);
+        if (status != UCS_OK) {
+            goto out;
+        }
+    }
+
+    ucs_free(memh);
+    status = UCS_OK;
+out:
+    return status;
+}
+/*
+ * Map (register, and optionally allocate) memory according to 'params'.
+ *
+ * The user parameters are copied and validated; a zero-length request returns
+ * the shared dummy handle without registering anything. The whole operation
+ * runs under the context lock.
+ */
+ucs_status_t ucp_mem_map(ucp_context_h context, const ucp_mem_map_params_t *params,
+                         ucp_mem_h *memh_p)
+{
+    ucs_status_t status;
+    ucp_mem_map_params_t mem_params;
+
+    /* always acquire context lock */
+    UCP_THREAD_CS_ENTER(&context->mt_lock);
+
+    mem_params = *params; /* work on a copy, the user's params are const */
+    status = ucp_mem_map_check_and_adjust_params(&mem_params);
+    if (status != UCS_OK) {
+        goto out;
+    }
+
+    if (mem_params.length == 0) {
+        ucs_debug("mapping zero length buffer, return dummy memh");
+        *memh_p = &ucp_mem_dummy_handle;
+        status  = UCS_OK;
+        goto out;
+    }
+
+    status = ucp_mem_map_common(context, mem_params.address, mem_params.length,
+                                ucp_mem_map_params2uct_flags(&mem_params),
+                                ucp_mem_map_is_allocate(&mem_params), memh_p);
+out:
+    UCP_THREAD_CS_EXIT(&context->mt_lock);
+    return status;
+}
+/*
+ * Release a handle obtained from ucp_mem_map(). The dummy handle of
+ * zero-length mappings is accepted and ignored. Runs under the context lock.
+ */
+ucs_status_t ucp_mem_unmap(ucp_context_h context, ucp_mem_h memh)
+{
+    ucs_status_t status;
+
+    /* always acquire context lock */
+    UCP_THREAD_CS_ENTER(&context->mt_lock);
+
+    if (memh == &ucp_mem_dummy_handle) {
+        ucs_debug("unmapping zero length buffer (dummy memh, do nothing)");
+        status = UCS_OK;
+        goto out;
+    }
+
+    status = ucp_mem_unmap_common(context, memh);
+out:
+    UCP_THREAD_CS_EXIT(&context->mt_lock);
+    return status;
+}
+/*
+ * Register a buffer on a single MD and produce an rkey bundle for it by
+ * packing the MD's memory key and unpacking it with the MD's component.
+ *
+ * If the MD does not have UCT_MD_FLAG_NEED_RKEY, nothing is registered: the
+ * bundle gets a NULL handle and UCT_INVALID_RKEY, *memh is set to
+ * UCT_MEM_HANDLE_NULL and UCS_OK is returned. On pack/unpack failure the
+ * registration is undone and *memh is reset to UCT_MEM_HANDLE_NULL.
+ *
+ * NOTE(review): the registration is requested with
+ * UCT_MD_MEM_FLAG_HIDE_ERRORS, so ucp_mem_rereg_mds() can return UCS_OK
+ * without having registered on the MD; memh[0] is then packed without a check
+ * that md_index made it into *md_map - confirm callers guarantee success.
+ */
+ucs_status_t ucp_mem_type_reg_buffers(ucp_worker_h worker, void *remote_addr,
+                                      size_t length, ucs_memory_type_t mem_type,
+                                      ucp_md_index_t md_index, uct_mem_h *memh,
+                                      ucp_md_map_t *md_map,
+                                      uct_rkey_bundle_t *rkey_bundle)
+{
+    ucp_context_h context        = worker->context;
+    const uct_md_attr_t *md_attr = &context->tl_mds[md_index].attr;
+    uct_component_h cmpt;
+    ucp_tl_md_t *tl_md;
+    ucs_status_t status;
+    char *rkey_buffer;
+
+    if (!(md_attr->cap.flags & UCT_MD_FLAG_NEED_RKEY)) {
+        rkey_bundle->handle = NULL;
+        rkey_bundle->rkey   = UCT_INVALID_RKEY;
+        status              = UCS_OK;
+        goto out;
+    }
+
+    tl_md = &context->tl_mds[md_index];
+    cmpt  = context->tl_cmpts[tl_md->cmpt_index].cmpt;
+
+    status = ucp_mem_rereg_mds(context, UCS_BIT(md_index), remote_addr, length,
+                               UCT_MD_MEM_ACCESS_ALL |
+                               UCT_MD_MEM_FLAG_HIDE_ERRORS,
+                               NULL, mem_type, NULL, memh, md_map);
+    if (status != UCS_OK) {
+        goto out;
+    }
+
+    rkey_buffer = ucs_alloca(md_attr->rkey_packed_size);
+    status      = uct_md_mkey_pack(tl_md->md, memh[0], rkey_buffer);
+    if (status != UCS_OK) {
+        ucs_error("failed to pack key from md[%d]: %s",
+                  md_index, ucs_status_string(status));
+        goto out_dereg_mem;
+    }
+
+    status = uct_rkey_unpack(cmpt, rkey_buffer, rkey_bundle);
+    if (status != UCS_OK) {
+        ucs_error("failed to unpack key from md[%d]: %s",
+                  md_index, ucs_status_string(status));
+        goto out_dereg_mem;
+    }
+
+    return UCS_OK;
+
+out_dereg_mem:
+    ucp_mem_rereg_mds(context, 0, NULL, 0, 0, NULL, mem_type, NULL,
+                      memh, md_map);
+out:
+    *memh = UCT_MEM_HANDLE_NULL;
+    return status;
+}
+/*
+ * Counterpart of ucp_mem_type_reg_buffers(): release the rkey bundle (skipped
+ * when it holds UCT_INVALID_RKEY) and deregister whatever *md_map says is
+ * still registered.
+ */
+void ucp_mem_type_unreg_buffers(ucp_worker_h worker, ucs_memory_type_t mem_type,
+                                ucp_md_index_t md_index, uct_mem_h *memh,
+                                ucp_md_map_t *md_map,
+                                uct_rkey_bundle_t *rkey_bundle)
+{
+    ucp_context_h context = worker->context;
+    ucp_rsc_index_t cmpt_index;
+
+    if (rkey_bundle->rkey != UCT_INVALID_RKEY) {
+        cmpt_index = context->tl_mds[md_index].cmpt_index;
+        uct_rkey_release(context->tl_cmpts[cmpt_index].cmpt, rkey_bundle);
+    }
+
+    ucp_mem_rereg_mds(context, 0, NULL, 0, 0, NULL, mem_type, NULL,
+                      memh, md_map);
+}
+/*
+ * Fill the attributes selected by attr->field_mask (address and/or length)
+ * from the handle. Always returns UCS_OK.
+ */
+ucs_status_t ucp_mem_query(const ucp_mem_h memh, ucp_mem_attr_t *attr)
+{
+    if (attr->field_mask & UCP_MEM_ATTR_FIELD_ADDRESS) {
+        attr->address = memh->address;
+    }
+
+    if (attr->field_mask & UCP_MEM_ATTR_FIELD_LENGTH) {
+        attr->length = memh->length;
+    }
+
+    return UCS_OK;
+}
+/*
+ * Convert a UCP advice value to its UCT counterpart. Only NORMAL and WILLNEED
+ * are accepted; anything else yields UCS_ERR_INVALID_PARAM and leaves
+ * *uct_advice untouched.
+ */
+static ucs_status_t ucp_advice2uct(unsigned ucp_advice, uct_mem_advice_t *uct_advice)
+{
+    switch(ucp_advice) {
+    case UCP_MADV_NORMAL:
+        *uct_advice = UCT_MADV_NORMAL;
+        return UCS_OK;
+    case UCP_MADV_WILLNEED:
+        *uct_advice = UCT_MADV_WILLNEED;
+        return UCS_OK;
+    }
+    return UCS_ERR_INVALID_PARAM;
+}
+/*
+ * Pass an access advice for a sub-range of a mapped region to the MDs.
+ *
+ * The ADDRESS, LENGTH and ADVICE fields are all required and the range must
+ * lie inside [memh->address, memh->address + memh->length); otherwise
+ * UCS_ERR_INVALID_PARAM is returned. For the dummy handle nothing is done.
+ * Otherwise, under the context lock, the advice is given to every MD which
+ * has UCT_MD_FLAG_ADVISE and on which the region has a memory handle. All such
+ * MDs are visited even after a failure; the status of the last failing MD is
+ * returned.
+ */
+ucs_status_t
+ucp_mem_advise(ucp_context_h context, ucp_mem_h memh,
+               ucp_mem_advise_params_t *params)
+{
+    ucs_status_t status, tmp_status;
+    int md_index;
+    uct_mem_advice_t uct_advice;
+    uct_mem_h uct_memh;
+
+    if (!ucs_test_all_flags(params->field_mask,
+                            UCP_MEM_ADVISE_PARAM_FIELD_ADDRESS|
+                            UCP_MEM_ADVISE_PARAM_FIELD_LENGTH|
+                            UCP_MEM_ADVISE_PARAM_FIELD_ADVICE)) {
+        return UCS_ERR_INVALID_PARAM;
+    }
+
+    if ((params->address < memh->address) ||
+        (UCS_PTR_BYTE_OFFSET(params->address, params->length) >
+         UCS_PTR_BYTE_OFFSET(memh->address, memh->length))) {
+        return UCS_ERR_INVALID_PARAM;
+    }
+
+    status = ucp_advice2uct(params->advice, &uct_advice);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    ucs_debug("advice buffer %p length %llu memh %p flags %x",
+              params->address, (unsigned long long)params->length, memh,
+              params->advice);
+
+    if (memh == &ucp_mem_dummy_handle) {
+        return UCS_OK;
+    }
+
+    UCP_THREAD_CS_ENTER(&context->mt_lock);
+
+    status = UCS_OK;
+    for (md_index = 0; md_index < context->num_mds; ++md_index) {
+        uct_memh = ucp_memh2uct(memh, md_index);
+        if (!(context->tl_mds[md_index].attr.cap.flags & UCT_MD_FLAG_ADVISE) ||
+            (uct_memh == NULL)) {
+            continue;
+        }
+        tmp_status = uct_md_mem_advise(context->tl_mds[md_index].md, uct_memh,
+                                       params->address, params->length, uct_advice);
+        if (tmp_status != UCS_OK) {
+            status = tmp_status;
+        }
+    }
+
+    UCP_THREAD_CS_EXIT(&context->mt_lock);
+    return status;
+}
+/*
+ * Memory pool chunk allocator shared by the registered and rndv-fragment
+ * pools. The chunk is allocated and registered through ucp_mem_map_common()
+ * with room for a ucp_mem_desc_t header in front, which records the chunk's
+ * memory handle. *chunk_p points just past that header and *size_p is updated
+ * to the usable size of what was actually allocated.
+ */
+static inline ucs_status_t
+ucp_mpool_malloc(ucp_worker_h worker, ucs_mpool_t *mp, size_t *size_p, void **chunk_p)
+{
+    ucp_mem_desc_t *chunk_hdr;
+    ucp_mem_h memh;
+    ucs_status_t status;
+    ucp_mem_map_params_t mem_params;
+
+    /* Need to get default flags from ucp_mem_map_params2uct_flags() */
+    mem_params.field_mask = 0;
+    status = ucp_mem_map_common(worker->context, NULL,
+                                *size_p + sizeof(*chunk_hdr),
+                                ucp_mem_map_params2uct_flags(&mem_params),
+                                1, &memh);
+    if (status != UCS_OK) {
+        goto out;
+    }
+
+    chunk_hdr       = memh->address;
+    chunk_hdr->memh = memh;
+    *chunk_p        = chunk_hdr + 1;
+    *size_p         = memh->length - sizeof(*chunk_hdr);
+out:
+    return status;
+}
+/*
+ * Release a chunk returned by ucp_mpool_malloc(): the memory handle is read
+ * from the header placed in front of the chunk. The unmap status is ignored.
+ */
+static inline void
+ucp_mpool_free(ucp_worker_h worker, ucs_mpool_t *mp, void *chunk)
+{
+    ucp_mem_desc_t *chunk_hdr;
+
+    chunk_hdr = (ucp_mem_desc_t*)chunk - 1;
+    ucp_mem_unmap_common(worker->context, chunk_hdr->memh);
+}
+/*
+ * Pool object initializer: copy the memory handle from the chunk header into
+ * the ucp_mem_desc_t at the start of the object.
+ */
+void ucp_mpool_obj_init(ucs_mpool_t *mp, void *obj, void *chunk)
+{
+    ucp_mem_desc_t *elem_hdr  = obj;
+    ucp_mem_desc_t *chunk_hdr = (ucp_mem_desc_t*)((ucp_mem_desc_t*)chunk - 1);
+    elem_hdr->memh = chunk_hdr->memh;
+}
+/* Chunk allocator of the worker's reg_mp pool; 'mp' must be worker->reg_mp. */
+ucs_status_t ucp_reg_mpool_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p)
+{
+    ucp_worker_h worker = ucs_container_of(mp, ucp_worker_t, reg_mp);
+
+    return ucp_mpool_malloc(worker, mp, size_p, chunk_p);
+}
+/* Chunk release of the worker's reg_mp pool; 'mp' must be worker->reg_mp. */
+void ucp_reg_mpool_free(ucs_mpool_t *mp, void *chunk)
+{
+    ucp_worker_h worker = ucs_container_of(mp, ucp_worker_t, reg_mp);
+
+    ucp_mpool_free(worker, mp, chunk);
+}
+/* Chunk allocator of the worker's rndv_frag_mp pool; 'mp' must be that pool. */
+ucs_status_t ucp_frag_mpool_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p)
+{
+    ucp_worker_h worker = ucs_container_of(mp, ucp_worker_t, rndv_frag_mp);
+
+    return ucp_mpool_malloc(worker, mp, size_p, chunk_p);
+}
+/* Chunk release of the worker's rndv_frag_mp pool; 'mp' must be that pool. */
+void ucp_frag_mpool_free(ucs_mpool_t *mp, void *chunk)
+{
+    ucp_worker_h worker = ucs_container_of(mp, ucp_worker_t, rndv_frag_mp);
+
+    ucp_mpool_free(worker, mp, chunk);
+}
+/*
+ * Diagnostic helper: allocate 'mem_size' (a memory-units string) through
+ * ucp_mem_map() with UCP_MEM_MAP_ALLOCATE, print to 'stream' how it was
+ * allocated (method or MD name), its page size range and the MDs it got
+ * registered on, then unmap it.
+ *
+ * NOTE(review): the three failure paths use printf(), i.e. stdout rather than
+ * 'stream', and their format strings are just "\n" although two of them still
+ * pass mem_size - the message text looks lost; confirm against upstream.
+ */
+void ucp_mem_print_info(const char *mem_size, ucp_context_h context, FILE *stream)
+{
+    size_t min_page_size, max_page_size;
+    ucp_mem_map_params_t mem_params;
+    size_t mem_size_value;
+    char memunits_str[32];
+    ucs_status_t status;
+    unsigned md_index;
+    ucp_mem_h memh;
+
+    status = ucs_str_to_memunits(mem_size, &mem_size_value);
+    if (status != UCS_OK) {
+        printf("\n");
+        return;
+    }
+
+    mem_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS |
+                            UCP_MEM_MAP_PARAM_FIELD_LENGTH |
+                            UCP_MEM_MAP_PARAM_FIELD_FLAGS;
+    mem_params.address    = NULL;
+    mem_params.length     = mem_size_value;
+    mem_params.flags      = UCP_MEM_MAP_ALLOCATE;
+
+    status = ucp_mem_map(context, &mem_params, &memh);
+    if (status != UCS_OK) {
+        printf("\n", mem_size);
+        return;
+    }
+
+    fprintf(stream, "#\n");
+    fprintf(stream, "# UCP memory allocation\n");
+    fprintf(stream, "#\n");
+
+    ucs_memunits_to_str(memh->length, memunits_str, sizeof(memunits_str));
+    fprintf(stream, "# allocated %s at address %p with ", memunits_str,
+            memh->address);
+
+    if (memh->alloc_md == NULL) {
+        fprintf(stream, "%s", uct_alloc_method_names[memh->alloc_method]);
+    } else {
+        for (md_index = 0; md_index < context->num_mds; ++md_index) {
+            if (memh->alloc_md == context->tl_mds[md_index].md) {
+                fprintf(stream, "%s", context->tl_mds[md_index].rsc.md_name);
+                break;
+            }
+        }
+    }
+
+    ucs_get_mem_page_size(memh->address, memh->length, &min_page_size,
+                          &max_page_size);
+    ucs_memunits_to_str(min_page_size, memunits_str, sizeof(memunits_str));
+    fprintf(stream, ", pagesize: %s", memunits_str);
+    if (min_page_size != max_page_size) {
+        ucs_memunits_to_str(max_page_size, memunits_str, sizeof(memunits_str));
+        fprintf(stream, "-%s", memunits_str);
+    }
+
+    fprintf(stream, "\n");
+    fprintf(stream, "# registered on: ");
+    ucs_for_each_bit(md_index, memh->md_map) {
+        fprintf(stream, "%s ", context->tl_mds[md_index].rsc.md_name);
+    }
+    fprintf(stream, "\n");
+    fprintf(stream, "#\n");
+
+    status = ucp_mem_unmap(context, memh);
+    if (status != UCS_OK) {
+        printf("\n", mem_size);
+    }
+}
diff --git a/src/ucp/core/ucp_mm.h b/src/ucp/core/ucp_mm.h
new file mode 100644
index 0000000..89bfca5
--- /dev/null
+++ b/src/ucp/core/ucp_mm.h
@@ -0,0 +1,233 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+
+#ifndef UCP_MM_H_
+#define UCP_MM_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+
+/* Remote keys with that many remote MDs or less would be allocated from a
+ * memory pool.
+ */
+#define UCP_RKEY_MPOOL_MAX_MD     3
+
+
+/**
+ * UCT remote key along with component handle which should be used to release it.
+ *
+ */
+typedef struct ucp_tl_rkey {
+    uct_rkey_bundle_t             rkey;
+    uct_component_h               cmpt;
+} ucp_tl_rkey_t;
+
+/**
+ * Rkey flags
+ */
+enum {
+    UCP_RKEY_DESC_FLAG_POOL       = UCS_BIT(0)  /* Descriptor was allocated from pool
+                                                   and must be returned to pool, not freed */
+};
+
+/**
+ * Remote memory key structure.
+ * Contains remote keys for UCT MDs.
+ * md_map specifies which MDs from the current context are present in the array.
+ * The array itself contains only the MDs specified in md_map, without gaps.
+ */
+typedef struct ucp_rkey {
+    /* cached values for the most recent endpoint configuration */
+    struct {
+        ucp_ep_cfg_index_t        ep_cfg_index; /* EP configuration relevant for the cache */
+        ucp_lane_index_t          rma_lane;     /* Lane to use for RMAs */
+        ucp_lane_index_t          amo_lane;     /* Lane to use for AMOs */
+        unsigned                  max_put_short;/* Cached value of max_put_short */
+        uct_rkey_t                rma_rkey;     /* Key to use for RMAs */
+        uct_rkey_t                amo_rkey;     /* Key to use for AMOs */
+        ucp_amo_proto_t           *amo_proto;   /* Protocol for AMOs */
+        ucp_rma_proto_t           *rma_proto;   /* Protocol for RMAs */
+    } cache;
+    ucp_md_map_t                  md_map;       /* Which *remote* MDs have valid memory handles */
+    ucs_memory_type_t             mem_type;     /* Memory type of remote key memory */
+    uint8_t                       flags;        /* Rkey flags */
+#if ENABLE_PARAMS_CHECK
+    ucp_ep_h                      ep;           /* Endpoint compared by UCP_RKEY_RESOLVE;
+                                                   using the rkey on any other endpoint
+                                                   is reported as an error */
+#endif
+    ucp_tl_rkey_t                 tl_rkey[0];   /* UCT rkey for every remote MD */
+} ucp_rkey_t;
+
+
+/**
+ * Memory handle.
+ * Contains general information, and a list of UCT handles.
+ * md_map specifies which MDs from the current context are present in the array.
+ * The array itself contains only the MDs specified in md_map, without gaps.
+ */
+typedef struct ucp_mem {
+    void                          *address;     /* Region start address */
+    size_t                        length;       /* Region length */
+    uct_alloc_method_t            alloc_method; /* Method used to allocate the memory;
+                                                   UCT_ALLOC_METHOD_LAST when the memory
+                                                   was not allocated by UCP */
+    ucs_memory_type_t             mem_type;     /**< type of allocated memory */
+    uct_md_h                      alloc_md;     /* MD used to allocate the memory
+                                                   (NULL if the method is not MD) */
+    ucp_md_map_t                  md_map;       /* Which MDs have valid memory handles */
+    uct_mem_h                     uct[0];       /* Valid memory handles, as popcount(md_map) */
+} ucp_mem_t;
+
+
+/**
+ * Memory descriptor.
+ * Contains a memory handle of the chunk it belongs to.
+ */
+typedef struct ucp_mem_desc {
+    ucp_mem_h                     memh;
+} ucp_mem_desc_t;
+
+
+void ucp_rkey_resolve_inner(ucp_rkey_h rkey, ucp_ep_h ep);
+
+ucp_lane_index_t ucp_rkey_find_rma_lane(ucp_context_h context,
+                                        const ucp_ep_config_t *config,
+                                        ucs_memory_type_t mem_type,
+                                        const ucp_lane_index_t *lanes,
+                                        ucp_rkey_h rkey,
+                                        ucp_lane_map_t ignore,
+                                        uct_rkey_t *uct_rkey_p);
+
+ucs_status_t ucp_reg_mpool_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p);
+
+void ucp_reg_mpool_free(ucs_mpool_t *mp, void *chunk);
+
+void ucp_mpool_obj_init(ucs_mpool_t *mp, void *obj, void *chunk);
+
+ucs_status_t ucp_frag_mpool_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p);
+
+void ucp_frag_mpool_free(ucs_mpool_t *mp, void *chunk);
+
+/**
+ * Update memory registration to a specified set of memory domains.
+ *
+ * @param [in] context     UCP context with MDs to use for registration.
+ * @param [in] reg_md_map  Map of memory domains to update the registration to.
+ *                         MDs which are present in reg_md_map, but not yet
+ *                         registered will be registered.
+ *                         MDs which were registered, but not present in
+ *                         reg_md_map, will be de-registered.
+ * @param [in] address     Address to register, unused if reg_md_map == 0
+ * @param [in] length      Length to register, unused if reg_md_map == 0
+ * @param [in] uct_flags   Flags for UCT registration, unused if reg_md_map == 0
+ * @param [in] alloc_md    If != NULL, MD that was used to register the memory.
+ *                         This MD will not be used to register the memory again;
+ *                         rather, the memh will be taken from *alloc_md_memh_p.
+ * @param [in] mem_type    Memory type of the region. MDs whose registrable
+ *                         memory types do not include it are treated as a
+ *                         failed registration.
+ * @param [inout] alloc_md_memh_p  If non-NULL, specifies/filled with the memory
+ *                                 handle on alloc_md.
+ * @param [inout] uct_memh Array of memory handles to update.
+ * @param [inout] md_map_p Current map of registered MDs, updated by the function
+ *                         to the new map of registered MDs.
+ *
+ * @return UCS_OK on success. A failed registration is an error only when
+ *         UCT_MD_MEM_FLAG_HIDE_ERRORS is not set in uct_flags, in which case
+ *         its status is returned; with the flag set the MD is just left out
+ *         of the new map.
+ *
+ * In case alloc_md != NULL, alloc_md_memh will hold the memory key obtained from
+ * allocation. It will be put in the array of keys in the proper index.
+ */
+ucs_status_t ucp_mem_rereg_mds(ucp_context_h context, ucp_md_map_t reg_md_map,
+                               void *address, size_t length, unsigned uct_flags,
+                               uct_md_h alloc_md, ucs_memory_type_t mem_type,
+                               uct_mem_h *alloc_md_memh_p, uct_mem_h *uct_memh,
+                               ucp_md_map_t *md_map_p);
+
+size_t ucp_rkey_packed_size(ucp_context_h context, ucp_md_map_t md_map);
+
+void ucp_rkey_packed_copy(ucp_context_h context, ucp_md_map_t md_map,
+                          ucs_memory_type_t mem_type, void *rkey_buffer,
+                          const void* uct_rkeys[]);
+
+ssize_t ucp_rkey_pack_uct(ucp_context_h context, ucp_md_map_t md_map,
+                          const uct_mem_h *memh, ucs_memory_type_t mem_type,
+                          void *rkey_buffer);
+
+void ucp_rkey_dump_packed(const void *rkey_buffer, char *buffer, size_t max);
+/**
+ * Register a buffer on MD 'md_index' and create an rkey bundle for it.
+ * Release with ucp_mem_type_unreg_buffers().
+ */
+ucs_status_t ucp_mem_type_reg_buffers(ucp_worker_h worker, void *remote_addr,
+                                      size_t length, ucs_memory_type_t mem_type,
+                                      ucp_md_index_t md_index, uct_mem_h *memh,
+                                      ucp_md_map_t *md_map,
+                                      uct_rkey_bundle_t *rkey_bundle);
+/**
+ * Release the rkey bundle and registration created by
+ * ucp_mem_type_reg_buffers().
+ */
+void ucp_mem_type_unreg_buffers(ucp_worker_h worker, ucs_memory_type_t mem_type,
+                                ucp_md_index_t md_index, uct_mem_h *memh,
+                                ucp_md_map_t *md_map,
+                                uct_rkey_bundle_t *rkey_bundle);
+/**
+ * Read the MD map stored at the very beginning of a packed rkey buffer.
+ */
+static UCS_F_ALWAYS_INLINE ucp_md_map_t
+ucp_rkey_packed_md_map(const void *rkey_buffer)
+{
+    return *(const ucp_md_map_t*)rkey_buffer;
+}
+/**
+ * Look up the UCT memory handle of MD 'md_idx' in a packed handle array.
+ *
+ * @return NULL if md_idx is not set in md_map, otherwise the array element at
+ *         ucs_bitmap2idx(md_map, md_idx).
+ *         NOTE(review): presumably that is the rank of md_idx among the bits
+ *         set in md_map - confirm against ucs_bitmap2idx().
+ */
+static UCS_F_ALWAYS_INLINE uct_mem_h
+ucp_memh_map2uct(const uct_mem_h *uct, ucp_md_map_t md_map, ucp_md_index_t md_idx)
+{
+    if (!(md_map & UCS_BIT(md_idx))) {
+        return NULL;
+    }
+
+    return uct[ucs_bitmap2idx(md_map, md_idx)];
+}
+/**
+ * Same lookup as ucp_memh_map2uct(), on the handle array and MD map of a UCP
+ * memory handle.
+ */
+static UCS_F_ALWAYS_INLINE uct_mem_h
+ucp_memh2uct(ucp_mem_h memh, ucp_md_index_t md_idx)
+{
+    return ucp_memh_map2uct(memh->uct, memh->md_map, md_idx);
+}
+
+/*
+ * Make sure the cached lanes/keys of _rkey match the configuration of _ep
+ * (re-resolving when the endpoint configuration index differs) and evaluate
+ * to UCS_ERR_UNREACHABLE when the rkey has no lane for _op_type (rma or amo),
+ * UCS_OK otherwise. Written as a statement expression; _rkey and _ep are
+ * evaluated more than once.
+ */
+#define UCP_RKEY_RESOLVE_NOCHECK(_rkey, _ep, _op_type) \
+    ({ \
+        ucs_status_t status = UCS_OK; \
+        if (ucs_unlikely((_ep)->cfg_index != (_rkey)->cache.ep_cfg_index)) { \
+            ucp_rkey_resolve_inner(_rkey, _ep); \
+        } \
+        if (ucs_unlikely((_rkey)->cache._op_type##_lane == UCP_NULL_LANE)) { \
+            ucs_error("remote memory is unreachable (remote md_map 0x%lx)", \
+                      (_rkey)->md_map); \
+            status = UCS_ERR_UNREACHABLE; \
+        } \
+        status; \
+    })
+
+/*
+ * With parameter checking enabled, additionally reject (UCS_ERR_INVALID_PARAM)
+ * an rkey used on an endpoint other than the one stored in it; otherwise
+ * UCP_RKEY_RESOLVE is plain UCP_RKEY_RESOLVE_NOCHECK.
+ */
+#if ENABLE_PARAMS_CHECK
+#define UCP_RKEY_RESOLVE(_rkey, _ep, _op_type) \
+    ({ \
+        ucs_status_t status; \
+        if ((_rkey)->ep != (_ep)) { \
+            ucs_error("cannot use a remote key on a different endpoint than it was unpacked on"); \
+            status = UCS_ERR_INVALID_PARAM; \
+        } else { \
+            status = UCP_RKEY_RESOLVE_NOCHECK(_rkey, _ep, _op_type); \
+        } \
+        status; \
+    })
+#else
+#define UCP_RKEY_RESOLVE UCP_RKEY_RESOLVE_NOCHECK
+#endif
+/* Memory type predicates */
+#define UCP_MEM_IS_HOST(_mem_type) ((_mem_type) == UCS_MEMORY_TYPE_HOST)
+#define UCP_MEM_IS_ROCM(_mem_type) ((_mem_type) == UCS_MEMORY_TYPE_ROCM)
+#define UCP_MEM_IS_CUDA(_mem_type) ((_mem_type) == UCS_MEMORY_TYPE_CUDA)
+#define UCP_MEM_IS_CUDA_MANAGED(_mem_type) ((_mem_type) == UCS_MEMORY_TYPE_CUDA_MANAGED)
+#define UCP_MEM_IS_ROCM_MANAGED(_mem_type) ((_mem_type) == UCS_MEMORY_TYPE_ROCM_MANAGED)
+#define UCP_MEM_IS_ACCESSIBLE_FROM_CPU(_mem_type) \
+    (UCS_BIT(_mem_type) & UCS_MEMORY_TYPES_CPU_ACCESSIBLE)
+
+#endif
diff --git a/src/ucp/core/ucp_proxy_ep.c b/src/ucp/core/ucp_proxy_ep.c
new file mode 100644
index 0000000..8e38420
--- /dev/null
+++ b/src/ucp/core/ucp_proxy_ep.c
@@ -0,0 +1,246 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "ucp_proxy_ep.h"
+#include "ucp_ep.inl"
+
+#include
+
+/* Emit ", arg<index>": one argument name of a forwarded call */
+#define UCP_PROXY_EP_PASTE_ARG_NAME(_, _index) \
+    , UCS_PP_TOKENPASTE(arg, _index)
+/* Emit ", <type> arg<index>" from an (index, type) tuple: one typed parameter */
+#define UCP_PROXY_EP_PASTE_ARG_TYPE(_, _bundle) \
+    , UCS_PP_TUPLE_1 _bundle UCS_PP_TOKENPASTE(arg, UCS_PP_TUPLE_0 _bundle)
+
+/* Generate list of typed arguments for a proxy function prototype */
+#define UCP_PROXY_EP_FUNC_ARGS(...) \
+    uct_ep_h ep \
+    UCS_PP_FOREACH(UCP_PROXY_EP_PASTE_ARG_TYPE, _, \
+                   UCS_PP_ZIP((UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__))), \
+                              (__VA_ARGS__)))
+
+/* Generate a list of arguments passed to function call */
+#define UCP_PROXY_EP_FUNC_CALL(...) \
+    proxy_ep->uct_ep \
+    UCS_PP_FOREACH(UCP_PROXY_EP_PASTE_ARG_NAME, _, \
+                   UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__)))
+
+/* Generate a return statement based on return type: expands to "return" for
+ * the value-returning types listed below and to nothing for void */
+#define UCP_PROXY_EP_RETURN(_retval) \
+    UCS_PP_TOKENPASTE(UCP_PROXY_EP_RETURN_, _retval)
+
+#define UCP_PROXY_EP_RETURN_ucs_status_t       return
+#define UCP_PROXY_EP_RETURN_ucs_status_ptr_t   return
+#define UCP_PROXY_EP_RETURN_ssize_t            return
+#define UCP_PROXY_EP_RETURN_void
+
+
+/*
+ * Define a proxy endpoint operation, which redirects the call to the underlying
+ * transport endpoint.
+ * The generated function is named ucp_proxy_ep_<_name>, takes the proxy's
+ * uct_ep_h followed by the listed argument types, and calls uct_ep_<_name>()
+ * on proxy_ep->uct_ep with the same arguments.
+ */
+#define UCP_PROXY_EP_DEFINE_OP(_retval, _name, ...) \
+    static _retval ucp_proxy_ep_##_name(UCP_PROXY_EP_FUNC_ARGS(__VA_ARGS__)) \
+    { \
+        ucp_proxy_ep_t *proxy_ep = ucs_derived_of(ep, ucp_proxy_ep_t); \
+        UCP_PROXY_EP_RETURN(_retval) \
+            uct_ep_##_name(UCP_PROXY_EP_FUNC_CALL(__VA_ARGS__)); \
+    }
+
+
+UCP_PROXY_EP_DEFINE_OP(ucs_status_t, put_short, const void*, unsigned,
+                       uint64_t, uct_rkey_t)
+UCP_PROXY_EP_DEFINE_OP(ssize_t, put_bcopy, uct_pack_callback_t, void*,
+                       uint64_t, uct_rkey_t)
+UCP_PROXY_EP_DEFINE_OP(ucs_status_t, put_zcopy, const uct_iov_t*, size_t,
+                       uint64_t, uct_rkey_t, uct_completion_t*)
+UCP_PROXY_EP_DEFINE_OP(ucs_status_t, get_bcopy, uct_unpack_callback_t, void*,
+                       size_t, uint64_t, uct_rkey_t, uct_completion_t*)
+UCP_PROXY_EP_DEFINE_OP(ucs_status_t, get_zcopy, const uct_iov_t*, size_t,
+                       uint64_t, uct_rkey_t, uct_completion_t*)
+UCP_PROXY_EP_DEFINE_OP(ucs_status_t, am_short, uint8_t, uint64_t, const void*,
+                       unsigned)
+UCP_PROXY_EP_DEFINE_OP(ssize_t, am_bcopy, uint8_t, uct_pack_callback_t, void*,
+                       unsigned)
+UCP_PROXY_EP_DEFINE_OP(ucs_status_t, am_zcopy, uint8_t, const void*, unsigned,
+                       const uct_iov_t*, size_t, unsigned, uct_completion_t*)
+UCP_PROXY_EP_DEFINE_OP(ucs_status_t, atomic_cswap64, uint64_t, uint64_t, + uint64_t, uct_rkey_t, uint64_t*, uct_completion_t*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, atomic_cswap32, uint32_t, uint32_t, + uint64_t, uct_rkey_t, uint32_t*, uct_completion_t*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, atomic64_post, uct_atomic_op_t, + uint64_t, uint64_t, uct_rkey_t) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, atomic32_post, uct_atomic_op_t, + uint32_t, uint64_t, uct_rkey_t) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, atomic64_fetch, uct_atomic_op_t, uint64_t, + uint64_t*, uint64_t, uct_rkey_t, uct_completion_t*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, atomic32_fetch, uct_atomic_op_t, uint32_t, + uint32_t*, uint64_t, uct_rkey_t, uct_completion_t*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, tag_eager_short, uct_tag_t, const void*, + size_t) +UCP_PROXY_EP_DEFINE_OP(ssize_t, tag_eager_bcopy, uct_tag_t, uint64_t, + uct_pack_callback_t, void*, unsigned) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, tag_eager_zcopy, uct_tag_t, uint64_t, + const uct_iov_t*, size_t, unsigned, uct_completion_t*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_ptr_t, tag_rndv_zcopy, uct_tag_t, const void*, + unsigned, const uct_iov_t*, size_t, unsigned, + uct_completion_t*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, tag_rndv_cancel, void*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, tag_rndv_request, uct_tag_t, const void*, + unsigned, unsigned) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, pending_add, uct_pending_req_t*, unsigned) +UCP_PROXY_EP_DEFINE_OP(void, pending_purge, uct_pending_purge_callback_t, void*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, flush, unsigned, uct_completion_t*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, fence, unsigned) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, check, unsigned, uct_completion_t*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, get_address, uct_ep_addr_t*) +UCP_PROXY_EP_DEFINE_OP(ucs_status_t, connect_to_ep, const uct_device_addr_t*, + const uct_ep_addr_t*) +static UCS_CLASS_DEFINE_NAMED_DELETE_FUNC(ucp_proxy_ep_destroy, ucp_proxy_ep_t, 
+ uct_ep_t); + /* Stub for operations a proxy endpoint does not support: reports a bug through ucs_bug() and returns UCS_ERR_UNSUPPORTED. */ +static ucs_status_t ucp_proxy_ep_fatal(uct_iface_h iface, ...) +{ + ucs_bug("unsupported function on proxy endpoint"); + return UCS_ERR_UNSUPPORTED; +} + /* Constructor: records ucp_ep, uct_ep and is_owner, points the endpoint at the embedded interface, and fills the ops table of that interface. */ +UCS_CLASS_INIT_FUNC(ucp_proxy_ep_t, const uct_iface_ops_t *ops, ucp_ep_h ucp_ep, + uct_ep_h uct_ep, int is_owner) +{ + #define UCP_PROXY_EP_SET_OP(_name) \ + self->iface.ops._name = (ops->_name != NULL) ? ops->_name : ucp_proxy_##_name + + self->super.iface = &self->iface; + self->ucp_ep = ucp_ep; + self->uct_ep = uct_ep; + self->is_owner = is_owner; + /* Endpoint operations: take the entry from ops when it is non-NULL, otherwise the ucp_proxy_<name> default. */ + UCP_PROXY_EP_SET_OP(ep_put_short); + UCP_PROXY_EP_SET_OP(ep_put_short); /* NOTE(review): same op registered twice in a row; this second assignment is redundant */ + UCP_PROXY_EP_SET_OP(ep_put_bcopy); + UCP_PROXY_EP_SET_OP(ep_put_zcopy); + UCP_PROXY_EP_SET_OP(ep_get_bcopy); + UCP_PROXY_EP_SET_OP(ep_get_zcopy); + UCP_PROXY_EP_SET_OP(ep_am_short); + UCP_PROXY_EP_SET_OP(ep_am_bcopy); + UCP_PROXY_EP_SET_OP(ep_am_zcopy); + UCP_PROXY_EP_SET_OP(ep_atomic_cswap64); + UCP_PROXY_EP_SET_OP(ep_atomic_cswap32); + UCP_PROXY_EP_SET_OP(ep_atomic64_post); + UCP_PROXY_EP_SET_OP(ep_atomic32_post); + UCP_PROXY_EP_SET_OP(ep_atomic64_fetch); + UCP_PROXY_EP_SET_OP(ep_atomic32_fetch); + UCP_PROXY_EP_SET_OP(ep_tag_eager_short); + UCP_PROXY_EP_SET_OP(ep_tag_eager_bcopy); + UCP_PROXY_EP_SET_OP(ep_tag_eager_zcopy); + UCP_PROXY_EP_SET_OP(ep_tag_rndv_zcopy); + UCP_PROXY_EP_SET_OP(ep_tag_rndv_cancel); + UCP_PROXY_EP_SET_OP(ep_tag_rndv_request); + UCP_PROXY_EP_SET_OP(ep_pending_add); + UCP_PROXY_EP_SET_OP(ep_pending_purge); + UCP_PROXY_EP_SET_OP(ep_flush); + UCP_PROXY_EP_SET_OP(ep_fence); + UCP_PROXY_EP_SET_OP(ep_check); + UCP_PROXY_EP_SET_OP(ep_destroy); + UCP_PROXY_EP_SET_OP(ep_get_address); + UCP_PROXY_EP_SET_OP(ep_connect_to_ep); + /* The remaining entries (interface-level operations and ep_create) are routed to ucp_proxy_ep_fatal. */ + self->iface.ops.iface_tag_recv_zcopy = (uct_iface_tag_recv_zcopy_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_tag_recv_cancel = (uct_iface_tag_recv_cancel_func_t)ucp_proxy_ep_fatal; + self->iface.ops.ep_create = (uct_ep_create_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_flush = (uct_iface_flush_func_t)ucp_proxy_ep_fatal; + 
self->iface.ops.iface_fence = (uct_iface_fence_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_progress_enable = (uct_iface_progress_enable_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_progress_disable = (uct_iface_progress_disable_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_progress = (uct_iface_progress_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_event_fd_get = (uct_iface_event_fd_get_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_event_arm = (uct_iface_event_arm_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_close = (uct_iface_close_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_query = (uct_iface_query_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_get_device_address = (uct_iface_get_device_address_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_get_address = (uct_iface_get_address_func_t)ucp_proxy_ep_fatal; + self->iface.ops.iface_is_reachable = (uct_iface_is_reachable_func_t)ucp_proxy_ep_fatal; + + return UCS_OK; +} + /* Destructor: destroys the underlying uct_ep only when one is set and this proxy owns it. */ +static UCS_CLASS_CLEANUP_FUNC(ucp_proxy_ep_t) +{ + if ((self->uct_ep != NULL) && self->is_owner) { + uct_ep_destroy(self->uct_ep); + } +} + /* Returns non-zero when the interface of the endpoint uses ucp_proxy_ep_destroy as its ep_destroy operation. */ +int ucp_proxy_ep_test(uct_ep_h uct_ep) +{ + return uct_ep->iface->ops.ep_destroy == ucp_proxy_ep_destroy; +} + /* Returns the wrapped uct_ep and resets the pointer held by the proxy to NULL, so the class cleanup function will not destroy it. The caller must pass a proxy endpoint; this is not checked here. */ +uct_ep_h ucp_proxy_ep_extract(uct_ep_h ep) +{ + ucp_proxy_ep_t *proxy_ep = ucs_derived_of(ep, ucp_proxy_ep_t); + uct_ep_h uct_ep; + + uct_ep = proxy_ep->uct_ep; + proxy_ep->uct_ep = NULL; + return uct_ep; +} + /* If uct_ep is a proxy whose underlying endpoint is owned_ep, repoint it at replacement_ep. Non-proxy endpoints are left untouched. */ +static void ucp_proxy_ep_replace_if_owned(uct_ep_h uct_ep, uct_ep_h owned_ep, + uct_ep_h replacement_ep) +{ + ucp_proxy_ep_t *proxy_ep; + + if (ucp_proxy_ep_test(uct_ep)) { + proxy_ep = ucs_derived_of(uct_ep, ucp_proxy_ep_t); + if (proxy_ep->uct_ep == owned_ep) { + proxy_ep->uct_ep = replacement_ep; + } + ucs_assert(replacement_ep != NULL); + } +} + +void ucp_proxy_ep_replace(ucp_proxy_ep_t *proxy_ep) +{ + ucp_ep_h ucp_ep = proxy_ep->ucp_ep; + ucp_lane_index_t lane; + uct_ep_h tl_ep = NULL; + + ucs_assert(proxy_ep->uct_ep != NULL); + for (lane = 0; 
lane < ucp_ep_num_lanes(ucp_ep); ++lane) { + if (ucp_ep->uct_eps[lane] == &proxy_ep->super) { + ucs_assert(proxy_ep->uct_ep != NULL); /* make sure there is only one match */ + ucp_ep->uct_eps[lane] = proxy_ep->uct_ep; + tl_ep = ucp_ep->uct_eps[lane]; + proxy_ep->uct_ep = NULL; /* cleared so that a second matching lane trips the assert above */ + } + } + + /* go through the lanes and check if the proxy ep that is being destroyed, + * is pointed to by another proxy ep. if so, redirect that other proxy ep + * to point to the underlying uct ep. */ + for (lane = 0; lane < ucp_ep_num_lanes(ucp_ep); ++lane) { + ucp_proxy_ep_replace_if_owned(ucp_ep->uct_eps[lane], &proxy_ep->super, + tl_ep); + } + /* proxy_ep->uct_ep was cleared on a match, so the class cleanup leaves the underlying endpoint alive. */ + uct_ep_destroy(&proxy_ep->super); +} + /* Overwrites the underlying endpoint and the ownership flag of the proxy; the previously stored uct_ep is not destroyed here. */ +void ucp_proxy_ep_set_uct_ep(ucp_proxy_ep_t *proxy_ep, uct_ep_h uct_ep, + int is_owner) +{ + proxy_ep->uct_ep = uct_ep; + proxy_ep->is_owner = is_owner; +} + +UCS_CLASS_DEFINE(ucp_proxy_ep_t, void); diff --git a/src/ucp/core/ucp_proxy_ep.h b/src/ucp/core/ucp_proxy_ep.h new file mode 100644 index 0000000..fe15d32 --- /dev/null +++ b/src/ucp/core/ucp_proxy_ep.h @@ -0,0 +1,52 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_PROXY_EP_H_ +#define UCP_PROXY_EP_H_ + +#include "ucp_types.h" + +#include +#include + + +/** + * Generic proxy endpoint, used to change behavior of a specific transport lane + * without adding data-path checks when not needed. + * By default, all transport endpoint operations are redirected to the underlying + * UCT endpoint, and interface operations would result in a fatal error. + * When this endpoint is destroyed, the lane in UCP endpoint is replaced with + * the real transport endpoint. 
+ * + * TODO make sure it works with err handling and print_ucp_info + */ +typedef struct ucp_proxy_ep { + uct_ep_t super; /**< Derived from uct_ep */ + uct_iface_t iface; /**< Embedded stub interface */ + ucp_ep_h ucp_ep; /**< Pointer to UCP endpoint */ + uct_ep_h uct_ep; /**< Underlying transport endpoint */ + int is_owner; /**< Is uct_ep owned by this proxy ep */ +} ucp_proxy_ep_t; + + +UCS_CLASS_DECLARE(ucp_proxy_ep_t, const uct_iface_ops_t *ops, ucp_ep_h ucp_ep, + uct_ep_h uct_ep, int is_owner); + + +/** + * Replace the proxy endpoint by the underlying transport endpoint, and destroy + * the proxy endpoint. + */ +void ucp_proxy_ep_replace(ucp_proxy_ep_t *proxy_ep); + +int ucp_proxy_ep_test(uct_ep_h ep); + +uct_ep_h ucp_proxy_ep_extract(uct_ep_h ep); + +void ucp_proxy_ep_set_uct_ep(ucp_proxy_ep_t *proxy_ep, uct_ep_h uct_ep, + int is_owner); + +#endif diff --git a/src/ucp/core/ucp_request.c b/src/ucp/core/ucp_request.c new file mode 100644 index 0000000..3cca377 --- /dev/null +++ b/src/ucp/core/ucp_request.c @@ -0,0 +1,420 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucp_context.h" +#include "ucp_worker.h" +#include "ucp_request.inl" + +#include + +#include +#include +#include + + /* The request argument of the functions below is the user-visible pointer; the ucp_request_t header is located immediately before it (request - 1). */ +int ucp_request_is_completed(void *request) +{ + ucp_request_t *req = (ucp_request_t*)request - 1; + return !!(req->flags & UCP_REQUEST_FLAG_COMPLETED); +} + /* Returns the stored status once the COMPLETED flag is set, UCS_INPROGRESS otherwise. */ +ucs_status_t ucp_request_check_status(void *request) +{ + ucp_request_t *req = (ucp_request_t*)request - 1; + + if (req->flags & UCP_REQUEST_FLAG_COMPLETED) { + ucs_assert(req->status != UCS_INPROGRESS); + return req->status; + } + return UCS_INPROGRESS; +} + /* Same as ucp_request_check_status(); additionally copies the tag receive info into *info once the request is no longer in progress. */ +ucs_status_t ucp_tag_recv_request_test(void *request, ucp_tag_recv_info_t *info) +{ + ucp_request_t *req = (ucp_request_t*)request - 1; + ucs_status_t status = ucp_request_check_status(request); + + if (status != UCS_INPROGRESS) { + ucs_assert(req->flags & UCP_REQUEST_FLAG_RECV); + *info = req->recv.tag.info; + } + + return status; +} + /* Same as ucp_request_check_status(); additionally stores the received stream length into *length_p once the request is no longer in progress. */ +ucs_status_t ucp_stream_recv_request_test(void *request, size_t *length_p) +{ + ucp_request_t *req = (ucp_request_t*)request - 1; + ucs_status_t status = ucp_request_check_status(request); + + if (status != UCS_INPROGRESS) { + ucs_assert(req->flags & UCP_REQUEST_FLAG_STREAM_RECV); + *length_p = req->recv.stream.length; + } + + return status; +} + /* Shared implementation of release and free: a completed request goes back to the pool right away; otherwise it is marked RELEASED and the bits in cb_flag are cleared. Runs inside the conditional worker critical section. */ +static UCS_F_ALWAYS_INLINE void +ucp_request_release_common(void *request, uint8_t cb_flag, const char *debug_name) +{ + ucp_request_t *req = (ucp_request_t*)request - 1; + ucp_worker_h UCS_V_UNUSED worker = ucs_container_of(ucs_mpool_obj_owner(req), + ucp_worker_t, req_mp); + uint32_t flags; + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + flags = req->flags; + ucs_trace_req("%s request %p (%p) "UCP_REQUEST_FLAGS_FMT, debug_name, + req, req + 1, UCP_REQUEST_FLAGS_ARG(flags)); + + ucs_assert(!(flags & UCP_REQUEST_DEBUG_FLAG_EXTERNAL)); + ucs_assert(!(flags & UCP_REQUEST_FLAG_RELEASED)); + + if (ucs_likely(flags & UCP_REQUEST_FLAG_COMPLETED)) { + ucp_request_put(req); + } else { + 
req->flags = (flags | UCP_REQUEST_FLAG_RELEASED) & ~cb_flag; + } + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); +} + +UCS_PROFILE_FUNC_VOID(ucp_request_release, (request), void *request) +{ + /* mark request as released */ + ucp_request_release_common(request, 0, "release"); +} + +UCS_PROFILE_FUNC_VOID(ucp_request_free, (request), void *request) +{ + /* mark request as released and disable the callback */ + ucp_request_release_common(request, UCP_REQUEST_FLAG_CALLBACK, "free"); +} + /* Cancels a receive request: no-op for completed requests, and only requests carrying the EXPECTED flag are handled. */ +UCS_PROFILE_FUNC_VOID(ucp_request_cancel, (worker, request), + ucp_worker_h worker, void *request) +{ + ucp_request_t *req = (ucp_request_t*)request - 1; + int removed; + + if (req->flags & UCP_REQUEST_FLAG_COMPLETED) { + return; + } + + if (req->flags & UCP_REQUEST_FLAG_EXPECTED) { + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + removed = ucp_tag_exp_remove(&worker->tm, req); + /* If the tag was posted to the transport (OFFLOADED), we need to wait for its completion instead of completing here */ + if (removed && !(req->flags & UCP_REQUEST_FLAG_OFFLOADED)) { + ucp_request_complete_tag_recv(req, UCS_ERR_CANCELED); + } + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + } +} + /* Memory pool object-init hook: forwards to the request init callback from the context configuration, if set, passing the user-visible area (req + 1). */ +static void ucp_worker_request_init_proxy(ucs_mpool_t *mp, void *obj, void *chunk) +{ + ucp_worker_h worker = ucs_container_of(mp, ucp_worker_t, req_mp); + ucp_context_h context = worker->context; + ucp_request_t *req = obj; + + if (context->config.request.init != NULL) { + context->config.request.init(req + 1); + } +} + /* Memory pool object-cleanup hook: forwards to the request cleanup callback from the context configuration, if set. */ +static void ucp_worker_request_fini_proxy(ucs_mpool_t *mp, void *obj) +{ + ucp_worker_h worker = ucs_container_of(mp, ucp_worker_t, req_mp); + ucp_context_h context = worker->context; + ucp_request_t *req = obj; + + if (context->config.request.cleanup != NULL) { + context->config.request.cleanup(req + 1); + } +} + +ucs_mpool_ops_t ucp_request_mpool_ops = { + .chunk_alloc = ucs_mpool_hugetlb_malloc, + .chunk_release = ucs_mpool_hugetlb_free, + .obj_init = ucp_worker_request_init_proxy, + .obj_cleanup = ucp_worker_request_fini_proxy +}; + 
+ucs_mpool_ops_t ucp_rndv_get_mpool_ops = { + .chunk_alloc = ucs_mpool_chunk_malloc, + .chunk_release = ucs_mpool_chunk_free, + .obj_init = NULL, + .obj_cleanup = NULL +}; + /* Queues the request on the pending queue of its lane. Returns 1 when queued (and sets *req_status to UCS_INPROGRESS), 0 when the transport answered UCS_ERR_BUSY so the caller should retry the send; any other status is fatal. */ +int ucp_request_pending_add(ucp_request_t *req, ucs_status_t *req_status, + unsigned pending_flags) +{ + ucs_status_t status; + uct_ep_h uct_ep; + + ucs_assertv(req->send.lane != UCP_NULL_LANE, "%s() did not set req->send.lane", + ucs_debug_get_symbol_name(req->send.uct.func)); + + uct_ep = req->send.ep->uct_eps[req->send.lane]; + status = uct_ep_pending_add(uct_ep, &req->send.uct, pending_flags); + if (status == UCS_OK) { + ucs_trace_data("ep %p: added pending uct request %p to lane[%d]=%p", + req->send.ep, req, req->send.lane, uct_ep); + *req_status = UCS_INPROGRESS; + req->send.pending_lane = req->send.lane; + return 1; + } else if (status == UCS_ERR_BUSY) { + /* Could not add, try to send again */ + return 0; + } + + /* Unexpected error while adding to pending */ + ucs_fatal("invalid return status from uct_ep_pending_add(): %s", + ucs_status_string(status)); +} + /* Deregisters the first count entries of dt_reg by calling ucp_mem_rereg_mds() with an empty md map; each entry is expected to end up with md_map == 0. */ +static void ucp_request_dt_dereg(ucp_context_t *context, ucp_dt_reg_t *dt_reg, + size_t count, ucp_request_t *req_dbg) +{ + size_t i; + + for (i = 0; i < count; ++i) { + ucp_trace_req(req_dbg, "mem dereg buffer %ld/%ld md_map 0x%"PRIx64, + i, count, dt_reg[i].md_map); + ucp_mem_rereg_mds(context, 0, NULL, 0, 0, NULL, UCS_MEMORY_TYPE_HOST, NULL, + dt_reg[i].memh, &dt_reg[i].md_map); + ucs_assert(dt_reg[i].md_map == 0); + } +} + /* Registers the user buffer on the memory domains in md_map: a single registration for contiguous datatypes, one registration per non-empty entry for IOV datatypes. */ +UCS_PROFILE_FUNC(ucs_status_t, ucp_request_memory_reg, + (context, md_map, buffer, length, datatype, state, mem_type, req_dbg, uct_flags), + ucp_context_t *context, ucp_md_map_t md_map, void *buffer, + size_t length, ucp_datatype_t datatype, ucp_dt_state_t *state, + ucs_memory_type_t mem_type, ucp_request_t *req_dbg, unsigned uct_flags) +{ + size_t iov_it, iovcnt; + const ucp_dt_iov_t *iov; + ucp_dt_reg_t *dt_reg; + ucs_status_t status; + int flags; + int level; + + ucs_trace_func("context=%p md_map=0x%lx 
buffer=%p length=%zu datatype=0x%lx " + "state=%p", context, md_map, buffer, length, datatype, state); + + status = UCS_OK; + flags = UCT_MD_MEM_ACCESS_RMA | uct_flags; /* RMA access is always requested; caller flags are OR-ed in */ + switch (datatype & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + ucs_assert(ucs_popcount(md_map) <= UCP_MAX_OP_MDS); + status = ucp_mem_rereg_mds(context, md_map, buffer, length, flags, + NULL, mem_type, NULL, state->dt.contig.memh, + &state->dt.contig.md_map); + ucp_trace_req(req_dbg, "mem reg md_map 0x%"PRIx64"/0x%"PRIx64, + state->dt.contig.md_map, md_map); + break; + case UCP_DATATYPE_IOV: + iovcnt = state->dt.iov.iovcnt; + iov = buffer; + dt_reg = ucs_malloc(sizeof(*dt_reg) * iovcnt, "iov_dt_reg"); + if (NULL == dt_reg) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + for (iov_it = 0; iov_it < iovcnt; ++iov_it) { + dt_reg[iov_it].md_map = 0; /* zero-length entries stay unregistered with an empty map */ + if (iov[iov_it].length) { + status = ucp_mem_rereg_mds(context, md_map, iov[iov_it].buffer, + iov[iov_it].length, flags, NULL, + mem_type, NULL, dt_reg[iov_it].memh, + &dt_reg[iov_it].md_map); + if (status != UCS_OK) { + /* unregister previously registered memory */ + ucp_request_dt_dereg(context, dt_reg, iov_it, req_dbg); + ucs_free(dt_reg); + goto err; + } + ucp_trace_req(req_dbg, + "mem reg iov %ld/%ld md_map 0x%"PRIx64"/0x%"PRIx64, + iov_it, iovcnt, dt_reg[iov_it].md_map, md_map); + } + } + state->dt.iov.dt_reg = dt_reg; + break; + default: + status = UCS_ERR_INVALID_PARAM; + ucs_error("Invalid data type %lx", datatype); + } + +err: + if (status != UCS_OK) { + level = (flags & UCT_MD_MEM_FLAG_HIDE_ERRORS) ? 
+ UCS_LOG_LEVEL_DEBUG : UCS_LOG_LEVEL_ERROR; + ucs_log(level, + "failed to register user buffer datatype 0x%lx address %p len %zu:" + " %s", datatype, buffer, length, ucs_status_string(status)); + } + return status; +} + /* Releases the registrations made by ucp_request_memory_reg(): the single contig registration, or every IOV entry followed by freeing the dt_reg array. Other datatype classes are ignored. */ +UCS_PROFILE_FUNC_VOID(ucp_request_memory_dereg, (context, datatype, state, req_dbg), + ucp_context_t *context, ucp_datatype_t datatype, + ucp_dt_state_t *state, ucp_request_t *req_dbg) +{ + ucs_trace_func("context=%p datatype=0x%lx state=%p", context, datatype, + state); + + switch (datatype & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + ucp_request_dt_dereg(context, &state->dt.contig, 1, req_dbg); + break; + case UCP_DATATYPE_IOV: + if (state->dt.iov.dt_reg != NULL) { + ucp_request_dt_dereg(context, state->dt.iov.dt_reg, + state->dt.iov.iovcnt, req_dbg); + ucs_free(state->dt.iov.dt_reg); + state->dt.iov.dt_reg = NULL; + } + break; + default: + break; + } +} + +/* NOTE: deprecated */ +ucs_status_t ucp_request_test(void *request, ucp_tag_recv_info_t *info) +{ + ucp_request_t *req = (ucp_request_t*)request - 1; + + if (req->flags & UCP_REQUEST_FLAG_COMPLETED) { + if (req->flags & UCP_REQUEST_FLAG_RECV) { + *info = req->recv.tag.info; + } + ucs_assert(req->status != UCS_INPROGRESS); + return req->status; + } + return UCS_INPROGRESS; +} + /* Selects the progress function for a send request by length: short when length <= max_short, bcopy below zcopy_thresh, zcopy below zcopy_max. */ +ucs_status_t +ucp_request_send_start(ucp_request_t *req, ssize_t max_short, + size_t zcopy_thresh, size_t zcopy_max, size_t dt_count, + const ucp_ep_msg_config_t* msg_config, + const ucp_request_send_proto_t *proto) +{ + size_t length = req->send.length; + ucs_status_t status; + int multi; + + if ((ssize_t)length <= max_short) { + /* short */ + req->send.uct.func = proto->contig_short; + UCS_PROFILE_REQUEST_EVENT(req, "start_contig_short", req->send.length); + return UCS_OK; + } else if (length < zcopy_thresh) { + /* bcopy */ + ucp_request_send_state_reset(req, NULL, UCP_REQUEST_SEND_PROTO_BCOPY_AM); + if (length <= msg_config->max_bcopy - proto->only_hdr_size) { + req->send.uct.func = 
proto->bcopy_single; + UCS_PROFILE_REQUEST_EVENT(req, "start_bcopy_single", req->send.length); + } else { + req->send.uct.func = proto->bcopy_multi; + /* multi-fragment sends take the next message id from the worker counter */ + if (req->flags & UCP_REQUEST_FLAG_SEND_AM) { + req->send.am.message_id = req->send.ep->worker->am_message_id++; + } else if (req->flags & UCP_REQUEST_FLAG_SEND_TAG) { + req->send.tag.message_id = req->send.ep->worker->am_message_id++; + req->send.tag.am_bw_index = 1; + } + + req->send.pending_lane = UCP_NULL_LANE; + UCS_PROFILE_REQUEST_EVENT(req, "start_bcopy_multi", req->send.length); + } + return UCS_OK; + } else if (length < zcopy_max) { + /* zcopy */ + ucp_request_send_state_reset(req, proto->zcopy_completion, + UCP_REQUEST_SEND_PROTO_ZCOPY_AM); + status = ucp_request_send_buffer_reg_lane(req, req->send.lane, 0); + if (status != UCS_OK) { + return status; + } + /* multi-fragment when the payload exceeds max_zcopy minus the header, or when an IOV has more non-empty entries than max_iov */ + if (ucs_unlikely(length > msg_config->max_zcopy - proto->only_hdr_size)) { + multi = 1; + } else if (ucs_unlikely(UCP_DT_IS_IOV(req->send.datatype))) { + if (dt_count <= msg_config->max_iov) { + multi = 0; + } else { + multi = ucp_dt_iov_count_nonempty(req->send.buffer, dt_count) > + msg_config->max_iov; + } + } else { + multi = 0; + } + + if (multi) { + req->send.uct.func = proto->zcopy_multi; + + if (req->flags & UCP_REQUEST_FLAG_SEND_AM) { + req->send.am.message_id = req->send.ep->worker->am_message_id++; + } else if (req->flags & UCP_REQUEST_FLAG_SEND_TAG) { + req->send.tag.message_id = req->send.ep->worker->am_message_id++; + req->send.tag.am_bw_index = 1; + } + + req->send.pending_lane = UCP_NULL_LANE; + UCS_PROFILE_REQUEST_EVENT(req, "start_zcopy_multi", req->send.length); + } else { + req->send.uct.func = proto->zcopy_single; + UCS_PROFILE_REQUEST_EVENT(req, "start_zcopy_single", req->send.length); + } + return UCS_OK; + } + /* length >= zcopy_max: none of the short, bcopy or zcopy paths applies */ + return UCS_ERR_NO_PROGRESS; +} + /* Fast-forwards the send state to the end of the data and completes the request with the given status, through the UCT completion callback when one is set. */ +void ucp_request_send_state_ff(ucp_request_t *req, ucs_status_t status) +{ + if (req->send.state.uct_comp.func) { + req->send.state.dt.offset = req->send.length; + 
req->send.state.uct_comp.count = 0; + req->send.state.uct_comp.func(&req->send.state.uct_comp, status); + } else { + ucp_request_complete_send(req, status); + } +} + /* Logs the truncation at debug level, calls the finish operation of a generic receive datatype if one is in use, and always returns UCS_ERR_MESSAGE_TRUNCATED. */ +ucs_status_t ucp_request_recv_msg_truncated(ucp_request_t *req, size_t length, + size_t offset) +{ + ucp_dt_generic_t *dt_gen; + + ucs_debug("message truncated: recv_length %zu offset %zu buffer_size %zu", + length, offset, req->recv.length); + + if (UCP_DT_IS_GENERIC(req->recv.datatype)) { + dt_gen = ucp_dt_generic(req->recv.datatype); + UCS_PROFILE_NAMED_CALL_VOID("dt_finish", dt_gen->ops.finish, + req->recv.state.dt.generic.state); + } + + return UCS_ERR_MESSAGE_TRUNCATED; +} + + diff --git a/src/ucp/core/ucp_request.h b/src/ucp/core/ucp_request.h new file mode 100644 index 0000000..a348047 --- /dev/null +++ b/src/ucp/core/ucp_request.h @@ -0,0 +1,372 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * Copyright (c) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED. + * Copyright (C) Los Alamos National Security, LLC. 2019 ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCP_REQUEST_H_ +#define UCP_REQUEST_H_ + +#include "ucp_context.h" +#include "ucp_mm.h" + +#include +#include +#include +#include +#include +#include +#include +#include + + +#define ucp_trace_req(_sreq, _message, ...) 
\ + ucs_trace_req("req %p: " _message, (_sreq), ## __VA_ARGS__) + + +/** + * Request flags + */ +enum { + UCP_REQUEST_FLAG_COMPLETED = UCS_BIT(0), + UCP_REQUEST_FLAG_RELEASED = UCS_BIT(1), + UCP_REQUEST_FLAG_EXPECTED = UCS_BIT(3), + UCP_REQUEST_FLAG_LOCAL_COMPLETED = UCS_BIT(4), + UCP_REQUEST_FLAG_REMOTE_COMPLETED = UCS_BIT(5), + UCP_REQUEST_FLAG_CALLBACK = UCS_BIT(6), + UCP_REQUEST_FLAG_RECV = UCS_BIT(7), + UCP_REQUEST_FLAG_SYNC = UCS_BIT(8), + UCP_REQUEST_FLAG_OFFLOADED = UCS_BIT(10), + UCP_REQUEST_FLAG_BLOCK_OFFLOAD = UCS_BIT(11), + UCP_REQUEST_FLAG_STREAM_RECV_WAITALL = UCS_BIT(12), + UCP_REQUEST_FLAG_SEND_AM = UCS_BIT(13), + UCP_REQUEST_FLAG_SEND_TAG = UCS_BIT(14), +#if UCS_ENABLE_ASSERT + UCP_REQUEST_FLAG_STREAM_RECV = UCS_BIT(16), + UCP_REQUEST_DEBUG_FLAG_EXTERNAL = UCS_BIT(17) +#else + UCP_REQUEST_DEBUG_FLAG_EXTERNAL = 0 +#endif +}; + + +/** + * Protocols enumerator to work with send request state + */ +enum { + UCP_REQUEST_SEND_PROTO_BCOPY_AM = 0, + UCP_REQUEST_SEND_PROTO_ZCOPY_AM, + UCP_REQUEST_SEND_PROTO_RNDV_GET, + UCP_REQUEST_SEND_PROTO_RNDV_PUT, + UCP_REQUEST_SEND_PROTO_RMA +}; + + +/** + * Receive descriptor flags. + */ +enum { + UCP_RECV_DESC_FLAG_UCT_DESC = UCS_BIT(0), /* Descriptor allocated by UCT */ + UCP_RECV_DESC_FLAG_EAGER = UCS_BIT(1), /* Eager tag message */ + UCP_RECV_DESC_FLAG_EAGER_ONLY = UCS_BIT(2), /* Eager tag message with single fragment */ + UCP_RECV_DESC_FLAG_EAGER_SYNC = UCS_BIT(3), /* Eager tag message which requires reply */ + UCP_RECV_DESC_FLAG_EAGER_OFFLOAD = UCS_BIT(4), /* Eager tag from offload */ + UCP_RECV_DESC_FLAG_EAGER_LAST = UCS_BIT(5), /* Last fragment of eager tag message. + Used by tag offload protocol. 
*/ + UCP_RECV_DESC_FLAG_RNDV = UCS_BIT(6), /* Rendezvous request */ + UCP_RECV_DESC_FLAG_MALLOC = UCS_BIT(7), /* Descriptor was allocated with malloc + and must be freed, not returned to the + memory pool */ + UCP_RECV_DESC_FLAG_AM_HDR = UCS_BIT(8), /* Descriptor was originally allocated by + uct and the ucp level am header must + be accounted for when releasing + descriptors */ + UCP_RECV_DESC_FLAG_AM_REPLY = UCS_BIT(9) /* AM that needed a reply */ +}; + + +/** + * Receive descriptor list pointers + */ +enum { + UCP_RDESC_HASH_LIST = 0, + UCP_RDESC_ALL_LIST = 1 +}; + + +/** + * Request in progress. + */ +struct ucp_request { + ucs_status_t status; /* Operation status */ + uint32_t flags; /* Request flags */ + + union { + + /* "send" part - used for tag_send, stream_send, put, get, and atomic + * operations */ + struct { + ucp_ep_h ep; + void *buffer; /* Send buffer */ + ucp_datatype_t datatype; /* Send type */ + size_t length; /* Total length, in bytes */ + ucs_memory_type_t mem_type; /* Memory type */ + ucp_send_callback_t cb; /* Completion callback */ + + union { + + ucp_wireup_msg_t wireup; + + /* Tagged send */ + struct { + ucp_tag_t tag; + uint64_t message_id; /* message ID used in AM */ + ucp_lane_index_t am_bw_index; /* AM BW lane index */ + uintptr_t rreq_ptr; /* receive request ptr on the + recv side (used in AM rndv) */ + } tag; + + struct { + uint64_t remote_addr; /* Remote address */ + ucp_rkey_h rkey; /* Remote memory key */ + } rma; + + struct { + uintptr_t remote_request; /* pointer to the send request on receiver side */ + ucp_request_t *sreq; /* original send request of frag put */ + uint8_t am_id; + ucs_status_t status; + ucp_tag_t sender_tag; /* Sender tag, which is sent back in sync ack */ + ucp_request_callback_t comp_cb; /* Called to complete the request */ + } proto; + + struct { + uct_pending_req_t *req; + ucp_wireup_ep_t *wireup_ep; + } proxy; + + struct { + uint64_t remote_address; /* address of the sender's data buffer */ + uintptr_t 
remote_request; /* pointer to the sender's send request */ + ucp_request_t *rreq; /* receive request on the recv side */ + ucp_rkey_h rkey; /* key for remote send buffer */ + ucp_lane_map_t lanes_map; /* used lanes map */ + ucp_lane_index_t lane_count; /* number of lanes used in transaction */ + } rndv_get; + + struct { + uint64_t remote_address; /* address of the receiver's data buffer */ + uintptr_t remote_request; /* pointer to the receiver's receive request */ + ucp_request_t *sreq; /* send request on the send side */ + ucp_rkey_h rkey; /* key for remote receive buffer */ + uct_rkey_t uct_rkey; /* UCT remote key */ + } rndv_put; + + struct { + uintptr_t remote_request; /* pointer to the send request on receiver side */ + ucp_request_t *rreq; /* pointer to the receive request */ + size_t length; /* the length of the data that should be fetched + * from sender side */ + } rndv_rtr; + + struct { + ucp_request_callback_t flushed_cb;/* Called when flushed */ + ucp_request_t *worker_req; + ucs_queue_elem_t queue; /* Queue element in proto_status */ + unsigned uct_flags; /* Flags to pass to @ref uct_ep_flush */ + uct_worker_cb_id_t prog_id; /* Progress callback ID */ + uint32_t cmpl_sn; /* Sequence number of the remote completion + this request is waiting for */ + uint8_t sw_started; + uint8_t sw_done; + ucp_lane_map_t lanes; /* Which lanes need to be flushed */ + } flush; + + struct { + uct_worker_cb_id_t prog_id;/* Slow-path callback */ + } disconnect; + + struct { + uint64_t remote_addr; /* Remote address */ + ucp_rkey_h rkey; /* Remote memory key */ + uint64_t value; /* Atomic argument */ + uct_atomic_op_t uct_op; /* Requested UCT AMO */ + } amo; + + struct { + ucs_queue_elem_t queue; /* Elem in outgoing ssend reqs queue */ + ucp_tag_t ssend_tag; /* Tag in offload sync send */ + void *rndv_op; /* Handler of issued rndv send. Need to cancel + the operation if it is completed by SW. 
*/ + } tag_offload; + + struct { + uintptr_t req; /* Remote get request pointer */ + } get_reply; + + struct { + uintptr_t req; /* Remote atomic request pointer */ + ucp_atomic_reply_t data; /* Atomic reply data */ + } atomic_reply; + + struct { + uint16_t am_id; + uint64_t message_id; /* used to identify matching parts + of a large message */ + unsigned flags; + } am; + }; + + /* This structure holds all mutable fields, and everything else + * except common send/recv fields 'status' and 'flags' is + * immutable + * TODO: rework RMA case where length is used instead of dt.offset */ + struct { + ucp_dt_state_t dt; /* Position in the send buffer */ + uct_completion_t uct_comp; /* UCT completion */ + } state; + + ucp_lane_index_t pending_lane; /* Lane on which request was moved + * to pending state */ + ucp_lane_index_t lane; /* Lane on which this request is being sent */ + uct_pending_req_t uct; /* UCT pending request */ + ucp_mem_desc_t *mdesc; + } send; + + /* "receive" part - used for tag_recv and stream_recv operations */ + struct { + ucs_queue_elem_t queue; /* Expected queue element */ + void *buffer; /* Buffer to receive data to */ + ucp_datatype_t datatype; /* Receive type */ + size_t length; /* Total length, in bytes */ + ucs_memory_type_t mem_type; /* Memory type */ + ucp_dt_state_t state; + ucp_worker_t *worker; + uct_tag_context_t uct_ctx; /* Transport offload context */ + + union { + struct { + ucp_tag_t tag; /* Expected tag */ + ucp_tag_t tag_mask; /* Expected tag mask */ + uint64_t sn; /* Tag match sequence */ + ucp_tag_recv_callback_t cb; /* Completion callback */ + ucp_tag_recv_info_t info; /* Completion info to fill */ + ssize_t remaining; /* How much more data to be received */ + + /* Can use union, because rdesc is used in expected flow, + * while non_contig_buf is used in unexpected flow only. 
*/ + union { + ucp_mem_desc_t *rdesc; /* Offload bounce buffer */ + void *non_contig_buf; /* Used for assembling + multi-fragment + non-contig unexpected + message in tag offload flow. */ + }; + ucp_worker_iface_t *wiface; /* Cached iface this request + is received on. Used in + tag offload expected callbacks*/ + } tag; + + struct { + ucp_request_t *rreq; /* recv request on recv side */ + size_t offset; /* offset in recv buffer */ + } frag; + + struct { + ucp_stream_recv_callback_t cb; /* Completion callback */ + size_t offset; /* Receive data offset */ + size_t length; /* Completion info to fill */ + } stream; + }; + } recv; + + struct { + ucp_worker_h worker; /* Worker to flush */ + ucp_send_callback_t cb; /* Completion callback */ + uct_worker_cb_id_t prog_id; /* Progress callback ID */ + int comp_count; /* Countdown to request completion */ + ucp_ep_ext_gen_t *next_ep; /* Next endpoint to flush */ + } flush_worker; + }; +}; + + +/** + * Unexpected receive descriptor. If it is initialized in the headroom of UCT + * descriptor, the layout looks like the following: + * + * + * headroom data + * |-------------------------------------------|-------------------------| + * | unused | ucp_recv_desc | priv_length | | + * | | | | | + * |-------------------------------------------|-------------------------| + * + * Some protocols (i. e. tag offload) may need some space right before the + * incoming data to add specific headers needed for further message processing. + * Note: priv_length value should be in [0, UCP_WORKER_HEADROOM_PRIV_SIZE] range. 
+ */ +struct ucp_recv_desc { + union { + ucs_list_link_t tag_list[2]; /* Hash list TAG-element */ + ucs_queue_elem_t stream_queue; /* Queue STREAM-element */ + ucs_queue_elem_t tag_frag_queue; /* Tag fragments queue */ + }; + uint32_t length; /* Received length */ + uint32_t payload_offset; /* Offset from end of the descriptor + * to AM data */ + uint16_t flags; /* Flags */ + int16_t priv_length; /* Number of bytes consumed from + headroom private space, except the + space needed for ucp_recv_desc itself. + It is used for releasing descriptor + back to UCT only */ +}; + + +/** + * Defines protocol functions for ucp_request_send_start() function. + * TODO will be removed when switching to new protocols implementation. + */ +struct ucp_request_send_proto { + uct_pending_callback_t contig_short; /**< Progress short data */ + uct_pending_callback_t bcopy_single; /**< Progress bcopy single fragment */ + uct_pending_callback_t bcopy_multi; /**< Progress bcopy multi-fragment */ + uct_pending_callback_t zcopy_single; /**< Progress zcopy single fragment */ + uct_pending_callback_t zcopy_multi; /**< Progress zcopy multi-fragment */ + uct_completion_callback_t zcopy_completion; /**< Callback for UCT zcopy completion */ + size_t only_hdr_size; /**< Header size for single / short */ +}; + + +extern ucs_mpool_ops_t ucp_request_mpool_ops; +extern ucs_mpool_ops_t ucp_rndv_get_mpool_ops; + + +int ucp_request_pending_add(ucp_request_t *req, ucs_status_t *req_status, + unsigned pending_flags); + +ucs_status_t ucp_request_memory_reg(ucp_context_t *context, ucp_md_map_t md_map, + void *buffer, size_t length, ucp_datatype_t datatype, + ucp_dt_state_t *state, ucs_memory_type_t mem_type, + ucp_request_t *req_dbg, unsigned uct_flags); + +void ucp_request_memory_dereg(ucp_context_t *context, ucp_datatype_t datatype, + ucp_dt_state_t *state, ucp_request_t *req_dbg); + +ucs_status_t ucp_request_send_start(ucp_request_t *req, ssize_t max_short, + size_t zcopy_thresh, size_t zcopy_max, + size_t 
dt_count, + const ucp_ep_msg_config_t* msg_config, + const ucp_request_send_proto_t *proto); + +/* Fast-forward to data end */ +void ucp_request_send_state_ff(ucp_request_t *req, ucs_status_t status); + +ucs_status_t ucp_request_recv_msg_truncated(ucp_request_t *req, size_t length, + size_t offset); + +#endif diff --git a/src/ucp/core/ucp_request.inl b/src/ucp/core/ucp_request.inl new file mode 100644 index 0000000..1ed42a9 --- /dev/null +++ b/src/ucp/core/ucp_request.inl @@ -0,0 +1,563 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_REQUEST_INL_ +#define UCP_REQUEST_INL_ + +#include "ucp_request.h" +#include "ucp_worker.h" +#include "ucp_ep.inl" + +#include +#include +#include +#include +#include +#include + + +#define UCP_REQUEST_FLAGS_FMT \ + "%c%c%c%c%c%c%c" + +#define UCP_REQUEST_FLAGS_ARG(_flags) \ + (((_flags) & UCP_REQUEST_FLAG_COMPLETED) ? 'd' : '-'), \ + (((_flags) & UCP_REQUEST_FLAG_RELEASED) ? 'f' : '-'), \ + (((_flags) & UCP_REQUEST_FLAG_EXPECTED) ? 'e' : '-'), \ + (((_flags) & UCP_REQUEST_FLAG_LOCAL_COMPLETED) ? 'L' : '-'), \ + (((_flags) & UCP_REQUEST_FLAG_CALLBACK) ? 'c' : '-'), \ + (((_flags) & UCP_REQUEST_FLAG_RECV) ? 'r' : '-'), \ + (((_flags) & UCP_REQUEST_FLAG_SYNC) ? 's' : '-') + +#define UCP_RECV_DESC_FMT \ + "rdesc %p %c%c%c%c%c%c len %u+%u" + +#define UCP_RECV_DESC_ARG(_rdesc) \ + (_rdesc), \ + (((_rdesc)->flags & UCP_RECV_DESC_FLAG_UCT_DESC) ? 't' : '-'), \ + (((_rdesc)->flags & UCP_RECV_DESC_FLAG_EAGER) ? 'e' : '-'), \ + (((_rdesc)->flags & UCP_RECV_DESC_FLAG_EAGER_ONLY) ? 'o' : '-'), \ + (((_rdesc)->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC) ? 's' : '-'), \ + (((_rdesc)->flags & UCP_RECV_DESC_FLAG_EAGER_OFFLOAD) ? 'f' : '-'), \ + (((_rdesc)->flags & UCP_RECV_DESC_FLAG_RNDV) ? 
'r' : '-'), \ + (_rdesc)->payload_offset, \ + ((_rdesc)->length - (_rdesc)->payload_offset) + + +/* defined as a macro to print the call site */ +#define ucp_request_get(_worker) \ + ({ \ + ucp_request_t *_req = ucs_mpool_get_inline(&(_worker)->req_mp); \ + if (_req != NULL) { \ + VALGRIND_MAKE_MEM_DEFINED(_req + 1, \ + (_worker)->context->config.request.size); \ + ucs_trace_req("allocated request %p", _req); \ + UCS_PROFILE_REQUEST_NEW(_req, "ucp_request", 0); \ + } \ + _req; \ + }) + +#define ucp_request_complete(_req, _cb, _status, ...) \ + { \ + (_req)->status = (_status); \ + if (ucs_likely((_req)->flags & UCP_REQUEST_FLAG_CALLBACK)) { \ + (_req)->_cb((_req) + 1, (_status), ## __VA_ARGS__); \ + } \ + if (ucs_unlikely(((_req)->flags |= UCP_REQUEST_FLAG_COMPLETED) & \ + UCP_REQUEST_FLAG_RELEASED)) { \ + ucp_request_put(_req); \ + } \ + } + +#define ucp_request_set_callback(_req, _cb, _value) \ + { \ + (_req)->_cb = _value; \ + (_req)->flags |= UCP_REQUEST_FLAG_CALLBACK; \ + ucs_trace_data("request %p %s set to %p", _req, #_cb, _value); \ + } + + +static UCS_F_ALWAYS_INLINE void +ucp_request_put(ucp_request_t *req) +{ + ucs_trace_req("put request %p", req); + UCS_PROFILE_REQUEST_FREE(req); + ucs_mpool_put_inline(req); +} + +static UCS_F_ALWAYS_INLINE void +ucp_request_complete_send(ucp_request_t *req, ucs_status_t status) +{ + ucs_trace_req("completing send request %p (%p) "UCP_REQUEST_FLAGS_FMT" %s", + req, req + 1, UCP_REQUEST_FLAGS_ARG(req->flags), + ucs_status_string(status)); + UCS_PROFILE_REQUEST_EVENT(req, "complete_send", status); + ucp_request_complete(req, send.cb, status); +} + +static UCS_F_ALWAYS_INLINE void +ucp_request_complete_tag_recv(ucp_request_t *req, ucs_status_t status) +{ + ucs_trace_req("completing receive request %p (%p) "UCP_REQUEST_FLAGS_FMT + " stag 0x%"PRIx64" len %zu, %s", + req, req + 1, UCP_REQUEST_FLAGS_ARG(req->flags), + req->recv.tag.info.sender_tag, req->recv.tag.info.length, + ucs_status_string(status)); + 
UCS_PROFILE_REQUEST_EVENT(req, "complete_recv", status); + ucp_request_complete(req, recv.tag.cb, status, &req->recv.tag.info); +} + +static UCS_F_ALWAYS_INLINE void +ucp_request_complete_stream_recv(ucp_request_t *req, ucp_ep_ext_proto_t* ep_ext, + ucs_status_t status) +{ + /* dequeue request before complete */ + ucp_request_t *check_req UCS_V_UNUSED = + ucs_queue_pull_elem_non_empty(&ep_ext->stream.match_q, ucp_request_t, + recv.queue); + ucs_assert(check_req == req); + ucs_assert(req->recv.stream.offset > 0); + + req->recv.stream.length = req->recv.stream.offset; + ucs_trace_req("completing stream receive request %p (%p) " + UCP_REQUEST_FLAGS_FMT" count %zu, %s", + req, req + 1, UCP_REQUEST_FLAGS_ARG(req->flags), + req->recv.stream.length, ucs_status_string(status)); + UCS_PROFILE_REQUEST_EVENT(req, "complete_recv", status); + ucp_request_complete(req, recv.stream.cb, status, req->recv.stream.length); +} + +static UCS_F_ALWAYS_INLINE int +ucp_request_can_complete_stream_recv(ucp_request_t *req) +{ + /* NOTE: first check is needed to avoid heavy "%" operation if request is + * completely filled */ + if (req->recv.stream.offset == req->recv.length) { + return 1; + } + + if (req->flags & UCP_REQUEST_FLAG_STREAM_RECV_WAITALL) { + return 0; + } + + /* 0-length stream recv is meaningless if this was not requested explicitly */ + if (req->recv.stream.offset == 0) { + return 0; + } + + if (ucs_likely(UCP_DT_IS_CONTIG(req->recv.datatype))) { + return req->recv.stream.offset % + ucp_contig_dt_elem_size(req->recv.datatype) == 0; + } + + /* Currently, all data types except contig have granularity 1 byte */ + return 1; +} + +/* + * @return Whether completed. + * *req_status is filled with the completion status if completed. 
+ */ +static int UCS_F_ALWAYS_INLINE +ucp_request_try_send(ucp_request_t *req, ucs_status_t *req_status, + unsigned pending_flags) +{ + ucs_status_t status; + + /* coverity wrongly resolves (*req).send.uct.func to test_uct_pending::pending_send_op_ok */ + /* coverity[address_free] */ + status = req->send.uct.func(&req->send.uct); + if (status == UCS_OK) { + /* Completed the operation */ + *req_status = UCS_OK; + return 1; + } else if (status == UCS_INPROGRESS) { + /* Not completed, but made progress */ + return 0; + } else if (status != UCS_ERR_NO_RESOURCE) { + /* Unexpected error */ + ucp_request_complete_send(req, status); + *req_status = status; + return 1; + } + + /* No send resources, try to add to pending queue */ + ucs_assert(status == UCS_ERR_NO_RESOURCE); + return ucp_request_pending_add(req, req_status, pending_flags); +} + +/** + * Start sending a request. + * + * @param [in] req Request to start. + * @param [in] pending_flags flags to be passed to UCT if request will be + * added to pending queue. 
+ * + * @return UCS_OK - completed (callback will not be called) + * UCS_INPROGRESS - started but not completed + * other error - failure + */ +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_request_send(ucp_request_t *req, unsigned pending_flags) +{ + ucs_status_t status = UCS_ERR_NOT_IMPLEMENTED; + while (!ucp_request_try_send(req, &status, pending_flags)); + return status; +} + +static UCS_F_ALWAYS_INLINE +void ucp_request_send_generic_dt_finish(ucp_request_t *req) +{ + ucp_dt_generic_t *dt; + if (UCP_DT_IS_GENERIC(req->send.datatype)) { + dt = ucp_dt_generic(req->send.datatype); + ucs_assert(NULL != dt); + dt->ops.finish(req->send.state.dt.dt.generic.state); + } +} + +static UCS_F_ALWAYS_INLINE +void ucp_request_recv_generic_dt_finish(ucp_request_t *req) +{ + ucp_dt_generic_t *dt; + if (UCP_DT_IS_GENERIC(req->recv.datatype)) { + dt = ucp_dt_generic(req->recv.datatype); + ucs_assert(NULL != dt); + dt->ops.finish(req->recv.state.dt.generic.state); + } +} + +static UCS_F_ALWAYS_INLINE void +ucp_request_send_state_init(ucp_request_t *req, ucp_datatype_t datatype, + size_t dt_count) +{ + ucp_dt_generic_t *dt_gen; + void *state_gen; + + VALGRIND_MAKE_MEM_UNDEFINED(&req->send.state.uct_comp.count, + sizeof(req->send.state.uct_comp.count)); + VALGRIND_MAKE_MEM_UNDEFINED(&req->send.state.dt.offset, + sizeof(req->send.state.dt.offset)); + + req->send.state.uct_comp.func = NULL; + + switch (datatype & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + req->send.state.dt.dt.contig.md_map = 0; + return; + case UCP_DATATYPE_IOV: + req->send.state.dt.dt.iov.iovcnt_offset = 0; + req->send.state.dt.dt.iov.iov_offset = 0; + req->send.state.dt.dt.iov.iovcnt = dt_count; + req->send.state.dt.dt.iov.dt_reg = NULL; + return; + case UCP_DATATYPE_GENERIC: + dt_gen = ucp_dt_generic(datatype); + state_gen = dt_gen->ops.start_pack(dt_gen->context, req->send.buffer, + dt_count); + req->send.state.dt.dt.generic.state = state_gen; + return; + default: + ucs_fatal("Invalid data type"); + 
} +} + +static UCS_F_ALWAYS_INLINE void +ucp_request_send_state_reset(ucp_request_t *req, + uct_completion_callback_t comp_cb, unsigned proto) +{ + switch (proto) { + case UCP_REQUEST_SEND_PROTO_RMA: + ucs_assert(UCP_DT_IS_CONTIG(req->send.datatype)); + /* Fall through */ + case UCP_REQUEST_SEND_PROTO_RNDV_GET: + case UCP_REQUEST_SEND_PROTO_RNDV_PUT: + case UCP_REQUEST_SEND_PROTO_ZCOPY_AM: + req->send.state.uct_comp.func = comp_cb; + req->send.state.uct_comp.count = 0; + /* Fall through */ + case UCP_REQUEST_SEND_PROTO_BCOPY_AM: + req->send.state.dt.offset = 0; + break; + default: + ucs_fatal("unknown protocol"); + } +} + +/** + * Advance state of send request after UCT operation. This function applies + * @a new_dt_state to @a req request according to @a proto protocol. Also, UCT + * completion counter will be incremented if @a proto requires it. + * + * @param [inout] req Send request. + * @param [in] new_dt_state State which was progressed by + * @ref ucp_dt_pack or @ref ucp_dt_iov_copy_uct. + * @param [in] proto Internal UCP protocol identifier + * (UCP_REQUEST_SEND_PROTO_*) + * @param [in] status Status of the last UCT operation which + * progressed @a proto protocol. + */ +static UCS_F_ALWAYS_INLINE void +ucp_request_send_state_advance(ucp_request_t *req, + const ucp_dt_state_t *new_dt_state, + unsigned proto, ucs_status_t status) +{ + if (ucs_unlikely(UCS_STATUS_IS_ERR(status))) { + /* Don't advance after failed operation in order to continue on next try + * from last valid point. 
+ */ + return; + } + + switch (proto) { + case UCP_REQUEST_SEND_PROTO_RMA: + if (status == UCS_INPROGRESS) { + ++req->send.state.uct_comp.count; + } + break; + case UCP_REQUEST_SEND_PROTO_ZCOPY_AM: + /* Fall through */ + case UCP_REQUEST_SEND_PROTO_RNDV_GET: + case UCP_REQUEST_SEND_PROTO_RNDV_PUT: + if (status == UCS_INPROGRESS) { + ++req->send.state.uct_comp.count; + } + /* Fall through */ + case UCP_REQUEST_SEND_PROTO_BCOPY_AM: + ucs_assert(new_dt_state != NULL); + if (UCP_DT_IS_CONTIG(req->send.datatype)) { + req->send.state.dt.offset = new_dt_state->offset; + } else { + req->send.state.dt = *new_dt_state; + } + break; + default: + ucs_fatal("unknown protocol"); + } + + /* offset is not used for RMA */ + ucs_assert((proto == UCP_REQUEST_SEND_PROTO_RMA) || + (req->send.state.dt.offset <= req->send.length)); +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_request_send_buffer_reg(ucp_request_t *req, ucp_md_map_t md_map, + unsigned uct_flags) +{ + return ucp_request_memory_reg(req->send.ep->worker->context, md_map, + (void*)req->send.buffer, req->send.length, + req->send.datatype, &req->send.state.dt, + req->send.mem_type, req, uct_flags); +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_request_send_buffer_reg_lane(ucp_request_t *req, ucp_lane_index_t lane, + unsigned uct_flags) +{ + ucp_md_map_t md_map = UCS_BIT(ucp_ep_md_index(req->send.ep, lane)); + return ucp_request_send_buffer_reg(req, md_map, uct_flags); +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_send_request_add_reg_lane(ucp_request_t *req, ucp_lane_index_t lane) +{ + /* add new lane to registration map */ + ucp_md_map_t md_map = UCS_BIT(ucp_ep_md_index(req->send.ep, lane)) | + req->send.state.dt.dt.contig.md_map; + ucs_assert(ucs_popcount(md_map) <= UCP_MAX_OP_MDS); + return ucp_request_send_buffer_reg(req, md_map, 0); +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_request_recv_buffer_reg(ucp_request_t *req, ucp_md_map_t md_map, + size_t length) +{ + return 
ucp_request_memory_reg(req->recv.worker->context, md_map, + req->recv.buffer, length, + req->recv.datatype, &req->recv.state, + req->recv.mem_type, req, + UCT_MD_MEM_FLAG_HIDE_ERRORS); +} + +static UCS_F_ALWAYS_INLINE void ucp_request_send_buffer_dereg(ucp_request_t *req) +{ + ucp_request_memory_dereg(req->send.ep->worker->context, req->send.datatype, + &req->send.state.dt, req); +} + +static UCS_F_ALWAYS_INLINE void ucp_request_recv_buffer_dereg(ucp_request_t *req) +{ + ucp_request_memory_dereg(req->recv.worker->context, req->recv.datatype, + &req->recv.state, req); +} + +static UCS_F_ALWAYS_INLINE void +ucp_request_wait_uct_comp(ucp_request_t *req) +{ + while (req->send.state.uct_comp.count > 0) { + ucp_worker_progress(req->send.ep->worker); + } +} + +static UCS_F_ALWAYS_INLINE void +ucp_request_unpack_contig(ucp_request_t *req, void *buf, const void *data, + size_t length) +{ + if (ucs_likely(UCP_MEM_IS_ACCESSIBLE_FROM_CPU(req->recv.mem_type))) { + UCS_PROFILE_NAMED_CALL("memcpy_recv", ucs_memcpy_relaxed, buf, + data, length); + } else { + ucp_mem_type_unpack(req->recv.worker, buf, data, length, + req->recv.mem_type); + } +} + +/** + * Unpack receive data to a request + * + * req - receive request + * data - data to unpack + * length - size of the data to unpack, in bytes + * offset - offset of received data within the request, for OOO fragments + * last - nonzero for the last fragment (generic datatype state is then finished) + * + */ +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_request_recv_data_unpack(ucp_request_t *req, const void *data, + size_t length, size_t offset, int last) +{ + ucp_dt_generic_t *dt_gen; + ucs_status_t status; + + ucs_assert(req->status == UCS_OK); + + ucp_trace_req(req, "unpack recv_data req_len %zu data_len %zu offset %zu last: %s", + req->recv.length, length, offset, last ? 
"yes" : "no"); + + if (ucs_unlikely((length + offset) > req->recv.length)) { + return ucp_request_recv_msg_truncated(req, length, offset); + } + + switch (req->recv.datatype & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + ucp_request_unpack_contig(req, + UCS_PTR_BYTE_OFFSET(req->recv.buffer, offset), + data, length); + return UCS_OK; + + case UCP_DATATYPE_IOV: + if (offset != req->recv.state.offset) { + ucp_dt_iov_seek(req->recv.buffer, req->recv.state.dt.iov.iovcnt, + offset - req->recv.state.offset, + &req->recv.state.dt.iov.iov_offset, + &req->recv.state.dt.iov.iovcnt_offset); + req->recv.state.offset = offset; + } + UCS_PROFILE_CALL(ucp_dt_iov_scatter, req->recv.buffer, + req->recv.state.dt.iov.iovcnt, data, length, + &req->recv.state.dt.iov.iov_offset, + &req->recv.state.dt.iov.iovcnt_offset); + req->recv.state.offset += length; + return UCS_OK; + + case UCP_DATATYPE_GENERIC: + dt_gen = ucp_dt_generic(req->recv.datatype); + status = UCS_PROFILE_NAMED_CALL("dt_unpack", dt_gen->ops.unpack, + req->recv.state.dt.generic.state, + offset, data, length); + if (last || (status != UCS_OK)) { + UCS_PROFILE_NAMED_CALL_VOID("dt_finish", dt_gen->ops.finish, + req->recv.state.dt.generic.state); + } + return status; + + default: + ucs_fatal("unexpected datatype=%lx", req->recv.datatype); + } +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_recv_desc_init(ucp_worker_h worker, void *data, size_t length, + int data_offset, unsigned am_flags, uint16_t hdr_len, + uint16_t rdesc_flags, uint16_t priv_length, + ucp_recv_desc_t **rdesc_p) +{ + ucp_recv_desc_t *rdesc; + void *data_hdr; + ucs_status_t status; + + if (ucs_unlikely(am_flags & UCT_CB_PARAM_FLAG_DESC)) { + /* slowpath */ + ucs_assert(priv_length <= UCP_WORKER_HEADROOM_PRIV_SIZE); + data_hdr = UCS_PTR_BYTE_OFFSET(data, -data_offset); + rdesc = (ucp_recv_desc_t *)data_hdr - 1; + rdesc->flags = rdesc_flags | UCP_RECV_DESC_FLAG_UCT_DESC; + rdesc->priv_length = priv_length; + status = UCS_INPROGRESS; + } else { + 
rdesc = (ucp_recv_desc_t*)ucs_mpool_get_inline(&worker->am_mp); + if (rdesc == NULL) { + ucs_error("ucp recv descriptor is not allocated"); + return UCS_ERR_NO_MEMORY; + } + + /* No need to initialize rdesc->priv_length here, because it is only + * needed for releasing UCT descriptor. */ + + rdesc->flags = rdesc_flags; + status = UCS_OK; + memcpy(UCS_PTR_BYTE_OFFSET(rdesc + 1, data_offset), data, length); + } + + rdesc->length = length + data_offset; + rdesc->payload_offset = hdr_len; + *rdesc_p = rdesc; + return status; +} + +static UCS_F_ALWAYS_INLINE void +ucp_recv_desc_release(ucp_recv_desc_t *rdesc) +{ + ucs_trace_req("release receive descriptor %p", rdesc); + if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_UCT_DESC)) { + /* uct desc is slowpath */ + uct_iface_release_desc(UCS_PTR_BYTE_OFFSET(rdesc, + -(UCP_WORKER_HEADROOM_PRIV_SIZE - + rdesc->priv_length))); + } else { + ucs_mpool_put_inline(rdesc); + } +} + +static UCS_F_ALWAYS_INLINE ucp_lane_index_t +ucp_send_request_get_next_am_bw_lane(ucp_request_t *req) +{ + ucp_lane_index_t lane; + + /* at least one lane must be initialized */ + ucs_assert(ucp_ep_config(req->send.ep)->key.am_bw_lanes[0] != UCP_NULL_LANE); + + lane = (req->send.tag.am_bw_index >= UCP_MAX_LANES) ? + UCP_NULL_LANE : + ucp_ep_config(req->send.ep)->key.am_bw_lanes[req->send.tag.am_bw_index]; + if (lane != UCP_NULL_LANE) { + req->send.tag.am_bw_index++; + return lane; + } else { + req->send.tag.am_bw_index = 1; + return ucp_ep_config(req->send.ep)->key.am_bw_lanes[0]; + } +} + +static UCS_F_ALWAYS_INLINE uintptr_t ucp_request_get_dest_ep_ptr(ucp_request_t *req) +{ + /* This function may return 0, but in such cases the message should not be + * sent at all because the am_lane would point to a wireup (proxy) endpoint. + * So only the receiver side has an assertion that ep_ptr != 0. 
+ */ + return ucp_ep_dest_ep_ptr(req->send.ep); +} + +#endif diff --git a/src/ucp/core/ucp_resource.h b/src/ucp/core/ucp_resource.h new file mode 100644 index 0000000..0560948 --- /dev/null +++ b/src/ucp/core/ucp_resource.h @@ -0,0 +1,20 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCP_CORE_RESOURCE_H_ +#define UCP_CORE_RESOURCE_H_ + +#include +#include + +BEGIN_C_DECLS + +uct_md_h ucp_context_find_tl_md(ucp_context_h context, const char *md_name); + +END_C_DECLS + +#endif diff --git a/src/ucp/core/ucp_rkey.c b/src/ucp/core/ucp_rkey.c new file mode 100644 index 0000000..419432c --- /dev/null +++ b/src/ucp/core/ucp_rkey.c @@ -0,0 +1,497 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucp_mm.h" +#include "ucp_request.h" +#include "ucp_ep.inl" + +#include +#include +#include +#include +#include + + +static struct { + ucp_md_map_t md_map; + uint8_t mem_type; +} UCS_S_PACKED ucp_mem_dummy_buffer = {0, UCS_MEMORY_TYPE_HOST}; + + +size_t ucp_rkey_packed_size(ucp_context_h context, ucp_md_map_t md_map) +{ + size_t size, md_size; + unsigned md_index; + + size = sizeof(ucp_md_map_t); + size += sizeof(uint8_t); + ucs_for_each_bit (md_index, md_map) { + md_size = context->tl_mds[md_index].attr.rkey_packed_size; + ucs_assert_always(md_size <= UINT8_MAX); + size += sizeof(uint8_t) + md_size; + } + return size; +} + +void ucp_rkey_packed_copy(ucp_context_h context, ucp_md_map_t md_map, + ucs_memory_type_t mem_type, void *rkey_buffer, + const void* uct_rkeys[]) +{ + uint8_t *p = rkey_buffer; + unsigned md_index; + size_t md_size; + + *(ucp_md_map_t*)p = md_map; + p += sizeof(ucp_md_map_t); + + *(p++) = mem_type; + + ucs_for_each_bit(md_index, md_map) { + md_size = context->tl_mds[md_index].attr.rkey_packed_size; + ucs_assert_always(md_size <= 
UINT8_MAX); + *(p++) = md_size; + memcpy(p, *uct_rkeys, md_size); + p += md_size; + ++uct_rkeys; + } +} + +ssize_t ucp_rkey_pack_uct(ucp_context_h context, ucp_md_map_t md_map, + const uct_mem_h *memh, ucs_memory_type_t mem_type, + void *rkey_buffer) +{ + uint8_t *p = rkey_buffer; + ucs_status_t status = UCS_OK; + unsigned md_index, uct_memh_index; + size_t md_size; + char UCS_V_UNUSED buf[128]; + + /* Check that md_map is valid */ + ucs_assert(ucs_test_all_flags(UCS_MASK(context->num_mds), md_map)); + + /* Write the MD map */ + *(ucp_md_map_t*)p = md_map; + p += sizeof(ucp_md_map_t); + + /* Write memory type */ + UCS_STATIC_ASSERT(UCS_MEMORY_TYPE_LAST <= 255); + *(p++) = mem_type; + + /* Write both size and rkey_buffer for each UCT rkey */ + uct_memh_index = 0; + ucs_for_each_bit (md_index, md_map) { + md_size = context->tl_mds[md_index].attr.rkey_packed_size; + *(p++) = md_size; + status = uct_md_mkey_pack(context->tl_mds[md_index].md, + memh[uct_memh_index], p); + if (status != UCS_OK) { + return status; + } + + ucs_trace("rkey[%d]=%s for md[%d]=%s", uct_memh_index, + ucs_str_dump_hex(p, md_size, buf, sizeof(buf), SIZE_MAX), + md_index, context->tl_mds[md_index].rsc.md_name); + + ++uct_memh_index; + p += md_size; + } + + return UCS_PTR_BYTE_DIFF(rkey_buffer, p); +} + +ucs_status_t ucp_rkey_pack(ucp_context_h context, ucp_mem_h memh, + void **rkey_buffer_p, size_t *size_p) +{ + void *rkey_buffer, *p; + ucs_status_t status; + ssize_t packed_size; + size_t size; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(context, UCP_FEATURE_RMA | UCP_FEATURE_AMO, + return UCS_ERR_INVALID_PARAM); + + /* always acquire context lock */ + UCP_THREAD_CS_ENTER(&context->mt_lock); + + ucs_trace("packing rkeys for buffer %p memh %p md_map 0x%lx", + memh->address, memh, memh->md_map); + + if (memh->length == 0) { + /* dummy memh, return dummy key */ + *rkey_buffer_p = &ucp_mem_dummy_buffer; + *size_p = sizeof(ucp_mem_dummy_buffer); + status = UCS_OK; + goto out; + } + + size = 
ucp_rkey_packed_size(context, memh->md_map); + rkey_buffer = ucs_malloc(size, "ucp_rkey_buffer"); + if (rkey_buffer == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out; + } + + p = rkey_buffer; + + packed_size = ucp_rkey_pack_uct(context, memh->md_map, memh->uct, + memh->mem_type, p); + if (packed_size < 0) { + status = (ucs_status_t)packed_size; + goto err_destroy; + } + + ucs_assert(packed_size == size); + + *rkey_buffer_p = rkey_buffer; + *size_p = size; + status = UCS_OK; + goto out; + +err_destroy: + ucs_free(rkey_buffer); +out: + UCP_THREAD_CS_EXIT(&context->mt_lock); + return status; +} + +void ucp_rkey_buffer_release(void *rkey_buffer) +{ + if (rkey_buffer == &ucp_mem_dummy_buffer) { + /* Dummy key, just return */ + return; + } + ucs_free(rkey_buffer); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_ep_rkey_unpack, (ep, rkey_buffer, rkey_p), + ucp_ep_h ep, const void *rkey_buffer, + ucp_rkey_h *rkey_p) +{ + ucp_worker_h worker = ep->worker; + const ucp_ep_config_t *ep_config; + unsigned remote_md_index; + ucp_md_map_t md_map, remote_md_map; + ucp_rsc_index_t cmpt_index; + ucp_tl_rkey_t *tl_rkey; + unsigned rkey_index; + unsigned md_count; + ucs_status_t status; + ucp_rkey_h rkey; + ucs_memory_type_t mem_type; + uint8_t md_size; + const uint8_t *p; + uint8_t flags; + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + ep_config = ucp_ep_config(ep); + + /* Count the number of remote MDs in the rkey buffer */ + p = rkey_buffer; + + /* Read remote MD map */ + remote_md_map = *(ucp_md_map_t*)p; + ucs_trace("ep %p: unpacking rkey with md_map 0x%lx", ep, remote_md_map); + + /* MD map for the unpacked rkey */ + md_map = remote_md_map & ucp_ep_config(ep)->key.reachable_md_map; + md_count = ucs_popcount(md_map); + p += sizeof(ucp_md_map_t); + + /* Allocate rkey handle which holds UCT rkeys for all remote MDs. Small key + * allocations are done from a memory pool. + * We keep all of them to handle a future transport switch. 
+ */ + flags = 0; + if (md_count <= UCP_RKEY_MPOOL_MAX_MD) { + rkey = ucs_mpool_get_inline(&worker->rkey_mp); + flags = UCP_RKEY_DESC_FLAG_POOL; + } else { + rkey = ucs_malloc(sizeof(*rkey) + (sizeof(rkey->tl_rkey[0]) * md_count), + "ucp_rkey"); + } + if (rkey == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out_unlock; + } + + /* Read memory type */ + mem_type = (ucs_memory_type_t)*(p++); + + rkey->md_map = md_map; + rkey->mem_type = mem_type; + rkey->flags = flags; +#if ENABLE_PARAMS_CHECK + rkey->ep = ep; +#endif + + /* Unpack rkey of each UCT MD */ + rkey_index = 0; /* Index of the rkey in the array */ + /* Go over remote MD indices */ + ucs_for_each_bit (remote_md_index, remote_md_map) { + md_size = *(p++); + + /* Use bit operations to iterate through the indices of the remote MDs + * as provided in the md_map. md_map always holds a bitmap of MD indices + * that remain to be used. Every time we find the next valid MD index. + * If some rkeys cannot be unpacked, we remove them from the local map. 
+ */ + ucs_assert(UCS_BIT(remote_md_index) & remote_md_map); + ucs_assert_always(remote_md_index < UCP_MD_INDEX_BITS); + + /* Unpack only reachable rkeys */ + if (UCS_BIT(remote_md_index) & rkey->md_map) { + ucs_assert(rkey_index < md_count); + + tl_rkey = &rkey->tl_rkey[rkey_index]; + cmpt_index = ucp_ep_config_get_dst_md_cmpt(&ep_config->key, + remote_md_index); + tl_rkey->cmpt = worker->context->tl_cmpts[cmpt_index].cmpt; + + status = uct_rkey_unpack(tl_rkey->cmpt, p, &tl_rkey->rkey); + if (status == UCS_OK) { + ucs_trace("rkey[%d] for remote md %d is 0x%lx", rkey_index, + remote_md_index, tl_rkey->rkey.rkey); + ++rkey_index; + } else if (status == UCS_ERR_UNREACHABLE) { + rkey->md_map &= ~UCS_BIT(remote_md_index); + ucs_trace("rkey[%d] for remote md %d is 0x%lx not reachable", + rkey_index, remote_md_index, tl_rkey->rkey.rkey); + /* FIXME this can make malloc allocated key be released to mpool */ + } else { + ucs_error("failed to unpack remote key from remote md[%d]: %s", + remote_md_index, ucs_status_string(status)); + goto err_destroy; + } + } + + p += md_size; + } + + /* Silence clang checker - assert that if some rkeys are unpacked, then + * rkey->md_map is nonzero. 
+ */ + ucs_assert((rkey_index > 0) || (rkey->md_map == 0)); + + ucp_rkey_resolve_inner(rkey, ep); + *rkey_p = rkey; + status = UCS_OK; + +out_unlock: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + return status; + +err_destroy: + ucp_rkey_destroy(rkey); + goto out_unlock; +} + +void ucp_rkey_dump_packed(const void *rkey_buffer, char *buffer, size_t max) +{ + char *p = buffer; + char *endp = buffer + max; + const uint8_t *rkey_buf = rkey_buffer; + ucp_md_map_t md_map; + unsigned md_index; + uint8_t md_size; + int first; + + snprintf(p, endp - p, "{"); + p += strlen(p); + + md_map = *(ucp_md_map_t*)(rkey_buf); + rkey_buf += sizeof(ucp_md_map_t) + sizeof(uint8_t); + + first = 1; + ucs_for_each_bit(md_index, md_map) { + md_size = *rkey_buf; + rkey_buf += sizeof(uint8_t); + + if (!first) { + snprintf(p, endp - p, ","); + p += strlen(p); + } + first = 0; + + snprintf(p, endp - p, "%d:", md_index); + p += strlen(p); + + ucs_str_dump_hex(rkey_buf, md_size, p, endp - p, SIZE_MAX); + p += strlen(p); + + rkey_buf += md_size; + } + + snprintf(p, endp - p, "}"); +} + +ucs_status_t ucp_rkey_ptr(ucp_rkey_h rkey, uint64_t raddr, void **addr_p) +{ + unsigned remote_md_index, rkey_index; + ucs_status_t status; + + rkey_index = 0; + ucs_for_each_bit(remote_md_index, rkey->md_map) { + status = uct_rkey_ptr(rkey->tl_rkey[rkey_index].cmpt, + &rkey->tl_rkey[rkey_index].rkey, raddr, addr_p); + if ((status == UCS_OK) || + (status == UCS_ERR_INVALID_ADDR)) { + return status; + } + + ++rkey_index; + } + + return UCS_ERR_UNREACHABLE; +} + +void ucp_rkey_destroy(ucp_rkey_h rkey) +{ + unsigned remote_md_index, rkey_index; + ucp_worker_h UCS_V_UNUSED worker; + + rkey_index = 0; + ucs_for_each_bit(remote_md_index, rkey->md_map) { + uct_rkey_release(rkey->tl_rkey[rkey_index].cmpt, + &rkey->tl_rkey[rkey_index].rkey); + ++rkey_index; + } + + if (rkey->flags & UCP_RKEY_DESC_FLAG_POOL) { + worker = ucs_container_of(ucs_mpool_obj_owner(rkey), ucp_worker_t, + rkey_mp); + 
UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + ucs_mpool_put_inline(rkey); + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + } else { + ucs_free(rkey); + } +} + +ucp_lane_index_t ucp_rkey_find_rma_lane(ucp_context_h context, + const ucp_ep_config_t *config, + ucs_memory_type_t mem_type, + const ucp_lane_index_t *lanes, + ucp_rkey_h rkey, + ucp_lane_map_t ignore, + uct_rkey_t *uct_rkey_p) +{ + ucp_md_index_t dst_md_index; + ucp_lane_index_t lane; + ucp_md_index_t md_index; + uct_md_attr_t *md_attr; + uint8_t rkey_index; + int prio; + + for (prio = 0; prio < UCP_MAX_LANES; ++prio) { + lane = lanes[prio]; + if (lane == UCP_NULL_LANE) { + return UCP_NULL_LANE; /* No more lanes */ + } else if (ignore & UCS_BIT(lane)) { + continue; /* lane is in ignore mask, do not process it */ + } + + md_index = config->md_index[lane]; + md_attr = &context->tl_mds[md_index].attr; + + if ((md_index != UCP_NULL_RESOURCE) && + (!(md_attr->cap.flags & UCT_MD_FLAG_NEED_RKEY))) + { + /* Lane does not need rkey, can use the lane with invalid rkey */ + if (!rkey || ((mem_type == md_attr->cap.access_mem_type) && + (mem_type == rkey->mem_type))) { + *uct_rkey_p = UCT_INVALID_RKEY; + return lane; + } + } + + if ((md_index != UCP_NULL_RESOURCE) && + (!(md_attr->cap.reg_mem_types & UCS_BIT(mem_type)))) { + continue; + } + + dst_md_index = config->key.lanes[lane].dst_md_index; + if ((rkey != NULL) && (rkey->md_map & UCS_BIT(dst_md_index))) { + /* Return first matching lane (a NULL rkey can never match here) */ + rkey_index = ucs_bitmap2idx(rkey->md_map, dst_md_index); + *uct_rkey_p = rkey->tl_rkey[rkey_index].rkey.rkey; + return lane; + } + } + + return UCP_NULL_LANE; +} + +void ucp_rkey_resolve_inner(ucp_rkey_h rkey, ucp_ep_h ep) +{ + ucp_context_h context = ep->worker->context; + ucp_ep_config_t *config = ucp_ep_config(ep); + ucs_status_t status; + uct_rkey_t uct_rkey; + int rma_sw, amo_sw; + + rkey->cache.rma_lane = ucp_rkey_find_rma_lane(context, config, + UCS_MEMORY_TYPE_HOST, + config->key.rma_lanes, rkey, + 0, &uct_rkey); + rma_sw = 
(rkey->cache.rma_lane == UCP_NULL_LANE); + if (rma_sw) { + rkey->cache.rma_proto = &ucp_rma_sw_proto; + rkey->cache.rma_rkey = UCT_INVALID_RKEY; + rkey->cache.max_put_short = 0; + } else { + rkey->cache.rma_proto = &ucp_rma_basic_proto; + rkey->cache.rma_rkey = uct_rkey; + rkey->cache.rma_proto = &ucp_rma_basic_proto; + rkey->cache.max_put_short = config->rma[rkey->cache.rma_lane].max_put_short; + } + + rkey->cache.amo_lane = ucp_rkey_find_rma_lane(context, config, + UCS_MEMORY_TYPE_HOST, + config->key.amo_lanes, rkey, + 0, &uct_rkey); + amo_sw = (rkey->cache.amo_lane == UCP_NULL_LANE); + if (amo_sw) { + rkey->cache.amo_proto = &ucp_amo_sw_proto; + rkey->cache.amo_rkey = UCT_INVALID_RKEY; + } else { + rkey->cache.amo_proto = &ucp_amo_basic_proto; + rkey->cache.amo_rkey = uct_rkey; + } + + /* If we use sw rma/amo need to resolve destination endpoint in order to + * receive responses and completion messages + */ + if ((amo_sw || rma_sw) && (config->key.am_lane != UCP_NULL_LANE)) { + status = ucp_ep_resolve_dest_ep_ptr(ep, config->key.am_lane); + if (status != UCS_OK) { + ucs_debug("ep %p: failed to resolve destination ep, " + "sw rma cannot be used", ep); + } else { + /* if we can resolve destination ep, save the active message lane + * as the rma/amo lane in the rkey cache + */ + if (amo_sw) { + rkey->cache.amo_lane = config->key.am_lane; + } + if (rma_sw) { + rkey->cache.rma_lane = config->key.am_lane; + } + } + } + + rkey->cache.ep_cfg_index = ep->cfg_index; + + ucs_trace("rkey %p ep %p @ cfg[%d] %s: lane[%d] rkey 0x%"PRIx64" " + "%s: lane[%d] rkey 0x%"PRIx64"", + rkey, ep, ep->cfg_index, + rkey->cache.rma_proto->name, rkey->cache.rma_lane, rkey->cache.rma_rkey, + rkey->cache.amo_proto->name, rkey->cache.amo_lane, rkey->cache.amo_rkey); +} diff --git a/src/ucp/core/ucp_thread.h b/src/ucp/core/ucp_thread.h new file mode 100644 index 0000000..363b603 --- /dev/null +++ b/src/ucp/core/ucp_thread.h @@ -0,0 +1,109 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 
2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_THREAD_H_ +#define UCP_THREAD_H_ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include + + +/* + * Multi-thread mode + */ +typedef enum ucp_mt_type { + UCP_MT_TYPE_NONE = 0, + UCP_MT_TYPE_SPINLOCK, + UCP_MT_TYPE_MUTEX +} ucp_mt_type_t; + + +/** + * Multi-thread lock + */ +typedef struct ucp_mt_lock { + ucp_mt_type_t mt_type; + union { + /* Lock for multithreading support. Either spinlock or mutex is used at + one time. Spinlock is the default option. */ + pthread_mutex_t mt_mutex; + ucs_spinlock_t mt_spinlock; + } lock; +} ucp_mt_lock_t; + + +#if ENABLE_MT + +#define UCP_THREAD_IS_REQUIRED(_lock_ptr) \ + ((_lock_ptr)->mt_type) +#define UCP_THREAD_LOCK_INIT(_lock_ptr) \ + do { \ + if ((_lock_ptr)->mt_type == UCP_MT_TYPE_MUTEX) { \ + pthread_mutex_init(&((_lock_ptr)->lock.mt_mutex), NULL); \ + } else { \ + ucs_spinlock_init(&((_lock_ptr)->lock.mt_spinlock)); \ + } \ + } while (0) +#define UCP_THREAD_LOCK_FINALIZE(_lock_ptr) \ + do { \ + ucs_status_t status; \ + \ + if ((_lock_ptr)->mt_type == UCP_MT_TYPE_MUTEX) { \ + pthread_mutex_destroy(&((_lock_ptr)->lock.mt_mutex)); \ + } else { \ + status = ucs_spinlock_destroy(&((_lock_ptr)->lock.mt_spinlock)); \ + if (status != UCS_OK) { \ + ucs_warn("ucs_spinlock_destroy() failed (%d)", status); \ + } \ + } \ + } while (0) +#define UCP_THREAD_CS_ENTER(_lock_ptr) \ + { \ + if ((_lock_ptr)->mt_type == UCP_MT_TYPE_MUTEX) { \ + pthread_mutex_lock(&((_lock_ptr)->lock.mt_mutex)); \ + } else { \ + ucs_spin_lock(&((_lock_ptr)->lock.mt_spinlock)); \ + } \ + } +#define UCP_THREAD_CS_EXIT(_lock_ptr) \ + { \ + if ((_lock_ptr)->mt_type == UCP_MT_TYPE_MUTEX) { \ + pthread_mutex_unlock(&((_lock_ptr)->lock.mt_mutex)); \ + } else { \ + ucs_spin_unlock(&((_lock_ptr)->lock.mt_spinlock)); \ + } \ + } + +#else + +#define UCP_THREAD_IS_REQUIRED(_lock_ptr) 0 +#define UCP_THREAD_LOCK_INIT(_lock_ptr) {} +#define 
UCP_THREAD_LOCK_FINALIZE(_lock_ptr) {} +#define UCP_THREAD_CS_ENTER(_lock_ptr) {} +#define UCP_THREAD_CS_EXIT(_lock_ptr) {} + +#endif + +#define UCP_THREAD_CS_ENTER_CONDITIONAL(_lock_ptr) \ + { \ + if (UCP_THREAD_IS_REQUIRED(_lock_ptr)) { \ + UCP_THREAD_CS_ENTER(_lock_ptr); \ + } \ + } +#define UCP_THREAD_CS_EXIT_CONDITIONAL(_lock_ptr) \ + { \ + if (UCP_THREAD_IS_REQUIRED(_lock_ptr)) { \ + UCP_THREAD_CS_EXIT(_lock_ptr); \ + } \ + } + + +#endif diff --git a/src/ucp/core/ucp_types.h b/src/ucp/core/ucp_types.h new file mode 100644 index 0000000..a5db2d4 --- /dev/null +++ b/src/ucp/core/ucp_types.h @@ -0,0 +1,135 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_TYPES_H_ +#define UCP_TYPES_H_ + +#include +#include +#include +#include + + +#define UCP_WORKER_NAME_MAX 32 /* Worker name for debugging */ +#define UCP_MIN_BCOPY 64 /* Minimal size for bcopy */ +#define UCP_FEATURE_AMO (UCP_FEATURE_AMO32|UCP_FEATURE_AMO64) + +/* Resources */ +#define UCP_MAX_RESOURCES 64 /* up to 64 only due to tl_bitmap usage */ +#define UCP_NULL_RESOURCE ((ucp_rsc_index_t)-1) +typedef uint8_t ucp_rsc_index_t; + +/* MDs */ +#define UCP_UINT_TYPE(_bits) typedef UCS_PP_TOKENPASTE(UCS_PP_TOKENPASTE(uint, _bits), _t) +#define UCP_MD_INDEX_BITS 64 /* How many bits are in MD index */ +typedef ucp_rsc_index_t ucp_md_index_t; +#define UCP_MAX_MDS ucs_min(UCP_MD_INDEX_BITS, UCP_MAX_RESOURCES) +#define UCP_MAX_OP_MDS 4 /* maximal number of MDs per single op */ +UCP_UINT_TYPE(UCP_MD_INDEX_BITS) ucp_md_map_t; + +/* Lanes */ +#define UCP_MAX_LANES 6 +#define UCP_NULL_LANE ((ucp_lane_index_t)-1) +typedef uint8_t ucp_lane_index_t; +typedef uint8_t ucp_lane_map_t; + +/* Connection sequence number */ +typedef uint16_t ucp_ep_conn_sn_t; + +/* Forward declarations */ +typedef struct ucp_request ucp_request_t; +typedef struct ucp_recv_desc ucp_recv_desc_t; +typedef struct ucp_address_iface_attr ucp_address_iface_attr_t; 
+typedef struct ucp_address_entry ucp_address_entry_t; +typedef struct ucp_unpacked_address ucp_unpacked_address_t; +typedef struct ucp_wireup_ep ucp_wireup_ep_t; +typedef struct ucp_request_send_proto ucp_request_send_proto_t; +typedef struct ucp_worker_iface ucp_worker_iface_t; +typedef struct ucp_worker_cm ucp_worker_cm_t; +typedef struct ucp_rma_proto ucp_rma_proto_t; +typedef struct ucp_amo_proto ucp_amo_proto_t; +typedef struct ucp_wireup_sockaddr_data ucp_wireup_sockaddr_data_t; + + +/** + * Active message codes. Values 0, 5 and 13 are not assigned to any identifier in this enum. + */ +enum { + UCP_AM_ID_WIREUP = 1, /* Connection establishment */ + + UCP_AM_ID_EAGER_ONLY = 2, /* Single packet eager TAG */ + UCP_AM_ID_EAGER_FIRST = 3, /* First eager fragment */ + UCP_AM_ID_EAGER_MIDDLE = 4, /* Middle eager fragment */ + + UCP_AM_ID_EAGER_SYNC_ONLY = 6, /* Single packet eager-sync */ + UCP_AM_ID_EAGER_SYNC_FIRST = 7, /* First eager-sync fragment */ + UCP_AM_ID_EAGER_SYNC_ACK = 8, /* Eager-sync acknowledge */ + + UCP_AM_ID_RNDV_RTS = 9, /* Ready-to-Send to init rendezvous */ + UCP_AM_ID_RNDV_ATS = 10, /* Ack-to-Send after finishing a get operation */ + UCP_AM_ID_RNDV_RTR = 11, /* Ready-to-Receive rendezvous for a receiver + with a generic datatype */ + UCP_AM_ID_RNDV_DATA = 12, /* Rndv data fragments when using software + rndv (bcopy) */ + UCP_AM_ID_OFFLOAD_SYNC_ACK = 14, /* Eager sync ack for tag offload proto */ + + UCP_AM_ID_STREAM_DATA = 15, /* Eager STREAM packet */ + + UCP_AM_ID_RNDV_ATP = 16, /* Ack-to-put complete after finishing a put_zcopy */ + + UCP_AM_ID_PUT = 17, /* Remote memory write */ + UCP_AM_ID_GET_REQ = 18, /* Remote memory read request */ + UCP_AM_ID_GET_REP = 19, /* Remote memory read reply */ + UCP_AM_ID_ATOMIC_REQ = 20, /* Remote memory atomic request */ + UCP_AM_ID_ATOMIC_REP = 21, /* Remote memory atomic reply */ + UCP_AM_ID_CMPL = 22, /* Remote memory operation completion */ + UCP_AM_ID_SINGLE = 23, /* For user defined Active Messages */ + UCP_AM_ID_MULTI = 24, /* For user defined AM if message + does
not fit in one AM */ + UCP_AM_ID_SINGLE_REPLY = 25, /* For user defined AM when a reply + is needed */ + UCP_AM_ID_MULTI_REPLY = 26, /* presumably the multi-fragment counterpart of UCP_AM_ID_SINGLE_REPLY - TODO confirm */ + UCP_AM_ID_LAST /* One past the highest assigned id, i.e. 27 */ +}; + + +/** + * Atomic operations mode. + */ +typedef enum { + UCP_ATOMIC_MODE_CPU, /* Use CPU-based atomics */ + UCP_ATOMIC_MODE_DEVICE, /* Use device-based atomics */ + UCP_ATOMIC_MODE_GUESS, /* If all transports support CPU AMOs only (no DEVICE), + * the CPU is selected, otherwise DEVICE is selected */ + UCP_ATOMIC_MODE_LAST +} ucp_atomic_mode_t; + + +/** + * Communication scheme in RNDV protocol. + */ +typedef enum { + UCP_RNDV_MODE_GET_ZCOPY, /* Use get_zcopy scheme in RNDV protocol */ + UCP_RNDV_MODE_PUT_ZCOPY, /* Use put_zcopy scheme in RNDV protocol */ + UCP_RNDV_MODE_AUTO, /* Runtime automatically chooses optimal scheme to use */ + UCP_RNDV_MODE_LAST +} ucp_rndv_mode_t; + +/** + * Active message tracer. + */ +typedef void (*ucp_am_tracer_t)(ucp_worker_h worker, uct_am_trace_type_t type, + uint8_t id, const void *data, size_t length, + char *buffer, size_t max); + + +/** + * Internal callback for UCP requests + */ +typedef void (*ucp_request_callback_t)(ucp_request_t *req); + + +#endif diff --git a/src/ucp/core/ucp_version.c b/src/ucp/core/ucp_version.c new file mode 100644 index 0000000..fb72afc --- /dev/null +++ b/src/ucp/core/ucp_version.c @@ -0,0 +1,18 @@ +/* +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +/* Store the library version (1.8.0) into the three output arguments; each pointer is dereferenced unconditionally, so none may be NULL */ +void ucp_get_version(unsigned *major_version, unsigned *minor_version, + unsigned *release_number) +{ + *major_version = 1; + *minor_version = 8; + *release_number = 0; +} +/* Return the version as a string literal matching the numbers above. NOTE(review): the empty parameter list "()" leaves the parameters unspecified in C before C23; "(void)" would be stricter */ +const char *ucp_get_version_string() +{ + return "1.8.0"; +} diff --git a/src/ucp/core/ucp_version.c.in b/src/ucp/core/ucp_version.c.in new file mode 100644 index 0000000..c4e17a3 --- /dev/null +++ b/src/ucp/core/ucp_version.c.in @@ -0,0 +1,18 @@ +/* +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* See file LICENSE for terms.
+*/ + +/* Template of ucp_version.c: the @MAJOR_VERSION@, @MINOR_VERSION@ and @PATCH_VERSION@ placeholders stand where the generated file has 1, 8 and 0 (presumably substituted by the build system - TODO confirm) */ +void ucp_get_version(unsigned *major_version, unsigned *minor_version, + unsigned *release_number) +{ + *major_version = @MAJOR_VERSION@; + *minor_version = @MINOR_VERSION@; + *release_number = @PATCH_VERSION@; +} + +const char *ucp_get_version_string() +{ + return "@MAJOR_VERSION@.@MINOR_VERSION@.@PATCH_VERSION@"; +} diff --git a/src/ucp/core/ucp_worker.c b/src/ucp/core/ucp_worker.c new file mode 100644 index 0000000..fd943b9 --- /dev/null +++ b/src/ucp/core/ucp_worker.c @@ -0,0 +1,2202 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ucp_am.h" +#include "ucp_worker.h" +#include "ucp_mm.h" +#include "ucp_request.inl" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define UCP_WORKER_HEADROOM_SIZE \ + (sizeof(ucp_recv_desc_t) + UCP_WORKER_HEADROOM_PRIV_SIZE) +/* Operation passed to ucp_worker_wakeup_ctl_fd(): add an fd to, or delete it from, the worker event set */ +typedef enum ucp_worker_event_fd_op { + UCP_WORKER_EPFD_OP_ADD, + UCP_WORKER_EPFD_OP_DEL +} ucp_worker_event_fd_op_t; +/* Statistics classes, compiled only with ENABLE_STATS; counter_names is indexed by the UCP_WORKER_STAT_* ids */ +#if ENABLE_STATS +static ucs_stats_class_t ucp_worker_tm_offload_stats_class = { + .name = "tag_offload", + .num_counters = UCP_WORKER_STAT_TAG_OFFLOAD_LAST, + .counter_names = { + [UCP_WORKER_STAT_TAG_OFFLOAD_POSTED] = "posted", + [UCP_WORKER_STAT_TAG_OFFLOAD_MATCHED] = "matched", + [UCP_WORKER_STAT_TAG_OFFLOAD_MATCHED_SW_RNDV] = "matched_sw_rndv", + [UCP_WORKER_STAT_TAG_OFFLOAD_CANCELED] = "canceled", + [UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_TAG_EXCEED] = "block_tag_exceed", + [UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_NON_CONTIG] = "block_non_contig", + [UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_WILDCARD] = "block_wildcard", + [UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_SW_PEND] = "block_sw_pend", + [UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_NO_IFACE] = "block_no_iface", +
[UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_MEM_REG] = "block_mem_reg", + [UCP_WORKER_STAT_TAG_OFFLOAD_RX_UNEXP_EGR] = "rx_unexp_egr", + [UCP_WORKER_STAT_TAG_OFFLOAD_RX_UNEXP_RNDV] = "rx_unexp_rndv", + [UCP_WORKER_STAT_TAG_OFFLOAD_RX_UNEXP_SW_RNDV] = "rx_unexp_sw_rndv" + } +}; + +static ucs_stats_class_t ucp_worker_stats_class = { + .name = "ucp_worker", + .num_counters = UCP_WORKER_STAT_LAST, + .counter_names = { + [UCP_WORKER_STAT_TAG_RX_EAGER_MSG] = "rx_eager_msg", + [UCP_WORKER_STAT_TAG_RX_EAGER_SYNC_MSG] = "rx_sync_msg", + [UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_EXP] = "rx_eager_chunk_exp", + [UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP] = "rx_eager_chunk_unexp", + [UCP_WORKER_STAT_TAG_RX_RNDV_EXP] = "rx_rndv_rts_exp", + [UCP_WORKER_STAT_TAG_RX_RNDV_UNEXP] = "rx_rndv_rts_unexp" + } +}; +#endif + +/* Memory pool callback tables. ucp_am_mpool_ops uses ucs_mpool_hugetlb_malloc/free for chunks and empty object init/cleanup; the reg and frag tables use their own chunk functions and ucp_mpool_obj_init */ +ucs_mpool_ops_t ucp_am_mpool_ops = { + .chunk_alloc = ucs_mpool_hugetlb_malloc, + .chunk_release = ucs_mpool_hugetlb_free, + .obj_init = ucs_empty_function, + .obj_cleanup = ucs_empty_function +}; + + +ucs_mpool_ops_t ucp_reg_mpool_ops = { + .chunk_alloc = ucp_reg_mpool_malloc, + .chunk_release = ucp_reg_mpool_free, + .obj_init = ucp_mpool_obj_init, + .obj_cleanup = ucs_empty_function +}; + +ucs_mpool_ops_t ucp_frag_mpool_ops = { + .chunk_alloc = ucp_frag_mpool_malloc, + .chunk_release = ucp_frag_mpool_free, + .obj_init = ucp_mpool_obj_init, + .obj_cleanup = ucs_empty_function +}; +/* Forward declaration: the function is defined further down in this file but is needed earlier by ucp_worker_iface_deactivate() */ +void ucp_worker_iface_check_events(ucp_worker_iface_t *wiface, int force); +/* Latency estimate of an interface: its fixed overhead plus the growth term multiplied by the configured estimated number of endpoints */ +static UCS_F_ALWAYS_INLINE double +ucp_worker_iface_latency(ucp_worker_h worker, ucp_worker_iface_t *wiface) +{ + return wiface->attr.latency.overhead + + wiface->attr.latency.growth * worker->context->config.est_num_eps; +} +/* Add event_fd to, or delete it from, the worker event set according to op. Returns UCS_OK without touching the event set when UCP_FEATURE_WAKEUP is not enabled; an unknown op yields UCS_ERR_INVALID_PARAM */ +static ucs_status_t ucp_worker_wakeup_ctl_fd(ucp_worker_h worker, + ucp_worker_event_fd_op_t op, + int event_fd) +{ + ucs_event_set_type_t events = UCS_EVENT_SET_EVREAD; + ucs_status_t status; + + if (!(worker->context->config.features & UCP_FEATURE_WAKEUP)) { + return UCS_OK; + } + + if (worker->flags
& UCP_WORKER_FLAG_EDGE_TRIGGERED) { + events |= UCS_EVENT_SET_EDGE_TRIGGERED; + } + + switch (op) { + case UCP_WORKER_EPFD_OP_ADD: + status = ucs_event_set_add(worker->event_set, event_fd, + events, worker->user_data); + break; + case UCP_WORKER_EPFD_OP_DEL: + status = ucs_event_set_del(worker->event_set, event_fd); + break; + default: + ucs_bug("Unknown operation (%d) was passed", op); + status = UCS_ERR_INVALID_PARAM; + break; + } + + return status; +} +/* Register the handlers listed in ucp_am_handlers[] on the interface. With is_proxy set, entries that have a proxy_cb get it registered with wiface as the argument; all other entries get their regular cb with the worker as the argument. Entries are skipped when the iface has no AM capability, the feature is disabled, or a sync handler meets an iface without CB_SYNC support. A registration failure is fatal. */ +static void ucp_worker_set_am_handlers(ucp_worker_iface_t *wiface, int is_proxy) +{ + ucp_worker_h worker = wiface->worker; + ucp_context_h context = worker->context; + ucs_status_t status; + unsigned am_id; + + ucs_trace_func("iface=%p is_proxy=%d", wiface->iface, is_proxy); + + for (am_id = 0; am_id < UCP_AM_ID_LAST; ++am_id) { + if (!(wiface->attr.cap.flags & (UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_AM_ZCOPY))) { + continue; /* this check does not depend on am_id, so it skips every id for such an iface */ + } + + if (!(context->config.features & ucp_am_handlers[am_id].features)) { + continue; + } + + if (!(ucp_am_handlers[am_id].flags & UCT_CB_FLAG_ASYNC) && + !(wiface->attr.cap.flags & UCT_IFACE_FLAG_CB_SYNC)) + { + /* Do not register a sync callback on interface which does not + * support it. The transport selection logic should not use async + * transports for protocols with sync active message handlers. + */ + continue; + } + + if (is_proxy && (ucp_am_handlers[am_id].proxy_cb != NULL)) { + /* we care only about sync active messages, and this also makes sure + * the counter is not accessed from another thread.
+ */ + ucs_assert(!(ucp_am_handlers[am_id].flags & UCT_CB_FLAG_ASYNC)); + status = uct_iface_set_am_handler(wiface->iface, am_id, + ucp_am_handlers[am_id].proxy_cb, + wiface, + ucp_am_handlers[am_id].flags); + } else { + status = uct_iface_set_am_handler(wiface->iface, am_id, + ucp_am_handlers[am_id].cb, + worker, + ucp_am_handlers[am_id].flags); + } + if (status != UCS_OK) { + ucs_fatal("failed to set active message handler id %d: %s", am_id, + ucs_status_string(status)); + } + } +} +/* Active message handler that only traces and drops the message; installed by ucp_worker_remove_am_handlers() */ +static ucs_status_t ucp_stub_am_handler(void *arg, void *data, size_t length, + unsigned flags) +{ + ucp_worker_h worker = arg; + ucs_trace("worker %p: drop message", worker); + return UCS_OK; +} +/* On every worker interface with AM capability, replace the handler of each AM id whose feature is enabled by ucp_stub_am_handler (registered with UCT_CB_FLAG_ASYNC); the status of each registration is explicitly discarded */ +static void ucp_worker_remove_am_handlers(ucp_worker_h worker) +{ + ucp_context_h context = worker->context; + ucp_worker_iface_t *wiface; + ucp_rsc_index_t iface_id; + unsigned am_id; + + ucs_debug("worker %p: remove active message handlers", worker); + + for (iface_id = 0; iface_id < worker->num_ifaces; ++iface_id) { + wiface = worker->ifaces[iface_id]; + if (!(wiface->attr.cap.flags & (UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_AM_ZCOPY))) { + continue; + } + for (am_id = 0; am_id < UCP_AM_ID_LAST; ++am_id) { + if (context->config.features & ucp_am_handlers[am_id].features) { + (void)uct_iface_set_am_handler(wiface->iface, + am_id, ucp_stub_am_handler, + worker, UCT_CB_FLAG_ASYNC); + } + } + } +} +/* Forward a trace request to the tracer registered for this AM id; nothing happens when id is not below UCP_AM_ID_LAST or the entry has no tracer */ +static void ucp_worker_am_tracer(void *arg, uct_am_trace_type_t type, + uint8_t id, const void *data, size_t length, + char *buffer, size_t max) +{ + ucp_worker_h worker = arg; + ucp_am_tracer_t tracer; + + if (id < UCP_AM_ID_LAST) { + tracer = ucp_am_handlers[id].tracer; + if (tracer != NULL) { + tracer(worker, type, id, data, length, buffer, max); + } + } +} +/* Set up the worker wakeup state: event set, event fds and the uct_events mask. Without UCP_FEATURE_WAKEUP the fields are reset to their "none" values and UCS_OK is returned */ +static ucs_status_t ucp_worker_wakeup_init(ucp_worker_h worker, + const ucp_worker_params_t *params) +{ + ucp_context_h context = worker->context; + unsigned events; + ucs_status_t status; + + if
(!(context->config.features & UCP_FEATURE_WAKEUP)) { + worker->event_fd = -1; + worker->event_set = NULL; + worker->eventfd = -1; + worker->uct_events = 0; + status = UCS_OK; + goto out; + } + /* Take the event mask from the caller if provided; the default below enables every wakeup type listed and does not include UCP_WAKEUP_EDGE */ + if (params->field_mask & UCP_WORKER_PARAM_FIELD_EVENTS) { + events = params->events; + } else { + events = UCP_WAKEUP_RMA | UCP_WAKEUP_AMO | UCP_WAKEUP_TAG_SEND | + UCP_WAKEUP_TAG_RECV | UCP_WAKEUP_TX | UCP_WAKEUP_RX; + } + /* The event set is either built on a user-provided fd (and flagged as external) or created from scratch */ + if (params->field_mask & UCP_WORKER_PARAM_FIELD_EVENT_FD) { + worker->flags |= UCP_WORKER_FLAG_EXTERNAL_EVENT_FD; + status = ucs_event_set_create_from_fd(&worker->event_set, + params->event_fd); + } else { + status = ucs_event_set_create(&worker->event_set); + } + if (status != UCS_OK) { + goto out; + } + + status = ucs_event_set_fd_get(worker->event_set, &worker->event_fd); + if (status != UCS_OK) { + goto err_cleanup_event_set; + } + + if (events & UCP_WAKEUP_EDGE) { + worker->flags |= UCP_WORKER_FLAG_EDGE_TRIGGERED; + } + /* Separate eventfd used for signaling the worker from within the library (written by ucp_worker_wakeup_signal_fd) */ + worker->eventfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (worker->eventfd == -1) { + ucs_error("Failed to create event fd: %m"); + status = UCS_ERR_IO_ERROR; + goto err_cleanup_event_set; + } + + ucp_worker_wakeup_ctl_fd(worker, UCP_WORKER_EPFD_OP_ADD, worker->eventfd); /* NOTE(review): the returned status is not checked here */ + + worker->uct_events = 0; + + /* FIXME: any TAG flag initializes all types of completion because of + * possible issues in RNDV protocol.
The optimization may be + * implemented with using of separated UCP descriptors or manual + * signaling in RNDV and similar cases, see conversation in PR #1277 + */ + if ((events & UCP_WAKEUP_TAG_SEND) || + ((events & UCP_WAKEUP_TAG_RECV) && + (context->config.ext.rndv_thresh != UCS_MEMUNITS_INF))) + { + worker->uct_events |= UCT_EVENT_SEND_COMP; + } + + if (events & (UCP_WAKEUP_TAG_RECV | UCP_WAKEUP_RX)) { + worker->uct_events |= UCT_EVENT_RECV; + } + + if (events & (UCP_WAKEUP_RMA | UCP_WAKEUP_AMO | UCP_WAKEUP_TX)) { + worker->uct_events |= UCT_EVENT_SEND_COMP; + } + + return UCS_OK; + +err_cleanup_event_set: + ucs_event_set_cleanup(worker->event_set); + worker->event_set = NULL; + worker->event_fd = -1; +out: + return status; +} + +static void ucp_worker_wakeup_cleanup(ucp_worker_h worker) +{ + if (worker->event_set != NULL) { + ucs_assert(worker->event_fd != -1); + ucs_event_set_cleanup(worker->event_set); + worker->event_set = NULL; + worker->event_fd = -1; + } + if (worker->eventfd != -1) { + close(worker->eventfd); + } +} + +static void ucp_worker_iface_disarm(ucp_worker_iface_t *wiface) +{ + ucs_status_t status; + + if (wiface->flags & UCP_WORKER_IFACE_FLAG_ON_ARM_LIST) { + status = ucp_worker_wakeup_ctl_fd(wiface->worker, + UCP_WORKER_EPFD_OP_DEL, + wiface->event_fd); + ucs_assert_always(status == UCS_OK); + ucs_list_del(&wiface->arm_list); + wiface->flags &= ~UCP_WORKER_IFACE_FLAG_ON_ARM_LIST; + } +} + +static ucs_status_t ucp_worker_wakeup_signal_fd(ucp_worker_h worker) +{ + uint64_t dummy = 1; + int ret; + + ucs_trace_func("worker=%p fd=%d", worker, worker->eventfd); + + do { + ret = write(worker->eventfd, &dummy, sizeof(dummy)); + if (ret == sizeof(dummy)) { + return UCS_OK; + } else if (ret == -1) { + if (errno == EAGAIN) { + return UCS_OK; + } else if (errno != EINTR) { + ucs_error("Signaling wakeup failed: %m"); + return UCS_ERR_IO_ERROR; + } + } else { + ucs_assert(ret == 0); + } + } while (ret == 0); + + return UCS_OK; +} + +void 
ucp_worker_signal_internal(ucp_worker_h worker) +{ + if (worker->context->config.features & UCP_FEATURE_WAKEUP) { + ucp_worker_wakeup_signal_fd(worker); /* the returned status is not used */ + } +} +/* Progress callback that completes error handling for an endpoint, under the worker async block: it purges pending requests on every lane, destroys all lanes except the failed one, moves the failed lane to index 0, switches the ep to a single-lane configuration carrying the error status, and finally either destroys the ep (not in use by the user) or invokes its error callback. arg is a ucp_worker_err_handle_arg_t that is freed here; always returns 1. */ +static unsigned ucp_worker_iface_err_handle_progress(void *arg) +{ + ucp_worker_err_handle_arg_t *err_handle_arg = arg; + ucp_worker_h worker = err_handle_arg->worker; + ucp_ep_h ucp_ep = err_handle_arg->ucp_ep; + uct_ep_h uct_ep = err_handle_arg->uct_ep; + ucs_status_t status = err_handle_arg->status; + ucp_lane_index_t failed_lane = err_handle_arg->failed_lane; + ucp_lane_index_t lane; + ucp_ep_config_key_t key; + + UCS_ASYNC_BLOCK(&worker->async); + + ucs_debug("ep %p: handle error on lane[%d]=%p: %s", + ucp_ep, failed_lane, uct_ep, ucs_status_string(status)); + + ucs_assert(ucp_ep->flags & UCP_EP_FLAG_FAILED); + + /* Destroy all lanes except failed one since ucp_ep becomes unusable as well */ + for (lane = 0; lane < ucp_ep_num_lanes(ucp_ep); ++lane) { + if (ucp_ep->uct_eps[lane] == NULL) { + continue; + } + + /* Purge pending queue */ + ucs_trace("ep %p: purge pending on uct_ep[%d]=%p", ucp_ep, lane, + ucp_ep->uct_eps[lane]); + uct_ep_pending_purge(ucp_ep->uct_eps[lane], ucp_ep_err_pending_purge, + UCS_STATUS_PTR(status)); + + if (lane != failed_lane) { + ucs_trace("ep %p: destroy uct_ep[%d]=%p", ucp_ep, lane, + ucp_ep->uct_eps[lane]); + uct_ep_destroy(ucp_ep->uct_eps[lane]); + ucp_ep->uct_eps[lane] = NULL; + } + } + + /* Move failed lane to index 0 */ + if ((failed_lane != 0) && (failed_lane != UCP_NULL_LANE)) { + ucp_ep->uct_eps[0] = ucp_ep->uct_eps[failed_lane]; + ucp_ep->uct_eps[failed_lane] = NULL; + } + + /* NOTE: if failed ep is wireup auxiliary/sockaddr then we need to replace + * the lane with failed ep and destroy wireup ep + */ + if (ucp_ep->uct_eps[0] != uct_ep) { + ucs_assert(ucp_wireup_ep_is_owner(ucp_ep->uct_eps[0], uct_ep)); + ucp_wireup_ep_disown(ucp_ep->uct_eps[0], uct_ep); + ucs_trace("ep %p: destroy failed wireup ep %p", ucp_ep, ucp_ep->uct_eps[0]); +
uct_ep_destroy(ucp_ep->uct_eps[0]); + ucp_ep->uct_eps[0] = uct_ep; + } + + /* Redirect all lanes to failed one */ + key = ucp_ep_config(ucp_ep)->key; + key.am_lane = 0; + key.wireup_lane = 0; + key.tag_lane = 0; + key.rma_lanes[0] = 0; + key.rma_bw_lanes[0] = 0; + key.amo_lanes[0] = 0; + key.lanes[0].rsc_index = UCP_NULL_RESOURCE; + key.num_lanes = 1; + key.status = status; + /* From here on "status" holds the result of the config lookup; the original error stays available in key.status */ + status = ucp_worker_get_ep_config(worker, &key, 0, &ucp_ep->cfg_index); + if (status != UCS_OK) { + ucs_fatal("ep %p: could not change configuration to error state: %s", + ucp_ep, ucs_status_string(status)); + } + + ucp_ep->am_lane = 0; + + if (!(ucp_ep->flags & UCP_EP_FLAG_USED)) { + ucs_debug("ep %p: destroy internal endpoint due to peer failure", ucp_ep); + ucp_ep_disconnected(ucp_ep, 1); + } else { + ucp_ep_invoke_err_cb(ucp_ep, key.status); + } + + ucs_free(err_handle_arg); + UCS_ASYNC_UNBLOCK(&worker->async); + return 1; +} +/* Callback-queue filter: returns 1, after freeing the element argument, for queued ucp_worker_iface_err_handle_progress callbacks whose ucp_ep equals arg; returns 0 for every other element */ +int ucp_worker_err_handle_remove_filter(const ucs_callbackq_elem_t *elem, + void *arg) +{ + ucp_worker_err_handle_arg_t *err_handle_arg = elem->arg; + + if ((elem->cb == ucp_worker_iface_err_handle_progress) && + (err_handle_arg->ucp_ep == arg)) { + /* release err handling argument to avoid memory leak */ + ucs_free(err_handle_arg); + return 1; + } + + return 0; +} +/* Mark ucp_ep as failed and schedule ucp_worker_iface_err_handle_progress as a one-shot progress callback. Returns UCS_OK if the ep was already failed or the error will be handled; the original status if the ep uses UCP_ERR_HANDLING_MODE_NONE or is in use without an error callback; UCS_ERR_NO_MEMORY if the callback argument cannot be allocated. The worker is signaled on every path. */ +ucs_status_t ucp_worker_set_ep_failed(ucp_worker_h worker, ucp_ep_h ucp_ep, + uct_ep_h uct_ep, ucp_lane_index_t lane, + ucs_status_t status) +{ + uct_worker_cb_id_t prog_id = UCS_CALLBACKQ_ID_NULL; + ucs_status_t ret_status = UCS_OK; + ucp_rsc_index_t rsc_index; + uct_tl_resource_desc_t *tl_rsc; + ucp_worker_err_handle_arg_t *err_handle_arg; + + if (ucp_ep->flags & UCP_EP_FLAG_FAILED) { + goto out_ok; + } + + /* In case if this is a local failure we need to notify remote side */ + if (ucp_ep_is_cm_local_connected(ucp_ep)) { + ucp_ep_cm_disconnect_cm_lane(ucp_ep); + } + + /* set endpoint to failed to prevent wireup_ep switch */ + ucp_ep->flags |= UCP_EP_FLAG_FAILED; + + if
(ucp_ep_config(ucp_ep)->key.err_mode == UCP_ERR_HANDLING_MODE_NONE) { + /* NOTE: if user has not requested error handling on the endpoint, + * the failure is considered unhandled */ + ret_status = status; + goto out; + } + + err_handle_arg = ucs_malloc(sizeof(*err_handle_arg), "ucp_worker_err_handle_arg"); + if (err_handle_arg == NULL) { + ucs_error("failed to allocate ucp_worker_err_handle_arg"); + ret_status = UCS_ERR_NO_MEMORY; + goto out; + } + + err_handle_arg->worker = worker; + err_handle_arg->ucp_ep = ucp_ep; + err_handle_arg->uct_ep = uct_ep; + err_handle_arg->status = status; + err_handle_arg->failed_lane = lane; + + /* invoke the rest of the error handling flow from the main thread */ + uct_worker_progress_register_safe(worker->uct, + ucp_worker_iface_err_handle_progress, + err_handle_arg, UCS_CALLBACKQ_FLAG_ONESHOT, + &prog_id); + /* The callback registered above stays queued even when the branch below reports the error as unhandled */ + if ((ucp_ep_ext_gen(ucp_ep)->err_cb == NULL) && + (ucp_ep->flags & UCP_EP_FLAG_USED)) { + if (lane != UCP_NULL_LANE) { + rsc_index = ucp_ep_get_rsc_index(ucp_ep, lane); + tl_rsc = &worker->context->tl_rscs[rsc_index].tl_rsc; + ucs_error("error '%s' will not be handled for ep %p - " + UCT_TL_RESOURCE_DESC_FMT " since no error callback is installed", + ucs_status_string(status), ucp_ep, + UCT_TL_RESOURCE_DESC_ARG(tl_rsc)); + } else { + ucs_assert(uct_ep == NULL); + ucs_error("error '%s' occurred on wireup will not be handled for ep %p " + "since no error callback is installed", + ucs_status_string(status), ucp_ep); + } + ret_status = status; + goto out; + } + +out_ok: + ret_status = UCS_OK; + +out: + /* If the worker supports the UCP_FEATURE_WAKEUP feature, signal the user so + * that he can wake-up on this event */ + ucp_worker_signal_internal(worker); + + return ret_status; +} +/* Error handler taking (arg, uct_ep, status), where arg is the worker: scans all endpoints of the worker for the one that holds uct_ep on some lane, directly or through a wireup ep, and forwards to ucp_worker_set_ep_failed(). Returns that call's result, or UCS_ERR_NO_ELEM when no endpoint matches. */ +static ucs_status_t +ucp_worker_iface_error_handler(void *arg, uct_ep_h uct_ep, ucs_status_t status) +{ + ucp_worker_h worker = (ucp_worker_h)arg; + ucp_lane_index_t lane; + ucs_status_t ret_status; + ucp_ep_ext_gen_t *ep_ext; + ucp_ep_h ucp_ep; + +
UCS_ASYNC_BLOCK(&worker->async); + + ucs_debug("worker %p: error handler called for uct_ep %p: %s", + worker, uct_ep, ucs_status_string(status)); + + /* TODO: need to optimize uct_ep -> ucp_ep lookup */ + ucs_list_for_each(ep_ext, &worker->all_eps, ep_list) { + ucp_ep = ucp_ep_from_ext_gen(ep_ext); + for (lane = 0; lane < ucp_ep_num_lanes(ucp_ep); ++lane) { + if ((uct_ep == ucp_ep->uct_eps[lane]) || + ucp_wireup_ep_is_owner(ucp_ep->uct_eps[lane], uct_ep)) { + ret_status = ucp_worker_set_ep_failed(worker, ucp_ep, uct_ep, + lane, status); + UCS_ASYNC_UNBLOCK(&worker->async); + return ret_status; + } + } + } + + ucs_error("no uct_ep_h %p associated with ucp_ep_h on ucp_worker_h %p", + uct_ep, worker); + UCS_ASYNC_UNBLOCK(&worker->async); + + return UCS_ERR_NO_ELEM; +} +/* Increment the activation count of the interface. Only the first activation does work: it cancels a pending check-events callback, installs the regular AM handlers, adds the iface event fd to the worker wakeup set when the iface has event capabilities, and enables UCT send/recv progress with uct_flags added. */ +void ucp_worker_iface_activate(ucp_worker_iface_t *wiface, unsigned uct_flags) +{ + ucp_worker_h worker = wiface->worker; + ucs_status_t status; + + ucs_trace("activate iface %p acount=%u aifaces=%u", wiface->iface, + wiface->activate_count, worker->num_active_ifaces); + + if (wiface->activate_count++ > 0) { + return; /* was already activated */ + } + + /* Stop ongoing activation process, if such exists */ + uct_worker_progress_unregister_safe(worker->uct, &wiface->check_events_id); + + /* Set default active message handlers */ + ucp_worker_set_am_handlers(wiface, 0); + + /* Add to user wakeup */ + if (wiface->attr.cap.flags & UCP_WORKER_UCT_ALL_EVENT_CAP_FLAGS) { + status = ucp_worker_wakeup_ctl_fd(worker, UCP_WORKER_EPFD_OP_ADD, + wiface->event_fd); + ucs_assert_always(status == UCS_OK); + wiface->flags |= UCP_WORKER_IFACE_FLAG_ON_ARM_LIST; + ucs_list_add_tail(&worker->arm_ifaces, &wiface->arm_list); + } + + ++worker->num_active_ifaces; + + uct_iface_progress_enable(wiface->iface, + UCT_PROGRESS_SEND | UCT_PROGRESS_RECV | uct_flags); +} +/* Undo an activation. Without force, the count is decremented and only the last deactivation continues; with force the counters are left untouched. The function then disables progress, disarms the iface, installs the proxy AM handlers and restarts event checking. */ +static void ucp_worker_iface_deactivate(ucp_worker_iface_t *wiface, int force) +{ + ucs_trace("deactivate iface %p force=%d acount=%u aifaces=%u", +
wiface->iface, force, wiface->activate_count, + wiface->worker->num_active_ifaces); + + if (!force) { + ucs_assert(wiface->activate_count > 0); + if (--wiface->activate_count > 0) { + return; /* not completely deactivated yet */ + } + --wiface->worker->num_active_ifaces; + } + + /* Avoid progress on the interface to reduce overhead */ + uct_iface_progress_disable(wiface->iface, + UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); + + /* Remove from user wakeup */ + ucp_worker_iface_disarm(wiface); + + /* Set proxy active message handlers to count receives */ + ucp_worker_set_am_handlers(wiface, 1); + + /* Prepare for next receive event */ + ucp_worker_iface_check_events(wiface, force); +} +/* Activate the interface while holding the worker async block, asking UCT for thread-safe progress */ +void ucp_worker_iface_progress_ep(ucp_worker_iface_t *wiface) +{ + ucs_trace_func("iface=%p", wiface->iface); + + UCS_ASYNC_BLOCK(&wiface->worker->async); + + /* This function may be called from progress thread (such as when processing + * wireup messages), so ask UCT to be thread-safe. + */ + ucp_worker_iface_activate(wiface, UCT_PROGRESS_THREAD_SAFE); + + UCS_ASYNC_UNBLOCK(&wiface->worker->async); +} +/* Counterpart of ucp_worker_iface_progress_ep(): a non-forced deactivation while holding the worker async block */ +void ucp_worker_iface_unprogress_ep(ucp_worker_iface_t *wiface) +{ + ucs_trace_func("iface=%p", wiface->iface); + + UCS_ASYNC_BLOCK(&wiface->worker->async); + ucp_worker_iface_deactivate(wiface, 0); + UCS_ASYNC_UNBLOCK(&wiface->worker->async); +} + +/* + * If active messages were received by am proxy handler, activate the interface. + * Otherwise, arm the interface event and make sure that when an active message + * is received in the future, the interface would be activated.
+ */ +static ucs_status_t ucp_worker_iface_check_events_do(ucp_worker_iface_t *wiface, + unsigned *progress_count) +{ + unsigned prev_recv_count; + ucs_status_t status; + + ucs_trace_func("wiface=%p iface=%p", wiface, wiface->iface); + /* Returns UCS_OK when the iface is already active, was activated here, or was armed; returns UCS_ERR_BUSY when the caller should call again. *progress_count receives the result of uct_iface_progress(), or 0 if the iface was already active. */ + if (wiface->activate_count > 0) { + ucs_trace("iface %p already activated", wiface->iface); + *progress_count = 0; + return UCS_OK; + } + + prev_recv_count = wiface->proxy_recv_count; + + *progress_count = uct_iface_progress(wiface->iface); + if (prev_recv_count != wiface->proxy_recv_count) { + /* Received relevant active messages, activate the interface */ + ucp_worker_iface_activate(wiface, 0); + return UCS_OK; + } else if (*progress_count == 0) { + /* Arm the interface to wait for next event */ + ucs_assert(wiface->attr.cap.flags & UCP_WORKER_UCT_RECV_EVENT_CAP_FLAGS); + status = uct_iface_event_arm(wiface->iface, + UCP_WORKER_UCT_RECV_EVENT_ARM_FLAGS); + if (status == UCS_OK) { + ucs_trace("armed iface %p", wiface->iface); + + /* re-enable events, which were disabled by ucp_suspended_iface_event() */ + status = ucs_async_modify_handler(wiface->event_fd, + UCS_EVENT_SET_EVREAD); + if (status != UCS_OK) { + ucs_fatal("failed to modify %d event handler to UCS_EVENT_SET_EVREAD: %s", + wiface->event_fd, ucs_status_string(status)); + } + + return UCS_OK; + } else if (status != UCS_ERR_BUSY) { + ucs_fatal("failed to arm iface %p: %s", wiface->iface, + ucs_status_string(status)); /* no return follows this branch - presumably ucs_fatal() does not return; confirm */ + } else { + ucs_trace("arm iface %p returned BUSY", wiface->iface); + return UCS_ERR_BUSY; + } + } else { + ucs_trace("wiface %p progress returned %u, but no active messages were received", + wiface, *progress_count); + return UCS_ERR_BUSY; + } +} +/* Progress callback around ucp_worker_iface_check_events_do(), run under the worker async block; it unregisters itself once that call returns UCS_OK and returns the progress count */ +static unsigned ucp_worker_iface_check_events_progress(void *arg) +{ + ucp_worker_iface_t *wiface = arg; + ucp_worker_h worker = wiface->worker; + unsigned progress_count; + ucs_status_t status; + + ucs_trace_func("iface=%p, worker=%p", wiface->iface, worker); + + /* Check if we either had active messages or were
able to arm the interface. + * In these cases, the work is done and this progress callback can be removed. + */ + UCS_ASYNC_BLOCK(&worker->async); + status = ucp_worker_iface_check_events_do(wiface, &progress_count); + if (status == UCS_OK) { + uct_worker_progress_unregister_safe(worker->uct, &wiface->check_events_id); + } + UCS_ASYNC_UNBLOCK(&worker->async); + + return progress_count; +} +/* Start event checking on the interface. With force, ucp_worker_iface_check_events_do() is called in a loop until it stops returning UCS_ERR_BUSY; otherwise ucp_worker_iface_check_events_progress is registered on the worker progress queue and its id is stored in wiface->check_events_id. */ +void ucp_worker_iface_check_events(ucp_worker_iface_t *wiface, int force) +{ + unsigned progress_count; + ucs_status_t status; + + ucs_trace_func("iface=%p, force=%d", wiface->iface, force); + + if (force) { + do { + /* coverity wrongly resolves rc's progress to ucp_listener_conn_request_progress + * which in turn releases wiface->iface. this leads coverity to assume + * that ucp_worker_iface_check_events_do() dereferences a freed pointer + * in the subsequent call in the following loop */ + /* coverity[freed_arg] */ + status = ucp_worker_iface_check_events_do(wiface, &progress_count); + ucs_assert(progress_count == 0); + } while (status == UCS_ERR_BUSY); + ucs_assert(status == UCS_OK); + } else { + /* Check events on the main progress loop, to make this function safe to + * call from async context, and avoid starvation of other progress callbacks.
+ */ + uct_worker_progress_register_safe(wiface->worker->uct, + ucp_worker_iface_check_events_progress, + wiface, 0, &wiface->check_events_id); + } +} +/* Event handler for the interface event fd; arg is the ucp_worker_iface_t. It clears the events watched on the fd (modify to 0, fatal on failure), schedules event checking on the main progress loop and signals the worker. */ +void ucp_worker_iface_event(int fd, void *arg) +{ + ucp_worker_iface_t *wiface = arg; + ucp_worker_h worker = wiface->worker; + ucs_status_t status; + + ucs_trace_func("fd=%d iface=%p", fd, wiface->iface); + + status = ucs_async_modify_handler(wiface->event_fd, 0); + if (status != UCS_OK) { + ucs_fatal("failed to modify %d event handler to <empty>: %s", + wiface->event_fd, ucs_status_string(status)); + } + + /* Do more work on the main thread */ + ucp_worker_iface_check_events(wiface, 0); + + /* Signal user wakeup, to report the first message on the interface */ + ucp_worker_signal_internal(worker); +} +/* Close the UCT interface if it is open and reset the pointer to NULL, which makes a repeated call harmless */ +static void ucp_worker_uct_iface_close(ucp_worker_iface_t *wiface) +{ + if (wiface->iface != NULL) { + uct_iface_close(wiface->iface); + wiface->iface = NULL; + } +} +/* Look for another interface on the same device that covers all capabilities of wiface (ignoring the CONNECT_TO flags) with equal or better performance. On success stores its resource index in *better_index, marks wiface as UNUSED and returns 1; otherwise stores 0 and returns 0. */ +static int ucp_worker_iface_find_better(ucp_worker_h worker, + ucp_worker_iface_t *wiface, + ucp_rsc_index_t *better_index) +{ + ucp_context_h ctx = worker->context; + ucp_rsc_index_t rsc_index; + ucp_worker_iface_t *if_iter; + uint64_t test_flags; + double latency_iter, latency_cur, bw_cur; + + ucs_assert(wiface != NULL); + + latency_cur = ucp_worker_iface_latency(worker, wiface); + bw_cur = ucp_tl_iface_bandwidth(ctx, &wiface->attr.bandwidth); + + test_flags = wiface->attr.cap.flags & ~(UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_CONNECT_TO_EP); + + for (rsc_index = 0; rsc_index < ctx->num_tls; ++rsc_index) { + if_iter = worker->ifaces[rsc_index]; + + /* Need to check resources which belong to the same device only */ + if ((ctx->tl_rscs[rsc_index].dev_index != ctx->tl_rscs[wiface->rsc_index].dev_index) || + (if_iter->flags & UCP_WORKER_IFACE_FLAG_UNUSED) || + (rsc_index == wiface->rsc_index)) { + continue; + } + + latency_iter = ucp_worker_iface_latency(worker, if_iter); + + /* Check that another iface: */ + if (/* 1.
Supports all capabilities of the target iface (at least), + * except ...CONNECT_TO... caps. */ + ucs_test_all_flags(if_iter->attr.cap.flags, test_flags) && + /* 2. Has the same or better performance characteristics */ + (if_iter->attr.overhead <= wiface->attr.overhead) && + (ucp_tl_iface_bandwidth(ctx, &if_iter->attr.bandwidth) >= bw_cur) && + /* swap latencies in args list since less is better */ + (ucp_score_prio_cmp(latency_cur, if_iter->attr.priority, + latency_iter, wiface->attr.priority) >= 0) && + /* 3. The found transport is scalable enough or both + * transport are unscalable */ + (ucp_is_scalable_transport(ctx, if_iter->attr.max_num_eps) || + !ucp_is_scalable_transport(ctx, wiface->attr.max_num_eps))) + { + *better_index = rsc_index; + /* Do not check this iface anymore, because better one exists. + * It helps to avoid the case when two interfaces with the same + * caps and performance exclude each other. */ + wiface->flags |= UCP_WORKER_IFACE_FLAG_UNUSED; + return 1; + } + } + + /* Better resource wasn't found */ + *better_index = 0; + return 0; +} + +/** + * @brief Find the minimal possible set of tl interfaces for each device + * + * @param [in] worker UCP worker. + * @param [out] tl_bitmap_p Filled with the map of the tl resources that are kept. + * + * @note Returns no value; worker->num_ifaces is updated and the interfaces that are not kept are closed and freed. + */ +static void ucp_worker_select_best_ifaces(ucp_worker_h worker, + uint64_t *tl_bitmap_p) +{ + ucp_context_h context = worker->context; + uint64_t tl_bitmap = 0; + ucp_rsc_index_t repl_ifaces[UCP_MAX_RESOURCES]; + ucp_worker_iface_t *wiface; + ucp_rsc_index_t tl_id, iface_id; + + /* For each iface check whether there is another iface, which: + * 1. Supports at least the same capabilities + * 2.
Provides equivalent or better performance + */ + for (tl_id = 0; tl_id < context->num_tls; ++tl_id) { + wiface = worker->ifaces[tl_id]; + if (!ucp_worker_iface_find_better(worker, wiface, &repl_ifaces[tl_id])) { + tl_bitmap |= UCS_BIT(tl_id); /* no better iface exists: keep this one */ + } + } + + *tl_bitmap_p = tl_bitmap; + worker->num_ifaces = ucs_popcount(tl_bitmap); + ucs_assert(worker->num_ifaces <= context->num_tls); + + if (worker->num_ifaces == context->num_tls) { + return; /* every iface is kept, nothing to close or move */ + } + + ucs_assert(worker->num_ifaces < context->num_tls); + + /* Some ifaces need to be closed */ + for (tl_id = 0, iface_id = 0; tl_id < context->num_tls; ++tl_id) { + wiface = worker->ifaces[tl_id]; + if (tl_bitmap & UCS_BIT(tl_id)) { + if (iface_id != tl_id) { + worker->ifaces[iface_id] = wiface; /* compact the kept ifaces towards the front of the array */ + } + ++iface_id; + } else { + ucs_debug("closing resource[%d] "UCT_TL_RESOURCE_DESC_FMT + ", since resource[%d] "UCT_TL_RESOURCE_DESC_FMT + " is better, worker %p", + tl_id, UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[tl_id].tl_rsc), + repl_ifaces[tl_id], + UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[repl_ifaces[tl_id]].tl_rsc), + worker); + /* Ifaces should not be initialized yet, just close it + * (no need for cleanup) */ + ucp_worker_uct_iface_close(wiface); + ucs_free(wiface); /* NOTE(review): array slots at index num_ifaces and above are not cleared after the compaction */ + } + } +} + +/** + * @brief Open all resources as interfaces on this worker + * + * This routine opens interfaces on the tl resources according to the + * bitmap in the context. If bitmap is not set, the routine opens interfaces + * on all available resources and select the best ones. Then it caches obtained + * bitmap on the context, so the next workers could use it instead of + * constructing it themselves. + * + * @param [in] worker UCP worker.
+ * + * @return Error code as defined by @ref ucs_status_t + */ +static ucs_status_t ucp_worker_add_resource_ifaces(ucp_worker_h worker) +{ + ucp_context_h context = worker->context; + ucp_tl_resource_desc_t *resource; + uct_iface_params_t iface_params; + ucp_rsc_index_t tl_id, iface_id; + ucp_worker_iface_t *wiface; + uint64_t ctx_tl_bitmap, tl_bitmap; + unsigned num_ifaces; + ucs_status_t status; + + /* If tl_bitmap is already set, just use it. Otherwise open ifaces on all + * available resources and then select the best ones. */ + ctx_tl_bitmap = context->tl_bitmap; + if (ctx_tl_bitmap) { + num_ifaces = ucs_popcount(ctx_tl_bitmap); + tl_bitmap = ctx_tl_bitmap; + } else { + num_ifaces = context->num_tls; + tl_bitmap = UCS_MASK(context->num_tls); + } + + worker->ifaces = ucs_calloc(num_ifaces, sizeof(*worker->ifaces), + "ucp ifaces array"); + if (worker->ifaces == NULL) { + ucs_error("failed to allocate worker ifaces"); + return UCS_ERR_NO_MEMORY; + } + + worker->num_ifaces = num_ifaces; + iface_id = 0; + + ucs_for_each_bit(tl_id, tl_bitmap) { + iface_params.field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE; + resource = &context->tl_rscs[tl_id]; + + if (resource->flags & UCP_TL_RSC_FLAG_SOCKADDR) { + iface_params.open_mode = UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT; + } else { + iface_params.open_mode = UCT_IFACE_OPEN_MODE_DEVICE; + iface_params.field_mask |= UCT_IFACE_PARAM_FIELD_DEVICE; + iface_params.mode.device.tl_name = resource->tl_rsc.tl_name; + iface_params.mode.device.dev_name = resource->tl_rsc.dev_name; + } + + status = ucp_worker_iface_open(worker, tl_id, &iface_params, + &worker->ifaces[iface_id++]); + if (status != UCS_OK) { + return status; + } + } + + if (!ctx_tl_bitmap) { + /* Context bitmap is not set, need to select the best tl resources */ + tl_bitmap = 0; + ucp_worker_select_best_ifaces(worker, &tl_bitmap); + ucs_assert(tl_bitmap); + + /* Cache tl_bitmap on the context, so the next workers would not need + * to select best ifaces. 
*/ + context->tl_bitmap = tl_bitmap; + ucs_debug("selected tl bitmap: 0x%lx (%d tls)", + tl_bitmap, ucs_popcount(tl_bitmap)); + } + + worker->scalable_tl_bitmap = 0; + ucs_for_each_bit(tl_id, context->tl_bitmap) { + wiface = ucp_worker_iface(worker, tl_id); + + if (ucp_is_scalable_transport(context, wiface->attr.max_num_eps)) { + worker->scalable_tl_bitmap |= UCS_BIT(tl_id); + } + } + + ucs_debug("selected scalable tl bitmap: 0x%lx (%d tls)", + worker->scalable_tl_bitmap, + ucs_popcount(worker->scalable_tl_bitmap)); + + iface_id = 0; + ucs_for_each_bit(tl_id, tl_bitmap) { + status = ucp_worker_iface_init(worker, tl_id, + worker->ifaces[iface_id++]); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +static void ucp_worker_close_ifaces(ucp_worker_h worker) +{ + ucp_rsc_index_t iface_id; + ucp_worker_iface_t *wiface; + + UCS_ASYNC_BLOCK(&worker->async); + for (iface_id = 0; iface_id < worker->num_ifaces; ++iface_id) { + wiface = worker->ifaces[iface_id]; + if (wiface != NULL) { + ucp_worker_iface_cleanup(wiface); + } + } + ucs_free(worker->ifaces); + UCS_ASYNC_UNBLOCK(&worker->async); +} + +ucs_status_t ucp_worker_iface_open(ucp_worker_h worker, ucp_rsc_index_t tl_id, + uct_iface_params_t *iface_params, + ucp_worker_iface_t **wiface_p) +{ + ucp_context_h context = worker->context; + ucp_tl_resource_desc_t *resource = &context->tl_rscs[tl_id]; + uct_md_h md = context->tl_mds[resource->md_index].md; + uct_iface_config_t *iface_config; + const char *cfg_tl_name; + ucp_worker_iface_t *wiface; + ucs_status_t status; + + wiface = ucs_calloc(1, sizeof(*wiface), "ucp_iface"); + if (wiface == NULL) { + return UCS_ERR_NO_MEMORY; + } + + wiface->rsc_index = tl_id; + wiface->worker = worker; + wiface->event_fd = -1; + wiface->activate_count = 0; + wiface->check_events_id = UCS_CALLBACKQ_ID_NULL; + wiface->proxy_recv_count = 0; + wiface->post_count = 0; + wiface->flags = 0; + + /* Read interface or md configuration */ + if (resource->flags & 
UCP_TL_RSC_FLAG_SOCKADDR) { + cfg_tl_name = NULL; + } else { + cfg_tl_name = resource->tl_rsc.tl_name; + } + status = uct_md_iface_config_read(md, cfg_tl_name, NULL, NULL, &iface_config); + if (status != UCS_OK) { + goto err_free_iface; + } + + UCS_STATIC_ASSERT(UCP_WORKER_HEADROOM_PRIV_SIZE >= sizeof(ucp_eager_sync_hdr_t)); + + /* Fill rest of uct_iface params (caller should fill specific mode fields) */ + iface_params->field_mask |= UCT_IFACE_PARAM_FIELD_STATS_ROOT | + UCT_IFACE_PARAM_FIELD_RX_HEADROOM | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_ARG | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS | + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_ARG | + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_ARG | + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_CB | + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_CB | + UCT_IFACE_PARAM_FIELD_CPU_MASK; + iface_params->stats_root = UCS_STATS_RVAL(worker->stats); + iface_params->rx_headroom = UCP_WORKER_HEADROOM_SIZE; + iface_params->err_handler_arg = worker; + iface_params->err_handler = ucp_worker_iface_error_handler; + iface_params->err_handler_flags = UCT_CB_FLAG_ASYNC; + iface_params->eager_arg = iface_params->rndv_arg = wiface; + iface_params->eager_cb = ucp_tag_offload_unexp_eager; + iface_params->rndv_cb = ucp_tag_offload_unexp_rndv; + iface_params->cpu_mask = worker->cpu_mask; + + /* Open UCT interface */ + status = uct_iface_open(md, worker->uct, iface_params, iface_config, + &wiface->iface); + uct_config_release(iface_config); + + if (status != UCS_OK) { + goto err_free_iface; + } + + VALGRIND_MAKE_MEM_UNDEFINED(&wiface->attr, sizeof(wiface->attr)); + + status = uct_iface_query(wiface->iface, &wiface->attr); + if (status != UCS_OK) { + goto err_close_iface; + } + + ucs_debug("created interface[%d]=%p using "UCT_TL_RESOURCE_DESC_FMT" on worker %p", + tl_id, wiface->iface, UCT_TL_RESOURCE_DESC_ARG(&resource->tl_rsc), + worker); + + *wiface_p = wiface; + + return UCS_OK; + +err_close_iface: + uct_iface_close(wiface->iface); 
+err_free_iface: + ucs_free(wiface); + return status; +} + +ucs_status_t ucp_worker_iface_init(ucp_worker_h worker, ucp_rsc_index_t tl_id, + ucp_worker_iface_t *wiface) +{ + ucp_context_h context = worker->context; + ucp_tl_resource_desc_t *resource = &context->tl_rscs[tl_id]; + ucs_status_t status; + + ucs_assert(wiface != NULL); + + /* Set wake-up handlers */ + if (wiface->attr.cap.flags & UCP_WORKER_UCT_ALL_EVENT_CAP_FLAGS) { + status = uct_iface_event_fd_get(wiface->iface, &wiface->event_fd); + if (status != UCS_OK) { + goto out_close_iface; + } + + /* Register event handler without actual events so we could modify it later. */ + status = ucs_async_set_event_handler(worker->async.mode, wiface->event_fd, + 0, ucp_worker_iface_event, wiface, + &worker->async); + if (status != UCS_OK) { + ucs_fatal("failed to register event handler: %s", + ucs_status_string(status)); + } + } + + /* Set active message handlers */ + if ((wiface->attr.cap.flags & (UCT_IFACE_FLAG_AM_SHORT| + UCT_IFACE_FLAG_AM_BCOPY| + UCT_IFACE_FLAG_AM_ZCOPY))) + { + status = uct_iface_set_am_tracer(wiface->iface, ucp_worker_am_tracer, + worker); + if (status != UCS_OK) { + goto out_close_iface; + } + + if (context->config.ext.adaptive_progress && + (wiface->attr.cap.flags & UCP_WORKER_UCT_RECV_EVENT_CAP_FLAGS)) + { + ucp_worker_iface_deactivate(wiface, 1); + } else { + ucp_worker_iface_activate(wiface, 0); + } + } + + context->mem_type_access_tls[context->tl_mds[resource->md_index]. 
+ attr.cap.access_mem_type] |= UCS_BIT(tl_id); + + return UCS_OK; + +out_close_iface: + ucp_worker_uct_iface_close(wiface); + return status; +} + +void ucp_worker_iface_cleanup(ucp_worker_iface_t *wiface) +{ + ucs_status_t status; + + uct_worker_progress_unregister_safe(wiface->worker->uct, + &wiface->check_events_id); + + ucp_worker_iface_disarm(wiface); + + if ((wiface->event_fd != -1) && + (wiface->attr.cap.flags & UCP_WORKER_UCT_ALL_EVENT_CAP_FLAGS)) { + status = ucs_async_remove_handler(wiface->event_fd, 1); + if (status != UCS_OK) { + ucs_warn("failed to remove event handler for fd %d: %s", + wiface->event_fd, ucs_status_string(status)); + } + } + + ucp_worker_uct_iface_close(wiface); + ucs_free(wiface); +} + +static void ucp_worker_close_cms(ucp_worker_h worker) +{ + const ucp_rsc_index_t num_cms = ucp_worker_num_cm_cmpts(worker); + ucp_rsc_index_t i; + + for (i = 0; (i < num_cms) && (worker->cms[i].cm != NULL); ++i) { + uct_cm_close(worker->cms[i].cm); + } + + ucs_free(worker->cms); + worker->cms = NULL; +} + +static ucs_status_t ucp_worker_add_resource_cms(ucp_worker_h worker) +{ + ucp_context_h context = worker->context; + uct_cm_config_t *cm_config; + uct_component_h cmpt; + ucp_rsc_index_t cmpt_index, cm_cmpt_index, i; + ucs_status_t status; + + if (!ucp_worker_sockaddr_is_cm_proto(worker)) { + worker->cms = NULL; + return UCS_OK; + } + + UCS_ASYNC_BLOCK(&worker->async); + + worker->cms = ucs_calloc(ucp_worker_num_cm_cmpts(worker), + sizeof(*worker->cms), "ucp cms"); + if (worker->cms == NULL) { + ucs_error("can't allocate CMs array"); + status = UCS_ERR_NO_MEMORY; + goto out; + } + + for (i = 0, cm_cmpt_index = 0; cm_cmpt_index < context->config.num_cm_cmpts; + ++cm_cmpt_index) { + cmpt_index = context->config.cm_cmpt_idxs[cm_cmpt_index]; + cmpt = context->tl_cmpts[cmpt_index].cmpt; + + status = uct_cm_config_read(cmpt, NULL, NULL, &cm_config); + if (status != UCS_OK) { + ucs_error("failed to read cm configuration on component %s", + 
context->tl_cmpts[cmpt_index].attr.name); + goto err_free_cms; + } + + status = uct_cm_open(cmpt, worker->uct, cm_config, &worker->cms[i].cm); + /* The configuration is needed only by the open call. Release it before + * checking the status, so it is not leaked when opening the CM fails. */ + uct_config_release(cm_config); + if (status != UCS_OK) { + ucs_error("failed to open CM on component %s with status %s", + context->tl_cmpts[cmpt_index].attr.name, + ucs_status_string(status)); + goto err_free_cms; + } + + /* Advance the CM index only for a successfully opened CM */ + worker->cms[i++].cmpt_idx = cmpt_index; + } + + status = UCS_OK; + goto out; + +err_free_cms: + ucp_worker_close_cms(worker); +out: + UCS_ASYNC_UNBLOCK(&worker->async); + return status; +} + +static void ucp_worker_enable_atomic_tl(ucp_worker_h worker, const char *mode, + ucp_rsc_index_t rsc_index) +{ + ucs_assert(rsc_index != UCP_NULL_RESOURCE); + ucs_trace("worker %p: using %s atomics on iface[%d]=" UCT_TL_RESOURCE_DESC_FMT, + worker, mode, rsc_index, + UCT_TL_RESOURCE_DESC_ARG(&worker->context->tl_rscs[rsc_index].tl_rsc)); + worker->atomic_tls |= UCS_BIT(rsc_index); +} + +static void ucp_worker_init_cpu_atomics(ucp_worker_h worker) +{ + ucp_rsc_index_t iface_id; + ucp_worker_iface_t *wiface; + + ucs_debug("worker %p: using cpu atomics", worker); + + /* Enable all interfaces which have host-based atomics */ + for (iface_id = 0; iface_id < worker->num_ifaces; ++iface_id) { + wiface = worker->ifaces[iface_id]; + if (wiface->attr.cap.flags & UCT_IFACE_FLAG_ATOMIC_CPU) { + ucp_worker_enable_atomic_tl(worker, "cpu", wiface->rsc_index); + } + } +} + +static void ucp_worker_init_device_atomics(ucp_worker_h worker) +{ + ucp_context_h context = worker->context; + ucp_address_iface_attr_t dummy_iface_attr; + ucp_tl_resource_desc_t *rsc, *best_rsc; + uct_iface_attr_t *iface_attr; + ucp_rsc_index_t rsc_index; + ucp_rsc_index_t iface_id; + uint64_t iface_cap_flags; + double score, best_score; + ucp_rsc_index_t md_index; + ucp_worker_iface_t *wiface; + uct_md_attr_t *md_attr; + uint64_t supp_tls; + uint8_t priority, best_priority; + ucp_tl_iface_atomic_flags_t atomic; + + ucp_context_uct_atomic_iface_flags(context, 
&atomic); + + iface_cap_flags = UCT_IFACE_FLAG_ATOMIC_DEVICE; + + dummy_iface_attr.bandwidth.dedicated = 1e12; + dummy_iface_attr.bandwidth.shared = 0; + dummy_iface_attr.cap_flags = UINT64_MAX; + dummy_iface_attr.overhead = 0; + dummy_iface_attr.priority = 0; + dummy_iface_attr.lat_ovh = 0; + + supp_tls = 0; + best_score = -1; + best_rsc = NULL; + best_priority = 0; + + /* Select best interface for atomics device */ + for (iface_id = 0; iface_id < worker->num_ifaces; ++iface_id) { + wiface = worker->ifaces[iface_id]; + rsc_index = wiface->rsc_index; + rsc = &context->tl_rscs[rsc_index]; + md_index = rsc->md_index; + md_attr = &context->tl_mds[md_index].attr; + iface_attr = &wiface->attr; + + if (!(md_attr->cap.flags & UCT_MD_FLAG_REG) || + !ucs_test_all_flags(iface_attr->cap.flags, iface_cap_flags) || + !ucs_test_all_flags(iface_attr->cap.atomic32.op_flags, atomic.atomic32.op_flags) || + !ucs_test_all_flags(iface_attr->cap.atomic32.fop_flags, atomic.atomic32.fop_flags) || + !ucs_test_all_flags(iface_attr->cap.atomic64.op_flags, atomic.atomic64.op_flags) || + !ucs_test_all_flags(iface_attr->cap.atomic64.fop_flags, atomic.atomic64.fop_flags)) + { + continue; + } + + supp_tls |= UCS_BIT(rsc_index); + priority = iface_attr->priority; + + score = ucp_wireup_amo_score_func(context, md_attr, iface_attr, + &dummy_iface_attr); + if (ucp_is_scalable_transport(worker->context, + iface_attr->max_num_eps) && + ((score > best_score) || + ((score == best_score) && (priority > best_priority)))) + { + best_rsc = rsc; + best_score = score; + best_priority = priority; + } + } + + if (best_rsc == NULL) { + ucs_debug("worker %p: no support for atomics", worker); + return; + } + + ucs_debug("worker %p: using device atomics", worker); + + /* Enable atomics on all resources using same device as the "best" resource */ + ucs_for_each_bit(rsc_index, context->tl_bitmap) { + rsc = &context->tl_rscs[rsc_index]; + if ((supp_tls & UCS_BIT(rsc_index)) && + (rsc->md_index == best_rsc->md_index) && 
+ !strncmp(rsc->tl_rsc.dev_name, best_rsc->tl_rsc.dev_name, + UCT_DEVICE_NAME_MAX)) + { + ucp_worker_enable_atomic_tl(worker, "device", rsc_index); + } + } +} + +static void ucp_worker_init_guess_atomics(ucp_worker_h worker) +{ + uint64_t accumulated_flags = 0; + ucp_rsc_index_t iface_id; + + for (iface_id = 0; iface_id < worker->num_ifaces; ++iface_id) { + if (ucp_is_scalable_transport(worker->context, + worker->ifaces[iface_id]->attr.max_num_eps)) { + accumulated_flags |= worker->ifaces[iface_id]->attr.cap.flags; + } + } + + if (accumulated_flags & UCT_IFACE_FLAG_ATOMIC_DEVICE) { + ucp_worker_init_device_atomics(worker); + } else { + ucp_worker_init_cpu_atomics(worker); + } +} + +static void ucp_worker_init_atomic_tls(ucp_worker_h worker) +{ + ucp_context_h context = worker->context; + + worker->atomic_tls = 0; + + if (context->config.features & UCP_FEATURE_AMO) { + switch(context->config.ext.atomic_mode) { + case UCP_ATOMIC_MODE_CPU: + ucp_worker_init_cpu_atomics(worker); + break; + case UCP_ATOMIC_MODE_DEVICE: + ucp_worker_init_device_atomics(worker); + break; + case UCP_ATOMIC_MODE_GUESS: + ucp_worker_init_guess_atomics(worker); + break; + default: + ucs_fatal("unsupported atomic mode: %d", + context->config.ext.atomic_mode); + } + } +} + +static char* ucp_worker_add_feature_rsc(ucp_context_h context, + const ucp_ep_config_key_t *key, + ucp_lane_map_t lanes_bitmap, + const char *feature_str, + char *buf, size_t max) +{ + char *p = buf; + char *endp = buf + max; + int sep = 0; + ucp_rsc_index_t rsc_idx; + ucp_lane_index_t lane; + + if (!lanes_bitmap) { + return p; + } + + snprintf(p, endp - p, "%s(", feature_str); + p += strlen(p); + + ucs_for_each_bit(lane, lanes_bitmap) { + ucs_assert(lane < UCP_MAX_LANES); /* make coverity happy */ + rsc_idx = key->lanes[lane].rsc_index; + snprintf(p, endp - p, "%*s"UCT_TL_RESOURCE_DESC_FMT, sep, "", + UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[rsc_idx].tl_rsc)); + p += strlen(p); + sep = 1; /* add space between tl names */ 
+ } + + snprintf(p, endp - p, "); "); + p += strlen(p); + + return p; +} + +static void ucp_worker_print_used_tls(const ucp_ep_config_key_t *key, + ucp_context_h context, + ucp_ep_cfg_index_t config_idx) +{ + char info[256] = {0}; + ucp_lane_map_t tag_lanes_map = 0; + ucp_lane_map_t rma_lanes_map = 0; + ucp_lane_map_t amo_lanes_map = 0; + ucp_lane_map_t stream_lanes_map = 0; + ucp_lane_index_t lane; + char *p, *endp; + + if (!ucs_log_is_enabled(UCS_LOG_LEVEL_INFO)) { + return; + } + + p = info; + endp = p + sizeof(info); + + snprintf(p, endp - p, "ep_cfg[%d]: ", config_idx); + p += strlen(p); + + for (lane = 0; lane < key->num_lanes; ++lane) { + if (((key->am_lane == lane) || (lane == key->tag_lane) || + (ucp_ep_config_get_multi_lane_prio(key->am_bw_lanes, lane) >= 0) || + (ucp_ep_config_get_multi_lane_prio(key->rma_bw_lanes, lane) >= 0)) && + (context->config.features & UCP_FEATURE_TAG)) { + tag_lanes_map |= UCS_BIT(lane); + } + + if ((key->am_lane == lane) && + (context->config.features & UCP_FEATURE_STREAM)) { + stream_lanes_map |= UCS_BIT(lane); + } + + if ((ucp_ep_config_get_multi_lane_prio(key->rma_lanes, lane) >= 0)) { + rma_lanes_map |= UCS_BIT(lane); + } + + if ((ucp_ep_config_get_multi_lane_prio(key->amo_lanes, lane) >= 0)) { + amo_lanes_map |= UCS_BIT(lane); + } + } + + p = ucp_worker_add_feature_rsc(context, key, tag_lanes_map, "tag", + p, endp - p); + p = ucp_worker_add_feature_rsc(context, key, rma_lanes_map, "rma", + p, endp - p); + p = ucp_worker_add_feature_rsc(context, key, amo_lanes_map, "amo", + p, endp - p); + ucp_worker_add_feature_rsc(context, key, stream_lanes_map, "stream", + p, endp - p); + ucs_info("%s", info); +} + +static ucs_status_t ucp_worker_init_mpools(ucp_worker_h worker) +{ + size_t max_mp_entry_size = 0; + ucp_context_t *context = worker->context; + uct_iface_attr_t *if_attr; + ucp_rsc_index_t iface_id; + ucs_status_t status; + + for (iface_id = 0; iface_id < worker->num_ifaces; ++iface_id) { + if_attr = 
&worker->ifaces[iface_id]->attr; + max_mp_entry_size = ucs_max(max_mp_entry_size, + if_attr->cap.am.max_short); + max_mp_entry_size = ucs_max(max_mp_entry_size, + if_attr->cap.am.max_bcopy); + max_mp_entry_size = ucs_max(max_mp_entry_size, + if_attr->cap.am.max_zcopy); + } + + status = ucs_mpool_init(&worker->am_mp, 0, + max_mp_entry_size + UCP_WORKER_HEADROOM_SIZE, + 0, UCS_SYS_CACHE_LINE_SIZE, 128, UINT_MAX, + &ucp_am_mpool_ops, "ucp_am_bufs"); + if (status != UCS_OK) { + goto out; + } + + status = ucs_mpool_init(&worker->reg_mp, 0, + context->config.ext.seg_size + sizeof(ucp_mem_desc_t), + sizeof(ucp_mem_desc_t), UCS_SYS_CACHE_LINE_SIZE, + 128, UINT_MAX, &ucp_reg_mpool_ops, "ucp_reg_bufs"); + if (status != UCS_OK) { + goto err_release_am_mpool; + } + + status = ucs_mpool_init(&worker->rndv_frag_mp, 0, + context->config.ext.rndv_frag_size + sizeof(ucp_mem_desc_t), + sizeof(ucp_mem_desc_t), UCS_SYS_CACHE_LINE_SIZE, 128, + UINT_MAX, &ucp_frag_mpool_ops, "ucp_rndv_frags"); + if (status != UCS_OK) { + goto err_release_reg_mpool; + } + + return UCS_OK; + +err_release_reg_mpool: + ucs_mpool_cleanup(&worker->reg_mp, 0); +err_release_am_mpool: + ucs_mpool_cleanup(&worker->am_mp, 0); +out: + return status; +} + +/* All the ucp endpoints will share the configurations. No need for every ep to + * have its own configuration (to save memory footprint). Same config can be used + * by different eps. + * A 'key' identifies an entry in the ep_config array. An entry holds the key and + * additional configuration parameters and thresholds. 
+ * + * @param [in] worker Worker which owns the ep_config array. + * @param [in] key Configuration key to look up, or to add if missing. + * @param [in] print_cfg If nonzero, log the transports used by a newly + * created configuration. + * @param [out] config_idx_p Filled with the index of the matching or newly + * created entry. + * + * @return UCS_OK on success, or the error returned by ucp_ep_config_init(). + */ +ucs_status_t ucp_worker_get_ep_config(ucp_worker_h worker, + const ucp_ep_config_key_t *key, + int print_cfg, + ucp_ep_cfg_index_t *config_idx_p) +{ + ucp_ep_cfg_index_t config_idx; + ucs_status_t status; + + /* Search for the given key in the ep_config array */ + for (config_idx = 0; config_idx < worker->ep_config_count; ++config_idx) { + if (ucp_ep_config_is_equal(&worker->ep_config[config_idx].key, key)) { + goto out; + } + } + + if (worker->ep_config_count >= worker->ep_config_max) { + /* TODO support larger number of configurations */ + ucs_fatal("too many ep configurations: %d", worker->ep_config_count); + } + + /* Create new configuration */ + config_idx = worker->ep_config_count++; + status = ucp_ep_config_init(worker, &worker->ep_config[config_idx], key); + if (status != UCS_OK) { + /* Roll back the count, so the entry whose initialization failed is + * neither matched by later lookups nor cleaned up on destroy */ + --worker->ep_config_count; + return status; + } + + if (print_cfg) { + ucp_worker_print_used_tls(key, worker->context, config_idx); + } + +out: + *config_idx_p = config_idx; + return UCS_OK; +} + +static ucs_mpool_ops_t ucp_rkey_mpool_ops = { + .chunk_alloc = ucs_mpool_chunk_malloc, + .chunk_release = ucs_mpool_chunk_free, + .obj_init = NULL, + .obj_cleanup = NULL +}; + +ucs_status_t ucp_worker_create(ucp_context_h context, + const ucp_worker_params_t *params, + ucp_worker_h *worker_p) +{ + ucs_thread_mode_t uct_thread_mode; + unsigned config_count; + unsigned name_length; + ucp_worker_h worker; + ucs_status_t status; + + config_count = ucs_min((context->num_tls + 1) * (context->num_tls + 1) * context->num_tls, + UINT8_MAX); + + worker = ucs_calloc(1, sizeof(*worker) + + sizeof(*worker->ep_config) * config_count, + "ucp worker"); + if (worker == NULL) { + return UCS_ERR_NO_MEMORY; + } + + uct_thread_mode = UCS_THREAD_MODE_SINGLE; + worker->flags = 0; + + if (params->field_mask & UCP_WORKER_PARAM_FIELD_THREAD_MODE) { +#if ENABLE_MT + if (params->thread_mode != UCS_THREAD_MODE_SINGLE) { + /* UCT is serialized by UCP lock or by UCP user */ + uct_thread_mode = UCS_THREAD_MODE_SERIALIZED; + } + + if 
(params->thread_mode == UCS_THREAD_MODE_MULTI) { + worker->flags |= UCP_WORKER_FLAG_MT; + } +#else + if (params->thread_mode != UCS_THREAD_MODE_SINGLE) { + ucs_debug("forced single thread mode on worker create"); + } +#endif + } + + worker->context = context; + worker->uuid = ucs_generate_uuid((uintptr_t)worker); + worker->flush_ops_count = 0; + worker->inprogress = 0; + worker->ep_config_max = config_count; + worker->ep_config_count = 0; + worker->num_active_ifaces = 0; + worker->num_ifaces = 0; + worker->am_message_id = ucs_generate_uuid(0); + ucs_list_head_init(&worker->arm_ifaces); + ucs_list_head_init(&worker->stream_ready_eps); + ucs_list_head_init(&worker->all_eps); + ucp_ep_match_init(&worker->ep_match_ctx); + + UCS_STATIC_ASSERT(sizeof(ucp_ep_ext_gen_t) <= sizeof(ucp_ep_t)); + if (context->config.features & (UCP_FEATURE_STREAM | UCP_FEATURE_AM)) { + UCS_STATIC_ASSERT(sizeof(ucp_ep_ext_proto_t) <= sizeof(ucp_ep_t)); + ucs_strided_alloc_init(&worker->ep_alloc, sizeof(ucp_ep_t), 3); + } else { + ucs_strided_alloc_init(&worker->ep_alloc, sizeof(ucp_ep_t), 2); + } + + if (params->field_mask & UCP_WORKER_PARAM_FIELD_USER_DATA) { + worker->user_data = params->user_data; + } else { + worker->user_data = NULL; + } + + if (context->config.features & UCP_FEATURE_AM){ + worker->am_cbs = NULL; + worker->am_cb_array_len = 0; + } + name_length = ucs_min(UCP_WORKER_NAME_MAX, + context->config.ext.max_worker_name + 1); + ucs_snprintf_zero(worker->name, name_length, "%s:%d", ucs_get_host_name(), + getpid()); + + /* Create statistics */ + status = UCS_STATS_NODE_ALLOC(&worker->stats, &ucp_worker_stats_class, + ucs_stats_get_root(), "-%p", worker); + if (status != UCS_OK) { + goto err_free; + } + + status = UCS_STATS_NODE_ALLOC(&worker->tm_offload_stats, + &ucp_worker_tm_offload_stats_class, + worker->stats); + if (status != UCS_OK) { + goto err_free_stats; + } + + status = ucs_async_context_init(&worker->async, + context->config.ext.use_mt_mutex ? 
+ UCS_ASYNC_MODE_THREAD_MUTEX : + UCS_ASYNC_THREAD_LOCK_TYPE); + if (status != UCS_OK) { + goto err_free_tm_offload_stats; + } + + /* Create the underlying UCT worker */ + status = uct_worker_create(&worker->async, uct_thread_mode, &worker->uct); + if (status != UCS_OK) { + goto err_destroy_async; + } + + /* Create memory pool for requests */ + status = ucs_mpool_init(&worker->req_mp, 0, + sizeof(ucp_request_t) + context->config.request.size, + 0, UCS_SYS_CACHE_LINE_SIZE, 128, UINT_MAX, + &ucp_request_mpool_ops, "ucp_requests"); + if (status != UCS_OK) { + goto err_destroy_uct_worker; + } + + /* Create memory pool for small rkeys */ + status = ucs_mpool_init(&worker->rkey_mp, 0, + sizeof(ucp_rkey_t) + + sizeof(ucp_tl_rkey_t) * UCP_RKEY_MPOOL_MAX_MD, + 0, UCS_SYS_CACHE_LINE_SIZE, 128, UINT_MAX, + &ucp_rkey_mpool_ops, "ucp_rkeys"); + if (status != UCS_OK) { + goto err_req_mp_cleanup; + } + + /* Create UCS event set which combines events from all transports */ + status = ucp_worker_wakeup_init(worker, params); + if (status != UCS_OK) { + goto err_rkey_mp_cleanup; + } + + /* The CPU mask must be set before the interfaces are opened below, since + * ucp_worker_iface_open() passes worker->cpu_mask to the UCT interface + * parameters */ + if (params->field_mask & UCP_WORKER_PARAM_FIELD_CPU_MASK) { + worker->cpu_mask = params->cpu_mask; + } else { + UCS_CPU_ZERO(&worker->cpu_mask); + } + + /* Initialize tag matching */ + status = ucp_tag_match_init(&worker->tm); + if (status != UCS_OK) { + goto err_wakeup_cleanup; + } + + /* Open all resources as interfaces on this worker */ + status = ucp_worker_add_resource_ifaces(worker); + if (status != UCS_OK) { + goto err_close_ifaces; + } + + /* Open all resources as connection managers on this worker */ + status = ucp_worker_add_resource_cms(worker); + if (status != UCS_OK) { + goto err_close_cms; + } + + /* Create mem type endpoints */ + status = ucp_worker_create_mem_type_endpoints(worker); + if (status != UCS_OK) { + goto err_close_cms; + } + + /* Init AM and registered memory pools */ + status = ucp_worker_init_mpools(worker); + if (status != UCS_OK) { + goto err_close_cms; + } + + /* Select 
atomic resources */ + ucp_worker_init_atomic_tls(worker); + + /* At this point all UCT memory domains and interfaces are already created + * so warn about unused environment variables. + */ + ucs_config_parser_warn_unused_env_vars_once(); + + *worker_p = worker; + return UCS_OK; + +err_close_cms: + ucp_worker_close_cms(worker); +err_close_ifaces: + ucp_worker_close_ifaces(worker); + ucp_tag_match_cleanup(&worker->tm); +err_wakeup_cleanup: + ucp_worker_wakeup_cleanup(worker); +err_rkey_mp_cleanup: + ucs_mpool_cleanup(&worker->rkey_mp, 1); +err_req_mp_cleanup: + ucs_mpool_cleanup(&worker->req_mp, 1); +err_destroy_uct_worker: + uct_worker_destroy(worker->uct); +err_destroy_async: + ucs_async_context_cleanup(&worker->async); +err_free_tm_offload_stats: + UCS_STATS_NODE_FREE(worker->tm_offload_stats); +err_free_stats: + UCS_STATS_NODE_FREE(worker->stats); +err_free: + ucs_strided_alloc_cleanup(&worker->ep_alloc); + ucs_free(worker); + return status; +} + +static void ucp_worker_destroy_eps(ucp_worker_h worker) +{ + ucp_ep_ext_gen_t *ep_ext, *tmp; + + ucs_debug("worker %p: destroy all endpoints", worker); + ucs_list_for_each_safe(ep_ext, tmp, &worker->all_eps, ep_list) { + ucp_ep_disconnected(ucp_ep_from_ext_gen(ep_ext), 1); + } +} + +static void ucp_worker_destroy_ep_configs(ucp_worker_h worker) +{ + unsigned i; + + for (i = 0; i < worker->ep_config_count; ++i) { + ucp_ep_config_cleanup(worker, &worker->ep_config[i]); + } + + worker->ep_config_count = 0; +} + +void ucp_worker_destroy(ucp_worker_h worker) +{ + ucs_trace_func("worker=%p", worker); + + UCS_ASYNC_BLOCK(&worker->async); + ucs_free(worker->am_cbs); + ucp_worker_destroy_eps(worker); + ucp_worker_remove_am_handlers(worker); + ucp_worker_close_cms(worker); + UCS_ASYNC_UNBLOCK(&worker->async); + + ucp_worker_destroy_ep_configs(worker); + ucs_mpool_cleanup(&worker->am_mp, 1); + ucs_mpool_cleanup(&worker->reg_mp, 1); + ucs_mpool_cleanup(&worker->rndv_frag_mp, 1); + ucp_worker_close_ifaces(worker); + 
ucp_tag_match_cleanup(&worker->tm); + ucp_worker_wakeup_cleanup(worker); + ucs_mpool_cleanup(&worker->rkey_mp, 1); + ucs_mpool_cleanup(&worker->req_mp, 1); + uct_worker_destroy(worker->uct); + ucs_async_context_cleanup(&worker->async); + ucp_ep_match_cleanup(&worker->ep_match_ctx); + ucs_strided_alloc_cleanup(&worker->ep_alloc); + UCS_STATS_NODE_FREE(worker->tm_offload_stats); + UCS_STATS_NODE_FREE(worker->stats); + ucs_free(worker); +} + +/* Fill the fields of 'attr' which are selected by attr->field_mask: + * - THREAD_MODE: MULTI if the worker has UCP_WORKER_FLAG_MT, SINGLE otherwise. + * - ADDRESS: the packed worker address and its length. If the address flags + * include UCP_WORKER_ADDRESS_FLAG_NET_ONLY, only resources whose device type + * is UCT_DEVICE_TYPE_NET are packed. + * Returns UCS_OK, or the status of ucp_address_pack() if an address was + * requested. + */ +ucs_status_t ucp_worker_query(ucp_worker_h worker, + ucp_worker_attr_t *attr) +{ + ucp_context_h context = worker->context; + ucs_status_t status = UCS_OK; + uint64_t tl_bitmap; + ucp_rsc_index_t tl_id; + + if (attr->field_mask & UCP_WORKER_ATTR_FIELD_THREAD_MODE) { + if (worker->flags & UCP_WORKER_FLAG_MT) { + attr->thread_mode = UCS_THREAD_MODE_MULTI; + } else { + attr->thread_mode = UCS_THREAD_MODE_SINGLE; + } + } + + if (attr->field_mask & UCP_WORKER_ATTR_FIELD_ADDRESS) { + /* If UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS is not set, + * pack all tl addresses */ + tl_bitmap = UINT64_MAX; + + if (attr->field_mask & UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS) { + if (attr->address_flags & UCP_WORKER_ADDRESS_FLAG_NET_ONLY) { + tl_bitmap = 0; + ucs_for_each_bit(tl_id, context->tl_bitmap) { + if (context->tl_rscs[tl_id].tl_rsc.dev_type == UCT_DEVICE_TYPE_NET) { + tl_bitmap |= UCS_BIT(tl_id); + } + } + } + } + + status = ucp_address_pack(worker, NULL, tl_bitmap, + UCP_ADDRESS_PACK_FLAG_ALL, NULL, + &attr->address_length, + (void**)&attr->address); + } + + return status; +} + +unsigned ucp_worker_progress(ucp_worker_h worker) +{ + unsigned count; + + /* worker->inprogress is used only for assertion check. 
+ * coverity[assert_side_effect] + */ + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + /* check that ucp_worker_progress is not called from within ucp_worker_progress */ + ucs_assert(worker->inprogress++ == 0); + count = uct_worker_progress(worker->uct); + ucs_async_check_miss(&worker->async); + + /* coverity[assert_side_effect] */ + ucs_assert(--worker->inprogress == 0); + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + + return count; +} + +ssize_t ucp_stream_worker_poll(ucp_worker_h worker, + ucp_stream_poll_ep_t *poll_eps, + size_t max_eps, unsigned flags) +{ + ssize_t count = 0; + ucp_ep_ext_proto_t *ep_ext; + ucp_ep_h ep; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(worker->context, UCP_FEATURE_STREAM, + return UCS_ERR_INVALID_PARAM); + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + while ((count < max_eps) && !ucs_list_is_empty(&worker->stream_ready_eps)) { + ep_ext = ucp_stream_worker_dequeue_ep_head(worker); + ep = ucp_ep_from_ext_proto(ep_ext); + poll_eps[count].ep = ep; + poll_eps[count].user_data = ucp_ep_ext_gen(ep)->user_data; + ++count; + } + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + + return count; +} + +ucs_status_t ucp_worker_get_efd(ucp_worker_h worker, int *fd) +{ + ucs_status_t status; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(worker->context, UCP_FEATURE_WAKEUP, + return UCS_ERR_INVALID_PARAM); + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + if (worker->flags & UCP_WORKER_FLAG_EXTERNAL_EVENT_FD) { + status = UCS_ERR_UNSUPPORTED; + } else { + *fd = worker->event_fd; + status = UCS_OK; + } + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + return status; +} + +ucs_status_t ucp_worker_arm(ucp_worker_h worker) +{ + ucp_worker_iface_t *wiface; + ucs_status_t status; + uint64_t dummy; + int ret; + + ucs_trace_func("worker=%p", worker); + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(worker->context, UCP_FEATURE_WAKEUP, + return UCS_ERR_INVALID_PARAM); + + /* Read from event pipe. 
If some events are found, return BUSY, + * Otherwise, continue to arm the transport interfaces. + */ + do { + ret = read(worker->eventfd, &dummy, sizeof(dummy)); + if (ret == sizeof(dummy)) { + status = UCS_ERR_BUSY; + goto out; + } else if (ret == -1) { + if (errno == EAGAIN) { + break; /* No more events */ + } else if (errno != EINTR) { + ucs_error("Read from internal event fd failed: %m"); + status = UCS_ERR_IO_ERROR; + goto out; + } + } else { + ucs_assert(ret == 0); + } + } while (ret != 0); + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + /* Go over arm_list of active interfaces which support events and arm them */ + ucs_list_for_each(wiface, &worker->arm_ifaces, arm_list) { + ucs_assert(wiface->activate_count > 0); + status = uct_iface_event_arm(wiface->iface, worker->uct_events); + ucs_trace("arm iface %p returned %s", wiface->iface, + ucs_status_string(status)); + if (status != UCS_OK) { + goto out_unlock; + } + } + + status = UCS_OK; + +out_unlock: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); +out: + ucs_trace("ucp_worker_arm returning %s", ucs_status_string(status)); + return status; +} + +void ucp_worker_wait_mem(ucp_worker_h worker, void *address) +{ + ucs_arch_wait_mem(address); +} + +ucs_status_t ucp_worker_wait(ucp_worker_h worker) +{ + ucp_worker_iface_t *wiface; + struct pollfd *pfd; + ucs_status_t status; + nfds_t nfds; + int ret; + + ucs_trace_func("worker %p", worker); + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(worker->context, UCP_FEATURE_WAKEUP, + return UCS_ERR_INVALID_PARAM); + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + status = ucp_worker_arm(worker); + if (status == UCS_ERR_BUSY) { /* if UCS_ERR_BUSY is returned, events are already pending, so poll() must not be called */ + status = UCS_OK; + goto out_unlock; + } else if (status != UCS_OK) { + goto out_unlock; + } + + if (worker->flags & UCP_WORKER_FLAG_EXTERNAL_EVENT_FD) { + pfd = ucs_alloca(sizeof(*pfd) * worker->context->num_tls); + nfds = 0; + ucs_list_for_each(wiface, &worker->arm_ifaces, arm_list) { + 
pfd[nfds].fd = wiface->event_fd; + pfd[nfds].events = POLLIN; + ++nfds; + } + } else { + pfd = ucs_alloca(sizeof(*pfd)); + pfd->fd = worker->event_fd; + pfd->events = POLLIN; + nfds = 1; + } + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + + /* poll is thread safe system call, though can have unpredictable results + * because of using the same descriptor in multiple threads. + */ + for (;;) { + ret = poll(pfd, nfds, -1); + if (ret >= 0) { + ucs_assertv(ret == 1, "ret=%d", ret); + status = UCS_OK; + goto out; + } else { + if (errno != EINTR) { + ucs_error("poll(nfds=%d) returned %d: %m", (int)nfds, ret); + status = UCS_ERR_IO_ERROR; + goto out; + } + } + } + +out_unlock: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); +out: + return status; +} + +ucs_status_t ucp_worker_signal(ucp_worker_h worker) +{ + ucs_trace_func("worker %p", worker); + UCP_CONTEXT_CHECK_FEATURE_FLAGS(worker->context, UCP_FEATURE_WAKEUP, + return UCS_ERR_INVALID_PARAM); + return ucp_worker_wakeup_signal_fd(worker); +} + +ucs_status_t ucp_worker_get_address(ucp_worker_h worker, ucp_address_t **address_p, + size_t *address_length_p) +{ + ucs_status_t status; + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + status = ucp_address_pack(worker, NULL, UINT64_MAX, + UCP_ADDRESS_PACK_FLAG_ALL, NULL, + address_length_p, (void**)address_p); + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + + return status; +} + +void ucp_worker_release_address(ucp_worker_h worker, ucp_address_t *address) +{ + ucs_free(address); +} + + +void ucp_worker_print_info(ucp_worker_h worker, FILE *stream) +{ + ucp_context_h context = worker->context; + ucp_address_t *address; + size_t address_length; + ucs_status_t status; + ucp_rsc_index_t rsc_index; + int first; + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + fprintf(stream, "#\n"); + fprintf(stream, "# UCP worker '%s'\n", ucp_worker_get_name(worker)); + fprintf(stream, "#\n"); + + status = ucp_worker_get_address(worker, &address, &address_length); + if 
(status == UCS_OK) { + ucp_worker_release_address(worker, address); + fprintf(stream, "# address: %zu bytes\n", address_length); + } else { + fprintf(stream, "# \n"); + } + + if (context->config.features & UCP_FEATURE_AMO) { + fprintf(stream, "# atomics: "); + first = 1; + for (rsc_index = 0; rsc_index < worker->context->num_tls; ++rsc_index) { + if (worker->atomic_tls & UCS_BIT(rsc_index)) { + if (!first) { + fprintf(stream, ", "); + } + fprintf(stream, "%d:"UCT_TL_RESOURCE_DESC_FMT, rsc_index, + UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[rsc_index].tl_rsc)); + first = 0; + } + } + fprintf(stream, "\n"); + } + + fprintf(stream, "#\n"); + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); +} diff --git a/src/ucp/core/ucp_worker.h b/src/ucp/core/ucp_worker.h new file mode 100644 index 0000000..6281d66 --- /dev/null +++ b/src/ucp/core/ucp_worker.h @@ -0,0 +1,352 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCP_WORKER_H_ +#define UCP_WORKER_H_ + +#include "ucp_ep.h" +#include "ucp_context.h" +#include "ucp_thread.h" + +#include +#include +#include +#include +#include +#include + + +/* The size of the private buffer in UCT descriptor headroom, which UCP may + * use for its own needs. This size does not include ucp_recv_desc_t length, + * because it is common for all cases and protocols (TAG, STREAM). 
*/ +#define UCP_WORKER_HEADROOM_PRIV_SIZE 32 + +/* + * Enter/exit the worker critical section. With ENABLE_MT the async context is + * blocked/unblocked only when the worker has UCP_WORKER_FLAG_MT set; without + * ENABLE_MT both macros expand to nothing. + */ +#if ENABLE_MT + +#define UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(_worker) \ + do { \ + if ((_worker)->flags & UCP_WORKER_FLAG_MT) { \ + UCS_ASYNC_BLOCK(&(_worker)->async); \ + } \ + } while (0) + + +#define UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(_worker) \ + do { \ + if ((_worker)->flags & UCP_WORKER_FLAG_MT) { \ + UCS_ASYNC_UNBLOCK(&(_worker)->async); \ + } \ + } while (0) + + +#else + +#define UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(_worker) +#define UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(_worker) + +#endif + + +/** + * UCP worker flags + */ +enum { + UCP_WORKER_FLAG_EXTERNAL_EVENT_FD = UCS_BIT(0), /**< worker event fd is external */ + UCP_WORKER_FLAG_EDGE_TRIGGERED = UCS_BIT(1), /**< events are edge-triggered */ + UCP_WORKER_FLAG_MT = UCS_BIT(2) /**< MT locking is required */ +}; + + +/** + * UCP iface flags + */ +enum { + UCP_WORKER_IFACE_FLAG_OFFLOAD_ACTIVATED = UCS_BIT(0), /**< UCP iface receives tag + offload messages */ + UCP_WORKER_IFACE_FLAG_ON_ARM_LIST = UCS_BIT(1), /**< UCP iface is an element + of arm_ifaces list, so + it needs to be armed + in ucp_worker_arm(). */ + UCP_WORKER_IFACE_FLAG_UNUSED = UCS_BIT(2) /**< There is another UCP iface + with the same caps, but + with better performance */ +}; + + +/** + * UCP worker statistics counters + */ +enum { + /* Total number of received eager messages */ + UCP_WORKER_STAT_TAG_RX_EAGER_MSG, + UCP_WORKER_STAT_TAG_RX_EAGER_SYNC_MSG, + + /* Total number of received eager chunks (every message + * can be split into a bunch of chunks).
It is possible that + * some chunks of the message arrived unexpectedly, then the + * receive was posted, and the rest arrived expectedly */ + UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_EXP, + UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP, + + UCP_WORKER_STAT_TAG_RX_RNDV_EXP, + UCP_WORKER_STAT_TAG_RX_RNDV_UNEXP, + UCP_WORKER_STAT_LAST +}; + + +/** + * UCP worker tag offload statistics counters + */ +enum { + UCP_WORKER_STAT_TAG_OFFLOAD_POSTED, + UCP_WORKER_STAT_TAG_OFFLOAD_MATCHED, + UCP_WORKER_STAT_TAG_OFFLOAD_MATCHED_SW_RNDV, + UCP_WORKER_STAT_TAG_OFFLOAD_CANCELED, + UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_TAG_EXCEED, + UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_NON_CONTIG, + UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_WILDCARD, + UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_SW_PEND, + UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_NO_IFACE, + UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_MEM_REG, + UCP_WORKER_STAT_TAG_OFFLOAD_RX_UNEXP_EGR, + UCP_WORKER_STAT_TAG_OFFLOAD_RX_UNEXP_RNDV, + UCP_WORKER_STAT_TAG_OFFLOAD_RX_UNEXP_SW_RNDV, + UCP_WORKER_STAT_TAG_OFFLOAD_LAST +}; + +/* Combinations of UCT event flags and of UCT iface event-capability flags */ +#define UCP_WORKER_UCT_RECV_EVENT_ARM_FLAGS (UCT_EVENT_RECV | \ + UCT_EVENT_RECV_SIG) +#define UCP_WORKER_UCT_RECV_EVENT_CAP_FLAGS (UCT_IFACE_FLAG_EVENT_RECV | \ + UCT_IFACE_FLAG_EVENT_RECV_SIG) +#define UCP_WORKER_UCT_ALL_EVENT_CAP_FLAGS (UCT_IFACE_FLAG_EVENT_SEND_COMP | \ + UCT_IFACE_FLAG_EVENT_RECV | \ + UCT_IFACE_FLAG_EVENT_RECV_SIG) +#define UCP_WORKER_UCT_UNSIG_EVENT_CAP_FLAGS (UCT_IFACE_FLAG_EVENT_SEND_COMP | \ + UCT_IFACE_FLAG_EVENT_RECV) + +/* + * Statistics helpers: each one calls UCS_STATS_UPDATE_COUNTER with a value of 1 + * on a counter of the worker stats node (tm_offload_stats for the tag offload + * helper). The counter name is selected from the macro arguments. + */ +#define UCP_WORKER_STAT_EAGER_MSG(_worker, _flags) \ + UCS_STATS_UPDATE_COUNTER((_worker)->stats, \ + (_flags & UCP_RECV_DESC_FLAG_EAGER_SYNC) ?
\ + UCP_WORKER_STAT_TAG_RX_EAGER_SYNC_MSG : \ + UCP_WORKER_STAT_TAG_RX_EAGER_MSG, 1); + +#define UCP_WORKER_STAT_EAGER_CHUNK(_worker, _is_exp) \ + UCS_STATS_UPDATE_COUNTER((_worker)->stats, \ + UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_##_is_exp, 1); + +#define UCP_WORKER_STAT_RNDV(_worker, _is_exp) \ + UCS_STATS_UPDATE_COUNTER((_worker)->stats, \ + UCP_WORKER_STAT_TAG_RX_RNDV_##_is_exp, 1); + +#define UCP_WORKER_STAT_TAG_OFFLOAD(_worker, _name) \ + UCS_STATS_UPDATE_COUNTER((_worker)->tm_offload_stats, \ + UCP_WORKER_STAT_TAG_OFFLOAD_##_name, 1); +/* Get an element from memory pool _mp with ucs_mpool_get_inline(); a non-NULL + * element is passed to VALGRIND_MAKE_MEM_DEFINED before it is returned. */ +#define ucp_worker_mpool_get(_mp) \ + ({ \ + ucp_mem_desc_t *rdesc = ucs_mpool_get_inline((_mp)); \ + if (rdesc != NULL) { \ + VALGRIND_MAKE_MEM_DEFINED(rdesc, sizeof(*rdesc)); \ + } \ + rdesc; \ + }) + + +/** + * UCP worker iface, which encapsulates UCT iface, its attributes and + * some auxiliary info needed for tag matching offloads. + */ +struct ucp_worker_iface { + uct_iface_h iface; /* UCT interface */ + uct_iface_attr_t attr; /* UCT interface attributes */ + ucp_worker_h worker; /* The parent worker */ + ucs_list_link_t arm_list; /* Element in arm_ifaces list */ + ucp_rsc_index_t rsc_index; /* Resource index */ + int event_fd; /* Event FD, or -1 if undefined */ + unsigned activate_count;/* How many times this iface has + been activated */ + int check_events_id;/* Callback id for check_events */ + unsigned proxy_recv_count;/* Counts active messages on proxy handler */ + unsigned post_count; /* Counts uncompleted requests which are + offloaded to the transport */ + uint8_t flags; /* Interface flags */ +}; + + +/** + * UCP worker CM, which encapsulates UCT CM and its auxiliary info.
+ */ +struct ucp_worker_cm { + uct_cm_h cm; /* UCT CM handle */ + ucp_rsc_index_t cmpt_idx; /* Index of corresponding + component */ +}; + +/** + * Data that is stored about each callback registered with a worker + */ +typedef struct ucp_worker_am_entry { + ucp_am_callback_t cb; + void *context; + uint32_t flags; +} ucp_worker_am_entry_t; + +/** + * UCP worker (thread context). + */ +typedef struct ucp_worker { + unsigned flags; /* Worker flags */ + ucs_async_context_t async; /* Async context for this worker */ + ucp_context_h context; /* Back-reference to UCP context */ + uint64_t uuid; /* Unique ID for wireup */ + uct_worker_h uct; /* UCT worker handle */ + ucs_mpool_t req_mp; /* Memory pool for requests */ + ucs_mpool_t rkey_mp; /* Pool for small memory keys */ + uint64_t atomic_tls; /* Which resources can be used for atomics */ +/* Nesting counter incremented and decremented inside assertions in + * ucp_worker_progress(), to catch recursive progress calls */ + int inprogress; + char name[UCP_WORKER_NAME_MAX]; /* Worker name */ + + unsigned flush_ops_count;/* Number of pending operations */ + + int event_fd; /* Allocated (on-demand) event fd for wakeup */ + ucs_sys_event_set_t *event_set; /* Allocated UCS event set for wakeup */ + int eventfd; /* Event fd to support signal() calls */ + unsigned uct_events; /* UCT arm events */ + ucs_list_link_t arm_ifaces; /* List of interfaces to arm */ + + void *user_data; /* User-defined data */ + ucs_strided_alloc_t ep_alloc; /* Endpoint allocator */ + ucs_list_link_t stream_ready_eps; /* List of EPs with received stream data */ + ucs_list_link_t all_eps; /* List of all endpoints */ + ucp_ep_match_ctx_t ep_match_ctx; /* Endpoint-to-endpoint matching context */ + ucp_worker_iface_t **ifaces; /* Array of pointers to interfaces, + one for each resource */ + unsigned num_ifaces; /* Number of elements in ifaces array */ + unsigned num_active_ifaces; /* Number of activated ifaces */ + uint64_t scalable_tl_bitmap; /* Map of scalable tl resources */ + ucp_worker_cm_t *cms; /* Array of CMs, one for each component */ + ucs_mpool_t am_mp; /* Memory pool for AM
receives */ + ucs_mpool_t reg_mp; /* Registered memory pool */ + ucs_mpool_t rndv_frag_mp; /* Memory pool for RNDV fragments */ + ucp_tag_match_t tm; /* Tag-matching queues and offload info */ + uint64_t am_message_id; /* For matching long am's */ + ucp_ep_h mem_type_ep[UCS_MEMORY_TYPE_LAST];/* Endpoints indexed by memory type */ + + UCS_STATS_NODE_DECLARE(stats) + UCS_STATS_NODE_DECLARE(tm_offload_stats) + + ucp_worker_am_entry_t *am_cbs; /* Array of callbacks and their data */ + size_t am_cb_array_len; /* Length of the callback array */ + + ucs_cpu_set_t cpu_mask; /* Save CPU mask for subsequent calls to ucp_worker_listen */ + unsigned ep_config_max; /* Maximal number of configurations */ + unsigned ep_config_count; /* Current number of configurations */ + ucp_ep_config_t ep_config[0]; /* Array of transport limits and thresholds */ +} ucp_worker_t; + + +/** + * UCP worker argument for the error handling callback + */ +typedef struct ucp_worker_err_handle_arg { + ucp_worker_h worker; + ucp_ep_h ucp_ep; + uct_ep_h uct_ep; + ucp_lane_index_t failed_lane; + ucs_status_t status; +} ucp_worker_err_handle_arg_t; + + +ucs_status_t ucp_worker_get_ep_config(ucp_worker_h worker, + const ucp_ep_config_key_t *key, + int print_cfg, + ucp_ep_cfg_index_t *config_idx_p); + +ucs_status_t ucp_worker_iface_open(ucp_worker_h worker, ucp_rsc_index_t tl_id, + uct_iface_params_t *iface_params, + ucp_worker_iface_t **wiface); + +ucs_status_t ucp_worker_iface_init(ucp_worker_h worker, ucp_rsc_index_t tl_id, + ucp_worker_iface_t *wiface); + +void ucp_worker_iface_cleanup(ucp_worker_iface_t *wiface); + +void ucp_worker_iface_progress_ep(ucp_worker_iface_t *wiface); + +void ucp_worker_iface_unprogress_ep(ucp_worker_iface_t *wiface); + +void ucp_worker_signal_internal(ucp_worker_h worker); + +void ucp_worker_iface_activate(ucp_worker_iface_t *wiface, unsigned uct_flags); + +int ucp_worker_err_handle_remove_filter(const ucs_callbackq_elem_t *elem, + void *arg); +ucs_status_t ucp_worker_set_ep_failed(ucp_worker_h
worker, ucp_ep_h ucp_ep, + uct_ep_h uct_ep, ucp_lane_index_t lane, + ucs_status_t status); +/* Return the name stored in the worker */ +static inline const char* ucp_worker_get_name(ucp_worker_h worker) +{ + return worker->name; +} + +/* get ep by pointer received from remote side, do some debug checks */ +static inline ucp_ep_h ucp_worker_get_ep_by_ptr(ucp_worker_h worker, + uintptr_t ep_ptr) +{ + ucp_ep_h ep = (ucp_ep_h)ep_ptr; + + ucs_assert(ep != NULL); + ucs_assertv(ep->worker == worker, "worker=%p ep=%p ep->worker=%p", worker, + ep, ep->worker); + return ep; +} +/* Return the worker iface of resource rsc_index, or NULL if rsc_index is + * UCP_NULL_RESOURCE. The position in ifaces[] is computed from the context + * tl_bitmap by ucs_bitmap2idx(). */ +static UCS_F_ALWAYS_INLINE ucp_worker_iface_t* +ucp_worker_iface(ucp_worker_h worker, ucp_rsc_index_t rsc_index) +{ + return (rsc_index == UCP_NULL_RESOURCE) ? NULL : + worker->ifaces[ucs_bitmap2idx(worker->context->tl_bitmap, rsc_index)]; +} +/* Return the attributes of the iface of resource rsc_index. The result of + * ucp_worker_iface() is used without a NULL check. */ +static UCS_F_ALWAYS_INLINE uct_iface_attr_t* +ucp_worker_iface_get_attr(ucp_worker_h worker, ucp_rsc_index_t rsc_index) +{ + return &ucp_worker_iface(worker, rsc_index)->attr; +} +/* Return ucp_tl_iface_bandwidth() for the bandwidth attribute of the iface of + * resource rsc_index */ +static UCS_F_ALWAYS_INLINE double +ucp_worker_iface_bandwidth(ucp_worker_h worker, ucp_rsc_index_t rsc_index) +{ + uct_iface_attr_t *iface_attr = ucp_worker_iface_get_attr(worker, rsc_index); + + return ucp_tl_iface_bandwidth(worker->context, &iface_attr->bandwidth); +} +/* Return the unified_mode setting from the worker context configuration */ +static UCS_F_ALWAYS_INLINE int +ucp_worker_unified_mode(ucp_worker_h worker) +{ + return worker->context->config.ext.unified_mode; +} +/* Return num_cm_cmpts from the worker context configuration */ +static UCS_F_ALWAYS_INLINE ucp_rsc_index_t +ucp_worker_num_cm_cmpts(const ucp_worker_h worker) +{ + return worker->context->config.num_cm_cmpts; +} +/* Return 1 if ucp_worker_num_cm_cmpts() is non-zero, 0 otherwise */ +static UCS_F_ALWAYS_INLINE int +ucp_worker_sockaddr_is_cm_proto(const ucp_worker_h worker) +{ + return !!ucp_worker_num_cm_cmpts(worker); +} + +#endif diff --git a/src/ucp/dt/dt.c b/src/ucp/dt/dt.c new file mode 100644 index 0000000..b85bcc1 --- /dev/null +++ b/src/ucp/dt/dt.c @@ -0,0 +1,142 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms.
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "dt.h" + +#include +#include +#include +#include + +/** + * Copy @a recv_length bytes from @a recv_data into @a buffer, which resides in + * memory of type @a mem_type, through the worker mem_type_ep of that type: + * the buffer is registered, written with uct_ep_put_short() on the first RMA + * lane of the endpoint, and unregistered again. + * + * @return UCS_OK for a zero-length copy or on success, otherwise the failing + * registration or uct_ep_put_short() status. + */ +UCS_PROFILE_FUNC(ucs_status_t, ucp_mem_type_unpack, + (worker, buffer, recv_data, recv_length, mem_type), + ucp_worker_h worker, void *buffer, const void *recv_data, + size_t recv_length, ucs_memory_type_t mem_type) +{ + ucp_ep_h ep = worker->mem_type_ep[mem_type]; + ucp_md_map_t md_map = 0; + ucp_lane_index_t lane; + unsigned md_index; + uct_mem_h memh[1]; + ucs_status_t status; + uct_rkey_bundle_t rkey_bundle; + + if (recv_length == 0) { + return UCS_OK; + } + + lane = ucp_ep_config(ep)->key.rma_lanes[0]; + md_index = ucp_ep_md_index(ep, lane); + + status = ucp_mem_type_reg_buffers(worker, buffer, recv_length, + mem_type, md_index, memh, &md_map, + &rkey_bundle); + if (status != UCS_OK) { + ucs_error("failed to register buffer with mem type domain %s", + ucs_memory_type_names[mem_type]); + return status; + } + + status = uct_ep_put_short(ep->uct_eps[lane], recv_data, recv_length, + (uint64_t)buffer, rkey_bundle.rkey); + if (status != UCS_OK) { + ucs_error("uct_ep_put_short() failed %s", ucs_status_string(status)); + } + + ucp_mem_type_unreg_buffers(worker, mem_type, md_index, memh, + &md_map, &rkey_bundle); + return status; +} +/** + * Copy @a length bytes from @a src, which resides in memory of type + * @a mem_type, into @a dest through the worker mem_type_ep of that type: the + * source is registered, read with uct_ep_get_short() on the first RMA lane of + * the endpoint, and unregistered again. + * + * @return UCS_OK for a zero-length copy or on success, otherwise the failing + * registration or uct_ep_get_short() status. + */ +UCS_PROFILE_FUNC(ucs_status_t, ucp_mem_type_pack, + (worker, dest, src, length, mem_type), + ucp_worker_h worker, void *dest, const void *src, size_t length, + ucs_memory_type_t mem_type) +{ + ucp_ep_h ep = worker->mem_type_ep[mem_type]; + ucp_md_map_t md_map = 0; + ucp_lane_index_t lane; + ucp_md_index_t md_index; + ucs_status_t status; + uct_mem_h memh[1]; + uct_rkey_bundle_t rkey_bundle; + + if (length == 0) { + return UCS_OK; + } + + lane = ucp_ep_config(ep)->key.rma_lanes[0]; + md_index = ucp_ep_md_index(ep, lane); + + status = ucp_mem_type_reg_buffers(worker, (void *)src, length, mem_type, + md_index, memh, &md_map, &rkey_bundle); + if (status != UCS_OK) { + ucs_error("failed to register buffer with mem type
domain %s", + ucs_memory_type_names[mem_type]); + return status; + } + + status = uct_ep_get_short(ep->uct_eps[lane], dest, length, + (uint64_t)src, rkey_bundle.rkey); + if (status != UCS_OK) { + ucs_error("uct_ep_get_short() failed %s", ucs_status_string(status)); + } + + ucp_mem_type_unreg_buffers(worker, mem_type, md_index, memh, + &md_map, &rkey_bundle); + return status; +} +/** + * Pack @a length bytes of @a src, described by @a datatype, into @a dest and + * advance state->offset by the packed size. Contiguous data is copied from + * src + state->offset (through ucp_mem_type_pack() if @a mem_type is not + * CPU-accessible), IOV data is gathered from the position kept in @a state, + * and generic data is packed by the datatype pack callback. + * + * @return Number of bytes packed; 0 if @a length is 0 or the datatype class is + * invalid. + * + * NOTE(review): the status returned by ucp_mem_type_pack() is not checked here. + */ +size_t ucp_dt_pack(ucp_worker_h worker, ucp_datatype_t datatype, + ucs_memory_type_t mem_type, void *dest, const void *src, + ucp_dt_state_t *state, size_t length) +{ + size_t result_len = 0; + ucp_dt_generic_t *dt; + + if (!length) { + return length; + } + + switch (datatype & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type)) { + UCS_PROFILE_CALL(ucs_memcpy_relaxed, dest, + UCS_PTR_BYTE_OFFSET(src, state->offset), length); + } else { + ucp_mem_type_pack(worker, dest, + UCS_PTR_BYTE_OFFSET(src, state->offset), + length, mem_type); + } + result_len = length; + break; + + case UCP_DATATYPE_IOV: + UCS_PROFILE_CALL_VOID(ucp_dt_iov_gather, dest, src, length, + &state->dt.iov.iov_offset, + &state->dt.iov.iovcnt_offset); + result_len = length; + break; + + case UCP_DATATYPE_GENERIC: + dt = ucp_dt_generic(datatype); + result_len = UCS_PROFILE_NAMED_CALL("dt_pack", dt->ops.pack, + state->dt.generic.state, + state->offset, dest, length); + break; + + default: + ucs_error("Invalid data type"); + } + + state->offset += result_len; + return result_len; +} diff --git a/src/ucp/dt/dt.h b/src/ucp/dt/dt.h new file mode 100644 index 0000000..c1a5afc --- /dev/null +++ b/src/ucp/dt/dt.h @@ -0,0 +1,64 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms.
+ */ + + +#ifndef UCP_DT_H_ +#define UCP_DT_H_ + +#include "dt_contig.h" +#include "dt_iov.h" +#include "dt_generic.h" + +#include +#include +#include + + +/** + * Memory registration state of a buffer/operation: a map of the memory + * domains in use and an array of up to UCP_MAX_OP_MDS memory handles. + */ +typedef struct ucp_dt_reg { + ucp_md_map_t md_map; /* Map of used memory domains */ + uct_mem_h memh[UCP_MAX_OP_MDS]; +} ucp_dt_reg_t; + + +/** + * State of a send/receive operation in progress on a datatype. The dt union + * holds the per-class state: registration info for contiguous data, the + * current position and registration array for IOV data, and an opaque state + * pointer for generic datatypes. + */ +typedef struct ucp_dt_state { + size_t offset; /* Total offset in overall payload. */ + union { + ucp_dt_reg_t contig; + struct { + size_t iov_offset; /* Offset in the IOV item */ + size_t iovcnt_offset; /* The IOV item to start copy */ + size_t iovcnt; /* Number of IOV buffers */ + ucp_dt_reg_t *dt_reg; /* Pointer to IOV memh[iovcnt] */ + } iov; + struct { + void *state; + } generic; + } dt; +} ucp_dt_state_t; + +/* Pack data of the given datatype from src into dest, advancing state->offset; + * returns the number of bytes packed */ +size_t ucp_dt_pack(ucp_worker_h worker, ucp_datatype_t datatype, + ucs_memory_type_t mem_type, void *dest, const void *src, + ucp_dt_state_t *state, size_t length); + +/* Copy length bytes from src, located in memory of type mem_type, into dest */ +ucs_status_t ucp_mem_type_pack(ucp_worker_h worker, void *dest, + const void *src, size_t length, + ucs_memory_type_t mem_type); + +/* Copy recv_length bytes from recv_data into buffer, located in memory of type + * mem_type */ +ucs_status_t ucp_mem_type_unpack(ucp_worker_h worker, void *buffer, + const void *recv_data, size_t recv_length, + ucs_memory_type_t mem_type); + +#endif /* UCP_DT_H_ */ + diff --git a/src/ucp/dt/dt.inl b/src/ucp/dt/dt.inl new file mode 100644 index 0000000..95e462d --- /dev/null +++ b/src/ucp/dt/dt.inl @@ -0,0 +1,134 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms.
+ */ + +#ifndef UCP_DT_INL_ +#define UCP_DT_INL_ + +#include + +/** + * Get the total length of the data described by @a datatype: @a count elements + * for a contiguous datatype, the sum of the @a iov item lengths for an IOV + * datatype, or the packed size reported by the generic datatype callback for + * the state held in @a state. Returns 0 for an invalid datatype class. + */ +static UCS_F_ALWAYS_INLINE +size_t ucp_dt_length(ucp_datatype_t datatype, size_t count, + const ucp_dt_iov_t *iov, const ucp_dt_state_t *state) +{ + ucp_dt_generic_t *dt_gen; + + switch (datatype & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + return ucp_contig_dt_length(datatype, count); + + case UCP_DATATYPE_IOV: + ucs_assert(NULL != iov); + return ucp_dt_iov_length(iov, count); + + case UCP_DATATYPE_GENERIC: + dt_gen = ucp_dt_generic(datatype); + ucs_assert(NULL != state); + ucs_assert(NULL != dt_gen); + return dt_gen->ops.packed_size(state->dt.generic.state); + + default: + ucs_error("Invalid data type"); + } + + return 0; +} +/** + * Unpack @a length bytes of received @a data into the user @a buffer described + * by @a datatype and @a count, without keeping any unpack state. + * + * @param [in] truncation If non-zero, fail when @a length exceeds the size of + * the receive buffer. + * + * @return UCS_OK on success, UCS_ERR_MESSAGE_TRUNCATED if the truncation check + * fails, the status of ucp_mem_type_unpack() for contiguous buffers + * that are not CPU-accessible, or the status of the generic datatype + * unpack callback. + */ +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_dt_unpack_only(ucp_worker_h worker, void *buffer, size_t count, + ucp_datatype_t datatype, ucs_memory_type_t mem_type, + const void *data, size_t length, int truncation) +{ + size_t iov_offset, iovcnt_offset; + ucp_dt_generic_t *dt_gen; + ucs_status_t status; + size_t buffer_size; + void *state; + + switch (datatype & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + if (truncation && + ucs_unlikely(length > (buffer_size = ucp_contig_dt_length(datatype, count)))) { + goto err_truncated; + } + if (ucs_likely(UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type))) { + UCS_PROFILE_NAMED_CALL("memcpy_recv", ucs_memcpy_relaxed, buffer, data, length); + } else { + /* propagate a failed copy instead of reporting UCS_OK */ + return ucp_mem_type_unpack(worker, buffer, data, length, mem_type); + } + return UCS_OK; + + case UCP_DATATYPE_IOV: + if (truncation && + ucs_unlikely(length > (buffer_size = ucp_dt_iov_length(buffer, count)))) { + goto err_truncated; + } + iov_offset = iovcnt_offset = 0; + UCS_PROFILE_CALL(ucp_dt_iov_scatter, buffer, count, data, length, + &iov_offset, &iovcnt_offset); + return UCS_OK; + + case UCP_DATATYPE_GENERIC: + dt_gen = ucp_dt_generic(datatype); + state = UCS_PROFILE_NAMED_CALL("dt_start", dt_gen->ops.start_unpack,
dt_gen->context, buffer, count); + if (truncation && + ucs_unlikely(length > (buffer_size = dt_gen->ops.packed_size(state)))) { + UCS_PROFILE_NAMED_CALL_VOID("dt_finish", dt_gen->ops.finish, state); + goto err_truncated; + } + status = UCS_PROFILE_NAMED_CALL("dt_unpack", dt_gen->ops.unpack, state, + 0, data, length); + UCS_PROFILE_NAMED_CALL_VOID("dt_finish", dt_gen->ops.finish, state); + return status; + + default: + ucs_fatal("unexpected datatype=%lx", datatype); + } + +err_truncated: + ucs_debug("message truncated: recv_length %zu buffer_size %zu", length, + buffer_size); + return UCS_ERR_MESSAGE_TRUNCATED; +} +/** + * Initialize @a dt_state for receiving into @a buffer: the contiguous class + * clears its md_map, the IOV class resets the offsets and stores @a dt_count, + * and the generic class stores the state returned by the datatype + * start_unpack callback. Other datatype classes leave @a dt_state untouched. + */ +static UCS_F_ALWAYS_INLINE void +ucp_dt_recv_state_init(ucp_dt_state_t *dt_state, void *buffer, + ucp_datatype_t dt, size_t dt_count) +{ + ucp_dt_generic_t *dt_gen; + + switch (dt & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + dt_state->dt.contig.md_map = 0; + break; + case UCP_DATATYPE_IOV: + /* on receive side, only IOV uses offset field, to allow seeking + * to different position. + * TODO remove offset from dt_state, move it inside iov and send state. + */ + dt_state->offset = 0; + dt_state->dt.iov.iov_offset = 0; + dt_state->dt.iov.iovcnt_offset = 0; + dt_state->dt.iov.iovcnt = dt_count; + dt_state->dt.iov.dt_reg = NULL; + break; + case UCP_DATATYPE_GENERIC: + dt_gen = ucp_dt_generic(dt); + dt_state->dt.generic.state = + UCS_PROFILE_NAMED_CALL("dt_start", dt_gen->ops.start_unpack, + dt_gen->context, buffer, dt_count); + ucs_trace("dt state %p buffer %p count %zu dt_gen state=%p", dt_state, + buffer, dt_count, dt_state->dt.generic.state); + break; + default: + break; + } +} + +#endif diff --git a/src/ucp/dt/dt_contig.c b/src/ucp/dt/dt_contig.c new file mode 100644 index 0000000..0a646e6 --- /dev/null +++ b/src/ucp/dt/dt_contig.c @@ -0,0 +1,23 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms.
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "dt_contig.h" + +#include +#include + +/* Pack callback: arg is a ucp_memcpy_pack_context_t; copies ctx->length bytes + * from ctx->src to dest and returns the number of bytes copied */ +size_t ucp_memcpy_pack(void *dest, void *arg) +{ + ucp_memcpy_pack_context_t *ctx = arg; + size_t length = ctx->length; + UCS_PROFILE_CALL(memcpy, dest, ctx->src, length); + return length; +} diff --git a/src/ucp/dt/dt_contig.h b/src/ucp/dt/dt_contig.h new file mode 100644 index 0000000..ab0737f --- /dev/null +++ b/src/ucp/dt/dt_contig.h @@ -0,0 +1,41 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCP_DT_CONTIG_H_ +#define UCP_DT_CONTIG_H_ + +#include +#include + + +/** + * Context for memcpy pack callback: the source buffer and the number of bytes + * to copy from it. + */ +typedef struct { + const void *src; + size_t length; +} ucp_memcpy_pack_context_t; + + +size_t ucp_memcpy_pack(void *dest, void *arg); + +/* Element size of a contiguous datatype: the datatype value shifted right by + * UCP_DATATYPE_SHIFT */ +static inline size_t ucp_contig_dt_elem_size(ucp_datatype_t datatype) +{ + return datatype >> UCP_DATATYPE_SHIFT; +} +/* True if the class bits of _datatype equal UCP_DATATYPE_CONTIG */ +#define UCP_DT_IS_CONTIG(_datatype) \ + (((_datatype) & UCP_DATATYPE_CLASS_MASK) == UCP_DATATYPE_CONTIG) +/* Total length in bytes of count elements of a contiguous datatype; asserts + * that the datatype is contiguous */ +static inline size_t ucp_contig_dt_length(ucp_datatype_t datatype, size_t count) +{ + ucs_assert(UCP_DT_IS_CONTIG(datatype)); + return count * ucp_contig_dt_elem_size(datatype); +} + +#endif diff --git a/src/ucp/dt/dt_generic.c b/src/ucp/dt/dt_generic.c new file mode 100644 index 0000000..cf38142 --- /dev/null +++ b/src/ucp/dt/dt_generic.c @@ -0,0 +1,50 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms.
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "dt_generic.h" + +#include +#include + +/** + * Create a generic datatype: allocate a ucp_dt_generic_t, copy @a ops and + * @a context into it, and return its address combined with + * UCP_DATATYPE_GENERIC in @a datatype_p. The allocation is aligned to + * max(sizeof(void*), UCS_BIT(UCP_DATATYPE_SHIFT)). + * + * @return UCS_OK on success, UCS_ERR_NO_MEMORY if the allocation fails. + */ +ucs_status_t ucp_dt_create_generic(const ucp_generic_dt_ops_t *ops, void *context, + ucp_datatype_t *datatype_p) +{ + ucp_dt_generic_t *dt; + int ret; + + ret = ucs_posix_memalign((void **)&dt, + ucs_max(sizeof(void *), UCS_BIT(UCP_DATATYPE_SHIFT)), + sizeof(*dt), "generic_dt"); + if (ret != 0) { + return UCS_ERR_NO_MEMORY; + } + + dt->ops = *ops; + dt->context = context; + *datatype_p = ((uintptr_t)dt) | UCP_DATATYPE_GENERIC; + return UCS_OK; +} +/** + * Destroy a datatype: the descriptor of a generic datatype is freed with + * ucs_free(); other datatype classes require no action. + */ +void ucp_dt_destroy(ucp_datatype_t datatype) +{ + ucp_dt_generic_t *dt; + + switch (datatype & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + break; + case UCP_DATATYPE_GENERIC: + dt = ucp_dt_generic(datatype); + ucs_free(dt); + break; + default: + break; + } +} diff --git a/src/ucp/dt/dt_generic.h b/src/ucp/dt/dt_generic.h new file mode 100644 index 0000000..bf0647b --- /dev/null +++ b/src/ucp/dt/dt_generic.h @@ -0,0 +1,31 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCP_DT_GENERIC_H_ +#define UCP_DT_GENERIC_H_ + +#include + + +/** + * Generic datatype structure: the user context and a copy of the datatype + * operations. + */ +typedef struct ucp_dt_generic { + void *context; + ucp_generic_dt_ops_t ops; +} ucp_dt_generic_t; + +/* Return the descriptor pointer encoded in a generic datatype by clearing the + * class bits */ +static inline ucp_dt_generic_t* ucp_dt_generic(ucp_datatype_t datatype) +{ + return (ucp_dt_generic_t*)(void*)(datatype & ~UCP_DATATYPE_CLASS_MASK); +} +/* True if the class bits of _datatype equal UCP_DATATYPE_GENERIC */ +#define UCP_DT_IS_GENERIC(_datatype) \ + (((_datatype) & UCP_DATATYPE_CLASS_MASK) == UCP_DATATYPE_GENERIC) + +#endif /* UCP_DT_GENERIC_H_ */ diff --git a/src/ucp/dt/dt_iov.c b/src/ucp/dt/dt_iov.c new file mode 100644 index 0000000..1458467 --- /dev/null +++ b/src/ucp/dt/dt_iov.c @@ -0,0 +1,112 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms.
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "dt_iov.h" + +#include +#include + +#include +#include + + +void ucp_dt_iov_gather(void *dest, const ucp_dt_iov_t *iov, size_t length, + size_t *iov_offset, size_t *iovcnt_offset) +{ + size_t item_len, item_reminder, item_len_to_copy; + size_t length_it = 0; + + ucs_assert(length > 0); + while (length_it < length) { + item_len = iov[*iovcnt_offset].length; + item_reminder = item_len - *iov_offset; + + item_len_to_copy = item_reminder - + ucs_max((ssize_t)((length_it + item_reminder) - length), 0); + memcpy(UCS_PTR_BYTE_OFFSET(dest, length_it), + UCS_PTR_BYTE_OFFSET(iov[*iovcnt_offset].buffer, *iov_offset), + item_len_to_copy); + length_it += item_len_to_copy; + + ucs_assert(length_it <= length); + if (length_it < length) { + *iov_offset = 0; + ++(*iovcnt_offset); + } else { + *iov_offset += item_len_to_copy; + } + } +} + +size_t ucp_dt_iov_scatter(ucp_dt_iov_t *iov, size_t iovcnt, const void *src, + size_t length, size_t *iov_offset, size_t *iovcnt_offset) +{ + size_t item_len, item_len_to_copy; + size_t length_it = 0; + + while ((length_it < length) && (*iovcnt_offset < iovcnt)) { + item_len = iov[*iovcnt_offset].length; + item_len_to_copy = ucs_min(ucs_max((ssize_t)(item_len - *iov_offset), 0), + length - length_it); + ucs_assert(*iov_offset <= item_len); + + memcpy(UCS_PTR_BYTE_OFFSET(iov[*iovcnt_offset].buffer, *iov_offset), + UCS_PTR_BYTE_OFFSET(src, length_it), + item_len_to_copy); + length_it += item_len_to_copy; + + ucs_assert(length_it <= length); + if (length_it < length) { + *iov_offset = 0; + ++(*iovcnt_offset); + } else { + *iov_offset += item_len_to_copy; + } + } + return length_it; +} + +void ucp_dt_iov_seek(ucp_dt_iov_t *iov, size_t iovcnt, ptrdiff_t distance, + size_t *iov_offset, size_t *iovcnt_offset) +{ + ssize_t new_iov_offset; /* signed, since it can be negative */ + size_t length_it; + + new_iov_offset = ((ssize_t)*iov_offset) + distance; + + if (new_iov_offset < 0) { + /* 
seek backwards */ + do { + ucs_assert(*iovcnt_offset > 0); + --(*iovcnt_offset); + new_iov_offset += iov[*iovcnt_offset].length; + } while (new_iov_offset < 0); + } else { + /* seek forward */ + while (new_iov_offset >= (length_it = iov[*iovcnt_offset].length)) { + new_iov_offset -= length_it; + ++(*iovcnt_offset); + ucs_assert(*iovcnt_offset < iovcnt); + } + } + + *iov_offset = new_iov_offset; +} + +size_t ucp_dt_iov_count_nonempty(const ucp_dt_iov_t *iov, size_t iovcnt) +{ + size_t iov_it, count; + + count = 0; + for (iov_it = 0; iov_it < iovcnt; ++iov_it) { + count += iov[iov_it].length != 0; + } + return count; +} diff --git a/src/ucp/dt/dt_iov.h b/src/ucp/dt/dt_iov.h new file mode 100644 index 0000000..c19a7ba --- /dev/null +++ b/src/ucp/dt/dt_iov.h @@ -0,0 +1,101 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCP_DT_IOV_H_ +#define UCP_DT_IOV_H_ + +#include + + +#define UCP_DT_IS_IOV(_datatype) \ + (((_datatype) & UCP_DATATYPE_CLASS_MASK) == UCP_DATATYPE_IOV) + + +/** + * Get the total length of the data contains in IOV buffers + */ +static inline size_t ucp_dt_iov_length(const ucp_dt_iov_t *iov, size_t iovcnt) +{ + size_t iov_it, total_length = 0; + + for (iov_it = 0; iov_it < iovcnt; ++iov_it) { + total_length += iov[iov_it].length; + } + + return total_length; +} + +/** + * Copy iov data buffers from @a src to contiguous buffer @a dest with + * a iov item data @a iov_offset and iov item @a iovcnt_offset + * + * @param [in] dest Destination contiguous buffer + * (no offset applicable) + * @param [in] iov Source @ref ucp_dt_iov_t buffer + * @param [in] length Total data length to copy in bytes + * @param [inout] iov_offset The offset in bytes to start copying + * from an @a iov item pointed by + * @a iovcnt_offset. The @a iov_offset is not aligned + * by @ref ucp_dt_iov_t items length. 
+ * @param [inout] iovcnt_offset Auxiliary offset to select @a iov item which + * belongs to the @a iov_offset. The point to start + * copying from should be selected as + * iov[iovcnt_offset].buffer + iov_offset + */ +void ucp_dt_iov_gather(void *dest, const ucp_dt_iov_t *iov, size_t length, + size_t *iov_offset, size_t *iovcnt_offset); + +/** + * Copy contiguous buffer @a src into @ref ucp_dt_iov_t data buffers in @a iov + * with an iov item data @a iov_offset and iov item @a iovcnt_offset + * + * @param [in] iov Destination @ref ucp_dt_iov_t buffer + * @param [in] iovcnt Size of the @a iov buffer + * @param [in] src Source contiguous buffer (no offset applicable) + * @param [in] length Total data length to copy in bytes + * @param [inout] iov_offset The offset in bytes to start copying + * to an @a iov item pointed by @a iovcnt_offset. + * The @a iov_offset is not aligned by + * @ref ucp_dt_iov_t items length. + * @param [inout] iovcnt_offset Auxiliary offset to select @a iov item which + * belongs to the @a iov_offset. The point to + * start copying from should be selected as + * iov[iovcnt_offset].buffer + iov_offset + * + * @return Size in bytes that is actually copied from @a src to @a iov. It must + * be less or equal to @a length. 
+ */ +size_t ucp_dt_iov_scatter(ucp_dt_iov_t *iov, size_t iovcnt, const void *src, + size_t length, size_t *iov_offset, size_t *iovcnt_offset); + + +/** + * Seek to a logical offset in the iov + * + * @param [in] iov @ref ucp_dt_iov_t buffer to seek in + * @param [in] iovcnt Number of entries in the @a iov buffer + * @param [in] distance Distance to move, relative to the + * current location + * @param [inout] iov_offset The offset in bytes from the beginning of the + * current iov entry + * @param [inout] iovcnt_offset Current @a iov item index + */ +void ucp_dt_iov_seek(ucp_dt_iov_t *iov, size_t iovcnt, ptrdiff_t distance, + size_t *iov_offset, size_t *iovcnt_offset); + + +/** + * Count non-empty buffers in the iov + * + * @param [in] iov @ref ucp_dt_iov_t buffer to count + * @param [in] iovcnt Number of entries in the @a iov buffer + * + * @return Number of non-empty buffers in the iovec + */ +size_t ucp_dt_iov_count_nonempty(const ucp_dt_iov_t *iov, size_t iovcnt); + +#endif diff --git a/src/ucp/proto/proto_am.c b/src/ucp/proto/proto_am.c new file mode 100644 index 0000000..01be173 --- /dev/null +++ b/src/ucp/proto/proto_am.c @@ -0,0 +1,112 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "proto_am.inl" + +#include + + +static inline size_t ucp_proto_max_packed_size() +{ + return ucs_max(sizeof(ucp_reply_hdr_t), + sizeof(ucp_offload_ssend_hdr_t)); +} + +static size_t ucp_proto_pack(void *dest, void *arg) +{ + ucp_request_t *req = arg; + ucp_reply_hdr_t *rep_hdr; + ucp_offload_ssend_hdr_t *off_rep_hdr; + + switch (req->send.proto.am_id) { + case UCP_AM_ID_EAGER_SYNC_ACK: + case UCP_AM_ID_RNDV_ATS: + case UCP_AM_ID_RNDV_ATP: + rep_hdr = dest; + rep_hdr->reqptr = req->send.proto.remote_request; + rep_hdr->status = req->send.proto.status; + return sizeof(*rep_hdr); + case UCP_AM_ID_OFFLOAD_SYNC_ACK: + off_rep_hdr = dest; + off_rep_hdr->sender_tag = req->send.proto.sender_tag; + off_rep_hdr->ep_ptr = ucp_request_get_dest_ep_ptr(req); + return sizeof(*off_rep_hdr); + } + + ucs_fatal("unexpected am_id"); + return 0; +} + +ucs_status_t +ucp_do_am_single(uct_pending_req_t *self, uint8_t am_id, + uct_pack_callback_t pack_cb, ssize_t max_packed_size) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ssize_t packed_len; + uint64_t *buffer; + + /* if packed data can fit short active message, use it, because it should + * be faster than bcopy. 
+ */ + if ((max_packed_size <= UCS_ALLOCA_MAX_SIZE) && + (max_packed_size <= ucp_ep_config(ep)->am.max_short)) { + req->send.lane = ucp_ep_get_am_lane(ep); + buffer = ucs_alloca(max_packed_size); + packed_len = pack_cb(buffer, req); + ucs_assertv((packed_len >= (ssize_t)sizeof(uint64_t)) && + (packed_len <= max_packed_size), + "packed_len=%zd max_packed_size=%zd", packed_len, max_packed_size); + /* buffer[0] is sent as the 64-bit header and only the remainder as payload, so fewer than sizeof(uint64_t) packed bytes would wrap the payload length */ + return uct_ep_am_short(ep->uct_eps[req->send.lane], am_id, buffer[0], + &buffer[1], packed_len - sizeof(uint64_t)); + } else { + return ucp_do_am_bcopy_single(self, am_id, pack_cb); + } +} + +ucs_status_t ucp_proto_progress_am_single(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucs_status_t status = ucp_do_am_single(self, req->send.proto.am_id, + ucp_proto_pack, + ucp_proto_max_packed_size()); + if (status == UCS_OK) { + req->send.proto.comp_cb(req); + } + return status; +} + +void ucp_proto_am_zcopy_req_complete(ucp_request_t *req, ucs_status_t status) +{ + ucs_assert(req->send.state.uct_comp.count == 0); + ucp_request_send_buffer_dereg(req); /* TODO register+lane change */ + ucp_request_complete_send(req, status); +} + +void ucp_proto_am_zcopy_completion(uct_completion_t *self, + ucs_status_t status) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, + send.state.uct_comp); + if (req->send.state.dt.offset == req->send.length) { + ucp_proto_am_zcopy_req_complete(req, status); + } else if (status != UCS_OK) { + ucs_assert(req->send.state.uct_comp.count == 0); + ucs_assert(status != UCS_INPROGRESS); + + /* NOTE: the request is in pending queue if data was not completely sent, + * just dereg the buffer here and complete request on purge + * pending later. 
+ */ + ucp_request_send_buffer_dereg(req); + req->send.state.uct_comp.func = NULL; + } +} diff --git a/src/ucp/proto/proto_am.h b/src/ucp/proto/proto_am.h new file mode 100644 index 0000000..ee99021 --- /dev/null +++ b/src/ucp/proto/proto_am.h @@ -0,0 +1,42 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_PROTO_AM_H_ +#define UCP_PROTO_AM_H_ + +#include +#include + + +/** + * Header segment for a transaction + */ +typedef struct { + uintptr_t ep_ptr; + uintptr_t reqptr; +} UCS_S_PACKED ucp_request_hdr_t; + + +/** + * Header for transaction acknowledgment + */ +typedef struct { + uint64_t reqptr; + ucs_status_t status; +} UCS_S_PACKED ucp_reply_hdr_t; + + +ucs_status_t +ucp_do_am_single(uct_pending_req_t *self, uint8_t am_id, + uct_pack_callback_t pack_cb, ssize_t max_packed_size); + +ucs_status_t ucp_proto_progress_am_single(uct_pending_req_t *self); + +void ucp_proto_am_zcopy_completion(uct_completion_t *self, ucs_status_t status); + +void ucp_proto_am_zcopy_req_complete(ucp_request_t *req, ucs_status_t status); + +#endif diff --git a/src/ucp/proto/proto_am.inl b/src/ucp/proto/proto_am.inl new file mode 100644 index 0000000..20824df --- /dev/null +++ b/src/ucp/proto/proto_am.inl @@ -0,0 +1,449 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef UCP_PROTO_AM_INL_ +#define UCP_PROTO_AM_INL_ + +#include "proto_am.h" + +#include +#include +#include +#include +#include +#include + + +#define UCP_STATUS_PENDING_SWITCH (UCS_ERR_LAST - 1) + +typedef void (*ucp_req_complete_func_t)(ucp_request_t *req, ucs_status_t status); + + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_do_am_bcopy_single(uct_pending_req_t *self, uint8_t am_id, + uct_pack_callback_t pack_cb) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ucp_dt_state_t state = req->send.state.dt; + ssize_t packed_len; + + req->send.lane = ucp_ep_get_am_lane(ep); + packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], am_id, pack_cb, + req, 0); + if (ucs_unlikely(packed_len < 0)) { + /* Reset the state to the previous one */ + req->send.state.dt = state; + return (ucs_status_t)packed_len; + } + + ucs_assertv((size_t)packed_len <= ucp_ep_get_max_bcopy(ep, req->send.lane), + "packed_len=%zd max_bcopy=%zu", + packed_len, ucp_ep_get_max_bcopy(ep, req->send.lane)); + + return UCS_OK; +} + +static UCS_F_ALWAYS_INLINE +ucs_status_t ucp_do_am_bcopy_multi(uct_pending_req_t *self, uint8_t am_id_first, + uint8_t am_id_middle, + uct_pack_callback_t pack_first, + uct_pack_callback_t pack_middle, + int enable_am_bw) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ucp_dt_state_t state = req->send.state.dt; + ucs_status_t status; + ssize_t packed_len; + uct_ep_h uct_ep; + int pending_adde_res; + + req->send.lane = (!enable_am_bw || (state.offset == 0)) ? 
/* first part of message must be sent */ + ucp_ep_get_am_lane(ep) : /* via AM lane */ + ucp_send_request_get_next_am_bw_lane(req); + uct_ep = ep->uct_eps[req->send.lane]; + + for (;;) { + if (state.offset == 0) { + /* First */ + packed_len = uct_ep_am_bcopy(uct_ep, am_id_first, pack_first, req, 0); + UCS_PROFILE_REQUEST_EVENT_CHECK_STATUS(req, "am_bcopy_first", + packed_len, packed_len); + } else { + ucs_assert(state.offset < req->send.length); + /* Middle or last */ + packed_len = uct_ep_am_bcopy(uct_ep, am_id_middle, pack_middle, req, 0); + UCS_PROFILE_REQUEST_EVENT_CHECK_STATUS(req, "am_bcopy_middle", + packed_len, packed_len); + } + + if (ucs_unlikely(packed_len < 0)) { + /* Reset the state to the previous one */ + req->send.state.dt = state; + + if (req->send.lane != req->send.pending_lane) { + /* switch to new pending lane */ + pending_adde_res = ucp_request_pending_add(req, &status, 0); + if (!pending_adde_res) { + /* failed to switch req to pending queue, try again */ + continue; + } + ucs_assert(status == UCS_INPROGRESS); + return (ucs_status_t)UCP_STATUS_PENDING_SWITCH; + } else { + return (ucs_status_t)packed_len; + } + } else { + ucs_assertv(/* The packed length has to be the same as maximum + * AM Bcopy for the first and middle segments */ + ((req->send.state.dt.offset < req->send.length) && + (packed_len == ucp_ep_get_max_bcopy(ep, req->send.lane))) || + /* The packed length has to be the same or less than + * maximum AM Bcopy for the last segment */ + (packed_len <= ucp_ep_get_max_bcopy(ep, req->send.lane)), + "packed_len=%zd max_bcopy=%zu", + packed_len, ucp_ep_get_max_bcopy(ep, req->send.lane)); + ucs_assertv(req->send.state.dt.offset <= req->send.length, + "offset=%zd length=%zu", + req->send.state.dt.offset, req->send.length); + ucs_assert(state.offset < req->send.state.dt.offset); + /* If the last segment was sent, return UCS_OK, + * otherwise - UCS_INPROGRESS */ + return ((req->send.state.dt.offset < req->send.length) ? 
+ UCS_INPROGRESS : UCS_OK); + } + } +} + +static UCS_F_ALWAYS_INLINE +void ucp_dt_iov_copy_uct(ucp_context_h context, uct_iov_t *iov, size_t *iovcnt, + size_t max_dst_iov, ucp_dt_state_t *state, + const ucp_dt_iov_t *src_iov, ucp_datatype_t datatype, + size_t length_max, ucp_md_index_t md_index, + ucp_mem_desc_t *mdesc) +{ + size_t iov_offset, max_src_iov, src_it, dst_it; + size_t length_it = 0; + ucp_md_index_t memh_index; + + switch (datatype & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + if (context->tl_mds[md_index].attr.cap.flags & UCT_MD_FLAG_REG) { + if (mdesc) { + memh_index = ucs_bitmap2idx(mdesc->memh->md_map, md_index); + iov[0].memh = mdesc->memh->uct[memh_index]; + } else { + memh_index = ucs_bitmap2idx(state->dt.contig.md_map, md_index); + iov[0].memh = state->dt.contig.memh[memh_index]; + } + } else { + iov[0].memh = UCT_MEM_HANDLE_NULL; + } + iov[0].buffer = UCS_PTR_BYTE_OFFSET(src_iov, state->offset); + iov[0].length = length_max; + iov[0].stride = 0; + iov[0].count = 1; + + *iovcnt = 1; + length_it = iov[0].length; + break; + case UCP_DATATYPE_IOV: + iov_offset = state->dt.iov.iov_offset; + max_src_iov = state->dt.iov.iovcnt; + src_it = state->dt.iov.iovcnt_offset; + dst_it = 0; + state->dt.iov.iov_offset = 0; + while ((dst_it < max_dst_iov) && (src_it < max_src_iov)) { + if (src_iov[src_it].length) { + iov[dst_it].buffer = UCS_PTR_BYTE_OFFSET(src_iov[src_it].buffer, iov_offset); + iov[dst_it].length = src_iov[src_it].length - iov_offset; + iov[dst_it].memh = state->dt.iov.dt_reg[src_it].memh[0]; + iov[dst_it].stride = 0; + iov[dst_it].count = 1; + length_it += iov[dst_it].length; + + ++dst_it; + if (length_it >= length_max) { + iov[dst_it - 1].length -= (length_it - length_max); + length_it = length_max; + state->dt.iov.iov_offset = iov_offset + iov[dst_it - 1].length; + break; + } + } + iov_offset = 0; + ++src_it; + } + + state->dt.iov.iovcnt_offset = src_it; + *iovcnt = dst_it; + break; + default: + ucs_error("Invalid data type"); + 
} + + state->offset += length_it; +} + +static UCS_F_ALWAYS_INLINE +ucs_status_t ucp_do_am_zcopy_single(uct_pending_req_t *self, uint8_t am_id, + const void *hdr, size_t hdr_size, + ucp_req_complete_func_t complete) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + size_t max_iov = ucp_ep_config(ep)->am.max_iov; + uct_iov_t *iov = ucs_alloca(max_iov * sizeof(uct_iov_t)); + size_t iovcnt = 0; + ucp_dt_state_t state = req->send.state.dt; + ucs_status_t status; + + req->send.lane = ucp_ep_get_am_lane(ep); + + ucp_dt_iov_copy_uct(ep->worker->context,iov, &iovcnt, max_iov, + &state, req->send.buffer, req->send.datatype, + req->send.length, ucp_ep_md_index(ep, req->send.lane), NULL); + + status = uct_ep_am_zcopy(ep->uct_eps[req->send.lane], am_id, (void*)hdr, + hdr_size, iov, iovcnt, 0, + &req->send.state.uct_comp); + if (status == UCS_OK) { + complete(req, UCS_OK); + } else { + ucp_request_send_state_advance(req, &state, + UCP_REQUEST_SEND_PROTO_ZCOPY_AM, + status); + } + return UCS_STATUS_IS_ERR(status) ? status : UCS_OK; +} + +static UCS_F_ALWAYS_INLINE +void ucp_am_zcopy_complete_last_stage(ucp_request_t *req, ucp_dt_state_t *state, + ucp_req_complete_func_t complete) +{ + ucp_request_send_state_advance(req, state, + UCP_REQUEST_SEND_PROTO_ZCOPY_AM, + UCS_OK); + + /* Complete a request on a last stage if all previous AM + * Zcopy operations completed successfully. 
If there are + * operations that are in progress on other lanes, the last + * completed operation will complete the request */ + if (!req->send.state.uct_comp.count) { + complete(req, UCS_OK); + } +} + +static UCS_F_ALWAYS_INLINE +ucs_status_t ucp_do_am_zcopy_multi(uct_pending_req_t *self, uint8_t am_id_first, + uint8_t am_id_middle, + const void *hdr_first, size_t hdr_size_first, + const void *hdr_middle, size_t hdr_size_middle, + ucp_req_complete_func_t complete, int enable_am_bw) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + unsigned flag_iov_mid = 0; + size_t iovcnt = 0; + ucp_dt_state_t state; + size_t max_middle; + size_t max_iov; + uct_iov_t *iov; + size_t offset; + size_t mid_len; + ucs_status_t status; + uct_ep_h uct_ep; + int pending_adde_res; + + if (UCP_DT_IS_CONTIG(req->send.datatype)) { + if (enable_am_bw && req->send.state.dt.offset) { + req->send.lane = ucp_send_request_get_next_am_bw_lane(req); + ucp_send_request_add_reg_lane(req, req->send.lane); + } else { + req->send.lane = ucp_ep_get_am_lane(ep); + } + } else { + ucs_assert(UCP_DT_IS_IOV(req->send.datatype)); + /* disable multilane for IOV datatype. 
+ * TODO: add IOV processing for multilane */ + req->send.lane = ucp_ep_get_am_lane(ep); + } + + uct_ep = ep->uct_eps[req->send.lane]; + max_middle = ucp_ep_get_max_zcopy(ep, req->send.lane) - hdr_size_middle; + max_iov = ucp_ep_get_max_iov(ep, req->send.lane); + iov = ucs_alloca(max_iov * sizeof(uct_iov_t)); + + for (;;) { + state = req->send.state.dt; + offset = state.offset; + + ucs_assert(max_iov > 0); + if (UCP_DT_IS_IOV(req->send.datatype)) { + /* This flag should guarantee middle stage usage if iovcnt exceeded */ + flag_iov_mid = ((state.dt.iov.iovcnt_offset + max_iov) < + state.dt.iov.iovcnt); + } else { + ucs_assert(UCP_DT_IS_CONTIG(req->send.datatype)); + } + + if (offset == 0) { + /* First stage */ + ucs_assert(req->send.lane == ucp_ep_get_am_lane(ep)); + ucp_dt_iov_copy_uct(ep->worker->context, iov, &iovcnt, max_iov, &state, + req->send.buffer, req->send.datatype, + max_middle - hdr_size_first + hdr_size_middle, + ucp_ep_md_index(ep, req->send.lane), NULL); + ucs_assertv(state.offset != 0, "state must be changed on 1st stage"); + ucs_assertv(state.offset < req->send.length, "state.offset=%zu", + state.offset); + + status = uct_ep_am_zcopy(uct_ep, am_id_first, (void*)hdr_first, + hdr_size_first, iov, iovcnt, 0, + &req->send.state.uct_comp); + + UCS_PROFILE_REQUEST_EVENT_CHECK_STATUS(req, "am_zcopy_first", + iov[0].length, status); + } else { + /* Middle or last stage */ + mid_len = ucs_min(max_middle, req->send.length - offset); + ucs_assert(offset + mid_len <= req->send.length); + ucp_dt_iov_copy_uct(ep->worker->context, iov, &iovcnt, max_iov, &state, + req->send.buffer, req->send.datatype, mid_len, + ucp_ep_md_index(ep, req->send.lane), NULL); + + if (offset < state.offset) { + status = uct_ep_am_zcopy(uct_ep, am_id_middle, (void*)hdr_middle, + hdr_size_middle, iov, iovcnt, 0, + &req->send.state.uct_comp); + } else if (state.offset == req->send.length) { + /* Empty IOVs on last stage */ + ucp_am_zcopy_complete_last_stage(req, &state, complete); + 
return UCS_OK; + } else { + ucs_assert(offset == state.offset); + /* Empty IOVs in the middle */ + ucp_request_send_state_advance(req, &state, + UCP_REQUEST_SEND_PROTO_ZCOPY_AM, + UCS_OK); + continue; + } + + UCS_PROFILE_REQUEST_EVENT_CHECK_STATUS(req, "am_zcopy_middle", + iov[0].length, status); + + if (!flag_iov_mid && (offset + mid_len == req->send.length)) { + /* Last stage */ + if (status == UCS_OK) { + ucp_am_zcopy_complete_last_stage(req, &state, complete); + return UCS_OK; + } + + ucp_request_send_state_advance(req, &state, + UCP_REQUEST_SEND_PROTO_ZCOPY_AM, + status); + if (!UCS_STATUS_IS_ERR(status)) { + return UCS_OK; + } + } + } + + if (status == UCS_ERR_NO_RESOURCE) { + if (req->send.lane != req->send.pending_lane) { + /* switch to new pending lane */ + pending_adde_res = ucp_request_pending_add(req, &status, 0); + if (!pending_adde_res) { + /* failed to switch req to pending queue, try again */ + continue; + } + ucs_assert(status == UCS_INPROGRESS); + return UCS_OK; + } + } + ucp_request_send_state_advance(req, &state, + UCP_REQUEST_SEND_PROTO_ZCOPY_AM, + status); + + return UCS_STATUS_IS_ERR(status) ? 
status : UCS_INPROGRESS; + } +} + +static UCS_F_ALWAYS_INLINE size_t +ucp_proto_get_zcopy_threshold(const ucp_request_t *req, + const ucp_ep_msg_config_t *msg_config, + size_t count, size_t max_zcopy) +{ + ucp_worker_h worker; + ucp_lane_index_t lane; + ucp_rsc_index_t rsc_index; + size_t zcopy_thresh; + + if (ucs_unlikely(msg_config->max_zcopy == 0)) { + return max_zcopy; + } + + if (ucs_likely(UCP_DT_IS_CONTIG(req->send.datatype))) { + return ucs_min(max_zcopy, msg_config->mem_type_zcopy_thresh[req->send.mem_type]); + } else if (UCP_DT_IS_IOV(req->send.datatype)) { + if (0 == count) { + /* disable zcopy */ + zcopy_thresh = max_zcopy; + } else if (!msg_config->zcopy_auto_thresh) { + /* The user defined threshold or no zcopy enabled */ + zcopy_thresh = msg_config->zcopy_thresh[0]; + } else if (count <= UCP_MAX_IOV) { + /* Using pre-calculated thresholds */ + zcopy_thresh = msg_config->zcopy_thresh[count - 1]; + } else { + /* Calculate threshold */ + lane = req->send.lane; + rsc_index = ucp_ep_config(req->send.ep)->key.lanes[lane].rsc_index; + worker = req->send.ep->worker; + zcopy_thresh = ucp_ep_config_get_zcopy_auto_thresh(count, + &ucp_ep_md_attr(req->send.ep, lane)->reg_cost, + worker->context, + ucp_worker_iface_bandwidth(worker, rsc_index)); + } + return ucs_min(max_zcopy, zcopy_thresh); + } else if (UCP_DT_IS_GENERIC(req->send.datatype)) { + return max_zcopy; + } + + ucs_error("Unsupported datatype"); + + return max_zcopy; +} + +static UCS_F_ALWAYS_INLINE ssize_t +ucp_proto_get_short_max(const ucp_request_t *req, + const ucp_ep_msg_config_t *msg_config) +{ + return (!UCP_DT_IS_CONTIG(req->send.datatype) || + (req->flags & UCP_REQUEST_FLAG_SYNC) || + (!UCP_MEM_IS_HOST(req->send.mem_type))) ? 
+ -1 : msg_config->max_short; +} + +static UCS_F_ALWAYS_INLINE ucp_request_t* +ucp_proto_ssend_ack_request_alloc(ucp_worker_h worker, uintptr_t ep_ptr) +{ + ucp_request_t *req; + + req = ucp_request_get(worker); + if (req == NULL) { + return NULL; + } + + req->flags = 0; + req->send.ep = ucp_worker_get_ep_by_ptr(worker, ep_ptr); + req->send.uct.func = ucp_proto_progress_am_single; + req->send.proto.comp_cb = ucp_request_put; + req->send.proto.status = UCS_OK; + + return req; +} + +#endif diff --git a/src/ucp/rma/amo_basic.c b/src/ucp/rma/amo_basic.c new file mode 100644 index 0000000..9b1253e --- /dev/null +++ b/src/ucp/rma/amo_basic.c @@ -0,0 +1,104 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "rma.h" +#include "rma.inl" + +#include + + +static UCS_F_ALWAYS_INLINE +ucs_status_t ucp_amo_check_send_status(ucp_request_t *req, ucs_status_t status) +{ + if (status == UCS_INPROGRESS) { + return UCS_OK; + } + /* Complete for UCS_OK and unexpected errors */ + if (status != UCS_ERR_NO_RESOURCE) { + ucp_request_complete_send(req, status); + } + return status; +} + +static ucs_status_t ucp_amo_basic_progress_post(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_rkey_h rkey = req->send.amo.rkey; + ucp_ep_t *ep = req->send.ep; + uint64_t value = req->send.amo.value; + uint64_t remote_addr = req->send.amo.remote_addr; + uct_atomic_op_t op = req->send.amo.uct_op; + ucs_status_t status; + + req->send.lane = rkey->cache.amo_lane; + if (req->send.length == sizeof(uint64_t)) { + status = UCS_PROFILE_CALL(uct_ep_atomic64_post, + ep->uct_eps[req->send.lane], op, value, + remote_addr, rkey->cache.amo_rkey); + } else { + ucs_assert(req->send.length == sizeof(uint32_t)); + status = UCS_PROFILE_CALL(uct_ep_atomic32_post, + 
ep->uct_eps[req->send.lane], op, value, + remote_addr, rkey->cache.amo_rkey); + } + + return ucp_amo_check_send_status(req, status); +} + +static ucs_status_t ucp_amo_basic_progress_fetch(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_rkey_h rkey = req->send.amo.rkey; + ucp_ep_t *ep = req->send.ep; + uint64_t value = req->send.amo.value; + uint64_t *result = req->send.buffer; + uint64_t remote_addr = req->send.amo.remote_addr; + uct_atomic_op_t op = req->send.amo.uct_op; + ucs_status_t status; + + req->send.lane = rkey->cache.amo_lane; + if (req->send.length == sizeof(uint64_t)) { + if (op != UCT_ATOMIC_OP_CSWAP) { + status = uct_ep_atomic64_fetch(ep->uct_eps[req->send.lane], + op, value, result, + remote_addr, + rkey->cache.amo_rkey, + &req->send.state.uct_comp); + } else { + status = uct_ep_atomic_cswap64(ep->uct_eps[req->send.lane], + value, *result, + remote_addr, rkey->cache.amo_rkey, result, + &req->send.state.uct_comp); + } + } else { + ucs_assert(req->send.length == sizeof(uint32_t)); + if (op != UCT_ATOMIC_OP_CSWAP) { + status = uct_ep_atomic32_fetch(ep->uct_eps[req->send.lane], + op, value, (uint32_t*)result, + remote_addr, + rkey->cache.amo_rkey, + &req->send.state.uct_comp); + } else { + status = uct_ep_atomic_cswap32(ep->uct_eps[req->send.lane], + value, *result, remote_addr, + rkey->cache.amo_rkey, (uint32_t*)result, + &req->send.state.uct_comp); + } + } + + return ucp_amo_check_send_status(req, status); +} + +ucp_amo_proto_t ucp_amo_basic_proto = { + .name = "basic_amo", + .progress_fetch = ucp_amo_basic_progress_fetch, + .progress_post = ucp_amo_basic_progress_post +}; diff --git a/src/ucp/rma/amo_send.c b/src/ucp/rma/amo_send.c new file mode 100644 index 0000000..646bb7a --- /dev/null +++ b/src/ucp/rma/amo_send.c @@ -0,0 +1,292 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2016. ALL RIGHTS RESERVED. 
+* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "rma.h" +#include "rma.inl" + +#include +#include +#include +#include +#include + +#include + +/* Validate AMO arguments and run _action (expected to leave the caller, e.g. a return) on failure. The operand size is validated before the alignment test because that test divides by the size. */ +#define UCP_AMO_CHECK_PARAM(_context, _remote_addr, _size, _opcode, \ + _last_opcode, _action) \ + { \ + if (ENABLE_PARAMS_CHECK && \ + ucs_unlikely(((_size) != 4) && ((_size) != 8))) { \ + ucs_error("invalid atomic operation size: %zu", (_size)); \ + _action; \ + } \ + \ + if (ENABLE_PARAMS_CHECK && \ + ucs_unlikely(((_remote_addr) % (_size)) != 0)) { \ + ucs_error("atomic variable must be naturally aligned " \ + "(remote address 0x%"PRIx64", size %zu)", (_remote_addr), \ + (_size)); \ + _action; \ + } \ + \ + UCP_CONTEXT_CHECK_FEATURE_FLAGS((_context), ((_size) == 4) ? \ + UCP_FEATURE_AMO32 : UCP_FEATURE_AMO64, \ + _action); \ + \ + if (ENABLE_PARAMS_CHECK && \ + (ucs_unlikely((_opcode) >= (_last_opcode)))) { \ + ucs_error("invalid atomic opcode %d ", (_opcode)); \ + _action; \ + } \ + } + + +static uct_atomic_op_t ucp_uct_op_table[] = { + [UCP_ATOMIC_POST_OP_ADD] = UCT_ATOMIC_OP_ADD, + [UCP_ATOMIC_POST_OP_AND] = UCT_ATOMIC_OP_AND, + [UCP_ATOMIC_POST_OP_OR] = UCT_ATOMIC_OP_OR, + [UCP_ATOMIC_POST_OP_XOR] = UCT_ATOMIC_OP_XOR +}; + +static uct_atomic_op_t ucp_uct_fop_table[] = { + [UCP_ATOMIC_FETCH_OP_FADD] = UCT_ATOMIC_OP_ADD, + [UCP_ATOMIC_FETCH_OP_FAND] = UCT_ATOMIC_OP_AND, + [UCP_ATOMIC_FETCH_OP_FOR] = UCT_ATOMIC_OP_OR, + [UCP_ATOMIC_FETCH_OP_FXOR] = UCT_ATOMIC_OP_XOR, + [UCP_ATOMIC_FETCH_OP_SWAP] = UCT_ATOMIC_OP_SWAP, + [UCP_ATOMIC_FETCH_OP_CSWAP] = UCT_ATOMIC_OP_CSWAP, +}; + + +static void ucp_amo_completed_single(uct_completion_t *self, + ucs_status_t status) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, + send.state.uct_comp); + ucs_trace("invoking completion on AMO request %p", req); + ucp_request_complete_send(req, status); +} + +static UCS_F_ALWAYS_INLINE void +ucp_amo_init_common(ucp_request_t *req, ucp_ep_h ep, uct_atomic_op_t op, + uint64_t 
remote_addr, ucp_rkey_h rkey, uint64_t value, + size_t size) +{ + req->flags = 0; + req->send.ep = ep; + req->send.length = size; + req->send.amo.uct_op = op; + req->send.amo.remote_addr = remote_addr; + req->send.amo.rkey = rkey; + req->send.amo.value = value; +#if UCS_ENABLE_ASSERT + req->send.lane = UCP_NULL_LANE; +#endif +} + +static UCS_F_ALWAYS_INLINE void +ucp_amo_init_fetch(ucp_request_t *req, ucp_ep_h ep, void *buffer, + uct_atomic_op_t op, size_t op_size, uint64_t remote_addr, + ucp_rkey_h rkey, uint64_t value, const ucp_amo_proto_t *proto) +{ + ucp_amo_init_common(req, ep, op, remote_addr, rkey, value, op_size); + req->send.state.uct_comp.count = 1; + req->send.state.uct_comp.func = ucp_amo_completed_single; + req->send.uct.func = proto->progress_fetch; + req->send.buffer = buffer; +} + +static UCS_F_ALWAYS_INLINE +void ucp_amo_init_post(ucp_request_t *req, ucp_ep_h ep, uct_atomic_op_t op, + size_t op_size, uint64_t remote_addr, ucp_rkey_h rkey, + uint64_t value, const ucp_amo_proto_t *proto) +{ + ucp_amo_init_common(req, ep, op, remote_addr, rkey, value, op_size); + req->send.uct.func = proto->progress_post; +} + +ucs_status_ptr_t ucp_atomic_fetch_nb(ucp_ep_h ep, ucp_atomic_fetch_op_t opcode, + uint64_t value, void *result, size_t op_size, + uint64_t remote_addr, ucp_rkey_h rkey, + ucp_send_callback_t cb) +{ + ucs_status_ptr_t status_p; + ucs_status_t status; + ucp_request_t *req; + + UCP_AMO_CHECK_PARAM(ep->worker->context, remote_addr, op_size, opcode, + UCP_ATOMIC_FETCH_OP_LAST, + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucs_trace_req("atomic_fetch_nb opcode %d value %"PRIu64" buffer %p size %zu" + " remote_addr %"PRIx64" rkey %p to %s cb %p", + opcode, value, result, op_size, remote_addr, rkey, + ucp_ep_peer_name(ep), cb); + + status = UCP_RKEY_RESOLVE(rkey, ep, amo); + if (status != UCS_OK) { + status_p = UCS_STATUS_PTR(UCS_ERR_UNREACHABLE); + goto out; + } + + req = 
ucp_request_get(ep->worker); + if (ucs_unlikely(NULL == req)) { + status_p = UCS_STATUS_PTR(UCS_ERR_NO_MEMORY); + goto out; + } + + ucp_amo_init_fetch(req, ep, result, ucp_uct_fop_table[opcode], op_size, + remote_addr, rkey, value, rkey->cache.amo_proto); + + status_p = ucp_rma_send_request_cb(req, cb); + +out: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return status_p; +} + +ucs_status_t ucp_atomic_post(ucp_ep_h ep, ucp_atomic_post_op_t opcode, uint64_t value, + size_t op_size, uint64_t remote_addr, ucp_rkey_h rkey) +{ + ucs_status_ptr_t status_p; + ucs_status_t status; + ucp_request_t *req; + + UCP_AMO_CHECK_PARAM(ep->worker->context, remote_addr, op_size, opcode, + UCP_ATOMIC_POST_OP_LAST, return UCS_ERR_INVALID_PARAM); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucs_trace_req("atomic_post opcode %d value %"PRIu64" size %zu " + "remote_addr %"PRIx64" rkey %p to %s", + opcode, value, op_size, remote_addr, rkey, + ucp_ep_peer_name(ep)); + + status = UCP_RKEY_RESOLVE(rkey, ep, amo); + if (status != UCS_OK) { + goto out; + } + + req = ucp_request_get(ep->worker); + if (ucs_unlikely(NULL == req)) { + status = UCS_ERR_NO_MEMORY; + goto out; + } + + ucp_amo_init_post(req, ep, ucp_uct_op_table[opcode], op_size, remote_addr, + rkey, value, rkey->cache.amo_proto); + + status_p = ucp_rma_send_request_cb(req, (ucp_send_callback_t)ucs_empty_function); + if (UCS_PTR_IS_PTR(status_p)) { + ucp_request_release(status_p); + status = UCS_OK; + } else { + status = UCS_PTR_STATUS(status_p); + } + +out: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return status; +} + +static inline ucs_status_t +ucp_atomic_fetch_b(ucp_ep_h ep, ucp_atomic_fetch_op_t opcode, uint64_t value, + void *result, size_t size, uint64_t remote_addr, + ucp_rkey_h rkey, const char *op_name) +{ + void *request; + + request = ucp_atomic_fetch_nb(ep, opcode, value, result, size, remote_addr, + rkey, (ucp_send_callback_t)ucs_empty_function); + return ucp_rma_wait(ep->worker, 
request, op_name); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_add32, (ep, add, remote_addr, rkey), + ucp_ep_h ep, uint32_t add, uint64_t remote_addr, ucp_rkey_h rkey) +{ + return ucp_atomic_post(ep, UCP_ATOMIC_POST_OP_ADD, add, sizeof(add), + remote_addr, rkey); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_add64, (ep, add, remote_addr, rkey), + ucp_ep_h ep, uint64_t add, uint64_t remote_addr, ucp_rkey_h rkey) +{ + return ucp_atomic_post(ep, UCP_ATOMIC_POST_OP_ADD, add, sizeof(add), + remote_addr, rkey); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_fadd32, (ep, add, remote_addr, rkey, result), + ucp_ep_h ep, uint32_t add, uint64_t remote_addr, ucp_rkey_h rkey, + uint32_t *result) +{ + return ucp_atomic_fetch_b(ep, UCP_ATOMIC_FETCH_OP_FADD, add, result, + sizeof(add), remote_addr, rkey, "atomic_fadd32"); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_fadd64, (ep, add, remote_addr, rkey, result), + ucp_ep_h ep, uint64_t add, uint64_t remote_addr, ucp_rkey_h rkey, + uint64_t *result) +{ + return ucp_atomic_fetch_b(ep, UCP_ATOMIC_FETCH_OP_FADD, add, result, + sizeof(add), remote_addr, rkey, "atomic_fadd64"); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_swap32, (ep, swap, remote_addr, rkey, result), + ucp_ep_h ep, uint32_t swap, uint64_t remote_addr, ucp_rkey_h rkey, + uint32_t *result) +{ + return ucp_atomic_fetch_b(ep, UCP_ATOMIC_FETCH_OP_SWAP, swap, result, + sizeof(swap), remote_addr, rkey, "atomic_swap32"); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_swap64, (ep, swap, remote_addr, rkey, result), + ucp_ep_h ep, uint64_t swap, uint64_t remote_addr, ucp_rkey_h rkey, + uint64_t *result) +{ + return ucp_atomic_fetch_b(ep, UCP_ATOMIC_FETCH_OP_SWAP, swap, result, + sizeof(swap), remote_addr, rkey, "atomic_swap64"); +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_atomic_cswap_b(ucp_ep_h ep, uint64_t compare, uint64_t swap, size_t size, + uint64_t remote_addr, ucp_rkey_h rkey, void *result, + const char *op_name) +{ + char tmp[sizeof(swap)]; /* 
sufficient storage for maximal operand size */ + ucs_status_t status; + + memcpy(tmp, &swap, size); + status = ucp_atomic_fetch_b(ep, UCP_ATOMIC_FETCH_OP_CSWAP, compare, &tmp, + size, remote_addr, rkey, op_name); + if (status == UCS_OK) { + memcpy(result, tmp, size); + } + return status; +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_cswap32, + (ep, compare, swap, remote_addr, rkey, result), + ucp_ep_h ep, uint32_t compare, uint32_t swap, + uint64_t remote_addr, ucp_rkey_h rkey, uint32_t *result) +{ + return ucp_atomic_cswap_b(ep, compare, swap, sizeof(swap), remote_addr, + rkey, result, "atomic_cswap32"); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_cswap64, + (ep, compare, swap, remote_addr, rkey, result), + ucp_ep_h ep, uint64_t compare, uint64_t swap, + uint64_t remote_addr, ucp_rkey_h rkey, uint64_t *result) +{ + return ucp_atomic_cswap_b(ep, compare, swap, sizeof(swap), remote_addr, + rkey, result, "atomic_cswap64"); +} diff --git a/src/ucp/rma/amo_sw.c b/src/ucp/rma/amo_sw.c new file mode 100644 index 0000000..0d7ce00 --- /dev/null +++ b/src/ucp/rma/amo_sw.c @@ -0,0 +1,313 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "rma.h" +#include "rma.inl" + +#include +#include + + +static size_t ucp_amo_sw_pack(void *dest, void *arg, uint8_t fetch) +{ + ucp_request_t *req = arg; + ucp_atomic_req_hdr_t *atomich = dest; + ucp_ep_t *ep = req->send.ep; + size_t size = req->send.length; + size_t length; + /* take the target address from send.amo, the same request state the value and opcode are read from */ + atomich->address = req->send.amo.remote_addr; + atomich->req.ep_ptr = ucp_ep_dest_ep_ptr(ep); + atomich->req.reqptr = fetch ? 
(uintptr_t)req : 0; + atomich->length = size; + atomich->opcode = req->send.amo.uct_op; + + memcpy(atomich + 1, &req->send.amo.value, size); + length = sizeof(*atomich) + size; + + if (req->send.amo.uct_op == UCT_ATOMIC_OP_CSWAP) { + /* compare-swap has two arguments */ + memcpy(UCS_PTR_BYTE_OFFSET(atomich + 1, size), req->send.buffer, size); + length += size; + } + + return length; +} + +static size_t ucp_amo_sw_post_pack_cb(void *dest, void *arg) +{ + return ucp_amo_sw_pack(dest, arg, 0); +} + +static size_t ucp_amo_sw_fetch_pack_cb(void *dest, void *arg) +{ + return ucp_amo_sw_pack(dest, arg, 1); +} + +static ucs_status_t ucp_amo_sw_progress(uct_pending_req_t *self, + uct_pack_callback_t pack_cb, int fetch) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ucs_status_t status; + ssize_t packed_len; + + req->send.lane = ucp_ep_get_am_lane(ep); + packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], + UCP_AM_ID_ATOMIC_REQ, pack_cb, req, 0); + if (packed_len > 0) { + ucp_ep_rma_remote_request_sent(ep); + if (!fetch) { + ucp_request_complete_send(req, UCS_OK); + } + return UCS_OK; + } else { + status = (ucs_status_t)packed_len; + if (status != UCS_ERR_NO_RESOURCE) { + /* failure */ + ucp_request_complete_send(req, status); + } + return status; + } +} + +static ucs_status_t ucp_amo_sw_progress_post(uct_pending_req_t *self) +{ + return ucp_amo_sw_progress(self, ucp_amo_sw_post_pack_cb, 0); +} + +static ucs_status_t ucp_amo_sw_progress_fetch(uct_pending_req_t *self) +{ + return ucp_amo_sw_progress(self, ucp_amo_sw_fetch_pack_cb, 1); +} + +ucp_amo_proto_t ucp_amo_sw_proto = { + .name = "sw_amo", + .progress_fetch = ucp_amo_sw_progress_fetch, + .progress_post = ucp_amo_sw_progress_post +}; + +static size_t ucp_amo_sw_pack_atomic_reply(void *dest, void *arg) +{ + ucp_rma_rep_hdr_t *hdr = dest; + ucp_request_t *req = arg; + + hdr->req = req->send.get_reply.req; + + switch (req->send.length) { + case 
sizeof(uint32_t): + *(uint32_t*)(hdr + 1) = req->send.atomic_reply.data.reply32; + break; + case sizeof(uint64_t): + *(uint64_t*)(hdr + 1) = req->send.atomic_reply.data.reply64; + break; + default: + ucs_fatal("invalid atomic length: %zu", req->send.length); + } + + return sizeof(*hdr) + req->send.length; +} + +static ucs_status_t ucp_progress_atomic_reply(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ssize_t packed_len; + + req->send.lane = ucp_ep_get_am_lane(ep); + packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], UCP_AM_ID_ATOMIC_REP, + ucp_amo_sw_pack_atomic_reply, req, 0); + + if (packed_len < 0) { + return (ucs_status_t)packed_len; + } + + ucs_assert(packed_len == sizeof(ucp_rma_rep_hdr_t) + req->send.length); + ucp_request_put(req); + return UCS_OK; +} + +#define DEFINE_AMO_SW_OP(_bits) \ + static void ucp_amo_sw_do_op##_bits(const ucp_atomic_req_hdr_t *atomicreqh) \ + { \ + uint##_bits##_t *ptr = (void*)atomicreqh->address; \ + uint##_bits##_t *args = (void*)(atomicreqh + 1); \ + \ + switch (atomicreqh->opcode) { \ + case UCT_ATOMIC_OP_ADD: \ + ucs_atomic_add##_bits(ptr, args[0]); \ + break; \ + case UCT_ATOMIC_OP_AND: \ + ucs_atomic_and##_bits(ptr, args[0]); \ + break; \ + case UCT_ATOMIC_OP_OR: \ + ucs_atomic_or##_bits(ptr, args[0]); \ + break; \ + case UCT_ATOMIC_OP_XOR: \ + ucs_atomic_xor##_bits(ptr, args[0]); \ + break; \ + default: \ + ucs_fatal("invalid opcode: %d", atomicreqh->opcode); \ + } \ + } + +#define DEFINE_AMO_SW_FOP(_bits) \ + static void ucp_amo_sw_do_fop##_bits(const ucp_atomic_req_hdr_t *atomicreqh, \ + ucp_atomic_reply_t *result) \ + { \ + uint##_bits##_t *ptr = (void*)atomicreqh->address; \ + uint##_bits##_t *args = (void*)(atomicreqh + 1); \ + \ + switch (atomicreqh->opcode) { \ + case UCT_ATOMIC_OP_ADD: \ + result->reply##_bits = ucs_atomic_fadd##_bits(ptr, args[0]); \ + break; \ + case UCT_ATOMIC_OP_AND: \ + result->reply##_bits = 
ucs_atomic_fand##_bits(ptr, args[0]); \ + break; \ + case UCT_ATOMIC_OP_OR: \ + result->reply##_bits = ucs_atomic_for##_bits(ptr, args[0]); \ + break; \ + case UCT_ATOMIC_OP_XOR: \ + result->reply##_bits = ucs_atomic_fxor##_bits(ptr, args[0]); \ + break; \ + case UCT_ATOMIC_OP_SWAP: \ + result->reply##_bits = ucs_atomic_swap##_bits(ptr, args[0]); \ + break; \ + case UCT_ATOMIC_OP_CSWAP: \ + result->reply##_bits = ucs_atomic_cswap##_bits(ptr, args[0], args[1]); \ + break; \ + default: \ + ucs_fatal("invalid opcode: %d", atomicreqh->opcode); \ + } \ + } + +DEFINE_AMO_SW_OP(32) +DEFINE_AMO_SW_OP(64) +DEFINE_AMO_SW_FOP(32) +DEFINE_AMO_SW_FOP(64) + +UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_req_handler, (arg, data, length, am_flags), + void *arg, void *data, size_t length, unsigned am_flags) +{ + ucp_atomic_req_hdr_t *atomicreqh = data; + ucp_worker_h worker = arg; + ucp_ep_h ep = ucp_worker_get_ep_by_ptr(worker, + atomicreqh->req.ep_ptr); + ucp_rsc_index_t amo_rsc_idx = ucs_ffs64_safe(worker->atomic_tls); + ucp_request_t *req; + + if (ucs_unlikely((amo_rsc_idx != UCP_MAX_RESOURCES) && + (ucp_worker_iface_get_attr(worker, + amo_rsc_idx)->cap.flags & + UCT_IFACE_FLAG_ATOMIC_DEVICE))) { + ucs_error("Unsupported: got software atomic request while device atomics are selected on worker %p", + worker); + /* TODO: this situation will be possible then CM wireup is implemented + * and CM lane is bound to suboptimal device, then need to execute + * AMO on fastest resource from worker->atomic_tls using loopback + * EP and continue SW AMO protocol */ + } + + if (atomicreqh->req.reqptr == 0) { + /* atomic operation without result */ + switch (atomicreqh->length) { + case sizeof(uint32_t): + ucp_amo_sw_do_op32(atomicreqh); + break; + case sizeof(uint64_t): + ucp_amo_sw_do_op64(atomicreqh); + break; + default: + ucs_fatal("invalid atomic length: %u", atomicreqh->length); + } + ucp_rma_sw_send_cmpl(ep); + } else { + /* atomic operation with result */ + req = ucp_request_get(worker); + 
if (req == NULL) { + ucs_error("failed to allocate atomic reply"); + return UCS_OK; + } + + switch (atomicreqh->length) { + case sizeof(uint32_t): + ucp_amo_sw_do_fop32(atomicreqh, &req->send.atomic_reply.data); + break; + case sizeof(uint64_t): + ucp_amo_sw_do_fop64(atomicreqh, &req->send.atomic_reply.data); + break; + default: + ucs_fatal("invalid atomic length: %u", atomicreqh->length); + } + + req->send.ep = ep; + req->send.atomic_reply.req = atomicreqh->req.reqptr; + req->send.length = atomicreqh->length; + req->send.uct.func = ucp_progress_atomic_reply; + ucp_request_send(req, 0); + } + + return UCS_OK; +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_atomic_rep_handler, (arg, data, length, am_flags), + void *arg, void *data, size_t length, unsigned am_flags) +{ + ucp_rma_rep_hdr_t *hdr = data; + size_t frag_length = length - sizeof(*hdr); + ucp_request_t *req = (ucp_request_t*)hdr->req; + ucp_ep_h ep = req->send.ep; + + memcpy(req->send.buffer, hdr + 1, frag_length); + ucp_request_complete_send(req, UCS_OK); + ucp_ep_rma_remote_request_completed(ep); + return UCS_OK; +} + +static void ucp_amo_sw_dump_packet(ucp_worker_h worker, uct_am_trace_type_t type, + uint8_t id, const void *data, size_t length, + char *buffer, size_t max) +{ + const ucp_atomic_req_hdr_t *atomich; + const ucp_rma_rep_hdr_t *reph; + size_t header_len; + char *p; + + switch (id) { + case UCP_AM_ID_ATOMIC_REQ: + atomich = data; + snprintf(buffer, max, + "ATOMIC_REQ [addr 0x%lx len %u reqptr 0x%lx ep 0x%lx op %d]", + atomich->address, atomich->length, atomich->req.reqptr, + atomich->req.ep_ptr, atomich->opcode); + header_len = sizeof(*atomich);; + break; + case UCP_AM_ID_ATOMIC_REP: + reph = data; + snprintf(buffer, max, "ATOMIC_REP [reqptr 0x%lx]", reph->req); + header_len = sizeof(*reph); + break; + default: + return; + } + + p = buffer + strlen(buffer); + ucp_dump_payload(worker->context, p, buffer + max - p, + UCS_PTR_BYTE_OFFSET(data, header_len), + length - header_len); +} + 
+UCP_DEFINE_AM(UCP_FEATURE_AMO, UCP_AM_ID_ATOMIC_REQ, ucp_atomic_req_handler,
+              ucp_amo_sw_dump_packet, 0);
+UCP_DEFINE_AM(UCP_FEATURE_AMO, UCP_AM_ID_ATOMIC_REP, ucp_atomic_rep_handler,
+              ucp_amo_sw_dump_packet, 0);
+
+UCP_DEFINE_AM_PROXY(UCP_AM_ID_ATOMIC_REQ);
diff --git a/src/ucp/rma/flush.c b/src/ucp/rma/flush.c
new file mode 100644
index 0000000..fde843b
--- /dev/null
+++ b/src/ucp/rma/flush.c
@@ -0,0 +1,525 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include
+#include
+#include
+
+#include "rma.inl"
+
+/*
+ * Record a failed lane flush: log the error unless the endpoint is configured
+ * for peer error handling, store 'status' in the request, and drop one count
+ * from the outstanding-lanes counter.
+ */
+static void ucp_ep_flush_error(ucp_request_t *req, ucs_status_t status)
+{
+    if (ucp_ep_config(req->send.ep)->key.err_mode != UCP_ERR_HANDLING_MODE_PEER) {
+        ucs_error("error during flush: %s", ucs_status_string(status));
+    }
+
+    req->status = status;
+    --req->send.state.uct_comp.count;
+}
+/* Flush is done when no lane completion is outstanding and the
+ * remote-completion (software) stage has finished. */
+static int ucp_ep_flush_is_completed(ucp_request_t *req)
+{
+    return (req->send.state.uct_comp.count == 0) && req->send.flush.sw_done;
+}
+/*
+ * Advance an endpoint flush request.
+ *
+ * First loop: for every lane still set in req->send.flush.lanes, start a UCT
+ * flush. The lane bit is cleared once the flush was started (or the request
+ * was queued on that lane's pending queue); the completion count is
+ * decremented here only when the lane has no UCT endpoint or uct_ep_flush()
+ * returned UCS_OK. The loop stops early on an error, or when the request is
+ * already pending on another lane.
+ *
+ * Second part: once the completion count is zero, start - exactly once - the
+ * wait for remote completions, or mark it done if no wait is needed.
+ */
+static void ucp_ep_flush_progress(ucp_request_t *req)
+{
+    ucp_ep_h ep = req->send.ep;
+    ucp_ep_flush_state_t *flush_state;
+    ucp_lane_index_t lane;
+    ucs_status_t status;
+    uct_ep_h uct_ep;
+
+    ucs_trace("ep %p: progress flush req %p, lanes 0x%x count %d", ep, req,
+              req->send.flush.lanes, req->send.state.uct_comp.count);
+
+    while (req->send.flush.lanes) {
+
+        /* Search for next lane to start flush */
+        lane   = ucs_ffs64(req->send.flush.lanes);
+        uct_ep = ep->uct_eps[lane];
+        if (uct_ep == NULL) {
+            req->send.flush.lanes &= ~UCS_BIT(lane);
+            --req->send.state.uct_comp.count;
+            continue;
+        }
+
+        /* Start flush operation on UCT endpoint */
+        if (req->send.flush.uct_flags & UCT_FLUSH_FLAG_CANCEL) {
+            uct_ep_pending_purge(uct_ep, ucp_ep_err_pending_purge,
+                                 UCS_STATUS_PTR(UCS_ERR_CANCELED));
+        }
+        status = uct_ep_flush(uct_ep, req->send.flush.uct_flags,
+                              &req->send.state.uct_comp);
+        ucs_trace("flushing ep %p lane[%d]: %s", ep, lane,
+                  ucs_status_string(status));
+        if (status == UCS_OK) {
+            req->send.flush.lanes &= ~UCS_BIT(lane);
+            --req->send.state.uct_comp.count;
+            ucs_trace("ep %p: flush comp %p count reduced to %d", ep,
+                      &req->send.state.uct_comp, req->send.state.uct_comp.count);
+        } else if (status == UCS_INPROGRESS) {
+            /* count is not touched here; presumably UCT reports completion
+             * through req->send.state.uct_comp - confirm */
+            req->send.flush.lanes &= ~UCS_BIT(lane);
+        } else if (status == UCS_ERR_NO_RESOURCE) {
+            if (req->send.lane != UCP_NULL_LANE) {
+                ucs_trace("ep %p: not adding pending flush %p on lane %d, "
+                          "because it's already pending on lane %d",
+                          ep, req, lane, req->send.lane);
+                break;
+            }
+
+            status = uct_ep_pending_add(uct_ep, &req->send.uct, 0);
+            ucs_trace("adding pending flush on ep %p lane[%d]: %s", ep, lane,
+                      ucs_status_string(status));
+            if (status == UCS_OK) {
+                req->send.lane        = lane;
+                req->send.flush.lanes &= ~UCS_BIT(lane);
+            } else if (status != UCS_ERR_BUSY) {
+                ucp_ep_flush_error(req, status);
+                break;
+            }
+            /* on UCS_ERR_BUSY the lane bit stays set, so the loop retries
+             * uct_ep_flush() on the same lane */
+        } else {
+            ucp_ep_flush_error(req, status);
+            break;
+        }
+    }
+
+    if (!req->send.flush.sw_started && (req->send.state.uct_comp.count == 0)) {
+        /* Start waiting for remote completions only after all lanes are flushed
+         * on the transport level, so we are sure all pending requests were sent.
+         * We don't need to wait for remote completions in these cases:
+         * - The flush operation is in 'cancel' mode
+         * - The endpoint is either not used or did not resolve the peer endpoint,
+         *   which means we didn't have any user operations which require remote
+         *   completion. In this case, the flush state may not even be initialized.
+         */
+        if ((req->send.flush.uct_flags & UCT_FLUSH_FLAG_CANCEL) ||
+            !ucs_test_all_flags(ep->flags, UCP_EP_FLAG_USED|UCP_EP_FLAG_DEST_EP)) {
+            ucs_trace_req("flush request %p not waiting for remote completions",
+                          req);
+            req->send.flush.sw_done = 1;
+        } else {
+            /* All pending requests were sent, so 'send_sn' value is up-to-date */
+            flush_state = ucp_ep_flush_state(ep);
+            if (flush_state->send_sn == flush_state->cmpl_sn) {
+                req->send.flush.sw_done = 1;
+                ucs_trace_req("flush request %p remote completions done", req);
+            } else {
+                req->send.flush.cmpl_sn = flush_state->send_sn;
+                ucs_queue_push(&flush_state->reqs, &req->send.flush.queue);
+                ucs_trace_req("added flush request %p to ep remote completion queue"
+                              " with sn %d", req, req->send.flush.cmpl_sn);
+            }
+        }
+        req->send.flush.sw_started = 1;
+    }
+}
+/* Unregister the slow-path "resume flush" progress callback of this request */
+static void ucp_ep_flush_slow_path_remove(ucp_request_t *req)
+{
+    ucp_ep_h ep = req->send.ep;
+    uct_worker_progress_unregister_safe(ep->worker->uct,
+                                        &req->send.flush.prog_id);
+}
+/*
+ * If the flush is complete, remove the slow-path callback and invoke the
+ * request's flushed_cb. The callback may release the request, so 'req' must
+ * not be used after this function returns 1.
+ *
+ * @return 1 if the request completed, 0 otherwise.
+ */
+static int ucp_flush_check_completion(ucp_request_t *req)
+{
+    /* Check if flushed all lanes */
+    if (!ucp_ep_flush_is_completed(req)) {
+        return 0;
+    }
+
+    ucs_trace_req("flush req %p completed", req);
+    ucp_ep_flush_slow_path_remove(req);
+    req->send.flush.flushed_cb(req);
+    return 1;
+}
+/* Slow-path callback: unregister itself, then resume flushing the lanes
+ * which were left in req->send.flush.lanes. Always returns 0. */
+static unsigned ucp_ep_flush_resume_slow_path_callback(void *arg)
+{
+    ucp_request_t *req = arg;
+
+    ucp_ep_flush_slow_path_remove(req);
+    ucp_ep_flush_progress(req);
+    ucp_flush_check_completion(req);
+    return 0;
+}
+/*
+ * Pending-queue callback: retry the flush on the lane where this request was
+ * queued (req->send.lane), then continue with the remaining lanes.
+ *
+ * @return UCS_ERR_NO_RESOURCE if the lane still cannot be flushed, otherwise
+ *         UCS_OK (including when the flush failed with another error, which
+ *         is recorded in the request).
+ */
+static ucs_status_t ucp_ep_flush_progress_pending(uct_pending_req_t *self)
+{
+    ucp_request_t *req    = ucs_container_of(self, ucp_request_t, send.uct);
+    ucp_lane_index_t lane = req->send.lane;
+    ucp_ep_h ep           = req->send.ep;
+    ucs_status_t status;
+    int completed;
+
+    ucs_assert(!(req->flags & UCP_REQUEST_FLAG_COMPLETED));
+
+    status = uct_ep_flush(ep->uct_eps[lane], req->send.flush.uct_flags,
+                          &req->send.state.uct_comp);
+    ucs_trace("flushing ep %p lane[%d]: %s", ep, lane,
+              ucs_status_string(status));
+    if (status == UCS_OK) {
+        --req->send.state.uct_comp.count; /* UCT endpoint is flushed */
+    }
+
+    /* since req->send.lane is still not UCP_NULL_LANE, ucp_ep_flush_progress()
+     * will not put anything on pending.
+     */
+    ucp_ep_flush_progress(req);
+    completed = ucp_flush_check_completion(req);
+
+    /* If the operation has not completed, add slow-path progress to resume */
+    if (!completed && req->send.flush.lanes) {
+        ucs_trace("ep %p: adding slow-path callback to resume flush", ep);
+        uct_worker_progress_register_safe(ep->worker->uct,
+                                          ucp_ep_flush_resume_slow_path_callback,
+                                          req, 0, &req->send.flush.prog_id);
+    }
+
+    if ((status == UCS_OK) || (status == UCS_INPROGRESS)) {
+        /* flushed callback might release the request */
+        if (!completed) {
+            req->send.lane = UCP_NULL_LANE;
+        }
+        return UCS_OK;
+    } else if (status == UCS_ERR_NO_RESOURCE) {
+        return UCS_ERR_NO_RESOURCE;
+    } else {
+        /* NOTE(review): 'req' is dereferenced here after
+         * ucp_flush_check_completion() - confirm it cannot have been
+         * released on this path */
+        ucp_ep_flush_error(req, status);
+        return UCS_OK;
+    }
+}
+/*
+ * UCT completion callback of the flush request. On UCS_OK the flush is
+ * progressed further; on any other status the request is forced to the
+ * completed state. In both cases 'status' is stored in the request.
+ */
+static void ucp_ep_flush_completion(uct_completion_t *self, ucs_status_t status)
+{
+    ucp_request_t *req = ucs_container_of(self, ucp_request_t,
+                                          send.state.uct_comp);
+
+    ucs_trace_req("flush completion req=%p status=%d", req, status);
+
+    ucs_assert(!(req->flags & UCP_REQUEST_FLAG_COMPLETED));
+
+    req->status = status;
+
+    if (status == UCS_OK) {
+        ucp_ep_flush_progress(req);
+    } else {
+        /* force flush completion in case of error */
+        req->send.flush.sw_done        = 1;
+        req->send.state.uct_comp.count = 0;
+    }
+
+
+    ucs_trace_req("flush completion req=%p comp_count=%d", req, req->send.state.uct_comp.count);
+    ucp_flush_check_completion(req);
+}
+/* Called when all remote operations this flush waits for have completed:
+ * marks the software stage done (once) and re-checks overall completion. */
+void ucp_ep_flush_remote_completed(ucp_request_t *req)
+{
+    ucs_trace_req("flush remote ops completed req=%p", req);
+
+    if (!req->send.flush.sw_done) {
+        req->send.flush.sw_done = 1;
+        ucp_flush_check_completion(req);
+    }
+}
+/*
+ * Create and start a flush request on all lanes of an endpoint.
+ *
+ * @param ep          Endpoint to flush.
+ * @param uct_flags   Flags passed to uct_ep_flush() on every lane.
+ * @param req_cb      Stored in req->send.cb.
+ * @param req_flags   Initial request flags.
+ * @param worker_req  Stored in req->send.flush.worker_req (the worker-level
+ *                    flush request this one belongs to, or NULL).
+ * @param flushed_cb  Invoked when the flush completes asynchronously.
+ * @param debug_name  Name used in the debug log.
+ *
+ * @return NULL if the endpoint has UCP_EP_FLAG_FAILED set;
+ *         UCS_STATUS_PTR(UCS_ERR_NO_MEMORY) if no request could be allocated;
+ *         UCS_STATUS_PTR(req->status) if the flush completed immediately (the
+ *         request is released and flushed_cb is not called);
+ *         otherwise a pointer to the user part of the in-progress request.
+ */
+ucs_status_ptr_t ucp_ep_flush_internal(ucp_ep_h ep, unsigned uct_flags,
+                                       ucp_send_callback_t req_cb,
+                                       unsigned req_flags,
+                                       ucp_request_t *worker_req,
+                                       ucp_request_callback_t flushed_cb,
+                                       const char *debug_name)
+{
+    ucs_status_t status;
+    ucp_request_t *req;
+
+    ucs_debug("%s ep %p", debug_name, ep);
+
+    if (ep->flags & UCP_EP_FLAG_FAILED) {
+        return NULL;
+    }
+
+    req = ucp_request_get(ep->worker);
+    if (req == NULL) {
+        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
+    }
+
+    /*
+     * Flush operation can be queued on the pending queue of only one of the
+     * lanes (indicated by req->send.lane) and scheduled for completion on any
+     * number of lanes. req->send.uct_comp.count keeps track of how many lanes
+     * are not flushed yet, and when it reaches zero, it means all lanes are
+     * flushed. req->send.flush.lanes keeps track of which lanes we still have
+     * to start flush on.
+     */
+    req->flags                   = req_flags;
+    req->status                  = UCS_OK;
+    req->send.ep                 = ep;
+    req->send.cb                 = req_cb;
+    req->send.flush.flushed_cb   = flushed_cb;
+    req->send.flush.lanes        = UCS_MASK(ucp_ep_num_lanes(ep));
+    req->send.flush.prog_id      = UCS_CALLBACKQ_ID_NULL;
+    req->send.flush.uct_flags    = uct_flags;
+    req->send.flush.worker_req   = worker_req;
+    req->send.flush.sw_started   = 0;
+    req->send.flush.sw_done      = 0;
+
+    req->send.lane                 = UCP_NULL_LANE;
+    req->send.uct.func             = ucp_ep_flush_progress_pending;
+    req->send.state.uct_comp.func  = ucp_ep_flush_completion;
+    req->send.state.uct_comp.count = ucp_ep_num_lanes(ep);
+
+    ucp_ep_flush_progress(req);
+
+    if (ucp_ep_flush_is_completed(req)) {
+        status = req->status;
+        ucs_trace_req("ep %p: releasing flush request %p, returning status %s",
+                      ep, req, ucs_status_string(status));
+        ucp_request_put(req);
+        return UCS_STATUS_PTR(status);
+    }
+
+    ucs_trace_req("ep %p: return inprogress flush request %p (%p)", ep, req,
+                  req + 1);
+    return req + 1;
+}
+/* flushed_cb for user-visible endpoint flushes: complete the send request
+ * with the status recorded during the flush. */
+static void ucp_ep_flushed_callback(ucp_request_t *req)
+{
+    ucp_request_complete_send(req, req->status);
+}
+/* Non-blocking endpoint flush. Note that 'flags' is not used by this
+ * implementation. */
+UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_ep_flush_nb, (ep, flags, cb),
+                 ucp_ep_h ep, unsigned flags, ucp_send_callback_t cb)
+{
+    void *request;
+
+    UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker);
+
+    request = ucp_ep_flush_internal(ep, UCT_FLUSH_FLAG_LOCAL, cb,
+                                    UCP_REQUEST_FLAG_CALLBACK, NULL,
+                                    ucp_ep_flushed_callback, "flush_nb");
+
+    UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker);
+
+    return request;
+}
+/*
+ * Check whether the worker is currently flushed.
+ *
+ * @return UCS_INPROGRESS if remote operations are still outstanding
+ *         (worker->flush_ops_count != 0); otherwise the first non-UCS_OK
+ *         status returned by uct_iface_flush() on any interface (errors are
+ *         logged), or UCS_OK if every interface is flushed.
+ */
+static ucs_status_t ucp_worker_flush_check(ucp_worker_h worker)
+{
+    ucp_rsc_index_t iface_id;
+    ucp_worker_iface_t *wiface;
+    ucs_status_t status;
+
+    if (worker->flush_ops_count) {
+        return UCS_INPROGRESS;
+    }
+
+    for (iface_id = 0; iface_id < worker->num_ifaces; ++iface_id) {
+        wiface = worker->ifaces[iface_id];
+        if (wiface->iface == NULL) {
+            continue;
+        }
+
+        status = uct_iface_flush(wiface->iface, 0, NULL);
+        if (status != UCS_OK) {
+            if (UCS_STATUS_IS_ERR(status)) {
+                ucs_error("iface[%d] "UCT_TL_RESOURCE_DESC_FMT" flush failed: %s",
+                          iface_id,
+                          UCT_TL_RESOURCE_DESC_ARG(&worker->context->tl_rscs[wiface->rsc_index].tl_rsc),
+                          ucs_status_string(status));
+            }
+            return status;
+        }
+    }
+
+    return UCS_OK;
+}
+/*
+ * Drop one reference from a worker flush request. The request is completed
+ * when the count reaches zero or 'status' is not UCS_OK. The progress
+ * callback is unregistered on completion, or when 'force_progress_unreg' is
+ * non-zero.
+ */
+static void ucp_worker_flush_complete_one(ucp_request_t *req, ucs_status_t status,
+                                          int force_progress_unreg)
+{
+    ucp_worker_h worker = req->flush_worker.worker;
+    int complete;
+
+    --req->flush_worker.comp_count;
+    complete = (req->flush_worker.comp_count == 0) || (status != UCS_OK);
+
+    if (complete || force_progress_unreg) {
+        uct_worker_progress_unregister_safe(worker->uct,
+                                            &req->flush_worker.prog_id);
+    }
+
+    if (complete) {
+        ucs_assert(status != UCS_INPROGRESS);
+        ucp_request_complete(req, flush_worker.cb, status);
+    }
+}
+/* flushed_cb for endpoint flushes started by a worker flush: drop the
+ * reference held on the worker request and release the endpoint request. */
+static void ucp_worker_flush_ep_flushed_cb(ucp_request_t *req)
+{
+    ucp_worker_flush_complete_one(req->send.flush.worker_req, UCS_OK, 0);
+    ucp_request_put(req);
+}
+/*
+ * Progress callback of a worker flush request. Each call re-checks the worker
+ * and, while it is still in progress and the 'flush_worker_eps' option is
+ * set, starts a flush on the next endpoint of worker->all_eps.
+ * Always returns 0.
+ */
+static unsigned ucp_worker_flush_progress(void *arg)
+{
+    ucp_request_t *req        = arg;
+    ucp_worker_h worker       = req->flush_worker.worker;
+    ucp_ep_ext_gen_t *next_ep = req->flush_worker.next_ep;
+    void *ep_flush_request;
+    ucs_status_t status;
+    ucp_ep_h ep;
+
+    status = ucp_worker_flush_check(worker);
+    if ((status == UCS_OK) || (&next_ep->ep_list == &worker->all_eps)) {
+        /* If all ifaces are flushed, or we finished going over all endpoints,
+         * no need to progress this request actively any more. Just wait until
+         * all associated endpoint flush requests are completed.
+         */
+        ucp_worker_flush_complete_one(req, UCS_OK, 1);
+    } else if (status != UCS_INPROGRESS) {
+        /* Error returned from uct iface flush */
+        ucp_worker_flush_complete_one(req, status, 1);
+    } else if (worker->context->config.ext.flush_worker_eps) {
+        /* Some endpoints are not flushed yet. Take next endpoint from the list
+         * and start flush operation on it.
+         */
+        ep                        = ucp_ep_from_ext_gen(next_ep);
+        req->flush_worker.next_ep = ucs_list_next(&next_ep->ep_list,
+                                                  ucp_ep_ext_gen_t, ep_list);
+
+        ep_flush_request = ucp_ep_flush_internal(ep, UCT_FLUSH_FLAG_LOCAL, NULL,
+                                                 UCP_REQUEST_FLAG_RELEASED, req,
+                                                 ucp_worker_flush_ep_flushed_cb,
+                                                 "flush_worker");
+        if (UCS_PTR_IS_ERR(ep_flush_request)) {
+            /* endpoint flush resulted in an error */
+            status = UCS_PTR_STATUS(ep_flush_request);
+            ucs_warn("ucp_ep_flush_internal() failed: %s", ucs_status_string(status));
+        } else if (ep_flush_request != NULL) {
+            /* endpoint flush started, increment refcount */
+            ++req->flush_worker.comp_count;
+        }
+    }
+
+    return 0;
+}
+/*
+ * Start a worker flush. Returns immediately with the status of
+ * ucp_worker_flush_check() unless that status is UCS_INPROGRESS or
+ * UCS_ERR_NO_RESOURCE; in those two cases a request is allocated, a progress
+ * callback is registered for it, and its user part is returned.
+ */
+static ucs_status_ptr_t ucp_worker_flush_nb_internal(ucp_worker_h worker,
+                                                     ucp_send_callback_t cb,
+                                                     unsigned req_flags)
+{
+    ucs_status_t status;
+    ucp_request_t *req;
+
+    status = ucp_worker_flush_check(worker);
+    if ((status != UCS_INPROGRESS) && (status != UCS_ERR_NO_RESOURCE)) {
+        return UCS_STATUS_PTR(status);
+    }
+
+    req = ucp_request_get(worker);
+    if (req == NULL) {
+        return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY);
+    }
+
+    req->flags                   = req_flags;
+    req->status                  = UCS_OK;
+    req->flush_worker.worker     = worker;
+    req->flush_worker.cb         = cb;
+    req->flush_worker.comp_count = 1; /* counting starts from 1, and decremented
+                                         when finished going over all endpoints */
+    req->flush_worker.prog_id    = UCS_CALLBACKQ_ID_NULL;
+    req->flush_worker.next_ep    = ucs_list_head(&worker->all_eps,
+                                                 ucp_ep_ext_gen_t, ep_list);
+
+    uct_worker_progress_register_safe(worker->uct, ucp_worker_flush_progress,
+                                      req, 0, &req->flush_worker.prog_id);
+    return req + 1;
+}
+/* Non-blocking worker flush. Note that 'flags' is not used by this
+ * implementation. */
+UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_worker_flush_nb, (worker, flags, cb),
+                 ucp_worker_h worker, unsigned flags, ucp_send_callback_t cb)
+{
+    void *request;
+
+    UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker);
+
+    request = ucp_worker_flush_nb_internal(worker, cb,
+                                           UCP_REQUEST_FLAG_CALLBACK);
+
+    UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker);
+
+    return request;
+}
+/* Wait for a flush request by delegating to ucp_rma_wait() */
+static ucs_status_t ucp_flush_wait(ucp_worker_h worker, void *request)
+{
+    return ucp_rma_wait(worker, request, "flush");
+}
+/* Blocking worker flush: start the flush and wait for it while holding the
+ * worker's conditional critical section. */
+UCS_PROFILE_FUNC(ucs_status_t, ucp_worker_flush, (worker), ucp_worker_h worker)
+{
+    ucs_status_t status;
+    void *request;
+
+    UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker);
+
+    request = ucp_worker_flush_nb_internal(worker, NULL, 0);
+    status  = ucp_flush_wait(worker, request);
+
+    UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker);
+
+    return status;
+}
+/* Blocking endpoint flush: start the flush and wait for it while holding the
+ * worker's conditional critical section. */
+UCS_PROFILE_FUNC(ucs_status_t, ucp_ep_flush, (ep), ucp_ep_h ep)
+{
+    ucs_status_t status;
+    void *request;
+
+    UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker);
+
+    request = ucp_ep_flush_internal(ep, UCT_FLUSH_FLAG_LOCAL, NULL, 0, NULL,
+                                    ucp_ep_flushed_callback, "flush");
+    status = ucp_flush_wait(ep->worker, request);
+
+    UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker);
+    return status;
+}
+/* Issue uct_iface_fence() on every open interface selected by the context's
+ * tl_bitmap; stops at, and returns, the first non-UCS_OK status. */
+UCS_PROFILE_FUNC(ucs_status_t, ucp_worker_fence, (worker), ucp_worker_h worker)
+{
+    ucp_rsc_index_t rsc_index;
+    ucp_worker_iface_t *wiface;
+    ucs_status_t status;
+
+    UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker);
+
+    ucs_for_each_bit(rsc_index, worker->context->tl_bitmap) {
+        wiface = ucp_worker_iface(worker, rsc_index);
+        if (wiface->iface == NULL) {
+            continue;
+        }
+
+        status = uct_iface_fence(wiface->iface, 0);
+        if (status != UCS_OK) {
+            goto out;
+        }
+    }
+    status = UCS_OK;
+
+out:
+    UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker);
+    return status;
+}
diff --git a/src/ucp/rma/rma.h b/src/ucp/rma/rma.h
new file mode 100644
index 0000000..073f882
--- /dev/null
+++ b/src/ucp/rma/rma.h
@@ -0,0 +1,88 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCP_RMA_H_
+#define UCP_RMA_H_
+
+#include
+#include
+#include
+
+
+/**
+ * Defines functions for RMA protocol
+ */
+struct ucp_rma_proto {
+    const char                 *name;        /* protocol name */
+    uct_pending_callback_t     progress_put; /* progress function of put */
+    uct_pending_callback_t     progress_get; /* progress function of get */
+};
+
+
+/**
+ * Defines functions for AMO protocol
+ */
+struct ucp_amo_proto {
+    const char                 *name;          /* protocol name */
+    uct_pending_callback_t     progress_fetch; /* atomics which return a value */
+    uct_pending_callback_t     progress_post;  /* atomics without a result */
+};
+
+
+/**
+ * Atomic reply data
+ */
+typedef union {
+    uint32_t           reply32; /* 32-bit reply */
+    uint64_t           reply64; /* 64-bit reply */
+} ucp_atomic_reply_t;
+
+/* Header of a software put message */
+typedef struct {
+    uint64_t address;
+    uintptr_t ep_ptr;
+} UCS_S_PACKED ucp_put_hdr_t;
+
+/* Header of a software completion message */
+typedef struct {
+    uintptr_t ep_ptr;
+} UCS_S_PACKED ucp_cmpl_hdr_t;
+
+/* Header of a software get request */
+typedef struct {
+    uint64_t address;
+    uint64_t length;
+    ucp_request_hdr_t req;
+} UCS_S_PACKED ucp_get_req_hdr_t;
+
+/* Header of a software get / atomic reply; 'req' is the requester's
+ * request pointer taken from the request header */
+typedef struct {
+    uintptr_t req;
+} UCS_S_PACKED ucp_rma_rep_hdr_t;
+
+/* Header of a software atomic request; the operand(s) follow the header */
+typedef struct {
+    uint64_t address;
+    ucp_request_hdr_t req; /* req.reqptr is 0 if no reply is requested */
+    uint8_t length;        /* operand size in bytes */
+    uint8_t opcode;        /* holds a UCT_ATOMIC_OP_* value */
+} UCS_S_PACKED ucp_atomic_req_hdr_t;
+
+
+extern ucp_rma_proto_t ucp_rma_basic_proto;
+extern ucp_rma_proto_t ucp_rma_sw_proto;
+extern ucp_amo_proto_t ucp_amo_basic_proto;
+extern ucp_amo_proto_t ucp_amo_sw_proto;
+
+
+ucs_status_t ucp_rma_request_advance(ucp_request_t *req, ssize_t frag_length,
+                                     ucs_status_t status);
+
+void ucp_ep_flush_remote_completed(ucp_request_t *req);
+
+void ucp_rma_sw_send_cmpl(ucp_ep_h ep);
+
+#endif
diff --git a/src/ucp/rma/rma.inl b/src/ucp/rma/rma.inl
new file mode 100644
index 0000000..c52c3bc
--- /dev/null +++ b/src/ucp/rma/rma.inl @@ -0,0 +1,80 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_RMA_INL_ +#define UCP_RMA_INL_ + +#include "rma.h" + +#include +#include +#include + + +static UCS_F_ALWAYS_INLINE ucs_status_ptr_t +ucp_rma_send_request_cb(ucp_request_t *req, ucp_send_callback_t cb) +{ + ucs_status_t status = ucp_request_send(req, 0); + + if (req->flags & UCP_REQUEST_FLAG_COMPLETED) { + ucs_trace_req("releasing send request %p, returning status %s", req, + ucs_status_string(status)); + ucp_request_put(req); + return UCS_STATUS_PTR(status); + } + + ucs_trace_req("returning request %p, status %s", req, + ucs_status_string(status)); + ucp_request_set_callback(req, send.cb, cb); + return req + 1; +} + +static inline ucs_status_t ucp_rma_wait(ucp_worker_h worker, void *user_req, + const char *op_name) +{ + ucs_status_t status; + ucp_request_t *req; + + if (ucs_likely(user_req == NULL)) { + return UCS_OK; + } else if (ucs_unlikely(UCS_PTR_IS_ERR(user_req))) { + ucs_warn("%s failed: %s", op_name, + ucs_status_string(UCS_PTR_STATUS(user_req))); + return UCS_PTR_STATUS(user_req); + } else { + req = (ucp_request_t*)user_req - 1; + do { + ucp_worker_progress(worker); + } while (!(req->flags & UCP_REQUEST_FLAG_COMPLETED)); + status = req->status; + ucp_request_release(user_req); + return status; + } +} + +static inline void ucp_ep_rma_remote_request_sent(ucp_ep_t *ep) +{ + ++ucp_ep_flush_state(ep)->send_sn; + ++ep->worker->flush_ops_count; +} + +static inline void ucp_ep_rma_remote_request_completed(ucp_ep_t *ep) +{ + ucp_ep_flush_state_t *flush_state = ucp_ep_flush_state(ep); + ucp_request_t *req; + + --ep->worker->flush_ops_count; + ++flush_state->cmpl_sn; + + ucs_queue_for_each_extract(req, &flush_state->reqs, send.flush.queue, + UCS_CIRCULAR_COMPARE32(req->send.flush.cmpl_sn, + <= , + flush_state->cmpl_sn)) { + ucp_ep_flush_remote_completed(req); + } +} + +#endif 
diff --git a/src/ucp/rma/rma_basic.c b/src/ucp/rma/rma_basic.c
new file mode 100644
index 0000000..cc1ac44
--- /dev/null
+++ b/src/ucp/rma/rma_basic.c
@@ -0,0 +1,127 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+* Copyright (c) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+* Copyright (C) Los Alamos National Security, LLC. 2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "rma.h"
+
+#include
+
+/*
+ * Pending callback which sends the next fragment of a put request.
+ *
+ * The fragment is sent with put_short when the remaining length is within
+ * max_put_short or the endpoint's bcopy_thresh, with put_bcopy when it is
+ * below put_zcopy_thresh, and with put_zcopy otherwise. The fragment size is
+ * capped by the corresponding max_put_* limit. The outcome is handed to
+ * ucp_rma_request_advance(), whose status is returned.
+ */
+static ucs_status_t ucp_rma_basic_progress_put(uct_pending_req_t *self)
+{
+    ucp_request_t *req              = ucs_container_of(self, ucp_request_t, send.uct);
+    ucp_ep_t *ep                    = req->send.ep;
+    ucp_rkey_h rkey                 = req->send.rma.rkey;
+    ucp_lane_index_t lane           = req->send.lane;
+    ucp_ep_rma_config_t *rma_config = &ucp_ep_config(ep)->rma[lane];
+    ucs_status_t status;
+    ssize_t packed_len;
+
+    ucs_assert(rkey->cache.ep_cfg_index == ep->cfg_index);
+    ucs_assert(rkey->cache.rma_lane == lane);
+
+    if ((req->send.length <= rma_config->max_put_short) ||
+        (req->send.length <= ucp_ep_config(ep)->bcopy_thresh))
+    {
+        packed_len = ucs_min(req->send.length, rma_config->max_put_short);
+        status = UCS_PROFILE_CALL(uct_ep_put_short,
+                                  ep->uct_eps[lane],
+                                  req->send.buffer,
+                                  packed_len,
+                                  req->send.rma.remote_addr,
+                                  rkey->cache.rma_rkey);
+    } else if (ucs_likely(req->send.length < rma_config->put_zcopy_thresh)) {
+        ucp_memcpy_pack_context_t pack_ctx;
+        pack_ctx.src    = req->send.buffer;
+        pack_ctx.length = ucs_min(req->send.length, rma_config->max_put_bcopy);
+        packed_len = UCS_PROFILE_CALL(uct_ep_put_bcopy,
+                                      ep->uct_eps[lane],
+                                      ucp_memcpy_pack,
+                                      &pack_ctx,
+                                      req->send.rma.remote_addr,
+                                      rkey->cache.rma_rkey);
+        /* a non-positive return value is interpreted as an error status */
+        status = (packed_len > 0) ? UCS_OK : (ucs_status_t)packed_len;
+    } else {
+        uct_iov_t iov;
+
+        /* TODO: leave last fragment for bcopy */
+        packed_len = ucs_min(req->send.length, rma_config->max_put_zcopy);
+        /* TODO: use ucp_dt_iov_copy_uct */
+        iov.buffer = (void *)req->send.buffer;
+        iov.length = packed_len;
+        iov.count  = 1;
+        iov.memh   = req->send.state.dt.dt.contig.memh[0];
+
+        status = UCS_PROFILE_CALL(uct_ep_put_zcopy,
+                                  ep->uct_eps[lane],
+                                  &iov, 1,
+                                  req->send.rma.remote_addr,
+                                  rkey->cache.rma_rkey,
+                                  &req->send.state.uct_comp);
+        ucp_request_send_state_advance(req, NULL, UCP_REQUEST_SEND_PROTO_RMA,
+                                       status);
+    }
+
+    return ucp_rma_request_advance(req, packed_len, status);
+}
+/*
+ * Pending callback which fetches the next fragment of a get request, using
+ * get_bcopy (with memcpy as the unpack callback) when the remaining length is
+ * below get_zcopy_thresh and get_zcopy otherwise. The fragment size is capped
+ * by the corresponding max_get_* limit. The outcome is handed to
+ * ucp_rma_request_advance(), whose status is returned.
+ */
+static ucs_status_t ucp_rma_basic_progress_get(uct_pending_req_t *self)
+{
+    ucp_request_t *req              = ucs_container_of(self, ucp_request_t, send.uct);
+    ucp_ep_t *ep                    = req->send.ep;
+    ucp_rkey_h rkey                 = req->send.rma.rkey;
+    ucp_lane_index_t lane           = req->send.lane;
+    ucp_ep_rma_config_t *rma_config = &ucp_ep_config(ep)->rma[lane];
+    ucs_status_t status;
+    size_t frag_length;
+
+    ucs_assert(rkey->cache.ep_cfg_index == ep->cfg_index);
+    ucs_assert(rkey->cache.rma_lane == lane);
+
+    if (ucs_likely(req->send.length < rma_config->get_zcopy_thresh)) {
+        frag_length = ucs_min(rma_config->max_get_bcopy, req->send.length);
+        status = UCS_PROFILE_CALL(uct_ep_get_bcopy,
+                                  ep->uct_eps[lane],
+                                  (uct_unpack_callback_t)memcpy,
+                                  (void*)req->send.buffer,
+                                  frag_length,
+                                  req->send.rma.remote_addr,
+                                  rkey->cache.rma_rkey,
+                                  &req->send.state.uct_comp);
+    } else {
+        uct_iov_t iov;
+        frag_length = ucs_min(req->send.length, rma_config->max_get_zcopy);
+        iov.buffer  = (void *)req->send.buffer;
+        iov.length  = frag_length;
+        iov.count   = 1;
+        iov.memh    = req->send.state.dt.dt.contig.memh[0];
+
+        status = UCS_PROFILE_CALL(uct_ep_get_zcopy,
+                                  ep->uct_eps[lane],
+                                  &iov, 1,
+                                  req->send.rma.remote_addr,
+                                  rkey->cache.rma_rkey,
+                                  &req->send.state.uct_comp);
+    }
+
+    if (status == UCS_INPROGRESS) {
+        ucp_request_send_state_advance(req, 0, UCP_REQUEST_SEND_PROTO_RMA,
+                                       UCS_INPROGRESS);
+    }
+
+    return ucp_rma_request_advance(req, frag_length, status);
+}
+
+ucp_rma_proto_t ucp_rma_basic_proto = {
+    .name         = "basic_rma",
+    .progress_put = ucp_rma_basic_progress_put,
+    .progress_get = ucp_rma_basic_progress_get
+};
diff --git a/src/ucp/rma/rma_send.c b/src/ucp/rma/rma_send.c
new file mode 100644
index 0000000..3caebd2
--- /dev/null
+++ b/src/ucp/rma/rma_send.c
@@ -0,0 +1,346 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "rma.h"
+#include "rma.inl"
+
+#include
+
+#include
+#include
+#include
+
+/* Run _action if parameter checking is enabled and _buffer is NULL */
+#define UCP_RMA_CHECK_BUFFER(_buffer, _action) \
+    do { \
+        if (ENABLE_PARAMS_CHECK && ucs_unlikely((_buffer) == NULL)) { \
+            _action; \
+        } \
+    } while (0)
+
+/* Run _action if _length is zero */
+#define UCP_RMA_CHECK_ZERO_LENGTH(_length, _action) \
+    do { \
+        if ((_length) == 0) { \
+            _action; \
+        } \
+    } while (0)
+
+/* Argument checks for functions returning ucs_status_t. The order matters:
+ * a zero length returns UCS_OK before the buffer is checked for NULL. */
+#define UCP_RMA_CHECK(_context, _buffer, _length) \
+    do { \
+        UCP_CONTEXT_CHECK_FEATURE_FLAGS(_context, UCP_FEATURE_RMA, \
+                                        return UCS_ERR_INVALID_PARAM); \
+        UCP_RMA_CHECK_ZERO_LENGTH(_length, return UCS_OK); \
+        UCP_RMA_CHECK_BUFFER(_buffer, return UCS_ERR_INVALID_PARAM); \
+    } while (0)
+
+/* Same checks for functions returning ucs_status_ptr_t; a zero length
+ * returns NULL. */
+#define UCP_RMA_CHECK_PTR(_context, _buffer, _length) \
+    do { \
+        UCP_CONTEXT_CHECK_FEATURE_FLAGS(_context, UCP_FEATURE_RMA, \
+                                        return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); \
+        UCP_RMA_CHECK_ZERO_LENGTH(_length, return NULL); \
+        UCP_RMA_CHECK_BUFFER(_buffer, \
+                             return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); \
+    } while (0)
+
+
+/* request can be released if
+ *  - all fragments were sent (length == 0) (bcopy & zcopy mix)
+ *  - all zcopy fragments are done (uct_comp.count == 0)
+ *  - and request was allocated from the mpool
+ *    (checked in ucp_request_complete_send)
+ *
+ * Request can be released either immediately or in the completion callback.
+ * We must check req length in the completion callback to avoid the following
+ * scenario:
+ *  partial_send; no_resource; progress;
+ *  send_completed; cb called; req freed (oops);
+ *  next_partial_send; (oops, req already freed)
+ *
+ * Return value of ucp_rma_request_advance():
+ *  - 'status' itself if it is an error; unless it is UCS_ERR_NO_RESOURCE,
+ *    the send buffer is deregistered and the request is completed with it
+ *  - UCS_OK if this fragment was the last one
+ *  - UCS_INPROGRESS if more fragments remain; the send buffer and the remote
+ *    address have then been advanced by 'frag_length'
+ */
+ucs_status_t ucp_rma_request_advance(ucp_request_t *req, ssize_t frag_length,
+                                     ucs_status_t status)
+{
+    ucs_assert(status != UCS_ERR_NOT_IMPLEMENTED);
+
+    if (ucs_unlikely(UCS_STATUS_IS_ERR(status))) {
+        if (status != UCS_ERR_NO_RESOURCE) {
+            ucp_request_send_buffer_dereg(req);
+            ucp_request_complete_send(req, status);
+        }
+        return status;
+    }
+
+    ucs_assert(frag_length >= 0);
+    ucs_assert(req->send.length >= frag_length);
+    req->send.length -= frag_length;
+    if (req->send.length == 0) {
+        /* bcopy is the fast path */
+        if (ucs_likely(req->send.state.uct_comp.count == 0)) {
+            ucp_request_send_buffer_dereg(req);
+            ucp_request_complete_send(req, UCS_OK);
+        }
+        return UCS_OK;
+    }
+    req->send.buffer           = UCS_PTR_BYTE_OFFSET(req->send.buffer, frag_length);
+    req->send.rma.remote_addr += frag_length;
+    return UCS_INPROGRESS;
+}
+/* UCT completion callback for bcopy operations: complete the request only
+ * when send.length equals send.state.dt.offset */
+static void ucp_rma_request_bcopy_completion(uct_completion_t *self,
+                                             ucs_status_t status)
+{
+    ucp_request_t *req = ucs_container_of(self, ucp_request_t,
+                                          send.state.uct_comp);
+
+    if (ucs_likely(req->send.length == req->send.state.dt.offset)) {
+        ucp_request_complete_send(req, status);
+    }
+}
+/* UCT completion callback for zcopy operations: same condition as the bcopy
+ * variant, but the send buffer is deregistered before completing */
+static void ucp_rma_request_zcopy_completion(uct_completion_t *self,
+                                             ucs_status_t status)
+{
+    ucp_request_t *req = ucs_container_of(self, ucp_request_t,
+                                          send.state.uct_comp);
+
+    if (ucs_likely(req->send.length == req->send.state.dt.offset)) {
+        ucp_request_send_buffer_dereg(req);
+        ucp_request_complete_send(req, status);
+    }
+}
+
+static UCS_F_ALWAYS_INLINE ucs_status_t
+ucp_rma_request_init(ucp_request_t *req, ucp_ep_h ep, const void *buffer,
+                     size_t length, uint64_t remote_addr, ucp_rkey_h rkey,
+                     uct_pending_callback_t cb, size_t zcopy_thresh, int flags)
+{
+    req->flags = flags; /* Implicit release */
+    req->send.ep
= ep; + req->send.buffer = (void*)buffer; + req->send.datatype = ucp_dt_make_contig(1); + req->send.mem_type = UCS_MEMORY_TYPE_HOST; + req->send.length = length; + req->send.rma.remote_addr = remote_addr; + req->send.rma.rkey = rkey; + req->send.uct.func = cb; + req->send.lane = rkey->cache.rma_lane; + ucp_request_send_state_init(req, ucp_dt_make_contig(1), length); + ucp_request_send_state_reset(req, + (length < zcopy_thresh) ? + ucp_rma_request_bcopy_completion : + ucp_rma_request_zcopy_completion, + UCP_REQUEST_SEND_PROTO_RMA); +#if UCS_ENABLE_ASSERT + req->send.cb = NULL; +#endif + if (length < zcopy_thresh) { + return UCS_OK; + } + + return ucp_request_send_buffer_reg_lane(req, req->send.lane, 0); +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_rma_nonblocking(ucp_ep_h ep, const void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey, + uct_pending_callback_t progress_cb, size_t zcopy_thresh) +{ + ucs_status_t status; + ucp_request_t *req; + + req = ucp_request_get(ep->worker); + if (req == NULL) { + return UCS_ERR_NO_MEMORY; + } + + status = ucp_rma_request_init(req, ep, buffer, length, remote_addr, rkey, + progress_cb, zcopy_thresh, + UCP_REQUEST_FLAG_RELEASED); + if (ucs_unlikely(status != UCS_OK)) { + return status; + } + + return ucp_request_send(req, 0); +} + +static UCS_F_ALWAYS_INLINE ucs_status_ptr_t +ucp_rma_nonblocking_cb(ucp_ep_h ep, const void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey, + uct_pending_callback_t progress_cb, size_t zcopy_thresh, + ucp_send_callback_t cb) +{ + ucs_status_t status; + ucp_request_t *req; + + req = ucp_request_get(ep->worker); + if (req == NULL) { + return UCS_STATUS_PTR(UCS_ERR_NO_MEMORY); + } + + status = ucp_rma_request_init(req, ep, buffer, length, remote_addr, rkey, + progress_cb, zcopy_thresh, 0); + if (ucs_unlikely(status != UCS_OK)) { + return UCS_STATUS_PTR(status); + } + + return ucp_rma_send_request_cb(req, cb); +} + +ucs_status_t ucp_put_nbi(ucp_ep_h ep, const void 
*buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey) +{ + ucp_ep_rma_config_t *rma_config; + ucs_status_t status; + + UCP_RMA_CHECK(ep->worker->context, buffer, length); + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucs_trace_req("put_nbi buffer %p length %zu remote_addr %"PRIx64" rkey %p to %s", + buffer, length, remote_addr, rkey, ucp_ep_peer_name(ep)); + + status = UCP_RKEY_RESOLVE(rkey, ep, rma); + if (status != UCS_OK) { + goto out_unlock; + } + + /* Fast path for a single short message */ + if (ucs_likely((ssize_t)length <= (int)rkey->cache.max_put_short)) { + status = UCS_PROFILE_CALL(uct_ep_put_short, ep->uct_eps[rkey->cache.rma_lane], + buffer, length, remote_addr, rkey->cache.rma_rkey); + if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) { + goto out_unlock; + } + } + + rma_config = &ucp_ep_config(ep)->rma[rkey->cache.rma_lane]; + status = ucp_rma_nonblocking(ep, buffer, length, remote_addr, rkey, + rkey->cache.rma_proto->progress_put, + rma_config->put_zcopy_thresh); +out_unlock: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return status; +} + +ucs_status_ptr_t ucp_put_nb(ucp_ep_h ep, const void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey, + ucp_send_callback_t cb) +{ + ucp_ep_rma_config_t *rma_config; + ucs_status_ptr_t ptr_status; + ucs_status_t status; + + UCP_RMA_CHECK_PTR(ep->worker->context, buffer, length); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucs_trace_req("put_nb buffer %p length %zu remote_addr %"PRIx64" rkey %p to %s cb %p", + buffer, length, remote_addr, rkey, ucp_ep_peer_name(ep), cb); + + status = UCP_RKEY_RESOLVE(rkey, ep, rma); + if (status != UCS_OK) { + ptr_status = UCS_STATUS_PTR(status); + goto out_unlock; + } + + /* Fast path for a single short message */ + if (ucs_likely((ssize_t)length <= (int)rkey->cache.max_put_short)) { + status = UCS_PROFILE_CALL(uct_ep_put_short, ep->uct_eps[rkey->cache.rma_lane], + buffer, length, remote_addr, rkey->cache.rma_rkey); + 
if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) { + ptr_status = UCS_STATUS_PTR(status); + goto out_unlock; + } + } + + rma_config = &ucp_ep_config(ep)->rma[rkey->cache.rma_lane]; + ptr_status = ucp_rma_nonblocking_cb(ep, buffer, length, remote_addr, rkey, + rkey->cache.rma_proto->progress_put, + rma_config->put_zcopy_thresh, cb); +out_unlock: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return ptr_status; +} + +ucs_status_t ucp_get_nbi(ucp_ep_h ep, void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey) +{ + ucp_ep_rma_config_t *rma_config; + ucs_status_t status; + + UCP_RMA_CHECK(ep->worker->context, buffer, length); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucs_trace_req("get_nbi buffer %p length %zu remote_addr %"PRIx64" rkey %p from %s", + buffer, length, remote_addr, rkey, ucp_ep_peer_name(ep)); + + status = UCP_RKEY_RESOLVE(rkey, ep, rma); + if (status != UCS_OK) { + goto out_unlock; + } + + rma_config = &ucp_ep_config(ep)->rma[rkey->cache.rma_lane]; + status = ucp_rma_nonblocking(ep, buffer, length, remote_addr, rkey, + rkey->cache.rma_proto->progress_get, + rma_config->get_zcopy_thresh); +out_unlock: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return status; +} + +ucs_status_ptr_t ucp_get_nb(ucp_ep_h ep, void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey, + ucp_send_callback_t cb) +{ + ucp_ep_rma_config_t *rma_config; + ucs_status_ptr_t ptr_status; + ucs_status_t status; + + UCP_RMA_CHECK_PTR(ep->worker->context, buffer, length); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucs_trace_req("get_nb buffer %p length %zu remote_addr %"PRIx64" rkey %p from %s cb %p", + buffer, length, remote_addr, rkey, ucp_ep_peer_name(ep), cb); + + status = UCP_RKEY_RESOLVE(rkey, ep, rma); + if (status != UCS_OK) { + ptr_status = UCS_STATUS_PTR(status); + goto out_unlock; + } + + rma_config = &ucp_ep_config(ep)->rma[rkey->cache.rma_lane]; + ptr_status = ucp_rma_nonblocking_cb(ep, buffer, length, 
remote_addr, rkey, + rkey->cache.rma_proto->progress_get, + rma_config->get_zcopy_thresh, cb); +out_unlock: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return ptr_status; +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_put, (ep, buffer, length, remote_addr, rkey), + ucp_ep_h ep, const void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey) +{ + return ucp_rma_wait(ep->worker, + ucp_put_nb(ep, buffer, length, remote_addr, rkey, + (ucp_send_callback_t)ucs_empty_function), + "put"); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_get, (ep, buffer, length, remote_addr, rkey), + ucp_ep_h ep, void *buffer, size_t length, + uint64_t remote_addr, ucp_rkey_h rkey) +{ + return ucp_rma_wait(ep->worker, + ucp_get_nb(ep, buffer, length, remote_addr, rkey, + (ucp_send_callback_t)ucs_empty_function), + "get"); +} diff --git a/src/ucp/rma/rma_sw.c b/src/ucp/rma/rma_sw.c new file mode 100644 index 0000000..eff48fb --- /dev/null +++ b/src/ucp/rma/rma_sw.c @@ -0,0 +1,305 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "rma.h" +#include "rma.inl" + +#include +#include + + +static size_t ucp_rma_sw_put_pack_cb(void *dest, void *arg) +{ + ucp_request_t *req = arg; + ucp_ep_t *ep = req->send.ep; + ucp_put_hdr_t *puth = dest; + size_t length; + + puth->address = req->send.rma.remote_addr; + puth->ep_ptr = ucp_ep_dest_ep_ptr(ep); + + ucs_assert(puth->ep_ptr != 0); + + length = ucs_min(req->send.length, + ucp_ep_config(ep)->am.max_bcopy - sizeof(*puth)); + memcpy(puth + 1, req->send.buffer, length); + + return sizeof(*puth) + length; +} + +static ucs_status_t ucp_rma_sw_progress_put(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ssize_t packed_len; + ucs_status_t status; + + ucs_assert(req->send.lane == ucp_ep_get_am_lane(ep)); + + packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], UCP_AM_ID_PUT, + ucp_rma_sw_put_pack_cb, req, 0); + if (packed_len > 0) { + status = UCS_OK; + ucp_ep_rma_remote_request_sent(ep); + } else { + status = (ucs_status_t)packed_len; + } + + return ucp_rma_request_advance(req, packed_len - sizeof(ucp_put_hdr_t), + status); +} + +static size_t ucp_rma_sw_get_req_pack_cb(void *dest, void *arg) +{ + ucp_request_t *req = arg; + ucp_get_req_hdr_t *getreqh = dest; + + getreqh->address = req->send.rma.remote_addr; + getreqh->length = req->send.length; + getreqh->req.ep_ptr = ucp_ep_dest_ep_ptr(req->send.ep); + getreqh->req.reqptr = (uintptr_t)req; + ucs_assert(getreqh->req.ep_ptr != 0); + + return sizeof(*getreqh); +} + +static ucs_status_t ucp_rma_sw_progress_get(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ucs_status_t status; + ssize_t packed_len; + + ucs_assert(req->send.lane == ucp_ep_get_am_lane(ep)); + + packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], UCP_AM_ID_GET_REQ, + ucp_rma_sw_get_req_pack_cb, req, 
0); + if (packed_len < 0) { + status = (ucs_status_t)packed_len; + if (status != UCS_ERR_NO_RESOURCE) { + ucp_request_complete_send(req, status); + } + return status; + } + + /* get request packet sent, complete the request object when all data arrives */ + ucs_assert(packed_len == sizeof(ucp_get_req_hdr_t)); + ucp_ep_rma_remote_request_sent(ep); + return UCS_OK; +} + +ucp_rma_proto_t ucp_rma_sw_proto = { + .name = "sw_rma", + .progress_put = ucp_rma_sw_progress_put, + .progress_get = ucp_rma_sw_progress_get +}; + +static size_t ucp_rma_sw_pack_rma_ack(void *dest, void *arg) +{ + ucp_cmpl_hdr_t *hdr = dest; + ucp_request_t *req = arg; + + hdr->ep_ptr = ucp_ep_dest_ep_ptr(req->send.ep); + return sizeof(*hdr); +} + +static ucs_status_t ucp_progress_rma_cmpl(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ssize_t packed_len; + + req->send.lane = ucp_ep_get_am_lane(ep); + + packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], UCP_AM_ID_CMPL, + ucp_rma_sw_pack_rma_ack, req, 0); + if (packed_len < 0) { + return (ucs_status_t)packed_len; + } + + ucs_assert(packed_len == sizeof(ucp_cmpl_hdr_t)); + ucp_request_put(req); + return UCS_OK; +} + +void ucp_rma_sw_send_cmpl(ucp_ep_h ep) +{ + ucp_request_t *req; + + req = ucp_request_get(ep->worker); + if (req == NULL) { + ucs_error("failed to allocate put completion"); + return; + } + + req->send.ep = ep; + req->send.uct.func = ucp_progress_rma_cmpl; + ucp_request_send(req, 0); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_put_handler, (arg, data, length, am_flags), + void *arg, void *data, size_t length, unsigned am_flags) +{ + ucp_put_hdr_t *puth = data; + ucp_worker_h worker = arg; + + memcpy((void*)puth->address, puth + 1, length - sizeof(*puth)); + ucp_rma_sw_send_cmpl(ucp_worker_get_ep_by_ptr(worker, puth->ep_ptr)); + return UCS_OK; +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_rma_cmpl_handler, (arg, data, length, am_flags), + void *arg, void 
*data, size_t length, unsigned am_flags) +{ + ucp_cmpl_hdr_t *putackh = data; + ucp_worker_h worker = arg; + ucp_ep_h ep = ucp_worker_get_ep_by_ptr(worker, putackh->ep_ptr); + + ucp_ep_rma_remote_request_completed(ep); + return UCS_OK; +} + +static size_t ucp_rma_sw_pack_get_reply(void *dest, void *arg) +{ + ucp_rma_rep_hdr_t *hdr = dest; + ucp_request_t *req = arg; + size_t length; + + length = ucs_min(req->send.length, + ucp_ep_config(req->send.ep)->am.max_bcopy - sizeof(*hdr)); + hdr->req = req->send.get_reply.req; + memcpy(hdr + 1, req->send.buffer, length); + + return sizeof(*hdr) + length; +} + +static ucs_status_t ucp_progress_get_reply(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ssize_t packed_len, payload_len; + + req->send.lane = ucp_ep_get_am_lane(ep); + packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], UCP_AM_ID_GET_REP, + ucp_rma_sw_pack_get_reply, req, 0); + if (packed_len < 0) { + return (ucs_status_t)packed_len; + } + + payload_len = packed_len - sizeof(ucp_rma_rep_hdr_t); + ucs_assert(payload_len >= 0); + + req->send.buffer = UCS_PTR_BYTE_OFFSET(req->send.buffer, payload_len); + req->send.length -= payload_len; + + if (req->send.length == 0) { + ucp_request_put(req); + return UCS_OK; + } else { + return UCS_INPROGRESS; + } +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_get_req_handler, (arg, data, length, am_flags), + void *arg, void *data, size_t length, unsigned am_flags) +{ + ucp_get_req_hdr_t *getreqh = data; + ucp_worker_h worker = arg; + ucp_ep_h ep = ucp_worker_get_ep_by_ptr(worker, + getreqh->req.ep_ptr); + ucp_request_t *req; + + req = ucp_request_get(worker); + if (req == NULL) { + ucs_error("failed to allocate get reply"); + return UCS_OK; + } + + req->send.ep = ep; + req->send.buffer = (void*)getreqh->address; + req->send.length = getreqh->length; + req->send.get_reply.req = getreqh->req.reqptr; + req->send.uct.func = ucp_progress_get_reply; + + 
ucp_request_send(req, 0); + return UCS_OK; +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_get_rep_handler, (arg, data, length, am_flags), + void *arg, void *data, size_t length, unsigned am_flags) +{ + ucp_rma_rep_hdr_t *getreph = data; + size_t frag_length = length - sizeof(*getreph); + ucp_request_t *req = (ucp_request_t*)getreph->req; + ucp_ep_h ep = req->send.ep; + + memcpy(req->send.buffer, getreph + 1, frag_length); + + /* complete get request on last fragment of the reply */ + if (ucp_rma_request_advance(req, frag_length, UCS_OK) == UCS_OK) { + ucp_ep_rma_remote_request_completed(ep); + } + + return UCS_OK; +} + +static void ucp_rma_sw_dump_packet(ucp_worker_h worker, uct_am_trace_type_t type, + uint8_t id, const void *data, size_t length, + char *buffer, size_t max) +{ + const ucp_get_req_hdr_t *geth; + const ucp_rma_rep_hdr_t *reph; + const ucp_cmpl_hdr_t *cmplh; + const ucp_put_hdr_t *puth; + size_t header_len; + char *p; + + switch (id) { + case UCP_AM_ID_PUT: + puth = data; + snprintf(buffer, max, "PUT [addr 0x%lx ep_ptr 0x%lx]", puth->address, + puth->ep_ptr); + header_len = sizeof(*puth); + break; + case UCP_AM_ID_GET_REQ: + geth = data; + snprintf(buffer, max, "GET_REQ [addr 0x%lx len %zu reqptr 0x%lx ep 0x%lx]", + geth->address, geth->length, geth->req.reqptr, geth->req.ep_ptr); + return; + case UCP_AM_ID_GET_REP: + reph = data; + snprintf(buffer, max, "GET_REP [reqptr 0x%lx]", reph->req); + header_len = sizeof(*reph); + break; + case UCP_AM_ID_CMPL: + cmplh = data; + snprintf(buffer, max, "CMPL [ep_ptr 0x%lx]", cmplh->ep_ptr); + return; + default: + return; + } + + p = buffer + strlen(buffer); + ucp_dump_payload(worker->context, p, buffer + max - p, + UCS_PTR_BYTE_OFFSET(data, header_len), + length - header_len); +} + +UCP_DEFINE_AM(UCP_FEATURE_RMA, UCP_AM_ID_PUT, ucp_put_handler, + ucp_rma_sw_dump_packet, 0); +UCP_DEFINE_AM(UCP_FEATURE_RMA, UCP_AM_ID_GET_REQ, ucp_get_req_handler, + ucp_rma_sw_dump_packet, 0); +UCP_DEFINE_AM(UCP_FEATURE_RMA, 
UCP_AM_ID_GET_REP, ucp_get_rep_handler, + ucp_rma_sw_dump_packet, 0); +UCP_DEFINE_AM(UCP_FEATURE_RMA|UCP_FEATURE_AMO, UCP_AM_ID_CMPL, + ucp_rma_cmpl_handler, ucp_rma_sw_dump_packet, 0); + +UCP_DEFINE_AM_PROXY(UCP_AM_ID_PUT); +UCP_DEFINE_AM_PROXY(UCP_AM_ID_GET_REQ); diff --git a/src/ucp/stream/stream.h b/src/ucp/stream/stream.h new file mode 100644 index 0000000..e196792 --- /dev/null +++ b/src/ucp/stream/stream.h @@ -0,0 +1,70 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_STREAM_H_ +#define UCP_STREAM_H_ + +#include +#include +#include + + +typedef struct { + uintptr_t ep_ptr; +} UCS_S_PACKED ucp_stream_am_hdr_t; + + +typedef struct { + union { + ucp_stream_am_hdr_t hdr; + ucp_recv_desc_t *rdesc; + }; +} ucp_stream_am_data_t; + + +void ucp_stream_ep_init(ucp_ep_h ep); + +void ucp_stream_ep_cleanup(ucp_ep_h ep); + +void ucp_stream_ep_activate(ucp_ep_h ep); + + +static UCS_F_ALWAYS_INLINE int ucp_stream_ep_is_queued(ucp_ep_ext_proto_t *ep_ext) +{ + return ep_ext->stream.ready_list.next != NULL; +} + +static UCS_F_ALWAYS_INLINE int ucp_stream_ep_has_data(ucp_ep_ext_proto_t *ep_ext) +{ + return ucp_ep_from_ext_proto(ep_ext)->flags & UCP_EP_FLAG_STREAM_HAS_DATA; +} + +static UCS_F_ALWAYS_INLINE +void ucp_stream_ep_enqueue(ucp_ep_ext_proto_t *ep_ext, ucp_worker_h worker) +{ + ucs_assert(!ucp_stream_ep_is_queued(ep_ext)); + ucs_list_add_tail(&worker->stream_ready_eps, &ep_ext->stream.ready_list); +} + +static UCS_F_ALWAYS_INLINE void ucp_stream_ep_dequeue(ucp_ep_ext_proto_t *ep_ext) +{ + ucs_list_del(&ep_ext->stream.ready_list); + ep_ext->stream.ready_list.next = NULL; +} + +static UCS_F_ALWAYS_INLINE ucp_ep_ext_proto_t* +ucp_stream_worker_dequeue_ep_head(ucp_worker_h worker) +{ + ucp_ep_ext_proto_t *ep_ext = ucs_list_head(&worker->stream_ready_eps, + ucp_ep_ext_proto_t, + stream.ready_list); + + ucs_assert(ep_ext->stream.ready_list.next != NULL); + 
ucp_stream_ep_dequeue(ep_ext); + return ep_ext; +} + +#endif /* UCP_STREAM_H_ */ diff --git a/src/ucp/stream/stream_recv.c b/src/ucp/stream/stream_recv.c new file mode 100644 index 0000000..2bec799 --- /dev/null +++ b/src/ucp/stream/stream_recv.c @@ -0,0 +1,510 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* @verbatim + * Data layout within Stream AM + * |---------------------------------------------------------------------------------------------------------------------------| + * | ucp_recv_desc_t | \ / | ucp_stream_am_data_t | payload | + * |-----------------------------------------------------------------| \ / |----------------------|-------------------------| + * | stream_queue | length | payload_offset | flags | \/ | am_header | | + * | tag_list (not used) | | | | /\ | rdesc | | + * |---------------------|----------------|----------------|---------| / \ |----------------------|-------------------------| + * | 4 * sizeof(ptr) | 32 bits | 32 bits | 16 bits | / \ | 64 bits | up to TL AM buffer size | + * |---------------------------------------------------------------------------------------------------------------------------| + * @endverbatim + * + * stream_queue is an entry link in the "unexpected" queue per endpoint + * length is an actual size of 'payload' + * payload_offset is a distance between 'ucp_recv_desc_t *' and 'payload *' + * X is an optional empty space which is a result of partial + * handled payload in case when 'length' greater than user's + * buffer size passed to @ref ucp_stream_recv_nb + * am_header is an active message header, not actual after ucp_recv_desc_t + * initialization and setup of offsets + * rdesc pointer to 'ucp_recv_desc_t *', it's needed to get access to + * 'ucp_recv_desc_t *' inside @ref 
ucp_stream_release_data after + * the buffer was returned to user by + * @ref ucp_stream_recv_data_nb as a pointer to 'paylod' + */ + + +#define ucp_stream_rdesc_payload(_rdesc) \ + (UCS_PTR_BYTE_OFFSET((_rdesc), (_rdesc)->payload_offset)) + + +#define ucp_stream_rdesc_am_data(_rdesc) \ + ((ucp_stream_am_data_t *) \ + UCS_PTR_BYTE_OFFSET(ucp_stream_rdesc_payload(_rdesc), \ + -sizeof(ucp_stream_am_data_t))) + + +#define ucp_stream_rdesc_from_data(_data) \ + ((ucp_stream_am_data_t *)_data - 1)->rdesc + + +static UCS_F_ALWAYS_INLINE ucp_recv_desc_t * +ucp_stream_rdesc_dequeue(ucp_ep_ext_proto_t *ep_ext) +{ + ucp_recv_desc_t *rdesc = ucs_queue_pull_elem_non_empty(&ep_ext->stream.match_q, + ucp_recv_desc_t, + stream_queue); + ucs_assert(ucp_stream_ep_has_data(ep_ext)); + if (ucs_unlikely(ucs_queue_is_empty(&ep_ext->stream.match_q))) { + ucp_ep_from_ext_proto(ep_ext)->flags &= ~UCP_EP_FLAG_STREAM_HAS_DATA; + if (ucp_stream_ep_is_queued(ep_ext)) { + ucp_stream_ep_dequeue(ep_ext); + } + } + + return rdesc; +} + +static UCS_F_ALWAYS_INLINE ucp_recv_desc_t * +ucp_stream_rdesc_get(ucp_ep_ext_proto_t *ep_ext) +{ + ucp_recv_desc_t *rdesc = ucs_queue_head_elem_non_empty(&ep_ext->stream.match_q, + ucp_recv_desc_t, + stream_queue); + + ucs_assert(ucp_stream_ep_has_data(ep_ext)); + ucs_trace_data("ep %p, rdesc %p with %u stream bytes", + ucp_ep_from_ext_proto(ep_ext), rdesc, rdesc->length); + + return rdesc; +} + +static UCS_F_ALWAYS_INLINE ucs_status_ptr_t +ucp_stream_recv_data_nb_nolock(ucp_ep_h ep, size_t *length) +{ + ucp_ep_ext_proto_t *ep_ext = ucp_ep_ext_proto(ep); + ucp_recv_desc_t *rdesc; + ucp_stream_am_data_t *am_data; + + if (ucs_unlikely(!ucp_stream_ep_has_data(ep_ext))) { + return UCS_STATUS_PTR(UCS_OK); + } + + rdesc = ucp_stream_rdesc_dequeue(ep_ext); + + *length = rdesc->length; + am_data = ucp_stream_rdesc_am_data(rdesc); + am_data->rdesc = rdesc; + return am_data + 1; +} + +UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_stream_recv_data_nb, (ep, length), + ucp_ep_h ep, 
size_t *length) +{ + ucs_status_ptr_t status_ptr; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(ep->worker->context, UCP_FEATURE_STREAM, + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + status_ptr = ucp_stream_recv_data_nb_nolock(ep, length); + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + + return status_ptr; +} + +static UCS_F_ALWAYS_INLINE void +ucp_stream_rdesc_dequeue_and_release(ucp_recv_desc_t *rdesc, + ucp_ep_ext_proto_t *ep_ext) +{ + ucs_assert(ucp_stream_ep_has_data(ep_ext)); + ucs_assert(rdesc == ucs_queue_head_elem_non_empty(&ep_ext->stream.match_q, + ucp_recv_desc_t, + stream_queue)); + ucp_stream_rdesc_dequeue(ep_ext); + ucp_recv_desc_release(rdesc); +} + +UCS_PROFILE_FUNC_VOID(ucp_stream_data_release, (ep, data), + ucp_ep_h ep, void *data) +{ + ucp_recv_desc_t *rdesc = ucp_stream_rdesc_from_data(data); + + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucp_recv_desc_release(rdesc); + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); +} + +static UCS_F_ALWAYS_INLINE ssize_t +ucp_stream_rdata_unpack(const void *rdata, size_t length, ucp_request_t *dst_req) +{ + size_t valid_len; + int last; + ucs_status_t status; + + /* Truncated error is not actual for stream, need to adjust */ + valid_len = dst_req->recv.length - dst_req->recv.stream.offset; + if (valid_len <= length) { + last = (valid_len == length); + } else { + valid_len = length; + last = !(dst_req->flags & UCP_REQUEST_FLAG_STREAM_RECV_WAITALL); + } + + status = ucp_request_recv_data_unpack(dst_req, rdata, valid_len, + dst_req->recv.stream.offset, last); + if (ucs_likely(status == UCS_OK)) { + dst_req->recv.stream.offset += valid_len; + ucs_trace_data("unpacked %zd bytes of stream data %p", + valid_len, rdata); + return valid_len; + } + + ucs_assert(status != UCS_ERR_MESSAGE_TRUNCATED); + return status; +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_stream_rdesc_advance(ucp_recv_desc_t *rdesc, ssize_t offset, + 
ucp_ep_ext_proto_t *ep_ext) +{ + ucs_assert(offset <= rdesc->length); + + if (ucs_unlikely(offset < 0)) { + return (ucs_status_t)offset; + } else if (ucs_likely(offset == rdesc->length)) { + ucp_stream_rdesc_dequeue_and_release(rdesc, ep_ext); + } else { + rdesc->length -= offset; + rdesc->payload_offset += offset; + } + + return UCS_OK; +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_stream_process_rdesc_inplace(ucp_recv_desc_t *rdesc, ucp_datatype_t dt, + void *buffer, size_t count, size_t length, + ucp_ep_ext_proto_t *ep_ext) +{ + ucp_worker_h worker = ucp_ep_from_ext_proto(ep_ext)->worker; + ucs_status_t status; + ssize_t unpacked; + ucs_memory_type_t mem_type; + + mem_type = ucp_memory_type_detect(worker->context, buffer, length); + status = ucp_dt_unpack_only(worker, buffer, count, dt, mem_type, + ucp_stream_rdesc_payload(rdesc), length, 0); + + unpacked = ucs_likely(status == UCS_OK) ? length : status; + + return ucp_stream_rdesc_advance(rdesc, unpacked, ep_ext); +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_stream_process_rdesc(ucp_recv_desc_t *rdesc, ucp_ep_ext_proto_t *ep_ext, + ucp_request_t *req) +{ + ssize_t unpacked; + + unpacked = ucp_stream_rdata_unpack(ucp_stream_rdesc_payload(rdesc), + rdesc->length, req); + ucs_assert(req->recv.stream.offset <= req->recv.length); + + return ucp_stream_rdesc_advance(rdesc, unpacked, ep_ext); +} + +static UCS_F_ALWAYS_INLINE void +ucp_stream_recv_request_init(ucp_request_t *req, ucp_ep_h ep, void *buffer, + size_t count, size_t length, + ucp_datatype_t datatype, + ucp_stream_recv_callback_t cb, + uint32_t request_flags) +{ + req->flags = UCP_REQUEST_FLAG_CALLBACK | request_flags; +#if UCS_ENABLE_ASSERT + req->flags |= UCP_REQUEST_FLAG_STREAM_RECV; + req->status = UCS_OK; /* for ucp_request_recv_data_unpack() */ +#endif + req->recv.stream.cb = cb; + req->recv.stream.length = 0; + req->recv.stream.offset = 0; + + ucp_dt_recv_state_init(&req->recv.state, buffer, datatype, count); + + req->recv.worker = 
ep->worker; + req->recv.buffer = buffer; + req->recv.datatype = datatype; + req->recv.length = ucs_likely(!UCP_DT_IS_GENERIC(datatype)) ? length : + ucp_dt_length(datatype, count, NULL, &req->recv.state); + req->recv.mem_type = ucp_memory_type_detect(ep->worker->context, + (void*)buffer, req->recv.length); +} + +static UCS_F_ALWAYS_INLINE int +ucp_stream_recv_nb_is_inplace(ucp_ep_ext_proto_t *ep_ext, size_t dt_length) +{ + return ucp_stream_ep_has_data(ep_ext) && + (ucp_stream_rdesc_get(ep_ext)->length >= dt_length); +} + +UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_stream_recv_nb, + (ep, buffer, count, datatype, cb, length, flags), + ucp_ep_h ep, void *buffer, size_t count, + ucp_datatype_t datatype, ucp_stream_recv_callback_t cb, + size_t *length, unsigned flags) +{ + ucs_status_t status = UCS_OK; + ucp_ep_ext_proto_t *ep_ext = ucp_ep_ext_proto(ep); + size_t dt_length; + ucp_request_t *req; + ucp_recv_desc_t *rdesc; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(ep->worker->context, UCP_FEATURE_STREAM, + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + if (ucs_likely(!UCP_DT_IS_GENERIC(datatype))) { + dt_length = ucp_dt_length(datatype, count, buffer, NULL); + if (ucs_likely(ucp_stream_recv_nb_is_inplace(ep_ext, dt_length))) { + status = ucp_stream_process_rdesc_inplace(ucp_stream_rdesc_get(ep_ext), + datatype, buffer, count, + dt_length, ep_ext); + *length = dt_length; + goto out_status; + } + } else { + dt_length = 0; /* Suppress warnings of paranoid compilers */ + } + + req = ucp_request_get(ep->worker); + if (ucs_unlikely(req == NULL)) { + status = UCS_ERR_NO_MEMORY; + goto out_status; + } + + ucp_stream_recv_request_init(req, ep, buffer, count, dt_length, datatype, + cb, (flags & UCP_STREAM_RECV_FLAG_WAITALL) ? 
+ UCP_REQUEST_FLAG_STREAM_RECV_WAITALL : 0); + + /* OK, lets obtain all arrived data which matches the recv size */ + while ((req->recv.stream.offset < req->recv.length) && + ucp_stream_ep_has_data(ep_ext)) { + + rdesc = ucp_stream_rdesc_get(ep_ext); + status = ucp_stream_process_rdesc(rdesc, ep_ext, req); + if (ucs_unlikely(status != UCS_OK)) { + goto out_put_request; + } + + /* + * NOTE: generic datatype can be completed with any amount of data to + * avoid extra logic in ucp_stream_process_rdesc, exception is + * WAITALL flag + */ + if (ucs_unlikely(UCP_DT_IS_GENERIC(req->recv.datatype)) && + !(req->flags & UCP_REQUEST_FLAG_STREAM_RECV_WAITALL)) { + break; + } + } + + ucs_assert(req->recv.stream.offset <= req->recv.length); + + if (ucp_request_can_complete_stream_recv(req)) { + *length = req->recv.stream.offset; + } else { + ucs_assert(!ucp_stream_ep_has_data(ep_ext)); + ucs_queue_push(&ep_ext->stream.match_q, &req->recv.queue); + req += 1; + goto out; + } + +out_put_request: + ucp_request_put(req); + +out_status: + req = UCS_STATUS_PTR(status); + +out: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return req; +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_stream_am_data_process(ucp_worker_t *worker, ucp_ep_ext_proto_t *ep_ext, + ucp_stream_am_data_t *am_data, size_t length, + unsigned am_flags) +{ + ucp_recv_desc_t rdesc_tmp; + void *payload; + ucp_recv_desc_t *rdesc; + ucp_request_t *req; + ssize_t unpacked; + + rdesc_tmp.length = length; + rdesc_tmp.payload_offset = sizeof(*am_data); /* add sizeof(*rdesc) only if + am_data wont be handled in + place */ + + /* First, process expected requests */ + if (!ucp_stream_ep_has_data(ep_ext)) { + while (!ucs_queue_is_empty(&ep_ext->stream.match_q)) { + req = ucs_queue_head_elem_non_empty(&ep_ext->stream.match_q, + ucp_request_t, recv.queue); + payload = UCS_PTR_BYTE_OFFSET(am_data, rdesc_tmp.payload_offset); + unpacked = ucp_stream_rdata_unpack(payload, rdesc_tmp.length, req); + if (ucs_unlikely(unpacked < 
0)) { + ucs_fatal("failed to unpack from am_data %p with offset %u to request %p", + am_data, rdesc_tmp.payload_offset, req); + } else if (unpacked == rdesc_tmp.length) { + if (ucp_request_can_complete_stream_recv(req)) { + ucp_request_complete_stream_recv(req, ep_ext, UCS_OK); + } + return UCS_OK; + } + ucp_stream_rdesc_advance(&rdesc_tmp, unpacked, ep_ext); + /* This request is full, try next one */ + ucs_assert(ucp_request_can_complete_stream_recv(req)); + ucp_request_complete_stream_recv(req, ep_ext, UCS_OK); + } + } + + ucs_assert(rdesc_tmp.length > 0); + + /* Now, enqueue the rest of data */ + if (ucs_likely(!(am_flags & UCT_CB_PARAM_FLAG_DESC))) { + rdesc = (ucp_recv_desc_t*)ucs_mpool_get_inline(&worker->am_mp); + ucs_assertv_always(rdesc != NULL, + "ucp recv descriptor is not allocated"); + rdesc->length = rdesc_tmp.length; + /* reset offset to improve locality */ + rdesc->payload_offset = sizeof(*rdesc) + sizeof(*am_data); + rdesc->flags = 0; + memcpy(ucp_stream_rdesc_payload(rdesc), + UCS_PTR_BYTE_OFFSET(am_data, rdesc_tmp.payload_offset), + rdesc_tmp.length); + } else { + /* slowpath */ + rdesc = (ucp_recv_desc_t *)am_data - 1; + rdesc->length = rdesc_tmp.length; + rdesc->payload_offset = rdesc_tmp.payload_offset + sizeof(*rdesc); + rdesc->priv_length = 0; + rdesc->flags = UCP_RECV_DESC_FLAG_UCT_DESC; + } + + ucp_ep_from_ext_proto(ep_ext)->flags |= UCP_EP_FLAG_STREAM_HAS_DATA; + ucs_queue_push(&ep_ext->stream.match_q, &rdesc->stream_queue); + + return UCS_INPROGRESS; +} + +void ucp_stream_ep_init(ucp_ep_h ep) +{ + ucp_ep_ext_proto_t *ep_ext = ucp_ep_ext_proto(ep); + + if (ep->worker->context->config.features & UCP_FEATURE_STREAM) { + ep_ext->stream.ready_list.prev = NULL; + ep_ext->stream.ready_list.next = NULL; + ucs_queue_head_init(&ep_ext->stream.match_q); + } +} + +void ucp_stream_ep_cleanup(ucp_ep_h ep) +{ + size_t length; + void *data; + + if (ep->worker->context->config.features & UCP_FEATURE_STREAM) { + while ((data = 
ucp_stream_recv_data_nb_nolock(ep, &length)) != NULL) { + ucs_assert_always(!UCS_PTR_IS_ERR(data)); + ucp_stream_data_release(ep, data); + } + + if (ucp_stream_ep_is_queued(ucp_ep_ext_proto(ep))) { + ucp_stream_ep_dequeue(ucp_ep_ext_proto(ep)); + } + } +} + +void ucp_stream_ep_activate(ucp_ep_h ep) +{ + ucp_ep_ext_proto_t *ep_ext = ucp_ep_ext_proto(ep); + + if ((ep->worker->context->config.features & UCP_FEATURE_STREAM) && + ucp_stream_ep_has_data(ep_ext) && !ucp_stream_ep_is_queued(ep_ext)) { + ucp_stream_ep_enqueue(ep_ext, ep->worker); + } +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_stream_am_handler(void *am_arg, void *am_data, size_t am_length, + unsigned am_flags) +{ + ucp_worker_h worker = am_arg; + ucp_stream_am_data_t *data = am_data; + ucp_ep_h ep; + ucp_ep_ext_proto_t *ep_ext; + ucs_status_t status; + + ucs_assert(am_length >= sizeof(ucp_stream_am_hdr_t)); + + ep = ucp_worker_get_ep_by_ptr(worker, data->hdr.ep_ptr); + ep_ext = ucp_ep_ext_proto(ep); + + if (ucs_unlikely(ep->flags & UCP_EP_FLAG_CLOSED)) { + ucs_trace_data("ep %p: stream is invalid", ep); + /* drop the data */ + return UCS_OK; + } + + status = ucp_stream_am_data_process(worker, ep_ext, data, + am_length - sizeof(data->hdr), + am_flags); + if (status == UCS_OK) { + /* rdesc was processed in place */ + return UCS_OK; + } + + ucs_assert(status == UCS_INPROGRESS); + + if (!ucp_stream_ep_is_queued(ep_ext) && (ep->flags & UCP_EP_FLAG_USED)) { + ucp_stream_ep_enqueue(ep_ext, worker); + } + + return (am_flags & UCT_CB_PARAM_FLAG_DESC) ? 
UCS_INPROGRESS : UCS_OK; +} + +static void ucp_stream_am_dump(ucp_worker_h worker, uct_am_trace_type_t type, + uint8_t id, const void *data, size_t length, + char *buffer, size_t max) +{ + const ucp_stream_am_hdr_t *hdr = data; + size_t hdr_len = sizeof(*hdr); + char *p; + + snprintf(buffer, max, "STREAM ep_ptr 0x%lx", hdr->ep_ptr); + p = buffer + strlen(buffer); + + ucs_assert(hdr->ep_ptr != 0); + ucp_dump_payload(worker->context, p, buffer + max - p, + UCS_PTR_BYTE_OFFSET(data, hdr_len), length - hdr_len); +} + +UCP_DEFINE_AM(UCP_FEATURE_STREAM, UCP_AM_ID_STREAM_DATA, ucp_stream_am_handler, + ucp_stream_am_dump, 0); + +UCP_DEFINE_AM_PROXY(UCP_AM_ID_STREAM_DATA); diff --git a/src/ucp/stream/stream_send.c b/src/ucp/stream/stream_send.c new file mode 100644 index 0000000..4794ec9 --- /dev/null +++ b/src/ucp/stream/stream_send.c @@ -0,0 +1,262 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_stream_send_am_short(ucp_ep_t *ep, const void *buffer, size_t length) +{ + UCS_STATIC_ASSERT(sizeof(ep->worker->uuid) == sizeof(uint64_t)); + + return uct_ep_am_short(ucp_ep_get_am_uct_ep(ep), UCP_AM_ID_STREAM_DATA, + ucp_ep_dest_ep_ptr(ep), buffer, length); +} + +static void ucp_stream_send_req_init(ucp_request_t* req, ucp_ep_h ep, + const void* buffer, uintptr_t datatype, + size_t count, uint32_t flags) +{ + req->flags = flags; + req->send.ep = ep; + req->send.buffer = (void*)buffer; + req->send.datatype = datatype; + req->send.lane = ep->am_lane; + ucp_request_send_state_init(req, datatype, count); + req->send.length = ucp_dt_length(req->send.datatype, count, + req->send.buffer, + &req->send.state.dt); + req->send.mem_type = ucp_memory_type_detect(ep->worker->context, + (void*)buffer, + req->send.length); + 
VALGRIND_MAKE_MEM_UNDEFINED(&req->send.tag, sizeof(req->send.tag)); +} + +static UCS_F_ALWAYS_INLINE ucs_status_ptr_t +ucp_stream_send_req(ucp_request_t *req, size_t count, + const ucp_ep_msg_config_t* msg_config, + ucp_send_callback_t cb, const ucp_request_send_proto_t *proto) +{ + size_t zcopy_thresh = ucp_proto_get_zcopy_threshold(req, msg_config, + count, SIZE_MAX); + ssize_t max_short = ucp_proto_get_short_max(req, msg_config); + + ucs_status_t status = ucp_request_send_start(req, max_short, zcopy_thresh, + SIZE_MAX, count, msg_config, + proto); + if (status != UCS_OK) { + return UCS_STATUS_PTR(status); + } + + /* + * Start the request. + * If it is completed immediately, release the request and return the status. + * Otherwise, return the request. + */ + status = ucp_request_send(req, 0); + if (req->flags & UCP_REQUEST_FLAG_COMPLETED) { + ucs_trace_req("releasing send request %p, returning status %s", req, + ucs_status_string(status)); + ucp_request_put(req); + return UCS_STATUS_PTR(status); + } + + ucp_request_set_callback(req, send.cb, cb) + ucs_trace_req("returning send request %p", req); + return req + 1; +} + +UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_stream_send_nb, + (ep, buffer, count, datatype, cb, flags), + ucp_ep_h ep, const void *buffer, size_t count, + uintptr_t datatype, ucp_send_callback_t cb, unsigned flags) +{ + ucp_request_t *req; + size_t length; + ucs_status_t status; + ucs_status_ptr_t ret; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(ep->worker->context, UCP_FEATURE_STREAM, + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucs_trace_req("stream_send_nb buffer %p count %zu to %s cb %p flags %u", + buffer, count, ucp_ep_peer_name(ep), cb, flags); + + if (ucs_unlikely(flags != 0)) { + ret = UCS_STATUS_PTR(UCS_ERR_NOT_IMPLEMENTED); + goto out; + } + + status = ucp_ep_resolve_dest_ep_ptr(ep, ep->am_lane); + if (status != UCS_OK) { + ret = UCS_STATUS_PTR(status); + goto out; + } + + if 
(ucs_likely(UCP_DT_IS_CONTIG(datatype)) && + ucp_memory_type_cache_is_empty(ep->worker->context)) { + length = ucp_contig_dt_length(datatype, count); + if (ucs_likely((ssize_t)length <= ucp_ep_config(ep)->am.max_short)) { + status = UCS_PROFILE_CALL(ucp_stream_send_am_short, ep, buffer, + length); + if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) { + UCP_EP_STAT_TAG_OP(ep, EAGER); + ret = UCS_STATUS_PTR(status); /* UCS_OK also goes here */ + goto out; + } + } + } + + req = ucp_request_get(ep->worker); + if (ucs_unlikely(req == NULL)) { + ret = UCS_STATUS_PTR(UCS_ERR_NO_MEMORY); + goto out; + } + + ucp_stream_send_req_init(req, ep, buffer, datatype, count, flags); + + ret = ucp_stream_send_req(req, count, &ucp_ep_config(ep)->am, cb, + ucp_ep_config(ep)->stream.proto); + +out: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return ret; +} + +static ucs_status_t ucp_stream_contig_am_short(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucs_status_t status = ucp_stream_send_am_short(req->send.ep, + req->send.buffer, + req->send.length); + if (ucs_likely(status == UCS_OK)) { + ucp_request_complete_send(req, UCS_OK); + } + return status; +} + +static size_t ucp_stream_pack_am_single_dt(void *dest, void *arg) +{ + ucp_stream_am_hdr_t *hdr = dest; + ucp_request_t *req = arg; + size_t length; + + hdr->ep_ptr = ucp_request_get_dest_ep_ptr(req); + + ucs_assert(req->send.state.dt.offset == 0); + + length = ucp_dt_pack(req->send.ep->worker, req->send.datatype, + req->send.mem_type, hdr + 1, req->send.buffer, + &req->send.state.dt, req->send.length); + ucs_assert(length == req->send.length); + return sizeof(*hdr) + length; +} + +static ucs_status_t ucp_stream_bcopy_single(uct_pending_req_t *self) +{ + ucs_status_t status; + + status = ucp_do_am_bcopy_single(self, UCP_AM_ID_STREAM_DATA, + ucp_stream_pack_am_single_dt); + if (status == UCS_OK) { + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + 
ucp_request_send_generic_dt_finish(req); + ucp_request_complete_send(req, UCS_OK); + } + return status; +} + +static size_t ucp_stream_pack_am_first_dt(void *dest, void *arg) +{ + ucp_stream_am_hdr_t *hdr = dest; + ucp_request_t *req = arg; + size_t length; + + hdr->ep_ptr = ucp_request_get_dest_ep_ptr(req); + length = ucp_ep_config(req->send.ep)->am.max_bcopy - sizeof(*hdr); + + ucs_assert(req->send.state.dt.offset == 0); + ucs_assert(req->send.length > length); + return sizeof(*hdr) + ucp_dt_pack(req->send.ep->worker, req->send.datatype, + req->send.mem_type, hdr + 1, req->send.buffer, + &req->send.state.dt, length); +} + +static size_t ucp_stream_pack_am_middle_dt(void *dest, void *arg) +{ + ucp_stream_am_hdr_t *hdr = dest; + ucp_request_t *req = arg; + size_t length; + + hdr->ep_ptr = ucp_request_get_dest_ep_ptr(req); + length = ucs_min(ucp_ep_config(req->send.ep)->am.max_bcopy - sizeof(*hdr), + req->send.length - req->send.state.dt.offset); + return sizeof(*hdr) + ucp_dt_pack(req->send.ep->worker, req->send.datatype, + req->send.mem_type, hdr + 1, req->send.buffer, + &req->send.state.dt, length); +} + +static ucs_status_t ucp_stream_bcopy_multi(uct_pending_req_t *self) +{ + ucs_status_t status = ucp_do_am_bcopy_multi(self, + UCP_AM_ID_STREAM_DATA, + UCP_AM_ID_STREAM_DATA, + ucp_stream_pack_am_first_dt, + ucp_stream_pack_am_middle_dt, 0); + if (status == UCS_OK) { + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_request_send_generic_dt_finish(req); + ucp_request_complete_send(req, UCS_OK); + } else if (status == UCP_STATUS_PENDING_SWITCH) { + status = UCS_OK; + } + return status; +} + +static ucs_status_t ucp_stream_eager_zcopy_single(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_stream_am_hdr_t hdr; + + hdr.ep_ptr = ucp_request_get_dest_ep_ptr(req); + return ucp_do_am_zcopy_single(self, UCP_AM_ID_STREAM_DATA, &hdr, + sizeof(hdr), ucp_proto_am_zcopy_req_complete); +} + 
+static ucs_status_t ucp_stream_eager_zcopy_multi(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_stream_am_hdr_t hdr; + + hdr.ep_ptr = ucp_request_get_dest_ep_ptr(req); + return ucp_do_am_zcopy_multi(self, + UCP_AM_ID_STREAM_DATA, + UCP_AM_ID_STREAM_DATA, + &hdr, sizeof(hdr), &hdr, sizeof(hdr), + ucp_proto_am_zcopy_req_complete, 0); +} + +const ucp_request_send_proto_t ucp_stream_am_proto = { + .contig_short = ucp_stream_contig_am_short, + .bcopy_single = ucp_stream_bcopy_single, + .bcopy_multi = ucp_stream_bcopy_multi, + .zcopy_single = ucp_stream_eager_zcopy_single, + .zcopy_multi = ucp_stream_eager_zcopy_multi, + .zcopy_completion = ucp_proto_am_zcopy_completion, + .only_hdr_size = sizeof(ucp_stream_am_hdr_t) +}; diff --git a/src/ucp/tag/eager.h b/src/ucp/tag/eager.h new file mode 100644 index 0000000..7c70342 --- /dev/null +++ b/src/ucp/tag/eager.h @@ -0,0 +1,80 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */
+
+#ifndef UCP_TAG_EAGER_H_
+#define UCP_TAG_EAGER_H_
+
+#include "tag_match.h"
+
+/* NOTE(review): the targets of the following #include directives are missing
+ * in this copy of the file - restore them from the upstream source */
+#include
+#include
+#include
+#include
+#include
+
+
+/*
+ * EAGER_ONLY
+ *
+ * Carries only the tag header. It is also the leading member ("super") of the
+ * EAGER_FIRST and EAGER_SYNC_ONLY headers below. EAGER_MIDDLE fragments do not
+ * use this header, they use ucp_eager_middle_hdr_t.
+ */
+typedef struct {
+    ucp_tag_hdr_t             super;
+} UCS_S_PACKED ucp_eager_hdr_t;
+
+
+/*
+ * EAGER_FIRST
+ *
+ * total_len - length of the whole message (the tag offload receive flow sets
+ *             it to SIZE_MAX because the length is not known at that point)
+ * msg_id    - message id, repeated in every EAGER_MIDDLE fragment
+ */
+typedef struct {
+    ucp_eager_hdr_t           super;
+    size_t                    total_len;
+    uint64_t                  msg_id;
+} UCS_S_PACKED ucp_eager_first_hdr_t;
+
+
+/*
+ * EAGER_MIDDLE
+ *
+ * msg_id - id of the message this fragment belongs to
+ * offset - offset of the fragment data within the message
+ */
+typedef struct {
+    uint64_t                  msg_id;
+    size_t                    offset;
+} UCS_S_PACKED ucp_eager_middle_hdr_t;
+
+
+/*
+ * EAGER_SYNC_ONLY
+ *
+ * EAGER_ONLY header followed by the request header (ep_ptr, reqptr) of the
+ * sender.
+ */
+typedef struct {
+    ucp_eager_hdr_t           super;
+    ucp_request_hdr_t         req;
+} UCS_S_PACKED ucp_eager_sync_hdr_t;
+
+
+/*
+ * EAGER_SYNC_FIRST
+ *
+ * EAGER_FIRST header followed by the request header (ep_ptr, reqptr) of the
+ * sender.
+ */
+typedef struct {
+    ucp_eager_first_hdr_t     super;
+    ucp_request_hdr_t         req;
+} UCS_S_PACKED ucp_eager_sync_first_hdr_t;
+
+
+/* Send-protocol tables for eager and synchronous eager tag sends */
+extern const ucp_request_send_proto_t ucp_tag_eager_proto;
+extern const ucp_request_send_proto_t ucp_tag_eager_sync_proto;
+
+/* Called by the receive path for messages flagged UCP_RECV_DESC_FLAG_EAGER_SYNC;
+ * 'hdr' is the received message header */
+void ucp_tag_eager_sync_send_ack(ucp_worker_h worker, void *hdr, uint16_t recv_flags);
+
+/* Sets 'flag' in req->flags and completes the send request with 'status' once
+ * both UCP_REQUEST_FLAG_LOCAL_COMPLETED and UCP_REQUEST_FLAG_REMOTE_COMPLETED
+ * are set */
+void ucp_tag_eager_sync_completion(ucp_request_t *req, uint32_t flag,
+                                   ucs_status_t status);
+
+void ucp_tag_eager_zcopy_completion(uct_completion_t *self, ucs_status_t status);
+
+void ucp_tag_eager_zcopy_req_complete(ucp_request_t *req, ucs_status_t status);
+
+/* Deregisters the send buffer and reports local completion of a synchronous
+ * zero-copy send through ucp_tag_eager_sync_completion() */
+void ucp_tag_eager_sync_zcopy_req_complete(ucp_request_t *req, ucs_status_t status);
+
+void ucp_tag_eager_sync_zcopy_completion(uct_completion_t *self, ucs_status_t status);
+
+#endif
diff --git a/src/ucp/tag/eager_rcv.c b/src/ucp/tag/eager_rcv.c
new file mode 100644
index 0000000..38d7805
--- /dev/null
+++ b/src/ucp/tag/eager_rcv.c
@@ -0,0 +1,516 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "eager.h"
+#include "tag_match.inl"
+#include "offload.h"
+
+/* NOTE(review): the targets of the following #include directives are missing
+ * in this copy of the file - restore them from the upstream source */
+#include
+#include
+#include
+#include
+
+/*
+ * Common actions for an eager fragment that matched the posted receive 'req':
+ * trace and profile the event, update the worker statistics, store the sender
+ * tag in the request and force-cancel (with deregistration) a receive that may
+ * have been offloaded to the transport.
+ * 'data' is not referenced by the body.
+ */
+static UCS_F_ALWAYS_INLINE void
+ucp_eager_expected_handler(ucp_worker_t *worker, ucp_request_t *req,
+                           void *data, size_t recv_len, ucp_tag_t recv_tag,
+                           uint16_t flags)
+{
+    ucs_trace_req("found req %p", req);
+    UCS_PROFILE_REQUEST_EVENT(req, "eager_recv", recv_len);
+
+    /* First fragment fills the receive information */
+    UCP_WORKER_STAT_EAGER_MSG(worker, flags);
+    UCP_WORKER_STAT_EAGER_CHUNK(worker, EXP);
+
+    req->recv.tag.info.sender_tag = recv_tag;
+
+    /* Cancel req in transport if it was offloaded,
+     * because it arrived either:
+     * 1) via SW TM (e. g. peer doesn't support offload)
+     * 2) as unexpected via HW TM */
+    ucp_tag_offload_try_cancel(worker, req,
+                               UCP_TAG_OFFLOAD_CANCEL_FORCE |
+                               UCP_TAG_OFFLOAD_CANCEL_DEREG);
+}
+
+/*
+ * Handle a single-fragment eager message whose tag is passed separately in
+ * 'recv_tag', so 'data' holds 'length' bytes of payload only.
+ *
+ * If ucp_tag_exp_search() finds a request, the payload is unpacked into it, the
+ * request is completed with the unpack status and UCS_OK is returned.
+ * Otherwise a receive descriptor is created with ucp_recv_desc_init(), the tag
+ * is written right after the descriptor and the descriptor is passed to
+ * ucp_tag_unexp_recv(); the status of ucp_recv_desc_init() is returned.
+ */
+static UCS_F_ALWAYS_INLINE ucs_status_t
+ucp_eager_offload_handler(void *arg, void *data, size_t length,
+                          unsigned tl_flags, uint16_t flags, ucp_tag_t recv_tag)
+{
+    ucp_worker_t *worker = arg;
+    ucp_request_t *req;
+    ucp_recv_desc_t *rdesc;
+    ucp_tag_t *rdesc_hdr;
+    ucs_status_t status;
+
+    req = ucp_tag_exp_search(&worker->tm, recv_tag);
+    if (req != NULL) {
+        ucp_eager_expected_handler(worker, req, data, length, recv_tag, flags);
+        req->recv.tag.info.length = length;
+        status = ucp_request_recv_data_unpack(req, data, length, 0, 1);
+        ucp_request_complete_tag_recv(req, status);
+        status = UCS_OK;
+    } else {
+        status = ucp_recv_desc_init(worker, data, length, sizeof(ucp_tag_t),
+                                    tl_flags, sizeof(ucp_tag_t), flags,
+                                    sizeof(ucp_tag_t), &rdesc);
+        if (!UCS_STATUS_IS_ERR(status)) {
+            /* the tag is stored immediately after the descriptor */
+            rdesc_hdr  = (ucp_tag_t*)(rdesc + 1);
+            *rdesc_hdr = recv_tag;
+            ucp_tag_unexp_recv(&worker->tm, rdesc, recv_tag);
+        }
+    }
+
+    return status;
+}
+
+/*
+ * Handle an eager message which starts with a header of 'hdr_len' bytes whose
+ * first member is ucp_eager_hdr_t (EAGER_ONLY, EAGER_FIRST and their sync
+ * variants). 'flags' are UCP_RECV_DESC_FLAG_xx and must include
+ * UCP_RECV_DESC_FLAG_EAGER.
+ *
+ * Expected message: a sync ack is sent if UCP_RECV_DESC_FLAG_EAGER_SYNC is set;
+ * an EAGER_ONLY message is unpacked and completed, otherwise the header is
+ * read as ucp_eager_first_hdr_t, the first fragment is processed and
+ * ucp_tag_frag_list_process_queue() is called for the message id. Returns
+ * UCS_OK.
+ * Unexpected message: a receive descriptor is created and passed to
+ * ucp_tag_unexp_recv(); the status of ucp_recv_desc_init() is returned.
+ */
+static UCS_F_ALWAYS_INLINE ucs_status_t
+ucp_eager_tagged_handler(void *arg, void *data, size_t length, unsigned am_flags,
+                         uint16_t flags, uint16_t hdr_len, uint16_t priv_length)
+{
+    ucp_worker_h worker = arg;
+    ucp_eager_hdr_t *eager_hdr = data;
+    ucp_eager_first_hdr_t *eagerf_hdr;
+    ucp_recv_desc_t *rdesc;
+    ucp_request_t *req;
+    ucs_status_t status;
+    ucp_tag_t recv_tag;
+    size_t recv_len;
+
+    ucs_assert(length >= hdr_len);
+    ucs_assert(flags & UCP_RECV_DESC_FLAG_EAGER);
+
+    recv_tag = eager_hdr->super.tag;
+    recv_len = length - hdr_len;
+
+    req = ucp_tag_exp_search(&worker->tm, recv_tag);
+    if (req != NULL) {
+        ucp_eager_expected_handler(worker, req, data, recv_len, recv_tag, flags);
+
+        if (flags & UCP_RECV_DESC_FLAG_EAGER_SYNC) {
+            ucp_tag_eager_sync_send_ack(worker, data, flags);
+        }
+
+        if (flags & UCP_RECV_DESC_FLAG_EAGER_ONLY) {
+            req->recv.tag.info.length = recv_len;
+            status = ucp_request_recv_data_unpack(req,
+                                                  UCS_PTR_BYTE_OFFSET(data, hdr_len),
+                                                  recv_len, 0, 1);
+            ucp_request_complete_tag_recv(req, status);
+        } else {
+            eagerf_hdr = data;
+            req->recv.tag.info.length =
+            req->recv.tag.remaining = eagerf_hdr->total_len;
+
+            status = ucp_tag_request_process_recv_data(req,
+                                                       UCS_PTR_BYTE_OFFSET(data, hdr_len),
+                                                       recv_len, 0, 0, flags);
+            ucs_assert(status == UCS_INPROGRESS);
+
+            /* UCS_STATS_ARG() supplies the trailing argument, hence no comma
+             * after msg_id */
+            ucp_tag_frag_list_process_queue(&worker->tm, req, eagerf_hdr->msg_id
+                                            UCS_STATS_ARG(UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_EXP));
+        }
+
+        status = UCS_OK;
+    } else {
+        status = ucp_recv_desc_init(worker, data, length, 0, am_flags, hdr_len,
+                                    flags, priv_length, &rdesc);
+        if (!UCS_STATUS_IS_ERR(status)) {
+            ucp_tag_unexp_recv(&worker->tm, rdesc, recv_tag);
+        }
+    }
+
+    return status;
+}
+
+/* AM handler for EAGER_ONLY: header is ucp_eager_hdr_t */
+UCS_PROFILE_FUNC(ucs_status_t, ucp_eager_only_handler,
+                 (arg, data, length, am_flags),
+                 void *arg, void *data, size_t length, unsigned am_flags)
+{
+    return ucp_eager_tagged_handler(arg, data, length, am_flags,
+                                    UCP_RECV_DESC_FLAG_EAGER |
+                                    UCP_RECV_DESC_FLAG_EAGER_ONLY,
+                                    sizeof(ucp_eager_hdr_t), 0);
+}
+
+/* AM handler for EAGER_FIRST: header is ucp_eager_first_hdr_t */
+UCS_PROFILE_FUNC(ucs_status_t, ucp_eager_first_handler,
+                 (arg, data, length, am_flags),
+                 void *arg, void *data, size_t length, unsigned am_flags)
+{
+    return ucp_eager_tagged_handler(arg, data, length, am_flags,
+                                    UCP_RECV_DESC_FLAG_EAGER,
+                                    sizeof(ucp_eager_first_hdr_t), 0);
+}
+
+/*
+ * Handle a middle (or last) fragment. 'data' starts with a header of 'hdr_len'
+ * bytes whose first member is ucp_eager_middle_hdr_t.
+ *
+ * The fragment is looked up (and inserted if absent) in worker->tm.frag_hash by
+ * message id. If the hash entry holds a queue of unexpected fragments, a
+ * receive descriptor is created and appended to it, and the status of
+ * ucp_recv_desc_init() is returned. If the entry holds a request, the data is
+ * processed into the request, the entry is removed once processing no longer
+ * returns UCS_INPROGRESS, and UCS_OK is returned.
+ */
+static UCS_F_ALWAYS_INLINE ucs_status_t
+ucp_eager_common_middle_handler(ucp_worker_t *worker, void *data, size_t length,
+                                uint16_t hdr_len, unsigned tl_flags,
+                                uint16_t flags, uint16_t priv_length)
+{
+    ucp_eager_middle_hdr_t *hdr = data;
+    ucp_tag_frag_match_t *matchq;
+    ucp_recv_desc_t *rdesc;
+    ucp_request_t *req;
+    ucs_status_t status;
+    size_t recv_len;
+    khiter_t iter;
+    int ret;
+
+    iter   = kh_put(ucp_tag_frag_hash, &worker->tm.frag_hash, hdr->msg_id, &ret);
+    ucs_assert(ret >= 0);
+    matchq = &kh_value(&worker->tm.frag_hash, iter);
+    if (ret != 0) {
+        /* initialize a previously empty hash entry */
+        ucp_tag_frag_match_init_unexp(matchq);
+    }
+
+    if (ucp_tag_frag_match_is_unexp(matchq)) {
+        /* add new received descriptor to the queue */
+        status = ucp_recv_desc_init(worker, data, length, 0, tl_flags,
+                                    hdr_len, flags, priv_length, &rdesc);
+        if (ucs_likely(!UCS_STATUS_IS_ERR(status))) {
+            ucp_tag_frag_match_add_unexp(matchq, rdesc, hdr->offset);
+        } else if (ucs_queue_is_empty(&matchq->unexp_q)) {
+            /* If adding the first fragment to the unexpected queue fails,
+             * remove the element from the hash. Otherwise hash would contain an
+             * empty queue, which is not allowed, because queue implementation
+             * relies on the address of its head for certain operations (e.g.
+             * ucs_queue_is_empty). And khash may change address of its elements
+             * during resize (provoked by kh_put). */
+            kh_del(ucp_tag_frag_hash, &worker->tm.frag_hash, iter);
+        }
+    } else {
+        /* If fragment is expected, the corresponding element must be present
+         * in the hash (added in ucp_tag_frag_list_process_queue). */
+        ucs_assert(ret == 0);
+
+        /* hash entry contains a request, copy data to user buffer */
+        req      = matchq->exp_req;
+        recv_len = length - hdr_len;
+
+        UCP_WORKER_STAT_EAGER_CHUNK(worker, EXP);
+
+        /* Need to use hdr_len rather than sizeof(*hdr), because tag offload flow
+         * can use extended header for sync sends. */
+        status = ucp_tag_request_process_recv_data(req,
+                                                   UCS_PTR_BYTE_OFFSET(data, hdr_len),
+                                                   recv_len, hdr->offset, 0, flags);
+        if (status != UCS_INPROGRESS) {
+            /* request completed, delete hash entry */
+            kh_del(ucp_tag_frag_hash, &worker->tm.frag_hash, iter);
+        }
+
+        status = UCS_OK;
+    }
+
+    /* If hash contains queue of unexpected fragments, it should not be empty.
+     * NOTE(review): 'matchq' is still read here after the kh_del() calls
+     * above; on the path where the first unexpected fragment failed, the entry
+     * was initialized as an unexpected queue and the queue is empty - confirm
+     * that this assertion cannot fire on that path. */
+    ucs_assert(!ucp_tag_frag_match_is_unexp(matchq) ||
+               !ucs_queue_is_empty(&matchq->unexp_q));
+
+    return status;
+}
+
+/* AM handler for EAGER_MIDDLE: header is ucp_eager_middle_hdr_t */
+UCS_PROFILE_FUNC(ucs_status_t, ucp_eager_middle_handler,
+                 (arg, data, length, am_flags),
+                 void *arg, void *data, size_t length, unsigned am_flags)
+{
+    return ucp_eager_common_middle_handler(arg, data, length,
+                                           sizeof(ucp_eager_middle_hdr_t),
+                                           am_flags, UCP_RECV_DESC_FLAG_EAGER,
+                                           0);
+}
+
+/* AM handler for EAGER_SYNC_ONLY: header is ucp_eager_sync_hdr_t */
+UCS_PROFILE_FUNC(ucs_status_t, ucp_eager_sync_only_handler,
+                 (arg, data, length, am_flags),
+                 void *arg, void *data, size_t length, unsigned am_flags)
+{
+    return ucp_eager_tagged_handler(arg, data, length, am_flags,
+                                    UCP_RECV_DESC_FLAG_EAGER|
+                                    UCP_RECV_DESC_FLAG_EAGER_ONLY|
+                                    UCP_RECV_DESC_FLAG_EAGER_SYNC,
+                                    sizeof(ucp_eager_sync_hdr_t), 0);
+}
+
+/* AM handler for EAGER_SYNC_FIRST: header is ucp_eager_sync_first_hdr_t */
+UCS_PROFILE_FUNC(ucs_status_t, ucp_eager_sync_first_handler,
+                 (arg, data, length, am_flags),
+                 void *arg, void *data, size_t length, unsigned am_flags)
+{
+    return ucp_eager_tagged_handler(arg, data, length, am_flags,
+                                    UCP_RECV_DESC_FLAG_EAGER|
+                                    UCP_RECV_DESC_FLAG_EAGER_SYNC,
+                                    sizeof(ucp_eager_sync_first_hdr_t), 0);
+}
+
+/*
+ * AM handler for OFFLOAD_SYNC_ACK. Searches worker->tm.offload.sync_reqs for
+ * the send request whose ssend_tag and endpoint pointer equal the values in
+ * the received ucp_offload_ssend_hdr_t, marks it remotely completed and removes
+ * it from the queue. An error is logged if no request matches. Always returns
+ * UCS_OK.
+ */
+UCS_PROFILE_FUNC(ucs_status_t, ucp_eager_offload_sync_ack_handler,
+                 (arg, data, length, am_flags),
+                 void *arg, void *data, size_t length, unsigned am_flags)
+{
+    ucp_offload_ssend_hdr_t *rep_hdr = data;
+    ucp_worker_t *worker = arg;
+    ucs_queue_head_t *queue = &worker->tm.offload.sync_reqs;
+    ucp_request_t *sreq;
+    ucs_queue_iter_t iter;
+
+    ucs_queue_for_each_safe(sreq, iter, queue, send.tag_offload.queue) {
+        if ((sreq->send.tag_offload.ssend_tag == rep_hdr->sender_tag) &&
+            ((uintptr_t)sreq->send.ep == rep_hdr->ep_ptr)) {
+            /* NOTE(review): the request is reported complete before it is
+             * unlinked from the queue - confirm the queue element inside
+             * 'sreq' is still valid when ucs_queue_del_iter() runs */
+            ucp_tag_eager_sync_completion(sreq, UCP_REQUEST_FLAG_REMOTE_COMPLETED,
+                                          UCS_OK);
+            ucs_queue_del_iter(queue, iter);
+            return UCS_OK;
+        }
+    }
+    ucs_error("unexpected sync ack received: tag %"PRIx64" ep_ptr 0x%lx",
+              rep_hdr->sender_tag, rep_hdr->ep_ptr);
+    return UCS_OK;
+}
+
+/*
+ * AM handler for EAGER_SYNC_ACK. The reqptr field of the received
+ * ucp_reply_hdr_t is used as the address of the send request, which is marked
+ * remotely completed. Always returns UCS_OK.
+ */
+UCS_PROFILE_FUNC(ucs_status_t, ucp_eager_sync_ack_handler,
+                 (arg, data, length, am_flags),
+                 void *arg, void *data, size_t length, unsigned am_flags)
+{
+    ucp_reply_hdr_t *rep_hdr = data;
+    ucp_request_t *req;
+
+    req = (ucp_request_t*)rep_hdr->reqptr;
+    ucp_tag_eager_sync_completion(req, UCP_REQUEST_FLAG_REMOTE_COMPLETED, UCS_OK);
+    return UCS_OK;
+}
+
+/*
+ * Evaluates to a pointer to a '_priv_type' header located immediately before
+ * the payload. If '_flags' has UCT_CB_PARAM_FLAG_DESC the header is placed in
+ * the bytes preceding '_data'; otherwise a buffer of '_length' plus header
+ * size is taken with ucs_alloca() and the payload is copied after the header.
+ * The header contents are left for the caller to fill in.
+ */
+#define ucp_tag_eager_offload_priv(_flags, _data, _length, _priv_type) \
+    ({ \
+        size_t priv_len = sizeof(_priv_type); \
+        typeof(_priv_type) *priv_data; \
+        if (ucs_unlikely((_flags) & UCT_CB_PARAM_FLAG_DESC)) { \
+            priv_data = UCS_PTR_BYTE_OFFSET(_data, -priv_len); \
+        } else { /* Can not shift back, no headroom */ \
+            priv_data = ucs_alloca((_length) + priv_len); \
+            memcpy(UCS_PTR_BYTE_OFFSET(priv_data, priv_len), _data, (_length)); \
+        } \
+        priv_data; \
+    })
+
+/*
+ * Handle the first fragment of a multi-fragment message received through tag
+ * offload. A new message id is taken from worker->am_message_id and stored in
+ * '*context', an ucp_eager_first_hdr_t is built in front of the data, and the
+ * result is passed to ucp_eager_tagged_handler().
+ */
+static UCS_F_ALWAYS_INLINE ucs_status_t
+ucp_tag_offload_eager_first_handler(ucp_worker_h worker, void *data,
+                                    size_t length, unsigned tl_flags,
+                                    uct_tag_t stag, uint16_t flags,
+                                    void **context)
+{
+    ucp_eager_first_hdr_t *priv;
+    uint64_t msg_ctx;
+    int priv_len;
+
+    /* First part of the fragmented message. Pass message id back to UCT,
+     * so it will be provided with the rest of message fragments. Immediate
+     * data (indicating sync send) is passed with last fragment only, so
+     * ack will be sent upon receiving of the last fragment. */
+    msg_ctx               = worker->am_message_id++;
+    *(uint64_t*)context   = msg_ctx;
+    priv_len              = sizeof(*priv);
+    priv                  = ucp_tag_eager_offload_priv(tl_flags, data, length,
+                                                       ucp_eager_first_hdr_t);
+    priv->super.super.tag = stag;
+    priv->total_len       = SIZE_MAX; /* length is not known at this point */
+    priv->msg_id          = msg_ctx;
+    return ucp_eager_tagged_handler(worker, priv, length + priv_len,
+                                    tl_flags, flags, priv_len, priv_len);
+}
+
+/*
+ * Handle a middle or last fragment received through tag offload. A middle
+ * header (extended with sync-ack information when this is the last fragment
+ * and 'imm' is non-zero) is built in front of the data, with the message id
+ * read back from '*context', and the result is passed to
+ * ucp_eager_common_middle_handler().
+ */
+static UCS_F_ALWAYS_INLINE ucs_status_t
+ucp_tag_offload_eager_middle_handler(ucp_worker_h worker, void *data,
+                                     size_t length, unsigned tl_flags,
+                                     uct_tag_t stag, uint64_t imm,
+                                     uint16_t flags, void **context)
+{
+    ucp_offload_last_ssend_hdr_t *l_priv;
+    ucp_eager_middle_hdr_t *m_priv;
+    void *tag_priv;
+    int priv_len;
+
+    /* Last fragment may contain immediate data, indicating that it is
+     * synchronous send */
+    if (!(tl_flags & UCT_CB_PARAM_FLAG_MORE) && imm) {
+        l_priv                       = ucp_tag_eager_offload_priv(tl_flags, data, length,
+                                                                  ucp_offload_last_ssend_hdr_t);
+        priv_len                     = sizeof(*l_priv);
+        tag_priv                     = l_priv;
+        l_priv->ssend_ack.sender_tag = stag;
+        l_priv->ssend_ack.ep_ptr     = imm;
+        m_priv                       = &l_priv->super;
+        flags                       |= UCP_RECV_DESC_FLAG_EAGER_SYNC |
+                                        UCP_RECV_DESC_FLAG_EAGER_LAST;
+    } else {
+        m_priv   = ucp_tag_eager_offload_priv(tl_flags, data, length,
+                                              ucp_eager_middle_hdr_t);
+        priv_len = sizeof(*m_priv);
+        tag_priv = m_priv;
+        flags   |= (tl_flags & UCT_CB_PARAM_FLAG_MORE) ?
+                   0 : UCP_RECV_DESC_FLAG_EAGER_LAST;
+    }
+
+    /* Offset is calculated during data processing in the
+     * ucp_tag_request_process_recv_data function */
+    m_priv->offset = 0;
+    m_priv->msg_id = *(uint64_t*)context;
+
+    return ucp_eager_common_middle_handler(worker, tag_priv, length + priv_len,
+                                           priv_len, tl_flags, flags, priv_len);
+}
+
+/* TODO: can handle multi-fragment messages in a more efficient way by saving
+ * request or some unexp descriptors handle in the context. This would eliminate
+ * the need for fragments hashing on UCP level.
*/ +UCS_PROFILE_FUNC(ucs_status_t, ucp_tag_offload_unexp_eager, + (arg, data, length, tl_flags, stag, imm, context), + void *arg, void *data, size_t length, unsigned tl_flags, + uct_tag_t stag, uint64_t imm, void **context) +{ + /* Align data with AM protocol. We should add tag before the data. */ + ucp_worker_iface_t *wiface = arg; + ucp_worker_t *worker = wiface->worker; + uint16_t flags = UCP_RECV_DESC_FLAG_EAGER | + UCP_RECV_DESC_FLAG_EAGER_OFFLOAD; + ucp_eager_sync_hdr_t *priv; + int priv_len; + + UCP_WORKER_STAT_TAG_OFFLOAD(wiface->worker, RX_UNEXP_EGR); + + /* Fast path - single-fragment, non-sync eager message */ + if (ucs_likely((tl_flags & UCT_CB_PARAM_FLAG_FIRST) && + !(tl_flags & UCT_CB_PARAM_FLAG_MORE) && + !imm)) { + ucp_tag_offload_unexp(wiface, stag, length); + + return ucp_eager_offload_handler(wiface->worker, data, length, tl_flags, + flags | UCP_RECV_DESC_FLAG_EAGER_ONLY, + stag); + } + + if (!(tl_flags & UCT_CB_PARAM_FLAG_FIRST)) { + /* Either middle or last fragment */ + return ucp_tag_offload_eager_middle_handler(worker, data, length, + tl_flags, stag, imm, flags, + context); + } + + /* Either first eager fragment or entire sync eager message */ + ucp_tag_offload_unexp(wiface, stag, length); + + if (tl_flags & UCT_CB_PARAM_FLAG_MORE) { + /* First part of the fragmented message */ + return ucp_tag_offload_eager_first_handler(worker, data, length, + tl_flags, stag, flags, + context); + } + + /* Sync eager only packet */ + ucs_assert(!(tl_flags & UCT_CB_PARAM_FLAG_MORE)); + ucs_assert(imm); + + flags |= UCP_RECV_DESC_FLAG_EAGER_ONLY | + UCP_RECV_DESC_FLAG_EAGER_SYNC; + priv_len = sizeof(*priv); + priv = ucp_tag_eager_offload_priv(tl_flags, data, length, + ucp_eager_sync_hdr_t); + priv->req.reqptr = 0ul; + priv->req.ep_ptr = imm; + priv->super.super.tag = stag; + return ucp_eager_tagged_handler(worker, priv, length + priv_len, + tl_flags, flags, priv_len, priv_len); +} + +static void ucp_eager_dump(ucp_worker_h worker, uct_am_trace_type_t type, + 
uint8_t id, const void *data, size_t length, + char *buffer, size_t max) +{ + const ucp_eager_first_hdr_t *eager_first_hdr = data; + const ucp_eager_hdr_t *eager_hdr = data; + const ucp_eager_middle_hdr_t *eager_mid_hdr = data; + const ucp_eager_sync_first_hdr_t *eagers_first_hdr = data; + const ucp_eager_sync_hdr_t *eagers_hdr = data; + const ucp_reply_hdr_t *rep_hdr = data; + const ucp_offload_ssend_hdr_t *off_rep_hdr = data; + size_t header_len; + char *p; + + switch (id) { + case UCP_AM_ID_EAGER_ONLY: + snprintf(buffer, max, "EGR_O tag %"PRIx64, eager_hdr->super.tag); + header_len = sizeof(*eager_hdr); + break; + case UCP_AM_ID_EAGER_FIRST: + snprintf(buffer, max, "EGR_F tag %"PRIx64" msgid %"PRIx64" len %zu", + eager_first_hdr->super.super.tag, eager_first_hdr->msg_id, + eager_first_hdr->total_len); + header_len = sizeof(*eager_first_hdr); + break; + case UCP_AM_ID_EAGER_MIDDLE: + snprintf(buffer, max, "EGR_M msgid %"PRIx64" offset %zu", + eager_mid_hdr->msg_id, eager_mid_hdr->offset); + header_len = sizeof(*eager_mid_hdr); + break; + case UCP_AM_ID_EAGER_SYNC_ONLY: + ucs_assert(eagers_hdr->req.ep_ptr != 0); + snprintf(buffer, max, "EGRS tag %"PRIx64" ep_ptr 0x%lx request 0x%lx", + eagers_hdr->super.super.tag, eagers_hdr->req.ep_ptr, + eagers_hdr->req.reqptr); + header_len = sizeof(*eagers_hdr); + break; + case UCP_AM_ID_EAGER_SYNC_FIRST: + snprintf(buffer, max, "EGRS_F tag %"PRIx64" msgid %"PRIx64" len %zu " + "ep_ptr 0x%lx request 0x%lx", + eagers_first_hdr->super.super.super.tag, + eagers_first_hdr->super.msg_id, + eagers_first_hdr->super.total_len, + eagers_first_hdr->req.ep_ptr, + eagers_first_hdr->req.reqptr); + header_len = sizeof(*eagers_first_hdr); + break; + case UCP_AM_ID_EAGER_SYNC_ACK: + snprintf(buffer, max, "EGRS_A request 0x%lx status '%s'", rep_hdr->reqptr, + ucs_status_string(rep_hdr->status)); + header_len = sizeof(*rep_hdr); + break; + case UCP_AM_ID_OFFLOAD_SYNC_ACK: + snprintf(buffer, max, "EGRS_A_O tag %"PRIx64" ep_ptr 0x%lx", + 
off_rep_hdr->sender_tag, off_rep_hdr->ep_ptr); + header_len = sizeof(*rep_hdr); + break; + default: + return; + } + + p = buffer + strlen(buffer); + ucp_dump_payload(worker->context, p, buffer + max - p, + UCS_PTR_BYTE_OFFSET(data, header_len), length - header_len); +} + +UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_EAGER_ONLY, ucp_eager_only_handler, + ucp_eager_dump, 0); +UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_EAGER_FIRST, ucp_eager_first_handler, + ucp_eager_dump, 0); +UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_EAGER_MIDDLE, ucp_eager_middle_handler, + ucp_eager_dump, 0); +UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_EAGER_SYNC_ONLY, + ucp_eager_sync_only_handler, ucp_eager_dump, 0); +UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_EAGER_SYNC_FIRST, + ucp_eager_sync_first_handler, ucp_eager_dump, 0); +UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_EAGER_SYNC_ACK, + ucp_eager_sync_ack_handler, ucp_eager_dump, 0); +UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_OFFLOAD_SYNC_ACK, + ucp_eager_offload_sync_ack_handler, ucp_eager_dump, 0); + +UCP_DEFINE_AM_PROXY(UCP_AM_ID_EAGER_ONLY); +UCP_DEFINE_AM_PROXY(UCP_AM_ID_EAGER_FIRST); +UCP_DEFINE_AM_PROXY(UCP_AM_ID_EAGER_MIDDLE); +UCP_DEFINE_AM_PROXY(UCP_AM_ID_EAGER_SYNC_ONLY); +UCP_DEFINE_AM_PROXY(UCP_AM_ID_EAGER_SYNC_FIRST); +UCP_DEFINE_AM_PROXY(UCP_AM_ID_EAGER_SYNC_ACK); +UCP_DEFINE_AM_PROXY(UCP_AM_ID_OFFLOAD_SYNC_ACK); diff --git a/src/ucp/tag/eager_snd.c b/src/ucp/tag/eager_snd.c new file mode 100644 index 0000000..bfaa459 --- /dev/null +++ b/src/ucp/tag/eager_snd.c @@ -0,0 +1,350 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "eager.h" +#include "offload.h" + +#include +#include + + +/* packing start */ + +static UCS_F_ALWAYS_INLINE size_t +ucp_tag_pack_eager_common(ucp_request_t *req, void *dest, + size_t length, size_t hdr_length, + int UCS_V_UNUSED single, + int UCS_V_UNUSED first) +{ + size_t packed_length; + + ucs_assert((length + hdr_length) <= + ucp_ep_get_max_bcopy(req->send.ep, req->send.lane)); + ucs_assert(!first || (req->send.state.dt.offset == 0)); + + packed_length = ucp_dt_pack(req->send.ep->worker, req->send.datatype, + req->send.mem_type, dest, req->send.buffer, + &req->send.state.dt, length); + ucs_assert((single && (packed_length == req->send.length)) || + (packed_length < req->send.length)); + return packed_length + hdr_length; +} + +static size_t ucp_tag_pack_eager_only_dt(void *dest, void *arg) +{ + ucp_eager_hdr_t *hdr = dest; + ucp_request_t *req = arg; + + hdr->super.tag = req->send.tag.tag; + + return ucp_tag_pack_eager_common(req, hdr + 1, req->send.length, + sizeof(*hdr), 1, 1); +} + +static size_t ucp_tag_pack_eager_sync_only_dt(void *dest, void *arg) +{ + ucp_eager_sync_hdr_t *hdr = dest; + ucp_request_t *req = arg; + + hdr->super.super.tag = req->send.tag.tag; + hdr->req.ep_ptr = ucp_request_get_dest_ep_ptr(req); + hdr->req.reqptr = (uintptr_t)req; + + return ucp_tag_pack_eager_common(req, hdr + 1, req->send.length, + sizeof(*hdr), 1, 1); +} + +static size_t ucp_tag_pack_eager_first_dt(void *dest, void *arg) +{ + ucp_eager_first_hdr_t *hdr = dest; + ucp_request_t *req = arg; + size_t length; + + ucs_assert(req->send.lane == ucp_ep_get_am_lane(req->send.ep)); + + length = ucp_ep_get_max_bcopy(req->send.ep, req->send.lane) - + sizeof(*hdr); + hdr->super.super.tag = req->send.tag.tag; + hdr->total_len = req->send.length; + hdr->msg_id = req->send.tag.message_id; + + return ucp_tag_pack_eager_common(req, hdr + 1, length, sizeof(*hdr), 0, 1); +} + +static size_t 
ucp_tag_pack_eager_sync_first_dt(void *dest, void *arg) +{ + ucp_eager_sync_first_hdr_t *hdr = dest; + ucp_request_t *req = arg; + size_t length; + + ucs_assert(req->send.lane == ucp_ep_get_am_lane(req->send.ep)); + + length = ucp_ep_get_max_bcopy(req->send.ep, + req->send.lane) - + sizeof(*hdr); + hdr->super.super.super.tag = req->send.tag.tag; + hdr->super.total_len = req->send.length; + hdr->req.ep_ptr = ucp_request_get_dest_ep_ptr(req); + hdr->super.msg_id = req->send.tag.message_id; + hdr->req.reqptr = (uintptr_t)req; + + return ucp_tag_pack_eager_common(req, hdr + 1, length, sizeof(*hdr), 0, 1); +} + +static size_t ucp_tag_pack_eager_middle_dt(void *dest, void *arg) +{ + ucp_eager_middle_hdr_t *hdr = dest; + ucp_request_t *req = arg; + size_t length; + + length = ucs_min(ucp_ep_get_max_bcopy(req->send.ep, req->send.lane) - + sizeof(*hdr), + req->send.length - req->send.state.dt.offset); + hdr->msg_id = req->send.tag.message_id; + hdr->offset = req->send.state.dt.offset; + + return ucp_tag_pack_eager_common(req, hdr + 1, length, sizeof(*hdr), 0, 0); +} + +/* eager */ + +static ucs_status_t ucp_tag_eager_contig_short(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ucs_status_t status; + + req->send.lane = ucp_ep_get_am_lane(ep); + status = uct_ep_am_short(ep->uct_eps[req->send.lane], UCP_AM_ID_EAGER_ONLY, + req->send.tag.tag, req->send.buffer, req->send.length); + if (status != UCS_OK) { + return status; + } + + ucp_request_complete_send(req, UCS_OK); + return UCS_OK; +} + +static ucs_status_t ucp_tag_eager_bcopy_single(uct_pending_req_t *self) +{ + ucs_status_t status = ucp_do_am_bcopy_single(self, UCP_AM_ID_EAGER_ONLY, + ucp_tag_pack_eager_only_dt); + if (status == UCS_OK) { + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_request_send_generic_dt_finish(req); + ucp_request_complete_send(req, UCS_OK); + } + return status; +} + +static ucs_status_t 
ucp_tag_eager_bcopy_multi(uct_pending_req_t *self) +{ + ucs_status_t status = ucp_do_am_bcopy_multi(self, + UCP_AM_ID_EAGER_FIRST, + UCP_AM_ID_EAGER_MIDDLE, + ucp_tag_pack_eager_first_dt, + ucp_tag_pack_eager_middle_dt, 1); + if (status == UCS_OK) { + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_request_send_generic_dt_finish(req); + ucp_request_complete_send(req, UCS_OK); + } else if (status == UCP_STATUS_PENDING_SWITCH) { + status = UCS_OK; + } + return status; +} + +static ucs_status_t ucp_tag_eager_zcopy_single(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_eager_hdr_t hdr; + + hdr.super.tag = req->send.tag.tag; + return ucp_do_am_zcopy_single(self, UCP_AM_ID_EAGER_ONLY, &hdr, sizeof(hdr), + ucp_proto_am_zcopy_req_complete); +} + +static ucs_status_t ucp_tag_eager_zcopy_multi(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_eager_first_hdr_t first_hdr; + ucp_eager_middle_hdr_t middle_hdr; + + first_hdr.super.super.tag = req->send.tag.tag; + first_hdr.total_len = req->send.length; + first_hdr.msg_id = req->send.tag.message_id; + middle_hdr.msg_id = req->send.tag.message_id; + middle_hdr.offset = req->send.state.dt.offset; + + return ucp_do_am_zcopy_multi(self, + UCP_AM_ID_EAGER_FIRST, + UCP_AM_ID_EAGER_MIDDLE, + &first_hdr, sizeof(first_hdr), + &middle_hdr, sizeof(middle_hdr), + ucp_proto_am_zcopy_req_complete, 1); +} + +ucs_status_t ucp_tag_send_start_rndv(uct_pending_req_t *self); + +const ucp_request_send_proto_t ucp_tag_eager_proto = { + .contig_short = ucp_tag_eager_contig_short, + .bcopy_single = ucp_tag_eager_bcopy_single, + .bcopy_multi = ucp_tag_eager_bcopy_multi, + .zcopy_single = ucp_tag_eager_zcopy_single, + .zcopy_multi = ucp_tag_eager_zcopy_multi, + .zcopy_completion = ucp_proto_am_zcopy_completion, + .only_hdr_size = sizeof(ucp_eager_hdr_t) +}; + +/* eager sync */ + +void 
ucp_tag_eager_sync_completion(ucp_request_t *req, uint32_t flag, + ucs_status_t status) +{ + static const uint16_t all_completed = UCP_REQUEST_FLAG_LOCAL_COMPLETED | + UCP_REQUEST_FLAG_REMOTE_COMPLETED; + + ucs_assertv(!(req->flags & flag), "req->flags=%d flag=%d", req->flags, flag); + req->flags |= flag; + if (ucs_test_all_flags(req->flags, all_completed)) { + ucp_request_complete_send(req, status); + } +} + +static ucs_status_t ucp_tag_eager_sync_bcopy_single(uct_pending_req_t *self) +{ + ucs_status_t status = ucp_do_am_bcopy_single(self, UCP_AM_ID_EAGER_SYNC_ONLY, + ucp_tag_pack_eager_sync_only_dt); + if (status == UCS_OK) { + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_request_send_generic_dt_finish(req); + ucp_tag_eager_sync_completion(req, UCP_REQUEST_FLAG_LOCAL_COMPLETED, + UCS_OK); + } + return status; +} + +static ucs_status_t ucp_tag_eager_sync_bcopy_multi(uct_pending_req_t *self) +{ + ucs_status_t status = ucp_do_am_bcopy_multi(self, + UCP_AM_ID_EAGER_SYNC_FIRST, + UCP_AM_ID_EAGER_MIDDLE, + ucp_tag_pack_eager_sync_first_dt, + ucp_tag_pack_eager_middle_dt, 1); + if (status == UCS_OK) { + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_request_send_generic_dt_finish(req); + ucp_tag_eager_sync_completion(req, UCP_REQUEST_FLAG_LOCAL_COMPLETED, + UCS_OK); + } else if (status == UCP_STATUS_PENDING_SWITCH) { + status = UCS_OK; + } + return status; +} + +void +ucp_tag_eager_sync_zcopy_req_complete(ucp_request_t *req, ucs_status_t status) +{ + ucs_assert(req->send.state.uct_comp.count == 0); + ucp_request_send_buffer_dereg(req); /* TODO register+lane change */ + ucp_tag_eager_sync_completion(req, UCP_REQUEST_FLAG_LOCAL_COMPLETED, + status); +} + +void ucp_tag_eager_sync_zcopy_completion(uct_completion_t *self, + ucs_status_t status) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, + send.state.uct_comp); + if (req->send.state.dt.offset == req->send.length) { + 
ucp_tag_eager_sync_zcopy_req_complete(req, status); + } else if (status != UCS_OK) { + ucs_fatal("error handling is not supported with tag-sync protocol"); + } +} + +static ucs_status_t ucp_tag_eager_sync_zcopy_single(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_eager_sync_hdr_t hdr; + + hdr.super.super.tag = req->send.tag.tag; + hdr.req.ep_ptr = ucp_request_get_dest_ep_ptr(req); + hdr.req.reqptr = (uintptr_t)req; + + return ucp_do_am_zcopy_single(self, UCP_AM_ID_EAGER_SYNC_ONLY, &hdr, sizeof(hdr), + ucp_tag_eager_sync_zcopy_req_complete); +} + +static ucs_status_t ucp_tag_eager_sync_zcopy_multi(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_eager_sync_first_hdr_t first_hdr; + ucp_eager_middle_hdr_t middle_hdr; + + first_hdr.super.super.super.tag = req->send.tag.tag; + first_hdr.super.total_len = req->send.length; + first_hdr.req.ep_ptr = ucp_request_get_dest_ep_ptr(req); + first_hdr.req.reqptr = (uintptr_t)req; + first_hdr.super.msg_id = req->send.tag.message_id; + middle_hdr.msg_id = req->send.tag.message_id; + middle_hdr.offset = req->send.state.dt.offset; + + return ucp_do_am_zcopy_multi(self, + UCP_AM_ID_EAGER_SYNC_FIRST, + UCP_AM_ID_EAGER_MIDDLE, + &first_hdr, sizeof(first_hdr), + &middle_hdr, sizeof(middle_hdr), + ucp_tag_eager_sync_zcopy_req_complete, 1); +} + +const ucp_request_send_proto_t ucp_tag_eager_sync_proto = { + .contig_short = NULL, + .bcopy_single = ucp_tag_eager_sync_bcopy_single, + .bcopy_multi = ucp_tag_eager_sync_bcopy_multi, + .zcopy_single = ucp_tag_eager_sync_zcopy_single, + .zcopy_multi = ucp_tag_eager_sync_zcopy_multi, + .zcopy_completion = ucp_tag_eager_sync_zcopy_completion, + .only_hdr_size = sizeof(ucp_eager_sync_hdr_t) +}; + +void ucp_tag_eager_sync_send_ack(ucp_worker_h worker, void *hdr, uint16_t recv_flags) +{ + ucp_request_hdr_t *reqhdr; + ucp_request_t *req; + + ucs_assert(recv_flags & 
UCP_RECV_DESC_FLAG_EAGER_SYNC); + + if (recv_flags & UCP_RECV_DESC_FLAG_EAGER_ONLY) { + reqhdr = &((ucp_eager_sync_hdr_t*)hdr)->req; /* only */ + } else { + reqhdr = &((ucp_eager_sync_first_hdr_t*)hdr)->req; /* first */ + } + + if (recv_flags & UCP_RECV_DESC_FLAG_EAGER_OFFLOAD) { + ucp_tag_offload_sync_send_ack(worker, reqhdr->ep_ptr, + ((ucp_eager_sync_hdr_t*)hdr)->super.super.tag, + recv_flags); + return; + } + + ucs_assert(reqhdr->reqptr != 0); + req = ucp_proto_ssend_ack_request_alloc(worker, reqhdr->ep_ptr); + if (req == NULL) { + ucs_fatal("could not allocate request"); + } + + req->send.proto.am_id = UCP_AM_ID_EAGER_SYNC_ACK; + req->send.proto.remote_request = reqhdr->reqptr; + + ucs_trace_req("send_sync_ack req %p ep %p", req, req->send.ep); + + ucp_request_send(req, 0); +} diff --git a/src/ucp/tag/offload.c b/src/ucp/tag/offload.c new file mode 100644 index 0000000..c9683b2 --- /dev/null +++ b/src/ucp/tag/offload.c @@ -0,0 +1,754 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "offload.h" +#include "eager.h" +#include "rndv.h" + +#include +#include +#include +#include +#include +#include + + +void ucp_tag_offload_iface_activate(ucp_worker_iface_t *iface) +{ + ucp_worker_t *worker = iface->worker; + ucp_context_t *context = worker->context; + + if (worker->tm.offload.iface == NULL) { + ucs_assert(worker->tm.offload.thresh == SIZE_MAX); + ucs_assert(worker->tm.offload.zcopy_thresh == SIZE_MAX); + ucs_assert(worker->tm.offload.iface == NULL); + + worker->tm.offload.thresh = context->config.ext.tm_thresh; + worker->tm.offload.zcopy_thresh = context->config.ext.tm_max_bb_size; + + /* Cache active offload iface. Can use it if this will be the only + * active iface on the worker. Otherwise would need to retrieve + * offload-capable iface from the offload hash table. 
*/ + worker->tm.offload.iface = iface; + + ucs_debug("Enable TM offload: thresh %zu, zcopy_thresh %zu", + worker->tm.offload.thresh, worker->tm.offload.zcopy_thresh); + } + + iface->flags |= UCP_WORKER_IFACE_FLAG_OFFLOAD_ACTIVATED; + + ucs_debug("Activate tag offload iface %p", iface); +} + +static UCS_F_ALWAYS_INLINE ucp_worker_iface_t* +ucp_tag_offload_iface(ucp_worker_t *worker, ucp_tag_t tag) +{ + khiter_t hash_it; + ucp_tag_t key_tag; + + if (worker->num_active_ifaces == 1) { + ucs_assert(worker->tm.offload.iface != NULL); + return worker->tm.offload.iface; + } + + key_tag = worker->context->config.tag_sender_mask & tag; + hash_it = kh_get(ucp_tag_offload_hash, &worker->tm.offload.tag_hash, + key_tag); + + return (hash_it == kh_end(&worker->tm.offload.tag_hash)) ? + NULL : kh_value(&worker->tm.offload.tag_hash, hash_it); +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_offload_release_buf(ucp_request_t *req, int dereg) +{ + if (req->recv.tag.rdesc != NULL) { + ucs_mpool_put_inline(req->recv.tag.rdesc); + } else if (dereg) { + ucp_request_recv_buffer_dereg(req); + } +} + +/* Tag consumed by the transport - need to remove it from expected queue */ +UCS_PROFILE_FUNC_VOID(ucp_tag_offload_tag_consumed, (self), + uct_tag_context_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, recv.uct_ctx); + ucs_queue_head_t *queue; + + queue = &ucp_tag_exp_get_req_queue(&req->recv.worker->tm, req)->queue; + ucs_queue_remove(queue, &req->recv.queue); +} + +/* Message is scattered to user buffer by the transport, complete the request */ +UCS_PROFILE_FUNC_VOID(ucp_tag_offload_completed, + (self, stag, imm, length, status), + uct_tag_context_t *self, uct_tag_t stag, + uint64_t imm, size_t length, ucs_status_t status) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, recv.uct_ctx); + ucp_eager_sync_hdr_t hdr; + + req->recv.tag.info.sender_tag = stag; + req->recv.tag.info.length = length; + + if (ucs_unlikely(status != UCS_OK)) { + 
ucp_tag_offload_release_buf(req, 1); + goto out; + } + + if (ucs_unlikely(imm)) { + hdr.req.ep_ptr = imm; + hdr.req.reqptr = 0; /* unused */ + hdr.super.super.tag = stag; + + /* Sync send - need to send a reply */ + ucp_tag_eager_sync_send_ack(req->recv.worker, &hdr, + UCP_RECV_DESC_FLAG_EAGER_ONLY | + UCP_RECV_DESC_FLAG_EAGER_SYNC | + UCP_RECV_DESC_FLAG_EAGER_OFFLOAD); + } + + if (req->recv.tag.rdesc != NULL) { + status = ucp_request_recv_data_unpack(req, req->recv.tag.rdesc + 1, + length, 0, 1); + ucs_mpool_put_inline(req->recv.tag.rdesc); + } else { + ucp_request_recv_buffer_dereg(req); + } + + UCP_WORKER_STAT_TAG_OFFLOAD(req->recv.worker, MATCHED); +out: + --req->recv.tag.wiface->post_count; + ucp_request_complete_tag_recv(req, status); +} + +/* RNDV request matched by the transport. Need to proceed with SW based RNDV */ +UCS_PROFILE_FUNC_VOID(ucp_tag_offload_rndv_cb, + (self, stag, header, header_length, status), + uct_tag_context_t *self, uct_tag_t stag, + const void *header, unsigned header_length, + ucs_status_t status) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, recv.uct_ctx); + void *header_host_copy; + + UCP_WORKER_STAT_TAG_OFFLOAD(req->recv.worker, MATCHED_SW_RNDV); + + --req->recv.tag.wiface->post_count; + if (ucs_unlikely(status != UCS_OK)) { + ucp_tag_offload_release_buf(req, 1); + ucp_request_complete_tag_recv(req, status); + return; + } + + ucs_assert(header_length >= sizeof(ucp_rndv_rts_hdr_t)); + + if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(req->recv.mem_type)) { + ucp_rndv_matched(req->recv.worker, req, header); + } else { + /* SW rendezvous request is stored in the user buffer (temporarily) + when matched. If user buffer allocated on GPU memory, need to "pack" + it to the host memory staging buffer for further processing. 
*/ + header_host_copy = ucs_alloca(header_length); + ucp_mem_type_pack(req->recv.worker, header_host_copy, header, + header_length, req->recv.mem_type); + ucp_rndv_matched(req->recv.worker, req, header_host_copy); + } + + ucp_tag_offload_release_buf(req, 0); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_tag_offload_unexp_rndv, + (arg, flags, stag, hdr, hdr_length, remote_addr, length, rkey_buf), + void *arg, unsigned flags, uint64_t stag, const void *hdr, + unsigned hdr_length, uint64_t remote_addr, size_t length, + const void *rkey_buf) +{ + ucp_worker_iface_t *iface = arg; + ucp_worker_t *worker = iface->worker; + const void *uct_rkeys[] = { rkey_buf }; + const ucp_tag_offload_unexp_rndv_hdr_t *rndv_hdr; + ucp_rndv_rts_hdr_t *dummy_rts; + ucp_md_index_t md_index; + size_t dummy_rts_size; + size_t rkey_size; + + if (remote_addr) { + /* Unexpected tag offload RNDV */ + ucs_assert(hdr_length == sizeof(*rndv_hdr)); + rndv_hdr = hdr; + + /* Calculate size for dummy (on-stack) RTS packet */ + md_index = rndv_hdr->md_index; + rkey_size = ucp_rkey_packed_size(worker->context, UCS_BIT(md_index)); + dummy_rts_size = sizeof(*dummy_rts) + rkey_size; + + /* Build the dummy RTS packet, copy meta-data from unexpected rndv header + * and remote key from rkey_buf. + */ + dummy_rts = ucs_alloca(dummy_rts_size); + dummy_rts->super.tag = stag; + dummy_rts->sreq.ep_ptr = rndv_hdr->ep_ptr; + dummy_rts->sreq.reqptr = rndv_hdr->reqptr; + dummy_rts->address = remote_addr; + dummy_rts->size = length; + + ucp_rkey_packed_copy(worker->context, UCS_BIT(md_index), + UCS_MEMORY_TYPE_HOST, dummy_rts + 1, uct_rkeys); + + UCP_WORKER_STAT_TAG_OFFLOAD(worker, RX_UNEXP_RNDV); + ucp_rndv_process_rts(worker, dummy_rts, dummy_rts_size, 0); + } else { + /* Unexpected tag offload rndv request. Sender buffer is either + non-contig or it's length > rndv.max_zcopy capability of tag lane. + Pass 0 as tl flags, because RTS needs to be stored in UCP mpool. 
+ The header is a full SW RTS packet, + */ + ucs_assert(hdr_length >= sizeof(ucp_rndv_rts_hdr_t)); + UCP_WORKER_STAT_TAG_OFFLOAD(worker, RX_UNEXP_SW_RNDV); + ucp_rndv_process_rts(worker, (void*)hdr, hdr_length, 0); + } + + /* Unexpected RNDV (both SW and HW) need to enable offload capabilities. + * Pass TM_THRESH value as a length to make sure tag is added to the + * hash table if there is a need (i.e. we have several active ifaces). */ + ucp_tag_offload_unexp(iface, stag, worker->tm.offload.thresh); + + return UCS_OK; +} + +UCS_PROFILE_FUNC_VOID(ucp_tag_offload_cancel, (worker, req, mode), + ucp_worker_t *worker, ucp_request_t *req, unsigned mode) +{ + + ucp_worker_iface_t *wiface = req->recv.tag.wiface; + ucs_status_t status; + + ucs_assert(wiface != NULL); + status = uct_iface_tag_recv_cancel(wiface->iface, &req->recv.uct_ctx, + mode & UCP_TAG_OFFLOAD_CANCEL_FORCE); + if (status != UCS_OK) { + ucs_error("Failed to cancel recv in the transport: %s", + ucs_status_string(status)); + return; + } + UCP_WORKER_STAT_TAG_OFFLOAD(worker, CANCELED); + + /* if cancel is not forced, need to wait its completion */ + if (mode & UCP_TAG_OFFLOAD_CANCEL_FORCE) { + ucp_tag_offload_release_buf(req, mode & UCP_TAG_OFFLOAD_CANCEL_DEREG); + --wiface->post_count; + } +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_tag_offload_do_post(ucp_request_t *req) +{ + ucp_worker_t *worker = req->recv.worker; + ucp_context_t *context = worker->context; + size_t length = req->recv.length; + ucp_mem_desc_t *rdesc = NULL; + ucp_worker_iface_t *wiface; + ucs_status_t status; + ucp_rsc_index_t mdi; + uct_iov_t iov; + + wiface = ucp_tag_offload_iface(worker, req->recv.tag.tag); + if (ucs_unlikely(wiface == NULL)) { + UCP_WORKER_STAT_TAG_OFFLOAD(worker, BLOCK_NO_IFACE); + return UCS_ERR_NO_RESOURCE; + } + + mdi = context->tl_rscs[wiface->rsc_index].md_index; + + /* Do not use bounce buffer for receives to GPU memory to avoid + * cost of h2d transfers (i.e. cuda_copy from staging to dest memory). 
*/ + if ((length >= worker->tm.offload.zcopy_thresh) || + !UCP_MEM_IS_ACCESSIBLE_FROM_CPU(req->recv.mem_type)) { + if (length > wiface->attr.cap.tag.recv.max_zcopy) { + /* Post maximum allowed length. If sender sends smaller message + * (which is allowed per MPI standard), max recv should fit it. + * Otherwise sender will send SW RNDV req, which is small enough. */ + ucs_assert(wiface->attr.cap.tag.rndv.max_zcopy <= + wiface->attr.cap.tag.recv.max_zcopy); + + length = wiface->attr.cap.tag.recv.max_zcopy; + } + + /* register the whole buffer to support SW RNDV fallback */ + status = ucp_request_memory_reg(context, UCS_BIT(mdi), req->recv.buffer, + req->recv.length, req->recv.datatype, + &req->recv.state, req->recv.mem_type, + req, UCT_MD_MEM_FLAG_HIDE_ERRORS); + if ((status != UCS_OK) || !req->recv.state.dt.contig.md_map) { + /* Can't register this buffer on the offload iface */ + UCP_WORKER_STAT_TAG_OFFLOAD(worker, BLOCK_MEM_REG); + return status; + } + + req->recv.tag.rdesc = NULL; + iov.buffer = (void*)req->recv.buffer; + iov.memh = req->recv.state.dt.contig.memh[0]; + } else { + rdesc = ucp_worker_mpool_get(&worker->reg_mp); + if (rdesc == NULL) { + return UCS_ERR_NO_MEMORY; + } + + iov.memh = ucp_memh2uct(rdesc->memh, mdi); + iov.buffer = rdesc + 1; + req->recv.tag.rdesc = rdesc; + } + + iov.length = length; + iov.count = 1; + iov.stride = 0; + + req->recv.uct_ctx.tag_consumed_cb = ucp_tag_offload_tag_consumed; + req->recv.uct_ctx.completed_cb = ucp_tag_offload_completed; + req->recv.uct_ctx.rndv_cb = ucp_tag_offload_rndv_cb; + + status = uct_iface_tag_recv_zcopy(wiface->iface, req->recv.tag.tag, + req->recv.tag.tag_mask, &iov, 1, + &req->recv.uct_ctx); + if (status != UCS_OK) { + ucs_assert((status == UCS_ERR_NO_RESOURCE) || + (status == UCS_ERR_EXCEEDS_LIMIT) || + (status == UCS_ERR_ALREADY_EXISTS)); + /* No more matching entries in the transport. 
+ * TODO keep registration in case SW RNDV protocol will be used */ + ucp_tag_offload_release_buf(req, 1); + UCP_WORKER_STAT_TAG_OFFLOAD(worker, BLOCK_TAG_EXCEED); + return status; + } + + UCP_WORKER_STAT_TAG_OFFLOAD(worker, POSTED); + req->flags |= UCP_REQUEST_FLAG_OFFLOADED; + req->recv.tag.wiface = wiface; + ++wiface->post_count; + ucs_trace_req("recv request %p (%p) was posted to transport (rsc %d)", + req, req + 1, wiface->rsc_index); + return UCS_OK; +} + +/** + * @brief Offload all pending non-offloaded requests + * + * This routine tries to offload all pending non-offloaded requests on the + * specific queue of expected requests. + * + * + * @param [in] req Receive request being processed. + * @param [in] req_queue Specific request queue from the expected queues hash, + * which corresponds to the 'req' request tag. + * . + * + * @return 0 - Some (or all) pending requests can't be offloaded to the transport. + * 1 - All pending requests on the specific queue were offloaded to + * the transport. + */ +static UCS_F_ALWAYS_INLINE int +ucp_tag_offload_post_sw_reqs(ucp_request_t *req, ucp_request_queue_t *req_queue) +{ + ucp_worker_t *worker = req->recv.worker; + ucs_status_t status; + ucp_request_t *req_exp; + ucp_worker_iface_t *wiface; + size_t max_post; + + /* If large enough buffer is being posted to the transport, + * try to post all unposted requests from the same TM queue before. + * Check that: + * 1. The receive buffer being posted is large enough (>= FORCE_THRESH) + * 2. There is no any request which can't be posted to the transport + * (sender rank wildcard or non-contig type) + * 3. 
Transport tag list is big enough to fit all unposted requests plus + * the one being posted */ + if ((req->recv.length < worker->context->config.ext.tm_force_thresh) || + req_queue->block_count) { + return 0; + } + + wiface = ucp_tag_offload_iface(worker, req->recv.tag.tag); + if (ucs_unlikely(wiface == NULL)) { + UCP_WORKER_STAT_TAG_OFFLOAD(worker, BLOCK_NO_IFACE); + return 0; + } + + max_post = wiface->attr.cap.tag.recv.max_outstanding - wiface->post_count; + + if (req_queue->sw_count >= max_post) { + return 0; + } + + ucs_queue_for_each(req_exp, &req_queue->queue, recv.queue) { + if (req_exp->flags & UCP_REQUEST_FLAG_OFFLOADED) { + continue; + } + ucs_assert(req_exp != req); + status = ucp_tag_offload_do_post(req_exp); + if (status != UCS_OK) { + return 0; + } + --req_queue->sw_count; + --worker->tm.expected.sw_all_count; + } + + return 1; +} + +UCS_PROFILE_FUNC(int, ucp_tag_offload_post, (req, req_queue), + ucp_request_t *req, ucp_request_queue_t *req_queue) +{ + ucp_worker_t *worker = req->recv.worker; + ucp_context_t *context = worker->context; + + if (!UCP_DT_IS_CONTIG(req->recv.datatype)) { + /* Non-contig buffers not supported yet. */ + UCP_WORKER_STAT_TAG_OFFLOAD(worker, BLOCK_NON_CONTIG); + return 0; + } + + if (req->recv.tag.tag_mask != UCP_TAG_MASK_FULL) { + if (!ucp_tag_is_specific_source(context, req->recv.tag.tag_mask)) { + /* Sender rank wildcard */ + UCP_WORKER_STAT_TAG_OFFLOAD(worker, BLOCK_WILDCARD); + return 0; + } else if (worker->tm.expected.sw_all_count) { + /* There are some requests which must be completed in SW. + * Do not post tags to HW until they are completed. 
*/ + UCP_WORKER_STAT_TAG_OFFLOAD(worker, BLOCK_SW_PEND); + return 0; + } + } else if (worker->tm.expected.wildcard.sw_count || + (req_queue->sw_count && !ucp_tag_offload_post_sw_reqs(req, req_queue))) { + /* There are some requests which must be completed in SW */ + UCP_WORKER_STAT_TAG_OFFLOAD(worker, BLOCK_SW_PEND); + return 0; + } + + if (ucp_tag_offload_do_post(req) != UCS_OK) { + return 0; + } + + return 1; +} + +static size_t ucp_tag_offload_pack_eager(void *dest, void *arg) +{ + ucp_request_t *req = arg; + size_t length; + + length = ucp_dt_pack(req->send.ep->worker, req->send.datatype, + req->send.mem_type, dest, req->send.buffer, + &req->send.state.dt, req->send.length); + ucs_assert(length == req->send.length); + return length; +} + +static ucs_status_t ucp_tag_offload_eager_short(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ucs_status_t status; + + req->send.lane = ucp_ep_get_tag_lane(ep); + status = uct_ep_tag_eager_short(ep->uct_eps[req->send.lane], + req->send.tag.tag, req->send.buffer, + req->send.length); + if (status == UCS_OK) { + ucp_request_complete_send(req, UCS_OK); + } + return status; +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_do_tag_offload_bcopy(uct_pending_req_t *self, uint64_t imm_data, + uct_pack_callback_t pack_cb) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ssize_t packed_len; + + req->send.lane = ucp_ep_get_tag_lane(ep); + packed_len = uct_ep_tag_eager_bcopy(ep->uct_eps[req->send.lane], + req->send.tag.tag, imm_data, + pack_cb, req, 0); + if (packed_len < 0) { + return (ucs_status_t)packed_len; + } + return UCS_OK; +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_do_tag_offload_zcopy(uct_pending_req_t *self, uint64_t imm_data, + ucp_req_complete_func_t complete) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + 
ucp_dt_state_t dt_state = req->send.state.dt; + size_t max_iov = ucp_ep_config(ep)->tag.eager.max_iov; + uct_iov_t *iov = ucs_alloca(max_iov * sizeof(uct_iov_t)); + size_t iovcnt = 0; + ucs_status_t status; + + req->send.lane = ucp_ep_get_tag_lane(ep); + + ucp_dt_iov_copy_uct(ep->worker->context, iov, &iovcnt, max_iov, &dt_state, + req->send.buffer, req->send.datatype, req->send.length, + ucp_ep_md_index(ep, req->send.lane), NULL); + + status = uct_ep_tag_eager_zcopy(ep->uct_eps[req->send.lane], req->send.tag.tag, + imm_data, iov, iovcnt, 0, + &req->send.state.uct_comp); + if (status == UCS_OK) { + complete(req, UCS_OK); + } else if (status == UCS_INPROGRESS) { + ucp_request_send_state_advance(req, &dt_state, + UCP_REQUEST_SEND_PROTO_ZCOPY_AM, status); + } + + return UCS_STATUS_IS_ERR(status) ? status : UCS_OK; +} + +static ucs_status_t ucp_tag_offload_eager_bcopy(uct_pending_req_t *self) +{ + ucs_status_t status = ucp_do_tag_offload_bcopy(self, 0ul, + ucp_tag_offload_pack_eager); + + if (status == UCS_OK) { + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_request_send_generic_dt_finish(req); + ucp_request_complete_send(req, UCS_OK); + } + return status; +} + +static ucs_status_t ucp_tag_offload_eager_zcopy(uct_pending_req_t *self) +{ + return ucp_do_tag_offload_zcopy(self, 0ul, + ucp_proto_am_zcopy_req_complete); +} + +ucs_status_t ucp_tag_offload_sw_rndv(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + ucp_rndv_rts_hdr_t *rndv_rts_hdr; + unsigned rndv_hdr_len; + size_t packed_len; + + ucs_assert((UCP_DT_IS_CONTIG(req->send.datatype) && + (req->send.length > ucp_ep_config(ep)->tag.offload.max_rndv_zcopy)) || + !UCP_DT_IS_CONTIG(req->send.datatype) || + !(ep->worker->context->tl_mds[ucp_ep_md_index(ep, req->send.lane)].attr.cap. 
+ reg_mem_types & UCS_BIT(req->send.mem_type)) || + ep->worker->context->config.ext.tm_sw_rndv); + + /* send RTS to allow fallback to SW RNDV on receiver */ + rndv_hdr_len = sizeof(ucp_rndv_rts_hdr_t) + ucp_ep_config(ep)->tag.rndv.rkey_size; + rndv_rts_hdr = ucs_alloca(rndv_hdr_len); + packed_len = ucp_tag_rndv_rts_pack(rndv_rts_hdr, req); + ucs_assert((rndv_rts_hdr->address != 0) || !UCP_DT_IS_CONTIG(req->send.datatype) || + !ucp_rndv_is_get_zcopy(req->send.mem_type, + ep->worker->context->config.ext.rndv_mode)); + return uct_ep_tag_rndv_request(ep->uct_eps[req->send.lane], req->send.tag.tag, + rndv_rts_hdr, packed_len, 0); +} + +static void ucp_tag_offload_rndv_zcopy_completion(uct_completion_t *self, + ucs_status_t status) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, + send.state.uct_comp); + ucp_proto_am_zcopy_req_complete(req, status); +} + +ucs_status_t ucp_tag_offload_rndv_zcopy(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = req->send.ep; + size_t max_iov = ucp_ep_config(ep)->tag.eager.max_iov; + uct_iov_t *iov = ucs_alloca(max_iov * sizeof(uct_iov_t)); + size_t iovcnt = 0; + ucp_rsc_index_t md_index; + ucp_dt_state_t dt_state; + void *rndv_op; + + md_index = ucp_ep_md_index(ep, req->send.lane); + + ucp_tag_offload_unexp_rndv_hdr_t rndv_hdr = { + .ep_ptr = ucp_request_get_dest_ep_ptr(req), + .reqptr = (uintptr_t)req, + .md_index = md_index + }; + + dt_state = req->send.state.dt; + + UCS_STATIC_ASSERT(sizeof(ucp_rsc_index_t) <= sizeof(rndv_hdr.md_index)); + ucs_assert_always(UCP_DT_IS_CONTIG(req->send.datatype)); + + ucp_dt_iov_copy_uct(ep->worker->context, iov, &iovcnt, max_iov, &dt_state, + req->send.buffer, req->send.datatype, req->send.length, + ucp_ep_md_index(ep, req->send.lane), NULL); + + rndv_op = uct_ep_tag_rndv_zcopy(ep->uct_eps[req->send.lane], req->send.tag.tag, + &rndv_hdr, sizeof(rndv_hdr), iov, iovcnt, 0, + &req->send.state.uct_comp); + if 
(UCS_PTR_IS_ERR(rndv_op)) { + return UCS_PTR_STATUS(rndv_op); + } + ucp_request_send_state_advance(req, &dt_state, + UCP_REQUEST_SEND_PROTO_RNDV_GET, + UCS_INPROGRESS); + + req->flags |= UCP_REQUEST_FLAG_OFFLOADED; + req->send.tag_offload.rndv_op = rndv_op; + return UCS_OK; +} + +void ucp_tag_offload_cancel_rndv(ucp_request_t *req) +{ + ucp_ep_t *ep = req->send.ep; + ucs_status_t status; + + status = uct_ep_tag_rndv_cancel(ep->uct_eps[ucp_ep_get_tag_lane(ep)], + req->send.tag_offload.rndv_op); + if (status != UCS_OK) { + ucs_error("Failed to cancel tag rndv op %s", ucs_status_string(status)); + } + + req->flags &= ~UCP_REQUEST_FLAG_OFFLOADED; +} + +ucs_status_t ucp_tag_offload_start_rndv(ucp_request_t *sreq) +{ + ucp_ep_t *ep = sreq->send.ep; + ucp_context_t *context = ep->worker->context; + ucp_md_index_t mdi = ucp_ep_md_index(ep, sreq->send.lane); + uct_md_attr_t *md_attr = &context->tl_mds[mdi].attr; + ucs_status_t status; + + /* should be set by ucp_tag_send_req_init() */ + ucs_assert(sreq->send.lane == ucp_ep_get_tag_lane(ep)); + + if (UCP_DT_IS_CONTIG(sreq->send.datatype) && + !context->config.ext.tm_sw_rndv && + (sreq->send.length <= ucp_ep_config(ep)->tag.offload.max_rndv_zcopy) && + (md_attr->cap.reg_mem_types & UCS_BIT(sreq->send.mem_type))) { + ucp_request_send_state_reset(sreq, ucp_tag_offload_rndv_zcopy_completion, + UCP_REQUEST_SEND_PROTO_RNDV_GET); + + /* Register send buffer with tag lane, because tag offload rndv + * protocol will perform RDMA_READ on it (if it arrives expectedly) */ + status = ucp_request_send_buffer_reg_lane(sreq, sreq->send.lane, 0); + if (status != UCS_OK) { + return status; + } + + /* contiguous buffer, offload can be used, but only a single lane */ + sreq->send.uct.func = ucp_tag_offload_rndv_zcopy; + } else { + ucp_request_send_state_reset(sreq, NULL, UCP_REQUEST_SEND_PROTO_RNDV_GET); + + /* RNDV will be performed by the SW - can register with SW RNDV lanes + * to get multirail benefits */ + status = 
ucp_tag_rndv_reg_send_buffer(sreq); + if (status != UCS_OK) { + return status; + } + + /* offload enabled but can't be used */ + sreq->send.uct.func = ucp_tag_offload_sw_rndv; + } + + return UCS_OK; +} + +const ucp_request_send_proto_t ucp_tag_offload_proto = { + .contig_short = ucp_tag_offload_eager_short, + .bcopy_single = ucp_tag_offload_eager_bcopy, + .bcopy_multi = NULL, + .zcopy_single = ucp_tag_offload_eager_zcopy, + .zcopy_multi = NULL, + .zcopy_completion = ucp_proto_am_zcopy_completion, + .only_hdr_size = 0 +}; + +/* Eager sync */ +static UCS_F_ALWAYS_INLINE void +ucp_tag_offload_sync_posted(ucp_worker_t *worker, ucp_request_t *req) +{ + req->send.tag_offload.ssend_tag = req->send.tag.tag; + ucs_queue_push(&worker->tm.offload.sync_reqs, &req->send.tag_offload.queue); +} + +static ucs_status_t ucp_tag_offload_eager_sync_bcopy(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_worker_t *worker = req->send.ep->worker; + ucs_status_t status; + + status = ucp_do_tag_offload_bcopy(self, ucp_request_get_dest_ep_ptr(req), + ucp_tag_offload_pack_eager); + if (status == UCS_OK) { + ucp_tag_offload_sync_posted(worker, req); + ucp_request_send_generic_dt_finish(req); + ucp_tag_eager_sync_completion(req, UCP_REQUEST_FLAG_LOCAL_COMPLETED, + UCS_OK); + } + return status; +} + +static ucs_status_t ucp_tag_offload_eager_sync_zcopy(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_worker_t *worker = req->send.ep->worker; + ucs_status_t status; + + status = ucp_do_tag_offload_zcopy(self, ucp_request_get_dest_ep_ptr(req), + ucp_tag_eager_sync_zcopy_req_complete); + if (status == UCS_OK) { + ucp_tag_offload_sync_posted(worker, req); + } + return status; +} + +void ucp_tag_offload_sync_send_ack(ucp_worker_h worker, uintptr_t ep_ptr, + ucp_tag_t stag, uint16_t recv_flags) +{ + ucp_request_t *req; + + ucs_assert(recv_flags & UCP_RECV_DESC_FLAG_EAGER_OFFLOAD); + + req = 
ucp_proto_ssend_ack_request_alloc(worker, ep_ptr); + if (req == NULL) { + ucs_fatal("could not allocate request"); + } + + req->send.proto.am_id = UCP_AM_ID_OFFLOAD_SYNC_ACK; + req->send.proto.sender_tag = stag; + + ucs_trace_req("tag_offload send_sync_ack ep 0x%lx tag %"PRIx64"", + ep_ptr, stag); + + ucp_request_send(req, 0); +} + +const ucp_request_send_proto_t ucp_tag_offload_sync_proto = { + .contig_short = NULL, + .bcopy_single = ucp_tag_offload_eager_sync_bcopy, + .bcopy_multi = NULL, + .zcopy_single = ucp_tag_offload_eager_sync_zcopy, + .zcopy_multi = NULL, + .zcopy_completion = ucp_tag_eager_sync_zcopy_completion, + .only_hdr_size = 0 +}; diff --git a/src/ucp/tag/offload.h b/src/ucp/tag/offload.h new file mode 100644 index 0000000..39cb97c --- /dev/null +++ b/src/ucp/tag/offload.h @@ -0,0 +1,163 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_TAG_OFFLOAD_H_ +#define UCP_TAG_OFFLOAD_H_ + +#include +#include +#include +#include + + +enum { + UCP_TAG_OFFLOAD_CANCEL_FORCE = UCS_BIT(0), + UCP_TAG_OFFLOAD_CANCEL_DEREG = UCS_BIT(1) +}; + +/** + * Header for unexpected rendezvous + */ +typedef struct { + uintptr_t ep_ptr; + uintptr_t reqptr; /* Request pointer */ + uint8_t md_index; /* md index */ +} UCS_S_PACKED ucp_tag_offload_unexp_rndv_hdr_t; + + +/** + * Header for sync send acknowledgment + */ +typedef struct { + uintptr_t ep_ptr; + ucp_tag_t sender_tag; +} UCS_S_PACKED ucp_offload_ssend_hdr_t; + + +/** + * Header for multi-fragmented sync send acknowledgment + * (carried by last fragment) + */ +typedef struct { + ucp_eager_middle_hdr_t super; + ucp_offload_ssend_hdr_t ssend_ack; +} UCS_S_PACKED ucp_offload_last_ssend_hdr_t; + + +extern const ucp_request_send_proto_t ucp_tag_offload_proto; +extern const ucp_request_send_proto_t ucp_tag_offload_sync_proto; + +ucs_status_t ucp_tag_offload_rndv_zcopy(uct_pending_req_t *self); + +ucs_status_t 
ucp_tag_offload_sw_rndv(uct_pending_req_t *self); + +void ucp_tag_offload_cancel_rndv(ucp_request_t *req); + +ucs_status_t ucp_tag_offload_start_rndv(ucp_request_t *sreq); + +ucs_status_t ucp_tag_offload_unexp_eager(void *arg, void *data, size_t length, + unsigned flags, uct_tag_t stag, + uint64_t imm, void **context); + + +ucs_status_t ucp_tag_offload_unexp_rndv(void *arg, unsigned flags, uint64_t stag, + const void *hdr, unsigned hdr_length, + uint64_t remote_addr, size_t length, + const void *rkey_buf); + +void ucp_tag_offload_cancel(ucp_worker_t *worker, ucp_request_t *req, unsigned mode); + +int ucp_tag_offload_post(ucp_request_t *req, ucp_request_queue_t *req_queue); + +void ucp_tag_offload_sync_send_ack(ucp_worker_h worker, uintptr_t ep_ptr, + ucp_tag_t stag, uint16_t recv_flags); + +/** + * @brief Activate tag offload interface + * + * @param [in] wiface UCP worker interface. + */ +void ucp_tag_offload_iface_activate(ucp_worker_iface_t *wiface); + +static UCS_F_ALWAYS_INLINE void +ucp_tag_offload_try_post(ucp_worker_t *worker, ucp_request_t *req, + ucp_request_queue_t *req_queue) +{ + if (ucs_unlikely(req->recv.length >= worker->tm.offload.thresh)) { + if (ucp_tag_offload_post(req, req_queue)) { + return; + } + } + + ++worker->tm.expected.sw_all_count; + ++req_queue->sw_count; + req_queue->block_count += !!(req->flags & UCP_REQUEST_FLAG_BLOCK_OFFLOAD); +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_offload_try_cancel(ucp_worker_t *worker, ucp_request_t *req, unsigned mode) +{ + if (ucs_unlikely(req->flags & UCP_REQUEST_FLAG_OFFLOADED)) { + ucp_tag_offload_cancel(worker, req, mode); + } +} + +/** + * @brief Handle tag offload unexpected message + * + * The routine activates tag offload interface if it the first unexpected + * message received on this interface. Also it maintains hash of tags, if + * more than one interface is active. 
Then, when expected receive request needs + * to be offloaded, the corresponding offload-capable interface is retrieved + * from the hash. + * + * @note Hash key is a tag masked with 'tag_sender_mask', because it needs to + * identify a particular sender, rather than every single tag. + * + * @note Tag is added to the hash table for messages bigger than TM_THRESH. + * Smaller messages are not supposed to be matched in HW, thus no need + * to waste time on hashing for them. + * + * + * @param [in] wiface UCP worker interface. + * @param [in] tag Tag of the arrived unexpected message. + */ +static UCS_F_ALWAYS_INLINE void +ucp_tag_offload_unexp(ucp_worker_iface_t *wiface, ucp_tag_t tag, size_t length) +{ + ucp_worker_t *worker = wiface->worker; + ucp_tag_t tag_key; + khiter_t hash_it; + int ret; + + ++wiface->proxy_recv_count; + + if (ucs_unlikely(!(wiface->flags & UCP_WORKER_IFACE_FLAG_OFFLOAD_ACTIVATED))) { + ucp_tag_offload_iface_activate(wiface); + } + + /* Need to hash all tags of messages arriving to offload-capable interface + if more than one interface is activated on the worker. This is needed to + avoid unwanted postings of receive buffers (those, which are expected to + arrive from offload incapable iface) to the HW. 
*/ + if (ucs_unlikely((length >= worker->tm.offload.thresh) && + (worker->num_active_ifaces > 1))) { + tag_key = worker->context->config.tag_sender_mask & tag; + hash_it = kh_get(ucp_tag_offload_hash, &worker->tm.offload.tag_hash, + tag_key); + if (ucs_likely(hash_it != kh_end(&worker->tm.offload.tag_hash))) { + return; + } + + hash_it = kh_put(ucp_tag_offload_hash, &worker->tm.offload.tag_hash, + tag_key, &ret); + ucs_assertv((ret == 1) || (ret == 2), "ret=%d", ret); + kh_value(&worker->tm.offload.tag_hash, hash_it) = wiface; + } +} + + +#endif diff --git a/src/ucp/tag/probe.c b/src/ucp/tag/probe.c new file mode 100644 index 0000000..ed3623d --- /dev/null +++ b/src/ucp/tag/probe.c @@ -0,0 +1,53 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "eager.h" +#include "rndv.h" +#include "tag_match.inl" + +#include +#include +#include + + +ucp_tag_message_h ucp_tag_probe_nb(ucp_worker_h worker, ucp_tag_t tag, + ucp_tag_t tag_mask, int remove, + ucp_tag_recv_info_t *info) +{ + ucp_context_h UCS_V_UNUSED context = worker->context; + ucp_recv_desc_t *rdesc; + uint16_t flags; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(worker->context, UCP_FEATURE_TAG, + return NULL); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + ucs_trace_req("probe_nb tag %"PRIx64"/%"PRIx64" remove=%d", tag, tag_mask, + remove); + + rdesc = ucp_tag_unexp_search(&worker->tm, tag, tag_mask, remove, "probe"); + if (rdesc != NULL) { + flags = rdesc->flags; + info->sender_tag = ucp_rdesc_get_tag(rdesc); + + if (flags & UCP_RECV_DESC_FLAG_EAGER_ONLY) { + info->length = rdesc->length - rdesc->payload_offset; + } else if (flags & UCP_RECV_DESC_FLAG_EAGER) { + info->length = ((ucp_eager_first_hdr_t*)(rdesc + 1))->total_len; + } else { + ucs_assert(flags & UCP_RECV_DESC_FLAG_RNDV); + info->length = ((ucp_rndv_rts_hdr_t*)(rdesc + 1))->size; + } + } + + 
UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + + return rdesc; +} diff --git a/src/ucp/tag/rndv.c b/src/ucp/tag/rndv.c new file mode 100644 index 0000000..de04ca5 --- /dev/null +++ b/src/ucp/tag/rndv.c @@ -0,0 +1,1488 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "rndv.h" +#include "tag_match.inl" +#include "offload.h" + +#include +#include + +static int ucp_rndv_is_recv_pipeline_needed(ucp_request_t *rndv_req, + ucs_memory_type_t mem_type) +{ + ucp_md_index_t md_index; + uct_md_attr_t *md_attr; + + /* no bw lanes */ + if (!ucp_ep_config(rndv_req->send.ep)->key.rma_bw_md_map) { + return 0; + } + + /* check if there is a bw lane to register mem type */ + ucs_for_each_bit(md_index, + ucp_ep_config(rndv_req->send.ep)->key.rma_bw_md_map) { + md_attr = &rndv_req->send.ep->worker->context->tl_mds[md_index].attr; + if (md_attr->cap.reg_mem_types & UCS_BIT(mem_type)) { + return 0; + } + } + + return 1; +} + +static ucp_lane_index_t +ucp_rndv_req_get_zcopy_rma_lane(ucp_request_t *rndv_req, ucp_lane_map_t ignore, + uct_rkey_t *uct_rkey_p) +{ + ucp_ep_h ep = rndv_req->send.ep; + ucp_ep_config_t *ep_config = ucp_ep_config(ep); + + return ucp_rkey_find_rma_lane(ep->worker->context, ep_config, + rndv_req->send.mem_type, + ep_config->tag.rndv.get_zcopy_lanes, + rndv_req->send.rndv_get.rkey, ignore, uct_rkey_p); +} + +size_t ucp_tag_rndv_rts_pack(void *dest, void *arg) +{ + ucp_request_t *sreq = arg; /* send request */ + ucp_rndv_rts_hdr_t *rndv_rts_hdr = dest; + ucp_worker_h worker = sreq->send.ep->worker; + ssize_t packed_rkey_size; + + rndv_rts_hdr->super.tag = sreq->send.tag.tag; + rndv_rts_hdr->sreq.reqptr = (uintptr_t)sreq; + rndv_rts_hdr->sreq.ep_ptr = ucp_request_get_dest_ep_ptr(sreq); + rndv_rts_hdr->size = sreq->send.length; + + /* Pack remote keys (which can be empty list) */ + if (UCP_DT_IS_CONTIG(sreq->send.datatype) && + 
ucp_rndv_is_get_zcopy(sreq->send.mem_type, + worker->context->config.ext.rndv_mode)) { + /* pack rkey, ask target to do get_zcopy */ + rndv_rts_hdr->address = (uintptr_t)sreq->send.buffer; + packed_rkey_size = ucp_rkey_pack_uct(worker->context, + sreq->send.state.dt.dt.contig.md_map, + sreq->send.state.dt.dt.contig.memh, + sreq->send.mem_type, + rndv_rts_hdr + 1); + if (packed_rkey_size < 0) { + ucs_fatal("failed to pack rendezvous remote key: %s", + ucs_status_string((ucs_status_t)packed_rkey_size)); + } + + ucs_assert(packed_rkey_size <= + ucp_ep_config(sreq->send.ep)->tag.rndv.rkey_size); + } else { + rndv_rts_hdr->address = 0; + packed_rkey_size = 0; + } + + return sizeof(*rndv_rts_hdr) + packed_rkey_size; +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_proto_progress_rndv_rts, (self), + uct_pending_req_t *self) +{ + ucp_request_t *sreq = ucs_container_of(self, ucp_request_t, send.uct); + size_t packed_rkey_size; + + /* send the RTS. the pack_cb will pack all the necessary fields in the RTS */ + packed_rkey_size = ucp_ep_config(sreq->send.ep)->tag.rndv.rkey_size; + return ucp_do_am_single(self, UCP_AM_ID_RNDV_RTS, ucp_tag_rndv_rts_pack, + sizeof(ucp_rndv_rts_hdr_t) + packed_rkey_size); +} + +static size_t ucp_tag_rndv_rtr_pack(void *dest, void *arg) +{ + ucp_request_t *rndv_req = arg; + ucp_rndv_rtr_hdr_t *rndv_rtr_hdr = dest; + ucp_request_t *rreq = rndv_req->send.rndv_rtr.rreq; + ssize_t packed_rkey_size; + + rndv_rtr_hdr->sreq_ptr = rndv_req->send.rndv_rtr.remote_request; + rndv_rtr_hdr->rreq_ptr = (uintptr_t)rreq; /* request of receiver side */ + + /* Pack remote keys (which can be empty list) */ + if (UCP_DT_IS_CONTIG(rreq->recv.datatype)) { + rndv_rtr_hdr->address = (uintptr_t)rreq->recv.buffer; + rndv_rtr_hdr->size = rndv_req->send.rndv_rtr.length; + rndv_rtr_hdr->offset = rreq->recv.frag.offset; + + packed_rkey_size = ucp_rkey_pack_uct(rndv_req->send.ep->worker->context, + rreq->recv.state.dt.contig.md_map, + rreq->recv.state.dt.contig.memh, + 
rreq->recv.mem_type, + rndv_rtr_hdr + 1); + if (packed_rkey_size < 0) { + return packed_rkey_size; + } + } else { + rndv_rtr_hdr->address = 0; + rndv_rtr_hdr->size = 0; + rndv_rtr_hdr->offset = 0; + packed_rkey_size = 0; + } + + return sizeof(*rndv_rtr_hdr) + packed_rkey_size; +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_proto_progress_rndv_rtr, (self), + uct_pending_req_t *self) +{ + ucp_request_t *rndv_req = ucs_container_of(self, ucp_request_t, send.uct); + size_t packed_rkey_size; + ucs_status_t status; + + /* send the RTR. the pack_cb will pack all the necessary fields in the RTR */ + packed_rkey_size = ucp_ep_config(rndv_req->send.ep)->tag.rndv.rkey_size; + status = ucp_do_am_single(self, UCP_AM_ID_RNDV_RTR, ucp_tag_rndv_rtr_pack, + sizeof(ucp_rndv_rtr_hdr_t) + packed_rkey_size); + if (status == UCS_OK) { + /* release rndv request */ + ucp_request_put(rndv_req); + } + + return status; +} + +ucs_status_t ucp_tag_rndv_reg_send_buffer(ucp_request_t *sreq) +{ + ucp_ep_h ep = sreq->send.ep; + ucp_md_map_t md_map; + ucs_status_t status; + + if (UCP_DT_IS_CONTIG(sreq->send.datatype) && + ucp_rndv_is_get_zcopy(sreq->send.mem_type, + ep->worker->context->config.ext.rndv_mode)) { + + /* register a contiguous buffer for rma_get */ + md_map = ucp_ep_config(ep)->key.rma_bw_md_map; + + /* Pass UCT_MD_MEM_FLAG_HIDE_ERRORS flag, because registration may fail + * if md does not support send memory type (e.g. CUDA memory). In this + * case RTS will be sent with empty key, and sender will fallback to + * PUT or pipeline protocols. 
*/ + status = ucp_request_send_buffer_reg(sreq, md_map, + UCT_MD_MEM_FLAG_HIDE_ERRORS); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +ucs_status_t ucp_tag_send_start_rndv(ucp_request_t *sreq) +{ + ucp_ep_h ep = sreq->send.ep; + ucs_status_t status; + + ucp_trace_req(sreq, "start_rndv to %s buffer %p length %zu", + ucp_ep_peer_name(ep), sreq->send.buffer, + sreq->send.length); + UCS_PROFILE_REQUEST_EVENT(sreq, "start_rndv", sreq->send.length); + + status = ucp_ep_resolve_dest_ep_ptr(ep, sreq->send.lane); + if (status != UCS_OK) { + return status; + } + + if (ucp_ep_is_tag_offload_enabled(ucp_ep_config(ep))) { + status = ucp_tag_offload_start_rndv(sreq); + } else { + ucs_assert(sreq->send.lane == ucp_ep_get_am_lane(ep)); + sreq->send.uct.func = ucp_proto_progress_rndv_rts; + status = ucp_tag_rndv_reg_send_buffer(sreq); + } + + return status; +} + +static void ucp_rndv_complete_send(ucp_request_t *sreq, ucs_status_t status) +{ + ucp_request_send_generic_dt_finish(sreq); + ucp_request_send_buffer_dereg(sreq); + ucp_request_complete_send(sreq, status); +} + +static void ucp_rndv_req_send_ats(ucp_request_t *rndv_req, ucp_request_t *rreq, + uintptr_t remote_request, ucs_status_t status) +{ + ucp_trace_req(rndv_req, "send ats remote_request 0x%lx", remote_request); + UCS_PROFILE_REQUEST_EVENT(rreq, "send_ats", 0); + + rndv_req->send.lane = ucp_ep_get_am_lane(rndv_req->send.ep); + rndv_req->send.uct.func = ucp_proto_progress_am_single; + rndv_req->send.proto.am_id = UCP_AM_ID_RNDV_ATS; + rndv_req->send.proto.status = status; + rndv_req->send.proto.remote_request = remote_request; + rndv_req->send.proto.comp_cb = ucp_request_put; + + ucp_request_send(rndv_req, 0); +} + +UCS_PROFILE_FUNC_VOID(ucp_rndv_complete_rma_put_zcopy, (sreq), + ucp_request_t *sreq) +{ + ucp_trace_req(sreq, "rndv_put completed"); + UCS_PROFILE_REQUEST_EVENT(sreq, "complete_rndv_put", 0); + + ucp_request_send_buffer_dereg(sreq); + ucp_request_complete_send(sreq, UCS_OK); +} 
+ +static void ucp_rndv_send_atp(ucp_request_t *sreq, uintptr_t remote_request) +{ + ucs_assertv(sreq->send.state.dt.offset == sreq->send.length, + "sreq=%p offset=%zu length=%zu", sreq, + sreq->send.state.dt.offset, sreq->send.length); + + ucp_trace_req(sreq, "send atp remote_request 0x%lx", remote_request); + UCS_PROFILE_REQUEST_EVENT(sreq, "send_atp", 0); + + /* destroy rkey before it gets overridden by ATP protocol data */ + ucp_rkey_destroy(sreq->send.rndv_put.rkey); + + sreq->send.lane = ucp_ep_get_am_lane(sreq->send.ep); + sreq->send.uct.func = ucp_proto_progress_am_single; + sreq->send.proto.am_id = UCP_AM_ID_RNDV_ATP; + sreq->send.proto.status = UCS_OK; + sreq->send.proto.remote_request = remote_request; + sreq->send.proto.comp_cb = ucp_rndv_complete_rma_put_zcopy; + + ucp_request_send(sreq, 0); +} + +UCS_PROFILE_FUNC_VOID(ucp_rndv_complete_frag_rma_put_zcopy, (fsreq), + ucp_request_t *fsreq) +{ + ucp_request_t *sreq = fsreq->send.proto.sreq; + + sreq->send.state.dt.offset += fsreq->send.length; + + /* delete fragments send request */ + ucp_request_put(fsreq); + + /* complete send request after put completions of all fragments */ + if (sreq->send.state.dt.offset == sreq->send.length) { + ucp_rndv_complete_rma_put_zcopy(sreq); + } +} + +static void ucp_rndv_send_frag_atp(ucp_request_t *fsreq, uintptr_t remote_request) +{ + ucp_trace_req(fsreq, "send frag atp remote_request 0x%lx", remote_request); + UCS_PROFILE_REQUEST_EVENT(fsreq, "send_frag_atp", 0); + + /* destroy rkey before it gets overridden by ATP protocol data */ + ucp_rkey_destroy(fsreq->send.rndv_put.rkey); + + fsreq->send.lane = ucp_ep_get_am_lane(fsreq->send.ep); + fsreq->send.uct.func = ucp_proto_progress_am_single; + fsreq->send.proto.sreq = fsreq->send.rndv_put.sreq; + fsreq->send.proto.am_id = UCP_AM_ID_RNDV_ATP; + fsreq->send.proto.status = UCS_OK; + fsreq->send.proto.remote_request = remote_request; + fsreq->send.proto.comp_cb = ucp_rndv_complete_frag_rma_put_zcopy; + + 
ucp_request_send(fsreq, 0); +} + +static void ucp_rndv_zcopy_recv_req_complete(ucp_request_t *req, ucs_status_t status) +{ + ucp_request_recv_buffer_dereg(req); + ucp_request_complete_tag_recv(req, status); +} + +static void ucp_rndv_complete_rma_get_zcopy(ucp_request_t *rndv_req) +{ + ucp_request_t *rreq = rndv_req->send.rndv_get.rreq; + + ucs_assertv(rndv_req->send.state.dt.offset == rndv_req->send.length, + "rndv_req=%p offset=%zu length=%zu", rndv_req, + rndv_req->send.state.dt.offset, rndv_req->send.length); + + ucp_trace_req(rndv_req, "rndv_get completed"); + UCS_PROFILE_REQUEST_EVENT(rreq, "complete_rndv_get", 0); + + ucp_rkey_destroy(rndv_req->send.rndv_get.rkey); + ucp_request_send_buffer_dereg(rndv_req); + + ucp_rndv_req_send_ats(rndv_req, rreq, rndv_req->send.rndv_get.remote_request, + UCS_OK); + ucp_rndv_zcopy_recv_req_complete(rreq, UCS_OK); +} + +static void ucp_rndv_recv_data_init(ucp_request_t *rreq, size_t size) +{ + rreq->status = UCS_OK; + rreq->recv.tag.remaining = size; + rreq->recv.frag.rreq = NULL; + rreq->recv.frag.offset = 0; +} + +static void ucp_rndv_req_send_rtr(ucp_request_t *rndv_req, ucp_request_t *rreq, + uintptr_t sender_reqptr, size_t recv_length) +{ + ucp_trace_req(rndv_req, "send rtr remote sreq 0x%lx rreq %p", sender_reqptr, + rreq); + + rndv_req->send.lane = ucp_ep_get_am_lane(rndv_req->send.ep); + rndv_req->send.uct.func = ucp_proto_progress_rndv_rtr; + rndv_req->send.rndv_rtr.remote_request = sender_reqptr; + rndv_req->send.rndv_rtr.rreq = rreq; + rndv_req->send.rndv_rtr.length = recv_length; + + ucp_request_send(rndv_req, 0); +} + +static void ucp_rndv_get_lanes_count(ucp_request_t *rndv_req) +{ + ucp_ep_h ep = rndv_req->send.ep; + ucp_lane_map_t map = 0; + uct_rkey_t uct_rkey; + ucp_lane_index_t lane; + + if (ucs_likely(rndv_req->send.rndv_get.lane_count != 0)) { + return; /* already resolved */ + } + + while ((lane = ucp_rndv_req_get_zcopy_rma_lane(rndv_req, map, &uct_rkey)) + != UCP_NULL_LANE) { + 
rndv_req->send.rndv_get.lane_count++; + map |= UCS_BIT(lane); + } + + rndv_req->send.rndv_get.lane_count = ucs_min(rndv_req->send.rndv_get.lane_count, + ep->worker->context->config.ext.max_rndv_lanes); +} + +static ucp_lane_index_t ucp_rndv_get_next_lane(ucp_request_t *rndv_req, uct_rkey_t *uct_rkey) +{ + /* get lane and mask it for next iteration. + * next time this lane will not be selected & we continue + * with another lane. After all lanes are masked - reset mask + * to zero & start from scratch. this way allows to enumerate + * all lanes */ + ucp_ep_h ep = rndv_req->send.ep; + ucp_lane_index_t lane; + + lane = ucp_rndv_req_get_zcopy_rma_lane(rndv_req, + rndv_req->send.rndv_get.lanes_map, + uct_rkey); + + if ((lane == UCP_NULL_LANE) && (rndv_req->send.rndv_get.lanes_map != 0)) { + /* lanes_map != 0 - no more lanes (but BW lanes are exist because map + * is not NULL - we found at least one lane on previous iteration). + * reset used lanes map to NULL and iterate it again */ + rndv_req->send.rndv_get.lanes_map = 0; + lane = ucp_rndv_req_get_zcopy_rma_lane(rndv_req, + rndv_req->send.rndv_get.lanes_map, + uct_rkey); + } + + if (ucs_unlikely(lane == UCP_NULL_LANE)) { + /* there are no BW lanes */ + return UCP_NULL_LANE; + } + + rndv_req->send.rndv_get.lanes_map |= UCS_BIT(lane); + /* in case if masked too much lanes - reset mask to zero + * to select first lane next time */ + if (ucs_popcount(rndv_req->send.rndv_get.lanes_map) >= + ep->worker->context->config.ext.max_rndv_lanes) { + rndv_req->send.rndv_get.lanes_map = 0; + } + return lane; +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_progress_rma_get_zcopy, (self), + uct_pending_req_t *self) +{ + ucp_request_t *rndv_req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_h ep = rndv_req->send.ep; + ucp_ep_config_t *config = ucp_ep_config(ep); + const size_t max_iovcnt = 1; + uct_iface_attr_t* attrs; + ucs_status_t status; + size_t offset, length, ucp_mtu, remainder, align, chunk; + uct_iov_t 
iov[max_iovcnt]; + size_t iovcnt; + ucp_rsc_index_t rsc_index; + ucp_dt_state_t state; + uct_rkey_t uct_rkey; + size_t min_zcopy; + size_t max_zcopy; + size_t tail; + int pending_add_res; + ucp_lane_index_t lane; + + ucp_rndv_get_lanes_count(rndv_req); + + /* Figure out which lane to use for get operation */ + rndv_req->send.lane = lane = ucp_rndv_get_next_lane(rndv_req, &uct_rkey); + + if (lane == UCP_NULL_LANE) { + /* If can't perform get_zcopy - switch to active-message. + * NOTE: we do not register memory and do not send our keys. */ + ucp_trace_req(rndv_req, "remote memory unreachable, switch to rtr"); + ucp_rkey_destroy(rndv_req->send.rndv_get.rkey); + ucp_rndv_recv_data_init(rndv_req->send.rndv_get.rreq, + rndv_req->send.length); + ucp_rndv_req_send_rtr(rndv_req, rndv_req->send.rndv_get.rreq, + rndv_req->send.rndv_get.remote_request, + rndv_req->send.length); + return UCS_OK; + } + + if (!rndv_req->send.mdesc) { + status = ucp_send_request_add_reg_lane(rndv_req, lane); + ucs_assert_always(status == UCS_OK); + } + + rsc_index = ucp_ep_get_rsc_index(ep, lane); + attrs = ucp_worker_iface_get_attr(ep->worker, rsc_index); + align = attrs->cap.get.opt_zcopy_align; + ucp_mtu = attrs->cap.get.align_mtu; + min_zcopy = config->tag.rndv.min_get_zcopy; + max_zcopy = config->tag.rndv.max_get_zcopy; + + offset = rndv_req->send.state.dt.offset; + remainder = (uintptr_t)rndv_req->send.buffer % align; + + if ((offset == 0) && (remainder > 0) && (rndv_req->send.length > ucp_mtu)) { + length = ucp_mtu - remainder; + } else { + chunk = ucs_align_up((size_t)(ucs_min(rndv_req->send.length / + rndv_req->send.rndv_get.lane_count, + max_zcopy) * config->tag.rndv.scale[lane]), + align); + length = ucs_min(chunk, rndv_req->send.length - offset); + } + + /* ensure that tail (rest of message) is over min_zcopy */ + tail = rndv_req->send.length - (offset + length); + if (ucs_unlikely(tail && (tail < min_zcopy))) { + /* ok, tail is less get zcopy minimal & could not be processed as + * 
standalone operation */ + /* check if we have room to increase current part and not + * step over max_zcopy */ + if (length < (max_zcopy - tail)) { + /* if we can encrease length by min_zcopy - let's do it to + * avoid small tail (we have limitation on minimal get zcopy) */ + length += tail; + } else { + /* reduce current length by align or min_zcopy value + * to process it on next round */ + ucs_assert(length > ucs_max(min_zcopy, align)); + length -= ucs_max(min_zcopy, align); + } + } + + ucs_assert(length >= min_zcopy); + ucs_assert((rndv_req->send.length - (offset + length) == 0) || + (rndv_req->send.length - (offset + length) >= min_zcopy)); + + ucs_trace_data("req %p: offset %zu remainder %zu rma-get to %p len %zu lane %d", + rndv_req, offset, remainder, + UCS_PTR_BYTE_OFFSET(rndv_req->send.buffer, offset), + length, lane); + + state = rndv_req->send.state.dt; + /* TODO: is this correct? memh array may skip MD's where + * registration is not supported. for now SHM may avoid registration, + * but it will work on single lane */ + ucp_dt_iov_copy_uct(ep->worker->context, iov, &iovcnt, max_iovcnt, &state, + rndv_req->send.buffer, ucp_dt_make_contig(1), length, + ucp_ep_md_index(ep, lane), + rndv_req->send.mdesc); + + for (;;) { + status = uct_ep_get_zcopy(ep->uct_eps[lane], + iov, iovcnt, + rndv_req->send.rndv_get.remote_address + offset, + uct_rkey, + &rndv_req->send.state.uct_comp); + ucp_request_send_state_advance(rndv_req, &state, + UCP_REQUEST_SEND_PROTO_RNDV_GET, + status); + if (rndv_req->send.state.dt.offset == rndv_req->send.length) { + if (rndv_req->send.state.uct_comp.count == 0) { + rndv_req->send.state.uct_comp.func(&rndv_req->send.state.uct_comp, status); + } + return UCS_OK; + } else if (!UCS_STATUS_IS_ERR(status)) { + /* in case if not all chunks are transmitted - return in_progress + * status */ + return UCS_INPROGRESS; + } else { + if (status == UCS_ERR_NO_RESOURCE) { + if (lane != rndv_req->send.pending_lane) { + /* switch to new pending lane */ 
+ pending_add_res = ucp_request_pending_add(rndv_req, &status, 0); + if (!pending_add_res) { + /* failed to switch req to pending queue, try again */ + continue; + } + ucs_assert(status == UCS_INPROGRESS); + return UCS_OK; + } + } + return status; + } + } +} + +UCS_PROFILE_FUNC_VOID(ucp_rndv_get_completion, (self, status), + uct_completion_t *self, ucs_status_t status) +{ + ucp_request_t *rndv_req = ucs_container_of(self, ucp_request_t, + send.state.uct_comp); + + if (rndv_req->send.state.dt.offset == rndv_req->send.length) { + ucp_rndv_complete_rma_get_zcopy(rndv_req); + } +} + +static void ucp_rndv_put_completion(uct_completion_t *self, ucs_status_t status) +{ + ucp_request_t *sreq = ucs_container_of(self, ucp_request_t, + send.state.uct_comp); + + if (sreq->send.state.dt.offset == sreq->send.length) { + ucp_rndv_send_atp(sreq, sreq->send.rndv_put.remote_request); + } +} + +static void ucp_rndv_req_send_rma_get(ucp_request_t *rndv_req, ucp_request_t *rreq, + const ucp_rndv_rts_hdr_t *rndv_rts_hdr) +{ + ucs_status_t status; + + ucp_trace_req(rndv_req, "start rma_get rreq %p", rreq); + + rndv_req->send.uct.func = ucp_rndv_progress_rma_get_zcopy; + rndv_req->send.buffer = rreq->recv.buffer; + rndv_req->send.mem_type = rreq->recv.mem_type; + rndv_req->send.datatype = ucp_dt_make_contig(1); + rndv_req->send.length = rndv_rts_hdr->size; + rndv_req->send.rndv_get.remote_request = rndv_rts_hdr->sreq.reqptr; + rndv_req->send.rndv_get.remote_address = rndv_rts_hdr->address; + rndv_req->send.rndv_get.rreq = rreq; + rndv_req->send.rndv_get.lanes_map = 0; + rndv_req->send.rndv_get.lane_count = 0; + rndv_req->send.datatype = rreq->recv.datatype; + + status = ucp_ep_rkey_unpack(rndv_req->send.ep, rndv_rts_hdr + 1, + &rndv_req->send.rndv_get.rkey); + if (status != UCS_OK) { + ucs_fatal("failed to unpack rendezvous remote key received from %s: %s", + ucp_ep_peer_name(rndv_req->send.ep), ucs_status_string(status)); + } + + ucp_request_send_state_init(rndv_req, 
ucp_dt_make_contig(1), 0); + ucp_request_send_state_reset(rndv_req, ucp_rndv_get_completion, + UCP_REQUEST_SEND_PROTO_RNDV_GET); + + ucp_request_send(rndv_req, 0); +} + +static void ucp_rndv_send_frag_rtr(ucp_worker_h worker, ucp_request_t *rndv_req, + ucp_request_t *rreq, + const ucp_rndv_rts_hdr_t *rndv_rts_hdr) +{ + size_t max_frag_size = worker->context->config.ext.rndv_frag_size; + int i, num_frags; + size_t frag_size; + size_t offset; + ucp_mem_desc_t *mdesc; + ucp_request_t *freq; + ucp_request_t *frndv_req; + unsigned md_index; + unsigned memh_index; + + offset = 0; + num_frags = ucs_div_round_up(rndv_rts_hdr->size, max_frag_size); + + for (i = 0; i < num_frags; i++) { + frag_size = ucs_min(max_frag_size, (rndv_rts_hdr->size - offset)); + + /* internal fragment recv request allocated on receiver side to receive + * put fragment from sender and to perform a put to recv buffer */ + freq = ucp_request_get(worker); + if (freq == NULL) { + ucs_fatal("failed to allocate fragment receive request"); + } + + /* internal rndv request to send RTR */ + frndv_req = ucp_request_get(worker); + if (frndv_req == NULL) { + ucs_fatal("failed to allocate fragment rendezvous reply"); + } + + /* allocate fragment recv buffer desc*/ + mdesc = ucp_worker_mpool_get(&worker->rndv_frag_mp); + if (mdesc == NULL) { + ucs_fatal("failed to allocate fragment memory buffer"); + } + + freq->recv.buffer = mdesc + 1; + freq->recv.datatype = ucp_dt_make_contig(1); + freq->recv.mem_type = UCS_MEMORY_TYPE_HOST; + freq->recv.length = frag_size; + freq->recv.state.dt.contig.md_map = 0; + freq->recv.frag.rreq = rreq; + freq->recv.frag.offset = offset; + + memh_index = 0; + ucs_for_each_bit(md_index, + (ucp_ep_config(rndv_req->send.ep)->key.rma_bw_md_map & + mdesc->memh->md_map)) { + freq->recv.state.dt.contig.memh[memh_index++] = ucp_memh2uct(mdesc->memh, md_index); + freq->recv.state.dt.contig.md_map |= UCS_BIT(md_index); + } + ucs_assert(memh_index <= UCP_MAX_OP_MDS); + + frndv_req->send.ep = 
rndv_req->send.ep; + frndv_req->send.pending_lane = UCP_NULL_LANE; + + ucp_rndv_req_send_rtr(frndv_req, freq, rndv_rts_hdr->sreq.reqptr, + freq->recv.length); + offset += frag_size; + } + + /* release original rndv reply request */ + ucp_request_put(rndv_req); +} + +static UCS_F_ALWAYS_INLINE int +ucp_rndv_is_rkey_ptr(const ucp_rndv_rts_hdr_t *rndv_rts_hdr, ucp_ep_h ep, + ucs_memory_type_t recv_mem_type, ucp_rndv_mode_t rndv_mode) +{ + const ucp_ep_config_t *ep_config = ucp_ep_config(ep); + + return /* must have remote address */ + (rndv_rts_hdr->address != 0) && + /* remote key must be on a memory domain for which we support rkey_ptr */ + (ucp_rkey_packed_md_map(rndv_rts_hdr + 1) & + ep_config->tag.rndv.rkey_ptr_dst_mds) && + /* rendezvous mode must not be forced to put/get */ + (rndv_mode == UCP_RNDV_MODE_AUTO) && + /* need local memory access for data unpack */ + UCP_MEM_IS_ACCESSIBLE_FROM_CPU(recv_mem_type); +} + +static void ucp_rndv_do_rkey_ptr(ucp_request_t *rndv_req, ucp_request_t *rreq, + const ucp_rndv_rts_hdr_t *rndv_rts_hdr) +{ + ucp_ep_h ep = rndv_req->send.ep; + const ucp_ep_config_t *ep_config = ucp_ep_config(ep); + ucp_md_index_t dst_md_index; + ucp_lane_index_t i, lane; + ucs_status_t status; + unsigned rkey_index; + void *local_ptr; + ucp_rkey_h rkey; + + ucp_trace_req(rndv_req, "start rkey_ptr rndv rreq %p", rreq); + + status = ucp_ep_rkey_unpack(ep, rndv_rts_hdr + 1, &rkey); + if (status != UCS_OK) { + ucs_fatal("failed to unpack rendezvous remote key received from %s: %s", + ucp_ep_peer_name(ep), ucs_status_string(status)); + } + + /* Find a lane which is capable of accessing the destination memory */ + lane = UCP_NULL_LANE; + for (i = 0; i < ep_config->key.num_lanes; ++i) { + dst_md_index = ep_config->key.lanes[i].dst_md_index; + if (UCS_BIT(dst_md_index) & rkey->md_map) { + lane = i; + break; + } + } + + if (ucs_unlikely(lane == UCP_NULL_LANE)) { + /* We should be able to find a lane, because ucp_rndv_is_rkey_ptr() + * already checked that 
(rkey->md_map & ep_config->rkey_ptr_dst_mds) != 0 + */ + ucs_fatal("failed to find a lane to access remote memory domains 0x%lx", + rkey->md_map); + } + + rkey_index = ucs_bitmap2idx(rkey->md_map, dst_md_index); + status = uct_rkey_ptr(rkey->tl_rkey[rkey_index].cmpt, + &rkey->tl_rkey[rkey_index].rkey, + rndv_rts_hdr->address, &local_ptr); + if (status == UCS_OK) { + ucp_trace_req(rndv_req, "obtained a local pointer to remote buffer: %p", + local_ptr); + status = ucp_request_recv_data_unpack(rreq, local_ptr, + rndv_rts_hdr->size, 0, 1); + } + + ucp_request_complete_tag_recv(rreq, status); + ucp_rkey_destroy(rkey); + ucp_rndv_req_send_ats(rndv_req, rreq, rndv_rts_hdr->sreq.reqptr, status); +} + +UCS_PROFILE_FUNC_VOID(ucp_rndv_matched, (worker, rreq, rndv_rts_hdr), + ucp_worker_h worker, ucp_request_t *rreq, + const ucp_rndv_rts_hdr_t *rndv_rts_hdr) +{ + ucp_rndv_mode_t rndv_mode; + ucp_request_t *rndv_req; + ucp_ep_h ep; + + UCS_ASYNC_BLOCK(&worker->async); + + UCS_PROFILE_REQUEST_EVENT(rreq, "rndv_match", 0); + + /* rreq is the receive request on the receiver's side */ + rreq->recv.tag.info.sender_tag = rndv_rts_hdr->super.tag; + rreq->recv.tag.info.length = rndv_rts_hdr->size; + + /* the internal send request allocated on receiver side (to perform a "get" + * operation, send "ATS" and "RTR") */ + rndv_req = ucp_request_get(worker); + if (rndv_req == NULL) { + ucs_error("failed to allocate rendezvous reply"); + goto out; + } + + rndv_req->send.ep = ucp_worker_get_ep_by_ptr(worker, + rndv_rts_hdr->sreq.ep_ptr); + rndv_req->flags = 0; + rndv_req->send.mdesc = NULL; + rndv_req->send.pending_lane = UCP_NULL_LANE; + + ucp_trace_req(rreq, + "rndv matched remote {address 0x%"PRIx64" size %zu sreq 0x%lx}" + " rndv_sreq %p", rndv_rts_hdr->address, rndv_rts_hdr->size, + rndv_rts_hdr->sreq.reqptr, rndv_req); + + if (ucs_unlikely(rreq->recv.length < rndv_rts_hdr->size)) { + ucp_trace_req(rndv_req, + "rndv truncated remote size %zu local size %zu rreq %p", + rndv_rts_hdr->size, 
rreq->recv.length, rreq); + ucp_rndv_req_send_ats(rndv_req, rreq, rndv_rts_hdr->sreq.reqptr, UCS_OK); + ucp_request_recv_generic_dt_finish(rreq); + ucp_rndv_zcopy_recv_req_complete(rreq, UCS_ERR_MESSAGE_TRUNCATED); + goto out; + } + + /* if the receive side is not connected yet then the RTS was received on a stub ep */ + ep = rndv_req->send.ep; + rndv_mode = worker->context->config.ext.rndv_mode; + + if (ucp_rndv_is_rkey_ptr(rndv_rts_hdr, ep, rreq->recv.mem_type, rndv_mode)) { + ucp_rndv_do_rkey_ptr(rndv_req, rreq, rndv_rts_hdr); + goto out; + } + + if (UCP_DT_IS_CONTIG(rreq->recv.datatype)) { + if ((rndv_rts_hdr->address != 0) && + (ucp_rndv_is_get_zcopy(rreq->recv.mem_type, rndv_mode)) && + /* is it allowed to use GET Zcopy for the current message? */ + (rndv_rts_hdr->size >= ucp_ep_config(ep)->tag.rndv.min_get_zcopy) && + /* is GET Zcopy operation supported? */ + (ucp_ep_config(ep)->tag.rndv.max_get_zcopy != 0)) { + /* try to fetch the data with a get_zcopy operation */ + ucp_rndv_req_send_rma_get(rndv_req, rreq, rndv_rts_hdr); + goto out; + } else if (rndv_mode == UCP_RNDV_MODE_AUTO) { + /* check if we need pipelined memtype staging */ + if (UCP_MEM_IS_CUDA(rreq->recv.mem_type) && + ucp_rndv_is_recv_pipeline_needed(rndv_req, + rreq->recv.mem_type)) { + ucp_rndv_recv_data_init(rreq, rndv_rts_hdr->size); + ucp_rndv_send_frag_rtr(worker, rndv_req, rreq, rndv_rts_hdr); + goto out; + } + } + /* put protocol is allowed - register receive buffer memory for rma */ + ucs_assert(rndv_rts_hdr->size <= rreq->recv.length); + ucp_request_recv_buffer_reg(rreq, ucp_ep_config(ep)->key.rma_bw_md_map, + rndv_rts_hdr->size); + } + + /* The sender didn't specify its address in the RTS, or the rndv mode was + * configured to PUT, or GET rndv mode is unsupported - send an RTR and + * the sender will send the data with active message or put_zcopy. 
*/ + ucp_rndv_recv_data_init(rreq, rndv_rts_hdr->size); + ucp_rndv_req_send_rtr(rndv_req, rreq, rndv_rts_hdr->sreq.reqptr, + rndv_rts_hdr->size); + +out: + UCS_ASYNC_UNBLOCK(&worker->async); +} + +ucs_status_t ucp_rndv_process_rts(void *arg, void *data, size_t length, + unsigned tl_flags) +{ + ucp_worker_h worker = arg; + ucp_rndv_rts_hdr_t *rndv_rts_hdr = data; + ucp_recv_desc_t *rdesc; + ucp_request_t *rreq; + ucs_status_t status; + + rreq = ucp_tag_exp_search(&worker->tm, rndv_rts_hdr->super.tag); + if (rreq != NULL) { + ucp_rndv_matched(worker, rreq, rndv_rts_hdr); + + /* Cancel req in transport if it was offloaded, because it arrived + as unexpected */ + ucp_tag_offload_try_cancel(worker, rreq, UCP_TAG_OFFLOAD_CANCEL_FORCE); + + UCP_WORKER_STAT_RNDV(worker, EXP); + status = UCS_OK; + } else { + status = ucp_recv_desc_init(worker, data, length, 0, tl_flags, + sizeof(*rndv_rts_hdr), + UCP_RECV_DESC_FLAG_RNDV, 0, &rdesc); + if (!UCS_STATUS_IS_ERR(status)) { + ucp_tag_unexp_recv(&worker->tm, rdesc, rndv_rts_hdr->super.tag); + } + } + + return status; +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_rts_handler, + (arg, data, length, tl_flags), + void *arg, void *data, size_t length, unsigned tl_flags) +{ + return ucp_rndv_process_rts(arg, data, length, tl_flags); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_ats_handler, + (arg, data, length, flags), + void *arg, void *data, size_t length, unsigned flags) +{ + ucp_reply_hdr_t *rep_hdr = data; + ucp_request_t *sreq = (ucp_request_t*) rep_hdr->reqptr; + + /* dereg the original send request and set it to complete */ + UCS_PROFILE_REQUEST_EVENT(sreq, "rndv_ats_recv", 0); + if (sreq->flags & UCP_REQUEST_FLAG_OFFLOADED) { + ucp_tag_offload_cancel_rndv(sreq); + } + ucp_rndv_complete_send(sreq, rep_hdr->status); + return UCS_OK; +} + +static size_t ucp_rndv_pack_data(void *dest, void *arg) +{ + ucp_rndv_data_hdr_t *hdr = dest; + ucp_request_t *sreq = arg; + size_t length, offset; + + offset = sreq->send.state.dt.offset; + 
hdr->rreq_ptr = sreq->send.tag.rreq_ptr; + hdr->offset = offset; + length = ucs_min(sreq->send.length - offset, + ucp_ep_get_max_bcopy(sreq->send.ep, sreq->send.lane) - sizeof(*hdr)); + + return sizeof(*hdr) + ucp_dt_pack(sreq->send.ep->worker, sreq->send.datatype, + sreq->send.mem_type, hdr + 1, sreq->send.buffer, + &sreq->send.state.dt, length); +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_progress_am_bcopy, (self), + uct_pending_req_t *self) +{ + ucp_request_t *sreq = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_t *ep = sreq->send.ep; + ucs_status_t status; + + sreq->send.lane = ucp_ep_get_am_lane(ep); + + if (sreq->send.length <= ucp_ep_config(ep)->am.max_bcopy - sizeof(ucp_rndv_data_hdr_t)) { + /* send a single bcopy message */ + status = ucp_do_am_bcopy_single(self, UCP_AM_ID_RNDV_DATA, + ucp_rndv_pack_data); + } else { + status = ucp_do_am_bcopy_multi(self, UCP_AM_ID_RNDV_DATA, + UCP_AM_ID_RNDV_DATA, + ucp_rndv_pack_data, + ucp_rndv_pack_data, 1); + } + if (status == UCS_OK) { + ucp_rndv_complete_send(sreq, UCS_OK); + } else if (status == UCP_STATUS_PENDING_SWITCH) { + status = UCS_OK; + } + + return status; +} + +UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_progress_rma_put_zcopy, (self), + uct_pending_req_t *self) +{ + ucp_request_t *sreq = ucs_container_of(self, ucp_request_t, send.uct); + const size_t max_iovcnt = 1; + ucp_ep_h ep = sreq->send.ep; + ucs_status_t status; + size_t offset, ucp_mtu, align, remainder, length; + uct_iface_attr_t *attrs; + uct_iov_t iov[max_iovcnt]; + size_t iovcnt; + ucp_dt_state_t state; + + if (!sreq->send.mdesc) { + status = ucp_request_send_buffer_reg_lane(sreq, sreq->send.lane, 0); + ucs_assert_always(status == UCS_OK); + } + + attrs = ucp_worker_iface_get_attr(ep->worker, + ucp_ep_get_rsc_index(ep, sreq->send.lane)); + align = attrs->cap.put.opt_zcopy_align; + ucp_mtu = attrs->cap.put.align_mtu; + + offset = sreq->send.state.dt.offset; + remainder = (uintptr_t)sreq->send.buffer % align; + + if ((offset == 0) && 
(remainder > 0) && (sreq->send.length > ucp_mtu)) { + length = ucp_mtu - remainder; + } else { + length = ucs_min(sreq->send.length - offset, + ucp_ep_config(ep)->tag.rndv.max_put_zcopy); + } + + ucs_trace_data("req %p: offset %zu remainder %zu. read to %p len %zu", + sreq, offset, (uintptr_t)sreq->send.buffer % align, + UCS_PTR_BYTE_OFFSET(sreq->send.buffer, offset), length); + + state = sreq->send.state.dt; + ucp_dt_iov_copy_uct(ep->worker->context, iov, &iovcnt, max_iovcnt, &state, + sreq->send.buffer, ucp_dt_make_contig(1), length, + ucp_ep_md_index(ep, sreq->send.lane), sreq->send.mdesc); + status = uct_ep_put_zcopy(ep->uct_eps[sreq->send.lane], + iov, iovcnt, + sreq->send.rndv_put.remote_address + offset, + sreq->send.rndv_put.uct_rkey, + &sreq->send.state.uct_comp); + ucp_request_send_state_advance(sreq, &state, + UCP_REQUEST_SEND_PROTO_RNDV_PUT, + status); + if (sreq->send.state.dt.offset == sreq->send.length) { + if (sreq->send.state.uct_comp.count == 0) { + sreq->send.state.uct_comp.func(&sreq->send.state.uct_comp, status); + } + return UCS_OK; + } else if (!UCS_STATUS_IS_ERR(status)) { + return UCS_INPROGRESS; + } else { + return status; + } +} + +static void ucp_rndv_am_zcopy_send_req_complete(ucp_request_t *req, + ucs_status_t status) +{ + ucs_assert(req->send.state.uct_comp.count == 0); + ucp_request_send_buffer_dereg(req); + ucp_request_complete_send(req, status); +} + +static void ucp_rndv_am_zcopy_completion(uct_completion_t *self, + ucs_status_t status) +{ + ucp_request_t *sreq = ucs_container_of(self, ucp_request_t, + send.state.uct_comp); + if (sreq->send.state.dt.offset == sreq->send.length) { + ucp_rndv_am_zcopy_send_req_complete(sreq, status); + } else if (status != UCS_OK) { + ucs_fatal("error handling is unsupported with rendezvous protocol"); + } +} + +static ucs_status_t ucp_rndv_progress_am_zcopy_single(uct_pending_req_t *self) +{ + ucp_request_t *sreq = ucs_container_of(self, ucp_request_t, send.uct); + ucp_rndv_data_hdr_t hdr; + + 
hdr.rreq_ptr = sreq->send.tag.rreq_ptr; + hdr.offset = 0; + return ucp_do_am_zcopy_single(self, UCP_AM_ID_RNDV_DATA, &hdr, sizeof(hdr), + ucp_rndv_am_zcopy_send_req_complete); +} + +static ucs_status_t ucp_rndv_progress_am_zcopy_multi(uct_pending_req_t *self) +{ + ucp_request_t *sreq = ucs_container_of(self, ucp_request_t, send.uct); + ucp_rndv_data_hdr_t hdr; + + hdr.rreq_ptr = sreq->send.tag.rreq_ptr; + hdr.offset = sreq->send.state.dt.offset; + return ucp_do_am_zcopy_multi(self, + UCP_AM_ID_RNDV_DATA, + UCP_AM_ID_RNDV_DATA, + &hdr, sizeof(hdr), + &hdr, sizeof(hdr), + ucp_rndv_am_zcopy_send_req_complete, 1); +} + +UCS_PROFILE_FUNC_VOID(ucp_rndv_frag_send_put_completion, (self, status), + uct_completion_t *self, ucs_status_t status) +{ + ucp_request_t *freq = ucs_container_of(self, ucp_request_t, send.state.uct_comp); + ucp_request_t *req = freq->send.rndv_put.sreq; + + /* release memory descriptor */ + if (freq->send.mdesc) { + ucs_mpool_put_inline((void *)freq->send.mdesc); + } + + req->send.state.dt.offset += freq->send.length; + ucs_assert(req->send.state.dt.offset <= req->send.length); + + /* send ATP for last fragment of the rndv request */ + if (req->send.length == req->send.state.dt.offset) { + ucp_rndv_send_frag_atp(req, req->send.rndv_put.remote_request); + } + + ucp_request_put(freq); +} + +UCS_PROFILE_FUNC_VOID(ucp_rndv_frag_recv_put_completion, (self, status), + uct_completion_t *self, ucs_status_t status) +{ + ucp_request_t *freq = ucs_container_of(self, ucp_request_t, send.state.uct_comp); + ucp_request_t *req = freq->send.rndv_put.sreq; + + /* release memory descriptor */ + if (freq->send.mdesc) { + ucs_mpool_put_inline((void *)freq->send.mdesc); + } + + /* put completion on mem type endpoint to stage data to recv buffer */ + req->recv.tag.remaining -= freq->send.length; + + ucp_request_put(freq); + + if (req->recv.tag.remaining == 0) { + ucp_request_complete_tag_recv(req, UCS_OK); + } +} + +UCS_PROFILE_FUNC_VOID(ucp_rndv_frag_get_completion, 
(self, status),
+                      uct_completion_t *self, ucs_status_t status)
+{
+    ucp_request_t *freq = ucs_container_of(self, ucp_request_t, send.state.uct_comp);
+    ucp_request_t *fsreq = freq->send.rndv_get.rreq;
+
+    /* get completed on memtype endpoint to stage on host. send put request to receiver*/
+    ucp_request_send_state_reset(freq, ucp_rndv_frag_send_put_completion,
+                                 UCP_REQUEST_SEND_PROTO_RNDV_PUT);
+    freq->send.rndv_put.remote_address = fsreq->send.rndv_put.remote_address +
+        (freq->send.rndv_get.remote_address - (uint64_t)fsreq->send.buffer);
+    freq->send.ep = fsreq->send.ep;
+    freq->send.uct.func = ucp_rndv_progress_rma_put_zcopy;
+    freq->send.rndv_put.sreq = fsreq;
+    freq->send.rndv_put.rkey = fsreq->send.rndv_put.rkey;
+    freq->send.rndv_put.uct_rkey = fsreq->send.rndv_put.uct_rkey;
+    freq->send.lane = fsreq->send.lane;
+    freq->send.state.dt.dt.contig.md_map = 0;
+
+    ucp_request_send(freq, 0);
+}
+
+/**
+ * Serve one RTR by splitting the requested range of the send buffer into
+ * fragments of at most context->config.ext.rndv_frag_size bytes and posting
+ * one internal request per fragment.
+ *
+ * The range is min(rndv_rtr_hdr->size, sreq->send.length) bytes starting at
+ * rndv_rtr_hdr->offset in sreq->send.buffer. If the send buffer is accessible
+ * from the CPU, every fragment is posted as a put_zcopy directly from the user
+ * buffer. Otherwise every fragment is first fetched into a staging descriptor
+ * from worker->rndv_frag_mp with a get_zcopy on the memory-type endpoint, and
+ * ucp_rndv_frag_get_completion() turns it into the put.
+ *
+ * @param [in] sreq          Original rendezvous send request.
+ * @param [in] rndv_rtr_hdr  RTR header received from the peer.
+ *
+ * @return UCS_OK if all fragments were posted.
+ *         UCS_ERR_UNSUPPORTED if the memory domain of sreq->send.lane cannot
+ *         register host memory, or the memory-type endpoint has no rma_bw
+ *         lane. Nothing has been posted in that case, so the caller may fall
+ *         back to another protocol.
+ *         UCS_ERR_NO_MEMORY if a staging descriptor could not be allocated.
+ *         Fragments posted before the failure stay in flight.
+ */
+static ucs_status_t ucp_rndv_pipeline(ucp_request_t *sreq, ucp_rndv_rtr_hdr_t *rndv_rtr_hdr)
+{
+    ucp_worker_h worker = sreq->send.ep->worker;
+    ucp_context_h context = worker->context;
+    const uct_md_attr_t *md_attr;
+    ucp_lane_index_t mem_type_rma_lane;
+    ucp_ep_h mem_type_ep;
+    ucp_mem_desc_t *mdesc;
+    ucp_request_t *freq;
+    ucp_request_t *fsreq;
+    ucp_rsc_index_t md_index;
+    ucs_status_t status;
+    int i, num_frags;
+    size_t max_frag_size, rndv_size, length;
+    size_t offset, rndv_base_offset;
+
+    /* check if lane supports host memory, to stage sends through host memory */
+    md_attr = ucp_ep_md_attr(sreq->send.ep, sreq->send.lane);
+    if (!(md_attr->cap.reg_mem_types & UCS_BIT(UCS_MEMORY_TYPE_HOST))) {
+        return UCS_ERR_UNSUPPORTED;
+    }
+
+    rndv_size = ucs_min(rndv_rtr_hdr->size, sreq->send.length);
+    max_frag_size = context->config.ext.rndv_frag_size;
+    rndv_base_offset = rndv_rtr_hdr->offset;
+    num_frags = ucs_div_round_up(rndv_size, max_frag_size);
+
+    /* initialize send req state on first fragment rndv request */
+    if (rndv_base_offset == 0) {
+        ucp_request_send_state_reset(sreq, NULL, UCP_REQUEST_SEND_PROTO_RNDV_PUT);
+    }
+
+    /* internal send request allocated on sender side to handle send fragments for RTR */
+    fsreq = ucp_request_get(worker);
+    if (fsreq == NULL) {
+        ucs_fatal("failed to allocate fragment receive request");
+    }
+
+    ucp_request_send_state_init(fsreq, ucp_dt_make_contig(1), 0);
+    fsreq->send.buffer = UCS_PTR_BYTE_OFFSET(sreq->send.buffer,
+                                             rndv_base_offset);
+    fsreq->send.length = rndv_size;
+    fsreq->send.ep = sreq->send.ep;
+    fsreq->send.lane = sreq->send.lane;
+    fsreq->send.rndv_put.rkey = sreq->send.rndv_put.rkey;
+    fsreq->send.rndv_put.uct_rkey = sreq->send.rndv_put.uct_rkey;
+    fsreq->send.rndv_put.remote_request = rndv_rtr_hdr->rreq_ptr;
+    fsreq->send.rndv_put.remote_address = rndv_rtr_hdr->address;
+    fsreq->send.rndv_put.sreq = sreq;
+    fsreq->send.state.dt.offset = 0;
+
+    offset = 0;
+    for (i = 0; i < num_frags; i++) {
+        /* the last fragment carries whatever is left of the range */
+        length = (i == (num_frags - 1)) ? (rndv_size - offset) : max_frag_size;
+
+        /* internal fragment send request allocated on sender side to receive
+         * mem type fragment stage to host and to perform a put to receiver */
+        freq = ucp_request_get(worker);
+        if (freq == NULL) {
+            ucs_fatal("failed to allocate fragment receive request");
+        }
+
+        if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(sreq->send.mem_type)) {
+            /* sbuf is in host, directly do put */
+            ucp_request_send_state_reset(freq, ucp_rndv_frag_send_put_completion,
+                                         UCP_REQUEST_SEND_PROTO_RNDV_PUT);
+            md_index = ucp_ep_md_index(sreq->send.ep,
+                                       sreq->send.lane);
+            freq->send.ep = fsreq->send.ep;
+            freq->send.buffer = UCS_PTR_BYTE_OFFSET(fsreq->send.buffer,
+                                                    offset);
+            freq->send.datatype = ucp_dt_make_contig(1);
+            freq->send.mem_type = UCS_MEMORY_TYPE_HOST;
+            freq->send.state.dt.dt.contig.memh[0] =
+                ucp_memh_map2uct(sreq->send.state.dt.dt.contig.memh,
+                                 sreq->send.state.dt.dt.contig.md_map, md_index);
+            freq->send.state.dt.dt.contig.md_map = UCS_BIT(md_index);
+            freq->send.length = length;
+            freq->send.uct.func = ucp_rndv_progress_rma_put_zcopy;
+            freq->send.rndv_put.sreq = fsreq;
+            freq->send.rndv_put.rkey = fsreq->send.rndv_put.rkey;
+            freq->send.rndv_put.uct_rkey = fsreq->send.rndv_put.uct_rkey;
+            freq->send.rndv_put.remote_address = rndv_rtr_hdr->address + offset;
+            freq->send.rndv_put.remote_request = rndv_rtr_hdr->rreq_ptr;
+            freq->send.lane = fsreq->send.lane;
+            freq->send.mdesc = NULL;
+        } else {
+            /* perform get on memtype endpoint to stage data to host memory */
+            mem_type_ep = worker->mem_type_ep[sreq->send.mem_type];
+            mem_type_rma_lane = ucp_ep_config(mem_type_ep)->key.rma_bw_lanes[0];
+            if (mem_type_rma_lane == UCP_NULL_LANE) {
+                /* release the requests taken above instead of leaking them:
+                 * the caller treats UNSUPPORTED as "fall back to PUT_ZCOPY" */
+                status = UCS_ERR_UNSUPPORTED;
+                goto err_put_freq;
+            }
+
+            mdesc = ucp_worker_mpool_get(&worker->rndv_frag_mp);
+            if (mdesc == NULL) {
+                status = UCS_ERR_NO_MEMORY;
+                goto err_put_freq;
+            }
+
+            ucp_request_send_state_init(freq, ucp_dt_make_contig(1), 0);
+            ucp_request_send_state_reset(freq, ucp_rndv_frag_get_completion,
+                                         UCP_REQUEST_SEND_PROTO_RNDV_GET);
+            md_index = ucp_ep_md_index(mem_type_ep, mem_type_rma_lane);
+            freq->send.ep = mem_type_ep;
+            freq->send.buffer = mdesc + 1;
+            freq->send.datatype = ucp_dt_make_contig(1);
+            freq->send.mem_type = sreq->send.mem_type;
+            freq->send.state.dt.dt.contig.memh[0] = ucp_memh2uct(mdesc->memh, md_index);
+            freq->send.state.dt.dt.contig.md_map = UCS_BIT(md_index);
+            freq->send.length = length;
+            freq->send.uct.func = ucp_rndv_progress_rma_get_zcopy;
+            freq->send.rndv_get.rkey = NULL;
+            freq->send.rndv_get.remote_address =
+                (uint64_t)UCS_PTR_BYTE_OFFSET(fsreq->send.buffer, offset);
+            freq->send.rndv_get.lanes_map = 0;
+            freq->send.rndv_get.lane_count = 0;
+            freq->send.rndv_get.rreq = fsreq;
+            freq->send.mdesc = mdesc;
+        }
+
+        ucp_request_send(freq, 0);
+        offset += length;
+    }
+
+    return UCS_OK;
+
+err_put_freq:
+    /* the fragment request of the failing iteration was never posted */
+    ucp_request_put(freq);
+    if (i == 0) {
+        /* no fragment was posted, so nothing in flight refers to fsreq */
+        ucp_request_put(fsreq);
+    }
+    return status;
+}
+
+UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_atp_handler,
+                 (arg, data, length, flags),
+                 void *arg, void *data, size_t length, unsigned flags)
+{
+    ucp_reply_hdr_t *rep_hdr = data;
+    ucp_request_t *req =
(ucp_request_t*) rep_hdr->reqptr; + ucp_request_t *rreq; + ucp_worker_h worker; + ucp_lane_index_t mem_type_rma_lane; + ucp_mem_desc_t *mdesc; + ucp_rsc_index_t md_index; + ucp_ep_h mem_type_ep; + size_t frag_size, frag_offset; + + if (req->recv.frag.rreq) { + /* atp for fragmented rndv request */ + rreq = req->recv.frag.rreq; + worker = rreq->recv.worker; + frag_size = req->recv.length; + frag_offset = req->recv.frag.offset; + ucs_assert_always(!UCP_MEM_IS_ACCESSIBLE_FROM_CPU(rreq->recv.mem_type)); + + /* perform a put zcopy on memtype endpoint to stage from + * frag recv buffer to memtype recv buffer */ + mem_type_ep = worker->mem_type_ep[rreq->recv.mem_type]; + mem_type_rma_lane = ucp_ep_config(mem_type_ep)->key.rma_bw_lanes[0]; + if (mem_type_rma_lane == UCP_NULL_LANE) { + ucs_fatal("no rma bw lane to stage from stage buffer to" + " memory type recv buffer"); + } + md_index = ucp_ep_md_index(mem_type_ep, mem_type_rma_lane); + mdesc = (ucp_mem_desc_t*) req->recv.buffer - 1; + + ucp_request_send_state_init(req, ucp_dt_make_contig(1), 0); + ucp_request_send_state_reset(req, ucp_rndv_frag_recv_put_completion, + UCP_REQUEST_SEND_PROTO_RNDV_PUT); + req->send.ep = mem_type_ep; + req->send.lane = mem_type_rma_lane; + req->send.buffer = mdesc + 1; + req->send.datatype = ucp_dt_make_contig(1); + req->send.mem_type = rreq->recv.mem_type; + req->send.state.dt.dt.contig.memh[0] = ucp_memh2uct(mdesc->memh, md_index); + req->send.state.dt.dt.contig.md_map = UCS_BIT(md_index); + req->send.length = frag_size; + req->send.uct.func = ucp_rndv_progress_rma_put_zcopy; + req->send.rndv_put.sreq = rreq; + req->send.rndv_put.rkey = NULL; + req->send.rndv_put.remote_address = + (uint64_t)UCS_PTR_BYTE_OFFSET(rreq->recv.buffer, frag_offset); + req->send.mdesc = mdesc; + + ucp_request_send(req, 0); + } else { + UCS_PROFILE_REQUEST_EVENT(req, "rndv_atp_recv", 0); + ucp_rndv_zcopy_recv_req_complete(req, UCS_OK); + } + + return UCS_OK; +} + +UCS_PROFILE_FUNC(ucs_status_t, 
ucp_rndv_rtr_handler, + (arg, data, length, flags), + void *arg, void *data, size_t length, unsigned flags) +{ + ucp_rndv_rtr_hdr_t *rndv_rtr_hdr = data; + ucp_request_t *sreq = (ucp_request_t*)rndv_rtr_hdr->sreq_ptr; + ucp_ep_h ep = sreq->send.ep; + ucp_ep_config_t *ep_config = ucp_ep_config(ep); + ucp_context_h context = ep->worker->context; + ucs_status_t status; + int is_pipeline_rndv; + + ucp_trace_req(sreq, "received rtr address 0x%lx remote rreq 0x%lx", + rndv_rtr_hdr->address, rndv_rtr_hdr->rreq_ptr); + UCS_PROFILE_REQUEST_EVENT(sreq, "rndv_rtr_recv", 0); + + if (sreq->flags & UCP_REQUEST_FLAG_OFFLOADED) { + /* Do not deregister memory here, because am zcopy rndv may + * need it registered (if am and tag is the same lane). */ + ucp_tag_offload_cancel_rndv(sreq); + } + + if (UCP_DT_IS_CONTIG(sreq->send.datatype) && rndv_rtr_hdr->address) { + status = ucp_ep_rkey_unpack(ep, rndv_rtr_hdr + 1, + &sreq->send.rndv_put.rkey); + if (status != UCS_OK) { + ucs_fatal("failed to unpack rendezvous remote key received from %s: %s", + ucp_ep_peer_name(ep), ucs_status_string(status)); + } + + is_pipeline_rndv = ((!UCP_MEM_IS_ACCESSIBLE_FROM_CPU(sreq->send.mem_type) || + (sreq->send.length != rndv_rtr_hdr->size)) && + (context->config.ext.rndv_mode != UCP_RNDV_MODE_PUT_ZCOPY)); + + sreq->send.lane = ucp_rkey_find_rma_lane(ep->worker->context, ep_config, + (is_pipeline_rndv ? + sreq->send.rndv_put.rkey->mem_type : + sreq->send.mem_type), + ep_config->tag.rndv.put_zcopy_lanes, + sreq->send.rndv_put.rkey, 0, + &sreq->send.rndv_put.uct_rkey); + if (sreq->send.lane != UCP_NULL_LANE) { + /* + * Try pipeline protocol for non-host memory, if PUT_ZCOPY protocol is + * not explicitly required. If pipeline is UNSUPPORTED, fallback to + * PUT_ZCOPY anyway. 
+ */ + if (is_pipeline_rndv) { + status = ucp_rndv_pipeline(sreq, rndv_rtr_hdr); + if (status != UCS_ERR_UNSUPPORTED) { + return status; + } + /* If we get here, it means that RNDV pipeline protocol is + * unsupported and we have to use PUT_ZCOPY RNDV scheme instead */ + } + + if ((context->config.ext.rndv_mode != UCP_RNDV_MODE_GET_ZCOPY) && + /* is it allowed to use PUT Zcopy for the current message? */ + (sreq->send.length >= ucp_ep_config(ep)->tag.rndv.min_put_zcopy) && + /* is PUT Zcopy operation supported? */ + (ucp_ep_config(ep)->tag.rndv.max_put_zcopy != 0)) { + ucp_request_send_state_reset(sreq, ucp_rndv_put_completion, + UCP_REQUEST_SEND_PROTO_RNDV_PUT); + sreq->send.uct.func = ucp_rndv_progress_rma_put_zcopy; + sreq->send.rndv_put.remote_request = rndv_rtr_hdr->rreq_ptr; + sreq->send.rndv_put.remote_address = rndv_rtr_hdr->address; + sreq->send.mdesc = NULL; + goto out_send; + } else { + ucp_rkey_destroy(sreq->send.rndv_put.rkey); + } + } else { + ucp_rkey_destroy(sreq->send.rndv_put.rkey); + } + } + + /* switch to AM */ + sreq->send.tag.rreq_ptr = rndv_rtr_hdr->rreq_ptr; + + if (UCP_DT_IS_CONTIG(sreq->send.datatype) && + (sreq->send.length >= + ucp_ep_config(ep)->am.mem_type_zcopy_thresh[sreq->send.mem_type])) + { + status = ucp_request_send_buffer_reg_lane(sreq, ucp_ep_get_am_lane(ep), 0); + ucs_assert_always(status == UCS_OK); + + ucp_request_send_state_reset(sreq, ucp_rndv_am_zcopy_completion, + UCP_REQUEST_SEND_PROTO_ZCOPY_AM); + + if ((sreq->send.length + sizeof(ucp_rndv_data_hdr_t)) <= + ucp_ep_config(ep)->am.max_zcopy) { + sreq->send.uct.func = ucp_rndv_progress_am_zcopy_single; + } else { + sreq->send.uct.func = ucp_rndv_progress_am_zcopy_multi; + sreq->send.tag.am_bw_index = 1; + } + } else { + ucp_request_send_state_reset(sreq, NULL, UCP_REQUEST_SEND_PROTO_BCOPY_AM); + sreq->send.uct.func = ucp_rndv_progress_am_bcopy; + sreq->send.tag.am_bw_index = 1; + } + +out_send: + ucp_request_send(sreq, 0); + return UCS_OK; +} + 
+UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_data_handler,
+                 (arg, data, length, flags),
+                 void *arg, void *data, size_t length, unsigned flags)
+{   /* Active-message handler for RNDV_DATA: passes the payload that follows
+     * the header to the receive request referenced by the header's rreq_ptr,
+     * at the offset given in the header. Always returns UCS_OK; the status of
+     * the receive processing is deliberately discarded. */
+    ucp_rndv_data_hdr_t *rndv_data_hdr = data;
+    ucp_request_t *rreq = (ucp_request_t*) rndv_data_hdr->rreq_ptr;
+    size_t recv_len;
+
+    recv_len = length - sizeof(*rndv_data_hdr); /* payload bytes after the header */
+    UCS_PROFILE_REQUEST_EVENT(rreq, "rndv_data_recv", recv_len);
+
+    (void)ucp_tag_request_process_recv_data(rreq, rndv_data_hdr + 1, recv_len,
+                                            rndv_data_hdr->offset, 1, 0);
+    return UCS_OK;
+}
+/*
+ * Write the text " rkey " at the start of 'buffer' and let
+ * ucp_rkey_dump_packed() print 'packed_rkey' right after it. 'max' is the
+ * number of bytes available at 'buffer'.
+ */
+static void ucp_rndv_dump_rkey(const void *packed_rkey, char *buffer, size_t max)
+{
+    char *p = buffer;
+    char *endp = buffer + max;
+
+    snprintf(p, endp - p, " rkey ");
+    p += strlen(p); /* continue right after the label */
+
+    ucp_rkey_dump_packed(packed_rkey, p, endp - p);
+}
+/*
+ * Format a description of a rendezvous active message into 'buffer' (at most
+ * 'max' bytes), selected by the AM 'id'. For RTS and RTR messages with a
+ * non-zero address, the packed rkey that follows the header is dumped as well.
+ * For any other id the buffer is left untouched. 'worker', 'type' and 'length'
+ * are not used.
+ */
+static void ucp_rndv_dump(ucp_worker_h worker, uct_am_trace_type_t type,
+                          uint8_t id, const void *data, size_t length,
+                          char *buffer, size_t max)
+{
+    /* 'data' is viewed as each possible header type; only the view matching 'id' is read */
+    const ucp_rndv_rts_hdr_t *rndv_rts_hdr = data;
+    const ucp_rndv_rtr_hdr_t *rndv_rtr_hdr = data;
+    const ucp_rndv_data_hdr_t *rndv_data = data;
+    const ucp_reply_hdr_t *rep_hdr = data;
+
+    switch (id) {
+    case UCP_AM_ID_RNDV_RTS:
+        ucs_assert(rndv_rts_hdr->sreq.ep_ptr != 0);
+        snprintf(buffer, max, "RNDV_RTS tag %"PRIx64" ep_ptr %lx sreq 0x%lx "
+                 "address 0x%"PRIx64" size %zu", rndv_rts_hdr->super.tag,
+                 rndv_rts_hdr->sreq.ep_ptr, rndv_rts_hdr->sreq.reqptr,
+                 rndv_rts_hdr->address, rndv_rts_hdr->size);
+        if (rndv_rts_hdr->address) {
+            /* append after the text written so far */
+            ucp_rndv_dump_rkey(rndv_rts_hdr + 1, buffer + strlen(buffer),
+                               max - strlen(buffer));
+        }
+        break;
+    case UCP_AM_ID_RNDV_ATS:
+        snprintf(buffer, max, "RNDV_ATS sreq 0x%lx status '%s'",
+                 rep_hdr->reqptr, ucs_status_string(rep_hdr->status));
+        break;
+    case UCP_AM_ID_RNDV_RTR:
+        snprintf(buffer, max, "RNDV_RTR sreq 0x%lx rreq 0x%lx address 0x%lx",
+                 rndv_rtr_hdr->sreq_ptr, rndv_rtr_hdr->rreq_ptr,
+                 rndv_rtr_hdr->address);
+        if (rndv_rtr_hdr->address) {
+            ucp_rndv_dump_rkey(rndv_rtr_hdr + 1, buffer + strlen(buffer),
+                               max - strlen(buffer));
+        }
+        break;
+    case UCP_AM_ID_RNDV_DATA:
+        snprintf(buffer, max, "RNDV_DATA rreq 0x%"PRIx64" offset %zu",
+                 rndv_data->rreq_ptr, rndv_data->offset);
+        break;
+    case UCP_AM_ID_RNDV_ATP:
+        /* NOTE(review): the label says "sreq", but ucp_rndv_atp_handler treats
+         * reqptr as a request on the receiving side - confirm the wording */
+        snprintf(buffer, max, "RNDV_ATP sreq 0x%lx status '%s'",
+                 rep_hdr->reqptr, ucs_status_string(rep_hdr->status));
+        break;
+    default:
+        return;
+    }
+}
+/* NOTE(review): presumably binds each rendezvous AM id to its handler and to
+ * ucp_rndv_dump as the trace callback - confirm against UCP_DEFINE_AM */
+UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_RNDV_RTS, ucp_rndv_rts_handler,
+              ucp_rndv_dump, 0);
+UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_RNDV_ATS, ucp_rndv_ats_handler,
+              ucp_rndv_dump, 0);
+UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_RNDV_ATP, ucp_rndv_atp_handler,
+              ucp_rndv_dump, 0);
+UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_RNDV_RTR, ucp_rndv_rtr_handler,
+              ucp_rndv_dump, 0);
+UCP_DEFINE_AM(UCP_FEATURE_TAG, UCP_AM_ID_RNDV_DATA, ucp_rndv_data_handler,
+              ucp_rndv_dump, 0);
+
+UCP_DEFINE_AM_PROXY(UCP_AM_ID_RNDV_RTS);
+UCP_DEFINE_AM_PROXY(UCP_AM_ID_RNDV_ATS);
+UCP_DEFINE_AM_PROXY(UCP_AM_ID_RNDV_ATP);
+UCP_DEFINE_AM_PROXY(UCP_AM_ID_RNDV_RTR);
+UCP_DEFINE_AM_PROXY(UCP_AM_ID_RNDV_DATA);
diff --git a/src/ucp/tag/rndv.h b/src/ucp/tag/rndv.h
new file mode 100644
index 0000000..5a1f1f4
--- /dev/null
+++ b/src/ucp/tag/rndv.h
@@ -0,0 +1,72 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCP_TAG_RNDV_H_
+#define UCP_TAG_RNDV_H_
+
+#include "tag_match.h"
+
+#include /* NOTE(review): the header names of these three #include lines are missing here - restore them from the upstream file */
+#include
+#include
+
+
+/*
+ * Rendezvous RTS
+ */
+typedef struct {
+    ucp_tag_hdr_t super;
+    ucp_request_hdr_t sreq; /* send request on the rndv initiator side */
+    uint64_t address; /* holds the address of the data buffer on the sender's side */
+    size_t size; /* size of the data for sending */
+    /* packed rkeys follow */
+} UCS_S_PACKED ucp_rndv_rts_hdr_t;
+
+/*
+ * Rendezvous RTR
+ */
+typedef struct {
+    uintptr_t sreq_ptr; /* request on the rndv initiator side - sender */
+    uintptr_t rreq_ptr; /* request on the rndv receiver side */
+    uint64_t address; /* holds the address of the data buffer on the receiver's side */
+    size_t size; /* size of the data to receive */
+    size_t offset; /* offset of the data in the recv buffer */
+    /* packed rkeys follow */
+} UCS_S_PACKED ucp_rndv_rtr_hdr_t;
+
+/*
+ * RNDV_DATA
+ */
+typedef struct {
+    uintptr_t rreq_ptr; /* request on the rndv receiver side */
+    size_t offset; /* offset of the payload that follows this header; the sender fills it from its datatype state offset */
+} UCS_S_PACKED ucp_rndv_data_hdr_t;
+
+
+ucs_status_t ucp_tag_send_start_rndv(ucp_request_t *req);
+
+void ucp_rndv_matched(ucp_worker_h worker, ucp_request_t *req,
+                      const ucp_rndv_rts_hdr_t *rndv_rts_hdr);
+
+ucs_status_t ucp_rndv_progress_rma_get_zcopy(uct_pending_req_t *self);
+
+ucs_status_t ucp_rndv_process_rts(void *arg, void *data, size_t length,
+                                  unsigned tl_flags);
+
+size_t ucp_tag_rndv_rts_pack(void *dest, void *arg);
+
+ucs_status_t ucp_tag_rndv_reg_send_buffer(ucp_request_t *sreq);
+/*
+ * Return non-zero if the GET zcopy rendezvous scheme is selected for the given
+ * memory type: either the mode is explicitly GET_ZCOPY, or the mode is AUTO
+ * and the memory is accessible from the CPU or is ROCm memory.
+ */
+static UCS_F_ALWAYS_INLINE int ucp_rndv_is_get_zcopy(ucs_memory_type_t mem_type,
+                                                     ucp_rndv_mode_t rndv_mode)
+{
+    return ((rndv_mode == UCP_RNDV_MODE_GET_ZCOPY) ||
+            ((rndv_mode == UCP_RNDV_MODE_AUTO) &&
+             (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type) ||
+              UCP_MEM_IS_ROCM(mem_type))));
+}
+
+#endif
diff --git a/src/ucp/tag/tag_match.c b/src/ucp/tag/tag_match.c
new file mode 100644
index 0000000..5b2d47c
--- /dev/null
+++ b/src/ucp/tag/tag_match.c
@@ -0,0 +1,174 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "tag_match.inl"
+#include /* NOTE(review): the header name of this #include is missing here - restore it from the upstream file */
+
+/*
+ * Initialize a tag-matching context: empty expected and unexpected queues,
+ * both hash tables allocated with ucs_malloc, the fragment and offload hashes,
+ * and the offload thresholds set to SIZE_MAX.
+ *
+ * Returns UCS_OK, or UCS_ERR_NO_MEMORY if either hash table cannot be
+ * allocated (the first table is freed if the second allocation fails).
+ */
+ucs_status_t ucp_tag_match_init(ucp_tag_match_t *tm)
+{
+    size_t hash_size, bucket;
+
+    hash_size = ucs_roundup_pow2(UCP_TAG_MATCH_HASH_SIZE);
+
+    tm->expected.sn = 0;
+    tm->expected.sw_all_count = 0;
+    ucs_queue_head_init(&tm->expected.wildcard.queue);
+    ucs_list_head_init(&tm->unexpected.all);
+
+    tm->expected.hash = ucs_malloc(sizeof(*tm->expected.hash) * hash_size,
+                                   "ucp_tm_exp_hash");
+    if (tm->expected.hash == NULL) {
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    tm->unexpected.hash = ucs_malloc(sizeof(*tm->unexpected.hash) * hash_size,
+                                     "ucp_tm_unexp_hash");
+    if (tm->unexpected.hash == NULL) {
+        ucs_free(tm->expected.hash); /* do not leak the first table */
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    for (bucket = 0; bucket < hash_size; ++bucket) {
+        tm->expected.hash[bucket].sw_count = 0;
+        tm->expected.hash[bucket].block_count = 0;
+        ucs_queue_head_init(&tm->expected.hash[bucket].queue);
+        ucs_list_head_init(&tm->unexpected.hash[bucket]);
+    }
+
+    kh_init_inplace(ucp_tag_frag_hash, &tm->frag_hash);
+    ucs_queue_head_init(&tm->offload.sync_reqs);
+    kh_init_inplace(ucp_tag_offload_hash, &tm->offload.tag_hash);
+    tm->offload.thresh = SIZE_MAX;
+    tm->offload.zcopy_thresh = SIZE_MAX;
+    tm->offload.iface = NULL;
+    return UCS_OK;
+}
+/* Destroy both in-place hashes and free both hash tables of 'tm'. */
+void ucp_tag_match_cleanup(ucp_tag_match_t *tm)
+{
+    kh_destroy_inplace(ucp_tag_offload_hash, &tm->offload.tag_hash);
+    kh_destroy_inplace(ucp_tag_frag_hash, &tm->frag_hash);
+    ucs_free(tm->unexpected.hash);
+    ucs_free(tm->expected.hash);
+}
+/* Return non-zero if the list of all unexpected messages is empty. */
+int ucp_tag_unexp_is_empty(ucp_tag_match_t *tm)
+{
+    return ucs_list_is_empty(&tm->unexpected.all);
+}
+/*
+ * Remove 'req' from the expected queue selected by its tag and tag mask.
+ * Returns 1 if the request was found and removed (an offload cancel is
+ * attempted first), or 0 if it is not in that queue.
+ */
+int ucp_tag_exp_remove(ucp_tag_match_t *tm, ucp_request_t *req)
+{
+    ucp_request_queue_t *req_queue = ucp_tag_exp_get_req_queue(tm, req);
+    ucs_queue_iter_t iter;
+    ucp_request_t *qreq;
+
+    ucs_queue_for_each_safe(qreq, iter, &req_queue->queue, recv.queue) {
+        if (qreq == req) {
+            ucp_tag_offload_try_cancel(req->recv.worker, req, 0);
+            ucp_tag_exp_delete(req, tm, req_queue, iter);
+            return 1;
+        }
+    }
+
+    ucs_assert(!(req->flags & UCP_REQUEST_FLAG_COMPLETED));
+    ucs_trace_req("can't remove req %p (already matched)", req);
+
+    return 0;
+}
+/*
+ * Sequence number of the request at 'iter', or ULONG_MAX if the iterator
+ * points to NULL.
+ * NOTE(review): the sentinel is ULONG_MAX while the return type is uint64_t;
+ * the two only agree where unsigned long is 64 bits wide - confirm.
+ */
+static inline uint64_t ucp_tag_exp_req_seq(ucs_queue_iter_t iter)
+{
+    return (*iter == NULL) ? ULONG_MAX :
+           ucs_container_of(*iter, ucp_request_t, recv.queue)->recv.tag.sn;
+}
+/*
+ * Search 'req_queue' and the wildcard queue together for the first expected
+ * request matching 'tag', visiting requests in increasing sequence-number
+ * order across both queues. A matching request is removed from its queue and
+ * returned; NULL is returned if nothing matches.
+ */
+ucp_request_t*
+ucp_tag_exp_search_all(ucp_tag_match_t *tm, ucp_request_queue_t *req_queue,
+                       ucp_tag_t tag)
+{
+    ucs_queue_head_t *hash_queue = &req_queue->queue;
+    ucp_request_queue_t *queue;
+    ucs_queue_iter_t hash_iter, wild_iter, *iter;
+    uint64_t hash_sn, wild_sn, *sn_p;
+    ucp_request_t *req;
+    /* presumably NULL-terminates both queues so that ucp_tag_exp_req_seq() sees NULL at the end - confirm */
+    *hash_queue->ptail = NULL;
+    *tm->expected.wildcard.queue.ptail = NULL;
+
+    hash_iter = ucs_queue_iter_begin(hash_queue);
+    wild_iter = ucs_queue_iter_begin(&tm->expected.wildcard.queue);
+
+    hash_sn = ucp_tag_exp_req_seq(hash_iter);
+    wild_sn = ucp_tag_exp_req_seq(wild_iter);
+
+    while (hash_sn != wild_sn) {
+        if (hash_sn < wild_sn) { /* advance on the queue whose head has the lower sequence number */
+            iter = &hash_iter;
+            sn_p = &hash_sn;
+            queue = req_queue;
+        } else {
+            iter = &wild_iter;
+            sn_p = &wild_sn;
+            queue = &tm->expected.wildcard;
+        }
+
+        req = ucs_container_of(**iter, ucp_request_t, recv.queue);
+        if (ucp_tag_is_match(tag, req->recv.tag.tag, req->recv.tag.tag_mask)) {
+            ucs_trace_req("matched received tag %"PRIx64" to req %p", tag, req);
+            ucp_tag_exp_delete(req, tm, queue, *iter);
+            return req;
+        }
+
+        *iter = ucs_queue_iter_next(*iter);
+        *sn_p = ucp_tag_exp_req_seq(*iter);
+    }
+    /* the loop exits when both sequence numbers are equal; the assertions below expect that to be the end of both queues */
+    ucs_assertv((hash_sn == ULONG_MAX) && (wild_sn == ULONG_MAX),
+                "hash_seq=%lu wild_seq=%lu", hash_sn, wild_sn);
+    ucs_assert(ucs_queue_iter_end(hash_queue, hash_iter));
+    ucs_assert(ucs_queue_iter_end(&tm->expected.wildcard.queue, wild_iter));
+    return NULL;
+}
+
+void ucp_tag_frag_list_process_queue(ucp_tag_match_t *tm, ucp_request_t *req,
+
uint64_t msg_id UCS_STATS_ARG(int counter_idx)) +{ + ucp_eager_middle_hdr_t *hdr; + ucp_tag_frag_match_t *matchq; + ucp_recv_desc_t *rdesc; + ucs_status_t status; + khiter_t iter; + int ret; + + iter = kh_put(ucp_tag_frag_hash, &tm->frag_hash, msg_id, &ret); + matchq = &kh_value(&tm->frag_hash, iter); + if (ret == 0) { + status = UCS_INPROGRESS; + ucs_assert(ucp_tag_frag_match_is_unexp(matchq)); + ucs_queue_for_each_extract(rdesc, &matchq->unexp_q, tag_frag_queue, + status == UCS_INPROGRESS) { + UCS_STATS_UPDATE_COUNTER(req->recv.worker->stats, counter_idx, 1); + hdr = (void*)(rdesc + 1); + status = ucp_tag_recv_request_process_rdesc(req, rdesc, hdr->offset); + } + ucs_assert(ucs_queue_is_empty(&matchq->unexp_q)); + + /* if we completed the request, delete hash entry */ + if (status != UCS_INPROGRESS) { + kh_del(ucp_tag_frag_hash, &tm->frag_hash, iter); + return; + } + } + + /* request not completed, put it on the hash */ + ucp_tag_frag_hash_init_exp(matchq, req); +} diff --git a/src/ucp/tag/tag_match.h b/src/ucp/tag/tag_match.h new file mode 100644 index 0000000..b653658 --- /dev/null +++ b/src/ucp/tag/tag_match.h @@ -0,0 +1,120 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_TAG_MATCH_H_ +#define UCP_TAG_MATCH_H_ + +#include +#include +#include +#include +#include +#include + + +#define UCP_TAG_MASK_FULL 0xffffffffffffffffUL /* All 1-s */ + + +KHASH_INIT(ucp_tag_offload_hash, ucp_tag_t, ucp_worker_iface_t *, 1, + kh_int64_hash_func, kh_int64_hash_equal); + + +/** + * Tag-match header + */ +typedef struct { + ucp_tag_t tag; +} UCS_S_PACKED ucp_tag_hdr_t; + + +/** + * Queue of expected requests + */ +typedef struct { + ucs_queue_head_t queue; /* Requests queue */ + unsigned sw_count; /* Number of requests in this queue which + are not posted to offload */ + unsigned block_count; /* Number of requests which can't be + posted to offload. 
*/ +} ucp_request_queue_t; + + +/** + * Hash table entry for tag message fragments + */ +typedef union { + ucs_queue_head_t unexp_q; /* Queue of unexpected descriptors */ + ucp_request_t *exp_req; /* Expected request */ +} ucp_tag_frag_match_t; + + +KHASH_INIT(ucp_tag_frag_hash, uint64_t, ucp_tag_frag_match_t, 1, + kh_int64_hash_func, kh_int64_hash_equal); + + +/** + * Tag-matching context + */ +typedef struct ucp_tag_match { + + /* Expected queue */ + struct { + ucp_request_queue_t wildcard; /* Expected wildcard requests */ + ucp_request_queue_t *hash; /* Hash table of expected non-wild tags */ + uint64_t sn; + unsigned sw_all_count; /* Number of all expected requests which + are not posted to offload */ + } expected; + + /* Unexpected queue */ + struct { + ucs_list_link_t all; /* Linked list of all tags */ + ucs_list_link_t *hash; /* Hash table of unexpected tags */ + } unexpected; + + /* Hash for fragment assembly, the key is a globally unique tag message id */ + khash_t(ucp_tag_frag_hash) frag_hash; + + /* Tag offload fields */ + struct { + ucs_queue_head_t sync_reqs; /* Outgoing sync send requests */ + khash_t(ucp_tag_offload_hash) tag_hash; /* Hash table of offload ifaces */ + ucp_worker_iface_t *iface; /* Active offload iface (relevant if just + one iface is activated on the worker, + otherwise hash should be used) */ + size_t thresh; /* Minimal receive buffer size to be + used with tag-matching offload. */ + size_t zcopy_thresh; /* Minimal size of user-provided + receive buffer to be passed + directly to tag-matching offload + on the transport. Buffers smaller + than this threshold would either + bounce to UCP internal buffers, + or not be used with tag-matching + offload at all, according to + 'thresh' configuration. 
*/ + } offload; + +} ucp_tag_match_t; + + +ucs_status_t ucp_tag_match_init(ucp_tag_match_t *tm); + +void ucp_tag_match_cleanup(ucp_tag_match_t *tm); + +int ucp_tag_exp_remove(ucp_tag_match_t *tm, ucp_request_t *req); + +int ucp_tag_unexp_is_empty(ucp_tag_match_t *tm); + +ucp_request_t* +ucp_tag_exp_search_all(ucp_tag_match_t *tm, ucp_request_queue_t *req_queue, + ucp_tag_t tag); + +void ucp_tag_frag_list_process_queue(ucp_tag_match_t *tm, ucp_request_t *req, + uint64_t msg_id + UCS_STATS_ARG(int counter_idx)); + +#endif diff --git a/src/ucp/tag/tag_match.inl b/src/ucp/tag/tag_match.inl new file mode 100644 index 0000000..b6973a5 --- /dev/null +++ b/src/ucp/tag/tag_match.inl @@ -0,0 +1,396 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_TAG_MATCH_INL_ +#define UCP_TAG_MATCH_INL_ + +#include "tag_match.h" +#include "eager.h" + +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Hash size is a prime number just below 1024. Prime number for even distribution, + * and small enough to fit L1 cache. */ +#define UCP_TAG_MATCH_HASH_SIZE 1021 + + +static UCS_F_ALWAYS_INLINE +int ucp_tag_is_specific_source(ucp_context_t *context, ucp_tag_t tag_mask) +{ + return ((context->config.tag_sender_mask & tag_mask) == + context->config.tag_sender_mask); +} + +static UCS_F_ALWAYS_INLINE +int ucp_tag_is_match(ucp_tag_t tag, ucp_tag_t exp_tag, ucp_tag_t tag_mask) +{ + /* The bits in which expected and actual tag differ, should not fall + * inside the mask. 
+ */ + return ((tag ^ exp_tag) & tag_mask) == 0; +} + +static UCS_F_ALWAYS_INLINE size_t +ucp_tag_match_calc_hash(ucp_tag_t tag) +{ + /* Compute two 32-bit modulo and combine their result */ + return ((uint32_t)tag % UCP_TAG_MATCH_HASH_SIZE) ^ + ((uint32_t)(tag >> 32) % UCP_TAG_MATCH_HASH_SIZE); +} + +static UCS_F_ALWAYS_INLINE ucp_request_queue_t* +ucp_tag_exp_get_queue_for_tag(ucp_tag_match_t *tm, ucp_tag_t tag) +{ + return &tm->expected.hash[ucp_tag_match_calc_hash(tag)]; +} + +static UCS_F_ALWAYS_INLINE ucp_request_queue_t* +ucp_tag_exp_get_queue(ucp_tag_match_t *tm, ucp_tag_t tag, ucp_tag_t tag_mask) +{ + if (tag_mask == UCP_TAG_MASK_FULL) { + return ucp_tag_exp_get_queue_for_tag(tm, tag); + } else { + return &tm->expected.wildcard; + } +} + +static UCS_F_ALWAYS_INLINE ucp_request_queue_t* +ucp_tag_exp_get_req_queue(ucp_tag_match_t *tm, ucp_request_t *req) +{ + return ucp_tag_exp_get_queue(tm, req->recv.tag.tag, req->recv.tag.tag_mask); +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_exp_push(ucp_tag_match_t *tm, ucp_request_queue_t *req_queue, + ucp_request_t *req) +{ + req->recv.tag.sn = tm->expected.sn++; + ucs_queue_push(&req_queue->queue, &req->recv.queue); +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_exp_add(ucp_tag_match_t *tm, ucp_request_t *req) +{ + ucp_tag_exp_push(tm, ucp_tag_exp_get_req_queue(tm, req), req); +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_exp_delete(ucp_request_t *req, ucp_tag_match_t *tm, + ucp_request_queue_t *req_queue, ucs_queue_iter_t iter) +{ + if (!(req->flags & UCP_REQUEST_FLAG_OFFLOADED)) { + --tm->expected.sw_all_count; + --req_queue->sw_count; + if (req->flags & UCP_REQUEST_FLAG_BLOCK_OFFLOAD) { + --req_queue->block_count; + } + } + ucs_queue_del_iter(&req_queue->queue, iter); +} + +static UCS_F_ALWAYS_INLINE ucp_request_t * +ucp_tag_exp_search(ucp_tag_match_t *tm, ucp_tag_t tag) +{ + ucp_request_queue_t *req_queue; + ucs_queue_iter_t iter; + ucp_request_t *req; + + if 
(ucs_unlikely(!ucs_queue_is_empty(&tm->expected.wildcard.queue))) { + req_queue = ucp_tag_exp_get_queue_for_tag(tm, tag); + return ucp_tag_exp_search_all(tm, req_queue, tag); + } + + /* fast path - wildcard queue is empty, search only the specific queue */ + req_queue = ucp_tag_exp_get_queue_for_tag(tm, tag); + ucs_queue_for_each_safe(req, iter, &req_queue->queue, recv.queue) { + req = ucs_container_of(*iter, ucp_request_t, recv.queue); + ucs_trace_data("checking req %p tag %"PRIx64"/%"PRIx64" with tag %"PRIx64, + req, req->recv.tag.tag, req->recv.tag.tag_mask, tag); + if (ucp_tag_is_match(tag, req->recv.tag.tag, req->recv.tag.tag_mask)) { + ucs_trace_req("matched received tag %"PRIx64" to req %p", tag, req); + ucp_tag_exp_delete(req, tm, req_queue, iter); + return req; + } + } + return NULL; +} + +static UCS_F_ALWAYS_INLINE ucp_tag_t ucp_rdesc_get_tag(ucp_recv_desc_t *rdesc) +{ + return ((ucp_tag_hdr_t*)(rdesc + 1))->tag; +} + +static UCS_F_ALWAYS_INLINE ucs_list_link_t* +ucp_tag_unexp_get_list_for_tag(ucp_tag_match_t *tm, ucp_tag_t tag) +{ + return &tm->unexpected.hash[ucp_tag_match_calc_hash(tag)]; +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_unexp_remove(ucp_recv_desc_t *rdesc) +{ + ucs_list_del(&rdesc->tag_list[UCP_RDESC_HASH_LIST]); + ucs_list_del(&rdesc->tag_list[UCP_RDESC_ALL_LIST] ); +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_unexp_recv(ucp_tag_match_t *tm, ucp_recv_desc_t *rdesc, ucp_tag_t tag) +{ + ucs_list_link_t *hash_list; + + hash_list = ucp_tag_unexp_get_list_for_tag(tm, tag); + ucs_list_add_tail(hash_list, &rdesc->tag_list[UCP_RDESC_HASH_LIST]); + ucs_list_add_tail(&tm->unexpected.all, &rdesc->tag_list[UCP_RDESC_ALL_LIST]); + + ucs_trace_req("unexp "UCP_RECV_DESC_FMT" tag %"PRIx64, + UCP_RECV_DESC_ARG(rdesc), tag); +} + +static UCS_F_ALWAYS_INLINE ucp_recv_desc_t* +ucp_tag_unexp_list_next(ucp_recv_desc_t *rdesc, int i_list) +{ + return ucs_list_next(&rdesc->tag_list[i_list], ucp_recv_desc_t, + tag_list[i_list]); +} + +/* search unexpected 
queue for tag/mask, if found return the received desc, + * otherwise return NULL + */ +static UCS_F_ALWAYS_INLINE ucp_recv_desc_t* +ucp_tag_unexp_search(ucp_tag_match_t *tm, ucp_tag_t tag, uint64_t tag_mask, + int remove, const char *title) +{ + ucp_recv_desc_t *rdesc; + ucs_list_link_t *list; + int i_list; + + /* fast check of global unexpected queue */ + if (ucs_list_is_empty(&tm->unexpected.all)) { + return NULL; + } + + if (tag_mask == UCP_TAG_MASK_FULL) { + list = ucp_tag_unexp_get_list_for_tag(tm, tag); + if (ucs_list_is_empty(list)) { + return NULL; + } + i_list = UCP_RDESC_HASH_LIST; + } else { + list = &tm->unexpected.all; + i_list = UCP_RDESC_ALL_LIST; + } + + rdesc = ucs_list_head(list, ucp_recv_desc_t, tag_list[i_list]); + do { + ucs_trace_req("searching for tag %"PRIx64"/%"PRIx64" " + "checking "UCP_RECV_DESC_FMT" tag %"PRIx64, + tag, tag_mask, UCP_RECV_DESC_ARG(rdesc), + ucp_rdesc_get_tag(rdesc)); + if (ucp_tag_is_match(ucp_rdesc_get_tag(rdesc), tag, tag_mask)) { + ucs_trace_req("matched unexp rdesc " UCP_RECV_DESC_FMT " to " + "%s tag %"PRIx64"/%"PRIx64, UCP_RECV_DESC_ARG(rdesc), + title, tag, tag_mask); + if (remove) { + ucp_tag_unexp_remove(rdesc); + } + return rdesc; + } + + rdesc = ucp_tag_unexp_list_next(rdesc, i_list); + } while (&rdesc->tag_list[i_list] != list); + + return NULL; +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_recv_request_release_non_contig_buffer(ucp_request_t *req) +{ + ucs_assert(!UCP_DT_IS_CONTIG(req->recv.datatype)); + ucs_free(req->recv.tag.non_contig_buf); + req->recv.tag.non_contig_buf = NULL; +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_request_recv_offload_data(ucp_request_t *req, const void *data, + size_t length, unsigned recv_flags) +{ + ucs_status_t status = UCS_OK; + size_t offset; + ucp_offload_last_ssend_hdr_t *priv; + + /* Should be used in multi-fragmented flow only */ + ucs_assert(!(recv_flags & UCP_RECV_DESC_FLAG_EAGER_ONLY)); + + if (ucs_test_all_flags(recv_flags, UCP_RECV_DESC_FLAG_EAGER_LAST | + 
UCP_RECV_DESC_FLAG_EAGER_SYNC)) { + priv = (ucp_offload_last_ssend_hdr_t*)UCS_PTR_BYTE_OFFSET(data, + -sizeof(*priv)); + ucp_tag_offload_sync_send_ack(req->recv.worker, priv->ssend_ack.ep_ptr, + priv->ssend_ack.sender_tag, recv_flags); + } + + if (ucs_unlikely(req->status != UCS_OK)) { + return req->status; + } + + /* There is no correct offset in middle headers with tag offload flow. + * All fragments are always in order - can calculate offset by + * subtraction of already received data. + * NOTE: total length of unexpected eager offload message is not known + * until last fragment arrives, so it is initialized to SIZE_MAX. */ + offset = SIZE_MAX - req->recv.tag.remaining; + + if (ucs_unlikely(req->recv.length < (length + offset))) { + /* We have to release non-contig buffer only in case of + * this is not the first segment and the datatype is + * non-contig */ + if ((offset != 0) && !UCP_DT_IS_CONTIG(req->recv.datatype)) { + ucp_tag_recv_request_release_non_contig_buffer(req); + } + return ucp_request_recv_msg_truncated(req, length, offset); + } + + if (UCP_DT_IS_CONTIG(req->recv.datatype)) { + ucp_request_unpack_contig(req, + UCS_PTR_BYTE_OFFSET(req->recv.buffer, offset), + data, length); + } else { + /* For non-contig data need to assemble the whole message + * before calling unpack. */ + if (offset == 0) { + req->recv.tag.non_contig_buf = ucs_malloc(req->recv.length, + "tag gen buffer"); + if (ucs_unlikely(req->recv.tag.non_contig_buf == NULL)) { + return UCS_ERR_NO_MEMORY; + } + } + + ucp_request_unpack_contig(req, + UCS_PTR_BYTE_OFFSET(req->recv.tag.non_contig_buf, + offset), + data, length); + } + + if (recv_flags & UCP_RECV_DESC_FLAG_EAGER_LAST) { + /* Need to update recv info length. In tag offload protocol we do not + * know the total message length until the last fragment arrives. 
*/ + req->recv.tag.info.length = offset + length; + + if (!UCP_DT_IS_CONTIG(req->recv.datatype)) { + status = ucp_request_recv_data_unpack(req, req->recv.tag.non_contig_buf, + req->recv.tag.info.length, + 0, 1); + ucp_tag_recv_request_release_non_contig_buffer(req); + } + } + + return status; +} + +/* + * process data, complete receive if done + * @return UCS_OK/ERR - completed, UCS_INPROGRESS - not completed + */ +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_tag_request_process_recv_data(ucp_request_t *req, const void *data, + size_t length, size_t offset, int dereg, + unsigned recv_flags) +{ + ucs_status_t status; + int last; + + if (recv_flags & UCP_RECV_DESC_FLAG_EAGER_OFFLOAD) { + req->status = ucp_request_recv_offload_data(req, data, length, + recv_flags); + + last = recv_flags & UCP_RECV_DESC_FLAG_EAGER_LAST; + } else { + last = req->recv.tag.remaining == length; + + /* process data only if the request is not in error state */ + if (ucs_likely(req->status == UCS_OK)) { + req->status = ucp_request_recv_data_unpack(req, data, length, + offset, last); + } + ucs_assert(req->recv.tag.remaining >= length); + } + + req->recv.tag.remaining -= length; + + if (last) { + status = req->status; + if (dereg) { + ucp_request_recv_buffer_dereg(req); + } + ucp_request_complete_tag_recv(req, status); + ucs_assert(status != UCS_INPROGRESS); + return status; + } else { + return UCS_INPROGRESS; + } +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_tag_recv_request_process_rdesc(ucp_request_t *req, ucp_recv_desc_t *rdesc, + size_t offset) +{ + size_t hdr_len, recv_len; + ucs_status_t status; + + hdr_len = rdesc->payload_offset; + recv_len = rdesc->length - hdr_len; + status = ucp_tag_request_process_recv_data(req, + UCS_PTR_BYTE_OFFSET(rdesc + 1, hdr_len), + recv_len, offset, 0, rdesc->flags); + ucp_recv_desc_release(rdesc); + return status; +} + +static UCS_F_ALWAYS_INLINE int +ucp_tag_frag_match_is_unexp(ucp_tag_frag_match_t *frag_list) +{ + /* Hack to reduce memory usage: 
instead of adding another field to specify + * which union field is valid, assume that when the unexpected queue field + * is valid, its ptail field could never be NULL */ + return frag_list->unexp_q.ptail != NULL; +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_frag_match_add_unexp(ucp_tag_frag_match_t *frag_list, ucp_recv_desc_t *rdesc, + size_t offset) +{ + ucs_trace_req("unexp frag "UCP_RECV_DESC_FMT" offset %zu", + UCP_RECV_DESC_ARG(rdesc), offset); + ucs_assert(ucp_tag_frag_match_is_unexp(frag_list)); + ucs_queue_push(&frag_list->unexp_q, &rdesc->tag_frag_queue); +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_frag_match_init_unexp(ucp_tag_frag_match_t *frag_list) +{ + ucs_queue_head_init(&frag_list->unexp_q); + ucs_assert(ucp_tag_frag_match_is_unexp(frag_list)); +} + +static UCS_F_ALWAYS_INLINE void +ucp_tag_frag_hash_init_exp(ucp_tag_frag_match_t *frag_list, ucp_request_t *req) +{ + UCS_STATIC_ASSERT(ucs_offsetof(ucp_tag_frag_match_t, unexp_q.ptail) >= + ucs_offsetof(ucp_tag_frag_match_t, exp_req) + sizeof(frag_list->exp_req)); + frag_list->exp_req = req; + frag_list->unexp_q.ptail = NULL; + ucs_assert(!ucp_tag_frag_match_is_unexp(frag_list)); +} + +#endif diff --git a/src/ucp/tag/tag_recv.c b/src/ucp/tag/tag_recv.c new file mode 100644 index 0000000..dfcdf52 --- /dev/null +++ b/src/ucp/tag/tag_recv.c @@ -0,0 +1,233 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "eager.h" +#include "rndv.h" +#include "tag_match.inl" +#include "offload.h" + +#include +#include +#include +#include + +/* Record the final status on a receive request and mark it completed; 'function' is used only in the trace message. */ +static UCS_F_ALWAYS_INLINE void +ucp_tag_recv_request_completed(ucp_request_t *req, ucs_status_t status, + ucp_tag_recv_info_t *info, const char *function) +{ + ucs_trace_req("%s returning completed request %p (%p) stag 0x%"PRIx64" len %zu, %s", + function, req, req + 1, info->sender_tag, info->length, + ucs_status_string(status)); + + req->status = status; + if ((req->flags |= UCP_REQUEST_FLAG_COMPLETED) & UCP_REQUEST_FLAG_RELEASED) { /* sets COMPLETED, then tests whether RELEASED was already set */ + ucp_request_put(req); + } + UCS_PROFILE_REQUEST_EVENT(req, "complete_recv", 0); /* NOTE(review): req is still referenced here after the ucp_request_put() above - confirm this is safe */ +} +/* Shared receive path for the entry points in this file. 'rdesc' is the matched unexpected descriptor, or NULL. An eager-only descriptor is unpacked and the request completed immediately; with rdesc == NULL the request is pushed to the expected queue; a rendezvous descriptor is handed to ucp_rndv_matched(); otherwise the first eager fragment is processed and the fragments queued under the same msg_id are drained. */ +static UCS_F_ALWAYS_INLINE void +ucp_tag_recv_common(ucp_worker_h worker, void *buffer, size_t count, + uintptr_t datatype, ucp_tag_t tag, ucp_tag_t tag_mask, + ucp_request_t *req, uint32_t req_flags, ucp_tag_recv_callback_t cb, + ucp_recv_desc_t *rdesc, const char *debug_name) +{ + unsigned common_flags = UCP_REQUEST_FLAG_RECV | UCP_REQUEST_FLAG_EXPECTED; + ucp_eager_first_hdr_t *eagerf_hdr; + ucp_request_queue_t *req_queue; + ucs_memory_type_t mem_type; + size_t hdr_len, recv_len; + ucs_status_t status; + uint64_t msg_id; + + ucp_trace_req(req, "%s buffer %p dt 0x%lx count %zu tag %"PRIx64"/%"PRIx64, + debug_name, buffer, datatype, count, tag, tag_mask); + + /* First, check the fast path case - single fragment + * in this case avoid initializing most of request fields + * */ + if (ucs_likely((rdesc != NULL) && (rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_ONLY))) { + UCS_PROFILE_REQUEST_EVENT(req, "eager_only_match", 0); + UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags); + UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP); + + if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) { + ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags); + } + + req->flags = UCP_REQUEST_FLAG_RECV | req_flags; /* unlike common_flags used on the slow path, EXPECTED is not set here */ + hdr_len = rdesc->payload_offset; + recv_len = 
rdesc->length - hdr_len; + req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc); + req->recv.tag.info.length = recv_len; + mem_type = ucp_memory_type_detect(worker->context, + buffer, recv_len); + + status = ucp_dt_unpack_only(worker, buffer, count, datatype, mem_type, + UCS_PTR_BYTE_OFFSET(rdesc + 1, hdr_len), + recv_len, 1); + ucp_recv_desc_release(rdesc); + + if (req_flags & UCP_REQUEST_FLAG_CALLBACK) { /* the user callback runs before the request is marked completed below */ + cb(req + 1, status, &req->recv.tag.info); + } + ucp_tag_recv_request_completed(req, status, &req->recv.tag.info, + debug_name); + return; + } + + /* Initialize receive request */ + req->status = UCS_OK; + req->recv.worker = worker; + req->recv.buffer = buffer; + req->recv.datatype = datatype; + + ucp_dt_recv_state_init(&req->recv.state, buffer, datatype, count); + + if (!UCP_DT_IS_CONTIG(datatype)) { + common_flags |= UCP_REQUEST_FLAG_BLOCK_OFFLOAD; + } + + req->flags = common_flags | req_flags; + req->recv.length = ucp_dt_length(datatype, count, buffer, + &req->recv.state); + req->recv.mem_type = ucp_memory_type_detect(worker->context, buffer, + req->recv.length); + req->recv.tag.tag = tag; + req->recv.tag.tag_mask = tag_mask; + req->recv.tag.cb = cb; + if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_REQ)) { + req->recv.tag.info.sender_tag = 0; + } + + if (ucs_unlikely(rdesc == NULL)) { + /* If not found on unexpected, wait until it arrives. + * If was found but need this receive request for later completion, save it */ + req_queue = ucp_tag_exp_get_queue(&worker->tm, tag, tag_mask); + + /* If offload supported, post this tag to transport as well. + * TODO: need to distinguish the cases when posting is not needed. 
*/ + ucp_tag_offload_try_post(worker, req, req_queue); + + ucp_tag_exp_push(&worker->tm, req_queue, req); + + ucs_trace_req("%s returning expected request %p (%p)", debug_name, req, + req + 1); + return; + } + + /* Check rendezvous case */ + if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_RNDV)) { + ucp_rndv_matched(worker, req, (void*)(rdesc + 1)); + UCP_WORKER_STAT_RNDV(worker, UNEXP); + ucp_recv_desc_release(rdesc); + return; + } + + if (ucs_unlikely(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER_SYNC)) { + ucp_tag_eager_sync_send_ack(worker, rdesc + 1, rdesc->flags); + } + + UCP_WORKER_STAT_EAGER_MSG(worker, rdesc->flags); + ucs_assert(rdesc->flags & UCP_RECV_DESC_FLAG_EAGER); + eagerf_hdr = (void*)(rdesc + 1); + req->recv.tag.info.sender_tag = ucp_rdesc_get_tag(rdesc); + req->recv.tag.info.length = + req->recv.tag.remaining = eagerf_hdr->total_len; + + /* process first fragment */ + UCP_WORKER_STAT_EAGER_CHUNK(worker, UNEXP); + msg_id = eagerf_hdr->msg_id; /* copied out first: the call below releases rdesc, which eagerf_hdr points into */ + status = ucp_tag_recv_request_process_rdesc(req, rdesc, 0); + ucs_assert(status == UCS_INPROGRESS); + + /* process additional fragments */ + ucp_tag_frag_list_process_queue(&worker->tm, req, msg_id + UCS_STATS_ARG(UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP)); +} +/* Tagged receive into caller-provided request storage: 'request' points just past the ucp_request_t header, hence the "- 1" below. No callback is passed. Returns UCS_OK after handing the request to ucp_tag_recv_common(); UCS_ERR_INVALID_PARAM is the action given to the feature-check macro (presumably taken when UCP_FEATURE_TAG is not enabled - macro not visible here). */ +UCS_PROFILE_FUNC(ucs_status_t, ucp_tag_recv_nbr, + (worker, buffer, count, datatype, tag, tag_mask, request), + ucp_worker_h worker, void *buffer, size_t count, + uintptr_t datatype, ucp_tag_t tag, ucp_tag_t tag_mask, + void *request) +{ + ucp_request_t *req = (ucp_request_t *)request - 1; + ucp_recv_desc_t *rdesc; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(worker->context, UCP_FEATURE_TAG, + return UCS_ERR_INVALID_PARAM); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + rdesc = ucp_tag_unexp_search(&worker->tm, tag, tag_mask, 1, "recv_nbr"); /* 1 = remove the matched descriptor from the unexpected lists */ + ucp_tag_recv_common(worker, buffer, count, datatype, tag, tag_mask, + req, UCP_REQUEST_DEBUG_FLAG_EXTERNAL, NULL, rdesc, + "recv_nbr"); + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + return UCS_OK; +} + 
+UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_tag_recv_nb, + (worker, buffer, count, datatype, tag, tag_mask, cb), + ucp_worker_h worker, void *buffer, size_t count, + uintptr_t datatype, ucp_tag_t tag, ucp_tag_t tag_mask, + ucp_tag_recv_callback_t cb) +{ /* Non-blocking tagged receive using a request obtained from ucp_request_get(). Returns the user handle (req + 1), or a UCS_ERR_NO_MEMORY status pointer when no request could be obtained. */ + ucp_recv_desc_t *rdesc; + ucs_status_ptr_t ret; + ucp_request_t *req; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(worker->context, UCP_FEATURE_TAG, + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + req = ucp_request_get(worker); + if (ucs_likely(req != NULL)) { + rdesc = ucp_tag_unexp_search(&worker->tm, tag, tag_mask, 1, "recv_nb"); /* 1 = remove the matched descriptor from the unexpected lists */ + ucp_tag_recv_common(worker, buffer, count, datatype, tag, tag_mask, req, + UCP_REQUEST_FLAG_CALLBACK, cb, rdesc,"recv_nb"); + ret = req + 1; /* the handle given to the user starts right after the ucp_request_t header */ + } else { + ret = UCS_STATUS_PTR(UCS_ERR_NO_MEMORY); + } + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + return ret; +} +/* Receive a message the caller already holds: 'message' is used directly as the receive descriptor, so the unexpected queue is not searched; the tag is read back from the descriptor and passed with UCP_TAG_MASK_FULL. */ +UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_tag_msg_recv_nb, + (worker, buffer, count, datatype, message, cb), + ucp_worker_h worker, void *buffer, size_t count, + uintptr_t datatype, ucp_tag_message_h message, + ucp_tag_recv_callback_t cb) +{ + ucp_recv_desc_t *rdesc = message; + ucs_status_ptr_t ret; + ucp_request_t *req; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(worker->context, UCP_FEATURE_TAG, + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(worker); + + req = ucp_request_get(worker); + if (ucs_likely(req != NULL)) { + ucp_tag_recv_common(worker, buffer, count, datatype, + ucp_rdesc_get_tag(rdesc), UCP_TAG_MASK_FULL, req, + UCP_REQUEST_FLAG_CALLBACK, cb, rdesc, "msg_recv_nb"); + ret = req + 1; + } else { + ret = UCS_STATUS_PTR(UCS_ERR_NO_MEMORY); + } + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(worker); + return ret; +} diff --git a/src/ucp/tag/tag_send.c b/src/ucp/tag/tag_send.c new file mode 100644 index 0000000..3b56c87 --- /dev/null +++ b/src/ucp/tag/tag_send.c @@ -0,0 +1,314 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. 
ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "tag_match.h" +#include "eager.h" +#include "rndv.h" + +#include +#include +#include +#include +#include +#include + +/* Choose the rendezvous threshold from the datatype class of req: IOV with more than max_iov entries on a tag-offload endpoint yields 1; other IOV and contiguous types yield min(rma, am); generic yields the AM threshold; any other class logs an error and yields SIZE_MAX. */ +static UCS_F_ALWAYS_INLINE size_t +ucp_tag_get_rndv_threshold(const ucp_request_t *req, size_t count, + size_t max_iov, size_t rndv_rma_thresh, + size_t rndv_am_thresh) +{ + switch (req->send.datatype & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_IOV: + if ((count > max_iov) && + ucp_ep_is_tag_offload_enabled(ucp_ep_config(req->send.ep))) { + /* Make sure SW RNDV will be used, because tag offload does + * not support multi-packet eager protocols. */ + return 1; + } + /* Fall through */ + case UCP_DATATYPE_CONTIG: + return ucs_min(rndv_rma_thresh, rndv_am_thresh); + case UCP_DATATYPE_GENERIC: + return rndv_am_thresh; + default: + ucs_error("Invalid data type %lx", req->send.datatype); + } + + return SIZE_MAX; +} +/* Pick a protocol for an already initialized send request and start it. Returns a status pointer when the request failed or completed immediately, otherwise the user handle (req + 1). 'cb' is installed only when enable_zcopy is non-zero. */ +static UCS_F_ALWAYS_INLINE ucs_status_ptr_t +ucp_tag_send_req(ucp_request_t *req, size_t dt_count, + const ucp_ep_msg_config_t* msg_config, + size_t rndv_rma_thresh, size_t rndv_am_thresh, + ucp_send_callback_t cb, const ucp_request_send_proto_t *proto, + int enable_zcopy) +{ + size_t rndv_thresh = ucp_tag_get_rndv_threshold(req, dt_count, + msg_config->max_iov, + rndv_rma_thresh, + rndv_am_thresh); + ssize_t max_short = ucp_proto_get_short_max(req, msg_config); + ucs_status_t status; + size_t zcopy_thresh; + + if (enable_zcopy || + ucs_unlikely(!UCP_MEM_IS_ACCESSIBLE_FROM_CPU(req->send.mem_type))) { + zcopy_thresh = ucp_proto_get_zcopy_threshold(req, msg_config, dt_count, + rndv_thresh); + } else { + zcopy_thresh = rndv_thresh; + } + + ucs_trace_req("select tag request(%p) progress algorithm datatype=%lx " + "buffer=%p length=%zu max_short=%zd rndv_thresh=%zu " + "zcopy_thresh=%zu zcopy_enabled=%d", + req, req->send.datatype, req->send.buffer, req->send.length, + max_short, rndv_thresh, zcopy_thresh, 
enable_zcopy); + + status = ucp_request_send_start(req, max_short, zcopy_thresh, rndv_thresh, + dt_count, msg_config, proto); + if (ucs_unlikely(status != UCS_OK)) { + if (status == UCS_ERR_NO_PROGRESS) { + /* RMA/AM rendezvous */ + ucs_assert(req->send.length >= rndv_thresh); + status = ucp_tag_send_start_rndv(req); + if (status != UCS_OK) { + return UCS_STATUS_PTR(status); + } + + UCP_EP_STAT_TAG_OP(req->send.ep, RNDV); + } else { + return UCS_STATUS_PTR(status); + } + } else if (ucs_unlikely((req->send.uct.func == proto->zcopy_multi) || + (req->send.uct.func == proto->bcopy_multi))) { + req->send.tag.message_id = req->send.ep->worker->am_message_id++; + req->send.tag.am_bw_index = 1; + } + + if (req->flags & UCP_REQUEST_FLAG_SYNC) { /* NOTE(review): also reached after the rendezvous branch above, so those sends bump an EAGER/EAGER_SYNC counter as well - confirm intended */ + UCP_EP_STAT_TAG_OP(req->send.ep, EAGER_SYNC); + } else { + UCP_EP_STAT_TAG_OP(req->send.ep, EAGER); + } + + /* + * Start the request. + * If it is completed immediately, release the request and return the status. + * Otherwise, return the request. + */ + status = ucp_request_send(req, 0); + if (req->flags & UCP_REQUEST_FLAG_COMPLETED) { + ucs_trace_req("releasing send request %p, returning status %s", req, + ucs_status_string(status)); + if (enable_zcopy) { /* NOTE(review): enable_zcopy doubles as "req came from ucp_request_get()"; the caller in this file that passes 0 uses caller-owned storage - confirm */ + ucp_request_put(req); + } + return UCS_STATUS_PTR(status); + } + + if (enable_zcopy) { + ucp_request_set_callback(req, send.cb, cb) /* no trailing ';' in the original; presumably the macro expands to a complete statement */ + } + + ucs_trace_req("returning send request %p", req); + return req + 1; +} +/* Fill in the send-side fields of req for a tagged send; UCP_REQUEST_FLAG_SEND_TAG is OR-ed into 'flags', and the lane is taken from the endpoint's tag configuration. */ +static UCS_F_ALWAYS_INLINE void +ucp_tag_send_req_init(ucp_request_t* req, ucp_ep_h ep, const void* buffer, + uintptr_t datatype, size_t count, ucp_tag_t tag, + uint32_t flags) +{ + req->flags = flags | UCP_REQUEST_FLAG_SEND_TAG; + req->send.ep = ep; + req->send.buffer = (void*)buffer; + req->send.datatype = datatype; + req->send.tag.tag = tag; + ucp_request_send_state_init(req, datatype, count); + req->send.length = ucp_dt_length(req->send.datatype, count, + req->send.buffer, + &req->send.state.dt); + req->send.mem_type = ucp_memory_type_detect(ep->worker->context, + 
(void*)buffer, + req->send.length); + req->send.lane = ucp_ep_config(ep)->tag.lane; + req->send.pending_lane = UCP_NULL_LANE; +} +/* Non-zero when 'length' is within the memtype_off limit, or within the memtype_on limit while ucp_memory_type_cache_is_empty() reports true for the worker's context. */ +static UCS_F_ALWAYS_INLINE int +ucp_tag_eager_is_inline(ucp_ep_h ep, const ucp_memtype_thresh_t *max_eager_short, + ssize_t length) +{ + return (ucs_likely(length <= max_eager_short->memtype_off) || + (length <= max_eager_short->memtype_on && + ucp_memory_type_cache_is_empty(ep->worker->context))); +} +/* Try a short send without a request: AM short first, then tag-offload eager short. Returns UCS_ERR_NO_RESOURCE for a non-contiguous datatype or when neither size limit fits; otherwise returns the status of the UCT call. */ +static UCS_F_ALWAYS_INLINE ucs_status_t +ucp_tag_send_inline(ucp_ep_h ep, const void *buffer, size_t count, + uintptr_t datatype, ucp_tag_t tag) +{ + ucs_status_t status; + size_t length; + + if (ucs_unlikely(!UCP_DT_IS_CONTIG(datatype))) { + return UCS_ERR_NO_RESOURCE; + } + + length = ucp_contig_dt_length(datatype, count); + + if (ucp_tag_eager_is_inline(ep, &ucp_ep_config(ep)->tag.max_eager_short, + length)) { + UCS_STATIC_ASSERT(sizeof(ucp_tag_t) == sizeof(ucp_eager_hdr_t)); + UCS_STATIC_ASSERT(sizeof(ucp_tag_t) == sizeof(uint64_t)); + status = uct_ep_am_short(ucp_ep_get_am_uct_ep(ep), UCP_AM_ID_EAGER_ONLY, + tag, buffer, length); + } else if (ucp_tag_eager_is_inline(ep, &ucp_ep_config(ep)->tag.offload.max_eager_short, + length)) { + UCS_STATIC_ASSERT(sizeof(ucp_tag_t) == sizeof(uct_tag_t)); + status = uct_ep_tag_eager_short(ucp_ep_get_tag_uct_ep(ep), tag, buffer, + length); + } else { + return UCS_ERR_NO_RESOURCE; + } + + if (status != UCS_ERR_NO_RESOURCE) { + UCP_EP_STAT_TAG_OP(ep, EAGER); + } + + return status; +} + +/* Non-blocking tagged send: the inline path is tried first, and a request is obtained only when it returns UCS_ERR_NO_RESOURCE. Returns a status pointer or the user handle produced by ucp_tag_send_req(). */ +UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_tag_send_nb, + (ep, buffer, count, datatype, tag, cb), + ucp_ep_h ep, const void *buffer, size_t count, + uintptr_t datatype, ucp_tag_t tag, ucp_send_callback_t cb) +{ + ucs_status_t status; + ucp_request_t *req; + ucs_status_ptr_t ret; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(ep->worker->context, UCP_FEATURE_TAG, + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucs_trace_req("send_nb buffer %p count %zu tag %"PRIx64" to %s 
cb %p", + buffer, count, tag, ucp_ep_peer_name(ep), cb); + + status = UCS_PROFILE_CALL(ucp_tag_send_inline, ep, buffer, count, + datatype, tag); + if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) { + ret = UCS_STATUS_PTR(status); /* UCS_OK also goes here */ + goto out; + } + + req = ucp_request_get(ep->worker); + if (req == NULL) { + ret = UCS_STATUS_PTR(UCS_ERR_NO_MEMORY); + goto out; + } + + ucp_tag_send_req_init(req, ep, buffer, datatype, count, tag, 0); + + ret = ucp_tag_send_req(req, count, &ucp_ep_config(ep)->tag.eager, + ucp_ep_config(ep)->tag.rndv.rma_thresh, + ucp_ep_config(ep)->tag.rndv.am_thresh, + cb, ucp_ep_config(ep)->tag.proto, 1); +out: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return ret; +} +/* Tagged send into caller-provided request storage ('request' points just past the ucp_request_t header). Uses the rndv_send_nbr thresholds, no callback and enable_zcopy = 0. Returns the inline-send status when that path was taken, an error status when ucp_tag_send_req() returned an error pointer, and UCS_INPROGRESS otherwise. */ +UCS_PROFILE_FUNC(ucs_status_t, ucp_tag_send_nbr, + (ep, buffer, count, datatype, tag, request), + ucp_ep_h ep, const void *buffer, size_t count, + uintptr_t datatype, ucp_tag_t tag, void *request) +{ + ucp_request_t *req = (ucp_request_t *)request - 1; + ucs_status_t status; + ucs_status_ptr_t ret; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(ep->worker->context, UCP_FEATURE_TAG, + return UCS_ERR_INVALID_PARAM); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucs_trace_req("send_nbr buffer %p count %zu tag %"PRIx64" to %s req %p", + buffer, count, tag, ucp_ep_peer_name(ep), request); + + status = UCS_PROFILE_CALL(ucp_tag_send_inline, ep, buffer, count, + datatype, tag); + if (ucs_likely(status != UCS_ERR_NO_RESOURCE)) { + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + return status; + } + + ucp_tag_send_req_init(req, ep, buffer, datatype, count, tag, 0); + + ret = ucp_tag_send_req(req, count, &ucp_ep_config(ep)->tag.eager, + ucp_ep_config(ep)->tag.rndv_send_nbr.rma_thresh, + ucp_ep_config(ep)->tag.rndv_send_nbr.am_thresh, + NULL, ucp_ep_config(ep)->tag.proto, 0); + + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); + + if (ucs_unlikely(UCS_PTR_IS_ERR(ret))) { + return UCS_PTR_STATUS(ret); + } + return UCS_INPROGRESS; +} + 
+UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_tag_send_sync_nb, + (ep, buffer, count, datatype, tag, cb), + ucp_ep_h ep, const void *buffer, size_t count, + uintptr_t datatype, ucp_tag_t tag, ucp_send_callback_t cb) +{ /* Synchronous non-blocking tagged send. The inline short path is not attempted here; the request is created with UCP_REQUEST_FLAG_SYNC and sent through the endpoint's sync_proto table. Returns a status pointer or the user handle produced by ucp_tag_send_req(). */ + ucp_request_t *req; + ucs_status_ptr_t ret; + ucs_status_t status; + + UCP_CONTEXT_CHECK_FEATURE_FLAGS(ep->worker->context, UCP_FEATURE_TAG, + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM)); + UCP_WORKER_THREAD_CS_ENTER_CONDITIONAL(ep->worker); + + ucs_trace_req("send_sync_nb buffer %p count %zu tag %"PRIx64" to %s cb %p", + buffer, count, tag, ucp_ep_peer_name(ep), cb); + + if (!ucp_ep_config_test_rndv_support(ucp_ep_config(ep))) { /* sync sends are refused with UCS_ERR_UNSUPPORTED when this check fails */ + ret = UCS_STATUS_PTR(UCS_ERR_UNSUPPORTED); + goto out; + } + + status = ucp_ep_resolve_dest_ep_ptr(ep, ucp_ep_config(ep)->tag.lane); + if (status != UCS_OK) { + ret = UCS_STATUS_PTR(status); + goto out; + } + + req = ucp_request_get(ep->worker); + if (req == NULL) { + ret = UCS_STATUS_PTR(UCS_ERR_NO_MEMORY); + goto out; + } + + ucp_tag_send_req_init(req, ep, buffer, datatype, count, tag, + UCP_REQUEST_FLAG_SYNC); + + ret = ucp_tag_send_req(req, count, &ucp_ep_config(ep)->tag.eager, + ucp_ep_config(ep)->tag.rndv.rma_thresh, + ucp_ep_config(ep)->tag.rndv.am_thresh, + cb, ucp_ep_config(ep)->tag.sync_proto, 1); +out: + UCP_WORKER_THREAD_CS_EXIT_CONDITIONAL(ep->worker); /* every exit after the ENTER above goes through this label */ + return ret; +} diff --git a/src/ucp/wireup/address.c b/src/ucp/wireup/address.c new file mode 100644 index 0000000..bb5056d --- /dev/null +++ b/src/ucp/wireup/address.c @@ -0,0 +1,961 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "address.h" +#include "wireup_ep.h" + +#include +#include +#include +#include +#include + + +/* + * Packed address layout: + * + * [ uuid(64bit) | worker_name(string) ] + * [ device1_md_index | device1_address(var) ] + * [ tl1_name_csum(string) | tl1_info | tl1_address(var) ] + * [ tl2_name_csum(string) | tl2_info | tl2_address(var) ] + * ... + * [ device2_md_index | device2_address(var) ] + * ... + * + * * worker_name is packed if ENABLE_DEBUG_DATA is set and the UCP_ADDRESS_PACK_FLAG_WORKER_NAME flag is passed. + * * In unified mode tl_info contains just rsc_index and iface latency overhead. + * For last address in the tl address list, it will have LAST flag set. + * * In non unified mode tl_info contains iface attributes. LAST flag is set in + * iface address length. + * * If a device does not have tl addresses, its md_index will have the flag + * EMPTY. + * * If the address list is empty, then it will contain only a single md_index + * which equals UCP_NULL_RESOURCE. + * * For non-unified mode, ep address contains length with flags. Multiple ep + * addresses could be present and the last one is marked with the flag + * UCP_ADDRESS_FLAG_LAST. For unified mode, there could not be more than one + * ep address. + * * For any mode, ep address is followed by a lane index. + */ + + +typedef struct { + size_t dev_addr_len; + uint64_t tl_bitmap; + ucp_rsc_index_t rsc_index; + ucp_rsc_index_t tl_count; + size_t tl_addrs_size; +} ucp_address_packed_device_t; + + +typedef struct { + float overhead; + float bandwidth; + float lat_ovh; + uint32_t prio_cap_flags; /* 8 lsb: prio, 22 msb: cap flags, 2 hsb: amo */ +} ucp_address_packed_iface_attr_t; + + +/* In unified mode we pack resource index instead of iface attrs to the address, + * so the peer can get all attrs from the local device with the same resource + * index. 
+ * Also we send information which depends on device NUMA locality, + * which may be different on peers (processes which do address pack + * and address unpack): + * - latency overhead + * - Indication whether resource can be used for atomics or not (packed to the + * signed bit of lat_ovh). + * + * TODO: Revise/fix this when NUMA locality is exposed in UCP. + * */ +typedef struct { + ucp_rsc_index_t rsc_index; + float lat_ovh; +} ucp_address_unified_iface_attr_t; + + +#define UCT_ADDRESS_FLAG_ATOMIC32 UCS_BIT(30) /* 32bit atomic operations */ +#define UCT_ADDRESS_FLAG_ATOMIC64 UCS_BIT(31) /* 64bit atomic operations */ + +#define UCP_ADDRESS_FLAG_LAST 0x80 /* Last address in the list */ +#define UCP_ADDRESS_FLAG_HAVE_EP_ADDR 0x40 /* Indicates that ep addr is packed + right after iface addr */ +#define UCP_ADDRESS_FLAG_LEN_MASK ~(UCP_ADDRESS_FLAG_HAVE_EP_ADDR | \ + UCP_ADDRESS_FLAG_LAST) + +#define UCP_ADDRESS_FLAG_EMPTY 0x80 /* Device without TL addresses */ +#define UCP_ADDRESS_FLAG_MD_ALLOC 0x40 /* MD can allocate */ +#define UCP_ADDRESS_FLAG_MD_REG 0x20 /* MD can register */ +#define UCP_ADDRESS_FLAG_MD_MASK ~(UCP_ADDRESS_FLAG_EMPTY | \ + UCP_ADDRESS_FLAG_MD_ALLOC | \ + UCP_ADDRESS_FLAG_MD_REG) + +static size_t ucp_address_worker_name_size(ucp_worker_h worker, uint64_t flags) +{ +#if ENABLE_DEBUG_DATA + return (flags & UCP_ADDRESS_PACK_FLAG_WORKER_NAME) ? + strlen(ucp_worker_get_name(worker)) + 1 : 0; +#else + return 0; +#endif +} + +static size_t ucp_address_iface_attr_size(ucp_worker_t *worker) +{ + return ucp_worker_unified_mode(worker) ? 
+ sizeof(ucp_address_unified_iface_attr_t) : + sizeof(ucp_address_packed_iface_attr_t); +} + +static uint64_t ucp_worker_iface_can_connect(uct_iface_attr_t *attrs) +{ + return attrs->cap.flags & + (UCT_IFACE_FLAG_CONNECT_TO_IFACE | UCT_IFACE_FLAG_CONNECT_TO_EP); +} + +/* Pack a string and return a pointer to storage right after the string */ +static void* ucp_address_pack_worker_name(ucp_worker_h worker, void *dest, + uint64_t flags) +{ +#if ENABLE_DEBUG_DATA + const char *s; + size_t length; + + if (!(flags & UCP_ADDRESS_PACK_FLAG_WORKER_NAME)) { + return dest; + } + + s = ucp_worker_get_name(worker); + length = strlen(s); + ucs_assert(length <= UINT8_MAX); + *(uint8_t*)dest = length; + memcpy(UCS_PTR_TYPE_OFFSET(dest, uint8_t), s, length); + return UCS_PTR_BYTE_OFFSET(UCS_PTR_TYPE_OFFSET(dest, uint8_t), length); +#else + return dest; +#endif +} + +/* Unpack a string and return pointer to next storage byte */ +static const void* ucp_address_unpack_worker_name(const void *src, char *s, + size_t max, uint64_t flags) +{ +#if ENABLE_DEBUG_DATA + size_t length, avail; + + if (!(flags & UCP_ADDRESS_PACK_FLAG_WORKER_NAME)) { + s[0] = '\0'; + return src; + } + + ucs_assert(max >= 1); + length = *(const uint8_t*)src; + avail = ucs_min(length, max - 1); + memcpy(s, UCS_PTR_TYPE_OFFSET(src, uint8_t), avail); + s[avail] = '\0'; + return UCS_PTR_TYPE_OFFSET(UCS_PTR_BYTE_OFFSET(src, length), uint8_t); +#else + s[0] = '\0'; + return src; +#endif +} + +static ucp_address_packed_device_t* +ucp_address_get_device(ucp_context_h context, ucp_rsc_index_t rsc_index, + ucp_address_packed_device_t *devices, + ucp_rsc_index_t *num_devices_p) +{ + const ucp_tl_resource_desc_t *tl_rsc = context->tl_rscs; + ucp_address_packed_device_t *dev; + + for (dev = devices; dev < devices + *num_devices_p; ++dev) { + if ((tl_rsc[rsc_index].md_index == tl_rsc[dev->rsc_index].md_index) && + !strcmp(tl_rsc[rsc_index].tl_rsc.dev_name, + tl_rsc[dev->rsc_index].tl_rsc.dev_name)) { + goto out; + } + } + + dev 
= &devices[(*num_devices_p)++]; + memset(dev, 0, sizeof(*dev)); +out: + return dev; +} + +static ucs_status_t +ucp_address_gather_devices(ucp_worker_h worker, ucp_ep_h ep, uint64_t tl_bitmap, + uint64_t flags, ucp_address_packed_device_t **devices_p, + ucp_rsc_index_t *num_devices_p) +{ + ucp_context_h context = worker->context; + ucp_address_packed_device_t *dev, *devices; + uct_iface_attr_t *iface_attr; + ucp_rsc_index_t num_devices; + ucp_rsc_index_t rsc_index; + ucp_lane_index_t lane; + unsigned num_ep_addrs; + + devices = ucs_calloc(context->num_tls, sizeof(*devices), "packed_devices"); + if (devices == NULL) { + return UCS_ERR_NO_MEMORY; + } + + num_devices = 0; + tl_bitmap &= context->tl_bitmap; + ucs_for_each_bit(rsc_index, tl_bitmap) { + iface_attr = ucp_worker_iface_get_attr(worker, rsc_index); + if (!ucp_worker_iface_can_connect(iface_attr)) { + continue; + } + + dev = ucp_address_get_device(context, rsc_index, devices, &num_devices); + + if ((flags & UCP_ADDRESS_PACK_FLAG_EP_ADDR) && + ucp_worker_iface_is_tl_p2p(iface_attr)) { + /* Each lane which matches the resource index adds an ep address + * entry. The length and flags is packed in non-unified mode only. 
+ */ + ucs_assert(ep != NULL); + num_ep_addrs = 0; + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (ucp_ep_get_rsc_index(ep, lane) == rsc_index) { + dev->tl_addrs_size += !ucp_worker_unified_mode(worker); + dev->tl_addrs_size += iface_attr->ep_addr_len; + dev->tl_addrs_size += sizeof(uint8_t); /* lane index */ + } + } + if (ucp_worker_unified_mode(worker)) { + ucs_assertv_always( + num_ep_addrs <= 1, + "unexpected multiple ep addresses in unified mode"); + } + } + + dev->tl_addrs_size += sizeof(uint16_t); /* tl name checksum */ + + if (flags & UCP_ADDRESS_PACK_FLAG_IFACE_ADDR) { + /* iface address (its length will be packed in non-unified mode only) */ + dev->tl_addrs_size += iface_attr->iface_addr_len; + dev->tl_addrs_size += !ucp_worker_unified_mode(worker); /* if addr length */ + dev->tl_addrs_size += ucp_address_iface_attr_size(worker); + } else { + dev->tl_addrs_size += 1; /* 0-value for valid unpacking */ + } + + if (flags & UCP_ADDRESS_PACK_FLAG_DEVICE_ADDR) { + dev->dev_addr_len = iface_attr->device_addr_len; + } else { + dev->dev_addr_len = 0; + } + + dev->rsc_index = rsc_index; + dev->tl_bitmap |= UCS_BIT(rsc_index); + } + + *devices_p = devices; + *num_devices_p = num_devices; + return UCS_OK; +} + +static size_t ucp_address_packed_size(ucp_worker_h worker, + const ucp_address_packed_device_t *devices, + ucp_rsc_index_t num_devices, + uint64_t flags) +{ + size_t size = 0; + const ucp_address_packed_device_t *dev; + + if (flags & UCP_ADDRESS_PACK_FLAG_WORKER_UUID) { + size += sizeof(uint64_t); + } + + size += ucp_address_worker_name_size(worker, flags); + + if (num_devices == 0) { + size += 1; /* NULL md_index */ + } else { + for (dev = devices; dev < (devices + num_devices); ++dev) { + size += 1; /* device md_index */ + size += 1; /* device address length */ + if (flags & UCP_ADDRESS_PACK_FLAG_DEVICE_ADDR) { + size += dev->dev_addr_len; /* device address */ + } + size += dev->tl_addrs_size; /* transport addresses */ + } + } + return size; +} 
+ +static void ucp_address_memcheck(ucp_context_h context, void *ptr, size_t size, + ucp_rsc_index_t rsc_index) +{ + + void *undef_ptr; + + undef_ptr = (void*)VALGRIND_CHECK_MEM_IS_DEFINED(ptr, size); + if (undef_ptr != NULL) { + ucs_error(UCT_TL_RESOURCE_DESC_FMT + " address contains undefined bytes at offset %zd", + UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[rsc_index].tl_rsc), + UCS_PTR_BYTE_DIFF(ptr, undef_ptr)); + } +} + +static int ucp_address_pack_iface_attr(ucp_worker_h worker, void *ptr, + ucp_rsc_index_t index, + const uct_iface_attr_t *iface_attr, + int enable_atomics) +{ + ucp_address_packed_iface_attr_t *packed; + ucp_address_unified_iface_attr_t *unified; + uint32_t packed_flag; + uint64_t cap_flags; + uint64_t bit; + + /* check if at least one of bandwidth values is 0 */ + if ((iface_attr->bandwidth.dedicated * iface_attr->bandwidth.shared) != 0) { + ucs_error("Incorrect bandwidth value: one of bandwidth dedicated/shared must be zero"); + return -1; + } + + + if (ucp_worker_unified_mode(worker)) { + /* In unified mode all workers have the same transports and tl bitmap. + * Just send rsc index, so the remote peer could fetch iface attributes + * from its local iface. Also send latency overhead, because it + * depends on device NUMA locality. */ + unified = ptr; + unified->rsc_index = index; + unified->lat_ovh = enable_atomics ? -iface_attr->latency.overhead : + iface_attr->latency.overhead; + + return sizeof(*unified); + } + + packed = ptr; + cap_flags = iface_attr->cap.flags; + + packed->prio_cap_flags = ((uint8_t)iface_attr->priority); + packed->overhead = iface_attr->overhead; + packed->bandwidth = iface_attr->bandwidth.dedicated - iface_attr->bandwidth.shared; + packed->lat_ovh = iface_attr->latency.overhead; + + /* Keep only the bits defined by UCP_ADDRESS_IFACE_FLAGS, to shrink address. 
*/ + packed_flag = UCS_BIT(8); + bit = 1; + while (UCP_ADDRESS_IFACE_FLAGS & ~(bit - 1)) { + if (UCP_ADDRESS_IFACE_FLAGS & bit) { + if (cap_flags & bit) { + packed->prio_cap_flags |= packed_flag; + } + packed_flag <<= 1; + } + bit <<= 1; + } + + if (enable_atomics) { + if (ucs_test_all_flags(iface_attr->cap.atomic32.op_flags, UCP_ATOMIC_OP_MASK) && + ucs_test_all_flags(iface_attr->cap.atomic32.fop_flags, UCP_ATOMIC_FOP_MASK)) { + packed->prio_cap_flags |= UCT_ADDRESS_FLAG_ATOMIC32; + } + if (ucs_test_all_flags(iface_attr->cap.atomic64.op_flags, UCP_ATOMIC_OP_MASK) && + ucs_test_all_flags(iface_attr->cap.atomic64.fop_flags, UCP_ATOMIC_FOP_MASK)) { + packed->prio_cap_flags |= UCT_ADDRESS_FLAG_ATOMIC64; + } + } + + return sizeof(*packed); +} + +static int +ucp_address_unpack_iface_attr(ucp_worker_t *worker, + ucp_address_iface_attr_t *iface_attr, + const void *ptr) +{ + const ucp_address_packed_iface_attr_t *packed; + const ucp_address_unified_iface_attr_t *unified; + ucp_worker_iface_t *wiface; + uint32_t packed_flag; + ucp_rsc_index_t rsc_idx; + uint64_t bit; + + if (ucp_worker_unified_mode(worker)) { + /* Address contains resources index and iface latency overhead + * (not all iface attrs). */ + unified = ptr; + rsc_idx = unified->rsc_index & UCP_ADDRESS_FLAG_LEN_MASK; + iface_attr->lat_ovh = fabs(unified->lat_ovh); + wiface = ucp_worker_iface(worker, rsc_idx); + + /* Just take the rest of iface attrs from the local resource. 
*/ + iface_attr->cap_flags = wiface->attr.cap.flags; + iface_attr->priority = wiface->attr.priority; + iface_attr->overhead = wiface->attr.overhead; + iface_attr->bandwidth = wiface->attr.bandwidth; + if (signbit(unified->lat_ovh)) { + iface_attr->atomic.atomic32.op_flags = wiface->attr.cap.atomic32.op_flags; + iface_attr->atomic.atomic32.fop_flags = wiface->attr.cap.atomic32.fop_flags; + iface_attr->atomic.atomic64.op_flags = wiface->attr.cap.atomic64.op_flags; + iface_attr->atomic.atomic64.fop_flags = wiface->attr.cap.atomic64.fop_flags; + } + + return sizeof(*unified); + } + + packed = ptr; + iface_attr->cap_flags = 0; + iface_attr->priority = packed->prio_cap_flags & UCS_MASK(8); + iface_attr->overhead = packed->overhead; + iface_attr->bandwidth.dedicated = ucs_max(0.0, packed->bandwidth); + iface_attr->bandwidth.shared = ucs_max(0.0, -packed->bandwidth); + iface_attr->lat_ovh = packed->lat_ovh; + + packed_flag = UCS_BIT(8); + bit = 1; + while (UCP_ADDRESS_IFACE_FLAGS & ~(bit - 1)) { + if (UCP_ADDRESS_IFACE_FLAGS & bit) { + if (packed->prio_cap_flags & packed_flag) { + iface_attr->cap_flags |= bit; + } + packed_flag <<= 1; + } + bit <<= 1; + } + + if (packed->prio_cap_flags & UCT_ADDRESS_FLAG_ATOMIC32) { + iface_attr->atomic.atomic32.op_flags |= UCP_ATOMIC_OP_MASK; + iface_attr->atomic.atomic32.fop_flags |= UCP_ATOMIC_FOP_MASK; + } + if (packed->prio_cap_flags & UCT_ADDRESS_FLAG_ATOMIC64) { + iface_attr->atomic.atomic64.op_flags |= UCP_ATOMIC_OP_MASK; + iface_attr->atomic.atomic64.fop_flags |= UCP_ATOMIC_FOP_MASK; + } + + return sizeof(*packed); +} + +static void* +ucp_address_iface_flags_ptr(ucp_worker_h worker, void *attr_ptr, int attr_len) +{ + if (ucp_worker_unified_mode(worker)) { + /* In unified mode, rsc_index is packed instead of attrs. Address flags + * will be packed in the end of rsc_index byte. 
*/ + UCS_STATIC_ASSERT(ucs_offsetof(ucp_address_unified_iface_attr_t, + rsc_index) == 0); + return attr_ptr; + } + + /* In non-unified mode, address flags will be packed in the end of + * iface addr length byte, which is packed right after iface attrs. */ + return UCS_PTR_BYTE_OFFSET(attr_ptr, attr_len); +} + +static void* +ucp_address_pack_length(ucp_worker_h worker, void *ptr, size_t addr_length) +{ + if (ucp_worker_unified_mode(worker)) { + return ptr; + } + + ucs_assert(addr_length <= UCP_ADDRESS_FLAG_LEN_MASK); + *(uint8_t*)ptr = addr_length; + + return UCS_PTR_TYPE_OFFSET(ptr, uint8_t); +} + +static const void* +ucp_address_unpack_length(ucp_worker_h worker, const void* flags_ptr, const void *ptr, + size_t *addr_length, int is_ep_addr, int *is_last) +{ + ucp_rsc_index_t rsc_index; + uct_iface_attr_t *attr; + const ucp_address_unified_iface_attr_t *unified; + + if (ucp_worker_unified_mode(worker)) { + /* In unified mode: + * - flags are packed with rsc index in ucp_address_unified_iface_attr_t + * - iface and ep addr lengths are not packed, need to take them from + * local iface attrs */ + unified = flags_ptr; + rsc_index = unified->rsc_index & UCP_ADDRESS_FLAG_LEN_MASK; + attr = ucp_worker_iface_get_attr(worker, rsc_index); + + ucs_assert(&unified->rsc_index == flags_ptr); + + if (is_ep_addr) { + *addr_length = attr->ep_addr_len; + *is_last = 1; /* in unified mode, there's only 1 ep address */ + } else { + *addr_length = attr->iface_addr_len; + *is_last = unified->rsc_index & UCP_ADDRESS_FLAG_LAST; + } + return ptr; + } + + *is_last = *(uint8_t*)ptr & UCP_ADDRESS_FLAG_LAST; + *addr_length = *(uint8_t*)ptr & UCP_ADDRESS_FLAG_LEN_MASK; + + return UCS_PTR_TYPE_OFFSET(ptr, uint8_t); +} + +static ucs_status_t ucp_address_do_pack(ucp_worker_h worker, ucp_ep_h ep, + void *buffer, size_t size, + uint64_t tl_bitmap, uint64_t flags, + const ucp_lane_index_t *lanes2remote, + const ucp_address_packed_device_t *devices, + ucp_rsc_index_t num_devices) +{ + ucp_context_h 
context = worker->context; + uint64_t md_flags_pack_mask = (UCT_MD_FLAG_REG | UCT_MD_FLAG_ALLOC); + const ucp_address_packed_device_t *dev; + uct_iface_attr_t *iface_attr; + ucp_rsc_index_t md_index; + ucp_worker_iface_t *wiface; + ucp_rsc_index_t rsc_index; + ucp_lane_index_t lane, remote_lane; + void *flags_ptr, *ep_flags_ptr; + uint64_t dev_tl_bitmap; + unsigned num_ep_addrs; + ucs_status_t status; + size_t iface_addr_len; + size_t ep_addr_len; + uint64_t md_flags; + unsigned index; + int attr_len; + void *ptr; + int enable_amo; + + ptr = buffer; + index = 0; + + if (flags & UCP_ADDRESS_PACK_FLAG_WORKER_UUID) { + *(uint64_t*)ptr = worker->uuid; + ptr = UCS_PTR_TYPE_OFFSET(ptr, worker->uuid); + } + + ptr = ucp_address_pack_worker_name(worker, ptr, flags); + + if (num_devices == 0) { + *((uint8_t*)ptr) = UCP_NULL_RESOURCE; + ptr = UCS_PTR_TYPE_OFFSET(ptr, UCP_NULL_RESOURCE); + goto out; + } + + for (dev = devices; dev < (devices + num_devices); ++dev) { + + dev_tl_bitmap = context->tl_bitmap & dev->tl_bitmap; + + /* MD index */ + md_index = context->tl_rscs[dev->rsc_index].md_index; + md_flags = context->tl_mds[md_index].attr.cap.flags & md_flags_pack_mask; + ucs_assert_always(!(md_index & ~UCP_ADDRESS_FLAG_MD_MASK)); + + *(uint8_t*)ptr = md_index | + ((dev_tl_bitmap == 0) ? UCP_ADDRESS_FLAG_EMPTY : 0) | + ((md_flags & UCT_MD_FLAG_ALLOC) ? UCP_ADDRESS_FLAG_MD_ALLOC : 0) | + ((md_flags & UCT_MD_FLAG_REG) ? UCP_ADDRESS_FLAG_MD_REG : 0); + ptr = UCS_PTR_TYPE_OFFSET(ptr, md_index); + + /* Device address length */ + *(uint8_t*)ptr = (dev == (devices + num_devices - 1)) ? 
+ UCP_ADDRESS_FLAG_LAST : 0; + if (flags & UCP_ADDRESS_PACK_FLAG_DEVICE_ADDR) { + ucs_assert(dev->dev_addr_len < UCP_ADDRESS_FLAG_LAST); + *(uint8_t*)ptr |= dev->dev_addr_len; + } + ptr = UCS_PTR_TYPE_OFFSET(ptr, uint8_t); + + /* Device address */ + if (flags & UCP_ADDRESS_PACK_FLAG_DEVICE_ADDR) { + wiface = ucp_worker_iface(worker, dev->rsc_index); + status = uct_iface_get_device_address(wiface->iface, + (uct_device_addr_t*)ptr); + if (status != UCS_OK) { + return status; + } + + ucp_address_memcheck(context, ptr, dev->dev_addr_len, dev->rsc_index); + ptr = UCS_PTR_BYTE_OFFSET(ptr, dev->dev_addr_len); + } + + flags_ptr = NULL; + ucs_for_each_bit(rsc_index, dev_tl_bitmap) { + + wiface = ucp_worker_iface(worker, rsc_index); + iface_attr = &wiface->attr; + + if (!ucp_worker_iface_can_connect(iface_attr)) { + return UCS_ERR_INVALID_ADDR; + } + + /* Transport name checksum */ + *(uint16_t*)ptr = context->tl_rscs[rsc_index].tl_name_csum; + ptr = UCS_PTR_TYPE_OFFSET(ptr, + context->tl_rscs[rsc_index].tl_name_csum); + + /* Transport information */ + enable_amo = worker->atomic_tls & UCS_BIT(rsc_index); + attr_len = ucp_address_pack_iface_attr(worker, ptr, rsc_index, + iface_attr, enable_amo); + if (attr_len < 0) { + return UCS_ERR_INVALID_ADDR; + } + + ucp_address_memcheck(context, ptr, attr_len, rsc_index); + + if (flags & UCP_ADDRESS_PACK_FLAG_IFACE_ADDR) { + iface_addr_len = iface_attr->iface_addr_len; + } else { + iface_addr_len = 0; + } + + flags_ptr = ucp_address_iface_flags_ptr(worker, ptr, attr_len); + ptr = UCS_PTR_BYTE_OFFSET(ptr, attr_len); + ucs_assertv(iface_addr_len < UCP_ADDRESS_FLAG_HAVE_EP_ADDR, + "iface_addr_len=%zu", iface_addr_len); + + /* Pack iface address */ + ptr = ucp_address_pack_length(worker, ptr, iface_addr_len); + if (flags & UCP_ADDRESS_PACK_FLAG_IFACE_ADDR) { + status = uct_iface_get_address(wiface->iface, + (uct_iface_addr_t*)ptr); + if (status != UCS_OK) { + return status; + } + + ucp_address_memcheck(context, ptr, iface_addr_len, 
rsc_index); + ptr = UCS_PTR_BYTE_OFFSET(ptr, iface_addr_len); + } + + /* Pack ep address if present: iterate over all lanes which use the + * current resource (rsc_index) and pack their addresses. The last + * one is marked with UCP_ADDRESS_FLAG_LAST in its length field. + */ + num_ep_addrs = 0; + if ((flags & UCP_ADDRESS_PACK_FLAG_EP_ADDR) && + ucp_worker_iface_is_tl_p2p(iface_attr)) { + + ucs_assert(ep != NULL); + ep_addr_len = iface_attr->ep_addr_len; + ep_flags_ptr = NULL; + + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (ucp_ep_get_rsc_index(ep, lane) != rsc_index) { + continue; + } + + /* pack ep address length and save pointer to flags */ + ep_flags_ptr = ptr; + ptr = ucp_address_pack_length(worker, ptr, + ep_addr_len); + + /* pack ep address */ + status = uct_ep_get_address(ep->uct_eps[lane], ptr); + if (status != UCS_OK) { + return status; + } + + ucp_address_memcheck(context, ptr, ep_addr_len, rsc_index); + ptr = UCS_PTR_BYTE_OFFSET(ptr, ep_addr_len); + + /* pack ep lane index */ + remote_lane = (lanes2remote == NULL) ? 
lane : + lanes2remote[lane]; + *(uint8_t*)ptr = remote_lane; + ptr = UCS_PTR_TYPE_OFFSET(ptr, uint8_t); + + ucs_trace("pack addr[%d].ep_addr[%d] : len %zu lane %d->%d", + index, num_ep_addrs, ep_addr_len, lane, + remote_lane); + + ++num_ep_addrs; + } + + if (num_ep_addrs > 0) { + ucs_assert(ep_flags_ptr != NULL); + *(uint8_t*)flags_ptr |= UCP_ADDRESS_FLAG_HAVE_EP_ADDR; + if (!ucp_worker_unified_mode(worker)) { + *(uint8_t*)ep_flags_ptr |= UCP_ADDRESS_FLAG_LAST; + } + } + } + + ucs_assert((num_ep_addrs > 0) || + !(*(uint8_t*)flags_ptr & UCP_ADDRESS_FLAG_HAVE_EP_ADDR)); + + if (flags & UCP_ADDRESS_PACK_FLAG_TRACE) { + ucs_trace("pack addr[%d] : "UCT_TL_RESOURCE_DESC_FMT" " + "eps %u md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e + %e/n ovh %e " + "lat_ovh %e dev_priority %d a32 0x%lx/0x%lx a64 0x%lx/0x%lx", + index, + UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[rsc_index].tl_rsc), + num_ep_addrs, md_flags, iface_attr->cap.flags, + iface_attr->bandwidth.dedicated, + iface_attr->bandwidth.shared, + iface_attr->overhead, + iface_attr->latency.overhead, + iface_attr->priority, + iface_attr->cap.atomic32.op_flags, + iface_attr->cap.atomic32.fop_flags, + iface_attr->cap.atomic64.op_flags, + iface_attr->cap.atomic64.fop_flags); + } + + ++index; + ucs_assert(index <= UCP_MAX_RESOURCES); + } + + /* flags_ptr is a valid pointer to the flags set to the last entry + * during the above loop So, set the LAST flag for the flags_ptr + * from the last iteration */ + if (flags_ptr != NULL) { + ucs_assert(dev_tl_bitmap != 0); + *(uint8_t*)flags_ptr |= UCP_ADDRESS_FLAG_LAST; + } else { + /* cppcheck-suppress internalAstError */ + ucs_assert(dev_tl_bitmap == 0); + } + } + +out: + ucs_assertv(UCS_PTR_BYTE_OFFSET(buffer, size) == ptr, + "buffer=%p size=%zu ptr=%p ptr-buffer=%zd", + buffer, size, ptr, UCS_PTR_BYTE_DIFF(buffer, ptr)); + return UCS_OK; +} + +ucs_status_t ucp_address_pack(ucp_worker_h worker, ucp_ep_h ep, + uint64_t tl_bitmap, uint64_t flags, + const ucp_lane_index_t 
*lanes2remote, + size_t *size_p, void **buffer_p) +{ + ucp_address_packed_device_t *devices; + ucp_rsc_index_t num_devices; + ucs_status_t status; + void *buffer; + size_t size; + + if (ep == NULL) { + flags &= ~UCP_ADDRESS_PACK_FLAG_EP_ADDR; + } + + /* Collect all devices we want to pack */ + status = ucp_address_gather_devices(worker, ep, tl_bitmap, flags, &devices, + &num_devices); + if (status != UCS_OK) { + goto out; + } + + /* Calculate packed size */ + size = ucp_address_packed_size(worker, devices, num_devices, flags); + + /* Allocate address */ + buffer = ucs_malloc(size, "ucp_address"); + if (buffer == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out_free_devices; + } + + memset(buffer, 0, size); + + /* Pack the address */ + status = ucp_address_do_pack(worker, ep, buffer, size, tl_bitmap, flags, + lanes2remote, devices, num_devices); + if (status != UCS_OK) { + ucs_free(buffer); + goto out_free_devices; + } + + VALGRIND_CHECK_MEM_IS_DEFINED(buffer, size); + + *size_p = size; + *buffer_p = buffer; + status = UCS_OK; + +out_free_devices: + ucs_free(devices); +out: + return status; +} + +ucs_status_t ucp_address_unpack(ucp_worker_t *worker, const void *buffer, + uint64_t flags, + ucp_unpacked_address_t *unpacked_address) +{ + ucp_address_entry_t *address_list, *address; + ucp_address_entry_ep_addr_t *ep_addr; + int last_dev, last_tl, last_ep_addr; + const uct_device_addr_t *dev_addr; + ucp_rsc_index_t dev_index; + ucp_rsc_index_t md_index; + unsigned address_count; + int empty_dev; + uint64_t md_flags; + size_t dev_addr_len; + size_t iface_addr_len; + size_t ep_addr_len; + size_t attr_len; + uint8_t md_byte; + const void *ptr; + const void *aptr; + const void *flags_ptr; + + ptr = buffer; + if (flags & UCP_ADDRESS_PACK_FLAG_WORKER_UUID) { + unpacked_address->uuid = *(uint64_t*)ptr; + ptr = UCS_PTR_TYPE_OFFSET(ptr, unpacked_address->uuid); + } else { + unpacked_address->uuid = 0; + } + + aptr = ucp_address_unpack_worker_name(ptr, unpacked_address->name, + 
sizeof(unpacked_address->name), + flags); + + /* Count addresses */ + ptr = aptr; + address_count = 0; + + last_dev = (*(uint8_t*)ptr == UCP_NULL_RESOURCE); + while (!last_dev) { + /* md_index */ + empty_dev = (*(uint8_t*)ptr) & UCP_ADDRESS_FLAG_EMPTY; + ptr = UCS_PTR_TYPE_OFFSET(ptr, uint8_t); + + /* device address length */ + dev_addr_len = (*(uint8_t*)ptr) & ~UCP_ADDRESS_FLAG_LAST; + last_dev = (*(uint8_t*)ptr) & UCP_ADDRESS_FLAG_LAST; + ptr = UCS_PTR_TYPE_OFFSET(ptr, uint8_t); + ptr = UCS_PTR_BYTE_OFFSET(ptr, dev_addr_len); + + last_tl = empty_dev; + while (!last_tl) { + ptr = UCS_PTR_TYPE_OFFSET(ptr, uint16_t); /* tl_name_csum */ + attr_len = ucp_address_iface_attr_size(worker); + flags_ptr = ucp_address_iface_flags_ptr(worker, (void*)ptr, attr_len); + ptr = UCS_PTR_BYTE_OFFSET(ptr, attr_len); + ptr = ucp_address_unpack_length(worker, flags_ptr, ptr, + &iface_addr_len, 0, &last_tl); + ptr = UCS_PTR_BYTE_OFFSET(ptr, iface_addr_len); + + last_ep_addr = !(*(uint8_t*)flags_ptr & UCP_ADDRESS_FLAG_HAVE_EP_ADDR); + while (!last_ep_addr) { + ptr = ucp_address_unpack_length(worker, flags_ptr, ptr, + &ep_addr_len, 1, &last_ep_addr); + ucs_assert(flags & UCP_ADDRESS_PACK_FLAG_EP_ADDR); + ucs_assert(ep_addr_len > 0); + ptr = UCS_PTR_BYTE_OFFSET(ptr, ep_addr_len); + ptr = UCS_PTR_TYPE_OFFSET(ptr, uint8_t); + } + + ++address_count; + ucs_assert(address_count <= UCP_MAX_RESOURCES); + } + } + + if (address_count == 0) { + address_list = NULL; + goto out; + } + + /* Allocate address list */ + address_list = ucs_calloc(address_count, sizeof(*address_list), + "ucp_address_list"); + if (address_list == NULL) { + ucs_error("failed to allocate address list"); + return UCS_ERR_NO_MEMORY; + } + + /* Unpack addresses */ + address = address_list; + ptr = aptr; + dev_index = 0; + + do { + /* md_index */ + md_byte = (*(uint8_t*)ptr); + md_index = md_byte & UCP_ADDRESS_FLAG_MD_MASK; + md_flags = (md_byte & UCP_ADDRESS_FLAG_MD_ALLOC) ? 
UCT_MD_FLAG_ALLOC : 0; + md_flags |= (md_byte & UCP_ADDRESS_FLAG_MD_REG) ? UCT_MD_FLAG_REG : 0; + empty_dev = md_byte & UCP_ADDRESS_FLAG_EMPTY; + ptr = UCS_PTR_TYPE_OFFSET(ptr, md_byte); + + /* device address length */ + dev_addr_len = (*(uint8_t*)ptr) & ~UCP_ADDRESS_FLAG_LAST; + last_dev = (*(uint8_t*)ptr) & UCP_ADDRESS_FLAG_LAST; + ptr = UCS_PTR_TYPE_OFFSET(ptr, uint8_t); + + dev_addr = ptr; + ptr = UCS_PTR_BYTE_OFFSET(ptr, dev_addr_len); + + last_tl = empty_dev; + while (!last_tl) { + /* tl_name_csum */ + address->tl_name_csum = *(uint16_t*)ptr; + ptr = UCS_PTR_TYPE_OFFSET(ptr, address->tl_name_csum); + + address->dev_addr = (dev_addr_len > 0) ? dev_addr : NULL; + address->md_index = md_index; + address->dev_index = dev_index; + address->md_flags = md_flags; + + attr_len = ucp_address_unpack_iface_attr(worker, &address->iface_attr, ptr); + flags_ptr = ucp_address_iface_flags_ptr(worker, (void*)ptr, attr_len); + ptr = UCS_PTR_BYTE_OFFSET(ptr, attr_len); + ptr = ucp_address_unpack_length(worker, flags_ptr, ptr, + &iface_addr_len, 0, &last_tl); + address->iface_addr = (iface_addr_len > 0) ? 
ptr : NULL; + address->num_ep_addrs = 0; + ptr = UCS_PTR_BYTE_OFFSET(ptr, iface_addr_len); + + last_ep_addr = !(*(uint8_t*)flags_ptr & UCP_ADDRESS_FLAG_HAVE_EP_ADDR); + while (!last_ep_addr) { + ucs_assert(address->num_ep_addrs < UCP_MAX_LANES); + ep_addr = &address->ep_addrs[address->num_ep_addrs++]; + ptr = ucp_address_unpack_length(worker, flags_ptr, ptr, + &ep_addr_len, 1, + &last_ep_addr); + ep_addr->addr = ptr; + ptr = UCS_PTR_BYTE_OFFSET(ptr, ep_addr_len); + + ep_addr->lane = *(uint8_t*)ptr; + ptr = UCS_PTR_TYPE_OFFSET(ptr, uint8_t); + } + + if (flags & UCP_ADDRESS_PACK_FLAG_TRACE) { + ucs_trace("unpack addr[%d] : eps %u md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e + %e/n ovh %e " + "lat_ovh %e dev_priority %d a32 0x%lx/0x%lx a64 0x%lx/0x%lx", + (int)(address - address_list), address->num_ep_addrs, + address->md_flags, address->iface_attr.cap_flags, + address->iface_attr.bandwidth.dedicated, + address->iface_attr.bandwidth.shared, + address->iface_attr.overhead, + address->iface_attr.lat_ovh, + address->iface_attr.priority, + address->iface_attr.atomic.atomic32.op_flags, + address->iface_attr.atomic.atomic32.fop_flags, + address->iface_attr.atomic.atomic64.op_flags, + address->iface_attr.atomic.atomic64.fop_flags); + } + + ++address; + } + + ++dev_index; + } while (!last_dev); + + ucs_assert((unsigned)(address - address_list) == address_count); + +out: + unpacked_address->address_count = address_count; + unpacked_address->address_list = address_list; + return UCS_OK; +} + diff --git a/src/ucp/wireup/address.h b/src/ucp/wireup/address.h new file mode 100644 index 0000000..2a8fda5 --- /dev/null +++ b/src/ucp/wireup/address.h @@ -0,0 +1,154 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */
+
+#ifndef UCP_ADDRESS_H_
+#define UCP_ADDRESS_H_
+
+#include "wireup.h"
+
+#include
+#include
+#include
+
+
+/* Which iface flags would be packed in the address */
+enum {
+    UCP_ADDRESS_IFACE_FLAGS =
+         UCT_IFACE_FLAG_CONNECT_TO_IFACE |
+         UCT_IFACE_FLAG_CB_SYNC |
+         UCT_IFACE_FLAG_CB_ASYNC |
+         UCT_IFACE_FLAG_AM_BCOPY |
+         UCT_IFACE_FLAG_PUT_SHORT |
+         UCT_IFACE_FLAG_PUT_BCOPY |
+         UCT_IFACE_FLAG_PUT_ZCOPY |
+         UCT_IFACE_FLAG_GET_SHORT |
+         UCT_IFACE_FLAG_GET_BCOPY |
+         UCT_IFACE_FLAG_GET_ZCOPY |
+         UCT_IFACE_FLAG_TAG_EAGER_BCOPY |
+         UCT_IFACE_FLAG_TAG_RNDV_ZCOPY  |
+         UCT_IFACE_FLAG_EVENT_RECV |
+         UCT_IFACE_FLAG_EVENT_RECV_SIG |
+         UCT_IFACE_FLAG_PENDING
+};
+
+
+enum {
+    UCP_ADDRESS_PACK_FLAG_WORKER_UUID = UCS_BIT(0),
+    UCP_ADDRESS_PACK_FLAG_WORKER_NAME = UCS_BIT(1), /* honored only if ENABLE_DEBUG_DATA is set */
+    UCP_ADDRESS_PACK_FLAG_DEVICE_ADDR = UCS_BIT(2),
+    UCP_ADDRESS_PACK_FLAG_IFACE_ADDR  = UCS_BIT(3),
+    UCP_ADDRESS_PACK_FLAG_EP_ADDR     = UCS_BIT(4),
+    UCP_ADDRESS_PACK_FLAG_TRACE       = UCS_BIT(16), /* show debug prints of pack/unpack */
+    UCP_ADDRESS_PACK_FLAG_ALL         = (uint64_t)-1
+};
+
+
+/**
+ * Remote interface attributes.
+ */
+struct ucp_address_iface_attr {
+    uint64_t                    cap_flags;    /* Interface capability flags */
+    double                      overhead;     /* Interface performance - overhead */
+    uct_ppn_bandwidth_t         bandwidth;    /* Interface performance - bandwidth */
+    int                         priority;     /* Priority of device */
+    double                      lat_ovh;      /* Latency overhead */
+    ucp_tl_iface_atomic_flags_t atomic;       /* Atomic operations */
+};
+
+typedef struct ucp_address_entry_ep_addr {
+    ucp_lane_index_t            lane;         /* Lane index (local or remote) */
+    const uct_ep_addr_t         *addr;        /* Pointer to ep address */
+} ucp_address_entry_ep_addr_t;
+
+/**
+ * Address entry.
+ */
+struct ucp_address_entry {
+    const uct_device_addr_t     *dev_addr;      /* Device address, NULL if not available */
+    const uct_iface_addr_t      *iface_addr;    /* Interface address, NULL if not available */
+    unsigned                    num_ep_addrs;   /* How many endpoint addresses are in ep_addrs */
+    ucp_address_entry_ep_addr_t ep_addrs[UCP_MAX_LANES]; /* Endpoint addresses */
+    ucp_address_iface_attr_t    iface_attr;     /* Interface attributes information */
+    uint64_t                    md_flags;       /* MD reg/alloc flags */
+    uint16_t                    tl_name_csum;   /* Checksum of transport name */
+    ucp_rsc_index_t             md_index;       /* Memory domain index */
+    ucp_rsc_index_t             dev_index;      /* Device index */
+};
+
+
+/**
+ * Unpacked remote address
+ */
+struct ucp_unpacked_address {
+    uint64_t                    uuid;           /* Remote worker UUID */
+    char                        name[UCP_WORKER_NAME_MAX]; /* Remote worker name */
+    unsigned                    address_count;  /* Length of address list */
+    ucp_address_entry_t         *address_list;  /* Pointer to address list */
+};
+
+
+/* Iterate over entries in an unpacked address */
+#define ucp_unpacked_address_for_each(_elem, _unpacked_address) \
+    for (_elem = (_unpacked_address)->address_list; \
+         _elem < (_unpacked_address)->address_list + (_unpacked_address)->address_count; \
+         ++_elem)
+
+
+/* Return the index of a specific entry in an unpacked address */
+#define ucp_unpacked_address_index(_unpacked_address, _ae) \
+    ((int)((_ae) - (_unpacked_address)->address_list))
+
+
+/**
+ * Pack multiple addresses into a buffer, of resources specified in tl_bitmap.
+ * For every resource in tl_bitmap:
+ *  - if iface is CONNECT_TO_IFACE, pack interface address
+ *  - if iface is CONNECT_TO_EP, and ep != NULL, and it has a uct_ep on this
+ *    resource, pack endpoint address.
+ *
+ * @param [in]  worker        Worker object whose interface addresses to pack.
+ * @param [in]  ep            Endpoint object whose uct_ep addresses to pack.
+ *                            Can be set to NULL, to take addresses only from worker.
+ * @param [in]  tl_bitmap     Specifies the resources whose transport address
+ *                            (ep or iface) should be packed.
+ * @param [in]  flags         UCP_ADDRESS_PACK_FLAG_xx flags to specify address
+ *                            format.
+ * @param [in]  lanes2remote  If NULL, the lane index in each packed ep address
+ *                            will be the local lane index. Otherwise, specifies
+ *                            which lane index should be packed in the ep address
+ *                            for each local lane.
+ * @param [out] size_p        Filled with buffer size.
+ * @param [out] buffer_p      Filled with pointer to packed buffer. It should be
+ *                            released by ucs_free().
+ */
+ucs_status_t ucp_address_pack(ucp_worker_h worker, ucp_ep_h ep,
+                              uint64_t tl_bitmap, uint64_t flags,
+                              const ucp_lane_index_t *lanes2remote,
+                              size_t *size_p, void **buffer_p);
+
+
+/**
+ * Unpack a list of addresses.
+ *
+ * @param [in]  worker           Worker object.
+ * @param [in]  buffer           Buffer with data to unpack.
+ * @param [in]  flags            UCP_ADDRESS_PACK_FLAG_xx flags to specify
+ *                               address format, must be the same as the flags
+ *                               the address was packed with by @ref ucp_address_pack.
+ * @param [out] unpacked_address Filled with remote address data.
+ *
+ * @note Entries in the address list could point into the data buffer, so it
+ *       should not be released as long as the remote address is used.
+ *
+ * @note The address list inside @ref ucp_unpacked_address_t should be released
+ *       by ucs_free().
+ */
+ucs_status_t ucp_address_unpack(ucp_worker_h worker, const void *buffer,
+                                uint64_t flags,
+                                ucp_unpacked_address_t *unpacked_address);
+
+
+#endif
diff --git a/src/ucp/wireup/ep_match.c b/src/ucp/wireup/ep_match.c
new file mode 100644
index 0000000..ae546a3
--- /dev/null
+++ b/src/ucp/wireup/ep_match.c
@@ -0,0 +1,212 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + + +__KHASH_IMPL(ucp_ep_match, static UCS_F_MAYBE_UNUSED inline, uint64_t, + ucp_ep_match_entry_t, 1, kh_int64_hash_func, kh_int64_hash_equal); + +/* `_elem` and `_next` are `ucs_list_link_t*` objects */ +#define ucp_ep_match_list_for_each(_elem, _head) \ + for (_elem = (_head)->next; (_elem) != NULL; _elem = (_elem)->next) + +static inline void ucp_ep_match_list_add_tail(ucs_list_link_t *head, + ucs_list_link_t *elem) +{ + ucs_list_link_t *last; + + last = head->prev; + elem->next = NULL; + head->prev = elem; + + if (last == NULL) { + elem->prev = NULL; + head->next = elem; + } else { + elem->prev = last; + last->next = elem; + } +} + +static inline void ucp_ep_match_list_del(ucs_list_link_t *head, + ucs_list_link_t *elem) +{ + (elem->prev ? elem->prev : head)->next = elem->next; + (elem->next ? elem->next : head)->prev = elem->prev; +} + +void ucp_ep_match_init(ucp_ep_match_ctx_t *match_ctx) +{ + kh_init_inplace(ucp_ep_match, &match_ctx->hash); +} + +void ucp_ep_match_cleanup(ucp_ep_match_ctx_t *match_ctx) +{ + ucp_ep_match_entry_t entry; + uint64_t dest_uuid; + + kh_foreach(&match_ctx->hash, dest_uuid, entry, { + if (entry.exp_ep_q.next != NULL) { + ucs_warn("match_ctx %p: uuid 0x%"PRIx64" expected queue is not empty", + match_ctx, dest_uuid); + } + if (entry.unexp_ep_q.next != NULL) { + ucs_warn("match_ctx %p: uuid 0x%"PRIx64" unexpected queue is not empty", + match_ctx, dest_uuid); + } + }) + kh_destroy_inplace(ucp_ep_match, &match_ctx->hash); +} + +static ucp_ep_match_entry_t* +ucp_ep_match_entry_get(ucp_ep_match_ctx_t *match_ctx, uint64_t dest_uuid) +{ + ucp_ep_match_entry_t *entry; + khiter_t iter; + int ret; + + iter = kh_put(ucp_ep_match, &match_ctx->hash, dest_uuid, &ret); + entry = &kh_value(&match_ctx->hash, iter); + + if (ret != 0) { + /* initialize match list on first use */ + entry->next_conn_sn = 0; + entry->exp_ep_q.next = NULL; + 
entry->exp_ep_q.prev = NULL; + entry->unexp_ep_q.next = NULL; + entry->unexp_ep_q.prev = NULL; + } + + return entry; +} + +ucp_ep_conn_sn_t ucp_ep_match_get_next_sn(ucp_ep_match_ctx_t *match_ctx, + uint64_t dest_uuid) +{ + ucp_ep_match_entry_t *entry = ucp_ep_match_entry_get(match_ctx, dest_uuid); + return entry->next_conn_sn++; +} + +static void ucp_ep_match_insert_common(ucp_ep_match_ctx_t *match_ctx, + ucs_list_link_t *list, ucp_ep_h ep, + uint64_t dest_uuid, const char *title) +{ + /* NOTE: protect union */ + ucs_assert(!(ep->flags & (UCP_EP_FLAG_ON_MATCH_CTX | + UCP_EP_FLAG_FLUSH_STATE_VALID | + UCP_EP_FLAG_LISTENER))); + + ucp_ep_match_list_add_tail(list, &ucp_ep_ext_gen(ep)->ep_match.list); + ep->flags |= UCP_EP_FLAG_ON_MATCH_CTX; + ucp_ep_ext_gen(ep)->ep_match.dest_uuid = dest_uuid; + ucs_trace("match_ctx %p: ep %p added as %s uuid 0x%"PRIx64" conn_sn %d", + match_ctx, ep, title, dest_uuid, ep->conn_sn); +} + +void ucp_ep_match_insert_exp(ucp_ep_match_ctx_t *match_ctx, uint64_t dest_uuid, + ucp_ep_h ep) +{ + ucp_ep_match_entry_t *entry = ucp_ep_match_entry_get(match_ctx, dest_uuid); + + ucs_assert(!(ep->flags & UCP_EP_FLAG_DEST_EP)); + ucp_ep_match_insert_common(match_ctx, &entry->exp_ep_q, ep, dest_uuid, + "expected"); +} + +void ucp_ep_match_insert_unexp(ucp_ep_match_ctx_t *match_ctx, uint64_t dest_uuid, + ucp_ep_h ep) +{ + ucp_ep_match_entry_t *entry = ucp_ep_match_entry_get(match_ctx, dest_uuid); + + ucp_ep_match_insert_common(match_ctx, &entry->unexp_ep_q, ep, dest_uuid, + "unexpected"); +} + +static ucp_ep_h +ucp_ep_match_retrieve_common(ucp_ep_match_ctx_t *match_ctx, uint64_t dest_uuid, + ucp_ep_conn_sn_t conn_sn, int is_exp, + ucp_ep_flags_t exp_ep_flags, const char *title) +{ + ucp_ep_match_entry_t *entry; + ucs_list_link_t *list, *list_entry; + ucp_ep_ext_gen_t *ep_ext; + khiter_t iter; + ucp_ep_h ep; + + iter = kh_get(ucp_ep_match, &match_ctx->hash, dest_uuid); + if (iter == kh_end(&match_ctx->hash)) { + goto notfound; /* no hash entry */ + } + + 
entry = &kh_value(&match_ctx->hash, iter); + list = is_exp ? &entry->exp_ep_q : &entry->unexp_ep_q; + ucp_ep_match_list_for_each(list_entry, list) { + ep_ext = ucs_container_of(list_entry, ucp_ep_ext_gen_t, ep_match.list); + ep = ucp_ep_from_ext_gen(ep_ext); + if (ep->conn_sn == conn_sn) { + ucp_ep_match_list_del(list, &ep_ext->ep_match.list); + ucs_trace("match_ctx %p: matched %s ep %p by uuid 0x%"PRIx64" conn_sn %d", + match_ctx, title, ep, dest_uuid, conn_sn); + ucs_assertv(ucs_test_all_flags(ep->flags, + exp_ep_flags | UCP_EP_FLAG_ON_MATCH_CTX), + "ep=%p flags=0x%x exp_flags=0x%x", ep, ep->flags, + exp_ep_flags); + ep->flags &= ~UCP_EP_FLAG_ON_MATCH_CTX; + return ep; + } + } + +notfound: + ucs_trace("match_ctx %p: %s uuid 0x%"PRIx64" conn_sn %d not found", + match_ctx, title, dest_uuid, conn_sn); + return NULL; +} + +ucp_ep_h ucp_ep_match_retrieve_exp(ucp_ep_match_ctx_t *match_ctx, uint64_t dest_uuid, + ucp_ep_conn_sn_t conn_sn) +{ + return ucp_ep_match_retrieve_common(match_ctx, dest_uuid, conn_sn, 1, 0, + "expected"); +} + +ucp_ep_h ucp_ep_match_retrieve_unexp(ucp_ep_match_ctx_t *match_ctx, uint64_t dest_uuid, + ucp_ep_conn_sn_t conn_sn) +{ + return ucp_ep_match_retrieve_common(match_ctx, dest_uuid, conn_sn, 0, + UCP_EP_FLAG_DEST_EP, "unexpected"); +} + +void ucp_ep_match_remove_ep(ucp_ep_match_ctx_t *match_ctx, ucp_ep_h ep) +{ + ucp_ep_ext_gen_t *ep_ext = ucp_ep_ext_gen(ep); + ucp_ep_match_entry_t *entry; + khiter_t iter; + + if (!(ep->flags & UCP_EP_FLAG_ON_MATCH_CTX)) { + return; + } + + iter = kh_get(ucp_ep_match, &match_ctx->hash, ep_ext->ep_match.dest_uuid); + ucs_assertv(iter != kh_end(&match_ctx->hash), "ep %p not found in hash", ep); + entry = &kh_value(&match_ctx->hash, iter); + + if (ep->flags & UCP_EP_FLAG_DEST_EP) { + ucs_trace("match_ctx %p: remove unexpected ep %p", match_ctx, ep); + ucp_ep_match_list_del(&entry->unexp_ep_q, &ep_ext->ep_match.list); + } else { + ucs_trace("match_ctx %p: remove expected ep %p", match_ctx, ep); + 
ucp_ep_match_list_del(&entry->exp_ep_q, &ep_ext->ep_match.list); + } + ep->flags &= ~UCP_EP_FLAG_ON_MATCH_CTX; +} diff --git a/src/ucp/wireup/ep_match.h b/src/ucp/wireup/ep_match.h new file mode 100644 index 0000000..6b424d3 --- /dev/null +++ b/src/ucp/wireup/ep_match.h @@ -0,0 +1,74 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_EP_MATCH_H_ +#define UCP_EP_MATCH_H_ + +#include +#include +#include + + +/* + * Structure to embed in a UCP endpoint to support matching with remote endpoints + */ +typedef struct { + uint64_t dest_uuid; /* Destination worker UUID */ + ucs_list_link_t list; /* List entry into endpoint + matching structure */ +} ucp_ep_match_t; + + +/** + * Endpoint-to-endpoint matching entry - allows *ordered* matching of endpoints + * between a pair of workers. + * The expected/unexpected lists are *not* circular + */ +typedef struct ucp_ep_match_entry { + ucs_list_link_t exp_ep_q; /* Endpoints created by API and not + connected to remote endpoint */ + ucs_list_link_t unexp_ep_q; /* Endpoints created internally as + connected to remote endpoints, + but not provided to user yet */ + ucp_ep_conn_sn_t next_conn_sn; /* Sequence number of matching + endpoints, since UCT may provide + wireup messages which were sent + on different endpoint out-of-order */ +} ucp_ep_match_entry_t; + + +__KHASH_TYPE(ucp_ep_match, uint64_t, ucp_ep_match_entry_t) + + +/* Context for matching endpoints */ +typedef struct { + khash_t(ucp_ep_match) hash; +} ucp_ep_match_ctx_t; + + +void ucp_ep_match_init(ucp_ep_match_ctx_t *match_ctx); + +void ucp_ep_match_cleanup(ucp_ep_match_ctx_t *match_ctx); + +ucp_ep_conn_sn_t ucp_ep_match_get_next_sn(ucp_ep_match_ctx_t *match_ctx, + uint64_t dest_uuid); + +void ucp_ep_match_insert_exp(ucp_ep_match_ctx_t *match_ctx, uint64_t dest_uuid, + ucp_ep_h ep); + +void ucp_ep_match_insert_unexp(ucp_ep_match_ctx_t *match_ctx, uint64_t dest_uuid, + ucp_ep_h ep); + 
+ucp_ep_h ucp_ep_match_retrieve_exp(ucp_ep_match_ctx_t *match_ctx, uint64_t dest_uuid, + ucp_ep_conn_sn_t conn_sn); + +ucp_ep_h ucp_ep_match_retrieve_unexp(ucp_ep_match_ctx_t *ep_conn, uint64_t dest_uuid, + ucp_ep_conn_sn_t conn_sn); + +void ucp_ep_match_remove_ep(ucp_ep_match_ctx_t *ep_conn, ucp_ep_h ep); + + +#endif diff --git a/src/ucp/wireup/select.c b/src/ucp/wireup/select.c new file mode 100644 index 0000000..00b2f05 --- /dev/null +++ b/src/ucp/wireup/select.c @@ -0,0 +1,1636 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * Copyright (C) Los Alamos National Security, LLC. 2019 ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wireup.h" +#include "address.h" + +#include +#include +#include +#include +#include +#include + +#define UCP_WIREUP_RMA_BW_TEST_MSG_SIZE 262144 + +#define UCP_WIREUP_CHECK_AMO_FLAGS(_ae, _criteria, _context, _addr_index, _op, _size) \ + if (!ucs_test_all_flags((_ae)->iface_attr.atomic.atomic##_size._op##_flags, \ + (_criteria)->remote_atomic_flags.atomic##_size._op##_flags)) { \ + char desc[256]; \ + ucs_trace("addr[%d] %s: no %s", (_addr_index), \ + ucp_find_tl_name_by_csum((_context), (_ae)->tl_name_csum), \ + ucp_wireup_get_missing_amo_flag_desc_##_op( \ + (_ae)->iface_attr.atomic.atomic##_size._op##_flags, \ + (_criteria)->remote_atomic_flags.atomic##_size._op##_flags, \ + (_size), desc, sizeof(desc))); \ + continue; \ + } + +typedef struct ucp_wireup_atomic_flag { + const char *name; + const char *fetch; +} ucp_wireup_atomic_flag_t; + + +enum { + UCP_WIREUP_LANE_USAGE_AM = UCS_BIT(0), /* Active messages */ + UCP_WIREUP_LANE_USAGE_AM_BW = UCS_BIT(1), /* High-BW active messages */ + UCP_WIREUP_LANE_USAGE_RMA = UCS_BIT(2), /* Remote memory access */ + UCP_WIREUP_LANE_USAGE_RMA_BW = UCS_BIT(3), /* High-BW remote memory access */ + UCP_WIREUP_LANE_USAGE_AMO = UCS_BIT(4), /* Atomic memory access */ + 
UCP_WIREUP_LANE_USAGE_TAG = UCS_BIT(5), /* Tag matching offload */ + UCP_WIREUP_LANE_USAGE_CM = UCS_BIT(6) /* CM wireup */ +}; + + +typedef struct { + ucp_rsc_index_t rsc_index; + unsigned addr_index; + ucp_lane_index_t proxy_lane; + ucp_rsc_index_t dst_md_index; + uint32_t usage; + double am_bw_score; + double rma_score; + double rma_bw_score; + double amo_score; +} ucp_wireup_lane_desc_t; + + +typedef struct { + ucp_wireup_criteria_t criteria; + uint64_t local_dev_bitmap; + uint64_t remote_dev_bitmap; + ucp_md_map_t md_map; + uint32_t usage; + unsigned max_lanes; +} ucp_wireup_select_bw_info_t; + + +/** + * Global parameters for lanes selection during UCP wireup procedure + */ +typedef struct { + ucp_ep_h ep; /* UCP Endpoint */ + unsigned ep_init_flags; /* Endpoint init flags */ + uint64_t tl_bitmap; /* TLs bitmap which can be selected */ + const ucp_unpacked_address_t *address; /* Remote addresses */ + int allow_am; /* Shows whether emulation over AM + * is allowed or not for RMA/AMO */ + int show_error; /* Global flag that controls showing + * errors from a selecting transport + * procedure */ +} ucp_wireup_select_params_t; + +/** + * Context for lanes selection during UCP wireup procedure + */ +typedef struct { + ucp_wireup_lane_desc_t lane_descs[UCP_MAX_LANES]; /* Array of active lanes that are + * found during selection */ + ucp_lane_index_t num_lanes; /* Number of active lanes */ + unsigned ucp_ep_init_flags; /* Endpoint init extra flags */ +} ucp_wireup_select_context_t; + +static const char *ucp_wireup_md_flags[] = { + [ucs_ilog2(UCT_MD_FLAG_ALLOC)] = "memory allocation", + [ucs_ilog2(UCT_MD_FLAG_REG)] = "memory registration", +}; + +static const char *ucp_wireup_iface_flags[] = { + [ucs_ilog2(UCT_IFACE_FLAG_AM_SHORT)] = "am short", + [ucs_ilog2(UCT_IFACE_FLAG_AM_BCOPY)] = "am bcopy", + [ucs_ilog2(UCT_IFACE_FLAG_AM_ZCOPY)] = "am zcopy", + [ucs_ilog2(UCT_IFACE_FLAG_PUT_SHORT)] = "put short", + [ucs_ilog2(UCT_IFACE_FLAG_PUT_BCOPY)] = "put bcopy", + 
[ucs_ilog2(UCT_IFACE_FLAG_PUT_ZCOPY)] = "put zcopy", + [ucs_ilog2(UCT_IFACE_FLAG_GET_SHORT)] = "get short", + [ucs_ilog2(UCT_IFACE_FLAG_GET_BCOPY)] = "get bcopy", + [ucs_ilog2(UCT_IFACE_FLAG_GET_ZCOPY)] = "get zcopy", + [ucs_ilog2(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)] = "peer failure handler", + [ucs_ilog2(UCT_IFACE_FLAG_CONNECT_TO_IFACE)] = "connect to iface", + [ucs_ilog2(UCT_IFACE_FLAG_CONNECT_TO_EP)] = "connect to ep", + [ucs_ilog2(UCT_IFACE_FLAG_AM_DUP)] = "full reliability", + [ucs_ilog2(UCT_IFACE_FLAG_CB_SYNC)] = "sync callback", + [ucs_ilog2(UCT_IFACE_FLAG_CB_ASYNC)] = "async callback", + [ucs_ilog2(UCT_IFACE_FLAG_EVENT_SEND_COMP)] = "send completion event", + [ucs_ilog2(UCT_IFACE_FLAG_EVENT_RECV)] = "tag or active message event", + [ucs_ilog2(UCT_IFACE_FLAG_EVENT_RECV_SIG)] = "signaled message event", + [ucs_ilog2(UCT_IFACE_FLAG_PENDING)] = "pending", + [ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_SHORT)] = "tag eager short", + [ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)] = "tag eager bcopy", + [ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_ZCOPY)] = "tag eager zcopy", + [ucs_ilog2(UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)] = "tag rndv zcopy" +}; + +/* Printable names of the UCT atomic operations, indexed by UCT_ATOMIC_OP_xx. + * 'name' is the operation itself; 'fetch' is the prefix printed in front of + * the name when the fetching variant of the operation is being described. */ +static ucp_wireup_atomic_flag_t ucp_wireup_atomic_desc[] = { + [UCT_ATOMIC_OP_ADD] = {.name = "add", .fetch = "fetch-"}, + [UCT_ATOMIC_OP_AND] = {.name = "and", .fetch = "fetch-"}, + [UCT_ATOMIC_OP_OR] = {.name = "or", .fetch = "fetch-"}, + [UCT_ATOMIC_OP_XOR] = {.name = "xor", .fetch = "fetch-"}, + [UCT_ATOMIC_OP_SWAP] = {.name = "swap", .fetch = ""}, + [UCT_ATOMIC_OP_CSWAP] = {.name = "cswap", .fetch = ""} +}; + + +static double ucp_wireup_aux_score_func(ucp_context_h context, + const uct_md_attr_t *md_attr, + const uct_iface_attr_t *iface_attr, + const ucp_address_iface_attr_t *remote_iface_attr); + +static const char * +ucp_wireup_get_missing_flag_desc(uint64_t flags, uint64_t required_flags, + const char ** flag_descs) +{ + ucs_assert((required_flags & (~flags)) != 0); + return flag_descs[ucs_ffs64(required_flags & (~flags))]; +} + 
+static const char * +ucp_wireup_get_missing_amo_flag_desc(uint64_t flags, uint64_t required_flags, + int op_size, int fetch, char *buf, size_t len) +{ + int idx; + + ucs_assert((required_flags & (~flags)) != 0); + + idx = ucs_ffs64(required_flags & (~flags)); + + snprintf(buf, len, "%d-bit atomic %s%s", op_size, + fetch ? ucp_wireup_atomic_desc[idx].fetch : "", + ucp_wireup_atomic_desc[idx].name); + + return buf; +} + +static const char * +ucp_wireup_get_missing_amo_flag_desc_op(uint64_t flags, uint64_t required_flags, + int op_size, char *buf, size_t len) +{ + return ucp_wireup_get_missing_amo_flag_desc(flags, required_flags, op_size, 0, buf, len); +} + +static const char * +ucp_wireup_get_missing_amo_flag_desc_fop(uint64_t flags, uint64_t required_flags, + int op_size, char *buf, size_t len) +{ + return ucp_wireup_get_missing_amo_flag_desc(flags, required_flags, op_size, 1, buf, len); +} + +static int ucp_wireup_check_flags(const uct_tl_resource_desc_t *resource, + uint64_t flags, uint64_t required_flags, + const char *title, const char ** flag_descs, + char *reason, size_t max) +{ + const char *missing_flag_desc; + + if (ucs_test_all_flags(flags, required_flags)) { + return 1; + } + + if (required_flags) { + missing_flag_desc = ucp_wireup_get_missing_flag_desc(flags, required_flags, + flag_descs); + ucs_trace(UCT_TL_RESOURCE_DESC_FMT " : not suitable for %s, no %s", + UCT_TL_RESOURCE_DESC_ARG(resource), title, + missing_flag_desc); + snprintf(reason, max, UCT_TL_RESOURCE_DESC_FMT" - no %s", + UCT_TL_RESOURCE_DESC_ARG(resource), missing_flag_desc); + } + return 0; +} + +static int ucp_wireup_check_amo_flags(const uct_tl_resource_desc_t *resource, + uint64_t flags, uint64_t required_flags, + int op_size, int fetch, + const char *title, char *reason, size_t max) +{ + char missing_flag_desc[256]; + + if (ucs_test_all_flags(flags, required_flags)) { + return 1; + } + + if (required_flags) { + ucp_wireup_get_missing_amo_flag_desc(flags, required_flags, + op_size, 
fetch, missing_flag_desc, + sizeof(missing_flag_desc)); + ucs_trace(UCT_TL_RESOURCE_DESC_FMT " : not suitable for %s, no %s", + UCT_TL_RESOURCE_DESC_ARG(resource), title, + missing_flag_desc); + snprintf(reason, max, UCT_TL_RESOURCE_DESC_FMT" - no %s", + UCT_TL_RESOURCE_DESC_ARG(resource), missing_flag_desc); + } + return 0; +} + +static void +ucp_wireup_init_select_info(ucp_context_h context, double score, + unsigned addr_index, ucp_rsc_index_t rsc_index, + uint8_t priority, const char *title, + ucp_wireup_select_info_t *select_info) +{ + ucs_assert(score >= 0.0); + + ucs_trace(UCT_TL_RESOURCE_DESC_FMT "->addr[%u] : %s score %.2f priority %d", + UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[rsc_index].tl_rsc), + addr_index, title, score, priority); + + select_info->score = score; + select_info->addr_index = addr_index; + select_info->rsc_index = rsc_index; + select_info->priority = priority; +} + +/** + * Select a local and remote transport + */ +static UCS_F_NOINLINE ucs_status_t +ucp_wireup_select_transport(const ucp_wireup_select_params_t *select_params, + const ucp_wireup_criteria_t *criteria, + uint64_t tl_bitmap, uint64_t remote_md_map, + uint64_t local_dev_bitmap, + uint64_t remote_dev_bitmap, + int show_error, + ucp_wireup_select_info_t *select_info) +{ + ucp_ep_h ep = select_params->ep; + ucp_worker_h worker = ep->worker; + ucp_context_h context = worker->context; + ucp_wireup_select_info_t sinfo = {0}; + int found = 0; + unsigned addr_index; + uct_tl_resource_desc_t *resource; + const ucp_address_entry_t *ae; + ucp_rsc_index_t rsc_index; + char tls_info[256]; + char *p, *endp; + uct_iface_attr_t *iface_attr; + uct_md_attr_t *md_attr; + uint64_t addr_index_map; + double score; + uint8_t priority; + + p = tls_info; + endp = tls_info + sizeof(tls_info) - 1; + tls_info[0] = '\0'; + tl_bitmap &= select_params->tl_bitmap; + show_error = (select_params->show_error && show_error); + + /* Check which remote addresses satisfy the criteria */ + addr_index_map = 0; + 
ucp_unpacked_address_for_each(ae, select_params->address) { + addr_index = ucp_unpacked_address_index(select_params->address, ae); + if (!(remote_dev_bitmap & UCS_BIT(ae->dev_index))) { + ucs_trace("addr[%d]: not in use, because on device[%d]", + addr_index, ae->dev_index); + continue; + } else if (!(remote_md_map & UCS_BIT(ae->md_index))) { + ucs_trace("addr[%d]: not in use, because on md[%d]", addr_index, + ae->md_index); + continue; + } else if (!ucs_test_all_flags(ae->md_flags, + criteria->remote_md_flags)) { + ucs_trace("addr[%d] %s: no %s", addr_index, + ucp_find_tl_name_by_csum(context, ae->tl_name_csum), + ucp_wireup_get_missing_flag_desc(ae->md_flags, + criteria->remote_md_flags, + ucp_wireup_md_flags)); + continue; + } + + /* Make sure we are indeed passing all flags required by the criteria in + * ucp packed address */ + ucs_assert(ucs_test_all_flags(UCP_ADDRESS_IFACE_FLAGS, + criteria->remote_iface_flags)); + + if (!ucs_test_all_flags(ae->iface_attr.cap_flags, criteria->remote_iface_flags)) { + ucs_trace("addr[%d] %s: no %s", addr_index, + ucp_find_tl_name_by_csum(context, ae->tl_name_csum), + ucp_wireup_get_missing_flag_desc(ae->iface_attr.cap_flags, + criteria->remote_iface_flags, + ucp_wireup_iface_flags)); + continue; + } + + UCP_WIREUP_CHECK_AMO_FLAGS(ae, criteria, context, addr_index, op, 32); + UCP_WIREUP_CHECK_AMO_FLAGS(ae, criteria, context, addr_index, op, 64); + UCP_WIREUP_CHECK_AMO_FLAGS(ae, criteria, context, addr_index, fop, 32); + UCP_WIREUP_CHECK_AMO_FLAGS(ae, criteria, context, addr_index, fop, 64); + + addr_index_map |= UCS_BIT(addr_index); + } + + if (!addr_index_map) { + /* No remote address satisfies the criteria. Terminate the entry with + * the same two-character ", " separator as every other tls_info entry: + * the 'out' label always strips the last two characters, so a single + * trailing character would also cut the last letter of the status + * string. */ + snprintf(p, endp - p, "%s, ", ucs_status_string(UCS_ERR_UNSUPPORTED)); + p += strlen(p); + goto out; + } + + /* For each local resource try to find the best remote address to connect to. + * Pick the best local resource to satisfy the criteria. 
+ * best one has the highest score (from the dedicated score_func) and + * has a reachable tl on the remote peer */ + ucs_for_each_bit(rsc_index, context->tl_bitmap) { + resource = &context->tl_rscs[rsc_index].tl_rsc; + iface_attr = ucp_worker_iface_get_attr(worker, rsc_index); + md_attr = &context->tl_mds[context->tl_rscs[rsc_index].md_index].attr; + + if ((context->tl_rscs[rsc_index].flags & UCP_TL_RSC_FLAG_AUX) && + !(criteria->tl_rsc_flags & UCP_TL_RSC_FLAG_AUX)) { + continue; + } + + /* Check that local md and interface satisfy the criteria */ + if (!ucp_wireup_check_flags(resource, md_attr->cap.flags, + criteria->local_md_flags, criteria->title, + ucp_wireup_md_flags, p, endp - p) || + !ucp_wireup_check_flags(resource, iface_attr->cap.flags, + criteria->local_iface_flags, criteria->title, + ucp_wireup_iface_flags, p, endp - p) || + !ucp_wireup_check_amo_flags(resource, iface_attr->cap.atomic32.op_flags, + criteria->local_atomic_flags.atomic32.op_flags, + 32, 0, criteria->title, p, endp - p) || + !ucp_wireup_check_amo_flags(resource, iface_attr->cap.atomic64.op_flags, + criteria->local_atomic_flags.atomic64.op_flags, + 64, 0, criteria->title, p, endp - p) || + !ucp_wireup_check_amo_flags(resource, iface_attr->cap.atomic32.fop_flags, + criteria->local_atomic_flags.atomic32.fop_flags, + 32, 1, criteria->title, p, endp - p) || + !ucp_wireup_check_amo_flags(resource, iface_attr->cap.atomic64.fop_flags, + criteria->local_atomic_flags.atomic64.fop_flags, + 64, 1, criteria->title, p, endp - p)) + { + p += strlen(p); + snprintf(p, endp - p, ", "); + p += strlen(p); + continue; + } + + /* Check supplied tl & device bitmap */ + if (!(tl_bitmap & UCS_BIT(rsc_index))) { + ucs_trace(UCT_TL_RESOURCE_DESC_FMT " : disabled by tl_bitmap", + UCT_TL_RESOURCE_DESC_ARG(resource)); + snprintf(p, endp - p, UCT_TL_RESOURCE_DESC_FMT" - disabled for %s, ", + UCT_TL_RESOURCE_DESC_ARG(resource), criteria->title); + p += strlen(p); + continue; + } else if (!(local_dev_bitmap & 
UCS_BIT(context->tl_rscs[rsc_index].dev_index))) { + ucs_trace(UCT_TL_RESOURCE_DESC_FMT " : disabled by device bitmap", + UCT_TL_RESOURCE_DESC_ARG(resource)); + snprintf(p, endp - p, UCT_TL_RESOURCE_DESC_FMT" - disabled for %s, ", + UCT_TL_RESOURCE_DESC_ARG(resource), criteria->title); + p += strlen(p); + continue; + } + + ucp_unpacked_address_for_each(ae, select_params->address) { + addr_index = ucp_unpacked_address_index(select_params->address, ae); + if (!(addr_index_map & UCS_BIT(addr_index)) || + !ucp_wireup_is_reachable(worker, rsc_index, ae)) + { + /* Must be reachable device address, on same transport */ + continue; + } + + score = criteria->calc_score(context, md_attr, iface_attr, + &ae->iface_attr); + priority = iface_attr->priority + ae->iface_attr.priority; + + if (!found || (ucp_score_prio_cmp(score, priority, sinfo.score, + sinfo.priority) > 0)) { + ucp_wireup_init_select_info(context, score, addr_index, + rsc_index, priority, + criteria->title, &sinfo); + found = 1; + } + } + + /* If a local resource cannot reach any of the remote addresses, + * generate debug message. 
*/ + if (!found) { + snprintf(p, endp - p, UCT_TL_RESOURCE_DESC_FMT" - %s, ", + UCT_TL_RESOURCE_DESC_ARG(resource), + ucs_status_string(UCS_ERR_UNREACHABLE)); + p += strlen(p); + } + } + +out: + if (p >= tls_info + 2) { + *(p - 2) = '\0'; /* trim last "," */ + } + + if (!found) { + if (show_error) { + ucs_error("no %s transport to %s: %s", criteria->title, + ucp_ep_peer_name(ep), tls_info); + } + + return UCS_ERR_UNREACHABLE; + } + + ucs_trace("ep %p: selected for %s: " UCT_TL_RESOURCE_DESC_FMT " md[%d]" + " -> '%s' address[%d],md[%d] score %.2f", ep, criteria->title, + UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[sinfo.rsc_index].tl_rsc), + context->tl_rscs[sinfo.rsc_index].md_index, ucp_ep_peer_name(ep), + sinfo.addr_index, + select_params->address->address_list[sinfo.addr_index].md_index, + sinfo.score); + + *select_info = sinfo; + return UCS_OK; +} + +static inline double ucp_wireup_tl_iface_latency(ucp_context_h context, + const uct_iface_attr_t *iface_attr, + const ucp_address_iface_attr_t *remote_iface_attr) +{ + return ucs_max(iface_attr->latency.overhead, remote_iface_attr->lat_ovh) + + (iface_attr->latency.growth * context->config.est_num_eps); +} + +static UCS_F_NOINLINE void +ucp_wireup_add_lane_desc(const ucp_wireup_select_info_t *select_info, + ucp_rsc_index_t dst_md_index, + uint32_t usage, int is_proxy, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_wireup_lane_desc_t *lane_desc; + ucp_lane_index_t lane, proxy_lane; + int proxy_changed; + + /* Add a new lane, but try to reuse already added lanes which are selected + * on the same transport resources. 
+ */ + proxy_changed = 0; + for (lane_desc = select_ctx->lane_descs; + lane_desc < select_ctx->lane_descs + select_ctx->num_lanes; ++lane_desc) { + if ((lane_desc->rsc_index == select_info->rsc_index) && + (lane_desc->addr_index == select_info->addr_index)) + { + lane = lane_desc - select_ctx->lane_descs; + ucs_assertv_always(dst_md_index == lane_desc->dst_md_index, + "lane[%d].dst_md_index=%d, dst_md_index=%d", + lane, lane_desc->dst_md_index, dst_md_index); + ucs_assertv_always(!(lane_desc->usage & usage), "lane[%d]=0x%x |= 0x%x", + lane, lane_desc->usage, usage); + if (is_proxy && (lane_desc->proxy_lane == UCP_NULL_LANE)) { + /* New lane is a proxy, and found existing non-proxy lane with + * same resource. So that lane should be used by the proxy. + */ + proxy_lane = lane; + goto out_add_lane; + } else if (!is_proxy && (lane_desc->proxy_lane == lane)) { + /* New lane is not a proxy, but found existing proxy lane which + * could use the new lane. It also means we should be able to + * add our new lane. + */ + lane_desc->proxy_lane = select_ctx->num_lanes; + proxy_changed = 1; + } else if (!is_proxy && (lane_desc->proxy_lane == UCP_NULL_LANE)) { + /* Found non-proxy lane with same resource - don't add */ + ucs_assert_always(!proxy_changed); + lane_desc->usage |= usage; + goto out_update_score; + } + } + } + + /* If a proxy cannot find other lane with same resource, proxy to self */ + proxy_lane = is_proxy ? 
select_ctx->num_lanes : UCP_NULL_LANE; + +out_add_lane: + lane_desc = &select_ctx->lane_descs[select_ctx->num_lanes]; + ++select_ctx->num_lanes; + + lane_desc->rsc_index = select_info->rsc_index; + lane_desc->addr_index = select_info->addr_index; + lane_desc->proxy_lane = proxy_lane; + lane_desc->dst_md_index = dst_md_index; + lane_desc->usage = usage; + lane_desc->am_bw_score = 0.0; + lane_desc->rma_score = 0.0; + lane_desc->rma_bw_score = 0.0; + lane_desc->amo_score = 0.0; + +out_update_score: + if (usage & UCP_WIREUP_LANE_USAGE_AM_BW) { + lane_desc->am_bw_score = select_info->score; + } + if (usage & UCP_WIREUP_LANE_USAGE_RMA) { + lane_desc->rma_score = select_info->score; + } + if (usage & UCP_WIREUP_LANE_USAGE_RMA_BW) { + lane_desc->rma_bw_score = select_info->score; + } + if (usage & UCP_WIREUP_LANE_USAGE_AMO) { + lane_desc->amo_score = select_info->score; + } +} + +static int ucp_wireup_is_lane_proxy(ucp_ep_h ep, ucp_rsc_index_t rsc_index, + uint64_t remote_cap_flags) +{ + return !ucp_worker_is_tl_p2p(ep->worker, rsc_index) && + ((remote_cap_flags & UCP_WORKER_UCT_RECV_EVENT_CAP_FLAGS) == + UCT_IFACE_FLAG_EVENT_RECV_SIG); +} + +static UCS_F_NOINLINE void +ucp_wireup_add_lane(const ucp_wireup_select_params_t *select_params, + const ucp_wireup_select_info_t *select_info, + uint32_t usage, ucp_wireup_select_context_t *select_ctx) +{ + int is_proxy = 0; + ucp_rsc_index_t dst_md_index; + uint64_t remote_cap_flags; + + if (usage & (UCP_WIREUP_LANE_USAGE_AM | + UCP_WIREUP_LANE_USAGE_AM_BW | + UCP_WIREUP_LANE_USAGE_TAG)) { + /* If the remote side is not p2p and has only signaled-am wakeup, it may + * deactivate its interface and wait for signaled active message to wake up. + * Use a proxy lane which would send the first active message as signaled to + * make sure the remote interface will indeed wake up. 
*/ + remote_cap_flags = select_params->address->address_list + [select_info->addr_index].iface_attr.cap_flags; + is_proxy = ucp_wireup_is_lane_proxy(select_params->ep, + select_info->rsc_index, + remote_cap_flags); + } + + dst_md_index = select_params->address->address_list + [select_info->addr_index].md_index; + ucp_wireup_add_lane_desc(select_info, dst_md_index, + usage, is_proxy, select_ctx); +} + +#define UCP_WIREUP_COMPARE_SCORE(_elem1, _elem2, _arg, _token) \ + ({ \ + const ucp_lane_index_t *lane1 = (_elem1); \ + const ucp_lane_index_t *lane2 = (_elem2); \ + const ucp_wireup_lane_desc_t *lanes = (_arg); \ + double score1, score2; \ + \ + score1 = (*lane1 == UCP_NULL_LANE) ? 0.0 : lanes[*lane1]._token##_score; \ + score2 = (*lane2 == UCP_NULL_LANE) ? 0.0 : lanes[*lane2]._token##_score; \ + /* sort from highest score to lowest */ \ + (score1 < score2) ? 1 : ((score1 > score2) ? -1 : 0); \ + }) + +static int ucp_wireup_compare_lane_am_bw_score(const void *elem1, const void *elem2, + void *arg) +{ + return UCP_WIREUP_COMPARE_SCORE(elem1, elem2, arg, am_bw); +} + +static int ucp_wireup_compare_lane_rma_score(const void *elem1, const void *elem2, + void *arg) +{ + return UCP_WIREUP_COMPARE_SCORE(elem1, elem2, arg, rma); +} + +static int ucp_wireup_compare_lane_rma_bw_score(const void *elem1, const void *elem2, + void *arg) +{ + return UCP_WIREUP_COMPARE_SCORE(elem1, elem2, arg, rma_bw); +} + +static int ucp_wireup_compare_lane_amo_score(const void *elem1, const void *elem2, + void *arg) +{ + return UCP_WIREUP_COMPARE_SCORE(elem1, elem2, arg, amo); +} + +static void +ucp_wireup_unset_tl_by_md(const ucp_wireup_select_params_t *sparams, + const ucp_wireup_select_info_t *sinfo, + uint64_t *tl_bitmap, uint64_t *remote_md_map) +{ + ucp_context_h context = sparams->ep->worker->context; + const ucp_address_entry_t *ae = &sparams->address-> + address_list[sinfo->addr_index]; + ucp_rsc_index_t md_index = context->tl_rscs[sinfo->rsc_index].md_index; + ucp_rsc_index_t 
dst_md_index = ae->md_index; + ucp_rsc_index_t i; + + *remote_md_map &= ~UCS_BIT(dst_md_index); + + ucs_for_each_bit(i, context->tl_bitmap) { + if (context->tl_rscs[i].md_index == md_index) { + *tl_bitmap &= ~UCS_BIT(i); + } + } +} + +static UCS_F_NOINLINE ucs_status_t +ucp_wireup_add_memaccess_lanes(const ucp_wireup_select_params_t *select_params, + const ucp_wireup_criteria_t *criteria, + uint64_t tl_bitmap, uint32_t usage, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_wireup_criteria_t mem_criteria = *criteria; + ucp_wireup_select_info_t select_info = {0}; + int show_error = !select_params->allow_am; + double reg_score; + uint64_t remote_md_map; + ucs_status_t status; + char title[64]; + + remote_md_map = UINT64_MAX; + + /* Select best transport which can reach registered memory */ + snprintf(title, sizeof(title), criteria->title, "registered"); + mem_criteria.title = title; + mem_criteria.remote_md_flags = UCT_MD_FLAG_REG | criteria->remote_md_flags; + status = ucp_wireup_select_transport(select_params, &mem_criteria, + tl_bitmap, remote_md_map, + UINT64_MAX, UINT64_MAX, + show_error, &select_info); + if (status != UCS_OK) { + goto out; + } + + reg_score = select_info.score; + + /* Add to the list of lanes and remove all occurrences of the remote md + * from the address list, to avoid selecting the same remote md again. */ + ucp_wireup_add_lane(select_params, &select_info, usage, select_ctx); + ucp_wireup_unset_tl_by_md(select_params, &select_info, &tl_bitmap, + &remote_md_map); + + /* Select additional transports which can access allocated memory, but + * only if their scores are better. We need this because a remote memory + * block can be potentially allocated using one of them, and we might get + * better performance than the transports which support only registered + * remote memory. 
*/ + snprintf(title, sizeof(title), criteria->title, "allocated"); + mem_criteria.title = title; + mem_criteria.remote_md_flags = UCT_MD_FLAG_ALLOC | + criteria->remote_md_flags; + + for (;;) { + status = ucp_wireup_select_transport(select_params, &mem_criteria, + tl_bitmap, remote_md_map, + UINT64_MAX, UINT64_MAX, 0, + &select_info); + /* Break if: */ + /* - transport selection wasn't OK */ + if ((status != UCS_OK) || + /* - the selected transport is worse than + * the transport selected above */ + (ucp_score_cmp(select_info.score, reg_score) <= 0)) { + break; + } + + /* Add lane description and remove all occurrences of the remote md. */ + ucp_wireup_add_lane(select_params, &select_info, usage, select_ctx); + ucp_wireup_unset_tl_by_md(select_params, &select_info, &tl_bitmap, + &remote_md_map); + } + + status = UCS_OK; + +out: + if ((status != UCS_OK) && select_params->allow_am) { + /* using emulation over active messages */ + select_ctx->ucp_ep_init_flags |= UCP_EP_INIT_CREATE_AM_LANE; + status = UCS_OK; + } + + return status; +} + +static uint64_t ucp_ep_get_context_features(const ucp_ep_h ep) +{ + return ep->worker->context->config.features; +} + +static double ucp_wireup_rma_score_func(ucp_context_h context, + const uct_md_attr_t *md_attr, + const uct_iface_attr_t *iface_attr, + const ucp_address_iface_attr_t *remote_iface_attr) +{ + /* best for 4k messages */ + return 1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) + + iface_attr->overhead + + (4096.0 / ucs_min(ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth), + ucp_tl_iface_bandwidth(context, &remote_iface_attr->bandwidth)))); +} + +static void ucp_wireup_fill_peer_err_criteria(ucp_wireup_criteria_t *criteria, + unsigned ep_init_flags) +{ + if (ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE) { + criteria->local_iface_flags |= UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE; + } +} + +static void ucp_wireup_fill_aux_criteria(ucp_wireup_criteria_t *criteria, + unsigned 
ep_init_flags) +{ + criteria->title = "auxiliary"; + criteria->local_md_flags = 0; + criteria->remote_md_flags = 0; + criteria->local_iface_flags = UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_PENDING; + criteria->remote_iface_flags = UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_CB_ASYNC; + criteria->calc_score = ucp_wireup_aux_score_func; + criteria->tl_rsc_flags = UCP_TL_RSC_FLAG_AUX; /* Can use aux transports */ + + ucp_wireup_fill_peer_err_criteria(criteria, ep_init_flags); +} + +static void ucp_wireup_clean_amo_criteria(ucp_wireup_criteria_t *criteria) +{ + memset(&criteria->remote_atomic_flags, 0, + sizeof(criteria->remote_atomic_flags)); + memset(&criteria->local_atomic_flags, 0, + sizeof(criteria->local_atomic_flags)); +} + +/** + * Check whether emulation over AM is allowed for RMA/AMO lanes + */ +static int ucp_wireup_allow_am_emulation_layer(unsigned ep_init_flags) +{ + /* disable emulation layer if err handling is required due to lack of + * keep alive protocol */ + return !(ep_init_flags & (UCP_EP_INIT_FLAG_MEM_TYPE | + UCP_EP_INIT_ERR_MODE_PEER_FAILURE)); +} + +static unsigned +ucp_wireup_ep_init_flags(const ucp_wireup_select_params_t *select_params, + const ucp_wireup_select_context_t *select_ctx) +{ + return select_params->ep_init_flags | select_ctx->ucp_ep_init_flags; +} + +static ucs_status_t +ucp_wireup_add_cm_lane(const ucp_wireup_select_params_t *select_params, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_wireup_select_info_t select_info; + + if (!(select_params->ep_init_flags & (UCP_EP_INIT_CM_WIREUP_CLIENT | + UCP_EP_INIT_CM_WIREUP_SERVER))) { + return UCS_OK; + } + + select_info.priority = 0; /**< Currently we have only 1 CM + implementation */ + select_info.rsc_index = UCP_NULL_RESOURCE; /**< RSC doesn't matter for CM */ + select_info.addr_index = 0; /**< This makes sense only for transport + lanes */ + select_info.score = 0.; /**< TODO: when we have > 1 CM 
implementation */ + + /* server is not a proxy because it can create all lanes connected */ + ucp_wireup_add_lane_desc(&select_info, select_info.rsc_index, + UCP_WIREUP_LANE_USAGE_CM, 0, select_ctx); + return UCS_OK; +} + +static ucs_status_t +ucp_wireup_add_rma_lanes(const ucp_wireup_select_params_t *select_params, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_wireup_criteria_t criteria = {0}; + unsigned ep_init_flags = ucp_wireup_ep_init_flags(select_params, + select_ctx); + + if (!(ucp_ep_get_context_features(select_params->ep) & UCP_FEATURE_RMA) && + !(ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE)) { + return UCS_OK; + } + + if (ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) { + criteria.title = "copy across memory types"; + criteria.remote_iface_flags = UCT_IFACE_FLAG_PUT_SHORT; + criteria.local_iface_flags = criteria.remote_iface_flags; + } else { + criteria.title = "remote %s memory access"; + criteria.remote_iface_flags = UCT_IFACE_FLAG_PUT_SHORT | + UCT_IFACE_FLAG_PUT_BCOPY | + UCT_IFACE_FLAG_GET_BCOPY; + criteria.local_iface_flags = criteria.remote_iface_flags | + UCT_IFACE_FLAG_PENDING; + } + criteria.calc_score = ucp_wireup_rma_score_func; + criteria.tl_rsc_flags = 0; + ucp_wireup_fill_peer_err_criteria(&criteria, ep_init_flags); + + return ucp_wireup_add_memaccess_lanes(select_params, &criteria, UINT64_MAX, + UCP_WIREUP_LANE_USAGE_RMA, + select_ctx); +} + +double ucp_wireup_amo_score_func(ucp_context_h context, + const uct_md_attr_t *md_attr, + const uct_iface_attr_t *iface_attr, + const ucp_address_iface_attr_t *remote_iface_attr) +{ + /* best one-sided latency */ + return 1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) + + iface_attr->overhead); +} + +static ucs_status_t +ucp_wireup_add_amo_lanes(const ucp_wireup_select_params_t *select_params, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_worker_h worker = select_params->ep->worker; + ucp_context_h context = worker->context; + ucp_wireup_criteria_t criteria = {0}; + 
unsigned ep_init_flags = ucp_wireup_ep_init_flags(select_params, + select_ctx); + ucp_rsc_index_t rsc_index; + uint64_t tl_bitmap; + + if (!ucs_test_flags(context->config.features, + UCP_FEATURE_AMO32, UCP_FEATURE_AMO64) || + (ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE)) { + return UCS_OK; + } + + ucp_context_uct_atomic_iface_flags(context, &criteria.remote_atomic_flags); + + criteria.title = "atomic operations on %s memory"; + criteria.local_iface_flags = criteria.remote_iface_flags | + UCT_IFACE_FLAG_PENDING; + criteria.local_atomic_flags = criteria.remote_atomic_flags; + criteria.calc_score = ucp_wireup_amo_score_func; + ucp_wireup_fill_peer_err_criteria(&criteria, ep_init_flags); + + /* We can use only non-p2p resources or resources which are explicitly + * selected for atomics. Otherwise, the remote peer would not be able to + * connect back on p2p transport. + */ + tl_bitmap = worker->atomic_tls; + ucs_for_each_bit(rsc_index, context->tl_bitmap) { + if (!ucp_worker_is_tl_p2p(worker, rsc_index)) { + tl_bitmap |= UCS_BIT(rsc_index); + } + } + + return ucp_wireup_add_memaccess_lanes(select_params, &criteria, tl_bitmap, + UCP_WIREUP_LANE_USAGE_AMO, + select_ctx); +} + +static double ucp_wireup_am_score_func(ucp_context_h context, + const uct_md_attr_t *md_attr, + const uct_iface_attr_t *iface_attr, + const ucp_address_iface_attr_t *remote_iface_attr) +{ + /* best end-to-end latency */ + return 1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) + + iface_attr->overhead + remote_iface_attr->overhead); +} + +static double ucp_wireup_rma_bw_score_func(ucp_context_h context, + const uct_md_attr_t *md_attr, + const uct_iface_attr_t *iface_attr, + const ucp_address_iface_attr_t *remote_iface_attr) +{ + /* highest bandwidth with lowest overhead - test a message size of 256KB, + * a size which is likely to be used for high-bw memory access protocol, for + * how long it would take to transfer it with a certain transport. 
*/ + return 1 / ((UCP_WIREUP_RMA_BW_TEST_MSG_SIZE / + ucs_min(ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth), + ucp_tl_iface_bandwidth(context, &remote_iface_attr->bandwidth))) + + ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) + + iface_attr->overhead + md_attr->reg_cost.overhead + + (UCP_WIREUP_RMA_BW_TEST_MSG_SIZE * md_attr->reg_cost.growth)); +} + +static inline int +ucp_wireup_is_am_required(const ucp_wireup_select_params_t *select_params, + const ucp_wireup_select_context_t *select_ctx) +{ + ucp_ep_h ep = select_params->ep; + unsigned ep_init_flags = ucp_wireup_ep_init_flags(select_params, + select_ctx); + ucp_lane_index_t lane; + + /* Check if we need active messages from the configurations, for wireup. + * If not, check if am is required due to p2p transports */ + + if (ep_init_flags & UCP_EP_INIT_CREATE_AM_LANE) { + return 1; + } + + if (!(ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) && + (ucp_ep_get_context_features(ep) & (UCP_FEATURE_TAG | + UCP_FEATURE_STREAM | + UCP_FEATURE_AM))) { + return 1; + } + + for (lane = 0; lane < select_ctx->num_lanes; ++lane) { + if (ucp_worker_is_tl_p2p(ep->worker, + select_ctx->lane_descs[lane].rsc_index)) { + return 1; + } + } + + return 0; +} + +static ucs_status_t +ucp_wireup_add_am_lane(const ucp_wireup_select_params_t *select_params, + ucp_wireup_select_info_t *am_info, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_wireup_criteria_t criteria = {0}; + ucs_status_t status; + + if (!ucp_wireup_is_am_required(select_params, select_ctx)) { + memset(am_info, 0, sizeof(*am_info)); + return UCS_OK; + } + + /* Select one lane for active messages */ + criteria.title = "active messages"; + criteria.remote_iface_flags = UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_CB_SYNC; + criteria.local_iface_flags = UCT_IFACE_FLAG_AM_BCOPY; + criteria.calc_score = ucp_wireup_am_score_func; + ucp_wireup_fill_peer_err_criteria(&criteria, + ucp_wireup_ep_init_flags(select_params, + select_ctx)); + + if 
(ucs_test_all_flags(ucp_ep_get_context_features(select_params->ep), + UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP)) { + criteria.local_iface_flags |= UCP_WORKER_UCT_UNSIG_EVENT_CAP_FLAGS; + } + + status = ucp_wireup_select_transport(select_params, &criteria, + select_params->tl_bitmap, UINT64_MAX, + UINT64_MAX, UINT64_MAX, 1, am_info); + if (status != UCS_OK) { + return status; + } + + ucp_wireup_add_lane(select_params, am_info, UCP_WIREUP_LANE_USAGE_AM, + select_ctx); + return UCS_OK; +} + +static double ucp_wireup_am_bw_score_func(ucp_context_h context, + const uct_md_attr_t *md_attr, + const uct_iface_attr_t *iface_attr, + const ucp_address_iface_attr_t *remote_iface_attr) +{ + /* best single MTU bandwidth */ + double size = iface_attr->cap.am.max_bcopy; + double time = (size / ucs_min(ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth), + ucp_tl_iface_bandwidth(context, &remote_iface_attr->bandwidth))) + + iface_attr->overhead + remote_iface_attr->overhead + + ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr); + + return size / time * 1e-5; +} + +int ucp_wireup_is_rsc_self_or_shm(ucp_ep_h ep, ucp_rsc_index_t rsc_index) +{ + return (ep->worker->context->tl_rscs[rsc_index].tl_rsc.dev_type == UCT_DEVICE_TYPE_SHM) || + (ep->worker->context->tl_rscs[rsc_index].tl_rsc.dev_type == UCT_DEVICE_TYPE_SELF); +} + +static unsigned +ucp_wireup_add_bw_lanes(const ucp_wireup_select_params_t *select_params, + const ucp_wireup_select_bw_info_t *bw_info, + uint64_t tl_bitmap, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_ep_h ep = select_params->ep; + ucp_context_h context = ep->worker->context; + ucp_wireup_select_info_t sinfo = {0}; + const ucp_address_entry_t *ae; + ucs_status_t status; + unsigned num_lanes; + uint64_t local_dev_bitmap; + uint64_t remote_dev_bitmap; + ucp_md_map_t md_map; + + num_lanes = 0; + md_map = bw_info->md_map; + local_dev_bitmap = bw_info->local_dev_bitmap; + remote_dev_bitmap = bw_info->remote_dev_bitmap; + + /* lookup for 
requested number of lanes or limit of MD map + * (we have to limit MD's number to avoid malloc in + * memory registration) */ + while ((num_lanes < bw_info->max_lanes) && + (ucs_popcount(md_map) < UCP_MAX_OP_MDS)) { + status = ucp_wireup_select_transport(select_params, &bw_info->criteria, + tl_bitmap, UINT64_MAX, + local_dev_bitmap, remote_dev_bitmap, + 0, &sinfo); + if (status != UCS_OK) { + break; + } + + ucp_wireup_add_lane(select_params, &sinfo, bw_info->usage, select_ctx); + + md_map |= UCS_BIT(context->tl_rscs[sinfo.rsc_index].md_index); + num_lanes++; + + local_dev_bitmap &= ~UCS_BIT(context->tl_rscs[sinfo.rsc_index].dev_index); + ae = &select_params->address->address_list[sinfo.addr_index]; + remote_dev_bitmap &= ~UCS_BIT(ae->dev_index); + + if (ucp_wireup_is_rsc_self_or_shm(ep, sinfo.rsc_index)) { + /* special case for SELF/SHM: do not look up additional lanes once a + * self or shared-memory transport is selected (any other transport would be significantly + * slower) */ + break; + } + } + + return num_lanes; +} +/* Add up to (max_eager_lanes - 1) extra lanes for high-bandwidth active messages, masking out the local and remote devices already used by the AM lane. Always returns UCS_OK. */ +static ucs_status_t +ucp_wireup_add_am_bw_lanes(const ucp_wireup_select_params_t *select_params, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_ep_h ep = select_params->ep; + ucp_context_h context = ep->worker->context; + unsigned ep_init_flags = ucp_wireup_ep_init_flags(select_params, + select_ctx); + + ucp_wireup_select_bw_info_t bw_info; + ucp_lane_index_t lane_desc_idx; + ucp_rsc_index_t rsc_index; + unsigned addr_index; + + /* Extra AM bandwidth lanes need the TAG feature, a non-MEM_TYPE endpoint and max_eager_lanes >= 2 */ + if (!(ucp_ep_get_context_features(ep) & UCP_FEATURE_TAG) || + (ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) || + (context->config.ext.max_eager_lanes < 2)) { + return UCS_OK; + } + + /* Criteria for the additional high-bandwidth active message lanes */ + bw_info.criteria.title = "high-bw active messages"; + bw_info.criteria.local_md_flags = 0; + bw_info.criteria.remote_md_flags = 0; + bw_info.criteria.remote_iface_flags = UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_CB_SYNC; + bw_info.criteria.local_iface_flags =
UCT_IFACE_FLAG_AM_BCOPY; + bw_info.criteria.calc_score = ucp_wireup_am_bw_score_func; + bw_info.criteria.tl_rsc_flags = 0; + ucp_wireup_clean_amo_criteria(&bw_info.criteria); + ucp_wireup_fill_peer_err_criteria(&bw_info.criteria, ep_init_flags); + + if (ucs_test_all_flags(ucp_ep_get_context_features(ep), + UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP)) { + bw_info.criteria.local_iface_flags |= UCP_WORKER_UCT_UNSIG_EVENT_CAP_FLAGS; + } + + bw_info.local_dev_bitmap = UINT64_MAX; + bw_info.remote_dev_bitmap = UINT64_MAX; + bw_info.md_map = 0; + bw_info.max_lanes = context->config.ext.max_eager_lanes - 1; + bw_info.usage = UCP_WIREUP_LANE_USAGE_AM_BW; + + /* am_bw_lanes[0] is the AM lane, so exclude its devices from the selection below */ + for (lane_desc_idx = 0; lane_desc_idx < select_ctx->num_lanes; ++lane_desc_idx) { + if (select_ctx->lane_descs[lane_desc_idx].usage & UCP_WIREUP_LANE_USAGE_AM) { + addr_index = select_ctx->lane_descs[lane_desc_idx].addr_index; + rsc_index = select_ctx->lane_descs[lane_desc_idx].rsc_index; + bw_info.md_map |= UCS_BIT(context->tl_rscs[rsc_index].md_index); + bw_info.local_dev_bitmap &= ~UCS_BIT(context->tl_rscs[rsc_index].dev_index); + bw_info.remote_dev_bitmap &= ~UCS_BIT(select_params->address-> + address_list[addr_index].dev_index); + if (ucp_wireup_is_rsc_self_or_shm(ep, rsc_index)) { + /* if AM lane is SELF or SHMEM - then do not use more lanes */ + return UCS_OK; + } else { + break; /* stop searching: the AM lane was found + (and there is only one AM lane) */ + } + } + } + + /* the number of lanes returned by the function below is ignored, + * since the AM lane already serves as one AM BW lane */ + ucp_wireup_add_bw_lanes(select_params, &bw_info, UINT64_MAX, select_ctx); + + return UCS_OK; +} +/* Map a rendezvous mode to the UCT zero-copy interface flags it requires: GET and PUT for AUTO, one flag for the explicit modes, 0 for any other mode. */ +static uint64_t ucp_wireup_get_rma_bw_iface_flags(ucp_rndv_mode_t rndv_mode) +{ + switch (rndv_mode) { + case UCP_RNDV_MODE_AUTO: + return (UCT_IFACE_FLAG_GET_ZCOPY | UCT_IFACE_FLAG_PUT_ZCOPY); + case UCP_RNDV_MODE_GET_ZCOPY: + return UCT_IFACE_FLAG_GET_ZCOPY; + case
UCP_RNDV_MODE_PUT_ZCOPY: + return UCT_IFACE_FLAG_PUT_ZCOPY; + default: + return 0; + } +} + +static ucs_status_t +ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_ep_h ep = select_params->ep; + ucp_context_h context = ep->worker->context; + unsigned ep_init_flags = ucp_wireup_ep_init_flags(select_params, + select_ctx); + uint64_t iface_rma_flags = 0; + ucp_rndv_mode_t rndv_modes[] = { + context->config.ext.rndv_mode, + UCP_RNDV_MODE_GET_ZCOPY, + UCP_RNDV_MODE_PUT_ZCOPY + }; + ucp_wireup_select_bw_info_t bw_info; + ucs_memory_type_t mem_type; + size_t added_lanes; + uint64_t md_reg_flag; + uint8_t i; + + if (ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) { + md_reg_flag = 0; + } else if (ucp_ep_get_context_features(ep) & UCP_FEATURE_TAG) { + /* if needed for RNDV, need only access for remote registered memory */ + md_reg_flag = UCT_MD_FLAG_REG; + } else { + return UCS_OK; + } + + bw_info.usage = UCP_WIREUP_LANE_USAGE_RMA_BW; + bw_info.criteria.title = "high-bw remote memory access"; + bw_info.criteria.remote_iface_flags = 0; + bw_info.criteria.local_iface_flags = UCT_IFACE_FLAG_PENDING; + bw_info.criteria.calc_score = ucp_wireup_rma_bw_score_func; + bw_info.criteria.tl_rsc_flags = 0; + bw_info.criteria.remote_md_flags = md_reg_flag; + ucp_wireup_clean_amo_criteria(&bw_info.criteria); + ucp_wireup_fill_peer_err_criteria(&bw_info.criteria, ep_init_flags); + + if (ucs_test_all_flags(ucp_ep_get_context_features(ep), + UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP)) { + bw_info.criteria.local_iface_flags |= UCP_WORKER_UCT_UNSIG_EVENT_CAP_FLAGS; + } + + bw_info.local_dev_bitmap = UINT64_MAX; + bw_info.remote_dev_bitmap = UINT64_MAX; + bw_info.md_map = 0; + + /* check rkey_ptr */ + if (!(ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) && + (context->config.ext.rndv_mode == UCP_RNDV_MODE_AUTO)) { + + /* We require remote memory registration and local ability to obtain + * a pointer to the remote key. 
Only one is needed since we are doing + * memory copy on the CPU. + * Allow selecting additional lanes in case the remote memory will not be + * registered with this memory domain, i.e with GPU memory. + */ + bw_info.criteria.local_md_flags = UCT_MD_FLAG_RKEY_PTR; + bw_info.max_lanes = 1; + + ucp_wireup_add_bw_lanes(select_params, &bw_info, + context->mem_type_access_tls[UCS_MEMORY_TYPE_HOST], + select_ctx); + } + + /* The first RNDV mode checked must be the one specified in the configuration */ + bw_info.criteria.local_md_flags = md_reg_flag; + bw_info.max_lanes = context->config.ext.max_rndv_lanes; + ucs_assert(rndv_modes[0] == context->config.ext.rndv_mode); + + /* RNDV protocol can't mix different schemes, i.e. wireup has to + * select lanes with the same iface flags, depending on the requested + * RNDV scheme. + * First, try to select lanes with the RNDV scheme requested + * by the user. If no lanes were selected and the RNDV scheme in the + * configuration is AUTO, try the other schemes. */ + UCS_STATIC_ASSERT(UCS_MEMORY_TYPE_HOST == 0); + for (i = 0; i < ucs_array_size(rndv_modes); i++) { + /* Remove the previous iface RMA flags */ + bw_info.criteria.remote_iface_flags &= ~iface_rma_flags; + bw_info.criteria.local_iface_flags &= ~iface_rma_flags; + + iface_rma_flags = ucp_wireup_get_rma_bw_iface_flags(rndv_modes[i]); + + /* Set the new iface RMA flags */ + bw_info.criteria.remote_iface_flags |= iface_rma_flags; + bw_info.criteria.local_iface_flags |= iface_rma_flags; + + added_lanes = 0; + + for (mem_type = UCS_MEMORY_TYPE_HOST; + mem_type < UCS_MEMORY_TYPE_LAST; mem_type++) { + if (!context->mem_type_access_tls[mem_type]) { + continue; + } + + added_lanes += ucp_wireup_add_bw_lanes(select_params, &bw_info, + context->mem_type_access_tls[mem_type], + select_ctx); + } + + if (added_lanes /* There are selected lanes */ || + /* There are no selected lanes, but a user requested + * the exact RNDV scheme, so there is no other choice */ + (context->config.ext.rndv_mode !=
UCP_RNDV_MODE_AUTO)) { + break; + } + } + + return UCS_OK; +} + +/* Lane for transport offloaded tag interface */ +static ucs_status_t +ucp_wireup_add_tag_lane(const ucp_wireup_select_params_t *select_params, + const ucp_wireup_select_info_t *am_info, + ucp_err_handling_mode_t err_mode, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_ep_h ep = select_params->ep; + ucp_wireup_criteria_t criteria = {0}; + ucp_wireup_select_info_t select_info = {0}; + ucs_status_t status; + + if (!(ucp_ep_get_context_features(ep) & UCP_FEATURE_TAG) || + /* TODO: remove check below when UCP_ERR_HANDLING_MODE_PEER supports + * RNDV-protocol or HW TM supports fragmented protocols + */ + (err_mode != UCP_ERR_HANDLING_MODE_NONE)) { + return UCS_OK; + } + + criteria.title = "tag_offload"; + criteria.local_md_flags = UCT_MD_FLAG_REG; /* needed for posting tags to HW */ + criteria.remote_md_flags = UCT_MD_FLAG_REG; /* needed for posting tags to HW */ + criteria.remote_iface_flags = /* the same as local_iface_flags */ + criteria.local_iface_flags = UCT_IFACE_FLAG_TAG_EAGER_BCOPY | + UCT_IFACE_FLAG_TAG_RNDV_ZCOPY | + UCT_IFACE_FLAG_GET_ZCOPY | + UCT_IFACE_FLAG_PENDING; + criteria.calc_score = ucp_wireup_am_score_func; + + if (ucs_test_all_flags(ucp_ep_get_context_features(ep), + UCP_FEATURE_WAKEUP)) { + criteria.local_iface_flags |= UCP_WORKER_UCT_UNSIG_EVENT_CAP_FLAGS; + } + + /* Do not add the tag offload lane if the selected tag lane score is lower + * than the AM score. In this case AM will be used for tag matching.
*/ + status = ucp_wireup_select_transport(select_params, &criteria, + UINT64_MAX, UINT64_MAX, UINT64_MAX, + UINT64_MAX, 0, &select_info); + if ((status == UCS_OK) && + (ucp_score_cmp(select_info.score, + am_info->score) >= 0)) { + ucp_wireup_add_lane(select_params, &select_info, + UCP_WIREUP_LANE_USAGE_TAG, select_ctx); + } + + return UCS_OK; +} +/* Pick the lane for wireup messages: the first lane whose local and remote interface flags satisfy the auxiliary criteria, else the last p2p lane seen, else UCP_NULL_LANE. */ +static ucp_lane_index_t +ucp_wireup_select_wireup_msg_lane(ucp_worker_h worker, + unsigned ep_init_flags, + const ucp_address_entry_t *address_list, + const ucp_wireup_lane_desc_t *lane_descs, + ucp_lane_index_t num_lanes) +{ + ucp_context_h context = worker->context; + ucp_lane_index_t p2p_lane = UCP_NULL_LANE; + ucp_wireup_criteria_t criteria = {0}; + uct_tl_resource_desc_t *resource; + ucp_rsc_index_t rsc_index; + uct_iface_attr_t *attrs; + ucp_lane_index_t lane; + unsigned addr_index; + + ucp_wireup_fill_aux_criteria(&criteria, ep_init_flags); + for (lane = 0; lane < num_lanes; ++lane) { + rsc_index = lane_descs[lane].rsc_index; + addr_index = lane_descs[lane].addr_index; + resource = &context->tl_rscs[rsc_index].tl_rsc; + attrs = ucp_worker_iface_get_attr(worker, rsc_index); + + /* if the current lane satisfies the wireup criteria, choose it for wireup.
+ * otherwise, remember a lane with a p2p transport as the fallback */ + if (ucp_wireup_check_flags(resource, + attrs->cap.flags, + criteria.local_iface_flags, criteria.title, + ucp_wireup_iface_flags, NULL, 0) && + ucp_wireup_check_flags(resource, + address_list[addr_index].iface_attr.cap_flags, + criteria.remote_iface_flags, criteria.title, + ucp_wireup_iface_flags, NULL, 0)) + { + return lane; + } else if (ucp_worker_is_tl_p2p(worker, rsc_index)) { + p2p_lane = lane; + } + } + + return p2p_lane; +} +/* Fill select_params from the arguments; allow_am is derived from ep_init_flags. */ +static UCS_F_NOINLINE void +ucp_wireup_select_params_init(ucp_wireup_select_params_t *select_params, + ucp_ep_h ep, unsigned ep_init_flags, + const ucp_unpacked_address_t *remote_address, + uint64_t tl_bitmap, int show_error) +{ + select_params->ep = ep; + select_params->ep_init_flags = ep_init_flags; + select_params->tl_bitmap = tl_bitmap; + select_params->address = remote_address; + select_params->allow_am = + ucp_wireup_allow_am_emulation_layer(ep_init_flags); + select_params->show_error = show_error; +} +/* Zero select_ctx, then add lanes in this order: CM, RMA, AMO, AM, RMA_BW, TAG, AM_BW. Returns the first failing status, or UCS_ERR_UNREACHABLE when no lane was selected. */ +static UCS_F_NOINLINE ucs_status_t +ucp_wireup_search_lanes(const ucp_wireup_select_params_t *select_params, + ucp_err_handling_mode_t err_mode, + ucp_wireup_select_context_t *select_ctx) +{ + ucp_wireup_select_info_t am_info; + ucs_status_t status; + + memset(select_ctx, 0, sizeof(*select_ctx)); + + status = ucp_wireup_add_cm_lane(select_params, select_ctx); + if (status != UCS_OK) { + return status; + } + + status = ucp_wireup_add_rma_lanes(select_params, select_ctx); + if (status != UCS_OK) { + return status; + } + + status = ucp_wireup_add_amo_lanes(select_params, select_ctx); + if (status != UCS_OK) { + return status; + } + + /* Add the AM lane only after the RMA/AMO lanes were selected, to know + * whether they need emulation over AM or not */ + status = ucp_wireup_add_am_lane(select_params, &am_info, select_ctx); + if (status != UCS_OK) { + return status; + } + + status = ucp_wireup_add_rma_bw_lanes(select_params, select_ctx); + if (status != UCS_OK) { + return status; + } +
+ status = ucp_wireup_add_tag_lane(select_params, &am_info, err_mode, + select_ctx); + if (status != UCS_OK) { + return status; + } + + /* call ucp_wireup_add_am_bw_lanes after ucp_wireup_add_am_lane to + * allow excluding the AM lane from the AM_BW list */ + status = ucp_wireup_add_am_bw_lanes(select_params, select_ctx); + if (status != UCS_OK) { + return status; + } + + /* User should not create endpoints without requesting any communication features */ + if (select_ctx->num_lanes == 0) { + ucs_error("No transports selected to %s (features: 0x%lx)", + ucp_ep_peer_name(select_params->ep), + ucp_ep_get_context_features(select_params->ep)); + return UCS_ERR_UNREACHABLE; + } + + return UCS_OK; +} +/* Fill the endpoint configuration key and addr_indices from the lanes collected in select_ctx. */ +static UCS_F_NOINLINE void +ucp_wireup_construct_lanes(const ucp_wireup_select_params_t *select_params, + ucp_wireup_select_context_t *select_ctx, + unsigned *addr_indices, ucp_ep_config_key_t *key) +{ + ucp_ep_h ep = select_params->ep; + ucp_worker_h worker = ep->worker; + ucp_context_h context = worker->context; + ucp_rsc_index_t rsc_index; + ucp_md_index_t md_index; + ucp_lane_index_t lane; + ucp_lane_index_t i; + + key->num_lanes = select_ctx->num_lanes; + /* Construct the endpoint configuration key: + * - arrange lane description in the EP configuration + * - create remote MD bitmap + * - if AM lane exists and fits for wireup messages, select it for this purpose.
+ */ + for (lane = 0; lane < key->num_lanes; ++lane) { + ucs_assert(select_ctx->lane_descs[lane].usage != 0); + key->lanes[lane].rsc_index = select_ctx->lane_descs[lane].rsc_index; + key->lanes[lane].proxy_lane = select_ctx->lane_descs[lane].proxy_lane; + key->lanes[lane].dst_md_index = select_ctx->lane_descs[lane].dst_md_index; + addr_indices[lane] = select_ctx->lane_descs[lane].addr_index; + + if (select_ctx->lane_descs[lane].usage & UCP_WIREUP_LANE_USAGE_CM) { + ucs_assert(key->cm_lane == UCP_NULL_LANE); + key->cm_lane = lane; + /* CM lane can't be shared with TL usage */ + ucs_assert(select_ctx->lane_descs[lane].usage == + UCP_WIREUP_LANE_USAGE_CM); + continue; + } + if (select_ctx->lane_descs[lane].usage & UCP_WIREUP_LANE_USAGE_AM) { + ucs_assert(key->am_lane == UCP_NULL_LANE); + key->am_lane = lane; + } + if ((select_ctx->lane_descs[lane].usage & UCP_WIREUP_LANE_USAGE_AM_BW) && + (lane < UCP_MAX_LANES - 1)) { + key->am_bw_lanes[lane + 1] = lane; + } + if (select_ctx->lane_descs[lane].usage & UCP_WIREUP_LANE_USAGE_RMA) { + key->rma_lanes[lane] = lane; + } + if (select_ctx->lane_descs[lane].usage & UCP_WIREUP_LANE_USAGE_RMA_BW) { + key->rma_bw_lanes[lane] = lane; + } + if (select_ctx->lane_descs[lane].usage & UCP_WIREUP_LANE_USAGE_AMO) { + key->amo_lanes[lane] = lane; + } + if (select_ctx->lane_descs[lane].usage & UCP_WIREUP_LANE_USAGE_TAG) { + ucs_assert(key->tag_lane == UCP_NULL_LANE); + key->tag_lane = lane; + } + } + + /* Sort AM, RMA and AMO lanes according to score */ + ucs_qsort_r(key->am_bw_lanes + 1, UCP_MAX_LANES - 1, sizeof(ucp_lane_index_t), + ucp_wireup_compare_lane_am_bw_score, select_ctx->lane_descs); + ucs_qsort_r(key->rma_lanes, UCP_MAX_LANES, sizeof(ucp_lane_index_t), + ucp_wireup_compare_lane_rma_score, select_ctx->lane_descs); + ucs_qsort_r(key->rma_bw_lanes, UCP_MAX_LANES, sizeof(ucp_lane_index_t), + ucp_wireup_compare_lane_rma_bw_score, select_ctx->lane_descs); + ucs_qsort_r(key->amo_lanes, UCP_MAX_LANES, sizeof(ucp_lane_index_t), + 
ucp_wireup_compare_lane_amo_score, select_ctx->lane_descs); + + if (!(select_params->ep_init_flags & (UCP_EP_INIT_CM_WIREUP_CLIENT | + UCP_EP_INIT_CM_WIREUP_SERVER))) { + /* Select lane for wireup messages */ + key->wireup_lane = + ucp_wireup_select_wireup_msg_lane(worker, + ucp_wireup_ep_init_flags(select_params, + select_ctx), + select_params->address->address_list, + select_ctx->lane_descs, + key->num_lanes); + } + + /* add to map first UCP_MAX_OP_MDS fastest MD's */ + for (i = 0; + (key->rma_bw_lanes[i] != UCP_NULL_LANE) && + (ucs_popcount(key->rma_bw_md_map) < UCP_MAX_OP_MDS); i++) { + lane = key->rma_bw_lanes[i]; + rsc_index = select_ctx->lane_descs[lane].rsc_index; + md_index = context->tl_rscs[rsc_index].md_index; + + /* Pack remote key only if needed for RMA. + * FIXME a temporary workaround to prevent the ugni uct from using rndv. */ + if ((context->tl_mds[md_index].attr.cap.flags & UCT_MD_FLAG_NEED_RKEY) && + !(strstr(context->tl_rscs[rsc_index].tl_rsc.tl_name, "ugni"))) { + key->rma_bw_md_map |= UCS_BIT(md_index); + } + } + + /* use AM lane first for eager AM transport to simplify processing single/middle + * msg packets */ + key->am_bw_lanes[0] = key->am_lane; +} + +ucs_status_t +ucp_wireup_select_lanes(ucp_ep_h ep, unsigned ep_init_flags, uint64_t tl_bitmap, + const ucp_unpacked_address_t *remote_address, + unsigned *addr_indices, ucp_ep_config_key_t *key) +{ + ucp_worker_h worker = ep->worker; + uint64_t scalable_tl_bitmap = worker->scalable_tl_bitmap & tl_bitmap; + ucp_wireup_select_context_t select_ctx; + ucp_wireup_select_params_t select_params; + ucs_status_t status; + + if (scalable_tl_bitmap) { + ucp_wireup_select_params_init(&select_params, ep, ep_init_flags, + remote_address, scalable_tl_bitmap, 0); + status = ucp_wireup_search_lanes(&select_params, key->err_mode, + &select_ctx); + if (status == UCS_OK) { + goto out; + } + + /* If the transport selection based on the scalable TL bitmap wasn't + * successful, repeat the selection procedure 
with full TL bitmap in + * order to select best transports based on their scores only */ + } + + ucp_wireup_select_params_init(&select_params, ep, ep_init_flags, + remote_address, tl_bitmap, 1); + status = ucp_wireup_search_lanes(&select_params, key->err_mode, + &select_ctx); + if (status != UCS_OK) { + return status; + } + +out: + ucp_wireup_construct_lanes(&select_params, &select_ctx, addr_indices, key); + return UCS_OK; +} +/* Auxiliary score: 1e-3 divided by (interface latency + local overhead + remote overhead); md_attr is not used. */ +static double ucp_wireup_aux_score_func(ucp_context_h context, + const uct_md_attr_t *md_attr, + const uct_iface_attr_t *iface_attr, + const ucp_address_iface_attr_t *remote_iface_attr) +{ + /* best end-to-end latency (bcopy size is not part of the score) */ + return (1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) + + iface_attr->overhead + remote_iface_attr->overhead)); +} +/* Select one transport that matches the auxiliary criteria, with all bitmaps set to UINT64_MAX and show_error set to 1; the result is written to select_info. */ +ucs_status_t +ucp_wireup_select_aux_transport(ucp_ep_h ep, unsigned ep_init_flags, + const ucp_unpacked_address_t *remote_address, + ucp_wireup_select_info_t *select_info) +{ + ucp_wireup_criteria_t criteria = {0}; + ucp_wireup_select_params_t select_params; + + ucp_wireup_select_params_init(&select_params, ep, ep_init_flags, + remote_address, UINT64_MAX, 1); + ucp_wireup_fill_aux_criteria(&criteria, ep_init_flags); + return ucp_wireup_select_transport(&select_params, &criteria, + UINT64_MAX, UINT64_MAX, UINT64_MAX, + UINT64_MAX, 1, select_info); +} +/* Store in rsc_index_p the first configured sockaddr transport whose MD reports sockaddr as remotely accessible; returns UCS_ERR_UNREACHABLE when none does. */ +ucs_status_t +ucp_wireup_select_sockaddr_transport(const ucp_context_h context, + const ucs_sock_addr_t *sockaddr, + ucp_rsc_index_t *rsc_index_p) +{ + char saddr_str[UCS_SOCKADDR_STRING_LEN]; + ucp_tl_resource_desc_t *resource; + ucp_rsc_index_t tl_id; + ucp_md_index_t md_index; + uct_md_h md; + int i; + + /* Go over the sockaddr transports priority array and try to use the transports + * one by one for the client side */ + for (i = 0; i < context->config.num_sockaddr_tls; i++) { + tl_id = context->config.sockaddr_tl_ids[i]; + resource = &context->tl_rscs[tl_id]; + md_index = resource->md_index; + md =
context->tl_mds[md_index].md; + + ucs_assert(context->tl_mds[md_index].attr.cap.flags & + UCT_MD_FLAG_SOCKADDR); + + /* The client selects the transport for sockaddr according to the + * configuration. We rely on the server having this transport available + * as well */ + if (uct_md_is_sockaddr_accessible(md, sockaddr, + UCT_SOCKADDR_ACC_REMOTE)) { + *rsc_index_p = tl_id; + ucs_debug("sockaddr transport selected: %s", resource->tl_rsc.tl_name); + return UCS_OK; + } + + ucs_debug("md %s cannot reach %s", + context->tl_mds[md_index].rsc.md_name, + ucs_sockaddr_str(sockaddr->addr, saddr_str, + sizeof(saddr_str))); + } + + return UCS_ERR_UNREACHABLE; +} diff --git a/src/ucp/wireup/signaling_ep.c b/src/ucp/wireup/signaling_ep.c new file mode 100644 index 0000000..7fe876f --- /dev/null +++ b/src/ucp/wireup/signaling_ep.c @@ -0,0 +1,207 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wireup.h" + +#include + + +/* Context for packing short data into bcopy */ +typedef struct { + uint64_t header; + const void *payload; + unsigned length; +} ucp_signaling_ep_pack_ctx_t; + + +static size_t ucp_signaling_ep_pack_short(void *dest, void *arg) +{ + ucp_signaling_ep_pack_ctx_t *ctx = arg; + + *(uint64_t*)dest = ctx->header; + memcpy(UCS_PTR_BYTE_OFFSET(dest, sizeof(uint64_t)), ctx->payload, ctx->length); + return sizeof(uint64_t) + ctx->length; +} + +static size_t ucp_signaling_ep_pack_tag_short(void *dest, void *arg) +{ + ucp_signaling_ep_pack_ctx_t *ctx = arg; + + memcpy(dest, ctx->payload, ctx->length); + return ctx->length; +} + +static ucs_status_t +ucp_signaling_ep_am_short(uct_ep_h ep, uint8_t id, uint64_t header, + const void *payload, unsigned length) +{ + ucp_proxy_ep_t *proxy_ep = ucs_derived_of(ep, ucp_proxy_ep_t); + ucp_signaling_ep_pack_ctx_t ctx; + ssize_t packed_size; + + ctx.header = header; + ctx.payload = payload; + 
ctx.length = length; + + ucp_assert_memtype(proxy_ep->ucp_ep->worker->context, ctx.payload, + ctx.length, UCS_MEMORY_TYPE_HOST); + + packed_size = uct_ep_am_bcopy(proxy_ep->uct_ep, id, + ucp_signaling_ep_pack_short, &ctx, + UCT_SEND_FLAG_SIGNALED); + if (packed_size < 0) { + return (ucs_status_t)packed_size; + } + + ucp_proxy_ep_replace(proxy_ep); + return UCS_OK; +} + +static ssize_t +ucp_signaling_ep_am_bcopy(uct_ep_h ep, uint8_t id, uct_pack_callback_t pack_cb, + void *arg, unsigned flags) +{ + ucp_proxy_ep_t *proxy_ep = ucs_derived_of(ep, ucp_proxy_ep_t); + ssize_t packed_size; + + packed_size = uct_ep_am_bcopy(proxy_ep->uct_ep, id, pack_cb, arg, + flags | UCT_SEND_FLAG_SIGNALED); + if (packed_size >= 0) { + ucp_proxy_ep_replace(proxy_ep); + } + return packed_size; +} + +static ucs_status_t +ucp_signaling_ep_am_zcopy(uct_ep_h ep, uint8_t id, const void *header, + unsigned header_length, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, uct_completion_t *comp) +{ + ucp_proxy_ep_t *proxy_ep = ucs_derived_of(ep, ucp_proxy_ep_t); + ucs_status_t status; + + status = uct_ep_am_zcopy(proxy_ep->uct_ep, id, header, header_length, iov, + iovcnt, flags | UCT_SEND_FLAG_SIGNALED, comp); + if ((status == UCS_OK) || (status == UCS_INPROGRESS)) { + ucp_proxy_ep_replace(proxy_ep); + } + return status; +} + +static ucs_status_t +ucp_signaling_ep_tag_eager_short(uct_ep_h ep, uct_tag_t tag, const void *data, + size_t length) +{ + ucp_proxy_ep_t *proxy_ep = ucs_derived_of(ep, ucp_proxy_ep_t); + ucp_signaling_ep_pack_ctx_t ctx; + ssize_t packed_size; + + ctx.payload = data; + ctx.length = length; + + ucp_assert_memtype(proxy_ep->ucp_ep->worker->context, ctx.payload, + ctx.length, UCS_MEMORY_TYPE_HOST); + + packed_size = uct_ep_tag_eager_bcopy(proxy_ep->uct_ep, tag, 0, + ucp_signaling_ep_pack_tag_short, &ctx, + UCT_SEND_FLAG_SIGNALED); + if (packed_size < 0) { + return (ucs_status_t)packed_size; + } + + ucp_proxy_ep_replace(proxy_ep); + return UCS_OK; +} + +static ssize_t 
+ucp_signaling_ep_tag_eager_bcopy(uct_ep_h ep, uct_tag_t tag, uint64_t imm, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags) +{ + ucp_proxy_ep_t *proxy_ep = ucs_derived_of(ep, ucp_proxy_ep_t); + ssize_t packed_size; + + packed_size = uct_ep_tag_eager_bcopy(proxy_ep->uct_ep, tag, imm, pack_cb, + arg, flags | UCT_SEND_FLAG_SIGNALED); + if (packed_size >= 0) { + ucp_proxy_ep_replace(proxy_ep); + } + return packed_size; +} + +static ucs_status_t +ucp_signaling_ep_tag_eager_zcopy(uct_ep_h ep, uct_tag_t tag, uint64_t imm, + const uct_iov_t *iov, size_t iovcnt, + unsigned flags, uct_completion_t *comp) +{ + ucp_proxy_ep_t *proxy_ep = ucs_derived_of(ep, ucp_proxy_ep_t); + ucs_status_t status; + + status = uct_ep_tag_eager_zcopy(proxy_ep->uct_ep, tag, imm, iov, iovcnt, + flags | UCT_SEND_FLAG_SIGNALED, comp); + + if (!UCS_STATUS_IS_ERR(status)) { + ucp_proxy_ep_replace(proxy_ep); + } + return status; +} + +static ucs_status_ptr_t +ucp_signaling_ep_tag_rndv_zcopy(uct_ep_h ep, uct_tag_t tag, const void *header, + unsigned header_length, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp) +{ + ucp_proxy_ep_t *proxy_ep = ucs_derived_of(ep, ucp_proxy_ep_t); + ucs_status_ptr_t status; + + status = uct_ep_tag_rndv_zcopy(proxy_ep->uct_ep, tag, header, header_length, + iov, iovcnt, flags | UCT_SEND_FLAG_SIGNALED, + comp); + if (!UCS_PTR_IS_ERR(status)) { + ucp_proxy_ep_replace(proxy_ep); + } + return status; +} + +static ucs_status_t +ucp_signaling_ep_tag_rndv_request(uct_ep_h ep, uct_tag_t tag, + const void* header, unsigned header_length, + unsigned flags) +{ + ucp_proxy_ep_t *proxy_ep = ucs_derived_of(ep, ucp_proxy_ep_t); + ucs_status_t status; + + status = uct_ep_tag_rndv_request(proxy_ep->uct_ep, tag, header, + header_length, + flags | UCT_SEND_FLAG_SIGNALED); + if (status == UCS_OK) { + ucp_proxy_ep_replace(proxy_ep); + } + return status; +} + +ucs_status_t ucp_signaling_ep_create(ucp_ep_h ucp_ep, uct_ep_h uct_ep, + int is_owner, 
uct_ep_h *signaling_ep) +{ + static uct_iface_ops_t signaling_ep_ops = { + .ep_am_short = ucp_signaling_ep_am_short, + .ep_am_bcopy = ucp_signaling_ep_am_bcopy, + .ep_am_zcopy = ucp_signaling_ep_am_zcopy, + .ep_tag_eager_short = ucp_signaling_ep_tag_eager_short, + .ep_tag_eager_bcopy = ucp_signaling_ep_tag_eager_bcopy, + .ep_tag_eager_zcopy = ucp_signaling_ep_tag_eager_zcopy, + .ep_tag_rndv_zcopy = ucp_signaling_ep_tag_rndv_zcopy, + .ep_tag_rndv_request = ucp_signaling_ep_tag_rndv_request + }; + + return UCS_CLASS_NEW(ucp_proxy_ep_t, signaling_ep, &signaling_ep_ops, + ucp_ep, uct_ep, is_owner); +} diff --git a/src/ucp/wireup/wireup.c b/src/ucp/wireup/wireup.c new file mode 100644 index 0000000..e1bd6f6 --- /dev/null +++ b/src/ucp/wireup/wireup.c @@ -0,0 +1,1268 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wireup.h" +#include "address.h" +#include "wireup_cm.h" +#include "wireup_ep.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Description of the protocol in UCX wiki: + * https://github.com/openucx/ucx/wiki/Connection-establishment + */ + +static size_t ucp_wireup_msg_pack(void *dest, void *arg) +{ + ucp_request_t *req = arg; + *(ucp_wireup_msg_t*)dest = req->send.wireup; + memcpy((ucp_wireup_msg_t*)dest + 1, req->send.buffer, req->send.length); + return sizeof(ucp_wireup_msg_t) + req->send.length; +} + +static const char* ucp_wireup_msg_str(uint8_t msg_type) +{ + switch (msg_type) { + case UCP_WIREUP_MSG_PRE_REQUEST: + return "PRE_REQ"; + case UCP_WIREUP_MSG_REQUEST: + return "REQ"; + case UCP_WIREUP_MSG_REPLY: + return "REP"; + case UCP_WIREUP_MSG_ACK: + return "ACK"; + default: + return ""; + } +} + +static ucp_lane_index_t ucp_wireup_get_msg_lane(ucp_ep_h ep, uint8_t msg_type) +{ + ucp_context_h context = ep->worker->context; + ucp_ep_config_t *ep_config = 
ucp_ep_config(ep); + ucp_lane_index_t lane = UCP_NULL_LANE; + + if (msg_type != UCP_WIREUP_MSG_ACK) { + /* for request/response, try wireup_lane first */ + lane = ep_config->key.wireup_lane; + } + + if (lane == UCP_NULL_LANE) { + /* fallback to active messages lane */ + lane = ep_config->key.am_lane; + } + + if (lane == UCP_NULL_LANE) { + ucs_fatal("ep %p to %s: could not find a lane to send CONN_%s%s", + ep, ucp_ep_peer_name(ep), ucp_wireup_msg_str(msg_type), + context->config.ext.unified_mode ? + ". try to set UCX_UNIFIED_MODE=n." : ""); + } + + return lane; +} + +ucs_status_t ucp_wireup_msg_progress(uct_pending_req_t *self) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucp_ep_h ep = req->send.ep; + ssize_t packed_len; + unsigned am_flags; + + if (req->send.wireup.type == UCP_WIREUP_MSG_REQUEST) { + if (ep->flags & UCP_EP_FLAG_REMOTE_CONNECTED) { + ucs_trace("ep %p: not sending wireup message - remote already connected", + ep); + goto out; + } + } else if (req->send.wireup.type == UCP_WIREUP_MSG_PRE_REQUEST) { + ucs_assert (!(ep->flags & UCP_EP_FLAG_REMOTE_CONNECTED)); + } + + /* send the active message */ + req->send.lane = ucp_wireup_get_msg_lane(ep, req->send.wireup.type); + + am_flags = 0; + if ((req->send.wireup.type == UCP_WIREUP_MSG_REQUEST) || + (req->send.wireup.type == UCP_WIREUP_MSG_PRE_REQUEST)) { + am_flags |= UCT_SEND_FLAG_SIGNALED; + } + + VALGRIND_CHECK_MEM_IS_DEFINED(&req->send.wireup, sizeof(req->send.wireup)); + VALGRIND_CHECK_MEM_IS_DEFINED(req->send.buffer, req->send.length); + + packed_len = uct_ep_am_bcopy(ep->uct_eps[req->send.lane], UCP_AM_ID_WIREUP, + ucp_wireup_msg_pack, req, am_flags); + if (packed_len < 0) { + if (packed_len != UCS_ERR_NO_RESOURCE) { + ucs_error("failed to send wireup: %s", + ucs_status_string((ucs_status_t)packed_len)); + } + return (ucs_status_t)packed_len; + } + + switch (req->send.wireup.type) { + case UCP_WIREUP_MSG_PRE_REQUEST: + ep->flags |= UCP_EP_FLAG_CONNECT_PRE_REQ_SENT; + 
break; + case UCP_WIREUP_MSG_REQUEST: + ep->flags |= UCP_EP_FLAG_CONNECT_REQ_SENT; + break; + case UCP_WIREUP_MSG_REPLY: + ep->flags |= UCP_EP_FLAG_CONNECT_REP_SENT; + break; + case UCP_WIREUP_MSG_ACK: + ep->flags |= UCP_EP_FLAG_CONNECT_ACK_SENT; + break; + } + +out: + ucs_free((void*)req->send.buffer); + ucs_free(req); + return UCS_OK; +} + +static inline int ucp_wireup_is_ep_needed(ucp_ep_h ep) +{ + return (ep != NULL) && !(ep->flags & UCP_EP_FLAG_LISTENER); +} + +/* + * @param [in] lanes2remote Per-lane array forwarded to ucp_address_pack() together with tl_bitmap; callers in this file also pass NULL. + */ +static ucs_status_t +ucp_wireup_msg_send(ucp_ep_h ep, uint8_t type, uint64_t tl_bitmap, + const ucp_lane_index_t *lanes2remote) +{ + ucp_request_t* req; + ucs_status_t status; + void *address; + + ucs_assert(ep->cfg_index != (uint8_t)-1); + + /* We cannot allocate from memory pool because it's not thread safe + * and this function may be called from any thread + */ + req = ucs_malloc(sizeof(*req), "wireup_msg_req"); + if (req == NULL) { + return UCS_ERR_NO_MEMORY; + } + + req->flags = 0; + req->send.ep = ep; + req->send.wireup.type = type; + req->send.wireup.err_mode = ucp_ep_config(ep)->key.err_mode; + req->send.wireup.conn_sn = ep->conn_sn; + req->send.wireup.src_ep_ptr = (uintptr_t)ep; + if (ep->flags & UCP_EP_FLAG_DEST_EP) { + req->send.wireup.dest_ep_ptr = ucp_ep_dest_ep_ptr(ep); + } else { + req->send.wireup.dest_ep_ptr = 0; + } + + req->send.uct.func = ucp_wireup_msg_progress; + req->send.datatype = ucp_dt_make_contig(1); + ucp_request_send_state_init(req, ucp_dt_make_contig(1), 0); + + /* pack all addresses */ + status = ucp_address_pack(ep->worker, + ucp_wireup_is_ep_needed(ep) ? 
ep : NULL, + tl_bitmap, UCP_ADDRESS_PACK_FLAG_ALL, + lanes2remote, &req->send.length, &address); + if (status != UCS_OK) { + ucs_free(req); + ucs_error("failed to pack address: %s", ucs_status_string(status)); + return status; + } + + req->send.buffer = address; + + ucp_request_send(req, 0); + return UCS_OK; +} + +static uint64_t ucp_wireup_get_ep_tl_bitmap(ucp_ep_h ep, ucp_lane_map_t lane_map) +{ + uint64_t tl_bitmap = 0; + ucp_lane_index_t lane; + + ucs_for_each_bit(lane, lane_map) { + ucs_assert(lane < UCP_MAX_LANES); + tl_bitmap |= UCS_BIT(ucp_ep_get_rsc_index(ep, lane)); + } + + return tl_bitmap; +} + +/* + * Select remote ep address for every remote address entry (because there + * could be multiple ep addresses per entry). This selection is used to create + * 'lanes2remote' mapping with the remote lane index for each local lane. + */ +static void +ucp_wireup_match_p2p_lanes(ucp_ep_h ep, + const ucp_unpacked_address_t *remote_address, + const unsigned *addr_indices, + ucp_lane_index_t *lanes2remote) +{ + const ucp_address_entry_t *address; + unsigned address_index; + ucp_lane_index_t lane, remote_lane; + unsigned *ep_addr_indexes; + unsigned ep_addr_index; + uint64_t UCS_V_UNUSED used_remote_lanes; + + /* Initialize the counters of ep address index for each address entry */ + ep_addr_indexes = ucs_alloca(sizeof(ep_addr_index) * + remote_address->address_count); + for (address_index = 0; address_index < remote_address->address_count; + ++address_index) { + ep_addr_indexes[address_index] = 0; + } + + /* Initialize lanes2remote array */ + for (lane = 0; lane < UCP_MAX_LANES; ++lane) { + lanes2remote[lane] = UCP_NULL_LANE; + } + + used_remote_lanes = 0; + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (!ucp_ep_is_lane_p2p(ep, lane)) { + continue; + } + + /* Select next remote ep address within the address_index as specified + * by addr_indices argument + */ + address_index = addr_indices[lane]; + address = &remote_address->address_list[address_index]; 
+ ep_addr_index = ep_addr_indexes[address_index]++; + remote_lane = address->ep_addrs[ep_addr_index].lane; + lanes2remote[lane] = remote_lane; + + if (used_remote_lanes & UCS_BIT(remote_lane)) { + ucs_fatal("ep %p: remote lane %d is used more than once", ep, + remote_lane); + } + used_remote_lanes |= UCS_BIT(remote_lane); + + ucs_trace("ep %p: lane[%d]->remote_lane[%d] (address[%d].ep_address[%d])", + ep, lane, remote_lane, address_index, ep_addr_index); + } +} + +static ucs_status_t +ucp_wireup_find_remote_p2p_addr(ucp_ep_h ep, ucp_lane_index_t remote_lane, + const ucp_unpacked_address_t *remote_address, + const uct_ep_addr_t **ep_addr_p, + const uct_device_addr_t **dev_addr_p) +{ + const ucp_address_entry_t *address; + unsigned ep_addr_index; + + ucp_unpacked_address_for_each(address, remote_address) { + for (ep_addr_index = 0; ep_addr_index < address->num_ep_addrs; + ++ep_addr_index) { + if (remote_lane == address->ep_addrs[ep_addr_index].lane) { + *ep_addr_p = address->ep_addrs[ep_addr_index].addr; + *dev_addr_p = address->dev_addr; + return UCS_OK; + } + } + } + + return UCS_ERR_UNREACHABLE; +} + +ucs_status_t +ucp_wireup_connect_local(ucp_ep_h ep, + const ucp_unpacked_address_t *remote_address, + const ucp_lane_index_t *lanes2remote) +{ + ucp_lane_index_t lane, remote_lane; + const uct_device_addr_t *dev_addr; + const uct_ep_addr_t *ep_addr; + ucs_status_t status; + + ucs_trace("ep %p: connect local transports", ep); + + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (!ucp_ep_is_lane_p2p(ep, lane)) { + continue; + } + + remote_lane = (lanes2remote == NULL) ? 
lane : lanes2remote[lane]; + + status = ucp_wireup_find_remote_p2p_addr(ep, remote_lane, remote_address, + &ep_addr, &dev_addr); + if (status != UCS_OK) { + ucs_error("ep %p: no remote ep address for lane[%d]->remote_lane[%d]", + ep, lane, remote_lane); + return status; + } + + status = uct_ep_connect_to_ep(ep->uct_eps[lane], dev_addr, ep_addr); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +void ucp_wireup_remote_connected(ucp_ep_h ep) +{ + ucp_lane_index_t lane; + + if (ep->flags & UCP_EP_FLAG_REMOTE_CONNECTED) { + return; + } + + ucs_trace("ep %p: remote connected", ep); + ep->flags |= UCP_EP_FLAG_REMOTE_CONNECTED; + + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (ucp_ep_is_lane_p2p(ep, lane)) { + ucs_assert(ucp_wireup_ep_test(ep->uct_eps[lane])); + } + if (ucp_wireup_ep_test(ep->uct_eps[lane])) { + ucp_wireup_ep_remote_connected(ep->uct_eps[lane]); + } + } + + ucs_assert(ep->flags & UCP_EP_FLAG_DEST_EP); +} + + +static ucs_status_t +ucp_wireup_init_lanes_by_request(ucp_worker_h worker, ucp_ep_h ep, + unsigned ep_init_flags, + const ucp_unpacked_address_t *remote_address, + unsigned *addr_indices) +{ + ucs_status_t status = ucp_wireup_init_lanes(ep, ep_init_flags, UINT64_MAX, + remote_address, addr_indices); + if (status == UCS_OK) { + return UCS_OK; + } + + ucp_worker_set_ep_failed(worker, ep, NULL, UCP_NULL_LANE, status); + return status; +} + + +static UCS_F_NOINLINE void +ucp_wireup_process_pre_request(ucp_worker_h worker, const ucp_wireup_msg_t *msg, + const ucp_unpacked_address_t *remote_address) +{ + unsigned ep_init_flags = UCP_EP_INIT_CREATE_AM_LANE; + unsigned addr_indices[UCP_MAX_LANES]; + ucs_status_t status; + ucp_ep_h ep; + + ucs_assert(msg->type == UCP_WIREUP_MSG_PRE_REQUEST); + ucs_assert(msg->dest_ep_ptr != 0); + ucs_trace("got wireup pre_request from 0x%"PRIx64" src_ep 0x%lx dst_ep 0x%lx conn_sn %d", + remote_address->uuid, msg->src_ep_ptr, msg->dest_ep_ptr, msg->conn_sn); + + /* wireup pre_request for 
a specific ep */ + ep = ucp_worker_get_ep_by_ptr(worker, msg->dest_ep_ptr); + ucs_assert(ep->flags & UCP_EP_FLAG_SOCKADDR_PARTIAL_ADDR); + + ucp_ep_update_dest_ep_ptr(ep, msg->src_ep_ptr); + ucp_ep_flush_state_reset(ep); + + if (ucp_ep_config(ep)->key.err_mode == UCP_ERR_HANDLING_MODE_PEER) { + ep_init_flags |= UCP_EP_INIT_ERR_MODE_PEER_FAILURE; + } + + /* initialize transport endpoints */ + status = ucp_wireup_init_lanes_by_request(worker, ep, ep_init_flags, + remote_address, addr_indices); + if (status != UCS_OK) { + return; + } + + status = ucp_wireup_send_request(ep); + if (status != UCS_OK) { + ucp_ep_cleanup_lanes(ep); + } +} + +static UCS_F_NOINLINE void +ucp_wireup_process_request(ucp_worker_h worker, const ucp_wireup_msg_t *msg, + const ucp_unpacked_address_t *remote_address) +{ + uint64_t remote_uuid = remote_address->uuid; + uint64_t tl_bitmap = 0; + int send_reply = 0; + unsigned ep_init_flags = 0; + ucp_rsc_index_t lanes2remote[UCP_MAX_LANES]; + unsigned addr_indices[UCP_MAX_LANES]; + ucs_status_t status; + ucp_ep_flags_t listener_flag; + ucp_ep_h ep; + + ucs_assert(msg->type == UCP_WIREUP_MSG_REQUEST); + ucs_trace("got wireup request from 0x%"PRIx64" src_ep 0x%lx dst_ep 0x%lx conn_sn %d", + remote_address->uuid, msg->src_ep_ptr, msg->dest_ep_ptr, msg->conn_sn); + + if (msg->dest_ep_ptr != 0) { + /* wireup request for a specific ep */ + ep = ucp_worker_get_ep_by_ptr(worker, msg->dest_ep_ptr); + ucp_ep_update_dest_ep_ptr(ep, msg->src_ep_ptr); + if (!(ep->flags & UCP_EP_FLAG_LISTENER)) { + /* Reset flush state only if it's not a client-server wireup on + * server side with long address exchange when listener (united with + * flush state) should be valid until user's callback invoking */ + ucp_ep_flush_state_reset(ep); + } + ep_init_flags |= UCP_EP_INIT_CREATE_AM_LANE; + } else { + ep = ucp_ep_match_retrieve_exp(&worker->ep_match_ctx, remote_uuid, + msg->conn_sn ^ (remote_uuid == worker->uuid)); + if (ep == NULL) { + /* Create a new endpoint if does not 
exist */ + status = ucp_ep_new(worker, remote_address->name, "remote-request", + &ep); + if (status != UCS_OK) { + return; + } + + /* add internal endpoint to hash */ + ep->conn_sn = msg->conn_sn; + ucp_ep_match_insert_unexp(&worker->ep_match_ctx, remote_uuid, ep); + } else { + ucp_ep_flush_state_reset(ep); + } + + ucp_ep_update_dest_ep_ptr(ep, msg->src_ep_ptr); + + /* + * If the current endpoint already sent a connection request, we have a + * "simultaneous connect" situation. In this case, only one of the endpoints + * (instead of both) should respect the connect request, otherwise they + * will end up being connected to "internal" endpoints on the remote side + * instead of each other. We use the uniqueness of worker uuid to decide + * which connect request should be ignored. + */ + if ((ep->flags & UCP_EP_FLAG_CONNECT_REQ_QUEUED) && (remote_uuid > worker->uuid)) { + ucs_trace("ep %p: ignoring simultaneous connect request", ep); + ep->flags |= UCP_EP_FLAG_CONNECT_REQ_IGNORED; + return; + } + } + + if (ep->flags & UCP_EP_FLAG_LISTENER) { + /* If this is an ep on a listener (server) that received a partial + * worker address from the client, then the following lanes initialization + * will be done after an aux lane was already created on this ep. 
+ * Therefore, remove the existing aux endpoint since will need to create + * new lanes now */ + ucp_ep_cleanup_lanes(ep); + } + + if (msg->err_mode == UCP_ERR_HANDLING_MODE_PEER) { + ep_init_flags |= UCP_EP_INIT_ERR_MODE_PEER_FAILURE; + } + + /* Initialize lanes (possible destroy existing lanes) */ + status = ucp_wireup_init_lanes_by_request(worker, ep, ep_init_flags, + remote_address, addr_indices); + if (status != UCS_OK) { + return; + } + + ucp_wireup_match_p2p_lanes(ep, remote_address, addr_indices, lanes2remote); + + /* Send a reply if remote side does not have ep_ptr (active-active flow) or + * there are p2p lanes (client-server flow) + */ + send_reply = (msg->dest_ep_ptr == 0) || ucp_ep_config(ep)->p2p_lanes; + + /* Connect p2p addresses to remote endpoint */ + if (!(ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED)) { + status = ucp_wireup_connect_local(ep, remote_address, lanes2remote); + if (status != UCS_OK) { + return; + } + + tl_bitmap = ucp_wireup_get_ep_tl_bitmap(ep, + ucp_ep_config(ep)->p2p_lanes); + ep->flags |= UCP_EP_FLAG_LOCAL_CONNECTED; + + ucs_assert(send_reply); + } + + /* mark the endpoint as connected to remote */ + if (!ucp_ep_config(ep)->p2p_lanes) { + ucp_wireup_remote_connected(ep); + } + + if (send_reply) { + + listener_flag = ep->flags & UCP_EP_FLAG_LISTENER; + /* Remove this flag at this point if it's set + * (so that address packing would be correct) */ + ep->flags &= ~UCP_EP_FLAG_LISTENER; + + ucs_trace("ep %p: sending wireup reply", ep); + status = ucp_wireup_msg_send(ep, UCP_WIREUP_MSG_REPLY, tl_bitmap, + lanes2remote); + + /* Restore saved flag value before checking the status, so that a failed send does not leave the temporarily removed listener flag cleared */ + ep->flags |= listener_flag; + if (status != UCS_OK) { + return; + } + } else { + /* if in client-server flow, schedule invoking the user's callback + * (if server is connected) from the main thread */ + if (ucs_test_all_flags(ep->flags, + (UCP_EP_FLAG_LISTENER | UCP_EP_FLAG_LOCAL_CONNECTED))) { + ucp_listener_schedule_accept_cb(ep); + } + } +} + +static unsigned 
ucp_wireup_send_msg_ack(void *arg) +{ + ucp_ep_h ep = (ucp_ep_h)arg; + ucp_rsc_index_t rsc_tli[UCP_MAX_LANES]; + ucs_status_t status; + + /* Send ACK without any address, we've already sent it as part of the request */ + ucs_trace("ep %p: sending wireup ack", ep); + + memset(rsc_tli, UCP_NULL_RESOURCE, sizeof(rsc_tli)); + status = ucp_wireup_msg_send(ep, UCP_WIREUP_MSG_ACK, 0, rsc_tli); + return (status == UCS_OK); +} + +int ucp_wireup_msg_ack_cb_pred(const ucs_callbackq_elem_t *elem, void *arg) +{ + return ((elem->arg == arg) && (elem->cb == ucp_wireup_send_msg_ack)); +} + +static UCS_F_NOINLINE void +ucp_wireup_process_reply(ucp_worker_h worker, const ucp_wireup_msg_t *msg, + const ucp_unpacked_address_t *remote_address) +{ + uct_worker_cb_id_t cb_id = UCS_CALLBACKQ_ID_NULL; + ucs_status_t status; + ucp_ep_h ep; + int ack; + + ep = ucp_worker_get_ep_by_ptr(worker, msg->dest_ep_ptr); + + ucs_assert(msg->type == UCP_WIREUP_MSG_REPLY); + ucs_assert((!(ep->flags & UCP_EP_FLAG_LISTENER))); + ucs_trace("ep %p: got wireup reply src_ep 0x%lx dst_ep 0x%lx sn %d", ep, + msg->src_ep_ptr, msg->dest_ep_ptr, msg->conn_sn); + + ucp_ep_match_remove_ep(&worker->ep_match_ctx, ep); + ucp_ep_update_dest_ep_ptr(ep, msg->src_ep_ptr); + ucp_ep_flush_state_reset(ep); + + /* Connect p2p addresses to remote endpoint */ + if (!(ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED)) { + + /* + * In the wireup reply message, the lane indexes specify which + * **receiver** ep lane should be connected to a given ep address. So we + * don't pass 'lanes2remote' mapping, and use local lanes directly. 
+ */ + status = ucp_wireup_connect_local(ep, remote_address, NULL); + if (status != UCS_OK) { + return; + } + + ep->flags |= UCP_EP_FLAG_LOCAL_CONNECTED; + ack = 1; + } else { + ack = 0; + } + + ucp_wireup_remote_connected(ep); + + if (ack) { + /* Send `UCP_WIREUP_MSG_ACK` from progress function + * to avoid calling UCT routines from an async thread */ + uct_worker_progress_register_safe(worker->uct, + ucp_wireup_send_msg_ack, ep, + UCS_CALLBACKQ_FLAG_ONESHOT, &cb_id); + } +} + +static UCS_F_NOINLINE +void ucp_wireup_process_ack(ucp_worker_h worker, const ucp_wireup_msg_t *msg) +{ + ucp_ep_h ep; + + ep = ucp_worker_get_ep_by_ptr(worker, msg->dest_ep_ptr); + + ucs_assert(msg->type == UCP_WIREUP_MSG_ACK); + ucs_trace("ep %p: got wireup ack", ep); + + ucs_assert(ep->flags & UCP_EP_FLAG_DEST_EP); + ucs_assert(ep->flags & UCP_EP_FLAG_CONNECT_REP_SENT); + ucs_assert(ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED); + + ucp_wireup_remote_connected(ep); + + /* if this ack is received as part of the client-server flow, when handling + * a large worker address from the client, invoke the cached user callback + * from the main thread */ + if (ep->flags & UCP_EP_FLAG_LISTENER) { + ucp_listener_schedule_accept_cb(ep); + } +} + +static ucs_status_t ucp_wireup_msg_handler(void *arg, void *data, + size_t length, unsigned flags) +{ + ucp_worker_h worker = arg; + ucp_wireup_msg_t *msg = data; + ucp_unpacked_address_t remote_address; + ucs_status_t status; + + UCS_ASYNC_BLOCK(&worker->async); + + status = ucp_address_unpack(worker, msg + 1, UINT64_MAX, &remote_address); + if (status != UCS_OK) { + ucs_error("failed to unpack address: %s", ucs_status_string(status)); + goto out; + } + + if (msg->type == UCP_WIREUP_MSG_ACK) { + ucs_assert(remote_address.address_count == 0); + ucp_wireup_process_ack(worker, msg); + } else if (msg->type == UCP_WIREUP_MSG_PRE_REQUEST) { + ucp_wireup_process_pre_request(worker, msg, &remote_address); + } else if (msg->type == UCP_WIREUP_MSG_REQUEST) { + 
ucp_wireup_process_request(worker, msg, &remote_address); + } else if (msg->type == UCP_WIREUP_MSG_REPLY) { + ucp_wireup_process_reply(worker, msg, &remote_address); + } else { + ucs_bug("invalid wireup message"); + } + + ucs_free(remote_address.address_list); + +out: + UCS_ASYNC_UNBLOCK(&worker->async); + return UCS_OK; +} + +void ucp_wireup_assign_lane(ucp_ep_h ep, ucp_lane_index_t lane, uct_ep_h uct_ep, + const char *info) +{ + /* If ep already exists, it's a wireup proxy, and we need to update its + * next_ep instead of replacing it. + */ + if (ep->uct_eps[lane] == NULL) { + ucs_trace("ep %p: assign uct_ep[%d]=%p%s", ep, lane, uct_ep, info); + ep->uct_eps[lane] = uct_ep; + } else { + ucs_assert(ucp_wireup_ep_test(ep->uct_eps[lane])); + ucs_trace("ep %p: wireup uct_ep[%d]=%p next set to %p%s", ep, lane, + ep->uct_eps[lane], uct_ep, info); + ucp_wireup_ep_set_next_ep(ep->uct_eps[lane], uct_ep); + ucp_wireup_ep_remote_connected(ep->uct_eps[lane]); + } +} + +static uct_ep_h ucp_wireup_extract_lane(ucp_ep_h ep, ucp_lane_index_t lane) +{ + uct_ep_h uct_ep = ep->uct_eps[lane]; + + if ((uct_ep != NULL) && ucp_wireup_ep_test(uct_ep)) { + return ucp_wireup_ep_extract_next_ep(uct_ep); + } else { + ep->uct_eps[lane] = NULL; + return uct_ep; + } +} + +ucs_status_t +ucp_wireup_connect_lane(ucp_ep_h ep, unsigned ep_init_flags, + ucp_lane_index_t lane, + const ucp_unpacked_address_t *remote_address, + unsigned addr_index) +{ + ucp_worker_h worker = ep->worker; + int connect_aux; + ucp_lane_index_t proxy_lane; + ucp_rsc_index_t rsc_index; + ucp_worker_iface_t *wiface; + uct_ep_params_t uct_ep_params; + uct_ep_h uct_ep; + ucs_status_t status; + + ucs_trace("ep %p: connect lane[%d]", ep, lane); + + ucs_assert(lane != ucp_ep_get_cm_lane(ep)); + + ucs_assert_always(remote_address != NULL); + ucs_assert_always(remote_address->address_list != NULL); + ucs_assert_always(addr_index < remote_address->address_count); /* addr_index subscripts address_list[] further down, so it must be strictly below the entry count */ + + proxy_lane = ucp_ep_get_proxy_lane(ep, lane); + rsc_index = 
ucp_ep_get_rsc_index(ep, lane); + wiface = ucp_worker_iface(worker, rsc_index); + + /* + * if the selected transport can be connected directly to the remote + * interface, just create a connected UCT endpoint. + */ + if ((wiface->attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) && + ((ep->uct_eps[lane] == NULL) || ucp_wireup_ep_test(ep->uct_eps[lane]))) + { + if ((proxy_lane == UCP_NULL_LANE) || (proxy_lane == lane)) { + /* create an endpoint connected to the remote interface */ + ucs_trace("ep %p: connect uct_ep[%d] to addr[%d]", ep, lane, + addr_index); + uct_ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE | + UCT_EP_PARAM_FIELD_DEV_ADDR | + UCT_EP_PARAM_FIELD_IFACE_ADDR; + uct_ep_params.iface = wiface->iface; + uct_ep_params.dev_addr = remote_address->address_list[addr_index].dev_addr; + uct_ep_params.iface_addr = remote_address->address_list[addr_index].iface_addr; + status = uct_ep_create(&uct_ep_params, &uct_ep); + if (status != UCS_OK) { + /* coverity[leaked_storage] */ + return status; + } + + ucp_wireup_assign_lane(ep, lane, uct_ep, ""); + } + + ucp_worker_iface_progress_ep(wiface); + return UCS_OK; + } + + /* + * create a wireup endpoint which will start connection establishment + * protocol using an auxiliary transport. 
+ */ + if (wiface->attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { + + /* For now, p2p transports have no reason to have proxy */ + ucs_assert_always(proxy_lane == UCP_NULL_LANE); + + if (ep->uct_eps[lane] == NULL) { + status = ucp_wireup_ep_create(ep, &uct_ep); + if (status != UCS_OK) { + /* coverity[leaked_storage] */ + return status; + } + + ucs_trace("ep %p: assign uct_ep[%d]=%p wireup", ep, lane, uct_ep); + ep->uct_eps[lane] = uct_ep; + } else { + uct_ep = ep->uct_eps[lane]; + ucs_assert(ucp_wireup_ep_test(uct_ep)); + } + + if (!(ep_init_flags & (UCP_EP_INIT_CM_WIREUP_CLIENT))) { + ucs_trace("ep %p: connect uct_ep[%d]=%p to addr[%d] wireup", ep, + lane, uct_ep, addr_index); + connect_aux = !(ep_init_flags & (UCP_EP_INIT_CM_WIREUP_CLIENT | + UCP_EP_INIT_CM_WIREUP_SERVER)) && + (lane == ucp_ep_get_wireup_msg_lane(ep)); + status = ucp_wireup_ep_connect(ep->uct_eps[lane], ep_init_flags, + rsc_index, connect_aux, + remote_address); + if (status != UCS_OK) { + return status; + } + } + + ucp_worker_iface_progress_ep(wiface); + + return UCS_OK; + } + + return UCS_ERR_UNREACHABLE; +} + +ucs_status_t ucp_wireup_resolve_proxy_lanes(ucp_ep_h ep) +{ + ucp_lane_index_t lane, proxy_lane; + uct_iface_attr_t *iface_attr; + uct_ep_h uct_ep, signaling_ep; + ucs_status_t status; + + /* point proxy endpoints */ + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + proxy_lane = ucp_ep_get_proxy_lane(ep, lane); + if (proxy_lane == UCP_NULL_LANE) { + continue; + } + + iface_attr = ucp_worker_iface_get_attr(ep->worker, + ucp_ep_get_rsc_index(ep, lane)); + + if (iface_attr->cap.flags & UCT_IFACE_FLAG_AM_SHORT) { + ucs_assert_always(iface_attr->cap.am.max_short <= + iface_attr->cap.am.max_bcopy); + } + + /* Create a signaling ep to the proxy lane */ + if (proxy_lane == lane) { + /* If proxy is to the same lane, temporarily remove the existing + * UCT endpoint in there, so it could be assigned to the signaling + * proxy ep. 
This can also be an endpoint contained inside a wireup + * proxy, so ucp_wireup_extract_lane() handles both cases. + */ + uct_ep = ucp_wireup_extract_lane(ep, proxy_lane); + ucs_assert_always(uct_ep != NULL); + status = ucp_signaling_ep_create(ep, uct_ep, 1, &signaling_ep); + if (status != UCS_OK) { + /* coverity[leaked_storage] */ + uct_ep_destroy(uct_ep); + return status; + } + } else { + status = ucp_signaling_ep_create(ep, ep->uct_eps[proxy_lane], 0, + &signaling_ep); + if (status != UCS_OK) { + /* coverity[leaked_storage] */ + return status; + } + } + + ucs_trace("ep %p: lane[%d]=%p proxy_lane=%d", ep, lane, ep->uct_eps[lane], + proxy_lane); + + ucp_wireup_assign_lane(ep, lane, signaling_ep, " (signaling proxy)"); + } + + return UCS_OK; +} + +static void ucp_wireup_print_config(ucp_context_h context, + const ucp_ep_config_key_t *key, + const char *title, + const unsigned *addr_indices, + ucs_log_level_t log_level) +{ + char lane_info[128] = {0}; + ucp_lane_index_t lane; + + if (!ucs_log_is_enabled(log_level)) { + return; + } + + ucs_log(log_level, "%s: am_lane %d wireup_lane %d reachable_mds 0x%lx", + title, key->am_lane, key->wireup_lane, + key->reachable_md_map); + + for (lane = 0; lane < key->num_lanes; ++lane) { + ucp_ep_config_lane_info_str(context, key, addr_indices, lane, + UCP_NULL_RESOURCE, lane_info, + sizeof(lane_info)); + ucs_log(log_level, "%s: %s", title, lane_info); + } +} + +int ucp_wireup_is_reachable(ucp_worker_h worker, ucp_rsc_index_t rsc_index, + const ucp_address_entry_t *ae) +{ + ucp_context_h context = worker->context; + ucp_worker_iface_t *wiface = ucp_worker_iface(worker, rsc_index); + + return (context->tl_rscs[rsc_index].tl_name_csum == ae->tl_name_csum) && + uct_iface_is_reachable(wiface->iface, ae->dev_addr, ae->iface_addr); +} + +static void +ucp_wireup_get_reachable_mds(ucp_worker_h worker, + const ucp_unpacked_address_t *remote_address, + const ucp_ep_config_key_t *prev_key, + ucp_ep_config_key_t *key) +{ + ucp_context_h 
context = worker->context; + ucp_rsc_index_t ae_cmpts[UCP_MAX_MDS]; /* component index for each address entry */ + const ucp_address_entry_t *ae; + ucp_rsc_index_t cmpt_index; + ucp_rsc_index_t rsc_index; + ucp_md_index_t dst_md_index; + ucp_md_map_t ae_dst_md_map, dst_md_map; + unsigned num_dst_mds; + + ae_dst_md_map = 0; + ucs_for_each_bit(rsc_index, context->tl_bitmap) { + ucp_unpacked_address_for_each(ae, remote_address) { + if (ucp_wireup_is_reachable(worker, rsc_index, ae)) { + ae_dst_md_map |= UCS_BIT(ae->md_index); + dst_md_index = context->tl_rscs[rsc_index].md_index; + ae_cmpts[ae->md_index] = context->tl_mds[dst_md_index].cmpt_index; + } + } + } + + /* merge with previous configuration */ + dst_md_map = ae_dst_md_map | prev_key->reachable_md_map; + num_dst_mds = 0; + ucs_for_each_bit(dst_md_index, dst_md_map) { + cmpt_index = UCP_NULL_RESOURCE; + /* remote md is reachable by the provided address */ + if (UCS_BIT(dst_md_index) & ae_dst_md_map) { + cmpt_index = ae_cmpts[dst_md_index]; + } + /* remote md is reachable by previous ep configuration */ + if (UCS_BIT(dst_md_index) & prev_key->reachable_md_map) { + cmpt_index = ucp_ep_config_get_dst_md_cmpt(prev_key, dst_md_index); + if (UCS_BIT(dst_md_index) & ae_dst_md_map) { + /* we expect previous configuration will not conflict with the + * new one + */ + ucs_assert_always(cmpt_index == ae_cmpts[dst_md_index]); + } + } + ucs_assert_always(cmpt_index != UCP_NULL_RESOURCE); + key->dst_md_cmpts[num_dst_mds++] = cmpt_index; + } + ucs_assert(num_dst_mds == ucs_popcount(dst_md_map)); + + key->reachable_md_map = dst_md_map; +} + +ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned ep_init_flags, + uint64_t local_tl_bitmap, + const ucp_unpacked_address_t *remote_address, + unsigned *addr_indices) +{ + ucp_worker_h worker = ep->worker; + uint64_t tl_bitmap = local_tl_bitmap & worker->context->tl_bitmap; + ucp_ep_config_key_t key; + ucp_ep_cfg_index_t new_cfg_index; + ucp_lane_index_t lane; + ucs_status_t status; 
+ char str[32]; + ucp_wireup_ep_t *cm_wireup_ep; + + ucs_assert(tl_bitmap != 0); + + ucs_trace("ep %p: initialize lanes", ep); + + ucp_ep_config_key_reset(&key); + ucp_ep_config_key_set_err_mode(&key, ep_init_flags); + + status = ucp_wireup_select_lanes(ep, ep_init_flags, tl_bitmap, + remote_address, addr_indices, &key); + if (status != UCS_OK) { + return status; + } + + /* Get all reachable MDs from full remote address list */ + key.dst_md_cmpts = ucs_alloca(sizeof(*key.dst_md_cmpts) * UCP_MAX_MDS); + ucp_wireup_get_reachable_mds(worker, remote_address, &ucp_ep_config(ep)->key, + &key); + + /* Load new configuration */ + status = ucp_worker_get_ep_config(worker, &key, 1, &new_cfg_index); + if (status != UCS_OK) { + return status; + } + + if (ep->cfg_index == new_cfg_index) { + return UCS_OK; /* No change */ + } + + if ((ep->cfg_index != 0) && !ucp_ep_is_sockaddr_stub(ep)) { + /* + * TODO handle a case where we have to change lanes and reconfigure the ep: + * + * - if we already have uct ep connected to an address - move it to the new lane index + * - if we don't yet have connection to an address - create it + * - if an existing lane is not connected anymore - delete it (possibly) + * - if the configuration has changed - replay all pending operations on all lanes - + * need that every pending callback would return, in case of failure, the number + * of lane it wants to be queued on. 
+ */ + ucs_debug("cannot reconfigure ep %p from [%d] to [%d]", ep, ep->cfg_index, + new_cfg_index); + ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, "old", + NULL, UCS_LOG_LEVEL_ERROR); + ucp_wireup_print_config(worker->context, &key, "new", NULL, UCS_LOG_LEVEL_ERROR); + ucs_fatal("endpoint reconfiguration not supported yet"); + } + + cm_wireup_ep = ucp_ep_get_cm_wireup_ep(ep); + ep->cfg_index = new_cfg_index; + ep->am_lane = key.am_lane; + + snprintf(str, sizeof(str), "ep %p", ep); + ucp_wireup_print_config(worker->context, &ucp_ep_config(ep)->key, str, + addr_indices, UCS_LOG_LEVEL_DEBUG); + + /* establish connections on all underlying endpoints */ + for (lane = 0; lane < ucp_ep_num_lanes(ep); ++lane) { + if (ucp_ep_get_cm_lane(ep) == lane) { + /* restore the cm lane after reconfiguration */ + ep->uct_eps[lane] = &cm_wireup_ep->super.super; + continue; + } + + status = ucp_wireup_connect_lane(ep, ep_init_flags, lane, + remote_address, addr_indices[lane]); + if (status != UCS_OK) { + return status; + } + } + + status = ucp_wireup_resolve_proxy_lanes(ep); + if (status != UCS_OK) { + return status; + } + + /* If we don't have a p2p transport, we're connected */ + if (!ucp_ep_config(ep)->p2p_lanes) { + ep->flags |= UCP_EP_FLAG_LOCAL_CONNECTED; + } + + return UCS_OK; +} + +ucs_status_t ucp_wireup_send_request(ucp_ep_h ep) +{ + ucp_rsc_index_t rsc_index; + ucs_status_t status; + uint64_t tl_bitmap; + + tl_bitmap = ucp_wireup_get_ep_tl_bitmap(ep, UCS_MASK(ucp_ep_num_lanes(ep))); + + /* TODO make sure such lane would exist */ + rsc_index = ucp_wireup_ep_get_aux_rsc_index( + ep->uct_eps[ucp_ep_get_wireup_msg_lane(ep)]); + if (rsc_index != UCP_NULL_RESOURCE) { + tl_bitmap |= UCS_BIT(rsc_index); + } + + ucs_debug("ep %p: send wireup request (flags=0x%x)", ep, ep->flags); + status = ucp_wireup_msg_send(ep, UCP_WIREUP_MSG_REQUEST, tl_bitmap, NULL); + + ep->flags |= UCP_EP_FLAG_CONNECT_REQ_QUEUED; + + return status; +} + +static void 
ucp_wireup_connect_remote_purge_cb(uct_pending_req_t *self, void *arg) +{ + ucp_request_t *req = ucs_container_of(self, ucp_request_t, send.uct); + ucs_queue_head_t *queue = arg; + + ucs_trace_req("ep %p: extracted request %p from pending queue", req->send.ep, + req); + ucs_queue_push(queue, (ucs_queue_elem_t*)&req->send.uct.priv); +} + +ucs_status_t ucp_wireup_send_pre_request(ucp_ep_h ep) +{ + ucp_rsc_index_t rsc_tli[UCP_MAX_LANES]; + uint64_t tl_bitmap = UINT64_MAX; /* pack full worker address */ + ucs_status_t status; + + ucs_assert(ep->flags & UCP_EP_FLAG_LISTENER); + ucs_assert(!(ep->flags & UCP_EP_FLAG_CONNECT_PRE_REQ_QUEUED)); + memset(rsc_tli, UCP_NULL_RESOURCE, sizeof(rsc_tli)); + + ucs_debug("ep %p: send wireup pre-request (flags=0x%x)", ep, ep->flags); + status = ucp_wireup_msg_send(ep, UCP_WIREUP_MSG_PRE_REQUEST, tl_bitmap, rsc_tli); + + ep->flags |= UCP_EP_FLAG_CONNECT_PRE_REQ_QUEUED; + return status; +} + +ucs_status_t ucp_wireup_connect_remote(ucp_ep_h ep, ucp_lane_index_t lane) +{ + ucs_queue_head_t tmp_q; + ucs_status_t status; + ucp_request_t *req; + uct_ep_h uct_ep; + + ucs_trace("ep %p: connect lane %d to remote peer", ep, lane); + + ucs_assert(lane != UCP_NULL_LANE); + + UCS_ASYNC_BLOCK(&ep->worker->async); + + /* checking again, with lock held, if already connected or connection is + * in progress */ + if ((ep->flags & UCP_EP_FLAG_DEST_EP) || + ucp_wireup_ep_test(ep->uct_eps[lane])) { + status = UCS_OK; + goto out_unlock; + } + + if (ucp_proxy_ep_test(ep->uct_eps[lane])) { + /* signaling ep is not needed now since we will send wireup request + * with signaling flag + */ + uct_ep = ucp_proxy_ep_extract(ep->uct_eps[lane]); + uct_ep_destroy(ep->uct_eps[lane]); + } else { + uct_ep = ep->uct_eps[lane]; + } + + ucs_assert(!(ep->flags & UCP_EP_FLAG_REMOTE_CONNECTED)); + + ucs_trace("ep %p: connect lane %d to remote peer with wireup ep", ep, lane); + + /* make ep->uct_eps[lane] a stub */ + status = ucp_wireup_ep_create(ep, &ep->uct_eps[lane]); + if 
(status != UCS_OK) { + goto err; + } + + /* Extract all pending requests from the transport endpoint, otherwise they + * will prevent the wireup message from being sent (because those requests + * could not be progressed any more after switching to wireup proxy). + */ + ucs_queue_head_init(&tmp_q); + uct_ep_pending_purge(uct_ep, ucp_wireup_connect_remote_purge_cb, &tmp_q); + + /* the wireup ep should use the existing [am_lane] as next_ep */ + ucp_wireup_ep_set_next_ep(ep->uct_eps[lane], uct_ep); + + if (!(ep->flags & UCP_EP_FLAG_CONNECT_REQ_QUEUED)) { + status = ucp_wireup_send_request(ep); + if (status != UCS_OK) { + goto err_destroy_wireup_ep; + } + } + + ucs_queue_for_each_extract(req, &tmp_q, send.uct.priv, 1) { + ucs_trace_req("ep %p: requeue request %p after wireup request", + req->send.ep, req); + status = uct_ep_pending_add(ep->uct_eps[lane], &req->send.uct, + (req->send.uct.func == ucp_wireup_msg_progress) || + (req->send.uct.func == ucp_wireup_ep_progress_pending) ? + UCT_CB_FLAG_ASYNC : 0); + if (status != UCS_OK) { + ucs_fatal("wireup proxy function must always return UCS_OK"); + } + } + + goto out_unlock; + +err_destroy_wireup_ep: + uct_ep_destroy(ep->uct_eps[lane]); +err: + ep->uct_eps[lane] = uct_ep; /* restore am lane */ +out_unlock: + UCS_ASYNC_UNBLOCK(&ep->worker->async); + return status; +} + +static void ucp_wireup_msg_dump(ucp_worker_h worker, uct_am_trace_type_t type, + uint8_t id, const void *data, size_t length, + char *buffer, size_t max) +{ + ucp_context_h context = worker->context; + const ucp_wireup_msg_t *msg = data; + ucp_unpacked_address_t unpacked_address; + const ucp_address_entry_t *ae; + ucp_tl_resource_desc_t *rsc; + unsigned ep_addr_index; + ucs_status_t status; + char *p, *end; + ucp_rsc_index_t tl; + + status = ucp_address_unpack(worker, msg + 1, ~UCP_ADDRESS_PACK_FLAG_TRACE, + &unpacked_address); + if (status != UCS_OK) { + strncpy(unpacked_address.name, "", UCP_WORKER_NAME_MAX); + unpacked_address.uuid = 0; + 
unpacked_address.address_count = 0; + unpacked_address.address_list = NULL; + } + + p = buffer; + end = buffer + max; + + snprintf(p, end - p, + "WIREUP %s [%s uuid 0x%"PRIx64" src_ep 0x%lx dst_ep 0x%lx conn_sn %d]", + ucp_wireup_msg_str(msg->type), unpacked_address.name, + unpacked_address.uuid, msg->src_ep_ptr, msg->dest_ep_ptr, + msg->conn_sn); + p += strlen(p); + + if (unpacked_address.address_list == NULL) { + return; /* No addresses were unpacked */ + } + + ucp_unpacked_address_for_each(ae, &unpacked_address) { + ucs_for_each_bit(tl, context->tl_bitmap) { + rsc = &context->tl_rscs[tl]; + if (ae->tl_name_csum == rsc->tl_name_csum) { + snprintf(p, end - p, " "UCT_TL_RESOURCE_DESC_FMT, + UCT_TL_RESOURCE_DESC_ARG(&rsc->tl_rsc)); + p += strlen(p); + break; + } + } + snprintf(p, end - p, "/md[%d]", ae->md_index); + p += strlen(p); + + for (ep_addr_index = 0; ep_addr_index < ae->num_ep_addrs; + ++ep_addr_index) { + snprintf(p, end - p, "/lane[%d]", ae->ep_addrs[ep_addr_index].lane); + p += strlen(p); + } + } + + ucs_free(unpacked_address.address_list); +} + +int ucp_worker_iface_is_tl_p2p(const uct_iface_attr_t *iface_attr) +{ + uint64_t flags = iface_attr->cap.flags; + + return (flags & UCT_IFACE_FLAG_CONNECT_TO_EP) && + !(flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE); +} + +static ucp_err_handling_mode_t +ucp_ep_params_err_handling_mode(const ucp_ep_params_t *params) +{ + return (params->field_mask & UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE) ? 
+ params->err_mode : UCP_ERR_HANDLING_MODE_NONE; +} + +unsigned ucp_ep_init_flags(const ucp_worker_h worker, + const ucp_ep_params_t *params) +{ + unsigned flags = ucp_cm_ep_init_flags(worker, params); + + if (!ucp_worker_sockaddr_is_cm_proto(worker) && + (params->field_mask & UCP_EP_PARAM_FIELD_SOCK_ADDR)) { + flags |= UCP_EP_INIT_CREATE_AM_LANE; + } + + if (ucp_ep_params_err_handling_mode(params) == UCP_ERR_HANDLING_MODE_PEER) { + flags |= UCP_EP_INIT_ERR_MODE_PEER_FAILURE; + } + + return flags; +} + +UCP_DEFINE_AM(UINT64_MAX, UCP_AM_ID_WIREUP, ucp_wireup_msg_handler, + ucp_wireup_msg_dump, UCT_CB_FLAG_ASYNC); diff --git a/src/ucp/wireup/wireup.h b/src/ucp/wireup/wireup.h new file mode 100644 index 0000000..db34e70 --- /dev/null +++ b/src/ucp/wireup/wireup.h @@ -0,0 +1,154 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCP_WIREUP_H_ +#define UCP_WIREUP_H_ + +#include +#include +#include +#include +#include +#include + + +/** + * Wireup message types + */ +enum { + UCP_WIREUP_MSG_PRE_REQUEST, + UCP_WIREUP_MSG_REQUEST, + UCP_WIREUP_MSG_REPLY, + UCP_WIREUP_MSG_ACK, + UCP_WIREUP_MSG_LAST +}; + + +/** + * Criteria for transport selection. + */ +typedef struct { + const char *title; /* Name of the criteria for debugging */ + uint64_t local_md_flags; /* Required local MD flags */ + uint64_t remote_md_flags; /* Required remote MD flags */ + uint64_t local_iface_flags; /* Required local interface flags */ + uint64_t remote_iface_flags;/* Required remote interface flags */ + + /** + * Calculates score of a potential transport. + * + * @param [in] context UCP context. + * @param [in] md_attr Local MD attributes. + * @param [in] iface_attr Local interface attributes. + * @param [in] remote_info Remote peer attributes. + * + * @return Transport score, the higher the better. 
+ */ + double (*calc_score)(ucp_context_h context, + const uct_md_attr_t *md_attr, + const uct_iface_attr_t *iface_attr, + const ucp_address_iface_attr_t *remote_iface_attr); + uint8_t tl_rsc_flags; /* Flags that describe TL specifics */ + + ucp_tl_iface_atomic_flags_t local_atomic_flags; + ucp_tl_iface_atomic_flags_t remote_atomic_flags; +} ucp_wireup_criteria_t; + + +/** + * Packet structure for wireup requests. + */ +typedef struct ucp_wireup_msg { + uint8_t type; /* Message type */ + ucp_err_handling_mode_t err_mode; /* Peer error handling mode */ + ucp_ep_conn_sn_t conn_sn; /* Connection sequence number */ + uintptr_t src_ep_ptr; /* Endpoint of source */ + uintptr_t dest_ep_ptr; /* Endpoint of destination (0 - invalid) */ + /* packed addresses follow */ +} UCS_S_PACKED ucp_wireup_msg_t; + + +typedef struct { + double score; + unsigned addr_index; + ucp_rsc_index_t rsc_index; + uint8_t priority; +} ucp_wireup_select_info_t; + + +ucs_status_t ucp_wireup_send_request(ucp_ep_h ep); + +ucs_status_t ucp_wireup_send_pre_request(ucp_ep_h ep); + +ucs_status_t ucp_wireup_connect_remote(ucp_ep_h ep, ucp_lane_index_t lane); + +ucs_status_t +ucp_wireup_select_aux_transport(ucp_ep_h ep, unsigned ep_init_flags, + const ucp_unpacked_address_t *remote_address, + ucp_wireup_select_info_t *select_info); + +ucs_status_t +ucp_wireup_select_sockaddr_transport(const ucp_context_h context, + const ucs_sock_addr_t *sockaddr, + ucp_rsc_index_t *rsc_index_p); + +double ucp_wireup_amo_score_func(ucp_context_h context, + const uct_md_attr_t *md_attr, + const uct_iface_attr_t *iface_attr, + const ucp_address_iface_attr_t *remote_iface_attr); + +ucs_status_t ucp_wireup_msg_progress(uct_pending_req_t *self); + +int ucp_wireup_msg_ack_cb_pred(const ucs_callbackq_elem_t *elem, void *arg); + +int ucp_wireup_is_reachable(ucp_worker_h worker, ucp_rsc_index_t rsc_index, + const ucp_address_entry_t *ae); + +ucs_status_t ucp_wireup_init_lanes(ucp_ep_h ep, unsigned ep_init_flags, + uint64_t 
local_tl_bitmap, + const ucp_unpacked_address_t *remote_address, + unsigned *addr_indices); + +ucs_status_t +ucp_wireup_select_lanes(ucp_ep_h ep, unsigned ep_init_flags, uint64_t tl_bitmap, + const ucp_unpacked_address_t *remote_address, + unsigned *addr_indices, ucp_ep_config_key_t *key); + +ucs_status_t ucp_signaling_ep_create(ucp_ep_h ucp_ep, uct_ep_h uct_ep, + int is_owner, uct_ep_h *signaling_ep); + +int ucp_worker_iface_is_tl_p2p(const uct_iface_attr_t *iface_attr); + +int ucp_wireup_is_rsc_self_or_shm(ucp_ep_h ep, ucp_rsc_index_t rsc_index); + +void ucp_wireup_assign_lane(ucp_ep_h ep, ucp_lane_index_t lane, uct_ep_h uct_ep, + const char *info); + +ucs_status_t +ucp_wireup_connect_lane(ucp_ep_h ep, unsigned ep_init_flags, + ucp_lane_index_t lane, + const ucp_unpacked_address_t *remote_address, + unsigned addr_index); + +ucs_status_t ucp_wireup_resolve_proxy_lanes(ucp_ep_h ep); + +void ucp_wireup_remote_connected(ucp_ep_h ep); + +static inline int ucp_worker_is_tl_p2p(ucp_worker_h worker, ucp_rsc_index_t rsc_index) +{ + return ucp_worker_iface_is_tl_p2p(ucp_worker_iface_get_attr(worker, + rsc_index)); + +} + +unsigned ucp_ep_init_flags(const ucp_worker_h worker, + const ucp_ep_params_t *params); + +ucs_status_t +ucp_wireup_connect_local(ucp_ep_h ep, + const ucp_unpacked_address_t *remote_address, + const ucp_lane_index_t *lanes2remote); +#endif diff --git a/src/ucp/wireup/wireup_cm.c b/src/ucp/wireup/wireup_cm.c new file mode 100644 index 0000000..1894bb7 --- /dev/null +++ b/src/ucp/wireup/wireup_cm.c @@ -0,0 +1,890 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wireup_cm.h" +#include +#include +#include +#include +#include +#include + + +unsigned +ucp_cm_ep_init_flags(const ucp_worker_h worker, const ucp_ep_params_t *params) +{ + if (!ucp_worker_sockaddr_is_cm_proto(worker)) { + return 0; + } + + if (params->field_mask & UCP_EP_PARAM_FIELD_SOCK_ADDR) { + return UCP_EP_INIT_CM_WIREUP_CLIENT; + } + + if (params->field_mask & UCP_EP_PARAM_FIELD_CONN_REQUEST) { + return UCP_EP_INIT_CM_WIREUP_SERVER; + } + + return 0; +} + +static ucs_status_t +ucp_cm_ep_client_initial_config_get(ucp_ep_h ucp_ep, const char *dev_name, + ucp_ep_config_key_t *key) +{ + ucp_worker_h worker = ucp_ep->worker; + uint64_t addr_pack_flags = UCP_ADDRESS_PACK_FLAG_DEVICE_ADDR | + UCP_ADDRESS_PACK_FLAG_IFACE_ADDR; + ucp_wireup_ep_t *wireup_ep = ucp_ep_get_cm_wireup_ep(ucp_ep); + uint64_t tl_bitmap = ucp_context_dev_tl_bitmap(worker->context, + dev_name); + void *ucp_addr; + size_t ucp_addr_size; + ucp_unpacked_address_t unpacked_addr; + unsigned addr_indices[UCP_MAX_RESOURCES]; + ucs_status_t status; + + ucs_assert_always(wireup_ep != NULL); + + /* Construct local dummy address for lanes selection taking an assumption + * that server has the transports which are the best from client's + * perspective. 
*/ + status = ucp_address_pack(worker, NULL, tl_bitmap, addr_pack_flags, NULL, + &ucp_addr_size, &ucp_addr); + if (status != UCS_OK) { + goto out; + } + + status = ucp_address_unpack(worker, ucp_addr, addr_pack_flags, + &unpacked_addr); + if (status != UCS_OK) { + goto free_ucp_addr; + } + + ucs_assert(unpacked_addr.address_count <= UCP_MAX_RESOURCES); + ucp_ep_config_key_reset(key); + ucp_ep_config_key_set_err_mode(key, wireup_ep->ep_init_flags); + status = ucp_wireup_select_lanes(ucp_ep, wireup_ep->ep_init_flags, + tl_bitmap, &unpacked_addr, addr_indices, + key); + + ucs_free(unpacked_addr.address_list); +free_ucp_addr: + ucs_free(ucp_addr); +out: + return status; +} + +static void ucp_cm_priv_data_pack(ucp_wireup_sockaddr_data_t *sa_data, + ucp_ep_h ep, ucp_rsc_index_t dev_index, + const ucp_address_t *addr, size_t addr_size) +{ + ucs_assert((int)ucp_ep_config(ep)->key.err_mode <= UINT8_MAX); + ucs_assert(dev_index != UCP_NULL_RESOURCE); + + sa_data->ep_ptr = (uintptr_t)ep; + sa_data->err_mode = ucp_ep_config(ep)->key.err_mode; + sa_data->addr_mode = UCP_WIREUP_SA_DATA_CM_ADDR; + sa_data->dev_index = dev_index; + memcpy(sa_data + 1, addr, addr_size); +} + +static ssize_t ucp_cm_client_priv_pack_cb(void *arg, const char *dev_name, + void *priv_data) +{ + ucp_wireup_sockaddr_data_t *sa_data = priv_data; + ucp_ep_h ep = arg; + ucp_worker_h worker = ep->worker; + uct_cm_h cm = worker->cms[/*cm_idx = */ 0].cm; + ucp_rsc_index_t dev_index = UCP_NULL_RESOURCE; + ucp_ep_config_key_t key; + uint64_t tl_bitmap; + uct_ep_h tl_ep; + ucp_wireup_ep_t *cm_wireup_ep; + uct_cm_attr_t cm_attr; + uct_ep_params_t tl_ep_params; + void* ucp_addr; + size_t ucp_addr_size; + ucs_status_t status; + ucp_lane_index_t lane_idx; + ucp_rsc_index_t rsc_idx; + + UCS_ASYNC_BLOCK(&worker->async); + + status = ucp_cm_ep_client_initial_config_get(ep, dev_name, &key); + if (status != UCS_OK) { + goto out; + } + + /* At this point the ep has only CM lane */ + ucs_assert((ucp_ep_num_lanes(ep) == 1) && 
+ (ucp_ep_get_cm_lane(ep) != UCP_NULL_LANE)); + /* Detach it before reconfiguration and restore then */ + cm_wireup_ep = ucp_ep_get_cm_wireup_ep(ep); + ucs_assert(cm_wireup_ep != NULL); + + status = ucp_worker_get_ep_config(worker, &key, 0, &ep->cfg_index); + if (status != UCS_OK) { + goto out; + } + + ep->am_lane = key.am_lane; + + cm_attr.field_mask = UCT_CM_ATTR_FIELD_MAX_CONN_PRIV; + status = uct_cm_query(cm, &cm_attr); + if (status != UCS_OK) { + goto out; + } + + tl_bitmap = 0; + for (lane_idx = 0; lane_idx < ucp_ep_num_lanes(ep); ++lane_idx) { + if (lane_idx == ucp_ep_get_cm_lane(ep)) { + ep->uct_eps[lane_idx] = &cm_wireup_ep->super.super; + continue; + } + + rsc_idx = ucp_ep_get_rsc_index(ep, lane_idx); + if (rsc_idx == UCP_NULL_RESOURCE) { + continue; + } + + status = ucp_wireup_ep_create(ep, &ep->uct_eps[lane_idx]); + if (status != UCS_OK) { + goto out; + } + + ucs_assert((dev_index == UCP_NULL_RESOURCE) || + (dev_index == worker->context->tl_rscs[rsc_idx].dev_index)); + dev_index = worker->context->tl_rscs[rsc_idx].dev_index; + + tl_bitmap |= UCS_BIT(rsc_idx); + if (ucp_worker_is_tl_p2p(worker, rsc_idx)) { + tl_ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + tl_ep_params.iface = ucp_worker_iface(worker, rsc_idx)->iface; + status = uct_ep_create(&tl_ep_params, &tl_ep); + if (status != UCS_OK) { + /* coverity[leaked_storage] */ + goto out; + } + + ucp_wireup_ep_set_next_ep(ep->uct_eps[lane_idx], tl_ep); + } else { + ucs_assert(ucp_worker_iface_get_attr(worker, rsc_idx)->cap.flags & + UCT_IFACE_FLAG_CONNECT_TO_IFACE); + } + } + + /* Make sure that CM lane is restored */ + ucs_assert(cm_wireup_ep == ucp_ep_get_cm_wireup_ep(ep)); + + /* Don't pack the device address to reduce address size, it will be + * delivered by uct_listener_conn_request_callback_t in + * uct_cm_remote_data_t */ + status = ucp_address_pack(worker, ep, tl_bitmap, + UCP_ADDRESS_PACK_FLAG_IFACE_ADDR | + UCP_ADDRESS_PACK_FLAG_EP_ADDR, + NULL, &ucp_addr_size, &ucp_addr); + if (status != 
UCS_OK) { + goto out; + } + + if (cm_attr.max_conn_priv < (sizeof(*sa_data) + ucp_addr_size)) { + ucs_error("CM private data buffer is to small to pack UCP endpoint info, " + "ep %p service data %lu, address length %lu, cm %p max_conn_priv %lu", + ep, sizeof(*sa_data), ucp_addr_size, cm, + cm_attr.max_conn_priv); + status = UCS_ERR_BUFFER_TOO_SMALL; + goto free_addr; + } + + ucp_cm_priv_data_pack(sa_data, ep, dev_index, ucp_addr, ucp_addr_size); + +free_addr: + ucs_free(ucp_addr); +out: + if (status == UCS_OK) { + ep->flags |= UCP_EP_FLAG_LOCAL_CONNECTED; + } else { + ucp_worker_set_ep_failed(worker, ep, + &ucp_ep_get_cm_wireup_ep(ep)->super.super, + ucp_ep_get_cm_lane(ep), status); + } + + UCS_ASYNC_UNBLOCK(&worker->async); + /* coverity[leaked_storage] */ + return (status == UCS_OK) ? (sizeof(*sa_data) + ucp_addr_size) : status; +} + +/* + * The main thread progress part of connection establishment on client side + */ +static unsigned ucp_cm_client_connect_progress(void *arg) +{ + ucp_cm_client_connect_progress_arg_t *progress_arg = arg; + ucp_ep_h ucp_ep = progress_arg->ucp_ep; + ucp_worker_h worker = ucp_ep->worker; + ucp_context_h context = worker->context; + ucp_wireup_ep_t *wireup_ep; + ucp_unpacked_address_t addr; + uint64_t tl_bitmap; + ucp_rsc_index_t dev_index; + ucp_rsc_index_t rsc_index; + unsigned addr_idx; + unsigned addr_indices[UCP_MAX_RESOURCES]; + ucs_status_t status; + + wireup_ep = ucp_ep_get_cm_wireup_ep(ucp_ep); + ucs_assert(wireup_ep != NULL); + + status = ucp_address_unpack(worker, progress_arg->sa_data + 1, + UCP_ADDRESS_PACK_FLAG_IFACE_ADDR | + UCP_ADDRESS_PACK_FLAG_EP_ADDR, &addr); + if (status != UCS_OK) { + goto out; + } + + if (addr.address_count == 0) { + status = UCS_ERR_UNREACHABLE; + goto out_free_addr; + } + + for (addr_idx = 0; addr_idx < addr.address_count; ++addr_idx) { + addr.address_list[addr_idx].dev_addr = progress_arg->dev_addr; + addr.address_list[addr_idx].dev_index = progress_arg->sa_data->dev_index; + } + + 
UCS_ASYNC_BLOCK(&worker->async); + + ucp_ep_update_dest_ep_ptr(ucp_ep, progress_arg->sa_data->ep_ptr); + + ucs_assert(addr.address_count <= UCP_MAX_RESOURCES); + ucs_assert(wireup_ep->ep_init_flags & UCP_EP_INIT_CM_WIREUP_CLIENT); + + /* extend tl_bitmap to all TLs on the same device as initial configuration + since TL can be changed due to server side configuration */ + tl_bitmap = ucp_ep_get_tl_bitmap(ucp_ep); + ucs_assert(tl_bitmap != 0); + rsc_index = ucs_ffs64(tl_bitmap); + dev_index = context->tl_rscs[rsc_index].dev_index; + +#if ENABLE_ASSERT + ucs_for_each_bit(rsc_index, tl_bitmap) { + ucs_assert(dev_index == context->tl_rscs[rsc_index].dev_index); + } +#endif + + tl_bitmap = ucp_context_dev_idx_tl_bitmap(context, dev_index); + status = ucp_wireup_init_lanes(ucp_ep, wireup_ep->ep_init_flags, + tl_bitmap, &addr, addr_indices); + if (status != UCS_OK) { + goto out_unblock; + } + + status = ucp_wireup_connect_local(ucp_ep, &addr, NULL); + if (status != UCS_OK) { + goto out_unblock; + } + + ucp_wireup_remote_connected(ucp_ep); + +out_unblock: + UCS_ASYNC_UNBLOCK(&worker->async); +out_free_addr: + ucs_free(addr.address_list); +out: + ucs_free(progress_arg->sa_data); + ucs_free(progress_arg->dev_addr); + ucs_free(progress_arg); + + if (status != UCS_OK) { + ucp_worker_set_ep_failed(worker, ucp_ep, &wireup_ep->super.super, + ucp_ep_get_cm_lane(ucp_ep), status); + } + + return 1; +} + +static ucs_status_t +ucp_cm_remote_data_check(const uct_cm_remote_data_t *remote_data) +{ + if (ucs_test_all_flags(remote_data->field_mask, + UCT_CM_REMOTE_DATA_FIELD_DEV_ADDR | + UCT_CM_REMOTE_DATA_FIELD_DEV_ADDR_LENGTH | + UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA | + UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA_LENGTH)) { + return UCS_OK; + } + + ucs_error("incompatible client server connection establishment protocol"); + return UCS_ERR_UNSUPPORTED; +} + +/* + * Async callback on a client side which notifies that server is connected. 
+ */ +static void ucp_cm_client_connect_cb(uct_ep_h uct_cm_ep, void *arg, + const uct_cm_remote_data_t *remote_data, + ucs_status_t status) +{ + ucp_ep_h ucp_ep = (ucp_ep_h)arg; + ucp_worker_h worker = ucp_ep->worker; + uct_worker_cb_id_t prog_id = UCS_CALLBACKQ_ID_NULL; + ucp_cm_client_connect_progress_arg_t *progress_arg; + + if (status != UCS_OK) { + goto err_out; + } + + status = ucp_cm_remote_data_check(remote_data); + if (status != UCS_OK) { + goto err_out; + } + + progress_arg = ucs_malloc(sizeof(*progress_arg), + "ucp_cm_client_connect_progress_arg_t"); + if (progress_arg == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_out; + } + + progress_arg->sa_data = ucs_malloc(remote_data->conn_priv_data_length, + "sa data"); + if (progress_arg->sa_data == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_free_arg; + } + + progress_arg->dev_addr = ucs_malloc(remote_data->dev_addr_length, + "device address"); + if (progress_arg->dev_addr == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_free_sa_data; + } + + progress_arg->ucp_ep = ucp_ep; + memcpy(progress_arg->dev_addr, remote_data->dev_addr, + remote_data->dev_addr_length); + memcpy(progress_arg->sa_data, remote_data->conn_priv_data, + remote_data->conn_priv_data_length); + + uct_worker_progress_register_safe(worker->uct, + ucp_cm_client_connect_progress, + progress_arg, UCS_CALLBACKQ_FLAG_ONESHOT, + &prog_id); + ucp_worker_signal_internal(ucp_ep->worker); + return; + +err_free_sa_data: + ucs_free(progress_arg->sa_data); +err_free_arg: + ucs_free(progress_arg); +err_out: + ucp_ep->flags &= ~UCP_EP_FLAG_LOCAL_CONNECTED; + ucp_worker_set_ep_failed(worker, ucp_ep, uct_cm_ep, + ucp_ep_get_cm_lane(ucp_ep), status); +} + +/* + * Internal flush completion callback which is a part of close protocol, + * this flush was initiated by remote peer in disconnect callback on CM lane. 
+ */ +static void ucp_ep_cm_disconnect_flushed_cb(ucp_request_t *req) +{ + ucp_ep_h ucp_ep = req->send.ep; + /* the EP can be closed/destroyed from err callback */ + ucs_async_context_t *async = &ucp_ep->worker->async; + + UCS_ASYNC_BLOCK(async); + ucp_ep_cm_disconnect_cm_lane(ucp_ep); + ucs_assert(!(req->flags & UCP_REQUEST_FLAG_CALLBACK)); + ucp_request_put(req); + UCS_ASYNC_UNBLOCK(async); +} + +static unsigned ucp_ep_cm_remote_disconnect_progress(void *arg) +{ + ucp_ep_h ucp_ep = arg; + void *req; + ucs_status_t status; + + ucs_trace("ep %p: flags %xu cm_remote_disconnect_progress", ucp_ep, + ucp_ep->flags); + + ucs_assert(ucp_ep_get_cm_uct_ep(ucp_ep) != NULL); + + ucs_assert(ucp_ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED); + if (ucs_test_all_flags(ucp_ep->flags, UCP_EP_FLAG_CLOSED | + UCP_EP_FLAG_CLOSE_REQ_VALID)) { + ucp_request_complete_send(ucp_ep_ext_gen(ucp_ep)->close_req.req, UCS_OK); + return 1; + } + + if (ucp_ep->flags & UCP_EP_FLAG_CLOSED) { + /* the ep is closed by API but close req is not valid yet (checked + * above), it will be set later from scheduled + * @ref ucp_ep_close_flushed_callback */ + ucs_debug("ep %p: ep closed but request is not set, waiting for the flush callback", + ucp_ep); + return 1; + } + + /* + * TODO: set the ucp_ep to error state to prevent user from sending more + * ops. 
+ */ + ucs_assert(ucp_ep->flags & UCP_EP_FLAG_FLUSH_STATE_VALID); + ucs_assert(!(ucp_ep->flags & UCP_EP_FLAG_CLOSED)); + req = ucp_ep_flush_internal(ucp_ep, UCT_FLUSH_FLAG_LOCAL, NULL, 0, NULL, + ucp_ep_cm_disconnect_flushed_cb, + "cm_disconnected_cb"); + if (req == NULL) { + /* flush is successfully completed in place, notify remote peer + * that we are disconnected, the EP will be destroyed from API call */ + ucp_ep_cm_disconnect_cm_lane(ucp_ep); + } else if (UCS_PTR_IS_ERR(req)) { + status = UCS_PTR_STATUS(req); + ucs_error("ucp_ep_flush_internal completed with error: %s", + ucs_status_string(status)); + goto err; + } + + return 1; + +err: + ucp_worker_set_ep_failed(ucp_ep->worker, ucp_ep, + ucp_ep_get_cm_uct_ep(ucp_ep), + ucp_ep_get_cm_lane(ucp_ep), status); + return 1; +} + +static unsigned ucp_ep_cm_disconnect_progress(void *arg) +{ + ucp_ep_h ucp_ep = arg; + uct_ep_h uct_cm_ep = ucp_ep_get_cm_uct_ep(ucp_ep); + ucs_async_context_t *async = &ucp_ep->worker->async; + ucp_request_t *close_req; + + UCS_ASYNC_BLOCK(async); + + ucs_trace("ep %p: got remote disconnect, cm_ep %p", ucp_ep, uct_cm_ep); + ucs_assert(ucp_ep_get_cm_uct_ep(ucp_ep) == uct_cm_ep); + + ucp_ep_invoke_err_cb(ucp_ep, UCS_ERR_CONNECTION_RESET); + + ucp_ep->flags &= ~UCP_EP_FLAG_REMOTE_CONNECTED; + + if (ucp_ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED) { + /* if the EP is local connected, need to flush it from main thread first */ + ucp_ep_cm_remote_disconnect_progress(ucp_ep); + } else { + /* if the EP is not local connected, the EP has been flushed and CM lane is + * disconnected, schedule close request completion and EP destroy */ + ucs_assert(ucp_ep->flags & UCP_EP_FLAG_CLOSE_REQ_VALID); + close_req = ucp_ep_ext_gen(ucp_ep)->close_req.req; + ucp_ep_local_disconnect_progress(close_req); + } + + UCS_ASYNC_UNBLOCK(async); + return 1; +} + +static void ucp_cm_disconnect_cb(uct_ep_h uct_cm_ep, void *arg) +{ + ucp_ep_h ucp_ep = arg; + uct_worker_cb_id_t prog_id = UCS_CALLBACKQ_ID_NULL; + + ucs_debug("ep 
%p: CM remote disconnect callback invoked, flags 0x%x", + ucp_ep, ucp_ep->flags); + + uct_worker_progress_register_safe(ucp_ep->worker->uct, + ucp_ep_cm_disconnect_progress, + ucp_ep, UCS_CALLBACKQ_FLAG_ONESHOT, + &prog_id); + ucp_worker_signal_internal(ucp_ep->worker); +} + +ucs_status_t ucp_ep_client_cm_connect_start(ucp_ep_h ucp_ep, + const ucp_ep_params_t *params) +{ + ucp_wireup_ep_t *wireup_ep = ucp_ep_get_cm_wireup_ep(ucp_ep); + ucp_worker_h worker = ucp_ep->worker; + uct_ep_h cm_ep; + uct_ep_params_t cm_lane_params; + ucs_status_t status; + + wireup_ep->ep_init_flags = ucp_ep_init_flags(ucp_ep->worker, params); + + cm_lane_params.field_mask = UCT_EP_PARAM_FIELD_CM | + UCT_EP_PARAM_FIELD_USER_DATA | + UCT_EP_PARAM_FIELD_SOCKADDR | + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS | + UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB | + UCT_EP_PARAM_FIELD_SOCKADDR_CONNECT_CB | + UCT_EP_PARAM_FIELD_SOCKADDR_DISCONNECT_CB; + + cm_lane_params.user_data = ucp_ep; + cm_lane_params.sockaddr = ¶ms->sockaddr; + cm_lane_params.sockaddr_cb_flags = UCT_CB_FLAG_ASYNC; + cm_lane_params.sockaddr_pack_cb = ucp_cm_client_priv_pack_cb; + cm_lane_params.sockaddr_connect_cb.client = ucp_cm_client_connect_cb; + cm_lane_params.disconnect_cb = ucp_cm_disconnect_cb; + ucs_assert_always(ucp_worker_num_cm_cmpts(worker) == 1); + cm_lane_params.cm = worker->cms[0].cm; + + status = uct_ep_create(&cm_lane_params, &cm_ep); + if (status != UCS_OK) { + /* coverity[leaked_storage] */ + return status; + } + + ucp_wireup_ep_set_next_ep(&wireup_ep->super.super, cm_ep); + ucp_ep_flush_state_reset(ucp_ep); + + return UCS_OK; +} + +static unsigned ucp_cm_server_conn_request_progress(void *arg) +{ + ucp_conn_request_h conn_request = arg; + ucp_listener_h listener = conn_request->listener; + ucp_worker_h worker = listener->worker; + ucp_ep_h ep; + ucs_status_t status; + + ucs_trace_func("listener %p, connect request %p", listener, conn_request); + + if (listener->conn_cb) { + listener->conn_cb(conn_request, listener->arg); 
+ return 1; + } + + UCS_ASYNC_BLOCK(&worker->async); + status = ucp_ep_create_server_accept(worker, conn_request, &ep); + if (status != UCS_OK) { + ucs_warn("server endpoint creation with connect request %p failed, status %s", + conn_request, ucs_status_string(status)); + } + UCS_ASYNC_UNBLOCK(&worker->async); + ucs_free(conn_request->remote_dev_addr); + ucs_free(conn_request); + return 1; +} + +void ucp_cm_server_conn_request_cb(uct_listener_h listener, void *arg, + const char *local_dev_name, + uct_conn_request_h conn_request, + const uct_cm_remote_data_t *remote_data) +{ + ucp_listener_h ucp_listener = arg; + uct_worker_cb_id_t prog_id = UCS_CALLBACKQ_ID_NULL; + ucp_conn_request_h ucp_conn_request; + ucs_status_t status; + + status = ucp_cm_remote_data_check(remote_data); + if (status != UCS_OK) { + goto err_reject; + } + + ucp_conn_request = ucs_malloc(ucs_offsetof(ucp_conn_request_t, sa_data) + + remote_data->conn_priv_data_length, + "ucp_conn_request_h"); + if (ucp_conn_request == NULL) { + ucs_error("failed to allocate connect request, rejecting connection request %p on TL listener %p", + conn_request, listener); + goto err_reject; + } + + ucp_conn_request->remote_dev_addr = ucs_malloc(remote_data->dev_addr_length, + "remote device address"); + if (ucp_conn_request->remote_dev_addr == NULL) { + ucs_error("failed to allocate device address, rejecting connection request %p on TL listener %p", + conn_request, listener); + goto err_free_ucp_conn_request; + } + + ucp_conn_request->listener = ucp_listener; + ucp_conn_request->uct.listener = listener; + ucp_conn_request->uct_req = conn_request; + ucs_strncpy_safe(ucp_conn_request->dev_name, local_dev_name, + UCT_DEVICE_NAME_MAX); + memcpy(ucp_conn_request->remote_dev_addr, remote_data->dev_addr, + remote_data->dev_addr_length); + memcpy(&ucp_conn_request->sa_data, remote_data->conn_priv_data, + remote_data->conn_priv_data_length); + + uct_worker_progress_register_safe(ucp_listener->worker->uct, + 
ucp_cm_server_conn_request_progress, + ucp_conn_request, + UCS_CALLBACKQ_FLAG_ONESHOT, &prog_id); + + /* If the worker supports the UCP_FEATURE_WAKEUP feature, signal the user so + * that he can wake-up on this event */ + ucp_worker_signal_internal(ucp_listener->worker); + return; + +err_free_ucp_conn_request: + ucs_free(ucp_conn_request); +err_reject: + status = uct_listener_reject(listener, conn_request); + if (status != UCS_OK) { + ucs_warn("failed to reject connect request %p on listener %p", + conn_request, listener); + } +} + +ucs_status_t +ucp_ep_cm_server_create_connected(ucp_worker_h worker, unsigned ep_init_flags, + const ucp_unpacked_address_t *remote_addr, + ucp_conn_request_h conn_request, + ucp_ep_h *ep_p) +{ + uint64_t tl_bitmap = ucp_context_dev_tl_bitmap(worker->context, + conn_request->dev_name); + ucp_ep_h ep; + ucs_status_t status; + + /* Create and connect TL part */ + status = ucp_ep_create_to_worker_addr(worker, tl_bitmap, remote_addr, + ep_init_flags, + "conn_request on uct_listener", &ep); + if (status != UCS_OK) { + return status; + } + + status = ucp_wireup_connect_local(ep, remote_addr, NULL); + if (status != UCS_OK) { + return status; + } + + status = ucp_ep_cm_connect_server_lane(ep, conn_request); + if (status != UCS_OK) { + return status; + } + + ucp_ep_update_dest_ep_ptr(ep, conn_request->sa_data.ep_ptr); + ucp_listener_schedule_accept_cb(ep); + *ep_p = ep; + return UCS_OK; +} + +static ssize_t ucp_cm_server_priv_pack_cb(void *arg, const char *dev_name, + void *priv_data) +{ + ucp_wireup_sockaddr_data_t *sa_data = priv_data; + ucp_ep_h ep = arg; + ucp_worker_h worker = ep->worker; + uint64_t tl_bitmap; + uct_cm_attr_t cm_attr; + void* ucp_addr; + size_t ucp_addr_size; + ucp_rsc_index_t rsc_index; + ucp_rsc_index_t dev_index; + ucs_status_t status; + + UCS_ASYNC_BLOCK(&worker->async); + + tl_bitmap = ucp_ep_get_tl_bitmap(ep); + /* make sure that all lanes are created on correct device */ + ucs_assert(!(tl_bitmap & 
~ucp_context_dev_tl_bitmap(worker->context, + dev_name))); + + status = ucp_address_pack(worker, ep, tl_bitmap, + UCP_ADDRESS_PACK_FLAG_IFACE_ADDR | + UCP_ADDRESS_PACK_FLAG_EP_ADDR, NULL, + &ucp_addr_size, &ucp_addr); + if (status != UCS_OK) { + goto out; + } + + cm_attr.field_mask = UCT_CM_ATTR_FIELD_MAX_CONN_PRIV; + ucs_assert(ucp_worker_num_cm_cmpts(worker) == 1); + status = uct_cm_query(worker->cms[0].cm, &cm_attr); + if (status != UCS_OK) { + goto out; + } + + if (cm_attr.max_conn_priv < (sizeof(*sa_data) + ucp_addr_size)) { + status = UCS_ERR_BUFFER_TOO_SMALL; + goto free_addr; + } + + rsc_index = ucs_ffs64_safe(tl_bitmap); + ucs_assert(rsc_index != UCP_NULL_RESOURCE); + dev_index = worker->context->tl_rscs[rsc_index].dev_index; + ucp_cm_priv_data_pack(sa_data, ep, dev_index, ucp_addr, ucp_addr_size); + +free_addr: + ucs_free(ucp_addr); +out: + if (status == UCS_OK) { + ep->flags |= UCP_EP_FLAG_LOCAL_CONNECTED; + } else { + ucp_worker_set_ep_failed(worker, ep, + &ucp_ep_get_cm_wireup_ep(ep)->super.super, + ucp_ep_get_cm_lane(ep), status); + } + + UCS_ASYNC_UNBLOCK(&worker->async); + + return (status == UCS_OK) ? (sizeof(*sa_data) + ucp_addr_size) : status; +} + +/* + * The main thread progress part of connection establishment on server side + */ +static unsigned ucp_cm_server_connect_progress(void *arg) +{ + ucp_ep_h ucp_ep = arg; + + UCS_ASYNC_BLOCK(&ucp_ep->worker->async); + ucp_wireup_remote_connected(ucp_ep); + UCS_ASYNC_UNBLOCK(&ucp_ep->worker->async); + return 1; +} + +/* + * Async callback on a server side which notifies that client is connected. 
+ */ +static void ucp_cm_server_connect_cb(uct_ep_h ep, void *arg, + ucs_status_t status) +{ + ucp_ep_h ucp_ep = arg; + uct_worker_cb_id_t prog_id = UCS_CALLBACKQ_ID_NULL; + ucp_lane_index_t cm_lane; + + if (status == UCS_OK) { + uct_worker_progress_register_safe(ucp_ep->worker->uct, + ucp_cm_server_connect_progress, + ucp_ep, UCS_CALLBACKQ_FLAG_ONESHOT, + &prog_id); + ucp_worker_signal_internal(ucp_ep->worker); + } else if (status == UCS_ERR_CONNECTION_RESET) { + /* remote side initiated disconnect before local side has completed + * connection establishment, so: + * 1) establish connection to complete any pending requests from wireup + * lane + * 2) handle disconnect same way as close protocol + * 3) TODO: remove (1) when the EP can be moved to err state to block + * new send operations but still able to flush transport lanes */ + uct_worker_progress_register_safe(ucp_ep->worker->uct, + ucp_cm_server_connect_progress, + ucp_ep, UCS_CALLBACKQ_FLAG_ONESHOT, + &prog_id); + ucp_cm_disconnect_cb(ep, ucp_ep); + } else { + /* if reject is arrived on server side, then UCT does something wrong */ + ucs_assert(status != UCS_ERR_REJECTED); + cm_lane = ucp_ep_get_cm_lane(ucp_ep); + ucp_ep->flags &= ~UCP_EP_FLAG_LOCAL_CONNECTED; + ucp_worker_set_ep_failed(ucp_ep->worker, ucp_ep, + ucp_ep->uct_eps[cm_lane], cm_lane, status); + } +} + +ucs_status_t ucp_ep_cm_connect_server_lane(ucp_ep_h ep, + ucp_conn_request_h conn_request) +{ + ucp_worker_h worker = ep->worker; + ucp_lane_index_t lane = ucp_ep_get_cm_lane(ep); + uct_ep_params_t uct_ep_params; + uct_ep_h uct_ep; + ucs_status_t status; + + ucs_assert(lane != UCP_NULL_LANE); + ucs_assert(ep->uct_eps[lane] == NULL); + + /* TODO: split CM and wireup lanes */ + status = ucp_wireup_ep_create(ep, &ep->uct_eps[lane]); + if (status != UCS_OK) { + return status; + } + + /* create a server side CM endpoint */ + ucs_trace("ep %p: uct_ep[%d]", ep, lane); + uct_ep_params.field_mask = UCT_EP_PARAM_FIELD_CM | + UCT_EP_PARAM_FIELD_CONN_REQUEST 
| + UCT_EP_PARAM_FIELD_USER_DATA | + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS | + UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB | + UCT_EP_PARAM_FIELD_SOCKADDR_CONNECT_CB | + UCT_EP_PARAM_FIELD_SOCKADDR_DISCONNECT_CB; + + ucs_assertv_always(ucp_worker_num_cm_cmpts(worker) == 1, + "multiple CMs are not supported"); + uct_ep_params.cm = worker->cms[0].cm; + uct_ep_params.user_data = ep; + uct_ep_params.conn_request = conn_request->uct_req; + uct_ep_params.sockaddr_cb_flags = UCT_CB_FLAG_ASYNC; + uct_ep_params.sockaddr_pack_cb = ucp_cm_server_priv_pack_cb; + uct_ep_params.sockaddr_connect_cb.server = ucp_cm_server_connect_cb; + uct_ep_params.disconnect_cb = ucp_cm_disconnect_cb; + + status = uct_ep_create(&uct_ep_params, &uct_ep); + if (status != UCS_OK) { + /* coverity[leaked_storage] */ + return status; + } + + ucp_wireup_ep_set_next_ep(ep->uct_eps[lane], uct_ep); + return UCS_OK; +} + +void ucp_ep_cm_disconnect_cm_lane(ucp_ep_h ucp_ep) +{ + uct_ep_h uct_cm_ep = ucp_ep_get_cm_uct_ep(ucp_ep); + ucs_status_t status; + + ucs_assert_always(uct_cm_ep != NULL); + /* No reason to try disconnect twice */ + ucs_assert(ucp_ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED); + + ucp_ep->flags &= ~UCP_EP_FLAG_LOCAL_CONNECTED; + /* this will invoke @ref ucp_cm_disconnect_cb on remote side */ + status = uct_ep_disconnect(uct_cm_ep, 0); + if (status != UCS_OK) { + ucs_warn("failed to disconnect CM lane %p of ep %p, %s", ucp_ep, + uct_cm_ep, ucs_status_string(status)); + } +} + +ucp_request_t* ucp_ep_cm_close_request_get(ucp_ep_h ep) +{ + ucp_request_t *request = ucp_request_get(ep->worker); + + if (request == NULL) { + ucs_error("failed to allocate close request for ep %p", ep); + return NULL; + } + + request->status = UCS_OK; + request->flags = 0; + request->send.ep = ep; + request->send.flush.uct_flags = UCT_FLUSH_FLAG_LOCAL; + + return request; +} + +static int ucp_cm_cbs_remove_filter(const ucs_callbackq_elem_t *elem, void *arg) +{ + if ((elem->cb == ucp_ep_cm_disconnect_progress) || + (elem->cb == 
ucp_ep_cm_remote_disconnect_progress) || + (elem->cb == ucp_cm_client_connect_progress) || + (elem->cb == ucp_cm_server_connect_progress)) { + return arg == elem->arg; + } else { + return 0; + } +} + +void ucp_ep_cm_slow_cbq_cleanup(ucp_ep_h ep) +{ + ucs_callbackq_remove_if(&ep->worker->uct->progress_q, + ucp_cm_cbs_remove_filter, ep); +} diff --git a/src/ucp/wireup/wireup_cm.h b/src/ucp/wireup/wireup_cm.h new file mode 100644 index 0000000..e13901e --- /dev/null +++ b/src/ucp/wireup/wireup_cm.h @@ -0,0 +1,48 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef WIREUP_CM_H_ +#define WIREUP_CM_H_ + +#include +#include +#include + + +typedef struct ucp_cm_client_connect_progress_arg { + ucp_ep_h ucp_ep; + ucp_wireup_sockaddr_data_t *sa_data; + uct_device_addr_t *dev_addr; +} ucp_cm_client_connect_progress_arg_t; + + +unsigned ucp_cm_ep_init_flags(const ucp_worker_h worker, + const ucp_ep_params_t *params); + +ucs_status_t ucp_ep_cm_connect_server_lane(ucp_ep_h ep, + ucp_conn_request_h conn_request); + +ucs_status_t ucp_ep_client_cm_connect_start(ucp_ep_h ucp_ep, + const ucp_ep_params_t *params); + +void ucp_cm_server_conn_request_cb(uct_listener_h listener, void *arg, + const char *local_dev_name, + uct_conn_request_h conn_request, + const uct_cm_remote_data_t *remote_data); + +ucs_status_t +ucp_ep_cm_server_create_connected(ucp_worker_h worker, unsigned ep_init_flags, + const ucp_unpacked_address_t *remote_addr, + ucp_conn_request_h conn_request, + ucp_ep_h *ep_p); + +void ucp_ep_cm_disconnect_cm_lane(ucp_ep_h ucp_ep); + +ucp_request_t* ucp_ep_cm_close_request_get(ucp_ep_h ep); + +void ucp_ep_cm_slow_cbq_cleanup(ucp_ep_h ep); + +#endif /* WIREUP_CM_H_ */ diff --git a/src/ucp/wireup/wireup_ep.c b/src/ucp/wireup/wireup_ep.c new file mode 100644 index 0000000..fcbf5e2 --- /dev/null +++ b/src/ucp/wireup/wireup_ep.c @@ -0,0 +1,710 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. 
ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wireup_ep.h" +#include "wireup.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +UCS_CLASS_DECLARE(ucp_wireup_ep_t, ucp_ep_h); + + +static UCS_CLASS_DEFINE_DELETE_FUNC(ucp_wireup_ep_t, uct_ep_t); + +static inline ucs_queue_elem_t* ucp_wireup_ep_req_priv(uct_pending_req_t *req) +{ + UCS_STATIC_ASSERT(sizeof(ucs_queue_elem_t) <= UCT_PENDING_REQ_PRIV_LEN); + return (ucs_queue_elem_t*)req->priv; +} + +static ucs_status_t +ucp_wireup_ep_connect_to_ep(uct_ep_h uct_ep, const uct_device_addr_t *dev_addr, + const uct_ep_addr_t *ep_addr) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + + wireup_ep->flags |= UCP_WIREUP_EP_FLAG_LOCAL_CONNECTED; + return uct_ep_connect_to_ep(wireup_ep->super.uct_ep, dev_addr, ep_addr); +} + +/* + * We switch the endpoint in this function (instead in wireup code) since + * this is guaranteed to run from the main thread. + */ +static unsigned ucp_wireup_ep_progress(void *arg) +{ + ucp_wireup_ep_t *wireup_ep = arg; + ucp_ep_h ucp_ep = wireup_ep->super.ucp_ep; + ucs_queue_head_t tmp_pending_queue; + uct_pending_req_t *uct_req; + ucp_request_t *req; + + UCS_ASYNC_BLOCK(&ucp_ep->worker->async); + + ucs_assert(wireup_ep->flags & UCP_WIREUP_EP_FLAG_READY); + ucs_assert(wireup_ep->super.uct_ep != NULL); + + /* If we still have pending wireup messages, send them out first */ + if (wireup_ep->pending_count != 0) { + goto out_unblock; + } + + /* If an error happened on the endpoint (but perhaps the deferred error handler, + * ucp_worker_iface_err_handle_progress(), was not called yet, avoid changing + * ep state, and let the error handler take care of cleanup. 
+ */ + if (ucp_ep->flags & UCP_EP_FLAG_FAILED) { + ucs_trace("ep %p: not switching wireup_ep %p to ready state because of error", + ucp_ep, wireup_ep); + goto out_unblock; + } + + ucs_trace("ep %p: switching wireup_ep %p to ready state", ucp_ep, wireup_ep); + + /* Move wireup pending queue to temporary queue and remove references to + * the wireup progress function + */ + ucs_queue_head_init(&tmp_pending_queue); + ucs_queue_for_each_extract(uct_req, &wireup_ep->pending_q, priv, 1) { + ucs_queue_push(&tmp_pending_queue, ucp_wireup_ep_req_priv(uct_req)); + } + + /* Switch to real transport and destroy proxy endpoint (aux_ep as well) */ + ucp_proxy_ep_replace(&wireup_ep->super); + wireup_ep = NULL; + + UCS_ASYNC_UNBLOCK(&ucp_ep->worker->async); + + /* Replay pending requests */ + ucs_queue_for_each_extract(uct_req, &tmp_pending_queue, priv, 1) { + req = ucs_container_of(uct_req, ucp_request_t, send.uct); + ucs_assert(req->send.ep == ucp_ep); + ucp_request_send(req, 0); + --ucp_ep->worker->flush_ops_count; + } + + return 0; + +out_unblock: + UCS_ASYNC_UNBLOCK(&ucp_ep->worker->async); + return 0; +} + +static ssize_t ucp_wireup_ep_bcopy_send_func(uct_ep_h uct_ep) +{ + return UCS_ERR_NO_RESOURCE; +} + +static uct_ep_h ucp_wireup_ep_get_msg_ep(ucp_wireup_ep_t *wireup_ep) +{ + uct_ep_h wireup_msg_ep; + + if ((wireup_ep->flags & UCP_WIREUP_EP_FLAG_READY) || (wireup_ep->aux_ep == NULL)) { + wireup_msg_ep = wireup_ep->super.uct_ep; + } else { + wireup_msg_ep = wireup_ep->aux_ep; + } + ucs_assertv(wireup_msg_ep != NULL, + "ucp_ep=%p wireup_ep=%p flags=%c%c next_ep=%p aux_ep=%p", + wireup_ep->super.ucp_ep, wireup_ep, + (wireup_ep->flags & UCP_WIREUP_EP_FLAG_LOCAL_CONNECTED) ? 'c' : '-', + (wireup_ep->flags & UCP_WIREUP_EP_FLAG_READY) ? 
'r' : '-', + wireup_ep->super.uct_ep, wireup_ep->aux_ep); + return wireup_msg_ep; +} + +ucs_status_t ucp_wireup_ep_progress_pending(uct_pending_req_t *self) +{ + ucp_request_t *proxy_req = ucs_container_of(self, ucp_request_t, send.uct); + uct_pending_req_t *req = proxy_req->send.proxy.req; + ucp_wireup_ep_t *wireup_ep = proxy_req->send.proxy.wireup_ep; + ucs_status_t status; + + status = req->func(req); + if (status == UCS_OK) { + ucs_atomic_sub32(&wireup_ep->pending_count, 1); + ucs_free(proxy_req); + } + return status; +} + +static void +ucp_wireup_ep_pending_req_release(uct_pending_req_t *self, void *arg) +{ + ucp_request_t *proxy_req = ucs_container_of(self, ucp_request_t, + send.uct); + ucp_wireup_ep_t *wireup_ep = proxy_req->send.proxy.wireup_ep; + ucp_request_t *req; + + ucs_atomic_sub32(&wireup_ep->pending_count, 1); + + if (proxy_req->send.proxy.req->func == ucp_wireup_msg_progress) { + req = ucs_container_of(proxy_req->send.proxy.req, ucp_request_t, + send.uct); + ucs_free((void*)req->send.buffer); + ucs_free(req); + } + + ucs_free(proxy_req); +} + +static ucs_status_t ucp_wireup_ep_pending_add(uct_ep_h uct_ep, + uct_pending_req_t *req, + unsigned flags) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + ucp_ep_h ucp_ep = wireup_ep->super.ucp_ep; + ucp_worker_h worker = ucp_ep->worker; + ucp_request_t *proxy_req; + uct_ep_h wireup_msg_ep; + ucs_status_t status; + + UCS_ASYNC_BLOCK(&worker->async); + if (req->func == ucp_wireup_msg_progress) { + proxy_req = ucs_malloc(sizeof(*proxy_req), "ucp_wireup_proxy_req"); + if (proxy_req == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out; + } + + wireup_msg_ep = ucp_wireup_ep_get_msg_ep(wireup_ep); + + proxy_req->send.uct.func = ucp_wireup_ep_progress_pending; + proxy_req->send.proxy.req = req; + proxy_req->send.proxy.wireup_ep = wireup_ep; + proxy_req->send.state.uct_comp.func = NULL; + + status = uct_ep_pending_add(wireup_msg_ep, &proxy_req->send.uct, + UCT_CB_FLAG_ASYNC); + if (status == UCS_OK) { + 
ucs_atomic_add32(&wireup_ep->pending_count, +1); + } else { + ucs_free(proxy_req); + } + } else { + ucs_queue_push(&wireup_ep->pending_q, ucp_wireup_ep_req_priv(req)); + ++ucp_ep->worker->flush_ops_count; + status = UCS_OK; + } +out: + UCS_ASYNC_UNBLOCK(&worker->async); + /* coverity[leaked_storage] */ + return status; +} + +static void +ucp_wireup_ep_pending_purge(uct_ep_h uct_ep, uct_pending_purge_callback_t cb, + void *arg) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + ucp_worker_h worker; + uct_pending_req_t *req; + ucp_request_t *ucp_req; + + worker = wireup_ep->super.ucp_ep->worker; + + ucs_queue_for_each_extract(req, &wireup_ep->pending_q, priv, 1) { + ucp_req = ucs_container_of(req, ucp_request_t, send.uct); + UCS_ASYNC_BLOCK(&worker->async); + --worker->flush_ops_count; + UCS_ASYNC_UNBLOCK(&worker->async); + cb(&ucp_req->send.uct, arg); + } + + if (wireup_ep->pending_count > 0) { + uct_ep_pending_purge(ucp_wireup_ep_get_msg_ep(wireup_ep), + ucp_wireup_ep_pending_req_release, arg); + } + + ucs_assert(wireup_ep->pending_count == 0); +} + +static ssize_t ucp_wireup_ep_am_bcopy(uct_ep_h uct_ep, uint8_t id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + + if (id == UCP_AM_ID_WIREUP) { + return uct_ep_am_bcopy(ucp_wireup_ep_get_msg_ep(wireup_ep), + UCP_AM_ID_WIREUP, pack_cb, arg, flags); + } + + return UCS_ERR_NO_RESOURCE; +} + + +UCS_CLASS_DEFINE_NAMED_NEW_FUNC(ucp_wireup_ep_create, ucp_wireup_ep_t, uct_ep_t, + ucp_ep_h); + +ucs_status_t +ucp_wireup_ep_connect_aux(ucp_wireup_ep_t *wireup_ep, unsigned ep_init_flags, + const ucp_unpacked_address_t *remote_address) +{ + ucp_ep_h ucp_ep = wireup_ep->super.ucp_ep; + ucp_worker_h worker = ucp_ep->worker; + ucp_wireup_select_info_t select_info = {0}; + uct_ep_params_t uct_ep_params; + const ucp_address_entry_t *aux_addr; + ucp_worker_iface_t *wiface; + ucs_status_t status; + + /* select an auxiliary transport which would be used to 
pass connection + * establishment messages. + */ + status = ucp_wireup_select_aux_transport(ucp_ep, ep_init_flags, + remote_address, &select_info); + if (status != UCS_OK) { + return status; + } + + wireup_ep->aux_rsc_index = select_info.rsc_index; + aux_addr = &remote_address->address_list[select_info.addr_index]; + wiface = ucp_worker_iface(worker, select_info.rsc_index); + + /* create auxiliary endpoint connected to the remote iface. */ + uct_ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE | + UCT_EP_PARAM_FIELD_DEV_ADDR | + UCT_EP_PARAM_FIELD_IFACE_ADDR; + uct_ep_params.iface = wiface->iface; + uct_ep_params.dev_addr = aux_addr->dev_addr; + uct_ep_params.iface_addr = aux_addr->iface_addr; + status = uct_ep_create(&uct_ep_params, &wireup_ep->aux_ep); + if (status != UCS_OK) { + return status; + } + + ucp_worker_iface_progress_ep(wiface); + + ucs_debug("ep %p: wireup_ep %p created aux_ep %p to %s using " + UCT_TL_RESOURCE_DESC_FMT, ucp_ep, wireup_ep, wireup_ep->aux_ep, + ucp_ep_peer_name(ucp_ep), + UCT_TL_RESOURCE_DESC_ARG(&worker->context->tl_rscs[select_info.rsc_index].tl_rsc)); + + return UCS_OK; +} + +static ucs_status_t ucp_wireup_ep_flush(uct_ep_h uct_ep, unsigned flags, + uct_completion_t *comp) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + + if (flags & UCT_FLUSH_FLAG_CANCEL) { + if (wireup_ep->aux_ep) { + return uct_ep_flush(wireup_ep->aux_ep, flags, comp); + } + return UCS_OK; + } + return UCS_ERR_NO_RESOURCE; +} + + +UCS_CLASS_INIT_FUNC(ucp_wireup_ep_t, ucp_ep_h ucp_ep) +{ + static uct_iface_ops_t ops = { + .ep_connect_to_ep = ucp_wireup_ep_connect_to_ep, + .ep_flush = ucp_wireup_ep_flush, + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(ucp_wireup_ep_t), + .ep_pending_add = ucp_wireup_ep_pending_add, + .ep_pending_purge = ucp_wireup_ep_pending_purge, + .ep_put_short = (uct_ep_put_short_func_t)ucs_empty_function_return_no_resource, + .ep_put_bcopy = (uct_ep_put_bcopy_func_t)ucp_wireup_ep_bcopy_send_func, + .ep_put_zcopy = 
(uct_ep_put_zcopy_func_t)ucs_empty_function_return_no_resource, + .ep_get_short = (uct_ep_get_short_func_t)ucs_empty_function_return_no_resource, + .ep_get_bcopy = (uct_ep_get_bcopy_func_t)ucs_empty_function_return_no_resource, + .ep_get_zcopy = (uct_ep_get_zcopy_func_t)ucs_empty_function_return_no_resource, + .ep_am_short = (uct_ep_am_short_func_t)ucs_empty_function_return_no_resource, + .ep_am_bcopy = ucp_wireup_ep_am_bcopy, + .ep_am_zcopy = (uct_ep_am_zcopy_func_t)ucs_empty_function_return_no_resource, + .ep_tag_eager_short = (uct_ep_tag_eager_short_func_t)ucs_empty_function_return_no_resource, + .ep_tag_eager_bcopy = (uct_ep_tag_eager_bcopy_func_t)ucp_wireup_ep_bcopy_send_func, + .ep_tag_eager_zcopy = (uct_ep_tag_eager_zcopy_func_t)ucs_empty_function_return_no_resource, + .ep_tag_rndv_zcopy = (uct_ep_tag_rndv_zcopy_func_t)ucs_empty_function_return_ptr_no_resource, + .ep_tag_rndv_request = (uct_ep_tag_rndv_request_func_t)ucs_empty_function_return_no_resource, + .ep_atomic64_post = (uct_ep_atomic64_post_func_t)ucs_empty_function_return_no_resource, + .ep_atomic64_fetch = (uct_ep_atomic64_fetch_func_t)ucs_empty_function_return_no_resource, + .ep_atomic_cswap64 = (uct_ep_atomic_cswap64_func_t)ucs_empty_function_return_no_resource, + .ep_atomic32_post = (uct_ep_atomic32_post_func_t)ucs_empty_function_return_no_resource, + .ep_atomic32_fetch = (uct_ep_atomic32_fetch_func_t)ucs_empty_function_return_no_resource, + .ep_atomic_cswap32 = (uct_ep_atomic_cswap32_func_t)ucs_empty_function_return_no_resource + }; + + UCS_CLASS_CALL_SUPER_INIT(ucp_proxy_ep_t, &ops, ucp_ep, NULL, 0); + + self->aux_ep = NULL; + self->sockaddr_ep = NULL; + self->aux_rsc_index = UCP_NULL_RESOURCE; + self->sockaddr_rsc_index = UCP_NULL_RESOURCE; + self->pending_count = 0; + self->flags = 0; + self->progress_id = UCS_CALLBACKQ_ID_NULL; + ucs_queue_head_init(&self->pending_q); + + UCS_ASYNC_BLOCK(&ucp_ep->worker->async); + ++ucp_ep->worker->flush_ops_count; + 
UCS_ASYNC_UNBLOCK(&ucp_ep->worker->async); + + ucs_trace("ep %p: created wireup ep %p to %s ", ucp_ep, self, + ucp_ep_peer_name(ucp_ep)); + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(ucp_wireup_ep_t) +{ + ucp_ep_h ucp_ep = self->super.ucp_ep; + ucp_worker_h worker = ucp_ep->worker; + + ucs_assert(ucs_queue_is_empty(&self->pending_q)); + ucs_assert(self->pending_count == 0); + + ucs_debug("ep %p: destroy wireup ep %p", ucp_ep, self); + + uct_worker_progress_unregister_safe(worker->uct, &self->progress_id); + if (self->aux_ep != NULL) { + ucp_worker_iface_unprogress_ep(ucp_worker_iface(worker, + self->aux_rsc_index)); + uct_ep_destroy(self->aux_ep); + } + if (self->sockaddr_ep != NULL) { + uct_ep_destroy(self->sockaddr_ep); + } + + UCS_ASYNC_BLOCK(&worker->async); + --worker->flush_ops_count; + UCS_ASYNC_UNBLOCK(&worker->async); +} + +UCS_CLASS_DEFINE(ucp_wireup_ep_t, ucp_proxy_ep_t); + +ucp_rsc_index_t ucp_wireup_ep_get_aux_rsc_index(uct_ep_h uct_ep) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + + if (!ucp_wireup_ep_test(uct_ep)) { + return UCP_NULL_RESOURCE; + } + + if (wireup_ep->aux_ep == NULL) { + return UCP_NULL_RESOURCE; + } + + return wireup_ep->aux_rsc_index; +} + +ucs_status_t ucp_wireup_ep_connect(uct_ep_h uct_ep, unsigned ucp_ep_init_flags, + ucp_rsc_index_t rsc_index, int connect_aux, + const ucp_unpacked_address_t *remote_address) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + ucp_ep_h ucp_ep = wireup_ep->super.ucp_ep; + ucp_worker_h worker = ucp_ep->worker; + uct_ep_params_t uct_ep_params; + ucs_status_t status; + uct_ep_h next_ep; + + ucs_assert(wireup_ep != NULL); + + uct_ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + uct_ep_params.iface = ucp_worker_iface(worker, rsc_index)->iface; + status = uct_ep_create(&uct_ep_params, &next_ep); + if (status != UCS_OK) { + /* make Coverity happy */ + ucs_assert(next_ep == NULL); + goto err; + } + + ucp_proxy_ep_set_uct_ep(&wireup_ep->super, next_ep, 1); + + ucs_debug("ep %p: 
created next_ep %p to %s using " UCT_TL_RESOURCE_DESC_FMT, + ucp_ep, wireup_ep->super.uct_ep, ucp_ep_peer_name(ucp_ep), + UCT_TL_RESOURCE_DESC_ARG(&worker->context->tl_rscs[rsc_index].tl_rsc)); + + /* we need to create an auxiliary transport only for active messages */ + if (connect_aux) { + status = ucp_wireup_ep_connect_aux(wireup_ep, ucp_ep_init_flags, + remote_address); + if (status != UCS_OK) { + goto err_destroy_next_ep; + } + } + + return UCS_OK; + +err_destroy_next_ep: + uct_ep_destroy(wireup_ep->super.uct_ep); + wireup_ep->super.uct_ep = NULL; +err: + return status; +} + +static ucs_status_t ucp_wireup_ep_pack_sockaddr_aux_tls(ucp_worker_h worker, + const char *dev_name, + uint64_t *tl_bitmap_p, + ucp_address_t **address_p, + size_t *address_length_p) +{ + ucp_context_h context = worker->context; + int tl_id, found_supported_tl = 0; + ucs_status_t status; + uint64_t tl_bitmap = 0; + + /* Find a transport which matches the given dev_name and the user's configuration. + * It also has to be a UCT_IFACE_FLAG_CONNECT_TO_IFACE transport and support + * active messaging for sending a wireup message */ + ucs_for_each_bit(tl_id, context->config.sockaddr_aux_rscs_bitmap) { + if ((!strncmp(context->tl_rscs[tl_id].tl_rsc.dev_name, dev_name, + UCT_DEVICE_NAME_MAX)) && + (ucs_test_all_flags(ucp_worker_iface_get_attr(worker, tl_id)->cap.flags, + UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_AM_BCOPY))) { + found_supported_tl = 1; + tl_bitmap |= UCS_BIT(tl_id); + } + } + + if (found_supported_tl) { + status = ucp_address_pack(worker, NULL, tl_bitmap, + UCP_ADDRESS_PACK_FLAG_ALL, NULL, + address_length_p, (void**)address_p); + } else { + ucs_error("no supported sockaddr auxiliary transports found for %s", dev_name); + status = UCS_ERR_UNREACHABLE; + } + + *tl_bitmap_p = tl_bitmap; + return status; +} + +ssize_t ucp_wireup_ep_sockaddr_fill_private_data(void *arg, const char *dev_name, + void *priv_data) +{ + ucp_wireup_sockaddr_data_t *sa_data = priv_data; + 
ucp_wireup_ep_t *wireup_ep = arg; + ucp_ep_h ucp_ep = wireup_ep->super.ucp_ep; + ucp_rsc_index_t sockaddr_rsc = wireup_ep->sockaddr_rsc_index; + ucp_worker_h worker = ucp_ep->worker; + ucp_context_h context = worker->context; + size_t address_length, conn_priv_len; + ucp_address_t *worker_address, *rsc_address; + uct_iface_attr_t *attrs; + ucs_status_t status; + uint64_t tl_bitmap; + char aux_tls_str[64]; + + status = ucp_address_pack(worker, NULL, UINT64_MAX, + UCP_ADDRESS_PACK_FLAG_ALL, NULL, + &address_length, (void**)&worker_address); + if (status != UCS_OK) { + goto err; + } + + conn_priv_len = sizeof(*sa_data) + address_length; /* sa_data header followed by the packed address */ + + /* pack client data */ + ucs_assert((int)ucp_ep_config(ucp_ep)->key.err_mode <= UINT8_MAX); + sa_data->err_mode = ucp_ep_config(ucp_ep)->key.err_mode; + sa_data->ep_ptr = (uintptr_t)ucp_ep; + sa_data->dev_index = UCP_NULL_RESOURCE; /* Not used */ + + attrs = ucp_worker_iface_get_attr(worker, sockaddr_rsc); + + /* check private data length limitation */ + if (conn_priv_len > attrs->max_conn_priv) { + + /* since the full worker address is too large to fit into the transport's + * private data, try to pack only the sockaddr aux tls into the address */ + status = ucp_wireup_ep_pack_sockaddr_aux_tls(worker, dev_name, + &tl_bitmap, &rsc_address, + &address_length); + if (status != UCS_OK) { + goto err_free_address; + } + + conn_priv_len = sizeof(*sa_data) + address_length; + + /* check the private data length limitation again, now with partial + * resources packed (and not the entire worker address) */ + if (conn_priv_len > attrs->max_conn_priv) { + ucs_error("sockaddr aux resources addresses (%s transports)" + " information (%zu) exceeds max_priv on " + UCT_TL_RESOURCE_DESC_FMT" (%zu)", + ucp_tl_bitmap_str(context, tl_bitmap, aux_tls_str, + sizeof(aux_tls_str)), + conn_priv_len, + UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[sockaddr_rsc].tl_rsc), + attrs->max_conn_priv); + status = UCS_ERR_UNREACHABLE; + ucs_free(rsc_address); + goto 
err_free_address; + } + + sa_data->addr_mode = UCP_WIREUP_SA_DATA_PARTIAL_ADDR; + memcpy(sa_data + 1, rsc_address, address_length); + ucp_ep->flags |= UCP_EP_FLAG_SOCKADDR_PARTIAL_ADDR; + + ucs_free(rsc_address); + + ucs_trace("sockaddr tl ("UCT_TL_RESOURCE_DESC_FMT") sending partial address: " + "(%s transports) (len=%zu) to server. " + "total client priv data len: %zu", + context->tl_rscs[sockaddr_rsc].tl_rsc.tl_name, dev_name, + ucp_tl_bitmap_str(context, tl_bitmap, aux_tls_str, + sizeof(aux_tls_str)), + address_length, conn_priv_len); + } else { + sa_data->addr_mode = UCP_WIREUP_SA_DATA_FULL_ADDR; + memcpy(sa_data + 1, worker_address, address_length); + } + + ucp_worker_release_address(worker, worker_address); + return conn_priv_len; + +err_free_address: + ucp_worker_release_address(worker, worker_address); +err: + return status; +} + +/** + * Start a client-side sockaddr connection on a wireup endpoint: select the + * sockaddr transport for params->sockaddr, record its resource index in the + * wireup endpoint, and create the sockaddr uct_ep on that transport's iface, + * with the wireup endpoint as user data. + * + * @param [in] uct_ep Wireup endpoint (asserted with ucp_wireup_ep_test()). + * @param [in] params UCP endpoint parameters; only params->sockaddr is read. + * + * @return UCS_OK on success, otherwise the status of the failing call. + */ +ucs_status_t ucp_wireup_ep_connect_to_sockaddr(uct_ep_h uct_ep, + const ucp_ep_params_t *params) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + ucp_ep_h ucp_ep = wireup_ep->super.ucp_ep; + ucp_worker_h worker = ucp_ep->worker; + char saddr_str[UCS_SOCKADDR_STRING_LEN]; + uct_ep_params_t uct_ep_params; + ucp_rsc_index_t sockaddr_rsc; + ucp_worker_iface_t *wiface; + ucs_status_t status; + + ucs_assert(ucp_wireup_ep_test(uct_ep)); + + status = ucp_wireup_select_sockaddr_transport(worker->context, + &params->sockaddr, + &sockaddr_rsc); + if (status != UCS_OK) { + goto out; + } + + wiface = ucp_worker_iface(worker, sockaddr_rsc); + + wireup_ep->sockaddr_rsc_index = sockaddr_rsc; + + /* Fill parameters and send connection request using the transport */ + uct_ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE | + UCT_EP_PARAM_FIELD_USER_DATA | + UCT_EP_PARAM_FIELD_SOCKADDR | + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS | + UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB; + uct_ep_params.iface = wiface->iface; + uct_ep_params.sockaddr = &params->sockaddr; + uct_ep_params.user_data = wireup_ep; + uct_ep_params.sockaddr_cb_flags = UCT_CB_FLAG_ASYNC; + 
uct_ep_params.sockaddr_pack_cb = ucp_wireup_ep_sockaddr_fill_private_data; + status = uct_ep_create(&uct_ep_params, &wireup_ep->sockaddr_ep); + if (status != UCS_OK) { + goto out; + } + + ucs_debug("ep %p connecting to %s", ucp_ep, + ucs_sockaddr_str(params->sockaddr.addr, saddr_str, sizeof(saddr_str))); + status = UCS_OK; + +out: + return status; +} + +void ucp_wireup_ep_set_next_ep(uct_ep_h uct_ep, uct_ep_h next_ep) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + + ucs_assert(wireup_ep != NULL); + ucs_assert(wireup_ep->super.uct_ep == NULL); + wireup_ep->flags |= UCP_WIREUP_EP_FLAG_LOCAL_CONNECTED; + ucp_proxy_ep_set_uct_ep(&wireup_ep->super, next_ep, 1); +} + +uct_ep_h ucp_wireup_ep_extract_next_ep(uct_ep_h uct_ep) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + uct_ep_h next_ep; + + ucs_assert_always(wireup_ep != NULL); + next_ep = wireup_ep->super.uct_ep; + wireup_ep->super.uct_ep = NULL; + return next_ep; +} + +void ucp_wireup_ep_remote_connected(uct_ep_h uct_ep) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + ucp_ep_h ucp_ep; + + ucs_assert(wireup_ep != NULL); + ucs_assert(wireup_ep->super.uct_ep != NULL); + ucs_assert(wireup_ep->flags & UCP_WIREUP_EP_FLAG_LOCAL_CONNECTED); + + ucp_ep = wireup_ep->super.ucp_ep; + + ucs_trace("ep %p: wireup ep %p is remote-connected", ucp_ep, wireup_ep); + wireup_ep->flags |= UCP_WIREUP_EP_FLAG_READY; + uct_worker_progress_register_safe(ucp_ep->worker->uct, + ucp_wireup_ep_progress, wireup_ep, 0, + &wireup_ep->progress_id); + ucp_worker_signal_internal(ucp_ep->worker); +} + +int ucp_wireup_ep_test(uct_ep_h uct_ep) +{ + return uct_ep->iface->ops.ep_destroy == + UCS_CLASS_DELETE_FUNC_NAME(ucp_wireup_ep_t); +} + +int ucp_wireup_ep_is_owner(uct_ep_h uct_ep, uct_ep_h owned_ep) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + + if (wireup_ep == NULL) { + return 0; + } + + return (wireup_ep->aux_ep == owned_ep) || + (wireup_ep->sockaddr_ep == owned_ep) || + (wireup_ep->super.uct_ep == 
owned_ep); +} + +void ucp_wireup_ep_disown(uct_ep_h uct_ep, uct_ep_h owned_ep) +{ + ucp_wireup_ep_t *wireup_ep = ucp_wireup_ep(uct_ep); + + ucs_assert_always(wireup_ep != NULL); + if (wireup_ep->aux_ep == owned_ep) { + wireup_ep->aux_ep = NULL; + } else if (wireup_ep->sockaddr_ep == owned_ep) { + wireup_ep->sockaddr_ep = NULL; + } else if (wireup_ep->super.uct_ep == owned_ep) { + ucp_proxy_ep_extract(uct_ep); + } +} + +ucp_wireup_ep_t *ucp_wireup_ep(uct_ep_h uct_ep) +{ + return ucp_wireup_ep_test(uct_ep) ? + ucs_derived_of(uct_ep, ucp_wireup_ep_t) : NULL; +} diff --git a/src/ucp/wireup/wireup_ep.h b/src/ucp/wireup/wireup_ep.h new file mode 100644 index 0000000..efc4bc1 --- /dev/null +++ b/src/ucp/wireup/wireup_ep.h @@ -0,0 +1,98 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCP_WIREUP_EP_H_ +#define UCP_WIREUP_EP_H_ + +#include "address.h" + +#include +#include +#include + + +/** + * Stub endpoint flags + */ +enum { + UCP_WIREUP_EP_FLAG_READY = UCS_BIT(0), /**< next_ep is fully connected */ + UCP_WIREUP_EP_FLAG_LOCAL_CONNECTED = UCS_BIT(1), /**< Debug: next_ep connected to remote */ +}; + + +/** + * Wireup proxy endpoint, to hold off send requests until wireup process completes. + * It is placed instead UCT endpoint before it's fully connected, and for AM + * endpoint it also contains an auxiliary endpoint which can send wireup messages. 
+ */ +struct ucp_wireup_ep { + ucp_proxy_ep_t super; /**< Derive from ucp_proxy_ep_t */ + ucs_queue_head_t pending_q; /**< Queue of pending operations */ + uct_ep_h aux_ep; /**< Used to wireup the "real" endpoint */ + uct_ep_h sockaddr_ep; /**< Used for client-server wireup */ + ucp_rsc_index_t aux_rsc_index; /**< Index of auxiliary transport */ + ucp_rsc_index_t sockaddr_rsc_index; /**< Index of sockaddr transport */ + volatile uint32_t pending_count; /**< Number of pending wireup operations */ + volatile uint32_t flags; /**< Connection state flags */ + uct_worker_cb_id_t progress_id; /**< ID of progress function */ + unsigned ep_init_flags; /**< UCP wireup EP init flags */ +}; + + +/** + * Create a proxy endpoint for wireup. + */ +ucs_status_t ucp_wireup_ep_create(ucp_ep_h ep, uct_ep_h *ep_p); + + +/** + * @return Auxiliary resource index used by the wireup endpoint. + * If the endpoint is not a wireup endpoint, return UCP_NULL_RESOURCE. + */ +ucp_rsc_index_t ucp_wireup_ep_get_aux_rsc_index(uct_ep_h uct_ep); + + +/** + * Create endpoint for the real transport, which we would eventually connect. + * After this function is called, it would be possible to send wireup messages + * on this endpoint, if connect_aux is 1. + * + * @param [in] uct_ep Stub endpoint to connect. + * @param [in] ucp_ep_init_flags Initial flags of UCP EP. + * @param [in] rsc_index Resource of the real transport. + * @param [in] connect_aux Whether to connect the auxiliary transport, + * for sending. + * @param [in] remote_address Remote address connect to. 
+ */ +ucs_status_t ucp_wireup_ep_connect(uct_ep_h uct_ep, unsigned ucp_ep_init_flags, + ucp_rsc_index_t rsc_index, int connect_aux, + const ucp_unpacked_address_t *remote_address); + +ucs_status_t ucp_wireup_ep_connect_to_sockaddr(uct_ep_h uct_ep, + const ucp_ep_params_t *params); + +ucs_status_t +ucp_wireup_ep_connect_aux(ucp_wireup_ep_t *wireup_ep, unsigned ep_init_flags, + const ucp_unpacked_address_t *remote_address); + +void ucp_wireup_ep_set_next_ep(uct_ep_h uct_ep, uct_ep_h next_ep); + +uct_ep_h ucp_wireup_ep_extract_next_ep(uct_ep_h uct_ep); + +void ucp_wireup_ep_remote_connected(uct_ep_h uct_ep); + +int ucp_wireup_ep_test(uct_ep_h uct_ep); + +int ucp_wireup_ep_is_owner(uct_ep_h uct_ep, uct_ep_h owned_ep); + +void ucp_wireup_ep_disown(uct_ep_h uct_ep, uct_ep_h owned_ep); + +ucs_status_t ucp_wireup_ep_progress_pending(uct_pending_req_t *self); + +ucp_wireup_ep_t *ucp_wireup_ep(uct_ep_h uct_ep); + +#endif diff --git a/src/ucs/Makefile.am b/src/ucs/Makefile.am new file mode 100644 index 0000000..2bd0a8d --- /dev/null +++ b/src/ucs/Makefile.am @@ -0,0 +1,177 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. +# Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + +AUTOMAKE_OPTIONS = nostdinc # avoid collision with built-in debug.h +lib_LTLIBRARIES = libucs.la +bin_PROGRAMS = + +libucs_la_CPPFLAGS = $(BASE_CPPFLAGS) -DUCX_MODULE_DIR=\"$(moduledir)\" +libucs_la_CFLAGS = $(BASE_CFLAGS) +libucs_la_LDFLAGS = -ldl $(NUMA_LIBS) -version-info $(SOVERSION) +libucs_ladir = $(includedir)/ucs +libucs_la_LIBADD = $(LIBM) $(top_builddir)/src/ucm/libucm.la + +nobase_dist_libucs_la_HEADERS = \ + algorithm/crc.h \ + algorithm/qsort_r.h \ + async/async_fwd.h \ + config/global_opts.h \ + config/parser.h \ + config/types.h \ + datastruct/callbackq.h \ + datastruct/khash.h \ + datastruct/linear_func.h \ + datastruct/list_types.h \ + datastruct/list.h \ + datastruct/mpool.h \ + datastruct/pgtable.h \ + datastruct/queue_types.h \ + datastruct/strided_alloc.h \ + datastruct/string_buffer.h \ + datastruct/string_set.h \ + memory/rcache.h \ + memory/memory_type.h \ + memory/memtype_cache.h \ + profile/profile_defs.h \ + profile/profile_off.h \ + profile/profile_on.h \ + stats/stats_fwd.h \ + stats/libstats.h \ + sys/event_set.h \ + sys/compiler_def.h\ + sys/math.h \ + sys/preprocessor.h \ + sys/string.h \ + sys/sock.h \ + sys/stubs.h \ + time/time_def.h \ + type/class.h \ + type/init_once.h \ + type/spinlock.h \ + type/status.h \ + type/thread_mode.h \ + type/cpu_set.h \ + arch/x86_64/global_opts.h \ + arch/aarch64/global_opts.h \ + arch/ppc64/global_opts.h \ + arch/global_opts.h + +noinst_HEADERS = \ + arch/aarch64/bitops.h \ + arch/aarch64/cpu.h \ + arch/generic/atomic.h \ + arch/generic/cpu.h \ + arch/ppc64/bitops.h \ + arch/ppc64/cpu.h \ + arch/x86_64/atomic.h \ + arch/x86_64/bitops.h \ + arch/x86_64/cpu.h \ + arch/atomic.h \ + arch/bitops.h \ + arch/cpu.h \ + datastruct/arbiter.h \ + datastruct/frag_list.h \ + datastruct/mpmc.h \ + datastruct/mpool.inl \ + datastruct/ptr_array.h \ + datastruct/queue.h \ + datastruct/sglib.h \ + datastruct/sglib_wrapper.h \ + debug/assert.h \ + debug/debug.h \ + debug/log.h \ + debug/memtrack.h \ + 
memory/numa.h \ + memory/rcache_int.h \ + profile/profile.h \ + stats/stats.h \ + sys/checker.h \ + sys/compiler.h \ + sys/module.h \ + sys/sys.h \ + sys/iovec.h \ + time/time.h \ + time/timerq.h \ + time/timer_wheel.h \ + async/async.h \ + async/pipe.h \ + async/signal.h \ + async/thread.h \ + async/async_int.h + +libucs_la_SOURCES = \ + algorithm/crc.c \ + algorithm/qsort_r.c \ + arch/aarch64/cpu.c \ + arch/aarch64/global_opts.c \ + arch/ppc64/timebase.c \ + arch/ppc64/global_opts.c \ + arch/x86_64/cpu.c \ + arch/x86_64/global_opts.c \ + arch/cpu.c \ + async/async.c \ + async/signal.c \ + async/pipe.c \ + async/thread.c \ + config/global_opts.c \ + config/ucm_opts.c \ + config/parser.c \ + datastruct/arbiter.c \ + datastruct/callbackq.c \ + datastruct/frag_list.c \ + datastruct/mpmc.c \ + datastruct/mpool.c \ + datastruct/pgtable.c \ + datastruct/ptr_array.c \ + datastruct/strided_alloc.c \ + datastruct/string_buffer.c \ + datastruct/string_set.c \ + debug/assert.c \ + debug/debug.c \ + debug/log.c \ + debug/memtrack.c \ + memory/memory_type.c \ + memory/memtype_cache.c \ + memory/numa.c \ + memory/rcache.c \ + profile/profile.c \ + stats/stats.c \ + sys/event_set.c \ + sys/init.c \ + sys/math.c \ + sys/module.c \ + sys/string.c \ + sys/sys.c \ + sys/iovec.c \ + sys/sock.c \ + sys/stubs.c \ + time/time.c \ + time/timer_wheel.c \ + time/timerq.c \ + type/class.c \ + type/status.c \ + type/init_once.c + +if HAVE_STATS +libucs_la_SOURCES += \ + stats/client_server.c \ + stats/serialization.c \ + stats/libstats.c + +bin_PROGRAMS += ucs_stats_parser +ucs_stats_parser_CPPFLAGS = $(BASE_CPPFLAGS) +ucs_stats_parser_LDADD = libucs.la +ucs_stats_parser_SOURCES = stats/stats_parser.c +endif + +all-local: $(objdir)/$(modulesubdir) + +$(objdir)/$(modulesubdir): $(lib_LTLIBRARIES) + $(AM_V_at)$(LN_RS) -fn $(localmoduledir) $(objdir)/$(modulesubdir) + +#TODO stats/stats_dump.c +#TODO stats/stats_reader.c diff --git a/src/ucs/Makefile.in b/src/ucs/Makefile.in new file mode 
100644 index 0000000..e16c763 --- /dev/null +++ b/src/ucs/Makefile.in @@ -0,0 +1,1963 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. +# Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +bin_PROGRAMS = $(am__EXEEXT_1) +@HAVE_STATS_TRUE@am__append_1 = \ +@HAVE_STATS_TRUE@ stats/client_server.c \ +@HAVE_STATS_TRUE@ stats/serialization.c \ +@HAVE_STATS_TRUE@ stats/libstats.c + 
+@HAVE_STATS_TRUE@am__append_2 = ucs_stats_parser +subdir = src/ucs +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + 
$(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(nobase_dist_libucs_la_HEADERS) \ + $(noinst_HEADERS) $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@HAVE_STATS_TRUE@am__EXEEXT_1 = ucs_stats_parser$(EXEEXT) +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" \ + "$(DESTDIR)$(libucs_ladir)" +PROGRAMS = $(bin_PROGRAMS) +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +LTLIBRARIES = $(lib_LTLIBRARIES) +am__DEPENDENCIES_1 = +libucs_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \ + $(top_builddir)/src/ucm/libucm.la +am__libucs_la_SOURCES_DIST = algorithm/crc.c algorithm/qsort_r.c \ + arch/aarch64/cpu.c arch/aarch64/global_opts.c \ + arch/ppc64/timebase.c arch/ppc64/global_opts.c \ + arch/x86_64/cpu.c arch/x86_64/global_opts.c arch/cpu.c \ + async/async.c async/signal.c async/pipe.c async/thread.c \ + config/global_opts.c config/ucm_opts.c config/parser.c \ + datastruct/arbiter.c datastruct/callbackq.c \ + datastruct/frag_list.c datastruct/mpmc.c datastruct/mpool.c \ + datastruct/pgtable.c datastruct/ptr_array.c \ + datastruct/strided_alloc.c datastruct/string_buffer.c \ + datastruct/string_set.c debug/assert.c debug/debug.c \ + debug/log.c debug/memtrack.c memory/memory_type.c \ + memory/memtype_cache.c memory/numa.c memory/rcache.c \ + profile/profile.c stats/stats.c sys/event_set.c sys/init.c \ + sys/math.c sys/module.c sys/string.c sys/sys.c sys/iovec.c \ + sys/sock.c sys/stubs.c time/time.c time/timer_wheel.c \ + time/timerq.c type/class.c type/status.c type/init_once.c \ + stats/client_server.c stats/serialization.c stats/libstats.c +am__dirstamp = $(am__leading_dot)dirstamp +@HAVE_STATS_TRUE@am__objects_1 = stats/libucs_la-client_server.lo \ +@HAVE_STATS_TRUE@ stats/libucs_la-serialization.lo \ +@HAVE_STATS_TRUE@ stats/libucs_la-libstats.lo +am_libucs_la_OBJECTS = algorithm/libucs_la-crc.lo \ + algorithm/libucs_la-qsort_r.lo arch/aarch64/libucs_la-cpu.lo \ + arch/aarch64/libucs_la-global_opts.lo \ + arch/ppc64/libucs_la-timebase.lo \ + arch/ppc64/libucs_la-global_opts.lo \ + arch/x86_64/libucs_la-cpu.lo \ + arch/x86_64/libucs_la-global_opts.lo arch/libucs_la-cpu.lo \ + async/libucs_la-async.lo async/libucs_la-signal.lo \ + async/libucs_la-pipe.lo async/libucs_la-thread.lo \ + config/libucs_la-global_opts.lo 
config/libucs_la-ucm_opts.lo \ + config/libucs_la-parser.lo datastruct/libucs_la-arbiter.lo \ + datastruct/libucs_la-callbackq.lo \ + datastruct/libucs_la-frag_list.lo datastruct/libucs_la-mpmc.lo \ + datastruct/libucs_la-mpool.lo datastruct/libucs_la-pgtable.lo \ + datastruct/libucs_la-ptr_array.lo \ + datastruct/libucs_la-strided_alloc.lo \ + datastruct/libucs_la-string_buffer.lo \ + datastruct/libucs_la-string_set.lo debug/libucs_la-assert.lo \ + debug/libucs_la-debug.lo debug/libucs_la-log.lo \ + debug/libucs_la-memtrack.lo memory/libucs_la-memory_type.lo \ + memory/libucs_la-memtype_cache.lo memory/libucs_la-numa.lo \ + memory/libucs_la-rcache.lo profile/libucs_la-profile.lo \ + stats/libucs_la-stats.lo sys/libucs_la-event_set.lo \ + sys/libucs_la-init.lo sys/libucs_la-math.lo \ + sys/libucs_la-module.lo sys/libucs_la-string.lo \ + sys/libucs_la-sys.lo sys/libucs_la-iovec.lo \ + sys/libucs_la-sock.lo sys/libucs_la-stubs.lo \ + time/libucs_la-time.lo time/libucs_la-timer_wheel.lo \ + time/libucs_la-timerq.lo type/libucs_la-class.lo \ + type/libucs_la-status.lo type/libucs_la-init_once.lo \ + $(am__objects_1) +libucs_la_OBJECTS = $(am_libucs_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libucs_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libucs_la_CFLAGS) \ + $(CFLAGS) $(libucs_la_LDFLAGS) $(LDFLAGS) -o $@ +am__ucs_stats_parser_SOURCES_DIST = stats/stats_parser.c +@HAVE_STATS_TRUE@am_ucs_stats_parser_OBJECTS = stats/ucs_stats_parser-stats_parser.$(OBJEXT) +ucs_stats_parser_OBJECTS = $(am_ucs_stats_parser_OBJECTS) +@HAVE_STATS_TRUE@ucs_stats_parser_DEPENDENCIES = libucs.la +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = 
$(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = algorithm/$(DEPDIR)/libucs_la-crc.Plo \ + algorithm/$(DEPDIR)/libucs_la-qsort_r.Plo \ + arch/$(DEPDIR)/libucs_la-cpu.Plo \ + arch/aarch64/$(DEPDIR)/libucs_la-cpu.Plo \ + arch/aarch64/$(DEPDIR)/libucs_la-global_opts.Plo \ + arch/ppc64/$(DEPDIR)/libucs_la-global_opts.Plo \ + arch/ppc64/$(DEPDIR)/libucs_la-timebase.Plo \ + arch/x86_64/$(DEPDIR)/libucs_la-cpu.Plo \ + arch/x86_64/$(DEPDIR)/libucs_la-global_opts.Plo \ + async/$(DEPDIR)/libucs_la-async.Plo \ + async/$(DEPDIR)/libucs_la-pipe.Plo \ + async/$(DEPDIR)/libucs_la-signal.Plo \ + async/$(DEPDIR)/libucs_la-thread.Plo \ + config/$(DEPDIR)/libucs_la-global_opts.Plo \ + config/$(DEPDIR)/libucs_la-parser.Plo \ + config/$(DEPDIR)/libucs_la-ucm_opts.Plo \ + datastruct/$(DEPDIR)/libucs_la-arbiter.Plo \ + datastruct/$(DEPDIR)/libucs_la-callbackq.Plo \ + datastruct/$(DEPDIR)/libucs_la-frag_list.Plo \ + datastruct/$(DEPDIR)/libucs_la-mpmc.Plo \ + datastruct/$(DEPDIR)/libucs_la-mpool.Plo \ + datastruct/$(DEPDIR)/libucs_la-pgtable.Plo \ + datastruct/$(DEPDIR)/libucs_la-ptr_array.Plo \ + datastruct/$(DEPDIR)/libucs_la-strided_alloc.Plo \ + datastruct/$(DEPDIR)/libucs_la-string_buffer.Plo \ + datastruct/$(DEPDIR)/libucs_la-string_set.Plo \ + debug/$(DEPDIR)/libucs_la-assert.Plo \ + debug/$(DEPDIR)/libucs_la-debug.Plo \ + debug/$(DEPDIR)/libucs_la-log.Plo \ + debug/$(DEPDIR)/libucs_la-memtrack.Plo \ + memory/$(DEPDIR)/libucs_la-memory_type.Plo \ + memory/$(DEPDIR)/libucs_la-memtype_cache.Plo \ + memory/$(DEPDIR)/libucs_la-numa.Plo \ + memory/$(DEPDIR)/libucs_la-rcache.Plo \ + profile/$(DEPDIR)/libucs_la-profile.Plo \ + stats/$(DEPDIR)/libucs_la-client_server.Plo \ + stats/$(DEPDIR)/libucs_la-libstats.Plo \ + stats/$(DEPDIR)/libucs_la-serialization.Plo \ + stats/$(DEPDIR)/libucs_la-stats.Plo \ + stats/$(DEPDIR)/ucs_stats_parser-stats_parser.Po \ + 
sys/$(DEPDIR)/libucs_la-event_set.Plo \ + sys/$(DEPDIR)/libucs_la-init.Plo \ + sys/$(DEPDIR)/libucs_la-iovec.Plo \ + sys/$(DEPDIR)/libucs_la-math.Plo \ + sys/$(DEPDIR)/libucs_la-module.Plo \ + sys/$(DEPDIR)/libucs_la-sock.Plo \ + sys/$(DEPDIR)/libucs_la-string.Plo \ + sys/$(DEPDIR)/libucs_la-stubs.Plo \ + sys/$(DEPDIR)/libucs_la-sys.Plo \ + time/$(DEPDIR)/libucs_la-time.Plo \ + time/$(DEPDIR)/libucs_la-timer_wheel.Plo \ + time/$(DEPDIR)/libucs_la-timerq.Plo \ + type/$(DEPDIR)/libucs_la-class.Plo \ + type/$(DEPDIR)/libucs_la-init_once.Plo \ + type/$(DEPDIR)/libucs_la-status.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libucs_la_SOURCES) $(ucs_stats_parser_SOURCES) +DIST_SOURCES = $(am__libucs_la_SOURCES_DIST) \ + $(am__ucs_stats_parser_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(nobase_dist_libucs_la_HEADERS) $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. 
+am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = 
@DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ 
+PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ 
+pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +AUTOMAKE_OPTIONS = nostdinc # avoid collision with built-in debug.h +lib_LTLIBRARIES = libucs.la +libucs_la_CPPFLAGS = $(BASE_CPPFLAGS) -DUCX_MODULE_DIR=\"$(moduledir)\" +libucs_la_CFLAGS = $(BASE_CFLAGS) +libucs_la_LDFLAGS = -ldl $(NUMA_LIBS) -version-info $(SOVERSION) +libucs_ladir = $(includedir)/ucs +libucs_la_LIBADD = $(LIBM) $(top_builddir)/src/ucm/libucm.la +nobase_dist_libucs_la_HEADERS = \ + algorithm/crc.h \ + algorithm/qsort_r.h \ + async/async_fwd.h \ + config/global_opts.h \ + config/parser.h \ + config/types.h \ + datastruct/callbackq.h \ + datastruct/khash.h \ + datastruct/linear_func.h \ + datastruct/list_types.h \ + datastruct/list.h \ + datastruct/mpool.h \ + datastruct/pgtable.h \ + datastruct/queue_types.h \ + datastruct/strided_alloc.h \ + datastruct/string_buffer.h \ + datastruct/string_set.h \ + memory/rcache.h \ + memory/memory_type.h \ + memory/memtype_cache.h \ + profile/profile_defs.h \ + profile/profile_off.h \ + profile/profile_on.h \ + stats/stats_fwd.h \ + stats/libstats.h \ + sys/event_set.h \ + sys/compiler_def.h\ + sys/math.h \ + sys/preprocessor.h \ + sys/string.h \ + sys/sock.h \ + sys/stubs.h \ + time/time_def.h \ + type/class.h \ + type/init_once.h \ + type/spinlock.h \ + type/status.h \ + type/thread_mode.h \ + type/cpu_set.h \ + arch/x86_64/global_opts.h \ + arch/aarch64/global_opts.h \ + arch/ppc64/global_opts.h \ + arch/global_opts.h + +noinst_HEADERS = \ + arch/aarch64/bitops.h \ + arch/aarch64/cpu.h \ + arch/generic/atomic.h \ + arch/generic/cpu.h \ + arch/ppc64/bitops.h \ + arch/ppc64/cpu.h \ + arch/x86_64/atomic.h \ + 
arch/x86_64/bitops.h \ + arch/x86_64/cpu.h \ + arch/atomic.h \ + arch/bitops.h \ + arch/cpu.h \ + datastruct/arbiter.h \ + datastruct/frag_list.h \ + datastruct/mpmc.h \ + datastruct/mpool.inl \ + datastruct/ptr_array.h \ + datastruct/queue.h \ + datastruct/sglib.h \ + datastruct/sglib_wrapper.h \ + debug/assert.h \ + debug/debug.h \ + debug/log.h \ + debug/memtrack.h \ + memory/numa.h \ + memory/rcache_int.h \ + profile/profile.h \ + stats/stats.h \ + sys/checker.h \ + sys/compiler.h \ + sys/module.h \ + sys/sys.h \ + sys/iovec.h \ + time/time.h \ + time/timerq.h \ + time/timer_wheel.h \ + async/async.h \ + async/pipe.h \ + async/signal.h \ + async/thread.h \ + async/async_int.h + +libucs_la_SOURCES = algorithm/crc.c algorithm/qsort_r.c \ + arch/aarch64/cpu.c arch/aarch64/global_opts.c \ + arch/ppc64/timebase.c arch/ppc64/global_opts.c \ + arch/x86_64/cpu.c arch/x86_64/global_opts.c arch/cpu.c \ + async/async.c async/signal.c async/pipe.c async/thread.c \ + config/global_opts.c config/ucm_opts.c config/parser.c \ + datastruct/arbiter.c datastruct/callbackq.c \ + datastruct/frag_list.c datastruct/mpmc.c datastruct/mpool.c \ + datastruct/pgtable.c datastruct/ptr_array.c \ + datastruct/strided_alloc.c datastruct/string_buffer.c \ + datastruct/string_set.c debug/assert.c debug/debug.c \ + debug/log.c debug/memtrack.c memory/memory_type.c \ + memory/memtype_cache.c memory/numa.c memory/rcache.c \ + profile/profile.c stats/stats.c sys/event_set.c sys/init.c \ + sys/math.c sys/module.c sys/string.c sys/sys.c sys/iovec.c \ + sys/sock.c sys/stubs.c time/time.c time/timer_wheel.c \ + time/timerq.c type/class.c type/status.c type/init_once.c \ + $(am__append_1) +@HAVE_STATS_TRUE@ucs_stats_parser_CPPFLAGS = $(BASE_CPPFLAGS) +@HAVE_STATS_TRUE@ucs_stats_parser_LDADD = libucs.la +@HAVE_STATS_TRUE@ucs_stats_parser_SOURCES = stats/stats_parser.c +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am 
$(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/ucs/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/ucs/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for 
(d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +algorithm/$(am__dirstamp): + @$(MKDIR_P) algorithm + @: > algorithm/$(am__dirstamp) +algorithm/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) algorithm/$(DEPDIR) + @: > algorithm/$(DEPDIR)/$(am__dirstamp) +algorithm/libucs_la-crc.lo: algorithm/$(am__dirstamp) \ + algorithm/$(DEPDIR)/$(am__dirstamp) +algorithm/libucs_la-qsort_r.lo: algorithm/$(am__dirstamp) \ + algorithm/$(DEPDIR)/$(am__dirstamp) +arch/aarch64/$(am__dirstamp): + @$(MKDIR_P) arch/aarch64 + @: > arch/aarch64/$(am__dirstamp) +arch/aarch64/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) arch/aarch64/$(DEPDIR) + @: > arch/aarch64/$(DEPDIR)/$(am__dirstamp) +arch/aarch64/libucs_la-cpu.lo: arch/aarch64/$(am__dirstamp) \ + arch/aarch64/$(DEPDIR)/$(am__dirstamp) +arch/aarch64/libucs_la-global_opts.lo: arch/aarch64/$(am__dirstamp) \ + arch/aarch64/$(DEPDIR)/$(am__dirstamp) +arch/ppc64/$(am__dirstamp): + @$(MKDIR_P) arch/ppc64 + @: > arch/ppc64/$(am__dirstamp) +arch/ppc64/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) arch/ppc64/$(DEPDIR) + @: > arch/ppc64/$(DEPDIR)/$(am__dirstamp) +arch/ppc64/libucs_la-timebase.lo: arch/ppc64/$(am__dirstamp) \ + arch/ppc64/$(DEPDIR)/$(am__dirstamp) +arch/ppc64/libucs_la-global_opts.lo: arch/ppc64/$(am__dirstamp) \ + arch/ppc64/$(DEPDIR)/$(am__dirstamp) +arch/x86_64/$(am__dirstamp): + @$(MKDIR_P) arch/x86_64 + @: > arch/x86_64/$(am__dirstamp) +arch/x86_64/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) arch/x86_64/$(DEPDIR) + @: > arch/x86_64/$(DEPDIR)/$(am__dirstamp) +arch/x86_64/libucs_la-cpu.lo: arch/x86_64/$(am__dirstamp) \ + 
arch/x86_64/$(DEPDIR)/$(am__dirstamp) +arch/x86_64/libucs_la-global_opts.lo: arch/x86_64/$(am__dirstamp) \ + arch/x86_64/$(DEPDIR)/$(am__dirstamp) +arch/$(am__dirstamp): + @$(MKDIR_P) arch + @: > arch/$(am__dirstamp) +arch/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) arch/$(DEPDIR) + @: > arch/$(DEPDIR)/$(am__dirstamp) +arch/libucs_la-cpu.lo: arch/$(am__dirstamp) \ + arch/$(DEPDIR)/$(am__dirstamp) +async/$(am__dirstamp): + @$(MKDIR_P) async + @: > async/$(am__dirstamp) +async/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) async/$(DEPDIR) + @: > async/$(DEPDIR)/$(am__dirstamp) +async/libucs_la-async.lo: async/$(am__dirstamp) \ + async/$(DEPDIR)/$(am__dirstamp) +async/libucs_la-signal.lo: async/$(am__dirstamp) \ + async/$(DEPDIR)/$(am__dirstamp) +async/libucs_la-pipe.lo: async/$(am__dirstamp) \ + async/$(DEPDIR)/$(am__dirstamp) +async/libucs_la-thread.lo: async/$(am__dirstamp) \ + async/$(DEPDIR)/$(am__dirstamp) +config/$(am__dirstamp): + @$(MKDIR_P) config + @: > config/$(am__dirstamp) +config/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) config/$(DEPDIR) + @: > config/$(DEPDIR)/$(am__dirstamp) +config/libucs_la-global_opts.lo: config/$(am__dirstamp) \ + config/$(DEPDIR)/$(am__dirstamp) +config/libucs_la-ucm_opts.lo: config/$(am__dirstamp) \ + config/$(DEPDIR)/$(am__dirstamp) +config/libucs_la-parser.lo: config/$(am__dirstamp) \ + config/$(DEPDIR)/$(am__dirstamp) +datastruct/$(am__dirstamp): + @$(MKDIR_P) datastruct + @: > datastruct/$(am__dirstamp) +datastruct/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) datastruct/$(DEPDIR) + @: > datastruct/$(DEPDIR)/$(am__dirstamp) +datastruct/libucs_la-arbiter.lo: datastruct/$(am__dirstamp) \ + datastruct/$(DEPDIR)/$(am__dirstamp) +datastruct/libucs_la-callbackq.lo: datastruct/$(am__dirstamp) \ + datastruct/$(DEPDIR)/$(am__dirstamp) +datastruct/libucs_la-frag_list.lo: datastruct/$(am__dirstamp) \ + datastruct/$(DEPDIR)/$(am__dirstamp) +datastruct/libucs_la-mpmc.lo: datastruct/$(am__dirstamp) \ + datastruct/$(DEPDIR)/$(am__dirstamp) 
+datastruct/libucs_la-mpool.lo: datastruct/$(am__dirstamp) \ + datastruct/$(DEPDIR)/$(am__dirstamp) +datastruct/libucs_la-pgtable.lo: datastruct/$(am__dirstamp) \ + datastruct/$(DEPDIR)/$(am__dirstamp) +datastruct/libucs_la-ptr_array.lo: datastruct/$(am__dirstamp) \ + datastruct/$(DEPDIR)/$(am__dirstamp) +datastruct/libucs_la-strided_alloc.lo: datastruct/$(am__dirstamp) \ + datastruct/$(DEPDIR)/$(am__dirstamp) +datastruct/libucs_la-string_buffer.lo: datastruct/$(am__dirstamp) \ + datastruct/$(DEPDIR)/$(am__dirstamp) +datastruct/libucs_la-string_set.lo: datastruct/$(am__dirstamp) \ + datastruct/$(DEPDIR)/$(am__dirstamp) +debug/$(am__dirstamp): + @$(MKDIR_P) debug + @: > debug/$(am__dirstamp) +debug/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) debug/$(DEPDIR) + @: > debug/$(DEPDIR)/$(am__dirstamp) +debug/libucs_la-assert.lo: debug/$(am__dirstamp) \ + debug/$(DEPDIR)/$(am__dirstamp) +debug/libucs_la-debug.lo: debug/$(am__dirstamp) \ + debug/$(DEPDIR)/$(am__dirstamp) +debug/libucs_la-log.lo: debug/$(am__dirstamp) \ + debug/$(DEPDIR)/$(am__dirstamp) +debug/libucs_la-memtrack.lo: debug/$(am__dirstamp) \ + debug/$(DEPDIR)/$(am__dirstamp) +memory/$(am__dirstamp): + @$(MKDIR_P) memory + @: > memory/$(am__dirstamp) +memory/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) memory/$(DEPDIR) + @: > memory/$(DEPDIR)/$(am__dirstamp) +memory/libucs_la-memory_type.lo: memory/$(am__dirstamp) \ + memory/$(DEPDIR)/$(am__dirstamp) +memory/libucs_la-memtype_cache.lo: memory/$(am__dirstamp) \ + memory/$(DEPDIR)/$(am__dirstamp) +memory/libucs_la-numa.lo: memory/$(am__dirstamp) \ + memory/$(DEPDIR)/$(am__dirstamp) +memory/libucs_la-rcache.lo: memory/$(am__dirstamp) \ + memory/$(DEPDIR)/$(am__dirstamp) +profile/$(am__dirstamp): + @$(MKDIR_P) profile + @: > profile/$(am__dirstamp) +profile/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) profile/$(DEPDIR) + @: > profile/$(DEPDIR)/$(am__dirstamp) +profile/libucs_la-profile.lo: profile/$(am__dirstamp) \ + profile/$(DEPDIR)/$(am__dirstamp) +stats/$(am__dirstamp): + 
@$(MKDIR_P) stats + @: > stats/$(am__dirstamp) +stats/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) stats/$(DEPDIR) + @: > stats/$(DEPDIR)/$(am__dirstamp) +stats/libucs_la-stats.lo: stats/$(am__dirstamp) \ + stats/$(DEPDIR)/$(am__dirstamp) +sys/$(am__dirstamp): + @$(MKDIR_P) sys + @: > sys/$(am__dirstamp) +sys/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sys/$(DEPDIR) + @: > sys/$(DEPDIR)/$(am__dirstamp) +sys/libucs_la-event_set.lo: sys/$(am__dirstamp) \ + sys/$(DEPDIR)/$(am__dirstamp) +sys/libucs_la-init.lo: sys/$(am__dirstamp) \ + sys/$(DEPDIR)/$(am__dirstamp) +sys/libucs_la-math.lo: sys/$(am__dirstamp) \ + sys/$(DEPDIR)/$(am__dirstamp) +sys/libucs_la-module.lo: sys/$(am__dirstamp) \ + sys/$(DEPDIR)/$(am__dirstamp) +sys/libucs_la-string.lo: sys/$(am__dirstamp) \ + sys/$(DEPDIR)/$(am__dirstamp) +sys/libucs_la-sys.lo: sys/$(am__dirstamp) \ + sys/$(DEPDIR)/$(am__dirstamp) +sys/libucs_la-iovec.lo: sys/$(am__dirstamp) \ + sys/$(DEPDIR)/$(am__dirstamp) +sys/libucs_la-sock.lo: sys/$(am__dirstamp) \ + sys/$(DEPDIR)/$(am__dirstamp) +sys/libucs_la-stubs.lo: sys/$(am__dirstamp) \ + sys/$(DEPDIR)/$(am__dirstamp) +time/$(am__dirstamp): + @$(MKDIR_P) time + @: > time/$(am__dirstamp) +time/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) time/$(DEPDIR) + @: > time/$(DEPDIR)/$(am__dirstamp) +time/libucs_la-time.lo: time/$(am__dirstamp) \ + time/$(DEPDIR)/$(am__dirstamp) +time/libucs_la-timer_wheel.lo: time/$(am__dirstamp) \ + time/$(DEPDIR)/$(am__dirstamp) +time/libucs_la-timerq.lo: time/$(am__dirstamp) \ + time/$(DEPDIR)/$(am__dirstamp) +type/$(am__dirstamp): + @$(MKDIR_P) type + @: > type/$(am__dirstamp) +type/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) type/$(DEPDIR) + @: > type/$(DEPDIR)/$(am__dirstamp) +type/libucs_la-class.lo: type/$(am__dirstamp) \ + type/$(DEPDIR)/$(am__dirstamp) +type/libucs_la-status.lo: type/$(am__dirstamp) \ + type/$(DEPDIR)/$(am__dirstamp) +type/libucs_la-init_once.lo: type/$(am__dirstamp) \ + type/$(DEPDIR)/$(am__dirstamp) +stats/libucs_la-client_server.lo: 
stats/$(am__dirstamp) \ + stats/$(DEPDIR)/$(am__dirstamp) +stats/libucs_la-serialization.lo: stats/$(am__dirstamp) \ + stats/$(DEPDIR)/$(am__dirstamp) +stats/libucs_la-libstats.lo: stats/$(am__dirstamp) \ + stats/$(DEPDIR)/$(am__dirstamp) + +libucs.la: $(libucs_la_OBJECTS) $(libucs_la_DEPENDENCIES) $(EXTRA_libucs_la_DEPENDENCIES) + $(AM_V_CCLD)$(libucs_la_LINK) -rpath $(libdir) $(libucs_la_OBJECTS) $(libucs_la_LIBADD) $(LIBS) +stats/ucs_stats_parser-stats_parser.$(OBJEXT): stats/$(am__dirstamp) \ + stats/$(DEPDIR)/$(am__dirstamp) + +ucs_stats_parser$(EXEEXT): $(ucs_stats_parser_OBJECTS) $(ucs_stats_parser_DEPENDENCIES) $(EXTRA_ucs_stats_parser_DEPENDENCIES) + @rm -f ucs_stats_parser$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(ucs_stats_parser_OBJECTS) $(ucs_stats_parser_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f algorithm/*.$(OBJEXT) + -rm -f algorithm/*.lo + -rm -f arch/*.$(OBJEXT) + -rm -f arch/*.lo + -rm -f arch/aarch64/*.$(OBJEXT) + -rm -f arch/aarch64/*.lo + -rm -f arch/ppc64/*.$(OBJEXT) + -rm -f arch/ppc64/*.lo + -rm -f arch/x86_64/*.$(OBJEXT) + -rm -f arch/x86_64/*.lo + -rm -f async/*.$(OBJEXT) + -rm -f async/*.lo + -rm -f config/*.$(OBJEXT) + -rm -f config/*.lo + -rm -f datastruct/*.$(OBJEXT) + -rm -f datastruct/*.lo + -rm -f debug/*.$(OBJEXT) + -rm -f debug/*.lo + -rm -f memory/*.$(OBJEXT) + -rm -f memory/*.lo + -rm -f profile/*.$(OBJEXT) + -rm -f profile/*.lo + -rm -f stats/*.$(OBJEXT) + -rm -f stats/*.lo + -rm -f sys/*.$(OBJEXT) + -rm -f sys/*.lo + -rm -f time/*.$(OBJEXT) + -rm -f time/*.lo + -rm -f type/*.$(OBJEXT) + -rm -f type/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@algorithm/$(DEPDIR)/libucs_la-crc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@algorithm/$(DEPDIR)/libucs_la-qsort_r.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@arch/$(DEPDIR)/libucs_la-cpu.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@arch/aarch64/$(DEPDIR)/libucs_la-cpu.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@arch/aarch64/$(DEPDIR)/libucs_la-global_opts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@arch/ppc64/$(DEPDIR)/libucs_la-global_opts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@arch/ppc64/$(DEPDIR)/libucs_la-timebase.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@arch/x86_64/$(DEPDIR)/libucs_la-cpu.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@arch/x86_64/$(DEPDIR)/libucs_la-global_opts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@async/$(DEPDIR)/libucs_la-async.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@async/$(DEPDIR)/libucs_la-pipe.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@async/$(DEPDIR)/libucs_la-signal.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@async/$(DEPDIR)/libucs_la-thread.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@config/$(DEPDIR)/libucs_la-global_opts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@config/$(DEPDIR)/libucs_la-parser.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@config/$(DEPDIR)/libucs_la-ucm_opts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datastruct/$(DEPDIR)/libucs_la-arbiter.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datastruct/$(DEPDIR)/libucs_la-callbackq.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datastruct/$(DEPDIR)/libucs_la-frag_list.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datastruct/$(DEPDIR)/libucs_la-mpmc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datastruct/$(DEPDIR)/libucs_la-mpool.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@datastruct/$(DEPDIR)/libucs_la-pgtable.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datastruct/$(DEPDIR)/libucs_la-ptr_array.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datastruct/$(DEPDIR)/libucs_la-strided_alloc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datastruct/$(DEPDIR)/libucs_la-string_buffer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@datastruct/$(DEPDIR)/libucs_la-string_set.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/$(DEPDIR)/libucs_la-assert.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/$(DEPDIR)/libucs_la-debug.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/$(DEPDIR)/libucs_la-log.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@debug/$(DEPDIR)/libucs_la-memtrack.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@memory/$(DEPDIR)/libucs_la-memory_type.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@memory/$(DEPDIR)/libucs_la-memtype_cache.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@memory/$(DEPDIR)/libucs_la-numa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@memory/$(DEPDIR)/libucs_la-rcache.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@profile/$(DEPDIR)/libucs_la-profile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@stats/$(DEPDIR)/libucs_la-client_server.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@stats/$(DEPDIR)/libucs_la-libstats.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@stats/$(DEPDIR)/libucs_la-serialization.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@stats/$(DEPDIR)/libucs_la-stats.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@stats/$(DEPDIR)/ucs_stats_parser-stats_parser.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sys/$(DEPDIR)/libucs_la-event_set.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sys/$(DEPDIR)/libucs_la-init.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sys/$(DEPDIR)/libucs_la-iovec.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sys/$(DEPDIR)/libucs_la-math.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sys/$(DEPDIR)/libucs_la-module.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sys/$(DEPDIR)/libucs_la-sock.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sys/$(DEPDIR)/libucs_la-string.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sys/$(DEPDIR)/libucs_la-stubs.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sys/$(DEPDIR)/libucs_la-sys.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@time/$(DEPDIR)/libucs_la-time.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@time/$(DEPDIR)/libucs_la-timer_wheel.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@time/$(DEPDIR)/libucs_la-timerq.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@type/$(DEPDIR)/libucs_la-class.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@type/$(DEPDIR)/libucs_la-init_once.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@type/$(DEPDIR)/libucs_la-status.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP 
-MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +algorithm/libucs_la-crc.lo: algorithm/crc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT algorithm/libucs_la-crc.lo -MD -MP -MF algorithm/$(DEPDIR)/libucs_la-crc.Tpo -c -o algorithm/libucs_la-crc.lo `test -f 'algorithm/crc.c' || echo '$(srcdir)/'`algorithm/crc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) algorithm/$(DEPDIR)/libucs_la-crc.Tpo algorithm/$(DEPDIR)/libucs_la-crc.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='algorithm/crc.c' object='algorithm/libucs_la-crc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o algorithm/libucs_la-crc.lo `test -f 'algorithm/crc.c' || echo '$(srcdir)/'`algorithm/crc.c + +algorithm/libucs_la-qsort_r.lo: algorithm/qsort_r.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT algorithm/libucs_la-qsort_r.lo -MD -MP -MF algorithm/$(DEPDIR)/libucs_la-qsort_r.Tpo -c -o algorithm/libucs_la-qsort_r.lo `test -f 'algorithm/qsort_r.c' || echo '$(srcdir)/'`algorithm/qsort_r.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) algorithm/$(DEPDIR)/libucs_la-qsort_r.Tpo algorithm/$(DEPDIR)/libucs_la-qsort_r.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='algorithm/qsort_r.c' object='algorithm/libucs_la-qsort_r.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o algorithm/libucs_la-qsort_r.lo `test -f 'algorithm/qsort_r.c' || echo '$(srcdir)/'`algorithm/qsort_r.c + +arch/aarch64/libucs_la-cpu.lo: arch/aarch64/cpu.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT 
arch/aarch64/libucs_la-cpu.lo -MD -MP -MF arch/aarch64/$(DEPDIR)/libucs_la-cpu.Tpo -c -o arch/aarch64/libucs_la-cpu.lo `test -f 'arch/aarch64/cpu.c' || echo '$(srcdir)/'`arch/aarch64/cpu.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) arch/aarch64/$(DEPDIR)/libucs_la-cpu.Tpo arch/aarch64/$(DEPDIR)/libucs_la-cpu.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='arch/aarch64/cpu.c' object='arch/aarch64/libucs_la-cpu.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o arch/aarch64/libucs_la-cpu.lo `test -f 'arch/aarch64/cpu.c' || echo '$(srcdir)/'`arch/aarch64/cpu.c + +arch/aarch64/libucs_la-global_opts.lo: arch/aarch64/global_opts.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT arch/aarch64/libucs_la-global_opts.lo -MD -MP -MF arch/aarch64/$(DEPDIR)/libucs_la-global_opts.Tpo -c -o arch/aarch64/libucs_la-global_opts.lo `test -f 'arch/aarch64/global_opts.c' || echo '$(srcdir)/'`arch/aarch64/global_opts.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) arch/aarch64/$(DEPDIR)/libucs_la-global_opts.Tpo arch/aarch64/$(DEPDIR)/libucs_la-global_opts.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='arch/aarch64/global_opts.c' object='arch/aarch64/libucs_la-global_opts.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) 
$(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o arch/aarch64/libucs_la-global_opts.lo `test -f 'arch/aarch64/global_opts.c' || echo '$(srcdir)/'`arch/aarch64/global_opts.c + +arch/ppc64/libucs_la-timebase.lo: arch/ppc64/timebase.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT arch/ppc64/libucs_la-timebase.lo -MD -MP -MF arch/ppc64/$(DEPDIR)/libucs_la-timebase.Tpo -c -o arch/ppc64/libucs_la-timebase.lo `test -f 'arch/ppc64/timebase.c' || echo '$(srcdir)/'`arch/ppc64/timebase.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) arch/ppc64/$(DEPDIR)/libucs_la-timebase.Tpo arch/ppc64/$(DEPDIR)/libucs_la-timebase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='arch/ppc64/timebase.c' object='arch/ppc64/libucs_la-timebase.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o arch/ppc64/libucs_la-timebase.lo `test -f 'arch/ppc64/timebase.c' || echo '$(srcdir)/'`arch/ppc64/timebase.c + +arch/ppc64/libucs_la-global_opts.lo: arch/ppc64/global_opts.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT arch/ppc64/libucs_la-global_opts.lo -MD -MP -MF arch/ppc64/$(DEPDIR)/libucs_la-global_opts.Tpo -c -o arch/ppc64/libucs_la-global_opts.lo `test -f 'arch/ppc64/global_opts.c' || echo '$(srcdir)/'`arch/ppc64/global_opts.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) arch/ppc64/$(DEPDIR)/libucs_la-global_opts.Tpo 
arch/ppc64/$(DEPDIR)/libucs_la-global_opts.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='arch/ppc64/global_opts.c' object='arch/ppc64/libucs_la-global_opts.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o arch/ppc64/libucs_la-global_opts.lo `test -f 'arch/ppc64/global_opts.c' || echo '$(srcdir)/'`arch/ppc64/global_opts.c + +arch/x86_64/libucs_la-cpu.lo: arch/x86_64/cpu.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT arch/x86_64/libucs_la-cpu.lo -MD -MP -MF arch/x86_64/$(DEPDIR)/libucs_la-cpu.Tpo -c -o arch/x86_64/libucs_la-cpu.lo `test -f 'arch/x86_64/cpu.c' || echo '$(srcdir)/'`arch/x86_64/cpu.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) arch/x86_64/$(DEPDIR)/libucs_la-cpu.Tpo arch/x86_64/$(DEPDIR)/libucs_la-cpu.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='arch/x86_64/cpu.c' object='arch/x86_64/libucs_la-cpu.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o arch/x86_64/libucs_la-cpu.lo `test -f 'arch/x86_64/cpu.c' || echo '$(srcdir)/'`arch/x86_64/cpu.c + +arch/x86_64/libucs_la-global_opts.lo: arch/x86_64/global_opts.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT arch/x86_64/libucs_la-global_opts.lo -MD -MP -MF arch/x86_64/$(DEPDIR)/libucs_la-global_opts.Tpo -c -o arch/x86_64/libucs_la-global_opts.lo `test -f 'arch/x86_64/global_opts.c' || echo '$(srcdir)/'`arch/x86_64/global_opts.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) arch/x86_64/$(DEPDIR)/libucs_la-global_opts.Tpo arch/x86_64/$(DEPDIR)/libucs_la-global_opts.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='arch/x86_64/global_opts.c' object='arch/x86_64/libucs_la-global_opts.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o arch/x86_64/libucs_la-global_opts.lo `test -f 'arch/x86_64/global_opts.c' || echo '$(srcdir)/'`arch/x86_64/global_opts.c + +arch/libucs_la-cpu.lo: arch/cpu.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT arch/libucs_la-cpu.lo -MD -MP -MF arch/$(DEPDIR)/libucs_la-cpu.Tpo -c -o arch/libucs_la-cpu.lo `test -f 'arch/cpu.c' || echo '$(srcdir)/'`arch/cpu.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) arch/$(DEPDIR)/libucs_la-cpu.Tpo arch/$(DEPDIR)/libucs_la-cpu.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='arch/cpu.c' object='arch/libucs_la-cpu.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o arch/libucs_la-cpu.lo `test -f 'arch/cpu.c' || echo '$(srcdir)/'`arch/cpu.c + +async/libucs_la-async.lo: async/async.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT async/libucs_la-async.lo -MD -MP -MF async/$(DEPDIR)/libucs_la-async.Tpo -c -o async/libucs_la-async.lo `test -f 'async/async.c' || echo '$(srcdir)/'`async/async.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) async/$(DEPDIR)/libucs_la-async.Tpo async/$(DEPDIR)/libucs_la-async.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='async/async.c' object='async/libucs_la-async.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o async/libucs_la-async.lo `test -f 'async/async.c' || echo '$(srcdir)/'`async/async.c + +async/libucs_la-signal.lo: async/signal.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT async/libucs_la-signal.lo -MD -MP -MF async/$(DEPDIR)/libucs_la-signal.Tpo -c -o async/libucs_la-signal.lo `test -f 'async/signal.c' || echo '$(srcdir)/'`async/signal.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) async/$(DEPDIR)/libucs_la-signal.Tpo async/$(DEPDIR)/libucs_la-signal.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='async/signal.c' object='async/libucs_la-signal.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o async/libucs_la-signal.lo `test -f 'async/signal.c' || echo '$(srcdir)/'`async/signal.c + +async/libucs_la-pipe.lo: async/pipe.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT async/libucs_la-pipe.lo -MD -MP -MF async/$(DEPDIR)/libucs_la-pipe.Tpo -c -o async/libucs_la-pipe.lo `test -f 'async/pipe.c' || echo '$(srcdir)/'`async/pipe.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) async/$(DEPDIR)/libucs_la-pipe.Tpo async/$(DEPDIR)/libucs_la-pipe.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='async/pipe.c' object='async/libucs_la-pipe.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o async/libucs_la-pipe.lo `test -f 'async/pipe.c' || echo '$(srcdir)/'`async/pipe.c + +async/libucs_la-thread.lo: async/thread.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT async/libucs_la-thread.lo -MD -MP -MF async/$(DEPDIR)/libucs_la-thread.Tpo -c -o async/libucs_la-thread.lo `test -f 'async/thread.c' || echo '$(srcdir)/'`async/thread.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) async/$(DEPDIR)/libucs_la-thread.Tpo async/$(DEPDIR)/libucs_la-thread.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='async/thread.c' object='async/libucs_la-thread.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o async/libucs_la-thread.lo `test -f 'async/thread.c' || echo '$(srcdir)/'`async/thread.c + +config/libucs_la-global_opts.lo: config/global_opts.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT config/libucs_la-global_opts.lo -MD -MP -MF config/$(DEPDIR)/libucs_la-global_opts.Tpo -c -o config/libucs_la-global_opts.lo `test -f 'config/global_opts.c' || echo '$(srcdir)/'`config/global_opts.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) config/$(DEPDIR)/libucs_la-global_opts.Tpo config/$(DEPDIR)/libucs_la-global_opts.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='config/global_opts.c' object='config/libucs_la-global_opts.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o config/libucs_la-global_opts.lo `test -f 'config/global_opts.c' || echo '$(srcdir)/'`config/global_opts.c + +config/libucs_la-ucm_opts.lo: config/ucm_opts.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) 
$(libucs_la_CFLAGS) $(CFLAGS) -MT config/libucs_la-ucm_opts.lo -MD -MP -MF config/$(DEPDIR)/libucs_la-ucm_opts.Tpo -c -o config/libucs_la-ucm_opts.lo `test -f 'config/ucm_opts.c' || echo '$(srcdir)/'`config/ucm_opts.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) config/$(DEPDIR)/libucs_la-ucm_opts.Tpo config/$(DEPDIR)/libucs_la-ucm_opts.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='config/ucm_opts.c' object='config/libucs_la-ucm_opts.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o config/libucs_la-ucm_opts.lo `test -f 'config/ucm_opts.c' || echo '$(srcdir)/'`config/ucm_opts.c + +config/libucs_la-parser.lo: config/parser.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT config/libucs_la-parser.lo -MD -MP -MF config/$(DEPDIR)/libucs_la-parser.Tpo -c -o config/libucs_la-parser.lo `test -f 'config/parser.c' || echo '$(srcdir)/'`config/parser.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) config/$(DEPDIR)/libucs_la-parser.Tpo config/$(DEPDIR)/libucs_la-parser.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='config/parser.c' object='config/libucs_la-parser.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o config/libucs_la-parser.lo `test -f 'config/parser.c' 
|| echo '$(srcdir)/'`config/parser.c + +datastruct/libucs_la-arbiter.lo: datastruct/arbiter.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT datastruct/libucs_la-arbiter.lo -MD -MP -MF datastruct/$(DEPDIR)/libucs_la-arbiter.Tpo -c -o datastruct/libucs_la-arbiter.lo `test -f 'datastruct/arbiter.c' || echo '$(srcdir)/'`datastruct/arbiter.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datastruct/$(DEPDIR)/libucs_la-arbiter.Tpo datastruct/$(DEPDIR)/libucs_la-arbiter.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datastruct/arbiter.c' object='datastruct/libucs_la-arbiter.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o datastruct/libucs_la-arbiter.lo `test -f 'datastruct/arbiter.c' || echo '$(srcdir)/'`datastruct/arbiter.c + +datastruct/libucs_la-callbackq.lo: datastruct/callbackq.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT datastruct/libucs_la-callbackq.lo -MD -MP -MF datastruct/$(DEPDIR)/libucs_la-callbackq.Tpo -c -o datastruct/libucs_la-callbackq.lo `test -f 'datastruct/callbackq.c' || echo '$(srcdir)/'`datastruct/callbackq.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datastruct/$(DEPDIR)/libucs_la-callbackq.Tpo datastruct/$(DEPDIR)/libucs_la-callbackq.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datastruct/callbackq.c' object='datastruct/libucs_la-callbackq.lo' libtool=yes 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o datastruct/libucs_la-callbackq.lo `test -f 'datastruct/callbackq.c' || echo '$(srcdir)/'`datastruct/callbackq.c + +datastruct/libucs_la-frag_list.lo: datastruct/frag_list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT datastruct/libucs_la-frag_list.lo -MD -MP -MF datastruct/$(DEPDIR)/libucs_la-frag_list.Tpo -c -o datastruct/libucs_la-frag_list.lo `test -f 'datastruct/frag_list.c' || echo '$(srcdir)/'`datastruct/frag_list.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datastruct/$(DEPDIR)/libucs_la-frag_list.Tpo datastruct/$(DEPDIR)/libucs_la-frag_list.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datastruct/frag_list.c' object='datastruct/libucs_la-frag_list.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o datastruct/libucs_la-frag_list.lo `test -f 'datastruct/frag_list.c' || echo '$(srcdir)/'`datastruct/frag_list.c + +datastruct/libucs_la-mpmc.lo: datastruct/mpmc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT datastruct/libucs_la-mpmc.lo -MD 
-MP -MF datastruct/$(DEPDIR)/libucs_la-mpmc.Tpo -c -o datastruct/libucs_la-mpmc.lo `test -f 'datastruct/mpmc.c' || echo '$(srcdir)/'`datastruct/mpmc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datastruct/$(DEPDIR)/libucs_la-mpmc.Tpo datastruct/$(DEPDIR)/libucs_la-mpmc.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datastruct/mpmc.c' object='datastruct/libucs_la-mpmc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o datastruct/libucs_la-mpmc.lo `test -f 'datastruct/mpmc.c' || echo '$(srcdir)/'`datastruct/mpmc.c + +datastruct/libucs_la-mpool.lo: datastruct/mpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT datastruct/libucs_la-mpool.lo -MD -MP -MF datastruct/$(DEPDIR)/libucs_la-mpool.Tpo -c -o datastruct/libucs_la-mpool.lo `test -f 'datastruct/mpool.c' || echo '$(srcdir)/'`datastruct/mpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datastruct/$(DEPDIR)/libucs_la-mpool.Tpo datastruct/$(DEPDIR)/libucs_la-mpool.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datastruct/mpool.c' object='datastruct/libucs_la-mpool.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o datastruct/libucs_la-mpool.lo `test -f 'datastruct/mpool.c' || echo 
'$(srcdir)/'`datastruct/mpool.c + +datastruct/libucs_la-pgtable.lo: datastruct/pgtable.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT datastruct/libucs_la-pgtable.lo -MD -MP -MF datastruct/$(DEPDIR)/libucs_la-pgtable.Tpo -c -o datastruct/libucs_la-pgtable.lo `test -f 'datastruct/pgtable.c' || echo '$(srcdir)/'`datastruct/pgtable.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datastruct/$(DEPDIR)/libucs_la-pgtable.Tpo datastruct/$(DEPDIR)/libucs_la-pgtable.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datastruct/pgtable.c' object='datastruct/libucs_la-pgtable.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o datastruct/libucs_la-pgtable.lo `test -f 'datastruct/pgtable.c' || echo '$(srcdir)/'`datastruct/pgtable.c + +datastruct/libucs_la-ptr_array.lo: datastruct/ptr_array.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT datastruct/libucs_la-ptr_array.lo -MD -MP -MF datastruct/$(DEPDIR)/libucs_la-ptr_array.Tpo -c -o datastruct/libucs_la-ptr_array.lo `test -f 'datastruct/ptr_array.c' || echo '$(srcdir)/'`datastruct/ptr_array.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datastruct/$(DEPDIR)/libucs_la-ptr_array.Tpo datastruct/$(DEPDIR)/libucs_la-ptr_array.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datastruct/ptr_array.c' object='datastruct/libucs_la-ptr_array.lo' libtool=yes 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o datastruct/libucs_la-ptr_array.lo `test -f 'datastruct/ptr_array.c' || echo '$(srcdir)/'`datastruct/ptr_array.c + +datastruct/libucs_la-strided_alloc.lo: datastruct/strided_alloc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT datastruct/libucs_la-strided_alloc.lo -MD -MP -MF datastruct/$(DEPDIR)/libucs_la-strided_alloc.Tpo -c -o datastruct/libucs_la-strided_alloc.lo `test -f 'datastruct/strided_alloc.c' || echo '$(srcdir)/'`datastruct/strided_alloc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datastruct/$(DEPDIR)/libucs_la-strided_alloc.Tpo datastruct/$(DEPDIR)/libucs_la-strided_alloc.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datastruct/strided_alloc.c' object='datastruct/libucs_la-strided_alloc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o datastruct/libucs_la-strided_alloc.lo `test -f 'datastruct/strided_alloc.c' || echo '$(srcdir)/'`datastruct/strided_alloc.c + +datastruct/libucs_la-string_buffer.lo: datastruct/string_buffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) 
$(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT datastruct/libucs_la-string_buffer.lo -MD -MP -MF datastruct/$(DEPDIR)/libucs_la-string_buffer.Tpo -c -o datastruct/libucs_la-string_buffer.lo `test -f 'datastruct/string_buffer.c' || echo '$(srcdir)/'`datastruct/string_buffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datastruct/$(DEPDIR)/libucs_la-string_buffer.Tpo datastruct/$(DEPDIR)/libucs_la-string_buffer.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datastruct/string_buffer.c' object='datastruct/libucs_la-string_buffer.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o datastruct/libucs_la-string_buffer.lo `test -f 'datastruct/string_buffer.c' || echo '$(srcdir)/'`datastruct/string_buffer.c + +datastruct/libucs_la-string_set.lo: datastruct/string_set.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT datastruct/libucs_la-string_set.lo -MD -MP -MF datastruct/$(DEPDIR)/libucs_la-string_set.Tpo -c -o datastruct/libucs_la-string_set.lo `test -f 'datastruct/string_set.c' || echo '$(srcdir)/'`datastruct/string_set.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) datastruct/$(DEPDIR)/libucs_la-string_set.Tpo datastruct/$(DEPDIR)/libucs_la-string_set.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datastruct/string_set.c' object='datastruct/libucs_la-string_set.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o datastruct/libucs_la-string_set.lo `test -f 'datastruct/string_set.c' || echo '$(srcdir)/'`datastruct/string_set.c + +debug/libucs_la-assert.lo: debug/assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT debug/libucs_la-assert.lo -MD -MP -MF debug/$(DEPDIR)/libucs_la-assert.Tpo -c -o debug/libucs_la-assert.lo `test -f 'debug/assert.c' || echo '$(srcdir)/'`debug/assert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) debug/$(DEPDIR)/libucs_la-assert.Tpo debug/$(DEPDIR)/libucs_la-assert.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='debug/assert.c' object='debug/libucs_la-assert.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o debug/libucs_la-assert.lo `test -f 'debug/assert.c' || echo '$(srcdir)/'`debug/assert.c + +debug/libucs_la-debug.lo: debug/debug.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT debug/libucs_la-debug.lo -MD -MP -MF debug/$(DEPDIR)/libucs_la-debug.Tpo -c -o debug/libucs_la-debug.lo `test -f 'debug/debug.c' || echo '$(srcdir)/'`debug/debug.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) debug/$(DEPDIR)/libucs_la-debug.Tpo debug/$(DEPDIR)/libucs_la-debug.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='debug/debug.c' object='debug/libucs_la-debug.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o debug/libucs_la-debug.lo `test -f 'debug/debug.c' || echo '$(srcdir)/'`debug/debug.c + +debug/libucs_la-log.lo: debug/log.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT debug/libucs_la-log.lo -MD -MP -MF debug/$(DEPDIR)/libucs_la-log.Tpo -c -o debug/libucs_la-log.lo `test -f 'debug/log.c' || echo '$(srcdir)/'`debug/log.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) debug/$(DEPDIR)/libucs_la-log.Tpo debug/$(DEPDIR)/libucs_la-log.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='debug/log.c' object='debug/libucs_la-log.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o debug/libucs_la-log.lo `test -f 'debug/log.c' || echo '$(srcdir)/'`debug/log.c + +debug/libucs_la-memtrack.lo: debug/memtrack.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT debug/libucs_la-memtrack.lo -MD -MP -MF debug/$(DEPDIR)/libucs_la-memtrack.Tpo -c -o debug/libucs_la-memtrack.lo `test -f 
'debug/memtrack.c' || echo '$(srcdir)/'`debug/memtrack.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) debug/$(DEPDIR)/libucs_la-memtrack.Tpo debug/$(DEPDIR)/libucs_la-memtrack.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='debug/memtrack.c' object='debug/libucs_la-memtrack.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o debug/libucs_la-memtrack.lo `test -f 'debug/memtrack.c' || echo '$(srcdir)/'`debug/memtrack.c + +memory/libucs_la-memory_type.lo: memory/memory_type.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT memory/libucs_la-memory_type.lo -MD -MP -MF memory/$(DEPDIR)/libucs_la-memory_type.Tpo -c -o memory/libucs_la-memory_type.lo `test -f 'memory/memory_type.c' || echo '$(srcdir)/'`memory/memory_type.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) memory/$(DEPDIR)/libucs_la-memory_type.Tpo memory/$(DEPDIR)/libucs_la-memory_type.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='memory/memory_type.c' object='memory/libucs_la-memory_type.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o memory/libucs_la-memory_type.lo `test -f 'memory/memory_type.c' || echo '$(srcdir)/'`memory/memory_type.c + +memory/libucs_la-memtype_cache.lo: memory/memtype_cache.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT memory/libucs_la-memtype_cache.lo -MD -MP -MF memory/$(DEPDIR)/libucs_la-memtype_cache.Tpo -c -o memory/libucs_la-memtype_cache.lo `test -f 'memory/memtype_cache.c' || echo '$(srcdir)/'`memory/memtype_cache.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) memory/$(DEPDIR)/libucs_la-memtype_cache.Tpo memory/$(DEPDIR)/libucs_la-memtype_cache.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='memory/memtype_cache.c' object='memory/libucs_la-memtype_cache.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o memory/libucs_la-memtype_cache.lo `test -f 'memory/memtype_cache.c' || echo '$(srcdir)/'`memory/memtype_cache.c + +memory/libucs_la-numa.lo: memory/numa.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT memory/libucs_la-numa.lo -MD -MP -MF memory/$(DEPDIR)/libucs_la-numa.Tpo -c -o memory/libucs_la-numa.lo `test -f 'memory/numa.c' || echo '$(srcdir)/'`memory/numa.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) memory/$(DEPDIR)/libucs_la-numa.Tpo memory/$(DEPDIR)/libucs_la-numa.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='memory/numa.c' object='memory/libucs_la-numa.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o memory/libucs_la-numa.lo `test -f 'memory/numa.c' || echo '$(srcdir)/'`memory/numa.c + +memory/libucs_la-rcache.lo: memory/rcache.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT memory/libucs_la-rcache.lo -MD -MP -MF memory/$(DEPDIR)/libucs_la-rcache.Tpo -c -o memory/libucs_la-rcache.lo `test -f 'memory/rcache.c' || echo '$(srcdir)/'`memory/rcache.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) memory/$(DEPDIR)/libucs_la-rcache.Tpo memory/$(DEPDIR)/libucs_la-rcache.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='memory/rcache.c' object='memory/libucs_la-rcache.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o memory/libucs_la-rcache.lo `test -f 'memory/rcache.c' || echo '$(srcdir)/'`memory/rcache.c + +profile/libucs_la-profile.lo: profile/profile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT profile/libucs_la-profile.lo -MD -MP -MF profile/$(DEPDIR)/libucs_la-profile.Tpo -c -o profile/libucs_la-profile.lo `test -f 'profile/profile.c' || echo '$(srcdir)/'`profile/profile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) profile/$(DEPDIR)/libucs_la-profile.Tpo profile/$(DEPDIR)/libucs_la-profile.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='profile/profile.c' object='profile/libucs_la-profile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o profile/libucs_la-profile.lo `test -f 'profile/profile.c' || echo '$(srcdir)/'`profile/profile.c + +stats/libucs_la-stats.lo: stats/stats.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT stats/libucs_la-stats.lo -MD -MP -MF stats/$(DEPDIR)/libucs_la-stats.Tpo -c -o stats/libucs_la-stats.lo `test -f 'stats/stats.c' || echo '$(srcdir)/'`stats/stats.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) stats/$(DEPDIR)/libucs_la-stats.Tpo stats/$(DEPDIR)/libucs_la-stats.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stats/stats.c' object='stats/libucs_la-stats.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o stats/libucs_la-stats.lo `test -f 'stats/stats.c' || echo '$(srcdir)/'`stats/stats.c + +sys/libucs_la-event_set.lo: sys/event_set.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT sys/libucs_la-event_set.lo -MD -MP -MF 
sys/$(DEPDIR)/libucs_la-event_set.Tpo -c -o sys/libucs_la-event_set.lo `test -f 'sys/event_set.c' || echo '$(srcdir)/'`sys/event_set.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sys/$(DEPDIR)/libucs_la-event_set.Tpo sys/$(DEPDIR)/libucs_la-event_set.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys/event_set.c' object='sys/libucs_la-event_set.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o sys/libucs_la-event_set.lo `test -f 'sys/event_set.c' || echo '$(srcdir)/'`sys/event_set.c + +sys/libucs_la-init.lo: sys/init.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT sys/libucs_la-init.lo -MD -MP -MF sys/$(DEPDIR)/libucs_la-init.Tpo -c -o sys/libucs_la-init.lo `test -f 'sys/init.c' || echo '$(srcdir)/'`sys/init.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sys/$(DEPDIR)/libucs_la-init.Tpo sys/$(DEPDIR)/libucs_la-init.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys/init.c' object='sys/libucs_la-init.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o sys/libucs_la-init.lo `test -f 'sys/init.c' || echo '$(srcdir)/'`sys/init.c + +sys/libucs_la-math.lo: sys/math.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT sys/libucs_la-math.lo -MD -MP -MF sys/$(DEPDIR)/libucs_la-math.Tpo -c -o sys/libucs_la-math.lo `test -f 'sys/math.c' || echo '$(srcdir)/'`sys/math.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sys/$(DEPDIR)/libucs_la-math.Tpo sys/$(DEPDIR)/libucs_la-math.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys/math.c' object='sys/libucs_la-math.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o sys/libucs_la-math.lo `test -f 'sys/math.c' || echo '$(srcdir)/'`sys/math.c + +sys/libucs_la-module.lo: sys/module.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT sys/libucs_la-module.lo -MD -MP -MF sys/$(DEPDIR)/libucs_la-module.Tpo -c -o sys/libucs_la-module.lo `test -f 'sys/module.c' || echo '$(srcdir)/'`sys/module.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sys/$(DEPDIR)/libucs_la-module.Tpo sys/$(DEPDIR)/libucs_la-module.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys/module.c' object='sys/libucs_la-module.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o sys/libucs_la-module.lo `test -f 'sys/module.c' || echo 
'$(srcdir)/'`sys/module.c + +sys/libucs_la-string.lo: sys/string.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT sys/libucs_la-string.lo -MD -MP -MF sys/$(DEPDIR)/libucs_la-string.Tpo -c -o sys/libucs_la-string.lo `test -f 'sys/string.c' || echo '$(srcdir)/'`sys/string.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sys/$(DEPDIR)/libucs_la-string.Tpo sys/$(DEPDIR)/libucs_la-string.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys/string.c' object='sys/libucs_la-string.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o sys/libucs_la-string.lo `test -f 'sys/string.c' || echo '$(srcdir)/'`sys/string.c + +sys/libucs_la-sys.lo: sys/sys.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT sys/libucs_la-sys.lo -MD -MP -MF sys/$(DEPDIR)/libucs_la-sys.Tpo -c -o sys/libucs_la-sys.lo `test -f 'sys/sys.c' || echo '$(srcdir)/'`sys/sys.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sys/$(DEPDIR)/libucs_la-sys.Tpo sys/$(DEPDIR)/libucs_la-sys.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys/sys.c' object='sys/libucs_la-sys.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o sys/libucs_la-sys.lo `test -f 'sys/sys.c' || echo '$(srcdir)/'`sys/sys.c + +sys/libucs_la-iovec.lo: sys/iovec.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT sys/libucs_la-iovec.lo -MD -MP -MF sys/$(DEPDIR)/libucs_la-iovec.Tpo -c -o sys/libucs_la-iovec.lo `test -f 'sys/iovec.c' || echo '$(srcdir)/'`sys/iovec.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sys/$(DEPDIR)/libucs_la-iovec.Tpo sys/$(DEPDIR)/libucs_la-iovec.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys/iovec.c' object='sys/libucs_la-iovec.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o sys/libucs_la-iovec.lo `test -f 'sys/iovec.c' || echo '$(srcdir)/'`sys/iovec.c + +sys/libucs_la-sock.lo: sys/sock.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT sys/libucs_la-sock.lo -MD -MP -MF sys/$(DEPDIR)/libucs_la-sock.Tpo -c -o sys/libucs_la-sock.lo `test -f 'sys/sock.c' || echo '$(srcdir)/'`sys/sock.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sys/$(DEPDIR)/libucs_la-sock.Tpo sys/$(DEPDIR)/libucs_la-sock.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys/sock.c' object='sys/libucs_la-sock.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o sys/libucs_la-sock.lo `test -f 'sys/sock.c' || echo '$(srcdir)/'`sys/sock.c + +sys/libucs_la-stubs.lo: sys/stubs.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT sys/libucs_la-stubs.lo -MD -MP -MF sys/$(DEPDIR)/libucs_la-stubs.Tpo -c -o sys/libucs_la-stubs.lo `test -f 'sys/stubs.c' || echo '$(srcdir)/'`sys/stubs.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sys/$(DEPDIR)/libucs_la-stubs.Tpo sys/$(DEPDIR)/libucs_la-stubs.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sys/stubs.c' object='sys/libucs_la-stubs.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o sys/libucs_la-stubs.lo `test -f 'sys/stubs.c' || echo '$(srcdir)/'`sys/stubs.c + +time/libucs_la-time.lo: time/time.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT time/libucs_la-time.lo -MD -MP -MF time/$(DEPDIR)/libucs_la-time.Tpo -c -o time/libucs_la-time.lo `test -f 'time/time.c' || echo '$(srcdir)/'`time/time.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) time/$(DEPDIR)/libucs_la-time.Tpo time/$(DEPDIR)/libucs_la-time.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='time/time.c' object='time/libucs_la-time.lo' 
libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o time/libucs_la-time.lo `test -f 'time/time.c' || echo '$(srcdir)/'`time/time.c + +time/libucs_la-timer_wheel.lo: time/timer_wheel.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT time/libucs_la-timer_wheel.lo -MD -MP -MF time/$(DEPDIR)/libucs_la-timer_wheel.Tpo -c -o time/libucs_la-timer_wheel.lo `test -f 'time/timer_wheel.c' || echo '$(srcdir)/'`time/timer_wheel.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) time/$(DEPDIR)/libucs_la-timer_wheel.Tpo time/$(DEPDIR)/libucs_la-timer_wheel.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='time/timer_wheel.c' object='time/libucs_la-timer_wheel.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o time/libucs_la-timer_wheel.lo `test -f 'time/timer_wheel.c' || echo '$(srcdir)/'`time/timer_wheel.c + +time/libucs_la-timerq.lo: time/timerq.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT time/libucs_la-timerq.lo -MD -MP -MF time/$(DEPDIR)/libucs_la-timerq.Tpo -c -o time/libucs_la-timerq.lo `test -f 
'time/timerq.c' || echo '$(srcdir)/'`time/timerq.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) time/$(DEPDIR)/libucs_la-timerq.Tpo time/$(DEPDIR)/libucs_la-timerq.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='time/timerq.c' object='time/libucs_la-timerq.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o time/libucs_la-timerq.lo `test -f 'time/timerq.c' || echo '$(srcdir)/'`time/timerq.c + +type/libucs_la-class.lo: type/class.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT type/libucs_la-class.lo -MD -MP -MF type/$(DEPDIR)/libucs_la-class.Tpo -c -o type/libucs_la-class.lo `test -f 'type/class.c' || echo '$(srcdir)/'`type/class.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) type/$(DEPDIR)/libucs_la-class.Tpo type/$(DEPDIR)/libucs_la-class.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='type/class.c' object='type/libucs_la-class.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o type/libucs_la-class.lo `test -f 'type/class.c' || echo '$(srcdir)/'`type/class.c + +type/libucs_la-status.lo: type/status.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT type/libucs_la-status.lo -MD -MP -MF type/$(DEPDIR)/libucs_la-status.Tpo -c -o type/libucs_la-status.lo `test -f 'type/status.c' || echo '$(srcdir)/'`type/status.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) type/$(DEPDIR)/libucs_la-status.Tpo type/$(DEPDIR)/libucs_la-status.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='type/status.c' object='type/libucs_la-status.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o type/libucs_la-status.lo `test -f 'type/status.c' || echo '$(srcdir)/'`type/status.c + +type/libucs_la-init_once.lo: type/init_once.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT type/libucs_la-init_once.lo -MD -MP -MF type/$(DEPDIR)/libucs_la-init_once.Tpo -c -o type/libucs_la-init_once.lo `test -f 'type/init_once.c' || echo '$(srcdir)/'`type/init_once.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) type/$(DEPDIR)/libucs_la-init_once.Tpo type/$(DEPDIR)/libucs_la-init_once.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='type/init_once.c' object='type/libucs_la-init_once.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o type/libucs_la-init_once.lo `test -f 
'type/init_once.c' || echo '$(srcdir)/'`type/init_once.c + +stats/libucs_la-client_server.lo: stats/client_server.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT stats/libucs_la-client_server.lo -MD -MP -MF stats/$(DEPDIR)/libucs_la-client_server.Tpo -c -o stats/libucs_la-client_server.lo `test -f 'stats/client_server.c' || echo '$(srcdir)/'`stats/client_server.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) stats/$(DEPDIR)/libucs_la-client_server.Tpo stats/$(DEPDIR)/libucs_la-client_server.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stats/client_server.c' object='stats/libucs_la-client_server.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o stats/libucs_la-client_server.lo `test -f 'stats/client_server.c' || echo '$(srcdir)/'`stats/client_server.c + +stats/libucs_la-serialization.lo: stats/serialization.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT stats/libucs_la-serialization.lo -MD -MP -MF stats/$(DEPDIR)/libucs_la-serialization.Tpo -c -o stats/libucs_la-serialization.lo `test -f 'stats/serialization.c' || echo '$(srcdir)/'`stats/serialization.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) stats/$(DEPDIR)/libucs_la-serialization.Tpo stats/$(DEPDIR)/libucs_la-serialization.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stats/serialization.c' 
object='stats/libucs_la-serialization.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o stats/libucs_la-serialization.lo `test -f 'stats/serialization.c' || echo '$(srcdir)/'`stats/serialization.c + +stats/libucs_la-libstats.lo: stats/libstats.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -MT stats/libucs_la-libstats.lo -MD -MP -MF stats/$(DEPDIR)/libucs_la-libstats.Tpo -c -o stats/libucs_la-libstats.lo `test -f 'stats/libstats.c' || echo '$(srcdir)/'`stats/libstats.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) stats/$(DEPDIR)/libucs_la-libstats.Tpo stats/$(DEPDIR)/libucs_la-libstats.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stats/libstats.c' object='stats/libucs_la-libstats.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libucs_la_CPPFLAGS) $(CPPFLAGS) $(libucs_la_CFLAGS) $(CFLAGS) -c -o stats/libucs_la-libstats.lo `test -f 'stats/libstats.c' || echo '$(srcdir)/'`stats/libstats.c + +stats/ucs_stats_parser-stats_parser.o: stats/stats_parser.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucs_stats_parser_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT stats/ucs_stats_parser-stats_parser.o -MD -MP -MF stats/$(DEPDIR)/ucs_stats_parser-stats_parser.Tpo -c -o 
stats/ucs_stats_parser-stats_parser.o `test -f 'stats/stats_parser.c' || echo '$(srcdir)/'`stats/stats_parser.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) stats/$(DEPDIR)/ucs_stats_parser-stats_parser.Tpo stats/$(DEPDIR)/ucs_stats_parser-stats_parser.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stats/stats_parser.c' object='stats/ucs_stats_parser-stats_parser.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucs_stats_parser_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o stats/ucs_stats_parser-stats_parser.o `test -f 'stats/stats_parser.c' || echo '$(srcdir)/'`stats/stats_parser.c + +stats/ucs_stats_parser-stats_parser.obj: stats/stats_parser.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucs_stats_parser_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT stats/ucs_stats_parser-stats_parser.obj -MD -MP -MF stats/$(DEPDIR)/ucs_stats_parser-stats_parser.Tpo -c -o stats/ucs_stats_parser-stats_parser.obj `if test -f 'stats/stats_parser.c'; then $(CYGPATH_W) 'stats/stats_parser.c'; else $(CYGPATH_W) '$(srcdir)/stats/stats_parser.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) stats/$(DEPDIR)/ucs_stats_parser-stats_parser.Tpo stats/$(DEPDIR)/ucs_stats_parser-stats_parser.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stats/stats_parser.c' object='stats/ucs_stats_parser-stats_parser.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucs_stats_parser_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o stats/ucs_stats_parser-stats_parser.obj `if test -f 'stats/stats_parser.c'; then $(CYGPATH_W) 'stats/stats_parser.c'; else $(CYGPATH_W) '$(srcdir)/stats/stats_parser.c'; fi` + 
+mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf algorithm/.libs algorithm/_libs + -rm -rf arch/.libs arch/_libs + -rm -rf arch/aarch64/.libs arch/aarch64/_libs + -rm -rf arch/ppc64/.libs arch/ppc64/_libs + -rm -rf arch/x86_64/.libs arch/x86_64/_libs + -rm -rf async/.libs async/_libs + -rm -rf config/.libs config/_libs + -rm -rf datastruct/.libs datastruct/_libs + -rm -rf debug/.libs debug/_libs + -rm -rf memory/.libs memory/_libs + -rm -rf profile/.libs profile/_libs + -rm -rf stats/.libs stats/_libs + -rm -rf sys/.libs sys/_libs + -rm -rf time/.libs time/_libs + -rm -rf type/.libs type/_libs +install-nobase_dist_libucs_laHEADERS: $(nobase_dist_libucs_la_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nobase_dist_libucs_la_HEADERS)'; test -n "$(libucs_ladir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(libucs_ladir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libucs_ladir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. 
|| { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libucs_ladir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(libucs_ladir)/$$dir"; }; \ + echo " $(INSTALL_HEADER) $$xfiles '$(DESTDIR)$(libucs_ladir)/$$dir'"; \ + $(INSTALL_HEADER) $$xfiles "$(DESTDIR)$(libucs_ladir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_dist_libucs_laHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nobase_dist_libucs_la_HEADERS)'; test -n "$(libucs_ladir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(libucs_ladir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo 
"$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(LTLIBRARIES) $(HEADERS) all-local +install-binPROGRAMS: install-libLTLIBRARIES + +installdirs: + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libucs_ladir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + 
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f algorithm/$(DEPDIR)/$(am__dirstamp) + -rm -f algorithm/$(am__dirstamp) + -rm -f arch/$(DEPDIR)/$(am__dirstamp) + -rm -f arch/$(am__dirstamp) + -rm -f arch/aarch64/$(DEPDIR)/$(am__dirstamp) + -rm -f arch/aarch64/$(am__dirstamp) + -rm -f arch/ppc64/$(DEPDIR)/$(am__dirstamp) + -rm -f arch/ppc64/$(am__dirstamp) + -rm -f arch/x86_64/$(DEPDIR)/$(am__dirstamp) + -rm -f arch/x86_64/$(am__dirstamp) + -rm -f async/$(DEPDIR)/$(am__dirstamp) + -rm -f async/$(am__dirstamp) + -rm -f config/$(DEPDIR)/$(am__dirstamp) + -rm -f config/$(am__dirstamp) + -rm -f datastruct/$(DEPDIR)/$(am__dirstamp) + -rm -f datastruct/$(am__dirstamp) + -rm -f debug/$(DEPDIR)/$(am__dirstamp) + -rm -f debug/$(am__dirstamp) + -rm -f memory/$(DEPDIR)/$(am__dirstamp) + -rm -f memory/$(am__dirstamp) + -rm -f profile/$(DEPDIR)/$(am__dirstamp) + -rm -f profile/$(am__dirstamp) + -rm -f stats/$(DEPDIR)/$(am__dirstamp) + -rm -f stats/$(am__dirstamp) + -rm -f sys/$(DEPDIR)/$(am__dirstamp) + -rm -f sys/$(am__dirstamp) + -rm -f time/$(DEPDIR)/$(am__dirstamp) + -rm -f time/$(am__dirstamp) + -rm -f type/$(DEPDIR)/$(am__dirstamp) + -rm -f type/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \ + clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f algorithm/$(DEPDIR)/libucs_la-crc.Plo + -rm -f algorithm/$(DEPDIR)/libucs_la-qsort_r.Plo + -rm -f arch/$(DEPDIR)/libucs_la-cpu.Plo + -rm -f arch/aarch64/$(DEPDIR)/libucs_la-cpu.Plo + -rm -f arch/aarch64/$(DEPDIR)/libucs_la-global_opts.Plo + -rm -f arch/ppc64/$(DEPDIR)/libucs_la-global_opts.Plo + -rm -f arch/ppc64/$(DEPDIR)/libucs_la-timebase.Plo + -rm -f arch/x86_64/$(DEPDIR)/libucs_la-cpu.Plo + -rm -f arch/x86_64/$(DEPDIR)/libucs_la-global_opts.Plo + -rm -f async/$(DEPDIR)/libucs_la-async.Plo + -rm -f async/$(DEPDIR)/libucs_la-pipe.Plo + -rm -f async/$(DEPDIR)/libucs_la-signal.Plo + -rm -f async/$(DEPDIR)/libucs_la-thread.Plo + -rm -f config/$(DEPDIR)/libucs_la-global_opts.Plo + -rm -f config/$(DEPDIR)/libucs_la-parser.Plo + -rm -f config/$(DEPDIR)/libucs_la-ucm_opts.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-arbiter.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-callbackq.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-frag_list.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-mpmc.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-mpool.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-pgtable.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-ptr_array.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-strided_alloc.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-string_buffer.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-string_set.Plo + -rm -f debug/$(DEPDIR)/libucs_la-assert.Plo + -rm -f debug/$(DEPDIR)/libucs_la-debug.Plo + -rm -f debug/$(DEPDIR)/libucs_la-log.Plo + -rm -f debug/$(DEPDIR)/libucs_la-memtrack.Plo + -rm -f memory/$(DEPDIR)/libucs_la-memory_type.Plo + -rm -f memory/$(DEPDIR)/libucs_la-memtype_cache.Plo + -rm -f memory/$(DEPDIR)/libucs_la-numa.Plo + -rm -f memory/$(DEPDIR)/libucs_la-rcache.Plo + -rm -f profile/$(DEPDIR)/libucs_la-profile.Plo + -rm -f stats/$(DEPDIR)/libucs_la-client_server.Plo + -rm -f 
stats/$(DEPDIR)/libucs_la-libstats.Plo + -rm -f stats/$(DEPDIR)/libucs_la-serialization.Plo + -rm -f stats/$(DEPDIR)/libucs_la-stats.Plo + -rm -f stats/$(DEPDIR)/ucs_stats_parser-stats_parser.Po + -rm -f sys/$(DEPDIR)/libucs_la-event_set.Plo + -rm -f sys/$(DEPDIR)/libucs_la-init.Plo + -rm -f sys/$(DEPDIR)/libucs_la-iovec.Plo + -rm -f sys/$(DEPDIR)/libucs_la-math.Plo + -rm -f sys/$(DEPDIR)/libucs_la-module.Plo + -rm -f sys/$(DEPDIR)/libucs_la-sock.Plo + -rm -f sys/$(DEPDIR)/libucs_la-string.Plo + -rm -f sys/$(DEPDIR)/libucs_la-stubs.Plo + -rm -f sys/$(DEPDIR)/libucs_la-sys.Plo + -rm -f time/$(DEPDIR)/libucs_la-time.Plo + -rm -f time/$(DEPDIR)/libucs_la-timer_wheel.Plo + -rm -f time/$(DEPDIR)/libucs_la-timerq.Plo + -rm -f type/$(DEPDIR)/libucs_la-class.Plo + -rm -f type/$(DEPDIR)/libucs_la-init_once.Plo + -rm -f type/$(DEPDIR)/libucs_la-status.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-nobase_dist_libucs_laHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f algorithm/$(DEPDIR)/libucs_la-crc.Plo + -rm -f algorithm/$(DEPDIR)/libucs_la-qsort_r.Plo + -rm -f arch/$(DEPDIR)/libucs_la-cpu.Plo + -rm -f arch/aarch64/$(DEPDIR)/libucs_la-cpu.Plo + -rm -f arch/aarch64/$(DEPDIR)/libucs_la-global_opts.Plo + -rm -f arch/ppc64/$(DEPDIR)/libucs_la-global_opts.Plo + -rm -f arch/ppc64/$(DEPDIR)/libucs_la-timebase.Plo + -rm -f arch/x86_64/$(DEPDIR)/libucs_la-cpu.Plo + -rm -f arch/x86_64/$(DEPDIR)/libucs_la-global_opts.Plo + -rm -f async/$(DEPDIR)/libucs_la-async.Plo + 
-rm -f async/$(DEPDIR)/libucs_la-pipe.Plo + -rm -f async/$(DEPDIR)/libucs_la-signal.Plo + -rm -f async/$(DEPDIR)/libucs_la-thread.Plo + -rm -f config/$(DEPDIR)/libucs_la-global_opts.Plo + -rm -f config/$(DEPDIR)/libucs_la-parser.Plo + -rm -f config/$(DEPDIR)/libucs_la-ucm_opts.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-arbiter.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-callbackq.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-frag_list.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-mpmc.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-mpool.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-pgtable.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-ptr_array.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-strided_alloc.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-string_buffer.Plo + -rm -f datastruct/$(DEPDIR)/libucs_la-string_set.Plo + -rm -f debug/$(DEPDIR)/libucs_la-assert.Plo + -rm -f debug/$(DEPDIR)/libucs_la-debug.Plo + -rm -f debug/$(DEPDIR)/libucs_la-log.Plo + -rm -f debug/$(DEPDIR)/libucs_la-memtrack.Plo + -rm -f memory/$(DEPDIR)/libucs_la-memory_type.Plo + -rm -f memory/$(DEPDIR)/libucs_la-memtype_cache.Plo + -rm -f memory/$(DEPDIR)/libucs_la-numa.Plo + -rm -f memory/$(DEPDIR)/libucs_la-rcache.Plo + -rm -f profile/$(DEPDIR)/libucs_la-profile.Plo + -rm -f stats/$(DEPDIR)/libucs_la-client_server.Plo + -rm -f stats/$(DEPDIR)/libucs_la-libstats.Plo + -rm -f stats/$(DEPDIR)/libucs_la-serialization.Plo + -rm -f stats/$(DEPDIR)/libucs_la-stats.Plo + -rm -f stats/$(DEPDIR)/ucs_stats_parser-stats_parser.Po + -rm -f sys/$(DEPDIR)/libucs_la-event_set.Plo + -rm -f sys/$(DEPDIR)/libucs_la-init.Plo + -rm -f sys/$(DEPDIR)/libucs_la-iovec.Plo + -rm -f sys/$(DEPDIR)/libucs_la-math.Plo + -rm -f sys/$(DEPDIR)/libucs_la-module.Plo + -rm -f sys/$(DEPDIR)/libucs_la-sock.Plo + -rm -f sys/$(DEPDIR)/libucs_la-string.Plo + -rm -f sys/$(DEPDIR)/libucs_la-stubs.Plo + -rm -f sys/$(DEPDIR)/libucs_la-sys.Plo + -rm -f time/$(DEPDIR)/libucs_la-time.Plo + -rm -f time/$(DEPDIR)/libucs_la-timer_wheel.Plo + -rm -f 
time/$(DEPDIR)/libucs_la-timerq.Plo + -rm -f type/$(DEPDIR)/libucs_la-class.Plo + -rm -f type/$(DEPDIR)/libucs_la-init_once.Plo + -rm -f type/$(DEPDIR)/libucs_la-status.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-libLTLIBRARIES \ + uninstall-nobase_dist_libucs_laHEADERS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-binPROGRAMS clean-generic \ + clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-binPROGRAMS \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-libLTLIBRARIES \ + install-man install-nobase_dist_libucs_laHEADERS install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am uninstall-binPROGRAMS \ + uninstall-libLTLIBRARIES \ + uninstall-nobase_dist_libucs_laHEADERS + +.PRECIOUS: Makefile + + +all-local: $(objdir)/$(modulesubdir) + +$(objdir)/$(modulesubdir): $(lib_LTLIBRARIES) + $(AM_V_at)$(LN_RS) -fn $(localmoduledir) $(objdir)/$(modulesubdir) + +#TODO stats/stats_dump.c +#TODO stats/stats_reader.c + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/src/ucs/algorithm/crc.c b/src/ucs/algorithm/crc.c new file mode 100644 index 0000000..a37d53c --- /dev/null +++ b/src/ucs/algorithm/crc.c @@ -0,0 +1,54 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include + +#include + + +/* CRC-16-CCITT */ +#define UCS_CRC16_POLY 0x8408u + +/* CRC-32 (ISO 3309) */ +#define UCS_CRC32_POLY 0xedb88320l + +#define UCS_CRC_CALC(_width, _buffer, _size, _crc) \ + do { \ + const uint8_t *end = (const uint8_t*)(UCS_PTR_BYTE_OFFSET(_buffer, _size)); \ + const uint8_t *p; \ + uint8_t bit; \ + \ + if ((_size) != 0) { \ + for (p = (_buffer); p < end; ++p) { \ + (_crc) ^= *p; \ + for (bit = 0; bit < 8; ++bit) { \ + (_crc) = ((_crc) >> 1) ^ (-(int)((_crc) & 1) & \ + UCS_CRC ## _width ## _POLY); \ + } \ + } \ + } \ + (_crc) = ~(_crc); \ + } while (0) + + +uint16_t ucs_crc16(const void *buffer, size_t size) +{ + uint16_t crc = UINT16_MAX; + UCS_CRC_CALC(16, buffer, size, crc); + return crc; +} + +uint16_t ucs_crc16_string(const char *s) +{ + return ucs_crc16((const char*)s, strlen(s)); +} + +uint32_t ucs_crc32(uint32_t prev_crc, const void *buffer, size_t size) +{ + uint32_t crc = ~prev_crc; + UCS_CRC_CALC(32, buffer, size, crc); + return crc; +} diff --git a/src/ucs/algorithm/crc.h b/src/ucs/algorithm/crc.h new file mode 100644 index 0000000..277d81b --- /dev/null +++ b/src/ucs/algorithm/crc.h @@ -0,0 +1,53 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCS_ALGORITHM_CRC_H_ +#define UCS_ALGORITHM_CRC_H_ + +#include + +#include +#include + +BEGIN_C_DECLS + +/** @file crc.h */ + +/** + * Calculate CRC16 of an arbitrary buffer. + * + * @param [in] buffer Buffer to compute crc for. + * @param [in] size Buffer size. + * + * @return crc16() function of the buffer. 
+ */ +uint16_t ucs_crc16(const void *buffer, size_t size); + + +/** + * Calculate CRC16 of a NULL-terminated string. + * + * @param [in] s NULL-terminated string to compute crc for. + * + * @return crc16() function of the string. + */ +uint16_t ucs_crc16_string(const char *s); + + +/** + * Calculate CRC32 of an arbitrary buffer. + * + * @param [in] prev_crc Initial CRC value. + * @param [in] buffer Buffer to compute crc for. + * @param [in] size Buffer size. + * + * @return crc32() function of the buffer. + */ +uint32_t ucs_crc32(uint32_t prev_crc, const void *buffer, size_t size); + +END_C_DECLS + +#endif diff --git a/src/ucs/algorithm/qsort_r.c b/src/ucs/algorithm/qsort_r.c new file mode 100644 index 0000000..1566d33 --- /dev/null +++ b/src/ucs/algorithm/qsort_r.c @@ -0,0 +1,187 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "qsort_r.h" + +#include +#include +#include + + +/* + * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function". + */ +#define ucs_qsort_swapcode(TYPE, parmi, parmj, n) \ + { \ + long i = (n) / sizeof (TYPE); \ + register TYPE *pi = (TYPE *) (parmi); \ + register TYPE *pj = (TYPE *) (parmj); \ + do { \ + register TYPE t = *pi; \ + *pi++ = *pj; \ + *pj++ = t; \ + } while (--i > 0); \ + } + +#define ucs_qsort_swaptype(a, size) \ + ({ \ + (((char *)a - (char *)0) % sizeof(long)) || \ + (size % sizeof(long)) ? 2 : (size == sizeof(long)) ? 0 : 1; \ + }) + +#define ucs_qsort_swap(a, b) \ + if (swaptype == 0) { \ + long t = *(long *)(a); \ + *(long *)(a) = *(long *)(b); \ + *(long *)(b) = t; \ + } else { \ + ucs_qsort_swapfunc(a, b, size, swaptype); \ + } + +#define ucs_qsort_vecswap(a, b, n) \ + if ((n) > 0) { \ + ucs_qsort_swapfunc(a, b, n, swaptype); \ + } + +static UCS_F_ALWAYS_INLINE void +ucs_qsort_swapfunc(char *a, char *b, int n, int swaptype) +{ + if (swaptype <= 1) { + ucs_qsort_swapcode(long, a, b, n) + } else { + ucs_qsort_swapcode(char, a, b, n) + } +} + +static UCS_F_ALWAYS_INLINE char * +ucs_qsort_med3(char *a, char *b, char *c, ucs_qsort_r_compare_cb_t *compare, + void *arg) +{ + return (compare(a, b, arg) < 0) ? + ((compare(b, c, arg) < 0) ? b : ((compare(a, c, arg)) < 0 ? c : a)) : + ((compare(b, c, arg) > 0) ? b : ((compare(a, c, arg)) < 0 ? 
a : c)); +} + +void ucs_qsort_r(void *base, size_t nmemb, size_t size, + ucs_qsort_r_compare_cb_t *compare, void *arg) +{ + char *pa, *pb, *pc, *md, *pl, *pm, *pn; + int d, r, swaptype, swap_cnt; + +loop: + swaptype = ucs_qsort_swaptype(base, size); + swap_cnt = 0; + + if (nmemb < 7) { + /* Switch to insertion sort */ + for (pm = (char*)base + size; pm < (char*)base + nmemb * size; pm += size) { + for (pl = pm; pl > (char*)base && compare(pl - size, pl, arg) > 0; pl -= size) { + ucs_qsort_swap(pl, pl - size); + } + } + return; + } + + pm = (char*)base + (nmemb / 2) * size; + if (nmemb > 7) { + pl = base; + pn = (char*)base + (nmemb - 1) * size; + if (nmemb > 40) { + d = (nmemb / 8) * size; + pl = ucs_qsort_med3(pl, pl + d, pl + 2 * d, compare, arg); + pm = ucs_qsort_med3(pm - d, pm, pm + d, compare, arg); + pn = ucs_qsort_med3(pn - 2 * d, pn - d, pn, compare, arg); + } + pm = ucs_qsort_med3(pl, pm, pn, compare, arg); + } + + ucs_qsort_swap(base, pm); + pa = pb = (char*)base + size; + + pc = md = (char*)base + (nmemb - 1) * size; + for (;;) { + while ((pb <= pc) && (r = compare(pb, base, arg)) <= 0) { + if (r == 0) { + swap_cnt = 1; + ucs_qsort_swap(pa, pb); + pa += size; + } + pb += size; + } + while ((pb <= pc) && (r = compare(pc, base, arg)) >= 0) { + if (r == 0) { + swap_cnt = 1; + ucs_qsort_swap(pc, md); + md -= size; + } + pc -= size; + } + if (pb > pc) { + break; + } + ucs_qsort_swap(pb, pc); + swap_cnt = 1; + pb += size; + pc -= size; + } + + if (swap_cnt == 0) { + /* Switch to insertion sort */ + for (pm = (char*)base + size; pm < (char*)base + nmemb * size; pm += size) { + for (pl = pm; pl > (char *)base && compare(pl - size, pl, arg) > 0; + pl -= size) { + ucs_qsort_swap(pl, pl - size); + } + } + return; + } + + pn = (char*)base + nmemb * size; + r = ucs_min(pa - (char*)base, pb - pa); + ucs_qsort_vecswap(base, pb - r, r); + + r = ucs_min(md - pc, pn - md - size); + ucs_qsort_vecswap(pb, pn - r, r); + + if ((r = pb - pa) > size) { + ucs_qsort_r(base, r / 
size, size, compare, arg); + } + + if ((r = md - pc) > size) { + /* Iterate rather than recurse to save stack space */ + base = pn - r; + nmemb = r / size; + goto loop; + } +} diff --git a/src/ucs/algorithm/qsort_r.h b/src/ucs/algorithm/qsort_r.h new file mode 100644 index 0000000..4f2aea5 --- /dev/null +++ b/src/ucs/algorithm/qsort_r.h @@ -0,0 +1,69 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef UCS_DATASTRUCT_QSORT_R_H_ +#define UCS_DATASTRUCT_QSORT_R_H_ + +#include + +#include + +BEGIN_C_DECLS + +/** @file qsort_r.h */ + +/** + * Compare callback for @ref ucs_qsort_r. + */ +typedef int ucs_qsort_r_compare_cb_t(const void *elem1, const void *elem2, + void *arg); + + +/** + * Sort members of an array using QuickSort algorithm. + * Same as libc's qsort(), except it can also accept an additional argument for + * the compare function. + * + * @param [in] base Array to sort. + * @param [in] nmemb Number of members in the array. + * @param [in] size Size of each member in the array. + * @param [in] compare Compare callback. + * @param [in] arg Custom argument for the compare callback. + */ +void ucs_qsort_r(void *base, size_t nmemb, size_t size, + ucs_qsort_r_compare_cb_t *compare, void *arg); + +END_C_DECLS + +#endif diff --git a/src/ucs/arch/aarch64/bitops.h b/src/ucs/arch/aarch64/bitops.h new file mode 100644 index 0000000..a93739c --- /dev/null +++ b/src/ucs/arch/aarch64/bitops.h @@ -0,0 +1,34 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms.
+*/ + +#ifndef UCS_AARCH64_BITOPS_H_ +#define UCS_AARCH64_BITOPS_H_ + +#include + + +static inline unsigned __ucs_ilog2_u32(uint32_t n) +{ + int bit; + asm ("clz %w0, %w1" : "=r" (bit) : "r" (n)); + return 31 - bit; +} + +static inline unsigned __ucs_ilog2_u64(uint64_t n) +{ + int64_t bit; + asm ("clz %0, %1" : "=r" (bit) : "r" (n)); + return 63 - bit; +} + +static inline unsigned ucs_ffs64(uint64_t n) +{ + return __ucs_ilog2_u64(n & -n); +} + + +#endif diff --git a/src/ucs/arch/aarch64/cpu.c b/src/ucs/arch/aarch64/cpu.c new file mode 100644 index 0000000..4d8c830 --- /dev/null +++ b/src/ucs/arch/aarch64/cpu.c @@ -0,0 +1,67 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#if defined(__aarch64__) + +#include +#include + + +static void ucs_aarch64_cpuid_from_proc(ucs_aarch64_cpuid_t *cpuid) +{ + char buf[256]; + int value; + FILE* f; + + cpuid->implementer = -1; + cpuid->architecture = -1; + cpuid->variant = -1; + cpuid->part = -1; + cpuid->revision = -1; + + f = fopen("/proc/cpuinfo","r"); + if (!f) { + return; + } + + while (fgets(buf, sizeof(buf), f)) { + if (sscanf(buf, "CPU implementer : 0x%x", &value) == 1) { + cpuid->implementer = value; + } else if (sscanf(buf, "CPU architecture : %d", &value) == 1) { + cpuid->architecture = value; + } else if (sscanf(buf, "CPU variant : 0x%x", &value) == 1) { + cpuid->variant = value; + } else if (sscanf(buf, "CPU part : 0x%x", &value) == 1) { + cpuid->part = value; + } else if (sscanf(buf, "CPU revision : %d", &value) == 1) { + cpuid->revision = value; + } + + if ((cpuid->implementer != -1) && (cpuid->architecture != -1) && + (cpuid->variant != -1) && (cpuid->part != -1) && (cpuid->revision != -1)) { + break; + } + } + + fclose(f); +} + +void ucs_aarch64_cpuid(ucs_aarch64_cpuid_t *cpuid) +{ + static ucs_aarch64_cpuid_t cached_cpuid; + static int initialized = 0; + + if (!initialized) { + ucs_aarch64_cpuid_from_proc(&cached_cpuid); + 
ucs_memory_cpu_store_fence(); + initialized = 1; + } + + ucs_memory_cpu_load_fence(); + *cpuid = cached_cpuid; +} + +#endif diff --git a/src/ucs/arch/aarch64/cpu.h b/src/ucs/arch/aarch64/cpu.h new file mode 100644 index 0000000..1fee75a --- /dev/null +++ b/src/ucs/arch/aarch64/cpu.h @@ -0,0 +1,219 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_AARCH64_CPU_H_ +#define UCS_AARCH64_CPU_H_ + +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#if __ARM_NEON +#include +#endif + + +#define UCS_ARCH_CACHE_LINE_SIZE 64 + +BEGIN_C_DECLS + +/** @file cpu.h */ + +/** + * Assume the worst - weak memory ordering. + */ + +#define ucs_aarch64_dmb(_op) asm volatile ("dmb " #_op ::: "memory") +#define ucs_aarch64_isb(_op) asm volatile ("isb " #_op ::: "memory") + +/* The macro is used to serialize stores across Normal NC (or Device) and WB + * memory, (see Arm Spec, B2.7.2). Based on recent changes in Linux kernel: + * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=22ec71615d824f4f11d38d0e55a88d8956b7e45f + * + * The underlying barrier code was changed to use lighter weight DMB instead + * of DSB. The barrier used for synchronization of access between write back + * and device mapped memory (PCIe BAR). + */ +#define ucs_memory_bus_fence() ucs_aarch64_dmb(oshsy) +#define ucs_memory_bus_store_fence() ucs_aarch64_dmb(oshst) +#define ucs_memory_bus_load_fence() ucs_aarch64_dmb(oshld) + +/* The macro is used to flush all pending stores from write combining buffer. + * Some uarch "auto" flush the stores once cache line is full (no need for additional barrier). 
+ */ +#if defined(HAVE_AARCH64_THUNDERX2) +#define ucs_memory_bus_cacheline_wc_flush() +#else +/* The macro is used to flush stores to Normal NC or Device memory */ +#define ucs_memory_bus_cacheline_wc_flush() ucs_aarch64_dmb(oshst) +#endif + +#define ucs_memory_cpu_fence() ucs_aarch64_dmb(ish) +#define ucs_memory_cpu_store_fence() ucs_aarch64_dmb(ishst) +#define ucs_memory_cpu_load_fence() ucs_aarch64_dmb(ishld) + +/* The macro is used to serialize stores to Normal NC or Device memory + * (see Arm Spec, B2.7.2) + */ +#define ucs_memory_cpu_wc_fence() ucs_aarch64_dmb(oshst) + + +/* + * ARM processor ID (ARM ISA - Main ID Register, EL1) + */ +typedef struct ucs_aarch64_cpuid { + int implementer; + int architecture; + int variant; + int part; + int revision; +} ucs_aarch64_cpuid_t; + + +/** + * Get ARM CPU identifier and version + */ +void ucs_aarch64_cpuid(ucs_aarch64_cpuid_t *cpuid); + + +#if HAVE_HW_TIMER +static inline uint64_t ucs_arch_read_hres_clock(void) +{ + uint64_t ticks; + asm volatile("isb" : : : "memory"); + asm volatile("mrs %0, cntvct_el0" : "=r" (ticks)); + return ticks; +} + +static inline double ucs_arch_get_clocks_per_sec() +{ + uint64_t freq; + asm volatile("mrs %0, cntfrq_el0" : "=r" (freq)); + return (double) freq; +} + +#else + +#define ucs_arch_read_hres_clock ucs_arch_generic_read_hres_clock +#define ucs_arch_get_clocks_per_sec ucs_arch_generic_get_clocks_per_sec + +#endif + +static inline ucs_cpu_model_t ucs_arch_get_cpu_model() +{ + return UCS_CPU_MODEL_ARM_AARCH64; +} + +static inline ucs_cpu_vendor_t ucs_arch_get_cpu_vendor() +{ + return UCS_CPU_VENDOR_GENERIC_ARM; +} + +static inline int ucs_arch_get_cpu_flag() +{ + return UCS_CPU_FLAG_UNKNOWN; +} + +static inline void ucs_cpu_init() +{ +} + +static inline void ucs_arch_wait_mem(void *address) +{ + unsigned long tmp; + asm volatile ("ldxrb %w0, %1 \n" + "wfe \n" + : "=&r"(tmp) + : "Q"(address)); +} + +#if !HAVE___CLEAR_CACHE +static inline void ucs_arch_clear_cache(void *start, void 
*end) +{ +#if HAVE___AARCH64_SYNC_CACHE_RANGE + /* do not allow global declaration of compiler intrinsic */ + void __aarch64_sync_cache_range(void* beg, void* end); + + __aarch64_sync_cache_range(start, end); +#else + uintptr_t ptr; + unsigned icache; + unsigned dcache; + unsigned ctr_el0; + + /* Get cache line size, using ctr_el0 register + * + * Bits Name Function + * ***************************** + * [31] - Reserved, res1. + * [30:28] - Reserved, res0. + * [27:24] CWG Cache Write-Back granule. Log2 of the number of words of the + * maximum size of memory that can be overwritten as a result of + * the eviction of a cache entry that has had a memory location + * in it modified: + * 0x4 + * Cache Write-Back granule size is 16 words. + * [23:20] ERG Exclusives Reservation Granule. Log2 of the number of words of + * the maximum size of the reservation granule that has been + * implemented for the Load-Exclusive and Store-Exclusive instructions: + * 0x4 + * Exclusive reservation granule size is 16 words. + * [19:16] DminLine Log2 of the number of words in the smallest cache line of all the + * data and unified caches that the processor controls: + * 0x4 + * Smallest data cache line size is 16 words. + * [15:14] L1lp L1 Instruction cache policy. Indicates the indexing and tagging + * policy for the L1 Instruction cache: + * 0b10 + * Virtually Indexed Physically Tagged (VIPT). + * [13:4] - Reserved, res0. + * [3:0] IminLine Log2 of the number of words in the smallest cache line of all + * the instruction caches that the processor controls. + * 0x4 + * Smallest instruction cache line size is 16 words. 
+ */ + asm volatile ("mrs\t%0, ctr_el0":"=r" (ctr_el0)); + icache = sizeof(int) << (ctr_el0 & 0xf); + dcache = sizeof(int) << ((ctr_el0 >> 16) & 0xf); + + for (ptr = ucs_align_down((uintptr_t)start, dcache); ptr < (uintptr_t)end; ptr += dcache) { + asm volatile ("dc cvau, %0" :: "r" (ptr) : "memory"); + } + ucs_aarch64_dsb(ish); + + for (ptr = ucs_align_down((uintptr_t)start, icache); ptr < (uintptr_t)end; ptr += icache) { + asm volatile ("ic ivau, %0" :: "r" (ptr) : "memory"); + } + ucs_aarch64_dsb(ish); + ucs_aarch64_isb(); +#endif +} +#endif + +static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len) +{ + return memcpy(dst, src, len); +} + +static UCS_F_ALWAYS_INLINE void +ucs_memcpy_nontemporal(void *dst, const void *src, size_t len) +{ + memcpy(dst, src, len); +} + +static inline ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes) +{ + return UCS_ERR_UNSUPPORTED; +} + +END_C_DECLS + +#endif diff --git a/src/ucs/arch/aarch64/global_opts.c b/src/ucs/arch/aarch64/global_opts.c new file mode 100644 index 0000000..649dfe4 --- /dev/null +++ b/src/ucs/arch/aarch64/global_opts.c @@ -0,0 +1,24 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#if defined(__aarch64__) + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include + +ucs_config_field_t ucs_arch_global_opts_table[] = { + {NULL} +}; + +void ucs_arch_print_memcpy_limits(ucs_arch_global_opts_t *config) +{ +} + +#endif diff --git a/src/ucs/arch/aarch64/global_opts.h b/src/ucs/arch/aarch64/global_opts.h new file mode 100644 index 0000000..e46026a --- /dev/null +++ b/src/ucs/arch/aarch64/global_opts.h @@ -0,0 +1,23 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCS_AARCH64_GLOBAL_OPTS_H_ +#define UCS_AARCH64_GLOBAL_OPTS_H_ + +#include + +BEGIN_C_DECLS + +#define UCS_ARCH_GLOBAL_OPTS_INITALIZER {} + +/* built-in memcpy config */ +typedef struct ucs_arch_global_opts { + char dummy; +} ucs_arch_global_opts_t; + +END_C_DECLS + +#endif diff --git a/src/ucs/arch/atomic.h b/src/ucs/arch/atomic.h new file mode 100644 index 0000000..99e53ca --- /dev/null +++ b/src/ucs/arch/atomic.h @@ -0,0 +1,133 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_ARCH_ATOMIC_H +#define UCS_ARCH_ATOMIC_H + +#include + +#if defined(__x86_64__) +# include "x86_64/atomic.h" +#elif defined(__powerpc64__) +# include "generic/atomic.h" +#elif defined(__aarch64__) +# include "generic/atomic.h" +#else +# error "Unsupported architecture" +#endif + +#define UCS_DEFINE_ATOMIC_AND(_wordsize, _suffix) \ + static inline void ucs_atomic_and##_wordsize(volatile uint##_wordsize##_t *ptr, \ + uint##_wordsize##_t value) { \ + __sync_and_and_fetch(ptr, value); \ + } + +#define UCS_DEFINE_ATOMIC_FAND(_wordsize, _suffix) \ + static inline uint##_wordsize##_t ucs_atomic_fand##_wordsize(volatile uint##_wordsize##_t *ptr, \ + uint##_wordsize##_t value) { \ + return __sync_fetch_and_and(ptr, value); \ + } + +#define UCS_DEFINE_ATOMIC_XOR(_wordsize, _suffix) \ + static inline void ucs_atomic_xor##_wordsize(volatile uint##_wordsize##_t *ptr, \ + uint##_wordsize##_t value) { \ + __sync_xor_and_fetch(ptr, value); \ + } + +#define UCS_DEFINE_ATOMIC_FXOR(_wordsize, _suffix) \ + static inline uint##_wordsize##_t ucs_atomic_fxor##_wordsize(volatile uint##_wordsize##_t *ptr, \ + uint##_wordsize##_t value) { \ + return __sync_fetch_and_xor(ptr, value); \ + } + +#define UCS_DEFINE_ATOMIC_OR(_wordsize, _suffix) \ + static inline void ucs_atomic_or##_wordsize(volatile uint##_wordsize##_t *ptr, \ + uint##_wordsize##_t value) { \ + __sync_or_and_fetch(ptr, value); \ + } + +#define 
UCS_DEFINE_ATOMIC_FOR(_wordsize, _suffix) \ + static inline uint##_wordsize##_t ucs_atomic_for##_wordsize(volatile uint##_wordsize##_t *ptr, \ + uint##_wordsize##_t value) { \ + return __sync_fetch_and_or(ptr, value); \ + } + +#define UCS_DEFINE_ATOMIC_SUB(wordsize, suffix) \ + static inline void ucs_atomic_sub##wordsize(volatile uint##wordsize##_t *ptr, \ + uint##wordsize##_t value) { \ + ucs_atomic_add##wordsize(ptr, (uint##wordsize##_t)-value); \ + } + +#define UCS_DEFINE_ATOMIC_FSUB(wordsize, suffix) \ + static inline uint##wordsize##_t ucs_atomic_fsub##wordsize(volatile uint##wordsize##_t *ptr, \ + uint##wordsize##_t value) { \ + return ucs_atomic_fadd##wordsize(ptr, (uint##wordsize##_t)-value); \ + } + +/* + * Define atomic functions + */ +UCS_DEFINE_ATOMIC_ADD(8, b); +UCS_DEFINE_ATOMIC_ADD(16, w); +UCS_DEFINE_ATOMIC_ADD(32, l); +UCS_DEFINE_ATOMIC_ADD(64, q); + +UCS_DEFINE_ATOMIC_FADD(8, b); +UCS_DEFINE_ATOMIC_FADD(16, w); +UCS_DEFINE_ATOMIC_FADD(32, l); +UCS_DEFINE_ATOMIC_FADD(64, q); + +UCS_DEFINE_ATOMIC_SUB(8, b); +UCS_DEFINE_ATOMIC_SUB(16, w); +UCS_DEFINE_ATOMIC_SUB(32, l); +UCS_DEFINE_ATOMIC_SUB(64, q); + +UCS_DEFINE_ATOMIC_FSUB(8, b); +UCS_DEFINE_ATOMIC_FSUB(16, w); +UCS_DEFINE_ATOMIC_FSUB(32, l); +UCS_DEFINE_ATOMIC_FSUB(64, q); + +UCS_DEFINE_ATOMIC_AND(8, b); +UCS_DEFINE_ATOMIC_AND(16, w); +UCS_DEFINE_ATOMIC_AND(32, l); +UCS_DEFINE_ATOMIC_AND(64, q); + +UCS_DEFINE_ATOMIC_FAND(8, b); +UCS_DEFINE_ATOMIC_FAND(16, w); +UCS_DEFINE_ATOMIC_FAND(32, l); +UCS_DEFINE_ATOMIC_FAND(64, q); + +UCS_DEFINE_ATOMIC_OR(8, b); +UCS_DEFINE_ATOMIC_OR(16, w); +UCS_DEFINE_ATOMIC_OR(32, l); +UCS_DEFINE_ATOMIC_OR(64, q); + +UCS_DEFINE_ATOMIC_FOR(8, b); +UCS_DEFINE_ATOMIC_FOR(16, w); +UCS_DEFINE_ATOMIC_FOR(32, l); +UCS_DEFINE_ATOMIC_FOR(64, q); + +UCS_DEFINE_ATOMIC_XOR(8, b); +UCS_DEFINE_ATOMIC_XOR(16, w); +UCS_DEFINE_ATOMIC_XOR(32, l); +UCS_DEFINE_ATOMIC_XOR(64, q); + +UCS_DEFINE_ATOMIC_FXOR(8, b); +UCS_DEFINE_ATOMIC_FXOR(16, w); +UCS_DEFINE_ATOMIC_FXOR(32, l); 
+UCS_DEFINE_ATOMIC_FXOR(64, q); + +UCS_DEFINE_ATOMIC_SWAP(8, b); +UCS_DEFINE_ATOMIC_SWAP(16, w); +UCS_DEFINE_ATOMIC_SWAP(32, l); +UCS_DEFINE_ATOMIC_SWAP(64, q); + +UCS_DEFINE_ATOMIC_CSWAP(8, b); +UCS_DEFINE_ATOMIC_CSWAP(16, w); +UCS_DEFINE_ATOMIC_CSWAP(32, l); +UCS_DEFINE_ATOMIC_CSWAP(64, q); + +#endif diff --git a/src/ucs/arch/bitops.h b/src/ucs/arch/bitops.h new file mode 100644 index 0000000..91d4573 --- /dev/null +++ b/src/ucs/arch/bitops.h @@ -0,0 +1,113 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_ARCH_BITOPS_H +#define UCS_ARCH_BITOPS_H + + +#if defined(__x86_64__) +# include "x86_64/bitops.h" +#elif defined(__powerpc64__) +# include "ppc64/bitops.h" +#elif defined(__aarch64__) +# include "aarch64/bitops.h" +#else +# error "Unsupported architecture" +#endif + +#define ucs_ilog2(_n) \ +( \ + __builtin_constant_p(_n) ? ( \ + (_n) < 1 ? 0 : \ + (_n) & (1ULL << 63) ? 63 : \ + (_n) & (1ULL << 62) ? 62 : \ + (_n) & (1ULL << 61) ? 61 : \ + (_n) & (1ULL << 60) ? 60 : \ + (_n) & (1ULL << 59) ? 59 : \ + (_n) & (1ULL << 58) ? 58 : \ + (_n) & (1ULL << 57) ? 57 : \ + (_n) & (1ULL << 56) ? 56 : \ + (_n) & (1ULL << 55) ? 55 : \ + (_n) & (1ULL << 54) ? 54 : \ + (_n) & (1ULL << 53) ? 53 : \ + (_n) & (1ULL << 52) ? 52 : \ + (_n) & (1ULL << 51) ? 51 : \ + (_n) & (1ULL << 50) ? 50 : \ + (_n) & (1ULL << 49) ? 49 : \ + (_n) & (1ULL << 48) ? 48 : \ + (_n) & (1ULL << 47) ? 47 : \ + (_n) & (1ULL << 46) ? 46 : \ + (_n) & (1ULL << 45) ? 45 : \ + (_n) & (1ULL << 44) ? 44 : \ + (_n) & (1ULL << 43) ? 43 : \ + (_n) & (1ULL << 42) ? 42 : \ + (_n) & (1ULL << 41) ? 41 : \ + (_n) & (1ULL << 40) ? 40 : \ + (_n) & (1ULL << 39) ? 39 : \ + (_n) & (1ULL << 38) ? 38 : \ + (_n) & (1ULL << 37) ? 37 : \ + (_n) & (1ULL << 36) ? 36 : \ + (_n) & (1ULL << 35) ? 35 : \ + (_n) & (1ULL << 34) ? 34 : \ + (_n) & (1ULL << 33) ? 33 : \ + (_n) & (1ULL << 32) ? 32 : \ + (_n) & (1ULL << 31) ? 31 : \ + (_n) & (1ULL << 30) ? 
30 : \ + (_n) & (1ULL << 29) ? 29 : \ + (_n) & (1ULL << 28) ? 28 : \ + (_n) & (1ULL << 27) ? 27 : \ + (_n) & (1ULL << 26) ? 26 : \ + (_n) & (1ULL << 25) ? 25 : \ + (_n) & (1ULL << 24) ? 24 : \ + (_n) & (1ULL << 23) ? 23 : \ + (_n) & (1ULL << 22) ? 22 : \ + (_n) & (1ULL << 21) ? 21 : \ + (_n) & (1ULL << 20) ? 20 : \ + (_n) & (1ULL << 19) ? 19 : \ + (_n) & (1ULL << 18) ? 18 : \ + (_n) & (1ULL << 17) ? 17 : \ + (_n) & (1ULL << 16) ? 16 : \ + (_n) & (1ULL << 15) ? 15 : \ + (_n) & (1ULL << 14) ? 14 : \ + (_n) & (1ULL << 13) ? 13 : \ + (_n) & (1ULL << 12) ? 12 : \ + (_n) & (1ULL << 11) ? 11 : \ + (_n) & (1ULL << 10) ? 10 : \ + (_n) & (1ULL << 9) ? 9 : \ + (_n) & (1ULL << 8) ? 8 : \ + (_n) & (1ULL << 7) ? 7 : \ + (_n) & (1ULL << 6) ? 6 : \ + (_n) & (1ULL << 5) ? 5 : \ + (_n) & (1ULL << 4) ? 4 : \ + (_n) & (1ULL << 3) ? 3 : \ + (_n) & (1ULL << 2) ? 2 : \ + (_n) & (1ULL << 1) ? 1 : \ + (_n) & (1ULL << 0) ? 0 : \ + 0 \ + ) : \ + (sizeof(_n) <= 4) ? \ + __ucs_ilog2_u32((uint32_t)(_n)) : \ + __ucs_ilog2_u64((uint64_t)(_n)) \ +) + +#define ucs_ilog2_or0(_n) \ + ( ((_n) == 0) ? 0 : ucs_ilog2(_n) ) + +/* Returns the number of 1-bits in x */ +#define ucs_popcount(_n) \ + ((sizeof(_n) <= 4) ? __builtin_popcount((uint32_t)(_n)) : __builtin_popcountl(_n)) + +/* Returns the number of trailing 0-bits in x, starting at the least + * significant bit position. If x is 0, the result is undefined. + */ +#define ucs_count_trailing_zero_bits(_n) \ + ((sizeof(_n) <= 4) ? __builtin_ctz((uint32_t)(_n)) : __builtin_ctzl(_n)) + +/* Returns the number of 1-bits by _idx mask */ +#define ucs_bitmap2idx(_map, _idx) \ + ucs_popcount((_map) & (UCS_MASK(_idx))) + +#endif diff --git a/src/ucs/arch/cpu.c b/src/ucs/arch/cpu.c new file mode 100644 index 0000000..e81f540 --- /dev/null +++ b/src/ucs/arch/cpu.c @@ -0,0 +1,148 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include + +#define UCS_CPU_CACHE_FILE_FMT "/sys/devices/system/cpu/cpu%d/cache/index%d/%s" +#define UCS_CPU_CACHE_LEVEL_FILE "level" +#define UCS_CPU_CACHE_TYPE_FILE "type" +#define UCS_CPU_CACHE_SIZE_FILE "size" + +/* cache size array. index - cache type (ucs_cpu_cache_type_t), value - cache value, + * 0 means cache is not supported */ +static size_t ucs_cpu_cache_size[UCS_CPU_CACHE_LAST] = {0}; + +static ucs_init_once_t ucs_cache_read_once = UCS_INIT_ONCE_INITIALIZER; + +/* cache datatypes */ +struct { /* sysfs entries for system cache sizes */ + int level; + const char *type; +} const ucs_cpu_cache_sysfs_name[] = { + [UCS_CPU_CACHE_L1d] = {.level = 1, .type = "Data"}, + [UCS_CPU_CACHE_L1i] = {.level = 1, .type = "Instruction"}, + [UCS_CPU_CACHE_L2] = {.level = 2, .type = "Unified"}, + [UCS_CPU_CACHE_L3] = {.level = 3, .type = "Unified"} +}; + +const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST] = { + [UCS_CPU_VENDOR_UNKNOWN] = { + .min = UCS_MEMUNITS_INF, + .max = UCS_MEMUNITS_INF + }, + [UCS_CPU_VENDOR_INTEL] = { + .min = 1 * UCS_KBYTE, + .max = 8 * UCS_MBYTE + }, + [UCS_CPU_VENDOR_AMD] = { + .min = 1 * UCS_KBYTE, + .max = 136 * UCS_KBYTE + }, + [UCS_CPU_VENDOR_GENERIC_ARM] = { + .min = UCS_MEMUNITS_INF, + .max = UCS_MEMUNITS_INF + }, + [UCS_CPU_VENDOR_GENERIC_PPC] = { + .min = UCS_MEMUNITS_INF, + .max = UCS_MEMUNITS_INF + } +}; + +const size_t ucs_cpu_est_bcopy_bw[UCS_CPU_VENDOR_LAST] = { + [UCS_CPU_VENDOR_UNKNOWN] = 5800 * UCS_MBYTE, + [UCS_CPU_VENDOR_INTEL] = 5800 * UCS_MBYTE, + [UCS_CPU_VENDOR_AMD] = 5008 * UCS_MBYTE, + [UCS_CPU_VENDOR_GENERIC_ARM] = 5800 * UCS_MBYTE, + [UCS_CPU_VENDOR_GENERIC_PPC] = 5800 * UCS_MBYTE +}; + +static void ucs_sysfs_get_cache_size() +{ + char type_str[32]; /* Data/Instruction/Unified */ + char size_str[32]; /* memunits */ + int cache_index; + int cpu; + long level; + ssize_t file_size; + 
ucs_cpu_cache_type_t cache_type; + ucs_status_t status; + + cpu = ucs_get_first_cpu(); + + for (cache_index = 0;; cache_index++) { + file_size = ucs_read_file_str(type_str, sizeof(type_str), 1, + UCS_CPU_CACHE_FILE_FMT, cpu, + cache_index, UCS_CPU_CACHE_TYPE_FILE); + if (file_size < 0) { + return; /* no more files */ + } + + ucs_strtrim(type_str); + status = ucs_read_file_number(&level, 1, UCS_CPU_CACHE_FILE_FMT, + cpu, cache_index, UCS_CPU_CACHE_LEVEL_FILE); + if (status != UCS_OK) { + return; /* no more files */ + } + + /* ok, we found valid directory, let's try to read cache size */ + file_size = ucs_read_file_str(size_str, sizeof(size_str), 1, UCS_CPU_CACHE_FILE_FMT, + cpu, cache_index, UCS_CPU_CACHE_SIZE_FILE); + if (file_size < 0) { + return; /* no more files */ + } + + /* now lookup for cache size entry */ + for (cache_type = UCS_CPU_CACHE_L1d; cache_type < UCS_CPU_CACHE_LAST; cache_type++) { + if ((ucs_cpu_cache_sysfs_name[cache_type].level == level) && + !strcasecmp(ucs_cpu_cache_sysfs_name[cache_type].type, type_str)) { + if (ucs_cpu_cache_size[cache_type] != 0) { + break; + } + + status = ucs_str_to_memunits(ucs_strtrim(size_str), + &ucs_cpu_cache_size[cache_type]); + if (status != UCS_OK) { + ucs_cpu_cache_size[cache_type] = 0; /* reset cache value */ + } + } + } + } +} + +size_t ucs_cpu_get_cache_size(ucs_cpu_cache_type_t type) +{ + ucs_status_t status; + + if (type >= UCS_CPU_CACHE_LAST) { + return 0; + } + + UCS_INIT_ONCE(&ucs_cache_read_once) { + UCS_STATIC_ASSERT(ucs_array_size(ucs_cpu_cache_size) == UCS_CPU_CACHE_LAST); + /* try first CPU-specific algorithm */ + status = ucs_arch_get_cache_size(ucs_cpu_cache_size); + if (status != UCS_OK) { + /* read rest of caches from sysfs */ + ucs_sysfs_get_cache_size(); + } + } + + return ucs_cpu_cache_size[type]; +} + +double ucs_cpu_get_memcpy_bw() +{ + return ucs_cpu_est_bcopy_bw[ucs_arch_get_cpu_vendor()]; +} diff --git a/src/ucs/arch/cpu.h b/src/ucs/arch/cpu.h new file mode 100644 index 0000000..c1f1cc8 
--- /dev/null +++ b/src/ucs/arch/cpu.h @@ -0,0 +1,147 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_ARCH_CPU_H +#define UCS_ARCH_CPU_H + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include + +BEGIN_C_DECLS + +/* CPU models */ +typedef enum ucs_cpu_model { + UCS_CPU_MODEL_UNKNOWN, + UCS_CPU_MODEL_INTEL_IVYBRIDGE, + UCS_CPU_MODEL_INTEL_SANDYBRIDGE, + UCS_CPU_MODEL_INTEL_NEHALEM, + UCS_CPU_MODEL_INTEL_WESTMERE, + UCS_CPU_MODEL_INTEL_HASWELL, + UCS_CPU_MODEL_INTEL_BROADWELL, + UCS_CPU_MODEL_INTEL_SKYLAKE, + UCS_CPU_MODEL_ARM_AARCH64, + UCS_CPU_MODEL_AMD_NAPLES, + UCS_CPU_MODEL_AMD_ROME, + UCS_CPU_MODEL_LAST +} ucs_cpu_model_t; + + +/* CPU flags */ +typedef enum ucs_cpu_flag { + UCS_CPU_FLAG_UNKNOWN = (-1), + UCS_CPU_FLAG_CMOV = UCS_BIT(0), + UCS_CPU_FLAG_MMX = UCS_BIT(1), + UCS_CPU_FLAG_MMX2 = UCS_BIT(2), + UCS_CPU_FLAG_SSE = UCS_BIT(3), + UCS_CPU_FLAG_SSE2 = UCS_BIT(4), + UCS_CPU_FLAG_SSE3 = UCS_BIT(5), + UCS_CPU_FLAG_SSSE3 = UCS_BIT(6), + UCS_CPU_FLAG_SSE41 = UCS_BIT(7), + UCS_CPU_FLAG_SSE42 = UCS_BIT(8), + UCS_CPU_FLAG_AVX = UCS_BIT(9), + UCS_CPU_FLAG_AVX2 = UCS_BIT(10) +} ucs_cpu_flag_t; + + +/* CPU vendors */ +typedef enum ucs_cpu_vendor { + UCS_CPU_VENDOR_UNKNOWN, + UCS_CPU_VENDOR_INTEL, + UCS_CPU_VENDOR_AMD, + UCS_CPU_VENDOR_GENERIC_ARM, + UCS_CPU_VENDOR_GENERIC_PPC, + UCS_CPU_VENDOR_LAST +} ucs_cpu_vendor_t; + + +/* CPU cache types */ +typedef enum ucs_cpu_cache_type { + UCS_CPU_CACHE_L1d, /**< L1 data cache */ + UCS_CPU_CACHE_L1i, /**< L1 instruction cache */ + UCS_CPU_CACHE_L2, /**< L2 cache */ + UCS_CPU_CACHE_L3, /**< L3 cache */ + UCS_CPU_CACHE_LAST +} ucs_cpu_cache_type_t; + + +/* Built-in memcpy settings */ +typedef struct ucs_cpu_builtin_memcpy { + size_t min; + size_t max; +} ucs_cpu_builtin_memcpy_t; + + +/* System constants */ +#define UCS_SYS_POINTER_SIZE (sizeof(void*)) +#define 
UCS_SYS_PARAGRAPH_SIZE 16 +#define UCS_SYS_PCI_MAX_PAYLOAD 512 + + +#if defined(__x86_64__) +# include "x86_64/cpu.h" +#elif defined(__powerpc64__) +# include "ppc64/cpu.h" +#elif defined(__aarch64__) +# include "aarch64/cpu.h" +#else +# error "Unsupported architecture" +#endif + +#if defined(HAVE_CACHE_LINE_SIZE) +#define UCS_SYS_CACHE_LINE_SIZE HAVE_CACHE_LINE_SIZE +#else +#define UCS_SYS_CACHE_LINE_SIZE UCS_ARCH_CACHE_LINE_SIZE +#endif + +/* Array of default built-in memcpy settings for different CPU architectures */ +extern const ucs_cpu_builtin_memcpy_t ucs_cpu_builtin_memcpy[UCS_CPU_VENDOR_LAST]; + + +/** + * Get size of CPU cache. + * + * @param type Cache type. + * @note The size is returned directly; there is no output parameter. + * + * @return Cache size value or 0 if cache is not supported or can't be read. + */ +size_t ucs_cpu_get_cache_size(ucs_cpu_cache_type_t type); + + +/** + * Clear processor data and instruction caches, intended for + * self-modifying code. + * + * @param start start of region to clear cache, including address + * @param end end of region to clear cache, excluding address + */ +static inline void ucs_clear_cache(void *start, void *end) +{ +#if HAVE___CLEAR_CACHE + /* do not allow global declaration of compiler intrinsic */ + void __clear_cache(void* beg, void* end); + + __clear_cache(start, end); +#else + ucs_arch_clear_cache(start, end); +#endif +} + +/** + * Get memory copy bandwidth. + * + * @return Memory copy bandwidth estimation based on CPU used. + */ +double ucs_cpu_get_memcpy_bw(); + +END_C_DECLS + +#endif diff --git a/src/ucs/arch/generic/atomic.h b/src/ucs/arch/generic/atomic.h new file mode 100644 index 0000000..c9316e1 --- /dev/null +++ b/src/ucs/arch/generic/atomic.h @@ -0,0 +1,40 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms.
+*/ + +#ifndef UCS_GENERIC_ATOMIC_H_ +#define UCS_GENERIC_ATOMIC_H_ + + +#define UCS_DEFINE_ATOMIC_ADD(wordsize, suffix) \ + static inline void ucs_atomic_add##wordsize(volatile uint##wordsize##_t *ptr, \ + uint##wordsize##_t value) { \ + __sync_add_and_fetch(ptr, value); \ + } + +#define UCS_DEFINE_ATOMIC_FADD(wordsize, suffix) \ + static inline uint##wordsize##_t ucs_atomic_fadd##wordsize(volatile uint##wordsize##_t *ptr, \ + uint##wordsize##_t value) { \ + return __sync_fetch_and_add(ptr, value); \ + } + +#define UCS_DEFINE_ATOMIC_SWAP(wordsize, suffix) \ + static inline uint##wordsize##_t ucs_atomic_swap##wordsize(volatile uint##wordsize##_t *ptr, \ + uint##wordsize##_t value) { \ + uint##wordsize##_t old; \ + do { \ + old = *ptr; \ + } while(old != __sync_val_compare_and_swap(ptr, old, value)); \ + return old; \ + } + +#define UCS_DEFINE_ATOMIC_CSWAP(wordsize, suffix) \ + static inline uint##wordsize##_t ucs_atomic_cswap##wordsize(volatile uint##wordsize##_t *ptr, \ + uint##wordsize##_t compare, \ + uint##wordsize##_t swap) { \ + return __sync_val_compare_and_swap(ptr, compare, swap); \ + } + +#endif diff --git a/src/ucs/arch/generic/cpu.h b/src/ucs/arch/generic/cpu.h new file mode 100644 index 0000000..74b5cbe --- /dev/null +++ b/src/ucs/arch/generic/cpu.h @@ -0,0 +1,35 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCS_GENERIC_CPU_H_ +#define UCS_GENERIC_CPU_H_ + +#include +#include + + +static inline uint64_t ucs_arch_generic_read_hres_clock(void) +{ + struct timeval tv; + + if (gettimeofday(&tv, NULL) != 0) { + return 0; + } + return ((((uint64_t)(tv.tv_sec)) * 1000000ULL) + ((uint64_t)(tv.tv_usec))); +} + +static inline double ucs_arch_generic_get_clocks_per_sec() +{ + return 1.0E6; +} + +static inline void ucs_arch_generic_wait_mem(void *address) +{ + /* NOP */ +} + +#endif diff --git a/src/ucs/arch/global_opts.h b/src/ucs/arch/global_opts.h new file mode 100644 index 0000000..8786f13 --- /dev/null +++ b/src/ucs/arch/global_opts.h @@ -0,0 +1,26 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_ARCH_GLOBAL_OPTS_H +#define UCS_ARCH_GLOBAL_OPTS_H + +#include + +#if defined(__x86_64__) +# include "x86_64/global_opts.h" +#elif defined(__powerpc64__) +# include "ppc64/global_opts.h" +#elif defined(__aarch64__) +# include "aarch64/global_opts.h" +#else +# error "Unsupported architecture" +#endif + +extern ucs_config_field_t ucs_arch_global_opts_table[]; + +void ucs_arch_print_memcpy_limits(ucs_arch_global_opts_t *config); + +#endif diff --git a/src/ucs/arch/ppc64/bitops.h b/src/ucs/arch/ppc64/bitops.h new file mode 100644 index 0000000..93a35fc --- /dev/null +++ b/src/ucs/arch/ppc64/bitops.h @@ -0,0 +1,32 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/
+
+#ifndef UCS_PPC64_BITOPS_H_
+#define UCS_PPC64_BITOPS_H_
+
+#include
+
+/*
+ * Index of the most significant set bit of a 32-bit value, computed as
+ * 31 minus the leading-zero count produced by "cntlzw".
+ * NOTE(review): for n == 0 the subtraction goes negative before the
+ * conversion to unsigned - callers presumably never pass 0; confirm.
+ */
+static inline unsigned __ucs_ilog2_u32(uint32_t n)
+{
+    int bit;
+    asm ("cntlzw %0,%1" : "=r" (bit) : "r" (n));
+    return 31 - bit;
+}
+/*
+ * 64-bit variant of the above: 63 minus the leading-zero count produced by
+ * "cntlzd".
+ */
+static inline unsigned __ucs_ilog2_u64(uint64_t n)
+{
+    int bit;
+    asm ("cntlzd %0,%1" : "=r" (bit) : "r" (n));
+    return 63 - bit;
+}
+/*
+ * Index of the least significant set bit: (n & -n) isolates the lowest set
+ * bit, and its log2 is the bit index.
+ */
+static inline unsigned ucs_ffs64(uint64_t n)
+{
+    return __ucs_ilog2_u64(n & -n);
+}
+
+#endif
diff --git a/src/ucs/arch/ppc64/cpu.h b/src/ucs/arch/ppc64/cpu.h
new file mode 100644
index 0000000..d1aeb9f
--- /dev/null
+++ b/src/ucs/arch/ppc64/cpu.h
@@ -0,0 +1,101 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+
+#ifndef UCS_PPC64_CPU_H_
+#define UCS_PPC64_CPU_H_
+
+#include
+#include
+#ifdef HAVE_SYS_PLATFORM_PPC_H
+# include
+#endif
+#include
+#include
+#include
+#include
+
+BEGIN_C_DECLS
+
+/** @file cpu.h */
+
+#define UCS_ARCH_CACHE_LINE_SIZE 128
+
+/* Assume the worst - weak memory ordering.
+ * All bus fences and the full CPU fence map to "sync"; the CPU store fence
+ * uses "lwsync" and the CPU load fence uses "lwsync" followed by "isync".
+ * ucs_memory_bus_cacheline_wc_flush() expands to nothing. */
+#define ucs_memory_bus_fence()        asm volatile ("sync"::: "memory")
+#define ucs_memory_bus_store_fence()  ucs_memory_bus_fence()
+#define ucs_memory_bus_load_fence()   ucs_memory_bus_fence()
+#define ucs_memory_bus_cacheline_wc_flush()
+#define ucs_memory_cpu_fence()        ucs_memory_bus_fence()
+#define ucs_memory_cpu_store_fence()  asm volatile ("lwsync \n" \
+                                                    ::: "memory")
+#define ucs_memory_cpu_load_fence()   asm volatile ("lwsync \n" \
+                                                    "isync \n" \
+                                                    ::: "memory")
+#define ucs_memory_cpu_wc_fence()     ucs_memory_bus_fence()
+
+/*
+ * Read the high-resolution clock. Without <sys/platform/ppc.h> support
+ * (HAVE_SYS_PLATFORM_PPC_H undefined) the value is read with "mfspr" from
+ * special-purpose register 268; otherwise __ppc_get_timebase() is used.
+ */
+static inline uint64_t ucs_arch_read_hres_clock()
+{
+#ifndef HAVE_SYS_PLATFORM_PPC_H
+    uint64_t tb;
+    asm volatile ("mfspr %0, 268" : "=r" (tb));
+    return tb;
+#else
+    return __ppc_get_timebase();
+#endif
+}
+/* CPU model detection is not implemented on ppc64: always "unknown". */
+static inline ucs_cpu_model_t ucs_arch_get_cpu_model()
+{
+    return UCS_CPU_MODEL_UNKNOWN;
+}
+/* The vendor is always reported as generic PPC. */
+static inline ucs_cpu_vendor_t ucs_arch_get_cpu_vendor()
+{
+    return UCS_CPU_VENDOR_GENERIC_PPC;
+}
+/* CPU feature flags are not detected on ppc64: always "unknown". */
+static inline int ucs_arch_get_cpu_flag()
+{
+    return UCS_CPU_FLAG_UNKNOWN;
+}
+/* No architecture-specific initialization is needed on ppc64. */
+static inline void ucs_cpu_init()
+{
+}
+/* Defined in timebase.c */
+double ucs_arch_get_clocks_per_sec();
+
+#define ucs_arch_wait_mem ucs_arch_generic_wait_mem
+
+#if !HAVE___CLEAR_CACHE
+static inline void ucs_arch_clear_cache(void *start, void *end)
+{
+    ucs_memory_cpu_fence(); /* only a full fence; 'start' and 'end' are unused */
+}
+#endif
+/* Plain memcpy(); returns 'dst' as memcpy() does. */
+static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len)
+{
+    return memcpy(dst, src, len);
+}
+/* No non-temporal copy on ppc64: falls back to memcpy(). */
+static UCS_F_ALWAYS_INLINE void
+ucs_memcpy_nontemporal(void *dst, const void *src, size_t len)
+{
+    memcpy(dst, src, len);
+}
+/* Cache size discovery is not implemented on ppc64; 'cache_sizes' is left
+ * untouched. */
+static inline ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes)
+{
+    return UCS_ERR_UNSUPPORTED;
+}
+
+END_C_DECLS
+
+#endif
diff --git a/src/ucs/arch/ppc64/global_opts.c b/src/ucs/arch/ppc64/global_opts.c
new file mode 100644
index 0000000..e273437
--- /dev/null
+++ b/src/ucs/arch/ppc64/global_opts.c
@@ -0,0 +1,24 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#if defined(__powerpc64__)
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include
+#include
+/* ppc64 defines no architecture options: the table holds only the
+ * terminating entry. */
+ucs_config_field_t ucs_arch_global_opts_table[] = {
+    {NULL}
+};
+/* Nothing to print on ppc64; 'config' is unused. */
+void ucs_arch_print_memcpy_limits(ucs_arch_global_opts_t *config)
+{
+}
+
+#endif
diff --git a/src/ucs/arch/ppc64/global_opts.h b/src/ucs/arch/ppc64/global_opts.h
new file mode 100644
index 0000000..225e4e5
--- /dev/null
+++ b/src/ucs/arch/ppc64/global_opts.h
@@ -0,0 +1,25 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+
+#ifndef UCS_PPC64_GLOBAL_OPTS_H_
+#define UCS_PPC64_GLOBAL_OPTS_H_
+
+#include
+
+BEGIN_C_DECLS
+/* Initializer for ucs_arch_global_opts_t; empty because ppc64 has no
+ * architecture options. */
+#define UCS_ARCH_GLOBAL_OPTS_INITALIZER {}
+
+/* built-in memcpy config: ppc64 has no such settings, so the structure only
+ * carries a placeholder member */
+typedef struct ucs_arch_global_opts {
+    char dummy;
+} ucs_arch_global_opts_t;
+
+END_C_DECLS
+
+#endif
+
diff --git a/src/ucs/arch/ppc64/timebase.c b/src/ucs/arch/ppc64/timebase.c
new file mode 100644
index 0000000..8f3a6dd
--- /dev/null
+++ b/src/ucs/arch/ppc64/timebase.c
@@ -0,0 +1,25 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED.
+* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include
+#ifdef HAVE_SYS_PLATFORM_PPC_H
+# include
+#endif
+
+
+#if defined(__powerpc64__)
+/*
+ * Frequency of the ppc64 high-resolution clock, in ticks per second.
+ * Uses __ppc_get_timebase_freq() when it is declared; otherwise asks
+ * ucs_get_cpuinfo_clock_freq() for the "timebase" entry with a scale of 1.0
+ * (helper defined elsewhere - presumably parses /proc/cpuinfo; confirm).
+ */
+double ucs_arch_get_clocks_per_sec()
+{
+#if HAVE_DECL___PPC_GET_TIMEBASE_FREQ
+    return __ppc_get_timebase_freq();
+#else
+    return ucs_get_cpuinfo_clock_freq("timebase", 1.0);
+#endif
+}
+
+#endif
diff --git a/src/ucs/arch/x86_64/atomic.h b/src/ucs/arch/x86_64/atomic.h
new file mode 100644
index 0000000..5e12f00
--- /dev/null
+++ b/src/ucs/arch/x86_64/atomic.h
@@ -0,0 +1,53 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCS_X86_64_ATOMIC_H_
+#define UCS_X86_64_ATOMIC_H_
+
+/*
+ * Generates ucs_atomic_add<wordsize>(ptr, value): adds 'value' to '*ptr' with
+ * a "lock add<suffix>" instruction. 'suffix' is the AT&T operand-size suffix
+ * that is pasted onto the mnemonic. Nothing is returned.
+ */
+#define UCS_DEFINE_ATOMIC_ADD(wordsize, suffix) \
+    static inline void ucs_atomic_add##wordsize(volatile uint##wordsize##_t *ptr, \
+                                                uint##wordsize##_t value) { \
+        asm volatile ( \
+              "lock add" #suffix " %1, %0" \
+              : "+m"(*ptr) \
+              : "ir" (value)); \
+    }
+/*
+ * Generates ucs_atomic_fadd<wordsize>(ptr, value): "lock xadd<suffix>" adds
+ * 'value' to '*ptr' and leaves the previous content of '*ptr' in the register
+ * operand, which is what the function returns.
+ */
+#define UCS_DEFINE_ATOMIC_FADD(wordsize, suffix) \
+    static inline uint##wordsize##_t ucs_atomic_fadd##wordsize(volatile uint##wordsize##_t *ptr, \
+                                                               uint##wordsize##_t value) { \
+        asm volatile ( \
+              "lock xadd" #suffix " %0, %1" \
+              : "+r" (value), "+m" (*ptr) \
+              : : "memory"); \
+        return value; \
+    }
+/*
+ * Generates ucs_atomic_swap<wordsize>(ptr, value): exchanges 'value' with
+ * '*ptr' using "lock xchg<suffix>" and returns the value that was exchanged
+ * out of memory.
+ */
+#define UCS_DEFINE_ATOMIC_SWAP(wordsize, suffix) \
+    static inline uint##wordsize##_t ucs_atomic_swap##wordsize(volatile uint##wordsize##_t *ptr, \
+                                                               uint##wordsize##_t value) { \
+        asm volatile ( \
+              "lock xchg" #suffix " %0, %1" \
+              : "+r" (value), "+m" (*ptr) \
+              : : "memory", "cc"); \
+        return value; \
+    }
+/*
+ * Generates ucs_atomic_cswap<wordsize>(ptr, compare, swap): "lock
+ * cmpxchg<suffix>" with 'compare' preloaded into the accumulator register
+ * ("=a" output tied to the "0" input). The accumulator content after the
+ * instruction is returned, truncated to the word size.
+ */
+#define UCS_DEFINE_ATOMIC_CSWAP(wordsize, suffix) \
+    static inline uint##wordsize##_t ucs_atomic_cswap##wordsize(volatile uint##wordsize##_t *ptr, \
+                                                                uint##wordsize##_t compare, \
+                                                                uint##wordsize##_t swap) { \
+        unsigned long prev; \
+        asm volatile ( \
+              "lock cmpxchg" # suffix " %1, %2" \
+              : "=a" (prev) \
+              : "r"(swap), "m"(*ptr), "0" (compare) \
+              : "memory"); \
+        return prev; \
+    }
+
+#endif
diff --git a/src/ucs/arch/x86_64/bitops.h b/src/ucs/arch/x86_64/bitops.h
new file mode 100644
index 0000000..76a34db
--- /dev/null
+++ b/src/ucs/arch/x86_64/bitops.h
@@ -0,0 +1,40 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCS_X86_64_BITOPS_H_
+#define UCS_X86_64_BITOPS_H_
+
+#include
+
+/*
+ * Index of the least significant set bit of 'n', obtained with "bsfq".
+ * NOTE(review): no special handling of n == 0 here - callers presumably
+ * never pass 0; confirm.
+ */
+static inline unsigned ucs_ffs64(uint64_t n)
+{
+    uint64_t result;
+    asm("bsfq %1,%0"
+        : "=r" (result)
+        : "r" (n));
+    return result;
+}
+/* Index of the most significant set bit of a 32-bit value ("bsrl"). */
+static inline unsigned __ucs_ilog2_u32(uint32_t n)
+{
+    uint32_t result;
+    asm("bsrl %1,%0"
+        : "=r" (result)
+        : "r" (n));
+    return result;
+}
+/* Index of the most significant set bit of a 64-bit value ("bsrq"). */
+static inline unsigned __ucs_ilog2_u64(uint64_t n)
+{
+    uint64_t result;
+    asm("bsrq %1,%0"
+        : "=r" (result)
+        : "r" (n));
+    return result;
+}
+
+#endif
diff --git a/src/ucs/arch/x86_64/cpu.c b/src/ucs/arch/x86_64/cpu.c
new file mode 100644
index 0000000..4540f09
--- /dev/null
+++ b/src/ucs/arch/x86_64/cpu.c
@@ -0,0 +1,651 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+* Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#if defined(__x86_64__)
+
+#include
+#include
+#include
+#include
+#include
+/* Vendor strings are stored in EBX, ECX, EDX order - the order in which
+ * ucs_x86_cpu_registers.id overlays the registers - hence the scrambled
+ * spelling. The remaining X86_CPUID_* values are CPUID leaf numbers. */
+#define X86_CPUID_GENUINEINTEL    "GenuntelineI" /* GenuineIntel in magic notation */
+#define X86_CPUID_AUTHENTICAMD    "AuthcAMDenti" /* AuthenticAMD in magic notation */
+#define X86_CPUID_GET_MODEL       0x00000001u
+#define X86_CPUID_GET_BASE_VALUE  0x00000000u
+#define X86_CPUID_GET_EXTD_VALUE  0x00000007u
+#define X86_CPUID_GET_MAX_VALUE   0x80000000u
+#define X86_CPUID_INVARIANT_TSC   0x80000007u
+#define X86_CPUID_GET_CACHE_INFO  0x00000002u
+#define X86_CPUID_GET_LEAF4_INFO  0x00000004u
+/* Values tested against the leaf 2 registers and descriptor bytes in
+ * ucs_arch_get_cache_size() */
+#define X86_CPU_CACHE_RESERVED    0x80000000
+#define X86_CPU_CACHE_TAG_L1_ONLY 0x40
+#define X86_CPU_CACHE_TAG_LEAF4   0xff
+/* 128-bit load/store helpers used by ucs_x86_memcpy_sse_movntdqa(): a
+ * streaming load and an unaligned store */
+#if defined (__SSE4_1__)
+#define _mm_load(a)    _mm_stream_load_si128((__m128i *) (a))
+#define _mm_store(a,v) _mm_storeu_si128((__m128i *) (a), (v))
+#endif
+
+/* Cache type codes compared against the 'type' bit-field read from CPUID
+ * leaf 4 */
+typedef enum ucs_x86_cpu_cache_type {
+    X86_CPU_CACHE_TYPE_DATA        = 1,
+    X86_CPU_CACHE_TYPE_INSTRUCTION = 2,
+    X86_CPU_CACHE_TYPE_UNIFIED     = 3
+} ucs_x86_cpu_cache_type_t;
+
+/* CPU version: bit-field view of the EAX value returned by CPUID leaf 1 */
+typedef union ucs_x86_cpu_version {
+    struct {
+        unsigned stepping   : 4;
+        unsigned model      : 4;
+        unsigned family     : 4;
+        unsigned type       : 2;
+        unsigned unused     : 2;
+        unsigned ext_model  : 4;
+        unsigned ext_family : 8;
+    };
+    uint32_t reg;
+} UCS_S_PACKED ucs_x86_cpu_version_t;
+
+/* cache datatypes */
+typedef struct ucs_x86_cpu_cache_info {
+    unsigned                 level;
+    ucs_x86_cpu_cache_type_t type;
+} UCS_S_PACKED ucs_x86_cpu_cache_info_t;
+/* One 32-bit register from CPUID leaf 4, viewed either as the
+ * line-size / partitions / associativity fields or as the type / level
+ * fields */
+typedef union ucs_x86_cache_line_reg_info {
+    uint32_t reg;
+    struct {
+        unsigned size          : 12;
+        unsigned partitions    : 10;
+        unsigned associativity : 10;
+    };
+    struct {
+        unsigned type          : 5;
+        unsigned level         : 3;
+    };
+} UCS_S_PACKED ucs_x86_cache_line_reg_info_t;
+/* The four CPUID output registers, addressable by name, as the vendor id
+ * string, or as an array of leaf 2 descriptor bytes */
+typedef union ucs_x86_cpu_registers {
+    struct {
+        union {
+            uint32_t eax;
+            uint8_t  max_iter; /* leaf 2 - max iterations */
+        };
+        union {
+            struct {
+                uint32_t ebx;
+                uint32_t ecx;
+                uint32_t edx;
+            };
+            char id[sizeof(uint32_t) * 3]; /* leaf 0 - CPU ID */
+        };
+    };
+    union {
+        uint32_t value;
+        uint8_t  tag[sizeof(uint32_t)];
+    } reg[4]; /* leaf 2 tags */
+} UCS_S_PACKED ucs_x86_cpu_registers;
+/* Entry of the leaf 2 descriptor table: which cache a descriptor byte
+ * describes and its size in bytes */
+typedef struct ucs_x86_cpu_cache_size_codes {
+    ucs_cpu_cache_type_t type;
+    size_t               size;
+} ucs_x86_cpu_cache_size_codes_t;
+
+/* Tri-state RDTSC switch: starts as UCS_TRY and is set to UCS_YES or UCS_NO
+ * by ucs_x86_init_tsc_freq() */
+ucs_ternary_value_t ucs_arch_x86_enable_rdtsc = UCS_TRY;
+/* Level and type that CPUID leaf 4 reports for each ucs_cpu_cache_type_t */
+static const ucs_x86_cpu_cache_info_t x86_cpu_cache[] = {
+    [UCS_CPU_CACHE_L1d] = {.level = 1, .type = X86_CPU_CACHE_TYPE_DATA},
+    [UCS_CPU_CACHE_L1i] = {.level = 1, .type = X86_CPU_CACHE_TYPE_INSTRUCTION},
+    [UCS_CPU_CACHE_L2]  = {.level = 2, .type = X86_CPU_CACHE_TYPE_UNIFIED},
+    [UCS_CPU_CACHE_L3]  = {.level = 3, .type = X86_CPU_CACHE_TYPE_UNIFIED}
+};
+/* CPUID leaf 2 descriptor byte -> cache kind and size in bytes. Indices not
+ * listed are zero-initialized, so their 'size' is 0. */
+static const ucs_x86_cpu_cache_size_codes_t ucs_x86_cpu_cache_size_codes[] = {
+    [0x06] = {.type = UCS_CPU_CACHE_L1i, .size = 8192 },
+    [0x08] = {.type = UCS_CPU_CACHE_L1i, .size = 16384 },
+    [0x09] = {.type = UCS_CPU_CACHE_L1i, .size = 32768 },
+    [0x0a] = {.type = UCS_CPU_CACHE_L1d, .size = 8192 },
+    [0x0c] = {.type = UCS_CPU_CACHE_L1d, .size = 16384 },
+    [0x0d] = {.type = UCS_CPU_CACHE_L1d, .size = 16384 },
+    [0x0e] = {.type = UCS_CPU_CACHE_L1d, .size = 24576 },
+    [0x21] = {.type = UCS_CPU_CACHE_L2, .size = 262144 },
+    [0x22] = {.type = UCS_CPU_CACHE_L3, .size = 524288 },
+    [0x23] = {.type = UCS_CPU_CACHE_L3, .size = 1048576 },
+    [0x25] = {.type = UCS_CPU_CACHE_L3, .size = 2097152 },
+    [0x29] = {.type = UCS_CPU_CACHE_L3, .size = 4194304 },
+    [0x2c] = {.type = UCS_CPU_CACHE_L1d, .size = 32768 },
+    [0x30] = {.type = UCS_CPU_CACHE_L1i, .size = 32768 },
+    [0x39] = {.type = UCS_CPU_CACHE_L2, .size = 131072 },
+    [0x3a] = {.type = UCS_CPU_CACHE_L2, .size = 196608 },
+    [0x3b] = {.type = UCS_CPU_CACHE_L2, .size = 131072 },
+    [0x3c] = {.type = UCS_CPU_CACHE_L2, .size = 262144 },
+    [0x3d] = {.type = UCS_CPU_CACHE_L2, .size = 393216 },
+    [0x3e] = {.type = UCS_CPU_CACHE_L2, .size = 524288 },
+    [0x3f] = {.type = UCS_CPU_CACHE_L2, .size = 262144 },
+    [0x41] = {.type = UCS_CPU_CACHE_L2, .size = 131072 },
+    [0x42] = {.type = UCS_CPU_CACHE_L2, .size = 262144 },
+    [0x43] = {.type = UCS_CPU_CACHE_L2, .size = 524288 },
+    [0x44] = {.type = UCS_CPU_CACHE_L2, .size = 1048576 },
+    [0x45] = {.type = UCS_CPU_CACHE_L2, .size = 2097152 },
+    [0x46] = {.type = UCS_CPU_CACHE_L3, .size = 4194304 },
+    [0x47] = {.type = UCS_CPU_CACHE_L3, .size = 8388608 },
+    [0x48] = {.type = UCS_CPU_CACHE_L2, .size = 3145728 },
+    [0x49] = {.type = UCS_CPU_CACHE_L2, .size = 4194304 },
+    [0x4a] = {.type = UCS_CPU_CACHE_L3, .size = 6291456 },
+    [0x4b] = {.type = UCS_CPU_CACHE_L3, .size = 8388608 },
+    [0x4c] = {.type = UCS_CPU_CACHE_L3, .size = 12582912 },
+    [0x4d] = {.type = UCS_CPU_CACHE_L3, .size = 16777216 },
+    [0x4e] = {.type = UCS_CPU_CACHE_L2, .size = 6291456 },
+    [0x60] = {.type = UCS_CPU_CACHE_L1d, .size = 16384 },
+    [0x66] = {.type = UCS_CPU_CACHE_L1d, .size = 8192 },
+    [0x67] = {.type = UCS_CPU_CACHE_L1d, .size = 16384 },
+    [0x68] = {.type = UCS_CPU_CACHE_L1d, .size = 32768 },
+    [0x78] = {.type = UCS_CPU_CACHE_L2, .size = 1048576 },
+    [0x79] = {.type = UCS_CPU_CACHE_L2, .size = 131072 },
+    [0x7a] = {.type = UCS_CPU_CACHE_L2, .size = 262144 },
+    [0x7b] = {.type = UCS_CPU_CACHE_L2, .size = 524288 },
+    [0x7c] = {.type = UCS_CPU_CACHE_L2, .size = 1048576 },
+    [0x7d] = {.type = UCS_CPU_CACHE_L2, .size = 2097152 },
+    [0x7f] = {.type = UCS_CPU_CACHE_L2, .size = 524288 },
+    [0x80] = {.type = UCS_CPU_CACHE_L2, .size = 524288 },
+    [0x82] = {.type = UCS_CPU_CACHE_L2, .size = 262144 },
+    [0x83] = {.type = UCS_CPU_CACHE_L2, .size = 524288 },
+    [0x84] = {.type = UCS_CPU_CACHE_L2, .size = 1048576 },
+    [0x85] = {.type = UCS_CPU_CACHE_L2, .size = 2097152 },
+    [0x86] = {.type = UCS_CPU_CACHE_L2, .size = 524288 },
+    [0x87] = {.type = UCS_CPU_CACHE_L2, .size = 1048576 },
+    [0xd0] = {.type = UCS_CPU_CACHE_L3, .size = 524288 },
+    [0xd1] = {.type = UCS_CPU_CACHE_L3, .size = 1048576 },
+    [0xd2] = {.type = UCS_CPU_CACHE_L3, .size = 2097152 },
+    [0xd6] = {.type = UCS_CPU_CACHE_L3, .size = 1048576 },
+    [0xd7] = {.type = UCS_CPU_CACHE_L3, .size = 2097152 },
+    [0xd8] = {.type = UCS_CPU_CACHE_L3, .size = 4194304 },
+    [0xdc] = {.type = UCS_CPU_CACHE_L3, .size = 2097152 },
+    [0xdd] = {.type = UCS_CPU_CACHE_L3, .size = 4194304 },
+    [0xde] = {.type = UCS_CPU_CACHE_L3, .size = 8388608 },
+    [0xe2] = {.type = UCS_CPU_CACHE_L3, .size = 2097152 },
+    [0xe3] = {.type = UCS_CPU_CACHE_L3, .size = 4194304 },
+    [0xe4] = {.type = UCS_CPU_CACHE_L3, .size = 8388608 },
+    [0xea] = {.type = UCS_CPU_CACHE_L3, .size = 12582912 },
+    [0xeb] = {.type = UCS_CPU_CACHE_L3, .size = 18874368 },
+    [0xec] = {.type = UCS_CPU_CACHE_L3, .size = 25165824 }
+};
+
+/*
+ * Execute CPUID with EAX = 'level' and store EAX/EBX/ECX/EDX through
+ * a/b/c/d. ECX is not set on input, so this is only suitable for leaves that
+ * take no sub-leaf index.
+ */
+static UCS_F_NOOPTIMIZE inline void ucs_x86_cpuid(uint32_t level,
+                                                  uint32_t *a, uint32_t *b,
+                                                  uint32_t *c, uint32_t *d)
+{
+    asm volatile ("cpuid\n\t"
+                  : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
+                  : "0"(level));
+}
+/*
+ * Same as ucs_x86_cpuid(), but also loads ECX with 'ecx' before executing
+ * CPUID (the "2" constraint ties the input to the ECX output operand).
+ */
+static UCS_F_NOOPTIMIZE inline void ucs_x86_cpuid_ecx(uint32_t level, uint32_t ecx,
+                                                      uint32_t *a, uint32_t *b,
+                                                      uint32_t *c, uint32_t *d)
+{
+    asm volatile("cpuid"
+                 : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
+                 : "0"(level), "2"(ecx));
+}
+
+/* This
allows the CPU detection to work with assemblers not supporting + * the xgetbv mnemonic. These include clang and some BSD versions. + */ +#define ucs_x86_xgetbv(_index, _eax, _edx) \ + asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a"(_eax), "=d"(_edx) : "c" (_index)) + +static int ucs_x86_invariant_tsc() +{ + uint32_t _eax, _ebx, _ecx, _edx; + + ucs_x86_cpuid(X86_CPUID_GET_MAX_VALUE, &_eax, &_ebx, &_ecx, &_edx); + if (_eax <= X86_CPUID_INVARIANT_TSC) { + goto warn; + } + + ucs_x86_cpuid(X86_CPUID_INVARIANT_TSC, &_eax, &_ebx, &_ecx, &_edx); + if (!(_edx & UCS_BIT(8))) { + goto warn; + } + + return 1; + +warn: + ucs_debug("CPU does not support invariant TSC, using fallback timer"); + + return 0; +} + +double ucs_x86_tsc_freq_from_cpu_model() +{ + char buf[256]; + char model[256]; + char *rate; + char newline[2]; + double ghz, max_ghz; + FILE* f; + int rc; + int warn; + + f = fopen("/proc/cpuinfo","r"); + if (!f) { + return -1; + } + + warn = 0; + max_ghz = 0.0; + while (fgets(buf, sizeof(buf), f)) { + rc = sscanf(buf, "model name : %s", model); + if (rc != 1) { + continue; + } + + rate = strrchr(buf, '@'); + if (rate == NULL) { + continue; + } + + rc = sscanf(rate, "@ %lfGHz%[\n]", &ghz, newline); + if (rc != 2) { + continue; + } + + max_ghz = ucs_max(max_ghz, ghz); + if (max_ghz != ghz) { + warn = 1; + break; + } + } + fclose(f); + + if (warn) { + ucs_debug("Conflicting CPU frequencies detected, using fallback timer"); + return -1; + } + + return max_ghz * 1e9; +} + +double ucs_x86_init_tsc_freq() +{ + double result; + + if (!ucs_x86_invariant_tsc()) { + goto err_disable_rdtsc; + } + + ucs_arch_x86_enable_rdtsc = UCS_YES; + + result = ucs_x86_tsc_freq_from_cpu_model(); + if (result <= 0.0) { + result = ucs_get_cpuinfo_clock_freq("cpu MHz", 1e6); + } + + if (result > 0.0) { + return result; + } + +err_disable_rdtsc: + ucs_arch_x86_enable_rdtsc = UCS_NO; + return -1; +} + +double ucs_arch_get_clocks_per_sec() +{ + double freq; + + /* Init rdtsc state 
ucs_arch_x86_enable_rdtsc */ + freq = ucs_x86_init_tsc_freq(); + if (ucs_arch_x86_enable_rdtsc == UCS_YES) { + /* using rdtsc */ + return freq; + } + + return ucs_arch_generic_get_clocks_per_sec(); +} + +ucs_cpu_model_t ucs_arch_get_cpu_model() +{ + ucs_x86_cpu_version_t version; + uint32_t _ebx, _ecx, _edx; + uint32_t model, family; + + /* Get CPU model/family */ + ucs_x86_cpuid(X86_CPUID_GET_MODEL, ucs_unaligned_ptr(&version.reg), &_ebx, &_ecx, &_edx); + + model = version.model; + family = version.family; + + /* Adjust family/model */ + if (family == 0xf) { + family += version.ext_family; + } + if ((family == 0x6) || (family == 0xf) || (family == 0x17)) { + model = (version.ext_model << 4) | model; + } + + /* Check known CPUs */ + if (family == 0x06) { + switch (model) { + case 0x3a: + case 0x3e: + return UCS_CPU_MODEL_INTEL_IVYBRIDGE; + case 0x2a: + case 0x2d: + return UCS_CPU_MODEL_INTEL_SANDYBRIDGE; + case 0x1a: + case 0x1e: + case 0x1f: + case 0x2e: + return UCS_CPU_MODEL_INTEL_NEHALEM; + case 0x25: + case 0x2c: + case 0x2f: + return UCS_CPU_MODEL_INTEL_WESTMERE; + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + return UCS_CPU_MODEL_INTEL_HASWELL; + case 0x3d: + case 0x47: + case 0x4f: + case 0x56: + return UCS_CPU_MODEL_INTEL_BROADWELL; + case 0x5e: + case 0x4e: + case 0x55: + return UCS_CPU_MODEL_INTEL_SKYLAKE; + } + } + + if (family == 0x17) { + switch (model) { + case 0x29: + return UCS_CPU_MODEL_AMD_NAPLES; + case 0x31: + return UCS_CPU_MODEL_AMD_ROME; + } + } + return UCS_CPU_MODEL_UNKNOWN; +} + + +int ucs_arch_get_cpu_flag() +{ + static int cpu_flag = UCS_CPU_FLAG_UNKNOWN; + + if (UCS_CPU_FLAG_UNKNOWN == cpu_flag) { + uint32_t result = 0; + uint32_t base_value; + uint32_t _eax, _ebx, _ecx, _edx; + + ucs_x86_cpuid(X86_CPUID_GET_BASE_VALUE, &_eax, &_ebx, &_ecx, &_edx); + base_value = _eax; + + if (base_value >= 1) { + ucs_x86_cpuid(X86_CPUID_GET_MODEL, &_eax, &_ebx, &_ecx, &_edx); + if (_edx & (1 << 15)) { + result |= UCS_CPU_FLAG_CMOV; + } + if 
(_edx & (1 << 23)) { + result |= UCS_CPU_FLAG_MMX; + } + if (_edx & (1 << 25)) { + result |= UCS_CPU_FLAG_MMX2; + } + if (_edx & (1 << 25)) { + result |= UCS_CPU_FLAG_SSE; + } + if (_edx & (1 << 26)) { + result |= UCS_CPU_FLAG_SSE2; + } + if (_ecx & 1) { + result |= UCS_CPU_FLAG_SSE3; + } + if (_ecx & (1 << 9)) { + result |= UCS_CPU_FLAG_SSSE3; + } + if (_ecx & (1 << 19)) { + result |= UCS_CPU_FLAG_SSE41; + } + if (_ecx & (1 << 20)) { + result |= UCS_CPU_FLAG_SSE42; + } + if ((_ecx & 0x18000000) == 0x18000000) { + ucs_x86_xgetbv(0, _eax, _edx); + if ((_eax & 0x6) == 0x6) { + result |= UCS_CPU_FLAG_AVX; + } + } + } + if (base_value >= 7) { + ucs_x86_cpuid(X86_CPUID_GET_EXTD_VALUE, &_eax, &_ebx, &_ecx, &_edx); + if ((result & UCS_CPU_FLAG_AVX) && (_ebx & (1 << 5))) { + result |= UCS_CPU_FLAG_AVX2; + } + } + cpu_flag = result; + } + + return cpu_flag; +} + +ucs_cpu_vendor_t ucs_arch_get_cpu_vendor() +{ + ucs_x86_cpu_registers reg; + + ucs_x86_cpuid(X86_CPUID_GET_BASE_VALUE, + ucs_unaligned_ptr(®.eax), ucs_unaligned_ptr(®.ebx), + ucs_unaligned_ptr(®.ecx), ucs_unaligned_ptr(®.edx)); + if (!memcmp(reg.id, X86_CPUID_GENUINEINTEL, sizeof(X86_CPUID_GENUINEINTEL) - 1)) { + return UCS_CPU_VENDOR_INTEL; + } else if (!memcmp(reg.id, X86_CPUID_AUTHENTICAMD, sizeof(X86_CPUID_AUTHENTICAMD) - 1)) { + return UCS_CPU_VENDOR_AMD; + } + + return UCS_CPU_VENDOR_UNKNOWN; +} + +#if ENABLE_BUILTIN_MEMCPY +static size_t ucs_cpu_memcpy_thresh(size_t user_val, size_t auto_val) +{ + if (user_val != UCS_MEMUNITS_AUTO) { + return user_val; + } + + if (((ucs_arch_get_cpu_vendor() == UCS_CPU_VENDOR_INTEL) && + (ucs_arch_get_cpu_model() >= UCS_CPU_MODEL_INTEL_HASWELL)) || + (ucs_arch_get_cpu_vendor() == UCS_CPU_VENDOR_AMD)) { + return auto_val; + } else { + return UCS_MEMUNITS_INF; + } +} +#endif + +void ucs_cpu_init() +{ +#if ENABLE_BUILTIN_MEMCPY + ucs_global_opts.arch.builtin_memcpy_min = + ucs_cpu_memcpy_thresh(ucs_global_opts.arch.builtin_memcpy_min, + 
ucs_cpu_builtin_memcpy[ucs_arch_get_cpu_vendor()].min); + ucs_global_opts.arch.builtin_memcpy_max = + ucs_cpu_memcpy_thresh(ucs_global_opts.arch.builtin_memcpy_max, + ucs_cpu_builtin_memcpy[ucs_arch_get_cpu_vendor()].max); +#endif +} + +ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes) +{ + ucs_x86_cache_line_reg_info_t cache_info; + ucs_x86_cache_line_reg_info_t line_info; + ucs_x86_cpu_registers reg; + uint32_t sets; + uint32_t i, t, r, l4; + uint32_t max_iter; + size_t c; + int level1_only; /* level 1 cache only supported */ + int tag; + int cache_count; + ucs_cpu_cache_type_t type; + + /* Get CPU ID and vendor - it will reset cache iteration sequence */ + if (ucs_arch_get_cpu_vendor() != UCS_CPU_VENDOR_INTEL) { + return UCS_ERR_UNSUPPORTED; + } + + ucs_x86_cpuid(X86_CPUID_GET_BASE_VALUE, + ucs_unaligned_ptr(®.eax), ucs_unaligned_ptr(®.ebx), + ucs_unaligned_ptr(®.ecx), ucs_unaligned_ptr(®.edx)); + if (reg.eax < X86_CPUID_GET_CACHE_INFO) { + return UCS_ERR_UNSUPPORTED; + } + + level1_only = 0; + cache_count = 0; + + for (i = 0, max_iter = 1; i < max_iter; i++) { + ucs_x86_cpuid(X86_CPUID_GET_CACHE_INFO, + ucs_unaligned_ptr(®.eax), ucs_unaligned_ptr(®.ebx), + ucs_unaligned_ptr(®.ecx), ucs_unaligned_ptr(®.edx)); + + if (i == 0) { /* on first iteration get max iteration number */ + max_iter = reg.max_iter; + reg.max_iter = 0; /* mask iteration register from processing */ + } + + for (r = 0; r < ucs_array_size(reg.reg); r++) { + if (ucs_test_all_flags(reg.reg[r].value, X86_CPU_CACHE_RESERVED)) { + continue; + } + + for (t = 0; (t < ucs_array_size(reg.reg[r].tag)) && (reg.reg[r].tag[t] != 0); t++) { + tag = reg.reg[r].tag[t]; + + switch(tag) { + case X86_CPU_CACHE_TAG_L1_ONLY: + level1_only = 1; + break; + case X86_CPU_CACHE_TAG_LEAF4: + for (l4 = 0; cache_count < UCS_CPU_CACHE_LAST; l4++) { + ucs_x86_cpuid_ecx(X86_CPUID_GET_LEAF4_INFO, l4, + ucs_unaligned_ptr(&cache_info.reg), + ucs_unaligned_ptr(&line_info.reg), + &sets, ucs_unaligned_ptr(®.edx)); + + if 
(cache_info.type == 0) { + /* we are done - nothing found, go to next register */ + break; + } + + for (c = 0; c < UCS_CPU_CACHE_LAST; c++) { + if ((cache_info.level == x86_cpu_cache[c].level) && + (cache_info.type == x86_cpu_cache[c].type)) { + /* found it */ + /* cache entry is not updated yet */ + /* and cache level is 1 or all levels are supported */ + if (!((cache_sizes[c] == 0) && + ((x86_cpu_cache[c].level == 1) || !level1_only))) { + break; + } + + cache_sizes[c] = (line_info.associativity + 1) * + (line_info.partitions + 1) * + (line_info.size + 1) * + (sets + 1); + cache_count++; + } + } + } + return cache_count == UCS_CPU_CACHE_LAST ? UCS_OK : UCS_ERR_UNSUPPORTED; + default: + if ((tag >= ucs_array_size(ucs_x86_cpu_cache_size_codes)) || + (ucs_x86_cpu_cache_size_codes[tag].size != 0)) { + break; /* tag is out of table or in empty entry */ + } + + type = ucs_x86_cpu_cache_size_codes[tag].type; + if (cache_sizes[type] != 0) { /* cache is filled already */ + break; + } + + cache_sizes[type] = ucs_x86_cpu_cache_size_codes[tag].size; + cache_count++; + break; + } + } + } + } + + return cache_count == UCS_CPU_CACHE_LAST ? 
UCS_OK : UCS_ERR_UNSUPPORTED; +} + +void ucs_x86_memcpy_sse_movntdqa(void *dst, const void *src, size_t len) +{ +#if defined (__SSE4_1__) + /* Copy unaligned portion of src */ + if ((uintptr_t)src & 15) { + uintptr_t aligned = (uintptr_t)src & ~15; + uintptr_t misalign = (uintptr_t)src & 15; + uintptr_t copy = ucs_min(len, 16 - misalign); + + __m128i tmp = _mm_load(aligned); + memcpy(dst, UCS_PTR_BYTE_OFFSET(&tmp, misalign), copy); + + src = UCS_PTR_BYTE_OFFSET(src, copy); + dst = UCS_PTR_BYTE_OFFSET(dst, copy); + len -= copy; + } + + /* Copy 64 bytes at a time */ + while (len >= 64) { + __m128i *S = (__m128i *)src; + __m128i *D = (__m128i *)dst; + __m128i tmp[4]; + + tmp[0] = _mm_load(S + 0); + tmp[1] = _mm_load(S + 1); + tmp[2] = _mm_load(S + 2); + tmp[3] = _mm_load(S + 3); + + _mm_store(D + 0, tmp[0]); + _mm_store(D + 1, tmp[1]); + _mm_store(D + 2, tmp[2]); + _mm_store(D + 3, tmp[3]); + + src = UCS_PTR_BYTE_OFFSET(src, 64); + dst = UCS_PTR_BYTE_OFFSET(dst, 64); + len -= 64; + } + + /* Copy 16 bytes at a time */ + while (len >= 16) { + _mm_store(dst, _mm_load(src)); + + src = UCS_PTR_BYTE_OFFSET(src, 16); + dst = UCS_PTR_BYTE_OFFSET(dst, 16); + len -= 16; + } + + /* Copy any remaining bytes */ + if (len) { + __m128i tmp = _mm_load(src); + memcpy(dst, &tmp, len); + } +#else + memcpy(dst, src, len); +#endif +} + +#endif diff --git a/src/ucs/arch/x86_64/cpu.h b/src/ucs/arch/x86_64/cpu.h new file mode 100644 index 0000000..7fc0062 --- /dev/null +++ b/src/ucs/arch/x86_64/cpu.h @@ -0,0 +1,122 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCS_ASM_X86_64_H_ +#define UCS_ASM_X86_64_H_ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __SSE4_1__ +# include +#endif +#ifdef __AVX__ +# include +#endif + +BEGIN_C_DECLS + +/** @file cpu.h */ + +#define UCS_ARCH_CACHE_LINE_SIZE 64 + +/** + * In x86_64, there is strong ordering of each processor with respect to another + * processor, but weak ordering with respect to the bus. + */ +#define ucs_memory_bus_fence() asm volatile ("mfence"::: "memory") +#define ucs_memory_bus_store_fence() asm volatile ("sfence" ::: "memory") +#define ucs_memory_bus_load_fence() asm volatile ("lfence" ::: "memory") +#define ucs_memory_bus_cacheline_wc_flush() +#define ucs_memory_cpu_fence() ucs_compiler_fence() +#define ucs_memory_cpu_store_fence() ucs_compiler_fence() +#define ucs_memory_cpu_load_fence() ucs_compiler_fence() +#define ucs_memory_cpu_wc_fence() asm volatile ("sfence" ::: "memory") + +extern ucs_ternary_value_t ucs_arch_x86_enable_rdtsc; + +double ucs_arch_get_clocks_per_sec(); +double ucs_x86_init_tsc_freq(); + +ucs_cpu_model_t ucs_arch_get_cpu_model() UCS_F_NOOPTIMIZE; +ucs_cpu_flag_t ucs_arch_get_cpu_flag() UCS_F_NOOPTIMIZE; +ucs_cpu_vendor_t ucs_arch_get_cpu_vendor(); +void ucs_cpu_init(); +ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes); +void ucs_x86_memcpy_sse_movntdqa(void *dst, const void *src, size_t len); + +static inline int ucs_arch_x86_rdtsc_enabled() +{ + double UCS_V_UNUSED dummy_freq; + + if (ucs_unlikely(ucs_arch_x86_enable_rdtsc == UCS_TRY)) { + dummy_freq = ucs_x86_init_tsc_freq(); + ucs_assert(ucs_arch_x86_enable_rdtsc != UCS_TRY); + } + + return ucs_arch_x86_enable_rdtsc; +} + +static inline uint64_t ucs_arch_read_hres_clock() +{ + uint32_t low, high; + + if (ucs_unlikely(ucs_arch_x86_rdtsc_enabled() == UCS_NO)) { + return ucs_arch_generic_read_hres_clock(); + } + + asm volatile ("rdtsc" : "=a" (low), "=d" (high)); + return ((uint64_t)high << 32) | (uint64_t)low; +} + +#define 
ucs_arch_wait_mem ucs_arch_generic_wait_mem + +#if !HAVE___CLEAR_CACHE +static inline void ucs_arch_clear_cache(void *start, void *end) +{ + char *ptr; + + for (ptr = (char*)start; ptr < (char*)end; ptr++) { + asm volatile("mfence; clflush %0; mfence" :: "m" (*ptr)); + } +} +#endif + +static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len) +{ +#if ENABLE_BUILTIN_MEMCPY + if (ucs_unlikely((len > ucs_global_opts.arch.builtin_memcpy_min) && + (len < ucs_global_opts.arch.builtin_memcpy_max))) { + asm volatile ("rep movsb" + : "=D" (dst), + "=S" (src), + "=c" (len) + : "0" (dst), + "1" (src), + "2" (len) + : "memory"); + return dst; + } +#endif + return memcpy(dst, src, len); +} + +static UCS_F_ALWAYS_INLINE void +ucs_memcpy_nontemporal(void *dst, const void *src, size_t len) +{ + ucs_x86_memcpy_sse_movntdqa(dst, src, len); +} + +END_C_DECLS + +#endif + diff --git a/src/ucs/arch/x86_64/global_opts.c b/src/ucs/arch/x86_64/global_opts.c new file mode 100644 index 0000000..6dca604 --- /dev/null +++ b/src/ucs/arch/x86_64/global_opts.c @@ -0,0 +1,44 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/
+
+#if defined(__x86_64__)
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include
+#include
+/* x86_64 architecture options. With ENABLE_BUILTIN_MEMCPY the table exposes
+ * the two built-in memcpy thresholds, both defaulting to "auto"; otherwise it
+ * holds only the terminating entry. */
+ucs_config_field_t ucs_arch_global_opts_table[] = {
+#if ENABLE_BUILTIN_MEMCPY
+  {"BUILTIN_MEMCPY_MIN", "auto",
+   "Minimal threshold of buffer length for using built-in memcpy.",
+   ucs_offsetof(ucs_arch_global_opts_t, builtin_memcpy_min), UCS_CONFIG_TYPE_MEMUNITS},
+
+  {"BUILTIN_MEMCPY_MAX", "auto",
+   "Maximal threshold of buffer length for using built-in memcpy.",
+   ucs_offsetof(ucs_arch_global_opts_t, builtin_memcpy_max), UCS_CONFIG_TYPE_MEMUNITS},
+#endif
+  {NULL}
+};
+
+/*
+ * Print to stdout the size range for which the built-in memcpy is used, as a
+ * "#"-prefixed line. Does nothing when ENABLE_BUILTIN_MEMCPY is off.
+ */
+void ucs_arch_print_memcpy_limits(ucs_arch_global_opts_t *config)
+{
+#if ENABLE_BUILTIN_MEMCPY
+    char min_thresh_str[32];
+    char max_thresh_str[32];
+
+    ucs_config_sprintf_memunits(min_thresh_str, sizeof(min_thresh_str),
+                                &config->builtin_memcpy_min, NULL);
+    ucs_config_sprintf_memunits(max_thresh_str, sizeof(max_thresh_str),
+                                &config->builtin_memcpy_max, NULL);
+    printf("# Using built-in memcpy() for size %s..%s\n", min_thresh_str, max_thresh_str);
+#endif
+}
+
+#endif
diff --git a/src/ucs/arch/x86_64/global_opts.h b/src/ucs/arch/x86_64/global_opts.h
new file mode 100644
index 0000000..54892ae
--- /dev/null
+++ b/src/ucs/arch/x86_64/global_opts.h
@@ -0,0 +1,29 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/ + +#ifndef UCS_X86_64_GLOBAL_OPTS_H_ +#define UCS_X86_64_GLOBAL_OPTS_H_ + +#include + +#include + +BEGIN_C_DECLS + +#define UCS_ARCH_GLOBAL_OPTS_INITALIZER { \ + .builtin_memcpy_min = UCS_MEMUNITS_AUTO, \ + .builtin_memcpy_max = UCS_MEMUNITS_AUTO \ +} + +/* built-in memcpy config */ +typedef struct ucs_arch_global_opts { + size_t builtin_memcpy_min; + size_t builtin_memcpy_max; +} ucs_arch_global_opts_t; + +END_C_DECLS + +#endif diff --git a/src/ucs/async/async.c b/src/ucs/async/async.c new file mode 100644 index 0000000..b000128 --- /dev/null +++ b/src/ucs/async/async.c @@ -0,0 +1,659 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "async_int.h" + +#include +#include +#include +#include + + +#define UCS_ASYNC_TIMER_ID_MIN 1000000u +#define UCS_ASYNC_TIMER_ID_MAX 2000000u + +#define UCS_ASYNC_HANDLER_FMT "%p [id=%d ref %d] %s()" +#define UCS_ASYNC_HANDLER_ARG(_h) (_h), (_h)->id, (_h)->refcount, \ + ucs_debug_get_symbol_name((_h)->cb) + +#define UCS_ASYNC_HANDLER_CALLER_NULL ((pthread_t)-1) + + +/* Hash table for all event and timer handlers */ +KHASH_MAP_INIT_INT(ucs_async_handler, ucs_async_handler_t *); + + +typedef struct ucs_async_global_context { + khash_t(ucs_async_handler) handlers; + pthread_rwlock_t handlers_lock; + volatile uint32_t handler_id; +} ucs_async_global_context_t; + + +static ucs_async_global_context_t ucs_async_global_context = { + .handlers_lock = PTHREAD_RWLOCK_INITIALIZER, + .handler_id = UCS_ASYNC_TIMER_ID_MIN +}; + + +#define ucs_async_method_call(_mode, _func, ...) \ + ((_mode) == UCS_ASYNC_MODE_SIGNAL) ? ucs_async_signal_ops._func(__VA_ARGS__) : \ + ((_mode) == UCS_ASYNC_MODE_THREAD_SPINLOCK) ? ucs_async_thread_spinlock_ops._func(__VA_ARGS__) : \ + ((_mode) == UCS_ASYNC_MODE_THREAD_MUTEX) ? 
ucs_async_thread_mutex_ops._func(__VA_ARGS__) : \ + ucs_async_poll_ops._func(__VA_ARGS__) + +#define ucs_async_method_call_all(_func, ...) \ + { \ + ucs_async_signal_ops._func(__VA_ARGS__); \ + ucs_async_thread_spinlock_ops._func(__VA_ARGS__); \ + ucs_async_thread_mutex_ops._func(__VA_ARGS__); \ + ucs_async_poll_ops._func(__VA_ARGS__); \ + } + + +static ucs_status_t ucs_async_poll_init(ucs_async_context_t *async) +{ + async->poll_block = 0; + return UCS_OK; +} + +static int ucs_async_poll_tryblock(ucs_async_context_t *async) +{ + return 1; +} + +static ucs_async_ops_t ucs_async_poll_ops = { + .init = ucs_empty_function, + .cleanup = ucs_empty_function, + .block = ucs_empty_function, + .unblock = ucs_empty_function, + .context_init = ucs_async_poll_init, + .context_cleanup = ucs_empty_function, + .context_try_block = ucs_async_poll_tryblock, + .context_unblock = ucs_empty_function, + .add_event_fd = ucs_empty_function_return_success, + .remove_event_fd = ucs_empty_function_return_success, + .modify_event_fd = ucs_empty_function_return_success, + .add_timer = ucs_empty_function_return_success, + .remove_timer = ucs_empty_function_return_success, +}; + +static inline khiter_t ucs_async_handler_kh_get(int id) +{ + return kh_get(ucs_async_handler, &ucs_async_global_context.handlers, id); +} + +static inline int ucs_async_handler_kh_is_end(khiter_t hash_it) +{ + return hash_it == kh_end(&ucs_async_global_context.handlers); +} + +static void ucs_async_handler_hold(ucs_async_handler_t *handler) +{ + ucs_atomic_add32(&handler->refcount, 1); +} + +/* incremented reference count and return the handler */ +static ucs_async_handler_t *ucs_async_handler_get(int id) +{ + ucs_async_handler_t *handler; + khiter_t hash_it; + + pthread_rwlock_rdlock(&ucs_async_global_context.handlers_lock); + hash_it = ucs_async_handler_kh_get(id); + if (ucs_async_handler_kh_is_end(hash_it)) { + handler = NULL; + goto out_unlock; + } + + handler = kh_value(&ucs_async_global_context.handlers, 
hash_it); + ucs_assert_always(handler->id == id); + ucs_async_handler_hold(handler); + +out_unlock: + pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock); + return handler; +} + +/* remove from hash and return the handler */ +static ucs_async_handler_t *ucs_async_handler_extract(int id) +{ + ucs_async_handler_t *handler; + khiter_t hash_it; + + pthread_rwlock_wrlock(&ucs_async_global_context.handlers_lock); + hash_it = ucs_async_handler_kh_get(id); + if (ucs_async_handler_kh_is_end(hash_it)) { + ucs_debug("async handler [id=%d] not found in hash table", id); + handler = NULL; + } else { + handler = kh_value(&ucs_async_global_context.handlers, hash_it); + ucs_assert_always(handler->id == id); + kh_del(ucs_async_handler, &ucs_async_global_context.handlers, hash_it); + ucs_debug("removed async handler " UCS_ASYNC_HANDLER_FMT " from hash", + UCS_ASYNC_HANDLER_ARG(handler)); + } + pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock); + + return handler; +} + +/* decrement reference count and release the handler if reached 0 */ +static void ucs_async_handler_put(ucs_async_handler_t *handler) +{ + if (ucs_atomic_fsub32(&handler->refcount, 1) > 1) { + return; + } + + ucs_debug("release async handler " UCS_ASYNC_HANDLER_FMT, + UCS_ASYNC_HANDLER_ARG(handler)); + ucs_free(handler); +} + +/* add new handler to the table */ +static ucs_status_t ucs_async_handler_add(int min_id, int max_id, + ucs_async_handler_t *handler) +{ + int hash_extra_status; + ucs_status_t status; + khiter_t hash_it; + int i, id; + + pthread_rwlock_wrlock(&ucs_async_global_context.handlers_lock); + + handler->id = -1; + ucs_assert_always(handler->refcount == 1); + + /* + * Search for an empty key in the range [min_id, max_id) + * ucs_async_global_context.handler_id is used to generate "unique" keys. 
+ */ + for (i = min_id; i < max_id; ++i) { /* i only bounds the number of attempts */ + id = min_id + (ucs_atomic_fadd32(&ucs_async_global_context.handler_id, 1) % + (max_id - min_id)); + hash_it = kh_put(ucs_async_handler, &ucs_async_global_context.handlers, + id, &hash_extra_status); + if (hash_extra_status == -1) { + ucs_error("Failed to add async handler " UCS_ASYNC_HANDLER_FMT + " to hash", UCS_ASYNC_HANDLER_ARG(handler)); + status = UCS_ERR_NO_MEMORY; + goto out_unlock; + } else if (hash_extra_status != 0) { /* presumably: key was not present before, so this id is free */ + handler->id = id; + ucs_assert(id != -1); + break; + } + } + + if (handler->id == -1) { /* every attempt hit an id that is already in use */ + ucs_error("Cannot add async handler %s() - id range [%d..%d) is full", + ucs_debug_get_symbol_name(handler->cb), min_id, max_id); + status = UCS_ERR_ALREADY_EXISTS; + goto out_unlock; + } + + ucs_assert_always(!ucs_async_handler_kh_is_end(hash_it)); + kh_value(&ucs_async_global_context.handlers, hash_it) = handler; + ucs_debug("added async handler " UCS_ASYNC_HANDLER_FMT " to hash", + UCS_ASYNC_HANDLER_ARG(handler)); + status = UCS_OK; + +out_unlock: + pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock); + return status; +} +/* Invoke the handler callback, recording the calling thread in handler->caller for the duration of the call */ +static void ucs_async_handler_invoke(ucs_async_handler_t *handler) +{ + ucs_trace_async("calling async handler " UCS_ASYNC_HANDLER_FMT, + UCS_ASYNC_HANDLER_ARG(handler)); + + /* track the calling thread to allow removing the handler synchronously from + * itself. The handler must always be called with async context blocked, so no need + * for atomic operations here. 
+ */ + ucs_assert(handler->caller == UCS_ASYNC_HANDLER_CALLER_NULL); + handler->caller = pthread_self(); + handler->cb(handler->id, handler->arg); + handler->caller = UCS_ASYNC_HANDLER_CALLER_NULL; +} +/* Run the handler now if it has no async context or its context could be blocked; otherwise queue its id on the context's miss queue (at most once) and return UCS_ERR_NO_PROGRESS */ +static ucs_status_t ucs_async_handler_dispatch(ucs_async_handler_t *handler) +{ + ucs_async_context_t *async; + ucs_async_mode_t mode; + ucs_status_t status; + + mode = handler->mode; + async = handler->async; + if (async != NULL) { + async->last_wakeup = ucs_get_time(); + } + if (async == NULL) { + ucs_async_handler_invoke(handler); + } else if (ucs_async_method_call(mode, context_try_block, async)) { + ucs_async_handler_invoke(handler); + ucs_async_method_call(mode, context_unblock, async); + } else /* async != NULL */ { + ucs_trace_async("missed " UCS_ASYNC_HANDLER_FMT ", last_wakeup %lu", + UCS_ASYNC_HANDLER_ARG(handler), async->last_wakeup); + if (ucs_atomic_cswap32(&handler->missed, 0, 1) == 0) { /* queue only if not already marked as missed */ + status = ucs_mpmc_queue_push(&async->missed, handler->id); + if (status != UCS_OK) { + ucs_fatal("Failed to push event %d to miss queue: %s", + handler->id, ucs_status_string(status)); + } + } + return UCS_ERR_NO_PROGRESS; + } + return UCS_OK; +} +/* Dispatch the handler of every event id in the array; ids without a handler are ignored. Returns UCS_OK or the last non-OK dispatch status. */ +ucs_status_t ucs_async_dispatch_handlers(int *events, size_t count) +{ + ucs_status_t status = UCS_OK, tmp_status; + ucs_async_handler_t *handler; + + for (; count > 0; --count, ++events) { + handler = ucs_async_handler_get(*events); /* takes a reference */ + if (handler == NULL) { + ucs_trace_async("handler for %d not found - ignoring", *events); + continue; + } + + tmp_status = ucs_async_handler_dispatch(handler); + if (tmp_status != UCS_OK) { + status = tmp_status; + } + + ucs_async_handler_put(handler); /* drop the reference taken above */ + } + return status; +} +/* Collect the ids of the expired timers of the queue (at most max(1, queue size) of them) and dispatch their handlers */ +ucs_status_t ucs_async_dispatch_timerq(ucs_timer_queue_t *timerq, + ucs_time_t current_time) +{ + size_t max_timers, num_timers = 0; + int *expired_timers; + ucs_timer_t *timer; + + max_timers = ucs_max(1, ucs_timerq_size(timerq)); + expired_timers = ucs_alloca(max_timers * sizeof(*expired_timers)); + + 
ucs_timerq_for_each_expired(timer, timerq, current_time, { + expired_timers[num_timers++] = timer->id; + if (num_timers >= max_timers) { + break; /* Keep timers which we don't have room for in the queue */ + } + }) + + return ucs_async_dispatch_handlers(expired_timers, num_timers); +} +/* Initialize a caller-provided async context: set up its miss queue, run the mode-specific context_init hook, then record the mode and reset the handler count. On failure the miss queue is released again. */ +ucs_status_t ucs_async_context_init(ucs_async_context_t *async, ucs_async_mode_t mode) +{ + ucs_status_t status; + + ucs_trace_func("async=%p", async); + + status = ucs_mpmc_queue_init(&async->missed, ucs_global_opts.async_max_events); + if (status != UCS_OK) { + goto err; + } + + status = ucs_async_method_call(mode, context_init, async); + if (status != UCS_OK) { + goto err_free_miss_fds; + } + + async->mode = mode; + async->num_handlers = 0; + async->last_wakeup = ucs_get_time(); + return UCS_OK; + +err_free_miss_fds: + ucs_mpmc_queue_cleanup(&async->missed); +err: + return status; +} +/* Allocate an async context and initialize it with ucs_async_context_init(); *async_p is written only on success */ +ucs_status_t ucs_async_context_create(ucs_async_mode_t mode, + ucs_async_context_t **async_p) +{ + ucs_async_context_t *async; + ucs_status_t status; + + async = ucs_malloc(sizeof(*async), "async context"); + if (async == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + + status = ucs_async_context_init(async, mode); + if (status != UCS_OK) { + goto err_free_mem; + } + + *async_p = async; + return UCS_OK; + +err_free_mem: + ucs_free(async); +err: + return status; +} +/* Clean up a context without freeing its memory. Handlers that still refer to the context are only reported with warnings; they are not removed here. */ +void ucs_async_context_cleanup(ucs_async_context_t *async) +{ + ucs_async_handler_t *handler; + + ucs_trace_func("async=%p", async); + + if (async->num_handlers > 0) { + pthread_rwlock_rdlock(&ucs_async_global_context.handlers_lock); + kh_foreach_value(&ucs_async_global_context.handlers, handler, { + if (async == handler->async) { + ucs_warn("async %p handler "UCS_ASYNC_HANDLER_FMT" not released", + async, UCS_ASYNC_HANDLER_ARG(handler)); + } + }); + ucs_warn("releasing async context with %d handlers", async->num_handlers); + pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock); + } + + 
ucs_async_method_call(async->mode, context_cleanup, async); + ucs_mpmc_queue_cleanup(&async->missed); +} +/* Clean up the context and free its memory (counterpart of ucs_async_context_create) */ +void ucs_async_context_destroy(ucs_async_context_t *async) +{ + ucs_async_context_cleanup(async); + ucs_free(async); +} +/* Allocate a handler, count it against the async context's handler limit and register it under a free id in [min_id, max_id); the chosen id is returned in *id_p */ +static ucs_status_t +ucs_async_alloc_handler(int min_id, int max_id, ucs_async_mode_t mode, + int events, ucs_async_event_cb_t cb, void *arg, + ucs_async_context_t *async, int *id_p) +{ + ucs_async_handler_t *handler; + ucs_status_t status; + + /* If async context is given, it should have same mode */ + if ((async != NULL) && (async->mode != mode)) { + ucs_error("Async mode mismatch for handler %s(), " + "mode: %d async context mode: %d", + ucs_debug_get_symbol_name(cb), mode, async->mode); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + /* Limit the number of handlers per context. The counter is incremented first and rolled back at err_dec_num_handlers on any failure. */ + if (async != NULL) { + if (ucs_atomic_fadd32(&async->num_handlers, 1) >= ucs_global_opts.async_max_events) { + status = UCS_ERR_EXCEEDS_LIMIT; + goto err_dec_num_handlers; + } + } + + handler = ucs_malloc(sizeof *handler, "async handler"); + if (handler == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_dec_num_handlers; + } + + handler->mode = mode; + handler->events = events; + handler->caller = UCS_ASYNC_HANDLER_CALLER_NULL; + handler->cb = cb; + handler->arg = arg; + handler->async = async; + handler->missed = 0; + handler->refcount = 1; /* reference owned by the handlers table */ + ucs_async_method_call(mode, block); /* block event delivery of this mode while the table is modified */ + status = ucs_async_handler_add(min_id, max_id, handler); + ucs_async_method_call(mode, unblock); + if (status != UCS_OK) { + goto err_free; + } + + ucs_assert((handler->id >= min_id) && (handler->id < max_id)); + *id_p = handler->id; + return UCS_OK; + +err_free: + ucs_free(handler); +err_dec_num_handlers: + if (async != NULL) { + ucs_atomic_add32(&async->num_handlers, (uint32_t)-1); /* adding (uint32_t)-1 decrements the counter */ + } +err: + return status; +} +/* Register cb for events on event_fd: the handler id is the fd itself, and the fd is then added to the mode's event source */ +ucs_status_t ucs_async_set_event_handler(ucs_async_mode_t mode, int event_fd, + int events, ucs_async_event_cb_t cb, + void *arg, ucs_async_context_t *async) +{ + 
ucs_status_t status; + int event_id; + + if (event_fd >= UCS_ASYNC_TIMER_ID_MIN) { + /* File descriptor too large: ids from UCS_ASYNC_TIMER_ID_MIN upwards are used for timers */ + status = UCS_ERR_EXCEEDS_LIMIT; + goto err; + } + + status = ucs_async_alloc_handler(event_fd, event_fd + 1, mode, events, cb, + arg, async, &event_id); /* single-id range: the handler id must equal the fd */ + if (status != UCS_OK) { + goto err; + } + ucs_assert(event_id == event_fd); + + status = ucs_async_method_call(mode, add_event_fd, async, event_fd, events); + if (status != UCS_OK) { + goto err_remove_handler; + } + + ucs_debug("listening to async event fd %d events 0x%x mode %s", event_fd, + events, ucs_async_mode_names[mode]); + return UCS_OK; + +err_remove_handler: + ucs_async_remove_handler(event_fd, 1); +err: + return status; +} +/* Register cb as a timer handler: allocate an id in [UCS_ASYNC_TIMER_ID_MIN, UCS_ASYNC_TIMER_ID_MAX), add the timer to the mode's timer source and return the id in *timer_id_p */ +ucs_status_t ucs_async_add_timer(ucs_async_mode_t mode, ucs_time_t interval, + ucs_async_event_cb_t cb, void *arg, + ucs_async_context_t *async, int *timer_id_p) +{ + ucs_status_t status; + int timer_id; + + status = ucs_async_alloc_handler(UCS_ASYNC_TIMER_ID_MIN, UCS_ASYNC_TIMER_ID_MAX, + mode, 1, cb, arg, async, &timer_id); /* events=1: presumably so that ucs_async_poll(), which skips handlers with an empty event set, dispatches timers */ + if (status != UCS_OK) { + goto err; + } + + status = ucs_async_method_call(mode, add_timer, async, timer_id, interval); + if (status != UCS_OK) { + goto err_remove_handler; + } + + *timer_id_p = timer_id; + return UCS_OK; + +err_remove_handler: + ucs_async_remove_handler(timer_id, 1); +err: + return status; +} +/* Remove the handler registered for a timer or fd id; with sync != 0, wait until no other reference to the handler remains */ +ucs_status_t ucs_async_remove_handler(int id, int sync) +{ + ucs_async_handler_t *handler; + ucs_status_t status; + + /* We can't find the async handler mode without taking a read lock, which in + * turn may cause a deadlock if async handler is running. So we have to block + * all modes. 
+ */ + ucs_async_method_call_all(block); + handler = ucs_async_handler_extract(id); + ucs_async_method_call_all(unblock); + if (handler == NULL) { + return UCS_ERR_NO_ELEM; + } + + ucs_debug("removing async handler " UCS_ASYNC_HANDLER_FMT, + UCS_ASYNC_HANDLER_ARG(handler)); + if (handler->id >= UCS_ASYNC_TIMER_ID_MIN) { /* ids in the timer range are timers, smaller ids are file descriptors */ + status = ucs_async_method_call(handler->mode, remove_timer, + handler->async, handler->id); + } else { + status = ucs_async_method_call(handler->mode, remove_event_fd, + handler->async, handler->id); + } + if (status != UCS_OK) { /* only warn: the handler is already out of the table and is released below */ + ucs_warn("failed to remove async handler " UCS_ASYNC_HANDLER_FMT " : %s", + UCS_ASYNC_HANDLER_ARG(handler), ucs_status_string(status)); + } + + if (handler->async != NULL) { + ucs_atomic_add32(&handler->async->num_handlers, (uint32_t)-1); /* adding (uint32_t)-1 decrements the counter */ + } + + if (sync) { + int called = (pthread_self() == handler->caller); /* 1 if we are running inside this handler's own callback */ + ucs_trace("waiting for " UCS_ASYNC_HANDLER_FMT " completion (called=%d)", + UCS_ASYNC_HANDLER_ARG(handler), called); + while ((handler->refcount - called) > 1) { /* spin until only our reference (plus the dispatching one, if called from the callback) is left */ + /* TODO use pthread_cond / futex to reduce CPU usage while waiting + * for the async handler to complete */ + sched_yield(); + } + } + + ucs_async_handler_put(handler); + return UCS_OK; +} +/* Change the event mask of the handler registered for fd and forward the new mask to the mode's modify_event_fd hook. Returns UCS_ERR_INVALID_PARAM for ids in the timer range and UCS_ERR_NO_ELEM if no handler is registered. */ +ucs_status_t ucs_async_modify_handler(int fd, int events) +{ + ucs_async_handler_t *handler; + ucs_status_t status; + + if (fd >= UCS_ASYNC_TIMER_ID_MIN) { + return UCS_ERR_INVALID_PARAM; + } + + ucs_async_method_call_all(block); + handler = ucs_async_handler_get(fd); /* takes a reference, dropped below */ + ucs_async_method_call_all(unblock); + + if (handler == NULL) { + return UCS_ERR_NO_ELEM; + } + + handler->events = events; + status = ucs_async_method_call(handler->mode, modify_event_fd, + handler->async, fd, handler->events); + ucs_async_handler_put(handler); + + return status; +} +/* Drain the context's miss queue and invoke the handler of every queued event id with the context blocked */ +void __ucs_async_poll_missed(ucs_async_context_t *async) +{ + ucs_async_handler_t *handler; + ucs_status_t status; + uint32_t value; + + ucs_trace_async("miss handler"); + + while 
(!ucs_mpmc_queue_is_empty(&async->missed)) { + /* Take one missed event id off the queue */ + status = ucs_mpmc_queue_pull(&async->missed, &value); + if (status == UCS_ERR_NO_PROGRESS) { + /* TODO we should retry here if the code is changed to check miss + * only during ASYNC_UNBLOCK */ + break; + } + + ucs_async_method_call_all(block); + UCS_ASYNC_BLOCK(async); + handler = ucs_async_handler_get(value); + if (handler != NULL) { + ucs_assert(handler->async == async); + handler->missed = 0; /* allow the handler to be queued as missed again */ + ucs_async_handler_invoke(handler); + ucs_async_handler_put(handler); + } + UCS_ASYNC_UNBLOCK(async); + ucs_async_method_call_all(unblock); + } +} +/* Dispatch all handlers that match the given context (all handlers if async is NULL), whose context is not poll-blocked and whose event set is non-empty. Matching handlers are collected and referenced under the read lock and dispatched after the lock is released. */ +void ucs_async_poll(ucs_async_context_t *async) +{ + ucs_async_handler_t **handlers, *handler; + size_t i, n; + + ucs_trace_poll("async=%p", async); + + pthread_rwlock_rdlock(&ucs_async_global_context.handlers_lock); + handlers = ucs_alloca(kh_size(&ucs_async_global_context.handlers) * sizeof(*handlers)); + n = 0; + kh_foreach_value(&ucs_async_global_context.handlers, handler, { + if (((async == NULL) || (async == handler->async)) && /* Async context match */ + ((handler->async == NULL) || (handler->async->poll_block == 0)) && /* Not blocked */ + handler->events) /* Non-empty event set */ + { + ucs_async_handler_hold(handler); + handlers[n++] = handler; + } + }); + pthread_rwlock_unlock(&ucs_async_global_context.handlers_lock); + + for (i = 0; i < n; ++i) { + ucs_async_handler_dispatch(handlers[i]); /* the dispatch status is intentionally not checked here */ + ucs_async_handler_put(handlers[i]); + } +} +/* One-time global setup: handlers lock, handlers hash table, and the init hook of every async mode */ +void ucs_async_global_init() +{ + int ret; + + ret = pthread_rwlock_init(&ucs_async_global_context.handlers_lock, NULL); + if (ret) { /* NOTE(review): pthread_rwlock_init() returns the error number instead of setting errno, so %m below may not describe the failure - confirm */ + ucs_fatal("pthread_rwlock_init() failed: %m"); + } + + kh_init_inplace(ucs_async_handler, &ucs_async_global_context.handlers); + ucs_async_method_call_all(init); +} +/* Global teardown, in reverse order of ucs_async_global_init(); leftover handlers are only reported */ +void ucs_async_global_cleanup() +{ + int num_elems = kh_size(&ucs_async_global_context.handlers); + if (num_elems != 0) { + ucs_debug("async handler table is not empty during exit (contains %d elems)", + num_elems); + } + 
ucs_async_method_call_all(cleanup); + kh_destroy_inplace(ucs_async_handler, &ucs_async_global_context.handlers); + pthread_rwlock_destroy(&ucs_async_global_context.handlers_lock); +} diff --git a/src/ucs/async/async.h b/src/ucs/async/async.h new file mode 100644 index 0000000..70be5de --- /dev/null +++ b/src/ucs/async/async.h @@ -0,0 +1,137 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_ASYNC_H_ +#define UCS_ASYNC_H_ + +#include "thread.h" +#include "signal.h" +#include "async_fwd.h" + +#include +#include +#include +#include + +BEGIN_C_DECLS + +/** @file async.h */ + +/** + * Async event context. Manages timer and fd notifications. + */ +struct ucs_async_context { + union { /* Mode-specific state; the member in use is selected by 'mode' */ + ucs_async_thread_context_t thread; + ucs_async_signal_context_t signal; + int poll_block; /* Poll mode: block nesting counter, 0 means not blocked */ + }; + + ucs_async_mode_t mode; /* Event delivery mode */ + volatile uint32_t num_handlers; /* Number of event and timer handlers */ + ucs_mpmc_queue_t missed; /* Miss queue */ + ucs_time_t last_wakeup; /* time of the last wakeup */ +}; + + +/** + * @ingroup UCS_RESOURCE + * + * Global initialization and cleanup of async event handling. + */ +void ucs_async_global_init(); +void ucs_async_global_cleanup(); + + +/** + * Initialize an asynchronous execution context. The context is not allocated. + * To allocate the context, please use the public version of the + * function, @ref ucs_async_context_create. + * This can be used to ensure safe event delivery. + * + * @param async Event context to initialize. + * @param mode Indicates whether to use signals or polling threads + * for waiting. + * + * @return Error code as defined by @ref ucs_status_t. + */ +ucs_status_t ucs_async_context_init(ucs_async_context_t *async, + ucs_async_mode_t mode); + + +/** + * Clean up the async context, and release system resources if possible. + * + * @param async Asynchronous context to clean up. 
+ */ +void ucs_async_context_cleanup(ucs_async_context_t *async); + + +/** + * Check if an async callback was missed because the main thread has blocked + * the async context. This works as edge-triggered. + * Should be called with the lock held. Returns 1 if the miss queue was processed or (in poll mode) the context was polled, 0 otherwise. + */ +static inline int ucs_async_check_miss(ucs_async_context_t *async) +{ + if (ucs_unlikely(!ucs_mpmc_queue_is_empty(&async->missed))) { + __ucs_async_poll_missed(async); + return 1; + } else if (ucs_unlikely(async->mode == UCS_ASYNC_MODE_POLL)) { + ucs_async_poll(async); + return 1; + } + return 0; +} + + +/** + * Block the async handler (if it's currently running, wait until it exits and + * block it then). Used to serialize accesses with the async handler. + * + * @param _async Event context to block events for. + * @note This function might wait until a currently running callback returns. + */ +#define UCS_ASYNC_BLOCK(_async) \ + do { \ + if ((_async)->mode == UCS_ASYNC_MODE_THREAD_SPINLOCK) { \ + ucs_spin_lock(&(_async)->thread.spinlock); \ + } else if ((_async)->mode == UCS_ASYNC_MODE_THREAD_MUTEX) { \ + (void)pthread_mutex_lock(&(_async)->thread.mutex); \ + } else if ((_async)->mode == UCS_ASYNC_MODE_SIGNAL) { \ + UCS_ASYNC_SIGNAL_BLOCK(_async); \ + } else { \ + ++(_async)->poll_block; \ + } \ + } while(0) + + +/** + * Unblock asynchronous event delivery, and invoke pending callbacks. + * NOTE(review): the thread and poll branches below only release the lock / decrement the counter; missed callbacks appear to be run via ucs_async_check_miss() - confirm. + * @param _async Event context to unblock events for. + */ +#define UCS_ASYNC_UNBLOCK(_async) \ + do { \ + if ((_async)->mode == UCS_ASYNC_MODE_THREAD_SPINLOCK) { \ + ucs_spin_unlock(&(_async)->thread.spinlock); \ + } else if ((_async)->mode == UCS_ASYNC_MODE_THREAD_MUTEX) { \ + (void)pthread_mutex_unlock(&(_async)->thread.mutex); \ + } else if ((_async)->mode == UCS_ASYNC_MODE_SIGNAL) { \ + UCS_ASYNC_SIGNAL_UNBLOCK(_async); \ + } else { \ + --(_async)->poll_block; \ + } \ + } while (0) + +/* Thread async mode to use: the mutex variant when running on valgrind, the spinlock variant otherwise */ +#define UCS_ASYNC_THREAD_LOCK_TYPE (RUNNING_ON_VALGRIND ? 
\ + UCS_ASYNC_MODE_THREAD_MUTEX : UCS_ASYNC_MODE_THREAD_SPINLOCK) + + +END_C_DECLS + +#endif diff --git a/src/ucs/async/async_fwd.h b/src/ucs/async/async_fwd.h new file mode 100644 index 0000000..ed77dda --- /dev/null +++ b/src/ucs/async/async_fwd.h @@ -0,0 +1,146 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_ASYNC_FWD_H +#define UCS_ASYNC_FWD_H + +#include +#include +#include +#include + +BEGIN_C_DECLS + +/** @file async_fwd.h */ + +typedef struct ucs_async_context ucs_async_context_t; + + +/** + * @ingroup UCS_RESOURCE + * + * Async event callback. + * + * @param id Event id (timer or file descriptor). + * @param arg User-defined argument. + */ +typedef void (*ucs_async_event_cb_t)(int id, void *arg); + + +/** + * @ingroup UCS_RESOURCE + * + * Register a file descriptor for monitoring (call handler upon events). + * Every fd can have only one handler. + * + * @param mode Event delivery mode (signal, thread or poll). Must match the mode of @a async, if given. + * @param event_fd File descriptor to set handler for. Must be smaller than UCS_ASYNC_TIMER_ID_MIN. + * @param events Events to wait on (UCS_EVENT_SET_EVxxx bits). + * @param cb Callback function to execute. + * @param arg Argument to callback. + * @param async Async context to which events are delivered. + * If NULL, safety is up to the user. + * + * @return Error code as defined by @ref ucs_status_t. + */ +ucs_status_t ucs_async_set_event_handler(ucs_async_mode_t mode, int event_fd, + int events, ucs_async_event_cb_t cb, + void *arg, ucs_async_context_t *async); + + +/** + * @ingroup UCS_RESOURCE + * + * Add timer handler. + * + * @param mode Event delivery mode (signal, thread or poll). Must match the mode of @a async, if given. + * @param interval Timer interval. + * @param cb Callback function to execute. + * @param arg Argument to callback. + * @param async Async context to which events are delivered. + * If NULL, safety is up to the user. + * @param timer_id_p Filled with timer id. + * + * @return Error code as defined by @ref ucs_status_t. 
+ */ +ucs_status_t ucs_async_add_timer(ucs_async_mode_t mode, ucs_time_t interval, + ucs_async_event_cb_t cb, void *arg, + ucs_async_context_t *async, int *timer_id_p); + + +/** + * @ingroup UCS_RESOURCE + * + * Remove an event handler (Timer or event file). + * + * @param id Timer/FD to remove. + * @param sync If nonzero, wait until the handler for this event is not + * running anymore. If called from the context of the callback, + * the handler will be removed immediately after the current + * callback returns. + * + * @return Error code as defined by @ref ucs_status_t (UCS_ERR_NO_ELEM if no handler is registered for @a id). + */ +ucs_status_t ucs_async_remove_handler(int id, int sync); + + +/** + * @ingroup UCS_RESOURCE + * + * Modify events mask for an existing event handler (event file). + * + * @param fd File descriptor to modify events for. + * @param events New set of events to wait on (UCS_EVENT_SET_EVxxx bits). + * + * @return Error code as defined by @ref ucs_status_t (UCS_ERR_INVALID_PARAM if @a fd is in the timer id range, UCS_ERR_NO_ELEM if it has no handler). + */ +ucs_status_t ucs_async_modify_handler(int fd, int events); + + +/** + * @ingroup UCS_RESOURCE + * @brief Create an asynchronous execution context + * + * Allocate and initialize an asynchronous execution context. + * This can be used to ensure safe event delivery. + * + * @param mode Indicates whether to use signals or polling threads + * for waiting. + * @param async_p Filled with a pointer to the new context on success. + * + * @return Error code as defined by @ref ucs_status_t. + */ +ucs_status_t ucs_async_context_create(ucs_async_mode_t mode, + ucs_async_context_t **async_p); + + +/** + * @ingroup UCS_RESOURCE + * @brief Destroy the asynchronous execution context + * + * Clean up the async context, and release system resources if possible. + * The context memory is released. + * + * @param async Asynchronous context to clean up. + */ +void ucs_async_context_destroy(ucs_async_context_t *async); + + +/** + * @ingroup UCS_RESOURCE + * + * Poll on async context. + * + * @param async Async context to poll on. NULL polls on all. 
+ */ +void ucs_async_poll(ucs_async_context_t *async); + +/* Invoke the handlers whose ids were queued on the context's miss queue; used by ucs_async_check_miss() */ +void __ucs_async_poll_missed(ucs_async_context_t *async); + +END_C_DECLS + +#endif diff --git a/src/ucs/async/async_int.h b/src/ucs/async/async_int.h new file mode 100644 index 0000000..b3415c2 --- /dev/null +++ b/src/ucs/async/async_int.h @@ -0,0 +1,81 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_ASYNC_INT_H +#define UCS_ASYNC_INT_H + +#include "async.h" + +#include +#include + + +/* Async event handler */ +typedef struct ucs_async_handler ucs_async_handler_t; +struct ucs_async_handler { + int id; /* Event/Timer ID */ + ucs_async_mode_t mode; /* Event delivery mode */ + int events; /* Bitmap of events */ + pthread_t caller; /* Thread which invokes the callback */ + ucs_async_event_cb_t cb; /* Callback function */ + void *arg; /* Callback argument */ + ucs_async_context_t *async; /* Async context for the handler. Can be NULL */ + volatile uint32_t missed; /* Protect against adding to miss queue multiple times */ + volatile uint32_t refcount; /* Reference count; the handler is freed when the last reference is dropped */ +}; + + +/** + * Dispatch events coming from an async context. + * + * @param events Array of event IDs to dispatch. + * @param count Number of events + */ +ucs_status_t ucs_async_dispatch_handlers(int *events, size_t count); + + +/** + * Dispatch timers from a timer queue. + * + * @param timerq Timer queue whose timers to dispatch. + * @param current_time Current time for checking timer expiration. + */ +ucs_status_t ucs_async_dispatch_timerq(ucs_timer_queue_t *timerq, + ucs_time_t current_time); + + +/** + * Operations for a specific async event delivery method. 
+ */ +typedef struct ucs_async_ops { + void (*init)(); /* Global setup of the mode, called from ucs_async_global_init() */ + void (*cleanup)(); /* Global teardown of the mode, called from ucs_async_global_cleanup() */ + /* Block / unblock event delivery of the whole mode, used around changes to the handlers table */ + void (*block)(); + void (*unblock)(); + /* Per-context hooks */ + ucs_status_t (*context_init)(ucs_async_context_t *async); + void (*context_cleanup)(ucs_async_context_t *async); + int (*context_try_block)(ucs_async_context_t *async); /* Nonzero if the context was blocked and a handler may run now */ + void (*context_unblock)(ucs_async_context_t *async); + /* Event file descriptor management */ + ucs_status_t (*add_event_fd)(ucs_async_context_t *async, int event_fd, + int events); + ucs_status_t (*remove_event_fd)(ucs_async_context_t *async, int event_fd); + ucs_status_t (*modify_event_fd)(ucs_async_context_t *async, int event_fd, + int events); + /* Timer management */ + ucs_status_t (*add_timer)(ucs_async_context_t *async, int timer_id, + ucs_time_t interval); + ucs_status_t (*remove_timer)(ucs_async_context_t *async, int timer_id); +} ucs_async_ops_t; + +/* Ops tables of the thread and signal modes, defined in other source files; the poll mode table is static in async.c */ +extern ucs_async_ops_t ucs_async_thread_spinlock_ops; +extern ucs_async_ops_t ucs_async_thread_mutex_ops; +extern ucs_async_ops_t ucs_async_signal_ops; + +#endif diff --git a/src/ucs/async/pipe.c b/src/ucs/async/pipe.c new file mode 100644 index 0000000..b6679d4 --- /dev/null +++ b/src/ucs/async/pipe.c @@ -0,0 +1,63 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "pipe.h" + +#include +#include + +/* Create a pipe with both ends in non-blocking mode. Returns UCS_OK, or UCS_ERR_IO_ERROR on any failure (both descriptors are closed again if fcntl fails). */ +ucs_status_t ucs_async_pipe_create(ucs_async_pipe_t *p) +{ + int pipefds[2]; + int ret; + + ret = pipe(pipefds); + if (ret < 0) { + ucs_error("pipe() returned %d: %m", ret); + goto err; + } + + /* Set both ends of the pipe to non-blocking mode */ + if (ucs_sys_fcntl_modfl(pipefds[0], O_NONBLOCK, 0) != UCS_OK || + ucs_sys_fcntl_modfl(pipefds[1], O_NONBLOCK, 0) != UCS_OK) + { + goto err_close_pipe; + } + + p->read_fd = pipefds[0]; + p->write_fd = pipefds[1]; + return UCS_OK; + +err_close_pipe: + close(pipefds[0]); + close(pipefds[1]); +err: + return UCS_ERR_IO_ERROR; +} +/* Close both ends of the pipe */ +void ucs_async_pipe_destroy(ucs_async_pipe_t *p) +{ + close(p->read_fd); + close(p->write_fd); +} +/* Signal an event by writing one int to the pipe */ +void ucs_async_pipe_push(ucs_async_pipe_t *p) +{ + int dummy = 0; + int ret; + + ret = write(p->write_fd, &dummy, sizeof(dummy)); + if (ret < 0 && errno != EAGAIN) { /* EAGAIN on the non-blocking pipe is deliberately not reported */ + ucs_error("writing to wakeup pipe failed: %m"); + } +} +/* Discard everything currently in the pipe */ +void ucs_async_pipe_drain(ucs_async_pipe_t *p) +{ + int dummy; + while (read(p->read_fd, &dummy, sizeof(dummy)) > 0); /* empty body: read until read() returns <= 0 */ +} diff --git a/src/ucs/async/pipe.h b/src/ucs/async/pipe.h new file mode 100644 index 0000000..d81c009 --- /dev/null +++ b/src/ucs/async/pipe.h @@ -0,0 +1,45 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_ASYNC_PIPE_H +#define UCS_ASYNC_PIPE_H + +#include + + +/** + * A pipe for event signaling. + */ +typedef struct ucs_async_pipe { + int read_fd; /* Read end, polled by the event consumer */ + int write_fd; /* Write end, written by ucs_async_pipe_push() */ +} ucs_async_pipe_t ; + + +/** + * Create/destroy a pipe for event signaling. + */ +ucs_status_t ucs_async_pipe_create(ucs_async_pipe_t *p); +void ucs_async_pipe_destroy(ucs_async_pipe_t *p); + +/** + * Push an event to the signaling pipe. + */ +void ucs_async_pipe_push(ucs_async_pipe_t *p); + +/** + * Remove all events from the pipe. + */ +void ucs_async_pipe_drain(ucs_async_pipe_t *p); + +/** + * @return File descriptor which gets the pipe events. 
+ */ +static inline int ucs_async_pipe_rfd(ucs_async_pipe_t *p) { + return p->read_fd; +} + +#endif diff --git a/src/ucs/async/signal.c b/src/ucs/async/signal.c new file mode 100644 index 0000000..efd3189 --- /dev/null +++ b/src/ucs/async/signal.c @@ -0,0 +1,608 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "signal.h" +#include "async_int.h" + +#include +#include +#include +#include +#include +#include +#include + +#define UCS_SIGNAL_MAX_TIMERQS 64 + +/* + * Per-thread system timer and software timer queue. We can dispatch timers only + * on the same thread which added them. + */ +typedef struct ucs_async_signal_timer { + pid_t tid; /* Thread ID */ + timer_t sys_timer_id; /* System timer ID */ + ucs_timer_queue_t timerq; /* Queue of timers for the thread */ +} ucs_async_signal_timer_t; + +/* Global state of the signal async mode */ +static struct { + struct sigaction prev_sighandler; /* Previous signal handler */ + int event_count; /* Number of events in use */ + pthread_mutex_t event_lock; /* Lock for adding/removing events */ + pthread_mutex_t timers_lock; /* Lock for timers array */ + ucs_async_signal_timer_t timers[UCS_SIGNAL_MAX_TIMERQS];/* Array of all threads */ +} ucs_async_signal_global_context = { + .event_count = 0, + .event_lock = PTHREAD_MUTEX_INITIALIZER, + .timers_lock = PTHREAD_MUTEX_INITIALIZER, + .timers = {{ .tid = 0 }} +}; + + +/** + * In signal mode, we allow user to manipulate events only from the same thread. + * Otherwise, we'd get into big synchronization issues. On mismatch the macro returns UCS_ERR_UNREACHABLE from the calling function; do/while(0) makes the expansion a single statement. + */ +#define UCS_ASYNC_SIGNAL_CHECK_THREAD(_async) \ + do { \ + if (ucs_get_tid() != ucs_async_signal_context_tid(_async)) { \ + ucs_error("cannot manipulate signal async from different thread"); \ + return UCS_ERR_UNREACHABLE; \ + } \ + } while (0) +/** + * @return To which thread the async context should deliver events to. 
+ */ +static pid_t ucs_async_signal_context_tid(ucs_async_context_t *async) +{ + static pid_t pid = -1; + + if (pid == -1) { + pid = getpid(); + } + return (async == NULL) ? pid : async->signal.tid;; +} + +static ucs_status_t +ucs_async_signal_set_fd_owner(pid_t dest_tid, int fd) +{ +#if HAVE_DECL_F_SETOWN_EX + struct f_owner_ex owner; + + owner.type = F_OWNER_TID; + owner.pid = dest_tid; + + ucs_trace_async("fcntl(F_SETOWN_EX, fd=%d, tid=%d)", fd, dest_tid); + if (0 > fcntl(fd, F_SETOWN_EX, &owner)) { + ucs_error("fcntl F_SETOWN_EX failed: %m"); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +#else + if (dest_tid != getpid()) { + ucs_error("Cannot use signaled events to threads without F_SETOWN_EX support"); + return UCS_ERR_UNSUPPORTED; + } + + if (0 > fcntl(fd, F_SETOWN, dest_tid)) { + ucs_error("fcntl F_SETOWN failed: %m"); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +#endif +} + +static ucs_status_t +ucs_async_signal_sys_timer_create(int uid, pid_t tid, timer_t *sys_timer_id) +{ + struct sigevent ev; + timer_t timer; + int ret; + + ucs_trace_func("tid=%d", tid); + + /* Create timer signal */ + memset(&ev, 0, sizeof(ev)); + ev.sigev_notify = SIGEV_THREAD_ID; + ev.sigev_signo = ucs_global_opts.async_signo; + ev.sigev_value.sival_int = uid; /* user parameter to timer */ +#if defined(HAVE_SIGEVENT_SIGEV_UN_TID) + ev._sigev_un._tid = tid; /* target thread */ +#elif defined(HAVE_SIGEVENT_SIGEV_NOTIFY_THREAD_ID) + ev.sigev_notify_thread_id = tid; /* target thread */ +#else +#error "Port me" +#endif + ret = timer_create(CLOCK_REALTIME, &ev, &timer); + if (ret < 0) { + ucs_error("failed to create an interval timer: %m"); + return UCS_ERR_INVALID_PARAM; + } + + *sys_timer_id = timer; + return UCS_OK; +} + +static ucs_status_t +ucs_async_signal_sys_timer_set_interval(timer_t sys_timer_id, ucs_time_t interval) +{ + struct itimerspec its; + int ret; + + ucs_trace_func("sys_timer_id=%p interval=%.2f usec", sys_timer_id, + ucs_time_to_usec(interval)); + + /* Modify 
the timer to have the desired accuracy */ + ucs_sec_to_timespec(ucs_time_to_sec(interval), &its.it_interval); + its.it_value = its.it_interval; + ret = timer_settime(sys_timer_id, 0, &its, NULL); + if (ret < 0) { + ucs_error("failed to set the interval for the interval timer: %m"); + return UCS_ERR_INVALID_PARAM; + } + + return UCS_OK; +} +/* Delete the system timer; a failure is only reported as a warning */ +static void ucs_async_signal_sys_timer_delete(timer_t sys_timer_id) +{ + int ret; + + ucs_trace_func("sys_timer_id=%p", sys_timer_id); + + ret = timer_delete(sys_timer_id); + if (ret < 0) { + ucs_warn("failed to remove the timer: %m"); + } + + ucs_trace_async("removed system timer %p", sys_timer_id); +} +/* Dispatch the expired timers of slot 'uid' of the global timers array, but only when running on the thread that owns the slot */ +static ucs_status_t ucs_async_signal_dispatch_timer(int uid) +{ + ucs_async_signal_timer_t *timer; + + ucs_assertv_always((uid >= 0) && (uid < UCS_SIGNAL_MAX_TIMERQS), "uid=%d", uid); + timer = &ucs_async_signal_global_context.timers[uid]; /* index the array only after uid has been range-checked */ + /* No need to take lock - remove operation blocks signals on the same thread */ + if (timer->tid != ucs_get_tid()) { + return UCS_OK; + } + + return ucs_async_dispatch_timerq(&timer->timerq, ucs_get_time()); +} +/* Handler of the async signal: timer expirations are dispatched by the uid carried in the signal value, fd events by the signalling fd; other event codes are ignored with a warning */ +static void ucs_async_signal_handler(int signo, siginfo_t *siginfo, void *arg) +{ + ucs_assert(signo == ucs_global_opts.async_signo); + + /* Check event code */ + switch (siginfo->si_code) { + case SI_TIMER: + ucs_trace_async("timer signal uid=%d", siginfo->si_value.sival_int); + ucs_async_signal_dispatch_timer(siginfo->si_value.sival_int); + return; + case POLL_IN: + case POLL_OUT: + case POLL_HUP: + case POLL_ERR: + case POLL_MSG: + case POLL_PRI: + ucs_trace_async("async signal handler called for fd %d", siginfo->si_fd); + ucs_async_dispatch_handlers(&siginfo->si_fd, 1); + return; + default: + ucs_warn("signal handler called with unexpected event code %d, ignoring", + siginfo->si_code); + return; + } +} +/* Unblock (allow != 0) or block (allow == 0) the async signal in the signal mask of the calling thread */ +static void ucs_async_signal_allow(int allow) +{ + sigset_t sigset; + + ucs_trace_func("enable=%d tid=%d", allow, ucs_get_tid()); + + sigemptyset(&sigset); + sigaddset(&sigset, 
ucs_global_opts.async_signo); + pthread_sigmask(allow ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL); +} +/* 'block' hook of the signal mode: block the async signal on the calling thread if any signal event is in use */ +static void ucs_async_signal_block_all() +{ + pthread_mutex_lock(&ucs_async_signal_global_context.event_lock); + if (ucs_async_signal_global_context.event_count > 0) { + ucs_async_signal_allow(0); + } + pthread_mutex_unlock(&ucs_async_signal_global_context.event_lock); +} +/* 'unblock' hook of the signal mode: unblock the async signal on the calling thread if any signal event is in use */ +static void ucs_async_signal_unblock_all() +{ + pthread_mutex_lock(&ucs_async_signal_global_context.event_lock); + if (ucs_async_signal_global_context.event_count > 0) { + ucs_async_signal_allow(1); + } + pthread_mutex_unlock(&ucs_async_signal_global_context.event_lock); +} +/* Count one more user of the async signal; the first user installs ucs_async_signal_handler and saves the previous disposition in prev_sighandler */ +static ucs_status_t ucs_async_signal_install_handler() +{ + struct sigaction new_action; + int ret; + + ucs_trace_func("event_count=%d", ucs_async_signal_global_context.event_count); /* read without event_lock; used for tracing only */ + + pthread_mutex_lock(&ucs_async_signal_global_context.event_lock); + if (ucs_async_signal_global_context.event_count == 0) { + /* Set our signal handler */ + new_action.sa_sigaction = ucs_async_signal_handler; + sigemptyset(&new_action.sa_mask); + new_action.sa_flags = SA_RESTART|SA_SIGINFO; +#if HAVE_SIGACTION_SA_RESTORER + new_action.sa_restorer = NULL; +#endif + ret = sigaction(ucs_global_opts.async_signo, &new_action, + &ucs_async_signal_global_context.prev_sighandler); + if (ret < 0) { + ucs_error("failed to set a handler for signal %d: %m", + ucs_global_opts.async_signo); + pthread_mutex_unlock(&ucs_async_signal_global_context.event_lock); + return UCS_ERR_INVALID_PARAM; + } + + ucs_trace_async("installed signal handler for %s", + ucs_signal_names[ucs_global_opts.async_signo]); + } + ++ucs_async_signal_global_context.event_count; + pthread_mutex_unlock(&ucs_async_signal_global_context.event_lock); + + return UCS_OK; +} +/* Handler left in place after the last user is gone: an async signal arriving then is treated as fatal */ +static void fatal_sighandler(int signo, siginfo_t *siginfo, void *arg) +{ + ucs_fatal("got timer signal"); +} +/* Drop one user of the async signal; when the last one is gone, replace our handler (see the body) */ +static void ucs_async_signal_uninstall_handler() +{ + struct sigaction new_action; + int ret; + + 
ucs_trace_func("event_count=%d", ucs_async_signal_global_context.event_count); + + pthread_mutex_lock(&ucs_async_signal_global_context.event_lock); + if (--ucs_async_signal_global_context.event_count == 0) { + new_action = ucs_async_signal_global_context.prev_sighandler; + new_action.sa_sigaction = fatal_sighandler; + ret = sigaction(ucs_global_opts.async_signo, &new_action, NULL); + if (ret < 0) { + ucs_warn("failed to restore the async signal handler: %m"); + } + + ucs_trace_async("uninstalled signal handler for %s", + ucs_signal_names[ucs_global_opts.async_signo]); + } + pthread_mutex_unlock(&ucs_async_signal_global_context.event_lock); +} + +static ucs_status_t ucs_async_signal_init(ucs_async_context_t *async) +{ + async->signal.block_count = 0; + async->signal.tid = ucs_get_tid(); + async->signal.pthread = pthread_self(); + async->signal.timer = NULL; + return UCS_OK; +} + +static void ucs_async_signal_cleanup(ucs_async_context_t *async) +{ + if (async->signal.block_count > 0) { + ucs_warn("destroying async signal context with block_count %d", + async->signal.block_count); + } +} + +static ucs_status_t ucs_async_signal_modify_event_fd(ucs_async_context_t *async, + int event_fd, int events) +{ + ucs_status_t status; + int add, remove; + + UCS_ASYNC_SIGNAL_CHECK_THREAD(async); + + if (events) { + add = O_ASYNC; /* Enable notifications */ + remove = 0; + } else { + add = 0; /* Disable notifications */ + remove = O_ASYNC; + } + + ucs_trace_async("fcntl(fd=%d, add=0x%x, remove=0x%x)", event_fd, add, remove); + status = ucs_sys_fcntl_modfl(event_fd, add, remove); + if (status != UCS_OK) { + ucs_error("fcntl F_SETFL failed: %m"); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +static ucs_status_t ucs_async_signal_add_event_fd(ucs_async_context_t *async, + int event_fd, int events) +{ + ucs_status_t status; + pid_t tid; + + UCS_ASYNC_SIGNAL_CHECK_THREAD(async); + + status = ucs_async_signal_install_handler(); + if (status != UCS_OK) { + goto err; + } + + /* 
Send signal when fd is ready */ + ucs_trace_async("fcntl(F_SETSIG, fd=%d, sig=%s)", event_fd, + ucs_signal_names[ucs_global_opts.async_signo]); + if (0 > fcntl(event_fd, F_SETSIG, ucs_global_opts.async_signo)) { + ucs_error("fcntl F_SETSIG failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err_remove_handler; + } + + /* Send the signal to the desired thread */ + tid = ucs_async_signal_context_tid(async); + status = ucs_async_signal_set_fd_owner(tid, event_fd); + if (status != UCS_OK) { + goto err_remove_handler; + } + + /* Set async events on the file descriptor */ + status = ucs_async_signal_modify_event_fd(async, event_fd, events); + if (status != UCS_OK) { + goto err_remove_handler; + } + + return UCS_OK; + +err_remove_handler: + ucs_async_signal_uninstall_handler(); +err: + return status; +} + +/* Clear O_ASYNC on the fd (with the async signal blocked around the change) and release the signal handler */ +static ucs_status_t ucs_async_signal_remove_event_fd(ucs_async_context_t *async, int event_fd) +{ + ucs_status_t status; + + ucs_trace_func("event_fd=%d", event_fd); + + UCS_ASYNC_SIGNAL_CHECK_THREAD(async); + + ucs_async_signal_allow(0); + status = ucs_sys_fcntl_modfl(event_fd, 0, O_ASYNC); + ucs_async_signal_allow(1); + + ucs_async_signal_uninstall_handler(); + return status; +} + +/* Block the context unless it is already blocked; returns 1 if the block was taken, 0 otherwise */ +static int ucs_async_signal_try_block(ucs_async_context_t *async) +{ + if (async->signal.block_count > 0) { + return 0; + } + + UCS_ASYNC_SIGNAL_BLOCK(async); + return 1; +} + +static void ucs_async_signal_unblock(ucs_async_context_t *async) +{ + UCS_ASYNC_SIGNAL_UNBLOCK(async); +} + +/* When the timer queue is empty: delete the system timer, release the queue and reset the slot's tid to 0 */ +static void ucs_timer_reset_if_empty(ucs_async_signal_timer_t *timer) +{ + if (ucs_timerq_is_empty(&timer->timerq)) { + ucs_async_signal_sys_timer_delete(timer->sys_timer_id); + ucs_timerq_cleanup(&timer->timerq); + timer->tid = 0; + } +} + +/* Add a timer, possibly initializing the timerq */ +static ucs_status_t +ucs_async_signal_timerq_add_timer(ucs_async_signal_timer_t *timer, int tid, + int timer_id, ucs_time_t interval) +{ + ucs_time_t sys_interval; + ucs_status_t status; + int uid; + + if (timer->tid == 0) 
{ + /* Create the system timer before claiming the slot: if this fails the slot is still free and no timer id has to be deleted (assuming a failed create leaves no timer behind - TODO confirm) */ + uid = (timer - ucs_async_signal_global_context.timers); + status = ucs_async_signal_sys_timer_create(uid, tid, + &timer->sys_timer_id); + if (status != UCS_OK) { + return status; + } + + timer->tid = tid; + ucs_timerq_init(&timer->timerq); + } + + status = ucs_timerq_add(&timer->timerq, timer_id, interval); + if (status != UCS_OK) { + goto err; + } + + sys_interval = ucs_timerq_min_interval(&timer->timerq); + status = ucs_async_signal_sys_timer_set_interval(timer->sys_timer_id, + sys_interval); + if (status != UCS_OK) { + goto err_remove; + } + + return UCS_OK; + +err_remove: + ucs_timerq_remove(&timer->timerq, timer_id); +err: + /* The system timer exists at this point, so the slot can be fully reset if it holds no timers */ + ucs_timer_reset_if_empty(timer); + return status; +} + +/* Remove a timer, possibly resetting the timerq */ +static ucs_status_t +ucs_async_signal_timerq_remove_timer(ucs_async_signal_timer_t *timer, + int timer_id) +{ + ucs_status_t status; + + status = ucs_timerq_remove(&timer->timerq, timer_id); + if (status != UCS_OK) { + return status; + } + + ucs_timer_reset_if_empty(timer); + return UCS_OK; +} + +/* Return the first timer slot whose tid equals 'tid' (0 selects a free slot), or NULL if there is none */ +static ucs_async_signal_timer_t *ucs_async_signal_find_timer(pid_t tid) +{ + ucs_async_signal_timer_t *timer; + + for (timer = ucs_async_signal_global_context.timers; + timer < &ucs_async_signal_global_context.timers[UCS_SIGNAL_MAX_TIMERQS]; + ++timer) + { + if (timer->tid == tid) { + return timer; + } + } + + return NULL; +} + +static ucs_status_t ucs_async_signal_add_timer(ucs_async_context_t *async, + int timer_id, ucs_time_t interval) +{ + ucs_async_signal_timer_t *timer; + ucs_status_t status; + pid_t tid; + + ucs_trace_func("async=%p interval=%.2fus timer_id=%d", + async, ucs_time_to_usec(interval), timer_id); + + UCS_ASYNC_SIGNAL_CHECK_THREAD(async); + + /* Must install signal handler before arming the timer */ + status = ucs_async_signal_install_handler(); + if (status != UCS_OK) { + goto err; + } + + ucs_async_signal_allow(0); + pthread_mutex_lock(&ucs_async_signal_global_context.timers_lock); + + /* Find existing or 
available timer queue for the current thread */ + tid = ucs_async_signal_context_tid(async); + timer = ucs_async_signal_find_timer(tid); + if (timer == NULL) { + timer = ucs_async_signal_find_timer(0); /* Search for free slot */ + } + + if (timer == NULL) { + status = UCS_ERR_EXCEEDS_LIMIT; + } else { + status = ucs_async_signal_timerq_add_timer(timer, tid, timer_id, interval); + } + + pthread_mutex_unlock(&ucs_async_signal_global_context.timers_lock); + ucs_async_signal_allow(1); + + if (status != UCS_OK) { + goto err_uninstall_handler; + } + + return UCS_OK; + +err_uninstall_handler: + ucs_async_signal_uninstall_handler(); +err: + return status; +} + +static ucs_status_t ucs_async_signal_remove_timer(ucs_async_context_t *async, + int timer_id) +{ + ucs_async_signal_timer_t *timer; + ucs_status_t status; + + ucs_trace_func("async=%p timer_id=%d", async, timer_id); + + UCS_ASYNC_SIGNAL_CHECK_THREAD(async); + + ucs_async_signal_allow(0); + pthread_mutex_lock(&ucs_async_signal_global_context.timers_lock); + + timer = ucs_async_signal_find_timer(ucs_async_signal_context_tid(async)); + if (timer == NULL) { + status = UCS_ERR_NO_ELEM; + } else { + status = ucs_async_signal_timerq_remove_timer(timer, timer_id); + } + + pthread_mutex_unlock(&ucs_async_signal_global_context.timers_lock); + ucs_async_signal_allow(1); + + if (status == UCS_OK) { + ucs_async_signal_uninstall_handler(); + } + return status; +} + +static void ucs_async_signal_global_init() +{ + pthread_mutex_init(&ucs_async_signal_global_context.timers_lock, NULL); +} + +static void ucs_async_signal_global_cleanup() +{ + if (ucs_async_signal_global_context.event_count != 0) { + ucs_warn("signal handler not removed (%d events remaining)", + ucs_async_signal_global_context.event_count); + } + pthread_mutex_destroy(&ucs_async_signal_global_context.timers_lock); +} + +ucs_async_ops_t ucs_async_signal_ops = { + .init = ucs_async_signal_global_init, + .cleanup = ucs_async_signal_global_cleanup, + .block = 
ucs_async_signal_block_all, + .unblock = ucs_async_signal_unblock_all, + .context_init = ucs_async_signal_init, + .context_cleanup = ucs_async_signal_cleanup, + .context_try_block = ucs_async_signal_try_block, + .context_unblock = ucs_async_signal_unblock, + .add_event_fd = ucs_async_signal_add_event_fd, + .remove_event_fd = ucs_async_signal_remove_event_fd, + .modify_event_fd = ucs_async_signal_modify_event_fd, + .add_timer = ucs_async_signal_add_timer, + .remove_timer = ucs_async_signal_remove_timer, +}; + diff --git a/src/ucs/async/signal.h b/src/ucs/async/signal.h new file mode 100644 index 0000000..72e78f3 --- /dev/null +++ b/src/ucs/async/signal.h @@ -0,0 +1,45 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_ASYNC_SIGNAL_H +#define UCS_ASYNC_SIGNAL_H + + +#include +#include +#include /* for ucs_get_tid() */ +#include + + +typedef struct ucs_async_signal_context { + pid_t tid; /* Thread ID to receive the signal */ + int block_count; /* How many times this context is blocked */ + pthread_t pthread; /* Thread ID for pthreads */ + timer_t timer; +} ucs_async_signal_context_t; + + +#define UCS_ASYNC_SIGNAL_BLOCK(_async) \ + { \ + ucs_assert((_async)->signal.tid == ucs_get_tid()); \ + ++(_async)->signal.block_count; \ + ucs_memory_cpu_fence(); \ + } + + +#define UCS_ASYNC_SIGNAL_UNBLOCK(_async) \ + { \ + ucs_memory_cpu_fence(); \ + --(_async)->signal.block_count; \ + } + + +#define UCS_ASYNC_SIGNAL_IS_RECURSIVELY_BLOCKED(_async) \ + (((_async)->signal.block_count > 0) && \ + ((_async)->signal.tid == ucs_get_tid())) + + +#endif diff --git a/src/ucs/async/thread.c b/src/ucs/async/thread.c new file mode 100644 index 0000000..1639575 --- /dev/null +++ b/src/ucs/async/thread.c @@ -0,0 +1,449 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "thread.h" +#include "async_int.h" +#include "pipe.h" + +#include +#include +#include +#include + + +#define UCS_ASYNC_EPOLL_MAX_EVENTS 16 +#define UCS_ASYNC_EPOLL_MIN_TIMEOUT_MS 2.0 + + +typedef struct ucs_async_thread { + ucs_async_pipe_t wakeup; + ucs_sys_event_set_t *event_set; + ucs_timer_queue_t timerq; + pthread_t thread_id; + int stop; + uint32_t refcnt; +} ucs_async_thread_t; + + +typedef struct ucs_async_thread_global_context { + ucs_async_thread_t *thread; + unsigned use_count; + pthread_mutex_t lock; +} ucs_async_thread_global_context_t; + + +typedef struct ucs_async_thread_callback_arg { + ucs_async_thread_t *thread; + int *is_missed; +} ucs_async_thread_callback_arg_t; + + +static ucs_async_thread_global_context_t ucs_async_thread_global_context = { + .thread = NULL, + .use_count = 0, + .lock = PTHREAD_MUTEX_INITIALIZER +}; + + +static void ucs_async_thread_hold(ucs_async_thread_t *thread) +{ + ucs_atomic_add32(&thread->refcnt, 1); +} + +static void ucs_async_thread_put(ucs_async_thread_t *thread) +{ + if (ucs_atomic_fsub32(&thread->refcnt, 1) == 1) { + ucs_event_set_cleanup(thread->event_set); + ucs_async_pipe_destroy(&thread->wakeup); + ucs_timerq_cleanup(&thread->timerq); + ucs_free(thread); + } +} + +static void ucs_async_thread_ev_handler(void *callback_data, int event, + void *arg) +{ + ucs_async_thread_callback_arg_t *cb_arg = (void*)arg; + int fd = (int)(uintptr_t)callback_data; + ucs_status_t status; + + ucs_trace_async("ucs_async_thread_ev_handler(fd=%d, event=%d)", + fd, event); + + if (fd == ucs_async_pipe_rfd(&cb_arg->thread->wakeup)) { + ucs_trace_async("progress thread woken up"); + ucs_async_pipe_drain(&cb_arg->thread->wakeup); + return; + } + + status = ucs_async_dispatch_handlers(&fd, 1); + if (status == UCS_ERR_NO_PROGRESS) { + *cb_arg->is_missed = 1; + } +} + +static void *ucs_async_thread_func(void *arg) +{ + ucs_async_thread_t *thread = arg; + ucs_time_t 
last_time, curr_time, timer_interval, time_spent; + int is_missed, timeout_ms; + ucs_status_t status; + unsigned num_events; + ucs_async_thread_callback_arg_t cb_arg; + + is_missed = 0; + curr_time = ucs_get_time(); + last_time = ucs_get_time(); + cb_arg.thread = thread; + cb_arg.is_missed = &is_missed; + + while (!thread->stop) { + num_events = ucs_min(UCS_ASYNC_EPOLL_MAX_EVENTS, + ucs_sys_event_set_max_wait_events); + + /* If we didn't get the lock, give other threads priority */ + if (is_missed) { + sched_yield(); + is_missed = 0; + } + + /* Wait until the remainder of current period */ + timer_interval = ucs_timerq_min_interval(&thread->timerq); + if (timer_interval == UCS_TIME_INFINITY) { + timeout_ms = -1; + } else { + time_spent = curr_time - last_time; + timeout_ms = ucs_time_to_msec(timer_interval - + ucs_min(time_spent, timer_interval)); + } + + status = ucs_event_set_wait(thread->event_set, + &num_events, timeout_ms, + ucs_async_thread_ev_handler, + (void*)&cb_arg); + if (UCS_STATUS_IS_ERR(status)) { + ucs_fatal("ucs_event_set_wait() failed: %d", status); + } + + /* Check timers */ + curr_time = ucs_get_time(); + if (curr_time - last_time > timer_interval) { + status = ucs_async_dispatch_timerq(&thread->timerq, curr_time); + if (status == UCS_ERR_NO_PROGRESS) { + is_missed = 1; + } + + last_time = curr_time; + } + } + + ucs_async_thread_put(thread); + return NULL; +} + +static ucs_status_t ucs_async_thread_start(ucs_async_thread_t **thread_p) +{ + ucs_async_thread_t *thread; + ucs_status_t status; + int wakeup_rfd; + int ret; + + ucs_trace_func(""); + + pthread_mutex_lock(&ucs_async_thread_global_context.lock); + if (ucs_async_thread_global_context.use_count++ > 0) { + /* Thread already started */ + status = UCS_OK; + goto out_unlock; + } + + ucs_assert_always(ucs_async_thread_global_context.thread == NULL); + + thread = ucs_malloc(sizeof(*thread), "async_thread_context"); + if (thread == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + + 
thread->stop = 0; + thread->refcnt = 1; + + status = ucs_timerq_init(&thread->timerq); + if (status != UCS_OK) { + goto err_free; + } + + status = ucs_async_pipe_create(&thread->wakeup); + if (status != UCS_OK) { + goto err_timerq_cleanup; + } + + status = ucs_event_set_create(&thread->event_set); + if (status != UCS_OK) { + goto err_close_pipe; + } + + /* Store file descriptor into void * storage without memory allocation. */ + wakeup_rfd = ucs_async_pipe_rfd(&thread->wakeup); + status = ucs_event_set_add(thread->event_set, wakeup_rfd, + UCS_EVENT_SET_EVREAD, + (void *)(uintptr_t)wakeup_rfd); + if (status != UCS_OK) { + status = UCS_ERR_IO_ERROR; + goto err_free_event_set; + } + + ret = pthread_create(&thread->thread_id, NULL, ucs_async_thread_func, thread); + if (ret != 0) { + ucs_error("pthread_create() returned %d: %m", ret); + status = UCS_ERR_IO_ERROR; + goto err_free_event_set; + } + + ucs_async_thread_global_context.thread = thread; + status = UCS_OK; + goto out_unlock; + +err_free_event_set: + ucs_event_set_cleanup(thread->event_set); +err_close_pipe: + ucs_async_pipe_destroy(&thread->wakeup); +err_timerq_cleanup: + ucs_timerq_cleanup(&thread->timerq); +err_free: + ucs_free(thread); +err: + --ucs_async_thread_global_context.use_count; +out_unlock: + ucs_assert_always(ucs_async_thread_global_context.thread != NULL); + *thread_p = ucs_async_thread_global_context.thread; + pthread_mutex_unlock(&ucs_async_thread_global_context.lock); + return status; +} + +static void ucs_async_thread_stop() +{ + ucs_async_thread_t *thread = NULL; + + ucs_trace_func(""); + + pthread_mutex_lock(&ucs_async_thread_global_context.lock); + if (--ucs_async_thread_global_context.use_count == 0) { + thread = ucs_async_thread_global_context.thread; + ucs_async_thread_hold(thread); + thread->stop = 1; + ucs_async_pipe_push(&thread->wakeup); + ucs_async_thread_global_context.thread = NULL; + } + pthread_mutex_unlock(&ucs_async_thread_global_context.lock); + + if (thread != NULL) { + if 
(pthread_self() == thread->thread_id) { + pthread_detach(thread->thread_id); + } else { + pthread_join(thread->thread_id, NULL); + } + ucs_async_thread_put(thread); + } +} + +static ucs_status_t ucs_async_thread_spinlock_init(ucs_async_context_t *async) +{ + return ucs_spinlock_init(&async->thread.spinlock); +} + +static void ucs_async_thread_spinlock_cleanup(ucs_async_context_t *async) +{ + ucs_status_t status; + + status = ucs_spinlock_destroy(&async->thread.spinlock); + if (status != UCS_OK) { + ucs_warn("ucs_spinlock_destroy() failed (%d)", status); + } +} + +static int ucs_async_thread_spinlock_try_block(ucs_async_context_t *async) +{ + return ucs_spin_trylock(&async->thread.spinlock); +} + +static void ucs_async_thread_spinlock_unblock(ucs_async_context_t *async) +{ + ucs_spin_unlock(&async->thread.spinlock); +} + +static ucs_status_t ucs_async_thread_mutex_init(ucs_async_context_t *async) +{ + pthread_mutexattr_t attr; + int ret; + + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + ret = pthread_mutex_init(&async->thread.mutex, &attr); + if (ret == 0) { + return UCS_OK; + } + + ucs_error("failed to initialize async lock: %s", strerror(ret)); + return UCS_ERR_INVALID_PARAM; +} + +static void ucs_async_thread_mutex_cleanup(ucs_async_context_t *async) +{ + int ret = pthread_mutex_destroy(&async->thread.mutex); + + if (ret != 0) { + ucs_warn("failed to destroy async lock: %s", strerror(ret)); + } +} + +static ucs_status_t ucs_async_thread_add_event_fd(ucs_async_context_t *async, + int event_fd, int events) +{ + ucs_async_thread_t *thread; + ucs_status_t status; + + status = ucs_async_thread_start(&thread); + if (status != UCS_OK) { + goto err; + } + + /* Store file descriptor into void * storage without memory allocation. 
*/ + status = ucs_event_set_add(thread->event_set, event_fd, + (ucs_event_set_type_t)events, + (void *)(uintptr_t)event_fd); + if (status != UCS_OK) { + status = UCS_ERR_IO_ERROR; + goto err_removed; + } + + ucs_async_pipe_push(&thread->wakeup); + return UCS_OK; + +err_removed: + ucs_async_thread_stop(); +err: + return status; +} + +static ucs_status_t ucs_async_thread_remove_event_fd(ucs_async_context_t *async, + int event_fd) +{ + ucs_async_thread_t *thread = ucs_async_thread_global_context.thread; + ucs_status_t status; + + status = ucs_event_set_del(thread->event_set, event_fd); + if (status != UCS_OK) { + return status; + } + + ucs_async_thread_stop(); + return UCS_OK; +} + +static ucs_status_t ucs_async_thread_modify_event_fd(ucs_async_context_t *async, + int event_fd, int events) +{ + /* Store file descriptor into void * storage without memory allocation. */ + return ucs_event_set_mod(ucs_async_thread_global_context.thread->event_set, + event_fd, (ucs_event_set_type_t)events, + (void *)(uintptr_t)event_fd); +} + +static int ucs_async_thread_mutex_try_block(ucs_async_context_t *async) +{ + return !pthread_mutex_trylock(&async->thread.mutex); +} + +static void ucs_async_thread_mutex_unblock(ucs_async_context_t *async) +{ + (void)pthread_mutex_unlock(&async->thread.mutex); +} + +static ucs_status_t ucs_async_thread_add_timer(ucs_async_context_t *async, + int timer_id, ucs_time_t interval) +{ + ucs_async_thread_t *thread; + ucs_status_t status; + + if (ucs_time_to_msec(interval) == 0) { + ucs_error("timer interval is too small (%.2f usec)", ucs_time_to_usec(interval)); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + status = ucs_async_thread_start(&thread); + if (status != UCS_OK) { + goto err; + } + + status = ucs_timerq_add(&thread->timerq, timer_id, interval); + if (status != UCS_OK) { + goto err_stop; + } + + ucs_async_pipe_push(&thread->wakeup); + return UCS_OK; + +err_stop: + ucs_async_thread_stop(); +err: + return status; +} + +static ucs_status_t 
ucs_async_thread_remove_timer(ucs_async_context_t *async, + int timer_id) +{ + ucs_async_thread_t *thread = ucs_async_thread_global_context.thread; + ucs_timerq_remove(&thread->timerq, timer_id); + ucs_async_pipe_push(&thread->wakeup); + ucs_async_thread_stop(); + return UCS_OK; +} + +static void ucs_async_signal_global_cleanup() +{ + if (ucs_async_thread_global_context.thread != NULL) { + ucs_debug("async thread still running (use count %u)", + ucs_async_thread_global_context.use_count); + } +} + +ucs_async_ops_t ucs_async_thread_spinlock_ops = { + .init = ucs_empty_function, + .cleanup = ucs_async_signal_global_cleanup, + .block = ucs_empty_function, + .unblock = ucs_empty_function, + .context_init = ucs_async_thread_spinlock_init, + .context_cleanup = ucs_async_thread_spinlock_cleanup, + .context_try_block = ucs_async_thread_spinlock_try_block, + .context_unblock = ucs_async_thread_spinlock_unblock, + .add_event_fd = ucs_async_thread_add_event_fd, + .remove_event_fd = ucs_async_thread_remove_event_fd, + .modify_event_fd = ucs_async_thread_modify_event_fd, + .add_timer = ucs_async_thread_add_timer, + .remove_timer = ucs_async_thread_remove_timer, +}; + +ucs_async_ops_t ucs_async_thread_mutex_ops = { + .init = ucs_empty_function, + .cleanup = ucs_async_signal_global_cleanup, + .block = ucs_empty_function, + .unblock = ucs_empty_function, + .context_init = ucs_async_thread_mutex_init, + .context_cleanup = ucs_async_thread_mutex_cleanup, + .context_try_block = ucs_async_thread_mutex_try_block, + .context_unblock = ucs_async_thread_mutex_unblock, + .add_event_fd = ucs_async_thread_add_event_fd, + .remove_event_fd = ucs_async_thread_remove_event_fd, + .modify_event_fd = ucs_async_thread_modify_event_fd, + .add_timer = ucs_async_thread_add_timer, + .remove_timer = ucs_async_thread_remove_timer, +}; diff --git a/src/ucs/async/thread.h b/src/ucs/async/thread.h new file mode 100644 index 0000000..bba5987 --- /dev/null +++ b/src/ucs/async/thread.h @@ -0,0 +1,21 @@ +/** +* 
Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_ASYNC_THREAD_H +#define UCS_ASYNC_THREAD_H + +#include +#include + + +typedef struct ucs_async_thread_context { + union { + ucs_spinlock_t spinlock; + pthread_mutex_t mutex; + }; +} ucs_async_thread_context_t; + +#endif diff --git a/src/ucs/config/global_opts.c b/src/ucs/config/global_opts.c new file mode 100644 index 0000000..877c573 --- /dev/null +++ b/src/ucs/config/global_opts.c @@ -0,0 +1,271 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "global_opts.h" + +#include +#include +#include +#include +#include +#include +#include + + +ucs_global_opts_t ucs_global_opts = { + .log_level = UCS_LOG_LEVEL_WARN, + .log_print_enable = 0, + .log_file = "", + .log_buffer_size = 1024, + .log_data_size = 0, + .mpool_fifo = 0, + .handle_errors = UCS_BIT(UCS_HANDLE_ERROR_BACKTRACE), + .error_signals = { NULL, 0 }, + .error_mail_to = "", + .error_mail_footer = "", + .gdb_command = "gdb", + .debug_signo = SIGHUP, + .log_level_trigger = UCS_LOG_LEVEL_FATAL, + .warn_unused_env_vars = 1, + .async_max_events = 64, + .async_signo = SIGALRM, + .stats_dest = "", + .tuning_path = "", + .memtrack_dest = "", + .stats_trigger = "exit", + .profile_mode = 0, + .profile_file = "", + .stats_filter = { NULL, 0 }, + .stats_format = UCS_STATS_FULL, + .rcache_check_pfn = 0, + .module_dir = UCX_MODULE_DIR, /* defined in Makefile.am */ + .module_log_level = UCS_LOG_LEVEL_TRACE, + .arch = UCS_ARCH_GLOBAL_OPTS_INITALIZER +}; + +static const char *ucs_handle_error_modes[] = { + [UCS_HANDLE_ERROR_BACKTRACE] = "bt", + [UCS_HANDLE_ERROR_FREEZE] = "freeze", + [UCS_HANDLE_ERROR_DEBUG] = "debug", + [UCS_HANDLE_ERROR_LAST] = NULL +}; + + +static UCS_CONFIG_DEFINE_ARRAY(signo, + sizeof(int), + UCS_CONFIG_TYPE_SIGNO); + +static ucs_config_field_t 
ucs_global_opts_table[] = { + {"LOG_LEVEL", "warn", + "UCS logging level. Messages with a level higher or equal to the selected " + "will be printed.\n" + "Possible values are: fatal, error, warn, info, debug, trace, data, func, poll.", + ucs_offsetof(ucs_global_opts_t, log_level), UCS_CONFIG_TYPE_ENUM(ucs_log_level_names)}, + + {"LOG_FILE", "", + "If not empty, UCS will print log messages to the specified file instead of stdout.\n" + "The following substitutions are performed on this string:\n" + " %p - Replaced with process ID\n" + " %h - Replaced with host name\n", + ucs_offsetof(ucs_global_opts_t, log_file), + UCS_CONFIG_TYPE_STRING}, + + {"LOG_BUFFER", "1024", + "Buffer size for a single log message.", + ucs_offsetof(ucs_global_opts_t, log_buffer_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {"LOG_DATA_SIZE", "0", + "How much packet payload to print, at most, in data mode.", + ucs_offsetof(ucs_global_opts_t, log_data_size), UCS_CONFIG_TYPE_ULONG}, + + {"LOG_PRINT_ENABLE", "n", + "Enable output of ucs_print(). This option is intended for use by the library developers.\n", + ucs_offsetof(ucs_global_opts_t, log_print_enable), UCS_CONFIG_TYPE_BOOL}, + +#if ENABLE_DEBUG_DATA + {"MPOOL_FIFO", "n", + "Enable FIFO behavior for memory pool, instead of LIFO. Useful for\n" + "debugging because object pointers are not recycled.", + ucs_offsetof(ucs_global_opts_t, mpool_fifo), UCS_CONFIG_TYPE_BOOL}, +#endif + + {"HANDLE_ERRORS", +#if ENABLE_DEBUG_DATA + "bt,freeze", +#else + "bt", +#endif + "Error handling mode. 
A combination of: 'bt' (print backtrace),\n" + "'freeze' (freeze and wait for a debugger), 'debug' (attach debugger)", + ucs_offsetof(ucs_global_opts_t, handle_errors), + UCS_CONFIG_TYPE_BITMAP(ucs_handle_error_modes)}, + + {"ERROR_SIGNALS", "SIGILL,SIGSEGV,SIGBUS,SIGFPE", + "Signals which are considered an error indication and trigger error handling.", + ucs_offsetof(ucs_global_opts_t, error_signals), UCS_CONFIG_TYPE_ARRAY(signo)}, + + {"ERROR_MAIL_TO", "", + "If non-empty, send mail notification for fatal errors.", + ucs_offsetof(ucs_global_opts_t, error_mail_to), UCS_CONFIG_TYPE_STRING}, + + {"ERROR_MAIL_FOOTER", "", + "Footer for error report email", + ucs_offsetof(ucs_global_opts_t, error_mail_footer), UCS_CONFIG_TYPE_STRING}, + + {"GDB_COMMAND", "gdb -quiet", + "If non-empty, attaches a gdb to the process in case of error, using the provided command.", + ucs_offsetof(ucs_global_opts_t, gdb_command), UCS_CONFIG_TYPE_STRING}, + + {"DEBUG_SIGNO", "SIGHUP", + "Signal number which causes UCS to enter debug mode. Set to 0 to disable.", + ucs_offsetof(ucs_global_opts_t, debug_signo), UCS_CONFIG_TYPE_SIGNO}, + + {"LOG_LEVEL_TRIGGER", "fatal", + "Log level to trigger error handling.", + ucs_offsetof(ucs_global_opts_t, log_level_trigger), UCS_CONFIG_TYPE_ENUM(ucs_log_level_names)}, + + {UCS_GLOBAL_OPTS_WARN_UNUSED_CONFIG, "yes", + "Issue warning about UCX_ environment variables which were not used by the\n" + "configuration parser.", + ucs_offsetof(ucs_global_opts_t, warn_unused_env_vars), UCS_CONFIG_TYPE_BOOL}, + + {"ASYNC_MAX_EVENTS", "1024", /* TODO remove this; resize mpmc */ + "Maximal number of events which can be handled from one context", + ucs_offsetof(ucs_global_opts_t, async_max_events), UCS_CONFIG_TYPE_UINT}, + + {"ASYNC_SIGNO", "SIGALRM", + "Signal number used for async signaling.", + ucs_offsetof(ucs_global_opts_t, async_signo), UCS_CONFIG_TYPE_SIGNO}, + +#if ENABLE_STATS + {"STATS_DEST", "", + "Destination to send statistics to. 
If the value is empty, statistics are\n" + "not reported. Possible values are:\n" + " udp:[:] - send over UDP to the given host:port.\n" + " stdout - print to standard output.\n" + " stderr - print to standard error.\n" + " file:[:bin] - save to a file (%h: host, %p: pid, %c: cpu, %t: time, %u: user, %e: exe)", + ucs_offsetof(ucs_global_opts_t, stats_dest), UCS_CONFIG_TYPE_STRING}, + + {"STATS_TRIGGER", "exit", + "Trigger to dump statistics:\n" + " exit - dump just before program exits.\n" + " signal: - dump when process is signaled.\n" + " timer: - dump in specified intervals (in seconds).", + ucs_offsetof(ucs_global_opts_t, stats_trigger), UCS_CONFIG_TYPE_STRING}, + + {"STATS_FILTER", "*", + "Used for filter counters summary.\n" + "Comma-separated list of glob patterns specifying counters.\n" + "Statistics summary will contain only the matching counters.\n" + "The order is not meaningful.\n" + "Each expression in the list may contain any of the following wildcard:\n" + " * - matches any number of any characters including none.\n" + " ? - matches any single character.\n" + " [abc] - matches one character given in the bracket.\n" + " [a-z] - matches one character from the range given in the bracket.", + ucs_offsetof(ucs_global_opts_t, stats_filter), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"STATS_FORMAT", "full", + "Statistics format parameter:\n" + " full - each counter will be displayed in a separate line \n" + " agg - like full but there will also be an aggregation between similar counters\n" + " summary - all counters will be printed in the same line.", + ucs_offsetof(ucs_global_opts_t, stats_format), UCS_CONFIG_TYPE_ENUM(ucs_stats_formats_names)}, + +#endif + +#if ENABLE_MEMTRACK + {"MEMTRACK_DEST", "", + "Destination to output memory tracking report to. If the value is empty,\n" + "results are not reported. 
Possible values are:\n" + " file: - save to a file (%h: host, %p: pid, %c: cpu, %t: time, %u: user, %e: exe)\n" + " stdout - print to standard output.\n" + " stderr - print to standard error.\n", + ucs_offsetof(ucs_global_opts_t, memtrack_dest), UCS_CONFIG_TYPE_STRING}, +#endif + + {"PROFILE_MODE", "", + "Profile collection modes. If none is specified, profiling is disabled.\n" + " - log - Record all timestamps.\n" + " - accum - Accumulate measurements per location.\n", + ucs_offsetof(ucs_global_opts_t, profile_mode), + UCS_CONFIG_TYPE_BITMAP(ucs_profile_mode_names)}, + + {"PROFILE_FILE", "ucx_%h_%p.prof", + "File name to dump profiling data to.\n" + "Substitutions: %h: host, %p: pid, %c: cpu, %t: time, %u: user, %e: exe.\n", + ucs_offsetof(ucs_global_opts_t, profile_file), UCS_CONFIG_TYPE_STRING}, + + {"PROFILE_LOG_SIZE", "4m", + "Maximal size of profiling log. New records will replace old records.", + ucs_offsetof(ucs_global_opts_t, profile_log_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {"RCACHE_CHECK_PFN", "n", + "Registration cache to check that the physical page frame number of a found\n" + "memory region was not changed since the time the region was registered.\n", + ucs_offsetof(ucs_global_opts_t, rcache_check_pfn), UCS_CONFIG_TYPE_BOOL}, + + {"MODULE_DIR", UCX_MODULE_DIR, + "Directory to search for loadable modules", + ucs_offsetof(ucs_global_opts_t, module_dir), UCS_CONFIG_TYPE_STRING}, + + {"MODULE_LOG_LEVEL", "trace", + "Logging level for module loader\n", + ucs_offsetof(ucs_global_opts_t, module_log_level), UCS_CONFIG_TYPE_ENUM(ucs_log_level_names)}, + + {"", "", NULL, + ucs_offsetof(ucs_global_opts_t, arch), + UCS_CONFIG_TYPE_TABLE(ucs_arch_global_opts_table)}, + + {NULL} +}; +UCS_CONFIG_REGISTER_TABLE(ucs_global_opts_table, "UCS global", NULL, + ucs_global_opts_t) + + +void ucs_global_opts_init() +{ + ucs_status_t status; + + status = ucs_config_parser_fill_opts(&ucs_global_opts, ucs_global_opts_table, + NULL, NULL, 1); + if (status != UCS_OK) { + 
ucs_fatal("failed to parse global configuration - aborting"); + } +} + +ucs_status_t ucs_global_opts_set_value(const char *name, const char *value) +{ + return ucs_config_parser_set_value(&ucs_global_opts, ucs_global_opts_table, + name, value); +} + +ucs_status_t ucs_global_opts_get_value(const char *name, char *value, size_t max) +{ + return ucs_config_parser_get_value(&ucs_global_opts, ucs_global_opts_table, + name, value, max); +} + +ucs_status_t ucs_global_opts_clone(void *dst) +{ + return ucs_config_parser_clone_opts(&ucs_global_opts, dst, ucs_global_opts_table); +} + +void ucs_global_opts_release() +{ + /* This function returns void, so the parser call is a plain statement: a 'return' with an expression is not valid here in ISO C */ + ucs_config_parser_release_opts(&ucs_global_opts, ucs_global_opts_table); +} + +void ucs_global_opts_print(FILE *stream, ucs_config_print_flags_t print_flags) +{ + ucs_config_parser_print_opts(stream, "Global configuration", &ucs_global_opts, + ucs_global_opts_table, NULL, print_flags); +} diff --git a/src/ucs/config/global_opts.h b/src/ucs/config/global_opts.h new file mode 100644 index 0000000..4aaa09f --- /dev/null +++ b/src/ucs/config/global_opts.h @@ -0,0 +1,138 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_CONFIG_H_ +#define UCS_CONFIG_H_ + +#include "types.h" + +#include +#include +#include +#include +#include +#include + +BEGIN_C_DECLS + +/** @file global_opts.h */ + +#define UCS_GLOBAL_OPTS_WARN_UNUSED_CONFIG "WARN_UNUSED_ENV_VARS" + +/** + * UCS global options. + */ +typedef struct { + + /* Log level above which log messages will be printed */ + ucs_log_level_t log_level; + + /* Log file */ + char *log_file; + + /* Size of log buffer for one message */ + size_t log_buffer_size; + + /* Maximal amount of packet data to print per packet */ + size_t log_data_size; + + /* Enable ucs_print() output */ + int log_print_enable; + + /* Enable FIFO behavior for memory pool, instead of LIFO. Useful for + * debugging because object pointers are not recycled. 
*/ + int mpool_fifo; + + /* Handle errors mode */ + unsigned handle_errors; + + /* Error signals */ + UCS_CONFIG_ARRAY_FIELD(int, signals) error_signals; + + /* If not empty, send mail notifications to that address in case of error */ + char *error_mail_to; + + /* Footer for error report mail notification */ + char *error_mail_footer; + + /* If not NULL, attach gdb to the process in case of error */ + char *gdb_command; + + /* Signal number which causes to enter debug mode */ + unsigned debug_signo; + + /* Log level to trigger error handling */ + ucs_log_level_t log_level_trigger; + + /* Issue warning about UCX_ env vars which were not used by config parser */ + int warn_unused_env_vars; + + /* Max. events per context, will be removed in the future */ + unsigned async_max_events; + + /* Destination for statistics: udp:host:port / file:path / stdout + */ + char *stats_dest; + + /* Trigger to dump statistics */ + char *stats_trigger; + + /* Named pipe file path for tuning. + */ + char *tuning_path; + + /* Number of performance stall loops to perform */ + size_t perf_stall_loops; + + /* Signal number used by async handler (for signal mode) */ + unsigned async_signo; + + /* Destination for detailed memory tracking results: none / stdout / stderr + */ + char *memtrack_dest; + + /* Profiling mode */ + unsigned profile_mode; + + /* Profiling output file name */ + char *profile_file; + + /* Limit for profiling log size */ + size_t profile_log_size; + + /* Counters to be included in statistics summary */ + ucs_config_names_array_t stats_filter; + + /* statistics format options */ + ucs_stats_formats_t stats_format; + + /* registration cache checks if physical page is not moved */ + int rcache_check_pfn; + + /* directory for loadable modules */ + char *module_dir; + + /* log level for module loader code */ + ucs_log_level_t module_log_level; + + /* arch-specific global options */ + ucs_arch_global_opts_t arch; +} ucs_global_opts_t; + + +extern ucs_global_opts_t 
ucs_global_opts; + +void ucs_global_opts_init(); +ucs_status_t ucs_global_opts_set_value(const char *name, const char *value); +ucs_status_t ucs_global_opts_get_value(const char *name, char *value, + size_t max); +ucs_status_t ucs_global_opts_clone(void *dst); +void ucs_global_opts_release(); +void ucs_global_opts_print(FILE *stream, ucs_config_print_flags_t print_flags); + +END_C_DECLS + +#endif diff --git a/src/ucs/config/parser.c b/src/ucs/config/parser.c new file mode 100644 index 0000000..a72db1a --- /dev/null +++ b/src/ucs/config/parser.c @@ -0,0 +1,1637 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#include "parser.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* width of titles in docstring */ +#define UCS_CONFIG_PARSER_DOCSTR_WIDTH 10 + + +/* list of prefixes for a configuration variable, used to dump all possible + * aliases. 
+ */ +typedef struct ucs_config_parser_prefix_list { + const char *prefix; + ucs_list_link_t list; +} ucs_config_parser_prefix_t; + + +typedef UCS_CONFIG_ARRAY_FIELD(void, data) ucs_config_array_field_t; + +KHASH_SET_INIT_STR(ucs_config_env_vars) + + +/* Process environment variables */ +extern char **environ; + + +UCS_LIST_HEAD(ucs_config_global_list); +static khash_t(ucs_config_env_vars) ucs_config_parser_env_vars = {0}; +static pthread_mutex_t ucs_config_parser_env_vars_hash_lock = PTHREAD_MUTEX_INITIALIZER; + + +const char *ucs_async_mode_names[] = { + [UCS_ASYNC_MODE_SIGNAL] = "signal", + [UCS_ASYNC_MODE_THREAD_SPINLOCK] = "thread_spinlock", + [UCS_ASYNC_MODE_THREAD_MUTEX] = "thread_mutex", + [UCS_ASYNC_MODE_POLL] = "poll", + [UCS_ASYNC_MODE_LAST] = NULL +}; + +UCS_CONFIG_DEFINE_ARRAY(string, sizeof(char*), UCS_CONFIG_TYPE_STRING); + +/* Fwd */ +static ucs_status_t +ucs_config_parser_set_value_internal(void *opts, ucs_config_field_t *fields, + const char *name, const char *value, + const char *table_prefix, int recurse); + + +static int __find_string_in_list(const char *str, const char **list) +{ + int i; + + for (i = 0; *list; ++list, ++i) { + if (strcasecmp(*list, str) == 0) { + return i; + } + } + return -1; +} + +int ucs_config_sscanf_string(const char *buf, void *dest, const void *arg) +{ + *((char**)dest) = strdup(buf); + return 1; +} + +int ucs_config_sprintf_string(char *buf, size_t max, + const void *src, const void *arg) +{ + strncpy(buf, *((char**)src), max); + return 1; +} + +ucs_status_t ucs_config_clone_string(const void *src, void *dest, const void *arg) +{ + char *new_str = strdup(*(char**)src); + if (new_str == NULL) { + return UCS_ERR_NO_MEMORY; + } + + *((char**)dest) = new_str; + return UCS_OK; +} + +void ucs_config_release_string(void *ptr, const void *arg) +{ + free(*(char**)ptr); +} + +int ucs_config_sscanf_int(const char *buf, void *dest, const void *arg) +{ + return sscanf(buf, "%i", (unsigned*)dest); +} + +ucs_status_t 
ucs_config_clone_int(const void *src, void *dest, const void *arg) +{ + *(int*)dest = *(int*)src; + return UCS_OK; +} + +int ucs_config_sprintf_int(char *buf, size_t max, + const void *src, const void *arg) +{ + return snprintf(buf, max, "%i", *(unsigned*)src); +} + +int ucs_config_sscanf_uint(const char *buf, void *dest, const void *arg) +{ + if (!strcasecmp(buf, UCS_NUMERIC_INF_STR)) { + *(unsigned*)dest = UINT_MAX; + return 1; + } else { + return sscanf(buf, "%u", (unsigned*)dest); + } +} + +ucs_status_t ucs_config_clone_uint(const void *src, void *dest, const void *arg) +{ + *(unsigned*)dest = *(unsigned*)src; + return UCS_OK; +} + +int ucs_config_sprintf_uint(char *buf, size_t max, + const void *src, const void *arg) +{ + unsigned value = *(unsigned*)src; + if (value == UINT_MAX) { + snprintf(buf, max, UCS_NUMERIC_INF_STR); + return 1; + } else { + return snprintf(buf, max, "%u", value); + } +} + +int ucs_config_sscanf_ulong(const char *buf, void *dest, const void *arg) +{ + return sscanf(buf, "%lu", (unsigned long*)dest); +} + +int ucs_config_sprintf_ulong(char *buf, size_t max, + const void *src, const void *arg) +{ + return snprintf(buf, max, "%lu", *(unsigned long*)src); +} + +ucs_status_t ucs_config_clone_ulong(const void *src, void *dest, const void *arg) +{ + *(unsigned long*)dest = *(unsigned long*)src; + return UCS_OK; +} + +int ucs_config_sscanf_double(const char *buf, void *dest, const void *arg) +{ + return sscanf(buf, "%lf", (double*)dest); +} + +int ucs_config_sprintf_double(char *buf, size_t max, + const void *src, const void *arg) +{ + return snprintf(buf, max, "%.3f", *(double*)src); +} + +ucs_status_t ucs_config_clone_double(const void *src, void *dest, const void *arg) +{ + *(double*)dest = *(double*)src; + return UCS_OK; +} + +int ucs_config_sscanf_hex(const char *buf, void *dest, const void *arg) +{ + /* Special value: auto */ + if (!strcasecmp(buf, UCS_VALUE_AUTO_STR)) { + *(size_t*)dest = UCS_HEXUNITS_AUTO; + return 1; + } else if 
(strncasecmp(buf, "0x", 2) == 0) { + return (sscanf(buf + 2, "%x", (unsigned int*)dest)); + } else { + return 0; + } +} + +int ucs_config_sprintf_hex(char *buf, size_t max, + const void *src, const void *arg) +{ + uint16_t val = *(uint16_t*)src; + + if (val == UCS_HEXUNITS_AUTO) { + return snprintf(buf, max, UCS_VALUE_AUTO_STR); + } + + return snprintf(buf, max, "0x%x", *(unsigned int*)src); +} + +int ucs_config_sscanf_bool(const char *buf, void *dest, const void *arg) +{ + if (!strcasecmp(buf, "y") || !strcasecmp(buf, "yes") || !strcmp(buf, "1")) { + *(int*)dest = 1; + return 1; + } else if (!strcasecmp(buf, "n") || !strcasecmp(buf, "no") || !strcmp(buf, "0")) { + *(int*)dest = 0; + return 1; + } else { + return 0; + } +} + +int ucs_config_sprintf_bool(char *buf, size_t max, const void *src, const void *arg) +{ + return snprintf(buf, max, "%c", *(int*)src ? 'y' : 'n'); +} + +int ucs_config_sscanf_ternary(const char *buf, void *dest, const void *arg) +{ + UCS_STATIC_ASSERT(UCS_NO == 0); + UCS_STATIC_ASSERT(UCS_YES == 1); + if (!strcasecmp(buf, "try") || !strcasecmp(buf, "maybe")) { + *(int*)dest = UCS_TRY; + return 1; + } else { + return ucs_config_sscanf_bool(buf, dest, arg); + } +} + +int ucs_config_sprintf_ternary(char *buf, size_t max, + const void *src, const void *arg) +{ + if (*(int*)src == UCS_TRY) { + return snprintf(buf, max, "try"); + } else { + return ucs_config_sprintf_bool(buf, max, src, arg); + } +} + +int ucs_config_sscanf_on_off(const char *buf, void *dest, const void *arg) +{ + if (!strcasecmp(buf, "on") || !strcmp(buf, "1")) { + *(int*)dest = UCS_CONFIG_ON; + return 1; + } else if (!strcasecmp(buf, "off") || !strcmp(buf, "0")) { + *(int*)dest = UCS_CONFIG_OFF; + return 1; + } else { + return 0; + } +} + +int ucs_config_sscanf_on_off_auto(const char *buf, void *dest, const void *arg) +{ + if (!strcasecmp(buf, "try") || + !strcasecmp(buf, "maybe") || + !strcasecmp(buf, "auto")) { + *(int*)dest = UCS_CONFIG_AUTO; + return 1; + } else { + return 
ucs_config_sscanf_on_off(buf, dest, arg); + } +} + +int ucs_config_sprintf_on_off_auto(char *buf, size_t max, + const void *src, const void *arg) +{ + switch (*(int*)src) { + case UCS_CONFIG_AUTO: + return snprintf(buf, max, "auto"); + case UCS_CONFIG_ON: + return snprintf(buf, max, "on"); + case UCS_CONFIG_OFF: + return snprintf(buf, max, "off"); + default: + return snprintf(buf, max, "%d", *(int*)src); + } +} + +int ucs_config_sscanf_enum(const char *buf, void *dest, const void *arg) +{ + int i; + + i = __find_string_in_list(buf, (const char**)arg); + if (i < 0) { + return 0; + } + + *(unsigned*)dest = i; + return 1; +} + +int ucs_config_sprintf_enum(char *buf, size_t max, + const void *src, const void *arg) +{ + char * const *table = arg; + strncpy(buf, table[*(unsigned*)src], max); + return 1; +} + +static void __print_table_values(char * const *table, char *buf, size_t max) +{ + char *ptr = buf, *end = buf + max; + + for (; *table; ++table) { + snprintf(ptr, end - ptr, "|%s", *table); + ptr += strlen(ptr); + } + + snprintf(ptr, end - ptr, "]"); + + *buf = '['; +} + +void ucs_config_help_enum(char *buf, size_t max, const void *arg) +{ + __print_table_values(arg, buf, max); +} + +int ucs_config_sscanf_bitmap(const char *buf, void *dest, const void *arg) +{ + char *str = strdup(buf); + char *p, *saveptr; + int ret, i; + + if (str == NULL) { + return 0; + } + + ret = 1; + *((unsigned*)dest) = 0; + p = strtok_r(str, ",", &saveptr); + while (p != NULL) { + i = __find_string_in_list(p, (const char**)arg); + if (i < 0) { + ret = 0; + break; + } + *((unsigned*)dest) |= UCS_BIT(i); + p = strtok_r(NULL, ",", &saveptr); + } + + free(str); + return ret; +} + +int ucs_config_sprintf_bitmap(char *buf, size_t max, + const void *src, const void *arg) +{ + ucs_flags_str(buf, max, *((unsigned*)src), (const char**)arg); + return 1; +} + +void ucs_config_help_bitmap(char *buf, size_t max, const void *arg) +{ + snprintf(buf, max, "comma-separated list of: "); + 
__print_table_values(arg, buf + strlen(buf), max - strlen(buf)); +} + +int ucs_config_sscanf_bitmask(const char *buf, void *dest, const void *arg) +{ + int ret = sscanf(buf, "%u", (unsigned*)dest); + if (*(unsigned*)dest != 0) { + *(unsigned*)dest = UCS_BIT(*(unsigned*)dest) - 1; + } + return ret; +} + +int ucs_config_sprintf_bitmask(char *buf, size_t max, + const void *src, const void *arg) +{ + return snprintf(buf, max, "%u", __builtin_popcount(*(unsigned*)src)); +} + +int ucs_config_sscanf_time(const char *buf, void *dest, const void *arg) +{ + char units[3]; + int num_fields; + double value; + double per_sec; + + memset(units, 0, sizeof(units)); + num_fields = sscanf(buf, "%lf%c%c", &value, &units[0], &units[1]); + if (num_fields == 1) { + per_sec = 1; + } else if (num_fields == 2 || num_fields == 3) { + if (!strcmp(units, "m")) { + per_sec = 1.0 / 60.0; + } else if (!strcmp(units, "s")) { + per_sec = 1; + } else if (!strcmp(units, "ms")) { + per_sec = UCS_MSEC_PER_SEC; + } else if (!strcmp(units, "us")) { + per_sec = UCS_USEC_PER_SEC; + } else if (!strcmp(units, "ns")) { + per_sec = UCS_NSEC_PER_SEC; + } else { + return 0; + } + } else { + return 0; + } + + *(double*)dest = value / per_sec; + return 1; +} + +int ucs_config_sprintf_time(char *buf, size_t max, + const void *src, const void *arg) +{ + snprintf(buf, max, "%.2fus", *(double*)src * UCS_USEC_PER_SEC); + return 1; +} + +int ucs_config_sscanf_bw(const char *buf, void *dest, const void *arg) +{ + double *dst = (double*)dest; + char str[16] = {0}; + int offset = 0; + size_t divider; + size_t units; + double value; + int num_fields; + + if (!strcasecmp(buf, UCS_VALUE_AUTO_STR)) { + *dst = UCS_BANDWIDTH_AUTO; + return 1; + } + + num_fields = sscanf(buf, "%lf%15s", &value, str); + if (num_fields < 2) { + return 0; + } + + ucs_assert(num_fields == 2); + + units = (str[0] == 'b') ? 1 : ucs_string_quantity_prefix_value(str[0]); + if (!units) { + return 0; + } + + offset = (units == 1) ? 
0 : 1; + + switch (str[offset]) { + case 'B': + divider = 1; + break; + case 'b': + divider = 8; + break; + default: + return 0; + } + + offset++; + if (strcmp(str + offset, "ps") && + strcmp(str + offset, "/s") && + strcmp(str + offset, "s")) { + return 0; + } + + ucs_assert((divider == 1) || (divider == 8)); /* bytes or bits */ + *dst = value * units / divider; + return 1; +} + +int ucs_config_sprintf_bw(char *buf, size_t max, + const void *src, const void *arg) +{ + double value = *(double*)src; + size_t len; + + if (value == UCS_BANDWIDTH_AUTO) { + snprintf(buf, max, UCS_VALUE_AUTO_STR); + } + + ucs_memunits_to_str((size_t)value, buf, max); + len = strlen(buf); + snprintf(buf + len, max - len, "Bps"); + return 1; +} + +int ucs_config_sscanf_bw_spec(const char *buf, void *dest, const void *arg) +{ + ucs_config_bw_spec_t *dst = (ucs_config_bw_spec_t*)dest; + char *delim; + + delim = strchr(buf, ':'); + if (!delim) { + return 0; + } + + if (!ucs_config_sscanf_bw(delim + 1, &dst->bw, arg)) { + return 0; + } + + dst->name = ucs_strndup(buf, delim - buf, __func__); + return dst->name != NULL; +} + +int ucs_config_sprintf_bw_spec(char *buf, size_t max, + const void *src, const void *arg) +{ + ucs_config_bw_spec_t *bw = (ucs_config_bw_spec_t*)src; + int len; + + if (max) { + snprintf(buf, max, "%s:", bw->name); + len = strlen(buf); + ucs_config_sprintf_bw(buf + len, max - len, &bw->bw, arg); + } + + return 1; +} + +ucs_status_t ucs_config_clone_bw_spec(const void *src, void *dest, const void *arg) +{ + ucs_config_bw_spec_t *s = (ucs_config_bw_spec_t*)src; + ucs_config_bw_spec_t *d = (ucs_config_bw_spec_t*)dest; + + d->bw = s->bw; + d->name = ucs_strdup(s->name, __func__); + + return d->name ? 
UCS_OK : UCS_ERR_NO_MEMORY; +} + +void ucs_config_release_bw_spec(void *ptr, const void *arg) +{ + ucs_free(((ucs_config_bw_spec_t*)ptr)->name); +} + +int ucs_config_sscanf_signo(const char *buf, void *dest, const void *arg) +{ + char *endptr; + int signo; + + signo = strtol(buf, &endptr, 10); + if (*endptr == '\0') { + *(int*)dest = signo; + return 1; + } + + if (!strncmp(buf, "SIG", 3)) { + buf += 3; + } + + return ucs_config_sscanf_enum(buf, dest, ucs_signal_names); +} + +int ucs_config_sprintf_signo(char *buf, size_t max, + const void *src, const void *arg) +{ + return ucs_config_sprintf_enum(buf, max, src, ucs_signal_names); +} + +int ucs_config_sscanf_memunits(const char *buf, void *dest, const void *arg) +{ + if (ucs_str_to_memunits(buf, dest) != UCS_OK) { + return 0; + } + return 1; +} + +int ucs_config_sprintf_memunits(char *buf, size_t max, + const void *src, const void *arg) +{ + size_t sz = *(size_t*)src; + + if (sz == UCS_MEMUNITS_INF) { + snprintf(buf, max, UCS_NUMERIC_INF_STR); + } else if (sz == UCS_MEMUNITS_AUTO) { + snprintf(buf, max, UCS_VALUE_AUTO_STR); + } else { + ucs_memunits_to_str(sz, buf, max); + } + return 1; +} + +int ucs_config_sscanf_ulunits(const char *buf, void *dest, const void *arg) +{ + /* Special value: auto */ + if (!strcasecmp(buf, UCS_VALUE_AUTO_STR)) { + *(size_t*)dest = UCS_ULUNITS_AUTO; + return 1; + } else if (!strcasecmp(buf, UCS_NUMERIC_INF_STR)) { + *(size_t*)dest = UCS_ULUNITS_INF; + return 1; + } + + return ucs_config_sscanf_ulong(buf, dest, arg); +} + +int ucs_config_sprintf_ulunits(char *buf, size_t max, + const void *src, const void *arg) +{ + size_t val = *(size_t*)src; + + if (val == UCS_ULUNITS_AUTO) { + return snprintf(buf, max, UCS_VALUE_AUTO_STR); + } else if (val == UCS_ULUNITS_INF) { + return snprintf(buf, max, UCS_NUMERIC_INF_STR); + } + + return ucs_config_sprintf_ulong(buf, max, src, arg); +} + +int ucs_config_sscanf_range_spec(const char *buf, void *dest, const void *arg) +{ + ucs_range_spec_t 
*range_spec = dest; + unsigned first, last; + char *p, *str; + int ret = 1; + + str = strdup(buf); + if (str == NULL) { + return 0; + } + + /* Check if got a range or a single number */ + p = strchr(str, '-'); + if (p == NULL) { + /* got only one value (not a range) */ + if (1 != sscanf(buf, "%u", &first)) { + ret = 0; + goto out; + } + last = first; + } else { + /* got a range of numbers */ + *p = 0; /* split str */ + + if ((1 != sscanf(str, "%u", &first)) + || (1 != sscanf(p + 1, "%u", &last))) { + ret = 0; + goto out; + } + } + + range_spec->first = first; + range_spec->last = last; + +out: + free (str); + return ret; +} + +int ucs_config_sprintf_range_spec(char *buf, size_t max, + const void *src, const void *arg) +{ + const ucs_range_spec_t *range_spec = src; + + if (range_spec->first == range_spec->last) { + snprintf(buf, max, "%d", range_spec->first); + } else { + snprintf(buf, max, "%d-%d", range_spec->first, range_spec->last); + } + + return 1; +} + +ucs_status_t ucs_config_clone_range_spec(const void *src, void *dest, const void *arg) +{ + const ucs_range_spec_t *src_range_spec = src; + ucs_range_spec_t *dest_ragne_spec = dest; + + dest_ragne_spec->first = src_range_spec->first; + dest_ragne_spec->last = src_range_spec->last; + + return UCS_OK; +} + +int ucs_config_sscanf_array(const char *buf, void *dest, const void *arg) +{ + ucs_config_array_field_t *field = dest; + void *temp_field; + const ucs_config_array_t *array = arg; + char *dup, *token, *saveptr; + int ret; + unsigned i; + + dup = strdup(buf); + if (dup == NULL) { + return 0; + } + + saveptr = NULL; + token = strtok_r(dup, ",", &saveptr); + temp_field = ucs_calloc(UCS_CONFIG_ARRAY_MAX, array->elem_size, "config array"); + i = 0; + while (token != NULL) { + ret = array->parser.read(token, (char*)temp_field + i * array->elem_size, + array->parser.arg); + if (!ret) { + ucs_free(temp_field); + free(dup); + return 0; + } + + ++i; + if (i >= UCS_CONFIG_ARRAY_MAX) { + break; + } + token = 
strtok_r(NULL, ",", &saveptr); + } + + field->data = temp_field; + field->count = i; + free(dup); + return 1; +} + +int ucs_config_sprintf_array(char *buf, size_t max, + const void *src, const void *arg) +{ + const ucs_config_array_field_t *field = src; + const ucs_config_array_t *array = arg; + size_t offset; + unsigned i; + int ret; + + offset = 0; + for (i = 0; i < field->count; ++i) { + if (i > 0 && offset < max) { + buf[offset++] = ','; + } + ret = array->parser.write(buf + offset, max - offset, + (char*)field->data + i * array->elem_size, + array->parser.arg); + if (!ret) { + return 0; + } + + offset += strlen(buf + offset); + } + return 1; +} + +ucs_status_t ucs_config_clone_array(const void *src, void *dest, const void *arg) +{ + const ucs_config_array_field_t *src_array = src; + const ucs_config_array_t *array = arg; + ucs_config_array_field_t *dest_array = dest; + ucs_status_t status; + unsigned i; + + dest_array->data = ucs_calloc(src_array->count, array->elem_size, + "config array"); + if (dest_array->data == NULL) { + return UCS_ERR_NO_MEMORY; + } + + dest_array->count = src_array->count; + for (i = 0; i < src_array->count; ++i) { + status = array->parser.clone((const char*)src_array->data + i * array->elem_size, + (char*)dest_array->data + i * array->elem_size, + array->parser.arg); + if (status != UCS_OK) { + ucs_free(dest_array->data); + return status; + } + } + + return UCS_OK; +} + +void ucs_config_release_array(void *ptr, const void *arg) +{ + ucs_config_array_field_t *array_field = ptr; + const ucs_config_array_t *array = arg; + unsigned i; + + for (i = 0; i < array_field->count; ++i) { + array->parser.release((char*)array_field->data + i * array->elem_size, + array->parser.arg); + } + ucs_free(array_field->data); +} + +void ucs_config_help_array(char *buf, size_t max, const void *arg) +{ + const ucs_config_array_t *array = arg; + + snprintf(buf, max, "comma-separated list of: "); + array->parser.help(buf + strlen(buf), max - strlen(buf), 
array->parser.arg); +} + +int ucs_config_sscanf_table(const char *buf, void *dest, const void *arg) +{ + char *tokens; + char *token, *saveptr1; + char *name, *value, *saveptr2; + ucs_status_t status; + + tokens = strdup(buf); + if (tokens == NULL) { + return 0; + } + + saveptr1 = NULL; + saveptr2 = NULL; + token = strtok_r(tokens, ";", &saveptr1); + while (token != NULL) { + name = strtok_r(token, "=", &saveptr2); + value = strtok_r(NULL, "=", &saveptr2); + if (name == NULL || value == NULL) { + free(tokens); + ucs_error("Could not parse list of values in '%s' (token: '%s')", buf, token); + return 0; + } + + status = ucs_config_parser_set_value_internal(dest, (ucs_config_field_t*)arg, + name, value, NULL, 1); + if (status != UCS_OK) { + if (status == UCS_ERR_NO_ELEM) { + ucs_error("Field '%s' does not exist", name); + } else { + ucs_debug("Failed to set %s to '%s': %s", name, value, + ucs_status_string(status)); + } + free(tokens); + return 0; + } + + token = strtok_r(NULL, ";", &saveptr1); + } + + free(tokens); + return 1; +} + +ucs_status_t ucs_config_clone_table(const void *src, void *dst, const void *arg) +{ + return ucs_config_parser_clone_opts(src, dst, (ucs_config_field_t*)arg); +} + +void ucs_config_release_table(void *ptr, const void *arg) +{ + ucs_config_parser_release_opts(ptr, (ucs_config_field_t*)arg); +} + +void ucs_config_help_table(char *buf, size_t max, const void *arg) +{ + snprintf(buf, max, "Table"); +} + +void ucs_config_release_nop(void *ptr, const void *arg) +{ +} + +void ucs_config_help_generic(char *buf, size_t max, const void *arg) +{ + strncpy(buf, (char*)arg, max); +} + +static inline int ucs_config_is_deprecated_field(const ucs_config_field_t *field) +{ + return (field->offset == UCS_CONFIG_DEPRECATED_FIELD_OFFSET); +} + +static inline int ucs_config_is_alias_field(const ucs_config_field_t *field) +{ + return (field->dfl_value == NULL); +} + +static inline int ucs_config_is_table_field(const ucs_config_field_t *field) +{ + return 
(field->parser.read == ucs_config_sscanf_table); +} + +static void ucs_config_print_doc_line_by_line(const ucs_config_field_t *field, + void (*cb)(int num, const char *line, void *arg), + void *arg) +{ + char *doc, *line, *p; + int num; + + line = doc = strdup(field->doc); + p = strchr(line, '\n'); + num = 0; + while (p != NULL) { + *p = '\0'; + cb(num, line, arg); + line = p + 1; + p = strchr(line, '\n'); + ++num; + } + cb(num, line, arg); + free(doc); +} + +static ucs_status_t +ucs_config_parser_parse_field(ucs_config_field_t *field, const char *value, void *var) +{ + char syntax_buf[256]; + int ret; + + ret = field->parser.read(value, var, field->parser.arg); + if (ret != 1) { + if (ucs_config_is_table_field(field)) { + ucs_error("Could not set table value for %s: '%s'", field->name, value); + + } else { + field->parser.help(syntax_buf, sizeof(syntax_buf) - 1, field->parser.arg); + ucs_error("Invalid value for %s: '%s'. Expected: %s", field->name, + value, syntax_buf); + } + return UCS_ERR_INVALID_PARAM; + } + + return UCS_OK; +} + +static void ucs_config_parser_release_field(ucs_config_field_t *field, void *var) +{ + field->parser.release(var, field->parser.arg); +} + +static int ucs_config_field_is_last(const ucs_config_field_t *field) +{ + return field->name == NULL; +} + +ucs_status_t +ucs_config_parser_set_default_values(void *opts, ucs_config_field_t *fields) +{ + ucs_config_field_t *field, *sub_fields; + ucs_status_t status; + void *var; + + for (field = fields; !ucs_config_field_is_last(field); ++field) { + if (ucs_config_is_alias_field(field) || + ucs_config_is_deprecated_field(field)) { + continue; + } + + var = (char*)opts + field->offset; + + /* If this field is a sub-table, recursively set the values for it. + * Defaults can be subsequently set by parser.read(). 
*/ + if (ucs_config_is_table_field(field)) { + sub_fields = (ucs_config_field_t*)field->parser.arg; + status = ucs_config_parser_set_default_values(var, sub_fields); + if (status != UCS_OK) { + return status; + } + } + + status = ucs_config_parser_parse_field(field, field->dfl_value, var); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +/** + * table_prefix == NULL -> unused + */ +static ucs_status_t +ucs_config_parser_set_value_internal(void *opts, ucs_config_field_t *fields, + const char *name, const char *value, + const char *table_prefix, int recurse) +{ + ucs_config_field_t *field, *sub_fields; + size_t prefix_len; + ucs_status_t status; + unsigned count; + void *var; + + prefix_len = (table_prefix == NULL) ? 0 : strlen(table_prefix); + + count = 0; + for (field = fields; !ucs_config_field_is_last(field); ++field) { + + var = (char*)opts + field->offset; + + if (ucs_config_is_table_field(field)) { + sub_fields = (ucs_config_field_t*)field->parser.arg; + + /* Check with sub-table prefix */ + if (recurse) { + status = ucs_config_parser_set_value_internal(var, sub_fields, + name, value, + field->name, 1); + if (status == UCS_OK) { + ++count; + } else if (status != UCS_ERR_NO_ELEM) { + return status; + } + } + + /* Possible override with my prefix */ + if (table_prefix != NULL) { + status = ucs_config_parser_set_value_internal(var, sub_fields, + name, value, + table_prefix, 0); + if (status == UCS_OK) { + ++count; + } else if (status != UCS_ERR_NO_ELEM) { + return status; + } + } + } else if (((table_prefix == NULL) || !strncmp(name, table_prefix, prefix_len)) && + !strcmp(name + prefix_len, field->name)) + { + if (ucs_config_is_deprecated_field(field)) { + return UCS_ERR_NO_ELEM; + } + + ucs_config_parser_release_field(field, var); + status = ucs_config_parser_parse_field(field, value, var); + if (status != UCS_OK) { + return status; + } + ++count; + } + } + + return (count == 0) ? 
UCS_ERR_NO_ELEM : UCS_OK; +} + +static void ucs_config_parser_mark_env_var_used(const char *name, int *added) +{ + khiter_t iter; + char *key; + int ret; + + *added = 0; + + if (!ucs_global_opts.warn_unused_env_vars) { + return; + } + + pthread_mutex_lock(&ucs_config_parser_env_vars_hash_lock); + + iter = kh_get(ucs_config_env_vars, &ucs_config_parser_env_vars, name); + if (iter != kh_end(&ucs_config_parser_env_vars)) { + goto out; /* already exists */ + } + + key = ucs_strdup(name, "config_parser_env_var"); + if (key == NULL) { + ucs_error("strdup(%s) failed", name); + goto out; + } + +#ifndef __clang_analyzer__ + /* Exclude this code from Clang examination as it generates + * false-postive warning about potential leak of memory + * pointed to by 'key' variable */ + iter = kh_put(ucs_config_env_vars, &ucs_config_parser_env_vars, key, &ret); + if ((ret <= 0) || (iter == kh_end(&ucs_config_parser_env_vars))) { + ucs_warn("kh_put(key=%s) failed", key); + ucs_free(key); + goto out; + } +#else + ucs_free(key); +#endif + + *added = 1; + +out: + pthread_mutex_unlock(&ucs_config_parser_env_vars_hash_lock); +} + +static ucs_status_t ucs_config_apply_env_vars(void *opts, ucs_config_field_t *fields, + const char *prefix, const char *table_prefix, + int recurse, int ignore_errors) +{ + ucs_config_field_t *field, *sub_fields; + ucs_status_t status; + size_t prefix_len; + const char *env_value; + void *var; + char buf[256]; + int added; + + /* Put prefix in the buffer. Later we replace only the variable name part */ + snprintf(buf, sizeof(buf) - 1, "%s%s", prefix, table_prefix ? 
table_prefix : ""); + prefix_len = strlen(buf); + + /* Parse environment variables */ + for (field = fields; !ucs_config_field_is_last(field); ++field) { + + var = (char*)opts + field->offset; + + if (ucs_config_is_table_field(field)) { + sub_fields = (ucs_config_field_t*)field->parser.arg; + + /* Parse with sub-table prefix */ + if (recurse) { + status = ucs_config_apply_env_vars(var, sub_fields, prefix, + field->name, 1, ignore_errors); + if (status != UCS_OK) { + return status; + } + } + + /* Possible override with my prefix */ + if (table_prefix) { + status = ucs_config_apply_env_vars(var, sub_fields, prefix, + table_prefix, 0, ignore_errors); + if (status != UCS_OK) { + return status; + } + } + } else { + /* Read and parse environment variable */ + strncpy(buf + prefix_len, field->name, sizeof(buf) - prefix_len - 1); + env_value = getenv(buf); + if (env_value == NULL) { + continue; + } + + ucs_config_parser_mark_env_var_used(buf, &added); + + if (ucs_config_is_deprecated_field(field)) { + if (added && !ignore_errors) { + ucs_warn("%s is deprecated (set %s%s=n to suppress this warning)", + buf, UCS_CONFIG_PREFIX, + UCS_GLOBAL_OPTS_WARN_UNUSED_CONFIG); + } + } else { + ucs_config_parser_release_field(field, var); + status = ucs_config_parser_parse_field(field, env_value, var); + if (status != UCS_OK) { + /* If set to ignore errors, restore the default value */ + ucs_status_t tmp_status = + ucs_config_parser_parse_field(field, field->dfl_value, + var); + if (ignore_errors) { + status = tmp_status; + } + } + if (status != UCS_OK) { + return status; + } + } + } + } + + return UCS_OK; +} + +ucs_status_t ucs_config_parser_fill_opts(void *opts, ucs_config_field_t *fields, + const char *env_prefix, + const char *table_prefix, + int ignore_errors) +{ + ucs_status_t status; + char prefix[128]; + + /* Set default values */ + status = ucs_config_parser_set_default_values(opts, fields); + if (status != UCS_OK) { + goto err; + } + + /* Apply environment variables */ + status 
= ucs_config_apply_env_vars(opts, fields, UCS_CONFIG_PREFIX, + table_prefix, 1, ignore_errors); + if (status != UCS_OK) { + goto err_free; + } + + /* Apply environment variables with custom prefix */ + if ((env_prefix != NULL) && (strlen(env_prefix) > 0)) { + snprintf(prefix, sizeof(prefix), "%s%s_", UCS_CONFIG_PREFIX, env_prefix); + status = ucs_config_apply_env_vars(opts, fields, prefix, table_prefix, + 1, ignore_errors); + if (status != UCS_OK) { + goto err_free; + } + } + + return UCS_OK; + +err_free: + ucs_config_parser_release_opts(opts, fields); /* Release default values */ +err: + return status; +} + +ucs_status_t ucs_config_parser_set_value(void *opts, ucs_config_field_t *fields, + const char *name, const char *value) +{ + return ucs_config_parser_set_value_internal(opts, fields, name, value, NULL, 1); +} + +ucs_status_t ucs_config_parser_get_value(void *opts, ucs_config_field_t *fields, + const char *name, char *value, + size_t max) +{ + ucs_config_field_t *field; + ucs_config_field_t *sub_fields; + void *sub_opts; + void *value_ptr; + size_t name_len; + ucs_status_t status; + + if (!opts || !fields || !name || (!value && (max > 0))) { + return UCS_ERR_INVALID_PARAM; + } + + for (field = fields, status = UCS_ERR_NO_ELEM; + !ucs_config_field_is_last(field) && (status == UCS_ERR_NO_ELEM); ++field) { + + name_len = strlen(field->name); + + ucs_trace("compare name \"%s\" with field \"%s\" which is %s subtable", + name, field->name, + ucs_config_is_table_field(field) ? 
"a" : "NOT a"); + + if (ucs_config_is_table_field(field) && + !strncmp(field->name, name, name_len)) { + + sub_fields = (ucs_config_field_t*)field->parser.arg; + sub_opts = (char*)opts + field->offset; + status = ucs_config_parser_get_value(sub_opts, sub_fields, + name + name_len, + value, max); + } else if (!strncmp(field->name, name, strlen(name))) { + if (value) { + value_ptr = (char *)opts + field->offset; + field->parser.write(value, max, value_ptr, field->parser.arg); + } + status = UCS_OK; + } + } + + return status; +} + +ucs_status_t ucs_config_parser_clone_opts(const void *src, void *dst, + ucs_config_field_t *fields) +{ + ucs_status_t status; + + ucs_config_field_t *field; + for (field = fields; !ucs_config_field_is_last(field); ++field) { + if (ucs_config_is_alias_field(field) || + ucs_config_is_deprecated_field(field)) { + continue; + } + + status = field->parser.clone((const char*)src + field->offset, + (char*)dst + field->offset, + field->parser.arg); + if (status != UCS_OK) { + ucs_error("Failed to clone the filed '%s': %s", field->name, + ucs_status_string(status)); + return status; + } + } + + return UCS_OK; +} + +void ucs_config_parser_release_opts(void *opts, ucs_config_field_t *fields) +{ + ucs_config_field_t *field; + + for (field = fields; !ucs_config_field_is_last(field); ++field) { + if (ucs_config_is_alias_field(field) || + ucs_config_is_deprecated_field(field)) { + continue; + } + + ucs_config_parser_release_field(field, (char*)opts + field->offset); + } +} + +/* + * Finds the "real" field, which the given field is alias of. + * *p_alias_table_offset is filled with the offset of the sub-table containing + * the field, it may be non-0 if the alias is found in a sub-table. 
+ */ +static const ucs_config_field_t * +ucs_config_find_aliased_field(const ucs_config_field_t *fields, + const ucs_config_field_t *alias, + size_t *p_alias_table_offset) +{ + const ucs_config_field_t *field, *result; + size_t offset; + + for (field = fields; !ucs_config_field_is_last(field); ++field) { + if (field == alias) { + /* skip */ + continue; + } else if (ucs_config_is_table_field(field)) { + result = ucs_config_find_aliased_field(field->parser.arg, alias, + &offset); + if (result != NULL) { + *p_alias_table_offset = offset + field->offset; + return result; + } + } else if (field->offset == alias->offset) { + *p_alias_table_offset = 0; + return field; + } + } + + return NULL; +} + +static void __print_stream_cb(int num, const char *line, void *arg) +{ + FILE *stream = arg; + fprintf(stream, "# %s\n", line); +} + +static void +ucs_config_parser_print_field(FILE *stream, const void *opts, const char *env_prefix, + ucs_list_link_t *prefix_list, const char *name, + const ucs_config_field_t *field, unsigned long flags, + const char *docstr, ...) 
+{ + ucs_config_parser_prefix_t *prefix, *head; + char value_buf[128] = {0}; + char syntax_buf[256] = {0}; + va_list ap; + + ucs_assert(!ucs_list_is_empty(prefix_list)); + head = ucs_list_head(prefix_list, ucs_config_parser_prefix_t, list); + + if (ucs_config_is_deprecated_field(field)) { + snprintf(value_buf, sizeof(value_buf), " (deprecated)"); + snprintf(syntax_buf, sizeof(syntax_buf), "N/A"); + } else { + snprintf(value_buf, sizeof(value_buf), "="); + field->parser.write(value_buf + 1, sizeof(value_buf) - 2, + (char*)opts + field->offset, + field->parser.arg); + field->parser.help(syntax_buf, sizeof(syntax_buf) - 1, field->parser.arg); + } + + if (flags & UCS_CONFIG_PRINT_DOC) { + fprintf(stream, "#\n"); + ucs_config_print_doc_line_by_line(field, __print_stream_cb, stream); + fprintf(stream, "#\n"); + fprintf(stream, "# %-*s %s\n", UCS_CONFIG_PARSER_DOCSTR_WIDTH, "syntax:", + syntax_buf); + + /* Extra docstring */ + if (docstr != NULL) { + fprintf(stream, "# "); + va_start(ap, docstr); + vfprintf(stream, docstr, ap); + va_end(ap); + fprintf(stream, "\n"); + } + + /* Parents in configuration hierarchy */ + if (prefix_list->next != prefix_list->prev) { + fprintf(stream, "# %-*s", UCS_CONFIG_PARSER_DOCSTR_WIDTH, "inherits:"); + ucs_list_for_each(prefix, prefix_list, list) { + if (prefix == head) { + continue; + } + + fprintf(stream, " %s%s%s", env_prefix, prefix->prefix, name); + if (prefix != ucs_list_tail(prefix_list, ucs_config_parser_prefix_t, list)) { + fprintf(stream, ","); + } + } + fprintf(stream, "\n"); + } + + fprintf(stream, "#\n"); + } + + fprintf(stream, "%s%s%s%s\n", env_prefix, head->prefix, name, value_buf); + + if (flags & UCS_CONFIG_PRINT_DOC) { + fprintf(stream, "\n"); + } +} + +static void +ucs_config_parser_print_opts_recurs(FILE *stream, const void *opts, + const ucs_config_field_t *fields, + unsigned flags, const char *env_prefix, + ucs_list_link_t *prefix_list) +{ + const ucs_config_field_t *field, *aliased_field; + 
ucs_config_parser_prefix_t *head; + ucs_config_parser_prefix_t inner_prefix; + size_t alias_table_offset; + + for (field = fields; !ucs_config_field_is_last(field); ++field) { + if (ucs_config_is_table_field(field)) { + /* Parse with sub-table prefix. + * We start the leaf prefix and continue up the hierarchy. + */ + /* Do not add the same prefix several times in a sequence. It can + * happen when similar prefix names were used during config + * table inheritance, e.g. "IB_" -> "RC_" -> "RC_". We check the + * previous entry only, since a sequence such as + * "RC_" -> "IB_" -> "RC_" is currently not possible. */ + if (ucs_list_is_empty(prefix_list) || + strcmp(ucs_list_tail(prefix_list, + ucs_config_parser_prefix_t, + list)->prefix, field->name)) { + inner_prefix.prefix = field->name; + ucs_list_add_tail(prefix_list, &inner_prefix.list); + } else { + inner_prefix.prefix = NULL; /* marks that nothing was linked for this field */ + } + + ucs_config_parser_print_opts_recurs(stream, + UCS_PTR_BYTE_OFFSET(opts, field->offset), + field->parser.arg, flags, + env_prefix, prefix_list); + + if (inner_prefix.prefix != NULL) { /* unlink only if this iteration linked it */ + ucs_list_del(&inner_prefix.list); + } + } else if (ucs_config_is_alias_field(field)) { /* aliases are printed only with UCS_CONFIG_PRINT_HIDDEN */ + if (flags & UCS_CONFIG_PRINT_HIDDEN) { + aliased_field = + ucs_config_find_aliased_field(fields, field, + &alias_table_offset); + if (aliased_field == NULL) { + ucs_fatal("could not find aliased field of %s", field->name); + } + + head = ucs_list_head(prefix_list, ucs_config_parser_prefix_t, list); + + ucs_config_parser_print_field(stream, + UCS_PTR_BYTE_OFFSET(opts, alias_table_offset), + env_prefix, prefix_list, + field->name, aliased_field, + flags, "%-*s %s%s%s", + UCS_CONFIG_PARSER_DOCSTR_WIDTH, + "alias of:", env_prefix, + head->prefix, + aliased_field->name); + } + } else { + if (ucs_config_is_deprecated_field(field) && + !(flags & UCS_CONFIG_PRINT_HIDDEN)) { + continue; /* deprecated fields are shown only with UCS_CONFIG_PRINT_HIDDEN */ + } + ucs_config_parser_print_field(stream, opts, env_prefix, prefix_list, + field->name, field, flags, NULL); + } + } +} +
+void ucs_config_parser_print_opts(FILE *stream, const char *title, const void *opts, + ucs_config_field_t *fields, const char *table_prefix, + ucs_config_print_flags_t flags) +{ + ucs_config_parser_prefix_t table_prefix_elem; + UCS_LIST_HEAD(prefix_list); + + if (flags & UCS_CONFIG_PRINT_HEADER) { + fprintf(stream, "\n"); + fprintf(stream, "#\n"); + fprintf(stream, "# %s\n", title); + fprintf(stream, "#\n"); + fprintf(stream, "\n"); + } + + if (flags & UCS_CONFIG_PRINT_CONFIG) { + table_prefix_elem.prefix = table_prefix ? table_prefix : ""; + ucs_list_add_tail(&prefix_list, &table_prefix_elem.list); + ucs_config_parser_print_opts_recurs(stream, opts, fields, flags, + UCS_CONFIG_PREFIX, &prefix_list); + } + + if (flags & UCS_CONFIG_PRINT_HEADER) { + fprintf(stream, "\n"); + } +} + +void ucs_config_parser_print_all_opts(FILE *stream, ucs_config_print_flags_t flags) +{ + const ucs_config_global_list_entry_t *entry; + ucs_status_t status; + char title[64]; + void *opts; + + ucs_list_for_each(entry, &ucs_config_global_list, list) { + if ((entry->table == NULL) || + (ucs_config_field_is_last(&entry->table[0]))) { + /* don't print title for an empty configuration table */ + continue; + } + + opts = ucs_malloc(entry->size, "tmp_opts"); + if (opts == NULL) { + ucs_error("could not allocate configuration of size %zu", entry->size); + continue; + } + + status = ucs_config_parser_fill_opts(opts, entry->table, NULL, + entry->prefix, 0); + if (status != UCS_OK) { + ucs_free(opts); + continue; + } + + snprintf(title, sizeof(title), "%s configuration", entry->name); + ucs_config_parser_print_opts(stream, title, opts, entry->table, + entry->prefix, flags); + + ucs_config_parser_release_opts(opts, entry->table); + ucs_free(opts); + } +} + +void ucs_config_parser_warn_unused_env_vars_once() +{ + static uint32_t warn_once = 1; + + if (!ucs_atomic_cswap32(&warn_once, 1, 0)) { + return; + } + + ucs_config_parser_warn_unused_env_vars(); +} + +void 
ucs_config_parser_warn_unused_env_vars() +{ + char unused_env_vars_names[40]; + int num_unused_vars; + char **envp, *envstr; + size_t prefix_len; + char *var_name; + char *p, *endp; + khiter_t iter; + char *saveptr; + int truncated; + int ret; + + /* Report, in a single warning, the UCS_CONFIG_PREFIX environment variables + * that are not present in ucs_config_parser_env_vars. Names are gathered + * in a fixed-size buffer; when one does not fit, collection stops and + * "..." is appended to the message. */ + if (!ucs_global_opts.warn_unused_env_vars) { + return; + } + + pthread_mutex_lock(&ucs_config_parser_env_vars_hash_lock); + + prefix_len = strlen(UCS_CONFIG_PREFIX); + p = unused_env_vars_names; + endp = p + sizeof(unused_env_vars_names) - 1; + *endp = '\0'; + truncated = 0; + num_unused_vars = 0; + + for (envp = environ; !truncated && (*envp != NULL); ++envp) { + envstr = ucs_strdup(*envp, "env_str"); + if (envstr == NULL) { + continue; + } + + var_name = strtok_r(envstr, "=", &saveptr); + if (!var_name || strncmp(var_name, UCS_CONFIG_PREFIX, prefix_len)) { + ucs_free(envstr); + continue; /* Not UCX */ + } + + iter = kh_get(ucs_config_env_vars, &ucs_config_parser_env_vars, var_name); + if (iter == kh_end(&ucs_config_parser_env_vars)) { + ret = snprintf(p, endp - p, " %s,", var_name); + /* snprintf() returns the length it needed; a value equal to the + * buffer size means the terminating NUL displaced the last + * character, so that case is a truncation as well */ + if (ret >= endp - p) { + truncated = 1; + *p = '\0'; /* drop the partially written name */ + } else { + p += strlen(p); + ++num_unused_vars; + } + } + + ucs_free(envstr); + } + + /* Also warn when the only unused name(s) did not fit in the buffer */ + if ((num_unused_vars > 0) || truncated) { + if (!truncated) { + p[-1] = '\0'; /* remove trailing comma */ + } + ucs_warn("unused env variable%s:%s%s (set %s%s=n to suppress this warning)", + (num_unused_vars > 1) ? "s" : "", unused_env_vars_names, + truncated ? "..."
: "", UCS_CONFIG_PREFIX, + UCS_GLOBAL_OPTS_WARN_UNUSED_CONFIG); + } + + pthread_mutex_unlock(&ucs_config_parser_env_vars_hash_lock); +} + +size_t ucs_config_memunits_get(size_t config_size, size_t auto_size, + size_t max_size) +{ + if (config_size == UCS_MEMUNITS_AUTO) { + return auto_size; + } else { + return ucs_min(config_size, max_size); + } +} + +int ucs_config_names_search(ucs_config_names_array_t config_names, + const char *str) +{ + unsigned i; + + for (i = 0; i < config_names.count; ++i) { + if (!fnmatch(config_names.names[i], str, 0)) { + return i; + } + } + + return -1; +} + +UCS_STATIC_CLEANUP { + const char *key; + + kh_foreach_key(&ucs_config_parser_env_vars, key, { + ucs_free((void*)key); + }) + kh_destroy_inplace(ucs_config_env_vars, &ucs_config_parser_env_vars); +} diff --git a/src/ucs/config/parser.h b/src/ucs/config/parser.h new file mode 100644 index 0000000..aa37e7a --- /dev/null +++ b/src/ucs/config/parser.h @@ -0,0 +1,448 @@ +/* +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_CONFIG_PARSER_H +#define UCS_CONFIG_PARSER_H + +#include "types.h" + +#include +#include +#include + +#include + + +#define UCS_CONFIG_PREFIX "UCX_" +#define UCS_CONFIG_ARRAY_MAX 128 + +BEGIN_C_DECLS + +/** @file parser.h */ + +/* + * Configuration variables syntax: + * + * name: <env_prefix><table_prefix><field_name> + * + * - env_prefix: supplied by user to ucs_config_read_XXX() API + * - table_prefix: defined in sub-tables. e.g IB_, UD_, ... + * - field_name: field_name as defined in the table.
e.g ZCOPY_THRESH + * + * Examples of full variable names: + * - UCS_CIB_RNDV_THRESH + * - UCS_IB_TX_MODERATION + */ + +typedef struct ucs_config_parser { + int (*read) (const char *buf, void *dest, const void *arg); + int (*write)(char *buf, size_t max, + const void *src, const void *arg); + ucs_status_t (*clone)(const void *src, void *dest, const void *arg); + void (*release)(void *ptr, const void *arg); + void (*help)(char *buf, size_t max, const void *arg); + const void *arg; +} ucs_config_parser_t; + + +typedef struct ucs_config_array { + size_t elem_size; + ucs_config_parser_t parser; +} ucs_config_array_t; + + +typedef struct ucs_config_field { + const char *name; + const char *dfl_value; + const char *doc; + size_t offset; + ucs_config_parser_t parser; +} ucs_config_field_t; + + +typedef struct ucs_ib_port_spec { + char *device_name; + unsigned port_num; +} ucs_ib_port_spec_t; + + +typedef struct ucs_range_spec { + unsigned first; /* the first value in the range */ + unsigned last; /* the last value in the range */ +} ucs_range_spec_t; + + +typedef struct ucs_config_global_list_entry { + const char *name; /* configuration table name */ + const char *prefix; /* configuration prefix */ + ucs_config_field_t *table; /* array of configuration fields */ + size_t size; /* size of config structure */ + ucs_list_link_t list; /* entry in global list */ +} ucs_config_global_list_entry_t; + + +typedef struct ucs_config_bw_spec { + char *name; + double bw; +} ucs_config_bw_spec_t; + + +#define UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY \ + { \ + .name = "", \ + .prefix = "", \ + .table = NULL, \ + .size = 0, \ + } + + +#define UCS_CONFIG_REGISTER_TABLE_ENTRY(_entry) \ + UCS_STATIC_INIT { \ + extern ucs_list_link_t ucs_config_global_list; \ + ucs_list_add_tail(&ucs_config_global_list, &(_entry)->list); \ + } \ + \ + UCS_STATIC_CLEANUP { \ + ucs_list_del(&(_entry)->list); \ + } + +#define UCS_CONFIG_REGISTER_TABLE(_table, _name, _prefix, _type) \ + static 
ucs_config_global_list_entry_t _table##_config_entry = { \ + .table = _table, \ + .name = _name, \ + .prefix = _prefix, \ + .size = sizeof(_type) \ + }; \ + UCS_CONFIG_REGISTER_TABLE_ENTRY(&_table##_config_entry); + +/* + * Parsing and printing different data types + */ + +int ucs_config_sscanf_string(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_string(char *buf, size_t max, const void *src, const void *arg); +ucs_status_t ucs_config_clone_string(const void *src, void *dest, const void *arg); +void ucs_config_release_string(void *ptr, const void *arg); + +int ucs_config_sscanf_int(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_int(char *buf, size_t max, const void *src, const void *arg); +ucs_status_t ucs_config_clone_int(const void *src, void *dest, const void *arg); + +int ucs_config_sscanf_uint(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_uint(char *buf, size_t max, const void *src, const void *arg); +ucs_status_t ucs_config_clone_uint(const void *src, void *dest, const void *arg); + +int ucs_config_sscanf_ulong(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_ulong(char *buf, size_t max, const void *src, const void *arg); +ucs_status_t ucs_config_clone_ulong(const void *src, void *dest, const void *arg); + +int ucs_config_sscanf_double(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_double(char *buf, size_t max, const void *src, const void *arg); +ucs_status_t ucs_config_clone_double(const void *src, void *dest, const void *arg); + +int ucs_config_sscanf_hex(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_hex(char *buf, size_t max, const void *src, const void *arg); + +int ucs_config_sscanf_bool(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_bool(char *buf, size_t max, const void *src, const void *arg); + +int ucs_config_sscanf_ternary(const char *buf, void *dest, const void *arg); +int 
ucs_config_sprintf_ternary(char *buf, size_t max, const void *src, const void *arg); + +int ucs_config_sscanf_on_off(const char *buf, void *dest, const void *arg); + +int ucs_config_sscanf_on_off_auto(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_on_off_auto(char *buf, size_t max, const void *src, const void *arg); + +int ucs_config_sscanf_enum(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_enum(char *buf, size_t max, const void *src, const void *arg); +void ucs_config_help_enum(char *buf, size_t max, const void *arg); + +int ucs_config_sscanf_bitmap(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_bitmap(char *buf, size_t max, const void *src, const void *arg); +void ucs_config_help_bitmap(char *buf, size_t max, const void *arg); + +int ucs_config_sscanf_bitmask(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_bitmask(char *buf, size_t max, const void *src, const void *arg); + +int ucs_config_sscanf_time(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_time(char *buf, size_t max, const void *src, const void *arg); + +int ucs_config_sscanf_bw(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_bw(char *buf, size_t max, const void *src, const void *arg); + +int ucs_config_sscanf_bw_spec(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_bw_spec(char *buf, size_t max, const void *src, const void *arg); +ucs_status_t ucs_config_clone_bw_spec(const void *src, void *dest, const void *arg); +void ucs_config_release_bw_spec(void *ptr, const void *arg); + +int ucs_config_sscanf_signo(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_signo(char *buf, size_t max, const void *src, const void *arg); + +int ucs_config_sscanf_memunits(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_memunits(char *buf, size_t max, const void *src, const void *arg); + +int 
ucs_config_sscanf_ulunits(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_ulunits(char *buf, size_t max, const void *src, const void *arg); + +int ucs_config_sscanf_range_spec(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_range_spec(char *buf, size_t max, const void *src, const void *arg); +ucs_status_t ucs_config_clone_range_spec(const void *src, void *dest, const void *arg); + +int ucs_config_sscanf_array(const char *buf, void *dest, const void *arg); +int ucs_config_sprintf_array(char *buf, size_t max, const void *src, const void *arg); +ucs_status_t ucs_config_clone_array(const void *src, void *dest, const void *arg); +void ucs_config_release_array(void *ptr, const void *arg); +void ucs_config_help_array(char *buf, size_t max, const void *arg); + +int ucs_config_sscanf_table(const char *buf, void *dest, const void *arg); +ucs_status_t ucs_config_clone_table(const void *src, void *dest, const void *arg); +void ucs_config_release_table(void *ptr, const void *arg); +void ucs_config_help_table(char *buf, size_t max, const void *arg); + +void ucs_config_release_nop(void *ptr, const void *arg); +void ucs_config_help_generic(char *buf, size_t max, const void *arg); + +#define UCS_CONFIG_DEPRECATED_FIELD_OFFSET SIZE_MAX + +/* Forward declaration of array. Should be in header file. */ +#define UCS_CONFIG_DECLARE_ARRAY(_name) \ + extern ucs_config_array_t ucs_config_array_##_name; + +/* Definition of array of specific type. Should be in source file. */ +#define UCS_CONFIG_DEFINE_ARRAY(_name, _elem_size, ...) 
\ + ucs_config_array_t ucs_config_array_##_name = {_elem_size, __VA_ARGS__}; + +#define UCS_CONFIG_TYPE_STRING {ucs_config_sscanf_string, ucs_config_sprintf_string, \ + ucs_config_clone_string, ucs_config_release_string, \ + ucs_config_help_generic, "string"} + +#define UCS_CONFIG_TYPE_INT {ucs_config_sscanf_int, ucs_config_sprintf_int, \ + ucs_config_clone_int, ucs_config_release_nop, \ + ucs_config_help_generic, "integer"} + +#define UCS_CONFIG_TYPE_UINT {ucs_config_sscanf_uint, ucs_config_sprintf_uint, \ + ucs_config_clone_uint, ucs_config_release_nop, \ + ucs_config_help_generic, "unsigned integer"} + +#define UCS_CONFIG_TYPE_ULONG {ucs_config_sscanf_ulong, ucs_config_sprintf_ulong, \ + ucs_config_clone_ulong, ucs_config_release_nop, \ + ucs_config_help_generic, "unsigned long"} + +#define UCS_CONFIG_TYPE_ULUNITS {ucs_config_sscanf_ulunits, ucs_config_sprintf_ulunits, \ + ucs_config_clone_ulong, ucs_config_release_nop, \ + ucs_config_help_generic, \ + "unsigned long: , \"inf\", or \"auto\""} + +#define UCS_CONFIG_TYPE_DOUBLE {ucs_config_sscanf_double, ucs_config_sprintf_double, \ + ucs_config_clone_double, ucs_config_release_nop, \ + ucs_config_help_generic, "floating point number"} + +#define UCS_CONFIG_TYPE_HEX {ucs_config_sscanf_hex, ucs_config_sprintf_hex, \ + ucs_config_clone_uint, ucs_config_release_nop, \ + ucs_config_help_generic, \ + "hex representation of a number or \"auto\""} + +#define UCS_CONFIG_TYPE_BOOL {ucs_config_sscanf_bool, ucs_config_sprintf_bool, \ + ucs_config_clone_int, ucs_config_release_nop, \ + ucs_config_help_generic, ""} + +#define UCS_CONFIG_TYPE_TERNARY {ucs_config_sscanf_ternary, ucs_config_sprintf_ternary, \ + ucs_config_clone_int, ucs_config_release_nop, \ + ucs_config_help_generic, ""} + +#define UCS_CONFIG_TYPE_ON_OFF {ucs_config_sscanf_on_off, ucs_config_sprintf_on_off_auto, \ + ucs_config_clone_int, ucs_config_release_nop, \ + ucs_config_help_generic, ""} + +#define UCS_CONFIG_TYPE_ON_OFF_AUTO 
{ucs_config_sscanf_on_off_auto, ucs_config_sprintf_on_off_auto, \ + ucs_config_clone_int, ucs_config_release_nop, \ + ucs_config_help_generic, ""} + +#define UCS_CONFIG_TYPE_ENUM(t) {ucs_config_sscanf_enum, ucs_config_sprintf_enum, \ + ucs_config_clone_uint, ucs_config_release_nop, \ + ucs_config_help_enum, t} + +#define UCS_CONFIG_TYPE_BITMAP(t) {ucs_config_sscanf_bitmap, ucs_config_sprintf_bitmap, \ + ucs_config_clone_uint, ucs_config_release_nop, \ + ucs_config_help_bitmap, t} + +#define UCS_CONFIG_TYPE_BITMASK {ucs_config_sscanf_bitmask, ucs_config_sprintf_bitmask, \ + ucs_config_clone_uint, ucs_config_release_nop, \ + ucs_config_help_generic, "bit count"} + +#define UCS_CONFIG_TYPE_TIME {ucs_config_sscanf_time, ucs_config_sprintf_time, \ + ucs_config_clone_double, ucs_config_release_nop, \ + ucs_config_help_generic, "time value: [s|us|ms|ns]"} + +#define UCS_CONFIG_TYPE_BW {ucs_config_sscanf_bw, ucs_config_sprintf_bw, \ + ucs_config_clone_double, ucs_config_release_nop, \ + ucs_config_help_generic, \ + "bandwidth value: [T|G|M|K]B|b[[p|/]s] or \"auto\""} + +#define UCS_CONFIG_TYPE_BW_SPEC {ucs_config_sscanf_bw_spec, ucs_config_sprintf_bw_spec, \ + ucs_config_clone_bw_spec, ucs_config_release_bw_spec, \ + ucs_config_help_generic, \ + "device_name:[T|G|M|K]B|b[[p|/]s] or device_name:auto"} + +#define UCS_CONFIG_TYPE_SIGNO {ucs_config_sscanf_signo, ucs_config_sprintf_signo, \ + ucs_config_clone_int, ucs_config_release_nop, \ + ucs_config_help_generic, "system signal (number or SIGxxx)"} + +#define UCS_CONFIG_TYPE_MEMUNITS {ucs_config_sscanf_memunits, ucs_config_sprintf_memunits, \ + ucs_config_clone_ulong, ucs_config_release_nop, \ + ucs_config_help_generic, \ + "memory units: [b|kb|mb|gb], \"inf\", or \"auto\""} + +#define UCS_CONFIG_TYPE_ARRAY(a) {ucs_config_sscanf_array, ucs_config_sprintf_array, \ + ucs_config_clone_array, ucs_config_release_array, \ + ucs_config_help_array, &ucs_config_array_##a} + +#define UCS_CONFIG_TYPE_TABLE(t) {ucs_config_sscanf_table, 
NULL, \ + ucs_config_clone_table, ucs_config_release_table, \ + ucs_config_help_table, t} + +#define UCS_CONFIG_TYPE_RANGE_SPEC {ucs_config_sscanf_range_spec,ucs_config_sprintf_range_spec, \ + ucs_config_clone_range_spec, ucs_config_release_nop, \ + ucs_config_help_generic, "numbers range: -"} + +#define UCS_CONFIG_TYPE_DEPRECATED {(ucs_field_type(ucs_config_parser_t, read)) ucs_empty_function_do_assert, \ + (ucs_field_type(ucs_config_parser_t, write)) ucs_empty_function_do_assert, \ + (ucs_field_type(ucs_config_parser_t, clone)) ucs_empty_function_do_assert, \ + (ucs_field_type(ucs_config_parser_t, release))ucs_empty_function_do_assert, \ + (ucs_field_type(ucs_config_parser_t, help)) ucs_empty_function_do_assert, \ + ""} + +/* + * Helpers for using an array of strings. + */ +#define UCS_CONFIG_TYPE_STRING_ARRAY \ + UCS_CONFIG_TYPE_ARRAY(string) + +UCS_CONFIG_DECLARE_ARRAY(string) + +/** + * Set default values for options. + * + * @param opts User-defined options structure to fill. + * @param fields Array of fields which define how to parse. + */ +ucs_status_t +ucs_config_parser_set_default_values(void *opts, ucs_config_field_t *fields); + + +/** + * Fill existing opts structure. + * + * @param opts User-defined options structure to fill. + * @param fields Array of fields which define how to parse. + * @param env_prefix Prefix to add to all environment variables. + * @param table_prefix Optional prefix to add to the variables of top-level table. + * @param ignore_errors Whether to ignore parsing errors and continue parsing + * other fields. + */ +ucs_status_t ucs_config_parser_fill_opts(void *opts, ucs_config_field_t *fields, + const char *env_prefix, + const char *table_prefix, + int ignore_errors); + +/** + * Perform deep copy of the options structure. + * + * @param src User-defined options structure to copy from. + * @param dst User-defined options structure to copy to. + * @param table Array of fields which define the structure of the options. 
+ */ +ucs_status_t ucs_config_parser_clone_opts(const void *src, void *dst, + ucs_config_field_t *fields); + +/** + * Release the options fields. + * NOTE: Does not release the structure itself. + * + * @param opts User-defined options structure. + * @param fields Array of fields which define the options. + */ +void ucs_config_parser_release_opts(void *opts, ucs_config_field_t *fields); + +/** + * Print the options - names, values, documentation. + * + * @param stream Output stream to print to. + * @param title Title printed in the header when UCS_CONFIG_PRINT_HEADER is set. + * @param opts User-defined options structure. + * @param fields Array of fields which define the options. + * @param table_prefix Optional prefix to add to the variables of top-level table. + * @param flags Flags which control the output. + */ +void ucs_config_parser_print_opts(FILE *stream, const char *title, const void *opts, + ucs_config_field_t *fields, const char *table_prefix, + ucs_config_print_flags_t flags); + +/** + * Print all options defined in the library - names, values, documentation. + * + * @param stream Output stream to print to. + * @param flags Flags which control the output. + */ +void ucs_config_parser_print_all_opts(FILE *stream, ucs_config_print_flags_t flags); + +/** + * Read a value from options structure. + * + * @param opts User-defined options structure. + * @param fields Array of fields which define how to parse. + * @param name Option name including subtable prefixes. + * @param value Filled with option value (as a string). + * @param max Number of bytes reserved in 'value'. + */ +ucs_status_t ucs_config_parser_get_value(void *opts, ucs_config_field_t *fields, + const char *name, char *value, size_t max); + +/** + * Modify existing opts structure with new setting. + * + * @param opts User-defined options structure. + * @param fields Array of fields which define how to parse. + * @param name Option name to modify. + * @param value Value to assign.
+ */ +ucs_status_t ucs_config_parser_set_value(void *opts, ucs_config_field_t *fields, + const char *name, const char *value); + +/** + * Check all UCX_ environment variables have been used so far by the + * configuration parser, issue a warning if not. Called just before program exit. + */ +void ucs_config_parser_warn_unused_env_vars(); + +/** + * Wrapper for `ucs_config_parser_warn_unused_env_vars` + * that ensures that this is called once + */ +void ucs_config_parser_warn_unused_env_vars_once(); + +/** + * Translate configuration value of "MEMUNITS" type to actual value. + * + * @param config_size Size specified by configuration. + * @param auto_size Default size when configured to 'auto'. + * @param max_size Maximal size to trim "inf". + */ +size_t ucs_config_memunits_get(size_t config_size, size_t auto_size, + size_t max_size); + +/** + * Look for a string in config names array. + * + * @param config_names lookup array of counters patterns. + * @param str string to search. + */ +int ucs_config_names_search(ucs_config_names_array_t config_names, + const char *str); + +END_C_DECLS + +#endif diff --git a/src/ucs/config/types.h b/src/ucs/config/types.h new file mode 100644 index 0000000..3c77ba2 --- /dev/null +++ b/src/ucs/config/types.h @@ -0,0 +1,121 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_CONFIG_TYPES_H +#define UCS_CONFIG_TYPES_H + +#include +#include + +/** + * Logging levels. 
+ */ +typedef enum { + UCS_LOG_LEVEL_FATAL, /* Immediate termination */ + UCS_LOG_LEVEL_ERROR, /* Error is returned to the user */ + UCS_LOG_LEVEL_WARN, /* Something's wrong, but we continue */ + UCS_LOG_LEVEL_INFO, /* Information */ + UCS_LOG_LEVEL_DEBUG, /* Low-volume debugging */ + UCS_LOG_LEVEL_TRACE, /* High-volume debugging */ + UCS_LOG_LEVEL_TRACE_REQ, /* Every send/receive request */ + UCS_LOG_LEVEL_TRACE_DATA, /* Data sent/received on the transport */ + UCS_LOG_LEVEL_TRACE_ASYNC, /* Asynchronous progress engine */ + UCS_LOG_LEVEL_TRACE_FUNC, /* Function calls */ + UCS_LOG_LEVEL_TRACE_POLL, /* Polling functions */ + UCS_LOG_LEVEL_LAST, + UCS_LOG_LEVEL_PRINT /* Temporary output */ +} ucs_log_level_t; + + +/** + * Async progress mode. + */ +typedef enum { + UCS_ASYNC_MODE_SIGNAL, + UCS_ASYNC_MODE_THREAD, /* Deprecated, keep for backward compatibility */ + UCS_ASYNC_MODE_THREAD_SPINLOCK = UCS_ASYNC_MODE_THREAD, + UCS_ASYNC_MODE_THREAD_MUTEX, + UCS_ASYNC_MODE_POLL, /* TODO keep only in debug version */ + UCS_ASYNC_MODE_LAST +} ucs_async_mode_t; + + +extern const char *ucs_async_mode_names[]; + + +/** + * Ternary logic value. + */ +typedef enum ucs_ternary_value { + UCS_NO = 0, + UCS_YES = 1, + UCS_TRY = 2, + UCS_TERNARY_LAST +} ucs_ternary_value_t; + + +/** + * On/Off/Auto logic value. 
+ */ +typedef enum ucs_on_off_auto_value { + UCS_CONFIG_OFF = 0, + UCS_CONFIG_ON = 1, + UCS_CONFIG_AUTO = 2, + UCS_CONFIG_ON_OFF_LAST +} ucs_on_off_auto_value_t; + + +/** + * Error handling modes + */ +typedef enum { + UCS_HANDLE_ERROR_BACKTRACE, /* Print backtrace */ + UCS_HANDLE_ERROR_FREEZE, /* Freeze and wait for a debugger */ + UCS_HANDLE_ERROR_DEBUG, /* Attach debugger */ + UCS_HANDLE_ERROR_LAST +} ucs_handle_error_t; + + +/** + * Configuration printing flags + */ +typedef enum { + UCS_CONFIG_PRINT_CONFIG = UCS_BIT(0), + UCS_CONFIG_PRINT_HEADER = UCS_BIT(1), + UCS_CONFIG_PRINT_DOC = UCS_BIT(2), + UCS_CONFIG_PRINT_HIDDEN = UCS_BIT(3) +} ucs_config_print_flags_t; + + +/** + * Structure type for array configuration. Should be used inside the configuration + * structure declaration. + */ +#define UCS_CONFIG_ARRAY_FIELD(_type, _array_name) \ + struct { \ + _type *_array_name; \ + unsigned count; \ + unsigned pad; \ + } + + +/* Specific structure for an array of strings */ +#define UCS_CONFIG_STRING_ARRAY_FIELD(_array_name) \ + UCS_CONFIG_ARRAY_FIELD(char*, _array_name) + + +typedef UCS_CONFIG_STRING_ARRAY_FIELD(names) ucs_config_names_array_t; + +/** + * @ingroup UCS_RESOURCE + * BSD socket address specification. + */ +typedef struct ucs_sock_addr { + const struct sockaddr *addr; /**< Pointer to socket address */ + socklen_t addrlen; /**< Address length */ +} ucs_sock_addr_t; + +#endif /* TYPES_H_ */ diff --git a/src/ucs/config/ucm_opts.c b/src/ucs/config/ucm_opts.c new file mode 100644 index 0000000..0e320ec --- /dev/null +++ b/src/ucs/config/ucm_opts.c @@ -0,0 +1,92 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "parser.h" + +#include +#include +#include +#include + + +#define UCM_CONFIG_PREFIX "MEM_" + +static const char *ucm_mmap_hook_modes[] = { + [UCM_MMAP_HOOK_NONE] = "none", + [UCM_MMAP_HOOK_RELOC] = UCM_MMAP_HOOK_RELOC_STR, +#if UCM_BISTRO_HOOKS + [UCM_MMAP_HOOK_BISTRO] = UCM_MMAP_HOOK_BISTRO_STR, +#endif + [UCM_MMAP_HOOK_LAST] = NULL +}; + +static ucs_config_field_t ucm_global_config_table[] = { + {"LOG_LEVEL", "warn", + "Logging level for memory events", ucs_offsetof(ucm_global_config_t, log_level), + UCS_CONFIG_TYPE_ENUM(ucm_log_level_names)}, + + {"ALLOC_ALIGN", "16", + "Minimal alignment of allocated blocks", + ucs_offsetof(ucm_global_config_t, alloc_alignment), UCS_CONFIG_TYPE_MEMUNITS}, + + {"EVENTS", "yes", + "Enable memory events", + ucs_offsetof(ucm_global_config_t, enable_events), UCS_CONFIG_TYPE_BOOL}, + + {"MMAP_HOOK_MODE", UCM_DEFAULT_HOOK_MODE_STR, + "MMAP hook mode\n" + " none - don't set mmap hooks.\n" + " reloc - use ELF relocation table to set hooks.\n" +#if UCM_BISTRO_HOOKS + " bistro - use binary instrumentation to set hooks.\n" +#endif + ,ucs_offsetof(ucm_global_config_t, mmap_hook_mode), UCS_CONFIG_TYPE_ENUM(ucm_mmap_hook_modes)}, + + {"MALLOC_HOOKS", "yes", + "Enable using glibc malloc hooks", + ucs_offsetof(ucm_global_config_t, enable_malloc_hooks), + UCS_CONFIG_TYPE_BOOL}, + + {"MALLOC_RELOC", "yes", + "Enable installing malloc symbols in the relocation table.\n" + "This is unsafe and off by default, because sometimes glibc\n" + "calls malloc/free without going through the relocation table,\n" + "which would use the original implementation and not ours.", + ucs_offsetof(ucm_global_config_t, enable_malloc_reloc), UCS_CONFIG_TYPE_BOOL}, + + {"CUDA_RELOC", "yes", + "Enable installing CUDA symbols in the relocation table", + ucs_offsetof(ucm_global_config_t, enable_cuda_reloc), + UCS_CONFIG_TYPE_BOOL}, + + {"DYNAMIC_MMAP_THRESH", "yes", + "Enable dynamic mmap threshold: for 
every released block, the\n" + "mmap threshold is adjusted upward to the size of the size of\n" + "the block, and trim threshold is adjust to twice the size of\n" + "the dynamic mmap threshold.\n" + "Note: dynamic mmap threshold is disabled when running on valgrind.", + ucs_offsetof(ucm_global_config_t, enable_dynamic_mmap_thresh), + UCS_CONFIG_TYPE_BOOL}, + + {"DLOPEN_PROCESS_RPATH", "yes", + "Process RPATH section of caller module during dynamic libraries opening.", + ucs_offsetof(ucm_global_config_t, dlopen_process_rpath), + UCS_CONFIG_TYPE_BOOL}, + + {NULL} +}; + +UCS_CONFIG_REGISTER_TABLE(ucm_global_config_table, "UCM", UCM_CONFIG_PREFIX, + ucm_global_config_t) + +UCS_STATIC_INIT { + (void)ucs_config_parser_fill_opts(&ucm_global_opts, ucm_global_config_table, + NULL, UCM_CONFIG_PREFIX, 0); +} diff --git a/src/ucs/datastruct/arbiter.c b/src/ucs/datastruct/arbiter.c new file mode 100644 index 0000000..3263e31 --- /dev/null +++ b/src/ucs/datastruct/arbiter.c @@ -0,0 +1,395 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "arbiter.h" + +#include +#include + + +#define SENTINEL ((ucs_arbiter_elem_t*)0x1) + +void ucs_arbiter_init(ucs_arbiter_t *arbiter) +{ + arbiter->current = NULL; + UCS_ARBITER_GUARD_INIT(arbiter); +} + +void ucs_arbiter_group_init(ucs_arbiter_group_t *group) +{ + group->tail = NULL; +} + +void ucs_arbiter_cleanup(ucs_arbiter_t *arbiter) +{ + ucs_assert(arbiter->current == NULL); +} + +void ucs_arbiter_group_cleanup(ucs_arbiter_group_t *group) +{ + ucs_assert(group->tail == NULL); +} + +void ucs_arbiter_group_push_elem_always(ucs_arbiter_group_t *group, + ucs_arbiter_elem_t *elem) +{ + ucs_arbiter_elem_t *tail = group->tail; + + if (tail == NULL) { + elem->list.next = NULL; /* Not scheduled yet */ + elem->next = elem; /* Connect to itself */ + } else { + elem->next = tail->next; /* Point to first element */ + tail->next = elem; /* Point previous element to new one */ + } + + elem->group = group; /* Always point to group */ + group->tail = elem; /* Update group tail */ +} + +static int ucs_arbiter_group_is_scheduled(ucs_arbiter_elem_t *head) +{ + return head->list.next != NULL; +} + +void ucs_arbiter_group_push_head_elem_always(ucs_arbiter_t *arbiter, + ucs_arbiter_group_t *group, + ucs_arbiter_elem_t *elem) +{ + ucs_arbiter_elem_t *tail = group->tail; + ucs_arbiter_elem_t *head; + + elem->group = group; /* Always point to group */ + elem->list.next = NULL; /* Not scheduled yet */ + + if (tail == NULL) { + elem->next = elem; /* Connect to itself */ + group->tail = elem; /* Update group tail */ + return; + } + + head = tail->next; + elem->next = head; /* Point to first element */ + tail->next = elem; /* Point previous element to new one */ + + if (!ucs_arbiter_group_is_scheduled(head)) { + return; + } + + ucs_assert(arbiter != NULL); + + if (head->list.next == &head->list) { + /* single group which was scheduled */ + ucs_assert(arbiter->current == head); + ucs_list_head_init(&elem->list); + arbiter->current = elem; + } else { + 
ucs_list_insert_replace(head->list.prev, head->list.next, &elem->list); + if (arbiter->current == head) { + arbiter->current = elem; + } + } +} + +void ucs_arbiter_group_head_desched(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *head) +{ + ucs_arbiter_elem_t *next; + + if (!ucs_arbiter_group_is_scheduled(head)) { + return; /* Not scheduled */ + } + + /* If this group is the next to be scheduled, skip it */ + if (arbiter->current == head) { + next = ucs_list_next(&head->list, ucs_arbiter_elem_t, list); + arbiter->current = (next == head) ? NULL : next; + } + + ucs_list_del(&head->list); +} + +void ucs_arbiter_group_purge(ucs_arbiter_t *arbiter, + ucs_arbiter_group_t *group, + ucs_arbiter_callback_t cb, void *cb_arg) +{ + ucs_arbiter_elem_t *tail = group->tail; + ucs_arbiter_elem_t *next_group = NULL; + ucs_arbiter_elem_t *prev_group = NULL; + ucs_arbiter_elem_t *ptr, *next, *prev; + ucs_arbiter_elem_t *head, *orig_head; + ucs_arbiter_cb_result_t result; + int is_scheduled; + + if (tail == NULL) { + return; /* Empty group */ + } + + orig_head = head = tail->next; + is_scheduled = ucs_arbiter_group_is_scheduled(head); + next = head; + prev = tail; + + if (is_scheduled) { + prev_group = ucs_list_prev(&head->list, ucs_arbiter_elem_t, list); + next_group = ucs_list_next(&head->list, ucs_arbiter_elem_t, list); + } + + do { + ptr = next; + next = ptr->next; + /* Can't touch the element if it gets removed. But it can be reused + * later as well, so it's next should be NULL. 
*/ + ptr->next = NULL; + result = cb(arbiter, ptr, cb_arg); + + if (result == UCS_ARBITER_CB_RESULT_REMOVE_ELEM) { + if (ptr == head) { + head = next; + if (ptr == tail) { + /* Last element is being removed - mark group as empty */ + group->tail = NULL; + /* Break here to keep ptr->next = NULL, otherwise ptr->next + will be set to itself below */ + break; + } + } else if (ptr == tail) { + group->tail = prev; + /* tail->next should point to head, make sure next is head + * (it is assigned 2 lines below) */ + ucs_assert_always(next == head); + } + prev->next = next; + } else { + /* keep the element */ + ptr->next = next; /* Restore next pointer */ + prev = ptr; + } + } while (ptr != tail); + + if (is_scheduled) { + if (orig_head == prev_group) { + /* this is the only group which was scheduled */ + if (group->tail == NULL) { + /* group became empty - no more groups scheduled */ + arbiter->current = NULL; + } else if (orig_head != head) { + /* keep the group scheduled, but with new head element */ + arbiter->current = head; + ucs_list_head_init(&head->list); + } + } else { + if (group->tail == NULL) { + /* group became empty - deschedule it */ + prev_group->list.next = &next_group->list; + next_group->list.prev = &prev_group->list; + if (arbiter->current == orig_head) { + arbiter->current = next_group; + } + } else if (orig_head != head) { + /* keep the group scheduled, but with new head element */ + ucs_list_insert_replace(&prev_group->list, &next_group->list, + &head->list); + if (arbiter->current == orig_head) { + arbiter->current = head; + } + } + } + } else if ((orig_head != head) && (group->tail != NULL)) { + /* Mark new head as unscheduled */ + head->list.next = NULL; + } +} + +void ucs_arbiter_group_schedule_nonempty(ucs_arbiter_t *arbiter, + ucs_arbiter_group_t *group) +{ + ucs_arbiter_elem_t *tail = group->tail; + ucs_arbiter_elem_t *current, *head; + + UCS_ARBITER_GUARD_CHECK(arbiter); + + ucs_assert(tail != NULL); + head = tail->next; + + if (head == NULL) 
{ + /* it means that 1 element group is + * scheduled during dispatch. + * Restore next pointer. + */ + head = tail->next = tail; + } + + if (ucs_arbiter_group_is_scheduled(head)) { + return; /* Already scheduled */ + } + + current = arbiter->current; + if (current == NULL) { + ucs_list_head_init(&head->list); + arbiter->current = head; + } else { + ucs_list_insert_before(&current->list, &head->list); + } +} + +void ucs_arbiter_dispatch_nonempty(ucs_arbiter_t *arbiter, unsigned per_group, + ucs_arbiter_callback_t cb, void *cb_arg) +{ + ucs_arbiter_elem_t *group_head, *last_elem, *elem, *next_elem; + ucs_list_link_t *elem_list_next; + ucs_arbiter_elem_t *next_group, *prev_group; + ucs_arbiter_group_t *group; + ucs_arbiter_cb_result_t result; + unsigned group_dispatch_count; + int is_single_group; + UCS_LIST_HEAD(resched_groups); + + next_group = arbiter->current; + ucs_assert(next_group != NULL); + + do { + group_head = next_group; + ucs_assert(group_head != NULL); + prev_group = ucs_list_prev(&group_head->list, ucs_arbiter_elem_t, list); + next_group = ucs_list_next(&group_head->list, ucs_arbiter_elem_t, list); + ucs_assert(prev_group != NULL); + ucs_assert(next_group != NULL); + ucs_assert(prev_group->list.next == &group_head->list); + ucs_assert(next_group->list.prev == &group_head->list); + + group_dispatch_count = 0; + group = group_head->group; + last_elem = group->tail; + next_elem = group_head; + is_single_group = group_head == prev_group; + + do { + elem = next_elem; + next_elem = elem->next; + /* zero pointer to next elem here because: + * - user callback may free() the element + * - push_elem() will fail if next is not NULL + * and elem is reused later. For example in + * rc/ud transports control.
+ */ + elem->next = NULL; + elem_list_next = elem->list.next; + elem->list.next = NULL; + + ucs_assert(elem->group == group); + ucs_trace_poll("dispatching arbiter element %p", elem); + UCS_ARBITER_GUARD_ENTER(arbiter); + result = cb(arbiter, elem, cb_arg); + UCS_ARBITER_GUARD_EXIT(arbiter); + ucs_trace_poll("dispatch result %d", result); + ++group_dispatch_count; + + if (result == UCS_ARBITER_CB_RESULT_REMOVE_ELEM) { + if (elem == last_elem) { + /* Only element */ + group->tail = NULL; /* Group is empty now */ + if (is_single_group) { + next_group = NULL; /* No more groups */ + } else { + /* Remove the group */ + prev_group->list.next = &next_group->list; + next_group->list.prev = &prev_group->list; + } + } else { + /* Not only element */ + ucs_assert(elem == last_elem->next); /* first element should be removed */ + if (is_single_group) { + next_group = next_elem; /* No more groups, point arbiter + to next element in this group */ + ucs_list_head_init(&next_elem->list); + } else { + ucs_list_insert_replace(&prev_group->list, + &next_group->list, + &next_elem->list); + } + last_elem->next = next_elem; /* Tail points to new head */ + } + } else if (result == UCS_ARBITER_CB_RESULT_NEXT_GROUP) { + elem->next = next_elem; + /* avoid infinite loop */ + elem->list.next = elem_list_next; + break; + } else if ((result == UCS_ARBITER_CB_RESULT_DESCHED_GROUP) || + (result == UCS_ARBITER_CB_RESULT_RESCHED_GROUP)) { + elem->next = next_elem; + if (is_single_group) { + next_group = NULL; /* No more groups */ + } else { + prev_group->list.next = &next_group->list; + next_group->list.prev = &prev_group->list; + } + if (result == UCS_ARBITER_CB_RESULT_RESCHED_GROUP) { + ucs_list_add_tail(&resched_groups, &elem->list); + } + break; + } else if (result == UCS_ARBITER_CB_RESULT_STOP) { + elem->next = next_elem; + elem->list.next = elem_list_next; + /* make sure that next dispatch() will continue + * from the current group */ + arbiter->current = elem; + goto out; + } else { + 
elem->next = next_elem; + elem->list.next = elem_list_next; + ucs_bug("unexpected return value from arbiter callback"); + } + } while ((elem != last_elem) && (group_dispatch_count < per_group)); + } while (next_group != NULL); + arbiter->current = NULL; +out: + ucs_list_for_each_safe(elem, next_elem, &resched_groups, list) { + ucs_list_del(&elem->list); + elem->list.next = NULL; + ucs_trace_poll("reschedule group %p", elem->group); + ucs_arbiter_group_schedule_nonempty(arbiter, elem->group); + } +} + +void ucs_arbiter_dump(ucs_arbiter_t *arbiter, FILE *stream) +{ + ucs_arbiter_elem_t *first_group, *group_head, *elem; + + fprintf(stream, "-------\n"); + if (arbiter->current == NULL) { + fprintf(stream, "(empty)\n"); + goto out; + } + + first_group = arbiter->current; + group_head = first_group; + do { + elem = group_head; + if (group_head == first_group) { + fprintf(stream, "=> "); + } else { + fprintf(stream, " * "); + } + do { + fprintf(stream, "[%p", elem); + if (elem == group_head) { + fprintf(stream, " prev_g:%p", elem->list.prev); + fprintf(stream, " next_g:%p", elem->list.next); + } + fprintf(stream, " next_e:%p grp:%p]", elem->next, elem->group); + if (elem->next != group_head) { + fprintf(stream, "->"); + } + elem = elem->next; + } while (elem != group_head); + fprintf(stream, "\n"); + group_head = ucs_list_next(&group_head->list, ucs_arbiter_elem_t, list); + } while (group_head != first_group); + +out: + fprintf(stream, "-------\n"); +} diff --git a/src/ucs/datastruct/arbiter.h b/src/ucs/datastruct/arbiter.h new file mode 100644 index 0000000..daf8f8c --- /dev/null +++ b/src/ucs/datastruct/arbiter.h @@ -0,0 +1,388 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCS_ARBITER_H_ +#define UCS_ARBITER_H_ + +#include +#include +#include +#include +#include + +/* + * A mechanism to arbitrate among groups of queued work elements, which attempts + * to be "fair" with respect to the groups. + * + * - "Arbiter" - the top-level entity. + * - "Element" - a single work element. + * - "Group" - queue of work elements which would be dispatched in-order + * + * The groups and elements are arranged like this: + * - every arbitrated element points to the group (head). + * - first element in the group points to previous and next group (list) + * - first element in the group points to the first element of next group (next_group). + * - all except last element point to the next element in same group, and the + * last one points to the first (next). + * + * Note: + * Every element holds 4 pointers. It could be done with 3 pointers, so that + * the pointer to the previous group is put instead of "next" pointer in the last + * element in the group, when it is put on the arbiter queue. However it makes + * the code much more complicated.
+ * + * + * Arbiter: + * +=========+ + * | current +-----------------------+ + * +=========+ | + * | + * Elements: | + * | + * +---------------------------------]----------------------------------+ + * | V | + * | +------------+ +------------+ +------------+<--+ + * +-->| list |<-------->| list |<-------->| list | + * +------------+ +------------+ +------------+<--+ + * +->| next +---+ +->| next +---+ + next +---+ + * | +------------+ | | +------------+ | +------------+ + * | | group | | | | group | | | group | + * | +------------+ | | +------------+ | +--------+---+ + * | | | | ^ | + * | | | | | | + * | +------------+ | | +------------+ | | | + * | | list |<--+ | | list |<--+ | | + * | +------------+ | +------------+ | | + * +--+ next + +--+ next | | | + * +------------+ +------------+ | | + * | group | | group | | | + * +---------+--+ +--------+---+ | | + * ^ | ^ | | | + * Groups: | | | | | | + * | | | | | | + * +------+ | +------+ | +------+ | + * | tail |<---+ | tail |<---+ | tail |<---+ + * +------+ +------+ +------+ + * + */ + +typedef struct ucs_arbiter ucs_arbiter_t; +typedef struct ucs_arbiter_group ucs_arbiter_group_t; +typedef struct ucs_arbiter_elem ucs_arbiter_elem_t; + + +/** + * Arbitration callback result codes. + */ +typedef enum { + UCS_ARBITER_CB_RESULT_REMOVE_ELEM, /* Remove the current element, move to + the next element. */ + UCS_ARBITER_CB_RESULT_NEXT_GROUP, /* Keep current element and move to next + group. Group IS NOT descheduled */ + UCS_ARBITER_CB_RESULT_DESCHED_GROUP,/* Keep current element but remove the + current group and move to next group. */ + UCS_ARBITER_CB_RESULT_RESCHED_GROUP,/* Keep current element, do not process + the group anymore during current + dispatch cycle. After dispatch() + is finished group automatically + scheduled */ + UCS_ARBITER_CB_RESULT_STOP /* Stop dispatching work altogether. 
Next dispatch() + will start from the group that returned STOP */ +} ucs_arbiter_cb_result_t; + +#if UCS_ENABLE_ASSERT +#define UCS_ARBITER_GUARD int guard; +#define UCS_ARBITER_GUARD_INIT(_arbiter) (_arbiter)->guard = 0 +#define UCS_ARBITER_GUARD_ENTER(_arbiter) (_arbiter)->guard++ +#define UCS_ARBITER_GUARD_EXIT(_arbiter) (_arbiter)->guard-- +#define UCS_ARBITER_GUARD_CHECK(_arbiter) \ + ucs_assertv((_arbiter)->guard == 0, \ + "scheduling group from the arbiter callback") +#else +#define UCS_ARBITER_GUARD +#define UCS_ARBITER_GUARD_INIT(_arbiter) +#define UCS_ARBITER_GUARD_ENTER(_arbiter) +#define UCS_ARBITER_GUARD_EXIT(_arbiter) +#define UCS_ARBITER_GUARD_CHECK(_arbiter) +#endif + + +/** + * Arbiter callback function. + * + * @param [in] arbiter The arbiter. + * @param [in] elem Current work element. + * @param [in] arg User-defined argument. + * + * @return According to @ref ucs_arbiter_cb_result_t. + */ +typedef ucs_arbiter_cb_result_t (*ucs_arbiter_callback_t)(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg); + + +/** + * Top-level arbiter. + */ +struct ucs_arbiter { + ucs_arbiter_elem_t *current; + UCS_ARBITER_GUARD +}; + + +/** + * Arbitration group. + */ +struct ucs_arbiter_group { + ucs_arbiter_elem_t *tail; +}; + + +/** + * Arbitrated work element. + * In order to keep it small, one of the fields is a union. + */ +struct ucs_arbiter_elem { + ucs_list_link_t list; /* List link in the scheduler queue */ + ucs_arbiter_elem_t *next; /* Next element, last points to head */ + ucs_arbiter_group_t *group; /* Always points to the group */ +}; + + +/** + * Initialize the arbiter object. + * + * @param [in] arbiter Arbiter object to initialize. + */ +void ucs_arbiter_init(ucs_arbiter_t *arbiter); +void ucs_arbiter_cleanup(ucs_arbiter_t *arbiter); + + +/** + * Initialize a group object. + * + * @param [in] group Group to initialize. 
+ */ +void ucs_arbiter_group_init(ucs_arbiter_group_t *group); +void ucs_arbiter_group_cleanup(ucs_arbiter_group_t *group); + + +/** + * Initialize an element object. + * + * @param [in] elem Element to initialize. + */ +static inline void ucs_arbiter_elem_init(ucs_arbiter_elem_t *elem) +{ + elem->next = NULL; +} + + +/** + * Add a new work element to a group - internal function + */ +void ucs_arbiter_group_push_elem_always(ucs_arbiter_group_t *group, + ucs_arbiter_elem_t *elem); + + +/** + * Add a new work element to the head of a group - internal function + */ +void ucs_arbiter_group_push_head_elem_always(ucs_arbiter_t *arbiter, + ucs_arbiter_group_t *group, + ucs_arbiter_elem_t *elem); + + +/** + * Call the callback for each element from a group. If the callback returns + * UCS_ARBITER_CB_RESULT_REMOVE_ELEM, remove it from the group. + * + * @param [in] arbiter Arbiter object to remove the group from. + * @param [in] group Group to clean up. + * @param [in] cb Callback to be called for each element. + * @param [in] cb_arg Last argument for the callback. + */ +void ucs_arbiter_group_purge(ucs_arbiter_t *arbiter, ucs_arbiter_group_t *group, + ucs_arbiter_callback_t cb, void *cb_arg); + + +void ucs_arbiter_dump(ucs_arbiter_t *arbiter, FILE *stream); + + +/* Internal function */ +void ucs_arbiter_group_schedule_nonempty(ucs_arbiter_t *arbiter, + ucs_arbiter_group_t *group); + + +/* Internal function */ +void ucs_arbiter_dispatch_nonempty(ucs_arbiter_t *arbiter, unsigned per_group, + ucs_arbiter_callback_t cb, void *cb_arg); + + +/* Internal function */ +void ucs_arbiter_group_head_desched(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *head); + + +/** + * Return true if arbiter has no groups scheduled + * + * @param [in] arbiter Arbiter object to dispatch work on. + */ +static inline int ucs_arbiter_is_empty(ucs_arbiter_t *arbiter) +{ + return arbiter->current == NULL; +} + + +/** + * @return whether the group does not have any queued elements. 
+ */ +static inline int ucs_arbiter_group_is_empty(ucs_arbiter_group_t *group) +{ + return group->tail == NULL; +} + + +/** + * Schedule a group for arbitration. If the group is already there, the operation + * will have no effect. + * + * @param [in] arbiter Arbiter object to schedule the group on. + * @param [in] group Group to schedule. + */ +static inline void ucs_arbiter_group_schedule(ucs_arbiter_t *arbiter, + ucs_arbiter_group_t *group) +{ + if (ucs_unlikely(!ucs_arbiter_group_is_empty(group))) { + ucs_arbiter_group_schedule_nonempty(arbiter, group); + } +} + + +/** + * Deschedule already scheduled group. If the group is not scheduled, the operation + * will have no effect + * + * @param [in] arbiter Arbiter object that group on. + * @param [in] group Group to deschedule. + */ + +static inline void ucs_arbiter_group_desched(ucs_arbiter_t *arbiter, + ucs_arbiter_group_t *group) +{ + if (ucs_unlikely(!ucs_arbiter_group_is_empty(group))) { + ucs_arbiter_elem_t *head; + + head = group->tail->next; + ucs_arbiter_group_head_desched(arbiter, head); + head->list.next = NULL; + } +} + + +/** + * @return Whether the element is queued in an arbiter group. + * (an element can't be queued more than once) + * + */ +static inline int ucs_arbiter_elem_is_scheduled(ucs_arbiter_elem_t *elem) +{ + return elem->next != NULL; +} + + +/** + * Add a new work element to a group if it is not already there + * + * @param [in] group Group to add the element to. + * @param [in] elem Work element to add. + */ +static inline void +ucs_arbiter_group_push_elem(ucs_arbiter_group_t *group, + ucs_arbiter_elem_t *elem) +{ + if (ucs_arbiter_elem_is_scheduled(elem)) { + return; + } + + ucs_arbiter_group_push_elem_always(group, elem); +} + + +/** + * Add a new work element to the head of a group if it is not already there + * + * @param [in] arbiter Arbiter object the group is on (since we modify the head + * element of a potentially scheduled group). 
If the group + * is not scheduled, arbiter may be NULL. + * @param [in] group Group to add the element to. + * @param [in] elem Work element to add. + */ +static inline void +ucs_arbiter_group_push_head_elem(ucs_arbiter_t *arbiter, + ucs_arbiter_group_t *group, + ucs_arbiter_elem_t *elem) +{ + if (ucs_arbiter_elem_is_scheduled(elem)) { + return; + } + + ucs_arbiter_group_push_head_elem_always(arbiter, group, elem); +} + + +/** + * Dispatch work elements in the arbiter. For every group, up to per_group work + * elements are dispatched, as long as the callback returns REMOVE_ELEM or + * NEXT_GROUP. Then, the same is done for the next group, until either the + * arbiter becomes empty or the callback returns STOP. If a group is either out + * of elements, or its callback returns REMOVE_GROUP, it will be removed until + * ucs_arbiter_group_schedule() is used to put it back on the arbiter. + * + * @param [in] arbiter Arbiter object to dispatch work on. + * @param [in] per_group How many elements to dispatch from each group. + * @param [in] cb User-defined callback to be called for each element. + * @param [in] cb_arg Last argument for the callback. + */ +static inline void +ucs_arbiter_dispatch(ucs_arbiter_t *arbiter, unsigned per_group, + ucs_arbiter_callback_t cb, void *cb_arg) +{ + if (ucs_unlikely(!ucs_arbiter_is_empty(arbiter))) { + ucs_arbiter_dispatch_nonempty(arbiter, per_group, cb, cb_arg); + } +} + + +/** + * @return Group the element belongs to. 
+ */ +static inline ucs_arbiter_group_t* ucs_arbiter_elem_group(ucs_arbiter_elem_t *elem) +{ + return elem->group; +} + + +/** + * @return true if element is the last one in the group + */ +static inline int +ucs_arbiter_elem_is_last(ucs_arbiter_group_t *group, ucs_arbiter_elem_t *elem) +{ + return group->tail == elem; +} + +/** + * @return true if element is the only one in the group + */ +static inline int +ucs_arbiter_elem_is_only(ucs_arbiter_group_t *group, ucs_arbiter_elem_t *elem) +{ + return ucs_arbiter_elem_is_last(group, elem) && (elem->next == elem); +} + +#endif diff --git a/src/ucs/datastruct/callbackq.c b/src/ucs/datastruct/callbackq.c new file mode 100644 index 0000000..3ecf185 --- /dev/null +++ b/src/ucs/datastruct/callbackq.c @@ -0,0 +1,586 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include "callbackq.h" + + +#define UCS_CALLBACKQ_IDX_FLAG_SLOW 0x80000000u +#define UCS_CALLBACKQ_IDX_MASK 0x7fffffffu +#define UCS_CALLBACKQ_FAST_MAX (UCS_CALLBACKQ_FAST_COUNT - 1) + + +typedef struct ucs_callbackq_priv { + ucs_spinlock_t lock; /**< Protects adding / removing */ + + ucs_callbackq_elem_t *slow_elems; /**< Array of slow-path elements */ + unsigned num_slow_elems; /**< Number of slow-path elements */ + unsigned max_slow_elems; /**< Maximal number of slow-path elements */ + int slow_proxy_id; /**< ID of slow-path proxy in fast-path array. + keep track while this moves around. */ + + uint64_t fast_remove_mask; /**< Mask of which fast-path elements + should be removed */ + unsigned num_fast_elems; /**< Number of fast-path elements */ + + /* Lookup table for callback IDs. 
This allows moving callbacks around in + * the arrays, while the user can always use a single ID to remove the + * callback in O(1). + */ + int free_idx_id; /**< Index of first free item in the list */ + int num_idxs; /**< Size of idxs array */ + unsigned *idxs; /**< ID-to-index lookup */ + +} ucs_callbackq_priv_t; + + +static unsigned ucs_callbackq_slow_proxy(void *arg); + +static inline ucs_callbackq_priv_t* ucs_callbackq_priv(ucs_callbackq_t *cbq) +{ + UCS_STATIC_ASSERT(sizeof(cbq->priv) == sizeof(ucs_callbackq_priv_t)); + return (void*)cbq->priv; +} + +static void ucs_callbackq_enter(ucs_callbackq_t *cbq) +{ + ucs_spin_lock(&ucs_callbackq_priv(cbq)->lock); +} + +static void ucs_callbackq_leave(ucs_callbackq_t *cbq) +{ + ucs_spin_unlock(&ucs_callbackq_priv(cbq)->lock); +} + +static void ucs_callbackq_elem_reset(ucs_callbackq_t *cbq, + ucs_callbackq_elem_t *elem) +{ + elem->cb = NULL; + elem->arg = cbq; + elem->id = UCS_CALLBACKQ_ID_NULL; + elem->flags = 0; +} + +static void *ucs_callbackq_array_grow(ucs_callbackq_t *cbq, void *ptr, + size_t elem_size, int count, + int *new_count, const char *alloc_name) +{ + void *new_ptr; + + if (count == 0) { + *new_count = ucs_get_page_size() / elem_size; + } else { + *new_count = count * 2; + } + + new_ptr = ucs_sys_realloc(ptr, elem_size * count, elem_size * *new_count); + if (new_ptr == NULL) { + ucs_fatal("cbq %p: could not allocate memory for %s", cbq, alloc_name); + } + return new_ptr; +} + +static void ucs_callbackq_array_free(void *ptr, size_t elem_size, int count) +{ + ucs_sys_free(ptr, elem_size * count); +} + +/* + * @param [in] id ID to release in the lookup array. + * @return index which this ID used to hold. 
+ */ +int ucs_callbackq_put_id(ucs_callbackq_t *cbq, int id) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + unsigned idx_with_flag; + + ucs_trace_func("cbq=%p id=%d", cbq, id); + + ucs_assert(id != UCS_CALLBACKQ_ID_NULL); + + idx_with_flag = priv->idxs[id]; /* Retrieve the index */ + priv->idxs[id] = priv->free_idx_id; /* Add ID to free-list head */ + priv->free_idx_id = id; /* Update free-list head */ + + return idx_with_flag; +} + +int ucs_callbackq_put_id_noflag(ucs_callbackq_t *cbq, int id) +{ + return ucs_callbackq_put_id(cbq, id) & UCS_CALLBACKQ_IDX_MASK; +} + +/** + * @param [in] idx Index to save in the lookup array. + * @return unique ID which holds index 'idx'. + */ +int ucs_callbackq_get_id(ucs_callbackq_t *cbq, unsigned idx) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + int new_num_idxs; + int id; + + ucs_trace_func("cbq=%p idx=%u", cbq, idx); + + if (priv->free_idx_id == UCS_CALLBACKQ_ID_NULL) { + priv->idxs = ucs_callbackq_array_grow(cbq, priv->idxs, sizeof(*priv->idxs), + priv->num_idxs, &new_num_idxs, + "indexes"); + + /* Add new items to free-list */ + for (id = priv->num_idxs; id < new_num_idxs; ++id) { + priv->idxs[id] = priv->free_idx_id; + priv->free_idx_id = id; + } + + priv->num_idxs = new_num_idxs; + } + + id = priv->free_idx_id; /* Get free ID from the list */ + ucs_assert(id != UCS_CALLBACKQ_ID_NULL); + priv->free_idx_id = priv->idxs[id]; /* Update free-list head */ + priv->idxs[id] = idx; /* Install provided idx to array */ + return id; +} + +static unsigned ucs_callbackq_get_fast_idx(ucs_callbackq_t *cbq) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + unsigned idx; + + idx = priv->num_fast_elems++; + ucs_assert(idx < UCS_CALLBACKQ_FAST_COUNT); + return idx; +} + +static int ucs_callbackq_add_fast(ucs_callbackq_t *cbq, ucs_callback_t cb, + void *arg, unsigned flags) +{ + unsigned idx; + int id; + + ucs_trace_func("cbq=%p cb=%s arg=%p flags=%u", cbq, + ucs_debug_get_symbol_name(cb), arg, flags); + 
+ ucs_assert(!(flags & UCS_CALLBACKQ_FLAG_ONESHOT)); + + idx = ucs_callbackq_get_fast_idx(cbq); + id = ucs_callbackq_get_id(cbq, idx); + cbq->fast_elems[idx].cb = cb; + cbq->fast_elems[idx].arg = arg; + cbq->fast_elems[idx].flags = flags; + cbq->fast_elems[idx].id = id; + return id; +} + +/* should be called from dispatch thread only */ +static void ucs_callbackq_remove_fast(ucs_callbackq_t *cbq, unsigned idx) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + ucs_callbackq_elem_t *dst_elem = &cbq->fast_elems[idx]; + unsigned last_idx; + int id; + + ucs_trace_func("cbq=%p idx=%u", cbq, idx); + + ucs_assert(priv->num_fast_elems > 0); + last_idx = --priv->num_fast_elems; + + /* replace removed with last */ + *dst_elem = cbq->fast_elems[last_idx]; + ucs_callbackq_elem_reset(cbq, &cbq->fast_elems[last_idx]); + + if (priv->fast_remove_mask & UCS_BIT(last_idx)) { + /* replaced by marked-for-removal element, still need to remove 'idx' */ + ucs_assert(priv->fast_remove_mask & UCS_BIT(idx)); + priv->fast_remove_mask &= ~UCS_BIT(last_idx); + } else { + /* replaced by a live element, remove from the mask and update 'idxs' */ + priv->fast_remove_mask &= ~UCS_BIT(idx); + if (last_idx != idx) { + id = dst_elem->id; + ucs_assert(id != UCS_CALLBACKQ_ID_NULL); + priv->idxs[id] = idx; + } + } +} + +/* should be called from dispatch thread only */ +static void ucs_callbackq_purge_fast(ucs_callbackq_t *cbq) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + unsigned idx; + + ucs_trace_func("cbq=%p map=0x%"PRIx64, cbq, priv->fast_remove_mask); + + /* Remove fast-path callbacks marked for removal */ + while (priv->fast_remove_mask) { + idx = ucs_ffs64(priv->fast_remove_mask); + ucs_callbackq_remove_fast(cbq, idx); + } +} + +static void ucs_callbackq_enable_proxy(ucs_callbackq_t *cbq) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + unsigned idx; + int id; + + ucs_trace_func("cbq=%p", cbq); + + if (priv->slow_proxy_id != UCS_CALLBACKQ_ID_NULL) { + return; 
+ } + + ucs_assert((priv->num_slow_elems > 0) || priv->fast_remove_mask); + + idx = ucs_callbackq_get_fast_idx(cbq); + id = ucs_callbackq_get_id(cbq, idx); + + ucs_assert(cbq->fast_elems[idx].arg == cbq); + cbq->fast_elems[idx].cb = ucs_callbackq_slow_proxy; + cbq->fast_elems[idx].flags = 0; + cbq->fast_elems[idx].id = id; + /* Avoid writing 'arg' because the dispatching thread may not see it in case + * of weak memory ordering. Instead, 'arg' is reset to 'cbq' for all free and + * removed elements, from the main thread. + */ + + priv->slow_proxy_id = id; +} +

/**
 * Remove the slow-path proxy callback from the fast-path array and release
 * its ID. Does nothing if the proxy is not installed.
 */
static void ucs_callbackq_disable_proxy(ucs_callbackq_t *cbq)
{
    ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq);
    unsigned proxy_idx;

    ucs_trace_func("cbq=%p slow_proxy_id=%d", cbq, priv->slow_proxy_id);

    if (priv->slow_proxy_id != UCS_CALLBACKQ_ID_NULL) {
        /* Releasing the ID yields the fast-path slot the proxy occupies */
        proxy_idx = ucs_callbackq_put_id(cbq, priv->slow_proxy_id);
        ucs_callbackq_remove_fast(cbq, proxy_idx);
        priv->slow_proxy_id = UCS_CALLBACKQ_ID_NULL;
    }
}

+static int ucs_callbackq_add_slow(ucs_callbackq_t *cbq, ucs_callback_t cb, + void *arg, unsigned flags) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + ucs_callbackq_elem_t *new_slow_elems; + int new_max_slow_elems; + unsigned idx; + int id; + + ucs_trace_func("cbq=%p cb=%s arg=%p flags=%u", cbq, + ucs_debug_get_symbol_name(cb), arg, flags); + + /* Grow slow-path array if needed */ + if (priv->num_slow_elems >= priv->max_slow_elems) { + new_slow_elems = ucs_callbackq_array_grow(cbq, priv->slow_elems, + sizeof(*priv->slow_elems), + priv->max_slow_elems, + &new_max_slow_elems, + "slow_elems"); + for (idx = priv->max_slow_elems; idx < new_max_slow_elems; ++idx) { + ucs_callbackq_elem_reset(cbq, &new_slow_elems[idx]); + } + + priv->max_slow_elems = new_max_slow_elems; + priv->slow_elems = new_slow_elems; + } + + /* Add slow-path element to the queue */ + idx = priv->num_slow_elems++; + id = ucs_callbackq_get_id(cbq, idx | UCS_CALLBACKQ_IDX_FLAG_SLOW); + priv->slow_elems[idx].cb
= cb; + priv->slow_elems[idx].arg = arg; + priv->slow_elems[idx].flags = flags; + priv->slow_elems[idx].id = id; + + ucs_callbackq_enable_proxy(cbq); + return id; +} + +static void ucs_callbackq_remove_slow(ucs_callbackq_t *cbq, unsigned idx) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + + ucs_trace_func("cbq=%p idx=%u", cbq, idx); + + /* Mark for removal by ucs_callbackq_purge_slow() */ + ucs_callbackq_elem_reset(cbq, &priv->slow_elems[idx]); +} + +static void ucs_callbackq_purge_slow(ucs_callbackq_t *cbq) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + ucs_callbackq_elem_t *src_elem; + unsigned src_idx, dst_idx; + + ucs_trace_func("cbq=%p", cbq); + + /* + * Copy valid elements from src_idx to dst_idx, essentially rebuilding the + * array of elements in-place, keeping only the valid ones. + * As an optimization, if no elements are actually removed, then src_idx will + * always be equal to dst_idx, so nothing will be actually copied/moved. + */ + dst_idx = 0; + for (src_idx = 0; src_idx < priv->num_slow_elems; ++src_idx) { + src_elem = &priv->slow_elems[src_idx]; + if (src_elem->id != UCS_CALLBACKQ_ID_NULL) { + ucs_assert(dst_idx <= src_idx); + if (dst_idx != src_idx) { + priv->idxs[src_elem->id] = dst_idx | UCS_CALLBACKQ_IDX_FLAG_SLOW; + priv->slow_elems[dst_idx] = *src_elem; + } + ++dst_idx; + } + } + + priv->num_slow_elems = dst_idx; +} + +static unsigned ucs_callbackq_slow_proxy(void *arg) +{ + ucs_callbackq_t *cbq = arg; + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + ucs_callbackq_elem_t *elem; + unsigned UCS_V_UNUSED removed_idx; + unsigned slow_idx, fast_idx; + ucs_callbackq_elem_t tmp_elem; + unsigned count = 0; + + ucs_trace_poll("cbq=%p", cbq); + + ucs_callbackq_enter(cbq); + + /* Execute and update slow-path callbacks */ + for (slow_idx = 0; slow_idx < priv->num_slow_elems; ++slow_idx) { + elem = &priv->slow_elems[slow_idx]; + if (elem->id == UCS_CALLBACKQ_ID_NULL) { + continue; + } + + tmp_elem = *elem; + if 
(elem->flags & UCS_CALLBACKQ_FLAG_FAST) { + ucs_assert(!(elem->flags & UCS_CALLBACKQ_FLAG_ONESHOT)); + if (priv->num_fast_elems < UCS_CALLBACKQ_FAST_MAX) { + fast_idx = ucs_callbackq_get_fast_idx(cbq); + cbq->fast_elems[fast_idx] = *elem; + priv->idxs[elem->id] = fast_idx; + ucs_callbackq_remove_slow(cbq, slow_idx); + } + } else if (elem->flags & UCS_CALLBACKQ_FLAG_ONESHOT) { + removed_idx = ucs_callbackq_put_id_noflag(cbq, elem->id); + ucs_assert(removed_idx == slow_idx); + ucs_callbackq_remove_slow(cbq, slow_idx); + } + + ucs_callbackq_leave(cbq); + + count += tmp_elem.cb(tmp_elem.arg); /* Execute callback without lock */ + + ucs_callbackq_enter(cbq); + } + + ucs_callbackq_purge_fast(cbq); + ucs_callbackq_purge_slow(cbq); + + /* Disable this proxy if no more work to do */ + if (!priv->fast_remove_mask && (priv->num_slow_elems == 0)) { + ucs_callbackq_disable_proxy(cbq); + } + + ucs_callbackq_leave(cbq); + + return count; +} + +ucs_status_t ucs_callbackq_init(ucs_callbackq_t *cbq) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + unsigned idx; + + for (idx = 0; idx < UCS_CALLBACKQ_FAST_COUNT + 1; ++idx) { + ucs_callbackq_elem_reset(cbq, &cbq->fast_elems[idx]); + } + + ucs_spinlock_init(&priv->lock); + priv->slow_elems = NULL; + priv->num_slow_elems = 0; + priv->max_slow_elems = 0; + priv->slow_proxy_id = UCS_CALLBACKQ_ID_NULL; + priv->fast_remove_mask = 0; + priv->num_fast_elems = 0; + priv->free_idx_id = UCS_CALLBACKQ_ID_NULL; + priv->num_idxs = 0; + priv->idxs = NULL; + return UCS_OK; +} + +void ucs_callbackq_cleanup(ucs_callbackq_t *cbq) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + + ucs_callbackq_disable_proxy(cbq); + ucs_callbackq_purge_fast(cbq); + ucs_callbackq_purge_slow(cbq); + + if ((priv->num_fast_elems) > 0 || (priv->num_slow_elems > 0)) { + ucs_warn("%d fast-path and %d slow-path callbacks remain in the queue", + priv->num_fast_elems, priv->num_slow_elems); + } + + ucs_callbackq_array_free(priv->slow_elems, 
sizeof(*priv->slow_elems), + priv->max_slow_elems); + ucs_callbackq_array_free(priv->idxs, sizeof(*priv->idxs), priv->num_idxs); +} + +int ucs_callbackq_add(ucs_callbackq_t *cbq, ucs_callback_t cb, void *arg, + unsigned flags) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + int id; + + ucs_callbackq_enter(cbq); + + ucs_trace_func("cbq=%p cb=%s arg=%p flags=%u", cbq, + ucs_debug_get_symbol_name(cb), arg, flags); + + if ((flags & UCS_CALLBACKQ_FLAG_FAST) && + (priv->num_fast_elems < UCS_CALLBACKQ_FAST_MAX)) + { + id = ucs_callbackq_add_fast(cbq, cb, arg, flags); + } else { + id = ucs_callbackq_add_slow(cbq, cb, arg, flags); + } + + ucs_callbackq_leave(cbq); + return id; +} + +void ucs_callbackq_remove(ucs_callbackq_t *cbq, int id) +{ + unsigned idx_with_flag, idx; + + ucs_callbackq_enter(cbq); + + ucs_trace_func("cbq=%p id=%d", cbq, id); + + ucs_callbackq_purge_fast(cbq); + + idx_with_flag = ucs_callbackq_put_id(cbq, id); + idx = idx_with_flag & UCS_CALLBACKQ_IDX_MASK; + + if (idx_with_flag & UCS_CALLBACKQ_IDX_FLAG_SLOW) { + ucs_callbackq_remove_slow(cbq, idx); + } else { + ucs_callbackq_remove_fast(cbq, idx); + } + + ucs_callbackq_leave(cbq); +} + +int ucs_callbackq_add_safe(ucs_callbackq_t *cbq, ucs_callback_t cb, void *arg, + unsigned flags) +{ + int id; + + ucs_callbackq_enter(cbq); + + ucs_trace_func("cbq=%p cb=%s arg=%p flags=%u", cbq, + ucs_debug_get_symbol_name(cb), arg, flags); + + /* Add callback to slow-path, and it may be upgraded to fast-path later by + * the proxy callback. It's not safe to add fast-path callback directly + * from this context. 
+ */ + id = ucs_callbackq_add_slow(cbq, cb, arg, flags); + + ucs_callbackq_leave(cbq); + return id; +} + +void ucs_callbackq_remove_safe(ucs_callbackq_t *cbq, int id) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + unsigned idx_with_flag, idx; + + ucs_callbackq_enter(cbq); + + ucs_trace_func("cbq=%p id=%d", cbq, id); + + idx_with_flag = ucs_callbackq_put_id(cbq, id); + idx = idx_with_flag & UCS_CALLBACKQ_IDX_MASK; + + if (idx_with_flag & UCS_CALLBACKQ_IDX_FLAG_SLOW) { + ucs_callbackq_remove_slow(cbq, idx); + } else { + UCS_STATIC_ASSERT(UCS_CALLBACKQ_FAST_MAX <= 64); + ucs_assert(idx < priv->num_fast_elems); + priv->fast_remove_mask |= UCS_BIT(idx); + cbq->fast_elems[idx].id = UCS_CALLBACKQ_ID_NULL; /* for assertion */ + ucs_callbackq_enable_proxy(cbq); + } + + ucs_callbackq_leave(cbq); +} + +void ucs_callbackq_remove_if(ucs_callbackq_t *cbq, ucs_callbackq_predicate_t pred, + void *arg) +{ + ucs_callbackq_priv_t *priv = ucs_callbackq_priv(cbq); + ucs_callbackq_elem_t *elem; + unsigned idx; + + ucs_callbackq_enter(cbq); + + ucs_trace_func("cbq=%p", cbq); + + ucs_callbackq_purge_fast(cbq); + + /* remove fast-path elements */ + elem = cbq->fast_elems; + while (elem->cb != NULL) { + if (pred(elem, arg)) { + idx = ucs_callbackq_put_id_noflag(cbq, elem->id); + ucs_assert(idx == (elem - cbq->fast_elems)); + ucs_callbackq_remove_fast(cbq, idx); + } else { + ++elem; + } + } + + /* remove slow-path elements */ + elem = priv->slow_elems; + while (elem < priv->slow_elems + priv->num_slow_elems) { + if (pred(elem, arg)) { + idx = ucs_callbackq_put_id_noflag(cbq, elem->id); + ucs_assert(idx == (elem - priv->slow_elems)); + ucs_callbackq_remove_slow(cbq, idx); + } else { + ++elem; + } + } + + ucs_callbackq_leave(cbq); +} diff --git a/src/ucs/datastruct/callbackq.h b/src/ucs/datastruct/callbackq.h new file mode 100644 index 0000000..dcddcde --- /dev/null +++ b/src/ucs/datastruct/callbackq.h @@ -0,0 +1,218 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016.
ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_CALLBACKQ_H +#define UCS_CALLBACKQ_H + +#include +#include +#include +#include +#include + +BEGIN_C_DECLS + +/** @file callbackq.h */ + +/* + * Thread-safe callback queue: + * - only one thread can dispatch + * - any thread can add and remove + * - add/remove operations are O(1) + */ + +#define UCS_CALLBACKQ_FAST_COUNT 7 /* Max. number of fast-path callbacks */ +#define UCS_CALLBACKQ_ID_NULL (-1) /* Invalid callback identifier */ + + +/* + * Forward declarations + */ +typedef struct ucs_callbackq ucs_callbackq_t; +typedef struct ucs_callbackq_elem ucs_callbackq_elem_t; + + +/** + * Callback which can be placed in a queue. + * + * @param [in] arg User-defined argument for the callback. + * + * @return Count of how much "work" was done by the callback. For example, zero + * means that no work was done, and any nonzero value means that something + * was done. + */ +typedef unsigned (*ucs_callback_t)(void *arg); + + +/** + * Callback queue element predicate. + * + * @param [in] elem Callback queue element to check. + * @param [in] arg User-defined argument. + * + * @return Predicate result value - nonzero means "true", zero means "false". + */ +typedef int (*ucs_callbackq_predicate_t)(const ucs_callbackq_elem_t *elem, + void *arg); + + +/** + * @ingroup UCS_RESOURCE + * Callback flags + */ +enum ucs_callbackq_flags { + UCS_CALLBACKQ_FLAG_FAST = UCS_BIT(0), /**< Fast-path (best effort) */ + UCS_CALLBACKQ_FLAG_ONESHOT = UCS_BIT(1) /**< Call the callback only once + (cannot be used with FAST) */ +}; + + +/** + * Callback queue element. 
+ */ +struct ucs_callbackq_elem { + ucs_callback_t cb; /**< Callback function */ + void *arg; /**< Function argument */ + unsigned flags; /**< Callback flags */ + int id; /**< Callback id */ +}; + + +/** + * A queue of callback to execute + */ +struct ucs_callbackq { + /** + * Array of fast-path element, the last is reserved as a sentinel to mark + * array end. + */ + ucs_callbackq_elem_t fast_elems[UCS_CALLBACKQ_FAST_COUNT + 1]; + + /** + * Private data, which we don't want to expose in API to avoid pulling + * more header files + */ + char priv[72]; +}; + + +/** + * Initialize the callback queue. + * + * @param [in] cbq Callback queue to initialize. + */ +ucs_status_t ucs_callbackq_init(ucs_callbackq_t *cbq); + + +/** + * Clean up the callback queue and release associated memory. + * + * @param [in] cbq Callback queue to clean up. + */ +void ucs_callbackq_cleanup(ucs_callbackq_t *cbq); + + +/** + * Add a callback to the queue. + * This is *not* safe to call while another thread might be dispatching callbacks. + * However, it can be used from the dispatch context (e.g a callback may use this + * function to add another callback). + * + * @param [in] cbq Callback queue to add the callback to. + * @param [in] cb Callback to add. + * @param [in] arg User-defined argument for the callback. + * @param [in] flags Flags for the callback, from @ref ucs_callbackq_flags. + * + * @return Unique identifier of the callback in the queue. + */ +int ucs_callbackq_add(ucs_callbackq_t *cbq, ucs_callback_t cb, void *arg, + unsigned flags); + + +/** + * Remove a callback from the queue immediately. + * This is *not* safe to call while another thread might be dispatching callbacks. + * However, it can be used from the dispatch context (e.g a callback may use this + * function to remove itself or another callback). In this case, the callback may + * still be dispatched once after this function returned. + * + * @param [in] cbq Callback queue to remove the callback from. 
+ * @param [in] id Callback identifier to remove. + */ +void ucs_callbackq_remove(ucs_callbackq_t *cbq, int id); + + +/** + * Add a callback to the queue. + * This can be used from any context and any thread, including but not limited to: + * - A callback can add another callback. + * - A thread can add a callback while another thread is dispatching callbacks. + * + * @param [in] cbq Callback queue to add the callback to. + * @param [in] cb Callback to add. + * @param [in] arg User-defined argument for the callback. + * @param [in] flags Flags for the callback, from @ref ucs_callbackq_flags. + * + * @return Unique identifier of the callback in the queue. + */ +int ucs_callbackq_add_safe(ucs_callbackq_t *cbq, ucs_callback_t cb, void *arg, + unsigned flags); + + +/** + * Remove a callback from the queue in a safe but lazy fashion. The callback will + * be removed at some point in the near future. + * This can be used from any context and any thread, including but not limited to: + * - A callback can remove another callback or itself. + * - A thread can remove a callback while another thread is dispatching callbacks. + * + * @param [in] cbq Callback queue to remove the callback from. + * @param [in] id Callback identifier to remove. + */ +void ucs_callbackq_remove_safe(ucs_callbackq_t *cbq, int id); + + +/** + * Remove all callbacks from the queue for which the given predicate returns + * "true" (nonzero) value. + * This is *not* safe to call while another thread might be dispatching callbacks. + * However, it can be used from the dispatch context (e.g. a callback may use this + * function to remove itself or another callback). In this case, the callback may + * still be dispatched once after this function returned. + * + * @param [in] cbq Callback queue. + * @param [in] pred Predicate to check candidates for removal. + * @param [in] arg User-defined argument for the predicate.
+ */ +void ucs_callbackq_remove_if(ucs_callbackq_t *cbq, ucs_callbackq_predicate_t pred, + void *arg); + + +/** + * Dispatch callbacks from the callback queue. + * Must be called from single thread only. + * + * @param [in] cbq Callback queue to dispatch callbacks from. + + * @return Sum of all return values from the dispatched callbacks. + */ +static inline unsigned ucs_callbackq_dispatch(ucs_callbackq_t *cbq) +{ + ucs_callbackq_elem_t *elem; + ucs_callback_t cb; + unsigned count; + + count = 0; + for (elem = cbq->fast_elems; (cb = elem->cb) != NULL; ++elem) { + count += cb(elem->arg); + } + return count; +} + +END_C_DECLS + +#endif diff --git a/src/ucs/datastruct/frag_list.c b/src/ucs/datastruct/frag_list.c new file mode 100644 index 0000000..719f212 --- /dev/null +++ b/src/ucs/datastruct/frag_list.c @@ -0,0 +1,403 @@ + +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "frag_list.h" + +#if ENABLE_STATS + +static ucs_stats_class_t ucs_frag_list_stats_class = { + .name = "frag_list", + .num_counters = UCS_FRAG_LIST_STAT_LAST, + .counter_names = { + [UCS_FRAG_LIST_STAT_GAPS] = "gaps", + [UCS_FRAG_LIST_STAT_GAP_LEN] = "gap_len", + [UCS_FRAG_LIST_STAT_GAP_OUT] = "gap_out", + [UCS_FRAG_LIST_STAT_BURSTS] = "bursts", + [UCS_FRAG_LIST_STAT_BURST_LEN] = "burst_len", + } +}; +#endif + +ucs_status_t ucs_frag_list_init(ucs_frag_list_sn_t initial_sn, ucs_frag_list_t *frag_list, + int max_holes + UCS_STATS_ARG(ucs_stats_node_t *stats_parent) + ) +{ + ucs_status_t status; + + ucs_assert(max_holes == 0 || max_holes == -1); + frag_list->head_sn = initial_sn; + frag_list->elem_count = 0; + frag_list->list_count = 0; + frag_list->max_holes = max_holes; + ucs_queue_head_init(&frag_list->list); + ucs_queue_head_init(&frag_list->ready_list); + +#if ENABLE_STATS + frag_list->prev_sn = initial_sn; +#endif + status = UCS_STATS_NODE_ALLOC(&frag_list->stats, &ucs_frag_list_stats_class, + stats_parent); + return 
status; +} + +void ucs_frag_list_cleanup(ucs_frag_list_t *frag_list) +{ + ucs_assert(frag_list->elem_count == 0); + ucs_assert(frag_list->list_count == 0); + ucs_assert(ucs_queue_is_empty(&frag_list->list)); + ucs_assert(ucs_queue_is_empty(&frag_list->ready_list)); + UCS_STATS_NODE_FREE(frag_list->stats); +} + +/* + prevh--- h --- .. --- + | + e + | + e + replace h with new_h: + +prevh --- new_h --- .. --- + | + h + | + e + | + e + + */ +static inline void +frag_list_replace_head(ucs_frag_list_t *frag_list, ucs_frag_list_elem_t *prevh, + ucs_frag_list_elem_t *h, ucs_frag_list_elem_t *new_h) +{ + ucs_frag_list_elem_t UCS_V_UNUSED *e; + + ucs_trace_data("replace=%u %u", (unsigned)h->head.first_sn-1, + (unsigned)h->head.last_sn); + + new_h->head.first_sn = h->head.first_sn-1; + new_h->head.last_sn = h->head.last_sn; + /* add new_h before h in holes list */ + /* take h from holes list */ + if (prevh == NULL) { + e = ucs_queue_pull_elem_non_empty(&frag_list->list, ucs_frag_list_elem_t, list); + ucs_assert(e == h); + ucs_queue_push_head(&frag_list->list, &new_h->list); + } else { + prevh->list.next = &new_h->list; + new_h->list.next = h->list.next; + if (frag_list->list.ptail == &h->list.next) { + frag_list->list.ptail = &new_h->list.next; + } + } + + /* chain h to the new hole head */ + ucs_queue_head_init(&new_h->head.list); + ucs_queue_splice(&new_h->head.list, &h->head.list); + ucs_queue_push_head(&new_h->head.list, &h->list); +} + +/* + ..--- h --- .. --- + | + e + + add new element to h: + + ..--- h --- .. --- + | + | + e + | + elem + + */ +static inline void frag_list_add_tail(ucs_frag_list_elem_t *h, ucs_frag_list_elem_t *elem) +{ + h->head.last_sn++; + ucs_trace_data("add_tail=%u %u", (unsigned)h->head.first_sn, (unsigned)h->head.last_sn); + + /* chain h to the new hole head */ + ucs_queue_push(&h->head.list, &elem->list); +} + +/* + merge h2 into h1. Before: + + ..--- h1 --- h2 --- + | | + e e2 + after: + ..--- h1 --- .. 
--- + | | + e e + | + h2 + | + e2 + */ +static inline void frag_list_merge_heads(ucs_frag_list_t *head, ucs_frag_list_elem_t *h1, ucs_frag_list_elem_t *h2) +{ + ucs_trace_data("merge_heads=%u %u", (unsigned)h1->head.first_sn, (unsigned)h2->head.last_sn); + + h1->head.last_sn = h2->head.last_sn; + h1->list.next = h2->list.next; + if (head->list.ptail == &h2->list.next) { + head->list.ptail = &h1->list.next; + } + + /* turn h2 into queue element */ + ucs_queue_push_head(&h2->head.list, &h2->list); + ucs_queue_splice(&h1->head.list, &h2->head.list); +} + +/* + insert new_h into h1. Before: + prevh--- h --- .. --- + | | + e e + | + + after: + + prevh--- new_h --- h --- ... --- + | | + e e + */ +static inline void frag_list_insert_head(ucs_frag_list_t *head, + ucs_frag_list_elem_t *prevh, ucs_frag_list_elem_t *h, ucs_frag_list_elem_t *new_h, ucs_frag_list_sn_t sn) +{ + + ucs_trace_data("insert_head=%u prevh=%p", (unsigned)sn, prevh); + new_h->head.first_sn = new_h->head.last_sn = sn; + ucs_queue_head_init(&new_h->head.list); + + if (prevh == NULL) { + ucs_queue_push_head(&head->list, &new_h->list); + } + else { + prevh->list.next = &new_h->list; + new_h->list.next = &h->list; + } +} + + +/* + insert new_h into h1. Before: + ..--- prevh --- h --- + | | + e e + | + + after: + + ---.. 
--- h --- new_h + | | + e e + */ + +static inline void frag_list_insert_tail(ucs_frag_list_t *head, + ucs_frag_list_elem_t *new_h, + ucs_frag_list_sn_t sn) +{ + ucs_trace_data("insert_tail=%u", (unsigned)sn); + new_h->head.first_sn = new_h->head.last_sn = sn; + ucs_queue_head_init(&new_h->head.list); + ucs_queue_push(&head->list, &new_h->list); +} + +/** + * special case of insert where sn == head->head_sn + */ +ucs_frag_list_ooo_type_t +ucs_frag_list_insert_head(ucs_frag_list_t *head, ucs_frag_list_elem_t *elem, + ucs_frag_list_sn_t sn) +{ + ucs_frag_list_elem_t *h; + + /* next two ifs will not happen if we always pull all possible elems + * on INSERT_FIRST + */ + + /* check that we are not hitting element on the first frag list */ + if (!ucs_queue_is_empty(&head->list)) { + h = ucs_queue_head_elem_non_empty(&head->list, ucs_frag_list_elem_t, list); + if (UCS_FRAG_LIST_SN_CMP(sn, >=, h->head.first_sn)) { + return UCS_FRAG_LIST_INSERT_DUP; + } + } + else { + h = NULL; + } + + head->head_sn++; + if (!ucs_queue_is_empty(&head->ready_list)) { + ucs_queue_push(&head->ready_list, &elem->list); + return UCS_FRAG_LIST_INSERT_READY; + } + + if (h != NULL && UCS_FRAG_LIST_SN_CMP(h->head.first_sn, ==, sn + 1)) { + /* do not enqueue. let know that more elems may + * be pulled from the list. 
+ * Ex of arrivals: 2 3 1 + */ + return UCS_FRAG_LIST_INSERT_FIRST; + } + + return UCS_FRAG_LIST_INSERT_FAST; +} + +ucs_frag_list_ooo_type_t +ucs_frag_list_insert_slow(ucs_frag_list_t *head, ucs_frag_list_elem_t *elem, + ucs_frag_list_sn_t sn) +{ + ucs_frag_list_elem_t *h, *prevh, *nexth; + + if (UCS_FRAG_LIST_SN_CMP(sn, ==, head->head_sn + 1)) { + return ucs_frag_list_insert_head(head, elem, sn); + } + + if (UCS_FRAG_LIST_SN_CMP(sn, <=, head->head_sn)) { + return UCS_FRAG_LIST_INSERT_DUP; + } + + if (head->max_holes == 0) { + return UCS_FRAG_LIST_INSERT_FAIL; + } + + prevh = NULL; + /* find right list to insert */ + ucs_queue_for_each(h, &head->list, list) { + /* trying to insert duplicate. retransmission or packet duplication */ + if (UCS_FRAG_LIST_SN_CMP(sn, >=, h->head.first_sn) && + UCS_FRAG_LIST_SN_CMP(sn, <=, h->head.last_sn)) { + return UCS_FRAG_LIST_INSERT_DUP; + } + + if (UCS_FRAG_LIST_SN_CMP(sn+1, ==, h->head.first_sn)) { + frag_list_replace_head(head, prevh, h, elem); + /* no need to check merge here. merge iff prev->last_sn+1==sn & sn+1 == h->first_sn + * the condition is handled in next if */ + head->elem_count++; + return UCS_FRAG_LIST_INSERT_SLOW; + } + + /* todo: mark as likely */ + if (UCS_FRAG_LIST_SN_CMP(h->head.last_sn+1, ==, sn)) { + /* add tail, check merge with next list */ + frag_list_add_tail(h, elem); + nexth = ucs_container_of(h->list.next, ucs_frag_list_elem_t, list); + + if (nexth != NULL && nexth->head.first_sn == sn + 1) { + frag_list_merge_heads(head, h, nexth); + head->list_count--; + } + head->elem_count++; + return UCS_FRAG_LIST_INSERT_SLOW; + } + + if (UCS_FRAG_LIST_SN_CMP(sn, <, h->head.first_sn)) { + /* new hole, see above comment on merge */ + if (prevh) { + ucs_assert(UCS_FRAG_LIST_SN_CMP(prevh->head.last_sn+1, <, sn)); + } + UCS_STATS_UPDATE_COUNTER(head->stats, UCS_FRAG_LIST_STAT_GAP_LEN, + prevh ? 
sn-prevh->head.last_sn : sn-head->head_sn); + UCS_STATS_UPDATE_COUNTER(head->stats, UCS_FRAG_LIST_STAT_GAPS, 1); + frag_list_insert_head(head, prevh, h, elem, sn); + head->elem_count++; + head->list_count++; + return UCS_FRAG_LIST_INSERT_SLOW; + } + + /* if we got here following must hold */ + ucs_assert(UCS_FRAG_LIST_SN_CMP(h->head.last_sn+1, <, sn)); + prevh = h; + } + + frag_list_insert_tail(head, elem, sn); + + head->elem_count++; + head->list_count++; + UCS_STATS_UPDATE_COUNTER(head->stats, UCS_FRAG_LIST_STAT_GAP_LEN, + sn-head->head_sn); + UCS_STATS_UPDATE_COUNTER(head->stats, UCS_FRAG_LIST_STAT_GAPS, 1); + return UCS_FRAG_LIST_INSERT_SLOW; +} + + +/* + head->h->...-> + | + e + + * mode of action + * - check if we have elements on ready list, if we do take one from there + * - see if h is ready for extraction (sn check), extract firt, move rest to the ready list + */ + +ucs_frag_list_elem_t *ucs_frag_list_pull_slow(ucs_frag_list_t *head) +{ + ucs_frag_list_elem_t *h; + + h = ucs_queue_head_elem_non_empty(&head->list, ucs_frag_list_elem_t, list); + if (UCS_FRAG_LIST_SN_CMP(h->head.first_sn, !=, head->head_sn+1)) { + ucs_trace_data("first_sn(%u) != head_sn(%u) + 1", (unsigned)h->head.first_sn, + (unsigned)head->head_sn); + return NULL; + } + + ucs_trace_data("ready list %d to %d", (unsigned)head->head_sn, + (unsigned)h->head.last_sn); + head->head_sn = h->head.last_sn; + head->elem_count--; + head->list_count--; + + h = ucs_queue_pull_elem_non_empty(&head->list, ucs_frag_list_elem_t, list); + ucs_queue_splice(&head->ready_list, &h->head.list); + return h; +} + +void ucs_frag_list_dump(ucs_frag_list_t *head, int how) +{ + ucs_frag_list_elem_t *h, *e; + int list_count, elem_count; + int cnt; + + list_count = 0; + elem_count = 0; + + ucs_queue_for_each(e, &head->ready_list, list) { + elem_count++; + } + + ucs_queue_for_each(h, &head->list, list) { + list_count++; + cnt = 0; + ucs_queue_for_each(e, &h->head.list, list) { + cnt++; + elem_count++; + } + elem_count++; 
+ if (how == 1) { + ucs_trace_data("%d: %d-%d %d/%d", list_count, h->head.first_sn, + h->head.last_sn, h->head.last_sn - h->head.first_sn, + cnt); + } + } + + if (how == 1) { + ucs_trace_data("elem count(expected/real)=%d/%d list_count(expected/real)=%d/%d\n", + head->elem_count, elem_count, + head->list_count, list_count); + } + + ucs_assert(head->elem_count == elem_count); + ucs_assert(head->list_count == list_count); +} + diff --git a/src/ucs/datastruct/frag_list.h b/src/ucs/datastruct/frag_list.h new file mode 100644 index 0000000..2158341 --- /dev/null +++ b/src/ucs/datastruct/frag_list.h @@ -0,0 +1,219 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_FRAG_LIST_H +#define UCS_FRAG_LIST_H + +#include +#include +#include +#include + + +/* + * The "frag list" is a data structure containing elements ordered by sequence number. + * Elements can be added to in any order, and removed from the head (dequeued) + * in strict serial number order. + * It is used for ordering packets according to sequence number. + * + * Complexity: + * - O(1) for getting head element + * - O(Nelems) for memory, with the hard bound of sendwindowsize. In order insertion uses no memory. + * - O(k) insertion, where k is number of holes. Number of holes is expected to be + * something like SendWindowSize/BurstPacketSize. With win 1024 and burst 16 we + * get to 64 holes. In reality the number should be much less because: + * - each route send 'bursts' in order + * - it takes roughly the same time for each route + * - number of routes (burst generatos is expected to be small) + * + * so in the end number of holes is proportional to number of routes and time difference + * between alternative paths. 
Better math is welcome :P + * + * Organization + * + * min_sn + * head =list1-[hole]->list2-[hole]...->listn + * | | | | + * ready elemlist elemlist elemlist + * list + * + * elemlists and ready list are sorted and continuous - no holes + * ready list contains elements that can be easily pulled: head->sn = read_list.last_sn + */ + +/* Out-of-order handling type */ +typedef enum { + UCS_FRAG_LIST_INSERT_FAST, /* in order insert, list empty */ + UCS_FRAG_LIST_INSERT_FIRST, /* in order insert, list not empty, must try pull */ + UCS_FRAG_LIST_INSERT_SLOW, /* out of order insert, can not pull elems from list */ + UCS_FRAG_LIST_INSERT_DUP, /* old element, can not pull */ + UCS_FRAG_LIST_INSERT_READY, /* in order insert, while we can still pull elems from list */ + UCS_FRAG_LIST_INSERT_FAIL /* insert failed for some reason */ +} ucs_frag_list_ooo_type_t; + +/* Sequence number type */ +/* NOTE: it must be same type as UD transport psn */ +typedef uint16_t ucs_frag_list_sn_t; +#define UCS_FRAG_LIST_SN_CMP UCS_CIRCULAR_COMPARE16 + +/** + * C standard specifies that a short integer is promoted to int + * before arithmetic, so the result does not wrap at 16 bits.
The following will be false when + * uint16_t is used for serial number: + * sn1=0; sn2=0xFFFF; sn1 == sn2+1 + * + * So we must always use compare macro + */ + +#define UCS_FRAG_LIST_NEXT_SN(sn) ((ucs_frag_list_sn_t)((sn)+1)) +/* part of skb */ +typedef struct ucs_frag_list_head { + ucs_queue_head_t list; + ucs_frag_list_sn_t first_sn; + ucs_frag_list_sn_t last_sn; +} ucs_frag_list_head_t; + +typedef struct ucs_frag_list_elem_t { + ucs_queue_elem_t list; + ucs_frag_list_head_t head; +} ucs_frag_list_elem_t; + + +/* part of connection */ +typedef struct ucs_frag_list { + ucs_queue_head_t list; + ucs_queue_head_t ready_list; + ucs_frag_list_sn_t head_sn; + unsigned elem_count; /* total number of list elements */ + unsigned list_count; /* number of independent lists */ + int max_holes; /* do not allow insertion if ucs_list_count >= max_holes */ + UCS_STATS_NODE_DECLARE(stats) +#ifdef ENABLE_STATS + ucs_frag_list_sn_t prev_sn; /* needed to detect bursts */ +#endif +} ucs_frag_list_t; + +/* stat counters */ +enum { + UCS_FRAG_LIST_STAT_GAPS, + UCS_FRAG_LIST_STAT_GAP_LEN, + UCS_FRAG_LIST_STAT_GAP_OUT, + UCS_FRAG_LIST_STAT_BURSTS, + UCS_FRAG_LIST_STAT_BURST_LEN, + UCS_FRAG_LIST_STAT_LAST +}; + + +/** + * Initialize the frag_list. + * + * @param frag_list frag_list to initialize. + * @param initial_sn Sequence number to start with. The first in-order element + * is expected to have SN initial_sn + 1. + * @param max_holes Max number of holes to allow on the list. + * Currently we support: + * 0 - allow no holes, only check sn. Out of order insert + * will result either in DUP or FAIL + * -1 - infinite number of holes + * + */ +ucs_status_t ucs_frag_list_init(ucs_frag_list_sn_t initial_sn, ucs_frag_list_t *frag_list, + int max_holes + UCS_STATS_ARG(ucs_stats_node_t *stats_parent)); + +/** + * Cleanup the frag_list.
+ */ +void ucs_frag_list_cleanup(ucs_frag_list_t *head); + + +/* Slow path insert */ +ucs_frag_list_ooo_type_t ucs_frag_list_insert_slow(ucs_frag_list_t *head, + ucs_frag_list_elem_t *elem, + ucs_frag_list_sn_t sn); + + +/** + * pull element from the list + * @return NULL if list is empty or it is impossible to pull anything + */ +ucs_frag_list_elem_t *ucs_frag_list_pull_slow(ucs_frag_list_t *head); + + +/** + * Dump frag list structure for debug purposes. + */ +void ucs_frag_list_dump(ucs_frag_list_t *head, int how); + + +static inline ucs_frag_list_sn_t ucs_frag_list_sn(ucs_frag_list_t *head) +{ + return head->head_sn; +} + +static inline void ucs_frag_list_sn_inc(ucs_frag_list_t *head) +{ + head->head_sn++; +} + +static inline unsigned ucs_frag_list_count(ucs_frag_list_t *head) +{ + return head->elem_count; +} + +static inline int ucs_frag_list_empty(ucs_frag_list_t *head) +{ + return ucs_queue_is_empty(&head->list) && ucs_queue_is_empty(&head->ready_list); +} + +static inline ucs_frag_list_ooo_type_t +ucs_frag_list_insert(ucs_frag_list_t *head, ucs_frag_list_elem_t *elem, + ucs_frag_list_sn_t sn) +{ +#if ENABLE_STATS + ucs_frag_list_ooo_type_t ret; + + if (UCS_FRAG_LIST_SN_CMP(sn, >, head->head_sn)) { + if (UCS_FRAG_LIST_SN_CMP(head->prev_sn + 1, !=,sn)) { + UCS_STATS_UPDATE_COUNTER(head->stats, UCS_FRAG_LIST_STAT_BURSTS, 1); + } else if (ucs_unlikely(UCS_STATS_GET_COUNTER(head->stats, UCS_FRAG_LIST_STAT_BURST_LEN) == 0)) { + /* initial burst */ + UCS_STATS_SET_COUNTER(head->stats, UCS_FRAG_LIST_STAT_BURSTS, 1); + } + UCS_STATS_UPDATE_COUNTER(head->stats, UCS_FRAG_LIST_STAT_BURST_LEN, 1); + head->prev_sn = sn; + } +#endif + /* in order arrival on empty list - inc sn and do nothing */ + if (ucs_likely(UCS_FRAG_LIST_SN_CMP(sn, ==, head->head_sn + 1) && (head->elem_count == 0))) { + head->head_sn = sn; + return UCS_FRAG_LIST_INSERT_FAST; + } + + /* return either dup or slow */ +#if ENABLE_STATS + ret = ucs_frag_list_insert_slow(head, elem, sn); + 
UCS_STATS_UPDATE_COUNTER(head->stats, UCS_FRAG_LIST_STAT_GAP_OUT, + ret != UCS_FRAG_LIST_INSERT_DUP ? head->list_count : 0); + return ret; +#else + return ucs_frag_list_insert_slow(head, elem, sn); +#endif +} + +static inline ucs_frag_list_elem_t *ucs_frag_list_pull(ucs_frag_list_t *head) +{ + if (!ucs_queue_is_empty(&head->ready_list)) { + --head->elem_count; + return ucs_queue_pull_elem_non_empty(&head->ready_list, ucs_frag_list_elem_t, list); + } else if (!ucs_queue_is_empty(&head->list)) { + return ucs_frag_list_pull_slow(head); + } else { + return NULL; + } +} + +#endif diff --git a/src/ucs/datastruct/khash.h b/src/ucs/datastruct/khash.h new file mode 100644 index 0000000..ff6a259 --- /dev/null +++ b/src/ucs/datastruct/khash.h @@ -0,0 +1,693 @@ +/* The MIT License + + Copyright (c) 2008, 2009, 2011 by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +/* + An example: + +#include "khash.h" +KHASH_MAP_INIT_INT(32, char) +int main() { + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; +} +*/ + +/* + 2013-05-02 (0.2.8): + + * Use quadratic probing. When the capacity is power of 2, stepping function + i*(i+1)/2 guarantees to traverse each bucket. It is better than double + hashing on cache performance and is more robust than linear probing. + + In theory, double hashing should be more robust than quadratic probing. + However, my implementation is probably not for large hash tables, because + the second hash function is closely tied to the first hash function, + which reduce the effectiveness of double hashing. + + Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php + + 2011-12-29 (0.2.7): + + * Minor code clean up; no actual effect. + + 2011-09-16 (0.2.6): + + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + + - http://code.google.com/p/ulib/ + - http://nothings.org/computer/judy/ + + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as it + is more robust to certain non-random input. + + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. + + 2011-02-14 (0.2.5): + + * Allow to declare global functions. 
#define AC_VERSION_KHASH_H "0.2.8"

#include <stdlib.h>
#include <string.h>
#include <limits.h>

/* Clang analyzer thinks that `h->flags` can be NULL, but this is
 * the wrong assumption - add `kassert()` to suppress the warning */
#ifdef __clang_analyzer__
#include <assert.h>
#define kassert(...) assert(__VA_ARGS__)
#define kmemset_analyzer(P, Z, N) kmemset(P, Z, N)
#else
#define kassert(...)
#define kmemset_analyzer(P, Z, N)
#endif

/* compiler specific configuration */

#if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t;
#elif ULONG_MAX == 0xffffffffu
typedef unsigned long khint32_t;
#endif

#if ULONG_MAX == ULLONG_MAX
typedef unsigned long khint64_t;
#else
typedef unsigned long long khint64_t;
#endif

#ifndef kh_inline
#ifdef _MSC_VER
#define kh_inline __inline
#else
#define kh_inline inline
#endif
#endif /* kh_inline */

#ifndef klib_unused
#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
#define klib_unused __attribute__ ((__unused__))
#else
#define klib_unused
#endif
#endif /* klib_unused */

typedef khint32_t khint_t;
typedef khint_t khiter_t;

/*
 * Bucket state is packed two bits per bucket, 16 buckets per 32-bit flag word:
 * bit 1 of the pair means "empty (never used)", bit 0 means "deleted".
 * A pair of 00 therefore marks a live bucket, and a flag array filled with
 * 0xaa bytes marks every bucket as empty.
 */
#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))

/* Number of 32-bit flag words needed for m buckets (at least one) */
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)

/* Round a 32-bit lvalue up to the next power of two, in place */
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif

/* Allocator hooks: define any of these before including the header to
 * replace the libc default. */
#ifndef kcalloc
#define kcalloc(N,Z) calloc(N,Z)
#endif
#ifndef kmalloc
#define kmalloc(Z) malloc(Z)
#endif
#ifndef krealloc
#define krealloc(P,Z) realloc(P,Z)
#endif
#ifndef kfree
#define kfree(P) free(P)
#endif
#ifndef kmemset
#define kmemset(P,Z,N) memset(P,Z,N)
#endif

/* Load factor at which kh_put() triggers a resize */
static const double __ac_HASH_UPPER = 0.77;

/* Declares the table type kh_<name>_t */
#define __KHASH_TYPE(name, khkey_t, khval_t) \
    typedef struct kh_##name##_s { \
        khint_t n_buckets, size, n_occupied, upper_bound; \
        khint32_t *flags; \
        khkey_t *keys; \
        khval_t *vals; \
    } kh_##name##_t;

/* Declares (extern) prototypes of the functions generated by __KHASH_IMPL */
#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
    extern kh_##name##_t *kh_init_##name(void); \
    extern kh_##name##_t *kh_init_##name##_inplace(kh_##name##_t *h); \
    extern void kh_destroy_##name(kh_##name##_t *h); \
    extern void kh_destroy_##name##_inplace(kh_##name##_t *h); \
    extern void kh_clear_##name(kh_##name##_t *h); \
    extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
    extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
    extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
    extern void kh_del_##name(kh_##name##_t *h, khint_t x);

/*
 * Generates the implementation of one hash table instance:
 *   kh_init_<name>()            - allocate a zeroed table
 *   kh_init_<name>_inplace(h)   - zero a caller-provided table, returns h
 *   kh_destroy_<name>(h)        - free arrays and the table itself (NULL-safe)
 *   kh_destroy_<name>_inplace(h)- free arrays only, h must not be NULL
 *   kh_clear_<name>(h)          - mark all buckets empty, keep the arrays
 *   kh_get_<name>(h, key)       - bucket index of key, or n_buckets if absent
 *   kh_resize_<name>(h, n)      - rehash into >= n buckets (rounded up to a
 *                                 power of two, minimum 4); 0 on success,
 *                                 -1 on allocation failure
 *   kh_put_<name>(h, key, ret)  - insert key; *ret is -1 on failure, 0 if the
 *                                 key was already present, 1 if an empty
 *                                 bucket was used, 2 if a deleted one was
 *   kh_del_<name>(h, x)         - mark bucket x as deleted
 * Probing is quadratic (step i*(i+1)/2) over a power-of-two bucket count.
 */
#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
    SCOPE kh_##name##_t *kh_init_##name(void) { \
        return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
    } \
    SCOPE kh_##name##_t *kh_init_##name##_inplace(kh_##name##_t *h) { \
        return (kh_##name##_t*)kmemset(h, 0, sizeof(kh_##name##_t)); \
    } \
    SCOPE void kh_destroy_##name(kh_##name##_t *h) \
    { \
        if (h) { \
            kfree((void *)h->keys); kfree(h->flags); \
            kfree((void *)h->vals); \
            kfree(h); \
        } \
    } \
    SCOPE void kh_destroy_##name##_inplace(kh_##name##_t *h) \
    { \
        kfree((void *)h->keys); \
        kfree((void *)h->flags); \
        kfree((void *)h->vals); \
    } \
    SCOPE void kh_clear_##name(kh_##name##_t *h) \
    { \
        if (h && h->flags) { \
            memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
            h->size = h->n_occupied = 0; \
        } \
    } \
    SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
    { \
        if (h->n_buckets) { \
            khint_t k, i, last, mask, step = 0; \
            mask = h->n_buckets - 1; \
            \
            kassert(h->flags != NULL); \
            \
            k = __hash_func(key); i = k & mask; \
            last = i; \
            while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
                i = (i + (++step)) & mask; \
                if (i == last) return h->n_buckets; \
            } \
            return __ac_iseither(h->flags, i)? h->n_buckets : i; \
        } else return 0; \
    } \
    SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
    { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
        khint32_t *new_flags = 0; \
        khint_t j = 1; \
        { \
            kroundup32(new_n_buckets); \
            if (new_n_buckets < 4) new_n_buckets = 4; \
            if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
            else { /* hash table size to be changed (shrink or expand); rehash */ \
                new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
                if (!new_flags) return -1; \
                memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
                if (h->n_buckets < new_n_buckets) { /* expand */ \
                    khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
                    if (!new_keys) { kfree(new_flags); return -1; } \
                    h->keys = new_keys; \
                    /* h->keys is a khkey_t*, so the offset is in elements, not bytes */ \
                    kmemset_analyzer(h->keys + h->n_buckets, 0, \
                                     (new_n_buckets - h->n_buckets) * sizeof(khkey_t)); \
                    if (kh_is_map) { \
                        khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
                        if (!new_vals) { kfree(new_flags); return -1; } \
                        h->vals = new_vals; \
                    } \
                } /* otherwise shrink */ \
            } \
        } \
        if (j) { /* rehashing is needed */ \
            for (j = 0; j != h->n_buckets; ++j) { \
                if (__ac_iseither(h->flags, j) == 0) { \
                    khkey_t key = h->keys[j]; \
                    khval_t val; \
                    khint_t new_mask; \
                    new_mask = new_n_buckets - 1; \
                    if (kh_is_map) val = h->vals[j]; \
                    __ac_set_isdel_true(h->flags, j); \
                    while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
                        khint_t k, i, step = 0; \
                        k = __hash_func(key); \
                        i = k & new_mask; \
                        while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
                        __ac_set_isempty_false(new_flags, i); \
                        if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
                            { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
                            if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
                            __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
                        } else { /* write the element and jump out of the loop */ \
                            h->keys[i] = key; \
                            if (kh_is_map) h->vals[i] = val; \
                            break; \
                        } \
                    } \
                } \
            } \
            if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
                /* A failed shrinking realloc leaves the old (larger) buffer valid, */ \
                /* so keep using it instead of overwriting the pointer with NULL.  */ \
                khkey_t *shrunk_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
                if (shrunk_keys) h->keys = shrunk_keys; \
                if (kh_is_map) { \
                    khval_t *shrunk_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
                    if (shrunk_vals) h->vals = shrunk_vals; \
                } \
            } \
            kfree(h->flags); /* free the working space */ \
            h->flags = new_flags; \
            h->n_buckets = new_n_buckets; \
            h->n_occupied = h->size; \
            h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
        } \
        return 0; \
    } \
    SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
    { \
        khint_t x; \
        if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
            if (h->n_buckets > (h->size<<1)) { \
                if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
                    *ret = -1; return h->n_buckets; \
                } \
            } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
                *ret = -1; return h->n_buckets; \
            } \
        } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
        \
        kassert(h->flags != NULL); \
        \
        { \
            khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
            x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
            if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
            else { \
                last = i; \
                while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
                    if (__ac_isdel(h->flags, i)) site = i; \
                    i = (i + (++step)) & mask; \
                    if (i == last) { x = site; break; } \
                } \
                if (x == h->n_buckets) { \
                    if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
                    else x = i; \
                } \
            } \
        } \
        if (__ac_isempty(h->flags, x)) { /* not present at all */ \
            h->keys[x] = key; \
            __ac_set_isboth_false(h->flags, x); \
            ++h->size; ++h->n_occupied; \
            *ret = 1; \
        } else if (__ac_isdel(h->flags, x)) { /* deleted */ \
            h->keys[x] = key; \
            __ac_set_isboth_false(h->flags, x); \
            ++h->size; \
            *ret = 2; \
        } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
        return x; \
    } \
    SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
    { \
        if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
            __ac_set_isdel_true(h->flags, x); \
            --h->size; \
        } \
    }

/* Type + extern prototypes, for headers that only declare a table */
#define KHASH_DECLARE(name, khkey_t, khval_t) \
    __KHASH_TYPE(name, khkey_t, khval_t) \
    __KHASH_PROTOTYPES(name, khkey_t, khval_t)

/* Type + implementation with a caller-chosen storage class (SCOPE) */
#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
    __KHASH_TYPE(name, khkey_t, khval_t) \
    __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)

/* Type + implementation as static inline functions */
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
    KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)

/* Type only */
#define KHASH_TYPE(name, khkey_t, khval_t) \
    __KHASH_TYPE(name, khkey_t, khval_t)

/* Static inline implementation only (the type must already be declared) */
#define KHASH_IMPL(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
    __KHASH_IMPL(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)

/* --- BEGIN OF HASH FUNCTIONS --- */

/*! @function
  @abstract     Integer hash function
  @param  key   The integer [khint32_t]
  @return       The hash value [khint_t]
 */
#define kh_int_hash_func(key) (khint32_t)(key)
/*! @function
  @abstract     Integer comparison function
 */
#define kh_int_hash_equal(a, b) ((a) == (b))
/*! @function
  @abstract     64-bit integer hash function
  @param  key   The integer [khint64_t]
  @return       The hash value [khint_t]
 */
#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
/*! @function
  @abstract     64-bit integer comparison function
 */
#define kh_int64_hash_equal(a, b) ((a) == (b))
@function + @abstract const char* hash function + @param s Pointer to a null terminated string + @return The hash value + */ +static kh_inline khint_t __ac_X31_hash_string(const char *s) +{ + khint_t h = (khint_t)*s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; + return h; +} +/*! @function + @abstract Another interface to const char* hash function + @param key Pointer to a null terminated string [const char*] + @return The hash value [khint_t] + */ +#define kh_str_hash_func(key) __ac_X31_hash_string(key) +/*! @function + @abstract Const char* comparison function + */ +#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) + +static kh_inline khint_t __ac_Wang_hash(khint_t key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} +#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key) + +/* --- END OF HASH FUNCTIONS --- */ + +/* Other convenient macros... */ + +/*! + @abstract Type of the hash table. + @param name Name of the hash table [symbol] + */ +#define khash_t(name) kh_##name##_t + +/*! @function + @abstract Initiate a hash table. + @param name Name of the hash table [symbol] + @return Pointer to the hash table [khash_t(name)*] + */ +#define kh_init(name) kh_init_##name() + +/*! @function + @abstract Initiate a hash table if the in-place case. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_init_inplace(name, h) kh_init_##name##_inplace(h) + +/*! @function + @abstract Destroy a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_destroy(name, h) kh_destroy_##name(h) + +/*! @function + @abstract Destroy a hash table if the in-place case. 
/*! @function
  @abstract     Destroy a hash table that was initialized in place.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @discussion   Frees the keys, flags and values arrays only; the table
                structure itself is not freed.
 */
#define kh_destroy_inplace(name, h) kh_destroy_##name##_inplace(h)

/*! @function
  @abstract     Reset a hash table without deallocating memory.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_clear(name, h) kh_clear_##name(h)

/*! @function
  @abstract     Resize a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  s     New size [khint_t]
  @return       0 on success; -1 if memory allocation failed [int]
 */
#define kh_resize(name, h, s) kh_resize_##name(h, s)

/*! @function
  @abstract     Insert a key to the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @param  r     Extra return code: -1 if the operation failed;
                0 if the key is present in the hash table;
                1 if the bucket is empty (never used); 2 if the element in
                the bucket has been deleted [int*]
  @return       Iterator to the inserted element; kh_end(h) if the
                operation failed [khint_t]
 */
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)

/*! @function
  @abstract     Retrieve a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
 */
#define kh_get(name, h, k) kh_get_##name(h, k)

/*! @function
  @abstract     Remove a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Iterator to the element to be deleted [khint_t]
  @discussion   The bucket is only marked as deleted; passing kh_end(h) or
                an iterator to a bucket without data is a no-op.
 */
#define kh_del(name, h, k) kh_del_##name(h, k)

/*! @function
  @abstract     Test whether a bucket contains data.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       1 if containing data; 0 otherwise [int]
 */
#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))

/*! @function
  @abstract     Get key given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Key [type of keys]
 */
#define kh_key(h, x) ((h)->keys[x])

/*! @function
  @abstract     Get value given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Value [type of values]
  @discussion   For hash sets, calling this results in segfault.
 */
#define kh_val(h, x) ((h)->vals[x])

/*! @function
  @abstract     Alias of kh_val()
 */
#define kh_value(h, x) ((h)->vals[x])

/*! @function
  @abstract     Get the start iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The start iterator [khint_t]
 */
#define kh_begin(h) (khint_t)(0)

/*! @function
  @abstract     Get the end iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The end iterator [khint_t]
 */
#define kh_end(h) ((h)->n_buckets)

/*! @function
  @abstract     Get the number of elements in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of elements in the hash table [khint_t]
 */
#define kh_size(h) ((h)->size)

/*! @function
  @abstract     Get the number of buckets in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of buckets in the hash table [khint_t]
 */
#define kh_n_buckets(h) ((h)->n_buckets)
/*! @function
  @abstract     Iterate over the entries in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  kvar  Variable to which key will be assigned
  @param  vvar  Variable to which value will be assigned
  @param  code  Block of code to execute
  @discussion   Expands to a block that declares its own iterator __i and
                skips buckets that do not contain data.
 */
#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
    for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
        if (!kh_exist(h,__i)) continue; \
        (kvar) = kh_key(h,__i); \
        (vvar) = kh_val(h,__i); \
        code; \
    } }

/*! @function
  @abstract     Iterate over the keys in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  kvar  Variable to which key will be assigned
  @param  code  Block of code to execute
 */
#define kh_foreach_key(h, kvar, code) { khint_t __i; \
    for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
        if (!kh_exist(h,__i)) continue; \
        (kvar) = kh_key(h,__i); \
        code; \
    } }

/*! @function
  @abstract     Iterate over the values in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  vvar  Variable to which value will be assigned
  @param  code  Block of code to execute
 */
#define kh_foreach_value(h, vvar, code) { khint_t __i; \
    for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
        if (!kh_exist(h,__i)) continue; \
        (vvar) = kh_val(h,__i); \
        code; \
    } }

/* More convenient interfaces */

/*! @function
  @abstract     Instantiate a hash set containing integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT(name) \
    KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT(name, khval_t) \
    KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash set containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT64(name) \
    KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT64(name, khval_t) \
    KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)

/* Key type used by the string-keyed instantiations below; the table stores
 * the pointers only, not copies of the strings. */
typedef const char *kh_cstr_t;
/*! @function
  @abstract     Instantiate a hash set containing const char* keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_STR(name) \
    KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing const char* keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_STR(name, khval_t) \
    KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)

/* From src/ucs/datastruct/linear_func.h */

/**
 * A 1d linear function, represented as f(x) = c + x * m.
 */
typedef struct {
    double c; /* constant factor */
    double m; /* multiplicative factor */
} ucs_linear_func_t;
/**
 * Calculate the linear function value for a specific point.
 *
 * @param [in]  func    Linear function to apply.
 * @param [in]  x       Point to apply the function at.
 *
 * @return f(x), computed as func->c + func->m * x.
 */
static UCS_F_ALWAYS_INLINE double
ucs_linear_func_apply(const ucs_linear_func_t *func, double x)
{
    return func->c + (func->m * x);
}


/**
 * Sum two linear functions.
 *
 * @param [out] result   Filled with the resulting linear function.
 * @param [in]  func1    First function to add.
 * @param [in]  func2    Second function to add.
 *
 * @note Each field of 'result' is computed only from the matching fields of
 *       the inputs.
 */
static UCS_F_ALWAYS_INLINE void
ucs_linear_func_add(ucs_linear_func_t *result, const ucs_linear_func_t *func1,
                    const ucs_linear_func_t *func2)
{
    result->m = func1->m + func2->m;
    result->c = func1->c + func2->c;
}

/* From src/ucs/datastruct/list.h */

/**
 * Static initializer for a list link, in { prev, next } order.
 */
#define UCS_LIST_INITIALIZER(_prev, _next) \
    { (_prev), (_next) }


/**
 * Declare an empty list
 */
#define UCS_LIST_HEAD(name) \
    ucs_list_link_t name = UCS_LIST_INITIALIZER(&(name), &(name))


/**
 * Initialize list head.
 *
 * @param head  List head struct to initialize.
 */
static inline void ucs_list_head_init(ucs_list_link_t *head)
{
    /* an empty list is a head that points to itself in both directions */
    head->prev = head->next = head;
}

/**
 * Insert an element in-between two list elements. Any elements which were in
 * this section will be discarded.
 *
 * @param prev  Element to insert after
 * @param next  Element to insert before.
 * @param elem  Element to insert.
 */
static inline void ucs_list_insert_replace(ucs_list_link_t *prev,
                                           ucs_list_link_t *next,
                                           ucs_list_link_t *elem)
{
    elem->prev = prev;
    elem->next = next;
    prev->next = elem;
    next->prev = elem;
}
+ */ +static inline void ucs_list_insert_after(ucs_list_link_t *pos, + ucs_list_link_t *new_link) +{ + ucs_list_insert_replace(pos, pos->next, new_link); +} + +/** + * Insert an item to a list before another item. + * + * @param pos Item before which to insert. + * @param new_link Item to insert. + */ +static inline void ucs_list_insert_before(ucs_list_link_t *pos, + ucs_list_link_t *new_link) +{ + ucs_list_insert_replace(pos->prev, pos, new_link); +} + +/** + * Remove an item from its list. + * + * @param link Item to remove. + */ +static inline void ucs_list_del(ucs_list_link_t *link) +{ + link->prev->next = link->next; + link->next->prev = link->prev; +} + +/** + * @return Whether the list is empty. + */ +static inline int ucs_list_is_empty(ucs_list_link_t *head) +{ + return head->next == head; +} + +/** + * Move the items from 'newlist' to the tail of the list pointed by 'head' + * + * @param head List to whose tail to add the items. + * @param newlist List of items to add. + * + * @note The contents of 'newlist' is left unmodified. 
+ */ +static inline void ucs_list_splice_tail(ucs_list_link_t *head, + ucs_list_link_t *newlist) +{ + ucs_list_link_t *first, *last, *tail; + + if (ucs_list_is_empty(newlist)) { + return; + } + + first = newlist->next; /* First element in the new list */ + last = newlist->prev; /* Last element in the new list */ + tail = head->prev; /* Last element in the original list */ + + first->prev = tail; + tail->next = first; + + last->next = head; + head->prev = last; +} + +/** + * Count the members of the list + */ +static inline unsigned long ucs_list_length(ucs_list_link_t *head) +{ + unsigned long length; + ucs_list_link_t *ptr; + + for (ptr = head->next, length = 0; ptr != head; ptr = ptr->next, ++length); + return length; +} + +/* + * Convenience macros + */ +#define ucs_list_add_head(_head, _item) \ + ucs_list_insert_after(_head, _item) +#define ucs_list_add_tail(_head, _item) \ + ucs_list_insert_before(_head, _item) + +/** + * Get the previous element in the list + */ +#define ucs_list_prev(_elem, _type, _member) \ + (ucs_container_of((_elem)->prev, _type, _member)) + +/** + * Get the next element in the list + */ +#define ucs_list_next(_elem, _type, _member) \ + (ucs_container_of((_elem)->next, _type, _member)) + +/** + * Get the first element in the list + */ +#define ucs_list_head ucs_list_next + +/** + * Get the last element in the list + */ +#define ucs_list_tail ucs_list_prev + +/** + * Iterate over members of the list. + */ +#define ucs_list_for_each(_elem, _head, _member) \ + for (_elem = ucs_container_of((_head)->next, typeof(*_elem), _member); \ + &(_elem)->_member != (_head); \ + _elem = ucs_container_of((_elem)->_member.next, typeof(*_elem), _member)) + +/** + * Iterate over members of the list, the user may invalidate the current entry. 
/**
 * Iterate over members of the list, the user may invalidate the current entry.
 *
 * @param _elem    Iteration variable (pointer to the containing type).
 * @param _telem   Scratch variable of the same type; holds the next element so
 *                 that _elem may be removed or released by the loop body.
 * @param _head    List head.
 * @param _member  Name of the list link member inside the containing type.
 *
 * The macro arguments are parenthesized wherever they are dereferenced, the
 * same way ucs_list_for_each does it.
 */
#define ucs_list_for_each_safe(_elem, _telem, _head, _member) \
    for (_elem = ucs_container_of((_head)->next, typeof(*_elem), _member), \
        _telem = ucs_container_of((_elem)->_member.next, typeof(*_elem), _member); \
        &(_elem)->_member != (_head); \
        _elem = _telem, \
        _telem = ucs_container_of((_telem)->_member.next, typeof(*_telem), _member))

/**
 * Extract list head: unlink the first element and evaluate to a pointer to
 * its container.
 *
 * @param _head    List head; the list is expected to be non-empty.
 * @param _type    Containing type of the list elements.
 * @param _member  Name of the list link member inside _type.
 *
 * The temporary has a name that is unlikely to clash with an identifier used
 * in the _head expression (a plain 'tmp' would capture a caller's 'tmp').
 */
#define ucs_list_extract_head(_head, _type, _member) \
    ({ \
        ucs_list_link_t *_ucs_list_head_link = (_head)->next; \
        ucs_list_del(_ucs_list_head_link); \
        ucs_container_of(_ucs_list_head_link, _type, _member); \
    })

/* From src/ucs/datastruct/list_types.h */

/**
 * A link in a circular list.
 */
typedef struct ucs_list_link {
    struct ucs_list_link  *prev;
    struct ucs_list_link  *next;
} ucs_list_link_t;
/* From src/ucs/datastruct/mpmc.c */

/*
 * Set up the queue: the requested length is rounded up to a power of 2, both
 * indices start at 0, and every slot is pre-filled with
 * UCS_BIT(UCS_MPMC_VALID_SHIFT).
 * NOTE(review): presumably this pre-fill marks every slot as "not yet
 * produced" for the first lap, because the expected valid bit for locations
 * below 'length' is 0 - confirm against the UCS_BIT definition.
 */
ucs_status_t ucs_mpmc_queue_init(ucs_mpmc_queue_t *mpmc, uint32_t length)
{
    uint32_t i;

    mpmc->length = ucs_roundup_pow2(length);
    mpmc->shift = ucs_ilog2(mpmc->length);
    if (mpmc->length >= UCS_BIT(UCS_MPMC_VALID_SHIFT)) {
        return UCS_ERR_INVALID_PARAM;
    }

    mpmc->consumer = 0;
    mpmc->producer = 0;
    mpmc->queue = ucs_malloc(sizeof(*mpmc->queue) * mpmc->length, "mpmc");
    if (mpmc->queue == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    for (i = 0; i < mpmc->length; ++i) {
        mpmc->queue[i] = UCS_BIT(UCS_MPMC_VALID_SHIFT);
    }

    return UCS_OK;
}

/* Release the slot array allocated by ucs_mpmc_queue_init() */
void ucs_mpmc_queue_cleanup(ucs_mpmc_queue_t *mpmc)
{
    ucs_free(mpmc->queue);
}

/*
 * Expected value of a slot's valid bit for a given running index: the bit
 * just above the slot-index bits, so it flips every time the index wraps
 * around the array.
 */
static inline uint32_t __ucs_mpmc_queue_valid_bit(ucs_mpmc_queue_t *mpmc, uint32_t location)
{
    return (location >> mpmc->shift) & 1;
}

ucs_status_t ucs_mpmc_queue_push(ucs_mpmc_queue_t *mpmc, uint32_t value)
{
    uint32_t location;

    /* the top bit of a slot is reserved for the valid bit */
    ucs_assert((value >> UCS_MPMC_VALID_SHIFT) == 0);

    /* reserve a slot by advancing the producer index with compare-and-swap */
    do {
        location = mpmc->producer;
        if (UCS_CIRCULAR_COMPARE32(location, >=, mpmc->consumer + mpmc->length)) {
            /* Queue is full */
            return UCS_ERR_EXCEEDS_LIMIT;
        }
    } while (ucs_atomic_cswap32(&mpmc->producer, location, location + 1) != location);

    /* publish the value together with the valid bit expected for this lap */
    mpmc->queue[location & (mpmc->length - 1)] = value |
                    (__ucs_mpmc_queue_valid_bit(mpmc, location) << UCS_MPMC_VALID_SHIFT);
    return UCS_OK;
}


ucs_status_t ucs_mpmc_queue_pull(ucs_mpmc_queue_t *mpmc, uint32_t *value_p)
{
    uint32_t location, value;

    location = mpmc->consumer;
    if (location == mpmc->producer) {
        /* Producer not started yet */
        return UCS_ERR_NO_PROGRESS;
    }

    /* the slot is read before it is claimed; the claim below fails if another
     * consumer advanced the index in the meantime */
    value = mpmc->queue[location & (mpmc->length - 1)];
    if ((value >> UCS_MPMC_VALID_SHIFT) != __ucs_mpmc_queue_valid_bit(mpmc, location)) {
        /* Producer not finished yet */
        return UCS_ERR_NO_PROGRESS;
    }

    if (ucs_atomic_cswap32(&mpmc->consumer, location, location + 1) != location) {
        /* Someone else consumed */
        return UCS_ERR_NO_PROGRESS;
    }

    /* strip the valid bit before handing the value to the caller */
    *value_p = value & UCS_MASK(UCS_MPMC_VALID_SHIFT);
    return UCS_OK;
}

/* From src/ucs/datastruct/mpmc.h */

/* Bit position of the per-slot valid bit; values must fit below it */
#define UCS_MPMC_VALID_SHIFT        31
#define UCS_MPMC_VALUE_MAX          UCS_BIT(UCS_MPMC_VALID_SHIFT)

/**
 * A Multi-producer-multi-consumer thread-safe queue.
 * Every push/pull is a single atomic operation in "good" scenario.
 * The queue can contain small integers below UCS_MPMC_VALUE_MAX.
 *
 * TODO make the queue resizeable.
 */
typedef struct ucs_mpmc_queue {
    uint32_t           length;      /* Array size. Rounded to power of 2. */
    int                shift;       /* log2 of the array size */
    volatile uint32_t  producer;    /* Producer index */
    volatile uint32_t  consumer;    /* Consumer index */
    uint32_t           *queue;      /* Array of data */
} ucs_mpmc_queue_t;


/**
 * Initialize MPMC queue.
 *
 * @param length   Queue length.
 *
 * @return UCS_OK on success, UCS_ERR_INVALID_PARAM if the rounded-up length
 *         is too large, UCS_ERR_NO_MEMORY if the array could not be allocated.
 */
ucs_status_t ucs_mpmc_queue_init(ucs_mpmc_queue_t *mpmc, uint32_t length);


/**
 * Destroy MPMC queue.
 */
void ucs_mpmc_queue_cleanup(ucs_mpmc_queue_t *mpmc);


/**
 * Atomically push a value to the queue.
 *
 * @param value  Value to push.
 * @return UCS_ERR_EXCEEDS_LIMIT if the queue is full.
 */
ucs_status_t ucs_mpmc_queue_push(ucs_mpmc_queue_t *mpmc, uint32_t value);


/**
 * Atomically pull a value from the queue.
 *
 * @param value_p  Filled with the value, if successful.
 * @return UCS_ERR_NO_PROGRESS if there is currently no available item to
 *         retrieve, or another thread removed the current item.
 */
ucs_status_t ucs_mpmc_queue_pull(ucs_mpmc_queue_t *mpmc, uint32_t *value_p);
+ */ +static inline int ucs_mpmc_queue_is_empty(ucs_mpmc_queue_t *mpmc) +{ + return mpmc->producer == mpmc->consumer; +} + +#endif diff --git a/src/ucs/datastruct/mpool.c b/src/ucs/datastruct/mpool.c new file mode 100644 index 0000000..1b142f8 --- /dev/null +++ b/src/ucs/datastruct/mpool.c @@ -0,0 +1,341 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "mpool.h" +#include "mpool.inl" +#include "queue.h" + +#include +#include +#include +#include + + +static inline unsigned ucs_mpool_elem_total_size(ucs_mpool_data_t *data) +{ + return ucs_align_up_pow2(data->elem_size, data->alignment); +} + +static inline ucs_mpool_elem_t *ucs_mpool_chunk_elem(ucs_mpool_data_t *data, + ucs_mpool_chunk_t *chunk, + unsigned elem_index) +{ + return UCS_PTR_BYTE_OFFSET(chunk->elems, + elem_index * ucs_mpool_elem_total_size(data)); +} + +static void ucs_mpool_chunk_leak_check(ucs_mpool_t *mp, ucs_mpool_chunk_t *chunk) +{ + ucs_mpool_elem_t *elem; + unsigned i; + + for (i = 0; i < chunk->num_elems; ++i) { + elem = ucs_mpool_chunk_elem(mp->data, chunk, i); + if (elem->mpool != NULL) { + ucs_warn("object %p was not returned to mpool %s", elem + 1, + ucs_mpool_name(mp)); + } + } +} + +ucs_status_t ucs_mpool_init(ucs_mpool_t *mp, size_t priv_size, + size_t elem_size, size_t align_offset, size_t alignment, + unsigned elems_per_chunk, unsigned max_elems, + ucs_mpool_ops_t *ops, const char *name) +{ + /* Check input values */ + if ((elem_size == 0) || (align_offset > elem_size) || + (alignment == 0) || !ucs_is_pow2(alignment) || + (elems_per_chunk == 0) || (max_elems < elems_per_chunk) || + !ops || !ops->chunk_alloc || !ops->chunk_release) + { + ucs_error("Invalid memory pool parameter(s)"); + return UCS_ERR_INVALID_PARAM; + } + + mp->data = ucs_malloc(sizeof(*mp->data) + priv_size, "mpool_data"); + if (mp->data == NULL) { + ucs_error("Failed to allocate 
memory pool slow-path area"); + return UCS_ERR_NO_MEMORY; + } + + mp->freelist = NULL; + mp->data->elem_size = sizeof(ucs_mpool_elem_t) + elem_size; + mp->data->alignment = alignment; + mp->data->align_offset = sizeof(ucs_mpool_elem_t) + align_offset; + mp->data->elems_per_chunk = elems_per_chunk; + mp->data->quota = max_elems; + mp->data->tail = NULL; + mp->data->chunks = NULL; + mp->data->ops = ops; + mp->data->name = ucs_strdup(name, "mpool_data_name"); + + if (mp->data->name == NULL) { + ucs_error("Failed to allocate memory pool data name"); + goto err_strdup; + } + + VALGRIND_CREATE_MEMPOOL(mp, 0, 0); + + ucs_debug("mpool %s: align %u, maxelems %u, elemsize %u", + ucs_mpool_name(mp), mp->data->alignment, max_elems, mp->data->elem_size); + return UCS_OK; + +err_strdup: + ucs_free(mp->data); + mp->data = NULL; + return UCS_ERR_NO_MEMORY; +} + +void ucs_mpool_cleanup(ucs_mpool_t *mp, int leak_check) +{ + ucs_mpool_chunk_t *chunk, *next_chunk; + ucs_mpool_elem_t *elem, *next_elem; + ucs_mpool_data_t *data = mp->data; + void *obj; + + /* Cleanup all elements in the freelist and set their header to NULL to mark + * them as released for the leak check. + */ + next_elem = mp->freelist; + while (next_elem != NULL) { + elem = next_elem; + VALGRIND_MAKE_MEM_DEFINED(elem, sizeof *elem); + next_elem = elem->next; + if (data->ops->obj_cleanup != NULL) { + obj = elem + 1; + VALGRIND_MEMPOOL_ALLOC(mp, obj, mp->data->elem_size - sizeof(ucs_mpool_elem_t)); + VALGRIND_MAKE_MEM_DEFINED(obj, mp->data->elem_size - sizeof(ucs_mpool_elem_t)); + data->ops->obj_cleanup(mp, obj); + VALGRIND_MEMPOOL_FREE(mp, obj); + } + elem->mpool = NULL; + } + + /* Must be done before chunks are released and other threads could allocated + * the same memory address + */ + VALGRIND_DESTROY_MEMPOOL(mp); + + /* + * Go over all elements in the chunks and make sure they were on the freelist. + * Then, release the chunk. 
+ */ + next_chunk = data->chunks; + while (next_chunk != NULL) { + chunk = next_chunk; + next_chunk = chunk->next; + + if (leak_check) { + ucs_mpool_chunk_leak_check(mp, chunk); + } + data->ops->chunk_release(mp, chunk); + } + + ucs_debug("mpool %s destroyed", ucs_mpool_name(mp)); + + ucs_free(data->name); + ucs_free(data); +} + +void *ucs_mpool_priv(ucs_mpool_t *mp) +{ + return mp->data + 1; +} + +const char *ucs_mpool_name(ucs_mpool_t *mp) +{ + return mp->data->name; +} + +int ucs_mpool_is_empty(ucs_mpool_t *mp) +{ + return (mp->freelist == NULL) && (mp->data->quota == 0); +} + +void *ucs_mpool_get(ucs_mpool_t *mp) +{ + return ucs_mpool_get_inline(mp); +} + +void ucs_mpool_put(void *obj) +{ + ucs_mpool_put_inline(obj); +} + +void ucs_mpool_grow(ucs_mpool_t *mp, unsigned num_elems) +{ + ucs_mpool_data_t *data = mp->data; + size_t chunk_size, chunk_padding; + ucs_mpool_chunk_t *chunk; + ucs_mpool_elem_t *elem; + ucs_status_t status; + unsigned i; + void *ptr; + + if (data->quota == 0) { + return; + } + + chunk_size = sizeof(ucs_mpool_chunk_t) + data->alignment + + (num_elems * ucs_mpool_elem_total_size(data)); + status = data->ops->chunk_alloc(mp, &chunk_size, &ptr); + if (status != UCS_OK) { + ucs_error("Failed to allocate memory pool (name=%s) chunk: %s", + ucs_mpool_name(mp), ucs_status_string(status)); + return; + } + + /* Calculate padding, and update element count according to allocated size */ + chunk = ptr; + chunk_padding = ucs_padding((uintptr_t)(chunk + 1) + data->align_offset, + data->alignment); + chunk->elems = UCS_PTR_BYTE_OFFSET(chunk + 1, chunk_padding); + chunk->num_elems = ucs_min(data->quota, (chunk_size - chunk_padding - sizeof(*chunk)) / + ucs_mpool_elem_total_size(data)); + + ucs_debug("mpool %s: allocated chunk %p of %lu bytes with %u elements", + ucs_mpool_name(mp), chunk, chunk_size, chunk->num_elems); + + for (i = 0; i < chunk->num_elems; ++i) { + elem = ucs_mpool_chunk_elem(data, chunk, i); + if (data->ops->obj_init != NULL) { + 
data->ops->obj_init(mp, elem + 1, chunk); + } + + ucs_mpool_add_to_freelist(mp, elem, 0); + if (data->tail == NULL) { + data->tail = elem; + } + } + + chunk->next = data->chunks; + data->chunks = chunk; + + if (data->quota == UINT_MAX) { + /* Infinite memory pool */ + } else if (data->quota >= chunk->num_elems) { + data->quota -= chunk->num_elems; + } else { + data->quota = 0; + } + + VALGRIND_MAKE_MEM_NOACCESS(chunk + 1, chunk_size - sizeof(*chunk)); +} + +void *ucs_mpool_get_grow(ucs_mpool_t *mp) +{ + ucs_mpool_data_t *data = mp->data; + + ucs_mpool_grow(mp, data->elems_per_chunk); + if (mp->freelist == NULL) { + return NULL; + } + + return ucs_mpool_get(mp); +} + +ucs_status_t ucs_mpool_chunk_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p) +{ + *chunk_p = ucs_malloc(*size_p, ucs_mpool_name(mp)); + return (*chunk_p == NULL) ? UCS_ERR_NO_MEMORY : UCS_OK; +} + +void ucs_mpool_chunk_free(ucs_mpool_t *mp, void *chunk) +{ + ucs_free(chunk); +} + + +typedef struct ucs_mmap_mpool_chunk_hdr { + size_t size; +} ucs_mmap_mpool_chunk_hdr_t; + +ucs_status_t ucs_mpool_chunk_mmap(ucs_mpool_t *mp, size_t *size_p, void **chunk_p) +{ + ucs_mmap_mpool_chunk_hdr_t *chunk; + size_t real_size; + + real_size = ucs_align_up(*size_p + sizeof(*chunk), ucs_get_page_size()); + chunk = ucs_mmap(NULL, real_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0, ucs_mpool_name(mp)); + if (chunk == MAP_FAILED) { + return UCS_ERR_NO_MEMORY; + } + + chunk->size = real_size; + *size_p = real_size - sizeof(*chunk); + *chunk_p = chunk + 1; + return UCS_OK; +} + +void ucs_mpool_chunk_munmap(ucs_mpool_t *mp, void *chunk) +{ + ucs_mmap_mpool_chunk_hdr_t *hdr = chunk; + hdr -= 1; + ucs_munmap(hdr, hdr->size); +} + + +typedef struct ucs_hugetlb_mpool_chunk_hdr { + int hugetlb; +} ucs_hugetlb_mpool_chunk_hdr_t; + +ucs_status_t ucs_mpool_hugetlb_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p) +{ + ucs_hugetlb_mpool_chunk_hdr_t *chunk; + size_t real_size; +#ifdef SHM_HUGETLB + void 
*ptr; + ucs_status_t status; + int shmid; +#endif + +#ifdef SHM_HUGETLB + ptr = NULL; + + /* First, try hugetlb */ + real_size = *size_p; + status = ucs_sysv_alloc(&real_size, real_size * 2, (void**)&ptr, SHM_HUGETLB, + ucs_mpool_name(mp), &shmid); + if (status == UCS_OK) { + chunk = ptr; + chunk->hugetlb = 1; + goto out_ok; + } +#endif + + /* Fallback to glibc */ + real_size = *size_p; + chunk = ucs_malloc(real_size, ucs_mpool_name(mp)); + if (chunk != NULL) { + chunk->hugetlb = 0; + goto out_ok; + } + + return UCS_ERR_NO_MEMORY; + +out_ok: + *size_p = real_size - sizeof(*chunk); + *chunk_p = chunk + 1; + return UCS_OK; +} + +void ucs_mpool_hugetlb_free(ucs_mpool_t *mp, void *chunk) +{ + ucs_hugetlb_mpool_chunk_hdr_t *hdr; + + hdr = (ucs_hugetlb_mpool_chunk_hdr_t*)chunk - 1; + if (hdr->hugetlb) { + ucs_sysv_free(hdr); + } else { + ucs_free(hdr); + } +} diff --git a/src/ucs/datastruct/mpool.h b/src/ucs/datastruct/mpool.h new file mode 100644 index 0000000..38ef29b --- /dev/null +++ b/src/ucs/datastruct/mpool.h @@ -0,0 +1,251 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_MPOOL_H_ +#define UCS_MPOOL_H_ + +#include +#include +#include + +BEGIN_C_DECLS + +/** @file mpool.h */ + +typedef struct ucs_mpool_chunk ucs_mpool_chunk_t; +typedef union ucs_mpool_elem ucs_mpool_elem_t; +typedef struct ucs_mpool ucs_mpool_t; +typedef struct ucs_mpool_data ucs_mpool_data_t; +typedef struct ucs_mpool_ops ucs_mpool_ops_t; + + +/** + * Manages memory allocations of same-size objects. + * + * A chunk of elements looks like this: + * +-----------+-------+----------+-------+----------+------+---------+ + * | | elem0 | padding0 | elem1 | padding1 | .... 
| elemN-1 | + * +-----------+-------+----------+-------+----------+------+---------+ + * + * An element looks like this: + * +------------+--------+------+ + * | mpool_elem | header | data | + * +------------+--------+------+ + * | + * This location is aligned. + */ + + +/** + * Memory pool element header. + */ +union ucs_mpool_elem { + ucs_mpool_elem_t *next; /* Next free elem - when elem is in the pool */ + ucs_mpool_t *mpool; /* Used when elem is allocated */ +}; + + +/** + * Memory pool chunk, which contains many elements. + */ +struct ucs_mpool_chunk { + ucs_mpool_chunk_t *next; /* Next chunk */ + void *elems; /* Array of elements */ + unsigned num_elems; /* How many elements */ +}; + + +/** + * Memory pool structure. + */ +struct ucs_mpool { + ucs_mpool_elem_t *freelist; /* List of available elements */ + ucs_mpool_data_t *data; /* Slow-path data */ +}; + + +/** + * Memory pool slow-path data. + */ +struct ucs_mpool_data { + unsigned elem_size; /* Size of element in the chunk, including the ucs_mpool_elem_t header */ + unsigned alignment; /* Element alignment */ + unsigned align_offset; /* Offset to alignment point, counted from the start of the ucs_mpool_elem_t header */ + unsigned elems_per_chunk; /* Number of elements per chunk */ + unsigned quota; /* How many more elements can be allocated; UINT_MAX means unlimited */ + ucs_mpool_elem_t *tail; /* Free list tail */ + ucs_mpool_chunk_t *chunks; /* List of allocated chunks */ + ucs_mpool_ops_t *ops; /* Memory pool operations */ + char *name; /* Name - used for debugging */ +}; + + +/** + * Defines callbacks for memory pool operations. + */ +struct ucs_mpool_ops { + /** + * Allocate a chunk of memory to be used by the mpool. + * + * @param mp Memory pool structure. + * @param size_p Points to minimal size to allocate. The function may + * modify it to the actual allocated size, which must be + * larger or equal. + * @param chunk_p Filled with a pointer to the allocated chunk. + * + * @return Error status.
+ */ + ucs_status_t (*chunk_alloc)(ucs_mpool_t *mp, size_t *size_p, void **chunk_p); + + /** + * Release previously allocated chunk of memory. + * + * @param mp Memory pool structure. + * @param chunk Chunk to release. + */ + void (*chunk_release)(ucs_mpool_t *mp, void *chunk); + + /** + * Initialize an object in the memory pool, once, when the chunk containing it is allocated. + * May be NULL. + * + * @param mp Memory pool structure. + * @param obj Object to initialize. + * @param chunk The chunk on which the object was allocated, as returned + * from chunk_alloc(). + */ + void (*obj_init)(ucs_mpool_t *mp, void *obj, void *chunk); + + /** + * Cleanup an object in the memory pool just before its memory is released. + * May be NULL. + * + * @param mp Memory pool structure. + * @param obj Object to clean up. + */ + void (*obj_cleanup)(ucs_mpool_t *mp, void *obj); +}; + + +/** + * Initialize a memory pool. + * + * @param mp Memory pool structure. + * @param priv_size Size of user-defined private data area. + * @param elem_size Size of an element; must be nonzero. + * @param align_offset Offset in the element which should be aligned to the given boundary; must not exceed elem_size. + * @param alignment Boundary to which to align the given offset within the element; must be a nonzero power of two. + * @param elems_per_chunk Number of elements in a single chunk; must be nonzero. + * @param max_elems Maximal number of elements which can be allocated by the pool. + * -1 or UINT_MAX means no limit; must not be less than elems_per_chunk. + * @param ops Memory pool operations; chunk_alloc and chunk_release are mandatory. + * @param name Memory pool name. + * + * @return UCS_OK on success, UCS_ERR_INVALID_PARAM for invalid arguments, or UCS_ERR_NO_MEMORY. + */ +ucs_status_t ucs_mpool_init(ucs_mpool_t *mp, size_t priv_size, + size_t elem_size, size_t align_offset, size_t alignment, + unsigned elems_per_chunk, unsigned max_elems, + ucs_mpool_ops_t *ops, const char *name); + + +/** + * Cleanup a memory pool and release all its memory. + * + * @param mp Memory pool structure. + * @param leak_check Whether to check for leaks (objects which were not + * returned to the pool).
+ */ +void ucs_mpool_cleanup(ucs_mpool_t *mp, int leak_check); + + +/** + * @param mp Memory pool structure. + * + * @return Memory pool name. + */ +const char *ucs_mpool_name(ucs_mpool_t *mp); + + +/** + * @param mp Memory pool structure. + * + * @return Pointer to the user-defined private area of priv_size bytes reserved by ucs_mpool_init(). + */ +void *ucs_mpool_priv(ucs_mpool_t *mp); + + +/** + * Check if a memory pool is empty (free list is empty and the quota is used up, so it cannot allocate more objects). + * + * @param mp Memory pool structure. + * + * @return Whether a memory pool is empty. + */ +int ucs_mpool_is_empty(ucs_mpool_t *mp); + + +/** + * Get an element from the memory pool. + * + * @param mp Memory pool structure. + * + * @return New allocated object, or NULL if cannot allocate. + */ +void *ucs_mpool_get(ucs_mpool_t *mp); + + +/** + * Return an object to the memory pool it was allocated from. + * + * @param obj Object to return. + */ +void ucs_mpool_put(void *obj); + + +/** + * Grow the memory pool by a specified amount of elements. Does nothing if the quota is used up. + * + * @param mp Memory pool structure. + * @param num_elems By how many elements to grow (the number actually added is capped by the remaining quota). + */ +void ucs_mpool_grow(ucs_mpool_t *mp, unsigned num_elems); + + +/** + * Grow the memory pool by one chunk and allocate an object from it. + * Used internally by ucs_mpool_get() when the free list is empty. + * + * @param mp Memory pool structure. + * + * @return New allocated object, or NULL if cannot allocate. + */ +void *ucs_mpool_get_grow(ucs_mpool_t *mp); + + +/** + * heap-based chunk allocator. + */ +ucs_status_t ucs_mpool_chunk_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p); +void ucs_mpool_chunk_free(ucs_mpool_t *mp, void *chunk); + + +/* + * mmap chunk allocator. + */ +ucs_status_t ucs_mpool_chunk_mmap(ucs_mpool_t *mp, size_t *size_p, void **chunk_p); +void ucs_mpool_chunk_munmap(ucs_mpool_t *mp, void *chunk); + + +/** + * hugetlb chunk allocator.
+ */ +ucs_status_t ucs_mpool_hugetlb_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p); +void ucs_mpool_hugetlb_free(ucs_mpool_t *mp, void *chunk); + +END_C_DECLS + +#endif /* MPOOL_H_ */ diff --git a/src/ucs/datastruct/mpool.inl b/src/ucs/datastruct/mpool.inl new file mode 100644 index 0000000..d06e2f9 --- /dev/null +++ b/src/ucs/datastruct/mpool.inl @@ -0,0 +1,85 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_MPOOL_INL_ +#define UCS_MPOOL_INL_ + +#include "mpool.h" + +#include +#include +#include + + +static inline void *ucs_mpool_get_inline(ucs_mpool_t *mp) +{ + ucs_mpool_elem_t *elem; + void *obj; + + if (ucs_unlikely(mp->freelist == NULL)) { + return ucs_mpool_get_grow(mp); + } + + /* Disconnect an element from the pool */ + elem = mp->freelist; + VALGRIND_MAKE_MEM_DEFINED(elem, sizeof *elem); + mp->freelist = elem->next; + elem->mpool = mp; + VALGRIND_MAKE_MEM_NOACCESS(elem, sizeof *elem); + + obj = elem + 1; + VALGRIND_MEMPOOL_ALLOC(mp, obj, mp->data->elem_size - sizeof(ucs_mpool_elem_t)); + return obj; +} + +static inline void ucs_mpool_add_to_freelist(ucs_mpool_t *mp, ucs_mpool_elem_t *elem, + int add_to_tail) +{ + ucs_mpool_elem_t *tail; + + if (add_to_tail) { + elem->next = NULL; + if (mp->freelist == NULL) { + mp->freelist = elem; + } else { + tail = mp->data->tail; + VALGRIND_MAKE_MEM_DEFINED(tail, sizeof *tail); + tail->next = elem; + VALGRIND_MAKE_MEM_NOACCESS(tail, sizeof *tail); + } + mp->data->tail = elem; + } else { + elem->next = mp->freelist; + mp->freelist = elem; + } +} + +static inline ucs_mpool_elem_t *ucs_mpool_obj_to_elem(void *obj) +{ + ucs_mpool_elem_t *elem = (ucs_mpool_elem_t*)obj - 1; + VALGRIND_MAKE_MEM_DEFINED(elem, sizeof *elem); + return elem; +} + +static inline ucs_mpool_t *ucs_mpool_obj_owner(void *obj) +{ + return ucs_mpool_obj_to_elem(obj)->mpool; +} + +static inline void ucs_mpool_put_inline(void *obj) +{ + ucs_mpool_elem_t 
*elem; + ucs_mpool_t *mp; + + elem = ucs_mpool_obj_to_elem(obj); + mp = elem->mpool; + ucs_mpool_add_to_freelist(mp, elem, + ENABLE_DEBUG_DATA && ucs_global_opts.mpool_fifo); + VALGRIND_MAKE_MEM_NOACCESS(elem, sizeof *elem); + VALGRIND_MEMPOOL_FREE(mp, obj); +} + +#endif diff --git a/src/ucs/datastruct/pgtable.c b/src/ucs/datastruct/pgtable.c new file mode 100644 index 0000000..cb9dd8b --- /dev/null +++ b/src/ucs/datastruct/pgtable.c @@ -0,0 +1,678 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "pgtable.h" + +#include +#include +#include +#include +#include +#include + + +#define ucs_pgt_entry_clear(_pte) \ + { (_pte)->value = 0; } + +#define ucs_pgt_entry_value(_pte) \ + ((void*)((_pte)->value & UCS_PGT_ENTRY_PTR_MASK)) + +#define ucs_pgt_entry_test(_pte, _flag) \ + ((_pte)->value & (_flag)) + +#define ucs_pgt_entry_present(_pte) \ + ucs_pgt_entry_test(_pte, UCS_PGT_ENTRY_FLAG_REGION | UCS_PGT_ENTRY_FLAG_DIR) + +#define ucs_pgt_is_addr_aligned(_addr) \ + (!((_addr) & (UCS_PGT_ADDR_ALIGN - 1))) + +#define ucs_pgt_check_ptr(_ptr) \ + do { \ + ucs_assertv(!((uintptr_t)(_ptr) & (UCS_PGT_ENTRY_MIN_ALIGN - 1)), \ + "ptr=%p", (_ptr)); \ + } while (0) + +#define ucs_pgt_entry_set_region(_pte, _region) \ + do { \ + ucs_pgt_region_t *tmp = (_region); \ + ucs_pgt_check_ptr(tmp); \ + (_pte)->value = ((uintptr_t)tmp) | UCS_PGT_ENTRY_FLAG_REGION; \ + } while (0) + +#define ucs_pgt_entry_set_dir(_pte, _dir) \ + do { \ + ucs_pgt_dir_t *tmp = (_dir); \ + ucs_pgt_check_ptr(tmp); \ + (_pte)->value = ((uintptr_t)tmp) | UCS_PGT_ENTRY_FLAG_DIR; \ + } while (0) + +#define ucs_pgt_entry_get_region(_pte) \ + ({ \ + ucs_assert(ucs_pgt_entry_test(_pte, UCS_PGT_ENTRY_FLAG_REGION)); \ + (ucs_pgt_region_t*)ucs_pgt_entry_value(_pte); \ + }) + +#define ucs_pgt_entry_get_dir(_pte) \ + ({ \ + ucs_assert(ucs_pgt_entry_test(_pte, UCS_PGT_ENTRY_FLAG_DIR)); \ 
+ (ucs_pgt_dir_t*)ucs_pgt_entry_value(_pte); \ + }) + + +static inline ucs_pgt_dir_t* ucs_pgt_dir_alloc(ucs_pgtable_t *pgtable) +{ + ucs_pgt_dir_t *pgd; + + pgd = pgtable->pgd_alloc_cb(pgtable); + if (pgd == NULL) { + ucs_fatal("Failed to allocate page table directory"); + } + + ucs_pgt_check_ptr(pgd); + memset(pgd, 0, sizeof(*pgd)); + return pgd; +} + +static inline void ucs_pgt_dir_release(ucs_pgtable_t *pgtable, ucs_pgt_dir_t* pgd) +{ + pgtable->pgd_release_cb(pgtable, pgd); +} + +static inline void ucs_pgt_address_advance(ucs_pgt_addr_t *address_p, + unsigned order) +{ + ucs_assert(order < 64); + /* coverity[large_shift] */ + *address_p += 1ul << order; +} + +static void ucs_pgt_entry_dump_recurs(const ucs_pgtable_t *pgtable, unsigned indent, + const ucs_pgt_entry_t *pte, unsigned pte_index, + ucs_pgt_addr_t base, ucs_pgt_addr_t mask, + unsigned shift, ucs_log_level_t log_level) +{ + ucs_pgt_region_t *region; + ucs_pgt_dir_t *pgd; + size_t i; /* directory slot index; also shifted into the child base address */ + + if (ucs_pgt_entry_test(pte, UCS_PGT_ENTRY_FLAG_REGION)) { + region = ucs_pgt_entry_value(pte); + ucs_log(log_level, "%*s[%3u] region " UCS_PGT_REGION_FMT, indent, "", + pte_index, UCS_PGT_REGION_ARG(region)); + } else if (ucs_pgt_entry_test(pte, UCS_PGT_ENTRY_FLAG_DIR)) { + pgd = ucs_pgt_entry_get_dir(pte); /* range end below: 2^shift computed in address width, wrapping to 0 when shift is the full width */ + ucs_log(log_level, "%*s[%3u] dir %p for [0x%lx..0x%lx], count %u shift %u mask 0x%lx", + indent, " ", pte_index, pgd, base, (base + ((shift < UCS_PGT_ADDR_ORDER) ? (1ul << shift) : 0)) & mask, + pgd->count, shift, mask); + shift -= UCS_PGT_ENTRY_SHIFT; + mask |= UCS_PGT_ENTRY_MASK << shift; + for (i = 0; i < UCS_PGT_ENTRIES_PER_DIR; ++i) { + /* child i starts at base | (i << shift); base itself stays unchanged between children */ + ucs_pgt_entry_dump_recurs(pgtable, indent + 2, &pgd->entries[i], i, + base | (i << shift), mask, shift, log_level); + } + } else { + ucs_log(log_level, "%*s[%3u] not present", indent, " ", pte_index); + } +} + +static void ucs_pgtable_log(const ucs_pgtable_t *pgtable, + ucs_log_level_t log_level, const char *message) +{ + ucs_log(log_level, "pgtable %p %s: base 0x%lx/0x%lx shift %u count %u", + pgtable,
message, pgtable->base, pgtable->mask, pgtable->shift, + pgtable->num_regions); +} + +void ucs_pgtable_dump(const ucs_pgtable_t *pgtable, ucs_log_level_t log_level) +{ + ucs_pgtable_log(pgtable, log_level, "dump"); + ucs_pgt_entry_dump_recurs(pgtable, 0, &pgtable->root, 0, pgtable->base, + pgtable->mask, pgtable->shift, log_level); +} + +static void ucs_pgtable_trace(ucs_pgtable_t *pgtable, const char *message) +{ + ucs_pgtable_log(pgtable, UCS_LOG_LEVEL_TRACE_DATA, message); +} + +static void ucs_pgtable_reset(ucs_pgtable_t *pgtable) +{ + pgtable->base = 0; + pgtable->mask = ((ucs_pgt_addr_t)-1) << UCS_PGT_ADDR_SHIFT; + pgtable->shift = UCS_PGT_ADDR_SHIFT; +} + +/** + * Make the page table map a wider range of addresses - expands by UCS_PGT_ENTRY_SHIFT. + */ +static void ucs_pgtable_expand(ucs_pgtable_t *pgtable) +{ + ucs_pgt_dir_t *pgd; + + ucs_assertv(pgtable->shift <= (UCS_PGT_ADDR_ORDER - UCS_PGT_ENTRY_SHIFT), + "shift=%u", pgtable->shift); + + if (ucs_pgt_entry_present(&pgtable->root)) { + pgd = ucs_pgt_dir_alloc(pgtable); + pgd->entries[(pgtable->base >> pgtable->shift) & UCS_PGT_ENTRY_MASK] = + pgtable->root; + pgd->count = 1; + ucs_pgt_entry_set_dir(&pgtable->root, pgd); + } + + pgtable->shift += UCS_PGT_ENTRY_SHIFT; + pgtable->mask <<= UCS_PGT_ENTRY_SHIFT; + pgtable->base &= pgtable->mask; + ucs_pgtable_trace(pgtable, "expand"); +} + +/** + * Shrink the page table address span if possible + * + * @return Whether it was shrinked. 
+ */ +static int ucs_pgtable_shrink(ucs_pgtable_t *pgtable) +{ + ucs_pgt_entry_t *pte; + ucs_pgt_dir_t *pgd; + unsigned pte_idx; + + if (!ucs_pgt_entry_present(&pgtable->root)) { + ucs_pgtable_reset(pgtable); /* empty table: restore the initial base/mask/shift */ + ucs_pgtable_trace(pgtable, "shrink"); + return 0; + } else if (!ucs_pgt_entry_test(&pgtable->root, UCS_PGT_ENTRY_FLAG_DIR)) { + return 0; /* root is not a directory, so there is no level to remove */ + } + + pgd = ucs_pgt_entry_get_dir(&pgtable->root); + ucs_assert(pgd->count > 0); /* a present directory should never be empty */ + + /* If there is just one PTE, we can reduce the page table to map + * this PTE only. + */ + if (pgd->count != 1) { + return 0; + } + + /* Search for the single PTE in dir; count == 1 means one entry must be present */ + for (pte_idx = 0, pte = pgd->entries; !ucs_pgt_entry_present(pte); ++pte_idx, ++pte) { + ucs_assert(pte_idx < UCS_PGT_ENTRIES_PER_DIR); + } + + /* Remove one level: the single PTE becomes the root and its index bits are folded into base and mask */ + pgtable->shift -= UCS_PGT_ENTRY_SHIFT; + pgtable->base |= (ucs_pgt_addr_t)pte_idx << pgtable->shift; + pgtable->mask |= UCS_PGT_ENTRY_MASK << pgtable->shift; + pgtable->root = *pte; + ucs_pgtable_trace(pgtable, "shrink"); + ucs_pgt_dir_release(pgtable, pgd); + return 1; +} + +static void ucs_pgtable_check_page(ucs_pgt_addr_t address, unsigned order) +{ + ucs_assert( (address & ((1ul << order) - 1)) == 0 ); /* address must be aligned to 2^order */ + ucs_assertv( ((order - UCS_PGT_ADDR_SHIFT) % UCS_PGT_ENTRY_SHIFT) == 0, "order=%u", order); /* order must be ADDR_SHIFT + k * ENTRY_SHIFT */ +} + +/** + * @return Order of the next whole page starting in "start" and ending before "end" + * If both are 0, return the full word size.
+ */ +static unsigned ucs_pgtable_get_next_page_order(ucs_pgt_addr_t start, ucs_pgt_addr_t end) +{ + unsigned log2_len; + + ucs_assertv(ucs_pgt_is_addr_aligned(start), "start=0x%lx", start); + ucs_assertv(ucs_pgt_is_addr_aligned(end), "end=0x%lx", end); + + if ((end == 0) && (start == 0)) { + log2_len = UCS_PGT_ADDR_ORDER; /* entire range */ + } else if (end == start) { + log2_len = UCS_PGT_ADDR_SHIFT; + } else { + log2_len = ucs_ilog2(end - start); + if (start) { + log2_len = ucs_min(ucs_ffs64(start), log2_len); + } + } + + ucs_assertv((log2_len >= UCS_PGT_ADDR_SHIFT) && + (log2_len <= UCS_PGT_ADDR_ORDER), + "log2_len=%u start=0x%lx end=0x%lx", + log2_len, start, end); + + /* Order should be: [ADDR_SHIFT + k * ENTRY_SHIFT] */ + return (((log2_len - UCS_PGT_ADDR_SHIFT) / UCS_PGT_ENTRY_SHIFT) * UCS_PGT_ENTRY_SHIFT) + + UCS_PGT_ADDR_SHIFT; +} + +/** + * Insert a variable-size page to the page table. + * + * @param address address to insert + * @param order page size to insert - should be k*PTE_SHIFT for a certain k + * @param region region to insert + */ +static ucs_status_t +ucs_pgtable_insert_page(ucs_pgtable_t *pgtable, ucs_pgt_addr_t address, + unsigned order, ucs_pgt_region_t *region) +{ + ucs_pgt_dir_t dummy_pgd = {}; + ucs_pgt_entry_t *pte; + ucs_pgt_dir_t *pgd; + unsigned shift; + + ucs_pgtable_check_page(address, order); + + ucs_trace_data("insert page 0x%lx order %u, for region " UCS_PGT_REGION_FMT, + address, order, UCS_PGT_REGION_ARG(region)); + + /* Make root map addresses which include our interval */ + while (pgtable->shift < order) { + ucs_pgtable_expand(pgtable); + } + + if (ucs_pgt_entry_present(&pgtable->root)) { + while ((address & pgtable->mask) != pgtable->base) { + ucs_pgtable_expand(pgtable); + } + } else { + pgtable->base = address & pgtable->mask; + ucs_pgtable_trace(pgtable, "initialize"); + } + + /* Insert the page in the PTE */ + pgd = &dummy_pgd; + shift = pgtable->shift; + pte = &pgtable->root; + while (1) { + if (order == shift) { + if 
(ucs_pgt_entry_present(pte)) { + goto err; + } + ucs_pgt_entry_set_region(pte, region); + ++pgd->count; + break; + } else { + if (ucs_pgt_entry_test(pte, UCS_PGT_ENTRY_FLAG_REGION)) { + goto err; + } + + ucs_assertv(shift >= UCS_PGT_ENTRY_SHIFT + order, + "shift=%u order=%u", shift, order); /* sub PTE should be able to hold it */ + + if (!ucs_pgt_entry_present(pte)) { + ++pgd->count; + ucs_pgt_entry_set_dir(pte, ucs_pgt_dir_alloc(pgtable)); + } + + pgd = ucs_pgt_entry_get_dir(pte); + shift -= UCS_PGT_ENTRY_SHIFT; + pte = &pgd->entries[(address >> shift) & UCS_PGT_ENTRY_MASK]; + } + } + + return UCS_OK; + +err: + while (ucs_pgtable_shrink(pgtable)); + return UCS_ERR_ALREADY_EXISTS; +} + +/* + * `region' is only used to compare pointers + */ +static ucs_status_t +ucs_pgtable_remove_page_recurs(ucs_pgtable_t *pgtable, ucs_pgt_addr_t address, + unsigned order, ucs_pgt_dir_t *pgd, + ucs_pgt_entry_t *pte, unsigned shift, + ucs_pgt_region_t *region) +{ + ucs_pgt_dir_t *next_dir; + ucs_pgt_entry_t *next_pte; + ucs_status_t status; + unsigned next_shift; + + if (ucs_pgt_entry_test(pte, UCS_PGT_ENTRY_FLAG_REGION)) { + ucs_assertv(shift == order, "shift=%u order=%u", shift, order); + if (ucs_pgt_entry_get_region(pte) != region) { + goto no_elem; + } + + --pgd->count; + ucs_pgt_entry_clear(pte); + return UCS_OK; + } else if (ucs_pgt_entry_test(pte, UCS_PGT_ENTRY_FLAG_DIR)) { + next_dir = ucs_pgt_entry_get_dir(pte); + next_shift = shift - UCS_PGT_ENTRY_SHIFT; + next_pte = &next_dir->entries[(address >> next_shift) & UCS_PGT_ENTRY_MASK]; + + status = ucs_pgtable_remove_page_recurs(pgtable, address, order, next_dir, + next_pte, next_shift, region); + if (status != UCS_OK) { + goto no_elem; + } + + if (next_dir->count == 0) { + ucs_pgt_entry_clear(pte); + --pgd->count; + ucs_pgt_dir_release(pgtable, next_dir); + } + return UCS_OK; + } + +no_elem: + return UCS_ERR_NO_ELEM; +} + +static ucs_status_t +ucs_pgtable_remove_page(ucs_pgtable_t *pgtable, ucs_pgt_addr_t address, + unsigned 
order, ucs_pgt_region_t *region) +{ + ucs_pgt_dir_t dummy_pgd = {}; + ucs_status_t status; + + ucs_pgtable_check_page(address, order); + + if ((address & pgtable->mask) != pgtable->base) { + return UCS_ERR_NO_ELEM; + } + + status = ucs_pgtable_remove_page_recurs(pgtable, address, order, &dummy_pgd, + &pgtable->root, pgtable->shift, + region); + if (status != UCS_OK) { + return status; + } + + while (ucs_pgtable_shrink(pgtable)); + return UCS_OK; +} + +ucs_status_t ucs_pgtable_insert(ucs_pgtable_t *pgtable, ucs_pgt_region_t *region) +{ + ucs_pgt_addr_t address = region->start; + ucs_pgt_addr_t end = region->end; + ucs_status_t status; + unsigned order; + + ucs_trace_data("add region " UCS_PGT_REGION_FMT, UCS_PGT_REGION_ARG(region)); + + if ((address >= end) || !ucs_pgt_is_addr_aligned(address) || + !ucs_pgt_is_addr_aligned(end)) + { + return UCS_ERR_INVALID_PARAM; + } + + ucs_assert(address != end); + while (address < end) { + order = ucs_pgtable_get_next_page_order(address, end); + status = ucs_pgtable_insert_page(pgtable, address, order, region); + if (status != UCS_OK) { + goto err; + } + + ucs_pgt_address_advance(&address, order); + } + ++pgtable->num_regions; + + ucs_pgtable_trace(pgtable, "insert"); + return UCS_OK; + +err: + /* Revert all pages we've inserted by now */ + end = address; + address = region->start; + while (address < end) { + order = ucs_pgtable_get_next_page_order(address, end); + ucs_pgtable_remove_page(pgtable, address, order, region); + ucs_pgt_address_advance(&address, order); + } + return status; +} + +ucs_status_t ucs_pgtable_remove(ucs_pgtable_t *pgtable, ucs_pgt_region_t *region) +{ + ucs_pgt_addr_t address = region->start; + ucs_pgt_addr_t end = region->end; + ucs_status_t status; + unsigned order; + + ucs_trace_data("remove region " UCS_PGT_REGION_FMT, UCS_PGT_REGION_ARG(region)); + + if ((address >= end) || !ucs_pgt_is_addr_aligned(address) || + !ucs_pgt_is_addr_aligned(end)) + { + return UCS_ERR_NO_ELEM; + } + + while (address < 
end) { + order = ucs_pgtable_get_next_page_order(address, end); + status = ucs_pgtable_remove_page(pgtable, address, order, region); + if (status != UCS_OK) { + ucs_assert(address == region->start); /* Cannot be partially removed */ + return status; + } + + ucs_pgt_address_advance(&address, order); + } + + ucs_assert(pgtable->num_regions > 0); + --pgtable->num_regions; + + ucs_pgtable_trace(pgtable, "remove"); + return UCS_OK; +} + +ucs_pgt_region_t *ucs_pgtable_lookup(const ucs_pgtable_t *pgtable, + ucs_pgt_addr_t address) +{ + const ucs_pgt_entry_t *pte; + ucs_pgt_region_t *region; + ucs_pgt_dir_t *dir; + unsigned shift; + + ucs_trace_func("pgtable=%p address=0x%lx", pgtable, address); + + /* Check if the address is mapped by the page table */ + if ((address & pgtable->mask) != pgtable->base) { + return NULL; + } + + /* Descend into the page table */ + pte = &pgtable->root; + shift = pgtable->shift; + for (;;) { + if (ucs_pgt_entry_test(pte, UCS_PGT_ENTRY_FLAG_REGION)) { + region = ucs_pgt_entry_get_region(pte); + ucs_assert((address >= region->start) && (address < region->end)); + return region; + } else if (ucs_pgt_entry_test(pte, UCS_PGT_ENTRY_FLAG_DIR)) { + dir = ucs_pgt_entry_get_dir(pte); + shift -= UCS_PGT_ENTRY_SHIFT; + pte = &dir->entries[(address >> shift) & UCS_PGT_ENTRY_MASK]; + } else { + return NULL; + } + } +} + +static void ucs_pgtable_search_recurs(const ucs_pgtable_t *pgtable, + ucs_pgt_addr_t address, unsigned order, + const ucs_pgt_entry_t *pte, unsigned shift, + ucs_pgt_search_callback_t cb, void *arg, + ucs_pgt_region_t **last_p) +{ + ucs_pgt_entry_t *next_pte; + ucs_pgt_region_t *region; + ucs_pgt_dir_t *dir; + unsigned next_shift; + unsigned i; + + if (ucs_pgt_entry_test(pte, UCS_PGT_ENTRY_FLAG_REGION)) { + region = ucs_pgt_entry_value(pte); + + /* Check that we are not continuing with the previous region */ + if (*last_p == region) { + return; + } else if (*last_p != NULL) { + ucs_assert(region->start >= (*last_p)->end); + } + *last_p = 
region; + + /* Assert that the region actually overlaps the address */ + ucs_assertv(ucs_max(region->start, address) <= + ucs_min(region->end - 1, address + UCS_MASK_SAFE(order)), + UCS_PGT_REGION_FMT " address=0x%lx order=%d mask 0x%lx", + UCS_PGT_REGION_ARG(region), address, order, + (ucs_pgt_addr_t)UCS_MASK_SAFE(order)); + + /* Call the callback */ + cb(pgtable, region, arg); + + } else if (ucs_pgt_entry_test(pte, UCS_PGT_ENTRY_FLAG_DIR)) { + dir = ucs_pgt_entry_get_dir(pte); + ucs_assert(shift >= UCS_PGT_ENTRY_SHIFT); + next_shift = shift - UCS_PGT_ENTRY_SHIFT; + + if (order < shift) { + /* One of the sub-ptes maps the region */ + ucs_assert(order <= next_shift); + next_pte = &dir->entries[(address >> next_shift) & UCS_PGT_ENTRY_MASK]; + ucs_pgtable_search_recurs(pgtable, address, order, next_pte, + next_shift, cb, arg, last_p); + } else { + /* All sub-ptes contained in the region */ + for (i = 0; i < UCS_PGT_ENTRIES_PER_DIR; ++i) { + next_pte = &dir->entries[i]; + ucs_pgtable_search_recurs(pgtable, address, order, next_pte, + next_shift, cb, arg, last_p); + } + } + } +} + +void ucs_pgtable_search_range(const ucs_pgtable_t *pgtable, + ucs_pgt_addr_t from, ucs_pgt_addr_t to, + ucs_pgt_search_callback_t cb, void *arg) +{ + ucs_pgt_addr_t address = ucs_align_down_pow2(from, UCS_PGT_ADDR_ALIGN); + ucs_pgt_addr_t end = ucs_align_up_pow2(to, UCS_PGT_ADDR_ALIGN); + ucs_pgt_region_t *last; + unsigned order; + + /* if the page table is covering only part of the address space, intersect + * the range with page table address span */ + if (pgtable->shift < (sizeof(uint64_t) * 8)) { + address = ucs_max(address, pgtable->base); + end = ucs_min(end, pgtable->base + UCS_BIT(pgtable->shift)); + } else { + ucs_assert(pgtable->base == 0); + } + + last = NULL; + while (address <= to) { + order = ucs_pgtable_get_next_page_order(address, end); + if ((address & pgtable->mask) == pgtable->base) { + ucs_pgtable_search_recurs(pgtable, address, order, &pgtable->root, + pgtable->shift, 
cb, arg, &last); + } + + if (order == UCS_PGT_ADDR_ORDER) { + break; + } + + ucs_pgt_address_advance(&address, order); + } +} + +static void ucs_pgtable_purge_callback(const ucs_pgtable_t *pgtable, + ucs_pgt_region_t *region, + void *arg) +{ + ucs_pgt_region_t ***region_pp = arg; /* cursor into the caller's output array */ + **region_pp = region; + ++(*region_pp); +} + +void ucs_pgtable_purge(ucs_pgtable_t *pgtable, ucs_pgt_search_callback_t cb, + void *arg) +{ + unsigned num_regions = pgtable->num_regions; /* snapshot: ucs_pgtable_remove() below decrements the live counter */ + ucs_pgt_region_t **all_regions, **next_region, *region; + ucs_pgt_addr_t from, to; + ucs_status_t status; + unsigned i; + + all_regions = ucs_calloc(ucs_max(num_regions, 1u), sizeof(*all_regions), + "pgt_purge_regions"); /* never request 0 elements: a zero-size allocation may return NULL for an empty table */ + if (all_regions == NULL) { + ucs_warn("failed to allocate array to collect all regions, will leak"); + return; + } + + next_region = all_regions; + from = pgtable->base; + to = pgtable->base + (((pgtable->shift < UCS_PGT_ADDR_ORDER) ? (1ul << pgtable->shift) : 0) & pgtable->mask) - 1; /* last mapped address; 2^shift wraps to 0 at full address width */ + ucs_pgtable_search_range(pgtable, from, to, ucs_pgtable_purge_callback, + &next_region); + ucs_assertv(next_region == all_regions + num_regions, + "next_region=%p all_regions=%p num_regions=%u", + next_region, all_regions, num_regions); + + for (i = 0; i < num_regions; ++i) { + region = all_regions[i]; + status = ucs_pgtable_remove(pgtable, region); + if (status != UCS_OK) { + ucs_warn("failed to remove pgtable region " UCS_PGT_REGION_FMT, + UCS_PGT_REGION_ARG(region)); + } + cb(pgtable, region, arg); /* hand every collected region to the caller, even if removal failed */ + } + + ucs_free(all_regions); + + /* Page table should be totally empty */ + ucs_assert(!ucs_pgt_entry_present(&pgtable->root)); + ucs_assertv(pgtable->shift == UCS_PGT_ADDR_SHIFT, "shift=%u", pgtable->shift); + ucs_assertv(pgtable->base == 0, "value=0x%lx", pgtable->base); + ucs_assertv(pgtable->num_regions == 0, "num_regions=%u", pgtable->num_regions); +} + +ucs_status_t ucs_pgtable_init(ucs_pgtable_t *pgtable, + ucs_pgt_dir_alloc_callback_t alloc_cb, + ucs_pgt_dir_release_callback_t release_cb) +{ + UCS_STATIC_ASSERT(ucs_is_pow2(UCS_PGT_ENTRY_MIN_ALIGN)); + +
/* ADDR_MAX+1 must be power of 2, or wrap around to 0. */ + UCS_STATIC_ASSERT(ucs_is_pow2_or_zero(UCS_PGT_ADDR_MAX + 1)); + + /* We must cover all bits of the address up to ADDR_MAX */ + UCS_STATIC_ASSERT(((ucs_ilog2(UCS_PGT_ADDR_MAX) + 1 - UCS_PGT_ADDR_SHIFT) % + UCS_PGT_ENTRY_SHIFT) == 0); + + ucs_pgt_entry_clear(&pgtable->root); + ucs_pgtable_reset(pgtable); + pgtable->num_regions = 0; + pgtable->pgd_alloc_cb = alloc_cb; + pgtable->pgd_release_cb = release_cb; + return UCS_OK; +} +/* Does not release anything: only warns if regions are still present in the table */ +void ucs_pgtable_cleanup(ucs_pgtable_t *pgtable) +{ + if (pgtable->num_regions != 0) { + ucs_warn("page table not empty during cleanup"); + } +} diff --git a/src/ucs/datastruct/pgtable.h b/src/ucs/datastruct/pgtable.h new file mode 100644 index 0000000..e474256 --- /dev/null +++ b/src/ucs/datastruct/pgtable.h @@ -0,0 +1,258 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCS_PGTABLE_H_ +#define UCS_PGTABLE_H_ + +#include +#include +#include + +/* + * The Page Table data structure organizes non-overlapping regions of memory in + * an efficient radix tree, optimized for large and/or aligned regions. + * + * A page table entry can point to either a region (indicated by setting the + * UCS_PGT_ENTRY_FLAG_REGION bit), or a directory of further entries (indicated + * by the UCS_PGT_ENTRY_FLAG_DIR bit), or be empty - if none of these bits is set. 
+ * + */ + + + +/* Address alignment requirements */ +#define UCS_PGT_ADDR_SHIFT 4 +#define UCS_PGT_ADDR_ALIGN (1ul << UCS_PGT_ADDR_SHIFT) +#define UCS_PGT_ADDR_ORDER (sizeof(ucs_pgt_addr_t) * 8) /* address width, in bits */ +#define UCS_PGT_ADDR_MAX ((ucs_pgt_addr_t)-1) /* all-ones address */ + +/* Page table entry/directory constants */ +#define UCS_PGT_ENTRY_SHIFT 4 +#define UCS_PGT_ENTRIES_PER_DIR (1ul << (UCS_PGT_ENTRY_SHIFT)) +#define UCS_PGT_ENTRY_MASK (UCS_PGT_ENTRIES_PER_DIR - 1) + +/* Page table pointer constants and flags. The flags occupy the low bits of an + * entry value, so anything an entry points to must be aligned to at least + * UCS_PGT_ENTRY_MIN_ALIGN. */ +#define UCS_PGT_ENTRY_FLAG_REGION UCS_BIT(0) +#define UCS_PGT_ENTRY_FLAG_DIR UCS_BIT(1) +#define UCS_PGT_ENTRY_FLAGS_MASK (UCS_PGT_ENTRY_FLAG_REGION|UCS_PGT_ENTRY_FLAG_DIR) +#define UCS_PGT_ENTRY_PTR_MASK (~UCS_PGT_ENTRY_FLAGS_MASK) +#define UCS_PGT_ENTRY_MIN_ALIGN (UCS_PGT_ENTRY_FLAGS_MASK + 1) + +/* Declare a variable as aligned so it could be placed in page table entry */ +#define UCS_PGT_ENTRY_V_ALIGNED UCS_V_ALIGNED(UCS_PGT_ENTRY_MIN_ALIGN > sizeof(long) ? \ + UCS_PGT_ENTRY_MIN_ALIGN : sizeof(long)) + +/* printf-style format, and the matching argument list, for logging a region */ +#define UCS_PGT_REGION_FMT "%p [0x%lx..0x%lx]" +#define UCS_PGT_REGION_ARG(_region) (_region), (_region)->start, (_region)->end + + +/* Define the address type */ +typedef unsigned long ucs_pgt_addr_t; + +/* Forward declarations */ +typedef struct ucs_pgtable ucs_pgtable_t; +typedef struct ucs_pgt_region ucs_pgt_region_t; +typedef struct ucs_pgt_entry ucs_pgt_entry_t; +typedef struct ucs_pgt_dir ucs_pgt_dir_t; + + +/** + * Callback for allocating a page table directory. + * + * @param [in] pgtable Pointer to the page table to allocate the directory for. + * + * @return Pointer to newly allocated pgdir, or NULL if failed. The pointer must + * be aligned to UCS_PGT_ENTRY_MIN_ALIGN boundary. + */ +typedef ucs_pgt_dir_t* (*ucs_pgt_dir_alloc_callback_t)(const ucs_pgtable_t *pgtable); + + +/** + * Callback for releasing a page table directory. + * + * @param [in] pgtable Pointer to the page table in which the directory was + * allocated. 
+ * @param [in] pgdir Page table directory to release. + */ +typedef void (*ucs_pgt_dir_release_callback_t)(const ucs_pgtable_t *pgtable, + ucs_pgt_dir_t *pgdir); + + +/** + * Callback for searching for regions in the page table. + * + * @param [in] pgtable The page table. + * @param [in] region Found region. + * @param [in] arg User-defined argument. + */ +typedef void (*ucs_pgt_search_callback_t)(const ucs_pgtable_t *pgtable, + ucs_pgt_region_t *region, void *arg); + + +/** + * Memory region in the page table. + * The structure is declared with UCS_PGT_ENTRY_V_ALIGNED, so that the low flag + * bits of a page table entry pointing to it are free. + * NOTE(review): the original text also required "the pointers in it" to be + * aligned to 2^PTR_SHIFT; no such macro is defined in this header - confirm + * the intended alignment of 'start' and 'end'. + */ +struct ucs_pgt_region { + ucs_pgt_addr_t start; /**< Region start address */ + ucs_pgt_addr_t end; /**< Region end address */ +} UCS_PGT_ENTRY_V_ALIGNED; + + +/** + * Page table entry: + * + * +--------------------+---+---+ + * | pointer (MSB) | d | r | + * +--------------------+---+---+ + * | | | | + * 64 2 1 0 + * + * 'r' is UCS_PGT_ENTRY_FLAG_REGION and 'd' is UCS_PGT_ENTRY_FLAG_DIR. + */ +struct ucs_pgt_entry { + ucs_pgt_addr_t value; /**< Pointer + type bits. Can point + to either a @ref ucs_pgt_dir_t or + a @ref ucs_pgt_region_t. */ +}; + + +/** + * Page table directory. + */ +struct ucs_pgt_dir { + ucs_pgt_entry_t entries[UCS_PGT_ENTRIES_PER_DIR]; + unsigned count; /**< Number of valid entries */ +}; + + +/* Page table structure */ +struct ucs_pgtable { + + /* Maps addresses whose (63-shift) high bits equal to value + * This means: value * (2**shift) .. value * (2**(shift+1)) - 1 + * NOTE(review): 'value' here presumably refers to the 'base' field - confirm. + */ + ucs_pgt_entry_t root; /**< root entry */ + ucs_pgt_addr_t base; /**< base address */ + ucs_pgt_addr_t mask; /**< mask for page table address range */ + unsigned shift; /**< page table address span is 2**shift */ + unsigned num_regions; /**< total number of regions */ + ucs_pgt_dir_alloc_callback_t pgd_alloc_cb; /**< directory allocation callback */ + ucs_pgt_dir_release_callback_t pgd_release_cb; /**< directory release callback */ +}; + + +/** + * Initialize a page table. + * + * @param [in] pgtable Page table to initialize. 
+ * @param [in] alloc_cb Callback that will be used to allocate page directory, + * which is the basic building block of the page table + * data structure. This may allow the page table functions + * to be safe to use from memory allocation context. + * @param [in] release_cb Callback to release memory which was allocated by alloc_cb. + */ +ucs_status_t ucs_pgtable_init(ucs_pgtable_t *pgtable, + ucs_pgt_dir_alloc_callback_t alloc_cb, + ucs_pgt_dir_release_callback_t release_cb); + +/** + * Clean up the page table. + * NOTE(review): the implementation in pgtable.c only warns when regions are + * still present and releases nothing, so remove or purge all regions first. + * + * @param [in] pgtable Page table to clean up. + */ +void ucs_pgtable_cleanup(ucs_pgtable_t *pgtable); + + +/** + * Add a memory region to the page table. + * + * @param [in] pgtable Page table to insert the region to. + * @param [in] region Memory region to insert. The region must remain valid + * and unchanged as long as it's in the page table. + * + * @return UCS_OK - region was added. + * UCS_ERR_INVALID_PARAM - memory region address is invalid (misaligned or empty) + * UCS_ERR_ALREADY_EXISTS - the region overlaps with existing region. + * + */ +ucs_status_t ucs_pgtable_insert(ucs_pgtable_t *pgtable, ucs_pgt_region_t *region); + + +/** + * Remove a memory region from the page table. + * + * @param [in] pgtable Page table to remove the region from. + * @param [in] region Memory region to remove. This must be the same pointer + * passed to @ref ucs_pgtable_insert. + * + * @return UCS_OK - region was removed. + * NOTE(review): the error codes below appear to be copied from + * @ref ucs_pgtable_insert - confirm against the implementation. + * UCS_ERR_INVALID_PARAM - memory region address is invalid (misaligned or empty) + * UCS_ERR_ALREADY_EXISTS - the region overlaps with existing region. + * + */ +ucs_status_t ucs_pgtable_remove(ucs_pgtable_t *pgtable, ucs_pgt_region_t *region); + + +/** + * Find a region which contains the given address. + * + * @param [in] pgtable Page table to search the address in. + * @param [in] address Address to search. + * + * @return Region which contains 'address', or NULL if not found. 
+ */ +ucs_pgt_region_t *ucs_pgtable_lookup(const ucs_pgtable_t *pgtable, + ucs_pgt_addr_t address); + + +/** + * Search for all regions overlapping with a given address range. + * + * @param [in] pgtable Page table to search the range in. + * @param [in] from Lower bound of the range. + * @param [in] to Upper bound of the range (inclusive). + * @param [in] cb Callback to be called for every region found. + * The callback must not modify the page table. + * @param [in] arg User-defined argument to the callback. + */ +void ucs_pgtable_search_range(const ucs_pgtable_t *pgtable, + ucs_pgt_addr_t from, ucs_pgt_addr_t to, + ucs_pgt_search_callback_t cb, void *arg); + + +/** + * Remove all regions from the page table and call the provided callback for each. + * + * @param [in] pgtable Page table to clean up. + * @param [in] cb Callback to be called for every region, right after the + * attempt to remove that region. Regions are removed one + * by one, so other regions may still be in the table. + * The callback must not modify the page table. + * @param [in] arg User-defined argument to the callback. + */ +void ucs_pgtable_purge(ucs_pgtable_t *pgtable, ucs_pgt_search_callback_t cb, + void *arg); + + +/** + * Dump page table to log. + * + * @param [in] pgtable Page table to dump. + * @param [in] log_level Which log level to use. + */ +void ucs_pgtable_dump(const ucs_pgtable_t *pgtable, ucs_log_level_t log_level); + + +/** + * @param [in] pgtable Page table to query. + * + * @return Number of regions currently present in the page table. + */ +static inline unsigned ucs_pgtable_num_regions(const ucs_pgtable_t *pgtable) +{ + return pgtable->num_regions; +} + + +#endif diff --git a/src/ucs/datastruct/ptr_array.c b/src/ucs/datastruct/ptr_array.c new file mode 100644 index 0000000..86bc56d --- /dev/null +++ b/src/ucs/datastruct/ptr_array.c @@ -0,0 +1,208 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ptr_array.h" + +#include +#include +#include +#include + + +/* Initial allocation size */ +#define UCS_PTR_ARRAY_INITIAL_SIZE 8 + + +/* @return Nonzero if 'index' is inside the array and its slot is marked free. + * An out-of-range index is reported as "not free". */ +static inline int ucs_ptr_array_is_free(ucs_ptr_array_t *ptr_array, unsigned index) +{ + return (index < ptr_array->size) && + __ucs_ptr_array_is_free(ptr_array->start[index]); +} + +/* @return The placeholder stored in the upper bits of a free element */ +static inline uint32_t ucs_ptr_array_placeholder_get(ucs_ptr_array_elem_t elem) +{ + ucs_assert(__ucs_ptr_array_is_free(elem)); + return elem >> UCS_PTR_ARRAY_PLCHDR_SHIFT; +} + +/* Store 'placeholder' in the upper bits of '*elem', keeping the other bits */ +static inline void ucs_ptr_array_placeholder_set(ucs_ptr_array_elem_t *elem, + uint32_t placeholder) +{ + *elem = (*elem & ~UCS_PTR_ARRAY_PLCHDR_MASK) | + (((ucs_ptr_array_elem_t)placeholder) << UCS_PTR_ARRAY_PLCHDR_SHIFT); +} + +/* @return The index of the next free slot stored in a free element */ +static inline unsigned +ucs_ptr_array_freelist_get_next(ucs_ptr_array_elem_t elem) +{ + ucs_assert(__ucs_ptr_array_is_free(elem)); + return (elem & UCS_PTR_ARRAY_NEXT_MASK) >> UCS_PTR_ARRAY_NEXT_SHIFT; +} + +/* Store 'next' as the next-free-slot index of '*elem', keeping the other bits */ +static inline void +ucs_ptr_array_freelist_set_next(ucs_ptr_array_elem_t *elem, unsigned next) +{ + /* 'next' is stored shifted by UCS_PTR_ARRAY_NEXT_SHIFT, so the largest value + * which fits the field is UCS_PTR_ARRAY_SENTINEL, not the shifted field + * mask; anything larger would spill into the placeholder bits. */ + ucs_assert(next <= UCS_PTR_ARRAY_SENTINEL); + *elem = (*elem & ~UCS_PTR_ARRAY_NEXT_MASK) | + (((ucs_ptr_array_elem_t)next) << UCS_PTR_ARRAY_NEXT_SHIFT); +} + +/* Trace the contents of the array and its free list; compiled to a no-op + * unless UCS_ENABLE_ASSERT is set */ +static void UCS_F_MAYBE_UNUSED ucs_ptr_array_dump(ucs_ptr_array_t *ptr_array) +{ +#if UCS_ENABLE_ASSERT + unsigned i; + + ucs_trace_data("ptr_array start %p size %u", ptr_array->start, ptr_array->size); + for (i = 0; i < ptr_array->size; ++i) { + if (ucs_ptr_array_is_free(ptr_array, i)) { + ucs_trace_data("[%u]= (%u)", i, + ucs_ptr_array_placeholder_get(ptr_array->start[i])); + } else { + ucs_trace_data("[%u]=%p", i, (void*)ptr_array->start[i]); + } + } + + ucs_trace_data("freelist:"); + i = ptr_array->freelist; + while (i != UCS_PTR_ARRAY_SENTINEL) { + ucs_trace_data("[%u] %p", i, &ptr_array->start[i]); + i = ucs_ptr_array_freelist_get_next(ptr_array->start[i]); + } +#endif +} + +static void 
ucs_ptr_array_clear(ucs_ptr_array_t *ptr_array) +{ + /* Empty array: no storage and an empty free list */ + ptr_array->start = NULL; + ptr_array->size = 0; + ptr_array->freelist = UCS_PTR_ARRAY_SENTINEL; +} + +/* Initialize an empty array; 'name' is stored only when ENABLE_MEMTRACK is set */ +void ucs_ptr_array_init(ucs_ptr_array_t *ptr_array, uint32_t init_placeholder, + const char *name) +{ + ptr_array->init_placeholder = init_placeholder; + ucs_ptr_array_clear(ptr_array); +#if ENABLE_MEMTRACK + ucs_snprintf_zero(ptr_array->name, sizeof(ptr_array->name), "%s", name); +#endif +} + +/* Release the storage; warns if some slots are still in use */ +void ucs_ptr_array_cleanup(ucs_ptr_array_t *ptr_array) +{ + unsigned i, inuse; + + inuse = 0; + for (i = 0; i < ptr_array->size; ++i) { + if (!ucs_ptr_array_is_free(ptr_array, i)) { + ++inuse; + ucs_trace("ptr_array(%p) idx %u is not free during cleanup", ptr_array, i); + } + } + + if (inuse > 0) { + ucs_warn("releasing ptr_array with %u used items", inuse); + } + + ucs_free(ptr_array->start); + ucs_ptr_array_clear(ptr_array); +} + +/* + * Grow the array: UCS_PTR_ARRAY_INITIAL_SIZE slots for an empty array, twice + * the current size otherwise. The new slots are chained together and appended + * to the end of the free list. + */ +static void ucs_ptr_array_grow(ucs_ptr_array_t *ptr_array UCS_MEMTRACK_ARG) +{ + ucs_ptr_array_elem_t *new_array; + unsigned curr_size, new_size; + unsigned i, next; + + curr_size = ptr_array->size; + if (curr_size == 0) { + new_size = UCS_PTR_ARRAY_INITIAL_SIZE; + } else { + new_size = curr_size * 2; + } + + /* Allocate new array */ + new_array = ucs_malloc(new_size * sizeof(ucs_ptr_array_elem_t) UCS_MEMTRACK_VAL); + ucs_assert_always(new_array != NULL); + if (curr_size > 0) { + /* 'start' is NULL for an empty array, and memcpy() requires valid + * pointers even for a zero-length copy */ + memcpy(new_array, ptr_array->start, curr_size * sizeof(ucs_ptr_array_elem_t)); + } + + /* Link all new array items */ + for (i = curr_size; i < new_size; ++i) { + new_array[i] = UCS_PTR_ARRAY_FLAG_FREE; + ucs_ptr_array_placeholder_set(&new_array[i], ptr_array->init_placeholder); + ucs_ptr_array_freelist_set_next(&new_array[i], i + 1); + } + ucs_ptr_array_freelist_set_next(&new_array[new_size - 1], UCS_PTR_ARRAY_SENTINEL); + + /* Find last free list element */ + if (ptr_array->freelist == UCS_PTR_ARRAY_SENTINEL) { + ptr_array->freelist = curr_size; + } else { + /* Walk and patch the copy: the old array is released below, so a + * link written into it would be lost */ + next = ptr_array->freelist; + do { + i = next; + next = 
ucs_ptr_array_freelist_get_next(new_array[i]); + } while (next != UCS_PTR_ARRAY_SENTINEL); + ucs_ptr_array_freelist_set_next(&new_array[i], curr_size); + } + + /* Switch to new array */ + ucs_free(ptr_array->start); + ptr_array->start = new_array; + ptr_array->size = new_size; +} + +/* + * Store 'value' in the first free slot, growing the array if there is none. + * '*placeholder_p' receives the placeholder the slot held while it was free. + * Returns the index of the slot. + */ +unsigned ucs_ptr_array_insert(ucs_ptr_array_t *ptr_array, void *value, + uint32_t *placeholder_p) +{ + ucs_ptr_array_elem_t *elem; + unsigned index; + + /* Bit 0 marks a free slot, so it must be clear in a stored pointer */ + ucs_assert_always(((uintptr_t)value & UCS_PTR_ARRAY_FLAG_FREE) == 0); + + if (ptr_array->freelist == UCS_PTR_ARRAY_SENTINEL) { + ucs_ptr_array_grow(ptr_array UCS_MEMTRACK_NAME(ptr_array->name)); + } + + /* Get the first item on the free list */ + index = ptr_array->freelist; + ucs_assert(index != UCS_PTR_ARRAY_SENTINEL); + elem = &ptr_array->start[index]; + + /* Remove from free list */ + ptr_array->freelist = ucs_ptr_array_freelist_get_next(*elem); + + /* Populate */ + *placeholder_p = ucs_ptr_array_placeholder_get(*elem); + *elem = (uintptr_t)value; + return index; +} + +/* + * Mark slot 'index' as free, store 'placeholder' in it and push it to the head + * of the free list. + * NOTE(review): ucs_ptr_array_is_free() reports an out-of-range index as "not + * free", so the assertion below does not catch index >= size - confirm callers + * never pass one. + */ +void ucs_ptr_array_remove(ucs_ptr_array_t *ptr_array, unsigned index, + uint32_t placeholder) +{ + ucs_ptr_array_elem_t *elem = &ptr_array->start[index]; + + ucs_assert_always(!ucs_ptr_array_is_free(ptr_array, index)); + *elem = UCS_PTR_ARRAY_FLAG_FREE; + ucs_ptr_array_placeholder_set(elem, placeholder); + ucs_ptr_array_freelist_set_next(elem, ptr_array->freelist); + ptr_array->freelist = index; +} + +/* Replace the pointer stored in the used slot 'index'; returns the old pointer */ +void *ucs_ptr_array_replace(ucs_ptr_array_t *ptr_array, unsigned index, void *new_val) +{ + void *old_elem; + + ucs_assert_always(!ucs_ptr_array_is_free(ptr_array, index)); + old_elem = (void *)ptr_array->start[index]; + ptr_array->start[index] = (uintptr_t)new_val; + return old_elem; +} diff --git a/src/ucs/datastruct/ptr_array.h b/src/ucs/datastruct/ptr_array.h new file mode 100644 index 0000000..6d7d383 --- /dev/null +++ b/src/ucs/datastruct/ptr_array.h @@ -0,0 +1,132 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. 
+* +* See file LICENSE for terms. +*/ + +#ifndef PTR_ARRAY_H_ +#define PTR_ARRAY_H_ + +#include +#include + + +/* + * Array element layout: + * + * 64 32 1 0 + * +-----------------+----------------+---+ + * free: | placeholder | next index | 1 | + * +-----------------+----------------+---+ + * used: | user pointer | 0 | + * +-----------------+----------------+---+ + * + * + */ +typedef uint64_t ucs_ptr_array_elem_t; + + +/** + * A sparse array of pointers. + * Free slots can hold 32-bit placeholder value. + */ +typedef struct ucs_ptr_array { + uint32_t init_placeholder; /**< Placeholder given to newly created free slots */ + ucs_ptr_array_elem_t *start; /**< Element storage, NULL while the array is empty */ + unsigned freelist; /**< Index of the first free slot, or UCS_PTR_ARRAY_SENTINEL */ + unsigned size; /**< Number of allocated slots */ +#if ENABLE_MEMTRACK + char name[64]; /**< Name given at initialization */ +#endif +} ucs_ptr_array_t; + + +/* Flags added to lower bits of the value */ +#define UCS_PTR_ARRAY_FLAG_FREE ((unsigned long)0x01) /* Slot is free */ + +#define UCS_PTR_ARRAY_PLCHDR_SHIFT 32 +#define UCS_PTR_ARRAY_PLCHDR_MASK (((ucs_ptr_array_elem_t)-1) & ~UCS_MASK(UCS_PTR_ARRAY_PLCHDR_SHIFT)) +#define UCS_PTR_ARRAY_NEXT_SHIFT 1 +#define UCS_PTR_ARRAY_NEXT_MASK (UCS_MASK(UCS_PTR_ARRAY_PLCHDR_SHIFT) & ~UCS_MASK(UCS_PTR_ARRAY_NEXT_SHIFT)) +#define UCS_PTR_ARRAY_SENTINEL (UCS_PTR_ARRAY_NEXT_MASK >> UCS_PTR_ARRAY_NEXT_SHIFT) /* "no next slot" index */ + +#define __ucs_ptr_array_is_free(_elem) \ + ((uintptr_t)(_elem) & UCS_PTR_ARRAY_FLAG_FREE) + + +/** + * Initialize the array. + * + * @param ptr_array Array to initialize. + * @param init_placeholder Default placeholder value. + * @param name Array name; stored only when ENABLE_MEMTRACK is set. + */ +void ucs_ptr_array_init(ucs_ptr_array_t *ptr_array, uint32_t init_placeholder, + const char *name); + + +/** + * Cleanup the array. + * All values should already be removed from it. + */ +void ucs_ptr_array_cleanup(ucs_ptr_array_t *ptr_array); + + +/** + * Insert a pointer to the array. + * + * @param value Pointer to insert. Must be 8-byte aligned. + * NOTE(review): the implementation asserts only that + * bit 0 is clear - confirm the stricter requirement. + * @param placeholder_p Filled with placeholder value. + * @return The index to which the value was inserted. + * + * Complexity: amortized O(1) + * + * Note: The array will grow if needed. 
+ */ +unsigned ucs_ptr_array_insert(ucs_ptr_array_t *ptr_array, void *value, + uint32_t *placeholder_p); + + +/** + * Remove a pointer from the array. + * + * @param index Index to remove from. + * @param placeholder Value to put in the free slot. + * + * Complexity: O(1) + */ +void ucs_ptr_array_remove(ucs_ptr_array_t *ptr_array, unsigned index, + uint32_t placeholder); + + +/** + * Replace pointer in the array + * @param index index of slot + * @param new_val value to put into slot given by index + * @return old value of the slot + */ +void *ucs_ptr_array_replace(ucs_ptr_array_t *ptr_array, unsigned index, void *new_val); + + +/** + * Retrieve a value from the array. + * + * @param _ptr_array Array to look in. + * @param _index Index to retrieve the value from. + * @param _var Variable which is assigned the raw slot contents when + * '_index' is inside the array. + * @return Nonzero if '_index' is inside the array and the slot is in use, + * 0 otherwise. + * + * Complexity: O(1) + */ +#define ucs_ptr_array_lookup(_ptr_array, _index, _var) \ + (((_index) >= (_ptr_array)->size) ? \ + (UCS_V_INITIALIZED(_var), 0) : \ + !__ucs_ptr_array_is_free(_var = (void*)((_ptr_array)->start[_index]))) + + +/** + * Iterate over all valid elements in the array. + * + * @param _var Variable assigned the pointer stored in each used slot. + * @param _index Variable holding the index of the current slot. + * @param _ptr_array Array to iterate on. + */ +#define ucs_ptr_array_for_each(_var, _index, _ptr_array) \ + for (_index = 0; _index < (_ptr_array)->size; ++_index) \ + if (!__ucs_ptr_array_is_free(_var = (void*)((_ptr_array)->start[_index]))) + + +#endif /* PTR_ARRAY_H_ */ diff --git a/src/ucs/datastruct/queue.h b/src/ucs/datastruct/queue.h new file mode 100644 index 0000000..b4f24cc --- /dev/null +++ b/src/ucs/datastruct/queue.h @@ -0,0 +1,278 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_QUEUE_H_ +#define UCS_QUEUE_H_ + +#include "queue_types.h" + +#include +#include + + +/** + * Initialize a queue. + * + * @param queue Queue to initialize. 
+ */ +static inline void ucs_queue_head_init(ucs_queue_head_t *queue) +{ +#ifdef __clang_analyzer__ + queue->head = (ucs_queue_elem_t*)(void*)queue; +#endif + queue->ptail = &queue->head; /* empty queue: ptail points at the head pointer */ +} + +/** + * @param queue Queue to measure. + * + * @return Queue length. Computed by walking the whole queue. + */ +static inline size_t ucs_queue_length(ucs_queue_head_t *queue) +{ + ucs_queue_elem_t **pelem; + size_t length; + + length = 0; + for (pelem = &queue->head; pelem != queue->ptail; pelem = &(*pelem)->next) { + ++length; + } + return length; +} + +/** + * @param queue Queue to check. + * + * @return Whether the queue is empty. + */ +static inline int ucs_queue_is_empty(ucs_queue_head_t *queue) +{ + return queue->ptail == &queue->head; +} + +/** + * Enqueue an element to the tail of the queue. + * + * @param queue Queue to add to. + * @param elem Element to add. + */ +static inline void ucs_queue_push(ucs_queue_head_t *queue, ucs_queue_elem_t *elem) +{ + *queue->ptail = elem; + queue->ptail = &elem->next; +#if UCS_ENABLE_ASSERT + elem->next = NULL; /* For sanity check below */ +#endif +} + +/** + * Add an element to the head of the queue. + * + * @param queue Queue to add to. + * @param elem Element to add. + */ +static inline void ucs_queue_push_head(ucs_queue_head_t *queue, + ucs_queue_elem_t *elem) +{ + elem->next = queue->head; + queue->head = elem; + if (queue->ptail == &queue->head) { /* queue was empty: elem is also the tail */ + queue->ptail = &elem->next; + } +} + +/** + * Dequeue an element from the head of the queue, assuming the queue is not empty. + * + * @param queue Non-empty queue to pull from. + * @return Element from the head of the queue. + */ +static inline ucs_queue_elem_t *ucs_queue_pull_non_empty(ucs_queue_head_t *queue) +{ + ucs_queue_elem_t *elem; + + elem = queue->head; + queue->head = elem->next; + if (queue->ptail == &elem->next) { /* pulled the last element: queue is now empty */ + queue->ptail = &queue->head; + } + return elem; +} + +/** + * Delete an element. + * The element must be valid when deleting it. + * After the call, iter points to the next element, and the element may be released. 
+ */ +static inline void ucs_queue_del_iter(ucs_queue_head_t *queue, ucs_queue_iter_t iter) +{ + ucs_assert((iter != NULL) && (*iter != NULL)); + + if (queue->ptail == &(*iter)->next) { + queue->ptail = iter; /* deleting the last element */ + *iter = NULL; /* make *ptail point to NULL */ + } else { + *iter = (*iter)->next; + } + + /* Sanity check */ + ucs_assertv((queue->head != NULL) || (queue->ptail == &queue->head), + "head=%p ptail=%p &head=%p iter=%p", queue->head, queue->ptail, + &queue->head, iter); + + /* If the queue is empty, head must point to null */ + ucs_assertv((queue->ptail != &queue->head) || (queue->head == NULL), + "head=%p ptail=%p &head=%p iter=%p", queue->head, queue->ptail, + &queue->head, iter); +} + +/** + * Dequeue an element from the head of the queue. + * + * @param queue Queue to pull from. + * @return Element from the head of the queue, or NULL if the queue is empty. + */ +static inline ucs_queue_elem_t *ucs_queue_pull(ucs_queue_head_t *queue) +{ + if (ucs_queue_is_empty(queue)) + return NULL; + return ucs_queue_pull_non_empty(queue); +} + +/** + * Append all elements of one queue to the tail of another queue, leaving the + * source queue ('new_elems') empty. Nothing is done if 'new_elems' is empty. + * + * @param queue Queue to push elements to. + * @param new_elems Queue of elements to add. + */ +static inline void ucs_queue_splice(ucs_queue_head_t *queue, + ucs_queue_head_t *new_elems) +{ + if (!ucs_queue_is_empty(new_elems)) { + *queue->ptail = new_elems->head; + queue->ptail = new_elems->ptail; + new_elems->ptail = &new_elems->head; + } +} + +/** + * Convenience macro to pull from a non-empty queue and return the containing element. + * + * @param queue Non-empty queue to pull from. + * @param type Container element type. + * @param member Queue element member inside the container. + * + * @return Pulled element. 
+ */ +#define ucs_queue_pull_elem_non_empty(queue, type, member) \ + ucs_container_of(ucs_queue_pull_non_empty(queue), type, member) + +/** + * Convenience macro to get the head element of a non-empty queue. + * + * @param queue Non-empty queue whose head element to get. + * @param type Container element type. + * @param member Queue element member inside the container. + * + * @return Head element. + */ +#define ucs_queue_head_elem_non_empty(queue, type, member) \ + ucs_container_of((queue)->head, type, member) + +/** + * Convenience macro to get the tail element of a non-empty queue. + * + * @param queue Non-empty queue whose tail element to get. + * @param type Container element type. + * @param member Queue element member inside the container. + * + * @return Tail element. + * + * NOTE(review): 'ptail' is the address of the last element's 'next' field, so + * this presumably relies on 'next' being at offset 0 of the queue element - confirm. + */ +#define ucs_queue_tail_elem_non_empty(queue, type, member) \ + ucs_container_of((queue)->ptail, type, member) + +/** + * Iterate over queue elements. The queue must not be modified during the iteration. + * + * @param elem Variable which will point to the element in the queue. + * @param queue Queue to iterate on. + * @param member Member inside 'elem' which is the queue link. + */ +#define ucs_queue_for_each(elem, queue, member) \ + /* we set `*ptail` to the queue address, as an end marker, to not subtract NULL pointer */ \ + for (*(queue)->ptail = (ucs_queue_elem_t*)(void*)(queue), \ + elem = ucs_container_of((queue)->head, typeof(*elem), member); \ + (elem) != ucs_container_of((ucs_queue_elem_t*)(void*)(queue), \ + typeof(*elem), member); \ + elem = ucs_container_of(elem->member.next, typeof(*elem), member)) + +/** + * Iterate over queue elements. The current element may be safely removed from + * the queue using ucs_queue_del_iter(). + * + * @param elem Variable which will point to the element in the queue. + * @param iter Iterator variable. May be passed to ucs_queue_del_iter(). + * @param queue Queue to iterate on. + * @param member Member inside 'elem' which is the queue link. 
+ */ +#define ucs_queue_for_each_safe(elem, iter, queue, member) \ + for (iter = &(queue)->head, \ + elem = ucs_container_of(*iter, typeof(*elem), member); \ + iter != (queue)->ptail; \ + iter = (*iter == &elem->member) ? &(*iter)->next : iter, \ + elem = ucs_container_of(*iter, typeof(*elem), member)) + +/** + * Iterate and extract elements from the queue while a condition is true. + * Every element for which the loop body runs has already been pulled from the queue. + * + * @param elem Variable which will point to the element in the queue. + * @param queue Queue to iterate on. + * @param member Member inside 'elem' which is the queue link. + * @param cond Condition to continue iterating. + * + * TODO optimize + */ +#define ucs_queue_for_each_extract(elem, queue, member, cond) \ + for (elem = ucs_container_of((queue)->head, typeof(*elem), member); \ + \ + !ucs_queue_is_empty(queue) && (cond) && ucs_queue_pull_non_empty(queue); \ + \ + elem = ucs_container_of((queue)->head, typeof(*elem), member)) + + +/* + * Queue iteration + */ +/** @return Iterator positioned at the first element of 'q'. */ +static inline ucs_queue_iter_t ucs_queue_iter_begin(ucs_queue_head_t *q) +{ + return &q->head; +} +/** @return Iterator advanced past the element 'i' currently refers to. */ +static inline ucs_queue_iter_t ucs_queue_iter_next(ucs_queue_iter_t i) +{ + return &(*i)->next; +} +/** @return Nonzero if iterator 'i' has reached the end of 'q'. */ +static inline int ucs_queue_iter_end(ucs_queue_head_t *q, ucs_queue_iter_t i) +{ + return i == q->ptail; +} +/** Remove the first occurrence of 'elem' from 'queue' by a linear scan; no-op if not found. */ +static inline void ucs_queue_remove(ucs_queue_head_t *queue, ucs_queue_elem_t *elem) +{ + ucs_queue_iter_t iter = ucs_queue_iter_begin(queue); + + while (!ucs_queue_iter_end(queue, iter)) { + if (*iter == elem) { + ucs_queue_del_iter(queue, iter); + return; + } + iter = ucs_queue_iter_next(iter); + } +} +/** @return Container of the element that 'iter' refers to; 'elem' only supplies the type. */ +#define ucs_queue_iter_elem(elem, iter, member) \ + ucs_container_of(*iter, typeof(*elem), member) + +#endif diff --git a/src/ucs/datastruct/queue_types.h b/src/ucs/datastruct/queue_types.h new file mode 100644 index 0000000..410871d --- /dev/null +++ b/src/ucs/datastruct/queue_types.h @@ -0,0 +1,33 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. 
+* +* See file LICENSE for terms. +*/ + +#ifndef UCS_QUEUE_TYPES_H_ +#define UCS_QUEUE_TYPES_H_ + + +typedef struct ucs_queue_elem ucs_queue_elem_t; +typedef struct ucs_queue_head ucs_queue_head_t; +typedef ucs_queue_elem_t** ucs_queue_iter_t; /* address of the pointer which refers to the current element */ + + +/** + * Queue element type. + * Meant to be embedded as a member of the structure being queued. + */ +struct ucs_queue_elem { + ucs_queue_elem_t *next; /**< Next element in the queue */ +}; + + +/** + * Queue type. + */ +struct ucs_queue_head { + ucs_queue_elem_t *head; /**< First element in the queue */ + ucs_queue_elem_t **ptail; /**< Address of the last element's 'next' field, + or of 'head' when the queue is empty */ +}; + + +#endif diff --git a/src/ucs/datastruct/sglib.h b/src/ucs/datastruct/sglib.h new file mode 100644 index 0000000..1a4780f --- /dev/null +++ b/src/ucs/datastruct/sglib.h @@ -0,0 +1,1952 @@ +/* + + This is SGLIB version 1.0.3 + + (C) by Marian Vittek, Bratislava, http://www.xref-tech.com/sglib, 2003-5 + + License Conditions: You can use a verbatim copy (including this + copyright notice) of sglib freely in any project, commercial or not. + You can also use derivative forms freely under terms of Open Source + Software license or under terms of GNU Public License. If you need + to use a derivative form in a commercial project, or you need sglib + under any other license conditions, contact the author. + + + +*/ + + +#ifndef _SGLIB__h_ +#define _SGLIB__h_ + +/* the assert is used exclusively to write unexpected error messages */ +#include + + +/* ---------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------------- */ +/* - LEVEL - 0 INTERFACE - */ +/* ---------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------------- */ + + +/* ---------------------------------------------------------------------------- */ +/* ------------------------------ STATIC ARRAYS ------------------------------- */ +/* ---------------------------------------------------------------------------- */ + +/* + + Basic algorithms for sorting arrays. 
Multiple depending arrays can + be rearranged using user defined 'elem_exchangers' + +*/ + +/* HEAP - SORT (level 0) */ + +#define SGLIB_ARRAY_SINGLE_HEAP_SORT(type, a, max, comparator) {\ + SGLIB_ARRAY_HEAP_SORT(type, a, max, comparator, SGLIB_ARRAY_ELEMENTS_EXCHANGER);\ +} + +#define SGLIB_ARRAY_HEAP_SORT(type, a, max, comparator, elem_exchanger) {\ + int _k_;\ + for(_k_=(max)/2; _k_>=0; _k_--) {\ + SGLIB___ARRAY_HEAP_DOWN(type, a, _k_, max, comparator, elem_exchanger);\ + }\ + for(_k_=(max)-1; _k_>=0; _k_--) {\ + elem_exchanger(type, a, 0, _k_);\ + SGLIB___ARRAY_HEAP_DOWN(type, a, 0, _k_, comparator, elem_exchanger);\ + }\ +} + +#define SGLIB___ARRAY_HEAP_DOWN(type, a, ind, max, comparator, elem_exchanger) {\ + type _t_;\ + int _m_, _l_, _r_, _i_;\ + _i_ = (ind);\ + _m_ = _i_;\ + do {\ + _i_ = _m_; \ + _l_ = 2*_i_+1;\ + _r_ = _l_+1;\ + if (_l_ < (max)){\ + if (comparator(((a)[_m_]), ((a)[_l_])) < 0) _m_ = _l_;\ + if (_r_ < (max)) {\ + if (comparator(((a)[_m_]), ((a)[_r_])) < 0) _m_ = _r_;\ + }\ + }\ + if (_m_ != _i_) {\ + elem_exchanger(type, a, _i_, _m_);\ + }\ + } while (_m_ != _i_);\ +} + + +/* QUICK - SORT (level 0) */ + +#define SGLIB_ARRAY_SINGLE_QUICK_SORT(type, a, max, comparator) {\ + SGLIB_ARRAY_QUICK_SORT(type, a, max, comparator, SGLIB_ARRAY_ELEMENTS_EXCHANGER);\ +} + +#define SGLIB_ARRAY_QUICK_SORT(type, a, max, comparator, elem_exchanger) {\ + int _i_, _j_, _p_, _stacki_, _start_, _end_;\ + /* can sort up to 2^64 elements */\ + int _startStack_[64]; \ + int _endStack_[64];\ + type _tmp_;\ + _startStack_[0] = 0;\ + _endStack_[0] = (max);\ + _stacki_ = 1;\ + while (_stacki_ > 0) {\ + _stacki_ --;\ + _start_ = _startStack_[_stacki_];\ + _end_ = _endStack_[_stacki_];\ + while (_end_ - _start_ > 2) {\ + _p_ = _start_;\ + _i_ = _start_ + 1;\ + _j_ = _end_ - 1;\ + while (_i_<_j_) {\ + for(; _i_<=_j_ && comparator(((a)[_i_]),((a)[_p_]))<=0; _i_++) ;\ + if (_i_ > _j_) {\ + /* all remaining elements lesseq than pivot */\ + elem_exchanger(type, a, _j_, _p_);\ 
+ _i_ = _j_;\ + } else {\ + for(; _i_<=_j_ && comparator(((a)[_j_]),((a)[_p_]))>=0; _j_--) ;\ + if (_i_ > _j_) {\ + /* all remaining elements greater than pivot */\ + elem_exchanger(type, a, _j_, _p_);\ + _i_ = _j_;\ + } else if (_i_ < _j_) {\ + elem_exchanger(type, a, _i_, _j_);\ + if (_i_+2 < _j_) {_i_++; _j_--;}\ + else if (_i_+1 < _j_) _i_++;\ + }\ + }\ + }\ + /* O.K. i==j and pivot is on a[i] == a[j] */\ + /* handle recursive calls without recursion */\ + if (_i_-_start_ > 1 && _end_-_j_ > 1) {\ + /* two recursive calls, use array-stack */\ + if (_i_-_start_ < _end_-_j_-1) {\ + _startStack_[_stacki_] = _j_+1;\ + _endStack_[_stacki_] = _end_;\ + _stacki_ ++;\ + _end_ = _i_;\ + } else {\ + _startStack_[_stacki_] = _start_;\ + _endStack_[_stacki_] = _i_;\ + _stacki_ ++;\ + _start_ = _j_+1;\ + }\ + } else {\ + if (_i_-_start_ > 1) {\ + _end_ = _i_;\ + } else {\ + _start_ = _j_+1;\ + }\ + }\ + }\ + if (_end_ - _start_ == 2) {\ + if (comparator(((a)[_start_]),((a)[_end_-1])) > 0) {\ + elem_exchanger(type, a, _start_, _end_-1);\ + }\ + }\ + }\ +} + +/* BINARY SEARCH (level 0) */ + +#define SGLIB_ARRAY_BINARY_SEARCH(type, a, start_index, end_index, key, comparator, found, result_index) {\ + int _kk_, _cc_, _ii_, _jj_, _ff_;\ + _ii_ = (start_index); \ + _jj_ = (end_index);\ + _ff_ = 0;\ + while (_ii_ <= _jj_ && _ff_==0) {\ + _kk_ = (_jj_+_ii_)/2;\ + _cc_ = comparator(((a)[_kk_]), (key));\ + if (_cc_ == 0) {\ + (result_index) = _kk_; \ + _ff_ = 1;\ + } else if (_cc_ < 0) {\ + _ii_ = _kk_+1;\ + } else {\ + _jj_ = _kk_-1;\ + }\ + }\ + if (_ff_ == 0) {\ + /* not found, but set its resulting place in the array */\ + (result_index) = _jj_+1;\ + }\ + (found) = _ff_;\ +} + +/* -------------------------------- queue (in an array) ------------------ */ +/* queue is a quadruple (a,i,j,dim) such that: */ +/* a is the array storing values */ +/* i is the index of the first used element in the array */ +/* j is the index of the first free element in the array */ +/* dim is the size 
of the array a */
+/* !!!!!!! This data structure is NOT documented, do not use it !!!!!!!!!! */
+
+/* The macros below treat (a,i,j,dim) as a circular buffer: both indexes wrap */
+/* modulo dim, the queue is empty when i == j and full when advancing j would */
+/* make it equal to i, so at most dim-1 elements are stored at any time.      */
+/* Every use of a macro parameter is parenthesized so that expression         */
+/* arguments (for example "base + offset") expand with the intended           */
+/* precedence.                                                                */
+/* The full/empty checks are asserts: they vanish when NDEBUG is defined.     */
+
+/* Make the queue empty by resetting both indexes to 0. */
+#define SGLIB_QUEUE_INIT(type, a, i, j) { (i) = (j) = 0; }
+/* Non-zero when the queue holds no element. */
+#define SGLIB_QUEUE_IS_EMPTY(type, a, i, j) ((i)==(j))
+/* Non-zero when no further element can be added. */
+#define SGLIB_QUEUE_IS_FULL(type, a, i, j, dim) ((i)==((j)+1)%(dim))
+/* Lvalue of the oldest element; only meaningful on a non-empty queue. */
+#define SGLIB_QUEUE_FIRST_ELEMENT(type, a, i, j) ((a)[i])
+/* Commit the slot a[j] (already filled by the caller) by advancing j. */
+#define SGLIB_QUEUE_ADD_NEXT(type, a, i, j, dim) {\
+  if (SGLIB_QUEUE_IS_FULL(type, a, i, j, dim)) assert(0 && "the queue is full");\
+  (j) = ((j)+1) % (dim);\
+}
+/* Store elem in the first free slot and advance j. */
+#define SGLIB_QUEUE_ADD(type, a, elem, i, j, dim) {\
+  (a)[j] = (elem);\
+  SGLIB_QUEUE_ADD_NEXT(type, a, i, j, dim);\
+}
+/* Drop the oldest element by advancing i. */
+#define SGLIB_QUEUE_DELETE_FIRST(type, a, i, j, dim) {\
+  if (SGLIB_QUEUE_IS_EMPTY(type, a, i, j)) assert(0 && "the queue is empty");\
+  (i) = ((i)+1) % (dim);\
+}
+/* Alias of SGLIB_QUEUE_DELETE_FIRST. */
+#define SGLIB_QUEUE_DELETE(type, a, i, j, dim) {\
+  SGLIB_QUEUE_DELETE_FIRST(type, a, i, j, dim);\
+}
+
+/* ----------------- priority queue (heap) (in an array) -------------------- */
+/* heap is a triple (a,i,dim) such that: */
+/*  a is the array storing values */
+/*  i is the index of the first free element in the array */
+/*  dim is the size of the array a */
+/* !!!!!!! This data structure is NOT documented, do not use it !!!!!!!!!! 
*/
+
+/* The heap is rooted at a[0] (see SGLIB_HEAP_FIRST_ELEMENT and the re-heapify */
+/* from index 0 in SGLIB_HEAP_DELETE_FIRST); the element for which comparator  */
+/* is greatest sits at the root.                                               */
+/* The full/empty checks are asserts: they vanish when NDEBUG is defined.      */
+
+/* Make the heap empty. */
+#define SGLIB_HEAP_INIT(type, a, i) { (i) = 0; }
+/* Non-zero when the heap holds no element. */
+#define SGLIB_HEAP_IS_EMPTY(type, a, i) ((i)==0)
+/* Non-zero when all dim slots are used. */
+#define SGLIB_HEAP_IS_FULL(type, a, i, dim) ((i)==(dim))
+/* Lvalue of the root element; only meaningful on a non-empty heap. */
+#define SGLIB_HEAP_FIRST_ELEMENT(type, a, i) ((a)[0])
+/* Commit the element the caller already stored in a[i]: bump i and sift the  */
+/* new element up until its parent is not smaller.                            */
+#define SGLIB_HEAP_ADD_NEXT(type, a, i, dim, comparator, elem_exchanger) {\
+  int _i_;\
+  if (SGLIB_HEAP_IS_FULL(type, a, i, dim)) assert(0 && "the heap is full");\
+  _i_ = (i)++;\
+  /* with the root at index 0 the parent of node k is (k-1)/2, not k/2 */\
+  while (_i_ > 0 && comparator((a)[(_i_-1)/2], (a)[_i_]) < 0) {\
+    elem_exchanger(type, a, ((_i_-1)/2), _i_);\
+    _i_ = (_i_-1)/2;\
+  }\
+}
+/* Insert elem using the default element exchanger. */
+#define SGLIB_HEAP_ADD(type, a, elem, i, dim, comparator) {\
+  if (SGLIB_HEAP_IS_FULL(type, a, i, dim)) assert(0 && "the heap is full");\
+  (a)[i] = (elem);\
+  SGLIB_HEAP_ADD_NEXT(type, a, i, dim, comparator, SGLIB_ARRAY_ELEMENTS_EXCHANGER);\
+}
+/* Remove the root: move the last element to a[0] and sift it down. */
+#define SGLIB_HEAP_DELETE_FIRST(type, a, i, dim, comparator, elem_exchanger) {\
+  if (SGLIB_HEAP_IS_EMPTY(type, a, i)) assert(0 && "the heap is empty");\
+  (i)--;\
+  (a)[0] = (a)[i];\
+  SGLIB___ARRAY_HEAP_DOWN(type, a, 0, i, comparator, elem_exchanger);\
+}
+/* Remove the root using the default element exchanger. */
+#define SGLIB_HEAP_DELETE(type, a, i, dim, comparator) {\
+  SGLIB_HEAP_DELETE_FIRST(type, a, i, dim, comparator, SGLIB_ARRAY_ELEMENTS_EXCHANGER);\
+}
+
+
+/* ----------------- hashed table of pointers (in an array) -------------------- */
+
+/*
+
+  This hashed table is storing pointers to objects (not containers).
+  In this table there is a one-to-one mapping between 'objects' stored
+  in the table and indexes where they are placed. Each index is
+  pointing to exactly one 'object' and each 'object' stored in the
+  table occurs on exactly one index. Once an object is stored in the
+  table, it can be represented via its index.
+
+  In case of collision while adding an object the index shifted
+  by SGLIB_HASH_TAB_SHIFT_CONSTANT (constant can be redefined)
+
+  You can NOT delete an element from such hash table. 
The only
+  justification (I can see) for this data structure is an exchange
+  file format, having an index table at the beginning and then
+  referring objects via indexes.
+
+  !!!!!!! This data structure is NOT documented, do not use it !!!!!!!!!!
+
+*/
+
+/* Set every slot of the table to NULL (NULL marks a free slot). */
+#define SGLIB_HASH_TAB_INIT(type, table, dim) {\
+  int _i_;\
+  for(_i_ = 0; _i_ < (dim); _i_++) (table)[_i_] = NULL;\
+}
+
+/* Store elem unless an element comparing equal is already present.          */
+/* member receives the previous content of the probed slot: NULL when elem   */
+/* was stored, the equal element when one was found. A table without any     */
+/* free slot on the probe sequence is reported through assert only.          */
+#define SGLIB_HASH_TAB_ADD_IF_NOT_MEMBER(type, table, dim, elem, hash_function, comparator, member){\
+  unsigned _pos_;\
+  type *_elem_;\
+  SGLIB_HASH_TAB_FIND_MEMBER(type, table, dim, elem, hash_function, comparator, _pos_, _elem_);\
+  (member) = (table)[_pos_];\
+  if (_elem_ == NULL) {\
+    if ((table)[_pos_] != NULL) assert(0 && "the hash table is full");\
+    (table)[_pos_] = (elem);\
+  }\
+}
+
+/* Probe from hash_function(elem) % dim in steps of                          */
+/* SGLIB_HASH_TAB_SHIFT_CONSTANT until a free slot or an element comparing   */
+/* equal to elem is reached, giving up after dim probes.                     */
+/*  resultIndex  - index where probing stopped                               */
+/*  resultMember - the equal element, or NULL when none was found            */
+#define SGLIB_HASH_TAB_FIND_MEMBER(type, table, dim, elem, hash_function, comparator, resultIndex, resultMember) {\
+  unsigned _i_;\
+  int _count_;\
+  type *_e_;\
+  _count_ = 0;\
+  _i_ = hash_function(elem);\
+  _i_ %= (dim);\
+  while ((_e_=(table)[_i_])!=NULL && comparator(_e_, (elem))!=0 && _count_<(dim)) {\
+    _count_ ++;\
+    _i_ = (_i_ + SGLIB_HASH_TAB_SHIFT_CONSTANT) % (dim);\
+  }\
+  (resultIndex) = _i_;\
+  if (_count_ < (dim)) (resultMember) = _e_;\
+  else (resultMember) = NULL;\
+}
+
+/* Same probing as SGLIB_HASH_TAB_FIND_MEMBER but compares pointers instead  */
+/* of calling a comparator. resultIndex receives the slot holding exactly    */
+/* this elem pointer, or -1 when it is not stored.                           */
+#define SGLIB_HASH_TAB_IS_MEMBER(type, table, dim, elem, hash_function, resultIndex) {\
+  unsigned _i_;\
+  int _c_;\
+  type *_e_;\
+  _c_ = 0;\
+  _i_ = hash_function(elem);\
+  _i_ %= (dim);\
+  while ((_e_=(table)[_i_])!=NULL && _e_!=(elem) && _c_<(dim)) {\
+    _c_ ++;\
+    _i_ = (_i_ + SGLIB_HASH_TAB_SHIFT_CONSTANT) % (dim);\
+  }\
+  if (_e_==(elem)) (resultIndex) = _i_;\
+  else (resultIndex) = -1;\
+}
+
+/* Run command once for every non-NULL slot, in index order, with            */
+/* iteratedIndex and iteratedVariable declared by the macro itself.          */
+#define SGLIB_HASH_TAB_MAP_ON_ELEMENTS(type, table, dim, iteratedIndex, iteratedVariable, command) {\
+  unsigned iteratedIndex;\
+  type *iteratedVariable;\
+  for(iteratedIndex=0; iteratedIndex < (dim); iteratedIndex++) {\
+    iteratedVariable = (table)[iteratedIndex];\
+    if (iteratedVariable != NULL) {command;}\
+  }\
+}
+
+
+/* 
---------------------------------------------------------------------------- */
+/* ------------------------- DYNAMIC DATA STRUCTURES -------------------------- */
+/* ---------------------------------------------------------------------------- */
+
+/* ------------------------------------ lists (level 0) --------------------- */
+
+/* In all list macros 'next' is the NAME of the link field inside 'type'. */
+
+/* Push elem at the head of list. */
+#define SGLIB_LIST_ADD(type, list, elem, next) {\
+  (elem)->next = (list);\
+  (list) = (elem);\
+}
+
+/* Append the list second after the last element of first. */
+#define SGLIB_LIST_CONCAT(type, first, second, next) {\
+  if ((first)==NULL) {\
+    (first) = (second);\
+  } else {\
+    type *_p_;\
+    for(_p_ = (first); _p_->next!=NULL; _p_=_p_->next) ;\
+    _p_->next = (second);\
+  }\
+}
+
+/* Unlink exactly this elem (pointer identity); elem must be in the list,    */
+/* which is only checked by an assert.                                       */
+#define SGLIB_LIST_DELETE(type, list, elem, next) {\
+  type **_p_;\
+  for(_p_ = &(list); *_p_!=NULL && *_p_!=(elem); _p_= &(*_p_)->next) ;\
+  assert(*_p_!=NULL && "element is not member of the container, use DELETE_IF_MEMBER instead"!=NULL);\
+  *_p_ = (*_p_)->next;\
+}
+
+/* member receives the first element comparing equal to elem, or NULL; in    */
+/* the NULL case elem is pushed at the head of list.                         */
+#define SGLIB_LIST_ADD_IF_NOT_MEMBER(type, list, elem, comparator, next, member) {\
+  type *_p_;\
+  for(_p_ = (list); _p_!=NULL && comparator(_p_, (elem)) != 0; _p_= _p_->next) ;\
+  (member) = _p_;\
+  if (_p_ == NULL) {\
+    SGLIB_LIST_ADD(type, list, elem, next);\
+  }\
+}
+
+/* Unlink the first element comparing equal to elem; member receives it, or  */
+/* NULL when there is none.                                                  */
+#define SGLIB_LIST_DELETE_IF_MEMBER(type, list, elem, comparator, next, member) {\
+  type **_p_;\
+  for(_p_ = &(list); *_p_!=NULL && comparator((*_p_), (elem)) != 0; _p_= &(*_p_)->next) ;\
+  (member) = *_p_;\
+  if (*_p_ != NULL) {\
+    *_p_ = (*_p_)->next;\
+  }\
+}
+
+/* result is non-zero when exactly this elem pointer is in the list. */
+#define SGLIB_LIST_IS_MEMBER(type, list, elem, next, result) {\
+  type *_p_;\
+  for(_p_ = (list); _p_!=NULL && _p_ != (elem); _p_= _p_->next) ;\
+  (result) = (_p_!=NULL);\
+}
+
+/* member receives the first element comparing equal to elem, or NULL. */
+#define SGLIB_LIST_FIND_MEMBER(type, list, elem, comparator, next, member) {\
+  type *_p_;\
+  for(_p_ = (list); _p_!=NULL && comparator(_p_, (elem)) != 0; _p_= _p_->next) ;\
+  (member) = _p_;\
+}
+
+/* Run command for every element. The macro declares iteratedVariable itself */
+/* and reads the successor before running command, so command may unlink or  */
+/* free the current element.                                                 */
+#define SGLIB_LIST_MAP_ON_ELEMENTS(type, list, iteratedVariable, next, command) {\
+  type *_ne_;\
+  type *iteratedVariable;\
+  (iteratedVariable) = (list); \
+  while ((iteratedVariable)!=NULL) {\
+    _ne_ = (iteratedVariable)->next;\
+    {command;};\
+    (iteratedVariable) = _ne_;\
+  }\
+}
+
+/* Count the elements of list into result.                                   */
+/* The iteration variable _ce_ is declared by SGLIB_LIST_MAP_ON_ELEMENTS, so */
+/* no declaration is needed (or wanted: it would be shadowed and unused).    */
+#define SGLIB_LIST_LEN(type, list, next, result) {\
+  (result) = 0;\
+  SGLIB_LIST_MAP_ON_ELEMENTS(type, list, _ce_, next, (result)++);\
+}
+
+/* Reverse the list in place. */
+#define SGLIB_LIST_REVERSE(type, list, next) {\
+  type *_list_,*_tmp_,*_res_;\
+  _list_ = (list);\
+  _res_ = NULL;\
+  while (_list_!=NULL) {\
+    _tmp_ = _list_->next; _list_->next = _res_;\
+    _res_ = _list_; _list_ = _tmp_;\
+  }\
+  (list) = _res_;\
+}
+
+/* Sort the list in place, ascending with respect to comparator. */
+#define SGLIB_LIST_SORT(type, list, comparator, next) {\
+  /* a non-recursive merge sort on lists */\
+  type *_r_;\
+  type *_a_, *_b_, *_todo_, *_t_, **_restail_;\
+  int _i_, _n_, _contFlag_;\
+  _r_ = (list);\
+  _contFlag_ = 1;\
+  /* each pass merges neighbouring runs of length _n_; _n_ doubles per pass */\
+  for(_n_ = 1; _contFlag_; _n_ = _n_+_n_) {\
+    _todo_ = _r_; _r_ = NULL; _restail_ = &_r_; _contFlag_ =0;\
+    while (_todo_!=NULL) {\
+      /* cut the first run (_a_) of at most _n_ elements off _todo_ */\
+      _a_ = _todo_;\
+      for(_i_ = 1, _t_ = _a_;  _i_ < _n_ && _t_!=NULL;  _i_++, _t_ = _t_->next) ;\
+      if (_t_ ==NULL) {\
+        /* no second run left: append the remainder unchanged */\
+        *_restail_ = _a_;\
+        break;\
+      }\
+      /* cut the second run (_b_) */\
+      _b_ = _t_->next; _t_->next=NULL;\
+      for(_i_ =1, _t_ = _b_;  _i_<_n_ && _t_!=NULL;  _i_++, _t_ = _t_->next) ;\
+      if (_t_ ==NULL) {\
+        _todo_ =NULL;\
+      } else {\
+        _todo_ = _t_->next; _t_->next=NULL;\
+      }\
+      /* merge */\
+      while (_a_!=NULL && _b_!=NULL) {\
+        if (comparator(_a_, _b_) < 0) {\
+          *_restail_ = _a_;  _restail_ = &(_a_->next); _a_ = _a_->next;\
+        } else {\
+          *_restail_ = _b_;  _restail_ = &(_b_->next); _b_ = _b_->next;\
+        }\
+      }\
+      if (_a_!=NULL) *_restail_ = _a_;\
+      else *_restail_ = _b_;\
+      while (*_restail_!=NULL) _restail_ = &((*_restail_)->next);\
+      /* a merge happened, so another pass with longer runs is required */\
+      _contFlag_ =1;\
+    }\
+  }\
+  (list) = _r_;\
+}
+
+/* --------------------------------- sorted list (level 0) --------------------- */
+/*
+  All operations suppose that the list is sorted and they preserve
+  this property. 
+*/
+
+
+/* Insert elem before the first element that is not smaller than it. */
+#define SGLIB_SORTED_LIST_ADD(type, list, elem, comparator, next) {\
+  type **_e_;\
+  int _cmpres_;\
+  SGLIB_SORTED_LIST_FIND_MEMBER_OR_PLACE(type, list, elem, comparator, next, _cmpres_, _e_);\
+  (elem)->next = *_e_;\
+  *_e_ = (elem);\
+}
+
+/* Insert elem unless an equal element exists; member receives that equal    */
+/* element, or NULL when elem was inserted.                                  */
+#define SGLIB_SORTED_LIST_ADD_IF_NOT_MEMBER(type, list, elem, comparator, next, member) {\
+  type **_e_;\
+  int _cmp_res_;\
+  SGLIB_SORTED_LIST_FIND_MEMBER_OR_PLACE(type, list, elem, comparator, next, _cmp_res_, _e_);\
+  if (_cmp_res_ != 0) {\
+    (elem)->next = *_e_;\
+    *_e_ = (elem);\
+    (member) = NULL;\
+  } else {\
+    (member) = *_e_;\
+  }\
+}
+
+/* Unlink exactly this elem (pointer identity), see SGLIB_LIST_DELETE. */
+#define SGLIB_SORTED_LIST_DELETE(type, list, elem, next) {\
+  SGLIB_LIST_DELETE(type, list, elem, next);\
+}
+
+/* Unlink the first element comparing equal to elem; member receives it, or  */
+/* NULL when there is none.                                                  */
+#define SGLIB_SORTED_LIST_DELETE_IF_MEMBER(type, list, elem, comparator, next, member) {\
+  type **_e_;\
+  int _cmp_res_;\
+  SGLIB_SORTED_LIST_FIND_MEMBER_OR_PLACE(type, list, elem, comparator, next, _cmp_res_, _e_);\
+  if (_cmp_res_ == 0) {\
+    (member) = *_e_;\
+    *_e_ = (*_e_)->next;\
+  } else {\
+    (member) = NULL;\
+  }\
+}
+
+/* member receives the first element comparing equal to elem, or NULL. The   */
+/* scan stops at the first element that is not smaller than elem.            */
+#define SGLIB_SORTED_LIST_FIND_MEMBER(type, list, elem, comparator, next, member) {\
+  type *_p_;\
+  int _cmpres_ = 1;\
+  for(_p_ = (list); _p_!=NULL && (_cmpres_=comparator(_p_, (elem))) < 0; _p_=_p_->next) ;\
+  if (_cmpres_ != 0) (member) = NULL;\
+  else (member) = _p_;\
+}
+
+/* result is non-zero when exactly this elem pointer is in the list: skip    */
+/* smaller elements, then walk the run of equal elements looking for elem.   */
+#define SGLIB_SORTED_LIST_IS_MEMBER(type, list, elem, comparator, next, result) {\
+  type *_p_;\
+  for(_p_ = (list); _p_!=NULL && comparator(_p_, (elem)) < 0; _p_=_p_->next) ;\
+  while (_p_ != NULL && _p_ != (elem) && comparator(_p_, (elem)) == 0) _p_=_p_->next;\
+  (result) = (_p_ == (elem));\
+}
+
+/* Position member_ptr on the link that points to the first element not      */
+/* smaller than elem (or on the terminating NULL link). comparator_result is */
+/* the result of the last comparison, or -1 when none stopped the scan.      */
+#define SGLIB_SORTED_LIST_FIND_MEMBER_OR_PLACE(type, list, elem, comparator, next, comparator_result, member_ptr) {\
+  (comparator_result) = -1;\
+  for((member_ptr) = &(list); \
+      *(member_ptr)!=NULL && ((comparator_result)=comparator((*member_ptr), (elem))) < 0; \
+      (member_ptr) = &(*(member_ptr))->next) ;\
+}
+
+/* Count the elements of list into result. */
+#define SGLIB_SORTED_LIST_LEN(type, list, next, result) {\
+  SGLIB_LIST_LEN(type, list, next, result);\
+}
+
+/* Run command for every element, see SGLIB_LIST_MAP_ON_ELEMENTS. */
+#define SGLIB_SORTED_LIST_MAP_ON_ELEMENTS(type, list, iteratedVariable, next, command) {\
+  SGLIB_LIST_MAP_ON_ELEMENTS(type, list, iteratedVariable, next, command);\
+}
+
+
+/* ------------------------------- double linked list (level 0) ------------------------- */
+/*
+  Lists with back pointer to previous element. Those lists implements deletion
+  of an element in a constant time.
+*/
+
+/* In the macros below 'previous' and 'next' are the NAMES of the link       */
+/* fields; a list handle may point at any element of the list, which is why  */
+/* searches and traversals walk both directions from it.                     */
+
+/* Make list a one-element list consisting of elem. */
+#define SGLIB___DL_LIST_CREATE_SINGLETON(type, list, elem, previous, next) {\
+  (list) = (elem);\
+  (list)->next = (list)->previous = NULL;\
+}
+
+/* Link elem directly after place (or create a singleton when place is NULL). */
+#define SGLIB_DL_LIST_ADD_AFTER(type, place, elem, previous, next) {\
+  if ((place) == NULL) {\
+    SGLIB___DL_LIST_CREATE_SINGLETON(type, place, elem, previous, next);\
+  } else {\
+    (elem)->next = (place)->next;\
+    (elem)->previous = (place);\
+    (place)->next = (elem);\
+    if ((elem)->next != NULL) (elem)->next->previous = (elem);\
+  }\
+}
+
+/* Link elem directly before place (or create a singleton when place is NULL). */
+#define SGLIB_DL_LIST_ADD_BEFORE(type, place, elem, previous, next) {\
+  if ((place) == NULL) {\
+    SGLIB___DL_LIST_CREATE_SINGLETON(type, place, elem, previous, next);\
+  } else {\
+    (elem)->next = (place);\
+    (elem)->previous = (place)->previous;\
+    (place)->previous = (elem);\
+    if ((elem)->previous != NULL) (elem)->previous->next = (elem);\
+  }\
+}
+
+/* Default insertion: before the element the list handle points at. */
+#define SGLIB_DL_LIST_ADD(type, list, elem, previous, next) {\
+  SGLIB_DL_LIST_ADD_BEFORE(type, list, elem, previous, next)\
+}
+
+/* Search backwards from list, then forwards from list->next, for an element */
+/* comparing equal to elem. member receives it, or NULL, in which case elem  */
+/* is inserted with the_add_operation.                                       */
+#define SGLIB___DL_LIST_GENERIC_ADD_IF_NOT_MEMBER(type, list, elem, comparator, previous, next, member, the_add_operation) {\
+  type *_dlp_;\
+  for(_dlp_ = (list); _dlp_!=NULL && comparator(_dlp_, (elem)) != 0; _dlp_= _dlp_->previous) ;\
+  if (_dlp_ == NULL && (list) != NULL) {\
+    for(_dlp_ = (list)->next; _dlp_!=NULL && comparator(_dlp_, (elem)) != 0; _dlp_= _dlp_->next) ;\
+  }\
+  (member) = _dlp_;\
+  if (_dlp_ == NULL) {\
+    the_add_operation(type, list, elem, previous, next);\
+  }\
+}
+
+#define SGLIB_DL_LIST_ADD_BEFORE_IF_NOT_MEMBER(type, list, elem, comparator, previous, next, member) {\
+  SGLIB___DL_LIST_GENERIC_ADD_IF_NOT_MEMBER(type, list, elem, comparator, previous, next, member, SGLIB_DL_LIST_ADD_BEFORE);\
+}
+
+#define SGLIB_DL_LIST_ADD_AFTER_IF_NOT_MEMBER(type, list, elem, comparator, previous, next, member) {\
+  SGLIB___DL_LIST_GENERIC_ADD_IF_NOT_MEMBER(type, list, elem, comparator, previous, next, member, SGLIB_DL_LIST_ADD_AFTER);\
+}
+
+#define SGLIB_DL_LIST_ADD_IF_NOT_MEMBER(type, list, elem, comparator, previous, next, member) {\
+  SGLIB___DL_LIST_GENERIC_ADD_IF_NOT_MEMBER(type, list, elem, comparator, previous, next, member, SGLIB_DL_LIST_ADD);\
+}
+
+/* Append the whole list second after the last element of first.             */
+/* The two ends are linked directly: going through SGLIB_DL_LIST_ADD_AFTER   */
+/* would overwrite second->next and cut off the rest of the second list.     */
+#define SGLIB_DL_LIST_CONCAT(type, first, second, previous, next) {\
+  if ((first)==NULL) {\
+    (first) = (second);\
+  } else if ((second)!=NULL) {\
+    type *_dlp_, *_dls_;\
+    /* last element of first */\
+    for(_dlp_ = (first); _dlp_->next!=NULL; _dlp_=_dlp_->next) ;\
+    /* first element of second (the handle may point into its middle) */\
+    for(_dls_ = (second); _dls_->previous!=NULL; _dls_=_dls_->previous) ;\
+    _dlp_->next = _dls_;\
+    _dls_->previous = _dlp_;\
+  }\
+}
+
+/* Unlink elem in constant time; when the handle pointed at elem it is moved */
+/* to a neighbour (previous if any, otherwise next).                         */
+#define SGLIB_DL_LIST_DELETE(type, list, elem, previous, next) {\
+  type *_l_;\
+  _l_ = (list);\
+  if (_l_ == (elem)) {\
+    if ((elem)->previous != NULL) _l_ = (elem)->previous;\
+    else _l_ = (elem)->next;\
+  }\
+  if ((elem)->next != NULL) (elem)->next->previous = (elem)->previous;\
+  if ((elem)->previous != NULL) (elem)->previous->next = (elem)->next;\
+  (list) = _l_;\
+}
+
+/* Unlink the first element found comparing equal to elem; member receives   */
+/* it, or NULL when there is none.                                           */
+#define SGLIB_DL_LIST_DELETE_IF_MEMBER(type, list, elem, comparator, previous, next, member) {\
+  type *_dlp_;\
+  for(_dlp_ = (list); _dlp_!=NULL && comparator(_dlp_, (elem)) != 0; _dlp_= _dlp_->previous) ;\
+  if (_dlp_ == NULL && (list) != NULL) {\
+    for(_dlp_ = (list)->next; _dlp_!=NULL && comparator(_dlp_, (elem)) != 0; _dlp_= _dlp_->next) ;\
+  }\
+  (member) = _dlp_;\
+  if (_dlp_ != NULL) {\
+    SGLIB_DL_LIST_DELETE(type, list, _dlp_, previous, next);\
+  }\
+}
+
+/* result is non-zero when exactly this elem pointer is in the list. */
+#define SGLIB_DL_LIST_IS_MEMBER(type, list, elem, previous, next, result) {\
+  type *_dlp_;\
+  SGLIB_LIST_IS_MEMBER(type, list, elem, previous, result);\
+  if ((result) == 0 && (list) != NULL) {\
+    _dlp_ = (list)->next;\
+    SGLIB_LIST_IS_MEMBER(type, _dlp_, elem, next, result);\
+  }\
+}
+
+/* member receives an element comparing equal to elem, or NULL. */
+#define SGLIB_DL_LIST_FIND_MEMBER(type, list, elem, comparator, previous, next, member) {\
+  type *_dlp_;\
+  SGLIB_LIST_FIND_MEMBER(type, list, elem, comparator, previous, member);\
+  if ((member) == NULL && (list) != NULL) {\
+    _dlp_ = (list)->next;\
+    SGLIB_LIST_FIND_MEMBER(type, _dlp_, elem, comparator, next, member);\
+  }\
+}
+
+/* Run command for the handle element and everything before it, then for     */
+/* everything after it. list->next is saved first because command may        */
+/* destroy the handle element.                                               */
+#define SGLIB_DL_LIST_MAP_ON_ELEMENTS(type, list, iteratedVariable, previous, next, command) {\
+  type *_dl_;\
+  type *iteratedVariable;\
+  if ((list)!=NULL) {\
+    _dl_ = (list)->next;\
+    SGLIB_LIST_MAP_ON_ELEMENTS(type, list, iteratedVariable, previous, command);\
+    SGLIB_LIST_MAP_ON_ELEMENTS(type, _dl_, iteratedVariable, next, command);\
+  }\
+}
+
+/* Sort the whole list: rewind to its first element, sort it as a singly     */
+/* linked list over 'next', then rebuild the 'previous' links. list ends up  */
+/* pointing at the first element.                                            */
+#define SGLIB_DL_LIST_SORT(type, list, comparator, previous, next) {\
+  type *_dll_;\
+  _dll_ = (list);\
+  if (_dll_ != NULL) {\
+    for(; _dll_->previous!=NULL; _dll_=_dll_->previous) ;\
+    SGLIB_LIST_SORT(type, _dll_, comparator, next);\
+    SGLIB___DL_LIST_CREATE_FROM_LIST(type, _dll_, previous, next);\
+    (list) = _dll_;\
+  }\
+}
+
+/* result receives the first element of the list (NULL for an empty list). */
+#define SGLIB_DL_LIST_GET_FIRST(type, list, previous, next, result) {\
+  type *_dll_;\
+  _dll_ = (list);\
+  if (_dll_ != NULL) {\
+    for(; _dll_->previous!=NULL; _dll_=_dll_->previous) ;\
+  }\
+  (result) = _dll_;\
+}
+
+/* result receives the last element of the list (NULL for an empty list). */
+#define SGLIB_DL_LIST_GET_LAST(type, list, previous, next, result) {\
+  type *_dll_;\
+  _dll_ = (list);\
+  if (_dll_ != NULL) {\
+    for(; _dll_->next!=NULL; _dll_=_dll_->next) ;\
+  }\
+  (result) = _dll_;\
+}
+
+/* Count all elements: the handle and its predecessors plus its successors. */
+#define SGLIB_DL_LIST_LEN(type, list, previous, next, result) {\
+  type *_dl_;\
+  int _r1_, _r2_;\
+  if ((list)==NULL) {\
+    (result) = 0;\
+  } else {\
+    SGLIB_LIST_LEN(type, list, previous, _r1_);\
+    _dl_ = (list)->next;\
+    SGLIB_LIST_LEN(type, _dl_, next, _r2_);\
+    (result) = _r1_ + _r2_;\
+  }\
+}
+
+/* Reverse the list in place by swapping the two links of every element;     */
+/* the handle keeps pointing at the same element.                            */
+#define SGLIB_DL_LIST_REVERSE(type, list, previous, next) {\
+  type *_list_,*_nlist_,*_dlp_,*_dln_;\
+  _list_ = (list);\
+  if (_list_!=NULL) {\
+    _nlist_ = _list_->next;\
+    /* the handle element and everything before it */\
+    while (_list_!=NULL) {\
+      _dln_ = _list_->next; \
+      _dlp_ = _list_->previous; \
+      _list_->next = _dlp_;\
+      _list_->previous = _dln_;\
+      _list_ = _dlp_;\
+    }\
+    /* everything after the handle element */\
+    while (_nlist_!=NULL) {\
+      _dln_ = _nlist_->next; \
+      _dlp_ = _nlist_->previous; \
+      _nlist_->next = _dlp_;\
+      _nlist_->previous = _dln_;\
+      _nlist_ = _dln_;\
+    }\
+  }\
+}
+
+/* Fill in the 'previous' links of a list that is correctly linked by 'next'. */
+#define SGLIB___DL_LIST_CREATE_FROM_LIST(type, list, previous, next) {\
+  type *_dlp_, *_dlt_;\
+  _dlp_ = NULL;\
+  for(_dlt_ = (list); _dlt_!=NULL; _dlt_ = _dlt_->next) {\
+    _dlt_->previous = _dlp_;\
+    _dlp_ = _dlt_;\
+  }\
+}
+
+
+/* ------------------------------- binary tree traversal (level 0) -------------------- */
+
+
+/* order selects the traversal: 0 = preorder, 1 = inorder, 2 = postorder. */
+#define SGLIB___BIN_TREE_MAP_ON_ELEMENTS(type, tree, iteratedVariable, order, left, right, command) {\
+  /* this is non-recursive implementation of tree traversal */\
+  /* it maintains the path to the current node in the array '_path_' */\
+  /* the _path_[0] contains the root of the tree; */\
+  /* the _path_[_pathi_] contains the _current_element_ */\
+  /* the macro does not use the _current_element_ after execution of command */\
+  /* command can destroy it, it can free the element for example */\
+  type *_path_[SGLIB_MAX_TREE_DEEP];\
+  type *_right_[SGLIB_MAX_TREE_DEEP];\
+  char _pass_[SGLIB_MAX_TREE_DEEP];\
+  type *_cn_;\
+  int _pathi_;\
+  type *iteratedVariable;\
+  _cn_ = (tree);\
+  _pathi_ = 0;\
+  while (_cn_!=NULL) {\
+    /* push down to leftmost innermost element */\
+    while(_cn_!=NULL) {\
+      _path_[_pathi_] = _cn_;\
+      _right_[_pathi_] = _cn_->right;\
+      _pass_[_pathi_] = 0;\
+      _cn_ = _cn_->left;\
+      if (order == 0) {\
+        iteratedVariable = _path_[_pathi_];\
+        {command;}\
+      }\
+      _pathi_ ++;\
+      if (_pathi_ >= SGLIB_MAX_TREE_DEEP) assert(0 && "the binary_tree is too deep");\
+    }\
+    /* climb back up while there is no unvisited right subtree */\
+    do {\
+      _pathi_ --;\
+      if ((order==1 && _pass_[_pathi_] == 0)\
+          || (order == 2 && (_pass_[_pathi_] == 1 || _right_[_pathi_]==NULL))) {\
+        iteratedVariable = _path_[_pathi_];\
+        {command;}\
+      }\
+      _pass_[_pathi_] ++;\
+    } while (_pathi_>0 && _right_[_pathi_]==NULL) ;\
+    /* descend into the saved right subtree (NULL ends the traversal) */\
+    _cn_ = _right_[_pathi_];\
+    _right_[_pathi_] = NULL;\
+    _pathi_ ++;\
+  }\
+}
+
+/* Inorder traversal. */
+#define SGLIB_BIN_TREE_MAP_ON_ELEMENTS(type, tree, _current_element_, left, right, command) {\
+  SGLIB___BIN_TREE_MAP_ON_ELEMENTS(type, tree, _current_element_, 1, left, right, command);\
+}
+
+/* Preorder traversal. */
+#define SGLIB_BIN_TREE_MAP_ON_ELEMENTS_PREORDER(type, tree, _current_element_, left, right, command) {\
+  SGLIB___BIN_TREE_MAP_ON_ELEMENTS(type, tree, _current_element_, 0, left, right, command);\
+}
+
+/* Postorder traversal. */
+#define SGLIB_BIN_TREE_MAP_ON_ELEMENTS_POSTORDER(type, tree, _current_element_, left, right, command) {\
+  SGLIB___BIN_TREE_MAP_ON_ELEMENTS(type, tree, _current_element_, 2, left, right, command);\
+}
+
+/* Binary-search-tree lookup: res receives the node comparing equal to elem, */
+/* or NULL. Note the argument order comparator(elem, node).                  */
+#define SGLIB___BIN_TREE_FIND_MEMBER(type, tree, elem, left, right, comparator, res) {\
+  type *_s_;\
+  int _c_;\
+  _s_ = (tree);\
+  while (_s_!=NULL) {\
+    _c_ = comparator((elem), _s_);\
+    if (_c_ < 0) _s_ = _s_->left;\
+    else if (_c_ > 0) _s_ = _s_->right;\
+    else break;\
+  }\
+  (res) = _s_;\
+}
+
+/* ---------------------------------------------------------------------------- */
+/* ---------------------------------------------------------------------------- */
+/* -                          LEVEL - 1  INTERFACE                            - */
+/* ---------------------------------------------------------------------------- */
+/* ---------------------------------------------------------------------------- */
+
+
+
+/* ---------------------------------------------------------------------------- */
+/* ------------------------------ STATIC ARRAYS ------------------------------- */
+/* ---------------------------------------------------------------------------- */
+
+/* ----------------------------- array sorting (level 1) ---------------------- */
+
+/* Declare sglib_<type>_array_quick_sort and sglib_<type>_array_heap_sort. */
+#define SGLIB_DEFINE_ARRAY_SORTING_PROTOTYPES(type, comparator) \
+ extern void sglib_##type##_array_quick_sort(type *a, int max);\
+ extern void sglib_##type##_array_heap_sort(type *a, int max);\
+
+
+/* Define the two sorting functions as wrappers around the level-0 macros. */
+#define SGLIB_DEFINE_ARRAY_SORTING_FUNCTIONS(type, comparator) \
+ void sglib_##type##_array_quick_sort(type *a, int max) {\
+   SGLIB_ARRAY_SINGLE_QUICK_SORT(type, a, max, comparator);\
+ }\
+ void sglib_##type##_array_heap_sort(type *a, int max) {\
+   SGLIB_ARRAY_SINGLE_HEAP_SORT(type, a, max, comparator);\
+ }\
+
+
+/* ----------------------------- array queue (level 1) ------------------- */
+/* sglib's queue is stored in a fixed sized array */
+/* queue_type MUST be a structure containing fields: */
+/*  afield is the array storing elem_type */
+/*  ifield is the index of the first element in the queue */
+/*  jfield is the index of the first free element after the queue */
+/*  dim is the size of the array afield */
+/* !!!!!!! This data structure is NOT documented, do not use it !!!!!!!!!! */
+
+
+#define SGLIB_DEFINE_QUEUE_PROTOTYPES(queue_type, elem_type, afield, ifield, jfield, dim) \
+ extern void sglib_##queue_type##_init(queue_type *q); \
+ extern int sglib_##queue_type##_is_empty(queue_type *q); \
+ extern int sglib_##queue_type##_is_full(queue_type *q); \
+ extern elem_type sglib_##queue_type##_first_element(queue_type *q); \
+ extern elem_type *sglib_##queue_type##_first_element_ptr(queue_type *q); \
+ extern void sglib_##queue_type##_add_next(queue_type *q); \
+ extern void sglib_##queue_type##_add(queue_type *q, elem_type elem); \
+ extern void sglib_##queue_type##_delete_first(queue_type *q); \
+ extern void sglib_##queue_type##_delete(queue_type *q);
+
+
+/* Function wrappers around the SGLIB_QUEUE_* macros for a queue struct. */
+#define SGLIB_DEFINE_QUEUE_FUNCTIONS(queue_type, elem_type, afield, ifield, jfield, dim) \
+ void sglib_##queue_type##_init(queue_type *q) {\
+   SGLIB_QUEUE_INIT(elem_type, q->afield, q->ifield, q->jfield);\
+ }\
+ int sglib_##queue_type##_is_empty(queue_type *q) {\
+   return(SGLIB_QUEUE_IS_EMPTY(elem_type, q->afield, q->ifield, q->jfield));\
+ }\
+ int sglib_##queue_type##_is_full(queue_type *q) {\
+   /* SGLIB_QUEUE_IS_FULL needs the array size to wrap the index */\
+   return(SGLIB_QUEUE_IS_FULL(elem_type, q->afield, q->ifield, q->jfield, dim));\
+ }\
+ elem_type sglib_##queue_type##_first_element(queue_type *q) {\
+   return(SGLIB_QUEUE_FIRST_ELEMENT(elem_type, q->afield, q->ifield, q->jfield));\
+ }\
+ elem_type *sglib_##queue_type##_first_element_ptr(queue_type *q) {\
+   return(& SGLIB_QUEUE_FIRST_ELEMENT(elem_type, q->afield, q->ifield, q->jfield));\
+ }\
+ void sglib_##queue_type##_add_next(queue_type *q) {\
+   SGLIB_QUEUE_ADD_NEXT(elem_type, q->afield, q->ifield, q->jfield, dim);\
+ }\
+ void sglib_##queue_type##_add(queue_type *q, elem_type elem) {\
+   SGLIB_QUEUE_ADD(elem_type, q->afield, elem, q->ifield, q->jfield, dim);\
+ }\
+ void sglib_##queue_type##_delete_first(queue_type *q) {\
+   SGLIB_QUEUE_DELETE_FIRST(elem_type, q->afield, q->ifield, q->jfield, dim);\
+ }\
+ void sglib_##queue_type##_delete(queue_type *q) {\
+   SGLIB_QUEUE_DELETE_FIRST(elem_type, q->afield, q->ifield, q->jfield, dim);\
+ }
+
+
+/* ------------------------ array heap (level 1) ------------------------- */
+/* sglib's heap is a priority queue implemented in a fixed sized array */
+/* heap_type MUST be a structure containing fields: */
+/*  afield is the array of size dim storing elem_type */
+/*  ifield is the index of the first free element after the queue */
+/* !!!!!!! This data structure is NOT documented, do not use it !!!!!!!!!! 
*/
+
+
+/* Declarations of the heap functions; the caller supplies the final ';'. */
+#define SGLIB_DEFINE_HEAP_PROTOTYPES(heap_type, elem_type, afield, ifield, dim, comparator, elem_exchanger) \
+ extern void sglib_##heap_type##_init(heap_type *q); \
+ extern int sglib_##heap_type##_is_empty(heap_type *q); \
+ extern int sglib_##heap_type##_is_full(heap_type *q); \
+ extern elem_type sglib_##heap_type##_first_element(heap_type *q); \
+ extern elem_type *sglib_##heap_type##_first_element_ptr(heap_type *q); \
+ extern void sglib_##heap_type##_add_next(heap_type *q); \
+ extern void sglib_##heap_type##_add(heap_type *q, elem_type elem); \
+ extern void sglib_##heap_type##_delete_first(heap_type *q); \
+ extern void sglib_##heap_type##_delete(heap_type *q)
+
+/* Function wrappers around the SGLIB_HEAP_* macros for a heap struct. */
+#define SGLIB_DEFINE_HEAP_FUNCTIONS(heap_type, elem_type, afield, ifield, dim, comparator, elem_exchanger) \
+ void sglib_##heap_type##_init(heap_type *q) {\
+   SGLIB_HEAP_INIT(elem_type, q->afield, q->ifield);\
+ }\
+ int sglib_##heap_type##_is_empty(heap_type *q) {\
+   return(SGLIB_HEAP_IS_EMPTY(elem_type, q->afield, q->ifield));\
+ }\
+ int sglib_##heap_type##_is_full(heap_type *q) {\
+   /* SGLIB_HEAP_IS_FULL compares the fill index against the array size */\
+   return(SGLIB_HEAP_IS_FULL(elem_type, q->afield, q->ifield, dim));\
+ }\
+ elem_type sglib_##heap_type##_first_element(heap_type *q) {\
+   return(SGLIB_HEAP_FIRST_ELEMENT(elem_type, q->afield, q->ifield));\
+ }\
+ elem_type *sglib_##heap_type##_first_element_ptr(heap_type *q) {\
+   return(& SGLIB_HEAP_FIRST_ELEMENT(elem_type, q->afield, q->ifield));\
+ }\
+ void sglib_##heap_type##_add_next(heap_type *q) {\
+   SGLIB_HEAP_ADD_NEXT(elem_type, q->afield, q->ifield, dim, comparator, elem_exchanger);\
+ }\
+ void sglib_##heap_type##_add(heap_type *q, elem_type elem) {\
+   /* SGLIB_HEAP_ADD has no exchanger parameter, so store the element here */\
+   /* and let SGLIB_HEAP_ADD_NEXT sift it up with the given elem_exchanger */\
+   if (SGLIB_HEAP_IS_FULL(elem_type, q->afield, q->ifield, dim)) assert(0 && "the heap is full");\
+   q->afield[q->ifield] = elem;\
+   SGLIB_HEAP_ADD_NEXT(elem_type, q->afield, q->ifield, dim, comparator, elem_exchanger);\
+ }\
+ void sglib_##heap_type##_delete_first(heap_type *q) {\
+   SGLIB_HEAP_DELETE_FIRST(elem_type, q->afield, q->ifield, dim, comparator, elem_exchanger);\
+ }\
+ void sglib_##heap_type##_delete(heap_type *q) {\
+   SGLIB_HEAP_DELETE_FIRST(elem_type, q->afield, q->ifield, dim, comparator, elem_exchanger);\
+ }
+
+
+/* ------------------------ hashed table (level 1) ------------------------- */
+/*
+
+  sglib's hash table is an array storing directly pointers to objects (not containers).
+  In this table there is a one-to-one mapping between 'objects' stored
+  in the table and indexes where they are placed. Each index is
+  pointing to exactly one 'object' and each 'object' stored in the
+  table occurs on exactly one index. Once an object is stored in the
+  table, it can be represented via its index.
+
+  type          - is the type of elements
+  dim           - is the size of the hash array
+  hash_function - is a hashing function mapping type* to unsigned
+  comparator    - is a comparator on elements
+
+  !!!!!!! This data structure is NOT documented, do not use it !!!!!!!!!!
+*/
+
+/* The iterator type is defined here only (as for the other containers), so  */
+/* that a translation unit can expand both the PROTOTYPES and the FUNCTIONS  */
+/* macro without redefining the struct. FUNCTIONS therefore requires that    */
+/* PROTOTYPES was expanded before it.                                        */
+#define SGLIB_DEFINE_HASHED_TABLE_PROTOTYPES(type, dim, hash_function, comparator) \
+  struct sglib_hashed_##type##_iterator {\
+    int currentIndex;\
+    type **table;\
+    int (*subcomparator)(type *, type *);\
+    type *equalto;\
+  };\
+  extern void sglib_hashed_##type##_init(type *table[dim]);\
+  extern int sglib_hashed_##type##_add_if_not_member(type *table[dim], type *elem, type **member);\
+  extern int sglib_hashed_##type##_is_member(type *table[dim], type *elem);\
+  extern type * sglib_hashed_##type##_find_member(type *table[dim], type *elem);\
+  extern type *sglib_hashed_##type##_it_init(struct sglib_hashed_##type##_iterator *it, type *table[dim]); \
+  extern type *sglib_hashed_##type##_it_init_on_equal(struct sglib_hashed_##type##_iterator *it, type *table[dim], int (*subcomparator)(type *, type *), type *equalto); \
+  extern type *sglib_hashed_##type##_it_current(struct sglib_hashed_##type##_iterator *it); \
+  extern type *sglib_hashed_##type##_it_next(struct sglib_hashed_##type##_iterator *it);
+
+/* Function wrappers around the SGLIB_HASH_TAB_* macros plus an iterator     */
+/* over the occupied slots. The iterator stores subcomparator/equalto but    */
+/* does not filter on them: every non-NULL slot is visited.                  */
+#define SGLIB_DEFINE_HASHED_TABLE_FUNCTIONS(type, dim, hash_function, comparator) \
+  void sglib_hashed_##type##_init(type *table[dim]) {\
+    SGLIB_HASH_TAB_INIT(type, table, dim);\
+  }\
+  int sglib_hashed_##type##_add_if_not_member(type *table[dim], type *elem, type **member) {\
+    SGLIB_HASH_TAB_ADD_IF_NOT_MEMBER(type, table, dim, elem, hash_function, comparator, *member);\
+    /* *member is NULL exactly when elem has been stored */\
+    return(*member==NULL);\
+  }\
+  int sglib_hashed_##type##_is_member(type *table[dim], type *elem) {\
+    int ind;\
+    SGLIB_HASH_TAB_IS_MEMBER(type, table, dim, elem, hash_function, ind);\
+    return(ind != -1);\
+  }\
+  type * sglib_hashed_##type##_find_member(type *table[dim], type *elem) {\
+    type *mmb;\
+    int ind;\
+    SGLIB_HASH_TAB_FIND_MEMBER(type, table, dim, elem, hash_function, comparator, ind, mmb);\
+    return(mmb);\
+  }\
+  type *sglib_hashed_##type##_it_init_on_equal(struct sglib_hashed_##type##_iterator *it, type *table[dim], int (*subcomparator)(type *, type *), type *equalto) {\
+    int i;\
+    it->table = table;\
+    it->subcomparator = subcomparator;\
+    it->equalto = equalto;\
+    /* position on the first occupied slot */\
+    for(i=0; i<(dim) && table[i]==NULL; i++) ;\
+    it->currentIndex = i;\
+    if (i<(dim)) return(table[i]);\
+    return(NULL);\
+  }\
+  type *sglib_hashed_##type##_it_init(struct sglib_hashed_##type##_iterator *it, type *table[dim]) {\
+    return(sglib_hashed_##type##_it_init_on_equal(it, table, NULL, NULL));\
+  }\
+  type *sglib_hashed_##type##_it_current(struct sglib_hashed_##type##_iterator *it) {\
+    /* an exhausted iterator has currentIndex == dim: do not read past the table */\
+    if (it->currentIndex < 0 || it->currentIndex >= (dim)) return(NULL);\
+    return(it->table[it->currentIndex]);\
+  }\
+  type *sglib_hashed_##type##_it_next(struct sglib_hashed_##type##_iterator *it) {\
+    int i;\
+    i=it->currentIndex;\
+    if (i<(dim)) {\
+      /* advance to the next occupied slot */\
+      for(i++; i<(dim) && it->table[i]==NULL; i++) ;\
+    }\
+    it->currentIndex = i;\
+    if (i<(dim)) return(it->table[i]);\
+    return(NULL);\
+  }
+
+
+/* ------------------- hashed container (only for level 1) -------------------- */
+/*
+  hashed container is a table of given fixed size containing another
+  (dynamic) base container in each cell. 
Once an object should be + inserted into the hashed container, a hash function is used to + determine the cell where the object belongs and the object is + inserted into the base container stored in this cell. Usually the + base container is simply a list or a sorted list, but it can be a + red-black tree as well. + + parameters: + type - the type of the container stored in each cell. + dim - the size of the hashed array + hash_function - the hashing function hashing 'type *' to unsigned. + +*/ + +#define SGLIB_DEFINE_HASHED_CONTAINER_PROTOTYPES(type, dim, hash_function) \ + struct sglib_hashed_##type##_iterator {\ + struct sglib_##type##_iterator containerIt;\ + type **table;\ + int currentIndex;\ + int (*subcomparator)(type *, type *);\ + type *equalto;\ + };\ + extern void sglib_hashed_##type##_init(type *table[dim]);\ + extern void sglib_hashed_##type##_add(type *table[dim], type *elem);\ + extern int sglib_hashed_##type##_add_if_not_member(type *table[dim], type *elem, type **member);\ + extern void sglib_hashed_##type##_delete(type *table[dim], type *elem);\ + extern int sglib_hashed_##type##_delete_if_member(type *table[dim], type *elem, type **memb);\ + extern int sglib_hashed_##type##_is_member(type *table[dim], type *elem);\ + extern type * sglib_hashed_##type##_find_member(type *table[dim], type *elem);\ + extern type *sglib_hashed_##type##_it_init(struct sglib_hashed_##type##_iterator *it, type *table[dim]); \ + extern type *sglib_hashed_##type##_it_init_on_equal(struct sglib_hashed_##type##_iterator *it, type *table[dim], int (*subcomparator)(type *, type *), type *equalto); \ + extern type *sglib_hashed_##type##_it_current(struct sglib_hashed_##type##_iterator *it); \ + extern type *sglib_hashed_##type##_it_next(struct sglib_hashed_##type##_iterator *it); + +#define SGLIB_DEFINE_HASHED_CONTAINER_FUNCTIONS(type, dim, hash_function) \ + /*extern unsigned hash_function(type *elem);*/\ + void sglib_hashed_##type##_init(type *table[dim]) {\ + unsigned i;\ 
+ for(i=0; i<(dim); i++) table[i] = NULL;\ + }\ + void sglib_hashed_##type##_add(type *table[dim], type *elem) {\ + unsigned i;\ + i = ((unsigned)hash_function(elem)) % (dim);\ + sglib_##type##_add(&(table)[i], elem);\ + }\ + int sglib_hashed_##type##_add_if_not_member(type *table[dim], type *elem, type **member) {\ + unsigned i;\ + i = ((unsigned)hash_function(elem)) % (dim);\ + return(sglib_##type##_add_if_not_member(&(table)[i], elem, member));\ + }\ + void sglib_hashed_##type##_delete(type *table[dim], type *elem) {\ + unsigned i;\ + i = ((unsigned)hash_function(elem)) % (dim);\ + sglib_##type##_delete(&(table)[i], elem);\ + }\ + int sglib_hashed_##type##_delete_if_member(type *table[dim], type *elem, type **memb) {\ + unsigned i;\ + i = ((unsigned)hash_function(elem)) % (dim);\ + return(sglib_##type##_delete_if_member(&(table)[i], elem, memb));\ + }\ + int sglib_hashed_##type##_is_member(type *table[dim], type *elem) {\ + unsigned i;\ + i = ((unsigned)hash_function(elem)) % (dim);\ + return(sglib_##type##_is_member((table)[i], elem));\ + }\ + type * sglib_hashed_##type##_find_member(type *table[dim], type *elem) {\ + unsigned i;\ + i = ((unsigned)hash_function(elem)) % (dim);\ + return(sglib_##type##_find_member((table)[i], elem));\ + }\ + type *sglib_hashed_##type##_it_init_on_equal(struct sglib_hashed_##type##_iterator *it, type *table[dim], int (*subcomparator)(type *, type *), type *equalto) {\ + type *e;\ + it->table = table;\ + it->currentIndex = 0;\ + it->subcomparator = subcomparator;\ + it->equalto = equalto;\ + e = sglib_##type##_it_init_on_equal(&it->containerIt, table[it->currentIndex], it->subcomparator, it->equalto);\ + if (e==NULL) e = sglib_hashed_##type##_it_next(it);\ + return(e);\ + }\ + type *sglib_hashed_##type##_it_init(struct sglib_hashed_##type##_iterator *it, type *table[dim]) {\ + return(sglib_hashed_##type##_it_init_on_equal(it, table, NULL, NULL));\ + }\ + type *sglib_hashed_##type##_it_current(struct sglib_hashed_##type##_iterator 
*it) {\ + return(sglib_##type##_it_current(&it->containerIt));\ + }\ + type *sglib_hashed_##type##_it_next(struct sglib_hashed_##type##_iterator *it) {\ + type *e;\ + e = sglib_##type##_it_next(&it->containerIt);\ + while (e==NULL && (++(it->currentIndex))<(dim)) {\ + e = sglib_##type##_it_init_on_equal(&it->containerIt, it->table[it->currentIndex], it->subcomparator, it->equalto);\ + }\ + return(e);\ + } + + + +/* ---------------------------------------------------------------------------- */ +/* ------------------------- DYNAMIC DATA STRUCTURES -------------------------- */ +/* ---------------------------------------------------------------------------- */ + + + +/* ------------------------------------ list (level 1) -------------------------------- */ + +#define SGLIB_DEFINE_LIST_PROTOTYPES(type, comparator, next) \ + struct sglib_##type##_iterator {\ + type *currentelem;\ + type *nextelem;\ + int (*subcomparator)(type *, type *);\ + type *equalto;\ + };\ + extern void sglib_##type##_add(type **list, type *elem);\ + extern int sglib_##type##_add_if_not_member(type **list, type *elem, type **member);\ + extern void sglib_##type##_concat(type **first, type *second);\ + extern void sglib_##type##_delete(type **list, type *elem);\ + extern int sglib_##type##_delete_if_member(type **list, type *elem, type **member);\ + extern int sglib_##type##_is_member(type *list, type *elem);\ + extern type *sglib_##type##_find_member(type *list, type *elem);\ + extern void sglib_##type##_sort(type **list);\ + extern int sglib_##type##_len(type *list);\ + extern void sglib_##type##_reverse(type **list);\ + extern type *sglib_##type##_it_init(struct sglib_##type##_iterator *it, type *list); \ + extern type *sglib_##type##_it_init_on_equal(struct sglib_##type##_iterator *it, type *list, int (*subcomparator)(type *, type *), type *equalto); \ + extern type *sglib_##type##_it_current(struct sglib_##type##_iterator *it); \ + extern type *sglib_##type##_it_next(struct 
sglib_##type##_iterator *it); + + +#define SGLIB_DEFINE_LIST_FUNCTIONS(type, comparator, next) \ + int sglib_##type##_is_member(type *list, type *elem) {\ + int result;\ + SGLIB_LIST_IS_MEMBER(type, list, elem, next, result);\ + return(result);\ + }\ + type *sglib_##type##_find_member(type *list, type *elem) {\ + type *result;\ + SGLIB_LIST_FIND_MEMBER(type, list, elem, comparator, next, result);\ + return(result);\ + }\ + int sglib_##type##_add_if_not_member(type **list, type *elem, type **member) {\ + SGLIB_LIST_ADD_IF_NOT_MEMBER(type, *list, elem, comparator, next, *member);\ + return(*member==NULL);\ + }\ + void sglib_##type##_add(type **list, type *elem) {\ + SGLIB_LIST_ADD(type, *list, elem, next);\ + }\ + void sglib_##type##_concat(type **first, type *second) {\ + SGLIB_LIST_CONCAT(type, *first, second, next);\ + }\ + void sglib_##type##_delete(type **list, type *elem) {\ + SGLIB_LIST_DELETE(type, *list, elem, next);\ + }\ + int sglib_##type##_delete_if_member(type **list, type *elem, type **member) {\ + SGLIB_LIST_DELETE_IF_MEMBER(type, *list, elem, comparator, next, *member);\ + return(*member!=NULL);\ + }\ + void sglib_##type##_sort(type **list) { \ + SGLIB_LIST_SORT(type, *list, comparator, next);\ + }\ + int sglib_##type##_len(type *list) {\ + int res;\ + SGLIB_LIST_LEN(type, list, next, res);\ + return(res);\ + }\ + void sglib_##type##_reverse(type **list) {\ + SGLIB_LIST_REVERSE(type, *list, next);\ + }\ + \ + type *sglib_##type##_it_init_on_equal(struct sglib_##type##_iterator *it, type *list, int (*subcomparator)(type *, type *), type *equalto) {\ + it->subcomparator = subcomparator;\ + it->equalto = equalto;\ + it->nextelem = list;\ + return(sglib_##type##_it_next(it));\ + }\ + type *sglib_##type##_it_init(struct sglib_##type##_iterator *it, type *list) {\ + return(sglib_##type##_it_init_on_equal(it, list, NULL, NULL));\ + }\ + type *sglib_##type##_it_current(struct sglib_##type##_iterator *it) {\ + return(it->currentelem);\ + }\ + type 
*sglib_##type##_it_next(struct sglib_##type##_iterator *it) {\ + type *ce, *eq;\ + int (*scp)(type *, type *);\ + ce = it->nextelem;\ + it->nextelem = NULL;\ + if (it->subcomparator != NULL) {\ + eq = it->equalto; \ + scp = it->subcomparator;\ + while (ce!=NULL && scp(ce, eq)!=0) ce = ce->next;\ + }\ + it->currentelem = ce;\ + if (ce != NULL) it->nextelem = ce->next;\ + return(ce);\ + } + +/* ----------------------------- sorted list (level 1) ----------------------------------- */ + + +#define SGLIB_DEFINE_SORTED_LIST_PROTOTYPES(type, comparator, next) \ + struct sglib_##type##_iterator {\ + type *currentelem;\ + type *nextelem;\ + int (*subcomparator)(type *, type *);\ + type *equalto;\ + };\ + extern void sglib_##type##_add(type **list, type *elem);\ + extern int sglib_##type##_add_if_not_member(type **list, type *elem, type **member);\ + extern void sglib_##type##_delete(type **list, type *elem);\ + extern int sglib_##type##_delete_if_member(type **list, type *elem, type **member);\ + extern int sglib_##type##_is_member(type *list, type *elem);\ + extern type *sglib_##type##_find_member(type *list, type *elem);\ + extern int sglib_##type##_len(type *list);\ + extern void sglib_##type##_sort(type **list);\ + extern type *sglib_##type##_it_init(struct sglib_##type##_iterator *it, type *list); \ + extern type *sglib_##type##_it_init_on_equal(struct sglib_##type##_iterator *it, type *list, int (*subcomparator)(type *, type *), type *equalto); \ + extern type *sglib_##type##_it_current(struct sglib_##type##_iterator *it); \ + extern type *sglib_##type##_it_next(struct sglib_##type##_iterator *it); + + +#define SGLIB_DEFINE_SORTED_LIST_FUNCTIONS(type, comparator, next) \ + int sglib_##type##_is_member(type *list, type *elem) {\ + int result;\ + SGLIB_SORTED_LIST_IS_MEMBER(type, list, elem, comparator, next, result);\ + return(result);\ + }\ + type *sglib_##type##_find_member(type *list, type *elem) {\ + type *result;\ + SGLIB_SORTED_LIST_FIND_MEMBER(type, list, elem, 
comparator, next, result);\ + return(result);\ + }\ + int sglib_##type##_add_if_not_member(type **list, type *elem, type **member) {\ + SGLIB_SORTED_LIST_ADD_IF_NOT_MEMBER(type, *list, elem, comparator, next, *member);\ + return(*member==NULL);\ + }\ + void sglib_##type##_add(type **list, type *elem) {\ + SGLIB_SORTED_LIST_ADD(type, *list, elem, comparator, next);\ + }\ + void sglib_##type##_delete(type **list, type *elem) {\ + SGLIB_SORTED_LIST_DELETE(type, *list, elem, next);\ + }\ + int sglib_##type##_delete_if_member(type **list, type *elem, type **member) {\ + SGLIB_SORTED_LIST_DELETE_IF_MEMBER(type, *list, elem, comparator, next, *member);\ + return(*member!=NULL);\ + }\ + int sglib_##type##_len(type *list) {\ + int res;\ + SGLIB_SORTED_LIST_LEN(type, list, next, res);\ + return(res);\ + }\ + void sglib_##type##_sort(type **list) { \ + SGLIB_LIST_SORT(type, *list, comparator, next);\ + }\ + \ + type *sglib_##type##_it_init_on_equal(struct sglib_##type##_iterator *it, type *list, int (*subcomparator)(type *, type *), type *equalto) {\ + it->subcomparator = subcomparator;\ + it->equalto = equalto;\ + it->nextelem = list;\ + return(sglib_##type##_it_next(it));\ + }\ + type *sglib_##type##_it_init(struct sglib_##type##_iterator *it, type *list) {\ + return(sglib_##type##_it_init_on_equal(it, list, NULL, NULL));\ + }\ + type *sglib_##type##_it_current(struct sglib_##type##_iterator *it) {\ + return(it->currentelem);\ + }\ + type *sglib_##type##_it_next(struct sglib_##type##_iterator *it) {\ + type *ce, *eq;\ + int (*scp)(type *, type *);\ + int c;\ + ce = it->nextelem;\ + it->nextelem = NULL;\ + if (it->subcomparator != NULL) {\ + eq = it->equalto; \ + scp = it->subcomparator;\ + while (ce!=NULL && (c=scp(ce, eq)) < 0) ce = ce->next;\ + if (ce != NULL && c > 0) ce = NULL;\ + }\ + it->currentelem = ce;\ + if (ce != NULL) it->nextelem = ce->next;\ + return(ce);\ + } + + +/* ----------------------------- double linked list (level 1) ------------------------------ */ 
+ + +#define SGLIB_DEFINE_DL_LIST_PROTOTYPES(type, comparator, previous, next) \ + struct sglib_##type##_iterator {\ + type *currentelem;\ + type *prevelem;\ + type *nextelem;\ + int (*subcomparator)(type *, type *);\ + type *equalto;\ + };\ + extern void sglib_##type##_add(type **list, type *elem);\ + extern void sglib_##type##_add_before(type **list, type *elem);\ + extern void sglib_##type##_add_after(type **list, type *elem);\ + extern int sglib_##type##_add_if_not_member(type **list, type *elem, type **member);\ + extern int sglib_##type##_add_before_if_not_member(type **list, type *elem, type **member);\ + extern int sglib_##type##_add_after_if_not_member(type **list, type *elem, type **member);\ + extern void sglib_##type##_concat(type **first, type *second);\ + extern void sglib_##type##_delete(type **list, type *elem);\ + extern int sglib_##type##_delete_if_member(type **list, type *elem, type **member);\ + extern int sglib_##type##_is_member(type *list, type *elem);\ + extern type *sglib_##type##_find_member(type *list, type *elem);\ + extern type *sglib_##type##_get_first(type *list);\ + extern type *sglib_##type##_get_last(type *list);\ + extern void sglib_##type##_sort(type **list);\ + extern int sglib_##type##_len(type *list);\ + extern void sglib_##type##_reverse(type **list);\ + extern type *sglib_##type##_it_init(struct sglib_##type##_iterator *it, type *list); \ + extern type *sglib_##type##_it_init_on_equal(struct sglib_##type##_iterator *it, type *list, int (*subcomparator)(type *, type *), type *equalto); \ + extern type *sglib_##type##_it_current(struct sglib_##type##_iterator *it); \ + extern type *sglib_##type##_it_next(struct sglib_##type##_iterator *it); + + +#define SGLIB_DEFINE_DL_LIST_FUNCTIONS(type, comparator, previous, next) \ + void sglib_##type##_add(type **list, type *elem) {\ + SGLIB_DL_LIST_ADD(type, *list, elem, previous, next);\ + }\ + void sglib_##type##_add_after(type **list, type *elem) {\ + SGLIB_DL_LIST_ADD_AFTER(type, 
*list, elem, previous, next);\ + }\ + void sglib_##type##_add_before(type **list, type *elem) {\ + SGLIB_DL_LIST_ADD_BEFORE(type, *list, elem, previous, next);\ + }\ + int sglib_##type##_add_if_not_member(type **list, type *elem, type **member) {\ + SGLIB_DL_LIST_ADD_IF_NOT_MEMBER(type, *list, elem, comparator, previous, next, *member);\ + return(*member==NULL);\ + }\ + int sglib_##type##_add_after_if_not_member(type **list, type *elem, type **member) {\ + SGLIB_DL_LIST_ADD_AFTER_IF_NOT_MEMBER(type, *list, elem, comparator, previous, next, *member);\ + return(*member==NULL);\ + }\ + int sglib_##type##_add_before_if_not_member(type **list, type *elem, type **member) {\ + SGLIB_DL_LIST_ADD_BEFORE_IF_NOT_MEMBER(type, *list, elem, comparator, previous, next, *member);\ + return(*member==NULL);\ + }\ + void sglib_##type##_concat(type **first, type *second) {\ + SGLIB_DL_LIST_CONCAT(type, *first, second, previous, next);\ + }\ + void sglib_##type##_delete(type **list, type *elem) {\ + SGLIB_DL_LIST_DELETE(type, *list, elem, previous, next);\ + }\ + int sglib_##type##_delete_if_member(type **list, type *elem, type **member) {\ + SGLIB_DL_LIST_DELETE_IF_MEMBER(type, *list, elem, comparator, previous, next, *member);\ + return(*member!=NULL);\ + }\ + int sglib_##type##_is_member(type *list, type *elem) {\ + int result;\ + SGLIB_DL_LIST_IS_MEMBER(type, list, elem, previous, next, result);\ + return(result);\ + }\ + type *sglib_##type##_find_member(type *list, type *elem) {\ + type *result;\ + SGLIB_DL_LIST_FIND_MEMBER(type, list, elem, comparator, previous, next, result);\ + return(result);\ + }\ + type *sglib_##type##_get_first(type *list) {\ + type *result;\ + SGLIB_DL_LIST_GET_FIRST(type, list, previous, next, result);\ + return(result);\ + }\ + type *sglib_##type##_get_last(type *list) {\ + type *result;\ + SGLIB_DL_LIST_GET_LAST(type, list, previous, next, result);\ + return(result);\ + }\ + void sglib_##type##_sort(type **list) {\ + SGLIB_DL_LIST_SORT(type, *list, 
comparator, previous, next);\ + }\ + int sglib_##type##_len(type *list) {\ + int res;\ + SGLIB_DL_LIST_LEN(type, list, previous, next, res);\ + return(res);\ + }\ + void sglib_##type##_reverse(type **list) {\ + SGLIB_DL_LIST_REVERSE(type, *list, previous, next);\ + }\ + \ + type *sglib_##type##_it_init_on_equal(struct sglib_##type##_iterator *it, type *list, int (*subcomparator)(type *, type *), type *equalto) {\ + it->subcomparator = subcomparator;\ + it->equalto = equalto;\ + it->prevelem = list;\ + it->nextelem = list;\ + if (list != NULL) it->nextelem = list->next;\ + return(sglib_##type##_it_next(it));\ + }\ + type *sglib_##type##_it_init(struct sglib_##type##_iterator *it, type *list) {\ + return(sglib_##type##_it_init_on_equal(it, list, NULL, NULL));\ + }\ + type *sglib_##type##_it_current(struct sglib_##type##_iterator *it) {\ + return(it->currentelem);\ + }\ + type *sglib_##type##_it_next(struct sglib_##type##_iterator *it) {\ + type *ce, *eq;\ + int (*scp)(type *, type *);\ + ce = it->prevelem;\ + it->prevelem = NULL;\ + if (it->subcomparator != NULL) {\ + eq = it->equalto; \ + scp = it->subcomparator;\ + while (ce!=NULL && scp(eq, ce)!=0) ce = ce->previous;\ + }\ + if (ce != NULL) {\ + it->prevelem = ce->previous;\ + } else {\ + ce = it->nextelem;\ + it->nextelem = NULL;\ + if (it->subcomparator != NULL) {\ + eq = it->equalto; \ + scp = it->subcomparator;\ + while (ce!=NULL && scp(ce, eq)!=0) ce = ce->next;\ + }\ + if (ce != NULL) it->nextelem = ce->next;\ + }\ + it->currentelem = ce;\ + return(ce);\ + } + + +/* --------------------------------- red-black trees (level 1) -------------------------------- */ + +/* + +This implementation requires pointers to left and right sons (no +parent pointer is needed) and one bit of additional information +storing the color of the node. 
The implementation follows discrepancy +fixing rules from: +http://www.cis.ohio-state.edu/~gurari/course/cis680/cis680Ch11.html + +*/ + +#define SGLIB___RBTREE_FIX_INSERTION_DISCREPANCY(type, tree, leftt, rightt, bits, RED, BLACK) {\ + type *t, *tl, *a, *b, *c, *ar, *bl, *br, *cl, *cr;\ + t = *tree;\ + tl = t->leftt;\ + if (t->rightt!=NULL && SGLIB___GET_VALUE(t->rightt->bits)==RED) {\ + if (SGLIB___GET_VALUE(tl->bits)==RED) {\ + if ((tl->leftt!=NULL && SGLIB___GET_VALUE(tl->leftt->bits)==RED) \ + || (tl->rightt!=NULL && SGLIB___GET_VALUE(tl->rightt->bits)==RED)) {\ + SGLIB___SET_VALUE(t->leftt->bits,BLACK);\ + SGLIB___SET_VALUE(t->rightt->bits,BLACK);\ + SGLIB___SET_VALUE(t->bits,RED);\ + }\ + }\ + } else {\ + if (SGLIB___GET_VALUE(tl->bits)==RED) {\ + if (tl->leftt!=NULL && SGLIB___GET_VALUE(tl->leftt->bits)==RED) {\ + a = t; b = tl; c = tl->leftt;\ + br = b->rightt;\ + a->leftt = br;\ + b->leftt = c; b->rightt = a;\ + SGLIB___SET_VALUE(a->bits,RED);\ + SGLIB___SET_VALUE(b->bits,BLACK);\ + *tree = b;\ + } else if (tl->rightt!=NULL && SGLIB___GET_VALUE(tl->rightt->bits)==RED) {\ + a = t; b = tl; ar=a->rightt;\ + bl=b->leftt; c=b->rightt;\ + cl=c->leftt; cr=c->rightt;\ + b->rightt = cl;\ + a->leftt = cr;\ + c->leftt = b;\ + c->rightt = a;\ + SGLIB___SET_VALUE(c->bits,BLACK);\ + SGLIB___SET_VALUE(a->bits,RED);\ + *tree = c;\ + }\ + }\ + }\ +} + +#define SGLIB___RBTREE_FIX_DELETION_DISCREPANCY(type, tree, leftt, rightt, bits, RED, BLACK, res) {\ + type *t, *a, *b, *c, *d, *ar, *bl, *br, *cl, *cr, *dl, *dr;\ + t = a = *tree;\ + assert(t!=NULL);\ + ar = a->rightt;\ + b = t->leftt;\ + if (b==NULL) {\ + assert(SGLIB___GET_VALUE(t->bits)==RED);\ + SGLIB___SET_VALUE(t->bits,BLACK);\ + res = 0;\ + } else {\ + bl = b->leftt;\ + br = b->rightt;\ + if (SGLIB___GET_VALUE(b->bits)==RED) {\ + if (br==NULL) {\ + *tree = b;\ + SGLIB___SET_VALUE(b->bits,BLACK);\ + b->rightt = a;\ + a->leftt = br;\ + res = 0;\ + } else {\ + c = br;\ + assert(c!=NULL && 
SGLIB___GET_VALUE(c->bits)==BLACK);\ + cl = c->leftt;\ + cr = c->rightt;\ + if ((cl==NULL||SGLIB___GET_VALUE(cl->bits)==BLACK) && (cr==NULL||SGLIB___GET_VALUE(cr->bits)==BLACK)) {\ + *tree = b;\ + b->rightt = a;\ + SGLIB___SET_VALUE(b->bits,BLACK);\ + a->leftt = c;\ + SGLIB___SET_VALUE(c->bits,RED);\ + res = 0;\ + } else if (cl!=NULL && SGLIB___GET_VALUE(cl->bits)==RED) {\ + if (cr!=NULL && SGLIB___GET_VALUE(cr->bits)==RED) {\ + d = cr;\ + dl = d->leftt;\ + dr = d->rightt;\ + *tree = d;\ + SGLIB___SET_VALUE(d->bits,BLACK);\ + d->leftt = b;\ + c->rightt = dl;\ + d->rightt = a;\ + a->leftt = dr;\ + res = 0;\ + } else {\ + *tree = c;\ + c->leftt = b;\ + c->rightt = a;\ + b->leftt = bl;\ + b->rightt = cl;\ + a->leftt = cr;\ + SGLIB___SET_VALUE(cl->bits,BLACK);\ + res = 0;\ + }\ + } else if (cr!=NULL && SGLIB___GET_VALUE(cr->bits)==RED) {\ + assert(cl==NULL || SGLIB___GET_VALUE(cl->bits)==BLACK);\ + d = cr;\ + dl = d->leftt;\ + dr = d->rightt;\ + *tree = d;\ + SGLIB___SET_VALUE(d->bits,BLACK);\ + d->leftt = b;\ + c->rightt = dl;\ + d->rightt = a;\ + a->leftt = dr;\ + res = 0;\ + } else {\ + assert(0);\ + res = 0;\ + }\ + }\ + } else {\ + if ((bl==NULL || SGLIB___GET_VALUE(bl->bits)==BLACK) && (br==NULL || SGLIB___GET_VALUE(br->bits)==BLACK)) {\ + res = (SGLIB___GET_VALUE(a->bits)==BLACK);\ + SGLIB___SET_VALUE(a->bits,BLACK);\ + SGLIB___SET_VALUE(b->bits,RED);\ + } else if (bl!=NULL && SGLIB___GET_VALUE(bl->bits)==RED) {\ + if (br==NULL || SGLIB___GET_VALUE(br->bits)==BLACK) {\ + *tree = b;\ + SGLIB___SET_VALUE(b->bits,SGLIB___GET_VALUE(a->bits));\ + SGLIB___SET_VALUE(a->bits,BLACK);\ + b->rightt = a;\ + a->leftt = br;\ + SGLIB___SET_VALUE(bl->bits,BLACK);\ + res = 0;\ + } else {\ + assert(bl!=NULL);\ + assert(br!=NULL);\ + assert(SGLIB___GET_VALUE(bl->bits)==RED);\ + assert(SGLIB___GET_VALUE(br->bits)==RED);\ + c = br;\ + cl = c->leftt;\ + cr = c->rightt;\ + *tree = c;\ + SGLIB___SET_VALUE(c->bits,SGLIB___GET_VALUE(a->bits));\ + SGLIB___SET_VALUE(a->bits,BLACK);\ + 
c->leftt = b;\ + c->rightt = a;\ + b->rightt = cl;\ + a->leftt = cr;\ + res = 0;\ + }\ + } else {\ + assert(br!=NULL && SGLIB___GET_VALUE(br->bits)==RED);\ + c = br;\ + cl = c->leftt;\ + cr = c->rightt;\ + *tree = c;\ + SGLIB___SET_VALUE(c->bits,SGLIB___GET_VALUE(a->bits));\ + SGLIB___SET_VALUE(a->bits,BLACK);\ + c->leftt = b;\ + c->rightt = a;\ + b->rightt = cl;\ + a->leftt = cr;\ + res = 0;\ + }\ + }\ + }\ +} + + +#define SGLIB_DEFINE_RBTREE_FUNCTIONS_GENERAL(type, left, right, bits, comparator, RED, BLACK) \ +static void sglib___##type##_fix_left_insertion_discrepancy(type **tree) {\ + SGLIB___RBTREE_FIX_INSERTION_DISCREPANCY(type, tree, left, right, bits, RED, BLACK);\ +}\ +\ +static void sglib___##type##_fix_right_insertion_discrepancy(type **tree) {\ + SGLIB___RBTREE_FIX_INSERTION_DISCREPANCY(type, tree, right, left, bits, RED, BLACK);\ +}\ +\ +static int sglib___##type##_fix_left_deletion_discrepancy(type **tree) {\ + int res;\ + SGLIB___RBTREE_FIX_DELETION_DISCREPANCY(type, tree, right, left, bits, RED, BLACK, res);\ + return(res);\ +}\ +\ +static int sglib___##type##_fix_right_deletion_discrepancy(type **tree) {\ + int res;\ + SGLIB___RBTREE_FIX_DELETION_DISCREPANCY(type, tree, left, right, bits, RED, BLACK, res);\ + return(res);\ +}\ +\ +static void sglib___##type##_add_recursive(type **tree, type *elem) {\ + int cmp;\ + type *t;\ + t = *tree;\ + if (t == NULL) {\ + SGLIB___SET_VALUE(elem->bits,RED);\ + *tree =elem;\ + } else {\ + cmp = comparator(elem, t);\ + if (cmp < 0 || (cmp==0 && elem<t)) {\ + sglib___##type##_add_recursive(&t->left, elem);\ + if (SGLIB___GET_VALUE(t->bits)==BLACK) sglib___##type##_fix_left_insertion_discrepancy(tree);\ + } else {\ + sglib___##type##_add_recursive(&t->right, elem);\ + if (SGLIB___GET_VALUE(t->bits)==BLACK) sglib___##type##_fix_right_insertion_discrepancy(tree);\ + }\ + }\ +}\ +\ +static int sglib___##type##_delete_rightmost_leaf(type **tree, type **theLeaf) {\ + type *t;\ + int res, deepDecreased;\ + t = *tree;\ + res = 0;\ + assert(t!=NULL);\ + if (t->right 
== NULL) {\ + *theLeaf = t;\ + if (t->left!=NULL) {\ + if (SGLIB___GET_VALUE(t->bits)==BLACK && SGLIB___GET_VALUE(t->left->bits)==BLACK) res = 1;\ + SGLIB___SET_VALUE(t->left->bits,BLACK);\ + *tree = t->left;\ + } else {\ + *tree = NULL;\ + res = (SGLIB___GET_VALUE(t->bits)==BLACK);\ + }\ + } else {\ + deepDecreased = sglib___##type##_delete_rightmost_leaf(&t->right, theLeaf);\ + if (deepDecreased) res = sglib___##type##_fix_right_deletion_discrepancy(tree);\ + }\ + return(res);\ +}\ +\ +int sglib___##type##_delete_recursive(type **tree, type *elem) {\ + type *t, *theLeaf;\ + int cmp, res, deepDecreased;\ + t = *tree;\ + res = 0;\ + if (t==NULL) {\ + assert(0 && "The element to delete not found in the tree, use 'delete_if_member'"!=NULL);\ + } else {\ + cmp = comparator(elem, t);\ + if (cmp < 0 || (cmp==0 && elem<t)) {\ + deepDecreased = sglib___##type##_delete_recursive(&t->left, elem);\ + if (deepDecreased) {\ + res = sglib___##type##_fix_left_deletion_discrepancy(tree);\ + }\ + } else if (cmp > 0 || (cmp==0 && elem>t)) {\ + deepDecreased = sglib___##type##_delete_recursive(&t->right, elem);\ + if (deepDecreased) {\ + res = sglib___##type##_fix_right_deletion_discrepancy(tree);\ + }\ + } else {\ + assert(elem==t && "Deleting an element which is non member of the tree, use 'delete_if_member'"!=NULL);\ + if (t->left == NULL) {\ + if (t->right == NULL) {\ + /* a leaf, delete, it; */\ + *tree = NULL;\ + res = (SGLIB___GET_VALUE(t->bits)==BLACK);\ + } else {\ + if (SGLIB___GET_VALUE(t->bits)==0 && SGLIB___GET_VALUE(t->right->bits)==0) res = 1;\ + SGLIB___SET_VALUE(t->right->bits,BLACK);\ + *tree = t->right;\ + }\ + } else {\ + /* propagate deletion until righmost leaf of left subtree */\ + deepDecreased = sglib___##type##_delete_rightmost_leaf(&t->left, &theLeaf);\ + theLeaf->left = t->left;\ + theLeaf->right = t->right;\ + SGLIB___SET_VALUE(theLeaf->bits,SGLIB___GET_VALUE(t->bits));\ + *tree = theLeaf;\ + if (deepDecreased) res = sglib___##type##_fix_left_deletion_discrepancy(tree);\ + }\ + }\ + }\ + return(res);\ +}\ +\ +void 
sglib_##type##_add(type **tree, type *elem) {\ + elem->left = elem->right = NULL;\ + sglib___##type##_add_recursive(tree, elem);\ + SGLIB___SET_VALUE((*tree)->bits,BLACK);\ +}\ +\ +void sglib_##type##_delete(type **tree, type *elem) {\ + sglib___##type##_delete_recursive(tree, elem);\ + if (*tree!=NULL) SGLIB___SET_VALUE((*tree)->bits,BLACK);\ +}\ +\ +type *sglib_##type##_find_member(type *t, type *elem) {\ + type *res;\ + SGLIB___BIN_TREE_FIND_MEMBER(type, t, elem, left, right, comparator, res);\ + return(res);\ +}\ +\ +int sglib_##type##_is_member(type *t, type *elem) {\ + int cmp;\ + while (t!=NULL) {\ + cmp = comparator(elem, t);\ + if (cmp < 0 || (cmp==0 && elem<t)) {\ + t = t->left;\ + } else if (cmp > 0 || (cmp==0 && elem>t)) {\ + t = t->right;\ + } else {\ + assert(t == elem);\ + return(1);\ + }\ + }\ + return(0);\ +}\ +\ +int sglib_##type##_delete_if_member(type **tree, type *elem, type **memb) {\ + if ((*memb=sglib_##type##_find_member(*tree, elem))!=NULL) {\ + sglib_##type##_delete(tree, *memb);\ + return(1);\ + } else {\ + return(0);\ + }\ +}\ +int sglib_##type##_add_if_not_member(type **tree, type *elem, type **memb) {\ + if ((*memb=sglib_##type##_find_member(*tree, elem))==NULL) {\ + sglib_##type##_add(tree, elem);\ + return(1);\ + } else {\ + return(0);\ + }\ +}\ +int sglib_##type##_len(type *t) {\ + int n;\ + type *e;\ + n = 0;\ + SGLIB_BIN_TREE_MAP_ON_ELEMENTS(type, t, e, left, right, n++);\ + return(n);\ +}\ +\ +void sglib__##type##_it_compute_current_elem(struct sglib_##type##_iterator *it) {\ + int i,j,cmp;\ + type *s, *eqt;\ + int (*subcomparator)(type *, type *);\ + eqt = it->equalto;\ + subcomparator = it->subcomparator;\ + it->currentelem = NULL;\ + while(it->pathi > 0 && it->currentelem==NULL) {\ + i = it->pathi-1;\ + if (i >= 0) {\ + if (it->pass[i] >= 2) {\ + /* goto up */\ + it->pathi --;\ + } else {\ + if (it->pass[i] == 0) {\ + /* goto left */\ + s = it->path[i]->left;\ + } else {\ + /* goto right */\ + s = it->path[i]->right;\ + }\ + if (eqt != NULL) 
{\ + if (subcomparator == NULL) {\ + SGLIB___BIN_TREE_FIND_MEMBER(type, s, eqt, left, right, comparator, s);\ + } else {\ + SGLIB___BIN_TREE_FIND_MEMBER(type, s, eqt, left, right, subcomparator, s);\ + }\ + }\ + if (s != NULL) {\ + j = i+1;\ + it->path[j] = s;\ + it->pass[j] = 0;\ + it->pathi ++;\ + }\ + it->pass[i] ++;\ + }\ + }\ + if (it->pathi>0 && it->order == it->pass[it->pathi-1]) {\ + it->currentelem = it->path[it->pathi-1];\ + }\ + }\ +}\ +type *sglib__##type##_it_init(struct sglib_##type##_iterator *it, type *tree, int order, int (*subcomparator)(type *, type *), type *equalto) {\ + type *t;\ + assert(it!=NULL);\ + it->order = order;\ + it->equalto = equalto;\ + it->subcomparator = subcomparator;\ + if (equalto == NULL) { \ + t = tree;\ + } else {\ + if (subcomparator == NULL) {\ + SGLIB___BIN_TREE_FIND_MEMBER(type, tree, equalto, left, right, comparator, t);\ + } else {\ + SGLIB___BIN_TREE_FIND_MEMBER(type, tree, equalto, left, right, subcomparator, t);\ + }\ + }\ + if (t == NULL) {\ + it->pathi = 0;\ + it->currentelem = NULL;\ + } else {\ + it->pathi = 1;\ + it->pass[0] = 0;\ + it->path[0] = t;\ + if (order == 0) {\ + it->currentelem = t;\ + } else {\ + sglib__##type##_it_compute_current_elem(it);\ + }\ + }\ + return(it->currentelem);\ +}\ +type *sglib_##type##_it_init(struct sglib_##type##_iterator *it, type *tree) {\ + return(sglib__##type##_it_init(it, tree, 2, NULL, NULL));\ +}\ +type *sglib_##type##_it_init_preorder(struct sglib_##type##_iterator *it, type *tree) {\ + return(sglib__##type##_it_init(it, tree, 0, NULL, NULL));\ +}\ +type *sglib_##type##_it_init_inorder(struct sglib_##type##_iterator *it, type *tree) {\ + return(sglib__##type##_it_init(it, tree, 1, NULL, NULL));\ +}\ +type *sglib_##type##_it_init_postorder(struct sglib_##type##_iterator *it, type *tree) {\ + return(sglib__##type##_it_init(it, tree, 2, NULL, NULL));\ +}\ +type *sglib_##type##_it_init_on_equal(struct sglib_##type##_iterator *it, type *tree, int (*subcomparator)(type *, 
type *), type *equalto) {\ + return(sglib__##type##_it_init(it, tree, 1, subcomparator, equalto));\ +}\ +type *sglib_##type##_it_current(struct sglib_##type##_iterator *it) {\ + return(it->currentelem);\ +}\ +type *sglib_##type##_it_next(struct sglib_##type##_iterator *it) {\ + sglib__##type##_it_compute_current_elem(it);\ + return(it->currentelem);\ +}\ +\ +static void sglib___##type##_consistency_check_recursive(type *t, int *pathdeep, int cdeep) {\ + if (t==NULL) {\ + if (*pathdeep < 0) *pathdeep = cdeep;\ + else assert(*pathdeep == cdeep);\ + } else {\ + if (t->left!=NULL) assert(comparator(t->left, t) <= 0);\ + if (t->right!=NULL) assert(comparator(t, t->right) <= 0);\ + if (SGLIB___GET_VALUE(t->bits) == RED) {\ + assert(t->left == NULL || SGLIB___GET_VALUE(t->left->bits)==BLACK);\ + assert(t->right == NULL || SGLIB___GET_VALUE(t->right->bits)==BLACK);\ + sglib___##type##_consistency_check_recursive(t->left, pathdeep, cdeep);\ + sglib___##type##_consistency_check_recursive(t->right, pathdeep, cdeep);\ + } else {\ + sglib___##type##_consistency_check_recursive(t->left, pathdeep, cdeep+1);\ + sglib___##type##_consistency_check_recursive(t->right, pathdeep, cdeep+1);\ + }\ + }\ +}\ +\ +void sglib___##type##_consistency_check(type *t) {\ + int pathDeep;\ + assert(t==NULL || SGLIB___GET_VALUE(t->bits) == BLACK);\ + pathDeep = -1;\ + sglib___##type##_consistency_check_recursive(t, &pathDeep, 0);\ +} + + +#define SGLIB_DEFINE_RBTREE_PROTOTYPES(type, left, right, colorbit, comparator) \ + struct sglib_##type##_iterator {\ + type *currentelem;\ + char pass[SGLIB_MAX_TREE_DEEP];\ + type *path[SGLIB_MAX_TREE_DEEP];\ + short int pathi;\ + short int order;\ + type *equalto;\ + int (*subcomparator)(type *, type *);\ + };\ + extern void sglib___##type##_consistency_check(type *t); \ + extern void sglib_##type##_add(type **tree, type *elem); \ + extern int sglib_##type##_add_if_not_member(type **tree, type *elem, type **memb); \ + extern void sglib_##type##_delete(type 
**tree, type *elem); \ + extern int sglib_##type##_delete_if_member(type **tree, type *elem, type **memb); \ + extern int sglib_##type##_is_member(type *t, type *elem); \ + extern type *sglib_##type##_find_member(type *t, type *elem); \ + extern int sglib_##type##_len(type *t); \ + extern type *sglib_##type##_it_init(struct sglib_##type##_iterator *it, type *tree); \ + extern type *sglib_##type##_it_init_preorder(struct sglib_##type##_iterator *it, type *tree); \ + extern type *sglib_##type##_it_init_inorder(struct sglib_##type##_iterator *it, type *tree); \ + extern type *sglib_##type##_it_init_postorder(struct sglib_##type##_iterator *it, type *tree); \ + extern type *sglib_##type##_it_init_on_equal(struct sglib_##type##_iterator *it, type *tree, int (*subcomparator)(type *, type *), type *equalto); \ + extern type *sglib_##type##_it_current(struct sglib_##type##_iterator *it); \ + extern type *sglib_##type##_it_next(struct sglib_##type##_iterator *it); \ + + +#define SGLIB_DEFINE_RBTREE_FUNCTIONS(type, left, right, colorbit, comparator) \ + SGLIB_DEFINE_RBTREE_FUNCTIONS_GENERAL(type, left, right, colorbit, comparator, 1, 0) + + + +/* ---------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------------- */ +/* - SUPPLEMENTARY DEFINITIONS - */ +/* ---------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------------- */ + + +#define SGLIB___GET_VALUE(x) (x) +#define SGLIB___SET_VALUE(x, value) {(x) = (value);} +#define SGLIB_ARRAY_ELEMENTS_EXCHANGER(type, a, i, j) {type _sgl_aee_tmp_; _sgl_aee_tmp_=(a)[(i)]; (a)[(i)]=(a)[(j)]; (a)[(j)]= _sgl_aee_tmp_;} + + +#define SGLIB_SAFE_NUMERIC_COMPARATOR(x, y) (((x)>(y)?1:((x)<(y)?-1:0))) +#define SGLIB_SAFE_REVERSE_NUMERIC_COMPARATOR(x, y) (((x)>(y)?-1:((x)<(y)?1:0))) +#define SGLIB_FAST_NUMERIC_COMPARATOR(x, y) ((int)((x) - (y))) +#define 
SGLIB_FAST_REVERSE_NUMERIC_COMPARATOR(x, y) ((int)((y) - (x))) +#define SGLIB_NUMERIC_COMPARATOR(x, y) SGLIB_SAFE_NUMERIC_COMPARATOR(x, y) +#define SGLIB_REVERSE_NUMERIC_COMPARATOR(x, y) SGLIB_SAFE_REVERSE_NUMERIC_COMPARATOR(x, y) + +#ifndef SGLIB_MAX_TREE_DEEP +#define SGLIB_MAX_TREE_DEEP 128 +#endif + +#ifndef SGLIB_HASH_TAB_SHIFT_CONSTANT +#define SGLIB_HASH_TAB_SHIFT_CONSTANT 16381 /* should be a prime */ +/* #define SGLIB_HASH_TAB_SHIFT_CONSTANT 536870912*/ /* for large tables :) */ +#endif + +#endif /* _SGLIB__h_ */ diff --git a/src/ucs/datastruct/sglib_wrapper.h b/src/ucs/datastruct/sglib_wrapper.h new file mode 100644 index 0000000..0569e37 --- /dev/null +++ b/src/ucs/datastruct/sglib_wrapper.h @@ -0,0 +1,21 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_SGLIB_WRAPPER_H +#define UCS_SGLIB_WRAPPER_H + +#include "sglib.h" + +/* + * Fix "unused variable" + */ +#undef SGLIB_LIST_LEN +#define SGLIB_LIST_LEN(type, list, next, result) {\ + (result) = 0;\ + SGLIB_LIST_MAP_ON_ELEMENTS(type, list, _ce_, next, (result)++);\ +} + +#endif diff --git a/src/ucs/datastruct/strided_alloc.c b/src/ucs/datastruct/strided_alloc.c new file mode 100644 index 0000000..180d8f8 --- /dev/null +++ b/src/ucs/datastruct/strided_alloc.c @@ -0,0 +1,187 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "strided_alloc.h" +#include "queue.h" + +#include +#include +#include +#include + + +#define ucs_strided_alloc_chunk_to_mem(_chunk) \ + UCS_PTR_BYTE_OFFSET(_chunk, + sizeof(ucs_strided_alloc_chunk_t) \ + - UCS_STRIDED_ALLOC_STRIDE) + +#define ucs_strided_alloc_mem_to_chunk(_mem) \ + UCS_PTR_BYTE_OFFSET(_mem, - sizeof(ucs_strided_alloc_chunk_t) \ + + UCS_STRIDED_ALLOC_STRIDE) + +typedef struct ucs_splitalloc_chunk { + ucs_queue_elem_t queue; +} ucs_strided_alloc_chunk_t; + +struct ucs_strided_alloc_elem { + ucs_strided_alloc_elem_t *next; +}; + +static ucs_strided_alloc_chunk_t * +ucs_strided_alloc_chunk_alloc(ucs_strided_alloc_t *sa, size_t chunk_size + UCS_MEMTRACK_ARG) +{ + ucs_status_t status; + size_t size; + void *ptr; + + size = chunk_size; + ptr = NULL; + status = ucs_mmap_alloc(&size, &ptr, 0 UCS_MEMTRACK_VAL); + if (status != UCS_OK) { + ucs_error("failed to allocate a chunk of %zu bytes", chunk_size); + return NULL; + } + + return ucs_strided_alloc_mem_to_chunk(ptr); +} + +static void ucs_strided_alloc_chunk_free(ucs_strided_alloc_t *sa, + ucs_strided_alloc_chunk_t *chunk, + size_t chunk_size) +{ + /* coverity[offset_free] */ + ucs_mmap_free(ucs_strided_alloc_chunk_to_mem(chunk), chunk_size); +} + +static void ucs_strided_alloc_push_to_freelist(ucs_strided_alloc_t *sa, + ucs_strided_alloc_elem_t *elem) +{ + elem->next = sa->freelist; + sa->freelist = elem; +} + +static void ucs_strided_alloc_calc(ucs_strided_alloc_t *sa, size_t *chunk_size, + size_t *elems_per_chunk) +{ + *chunk_size = ucs_align_up_pow2(UCS_STRIDED_ALLOC_STRIDE * sa->stride_count, + ucs_get_page_size()); + *elems_per_chunk = (UCS_STRIDED_ALLOC_STRIDE - + sizeof(ucs_strided_alloc_chunk_t)) / sa->elem_size; +} + +static ucs_status_t +ucs_strided_alloc_grow(ucs_strided_alloc_t *sa UCS_MEMTRACK_ARG) +{ + size_t chunk_size, elems_per_chunk; + ucs_strided_alloc_chunk_t *chunk; + ucs_strided_alloc_elem_t *elem; + void *chunk_mem; + ssize_t i; + + ucs_strided_alloc_calc(sa, 
&chunk_size, &elems_per_chunk); + + chunk = ucs_strided_alloc_chunk_alloc(sa, chunk_size UCS_MEMTRACK_VAL); + if (chunk == NULL) { + return UCS_ERR_NO_MEMORY; + } + + chunk_mem = ucs_strided_alloc_chunk_to_mem(chunk); + for (i = elems_per_chunk - 1; i >= 0; --i) { + elem = UCS_PTR_BYTE_OFFSET(chunk_mem, i * sa->elem_size); + ucs_strided_alloc_push_to_freelist(sa, elem); + } + + ucs_queue_push(&sa->chunks, &chunk->queue); + + VALGRIND_MAKE_MEM_NOACCESS(chunk_mem, chunk_size); + + return UCS_OK; +} + +void ucs_strided_alloc_init(ucs_strided_alloc_t *sa, size_t elem_size, + unsigned stride_count) +{ + ucs_assert(elem_size >= sizeof(ucs_strided_alloc_elem_t)); + ucs_assert(elem_size <= (UCS_STRIDED_ALLOC_STRIDE - + sizeof(ucs_strided_alloc_chunk_t))); + ucs_assert(stride_count >= 1); + + ucs_queue_head_init(&sa->chunks); + + sa->freelist = NULL; + sa->elem_size = elem_size; + sa->stride_count = stride_count; + sa->inuse_count = 0; + VALGRIND_CREATE_MEMPOOL(sa, 0, 0); +} + +void ucs_strided_alloc_cleanup(ucs_strided_alloc_t *sa) +{ + size_t chunk_size, elems_per_chunk; + ucs_strided_alloc_chunk_t *chunk; + + VALGRIND_DESTROY_MEMPOOL(sa); + + ucs_strided_alloc_calc(sa, &chunk_size, &elems_per_chunk); + + while (!ucs_queue_is_empty(&sa->chunks)) { + chunk = ucs_queue_head_elem_non_empty(&sa->chunks, ucs_strided_alloc_chunk_t, + queue); + VALGRIND_MAKE_MEM_DEFINED(chunk, sizeof(*chunk)); + ucs_queue_pull_non_empty(&sa->chunks); + ucs_strided_alloc_chunk_free(sa, chunk, chunk_size); + } +} + +void* ucs_strided_alloc_get(ucs_strided_alloc_t *sa, const char *alloc_name) +{ + ucs_strided_alloc_elem_t *elem; + ucs_status_t status; + unsigned i; + + if (sa->freelist == NULL) { + status = ucs_strided_alloc_grow(sa UCS_MEMTRACK_VAL); + if (status != UCS_OK) { + return NULL; + } + } + + ucs_assert(sa->freelist != NULL); + + elem = sa->freelist; + VALGRIND_MAKE_MEM_DEFINED(elem, sizeof(*elem)); + sa->freelist = elem->next; + VALGRIND_MAKE_MEM_NOACCESS(elem, sizeof(*elem)); + + for 
(i = 0; i < sa->stride_count; ++i) { + VALGRIND_MEMPOOL_ALLOC(sa, ucs_strided_elem_get(elem, 0, i), + sa->elem_size); + } + + ++sa->inuse_count; + + return elem; +} + +void ucs_strided_alloc_put(ucs_strided_alloc_t *sa, void *base) +{ + ucs_strided_alloc_elem_t *elem = base; + unsigned i; + + ucs_assert(sa->inuse_count > 0); + + ucs_strided_alloc_push_to_freelist(sa, elem); + + for (i = 0; i < sa->stride_count; ++i) { + VALGRIND_MEMPOOL_FREE(sa, ucs_strided_elem_get(elem, 0, i)); + } + + --sa->inuse_count; +} + +unsigned ucs_strided_alloc_inuse_count(ucs_strided_alloc_t *sa) +{ + return sa->inuse_count; +} diff --git a/src/ucs/datastruct/strided_alloc.h b/src/ucs/datastruct/strided_alloc.h new file mode 100644 index 0000000..68b8765 --- /dev/null +++ b/src/ucs/datastruct/strided_alloc.h @@ -0,0 +1,135 @@ +/* + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCS_STRIDED_ALLOC_H_ +#define UCS_STRIDED_ALLOC_H_ + +#include "queue_types.h" + +#include +#include +#include +#include + + +BEGIN_C_DECLS + +/** @file strided_alloc.h */ + +/* the distance between allocated elements */ +#define UCS_STRIDED_ALLOC_STRIDE (128 * UCS_KBYTE) + + +/** + * Get a pointer to another element in the strided object + * + * Example with stride_count=3: + * + * chunk + * start -+ + * | + * | + * | <-- 128 kB --> . <-- 128 kB --> . + * | . . + * \/ . . + * +--------+-- ... --+--------+-- ... --+--------+ + * | stride | | stride | | stride | + * obj0: | elem 0 | | elem 1 | | elem 2 | + * | (base) | | | | | + * +--------+-- ... --+--------+-- ... --+--------+ + * +--------+-- ... --+--------+-- ... --+--------+ + * | stride | | stride | | stride | + * obj1: | elem 0 | | elem 1 | | elem 2 | + * | (base) | | | | | + * +--------+-- ... -+--------+-- ... --+--------+ + * +--------+-- ... --+--------+-- ... 
/**
 * Strided allocator - allows allocating objects which are split to several
 * memory areas with a constant stride (gap) in-between.
 * This improves the cache locality when the first memory area is used mostly.
 *
 * The areas of one object are UCS_STRIDED_ALLOC_STRIDE bytes apart; use
 * ucs_strided_elem_get() to move from one area of an object to another.
 */
typedef struct ucs_strided_alloc {
    ucs_strided_alloc_elem_t  *freelist;     /* LIFO of free elements, linked
                                                through their first stride */
    ucs_queue_head_t          chunks;        /* Queue of allocated chunks,
                                                released by cleanup */
    size_t                    elem_size;     /* Size of a single memory area */
    unsigned                  stride_count;  /* Number of strides (memory areas)
                                                per object */
    unsigned                  inuse_count;   /* Number of allocated elements */
} ucs_strided_alloc_t;
/*
 * Free the character buffer and put the string buffer back into the empty
 * state (NULL buffer, zero length, zero capacity) - the same state which
 * ucs_string_buffer_init() sets up.
 */
void ucs_string_buffer_cleanup(ucs_string_buffer_t *strb)
{
    ucs_free(strb->buffer);
    ucs_string_buffer_reset(strb);
}
same */ + return UCS_OK; +} + +ucs_status_t ucs_string_buffer_appendf(ucs_string_buffer_t *strb, + const char *fmt, ...) +{ + ucs_status_t status; + size_t max_print; + va_list ap; + int ret; + + /* set minimal initial size */ + if (strb->capacity - strb->length <= 1) { + status = ucs_string_buffer_grow(strb, + UCS_STRING_BUFFER_INITIAL_CAPACITY); + if (status != UCS_OK) { + return status; + } + } + + /* try to write to existing buffer */ + va_start(ap, fmt); + max_print = strb->capacity - strb->length - 1; + ret = vsnprintf(strb->buffer + strb->length, max_print, fmt, ap); + va_end(ap); + + /* if failed, grow the buffer to at least the required size and at least + * double the previous size (to reduce the amortized cost of realloc) */ + if (ret >= max_print) { + status = ucs_string_buffer_grow(strb, ucs_max(strb->capacity * 2, + strb->length + ret + 1)); + if (status != UCS_OK) { + return status; + } + + va_start(ap, fmt); + max_print = strb->capacity - strb->length - 1; + ret = vsnprintf(strb->buffer + strb->length, strb->capacity - 1, fmt, + ap); + va_end(ap); + + /* since we've grown the buffer, it should be sufficient now */ + ucs_assert(ret < max_print); + } + + /* string length grows by the amount of characters written by vsnprintf */ + strb->length += ret; + + ucs_assert(strb->length < strb->capacity); + ucs_assert(strb->buffer[strb->length] == '\0'); /* \0 is written by vsnprintf */ + + return UCS_OK; +} + +void ucs_string_buffer_rtrim(ucs_string_buffer_t *strb, const char *charset) +{ + char *ptr; + + ptr = &strb->buffer[strb->length]; + while (strb->length > 0) { + --ptr; + if (((charset == NULL) && !isspace(*ptr)) || + ((charset != NULL) && (strchr(charset, *ptr) == NULL))) { + /* if the last character should NOT be removed - stop */ + break; + } + + --strb->length; + } + + /* mark the new end of string */ + *(ptr + 1) = '\0'; +} + +const char *ucs_string_buffer_cstr(const ucs_string_buffer_t *strb) +{ + if (strb->length == 0) { + return ""; + } + + 
/**
 * Append a formatted string to the string buffer.
 *
 * @param [inout] strb   String buffer to append to.
 * @param [in]    fmt    printf-style format string, followed by the arguments
 *                       it refers to.
 *
 * @return UCS_OK on success or UCS_ERR_NO_MEMORY if could not allocate memory
 *         to grow the string.
 */
ucs_status_t ucs_string_buffer_appendf(ucs_string_buffer_t *strb,
                                       const char *fmt, ...)
    UCS_F_PRINTF(2, 3);
+ */ +void ucs_string_buffer_rtrim(ucs_string_buffer_t *strb, const char *charset); + + +/** + * Return a temporary pointer to a C-style string which represents the string + * buffer. The returned string is valid only as long as no other operation is + * done on the string buffer (including append). + * + * @param [in] strb String buffer to convert to a C-style string + * + * @return C-style string representing the data in the buffer. + */ +const char *ucs_string_buffer_cstr(const ucs_string_buffer_t *strb); + + +END_C_DECLS + +#endif diff --git a/src/ucs/datastruct/string_set.c b/src/ucs/datastruct/string_set.c new file mode 100644 index 0000000..063e0df --- /dev/null +++ b/src/ucs/datastruct/string_set.c @@ -0,0 +1,144 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "string_set.h" + +#include +#include +#include + + +#define UCS_STRING_SET_ALLOC_NAME "string_set" + + +void ucs_string_set_init(ucs_string_set_t *sset) +{ + kh_init_inplace(ucs_string_set, sset); +} + +void ucs_string_set_cleanup(ucs_string_set_t *sset) +{ + char *str; + + kh_foreach_key(sset, str, { + ucs_free(str); + }); + kh_destroy_inplace(ucs_string_set, sset); +} + +/* Adds string by pointer, and releases the string if add fails or the string + * already exists in the set + */ +static ucs_status_t ucs_string_set_add_ptr(ucs_string_set_t *sset, char *str) +{ + int ret; + + kh_put(ucs_string_set, sset, str, &ret); + + switch (ret) { + case -1: + ucs_free(str); + return UCS_ERR_NO_MEMORY; + case 0: + /* key already present */ + ucs_free(str); + return UCS_OK; + case 1: + case 2: + /* key inserted */ + return UCS_OK; + default: + ucs_error("unexpected return value from kh_put(ucs_string_set): %d", ret); + return UCS_ERR_INVALID_PARAM; + } +} + +ucs_status_t ucs_string_set_add(ucs_string_set_t *sset, const char *str) +{ + char *str_copy; + + str_copy = ucs_strdup(str, UCS_STRING_SET_ALLOC_NAME); + if (str_copy == 
NULL) { + return UCS_ERR_NO_MEMORY; + } + + return ucs_string_set_add_ptr(sset, str_copy); +} + +ucs_status_t ucs_string_set_addf(ucs_string_set_t *sset, const char *fmt, ...) +{ + int length; + va_list ap; + char *str; + + va_start(ap, fmt); + length = vsnprintf(NULL, 0, fmt, ap); + va_end(ap); + + str = ucs_malloc(length + 1, UCS_STRING_SET_ALLOC_NAME); + if (str == NULL) { + return UCS_ERR_NO_MEMORY; + } + + va_start(ap, fmt); + vsnprintf(str, length + 1, fmt, ap); + va_end(ap); + + return ucs_string_set_add_ptr(sset, str); +} + +int ucs_string_set_contains(const ucs_string_set_t *sset, const char *str) +{ + return kh_get(ucs_string_set, sset, (char*)str) != kh_end(sset); +} + +static int ucs_string_set_compare_func(const void *a, const void *b) +{ + return strcmp(*(const char**)a, *(const char**)b); +} + +ucs_status_t ucs_string_set_print_sorted(const ucs_string_set_t *sset, + ucs_string_buffer_t *strb, + const char *sep) +{ + const char **sorted_strings; + ucs_status_t status; + size_t index, count; + char *str; + + /* allocate a temporary array to hold the sorted strings */ + count = kh_size(sset); + sorted_strings = ucs_calloc(count, sizeof(*sorted_strings), "string_set"); + if (sorted_strings == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out; + } + + /* collect and sort the strings */ + index = 0; + kh_foreach_key(sset, str, { + sorted_strings[index++] = str; + }) + ucs_assert(index == count); + qsort(sorted_strings, count, sizeof(*sorted_strings), + ucs_string_set_compare_func); + + /* append the sorted strings to the string buffer */ + for (index = 0; index < count; ++index) { + status = ucs_string_buffer_appendf(strb, "%s%s", (index > 0) ? 
/**
 * Add a copy of a string to the string set
 *
 * @param [inout] sset   String set to add to.
 * @param [in]    str    String to add. The passed string can be released
 *                       immediately after this call, since the contents of the
 *                       string are copied to an internal buffer.
 *
 * @return UCS_OK if successful, or UCS_ERR_NO_MEMORY if could not allocate
 *         enough memory to add the string.
 */
ucs_status_t ucs_string_set_add(ucs_string_set_t *sset, const char *str);
/*
 * Log a (possibly multi-line) fatal error message, invoke the error handler
 * and abort the program.
 *
 * The message buffer is split in place on '\n' and every resulting line is
 * logged with the source file base name and line number as a prefix.
 */
void ucs_fatal_error_message(const char *file, unsigned line,
                             const char *function, char *message_buf)
{
    char *tok_state = NULL;
    char *cur_line;

    ucs_log_flush();

    if (message_buf != NULL) {
        for (cur_line = strtok_r(message_buf, "\n", &tok_state);
             cur_line != NULL;
             cur_line = strtok_r(NULL, "\n", &tok_state)) {
            ucs_log_fatal_error("%13s:%-4u %s", ucs_basename(file), line, cur_line);
        }
    }

    ucs_handle_error(message_buf);
    abort();
}
/**
 * Fail if _expression evaluates to 0
 *
 * On failure, ucs_fatal_error_format() is called with the current file, line
 * and function, and with the stringified expression as the message.
 * This macro does not depend on ENABLE_ASSERT; ucs_assert() maps to it only
 * when assertions are enabled.
 */
#define ucs_assert_always(_expression) \
    do { \
        if (!ucs_likely(_expression)) { \
            ucs_fatal_error_format(__FILE__, __LINE__, __FUNCTION__, \
                                   "Assertion `%s' failed", #_expression); \
        } \
    } while (0)
/**
 * Generate a fatal error and stop the program.
 *
 * @param [in] file         Source file name
 * @param [in] line         Source line number
 * @param [in] function     Calling function name
 * @param [in] message_buf  Error message buffer. Multi-line message is
 *                          supported. May be NULL, in which case no message
 *                          lines are logged.
 *
 * IMPORTANT NOTE: message_buf is modified in place by this function (it is
 *                 split into lines with strtok_r), so it must be writable.
 */
void ucs_fatal_error_message(const char *file, unsigned line,
                             const char *function, char *message_buf)
    UCS_F_NORETURN;
/*
 * State passed to find_address_in_section() through bfd_map_over_sections()
 * while resolving one address to source lines.
 */
struct backtrace_search {
    int                  count;     /* number of entries filled in 'lines' so
                                       far; sections are skipped once it is
                                       nonzero */
    struct backtrace_file *file;    /* object file with the address being
                                       looked up */
    int                  backoff;   /* search the line where the function call
                                       took place, instead of return address */
    struct backtrace_line *lines;   /* output array */
    int                  max_lines; /* capacity of 'lines' */
};
/*
 * Duplicate a string into memory obtained from ucs_sys_realloc().
 *
 * @return The copy, or NULL if the allocation failed.
 */
static char *ucs_debug_strdup(const char *str)
{
    const size_t nbytes = strlen(str) + 1; /* including the terminating '\0' */
    char *copy          = ucs_sys_realloc(NULL, 0, nbytes);

    if (copy == NULL) {
        return NULL;
    }

    strncpy(copy, str, nbytes);
    return copy;
}
bfd_openr(file->dl.filename, NULL); + if (!file->abfd) { + goto err; + } + + if (bfd_check_format(file->abfd, bfd_archive)) { + goto err_close; + } + + if (!bfd_check_format_matches(file->abfd, bfd_object, &matching)) { + goto err_close; + } + + if ((bfd_get_file_flags(file->abfd) & HAS_SYMS) == 0) { + goto err_close; + } + + symcount = bfd_read_minisymbols(file->abfd, 0, (PTR)&file->syms, &size); + if (symcount == 0) { + free(file->syms); + symcount = bfd_read_minisymbols(file->abfd, 1, (PTR)&file->syms, &size); + } + if (symcount < 0) { + goto err_close; + } + + return 1; + +err_close: + bfd_close(file->abfd); +err: + return 0; +} + +static void unload_file(struct backtrace_file *file) +{ + free(file->syms); + bfd_close(file->abfd); +} + +static char *ucs_debug_demangle(const char *name) +{ + char *demangled = NULL; +#ifdef HAVE_CPLUS_DEMANGLE + extern char *cplus_demangle(const char *, int); + demangled = cplus_demangle(name, 0); +#endif + return demangled ? demangled : strdup(name); +} + +static void find_address_in_section(bfd *abfd, asection *section, void *data) +{ + struct backtrace_search *search = data; + bfd_size_type size; + bfd_vma vma; + unsigned long address; + const char *filename, *function; + unsigned lineno; + int found; + + if ((search->count > 0) || (search->max_lines == 0) || + ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)) { + return; + } + + address = search->file->dl.address - search->file->dl.base; + vma = bfd_get_section_vma(abfd, section); + if (address < vma) { + return; + } + + size = bfd_section_size(abfd, section); + if (address >= vma + size) { + return; + } + + /* Search in address-1 to get the calling line instead of return address */ + found = bfd_find_nearest_line(abfd, section, search->file->syms, + address - vma - search->backoff, + &filename, &function, &lineno); + do { + search->lines[search->count].address = address; + search->lines[search->count].file = strdup(filename ? 
filename : + UCS_DEBUG_UNKNOWN_SYM); + search->lines[search->count].function = function ? + ucs_debug_demangle(function) : + strdup(UCS_DEBUG_UNKNOWN_SYM); + search->lines[search->count].lineno = lineno; + if (search->count == 0) { + /* To get the inliner info, search at the original address */ + bfd_find_nearest_line(abfd, section, search->file->syms, address - vma, + &filename, &function, &lineno); + } + + ++search->count; + found = bfd_find_inliner_info(abfd, &filename, &function, &lineno); + } while (found && (search->count < search->max_lines)); +} + +static int get_line_info(struct backtrace_file *file, int backoff, + struct backtrace_line *lines, int max) +{ + struct backtrace_search search; + + search.file = file; + search.backoff = backoff; + search.count = 0; + search.lines = lines; + search.max_lines = max; + bfd_map_over_sections(file->abfd, find_address_in_section, &search); + return search.count; +} + +/** + * Create a backtrace from the calling location. + * + * @param bckt Backtrace object. + * @param strip How many frames to strip. +*/ +ucs_status_t ucs_debug_backtrace_create(backtrace_h *bckt, int strip) +{ + size_t size = sizeof(**bckt); + struct backtrace_file file; + void *addresses[BACKTRACE_MAX]; + int i, num_addresses; + ucs_status_t status; + + *bckt = NULL; + status = ucs_mmap_alloc(&size, (void**)bckt, 0 + UCS_MEMTRACK_NAME("debug backtrace object")); + if (status != UCS_OK) { + return status; + } + + num_addresses = backtrace(addresses, BACKTRACE_MAX); + + (*bckt)->size = 0; + (*bckt)->position = strip; + for (i = 0; i < num_addresses; ++i) { + file.dl.address = (unsigned long)addresses[i]; + if (dl_lookup_address(&file.dl) && load_file(&file)) { + (*bckt)->size += get_line_info(&file, 1, + (*bckt)->lines + (*bckt)->size, + BACKTRACE_MAX - (*bckt)->size); + unload_file(&file); + } + } + + return UCS_OK; +} + +/** + * Destroy a backtrace and free all memory. + * + * @param bckt Backtrace object. 
+ */ +void ucs_debug_backtrace_destroy(backtrace_h bckt) +{ + int i; + + for (i = 0; i < bckt->size; ++i) { + free(bckt->lines[i].function); + free(bckt->lines[i].file); + } + bckt->size = 0; + ucs_mmap_free(bckt, sizeof(*bckt)); +} + +static ucs_status_t +ucs_debug_get_line_info(const char *filename, unsigned long base, + unsigned long address, ucs_debug_address_info_t *info) +{ + struct backtrace_file file; + struct backtrace_line line; + int count; + + file.dl.filename = filename; + file.dl.base = base; + file.dl.address = address; + + if (!load_file(&file)) { + goto err; + } + + count = get_line_info(&file, 0, &line, 1); + if (count == 0) { + goto err_unload; + } + + if (line.function) { + ucs_strncpy_zero(info->function, line.function, sizeof(info->function)); + } else { + strcpy(info->function, UCS_DEBUG_UNKNOWN_SYM); + } + if (line.file) { + ucs_strncpy_zero(info->source_file, line.file, sizeof(info->source_file)); + } else { + strcpy(info->function, UCS_DEBUG_UNKNOWN_SYM); + } + info->line_number = line.lineno; + + free(line.function); + free(line.file); + unload_file(&file); + return UCS_OK; + +err_unload: + unload_file(&file); +err: + strcpy(info->function, UCS_DEBUG_UNKNOWN_SYM); + strcpy(info->source_file, UCS_DEBUG_UNKNOWN_SYM); + info->line_number = 0; + return UCS_ERR_NO_ELEM; +} + +ucs_status_t ucs_debug_lookup_address(void *address, ucs_debug_address_info_t *info) +{ + struct dl_address_search dl; + + dl.address = (unsigned long)address; + if (!dl_lookup_address(&dl)) { + return UCS_ERR_NO_ELEM; + } + + memset(info, 0, sizeof(*info)); + info->file.base = dl.base; + ucs_expand_path(dl.filename, info->file.path, sizeof(info->file.path)); + return ucs_debug_get_line_info(dl.filename, dl.base, dl.address, info); +} + +/** + * Walk to the next backtrace line information. + * + * @param bckt Backtrace object. + * @param line Filled with backtrace frame info. + * + * NOTE: the line remains valid as long as the backtrace object is not destroyed. 
+ */
+int ucs_debug_backtrace_next(backtrace_h bckt, backtrace_line_h *line)
+{
+    backtrace_line_h ln;
+
+    /* Advance past records rejected by ucs_debug_backtrace_is_excluded();
+     * return 0 once all records have been consumed */
+    do {
+        if (bckt->position >= bckt->size) {
+            return 0;
+        }
+
+        ln = &bckt->lines[bckt->position++];
+    } while (ucs_debug_backtrace_is_excluded((void*)ln->address, ln->function));
+
+    *line = ln;
+    return 1;
+}
+
+/**
+ * Print the lines of a source file that lie within 'context' lines of the
+ * given line, marking the matching line with "==>". Prints nothing if the
+ * file cannot be opened.
+ *
+ * @param file     Path of the source file.
+ * @param line     Line number to highlight.
+ * @param function Function name shown in the heading.
+ * @param stream   Stream to print to.
+ */
+static void ucs_debug_print_source_file(const char *file, unsigned line,
+                                        const char *function, FILE *stream)
+{
+    static const int context = 3;
+    char srcline[256];
+    unsigned n;
+    FILE *f;
+
+    f = fopen(file, "r");
+    if (f == NULL) {
+        return;
+    }
+
+    /* NOTE(review): n counts the first line of the file as 0, while line
+     * numbers from debug information are normally 1-based, so the printed
+     * numbers and the "==>" marker may be off by one - confirm.
+     * A physical line longer than sizeof(srcline) - 1 characters is read by
+     * several fgets() calls and is therefore counted more than once. */
+    n = 0;
+    fprintf(stream, "\n");
+    fprintf(stream, "%s: [ %s() ]\n", file, function);
+    if (line > context) {
+        fprintf(stream, " ...\n");
+    }
+    while (fgets(srcline, sizeof(srcline), f) != NULL) {
+        if (abs((int)line - (int)n) <= context) {
+            fprintf(stream, "%s %5u %s",
+                    (n == line) ? "==>" : " ", n, srcline);
+        }
+        ++n;
+    }
+    fprintf(stream, "\n");
+
+    fclose(f);
+}
+
+/**
+ * Print the source context of the first non-excluded frame of the current
+ * backtrace.
+ *
+ * @param stream Stream to print to.
+ */
+static void ucs_debug_show_innermost_source_file(FILE *stream)
+{
+    backtrace_h bckt;
+    backtrace_line_h bckt_line;
+    ucs_status_t status;
+
+    status = ucs_debug_backtrace_create(&bckt, 0);
+    if (status != UCS_OK) {
+        return;
+    }
+
+    if (ucs_debug_backtrace_next(bckt, &bckt_line)) {
+        ucs_debug_print_source_file(bckt_line->file, bckt_line->lineno,
+                                    bckt_line->function, stream);
+    }
+    ucs_debug_backtrace_destroy(bckt);
+}
+
+#else /* HAVE_DETAILED_BACKTRACE */
+
+/* Variant without detailed backtrace support: only what dladdr() reports is
+ * filled in; source file and line number are never resolved. */
+ucs_status_t ucs_debug_lookup_address(void *address, ucs_debug_address_info_t *info)
+{
+    Dl_info dlinfo;
+    int ret;
+
+    ret = dladdr(address, &dlinfo);
+    if (!ret) {
+        return UCS_ERR_NO_ELEM;
+    }
+
+    ucs_strncpy_safe(info->file.path, dlinfo.dli_fname, sizeof(info->file.path));
+    info->file.base = (uintptr_t)dlinfo.dli_fbase;
+    ucs_strncpy_safe(info->function,
+                     (dlinfo.dli_sname != NULL) ?
dlinfo.dli_sname : UCS_DEBUG_UNKNOWN_SYM, + sizeof(info->function)); + ucs_strncpy_safe(info->source_file, UCS_DEBUG_UNKNOWN_SYM, sizeof(info->source_file)); + info->line_number = 0; + + return UCS_OK; +} + +/** + * Create a backtrace from the calling location. + */ +ucs_status_t ucs_debug_backtrace_create(backtrace_h *bckt, int strip) +{ + size_t size = sizeof(**bckt); + ucs_status_t status; + + *bckt = NULL; + status = ucs_mmap_alloc(&size, (void**)bckt, 0 + UCS_MEMTRACK_NAME("debug backtrace object")); + if (status != UCS_OK) { + return status; + } + + (*bckt)->size = backtrace((*bckt)->addresses, BACKTRACE_MAX); + (*bckt)->symbols = backtrace_symbols((*bckt)->addresses, (*bckt)->size); + (*bckt)->position = strip; + + return UCS_OK; +} + +/** + * Destroy a backtrace and free all memory. + * + * @param bckt Backtrace object. + */ +void ucs_debug_backtrace_destroy(backtrace_h bckt) +{ + free(bckt->symbols); + ucs_mmap_free(bckt, sizeof(*bckt)); +} + +/** + * Walk to the next backtrace line information. + * + * @param bckt Backtrace object. + * @param line Filled with backtrace frame info. + * + * NOTE: the line remains valid as long as the backtrace object is not destroyed. + */ +int ucs_debug_backtrace_next(backtrace_h bckt, backtrace_line_h *line) +{ + while (bckt->position < bckt->size) { + bckt->line.address = bckt->addresses[bckt->position]; + bckt->line.symbol = bckt->symbols[bckt->position]; + bckt->position++; + + if (!ucs_debug_backtrace_is_excluded(bckt->line.address, + bckt->line.symbol)) { + *line = &bckt->line; + return 1; + } + } + + return 0; +} + +static void ucs_debug_show_innermost_source_file(FILE *stream) +{ +} + +#endif /* HAVE_DETAILED_BACKTRACE */ + +/* + * Filter specific functions from the head of the backtrace. 
+ */
+void ucs_debug_print_backtrace(FILE *stream, int strip)
+{
+    backtrace_h bckt;
+    backtrace_line_h bckt_line;
+    ucs_status_t status;
+    int i;
+
+    /* On failure bckt is NULL and must not be iterated or destroyed */
+    status = ucs_debug_backtrace_create(&bckt, strip);
+    if (status != UCS_OK) {
+        return;
+    }
+
+    fprintf(stream, "==== backtrace (tid:%7d) ====\n", ucs_get_tid());
+    for (i = 0; ucs_debug_backtrace_next(bckt, &bckt_line); ++i) {
+        fprintf(stream, UCS_DEBUG_BACKTRACE_LINE_FMT,
+                UCS_DEBUG_BACKTRACE_LINE_ARG(i, bckt_line));
+    }
+    fprintf(stream, "=================================\n");
+
+    ucs_debug_backtrace_destroy(bckt);
+}
+
+/*
+ * Print backtrace line to string buffer.
+ */
+void ucs_debug_print_backtrace_line(char *buffer, size_t maxlen,
+                                    int frame_num,
+                                    backtrace_line_h line)
+{
+    snprintf(buffer, maxlen, UCS_DEBUG_BACKTRACE_LINE_FMT,
+             UCS_DEBUG_BACKTRACE_LINE_ARG(frame_num, line));
+}
+
+/*
+ * Return the name of the symbol at the given address, caching the result in
+ * ucs_debug_symbols_cache. Returns UCS_DEBUG_UNKNOWN_SYM when the symbol
+ * cannot be resolved. If the cache cannot grow, the returned pointer refers
+ * to a static buffer that the next such call overwrites.
+ */
+const char *ucs_debug_get_symbol_name(void *address)
+{
+    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+    static ucs_debug_address_info_t info;
+    int hash_extra_status;
+    ucs_status_t status;
+    khiter_t hash_it;
+    size_t length;
+    char *sym;
+
+    pthread_mutex_lock(&lock);
+    hash_it = kh_put(ucs_debug_symbol, &ucs_debug_symbols_cache,
+                     (uintptr_t)address, &hash_extra_status);
+    if (hash_extra_status == 0) {
+        /* key already present: use the cached name */
+        sym = kh_value(&ucs_debug_symbols_cache, hash_it);
+    } else {
+        status = ucs_debug_lookup_address(address, &info);
+        if (hash_extra_status == -1) {
+            /* could not add to hash: hash_it is not a valid slot, so never
+             * store through it. Return pointer to the static buffer, or the
+             * placeholder if the lookup failed as well */
+            sym = (status == UCS_OK) ? info.function : NULL;
+            goto out;
+        }
+
+        if (status == UCS_OK) {
+            /* add new symbol to hash */
+            ucs_assert_always(hash_it != kh_end(&ucs_debug_symbols_cache));
+            length = strlen(info.function);
+            sym = ucs_malloc(length + 1, "debug_symbol");
+            if (sym != NULL) {
+                ucs_strncpy_safe(sym, info.function, length + 1);
+            }
+        } else {
+            /* could not resolve symbol */
+            sym = NULL;
+        }
+        kh_value(&ucs_debug_symbols_cache, hash_it) = sym;
+    }
+
+out:
+    pthread_mutex_unlock(&lock);
+    return sym ?
sym : UCS_DEBUG_UNKNOWN_SYM;
+}
+
+/*
+ * Fork a child that executes the configured gdb command against the current
+ * process, and wait for it to finish.
+ */
+static void ucs_debugger_attach()
+{
+    static const char *vg_cmds_fmt = "file %s\n"
+                                     "target remote | vgdb\n";
+    static const char *bt_cmds = "bt\n"
+                                 "list\n";
+    static char pid_str[16];
+    char *vg_cmds;
+    char *gdb_cmdline;
+    char gdb_commands_file[256];
+    char* argv[6 + UCS_GDB_MAX_ARGS];
+    pid_t pid, debug_pid;
+    int fd, ret, narg;
+    char UCS_V_UNUSED *self_exe;
+
+    /* Fork a process which will execute gdb and attach to the current process.
+     * We must avoid trigerring calls to malloc/free, since the heap may be corrupted.
+     * Therefore all allocations are done with mmap() or use static arrays.
+     */
+
+    debug_pid = getpid();
+
+    pid = fork();
+    if (pid < 0) {
+        ucs_log_fatal_error("fork returned %d: %m", pid);
+        return;
+    }
+
+    /* executable path for the valgrind "file" command; note that this runs
+     * after fork(), in both the parent and the child */
+    self_exe = ucs_debug_strdup(ucs_get_exe());
+
+    if (pid == 0) {
+        gdb_cmdline = ucs_debug_strdup(ucs_global_opts.gdb_command);
+        narg = 0;
+        argv[narg] = strtok(gdb_cmdline, " \t");
+        /* Accept at most UCS_GDB_MAX_ARGS tokens; the remaining argv slots
+         * are reserved for "-p <pid>", "-x <file>" and the terminating NULL.
+         * Without the bound a long gdb command overruns argv. */
+        while ((argv[narg] != NULL) && (narg < UCS_GDB_MAX_ARGS)) {
+            ++narg;
+            argv[narg] = strtok(NULL, " \t");
+        }
+
+        /* Make coverity know that argv[0] will not be affected by TMPDIR */
+        if (narg == 0) {
+            /* This is the forked child: terminate it instead of returning
+             * into the application code as a second copy of the process */
+            exit(-1);
+        }
+
+        if (!RUNNING_ON_VALGRIND) {
+            snprintf(pid_str, sizeof(pid_str), "%d", debug_pid);
+            argv[narg++] = "-p";
+            argv[narg++] = pid_str;
+        }
+
+        /* Generate a file name for gdb commands */
+        memset(gdb_commands_file, 0, sizeof(gdb_commands_file));
+        snprintf(gdb_commands_file, sizeof(gdb_commands_file) - 1,
+                 "%s/.gdbcommands.uid-%d", ucs_get_tmpdir(), geteuid());
+
+        /* Write gdb commands and add the file to argv is successful */
+        fd = open(gdb_commands_file, O_WRONLY|O_TRUNC|O_CREAT, 0600);
+        if (fd >= 0) {
+            if (RUNNING_ON_VALGRIND) {
+                /* "%s" in the format is replaced by self_exe, so the sum of
+                 * both lengths also covers the terminating NUL */
+                vg_cmds = ucs_sys_realloc(NULL, 0, strlen(vg_cmds_fmt) + strlen(self_exe));
+                sprintf(vg_cmds, vg_cmds_fmt, self_exe);
+                if (write(fd, vg_cmds, strlen(vg_cmds)) != strlen(vg_cmds)) {
+                    ucs_log_fatal_error("Unable to write to command file: %m");
+                }
+            }
+
+            if (ucs_global_opts.handle_errors & UCS_BIT(UCS_HANDLE_ERROR_BACKTRACE)) {
+                if (write(fd, bt_cmds, strlen(bt_cmds)) != strlen(bt_cmds)) {
+                    ucs_log_fatal_error("Unable to write to command file: %m");
+                }
+            }
+            close(fd);
+
+            argv[narg++] = "-x";
+            argv[narg++] = gdb_commands_file;
+        } else {
+            ucs_log_fatal_error("Unable to open '%s' for writing: %m",
+                                gdb_commands_file);
+        }
+
+        argv[narg++] = NULL;
+
+        /* Execute GDB */
+        /* coverity[tainted_string] */
+        ret = execvp(argv[0], argv);
+        if (ret < 0) {
+            ucs_log_fatal_error("Failed to execute %s: %m", argv[0]);
+            exit(-1);
+        }
+    }
+
+    /* Parent: block until the debugger process terminates */
+    waitpid(pid, &ret, 0);
+}
+
+/*
+ * Block the calling thread in pause() for as long as the static 'freeze' flag
+ * is set. Nothing in this function clears the flag; presumably it is reset
+ * from an attached debugger (TODO confirm).
+ */
+static void UCS_F_NOINLINE ucs_debug_freeze()
+{
+    static volatile int freeze = 1;
+    while (freeze) {
+        pause();
+    }
+}
+
+/* SIGUSR1 handler installed by ucs_debug_stop_other_threads(): freezes the
+ * thread which received the signal. */
+static void ucs_debug_stop_handler(int signo)
+{
+    ucs_debug_freeze();
+}
+
+/*
+ * Send SIGUSR1 to every thread listed in /proc/self/task except the calling
+ * one, so that each of them ends up in ucs_debug_freeze().
+ */
+static void ucs_debug_stop_other_threads()
+{
+    static const char *task_dir = "/proc/self/task";
+    struct dirent *entry;
+    DIR *dir;
+    int ret;
+    int tid;
+
+    dir = opendir(task_dir);
+    if (dir == NULL) {
+        ucs_log_fatal_error("Unable to open %s: %m", task_dir);
+        return;
+    }
+
+    signal(SIGUSR1, ucs_debug_stop_handler);
+
+    for (;;) {
+        /* readdir() returns NULL both at end of directory and on error;
+         * errno tells the two apart */
+        errno = 0;
+        entry = readdir(dir);
+        if (entry == NULL) {
+            if (errno != 0) {
+                ucs_log_fatal_error("Unable to read from %s: %m", task_dir);
+            }
+            break;
+        }
+
+        /* Skip every entry whose name starts with a dot ("." and "..") */
+        if (!strncmp(entry->d_name, ".", 1)) {
+            continue;
+        }
+
+        /* Skip names that do not parse as a thread id, and the caller */
+        tid = atoi(entry->d_name);
+        if ((tid == 0) || (tid == ucs_get_tid())) {
+            continue;
+        }
+
+        ret = ucs_tgkill(getpid(), tid, SIGUSR1);
+        if (ret < 0) {
+            break;
+        }
+    }
+
+    closedir(dir);
+}
+
+/*
+ * Pipe an error report (program, host, process id, message, source context
+ * and backtrace) to sendmail, addressed to ucs_global_opts.error_mail_to.
+ * Does nothing if no recipient is configured or sendmail cannot be started.
+ */
+static void ucs_debug_send_mail(const char *message)
+{
+    FILE *stream;
+
+    if (!strlen(ucs_global_opts.error_mail_to)) {
+        return;
+    }
+
+    stream = popen("/usr/lib/sendmail -t", "w");
+    if (stream == NULL) {
+        return;
+    }
+
+    fprintf(stdout, "Sending notification to %s\n", ucs_global_opts.error_mail_to);
+    fflush(stdout);
+
+    /* Mail headers, terminated by an empty line */
+    fprintf(stream, "To: %s\n", ucs_global_opts.error_mail_to);
+    fprintf(stream, "From: %s\n", "ucx@openucx.org");
+    fprintf(stream, "Subject: ucx error report on %s\n",
+            ucs_get_host_name());
+    fprintf(stream, "Content-Type: text/plain\n");
+    fprintf(stream, "\n");
+
+    /* Mail body */
+    fprintf(stream, "program: %s\n", ucs_get_exe());
+    fprintf(stream, "hostname: %s\n", ucs_get_host_name());
+    fprintf(stream, "process id: %d\n", getpid());
+    fprintf(stream, "\n");
+
+    fprintf(stream, "\n");
+    fprintf(stream, "%s\n", message);
+    fprintf(stream, "\n");
+
+    ucs_debug_show_innermost_source_file(stream);
+    ucs_debug_print_backtrace(stream, 2);
+
+    if (strlen(ucs_global_opts.error_mail_footer)) {
+        fprintf(stream, "\n");
+        fprintf(stream, "%s\n", ucs_global_opts.error_mail_footer);
+    }
+    fprintf(stream, "\n");
+
+    pclose(stream);
+}
+
+/*
+ * Freeze the process after an error. All other threads are signalled to
+ * stop first. The first thread to take the lock either waits for Enter on an
+ * interactive terminal and attaches the debugger, or sends the error mail
+ * and freezes. A thread that finds the lock already taken just freezes.
+ */
+static void ucs_error_freeze(const char *message)
+{
+    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+    char response;
+    int ret;
+
+    ucs_debug_stop_other_threads();
+
+    if (pthread_mutex_trylock(&lock) == 0) {
+        if (strlen(ucs_global_opts.gdb_command) && isatty(fileno(stdout)) &&
+            isatty(fileno(stdin)))
+        {
+            ucs_log_fatal_error("Process frozen, press Enter to attach a debugger...");
+            ret = read(fileno(stdin), &response, 1); /* Use low-level input to avoid deadlock */
+            if ((ret == 1) && (response == '\n')) {
+                ucs_debugger_attach();
+            } else {
+                ucs_debug_freeze();
+            }
+        } else {
+            ucs_debug_send_mail(message);
+            ucs_log_fatal_error("Process frozen...");
+            ucs_debug_freeze();
+        }
+
+        pthread_mutex_unlock(&lock);
+    } else {
+        ucs_debug_freeze();
+    }
+}
+
+/* Describe an si_code value that is not specific to one signal; returns an
+ * empty string for codes not listed here. */
+static const char *ucs_signal_cause_common(int si_code)
+{
+    switch (si_code) {
+    case SI_USER : return "kill(2) or raise(3)";
+    case SI_KERNEL : return "Sent by the kernel";
+    case SI_QUEUE : return "sigqueue(2)";
+    case SI_TIMER : return "POSIX timer expired";
+    case SI_MESGQ : return "POSIX message queue state changed";
+    case SI_ASYNCIO : return "AIO completed";
+#ifdef SI_SIGIO
+    case SI_SIGIO : return "queued SIGIO";
+#endif
+#ifdef SI_TKILL
+    case SI_TKILL :
return "tkill(2) or tgkill(2)";
+#endif
+    default : return "";
+    }
+}
+
+/* Describe a SIGILL si_code; other codes fall back to the common table. */
+static const char *ucs_signal_cause_ill(int si_code)
+{
+    switch (si_code) {
+    case ILL_ILLOPC : return "illegal opcode";
+    case ILL_ILLOPN : return "illegal operand";
+    case ILL_ILLADR : return "illegal addressing mode";
+    case ILL_ILLTRP : return "illegal trap";
+    case ILL_PRVOPC : return "privileged opcode";
+    case ILL_PRVREG : return "privileged register";
+    case ILL_COPROC : return "coprocessor error";
+    case ILL_BADSTK : return "internal stack error";
+    default : return ucs_signal_cause_common(si_code);
+    }
+}
+
+/* Describe a SIGFPE si_code; other codes fall back to the common table. */
+static const char *ucs_signal_cause_fpe(int si_code)
+{
+    switch (si_code) {
+    case FPE_INTDIV : return "integer divide by zero";
+    case FPE_INTOVF : return "integer overflow";
+    case FPE_FLTDIV : return "floating-point divide by zero";
+    case FPE_FLTOVF : return "floating-point overflow";
+    case FPE_FLTUND : return "floating-point underflow";
+    case FPE_FLTRES : return "floating-point inexact result";
+    case FPE_FLTINV : return "floating-point invalid operation";
+    case FPE_FLTSUB : return "subscript out of range";
+    default : return ucs_signal_cause_common(si_code);
+    }
+}
+
+/* Describe a SIGSEGV si_code; other codes fall back to the common table. */
+static const char *ucs_signal_cause_segv(int si_code)
+{
+    switch (si_code) {
+    case SEGV_MAPERR : return "address not mapped to object";
+    case SEGV_ACCERR : return "invalid permissions for mapped object";
+    default : return ucs_signal_cause_common(si_code);
+    }
+}
+
+/* Describe a SIGBUS si_code; other codes fall back to the common table. */
+static const char *ucs_signal_cause_bus(int si_code)
+{
+    switch (si_code) {
+    case BUS_ADRALN : return "invalid address alignment";
+    case BUS_ADRERR : return "nonexistent physical address";
+    case BUS_OBJERR : return "object-specific hardware error";
+    default : return ucs_signal_cause_common(si_code);
+    }
+}
+
+/* Describe a SIGTRAP si_code; other codes fall back to the common table. */
+static const char *ucs_signal_cause_trap(int si_code)
+{
+    switch (si_code) {
+    case TRAP_BRKPT : return "process breakpoint";
+    case TRAP_TRACE : return "process trace trap";
+    default : return ucs_signal_cause_common(si_code);
+    
}
+}
+
+/* Describe a SIGCHLD si_code; other codes fall back to the common table. */
+static const char *ucs_signal_cause_cld(int si_code)
+{
+    switch (si_code) {
+    case CLD_EXITED : return "child has exited";
+    case CLD_KILLED : return "child was killed";
+    case CLD_DUMPED : return "child terminated abnormally";
+    case CLD_TRAPPED : return "traced child has trapped";
+    case CLD_STOPPED : return "child has stopped";
+    case CLD_CONTINUED: return "stopped child has continued";
+    /* Must not be NULL: the result is printed with "%s" and passed on as the
+     * error message. Use the common table like the other cause functions. */
+    default : return ucs_signal_cause_common(si_code);
+    }
+}
+
+/*
+ * Report a caught error signal and run the configured error handling.
+ *
+ * @param signo Signal number.
+ * @param cause Text describing the si_code; must not be NULL.
+ * @param fmt   printf-style format of extra details appended to the cause.
+ */
+static void ucs_debug_handle_error_signal(int signo, const char *cause,
+                                          const char *fmt, ...)
+{
+    char buf[256];
+    va_list ap;
+
+    va_start(ap, fmt);
+    vsnprintf(buf, sizeof(buf), fmt, ap);
+    va_end(ap);
+
+    ucs_log_flush();
+    ucs_log_fatal_error("Caught signal %d (%s: %s%s)", signo,
+                        strsignal(signo), cause, buf);
+    ucs_handle_error(cause);
+}
+
+/*
+ * Handler installed for the configured error signals. It restores the
+ * original handlers through ucs_debug_cleanup(1), reports the signal (except
+ * SIGINT and SIGTERM, which are not reported), and raises the signal again.
+ */
+static void ucs_error_signal_handler(int signo, siginfo_t *info, void *context)
+{
+    ucs_debug_cleanup(1);
+    ucs_log_flush();
+
+    switch (signo) {
+    case SIGILL:
+        ucs_debug_handle_error_signal(signo, ucs_signal_cause_ill(info->si_code), "");
+        break;
+    case SIGTRAP:
+        ucs_debug_handle_error_signal(signo, ucs_signal_cause_trap(info->si_code), "");
+        break;
+    case SIGBUS:
+        ucs_debug_handle_error_signal(signo, ucs_signal_cause_bus(info->si_code), "");
+        break;
+    case SIGFPE:
+        ucs_debug_handle_error_signal(signo, ucs_signal_cause_fpe(info->si_code), "");
+        break;
+    case SIGSEGV:
+        ucs_debug_handle_error_signal(signo, ucs_signal_cause_segv(info->si_code),
+                                      " at address %p", info->si_addr);
+        break;
+    case SIGCHLD:
+        ucs_debug_handle_error_signal(signo, ucs_signal_cause_cld(info->si_code), "");
+        break;
+    case SIGINT:
+    case SIGTERM:
+        break;
+    default:
+        ucs_debug_handle_error_signal(signo, ucs_signal_cause_common(info->si_code), "");
+        break;
+    }
+
+    raise(signo);
+}
+
+void ucs_handle_error(const char *message)
+{
+    ucs_debug_cleanup(1);
+
+    if (ucs_global_opts.handle_errors & UCS_BIT(UCS_HANDLE_ERROR_DEBUG)) {
+        ucs_debugger_attach();
+    } else {
+        if (ucs_global_opts.handle_errors &
UCS_BIT(UCS_HANDLE_ERROR_BACKTRACE)) { + ucs_debug_show_innermost_source_file(stderr); + ucs_debug_print_backtrace(stderr, 2); + } + if (ucs_global_opts.handle_errors & UCS_BIT(UCS_HANDLE_ERROR_FREEZE)) { + ucs_error_freeze(message); + } + } +} + +static int ucs_debug_is_error_signal(int signum) +{ + khiter_t hash_it; + int result; + + if (!ucs_global_opts.handle_errors) { + return 0; + } + + /* If this signal is error, but was disabled. */ + ucs_spin_lock(&ucs_kh_lock); + hash_it = kh_get(ucs_signal_orig_action, &ucs_signal_orig_action_map, signum); + result = (hash_it != kh_end(&ucs_signal_orig_action_map)); + ucs_spin_unlock(&ucs_kh_lock); + return result; +} + +static void* ucs_debug_get_orig_func(const char *symbol, void *replacement) +{ + void *func_ptr; + + func_ptr = dlsym(RTLD_NEXT, symbol); + if (func_ptr == NULL) { + func_ptr = dlsym(RTLD_DEFAULT, symbol); + } + return func_ptr; +} + +#if !HAVE_SIGHANDLER_T +#if HAVE___SIGHANDLER_T +typedef __sighandler_t *sighandler_t; +#else +#error "Port me" +#endif +#endif +sighandler_t signal(int signum, sighandler_t handler) +{ + typedef sighandler_t (*sighandler_func_t)(int, sighandler_t); + + static sighandler_func_t orig = NULL; + + if (ucs_debug_initialized && ucs_debug_is_error_signal(signum)) { + return SIG_DFL; + } + + if (orig == NULL) { + orig = (sighandler_func_t)ucs_debug_get_orig_func("signal", signal); + } + + return orig(signum, handler); +} + +static int orig_sigaction(int signum, const struct sigaction *act, + struct sigaction *oact) +{ + typedef int (*sigaction_func_t)(int, const struct sigaction*, struct sigaction*); + + static sigaction_func_t orig = NULL; + + if (orig == NULL) { + orig = (sigaction_func_t)ucs_debug_get_orig_func("sigaction", sigaction); + } + + return orig(signum, act, oact); +} + +int sigaction(int signum, const struct sigaction *act, struct sigaction *oact) +{ + if (ucs_debug_initialized && ucs_debug_is_error_signal(signum)) { + return orig_sigaction(signum, NULL, oact); /* 
Return old, do not set new */
+    }
+
+    return orig_sigaction(signum, act, oact);
+}
+
+/* Handler of the configured debug signal: flushes the log, switches the log
+ * level to TRACE_DATA and dumps the profiling data. */
+static void ucs_debug_signal_handler(int signo)
+{
+    ucs_log_flush();
+    ucs_global_opts.log_level = UCS_LOG_LEVEL_TRACE_DATA;
+    ucs_profile_dump();
+}
+
+/*
+ * Allocate and register an alternate signal stack. On any failure
+ * ucs_debug_signal_stack.ss_sp is left NULL, which makes
+ * ucs_set_signal_handler() install the handlers without SA_ONSTACK.
+ */
+static void ucs_debug_set_signal_alt_stack()
+{
+    int ret;
+
+    /* Minimal signal stack plus room for two log buffers, the backtrace
+     * address array and 128KB of extra space */
+    ucs_debug_signal_stack.ss_size = SIGSTKSZ +
+                                     (2 * ucs_log_get_buffer_size()) +
+                                     (sizeof(void*) * BACKTRACE_MAX) +
+                                     (128 * UCS_KBYTE);
+    ucs_debug_signal_stack.ss_sp =
+                    ucs_sys_realloc(NULL, 0, ucs_debug_signal_stack.ss_size);
+    if (ucs_debug_signal_stack.ss_sp == NULL) {
+        return;
+    }
+
+    ucs_debug_signal_stack.ss_flags = 0;
+    ret = sigaltstack(&ucs_debug_signal_stack, NULL);
+    if (ret) {
+        ucs_warn("sigaltstack(ss_sp=%p, ss_size=%zu) failed: %m",
+                 ucs_debug_signal_stack.ss_sp, ucs_debug_signal_stack.ss_size);
+        ucs_sys_free(ucs_debug_signal_stack.ss_sp,
+                     ucs_debug_signal_stack.ss_size);
+        ucs_debug_signal_stack.ss_sp = NULL;
+        return;
+    }
+
+    ucs_debug("using signal stack %p size %zu", ucs_debug_signal_stack.ss_sp,
+              ucs_debug_signal_stack.ss_size);
+}
+
+/*
+ * Remember the action that was installed for a signal before UCS replaced it.
+ * Only the first action recorded for a signal is kept. Nothing is recorded
+ * if memory cannot be allocated.
+ *
+ * @param signum       Signal number.
+ * @param orig_handler Action to copy into ucs_signal_orig_action_map.
+ */
+static inline void ucs_debug_save_original_sighandler(int signum,
+                                                      const struct sigaction* orig_handler)
+{
+    struct sigaction *oact_copy;
+    khiter_t hash_it;
+    int hash_extra_status;
+
+    ucs_spin_lock(&ucs_kh_lock);
+    hash_it = kh_get(ucs_signal_orig_action, &ucs_signal_orig_action_map, signum);
+    if (hash_it != kh_end(&ucs_signal_orig_action_map)) {
+        goto out;
+    }
+
+    oact_copy = ucs_malloc(sizeof(*orig_handler), "orig_sighandler");
+    if (oact_copy == NULL) {
+        goto out;
+    }
+
+    *oact_copy = *orig_handler;
+    hash_it = kh_put(ucs_signal_orig_action,
+                     &ucs_signal_orig_action_map,
+                     signum, &hash_extra_status);
+    if (hash_extra_status == -1) {
+        /* Insertion failed: hash_it is not a valid slot, so do not store
+         * through it, and release the copy instead of leaking it */
+        ucs_free(oact_copy);
+        goto out;
+    }
+    kh_value(&ucs_signal_orig_action_map, hash_it) = oact_copy;
+
+out:
+    ucs_spin_unlock(&ucs_kh_lock);
+}
+
+/*
+ * Install the given handler for every signal listed in
+ * ucs_global_opts.error_signals and record the actions it replaces.
+ */
+static void ucs_set_signal_handler(void (*handler)(int, siginfo_t*, void *))
+{
+    struct sigaction sigact, old_action;
+    int i;
+    int ret;
+
+    sigact.sa_sigaction = handler;
+    sigact.sa_flags =
SA_SIGINFO; + if (ucs_debug_signal_stack.ss_sp != NULL) { + sigact.sa_flags |= SA_ONSTACK; + } + sigemptyset(&sigact.sa_mask); + + for (i = 0; i < ucs_global_opts.error_signals.count; ++i) { + ret = orig_sigaction(ucs_global_opts.error_signals.signals[i], &sigact, + &old_action); + if (ret < 0) { + ucs_warn("failed to set signal handler for sig %d : %m", + ucs_global_opts.error_signals.signals[i]); + } +#if HAVE_SIGACTION_SA_RESTORER + ucs_debug_signal_restorer = old_action.sa_restorer; +#endif + ucs_debug_save_original_sighandler(ucs_global_opts.error_signals.signals[i], &old_action); + } +} + +static int ucs_debug_backtrace_is_excluded(void *address, const char *symbol) +{ + return +#if HAVE_SIGACTION_SA_RESTORER + address == ucs_debug_signal_restorer || +#endif + !strcmp(symbol, "ucs_handle_error") || + !strcmp(symbol, "ucs_fatal_error_format") || + !strcmp(symbol, "ucs_fatal_error_message") || + !strcmp(symbol, "ucs_error_freeze") || + !strcmp(symbol, "ucs_error_signal_handler") || + !strcmp(symbol, "ucs_debug_handle_error_signal") || + !strcmp(symbol, "ucs_debug_backtrace_create") || + !strcmp(symbol, "ucs_debug_show_innermost_source_file") || + !strcmp(symbol, "ucs_log_default_handler") || + !strcmp(symbol, "__ucs_abort") || + !strcmp(symbol, "ucs_log_dispatch") || + !strcmp(symbol, "__ucs_log") || + !strcmp(symbol, "ucs_debug_send_mail") || + (strstr(symbol, "_L_unlock_") == symbol); +} + +static ucs_status_t ucs_debug_get_lib_info(Dl_info *dlinfo) +{ + int ret; + + (void)dlerror(); + ret = dladdr(ucs_debug_get_lib_info, dlinfo); + if (ret == 0) { + return UCS_ERR_NO_MEMORY; + } + + return UCS_OK; +} + +const char *ucs_debug_get_lib_path() +{ + ucs_status_t status; + Dl_info dlinfo; + + status = ucs_debug_get_lib_info(&dlinfo); + if (status != UCS_OK) { + return ""; + } + + return dlinfo.dli_fname; +} + +unsigned long ucs_debug_get_lib_base_addr() +{ + ucs_status_t status; + Dl_info dlinfo; + + status = ucs_debug_get_lib_info(&dlinfo); + if (status != 
UCS_OK) {
+        return 0;
+    }
+
+    return (uintptr_t)dlinfo.dli_fbase;
+}
+
+/*
+ * Initialize the lock and both hash tables, install the error signal
+ * handlers when ucs_global_opts.handle_errors is set, install the debug
+ * signal handler when ucs_global_opts.debug_signo is positive, and mark the
+ * subsystem as initialized.
+ */
+void ucs_debug_init()
+{
+    ucs_spinlock_init(&ucs_kh_lock);
+
+    kh_init_inplace(ucs_signal_orig_action, &ucs_signal_orig_action_map);
+    kh_init_inplace(ucs_debug_symbol, &ucs_debug_symbols_cache);
+
+    if (ucs_global_opts.handle_errors) {
+        ucs_debug_set_signal_alt_stack();
+        ucs_set_signal_handler(ucs_error_signal_handler);
+    }
+    if (ucs_global_opts.debug_signo > 0) {
+        struct sigaction sigact, old_action;
+        memset(&sigact, 0, sizeof(sigact));
+        memset(&old_action, 0, sizeof(old_action));
+        sigact.sa_handler = ucs_debug_signal_handler;
+        orig_sigaction(ucs_global_opts.debug_signo, &sigact, &old_action);
+        ucs_debug_save_original_sighandler(ucs_global_opts.debug_signo, &old_action);
+    }
+
+#ifdef HAVE_DETAILED_BACKTRACE
+    bfd_init();
+#endif
+
+    /* From now on the signal()/sigaction() wrappers protect UCS handlers */
+    ucs_debug_initialized = 1;
+}
+
+/*
+ * Restore the original action of every signal recorded in
+ * ucs_signal_orig_action_map and destroy the lock.
+ *
+ * @param on_error If zero, the cached symbol names and both hash tables are
+ *                 released as well; if nonzero they are left untouched.
+ *
+ * NOTE(review): ucs_error_signal_handler() and ucs_handle_error() both call
+ * this with on_error=1, so the spinlock can be destroyed more than once -
+ * confirm that ucs_spinlock_destroy() tolerates this.
+ */
+void ucs_debug_cleanup(int on_error)
+{
+    char *sym;
+    int signum;
+    struct sigaction *hndl;
+    ucs_status_t status;
+
+    /* Let the signal()/sigaction() wrappers pass everything through */
+    ucs_debug_initialized = 0;
+
+    kh_foreach_key(&ucs_signal_orig_action_map, signum,
+                   ucs_debug_disable_signal(signum));
+
+    if (!on_error) {
+        kh_foreach_value(&ucs_debug_symbols_cache, sym, ucs_free(sym));
+        kh_foreach_value(&ucs_signal_orig_action_map, hndl, ucs_free(hndl));
+        kh_destroy_inplace(ucs_debug_symbol, &ucs_debug_symbols_cache);
+        kh_destroy_inplace(ucs_signal_orig_action, &ucs_signal_orig_action_map);
+    }
+
+    status = ucs_spinlock_destroy(&ucs_kh_lock);
+    if (status != UCS_OK) {
+        ucs_warn("ucs_spinlock_destroy() failed (%d)", status);
+    }
+}
+
+/*
+ * Restore the original action of one signal and drop its map entry.
+ * The caller is expected to hold ucs_kh_lock.
+ */
+static inline void ucs_debug_disable_signal_nolock(int signum)
+{
+    khiter_t hash_it;
+    struct sigaction *original_action, ucs_action;
+    int ret;
+
+    hash_it = kh_get(ucs_signal_orig_action, &ucs_signal_orig_action_map,
+                     signum);
+    if (hash_it == kh_end(&ucs_signal_orig_action_map)) {
+        ucs_warn("ucs_debug_disable_signal: signal %d was not set in ucs",
+                 signum);
+        return;
+    }
+
+    original_action = kh_val(&ucs_signal_orig_action_map, hash_it);
+    
ret = orig_sigaction(signum, original_action, &ucs_action);
+    if (ret < 0) {
+        ucs_warn("failed to set signal handler for sig %d : %m", signum);
+    }
+
+    /* The entry is removed even if restoring the action failed */
+    kh_del(ucs_signal_orig_action, &ucs_signal_orig_action_map, hash_it);
+    ucs_free(original_action);
+}
+
+/* Locked wrapper around ucs_debug_disable_signal_nolock() */
+void ucs_debug_disable_signal(int signum)
+{
+    ucs_spin_lock(&ucs_kh_lock);
+    ucs_debug_disable_signal_nolock(signum);
+    ucs_spin_unlock(&ucs_kh_lock);
+}
+
+/* Restore the original action of every recorded signal under one lock */
+void ucs_debug_disable_signals()
+{
+    int signum;
+
+    ucs_spin_lock(&ucs_kh_lock);
+    kh_foreach_key(&ucs_signal_orig_action_map, signum,
+                   ucs_debug_disable_signal_nolock(signum));
+    ucs_spin_unlock(&ucs_kh_lock);
+}
diff --git a/src/ucs/debug/debug.h b/src/ucs/debug/debug.h
new file mode 100644
index 0000000..66b90dc
--- /dev/null
+++ b/src/ucs/debug/debug.h
@@ -0,0 +1,147 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCS_DEBUG_H_
+#define UCS_DEBUG_H_
+
+#include
+#include
+#include
+#include
+
+
+/**
+ * Information about an address in the code.
+ */
+typedef struct ucs_debug_address_info {
+    struct {
+        char path[512]; /* Binary file path */
+        unsigned long base; /* Binary file load base */
+    } file;
+    char function[128]; /* Function name */
+    char source_file[512]; /* Source file path */
+    unsigned line_number; /* Line number */
+} ucs_debug_address_info_t;
+
+
+typedef struct backtrace *backtrace_h;
+typedef struct backtrace_line *backtrace_line_h;
+
+extern const char *ucs_state_detail_level_names[];
+extern const char *ucs_signal_names[];
+
+
+/**
+ * Initialize UCS debugging subsystem.
+ */
+void ucs_debug_init();
+
+
+/**
+ * Cleanup UCS debugging subsystem.
+ */
+void ucs_debug_cleanup(int on_error);
+
+/**
+ * Disable UCS signal handling for the given signal.
+ * The previous signal handler is restored.
+ */
+void ucs_debug_disable_signal(int signum);
+
+/**
+ * Disable signal handling in UCS for all signals
+ * that were set in ucs_global_opts.error_signals.
+ * The previous signal handlers are restored.
+ */
+void ucs_debug_disable_signals();
+/**
+ * Get information about an address in the code of the current program.
+ * @param address Address to look up.
+ * @param info Filled with information about the given address. Source file
+ *             and line number are filled only if the binary file was compiled
+ *             with debug information, and UCS was configured with detailed
+ *             backtrace enabled.
+ * @return UCS_ERR_NO_ELEM if the address is not found, UCS_OK otherwise.
+ */
+ucs_status_t ucs_debug_lookup_address(void *address, ucs_debug_address_info_t *info);
+
+
+/**
+ * @return Full path to the current library, or an empty string on failure.
+ */
+const char *ucs_debug_get_lib_path();
+
+
+/**
+ * @return UCS library loading address, or 0 on failure.
+ */
+unsigned long ucs_debug_get_lib_base_addr();
+
+
+/**
+ * Create a backtrace from the calling location.
+ *
+ * @param bckt Backtrace object.
+ * @param strip How many frames to strip.
+*/
+ucs_status_t ucs_debug_backtrace_create(backtrace_h *bckt, int strip);
+
+
+/**
+ * Destroy a backtrace and free all memory.
+ *
+ * @param bckt Backtrace object.
+ */
+void ucs_debug_backtrace_destroy(backtrace_h bckt);
+
+
+/**
+ * Walk to the next backtrace line information.
+ *
+ * @param bckt Backtrace object.
+ * @param line Filled with backtrace frame info.
+ *
+ * NOTE: the line remains valid as long as the backtrace object is not destroyed.
+ */
+int ucs_debug_backtrace_next(backtrace_h bckt, backtrace_line_h *line);
+
+
+/**
+ * Print backtrace line to string buffer.
+ *
+ * @param buffer Target buffer to print to.
+ * @param maxlen Size of target buffer.
+ * @param frame_num Frame number.
+ * @param line Backtrace line to print.
+ */
+void ucs_debug_print_backtrace_line(char *buffer, size_t maxlen,
+                                    int frame_num,
+                                    backtrace_line_h line);
+
+/**
+ * Print backtrace to an output stream.
+ *
+ * @param stream Stream to print to.
+ * @param strip How many frames to strip.
+ */
+void ucs_debug_print_backtrace(FILE *stream, int strip);
+
+
+/**
+ * Called when UCS detects a fatal error and provides means to debug the current
+ * state of UCS.
+ */
+void ucs_handle_error(const char *message);
+
+
+/**
+ * @return Name of the symbol at the given address, or the placeholder string
+ *         UCS_DEBUG_UNKNOWN_SYM if it cannot be resolved (never NULL).
+ */
+const char *ucs_debug_get_symbol_name(void *address);
+
+
+#endif
diff --git a/src/ucs/debug/log.c b/src/ucs/debug/log.c
new file mode 100644
index 0000000..7de784b
--- /dev/null
+++ b/src/ucs/debug/log.c
@@ -0,0 +1,357 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "log.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define UCS_MAX_LOG_HANDLERS 32
+
+
+/* Text printed for each log level, indexed by ucs_log_level_t */
+const char *ucs_log_level_names[] = {
+    [UCS_LOG_LEVEL_FATAL] = "FATAL",
+    [UCS_LOG_LEVEL_ERROR] = "ERROR",
+    [UCS_LOG_LEVEL_WARN] = "WARN",
+    [UCS_LOG_LEVEL_INFO] = "INFO",
+    [UCS_LOG_LEVEL_DEBUG] = "DEBUG",
+    [UCS_LOG_LEVEL_TRACE] = "TRACE",
+    [UCS_LOG_LEVEL_TRACE_REQ] = "REQ",
+    [UCS_LOG_LEVEL_TRACE_DATA] = "DATA",
+    [UCS_LOG_LEVEL_TRACE_ASYNC] = "ASYNC",
+    [UCS_LOG_LEVEL_TRACE_FUNC] = "FUNC",
+    [UCS_LOG_LEVEL_TRACE_POLL] = "POLL",
+    [UCS_LOG_LEVEL_LAST] = NULL,
+    [UCS_LOG_LEVEL_PRINT] = "PRINT"
+};
+
+static unsigned ucs_log_handlers_count = 0;
+static int ucs_log_initialized = 0;
+static char ucs_log_hostname[HOST_NAME_MAX] = {0};
+static int ucs_log_pid = 0;
+static FILE *ucs_log_file = NULL;
+static int ucs_log_file_close = 0;
+static unsigned threads_count = 0;
+static pthread_spinlock_t threads_lock = 0;
+static pthread_t threads[128] = {0};
+static ucs_log_func_t ucs_log_handlers[UCS_MAX_LOG_HANDLERS];
+
+
+/*
+ * Map the calling thread to a small index, registering the thread in the
+ * 'threads' table on first use.
+ *
+ * @return Index of the calling thread in 'threads', or -1 if the table is
+ *         full.
+ */
+static int ucs_log_get_thread_num(void)
+{
+    pthread_t self = pthread_self();
+    int i;
+
+    /* First search without taking the lock */
+    for (i = 0; i < threads_count; ++i) {
+        if (threads[i] == self) {
+            return i;
+        }
+    }
+
+    pthread_spin_lock(&threads_lock);
+
+    /* Search again with the lock held, before adding a new entry */
+    for (i = 0; i < threads_count; ++i) {
+        if
(threads[i] == self) {
+            goto unlock_and_return_i;
+        }
+    }
+
+    if (threads_count >= ucs_static_array_size(threads)) {
+        i = -1;
+        goto unlock_and_return_i;
+    }
+
+    i = threads_count;
+    ++threads_count;
+    threads[i] = self;
+
+unlock_and_return_i:
+    pthread_spin_unlock(&threads_lock);
+    return i;
+}
+
+/* Flush the log stream and sync its file descriptor, if a log file is set */
+void ucs_log_flush()
+{
+    if (ucs_log_file != NULL) {
+        fflush(ucs_log_file);
+        fsync(fileno(ucs_log_file));
+    }
+}
+
+/*
+ * Size of the buffer used to format one log message, derived from
+ * ucs_global_opts.log_buffer_size. The meaning of the 256 and 2048 arguments
+ * is defined by ucs_config_memunits_get() - presumably the values used for
+ * special settings; TODO confirm.
+ */
+size_t ucs_log_get_buffer_size()
+{
+    return ucs_config_memunits_get(ucs_global_opts.log_buffer_size,
+                                   256, 2048);
+}
+
+/*
+ * Emit one formatted log line. Under valgrind the line goes to
+ * VALGRIND_PRINTF; once logging is initialized it goes to ucs_log_file with
+ * host, pid and thread number; otherwise it goes to stdout.
+ */
+static void ucs_log_print(size_t buffer_size, const char *short_file, int line,
+                          ucs_log_level_t level, const struct timeval *tv,
+                          const char *message)
+{
+    char *valg_buf;
+
+    if (RUNNING_ON_VALGRIND) {
+        valg_buf = ucs_alloca(buffer_size + 1);
+        snprintf(valg_buf, buffer_size,
+                 "[%lu.%06lu] %16s:%-4u %-4s %-5s %s\n", tv->tv_sec, tv->tv_usec,
+                 short_file, line, "UCX", ucs_log_level_names[level],
+                 message);
+        VALGRIND_PRINTF("%s", valg_buf);
+    } else if (ucs_log_initialized) {
+        fprintf(ucs_log_file,
+                "[%lu.%06lu] [%s:%-5d:%d] %16s:%-4u %-4s %-5s %s\n",
+                tv->tv_sec, tv->tv_usec, ucs_log_hostname, ucs_log_pid,
+                ucs_log_get_thread_num(), short_file, line, "UCX",
+                ucs_log_level_names[level], message);
+    } else {
+        fprintf(stdout,
+                "[%lu.%06lu] %16s:%-4u %-4s %-5s %s\n",
+                tv->tv_sec, tv->tv_usec, short_file, line,
+                "UCX", ucs_log_level_names[level], message);
+    }
+}
+
+/*
+ * Default log handler: formats the message and prints it line by line, or
+ * turns it into a fatal error when the level is at or below
+ * ucs_global_opts.log_level_trigger. Always returns UCS_LOG_FUNC_RC_CONTINUE.
+ */
+ucs_log_func_rc_t
+ucs_log_default_handler(const char *file, unsigned line, const char *function,
+                        ucs_log_level_t level, const char *format, va_list ap)
+{
+    size_t buffer_size = ucs_log_get_buffer_size();
+    char *saveptr = "";
+    char *log_line;
+    struct timeval tv;
+    char *buf;
+
+    /* UCS_LOG_LEVEL_PRINT messages are printed regardless of the log level */
+    if (!ucs_log_is_enabled(level) && (level != UCS_LOG_LEVEL_PRINT)) {
+        return UCS_LOG_FUNC_RC_CONTINUE;
+    }
+
+    buf = ucs_alloca(buffer_size + 1);
+    buf[buffer_size] = 0;
+    vsnprintf(buf, buffer_size, format, ap);
+
+    if (level <= ucs_global_opts.log_level_trigger) {
+        
ucs_fatal_error_message(file, line, function, buf);
+    } else {
+        gettimeofday(&tv, NULL);
+
+        /* Print each line of a multi-line message as its own log line, all
+         * with the same timestamp */
+        log_line = strtok_r(buf, "\n", &saveptr);
+        while (log_line != NULL) {
+            ucs_log_print(buffer_size, ucs_basename(file), line, level, &tv, log_line);
+            log_line = strtok_r(NULL, "\n", &saveptr);
+        }
+    }
+
+    /* flush the log file if the log_level of this message is fatal or error */
+    if (level <= UCS_LOG_LEVEL_ERROR) {
+        ucs_log_flush();
+    }
+
+    return UCS_LOG_FUNC_RC_CONTINUE;
+}
+
+/* Add a handler on top of the handler stack; silently ignored if the stack
+ * already holds UCS_MAX_LOG_HANDLERS entries. */
+void ucs_log_push_handler(ucs_log_func_t handler)
+{
+    if (ucs_log_handlers_count < UCS_MAX_LOG_HANDLERS) {
+        ucs_log_handlers[ucs_log_handlers_count++] = handler;
+    }
+}
+
+/* Remove the most recently pushed handler, if any */
+void ucs_log_pop_handler()
+{
+    if (ucs_log_handlers_count > 0) {
+        --ucs_log_handlers_count;
+    }
+}
+
+/* Number of handlers currently on the handler stack */
+unsigned ucs_log_num_handlers()
+{
+    return ucs_log_handlers_count;
+}
+
+/*
+ * Pass a log message to the registered handlers, most recently pushed first,
+ * until one of them returns something other than UCS_LOG_FUNC_RC_CONTINUE.
+ */
+void ucs_log_dispatch(const char *file, unsigned line, const char *function,
+                      ucs_log_level_t level, const char *format, ...)
+{
+    ucs_log_func_rc_t rc;
+    unsigned index;
+    va_list ap;
+
+    /* Call handlers in reverse order */
+    rc = UCS_LOG_FUNC_RC_CONTINUE;
+    index = ucs_log_handlers_count;
+    while ((index > 0) && (rc == UCS_LOG_FUNC_RC_CONTINUE)) {
+        --index;
+        /* The argument list is restarted for every handler */
+        va_start(ap, format);
+        rc = ucs_log_handlers[index](file, line, function, level, format, ap);
+        va_end(ap);
+    }
+}
+
+/*
+ * Format a message prefixed with host name, process id, thread number and
+ * thread id, and write it with write() to the file descriptor of stderr.
+ */
+void ucs_log_fatal_error(const char *format, ...)
/*
 * Append "<prefix><value>" at *pos, never writing at or beyond limit.
 *
 * Returns 1 and advances *pos on success. Returns 0 if the text does not fit;
 * in that case *pos is left unchanged and the partial output is discarded by
 * terminating the string at *pos.
 */
static int ucs_log_bitmap_append(char **pos, const char *limit,
                                 const char *prefix, unsigned value)
{
    size_t avail = (size_t)(limit - *pos);
    int len;

    len = snprintf(*pos, avail, "%s%u", prefix, value);
    if ((len < 0) || ((size_t)len >= avail)) {
        **pos = '\0';
        return 0;
    }

    *pos += len;
    return 1;
}

/**
 * Print a bitmap as a list of ranges, e.g. "0-3,5".
 *
 * The result is placed in a static buffer which is overwritten by the next
 * call. If the list does not fit, it is cut after the last complete item and
 * terminated with "...".
 *
 * @param n      Number equivalent to the first bit in the bitmap.
 * @param bitmap Compressed array of bits.
 * @param length Number of bits in the bitmap.
 *
 * @return Pointer to the static, NUL-terminated result string. An empty
 *         string is returned when no bit is set.
 */
const char *ucs_log_bitmap_to_str(unsigned n, uint8_t *bitmap, size_t length)
{
    static char buf[512] = {0};
    int first, in_range;
    unsigned prev = 0, end = 0;
    char *p, *endp;
    size_t i;

    p      = buf;
    endp   = buf + sizeof(buf) - 4; /* keep room for "..." and the NUL */
    buf[0] = '\0';                  /* do not leak the previous call's text */

    first    = 1;
    in_range = 0;
    for (i = 0; i < length; ++i, ++n) {
        /* test bit (i % 8) of byte (i / 8) */
        if (!((bitmap[i / 8] >> (i % 8)) & 1)) {
            continue;
        }

        if (first) {
            if (!ucs_log_bitmap_append(&p, endp, "", n)) {
                goto overflow;
            }
        } else if (n == prev + 1) {
            /* extend the current run; it is printed when the run ends */
            in_range = 1;
            end      = n;
        } else {
            if (in_range && !ucs_log_bitmap_append(&p, endp, "-", end)) {
                goto overflow;
            }
            in_range = 0;
            if (!ucs_log_bitmap_append(&p, endp, ",", n)) {
                goto overflow;
            }
        }
        first = 0;
        prev  = n;
    }

    if (in_range && !ucs_log_bitmap_append(&p, endp, "-", end)) {
        goto overflow;
    }
    return buf;

overflow:
    /* p never exceeds endp, so the ellipsis always fits inside buf */
    strcpy(p, "...");
    return buf;
}
NULL; + ucs_log_file_close = 0; + threads_count = 0; + pthread_spin_init(&threads_lock, 0); +} + +void ucs_log_init() +{ + const char *next_token; + + if (ucs_log_initialized) { + return; + } + + ucs_log_initialized = 1; /* Set this to 1 immediately to avoid infinite recursion */ + + strcpy(ucs_log_hostname, ucs_get_host_name()); + ucs_log_file = stdout; + ucs_log_file_close = 0; + + ucs_log_push_handler(ucs_log_default_handler); + + if (strlen(ucs_global_opts.log_file) != 0) { + ucs_open_output_stream(ucs_global_opts.log_file, UCS_LOG_LEVEL_FATAL, + &ucs_log_file, &ucs_log_file_close, &next_token); + } +} + +void ucs_log_cleanup() +{ + ucs_log_flush(); + if (ucs_log_file_close) { + fclose(ucs_log_file); + } + pthread_spin_destroy(&threads_lock); + ucs_log_file = NULL; + ucs_log_initialized = 0; + ucs_log_handlers_count = 0; +} + +void ucs_log_print_backtrace(ucs_log_level_t level) +{ + backtrace_h bckt; + backtrace_line_h bckt_line; + int i; + char buf[1024]; + ucs_status_t status; + + status = ucs_debug_backtrace_create(&bckt, 1); + if (status != UCS_OK) { + return; + } + + ucs_log(level, "==== backtrace (tid:%7d) ====\n", ucs_get_tid()); + for (i = 0; ucs_debug_backtrace_next(bckt, &bckt_line); ++i) { + ucs_debug_print_backtrace_line(buf, sizeof(buf), i, bckt_line); + ucs_log(level, "%s", buf); + } + ucs_log(level, "=================================\n"); + + ucs_debug_backtrace_destroy(bckt); +} diff --git a/src/ucs/debug/log.h b/src/ucs/debug/log.h new file mode 100644 index 0000000..51b0f84 --- /dev/null +++ b/src/ucs/debug/log.h @@ -0,0 +1,167 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + + +#ifndef UCS_LOG_H_ +#define UCS_LOG_H_ + +#ifdef HAVE_CONFIG_H +# include "config.h" /* Defines UCS_MAX_LOG_LEVEL */ +#endif + +#include +#include +#include +#include + + +BEGIN_C_DECLS + +/** @file log.h */ + +#define ucs_log_is_enabled(_level) \ + ucs_unlikely(((_level) <= UCS_MAX_LOG_LEVEL) && ((_level) <= (ucs_global_opts.log_level))) + + +#define ucs_log(_level, _fmt, ...) \ + do { \ + if (ucs_log_is_enabled(_level)) { \ + ucs_log_dispatch(__FILE__, __LINE__, __func__, \ + (ucs_log_level_t)(_level), _fmt, ## __VA_ARGS__); \ + } \ + } while (0) + + +#define ucs_error(_fmt, ...) ucs_log(UCS_LOG_LEVEL_ERROR, _fmt, ## __VA_ARGS__) +#define ucs_warn(_fmt, ...) ucs_log(UCS_LOG_LEVEL_WARN, _fmt, ## __VA_ARGS__) +#define ucs_info(_fmt, ...) ucs_log(UCS_LOG_LEVEL_INFO, _fmt, ## __VA_ARGS__) +#define ucs_debug(_fmt, ...) ucs_log(UCS_LOG_LEVEL_DEBUG, _fmt, ## __VA_ARGS__) +#define ucs_trace(_fmt, ...) ucs_log(UCS_LOG_LEVEL_TRACE, _fmt, ## __VA_ARGS__) +#define ucs_trace_req(_fmt, ...) ucs_log(UCS_LOG_LEVEL_TRACE_REQ, _fmt, ## __VA_ARGS__) +#define ucs_trace_data(_fmt, ...) ucs_log(UCS_LOG_LEVEL_TRACE_DATA, _fmt, ## __VA_ARGS__) +#define ucs_trace_async(_fmt, ...) ucs_log(UCS_LOG_LEVEL_TRACE_ASYNC, _fmt, ## __VA_ARGS__) +#define ucs_trace_func(_fmt, ...) ucs_log(UCS_LOG_LEVEL_TRACE_FUNC, "%s(" _fmt ")", __FUNCTION__, ## __VA_ARGS__) +#define ucs_trace_poll(_fmt, ...) ucs_log(UCS_LOG_LEVEL_TRACE_POLL, _fmt, ## __VA_ARGS__) + + +/** + * Print a message regardless of current log level. Output can be + * enabled/disabled via environment variable/configuration settings. + * + * During debugging it can be useful to add a few prints to the code + * without changing a current log level. Also it is useful to be able + * to see messages only from specific processes. For example, one may + * want to see prints only from rank 0 when debugging MPI. + * + * The function is intended for debugging only. It should not be used + * in the real code. 
+ */ + +#define ucs_print(_fmt, ...) \ + do { \ + if (ucs_global_opts.log_print_enable) { \ + ucs_log_dispatch(__FILE__, __LINE__, __FUNCTION__, \ + UCS_LOG_LEVEL_PRINT, _fmt, ## __VA_ARGS__); \ + } \ + } while(0) + + +typedef enum { + UCS_LOG_FUNC_RC_STOP, + UCS_LOG_FUNC_RC_CONTINUE +} ucs_log_func_rc_t; + + +/** + * Function type for handling log messages. + * + * @param file Source file name. + * @param line Source line number. + * @param function Function name. + * @param message Log message - format string + * @param ap Log message format parameters. + * + * @return UCS_LOG_FUNC_RC_CONTINUE - continue to next log handler + * UCS_LOG_FUNC_RC_STOP - don't continue + */ +typedef ucs_log_func_rc_t (*ucs_log_func_t)(const char *file, unsigned line, + const char *function, ucs_log_level_t level, + const char *message, va_list ap); + + +extern const char *ucs_log_level_names[]; +extern const char *ucs_log_category_names[]; + + +/** + * Dispatch a logging message. + * + * @param [in] file Source file name. + * @param [in] line Source line number. + * @param [in] function Function name which generated the log. + * @param [in] level Log level of the message, + * @param [in] message Log format + */ +void ucs_log_dispatch(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *format, ...) + UCS_F_PRINTF(5, 6); + + +/** + * Flush logging output. + */ +void ucs_log_flush(); + + +/** + * @return Configured log buffer size + */ +size_t ucs_log_get_buffer_size(); + + +/** + * Default log handler, which prints the message to the output configured in + * UCS global options. See @ref ucs_log_func_t. + */ +ucs_log_func_rc_t +ucs_log_default_handler(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *format, va_list ap); + + +/** + * Show a fatal error + */ +void ucs_log_fatal_error(const char *format, ...); + + +/** + * Initialize/cleanup logging subsystem. 
+ */ +void ucs_log_early_init(); +void ucs_log_init(); +void ucs_log_cleanup(); + + +const char *ucs_log_bitmap_to_str(unsigned n, uint8_t *bitmap, size_t length); + +/** + * Add/remove logging handlers + */ +void ucs_log_push_handler(ucs_log_func_t handler); +void ucs_log_pop_handler(); +unsigned ucs_log_num_handlers(); + + +/** + * Log backtrace. + * + * @param level Log level. + */ +void ucs_log_print_backtrace(ucs_log_level_t level); + +END_C_DECLS + +#endif diff --git a/src/ucs/debug/memtrack.c b/src/ucs/debug/memtrack.c new file mode 100644 index 0000000..b61d47c --- /dev/null +++ b/src/ucs/debug/memtrack.c @@ -0,0 +1,391 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "memtrack.h" + +#include +#include +#include +#include +#include +#include + + +#if ENABLE_MEMTRACK + +#define UCS_MEMTRACK_FORMAT_STRING ("%22s: size: %9lu / %9lu\tcount: %9u / %9u\n") + + +typedef struct ucs_memtrack_ptr { + size_t size; /* Length of allocated buffer */ + ucs_memtrack_entry_t *entry; /* Entry which tracks this allocation */ +} ucs_memtrack_ptr_t; + +KHASH_MAP_INIT_INT64(ucs_memtrack_ptr_hash, ucs_memtrack_ptr_t) +KHASH_MAP_INIT_STR(ucs_memtrack_entry_hash, ucs_memtrack_entry_t*); + +typedef struct ucs_memtrack_context { + int enabled; + pthread_mutex_t lock; + ucs_memtrack_entry_t total; + khash_t(ucs_memtrack_ptr_hash) ptrs; + khash_t(ucs_memtrack_entry_hash) entries; + UCS_STATS_NODE_DECLARE(stats) +} ucs_memtrack_context_t; + + +/* Global context for tracking allocated memory */ +static ucs_memtrack_context_t ucs_memtrack_context = { + .enabled = 0, + .lock = PTHREAD_MUTEX_INITIALIZER +}; + +#if ENABLE_STATS +static ucs_stats_class_t ucs_memtrack_stats_class = { + .name = "memtrack", + .num_counters = UCS_MEMTRACK_STAT_LAST, + .counter_names = { + [UCS_MEMTRACK_STAT_ALLOCATION_COUNT] = "alloc_cnt", + [UCS_MEMTRACK_STAT_ALLOCATION_SIZE] 
= "alloc_size" + } +}; +#endif + +static void ucs_memtrack_entry_reset(ucs_memtrack_entry_t *entry) +{ + entry->size = 0; + entry->peak_size = 0; + entry->count = 0; + entry->peak_count = 0; +} + +static ucs_memtrack_entry_t* ucs_memtrack_entry_get(const char* name) +{ + ucs_memtrack_entry_t *entry; + khiter_t iter; + int ret; + + iter = kh_get(ucs_memtrack_entry_hash, &ucs_memtrack_context.entries, name); + if (iter != kh_end(&ucs_memtrack_context.entries)) { + return kh_val(&ucs_memtrack_context.entries, iter); + } + + entry = malloc(sizeof(*entry) + strlen(name) + 1); + if (entry == NULL) { + return NULL; + } + + ucs_memtrack_entry_reset(entry); + strcpy(entry->name, name); + + iter = kh_put(ucs_memtrack_entry_hash, &ucs_memtrack_context.entries, + entry->name, &ret); + ucs_assertv(ret == 1 || ret == 2, "ret=%d", ret); + kh_val(&ucs_memtrack_context.entries, iter) = entry; + + return entry; +} + +static void ucs_memtrack_entry_update(ucs_memtrack_entry_t *entry, ssize_t size) +{ + int count = (size < 0) ? 
-1 : 1; + + ucs_assert((int)entry->count >= -count); + ucs_assert((ssize_t)entry->size >= -size); + entry->count += count; + entry->size += size; + entry->peak_count = ucs_max(entry->peak_count, entry->count); + entry->peak_size = ucs_max(entry->peak_size, entry->size); +} + +void ucs_memtrack_allocated(void *ptr, size_t size, const char *name) +{ + ucs_memtrack_entry_t *entry; + khiter_t iter; + int ret; + +#ifdef UCX_ALLOC_ALIGN + UCS_STATIC_ASSERT(UCX_ALLOC_ALIGN >= 16); + UCS_STATIC_ASSERT(ucs_is_pow2_or_zero(UCX_ALLOC_ALIGN)); + ucs_assert(!ucs_check_if_align_pow2((uintptr_t)ptr, UCX_ALLOC_ALIGN)); +#endif + + if ((ptr == NULL) || !ucs_memtrack_is_enabled()) { + return; + } + + pthread_mutex_lock(&ucs_memtrack_context.lock); + + entry = ucs_memtrack_entry_get(name); + if (entry == NULL) { + goto out_unlock; + } + + /* Add pointer to hash */ + iter = kh_put(ucs_memtrack_ptr_hash, &ucs_memtrack_context.ptrs, + (uintptr_t)ptr, &ret); + ucs_assertv(ret == 1 || ret == 2, "ret=%d", ret); + kh_value(&ucs_memtrack_context.ptrs, iter).entry = entry; + kh_value(&ucs_memtrack_context.ptrs, iter).size = size; + + /* update specific and global entries */ + ucs_memtrack_entry_update(entry, size); + ucs_memtrack_entry_update(&ucs_memtrack_context.total, size); + + UCS_STATS_UPDATE_COUNTER(ucs_memtrack_context.stats, UCS_MEMTRACK_STAT_ALLOCATION_COUNT, 1); + UCS_STATS_UPDATE_COUNTER(ucs_memtrack_context.stats, UCS_MEMTRACK_STAT_ALLOCATION_SIZE, size); + +out_unlock: + pthread_mutex_unlock(&ucs_memtrack_context.lock); +} + +void ucs_memtrack_releasing(void* ptr) +{ + ucs_memtrack_entry_t *entry; + khiter_t iter; + size_t size; + + if ((ptr == NULL) || !ucs_memtrack_is_enabled()) { + return; + } + + pthread_mutex_lock(&ucs_memtrack_context.lock); + + iter = kh_get(ucs_memtrack_ptr_hash, &ucs_memtrack_context.ptrs, (uintptr_t)ptr); + if (iter == kh_end(&ucs_memtrack_context.ptrs)) { + ucs_debug("address %p not found in memtrack ptr hash", ptr); + goto out_unlock; + } + + /* 
/**
 * Tracked replacement for strdup().
 *
 * @param src   String to duplicate.
 * @param name  Allocation-site name used for tracking.
 *
 * @return The duplicated string, or NULL if strdup() failed. A failed
 *         duplication is not tracked.
 */
char *ucs_strdup(const char *src, const char *name)
{
    char *str = strdup(src);

    /* strlen() must not be evaluated on a failed (NULL) duplication */
    if (str != NULL) {
        ucs_memtrack_allocated(str, strlen(str) + 1, name);
    }
    return str;
}

/**
 * Tracked replacement for strndup().
 *
 * @param src   String to duplicate.
 * @param n     Maximal number of characters to copy from src.
 * @param name  Allocation-site name used for tracking.
 *
 * @return The duplicated string, or NULL if strndup() failed. A failed
 *         duplication is not tracked.
 */
char *ucs_strndup(const char *src, size_t n, const char *name)
{
    char *str = strndup(src, n);

    /* strlen() must not be evaluated on a failed (NULL) duplication */
    if (str != NULL) {
        ucs_memtrack_allocated(str, strlen(str) + 1, name);
    }
    return str;
}
ucs_memtrack_total(ucs_memtrack_entry_t* total) +{ + if (!ucs_memtrack_is_enabled()) { + return; + } + + pthread_mutex_lock(&ucs_memtrack_context.lock); + *total = ucs_memtrack_context.total; + pthread_mutex_unlock(&ucs_memtrack_context.lock); +} + +static int ucs_memtrack_cmp_entries(const void *ptr1, const void *ptr2) +{ + ucs_memtrack_entry_t * const *e1 = ptr1; + ucs_memtrack_entry_t * const *e2 = ptr2; + + return (int)((ssize_t)(*e2)->peak_size - (ssize_t)(*e1)->peak_size); +} + +static void ucs_memtrack_dump_internal(FILE* output_stream) +{ + ucs_memtrack_entry_t *entry, **all_entries; + unsigned num_entries, i; + + if (!ucs_memtrack_is_enabled()) { + return; + } + + /* collect all entries to one array */ + all_entries = ucs_alloca(sizeof(*all_entries) * + kh_size(&ucs_memtrack_context.entries)); + num_entries = 0; + kh_foreach_value(&ucs_memtrack_context.entries, entry, { + all_entries[num_entries++] = entry; + }); + ucs_assert(num_entries <= kh_size(&ucs_memtrack_context.entries)); + + /* sort entries according to peak size */ + qsort(all_entries, num_entries, sizeof(*all_entries), ucs_memtrack_cmp_entries); + + /* print title */ + fprintf(output_stream, "%31s current / peak %16s current / peak\n", "", ""); + fprintf(output_stream, UCS_MEMTRACK_FORMAT_STRING, "TOTAL", + ucs_memtrack_context.total.size, ucs_memtrack_context.total.peak_size, + ucs_memtrack_context.total.count, ucs_memtrack_context.total.peak_count); + + /* print sorted entries */ + for (i = 0; i < num_entries; ++i) { + entry = all_entries[i]; + fprintf(output_stream, UCS_MEMTRACK_FORMAT_STRING, entry->name, + entry->size, entry->peak_size, entry->count, entry->peak_count); + } +} + +void ucs_memtrack_dump(FILE* output_stream) +{ + pthread_mutex_lock(&ucs_memtrack_context.lock); + ucs_memtrack_dump_internal(output_stream); + pthread_mutex_unlock(&ucs_memtrack_context.lock); +} + +static void ucs_memtrack_generate_report() +{ + ucs_status_t status; + FILE* output_stream; + const char 
*next_token; + int need_close; + + status = ucs_open_output_stream(ucs_global_opts.memtrack_dest, + UCS_LOG_LEVEL_ERROR, &output_stream, + &need_close, &next_token); + if (status != UCS_OK) { + return; + } + + ucs_memtrack_dump_internal(output_stream); + if (need_close) { + fclose(output_stream); + } +} + +void ucs_memtrack_init() +{ + ucs_status_t status; + + ucs_assert(ucs_memtrack_context.enabled == 0); + + if (!strcmp(ucs_global_opts.memtrack_dest, "")) { + ucs_trace("memtrack disabled"); + ucs_memtrack_context.enabled = 0; + return; + } + + // TODO use ucs_memtrack_entry_reset + ucs_memtrack_entry_reset(&ucs_memtrack_context.total); + kh_init_inplace(ucs_memtrack_ptr_hash, &ucs_memtrack_context.ptrs); + kh_init_inplace(ucs_memtrack_entry_hash, &ucs_memtrack_context.entries); + + status = UCS_STATS_NODE_ALLOC(&ucs_memtrack_context.stats, + &ucs_memtrack_stats_class, + ucs_stats_get_root()); + if (status != UCS_OK) { + return; + } + + ucs_debug("memtrack enabled"); + ucs_memtrack_context.enabled = 1; +} + +void ucs_memtrack_cleanup() +{ + ucs_memtrack_entry_t *entry; + + if (!ucs_memtrack_context.enabled) { + return; + } + + ucs_memtrack_generate_report(); + + /* disable before releasing the stats node */ + ucs_memtrack_context.enabled = 0; + UCS_STATS_NODE_FREE(ucs_memtrack_context.stats); + + /* cleanup entries */ + kh_foreach_value(&ucs_memtrack_context.entries, entry, { + free(entry); + }); + + /* destroy hash tables */ + kh_destroy_inplace(ucs_memtrack_entry_hash, &ucs_memtrack_context.entries); + kh_destroy_inplace(ucs_memtrack_ptr_hash, &ucs_memtrack_context.ptrs); +} + +int ucs_memtrack_is_enabled() +{ + return ucs_memtrack_context.enabled; +} + +#endif diff --git a/src/ucs/debug/memtrack.h b/src/ucs/debug/memtrack.h new file mode 100644 index 0000000..18c2887 --- /dev/null +++ b/src/ucs/debug/memtrack.h @@ -0,0 +1,146 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. 
ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#ifndef UCS_MEMTRACK_H_ +#define UCS_MEMTRACK_H_ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include + + +BEGIN_C_DECLS + +/** @file memtrack.h */ + +enum { + UCS_MEMTRACK_STAT_ALLOCATION_COUNT, + UCS_MEMTRACK_STAT_ALLOCATION_SIZE, + UCS_MEMTRACK_STAT_LAST +}; + + +/** + * Allocation site entry + */ +typedef struct ucs_memtrack_entry { + size_t size; /* currently allocated total size */ + size_t peak_size; /* peak allocated total size */ + unsigned count; /* number of currently allocated blocks */ + unsigned peak_count; /* peak number of allocated blocks */ + char name[0]; /* allocation name */ +} ucs_memtrack_entry_t; + + + +#if ENABLE_MEMTRACK + +#define UCS_MEMTRACK_ARG , const char* alloc_name +#define UCS_MEMTRACK_VAL , alloc_name +#define UCS_MEMTRACK_VAL_ALWAYS alloc_name +#define UCS_MEMTRACK_NAME(_n) , _n + + +/** + * Start tracking memory (or increment reference count). + */ +void ucs_memtrack_init(); + + +/** + * Stop tracking memory (or decrement reference count). + */ +void ucs_memtrack_cleanup(); + + +/* + * Check if memtrack is enabled at the moment. + */ +int ucs_memtrack_is_enabled(); + + +/** + * Print a summary of memory tracked so far. + * + * @param output Stream to direct output to. + */ +void ucs_memtrack_dump(FILE* output); + + +/** + * Calculates the total of buffers currently tracked. + * + * @param total Entry (pre-allocated) to place results in. + */ +void ucs_memtrack_total(ucs_memtrack_entry_t* total); + + +/** + * Track custom allocation. Need to be called after custom allocation returns. + */ +void ucs_memtrack_allocated(void *ptr, size_t size, const char *name); + + +/** + * Track release of custom allocation. Need to be called before actually + * releasing the memory. + */ +void ucs_memtrack_releasing(void *ptr); + + +/* + * Memory allocation replacements. 
Their interface is the same as the originals, + * except the additional parameter which specifies the allocation name. + */ +void *ucs_malloc(size_t size, const char *name); +void *ucs_calloc(size_t nmemb, size_t size, const char *name); +void *ucs_realloc(void *ptr, size_t size, const char *name); +int ucs_posix_memalign(void **ptr, size_t boundary, size_t size, + const char *name); +void ucs_free(void *ptr); +void *ucs_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset, const char *name); +int ucs_munmap(void *addr, size_t length); +char *ucs_strdup(const char *src, const char *name); +char *ucs_strndup(const char *src, size_t n, const char *name); + +#else + +#define UCS_MEMTRACK_ARG +#define UCS_MEMTRACK_VAL +#define UCS_MEMTRACK_VAL_ALWAYS "" +#define UCS_MEMTRACK_NAME(_n) + +#define ucs_memtrack_init() UCS_EMPTY_STATEMENT +#define ucs_memtrack_cleanup() UCS_EMPTY_STATEMENT +#define ucs_memtrack_is_enabled() 0 +#define ucs_memtrack_dump(_output) UCS_EMPTY_STATEMENT +#define ucs_memtrack_total(_total) ucs_memtrack_total_init(_total) + +#define ucs_memtrack_allocated(_ptr, _sz, ...) UCS_EMPTY_STATEMENT +#define ucs_memtrack_releasing(_ptr) UCS_EMPTY_STATEMENT + +#define ucs_malloc(_s, ...) malloc(_s) +#define ucs_calloc(_n, _s, ...) calloc(_n, _s) +#define ucs_realloc(_p, _s, ...) realloc(_p, _s) +#if HAVE_POSIX_MEMALIGN +#define ucs_posix_memalign(_pp, _b, _s, ...) posix_memalign(_pp, _b, _s) +#endif +#define ucs_free(_p) free(_p) +#define ucs_mmap(_a, _l, _p, _fl, _fd, _o, ...) mmap(_a, _l, _p, _fl, _fd, _o) +#define ucs_munmap(_a, _l) munmap(_a, _l) +#define ucs_strdup(_src, ...) strdup(_src) +#define ucs_strndup(_src, _n, ...) strndup(_src, _n) + +#endif /* ENABLE_MEMTRACK */ + +END_C_DECLS + +#endif diff --git a/src/ucs/memory/memory_type.c b/src/ucs/memory/memory_type.c new file mode 100644 index 0000000..cbd98f2 --- /dev/null +++ b/src/ucs/memory/memory_type.c @@ -0,0 +1,29 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 
2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "memory_type.h" + +#include + + +const char *ucs_memory_type_names[] = { + [UCS_MEMORY_TYPE_HOST] = "host", + [UCS_MEMORY_TYPE_CUDA] = "cuda" , + [UCS_MEMORY_TYPE_CUDA_MANAGED] = "cuda-managed", + [UCS_MEMORY_TYPE_ROCM] = "rocm", + [UCS_MEMORY_TYPE_ROCM_MANAGED] = "rocm-managed", + [UCS_MEMORY_TYPE_LAST] = "unknown" +}; + +const char *ucs_memory_type_descs[] = { + [UCS_MEMORY_TYPE_HOST] = "System memory", + [UCS_MEMORY_TYPE_CUDA] = "NVIDIA GPU memory" , + [UCS_MEMORY_TYPE_CUDA_MANAGED] = "NVIDIA GPU managed/unified memory", + [UCS_MEMORY_TYPE_ROCM] = "AMD/ROCm GPU memory", + [UCS_MEMORY_TYPE_ROCM_MANAGED] = "AMD/ROCm GPU managed memory", + [UCS_MEMORY_TYPE_LAST] = "unknown" +}; + diff --git a/src/ucs/memory/memory_type.h b/src/ucs/memory/memory_type.h new file mode 100644 index 0000000..dec2a2e --- /dev/null +++ b/src/ucs/memory/memory_type.h @@ -0,0 +1,49 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + + +#ifndef UCS_MEMORY_TYPE_H_ +#define UCS_MEMORY_TYPE_H_ + +#include + +BEGIN_C_DECLS + + +/* Memory types accessible from CPU */ +#define UCS_MEMORY_TYPES_CPU_ACCESSIBLE \ + (UCS_BIT(UCS_MEMORY_TYPE_HOST) | \ + UCS_BIT(UCS_MEMORY_TYPE_CUDA_MANAGED) | \ + UCS_BIT(UCS_MEMORY_TYPE_ROCM_MANAGED)) + + +/* + * Memory types + */ +typedef enum ucs_memory_type { + UCS_MEMORY_TYPE_HOST, /**< Default system memory */ + UCS_MEMORY_TYPE_CUDA, /**< NVIDIA CUDA memory */ + UCS_MEMORY_TYPE_CUDA_MANAGED, /**< NVIDIA CUDA managed (or unified) memory*/ + UCS_MEMORY_TYPE_ROCM, /**< AMD ROCM memory */ + UCS_MEMORY_TYPE_ROCM_MANAGED, /**< AMD ROCM managed system memory */ + UCS_MEMORY_TYPE_LAST +} ucs_memory_type_t; + + +/** + * Array of string names for each memory type + */ +extern const char *ucs_memory_type_names[]; + +/** + * Array of string descriptions for each memory type + */ +extern const char *ucs_memory_type_descs[]; + + +END_C_DECLS + +#endif diff --git a/src/ucs/memory/memtype_cache.c b/src/ucs/memory/memtype_cache.c new file mode 100644 index 0000000..8031aa4 --- /dev/null +++ b/src/ucs/memory/memtype_cache.c @@ -0,0 +1,326 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "memtype_cache.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +typedef enum { + UCS_MEMTYPE_CACHE_ACTION_SET_MEMTYPE, + UCS_MEMTYPE_CACHE_ACTION_REMOVE +} ucs_memtype_cache_action_t; + +static ucs_pgt_dir_t *ucs_memtype_cache_pgt_dir_alloc(const ucs_pgtable_t *pgtable) +{ + void *ptr; + int ret; + + ret = ucs_posix_memalign(&ptr, + ucs_max(sizeof(void *), UCS_PGT_ENTRY_MIN_ALIGN), + sizeof(ucs_pgt_dir_t), "memtype_cache_pgdir"); + return (ret == 0) ? 
ptr : NULL; +} + +static void ucs_memtype_cache_pgt_dir_release(const ucs_pgtable_t *pgtable, + ucs_pgt_dir_t *dir) +{ + ucs_free(dir); +} + +/* + * - Lock must be held in write mode + * - start, end must be aligned to page size + */ +static void ucs_memtype_cache_insert(ucs_memtype_cache_t *memtype_cache, + ucs_pgt_addr_t start, ucs_pgt_addr_t end, + ucs_memory_type_t mem_type) +{ + ucs_memtype_cache_region_t *region; + ucs_status_t status; + int ret; + + /* Allocate structure for new region */ + ret = ucs_posix_memalign((void **)®ion, + ucs_max(sizeof(void *), UCS_PGT_ENTRY_MIN_ALIGN), + sizeof(ucs_memtype_cache_region_t), + "memtype_cache_region"); + if (ret != 0) { + ucs_warn("failed to allocate memtype_cache region"); + return; + } + + ucs_assert((start % UCS_PGT_ADDR_ALIGN) == 0); + ucs_assert((end % UCS_PGT_ADDR_ALIGN) == 0); + + region->super.start = start; + region->super.end = end; + region->mem_type = mem_type; + + status = UCS_PROFILE_CALL(ucs_pgtable_insert, &memtype_cache->pgtable, + ®ion->super); + if (status != UCS_OK) { + ucs_error("failed to insert region " UCS_PGT_REGION_FMT ": %s", + UCS_PGT_REGION_ARG(®ion->super), ucs_status_string(status)); + ucs_free(region); + return; + } + + ucs_trace("memtype_cache: insert " UCS_PGT_REGION_FMT " mem_type %s", + UCS_PGT_REGION_ARG(®ion->super), + ucs_memory_type_names[mem_type]); +} + +static void ucs_memtype_cache_region_collect_callback(const ucs_pgtable_t *pgtable, + ucs_pgt_region_t *pgt_region, + void *arg) +{ + ucs_memtype_cache_region_t *region = ucs_derived_of(pgt_region, + ucs_memtype_cache_region_t); + ucs_list_link_t *list = arg; + ucs_list_add_tail(list, ®ion->list); +} + +UCS_PROFILE_FUNC_VOID(ucs_memtype_cache_update_internal, + (memtype_cache, address, size, mem_type, action), + ucs_memtype_cache_t *memtype_cache, const void *address, + size_t size, ucs_memory_type_t mem_type, + ucs_memtype_cache_action_t action) +{ + ucs_memtype_cache_region_t *region, *tmp; + UCS_LIST_HEAD(region_list); + 
ucs_pgt_addr_t start, end, search_start, search_end; + ucs_status_t status; + + if (!size) { + return; + } + + start = ucs_align_down_pow2((uintptr_t)address, UCS_PGT_ADDR_ALIGN); + end = ucs_align_up_pow2 ((uintptr_t)address + size, UCS_PGT_ADDR_ALIGN); + + ucs_trace("%s: [0x%lx..0x%lx] mem_type %s", + ((action == UCS_MEMTYPE_CACHE_ACTION_SET_MEMTYPE) ? + "update" : "remove"), + start, end, ucs_memory_type_names[mem_type]); + + if (action == UCS_MEMTYPE_CACHE_ACTION_SET_MEMTYPE) { + /* try to find regions that are contiguous and instersected + * with current one */ + search_start = start - 1; + search_end = end; + } else { + /* try to find regions that are instersected with current one */ + search_start = start; + search_end = end - 1; + } + + pthread_rwlock_wrlock(&memtype_cache->lock); + + /* find and remove all regions which intersect with new one */ + ucs_pgtable_search_range(&memtype_cache->pgtable, search_start, search_end, + ucs_memtype_cache_region_collect_callback, + ®ion_list); + ucs_list_for_each_safe(region, tmp, ®ion_list, list) { + if (action == UCS_MEMTYPE_CACHE_ACTION_SET_MEMTYPE) { + if (region->mem_type == mem_type) { + /* merge current region with overlapping or adjacent regions + * of same memory type */ + start = ucs_min(start, region->super.start); + end = ucs_max(end, region->super.end); + } else if ((region->super.end < start) || + (region->super.start >= end)) { + /* ignore regions which are not really overlapping and can't + * be merged because of different memory types */ + ucs_list_del(®ion->list); + continue; + } + } + + status = ucs_pgtable_remove(&memtype_cache->pgtable, ®ion->super); + if (status != UCS_OK) { + ucs_error("failed to remove " UCS_PGT_REGION_FMT + " from memtype_cache: %s", + UCS_PGT_REGION_ARG(®ion->super), + ucs_status_string(status)); + goto out_unlock; + } + + ucs_trace("memtype_cache: removed " UCS_PGT_REGION_FMT " %s", + UCS_PGT_REGION_ARG(®ion->super), + ucs_memory_type_names[region->mem_type]); + } + + if 
(action == UCS_MEMTYPE_CACHE_ACTION_SET_MEMTYPE) { + ucs_memtype_cache_insert(memtype_cache, start, end, mem_type); + } + + /* slice old regions by the new region, to preserve the previous memory type + * of the non-overlapping parts + */ + ucs_list_for_each_safe(region, tmp, ®ion_list, list) { + if (start > region->super.start) { + /* create previous region */ + ucs_memtype_cache_insert(memtype_cache, region->super.start, start, + region->mem_type); + } + if (end < region->super.end) { + /* create next region */ + ucs_memtype_cache_insert(memtype_cache, end, region->super.end, + region->mem_type); + } + + ucs_free(region); + } + +out_unlock: + pthread_rwlock_unlock(&memtype_cache->lock); +} + +void ucs_memtype_cache_update(ucs_memtype_cache_t *memtype_cache, + const void *address, size_t size, + ucs_memory_type_t mem_type) +{ + ucs_memtype_cache_update_internal(memtype_cache, address, size, mem_type, + UCS_MEMTYPE_CACHE_ACTION_SET_MEMTYPE); +} + +void ucs_memtype_cache_remove(ucs_memtype_cache_t *memtype_cache, + const void *address, size_t size) +{ + ucs_memtype_cache_update_internal(memtype_cache, address, size, + UCS_MEMORY_TYPE_LAST, + UCS_MEMTYPE_CACHE_ACTION_REMOVE); +} + +static void ucs_memtype_cache_event_callback(ucm_event_type_t event_type, + ucm_event_t *event, void *arg) +{ + ucs_memtype_cache_t *memtype_cache = arg; + ucs_memtype_cache_action_t action; + + if (event_type & UCM_EVENT_MEM_TYPE_ALLOC) { + action = UCS_MEMTYPE_CACHE_ACTION_SET_MEMTYPE; + } else if (event_type & UCM_EVENT_MEM_TYPE_FREE) { + action = UCS_MEMTYPE_CACHE_ACTION_REMOVE; + } else { + return; + } + + ucs_memtype_cache_update_internal(memtype_cache, event->mem_type.address, + event->mem_type.size, + event->mem_type.mem_type, action); +} + +static void ucs_memtype_cache_purge(ucs_memtype_cache_t *memtype_cache) +{ + ucs_memtype_cache_region_t *region, *tmp; + UCS_LIST_HEAD(region_list); + + ucs_trace_func("memtype_cache purge"); + + ucs_pgtable_purge(&memtype_cache->pgtable, + 
ucs_memtype_cache_region_collect_callback, ®ion_list); + ucs_list_for_each_safe(region, tmp, ®ion_list, list) { + ucs_free(region); + } +} + +UCS_PROFILE_FUNC(ucs_status_t, ucs_memtype_cache_lookup, + (memtype_cache, address, size, mem_type_p), + ucs_memtype_cache_t *memtype_cache, const void *address, + size_t size, ucs_memory_type_t *mem_type_p) +{ + const ucs_pgt_addr_t start = (uintptr_t)address; + ucs_memtype_cache_region_t *region; + ucs_pgt_region_t *pgt_region; + ucs_status_t status; + + pthread_rwlock_rdlock(&memtype_cache->lock); + + pgt_region = UCS_PROFILE_CALL(ucs_pgtable_lookup, &memtype_cache->pgtable, + start); + if (pgt_region == NULL) { + status = UCS_ERR_NO_ELEM; + goto out_unlock; + } + + region = ucs_derived_of(pgt_region, ucs_memtype_cache_region_t); + *mem_type_p = ((pgt_region->end >= (start + size)) ? + region->mem_type : UCS_MEMORY_TYPE_LAST); + status = UCS_OK; + +out_unlock: + pthread_rwlock_unlock(&memtype_cache->lock); + return status; +} + +static UCS_CLASS_INIT_FUNC(ucs_memtype_cache_t) +{ + ucs_status_t status; + int ret; + + ret = pthread_rwlock_init(&self->lock, NULL); + if (ret) { + ucs_error("pthread_rwlock_init() failed: %m"); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + status = ucs_pgtable_init(&self->pgtable, ucs_memtype_cache_pgt_dir_alloc, + ucs_memtype_cache_pgt_dir_release); + if (status != UCS_OK) { + goto err_destroy_rwlock; + } + + status = ucm_set_event_handler(UCM_EVENT_MEM_TYPE_ALLOC | + UCM_EVENT_MEM_TYPE_FREE | + UCM_EVENT_FLAG_EXISTING_ALLOC, + 1000, ucs_memtype_cache_event_callback, + self); + if (status != UCS_OK) { + ucs_error("failed to set UCM memtype event handler: %s", + ucs_status_string(status)); + goto err_cleanup_pgtable; + } + + return UCS_OK; + +err_cleanup_pgtable: + ucs_pgtable_cleanup(&self->pgtable); +err_destroy_rwlock: + pthread_rwlock_destroy(&self->lock); +err: + return status; +} + +static UCS_CLASS_CLEANUP_FUNC(ucs_memtype_cache_t) +{ + 
ucm_unset_event_handler((UCM_EVENT_MEM_TYPE_ALLOC | UCM_EVENT_MEM_TYPE_FREE), + ucs_memtype_cache_event_callback, self); + ucs_memtype_cache_purge(self); + ucs_pgtable_cleanup(&self->pgtable); + pthread_rwlock_destroy(&self->lock); +} + +UCS_CLASS_DEFINE(ucs_memtype_cache_t, void); +UCS_CLASS_DEFINE_NAMED_NEW_FUNC(ucs_memtype_cache_create, ucs_memtype_cache_t, + ucs_memtype_cache_t) +UCS_CLASS_DEFINE_NAMED_DELETE_FUNC(ucs_memtype_cache_destroy, ucs_memtype_cache_t, + ucs_memtype_cache_t) diff --git a/src/ucs/memory/memtype_cache.h b/src/ucs/memory/memtype_cache.h new file mode 100644 index 0000000..708f6e1 --- /dev/null +++ b/src/ucs/memory/memtype_cache.h @@ -0,0 +1,102 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCS_MEMTYPE_CACHE_H_ +#define UCS_MEMTYPE_CACHE_H_ + +#include "memory_type.h" + +#include +#include +#include +#include +#include + + +BEGIN_C_DECLS + +typedef struct ucs_memtype_cache ucs_memtype_cache_t; +typedef struct ucs_memtype_cache_region ucs_memtype_cache_region_t; + + +struct ucs_memtype_cache_region { + ucs_pgt_region_t super; /**< Base class - page table region */ + ucs_list_link_t list; /**< List element */ + ucs_memory_type_t mem_type; /**< Memory type the address belongs to */ +}; + + +struct ucs_memtype_cache { + pthread_rwlock_t lock; /**< protects the page table */ + ucs_pgtable_t pgtable; /**< Page table to hold the regions */ +}; + + +/** + * Create a memtype cache. + * + * @param [out] memtype_cache_p Filled with a pointer to the memtype cache. + * + * @return Error code. + */ +ucs_status_t ucs_memtype_cache_create(ucs_memtype_cache_t **memtype_cache_p); + + +/** + * Destroy a memtype cache. + * + * @param [in] memtype_cache Memtype cache to destroy. + */ +void ucs_memtype_cache_destroy(ucs_memtype_cache_t *memtype_cache); + + +/** + * Find if address range is in memtype cache. + * + * @param [in] memtype_cache Memtype cache to search. 
+ * @param [in] address Address to lookup. + * @param [in] size Length of the memory. + * @param [out] mem_type_p Set to the memory type of the address range. + * UCS_MEMORY_TYPE_LAST is a special value which + * means the memory type is an unknown non-host + * memory, and should be detected in another way. + * + * @return Error code. + */ +ucs_status_t +ucs_memtype_cache_lookup(ucs_memtype_cache_t *memtype_cache, const void *address, + size_t size, ucs_memory_type_t *mem_type_p); + + +/** + * Update the memory type of an address range. + * Can be used after @ucs_memtype_cache_lookup returns UCS_MEMORY_TYPE_LAST, to + * set the memory type after it was detected. + * + * @param [in] memtype_cache Memtype cache to update. + * @param [in] address Start address to update. + * @param [in] size Size of the memory to update. + * @param [in] mem_type Set the memory type of the address range to this + * value. + */ +void ucs_memtype_cache_update(ucs_memtype_cache_t *memtype_cache, + const void *address, size_t size, + ucs_memory_type_t mem_type); + + +/** + * Remove the address range from a memtype cache. + * + * @param [in] memtype_cache Memtype cache to remove. + * @param [in] address Start address to remove. + * @param [in] size Size of the memory to remove. + */ +void ucs_memtype_cache_remove(ucs_memtype_cache_t *memtype_cache, + const void *address, size_t size); + +END_C_DECLS + +#endif diff --git a/src/ucs/memory/numa.c b/src/ucs/memory/numa.c new file mode 100644 index 0000000..b7d7b5f --- /dev/null +++ b/src/ucs/memory/numa.c @@ -0,0 +1,70 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#include "numa.h" + +#include +#include +#include +#include + + +const char *ucs_numa_policy_names[] = { + [UCS_NUMA_POLICY_DEFAULT] = "default", + [UCS_NUMA_POLICY_PREFERRED] = "preferred", + [UCS_NUMA_POLICY_BIND] = "bind", + [UCS_NUMA_POLICY_LAST] = NULL, +}; + +#if HAVE_NUMA + + +static void ucs_numa_populate_cpumap(int16_t cpu_numa_nodes[]) +{ + struct bitmask *cpumask; + int numa_node, cpu; + int ret; + + cpumask = numa_allocate_cpumask(); + + for (numa_node = 0; numa_node <= numa_max_node(); ++numa_node) { + if (!numa_bitmask_isbitset(numa_all_nodes_ptr, numa_node)) { + continue; + } + + ret = numa_node_to_cpus(numa_node, cpumask); + if (ret == -1) { + ucs_warn("failed to get CPUs for NUMA node %d: %m", numa_node); + continue; + } + + for (cpu = 0; cpu < numa_num_configured_cpus(); ++cpu) { + if (numa_bitmask_isbitset(cpumask, cpu)) { + cpu_numa_nodes[cpu] = numa_node + 1; + } + } + } + + numa_free_cpumask(cpumask); +} + + +int ucs_numa_node_of_cpu(int cpu) +{ + /* we can initialize statically only to the value 0, so the NUMA node + * numbers will be stored as 1..N instead of 0..N-1 */ + static int16_t cpu_numa_nodes[__CPU_SETSIZE] = {0}; + + UCS_STATIC_ASSERT(NUMA_NUM_NODES <= INT16_MAX); + ucs_assert(cpu < __CPU_SETSIZE); + + if (cpu_numa_nodes[cpu] == 0) { + ucs_numa_populate_cpumap(cpu_numa_nodes); + } + return cpu_numa_nodes[cpu] - 1; +} + +#endif diff --git a/src/ucs/memory/numa.h b/src/ucs/memory/numa.h new file mode 100644 index 0000000..c26b414 --- /dev/null +++ b/src/ucs/memory/numa.h @@ -0,0 +1,65 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef UCS_NUMA_H_ +#define UCS_NUMA_H_ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +#if HAVE_NUMA +#include +#include + +#if HAVE_STRUCT_BITMASK +# define numa_nodemask_p(_nm) ((_nm)->maskp) +# define numa_nodemask_size(_nm) ((_nm)->size) +# define numa_get_thread_node_mask(_nmp) \ + { \ + numa_free_nodemask(*(_nmp)); \ + *(_nmp) = numa_get_run_node_mask(); \ + } +#else +# define numa_allocate_nodemask() ucs_malloc(sizeof(nodemask_t), "nodemask") +# define numa_free_nodemask(_nm) ucs_free(_nm) +# define numa_nodemask_p(_nm) ((_nm)->maskp.n) +# define numa_nodemask_size(_nm) ((size_t)NUMA_NUM_NODES) +# define numa_bitmask_clearall(_nm) nodemask_zero(&(_nm)->maskp) +# define numa_bitmask_setbit(_nm, _n) nodemask_set(&(_nm)->maskp, _n) +# define numa_get_thread_node_mask(_nmp) \ + { \ + (*(_nmp))->maskp = numa_get_run_node_mask(); \ + } + +struct bitmask { + nodemask_t maskp; +}; +#endif /* HAVE_STRUCT_BITMASK */ + +#endif /* HAVE_NUMA */ + + +#define UCS_NUMA_MIN_DISTANCE 10 + + +typedef enum { + UCS_NUMA_POLICY_DEFAULT, + UCS_NUMA_POLICY_BIND, + UCS_NUMA_POLICY_PREFERRED, + UCS_NUMA_POLICY_LAST +} ucs_numa_policy_t; + + +extern const char *ucs_numa_policy_names[]; + + +int ucs_numa_node_of_cpu(int cpu); + + +#endif diff --git a/src/ucs/memory/rcache.c b/src/ucs/memory/rcache.c new file mode 100644 index 0000000..d88452a --- /dev/null +++ b/src/ucs/memory/rcache.c @@ -0,0 +1,744 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rcache.h" +#include "rcache_int.h" + +#define ucs_rcache_region_log(_level, _message, ...) \ + do { \ + if (ucs_log_is_enabled(_level)) { \ + __ucs_rcache_region_log(__FILE__, __LINE__, __FUNCTION__, (_level), \ + _message, ## __VA_ARGS__); \ + } \ + } while (0) + +#define ucs_rcache_region_error(_message, ...) 
\ + ucs_rcache_region_log(UCS_LOG_LEVEL_ERROR, _message, ## __VA_ARGS__) +#define ucs_rcache_region_warn(_message, ...) \ + ucs_rcache_region_log(UCS_LOG_LEVEL_WARN, _message, ## __VA_ARGS__) +#define ucs_rcache_region_debug(_message, ...) \ + ucs_rcache_region_log(UCS_LOG_LEVEL_DEBUG, _message, ## __VA_ARGS__) +#define ucs_rcache_region_trace(_message, ...) \ + ucs_rcache_region_log(UCS_LOG_LEVEL_TRACE, _message, ## __VA_ARGS__) + +#define ucs_rcache_region_pfn(_region) \ + ((_region)->priv) + + +typedef struct ucs_rcache_inv_entry { + ucs_queue_elem_t queue; + ucs_pgt_addr_t start; + ucs_pgt_addr_t end; +} ucs_rcache_inv_entry_t; + + +#if ENABLE_STATS +static ucs_stats_class_t ucs_rcache_stats_class = { + .name = "rcache", + .num_counters = UCS_RCACHE_STAT_LAST, + .counter_names = { + [UCS_RCACHE_GETS] = "gets", + [UCS_RCACHE_HITS_FAST] = "hits_fast", + [UCS_RCACHE_HITS_SLOW] = "hits_slow", + [UCS_RCACHE_MISSES] = "misses", + [UCS_RCACHE_MERGES] = "regions_merged", + [UCS_RCACHE_UNMAPS] = "unmap_events", + [UCS_RCACHE_UNMAP_INVALIDATES] = "regions_inv_unmap", + [UCS_RCACHE_PUTS] = "puts", + [UCS_RCACHE_REGS] = "mem_regs", + [UCS_RCACHE_DEREGS] = "mem_deregs", + } +}; +#endif + + +static void __ucs_rcache_region_log(const char *file, int line, const char *function, + ucs_log_level_t level, ucs_rcache_t *rcache, + ucs_rcache_region_t *region, const char *fmt, + ...) +{ + char message[128]; + char region_desc[64]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + if (region->flags & UCS_RCACHE_REGION_FLAG_REGISTERED) { + rcache->params.ops->dump_region(rcache->params.context, rcache, region, + region_desc, sizeof(region_desc)); + } else { + strcpy(region_desc, ""); + } + + ucs_log_dispatch(file, line, function, level, + "%s: %s region " UCS_PGT_REGION_FMT " %c%c "UCS_RCACHE_PROT_FMT" ref %u %s", + rcache->name, message, + UCS_PGT_REGION_ARG(&region->super), + (region->flags & UCS_RCACHE_REGION_FLAG_REGISTERED) ? 
'g' : '-', + (region->flags & UCS_RCACHE_REGION_FLAG_PGTABLE) ? 't' : '-', + UCS_RCACHE_PROT_ARG(region->prot), + region->refcount, + region_desc); +} + +static ucs_pgt_dir_t *ucs_rcache_pgt_dir_alloc(const ucs_pgtable_t *pgtable) +{ + void *ptr; + int error; + + error = ucs_posix_memalign(&ptr, + ucs_max(sizeof(void *), UCS_PGT_ENTRY_MIN_ALIGN), + sizeof(ucs_pgt_dir_t), "rcache_pgdir"); + return (error == 0) ? ptr : NULL; +} + +static void ucs_rcache_pgt_dir_release(const ucs_pgtable_t *pgtable, + ucs_pgt_dir_t *dir) +{ + ucs_free(dir); +} + +static ucs_status_t ucs_rcache_mp_chunk_alloc(ucs_mpool_t *mp, size_t *size_p, + void **chunk_p) +{ + size_t size; + void *ptr; + + size = ucs_align_up_pow2(sizeof(size_t) + *size_p, ucs_get_page_size()); + ptr = ucm_orig_mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + if (ptr == MAP_FAILED) { + ucs_error("mmmap(size=%zu) failed: %m", size); + return UCS_ERR_NO_MEMORY; + } + + /* Store the size in the first bytes of the chunk */ + *(size_t*)ptr = size; + *chunk_p = UCS_PTR_BYTE_OFFSET(ptr, sizeof(size_t)); + *size_p = size - sizeof(size_t); + return UCS_OK; +} + +static void ucs_rcache_mp_chunk_release(ucs_mpool_t *mp, void *chunk) +{ + size_t size; + void *ptr; + int ret; + + ptr = UCS_PTR_BYTE_OFFSET(chunk, -sizeof(size_t)); + size = *(size_t*)ptr; + ret = ucm_orig_munmap(ptr, size); + if (ret) { + ucs_warn("munmap(%p, %zu) failed: %m", ptr, size); + } +} + +static ucs_mpool_ops_t ucs_rcache_mp_ops = { + .chunk_alloc = ucs_rcache_mp_chunk_alloc, + .chunk_release = ucs_rcache_mp_chunk_release, + .obj_init = NULL, + .obj_cleanup = NULL +}; + +/* Lock must be held for read */ +static void ucs_rcache_region_validate_pfn(ucs_rcache_t *rcache, + ucs_rcache_region_t *region) +{ + unsigned long region_pfn, actual_pfn; + + if (!ucs_unlikely(ucs_global_opts.rcache_check_pfn)) { + return; + } + + region_pfn = ucs_rcache_region_pfn(region); + actual_pfn = ucs_sys_get_pfn(region->super.start); + if 
(region_pfn != actual_pfn) { + ucs_rcache_region_error(rcache, region, "pfn check failed"); + ucs_fatal("%s: page at virtual address 0x%lx moved from pfn 0x%lx to pfn 0x%lx", + rcache->name, region->super.start, region_pfn, actual_pfn); + } else { + ucs_rcache_region_trace(rcache, region, "pfn ok"); + } +} + +/* Lock must be held */ +static void ucs_rcache_region_collect_callback(const ucs_pgtable_t *pgtable, + ucs_pgt_region_t *pgt_region, void *arg) +{ + ucs_rcache_region_t *region = ucs_derived_of(pgt_region, ucs_rcache_region_t); + ucs_list_link_t *list = arg; + ucs_list_add_tail(list, &region->list); +} + +/* Lock must be held */ +static void ucs_rcache_find_regions(ucs_rcache_t *rcache, ucs_pgt_addr_t from, + ucs_pgt_addr_t to, ucs_list_link_t *list) +{ + ucs_list_head_init(list); + ucs_pgtable_search_range(&rcache->pgtable, from, to, + ucs_rcache_region_collect_callback, list); +} + +/* Lock must be held in write mode */ +static void ucs_mem_region_destroy_internal(ucs_rcache_t *rcache, + ucs_rcache_region_t *region) +{ + ucs_rcache_region_trace(rcache, region, "destroy"); + + ucs_assert(region->refcount == 0); + ucs_assert(!(region->flags & UCS_RCACHE_REGION_FLAG_PGTABLE)); + + if (region->flags & UCS_RCACHE_REGION_FLAG_REGISTERED) { + UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_DEREGS, 1); + UCS_PROFILE_CODE("mem_dereg") { + rcache->params.ops->mem_dereg(rcache->params.context, rcache, region); + } + } + + ucs_free(region); +} + +static inline void ucs_rcache_region_put_internal(ucs_rcache_t *rcache, + ucs_rcache_region_t *region, + int lock, + int must_be_destroyed) +{ + ucs_rcache_region_trace(rcache, region, lock ? 
"put" : "put_nolock"); + + ucs_assert(region->refcount > 0); + if (ucs_unlikely(ucs_atomic_fsub32(&region->refcount, 1) == 1)) { + if (lock) { + pthread_rwlock_wrlock(&rcache->lock); + } + ucs_mem_region_destroy_internal(rcache, region); + if (lock) { + pthread_rwlock_unlock(&rcache->lock); + } + } else { + ucs_assert(!must_be_destroyed); + } +} + +/* Lock must be held in write mode */ +static void ucs_rcache_region_invalidate(ucs_rcache_t *rcache, + ucs_rcache_region_t *region, + int must_be_in_pgt, + int must_be_destroyed) +{ + ucs_status_t status; + + ucs_rcache_region_trace(rcache, region, "invalidate"); + + /* Remove the memory region from page table, if it's there */ + if (region->flags & UCS_RCACHE_REGION_FLAG_PGTABLE) { + status = ucs_pgtable_remove(&rcache->pgtable, &region->super); + if (status != UCS_OK) { + ucs_rcache_region_warn(rcache, region, "failed to remove (%s)", + ucs_status_string(status)); + } + region->flags &= ~UCS_RCACHE_REGION_FLAG_PGTABLE; + } else { + ucs_assert(!must_be_in_pgt); + } + + ucs_rcache_region_put_internal(rcache, region, 0, must_be_destroyed); +} + +/* Lock must be held in write mode */ +static void ucs_rcache_invalidate_range(ucs_rcache_t *rcache, ucs_pgt_addr_t start, + ucs_pgt_addr_t end) +{ + ucs_rcache_region_t *region, *tmp; + ucs_list_link_t region_list; + + ucs_trace_func("rcache=%s, start=0x%lx, end=0x%lx", rcache->name, start, end); + + ucs_rcache_find_regions(rcache, start, end - 1, &region_list); + ucs_list_for_each_safe(region, tmp, &region_list, list) { + /* all regions on the list are in the page table */ + ucs_rcache_region_invalidate(rcache, region, 1, 0); + UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_UNMAP_INVALIDATES, 1); + } +} + +/* Lock must be held in write mode */ +static void ucs_rcache_check_inv_queue(ucs_rcache_t *rcache) +{ + ucs_rcache_inv_entry_t *entry; + + ucs_trace_func("rcache=%s", rcache->name); + + ucs_spin_lock(&rcache->inv_lock); + while (!ucs_queue_is_empty(&rcache->inv_q)) { + entry = 
ucs_queue_pull_elem_non_empty(&rcache->inv_q, + ucs_rcache_inv_entry_t, queue); + + /* We need to drop the lock since the following code may trigger memory + * operations, which could trigger vm_unmapped event which also takes + * this lock. + */ + ucs_spin_unlock(&rcache->inv_lock); + + ucs_rcache_invalidate_range(rcache, entry->start, entry->end); + + ucs_spin_lock(&rcache->inv_lock); + + ucs_mpool_put(entry); /* Must be done with the lock held */ + } + ucs_spin_unlock(&rcache->inv_lock); +} + +static void ucs_rcache_unmapped_callback(ucm_event_type_t event_type, + ucm_event_t *event, void *arg) +{ + ucs_rcache_t *rcache = arg; + ucs_rcache_inv_entry_t *entry; + ucs_pgt_addr_t start, end; + + ucs_assert(event_type == UCM_EVENT_VM_UNMAPPED || + event_type == UCM_EVENT_MEM_TYPE_FREE); + + if (event_type == UCM_EVENT_VM_UNMAPPED) { + start = (uintptr_t)event->vm_unmapped.address; + end = (uintptr_t)event->vm_unmapped.address + event->vm_unmapped.size; + } else if(event_type == UCM_EVENT_MEM_TYPE_FREE) { + start = (uintptr_t)event->mem_type.address; + end = (uintptr_t)event->mem_type.address + event->mem_type.size; + } else { + ucs_warn("%s: unknown event type: %x", rcache->name, event_type); + return; + } + + ucs_trace_func("%s: event vm_unmapped 0x%lx..0x%lx", rcache->name, start, end); + + ucs_spin_lock(&rcache->inv_lock); + entry = ucs_mpool_get(&rcache->inv_mp); + if (entry != NULL) { + /* Add region to invalidation list */ + entry->start = start; + entry->end = end; + ucs_queue_push(&rcache->inv_q, &entry->queue); + UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_UNMAPS, 1); + } else { + ucs_error("Failed to allocate invalidation entry for 0x%lx..0x%lx, " + "data corruption may occur", start, end); + } + ucs_spin_unlock(&rcache->inv_lock); +} + +/* Clear all regions + Lock must be held in write mode (or use it during cleanup) + */ +static void ucs_rcache_purge(ucs_rcache_t *rcache) +{ + ucs_rcache_region_t *region, *tmp; + ucs_list_link_t region_list; + + 
ucs_trace_func("rcache=%s", rcache->name); + + ucs_list_head_init(&region_list); + ucs_pgtable_purge(&rcache->pgtable, ucs_rcache_region_collect_callback, + &region_list); + ucs_list_for_each_safe(region, tmp, &region_list, list) { + if (region->flags & UCS_RCACHE_REGION_FLAG_PGTABLE) { + region->flags &= ~UCS_RCACHE_REGION_FLAG_PGTABLE; + ucs_atomic_add32(&region->refcount, (uint32_t)-1); + } + if (region->refcount > 0) { + ucs_rcache_region_warn(rcache, region, "destroying inuse"); + } + ucs_mem_region_destroy_internal(rcache, region); + } +} + +static inline int ucs_rcache_region_test(ucs_rcache_region_t *region, int prot) +{ + return (region->flags & UCS_RCACHE_REGION_FLAG_REGISTERED) && + ucs_test_all_flags(region->prot, prot); +} + +/* Lock must be held */ +static ucs_status_t +ucs_rcache_check_overlap(ucs_rcache_t *rcache, ucs_pgt_addr_t *start, + ucs_pgt_addr_t *end, int *prot, int *merged, + ucs_rcache_region_t **region_p) +{ + ucs_rcache_region_t *region, *tmp; + ucs_list_link_t region_list; + int mem_prot; + + ucs_trace_func("rcache=%s, *start=0x%lx, *end=0x%lx", rcache->name, *start, + *end); + + ucs_rcache_check_inv_queue(rcache); + + ucs_rcache_find_regions(rcache, *start, *end - 1, &region_list); + + /* TODO check if any of the regions is locked */ + + ucs_list_for_each_safe(region, tmp, &region_list, list) { + + if ((*start >= region->super.start) && (*end <= region->super.end) && + ucs_rcache_region_test(region, *prot)) + { + /* Found a region which contains the given address range */ + ucs_rcache_region_hold(rcache, region); + *region_p = region; + return UCS_ERR_ALREADY_EXISTS; + } + + UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_MERGES, 1); + /* + * If we don't provide some of the permissions the other region had, + * we might want to expand our permissions to support them. We can + * do that only if the memory range actually has those permissions. + * This will prevent the users of the other region to kick us out + * the next time. 
+ */ + if (!ucs_test_all_flags(*prot, region->prot)) { + /* A slow path because searching /proc/maps in order to + * check memory protection is very expensive. + * + * TODO: currently rcache is optimized for the case where most of + * the regions have same protection. + */ + mem_prot = UCS_PROFILE_CALL(ucs_get_mem_prot, *start, *end); + if (!ucs_test_all_flags(mem_prot, *prot)) { + ucs_rcache_region_trace(rcache, region, + "do not merge "UCS_RCACHE_PROT_FMT + " with mem "UCS_RCACHE_PROT_FMT, + UCS_RCACHE_PROT_ARG(*prot), + UCS_RCACHE_PROT_ARG(mem_prot)); + /* The memory protection can not satisfy that of the + * region. However mem_reg still may be able to deal with it. + * Do the safest thing: invalidate cached region + */ + ucs_rcache_region_invalidate(rcache, region, 1, 0); + continue; + } else if (ucs_test_all_flags(mem_prot, region->prot)) { + *prot |= region->prot; + } else { + /* Could not support other region's permissions - so do not merge + * with it. If anybody will use the other region, this will kick + * out our region, and may potentially lead to ineffective use + * of the cache. We can't solve it as long as we have only one + * page table, since it does not allow overlap. 
+ */ + ucs_rcache_region_trace(rcache, region, + "do not merge mem "UCS_RCACHE_PROT_FMT" with", + UCS_RCACHE_PROT_ARG(mem_prot)); + ucs_rcache_region_invalidate(rcache, region, 1, 0); + continue; + } + } + + ucs_rcache_region_trace(rcache, region, + "merge 0x%lx..0x%lx "UCS_RCACHE_PROT_FMT" with", + *start, *end, UCS_RCACHE_PROT_ARG(*prot)); + *start = ucs_min(*start, region->super.start); + *end = ucs_max(*end, region->super.end); + *merged = 1; + ucs_rcache_region_invalidate(rcache, region, 1, 0); + } + return UCS_OK; +} + +static ucs_status_t +ucs_rcache_create_region(ucs_rcache_t *rcache, void *address, size_t length, + int prot, void *arg, ucs_rcache_region_t **region_p) +{ + ucs_rcache_region_t *region; + ucs_pgt_addr_t start, end; + ucs_status_t status; + int error, merged; + + ucs_trace_func("rcache=%s, address=%p, length=%zu", rcache->name, address, + length); + + pthread_rwlock_wrlock(&rcache->lock); + +retry: + /* Align to page size */ + start = ucs_align_down_pow2((uintptr_t)address, + rcache->params.alignment); + end = ucs_align_up_pow2 ((uintptr_t)address + length, + rcache->params.alignment); + region = NULL; + merged = 0; + + /* Check overlap with existing regions */ + status = UCS_PROFILE_CALL(ucs_rcache_check_overlap, rcache, &start, &end, + &prot, &merged, &region); + if (status == UCS_ERR_ALREADY_EXISTS) { + /* Found a matching region (it could have been added after we released + * the lock) + */ + ucs_rcache_region_validate_pfn(rcache, region); + status = region->status; + UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_HITS_SLOW, 1); + goto out_set_region; + } else if (status != UCS_OK) { + /* Could not create a region because there are overlapping regions which + * cannot be removed. 
+ */ + goto out_unlock; + } + + /* Allocate structure for new region */ + error = ucs_posix_memalign((void **)&region, + ucs_max(sizeof(void *), UCS_PGT_ENTRY_MIN_ALIGN), + rcache->params.region_struct_size, + "rcache_region"); + if (error != 0) { + ucs_error("failed to allocate rcache region descriptor: %m"); + status = UCS_ERR_NO_MEMORY; + goto out_unlock; + } + + memset(region, 0, rcache->params.region_struct_size); + + region->super.start = start; + region->super.end = end; + status = UCS_PROFILE_CALL(ucs_pgtable_insert, &rcache->pgtable, &region->super); + if (status != UCS_OK) { + ucs_error("failed to insert region " UCS_PGT_REGION_FMT ": %s", + UCS_PGT_REGION_ARG(&region->super), ucs_status_string(status)); + ucs_free(region); + goto out_unlock; + } + + /* If memory registration failed, keep the region and mark it as invalid, + * to avoid numerous retries of registering the region. + */ + UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_REGS, 1); + + region->prot = prot; + region->flags = UCS_RCACHE_REGION_FLAG_PGTABLE; + region->refcount = 1; + region->status = status = + UCS_PROFILE_NAMED_CALL("mem_reg", rcache->params.ops->mem_reg, + rcache->params.context, rcache, arg, region, + merged ? UCS_RCACHE_MEM_REG_HIDE_ERRORS : 0); + if (status != UCS_OK) { + if (merged) { + /* failure may be due to merge, because memory of the merged + * regions has different access permission. + * Retry with original address: there will be no merge because + * all merged regions have been invalidated and registration will + * succeed. 
+ */ + ucs_debug("failed to register merged region " UCS_PGT_REGION_FMT ": %s, retrying", + UCS_PGT_REGION_ARG(&region->super), ucs_status_string(status)); + ucs_rcache_region_invalidate(rcache, region, 1, 1); + goto retry; + } else { + ucs_debug("failed to register region " UCS_PGT_REGION_FMT ": %s", + UCS_PGT_REGION_ARG(&region->super), ucs_status_string(status)); + goto out_unlock; + } + } + + region->flags |= UCS_RCACHE_REGION_FLAG_REGISTERED; + region->refcount = 2; /* Page-table + user */ + + if (ucs_global_opts.rcache_check_pfn) { + ucs_rcache_region_pfn(region) = ucs_sys_get_pfn(region->super.start); + } else { + ucs_rcache_region_pfn(region) = 0; + } + + UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_MISSES, 1); + + ucs_rcache_region_trace(rcache, region, "created"); + +out_set_region: + *region_p = region; +out_unlock: + pthread_rwlock_unlock(&rcache->lock); + return status; +} + +void ucs_rcache_region_hold(ucs_rcache_t *rcache, ucs_rcache_region_t *region) +{ + ucs_atomic_add32(&region->refcount, +1); + ucs_rcache_region_trace(rcache, region, "hold"); +} + +ucs_status_t ucs_rcache_get(ucs_rcache_t *rcache, void *address, size_t length, + int prot, void *arg, ucs_rcache_region_t **region_p) +{ + ucs_pgt_addr_t start = (uintptr_t)address; + ucs_pgt_region_t *pgt_region; + ucs_rcache_region_t *region; + + ucs_trace_func("rcache=%s, address=%p, length=%zu", rcache->name, address, + length); + + pthread_rwlock_rdlock(&rcache->lock); + UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_GETS, 1); + if (ucs_queue_is_empty(&rcache->inv_q)) { + pgt_region = UCS_PROFILE_CALL(ucs_pgtable_lookup, &rcache->pgtable, + start); + if (ucs_likely(pgt_region != NULL)) { + region = ucs_derived_of(pgt_region, ucs_rcache_region_t); + if (((start + length) <= region->super.end) && + ucs_rcache_region_test(region, prot)) + { + ucs_rcache_region_hold(rcache, region); + ucs_rcache_region_validate_pfn(rcache, region); + *region_p = region; + UCS_STATS_UPDATE_COUNTER(rcache->stats, 
UCS_RCACHE_HITS_FAST, 1); + pthread_rwlock_unlock(&rcache->lock); + return UCS_OK; + } + } + } + pthread_rwlock_unlock(&rcache->lock); + + /* Fall back to slow version (with rw lock) in following cases: + * - invalidation list not empty + * - could not find cached region + * - found unregistered region + */ + return UCS_PROFILE_CALL(ucs_rcache_create_region, rcache, address, length, + prot, arg, region_p); +} + +void ucs_rcache_region_put(ucs_rcache_t *rcache, ucs_rcache_region_t *region) +{ + ucs_rcache_region_put_internal(rcache, region, 1, 0); + UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_PUTS, 1); +} + +static UCS_CLASS_INIT_FUNC(ucs_rcache_t, const ucs_rcache_params_t *params, + const char *name, ucs_stats_node_t *stats_parent) +{ + ucs_status_t status, spinlock_status; + int ret; + + if (params->region_struct_size < sizeof(ucs_rcache_region_t)) { + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + if (!ucs_is_pow2(params->alignment) || + (params->alignment < UCS_PGT_ADDR_ALIGN) || + (params->alignment > params->max_alignment)) + { + ucs_error("invalid regcache alignment (%zu): must be a power of 2 " + "between %zu and %zu", + params->alignment, UCS_PGT_ADDR_ALIGN, params->max_alignment); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + status = UCS_STATS_NODE_ALLOC(&self->stats, &ucs_rcache_stats_class, + stats_parent); + if (status != UCS_OK) { + goto err; + } + + self->params = *params; + + self->name = strdup(name); + if (self->name == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_destroy_stats; + } + + ret = pthread_rwlock_init(&self->lock, NULL); + if (ret) { + ucs_error("pthread_rwlock_init() failed: %m"); + status = UCS_ERR_INVALID_PARAM; + goto err_free_name; + } + + status = ucs_spinlock_init(&self->inv_lock); + if (status != UCS_OK) { + goto err_destroy_rwlock; + } + + status = ucs_pgtable_init(&self->pgtable, ucs_rcache_pgt_dir_alloc, + ucs_rcache_pgt_dir_release); + if (status != UCS_OK) { + goto err_destroy_inv_q_lock; + } + + 
status = ucs_mpool_init(&self->inv_mp, 0, sizeof(ucs_rcache_inv_entry_t), 0, + 1, 1024, UINT_MAX, &ucs_rcache_mp_ops, "rcache_inv_mp"); + if (status != UCS_OK) { + goto err_cleanup_pgtable; + } + + ucs_queue_head_init(&self->inv_q); + + status = ucm_set_event_handler(params->ucm_events, params->ucm_event_priority, + ucs_rcache_unmapped_callback, self); + if (status != UCS_OK) { + goto err_destroy_mp; + } + + return UCS_OK; + +err_destroy_mp: + ucs_mpool_cleanup(&self->inv_mp, 1); +err_cleanup_pgtable: + ucs_pgtable_cleanup(&self->pgtable); +err_destroy_inv_q_lock: + spinlock_status = ucs_spinlock_destroy(&self->inv_lock); + if (spinlock_status != UCS_OK) { + ucs_warn("ucs_spinlock_destroy() failed (%d)", spinlock_status); + } +err_destroy_rwlock: + pthread_rwlock_destroy(&self->lock); +err_free_name: + free(self->name); +err_destroy_stats: + UCS_STATS_NODE_FREE(self->stats); +err: + return status; +} + +static UCS_CLASS_CLEANUP_FUNC(ucs_rcache_t) +{ + ucs_status_t status; + + ucm_unset_event_handler(self->params.ucm_events, ucs_rcache_unmapped_callback, + self); + ucs_rcache_check_inv_queue(self); + ucs_rcache_purge(self); + + ucs_mpool_cleanup(&self->inv_mp, 1); + ucs_pgtable_cleanup(&self->pgtable); + status = ucs_spinlock_destroy(&self->inv_lock); + if (status != UCS_OK) { + ucs_warn("ucs_spinlock_destroy() failed (%d)", status); + } + pthread_rwlock_destroy(&self->lock); + UCS_STATS_NODE_FREE(self->stats); + free(self->name); +} + +UCS_CLASS_DEFINE(ucs_rcache_t, void); +UCS_CLASS_DEFINE_NAMED_NEW_FUNC(ucs_rcache_create, ucs_rcache_t, ucs_rcache_t, + const ucs_rcache_params_t*, const char *, + ucs_stats_node_t*) +UCS_CLASS_DEFINE_NAMED_DELETE_FUNC(ucs_rcache_destroy, ucs_rcache_t, ucs_rcache_t) diff --git a/src/ucs/memory/rcache.h b/src/ucs/memory/rcache.h new file mode 100644 index 0000000..d9db909 --- /dev/null +++ b/src/ucs/memory/rcache.h @@ -0,0 +1,192 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. 
+ * + * See file LICENSE for terms. + */ + +#ifndef UCS_REG_CACHE_H_ +#define UCS_REG_CACHE_H_ + +/* + * Memory registration cache - holds registered memory regions, takes care of + * memory invalidation (if it's unmapped), merging of regions, protection flags. + * This data structure is thread safe. + */ +#include +#include +#include +#include +#include +#include + + +#define UCS_RCACHE_PROT_FMT "%c%c" +#define UCS_RCACHE_PROT_ARG(_prot) \ + ((_prot) & PROT_READ) ? 'r' : '-', \ + ((_prot) & PROT_WRITE) ? 'w' : '-' + + +typedef struct ucs_rcache ucs_rcache_t; +typedef struct ucs_rcache_ops ucs_rcache_ops_t; +typedef struct ucs_rcache_params ucs_rcache_params_t; +typedef struct ucs_rcache_region ucs_rcache_region_t; + +/* + * Memory region flags. + */ +enum { + UCS_RCACHE_REGION_FLAG_REGISTERED = UCS_BIT(0), /**< Memory registered */ + UCS_RCACHE_REGION_FLAG_PGTABLE = UCS_BIT(1) /**< In the page table */ +}; + +/* + * Memory registration flags. + */ +enum { + UCS_RCACHE_MEM_REG_HIDE_ERRORS = UCS_BIT(0) /**< Hide errors on memory registration */ +}; + +/* + * Registration cache operations. + */ +struct ucs_rcache_ops { + /** + * Register a memory region. + * + * @param [in] context User context, as passed to @ref ucs_rcache_create + * @param [in] rcache Pointer to the registration cache. + * @param [in] arg Custom argument passed to @ref ucs_rcache_get(). + * @param [in] region Memory region to register. This may point to a larger + * user-defined structure, as specified by the field + * `region_struct_size' in @ref ucs_rcache_params. + * This function may store relevant information (such + * as memory keys) inside the larger structure. + * @param [in] flags Memory registration flags. + * + * @return UCS_OK if registration is successful, error otherwise. + * + * @note This function should be able to handle inaccessible memory addresses + * and return error status in this case, without any destructive consequences + * such as error messages or fatal failure. 
+ */ + ucs_status_t (*mem_reg)(void *context, ucs_rcache_t *rcache, + void *arg, ucs_rcache_region_t *region, + uint16_t flags); + /** + * Deregister a memory region. + * + * @param [in] context User context, as passed to @ref ucs_rcache_create + * @param [in] rcache Pointer to the registration cache. + * @param [in] region Memory region to deregister. + */ + void (*mem_dereg)(void *context, ucs_rcache_t *rcache, + ucs_rcache_region_t *region); + + /** + * Dump memory region information to a string buffer. + * (Only the user-defined part of the memory region should be dumped) + * + * @param [in] context User context, as passed to @ref ucs_rcache_create + * @param [in] rcache Pointer to the registration cache. + * @param [in] region Memory region to dump. + * @param [in] buf String buffer to dump to. + * @param [in] max Maximal length of the string buffer. + */ + void (*dump_region)(void *context, ucs_rcache_t *rcache, + ucs_rcache_region_t *region, + char *buf, size_t max); +}; + + +struct ucs_rcache_params { + size_t region_struct_size; /**< Size of memory region structure, + must be at least the size + of @ref ucs_rcache_region_t */ + size_t alignment; /**< Force-align regions to this size. + Must be smaller or equal to + system page size. */ + size_t max_alignment; /**< Maximum alignment */ + int ucm_events; /**< UCM events to register. 
Currently + UCM_EVENT_VM_UNMAPPED and + UCM_EVENT_MEM_TYPE_FREE are supported */ + int ucm_event_priority; /**< Priority of memory events */ + const ucs_rcache_ops_t *ops; /**< Memory operations functions */ + void *context; /**< User-defined context that will + be passed to mem_reg/mem_dereg */ +}; + + +struct ucs_rcache_region { + ucs_pgt_region_t super; /**< Base class - page table region */ + ucs_list_link_t list; /**< List element */ + volatile uint32_t refcount; /**< Reference count, including +1 if it's + in the page table */ + ucs_status_t status; /**< Current status code */ + uint8_t prot; /**< Protection bits */ + uint16_t flags; /**< Status flags. Protected by page table lock. */ + uint64_t priv; /**< Used internally */ +}; + + +/** + * Create a memory registration cache. + * + * @param [in] params Registration cache parameters. + * @param [in] name Registration cache name, for debugging. + * @param [in] stats_parent Pointer to statistics parent node. + * @param [out] rcache_p Filled with a pointer to the registration cache. + */ +ucs_status_t ucs_rcache_create(const ucs_rcache_params_t *params, const char *name, + ucs_stats_node_t *stats_parent, ucs_rcache_t **rcache_p); + + +/** + * Destroy a memory registration cache. + * + * @param [in] rcache Registration cache to destroy. + */ +void ucs_rcache_destroy(ucs_rcache_t *rcache); + + +/** + * Resolve buffer in the registration cache, or register it if not found. + * TODO register after N usages. + * + * @param [in] rcache Memory registration cache. + * @param [in] address Address to register or resolve. + * @param [in] length Length of buffer to register or resolve. + * @param [in] prot Requested access flags, PROT_xx (same as passed to mmap). + * @param [in] arg Custom argument passed down to memory registration + * callback, if a memory registration happens during + * this call. + * @param [out] region_p On success, filled with a pointer to the memory + * region. 
The user could put more data in the region + * structure in mem_reg() function. + * + * On success succeeds, the memory region reference count is incremented by 1. + * + * @return Error code. + */ +ucs_status_t ucs_rcache_get(ucs_rcache_t *rcache, void *address, size_t length, + int prot, void *arg, ucs_rcache_region_t **region_p); + + +/** + * Increment memory region reference count. + * + * @param [in] rcache Memory registration cache. + * @param [in] region Memory region whose reference count to increment. + */ +void ucs_rcache_region_hold(ucs_rcache_t *rcache, ucs_rcache_region_t *region); + + +/** + * Decrement memory region reference count and possibly destroy it. + * + * @param [in] rcache Memory registration cache. + * @param [in] region Memory region to release. + */ +void ucs_rcache_region_put(ucs_rcache_t *rcache, ucs_rcache_region_t *region); + + +#endif diff --git a/src/ucs/memory/rcache_int.h b/src/ucs/memory/rcache_int.h new file mode 100644 index 0000000..68f0668 --- /dev/null +++ b/src/ucs/memory/rcache_int.h @@ -0,0 +1,49 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef UCS_REG_CACHE_INT_H_ +#define UCS_REG_CACHE_INT_H_ + +#include + +/* Names of rcache stats counters */ +enum { + UCS_RCACHE_GETS, /* number of get operations */ + UCS_RCACHE_HITS_FAST, /* number of fast path hits */ + UCS_RCACHE_HITS_SLOW, /* number of slow path hits */ + UCS_RCACHE_MISSES, /* number of misses */ + UCS_RCACHE_MERGES, /* number of region merges */ + UCS_RCACHE_UNMAPS, /* number of memory unmap events */ + UCS_RCACHE_UNMAP_INVALIDATES, /* number of regions invalidated because + of unmap events */ + UCS_RCACHE_PUTS, /* number of put operations */ + UCS_RCACHE_REGS, /* number of memory registrations */ + UCS_RCACHE_DEREGS, /* number of memory deregistrations */ + UCS_RCACHE_STAT_LAST +}; + + +struct ucs_rcache { + ucs_rcache_params_t params; /**< rcache parameters (immutable) */ + pthread_rwlock_t lock; /**< Protects the page table and all regions + whose refcount is 0 */ + ucs_pgtable_t pgtable; /**< page table to hold the regions */ + + ucs_spinlock_t inv_lock; /**< Lock for inv_q and inv_mp. This is a + separate lock because we may want to put + regions on inv_q while the page table + lock is held by the calling context */ + ucs_queue_head_t inv_q; /**< Regions which were invalidated during + memory events */ + ucs_mpool_t inv_mp; /**< Memory pool to allocate entries for inv_q, + since we cannot use regular malloc(). + The backing storage is original mmap() + which does not generate memory events */ + char *name; + UCS_STATS_NODE_DECLARE(stats) +}; + +#endif diff --git a/src/ucs/profile/profile.c b/src/ucs/profile/profile.c new file mode 100644 index 0000000..aa033b3 --- /dev/null +++ b/src/ucs/profile/profile.c @@ -0,0 +1,609 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#include "profile.h" + +#include +#include +#include +#include +#include +#include +#include + + +typedef struct ucs_profile_global_location { + ucs_profile_location_t super; /*< Location info */ + volatile int *loc_id_p; /*< Back-pointer to location index */ +} ucs_profile_global_location_t; + + +/** + * Profiling global context + */ +typedef struct ucs_profile_global_context { + ucs_profile_global_location_t *locations; /**< Array of all locations */ + unsigned num_locations; /**< Number of valid locations */ + unsigned max_locations; /**< Size of locations array */ + pthread_mutex_t mutex; /**< Protects updating the locations array */ + pthread_key_t tls_key; /**< TLS key for per-thread context */ + ucs_list_link_t thread_list; /**< List of all thread contexts */ +} ucs_profile_global_context_t; + + +/* Profiling per-thread context */ +typedef struct ucs_profile_thread_context { + pthread_t pthread_id; /**< POSIX thread id */ + int tid; /**< System thread id */ + ucs_time_t start_time; /**< Thread context init time */ + ucs_time_t end_time; /**< Thread end time */ + ucs_list_link_t list; /**< Entry in thread list */ + int is_completed; /**< Set to 1 when thread exits */ + + struct { + ucs_profile_record_t *start; /**< Circular log buffer start */ + ucs_profile_record_t *end; /**< Circular log buffer end */ + ucs_profile_record_t *current; /**< Current log pointer */ + int wraparound; /**< Whether log was rotated */ + } log; + + struct { + unsigned num_locations; /**< Number of valid locations */ + ucs_profile_thread_location_t *locations; /**< Statistics per location */ + int stack_top; /**< Index of stack top */ + ucs_time_t stack[UCS_PROFILE_STACK_MAX]; /**< Timestamps for each nested scope */ + } accum; +} ucs_profile_thread_context_t; + + +#define ucs_profile_for_each_location(_var) \ + for ((_var) = ucs_profile_global_ctx.locations; \ + (_var) < (ucs_profile_global_ctx.locations + \ + ucs_profile_global_ctx.num_locations); \ + ++(_var)) + + +const 
char *ucs_profile_mode_names[] = { + [UCS_PROFILE_MODE_ACCUM] = "accum", + [UCS_PROFILE_MODE_LOG] = "log", + [UCS_PROFILE_MODE_LAST] = NULL +}; + +static ucs_profile_global_context_t ucs_profile_global_ctx = { + .locations = NULL, + .num_locations = 0, + .max_locations = 0, + .mutex = PTHREAD_MUTEX_INITIALIZER, + .thread_list = UCS_LIST_INITIALIZER(&ucs_profile_global_ctx.thread_list, + &ucs_profile_global_ctx.thread_list), +}; + +static ucs_status_t ucs_profile_file_write_data(int fd, void *data, size_t size) +{ + ssize_t written; + + if (size > 0) { + written = write(fd, data, size); + if (written < 0) { + ucs_error("failed to write %zu bytes to profiling file: %m", size); + return UCS_ERR_IO_ERROR; + } else if (size != written) { + ucs_error("wrote only %zd of %zu bytes to profiling file: %m", + written, size); + return UCS_ERR_IO_ERROR; + } + } + + return UCS_OK; +} + +static ucs_status_t +ucs_profile_file_write_records(int fd, ucs_profile_record_t *begin, + ucs_profile_record_t *end) +{ + return ucs_profile_file_write_data(fd, begin, UCS_PTR_BYTE_DIFF(begin, end)); +} + +/* Global lock must be held */ +static ucs_status_t +ucs_profile_file_write_thread(int fd, ucs_profile_thread_context_t *ctx, + ucs_time_t default_end_time) +{ + ucs_profile_thread_location_t empty_location = { .total_time = 0, .count = 0 }; + ucs_profile_thread_header_t thread_hdr; + unsigned i, num_locations; + ucs_status_t status; + + /* + * NOTE: There is no protection against a race with a thread which is still + * producing profiling data (e.g updating the context structure without a + * lock). + * To avoid excess locking on fast-path, we assume that when we dump the + * profiling data (at program exit), the profiled threads are not calling + * ucs_profile_record() anymore. 
+ */ + + ucs_debug("profiling context %p: write to file", ctx); + + /* write thread header */ + thread_hdr.tid = ctx->tid; + thread_hdr.start_time = ctx->start_time; + if (ctx->is_completed) { + thread_hdr.end_time = ctx->end_time; + } else { + thread_hdr.end_time = default_end_time; + } + + if (ucs_global_opts.profile_mode & UCS_BIT(UCS_PROFILE_MODE_LOG)) { + thread_hdr.num_records = ctx->log.wraparound ? + (ctx->log.end - ctx->log.start) : + (ctx->log.current - ctx->log.start); + } else { + thread_hdr.num_records = 0; + } + + status = ucs_profile_file_write_data(fd, &thread_hdr, sizeof(thread_hdr)); + if (status != UCS_OK) { + return status; + } + + /* If accumulate mode is not enabled, there are no location entries */ + if (ucs_global_opts.profile_mode & UCS_BIT(UCS_PROFILE_MODE_ACCUM)) { + num_locations = ctx->accum.num_locations; + } else { + num_locations = 0; + } + + /* write profiling information for every location + * note: the thread location array may be smaller (or even empty) than the + * global list, but it cannot be larger. 
If it's smaller, we pad with empty + * entries + */ + ucs_assert_always(num_locations <= ucs_profile_global_ctx.num_locations); + /* propagate a failed write instead of continuing with a truncated file */ + status = ucs_profile_file_write_data(fd, ctx->accum.locations, + num_locations * sizeof(*ctx->accum.locations)); + if (status != UCS_OK) { + return status; + } + for (i = num_locations; i < ucs_profile_global_ctx.num_locations; ++i) { + status = ucs_profile_file_write_data(fd, &empty_location, + sizeof(empty_location)); + if (status != UCS_OK) { + return status; + } + } + + /* write profiling records */ + if (ucs_global_opts.profile_mode & UCS_BIT(UCS_PROFILE_MODE_LOG)) { + if (ctx->log.wraparound) { + status = ucs_profile_file_write_records(fd, ctx->log.current, + ctx->log.end); + if (status != UCS_OK) { + return status; + } + } + + status = ucs_profile_file_write_records(fd, ctx->log.start, + ctx->log.current); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +static ucs_status_t ucs_profile_write_locations(int fd) +{ + ucs_profile_global_location_t *loc; + ucs_status_t status; + + ucs_profile_for_each_location(loc) { + status = ucs_profile_file_write_data(fd, &loc->super, sizeof(loc->super)); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +static void ucs_profile_write() +{ + ucs_profile_thread_context_t *ctx; + ucs_profile_header_t header; + char fullpath[1024] = {0}; + char filename[1024] = {0}; + ucs_time_t write_time; + ucs_status_t status; + int fd; + + if (!ucs_global_opts.profile_mode) { + return; + } + + pthread_mutex_lock(&ucs_profile_global_ctx.mutex); + + write_time = ucs_get_time(); + + ucs_fill_filename_template(ucs_global_opts.profile_file, + filename, sizeof(filename)); + ucs_expand_path(filename, fullpath, sizeof(fullpath) - 1); + + fd = open(fullpath, O_WRONLY|O_CREAT|O_TRUNC, 0600); + if (fd < 0) { + ucs_error("failed to write profiling data to '%s': %m", fullpath); + goto out_unlock; + } + + /* write header */ + memset(&header, 0, sizeof(header)); + ucs_read_file(header.cmdline, sizeof(header.cmdline), 1, 
"/proc/self/cmdline"); + strncpy(header.hostname, ucs_get_host_name(), sizeof(header.hostname) - 1); + header.version = UCS_PROFILE_FILE_VERSION; + strncpy(header.ucs_path, ucs_debug_get_lib_path(), sizeof(header.ucs_path) - 1); + header.pid = getpid(); + header.mode = ucs_global_opts.profile_mode; + header.num_locations = ucs_profile_global_ctx.num_locations; + header.num_threads = ucs_list_length(&ucs_profile_global_ctx.thread_list); + header.one_second = ucs_time_from_sec(1.0); + ucs_profile_file_write_data(fd, &header, sizeof(header)); + + /* write locations */ + status = ucs_profile_write_locations(fd); + if (status != UCS_OK) { + goto out_close_fd; + } + + /* write threads */ + ucs_list_for_each(ctx, &ucs_profile_global_ctx.thread_list, list) { + status = ucs_profile_file_write_thread(fd, ctx, write_time); + if (status != UCS_OK) { + goto out_close_fd; + } + } + +out_close_fd: + close(fd); +out_unlock: + pthread_mutex_unlock(&ucs_profile_global_ctx.mutex); +} + +static UCS_F_NOINLINE +ucs_profile_thread_context_t* ucs_profile_thread_init() +{ + ucs_profile_thread_context_t *ctx; + size_t num_records; + + ucs_assert(ucs_global_opts.profile_mode); + + ctx = ucs_malloc(sizeof(*ctx), "profile_thread_context"); + if (ctx == NULL) { + ucs_error("failed to allocate profiling thread context"); + return NULL; + } + + ctx->tid = ucs_get_tid(); + ctx->start_time = ucs_get_time(); + ctx->end_time = 0; + ctx->pthread_id = pthread_self(); + + ucs_debug("profiling context %p: start on thread 0x%lx tid %d mode %d", + ctx, (unsigned long)pthread_self(), ucs_get_tid(), + ucs_global_opts.profile_mode); + + /* Initialize log mode */ + if (ucs_global_opts.profile_mode & UCS_BIT(UCS_PROFILE_MODE_LOG)) { + num_records = ucs_global_opts.profile_log_size / + sizeof(ucs_profile_record_t); + ctx->log.start = ucs_calloc(num_records, sizeof(ucs_profile_record_t), + "profile_log"); + if (ctx->log.start == NULL) { + ucs_fatal("failed to allocate profiling log"); + } + + ctx->log.end = 
ctx->log.start + num_records; + ctx->log.current = ctx->log.start; + ctx->log.wraparound = 0; + } + + /* Initialize accumulate mode */ + if (ucs_global_opts.profile_mode & UCS_BIT(UCS_PROFILE_MODE_ACCUM)) { + ctx->accum.num_locations = 0; + ctx->accum.locations = NULL; + ctx->accum.stack_top = -1; + } + + pthread_setspecific(ucs_profile_global_ctx.tls_key, ctx); + + pthread_mutex_lock(&ucs_profile_global_ctx.mutex); + ucs_list_add_tail(&ucs_profile_global_ctx.thread_list, &ctx->list); + pthread_mutex_unlock(&ucs_profile_global_ctx.mutex); + + return ctx; +} + +static void ucs_profile_thread_cleanup(ucs_profile_thread_context_t *ctx) +{ + ucs_debug("profiling context %p: cleanup", ctx); + + if (ucs_global_opts.profile_mode & UCS_BIT(UCS_PROFILE_MODE_LOG)) { + ucs_free(ctx->log.start); + } + + if (ucs_global_opts.profile_mode & UCS_BIT(UCS_PROFILE_MODE_ACCUM)) { + ucs_free(ctx->accum.locations); + } + + ucs_list_del(&ctx->list); + ucs_free(ctx); +} + +static void ucs_profile_thread_finalize(ucs_profile_thread_context_t *ctx) +{ + ucs_debug("profiling context %p: completed", ctx); + + ctx->end_time = ucs_get_time(); + ctx->is_completed = 1; +} + +static void ucs_profile_thread_key_destr(void *data) +{ + ucs_profile_thread_context_t *ctx = data; + ucs_profile_thread_finalize(ctx); +} + +/* + * Register a profiling location - should be called once per location in the + * code, before the first record of each such location is made. + * SHOULD NOT be used directly - use UCS_PROFILE macros instead. + * + * @param [in] type Location type. + * @param [in] file Source file name. + * @param [in] line Source line number. + * @param [in] function Calling function name. + * @param [in] name Location name. 
+ * @param [out] loc_id_p Filled with location ID: + * 0 - profiling is disabled + * >0 - location index + 1 + */ +static UCS_F_NOINLINE +int ucs_profile_get_location(ucs_profile_type_t type, const char *name, + const char *file, int line, const char *function, + volatile int *loc_id_p) +{ + ucs_profile_global_location_t *loc, *new_locations; + int loc_id; + + pthread_mutex_lock(&ucs_profile_global_ctx.mutex); + + /* Check, with lock held, that the location is not already initialized */ + if (*loc_id_p >= 0) { + loc_id = *loc_id_p; + goto out_unlock; + } + + /* Check if profiling is disabled */ + if (!ucs_global_opts.profile_mode) { + *loc_id_p = loc_id = 0; + goto out_unlock; + } + + /* Location ID must be uninitialized */ + ucs_assert(*loc_id_p == -1); + + ucs_profile_for_each_location(loc) { + if ((type == loc->super.type) && (line == loc->super.line) && + !strcmp(loc->super.name, name) && + !strcmp(loc->super.file, basename(file)) && + !strcmp(loc->super.function, function)) { + goto out_found; + } + } + + ++ucs_profile_global_ctx.num_locations; + + /* Reallocate array if needed */ + if (ucs_profile_global_ctx.num_locations > ucs_profile_global_ctx.max_locations) { + ucs_profile_global_ctx.max_locations = + 2 * ucs_profile_global_ctx.num_locations; + new_locations = ucs_realloc(ucs_profile_global_ctx.locations, + sizeof(*ucs_profile_global_ctx.locations) * + ucs_profile_global_ctx.max_locations, + "profile_locations"); + if (new_locations == NULL) { + ucs_warn("failed to expand locations array"); + /* the array was not grown: undo the bookkeeping so that it still + * describes the existing array (it was full before this call) */ + --ucs_profile_global_ctx.num_locations; + ucs_profile_global_ctx.max_locations = + ucs_profile_global_ctx.num_locations; + *loc_id_p = loc_id = 0; + goto out_unlock; + } + + ucs_profile_global_ctx.locations = new_locations; + } + + /* Initialize new location */ + loc = &ucs_profile_global_ctx.locations[ucs_profile_global_ctx.num_locations - 1]; + ucs_strncpy_zero(loc->super.file, basename(file), sizeof(loc->super.file)); + ucs_strncpy_zero(loc->super.function, function, sizeof(loc->super.function)); + ucs_strncpy_zero(loc->super.name, name, sizeof(loc->super.name)); + loc->super.line = 
line; + loc->super.type = type; + loc->loc_id_p = loc_id_p; + +out_found: + *loc_id_p = loc_id = (loc - ucs_profile_global_ctx.locations) + 1; + ucs_memory_cpu_store_fence(); +out_unlock: + pthread_mutex_unlock(&ucs_profile_global_ctx.mutex); + return loc_id; +} + +static void ucs_profile_thread_expand_locations(int loc_id) +{ + ucs_profile_thread_context_t *ctx; + unsigned i, new_num_locations; + + ctx = pthread_getspecific(ucs_profile_global_ctx.tls_key); + ucs_assert(ctx != NULL); + + new_num_locations = ucs_max(loc_id, ctx->accum.num_locations); + ctx->accum.locations = ucs_realloc(ctx->accum.locations, + sizeof(*ctx->accum.locations) * + new_num_locations, + "profile_thread_locations"); + if (ctx->accum.locations == NULL) { + ucs_fatal("failed to allocate profiling per-thread locations"); + } + + for (i = ctx->accum.num_locations; i < new_num_locations; ++i) { + ctx->accum.locations[i].count = 0; + ctx->accum.locations[i].total_time = 0; + } + + ctx->accum.num_locations = new_num_locations; +} + +void ucs_profile_record(ucs_profile_type_t type, const char *name, + uint32_t param32, uint64_t param64, const char *file, + int line, const char *function, volatile int *loc_id_p) +{ + ucs_profile_thread_location_t *loc; + ucs_profile_thread_context_t *ctx; + ucs_profile_record_t *rec; + ucs_time_t current_time; + int loc_id; + + /* If the location id is -1 or 0, need to re-read it with lock held */ + loc_id = *loc_id_p; + if (ucs_unlikely(loc_id <= 0)) { + loc_id = ucs_profile_get_location(type, name, file, line, function, + loc_id_p); + if (loc_id == 0) { + return; + } + } + + ucs_memory_cpu_load_fence(); + + ucs_assert(*loc_id_p != 0); + ucs_assert(ucs_global_opts.profile_mode != 0); + + /* Get thread-specific profiling context */ + ctx = pthread_getspecific(ucs_profile_global_ctx.tls_key); + if (ucs_unlikely(ctx == NULL)) { + ctx = ucs_profile_thread_init(); + if (ctx == NULL) { + /* context allocation failed (already logged) - drop this record */ + return; + } + } + + current_time = ucs_get_time(); + if (ucs_global_opts.profile_mode & 
UCS_BIT(UCS_PROFILE_MODE_ACCUM)) { + if (ucs_unlikely(loc_id > ctx->accum.num_locations)) { + /* expand the locations array of the current thread */ + ucs_profile_thread_expand_locations(loc_id); + } + ucs_assert(loc_id - 1 < ctx->accum.num_locations); + + loc = &ctx->accum.locations[loc_id - 1]; + switch (type) { + case UCS_PROFILE_TYPE_SCOPE_BEGIN: + ctx->accum.stack[++ctx->accum.stack_top] = current_time; + break; + case UCS_PROFILE_TYPE_SCOPE_END: + loc->total_time += current_time - ctx->accum.stack[ctx->accum.stack_top]; + --ctx->accum.stack_top; + break; + default: + break; + } + ++loc->count; + } + + if (ucs_global_opts.profile_mode & UCS_BIT(UCS_PROFILE_MODE_LOG)) { + rec = ctx->log.current; + rec->timestamp = current_time; + rec->param64 = param64; + rec->param32 = param32; + rec->location = loc_id - 1; + if (++ctx->log.current >= ctx->log.end) { + ctx->log.current = ctx->log.start; + ctx->log.wraparound = 1; + } + } +} + +static void ucs_profile_check_active_threads() +{ + size_t num_active_threads; + + pthread_mutex_lock(&ucs_profile_global_ctx.mutex); + num_active_threads = ucs_list_length(&ucs_profile_global_ctx.thread_list); + pthread_mutex_unlock(&ucs_profile_global_ctx.mutex); + + if (num_active_threads > 0) { + /* num_active_threads is a size_t, hence the unsigned %zu conversion */ + ucs_warn("%zu profiled threads are still running", num_active_threads); + } +} + +void ucs_profile_reset_locations() +{ + ucs_profile_global_location_t *loc; + + pthread_mutex_lock(&ucs_profile_global_ctx.mutex); + + ucs_profile_for_each_location(loc) { + *loc->loc_id_p = -1; + } + + ucs_profile_global_ctx.num_locations = 0; + ucs_profile_global_ctx.max_locations = 0; + ucs_free(ucs_profile_global_ctx.locations); + ucs_profile_global_ctx.locations = NULL; + + pthread_mutex_unlock(&ucs_profile_global_ctx.mutex); +} + +static void ucs_profile_cleanup_completed_threads() +{ + ucs_profile_thread_context_t *ctx, *tmp; + + pthread_mutex_lock(&ucs_profile_global_ctx.mutex); + ucs_list_for_each_safe(ctx, tmp, &ucs_profile_global_ctx.thread_list, + 
list) { + if (ctx->is_completed) { + ucs_profile_thread_cleanup(ctx); + } + } + pthread_mutex_unlock(&ucs_profile_global_ctx.mutex); +} + +void ucs_profile_dump() +{ + ucs_profile_thread_context_t *ctx; + + /* finalize profiling on current thread */ + ctx = pthread_getspecific(ucs_profile_global_ctx.tls_key); + if (ctx) { + ucs_profile_thread_finalize(ctx); + pthread_setspecific(ucs_profile_global_ctx.tls_key, NULL); + } + + /* write and cleanup all completed threads (including the current thread) */ + ucs_profile_write(); + ucs_profile_cleanup_completed_threads(); +} + +void ucs_profile_global_init() +{ + if (ucs_global_opts.profile_mode && !strlen(ucs_global_opts.profile_file)) { + // TODO make sure profiling file is writeable + ucs_warn("profiling file not specified"); + } + + pthread_key_create(&ucs_profile_global_ctx.tls_key, + ucs_profile_thread_key_destr); +} + +void ucs_profile_global_cleanup() +{ + ucs_profile_dump(); + ucs_profile_check_active_threads(); + pthread_key_delete(ucs_profile_global_ctx.tls_key); +} diff --git a/src/ucs/profile/profile.h b/src/ucs/profile/profile.h new file mode 100644 index 0000000..e8e7fe8 --- /dev/null +++ b/src/ucs/profile/profile.h @@ -0,0 +1,20 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_PROFILE_H_ +#define UCS_PROFILE_H_ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#if HAVE_PROFILING +# include "profile_on.h" +#else +# include "profile_off.h" +#endif + +#endif diff --git a/src/ucs/profile/profile_defs.h b/src/ucs/profile/profile_defs.h new file mode 100644 index 0000000..e315839 --- /dev/null +++ b/src/ucs/profile/profile_defs.h @@ -0,0 +1,142 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCS_PROFILE_DEFS_H_ +#define UCS_PROFILE_DEFS_H_ + +#include +#include +#include +#include + +BEGIN_C_DECLS + +/** @file profile_defs.h */ + +#define UCS_PROFILE_STACK_MAX 64 +#define UCS_PROFILE_FILE_VERSION 2u + + +/** + * Profiling modes + */ +enum { + UCS_PROFILE_MODE_ACCUM, /**< Accumulate elapsed time per location */ + UCS_PROFILE_MODE_LOG, /**< Record all events */ + UCS_PROFILE_MODE_LAST +}; + + +/** + * Profiling location type + */ +typedef enum { + UCS_PROFILE_TYPE_SAMPLE, /**< Sample only */ + UCS_PROFILE_TYPE_SCOPE_BEGIN, /**< Begin a scope */ + UCS_PROFILE_TYPE_SCOPE_END, /**< End a scope */ + UCS_PROFILE_TYPE_REQUEST_NEW, /**< New asynchronous request */ + UCS_PROFILE_TYPE_REQUEST_EVENT, /**< Some progress is made on a request */ + UCS_PROFILE_TYPE_REQUEST_FREE, /**< Asynchronous request released */ + UCS_PROFILE_TYPE_LAST +} ucs_profile_type_t; + + +/* + * Profile file structure: + * + * < ucs_profile_header_t > + * < ucs_profile_location_t > * ucs_profile_header_t::num_locations + * [ + * < ucs_profile_thread_header_t > + * < ucs_profile_thread_location_t > * ucs_profile_header_t::num_locations + * < ucs_profile_record_t > * ucs_profile_thread_header_t::num_records + * + * ] * ucs_profile_header_t::num_threads + */ + + +/** + * Profile output file header + */ +typedef struct ucs_profile_header { + uint32_t version; /**< File format version */ + char ucs_path[1024];/**< UCX library path */ + char cmdline[1024]; /**< Command line */ + char hostname[64]; /**< Host name */ + uint32_t pid; /**< Process ID */ + uint32_t mode; /**< Bitmask of profiling modes */ + uint32_t num_locations; /**< Number of locations in the file */ + uint32_t num_threads; /**< Number of threads in the file */ + uint64_t one_second; /**< How much time is one second on the sampled machine */ +} UCS_S_PACKED ucs_profile_header_t; + + +/** + * Profile location record + */ +typedef struct ucs_profile_location { + char file[64]; /**< Source file name */ + char 
function[64]; /**< Function name */ + char name[32]; /**< User-provided name */ + int line; /**< Source line number */ + uint8_t type; /**< From ucs_profile_type_t */ +} UCS_S_PACKED ucs_profile_location_t; + + +/** + * Profile output file thread header + */ +typedef struct ucs_profile_thread_header { + uint32_t tid; /**< System thread id */ + uint64_t start_time; /**< Time of thread start */ + uint64_t end_time; /**< Time of thread exit */ + uint64_t num_records; /**< Number of records for the thread */ +} UCS_S_PACKED ucs_profile_thread_header_t; + + +/** + * Profile thread location with samples + */ +typedef struct ucs_profile_thread_location { + uint64_t total_time; /**< Total interval from previous location */ + size_t count; /**< Number of times we've hit this location */ +} UCS_S_PACKED ucs_profile_thread_location_t; + + +/** + * Profile output file sample record + */ +typedef struct ucs_profile_record { + uint64_t timestamp; /**< Record timestamp */ + uint64_t param64; /**< Custom 64-bit parameter */ + uint32_t param32; /**< Custom 32-bit parameter */ + uint32_t location; /**< Location identifier */ +} UCS_S_PACKED ucs_profile_record_t; + + +extern const char *ucs_profile_mode_names[]; + + +/** + * Initialize profiling system. + */ +void ucs_profile_global_init(); + + +/** + * Save and cleanup profiling. + */ +void ucs_profile_global_cleanup(); + + +/** + * Save and reset profiling. + */ +void ucs_profile_dump(); + +END_C_DECLS + +#endif diff --git a/src/ucs/profile/profile_off.h b/src/ucs/profile/profile_off.h new file mode 100644 index 0000000..06df5a1 --- /dev/null +++ b/src/ucs/profile/profile_off.h @@ -0,0 +1,31 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCS_PROFILE_OFF_H_ +#define UCS_PROFILE_OFF_H_ + +#include "profile_defs.h" + +#include + + +#define UCS_PROFILE(...) 
UCS_EMPTY_STATEMENT +#define UCS_PROFILE_SAMPLE(_name) UCS_EMPTY_STATEMENT +#define UCS_PROFILE_SCOPE_BEGIN() UCS_EMPTY_STATEMENT +#define UCS_PROFILE_SCOPE_END(_name) UCS_EMPTY_STATEMENT +#define UCS_PROFILE_CODE(_name) +#define UCS_PROFILE_FUNC(_ret_type, _name, _arglist, ...) _ret_type _name(__VA_ARGS__) +#define UCS_PROFILE_FUNC_VOID(_name, _arglist, ...) void _name(__VA_ARGS__) +#define UCS_PROFILE_NAMED_CALL(_name, _func, ...) _func(__VA_ARGS__) +#define UCS_PROFILE_CALL(_func, ...) _func(__VA_ARGS__) +#define UCS_PROFILE_NAMED_CALL_VOID(_name, _func, ...) _func(__VA_ARGS__) +#define UCS_PROFILE_CALL_VOID(_func, ...) _func(__VA_ARGS__) +#define UCS_PROFILE_REQUEST_NEW(...) UCS_EMPTY_STATEMENT +#define UCS_PROFILE_REQUEST_EVENT(...) UCS_EMPTY_STATEMENT +#define UCS_PROFILE_REQUEST_EVENT_CHECK_STATUS(...) UCS_EMPTY_STATEMENT +#define UCS_PROFILE_REQUEST_FREE(...) UCS_EMPTY_STATEMENT + +#endif diff --git a/src/ucs/profile/profile_on.h b/src/ucs/profile/profile_on.h new file mode 100644 index 0000000..d17a14a --- /dev/null +++ b/src/ucs/profile/profile_on.h @@ -0,0 +1,286 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCS_PROFILE_ON_H_ +#define UCS_PROFILE_ON_H_ + +#include "profile_defs.h" + +#include +#include +#include + + +BEGIN_C_DECLS + +/** @file profile_on.h */ + +/* Helper macro: forwards to ucs_profile_record() unless the location is disabled (id 0) */ +#define _UCS_PROFILE_RECORD(_type, _name, _param32, _param64, _loc_id_p) \ + { \ + if (*(_loc_id_p) != 0) { \ + ucs_profile_record((_type), (_name), (_param32), (_param64), \ + __FILE__, __LINE__, __FUNCTION__, (_loc_id_p)); \ + } \ + } + + +/* Helper macro */ +#define __UCS_PROFILE_CODE(_name, _loop_var) \ + int _loop_var ; \ + for (({ UCS_PROFILE_SCOPE_BEGIN(); _loop_var = 1;}); \ + _loop_var; \ + ({ UCS_PROFILE_SCOPE_END(_name); _loop_var = 0;})) + + +/* Helper macro */ +#define _UCS_PROFILE_CODE(_name, _var_suffix) \ + __UCS_PROFILE_CODE(_name, UCS_PP_TOKENPASTE(loop, _var_suffix)) + + +/** + * Record a profiling event. + * + * @param _type Event type. + * @param _name Event name. + * @param _param32 Custom 32-bit parameter. + * @param _param64 Custom 64-bit parameter. + */ +#define UCS_PROFILE(_type, _name, _param32, _param64) \ + { \ + static int loc_id = -1; \ + _UCS_PROFILE_RECORD((_type), (_name), (_param32), (_param64), &loc_id); \ + } + + +/** + * Record a profiling sample event. + * + * @param _name Event name. + */ +#define UCS_PROFILE_SAMPLE(_name) \ + UCS_PROFILE(UCS_PROFILE_TYPE_SAMPLE, (_name), 0, 0) + + +/** + * Record a scope-begin profiling event. + */ +#define UCS_PROFILE_SCOPE_BEGIN() \ + { \ + UCS_PROFILE(UCS_PROFILE_TYPE_SCOPE_BEGIN, "", 0, 0); \ + ucs_compiler_fence(); \ + } + + +/** + * Record a scope-end profiling event. + * + * @param _name Scope name. + */ +#define UCS_PROFILE_SCOPE_END(_name) \ + { \ + ucs_compiler_fence(); \ + UCS_PROFILE(UCS_PROFILE_TYPE_SCOPE_END, _name, 0, 0); \ + } + + +/** + * Declare a profiled scope of code. + * + * Usage: + * UCS_PROFILE_CODE(<name>) { + * <code> + * } + * + * @param _name Scope name. + */ +#define UCS_PROFILE_CODE(_name) \ + _UCS_PROFILE_CODE(_name, UCS_PP_UNIQUE_ID) + + +/** + * Create a profiled function. 
+ * + * Usage: + * UCS_PROFILE_FUNC(, , (a, b), int a, char b) + * + * @param _ret_type Function return type. + * @param _name Function name. + * @param _arglist List of argument *names* only. + * @param ... Argument declarations (with types). + */ +#define UCS_PROFILE_FUNC(_ret_type, _name, _arglist, ...) \ + static UCS_F_ALWAYS_INLINE _ret_type _name##_inner(__VA_ARGS__); \ + \ + _ret_type _name(__VA_ARGS__) { \ + UCS_PROFILE_SCOPE_BEGIN(); \ + _ret_type _ret = _name##_inner _arglist; \ + UCS_PROFILE_SCOPE_END(#_name); \ + return _ret; \ + } \ + static UCS_F_ALWAYS_INLINE _ret_type _name##_inner(__VA_ARGS__) + + +/** + * Create a profiled function whose return type is void. + * + * Usage: + * UCS_PROFILE_FUNC_VOID(, (a, b), int a, char b) + * + * @param _name Function name. + * @param _arglist List of argument *names* only. + * @param ... Argument declarations (with types). + */ +#define UCS_PROFILE_FUNC_VOID(_name, _arglist, ...) \ + static UCS_F_ALWAYS_INLINE void _name##_inner(__VA_ARGS__); \ + \ + void _name(__VA_ARGS__) { \ + UCS_PROFILE_SCOPE_BEGIN(); \ + _name##_inner _arglist; \ + UCS_PROFILE_SCOPE_END(#_name); \ + } \ + static UCS_F_ALWAYS_INLINE void _name##_inner(__VA_ARGS__) + + +/* + * Profile a function call, and specify explicit name string for the profile. + * Useful when calling a function by a pointer. + * + * Usage: + * UCS_PROFILE_NAMED_CALL("name", function, arg1, arg2) + * + * @param _name Name string for the profile. + * @param _func Function name. + * @param ... Function call arguments. + */ +#define UCS_PROFILE_NAMED_CALL(_name, _func, ...) \ + ({ \ + typeof(_func(__VA_ARGS__)) retval; \ + UCS_PROFILE_SCOPE_BEGIN(); \ + retval = _func(__VA_ARGS__); \ + UCS_PROFILE_SCOPE_END(_name); \ + retval; \ + }) + + +/* + * Profile a function call. + * + * Usage: + * UCS_PROFILE_CALL(function, arg1, arg2) + * + * @param _func Function name. + * @param ... Function call arguments. + */ +#define UCS_PROFILE_CALL(_func, ...) 
\ + UCS_PROFILE_NAMED_CALL(#_func, _func, ## __VA_ARGS__) + + +/* + * Profile a function call which does not return a value, and specify explicit + * name string for the profile. Useful when calling a function by a pointer. + * + * Usage: + * UCS_PROFILE_NAMED_CALL_VOID("name", function, arg1, arg2) + * + * @param _name Name string for the profile. + * @param _func Function name. + * @param ... Function call arguments. + */ +#define UCS_PROFILE_NAMED_CALL_VOID(_name, _func, ...) \ + { \ + UCS_PROFILE_SCOPE_BEGIN(); \ + _func(__VA_ARGS__); \ + UCS_PROFILE_SCOPE_END(_name); \ + } + + +/* + * Profile a function call which does not return a value. + * + * Usage: + * UCS_PROFILE_CALL_VOID(function, arg1, arg2) + * + * @param _func Function name. + * @param ... Function call arguments. + */ +#define UCS_PROFILE_CALL_VOID(_func, ...) \ + UCS_PROFILE_NAMED_CALL_VOID(#_func, _func, ## __VA_ARGS__) + + +/* + * Profile a new request allocation. + * + * @param _req Request pointer. + * @param _name Allocation site name. + * @param _param32 Custom 32-bit parameter. + */ +#define UCS_PROFILE_REQUEST_NEW(_req, _name, _param32) \ + UCS_PROFILE(UCS_PROFILE_TYPE_REQUEST_NEW, (_name), (_param32), (uintptr_t)(_req)); + + +/* + * Profile a request progress event. + * + * @param _req Request pointer. + * @param _name Event name. + * @param _param32 Custom 32-bit parameter. + */ +#define UCS_PROFILE_REQUEST_EVENT(_req, _name, _param32) \ + UCS_PROFILE(UCS_PROFILE_TYPE_REQUEST_EVENT, (_name), (_param32), (uintptr_t)(_req)); + + +/* + * Profile a request progress event with status check. + * + * @param _req Request pointer. + * @param _name Event name. + * @param _param32 Custom 32-bit parameter. + * @param _status Status of the last progress event. + */ +#define UCS_PROFILE_REQUEST_EVENT_CHECK_STATUS(_req, _name, _param32, _status) \ + if (!UCS_STATUS_IS_ERR(_status)) { \ + UCS_PROFILE_REQUEST_EVENT((_req), (_name), (_param32)); \ + } + + +/* + * Profile a request release. 
+ * + * @param _req Request pointer. + */ +#define UCS_PROFILE_REQUEST_FREE(_req) \ + UCS_PROFILE(UCS_PROFILE_TYPE_REQUEST_FREE, "", 0, (uintptr_t)(_req)); + + +/* + * Store a new record with the given data. + * SHOULD NOT be used directly - use UCS_PROFILE macros instead. + * + * @param [in] type Location type. + * @param [in] name Location name. + * @param [in] param32 custom 32-bit parameter. + * @param [in] param64 custom 64-bit parameter. + * @param [in] file Source file name. + * @param [in] line Source line number. + * @param [in] function Calling function name. + * @param [in,out] loc_id_p Variable used to maintain the location ID. + */ +void ucs_profile_record(ucs_profile_type_t type, const char *name, + uint32_t param32, uint64_t param64, const char *file, + int line, const char *function, volatile int *loc_id_p); + + +/** + * Reset the internal array of profiling locations. + * Used for testing purposes only. + */ +void ucs_profile_reset_locations(); + + +END_C_DECLS + +#endif diff --git a/src/ucs/stats/client_server.c b/src/ucs/stats/client_server.c new file mode 100644 index 0000000..44db28c --- /dev/null +++ b/src/ucs/stats/client_server.c @@ -0,0 +1,675 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "libstats.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define UCS_STATS_MAGIC "UCSSTAT1" +#define UCS_STATS_MSG_FRAG_SIZE 1400 +#define ENTITY_HASH_SIZE 997 + + +/* UDP packet header */ +typedef struct ucs_stats_packet_hdr { + char magic[8]; + uint64_t timestamp; + uint32_t total_size; + uint32_t frag_offset; + uint32_t frag_size; +} UCS_S_PACKED ucs_stats_packet_hdr_t; + + +/* Fragment assembly hole free-list */ +typedef struct frag_hole { + ucs_list_link_t list; + size_t size; /* Including this struct */ +} frag_hole_t; + + +/* An entity which reports statistics */ +typedef struct stats_entity stats_entity_t; +struct stats_entity { + struct sockaddr_in in_addr; /* Entity address */ + uint64_t timestamp; /* Current timestamp */ + size_t buffer_size; /* Buffer size */ + void *inprogress_buffer; /* Fragment assembly buffer */ + ucs_list_link_t holes; /* List of holes in the buffer */ + stats_entity_t *next; /* Hash link */ + + pthread_mutex_t lock; + volatile unsigned refcount; + void *completed_buffer; /* Completed buffer */ + struct timeval update_time; +}; + + +/* Client context */ +typedef struct ucs_stats_client { + int sockfd; +} ucs_stats_client_t; + + +/* Server context */ +typedef struct ucs_stats_server { + int sockfd; + int udp_port; + pthread_t server_thread; + volatile unsigned long rcvd_packets; + volatile int stop; + ucs_list_link_t curr_stats; + pthread_mutex_t entities_lock; + stats_entity_t* entities_hash[ENTITY_HASH_SIZE]; +} ucs_stats_server_t; + + +SGLIB_DEFINE_LIST_PROTOTYPES(stats_entity_t, stats_entity_cmp, next) +SGLIB_DEFINE_HASHED_CONTAINER_PROTOTYPES(stats_entity_t, ENTITY_HASH_SIZE, stats_entity_hash) + + +ucs_status_t ucs_stats_client_init(const char *server_addr, int port, ucs_stats_client_h *p_client) +{ + ucs_stats_client_h client; + struct 
sockaddr_in saddr; + struct hostent *he; + ucs_status_t status; + int ret; + + client = malloc(sizeof *client); + if (client == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + + he = gethostbyname(server_addr); + if (he == NULL || he->h_addr_list == NULL) { + ucs_error("failed to resolve address of '%s'", server_addr); + status = UCS_ERR_INVALID_ADDR; + goto err_free; + } + + saddr.sin_family = he->h_addrtype; + saddr.sin_port = htons(port); + assert(he->h_length == sizeof(saddr.sin_addr)); + memcpy(&saddr.sin_addr, he->h_addr_list[0], he->h_length); + memset(saddr.sin_zero, 0, sizeof(saddr.sin_zero)); + + client->sockfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (client->sockfd < 0) { + ucs_error("socket() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err_free; + } + + ret = connect(client->sockfd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (ret < 0) { + ucs_error("connect(%d) failed: %m", client->sockfd); + status = UCS_ERR_IO_ERROR; + goto err_close; + } + + *p_client = client; + return UCS_OK; + +err_close: + close(client->sockfd); +err_free: + free(client); +err: + return status; +} + +void ucs_stats_client_cleanup(ucs_stats_client_h client) +{ + close(client->sockfd); + free(client); +} + +static ucs_status_t +ucs_stats_sock_send_frags(int sockfd, uint64_t timestamp, void *buffer, size_t size) +{ + struct iovec iov[2]; + ucs_stats_packet_hdr_t hdr; + size_t frag_size, offset; + ssize_t nsent; + size_t max_frag = UCS_STATS_MSG_FRAG_SIZE - sizeof(hdr); + + offset = 0; + + memcpy(hdr.magic, UCS_STATS_MAGIC, sizeof(hdr.magic)); + hdr.total_size = size; + hdr.timestamp = timestamp; + + while (offset < size) { + frag_size = size - offset; + if (frag_size > max_frag) { + frag_size = max_frag; + } + + hdr.frag_offset = offset; + hdr.frag_size = frag_size; + + iov[0].iov_base = &hdr; + iov[0].iov_len = sizeof(hdr); + iov[1].iov_base = UCS_PTR_BYTE_OFFSET(buffer, offset); + iov[1].iov_len = hdr.frag_size; + + nsent = writev(sockfd, iov, 2); + if 
(nsent == -1) { + if (errno == ECONNREFUSED) { + ucs_trace("stats server is down"); + return UCS_OK; + } else { + ucs_error("writev() failed: %m"); + return UCS_ERR_IO_ERROR; + } + } + + assert(nsent == sizeof(hdr) + frag_size); + offset += frag_size; + } + + return UCS_OK; +} + +ucs_status_t +ucs_stats_client_send(ucs_stats_client_h client, ucs_stats_node_t *root, + uint64_t timestamp) +{ + ucs_status_t status; + FILE *stream; + char *buffer; + size_t size; + + /* TODO use GLIBC custom stream */ + stream = open_memstream(&buffer, &size); + if (stream == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out; + } + + status = ucs_stats_serialize(stream, root, UCS_STATS_SERIALIZE_BINARY); + fclose(stream); + + if (status != UCS_OK) { + goto out_free; + } + + /* send */ + status = ucs_stats_sock_send_frags(client->sockfd, timestamp, buffer, size); + +out_free: + free(buffer); +out: + return status; +} + +static void ucs_stats_server_entity_reset_buffer(stats_entity_t * entity, + size_t new_size) +{ + frag_hole_t *hole; + + if (new_size != entity->buffer_size) { + pthread_mutex_lock(&entity->lock); + entity->buffer_size = new_size; + entity->inprogress_buffer = realloc(entity->inprogress_buffer, + new_size + sizeof(frag_hole_t)); + entity->completed_buffer = realloc(entity->completed_buffer, + new_size + sizeof(frag_hole_t)); + pthread_mutex_unlock(&entity->lock); + } + + hole = entity->inprogress_buffer; + hole->size = entity->buffer_size; + ucs_list_head_init(&entity->holes); + ucs_list_add_tail(&entity->holes, &hole->list); +} + +static stats_entity_t *ucs_stats_server_entity_alloc(struct sockaddr_in *addr) +{ + stats_entity_t *entity; + + entity = malloc(sizeof *entity); + if (entity == NULL) { + return NULL; + } + + entity->in_addr = *addr; + entity->timestamp = 0; + entity->buffer_size = SIZE_MAX; + entity->inprogress_buffer = NULL; + entity->completed_buffer = NULL; + entity->refcount = 1; + ucs_list_head_init(&entity->holes); + pthread_mutex_init(&entity->lock, 
NULL);
+
+    ucs_stats_server_entity_reset_buffer(entity, 0);
+    return entity;
+}
+
+/**
+ * Release all memory owned by an entity, and the entity itself.
+ * Called by ucs_stats_server_entity_put() when the last reference is dropped.
+ */
+static void ucs_stats_server_entity_free(stats_entity_t * entity)
+{
+    free(entity->inprogress_buffer);
+    free(entity->completed_buffer);
+    /* Pairs with pthread_mutex_init() in ucs_stats_server_entity_alloc() */
+    pthread_mutex_destroy(&entity->lock);
+    free(entity);
+}
+
+/**
+ * Find the entity which matches the given address in the server's hash, or
+ * allocate and register a new one if there is no such entity yet.
+ * The reference count of the returned entity is incremented; the caller has to
+ * release it with ucs_stats_server_entity_put().
+ *
+ * @param server  Server whose entity hash is searched.
+ * @param addr    Address of the reporting entity.
+ *
+ * @return The entity, or NULL if a new entity could not be allocated.
+ */
+static stats_entity_t*
+ucs_stats_server_entity_get(ucs_stats_server_h server, struct sockaddr_in *addr)
+{
+    stats_entity_t *entity, search;
+
+    pthread_mutex_lock(&server->entities_lock);
+    search.in_addr = *addr;
+
+    entity = sglib_hashed_stats_entity_t_find_member(server->entities_hash, &search);
+    if (entity == NULL) {
+        entity = ucs_stats_server_entity_alloc(addr);
+        if (entity == NULL) {
+            /* Out of memory - do not touch the entity, just report failure */
+            pthread_mutex_unlock(&server->entities_lock);
+            return NULL;
+        }
+
+        gettimeofday(&entity->update_time, NULL);
+        sglib_hashed_stats_entity_t_add(server->entities_hash, entity);
+    }
+
+    __sync_fetch_and_add(&entity->refcount, 1);
+    pthread_mutex_unlock(&server->entities_lock);
+
+    return entity;
+}
+
+/**
+ * Drop one reference to the entity, and free it if this was the last one.
+ */
+static void ucs_stats_server_entity_put(stats_entity_t * entity)
+{
+    /* __sync_fetch_and_sub returns the value *before* the decrement */
+    if (__sync_fetch_and_sub(&entity->refcount, 1) == 1) {
+        ucs_stats_server_entity_free(entity);
+    }
+}
+
+/**
+ * Find a hole to contain the given fragment.
+ *
+ * @return The hole which fully contains [frag_offset, frag_offset + frag_size),
+ *         or NULL if there is no such hole.
+ */
+static frag_hole_t *
+find_frag_hole(stats_entity_t *entity, size_t frag_size, size_t frag_offset)
+{
+    void *frag_start = UCS_PTR_BYTE_OFFSET(entity->inprogress_buffer, frag_offset);
+    void *frag_end   = UCS_PTR_BYTE_OFFSET(entity->inprogress_buffer,
+                                           frag_offset + frag_size);
+    frag_hole_t *hole;
+
+    ucs_list_for_each(hole, &entity->holes, list) {
+        if ((frag_start >= (void*)hole) &&
+            (frag_end <= UCS_PTR_BYTE_OFFSET(hole, hole->size))) {
+            return hole;
+        }
+    }
+    return NULL;
+}
+
+/**
+ * Update statistics with new arrived fragment.
+ */
+static ucs_status_t
+ucs_stats_server_entity_update(ucs_stats_server_h server, stats_entity_t *entity,
+                               uint64_t timestamp, size_t total_size, void *frag,
+                               size_t frag_size, size_t frag_offset)
+{
+    frag_hole_t *hole, *new_hole;
+    void *frag_start, *frag_end, *hole_end;
+
+    ucs_debug("From %s:%d - timestamp %"PRIu64", %zu..%zu / %zu",
+              inet_ntoa(entity->in_addr.sin_addr), ntohs(entity->in_addr.sin_port),
+              timestamp, frag_offset, frag_offset + frag_size, total_size);
+
+    if (timestamp < entity->timestamp) {
+        ucs_debug("Dropping - old timestamp");
+        return UCS_OK;
+    } else if (timestamp > entity->timestamp) {
+        ucs_debug("New timestamp, resetting buffer with size %zu", total_size);
+        entity->timestamp = timestamp;
+        ucs_stats_server_entity_reset_buffer(entity, total_size);
+    } else {
+        /* Make sure all packets in this timestamp have the same 'total_size' */
+        if (entity->buffer_size != total_size) {
+            ucs_error("Total size in the packet is %zu, but expected is %zu",
+                      total_size, entity->buffer_size);
+        }
+    }
+
+    hole = find_frag_hole(entity, frag_size, frag_offset);
+    if (hole == NULL) {
+        ucs_error("cannot fill fragment (offset %zu size %zu)", frag_offset, frag_size);
+        return UCS_ERR_MESSAGE_TRUNCATED;
+    }
+
+    frag_start = UCS_PTR_BYTE_OFFSET(entity->inprogress_buffer, frag_offset);
+    frag_end   = UCS_PTR_BYTE_OFFSET(entity->inprogress_buffer,
+                                     frag_offset + frag_size);
+    hole_end   = UCS_PTR_BYTE_OFFSET(hole, hole->size);
+
+    ucs_debug("inserting into a hole of %zu..%zu",
+              UCS_PTR_BYTE_DIFF(entity->inprogress_buffer, hole),
+              UCS_PTR_BYTE_DIFF(entity->inprogress_buffer, hole_end));
+
+    /* If the fragment does not reach the end of the hole, create a new hole
+     * in this space.
+     */
+    if (frag_end < hole_end) {
+        /* Make sure we don't create a hole which is too small for a free-list
+         * pointer to fit in. An exception is the last fragment.
+         * NOTE(review): this is enforced by assert() only, so it is not
+         * checked when assertions are compiled out.
+         */
+        assert((UCS_PTR_BYTE_DIFF(frag_end, hole_end) >= sizeof(*new_hole)) ||
+               (hole_end == UCS_PTR_BYTE_OFFSET(entity->inprogress_buffer,
+                                                entity->buffer_size)));
+        new_hole       = frag_end;
+        new_hole->size = UCS_PTR_BYTE_DIFF(frag_end, hole_end);
+        ucs_list_insert_after(&hole->list, &new_hole->list);
+    }
+
+    /* If we have room before the fragment, resize the hole. Otherwise, delete it */
+    if (frag_start > (void*)hole) {
+        assert(UCS_PTR_BYTE_DIFF(hole, frag_start) >= sizeof(*hole));
+        hole->size = UCS_PTR_BYTE_DIFF(hole, frag_start);
+    } else {
+        ucs_list_del(&hole->list);
+    }
+
+    /* Copy the fragment */
+    memcpy(frag_start, frag, frag_size);
+
+    /* Completed? */
+    if (ucs_list_is_empty(&entity->holes)) {
+        ucs_debug("timestamp %"PRIu64" fully assembled", entity->timestamp);
+        pthread_mutex_lock(&entity->lock);
+        memcpy(entity->completed_buffer, entity->inprogress_buffer, entity->buffer_size);
+        pthread_mutex_unlock(&entity->lock);
+    }
+
+    return UCS_OK;
+}
+
+/**
+ * Update context with new arrived packet.
+ *
+ * @param server   Server which received the packet.
+ * @param sender   Address the packet was received from.
+ * @param pkt      Received packet, starting with the packet header.
+ * @param pkt_len  Number of bytes received.
+ *
+ * @return UCS_OK on success, UCS_ERR_MESSAGE_TRUNCATED / UCS_ERR_INVALID_PARAM
+ *         for a malformed packet, UCS_ERR_NO_MEMORY if the sending entity
+ *         could not be allocated, or the status of the fragment update.
+ */
+static ucs_status_t
+ucs_stats_server_update_context(ucs_stats_server_h server, struct sockaddr_in *sender,
+                                ucs_stats_packet_hdr_t *pkt, size_t pkt_len)
+{
+    stats_entity_t *entity;
+    ucs_status_t status;
+
+    /* The header fields are meaningful only if a whole header was received */
+    if (pkt_len < sizeof(ucs_stats_packet_hdr_t)) {
+        ucs_error("Invalid receive size: %zu is smaller than packet header (%zu)",
+                  pkt_len, sizeof(ucs_stats_packet_hdr_t));
+        return UCS_ERR_MESSAGE_TRUNCATED;
+    }
+
+    /* Validate fragment size */
+    if (pkt_len != pkt->frag_size + sizeof(ucs_stats_packet_hdr_t)) {
+        ucs_error("Invalid receive size: expected %zu, got %zu",
+                  pkt->frag_size + sizeof(ucs_stats_packet_hdr_t), pkt_len);
+        return UCS_ERR_MESSAGE_TRUNCATED;
+    }
+
+    /* Validate magic */
+    if (memcmp(pkt->magic, UCS_STATS_MAGIC, sizeof(pkt->magic)) != 0) {
+        ucs_error("Invalid magic in packet header");
+        return UCS_ERR_INVALID_PARAM;
+    }
+
+    /* Find or create the entity */
+    entity = ucs_stats_server_entity_get(server, sender);
+    if (entity == NULL) {
+        ucs_error("Failed to allocate stats entity");
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    pthread_mutex_lock(&entity->lock);
+    gettimeofday(&entity->update_time, NULL);
+    pthread_mutex_unlock(&entity->lock);
+
+    /* Update the entity */
+    status = ucs_stats_server_entity_update(server, entity, pkt->timestamp,
+                                            pkt->total_size, pkt + 1,
+                                            pkt->frag_size, pkt->frag_offset);
+
+    ucs_stats_server_entity_put(entity);
+    ++server->rcvd_packets;
+    return status;
+}
+
+/**
+ * Create a UDP socket bound to the given port on any local address.
+ *
+ * @param udp_port    Port to bind to.
+ * @param p_sockfd    Filled with the socket file descriptor.
+ * @param p_udp_port  Filled with the port reported by getsockname().
+ */
+static ucs_status_t ucs_stats_server_create_socket(int udp_port, int *p_sockfd,
+                                                   int *p_udp_port)
+{
+    struct sockaddr_in saddr;
+    socklen_t socklen;
+    int sockfd;
+    int ret;
+
+    sockfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+    if (sockfd < 0) {
+        ucs_error("socket() failed: %m");
+        return UCS_ERR_IO_ERROR;
+    }
+
+    saddr.sin_family      = AF_INET;
+    saddr.sin_addr.s_addr = INADDR_ANY;
+    saddr.sin_port        = htons(udp_port);
+    memset(saddr.sin_zero, 0, sizeof(saddr.sin_zero));
+
+    ret = bind(sockfd, (struct sockaddr*)&saddr, sizeof(saddr));
+    if (ret < 0) {
+        ucs_error("Failed to bind socket to port %u: %m", udp_port);
+        goto err_close_sock;
+    }
+
+    socklen = sizeof(saddr);
+    ret = getsockname(sockfd, (struct sockaddr*)&saddr, &socklen);
+    if (ret < 0) {
+        ucs_error("getsockname(%d) failed: %m", sockfd);
+        goto err_close_sock;
+    }
+
+    *p_sockfd   = sockfd;
+    *p_udp_port
= ntohs(saddr.sin_port);
+    return UCS_OK;
+
+err_close_sock:
+    close(sockfd);
+    return UCS_ERR_INVALID_ADDR;
+}
+
+/**
+ * Remove from the server's hash every entity whose last update is more than
+ * 5 seconds old, and drop the reference held on it.
+ */
+static void ucs_stats_server_clear_old_enitities(ucs_stats_server_h server)
+{
+    struct sglib_hashed_stats_entity_t_iterator it;
+    stats_entity_t *entity;
+    struct timeval current, diff;
+
+    gettimeofday(&current, NULL);
+
+    pthread_mutex_lock(&server->entities_lock);
+    entity = sglib_hashed_stats_entity_t_it_init(&it,server->entities_hash);
+    while (entity != NULL) {
+        /* update_time is written under the entity lock, read it the same way */
+        pthread_mutex_lock(&entity->lock);
+        timersub(&current, &entity->update_time, &diff);
+        pthread_mutex_unlock(&entity->lock);
+
+        if (diff.tv_sec > 5) {
+            sglib_hashed_stats_entity_t_delete(server->entities_hash, entity);
+            ucs_stats_server_entity_put(entity);
+        }
+        entity = sglib_hashed_stats_entity_t_it_next(&it);
+    }
+
+    pthread_mutex_unlock(&server->entities_lock);
+}
+
+/**
+ * Server thread main loop: receive datagrams on the server socket and feed
+ * them to ucs_stats_server_update_context() until the server is stopped, a
+ * receive error occurs, or a packet fails to be processed.
+ *
+ * @param arg  The server handle.
+ *
+ * @return Always NULL.
+ */
+static void* ucs_stats_server_thread_func(void *arg)
+{
+    ucs_stats_server_h server = arg;
+    struct sockaddr_in recv_addr;
+    socklen_t recv_addr_len;
+    char recv_buf[UCS_STATS_MSG_FRAG_SIZE];
+    ssize_t recv_len;
+    ucs_status_t status;
+
+    ucs_debug("starting server thread");
+    while (!server->stop) {
+        recv_addr_len = sizeof(recv_addr);
+        recv_len = recvfrom(server->sockfd, recv_buf, UCS_STATS_MSG_FRAG_SIZE, 0,
+                            (struct sockaddr*)&recv_addr, &recv_addr_len);
+        if (recv_len < 0) {
+            ucs_error("recvfrom() failed: %s (return value: %ld)", strerror(errno),
+                      (long)recv_len);
+            break;
+        } else if (recv_len == 0) {
+            ucs_debug("Empty receive - ignoring");
+            continue;
+        }
+
+        if (recv_addr.sin_family != AF_INET) {
+            ucs_error("invalid address family from recvfrom()");
+            break;
+        }
+
+        /* Update with new data */
+        /* coverity[tainted_data] */
+        status = ucs_stats_server_update_context(server, &recv_addr, (void*)recv_buf, recv_len);
+        if (status != UCS_OK) {
+            break;
+        }
+
+        ucs_stats_server_clear_old_enitities(server);
+    }
+
+    ucs_debug("terminating server thread");
+    return NULL;
+}
+
+ucs_status_t ucs_stats_server_start(int port,
ucs_stats_server_h *p_server) +{ + ucs_stats_server_h server; + ucs_status_t status; + + server = malloc(sizeof *server); + if (server == NULL) { + ucs_error("Failed to allocate stats context"); + return UCS_ERR_NO_MEMORY; + } + + pthread_mutex_init(&server->entities_lock, NULL); + ucs_list_head_init(&server->curr_stats); + sglib_hashed_stats_entity_t_init(server->entities_hash); + + status = ucs_stats_server_create_socket(port, &server->sockfd, &server->udp_port); + if (status != UCS_OK) { + free(server); + return status; + } + + server->rcvd_packets = 0; + server->stop = 0; + pthread_create(&server->server_thread, NULL, ucs_stats_server_thread_func, + server); + + *p_server = server; + return UCS_OK; +} + +void ucs_stats_server_destroy(ucs_stats_server_h server) +{ + struct sglib_hashed_stats_entity_t_iterator it; + stats_entity_t *entity; + void *retval; + + server->stop = 1; + shutdown(server->sockfd, SHUT_RDWR); + pthread_join(server->server_thread, &retval); + close(server->sockfd); + + ucs_stats_server_purge_stats(server); + + entity = sglib_hashed_stats_entity_t_it_init(&it,server->entities_hash); + while (entity != NULL) { + ucs_stats_server_entity_put(entity); + entity = sglib_hashed_stats_entity_t_it_next(&it); + } + free(server); +} + +int ucs_stats_server_get_port(ucs_stats_server_h server) +{ + return server->udp_port; +} + +ucs_list_link_t *ucs_stats_server_get_stats(ucs_stats_server_h server) +{ + struct sglib_hashed_stats_entity_t_iterator it; + stats_entity_t *entity; + ucs_stats_node_t *node; + ucs_status_t status; + FILE *stream; + + ucs_stats_server_purge_stats(server); + + pthread_mutex_lock(&server->entities_lock); + for (entity = sglib_hashed_stats_entity_t_it_init(&it, server->entities_hash); + entity != NULL; entity = sglib_hashed_stats_entity_t_it_next(&it)) + { + /* Parse the statistics data */ + pthread_mutex_lock(&entity->lock); + stream = fmemopen(entity->completed_buffer, entity->buffer_size, "rb"); + status = 
ucs_stats_deserialize(stream, &node);
+        fclose(stream);
+        pthread_mutex_unlock(&entity->lock);
+
+        if (status == UCS_OK) {
+            ucs_list_add_tail(&server->curr_stats, &node->list);
+        }
+    }
+    pthread_mutex_unlock(&server->entities_lock);
+
+    return &server->curr_stats;
+}
+
+/**
+ * Free every statistics tree on the server's current-stats list and leave the
+ * list empty.
+ */
+void ucs_stats_server_purge_stats(ucs_stats_server_h server)
+{
+    ucs_stats_node_t *node, *tmp;
+
+    ucs_list_for_each_safe(node, tmp, &server->curr_stats, list) {
+        ucs_list_del(&node->list);
+        ucs_stats_free(node);
+    }
+}
+
+/**
+ * @return The server's received-packets counter.
+ */
+unsigned long ucs_stats_server_rcvd_packets(ucs_stats_server_h server)
+{
+    return server->rcvd_packets;
+}
+
+/**
+ * Three-way comparison of two entities by their socket address: first by IP
+ * address, then by port.
+ *
+ * @return Negative, zero or positive value if e1 is respectively less than,
+ *         equal to, or greater than e2.
+ */
+static inline int stats_entity_cmp(stats_entity_t *e1, stats_entity_t *e2)
+{
+    uint32_t addr1 = e1->in_addr.sin_addr.s_addr;
+    uint32_t addr2 = e2->in_addr.sin_addr.s_addr;
+
+    /* Compare explicitly: a boolean '<' is 0 for "greater" as well as for
+     * "equal", and subtracting unsigned values could wrap around. */
+    if (addr1 != addr2) {
+        return (addr1 < addr2) ? -1 : 1;
+    }
+
+    /* Same address - the port of e1 against the port of e2 decides */
+    return (int)ntohs(e1->in_addr.sin_port) - (int)ntohs(e2->in_addr.sin_port);
+}
+
+/**
+ * Hash function for the entity container, combining address and port.
+ */
+static inline int stats_entity_hash(stats_entity_t *e)
+{
+    return (((uint64_t)e->in_addr.sin_addr.s_addr << 16) + (uint64_t)ntohs(e->in_addr.sin_port)) % ENTITY_HASH_SIZE;
+}
+
+SGLIB_DEFINE_LIST_FUNCTIONS(stats_entity_t, stats_entity_cmp, next)
+SGLIB_DEFINE_HASHED_CONTAINER_FUNCTIONS(stats_entity_t, ENTITY_HASH_SIZE, stats_entity_hash)
diff --git a/src/ucs/stats/libstats.c b/src/ucs/stats/libstats.c
new file mode 100644
index 0000000..c469d15
--- /dev/null
+++ b/src/ucs/stats/libstats.c
@@ -0,0 +1,65 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/ + +#include "libstats.h" + +#include +#include +#include + + +#define UCS_STATS_NAME_VALID_CHARS \ + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_" + + +static ucs_status_t ucs_stats_name_check(const char *name) +{ + size_t length, valid_length; + + length = strlen(name); + if (length > UCS_STAT_NAME_MAX) { + ucs_error("stats name '%s' is too long (%zu)", name, length); + return UCS_ERR_INVALID_PARAM; + } + + valid_length = strspn(name, UCS_STATS_NAME_VALID_CHARS); + if (valid_length != length) { + ucs_error("stats name '%s' contains invalid character at offset %zu", + name, valid_length); + return UCS_ERR_INVALID_PARAM; + } + + return UCS_OK;; +} + +ucs_status_t ucs_stats_node_initv(ucs_stats_node_t *node, ucs_stats_class_t *cls, + const char *name, va_list ap) +{ + ucs_status_t status; + unsigned i; + + /* Check class */ + status = ucs_stats_name_check(cls->name); + if (status != UCS_OK) { + return status; + } + for (i = 0; i < cls->num_counters; ++i) { + status = ucs_stats_name_check(cls->counter_names[i]); + if (status != UCS_OK) { + return status; + } + } + + /* Set up node */ + node->cls = cls; + vsnprintf(node->name, UCS_STAT_NAME_MAX, name, ap); + ucs_list_head_init(&node->children[UCS_STATS_INACTIVE_CHILDREN]); + ucs_list_head_init(&node->children[UCS_STATS_ACTIVE_CHILDREN]); + memset(node->counters, 0, cls->num_counters * sizeof(ucs_stats_counter_t)); + + return UCS_OK; +} + diff --git a/src/ucs/stats/libstats.h b/src/ucs/stats/libstats.h new file mode 100644 index 0000000..64abd50 --- /dev/null +++ b/src/ucs/stats/libstats.h @@ -0,0 +1,263 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCS_LIBSTATS_H_ +#define UCS_LIBSTATS_H_ + +#include +#include +#include +#include +#include +#include +#include + + +/* + * Serialization options + */ +enum { + UCS_STATS_SERIALIZE_INACTVIVE = UCS_BIT(0), /* Use "inactive" tree */ + UCS_STATS_SERIALIZE_BINARY = UCS_BIT(1), /* Binary mode */ + UCS_STATS_SERIALIZE_COMPRESS = UCS_BIT(2) /* Compress */ +}; + +#define UCS_STATS_DEFAULT_UDP_PORT 37873 + + +#define UCS_STAT_NAME_MAX 39 + +#define UCS_STATS_NODE_FMT \ + "%s%s" +#define UCS_STATS_NODE_ARG(_node) \ + (_node)->cls->name, (_node)->name + +#define UCS_STATS_INDENT(_is_sum, _indent) _is_sum ? 0 : (_indent) * 2, "" +#define UCS_STATS_IS_LAST_COUNTER(_counters_bits, _current) \ + (_counters_bits > ((2ull<<_current) - 1)) + +typedef struct ucs_stats_server *ucs_stats_server_h; /* Handle to server */ +typedef struct ucs_stats_client *ucs_stats_client_h; /* Handle to client */ + + +typedef enum ucs_stats_children_sel { + UCS_STATS_INACTIVE_CHILDREN, + UCS_STATS_ACTIVE_CHILDREN, + UCS_STATS_CHILDREN_LAST +} ucs_stats_children_sel_t; + + +/* Statistics class */ +struct ucs_stats_class { + const char *name; + unsigned num_counters; + const char* counter_names[]; +}; + +/* + * ucs_stats_node is used to hold the counters, their classes and the + * relationship between them. + * ucs_stats_filter_node is a data structure used to filter the counters + * on the report. + * Therre are 3 types of filtering: Full, Aggregate, and summary + * Following is an example of the data structures in aggregate mode: + * + * ucs_stats_node ucs_stats_filter_node + * -------------- --------------------- + * + * +-----+ +-----+ + * | A-1 |............................> | A-1 | + * +-----+ +-----+ + * A A A A + * | | | | + * | | +----------+ | + * | | | | + * +--+ +--+ +--+--+................. | + * | | | B-1 | : | + * | | +-----+ : | + * | +-----+....|..A................. 
: | + * | | B-2 | | | : : | + * | +-----+ <--+ | V V | + * +-----+......|..........|.............> +-----+ + * | B-3 | || +---------------| B* | + * +-----+ <----+ +-----+ + * cntr1 cntr1* + * cntr2 cntr2* + * cntr3 cntr3* + * + * unfiltered statistics report: + * + * A-1: + * B-1: + * cntr1: 11111 + * cntr2: 22222 + * cntr3: 33333 + * B-2: + * cntr1: 11111 + * cntr2: 22222 + * cntr3: 33333 + * B-3: + * cntr1: 11111 + * cntr2: 22222 + * cntr3: 33333 + * + * filtered statistics report: + * + * A-1: + * B*: + * cntr1: 33333 + * cntr2: 66666 + * cntr3: 99999 + * + */ + +/* In-memory statistics node */ + +struct ucs_stats_node { + ucs_stats_class_t *cls; /* Class info */ + ucs_stats_node_t *parent; /* Hierachy structure */ + char name[UCS_STAT_NAME_MAX + 1]; + /* instance name */ + ucs_list_link_t list; /* nodes sharing same parent */ + ucs_list_link_t children[UCS_STATS_CHILDREN_LAST]; + /* children list head */ + ucs_list_link_t type_list; /* nodes with same class/es + hierarchy */ + ucs_stats_filter_node_t *filter_node; /* ptr to type list head */ + ucs_stats_counter_t counters[1]; /* instance counters */ +}; + +struct ucs_stats_filter_node { + ucs_stats_filter_node_t *parent; + ucs_list_link_t list; /* nodes sharing same parent.*/ + ucs_list_link_t children; + ucs_list_link_t type_list_head; /* nodes with same ancestors classes */ + int type_list_len; /* length of list */ + int ref_count; /* report node when non zero */ + uint64_t counters_bitmask; /* which counters to print */ +}; + +/** + * Initialize statistics node. + * + * @param node Node to initialize. + * @param cls Node class. + * @param name Node name format string. + * @param ap Name formatting arguments. + */ +ucs_status_t ucs_stats_node_initv(ucs_stats_node_t *node, ucs_stats_class_t *cls, + const char *name, va_list ap); + + +/** + * Serialize statistics. + * + * @param stream Destination + * @param root Statistics node root. + * @param options Serialization options. 
+ */
+ucs_status_t ucs_stats_serialize(FILE *stream, ucs_stats_node_t *root, int options);
+
+
+/**
+ * De-serialize statistics.
+ *
+ * @param stream     Source data.
+ * @param p_root     Filled with statistics node root.
+ *
+ * @return UCS_ERR_NO_ELEM if hit EOF.
+ */
+ucs_status_t ucs_stats_deserialize(FILE *stream, ucs_stats_node_t **p_root);
+
+
+/**
+ * Release stats returned by ucs_stats_deserialize().
+ * @param root       Stats to release.
+ */
+void ucs_stats_free(ucs_stats_node_t *root);
+
+
+/**
+ * Initialize statistics client.
+ *
+ * @param server_addr  Address of server machine.
+ * @param port         Port number on server.
+ * @param p_client     Filled with handle to the client.
+ */
+ucs_status_t ucs_stats_client_init(const char *server_addr, int port,
+                                   ucs_stats_client_h *p_client);
+
+
+/**
+ * Destroy statistics client.
+ */
+void ucs_stats_client_cleanup(ucs_stats_client_h client);
+
+
+/**
+ * Send statistics.
+ *
+ * @param client     Client handle.
+ * @param root       Statistics tree root.
+ * @param timestamp  Current statistics timestamp, identifies every "snapshot".
+ */
+ucs_status_t ucs_stats_client_send(ucs_stats_client_h client, ucs_stats_node_t *root,
+                                   uint64_t timestamp);
+
+
+/**
+ * Start a thread running a server which receives statistics.
+ *
+ * @param port       Port number to listen on. 0 - random available port.
+ * @param p_server   Filled with handle to the server.
+ */
+ucs_status_t ucs_stats_server_start(int port, ucs_stats_server_h *p_server);
+
+
+/**
+ * Stop statistics server.
+ * @param server   Handle to statistics server.
+ */
+void ucs_stats_server_destroy(ucs_stats_server_h server);
+
+
+/**
+ * Get port number used by the server, useful if we started it on a random port.
+ *
+ * @param server   Handle to statistics server.
+ *
+ * @return Port number.
+ */
+int ucs_stats_server_get_port(ucs_stats_server_h server);
+
+
+/**
+ * Get current statistics gathered by the server.
The data is valid until the next + * call to any of the following functions: + * - ucs_stats_server_purge_stats + * - ucs_stats_server_cleanup + * - ucs_stats_server_get_stats + * + * @param server Handle to statistics server. + * @return A list of stat trees for all entities gathered by the server. + */ +ucs_list_link_t *ucs_stats_server_get_stats(ucs_stats_server_h server); + + +/** + * Clean up existing statistics. + */ +void ucs_stats_server_purge_stats(ucs_stats_server_h server); + + +/** + * @return Number of packets received by the server. + */ +unsigned long ucs_stats_server_rcvd_packets(ucs_stats_server_h server); + + +#endif /* LIBSTATS_H_ */ diff --git a/src/ucs/stats/serialization.c b/src/ucs/stats/serialization.c new file mode 100644 index 0000000..e1a3785 --- /dev/null +++ b/src/ucs/stats/serialization.c @@ -0,0 +1,586 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "libstats.h" + +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Class table */ +#define UCS_STATS_CLS_HASH_SIZE 127 +#define UCS_STATS_CLSID_HASH(a) ( (uintptr_t)((a)->cls) ) +#define UCS_STATS_CLSID_CMP(a, b) ( ((long)((a)->cls)) - ((long)((b)->cls)) ) +#define UCS_STATS_CLSID_SENTINEL UINT8_MAX + +/* Encode counter size */ +#define UCS_STATS_BITS_PER_COUNTER 2 +#define UCS_STATS_COUNTER_ZERO 0 +#define UCS_STATS_COUNTER_U16 1 +#define UCS_STATS_COUNTER_U32 2 +#define UCS_STATS_COUNTER_U64 3 + + +/* Compression mode */ +#define UCS_STATS_COMPRESSION_NONE 0 +#define UCS_STATS_COMPRESSION_BZIP2 1 + + +/* Statistics data header */ +typedef struct ucs_stats_data_header { + uint32_t version; + uint32_t reserved; + uint32_t compression; + uint32_t num_classes; +} ucs_stats_data_header_t; + + +/* Class id record */ +typedef struct ucs_stats_clsid ucs_stats_clsid_t; +struct ucs_stats_clsid { + uint8_t clsid; + ucs_stats_class_t *cls; + ucs_stats_clsid_t *next; +}; + + +/* 
Save pointer to class table near the root node */ +typedef struct ucs_stats_root_storage { + ucs_stats_class_t **classes; + unsigned num_classes; + ucs_stats_node_t node; +} ucs_stats_root_storage_t; + + +SGLIB_DEFINE_LIST_PROTOTYPES(ucs_stats_clsid_t, UCS_STATS_CLSID_CMP, next) +SGLIB_DEFINE_LIST_FUNCTIONS(ucs_stats_clsid_t, UCS_STATS_CLSID_CMP, next) +SGLIB_DEFINE_HASHED_CONTAINER_PROTOTYPES(ucs_stats_clsid_t, UCS_STATS_CLS_HASH_SIZE, UCS_STATS_CLSID_HASH) +SGLIB_DEFINE_HASHED_CONTAINER_FUNCTIONS(ucs_stats_clsid_t, UCS_STATS_CLS_HASH_SIZE, UCS_STATS_CLSID_HASH) + +#define FREAD(_buf, _size, _stream) \ + { \ + size_t nread = fread(_buf, 1, _size, _stream); \ + assert(nread == _size); \ + } + +#define FWRITE(_buf, _size, _stream) \ + { \ + size_t nwrite = fwrite(_buf, 1, _size, _stream); \ + assert(nwrite == _size); \ + } + +#define FREAD_ONE(_ptr, _stream) \ + FREAD(_ptr, sizeof(*(_ptr)), _stream) + +#define FWRITE_ONE(_ptr, _stream) \ + FWRITE(_ptr, sizeof(*(_ptr)), _stream) + + +static unsigned ucs_stats_get_all_classes_recurs(ucs_stats_node_t *node, + ucs_stats_children_sel_t sel, + ucs_stats_clsid_t **cls_hash) +{ + ucs_stats_clsid_t *elem, search; + ucs_stats_node_t *child; + unsigned count; + + search.cls = node->cls; + if (!sglib_hashed_ucs_stats_clsid_t_find_member(cls_hash, &search)) { + elem = malloc(sizeof *elem); + elem->cls = node->cls; + sglib_hashed_ucs_stats_clsid_t_add(cls_hash, elem); + count = 1; + } else { + count = 0; + } + + ucs_list_for_each(child, &node->children[sel], list) { + count += ucs_stats_get_all_classes_recurs(child, sel, cls_hash); + } + + return count; +} + +static char * ucs_stats_read_str(FILE *stream) +{ + uint8_t tmp; + char *str; + + FREAD_ONE(&tmp, stream); + /* coverity[tainted_data] */ + str = malloc(tmp + 1); + FREAD(str, tmp, stream); + str[tmp] = '\0'; + return str; +} + +static void ucs_stats_write_str(const char *str, FILE *stream) +{ + uint8_t tmp = strlen(str); + + FWRITE_ONE(&tmp, stream); + FWRITE(str, tmp, 
stream); +} + +static void ucs_stats_read_counters(ucs_stats_counter_t *counters, + unsigned num_counters, + FILE *stream) +{ + const unsigned counters_per_byte = 8 / UCS_STATS_BITS_PER_COUNTER; + uint16_t value16; + uint32_t value32; + uint64_t value64; + uint8_t *counter_desc, v; + size_t counter_desc_size; + unsigned i; + + counter_desc_size = ((num_counters + counters_per_byte - 1) / counters_per_byte); + counter_desc = ucs_alloca(counter_desc_size); + FREAD(counter_desc, counter_desc_size, stream); + + for (i = 0; i < num_counters; ++i) { + v = (counter_desc[i / counters_per_byte] >> + ((i % counters_per_byte) * UCS_STATS_BITS_PER_COUNTER)) & 0x3; + switch (v) { + case UCS_STATS_COUNTER_ZERO: + counters[i] = 0; + break; + case UCS_STATS_COUNTER_U16: + FREAD_ONE(&value16, stream); + counters[i] = value16; + break; + case UCS_STATS_COUNTER_U32: + FREAD_ONE(&value32, stream); + counters[i] = value32; + break; + case UCS_STATS_COUNTER_U64: + FREAD_ONE(&value64, stream); + counters[i] = value64; + break; + } + } +} + +static void ucs_stats_write_counters(ucs_stats_counter_t *counters, + unsigned num_counters, + FILE *stream) +{ + const unsigned counters_per_byte = 8 / UCS_STATS_BITS_PER_COUNTER; + ucs_stats_counter_t value; + uint8_t *counter_desc, v; + char *counter_data, *pos; + size_t counter_desc_size; + unsigned i; + + UCS_STATIC_ASSERT((8 % UCS_STATS_BITS_PER_COUNTER) == 0); + counter_desc_size = ((num_counters + counters_per_byte - 1) / counters_per_byte); + counter_desc = ucs_alloca(counter_desc_size); + counter_data = ucs_alloca(num_counters * sizeof(ucs_stats_counter_t)); + + memset(counter_desc, 0, counter_desc_size); + pos = counter_data; + + /* + * First, we have an array with 2 bits per counter describing its size: + * (0 - empty, 1 - 16bit, 2 - 32bit, 3 - 64bit) + * Then, an array of all counters, each one occupying the size listed before. 
+ */ + for (i = 0; i < num_counters; ++i) { + value = counters[i]; + if (value == 0) { + v = UCS_STATS_COUNTER_ZERO; + } else if (value <= USHRT_MAX) { + v = UCS_STATS_COUNTER_U16; + *(uint16_t*)(pos) = value; + pos += sizeof(uint16_t); + } else if (value <= UINT_MAX) { + v = UCS_STATS_COUNTER_U32; + *(uint32_t*)(pos) = value; + pos += sizeof(uint32_t); + } else { + v = UCS_STATS_COUNTER_U64; + *(uint64_t*)(pos) = value; + pos += sizeof(uint64_t); + } + counter_desc[i / counters_per_byte] |= + v << ((i % counters_per_byte) * UCS_STATS_BITS_PER_COUNTER); + } + + FWRITE(counter_desc, counter_desc_size, stream); + FWRITE(counter_data, pos - counter_data, stream); +} + +static void +ucs_stats_serialize_binary_recurs(FILE *stream, ucs_stats_node_t *node, + ucs_stats_children_sel_t sel, + ucs_stats_clsid_t **cls_hash) +{ + ucs_stats_class_t *cls = node->cls; + ucs_stats_clsid_t *elem, search; + ucs_stats_node_t *child; + uint8_t sentinel; + + /* Search the class */ + search.cls = cls; + elem = sglib_hashed_ucs_stats_clsid_t_find_member(cls_hash, &search); + assert(elem != NULL); + + /* Write class ID */ + FWRITE_ONE(&elem->clsid, stream); + + /* Name */ + ucs_stats_write_str(node->name, stream); + + /* Counters */ + ucs_stats_write_counters(node->counters, cls->num_counters, stream); + + /* Children */ + ucs_list_for_each(child, &node->children[sel], list) { + ucs_stats_serialize_binary_recurs(stream, child, sel, cls_hash); + } + + /* Write sentinel which is not valid class id to mark end of children */ + sentinel = UCS_STATS_CLSID_SENTINEL; + FWRITE_ONE(&sentinel, stream); +} + +static ucs_status_t +ucs_stats_serialize_binary(FILE *stream, ucs_stats_node_t *root, + ucs_stats_children_sel_t sel) +{ + ucs_stats_clsid_t* cls_hash[UCS_STATS_CLS_HASH_SIZE]; + struct sglib_hashed_ucs_stats_clsid_t_iterator it; + ucs_stats_class_t *cls; + ucs_stats_clsid_t *elem; + ucs_stats_data_header_t hdr; + unsigned index, counter; + + sglib_hashed_ucs_stats_clsid_t_init(cls_hash); + + /* 
Write header */ + hdr.version = 1; + hdr.compression = UCS_STATS_COMPRESSION_NONE; + hdr.reserved = 0; + hdr.num_classes = ucs_stats_get_all_classes_recurs(root, sel, cls_hash); + assert(hdr.num_classes < UINT8_MAX); + FWRITE_ONE(&hdr, stream); + + /* Write stats node classes */ + index = 0; + for (elem = sglib_hashed_ucs_stats_clsid_t_it_init(&it, cls_hash); + elem != NULL; elem = sglib_hashed_ucs_stats_clsid_t_it_next(&it)) + { + cls = elem->cls; + ucs_stats_write_str(cls->name, stream); + FWRITE_ONE(&cls->num_counters, stream); + for (counter = 0; counter < cls->num_counters; ++counter) { + ucs_stats_write_str(cls->counter_names[counter], stream); + } + elem->clsid = index++; + } + + assert(index == hdr.num_classes); + + /* Write stats nodes */ + ucs_stats_serialize_binary_recurs(stream, root, sel, cls_hash); + + /* Free classes */ + for (elem = sglib_hashed_ucs_stats_clsid_t_it_init(&it, cls_hash); + elem != NULL; elem = sglib_hashed_ucs_stats_clsid_t_it_next(&it)) + { + free(elem); + } + + return UCS_OK; +} + +static ucs_status_t +ucs_stats_serialize_text_recurs_filtered(FILE *stream, + ucs_stats_filter_node_t *filter_node, + unsigned indent) +{ + ucs_stats_filter_node_t *filter_child; + ucs_stats_node_t *node; + unsigned i; + int is_sum = ucs_global_opts.stats_format == UCS_STATS_SUMMARY; + char *nl = is_sum ? "" : "\n"; + char *space = is_sum ? "" : " "; + char *left_b = is_sum ? "{" : ""; + char *rigth_b = is_sum ? 
"} " : ""; + + if (!filter_node->ref_count) { + return UCS_OK; + } + + if (ucs_list_is_empty(&filter_node->type_list_head)) { + ucs_error("no node is associated with node filter"); + return UCS_OK; + } + + node = ucs_list_head(&filter_node->type_list_head, + ucs_stats_node_t, + type_list); + if (filter_node->type_list_len > 1) { + fprintf(stream, "%*s%s*:%s", UCS_STATS_INDENT(is_sum, indent), + node->cls->name, nl); + } else { + if (ucs_global_opts.stats_format == UCS_STATS_SUMMARY) { + fprintf(stream, "%*s%s:%s", + UCS_STATS_INDENT(is_sum, indent), + strlen(node->cls->name) ? node->cls->name : node->name, nl); + + } else { + fprintf(stream, "%*s"UCS_STATS_NODE_FMT":%s", + UCS_STATS_INDENT(is_sum, indent), + UCS_STATS_NODE_ARG(node), nl); + } + } + + /* Root shouldn't be with brackets.*/ + if (filter_node->parent) { + fputs(left_b, stream); + } + + for (i = 0; (i < node->cls->num_counters) && (i < 64); ++i) { + ucs_stats_counter_t counters_acc = 0; + if (filter_node->counters_bitmask & UCS_BIT(i)) { + ucs_stats_node_t * temp_node; + ucs_list_for_each(temp_node, &filter_node->type_list_head, type_list) { + counters_acc += temp_node->counters[i]; + } + + fprintf(stream, "%*s%s:%s%"PRIu64"%s", + UCS_STATS_INDENT(is_sum, indent + 1), + node->cls->counter_names[i], + space, counters_acc, nl); + + /* Don't print space on last counter */ + if (UCS_STATS_IS_LAST_COUNTER(filter_node->counters_bitmask, i) && + is_sum) { + fputs(" ", stream); + } + } + } + + ucs_list_for_each(filter_child, &filter_node->children, list) { + ucs_stats_serialize_text_recurs_filtered(stream, filter_child, + indent + 1); + } + + if (filter_node->parent) { + /* Root shouldn't be with parent brackets.*/ + fputs(rigth_b, stream); + } else { + /* End report with new line.*/ + fputs("\n", stream); + } + + return UCS_OK; +} + +ucs_status_t ucs_stats_serialize(FILE *stream, ucs_stats_node_t *root, int options) +{ + ucs_stats_children_sel_t sel = + (options & UCS_STATS_SERIALIZE_INACTVIVE) ? 
+ UCS_STATS_INACTIVE_CHILDREN : + UCS_STATS_ACTIVE_CHILDREN; + + if (options & UCS_STATS_SERIALIZE_BINARY) { + return ucs_stats_serialize_binary(stream, root, sel); + } else { + return ucs_stats_serialize_text_recurs_filtered(stream, + root->filter_node, + 0); + } +} + +static ucs_status_t +ucs_stats_deserialize_recurs(FILE *stream, ucs_stats_class_t **classes, + unsigned num_classes, size_t headroom, + ucs_stats_node_t **p_root) +{ + ucs_stats_node_t *node, *child; + ucs_stats_class_t *cls; + uint8_t clsid, namelen; + ucs_status_t status; + void *ptr; + + if (headroom >= UINT_MAX) { + return UCS_ERR_INVALID_PARAM; + } + + if (feof(stream)) { + ucs_error("Error parsing statistics - premature end of stream"); + return UCS_ERR_MESSAGE_TRUNCATED; + } + + FREAD_ONE(&clsid, stream); + if (clsid == UCS_STATS_CLSID_SENTINEL) { + return UCS_ERR_NO_MESSAGE; /* Sentinel */ + } + + if (clsid >= num_classes) { + ucs_error("Error parsing statistics - class id out of range"); + return UCS_ERR_OUT_OF_RANGE; + } + + FREAD_ONE(&namelen, stream); + if (namelen >= UCS_STAT_NAME_MAX) { + ucs_error("Error parsing statistics - node name too long"); + return UCS_ERR_OUT_OF_RANGE; /* Name too long */ + } + + cls = classes[clsid]; + ptr = malloc(headroom + sizeof *node + sizeof(ucs_stats_counter_t) * cls->num_counters); + if (ptr == NULL) { + ucs_error("Failed to allocate statistics counters (headroom %zu, %u counters)", + headroom, cls->num_counters); + return UCS_ERR_NO_MEMORY; + } + + node = UCS_PTR_BYTE_OFFSET(ptr, headroom); + + node->cls = cls; + FREAD(node->name, namelen, stream); + node->name[namelen] = '\0'; + ucs_list_head_init(&node->children[UCS_STATS_INACTIVE_CHILDREN]); + ucs_list_head_init(&node->children[UCS_STATS_ACTIVE_CHILDREN]); + + /* Read counters */ + ucs_stats_read_counters(node->counters, cls->num_counters, stream); + + /* Read children */ + do { + status = ucs_stats_deserialize_recurs(stream, classes, num_classes, 0, + &child); + if (status == UCS_OK) { + 
ucs_list_add_tail(&node->children[UCS_STATS_ACTIVE_CHILDREN], &child->list); + } else if (status == UCS_ERR_NO_MESSAGE) { + break; /* Sentinel */ + } else { + ucs_error("ucs_stats_deserialize_recurs returned %s", ucs_status_string(status)); + free(ptr); /* Error TODO free previous children */ + return status; + } + } while (1); + + *p_root = node; + return UCS_OK; +} + +static void ucs_stats_free_classes(ucs_stats_class_t **classes, unsigned num_classes) +{ + unsigned i, j; + + for (i = 0; i < num_classes; ++i) { + free((char*)classes[i]->name); + for (j = 0; j < classes[i]->num_counters; ++j) { + free((char*)classes[i]->counter_names[j]); + } + free(classes[i]); + } + free(classes); +} + +ucs_status_t ucs_stats_deserialize(FILE *stream, ucs_stats_node_t **p_root) +{ + ucs_stats_data_header_t hdr; + ucs_stats_root_storage_t *s; + ucs_stats_class_t **classes, *cls; + unsigned i, j, num_counters; + ucs_status_t status; + size_t nread; + char *name; + + nread = fread(&hdr, 1, sizeof(hdr), stream); + if (nread == 0) { + status = UCS_ERR_NO_ELEM; + goto err; + } + + if (hdr.version != 1) { + ucs_error("invalid file version"); + status = UCS_ERR_UNSUPPORTED; + goto err; + } + + if (!(hdr.num_classes < UINT8_MAX)) { + ucs_error("invalid num classes"); + status = UCS_ERR_OUT_OF_RANGE; + goto err; + } + + /* Read classes */ + classes = malloc(hdr.num_classes * sizeof(*classes)); + for (i = 0; i < hdr.num_classes; ++i) { + name = ucs_stats_read_str(stream); + FREAD_ONE(&num_counters, stream); + + /* coverity[tainted_data] */ + cls = malloc(sizeof *cls + num_counters * sizeof(cls->counter_names[0])); + cls->name = name; + cls->num_counters = num_counters; + + /* coverity[tainted_data] */ + for (j = 0; j < cls->num_counters; ++j) { + cls->counter_names[j] = ucs_stats_read_str(stream); + } + classes[i] = cls; + + } + + /* Read nodes */ + status = ucs_stats_deserialize_recurs(stream, classes, hdr.num_classes, + sizeof(ucs_stats_root_storage_t) - sizeof(ucs_stats_node_t), + 
p_root); + if (status != UCS_OK) { + if (status == UCS_ERR_NO_MESSAGE) { + ucs_error("Error parsing statistics - misplaced sentinel"); + } + goto err_free; + } + + s = ucs_container_of(*p_root, ucs_stats_root_storage_t, node); + s->num_classes = hdr.num_classes; + s->classes = classes; + return UCS_OK; + +err_free: + ucs_stats_free_classes(classes, hdr.num_classes); +err: + return status; +} + +static void ucs_stats_free_recurs(ucs_stats_node_t *node) +{ + ucs_stats_node_t *child, *tmp; + + ucs_list_for_each_safe(child, tmp, &node->children[UCS_STATS_ACTIVE_CHILDREN], list) { + ucs_stats_free_recurs(child); + free(child); + } + ucs_list_for_each_safe(child, tmp, &node->children[UCS_STATS_INACTIVE_CHILDREN], list) { + ucs_stats_free_recurs(child); + free(child); + } +} + +void ucs_stats_free(ucs_stats_node_t *root) +{ + ucs_stats_root_storage_t *s; + + s = ucs_container_of(root, ucs_stats_root_storage_t, node); + ucs_stats_free_recurs(&s->node); + ucs_stats_free_classes(s->classes, s->num_classes); + free(s); +} + diff --git a/src/ucs/stats/stats.c b/src/ucs/stats/stats.c new file mode 100644 index 0000000..29cbde3 --- /dev/null +++ b/src/ucs/stats/stats.c @@ -0,0 +1,783 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "stats.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#ifdef HAVE_LINUX_FUTEX_H +#include +#endif + +const char *ucs_stats_formats_names[] = { + [UCS_STATS_FULL] = "full", + [UCS_STATS_FULL_AGG] = "agg", + [UCS_STATS_SUMMARY] = "summary", + [UCS_STATS_LAST] = NULL +}; + +#if ENABLE_STATS + +enum { + UCS_STATS_FLAG_ON_EXIT = UCS_BIT(0), + UCS_STATS_FLAG_ON_TIMER = UCS_BIT(1), + UCS_STATS_FLAG_ON_SIGNAL = UCS_BIT(2), + + UCS_STATS_FLAG_SOCKET = UCS_BIT(8), + UCS_STATS_FLAG_STREAM = UCS_BIT(9), + UCS_STATS_FLAG_STREAM_CLOSE = UCS_BIT(10), + UCS_STATS_FLAG_STREAM_BINARY = UCS_BIT(11), +}; + +enum { + UCS_ROOT_STATS_RUNTIME, + UCS_ROOT_STATS_LAST +}; + +KHASH_MAP_INIT_STR(ucs_stats_cls, ucs_stats_class_t*) + +typedef struct { + volatile unsigned flags; + + ucs_time_t start_time; + ucs_stats_filter_node_t root_filter_node; + ucs_stats_node_t root_node; + ucs_stats_counter_t root_counters[UCS_ROOT_STATS_LAST]; + + union { + FILE *stream; /* Output stream */ + ucs_stats_client_h client; /* UDP client */ + }; + + union { + int signo; + double interval; + }; + + khash_t(ucs_stats_cls) cls; + + pthread_mutex_t lock; +#ifndef HAVE_LINUX_FUTEX_H + pthread_cond_t cv; +#endif + pthread_t thread; +} ucs_stats_context_t; + +static ucs_stats_context_t ucs_stats_context = { + .flags = 0, + .root_node = {}, + .root_filter_node = {}, + .lock = PTHREAD_MUTEX_INITIALIZER, +#ifndef HAVE_LINUX_FUTEX_H + .cv = PTHREAD_COND_INITIALIZER, +#endif + .thread = (pthread_t)-1 +}; + +static ucs_stats_class_t ucs_stats_root_node_class = { + .name = "", + .num_counters = UCS_ROOT_STATS_LAST, + .counter_names = { + [UCS_ROOT_STATS_RUNTIME] = "runtime" + } +}; + + +#ifdef HAVE_LINUX_FUTEX_H +static inline int +ucs_sys_futex(volatile void *addr1, int op, int val1, struct timespec *timeout, + void *uaddr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, uaddr2, val3); +} 
+#endif + +static void ucs_stats_clean_node(ucs_stats_node_t *node) { + ucs_stats_filter_node_t * temp_filter_node; + ucs_stats_filter_node_t * filter_node; + + filter_node = node->filter_node; + filter_node->type_list_len--; + temp_filter_node = node->filter_node; + + if (temp_filter_node->ref_count) { + while (temp_filter_node != NULL) { + temp_filter_node->ref_count--; + temp_filter_node = temp_filter_node->parent; + } + } + + if (!filter_node->type_list_len) { + ucs_list_del(&filter_node->list); + } + ucs_list_del(&node->type_list); +} + +static void ucs_stats_free_class(ucs_stats_class_t *cls) +{ + unsigned i; + + for (i = 0; i < cls->num_counters; i++) { + ucs_free((void*)cls->counter_names[i]); + } + + ucs_free((void*)cls->name); + ucs_free(cls); +} + +static ucs_stats_class_t *ucs_stats_dup_class(ucs_stats_class_t *cls) +{ + ucs_stats_class_t *dup; + + dup = ucs_calloc(1, sizeof(*cls) + sizeof(*cls->counter_names) * cls->num_counters, + "ucs_stats_class_dup"); + if (!dup) { + ucs_error("failed to allocate statistics class"); + goto err; + } + + dup->name = ucs_strdup(cls->name, "ucs_stats_class_t name"); + if (!dup->name) { + ucs_error("failed to allocate statistics class name"); + goto err_free; + } + + for (dup->num_counters = 0; dup->num_counters < cls->num_counters; dup->num_counters++) { + dup->counter_names[dup->num_counters] = ucs_strdup(cls->counter_names[dup->num_counters], + "ucs_stats_class_t counter"); + if (!dup->counter_names[dup->num_counters]) { + ucs_error("failed to allocate statistics counter name"); + goto err_free; + } + } + + return dup; + +err_free: + ucs_stats_free_class(dup); +err: + return NULL; +} + +static ucs_stats_class_t *ucs_stats_get_class(ucs_stats_class_t *cls) +{ + ucs_stats_class_t *dup; + khiter_t iter; + int r; + + iter = kh_get(ucs_stats_cls, &ucs_stats_context.cls, cls->name); + if (iter != kh_end(&ucs_stats_context.cls)) { + return kh_val(&ucs_stats_context.cls, iter); + } + + dup = ucs_stats_dup_class(cls); + if 
(dup == NULL) { + return NULL; + } + + iter = kh_put(ucs_stats_cls, &ucs_stats_context.cls, dup->name, &r); + ucs_assert_always(r != 0); /* initialize a previously empty hash entry */ + kh_val(&ucs_stats_context.cls, iter) = dup; + return dup; +} + +static void ucs_stats_node_remove(ucs_stats_node_t *node, int make_inactive) +{ + ucs_assert(node != &ucs_stats_context.root_node); + + if (!ucs_list_is_empty(&node->children[UCS_STATS_ACTIVE_CHILDREN])) { + ucs_warn("stats node "UCS_STATS_NODE_FMT" still has active children", + UCS_STATS_NODE_ARG(node)); + } + + pthread_mutex_lock(&ucs_stats_context.lock); + + ucs_list_del(&node->list); + if (make_inactive) { + node->cls = ucs_stats_get_class(node->cls); + if (node->cls) { + ucs_list_add_tail(&node->parent->children[UCS_STATS_INACTIVE_CHILDREN], &node->list); + } else { + /* failed to allocate class duplicate - remove node */ + ucs_stats_clean_node(node); + make_inactive = 0; + } + } else { + ucs_stats_clean_node(node); + } + + pthread_mutex_unlock(&ucs_stats_context.lock); + + if (!make_inactive) { + if (!node->filter_node->type_list_len) { + ucs_free(node->filter_node); + } + ucs_free(node); + } +} + +static void ucs_stats_filter_node_init_root() { + ucs_list_head_init(&ucs_stats_context.root_filter_node.list); + ucs_stats_context.root_filter_node.parent = NULL; + ucs_list_head_init(&ucs_stats_context.root_filter_node.type_list_head); + ucs_list_add_tail(&ucs_stats_context.root_filter_node.type_list_head, + &ucs_stats_context.root_node.type_list); + ucs_stats_context.root_filter_node.counters_bitmask = 0; + ucs_stats_context.root_filter_node.ref_count = 0; + ucs_stats_context.root_filter_node.type_list_len = 1; + ucs_list_head_init(&ucs_stats_context.root_filter_node.children); +} + +static void ucs_stats_node_init_root(const char *name, ...) 
+{ + ucs_status_t status; + va_list ap; + + if (!ucs_stats_is_active()) { + return; + } + + va_start(ap, name); + status = ucs_stats_node_initv(&ucs_stats_context.root_node, + &ucs_stats_root_node_class, name, ap); + ucs_assert_always(status == UCS_OK); + va_end(ap); + + ucs_stats_context.root_node.parent = NULL; + ucs_stats_context.root_node.filter_node = &ucs_stats_context.root_filter_node; + + ucs_stats_filter_node_init_root(); +} + +static ucs_status_t ucs_stats_node_new(ucs_stats_class_t *cls, ucs_stats_node_t **p_node) +{ + ucs_stats_node_t *node; + + node = ucs_malloc(sizeof(ucs_stats_node_t) + + sizeof(ucs_stats_counter_t) * + (cls->num_counters > 0 ? cls->num_counters - 1 : 0), + "stats node"); + if (node == NULL) { + ucs_error("Failed to allocate stats node for %s", cls->name); + return UCS_ERR_NO_MEMORY; + } + + *p_node = node; + return UCS_OK; +} + +static ucs_status_t ucs_stats_filter_node_new(ucs_stats_class_t *cls, ucs_stats_filter_node_t **p_node) +{ + ucs_stats_filter_node_t *node; + + node = ucs_malloc(sizeof(ucs_stats_filter_node_t), + "stats filter node"); + if (node == NULL) { + ucs_error("Failed to allocate stats filter node for %s", cls->name); + return UCS_ERR_NO_MEMORY; + } + + *p_node = node; + return UCS_OK; +} + +static ucs_stats_filter_node_t * ucs_stats_find_class(ucs_stats_filter_node_t *filter_parent, + const char *class_name) { + ucs_stats_filter_node_t *filter_node; + ucs_stats_node_t * node; + + ucs_list_for_each(filter_node, &filter_parent->children, list) { + if (ucs_list_is_empty(&filter_node->type_list_head)) { + ucs_error("type list is empty"); + return NULL; + } + node = ucs_list_head(&filter_node->type_list_head, + ucs_stats_node_t, + type_list); + if (!strcmp(node->cls->name, class_name)) { + return filter_node; + } + } + return NULL; +} + +static void ucs_stats_add_to_filter(ucs_stats_node_t *node, + ucs_stats_filter_node_t * new_filter_node) +{ + ucs_stats_filter_node_t *temp_filter_node; + ucs_stats_filter_node_t 
*filter_node = NULL; + ucs_stats_filter_node_t *filter_parent; + int found = 0; + int filter_index = 0; + int i; + + if (ucs_global_opts.stats_format == UCS_STATS_SUMMARY) { + filter_parent = &ucs_stats_context.root_filter_node; + } else { + filter_parent = node->parent->filter_node; + } + + if (ucs_global_opts.stats_format != UCS_STATS_FULL) { + filter_node = ucs_stats_find_class(filter_parent, node->cls->name); + } + + if (!filter_node) { + filter_node = new_filter_node; + + filter_node->type_list_len = 0; + filter_node->ref_count = 0; + filter_node->counters_bitmask = 0; + ucs_list_head_init(&filter_node->children); + ucs_list_head_init(&filter_node->type_list_head); + filter_node->parent = filter_parent; + ucs_list_add_tail(&filter_parent->children, &filter_node->list); + } + + filter_node->type_list_len++; + ucs_list_add_tail(&filter_node->type_list_head, &node->type_list); + node->filter_node = filter_node; + + for (i = 0; (i < node->cls->num_counters) && (i < 64); ++i) { + filter_index = ucs_config_names_search(ucs_global_opts.stats_filter, + node->cls->counter_names[i]); + if (filter_index >= 0) { + filter_node->counters_bitmask |= UCS_BIT(i); + found = 1; + } + } + + if (found) { + temp_filter_node = filter_node; + while (temp_filter_node != NULL) { + temp_filter_node->ref_count++; + temp_filter_node = temp_filter_node->parent; + } + } +} + +static ucs_status_t ucs_stats_node_add(ucs_stats_node_t *node, + ucs_stats_node_t *parent, + ucs_stats_filter_node_t *filter_node) +{ + ucs_assert(node != &ucs_stats_context.root_node); + if (parent == NULL) { + return UCS_ERR_INVALID_PARAM; + } + + /* Append node to existing tree */ + pthread_mutex_lock(&ucs_stats_context.lock); + ucs_list_add_tail(&parent->children[UCS_STATS_ACTIVE_CHILDREN], &node->list); + node->parent = parent; + ucs_stats_add_to_filter(node, filter_node); + + pthread_mutex_unlock(&ucs_stats_context.lock); + + return UCS_OK; +} + +ucs_status_t ucs_stats_node_alloc(ucs_stats_node_t** p_node, 
ucs_stats_class_t *cls, + ucs_stats_node_t *parent, const char *name, ...) +{ + ucs_stats_node_t *node; + ucs_stats_filter_node_t *filter_node; + ucs_status_t status; + va_list ap; + + if (!ucs_stats_is_active()) { + *p_node = NULL; + return UCS_OK; + } + + status = ucs_stats_node_new(cls, &node); + if (status != UCS_OK) { + return status; + } + + va_start(ap, name); + status = ucs_stats_node_initv(node, cls, name, ap); + va_end(ap); + + if (status != UCS_OK) { + ucs_free(node); + return status; + } + + status = ucs_stats_filter_node_new(node->cls, &filter_node); + if (status != UCS_OK) { + ucs_free(node); + return status; + } + + ucs_trace("allocated stats node '"UCS_STATS_NODE_FMT"'", UCS_STATS_NODE_ARG(node)); + + status = ucs_stats_node_add(node, parent, filter_node); + if (status != UCS_OK) { + ucs_free(node); + ucs_free(filter_node); + return status; + } + + if (node->filter_node != filter_node) { + ucs_free(filter_node); + } + + *p_node = node; + return UCS_OK; +} + +void ucs_stats_node_free(ucs_stats_node_t *node) +{ + if (node == NULL) { + return; + } + + ucs_trace("releasing stats node '"UCS_STATS_NODE_FMT"'", UCS_STATS_NODE_ARG(node)); + + /* If we would dump stats in exit, keep this data instead of releasing it */ + if (ucs_stats_context.flags & UCS_STATS_FLAG_ON_EXIT) { + ucs_stats_node_remove(node, 1); + } else { + ucs_stats_node_remove(node, 0); + } +} + +static void __ucs_stats_dump(int inactive) +{ + ucs_status_t status = UCS_OK; + int options; + + /* Assume locked */ + + UCS_STATS_SET_TIME(&ucs_stats_context.root_node, UCS_ROOT_STATS_RUNTIME, + ucs_stats_context.start_time); + + if (ucs_stats_context.flags & UCS_STATS_FLAG_SOCKET) { + status = ucs_stats_client_send(ucs_stats_context.client, + &ucs_stats_context.root_node, + ucs_get_time()); + } + + if (ucs_stats_context.flags & UCS_STATS_FLAG_STREAM) { + options = 0; + if (ucs_stats_context.flags & UCS_STATS_FLAG_STREAM_BINARY) { + options |= UCS_STATS_SERIALIZE_BINARY; + } + if (inactive) { + 
options |= UCS_STATS_SERIALIZE_INACTVIVE; + } + + status = ucs_stats_serialize(ucs_stats_context.stream, + &ucs_stats_context.root_node, options); + fflush(ucs_stats_context.stream); + } + + if (status != UCS_OK) { + ucs_warn("Failed to dump statistics: %s", ucs_status_string(status)); + } +} + +static void* ucs_stats_thread_func(void *arg) +{ + struct timespec timeout, *ptime; + unsigned flags; + long nsec; + + if (ucs_stats_context.interval > 0) { + nsec = (long)(ucs_stats_context.interval * UCS_NSEC_PER_SEC + 0.5); + timeout.tv_sec = nsec / UCS_NSEC_PER_SEC; + timeout.tv_nsec = nsec % UCS_NSEC_PER_SEC; + ptime = &timeout; + } + else { + ptime = NULL; + } + + /* + * TODO: Switch to use the condvar on all systems, eliminating + * futexes. For now it is kept conditionally to not commit the + * change, runtime-untested on FreeBSD, to working Linux codebase. + */ +#ifdef HAVE_LINUX_FUTEX_H + flags = ucs_stats_context.flags; + while (flags & UCS_STATS_FLAG_ON_TIMER) { + /* Wait for timeout/wakeup */ + ucs_sys_futex(&ucs_stats_context.flags, FUTEX_WAIT, flags, ptime, NULL, 0); + ucs_stats_dump(); + flags = ucs_stats_context.flags; + } +#else + pthread_mutex_lock(&ucs_stats_context.lock); + flags = ucs_stats_context.flags; + while (flags & UCS_STATS_FLAG_ON_TIMER) { + /* Wait for timeout/wakeup */ + pthread_cond_timedwait(&ucs_stats_context.cv, &ucs_stats_context.lock, + ptime); + __ucs_stats_dump(0); + flags = ucs_stats_context.flags; + } + pthread_mutex_unlock(&ucs_stats_context.lock); +#endif + + return NULL; +} + +static void ucs_stats_open_dest() +{ + ucs_status_t status; + char *copy_str, *saveptr; + const char *hostname, *port_str; + const char *next_token; + int need_close; + + copy_str = NULL; + if (!strncmp(ucs_global_opts.stats_dest, "udp:", 4)) { + + copy_str = ucs_strdup(&ucs_global_opts.stats_dest[4], + "statistics dest"); + if (copy_str == NULL) { + return; + } + + saveptr = NULL; + hostname = strtok_r(copy_str, ":", &saveptr); + port_str = strtok_r(NULL, 
":", &saveptr); + + if (hostname == NULL) { + ucs_error("Invalid statistics destination format (%s)", ucs_global_opts.stats_dest); + goto out_free; + } + + status = ucs_stats_client_init(hostname, + port_str ? atoi(port_str) : UCS_STATS_DEFAULT_UDP_PORT, + &ucs_stats_context.client); + if (status != UCS_OK) { + goto out_free; + } + + ucs_stats_context.flags |= UCS_STATS_FLAG_SOCKET; + } else if (strcmp(ucs_global_opts.stats_dest, "") != 0) { + status = ucs_open_output_stream(ucs_global_opts.stats_dest, + UCS_LOG_LEVEL_ERROR, + &ucs_stats_context.stream, + &need_close, &next_token); + if (status != UCS_OK) { + goto out_free; + } + + /* File flags */ + ucs_stats_context.flags |= UCS_STATS_FLAG_STREAM; + if (need_close) { + ucs_stats_context.flags |= UCS_STATS_FLAG_STREAM_CLOSE; + } + + /* Optional: Binary mode */ + if (!strcmp(next_token, ":bin")) { + ucs_stats_context.flags |= UCS_STATS_FLAG_STREAM_BINARY; + } + } + +out_free: + ucs_free(copy_str); +} + +static void ucs_stats_close_dest() +{ + if (ucs_stats_context.flags & UCS_STATS_FLAG_SOCKET) { + ucs_stats_context.flags &= ~UCS_STATS_FLAG_SOCKET; + ucs_stats_client_cleanup(ucs_stats_context.client); + } + if (ucs_stats_context.flags & UCS_STATS_FLAG_STREAM) { + fflush(ucs_stats_context.stream); + if (ucs_stats_context.flags & UCS_STATS_FLAG_STREAM_CLOSE) { + fclose(ucs_stats_context.stream); + } + ucs_stats_context.flags &= ~(UCS_STATS_FLAG_STREAM| + UCS_STATS_FLAG_STREAM_BINARY| + UCS_STATS_FLAG_STREAM_CLOSE); + } +} + +static void ucs_stats_dump_sighandler(int signo) +{ + ucs_stats_dump(); +} + +static void ucs_stats_set_trigger() +{ + char *p; + + if (!strcmp(ucs_global_opts.stats_trigger, "exit")) { + ucs_stats_context.flags |= UCS_STATS_FLAG_ON_EXIT; + } else if (!strncmp(ucs_global_opts.stats_trigger, "timer:", 6)) { + p = ucs_global_opts.stats_trigger + 6; + if (!ucs_config_sscanf_time(p, &ucs_stats_context.interval, NULL)) { + ucs_error("Invalid statistics interval time format: %s", p); + return; + } + + 
ucs_stats_context.flags |= UCS_STATS_FLAG_ON_TIMER; + pthread_create(&ucs_stats_context.thread, NULL, ucs_stats_thread_func, NULL); + } else if (!strncmp(ucs_global_opts.stats_trigger, "signal:", 7)) { + p = ucs_global_opts.stats_trigger + 7; + if (!ucs_config_sscanf_signo(p, &ucs_stats_context.signo, NULL)) { + ucs_error("Invalid statistics signal specification: %s", p); + return; + } + + signal(ucs_stats_context.signo, ucs_stats_dump_sighandler); + ucs_stats_context.flags |= UCS_STATS_FLAG_ON_SIGNAL; + } else if (!strcmp(ucs_global_opts.stats_trigger, "")) { + /* No external trigger */ + } else { + ucs_error("Invalid statistics trigger: %s", ucs_global_opts.stats_trigger); + } +} + +static void ucs_stats_unset_trigger() +{ + void *result; + +#ifdef HAVE_LINUX_FUTEX_H + if (ucs_stats_context.flags & UCS_STATS_FLAG_ON_TIMER) { + ucs_stats_context.flags &= ~UCS_STATS_FLAG_ON_TIMER; + ucs_sys_futex(&ucs_stats_context.flags, FUTEX_WAKE, 1, NULL, NULL, 0); + pthread_join(ucs_stats_context.thread, &result); + } +#else + pthread_mutex_lock(&ucs_stats_context.lock); + if (ucs_stats_context.flags & UCS_STATS_FLAG_ON_TIMER) { + ucs_stats_context.flags &= ~UCS_STATS_FLAG_ON_TIMER; + pthread_cond_broadcast(&ucs_stats_context.cv); + pthread_mutex_unlock(&ucs_stats_context.lock); + pthread_join(ucs_stats_context.thread, &result); + } else { + pthread_mutex_unlock(&ucs_stats_context.lock); + } +#endif + + if (ucs_stats_context.flags & UCS_STATS_FLAG_ON_EXIT) { + ucs_debug("dumping stats"); + __ucs_stats_dump(1); + ucs_stats_context.flags &= ~UCS_STATS_FLAG_ON_EXIT; + } + + if (ucs_stats_context.flags & UCS_STATS_FLAG_ON_SIGNAL) { + ucs_stats_context.flags &= ~UCS_STATS_FLAG_ON_SIGNAL; + signal(ucs_stats_context.signo, SIG_DFL); + } +} + +static void ucs_stats_clean_node_recurs(ucs_stats_node_t *node) +{ + ucs_stats_node_t *child, *tmp; + + if (!ucs_list_is_empty(&node->children[UCS_STATS_ACTIVE_CHILDREN])) { + ucs_warn("stats node "UCS_STATS_NODE_FMT" still has active children", 
+ UCS_STATS_NODE_ARG(node)); + } + + ucs_list_for_each_safe(child, tmp, &node->children[UCS_STATS_INACTIVE_CHILDREN], list) { + ucs_stats_clean_node_recurs(child); + ucs_stats_node_remove(child, 0); + } +} + +void ucs_stats_init() +{ + ucs_assert(ucs_stats_context.flags == 0); + ucs_stats_open_dest(); + + if (!ucs_stats_is_active()) { + ucs_trace("statistics disabled"); + return; + } + + UCS_STATS_START_TIME(ucs_stats_context.start_time); + ucs_stats_node_init_root("%s:%d", ucs_get_host_name(), getpid()); + ucs_stats_set_trigger(); + kh_init_inplace(ucs_stats_cls, &ucs_stats_context.cls); + + ucs_debug("statistics enabled, flags: %c%c%c%c%c%c%c", + (ucs_stats_context.flags & UCS_STATS_FLAG_ON_TIMER) ? 't' : '-', + (ucs_stats_context.flags & UCS_STATS_FLAG_ON_EXIT) ? 'e' : '-', + (ucs_stats_context.flags & UCS_STATS_FLAG_ON_SIGNAL) ? 's' : '-', + (ucs_stats_context.flags & UCS_STATS_FLAG_SOCKET) ? 'u' : '-', + (ucs_stats_context.flags & UCS_STATS_FLAG_STREAM) ? 'f' : '-', + (ucs_stats_context.flags & UCS_STATS_FLAG_STREAM_BINARY) ? 'b' : '-', + (ucs_stats_context.flags & UCS_STATS_FLAG_STREAM_CLOSE) ? 
'c' : '-'); +} + +void ucs_stats_cleanup() +{ + ucs_stats_class_t *cls; + + if (!ucs_stats_is_active()) { + return; + } + + ucs_stats_unset_trigger(); + ucs_stats_clean_node_recurs(&ucs_stats_context.root_node); + ucs_stats_close_dest(); + ucs_assert(ucs_stats_context.flags == 0); + + kh_foreach_value(&ucs_stats_context.cls, cls, { + ucs_stats_free_class(cls); + }); + + kh_destroy_inplace(ucs_stats_cls, &ucs_stats_context.cls); +} + +void ucs_stats_dump() +{ + pthread_mutex_lock(&ucs_stats_context.lock); + __ucs_stats_dump(0); + pthread_mutex_unlock(&ucs_stats_context.lock); +} + +int ucs_stats_is_active() +{ + return ucs_stats_context.flags & (UCS_STATS_FLAG_SOCKET|UCS_STATS_FLAG_STREAM); +} + +ucs_stats_node_t * ucs_stats_get_root() { + return &ucs_stats_context.root_node; +} + +#else + +void ucs_stats_init() +{ +} + +void ucs_stats_cleanup() +{ +} + +void ucs_stats_dump() +{ +} + +int ucs_stats_is_active() +{ + return 0; +} + +ucs_stats_node_t *ucs_stats_get_root() +{ + return NULL; +} +#endif diff --git a/src/ucs/stats/stats.h b/src/ucs/stats/stats.h new file mode 100644 index 0000000..0c81dfb --- /dev/null +++ b/src/ucs/stats/stats.h @@ -0,0 +1,116 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + + +#ifndef UCS_STATS_H_ +#define UCS_STATS_H_ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include + +BEGIN_C_DECLS + +/** @file stats.h */ + +void ucs_stats_init(); +void ucs_stats_cleanup(); +void ucs_stats_dump(); +int ucs_stats_is_active(); +#include "stats_fwd.h" +#if ENABLE_STATS + +#include "libstats.h" + +/** + * Allocate statistics node. + * + * @param p_node Filled with a pointer to new node, or NULL if stats are off. + * @param cls Node class / type. + * @param parent Parent node. + * @param name Node name format. 
+ */ +ucs_status_t ucs_stats_node_alloc(ucs_stats_node_t** p_node, ucs_stats_class_t *cls, + ucs_stats_node_t *parent, const char *name, ...); +void ucs_stats_node_free(ucs_stats_node_t *node); + +#define UCS_STATS_ARG(_arg) , _arg + +#define UCS_STATS_RVAL(_rval) _rval + +#define UCS_STATS_NODE_DECLARE(_node) \ + ucs_stats_node_t* _node; + +#define UCS_STATS_NODE_ALLOC(_p_node, _class, _parent, ...) \ + ucs_stats_node_alloc(_p_node, _class, _parent, ## __VA_ARGS__ , "") + +#define UCS_STATS_NODE_FREE(_node) \ + ucs_stats_node_free(_node) + +#define UCS_STATS_UPDATE_COUNTER(_node, _index, _delta) \ + if (((_delta) != 0) && ((_node) != NULL)) { \ + (_node)->counters[(_index)] += (_delta); \ + } + +#define UCS_STATS_SET_COUNTER(_node, _index, _value) \ + if ((_node) != NULL) { \ + (_node)->counters[(_index)] = (_value); \ + } + +#define UCS_STATS_GET_COUNTER(_node, _index) \ + (((_node) != NULL) ? \ + (_node)->counters[(_index)] : 0) + +#define UCS_STATS_UPDATE_MAX(_node, _index, _value) \ + if ((_node) != NULL) { \ + if ((_node)->counters[(_index)] < (_value)) { \ + (_node)->counters[(_index)] = (_value); \ + } \ + } + +#define UCS_STATS_START_TIME(_start_time) \ + { \ + _start_time = ucs_get_time(); \ + ucs_compiler_fence(); \ + } + +#define UCS_STATS_UPDATE_TIME(_node, _index, _start_time) \ + { \ + ucs_compiler_fence(); \ + UCS_STATS_UPDATE_COUNTER(_node, _index, \ + (long)ucs_time_to_nsec(ucs_get_time() - (_start_time))); \ + } + +#define UCS_STATS_SET_TIME(_node, _index, _start_time) \ + { \ + ucs_compiler_fence(); \ + UCS_STATS_SET_COUNTER(_node, _index, \ + (long)ucs_time_to_nsec(ucs_get_time() - (_start_time))); \ + } + +#else + +#define UCS_STATS_ARG(_arg) +#define UCS_STATS_RVAL(_rval) NULL +#define UCS_STATS_NODE_DECLARE(_node) +#define UCS_STATS_NODE_ALLOC(_p_node, _class, _parent, ...) 
ucs_empty_function_return_success() +#define UCS_STATS_NODE_FREE(_node) +#define UCS_STATS_UPDATE_COUNTER(_node, _index, _delta) +#define UCS_STATS_SET_COUNTER(_node, _index, _value) +#define UCS_STATS_GET_COUNTER(_node, _index) 0 +#define UCS_STATS_UPDATE_MAX(_node, _index, _value) +#define UCS_STATS_START_TIME(_start_time) +#define UCS_STATS_UPDATE_TIME(_node, _index, _start_time) +#define UCS_STATS_SET_TIME(_node, _index, _start_time) + +#endif + +END_C_DECLS + +#endif diff --git a/src/ucs/stats/stats_fwd.h b/src/ucs/stats/stats_fwd.h new file mode 100644 index 0000000..5dd64c9 --- /dev/null +++ b/src/ucs/stats/stats_fwd.h @@ -0,0 +1,35 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_STATS_FD_H_ +#define UCS_STATS_FD_H_ + +#include + +#include + +BEGIN_C_DECLS + +/** @file stats_fwd.h */ + +typedef uint64_t ucs_stats_counter_t; /* Stats counter*/ +typedef struct ucs_stats_class ucs_stats_class_t; /* Stats class */ +typedef struct ucs_stats_node ucs_stats_node_t; /* Stats node */ +typedef struct ucs_stats_filter_node ucs_stats_filter_node_t; /* Stats filter node */ + +typedef enum { + UCS_STATS_FULL, /* Full statistics report */ + UCS_STATS_FULL_AGG, /* Full statistics report */ + UCS_STATS_SUMMARY, /* Summary statistics report */ + UCS_STATS_LAST +} ucs_stats_formats_t; + +extern const char *ucs_stats_formats_names[]; +ucs_stats_node_t * ucs_stats_get_root(void); + +END_C_DECLS + +#endif /* STATS_FD_H_ */ diff --git a/src/ucs/stats/stats_parser.c b/src/ucs/stats/stats_parser.c new file mode 100644 index 0000000..59b267e --- /dev/null +++ b/src/ucs/stats/stats_parser.c @@ -0,0 +1,52 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "stats.h" + +/* + * Dump binary statistics file to stdout. + * Usage: ucs_stats_parser [ file1 ] [ file2 ] ... 
+ */ + +static ucs_status_t dump_file(const char *filename) +{ + ucs_stats_node_t *root; + ucs_status_t status; + FILE *stream; + + stream = fopen(filename, "rb"); + if (stream == NULL) { + fprintf(stderr, "Could not open %s\n", filename); + return UCS_ERR_IO_ERROR; + } + + while (!feof(stream)) { + status = ucs_stats_deserialize(stream, &root); + if (status != UCS_OK) { + goto out; + } + + ucs_stats_serialize(stdout, root, 0); + ucs_stats_free(root); + } + + status = UCS_OK; + +out: + fclose(stream); + return status; +} + +int main(int argc, char **argv) +{ + int i; + + for (i = 1; i < argc; ++i) { + dump_file(argv[i]); + } + + return 0; +} diff --git a/src/ucs/sys/checker.h b/src/ucs/sys/checker.h new file mode 100644 index 0000000..8fe0fde --- /dev/null +++ b/src/ucs/sys/checker.h @@ -0,0 +1,61 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCS_CHECKER_H_ +#define UCS_CHECKER_H_ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +/* + * Valgrind support + */ +#ifndef NVALGRIND +# include +# ifndef VALGRIND_MAKE_MEM_DEFINED +# define VALGRIND_MAKE_MEM_DEFINED(p, n) VALGRIND_MAKE_READABLE(p, n) +# endif +# ifndef VALGRIND_MAKE_MEM_UNDEFINED +# define VALGRIND_MAKE_MEM_UNDEFINED(p, n) VALGRIND_MAKE_WRITABLE(p, n) +# endif +#else +# define VALGRIND_MAKE_MEM_DEFINED(p, n) +# define VALGRIND_MAKE_MEM_UNDEFINED(p, n) +# define VALGRIND_MAKE_MEM_NOACCESS(p, n) +# define VALGRIND_CREATE_MEMPOOL(n,p,x) +# define VALGRIND_DESTROY_MEMPOOL(p) +# define VALGRIND_MEMPOOL_ALLOC(n,p,x) +# define VALGRIND_MEMPOOL_FREE(n,p) +# define VALGRIND_MALLOCLIKE_BLOCK(p,s,r,z) +# define VALGRIND_FREELIKE_BLOCK(p,r) +# define VALGRIND_CHECK_MEM_IS_DEFINED(p, n) ({(uintptr_t)0;}) +# define VALGRIND_COUNT_ERRORS 0 +# define VALGRIND_COUNT_LEAKS(a,b,c,d) { a = b = c = d = 0; } +# define RUNNING_ON_VALGRIND 0 +# define VALGRIND_PRINTF(...) 
+#endif + + +/* + * BullsEye Code Coverage tool + */ +#if _BullseyeCoverage +#define BULLSEYE_ON 1 +#define BULLSEYE_EXCLUDE_START #pragma BullseyeCoverage off +#define BULLSEYE_EXCLUDE_END #pragma BullseyeCoverage on +#define BULLSEYE_EXCLUDE_BLOCK_START "BullseyeCoverage save off"; +#define BULLSEYE_EXCLUDE_BLOCK_END "BullseyeCoverage restore"; +#else +#define BULLSEYE_ON 0 +#define BULLSEYE_EXCLUDE_START +#define BULLSEYE_EXCLUDE_END +#define BULLSEYE_EXCLUDE_BLOCK_START +#define BULLSEYE_EXCLUDE_BLOCK_END +#endif + + +#endif diff --git a/src/ucs/sys/compiler.h b/src/ucs/sys/compiler.h new file mode 100644 index 0000000..4190dfc --- /dev/null +++ b/src/ucs/sys/compiler.h @@ -0,0 +1,103 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_COMPILER_H_ +#define UCS_COMPILER_H_ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "preprocessor.h" +#include "compiler_def.h" + +#include +#include +#include +#ifdef HAVE_ALLOCA_H +#include +#endif + +#ifndef ULLONG_MAX +#define ULLONG_MAX (__LONG_LONG_MAX__ * 2ULL + 1) +#endif + + +#ifdef __ICC +# pragma warning(disable: 268) +#endif + +/* A function which should not be optimized */ +#if defined(HAVE_ATTRIBUTE_NOOPTIMIZE) && (HAVE_ATTRIBUTE_NOOPTIMIZE == 1) +#define UCS_F_NOOPTIMIZE __attribute__((optimize("O0"))) +#else +#define UCS_F_NOOPTIMIZE +#endif + + +/** + * Copy words from _src to _dst. + * + * @param _dst_type Type to use for destination buffer. + * @param _dst Destination buffer. + * @param _src_type Type to use for source buffer. + * @param _src Source buffer. + * @param _size Number of bytes to copy. 
+ */ +#define UCS_WORD_COPY(_dst_type, _dst, _src_type, _src, _size) \ + { \ + unsigned i; \ + UCS_STATIC_ASSERT(sizeof(_src_type) == sizeof(_dst_type)); \ + for (i = 0; i < (_size) / sizeof(_src_type); ++i) { \ + *((_dst_type*)(_dst) + i) = *((_src_type*)(_src) + i); \ + } \ + } + +#define UCS_ALLOCA_MAX_SIZE 1200 + +/** + * alloca which makes sure the size is small enough. + */ +#define ucs_alloca(_size) \ + ({ \ + ucs_assertv((_size) <= UCS_ALLOCA_MAX_SIZE, "alloca(%zu)", (size_t)(_size)); \ + alloca(_size); \ + }) + +/** + * suppress unaligned pointer warning + */ +#define ucs_unaligned_ptr(_ptr) ({void *_p = (void*)(_ptr); _p;}) + + +/** + * Define cache-line padding variable inside a structure + * + * @param ... List of types, of the variables which should be padded to cache line. + */ +#define UCS_CACHELINE_PADDING(...) \ + char UCS_PP_APPEND_UNIQUE_ID(pad)[UCS_SYS_CACHE_LINE_SIZE - \ + UCS_CACHELINE_PADDING_MISALIGN(__VA_ARGS__)] +#define UCS_CACHELINE_PADDING_SIZEOF(_, _x) \ + + sizeof(_x) +#define UCS_CACHELINE_PADDING_MISALIGN(...) \ + ((UCS_PP_FOREACH(UCS_CACHELINE_PADDING_SIZEOF, _, __VA_ARGS__)) % UCS_SYS_CACHE_LINE_SIZE) + + +/* + * Define code which runs at global constructor phase + */ +#define UCS_STATIC_INIT \ + static void UCS_F_CTOR UCS_PP_APPEND_UNIQUE_ID(ucs_initializer_ctor)() + + +/* + * Define code which runs at global destructor phase + */ +#define UCS_STATIC_CLEANUP \ + static void UCS_F_DTOR UCS_PP_APPEND_UNIQUE_ID(ucs_initializer_dtor)() + +#endif diff --git a/src/ucs/sys/compiler_def.h b/src/ucs/sys/compiler_def.h new file mode 100644 index 0000000..aaff61d --- /dev/null +++ b/src/ucs/sys/compiler_def.h @@ -0,0 +1,184 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + + +#ifndef UCS_COMPILER_DEF_H +#define UCS_COMPILER_DEF_H + +/* Note: Place "@file .h" after BEGIN_C_DECS + * to avoid bugs in a documentation */ +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif + +/* + * Assertions which are checked in compile-time + * + * Usage: UCS_STATIC_ASSERT(condition) + */ +#define UCS_STATIC_ASSERT(_cond) \ + switch(0) {case 0:case (_cond):;} + +/* Aliasing structure */ +#define UCS_S_MAY_ALIAS __attribute__((may_alias)) + +/* A function without side effects */ +#define UCS_F_PURE __attribute__((pure)) + +/* A function which does not return */ +#define UCS_F_NORETURN __attribute__((noreturn)) + +/* Packed structure */ +#define UCS_S_PACKED __attribute__((packed)) + +/* Avoid inlining the function */ +#define UCS_F_NOINLINE __attribute__ ((noinline)) + +/* Shared library constructor and destructor */ +#define UCS_F_CTOR __attribute__((constructor)) +#define UCS_F_DTOR __attribute__((destructor)) + +/* Silence "defined but not used" error for static function */ +#define UCS_F_MAYBE_UNUSED __attribute__((used)) + +/* Always inline the function */ +#ifdef __GNUC__ +#define UCS_F_ALWAYS_INLINE inline __attribute__ ((always_inline)) +#else +#define UCS_F_ALWAYS_INLINE inline +#endif + +/* Silence "uninitialized variable" for stupid compilers (gcc 4.1) + * which can't optimize properly. + */ +#if (((__GNUC__ == 4) && (__GNUC_MINOR__ == 1)) || !defined(__OPTIMIZE__)) +# define UCS_V_INITIALIZED(_v) (_v = (typeof(_v))0) +#else +# define UCS_V_INITIALIZED(_v) ((void)0) +#endif + +/* The i-th bit */ +#define UCS_BIT(i) (1ul << (i)) + +/* Mask of bits 0..i-1 */ +#define UCS_MASK(i) (UCS_BIT(i) - 1) + +/* + * Enable compiler checks for printf-like formatting. 
+ * + * @param fmtargN number of formatting argument + * @param vargN number of variadic argument + */ +#define UCS_F_PRINTF(fmtargN, vargN) __attribute__((format(printf, fmtargN, vargN))) + +/* Unused variable */ +#define UCS_V_UNUSED __attribute__((unused)) + +/* Aligned variable */ +#define UCS_V_ALIGNED(_align) __attribute__((aligned(_align))) + +/* Used for labels */ +#define UCS_EMPTY_STATEMENT {} + +/* Helper macro for address arithmetic in bytes */ +#define UCS_PTR_BYTE_OFFSET(_ptr, _offset) \ + ((void *)((intptr_t)(_ptr) + (intptr_t)(_offset))) + +/* Helper macro to calculate an address with offset equal to size of _type */ +#define UCS_PTR_TYPE_OFFSET(_ptr, _type) \ + ((void *)((typeof(_type) *)(_ptr) + 1)) + +/* Helper macro to calculate ptr difference (_end - _start) */ +#define UCS_PTR_BYTE_DIFF(_start, _end) \ + ((ptrdiff_t)((uintptr_t)(_end) - (uintptr_t)(_start))) + + +/** + * Size of statically-declared array + */ +#define ucs_static_array_size(_array) \ + ({ \ + UCS_STATIC_ASSERT((void*)&(_array) == (void*)&((_array)[0])); \ + ( sizeof(_array) / sizeof((_array)[0]) ); \ + }) + +/** + * @return count of elements in const-size array + */ +#define ucs_array_size(_array) \ + (sizeof(_array) / sizeof((_array)[0])) + +/** + * @return Offset of _member in _type. _type is a structure type. + */ +#define ucs_offsetof(_type, _member) \ + ((unsigned long)&( ((_type*)0)->_member )) + +/** + * Get a pointer to a struct containing a member. + * + * @param _ptr Pointer to the member. + * @param _type Container type. + * @param _member Name of the member inside the container. + + * @return Address of the container structure. + */ +#define ucs_container_of(_ptr, _type, _member) \ + ( (_type*)( (char*)(void*)(_ptr) - ucs_offsetof(_type, _member) ) ) + + +/** + * @return Address of a derived structure. It must have a "super" member at offset 0. + * NOTE: we use the built-in offsetof here because we can't use ucs_offsetof() in + * a constant expression. 
+ */ +#define ucs_derived_of(_ptr, _type) \ + ({\ + UCS_STATIC_ASSERT(offsetof(_type, super) == 0) \ + ucs_container_of(_ptr, _type, super); \ + }) + +/** + * @param _type Structure type. + * @param _field Field of structure. + * + * @return Size of _field in _type. + */ +#define ucs_field_sizeof(_type, _field) \ + sizeof(((_type*)0)->_field) + +/** + * @param _type Structure type. + * @param _field Field of structure. + * + * @return Type of _field in _type. + */ +#define ucs_field_type(_type, _field) \ + typeof(((_type*)0)->_field) + +/** + * Prevent compiler from reordering instructions + */ +#define ucs_compiler_fence() asm volatile(""::: "memory") + +/** + * Prefetch cache line + */ +#define ucs_prefetch(p) __builtin_prefetch(p) + +/* Branch prediction */ +#define ucs_likely(x) __builtin_expect(x, 1) +#define ucs_unlikely(x) __builtin_expect(x, 0) + +/* Check if an expression is a compile-time constant */ +#define ucs_is_constant(expr) __builtin_constant_p(expr) + +#endif /* UCS_COMPILER_DEF_H */ diff --git a/src/ucs/sys/event_set.c b/src/ucs/sys/event_set.c new file mode 100644 index 0000000..06de682 --- /dev/null +++ b/src/ucs/sys/event_set.c @@ -0,0 +1,234 @@ +/** + * Copyright (C) Hiroyuki Sato. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "event_set.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +enum { + UCS_SYS_EVENT_SET_EXTERNAL_EVENT_FD = UCS_BIT(0), +}; + +struct ucs_sys_event_set { + int event_fd; + unsigned flags; +}; + +const unsigned ucs_sys_event_set_max_wait_events = + UCS_ALLOCA_MAX_SIZE / sizeof(struct epoll_event); + + +static inline int ucs_event_set_map_to_raw_events(int events) +{ + int raw_events = 0; + + if (events & UCS_EVENT_SET_EVREAD) { + raw_events |= EPOLLIN; + } + if (events & UCS_EVENT_SET_EVWRITE) { + raw_events |= EPOLLOUT; + } + if (events & UCS_EVENT_SET_EVERR) { + raw_events |= EPOLLERR; + } + if (events & UCS_EVENT_SET_EDGE_TRIGGERED) { + raw_events |= EPOLLET; + } + return raw_events; +} + +static inline int ucs_event_set_map_to_events(int raw_events) +{ + int events = 0; + + if (raw_events & EPOLLIN) { + events |= UCS_EVENT_SET_EVREAD; + } + if (raw_events & EPOLLOUT) { + events |= UCS_EVENT_SET_EVWRITE; + } + if (raw_events & EPOLLERR) { + events |= UCS_EVENT_SET_EVERR; + } + if (raw_events & EPOLLET) { + events |= UCS_EVENT_SET_EDGE_TRIGGERED; + } + return events; +} + +static ucs_sys_event_set_t *ucs_event_set_alloc(int event_fd, unsigned flags) +{ + ucs_sys_event_set_t *event_set; + + event_set = ucs_malloc(sizeof(ucs_sys_event_set_t), "ucs_sys_event_set"); + if (event_set == NULL) { + ucs_error("unable to allocate memory ucs_sys_event_set_t object"); + return NULL; + } + + event_set->flags = flags; + event_set->event_fd = event_fd; + return event_set; +} + +ucs_status_t ucs_event_set_create_from_fd(ucs_sys_event_set_t **event_set_p, + int event_fd) +{ + *event_set_p = ucs_event_set_alloc(event_fd, + UCS_SYS_EVENT_SET_EXTERNAL_EVENT_FD); + if (*event_set_p == NULL) { + return UCS_ERR_NO_MEMORY; + } + + return UCS_OK; +} + +ucs_status_t ucs_event_set_create(ucs_sys_event_set_t **event_set_p) +{ + ucs_status_t status; + int event_fd; + + /* 
Create epoll set the thread will wait on */ + event_fd = epoll_create(1); + if (event_fd < 0) { + ucs_error("epoll_create() failed: %m"); + return UCS_ERR_IO_ERROR; + } + + *event_set_p = ucs_event_set_alloc(event_fd, 0); + if (*event_set_p == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_close_event_fd; + } + + return UCS_OK; + +err_close_event_fd: + close(event_fd); + return status; +} + +ucs_status_t ucs_event_set_add(ucs_sys_event_set_t *event_set, int fd, + ucs_event_set_type_t events, void *callback_data) +{ + struct epoll_event raw_event; + int ret; + + memset(&raw_event, 0, sizeof(raw_event)); + raw_event.events = ucs_event_set_map_to_raw_events(events); + raw_event.data.ptr = callback_data; + + ret = epoll_ctl(event_set->event_fd, EPOLL_CTL_ADD, fd, &raw_event); + if (ret < 0) { + ucs_error("epoll_ctl(event_fd=%d, ADD, fd=%d) failed: %m", + event_set->event_fd, fd); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +ucs_status_t ucs_event_set_mod(ucs_sys_event_set_t *event_set, int fd, + ucs_event_set_type_t events, void *callback_data) +{ + struct epoll_event raw_event; + int ret; + + memset(&raw_event, 0, sizeof(raw_event)); + raw_event.events = ucs_event_set_map_to_raw_events(events); + raw_event.data.ptr = callback_data; + + ret = epoll_ctl(event_set->event_fd, EPOLL_CTL_MOD, fd, &raw_event); + if (ret < 0) { + ucs_error("epoll_ctl(event_fd=%d, MOD, fd=%d) failed: %m", + event_set->event_fd, fd); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +ucs_status_t ucs_event_set_del(ucs_sys_event_set_t *event_set, int fd) +{ + int ret; + + ret = epoll_ctl(event_set->event_fd, EPOLL_CTL_DEL, fd, NULL); + if (ret < 0) { + ucs_error("epoll_ctl(event_fd=%d, DEL, fd=%d) failed: %m", + event_set->event_fd, fd); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +ucs_status_t ucs_event_set_wait(ucs_sys_event_set_t *event_set, + unsigned *num_events, int timeout_ms, + ucs_event_set_handler_t event_set_handler, + void *arg) +{ + struct epoll_event 
*events; + int nready, i, io_events; + + ucs_assert(event_set_handler != NULL); + ucs_assert(num_events != NULL); + ucs_assert(*num_events <= ucs_sys_event_set_max_wait_events); + + events = ucs_alloca(sizeof(*events) * *num_events); + + nready = epoll_wait(event_set->event_fd, events, *num_events, timeout_ms); + if (ucs_unlikely(nready < 0)) { + *num_events = 0; + if (errno == EINTR) { + return UCS_INPROGRESS; + } + ucs_error("epoll_wait() failed: %m"); + return UCS_ERR_IO_ERROR; + } + + ucs_assert(nready <= *num_events); + ucs_trace_poll("epoll_wait(event_fd=%d, num_events=%u, timeout=%d) " + "returned %u", + event_set->event_fd, *num_events, timeout_ms, nready); + + for (i = 0; i < nready; i++) { + io_events = ucs_event_set_map_to_events(events[i].events); + event_set_handler(events[i].data.ptr, io_events, arg); + } + + *num_events = nready; + return UCS_OK; +} + +void ucs_event_set_cleanup(ucs_sys_event_set_t *event_set) +{ + if (!(event_set->flags & UCS_SYS_EVENT_SET_EXTERNAL_EVENT_FD)) { + close(event_set->event_fd); + } + ucs_free(event_set); +} + +ucs_status_t ucs_event_set_fd_get(ucs_sys_event_set_t *event_set, + int *event_fd_p) +{ + ucs_assert(event_set != NULL); + *event_fd_p = event_set->event_fd; + return UCS_OK; +} diff --git a/src/ucs/sys/event_set.h b/src/ucs/sys/event_set.h new file mode 100644 index 0000000..b333d20 --- /dev/null +++ b/src/ucs/sys/event_set.h @@ -0,0 +1,141 @@ +/** + * Copyright (C) Hiroyuki Sato. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCS_EVENT_SET_H +#define UCS_EVENT_SET_H + +#include + +/** + * ucs_sys_event_set_t structure used in ucs_event_set_XXX functions. + * + */ +typedef struct ucs_sys_event_set ucs_sys_event_set_t; + + +/** + * ucs_event_set_handler call this handler for notifying event + * + * @param [in] callback_data User data which set in ucs_event_set_add(). + * @param [in] event Detection event. Sets of ucs_event_set_type_t. 
+ * @param [in] arg User data which set in ucs_event_set_wait(). + * + */ +typedef void (*ucs_event_set_handler_t)(void *callback_data, int event, + void *arg); + +/** + * ucs_event_set_type_t member is a bit set composed using the following + * available event types + */ +typedef enum { + UCS_EVENT_SET_EVREAD = UCS_BIT(0), + UCS_EVENT_SET_EVWRITE = UCS_BIT(1), + UCS_EVENT_SET_EVERR = UCS_BIT(2), + UCS_EVENT_SET_EDGE_TRIGGERED = UCS_BIT(3) +} ucs_event_set_type_t; + +/* The maximum possible number of events based on system constraints */ +extern const unsigned ucs_sys_event_set_max_wait_events; + +/** + * Allocate ucs_sys_event_set_t structure and assign provided file + * descriptor to wait for events on. + * + * @param [out] event_set_p Event set pointer to initialize. + * @param [in] event_fd File descriptor to wait for events on. + * + * @return UCS_OK on success or an error code on failure. + */ +ucs_status_t ucs_event_set_create_from_fd(ucs_sys_event_set_t **event_set_p, + int event_fd); + +/** + * Allocate ucs_sys_event_set_t structure. + * + * @param [out] event_set_p Event set pointer to initialize. + * + * @return UCS_OK on success or an error code on failure. + */ +ucs_status_t ucs_event_set_create(ucs_sys_event_set_t **event_set_p); + +/** + * Register the target event. + * + * @param [in] event_set Event set created by ucs_event_set_create. + * @param [in] fd File descriptor to register in the event set. + * @param [in] events Operation events. + * @param [in] callback_data ucs_event_set_handler_t accepts this data. + * + * @return UCS_OK on success or an error code on failure. + */ +ucs_status_t ucs_event_set_add(ucs_sys_event_set_t *event_set, int fd, + ucs_event_set_type_t events, + void *callback_data); + +/** + * Modify the target event. + * + * @param [in] event_set Event set created by ucs_event_set_create. + * @param [in] fd Register the target file descriptor fd. + * @param [in] events Operation events. 
+ * @param [in] callback_data ucs_event_set_handler_t accepts this data. + * + * @return UCS_OK on success or an error code on failure. + */ +ucs_status_t ucs_event_set_mod(ucs_sys_event_set_t *event_set, int fd, + ucs_event_set_type_t events, + void *callback_data); + +/** + * Remove the target event. + * + * @param [in] event_set Event set created by ucs_event_set_create. + * @param [in] fd Register the target file descriptor fd. + * + * @return UCS_OK on success or an error code on failure. + */ +ucs_status_t ucs_event_set_del(ucs_sys_event_set_t *event_set, int fd); + +/** + * Wait for an I/O events + * + * @param [in] event_set Event set created by ucs_event_set_create. + * @param [in/out] num_events Number of expected/read events. + * @param [in] timeout_ms Timeout period in ms. + * @param [in] event_set_handler Callback functions. + * @param [in] arg User data variables. + * + * @return return UCS_OK on success, UCS_INPROGRESS - call was interrupted by a + * signal handler, UCS_ERR_IO_ERROR - an error occurred during waiting + * for I/O events. + */ +ucs_status_t ucs_event_set_wait(ucs_sys_event_set_t *event_set, + unsigned *num_events, int timeout_ms, + ucs_event_set_handler_t event_set_handler, + void *arg); + +/** + * Cleanup event set + * + * @param [in] event_set Event set created by ucs_event_set_create. + * + */ +void ucs_event_set_cleanup(ucs_sys_event_set_t *event_set); + +/** + * Get file descriptor for watching events. + * + * @param [in] event_set Event set created by ucs_event_set_create. + * @param [out] event_fd_p File descriptor that is used by Event set to wait + * for events on. + * + * @return UCS_OK on success or an error code on failure. + */ +ucs_status_t ucs_event_set_fd_get(ucs_sys_event_set_t *event_set, + int *event_fd_p); + +#endif diff --git a/src/ucs/sys/init.c b/src/ucs/sys/init.c new file mode 100644 index 0000000..64ad045 --- /dev/null +++ b/src/ucs/sys/init.c @@ -0,0 +1,104 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 
2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* run-time CPU detection: exit(1) if a flag listed in the build-time CPU_FLAGS string is not in the table below or is not reported by ucs_arch_get_cpu_flag() */ +static UCS_F_NOOPTIMIZE void ucs_check_cpu_flags(void) +{ + char str[256]; + char *p_str; + int cpu_flags; + struct { + const char* flag; + ucs_cpu_flag_t value; + } *p_flags, + cpu_flags_array[] = { + { "cmov", UCS_CPU_FLAG_CMOV }, + { "mmx", UCS_CPU_FLAG_MMX }, + { "mmx2", UCS_CPU_FLAG_MMX2 }, + { "sse", UCS_CPU_FLAG_SSE }, + { "sse2", UCS_CPU_FLAG_SSE2 }, + { "sse3", UCS_CPU_FLAG_SSE3 }, + { "ssse3", UCS_CPU_FLAG_SSSE3 }, + { "sse41", UCS_CPU_FLAG_SSE41 }, + { "sse42", UCS_CPU_FLAG_SSE42 }, + { "avx", UCS_CPU_FLAG_AVX }, + { "avx2", UCS_CPU_FLAG_AVX2 }, + { NULL, UCS_CPU_FLAG_UNKNOWN }, + }; + + cpu_flags = ucs_arch_get_cpu_flag(); + if (UCS_CPU_FLAG_UNKNOWN == cpu_flags) { + return ; + } + snprintf(str, sizeof(str), "%s", UCS_PP_MAKE_STRING(CPU_FLAGS)); /* unlike strncpy(), always NUL-terminates str before strtok() scans it */ + + p_str = strtok(str, " |\t\n\r"); + while (p_str) { + p_flags = cpu_flags_array; + while (p_flags && p_flags->flag) { + if (!strcmp(p_str, p_flags->flag)) { + if (!(cpu_flags & p_flags->value)) { + fprintf(stderr, "[%s:%d] FATAL: UCX library was compiled with %s" + " but CPU does not support it.\n", + ucs_get_host_name(), getpid(), p_flags->flag); + exit(1); + } + break; + } + p_flags++; + } + if (NULL == p_flags->flag) { + fprintf(stderr, "[%s:%d] FATAL: UCX library was compiled with %s" + " but CPU does not support it.\n", + ucs_get_host_name(), getpid(), p_str); + exit(1); + } + p_str = strtok(NULL, " |\t\n\r"); + } +} + +static void UCS_F_CTOR ucs_init() +{ + ucs_check_cpu_flags(); + ucs_log_early_init(); /* Must be called before all others */ + ucs_global_opts_init(); + ucs_cpu_init(); + ucs_log_init(); +#if ENABLE_STATS + ucs_stats_init(); +#endif + ucs_memtrack_init(); + ucs_debug_init(); + ucs_profile_global_init(); + ucs_async_global_init(); + ucs_debug("%s loaded at 0x%lx", ucs_debug_get_lib_path(), 
+ ucs_debug_get_lib_base_addr()); + ucs_debug("cmd line: %s", ucs_get_process_cmdline()); +} + +static void UCS_F_DTOR ucs_cleanup(void) +{ + ucs_async_global_cleanup(); + ucs_profile_global_cleanup(); + ucs_debug_cleanup(0); + ucs_memtrack_cleanup(); +#if ENABLE_STATS + ucs_stats_cleanup(); +#endif + ucs_log_cleanup(); +} diff --git a/src/ucs/sys/iovec.c b/src/ucs/sys/iovec.c new file mode 100644 index 0000000..b9a0c02 --- /dev/null +++ b/src/ucs/sys/iovec.c @@ -0,0 +1,106 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include +#include + +#include +#include +/* Need this to get IOV_MAX on some platforms. */ +#ifndef __need_IOV_MAX +#define __need_IOV_MAX +#endif +#include + + +size_t ucs_iov_copy(const struct iovec *iov, size_t iov_cnt, + size_t iov_offset, void *buf, size_t max_copy, + ucs_iov_copy_direction_t dir) +{ + size_t copied = 0; + char *iov_buf; + size_t i, len; + + for (i = 0; (i < iov_cnt) && max_copy; i++) { + len = iov[i].iov_len; + + if (iov_offset > len) { + iov_offset -= len; + continue; + } + + iov_buf = UCS_PTR_BYTE_OFFSET(iov[i].iov_base, iov_offset); + len -= iov_offset; + + len = ucs_min(len, max_copy); + if (dir == UCS_IOV_COPY_FROM_BUF) { + memcpy(iov_buf, UCS_PTR_BYTE_OFFSET(buf, copied), len); + } else if (dir == UCS_IOV_COPY_TO_BUF) { + memcpy(UCS_PTR_BYTE_OFFSET(buf, copied), iov_buf, len); + } + + iov_offset = 0; + max_copy -= len; + copied += len; + } + + return copied; +} + +void ucs_iov_advance(struct iovec *iov, size_t iov_cnt, + size_t *cur_iov_idx, size_t consumed) +{ + size_t i; + + ucs_assert(*cur_iov_idx <= iov_cnt); + + for (i = *cur_iov_idx; i < iov_cnt; i++) { + if (consumed < iov[i].iov_len) { + iov[i].iov_len -= consumed; + iov[i].iov_base = UCS_PTR_BYTE_OFFSET(iov[i].iov_base, + consumed); + *cur_iov_idx = i; + return; + } + + consumed -= iov[i].iov_len; + iov[i].iov_base = UCS_PTR_BYTE_OFFSET(iov[i].iov_base, + iov[i].iov_len); + iov[i].iov_len 
= 0; + } + + ucs_assert(!consumed && (i == iov_cnt)); +} + +size_t ucs_iov_get_max() +{ + static int max_iov = -1; + +#ifdef _SC_IOV_MAX + if (max_iov != -1) { + return max_iov; + } + + max_iov = sysconf(_SC_IOV_MAX); + if (max_iov != -1) { + return max_iov; + } + /* if unable to get value from sysconf(), + * use a predefined value */ +#endif + +#if defined(IOV_MAX) + max_iov = IOV_MAX; +#elif defined(UIO_MAXIOV) + max_iov = UIO_MAXIOV; +#else + /* The value is used as a fallback when system value is not available. + * The latest kernels define it as 1024 */ + max_iov = 1024; +#endif + + return max_iov; +} diff --git a/src/ucs/sys/iovec.h b/src/ucs/sys/iovec.h new file mode 100644 index 0000000..c9a4c72 --- /dev/null +++ b/src/ucs/sys/iovec.h @@ -0,0 +1,64 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCS_IOVEC_H +#define UCS_IOVEC_H + +#include + +#include +#include + +BEGIN_C_DECLS + +/* A direction for copying a data to/from an array of iovec elements */ +typedef enum ucs_iov_copy_direction { + UCS_IOV_COPY_TO_BUF, + UCS_IOV_COPY_FROM_BUF +} ucs_iov_copy_direction_t; + + +/** + * Copy data from an iovec array to a buffer, or from a buffer to an iovec array, as selected by dir. + * + * @param [in] iov A pointer to an array of iovec elements. + * @param [in] iov_cnt A number of elements in a iov array. + * @param [in] iov_offset A byte offset, counted across the iov elements, at which copying starts. + * @param [in] buf A buffer that should be used for copying a data. + * @param [in] max_copy A maximum amount of data that should be copied. + * @param [in] dir Direction that specifies destination and source. + * + * @return The amount, in bytes, of the data that was copied. + */ +size_t ucs_iov_copy(const struct iovec *iov, size_t iov_cnt, + size_t iov_offset, void *buf, size_t max_copy, + ucs_iov_copy_direction_t dir); + +/** + * Update an array of iovec elements to consider an already consumed data. + * + * @param [in] iov A pointer to an array of iovec elements. 
+ * @param [in] iov_cnt A number of elements in a iov array. + * @param [in/out] cur_iov_idx A pointer to an index in a iov array from + * which the operation should be started. + * @param [in] consumed An amount of data consumed that should be + * considered in a current iov array. + */ +void ucs_iov_advance(struct iovec *iov, size_t iov_cnt, + size_t *cur_iov_idx, size_t consumed); + +/** + * Returns the maximum possible value for the number of IOVs. + * It maybe either value from the system configuration or IOV_MAX + * value or UIO_MAXIOV value or 1024 if nothing is defined. + * + * @return The maximum number of IOVs. + */ +size_t ucs_iov_get_max(); + +END_C_DECLS + +#endif diff --git a/src/ucs/sys/math.c b/src/ucs/sys/math.c new file mode 100644 index 0000000..d127f63 --- /dev/null +++ b/src/ucs/sys/math.c @@ -0,0 +1,26 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2012. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "math.h" + +static uint64_t ucs_large_primes[] = { + 14476643271716824181ull, 12086978239110065677ull, + 15386586898367453843ull, 17958312454893560653ull, + + 32416188191ull, 32416188793ull, + 32416189381ull, 32416190071ull, + + 9929050057ull, 9929050081ull, 9929050097ull, 9929050111ull, + 9929050121ull, 9929050133ull, 9929050139ull, 9929050163ull, + 9929050207ull, 9929050217ull, 9929050249ull, 9929050253ull +}; + +uint64_t ucs_get_prime(unsigned index) +{ + static const unsigned num_primes = sizeof(ucs_large_primes) / sizeof(ucs_large_primes[0]); + + return ucs_large_primes[index % num_primes]; +} diff --git a/src/ucs/sys/math.h b/src/ucs/sys/math.h new file mode 100644 index 0000000..6ebd179 --- /dev/null +++ b/src/ucs/sys/math.h @@ -0,0 +1,162 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + + +#ifndef UCS_MATH_H +#define UCS_MATH_H + +#include "compiler_def.h" + +#include +#include +#include + +BEGIN_C_DECLS + +/** @file math.h */ + +#define UCS_KBYTE (1ull << 10) +#define UCS_MBYTE (1ull << 20) +#define UCS_GBYTE (1ull << 30) +#define UCS_TBYTE (1ull << 40) + +#define ucs_min(_a, _b) \ +({ \ + typeof(_a) __a = (_a); \ + typeof(_b) __b = (_b); \ + (__a < __b) ? __a : __b; \ +}) + +#define ucs_max(_a, _b) \ +({ \ + typeof(_a) __a = (_a); \ + typeof(_b) __b = (_b); \ + (__a > __b) ? __a : __b; \ +}) + +#define ucs_is_pow2_or_zero(_n) \ + !((_n) & ((_n) - 1)) + +#define ucs_is_pow2(_n) \ + (((_n) > 0) && ucs_is_pow2_or_zero(_n)) + +#define ucs_padding(_n, _alignment) \ + ( ((_alignment) - (_n) % (_alignment)) % (_alignment) ) + +#define ucs_align_down(_n, _alignment) \ + ( (_n) - ((_n) % (_alignment)) ) + +#define ucs_align_up(_n, _alignment) \ + ( (_n) + ucs_padding(_n, _alignment) ) + +#define ucs_align_down_pow2(_n, _alignment) \ + ( (_n) & ~((_alignment) - 1) ) + +#define ucs_align_up_pow2(_n, _alignment) \ + ucs_align_down_pow2((_n) + (_alignment) - 1, _alignment) + +#define ucs_align_down_pow2_ptr(_ptr, _alignment) \ + ((typeof(_ptr))ucs_align_down_pow2((uintptr_t)(_ptr), (_alignment))) + +#define ucs_align_up_pow2_ptr(_ptr, _alignment) \ + ((typeof(_ptr))ucs_align_up_pow2((uintptr_t)(_ptr), (_alignment))) + +#define ucs_roundup_pow2(_n) \ + ({ \ + typeof(_n) pow2; \ + ucs_assert((_n) >= 1); \ + for (pow2 = 1; pow2 < (_n); pow2 <<= 1); \ + pow2; \ + }) + +#define ucs_signum(_n) \ + (((_n) > (typeof(_n))0) - ((_n) < (typeof(_n))0)) + +#define ucs_roundup_pow2_or0(_n) \ + ( ((_n) == 0) ? 0 : ucs_roundup_pow2(_n) ) + +/* Return values: 0 - aligned, non-0 - unaligned */ +#define ucs_check_if_align_pow2(_n, _p) ((_n) & ((_p) - 1)) + +/* Return values: off-set from the alignment */ +#define ucs_padding_pow2(_n, _p) ucs_check_if_align_pow2(_n, _p) + +#define UCS_MASK_SAFE(_i) \ + (((_i) >= 64) ? 
((uint64_t)(-1)) : UCS_MASK(_i)) + +#define ucs_div_round_up(_n, _d) \ + (((_n) + (_d) - 1) / (_d)) + +static inline double ucs_log2(double x) +{ + return log(x) / log(2.0); +} + +/** + * Convert flags without a branch + * @return '_newflag' if '_oldflag' is set in '_value', otherwise - 0 + */ +#define ucs_convert_flag(_value, _oldflag, _newflag) \ + ({ \ + UCS_STATIC_ASSERT(ucs_is_constant(_oldflag)); \ + UCS_STATIC_ASSERT(ucs_is_constant(_newflag)); \ + UCS_STATIC_ASSERT(ucs_is_pow2(_oldflag)); \ + UCS_STATIC_ASSERT(ucs_is_pow2(_newflag)); \ + (((_value) & (_oldflag)) ? (_newflag) : 0); \ + }) + + +/** + * Test if a value is one of a specified list of values, assuming all possible + * values are powers of 2. + */ +#define __ucs_test_flags(__value, __f1, __f2, __f3, __f4, __f5, __f6, __f7, __f8, __f9, ...) \ + (__value & ((__f1) | (__f2) | (__f3) | (__f4) | (__f5) | (__f6) | (__f7) | (__f8) | (__f9))) +#define ucs_test_flags(__value, ...) \ + __ucs_test_flags((__value), __VA_ARGS__, 0, 0, 0, 0, 0, 0, 0, 0, 0) + +/* + * Check if all given flags are on + */ +#define ucs_test_all_flags(__value, __mask) \ + ( ((__value) & (__mask)) == (__mask) ) + +/** + * Compare unsigned numbers which can wrap-around, assuming the wrap-around + * distance can be at most the maximal value of the signed type. + * + * @param __a First number + * @param __op Operator (e.g >=) + * @param __b Second number + * @param _signed_type Signed type of __a/__b (e.g int32_t) + * + * @return value of the expression "__a __op __b". 
+ */ +#define UCS_CIRCULAR_COMPARE(__a, __op, __b, __signed_type) \ + ((__signed_type)((__a) - (__b)) __op 0) + +#define UCS_CIRCULAR_COMPARE8(__a, __op, __b) UCS_CIRCULAR_COMPARE(__a, __op, __b, int8_t) +#define UCS_CIRCULAR_COMPARE16(__a, __op, __b) UCS_CIRCULAR_COMPARE(__a, __op, __b, int16_t) +#define UCS_CIRCULAR_COMPARE32(__a, __op, __b) UCS_CIRCULAR_COMPARE(__a, __op, __b, int32_t) +#define UCS_CIRCULAR_COMPARE64(__a, __op, __b) UCS_CIRCULAR_COMPARE(__a, __op, __b, int64_t) + +/* ffs64(0) returns 0 on some architectures and -1 on others; this wrapper + * returns 64 for a zero value instead */ +#define ucs_ffs64_safe(_val) ((_val) ? ucs_ffs64(_val) : 64) + +#define ucs_for_each_bit(_index, _map) \ + for ((_index) = ucs_ffs64_safe(_map); (_index) < 64; \ + (_index) = ucs_ffs64_safe((uint64_t)(_map) & (-2ull << (uint64_t)(_index)))) + + +/* + * Return a large prime number from a fixed table; 'index' wraps around + * the number of table entries. + */ +uint64_t ucs_get_prime(unsigned index); + +END_C_DECLS + +#endif /* UCS_MATH_H */ diff --git a/src/ucs/sys/module.c b/src/ucs/sys/module.c new file mode 100644 index 0000000..af948e6 --- /dev/null +++ b/src/ucs/sys/module.c @@ -0,0 +1,260 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE /* for dladdr(3) */ +#endif + +#include "module.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define UCS_MODULE_PATH_MEMTRACK_NAME "module_path" +#define UCS_MODULE_SRCH_PATH_MAX 2 + +#define ucs_module_debug(_fmt, ...) \ + ucs_log(ucs_min(UCS_LOG_LEVEL_DEBUG, ucs_global_opts.module_log_level), \ + _fmt, ## __VA_ARGS__) +#define ucs_module_trace(_fmt, ...)
\ + ucs_log(ucs_min(UCS_LOG_LEVEL_TRACE, ucs_global_opts.module_log_level), \ + _fmt, ## __VA_ARGS__) + +static struct { + ucs_init_once_t init; + char module_ext[NAME_MAX]; + unsigned srchpath_cnt; + char *srch_path[UCS_MODULE_SRCH_PATH_MAX]; +} ucs_module_loader_state = { + .init = UCS_INIT_ONCE_INITIALIZER, + .module_ext = ".so", /* default extension */ + .srchpath_cnt = 0, + .srch_path = { NULL, NULL} +}; + +/* Should be called with lock held */ +static void ucs_module_loader_add_dl_dir() +{ + char *dlpath_dup = NULL; + size_t max_length; + Dl_info dl_info; + char *p, *path; + int ret; + + (void)dlerror(); + ret = dladdr((void*)&ucs_module_loader_state, &dl_info); + if (ret == 0) { + ucs_error("dladdr failed: %s", dlerror()); + return; + } + + ucs_module_debug("ucs library path: %s", dl_info.dli_fname); + + /* copy extension */ + dlpath_dup = ucs_strdup(dl_info.dli_fname, + UCS_MODULE_PATH_MEMTRACK_NAME); + if (dlpath_dup == NULL) { + return; + } + + p = basename(dlpath_dup); + p = strchr(p, '.'); + if (p != NULL) { + strncpy(ucs_module_loader_state.module_ext, p, + sizeof(ucs_module_loader_state.module_ext) - 1); + } + ucs_free(dlpath_dup); + + /* copy directory component */ + dlpath_dup = ucs_strdup(dl_info.dli_fname, + UCS_MODULE_PATH_MEMTRACK_NAME); + if (dlpath_dup == NULL) { + return; + } + + /* construct module directory path */ + max_length = strlen(dlpath_dup) + /* directory */ + 1 + /* '/' */ + strlen(UCX_MODULE_SUBDIR) + /* sub-directory */ + 1; /* '\0' */ + path = ucs_malloc(max_length, UCS_MODULE_PATH_MEMTRACK_NAME); + if (path == NULL) { + goto out; + } + + snprintf(path, max_length, "%s/%s", dirname(dlpath_dup), UCX_MODULE_SUBDIR); + ucs_module_loader_state.srch_path[ucs_module_loader_state.srchpath_cnt++] = path; + +out: + ucs_free(dlpath_dup); +} + +/* Should be called with lock held */ +static void ucs_module_loader_add_install_dir() +{ + ucs_module_loader_state.srch_path[ucs_module_loader_state.srchpath_cnt++] = + ucs_global_opts.module_dir; 
+} + +static void ucs_module_loader_init_paths() +{ + UCS_INIT_ONCE(&ucs_module_loader_state.init) { + ucs_assert(ucs_module_loader_state.srchpath_cnt == 0); + ucs_module_loader_add_dl_dir(); + ucs_module_loader_add_install_dir(); + ucs_assert(ucs_module_loader_state.srchpath_cnt <= UCS_MODULE_SRCH_PATH_MAX); + } +} + +/* Perform shallow search for a symbol */ +static void *ucs_module_dlsym_shallow(const char *module_path, void *dl, + const char *symbol) +{ + struct link_map *lm_entry; + Dl_info dl_info; + void *addr; + int ret; + + addr = dlsym(dl, symbol); + if (addr == NULL) { + return NULL; + } + + (void)dlerror(); + ret = dladdr(addr, &dl_info); + if (ret == 0) { + ucs_module_debug("dladdr(%p) [%s] failed: %s", addr, symbol, dlerror()); + return NULL; + } + + (void)dlerror(); + ret = dlinfo(dl, RTLD_DI_LINKMAP, &lm_entry); + if (ret) { + ucs_module_debug("dlinfo(%p) [%s] failed: %s", dl, module_path, dlerror()); + return NULL; + } + + /* return the symbol only if it was found in the requested library, and not, + * for example, in one of its dependencies. 
+ */ + if (lm_entry->l_addr != (uintptr_t)dl_info.dli_fbase) { + ucs_module_debug("ignoring '%s' (%p) from %s (%p), expected in %s (%lx)", + symbol, addr, ucs_basename(dl_info.dli_fname), + dl_info.dli_fbase, ucs_basename(module_path), + lm_entry->l_addr); + return NULL; + } + + return addr; +} + +static void ucs_module_init(const char *module_path, void *dl) +{ + typedef ucs_status_t (*init_func_t)(); + + const char *module_init_name = + UCS_PP_MAKE_STRING(UCS_MODULE_CONSTRUCTOR_NAME); + char *fullpath, buffer[PATH_MAX]; + init_func_t init_func; + ucs_status_t status; + + fullpath = realpath(module_path, buffer); + ucs_module_trace("loaded %s [%p]", fullpath, dl); + + init_func = (init_func_t)ucs_module_dlsym_shallow(module_path, dl, + module_init_name); + if (init_func == NULL) { + ucs_module_trace("not calling constructor '%s' in %s", module_init_name, + module_path); + return; + } + + ucs_module_trace("calling '%s' in '%s': [%p]", module_init_name, fullpath, + init_func); + status = init_func(); + if (status != UCS_OK) { + ucs_module_debug("initializing '%s' failed: %s, unloading", fullpath, + ucs_status_string(status)); + dlclose(dl); + } +} + +static void ucs_module_load_one(const char *framework, const char *module_name, + unsigned flags) +{ + char module_path[PATH_MAX] = {0}; + const char *error; + unsigned i; + void *dl; + int mode; + + mode = RTLD_LAZY; + if (flags & UCS_MODULE_LOAD_FLAG_NODELETE) { + mode |= RTLD_NODELETE; + } + if (flags & UCS_MODULE_LOAD_FLAG_GLOBAL) { + mode |= RTLD_GLOBAL; + } else { + mode |= RTLD_LOCAL; + } + + for (i = 0; i < ucs_module_loader_state.srchpath_cnt; ++i) { + snprintf(module_path, sizeof(module_path) - 1, "%s/lib%s_%s%s", + ucs_module_loader_state.srch_path[i], framework, module_name, + ucs_module_loader_state.module_ext); + + /* Clear error state */ + (void)dlerror(); + dl = dlopen(module_path, mode); + if (dl != NULL) { + ucs_module_init(module_path, dl); + break; + } else { + /* If a module fails to load, silently 
give up */ + error = dlerror(); + ucs_module_debug("dlopen('%s', mode=0x%x) failed: %s", module_path, + mode, error ? error : "Unknown error"); + } + } + + /* coverity[leaked_storage] : a loaded module is never unloaded */ +} + +void ucs_load_modules(const char *framework, const char *modules, + ucs_init_once_t *init_once, unsigned flags) +{ + char *modules_str; + char *saveptr; + char *module_name; + + ucs_module_loader_init_paths(); + + UCS_INIT_ONCE(init_once) { + ucs_module_debug("loading modules for %s", framework); + modules_str = ucs_strdup(modules, "modules_list"); + if (modules_str != NULL) { + saveptr = NULL; + module_name = strtok_r(modules_str, ":", &saveptr); + while (module_name != NULL) { + ucs_module_load_one(framework, module_name, flags); + module_name = strtok_r(NULL, ":", &saveptr); + } + ucs_free(modules_str); + } else { + ucs_error("failed to allocate module names list"); + } + } +} diff --git a/src/ucs/sys/module.h b/src/ucs/sys/module.h new file mode 100644 index 0000000..a9e3af9 --- /dev/null +++ b/src/ucs/sys/module.h @@ -0,0 +1,96 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + + +#ifndef UCS_MODULE_H_ +#define UCS_MODULE_H_ + +#include +#include + + +/** + * Flags for @ref UCS_MODULE_FRAMEWORK_LOAD + */ +typedef enum { + UCS_MODULE_LOAD_FLAG_NODELETE = UCS_BIT(0), /**< Never unload */ + UCS_MODULE_LOAD_FLAG_GLOBAL = UCS_BIT(1) /**< Load to global scope */ +} ucs_module_load_flags_t; + + +/** + * Declare a "framework", which is a context for a specific collection of + * loadable modules. Usually the modules in a particular framework provide + * alternative implementations of the same internal interface. + * + * @param [in] _name Framework name (as a token) + */ +#define UCS_MODULE_FRAMEWORK_DECLARE(_name) \ + static ucs_init_once_t ucs_framework_init_once_##_name = \ + UCS_INIT_ONCE_INITIALIZER + + +/** + * Load all modules in a particular framework. 
+ * + * @param [in] _name Framework name, same as passed to + * @ref UCS_MODULE_FRAMEWORK_DECLARE + * @param [in] _flags Modules load flags, see @ref ucs_module_load_flags_t + * + * The modules in the framework are loaded by dlopen(). The shared library name + * of a module is: "lib_.so.", where: + * - is the framework name + * - is the module name. The list of all modules in a framework is + * defined by the preprocessor macro _MODULES in the auto-generated + * config.h file, for example: #define foo_MODULES ":bar1:bar2". + * - is the shared library version of the module, as generated by + * libtool. It's extracted from the full path of the current library (libucs). + * + * Module shared libraries are searched in the following locations (in order of + * priority): + * 1. 'ucx' sub-directory inside the directory of the current shared library (libucs) + * 2. ${libdir}/ucx, where ${libdir} is the directory where libraries are installed + * Note that if libucs is loaded from its installation path, (1) and (2) are the + * same location. Only if libucs is moved or ran from build directory, the paths + * will be different, in which case we prefer the 'local' library rather than the + * 'installed' one. + * + * @param [in] _name Framework name (as a token) + */ +#define UCS_MODULE_FRAMEWORK_LOAD(_name, _flags) \ + ucs_load_modules(#_name, _name##_MODULES, &ucs_framework_init_once_##_name, \ + _flags) + + +/** + * Define a function to be called when a module is loaded. + * Some things can't be done in shared library constructor, and need to be done + * only after dlopen() completes. For example, loading another shared library + * which uses symbols from the current module. + * + * Usage: + * UCS_MODULE_INIT() { ... code ... 
} + */ +#define UCS_MODULE_INIT() \ + ucs_status_t __attribute__((visibility("protected"))) \ + UCS_MODULE_CONSTRUCTOR_NAME(void) + + +/** + * Define the name of a loadable module global constructor + */ +#define UCS_MODULE_CONSTRUCTOR_NAME \ + ucs_module_global_init + + +/** + * Internal function. Please use @ref UCS_MODULE_FRAMEWORK_LOAD macro instead. + */ +void ucs_load_modules(const char *framework, const char *modules, + ucs_init_once_t *init_once, unsigned flags); + + +#endif diff --git a/src/ucs/sys/preprocessor.h b/src/ucs/sys/preprocessor.h new file mode 100644 index 0000000..dc734ed --- /dev/null +++ b/src/ucs/sys/preprocessor.h @@ -0,0 +1,154 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2012. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_PREPROCESSOR_H +#define UCS_PREPROCESSOR_H + +/* Convert token to string */ +#define UCS_PP_QUOTE(x) # x + +/* Paste two expanded tokens */ +#define __UCS_TOKENPASTE_HELPER(x, y) x ## y +#define UCS_PP_TOKENPASTE(x, y) __UCS_TOKENPASTE_HELPER(x, y) + +/* Unique value generator */ +#ifdef __COUNTER__ +# define UCS_PP_UNIQUE_ID __COUNTER__ +#else +# define UCS_PP_UNIQUE_ID __LINE__ +#endif + +/* Creating unique identifiers, used for macros */ +#define UCS_PP_APPEND_UNIQUE_ID(x) UCS_PP_TOKENPASTE(x, UCS_PP_UNIQUE_ID) + +/* Convert to string */ +#define _UCS_PP_MAKE_STRING(x) #x +#define UCS_PP_MAKE_STRING(x) _UCS_PP_MAKE_STRING(x) + +/* + * Count number of macro arguments + * e.g UCS_PP_NUM_ARGS(a,b) will expand to: 2 + */ +#define UCS_PP_MAX_ARGS 20 +#define _UCS_PP_NUM_ARGS(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,N,...) \ + N +#define UCS_PP_NUM_ARGS(...) 
\ + _UCS_PP_NUM_ARGS(, ## __VA_ARGS__,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0) + + +/* Expand macro for each argument in the list + * e.g + * UCS_PP_FOREACH(macro, arg, a, b, c) will expand to: macro(arg, a) macro(arg, b) macro(arg, c) + * UCS_PP_FOREACH_SEP(macro, arg, a, b, c) will expand to: macro(arg, a), macro(arg, b), macro(arg, c) + * UCS_PP_ZIP((a, b, c), (1, 2, 3)) will expand to: (a, 1), (b, 2), (c, 3) + */ +#define UCS_PP_FOREACH(_macro, _arg, ...) \ + UCS_PP_TOKENPASTE(_UCS_PP_FOREACH_, UCS_PP_NUM_ARGS(__VA_ARGS__))(_macro, _arg, __VA_ARGS__) +#define UCS_PP_FOREACH_SEP(_macro, _arg, ...) \ + UCS_PP_TOKENPASTE(_UCS_PP_FOREACH_SEP_, UCS_PP_NUM_ARGS(__VA_ARGS__))(_macro, _arg, __VA_ARGS__) +#define UCS_PP_ZIP(_l1, _l2) \ + UCS_PP_TOKENPASTE(_UCS_PP_ZIP_, UCS_PP_NUM_ARGS _l1)(_l1, _l2) + +#define _UCS_PP_FOREACH_0(_macro , _arg, ...) +#define _UCS_PP_FOREACH_1(_macro , _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_0 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_2(_macro , _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_1 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_3(_macro , _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_2 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_4(_macro , _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_3 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_5(_macro , _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_4 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_6(_macro , _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_5 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_7(_macro , _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_6 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_8(_macro , _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_7 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_9(_macro , _arg, _arg1, ...) 
_macro(_arg, _arg1) _UCS_PP_FOREACH_8 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_10(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_9 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_11(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_10(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_12(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_11(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_13(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_12(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_14(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_13(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_15(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_14(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_16(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_15(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_17(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_16(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_18(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_17(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_19(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_18(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_20(_macro, _arg, _arg1, ...) _macro(_arg, _arg1) _UCS_PP_FOREACH_19(_macro, _arg, __VA_ARGS__) + +#define _UCS_PP_FOREACH_SEP_0(_macro , _arg, _arg1, ...) +#define _UCS_PP_FOREACH_SEP_1(_macro , _arg, _arg1, ...) _macro(_arg, _arg1) +#define _UCS_PP_FOREACH_SEP_2(_macro , _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_1 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_3(_macro , _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_2 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_4(_macro , _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_3 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_5(_macro , _arg, _arg1, ...) 
_macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_4 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_6(_macro , _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_5 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_7(_macro , _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_6 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_8(_macro , _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_7 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_9(_macro , _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_8 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_10(_macro, _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_9 (_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_11(_macro, _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_10(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_12(_macro, _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_11(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_13(_macro, _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_12(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_14(_macro, _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_13(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_15(_macro, _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_14(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_16(_macro, _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_15(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_17(_macro, _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_16(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_18(_macro, _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_17(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_19(_macro, _arg, _arg1, ...) _macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_18(_macro, _arg, __VA_ARGS__) +#define _UCS_PP_FOREACH_SEP_20(_macro, _arg, _arg1, ...) 
_macro(_arg, _arg1), _UCS_PP_FOREACH_SEP_19(_macro, _arg, __VA_ARGS__) + +#define _UCS_PP_ZIP_0(_l1, _l2) +#define _UCS_PP_ZIP_1(_l1, _l2) _UCS_PP_ZIP_0(_l1, _l2) (UCS_PP_TUPLE_0 _l1, UCS_PP_TUPLE_0 _l2) +#define _UCS_PP_ZIP_2(_l1, _l2) _UCS_PP_ZIP_1(_l1, _l2), (UCS_PP_TUPLE_1 _l1, UCS_PP_TUPLE_1 _l2) +#define _UCS_PP_ZIP_3(_l1, _l2) _UCS_PP_ZIP_2(_l1, _l2), (UCS_PP_TUPLE_2 _l1, UCS_PP_TUPLE_2 _l2) +#define _UCS_PP_ZIP_4(_l1, _l2) _UCS_PP_ZIP_3(_l1, _l2), (UCS_PP_TUPLE_3 _l1, UCS_PP_TUPLE_3 _l2) +#define _UCS_PP_ZIP_5(_l1, _l2) _UCS_PP_ZIP_4(_l1, _l2), (UCS_PP_TUPLE_4 _l1, UCS_PP_TUPLE_4 _l2) +#define _UCS_PP_ZIP_6(_l1, _l2) _UCS_PP_ZIP_5(_l1, _l2), (UCS_PP_TUPLE_5 _l1, UCS_PP_TUPLE_5 _l2) +#define _UCS_PP_ZIP_7(_l1, _l2) _UCS_PP_ZIP_6(_l1, _l2), (UCS_PP_TUPLE_6 _l1, UCS_PP_TUPLE_6 _l2) +#define _UCS_PP_ZIP_8(_l1, _l2) _UCS_PP_ZIP_7(_l1, _l2), (UCS_PP_TUPLE_7 _l1, UCS_PP_TUPLE_7 _l2) +#define _UCS_PP_ZIP_9(_l1, _l2) _UCS_PP_ZIP_8(_l1, _l2), (UCS_PP_TUPLE_8 _l1, UCS_PP_TUPLE_8 _l2) +#define _UCS_PP_ZIP_10(_l1, _l2) _UCS_PP_ZIP_9(_l1, _l2), (UCS_PP_TUPLE_9 _l1, UCS_PP_TUPLE_9 _l2) + + +/* Extract elements from tuples + */ +#define UCS_PP_TUPLE_0(_0, ...) _0 +#define UCS_PP_TUPLE_1(_0, _1, ...) _1 +#define UCS_PP_TUPLE_2(_0, _1, _2, ...) _2 +#define UCS_PP_TUPLE_3(_0, _1, _2, _3, ...) _3 +#define UCS_PP_TUPLE_4(_0, _1, _2, _3, _4, ...) _4 +#define UCS_PP_TUPLE_5(_0, _1, _2, _3, _4, _5, ...) _5 +#define UCS_PP_TUPLE_6(_0, _1, _2, _3, _4, _5, _6, ...) _6 +#define UCS_PP_TUPLE_7(_0, _1, _2, _3, _4, _5, _6, _7, ...) _7 +#define UCS_PP_TUPLE_8(_0, _1, _2, _3, _4, _5, _6, _7, _8, ...) _8 +#define UCS_PP_TUPLE_9(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, ...) _9 +#define UCS_PP_TUPLE_10(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, ...) _10 +#define UCS_PP_TUPLE_BREAK(...) 
__VA_ARGS__ + + +/* Sequence of numbers + */ +#define _UCS_PP_SEQ_0 +#define _UCS_PP_SEQ_1 _UCS_PP_SEQ_0 0 +#define _UCS_PP_SEQ_2 _UCS_PP_SEQ_1 , 1 +#define _UCS_PP_SEQ_3 _UCS_PP_SEQ_2 , 2 +#define _UCS_PP_SEQ_4 _UCS_PP_SEQ_3 , 3 +#define _UCS_PP_SEQ_5 _UCS_PP_SEQ_4 , 4 +#define _UCS_PP_SEQ_6 _UCS_PP_SEQ_5 , 5 +#define _UCS_PP_SEQ_7 _UCS_PP_SEQ_6 , 6 +#define _UCS_PP_SEQ_8 _UCS_PP_SEQ_7 , 7 +#define _UCS_PP_SEQ_9 _UCS_PP_SEQ_8 , 8 +#define _UCS_PP_SEQ_10 _UCS_PP_SEQ_9 , 9 +#define _UCS_PP_SEQ_11 _UCS_PP_SEQ_10, 10 +#define _UCS_PP_SEQ_12 _UCS_PP_SEQ_11, 11 +#define _UCS_PP_SEQ_13 _UCS_PP_SEQ_12, 12 +#define _UCS_PP_SEQ_14 _UCS_PP_SEQ_13, 13 +#define _UCS_PP_SEQ_15 _UCS_PP_SEQ_14, 14 +#define _UCS_PP_SEQ_16 _UCS_PP_SEQ_15, 15 +#define _UCS_PP_SEQ_17 _UCS_PP_SEQ_16, 16 +#define _UCS_PP_SEQ_18 _UCS_PP_SEQ_17, 17 +#define _UCS_PP_SEQ_19 _UCS_PP_SEQ_18, 18 +#define _UCS_PP_SEQ_20 _UCS_PP_SEQ_19, 19 +#define _UCS_PP_SEQ(_n) _UCS_PP_SEQ_##_n +#define UCS_PP_SEQ(_n) _UCS_PP_SEQ(_n) + +#endif diff --git a/src/ucs/sys/sock.c b/src/ucs/sys/sock.c new file mode 100644 index 0000000..294b3eb --- /dev/null +++ b/src/ucs/sys/sock.c @@ -0,0 +1,687 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* See file LICENSE for terms. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +#define UCS_SOCKET_MAX_CONN_PATH "/proc/sys/net/core/somaxconn" + + +typedef ssize_t (*ucs_socket_io_func_t)(int fd, void *data, + size_t size, int flags); + +typedef ssize_t (*ucs_socket_iov_func_t)(int fd, const struct msghdr *msg, + int flags); + + +int ucs_netif_flags_is_active(unsigned int flags) +{ + return (flags & IFF_UP) && (flags & IFF_RUNNING) && !(flags & IFF_LOOPBACK); +} + +ucs_status_t ucs_netif_ioctl(const char *if_name, unsigned long request, + struct ifreq *if_req) +{ + ucs_status_t status; + int fd = -1, ret; + + ucs_strncpy_zero(if_req->ifr_name, if_name, sizeof(if_req->ifr_name)); + + status = ucs_socket_create(AF_INET, SOCK_STREAM, &fd); + if (status != UCS_OK) { + goto out; + } + + ret = ioctl(fd, request, if_req); + if (ret < 0) { + ucs_debug("ioctl(req=%lu, ifr_name=%s) failed: %m", request, if_name); + status = UCS_ERR_IO_ERROR; + goto out_close_fd; + } + + status = UCS_OK; + +out_close_fd: + close(fd); +out: + return status; +} + +int ucs_netif_is_active(const char *if_name) +{ + ucs_status_t status; + struct ifreq ifr; + + status = ucs_netif_ioctl(if_name, SIOCGIFADDR, &ifr); + if (status != UCS_OK) { + return 0; + } + + status = ucs_netif_ioctl(if_name, SIOCGIFFLAGS, &ifr); + if (status != UCS_OK) { + return 0; + } + + return ucs_netif_flags_is_active(ifr.ifr_flags); +} + +ucs_status_t ucs_socket_create(int domain, int type, int *fd_p) +{ + int fd = socket(domain, type, 0); + if (fd < 0) { + ucs_error("socket create failed: %m"); + return UCS_ERR_IO_ERROR; + } + + *fd_p = fd; + return UCS_OK; +} + +ucs_status_t ucs_socket_setopt(int fd, int level, int optname, + const void *optval, socklen_t optlen) +{ + int ret = setsockopt(fd, level, optname, optval, optlen); + if (ret < 0) { + ucs_error("failed to set %d option for %d level on fd %d: %m", + optname, level, fd); + return UCS_ERR_IO_ERROR; + } + + return 
UCS_OK; +} + +const char *ucs_socket_getname_str(int fd, char *str, size_t max_size) +{ + struct sockaddr_storage sock_addr = {0}; /* Suppress Clang false-positive */ + socklen_t addr_size; + int ret; + + addr_size = sizeof(sock_addr); + ret = getsockname(fd, (struct sockaddr*)&sock_addr, + &addr_size); + if (ret < 0) { + ucs_debug("getsockname(fd=%d) failed: %m", fd); + ucs_strncpy_safe(str, "-", max_size); + return str; + } + + return ucs_sockaddr_str((const struct sockaddr*)&sock_addr, + str, max_size); +} + +static ucs_status_t ucs_socket_check_errno(int io_errno) +{ + if ((io_errno == EAGAIN) || (io_errno == EWOULDBLOCK) || (io_errno == EINTR)) { + return UCS_ERR_NO_PROGRESS; + } + + return UCS_ERR_IO_ERROR; +} + +ucs_status_t ucs_socket_connect(int fd, const struct sockaddr *dest_addr) +{ + char dest_str[UCS_SOCKADDR_STRING_LEN]; + char src_str[UCS_SOCKADDR_STRING_LEN]; + ucs_status_t status; + size_t dest_addr_size; + int UCS_V_UNUSED conn_errno; + int ret; + + status = ucs_sockaddr_sizeof(dest_addr, &dest_addr_size); + if (status != UCS_OK) { + return status; + } + + do { + ret = connect(fd, dest_addr, dest_addr_size); + if (ret < 0) { + /* Save errno to separate variable to not override it + * when calling getsockname() below */ + conn_errno = errno; + + if (errno == EINPROGRESS) { + status = UCS_INPROGRESS; + break; + } + + if (errno == EISCONN) { + status = UCS_ERR_ALREADY_EXISTS; + break; + } + + if (errno != EINTR) { + ucs_error("connect(fd=%d, dest_addr=%s) failed: %m", fd, + ucs_sockaddr_str(dest_addr, dest_str, + UCS_SOCKADDR_STRING_LEN)); + return UCS_ERR_UNREACHABLE; + } + } else { + conn_errno = 0; + } + } while ((ret < 0) && (errno == EINTR)); + + ucs_debug("connect(fd=%d, src_addr=%s dest_addr=%s): %s", fd, + ucs_socket_getname_str(fd, src_str, UCS_SOCKADDR_STRING_LEN), + ucs_sockaddr_str(dest_addr, dest_str, UCS_SOCKADDR_STRING_LEN), + strerror(conn_errno)); + + return status; +} + +ucs_status_t ucs_socket_accept(int fd, struct sockaddr *addr, 
socklen_t *length_ptr, + int *accept_fd) +{ + ucs_status_t status; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + + *accept_fd = accept(fd, addr, length_ptr); + if (*accept_fd < 0) { + status = ucs_socket_check_errno(errno); + if (status == UCS_ERR_NO_PROGRESS) { + return status; + } + + ucs_error("accept() failed (client addr %s): %m", + ucs_sockaddr_str(addr, ip_port_str, UCS_SOCKADDR_STRING_LEN)); + return status; + } + + return UCS_OK; +} + +ucs_status_t ucs_socket_getpeername(int fd, struct sockaddr_storage *peer_addr, + socklen_t *peer_addr_len) +{ + int ret; + + *peer_addr_len = sizeof(*peer_addr); + ret = getpeername(fd, (struct sockaddr*)peer_addr, + peer_addr_len); + if (ret < 0) { + if ((errno != ENOTCONN) && (errno != ECONNRESET)) { + ucs_error("getpeername(fd=%d) failed: %m", fd); + return UCS_ERR_IO_ERROR; + } + + return UCS_ERR_NOT_CONNECTED; + } + + return UCS_OK; +} + +int ucs_socket_is_connected(int fd) +{ + struct sockaddr_storage peer_addr = {0}; /* Suppress Clang false-positive */ + char peer_str[UCS_SOCKADDR_STRING_LEN]; + char local_str[UCS_SOCKADDR_STRING_LEN]; + socklen_t peer_addr_len; + ucs_status_t status; + + status = ucs_socket_getpeername(fd, &peer_addr, &peer_addr_len); + if (status != UCS_OK) { + return 0; + } + + ucs_debug("[%s]<->[%s] is a connected pair", + ucs_socket_getname_str(fd, local_str, UCS_SOCKADDR_STRING_LEN), + ucs_sockaddr_str((const struct sockaddr*)&peer_addr, peer_str, + UCS_SOCKADDR_STRING_LEN)); + + return 1; +} + + +ucs_status_t ucs_socket_server_init(const struct sockaddr *saddr, socklen_t socklen, + int backlog, int *listen_fd) +{ + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + ucs_status_t status; + int ret, fd = -1; + uint16_t port; + + /* Create the server socket for accepting incoming connections */ + status = ucs_socket_create(saddr->sa_family, SOCK_STREAM, &fd); + if (status != UCS_OK) { + goto err; + } + + /* Set the fd to non-blocking mode (so that accept() won't be blocking) */ + status = 
ucs_sys_fcntl_modfl(fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + goto err_close_socket; + } + + status = ucs_sockaddr_get_port(saddr, &port); + if (status != UCS_OK) { + goto err_close_socket; + } + + do { + ret = bind(fd, saddr, socklen); + } while (!port && (ret < 0) && (errno == EADDRINUSE)); + + if (ret < 0) { + ucs_error("bind(fd=%d addr=%s) failed: %m", + fd, ucs_sockaddr_str((struct sockaddr *)saddr, + ip_port_str, sizeof(ip_port_str))); + status = (errno == EADDRINUSE) ? UCS_ERR_BUSY : UCS_ERR_IO_ERROR; + goto err_close_socket; + } + + if (listen(fd, backlog) < 0) { + ucs_error("listen(fd=%d addr=%s backlog=%d) failed: %m", + fd, ucs_sockaddr_str(saddr, ip_port_str, sizeof(ip_port_str)), + backlog); + status = UCS_ERR_IO_ERROR; + goto err_close_socket; + } + + *listen_fd = fd; + return UCS_OK; + +err_close_socket: + close(fd); +err: + return status; +} + +int ucs_socket_max_conn() +{ + static long somaxconn_val = 0; + + if (somaxconn_val || + (ucs_read_file_number(&somaxconn_val, 1, + UCS_SOCKET_MAX_CONN_PATH) == UCS_OK)) { + ucs_assert(somaxconn_val <= INT_MAX); + return somaxconn_val; + } else { + ucs_warn("unable to read somaxconn value from %s file", + UCS_SOCKET_MAX_CONN_PATH); + somaxconn_val = SOMAXCONN; + return somaxconn_val; + } +} + +static ucs_status_t +ucs_socket_handle_io_error(int fd, const char *name, ssize_t io_retval, int io_errno, + ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) +{ + ucs_status_t status; + + if (io_retval == 0) { + ucs_trace("fd %d is closed", fd); + return UCS_ERR_CANCELED; /* Connection closed */ + } + + status = ucs_socket_check_errno(io_errno); + if (status == UCS_ERR_NO_PROGRESS) { + return UCS_ERR_NO_PROGRESS; + } + + if (err_cb != NULL) { + status = err_cb(err_cb_arg, io_errno); + if (status == UCS_OK) { + return UCS_ERR_NO_PROGRESS; + } + } + + ucs_error("%s(fd=%d) failed: %s", name, fd, strerror(io_errno)); + + return status; +} + +static inline ucs_status_t +ucs_socket_do_io_nb(int fd, void *data, size_t 
*length_p, + ucs_socket_io_func_t io_func, const char *name, + ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) +{ + ssize_t ret; + + ucs_assert(*length_p > 0); + + ret = io_func(fd, data, *length_p, MSG_NOSIGNAL); + if (ucs_likely(ret > 0)) { + *length_p = ret; + return UCS_OK; + } + + *length_p = 0; + return ucs_socket_handle_io_error(fd, name, ret, errno, + err_cb, err_cb_arg); +} + +static inline ucs_status_t +ucs_socket_do_io_b(int fd, void *data, size_t length, + ucs_socket_io_func_t io_func, const char *name, + ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) +{ + size_t done_cnt = 0, cur_cnt = length; + ucs_status_t status; + + do { /* repeat until all `length` bytes are transferred or a hard error occurs */ + status = ucs_socket_do_io_nb(fd, (char*)data + done_cnt, &cur_cnt, /* resume after the bytes already done */ + io_func, name, err_cb, err_cb_arg); /* pass the user argument, not the callback */ + done_cnt += cur_cnt; /* cur_cnt was set to the bytes moved by this call (0 if none) */ + ucs_assert(done_cnt <= length); + cur_cnt = length - done_cnt; /* request only the remainder next time */ + } while ((done_cnt < length) && + ((status == UCS_OK) || (status == UCS_ERR_NO_PROGRESS))); + + return status; +} + +static inline ucs_status_t +ucs_socket_do_iov_nb(int fd, struct iovec *iov, size_t iov_cnt, size_t *length_p, + ucs_socket_iov_func_t iov_func, const char *name, + ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) +{ + struct msghdr msg = { + .msg_iov = iov, + .msg_iovlen = iov_cnt + }; + ssize_t ret; + + ucs_assert(iov_cnt > 0); + + ret = iov_func(fd, &msg, MSG_NOSIGNAL); + if (ucs_likely(ret > 0)) { + *length_p = ret; + return UCS_OK; + } + + *length_p = 0; + return ucs_socket_handle_io_error(fd, name, ret, errno, + err_cb, err_cb_arg); +} + +ucs_status_t ucs_socket_send_nb(int fd, const void *data, size_t *length_p, + ucs_socket_io_err_cb_t err_cb, + void *err_cb_arg) +{ + return ucs_socket_do_io_nb(fd, (void*)data, length_p, + (ucs_socket_io_func_t)send, + "send", err_cb, err_cb_arg); +} + +/* recv is declared as 'always_inline' on some platforms, it leads to + * compilation warning. 
wrap it into static function */ +static ssize_t ucs_socket_recv_io(int fd, void *data, size_t size, int flags) +{ + return recv(fd, data, size, flags); +} + +ucs_status_t ucs_socket_recv_nb(int fd, void *data, size_t *length_p, + ucs_socket_io_err_cb_t err_cb, + void *err_cb_arg) +{ + return ucs_socket_do_io_nb(fd, data, length_p, ucs_socket_recv_io, + "recv", err_cb, err_cb_arg); +} + +ucs_status_t ucs_socket_send(int fd, const void *data, size_t length, + ucs_socket_io_err_cb_t err_cb, + void *err_cb_arg) +{ + return ucs_socket_do_io_b(fd, (void*)data, length, + (ucs_socket_io_func_t)send, + "send", err_cb, err_cb_arg); +} + +ucs_status_t ucs_socket_recv(int fd, void *data, size_t length, + ucs_socket_io_err_cb_t err_cb, + void *err_cb_arg) +{ + return ucs_socket_do_io_b(fd, data, length, ucs_socket_recv_io, + "recv", err_cb, err_cb_arg); +} + +ucs_status_t +ucs_socket_sendv_nb(int fd, struct iovec *iov, size_t iov_cnt, size_t *length_p, + ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) +{ + return ucs_socket_do_iov_nb(fd, iov, iov_cnt, length_p, sendmsg, + "sendv", err_cb, err_cb_arg); +} + +ucs_status_t ucs_sockaddr_sizeof(const struct sockaddr *addr, size_t *size_p) +{ + switch (addr->sa_family) { + case AF_INET: + *size_p = sizeof(struct sockaddr_in); + return UCS_OK; + case AF_INET6: + *size_p = sizeof(struct sockaddr_in6); + return UCS_OK; + default: + ucs_error("unknown address family: %d", addr->sa_family); + return UCS_ERR_INVALID_PARAM; + } +} + +ucs_status_t ucs_sockaddr_get_port(const struct sockaddr *addr, uint16_t *port_p) +{ + switch (addr->sa_family) { + case AF_INET: + *port_p = ntohs(UCS_SOCKET_INET_PORT(addr)); + return UCS_OK; + case AF_INET6: + *port_p = ntohs(UCS_SOCKET_INET6_PORT(addr)); + return UCS_OK; + default: + ucs_error("unknown address family: %d", addr->sa_family); + return UCS_ERR_INVALID_PARAM; + } +} + +ucs_status_t ucs_sockaddr_set_port(struct sockaddr *addr, uint16_t port) +{ + switch (addr->sa_family) { + case AF_INET: + 
UCS_SOCKET_INET_PORT(addr) = htons(port); + return UCS_OK; + case AF_INET6: + UCS_SOCKET_INET6_PORT(addr) = htons(port); + return UCS_OK; + default: + ucs_error("unknown address family: %d", addr->sa_family); + return UCS_ERR_INVALID_PARAM; + } +} + +const void *ucs_sockaddr_get_inet_addr(const struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: + return &UCS_SOCKET_INET_ADDR(addr); + case AF_INET6: + return &UCS_SOCKET_INET6_ADDR(addr); + default: + ucs_error("unknown address family: %d", addr->sa_family); + return NULL; + } +} + +static unsigned ucs_sockaddr_is_known_af(const struct sockaddr *sa) +{ + return ((sa->sa_family == AF_INET) || + (sa->sa_family == AF_INET6)); +} + +const char* ucs_sockaddr_str(const struct sockaddr *sock_addr, + char *str, size_t max_size) +{ + uint16_t port; + size_t str_len; + + if (!ucs_sockaddr_is_known_af(sock_addr)) { + ucs_strncpy_zero(str, "", max_size); + return str; + } + + if (!inet_ntop(sock_addr->sa_family, ucs_sockaddr_get_inet_addr(sock_addr), + str, max_size)) { + ucs_strncpy_zero(str, "", max_size); + return str; + } + + if (ucs_sockaddr_get_port(sock_addr, &port) != UCS_OK) { + ucs_strncpy_zero(str, "", max_size); + return str; + } + + str_len = strlen(str); + + ucs_snprintf_zero(str + str_len, max_size - str_len, ":%d", port); + + return str; +} + +int ucs_sockaddr_cmp(const struct sockaddr *sa1, + const struct sockaddr *sa2, + ucs_status_t *status_p) +{ + int result = 1; + ucs_status_t status = UCS_OK; + uint16_t port1, port2; + + if (!ucs_sockaddr_is_known_af(sa1) || + !ucs_sockaddr_is_known_af(sa2)) { + ucs_error("unknown address family: %d", + !ucs_sockaddr_is_known_af(sa1) ? 
+ sa1->sa_family : sa2->sa_family); + status = UCS_ERR_INVALID_PARAM; + goto out; + } + + if (sa1->sa_family != sa2->sa_family) { + result = (int)sa1->sa_family - (int)sa2->sa_family; + goto out; + } + + switch (sa1->sa_family) { + case AF_INET: + result = memcmp(&UCS_SOCKET_INET_ADDR(sa1), + &UCS_SOCKET_INET_ADDR(sa2), + sizeof(UCS_SOCKET_INET_ADDR(sa1))); + port1 = ntohs(UCS_SOCKET_INET_PORT(sa1)); + port2 = ntohs(UCS_SOCKET_INET_PORT(sa2)); + break; + case AF_INET6: + result = memcmp(&UCS_SOCKET_INET6_ADDR(sa1), + &UCS_SOCKET_INET6_ADDR(sa2), + sizeof(UCS_SOCKET_INET6_ADDR(sa1))); + port1 = ntohs(UCS_SOCKET_INET6_PORT(sa1)); + port2 = ntohs(UCS_SOCKET_INET6_PORT(sa2)); + break; + } + + if (!result && (port1 != port2)) { + result = (int)port1 - (int)port2; + } + +out: + if (status_p) { + *status_p = status; + } + return result; +} + +int ucs_sockaddr_is_inaddr_any(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: + return UCS_SOCKET_INET_ADDR(addr).s_addr == INADDR_ANY; + case AF_INET6: + return !memcmp(&(UCS_SOCKET_INET6_ADDR(addr)), &in6addr_any, + sizeof(UCS_SOCKET_INET6_ADDR(addr))); + default: + ucs_debug("invalid address family: %d", addr->sa_family); + return 0; + } +} + +ucs_status_t ucs_sockaddr_copy(struct sockaddr *dst_addr, + const struct sockaddr *src_addr) +{ + ucs_status_t status; + size_t size; + + status = ucs_sockaddr_sizeof(src_addr, &size); + if (status != UCS_OK) { + return status; + } + + memcpy(dst_addr, src_addr, size); + return UCS_OK; +} + +ucs_status_t ucs_sockaddr_get_ifname(int fd, char *ifname_str, size_t max_strlen) +{ + ucs_status_t status = UCS_ERR_NO_DEVICE; + struct ifaddrs *ifa; + struct ifaddrs* ifaddrs; + struct sockaddr *sa; + struct sockaddr *my_addr; + socklen_t sockaddr_len; + char str_local_addr[UCS_SOCKADDR_STRING_LEN]; + + sockaddr_len = sizeof(struct sockaddr_storage); + my_addr = ucs_alloca(sockaddr_len); + + if (getsockname(fd, my_addr, &sockaddr_len)) { + ucs_warn("getsockname error: %m"); + 
return UCS_ERR_IO_ERROR; + } + + /* port number is not important, so we assign zero because sockaddr + * structures returned by getifaddrs have ports assigned to zero */ + if (UCS_OK != ucs_sockaddr_set_port(my_addr, 0)) { + ucs_warn("sockcm doesn't support unknown address family"); + return UCS_ERR_INVALID_PARAM; + } + + ucs_debug("check ifname for socket on %s", + ucs_sockaddr_str(my_addr, str_local_addr, UCS_SOCKADDR_STRING_LEN)); + + if (getifaddrs(&ifaddrs)) { + ucs_warn("getifaddrs error: %m"); + return UCS_ERR_IO_ERROR; + } + + for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) { + sa = (struct sockaddr*) ifa->ifa_addr; + + if (sa == NULL) { + ucs_debug("NULL ifaddr encountered with ifa_name: %s", ifa->ifa_name); + continue; + } + + if (((sa->sa_family == AF_INET) ||(sa->sa_family == AF_INET6)) && + (!ucs_sockaddr_cmp(sa, my_addr, NULL))) { + ucs_debug("matching ip found iface on %s", ifa->ifa_name); + ucs_strncpy_safe(ifname_str, ifa->ifa_name, max_strlen); + status = UCS_OK; + break; + } + } + + freeifaddrs(ifaddrs); + + return status; +} diff --git a/src/ucs/sys/sock.h b/src/ucs/sys/sock.h new file mode 100644 index 0000000..0725f14 --- /dev/null +++ b/src/ucs/sys/sock.h @@ -0,0 +1,423 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * See file LICENSE for terms. 
+ */ + +#ifndef UCS_SOCKET_H +#define UCS_SOCKET_H + +#include + +#include +#include +#include +#include +#include +#include + +BEGIN_C_DECLS + + +/* A string to hold the IP address and port from a sockaddr */ +#define UCS_SOCKADDR_STRING_LEN 60 + +#define UCS_SOCKET_INET_ADDR(_addr) (((struct sockaddr_in*)(_addr))->sin_addr) +#define UCS_SOCKET_INET_PORT(_addr) (((struct sockaddr_in*)(_addr))->sin_port) + +#define UCS_SOCKET_INET6_ADDR(_addr) (((struct sockaddr_in6*)(_addr))->sin6_addr) +#define UCS_SOCKET_INET6_PORT(_addr) (((struct sockaddr_in6*)(_addr))->sin6_port) + + +/* Returns an error if the default error handling should be + * done (the error value will be returned to a caller), + * otherwise `UCS_OK` */ +typedef ucs_status_t (*ucs_socket_io_err_cb_t)(void *arg, int io_errno); + + +/** + * Check if the given (interface) flags represent an active interface. + * + * @param [in] flags Interface flags (Can be obtained using getifaddrs + * or from SIOCGIFFLAGS ioctl). + * + * @return 1 if true, otherwise 0 + */ +int ucs_netif_flags_is_active(unsigned int flags); + + +/** + * Perform an ioctl call on the given interface with the given request. + * Set the result in the ifreq struct. + * + * @param [in] if_name Interface name to test. + * @param [in] request The request to fulfill. + * @param [out] if_req Filled with the requested information. + * + * @return UCS_OK on success or an error code on failure. + */ +ucs_status_t ucs_netif_ioctl(const char *if_name, unsigned long request, + struct ifreq *if_req); + + +/** + * Check if the given interface is in an active state. + * + * @param [in] if_name Interface name to check. + * + * @return 1 if true, otherwise 0 + */ +int ucs_netif_is_active(const char *if_name); + + +/** + * Create a socket. + * + * @param [in] domain Communication domain (AF_INET/AF_INET6/etc). + * @param [in] type Communication semantics (SOCK_STREAM/SOCK_DGRAM/etc). + * @param [out] fd_p Pointer to created fd. 
+ * + * @return UCS_OK on success or UCS_ERR_IO_ERROR on failure. + */ +ucs_status_t ucs_socket_create(int domain, int type, int *fd_p); + + +/** + * Set options on socket. + * + * @param [in] fd Socket fd. + * @param [in] level The level at which the option is defined. + * @param [in] optname The socket option for which the value is to be set. + * @param [in] optval A pointer to the buffer in which the value for the + * requested option is specified. + * @param [in] optlen The size, in bytes, of the buffer pointed to by the + * optval and def_optval parameters. + * + * @return UCS_OK on success or UCS_ERR_IO_ERROR on failure + */ +ucs_status_t ucs_socket_setopt(int fd, int level, int optname, + const void *optval, socklen_t optlen); + + +/** + * Connect the socket referred to by the file descriptor `fd` + * to the address specified by `dest_addr`. + * + * @param [in] fd Socket fd. + * @param [in] dest_addr Pointer to destination address. + * + * @return UCS_OK on success or UCS_ERR_UNREACHABLE on failure or + * UCS_INPROGRESS if operation is in progress. + */ +ucs_status_t ucs_socket_connect(int fd, const struct sockaddr *dest_addr); + + +/** + * Accept a connection request on the given socket fd. + * + * @param [in] fd Socket fd. + * @param [out] addr Client socket address that initiated the connection + * @param [out] length_ptr Client address socket's length + * @param [out] accept_fd Upon success, a non-negative file descriptor + * of the accepted socket. Otherwise, -1. + * + * @return UCS_OK on success or UCS_ERR_NO_PROGRESS to indicate that no progress + * was made or UCS_ERR_IO_ERROR on failure. + */ +ucs_status_t ucs_socket_accept(int fd, struct sockaddr *addr, socklen_t *length_ptr, + int *accept_fd); + + +/** + * Get the address of the peer's socket that the given fd is connected to + * + * @param [in] fd Socket fd. + * @param [out] peer_addr Address of the remote peer. + * @param [out] peer_addr_len Length of the remote peer's address. 
+ * + * @return UCS_OK on success, UCS_ERR_NOT_CONNECTED if the socket is not connected, or UCS_ERR_IO_ERROR on failure + */ +ucs_status_t ucs_socket_getpeername(int fd, struct sockaddr_storage *peer_addr, + socklen_t *peer_addr_len); + + +/** + * Check whether the socket referred to by the file descriptor `fd` + * is connected to a peer or not. + * + * @param [in] fd Socket fd. + * + * @return 1 - connected, 0 - not connected. + */ +int ucs_socket_is_connected(int fd); + + +/** + * Initialize a TCP server. + * Open a socket, bind a sockaddr to that socket and start listening on it for + * incoming connection requests. + * + * @param [in] saddr Sockaddr for the server to listen on. + * If the port number inside is set to zero - + * use a random port. + * @param [in] socklen Size of saddr. + * @param [in] backlog Length of the queue for pending connections - + * for the listen() call. + * @param [out] listen_fd The fd that belongs to the server. + * + * @return UCS_OK on success or an error code on failure. + */ +ucs_status_t ucs_socket_server_init(const struct sockaddr *saddr, socklen_t socklen, + int backlog, int *listen_fd); + + +/** + * Returns the maximum possible value for the number of sockets that + * are ready to be accepted. It may be either the value read from the system + * path or the SOMAXCONN value. + * + * @return The queue length for completely established sockets + * waiting to be accepted. + */ +int ucs_socket_max_conn(); + + +/** + * Non-blocking send operation sends data on the connected (or bound + * connectionless) socket referred to by the file descriptor `fd`. + * + * @param [in] fd Socket fd. + * @param [in] data A pointer to a buffer containing the data to + * be transmitted. + * @param [in/out] length_p The length, in bytes, of the data in buffer + * pointed to by the `data` parameter. The amount of + * data transmitted is written to this argument. + * @param [in] err_cb Error callback. + * @param [in] err_cb_arg User's argument for the error callback. 
+ * + * @return UCS_OK on success, UCS_ERR_CANCELED if connection closed, + * UCS_ERR_NO_PROGRESS if system call was interrupted or + * would block, UCS_ERR_IO_ERROR on failure. + */ +ucs_status_t ucs_socket_send_nb(int fd, const void *data, size_t *length_p, + ucs_socket_io_err_cb_t err_cb, + void *err_cb_arg); + + +/** + * Non-blocking receive operation receives data from the connected (or bound + * connectionless) socket referred to by the file descriptor `fd`. + * + * @param [in] fd Socket fd. + * @param [in] data A pointer to a buffer to receive the incoming + * data. + * @param [in/out] length_p The length, in bytes, of the data in buffer + * pointed to by the `data` parameter. The amount of + * data received is written to this argument. + * @param [in] err_cb Error callback. + * @param [in] err_cb_arg User's argument for the error callback. + * + * @return UCS_OK on success, UCS_ERR_CANCELED if connection closed, + * UCS_ERR_NO_PROGRESS if system call was interrupted or + * would block, UCS_ERR_IO_ERROR on failure. + */ +ucs_status_t ucs_socket_recv_nb(int fd, void *data, size_t *length_p, + ucs_socket_io_err_cb_t err_cb, + void *err_cb_arg); + + +/** + * Blocking send operation sends data on the connected (or bound connectionless) + * socket referred to by the file descriptor `fd`. + * + * @param [in] fd Socket fd. + * @param [in] data A pointer to a buffer containing the data to + * be transmitted. + * @param [in/out] length The length, in bytes, of the data in buffer + * pointed to by the `data` parameter. + * @param [in] err_cb Error callback. + * @param [in] err_cb_arg User's argument for the error callback. + * + * @return UCS_OK on success, UCS_ERR_CANCELED if connection closed, + * UCS_ERR_IO_ERROR on failure. 
+ */ +ucs_status_t ucs_socket_send(int fd, const void *data, size_t length, + ucs_socket_io_err_cb_t err_cb, + void *err_cb_arg); + + +/** + * Non-blocking send operation sends I/O vector on the connected (or bound + * connectionless) socket referred to by the file descriptor `fd`. + * + * @param [in] fd Socket fd. + * @param [in] iov A pointer to an array of iovec buffers. + * @param [in] iov_cnt The number of buffers pointed to by + * the iov parameter. + * @param [out] length_p The amount of data transmitted is written to + * this argument. + * @param [in] err_cb Error callback. + * @param [in] err_cb_arg User's argument for the error callback. + * + * @return UCS_OK on success, UCS_ERR_CANCELED if connection closed, + * UCS_ERR_NO_PROGRESS if system call was interrupted or + * would block, UCS_ERR_IO_ERROR on failure. + */ +ucs_status_t ucs_socket_sendv_nb(int fd, struct iovec *iov, size_t iov_cnt, + size_t *length_p, ucs_socket_io_err_cb_t err_cb, + void *err_cb_arg); + + +/** + * Blocking receive operation receives data from the connected (or bound + * connectionless) socket referred to by the file descriptor `fd`. + * + * @param [in] fd Socket fd. + * @param [in] data A pointer to a buffer to receive the incoming + * data. + * @param [in] length The length, in bytes, of the data in buffer + * pointed to by the `data` parameter. + * @param [in] err_cb Error callback. + * @param [in] err_cb_arg User's argument for the error callback. + * + * @return UCS_OK on success, UCS_ERR_CANCELED if connection closed, + * UCS_ERR_IO_ERROR on failure. + */ +ucs_status_t ucs_socket_recv(int fd, void *data, size_t length, + ucs_socket_io_err_cb_t err_cb, + void *err_cb_arg); + + +/** + * Return size of a given sockaddr structure. + * + * @param [in] addr Pointer to sockaddr structure. + * @param [out] size_p Pointer to variable where size of + * sockaddr_in/sockaddr_in6 structure will be written + * + * @return UCS_OK on success or UCS_ERR_INVALID_PARAM on failure. 
+ */ +ucs_status_t ucs_sockaddr_sizeof(const struct sockaddr *addr, size_t *size_p); + + +/** + * Return port of a given sockaddr structure. + * + * @param [in] addr Pointer to sockaddr structure. + * @param [out] port_p Pointer to variable where port (host notation) + * of sockaddr_in/sockaddr_in6 structure will be written + * + * @return UCS_OK on success or UCS_ERR_INVALID_PARAM on failure. + */ +ucs_status_t ucs_sockaddr_get_port(const struct sockaddr *addr, uint16_t *port_p); + + +/** + * Set port to a given sockaddr structure. + * + * @param [in] addr Pointer to sockaddr structure. + * @param [in] port Port (host notation) that will be written + * + * @return UCS_OK on success or UCS_ERR_INVALID_PARAM on failure. + */ +ucs_status_t ucs_sockaddr_set_port(struct sockaddr *addr, uint16_t port); + + +/** + * Return IP addr of a given sockaddr structure. + * + * @param [in] addr Pointer to sockaddr structure. + * + * @return IP address of sockaddr_in/sockaddr_in6 structure + * on success or NULL on failure. + */ +const void *ucs_sockaddr_get_inet_addr(const struct sockaddr *addr); + + +/** + * Extract the IP address from a given sockaddr and return it as a string. + * + * @param [in] sock_addr Sockaddr to take IP address from. + * @param [out] str A string filled with the IP address. + * @param [in] max_size Size of a string (considering '\0'-terminated symbol) + * + * @return ip_str if the sock_addr has a valid IP address or 'Invalid address' + * otherwise. + */ +const char* ucs_sockaddr_str(const struct sockaddr *sock_addr, + char *str, size_t max_size); + + +/** + * Extract the IP address from a given socket fd and return it as a string. + * + * @param [in] fd Socket fd. + * @param [out] str A string filled with the IP address. + * @param [in] max_size Size of a string (considering '\0'-terminated symbol) + * + * @return ip_str if the sock_addr has a valid IP address or 'Invalid address' + * otherwise. 
+ */ +const char *ucs_socket_getname_str(int fd, char *str, size_t max_size); + + +/** + * Return a value indicating the relationship between the passed sockaddr structures. + * + * @param [in] sa1 Pointer to sockaddr structure #1. + * @param [in] sa2 Pointer to sockaddr structure #2. + * @param [in/out] status_p Pointer (can be NULL) to a status: UCS_OK on success + * or UCS_ERR_INVALID_PARAM on failure. + * + * @return Returns an integral value indicating the relationship between the + * socket addresses: + * > 0 - the first socket address is greater than the second + * socket address; + * < 0 - the first socket address is lower than the second + * socket address; + * = 0 - the socket addresses are equal. + * Note: it returns a positive integer value if an error occurred + * during comparison. + */ +int ucs_sockaddr_cmp(const struct sockaddr *sa1, + const struct sockaddr *sa2, + ucs_status_t *status_p); + +/** + * Indicate if given IP addr is INADDR_ANY (IPV4) or in6addr_any (IPV6) + * + * @param [in] addr Pointer to sockaddr structure. + * + * @return 1 if input is INADDR_ANY or in6addr_any + * 0 if not + */ +int ucs_sockaddr_is_inaddr_any(struct sockaddr *addr); + + +/** + * Copy the src_addr sockaddr to dst_addr sockaddr. The length to copy is + * the size of the src_addr sockaddr. + * + * @param [in] dst_addr Pointer to destination sockaddr (to copy to). + * @param [in] src_addr Pointer to source sockaddr (to copy from). + * + * @return UCS_OK on success or UCS_ERR_INVALID_PARAM on failure. + */ +ucs_status_t ucs_sockaddr_copy(struct sockaddr *dst_addr, + const struct sockaddr *src_addr); + + +/** + * Copy into ifname_str the name of the interface associated with the IP on which + * the socket file descriptor fd is bound. IPv4 and IPv6 addresses are handled. + * + * @param [in] fd Socket fd. + * @param [out] ifname_str A string filled with the interface name. + * @param [in] max_strlen Maximum length of the ifname_str. 
+ */ +ucs_status_t ucs_sockaddr_get_ifname(int fd, char *ifname_str, size_t max_strlen); + +END_C_DECLS + +#endif diff --git a/src/ucs/sys/string.c b/src/ucs/sys/string.c new file mode 100644 index 0000000..c66abd0 --- /dev/null +++ b/src/ucs/sys/string.c @@ -0,0 +1,292 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "string.h" +#include "math.h" +#include "sys.h" +#include +#include + +#include +#include +#include +#include + + +void ucs_fill_filename_template(const char *tmpl, char *buf, size_t max) +{ + char *p, *end; + const char *pf, *pp; + size_t length; + time_t t; + + p = buf; + end = buf + max - 1; + *end = 0; + pf = tmpl; + while (*pf != 0 && p < end) { + pp = strchr(pf, '%'); + if (pp == NULL) { + strncpy(p, pf, end - p); + p = end; + break; + } + + length = ucs_min(pp - pf, end - p); + strncpy(p, pf, length); + p += length; + + switch (*(pp + 1)) { + case 'p': + snprintf(p, end - p, "%d", getpid()); + pf = pp + 2; + p += strlen(p); + break; + case 'h': + snprintf(p, end - p, "%s", ucs_get_host_name()); + pf = pp + 2; + p += strlen(p); + break; + case 'c': + snprintf(p, end - p, "%02d", ucs_get_first_cpu()); + pf = pp + 2; + p += strlen(p); + break; + case 't': + t = time(NULL); + strftime(p, end - p, "%Y-%m-%d-%H:%M:%S", localtime(&t)); + pf = pp + 2; + p += strlen(p); + break; + case 'u': + snprintf(p, end - p, "%s", basename(ucs_get_user_name())); + pf = pp + 2; + p += strlen(p); + break; + case 'e': + snprintf(p, end - p, "%s", basename(ucs_get_exe())); + pf = pp + 2; + p += strlen(p); + break; + default: + if (p < end) { /* the literal text copied above may already have filled the buffer */ + *(p++) = *pp; + } + pf = pp + 1; /* every case has advanced p itself; no strlen(p) over unwritten bytes */ + break; + } + } + *p = 0; +} + +void ucs_snprintf_zero(char *buf, size_t size, const char *fmt, ...) 
+{ + va_list ap; + + memset(buf, 0, size); + va_start(ap, fmt); + vsnprintf(buf, size, fmt, ap); + va_end(ap); +} + +void ucs_strncpy_zero(char *dest, const char *src, size_t max) +{ + if (max) { + strncpy(dest, src, max - 1); + dest[max - 1] = '\0'; + } +} + +uint64_t ucs_string_to_id(const char* str) +{ + uint64_t id = 0; + strncpy((char*)&id, str, sizeof(id) - 1); /* Last character will be \0 */ + return id; +} + +size_t ucs_string_quantity_prefix_value(char prefix) +{ + switch (prefix) { + case 'B': + return 1; + case 'K': + return UCS_KBYTE; + case 'M': + return UCS_MBYTE; + case 'G': + return UCS_GBYTE; + case 'T': + return UCS_TBYTE; + default: + return 0; + } +} + +char *ucs_memunits_to_str(size_t value, char *buf, size_t max) +{ + static const char * suffixes[] = {"", "K", "M", "G", "T", NULL}; + + const char **suffix; + + if (value == SIZE_MAX) { + ucs_strncpy_safe(buf, "(inf)", max); + } else { + suffix = &suffixes[0]; + while ((value >= 1024) && ((value % 1024) == 0) && *(suffix + 1)) { + value /= 1024; + ++suffix; + } + ucs_snprintf_safe(buf, max, "%zu%s", value, *suffix); + } + return buf; +} + +ucs_status_t ucs_str_to_memunits(const char *buf, void *dest) +{ + char units[3]; + int num_fields; + size_t value; + size_t bytes; + + /* Special value: infinity */ + if (!strcasecmp(buf, UCS_NUMERIC_INF_STR)) { + *(size_t*)dest = UCS_MEMUNITS_INF; + return UCS_OK; + } + + /* Special value: auto */ + if (!strcasecmp(buf, "auto")) { + *(size_t*)dest = UCS_MEMUNITS_AUTO; + return UCS_OK; + } + + memset(units, 0, sizeof(units)); + num_fields = sscanf(buf, "%ld%c%c", &value, &units[0], &units[1]); + if (num_fields == 1) { + bytes = 1; + } else if ((num_fields == 2) || (num_fields == 3)) { + bytes = ucs_string_quantity_prefix_value(toupper(units[0])); + if (!bytes || ((num_fields == 3) && tolower(units[1]) != 'b')) { + return UCS_ERR_INVALID_PARAM; + } + } else { + return UCS_ERR_INVALID_PARAM; + } + + *(size_t*)dest = value * bytes; + return UCS_OK; +} + +void 
ucs_snprintf_safe(char *buf, size_t size, const char *fmt, ...) +{ + va_list ap; + + if (size == 0) { + return; + } + + va_start(ap, fmt); + vsnprintf(buf, size - 1, fmt, ap); + buf[size - 1] = '\0'; + va_end(ap); +} + +char* ucs_strncpy_safe(char *dst, const char *src, size_t len) +{ + size_t length; + + if (!len) { + return dst; + } + + /* copy string into dst including null terminator */ + length = ucs_min(len, strnlen(src, len) + 1); + + memcpy(dst, src, length); + dst[length - 1] = '\0'; + return dst; +} + +char *ucs_strtrim(char *str) +{ + char *start, *end; + + /* point 'p' at first non-space character */ + start = str; + while (isspace(*start)) { + ++start; + } + + if (*start) { + /* write '\0' after the last non-space character */ + end = start + strlen(start) - 1; + while (isspace(*end)) { + --end; + } + *(end + 1) = '\0'; + } + + return start; +} + +const char * ucs_str_dump_hex(const void* data, size_t length, char *buf, + size_t max, size_t per_line) +{ + static const char hexchars[] = "0123456789abcdef"; + char *p, *endp; + uint8_t value; + size_t i; + + p = buf; + endp = buf + max - 2; + i = 0; + while ((p < endp) && (i < length)) { + if (i > 0) { + if ((i % per_line) == 0) { + *(p++) = '\n'; + } else if ((i % 4) == 0) { + *(p++) = ':'; + } + + if (p == endp) { + break; + } + } + + value = *(const uint8_t*)(UCS_PTR_BYTE_OFFSET(data, i)); + p[0] = hexchars[value / 16]; + p[1] = hexchars[value % 16]; + p += 2; + ++i; + } + *p = 0; + return buf; +} + +const char* ucs_flags_str(char *buf, size_t max, + uint64_t flags, const char **str_table) +{ + size_t i, len = 0; + + for (i = 0; *str_table; ++str_table, ++i) { + if (flags & UCS_BIT(i)) { /* not using ucs_for_each_bit to silence coverity */ + snprintf(buf + len, max - len, "%s,", *str_table); + len = strlen(buf); + } + } + + if (len > 0) { + buf[len - 1] = '\0'; /* remove last ',' */ + } else { + buf[0] = '\0'; + } + + return buf; +} diff --git a/src/ucs/sys/string.h b/src/ucs/sys/string.h new file mode 
100644 index 0000000..d3e979f --- /dev/null +++ b/src/ucs/sys/string.h @@ -0,0 +1,203 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCS_STRING_H_ +#define UCS_STRING_H_ + +#include "compiler_def.h" +#include +#include + +#include +#include +#include +#include + +BEGIN_C_DECLS + +/** @file string.h */ + +/* value which specifies "infinity" for a numeric variable */ +#define UCS_NUMERIC_INF_STR "inf" + +/* value which specifies "auto" for a variable */ +#define UCS_VALUE_AUTO_STR "auto" + +/* the numeric value of "infinity" */ +#define UCS_MEMUNITS_INF ((size_t)-1) +#define UCS_ULUNITS_INF ((size_t)-1) + +/* value which specifies "auto" for a numeric variable */ +#define UCS_MEMUNITS_AUTO ((size_t)-2) +#define UCS_ULUNITS_AUTO ((size_t)-2) +#define UCS_HEXUNITS_AUTO ((uint16_t)-2) + +#define UCS_BANDWIDTH_AUTO (-1.0) + +/** + * Expand a partial path to full path. + * + * @param path Path to expand. + * @param fullpath Filled with full path. + * @param max Room in "fullpath" + */ +void ucs_expand_path(const char *path, char *fullpath, size_t max); + + +/** + * Fill a filename template. The following values in the string are replaced: + * %p - replaced by process id + * %h - replaced by host name + * + * @param tmpl File name template (possibly containing formatting sequences) + * @param buf Filled with resulting file name + * @param max Maximal size of destination buffer. + */ +void ucs_fill_filename_template(const char *tmpl, char *buf, size_t max); + + +/** + * Format a string to a buffer of given size, and fill the rest of the buffer + * with '\0'. Also, guarantee that the last char in the buffer is '\0'. + * + * @param buf Buffer to format the string to. + * @param size Buffer size. + * @param fmt Format string. + */ +void ucs_snprintf_zero(char *buf, size_t size, const char *fmt, ...) 
+ UCS_F_PRINTF(3, 4); + + +/** + * Same as strncpy(), but guarantee that the last char in the buffer is '\0'. + */ +void ucs_strncpy_zero(char *dest, const char *src, size_t max); + + +/** + * Return a number filled with the first characters of the string. + */ +uint64_t ucs_string_to_id(const char *str); + + +/** + * Convert a memory units value to a string which is abbreviated if possible. + * For example: + * 1024 -> 1kb + * + * @param value Value to convert. + * @param buf Buffer to place the string. + * @param max Maximal length of the buffer. + * + * @return Pointer to 'buf', which holds the resulting string. + */ +char *ucs_memunits_to_str(size_t value, char *buf, size_t max); + + +/** + * Convert a string holding memory units to a numeric value. + * + * @param buf String to convert + * @param dest Numeric value of the string + * + * @return UCS_OK if successful, or error code otherwise. + */ +ucs_status_t ucs_str_to_memunits(const char *buf, void *dest); + + +/** + * Return the numeric value of the memunits prefix. + * For example: + * 'M' -> 1048576 + */ +size_t ucs_string_quantity_prefix_value(char prefix); + + +/** + * Format a string to a buffer of given size, and guarantee that the last char + * in the buffer is '\0'. + * + * @param buf Buffer to format the string to. + * @param size Buffer size. + * @param fmt Format string. + */ +void ucs_snprintf_safe(char *buf, size_t size, const char *fmt, ...) + UCS_F_PRINTF(3, 4); + + +/** + * Copy string limited by len bytes. Destination string is always ended by '\0' + * + * @param dst Destination buffer + * @param src Source string + * @param len Maximum string length to copy + * + * @return address of destination buffer + */ +char* ucs_strncpy_safe(char *dst, const char *src, size_t len); + + +/** + * Remove whitespace characters in the beginning and end of the string, as + * detected by isspace(3). Returns a pointer to the new string (which may be a + * substring of 'str'). 
The original string 'str' may be modified in-place. + * + * @param str String to remove whitespaces from. + * @return Pointer to the new string, with leading/trailing whitespaces removed. + */ +char *ucs_strtrim(char *str); + + +/** + * Get a pointer to the file name component of a path. Unlike basename(3), the + * source string is never modified: the result points into 'path' itself, just + * after the last '/' (or at 'path' when it contains no '/'). + * + * @param path Path to parse. + * + * @return Pointer to the file name inside 'path'. + */ +static UCS_F_ALWAYS_INLINE const char* ucs_basename(const char *path) +{ + const char *name = strrchr(path, '/'); + + return (name == NULL) ? path : name + 1; +} + + +/** + * Dump binary array into string in hex format. Destination string is + * always ended by '\0'. + * + * @param data Source array to dump. + * @param length Length of source array in bytes. + * @param buf Destination string. + * @param max Max length of destination string including terminating + * '\0' byte. + * @param per_line Number of bytes in source array to print per line + * or SIZE_MAX for single line. + * + * @return address of destination buffer + */ +const char *ucs_str_dump_hex(const void* data, size_t length, char *buf, + size_t max, size_t per_line); + + +/** + * Convert the given flags to a string that represents them. + * + * @param str String to hold the flags string values. + * @param max Size of the string. + * @param flags Flags to be converted. + * @param str_table Conversion table - from flag value to a string. + * + * @return String that holds the representation of the given flags. + */ +const char* ucs_flags_str(char *str, size_t max, + uint64_t flags, const char **str_table); + +END_C_DECLS + +#endif diff --git a/src/ucs/sys/stubs.c b/src/ucs/sys/stubs.c new file mode 100644 index 0000000..b5fb831 --- /dev/null +++ b/src/ucs/sys/stubs.c @@ -0,0 +1,74 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include +#include + + +void ucs_empty_function() +{ +} + +unsigned ucs_empty_function_return_zero() +{ + return 0; +} + +int64_t ucs_empty_function_return_zero_int64() +{ + return 0; +} + +unsigned ucs_empty_function_return_one() +{ + return 1; +} + +ucs_status_t ucs_empty_function_return_success() +{ + return UCS_OK; +} + +ucs_status_t ucs_empty_function_return_unsupported() +{ + return UCS_ERR_UNSUPPORTED; +} + +ucs_status_t ucs_empty_function_return_inprogress() +{ + return UCS_INPROGRESS; +} + +ucs_status_t ucs_empty_function_return_no_resource() +{ + return UCS_ERR_NO_RESOURCE; +} + +ucs_status_ptr_t ucs_empty_function_return_ptr_no_resource() +{ + return UCS_STATUS_PTR(UCS_ERR_NO_RESOURCE); +} + +ucs_status_t ucs_empty_function_return_ep_timeout() +{ + return UCS_ERR_ENDPOINT_TIMEOUT; +} + +ssize_t ucs_empty_function_return_bc_ep_timeout() +{ + return UCS_ERR_ENDPOINT_TIMEOUT; /* same status code, returned through a ssize_t */ +} + +ucs_status_t ucs_empty_function_return_busy() +{ + return UCS_ERR_BUSY; +} + +int ucs_empty_function_do_assert() +{ + ucs_assert_always(0); /* the asserted condition is constant false */ + return 0; +} diff --git a/src/ucs/sys/stubs.h b/src/ucs/sys/stubs.h new file mode 100644 index 0000000..fe0849c --- /dev/null +++ b/src/ucs/sys/stubs.h @@ -0,0 +1,39 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_STUBS_H +#define UCS_STUBS_H + +#include + +#include +#include +#include + +BEGIN_C_DECLS + +/** @file stubs.h */ + +/** + * Empty functions which can be cast to no-operation callbacks in various situations. 
+ */ +void ucs_empty_function(); +unsigned ucs_empty_function_return_zero(); +unsigned ucs_empty_function_return_one(); +int64_t ucs_empty_function_return_zero_int64(); +ucs_status_t ucs_empty_function_return_success(); +ucs_status_t ucs_empty_function_return_unsupported(); +ucs_status_t ucs_empty_function_return_inprogress(); +ucs_status_t ucs_empty_function_return_no_resource(); +ucs_status_ptr_t ucs_empty_function_return_ptr_no_resource(); +ucs_status_t ucs_empty_function_return_ep_timeout(); +ssize_t ucs_empty_function_return_bc_ep_timeout(); +ucs_status_t ucs_empty_function_return_busy(); +int ucs_empty_function_do_assert(); + +END_C_DECLS + +#endif diff --git a/src/ucs/sys/sys.c b/src/ucs/sys/sys.c new file mode 100644 index 0000000..943b088 --- /dev/null +++ b/src/ucs/sys/sys.c @@ -0,0 +1,1255 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2012. ALL RIGHTS RESERVED. +* Copyright (c) UT-Battelle, LLC. 2014-2019. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_THR_H +#include +#endif + +#if HAVE_SYS_CAPABILITY_H +# include +#endif + +/* Fallback for the free memory size, used when it cannot be read from /proc/meminfo */ +#define UCS_DEFAULT_MEM_FREE 640000 +#define UCS_PROCESS_SMAPS_FILE "/proc/self/smaps" +#define UCS_PROCESS_NS_DIR "/proc/self/ns" +#define UCS_PROCESS_BOOTID_FILE "/proc/sys/kernel/random/boot_id" +#define UCS_PROCESS_BOOTID_FMT "%x-%4hx-%4hx-%4hx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx" +#define UCS_PROCESS_NS_FIRST 0xF0000000U +#define UCS_PROCESS_NS_NET_DFLT 0xF0000080U + + +struct { + const char *name; + ucs_sys_ns_t dflt; +} static ucs_sys_namespace_info[] = { + [UCS_SYS_NS_TYPE_IPC] = {.name = "ipc", .dflt = UCS_PROCESS_NS_FIRST - 1}, + [UCS_SYS_NS_TYPE_MNT] = {.name = "mnt", .dflt = UCS_PROCESS_NS_FIRST - 0}, + [UCS_SYS_NS_TYPE_NET] = {.name = "net", .dflt = UCS_PROCESS_NS_NET_DFLT}, + [UCS_SYS_NS_TYPE_PID] = {.name = "pid", .dflt = UCS_PROCESS_NS_FIRST - 4}, + [UCS_SYS_NS_TYPE_USER] = {.name = "user", .dflt = UCS_PROCESS_NS_FIRST - 3}, + [UCS_SYS_NS_TYPE_UTS] = {.name = "uts", .dflt = UCS_PROCESS_NS_FIRST - 2} +}; + + +const char *ucs_get_tmpdir() +{ + char *env_tmpdir; + + env_tmpdir = getenv("TMPDIR"); + if (env_tmpdir) { + return env_tmpdir; + } else { + return "/tmp/"; + } +} + +/* + * Return the host name up to the first '.'. The value is looked up on first + * use and cached in a static buffer. + */ +const char *ucs_get_host_name() +{ + /* one extra byte which gethostname() is never allowed to write, so that the + * buffer stays null-terminated even when the name is truncated */ + static char hostname[HOST_NAME_MAX + 1] = {0}; + + if (*hostname == 0) { + gethostname(hostname, sizeof(hostname) - 1); + strtok(hostname, "."); + } + return hostname; +} + +const char *ucs_get_user_name() +{ + static char username[256] = {0}; + + if (*username == 0) { + getlogin_r(username, sizeof(username)); + } + return username; +} + +/* + * Write the absolute form of 'path' to 'fullpath' (at most 'max' bytes, always + * null-terminated when max > 0). Relative paths are prefixed with the current + * working directory; if that cannot be obtained, 'path' is copied as-is. + */ +void ucs_expand_path(const char *path, char *fullpath, size_t max) +{ + char cwd[1024] = {0}; + + if (path[0] == '/') { + /* strncpy() would leave 'fullpath' unterminated for long paths */ + ucs_strncpy_safe(fullpath, path, max); 
+ } else if (getcwd(cwd, sizeof(cwd) - 1) != NULL) { + snprintf(fullpath, max, "%s/%s", cwd, path); + } else { + ucs_warn("failed to expand path '%s' (%m), using original path", path); + /* strncpy() would leave 'fullpath' unterminated for long paths */ + ucs_strncpy_safe(fullpath, path, max); + } +} + +const char *ucs_get_exe() +{ + static char exe[1024]; + int ret; + + ret = readlink("/proc/self/exe", exe, sizeof(exe) - 1); + if (ret < 0) { + exe[0] = '\0'; + } else { + exe[ret] = '\0'; + } + + return exe; +} + +/* + * Return the CRC32 of the file contents, or 0 if the file cannot be opened. + */ +uint32_t ucs_file_checksum(const char *filename) +{ + char buffer[1024]; + ssize_t nread; + int fd; + uint32_t crc; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + return 0; + } + + crc = 0; + do { + nread = read(fd, buffer, sizeof(buffer)); + if (nread > 0) { + crc = ucs_crc32(crc, buffer, nread); + } + /* a short read does not mean end-of-file; stop only on EOF (0) or error */ + } while (nread > 0); + close(fd); + + return crc; +} + +static uint64_t ucs_get_mac_address() +{ + static uint64_t mac_address = 0; + struct ifreq ifr, *it, *end; + struct ifconf ifc; + char buf[1024]; + int sock; + + if (mac_address == 0) { + sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); + if (sock == -1) { + ucs_error("failed to create socket: %m"); + return 0; + } + + ifc.ifc_len = sizeof(buf); + ifc.ifc_buf = buf; + if (ioctl(sock, SIOCGIFCONF, &ifc) == -1) { + ucs_error("ioctl(SIOCGIFCONF) failed: %m"); + close(sock); + return 0; + } + + it = ifc.ifc_req; + end = it + (ifc.ifc_len / sizeof *it); + for (it = ifc.ifc_req; it != end; ++it) { + strcpy(ifr.ifr_name, it->ifr_name); + if (ioctl(sock, SIOCGIFFLAGS, &ifr) != 0) { + ucs_error("ioctl(SIOCGIFFLAGS) failed: %m"); + close(sock); + return 0; + } + + if (!(ifr.ifr_flags & IFF_LOOPBACK)) { + if (ioctl(sock, SIOCGIFHWADDR, &ifr) != 0) { + ucs_error("ioctl(SIOCGIFHWADDR) failed: %m"); + close(sock); + return 0; + } + + memcpy(&mac_address, ifr.ifr_hwaddr.sa_data, 6); + break; + } + } + + close(sock); + ucs_trace("MAC address is 0x%012"PRIX64, mac_address); + } + + return mac_address; +} + +static uint64_t __sumup_host_name(unsigned prime_index) +{ + uint64_t 
sum, n; + const char *p; + unsigned i; + + sum = 0; + i = prime_index; + p = ucs_get_host_name(); + while (*p != '\0') { + n = 0; + memcpy(&n, p, strnlen(p, sizeof(n))); + sum += ucs_get_prime(i) * n; + ++i; + p += ucs_min(sizeof(n), strlen(p)); + } + return sum; +} + +uint64_t ucs_machine_guid() +{ + return ucs_get_prime(0) * ucs_get_mac_address() + + __sumup_host_name(1); +} + +/* + * If a certain system constant (name) is undefined on the underlying system, the + * sysconf routine returns -1. ucs_sysconf passes that negative value on to the + * caller, who is responsible for choosing a default value or aborting. + * + * If an error occurs, sysconf modifies errno and ucs_sysconf aborts (the + * assertion on errno fails). + * + * Otherwise, a non-negative value is returned. + */ +static long ucs_sysconf(int name) +{ + long rc; + errno = 0; + + rc = sysconf(name); + ucs_assert_always(errno == 0); + + return rc; +} + +int ucs_get_first_cpu() +{ + int first_cpu, total_cpus, ret; + ucs_sys_cpuset_t mask; + + ret = ucs_sysconf(_SC_NPROCESSORS_CONF); + if (ret < 0) { + ucs_error("failed to get local cpu count: %m"); + return ret; + } + total_cpus = ret; + + CPU_ZERO(&mask); + ret = ucs_sys_getaffinity(&mask); + if (ret < 0) { + ucs_error("failed to get process affinity: %m"); + return ret; + } + + for (first_cpu = 0; first_cpu < total_cpus; ++first_cpu) { + if (CPU_ISSET(first_cpu, &mask)) { + return first_cpu; + } + } + + return total_cpus; +} + +uint64_t ucs_generate_uuid(uint64_t seed) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return seed + + ucs_get_prime(0) * ucs_get_tid() + + ucs_get_prime(1) * ucs_get_time() + + ucs_get_prime(2) * ucs_get_mac_address() + + ucs_get_prime(3) * tv.tv_sec + + ucs_get_prime(4) * tv.tv_usec + + __sumup_host_name(5); +} + +ucs_status_t +ucs_open_output_stream(const char *config_str, ucs_log_level_t err_log_level, + FILE **p_fstream, int *p_need_close, + const char **p_next_token) +{ + FILE *output_stream; + char filename[256]; + char *template; + const char *p; + size_t 
len; + + *p_next_token = config_str; + + len = strcspn(config_str, ":"); + if (!strncmp(config_str, "stdout", len)) { + *p_fstream = stdout; + *p_need_close = 0; + *p_next_token = config_str + len; + } else if (!strncmp(config_str, "stderr", len)) { + *p_fstream = stderr; + *p_need_close = 0; + *p_next_token = config_str + len; + } else { + if (!strncmp(config_str, "file:", 5)) { + p = config_str + 5; + } else { + p = config_str; + } + + len = strcspn(p, ":"); + template = strndup(p, len); + if (template == NULL) { + /* strndup() could not allocate the copy of the file name template */ + ucs_log(err_log_level, "failed to allocate memory for file name"); + return UCS_ERR_NO_MEMORY; + } + + ucs_fill_filename_template(template, filename, sizeof(filename)); + free(template); + + output_stream = fopen(filename, "w"); + if (output_stream == NULL) { + ucs_log(err_log_level, "failed to open '%s' for writing: %m", + filename); + return UCS_ERR_IO_ERROR; + } + + *p_fstream = output_stream; + *p_need_close = 1; + *p_next_token = p + len; + } + + return UCS_OK; +} + +/* + * Read at most (max - 1) bytes from the file whose name is formatted from + * filename_fmt/ap into 'buffer' and null-terminate the data. Failures are + * logged unless 'silent' is non-zero. Returns the number of bytes read, or a + * negative value on failure. + */ +static ssize_t ucs_read_file_vararg(char *buffer, size_t max, int silent, + const char *filename_fmt, va_list ap) +{ + char filename[MAXPATHLEN]; + ssize_t read_bytes; + int fd; + + if (max == 0) { + /* no room even for the null terminator, and (max - 1) below would wrap + * around to SIZE_MAX */ + return -1; + } + + vsnprintf(filename, MAXPATHLEN, filename_fmt, ap); + + fd = open(filename, O_RDONLY); + if (fd < 0) { + if (!silent) { + ucs_error("failed to open %s: %m", filename); + } + read_bytes = -1; + goto out; + } + + read_bytes = read(fd, buffer, max - 1); + if (read_bytes < 0) { + if (!silent) { + ucs_error("failed to read from %s: %m", filename); + } + goto out_close; + } + + if (read_bytes < max) { + buffer[read_bytes] = '\0'; + } + +out_close: + close(fd); +out: + return read_bytes; +} + +ssize_t ucs_read_file(char *buffer, size_t max, int silent, + const char *filename_fmt, ...) +{ + ssize_t read_bytes; + va_list ap; + + va_start(ap, filename_fmt); + read_bytes = ucs_read_file_vararg(buffer, max, silent, filename_fmt, ap); + va_end(ap); + + return read_bytes; +} + +ucs_status_t ucs_read_file_number(long *value, int silent, + const char *filename_fmt, ...) 
+{ + char buffer[64], *tail; + ssize_t read_bytes; + va_list ap; + long n; + + va_start(ap, filename_fmt); + read_bytes = ucs_read_file_vararg(buffer, sizeof(buffer) - 1, silent, + filename_fmt, ap); + va_end(ap); + + if (read_bytes < 0) { + /* read error */ + return UCS_ERR_IO_ERROR; + } + + n = strtol(buffer, &tail, 0); + if ((*tail != '\0') && !isspace(*tail)) { + /* parse error */ + return UCS_ERR_INVALID_PARAM; + } + + *value = n; + return UCS_OK; +} + +ssize_t ucs_read_file_str(char *buffer, size_t max, int silent, + const char *filename_fmt, ...) +{ + size_t max_read = ucs_max(max, 1) - 1; + ssize_t read_bytes; + va_list ap; + + va_start(ap, filename_fmt); + read_bytes = ucs_read_file_vararg(buffer, max_read, silent, filename_fmt, ap); + va_end(ap); + + if ((read_bytes >= 0) && (max > 0)) { + buffer[read_bytes] = '\0'; + } + + return read_bytes; +} + +size_t ucs_get_page_size() +{ + static long page_size = 0; + + if (page_size == 0) { + page_size = ucs_sysconf(_SC_PAGESIZE); + if (page_size < 0) { + page_size = 4096; + ucs_debug("_SC_PAGESIZE is undefined, setting default value to %ld", + page_size); + } + } + return page_size; +} + +void ucs_get_mem_page_size(void *address, size_t size, size_t *min_page_size_p, + size_t *max_page_size_p) +{ + int found = 0; + unsigned long start, end; + unsigned long page_size_kb; + size_t page_size; + char buf[1024]; + FILE *file; + int n; + + file = fopen(UCS_PROCESS_SMAPS_FILE, "r"); + if (!file) { + goto out; + } + + while (fgets(buf, sizeof(buf), file) != NULL) { + n = sscanf(buf, "%lx-%lx", &start, &end); + if (n != 2) { + continue; + } + + if (start > (uintptr_t)address + size) { + /* the scanned range is after memory range of interest - stop */ + break; + } + if (end <= (uintptr_t)address) { + /* the scanned range is still before the memory range of interest */ + continue; + } + + while (fgets(buf, sizeof(buf), file) != NULL) { + n = sscanf(buf, "KernelPageSize: %lu kB", &page_size_kb); + if (n < 1) { + continue; + 
} + + page_size = page_size_kb * UCS_KBYTE; + if (found) { + *min_page_size_p = ucs_min(*min_page_size_p, page_size); + *max_page_size_p = ucs_max(*max_page_size_p, page_size); + } else { + found = 1; + *min_page_size_p = page_size; + *max_page_size_p = page_size; + } + break; + } + } + + fclose(file); + +out: + if (!found) { + *min_page_size_p = *max_page_size_p = ucs_get_page_size(); + } +} + +static ssize_t ucs_get_meminfo_entry(const char* pattern) +{ + char buf[256]; + char final_pattern[80]; + int val = 0; + ssize_t val_b = -1; + FILE *f; + + f = fopen("/proc/meminfo", "r"); + if (f != NULL) { + snprintf(final_pattern, sizeof(final_pattern), "%s: %s", pattern, + "%d kB"); + while (fgets(buf, sizeof(buf), f)) { + if (sscanf(buf, final_pattern, &val) == 1) { + val_b = val * 1024ull; + break; + } + } + fclose(f); + } + + return val_b; +} + +size_t ucs_get_memfree_size() +{ + ssize_t mem_free; + + mem_free = ucs_get_meminfo_entry("MemFree"); + if (mem_free == -1) { + mem_free = UCS_DEFAULT_MEM_FREE; + ucs_info("cannot determine free mem size, using default: %zu", + mem_free); + } + + return mem_free; +} + +ssize_t ucs_get_huge_page_size() +{ + static ssize_t huge_page_size = 0; + + /* Cache the huge page size value */ + if (huge_page_size == 0) { + huge_page_size = ucs_get_meminfo_entry("Hugepagesize"); + if (huge_page_size == -1) { + ucs_debug("huge pages are not supported on the system"); + } else { + ucs_trace("detected huge page size: %zu", huge_page_size); + } + } + + return huge_page_size; +} + +size_t ucs_get_phys_mem_size() +{ + static size_t phys_mem_size = 0; + long phys_pages; + + if (phys_mem_size == 0) { + phys_pages = ucs_sysconf(_SC_PHYS_PAGES); + if (phys_pages < 0) { + ucs_debug("_SC_PHYS_PAGES is undefined, setting default value to %ld", + SIZE_MAX); + phys_mem_size = SIZE_MAX; + } else { + phys_mem_size = phys_pages * ucs_get_page_size(); + } + } + return phys_mem_size; +} + +#define UCS_SYS_THP_ENABLED_FILE 
"/sys/kernel/mm/transparent_hugepage/enabled" +int ucs_is_thp_enabled() +{ + char buf[256]; + int rc; + + rc = ucs_read_file(buf, sizeof(buf) - 1, 1, UCS_SYS_THP_ENABLED_FILE); + if (rc < 0) { + ucs_debug("failed to read %s:%m", UCS_SYS_THP_ENABLED_FILE); + return 0; + } + + buf[rc] = 0; + return (strstr(buf, "[never]") == NULL); +} + +#define UCS_PROC_SYS_SHMMAX_FILE "/proc/sys/kernel/shmmax" +size_t ucs_get_shmmax() +{ + ucs_status_t status; + long size; + + status = ucs_read_file_number(&size, 0, UCS_PROC_SYS_SHMMAX_FILE); + if (status != UCS_OK) { + ucs_warn("failed to read %s:%m", UCS_PROC_SYS_SHMMAX_FILE); + return 0; + } + + return size; +} + +static void ucs_sysv_shmget_error_check_ENOSPC(size_t alloc_size, + const struct shminfo *ipc_info, + char *buf, size_t max) +{ + unsigned long new_used_ids; + unsigned long new_shm_tot; + struct shm_info shm_info; + char *p, *endp; + int ret; + + p = buf; + endp = p + max; + + ret = shmctl(0, SHM_INFO, (struct shmid_ds *)&shm_info); + if (ret >= 0) { + return; + } + + new_used_ids = shm_info.used_ids; + if (new_used_ids > ipc_info->shmmni) { + snprintf(p, endp - p, + ", total number of segments in the system (%lu) would exceed the" + " limit in /proc/sys/kernel/shmmni (=%lu)", + new_used_ids, ipc_info->shmmni); + p += strlen(p); + } + + new_shm_tot = shm_info.shm_tot + + (alloc_size + ucs_get_page_size() - 1) / ucs_get_page_size(); + if (new_shm_tot > ipc_info->shmall) { + snprintf(p, endp - p, + ", total shared memory pages in the system (%lu) would exceed the" + " limit in /proc/sys/kernel/shmall (=%lu)", + new_shm_tot, ipc_info->shmall); + } +} + +ucs_status_t ucs_sys_get_proc_cap(uint32_t *effective) +{ +#if HAVE_SYS_CAPABILITY_H + cap_user_header_t hdr = ucs_alloca(sizeof(*hdr)); + cap_user_data_t data = ucs_alloca(sizeof(*data) * _LINUX_CAPABILITY_U32S_3); + int ret; + + hdr->pid = 0; /* current thread */ + hdr->version = _LINUX_CAPABILITY_VERSION_3; + ret = capget(hdr, data); + if (ret) { + 
ucs_debug("capget(pid=%d version=0x%x) failed: %m", hdr->pid, + hdr->version); + return UCS_ERR_IO_ERROR; + + } + + *effective = data->effective; /* 'effective' field of the first element of 'data' only */ + return UCS_OK; +#else + return UCS_ERR_UNSUPPORTED; +#endif +} + +static void ucs_sysv_shmget_error_check_EPERM(int flags, char *buf, size_t max) +{ +#if HAVE_SYS_CAPABILITY_H + ucs_status_t status; + uint32_t ecap; + + UCS_STATIC_ASSERT(CAP_IPC_LOCK < 32); + status = ucs_sys_get_proc_cap(&ecap); + if ((status == UCS_OK) && !(ecap & UCS_BIT(CAP_IPC_LOCK))) { + /* the effective capabilities were read successfully and the CAP_IPC_LOCK + * bit is not set */ + snprintf(buf, max, ", CAP_IPC_LOCK privilege is needed for SHM_HUGETLB"); + return; + } +#endif + + /* generic hint for every other case; 'flags' is not examined here */ snprintf(buf, max, + ", please check for CAP_IPC_LOCK privilege for using SHM_HUGETLB"); +} + +static void ucs_sysv_shmget_format_error(size_t alloc_size, int flags, + const char *alloc_name, int sys_errno, + char *buf, size_t max) +{ + struct shminfo ipc_info; + char *p, *endp, *errp; + int ret; + + buf[0] = '\0'; + p = buf; + endp = p + max; + + snprintf(p, endp - p, "shmget(size=%zu flags=0x%x) for %s failed: %s", + alloc_size, flags, alloc_name, strerror(sys_errno)); + p += strlen(p); + errp = p; /* save current string pointer to detect if anything was added */ + + ret = shmctl(0, IPC_INFO, (struct shmid_ds *)&ipc_info); + if (ret >= 0) { + if ((sys_errno == EINVAL) && (alloc_size > ipc_info.shmmax)) { + snprintf(p, endp - p, + ", allocation size exceeds /proc/sys/kernel/shmmax (=%zu)", + ipc_info.shmmax); + p += strlen(p); + } + + if (sys_errno == ENOSPC) { + ucs_sysv_shmget_error_check_ENOSPC(alloc_size, &ipc_info, p, endp - p); + p += strlen(p); + } + } + + if (sys_errno == EPERM) { + ucs_sysv_shmget_error_check_EPERM(flags, p, endp - p); + p += strlen(p); + } + + /* default error message if no useful information was added to the string */ + if (p == errp) { + snprintf(p, endp - p, ", please check shared memory limits by 'ipcs -l'"); + } +} + +ucs_status_t ucs_sysv_alloc(size_t *size, size_t max_size, void **address_p, + 
int flags, const char *alloc_name, int *shmid) +{ + char error_string[256]; +#ifdef SHM_HUGETLB + ssize_t huge_page_size; +#endif + size_t alloc_size; + int sys_errno; + void *ptr; + int ret; + +#ifdef SHM_HUGETLB + if (flags & SHM_HUGETLB) { + huge_page_size = ucs_get_huge_page_size(); + if (huge_page_size <= 0) { + ucs_debug("huge pages are not supported on the system"); + return UCS_ERR_NO_MEMORY; /* Huge pages not supported */ + } + + alloc_size = ucs_align_up(*size, huge_page_size); + } else +#endif + { + alloc_size = ucs_align_up(*size, ucs_get_page_size()); + } + + if (alloc_size >= max_size) { + return UCS_ERR_EXCEEDS_LIMIT; + } + + flags |= IPC_CREAT | SHM_R | SHM_W; + *shmid = shmget(IPC_PRIVATE, alloc_size, flags); + if (*shmid < 0) { + sys_errno = errno; + ucs_sysv_shmget_format_error(alloc_size, flags, alloc_name, sys_errno, + error_string, sizeof(error_string)); + switch (sys_errno) { + case ENOMEM: + case EPERM: +#ifdef SHM_HUGETLB + if (!(flags & SHM_HUGETLB)) +#endif + { + ucs_error("%s", error_string); + } + return UCS_ERR_NO_MEMORY; + case ENOSPC: + case EINVAL: + ucs_error("%s", error_string); + return UCS_ERR_NO_MEMORY; + default: + ucs_error("%s", error_string); + return UCS_ERR_SHMEM_SEGMENT; + } + } + + /* Attach segment */ + if (*address_p) { + ptr = shmat(*shmid, *address_p, SHM_REMAP); + } else { + ptr = shmat(*shmid, NULL, 0); + } + + /* Remove segment, the attachment keeps a reference to the mapping */ + /* FIXME having additional attaches to a removed segment is not portable + * behavior */ + ret = shmctl(*shmid, IPC_RMID, NULL); + if (ret != 0) { + ucs_warn("shmctl(IPC_RMID, shmid=%d) returned %d: %m", *shmid, ret); + } + + /* Check if attachment was successful */ + if (ptr == (void*)-1) { + if (errno == ENOMEM) { + return UCS_ERR_NO_MEMORY; + } else if (RUNNING_ON_VALGRIND && (errno == EINVAL)) { + return UCS_ERR_NO_MEMORY; + } else { + ucs_error("shmat(shmid=%d) returned unexpected error: %m", *shmid); + return 
UCS_ERR_SHMEM_SEGMENT; + } + } + + ucs_memtrack_allocated(ptr, alloc_size UCS_MEMTRACK_VAL); + *address_p = ptr; + *size = alloc_size; + return UCS_OK; +} + +ucs_status_t ucs_sysv_free(void *address) +{ + int ret; + + ucs_memtrack_releasing(address); + ret = shmdt(address); + if (ret) { + ucs_warn("Unable to detach shared memory segment at %p: %m", address); + return UCS_ERR_INVALID_PARAM; + } + + return UCS_OK; +} + +ucs_status_t ucs_mmap_alloc(size_t *size, void **address_p, + int flags UCS_MEMTRACK_ARG) +{ + size_t alloc_length; + void *addr; + + alloc_length = ucs_align_up_pow2(*size, ucs_get_page_size()); + + addr = ucs_mmap(*address_p, alloc_length, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | flags, -1, 0 UCS_MEMTRACK_VAL); + if (addr == MAP_FAILED) { + return UCS_ERR_NO_MEMORY; + } + + *size = alloc_length; /* report the length that was actually mapped */ + *address_p = addr; + return UCS_OK; +} + +ucs_status_t ucs_mmap_free(void *address, size_t length) +{ + int ret; + size_t alloc_length; + + alloc_length = ucs_align_up_pow2(length, ucs_get_page_size()); + + ret = ucs_munmap(address, alloc_length); + if (ret != 0) { + /* note: the message prints the caller's 'length', not the aligned + * 'alloc_length' which was passed to ucs_munmap() */ ucs_warn("munmap(address=%p, length=%zu) failed: %m", address, length); + return UCS_ERR_INVALID_PARAM; + } + return UCS_OK; +} + +typedef struct { + unsigned long start; + unsigned long end; + int prot; + int found; +} ucs_get_mem_prot_ctx_t; + +static int ucs_get_mem_prot_cb(void *arg, void *addr, size_t length, int prot, + const char *path) +{ + ucs_get_mem_prot_ctx_t *ctx = arg; + unsigned long seg_start = (uintptr_t)addr; + unsigned long seg_end = (uintptr_t)addr + length; + + if (ctx->start < seg_start) { + ucs_trace("address 0x%lx is before next mapping 0x%lx..0x%lx", ctx->start, + seg_start, seg_end); + return 1; + } else if (ctx->start < seg_end) { + ucs_trace("range 0x%lx..0x%lx overlaps with mapping 0x%lx..0x%lx prot 0x%x", + ctx->start, ctx->end, seg_start, seg_end, prot); + + if (!ctx->found) { + /* first segment sets protection flags */ + ctx->prot = prot; + ctx->found 
= 1; + } else { + /* subsequent segments update protection flags */ + ctx->prot &= prot; + } + + if (ctx->end <= seg_end) { + /* finished going over entire memory region */ + return 1; + } + + /* continue from the end of current segment */ + ctx->start = seg_end; + } + return 0; +} + +int ucs_get_mem_prot(unsigned long start, unsigned long end) +{ + ucs_get_mem_prot_ctx_t ctx = { start, end, PROT_NONE, 0 }; + ucm_parse_proc_self_maps(ucs_get_mem_prot_cb, &ctx); + return ctx.prot; +} + +const char* ucs_get_process_cmdline() +{ + static char cmdline[1024] = {0}; + static int initialized = 0; + ssize_t len; + int i; + + if (!initialized) { + len = ucs_read_file(cmdline, sizeof(cmdline), 1, "/proc/self/cmdline"); + for (i = 0; i < len; ++i) { + if (cmdline[i] == '\0') { + cmdline[i] = ' '; + } + } + initialized = 1; + } + return cmdline; +} + +/* + * Read the /proc/self/pagemap entry of the page which contains 'address'. + * Returns the low 55 bits of the entry, or 0 if the file cannot be opened or + * read, or if bit 63 of the entry is not set. + */ +unsigned long ucs_sys_get_pfn(uintptr_t address) +{ + static const char *pagemap_file = "/proc/self/pagemap"; + static int initialized = 0; + static int pagemap_fd; + uint64_t data; + off_t offset; + ssize_t ret; + + if (!initialized) { + pagemap_fd = open(pagemap_file, O_RDONLY); + if (pagemap_fd < 0) { + ucs_warn("failed to open %s: %m", pagemap_file); + } + initialized = 1; + } + + if (pagemap_fd < 0) { + return 0; /* could not open file */ + } + + offset = (address / ucs_get_page_size()) * sizeof(data); + data = 0; + ret = pread(pagemap_fd, &data, sizeof(data), offset); + if (ret < 0) { + /* off_t is not size_t: convert explicitly to match %zu */ + ucs_warn("pread(file=%s offset=%zu) failed: %m", pagemap_file, + (size_t)offset); + return 0; + } + + if (!(data & UCS_BIT(63))) { + ucs_trace("address 0x%lx not present", address); + return 0; + } + + return data & UCS_MASK(55); +} + +ucs_status_t ucs_sys_fcntl_modfl(int fd, int add, int remove) +{ + int oldfl, ret; + + oldfl = fcntl(fd, F_GETFL); + if (oldfl < 0) { + ucs_error("fcntl(fd=%d, F_GETFL) returned %d: %m", fd, oldfl); + return UCS_ERR_IO_ERROR; + } + + ret = fcntl(fd, F_SETFL, (oldfl | add) & ~remove); + if (ret < 0) { + 
ucs_error("fcntl(fd=%d, F_SETFL) returned %d: %m", fd, ret); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +pid_t ucs_get_tid(void) +{ +#ifdef SYS_gettid + return syscall(SYS_gettid); +#elif defined(HAVE_SYS_THR_H) + long id; + + thr_self(&id); + return (id); +#else +#error "Port me" +#endif +} + +int ucs_tgkill(int tgid, int tid, int sig) +{ +#ifdef SYS_tgkill + return syscall(SYS_tgkill, tgid, tid, sig); +#elif defined(HAVE_SYS_THR_H) + return (thr_kill2(tgid, tid, sig)); +#else +#error "Port me" +#endif +} + +double ucs_get_cpuinfo_clock_freq(const char *header, double scale) +{ + double value = 0.0; + double m; + int rc; + FILE* f; + char buf[256]; + char fmt[256]; + int warn; + + f = fopen("/proc/cpuinfo","r"); + if (!f) { + return 0.0; + } + + /* build a sscanf() format which matches a "header : number" line */ snprintf(fmt, sizeof(fmt), "%s : %%lf ", header); + + warn = 0; + while (fgets(buf, sizeof(buf), f)) { + + rc = sscanf(buf, fmt, &m); + if (rc != 1) { + continue; + } + + if (value == 0.0) { + value = m; + continue; + } + + if (value != m) { + /* entries disagree: keep the largest value seen so far */ value = ucs_max(value,m); + warn = 1; + } + } + fclose(f); + + if (warn) { + ucs_debug("Conflicting CPU frequencies detected, using: %.2f", value); + } + + return value * scale; +} + +void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length) +{ + void *ptr; + + new_length = ucs_align_up_pow2(new_length, ucs_get_page_size()); + if (old_ptr == NULL) { + /* Note: Must pass the 0 offset as "long", otherwise it will be + * partially undefined when converted to syscall arguments */ + ptr = (void*)syscall(__NR_mmap, NULL, new_length, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0ul); + if (ptr == MAP_FAILED) { + ucs_log_fatal_error("mmap(NULL, %zu, READ|WRITE, PRIVATE|ANON) failed: %m", + new_length); + return NULL; + } + } else { + old_length = ucs_align_up_pow2(old_length, ucs_get_page_size()); + ptr = (void*)syscall(__NR_mremap, old_ptr, old_length, new_length, + MREMAP_MAYMOVE); + if (ptr == MAP_FAILED) { + ucs_log_fatal_error("mremap(%p, %zu, %zu, 
MAYMOVE) failed: %m", + old_ptr, old_length, new_length); + return NULL; + } + } + + return ptr; +} + +void ucs_sys_free(void *ptr, size_t length) +{ + int ret; + + if (ptr != NULL) { + length = ucs_align_up_pow2(length, ucs_get_page_size()); + ret = syscall(__NR_munmap, ptr, length); + if (ret) { + ucs_log_fatal_error("munmap(%p, %zu) failed: %m", ptr, length); + } + } +} + +char* ucs_make_affinity_str(const ucs_sys_cpuset_t *cpuset, char *str, size_t len) +{ + int i = 0, prev = -1; + char *p = str; + + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, cpuset)) { + if (prev < 0) { + prev = i; + } + } else { + if (prev >= 0) { + if (prev == i - 1) { + p += snprintf(p, str + len - p, "%d,", prev); + } else { + p += snprintf(p, str + len - p, "%d-%d,", prev, i - 1); + } + } + if (p > str + len) { + p = str + len - 4; + while (*p != ',') { + p--; + } + sprintf(p, "..."); + return str; + } + prev = -1; + } + } + + *(--p) = 0; + return str; +} + +int ucs_sys_setaffinity(ucs_sys_cpuset_t *cpuset) +{ + int ret; + +#if defined(HAVE_SCHED_SETAFFINITY) + ret = sched_setaffinity(0, sizeof(*cpuset), cpuset); +#elif defined(HAVE_CPUSET_SETAFFINITY) + ret = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, getpid(), + sizeof(*cpuset), cpuset); +#else +#error "Port me" +#endif + return ret; +} + +int ucs_sys_getaffinity(ucs_sys_cpuset_t *cpuset) +{ + int ret; + +#if defined(HAVE_SCHED_GETAFFINITY) + ret = sched_getaffinity(0, sizeof(*cpuset), cpuset); +#elif defined(HAVE_CPUSET_GETAFFINITY) + ret = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, getpid(), + sizeof(*cpuset), cpuset); +#else +#error "Port me" +#endif + return ret; +} + +void ucs_sys_cpuset_copy(ucs_cpu_set_t *dst, const ucs_sys_cpuset_t *src) +{ + int c; + + UCS_CPU_ZERO(dst); + for (c = 0; c < UCS_CPU_SETSIZE; ++c) { + if (CPU_ISSET(c, src)) { + UCS_CPU_SET(c, dst); + } + } +} + +ucs_sys_ns_t ucs_sys_get_ns(ucs_sys_namespace_type_t ns) +{ + char filename[MAXPATHLEN]; + int res; + struct stat st; + + if 
(ns >= UCS_SYS_NS_TYPE_LAST) { + return 0; + } + + snprintf(filename, sizeof(filename), "%s/%s", UCS_PROCESS_NS_DIR, + ucs_sys_namespace_info[ns].name); + + res = stat(filename, &st); + if (res == 0) { + return (ucs_sys_ns_t)st.st_ino; + } + + return ucs_sys_namespace_info[ns].dflt; +} + +/* + * Return non-zero if the current namespace of type 'ns' equals the default + * value listed for it in ucs_sys_namespace_info. Returns 0 for an invalid + * namespace type. + */ +int ucs_sys_ns_is_default(ucs_sys_namespace_type_t ns) +{ + /* same range check as ucs_sys_get_ns(), so that the table is never indexed + * out of bounds */ + if (ns >= UCS_SYS_NS_TYPE_LAST) { + return 0; + } + + return ucs_sys_get_ns(ns) == ucs_sys_namespace_info[ns].dflt; +} + +ucs_status_t ucs_sys_get_boot_id(uint64_t *high, uint64_t *low) +{ + static struct { + uint64_t high; + uint64_t low; + } boot_id = {0, 0}; + + static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER; + static ucs_status_t status = UCS_ERR_IO_ERROR; + char bootid_str[256]; + ssize_t size; + uint32_t v1; + uint16_t v2; + uint16_t v3; + uint16_t v4; + uint8_t v5[6]; + int res; + int i; + + UCS_INIT_ONCE(&init_once) { + size = ucs_read_file_str(bootid_str, sizeof(bootid_str), 1, + "%s", UCS_PROCESS_BOOTID_FILE); + if (size <= 0) { + continue; /* jump out of INIT_ONCE section */ + } + + res = sscanf(bootid_str, UCS_PROCESS_BOOTID_FMT, + &v1, &v2, &v3, &v4, + &v5[0], &v5[1], &v5[2], + &v5[3], &v5[4], &v5[5]); + if (res == 10) { /* 10 values should be scanned */ + status = UCS_OK; + boot_id.low = ((uint64_t)v1) | ((uint64_t)v2 << 32) | + ((uint64_t)v3 << 48); + boot_id.high = v4; + for (i = 0; i < ucs_array_size(v5); i++) { + boot_id.high |= (uint64_t)v5[i] << (16 + (i * 8)); + } + } + } + + if (status == UCS_OK) { + *high = boot_id.high; + *low = boot_id.low; + } + + return status; +} diff --git a/src/ucs/sys/sys.h b/src/ucs/sys/sys.h new file mode 100644 index 0000000..e723eba --- /dev/null +++ b/src/ucs/sys/sys.h @@ -0,0 +1,463 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (c) UT-Battelle, LLC. 2014-2019. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCS_SYS_H +#define UCS_SYS_H + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#if defined(__linux__) || defined(HAVE_CPU_SET_T) +#include +typedef cpu_set_t ucs_sys_cpuset_t; +#elif defined(__FreeBSD__) || defined(HAVE_CPUSET_T) +#include +typedef cpuset_t ucs_sys_cpuset_t; +#else +#error "Port me" +#endif + + +BEGIN_C_DECLS + +/** @file sys.h */ + + +typedef ino_t ucs_sys_ns_t; + + +/* namespace type used in @ref ucs_sys_get_ns and @ref ucs_sys_ns_is_default */ +typedef enum { + UCS_SYS_NS_TYPE_IPC, + UCS_SYS_NS_TYPE_MNT, + UCS_SYS_NS_TYPE_NET, + UCS_SYS_NS_TYPE_PID, + UCS_SYS_NS_TYPE_USER, + UCS_SYS_NS_TYPE_UTS, + UCS_SYS_NS_TYPE_LAST +} ucs_sys_namespace_type_t; + + +/** + * @return TMPDIR environment variable if set. Otherwise, return "/tmp". + */ +const char *ucs_get_tmpdir(); + +/** + * @return Host name. + */ +const char *ucs_get_host_name(); + + +/** + * @return user name. + */ +const char *ucs_get_user_name(); + + +/** + * Expand a partial path to full path. + * + * @param path Path to expand. + * @param fullpath Filled with full path. + * @param max Room in "fullpath" + */ +void ucs_expand_path(const char *path, char *fullpath, size_t max); + + +/** + * @return Path to the main executable. + */ +const char *ucs_get_exe(); + + +/** + * Calculate checksum of a file. + */ +uint32_t ucs_file_checksum(const char *filename); + + +/** + * Get a globally unique identifier of the machine running the current process. + */ +uint64_t ucs_machine_guid(); + + +/** + * Get the first processor number we are bound to. 
+ */ +int ucs_get_first_cpu(); + + +/** + * Generate a world-wide unique ID + * + * @param seed Additional seed to mix in. + * + * @note All bits of the returned number have the same randomness. + */ +uint64_t ucs_generate_uuid(uint64_t seed); + + +/** + * Open an output stream according to user configuration: + * - file: - file name, %p, %h, %c are substituted. + * - stdout + * - stderr + * + * *p_fstream is filled with the stream handle, *p_need_close is set to whether + * fclose() should be called to release resources, *p_next_token to the remainder + * of config_str. + */ +ucs_status_t +ucs_open_output_stream(const char *config_str, ucs_log_level_t err_log_level, + FILE **p_fstream, int *p_need_close, + const char **p_next_token); + + +/** + * Read file contents into a string. If the size of the data is smaller than the + * supplied upper limit (max), a null terminator is appended to the data. + * + * @param buffer Buffer to fill with file contents. + * @param max Maximal buffer size. + * @param filename_fmt File name printf-like format string. + * + * @return Number of bytes read, or -1 in case of error. + */ +ssize_t ucs_read_file(char *buffer, size_t max, int silent, + const char *filename_fmt, ...) + UCS_F_PRINTF(4, 5); + + +/** + * Read file contents as a numeric value. + * + * @param value Filled with the number read from the file. + * @param filename_fmt File name printf-like format string. + * + * @return UCS_OK if successful, or error code otherwise. + */ +ucs_status_t ucs_read_file_number(long *value, int silent, + const char *filename_fmt, ...) + UCS_F_PRINTF(3, 4); + + +/** + * Read file contents into a string closed by null terminator. + * + * @param buffer Buffer to fill with file contents. + * @param max Maximal buffer size. + * @param filename_fmt File name printf-like format string. + * + * @return Number of bytes read, or -1 in case of error. + */ +ssize_t ucs_read_file_str(char *buffer, size_t max, int silent, + const char *filename_fmt, ...) 
+ UCS_F_PRINTF(4, 5); + + +/** + * @return Regular page size on the system. + */ +size_t ucs_get_page_size(); + + +/** + * Get page size of a memory region. + * + * @param [in] address Memory region start address, + * @param [in] size Memory region size. + * @param [out] min_page_size_p Set to the minimal page size in the memory region. + * @param [out] max_page_size_p Set to the maximal page size in the memory region. + */ +void ucs_get_mem_page_size(void *address, size_t size, size_t *min_page_size_p, + size_t *max_page_size_p); + + +/** + * @return Huge page size on the system, or -1 if unsupported. + */ +ssize_t ucs_get_huge_page_size(); + + +/** + * @return free mem size on the system. + */ +size_t ucs_get_memfree_size(); + + +/** + * @return Physical memory size on the system. + */ +size_t ucs_get_phys_mem_size(); + + +/** + * Allocate shared memory using SystemV API. + * + * @param size Pointer to memory size to allocate, updated with actual size + * (rounded up to huge page size or to regular page size). + * @param max_size maximal size to allocate. If need to allocate more than this, + * the function fails and returns UCS_ERR_EXCEEDS_LIMIT. + * @param address_p Filled with allocated memory address. + * @param flags Flags to indicate the permissions for the allocate memory. + * (also, whether or not to allocate memory with huge pages). + * @param alloc_name Name of memory allocation, for debug/error reporting purposes. + * @param shmid Filled with the shmid from the shmget call in the function. + */ +ucs_status_t ucs_sysv_alloc(size_t *size, size_t max_size, void **address_p, + int flags, const char *alloc_name, int *shimd); + + +/** + * Release memory allocated via SystemV API. + * + * @param address Memory to release (returned from @ref ucs_sysv_alloc). + */ +ucs_status_t ucs_sysv_free(void *address); + + +/** + * Allocate private memory using mmap API. 
+ * + * @param size Pointer to memory size to allocate, updated with actual size + * (rounded up to huge page size or to regular page size). + * @param address_p Filled with allocated memory address. + * @param flags Flags to pass to the mmap() system call + */ +ucs_status_t ucs_mmap_alloc(size_t *size, void **address_p, + int flags UCS_MEMTRACK_ARG); + +/** + * Release memory allocated via mmap API. + * + * @param address Address of memory to release as returned from @ref ucs_mmap_alloc. + * @param length Length of memory to release passed to @ref ucs_mmap_alloc. + */ +ucs_status_t ucs_mmap_free(void *address, size_t length); + +/** + * Retrieve memory access flags for a given region of memory. + * If the specified memory region has multiple different access flags, the AND + * of them is returned. If any part of the region is not mapped, PROT_NONE will + * be returned. + * + * @param start Region start. + * @param end Region end. + * @return Memory protection flags (PROT_xxx). + */ +int ucs_get_mem_prot(unsigned long start, unsigned long end); + + +/** + * Returns the physical page frame number of a given virtual page address. + * If the page map file is non-readable (for example, due to permissions), or + * the page is not present, this function returns 0. + * + * @param address Virtual address to get the PFN for + * @return PFN number, or 0 if failed. + */ +unsigned long ucs_sys_get_pfn(uintptr_t address); + + +/** + * Modify file descriptor flags via fcntl(). + * + * @param fd File descriptor to modify. + * @param add Flags to add. + * @param remove Flags to remove. + * + * Note: if a flags is specified in both add and remove, it will be removed. + */ +ucs_status_t ucs_sys_fcntl_modfl(int fd, int add, int remove); + + +/** + * Get process command line + */ +const char* ucs_get_process_cmdline(); + + +/** + * Get current thread (LWP) id. + */ +pid_t ucs_get_tid(void); + + +/** + * Send signal to a thread. 
+ */ +int ucs_tgkill(int tgid, int tid, int sig); + + +/** + * Get CPU frequency from /proc/cpuinfo. Return value is clocks-per-second. + * + * @param header String in /proc/cpuinfo which precedes the clock speed number. + * @param scale Frequency value units. + */ +double ucs_get_cpuinfo_clock_freq(const char *mhz_header, double scale); + + +/** + * Check if transparent huge-pages are enabled . + * + * @return 1 for true and 0 for false + */ +int ucs_is_thp_enabled(); + + +/** + * Get shmmax size from /proc/sys/kernel/shmmax. + * + * @return shmmax size + */ +size_t ucs_get_shmmax(); + + +/** + * Return effective capabilities of the current thread. + * + * @param effective Filled with thread's effective capabilities. + * @return UCS_OK or error in case of failure. + */ +ucs_status_t ucs_sys_get_proc_cap(uint32_t *effective); + + +/** + * Allocate or re-allocate memory from the operating system. + * + * @param [in] old_ptr Pointer to existing block, may be NULL. If non-NULL, + * this block will be resized and potentially moved. + * @param [in] old_length Length of the block pointed by old_ptr. + * @param [in] new_length Length to allocate for the new block. + * + * @return New allocated block, with size 'new_length'. + * @note Actual allocation size is rounded up to system page size. + */ +void *ucs_sys_realloc(void *old_ptr, size_t old_length, size_t new_length); + + +/** + * Release memory previously allocated by @ref ucs_sys_realloc(). + * + * @param [in] ptr Pointer to memory block to release. + * @param [in] length Length of the memory block. + */ +void ucs_sys_free(void *ptr, size_t length); + +/** + * Fill human readable cpu set representation + * + * @param [in] cpuset Set of CPUs + * @param [in] str String to fill + * @param [in] len String length + * + * @return Filled string + */ +char *ucs_make_affinity_str(const ucs_sys_cpuset_t *cpuset, char *str, size_t len); + +/** + * Sets affinity for the current process. 
+ * + * @param [in] cpuset Pointer to the cpuset to assign + * + * @return -1 on error with errno set, 0 on success + */ +int ucs_sys_setaffinity(ucs_sys_cpuset_t *cpuset); + +/** + * Queries affinity for the current process. + * + * @param [out] cpuset Pointer to the cpuset to return result + * + * @return -1 on error with errno set, 0 on success + */ +int ucs_sys_getaffinity(ucs_sys_cpuset_t *cpuset); + +/** + * Copies ucs_sys_cpuset_t to ucs_cpu_set_t. + * + * @param [in] src Source + * @param [out] dst Destination + */ +void ucs_sys_cpuset_copy(ucs_cpu_set_t *dst, const ucs_sys_cpuset_t *src); + +/** + * Get namespace id for resource. + * + * @param [in] name Namespace to get value + * + * @return namespace value or 0 if namespaces are not supported + */ +ucs_sys_ns_t ucs_sys_get_ns(ucs_sys_namespace_type_t name); + + +/** + * Check if namespace is namespace of host system. + * + * @param [in] name Namespace to evaluate + * + * @return 1 in case if namespace is root, 0 - in other cases + */ +int ucs_sys_ns_is_default(ucs_sys_namespace_type_t name); + + +/** + * Get 128-bit boot ID value. + * + * @param [out] high Pointer to high 64 bit of 128 boot ID + * @param [out] low Pointer to low 64 bit of 128 boot ID + * + * @return UCS_OK or error in case of failure. + */ +ucs_status_t ucs_sys_get_boot_id(uint64_t *high, uint64_t *low); + +END_C_DECLS + +#endif diff --git a/src/ucs/time/time.c b/src/ucs/time/time.c new file mode 100644 index 0000000..a3de64e --- /dev/null +++ b/src/ucs/time/time.c @@ -0,0 +1,22 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include +#include + + +double ucs_get_cpu_clocks_per_sec() +{ + static double clocks_per_sec = 0.0; + static int initialized = 0; + + if (!initialized) { + clocks_per_sec = ucs_arch_get_clocks_per_sec(); + ucs_debug("measured arch clock speed: %.2f Hz", clocks_per_sec); + initialized = 1; + } + return clocks_per_sec; +} diff --git a/src/ucs/time/time.h b/src/ucs/time/time.h new file mode 100644 index 0000000..58ccbb0 --- /dev/null +++ b/src/ucs/time/time.h @@ -0,0 +1,152 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_TIME_H +#define UCS_TIME_H + +#include +#include +#include +#include +#include + +BEGIN_C_DECLS + +/** @file time.h */ + +/** + * Short time type + * Used to represent short time intervals, and takes less memory. + */ +typedef uint32_t ucs_short_time_t; + +/** + * Compare short time values + */ +#define UCS_SHORT_TIME_CMP UCS_CIRCULAR_COMPARE32 + + +#define UCS_TIME_INFINITY ULLONG_MAX + +#define UCS_MSEC_PER_SEC 1000ull /* Milli */ +#define UCS_USEC_PER_SEC 1000000ul /* Micro */ +#define UCS_NSEC_PER_SEC 1000000000ul /* Nano */ + + +double ucs_get_cpu_clocks_per_sec(); + + +/** + * @return The current time, in UCS time units. + */ +static inline ucs_time_t ucs_get_time() +{ + return (ucs_time_t)ucs_arch_read_hres_clock(); +} + +/** + * @return The current accurate time, in seconds. + * @note This function may have higher overhead than @ref ucs_get_time() + */ +static inline double ucs_get_accurate_time() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + (tv.tv_usec / (double)UCS_USEC_PER_SEC); +} + +/** + * @return The clock value of a single second. + */ +static inline double ucs_time_sec_value() +{ + return ucs_get_cpu_clocks_per_sec(); +} + + +/** + * Convert seconds to UCS time units. 
+ */ +static inline ucs_time_t ucs_time_from_sec(double sec) +{ + return (ucs_time_t)(sec * ucs_time_sec_value() + 0.5); +} + +/** + * Convert seconds to UCS time units. + */ +static inline ucs_time_t ucs_time_from_msec(double msec) +{ + return ucs_time_from_sec(msec / UCS_MSEC_PER_SEC); +} + +/** + * Convert seconds to UCS time units. + */ +static inline ucs_time_t ucs_time_from_usec(double usec) +{ + return ucs_time_from_sec(usec / UCS_USEC_PER_SEC); +} + +/** + * Convert UCS time units to seconds. + */ +static inline double ucs_time_to_sec(ucs_time_t time) +{ + return time / ucs_time_sec_value(); +} + +/** + * Convert UCS time units to milliseconds. + */ +static inline double ucs_time_to_msec(ucs_time_t time) +{ + return ucs_time_to_sec(time) * UCS_MSEC_PER_SEC; +} + +/** + * Convert UCS time units to microseconds. + */ +static inline double ucs_time_to_usec(ucs_time_t time) +{ + return ucs_time_to_sec(time) * UCS_USEC_PER_SEC; +} + +/** + * Convert UCS time units to nanoseconds. + */ +static inline double ucs_time_to_nsec(ucs_time_t time) +{ + return ucs_time_to_sec(time) * UCS_NSEC_PER_SEC; +} + +/** + * Convert UCS time interval (small) to nanoseconds. 
+ */ +static inline double ucs_time_interval_to_nsec(ucs_time_t time) +{ + return ucs_time_to_sec(time * UCS_NSEC_PER_SEC); +} + +/* Convert seconds to POSIX timeval */ +static inline void ucs_sec_to_timeval(double seconds, struct timeval *tv) +{ + int64_t usec = (int64_t)( (seconds * UCS_USEC_PER_SEC) + 0.5 ); + tv->tv_sec = usec / UCS_USEC_PER_SEC; + tv->tv_usec = usec % UCS_USEC_PER_SEC; +} + +/* Convert seconds to POSIX timespec */ +static inline void ucs_sec_to_timespec(double seconds, struct timespec *ts) +{ + int64_t nsec = (int64_t)( (seconds * UCS_NSEC_PER_SEC) + 0.5 ); + ts->tv_sec = nsec / UCS_NSEC_PER_SEC; + ts->tv_nsec = nsec % UCS_NSEC_PER_SEC; +} + +END_C_DECLS + +#endif diff --git a/src/ucs/time/time_def.h b/src/ucs/time/time_def.h new file mode 100644 index 0000000..eba0c79 --- /dev/null +++ b/src/ucs/time/time_def.h @@ -0,0 +1,27 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCS_TIME_DEF_H +#define UCS_TIME_DEF_H + +#include + +BEGIN_C_DECLS + +/** @file time_def.h */ + +/** + * @ingroup UCS_RESOURCE + * + * UCS time units. + * These are not necessarily aligned with metric time units. + * MUST compare short time values with UCS_SHORT_TIME_CMP to handle wrap-around. + */ +typedef unsigned long ucs_time_t; + +END_C_DECLS + +#endif /* UCS_TIME_DEF_H */ diff --git a/src/ucs/time/timer_wheel.c b/src/ucs/time/timer_wheel.c new file mode 100644 index 0000000..81eac16 --- /dev/null +++ b/src/ucs/time/timer_wheel.c @@ -0,0 +1,98 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2012-2013. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include + +#include +#include +#include +#include + + +ucs_status_t ucs_twheel_init(ucs_twheel_t *twheel, ucs_time_t resolution, + ucs_time_t current_time) +{ + unsigned i; + + twheel->res = ucs_roundup_pow2(resolution); + twheel->res_order = (unsigned) ucs_log2(twheel->res); + twheel->num_slots = 1024; + twheel->current = 0; + twheel->now = current_time; + twheel->wheel = ucs_malloc(sizeof(*twheel->wheel) * twheel->num_slots, + "twheel"); + if (twheel->wheel == NULL) { + return UCS_ERR_NO_MEMORY; + } + + for (i = 0; i < twheel->num_slots; i++) { + ucs_list_head_init(&twheel->wheel[i]); + } + + ucs_debug("high res timer created log=%d resolution=%lf usec wanted: %lf usec", + twheel->res_order, ucs_time_to_usec(twheel->res), ucs_time_to_usec(resolution)); + return UCS_OK; +} + +void ucs_twheel_cleanup(ucs_twheel_t *twheel) +{ + ucs_free(twheel->wheel); +} + +ucs_status_t ucs_wtimer_init(ucs_wtimer_t *t, ucs_twheel_callback_t cb) +{ + t->cb = cb; + t->is_active = 0; + return UCS_OK; +} + +void __ucs_wtimer_add(ucs_twheel_t *t, ucs_wtimer_t *timer, ucs_time_t delta) +{ + uint64_t slot; + + timer->is_active = 1; + slot = delta>>t->res_order; + if (ucs_unlikely(slot == 0)) { + /* nothing really wrong with adding timer to the current slot. However + * we want to guard against the case we spend to much time in hi res + * timer processing */ + ucs_fatal("Timer resolution is too low. 
Min resolution %lf usec, wanted %lf usec", + ucs_time_to_usec(t->res), ucs_time_to_usec(delta)); + } + ucs_assert(slot > 0); + + if (ucs_unlikely(slot >= t->num_slots)) { + slot = t->num_slots - 1; + } + + slot = (t->current + slot) % t->num_slots; + ucs_assert(slot != t->current); + + ucs_list_add_tail(&t->wheel[slot], &timer->list); +} + +void __ucs_twheel_sweep(ucs_twheel_t *t, ucs_time_t current_time) +{ + ucs_wtimer_t *timer; + uint64_t slot; + + slot = (current_time - t->now) >> t->res_order; + t->now = current_time; + + if (ucs_unlikely(slot >= t->num_slots)) { + slot = t->num_slots - 1; + } + + slot = (t->current + slot) % t->num_slots; + + for (; t->current != slot; t->current = (t->current+1) % t->num_slots) { + while (!ucs_list_is_empty(&t->wheel[t->current])) { + timer = ucs_list_extract_head(&t->wheel[t->current], ucs_wtimer_t, list); + timer->is_active = 0; + timer->cb(timer); + } + } +} diff --git a/src/ucs/time/timer_wheel.h b/src/ucs/time/timer_wheel.h new file mode 100644 index 0000000..8c46d4e --- /dev/null +++ b/src/ucs/time/timer_wheel.h @@ -0,0 +1,134 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2012-2013. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_WHEEL_H +#define UCS_WHEEL_H + +#include +#include +#include + + +/* Forward declarations */ +typedef struct ucs_wtimer ucs_wtimer_t; +typedef struct ucs_timer_wheel ucs_twheel_t; + + +/** + * Timer wheel callback + */ +typedef void (*ucs_twheel_callback_t)(ucs_wtimer_t *self); + + +/** + * UCS high resolution timer. + */ +struct ucs_wtimer { + ucs_twheel_callback_t cb; /* User callback */ + ucs_list_link_t list; /* Link in the list of timers */ + int is_active; +}; + + +struct ucs_timer_wheel { + ucs_time_t res; + ucs_time_t now; /* when wheel was last updated */ + uint64_t current; + ucs_list_link_t *wheel; + unsigned res_order; + unsigned num_slots; +}; + + +/** + * Initialize the timer queue. + * + * @param twheel Timer queue to initialize. 
+ * @param resolution Timer resolution. Timer wheel range is from now to now + UCS_TWHEEL_NSLOTS * res + * @param current_time Current time to initialize the timer with. + */ +ucs_status_t ucs_twheel_init(ucs_twheel_t *twheel, ucs_time_t resolution, + ucs_time_t current_time); + + +/** + * Cleanup the timer queue. + * + * @param twheel Timer queue to clean up. + */ +void ucs_twheel_cleanup(ucs_twheel_t *twheel); + + +/** + * Initialize wheel timer + * + * @param cb Callback to call + */ +ucs_status_t ucs_wtimer_init(ucs_wtimer_t *t, ucs_twheel_callback_t cb); + + +/** + * Go through the timers in the timer queue, dispatch expired timers. + * + * @param twheel Timer wheel to dispatch timers on. + * @param current_time Current time to dispatch the timers for. + * + * @note Timers which expired between calls to this function will also be dispatched. + * @note There is no guarantee on the order of dispatching. + */ +void __ucs_twheel_sweep(ucs_twheel_t *t, ucs_time_t current_time); +static inline void ucs_twheel_sweep(ucs_twheel_t *t, ucs_time_t current_time) +{ + if (ucs_unlikely(current_time - t->now >= t->res)) { + __ucs_twheel_sweep(t, current_time); + } +} + +/** + * Get current time + */ +static inline ucs_time_t ucs_twheel_get_time(ucs_twheel_t *t) +{ + return t->now; +} + +/** + * Add a one shot timer. + * + * @param twheel Timer queue to schedule on. + * @param timer Timer callback to invoke every time. + * @param delta Invocation time + * + * NOTE: adding timer already in queue will do nothing + */ +void __ucs_wtimer_add(ucs_twheel_t *t, ucs_wtimer_t *timer, ucs_time_t delta); +static inline ucs_status_t ucs_wtimer_add(ucs_twheel_t *t, ucs_wtimer_t *timer, + ucs_time_t delta) +{ + if (ucs_likely(timer->is_active)) { + /* most of the times we try to schedule already active timer */ + return UCS_ERR_BUSY; + } + + __ucs_wtimer_add(t, timer, delta); + return UCS_OK; +} + + +/** + * Remove a timer. + * + * @param timer timer to remove. 
+ */ +static inline void ucs_wtimer_remove(ucs_wtimer_t *timer) +{ + if (ucs_likely(timer->is_active)) { + ucs_list_del(&timer->list); + timer->is_active = 0; + } +} + +#endif diff --git a/src/ucs/time/timerq.c b/src/ucs/time/timerq.c new file mode 100644 index 0000000..b75e3d8 --- /dev/null +++ b/src/ucs/time/timerq.c @@ -0,0 +1,121 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "timerq.h" + +#include +#include +#include +#include + + +ucs_status_t ucs_timerq_init(ucs_timer_queue_t *timerq) +{ + ucs_trace_func("timerq=%p", timerq); + + ucs_spinlock_init(&timerq->lock); + timerq->timers = NULL; + timerq->num_timers = 0; + /* coverity[missing_lock] */ + timerq->min_interval = UCS_TIME_INFINITY; + return UCS_OK; +} + +void ucs_timerq_cleanup(ucs_timer_queue_t *timerq) +{ + ucs_status_t status; + + ucs_trace_func("timerq=%p", timerq); + + if (timerq->num_timers > 0) { + ucs_warn("timer queue with %d timers being destroyed", timerq->num_timers); + } + ucs_free(timerq->timers); + + status = ucs_spinlock_destroy(&timerq->lock); + if (status != UCS_OK) { + ucs_warn("ucs_spinlock_destroy() failed (%d)", status); + } +} + +ucs_status_t ucs_timerq_add(ucs_timer_queue_t *timerq, int timer_id, + ucs_time_t interval) +{ + ucs_status_t status; + ucs_timer_t *ptr; + + ucs_trace_func("timerq=%p interval=%.2fus timer_id=%d", timerq, + ucs_time_to_usec(interval), timer_id); + + ucs_spin_lock(&timerq->lock); + + /* Make sure ID is unique */ + for (ptr = timerq->timers; ptr < timerq->timers + timerq->num_timers; ++ptr) { + if (ptr->id == timer_id) { + status = UCS_ERR_ALREADY_EXISTS; + goto out_unlock; + } + } + + /* Resize timer array */ + ptr = ucs_realloc(timerq->timers, (timerq->num_timers + 1) * sizeof(ucs_timer_t), + "timerq"); + if (ptr == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out_unlock; + } + timerq->timers = ptr; + ++timerq->num_timers; + timerq->min_interval = ucs_min(interval, 
timerq->min_interval); + ucs_assert(timerq->min_interval != UCS_TIME_INFINITY); + + /* Initialize the new timer */ + ptr = &timerq->timers[timerq->num_timers - 1]; + ptr->expiration = 0; /* will fire the next time sweep is called */ + ptr->interval = interval; + ptr->id = timer_id; + + status = UCS_OK; + +out_unlock: + ucs_spin_unlock(&timerq->lock); + return status; +} + +ucs_status_t ucs_timerq_remove(ucs_timer_queue_t *timerq, int timer_id) +{ + ucs_status_t status; + ucs_timer_t *ptr; + + ucs_trace_func("timerq=%p timer_id=%d", timerq, timer_id); + + status = UCS_ERR_NO_ELEM; + + ucs_spin_lock(&timerq->lock); + timerq->min_interval = UCS_TIME_INFINITY; + ptr = timerq->timers; + while (ptr < timerq->timers + timerq->num_timers) { + if (ptr->id == timer_id) { + *ptr = timerq->timers[--timerq->num_timers]; + status = UCS_OK; + } else { + timerq->min_interval = ucs_min(timerq->min_interval, ptr->interval); + ++ptr; + } + } + + /* TODO realloc - shrink */ + if (timerq->num_timers == 0) { + ucs_assert(timerq->min_interval == UCS_TIME_INFINITY); + free(timerq->timers); + timerq->timers = NULL; + } else { + ucs_assert(timerq->min_interval != UCS_TIME_INFINITY); + } + + ucs_spin_unlock(&timerq->lock); + return status; +} diff --git a/src/ucs/time/timerq.h b/src/ucs/time/timerq.h new file mode 100644 index 0000000..4547ab7 --- /dev/null +++ b/src/ucs/time/timerq.h @@ -0,0 +1,119 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCS_TIMERQ_H +#define UCS_TIMERQ_H + +#include +#include +#include +#include +#include + + +typedef struct ucs_timer { + ucs_time_t expiration;/* Absolute timer expiration time */ + ucs_time_t interval; /* Re-scheduling interval */ + int id; +} ucs_timer_t; + + +typedef struct ucs_timer_queue { + ucs_spinlock_t lock; + ucs_time_t min_interval; /* Expiration of next timer */ + ucs_timer_t *timers; /* Array of timers */ + unsigned num_timers; /* Number of timers */ +} ucs_timer_queue_t; + + +/** + * Initialize the timer queue. + * + * @param timerq Timer queue to initialize. + */ +ucs_status_t ucs_timerq_init(ucs_timer_queue_t *timerq); + + +/** + * Cleanup the timer queue. + * + * @param timerq Timer queue to clean up. + */ +void ucs_timerq_cleanup(ucs_timer_queue_t *timerq); + + +/** + * Add a periodic timer. + * + * @param timerq Timer queue to schedule on. + * @param timer_id Timer ID to add. + * @param interval Timer interval. + */ +ucs_status_t ucs_timerq_add(ucs_timer_queue_t *timerq, int timer_id, + ucs_time_t interval); + + +/** + * Remove a timer. + * + * @param timerq Time queue this timer was scheduled on. + * @param timer_id Timer ID to remove. + */ +ucs_status_t ucs_timerq_remove(ucs_timer_queue_t *timerq, int timer_id); + + +/** + * @return Minimal timer interval. + */ +static inline ucs_time_t ucs_timerq_min_interval(ucs_timer_queue_t *timerq) { + return timerq->min_interval; +} + + +/** + * @return Number of timers in the queue. + */ +static inline int ucs_timerq_size(ucs_timer_queue_t *timerq) { + return timerq->num_timers; +} + + +/** + * @return Whether there are no timers. + */ +static inline int ucs_timerq_is_empty(ucs_timer_queue_t *timerq) { + return ucs_timerq_size(timerq) == 0; +} + + +/** + * Go through the expired timers in the timer queue. + * + * @param _timer Variable to be assigned with a pointer to the timer. + * @param _timerq Timer queue to dispatch timers on. 
+ * @param _current_time Current time to dispatch the timers for. + * + * @note Timers which expired between calls to this function will also be dispatched. + * @note There is no guarantee on the order of dispatching. + */ +#define ucs_timerq_for_each_expired(_timer, _timerq, _current_time, _code) \ + { \ + ucs_time_t __current_time = _current_time; \ + ucs_spin_lock(&(_timerq)->lock); /* Grab lock */ \ + for (_timer = (_timerq)->timers; \ + _timer != (_timerq)->timers + (_timerq)->num_timers; \ + ++_timer) \ + { \ + if (__current_time >= (_timer)->expiration) { \ + /* Update expiration time */ \ + (_timer)->expiration = __current_time + (_timer)->interval; \ + _code; \ + } \ + } \ + ucs_spin_unlock(&(_timerq)->lock); /* Release lock */ \ + } + +#endif diff --git a/src/ucs/type/class.c b/src/ucs/type/class.c new file mode 100644 index 0000000..9566e73 --- /dev/null +++ b/src/ucs/type/class.c @@ -0,0 +1,70 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "class.h" + +#include +#include +#include + + +UCS_CLASS_INIT_FUNC(void) +{ + return UCS_OK; +} + +UCS_CLASS_CLEANUP_FUNC(void) +{ +} + +ucs_class_t _UCS_CLASS_DECL_NAME(void) = { + UCS_PP_QUOTE(void), + 0, + NULL, + (ucs_class_init_func_t)_UCS_CLASS_INIT_NAME(void), + (ucs_class_cleanup_func_t)_UCS_CLASS_CLEANUP_NAME(void) +}; + +void ucs_class_call_cleanup_chain(ucs_class_t *cls, void *obj, int limit) +{ + ucs_class_t *c; + int depth, skip; + + ucs_assert(((limit == -1) || (limit >= 1)) && (cls != NULL)); + + /* Count how many classes are there */ + for (depth = 0, c = cls; c != NULL; ++depth, c = c->superclass); + + /* Skip some destructors, because we may have a limit here */ + skip = (limit < 0) ? 
0 : ucs_max(depth - limit, 0); + c = cls; + + /* check for NULL pointer to suppress clang warning */ + while ((skip-- > 0) && (c != NULL)) { + c = c->superclass; + } + + /* Call remaining destructors */ + while (c != NULL) { + c->cleanup(obj); + c = c->superclass; + } +} + +void *ucs_class_malloc(ucs_class_t *cls) +{ + return ucs_malloc(cls->size, cls->name); +} + +void ucs_class_free(void *obj) +{ + ucs_free(obj); +} + +void ucs_class_check_new_func_result(ucs_status_t status, void *obj) +{ + ucs_assert((status == UCS_OK) || (obj == NULL)); +} diff --git a/src/ucs/type/class.h b/src/ucs/type/class.h new file mode 100644 index 0000000..d97e038 --- /dev/null +++ b/src/ucs/type/class.h @@ -0,0 +1,319 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_OBJECT_H_ +#define UCS_OBJECT_H_ + +#include +#include +#include +#include + +BEGIN_C_DECLS + +/** @file class.h */ + +typedef struct ucs_class ucs_class_t; + + +typedef ucs_status_t (*ucs_class_init_func_t) (void *self, ...); +typedef void (*ucs_class_cleanup_func_t)(void *self); + +struct ucs_class { + const char *name; + size_t size; + ucs_class_t *superclass; + ucs_class_init_func_t init; + ucs_class_cleanup_func_t cleanup; +}; + + +/* + * Helper: Define names of class-related identifiers. + */ +#define _UCS_CLASS_DECL_NAME(_type) \ + UCS_PP_TOKENPASTE(_type, _class) +#define _UCS_CLASS_INIT_NAME(_type) \ + UCS_PP_TOKENPASTE(_type, _init) +#define _UCS_CLASS_CLEANUP_NAME(_type) \ + UCS_PP_TOKENPASTE(_type, _cleanup) + +/** + * Class initialization/cleanup function prototypes. + */ +#define UCS_CLASS_INIT_FUNC(_type, ...) \ + ucs_status_t _UCS_CLASS_INIT_NAME(_type)(_type *self, ucs_class_t *_myclass, \ + int *_init_count, ## __VA_ARGS__) +#define UCS_CLASS_CLEANUP_FUNC(_type) \ + void _UCS_CLASS_CLEANUP_NAME(_type)(_type *self) + + +/** + * Declare a class. + * + * @param _type Class type. + */ +#define UCS_CLASS_DECLARE(_type, ...) 
\ + extern ucs_class_t _UCS_CLASS_DECL_NAME(_type); \ + UCS_CLASS_INIT_FUNC(_type, ## __VA_ARGS__); + +#define UCS_CLASS_NAME(_type) \ + _UCS_CLASS_DECL_NAME(_type) + +/** + * Define a class. + * + * @param _type Class type. + * @param _super Superclass type (may be void to indicate top-level class) + */ +#define UCS_CLASS_DEFINE(_type, _super) \ + extern ucs_class_t _UCS_CLASS_DECL_NAME(_super); \ + ucs_class_t _UCS_CLASS_DECL_NAME(_type) = { \ + UCS_PP_QUOTE(_type), \ + sizeof(_type), \ + &_UCS_CLASS_DECL_NAME(_super), \ + (ucs_class_init_func_t)(_UCS_CLASS_INIT_NAME(_type)), \ + (ucs_class_cleanup_func_t)(_UCS_CLASS_CLEANUP_NAME(_type)) \ + }; + + +/** + * Initialize a class in-place. + * + * @param _type Class type. + * @param _obj Instance pointer to initialize. + * @param ... Additional arguments to the constructor. + * + * @return UCS_OK, or error code if failed. + */ +#define UCS_CLASS_INIT(_type, _obj, ...) \ + ({ \ + extern ucs_class_t _UCS_CLASS_DECL_NAME(_type); \ + ucs_class_t *cls = &_UCS_CLASS_DECL_NAME(_type); \ + int init_count = 1; \ + ucs_status_t status; \ + \ + status = _UCS_CLASS_INIT_NAME(_type)((_type*)(_obj), cls, &init_count, \ + ## __VA_ARGS__); \ + if (status != UCS_OK) { \ + ucs_class_call_cleanup_chain(&_UCS_CLASS_DECL_NAME(_type), \ + (_obj), init_count); \ + } \ + \ + (status); \ + }) + + +/** + * Cleanup a class in-place. + * + * @param _type Class type. + * @param _obj Instance pointer to cleanup. + */ +#define UCS_CLASS_CLEANUP_CALL(_cls, _obj) \ + ucs_class_call_cleanup_chain(_cls, _obj, -1) + + +/** + * Cleanup a class in-place. + * + * @param _type Class type. + * @param _obj Instance pointer to cleanup. + */ +#define UCS_CLASS_CLEANUP(_type, _obj) \ + { \ + extern ucs_class_t _UCS_CLASS_DECL_NAME(_type); \ + UCS_CLASS_CLEANUP_CALL(&_UCS_CLASS_DECL_NAME(_type), _obj); \ + } + + +/** + * Instantiate a class. + * + * @param _type Class type. + * @param _obj Variable to save the new instance to. + * @param ... 
Additional arguments to the constructor. + * + * @return UCS_OK, or error code if failed. + */ +#define UCS_CLASS_NEW(_type, _obj, ...) \ + _UCS_CLASS_NEW (_type, _obj, ## __VA_ARGS__) +#define _UCS_CLASS_NEW(_type, _obj, ...) \ + ({ \ + extern ucs_class_t _UCS_CLASS_DECL_NAME(_type); \ + ucs_class_t *cls = &_UCS_CLASS_DECL_NAME(_type); \ + ucs_status_t status; \ + void *obj; \ + \ + obj = ucs_class_malloc(cls); \ + if (obj != NULL) { \ + status = UCS_CLASS_INIT(_type, obj, ## __VA_ARGS__); \ + if (status == UCS_OK) { \ + *(_obj) = (typeof(*(_obj)))obj; /* Success - assign pointer */ \ + } else { \ + ucs_class_free(obj); /* Initialization failure */ \ + } \ + } else { \ + status = UCS_ERR_NO_MEMORY; /* Allocation failure */ \ + } \ + \ + (status); \ + }) + + +/** + * Destroy a class instance. + * + * @param _type Class type. + * @param _obj Instance to destroy. + */ +#define UCS_CLASS_DELETE(_type, _obj) \ + { \ + UCS_CLASS_CLEANUP(_type, _obj); \ + ucs_class_free(_obj); \ + } + + +/** + * Invoke the parent constructor. + * Should be used only from init function (which defines "self" and "_myclass") + * + * @param _superclass Type of the superclass. + * @param ... Arguments to parent constructor. + */ +#define UCS_CLASS_CALL_SUPER_INIT(_superclass, ...) \ + { \ + { \ + ucs_status_t status = _UCS_CLASS_INIT_NAME(_superclass)\ + (&self->super, _myclass->superclass, _init_count, ## __VA_ARGS__); \ + if (status != UCS_OK) { \ + return status; \ + } \ + if (_myclass->superclass != &_UCS_CLASS_DECL_NAME(void)) { \ + ++(*_init_count); \ + } \ + } \ + } + + +/** + * Declare / define a function which creates an instance of a class. + * + * @param _name Function name. + * @param _type Class type. + * @param _argtype Type to use for the instance argument. Should be a superclass of _type. + * @param ... List of types for initialization arguments (without variable names). 
+ * + * Defines a function which can be used as follows: + * { + * ucs_status_t status; + * _type *obj; + * status = _type##_new(arg1, arg2, arg3, &obj); + * } + */ +#define UCS_CLASS_DECLARE_NAMED_NEW_FUNC(_name, _argtype, ...) \ + ucs_status_t _name(UCS_PP_FOREACH(_UCS_CLASS_INIT_ARG_DEFINE, _, \ + UCS_PP_ZIP((UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__))), \ + (__VA_ARGS__))) \ + _argtype **obj_p) +#define UCS_CLASS_DEFINE_NAMED_NEW_FUNC(_name, _type, _argtype, ...) \ + UCS_CLASS_DECLARE_NAMED_NEW_FUNC(_name, _argtype, ## __VA_ARGS__) { \ + ucs_status_t status; \ + \ + *obj_p = NULL; \ + \ + status = UCS_CLASS_NEW(_type, obj_p \ + UCS_PP_FOREACH(_UCS_CLASS_INIT_ARG_PASS, _, \ + UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__)))); \ + ucs_class_check_new_func_result(status, *obj_p); \ + return status; \ + } +#define UCS_CLASS_DECLARE_NEW_FUNC(_type, _argtype, ...) \ + UCS_CLASS_DECLARE_NAMED_NEW_FUNC(UCS_CLASS_NEW_FUNC_NAME(_type), _argtype, ## __VA_ARGS__) +#define UCS_CLASS_DEFINE_NEW_FUNC(_type, _argtype, ...) \ + UCS_CLASS_DEFINE_NAMED_NEW_FUNC(UCS_CLASS_NEW_FUNC_NAME(_type), _type, _argtype, ## __VA_ARGS__) + + +/* + * Helper macros for creating argument list + */ +#define _UCS_CLASS_INIT_ARG_DEFINE(_, _bundle) \ + __UCS_CLASS_INIT_ARG_DEFINE(_, UCS_PP_TUPLE_0 _bundle, UCS_PP_TUPLE_1 _bundle) +#define __UCS_CLASS_INIT_ARG_DEFINE(_, _index, _type) \ + _type _UCS_CLASS_INIT_ARG_NAME(_, _index), +#define _UCS_CLASS_INIT_ARG_PASS(_, _index) \ + , _UCS_CLASS_INIT_ARG_NAME(_, _index) +#define _UCS_CLASS_INIT_ARG_NAME(_, _index) \ + UCS_PP_TOKENPASTE(arg, _index) + + +/** + * Name of the function created by UCS_CLASS_DEFINE_NEW_FUNC. + */ +#define UCS_CLASS_NEW_FUNC_NAME(_type) \ + UCS_PP_TOKENPASTE(_type, _new) + + +/** + * Define a function which deletes class instance. + * + * @param _type Class type. + * @param _argtype Type to use for the instance argument. Should be a superclass of _type. 
+ * + * Defines a function which can be used as follows: + * { + * _type *obj = ...; + * _type##_delete(obj); + */ +#define UCS_CLASS_DECLARE_NAMED_DELETE_FUNC(_name, _argtype) \ + void _name(_argtype *self) +#define UCS_CLASS_DEFINE_NAMED_DELETE_FUNC(_name, _type, _argtype) \ + UCS_CLASS_DECLARE_NAMED_DELETE_FUNC(_name, _argtype) \ + { \ + UCS_CLASS_DELETE(_type, self); \ + } +#define UCS_CLASS_DECLARE_DELETE_FUNC(_type, _argtype) \ + UCS_CLASS_DECLARE_NAMED_DELETE_FUNC(UCS_CLASS_DELETE_FUNC_NAME(_type), _argtype) +#define UCS_CLASS_DEFINE_DELETE_FUNC(_type, _argtype) \ + UCS_CLASS_DEFINE_NAMED_DELETE_FUNC(UCS_CLASS_DELETE_FUNC_NAME(_type), _type, _argtype) + + +/** + * Name of the function created by UCS_CLASS_DEFINE_DELETE_FUNC. + */ +#define UCS_CLASS_DELETE_FUNC_NAME(_type) \ + UCS_PP_TOKENPASTE(_type, _delete) + + +/** + * Helper: Call class destructor chain. + * + * @param cls Class type. + * @param obj Instance pointer. + * @param limit How many destructors to call (0: none, -1: all, 1: only ucs_object_t's). + */ +void ucs_class_call_cleanup_chain(ucs_class_t *cls, void *obj, int limit); + + +/* + * Helpers: + */ +/* Allocate objects */ +void *ucs_class_malloc(ucs_class_t *cls); +/* Release objects */ +void ucs_class_free(void *obj); +/* Check new function result */ +void ucs_class_check_new_func_result(ucs_status_t status, void *obj); + + +/** + * The empty class. + */ +UCS_CLASS_DECLARE(void); + +END_C_DECLS + +#endif diff --git a/src/ucs/type/cpu_set.h b/src/ucs/type/cpu_set.h new file mode 100644 index 0000000..eadb3f1 --- /dev/null +++ b/src/ucs/type/cpu_set.h @@ -0,0 +1,77 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_TYPE_CPU_SET_H +#define UCS_TYPE_CPU_SET_H + +#include +#include + +/* Type for array elements in 'ucs_cpu_set_t'. */ +typedef unsigned long int ucs_cpu_mask_t; + + +/* Size definition for CPU sets. 
*/ +#define UCS_CPU_SETSIZE 1024 +#define UCS_NCPUBITS (8 * sizeof(ucs_cpu_mask_t)) + +#define UCS_CPUELT(_cpu) ((_cpu) / UCS_NCPUBITS) +#define UCS_CPUMASK(_cpu) ((ucs_cpu_mask_t) 1 << ((_cpu) % UCS_NCPUBITS)) + + +/* Data structure to describe CPU mask. */ +typedef struct { + ucs_cpu_mask_t ucs_bits[UCS_CPU_SETSIZE / UCS_NCPUBITS]; +} ucs_cpu_set_t; + + +#define UCS_CPU_ZERO(_cpusetp) \ + do { \ + int _i; \ + for ( _i = 0; _i < (int)(UCS_CPU_SETSIZE / UCS_NCPUBITS); ++_i) { \ + ((_cpusetp)->ucs_bits)[_i] = 0; \ + } \ + } while (0) + +#define UCS_CPU_SET(_cpu, _cpusetp) \ + do { \ + size_t _cpu2 = (_cpu); \ + if (_cpu2 < (8 * sizeof (ucs_cpu_set_t))) { \ + (((ucs_cpu_mask_t *)((_cpusetp)->ucs_bits))[UCS_CPUELT(_cpu2)] |= \ + UCS_CPUMASK(_cpu2)); \ + } \ + } while (0) + +#define UCS_CPU_CLR(_cpu, _cpusetp) \ + do { \ + size_t _cpu2 = (_cpu); \ + if (_cpu2 < (8 * sizeof(ucs_cpu_set_t))) { \ + (((ucs_cpu_mask_t *) ((_cpusetp)->ucs_bits))[UCS_CPUELT(_cpu2)] &= \ + ~UCS_CPUMASK(_cpu2)); \ + } \ + } while (0) + +static inline int ucs_cpu_is_set(int cpu, const ucs_cpu_set_t *cpusetp) +{ + if ((cpu >= 0) && (cpu < (int)(8 * sizeof(ucs_cpu_set_t)))) { /* a negative id would be converted to size_t in UCS_CPUELT and index outside ucs_bits */ + const ucs_cpu_mask_t *mask = cpusetp->ucs_bits; + return ((mask[UCS_CPUELT(cpu)] & UCS_CPUMASK(cpu)) != 0); + } + return 0; +} + +static inline int ucs_cpu_set_find_lcs(const ucs_cpu_set_t * cpu_mask) +{ + int i; + for (i = 0; i < UCS_CPU_SETSIZE; ++i) { + if (ucs_cpu_is_set(i, cpu_mask)) { + return i; + } + } + return 0; +} + +#endif diff --git a/src/ucs/type/init_once.c b/src/ucs/type/init_once.c new file mode 100644 index 0000000..f9601aa --- /dev/null +++ b/src/ucs/type/init_once.c @@ -0,0 +1,17 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + + +#include +#include + + +unsigned ucs_init_once_mutex_unlock(pthread_mutex_t *lock) +{ + int ret = pthread_mutex_unlock(lock); + ucs_assert_always(ret == 0); + return 0; +} diff --git a/src/ucs/type/init_once.h b/src/ucs/type/init_once.h new file mode 100644 index 0000000..4b7e967 --- /dev/null +++ b/src/ucs/type/init_once.h @@ -0,0 +1,57 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCS_TYPE_INIT_ONCE_H_ +#define UCS_TYPE_INIT_ONCE_H_ + + +#include + + +/* + * Synchronization object for one-time initialization. + */ +typedef struct ucs_init_once { + pthread_mutex_t lock; /* Protects the initialization */ + int initialized; /* Whether the initialization took place */ +} ucs_init_once_t; + + +/* Static initializer for @ref ucs_init_once_t */ +#define UCS_INIT_ONCE_INITIALIZER \ + { PTHREAD_MUTEX_INITIALIZER, 0 } + + +/* Wrapper to unlock a mutex that always returns 0 to avoid endless loop + * and make static analyzers happy - they report "double unlock" warning */ +unsigned ucs_init_once_mutex_unlock(pthread_mutex_t *lock); + + +/* + * Start a code block to perform an arbitrary initialization step only once + * during the lifetime of the provided synchronization object. + * + * @param [in] _once Pointer to @ref ucs_init_once_t synchronization object. + * + * Usage: + * UCS_INIT_ONCE(&once) { + * ... code ... + * } + * + * @note It's safe to use a "continue" statement in order to exit the code block, + * but "return" and "break" statements may lead to unexpected behavior. + * + * How does it work? First, lock the mutex. Then check if already initialized, + * if yes unlock the mutex and exit the loop (pthread_mutex_unlock is expected + * to return 0). Otherwise, perform the "body" of the for loop, and then set + * "initialized" to 1. On the next condition check, unlock the mutex and exit. 
+ */ +#define UCS_INIT_ONCE(_once) \ + for (pthread_mutex_lock(&(_once)->lock); \ + !(_once)->initialized || ucs_init_once_mutex_unlock(&(_once)->lock); \ + (_once)->initialized = 1) + +#endif diff --git a/src/ucs/type/spinlock.h b/src/ucs/type/spinlock.h new file mode 100644 index 0000000..69f7017 --- /dev/null +++ b/src/ucs/type/spinlock.h @@ -0,0 +1,113 @@ +/* +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_SPINLOCK_H +#define UCS_SPINLOCK_H + +#include +#include +#include + +BEGIN_C_DECLS + +/** @file spinlock.h */ + +/** + * Reentrant spinlock. + */ +typedef struct ucs_spinlock { + pthread_spinlock_t lock; + int count; + pthread_t owner; +} ucs_spinlock_t; + + +#define UCS_SPINLOCK_OWNER_NULL ((pthread_t)-1) + +static inline ucs_status_t ucs_spinlock_init(ucs_spinlock_t *lock) +{ + int ret; + + ret = pthread_spin_init(&lock->lock, 0); + if (ret != 0) { + return UCS_ERR_IO_ERROR; + } + + lock->count = 0; + lock->owner = UCS_SPINLOCK_OWNER_NULL; + + return UCS_OK; +} + +static inline ucs_status_t ucs_spinlock_destroy(ucs_spinlock_t *lock) +{ + int ret; + + if (lock->count != 0) { + return UCS_ERR_BUSY; + } + + ret = pthread_spin_destroy(&lock->lock); + if (ret != 0) { + if (errno == EBUSY) { + return UCS_ERR_BUSY; + } else { + return UCS_ERR_INVALID_PARAM; + } + } + + return UCS_OK; +} + +static inline int ucs_spin_is_owner(ucs_spinlock_t *lock, pthread_t self) +{ + return lock->owner == self; +} + +static inline void ucs_spin_lock(ucs_spinlock_t *lock) +{ + pthread_t self = pthread_self(); + + if (ucs_spin_is_owner(lock, self)) { + ++lock->count; + return; + } + + pthread_spin_lock(&lock->lock); + lock->owner = self; + ++lock->count; +} + +static inline int ucs_spin_trylock(ucs_spinlock_t *lock) +{ + pthread_t self = pthread_self(); + + if (ucs_spin_is_owner(lock, self)) { + ++lock->count; + return 1; + } + + if (pthread_spin_trylock(&lock->lock) != 0) { + return 0; + } + + 
lock->owner = self; + ++lock->count; + return 1; +} + +static inline void ucs_spin_unlock(ucs_spinlock_t *lock) +{ + --lock->count; + if (lock->count == 0) { + lock->owner = UCS_SPINLOCK_OWNER_NULL; + pthread_spin_unlock(&lock->lock); + } +} + +END_C_DECLS + +#endif diff --git a/src/ucs/type/status.c b/src/ucs/type/status.c new file mode 100644 index 0000000..9e6cf6b --- /dev/null +++ b/src/ucs/type/status.c @@ -0,0 +1,77 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "status.h" + +#include + + +const char *ucs_status_string(ucs_status_t status) +{ + static char error_str[128] = {0}; + + switch (status) { + case UCS_OK: + return "Success"; + case UCS_INPROGRESS: + return "Operation in progress"; + case UCS_ERR_NO_MESSAGE: + return "No pending message"; + case UCS_ERR_NO_RESOURCE: + return "No resources are available to initiate the operation"; + case UCS_ERR_IO_ERROR: + return "Input/output error"; + case UCS_ERR_NO_MEMORY: + return "Out of memory"; + case UCS_ERR_INVALID_PARAM: + return "Invalid parameter"; + case UCS_ERR_UNREACHABLE: + return "Destination is unreachable"; + case UCS_ERR_INVALID_ADDR: + return "Address not valid"; + case UCS_ERR_NOT_IMPLEMENTED: + return "Function not implemented"; + case UCS_ERR_MESSAGE_TRUNCATED: + return "Message truncated"; + case UCS_ERR_NO_PROGRESS: + return "No progress"; + case UCS_ERR_BUFFER_TOO_SMALL: + return "Provided buffer is too small"; + case UCS_ERR_NO_ELEM: + return "No such element"; + case UCS_ERR_SOME_CONNECTS_FAILED: + return "Failed to connect some of the requested endpoints"; + case UCS_ERR_NO_DEVICE: + return "No such device"; + case UCS_ERR_BUSY: + return "Device is busy"; + case UCS_ERR_CANCELED: + return "Request canceled"; + case UCS_ERR_SHMEM_SEGMENT: + return "Shared memory error"; + case UCS_ERR_ALREADY_EXISTS: + return "Element already exists"; + case UCS_ERR_OUT_OF_RANGE: + return "Index out of range"; + case 
UCS_ERR_TIMED_OUT: + return "Operation timed out"; + case UCS_ERR_EXCEEDS_LIMIT: + return "User-defined limit was reached"; + case UCS_ERR_UNSUPPORTED: + return "Unsupported operation"; + case UCS_ERR_REJECTED: + return "Operation rejected by remote peer"; + case UCS_ERR_NOT_CONNECTED: + return "Endpoint is not connected"; + case UCS_ERR_CONNECTION_RESET: + return "Connection reset by remote peer"; + case UCS_ERR_ENDPOINT_TIMEOUT: + return "Endpoint timeout"; + default: + snprintf(error_str, sizeof(error_str) - 1, "Unknown error %d", status); + return error_str; + }; +} diff --git a/src/ucs/type/status.h b/src/ucs/type/status.h new file mode 100644 index 0000000..d6409ff --- /dev/null +++ b/src/ucs/type/status.h @@ -0,0 +1,125 @@ +/** + * @file status.h + * @date 2014-2019 + * @copyright Mellanox Technologies Ltd. All rights reserved. + * @copyright The University of Tennessee and the University of Tennessee research foundation. All rights reserved. + * @brief Unified Communication Services + */ + +#ifndef UCS_TYPES_STATUS_H_ +#define UCS_TYPES_STATUS_H_ + +#include + +BEGIN_C_DECLS + +/** @file status.h */ + +/** + * @defgroup UCS_API Unified Communication Services (UCS) API + * @{ + * This section describes UCS API. + * @} + */ + +/** +* @defgroup UCS_RESOURCE UCS Communication Resource +* @ingroup UCS_API +* @{ +* This section describes a concept of the Communication Resource and routines +* associated with the concept. +* @} +*/ + +/** + * @ingroup UCS_RESOURCE + * @brief Status codes + * + * @note In order to evaluate the necessary steps to recover from a certain + * error, all error codes which can be returned by the external API are grouped + * by the largest entity permanently affected by the error. Each group ranges + * between its UCS_ERR_FIRST_ and UCS_ERR_LAST_ enum values. + * For example, if a link fails it may be sufficient to destroy (and possibly + * replace) it, in contrast to an endpoint-level error. 
+ */ +typedef enum { + /* Operation completed successfully */ + UCS_OK = 0, + + /* Operation is queued and still in progress */ + UCS_INPROGRESS = 1, + + /* Failure codes */ + UCS_ERR_NO_MESSAGE = -1, + UCS_ERR_NO_RESOURCE = -2, + UCS_ERR_IO_ERROR = -3, + UCS_ERR_NO_MEMORY = -4, + UCS_ERR_INVALID_PARAM = -5, + UCS_ERR_UNREACHABLE = -6, + UCS_ERR_INVALID_ADDR = -7, + UCS_ERR_NOT_IMPLEMENTED = -8, + UCS_ERR_MESSAGE_TRUNCATED = -9, + UCS_ERR_NO_PROGRESS = -10, + UCS_ERR_BUFFER_TOO_SMALL = -11, + UCS_ERR_NO_ELEM = -12, + UCS_ERR_SOME_CONNECTS_FAILED = -13, + UCS_ERR_NO_DEVICE = -14, + UCS_ERR_BUSY = -15, + UCS_ERR_CANCELED = -16, + UCS_ERR_SHMEM_SEGMENT = -17, + UCS_ERR_ALREADY_EXISTS = -18, + UCS_ERR_OUT_OF_RANGE = -19, + UCS_ERR_TIMED_OUT = -20, + UCS_ERR_EXCEEDS_LIMIT = -21, + UCS_ERR_UNSUPPORTED = -22, + UCS_ERR_REJECTED = -23, + UCS_ERR_NOT_CONNECTED = -24, + UCS_ERR_CONNECTION_RESET = -25, + + UCS_ERR_FIRST_LINK_FAILURE = -40, + UCS_ERR_LAST_LINK_FAILURE = -59, + UCS_ERR_FIRST_ENDPOINT_FAILURE = -60, + UCS_ERR_LAST_ENDPOINT_FAILURE = -79, + UCS_ERR_ENDPOINT_TIMEOUT = -80, + + UCS_ERR_LAST = -100 +} UCS_S_PACKED ucs_status_t; + + +#define UCS_IS_LINK_ERROR(_code) \ + (((_code) <= UCS_ERR_FIRST_LINK_FAILURE) && \ + ((_code) >= UCS_ERR_LAST_LINK_FAILURE) + +#define UCS_IS_ENDPOINT_ERROR(_code) \ + (((_code) <= UCS_ERR_FIRST_ENDPOINT_FAILURE) && \ + ((_code) >= UCS_ERR_LAST_ENDPOINT_FAILURE) + +/** + * @ingroup UCS_RESOURCE + * @brief Status pointer + * + * A pointer can represent one of these values: + * - NULL / UCS_OK + * - Error code pointer (UCS_ERR_xx) + * - Valid pointer + */ +typedef void *ucs_status_ptr_t; + +#define UCS_PTR_IS_ERR(_ptr) (((uintptr_t)(_ptr)) >= ((uintptr_t)UCS_ERR_LAST)) +#define UCS_PTR_IS_PTR(_ptr) (((uintptr_t)(_ptr) - 1) < ((uintptr_t)UCS_ERR_LAST - 1)) +#define UCS_PTR_RAW_STATUS(_ptr) ((ucs_status_t)(intptr_t)(_ptr)) +#define UCS_PTR_STATUS(_ptr) (UCS_PTR_IS_PTR(_ptr) ? 
UCS_INPROGRESS : UCS_PTR_RAW_STATUS(_ptr)) +#define UCS_STATUS_PTR(_status) ((void*)(intptr_t)(_status)) +#define UCS_STATUS_IS_ERR(_status) ((_status) < 0) + + +/** + * @param status UCS status code. + * + * @return Verbose status message. + */ +const char *ucs_status_string(ucs_status_t status); + +END_C_DECLS + +#endif diff --git a/src/ucs/type/thread_mode.h b/src/ucs/type/thread_mode.h new file mode 100644 index 0000000..ba6b527 --- /dev/null +++ b/src/ucs/type/thread_mode.h @@ -0,0 +1,27 @@ +/* +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (C) The University of Tennessee and The University +* of Tennessee Research Foundation. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_TYPE_THREAD_MODE_H +#define UCS_TYPE_THREAD_MODE_H + + +/** + * @ingroup UCS_RESOURCE + * @brief Thread sharing mode + * + * Specifies thread sharing mode of an object. + */ +typedef enum { + UCS_THREAD_MODE_SINGLE, /**< Only the master thread can access (i.e. the thread that initialized the context; multiple threads may exist and never access) */ + UCS_THREAD_MODE_SERIALIZED, /**< Multiple threads can access, but only one at a time */ + UCS_THREAD_MODE_MULTI, /**< Multiple threads can access concurrently */ + UCS_THREAD_MODE_LAST +} ucs_thread_mode_t; + + +#endif diff --git a/src/uct/Makefile.am b/src/uct/Makefile.am new file mode 100644 index 0000000..73f372c --- /dev/null +++ b/src/uct/Makefile.am @@ -0,0 +1,73 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. +# Copyright (c) The University of Tennesse and the University of Tennessee +# Research Foundation. 2016. ALL RIGHTS RESERVED. +# Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +SUBDIRS = . 
cuda ib rocm sm ugni + +lib_LTLIBRARIES = libuct.la +libuct_la_CFLAGS = $(BASE_CFLAGS) +libuct_la_CPPFLAGS = $(BASE_CPPFLAGS) +libuct_la_LIBADD = $(top_builddir)/src/ucs/libucs.la +libuct_la_LDFLAGS = -ldl -version-info $(SOVERSION) +libuct_ladir = $(includedir)/uct + +nobase_dist_libuct_la_HEADERS = \ + api/tl.h \ + api/uct_def.h \ + api/uct.h \ + api/version.h + +noinst_HEADERS = \ + base/uct_md.h \ + base/uct_component.h \ + base/uct_iface.h \ + base/uct_log.h \ + base/uct_worker.h \ + base/uct_cm.h \ + sm/base/sm_ep.h \ + sm/base/sm_iface.h \ + sm/mm/base/mm_iface.h \ + sm/mm/base/mm_ep.h \ + sm/mm/base/mm_md.h \ + sm/self/self.h \ + tcp/tcp.h \ + tcp/tcp_sockcm.h \ + tcp/tcp_listener.h \ + tcp/tcp_sockcm_ep.h \ + tcp/sockcm/sockcm_def.h \ + tcp/sockcm/sockcm_iface.h \ + tcp/sockcm/sockcm_ep.h \ + tcp/sockcm/sockcm_md.h + + +libuct_la_SOURCES = \ + base/uct_md.c \ + base/uct_mem.c \ + base/uct_component.c \ + base/uct_iface.c \ + base/uct_worker.c \ + base/uct_cm.c \ + sm/base/sm_ep.c \ + sm/base/sm_iface.c \ + sm/mm/base/mm_iface.c \ + sm/mm/base/mm_ep.c \ + sm/mm/base/mm_md.c \ + sm/mm/posix/mm_posix.c \ + sm/mm/sysv/mm_sysv.c \ + sm/self/self.c \ + tcp/tcp_ep.c \ + tcp/tcp_iface.c \ + tcp/tcp_md.c \ + tcp/tcp_net.c \ + tcp/tcp_cm.c \ + tcp/tcp_sockcm.c \ + tcp/tcp_listener.c \ + tcp/tcp_sockcm_ep.c \ + tcp/sockcm/sockcm_iface.c \ + tcp/sockcm/sockcm_ep.c \ + tcp/sockcm/sockcm_md.c diff --git a/src/uct/Makefile.in b/src/uct/Makefile.in new file mode 100644 index 0000000..1a45ecc --- /dev/null +++ b/src/uct/Makefile.in @@ -0,0 +1,1434 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. +# Copyright (c) The University of Tennesse and the University of Tennessee +# Research Foundation. 2016. ALL RIGHTS RESERVED. +# Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = 
yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + 
$(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(nobase_dist_libuct_la_HEADERS) \ + $(noinst_HEADERS) $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed 
'$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libuct_ladir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libuct_la_DEPENDENCIES = $(top_builddir)/src/ucs/libucs.la +am__dirstamp = $(am__leading_dot)dirstamp +am_libuct_la_OBJECTS = base/libuct_la-uct_md.lo \ + base/libuct_la-uct_mem.lo base/libuct_la-uct_component.lo \ + base/libuct_la-uct_iface.lo base/libuct_la-uct_worker.lo \ + base/libuct_la-uct_cm.lo sm/base/libuct_la-sm_ep.lo \ + sm/base/libuct_la-sm_iface.lo sm/mm/base/libuct_la-mm_iface.lo \ + sm/mm/base/libuct_la-mm_ep.lo sm/mm/base/libuct_la-mm_md.lo \ + sm/mm/posix/libuct_la-mm_posix.lo \ + sm/mm/sysv/libuct_la-mm_sysv.lo sm/self/libuct_la-self.lo \ + tcp/libuct_la-tcp_ep.lo tcp/libuct_la-tcp_iface.lo \ + tcp/libuct_la-tcp_md.lo tcp/libuct_la-tcp_net.lo \ + tcp/libuct_la-tcp_cm.lo tcp/libuct_la-tcp_sockcm.lo \ + tcp/libuct_la-tcp_listener.lo tcp/libuct_la-tcp_sockcm_ep.lo \ + tcp/sockcm/libuct_la-sockcm_iface.lo \ + tcp/sockcm/libuct_la-sockcm_ep.lo \ + tcp/sockcm/libuct_la-sockcm_md.lo +libuct_la_OBJECTS = $(am_libuct_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libuct_la_CFLAGS) \ + $(CFLAGS) $(libuct_la_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) 
$(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = base/$(DEPDIR)/libuct_la-uct_cm.Plo \ + base/$(DEPDIR)/libuct_la-uct_component.Plo \ + base/$(DEPDIR)/libuct_la-uct_iface.Plo \ + base/$(DEPDIR)/libuct_la-uct_md.Plo \ + base/$(DEPDIR)/libuct_la-uct_mem.Plo \ + base/$(DEPDIR)/libuct_la-uct_worker.Plo \ + sm/base/$(DEPDIR)/libuct_la-sm_ep.Plo \ + sm/base/$(DEPDIR)/libuct_la-sm_iface.Plo \ + sm/mm/base/$(DEPDIR)/libuct_la-mm_ep.Plo \ + sm/mm/base/$(DEPDIR)/libuct_la-mm_iface.Plo \ + sm/mm/base/$(DEPDIR)/libuct_la-mm_md.Plo \ + sm/mm/posix/$(DEPDIR)/libuct_la-mm_posix.Plo \ + sm/mm/sysv/$(DEPDIR)/libuct_la-mm_sysv.Plo \ + sm/self/$(DEPDIR)/libuct_la-self.Plo \ + tcp/$(DEPDIR)/libuct_la-tcp_cm.Plo \ + tcp/$(DEPDIR)/libuct_la-tcp_ep.Plo \ + tcp/$(DEPDIR)/libuct_la-tcp_iface.Plo \ + tcp/$(DEPDIR)/libuct_la-tcp_listener.Plo \ + tcp/$(DEPDIR)/libuct_la-tcp_md.Plo \ + tcp/$(DEPDIR)/libuct_la-tcp_net.Plo \ + tcp/$(DEPDIR)/libuct_la-tcp_sockcm.Plo \ + tcp/$(DEPDIR)/libuct_la-tcp_sockcm_ep.Plo \ + tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_ep.Plo \ + tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_iface.Plo \ + tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_md.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_la_SOURCES) +DIST_SOURCES = $(libuct_la_SOURCES) 
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(nobase_dist_libuct_la_HEADERS) $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ 
+DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = 
@MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = 
@host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +SUBDIRS = . cuda ib rocm sm ugni +lib_LTLIBRARIES = libuct.la +libuct_la_CFLAGS = $(BASE_CFLAGS) +libuct_la_CPPFLAGS = $(BASE_CPPFLAGS) +libuct_la_LIBADD = $(top_builddir)/src/ucs/libucs.la +libuct_la_LDFLAGS = -ldl -version-info $(SOVERSION) +libuct_ladir = $(includedir)/uct +nobase_dist_libuct_la_HEADERS = \ + api/tl.h \ + api/uct_def.h \ + api/uct.h \ + api/version.h + +noinst_HEADERS = \ + base/uct_md.h \ + base/uct_component.h \ + base/uct_iface.h \ + base/uct_log.h \ + base/uct_worker.h \ + base/uct_cm.h \ + sm/base/sm_ep.h \ + sm/base/sm_iface.h \ + sm/mm/base/mm_iface.h \ + sm/mm/base/mm_ep.h \ + sm/mm/base/mm_md.h \ + sm/self/self.h \ + tcp/tcp.h \ + tcp/tcp_sockcm.h \ + tcp/tcp_listener.h \ + tcp/tcp_sockcm_ep.h \ + tcp/sockcm/sockcm_def.h \ + tcp/sockcm/sockcm_iface.h \ + tcp/sockcm/sockcm_ep.h \ + tcp/sockcm/sockcm_md.h + +libuct_la_SOURCES = \ + base/uct_md.c \ + base/uct_mem.c \ + base/uct_component.c \ + base/uct_iface.c \ + base/uct_worker.c \ + base/uct_cm.c \ + sm/base/sm_ep.c \ + sm/base/sm_iface.c \ + sm/mm/base/mm_iface.c \ + sm/mm/base/mm_ep.c \ + sm/mm/base/mm_md.c \ + sm/mm/posix/mm_posix.c \ + sm/mm/sysv/mm_sysv.c \ + 
sm/self/self.c \ + tcp/tcp_ep.c \ + tcp/tcp_iface.c \ + tcp/tcp_md.c \ + tcp/tcp_net.c \ + tcp/tcp_cm.c \ + tcp/tcp_sockcm.c \ + tcp/tcp_listener.c \ + tcp/tcp_sockcm_ep.c \ + tcp/sockcm/sockcm_iface.c \ + tcp/sockcm/sockcm_ep.c \ + tcp/sockcm/sockcm_md.c + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +base/$(am__dirstamp): + @$(MKDIR_P) base + @: > base/$(am__dirstamp) +base/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) base/$(DEPDIR) + @: > base/$(DEPDIR)/$(am__dirstamp) +base/libuct_la-uct_md.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +base/libuct_la-uct_mem.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +base/libuct_la-uct_component.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +base/libuct_la-uct_iface.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +base/libuct_la-uct_worker.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +base/libuct_la-uct_cm.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +sm/base/$(am__dirstamp): + @$(MKDIR_P) sm/base + @: > sm/base/$(am__dirstamp) +sm/base/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sm/base/$(DEPDIR) + @: > sm/base/$(DEPDIR)/$(am__dirstamp) +sm/base/libuct_la-sm_ep.lo: sm/base/$(am__dirstamp) \ + sm/base/$(DEPDIR)/$(am__dirstamp) +sm/base/libuct_la-sm_iface.lo: sm/base/$(am__dirstamp) \ + sm/base/$(DEPDIR)/$(am__dirstamp) +sm/mm/base/$(am__dirstamp): + 
@$(MKDIR_P) sm/mm/base + @: > sm/mm/base/$(am__dirstamp) +sm/mm/base/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sm/mm/base/$(DEPDIR) + @: > sm/mm/base/$(DEPDIR)/$(am__dirstamp) +sm/mm/base/libuct_la-mm_iface.lo: sm/mm/base/$(am__dirstamp) \ + sm/mm/base/$(DEPDIR)/$(am__dirstamp) +sm/mm/base/libuct_la-mm_ep.lo: sm/mm/base/$(am__dirstamp) \ + sm/mm/base/$(DEPDIR)/$(am__dirstamp) +sm/mm/base/libuct_la-mm_md.lo: sm/mm/base/$(am__dirstamp) \ + sm/mm/base/$(DEPDIR)/$(am__dirstamp) +sm/mm/posix/$(am__dirstamp): + @$(MKDIR_P) sm/mm/posix + @: > sm/mm/posix/$(am__dirstamp) +sm/mm/posix/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sm/mm/posix/$(DEPDIR) + @: > sm/mm/posix/$(DEPDIR)/$(am__dirstamp) +sm/mm/posix/libuct_la-mm_posix.lo: sm/mm/posix/$(am__dirstamp) \ + sm/mm/posix/$(DEPDIR)/$(am__dirstamp) +sm/mm/sysv/$(am__dirstamp): + @$(MKDIR_P) sm/mm/sysv + @: > sm/mm/sysv/$(am__dirstamp) +sm/mm/sysv/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sm/mm/sysv/$(DEPDIR) + @: > sm/mm/sysv/$(DEPDIR)/$(am__dirstamp) +sm/mm/sysv/libuct_la-mm_sysv.lo: sm/mm/sysv/$(am__dirstamp) \ + sm/mm/sysv/$(DEPDIR)/$(am__dirstamp) +sm/self/$(am__dirstamp): + @$(MKDIR_P) sm/self + @: > sm/self/$(am__dirstamp) +sm/self/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) sm/self/$(DEPDIR) + @: > sm/self/$(DEPDIR)/$(am__dirstamp) +sm/self/libuct_la-self.lo: sm/self/$(am__dirstamp) \ + sm/self/$(DEPDIR)/$(am__dirstamp) +tcp/$(am__dirstamp): + @$(MKDIR_P) tcp + @: > tcp/$(am__dirstamp) +tcp/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) tcp/$(DEPDIR) + @: > tcp/$(DEPDIR)/$(am__dirstamp) +tcp/libuct_la-tcp_ep.lo: tcp/$(am__dirstamp) \ + tcp/$(DEPDIR)/$(am__dirstamp) +tcp/libuct_la-tcp_iface.lo: tcp/$(am__dirstamp) \ + tcp/$(DEPDIR)/$(am__dirstamp) +tcp/libuct_la-tcp_md.lo: tcp/$(am__dirstamp) \ + tcp/$(DEPDIR)/$(am__dirstamp) +tcp/libuct_la-tcp_net.lo: tcp/$(am__dirstamp) \ + tcp/$(DEPDIR)/$(am__dirstamp) +tcp/libuct_la-tcp_cm.lo: tcp/$(am__dirstamp) \ + tcp/$(DEPDIR)/$(am__dirstamp) +tcp/libuct_la-tcp_sockcm.lo: tcp/$(am__dirstamp) 
\ + tcp/$(DEPDIR)/$(am__dirstamp) +tcp/libuct_la-tcp_listener.lo: tcp/$(am__dirstamp) \ + tcp/$(DEPDIR)/$(am__dirstamp) +tcp/libuct_la-tcp_sockcm_ep.lo: tcp/$(am__dirstamp) \ + tcp/$(DEPDIR)/$(am__dirstamp) +tcp/sockcm/$(am__dirstamp): + @$(MKDIR_P) tcp/sockcm + @: > tcp/sockcm/$(am__dirstamp) +tcp/sockcm/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) tcp/sockcm/$(DEPDIR) + @: > tcp/sockcm/$(DEPDIR)/$(am__dirstamp) +tcp/sockcm/libuct_la-sockcm_iface.lo: tcp/sockcm/$(am__dirstamp) \ + tcp/sockcm/$(DEPDIR)/$(am__dirstamp) +tcp/sockcm/libuct_la-sockcm_ep.lo: tcp/sockcm/$(am__dirstamp) \ + tcp/sockcm/$(DEPDIR)/$(am__dirstamp) +tcp/sockcm/libuct_la-sockcm_md.lo: tcp/sockcm/$(am__dirstamp) \ + tcp/sockcm/$(DEPDIR)/$(am__dirstamp) + +libuct.la: $(libuct_la_OBJECTS) $(libuct_la_DEPENDENCIES) $(EXTRA_libuct_la_DEPENDENCIES) + $(AM_V_CCLD)$(libuct_la_LINK) -rpath $(libdir) $(libuct_la_OBJECTS) $(libuct_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f base/*.$(OBJEXT) + -rm -f base/*.lo + -rm -f sm/base/*.$(OBJEXT) + -rm -f sm/base/*.lo + -rm -f sm/mm/base/*.$(OBJEXT) + -rm -f sm/mm/base/*.lo + -rm -f sm/mm/posix/*.$(OBJEXT) + -rm -f sm/mm/posix/*.lo + -rm -f sm/mm/sysv/*.$(OBJEXT) + -rm -f sm/mm/sysv/*.lo + -rm -f sm/self/*.$(OBJEXT) + -rm -f sm/self/*.lo + -rm -f tcp/*.$(OBJEXT) + -rm -f tcp/*.lo + -rm -f tcp/sockcm/*.$(OBJEXT) + -rm -f tcp/sockcm/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_la-uct_cm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_la-uct_component.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_la-uct_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_la-uct_md.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_la-uct_mem.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@base/$(DEPDIR)/libuct_la-uct_worker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sm/base/$(DEPDIR)/libuct_la-sm_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sm/base/$(DEPDIR)/libuct_la-sm_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sm/mm/base/$(DEPDIR)/libuct_la-mm_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sm/mm/base/$(DEPDIR)/libuct_la-mm_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sm/mm/base/$(DEPDIR)/libuct_la-mm_md.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sm/mm/posix/$(DEPDIR)/libuct_la-mm_posix.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sm/mm/sysv/$(DEPDIR)/libuct_la-mm_sysv.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@sm/self/$(DEPDIR)/libuct_la-self.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tcp/$(DEPDIR)/libuct_la-tcp_cm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tcp/$(DEPDIR)/libuct_la-tcp_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tcp/$(DEPDIR)/libuct_la-tcp_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tcp/$(DEPDIR)/libuct_la-tcp_listener.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tcp/$(DEPDIR)/libuct_la-tcp_md.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tcp/$(DEPDIR)/libuct_la-tcp_net.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tcp/$(DEPDIR)/libuct_la-tcp_sockcm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tcp/$(DEPDIR)/libuct_la-tcp_sockcm_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_md.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +base/libuct_la-uct_md.lo: base/uct_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT base/libuct_la-uct_md.lo -MD -MP -MF base/$(DEPDIR)/libuct_la-uct_md.Tpo -c -o base/libuct_la-uct_md.lo `test -f 'base/uct_md.c' || echo '$(srcdir)/'`base/uct_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_la-uct_md.Tpo base/$(DEPDIR)/libuct_la-uct_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/uct_md.c' object='base/libuct_la-uct_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o base/libuct_la-uct_md.lo `test -f 'base/uct_md.c' || echo '$(srcdir)/'`base/uct_md.c + +base/libuct_la-uct_mem.lo: base/uct_mem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT base/libuct_la-uct_mem.lo -MD -MP -MF base/$(DEPDIR)/libuct_la-uct_mem.Tpo -c -o base/libuct_la-uct_mem.lo `test -f 'base/uct_mem.c' || echo '$(srcdir)/'`base/uct_mem.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_la-uct_mem.Tpo base/$(DEPDIR)/libuct_la-uct_mem.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/uct_mem.c' object='base/libuct_la-uct_mem.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) 
$(CFLAGS) -c -o base/libuct_la-uct_mem.lo `test -f 'base/uct_mem.c' || echo '$(srcdir)/'`base/uct_mem.c + +base/libuct_la-uct_component.lo: base/uct_component.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT base/libuct_la-uct_component.lo -MD -MP -MF base/$(DEPDIR)/libuct_la-uct_component.Tpo -c -o base/libuct_la-uct_component.lo `test -f 'base/uct_component.c' || echo '$(srcdir)/'`base/uct_component.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_la-uct_component.Tpo base/$(DEPDIR)/libuct_la-uct_component.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/uct_component.c' object='base/libuct_la-uct_component.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o base/libuct_la-uct_component.lo `test -f 'base/uct_component.c' || echo '$(srcdir)/'`base/uct_component.c + +base/libuct_la-uct_iface.lo: base/uct_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT base/libuct_la-uct_iface.lo -MD -MP -MF base/$(DEPDIR)/libuct_la-uct_iface.Tpo -c -o base/libuct_la-uct_iface.lo `test -f 'base/uct_iface.c' || echo '$(srcdir)/'`base/uct_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_la-uct_iface.Tpo base/$(DEPDIR)/libuct_la-uct_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/uct_iface.c' object='base/libuct_la-uct_iface.lo' libtool=yes 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o base/libuct_la-uct_iface.lo `test -f 'base/uct_iface.c' || echo '$(srcdir)/'`base/uct_iface.c + +base/libuct_la-uct_worker.lo: base/uct_worker.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT base/libuct_la-uct_worker.lo -MD -MP -MF base/$(DEPDIR)/libuct_la-uct_worker.Tpo -c -o base/libuct_la-uct_worker.lo `test -f 'base/uct_worker.c' || echo '$(srcdir)/'`base/uct_worker.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_la-uct_worker.Tpo base/$(DEPDIR)/libuct_la-uct_worker.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/uct_worker.c' object='base/libuct_la-uct_worker.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o base/libuct_la-uct_worker.lo `test -f 'base/uct_worker.c' || echo '$(srcdir)/'`base/uct_worker.c + +base/libuct_la-uct_cm.lo: base/uct_cm.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT base/libuct_la-uct_cm.lo -MD -MP -MF base/$(DEPDIR)/libuct_la-uct_cm.Tpo -c -o base/libuct_la-uct_cm.lo `test -f 'base/uct_cm.c' 
|| echo '$(srcdir)/'`base/uct_cm.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_la-uct_cm.Tpo base/$(DEPDIR)/libuct_la-uct_cm.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/uct_cm.c' object='base/libuct_la-uct_cm.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o base/libuct_la-uct_cm.lo `test -f 'base/uct_cm.c' || echo '$(srcdir)/'`base/uct_cm.c + +sm/base/libuct_la-sm_ep.lo: sm/base/sm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT sm/base/libuct_la-sm_ep.lo -MD -MP -MF sm/base/$(DEPDIR)/libuct_la-sm_ep.Tpo -c -o sm/base/libuct_la-sm_ep.lo `test -f 'sm/base/sm_ep.c' || echo '$(srcdir)/'`sm/base/sm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sm/base/$(DEPDIR)/libuct_la-sm_ep.Tpo sm/base/$(DEPDIR)/libuct_la-sm_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sm/base/sm_ep.c' object='sm/base/libuct_la-sm_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o sm/base/libuct_la-sm_ep.lo `test -f 'sm/base/sm_ep.c' || echo '$(srcdir)/'`sm/base/sm_ep.c + +sm/base/libuct_la-sm_iface.lo: sm/base/sm_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT sm/base/libuct_la-sm_iface.lo -MD -MP -MF sm/base/$(DEPDIR)/libuct_la-sm_iface.Tpo -c -o sm/base/libuct_la-sm_iface.lo `test -f 'sm/base/sm_iface.c' || echo '$(srcdir)/'`sm/base/sm_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sm/base/$(DEPDIR)/libuct_la-sm_iface.Tpo sm/base/$(DEPDIR)/libuct_la-sm_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sm/base/sm_iface.c' object='sm/base/libuct_la-sm_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o sm/base/libuct_la-sm_iface.lo `test -f 'sm/base/sm_iface.c' || echo '$(srcdir)/'`sm/base/sm_iface.c + +sm/mm/base/libuct_la-mm_iface.lo: sm/mm/base/mm_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT sm/mm/base/libuct_la-mm_iface.lo -MD -MP -MF sm/mm/base/$(DEPDIR)/libuct_la-mm_iface.Tpo -c -o sm/mm/base/libuct_la-mm_iface.lo `test -f 'sm/mm/base/mm_iface.c' || echo '$(srcdir)/'`sm/mm/base/mm_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sm/mm/base/$(DEPDIR)/libuct_la-mm_iface.Tpo sm/mm/base/$(DEPDIR)/libuct_la-mm_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sm/mm/base/mm_iface.c' object='sm/mm/base/libuct_la-mm_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o sm/mm/base/libuct_la-mm_iface.lo `test -f 'sm/mm/base/mm_iface.c' || echo '$(srcdir)/'`sm/mm/base/mm_iface.c + +sm/mm/base/libuct_la-mm_ep.lo: sm/mm/base/mm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT sm/mm/base/libuct_la-mm_ep.lo -MD -MP -MF sm/mm/base/$(DEPDIR)/libuct_la-mm_ep.Tpo -c -o sm/mm/base/libuct_la-mm_ep.lo `test -f 'sm/mm/base/mm_ep.c' || echo '$(srcdir)/'`sm/mm/base/mm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sm/mm/base/$(DEPDIR)/libuct_la-mm_ep.Tpo sm/mm/base/$(DEPDIR)/libuct_la-mm_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sm/mm/base/mm_ep.c' object='sm/mm/base/libuct_la-mm_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o sm/mm/base/libuct_la-mm_ep.lo `test -f 'sm/mm/base/mm_ep.c' || echo '$(srcdir)/'`sm/mm/base/mm_ep.c + +sm/mm/base/libuct_la-mm_md.lo: sm/mm/base/mm_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT sm/mm/base/libuct_la-mm_md.lo -MD -MP -MF sm/mm/base/$(DEPDIR)/libuct_la-mm_md.Tpo -c -o sm/mm/base/libuct_la-mm_md.lo `test -f 'sm/mm/base/mm_md.c' || echo '$(srcdir)/'`sm/mm/base/mm_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sm/mm/base/$(DEPDIR)/libuct_la-mm_md.Tpo sm/mm/base/$(DEPDIR)/libuct_la-mm_md.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sm/mm/base/mm_md.c' object='sm/mm/base/libuct_la-mm_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o sm/mm/base/libuct_la-mm_md.lo `test -f 'sm/mm/base/mm_md.c' || echo '$(srcdir)/'`sm/mm/base/mm_md.c + +sm/mm/posix/libuct_la-mm_posix.lo: sm/mm/posix/mm_posix.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT sm/mm/posix/libuct_la-mm_posix.lo -MD -MP -MF sm/mm/posix/$(DEPDIR)/libuct_la-mm_posix.Tpo -c -o sm/mm/posix/libuct_la-mm_posix.lo `test -f 'sm/mm/posix/mm_posix.c' || echo '$(srcdir)/'`sm/mm/posix/mm_posix.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sm/mm/posix/$(DEPDIR)/libuct_la-mm_posix.Tpo sm/mm/posix/$(DEPDIR)/libuct_la-mm_posix.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sm/mm/posix/mm_posix.c' object='sm/mm/posix/libuct_la-mm_posix.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o sm/mm/posix/libuct_la-mm_posix.lo `test -f 'sm/mm/posix/mm_posix.c' || echo '$(srcdir)/'`sm/mm/posix/mm_posix.c + +sm/mm/sysv/libuct_la-mm_sysv.lo: sm/mm/sysv/mm_sysv.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT sm/mm/sysv/libuct_la-mm_sysv.lo -MD -MP -MF sm/mm/sysv/$(DEPDIR)/libuct_la-mm_sysv.Tpo -c -o sm/mm/sysv/libuct_la-mm_sysv.lo `test -f 'sm/mm/sysv/mm_sysv.c' || echo '$(srcdir)/'`sm/mm/sysv/mm_sysv.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sm/mm/sysv/$(DEPDIR)/libuct_la-mm_sysv.Tpo sm/mm/sysv/$(DEPDIR)/libuct_la-mm_sysv.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sm/mm/sysv/mm_sysv.c' object='sm/mm/sysv/libuct_la-mm_sysv.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o sm/mm/sysv/libuct_la-mm_sysv.lo `test -f 'sm/mm/sysv/mm_sysv.c' || echo '$(srcdir)/'`sm/mm/sysv/mm_sysv.c + +sm/self/libuct_la-self.lo: sm/self/self.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT sm/self/libuct_la-self.lo -MD -MP -MF sm/self/$(DEPDIR)/libuct_la-self.Tpo -c -o sm/self/libuct_la-self.lo `test -f 'sm/self/self.c' || echo '$(srcdir)/'`sm/self/self.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) sm/self/$(DEPDIR)/libuct_la-self.Tpo sm/self/$(DEPDIR)/libuct_la-self.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sm/self/self.c' object='sm/self/libuct_la-self.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) 
$(libuct_la_CFLAGS) $(CFLAGS) -c -o sm/self/libuct_la-self.lo `test -f 'sm/self/self.c' || echo '$(srcdir)/'`sm/self/self.c + +tcp/libuct_la-tcp_ep.lo: tcp/tcp_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/libuct_la-tcp_ep.lo -MD -MP -MF tcp/$(DEPDIR)/libuct_la-tcp_ep.Tpo -c -o tcp/libuct_la-tcp_ep.lo `test -f 'tcp/tcp_ep.c' || echo '$(srcdir)/'`tcp/tcp_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/$(DEPDIR)/libuct_la-tcp_ep.Tpo tcp/$(DEPDIR)/libuct_la-tcp_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/tcp_ep.c' object='tcp/libuct_la-tcp_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/libuct_la-tcp_ep.lo `test -f 'tcp/tcp_ep.c' || echo '$(srcdir)/'`tcp/tcp_ep.c + +tcp/libuct_la-tcp_iface.lo: tcp/tcp_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/libuct_la-tcp_iface.lo -MD -MP -MF tcp/$(DEPDIR)/libuct_la-tcp_iface.Tpo -c -o tcp/libuct_la-tcp_iface.lo `test -f 'tcp/tcp_iface.c' || echo '$(srcdir)/'`tcp/tcp_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/$(DEPDIR)/libuct_la-tcp_iface.Tpo tcp/$(DEPDIR)/libuct_la-tcp_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/tcp_iface.c' object='tcp/libuct_la-tcp_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/libuct_la-tcp_iface.lo `test -f 'tcp/tcp_iface.c' || echo '$(srcdir)/'`tcp/tcp_iface.c + +tcp/libuct_la-tcp_md.lo: tcp/tcp_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/libuct_la-tcp_md.lo -MD -MP -MF tcp/$(DEPDIR)/libuct_la-tcp_md.Tpo -c -o tcp/libuct_la-tcp_md.lo `test -f 'tcp/tcp_md.c' || echo '$(srcdir)/'`tcp/tcp_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/$(DEPDIR)/libuct_la-tcp_md.Tpo tcp/$(DEPDIR)/libuct_la-tcp_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/tcp_md.c' object='tcp/libuct_la-tcp_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/libuct_la-tcp_md.lo `test -f 'tcp/tcp_md.c' || echo '$(srcdir)/'`tcp/tcp_md.c + +tcp/libuct_la-tcp_net.lo: tcp/tcp_net.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/libuct_la-tcp_net.lo -MD -MP -MF tcp/$(DEPDIR)/libuct_la-tcp_net.Tpo -c -o tcp/libuct_la-tcp_net.lo `test -f 'tcp/tcp_net.c' || echo '$(srcdir)/'`tcp/tcp_net.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/$(DEPDIR)/libuct_la-tcp_net.Tpo tcp/$(DEPDIR)/libuct_la-tcp_net.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/tcp_net.c' object='tcp/libuct_la-tcp_net.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/libuct_la-tcp_net.lo `test -f 'tcp/tcp_net.c' || echo '$(srcdir)/'`tcp/tcp_net.c + +tcp/libuct_la-tcp_cm.lo: tcp/tcp_cm.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/libuct_la-tcp_cm.lo -MD -MP -MF tcp/$(DEPDIR)/libuct_la-tcp_cm.Tpo -c -o tcp/libuct_la-tcp_cm.lo `test -f 'tcp/tcp_cm.c' || echo '$(srcdir)/'`tcp/tcp_cm.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/$(DEPDIR)/libuct_la-tcp_cm.Tpo tcp/$(DEPDIR)/libuct_la-tcp_cm.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/tcp_cm.c' object='tcp/libuct_la-tcp_cm.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/libuct_la-tcp_cm.lo `test -f 'tcp/tcp_cm.c' || echo '$(srcdir)/'`tcp/tcp_cm.c + +tcp/libuct_la-tcp_sockcm.lo: tcp/tcp_sockcm.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/libuct_la-tcp_sockcm.lo -MD -MP -MF tcp/$(DEPDIR)/libuct_la-tcp_sockcm.Tpo -c -o 
tcp/libuct_la-tcp_sockcm.lo `test -f 'tcp/tcp_sockcm.c' || echo '$(srcdir)/'`tcp/tcp_sockcm.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/$(DEPDIR)/libuct_la-tcp_sockcm.Tpo tcp/$(DEPDIR)/libuct_la-tcp_sockcm.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/tcp_sockcm.c' object='tcp/libuct_la-tcp_sockcm.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/libuct_la-tcp_sockcm.lo `test -f 'tcp/tcp_sockcm.c' || echo '$(srcdir)/'`tcp/tcp_sockcm.c + +tcp/libuct_la-tcp_listener.lo: tcp/tcp_listener.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/libuct_la-tcp_listener.lo -MD -MP -MF tcp/$(DEPDIR)/libuct_la-tcp_listener.Tpo -c -o tcp/libuct_la-tcp_listener.lo `test -f 'tcp/tcp_listener.c' || echo '$(srcdir)/'`tcp/tcp_listener.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/$(DEPDIR)/libuct_la-tcp_listener.Tpo tcp/$(DEPDIR)/libuct_la-tcp_listener.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/tcp_listener.c' object='tcp/libuct_la-tcp_listener.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/libuct_la-tcp_listener.lo `test -f 'tcp/tcp_listener.c' || echo '$(srcdir)/'`tcp/tcp_listener.c + +tcp/libuct_la-tcp_sockcm_ep.lo: 
tcp/tcp_sockcm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/libuct_la-tcp_sockcm_ep.lo -MD -MP -MF tcp/$(DEPDIR)/libuct_la-tcp_sockcm_ep.Tpo -c -o tcp/libuct_la-tcp_sockcm_ep.lo `test -f 'tcp/tcp_sockcm_ep.c' || echo '$(srcdir)/'`tcp/tcp_sockcm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/$(DEPDIR)/libuct_la-tcp_sockcm_ep.Tpo tcp/$(DEPDIR)/libuct_la-tcp_sockcm_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/tcp_sockcm_ep.c' object='tcp/libuct_la-tcp_sockcm_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/libuct_la-tcp_sockcm_ep.lo `test -f 'tcp/tcp_sockcm_ep.c' || echo '$(srcdir)/'`tcp/tcp_sockcm_ep.c + +tcp/sockcm/libuct_la-sockcm_iface.lo: tcp/sockcm/sockcm_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/sockcm/libuct_la-sockcm_iface.lo -MD -MP -MF tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_iface.Tpo -c -o tcp/sockcm/libuct_la-sockcm_iface.lo `test -f 'tcp/sockcm/sockcm_iface.c' || echo '$(srcdir)/'`tcp/sockcm/sockcm_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_iface.Tpo tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/sockcm/sockcm_iface.c' object='tcp/sockcm/libuct_la-sockcm_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/sockcm/libuct_la-sockcm_iface.lo `test -f 'tcp/sockcm/sockcm_iface.c' || echo '$(srcdir)/'`tcp/sockcm/sockcm_iface.c + +tcp/sockcm/libuct_la-sockcm_ep.lo: tcp/sockcm/sockcm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/sockcm/libuct_la-sockcm_ep.lo -MD -MP -MF tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_ep.Tpo -c -o tcp/sockcm/libuct_la-sockcm_ep.lo `test -f 'tcp/sockcm/sockcm_ep.c' || echo '$(srcdir)/'`tcp/sockcm/sockcm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_ep.Tpo tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/sockcm/sockcm_ep.c' object='tcp/sockcm/libuct_la-sockcm_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/sockcm/libuct_la-sockcm_ep.lo `test -f 'tcp/sockcm/sockcm_ep.c' || echo '$(srcdir)/'`tcp/sockcm/sockcm_ep.c + +tcp/sockcm/libuct_la-sockcm_md.lo: tcp/sockcm/sockcm_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -MT tcp/sockcm/libuct_la-sockcm_md.lo -MD -MP -MF 
tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_md.Tpo -c -o tcp/sockcm/libuct_la-sockcm_md.lo `test -f 'tcp/sockcm/sockcm_md.c' || echo '$(srcdir)/'`tcp/sockcm/sockcm_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_md.Tpo tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='tcp/sockcm/sockcm_md.c' object='tcp/sockcm/libuct_la-sockcm_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_la_CPPFLAGS) $(CPPFLAGS) $(libuct_la_CFLAGS) $(CFLAGS) -c -o tcp/sockcm/libuct_la-sockcm_md.lo `test -f 'tcp/sockcm/sockcm_md.c' || echo '$(srcdir)/'`tcp/sockcm/sockcm_md.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf base/.libs base/_libs + -rm -rf sm/base/.libs sm/base/_libs + -rm -rf sm/mm/base/.libs sm/mm/base/_libs + -rm -rf sm/mm/posix/.libs sm/mm/posix/_libs + -rm -rf sm/mm/sysv/.libs sm/mm/sysv/_libs + -rm -rf sm/self/.libs sm/self/_libs + -rm -rf tcp/.libs tcp/_libs + -rm -rf tcp/sockcm/.libs tcp/sockcm/_libs +install-nobase_dist_libuct_laHEADERS: $(nobase_dist_libuct_la_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nobase_dist_libuct_la_HEADERS)'; test -n "$(libuct_ladir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(libuct_ladir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libuct_ladir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. 
|| { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libuct_ladir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(libuct_ladir)/$$dir"; }; \ + echo " $(INSTALL_HEADER) $$xfiles '$(DESTDIR)$(libuct_ladir)/$$dir'"; \ + $(INSTALL_HEADER) $$xfiles "$(DESTDIR)$(libuct_ladir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_dist_libuct_laHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nobase_dist_libuct_la_HEADERS)'; test -n "$(libuct_ladir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(libuct_ladir)'; $(am__uninstall_files_from_dir) + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + 
include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; 
\ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libuct_ladir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am 
+ +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f base/$(DEPDIR)/$(am__dirstamp) + -rm -f base/$(am__dirstamp) + -rm -f sm/base/$(DEPDIR)/$(am__dirstamp) + -rm -f sm/base/$(am__dirstamp) + -rm -f sm/mm/base/$(DEPDIR)/$(am__dirstamp) + -rm -f sm/mm/base/$(am__dirstamp) + -rm -f sm/mm/posix/$(DEPDIR)/$(am__dirstamp) + -rm -f sm/mm/posix/$(am__dirstamp) + -rm -f sm/mm/sysv/$(DEPDIR)/$(am__dirstamp) + -rm -f sm/mm/sysv/$(am__dirstamp) + -rm -f sm/self/$(DEPDIR)/$(am__dirstamp) + -rm -f sm/self/$(am__dirstamp) + -rm -f tcp/$(DEPDIR)/$(am__dirstamp) + -rm -f tcp/$(am__dirstamp) + -rm -f tcp/sockcm/$(DEPDIR)/$(am__dirstamp) + -rm -f tcp/sockcm/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f base/$(DEPDIR)/libuct_la-uct_cm.Plo + -rm -f base/$(DEPDIR)/libuct_la-uct_component.Plo + -rm -f base/$(DEPDIR)/libuct_la-uct_iface.Plo + -rm -f base/$(DEPDIR)/libuct_la-uct_md.Plo + -rm -f base/$(DEPDIR)/libuct_la-uct_mem.Plo + -rm -f base/$(DEPDIR)/libuct_la-uct_worker.Plo + -rm -f sm/base/$(DEPDIR)/libuct_la-sm_ep.Plo + -rm -f sm/base/$(DEPDIR)/libuct_la-sm_iface.Plo + -rm -f sm/mm/base/$(DEPDIR)/libuct_la-mm_ep.Plo + -rm -f sm/mm/base/$(DEPDIR)/libuct_la-mm_iface.Plo + -rm -f sm/mm/base/$(DEPDIR)/libuct_la-mm_md.Plo + -rm -f sm/mm/posix/$(DEPDIR)/libuct_la-mm_posix.Plo + -rm -f sm/mm/sysv/$(DEPDIR)/libuct_la-mm_sysv.Plo + -rm -f sm/self/$(DEPDIR)/libuct_la-self.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_cm.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_ep.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_iface.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_listener.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_md.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_net.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_sockcm.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_sockcm_ep.Plo + -rm -f tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_ep.Plo + -rm -f tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_iface.Plo + -rm -f tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_md.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-nobase_dist_libuct_laHEADERS + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + 
+maintainer-clean: maintainer-clean-recursive + -rm -f base/$(DEPDIR)/libuct_la-uct_cm.Plo + -rm -f base/$(DEPDIR)/libuct_la-uct_component.Plo + -rm -f base/$(DEPDIR)/libuct_la-uct_iface.Plo + -rm -f base/$(DEPDIR)/libuct_la-uct_md.Plo + -rm -f base/$(DEPDIR)/libuct_la-uct_mem.Plo + -rm -f base/$(DEPDIR)/libuct_la-uct_worker.Plo + -rm -f sm/base/$(DEPDIR)/libuct_la-sm_ep.Plo + -rm -f sm/base/$(DEPDIR)/libuct_la-sm_iface.Plo + -rm -f sm/mm/base/$(DEPDIR)/libuct_la-mm_ep.Plo + -rm -f sm/mm/base/$(DEPDIR)/libuct_la-mm_iface.Plo + -rm -f sm/mm/base/$(DEPDIR)/libuct_la-mm_md.Plo + -rm -f sm/mm/posix/$(DEPDIR)/libuct_la-mm_posix.Plo + -rm -f sm/mm/sysv/$(DEPDIR)/libuct_la-mm_sysv.Plo + -rm -f sm/self/$(DEPDIR)/libuct_la-self.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_cm.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_ep.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_iface.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_listener.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_md.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_net.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_sockcm.Plo + -rm -f tcp/$(DEPDIR)/libuct_la-tcp_sockcm_ep.Plo + -rm -f tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_ep.Plo + -rm -f tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_iface.Plo + -rm -f tcp/sockcm/$(DEPDIR)/libuct_la-sockcm_md.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES \ + uninstall-nobase_dist_libuct_laHEADERS + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-am clean clean-generic \ + clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am 
install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man \ + install-nobase_dist_libuct_laHEADERS install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs installdirs-am \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-libLTLIBRARIES \ + uninstall-nobase_dist_libuct_laHEADERS + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/uct/api/tl.h b/src/uct/api/tl.h new file mode 100644 index 0000000..c258187 --- /dev/null +++ b/src/uct/api/tl.h @@ -0,0 +1,416 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_TL_H_ +#define UCT_TL_H_ + +#if !defined(UCT_H_) +# error "You should not include this header directly. Include uct.h instead." 
+#endif + +#include "uct_def.h" + +#include +#include +#include +#include +#include + +BEGIN_C_DECLS + +/** @file tl.h */ + +/* endpoint - put */ + +typedef ucs_status_t (*uct_ep_put_short_func_t)(uct_ep_h ep, + const void *buffer, + unsigned length, + uint64_t remote_addr, + uct_rkey_t rkey); + +typedef ssize_t (*uct_ep_put_bcopy_func_t)(uct_ep_h ep, + uct_pack_callback_t pack_cb, + void *arg, + uint64_t remote_addr, + uct_rkey_t rkey); + +typedef ucs_status_t (*uct_ep_put_zcopy_func_t)(uct_ep_h ep, + const uct_iov_t *iov, + size_t iovcnt, + uint64_t remote_addr, + uct_rkey_t rkey, + uct_completion_t *comp); + +/* endpoint - get */ + +typedef ucs_status_t (*uct_ep_get_short_func_t)(uct_ep_h ep, + void *buffer, + unsigned length, + uint64_t remote_addr, + uct_rkey_t rkey); + +typedef ucs_status_t (*uct_ep_get_bcopy_func_t)(uct_ep_h ep, + uct_unpack_callback_t unpack_cb, + void *arg, + size_t length, + uint64_t remote_addr, + uct_rkey_t rkey, + uct_completion_t *comp); + +typedef ucs_status_t (*uct_ep_get_zcopy_func_t)(uct_ep_h ep, + const uct_iov_t *iov, + size_t iovcnt, + uint64_t remote_addr, + uct_rkey_t rkey, + uct_completion_t *comp); + +/* endpoint - active message */ + +typedef ucs_status_t (*uct_ep_am_short_func_t)(uct_ep_h ep, + uint8_t id, + uint64_t header, + const void *payload, + unsigned length); + +typedef ssize_t (*uct_ep_am_bcopy_func_t)(uct_ep_h ep, + uint8_t id, + uct_pack_callback_t pack_cb, + void *arg, + unsigned flags); + +typedef ucs_status_t (*uct_ep_am_zcopy_func_t)(uct_ep_h ep, + uint8_t id, + const void *header, + unsigned header_length, + const uct_iov_t *iov, + size_t iovcnt, + unsigned flags, + uct_completion_t *comp); + +/* endpoint - atomics */ + +typedef ucs_status_t (*uct_ep_atomic_cswap64_func_t)(uct_ep_h ep, + uint64_t compare, + uint64_t swap, + uint64_t remote_addr, + uct_rkey_t rkey, + uint64_t *result, + uct_completion_t *comp); + +typedef ucs_status_t (*uct_ep_atomic_cswap32_func_t)(uct_ep_h ep, + uint32_t compare, + 
uint32_t swap, + uint64_t remote_addr, + uct_rkey_t rkey, + uint32_t *result, + uct_completion_t *comp); + +typedef ucs_status_t (*uct_ep_atomic32_post_func_t)(uct_ep_h ep, + unsigned opcode, + uint32_t value, + uint64_t remote_addr, + uct_rkey_t rkey); + +typedef ucs_status_t (*uct_ep_atomic64_post_func_t)(uct_ep_h ep, + unsigned opcode, + uint64_t value, + uint64_t remote_addr, + uct_rkey_t rkey); + +typedef ucs_status_t (*uct_ep_atomic32_fetch_func_t)(uct_ep_h ep, + unsigned opcode, + uint32_t value, + uint32_t *result, + uint64_t remote_addr, + uct_rkey_t rkey, + uct_completion_t *comp); + +typedef ucs_status_t (*uct_ep_atomic64_fetch_func_t)(uct_ep_h ep, + unsigned opcode, + uint64_t value, + uint64_t *result, + uint64_t remote_addr, + uct_rkey_t rkey, + uct_completion_t *comp); + +/* endpoint - tagged operations */ + +typedef ucs_status_t (*uct_ep_tag_eager_short_func_t)(uct_ep_h ep, + uct_tag_t tag, + const void *data, + size_t length); + +typedef ssize_t (*uct_ep_tag_eager_bcopy_func_t)(uct_ep_h ep, + uct_tag_t tag, + uint64_t imm, + uct_pack_callback_t pack_cb, + void *arg, + unsigned flags); + +typedef ucs_status_t (*uct_ep_tag_eager_zcopy_func_t)(uct_ep_h ep, + uct_tag_t tag, + uint64_t imm, + const uct_iov_t *iov, + size_t iovcnt, + unsigned flags, + uct_completion_t *comp); + +typedef ucs_status_ptr_t (*uct_ep_tag_rndv_zcopy_func_t)(uct_ep_h ep, + uct_tag_t tag, + const void *header, + unsigned header_length, + const uct_iov_t *iov, + size_t iovcnt, + unsigned flags, + uct_completion_t *comp); + +typedef ucs_status_t (*uct_ep_tag_rndv_cancel_func_t)(uct_ep_h ep, void *op); + +typedef ucs_status_t (*uct_ep_tag_rndv_request_func_t)(uct_ep_h ep, + uct_tag_t tag, + const void* header, + unsigned header_length, + unsigned flags); + +/* interface - tagged operations */ + +typedef ucs_status_t (*uct_iface_tag_recv_zcopy_func_t)(uct_iface_h iface, + uct_tag_t tag, + uct_tag_t tag_mask, + const uct_iov_t *iov, + size_t iovcnt, + uct_tag_context_t *ctx); + 
+typedef ucs_status_t (*uct_iface_tag_recv_cancel_func_t)(uct_iface_h iface, + uct_tag_context_t *ctx, + int force); + +/* endpoint - pending queue */ + +typedef ucs_status_t (*uct_ep_pending_add_func_t)(uct_ep_h ep, + uct_pending_req_t *n, + unsigned flags); + +typedef void (*uct_ep_pending_purge_func_t)(uct_ep_h ep, + uct_pending_purge_callback_t cb, + void *arg); + +/* endpoint - synchronization */ + +typedef ucs_status_t (*uct_ep_flush_func_t)(uct_ep_h ep, + unsigned flags, + uct_completion_t *comp); + +typedef ucs_status_t (*uct_ep_fence_func_t)(uct_ep_h ep, unsigned flags); + +typedef ucs_status_t (*uct_ep_check_func_t)(uct_ep_h ep, + unsigned flags, + uct_completion_t *comp); + +/* endpoint - connection establishment */ + +typedef ucs_status_t (*uct_ep_create_func_t)(const uct_ep_params_t *params, + uct_ep_h *ep_p); + +typedef ucs_status_t (*uct_ep_disconnect_func_t)(uct_ep_h ep, unsigned flags); + +typedef void (*uct_ep_destroy_func_t)(uct_ep_h ep); + +typedef ucs_status_t (*uct_ep_get_address_func_t)(uct_ep_h ep, + uct_ep_addr_t *addr); + +typedef ucs_status_t (*uct_ep_connect_to_ep_func_t)(uct_ep_h ep, + const uct_device_addr_t *dev_addr, + const uct_ep_addr_t *ep_addr); + +typedef ucs_status_t (*uct_iface_accept_func_t)(uct_iface_h iface, + uct_conn_request_h conn_request); + +typedef ucs_status_t (*uct_iface_reject_func_t)(uct_iface_h iface, + uct_conn_request_h conn_request); + +/* interface - synchronization */ + +typedef ucs_status_t (*uct_iface_flush_func_t)(uct_iface_h iface, + unsigned flags, + uct_completion_t *comp); + +typedef ucs_status_t (*uct_iface_fence_func_t)(uct_iface_h iface, unsigned flags); + +/* interface - progress control */ + +typedef void (*uct_iface_progress_enable_func_t)(uct_iface_h iface, + unsigned flags); + +typedef void (*uct_iface_progress_disable_func_t)(uct_iface_h iface, + unsigned flags); + +typedef unsigned (*uct_iface_progress_func_t)(uct_iface_h iface); + +/* interface - events */ + +typedef ucs_status_t 
(*uct_iface_event_fd_get_func_t)(uct_iface_h iface, + int *fd_p); + +typedef ucs_status_t (*uct_iface_event_arm_func_t)(uct_iface_h iface, + unsigned events); + +/* interface - management */ + +typedef void (*uct_iface_close_func_t)(uct_iface_h iface); + +typedef ucs_status_t (*uct_iface_query_func_t)(uct_iface_h iface, + uct_iface_attr_t *iface_attr); + +/* interface - connection establishment */ + +typedef ucs_status_t (*uct_iface_get_device_address_func_t)(uct_iface_h iface, + uct_device_addr_t *addr); + +typedef ucs_status_t (*uct_iface_get_address_func_t)(uct_iface_h iface, + uct_iface_addr_t *addr); + +typedef int (*uct_iface_is_reachable_func_t)(const uct_iface_h iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr); + + +/** + * Transport interface operations. + * Every operation exposed in the API must appear in the table below, to allow + * creating interface/endpoint with custom operations. + */ +typedef struct uct_iface_ops { + + /* endpoint - put */ + uct_ep_put_short_func_t ep_put_short; + uct_ep_put_bcopy_func_t ep_put_bcopy; + uct_ep_put_zcopy_func_t ep_put_zcopy; + + /* endpoint - get */ + uct_ep_get_short_func_t ep_get_short; + uct_ep_get_bcopy_func_t ep_get_bcopy; + uct_ep_get_zcopy_func_t ep_get_zcopy; + + /* endpoint - active message */ + uct_ep_am_short_func_t ep_am_short; + uct_ep_am_bcopy_func_t ep_am_bcopy; + uct_ep_am_zcopy_func_t ep_am_zcopy; + + /* endpoint - atomics */ + uct_ep_atomic_cswap64_func_t ep_atomic_cswap64; + uct_ep_atomic_cswap32_func_t ep_atomic_cswap32; + uct_ep_atomic32_post_func_t ep_atomic32_post; + uct_ep_atomic64_post_func_t ep_atomic64_post; + uct_ep_atomic32_fetch_func_t ep_atomic32_fetch; + uct_ep_atomic64_fetch_func_t ep_atomic64_fetch; + + /* endpoint - tagged operations */ + uct_ep_tag_eager_short_func_t ep_tag_eager_short; + uct_ep_tag_eager_bcopy_func_t ep_tag_eager_bcopy; + uct_ep_tag_eager_zcopy_func_t ep_tag_eager_zcopy; + uct_ep_tag_rndv_zcopy_func_t ep_tag_rndv_zcopy; + 
uct_ep_tag_rndv_cancel_func_t ep_tag_rndv_cancel; + uct_ep_tag_rndv_request_func_t ep_tag_rndv_request; + + /* interface - tagged operations */ + uct_iface_tag_recv_zcopy_func_t iface_tag_recv_zcopy; + uct_iface_tag_recv_cancel_func_t iface_tag_recv_cancel; + + /* endpoint - pending queue */ + uct_ep_pending_add_func_t ep_pending_add; + uct_ep_pending_purge_func_t ep_pending_purge; + + /* endpoint - synchronization */ + uct_ep_flush_func_t ep_flush; + uct_ep_fence_func_t ep_fence; + uct_ep_check_func_t ep_check; + + /* endpoint - connection establishment */ + uct_ep_create_func_t ep_create; + uct_ep_disconnect_func_t ep_disconnect; + uct_ep_destroy_func_t ep_destroy; + uct_ep_get_address_func_t ep_get_address; + uct_ep_connect_to_ep_func_t ep_connect_to_ep; + uct_iface_accept_func_t iface_accept; + uct_iface_reject_func_t iface_reject; + + /* interface - synchronization */ + uct_iface_flush_func_t iface_flush; + uct_iface_fence_func_t iface_fence; + + /* interface - progress control */ + uct_iface_progress_enable_func_t iface_progress_enable; + uct_iface_progress_disable_func_t iface_progress_disable; + uct_iface_progress_func_t iface_progress; + + /* interface - events */ + uct_iface_event_fd_get_func_t iface_event_fd_get; + uct_iface_event_arm_func_t iface_event_arm; + + /* interface - management */ + uct_iface_close_func_t iface_close; + uct_iface_query_func_t iface_query; + + /* interface - connection establishment */ + uct_iface_get_device_address_func_t iface_get_device_address; + uct_iface_get_address_func_t iface_get_address; + uct_iface_is_reachable_func_t iface_is_reachable; + +} uct_iface_ops_t; + + +/** + * A progress engine and a domain for allocating communication resources. + * Different workers are progressed independently. 
+ */ +typedef struct uct_worker { + ucs_callbackq_t progress_q; +} uct_worker_t; + + +/** + * Communication interface context + */ +typedef struct uct_iface { + uct_iface_ops_t ops; +} uct_iface_t; + + +/** + * Remote endpoint + */ +typedef struct uct_ep { + uct_iface_h iface; +} uct_ep_t; + + +/** + * Listener for incoming connections + */ +typedef struct uct_listener { + uct_cm_h cm; +} uct_listener_t; + + +typedef struct uct_recv_desc uct_recv_desc_t; +typedef void (*uct_desc_release_callback_t)(uct_recv_desc_t *self, void * desc); + + +/** + * Receive descriptor + */ +struct uct_recv_desc { + uct_desc_release_callback_t cb; +}; + + +#define uct_recv_desc(_desc) \ + ( *( ( (uct_recv_desc_t**)(_desc) ) - 1) ) + +END_C_DECLS + +#endif diff --git a/src/uct/api/uct.h b/src/uct/api/uct.h new file mode 100644 index 0000000..60f1b58 --- /dev/null +++ b/src/uct/api/uct.h @@ -0,0 +1,3149 @@ +/** + * @file uct.h + * @date 2014-2019 + * @copyright Mellanox Technologies Ltd. All rights reserved. + * @copyright Oak Ridge National Laboratory. All rights reserved. + * @copyright Advanced Micro Devices, Inc. All rights reserved. + * @brief Unified Communication Transport + */ + +#ifndef UCT_H_ +#define UCT_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +BEGIN_C_DECLS + +/** @file uct.h */ + +/** + * @defgroup UCT_API Unified Communication Transport (UCT) API + * @{ + * This section describes UCT API. + * @} + */ + +/** +* @defgroup UCT_RESOURCE UCT Communication Resource +* @ingroup UCT_API +* @{ +* This section describes a concept of the Communication Resource and routines +* associated with the concept. +* @} +*/ + +/** + * @defgroup UCT_CONTEXT UCT Communication Context + * @ingroup UCT_API + * @{ + * + * UCT context abstracts all the resources required for network communication.
+ * It is designed to either share or isolate resources for multiple + * programming models used by an application. + * + * This section provides a detailed description of this concept and + * routines associated with it. + * + * @} + */ + +/** + * @defgroup UCT_MD UCT Memory Domain + * @ingroup UCT_API + * @{ + * The Memory Domain abstracts resources required for network communication, + * which typically include memory, transport mechanisms, compute and + * network resources. It is an isolation mechanism that can be employed + * by the applications for isolating resources between multiple programming models. + * The attributes of the Memory Domain are defined by the structure @ref uct_md_attr(). + * The communication and memory operations are defined in the context of Memory Domain. + * + * @} + */ + +/** + * @defgroup UCT_AM UCT Active messages + * @ingroup UCT_API + * @{ + * Defines active message functions. + * @} + */ + +/** + * @defgroup UCT_RMA UCT Remote memory access operations + * @ingroup UCT_API + * @{ + * Defines remote memory access operations. + * @} + */ + +/** + * @defgroup UCT_AMO UCT Atomic operations + * @ingroup UCT_API + * @{ + * Defines atomic operations. + * @} + */ + +/** + * @defgroup UCT_TAG UCT Tag matching operations + * @ingroup UCT_API + * @{ + * Defines tag matching operations. + * @} + */ + +/** + * @defgroup UCT_CLIENT_SERVER UCT client-server operations + * @ingroup UCT_API + * @{ + * Defines client-server operations. + * The client-server API allows the connection establishment between an active + * side - a client, and its peer - the passive side - a server. + * The connection can be established through a UCT transport that supports + * listening and connecting via IP address and port (listening can also be on INADDR_ANY). + * + * The following is a general overview of the operations on the server side: + * + * Connecting: + * @ref uct_cm_open + * Open a connection manager.
+ * @ref uct_listener_create + * Create a listener on the CM and start listening on a given IP,port / INADDR_ANY. + * @ref uct_listener_conn_request_callback_t + * This callback is invoked by the UCT transport to handle an incoming connection + * request from a client. + * Accept or reject the client's connection request. + * @ref uct_ep_create + * Connect to the client by creating an endpoint in case of accepting its request. + * The server creates a new endpoint per every connection request that it accepts. + * @ref uct_sockaddr_priv_pack_callback_t + * This callback is invoked by the UCT transport to fill auxiliary data in + * the connection acknowledgement or reject notification back to the client. + * Send the client a connection acknowledgement or reject notification. + * Wait for an acknowledgment from the client, indicating that it is connected. + * @ref uct_ep_server_connect_cb_t + * This callback is invoked by the UCT transport to handle the connection + * acknowledgment from the client. + * + * Disconnecting: + * @ref uct_ep_disconnect + * Disconnect the server's endpoint from the client. + * Can be called when initiating a disconnect or when receiving a disconnect + * notification from the remote side. + * @ref uct_ep_disconnect_cb_t + * This callback is invoked by the UCT transport when the client side calls + * uct_ep_disconnect as well. + * @ref uct_ep_destroy + * Destroy the endpoint connected to the remote peer. + * If this function is called before the endpoint was disconnected, the + * @ref uct_ep_disconnect_cb_t will not be invoked. + * + * Destroying the server's resources: + * @ref uct_listener_destroy + * Destroy the listener object. + * @ref uct_cm_close + * Close the connection manager. + * + * The following is a general overview of the operations on the client side: + * + * Connecting: + * @ref uct_cm_open + * Open a connection manager. + * @ref uct_ep_create + * Create an endpoint for establishing a connection to the server. 
+ * @ref uct_sockaddr_priv_pack_callback_t + * This callback is invoked by the UCT transport to fill the user's private data + * in the connection request to be sent to the server. This connection request + * should be created by the transport. + * Send the connection request to the server. + * Wait for an acknowledgment from the server, indicating that it is connected. + * @ref uct_ep_client_connect_cb_t + * This callback is invoked by the UCT transport to handle a connection response + * from the server. + * After invoking this callback, the UCT transport will finalize the client's + * connection to the server. + * + * Disconnecting: + * @ref uct_ep_disconnect + * Disconnect the client's endpoint from the server. + * Can be called when initiating a disconnect or when receiving a disconnect + * notification from the remote side. + * @ref uct_ep_disconnect_cb_t + * This callback is invoked by the UCT transport when the server side calls + * uct_ep_disconnect as well. + * @ref uct_ep_destroy + * Destroy the endpoint connected to the remote peer. + * + * Destroying the client's resources: + * @ref uct_cm_close + * Close the connection manager. + * + * @} + */ + +/** + * @ingroup UCT_RESOURCE + * @brief Memory domain resource descriptor. + * + * This structure describes a memory domain resource. + */ +typedef struct uct_md_resource_desc { + char md_name[UCT_MD_NAME_MAX]; /**< Memory domain name */ +} uct_md_resource_desc_t; + + +/** + * @ingroup UCT_RESOURCE + * @brief UCT component attributes field mask + * + * The enumeration allows specifying which fields in @ref uct_component_attr_t + * are present. It is used for backward compatibility support. 
+ */ +enum uct_component_attr_field { + UCT_COMPONENT_ATTR_FIELD_NAME = UCS_BIT(0), /**< Component name */ + UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT = UCS_BIT(1), /**< MD resource count */ + UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES = UCS_BIT(2), /**< MD resources array */ + UCT_COMPONENT_ATTR_FIELD_FLAGS = UCS_BIT(3) /**< Capability flags */ +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief UCT component attributes + * + * This structure defines the attributes for UCT component. It is used for + * @ref uct_component_query + */ +typedef struct uct_component_attr { + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_component_attr_field. + * Fields not specified in this mask will be ignored. + * Provides ABI compatibility with respect to adding new fields. + */ + uint64_t field_mask; + + /** Component name */ + char name[UCT_COMPONENT_NAME_MAX]; + + /** Number of memory-domain resources */ + unsigned md_resource_count; + + /** + * Array of memory domain resources. When used, it should be initialized + * prior to calling @ref uct_component_query with a pointer to an array, + * which is large enough to hold all memory domain resource entries. After + * the call, this array will be filled with information about existing + * memory domain resources. + * In order to allocate this array, you can call @ref uct_component_query + * twice: The first time would only obtain the amount of entries required, + * by specifying @ref UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT in + * field_mask. Then the array could be allocated with the returned number of + * entries, and passed to a second call to @ref uct_component_query, this + * time setting field_mask to @ref UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES. + */ + uct_md_resource_desc_t *md_resources; + + /** + * Flags as defined by UCT_COMPONENT_FLAG_xx. + */ + uint64_t flags; +} uct_component_attr_t; + + +/** + * @ingroup UCT_RESOURCE + * @brief Capability flags of @ref uct_component_h. 
+ * + * The enumeration defines bit mask of @ref uct_component_h capabilities in + * @ref uct_component_attr_t::flags which is set by @ref uct_component_query. + */ +enum { + /** + * If set, the component supports @ref uct_cm_h functionality. + * See @ref uct_cm_open for details. + */ + UCT_COMPONENT_FLAG_CM = UCS_BIT(0) +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief List of UCX device types. + */ +typedef enum { + UCT_DEVICE_TYPE_NET, /**< Network devices */ + UCT_DEVICE_TYPE_SHM, /**< Shared memory devices */ + UCT_DEVICE_TYPE_ACC, /**< Acceleration devices */ + UCT_DEVICE_TYPE_SELF, /**< Loop-back device */ + UCT_DEVICE_TYPE_LAST +} uct_device_type_t; + + +/** + * @ingroup UCT_RESOURCE + * @brief Communication resource descriptor. + * + * Resource descriptor is an object representing the network resource. + * Resource descriptor could represent a stand-alone communication resource + * such as an HCA port, network interface, or multiple resources such as + * multiple network interfaces or communication ports. It could also represent + * virtual communication resources that are defined over a single physical + * network interface. + */ +typedef struct uct_tl_resource_desc { + char tl_name[UCT_TL_NAME_MAX]; /**< Transport name */ + char dev_name[UCT_DEVICE_NAME_MAX]; /**< Hardware device name */ + uct_device_type_t dev_type; /**< Device type: the UCT group it belongs to */ +} uct_tl_resource_desc_t; + +#define UCT_TL_RESOURCE_DESC_FMT "%s/%s" +#define UCT_TL_RESOURCE_DESC_ARG(_resource) (_resource)->tl_name, (_resource)->dev_name + + +/** + * @brief Atomic operation requested for uct_ep_atomic32_post, uct_ep_atomic64_post, + * uct_ep_atomic32_fetch and uct_ep_atomic64_fetch. + * + * This enumeration defines which atomic memory operation should be + * performed by the uct_ep_atomic family of functions.
+ */ +typedef enum uct_atomic_op { + UCT_ATOMIC_OP_ADD, /**< Atomic add */ + UCT_ATOMIC_OP_AND, /**< Atomic and */ + UCT_ATOMIC_OP_OR, /**< Atomic or */ + UCT_ATOMIC_OP_XOR, /**< Atomic xor */ + UCT_ATOMIC_OP_SWAP, /**< Atomic swap */ + UCT_ATOMIC_OP_CSWAP, /**< Atomic compare-and-swap */ + UCT_ATOMIC_OP_LAST +} uct_atomic_op_t; + + +/** + * @defgroup UCT_RESOURCE_IFACE_CAP UCT interface operations and capabilities + * @ingroup UCT_RESOURCE + * + * @brief List of capabilities supported by UCX API + * + * The definition list presents a full list of operations and capabilities + * exposed by UCX API. + * @{ + */ + /* Active message capabilities */ +#define UCT_IFACE_FLAG_AM_SHORT UCS_BIT(0) /**< Short active message */ +#define UCT_IFACE_FLAG_AM_BCOPY UCS_BIT(1) /**< Buffered active message */ +#define UCT_IFACE_FLAG_AM_ZCOPY UCS_BIT(2) /**< Zero-copy active message */ + +#define UCT_IFACE_FLAG_PENDING UCS_BIT(3) /**< Pending operations */ + + /* PUT capabilities */ +#define UCT_IFACE_FLAG_PUT_SHORT UCS_BIT(4) /**< Short put */ +#define UCT_IFACE_FLAG_PUT_BCOPY UCS_BIT(5) /**< Buffered put */ +#define UCT_IFACE_FLAG_PUT_ZCOPY UCS_BIT(6) /**< Zero-copy put */ + + /* GET capabilities */ +#define UCT_IFACE_FLAG_GET_SHORT UCS_BIT(8) /**< Short get */ +#define UCT_IFACE_FLAG_GET_BCOPY UCS_BIT(9) /**< Buffered get */ +#define UCT_IFACE_FLAG_GET_ZCOPY UCS_BIT(10) /**< Zero-copy get */ + + /* Atomic operations domain */ +#define UCT_IFACE_FLAG_ATOMIC_CPU UCS_BIT(30) /**< Atomic communications are consistent + with respect to CPU operations. */ +#define UCT_IFACE_FLAG_ATOMIC_DEVICE UCS_BIT(31) /**< Atomic communications are consistent + only with respect to other atomics + on the same device. 
*/ + + /* Error handling capabilities */ +#define UCT_IFACE_FLAG_ERRHANDLE_SHORT_BUF UCS_BIT(32) /**< Invalid buffer for short operation */ +#define UCT_IFACE_FLAG_ERRHANDLE_BCOPY_BUF UCS_BIT(33) /**< Invalid buffer for buffered operation */ +#define UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF UCS_BIT(34) /**< Invalid buffer for zero copy operation */ +#define UCT_IFACE_FLAG_ERRHANDLE_AM_ID UCS_BIT(35) /**< Invalid AM id on remote */ +#define UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM UCS_BIT(36) /**< Remote memory access */ +#define UCT_IFACE_FLAG_ERRHANDLE_BCOPY_LEN UCS_BIT(37) /**< Invalid length for buffered operation */ +#define UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE UCS_BIT(38) /**< Remote peer failures/outage */ + +#define UCT_IFACE_FLAG_EP_CHECK UCS_BIT(39) /**< Endpoint check */ + + /* Connection establishment */ +#define UCT_IFACE_FLAG_CONNECT_TO_IFACE UCS_BIT(40) /**< Supports connecting to interface */ +#define UCT_IFACE_FLAG_CONNECT_TO_EP UCS_BIT(41) /**< Supports connecting to specific endpoint */ +#define UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR UCS_BIT(42) /**< Supports connecting to sockaddr */ + + /* Special transport flags */ +#define UCT_IFACE_FLAG_AM_DUP UCS_BIT(43) /**< Active messages may be received with duplicates + This happens if the transport does not keep enough + information to detect retransmissions */ + + /* Callback invocation */ +#define UCT_IFACE_FLAG_CB_SYNC UCS_BIT(44) /**< Interface supports setting a callback + which is invoked only from the calling context of + uct_worker_progress() */ +#define UCT_IFACE_FLAG_CB_ASYNC UCS_BIT(45) /**< Interface supports setting a callback + which will be invoked within a reasonable amount of + time if uct_worker_progress() is not being called. + The callback can be invoked from any progress context + and it may also be invoked when uct_worker_progress() + is called. 
*/ + + /* Event notification */ +#define UCT_IFACE_FLAG_EVENT_SEND_COMP UCS_BIT(46) /**< Event notification of send completion is + supported */ +#define UCT_IFACE_FLAG_EVENT_RECV UCS_BIT(47) /**< Event notification of tag and active message + receive is supported */ +#define UCT_IFACE_FLAG_EVENT_RECV_SIG UCS_BIT(48) /**< Event notification of signaled tag and active + message is supported */ + + /* Tag matching operations */ +#define UCT_IFACE_FLAG_TAG_EAGER_SHORT UCS_BIT(50) /**< Hardware tag matching short eager support */ +#define UCT_IFACE_FLAG_TAG_EAGER_BCOPY UCS_BIT(51) /**< Hardware tag matching bcopy eager support */ +#define UCT_IFACE_FLAG_TAG_EAGER_ZCOPY UCS_BIT(52) /**< Hardware tag matching zcopy eager support */ +#define UCT_IFACE_FLAG_TAG_RNDV_ZCOPY UCS_BIT(53) /**< Hardware tag matching rendezvous zcopy support */ +/** + * @} + */ + + +/** + * @ingroup UCT_CONTEXT + * @brief Memory allocation methods. + */ +typedef enum { + UCT_ALLOC_METHOD_THP, /**< Allocate from OS using libc allocator with + Transparent Huge Pages enabled*/ + UCT_ALLOC_METHOD_MD, /**< Allocate using memory domain */ + UCT_ALLOC_METHOD_HEAP, /**< Allocate from heap using libc allocator */ + UCT_ALLOC_METHOD_MMAP, /**< Allocate from OS using mmap() syscall */ + UCT_ALLOC_METHOD_HUGE, /**< Allocate huge pages */ + UCT_ALLOC_METHOD_LAST, + UCT_ALLOC_METHOD_DEFAULT = UCT_ALLOC_METHOD_LAST /**< Use default method */ +} uct_alloc_method_t; + + +/** + * @ingroup UCT_RESOURCE + * @brief Asynchronous event types. + * + * @note The UCT_EVENT_RECV and UCT_EVENT_RECV_SIG event types are used to + * indicate receive-side completions for both tag matching and active + * messages. If the interface supports signaled receives + * (@ref UCT_IFACE_FLAG_EVENT_RECV_SIG), then for the messages sent with + * UCT_SEND_FLAG_SIGNALED flag, UCT_EVENT_RECV_SIG should be triggered + * on the receiver. Otherwise, UCT_EVENT_RECV should be triggered. 
+ */ +enum uct_iface_event_types { + UCT_EVENT_SEND_COMP = UCS_BIT(0), /**< Send completion event */ + UCT_EVENT_RECV = UCS_BIT(1), /**< Tag or active message received */ + UCT_EVENT_RECV_SIG = UCS_BIT(2) /**< Signaled tag or active message + received */ +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief Flush modifiers. + */ +enum uct_flush_flags { + UCT_FLUSH_FLAG_LOCAL = 0, /**< Guarantees that the data + transfer is completed but the + target buffer may not be + updated yet.*/ + UCT_FLUSH_FLAG_CANCEL = UCS_BIT(0) /**< The library will make a best + effort attempt to cancel all + uncompleted operations. + However, there is a chance that + some operations will not be + canceled in which case the user + will need to handle their + completions through + the relevant callbacks. + After @ref uct_ep_flush + with this flag is completed, + the endpoint will be set to + error state, and it becomes + unusable for send operations + and should be destroyed. */ +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief UCT progress types + */ +enum uct_progress_types { + UCT_PROGRESS_SEND = UCS_BIT(0), /**< Progress send operations */ + UCT_PROGRESS_RECV = UCS_BIT(1), /**< Progress receive operations */ + UCT_PROGRESS_THREAD_SAFE = UCS_BIT(7) /**< Enable/disable progress while + another thread may be calling + @ref ucp_worker_progress(). */ +}; + + +/** + * @ingroup UCT_AM + * @brief Flags for active message send operation. + */ +enum uct_msg_flags { + UCT_SEND_FLAG_SIGNALED = UCS_BIT(0) /**< Trigger @ref UCT_EVENT_RECV_SIG + event on remote side. Make best + effort attempt to avoid triggering + @ref UCT_EVENT_RECV event. + Ignored if not supported by interface. */ +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief Callback flags. + * + * List of flags for a callback. + */ +enum uct_cb_flags { + UCT_CB_FLAG_RESERVED = UCS_BIT(1), /**< Reserved for future use. 
*/ + UCT_CB_FLAG_ASYNC = UCS_BIT(2) /**< Callback is allowed to be called + from any thread in the process, and + therefore should be thread-safe. For + example, it may be called from a + transport async progress thread. To + guarantee async invocation, the + interface must have the @ref + UCT_IFACE_FLAG_CB_ASYNC flag set. If + async callback is requested on an + interface which only supports sync + callback (i.e., only the @ref + UCT_IFACE_FLAG_CB_SYNC flag is set), + the callback will be invoked only + from the context that called @ref + uct_iface_progress. */ +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief Mode in which to open the interface. + */ +enum uct_iface_open_mode { + /** Interface is opened on a specific device */ + UCT_IFACE_OPEN_MODE_DEVICE = UCS_BIT(0), + + /** Interface is opened on a specific address on the server side. This mode + will be deprecated in the near future for a better API. */ + UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER = UCS_BIT(1), + + /** Interface is opened on a specific address on the client side. This mode + will be deprecated in the near future for a better API. */ + UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT = UCS_BIT(2) +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief UCT interface created by @ref uct_iface_open parameters field mask. + * + * The enumeration allows specifying which fields in @ref uct_iface_params_t are + * present, for backward compatibility support.
+ */ +enum uct_iface_params_field { + /** Enables @ref uct_iface_params_t::cpu_mask */ + UCT_IFACE_PARAM_FIELD_CPU_MASK = UCS_BIT(0), + + /** Enables @ref uct_iface_params_t::open_mode */ + UCT_IFACE_PARAM_FIELD_OPEN_MODE = UCS_BIT(1), + + /** Enables @ref uct_iface_params_t_mode_device + * "uct_iface_params_t::mode::device" */ + UCT_IFACE_PARAM_FIELD_DEVICE = UCS_BIT(2), + + /** Enables @ref uct_iface_params_t_mode_sockaddr + * "uct_iface_params_t::mode::sockaddr" */ + UCT_IFACE_PARAM_FIELD_SOCKADDR = UCS_BIT(3), + + /** Enables @ref uct_iface_params_t::stats_root */ + UCT_IFACE_PARAM_FIELD_STATS_ROOT = UCS_BIT(4), + + /** Enables @ref uct_iface_params_t::rx_headroom */ + UCT_IFACE_PARAM_FIELD_RX_HEADROOM = UCS_BIT(5), + + /** Enables @ref uct_iface_params_t::err_handler_arg */ + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_ARG = UCS_BIT(6), + + /** Enables @ref uct_iface_params_t::err_handler */ + UCT_IFACE_PARAM_FIELD_ERR_HANDLER = UCS_BIT(7), + + /** Enables @ref uct_iface_params_t::err_handler_flags */ + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS = UCS_BIT(8), + + /** Enables @ref uct_iface_params_t::eager_arg */ + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_ARG = UCS_BIT(9), + + /** Enables @ref uct_iface_params_t::eager_cb */ + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_CB = UCS_BIT(10), + + /** Enables @ref uct_iface_params_t::rndv_arg */ + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_ARG = UCS_BIT(11), + + /** Enables @ref uct_iface_params_t::rndv_cb */ + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_CB = UCS_BIT(12) +}; + +/** + * @ingroup UCT_MD + * @brief Socket address accessibility type. + */ +typedef enum { + UCT_SOCKADDR_ACC_LOCAL, /**< Check if local address exists. + Address should belong to a local + network interface */ + UCT_SOCKADDR_ACC_REMOTE /**< Check if remote address can be reached. + Address is routable from one of the + local network interfaces */ +} uct_sockaddr_accessibility_t; + + +/** + * @ingroup UCT_MD + * @brief Memory domain capability flags. 
+ */ +enum { + UCT_MD_FLAG_ALLOC = UCS_BIT(0), /**< MD supports memory allocation */ + UCT_MD_FLAG_REG = UCS_BIT(1), /**< MD supports memory registration */ + UCT_MD_FLAG_NEED_MEMH = UCS_BIT(2), /**< The transport needs a valid local + memory handle for zero-copy operations */ + UCT_MD_FLAG_NEED_RKEY = UCS_BIT(3), /**< The transport needs a valid + remote memory key for remote memory + operations */ + UCT_MD_FLAG_ADVISE = UCS_BIT(4), /**< MD supports memory advice */ + UCT_MD_FLAG_FIXED = UCS_BIT(5), /**< MD supports memory allocation with + fixed address */ + UCT_MD_FLAG_RKEY_PTR = UCS_BIT(6), /**< MD supports direct access to + remote memory via a pointer that + is returned by @ref uct_rkey_ptr */ + UCT_MD_FLAG_SOCKADDR = UCS_BIT(7) /**< MD support for client-server + connection establishment via + sockaddr */ +}; + +/** + * @ingroup UCT_MD + * @brief Memory allocation/registration flags. + */ +enum uct_md_mem_flags { + UCT_MD_MEM_FLAG_NONBLOCK = UCS_BIT(0), /**< Hint to perform non-blocking + allocation/registration: page + mapping may be deferred until + it is accessed by the CPU or a + transport. */ + UCT_MD_MEM_FLAG_FIXED = UCS_BIT(1), /**< Place the mapping at exactly + defined address */ + UCT_MD_MEM_FLAG_LOCK = UCS_BIT(2), /**< Registered memory should be + locked. May incur extra cost for + registration, but memory access + is usually faster. */ + UCT_MD_MEM_FLAG_HIDE_ERRORS = UCS_BIT(3), /**< Hide errors on memory registration. + In some cases registration failure + is not an error (e. g. for merged + memory regions). 
*/ + + /* memory access flags */ + UCT_MD_MEM_ACCESS_REMOTE_PUT = UCS_BIT(5), /**< enable remote put access */ + UCT_MD_MEM_ACCESS_REMOTE_GET = UCS_BIT(6), /**< enable remote get access */ + UCT_MD_MEM_ACCESS_REMOTE_ATOMIC = UCS_BIT(7), /**< enable remote atomic access */ + + /** enable local and remote access for all operations */ + UCT_MD_MEM_ACCESS_ALL = (UCT_MD_MEM_ACCESS_REMOTE_PUT| + UCT_MD_MEM_ACCESS_REMOTE_GET| + UCT_MD_MEM_ACCESS_REMOTE_ATOMIC), + + /** enable local and remote access for put and get operations */ + UCT_MD_MEM_ACCESS_RMA = (UCT_MD_MEM_ACCESS_REMOTE_PUT| + UCT_MD_MEM_ACCESS_REMOTE_GET) +}; + + +/** + * @ingroup UCT_MD + * @brief list of UCT memory use advice + */ +typedef enum { + UCT_MADV_NORMAL = 0, /**< No special treatment */ + UCT_MADV_WILLNEED /**< can be used on the memory mapped with + @ref UCT_MD_MEM_FLAG_NONBLOCK to speed up + memory mapping and to avoid page faults when + the memory is accessed for the first time. */ +} uct_mem_advice_t; + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief UCT connection manager attributes field mask. + * + * The enumeration allows specifying which fields in @ref uct_cm_attr_t are + * present, for backward compatibility support. + */ +enum uct_cm_attr_field { + /** Enables @ref uct_cm_attr::max_conn_priv */ + UCT_CM_ATTR_FIELD_MAX_CONN_PRIV = UCS_BIT(0) +}; + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief UCT listener attributes field mask. + * + * The enumeration allows specifying which fields in @ref uct_listener_attr_t are + * present, for backward compatibility support. + */ +enum uct_listener_attr_field { + /** Enables @ref uct_listener_attr::sockaddr */ + UCT_LISTENER_ATTR_FIELD_SOCKADDR = UCS_BIT(0) +}; + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief UCT listener created by @ref uct_listener_create parameters field mask. + * + * The enumeration allows specifying which fields in @ref uct_listener_params_t + * are present, for backward compatibility support. 
+ */ +enum uct_listener_params_field { + /** Enables @ref uct_listener_params::backlog */ + UCT_LISTENER_PARAM_FIELD_BACKLOG = UCS_BIT(0), + + /** Enables @ref uct_listener_params::conn_request_cb */ + UCT_LISTENER_PARAM_FIELD_CONN_REQUEST_CB = UCS_BIT(1), + + /** Enables @ref uct_listener_params::user_data */ + UCT_LISTENER_PARAM_FIELD_USER_DATA = UCS_BIT(2) +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief UCT endpoint created by @ref uct_ep_create parameters field mask. + * + * The enumeration allows specifying which fields in @ref uct_ep_params_t are + * present, for backward compatibility support. + */ +enum uct_ep_params_field { + /** Enables @ref uct_ep_params::iface */ + UCT_EP_PARAM_FIELD_IFACE = UCS_BIT(0), + + /** Enables @ref uct_ep_params::user_data */ + UCT_EP_PARAM_FIELD_USER_DATA = UCS_BIT(1), + + /** Enables @ref uct_ep_params::dev_addr */ + UCT_EP_PARAM_FIELD_DEV_ADDR = UCS_BIT(2), + + /** Enables @ref uct_ep_params::iface_addr */ + UCT_EP_PARAM_FIELD_IFACE_ADDR = UCS_BIT(3), + + /** Enables @ref uct_ep_params::sockaddr */ + UCT_EP_PARAM_FIELD_SOCKADDR = UCS_BIT(4), + + /** Enables @ref uct_ep_params::sockaddr_cb_flags */ + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS = UCS_BIT(5), + + /** Enables @ref uct_ep_params::sockaddr_pack_cb */ + UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB = UCS_BIT(6), + + /** Enables @ref uct_ep_params::cm */ + UCT_EP_PARAM_FIELD_CM = UCS_BIT(7), + + /** Enables @ref uct_ep_params::conn_request */ + UCT_EP_PARAM_FIELD_CONN_REQUEST = UCS_BIT(8), + + /** Enables @ref uct_ep_params::sockaddr_connect_cb */ + UCT_EP_PARAM_FIELD_SOCKADDR_CONNECT_CB = UCS_BIT(9), + + /** Enables @ref uct_ep_params::disconnect_cb */ + UCT_EP_PARAM_FIELD_SOCKADDR_DISCONNECT_CB = UCS_BIT(10) +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief Linear growth specification: f(x) = overhead + growth * x + * + * This structure specifies a linear function which is used as basis for time + * estimation of various UCT operations.
This information can be used to select + * the best performing combination of UCT operations. + */ +typedef struct uct_linear_growth { + double overhead; /**< Constant overhead factor */ + double growth; /**< Growth rate factor */ +} uct_linear_growth_t; + + +/* + * @ingroup UCT_RESOURCE + * @brief Process Per Node (PPN) bandwidth specification: f(ppn) = dedicated + shared / ppn + * + * This structure specifies a function which is used as basis for bandwidth + * estimation of various UCT operations. This information can be used to select + * the best performing combination of UCT operations. + */ +typedef struct uct_ppn_bandwidth { + double dedicated; /**< Dedicated bandwidth, bytes/second */ + double shared; /**< Shared bandwidth, bytes/second */ +} uct_ppn_bandwidth_t; + + +/** + * @ingroup UCT_RESOURCE + * @brief Interface attributes: capabilities and limitations. + */ +struct uct_iface_attr { + struct { + struct { + size_t max_short; /**< Maximal size for put_short */ + size_t max_bcopy; /**< Maximal size for put_bcopy */ + size_t min_zcopy; /**< Minimal size for put_zcopy (total + of @ref uct_iov_t::length of the + @a iov parameter) */ + size_t max_zcopy; /**< Maximal size for put_zcopy (total + of @ref uct_iov_t::length of the + @a iov parameter) */ + size_t opt_zcopy_align; /**< Optimal alignment for zero-copy + buffer address */ + size_t align_mtu; /**< MTU used for alignment */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref ::uct_ep_put_zcopy + @anchor uct_iface_attr_cap_put_max_iov */ + } put; /**< Attributes for PUT operations */ + + struct { + size_t max_short; /**< Maximal size for get_short */ + size_t max_bcopy; /**< Maximal size for get_bcopy */ + size_t min_zcopy; /**< Minimal size for get_zcopy (total + of @ref uct_iov_t::length of the + @a iov parameter) */ + size_t max_zcopy; /**< Maximal size for get_zcopy (total + of @ref uct_iov_t::length of the + @a iov parameter) */ + size_t opt_zcopy_align; /**< Optimal alignment for 
zero-copy + buffer address */ + size_t align_mtu; /**< MTU used for alignment */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref uct_ep_get_zcopy + @anchor uct_iface_attr_cap_get_max_iov */ + } get; /**< Attributes for GET operations */ + + struct { + size_t max_short; /**< Total max. size (incl. the header) */ + size_t max_bcopy; /**< Total max. size (incl. the header) */ + size_t min_zcopy; /**< Minimal size for am_zcopy (incl. the + header and total of @ref uct_iov_t::length + of the @a iov parameter) */ + size_t max_zcopy; /**< Total max. size (incl. the header + and total of @ref uct_iov_t::length + of the @a iov parameter) */ + size_t opt_zcopy_align; /**< Optimal alignment for zero-copy + buffer address */ + size_t align_mtu; /**< MTU used for alignment */ + size_t max_hdr; /**< Max. header size for zcopy */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref ::uct_ep_am_zcopy + @anchor uct_iface_attr_cap_am_max_iov */ + } am; /**< Attributes for AM operations */ + + struct { + struct { + size_t min_recv; /**< Minimal allowed length of posted receive buffer */ + size_t max_zcopy; /**< Maximal allowed data length in + @ref uct_iface_tag_recv_zcopy */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref uct_iface_tag_recv_zcopy + @anchor uct_iface_attr_cap_tag_recv_iov */ + size_t max_outstanding; /**< Maximal number of simultaneous + receive operations */ + } recv; + + struct { + size_t max_short; /**< Maximal allowed data length in + @ref uct_ep_tag_eager_short */ + size_t max_bcopy; /**< Maximal allowed data length in + @ref uct_ep_tag_eager_bcopy */ + size_t max_zcopy; /**< Maximal allowed data length in + @ref uct_ep_tag_eager_zcopy */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref uct_ep_tag_eager_zcopy */ + } eager; /**< Attributes related to eager protocol */ + + struct { + size_t max_zcopy; /**< Maximal allowed data length in + @ref uct_ep_tag_rndv_zcopy */ + size_t max_hdr; /**< Maximal allowed header 
length in + @ref uct_ep_tag_rndv_zcopy and + @ref uct_ep_tag_rndv_request */ + size_t max_iov; /**< Maximal @a iovcnt parameter in + @ref uct_ep_tag_rndv_zcopy */ + } rndv; /**< Attributes related to rendezvous protocol */ + } tag; /**< Attributes for TAG operations */ + + struct { + uint64_t op_flags; /**< Attributes for atomic-post operations */ + uint64_t fop_flags; /**< Attributes for atomic-fetch operations */ + } atomic32, atomic64; /**< Attributes for atomic operations */ + + uint64_t flags; /**< Flags from @ref UCT_RESOURCE_IFACE_CAP */ + } cap; /**< Interface capabilities */ + + size_t device_addr_len;/**< Size of device address */ + size_t iface_addr_len; /**< Size of interface address */ + size_t ep_addr_len; /**< Size of endpoint address */ + size_t max_conn_priv; /**< Max size of the iface's private data. + used for connection + establishment with sockaddr */ + struct sockaddr_storage listen_sockaddr; /**< Sockaddr on which this iface + is listening. */ + /* + * The following fields define expected performance of the communication + * interface, this would usually be a combination of device and system + * characteristics and determined at run time. + */ + double overhead; /**< Message overhead, seconds */ + uct_ppn_bandwidth_t bandwidth; /**< Bandwidth model */ + uct_linear_growth_t latency; /**< Latency model */ + uint8_t priority; /**< Priority of device */ + size_t max_num_eps; /**< Maximum number of endpoints */ +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief Parameters used for interface creation. + * + * This structure should be allocated by the user and should be passed to + * @ref uct_iface_open. User has to initialize all fields of this structure. + */ +struct uct_iface_params { + /** Mask of valid fields in this structure, using bits from + * @ref uct_iface_params_field. Fields not specified in this mask will be + * ignored. 
*/ + uint64_t field_mask; + /** Mask of CPUs to use for resources */ + ucs_cpu_set_t cpu_mask; + /** Interface open mode bitmap. @ref uct_iface_open_mode */ + uint64_t open_mode; + /** Mode-specific parameters */ + union { + /** @anchor uct_iface_params_t_mode_device + * The fields in this structure (tl_name and dev_name) need to be set only when + * the @ref UCT_IFACE_OPEN_MODE_DEVICE bit is set in @ref + * uct_iface_params_t.open_mode. This will make @ref uct_iface_open + * open the interface on the specified device. + */ + struct { + const char *tl_name; /**< Transport name */ + const char *dev_name; /**< Device Name */ + } device; + /** @anchor uct_iface_params_t_mode_sockaddr + * These callbacks and address are only relevant for client-server + * connection establishment with sockaddr and are needed on the server side. + * The callbacks and address need to be set when the @ref + * UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER bit is set in @ref + * uct_iface_params_t.open_mode. This will make @ref uct_iface_open + * open the interface on the specified address as a server. */ + struct { + ucs_sock_addr_t listen_sockaddr; + /** Argument for connection request callback */ + void *conn_request_arg; + /** Callback for an incoming connection request on the server */ + uct_sockaddr_conn_request_callback_t conn_request_cb; + /** Callback flags to indicate where the callback can be invoked from. + * @ref uct_cb_flags */ + uint32_t cb_flags; + } sockaddr; + } mode; + + /** Root in the statistics tree. Can be NULL. If non NULL, it will be + a root of @a uct_iface object in the statistics tree. */ + ucs_stats_node_t *stats_root; + /** How many bytes to reserve before the receive segment.*/ + size_t rx_headroom; + + /** Custom argument of @a err_handler. */ + void *err_handler_arg; + /** The callback to handle transport level error.*/ + uct_error_handler_t err_handler; + /** Callback flags to indicate where the @a err_handler callback can be + * invoked from.
@ref uct_cb_flags */ + uint32_t err_handler_flags; + + /** These callbacks are only relevant for HW Tag Matching */ + void *eager_arg; + /** Callback for tag matching unexpected eager messages */ + uct_tag_unexp_eager_cb_t eager_cb; + void *rndv_arg; + /** Callback for tag matching unexpected rndv messages */ + uct_tag_unexp_rndv_cb_t rndv_cb; +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief Parameters for creating a UCT endpoint by @ref uct_ep_create + */ +struct uct_ep_params { + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_ep_params_field. Fields not specified by this mask will be + * ignored. + */ + uint64_t field_mask; + + /** + * Interface to create the endpoint on. + * Either @a iface or @a cm field must be initialized but not both. + */ + uct_iface_h iface; + + /** + * User data associated with the endpoint. + */ + void *user_data; + + /** + * The device address to connect to on the remote peer. This must be defined + * together with @ref uct_ep_params_t::iface_addr to create an endpoint + * connected to a remote interface. + */ + const uct_device_addr_t *dev_addr; + + /** + * This specifies the remote address to use when creating an endpoint that + * is connected to a remote interface. + * @note This requires @ref UCT_IFACE_FLAG_CONNECT_TO_IFACE capability. + */ + const uct_iface_addr_t *iface_addr; + + /** + * The sockaddr to connect to on the remote peer. If set, @ref uct_ep_create + * will create an endpoint for a connection to the remote peer, specified by + * its socket address. + * @note The interface in this routine requires the + * @ref UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR capability. + */ + const ucs_sock_addr_t *sockaddr; + + /** + * @ref uct_cb_flags to indicate @ref uct_ep_params_t::sockaddr_pack_cb + * behavior. If @ref uct_ep_params_t::sockaddr_pack_cb is not set, this + * field will be ignored. 
+ */ + uint32_t sockaddr_cb_flags; + + /** + * Callback that will be used for filling the user's private data to be + * delivered to the remote peer by the callback on the server or client side. + * This field is only valid if @ref uct_ep_params_t::sockaddr is set. + * @note It is never guaranteed that the callback will be called. If, for + * example, the endpoint goes into error state before issuing the connection + * request, the callback will not be invoked. + */ + uct_sockaddr_priv_pack_callback_t sockaddr_pack_cb; + + /** + * The connection manager object as created by @ref uct_cm_open. + * Either @a cm or @a iface field must be initialized but not both. + */ + uct_cm_h cm; + + /** + * Connection request that was passed to + * @ref uct_listener_conn_request_callback_t . + */ + uct_conn_request_h conn_request; + + union { + /** + * Callback that will be invoked when the endpoint on the client side + * is being connected to the server by a connection manager @ref uct_cm_h . + */ + uct_ep_client_connect_cb_t client; + + /** + * Callback that will be invoked when the endpoint on the server side + * is being connected to a client by a connection manager @ref uct_cm_h . + */ + uct_ep_server_connect_cb_t server; + } sockaddr_connect_cb; + + /** + * Callback that will be invoked when the endpoint is disconnected. + */ + uct_ep_disconnect_cb_t disconnect_cb; +}; + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Connection manager attributes, capabilities and limitations. + */ +struct uct_cm_attr { + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_cm_attr_field. Fields not specified by this mask + * will be ignored. + */ + uint64_t field_mask; + + /** + * Max size of the connection manager's private data used for connection + * establishment with sockaddr. + */ + size_t max_conn_priv; +}; + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief UCT listener attributes, capabilities and limitations.
+ */ +struct uct_listener_attr { + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_listener_attr_field. Fields not specified by this mask + * will be ignored. + */ + uint64_t field_mask; + + /** + * Sockaddr on which this listener is listening. + */ + struct sockaddr_storage sockaddr; +}; + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Parameters for creating a listener object @ref uct_listener_h by + * @ref uct_listener_create + */ +struct uct_listener_params { + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_listener_params_field. Fields not specified by this mask + * will be ignored. + */ + uint64_t field_mask; + + /** + * Backlog of incoming connection requests. + * If not specified, SOMAXCONN, as defined in <sys/socket.h>, will be used. + */ + int backlog; + + /** + * Callback function for handling incoming connection requests. + */ + uct_listener_conn_request_callback_t conn_request_cb; + + /** + * User data associated with the listener. + */ + void *user_data; +}; + + +/** + * @ingroup UCT_MD + * @brief Memory domain attributes. + * + * This structure defines the attributes of a Memory Domain which includes + * maximum memory that can be allocated, credentials required for accessing the memory, + * and CPU mask indicating the proximity of CPUs. + */ +struct uct_md_attr { + struct { + size_t max_alloc; /**< Maximal allocation size */ + size_t max_reg; /**< Maximal registration size */ + uint64_t flags; /**< UCT_MD_FLAG_xx */ + uint64_t reg_mem_types; /**< Bitmap of memory types that Memory Domain can be registered with */ + uint64_t detect_mem_types; /**< Bitmap of memory types that Memory Domain can detect if address belongs to it */ + ucs_memory_type_t access_mem_type; /**< Memory type MD can access */ + } cap; + + uct_linear_growth_t reg_cost; /**< Memory registration cost estimation + (time,seconds) as a linear function + of the buffer size.
*/ + + char component_name[UCT_COMPONENT_NAME_MAX]; /**< Component name */ + size_t rkey_packed_size; /**< Size of buffer needed for packed rkey */ + ucs_cpu_set_t local_cpus; /**< Mask of CPUs near the resource */ +}; + + +/** + * @ingroup UCT_MD + * @brief Describes a memory allocated by UCT. + * + * This structure describes the memory block which includes the address, size, and + * Memory Domain used for allocation. This structure is passed to interface + * and the memory is allocated by memory allocation functions @ref uct_mem_alloc. + */ +typedef struct uct_allocated_memory { + void *address; /**< Address of allocated memory */ + size_t length; /**< Real size of allocated memory */ + uct_alloc_method_t method; /**< Method used to allocate the memory */ + ucs_memory_type_t mem_type; /**< type of allocated memory */ + uct_md_h md; /**< if method==MD: MD used to allocate the memory */ + uct_mem_h memh; /**< if method==MD: MD memory handle */ +} uct_allocated_memory_t; + + +/** + * @ingroup UCT_MD + * @brief Remote key with its type + * + * This structure describes the credentials (typically key) and information + * required to access the remote memory by the communication interfaces. + */ +typedef struct uct_rkey_bundle { + uct_rkey_t rkey; /**< Remote key descriptor, passed to RMA functions */ + void *handle; /**< Handle, used internally for releasing the key */ + void *type; /**< Remote key type */ +} uct_rkey_bundle_t; + + +/** + * @ingroup UCT_RESOURCE + * @brief Completion handle. + * + * This structure should be allocated by the user and can be passed to communication + * primitives. User has to initialize both fields of the structure. + * If the operation returns UCS_INPROGRESS, this structure will be in use by the + * transport until the operation completes. When the operation completes, "count" + * field is decremented by 1, and whenever it reaches 0 - the callback is called.
+ * + * Notes: + * - The same structure can be passed multiple times to communication functions + * without the need to wait for completion. + * - If the number of operations is smaller than the initial value of the counter, + * the callback will not be called at all, so it may be left undefined. + */ +struct uct_completion { + uct_completion_callback_t func; /**< User callback function */ + int count; /**< Completion counter */ +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief Pending request. + * + * This structure should be passed to @ref uct_ep_pending_add() and is used to signal + * new available resources back to user. + */ +struct uct_pending_req { + uct_pending_callback_t func; /**< User callback function */ + char priv[UCT_PENDING_REQ_PRIV_LEN]; /**< Used internally by UCT */ +}; + + +/** + * @ingroup UCT_TAG + * @brief Posted tag context. + * + * Tag context is an object which tracks a tag posted to the transport. It + * contains callbacks for matching events on this tag. + */ +struct uct_tag_context { + /** + * Tag is consumed by the transport and should not be matched in software. + * + * @param [in] self Pointer to relevant context structure, which was + * initially passed to @ref uct_iface_tag_recv_zcopy. + */ + void (*tag_consumed_cb)(uct_tag_context_t *self); + + /** + * Tag processing is completed by the transport. + * + * @param [in] self Pointer to relevant context structure, which was + * initially passed to @ref uct_iface_tag_recv_zcopy. + * @param [in] stag Tag from sender. + * @param [in] imm Immediate data from sender. For rendezvous, it's always 0. + * @param [in] length Completed length. + * @param [in] status Completion status: + * (a) UCS_OK - Success, data placed in provided buffer. + * (b) UCS_ERR_TRUNCATED - Sender's length exceeds posted + buffer, no data is copied. + * (c) UCS_ERR_CANCELED - Canceled by user.
+ */ + void (*completed_cb)(uct_tag_context_t *self, uct_tag_t stag, uint64_t imm, + size_t length, ucs_status_t status); + + /** + * Tag was matched by a rendezvous request, which should be completed by + * the protocol layer. + * + * @param [in] self Pointer to relevant context structure, which was + * initially passed to @ref uct_iface_tag_recv_zcopy. + * @param [in] stag Tag from sender. + * @param [in] header User defined header. + * @param [in] header_length User defined header length in bytes. + * @param [in] status Completion status. + */ + void (*rndv_cb)(uct_tag_context_t *self, uct_tag_t stag, const void *header, + unsigned header_length, ucs_status_t status); + + /** A placeholder for the private data used by the transport */ + char priv[UCT_TAG_PRIV_LEN]; +}; + + +extern const char *uct_alloc_method_names[]; + + +/** + * @ingroup UCT_RESOURCE + * @brief Query for list of components. + * + * Obtain the list of transport components available on the current system. + * + * @param [out] components_p Filled with a pointer to an array of component + * handles. + * @param [out] num_components_p Filled with the number of elements in the array. + * + * @return UCS_OK if successful, or UCS_ERR_NO_MEMORY if failed to allocate the + * array of component handles. + */ +ucs_status_t uct_query_components(uct_component_h **components_p, + unsigned *num_components_p); + +/** + * @ingroup UCT_RESOURCE + * @brief Release the list of components returned from @ref uct_query_components. + * + * This routine releases the memory associated with the list of components + * allocated by @ref uct_query_components. + * + * @param [in] components Array of component handles to release. + */ +void uct_release_component_list(uct_component_h *components); + + +/** + * @ingroup UCT_RESOURCE + * @brief Get component attributes + * + * Query various attributes of a component. + * + * @param [in] component Component handle to query attributes for. 
The + * handle can be obtained from @ref uct_query_components. + * @param [inout] component_attr Filled with component attributes. + * + * @return UCS_OK if successful, or nonzero error code in case of failure. + */ +ucs_status_t uct_component_query(uct_component_h component, + uct_component_attr_t *component_attr); + + +/** + * @ingroup UCT_RESOURCE + * @brief Open a memory domain. + * + * Open a specific memory domain. All communications and memory operations + * are performed in the context of a specific memory domain. Therefore it + * must be created before communication resources. + * + * @param [in] component Component on which to open the memory domain, + * as returned from @ref uct_query_components. + * @param [in] md_name Memory domain name, as returned from @ref + * uct_component_query. + * @param [in] config MD configuration options. Should be obtained + * from uct_md_config_read() function, or point to + * MD-specific structure which extends uct_md_config_t. + * @param [out] md_p Filled with a handle to the memory domain. + * + * @return Error code. + */ +ucs_status_t uct_md_open(uct_component_h component, const char *md_name, + const uct_md_config_t *config, uct_md_h *md_p); + +/** + * @ingroup UCT_RESOURCE + * @brief Close a memory domain. + * + * @param [in] md Memory domain to close. + */ +void uct_md_close(uct_md_h md); + + +/** + * @ingroup UCT_RESOURCE + * @brief Query for transport resources. + * + * This routine queries the @ref uct_md_h "memory domain" for communication + * resources that are available for it. + * + * @param [in] md Handle to memory domain. + * @param [out] resources_p Filled with a pointer to an array of resource + * descriptors. + * @param [out] num_resources_p Filled with the number of resources in the array. + * + * @return Error code. 
+ */ +ucs_status_t uct_md_query_tl_resources(uct_md_h md, + uct_tl_resource_desc_t **resources_p, + unsigned *num_resources_p); + + +/** + * @ingroup UCT_RESOURCE + * @brief Release the list of resources returned from @ref uct_md_query_tl_resources. + * + * This routine releases the memory associated with the list of resources + * allocated by @ref uct_md_query_tl_resources. + * + * @param [in] resources Array of resource descriptors to release. + */ +void uct_release_tl_resource_list(uct_tl_resource_desc_t *resources); + + +/** + * @ingroup UCT_CONTEXT + * @brief Create a worker object. + * + * The worker represents a progress engine. Multiple progress engines can be + * created in an application, for example to be used by multiple threads. + * Transports can allocate separate communication resources for every worker, + * so that every worker can be progressed independently of others. + * + * @param [in] async Context for async event handlers. Must not be NULL. + * @param [in] thread_mode Thread access mode to the worker and all interfaces + * and endpoints associated with it. + * @param [out] worker_p Filled with a pointer to the worker object. + */ +ucs_status_t uct_worker_create(ucs_async_context_t *async, + ucs_thread_mode_t thread_mode, + uct_worker_h *worker_p); + + +/** + * @ingroup UCT_CONTEXT + * @brief Destroy a worker object. + * + * @param [in] worker Worker object to destroy. + */ +void uct_worker_destroy(uct_worker_h worker); + + +/** + * @ingroup UCT_CONTEXT + * @brief Add a slow path callback function to a worker progress. + * + * If *id_p is equal to UCS_CALLBACKQ_ID_NULL, this function will add a callback + * which will be invoked every time progress is made on the worker. *id_p will + * be updated with an id which refers to this callback and can be used in + * @ref uct_worker_progress_unregister_safe to remove it from the progress path. + * + * @param [in] worker Handle to the worker whose progress should invoke + * the callback. 
+ * @param [in] func Pointer to the callback function. + * @param [in] arg Argument for the callback function. + * @param [in] flags Callback flags, see @ref ucs_callbackq_flags. + * @param [inout] id_p Points to a location to store a callback identifier. + * If *id_p is equal to UCS_CALLBACKQ_ID_NULL, a + * callback will be added and *id_p will be replaced + * with a callback identifier which can be subsequently + * used to remove the callback. Otherwise, no callback + * will be added and *id_p will be left unchanged. + * + * @note This function is thread safe. + */ +void uct_worker_progress_register_safe(uct_worker_h worker, ucs_callback_t func, + void *arg, unsigned flags, + uct_worker_cb_id_t *id_p); + + +/** + * @ingroup UCT_CONTEXT + * @brief Remove a slow path callback function from worker's progress. + * + * If *id_p is not equal to UCS_CALLBACKQ_ID_NULL, remove a callback which was + * previously added by @ref uct_worker_progress_register_safe. *id_p will be reset + * to UCS_CALLBACKQ_ID_NULL. + * + * @param [in] worker Handle to the worker whose progress should invoke + * the callback. + * @param [inout] id_p Points to a callback identifier which indicates + * the callback to remove. If *id_p is not equal to + * UCS_CALLBACKQ_ID_NULL, the callback will be removed + * and *id_p will be reset to UCS_CALLBACKQ_ID_NULL. + * If *id_p is equal to UCS_CALLBACKQ_ID_NULL, no + * operation will be performed and *id_p will be + * left unchanged. + * + * @note This function is thread safe. + */ +void uct_worker_progress_unregister_safe(uct_worker_h worker, + uct_worker_cb_id_t *id_p); + + +/** + * @ingroup UCT_RESOURCE + * @brief Read transport-specific interface configuration. + * + * @param [in] md Memory domain on which the transport's interface + * was registered. + * @param [in] tl_name Transport name. If @e md supports + * @ref UCT_MD_FLAG_SOCKADDR, the transport name + * is allowed to be NULL. 
In this case, the configuration + * returned from this routine should be passed to + * @ref uct_iface_open with + * @ref UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER or + * @ref UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT set in + * @ref uct_iface_params_t.open_mode. + * In addition, if tl_name is not NULL, the configuration + * returned from this routine should be passed to + * @ref uct_iface_open with @ref UCT_IFACE_OPEN_MODE_DEVICE + * set in @ref uct_iface_params_t.open_mode. + * @param [in] env_prefix If non-NULL, search for environment variables + * starting with this UCT_<prefix>_. Otherwise, search + * for environment variables starting with just UCT_. + * @param [in] filename If non-NULL, read configuration from this file. If + * the file does not exist, it will be ignored. + * @param [out] config_p Filled with a pointer to configuration. + * + * @return Error code. + */ +ucs_status_t uct_md_iface_config_read(uct_md_h md, const char *tl_name, + const char *env_prefix, const char *filename, + uct_iface_config_t **config_p); + + +/** + * @ingroup UCT_RESOURCE + * @brief Release configuration memory returned from uct_md_iface_config_read(), + * uct_md_config_read(), or from uct_cm_config_read(). + * + * @param [in] config Configuration to release. + */ +void uct_config_release(void *config); + + +/** + * @ingroup UCT_CONTEXT + * @brief Get value by name from interface configuration (@ref uct_iface_config_t), + * memory domain configuration (@ref uct_md_config_t) + * or connection manager configuration (@ref uct_cm_config_t). + * + * @param [in] config Configuration to get from. + * @param [in] name Configuration variable name. + * @param [out] value Pointer to get value. Should be allocated/freed by + * caller. + * @param [in] max Available memory space at @a value pointer. + * + * @return UCS_OK if found, otherwise UCS_ERR_INVALID_PARAM or UCS_ERR_NO_ELEM + * if error.
+ */ +ucs_status_t uct_config_get(void *config, const char *name, char *value, + size_t max); + + +/** + * @ingroup UCT_CONTEXT + * @brief Modify interface configuration (@ref uct_iface_config_t), + * memory domain configuration (@ref uct_md_config_t) + * or connection manager configuration (@ref uct_cm_config_t). + * + * @param [in] config Configuration to modify. + * @param [in] name Configuration variable name. + * @param [in] value Value to set. + * + * @return Error code. + */ +ucs_status_t uct_config_modify(void *config, const char *name, const char *value); + + +/** + * @ingroup UCT_RESOURCE + * @brief Open a communication interface. + * + * @param [in] md Memory domain to create the interface on. + * @param [in] worker Handle to worker which will be used to progress + * communications on this interface. + * @param [in] params User defined @ref uct_iface_params_t parameters. + * @param [in] config Interface configuration options. Should be obtained + * from uct_md_iface_config_read() function, or point to + * transport-specific structure which extends uct_iface_config_t. + * @param [out] iface_p Filled with a handle to opened communication interface. + * + * @return Error code. + */ +ucs_status_t uct_iface_open(uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *config, + uct_iface_h *iface_p); + + +/** + * @ingroup UCT_RESOURCE + * @brief Close and destroy an interface. + * + * @param [in] iface Interface to close. + */ +void uct_iface_close(uct_iface_h iface); + + +/** + * @ingroup UCT_RESOURCE + * @brief Get interface attributes. + * + * @param [in] iface Interface to query. + * @param [out] iface_attr Filled with interface attributes. + */ +ucs_status_t uct_iface_query(uct_iface_h iface, uct_iface_attr_t *iface_attr); + + +/** + * @ingroup UCT_RESOURCE + * @brief Get address of the device the interface is using. + * + * Get underlying device address of the interface. 
All interfaces using the same + * device would return the same address. + * + * @param [in] iface Interface to query. + * @param [out] addr Filled with device address. The size of the buffer + * provided must be at least @ref uct_iface_attr_t::device_addr_len. + */ +ucs_status_t uct_iface_get_device_address(uct_iface_h iface, uct_device_addr_t *addr); + + +/** + * @ingroup UCT_RESOURCE + * @brief Get interface address. + * + * Requires @ref UCT_IFACE_FLAG_CONNECT_TO_IFACE. + * + * @param [in] iface Interface to query. + * @param [out] addr Filled with interface address. The size of the buffer + * provided must be at least @ref uct_iface_attr_t::iface_addr_len. + */ +ucs_status_t uct_iface_get_address(uct_iface_h iface, uct_iface_addr_t *addr); + + +/** + * @ingroup UCT_RESOURCE + * @brief Check if remote iface address is reachable. + * + * This function checks if a remote address can be reached from a local interface. + * If the function returns true, it does not necessarily mean a connection and/or + * data transfer would succeed; since the reachability check is a local operation, + * it does not detect issues such as network mis-configuration or lack of connectivity. + * + * @param [in] iface Interface to check reachability from. + * @param [in] dev_addr Device address to check reachability to. It is NULL + * if iface_attr.device_addr_len == 0, and must be non-NULL otherwise. + * @param [in] iface_addr Interface address to check reachability to. It is + * NULL if iface_attr.iface_addr_len == 0, and must + * be non-NULL otherwise. + * + * @return Nonzero if reachable, 0 if not. + */ +int uct_iface_is_reachable(const uct_iface_h iface, const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr); + + +/** + * @ingroup UCT_RESOURCE + * @brief Check if the destination endpoint is alive with respect to the UCT library + * + * This function checks if the destination endpoint is alive with respect to the + * UCT library.
If the status of @a ep is known, either @ref UCS_OK or an error + * is returned immediately. Otherwise, @ref UCS_INPROGRESS is returned, + * indicating that synchronization on the status is needed. In this case, the + * status will be propagated by @a comp callback. + * + * @param [in] ep Endpoint to check + * @param [in] flags Flags that define level of check + * (currently unsupported - set to 0). + * @param [in] comp Handler to process status of @a ep + * + * @return Error code. + */ +ucs_status_t uct_ep_check(const uct_ep_h ep, unsigned flags, + uct_completion_t *comp); + + +/** + * @ingroup UCT_RESOURCE + * @brief Obtain a notification file descriptor for polling. + * + * Only interfaces that support at least one of the UCT_IFACE_FLAG_EVENT* flags + * will implement this function. + * + * @param [in] iface Interface to get the notification descriptor. + * @param [out] fd_p Location to write the notification file descriptor. + * + * @return Error code. + */ +ucs_status_t uct_iface_event_fd_get(uct_iface_h iface, int *fd_p); + + +/** + * @ingroup UCT_RESOURCE + * @brief Turn on event notification for the next event. + * + * This routine needs to be called before waiting on each notification on this + * interface, so will typically be called once the processing of the previous + * event is over. + * + * @param [in] iface Interface to arm. + * @param [in] events Events to wake up on. See @ref uct_iface_event_types + * + * @return ::UCS_OK The operation completed successfully. File descriptor + * will be signaled by new events. + * @return ::UCS_ERR_BUSY There are unprocessed events which prevent the + * file descriptor from being armed. + * The operation is not completed. File descriptor + * will not be signaled by new events. + * @return @ref ucs_status_t "Other" different error codes in case of issues.
+ */ +ucs_status_t uct_iface_event_arm(uct_iface_h iface, unsigned events); + + +/** + * @ingroup UCT_RESOURCE + * @brief Allocate memory which can be used for zero-copy communications. + * + * Allocate a region of memory which can be used for zero-copy data transfer or + * remote access on a particular transport interface. + * + * @param [in] iface Interface to allocate memory on. + * @param [in] length Size of memory region to allocate. + * @param [in] flags Memory allocation flags, see @ref uct_md_mem_flags. + * @param [in] name Allocation name, for debug purposes. + * @param [out] mem Descriptor of allocated memory. + * + * @return UCS_OK if allocation was successful, error code otherwise. + */ +ucs_status_t uct_iface_mem_alloc(uct_iface_h iface, size_t length, unsigned flags, + const char *name, uct_allocated_memory_t *mem); + + +/** + * @ingroup UCT_RESOURCE + * @brief Release memory allocated with @ref uct_iface_mem_alloc(). + * + * @param [in] mem Descriptor of memory to release. + */ +void uct_iface_mem_free(const uct_allocated_memory_t *mem); + + +/** + * @ingroup UCT_AM + * @brief Set active message handler for the interface. + * + * Only one handler can be set of each active message ID, and setting a handler + * replaces the previous value. If cb == NULL, the current handler is removed. + * + * + * @param [in] iface Interface to set the active message handler for. + * @param [in] id Active message id. Must be 0..UCT_AM_ID_MAX-1. + * @param [in] cb Active message callback. NULL to clear. + * @param [in] arg Active message argument. + * @param [in] flags Required @ref uct_cb_flags "callback flags" + * + * @return error code if the interface does not support active messages or + * requested callback flags + */ +ucs_status_t uct_iface_set_am_handler(uct_iface_h iface, uint8_t id, + uct_am_callback_t cb, void *arg, uint32_t flags); + + +/** + * @ingroup UCT_AM + * @brief Set active message tracer for the interface. 
+ * + * Sets a function which dumps active message debug information to a buffer, + * which is printed every time an active message is sent or received, when + * data tracing is on. Without the tracer, only transport-level information is + * printed. + * + * @param [in] iface Interface to set the active message tracer for. + * @param [in] tracer Active message tracer. NULL to clear. + * @param [in] arg Tracer custom argument. + */ +ucs_status_t uct_iface_set_am_tracer(uct_iface_h iface, uct_am_tracer_t tracer, + void *arg); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Accept connection request. + * + * @param [in] iface Transport interface which generated connection + * request @a conn_request. + * @param [in] conn_request Connection establishment request passed as parameter + * of @ref uct_sockaddr_conn_request_callback_t. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t uct_iface_accept(uct_iface_h iface, + uct_conn_request_h conn_request); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Reject connection request. Will invoke an error handler @ref + * uct_error_handler_t on the remote transport interface, if set. + * + * @param [in] iface Interface which generated connection establishment + * request @a conn_request. + * @param [in] conn_request Connection establishment request passed as parameter + * of @ref uct_sockaddr_conn_request_callback_t. + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t uct_iface_reject(uct_iface_h iface, + uct_conn_request_h conn_request); + + +/** + * @ingroup UCT_RESOURCE + * @brief Create new endpoint. + * + * Create a UCT endpoint in one of the available modes: + * -# Unconnected endpoint: If no address is present in @ref uct_ep_params, + * this creates an unconnected endpoint. To establish a connection to a + * remote endpoint, @ref uct_ep_connect_to_ep will need to be called.
Use of + * this mode requires that @ref uct_ep_params_t::iface has the + * @ref UCT_IFACE_FLAG_CONNECT_TO_EP capability flag. It may be obtained by + * @ref uct_iface_query . + * -# Connect to a remote interface: If @ref uct_ep_params_t::dev_addr and + * @ref uct_ep_params_t::iface_addr are set, this will establish an endpoint + * that is connected to a remote interface. This requires that + * @ref uct_ep_params_t::iface has the @ref UCT_IFACE_FLAG_CONNECT_TO_IFACE + * capability flag. It may be obtained by @ref uct_iface_query . + * -# Connect to a remote socket address: If @ref uct_ep_params_t::sockaddr is + * set, this will create an endpoint that is connected to a remote socket. + * This requires that @ref uct_ep_params_t::iface has the + * @ref UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR capability flag. It may be + * obtained by @ref uct_iface_query . + * @param [in] params User defined @ref uct_ep_params_t configurations for the + * @a ep_p. + * @param [out] ep_p Filled with handle to the new endpoint. + * + * @return UCS_OK The endpoint is created successfully. This does not + * guarantee that the endpoint has been connected to + * the destination defined in @a params; in case of failure, + * the error will be reported to the interface error + * handler callback provided to @ref uct_iface_open + * via @ref uct_iface_params_t.err_handler. + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t uct_ep_create(const uct_ep_params_t *params, uct_ep_h *ep_p); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Initiate a disconnection of an endpoint connected to a + * sockaddr by a connection manager @ref uct_cm_h. + * + * This non-blocking routine will send a disconnect notification on the endpoint, + * so that @ref uct_ep_disconnect_cb_t will be called on the remote peer. + * The remote side should also call this routine when handling the initiator's + * disconnect.
+ * After a call to this function, the given endpoint may not be used for + * communications anymore. + * The @ref uct_ep_flush / @ref uct_iface_flush routines will guarantee that the + * disconnect notification is delivered to the remote peer. + * @ref uct_ep_destroy should be called on this endpoint after invoking this + * routine and @ref uct_ep_params::disconnect_cb was called. + * + * @param [in] ep Endpoint to disconnect. + * @param [in] flags Reserved for future use. + * + * @return UCS_OK Operation has completed successfully. + * UCS_ERR_BUSY The @a ep is not connected yet (either + * @ref uct_ep_client_connect_cb_t or + * @ref uct_ep_server_connect_cb_t was not + * invoked). + * UCS_INPROGRESS The disconnect request has been initiated, but + * the remote peer has not yet responded to this + * request, and consequently the registered + * callback @ref uct_ep_disconnect_cb_t has not + * been invoked to handle the request. + * UCS_ERR_NOT_CONNECTED The @a ep is disconnected locally and remotely. + * Other error codes as defined by @ref ucs_status_t . + */ +ucs_status_t uct_ep_disconnect(uct_ep_h ep, unsigned flags); + + +/** + * @ingroup UCT_RESOURCE + * @brief Destroy an endpoint. + * + * @param [in] ep Endpoint to destroy. + */ +void uct_ep_destroy(uct_ep_h ep); + + +/** + * @ingroup UCT_RESOURCE + * @brief Get endpoint address. + * + * @param [in] ep Endpoint to query. + * @param [out] addr Filled with endpoint address. The size of the buffer + * provided must be at least @ref uct_iface_attr_t::ep_addr_len. + */ +ucs_status_t uct_ep_get_address(uct_ep_h ep, uct_ep_addr_t *addr); + + +/** + * @ingroup UCT_RESOURCE + * @brief Connect endpoint to a remote endpoint. + * + * requires @ref UCT_IFACE_FLAG_CONNECT_TO_EP capability. + * + * @param [in] ep Endpoint to connect. + * @param [in] dev_addr Remote device address. + * @param [in] ep_addr Remote endpoint address. 
+ */ +ucs_status_t uct_ep_connect_to_ep(uct_ep_h ep, const uct_device_addr_t *dev_addr, + const uct_ep_addr_t *ep_addr); + + +/** + * @ingroup UCT_MD + * @brief Query for memory domain attributes. + * + * @param [in] md Memory domain to query. + * @param [out] md_attr Filled with memory domain attributes. + */ +ucs_status_t uct_md_query(uct_md_h md, uct_md_attr_t *md_attr); + + +/** + * @ingroup UCT_MD + * @brief Allocate memory for zero-copy sends and remote access. + * + * Allocate memory on the memory domain. In order to use this function, MD + * must support @ref UCT_MD_FLAG_ALLOC flag. + * + * @param [in] md Memory domain to allocate memory on. + * @param [in,out] length_p Points to the size of memory to allocate. Upon successful + * return, filled with the actual size that was allocated, + * which may be larger than the one requested. Must be >0. + * @param [in,out] address_p The address + * @param [in] flags Memory allocation flags, see @ref uct_md_mem_flags. + * @param [in] name Name of the allocated region, used to track memory + * usage for debugging and profiling. + * @param [out] memh_p Filled with handle for allocated region. + */ +ucs_status_t uct_md_mem_alloc(uct_md_h md, size_t *length_p, void **address_p, + unsigned flags, const char *name, uct_mem_h *memh_p); + + +/** + * @ingroup UCT_MD + * @brief Release memory allocated by @ref uct_md_mem_alloc. + * + * @param [in] md Memory domain memory was allocated on. + * @param [in] memh Memory handle, as returned from @ref uct_md_mem_alloc. + */ +ucs_status_t uct_md_mem_free(uct_md_h md, uct_mem_h memh); + + +/** + * @ingroup UCT_MD + * @brief Give advice about the use of memory + * + * This routine advises the UCT about how to handle memory range beginning at + * address and size of length bytes. This call does not influence the semantics + * of the application, but may influence its performance. The advice may be + * ignored. + * + * @param [in] md Memory domain memory was allocated or registered on. 
+ * @param [in] memh Memory handle, as returned from @ref uct_md_mem_alloc + * @param [in] addr Memory base address. Memory range must belong to the + * @a memh + * @param [in] length Length of memory to advise. Must be >0. + * @param [in] advice Memory use advice as defined in the + * @ref uct_mem_advice_t list + */ +ucs_status_t uct_md_mem_advise(uct_md_h md, uct_mem_h memh, void *addr, + size_t length, uct_mem_advice_t advice); + + +/** + * @ingroup UCT_MD + * @brief Register memory for zero-copy sends and remote access. + * + * Register memory on the memory domain. In order to use this function, MD + * must support @ref UCT_MD_FLAG_REG flag. + * + * @param [in] md Memory domain to register memory on. + * @param [in] address Memory to register. + * @param [in] length Size of memory to register. Must be >0. + * @param [in] flags Memory allocation flags, see @ref uct_md_mem_flags. + * @param [out] memh_p Filled with handle for the registered region. + */ +ucs_status_t uct_md_mem_reg(uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p); + + +/** + * @ingroup UCT_MD + * @brief Undo the operation of @ref uct_md_mem_reg(). + * + * @param [in] md Memory domain which was used to register the memory. + * @param [in] memh Local access key to memory region. + */ +ucs_status_t uct_md_mem_dereg(uct_md_h md, uct_mem_h memh); + + +/** + * @ingroup UCT_MD + * @brief Detect memory type + * + * + * @param [in] md Memory domain to detect memory type + * @param [in] addr Memory address to detect.
+ * @param [in] length Size of memory + * @param [out] mem_type_p Filled with memory type of the address range if + function succeeds + * @return UCS_OK If memory type is successfully detected + * UCS_ERR_INVALID_ADDR If failed to detect memory type + */ +ucs_status_t uct_md_detect_memory_type(uct_md_h md, const void *addr, + size_t length, + ucs_memory_type_t *mem_type_p); + + +/** + * @ingroup UCT_MD + * @brief Allocate memory for zero-copy communications and remote access. + * + * Allocate potentially registered memory. Every one of the provided allocation + * methods will be used, in turn, to perform the allocation, until one succeeds. + * Whenever the MD method is encountered, every one of the provided MDs will be + * used, in turn, to allocate the memory, until one succeeds, or they are + * exhausted. In this case the next allocation method from the initial list will + * be attempted. + * + * @param [in] addr If @a addr is NULL, the underlying allocation routine + * will choose the address at which to create the mapping. + * If @a addr is non-NULL but UCT_MD_MEM_FLAG_FIXED is + * not set, the address will be interpreted as a hint + * as to where to establish the mapping. If @a addr is + * non-NULL and UCT_MD_MEM_FLAG_FIXED is set, then + * the specified address is interpreted as a requirement. + * In this case, if the mapping to the exact address + * cannot be made, the allocation request fails. + * @param [in] min_length Minimal size to allocate. The actual size may be + * larger, for example because of alignment restrictions. + * @param [in] flags Memory allocation flags, see @ref uct_md_mem_flags. + * @param [in] methods Array of memory allocation methods to attempt. + * @param [in] num_methods Length of 'methods' array. + * @param [in] mds Array of memory domains to attempt to allocate + * the memory with, for MD allocation method. + * @param [in] num_mds Length of 'mds' array. 
May be empty, in which case + * 'mds' may be NULL, and MD allocation method will + * be skipped. + * @param [in] name Name of the allocation. Used for memory statistics. + * @param [out] mem In case of success, filled with information about + * the allocated memory. @ref uct_allocated_memory_t. + */ +ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags, + uct_alloc_method_t *methods, unsigned num_methods, + uct_md_h *mds, unsigned num_mds, const char *name, + uct_allocated_memory_t *mem); + + +/** + * @ingroup UCT_MD + * @brief Release allocated memory. + * + * Release the memory allocated by @ref uct_mem_alloc. + * + * @param [in] mem Description of allocated memory, as returned from + * @ref uct_mem_alloc. + */ +ucs_status_t uct_mem_free(const uct_allocated_memory_t *mem); + +/** + * @ingroup UCT_MD + * @brief Read the configuration for a memory domain. + * + * @param [in] component Read the configuration of this component. + * @param [in] env_prefix If non-NULL, search for environment variables + * starting with this UCT_<prefix>_. Otherwise, search + * for environment variables starting with just UCT_. + * @param [in] filename If non-NULL, read configuration from this file. If + * the file does not exist, it will be ignored. + * @param [out] config_p Filled with a pointer to the configuration. + * + * @return Error code. + */ +ucs_status_t uct_md_config_read(uct_component_h component, + const char *env_prefix, const char *filename, + uct_md_config_t **config_p); + + + +/** + * @ingroup UCT_MD + * @brief Check if remote sock address is accessible from the memory domain. + * + * This function checks if a remote sock address can be accessed from a local + * memory domain. Accessibility can be checked in local or remote mode. + * + * @param [in] md Memory domain to check accessibility from. + * This memory domain must support the @ref + * UCT_MD_FLAG_SOCKADDR flag. + * @param [in] sockaddr Socket address to check accessibility to.
+ * @param [in] mode Mode for checking accessibility, as defined in @ref + * uct_sockaddr_accessibility_t. + * Indicates if accessibility is tested on the server side - + * for binding to the given sockaddr, or on the + * client side - for connecting to the given remote + * peer's sockaddr. + * + * @return Nonzero if accessible, 0 if inaccessible. + */ +int uct_md_is_sockaddr_accessible(uct_md_h md, const ucs_sock_addr_t *sockaddr, + uct_sockaddr_accessibility_t mode); + + +/** + * @ingroup UCT_MD + * + * @brief Pack a remote key. + * + * @param [in] md Handle to memory domain. + * @param [in] memh Local key, whose remote key should be packed. + * @param [out] rkey_buffer Filled with packed remote key. + * + * @return Error code. + */ +ucs_status_t uct_md_mkey_pack(uct_md_h md, uct_mem_h memh, void *rkey_buffer); + + +/** + * @ingroup UCT_MD + * + * @brief Unpack a remote key. + * + * @param [in] component Component on which to unpack the remote key. + * @param [in] rkey_buffer Packed remote key buffer. + * @param [out] rkey_ob Filled with the unpacked remote key and its type. + * + * @note The remote key must be unpacked with the same component that was used + * to pack it. For example, if a remote device address on the remote + * memory domain which was used to pack the key is reachable by a + * transport on a local component, then that component is eligible to + * unpack the key. + * If the remote key buffer cannot be unpacked with the given component, + * UCS_ERR_INVALID_PARAM will be returned. + * + * @return Error code. + */ +ucs_status_t uct_rkey_unpack(uct_component_h component, const void *rkey_buffer, + uct_rkey_bundle_t *rkey_ob); + + +/** + * @ingroup UCT_MD + * + * @brief Get a local pointer to remote memory. + * + * This routine returns a local pointer to the remote memory + * described by the rkey bundle. The MD must support + * @ref UCT_MD_FLAG_RKEY_PTR flag. 
+ * + * @param [in] component Component on which to obtain the pointer to the + * remote key. + * @param [in] rkey_ob A remote key bundle as returned by + * the @ref uct_rkey_unpack function. + * @param [in] remote_addr A remote address within the memory area described + * by the rkey_ob. + * @param [out] addr_p A pointer that can be used for direct access to + * the remote memory. + * + * @note The component used to obtain a local pointer to the remote memory must + * be the same component that was used to pack the remote key. See notes + * section for @ref uct_rkey_unpack. + * + * @return Error code if the remote memory cannot be accessed directly or + * the remote address is not valid. + */ +ucs_status_t uct_rkey_ptr(uct_component_h component, uct_rkey_bundle_t *rkey_ob, + uint64_t remote_addr, void **addr_p); + + +/** + * @ingroup UCT_MD + * + * @brief Release a remote key. + * + * @param [in] component Component which was used to unpack the remote key. + * @param [in] rkey_ob Remote key to release. + */ +ucs_status_t uct_rkey_release(uct_component_h component, + const uct_rkey_bundle_t *rkey_ob); + + +/** + * @ingroup UCT_CONTEXT + * @brief Explicit progress for UCT worker. + * + * This routine explicitly progresses any outstanding communication operations + * and active message requests. + * + * @note @li In the current implementation, users @b MUST call this routine + * to receive the active message requests. + * + * @param [in] worker Handle to worker. + * + * @return Nonzero if any communication was progressed, zero otherwise. + */ +UCT_INLINE_API unsigned uct_worker_progress(uct_worker_h worker) +{ + return ucs_callbackq_dispatch(&worker->progress_q); +} + + +/** + * @ingroup UCT_RESOURCE + * @brief Flush outstanding communication operations on an interface. + * + * Flushes all outstanding communications issued on the interface prior to + * this call. The operations are completed at the origin or at the target + * as well. 
The exact completion semantic depends on @a flags parameter. + * + * @note Currently only one completion type is supported. It guarantees that + * the data transfer is completed but the target buffer may not be updated yet. + * + * @param [in] iface Interface to flush communications from. + * @param [in] flags Flags that control completion semantic (currently only + * @ref UCT_FLUSH_FLAG_LOCAL is supported). + * @param [inout] comp Completion handle as defined by @ref uct_completion_t. + * Can be NULL, which means that the call will return the + * current state of the interface and no completion will + * be generated in case of outstanding communications. + * If it is not NULL completion counter is decremented + * by 1 when the call completes. Completion callback is + * called when the counter reaches 0. + * + * + * @return UCS_OK - No outstanding communications left. + * UCS_INPROGRESS - Some communication operations are still in progress. + * If non-NULL 'comp' is provided, it will be updated + * upon completion of these operations. + */ +UCT_INLINE_API ucs_status_t uct_iface_flush(uct_iface_h iface, unsigned flags, + uct_completion_t *comp) +{ + return iface->ops.iface_flush(iface, flags, comp); +} + +/** + * @ingroup UCT_RESOURCE + * @brief Ensures ordering of outstanding communications on the interface. + * Operations issued on the interface prior to this call are guaranteed to + * be completed before any subsequent communication operations to the same + * interface which follow the call to fence. + * + * @param [in] iface Interface to issue communications from. + * @param [in] flags Flags that control ordering semantic (currently + * unsupported - set to 0). + * @return UCS_OK - Ordering is inserted. 
+ */ + +UCT_INLINE_API ucs_status_t uct_iface_fence(uct_iface_h iface, unsigned flags) +{ + return iface->ops.iface_fence(iface, flags); +} + +/** + * @ingroup UCT_AM + * @brief Release AM descriptor + * + * Release active message descriptor @a desc, which was passed to + * @ref uct_am_callback_t "the active message callback", and owned by the callee. + * + * @param [in] desc Descriptor to release. + */ +UCT_INLINE_API void uct_iface_release_desc(void *desc) +{ + uct_recv_desc_t *release_desc = uct_recv_desc(desc); + release_desc->cb(release_desc, desc); +} + + +/** + * @ingroup UCT_RMA + * @brief + */ +UCT_INLINE_API ucs_status_t uct_ep_put_short(uct_ep_h ep, const void *buffer, unsigned length, + uint64_t remote_addr, uct_rkey_t rkey) +{ + return ep->iface->ops.ep_put_short(ep, buffer, length, remote_addr, rkey); +} + + +/** + * @ingroup UCT_RMA + * @brief + */ +UCT_INLINE_API ssize_t uct_ep_put_bcopy(uct_ep_h ep, uct_pack_callback_t pack_cb, + void *arg, uint64_t remote_addr, + uct_rkey_t rkey) +{ + return ep->iface->ops.ep_put_bcopy(ep, pack_cb, arg, remote_addr, rkey); +} + + +/** + * @ingroup UCT_RMA + * @brief Write data to remote memory while avoiding local memory copy + * + * The input data in @a iov array of @ref ::uct_iov_t structures sent to remote + * address ("gather output"). Buffers in @a iov are processed in array order. + * This means that the function complete iov[0] before proceeding to + * iov[1], and so on. + * + * + * @param [in] ep Destination endpoint handle. + * @param [in] iov Points to an array of @ref ::uct_iov_t structures. + * The @a iov pointer must be a valid address of an array + * of @ref ::uct_iov_t structures. A particular structure + * pointer must be a valid address. A NULL terminated + * array is not required. + * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures + * array. If @a iovcnt is zero, the data is considered empty. 
+ * @a iovcnt is limited by @ref uct_iface_attr_cap_put_max_iov + * "uct_iface_attr::cap::put::max_iov". + * @param [in] remote_addr Remote address to place the @a iov data. + * @param [in] rkey Remote key descriptor provided by @ref ::uct_rkey_unpack + * @param [in] comp Completion handle as defined by @ref ::uct_completion_t. + * + * @return UCS_INPROGRESS Some communication operations are still in progress. + * If non-NULL @a comp is provided, it will be updated + * upon completion of these operations. + * + */ +UCT_INLINE_API ucs_status_t uct_ep_put_zcopy(uct_ep_h ep, + const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + return ep->iface->ops.ep_put_zcopy(ep, iov, iovcnt, remote_addr, rkey, comp); +} + + +/** + * @ingroup UCT_RMA + * @brief + */ +UCT_INLINE_API ucs_status_t uct_ep_get_short(uct_ep_h ep, void *buffer, unsigned length, + uint64_t remote_addr, uct_rkey_t rkey) +{ + return ep->iface->ops.ep_get_short(ep, buffer, length, remote_addr, rkey); +} + + +/** + * @ingroup UCT_RMA + * @brief + */ +UCT_INLINE_API ucs_status_t uct_ep_get_bcopy(uct_ep_h ep, uct_unpack_callback_t unpack_cb, + void *arg, size_t length, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + return ep->iface->ops.ep_get_bcopy(ep, unpack_cb, arg, length, remote_addr, + rkey, comp); +} + + +/** + * @ingroup UCT_RMA + * @brief Read data from remote memory while avoiding local memory copy + * + * The output data in @a iov array of @ref ::uct_iov_t structures received from + * remote address ("scatter input"). Buffers in @a iov are processed in array order. + * This means that the function complete iov[0] before proceeding to + * iov[1], and so on. + * + * + * @param [in] ep Destination endpoint handle. + * @param [in] iov Points to an array of @ref ::uct_iov_t structures. + * The @a iov pointer must be a valid address of an array + * of @ref ::uct_iov_t structures. 
A particular structure + * pointer must be a valid address. A NULL terminated + * array is not required. + * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures + * array. If @a iovcnt is zero, the data is considered empty. + * @a iovcnt is limited by @ref uct_iface_attr_cap_get_max_iov + * "uct_iface_attr::cap::get::max_iov". + * @param [in] remote_addr Remote address of the data placed to the @a iov. + * @param [in] rkey Remote key descriptor provided by @ref ::uct_rkey_unpack + * @param [in] comp Completion handle as defined by @ref ::uct_completion_t. + * + * @return UCS_INPROGRESS Some communication operations are still in progress. + * If non-NULL @a comp is provided, it will be updated + * upon completion of these operations. + * + */ +UCT_INLINE_API ucs_status_t uct_ep_get_zcopy(uct_ep_h ep, + const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + return ep->iface->ops.ep_get_zcopy(ep, iov, iovcnt, remote_addr, rkey, comp); +} + + +/** + * @ingroup UCT_AM + * @brief + */ +UCT_INLINE_API ucs_status_t uct_ep_am_short(uct_ep_h ep, uint8_t id, uint64_t header, + const void *payload, unsigned length) +{ + return ep->iface->ops.ep_am_short(ep, id, header, payload, length); +} + + +/** + * @ingroup UCT_AM + * @brief + */ +UCT_INLINE_API ssize_t uct_ep_am_bcopy(uct_ep_h ep, uint8_t id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags) +{ + return ep->iface->ops.ep_am_bcopy(ep, id, pack_cb, arg, flags); +} + + +/** + * @ingroup UCT_AM + * @brief Send active message while avoiding local memory copy + * + * The input data in @a iov array of @ref ::uct_iov_t structures sent to remote + * side ("gather output"). Buffers in @a iov are processed in array order. + * This means that the function complete iov[0] before proceeding to + * iov[1], and so on. + * + * + * @param [in] ep Destination endpoint handle. + * @param [in] id Active message id. Must be in range 0..UCT_AM_ID_MAX-1. 
+ * @param [in] header Active message header. + * @param [in] header_length Active message header length in bytes. + * @param [in] iov Points to an array of @ref ::uct_iov_t structures. + * The @a iov pointer must be a valid address of an array + * of @ref ::uct_iov_t structures. A particular structure + * pointer must be a valid address. A NULL terminated + * array is not required. + * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures + * array. If @a iovcnt is zero, the data is considered empty. + * @a iovcnt is limited by @ref uct_iface_attr_cap_am_max_iov + * "uct_iface_attr::cap::am::max_iov". + * @param [in] flags Active message flags, see @ref uct_msg_flags. + * @param [in] comp Completion handle as defined by @ref ::uct_completion_t. + * + * @return UCS_OK Operation completed successfully. + * @return UCS_INPROGRESS Some communication operations are still in progress. + * If non-NULL @a comp is provided, it will be updated + * upon completion of these operations. + * @return UCS_ERR_NO_RESOURCE Could not start the operation due to lack of send + * resources. + * + * @note If the operation returns @a UCS_INPROGRESS, the memory buffers + * pointed to by @a iov array must not be modified until the operation + * is completed by @a comp. @a header can be released or changed. 
+ */ +UCT_INLINE_API ucs_status_t uct_ep_am_zcopy(uct_ep_h ep, uint8_t id, + const void *header, + unsigned header_length, + const uct_iov_t *iov, size_t iovcnt, + unsigned flags, + uct_completion_t *comp) +{ + return ep->iface->ops.ep_am_zcopy(ep, id, header, header_length, iov, iovcnt, + flags, comp); +} + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t uct_ep_atomic_cswap64(uct_ep_h ep, uint64_t compare, uint64_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t *result, uct_completion_t *comp) +{ + return ep->iface->ops.ep_atomic_cswap64(ep, compare, swap, remote_addr, rkey, result, comp); +} + + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t uct_ep_atomic_cswap32(uct_ep_h ep, uint32_t compare, uint32_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint32_t *result, uct_completion_t *comp) +{ + return ep->iface->ops.ep_atomic_cswap32(ep, compare, swap, remote_addr, rkey, result, comp); +} + + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t uct_ep_atomic32_post(uct_ep_h ep, uct_atomic_op_t opcode, + uint32_t value, uint64_t remote_addr, + uct_rkey_t rkey) +{ + return ep->iface->ops.ep_atomic32_post(ep, opcode, value, remote_addr, rkey); +} + + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t uct_ep_atomic64_post(uct_ep_h ep, uct_atomic_op_t opcode, + uint64_t value, uint64_t remote_addr, + uct_rkey_t rkey) +{ + return ep->iface->ops.ep_atomic64_post(ep, opcode, value, remote_addr, rkey); +} + + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t uct_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint32_t value, uint32_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + return ep->iface->ops.ep_atomic32_fetch(ep, opcode, value, result, + remote_addr, rkey, comp); +} + + +/** + * @ingroup UCT_AMO + * @brief + */ +UCT_INLINE_API ucs_status_t uct_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + 
uint64_t value, uint64_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + return ep->iface->ops.ep_atomic64_fetch(ep, opcode, value, result, + remote_addr, rkey, comp); +} + + +/** + * @ingroup UCT_RESOURCE + * @brief Add a pending request to an endpoint. + * + * Add a pending request to the endpoint pending queue. The request will be + * dispatched when the endpoint could potentially have additional send resources. + * + * @param [in] ep Endpoint to add the pending request to. + * @param [in] req Pending request, which would be dispatched when more + * resources become available. The user is expected to initialize + * the "func" field. + * After being passed to the function, the request is owned by UCT, + * until the callback is called and returns UCS_OK. + * @param [in] flags Flags that control pending request processing (see @ref uct_cb_flags) + * + * @return UCS_OK - request added to pending queue + * UCS_ERR_BUSY - request was not added to pending queue, because send + * resources are available now. The user is advised to + * retry. + */ +UCT_INLINE_API ucs_status_t uct_ep_pending_add(uct_ep_h ep, + uct_pending_req_t *req, + unsigned flags) +{ + return ep->iface->ops.ep_pending_add(ep, req, flags); +} + + +/** + * @ingroup UCT_RESOURCE + * @brief Remove all pending requests from an endpoint. + * + * Remove pending requests from the given endpoint and pass them to the provided + * callback function. The callback return value is ignored. + * + * @param [in] ep Endpoint to remove pending requests from. + * @param [in] cb Callback to pass the removed requests to. + * @param [in] arg Argument to pass to the @a cb callback. + */ +UCT_INLINE_API void uct_ep_pending_purge(uct_ep_h ep, + uct_pending_purge_callback_t cb, + void *arg) +{ + ep->iface->ops.ep_pending_purge(ep, cb, arg); +} + + +/** + * @ingroup UCT_RESOURCE + * @brief Flush outstanding communication operations on an endpoint. 
+ * + * Flushes all outstanding communications issued on the endpoint prior to + * this call. The operations are completed at the origin or at the target + * as well. The exact completion semantic depends on @a flags parameter. + * + * @param [in] ep Endpoint to flush communications from. + * @param [in] flags Flags @ref uct_flush_flags that control completion + * semantic. + * @param [inout] comp Completion handle as defined by @ref uct_completion_t. + * Can be NULL, which means that the call will return the + * current state of the endpoint and no completion will + * be generated in case of outstanding communications. + * If it is not NULL completion counter is decremented + * by 1 when the call completes. Completion callback is + * called when the counter reaches 0. + * + * @return UCS_OK - No outstanding communications left. + * UCS_ERR_NO_RESOURCE - Flush operation could not be initiated. A subsequent + * call to @ref uct_ep_pending_add would add a pending + * operation, which provides an opportunity to retry + * the flush. + * UCS_INPROGRESS - Some communication operations are still in progress. + * If non-NULL 'comp' is provided, it will be updated + * upon completion of these operations. + */ +UCT_INLINE_API ucs_status_t uct_ep_flush(uct_ep_h ep, unsigned flags, + uct_completion_t *comp) +{ + return ep->iface->ops.ep_flush(ep, flags, comp); +} + + +/** + * @ingroup UCT_RESOURCE + * @brief Ensures ordering of outstanding communications on the endpoint. + * Operations issued on the endpoint prior to this call are guaranteed to + * be completed before any subsequent communication operations to the same + * endpoint which follow the call to fence. + * + * @param [in] ep Endpoint to issue communications from. + * @param [in] flags Flags that control ordering semantic (currently + * unsupported - set to 0). + * @return UCS_OK - Ordering is inserted. 
+ */ +UCT_INLINE_API ucs_status_t uct_ep_fence(uct_ep_h ep, unsigned flags) +{ + return ep->iface->ops.ep_fence(ep, flags); +} + + +/** + * @ingroup UCT_TAG + * @brief Short eager tagged-send operation. + * + * This routine sends a message using @ref uct_short_protocol_desc "short" + * eager protocol. Eager protocol means that the whole data is sent to the peer + * immediately without any preceding notification. + * The data is provided as a buffer and its length, and must not be larger than the + * corresponding @a max_short value in @ref uct_iface_attr. + * The immediate value delivered to the receiver is implicitly equal to 0. + * If it's required to pass a nonzero imm value, @ref uct_ep_tag_eager_bcopy + * should be used. + * + * @param [in] ep Destination endpoint handle. + * @param [in] tag Tag to use for the eager message. + * @param [in] data Data to send. + * @param [in] length Data length. + * + * @return UCS_OK - operation completed successfully. + * @return UCS_ERR_NO_RESOURCE - could not start the operation due to lack of + * send resources. + */ +UCT_INLINE_API ucs_status_t uct_ep_tag_eager_short(uct_ep_h ep, uct_tag_t tag, + const void *data, size_t length) +{ + return ep->iface->ops.ep_tag_eager_short(ep, tag, data, length); +} + + +/** + * @ingroup UCT_TAG + * @brief Bcopy eager tagged-send operation. + * + * This routine sends a message using @ref uct_bcopy_protocol_desc "bcopy" + * eager protocol. Eager protocol means that the whole data is sent to the peer + * immediately without any preceding notification. + * Custom data callback is used to copy the data to the network buffers. + * + * @note The resulting data length must not be larger than the corresponding + * @a max_bcopy value in @ref uct_iface_attr. + * + * @param [in] ep Destination endpoint handle. + * @param [in] tag Tag to use for the eager message. + * @param [in] imm Immediate value which will be available to the + * receiver. + * @param [in] pack_cb User callback to pack the data.
+ * @param [in] arg Custom argument to @a pack_cb. + * @param [in] flags Tag message flags, see @ref uct_msg_flags. + * + * @return >=0 - The size of the data packed by @a pack_cb. + * @return otherwise - Error code. + */ +UCT_INLINE_API ssize_t uct_ep_tag_eager_bcopy(uct_ep_h ep, uct_tag_t tag, + uint64_t imm, + uct_pack_callback_t pack_cb, + void *arg, unsigned flags) +{ + return ep->iface->ops.ep_tag_eager_bcopy(ep, tag, imm, pack_cb, arg, flags); +} + + +/** + * @ingroup UCT_TAG + * @brief Zcopy eager tagged-send operation. + * + * This routine sends a message using @ref uct_zcopy_protocol_desc "zcopy" + * eager protocol. Eager protocol means that the whole data is sent to the peer + * immediately without any preceding notification. + * The input data (which has to be previously registered) in the @a iov array of + * @ref uct_iov_t structures is sent to the remote side ("gather output"). Buffers in + * @a iov are processed in array order, so the function completes @a iov[0] + * before proceeding to @a iov[1], and so on. + * + * @note The resulting data length must not be larger than the corresponding + * @a max_zcopy value in @ref uct_iface_attr. + * + * @param [in] ep Destination endpoint handle. + * @param [in] tag Tag to use for the eager message. + * @param [in] imm Immediate value which will be available to the + * receiver. + * @param [in] iov Points to an array of @ref uct_iov_t structures. + * A particular structure pointer must be a valid address. + * A NULL terminated array is not required. + * @param [in] iovcnt Size of the @a iov array. If @a iovcnt is zero, the + * data is considered empty. Note that @a iovcnt is + * limited by the corresponding @a max_iov value in + * @ref uct_iface_attr. + * @param [in] flags Tag message flags, see @ref uct_msg_flags. + * @param [in] comp Completion callback which will be called when the data + * is reliably received by the peer, and the buffer + * can be reused or invalidated.
+ * + * @return UCS_OK - operation completed successfully. + * @return UCS_ERR_NO_RESOURCE - could not start the operation due to lack of + * send resources. + * @return UCS_INPROGRESS - operation started, and @a comp will be used to + * notify when it's completed. + */ +UCT_INLINE_API ucs_status_t uct_ep_tag_eager_zcopy(uct_ep_h ep, uct_tag_t tag, + uint64_t imm, + const uct_iov_t *iov, + size_t iovcnt, + unsigned flags, + uct_completion_t *comp) +{ + return ep->iface->ops.ep_tag_eager_zcopy(ep, tag, imm, iov, iovcnt, flags, + comp); +} + + +/** + * @ingroup UCT_TAG + * @brief Rendezvous tagged-send operation. + * + * This routine sends a message using rendezvous protocol. Rendezvous protocol + * means that only a small notification is sent at first, and the data itself + * is transferred later (when there is a match) to avoid extra memory copy. + * + * @note The header will be available to the receiver in case of unexpected + * rendezvous operation only, i.e. the peer has not posted a tag for this + * message yet (by means of @ref uct_iface_tag_recv_zcopy), when it + * arrives. + * + * @param [in] ep Destination endpoint handle. + * @param [in] tag Tag to use for the rendezvous message. + * @param [in] header User defined header. + * @param [in] header_length User defined header length in bytes. Note that + * it is limited by the corresponding @a max_hdr + * value in @ref uct_iface_attr. + * @param [in] iov Points to an array of @ref uct_iov_t structures. + * A particular structure pointer must be a valid + * address. A NULL terminated array is not required. + * @param [in] iovcnt Size of the @a iov array. If @a iovcnt is zero, + * the data is considered empty. Note that @a iovcnt + * is limited by the corresponding @a max_iov value + * in @ref uct_iface_attr. + * @param [in] flags Tag message flags, see @ref uct_msg_flags.
+ * @param [in] comp Completion callback which will be called when the + * data is reliably received by the peer, and the + * buffer can be reused or invalidated. + * + * @return >=0 - The operation is in progress and the return value is a + * handle which can be used to cancel the outstanding + * rendezvous operation. + * @return otherwise - Error code. + */ +UCT_INLINE_API ucs_status_ptr_t uct_ep_tag_rndv_zcopy(uct_ep_h ep, uct_tag_t tag, + const void *header, + unsigned header_length, + const uct_iov_t *iov, + size_t iovcnt, + unsigned flags, + uct_completion_t *comp) +{ + return ep->iface->ops.ep_tag_rndv_zcopy(ep, tag, header, header_length, + iov, iovcnt, flags, comp); +} + + +/** + * @ingroup UCT_TAG + * @brief Cancel outstanding rendezvous operation. + * + * This routine signals the underlying transport to disregard the outstanding + * operation without calling the completion callback provided in + * @ref uct_ep_tag_rndv_zcopy. + * + * @note The operation handle should be valid at the time the routine is + * invoked. I.e. it should be a handle of the real operation which is not + * completed yet. + * + * @param [in] ep Destination endpoint handle. + * @param [in] op Rendezvous operation handle, as returned from + * @ref uct_ep_tag_rndv_zcopy. + * + * @return UCS_OK - The operation has been canceled. + */ +UCT_INLINE_API ucs_status_t uct_ep_tag_rndv_cancel(uct_ep_h ep, void *op) +{ + return ep->iface->ops.ep_tag_rndv_cancel(ep, op); +} + + +/** + * @ingroup UCT_TAG + * @brief Send software rendezvous request. + * + * This routine sends a rendezvous request only, which indicates that the data + * transfer should be completed in software. + * + * @param [in] ep Destination endpoint handle. + * @param [in] tag Tag to use for matching. + * @param [in] header User defined header. + * @param [in] header_length User defined header length in bytes. Note that it + * is limited by the corresponding @a max_hdr value + * in @ref uct_iface_attr.
+ * @param [in] flags Tag message flags, see @ref uct_msg_flags. + * + * @return UCS_OK - operation completed successfully. + * @return UCS_ERR_NO_RESOURCE - could not start the operation due to lack of + * send resources. + */ +UCT_INLINE_API ucs_status_t uct_ep_tag_rndv_request(uct_ep_h ep, uct_tag_t tag, + const void* header, + unsigned header_length, + unsigned flags) +{ + return ep->iface->ops.ep_tag_rndv_request(ep, tag, header, header_length, + flags); +} + + +/** + * @ingroup UCT_TAG + * @brief Post a tag to a transport interface. + * + * This routine posts a tag to be matched on a transport interface. When a + * message with the corresponding tag arrives it is stored in the user buffer + * (described by @a iov and @a iovcnt) directly. The operation completion is + * reported using callbacks on the @a ctx structure. + * + * @param [in] iface Interface to post the tag on. + * @param [in] tag Tag to expect. + * @param [in] tag_mask Mask which specifies what bits of the tag to + * compare. + * @param [in] iov Points to an array of @ref ::uct_iov_t structures. + * The @a iov pointer must be a valid address of an array + * of @ref ::uct_iov_t structures. A particular structure + * pointer must be a valid address. A NULL terminated + * array is not required. + * @param [in] iovcnt Size of the @a iov data @ref ::uct_iov_t structures + * array. If @a iovcnt is zero, the data is considered empty. + * @a iovcnt is limited by @ref uct_iface_attr_cap_tag_recv_iov + * "uct_iface_attr::cap::tag::max_iov". + * @param [inout] ctx Context associated with this particular tag, "priv" field + * in this structure is used to track the state internally. + * + * @return UCS_OK - The tag is posted to the transport. + * @return UCS_ERR_NO_RESOURCE - Could not start the operation due to lack of + * resources. + * @return UCS_ERR_EXCEEDS_LIMIT - No more room for tags in the transport. 
+ */ +UCT_INLINE_API ucs_status_t uct_iface_tag_recv_zcopy(uct_iface_h iface, + uct_tag_t tag, + uct_tag_t tag_mask, + const uct_iov_t *iov, + size_t iovcnt, + uct_tag_context_t *ctx) +{ + return iface->ops.iface_tag_recv_zcopy(iface, tag, tag_mask, iov, iovcnt, ctx); +} + + +/** + * @ingroup UCT_TAG + * @brief Cancel a posted tag. + * + * This routine cancels a tag, which was previously posted by + * @ref uct_iface_tag_recv_zcopy. The tag would be either matched or canceled, + * in a bounded time, regardless of the peer actions. The original completion + * callback of the tag would be called with the status if @a force is not set. + * + * @param [in] iface Interface to cancel the tag on. + * @param [in] ctx Tag context which was used for posting the tag. If + * force is 0, @a ctx->completed_cb will be called with + * either UCS_OK which means the tag was matched and data + * received despite the cancel request, or + * UCS_ERR_CANCELED which means the tag was successfully + * canceled before it was matched. + * @param [in] force Whether to report completions to @a ctx->completed_cb. + * If nonzero, the cancel is assumed to be successful, + * and the callback is not called. + * + * @return UCS_OK - The tag is canceled in the transport. + */ +UCT_INLINE_API ucs_status_t uct_iface_tag_recv_cancel(uct_iface_h iface, + uct_tag_context_t *ctx, + int force) +{ + return iface->ops.iface_tag_recv_cancel(iface, ctx, force); +} + + +/** + * @ingroup UCT_RESOURCE + * @brief Enable synchronous progress for the interface + * + * Notify the transport that it should actively progress communications during + * @ref uct_worker_progress(). + * + * When the interface is created, its progress is initially disabled. + * + * @param [in] iface The interface to enable progress. 
+ * @param [in] flags The type of progress to enable as defined by + * @ref uct_progress_types. + * + * @note This function is not thread safe with respect to + * @ref uct_worker_progress(), unless the flag + * @ref UCT_PROGRESS_THREAD_SAFE is specified. + * + */ +UCT_INLINE_API void uct_iface_progress_enable(uct_iface_h iface, unsigned flags) +{ + iface->ops.iface_progress_enable(iface, flags); +} + + +/** + * @ingroup UCT_RESOURCE + * @brief Disable synchronous progress for the interface + * + * Notify the transport that it should not progress its communications during + * @ref uct_worker_progress(). Thus the latency of other transports may be + * improved. + * + * By default, progress is disabled when the interface is created. + * + * @param [in] iface The interface to disable progress. + * @param [in] flags The type of progress to disable as defined by + * @ref uct_progress_types. + * + * @note This function is not thread safe with respect to + * @ref uct_worker_progress(), unless the flag + * @ref UCT_PROGRESS_THREAD_SAFE is specified. + * + */ +UCT_INLINE_API void uct_iface_progress_disable(uct_iface_h iface, unsigned flags) +{ + iface->ops.iface_progress_disable(iface, flags); +} + + +/** + * @ingroup UCT_RESOURCE + * @brief Perform progress on an interface. + */ +UCT_INLINE_API unsigned uct_iface_progress(uct_iface_h iface) +{ + return iface->ops.iface_progress(iface); +} + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Open a connection manager. + * + * Open a connection manager. All client server connection + * establishment operations are performed in the context of a specific + * connection manager. + * @note This is an alternative API for + * @ref uct_iface_open_mode::UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER and + * @ref uct_iface_open_mode::UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT . + * + * @param [in] component Component on which to open the connection manager, + * as returned from @ref uct_query_components.
+ * @param [in] worker Worker on which to open the connection manager. + * @param [in] config CM configuration options. Either obtained + * from @ref uct_cm_config_read() function, or a pointer + * to a CM-specific structure that extends + * @ref uct_cm_config_t. + * @param [out] cm_p Filled with a handle to the connection manager. + * + * @return Error code. + */ +ucs_status_t uct_cm_open(uct_component_h component, uct_worker_h worker, + const uct_cm_config_t *config, uct_cm_h *cm_p); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Close a connection manager. + * + * @param [in] cm Connection manager to close. + */ +void uct_cm_close(uct_cm_h cm); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Get connection manager attributes. + * + * This routine queries the @ref uct_cm_h "cm" for its attributes + * @ref uct_cm_attr_t. + * + * @param [in] cm Connection manager to query. + * @param [out] cm_attr Filled with connection manager attributes. + */ +ucs_status_t uct_cm_query(uct_cm_h cm, uct_cm_attr_t *cm_attr); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Read the configuration for a connection manager. + * + * @param [in] component Read the configuration of the connection manager + * on this component. + * @param [in] env_prefix If non-NULL, search for environment variables + * starting with this UCT_<prefix>_. Otherwise, search + * for environment variables starting with just UCT_. + * @param [in] filename If non-NULL, read configuration from this file. If + * the file does not exist, or exists but cannot be + * opened or read, it will be ignored. + * @param [out] config_p Filled with a pointer to the configuration. + * + * @return Error code. + */ +ucs_status_t uct_cm_config_read(uct_component_h component, + const char *env_prefix, const char *filename, + uct_cm_config_t **config_p); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Create a new transport listener object.
+ * + * This routine creates a new listener on the given CM which will start + * listening on a given sockaddr. + * + * @param [in] cm Connection manager on which to open the listener. + * This cm should not be closed as long as there are + * open listeners on it. + * @param [in] saddr The socket address to listen on. + * @param [in] socklen The saddr length. + * @param [in] params User defined @ref uct_listener_params_t + * configurations for the @a listener_p. + * @param [out] listener_p Filled with handle to the new listener. + * + * @return Error code. + */ +ucs_status_t uct_listener_create(uct_cm_h cm, const struct sockaddr *saddr, + socklen_t socklen, + const uct_listener_params_t *params, + uct_listener_h *listener_p); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Destroy a transport listener. + * + * @param [in] listener Listener to destroy. + */ +void uct_listener_destroy(uct_listener_h listener); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Reject a connection request. + * + * This routine can be invoked on the server side. It rejects a connection request + * from the client. + * + * @param [in] listener Listener which will reject the connection request. + * @param [in] conn_request Connection establishment request passed as parameter + * of @ref uct_listener_conn_request_callback_t. + * + * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t uct_listener_reject(uct_listener_h listener, + uct_conn_request_h conn_request); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Get attributes specific to a particular listener. + * + * This routine queries the @ref uct_listener_h "listener" for its attributes + * @ref uct_listener_attr_t. + * + * @param [in] listener Listener object to query. + * @param [out] listener_attr Filled with attributes of the listener. 
+ * + * @return Error code as defined by @ref ucs_status_t + */ +ucs_status_t uct_listener_query(uct_listener_h listener, + uct_listener_attr_t *listener_attr); + + +/** + * @example uct_hello_world.c + * UCT hello world client / server example utility. + */ + +END_C_DECLS + +#endif diff --git a/src/uct/api/uct_def.h b/src/uct/api/uct_def.h new file mode 100644 index 0000000..13bb604 --- /dev/null +++ b/src/uct/api/uct_def.h @@ -0,0 +1,601 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_DEF_H_ +#define UCT_DEF_H_ + +#include +#include + +#include +#include +#include + + +#define UCT_COMPONENT_NAME_MAX 16 +#define UCT_TL_NAME_MAX 10 +#define UCT_MD_NAME_MAX 16 +#define UCT_DEVICE_NAME_MAX 32 +#define UCT_PENDING_REQ_PRIV_LEN 40 +#define UCT_TAG_PRIV_LEN 32 +#define UCT_AM_ID_BITS 5 +#define UCT_AM_ID_MAX UCS_BIT(UCT_AM_ID_BITS) +#define UCT_MEM_HANDLE_NULL NULL +#define UCT_INVALID_RKEY ((uintptr_t)(-1)) +#define UCT_INLINE_API static UCS_F_ALWAYS_INLINE + + +/** + * @ingroup UCT_AM + * @brief Trace types for active message tracer. + */ +enum uct_am_trace_type { + UCT_AM_TRACE_TYPE_SEND, + UCT_AM_TRACE_TYPE_RECV, + UCT_AM_TRACE_TYPE_SEND_DROP, + UCT_AM_TRACE_TYPE_RECV_DROP, + UCT_AM_TRACE_TYPE_LAST +}; + + +/** + * @ingroup UCT_RESOURCE + * @brief Flags for active message and tag-matching offload callbacks (callback's parameters). + * + * If UCT_CB_PARAM_FLAG_DESC flag is enabled, then data is part of a descriptor + * which includes the user-defined rx_headroom, and the callback may return + * UCS_INPROGRESS and hold on to that descriptor. Otherwise, the data can't be + * used outside the callback. If needed, the data must be copied-out. 
+ * + @verbatim + descriptor data + | | + +-------------+-------------------------+ + | rx_headroom | payload | + +-------------+-------------------------+ + @endverbatim + * + * UCT_CB_PARAM_FLAG_FIRST and UCT_CB_PARAM_FLAG_MORE flags are relevant for + * @ref uct_tag_unexp_eager_cb_t callback only. The former value indicates that + * the data is the first fragment of the message. The latter value means that + * more fragments of the message yet to be delivered. + */ +enum uct_cb_param_flags { + UCT_CB_PARAM_FLAG_DESC = UCS_BIT(0), + UCT_CB_PARAM_FLAG_FIRST = UCS_BIT(1), + UCT_CB_PARAM_FLAG_MORE = UCS_BIT(2) +}; + +/** + * @addtogroup UCT_RESOURCE + * @{ + */ +typedef struct uct_component *uct_component_h; +typedef struct uct_iface *uct_iface_h; +typedef struct uct_iface_config uct_iface_config_t; +typedef struct uct_md_config uct_md_config_t; +typedef struct uct_cm_config uct_cm_config_t; +typedef struct uct_ep *uct_ep_h; +typedef void * uct_mem_h; +typedef uintptr_t uct_rkey_t; +typedef struct uct_md *uct_md_h; /**< @brief Memory domain handler */ +typedef struct uct_md_ops uct_md_ops_t; +typedef void *uct_rkey_ctx_h; +typedef struct uct_iface_attr uct_iface_attr_t; +typedef struct uct_iface_params uct_iface_params_t; +typedef struct uct_md_attr uct_md_attr_t; +typedef struct uct_completion uct_completion_t; +typedef struct uct_pending_req uct_pending_req_t; +typedef struct uct_worker *uct_worker_h; +typedef struct uct_md uct_md_t; +typedef enum uct_am_trace_type uct_am_trace_type_t; +typedef struct uct_device_addr uct_device_addr_t; +typedef struct uct_iface_addr uct_iface_addr_t; +typedef struct uct_ep_addr uct_ep_addr_t; +typedef struct uct_ep_params uct_ep_params_t; +typedef struct uct_cm_attr uct_cm_attr_t; +typedef struct uct_cm uct_cm_t; +typedef uct_cm_t *uct_cm_h; +typedef struct uct_listener_attr uct_listener_attr_t; +typedef struct uct_listener *uct_listener_h; +typedef struct uct_listener_params uct_listener_params_t; +typedef struct uct_tag_context 
uct_tag_context_t; +typedef uint64_t uct_tag_t; /* tag type - 64 bit */ +typedef int uct_worker_cb_id_t; +typedef void* uct_conn_request_h; + +/** + * @} + */ + + +/** + * @ingroup UCT_RESOURCE + * @brief Structure for scatter-gather I/O. + * + * Specifies a list of buffers which can be used within a single data transfer + * function call. + * + @verbatim + buffer + | + +-----------+-------+-----------+-------+-----------+ + | payload | empty | payload | empty | payload | + +-----------+-------+-----------+-------+-----------+ + |<-length-->| |<-length-->| |<-length-->| + |<---- stride ----->|<---- stride ----->| + @endverbatim + * + * @note The sum of lengths in all iov list must be less or equal to max_zcopy + * of the respective communication operation. + * @note If @a length or @a count are zero, the memory pointed to by @a buffer + * will not be accessed. Otherwise, @a buffer must point to valid memory. + * + * @note If @a count is one, every iov entry specifies a single contiguous data block + * + * @note If @a count > 1, each iov entry specifies a strided block of @a count + * elements and distance of @a stride byte between consecutive elements + * + */ +typedef struct uct_iov { + void *buffer; /**< Data buffer */ + size_t length; /**< Length of the payload in bytes */ + uct_mem_h memh; /**< Local memory key descriptor for the data */ + size_t stride; /**< Stride between beginnings of payload elements in + the buffer in bytes */ + unsigned count; /**< Number of payload elements in the buffer */ +} uct_iov_t; + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Remote data attributes field mask. + * + * The enumeration allows specifying which fields in @ref uct_cm_remote_data are + * present, for backward compatibility support. 
+ */ +enum uct_cm_remote_data_field { + /** Enables @ref uct_cm_remote_data::dev_addr */ + UCT_CM_REMOTE_DATA_FIELD_DEV_ADDR = UCS_BIT(0), + + /** Enables @ref uct_cm_remote_data::dev_addr_length */ + UCT_CM_REMOTE_DATA_FIELD_DEV_ADDR_LENGTH = UCS_BIT(1), + + /** Enables @ref uct_cm_remote_data::conn_priv_data */ + UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA = UCS_BIT(2), + + /** Enables @ref uct_cm_remote_data::conn_priv_data_length */ + UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA_LENGTH = UCS_BIT(3) +}; + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Data received from the remote peer. + * + * The remote peer's device address, the data received from it and their lengths. + * Used with the client-server API on a connection manager. + */ +typedef struct uct_cm_remote_data { + /** + * Mask of valid fields in this structure, using bits from + * @ref uct_cm_remote_data_field. Fields not specified by this mask + * will be ignored. + */ + uint64_t field_mask; + + /** + * Device address of the remote peer. + */ + const uct_device_addr_t *dev_addr; + + /** + * Length of the remote device address. + */ + size_t dev_addr_length; + + /** + * Pointer to the received data. This is the private data that was passed to + * @ref uct_ep_params_t::sockaddr_pack_cb. + */ + const void *conn_priv_data; + + /** + * Length of the received data from the peer. + */ + size_t conn_priv_data_length; +} uct_cm_remote_data_t; + + +/** + * @ingroup UCT_AM + * @brief Callback to process incoming active message + * + * When the callback is called, @a flags indicates how @a data should be handled. + * If @a flags contain @ref UCT_CB_PARAM_FLAG_DESC value, it means @a data is part of + * a descriptor which must be released later by @ref uct_iface_release_desc by + * the user if the callback returns @ref UCS_INPROGRESS. + * + * @param [in] arg User-defined argument. + * @param [in] data Points to the received data. This may be a part of + * a descriptor which may be released later. 
+ * @param [in] length Length of data. + * @param [in] flags Mask with @ref uct_cb_param_flags + * + * @note This callback could be set and released + * by @ref uct_iface_set_am_handler function. + * + * @retval UCS_OK - descriptor was consumed, and can be released + * by the caller. + * @retval UCS_INPROGRESS - descriptor is owned by the callee, and would be + * released later. Supported only if @a flags contain + * @ref UCT_CB_PARAM_FLAG_DESC value. Otherwise, this is + * an error. + * + */ +typedef ucs_status_t (*uct_am_callback_t)(void *arg, void *data, size_t length, + unsigned flags); + + +/** + * @ingroup UCT_AM + * @brief Callback to trace active messages. + * + * Writes a string which represents active message contents into 'buffer'. + * + * @param [in] arg User-defined argument. + * @param [in] type Message type. + * @param [in] id Active message id. + * @param [in] data Points to the received data. + * @param [in] length Length of data. + * @param [out] buffer Filled with a debug information string. + * @param [in] max Maximal length of the string. + */ +typedef void (*uct_am_tracer_t)(void *arg, uct_am_trace_type_t type, uint8_t id, + const void *data, size_t length, char *buffer, + size_t max); + + +/** + * @ingroup UCT_RESOURCE + * @brief Callback to process send completion. + * + * @param [in] self Pointer to relevant completion structure, which was + * initially passed to the operation. + * @param [in] status Status of send action, possibly indicating an error. + */ +typedef void (*uct_completion_callback_t)(uct_completion_t *self, + ucs_status_t status); + + +/** + * @ingroup UCT_RESOURCE + * @brief Callback to process pending requests. + * + * @param [in] self Pointer to relevant pending structure, which was + * initially passed to the operation. + * + * @return @ref UCS_OK - This pending request has completed and + * should be removed. + * @ref UCS_INPROGRESS - Some progress was made, but not completed. 
+ * Keep this request and keep processing the queue. + * Otherwise - Could not make any progress. Keep this pending + * request on the queue, and stop processing the queue. + */ +typedef ucs_status_t (*uct_pending_callback_t)(uct_pending_req_t *self); + + +/** + * @ingroup UCT_RESOURCE + * @brief Callback to process peer failure. + * + * @param [in] arg User argument to be passed to the callback. + * @param [in] ep Endpoint which has failed. Upon return from the callback, + * this @a ep is no longer usable and all subsequent + * operations on this @a ep will fail with the error code + * passed in @a status. + * @param [in] status Status indicating error. + * + * @return @ref UCS_OK - The error was handled successfully. + * Otherwise - The error was not handled and is returned to + * the transport. + */ +typedef ucs_status_t (*uct_error_handler_t)(void *arg, uct_ep_h ep, + ucs_status_t status); + + +/** + * @ingroup UCT_RESOURCE + * @brief Callback to purge pending requests. + * + * @param [in] self Pointer to relevant pending structure, which was + * initially passed to the operation. + * @param [in] arg User argument to be passed to the callback. + */ +typedef void (*uct_pending_purge_callback_t)(uct_pending_req_t *self, + void *arg); + +/** + * @ingroup UCT_RESOURCE + * @brief Callback for producing data. + * + * @param [in] dest Memory buffer to pack the data to. + * @param [in] arg Custom user-argument. + * + * @return Size of the data that was actually produced. + */ +typedef size_t (*uct_pack_callback_t)(void *dest, void *arg); + + +/** + * @ingroup UCT_RESOURCE + * @brief Callback for consuming data. + * + * @param [in] arg Custom user-argument. + * @param [in] data Memory buffer to unpack the data from. + * @param [in] length How much data to consume (size of "data"). + * + * @note The arguments for this callback are in the same order as libc's memcpy().
+ */ +typedef void (*uct_unpack_callback_t)(void *arg, const void *data, size_t length); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Callback to process an incoming connection request on the server side. + * + * This callback routine will be invoked on the server side upon receiving an + * incoming connection request. It should be set by the server side while + * initializing an interface. + * Incoming data is placed inside the conn_priv_data buffer. + * This callback has to be thread safe. + * Other than communication progress routines, it is allowed to call other UCT + * communication routines from this callback. + * + * @param [in] iface Transport interface. + * @param [in] arg User defined argument for this callback. + * @param [in] conn_request Transport level connection request. The user + * should accept or reject the request by calling + * @ref uct_iface_accept or @ref uct_iface_reject + * routines respectively. + * conn_request should not be used outside the + * scope of this callback. + * @param [in] conn_priv_data Points to the received data. + * This is the private data that was passed to the + * @ref uct_ep_params_t::sockaddr_pack_cb on the + * client side. + * @param [in] length Length of the received data. + * + */ +typedef void +(*uct_sockaddr_conn_request_callback_t)(uct_iface_h iface, void *arg, + uct_conn_request_h conn_request, + const void *conn_priv_data, + size_t length); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Callback to process an incoming connection request on the server side + * listener in a connection manager. + * + * This callback routine will be invoked on the server side upon receiving an + * incoming connection request. It should be set by the server side while + * initializing a listener in a connection manager. + * This callback has to be thread safe. + * Other than communication progress routines, it is allowed to call other UCT + * communication routines from this callback. 
+ * + * @param [in] listener Transport listener. + * @param [in] arg User argument for this callback as defined in + * @ref uct_listener_params_t::user_data + * @param [in] local_dev_name Device name which handles the incoming connection + * request. + * @param [in] conn_request Connection request handle. Can be passed to this + * callback from the transport and will be used + * by it to accept or reject the connection request + * from the client. + * @param [in] remote_data Remote data from the client. + * + */ +typedef void +(*uct_listener_conn_request_callback_t)(uct_listener_h listener, void *arg, + const char *local_dev_name, + uct_conn_request_h conn_request, + const uct_cm_remote_data_t *remote_data); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Callback to process an incoming connection establishment acknowledgment + * on the server side listener, from the client, which indicates that the + * client side is connected. + * + * This callback routine will be invoked on the server side upon receiving an + * incoming connection establishment acknowledgment from the client, which is sent + * from it once the client is connected to the server. Used to connect the server + * side to the client or handle an error from it - depending on the status field. + * This callback has to be thread safe. + * Other than communication progress routines, it is allowed to call other UCT + * communication routines from this callback. + * + * @param [in] ep Transport endpoint. + * @param [in] arg User argument for this callback as defined in + * @ref uct_ep_params_t::user_data + * @param [in] status Indicates the client's status. + */ +typedef void (*uct_ep_server_connect_cb_t)(uct_ep_h ep, void *arg, + ucs_status_t status); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Callback to process an incoming connection response on the client side + * from the server. 
+ * + * This callback routine will be invoked on the client side upon receiving an + * incoming connection response from the server. Used to connect the client side + * to the server or handle an error from it - depending on the status field. + * This callback has to be thread safe. + * Other than communication progress routines, it is allowed to call other UCT + * communication routines from this callback. + * + * @param [in] ep Transport endpoint. + * @param [in] arg User argument for this callback as defined in + * @ref uct_ep_params_t::user_data. + * @param [in] remote_data Remote data from the server. + * @param [in] status Indicates the server's status. + */ +typedef void (*uct_ep_client_connect_cb_t)(uct_ep_h ep, void *arg, + const uct_cm_remote_data_t *remote_data, + ucs_status_t status); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Callback to handle the disconnection of the remote peer. + * + * This callback routine will be invoked on the client and server sides upon + * a disconnect of the remote peer. It will disconnect the given endpoint from + * the remote peer. + * This callback won't be invoked if @ref uct_ep_disconnect was called locally + * with a completion that is not NULL. + * This callback has to be thread safe. + * Other than communication progress routines, it is allowed to call other UCT + * communication routines from this callback. + * + * @param [in] ep Transport endpoint to disconnect. + * @param [in] arg User argument for this callback as defined in + * @ref uct_ep_params_t::user_data. + */ +typedef void (*uct_ep_disconnect_cb_t)(uct_ep_h ep, void *arg); + + +/** + * @ingroup UCT_CLIENT_SERVER + * @brief Callback to fill the user's private data in a client-server flow. + * + * This callback routine will be invoked on the client side, before sending the + * transport's connection request to the server, or on the server side before + * sending a connection response to the client. 
+ * The callback routine must be set when creating an endpoint. + * The user's private data should be placed inside the priv_data buffer to be + * sent to the remote side. + * The maximal allowed length of the private data is indicated by the field + * max_conn_priv inside @ref uct_iface_attr or inside @ref uct_cm_attr when using a + * connection manager. + * Communication progress routines should not be called from this callback. + * It is allowed to call other UCT communication routines from this callback. + * + * @param [in] arg User defined argument for this callback. + * @param [in] dev_name Device name. This routine may fill the user's private + * data according to the given device name. + * The device name that is passed to this routine, + * corresponds to the dev_name field inside + * @ref uct_tl_resource_desc_t as returned from + * @ref uct_md_query_tl_resources. + * @param [out] priv_data User's private data to be passed to the remote side. + * + * @return Negative value indicates an error according to @ref ucs_status_t. + * On success, a non-negative value indicates actual number of + * bytes written to the @a priv_data buffer. + */ +typedef ssize_t (*uct_sockaddr_priv_pack_callback_t)(void *arg, + const char *dev_name, + void *priv_data); + + +/** + * @ingroup UCT_TAG + * @brief Callback to process unexpected eager tagged message. + * + * This callback is invoked when tagged message sent by eager protocol has + * arrived and no corresponding tag has been posted. + * + * @note The callback is always invoked from the context (thread, process) + * that called @a uct_iface_progress(). + * + * @note It is allowed to call other communication routines from the callback. + * + * @param [in] arg User-defined argument + * @param [in] data Points to the received unexpected data. + * @param [in] length Length of data. + * @param [in] flags Mask with @ref uct_cb_param_flags flags. 
If it + * contains @ref UCT_CB_PARAM_FLAG_DESC value, this means + * @a data is part of a descriptor which must be released + * later using @ref uct_iface_release_desc by the user if + * the callback returns @ref UCS_INPROGRESS. + * @param [in] stag Tag from sender. + * @param [in] imm Immediate data from sender. + * + * @param [inout] context Storage for a per-message user-defined context. In + * this context, the message is defined by the sender + * side as a single call to uct_ep_tag_eager_short/bcopy/zcopy. + * On the transport level the message can be fragmented + * and delivered to the target over multiple fragments. + * The fragments will preserve the original order of the + * message. Each fragment will result in invocation of + * the above callback. The user can use + * UCT_CB_PARAM_FLAG_FIRST to identify the first fragment, + * allocate the context object and use the context as a + * token that is set by the user and passed to subsequent + * callbacks of the same message. The user is responsible + * for allocation and release of the context. + * + * @note No need to allocate the context in the case of a single fragment message + * (i.e. @a flags contains @ref UCT_CB_PARAM_FLAG_FIRST, but does not + * contain @ref UCT_CB_PARAM_FLAG_MORE). + * + * @retval UCS_OK - data descriptor was consumed, and can be released + * by the caller. + * @retval UCS_INPROGRESS - data descriptor is owned by the callee, and will be + * released later. + */ +typedef ucs_status_t (*uct_tag_unexp_eager_cb_t)(void *arg, void *data, + size_t length, unsigned flags, + uct_tag_t stag, uint64_t imm, + void **context); + + +/** + * @ingroup UCT_TAG + * @brief Callback to process unexpected rendezvous tagged message. + * + * This callback is invoked when rendezvous send notification has arrived + * and no corresponding tag has been posted. + * + * @note The callback is always invoked from the context (thread, process) + * that called @a uct_iface_progress(). 
+ * + * @note It is allowed to call other communication routines from the callback. + * + * @param [in] arg User-defined argument + * @param [in] flags Mask with @ref uct_cb_param_flags + * @param [in] stag Tag from sender. + * @param [in] header User defined header. + * @param [in] header_length User defined header length in bytes. + * @param [in] remote_addr Sender's buffer virtual address. + * @param [in] length Sender's buffer length. + * @param [in] rkey_buf Sender's buffer packed remote key. It can be + * passed to uct_rkey_unpack() to create uct_rkey_t. + * + * @warning If the user became the owner of the @a desc (by returning + * @ref UCS_INPROGRESS) the descriptor must be released later by + * @ref uct_iface_release_desc by the user. + * + * @retval UCS_OK - descriptor was consumed, and can be released + * by the caller. + * @retval UCS_INPROGRESS - descriptor is owned by the callee, and would be + * released later. + */ +typedef ucs_status_t (*uct_tag_unexp_rndv_cb_t)(void *arg, unsigned flags, + uint64_t stag, const void *header, + unsigned header_length, + uint64_t remote_addr, size_t length, + const void *rkey_buf); + + +#endif diff --git a/src/uct/api/version.h b/src/uct/api/version.h new file mode 100644 index 0000000..d6f19cd --- /dev/null +++ b/src/uct/api/version.h @@ -0,0 +1,22 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + + +#ifndef UCT_VERSION_H_ +#define UCT_VERSION_H_ + +#define UCT_VERNO_MAJOR 1 +#define UCT_VERNO_MINOR 8 +#define UCT_VERNO_STRING "1.8.0" +#define UCT_SCM_VERSION "c30b7da" + +#define UCT_MINOR_BIT (16UL) +#define UCT_MAJOR_BIT (24UL) +#define UCT_API ((1L< +#include +#include + + +ucs_config_field_t uct_cm_config_table[] = { + {NULL} +}; + +ucs_status_t uct_cm_open(uct_component_h component, uct_worker_h worker, + const uct_cm_config_t *config, uct_cm_h *cm_p) +{ + return component->cm_open(component, worker, config, cm_p); +} + +void uct_cm_close(uct_cm_h cm) +{ + cm->ops->close(cm); +} + +ucs_status_t uct_cm_query(uct_cm_h cm, uct_cm_attr_t *cm_attr) +{ + return cm->ops->cm_query(cm, cm_attr); +} + +ucs_status_t uct_cm_config_read(uct_component_h component, + const char *env_prefix, const char *filename, + uct_cm_config_t **config_p) +{ + uct_config_bundle_t *bundle = NULL; + ucs_status_t status; + + status = uct_config_read(&bundle, component->cm_config.table, + component->cm_config.size, env_prefix, + component->cm_config.prefix); + if (status != UCS_OK) { + ucs_error("failed to read CM configuration"); + return status; + } + + *config_p = (uct_cm_config_t*) bundle->data; + /* coverity[leaked_storage] */ + return UCS_OK; +} + +void uct_cm_ep_client_connect_cb(uct_cm_base_ep_t *cep, + uct_cm_remote_data_t *remote_data, + ucs_status_t status) +{ + cep->client.connect_cb(&cep->super.super, cep->user_data, remote_data, status); +} + +void uct_cm_ep_server_connect_cb(uct_cm_base_ep_t *cep, ucs_status_t status) +{ + cep->server.connect_cb(&cep->super.super, cep->user_data, status); +} + +ucs_status_t uct_cm_check_ep_params(const uct_ep_params_t *params) +{ + if (!(params->field_mask & UCT_EP_PARAM_FIELD_CM)) { + ucs_error("UCT_EP_PARAM_FIELD_CM is not set. 
field_mask 0x%lx",
+                  params->field_mask);
+        return UCS_ERR_INVALID_PARAM;
+    }
+
+    if (!(params->field_mask & UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS) ||
+        !(params->sockaddr_cb_flags & UCT_CB_FLAG_ASYNC)) {
+        ucs_error("UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS and UCT_CB_FLAG_ASYNC "
+                  "should be set. field_mask 0x%lx, sockaddr_cb_flags 0x%x",
+                  params->field_mask, params->sockaddr_cb_flags);
+        return UCS_ERR_UNSUPPORTED;
+    }
+
+    if (!(params->field_mask & (UCT_EP_PARAM_FIELD_SOCKADDR |
+                                UCT_EP_PARAM_FIELD_CONN_REQUEST))) {
+        ucs_error("neither UCT_EP_PARAM_FIELD_SOCKADDR nor "
+                  "UCT_EP_PARAM_FIELD_CONN_REQUEST is set. field_mask 0x%lx",
+                  params->field_mask);
+        return UCS_ERR_INVALID_PARAM;
+    }
+
+    return UCS_OK;
+}
+
+/*
+ * Base CM endpoint constructor: validates the endpoint parameters with
+ * uct_cm_check_ep_params(), initializes the uct_base_ep_t part on the
+ * interface embedded in the connection manager, and stores the optional
+ * callbacks / user data (NULL when the matching field_mask bit is not set).
+ */
+UCS_CLASS_INIT_FUNC(uct_cm_base_ep_t, const uct_ep_params_t *params)
+{
+    ucs_status_t status;
+
+    status = uct_cm_check_ep_params(params);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &params->cm->iface);
+
+    self->priv_pack_cb = (params->field_mask &
+                          UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB) ?
+                         params->sockaddr_pack_cb : NULL;
+    self->disconnect_cb = (params->field_mask &
+                           UCT_EP_PARAM_FIELD_SOCKADDR_DISCONNECT_CB) ?
+                          params->disconnect_cb : NULL;
+    self->user_data = (params->field_mask &
+                       UCT_EP_PARAM_FIELD_USER_DATA) ?
+ params->user_data : NULL; + + return UCS_OK; +} + +UCS_CLASS_CLEANUP_FUNC(uct_cm_base_ep_t){} + +UCS_CLASS_DEFINE(uct_cm_base_ep_t, uct_base_ep_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_cm_base_ep_t, uct_base_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_cm_base_ep_t, uct_base_ep_t); + + +UCS_CLASS_INIT_FUNC(uct_listener_t, uct_cm_h cm) +{ + self->cm = cm; + return UCS_OK; +} + +UCS_CLASS_CLEANUP_FUNC(uct_listener_t){} + +UCS_CLASS_DEFINE(uct_listener_t, void); +UCS_CLASS_DEFINE_NEW_FUNC(uct_listener_t, void, uct_cm_h); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_listener_t, void); + +ucs_status_t uct_listener_create(uct_cm_h cm, const struct sockaddr *saddr, + socklen_t socklen, const uct_listener_params_t *params, + uct_listener_h *listener_p) +{ + if (!(params->field_mask & UCT_LISTENER_PARAM_FIELD_CONN_REQUEST_CB)) { + return UCS_ERR_INVALID_PARAM; + } + + return cm->ops->listener_create(cm, saddr, socklen, params, listener_p); +} + +void uct_listener_destroy(uct_listener_h listener) +{ + listener->cm->ops->listener_destroy(listener); +} + +ucs_status_t uct_listener_query(uct_listener_h listener, + uct_listener_attr_t *listener_attr) +{ + return listener->cm->ops->listener_query(listener, listener_attr); +} + +ucs_status_t uct_listener_reject(uct_listener_h listener, + uct_conn_request_h conn_request) +{ + return listener->cm->ops->listener_reject(listener, conn_request); +} + + +#if ENABLE_STATS +static ucs_stats_class_t uct_cm_stats_class = { + .name = "rdmacm_cm", + .num_counters = 0 +}; +#endif + +UCS_CLASS_INIT_FUNC(uct_cm_t, uct_cm_ops_t* ops, uct_iface_ops_t* iface_ops, + uct_worker_h worker, uct_component_h component) +{ + self->ops = ops; + self->component = component; + self->iface.super.ops = *iface_ops; + self->iface.worker = ucs_derived_of(worker, uct_priv_worker_t); + + self->iface.md = NULL; + self->iface.am->arg = NULL; + self->iface.am->flags = 0; + self->iface.am->cb = (uct_am_callback_t)ucs_empty_function_return_unsupported; + 
self->iface.am_tracer = NULL;
+    self->iface.am_tracer_arg = NULL;
+    self->iface.err_handler = NULL;
+    self->iface.err_handler_arg = NULL;
+    self->iface.err_handler_flags = 0;
+    self->iface.prog.id = UCS_CALLBACKQ_ID_NULL;
+    self->iface.prog.refcount = 0;
+    self->iface.progress_flags = 0;
+
+    /* "%p" consumes a pointer argument: pass the address of the embedded
+     * interface, the structure itself must not go through varargs by value */
+    return UCS_STATS_NODE_ALLOC(&self->iface.stats, &uct_cm_stats_class,
+                                ucs_stats_get_root(), "%s-%p", "iface",
+                                &self->iface);
+}
+
+UCS_CLASS_CLEANUP_FUNC(uct_cm_t)
+{
+    UCS_STATS_NODE_FREE(self->iface.stats);
+}
+
+UCS_CLASS_DEFINE(uct_cm_t, void);
+UCS_CLASS_DEFINE_NEW_FUNC(uct_cm_t, void, uct_cm_ops_t*, uct_iface_ops_t*,
+                          uct_worker_h, uct_component_h);
+UCS_CLASS_DEFINE_DELETE_FUNC(uct_cm_t, void);
diff --git a/src/uct/base/uct_cm.h b/src/uct/base/uct_cm.h
new file mode 100644
index 0000000..2fb10fd
--- /dev/null
+++ b/src/uct/base/uct_cm.h
@@ -0,0 +1,101 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_CM_H_
+#define UCT_CM_H_
+
+#include
+#include
+#include
+#include
+
+
+UCS_CLASS_DECLARE(uct_listener_t, uct_cm_h);
+
+/**
+ * "Base" structure which defines CM configuration options.
+ * Specific CMs extend this structure.
+ */ +struct uct_cm_config { + /* C standard prohibits empty structures */ + char __dummy; +}; + +/** + * Connection manager component operations + */ +typedef struct uct_cm_ops { + void (*close)(uct_cm_h cm); + ucs_status_t (*cm_query)(uct_cm_h cm, uct_cm_attr_t *cm_attr); + ucs_status_t (*listener_create)(uct_cm_h cm, const struct sockaddr *saddr, + socklen_t socklen, + const uct_listener_params_t *params, + uct_listener_h *listener_p); + ucs_status_t (*listener_reject)(uct_listener_h listener, + uct_conn_request_h conn_request); + ucs_status_t (*listener_query) (uct_listener_h listener, + uct_listener_attr_t *listener_attr); + void (*listener_destroy)(uct_listener_h listener); + ucs_status_t (*ep_create)(const uct_ep_params_t *params, uct_ep_h *ep_p); +} uct_cm_ops_t; + + +struct uct_cm { + uct_cm_ops_t *ops; + uct_component_h component; + uct_base_iface_t iface; +}; + + +/** + * Connection manager base endpoint + */ +typedef struct uct_cm_base_ep { + uct_base_ep_t super; + + /* User data associated with the endpoint */ + void *user_data; + + /* Callback to handle the disconnection of the remote peer */ + uct_ep_disconnect_cb_t disconnect_cb; + + /* Callback to fill the user's private data */ + uct_sockaddr_priv_pack_callback_t priv_pack_cb; + + union { + struct { + /* On the client side - callback to process an incoming + * connection response from the server */ + uct_ep_client_connect_cb_t connect_cb; + } client; + struct { + /* On the server side - callback to process an incoming connection + * establishment acknowledgment from the client */ + uct_ep_server_connect_cb_t connect_cb; + } server; + }; +} uct_cm_base_ep_t; + + +UCS_CLASS_DECLARE(uct_cm_base_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_NEW_FUNC(uct_cm_base_ep_t, uct_base_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_cm_base_ep_t, uct_base_ep_t); + + +extern ucs_config_field_t uct_cm_config_table[]; + +UCS_CLASS_DECLARE(uct_cm_t, uct_cm_ops_t*, uct_iface_ops_t*, 
uct_worker_h, + uct_component_h); + +ucs_status_t uct_cm_check_ep_params(const uct_ep_params_t *params); + +void uct_cm_ep_client_connect_cb(uct_cm_base_ep_t *cep, + uct_cm_remote_data_t *remote_data, + ucs_status_t status); + +void uct_cm_ep_server_connect_cb(uct_cm_base_ep_t *cep, ucs_status_t status); + +#endif /* UCT_CM_H_ */ diff --git a/src/uct/base/uct_component.c b/src/uct/base/uct_component.c new file mode 100644 index 0000000..b5b453c --- /dev/null +++ b/src/uct/base/uct_component.c @@ -0,0 +1,130 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "uct_component.h" + +#include +#include +#include +#include +#include +#include + + +UCS_LIST_HEAD(uct_components_list); + +ucs_status_t uct_query_components(uct_component_h **components_p, + unsigned *num_components_p) +{ + UCS_MODULE_FRAMEWORK_DECLARE(uct); + uct_component_h *components; + uct_component_t *component; + size_t num_components; + + UCS_MODULE_FRAMEWORK_LOAD(uct, 0); + num_components = ucs_list_length(&uct_components_list); + components = ucs_malloc(num_components * sizeof(*components), + "uct_components"); + if (components == NULL) { + return UCS_ERR_NO_MEMORY; + } + + ucs_assert(num_components < UINT_MAX); + *num_components_p = num_components; + *components_p = components; + + ucs_list_for_each(component, &uct_components_list, list) { + *(components++) = component; + } + + return UCS_OK; +} + +void uct_release_component_list(uct_component_h *components) +{ + ucs_free(components); +} + +ucs_status_t uct_component_query(uct_component_h component, + uct_component_attr_t *component_attr) +{ + uct_md_resource_desc_t *resources = NULL; + unsigned num_resources = 0; + ucs_status_t status; + + if (component_attr->field_mask & (UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT| + UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES)) { + status = component->query_md_resources(component, &resources, + &num_resources); + if (status != UCS_OK) 
{
+            return status;
+        }
+
+        ucs_assertv((num_resources == 0) || (resources != NULL),
+                    "component=%s", component->name);
+    }
+
+    if (component_attr->field_mask & UCT_COMPONENT_ATTR_FIELD_NAME) {
+        ucs_snprintf_zero(component_attr->name, sizeof(component_attr->name),
+                          "%s", component->name);
+    }
+
+    if (component_attr->field_mask & UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT) {
+        component_attr->md_resource_count = num_resources;
+    }
+
+    if ((resources != NULL) &&
+        (component_attr->field_mask & UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES))
+    {
+        memcpy(component_attr->md_resources, resources,
+               sizeof(uct_md_resource_desc_t) * num_resources);
+    }
+
+    if (component_attr->field_mask & UCT_COMPONENT_ATTR_FIELD_FLAGS) {
+        component_attr->flags = component->flags;
+    }
+
+    ucs_free(resources);
+    return UCS_OK;
+}
+
+ucs_status_t uct_config_read(uct_config_bundle_t **bundle,
+                             ucs_config_field_t *config_table,
+                             size_t config_size, const char *env_prefix,
+                             const char *cfg_prefix)
+{
+    uct_config_bundle_t *config_bundle;
+    ucs_status_t status;
+
+    config_bundle = ucs_calloc(1, sizeof(*config_bundle) + config_size, "uct_config");
+    if (config_bundle == NULL) {
+        status = UCS_ERR_NO_MEMORY;
+        goto err;
+    }
+
+    /* Once this succeeds the parsed options must be released on later errors */
+    status = ucs_config_parser_fill_opts(config_bundle->data, config_table,
+                                         env_prefix, cfg_prefix, 0);
+    if (status != UCS_OK) {
+        goto err_free_bundle;
+    }
+
+    config_bundle->table = config_table;
+    config_bundle->table_prefix = ucs_strdup(cfg_prefix, "uct_config");
+    if (config_bundle->table_prefix == NULL) {
+        ucs_config_parser_release_opts(config_bundle->data, config_table);
+        status = UCS_ERR_NO_MEMORY;
+        goto err_free_bundle;
+    }
+
+    *bundle = config_bundle;
+    return UCS_OK;
+
+err_free_bundle:
+    ucs_free(config_bundle);
+err:
+    return status;
+}
diff --git a/src/uct/base/uct_component.h b/src/uct/base/uct_component.h
new file mode 100644
index 0000000..1130449
--- /dev/null
+++ b/src/uct/base/uct_component.h
@@ -0,0 +1,184 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+ * + * See file LICENSE for terms. + */ + + +#ifndef UCT_COMPONENT_H_ +#define UCT_COMPONENT_H_ + +#include +#include +#include + + +/* Forward declaration */ +typedef struct uct_component uct_component_t; + + +/** + * Keeps information about allocated configuration structure, to be used when + * releasing the options. + */ +typedef struct uct_config_bundle { + ucs_config_field_t *table; + const char *table_prefix; + char data[]; +} uct_config_bundle_t; + + +/** + * Component method to query component memory domain resources. + * + * @param [in] component Query memory domain resources for this + * component. + * @param [out] resources_p Filled with a pointer to an array of + * memory domain resources, which should be + * released with ucs_free(). + * @param [out] num_resources_p Filled with the number of memory domain + * resource entries in the array. + * + * @return UCS_OK on success or error code in case of failure. + */ +typedef ucs_status_t (*uct_component_query_md_resources_func_t)( + uct_component_t *component, uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p); + + +/** + * Component method to open a memory domain. + * + * @param [in] component Open memory domain resources on this + * component. + * @param [in] md_name Name of the memory domain to open, as + * returned by + * @ref uct_component_query_md_resources_func_t + * @param [in] config Memory domain configuration. + * @param [out] md_p Handle to the opened memory domain. + * + * @return UCS_OK on success or error code in case of failure. + */ +typedef ucs_status_t (*uct_component_md_open_func_t)( + uct_component_t *component, const char *md_name, + const uct_md_config_t *config, uct_md_h *md_p); + + +/** + * Component method to open a client/server connection manager. + * + * @param [in] component Open a connection manager on this + * component. + * @param [in] worker Open the connection manager on this worker. + * @param [in] config Connection manager configuration. 
+ * @param [out] cm_p Filled with a handle to the connection manager. + * + * @return UCS_OK on success or error code in case of failure. + */ +typedef ucs_status_t (*uct_component_cm_open_func_t)( + uct_component_t *component, uct_worker_h worker, + const uct_cm_config_t *config, uct_cm_h *cm_p); + + +/** + * Component method to unpack a remote key buffer into a remote key object. + * + * @param [in] component Unpack the remote key buffer on this + * component. + * @param [in] rkey_buffer Remote key buffer to unpack. + * @param [in] config Memory domain configuration. + * @param [out] rkey_p Filled with a pointer to the unpacked + * remote key. + * @param [out] handle_p Filled with an additional handle which + * is used to release the remote key, but + * is not required for remote memory + * access operations. + * + * @return UCS_OK on success or error code in case of failure. + */ +typedef ucs_status_t (*uct_component_rkey_unpack_func_t)( + uct_component_t *component, const void *rkey_buffer, + uct_rkey_t *rkey_p, void **handle_p); + + +/** + * Component method to obtain a locally accessible pointer to a remote key. + * + * @param [in] component Get remote key memory pointer on this + * component. + * @param [in] rkey Obtain the pointer for this remote key. + * @param [in] handle Remote key handle, as returned from + * @ref uct_component_rkey_unpack_func_t. + * @param [in] remote_addr Remote address to obtain the pointer for. + * @param [out] local_addr_p Filled with the local access pointer. + * + * @return UCS_OK on success or error code in case of failure. + */ +typedef ucs_status_t (*uct_component_rkey_ptr_func_t)( + uct_component_t *component, uct_rkey_t rkey, void *handle, + uint64_t remote_addr, void **local_addr_p); + + +/** + * Component method to release an unpacked remote key. + * + * @param [in] component Release the remote key of this + * component. + * @param [in] rkey Release this remote key. 
+ * @param [in] handle Remote key handle, as returned from + * @ref uct_component_rkey_unpack_func_t. + * + * @return UCS_OK on success or error code in case of failure. + */ +typedef ucs_status_t (*uct_component_rkey_release_func_t)( + uct_component_t *component, uct_rkey_t rkey, void *handle); + + +/** + * Defines a UCT component + */ +struct uct_component { + const char name[UCT_COMPONENT_NAME_MAX]; /**< Component name */ + uct_component_query_md_resources_func_t query_md_resources; /**< Query memory domain resources method */ + uct_component_md_open_func_t md_open; /**< Memory domain open method */ + uct_component_cm_open_func_t cm_open; /**< Connection manager open method */ + uct_component_rkey_unpack_func_t rkey_unpack; /**< Remote key unpack method */ + uct_component_rkey_ptr_func_t rkey_ptr; /**< Remote key access pointer method */ + uct_component_rkey_release_func_t rkey_release; /**< Remote key release method */ + ucs_config_global_list_entry_t md_config; /**< MD configuration entry */ + ucs_config_global_list_entry_t cm_config; /**< CM configuration entry */ + ucs_list_link_t tl_list; /**< List of transports */ + ucs_list_link_t list; /**< Entry in global list of components */ + uint64_t flags; /**< Flags as defined by + UCT_COMPONENT_FLAG_xx */ +}; + + +/** + * Register a component for usage, so it will be returned from + * @ref uct_query_components. + * + * @param [in] _component Pointer to a global component structure to register. + */ +#define UCT_COMPONENT_REGISTER(_component) \ + UCS_STATIC_INIT { \ + extern ucs_list_link_t uct_components_list; \ + ucs_list_add_tail(&uct_components_list, &(_component)->list); \ + } \ + UCS_CONFIG_REGISTER_TABLE_ENTRY(&(_component)->md_config); \ + UCS_CONFIG_REGISTER_TABLE_ENTRY(&(_component)->cm_config); \ + + +/** + * Helper macro to initialize component's transport list head. 
+ */ +#define UCT_COMPONENT_TL_LIST_INITIALIZER(_component) \ + UCS_LIST_INITIALIZER(&(_component)->tl_list, &(_component)->tl_list) + + +ucs_status_t uct_config_read(uct_config_bundle_t **bundle, + ucs_config_field_t *config_table, + size_t config_size, const char *env_prefix, + const char *cfg_prefix); + +#endif diff --git a/src/uct/base/uct_iface.c b/src/uct/base/uct_iface.c new file mode 100644 index 0000000..9e9c8f0 --- /dev/null +++ b/src/uct/base/uct_iface.c @@ -0,0 +1,615 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "uct_iface.h" +#include "uct_cm.h" + +#include +#include +#include +#include +#include + + +#if ENABLE_STATS +static ucs_stats_class_t uct_ep_stats_class = { + .name = "uct_ep", + .num_counters = UCT_EP_STAT_LAST, + .counter_names = { + [UCT_EP_STAT_AM] = "am", + [UCT_EP_STAT_PUT] = "put", + [UCT_EP_STAT_GET] = "get", + [UCT_EP_STAT_ATOMIC] = "atomic", +#if IBV_HW_TM + [UCT_EP_STAT_TAG] = "tag", +#endif + [UCT_EP_STAT_BYTES_SHORT] = "bytes_short", + [UCT_EP_STAT_BYTES_BCOPY] = "bytes_bcopy", + [UCT_EP_STAT_BYTES_ZCOPY] = "bytes_zcopy", + [UCT_EP_STAT_NO_RES] = "no_res", + [UCT_EP_STAT_FLUSH] = "flush", + [UCT_EP_STAT_FLUSH_WAIT] = "flush_wait", + [UCT_EP_STAT_PENDING] = "pending", + [UCT_EP_STAT_FENCE] = "fence" + } +}; + +static ucs_stats_class_t uct_iface_stats_class = { + .name = "uct_iface", + .num_counters = UCT_IFACE_STAT_LAST, + .counter_names = { + [UCT_IFACE_STAT_RX_AM] = "rx_am", + [UCT_IFACE_STAT_RX_AM_BYTES] = "rx_am_bytes", + [UCT_IFACE_STAT_TX_NO_DESC] = "tx_no_desc", + [UCT_IFACE_STAT_FLUSH] = "flush", + [UCT_IFACE_STAT_FLUSH_WAIT] = "flush_wait", + [UCT_IFACE_STAT_FENCE] = "fence" + } +}; +#endif + + +static ucs_status_t uct_iface_stub_am_handler(void *arg, void *data, + size_t length, unsigned flags) +{ + const size_t dump_len 
= 64; + uint8_t id = (uintptr_t)arg; + char dump_str[(dump_len * 4) + 1]; /* 1234:5678\n\0 */ + + ucs_warn("got active message id %d, but no handler installed", id); + ucs_warn("payload %zu of %zu bytes:\n%s", ucs_min(length, dump_len), length, + ucs_str_dump_hex(data, ucs_min(length, dump_len), + dump_str, sizeof(dump_str), 16)); + ucs_log_print_backtrace(UCS_LOG_LEVEL_WARN); + return UCS_OK; +} + +static void uct_iface_set_stub_am_handler(uct_base_iface_t *iface, uint8_t id) +{ + iface->am[id].cb = uct_iface_stub_am_handler; + iface->am[id].arg = (void*)(uintptr_t)id; + iface->am[id].flags = UCT_CB_FLAG_ASYNC; +} + +ucs_status_t uct_iface_set_am_handler(uct_iface_h tl_iface, uint8_t id, + uct_am_callback_t cb, void *arg, + uint32_t flags) +{ + uct_base_iface_t *iface = ucs_derived_of(tl_iface, uct_base_iface_t); + ucs_status_t status; + uct_iface_attr_t attr; + + if (id >= UCT_AM_ID_MAX) { + ucs_error("active message id out-of-range (got: %d max: %d)", id, + (int)UCT_AM_ID_MAX); + return UCS_ERR_INVALID_PARAM; + } + + if (cb == NULL) { + uct_iface_set_stub_am_handler(iface, id); + return UCS_OK; + } + + status = uct_iface_query(tl_iface, &attr); + if (status != UCS_OK) { + return status; + } + + UCT_CB_FLAGS_CHECK(flags); + + /* If user wants a synchronous callback, it must be supported, or the + * callback could be called from another thread. 
+ */ + if (!(flags & UCT_CB_FLAG_ASYNC) && !(attr.cap.flags & UCT_IFACE_FLAG_CB_SYNC)) { + ucs_error("Synchronous callback requested, but not supported"); + return UCS_ERR_INVALID_PARAM; + } + + iface->am[id].cb = cb; + iface->am[id].arg = arg; + iface->am[id].flags = flags; + return UCS_OK; +} + +ucs_status_t uct_iface_set_am_tracer(uct_iface_h tl_iface, uct_am_tracer_t tracer, + void *arg) +{ + uct_base_iface_t *iface = ucs_derived_of(tl_iface, uct_base_iface_t); + + iface->am_tracer = tracer; + iface->am_tracer_arg = arg; + return UCS_OK; +} + +void uct_iface_dump_am(uct_base_iface_t *iface, uct_am_trace_type_t type, + uint8_t id, const void *data, size_t length, + char *buffer, size_t max) +{ + if (iface->am_tracer != NULL) { + iface->am_tracer(iface->am_tracer_arg, type, id, data, length, buffer, max); + } +} + +void uct_iface_mpool_empty_warn(uct_base_iface_t *iface, ucs_mpool_t *mp) +{ + static ucs_time_t warn_time = 0; + ucs_time_t now = ucs_get_time(); + + /* Limit the rate of warning to once in 30 seconds. This gives reasonable + * indication about a deadlock without flooding with warnings messages. 
*/ + if (warn_time == 0) { + warn_time = now; + } + if (now - warn_time > ucs_time_from_sec(30)) { + ucs_warn("Memory pool %s is empty", ucs_mpool_name(mp)); + warn_time = now; + } +} + +ucs_status_t uct_iface_query(uct_iface_h iface, uct_iface_attr_t *iface_attr) +{ + return iface->ops.iface_query(iface, iface_attr); +} + +ucs_status_t uct_iface_get_device_address(uct_iface_h iface, uct_device_addr_t *addr) +{ + return iface->ops.iface_get_device_address(iface, addr); +} + +ucs_status_t uct_iface_get_address(uct_iface_h iface, uct_iface_addr_t *addr) +{ + return iface->ops.iface_get_address(iface, addr); +} + +int uct_iface_is_reachable(const uct_iface_h iface, const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + return iface->ops.iface_is_reachable(iface, dev_addr, iface_addr); +} + +ucs_status_t uct_ep_check(const uct_ep_h ep, unsigned flags, + uct_completion_t *comp) +{ + return ep->iface->ops.ep_check(ep, flags, comp); +} + +ucs_status_t uct_iface_event_fd_get(uct_iface_h iface, int *fd_p) +{ + return iface->ops.iface_event_fd_get(iface, fd_p); +} + +ucs_status_t uct_iface_event_arm(uct_iface_h iface, unsigned events) +{ + return iface->ops.iface_event_arm(iface, events); +} + +void uct_iface_close(uct_iface_h iface) +{ + iface->ops.iface_close(iface); +} + +void uct_base_iface_progress_enable(uct_iface_h tl_iface, unsigned flags) +{ + uct_base_iface_t *iface = ucs_derived_of(tl_iface, uct_base_iface_t); + uct_base_iface_progress_enable_cb(iface, + (ucs_callback_t)iface->super.ops.iface_progress, + flags); +} + +void uct_base_iface_progress_enable_cb(uct_base_iface_t *iface, + ucs_callback_t cb, unsigned flags) +{ + uct_priv_worker_t *worker = iface->worker; + unsigned thread_safe; + + UCS_ASYNC_BLOCK(worker->async); + + thread_safe = flags & UCT_PROGRESS_THREAD_SAFE; + flags &= ~UCT_PROGRESS_THREAD_SAFE; + + /* Add callback only if previous flags are 0 and new flags != 0 */ + if ((!iface->progress_flags && flags) && + (iface->prog.id 
== UCS_CALLBACKQ_ID_NULL)) { + if (thread_safe) { + iface->prog.id = ucs_callbackq_add_safe(&worker->super.progress_q, + cb, iface, + UCS_CALLBACKQ_FLAG_FAST); + } else { + iface->prog.id = ucs_callbackq_add(&worker->super.progress_q, cb, + iface, UCS_CALLBACKQ_FLAG_FAST); + } + } + iface->progress_flags |= flags; + + UCS_ASYNC_UNBLOCK(worker->async); +} + +void uct_base_iface_progress_disable(uct_iface_h tl_iface, unsigned flags) +{ + uct_base_iface_t *iface = ucs_derived_of(tl_iface, uct_base_iface_t); + uct_priv_worker_t *worker = iface->worker; + unsigned thread_safe; + + UCS_ASYNC_BLOCK(worker->async); + + thread_safe = flags & UCT_PROGRESS_THREAD_SAFE; + flags &= ~UCT_PROGRESS_THREAD_SAFE; + + /* Remove callback only if previous flags != 0, and removing the given + * flags makes it become 0. + */ + if ((iface->progress_flags && !(iface->progress_flags & ~flags)) && + (iface->prog.id != UCS_CALLBACKQ_ID_NULL)) { + if (thread_safe) { + ucs_callbackq_remove_safe(&worker->super.progress_q, iface->prog.id); + } else { + ucs_callbackq_remove(&worker->super.progress_q, iface->prog.id); + } + iface->prog.id = UCS_CALLBACKQ_ID_NULL; + } + iface->progress_flags &= ~flags; + + UCS_ASYNC_UNBLOCK(worker->async); +} + +ucs_status_t uct_base_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp) +{ + UCT_TL_IFACE_STAT_FLUSH(ucs_derived_of(tl_iface, uct_base_iface_t)); + return UCS_OK; +} + +ucs_status_t uct_base_iface_fence(uct_iface_h tl_iface, unsigned flags) +{ + UCT_TL_IFACE_STAT_FENCE(ucs_derived_of(tl_iface, uct_base_iface_t)); + return UCS_OK; +} + +ucs_status_t uct_base_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp) +{ + UCT_TL_EP_STAT_FLUSH(ucs_derived_of(tl_ep, uct_base_ep_t)); + return UCS_OK; +} + +ucs_status_t uct_base_ep_fence(uct_ep_h tl_ep, unsigned flags) +{ + UCT_TL_EP_STAT_FENCE(ucs_derived_of(tl_ep, uct_base_ep_t)); + return UCS_OK; +} + +static void uct_ep_failed_purge_cb(uct_pending_req_t *self, void *arg) +{ + 
uct_pending_req_queue_push((ucs_queue_head_t*)arg, self); +} + +static void uct_ep_failed_purge(uct_ep_h tl_ep, uct_pending_purge_callback_t cb, + void *arg) +{ + uct_failed_iface_t *iface = ucs_derived_of(tl_ep->iface, + uct_failed_iface_t); + uct_pending_req_t *req; + + ucs_queue_for_each_extract(req, &iface->pend_q, priv, 1) { + if (cb != NULL) { + cb(req, arg); + } else { + ucs_warn("ep=%p cancelling user pending request %p", tl_ep, req); + } + } +} + +static void uct_ep_failed_destroy(uct_ep_h tl_ep) +{ + /* Warn user if some pending reqs left*/ + uct_ep_failed_purge (tl_ep, NULL, NULL); + + ucs_free(tl_ep->iface); + ucs_free(tl_ep); +} + +ucs_status_t uct_set_ep_failed(ucs_class_t *cls, uct_ep_h tl_ep, + uct_iface_h tl_iface, ucs_status_t status) +{ + uct_failed_iface_t *f_iface; + uct_iface_ops_t *ops; + uct_base_iface_t *iface = ucs_derived_of(tl_iface, uct_base_iface_t); + + ucs_debug("set ep %p to failed state", tl_ep); + + /* TBD: consider allocating one instance per interface + * rather than for each endpoint */ + f_iface = ucs_malloc(sizeof(*f_iface), "failed iface"); + if (f_iface == NULL) { + ucs_error("Could not create failed iface (nomem)"); + return status; + } + + ucs_queue_head_init(&f_iface->pend_q); + ops = &f_iface->super.ops; + + /* Move all pending requests to the queue. + * Failed ep will use that queue for purge. 
*/ + uct_ep_pending_purge(tl_ep, uct_ep_failed_purge_cb, &f_iface->pend_q); + + ops->ep_put_short = (uct_ep_put_short_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_put_bcopy = (uct_ep_put_bcopy_func_t)ucs_empty_function_return_bc_ep_timeout; + ops->ep_put_zcopy = (uct_ep_put_zcopy_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_get_short = (uct_ep_get_short_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_get_bcopy = (uct_ep_get_bcopy_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_get_zcopy = (uct_ep_get_zcopy_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_am_short = (uct_ep_am_short_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_am_bcopy = (uct_ep_am_bcopy_func_t)ucs_empty_function_return_bc_ep_timeout; + ops->ep_am_zcopy = (uct_ep_am_zcopy_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_atomic_cswap64 = (uct_ep_atomic_cswap64_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_atomic_cswap32 = (uct_ep_atomic_cswap32_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_atomic64_post = (uct_ep_atomic64_post_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_atomic32_post = (uct_ep_atomic32_post_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_atomic64_fetch = (uct_ep_atomic64_fetch_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_atomic32_fetch = (uct_ep_atomic32_fetch_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_tag_eager_short = (uct_ep_tag_eager_short_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_tag_eager_bcopy = (uct_ep_tag_eager_bcopy_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_tag_eager_zcopy = (uct_ep_tag_eager_zcopy_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_tag_rndv_zcopy = (uct_ep_tag_rndv_zcopy_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_tag_rndv_cancel = (uct_ep_tag_rndv_cancel_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_tag_rndv_request = (uct_ep_tag_rndv_request_func_t)ucs_empty_function_return_ep_timeout; + 
ops->ep_pending_add = (uct_ep_pending_add_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_pending_purge = uct_ep_failed_purge; + ops->ep_flush = (uct_ep_flush_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_fence = (uct_ep_fence_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_check = (uct_ep_check_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_connect_to_ep = (uct_ep_connect_to_ep_func_t)ucs_empty_function_return_ep_timeout; + ops->ep_destroy = uct_ep_failed_destroy; + ops->ep_get_address = (uct_ep_get_address_func_t)ucs_empty_function_return_ep_timeout; + + ucs_class_call_cleanup_chain(cls, tl_ep, -1); + + tl_ep->iface = &f_iface->super; + + if (iface->err_handler) { + return iface->err_handler(iface->err_handler_arg, tl_ep, status); + } else if (status == UCS_ERR_CANCELED) { + ucs_debug("error %s was suppressed for ep %p", + ucs_status_string(UCS_ERR_CANCELED), tl_ep); + /* Suppress this since the cancellation is initiated by user. */ + status = UCS_OK; + } else { + ucs_debug("error %s was not handled for ep %p", + ucs_status_string(status), tl_ep); + } + + return status; +} + +void uct_base_iface_query(uct_base_iface_t *iface, uct_iface_attr_t *iface_attr) +{ + memset(iface_attr, 0, sizeof(*iface_attr)); + + iface_attr->max_num_eps = iface->config.max_num_eps; +} + +ucs_status_t uct_single_device_resource(uct_md_h md, const char *dev_name, + uct_device_type_t dev_type, + uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + uct_tl_device_resource_t *device; + + device = ucs_calloc(1, sizeof(*device), "device resource"); + if (NULL == device) { + ucs_error("failed to allocate device resource"); + return UCS_ERR_NO_MEMORY; + } + + ucs_snprintf_zero(device->name, sizeof(device->name), "%s", dev_name); + device->type = dev_type; + + *num_tl_devices_p = 1; + *tl_devices_p = device; + return UCS_OK; +} + +UCS_CLASS_INIT_FUNC(uct_iface_t, uct_iface_ops_t *ops) +{ + ucs_assert_always(ops->ep_flush != NULL); + 
ucs_assert_always(ops->ep_fence != NULL); + ucs_assert_always(ops->ep_destroy != NULL); + ucs_assert_always(ops->iface_flush != NULL); + ucs_assert_always(ops->iface_fence != NULL); + ucs_assert_always(ops->iface_progress_enable != NULL); + ucs_assert_always(ops->iface_progress_disable != NULL); + ucs_assert_always(ops->iface_progress != NULL); + ucs_assert_always(ops->iface_close != NULL); + ucs_assert_always(ops->iface_query != NULL); + ucs_assert_always(ops->iface_get_device_address != NULL); + ucs_assert_always(ops->iface_is_reachable != NULL); + + self->ops = *ops; + return UCS_OK; +} + +UCS_CLASS_CLEANUP_FUNC(uct_iface_t) +{ +} + +UCS_CLASS_DEFINE(uct_iface_t, void); + + +UCS_CLASS_INIT_FUNC(uct_base_iface_t, uct_iface_ops_t *ops, uct_md_h md, + uct_worker_h worker, const uct_iface_params_t *params, + const uct_iface_config_t *config + UCS_STATS_ARG(ucs_stats_node_t *stats_parent) + UCS_STATS_ARG(const char *iface_name)) +{ + uint64_t alloc_methods_bitmap; + uct_alloc_method_t method; + unsigned i; + uint8_t id; + + UCS_CLASS_CALL_SUPER_INIT(uct_iface_t, ops); + + UCT_CB_FLAGS_CHECK((params->field_mask & + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS) ? + params->err_handler_flags : 0); + + self->md = md; + self->worker = ucs_derived_of(worker, uct_priv_worker_t); + self->am_tracer = NULL; + self->am_tracer_arg = NULL; + self->err_handler = (params->field_mask & + UCT_IFACE_PARAM_FIELD_ERR_HANDLER) ? + params->err_handler : NULL; + self->err_handler_flags = (params->field_mask & + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS) ? + params->err_handler_flags : 0; + self->err_handler_arg = (params->field_mask & + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_ARG) ? + params->err_handler_arg : NULL; + self->progress_flags = 0; + uct_worker_progress_init(&self->prog); + + for (id = 0; id < UCT_AM_ID_MAX; ++id) { + uct_iface_set_stub_am_handler(self, id); + } + + /* Copy allocation methods configuration. In the process, remove duplicates. 
*/ + UCS_STATIC_ASSERT(sizeof(alloc_methods_bitmap) * 8 >= UCT_ALLOC_METHOD_LAST); + self->config.num_alloc_methods = 0; + alloc_methods_bitmap = 0; + for (i = 0; i < config->alloc_methods.count; ++i) { + method = config->alloc_methods.methods[i]; + if (alloc_methods_bitmap & UCS_BIT(method)) { + continue; + } + + ucs_assert(self->config.num_alloc_methods < UCT_ALLOC_METHOD_LAST); + self->config.alloc_methods[self->config.num_alloc_methods++] = method; + alloc_methods_bitmap |= UCS_BIT(method); + } + + self->config.failure_level = (ucs_log_level_t)config->failure; + self->config.max_num_eps = config->max_num_eps; + + return UCS_STATS_NODE_ALLOC(&self->stats, &uct_iface_stats_class, + stats_parent, "-%s-%p", iface_name, self); +} + +static UCS_CLASS_CLEANUP_FUNC(uct_base_iface_t) +{ + UCS_STATS_NODE_FREE(self->stats); +} + +UCS_CLASS_DEFINE(uct_base_iface_t, uct_iface_t); + + +ucs_status_t uct_iface_accept(uct_iface_h iface, + uct_conn_request_h conn_request) +{ + return iface->ops.iface_accept(iface, conn_request); +} + + +ucs_status_t uct_iface_reject(uct_iface_h iface, + uct_conn_request_h conn_request) +{ + return iface->ops.iface_reject(iface, conn_request); +} + + +ucs_status_t uct_ep_create(const uct_ep_params_t *params, uct_ep_h *ep_p) +{ + if (params->field_mask & UCT_EP_PARAM_FIELD_IFACE) { + return params->iface->ops.ep_create(params, ep_p); + } else if (params->field_mask & UCT_EP_PARAM_FIELD_CM) { + return params->cm->ops->ep_create(params, ep_p); + } + + return UCS_ERR_INVALID_PARAM; +} + +ucs_status_t uct_ep_disconnect(uct_ep_h ep, unsigned flags) +{ + return ep->iface->ops.ep_disconnect(ep, flags); +} + +void uct_ep_destroy(uct_ep_h ep) +{ + ep->iface->ops.ep_destroy(ep); +} + +ucs_status_t uct_ep_get_address(uct_ep_h ep, uct_ep_addr_t *addr) +{ + return ep->iface->ops.ep_get_address(ep, addr); +} + +ucs_status_t uct_ep_connect_to_ep(uct_ep_h ep, const uct_device_addr_t *dev_addr, + const uct_ep_addr_t *ep_addr) +{ + return 
ep->iface->ops.ep_connect_to_ep(ep, dev_addr, ep_addr); +} + +UCS_CLASS_INIT_FUNC(uct_ep_t, uct_iface_t *iface) +{ + self->iface = iface; + return UCS_OK; +} + +UCS_CLASS_CLEANUP_FUNC(uct_ep_t) +{ +} + +UCS_CLASS_DEFINE(uct_ep_t, void); + + +UCS_CLASS_INIT_FUNC(uct_base_ep_t, uct_base_iface_t *iface) +{ + UCS_CLASS_CALL_SUPER_INIT(uct_ep_t, &iface->super); + + return UCS_STATS_NODE_ALLOC(&self->stats, &uct_ep_stats_class, iface->stats, + "-%p", self); +} + +static UCS_CLASS_CLEANUP_FUNC(uct_base_ep_t) +{ + UCS_STATS_NODE_FREE(self->stats); +} + +UCS_CLASS_DEFINE(uct_base_ep_t, uct_ep_t); + + +UCS_CONFIG_DEFINE_ARRAY(alloc_methods, sizeof(uct_alloc_method_t), + UCS_CONFIG_TYPE_ENUM(uct_alloc_method_names)); + +ucs_config_field_t uct_iface_config_table[] = { + {"MAX_SHORT", "", + "The configuration parameter replaced by: " + "UCX__TX_MIN_INLINE for IB, UCX_MM_FIFO_SIZE for MM", + UCS_CONFIG_DEPRECATED_FIELD_OFFSET, UCS_CONFIG_TYPE_DEPRECATED}, + + {"MAX_BCOPY", "", + "The configuration parameter replaced by: " + "UCX__SEG_SIZE where is one of: IB, MM, SELF, TCP", + UCS_CONFIG_DEPRECATED_FIELD_OFFSET, UCS_CONFIG_TYPE_DEPRECATED}, + + {"ALLOC", "huge,thp,md,mmap,heap", + "Priority of methods to allocate intermediate buffers for communication", + ucs_offsetof(uct_iface_config_t, alloc_methods), UCS_CONFIG_TYPE_ARRAY(alloc_methods)}, + + {"FAILURE", "error", + "Level of network failure reporting", + ucs_offsetof(uct_iface_config_t, failure), UCS_CONFIG_TYPE_ENUM(ucs_log_level_names)}, + + {"MAX_NUM_EPS", "inf", + "Maximum number of endpoints that the transport interface is able to create", + ucs_offsetof(uct_iface_config_t, max_num_eps), UCS_CONFIG_TYPE_ULUNITS}, + + {NULL} +}; diff --git a/src/uct/base/uct_iface.h b/src/uct/base/uct_iface.h new file mode 100644 index 0000000..d095ebe --- /dev/null +++ b/src/uct/base/uct_iface.h @@ -0,0 +1,728 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCT_IFACE_H_ +#define UCT_IFACE_H_ + +#include "uct_worker.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +enum { + UCT_EP_STAT_AM, + UCT_EP_STAT_PUT, + UCT_EP_STAT_GET, + UCT_EP_STAT_ATOMIC, +#if IBV_HW_TM + UCT_EP_STAT_TAG, +#endif + UCT_EP_STAT_BYTES_SHORT, + UCT_EP_STAT_BYTES_BCOPY, + UCT_EP_STAT_BYTES_ZCOPY, + UCT_EP_STAT_NO_RES, + UCT_EP_STAT_FLUSH, + UCT_EP_STAT_FLUSH_WAIT, /* number of times flush called while in progress */ + UCT_EP_STAT_PENDING, + UCT_EP_STAT_FENCE, + UCT_EP_STAT_LAST +}; + +enum { + UCT_IFACE_STAT_RX_AM, + UCT_IFACE_STAT_RX_AM_BYTES, + UCT_IFACE_STAT_TX_NO_DESC, + UCT_IFACE_STAT_FLUSH, + UCT_IFACE_STAT_FLUSH_WAIT, /* number of times flush called while in progress */ + UCT_IFACE_STAT_FENCE, + UCT_IFACE_STAT_LAST +}; + + +/* + * Statistics macros + */ +#define UCT_TL_EP_STAT_OP(_ep, _op, _method, _size) \ + UCS_STATS_UPDATE_COUNTER((_ep)->stats, UCT_EP_STAT_##_op, 1); \ + UCS_STATS_UPDATE_COUNTER((_ep)->stats, UCT_EP_STAT_BYTES_##_method, _size); +#define UCT_TL_EP_STAT_OP_IF_SUCCESS(_status, _ep, _op, _method, _size) \ + if (_status >= 0) { \ + UCT_TL_EP_STAT_OP(_ep, _op, _method, _size) \ + } +#define UCT_TL_EP_STAT_ATOMIC(_ep) \ + UCS_STATS_UPDATE_COUNTER((_ep)->stats, UCT_EP_STAT_ATOMIC, 1); +#define UCT_TL_EP_STAT_FLUSH(_ep) \ + UCS_STATS_UPDATE_COUNTER((_ep)->stats, UCT_EP_STAT_FLUSH, 1); +#define UCT_TL_EP_STAT_FLUSH_WAIT(_ep) \ + UCS_STATS_UPDATE_COUNTER((_ep)->stats, UCT_EP_STAT_FLUSH_WAIT, 1); +#define UCT_TL_EP_STAT_FENCE(_ep) \ + UCS_STATS_UPDATE_COUNTER((_ep)->stats, UCT_EP_STAT_FENCE, 1); +#define UCT_TL_EP_STAT_PEND(_ep) \ + UCS_STATS_UPDATE_COUNTER((_ep)->stats, UCT_EP_STAT_PENDING, 1); + +#define UCT_TL_IFACE_STAT_FLUSH(_iface) \ + UCS_STATS_UPDATE_COUNTER((_iface)->stats, UCT_IFACE_STAT_FLUSH, 1); +#define UCT_TL_IFACE_STAT_FLUSH_WAIT(_iface) \ + UCS_STATS_UPDATE_COUNTER((_iface)->stats, UCT_IFACE_STAT_FLUSH_WAIT, 1); 
+#define UCT_TL_IFACE_STAT_FENCE(_iface) \ + UCS_STATS_UPDATE_COUNTER((_iface)->stats, UCT_IFACE_STAT_FENCE, 1); +#define UCT_TL_IFACE_STAT_TX_NO_DESC(_iface) \ + UCS_STATS_UPDATE_COUNTER((_iface)->stats, UCT_IFACE_STAT_TX_NO_DESC, 1); + + +#define UCT_CB_FLAGS_CHECK(_flags) \ + do { \ + if ((_flags) & UCT_CB_FLAG_RESERVED) { \ + ucs_error("Unsupported callback flag 0x%x", UCT_CB_FLAG_RESERVED); \ + return UCS_ERR_INVALID_PARAM; \ + } \ + } while (0) + + +/** + * In release mode - do nothing. + * + * In debug mode, if _condition is not true, return an error. This could be less + * optimal because of additional checks, and that compiler needs to generate code + * for error flow as well. + */ +#define UCT_CHECK_PARAM(_condition, _err_message, ...) \ + if (ENABLE_PARAMS_CHECK && !(_condition)) { \ + ucs_error(_err_message, ## __VA_ARGS__); \ + return UCS_ERR_INVALID_PARAM; \ + } + + +/** + * In release mode - do nothing. + * + * In debug mode, if @a _params field mask does not have set + * @ref UCT_EP_PARAM_FIELD_DEV_ADDR and @ref UCT_EP_PARAM_FIELD_IFACE_ADDR + * flags, return an error. + */ +#define UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(_params) \ + UCT_CHECK_PARAM(ucs_test_all_flags((_params)->field_mask, \ + UCT_EP_PARAM_FIELD_DEV_ADDR | \ + UCT_EP_PARAM_FIELD_IFACE_ADDR), \ + "UCT_EP_PARAM_FIELD_DEV_ADDR and UCT_EP_PARAM_FIELD_IFACE_ADDR are not defined") + + +/** + * Check the condition and return status as a pointer if not true. + */ +#define UCT_CHECK_PARAM_PTR(_condition, _err_message, ...) \ + if (ENABLE_PARAMS_CHECK && !(_condition)) { \ + ucs_error(_err_message, ## __VA_ARGS__); \ + return UCS_STATUS_PTR(UCS_ERR_INVALID_PARAM); \ + } + + +/** + * Check the size of the IOV array + */ +#define UCT_CHECK_IOV_SIZE(_iovcnt, _max_iov, _name) \ + UCT_CHECK_PARAM((_iovcnt) <= (_max_iov), \ + "iovcnt(%lu) should be limited by %lu in %s", \ + _iovcnt, _max_iov, _name) + + +/** + * In debug mode, if _condition is not true, generate 'Invalid length' error. 
+ */ +#define UCT_CHECK_LENGTH(_length, _min_length, _max_length, _name) \ + { \ + typeof(_length) __length = (_length); /* evaluate _length exactly once */ \ + UCT_CHECK_PARAM((__length) <= (_max_length), \ + "Invalid %s length: %zu (expected: <= %zu)", \ + _name, (size_t)(__length), (size_t)(_max_length)); \ + UCT_CHECK_PARAM((ssize_t)(__length) >= (_min_length), \ + "Invalid %s length: %zu (expected: >= %zu)", \ + _name, (size_t)(__length), (size_t)(_min_length)); \ + } + +/** + * Skip if this is a zero-length operation: the variadic arguments are descriptors to release (UCS_PP_FOREACH presumably expands _UCT_RELEASE_DESC, i.e. ucs_mpool_put(), for each - TODO confirm) before the enclosing function returns UCS_OK. + */ +#define UCT_SKIP_ZERO_LENGTH(_length, ...) \ + if (0 == (_length)) { \ + ucs_trace_data("Zero length request: skip it"); \ + UCS_PP_FOREACH(_UCT_RELEASE_DESC, _, __VA_ARGS__) \ + return UCS_OK; \ + } +#define _UCT_RELEASE_DESC(_, _desc) \ + ucs_mpool_put(_desc); + + +/** + * In debug mode, check that active message ID is valid. + */ +#define UCT_CHECK_AM_ID(_am_id) \ + UCT_CHECK_PARAM((_am_id) < UCT_AM_ID_MAX, \ + "Invalid active message id (valid range: 0..%d)", \ + (int)UCT_AM_ID_MAX - 1) + + +/** + * Declare classes for structures defined in api/tl.h + */ +UCS_CLASS_DECLARE(uct_iface_h, uct_iface_ops_t, uct_md_h); +UCS_CLASS_DECLARE(uct_ep_t, uct_iface_h); + + +/** + * Active message handler table entry + */ +typedef struct uct_am_handler { + uct_am_callback_t cb; + void *arg; + uint32_t flags; +} uct_am_handler_t; + + +/** + * Base structure of all interfaces. + * Includes the AM table which we don't want to expose. 
+ */ +typedef struct uct_base_iface { + uct_iface_t super; + uct_md_h md; /* MD this interface is using */ + uct_priv_worker_t *worker; /* Worker this interface is on */ + uct_am_handler_t am[UCT_AM_ID_MAX];/* Active message table */ + uct_am_tracer_t am_tracer; /* Active message tracer */ + void *am_tracer_arg; /* Tracer argument */ + uct_error_handler_t err_handler; /* Error handler */ + void *err_handler_arg; /* Error handler argument */ + uint32_t err_handler_flags; /* Error handler callback flags */ + uct_worker_progress_t prog; /* Will be removed once all transports + support progress control */ + unsigned progress_flags; /* Which progress is currently enabled */ + + struct { + unsigned num_alloc_methods; + uct_alloc_method_t alloc_methods[UCT_ALLOC_METHOD_LAST]; + ucs_log_level_t failure_level; + size_t max_num_eps; + } config; + + UCS_STATS_NODE_DECLARE(stats) /* Statistics */ +} uct_base_iface_t; + +UCS_CLASS_DECLARE(uct_base_iface_t, uct_iface_ops_t*, uct_md_h, uct_worker_h, + const uct_iface_params_t*, const uct_iface_config_t* + UCS_STATS_ARG(ucs_stats_node_t*) UCS_STATS_ARG(const char*)); + + +/** + * Stub interface used for failed endpoints + */ +typedef struct uct_failed_iface { + uct_iface_t super; + ucs_queue_head_t pend_q; +} uct_failed_iface_t; + + +/** + * Base structure of all endpoints. + */ +typedef struct uct_base_ep { + uct_ep_t super; + UCS_STATS_NODE_DECLARE(stats) +} uct_base_ep_t; +UCS_CLASS_DECLARE(uct_base_ep_t, uct_base_iface_t*); + + +/** + * Internal resource descriptor of a transport device + */ +typedef struct uct_tl_device_resource { + char name[UCT_DEVICE_NAME_MAX]; /**< Hardware device name */ + uct_device_type_t type; /**< Device type. To which UCT group it belongs to */ +} uct_tl_device_resource_t; + + +/** + * UCT transport definition. This structure should not be used directly; use + * @ref UCT_TL_DEFINE macro to define a transport. 
+ */ +typedef struct uct_tl { + char name[UCT_TL_NAME_MAX]; /**< Transport name */ + + ucs_status_t (*query_devices)(uct_md_h md, + uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p); + + ucs_status_t (*iface_open)(uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *config, + uct_iface_h *iface_p); + + ucs_config_global_list_entry_t config; /**< Transport configuration entry */ + ucs_list_link_t list; /**< Entry in component's transports list */ +} uct_tl_t; + + +/** + * Define a transport + * + * @param _component Component to add the transport to + * @param _name Name of the transport (should be a token, not a string) + * @param _query_devices Function to query the list of available devices + * @param _iface_class Struct type defining the uct_iface class + */ +#define UCT_TL_DEFINE(_component, _name, _query_devices, _iface_class, \ + _cfg_prefix, _cfg_table, _cfg_struct) \ + \ + uct_tl_t uct_##_name##_tl = { \ + .name = #_name, \ + .query_devices = _query_devices, \ + .iface_open = UCS_CLASS_NEW_FUNC_NAME(_iface_class), \ + .config = { \ + .name = #_name" transport", \ + .prefix = _cfg_prefix, \ + .table = _cfg_table, \ + .size = sizeof(_cfg_struct), \ + } \ + }; \ + UCS_CONFIG_REGISTER_TABLE_ENTRY(&(uct_##_name##_tl).config); \ + UCS_STATIC_INIT { \ + ucs_list_add_tail(&(_component)->tl_list, &(uct_##_name##_tl).list); \ + } + + +/** + * "Base" structure which defines interface configuration options. + * Specific transport extend this structure. + */ +struct uct_iface_config { + struct { + uct_alloc_method_t *methods; + unsigned count; + } alloc_methods; + + int failure; /* Level of failure reports */ + size_t max_num_eps; +}; + + +/** + * Memory pool configuration. + */ +typedef struct uct_iface_mpool_config { + unsigned max_bufs; /* Upper limit to number of buffers */ + unsigned bufs_grow; /* How many buffers (approx.) 
are allocated every time */ +} uct_iface_mpool_config_t; + + +/** + * Define configuration fields for memory pool parameters: expands to two table entries, named _prefix + "MAX_BUFS" and _prefix + "BUFS_GROW", whose offsets are _offset plus the matching uct_iface_mpool_config_t member. + */ +#define UCT_IFACE_MPOOL_CONFIG_FIELDS(_prefix, _dfl_max, _dfl_grow, _mp_name, _offset, _desc) \ + {_prefix "MAX_BUFS", UCS_PP_QUOTE(_dfl_max), \ + "Maximal number of " _mp_name " buffers for the interface. -1 is infinite." \ + _desc, \ + (_offset) + ucs_offsetof(uct_iface_mpool_config_t, max_bufs), UCS_CONFIG_TYPE_INT}, \ + \ + {_prefix "BUFS_GROW", UCS_PP_QUOTE(_dfl_grow), \ + "How much buffers are added every time the " _mp_name " memory pool grows.\n" \ + "0 means the value is chosen by the transport.", \ + (_offset) + ucs_offsetof(uct_iface_mpool_config_t, bufs_grow), UCS_CONFIG_TYPE_UINT} + + +/** + * Get a descriptor from memory pool and tell valgrind it's already defined; + * execute _failure if the memory pool is empty. + * + * @param _mp Memory pool to get descriptor from. + * @param _desc Variable to assign descriptor to. + * @param _failure What to do if the memory pool is empty. + * @param _iface Interface whose TX_NO_DESC statistic is updated before _failure runs. + * @note The macro yields no value: the fetched descriptor is assigned to _desc. + */ +#define UCT_TL_IFACE_GET_TX_DESC(_iface, _mp, _desc, _failure) \ + { \ + _desc = ucs_mpool_get_inline(_mp); \ + if (ucs_unlikely((_desc) == NULL)) { \ + UCT_TL_IFACE_STAT_TX_NO_DESC(_iface); \ + _failure; \ + } \ + \ + VALGRIND_MAKE_MEM_DEFINED(_desc, sizeof(*(_desc))); \ + } + +/* Same as UCT_TL_IFACE_GET_TX_DESC, but calls uct_iface_mpool_empty_warn() instead of updating a statistic. */ +#define UCT_TL_IFACE_GET_RX_DESC(_iface, _mp, _desc, _failure) \ + { \ + _desc = ucs_mpool_get_inline(_mp); \ + if (ucs_unlikely((_desc) == NULL)) { \ + uct_iface_mpool_empty_warn(_iface, _mp); \ + _failure; \ + } \ + \ + VALGRIND_MAKE_MEM_DEFINED(_desc, sizeof(*(_desc))); \ + } + +/* Return _desc to its memory pool, then mark its memory undefined for valgrind. */ +#define UCT_TL_IFACE_PUT_DESC(_desc) \ + { \ + ucs_mpool_put_inline(_desc); \ + VALGRIND_MAKE_MEM_UNDEFINED(_desc, sizeof(*(_desc))); \ + } + + +/** + * TL Memory pool object initialization callback. 
+ */ +typedef void (*uct_iface_mpool_init_obj_cb_t)(uct_iface_h iface, void *obj, + uct_mem_h memh); + + +/** + * Base structure for private data held inside a pending request for TLs + * which use ucs_arbiter_t to progress pending requests. + */ +typedef struct { + ucs_arbiter_elem_t arb_elem; +} uct_pending_req_priv_arb_t; + + +static UCS_F_ALWAYS_INLINE ucs_arbiter_elem_t * +uct_pending_req_priv_arb_elem(uct_pending_req_t *req) +{ + uct_pending_req_priv_arb_t *priv_arb_p = + (uct_pending_req_priv_arb_t *)&req->priv; + + return &priv_arb_p->arb_elem; +} + + +/** + * Add a pending request to the arbiter. + */ +#define uct_pending_req_arb_group_push(_arbiter_group, _req) \ + do { \ + ucs_arbiter_elem_init(uct_pending_req_priv_arb_elem(_req)); \ + ucs_arbiter_group_push_elem(_arbiter_group, \ + uct_pending_req_priv_arb_elem(_req)); \ + } while (0) + + +/** + * Add a pending request to the head of group in arbiter. + */ +#define uct_pending_req_arb_group_push_head(_arbiter, _arbiter_group, _req) \ + do { \ + ucs_arbiter_elem_init(uct_pending_req_priv_arb_elem(_req)); \ + ucs_arbiter_group_push_head_elem(_arbiter, _arbiter_group, \ + uct_pending_req_priv_arb_elem(_req)); \ + } while (0) + + +/** + * Base structure for private data held inside a pending request for TLs + * which use ucs_queue_t to progress pending requests. + */ +typedef struct { + ucs_queue_elem_t queue_elem; +} uct_pending_req_priv_queue_t; + + +static UCS_F_ALWAYS_INLINE ucs_queue_elem_t * +uct_pending_req_priv_queue_elem(uct_pending_req_t* req) +{ + uct_pending_req_priv_queue_t *priv_queue_p = + (uct_pending_req_priv_queue_t *)&req->priv; + + return &priv_queue_p->queue_elem; +} + + +/** + * Add a pending request to the queue. 
+ */ +#define uct_pending_req_queue_push(_queue, _req) \ + ucs_queue_push((_queue), uct_pending_req_priv_queue_elem(_req)) + + +typedef struct { + uct_pending_purge_callback_t cb; + void *arg; +} uct_purge_cb_args_t; + + +/** + * Dispatch all requests in the pending queue, as long as _cond holds true. + * _cond is an expression which can use "_priv" variable. + * + * @param _priv Variable which will hold a pointer to request private data. + * @param _queue The pending queue. + * @param _cond Condition which should be true in order to keep dispatching. + * + * TODO support a callback returning UCS_INPROGRESS. + */ +#define uct_pending_queue_dispatch(_priv, _queue, _cond) \ + while (!ucs_queue_is_empty(_queue)) { \ + uct_pending_req_priv_queue_t *_base_priv; \ + uct_pending_req_t *_req; \ + ucs_status_t _status; \ + \ + _base_priv = \ + ucs_queue_head_elem_non_empty((_queue), \ + uct_pending_req_priv_queue_t, \ + queue_elem); \ + \ + UCS_STATIC_ASSERT(sizeof(*(_priv)) <= UCT_PENDING_REQ_PRIV_LEN); \ + _priv = (typeof(_priv))(_base_priv); \ + \ + if (!(_cond)) { \ + break; \ + } \ + \ + _req = ucs_container_of(_priv, uct_pending_req_t, priv); \ + ucs_queue_pull_non_empty(_queue); \ + _status = _req->func(_req); \ + if (_status != UCS_OK) { \ + ucs_queue_push_head(_queue, &_base_priv->queue_elem); \ + } \ + } + + +/** + * Purge messages from the pending queue. + * + * @param _priv Variable which will hold a pointer to request private data. + * @param _queue The pending queue. + * @param _cond Condition which should be true in order to remove a request. + * @param _cb Callback for purging the request. + * @return Callback return value. 
+ */ +#define uct_pending_queue_purge(_priv, _queue, _cond, _cb, _arg) \ + { \ + uct_pending_req_priv_queue_t *_base_priv; \ + ucs_queue_iter_t _iter; \ + \ + ucs_queue_for_each_safe(_base_priv, _iter, _queue, queue_elem) { \ + _priv = (typeof(_priv))(_base_priv); \ + if (_cond) { \ + ucs_queue_del_iter(_queue, _iter); \ + (void)_cb(ucs_container_of(_base_priv, uct_pending_req_t, priv), _arg); \ + } \ + } \ + } + + +/** + * Helper macro to trace active message send/receive. + * + * @param _iface Interface. + * @param _type Message type (send/receive) + * @param _am_id Active message ID. + * @param _payload Active message payload. + * @param _length Active message length; _fmt and the variadic arguments form the printf-style prefix of the trace line. + */ +#define uct_iface_trace_am(_iface, _type, _am_id, _payload, _length, _fmt, ...) \ + if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { \ + char buf[256] = {0}; \ + uct_iface_dump_am(_iface, _type, _am_id, _payload, _length, \ + buf, sizeof(buf) - 1); \ + ucs_trace_data(_fmt " am_id %d len %zu %s", ## __VA_ARGS__, \ + _am_id, (size_t)(_length), buf); \ + } + + +extern ucs_config_field_t uct_iface_config_table[]; + + +/** + * Initialize a memory pool for buffers used by TL interface. + * + * @param mp Memory pool to initialize. + * @param elem_size Size of one pool element (presumably in bytes - TODO confirm). + * @param align_offset Offset used together with alignment (presumably the offset inside an element that gets aligned - TODO confirm). + * @param alignment Data will be aligned to these units. + * @param config Memory pool configuration. + * @param grow Default number of buffers added for every chunk. + * @param init_obj_cb Object constructor. + * @param name Memory pool name. + */ +ucs_status_t uct_iface_mpool_init(uct_base_iface_t *iface, ucs_mpool_t *mp, + size_t elem_size, size_t align_offset, size_t alignment, + const uct_iface_mpool_config_t *config, unsigned grow, + uct_iface_mpool_init_obj_cb_t init_obj_cb, + const char *name); + + +/** + * Dump active message contents using the user-defined tracer callback. 
+ */ +void uct_iface_dump_am(uct_base_iface_t *iface, uct_am_trace_type_t type, + uint8_t id, const void *data, size_t length, + char *buffer, size_t max); + +void uct_iface_mpool_empty_warn(uct_base_iface_t *iface, ucs_mpool_t *mp); + +ucs_status_t uct_set_ep_failed(ucs_class_t* cls, uct_ep_h tl_ep, uct_iface_h + tl_iface, ucs_status_t status); + +void uct_base_iface_query(uct_base_iface_t *iface, uct_iface_attr_t *iface_attr); + +ucs_status_t uct_single_device_resource(uct_md_h md, const char *dev_name, + uct_device_type_t dev_type, + uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p); + +ucs_status_t uct_base_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp); + +ucs_status_t uct_base_iface_fence(uct_iface_h tl_iface, unsigned flags); + +void uct_base_iface_progress_enable(uct_iface_h tl_iface, unsigned flags); + +void uct_base_iface_progress_enable_cb(uct_base_iface_t *iface, + ucs_callback_t cb, unsigned flags); + +void uct_base_iface_progress_disable(uct_iface_h tl_iface, unsigned flags); + +ucs_status_t uct_base_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp); + +ucs_status_t uct_base_ep_fence(uct_ep_h tl_ep, unsigned flags); + +/* + * Invoke active message handler. + * + * @param iface Interface to invoke the handler for. + * @param id Active message ID. + * @param data Received data. + * @param length Length of received data. 
+ * @param flags Mask with @ref uct_cb_param_flags + */ +static inline ucs_status_t +uct_iface_invoke_am(uct_base_iface_t *iface, uint8_t id, void *data, + unsigned length, unsigned flags) +{ + ucs_status_t status; + uct_am_handler_t *handler; + + ucs_assertv(id < UCT_AM_ID_MAX, "invalid am id: %d (max: %lu)", + id, UCT_AM_ID_MAX - 1); + + UCS_STATS_UPDATE_COUNTER(iface->stats, UCT_IFACE_STAT_RX_AM, 1); + UCS_STATS_UPDATE_COUNTER(iface->stats, UCT_IFACE_STAT_RX_AM_BYTES, length); + + handler = &iface->am[id]; + status = handler->cb(handler->arg, data, length, flags); + ucs_assert((status == UCS_OK) || + ((status == UCS_INPROGRESS) && (flags & UCT_CB_PARAM_FLAG_DESC))); /* UCS_INPROGRESS is accepted only together with UCT_CB_PARAM_FLAG_DESC */ + return status; +} + + +/** + * Invoke send completion: decrement the completion counter and call comp->func once it reaches zero. + * + * @param comp Completion to invoke. + * @param status Status passed to the completion callback when it is called. + */ +static UCS_F_ALWAYS_INLINE +void uct_invoke_completion(uct_completion_t *comp, ucs_status_t status) +{ + ucs_trace_func("comp=%p, count=%d, status=%d", comp, comp->count, status); + if (--comp->count == 0) { + comp->func(comp, status); + } +} + +/** + * Calculates total length of particular iov data buffer. + * Currently has no support for stride. + * If stride supported it should be like: length + ((count - 1) * stride) + */ +static UCS_F_ALWAYS_INLINE +size_t uct_iov_get_length(const uct_iov_t *iov) +{ + return iov->count * iov->length; +} + +/** + * Calculates total length of the iov array buffers. + */ +static UCS_F_ALWAYS_INLINE +size_t uct_iov_total_length(const uct_iov_t *iov, size_t iovcnt) +{ + size_t iov_it, total_length = 0; + + for (iov_it = 0; iov_it < iovcnt; ++iov_it) { + total_length += uct_iov_get_length(&iov[iov_it]); + } + + return total_length; +} + +/** + * Fill iovec data structure by data provided in uct_iov_t. + * The function avoids copying IOVs with zero length. + * @return Number of elements in io_vec[]. 
+ */ +static UCS_F_ALWAYS_INLINE +size_t uct_iovec_fill_iov(struct iovec *io_vec, const uct_iov_t *iov, + size_t iovcnt, size_t *total_length) +{ + size_t io_vec_it = 0; + size_t io_vec_len = 0; + size_t iov_it; + + *total_length = 0; /* sums the lengths of the entries actually copied */ + + for (iov_it = 0; iov_it < iovcnt; ++iov_it) { + io_vec_len = uct_iov_get_length(&iov[iov_it]); + + /* Avoid zero length elements in resulted iov_vec */ + if (io_vec_len != 0) { + io_vec[io_vec_it].iov_len = io_vec_len; + io_vec[io_vec_it].iov_base = iov[iov_it].buffer; + *total_length += io_vec_len; + ++io_vec_it; + } + } + + return io_vec_it; +} + +/** + * Copy data to target am_short buffer: the 64-bit header first, then length bytes of payload. + */ +static UCS_F_ALWAYS_INLINE +void uct_am_short_fill_data(void *buffer, uint64_t header, const void *payload, + size_t length) +{ + /** + * Helper structure to fill send buffer of short messages for + * non-accelerated transports + */ + struct uct_am_short_packet { + uint64_t header; + char payload[]; + } UCS_S_PACKED *packet = (struct uct_am_short_packet*)buffer; + + packet->header = header; + /* suppress false positive diagnostic from uct_mm_ep_am_common_send call */ + /* cppcheck-suppress ctunullpointer */ + memcpy(packet->payload, payload, length); +} + +#endif diff --git a/src/uct/base/uct_log.h b/src/uct/base/uct_log.h new file mode 100644 index 0000000..dc2c840 --- /dev/null +++ b/src/uct/base/uct_log.h @@ -0,0 +1,31 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_LOG_H_ +#define UCT_LOG_H_ + +#include "uct_iface.h" + +#include +#include + + +/** + * In debug mode, print packet description to the log; _info is the preformatted description string, logged with "%s" at UCS_LOG_LEVEL_TRACE_DATA (it is the macro argument that is printed, not a caller-scope variable). + */ +#define uct_log_data(_file, _line, _function, _info) \ + ucs_log_dispatch(_file, _line, _function, UCS_LOG_LEVEL_TRACE_DATA, "%s", _info); + + +/** + * Log callback which prints information about transport headers. 
+ */ +typedef void (*uct_log_data_dump_func_t)(uct_base_iface_t *iface, + uct_am_trace_type_t type, void *data, + size_t length, size_t valid_length, + char *buffer, size_t max); + +#endif diff --git a/src/uct/base/uct_md.c b/src/uct/base/uct_md.c new file mode 100644 index 0000000..994472b --- /dev/null +++ b/src/uct/base/uct_md.c @@ -0,0 +1,431 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "uct_md.h" +#include "uct_iface.h" + +#include +#include +#include +#include +#include +#include +#include + + +ucs_config_field_t uct_md_config_table[] = { + + {NULL} +}; + +ucs_config_field_t uct_md_config_rcache_table[] = { + {"RCACHE_MEM_PRIO", "1000", "Registration cache memory event priority", + ucs_offsetof(uct_md_rcache_config_t, event_prio), UCS_CONFIG_TYPE_UINT}, + + {"RCACHE_OVERHEAD", "90ns", "Registration cache lookup overhead", + ucs_offsetof(uct_md_rcache_config_t, overhead), UCS_CONFIG_TYPE_TIME}, + + {"RCACHE_ADDR_ALIGN", UCS_PP_MAKE_STRING(UCS_SYS_CACHE_LINE_SIZE), + "Registration cache address alignment, must be power of 2\n" + "between "UCS_PP_MAKE_STRING(UCS_PGT_ADDR_ALIGN)" and system page size", /* adjacent literals are concatenated, so the leading space is required */ + ucs_offsetof(uct_md_rcache_config_t, alignment), UCS_CONFIG_TYPE_UINT}, + + {NULL} +}; + +/* Open a memory domain through component->md_open() and check that the returned MD refers back to the same component. */ +ucs_status_t uct_md_open(uct_component_h component, const char *md_name, + const uct_md_config_t *config, uct_md_h *md_p) +{ + ucs_status_t status; + uct_md_h md; + + status = component->md_open(component, md_name, config, &md); + if (status != UCS_OK) { + return status; + } + + *md_p = md; + ucs_assert_always(md->component == component); + return UCS_OK; +} + +void uct_md_close(uct_md_h md) +{ + md->ops->close(md); +} + +ucs_status_t uct_md_query_tl_resources(uct_md_h md, + uct_tl_resource_desc_t **resources_p, + 
unsigned *num_resources_p) +{ + uct_component_t *component = md->component; + uct_tl_resource_desc_t *resources, *tmp; + uct_tl_device_resource_t *tl_devices; + unsigned i, num_resources, num_tl_devices; + ucs_status_t status; + uct_tl_t *tl; + + resources = NULL; + num_resources = 0; + + ucs_list_for_each(tl, &component->tl_list, list) { + status = tl->query_devices(md, &tl_devices, &num_tl_devices); + if (status != UCS_OK) { + ucs_debug("failed to query %s resources: %s", tl->name, + ucs_status_string(status)); + continue; + } + + if (num_tl_devices == 0) { + ucs_free(tl_devices); + continue; + } + + tmp = ucs_realloc(resources, + sizeof(*resources) * (num_resources + num_tl_devices), + "md_resources"); + if (tmp == NULL) { + ucs_free(tl_devices); + status = UCS_ERR_NO_MEMORY; + goto err; + } + + /* add tl devices to overall list of resources */ + for (i = 0; i < num_tl_devices; ++i) { + ucs_strncpy_zero(tmp[num_resources + i].tl_name, tl->name, + sizeof(tmp[num_resources + i].tl_name)); + ucs_strncpy_zero(tmp[num_resources + i].dev_name, tl_devices[i].name, + sizeof(tmp[num_resources + i].dev_name)); + tmp[num_resources + i].dev_type = tl_devices[i].type; + } + + resources = tmp; + num_resources += num_tl_devices; + ucs_free(tl_devices); + } + + *resources_p = resources; + *num_resources_p = num_resources; + return UCS_OK; + +err: + ucs_free(resources); + return status; +} + +void uct_release_tl_resource_list(uct_tl_resource_desc_t *resources) +{ + ucs_free(resources); +} + +ucs_status_t +uct_md_query_single_md_resource(uct_component_t *component, + uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p) +{ + uct_md_resource_desc_t *resource; + + resource = ucs_malloc(sizeof(*resource), "md resource"); + if (resource == NULL) { + return UCS_ERR_NO_MEMORY; + } + + ucs_snprintf_zero(resource->md_name, UCT_MD_NAME_MAX, "%s", + component->name); + + *resources_p = resource; + *num_resources_p = 1; + return UCS_OK; +} + +ucs_status_t 
+uct_md_query_empty_md_resource(uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p) +{ + *resources_p = NULL; + *num_resources_p = 0; + return UCS_OK; +} + +ucs_status_t uct_md_stub_rkey_unpack(uct_component_t *component, + const void *rkey_buffer, uct_rkey_t *rkey_p, + void **handle_p) +{ + *rkey_p = 0xdeadbeef; + *handle_p = NULL; + return UCS_OK; +} + +static uct_tl_t *uct_find_tl(uct_component_h component, uint64_t md_flags, + const char *tl_name) +{ + uct_tl_t *tl; + + ucs_list_for_each(tl, &component->tl_list, list) { + if (((tl_name != NULL) && !strcmp(tl_name, tl->name)) || + ((tl_name == NULL) && (md_flags & UCT_MD_FLAG_SOCKADDR))) { + return tl; + } + } + return NULL; +} + +ucs_status_t uct_md_iface_config_read(uct_md_h md, const char *tl_name, + const char *env_prefix, const char *filename, + uct_iface_config_t **config_p) +{ + uct_config_bundle_t *bundle = NULL; + uct_md_attr_t md_attr; + ucs_status_t status; + uct_tl_t *tl; + + status = uct_md_query(md, &md_attr); + if (status != UCS_OK) { + ucs_error("Failed to query MD"); + return status; + } + + tl = uct_find_tl(md->component, md_attr.cap.flags, tl_name); + if (tl == NULL) { + if (tl_name == NULL) { + ucs_error("There is no sockaddr transport registered on the md"); + } else { + ucs_error("Transport '%s' does not exist", tl_name); + } + status = UCS_ERR_NO_DEVICE; /* Non-existing transport */ + return status; + } + + status = uct_config_read(&bundle, tl->config.table, tl->config.size, + env_prefix, tl->config.prefix); + if (status != UCS_OK) { + ucs_error("Failed to read iface config"); + return status; + } + + *config_p = (uct_iface_config_t*) bundle->data; + /* coverity[leaked_storage] */ + return UCS_OK; +} + +ucs_status_t uct_iface_open(uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *config, + uct_iface_h *iface_p) +{ + uct_md_attr_t md_attr; + ucs_status_t status; + uct_tl_t *tl; + + status = uct_md_query(md, &md_attr); + if 
(status != UCS_OK) { + ucs_error("Failed to query MD"); + return status; + } + + UCT_CHECK_PARAM(params->field_mask & UCT_IFACE_PARAM_FIELD_OPEN_MODE, + "UCT_IFACE_PARAM_FIELD_OPEN_MODE is not defined"); + + if (params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE) { + tl = uct_find_tl(md->component, md_attr.cap.flags, + params->mode.device.tl_name); + } else if ((params->open_mode & UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT) || + (params->open_mode & UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER)) { + tl = uct_find_tl(md->component, md_attr.cap.flags, NULL); + } else { + ucs_error("Invalid open mode %zu", params->open_mode); + return UCS_ERR_INVALID_PARAM; /* status is UCS_OK here; returning it would report success without setting *iface_p */ + } + + if (tl == NULL) { + /* Non-existing transport */ + return UCS_ERR_NO_DEVICE; + } + + return tl->iface_open(md, worker, params, config, iface_p); +} + +ucs_status_t uct_md_config_read(uct_component_h component, + const char *env_prefix, const char *filename, + uct_md_config_t **config_p) +{ + uct_config_bundle_t *bundle = NULL; + ucs_status_t status; + + status = uct_config_read(&bundle, component->md_config.table, + component->md_config.size, env_prefix, + component->md_config.prefix); + if (status != UCS_OK) { + ucs_error("Failed to read MD config"); + return status; + } + + *config_p = (uct_md_config_t*) bundle->data; + /* coverity[leaked_storage] */ + return UCS_OK; +} + +void uct_config_release(void *config) +{ + uct_config_bundle_t *bundle = (uct_config_bundle_t *)config - 1; + + ucs_config_parser_release_opts(config, bundle->table); + ucs_free((void*)(bundle->table_prefix)); + ucs_free(bundle); +} + +ucs_status_t uct_config_get(void *config, const char *name, char *value, + size_t max) +{ + uct_config_bundle_t *bundle = (uct_config_bundle_t *)config - 1; + return ucs_config_parser_get_value(bundle->data, bundle->table, name, value, + max); +} + +ucs_status_t uct_config_modify(void *config, const char *name, const char *value) +{ + uct_config_bundle_t *bundle = (uct_config_bundle_t *)config - 1; + return 
ucs_config_parser_set_value(bundle->data, bundle->table, name, value); +} + +ucs_status_t uct_md_mkey_pack(uct_md_h md, uct_mem_h memh, void *rkey_buffer) +{ + void *rbuf = uct_md_fill_md_name(md, rkey_buffer); + return md->ops->mkey_pack(md, memh, rbuf); +} + +ucs_status_t uct_rkey_unpack(uct_component_h component, const void *rkey_buffer, + uct_rkey_bundle_t *rkey_ob) +{ + char component_name[UCT_COMPONENT_NAME_MAX + 1]; + + if (ENABLE_DEBUG_DATA) { + if (ENABLE_PARAMS_CHECK && + strncmp(rkey_buffer, component->name, UCT_COMPONENT_NAME_MAX)) { + ucs_snprintf_zero(component_name, sizeof(component_name), "%s", + (const char*)rkey_buffer); + ucs_error("invalid component for rkey unpack; " + "expected: %s, actual: %s", component_name, component->name); + return UCS_ERR_INVALID_PARAM; + } + + rkey_buffer = UCS_PTR_BYTE_OFFSET(rkey_buffer, UCT_COMPONENT_NAME_MAX); + } + + return component->rkey_unpack(component, rkey_buffer, &rkey_ob->rkey, + &rkey_ob->handle); +} + +ucs_status_t uct_rkey_ptr(uct_component_h component, uct_rkey_bundle_t *rkey_ob, + uint64_t remote_addr, void **local_addr_p) +{ + return component->rkey_ptr(component, rkey_ob->rkey, rkey_ob->handle, + remote_addr, local_addr_p); +} + +ucs_status_t uct_rkey_release(uct_component_h component, + const uct_rkey_bundle_t *rkey_ob) +{ + return component->rkey_release(component, rkey_ob->rkey, rkey_ob->handle); +} + +ucs_status_t uct_md_query(uct_md_h md, uct_md_attr_t *md_attr) +{ + ucs_status_t status; + + status = md->ops->query(md, md_attr); + if (status != UCS_OK) { + return status; + } + + /* Component name + data */ + memcpy(md_attr->component_name, md->component->name, UCT_COMPONENT_NAME_MAX); + +#if ENABLE_DEBUG_DATA + /* MD name is packed into rkey in DEBUG mode only */ + md_attr->rkey_packed_size += UCT_COMPONENT_NAME_MAX; +#endif + + return UCS_OK; +} + +static ucs_status_t uct_mem_check_flags(unsigned flags) +{ + if (!(flags & UCT_MD_MEM_ACCESS_ALL)) { + return UCS_ERR_INVALID_PARAM; + } + return 
UCS_OK; +} + +ucs_status_t uct_md_mem_alloc(uct_md_h md, size_t *length_p, void **address_p, + unsigned flags, const char *alloc_name, uct_mem_h *memh_p) +{ + ucs_status_t status; + + status = uct_mem_check_flags(flags); + if (status != UCS_OK) { + return status; + } + + return md->ops->mem_alloc(md, length_p, address_p, flags, alloc_name, memh_p); +} + +ucs_status_t uct_md_mem_free(uct_md_h md, uct_mem_h memh) +{ + return md->ops->mem_free(md, memh); +} + +ucs_status_t +uct_md_mem_advise(uct_md_h md, uct_mem_h memh, void *addr, size_t length, + unsigned advice) +{ + if ((length == 0) || (addr == NULL)) { + return UCS_ERR_INVALID_PARAM; + } + + return md->ops->mem_advise(md, memh, addr, length, advice); +} + +ucs_status_t uct_md_mem_reg(uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + ucs_status_t status; + + if ((length == 0) || (address == NULL)) { + return UCS_ERR_INVALID_PARAM; + } + + status = uct_mem_check_flags(flags); + if (status != UCS_OK) { + return status; + } + + return md->ops->mem_reg(md, address, length, flags, memh_p); +} + +ucs_status_t uct_md_mem_dereg(uct_md_h md, uct_mem_h memh) +{ + return md->ops->mem_dereg(md, memh); +} + +int uct_md_is_sockaddr_accessible(uct_md_h md, const ucs_sock_addr_t *sockaddr, + uct_sockaddr_accessibility_t mode) +{ + return md->ops->is_sockaddr_accessible(md, sockaddr, mode); +} + +ucs_status_t uct_md_detect_memory_type(uct_md_h md, const void *addr, size_t length, + ucs_memory_type_t *mem_type_p) +{ + return md->ops->detect_memory_type(md, addr, length, mem_type_p); +} diff --git a/src/uct/base/uct_md.h b/src/uct/base/uct_md.h new file mode 100644 index 0000000..6e8825b --- /dev/null +++ b/src/uct/base/uct_md.h @@ -0,0 +1,146 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCT_MD_H_ +#define UCT_MD_H_ + +#include "uct_component.h" + +#include +#include +#include + + +typedef struct uct_md_rcache_config { + size_t alignment; /**< Force address alignment */ + unsigned event_prio; /**< Memory events priority */ + double overhead; /**< Lookup overhead estimation */ +} uct_md_rcache_config_t; + + +extern ucs_config_field_t uct_md_config_rcache_table[]; + +/** + * "Base" structure which defines MD configuration options. + * Specific MDs extend this structure. + */ +struct uct_md_config { + /* C standard prohibits empty structures */ + char __dummy; +}; + + +typedef void (*uct_md_close_func_t)(uct_md_h md); + +typedef ucs_status_t (*uct_md_query_func_t)(uct_md_h md, + uct_md_attr_t *md_attr); + +typedef ucs_status_t (*uct_md_mem_alloc_func_t)(uct_md_h md, + size_t *length_p, + void **address_p, + unsigned flags, + const char *alloc_name, + uct_mem_h *memh_p); + +typedef ucs_status_t (*uct_md_mem_free_func_t)(uct_md_h md, uct_mem_h memh); + +typedef ucs_status_t (*uct_md_mem_advise_func_t)(uct_md_h md, + uct_mem_h memh, + void *addr, + size_t length, + unsigned advice); + +typedef ucs_status_t (*uct_md_mem_reg_func_t)(uct_md_h md, void *address, + size_t length, + unsigned flags, + uct_mem_h *memh_p); + +typedef ucs_status_t (*uct_md_mem_dereg_func_t)(uct_md_h md, uct_mem_h memh); + +typedef ucs_status_t (*uct_md_mkey_pack_func_t)(uct_md_h md, uct_mem_h memh, + void *rkey_buffer); + +typedef int (*uct_md_is_sockaddr_accessible_func_t)(uct_md_h md, + const ucs_sock_addr_t *sockaddr, + uct_sockaddr_accessibility_t mode); + +typedef ucs_status_t (*uct_md_detect_memory_type_func_t)(uct_md_h md, + const void *addr, + size_t length, + ucs_memory_type_t *mem_type_p); + + +/** + * Memory domain operations + */ +struct uct_md_ops { + uct_md_close_func_t close; + uct_md_query_func_t query; + uct_md_mem_alloc_func_t mem_alloc; + uct_md_mem_free_func_t mem_free; + uct_md_mem_advise_func_t mem_advise; + uct_md_mem_reg_func_t mem_reg; + 
uct_md_mem_dereg_func_t mem_dereg; + uct_md_mkey_pack_func_t mkey_pack; + uct_md_is_sockaddr_accessible_func_t is_sockaddr_accessible; + uct_md_detect_memory_type_func_t detect_memory_type; +}; + + +/** + * Memory domain + */ +struct uct_md { + uct_md_ops_t *ops; + uct_component_t *component; +}; + + +#define UCT_MD_DEFAULT_CONFIG_INITIALIZER \ + { \ + .name = "Default memory domain", \ + .prefix = "", \ + .table = uct_md_config_table, \ + .size = sizeof(uct_md_config_t), \ + } + + +static UCS_F_ALWAYS_INLINE void* +uct_md_fill_md_name(uct_md_h md, void *buffer) +{ +#if ENABLE_DEBUG_DATA + memcpy(buffer, md->component->name, UCT_COMPONENT_NAME_MAX); + return (char*)buffer + UCT_COMPONENT_NAME_MAX; +#else + return buffer; +#endif +} + +/* + * Base implementation of query_md_resources(), which returns a single md + * resource whose name is identical to component name. + */ +ucs_status_t +uct_md_query_single_md_resource(uct_component_t *component, + uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p); + +ucs_status_t +uct_md_query_empty_md_resource(uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p); + +/** + * @brief Dummy function + * Dummy function to emulate unpacking a remote key buffer to handle. + * + */ +ucs_status_t uct_md_stub_rkey_unpack(uct_component_t *component, + const void *rkey_buffer, uct_rkey_t *rkey_p, + void **handle_p); + +extern ucs_config_field_t uct_md_config_table[]; + +#endif diff --git a/src/uct/base/uct_mem.c b/src/uct/base/uct_mem.c new file mode 100644 index 0000000..743998b --- /dev/null +++ b/src/uct/base/uct_mem.c @@ -0,0 +1,434 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "uct_iface.h" +#include "uct_md.h" + +#include +#include +#include + + +typedef struct { + uct_alloc_method_t method; + size_t length; + uct_mem_h memh; +} uct_iface_mp_chunk_hdr_t; + + +typedef struct { + uct_base_iface_t *iface; + uct_iface_mpool_init_obj_cb_t init_obj_cb; +} uct_iface_mp_priv_t; + + +const char *uct_alloc_method_names[] = { + [UCT_ALLOC_METHOD_THP] = "thp", + [UCT_ALLOC_METHOD_MD] = "md", + [UCT_ALLOC_METHOD_HEAP] = "heap", + [UCT_ALLOC_METHOD_MMAP] = "mmap", + [UCT_ALLOC_METHOD_HUGE] = "huge", + [UCT_ALLOC_METHOD_LAST] = NULL +}; + + +static inline int uct_mem_get_mmap_flags(unsigned uct_mmap_flags) +{ + int mm_flags = 0; + +#ifdef MAP_NONBLOCK + if (uct_mmap_flags & UCT_MD_MEM_FLAG_NONBLOCK) { + mm_flags |= MAP_NONBLOCK; + } +#endif + + if (uct_mmap_flags & UCT_MD_MEM_FLAG_FIXED) { + mm_flags |= MAP_FIXED; + } + + return mm_flags; +} + +ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags, + uct_alloc_method_t *methods, unsigned num_methods, + uct_md_h *mds, unsigned num_mds, + const char *alloc_name, uct_allocated_memory_t *mem) +{ + uct_alloc_method_t *method; + uct_md_attr_t md_attr; + ucs_status_t status; + size_t alloc_length; + unsigned md_index; + uct_mem_h memh; + uct_md_h md; + void *address; + int ret; +#ifdef SHM_HUGETLB + int shmid; +#endif +#ifdef MADV_HUGEPAGE + ssize_t huge_page_size; +#endif + + if (min_length == 0) { + ucs_error("Allocation length cannot be 0"); + return UCS_ERR_INVALID_PARAM; + } + + if (num_methods == 0) { + ucs_error("No allocation methods provided"); + return UCS_ERR_INVALID_PARAM; + } + + if ((flags & UCT_MD_MEM_FLAG_FIXED) && + (!addr || ((uintptr_t)addr % ucs_get_page_size()))) { + ucs_debug("UCT_MD_MEM_FLAG_FIXED requires valid page size aligned address"); + return UCS_ERR_INVALID_PARAM; + } + + for (method = methods; method < methods + num_methods; ++method) { + ucs_trace("trying allocation method %s", 
uct_alloc_method_names[*method]); + + switch (*method) { + case UCT_ALLOC_METHOD_MD: + /* Allocate with one of the specified memory domains */ + for (md_index = 0; md_index < num_mds; ++md_index) { + md = mds[md_index]; + status = uct_md_query(md, &md_attr); + if (status != UCS_OK) { + ucs_error("Failed to query MD"); + return status; + } + + /* Check if MD supports allocation */ + if (!(md_attr.cap.flags & UCT_MD_FLAG_ALLOC)) { + continue; + } + + /* Check if MD supports allocation with fixed address + * if it's requested */ + if ((flags & UCT_MD_MEM_FLAG_FIXED) && + !(md_attr.cap.flags & UCT_MD_FLAG_FIXED)) { + continue; + } + + /* Allocate memory using the MD. + * If the allocation fails, it's considered an error and we don't + * fall-back, because this MD already exposed support for memory + * allocation. + */ + alloc_length = min_length; + address = addr; + status = uct_md_mem_alloc(md, &alloc_length, &address, flags, + alloc_name, &memh); + if (status != UCS_OK) { + ucs_error("failed to allocate %zu bytes using md %s for %s: %s", + alloc_length, md->component->name, + alloc_name, ucs_status_string(status)); + return status; + } + + ucs_assert(memh != UCT_MEM_HANDLE_NULL); + mem->md = md; + mem->mem_type = md_attr.cap.access_mem_type; + mem->memh = memh; + goto allocated; + + } + break; + + case UCT_ALLOC_METHOD_THP: +#ifdef MADV_HUGEPAGE + /* Fixed option is not supported for thp allocation*/ + if (flags & UCT_MD_MEM_FLAG_FIXED) { + break; + } + + if (!ucs_is_thp_enabled()) { + break; + } + + huge_page_size = ucs_get_huge_page_size(); + if (huge_page_size <= 0) { + break; + } + + alloc_length = ucs_align_up(min_length, huge_page_size); + if (alloc_length >= 2 * min_length) { + break; + } + + ret = ucs_posix_memalign(&address, huge_page_size, alloc_length + UCS_MEMTRACK_VAL); + if (ret != 0) { + ucs_trace("failed to allocate %zu bytes using THP: %m", alloc_length); + } else { + ret = madvise(address, alloc_length, MADV_HUGEPAGE); + if (ret != 0) { + 
ucs_trace("madvise(address=%p, length=%zu, HUGEPAGE) " + "returned %d: %m", address, alloc_length, ret); + ucs_free(address); + } else { + goto allocated_without_md; + } + } +#endif + break; + + case UCT_ALLOC_METHOD_HEAP: + /* Allocate aligned memory using libc allocator */ + + /* Fixed option is not supported for heap allocation*/ + if (flags & UCT_MD_MEM_FLAG_FIXED) { + break; + } + + alloc_length = min_length; + ret = ucs_posix_memalign(&address, UCS_SYS_CACHE_LINE_SIZE, + alloc_length UCS_MEMTRACK_VAL); + if (ret == 0) { + goto allocated_without_md; + } + + ucs_trace("failed to allocate %zu bytes from the heap", alloc_length); + break; + + case UCT_ALLOC_METHOD_MMAP: + /* Request memory from operating system using mmap() */ + alloc_length = min_length; + address = addr; + + status = ucs_mmap_alloc(&alloc_length, &address, + uct_mem_get_mmap_flags(flags) + UCS_MEMTRACK_VAL); + if (status== UCS_OK) { + goto allocated_without_md; + } + + ucs_trace("failed to mmap %zu bytes: %s", min_length, + ucs_status_string(status)); + break; + + case UCT_ALLOC_METHOD_HUGE: +#ifdef SHM_HUGETLB + /* Allocate huge pages */ + alloc_length = min_length; + address = (flags & UCT_MD_MEM_FLAG_FIXED) ? addr : NULL; + status = ucs_sysv_alloc(&alloc_length, min_length * 2, &address, + SHM_HUGETLB, alloc_name, &shmid); + if (status == UCS_OK) { + goto allocated_without_md; + } +#else + status = UCS_ERR_NO_MEMORY; +#endif + + ucs_trace("failed to allocate %zu bytes from hugetlb: %s", + min_length, ucs_status_string(status)); + break; + + default: + ucs_error("Invalid allocation method %d", *method); + return UCS_ERR_INVALID_PARAM; + } + } + + ucs_debug("Could not allocate memory with any of the provided methods"); + return UCS_ERR_NO_MEMORY; + +allocated_without_md: + mem->md = NULL; + mem->mem_type = UCS_MEMORY_TYPE_HOST; + mem->memh = UCT_MEM_HANDLE_NULL; +allocated: + ucs_trace("allocated %zu bytes at %p using %s", alloc_length, address, + (mem->md == NULL) ? 
uct_alloc_method_names[*method] + : mem->md->component->name); + mem->address = address; + mem->length = alloc_length; + mem->method = *method; + return UCS_OK; +} + +ucs_status_t uct_mem_free(const uct_allocated_memory_t *mem) +{ + switch (mem->method) { + case UCT_ALLOC_METHOD_MD: + return uct_md_mem_free(mem->md, mem->memh); + + case UCT_ALLOC_METHOD_THP: + case UCT_ALLOC_METHOD_HEAP: + ucs_free(mem->address); + return UCS_OK; + + case UCT_ALLOC_METHOD_MMAP: + return ucs_mmap_free(mem->address, mem->length); + + case UCT_ALLOC_METHOD_HUGE: + return ucs_sysv_free(mem->address); + + default: + ucs_warn("Invalid memory allocation method: %d", mem->method); + return UCS_ERR_INVALID_PARAM; + } +} + +ucs_status_t uct_iface_mem_alloc(uct_iface_h tl_iface, size_t length, unsigned flags, + const char *name, uct_allocated_memory_t *mem) +{ + uct_base_iface_t *iface = ucs_derived_of(tl_iface, uct_base_iface_t); + uct_md_attr_t md_attr; + ucs_status_t status; + + status = uct_mem_alloc(NULL, length, UCT_MD_MEM_ACCESS_ALL, + iface->config.alloc_methods, + iface->config.num_alloc_methods, &iface->md, 1, + name, mem); + if (status != UCS_OK) { + goto err; + } + + /* If the memory was not allocated using MD, register it */ + if (mem->method != UCT_ALLOC_METHOD_MD) { + + status = uct_md_query(iface->md, &md_attr); + if (status != UCS_OK) { + goto err_free; + } + + /* If MD does not support registration, allow only the MD method */ + if ((md_attr.cap.flags & UCT_MD_FLAG_REG) && + (md_attr.cap.reg_mem_types & UCS_BIT(mem->mem_type))) { + status = uct_md_mem_reg(iface->md, mem->address, mem->length, flags, + &mem->memh); + if (status != UCS_OK) { + goto err_free; + } + + ucs_assert(mem->memh != UCT_MEM_HANDLE_NULL); + } else { + mem->memh = UCT_MEM_HANDLE_NULL; + } + + mem->md = iface->md; + } + + return UCS_OK; + +err_free: + uct_mem_free(mem); +err: + return status; +} + +void uct_iface_mem_free(const uct_allocated_memory_t *mem) +{ + if ((mem->method != UCT_ALLOC_METHOD_MD) && + 
(mem->memh != UCT_MEM_HANDLE_NULL)) + { + (void)uct_md_mem_dereg(mem->md, mem->memh); + } + uct_mem_free(mem); +} + +static inline uct_iface_mp_priv_t* uct_iface_mp_priv(ucs_mpool_t *mp) +{ + return (uct_iface_mp_priv_t*)ucs_mpool_priv(mp); +} + +UCS_PROFILE_FUNC(ucs_status_t, uct_iface_mp_chunk_alloc, (mp, size_p, chunk_p), + ucs_mpool_t *mp, size_t *size_p, void **chunk_p) +{ + uct_base_iface_t *iface = uct_iface_mp_priv(mp)->iface; + uct_iface_mp_chunk_hdr_t *hdr; + uct_allocated_memory_t mem; + ucs_status_t status; + size_t length; + + length = sizeof(*hdr) + *size_p; + status = uct_iface_mem_alloc(&iface->super, length, + UCT_MD_MEM_ACCESS_ALL | UCT_MD_MEM_FLAG_LOCK, + ucs_mpool_name(mp), &mem); + if (status != UCS_OK) { + return status; + } + + ucs_assert(mem.memh != UCT_MEM_HANDLE_NULL); + ucs_assert(mem.md == iface->md); + + hdr = mem.address; + hdr->method = mem.method; + hdr->length = mem.length; + hdr->memh = mem.memh; + *size_p = mem.length - sizeof(*hdr); + *chunk_p = hdr + 1; + return UCS_OK; +} + +UCS_PROFILE_FUNC_VOID(uct_iface_mp_chunk_release, (mp, chunk), + ucs_mpool_t *mp, void *chunk) +{ + uct_base_iface_t *iface = uct_iface_mp_priv(mp)->iface; + uct_iface_mp_chunk_hdr_t *hdr; + uct_allocated_memory_t mem; + + hdr = UCS_PTR_BYTE_OFFSET(chunk, -sizeof(*hdr)); + + mem.address = hdr; + mem.method = hdr->method; + mem.memh = hdr->memh; + mem.length = hdr->length; + mem.md = iface->md; + + uct_iface_mem_free(&mem); +} + +static void uct_iface_mp_obj_init(ucs_mpool_t *mp, void *obj, void *chunk) +{ + uct_base_iface_t *iface = uct_iface_mp_priv(mp)->iface; + uct_iface_mpool_init_obj_cb_t init_obj_cb; + uct_iface_mp_chunk_hdr_t *hdr; + + init_obj_cb = uct_iface_mp_priv(mp)->init_obj_cb; + hdr = UCS_PTR_BYTE_OFFSET(chunk, -sizeof(*hdr)); + if (init_obj_cb != NULL) { + init_obj_cb(&iface->super, obj, hdr->memh); + } +} + +static ucs_mpool_ops_t uct_iface_mpool_ops = { + .chunk_alloc = uct_iface_mp_chunk_alloc, + .chunk_release = 
uct_iface_mp_chunk_release, + .obj_init = uct_iface_mp_obj_init, + .obj_cleanup = NULL +}; + +ucs_status_t uct_iface_mpool_init(uct_base_iface_t *iface, ucs_mpool_t *mp, + size_t elem_size, size_t align_offset, size_t alignment, + const uct_iface_mpool_config_t *config, unsigned grow, + uct_iface_mpool_init_obj_cb_t init_obj_cb, + const char *name) +{ + unsigned elems_per_chunk; + ucs_status_t status; + + elems_per_chunk = (config->bufs_grow != 0) ? config->bufs_grow : grow; + status = ucs_mpool_init(mp, sizeof(uct_iface_mp_priv_t), + elem_size, align_offset, alignment, + elems_per_chunk, config->max_bufs, + &uct_iface_mpool_ops, name); + if (status != UCS_OK) { + return status; + } + + uct_iface_mp_priv(mp)->iface = iface; + uct_iface_mp_priv(mp)->init_obj_cb = init_obj_cb; + return UCS_OK; +} diff --git a/src/uct/base/uct_worker.c b/src/uct/base/uct_worker.c new file mode 100644 index 0000000..5f7ec09 --- /dev/null +++ b/src/uct/base/uct_worker.c @@ -0,0 +1,131 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * Copyright (C) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED. + * Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "uct_worker.h" + +#include +#include +#include + + +static UCS_CLASS_INIT_FUNC(uct_worker_t) +{ + ucs_callbackq_init(&self->progress_q); + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_worker_t) +{ + ucs_callbackq_cleanup(&self->progress_q); +} + +UCS_CLASS_DEFINE(uct_worker_t, void); + +static UCS_CLASS_INIT_FUNC(uct_priv_worker_t, ucs_async_context_t *async, + ucs_thread_mode_t thread_mode) +{ + UCS_CLASS_CALL_SUPER_INIT(uct_worker_t); + + if (async == NULL) { + return UCS_ERR_INVALID_PARAM; + } + + self->async = async; + self->thread_mode = thread_mode; + ucs_list_head_init(&self->tl_data); + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_priv_worker_t) +{ +} + +UCS_CLASS_DEFINE(uct_priv_worker_t, uct_worker_t); + +UCS_CLASS_DEFINE_NAMED_NEW_FUNC(uct_worker_create, uct_priv_worker_t, uct_worker_t, + ucs_async_context_t*, ucs_thread_mode_t) +UCS_CLASS_DEFINE_NAMED_DELETE_FUNC(uct_worker_destroy, uct_priv_worker_t, uct_worker_t) + +void uct_worker_progress_init(uct_worker_progress_t *prog) +{ + prog->id = UCS_CALLBACKQ_ID_NULL; + prog->refcount = 0; +} + +void uct_worker_progress_add_safe(uct_priv_worker_t *worker, ucs_callback_t cb, + void *arg, uct_worker_progress_t *prog) +{ + UCS_ASYNC_BLOCK(worker->async); + if (ucs_atomic_fadd32(&prog->refcount, 1) == 0) { + prog->id = ucs_callbackq_add_safe(&worker->super.progress_q, cb, arg, + UCS_CALLBACKQ_FLAG_FAST); + } + UCS_ASYNC_UNBLOCK(worker->async); +} + +void uct_worker_progress_remove(uct_priv_worker_t *worker, uct_worker_progress_t *prog) +{ + UCS_ASYNC_BLOCK(worker->async); + ucs_assert(prog->refcount > 0); + if (ucs_atomic_fsub32(&prog->refcount, 1) == 1) { + ucs_callbackq_remove(&worker->super.progress_q, prog->id); + prog->id = UCS_CALLBACKQ_ID_NULL; + } + UCS_ASYNC_UNBLOCK(worker->async); +} + +void uct_worker_progress_remove_all(uct_priv_worker_t *worker, + uct_worker_progress_t *prog) +{ + uint32_t ref; + + 
UCS_ASYNC_BLOCK(worker->async); + ref = prog->refcount; + while (ref > 0) { + if (ucs_atomic_cswap32(&prog->refcount, ref, 0) == ref) { + ucs_callbackq_remove(&worker->super.progress_q, prog->id); + prog->id = UCS_CALLBACKQ_ID_NULL; + break; /* coverity thinks that `UCS_CALLBACKQ_ID_NULL` + * can be passed to `ucs_callbackq_remove()` + * make coverity happy - return from the loop */ + } + ref = prog->refcount; + } + UCS_ASYNC_UNBLOCK(worker->async); +} + +void uct_worker_progress_register_safe(uct_worker_h tl_worker, ucs_callback_t func, + void *arg, unsigned flags, + uct_worker_cb_id_t *id_p) +{ + uct_priv_worker_t *worker = ucs_derived_of(tl_worker, uct_priv_worker_t); + + if (*id_p == UCS_CALLBACKQ_ID_NULL) { + UCS_ASYNC_BLOCK(worker->async); + *id_p = ucs_callbackq_add_safe(&worker->super.progress_q, func, arg, flags); + ucs_assert(*id_p != UCS_CALLBACKQ_ID_NULL); + UCS_ASYNC_UNBLOCK(worker->async); + } +} + +void uct_worker_progress_unregister_safe(uct_worker_h tl_worker, + uct_worker_cb_id_t *id_p) +{ + uct_priv_worker_t *worker = ucs_derived_of(tl_worker, uct_priv_worker_t); + + if (*id_p != UCS_CALLBACKQ_ID_NULL) { + UCS_ASYNC_BLOCK(worker->async); + ucs_callbackq_remove_safe(&worker->super.progress_q, *id_p); + UCS_ASYNC_UNBLOCK(worker->async); + *id_p = UCS_CALLBACKQ_ID_NULL; + } +} diff --git a/src/uct/base/uct_worker.h b/src/uct/base/uct_worker.h new file mode 100644 index 0000000..8d7fc2e --- /dev/null +++ b/src/uct/base/uct_worker.h @@ -0,0 +1,98 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef UCT_WORKER_H_ +#define UCT_WORKER_H_ + +#include +#include +#include + + +/** + * Transport-specific data on a worker + */ +typedef struct uct_worker_tl_data { + ucs_list_link_t list; + uint32_t refcount; + uint32_t key; +} uct_worker_tl_data_t; + + +typedef struct uct_priv_worker { + uct_worker_t super; + ucs_async_context_t *async; + ucs_thread_mode_t thread_mode; + ucs_list_link_t tl_data; +} uct_priv_worker_t; + + +typedef struct uct_worker_progress { + uct_worker_cb_id_t id; + uint32_t refcount; +} uct_worker_progress_t; + + +#define uct_worker_tl_data_get(_worker, _key, _type, _cmp_fn, _init_fn, ...) \ + ({ \ + uct_worker_tl_data_t *data; \ + _type *result; \ + ucs_status_t status; \ + \ + ucs_list_for_each(data, &(_worker)->tl_data, list) { \ + if ((data->key == (_key)) && _cmp_fn(ucs_derived_of(data, _type), \ + ## __VA_ARGS__)) \ + { \ + ++data->refcount; \ + break; \ + } \ + } \ + \ + if (&data->list == &(_worker)->tl_data) { /* not found */ \ + result = ucs_malloc(sizeof(_type), UCS_PP_QUOTE(_type)); \ + if (result == NULL) { \ + result = UCS_STATUS_PTR(UCS_ERR_NO_MEMORY); \ + } else { \ + data = (uct_worker_tl_data_t*)result;\ + data->key = (_key); \ + data->refcount = 1; \ + status = _init_fn(ucs_derived_of(data, _type), ## __VA_ARGS__); \ + if (status != UCS_OK) { \ + ucs_free(result); \ + result = UCS_STATUS_PTR(status); \ + } else { \ + ucs_list_add_tail(&(_worker)->tl_data, &data->list); \ + } \ + } \ + } else { \ + result = ucs_derived_of(data, _type); \ + } \ + result; \ + }) + + +#define uct_worker_tl_data_put(_data, _cleanup_fn, ...) 
\ + { \ + uct_worker_tl_data_t *data = (uct_worker_tl_data_t*)(_data); \ + if (--data->refcount == 0) { \ + ucs_list_del(&data->list); \ + _cleanup_fn((_data), ## __VA_ARGS__); \ + ucs_free(data); \ + } \ + } + + +void uct_worker_progress_init(uct_worker_progress_t *prog); + +void uct_worker_progress_add_safe(uct_priv_worker_t *worker, ucs_callback_t cb, + void *arg, uct_worker_progress_t *prog); + +void uct_worker_progress_remove_all(uct_priv_worker_t *worker, + uct_worker_progress_t *prog); + +void uct_worker_progress_remove(uct_priv_worker_t *worker, uct_worker_progress_t *prog); + +#endif diff --git a/src/uct/configure.m4 b/src/uct/configure.m4 new file mode 100644 index 0000000..338b257 --- /dev/null +++ b/src/uct/configure.m4 @@ -0,0 +1,15 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +uct_modules="" +m4_include([src/uct/cuda/configure.m4]) +m4_include([src/uct/ib/configure.m4]) +m4_include([src/uct/rocm/configure.m4]) +m4_include([src/uct/sm/configure.m4]) +m4_include([src/uct/ugni/configure.m4]) + +AC_DEFINE_UNQUOTED([uct_MODULES], ["${uct_modules}"], [UCT loadable modules]) + +AC_CONFIG_FILES([src/uct/Makefile]) diff --git a/src/uct/cuda/Makefile.am b/src/uct/cuda/Makefile.am new file mode 100644 index 0000000..0992bb4 --- /dev/null +++ b/src/uct/cuda/Makefile.am @@ -0,0 +1,41 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +if HAVE_CUDA + +SUBDIRS = . 
gdr_copy + +module_LTLIBRARIES = libuct_cuda.la +libuct_cuda_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS) +libuct_cuda_la_CFLAGS = $(BASE_CFLAGS) $(CUDA_CFLAGS) +libuct_cuda_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/libuct.la +libuct_cuda_la_LDFLAGS = $(CUDA_LDFLAGS) -version-info $(SOVERSION) + +noinst_HEADERS = \ + base/cuda_md.h \ + base/cuda_iface.h \ + cuda_copy/cuda_copy_md.h \ + cuda_copy/cuda_copy_iface.h \ + cuda_copy/cuda_copy_ep.h \ + cuda_ipc/cuda_ipc_md.h \ + cuda_ipc/cuda_ipc_iface.h \ + cuda_ipc/cuda_ipc_ep.h \ + cuda_ipc/cuda_ipc_cache.h + +libuct_cuda_la_SOURCES = \ + base/cuda_iface.c \ + base/cuda_md.c \ + cuda_copy/cuda_copy_md.c \ + cuda_copy/cuda_copy_iface.c \ + cuda_copy/cuda_copy_ep.c \ + cuda_ipc/cuda_ipc_md.c \ + cuda_ipc/cuda_ipc_iface.c \ + cuda_ipc/cuda_ipc_ep.c \ + cuda_ipc/cuda_ipc_cache.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/uct/cuda/Makefile.in b/src/uct/cuda/Makefile.in new file mode 100644 index 0000000..c4abf47 --- /dev/null +++ b/src/uct/cuda/Makefile.in @@ -0,0 +1,1146 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : 
+NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/cuda +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + 
$(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_CUDA_TRUE@libuct_cuda_la_DEPENDENCIES = \ +@HAVE_CUDA_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_CUDA_TRUE@ $(top_builddir)/src/uct/libuct.la +am__libuct_cuda_la_SOURCES_DIST = base/cuda_iface.c base/cuda_md.c \ + cuda_copy/cuda_copy_md.c cuda_copy/cuda_copy_iface.c \ + cuda_copy/cuda_copy_ep.c cuda_ipc/cuda_ipc_md.c \ + cuda_ipc/cuda_ipc_iface.c cuda_ipc/cuda_ipc_ep.c \ + cuda_ipc/cuda_ipc_cache.c +am__dirstamp = $(am__leading_dot)dirstamp +@HAVE_CUDA_TRUE@am_libuct_cuda_la_OBJECTS = \ +@HAVE_CUDA_TRUE@ base/libuct_cuda_la-cuda_iface.lo \ +@HAVE_CUDA_TRUE@ base/libuct_cuda_la-cuda_md.lo \ +@HAVE_CUDA_TRUE@ cuda_copy/libuct_cuda_la-cuda_copy_md.lo \ +@HAVE_CUDA_TRUE@ cuda_copy/libuct_cuda_la-cuda_copy_iface.lo \ +@HAVE_CUDA_TRUE@ cuda_copy/libuct_cuda_la-cuda_copy_ep.lo \ +@HAVE_CUDA_TRUE@ cuda_ipc/libuct_cuda_la-cuda_ipc_md.lo \ +@HAVE_CUDA_TRUE@ cuda_ipc/libuct_cuda_la-cuda_ipc_iface.lo \ +@HAVE_CUDA_TRUE@ cuda_ipc/libuct_cuda_la-cuda_ipc_ep.lo \ +@HAVE_CUDA_TRUE@ cuda_ipc/libuct_cuda_la-cuda_ipc_cache.lo +libuct_cuda_la_OBJECTS = $(am_libuct_cuda_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_cuda_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libuct_cuda_la_CFLAGS) $(CFLAGS) $(libuct_cuda_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@HAVE_CUDA_TRUE@am_libuct_cuda_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = 
-I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = base/$(DEPDIR)/libuct_cuda_la-cuda_iface.Plo \ + base/$(DEPDIR)/libuct_cuda_la-cuda_md.Plo \ + cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_ep.Plo \ + cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_iface.Plo \ + cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_md.Plo \ + cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_cache.Plo \ + cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_ep.Plo \ + cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_iface.Plo \ + cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_md.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_cuda_la_SOURCES) +DIST_SOURCES = $(am__libuct_cuda_la_SOURCES_DIST) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info 
--version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = base/cuda_md.h base/cuda_iface.h \ + cuda_copy/cuda_copy_md.h cuda_copy/cuda_copy_iface.h \ + cuda_copy/cuda_copy_ep.h cuda_ipc/cuda_ipc_md.h \ + cuda_ipc/cuda_ipc_iface.h cuda_ipc/cuda_ipc_ep.h \ + cuda_ipc/cuda_ipc_cache.h +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = . 
gdr_copy +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ 
+DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ 
+PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ 
+localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_CUDA_TRUE@SUBDIRS = . gdr_copy +@HAVE_CUDA_TRUE@module_LTLIBRARIES = libuct_cuda.la +@HAVE_CUDA_TRUE@libuct_cuda_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS) +@HAVE_CUDA_TRUE@libuct_cuda_la_CFLAGS = $(BASE_CFLAGS) $(CUDA_CFLAGS) +@HAVE_CUDA_TRUE@libuct_cuda_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_CUDA_TRUE@ $(top_builddir)/src/uct/libuct.la + +@HAVE_CUDA_TRUE@libuct_cuda_la_LDFLAGS = $(CUDA_LDFLAGS) -version-info $(SOVERSION) +@HAVE_CUDA_TRUE@noinst_HEADERS = \ +@HAVE_CUDA_TRUE@ base/cuda_md.h \ +@HAVE_CUDA_TRUE@ base/cuda_iface.h \ +@HAVE_CUDA_TRUE@ cuda_copy/cuda_copy_md.h \ +@HAVE_CUDA_TRUE@ cuda_copy/cuda_copy_iface.h \ +@HAVE_CUDA_TRUE@ cuda_copy/cuda_copy_ep.h \ +@HAVE_CUDA_TRUE@ cuda_ipc/cuda_ipc_md.h \ +@HAVE_CUDA_TRUE@ cuda_ipc/cuda_ipc_iface.h \ +@HAVE_CUDA_TRUE@ cuda_ipc/cuda_ipc_ep.h \ +@HAVE_CUDA_TRUE@ cuda_ipc/cuda_ipc_cache.h + +@HAVE_CUDA_TRUE@libuct_cuda_la_SOURCES = \ +@HAVE_CUDA_TRUE@ base/cuda_iface.c \ +@HAVE_CUDA_TRUE@ base/cuda_md.c \ +@HAVE_CUDA_TRUE@ cuda_copy/cuda_copy_md.c \ +@HAVE_CUDA_TRUE@ cuda_copy/cuda_copy_iface.c \ +@HAVE_CUDA_TRUE@ cuda_copy/cuda_copy_ep.c \ +@HAVE_CUDA_TRUE@ cuda_ipc/cuda_ipc_md.c \ +@HAVE_CUDA_TRUE@ cuda_ipc/cuda_ipc_iface.c \ +@HAVE_CUDA_TRUE@ cuda_ipc/cuda_ipc_ep.c \ +@HAVE_CUDA_TRUE@ cuda_ipc/cuda_ipc_cache.c + + +# Automake silent rules +@HAVE_CUDA_TRUE@AM_V_LN = 
$(AM_V_LN_@AM_V@) +@HAVE_CUDA_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_CUDA_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_CUDA_TRUE@AM_V_LN_1 = true +@HAVE_CUDA_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/cuda/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/cuda/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) 
"$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +base/$(am__dirstamp): + @$(MKDIR_P) base + @: > base/$(am__dirstamp) +base/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) base/$(DEPDIR) + @: > base/$(DEPDIR)/$(am__dirstamp) +base/libuct_cuda_la-cuda_iface.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +base/libuct_cuda_la-cuda_md.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +cuda_copy/$(am__dirstamp): + @$(MKDIR_P) cuda_copy + @: > cuda_copy/$(am__dirstamp) +cuda_copy/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) cuda_copy/$(DEPDIR) + @: > cuda_copy/$(DEPDIR)/$(am__dirstamp) +cuda_copy/libuct_cuda_la-cuda_copy_md.lo: cuda_copy/$(am__dirstamp) \ + cuda_copy/$(DEPDIR)/$(am__dirstamp) +cuda_copy/libuct_cuda_la-cuda_copy_iface.lo: \ + cuda_copy/$(am__dirstamp) cuda_copy/$(DEPDIR)/$(am__dirstamp) +cuda_copy/libuct_cuda_la-cuda_copy_ep.lo: cuda_copy/$(am__dirstamp) \ + cuda_copy/$(DEPDIR)/$(am__dirstamp) +cuda_ipc/$(am__dirstamp): + @$(MKDIR_P) cuda_ipc + @: > cuda_ipc/$(am__dirstamp) 
+cuda_ipc/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) cuda_ipc/$(DEPDIR) + @: > cuda_ipc/$(DEPDIR)/$(am__dirstamp) +cuda_ipc/libuct_cuda_la-cuda_ipc_md.lo: cuda_ipc/$(am__dirstamp) \ + cuda_ipc/$(DEPDIR)/$(am__dirstamp) +cuda_ipc/libuct_cuda_la-cuda_ipc_iface.lo: cuda_ipc/$(am__dirstamp) \ + cuda_ipc/$(DEPDIR)/$(am__dirstamp) +cuda_ipc/libuct_cuda_la-cuda_ipc_ep.lo: cuda_ipc/$(am__dirstamp) \ + cuda_ipc/$(DEPDIR)/$(am__dirstamp) +cuda_ipc/libuct_cuda_la-cuda_ipc_cache.lo: cuda_ipc/$(am__dirstamp) \ + cuda_ipc/$(DEPDIR)/$(am__dirstamp) + +libuct_cuda.la: $(libuct_cuda_la_OBJECTS) $(libuct_cuda_la_DEPENDENCIES) $(EXTRA_libuct_cuda_la_DEPENDENCIES) + $(AM_V_CCLD)$(libuct_cuda_la_LINK) $(am_libuct_cuda_la_rpath) $(libuct_cuda_la_OBJECTS) $(libuct_cuda_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f base/*.$(OBJEXT) + -rm -f base/*.lo + -rm -f cuda_copy/*.$(OBJEXT) + -rm -f cuda_copy/*.lo + -rm -f cuda_ipc/*.$(OBJEXT) + -rm -f cuda_ipc/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_cuda_la-cuda_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_cuda_la-cuda_md.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_md.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_cache.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_iface.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_md.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +base/libuct_cuda_la-cuda_iface.lo: base/cuda_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -MT base/libuct_cuda_la-cuda_iface.lo -MD -MP -MF base/$(DEPDIR)/libuct_cuda_la-cuda_iface.Tpo -c -o base/libuct_cuda_la-cuda_iface.lo `test -f 'base/cuda_iface.c' || echo '$(srcdir)/'`base/cuda_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_cuda_la-cuda_iface.Tpo base/$(DEPDIR)/libuct_cuda_la-cuda_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/cuda_iface.c' object='base/libuct_cuda_la-cuda_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -c -o base/libuct_cuda_la-cuda_iface.lo `test -f 'base/cuda_iface.c' || echo '$(srcdir)/'`base/cuda_iface.c + +base/libuct_cuda_la-cuda_md.lo: base/cuda_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -MT base/libuct_cuda_la-cuda_md.lo -MD -MP -MF base/$(DEPDIR)/libuct_cuda_la-cuda_md.Tpo -c -o base/libuct_cuda_la-cuda_md.lo `test -f 'base/cuda_md.c' || echo '$(srcdir)/'`base/cuda_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_cuda_la-cuda_md.Tpo base/$(DEPDIR)/libuct_cuda_la-cuda_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/cuda_md.c' object='base/libuct_cuda_la-cuda_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -c -o base/libuct_cuda_la-cuda_md.lo `test -f 'base/cuda_md.c' || echo '$(srcdir)/'`base/cuda_md.c + +cuda_copy/libuct_cuda_la-cuda_copy_md.lo: cuda_copy/cuda_copy_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -MT cuda_copy/libuct_cuda_la-cuda_copy_md.lo -MD -MP -MF cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_md.Tpo -c -o cuda_copy/libuct_cuda_la-cuda_copy_md.lo `test -f 'cuda_copy/cuda_copy_md.c' || echo '$(srcdir)/'`cuda_copy/cuda_copy_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_md.Tpo cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cuda_copy/cuda_copy_md.c' object='cuda_copy/libuct_cuda_la-cuda_copy_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -c -o cuda_copy/libuct_cuda_la-cuda_copy_md.lo `test -f 'cuda_copy/cuda_copy_md.c' || echo '$(srcdir)/'`cuda_copy/cuda_copy_md.c + +cuda_copy/libuct_cuda_la-cuda_copy_iface.lo: cuda_copy/cuda_copy_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -MT cuda_copy/libuct_cuda_la-cuda_copy_iface.lo -MD -MP -MF cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_iface.Tpo -c -o cuda_copy/libuct_cuda_la-cuda_copy_iface.lo `test -f 
'cuda_copy/cuda_copy_iface.c' || echo '$(srcdir)/'`cuda_copy/cuda_copy_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_iface.Tpo cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cuda_copy/cuda_copy_iface.c' object='cuda_copy/libuct_cuda_la-cuda_copy_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -c -o cuda_copy/libuct_cuda_la-cuda_copy_iface.lo `test -f 'cuda_copy/cuda_copy_iface.c' || echo '$(srcdir)/'`cuda_copy/cuda_copy_iface.c + +cuda_copy/libuct_cuda_la-cuda_copy_ep.lo: cuda_copy/cuda_copy_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -MT cuda_copy/libuct_cuda_la-cuda_copy_ep.lo -MD -MP -MF cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_ep.Tpo -c -o cuda_copy/libuct_cuda_la-cuda_copy_ep.lo `test -f 'cuda_copy/cuda_copy_ep.c' || echo '$(srcdir)/'`cuda_copy/cuda_copy_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_ep.Tpo cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cuda_copy/cuda_copy_ep.c' object='cuda_copy/libuct_cuda_la-cuda_copy_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -c -o cuda_copy/libuct_cuda_la-cuda_copy_ep.lo `test -f 'cuda_copy/cuda_copy_ep.c' || echo '$(srcdir)/'`cuda_copy/cuda_copy_ep.c + +cuda_ipc/libuct_cuda_la-cuda_ipc_md.lo: cuda_ipc/cuda_ipc_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -MT cuda_ipc/libuct_cuda_la-cuda_ipc_md.lo -MD -MP -MF cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_md.Tpo -c -o cuda_ipc/libuct_cuda_la-cuda_ipc_md.lo `test -f 'cuda_ipc/cuda_ipc_md.c' || echo '$(srcdir)/'`cuda_ipc/cuda_ipc_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_md.Tpo cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cuda_ipc/cuda_ipc_md.c' object='cuda_ipc/libuct_cuda_la-cuda_ipc_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -c -o cuda_ipc/libuct_cuda_la-cuda_ipc_md.lo `test -f 'cuda_ipc/cuda_ipc_md.c' || echo '$(srcdir)/'`cuda_ipc/cuda_ipc_md.c + +cuda_ipc/libuct_cuda_la-cuda_ipc_iface.lo: cuda_ipc/cuda_ipc_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -MT cuda_ipc/libuct_cuda_la-cuda_ipc_iface.lo -MD -MP -MF cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_iface.Tpo -c -o cuda_ipc/libuct_cuda_la-cuda_ipc_iface.lo `test -f 'cuda_ipc/cuda_ipc_iface.c' || echo 
'$(srcdir)/'`cuda_ipc/cuda_ipc_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_iface.Tpo cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cuda_ipc/cuda_ipc_iface.c' object='cuda_ipc/libuct_cuda_la-cuda_ipc_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -c -o cuda_ipc/libuct_cuda_la-cuda_ipc_iface.lo `test -f 'cuda_ipc/cuda_ipc_iface.c' || echo '$(srcdir)/'`cuda_ipc/cuda_ipc_iface.c + +cuda_ipc/libuct_cuda_la-cuda_ipc_ep.lo: cuda_ipc/cuda_ipc_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -MT cuda_ipc/libuct_cuda_la-cuda_ipc_ep.lo -MD -MP -MF cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_ep.Tpo -c -o cuda_ipc/libuct_cuda_la-cuda_ipc_ep.lo `test -f 'cuda_ipc/cuda_ipc_ep.c' || echo '$(srcdir)/'`cuda_ipc/cuda_ipc_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_ep.Tpo cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cuda_ipc/cuda_ipc_ep.c' object='cuda_ipc/libuct_cuda_la-cuda_ipc_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -c -o 
cuda_ipc/libuct_cuda_la-cuda_ipc_ep.lo `test -f 'cuda_ipc/cuda_ipc_ep.c' || echo '$(srcdir)/'`cuda_ipc/cuda_ipc_ep.c + +cuda_ipc/libuct_cuda_la-cuda_ipc_cache.lo: cuda_ipc/cuda_ipc_cache.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -MT cuda_ipc/libuct_cuda_la-cuda_ipc_cache.lo -MD -MP -MF cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_cache.Tpo -c -o cuda_ipc/libuct_cuda_la-cuda_ipc_cache.lo `test -f 'cuda_ipc/cuda_ipc_cache.c' || echo '$(srcdir)/'`cuda_ipc/cuda_ipc_cache.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_cache.Tpo cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_cache.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cuda_ipc/cuda_ipc_cache.c' object='cuda_ipc/libuct_cuda_la-cuda_ipc_cache.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_la_CFLAGS) $(CFLAGS) -c -o cuda_ipc/libuct_cuda_la-cuda_ipc_cache.lo `test -f 'cuda_ipc/cuda_ipc_cache.c' || echo '$(srcdir)/'`cuda_ipc/cuda_ipc_cache.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf base/.libs base/_libs + -rm -rf cuda_copy/.libs cuda_copy/_libs + -rm -rf cuda_ipc/.libs cuda_ipc/_libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. 
+# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +@HAVE_CUDA_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f base/$(DEPDIR)/$(am__dirstamp) + -rm -f base/$(am__dirstamp) + -rm -f cuda_copy/$(DEPDIR)/$(am__dirstamp) + -rm -f cuda_copy/$(am__dirstamp) + -rm -f cuda_ipc/$(DEPDIR)/$(am__dirstamp) + -rm -f cuda_ipc/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f base/$(DEPDIR)/libuct_cuda_la-cuda_iface.Plo + -rm -f base/$(DEPDIR)/libuct_cuda_la-cuda_md.Plo + -rm -f cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_ep.Plo + -rm -f cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_iface.Plo + -rm -f cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_md.Plo + -rm -f cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_cache.Plo + -rm -f cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_ep.Plo + -rm -f cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_iface.Plo + -rm -f cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_md.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: 
install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f base/$(DEPDIR)/libuct_cuda_la-cuda_iface.Plo + -rm -f base/$(DEPDIR)/libuct_cuda_la-cuda_md.Plo + -rm -f cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_ep.Plo + -rm -f cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_iface.Plo + -rm -f cuda_copy/$(DEPDIR)/libuct_cuda_la-cuda_copy_md.Plo + -rm -f cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_cache.Plo + -rm -f cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_ep.Plo + -rm -f cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_iface.Plo + -rm -f cuda_ipc/$(DEPDIR)/libuct_cuda_la-cuda_ipc_md.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ + am--depfiles check check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + 
uninstall-moduleLTLIBRARIES
+
+.PRECIOUS: Makefile
+
+
+@HAVE_CUDA_TRUE@all-local: $(local_la_modules)
+
+# Create symbolic links (to absolute paths below $(CURDIR)) for the built modules
+# under $(localmoduledir). Link also *.la files to create proper makefile dependencies
+@HAVE_CUDA_TRUE@$(local_la_modules): $(module_LTLIBRARIES)
+@HAVE_CUDA_TRUE@	$(AM_V_at)$(MKDIR_P) $(localmoduledir)
+@HAVE_CUDA_TRUE@	$(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \
+@HAVE_CUDA_TRUE@		(cd $(localmoduledir) && $(LN_RS) -nf "$(CURDIR)/$$lib"); \
+@HAVE_CUDA_TRUE@	done
+@HAVE_CUDA_TRUE@	@for lib in *.la $(objdir)/*$(shrext)*; do \
+@HAVE_CUDA_TRUE@		$(AM_V_LN) $$lib; \
+@HAVE_CUDA_TRUE@	done
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/uct/cuda/base/cuda_iface.c b/src/uct/cuda/base/cuda_iface.c
new file mode 100644
index 0000000..8022f59
--- /dev/null
+++ b/src/uct/cuda/base/cuda_iface.c
@@ -0,0 +1,22 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#include "cuda_iface.h"
+
+
+/*
+ * Report the devices of a CUDA memory domain by delegating to
+ * uct_single_device_resource() with the fixed device name UCT_CUDA_DEV_NAME
+ * and device type UCT_DEVICE_TYPE_ACC; that call's status is returned as is.
+ */
+ucs_status_t
+uct_cuda_base_query_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_p,
+                            unsigned *num_tl_devices_p)
+{
+    return uct_single_device_resource(md, UCT_CUDA_DEV_NAME, UCT_DEVICE_TYPE_ACC,
+                                      tl_devices_p, num_tl_devices_p);
+}
+
diff --git a/src/uct/cuda/base/cuda_iface.h b/src/uct/cuda/base/cuda_iface.h
new file mode 100644
index 0000000..17088ee
--- /dev/null
+++ b/src/uct/cuda/base/cuda_iface.h
@@ -0,0 +1,97 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCT_CUDA_IFACE_H
+#define UCT_CUDA_IFACE_H
+
+#include
+#include
+#include
+#include
+
+
+#define UCT_CUDA_DEV_NAME "cuda"
+
+
+/*
+ * Evaluate a CUDA runtime API call and yield a ucs_status_t: UCS_OK when the
+ * call returns cudaSuccess; otherwise log the error and yield UCS_ERR_IO_ERROR.
+ */
+#define UCT_CUDA_FUNC(_func) \
+    ({ \
+        ucs_status_t _status = UCS_OK; \
+        do { \
+            cudaError_t _result = (_func); \
+            if (cudaSuccess != _result) { \
+                ucs_error("%s is failed. ret:%s", \
+                          UCS_PP_MAKE_STRING(_func), \
+                          cudaGetErrorString(_result)); \
+                _status = UCS_ERR_IO_ERROR; \
+            } \
+        } while (0); \
+        _status; \
+    })
+
+
+/*
+ * Evaluate a CUDA driver API call and yield a ucs_status_t: UCS_OK on
+ * CUDA_SUCCESS, UCS_INPROGRESS on CUDA_ERROR_NOT_READY; any other result is
+ * logged and yields UCS_ERR_IO_ERROR.
+ */
+#define UCT_CUDADRV_FUNC(_func) \
+    ({ \
+        ucs_status_t _status = UCS_OK; \
+        do { \
+            CUresult _result = (_func); \
+            const char *cu_err_str = NULL; \
+            if (CUDA_ERROR_NOT_READY == _result) { \
+                _status = UCS_INPROGRESS; \
+            } else if (CUDA_SUCCESS != _result) { \
+                /* cuGetErrorString() can fail and leave no string behind; \
+                 * never hand a NULL or uninitialized pointer to "%s" */ \
+                if ((CUDA_SUCCESS != cuGetErrorString(_result, &cu_err_str)) || \
+                    (NULL == cu_err_str)) { \
+                    cu_err_str = "unknown error"; \
+                } \
+                ucs_error("%s is failed. ret:%s", \
+                          UCS_PP_MAKE_STRING(_func), cu_err_str); \
+                _status = UCS_ERR_IO_ERROR; \
+            } \
+        } while (0); \
+        _status; \
+    })
+
+
+/*
+ * Preset _state to 0; then, if the calling thread has a current CUDA context,
+ * look up its device and let cuDevicePrimaryCtxGetState() store that device's
+ * primary-context state in _state.
+ */
+#define UCT_CUDADRV_CTX_ACTIVE(_state) \
+    { \
+        CUcontext cur_ctx; \
+        CUdevice dev; \
+        unsigned flags; \
+        \
+        _state = 0; \
+        /* avoid active state check if no cuda activity */ \
+        if ((CUDA_SUCCESS == cuCtxGetCurrent(&cur_ctx)) && \
+            (NULL != cur_ctx)) { \
+            /* query the primary context only if 'dev' was really filled in */ \
+            if (UCS_OK == UCT_CUDADRV_FUNC(cuCtxGetDevice(&dev))) { \
+                UCT_CUDADRV_FUNC(cuDevicePrimaryCtxGetState(dev, \
+                                                            &flags, \
+                                                            &_state)); \
+            } \
+        } \
+    }
+
+
+/* Query the transport devices of a CUDA memory domain */
+ucs_status_t
+uct_cuda_base_query_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_p,
+                            unsigned *num_tl_devices_p);
+
+#endif
diff --git a/src/uct/cuda/base/cuda_md.c b/src/uct/cuda/base/cuda_md.c
new file mode 100644
index 0000000..375c2f3
--- /dev/null
+++ b/src/uct/cuda/base/cuda_md.c
@@ -0,0 +1,98 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED.
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "cuda_md.h"
+
+#include
+#include
+#include
+#include
+#include
+
+
+/*
+ * Classify 'addr' with cuPointerGetAttributes(): a NULL address is reported as
+ * UCS_MEMORY_TYPE_HOST; device memory is reported as
+ * UCS_MEMORY_TYPE_CUDA_MANAGED or UCS_MEMORY_TYPE_CUDA. For plain device
+ * memory CU_POINTER_ATTRIBUTE_SYNC_MEMOPS is also set on the pointer, and a
+ * failure to do so is only logged. Everything else yields
+ * UCS_ERR_INVALID_ADDR and leaves *mem_type_p unwritten. 'md' and 'length'
+ * are not used.
+ */
+UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_base_detect_memory_type,
+                 (md, addr, length, mem_type_p),
+                 uct_md_h md, const void *addr, size_t length,
+                 ucs_memory_type_t *mem_type_p)
+{
+    CUmemorytype memType = (CUmemorytype)0;
+    uint32_t isManaged = 0;
+    unsigned value = 1;
+    void *attrdata[] = {(void *)&memType, (void *)&isManaged};
+    CUpointer_attribute attributes[2] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
+                                         CU_POINTER_ATTRIBUTE_IS_MANAGED};
+    CUresult cu_err;
+    const char *cu_err_str = NULL;
+
+    if (addr == NULL) {
+        *mem_type_p = UCS_MEMORY_TYPE_HOST;
+        return UCS_OK;
+    }
+
+    cu_err = cuPointerGetAttributes(2, attributes, attrdata, (CUdeviceptr)addr);
+    if ((cu_err == CUDA_SUCCESS) && (memType == CU_MEMORYTYPE_DEVICE)) {
+        if (isManaged) {
+            *mem_type_p = UCS_MEMORY_TYPE_CUDA_MANAGED;
+        } else {
+            *mem_type_p = UCS_MEMORY_TYPE_CUDA;
+            cu_err = cuPointerSetAttribute(&value, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
+                                           (CUdeviceptr)addr);
+            if (cu_err != CUDA_SUCCESS) {
+                /* cuGetErrorString() may fail to provide a string */
+                if ((cuGetErrorString(cu_err, &cu_err_str) != CUDA_SUCCESS) ||
+                    (cu_err_str == NULL)) {
+                    cu_err_str = "unknown error";
+                }
+                ucs_warn("cuPointerSetAttribute(%p) error: %s", (void*) addr, cu_err_str);
+            }
+        }
+        return UCS_OK;
+    }
+
+    return UCS_ERR_INVALID_ADDR;
+}
+
+/*
+ * Report no MD resources when cudaGetDeviceCount() fails or finds no GPU
+ * (via uct_md_query_empty_md_resource()); otherwise report a single MD
+ * resource for 'component' (via uct_md_query_single_md_resource()).
+ */
+ucs_status_t
+uct_cuda_base_query_md_resources(uct_component_t *component,
+                                 uct_md_resource_desc_t **resources_p,
+                                 unsigned *num_resources_p)
+{
+    cudaError_t cudaErr;
+    int num_gpus;
+
+    cudaErr = cudaGetDeviceCount(&num_gpus);
+    if ((cudaErr != cudaSuccess) || (num_gpus == 0)) {
+        return uct_md_query_empty_md_resource(resources_p, num_resources_p);
+    }
+
+    return uct_md_query_single_md_resource(component, resources_p,
+                                           num_resources_p);
+}
+
+UCS_MODULE_INIT() {
+    /* TODO make gdrcopy independent of cuda */
+    UCS_MODULE_FRAMEWORK_DECLARE(uct_cuda);
+    UCS_MODULE_FRAMEWORK_LOAD(uct_cuda, 0);
+    return UCS_OK;
+}
diff --git a/src/uct/cuda/base/cuda_md.h b/src/uct/cuda/base/cuda_md.h
new file mode
100644
index 0000000..f68d703
--- /dev/null
+++ b/src/uct/cuda/base/cuda_md.h
@@ -0,0 +1,20 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCT_CUDA_MD_H
+#define UCT_CUDA_MD_H
+
+#include
+
+ucs_status_t uct_cuda_base_detect_memory_type(uct_md_h md, const void *addr,
+                                              size_t length,
+                                              ucs_memory_type_t *mem_type_p);
+
+ucs_status_t
+uct_cuda_base_query_md_resources(uct_component_t *component,
+                                 uct_md_resource_desc_t **resources_p,
+                                 unsigned *num_resources_p);
+
+#endif
diff --git a/src/uct/cuda/configure.m4 b/src/uct/cuda/configure.m4
new file mode 100644
index 0000000..f3f9421
--- /dev/null
+++ b/src/uct/cuda/configure.m4
@@ -0,0 +1,12 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+
+UCX_CHECK_CUDA
+
+AS_IF([test "x$cuda_happy" = "xyes"], [uct_modules="${uct_modules}:cuda"])
+uct_cuda_modules=""
+m4_include([src/uct/cuda/gdr_copy/configure.m4])
+AC_DEFINE_UNQUOTED([uct_cuda_MODULES], ["${uct_cuda_modules}"], [CUDA loadable modules])
+AC_CONFIG_FILES([src/uct/cuda/Makefile])
diff --git a/src/uct/cuda/cuda_copy/cuda_copy_ep.c b/src/uct/cuda/cuda_copy/cuda_copy_ep.c
new file mode 100644
index 0000000..0bf9962
--- /dev/null
+++ b/src/uct/cuda/cuda_copy/cuda_copy_ep.c
@@ -0,0 +1,215 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "cuda_copy_ep.h"
+#include "cuda_copy_iface.h"
+
+#include
+#include
+#include
+#include
+#include
+
+
+static UCS_CLASS_INIT_FUNC(uct_cuda_copy_ep_t, const uct_ep_params_t *params)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(params->iface,
+                                                  uct_cuda_copy_iface_t);
+
+    UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(params);
+    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super);
+
+    return UCS_OK;
+}
+
+static UCS_CLASS_CLEANUP_FUNC(uct_cuda_copy_ep_t)
+{
+}
+
+UCS_CLASS_DEFINE(uct_cuda_copy_ep_t, uct_base_ep_t)
+UCS_CLASS_DEFINE_NEW_FUNC(uct_cuda_copy_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DEFINE_DELETE_FUNC(uct_cuda_copy_ep_t, uct_ep_t);
+
+#define uct_cuda_copy_trace_data(_remote_addr, _rkey, _fmt, ...) \
+    ucs_trace_data(_fmt " to %"PRIx64"(%+ld)", ## __VA_ARGS__, (_remote_addr), \
+                   (_rkey))
+
+/*
+ * Create the CUDA stream on first use. Expands to a bare 'if' statement that
+ * makes the enclosing function return UCS_ERR_IO_ERROR when the stream cannot
+ * be created.
+ */
+#define UCT_CUDA_COPY_CHECK_AND_CREATE_STREAM(_strm) \
+    if ((_strm) == 0) { \
+        ucs_status_t status; \
+        status = UCT_CUDA_FUNC(cudaStreamCreateWithFlags(&(_strm), cudaStreamNonBlocking)); \
+        if (UCS_OK != status) { \
+            return UCS_ERR_IO_ERROR; \
+        } \
+    }
+
+/*
+ * Queue an asynchronous copy of 'length' bytes on 'stream' and track it with an
+ * event object taken from the interface's event pool.
+ *
+ * Returns UCS_OK for a zero-length request (nothing is queued, 'comp' is not
+ * recorded), UCS_INPROGRESS once the copy and its event were pushed to
+ * 'outstanding_queue', UCS_ERR_NO_MEMORY when the pool has no event to give,
+ * and UCS_ERR_IO_ERROR when a CUDA call fails.
+ */
+static UCS_F_ALWAYS_INLINE ucs_status_t
+uct_cuda_copy_post_cuda_async_copy(uct_ep_h tl_ep, void *dst, void *src, size_t length,
+                                   enum cudaMemcpyKind direction, cudaStream_t stream,
+                                   ucs_queue_head_t *outstanding_queue,
+                                   uct_completion_t *comp)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t);
+    uct_cuda_copy_event_desc_t *cuda_event;
+    ucs_status_t status;
+
+    if (!length) {
+        return UCS_OK;
+    }
+
+    cuda_event = ucs_mpool_get(&iface->cuda_event_desc);
+    if (ucs_unlikely(cuda_event == NULL)) {
+        ucs_error("Failed to allocate cuda event object");
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    status = UCT_CUDA_FUNC(cudaMemcpyAsync(dst, src, length, direction, stream));
+    if (UCS_OK == status) {
+        status = UCT_CUDA_FUNC(cudaEventRecord(cuda_event->event, stream));
+    }
+
+    if (UCS_OK != status) {
+        /* the event was never queued, so give it back to the pool here */
+        ucs_mpool_put(cuda_event);
+        return UCS_ERR_IO_ERROR;
+    }
+
+    ucs_queue_push(outstanding_queue, &cuda_event->queue);
+    cuda_event->comp = comp;
+
+    ucs_trace("cuda async issued :%p dst:%p, src:%p len:%zu",
+              cuda_event, dst, src, length);
+    return UCS_INPROGRESS;
+}
+
+/*
+ * Start an asynchronous device-to-host copy from 'remote_addr' into iov[0].
+ * Only the first IOV entry is transferred; 'iovcnt' is used for statistics and
+ * tracing only.
+ */
+UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_copy_ep_get_zcopy,
+                 (tl_ep, iov, iovcnt, remote_addr, rkey, comp),
+                 uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                 uint64_t remote_addr, uct_rkey_t rkey,
+                 uct_completion_t *comp)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t);
+    ucs_status_t status;
+
+    UCT_CUDA_COPY_CHECK_AND_CREATE_STREAM(iface->stream_d2h);
+
+    status = uct_cuda_copy_post_cuda_async_copy(tl_ep, iov[0].buffer, (void *)remote_addr,
+                                                iov[0].length, cudaMemcpyDeviceToHost,
+                                                iface->stream_d2h,
+                                                &iface->outstanding_d2h_cuda_event_q, comp);
+    if (!UCS_STATUS_IS_ERR(status)) {
+        VALGRIND_MAKE_MEM_DEFINED(iov[0].buffer, iov[0].length);
+    }
+
+    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY,
+                      uct_iov_total_length(iov, iovcnt));
+    uct_cuda_copy_trace_data(remote_addr, rkey, "GET_ZCOPY [length %zu]",
+                             uct_iov_total_length(iov, iovcnt));
+    return status;
+}
+
+/*
+ * Start an asynchronous host-to-device copy from iov[0] to 'remote_addr'.
+ * Only the first IOV entry is transferred; 'iovcnt' is used for statistics and
+ * tracing only.
+ */
+UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_copy_ep_put_zcopy,
+                 (tl_ep, iov, iovcnt, remote_addr, rkey, comp),
+                 uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                 uint64_t remote_addr, uct_rkey_t rkey,
+                 uct_completion_t *comp)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t);
+    ucs_status_t status;
+
+    UCT_CUDA_COPY_CHECK_AND_CREATE_STREAM(iface->stream_h2d);
+
+    status = uct_cuda_copy_post_cuda_async_copy(tl_ep, (void *)remote_addr, iov[0].buffer,
+                                                iov[0].length, cudaMemcpyHostToDevice,
+                                                iface->stream_h2d,
+                                                &iface->outstanding_h2d_cuda_event_q, comp);
+
+    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, ZCOPY,
+                      uct_iov_total_length(iov, iovcnt));
+    uct_cuda_copy_trace_data(remote_addr, rkey, "PUT_ZCOPY [length %zu]",
+                             uct_iov_total_length(iov, iovcnt));
+    return status;
+}
+
+/*
+ * Copy 'length' bytes from host 'buffer' to device address 'remote_addr' on the
+ * host-to-device stream and wait for that stream before returning.
+ */
+UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_copy_ep_put_short,
+                 (tl_ep, buffer, length, remote_addr, rkey),
+                 uct_ep_h tl_ep, const void *buffer, unsigned length,
+                 uint64_t remote_addr, uct_rkey_t rkey)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t);
+    ucs_status_t status;
+
+    UCT_CUDA_COPY_CHECK_AND_CREATE_STREAM(iface->stream_h2d);
+
+    /* keep the copy's own status: a failed copy must not be reported as
+     * success just because the following synchronize succeeds */
+    status = UCT_CUDA_FUNC(cudaMemcpyAsync((void *)remote_addr, buffer, length,
+                                           cudaMemcpyHostToDevice, iface->stream_h2d));
+    if (UCS_OK == status) {
+        status = UCT_CUDA_FUNC(cudaStreamSynchronize(iface->stream_h2d));
+    }
+
+    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length);
+    ucs_trace_data("PUT_SHORT size %d from %p to %p",
+                   length, buffer, (void *)remote_addr);
+    return status;
+}
+
+/*
+ * Copy 'length' bytes from device address 'remote_addr' to host 'buffer' on the
+ * device-to-host stream and wait for that stream before returning.
+ */
+UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_copy_ep_get_short,
+                 (tl_ep, buffer, length, remote_addr, rkey),
+                 uct_ep_h tl_ep, void *buffer, unsigned length,
+                 uint64_t remote_addr, uct_rkey_t rkey)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_copy_iface_t);
+    ucs_status_t status;
+
+    UCT_CUDA_COPY_CHECK_AND_CREATE_STREAM(iface->stream_d2h);
+
+    /* keep the copy's own status: a failed copy must not be reported as
+     * success just because the following synchronize succeeds */
+    status = UCT_CUDA_FUNC(cudaMemcpyAsync(buffer, (void *)remote_addr, length,
+                                           cudaMemcpyDeviceToHost, iface->stream_d2h));
+    if (UCS_OK == status) {
+        status = UCT_CUDA_FUNC(cudaStreamSynchronize(iface->stream_d2h));
+    }
+
+    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, SHORT, length);
+    ucs_trace_data("GET_SHORT size %d from %p to %p",
+                   length, (void *)remote_addr, buffer);
+    return status;
+}
+
diff --git a/src/uct/cuda/cuda_copy/cuda_copy_ep.h b/src/uct/cuda/cuda_copy/cuda_copy_ep.h
new file mode 100644
index 0000000..310a50d
--- /dev/null
+++ b/src/uct/cuda/cuda_copy/cuda_copy_ep.h
@@ -0,0 +1,44 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED.
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_CUDA_COPY_EP_H
+#define UCT_CUDA_COPY_EP_H
+
+#include
+#include
+#include
+
+
+typedef struct uct_cuda_copy_ep_addr {
+    int ep_id;
+} uct_cuda_copy_ep_addr_t;
+
+typedef struct uct_cuda_copy_ep {
+    uct_base_ep_t super;
+    struct uct_cuda_copy_ep *next;
+} uct_cuda_copy_ep_t;
+
+UCS_CLASS_DECLARE_NEW_FUNC(uct_cuda_copy_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DECLARE_DELETE_FUNC(uct_cuda_copy_ep_t, uct_ep_t);
+
+ucs_status_t uct_cuda_copy_ep_get_zcopy(uct_ep_h tl_ep,
+                                        const uct_iov_t *iov, size_t iovcnt,
+                                        uint64_t remote_addr, uct_rkey_t rkey,
+                                        uct_completion_t *comp);
+
+ucs_status_t uct_cuda_copy_ep_put_zcopy(uct_ep_h tl_ep,
+                                        const uct_iov_t *iov, size_t iovcnt,
+                                        uint64_t remote_addr, uct_rkey_t rkey,
+                                        uct_completion_t *comp);
+
+ucs_status_t uct_cuda_copy_ep_put_short(uct_ep_h tl_ep, const void *buffer,
+                                        unsigned length, uint64_t remote_addr,
+                                        uct_rkey_t rkey);
+
+ucs_status_t uct_cuda_copy_ep_get_short(uct_ep_h tl_ep, void *buffer,
+                                        unsigned length, uint64_t remote_addr,
+                                        uct_rkey_t rkey);
+
+#endif
diff --git a/src/uct/cuda/cuda_copy/cuda_copy_iface.c b/src/uct/cuda/cuda_copy/cuda_copy_iface.c
new file mode 100644
index 0000000..dbb9ca9
--- /dev/null
+++ b/src/uct/cuda/cuda_copy/cuda_copy_iface.c
@@ -0,0 +1,336 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "cuda_copy_iface.h"
+#include "cuda_copy_md.h"
+#include "cuda_copy_ep.h"
+
+#include
+#include
+#include
+#include
+
+
+static ucs_config_field_t uct_cuda_copy_iface_config_table[] = {
+
+    {"", "", NULL,
+     ucs_offsetof(uct_cuda_copy_iface_config_t, super),
+     UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)},
+
+    {"MAX_POLL", "16",
+     "Max number of event completions to pick during cuda events polling",
+     ucs_offsetof(uct_cuda_copy_iface_config_t, max_poll), UCS_CONFIG_TYPE_UINT},
+
+    {"MAX_EVENTS", "inf",
+     "Max number of cuda events. -1 is infinite",
+     ucs_offsetof(uct_cuda_copy_iface_config_t, max_cuda_events), UCS_CONFIG_TYPE_UINT},
+
+    {NULL}
+};
+
+
+/* Forward declaration for the delete function */
+static void UCS_CLASS_DELETE_FUNC_NAME(uct_cuda_copy_iface_t)(uct_iface_t*);
+
+
+/* Write this interface's id into the caller-provided address buffer */
+static ucs_status_t uct_cuda_copy_iface_get_address(uct_iface_h tl_iface,
+                                                    uct_iface_addr_t *iface_addr)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_copy_iface_t);
+
+    *(uct_cuda_copy_iface_addr_t*)iface_addr = iface->id;
+    return UCS_OK;
+}
+
+/* A peer is reachable only when its interface address equals this interface's
+ * id; 'dev_addr' is not consulted */
+static int uct_cuda_copy_iface_is_reachable(const uct_iface_h tl_iface,
+                                            const uct_device_addr_t *dev_addr,
+                                            const uct_iface_addr_t *iface_addr)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_copy_iface_t);
+    uct_cuda_copy_iface_addr_t *addr = (uct_cuda_copy_iface_addr_t*)iface_addr;
+
+    return (addr != NULL) && (iface->id == *addr);
+}
+
+/* Start from the base interface attributes, then set the cuda_copy address
+ * sizes, capabilities and performance estimates */
+static ucs_status_t uct_cuda_copy_iface_query(uct_iface_h tl_iface,
+                                              uct_iface_attr_t *iface_attr)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_copy_iface_t);
+
+    uct_base_iface_query(&iface->super, iface_attr);
+
+    iface_attr->iface_addr_len = sizeof(uct_cuda_copy_iface_addr_t);
+    iface_attr->device_addr_len = 0;
+    iface_attr->ep_addr_len = 0;
+    iface_attr->cap.flags = UCT_IFACE_FLAG_CONNECT_TO_IFACE |
+                            UCT_IFACE_FLAG_GET_SHORT |
+                            UCT_IFACE_FLAG_PUT_SHORT |
+                            UCT_IFACE_FLAG_GET_ZCOPY |
+                            UCT_IFACE_FLAG_PUT_ZCOPY |
+                            UCT_IFACE_FLAG_PENDING;
+
+    iface_attr->cap.put.max_short = UINT_MAX;
+    iface_attr->cap.put.max_bcopy = 0;
+    iface_attr->cap.put.min_zcopy = 0;
+    iface_attr->cap.put.max_zcopy = SIZE_MAX;
+    iface_attr->cap.put.opt_zcopy_align = 1;
+    iface_attr->cap.put.align_mtu = iface_attr->cap.put.opt_zcopy_align;
+    iface_attr->cap.put.max_iov = 1;
+
+    iface_attr->cap.get.max_short = UINT_MAX;
+    iface_attr->cap.get.max_bcopy = 0;
+    iface_attr->cap.get.min_zcopy = 0;
+    iface_attr->cap.get.max_zcopy = SIZE_MAX;
+    iface_attr->cap.get.opt_zcopy_align = 1;
+    iface_attr->cap.get.align_mtu = iface_attr->cap.get.opt_zcopy_align;
+    iface_attr->cap.get.max_iov = 1;
+
+    iface_attr->cap.am.max_short = 0;
+    iface_attr->cap.am.max_bcopy = 0;
+    iface_attr->cap.am.min_zcopy = 0;
+    iface_attr->cap.am.max_zcopy = 0;
+    iface_attr->cap.am.opt_zcopy_align = 1;
+    iface_attr->cap.am.align_mtu = iface_attr->cap.am.opt_zcopy_align;
+    iface_attr->cap.am.max_hdr = 0;
+    iface_attr->cap.am.max_iov = 1;
+
+    iface_attr->latency.overhead = 10e-6; /* 10 us */
+    iface_attr->latency.growth = 0;
+    iface_attr->bandwidth.dedicated = 0;
+    iface_attr->bandwidth.shared = 6911 * 1024.0 * 1024.0;
+    iface_attr->overhead = 0;
+    iface_attr->priority = 0;
+
+    return UCS_OK;
+}
+
+/* Flush is complete (UCS_OK) once both outstanding-event queues are empty and
+ * UCS_INPROGRESS otherwise; a completion object is not supported */
+static ucs_status_t uct_cuda_copy_iface_flush(uct_iface_h tl_iface, unsigned flags,
+                                              uct_completion_t *comp)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_copy_iface_t);
+
+    if (comp != NULL) {
+        return UCS_ERR_UNSUPPORTED;
+    }
+
+    if (ucs_queue_is_empty(&iface->outstanding_d2h_cuda_event_q) &&
+        ucs_queue_is_empty(&iface->outstanding_h2d_cuda_event_q)) {
+        UCT_TL_IFACE_STAT_FLUSH(ucs_derived_of(tl_iface, uct_base_iface_t));
+        return UCS_OK;
+    }
+
+    UCT_TL_IFACE_STAT_FLUSH_WAIT(ucs_derived_of(tl_iface, uct_base_iface_t));
+    return UCS_INPROGRESS;
+}
+
+/*
+ * Walk 'event_queue' from its head and retire events until one is still
+ * pending; the 'max_events' limit is checked after each retired event. A
+ * retired event is removed from the queue, its completion (if any) is invoked
+ * and it is returned to its memory pool. An event whose query fails with
+ * anything but cudaErrorNotReady is retired too, with UCS_ERR_IO_ERROR as the
+ * completion status, so a broken event cannot block the queue forever.
+ * Returns the number of retired events.
+ */
+static UCS_F_ALWAYS_INLINE unsigned
+uct_cuda_copy_progress_event_queue(ucs_queue_head_t *event_queue, unsigned max_events)
+{
+    unsigned count = 0;
+    uct_cuda_copy_event_desc_t *cuda_event;
+    ucs_queue_iter_t iter;
+    cudaError_t result;
+    ucs_status_t status;
+
+    ucs_queue_for_each_safe(cuda_event, iter, event_queue, queue) {
+        result = cudaEventQuery(cuda_event->event);
+        if (cudaErrorNotReady == result) {
+            /* not finished yet - retry on the next progress call */
+            break;
+        }
+
+        if (cudaSuccess == result) {
+            status = UCS_OK;
+        } else {
+            ucs_error("cudaEventQuery(%p) failed: %s", cuda_event,
+                      cudaGetErrorString(result));
+            status = UCS_ERR_IO_ERROR;
+        }
+
+        ucs_queue_del_iter(event_queue, iter);
+        if (cuda_event->comp != NULL) {
+            uct_invoke_completion(cuda_event->comp, status);
+        }
+        ucs_trace_poll("CUDA Event Done :%p", cuda_event);
+        ucs_mpool_put(cuda_event);
+        count++;
+        if (count >= max_events) {
+            break;
+        }
+    }
+    return count;
+}
+
+/* Progress the device-to-host queue first, then the host-to-device queue with
+ * what is left of the max_poll budget; returns the number of retired events */
+static unsigned uct_cuda_copy_iface_progress(uct_iface_h tl_iface)
+{
+    uct_cuda_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_copy_iface_t);
+    unsigned max_events = iface->config.max_poll;
+    unsigned count;
+
+    count = uct_cuda_copy_progress_event_queue(&iface->outstanding_d2h_cuda_event_q,
+                                               max_events);
+    count += uct_cuda_copy_progress_event_queue(&iface->outstanding_h2d_cuda_event_q,
+                                                (max_events - count));
+    return count;
+}
+
+static uct_iface_ops_t uct_cuda_copy_iface_ops = {
+    .ep_get_short = uct_cuda_copy_ep_get_short,
+    .ep_put_short = uct_cuda_copy_ep_put_short,
+    .ep_get_zcopy = uct_cuda_copy_ep_get_zcopy,
+    .ep_put_zcopy = uct_cuda_copy_ep_put_zcopy,
+    .ep_pending_add = ucs_empty_function_return_busy,
+    .ep_pending_purge = ucs_empty_function,
+    .ep_flush = uct_base_ep_flush,
+    .ep_fence = uct_base_ep_fence,
+    .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_cuda_copy_ep_t),
+    .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_cuda_copy_ep_t),
+    .iface_flush = uct_cuda_copy_iface_flush,
+    .iface_fence = uct_base_iface_fence,
+    .iface_progress_enable = uct_base_iface_progress_enable,
+    .iface_progress_disable = uct_base_iface_progress_disable,
+    .iface_progress = uct_cuda_copy_iface_progress,
+    .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_cuda_copy_iface_t),
+    .iface_query = uct_cuda_copy_iface_query,
+    .iface_get_device_address = (uct_iface_get_device_address_func_t)ucs_empty_function_return_success,
+    .iface_get_address = uct_cuda_copy_iface_get_address,
+    .iface_is_reachable = uct_cuda_copy_iface_is_reachable,
+};
+
+/* Memory pool hook: zero a new descriptor and create its CUDA event (timing
+ * disabled); a creation failure is only logged */
+static void uct_cuda_copy_event_desc_init(ucs_mpool_t *mp, void *obj, void *chunk)
+{
+    uct_cuda_copy_event_desc_t *base = (uct_cuda_copy_event_desc_t *) obj;
+    ucs_status_t status;
+
+    memset(base, 0 , sizeof(*base));
+    status = UCT_CUDA_FUNC(cudaEventCreateWithFlags(&(base->event),
+                                                    cudaEventDisableTiming));
+    if (UCS_OK != status) {
+        ucs_error("cudaEventCreateWithFlags Failed");
+    }
+}
+
+/* Memory pool hook: destroy the descriptor's CUDA event, but only when
+ * UCT_CUDADRV_CTX_ACTIVE() reports a non-zero state */
+static void uct_cuda_copy_event_desc_cleanup(ucs_mpool_t *mp, void *obj)
+{
+    uct_cuda_copy_event_desc_t *base = (uct_cuda_copy_event_desc_t *) obj;
+    int active;
+
+    UCT_CUDADRV_CTX_ACTIVE(active);
+
+    if (active) {
+        UCT_CUDA_FUNC(cudaEventDestroy(base->event));
+    }
+}
+
+static ucs_mpool_ops_t uct_cuda_copy_event_desc_mpool_ops = {
+    .chunk_alloc = ucs_mpool_chunk_malloc,
+    .chunk_release = ucs_mpool_chunk_free,
+    .obj_init = uct_cuda_copy_event_desc_init,
+    .obj_cleanup = uct_cuda_copy_event_desc_cleanup,
+};
+
+/* Constructor: accepts only device names starting with UCT_CUDA_DEV_NAME,
+ * copies the polling/event limits from the configuration and creates the
+ * event descriptor pool; both streams start out unset (0) */
+static UCS_CLASS_INIT_FUNC(uct_cuda_copy_iface_t, uct_md_h md, uct_worker_h worker,
+                           const uct_iface_params_t *params,
+                           const uct_iface_config_t *tl_config)
+{
+    uct_cuda_copy_iface_config_t *config = ucs_derived_of(tl_config,
+                                                          uct_cuda_copy_iface_config_t);
+    ucs_status_t status;
+
+    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_cuda_copy_iface_ops, md, worker,
+                              params, tl_config UCS_STATS_ARG(params->stats_root)
+                              UCS_STATS_ARG("cuda_copy"));
+
+    if (strncmp(params->mode.device.dev_name,
+                UCT_CUDA_DEV_NAME, strlen(UCT_CUDA_DEV_NAME)) != 0) {
+        ucs_error("no device was found: %s", params->mode.device.dev_name);
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    self->id = ucs_generate_uuid((uintptr_t)self);
+    self->config.max_poll = config->max_poll;
+    self->config.max_cuda_events = config->max_cuda_events;
+
+    status = ucs_mpool_init(&self->cuda_event_desc,
+                            0,
+                            sizeof(uct_cuda_copy_event_desc_t),
+                            0,
+                            UCS_SYS_CACHE_LINE_SIZE,
+                            128,
+                            self->config.max_cuda_events,
+                            &uct_cuda_copy_event_desc_mpool_ops,
+                            "CUDA EVENT objects");
+
+    if (UCS_OK != status) {
+        ucs_error("mpool creation failed");
+        return UCS_ERR_IO_ERROR;
+    }
+
+    self->stream_d2h = 0;
+    self->stream_h2d = 0;
+
+    ucs_queue_head_init(&self->outstanding_d2h_cuda_event_q);
+    ucs_queue_head_init(&self->outstanding_h2d_cuda_event_q);
+
+    return UCS_OK;
+}
+
+/* Destructor: disable progress, destroy the streams that were created (only
+ * when UCT_CUDADRV_CTX_ACTIVE() reports a non-zero state) and clean up the pool */
+static UCS_CLASS_CLEANUP_FUNC(uct_cuda_copy_iface_t)
+{
+    int active;
+
+    UCT_CUDADRV_CTX_ACTIVE(active);
+
+    uct_base_iface_progress_disable(&self->super.super,
+                                    UCT_PROGRESS_SEND | UCT_PROGRESS_RECV);
+    if (active) {
+        if (self->stream_h2d != 0) {
+            UCT_CUDA_FUNC(cudaStreamDestroy(self->stream_h2d));
+        }
+
+        if (self->stream_d2h != 0) {
+            UCT_CUDA_FUNC(cudaStreamDestroy(self->stream_d2h));
+        }
+    }
+
+    ucs_mpool_cleanup(&self->cuda_event_desc, 1);
+}
+
+UCS_CLASS_DEFINE(uct_cuda_copy_iface_t, uct_base_iface_t);
+UCS_CLASS_DEFINE_NEW_FUNC(uct_cuda_copy_iface_t, uct_iface_t, uct_md_h, uct_worker_h,
+                          const uct_iface_params_t*, const uct_iface_config_t*);
+static UCS_CLASS_DEFINE_DELETE_FUNC(uct_cuda_copy_iface_t, uct_iface_t);
+
+
+UCT_TL_DEFINE(&uct_cuda_copy_component, cuda_copy, uct_cuda_base_query_devices,
+              uct_cuda_copy_iface_t, "CUDA_COPY_",
+              uct_cuda_copy_iface_config_table, uct_cuda_copy_iface_config_t);
diff --git a/src/uct/cuda/cuda_copy/cuda_copy_iface.h b/src/uct/cuda/cuda_copy/cuda_copy_iface.h
new file mode 100644
index 0000000..7ca6d51
--- /dev/null
+++ b/src/uct/cuda/cuda_copy/cuda_copy_iface.h
@@ -0,0 +1,44 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCT_CUDA_COPY_IFACE_H
+#define UCT_CUDA_COPY_IFACE_H
+
+#include
+#include
+
+
+typedef uint64_t uct_cuda_copy_iface_addr_t; /**< Type of the interface id */
+
+
+typedef struct uct_cuda_copy_iface {
+    uct_base_iface_t            super;                        /**< Base class */
+    uct_cuda_copy_iface_addr_t  id;                           /**< Set from ucs_generate_uuid() at init */
+    ucs_mpool_t                 cuda_event_desc;              /**< Pool of uct_cuda_copy_event_desc_t */
+    ucs_queue_head_t            outstanding_d2h_cuda_event_q; /**< Polled first by iface progress */
+    ucs_queue_head_t            outstanding_h2d_cuda_event_q; /**< Polled with the remaining budget */
+    cudaStream_t                stream_d2h;                   /**< 0 at init; destroyed on cleanup if set */
+    cudaStream_t                stream_h2d;                   /**< 0 at init; destroyed on cleanup if set */
+    struct {
+        unsigned                max_poll;                     /**< Event budget of one progress call */
+        unsigned                max_cuda_events;              /**< Passed to ucs_mpool_init() for the pool */
+    } config;
+} uct_cuda_copy_iface_t;
+
+
+typedef struct uct_cuda_copy_iface_config {
+    uct_iface_config_t  super;           /**< Base interface configuration */
+    unsigned            max_poll;        /**< Copied to iface->config.max_poll */
+    unsigned            max_cuda_events; /**< Copied to iface->config.max_cuda_events */
+} uct_cuda_copy_iface_config_t;
+
+
+typedef struct uct_cuda_copy_event_desc {
+    cudaEvent_t       event; /**< Created/destroyed by the mpool object callbacks */
+    uct_completion_t  *comp; /**< NOTE(review): presumably the completion to signal; users are outside this chunk */
+    ucs_queue_elem_t  queue; /**< NOTE(review): presumably links the descriptor into an outstanding queue */
+} uct_cuda_copy_event_desc_t;
+
+#endif
diff --git a/src/uct/cuda/cuda_copy/cuda_copy_md.c b/src/uct/cuda/cuda_copy/cuda_copy_md.c
new file mode 100644
index 0000000..a76c27e
--- /dev/null
+++ b/src/uct/cuda/cuda_copy/cuda_copy_md.c
@@ -0,0 +1,167 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */ + +#include "cuda_copy_md.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static ucs_config_field_t uct_cuda_copy_md_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_cuda_copy_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)}, + + {NULL} +}; + +static ucs_status_t uct_cuda_copy_md_query(uct_md_h md, uct_md_attr_t *md_attr) +{ + md_attr->cap.flags = UCT_MD_FLAG_REG; + md_attr->cap.reg_mem_types = UCS_BIT(UCS_MEMORY_TYPE_HOST); + md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_CUDA; + md_attr->cap.detect_mem_types = UCS_BIT(UCS_MEMORY_TYPE_CUDA) | + UCS_BIT(UCS_MEMORY_TYPE_CUDA_MANAGED); + md_attr->cap.max_alloc = 0; + md_attr->cap.max_reg = ULONG_MAX; + md_attr->rkey_packed_size = 0; + md_attr->reg_cost.overhead = 0; + md_attr->reg_cost.growth = 0; + memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus)); + return UCS_OK; +} + +static ucs_status_t uct_cuda_copy_mkey_pack(uct_md_h md, uct_mem_h memh, + void *rkey_buffer) +{ + return UCS_OK; +} + +static ucs_status_t uct_cuda_copy_rkey_unpack(uct_component_t *component, + const void *rkey_buffer, + uct_rkey_t *rkey_p, + void **handle_p) +{ + *rkey_p = 0xdeadbeef; + *handle_p = NULL; + return UCS_OK; +} + +static ucs_status_t uct_cuda_copy_rkey_release(uct_component_t *component, + uct_rkey_t rkey, void *handle) +{ + return UCS_OK; +} + +UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_copy_mem_reg, + (md, address, length, flags, memh_p), + uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + CUmemorytype memType; + CUresult result; + ucs_status_t status; + + if (address == NULL) { + *memh_p = address; + return UCS_OK; + } + + result = cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, + (CUdeviceptr)(address)); + if ((result == CUDA_SUCCESS) && (memType == CU_MEMORYTYPE_HOST)) { + /* memory is allocated with cudaMallocHost which is already registered */ + *memh_p = NULL; + return 
UCS_OK; + } + + status = UCT_CUDA_FUNC(cudaHostRegister(address, length, + cudaHostRegisterPortable)); + if (status != UCS_OK) { + return status; + } + + *memh_p = address; + return UCS_OK; +} + +UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_copy_mem_dereg, + (md, memh), uct_md_h md, uct_mem_h memh) +{ + void *address = (void *)memh; + ucs_status_t status; + + if (address == NULL) { + return UCS_OK; + } + + status = UCT_CUDA_FUNC(cudaHostUnregister(address)); + if (status != UCS_OK) { + return status; + } + + return UCS_OK; +} + +static void uct_cuda_copy_md_close(uct_md_h uct_md) { + uct_cuda_copy_md_t *md = ucs_derived_of(uct_md, uct_cuda_copy_md_t); + + ucs_free(md); +} + +static uct_md_ops_t md_ops = { + .close = uct_cuda_copy_md_close, + .query = uct_cuda_copy_md_query, + .mkey_pack = uct_cuda_copy_mkey_pack, + .mem_reg = uct_cuda_copy_mem_reg, + .mem_dereg = uct_cuda_copy_mem_dereg, + .detect_memory_type = uct_cuda_base_detect_memory_type, +}; + +static ucs_status_t +uct_cuda_copy_md_open(uct_component_t *component, const char *md_name, + const uct_md_config_t *config, uct_md_h *md_p) +{ + uct_cuda_copy_md_t *md; + + md = ucs_malloc(sizeof(uct_cuda_copy_md_t), "uct_cuda_copy_md_t"); + if (NULL == md) { + ucs_error("failed to allocate memory for uct_cuda_copy_md_t"); + return UCS_ERR_NO_MEMORY; + } + + md->super.ops = &md_ops; + md->super.component = &uct_cuda_copy_component; + *md_p = (uct_md_h)md; + return UCS_OK; +} + +uct_component_t uct_cuda_copy_component = { + .query_md_resources = uct_cuda_base_query_md_resources, + .md_open = uct_cuda_copy_md_open, + .cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = uct_cuda_copy_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = uct_cuda_copy_rkey_release, + .name = "cuda_cpy", + .md_config = { + .name = "Cuda-copy memory domain", + .prefix = "CUDA_COPY_", + .table = uct_cuda_copy_md_config_table, + .size = sizeof(uct_cuda_copy_md_config_t), + }, + .cm_config = 
UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_cuda_copy_component), + .flags = 0 +}; +UCT_COMPONENT_REGISTER(&uct_cuda_copy_component); + diff --git a/src/uct/cuda/cuda_copy/cuda_copy_md.h b/src/uct/cuda/cuda_copy/cuda_copy_md.h new file mode 100644 index 0000000..f73e625 --- /dev/null +++ b/src/uct/cuda/cuda_copy/cuda_copy_md.h @@ -0,0 +1,29 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_CUDA_COPY_MD_H +#define UCT_CUDA_COPY_MD_H + +#include +#include + + +extern uct_component_t uct_cuda_copy_component; + +/** + * @brief cuda_copy MD descriptor + */ +typedef struct uct_cuda_copy_md { + struct uct_md super; /**< Domain info */ +} uct_cuda_copy_md_t; + +/** + * gdr copy domain configuration. + */ +typedef struct uct_cuda_copy_md_config { + uct_md_config_t super; +} uct_cuda_copy_md_config_t; + +#endif diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c new file mode 100644 index 0000000..af258d6 --- /dev/null +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c @@ -0,0 +1,283 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "cuda_ipc_cache.h" +#include +#include +#include +#include +#include + +static ucs_pgt_dir_t *uct_cuda_ipc_cache_pgt_dir_alloc(const ucs_pgtable_t *pgtable) +{ + void *ptr; + int ret; + + ret = ucs_posix_memalign(&ptr, + ucs_max(sizeof(void *), UCS_PGT_ENTRY_MIN_ALIGN), + sizeof(ucs_pgt_dir_t), "cuda_ipc_cache_pgdir"); + return (ret == 0) ? 
ptr : NULL; +} + +static void uct_cuda_ipc_cache_pgt_dir_release(const ucs_pgtable_t *pgtable, + ucs_pgt_dir_t *dir) +{ + ucs_free(dir); +} + +static void +uct_cuda_ipc_cache_region_collect_callback(const ucs_pgtable_t *pgtable, + ucs_pgt_region_t *pgt_region, + void *arg) +{ + ucs_list_link_t *list = arg; + uct_cuda_ipc_cache_region_t *region; + + region = ucs_derived_of(pgt_region, uct_cuda_ipc_cache_region_t); + ucs_list_add_tail(list, ®ion->list); +} + +static void uct_cuda_ipc_cache_purge(uct_cuda_ipc_cache_t *cache) +{ + uct_cuda_ipc_cache_region_t *region, *tmp; + ucs_list_link_t region_list; + + ucs_list_head_init(®ion_list); + ucs_pgtable_purge(&cache->pgtable, uct_cuda_ipc_cache_region_collect_callback, + ®ion_list); + ucs_list_for_each_safe(region, tmp, ®ion_list, list) { + UCT_CUDADRV_FUNC(cuIpcCloseMemHandle((CUdeviceptr)region->mapped_addr)); + ucs_free(region); + } + ucs_trace("%s: cuda ipc cache purged", cache->name); +} + +static ucs_status_t uct_cuda_ipc_open_memhandle(CUipcMemHandle memh, + CUdeviceptr *mapped_addr) +{ + const char *cu_err_str; + CUresult cuerr; + + cuerr = cuIpcOpenMemHandle(mapped_addr, memh, + CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS); + if (cuerr != CUDA_SUCCESS) { + if (cuerr == CUDA_ERROR_ALREADY_MAPPED) { + return UCS_ERR_ALREADY_EXISTS; + } + + cuGetErrorString(cuerr, &cu_err_str); + ucs_error("cuIpcOpenMemHandle() failed: %s", cu_err_str); + + return UCS_ERR_INVALID_PARAM; + } + + return UCS_OK; +} + +static void uct_cuda_ipc_cache_invalidate_regions(uct_cuda_ipc_cache_t *cache, + void *from, void *to) +{ + ucs_list_link_t region_list; + ucs_status_t status; + uct_cuda_ipc_cache_region_t *region, *tmp; + + ucs_list_head_init(®ion_list); + ucs_pgtable_search_range(&cache->pgtable, (ucs_pgt_addr_t)from, + (ucs_pgt_addr_t)to, + uct_cuda_ipc_cache_region_collect_callback, + ®ion_list); + ucs_list_for_each_safe(region, tmp, ®ion_list, list) { + status = ucs_pgtable_remove(&cache->pgtable, ®ion->super); + if (status != UCS_OK) { + 
ucs_error("failed to remove address:%p from cache (%s)", + (void *)region->key.d_bptr, ucs_status_string(status)); + } + UCT_CUDADRV_FUNC(cuIpcCloseMemHandle((CUdeviceptr)region->mapped_addr)); + ucs_free(region); + } + ucs_trace("%s: closed memhandles in the range [%p..%p]", + cache->name, from, to); +} + +UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_cache_map_memhandle, + (arg, key, mapped_addr), + void *arg, uct_cuda_ipc_key_t *key, void **mapped_addr) +{ + uct_cuda_ipc_cache_t *cache = (uct_cuda_ipc_cache_t *)arg; + ucs_status_t status; + ucs_pgt_region_t *pgt_region; + uct_cuda_ipc_cache_region_t *region; + int ret; + + pthread_rwlock_rdlock(&cache->lock); + pgt_region = UCS_PROFILE_CALL(ucs_pgtable_lookup, + &cache->pgtable, key->d_bptr); + if (ucs_likely(pgt_region != NULL)) { + region = ucs_derived_of(pgt_region, uct_cuda_ipc_cache_region_t); + if (memcmp((const void *)&key->ph, (const void *)®ion->key.ph, + sizeof(key->ph)) == 0) { + /*cache hit */ + ucs_trace("%s: cuda_ipc cache hit addr:%p size:%lu region:" + UCS_PGT_REGION_FMT, cache->name, (void *)key->d_bptr, + key->b_len, UCS_PGT_REGION_ARG(®ion->super)); + + *mapped_addr = region->mapped_addr; + pthread_rwlock_unlock(&cache->lock); + return UCS_OK; + } else { + ucs_trace("%s: cuda_ipc cache remove stale region:" + UCS_PGT_REGION_FMT " new_addr:%p new_size:%lu", + cache->name, UCS_PGT_REGION_ARG(®ion->super), + (void *)key->d_bptr, key->b_len); + + status = ucs_pgtable_remove(&cache->pgtable, ®ion->super); + if (status != UCS_OK) { + ucs_error("%s: failed to remove address:%p from cache", + cache->name, (void *)key->d_bptr); + goto err; + } + + /* close memhandle */ + UCT_CUDADRV_FUNC(cuIpcCloseMemHandle((CUdeviceptr) + region->mapped_addr)); + key->d_mapped = 0; + ucs_free(region); + } + } + + status = (key->d_mapped == 0) /* potentially already opened in rkey_unpack */ + ? 
uct_cuda_ipc_open_memhandle(key->ph, &key->d_mapped) + : UCS_OK; + + *mapped_addr = (void *)key->d_mapped; + + if (ucs_unlikely(status != UCS_OK)) { + if (ucs_likely(status == UCS_ERR_ALREADY_EXISTS)) { + /* unmap all overlapping regions and retry*/ + uct_cuda_ipc_cache_invalidate_regions(cache, (void *)key->d_bptr, + UCS_PTR_BYTE_OFFSET(key->d_bptr, + key->b_len)); + status = uct_cuda_ipc_open_memhandle(key->ph, (CUdeviceptr *)mapped_addr); + if (ucs_unlikely(status != UCS_OK)) { + if (ucs_likely(status == UCS_ERR_ALREADY_EXISTS)) { + /* unmap all cache entries and retry */ + uct_cuda_ipc_cache_purge(cache); + status = uct_cuda_ipc_open_memhandle(key->ph, (CUdeviceptr *)mapped_addr); + if (status != UCS_OK) { + ucs_fatal("%s: failed to open ipc mem handle. addr:%p " + "len:%lu (%s)", cache->name, + (void *)key->d_bptr, key->b_len, + ucs_status_string(status)); + } + } else { + ucs_fatal("%s: failed to open ipc mem handle. addr:%p len:%lu", + cache->name, (void *)key->d_bptr, key->b_len); + } + } + } else { + ucs_fatal("%s: failed to open ipc mem handle. addr:%p len:%lu", + cache->name, (void *)key->d_bptr, key->b_len); + } + } + + /*create new cache entry */ + ret = ucs_posix_memalign((void **)®ion, + ucs_max(sizeof(void *), UCS_PGT_ENTRY_MIN_ALIGN), + sizeof(uct_cuda_ipc_cache_region_t), + "uct_cuda_ipc_cache_region"); + if (ret != 0) { + ucs_warn("failed to allocate uct_cuda_ipc_cache region"); + status = UCS_ERR_NO_MEMORY; + goto err; + } + + region->super.start = ucs_align_down_pow2((uintptr_t)key->d_bptr, + UCS_PGT_ADDR_ALIGN); + region->super.end = ucs_align_up_pow2 ((uintptr_t)key->d_bptr + key->b_len, + UCS_PGT_ADDR_ALIGN); + region->key = *key; + region->mapped_addr = *mapped_addr; + + status = UCS_PROFILE_CALL(ucs_pgtable_insert, + &cache->pgtable, ®ion->super); + if (status == UCS_ERR_ALREADY_EXISTS) { + /* overlapped region means memory freed at source. 
remove and try insert */ + uct_cuda_ipc_cache_invalidate_regions(cache, + (void *)region->super.start, + (void *)region->super.end); + status = UCS_PROFILE_CALL(ucs_pgtable_insert, + &cache->pgtable, ®ion->super); + } + if (status != UCS_OK) { + + ucs_error("%s: failed to insert region:"UCS_PGT_REGION_FMT" size:%lu :%s", + cache->name, UCS_PGT_REGION_ARG(®ion->super), key->b_len, + ucs_status_string(status)); + ucs_free(region); + goto err; + } + + ucs_trace("%s: cuda_ipc cache new region:"UCS_PGT_REGION_FMT" size:%lu", + cache->name, UCS_PGT_REGION_ARG(®ion->super), key->b_len); + + pthread_rwlock_unlock(&cache->lock); + return UCS_OK; +err: + pthread_rwlock_unlock(&cache->lock); + return status; +} + +ucs_status_t uct_cuda_ipc_create_cache(uct_cuda_ipc_cache_t **cache, + const char *name) +{ + ucs_status_t status; + uct_cuda_ipc_cache_t *cache_desc; + int ret; + + cache_desc = ucs_malloc(sizeof(uct_cuda_ipc_cache_t), "uct_cuda_ipc_cache_t"); + if (cache_desc == NULL) { + ucs_error("failed to allocate memory for cuda_ipc cache"); + return UCS_ERR_NO_MEMORY; + } + + ret = pthread_rwlock_init(&cache_desc->lock, NULL); + if (ret) { + ucs_error("pthread_rwlock_init() failed: %m"); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + status = ucs_pgtable_init(&cache_desc->pgtable, + uct_cuda_ipc_cache_pgt_dir_alloc, + uct_cuda_ipc_cache_pgt_dir_release); + if (status != UCS_OK) { + goto err_destroy_rwlock; + } + + cache_desc->name = strdup(name); + if (cache_desc->name == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_destroy_rwlock; + } + + *cache = cache_desc; + return UCS_OK; + +err_destroy_rwlock: + pthread_rwlock_destroy(&cache_desc->lock); +err: + free(cache_desc); + return status; +} + +void uct_cuda_ipc_destroy_cache(uct_cuda_ipc_cache_t *cache) +{ + uct_cuda_ipc_cache_purge(cache); + ucs_pgtable_cleanup(&cache->pgtable); + pthread_rwlock_destroy(&cache->lock); + free(cache->name); + ucs_free(cache); +} diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.h 
b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.h new file mode 100644 index 0000000..fa5f867 --- /dev/null +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.h @@ -0,0 +1,48 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCT_CUDA_IPC_CACHE_H_ +#define UCT_CUDA_IPC_CACHE_H_ + +#include +#include +#include "cuda_ipc_md.h" +#include +#include + + +typedef struct uct_cuda_ipc_cache uct_cuda_ipc_cache_t; +typedef struct uct_cuda_ipc_cache_region uct_cuda_ipc_cache_region_t; + + +typedef struct uct_cuda_ipc_rem_memh uct_cuda_ipc_rem_memh_t; + + +struct uct_cuda_ipc_cache_region { + ucs_pgt_region_t super; /**< Base class - page table region */ + ucs_list_link_t list; /**< List element */ + uct_cuda_ipc_key_t key; /**< Remote memory key */ + void *mapped_addr; /**< Local mapped address */ +}; + + +struct uct_cuda_ipc_cache { + pthread_rwlock_t lock; /**< protests the page table */ + ucs_pgtable_t pgtable; /**< Page table to hold the regions */ + char *name; /**< Name */ +}; + + +ucs_status_t uct_cuda_ipc_create_cache(uct_cuda_ipc_cache_t **cache, + const char *name); + + +void uct_cuda_ipc_destroy_cache(uct_cuda_ipc_cache_t *cache); + + +ucs_status_t uct_cuda_ipc_cache_map_memhandle(void *arg, uct_cuda_ipc_key_t *key, + void **mapped_addr); +#endif diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c b/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c new file mode 100644 index 0000000..2cdbf1c --- /dev/null +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_ep.c @@ -0,0 +1,176 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018-2019. ALL RIGHTS RESERVED. + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * See file LICENSE for terms. 
+ */ + +#include "cuda_ipc_ep.h" +#include "cuda_ipc_iface.h" +#include "cuda_ipc_md.h" + +#include +#include +#include +#include +#include + +#define UCT_CUDA_IPC_PUT 0 +#define UCT_CUDA_IPC_GET 1 + +static UCS_CLASS_INIT_FUNC(uct_cuda_ipc_ep_t, const uct_ep_params_t *params) +{ + uct_cuda_ipc_iface_t *iface = ucs_derived_of(params->iface, + uct_cuda_ipc_iface_t); + ucs_status_t status; + char target_name[64]; + + UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(params); + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super); + self->remote_memh_cache = NULL; + + if (iface->config.enable_cache) { + snprintf(target_name, sizeof(target_name), "dest:%d", + *(pid_t*)params->iface_addr); + status = uct_cuda_ipc_create_cache(&self->remote_memh_cache, target_name); + if (status != UCS_OK) { + ucs_error("could not create create cuda ipc cache: %s", + ucs_status_string(status)); + return status; + } + } + + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_cuda_ipc_ep_t) +{ + if (self->remote_memh_cache) { + uct_cuda_ipc_destroy_cache(self->remote_memh_cache); + } +} + +UCS_CLASS_DEFINE(uct_cuda_ipc_ep_t, uct_base_ep_t) +UCS_CLASS_DEFINE_NEW_FUNC(uct_cuda_ipc_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_cuda_ipc_ep_t, uct_ep_t); + +#define uct_cuda_ipc_trace_data(_addr, _rkey, _fmt, ...) 
\ + ucs_trace_data(_fmt " to %"PRIx64"(%+ld)", ## __VA_ARGS__, (_addr), (_rkey)) + +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_cuda_ipc_post_cuda_async_copy(uct_ep_h tl_ep, uint64_t remote_addr, + const uct_iov_t *iov, uct_rkey_t rkey, + uct_completion_t *comp, int direction) +{ + uct_cuda_ipc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cuda_ipc_iface_t); + uct_cuda_ipc_ep_t *ep = ucs_derived_of(tl_ep, uct_cuda_ipc_ep_t); + uct_cuda_ipc_key_t *key = (uct_cuda_ipc_key_t *) rkey; + void *mapped_rem_addr; + void *mapped_addr; + uct_cuda_ipc_event_desc_t *cuda_ipc_event; + ucs_queue_head_t *outstanding_queue; + ucs_status_t status; + CUdeviceptr dst, src; + CUstream stream; + size_t offset; + + if (0 == iov[0].length) { + ucs_trace_data("Zero length request: skip it"); + return UCS_OK; + } + + status = iface->map_memhandle((void *)ep->remote_memh_cache, key, &mapped_addr); + if (status != UCS_OK) { + return UCS_ERR_IO_ERROR; + } + + offset = (uintptr_t)remote_addr - (uintptr_t)key->d_bptr; + mapped_rem_addr = (void *) ((uintptr_t) mapped_addr + offset); + ucs_assert(offset <= key->b_len); + + if (!iface->streams_initialized) { + status = uct_cuda_ipc_iface_init_streams(iface); + if (UCS_OK != status) { + return status; + } + } + + key->dev_num %= iface->config.max_streams; /* round-robin */ + + stream = iface->stream_d2d[key->dev_num]; + outstanding_queue = &iface->outstanding_d2d_event_q; + cuda_ipc_event = ucs_mpool_get(&iface->event_desc); + + if (ucs_unlikely(cuda_ipc_event == NULL)) { + ucs_error("Failed to allocate cuda_ipc event object"); + return UCS_ERR_NO_MEMORY; + } + + dst = (CUdeviceptr) + ((direction == UCT_CUDA_IPC_PUT) ? mapped_rem_addr : iov[0].buffer); + src = (CUdeviceptr) + ((direction == UCT_CUDA_IPC_PUT) ? 
iov[0].buffer : mapped_rem_addr); + + status = UCT_CUDADRV_FUNC(cuMemcpyDtoDAsync(dst, src, iov[0].length, stream)); + if (UCS_OK != status) { + ucs_mpool_put(cuda_ipc_event); + return status; + } + + iface->stream_refcount[key->dev_num]++; + cuda_ipc_event->stream_id = key->dev_num; + + status = UCT_CUDADRV_FUNC(cuEventRecord(cuda_ipc_event->event, stream)); + if (UCS_OK != status) { + ucs_mpool_put(cuda_ipc_event); + return status; + } + + ucs_queue_push(outstanding_queue, &cuda_ipc_event->queue); + cuda_ipc_event->comp = comp; + cuda_ipc_event->mapped_addr = mapped_addr; + ucs_trace("cuMemcpyDtoDAsync issued :%p dst:%p, src:%p len:%ld", + cuda_ipc_event, (void *) dst, (void *) src, iov[0].length); + return UCS_INPROGRESS; +} + +UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_ep_get_zcopy, + (tl_ep, iov, iovcnt, remote_addr, rkey, comp), + uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + ucs_status_t status; + + status = uct_cuda_ipc_post_cuda_async_copy(tl_ep, remote_addr, iov, + rkey, comp, UCT_CUDA_IPC_GET); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + uct_cuda_ipc_trace_data(remote_addr, rkey, "GET_ZCOPY [length %zu]", + uct_iov_total_length(iov, iovcnt)); + return status; +} + +UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_ep_put_zcopy, + (tl_ep, iov, iovcnt, remote_addr, rkey, comp), + uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + ucs_status_t status; + + status = uct_cuda_ipc_post_cuda_async_copy(tl_ep, remote_addr, iov, + rkey, comp, UCT_CUDA_IPC_PUT); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + uct_cuda_ipc_trace_data(remote_addr, rkey, "PUT_ZCOPY [length %zu]", + 
uct_iov_total_length(iov, iovcnt)); + return status; +} diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_ep.h b/src/uct/cuda/cuda_ipc/cuda_ipc_ep.h new file mode 100644 index 0000000..4be71d2 --- /dev/null +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_ep.h @@ -0,0 +1,36 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2018-2019. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#ifndef UCT_CUDA_IPC_EP_H +#define UCT_CUDA_IPC_EP_H + +#include +#include +#include +#include "cuda_ipc_md.h" +#include "cuda_ipc_cache.h" + +typedef struct uct_cuda_ipc_ep_addr { + int ep_id; +} uct_cuda_ipc_ep_addr_t; + +typedef struct uct_cuda_ipc_ep { + uct_base_ep_t super; + uct_cuda_ipc_cache_t *remote_memh_cache; +} uct_cuda_ipc_ep_t; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_cuda_ipc_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_cuda_ipc_ep_t, uct_ep_t); + +ucs_status_t uct_cuda_ipc_ep_get_zcopy(uct_ep_h tl_ep, + const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +ucs_status_t uct_cuda_ipc_ep_put_zcopy(uct_ep_h tl_ep, + const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); +#endif diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c b/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c new file mode 100644 index 0000000..c646c60 --- /dev/null +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_iface.c @@ -0,0 +1,470 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018-2019. ALL RIGHTS RESERVED. + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * See file LICENSE for terms. 
+ */ + +#include "cuda_ipc_iface.h" +#include "cuda_ipc_md.h" +#include "cuda_ipc_ep.h" + +#include +#include +#include +#include + +static ucs_config_field_t uct_cuda_ipc_iface_config_table[] = { + + {"", "", NULL, + ucs_offsetof(uct_cuda_ipc_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + {"MAX_POLL", "16", + "Max number of event completions to pick during cuda events polling", + ucs_offsetof(uct_cuda_ipc_iface_config_t, max_poll), UCS_CONFIG_TYPE_UINT}, + + {"MAX_STREAMS", "16", + "Max number of CUDA streams to make concurrent progress on", + ucs_offsetof(uct_cuda_ipc_iface_config_t, max_streams), UCS_CONFIG_TYPE_UINT}, + + {"CACHE", "y", + "Enable remote endpoint IPC memhandle mapping cache", + ucs_offsetof(uct_cuda_ipc_iface_config_t, enable_cache), + UCS_CONFIG_TYPE_BOOL}, + + {"MAX_EVENTS", "inf", + "Max number of cuda events. -1 is infinite", + ucs_offsetof(uct_cuda_ipc_iface_config_t, max_cuda_ipc_events), UCS_CONFIG_TYPE_UINT}, + + {NULL} +}; + + +/* Forward declaration for the delete function */ +static void UCS_CLASS_DELETE_FUNC_NAME(uct_cuda_ipc_iface_t)(uct_iface_t*); + + +static uint64_t uct_cuda_ipc_iface_node_guid(uct_base_iface_t *iface) +{ + return ucs_machine_guid() * + ucs_string_to_id(iface->md->component->name); +} + +ucs_status_t uct_cuda_ipc_iface_get_device_address(uct_iface_t *tl_iface, + uct_device_addr_t *addr) +{ + uct_base_iface_t *iface = ucs_derived_of(tl_iface, uct_base_iface_t); + + *(uint64_t*)addr = uct_cuda_ipc_iface_node_guid(iface); + return UCS_OK; +} + +static ucs_status_t uct_cuda_ipc_iface_get_address(uct_iface_h tl_iface, + uct_iface_addr_t *iface_addr) +{ + *(pid_t*)iface_addr = getpid(); + return UCS_OK; +} + +static int uct_cuda_ipc_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + uct_cuda_ipc_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_ipc_iface_t); + + return 
((uct_cuda_ipc_iface_node_guid(&iface->super) == + *((const uint64_t *)dev_addr)) && ((getpid() != *(pid_t *)iface_addr))); +} + +static ucs_status_t uct_cuda_ipc_iface_query(uct_iface_h tl_iface, + uct_iface_attr_t *iface_attr) +{ + uct_cuda_ipc_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_ipc_iface_t); + + uct_base_iface_query(&iface->super, iface_attr); + + iface_attr->iface_addr_len = sizeof(pid_t); + iface_attr->device_addr_len = sizeof(uint64_t); + iface_attr->ep_addr_len = 0; + iface_attr->max_conn_priv = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE | + UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_PENDING | + UCT_IFACE_FLAG_GET_ZCOPY | + UCT_IFACE_FLAG_PUT_ZCOPY | + UCT_IFACE_FLAG_EVENT_SEND_COMP | + UCT_IFACE_FLAG_EVENT_RECV; + + iface_attr->cap.put.max_short = 0; + iface_attr->cap.put.max_bcopy = 0; + iface_attr->cap.put.min_zcopy = 0; + iface_attr->cap.put.max_zcopy = ULONG_MAX; + iface_attr->cap.put.opt_zcopy_align = 1; + iface_attr->cap.put.align_mtu = iface_attr->cap.put.opt_zcopy_align; + iface_attr->cap.put.max_iov = 1; + + iface_attr->cap.get.max_bcopy = 0; + iface_attr->cap.get.min_zcopy = 0; + iface_attr->cap.get.max_zcopy = ULONG_MAX; + iface_attr->cap.get.opt_zcopy_align = 1; + iface_attr->cap.get.align_mtu = iface_attr->cap.get.opt_zcopy_align; + iface_attr->cap.get.max_iov = 1; + + iface_attr->latency.overhead = 1e-9; + iface_attr->latency.growth = 0; + iface_attr->bandwidth.dedicated = 0; + iface_attr->bandwidth.shared = 24000 * 1024.0 * 1024.0; + iface_attr->overhead = 0; + iface_attr->priority = 0; + + return UCS_OK; +} + +static ucs_status_t +uct_cuda_ipc_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp) +{ + uct_cuda_ipc_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_ipc_iface_t); + + if (comp != NULL) { + return UCS_ERR_UNSUPPORTED; + } + + if (ucs_queue_is_empty(&iface->outstanding_d2d_event_q)) { + UCT_TL_IFACE_STAT_FLUSH(ucs_derived_of(tl_iface, uct_base_iface_t)); + 
return UCS_OK; + } + + UCT_TL_IFACE_STAT_FLUSH_WAIT(ucs_derived_of(tl_iface, uct_base_iface_t)); + return UCS_INPROGRESS; +} + +static ucs_status_t uct_cuda_ipc_iface_event_fd_get(uct_iface_h tl_iface, int *fd_p) +{ + uct_cuda_ipc_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_ipc_iface_t); + + if (-1 == iface->eventfd) { + iface->eventfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (iface->eventfd == -1) { + ucs_error("Failed to create event fd: %m"); + return UCS_ERR_IO_ERROR; + } + } + + *fd_p = iface->eventfd; + return UCS_OK; +} + +static void uct_cuda_ipc_common_cb(void *cuda_ipc_iface) +{ + uct_cuda_ipc_iface_t *iface = cuda_ipc_iface; + uint64_t dummy = 1; + int ret; + + /* No error handling yet */ + do { + ret = write(iface->eventfd, &dummy, sizeof(dummy)); + if (ret == sizeof(dummy)) { + return; + } else if (ret == -1) { + if (errno == EAGAIN) { + continue; + } else if (errno != EINTR) { + ucs_error("Signaling wakeup failed: %m"); + return; + } + } else { + ucs_assert(ret == 0); + } + } while (ret == 0); +} + +#if (__CUDACC_VER_MAJOR__ >= 100000) +static void CUDA_CB myHostFn(void *iface) +#else +static void CUDA_CB myHostCallback(CUstream hStream, CUresult status, + void *iface) +#endif +{ + uct_cuda_ipc_common_cb(iface); +} + +static UCS_F_ALWAYS_INLINE unsigned +uct_cuda_ipc_progress_event_q(uct_cuda_ipc_iface_t *iface, + ucs_queue_head_t *event_q) +{ + unsigned count = 0; + uct_cuda_ipc_event_desc_t *cuda_ipc_event; + ucs_queue_iter_t iter; + ucs_status_t status; + unsigned max_events = iface->config.max_poll; + + ucs_queue_for_each_safe(cuda_ipc_event, iter, event_q, queue) { + status = UCT_CUDADRV_FUNC(cuEventQuery(cuda_ipc_event->event)); + if (UCS_INPROGRESS == status) { + continue; + } else if (UCS_OK != status) { + return status; + } + + ucs_queue_del_iter(event_q, iter); + if (cuda_ipc_event->comp != NULL) { + uct_invoke_completion(cuda_ipc_event->comp, UCS_OK); + } + + status = iface->unmap_memhandle(cuda_ipc_event->mapped_addr); + if 
(status != UCS_OK) { + ucs_fatal("failed to unmap addr:%p", cuda_ipc_event->mapped_addr); + } + + ucs_trace_poll("CUDA_IPC Event Done :%p", cuda_ipc_event); + iface->stream_refcount[cuda_ipc_event->stream_id]--; + ucs_mpool_put(cuda_ipc_event); + count++; + + if (count >= max_events) { + break; + } + } + + return count; +} + +static unsigned uct_cuda_ipc_iface_progress(uct_iface_h tl_iface) +{ + uct_cuda_ipc_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_ipc_iface_t); + + return uct_cuda_ipc_progress_event_q(iface, &iface->outstanding_d2d_event_q); +} + +static ucs_status_t uct_cuda_ipc_iface_event_fd_arm(uct_iface_h tl_iface, + unsigned events) +{ + uct_cuda_ipc_iface_t *iface = ucs_derived_of(tl_iface, uct_cuda_ipc_iface_t); + int ret; + int i; + uint64_t dummy; + ucs_status_t status; + + if (uct_cuda_ipc_progress_event_q(iface, &iface->outstanding_d2d_event_q)) { + return UCS_ERR_BUSY; + } + + ucs_assert(iface->eventfd != -1); + + do { + ret = read(iface->eventfd, &dummy, sizeof(dummy)); + if (ret == sizeof(dummy)) { + status = UCS_ERR_BUSY; + return status; + } else if (ret == -1) { + if (errno == EAGAIN) { + break; + } else if (errno != EINTR) { + ucs_error("read from internal event fd failed: %m"); + status = UCS_ERR_IO_ERROR; + return status; + } else { + return UCS_ERR_BUSY; + } + } else { + ucs_assert(ret == 0); + } + } while (ret != 0); + + if (iface->streams_initialized) { + for (i = 0; i < iface->config.max_streams; i++) { + if (iface->stream_refcount[i]) { + status = +#if (__CUDACC_VER_MAJOR__ >= 100000) + UCT_CUDADRV_FUNC(cuLaunchHostFunc(iface->stream_d2d[i], + myHostFn, iface)); +#else + UCT_CUDADRV_FUNC(cuStreamAddCallback(iface->stream_d2d[i], + myHostCallback, iface, 0)); +#endif + if (UCS_OK != status) { + return status; + } + } + } + } + return UCS_OK; +} + +static uct_iface_ops_t uct_cuda_ipc_iface_ops = { + .ep_get_zcopy = uct_cuda_ipc_ep_get_zcopy, + .ep_put_zcopy = uct_cuda_ipc_ep_put_zcopy, + .ep_pending_add = 
ucs_empty_function_return_busy, + .ep_pending_purge = ucs_empty_function, + .ep_flush = uct_base_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_cuda_ipc_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_cuda_ipc_ep_t), + .iface_flush = uct_cuda_ipc_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_progress_enable = uct_base_iface_progress_enable, + .iface_progress_disable = uct_base_iface_progress_disable, + .iface_progress = uct_cuda_ipc_iface_progress, + .iface_event_fd_get = uct_cuda_ipc_iface_event_fd_get, + .iface_event_arm = uct_cuda_ipc_iface_event_fd_arm, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_cuda_ipc_iface_t), + .iface_query = uct_cuda_ipc_iface_query, + .iface_get_device_address = uct_cuda_ipc_iface_get_device_address, + .iface_get_address = uct_cuda_ipc_iface_get_address, + .iface_is_reachable = uct_cuda_ipc_iface_is_reachable, +}; + +static void uct_cuda_ipc_event_desc_init(ucs_mpool_t *mp, void *obj, void *chunk) +{ + uct_cuda_ipc_event_desc_t *base = (uct_cuda_ipc_event_desc_t *) obj; + + memset(base, 0, sizeof(*base)); + UCT_CUDADRV_FUNC(cuEventCreate(&base->event, CU_EVENT_DISABLE_TIMING)); +} + +static void uct_cuda_ipc_event_desc_cleanup(ucs_mpool_t *mp, void *obj) +{ + uct_cuda_ipc_event_desc_t *base = (uct_cuda_ipc_event_desc_t *) obj; + int active; + + UCT_CUDADRV_CTX_ACTIVE(active); + + if (active) { + UCT_CUDADRV_FUNC(cuEventDestroy(base->event)); + } +} + +ucs_status_t uct_cuda_ipc_iface_init_streams(uct_cuda_ipc_iface_t *iface) +{ + ucs_status_t status; + int i; + + for (i = 0; i < iface->config.max_streams; i++) { + status = UCT_CUDADRV_FUNC(cuStreamCreate(&iface->stream_d2d[i], + CU_STREAM_NON_BLOCKING)); + if (UCS_OK != status) { + return status; + } + + iface->stream_refcount[i] = 0; + } + + iface->streams_initialized = 1; + + return UCS_OK; +} + +static ucs_mpool_ops_t uct_cuda_ipc_event_desc_mpool_ops = { + .chunk_alloc = ucs_mpool_chunk_malloc, + .chunk_release = 
ucs_mpool_chunk_free, + .obj_init = uct_cuda_ipc_event_desc_init, + .obj_cleanup = uct_cuda_ipc_event_desc_cleanup, +}; + +ucs_status_t uct_cuda_ipc_map_memhandle(void *arg, uct_cuda_ipc_key_t *key, + void **mapped_addr) +{ + if (key->d_mapped != 0) { + /* potentially already mapped in uct_cuda_ipc_rkey_unpack */ + *mapped_addr = (void *) key->d_mapped; + return UCS_OK; + } + + return UCT_CUDADRV_FUNC(cuIpcOpenMemHandle((CUdeviceptr *)mapped_addr, + key->ph, CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS)); +} + +ucs_status_t uct_cuda_ipc_unmap_memhandle(void *mapped_addr) +{ + return UCT_CUDADRV_FUNC(cuIpcCloseMemHandle((CUdeviceptr)mapped_addr)); +} + +static UCS_CLASS_INIT_FUNC(uct_cuda_ipc_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_cuda_ipc_iface_config_t *config = NULL; + ucs_status_t status; + + config = ucs_derived_of(tl_config, uct_cuda_ipc_iface_config_t); + UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_cuda_ipc_iface_ops, md, worker, + params, tl_config UCS_STATS_ARG(params->stats_root) + UCS_STATS_ARG("cuda_ipc")); + + if (strncmp(params->mode.device.dev_name, + UCT_CUDA_DEV_NAME, strlen(UCT_CUDA_DEV_NAME)) != 0) { + ucs_error("No device was found: %s", params->mode.device.dev_name); + return UCS_ERR_NO_DEVICE; + } + + self->config.max_poll = config->max_poll; + self->config.max_streams = config->max_streams; + self->config.enable_cache = config->enable_cache; + self->config.max_cuda_ipc_events = config->max_cuda_ipc_events; + + if (self->config.enable_cache) { + self->map_memhandle = uct_cuda_ipc_cache_map_memhandle; + self->unmap_memhandle = ucs_empty_function_return_success; + } else { + self->map_memhandle = uct_cuda_ipc_map_memhandle; + self->unmap_memhandle = uct_cuda_ipc_unmap_memhandle; + } + + status = ucs_mpool_init(&self->event_desc, + 0, + sizeof(uct_cuda_ipc_event_desc_t), + 0, + UCS_SYS_CACHE_LINE_SIZE, + 128, + self->config.max_cuda_ipc_events, + 
&uct_cuda_ipc_event_desc_mpool_ops, + "CUDA_IPC EVENT objects"); + if (UCS_OK != status) { + ucs_error("mpool creation failed"); + return UCS_ERR_IO_ERROR; + } + + self->eventfd = -1; + self->streams_initialized = 0; + ucs_queue_head_init(&self->outstanding_d2d_event_q); + + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_cuda_ipc_iface_t) +{ + ucs_status_t status; + int i; + int active; + + UCT_CUDADRV_CTX_ACTIVE(active); + + if (self->streams_initialized && active) { + for (i = 0; i < self->config.max_streams; i++) { + status = UCT_CUDADRV_FUNC(cuStreamDestroy(self->stream_d2d[i])); + if (UCS_OK != status) { + continue; + } + + ucs_assert(self->stream_refcount[i] == 0); + } + self->streams_initialized = 0; + } + + uct_base_iface_progress_disable(&self->super.super, + UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); + ucs_mpool_cleanup(&self->event_desc, 1); + if (self->eventfd != -1) { + close(self->eventfd); + } +} + +UCS_CLASS_DEFINE(uct_cuda_ipc_iface_t, uct_base_iface_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_cuda_ipc_iface_t, uct_iface_t, uct_md_h, uct_worker_h, + const uct_iface_params_t*, const uct_iface_config_t*); +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_cuda_ipc_iface_t, uct_iface_t); + +UCT_TL_DEFINE(&uct_cuda_ipc_component.super, cuda_ipc, uct_cuda_base_query_devices, + uct_cuda_ipc_iface_t, "CUDA_IPC_", uct_cuda_ipc_iface_config_table, + uct_cuda_ipc_iface_config_t); diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_iface.h b/src/uct/cuda/cuda_ipc/cuda_ipc_iface.h new file mode 100644 index 0000000..14be70a --- /dev/null +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_iface.h @@ -0,0 +1,64 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * See file LICENSE for terms. 
+ */
+
+#ifndef UCT_CUDA_IPC_IFACE_H
+#define UCT_CUDA_IPC_IFACE_H
+
+#include
+#include
+#include
+#include
+
+#include "cuda_ipc_md.h"
+#include "cuda_ipc_ep.h"
+
+/* Number of entries in the per-interface stream_d2d[] / stream_refcount[] arrays */
+
+#define UCT_CUDA_IPC_MAX_PEERS  16
+
+/* cuda_ipc transport interface */
+
+typedef struct uct_cuda_ipc_iface {
+    uct_base_iface_t super;
+    ucs_mpool_t      event_desc;              /* pool of uct_cuda_ipc_event_desc_t */
+    ucs_queue_head_t outstanding_d2d_event_q; /* queue of event descriptors polled by progress */
+    int              eventfd;                 /* wakeup fd, -1 until first requested */
+    int              streams_initialized;     /* set once stream_d2d[] has been created */
+    CUstream         stream_d2d[UCT_CUDA_IPC_MAX_PEERS];
+                                              /* streams, first config.max_streams entries used */
+    unsigned long    stream_refcount[UCT_CUDA_IPC_MAX_PEERS];
+                                              /* per stream outstanding ops */
+    struct {
+        unsigned     max_poll;                /* max events retired per progress call */
+        unsigned     max_streams;             /* # of streams created and polled */
+        unsigned     max_cuda_ipc_events;     /* max mpool entries */
+        int          enable_cache;            /* enable/disable ipc handle cache */
+    } config;
+    ucs_status_t     (*map_memhandle)(void *context, uct_cuda_ipc_key_t *key,
+                                      void **map_addr); /* cached or direct, chosen by enable_cache */
+    ucs_status_t     (*unmap_memhandle)(void *map_addr); /* called when an event is retired */
+} uct_cuda_ipc_iface_t;
+
+/* User configuration; the fields are copied into uct_cuda_ipc_iface_t::config */
+
+typedef struct uct_cuda_ipc_iface_config {
+    uct_iface_config_t      super;
+    unsigned                max_poll;
+    unsigned                max_streams;
+    int                     enable_cache;
+    unsigned                max_cuda_ipc_events;
+} uct_cuda_ipc_iface_config_t;
+
+/* Descriptor of one outstanding operation, allocated from the event_desc pool */
+
+typedef struct uct_cuda_ipc_event_desc {
+    CUevent           event;       /* queried with cuEventQuery by progress */
+    void              *mapped_addr; /* passed to unmap_memhandle on completion */
+    unsigned          stream_id;   /* index into stream_refcount[] */
+    uct_completion_t  *comp;       /* invoked with UCS_OK on completion, may be NULL */
+    ucs_queue_elem_t  queue;       /* link in outstanding_d2d_event_q */
+    uct_cuda_ipc_ep_t *ep;         /* NOTE(review): presumably the issuing endpoint - confirm */
+} uct_cuda_ipc_event_desc_t;
+
+/* Create config.max_streams streams; sets streams_initialized on success */
+
+ucs_status_t uct_cuda_ipc_iface_init_streams(uct_cuda_ipc_iface_t *iface);
+#endif
diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_md.c b/src/uct/cuda/cuda_ipc/cuda_ipc_md.c
new file mode 100644
index 0000000..fa79cab
--- /dev/null
+++ b/src/uct/cuda/cuda_ipc/cuda_ipc_md.c
@@ -0,0 +1,332 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2018-2019. ALL RIGHTS RESERVED.
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * See file LICENSE for terms.
+ */ + +#include "cuda_ipc_md.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static ucs_config_field_t uct_cuda_ipc_md_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_cuda_ipc_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)}, + + {NULL} +}; + +static ucs_status_t uct_cuda_ipc_md_query(uct_md_h md, uct_md_attr_t *md_attr) +{ + md_attr->cap.flags = UCT_MD_FLAG_REG | + UCT_MD_FLAG_NEED_RKEY; + md_attr->cap.reg_mem_types = UCS_BIT(UCS_MEMORY_TYPE_CUDA); + md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_CUDA; + md_attr->cap.detect_mem_types = 0; + md_attr->cap.max_alloc = 0; + md_attr->cap.max_reg = ULONG_MAX; + md_attr->rkey_packed_size = sizeof(uct_cuda_ipc_key_t); + md_attr->reg_cost.overhead = 0; + md_attr->reg_cost.growth = 0; + memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus)); + return UCS_OK; +} + +static ucs_status_t uct_cuda_ipc_mkey_pack(uct_md_h md, uct_mem_h memh, + void *rkey_buffer) +{ + uct_cuda_ipc_key_t *packed = (uct_cuda_ipc_key_t *) rkey_buffer; + uct_cuda_ipc_key_t *mem_hndl = (uct_cuda_ipc_key_t *) memh; + + *packed = *mem_hndl; + packed->d_mapped = 0; + + return UCT_CUDADRV_FUNC(cuDeviceGetUuid(&packed->uuid, mem_hndl->dev_num)); +} + +static inline int uct_cuda_ipc_uuid_equals(const CUuuid* a, const CUuuid* b) +{ + int64_t *a0 = (int64_t *) a->bytes; + int64_t *b0 = (int64_t *) b->bytes; + return (a0[0] == b0[0]) && (a0[1] == b0[1]) ? 
1 : 0; +} + +static inline void uct_cuda_ipc_uuid_copy(CUuuid* dst, const CUuuid* src) +{ + int64_t *a = (int64_t *) src->bytes; + int64_t *b = (int64_t *) dst->bytes; + *b++ = *a++; + *b = *a; +} + +ucs_status_t uct_cuda_ipc_get_unique_index_for_uuid(int* idx, + uct_cuda_ipc_md_t* md, + uct_cuda_ipc_key_t *rkey) +{ + int i; + + for (i = 0; i < md->uuid_map_size; i++) { + if (uct_cuda_ipc_uuid_equals(&rkey->uuid, &md->uuid_map[i])) { + *idx = i; + return UCS_OK; /* found */ + } + } + + if (ucs_unlikely(md->uuid_map_size == md->uuid_map_capacity)) { + /* reallocate on demand */ + int num_devices; + int original_cache_size, new_cache_size; + int new_capacity = md->uuid_map_capacity * 2; + + UCT_CUDA_IPC_DEVICE_GET_COUNT(num_devices); + original_cache_size = md->uuid_map_capacity * num_devices; + new_cache_size = new_capacity * num_devices; + md->uuid_map_capacity = new_capacity; + md->uuid_map = ucs_realloc(md->uuid_map, + new_capacity * sizeof(CUuuid), + "uct_cuda_ipc_uuid_map"); + if (md->uuid_map == NULL) { + return UCS_ERR_NO_MEMORY; + } + + md->peer_accessible_cache = ucs_realloc(md->peer_accessible_cache, + new_cache_size, + "uct_cuda_ipc_peer_accessible_cache"); + if (md->peer_accessible_cache == NULL) { + return UCS_ERR_NO_MEMORY; + } + + memset(md->peer_accessible_cache + original_cache_size, 0xFF, + new_cache_size - original_cache_size); + } + + /* Add new mapping */ + uct_cuda_ipc_uuid_copy(&md->uuid_map[md->uuid_map_size], &rkey->uuid); + *idx = md->uuid_map_size++; + + return UCS_OK; +} + +static ucs_status_t uct_cuda_ipc_is_peer_accessible(uct_cuda_ipc_component_t *mdc, + uct_cuda_ipc_key_t *rkey) +{ + CUdevice this_device; + ucs_status_t status; + int peer_idx; + int num_devices; + char* accessible; + + status = uct_cuda_ipc_get_unique_index_for_uuid(&peer_idx, mdc->md, rkey); + if (ucs_unlikely(status != UCS_OK)) { + return status; + } + + /* overwrite dev_num with a unique ID; this means that relative remote + * device number of multiple peers do not 
map on the same stream and reduces + * stream sequentialization */ + rkey->dev_num = peer_idx; + + UCT_CUDA_IPC_GET_DEVICE(this_device); + UCT_CUDA_IPC_DEVICE_GET_COUNT(num_devices); + + accessible = &mdc->md->peer_accessible_cache[peer_idx * num_devices + this_device]; + if (*accessible == (char)0xFF) { /* unchecked, add to cache */ + /* rkey->d_mapped is picked up in uct_cuda_ipc_cache_map_memhandle */ + CUresult result = cuIpcOpenMemHandle(&rkey->d_mapped, + rkey->ph, + CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS); + *accessible = ((result != CUDA_SUCCESS) && (result != CUDA_ERROR_ALREADY_MAPPED)) + ? 0 : 1; + } + + return (*accessible == 1) ? UCS_OK : UCS_ERR_UNREACHABLE; +} + +UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_rkey_unpack, + (component, rkey_buffer, rkey_p, handle_p), + uct_component_t *component, const void *rkey_buffer, + uct_rkey_t *rkey_p, void **handle_p) +{ + uct_cuda_ipc_component_t *com = ucs_derived_of(component, uct_cuda_ipc_component_t); + uct_cuda_ipc_key_t *packed = (uct_cuda_ipc_key_t *) rkey_buffer; + uct_cuda_ipc_key_t *key; + ucs_status_t status; + + status = uct_cuda_ipc_is_peer_accessible(com, packed); + if (status != UCS_OK) { + return status; + } + + key = ucs_malloc(sizeof(uct_cuda_ipc_key_t), "uct_cuda_ipc_key_t"); + if (NULL == key) { + ucs_error("failed to allocate memory for uct_cuda_ipc_key_t"); + return UCS_ERR_NO_MEMORY; + } + + *key = *packed; + *handle_p = NULL; + *rkey_p = (uintptr_t) key; + + return UCS_OK; +} + +static ucs_status_t uct_cuda_ipc_rkey_release(uct_component_t *component, + uct_rkey_t rkey, void *handle) +{ + ucs_assert(NULL == handle); + ucs_free((void *)rkey); + return UCS_OK; +} + +static ucs_status_t +uct_cuda_ipc_mem_reg_internal(uct_md_h uct_md, void *addr, size_t length, + unsigned flags, uct_cuda_ipc_key_t *key) +{ + CUdevice cu_device; + ucs_status_t status; + + if (!length) { + return UCS_OK; + } + + status = UCT_CUDADRV_FUNC(cuIpcGetMemHandle(&(key->ph), (CUdeviceptr) addr)); + if (UCS_OK != status) { 
+ return status; + } + + UCT_CUDA_IPC_GET_DEVICE(cu_device); + + UCT_CUDADRV_FUNC(cuMemGetAddressRange(&(key->d_bptr), + &(key->b_len), + (CUdeviceptr) addr)); + key->dev_num = (int) cu_device; + key->d_mapped = 0; + ucs_trace("registered memory:%p..%p length:%lu dev_num:%d", + addr, UCS_PTR_BYTE_OFFSET(addr, length), length, (int) cu_device); + return UCS_OK; +} + +static ucs_status_t uct_cuda_ipc_mem_reg(uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + uct_cuda_ipc_key_t *key; + ucs_status_t status; + + key = ucs_malloc(sizeof(uct_cuda_ipc_key_t), "uct_cuda_ipc_key_t"); + if (NULL == key) { + ucs_error("failed to allocate memory for uct_cuda_ipc_key_t"); + return UCS_ERR_NO_MEMORY; + } + + status = uct_cuda_ipc_mem_reg_internal(md, address, length, 0, key); + if (status != UCS_OK) { + ucs_free(key); + return status; + } + *memh_p = key; + + return UCS_OK; +} + +static ucs_status_t uct_cuda_ipc_mem_dereg(uct_md_h md, uct_mem_h memh) +{ + ucs_free(memh); + return UCS_OK; +} + + +static void uct_cuda_ipc_md_close(uct_md_h uct_md) +{ + uct_cuda_ipc_md_t *md = ucs_derived_of(uct_md, uct_cuda_ipc_md_t); + + ucs_free(md->uuid_map); + ucs_free(md->peer_accessible_cache); + ucs_free(md); +} + +static ucs_status_t +uct_cuda_ipc_md_open(uct_component_t *component, const char *md_name, + const uct_md_config_t *config, uct_md_h *md_p) +{ + static uct_md_ops_t md_ops = { + .close = uct_cuda_ipc_md_close, + .query = uct_cuda_ipc_md_query, + .mkey_pack = uct_cuda_ipc_mkey_pack, + .mem_reg = uct_cuda_ipc_mem_reg, + .mem_dereg = uct_cuda_ipc_mem_dereg, + .detect_memory_type = ucs_empty_function_return_unsupported, + }; + + int num_devices; + uct_cuda_ipc_md_t* md; + uct_cuda_ipc_component_t* com; + + UCS_STATIC_ASSERT(sizeof(md->peer_accessible_cache[0]) == sizeof(char)); + UCT_CUDA_IPC_DEVICE_GET_COUNT(num_devices); + + md = ucs_calloc(1, sizeof(uct_cuda_ipc_md_t), "uct_cuda_ipc_md"); + if (md == NULL) { + return UCS_ERR_NO_MEMORY; + } + + 
md->super.ops = &md_ops; + md->super.component = &uct_cuda_ipc_component.super; + + /* allocate uuid map and peer accessible cache */ + md->uuid_map_size = 0; + md->uuid_map_capacity = 16; + md->uuid_map = ucs_malloc(md->uuid_map_capacity * sizeof(CUuuid), + "uct_cuda_ipc_uuid_map"); + if (md->uuid_map == NULL) { + free(md); + return UCS_ERR_NO_MEMORY; + } + + /* Initially support caching accessibility of up to 16 other peers */ + md->peer_accessible_cache = ucs_malloc(num_devices * md->uuid_map_capacity, + "uct_cuda_ipc_peer_accessible_cache"); + if (md->peer_accessible_cache == NULL) { + free(md->uuid_map); + free(md); + return UCS_ERR_NO_MEMORY; + } + + /* 0xFF = !cached, 1 = accessible, 0 = !accessible */ + memset(md->peer_accessible_cache, 0xFF, num_devices * md->uuid_map_capacity); + + com = ucs_derived_of(md->super.component, uct_cuda_ipc_component_t); + com->md = md; + *md_p = &md->super; + return UCS_OK; +} + +uct_cuda_ipc_component_t uct_cuda_ipc_component = { + .super = { + .query_md_resources = uct_cuda_base_query_md_resources, + .md_open = uct_cuda_ipc_md_open, + .cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = uct_cuda_ipc_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = uct_cuda_ipc_rkey_release, + .name = "cuda_ipc", + .md_config = { + .name = "Cuda-IPC memory domain", + .prefix = "CUDA_IPC_", + .table = uct_cuda_ipc_md_config_table, + .size = sizeof(uct_cuda_ipc_md_config_t), + }, + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_cuda_ipc_component.super), + .flags = 0 + }, + .md = NULL, +}; +UCT_COMPONENT_REGISTER(&uct_cuda_ipc_component.super); + diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_md.h b/src/uct/cuda/cuda_ipc/cuda_ipc_md.h new file mode 100644 index 0000000..02bb944 --- /dev/null +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_md.h @@ -0,0 +1,71 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. 
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCT_CUDA_IPC_MD_H
+#define UCT_CUDA_IPC_MD_H
+
+#include
+#include
+#include
+
+
+/**
+ * @brief cuda ipc MD descriptor
+ */
+typedef struct uct_cuda_ipc_md {
+    struct uct_md            super;   /**< Domain info */
+    CUuuid*                  uuid_map; /**< UUIDs of known peer devices; position = peer index */
+    char*                    peer_accessible_cache; /**< [peer_idx * num_devices + local_dev]: 0xFF unchecked, 1 accessible, 0 not */
+    int                      uuid_map_size; /**< Number of used uuid_map entries */
+    int                      uuid_map_capacity; /**< Number of allocated uuid_map entries */
+} uct_cuda_ipc_md_t;
+
+/**
+ * @brief cuda ipc component extension
+ */
+typedef struct uct_cuda_ipc_component {
+    uct_component_t    super;
+    uct_cuda_ipc_md_t* md; /**< MD stored by md_open; NULL in the static initializer */
+} uct_cuda_ipc_component_t;
+
+extern uct_cuda_ipc_component_t uct_cuda_ipc_component;
+
+/**
+ * @brief cuda ipc domain configuration.
+ */
+typedef struct uct_cuda_ipc_md_config {
+    uct_md_config_t super;
+} uct_cuda_ipc_md_config_t;
+
+
+/**
+ * @brief cuda_ipc packed and remote key for put/get
+ */
+typedef struct uct_cuda_ipc_key {
+    CUipcMemHandle ph;           /* Memory handle of GPU memory */
+    CUdeviceptr    d_bptr;       /* Allocation base address */
+    size_t         b_len;        /* Allocation size */
+    int            dev_num;      /* GPU Device number; replaced by the peer index on rkey unpack */
+    CUuuid         uuid;         /* GPU Device UUID */
+    CUdeviceptr    d_mapped;     /* Locally mapped device address; 0 when not mapped */
+} uct_cuda_ipc_key_t;
+
+/* Both macros below 'return UCS_ERR_IO_ERROR' from the CALLING function on failure. NOTE(review): the ';' after while(0) breaks use in an unbraced if/else. */
+
+#define UCT_CUDA_IPC_GET_DEVICE(_cu_device)                             \
+    do {                                                                \
+        if (UCS_OK != UCT_CUDADRV_FUNC(cuCtxGetDevice(&_cu_device))) {  \
+            return UCS_ERR_IO_ERROR;                                    \
+        }                                                               \
+    } while(0);
+
+#define UCT_CUDA_IPC_DEVICE_GET_COUNT(_num_device)                        \
+    do {                                                                  \
+        if (UCS_OK != UCT_CUDADRV_FUNC(cuDeviceGetCount(&_num_device))) { \
+            return UCS_ERR_IO_ERROR;                                      \
+        }                                                                 \
+    } while(0);
+
+#endif
diff --git a/src/uct/cuda/gdr_copy/Makefile.am b/src/uct/cuda/gdr_copy/Makefile.am
new file mode 100644
index 0000000..e551d5f
--- /dev/null
+++ b/src/uct/cuda/gdr_copy/Makefile.am
@@ -0,0 +1,27 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+# + +if HAVE_GDR_COPY + +module_LTLIBRARIES = libuct_cuda_gdrcopy.la +libuct_cuda_gdrcopy_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS) $(GDR_COPY_CPPFLAGS) +libuct_cuda_gdrcopy_la_CFLAGS = $(BASE_CFLAGS) +libuct_cuda_gdrcopy_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/cuda/libuct_cuda.la +libuct_cuda_gdrcopy_la_LDFLAGS = $(CUDA_LDFLAGS) $(GDR_COPY_LDFLAGS) -version-info $(SOVERSION) + +noinst_HEADERS = \ + gdr_copy_md.h \ + gdr_copy_iface.h \ + gdr_copy_ep.h + +libuct_cuda_gdrcopy_la_SOURCES = \ + gdr_copy_md.c \ + gdr_copy_iface.c \ + gdr_copy_ep.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/uct/cuda/gdr_copy/Makefile.in b/src/uct/cuda/gdr_copy/Makefile.in new file mode 100644 index 0000000..8fc6de2 --- /dev/null +++ b/src/uct/cuda/gdr_copy/Makefile.in @@ -0,0 +1,889 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/cuda/gdr_copy +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + 
$(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d 
+CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_GDR_COPY_TRUE@libuct_cuda_gdrcopy_la_DEPENDENCIES = \ +@HAVE_GDR_COPY_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_GDR_COPY_TRUE@ $(top_builddir)/src/uct/cuda/libuct_cuda.la +am__libuct_cuda_gdrcopy_la_SOURCES_DIST = gdr_copy_md.c \ + gdr_copy_iface.c gdr_copy_ep.c +@HAVE_GDR_COPY_TRUE@am_libuct_cuda_gdrcopy_la_OBJECTS = \ +@HAVE_GDR_COPY_TRUE@ libuct_cuda_gdrcopy_la-gdr_copy_md.lo \ +@HAVE_GDR_COPY_TRUE@ libuct_cuda_gdrcopy_la-gdr_copy_iface.lo \ +@HAVE_GDR_COPY_TRUE@ libuct_cuda_gdrcopy_la-gdr_copy_ep.lo +libuct_cuda_gdrcopy_la_OBJECTS = $(am_libuct_cuda_gdrcopy_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_cuda_gdrcopy_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libuct_cuda_gdrcopy_la_CFLAGS) $(CFLAGS) \ + $(libuct_cuda_gdrcopy_la_LDFLAGS) $(LDFLAGS) -o $@ +@HAVE_GDR_COPY_TRUE@am_libuct_cuda_gdrcopy_la_rpath = -rpath \ +@HAVE_GDR_COPY_TRUE@ $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = \ + ./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_ep.Plo \ + ./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_iface.Plo \ + ./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_md.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) 
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_cuda_gdrcopy_la_SOURCES) +DIST_SOURCES = $(am__libuct_cuda_gdrcopy_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = gdr_copy_md.h gdr_copy_iface.h gdr_copy_ep.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ 
+ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = 
@ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = 
@top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_GDR_COPY_TRUE@module_LTLIBRARIES = libuct_cuda_gdrcopy.la +@HAVE_GDR_COPY_TRUE@libuct_cuda_gdrcopy_la_CPPFLAGS = $(BASE_CPPFLAGS) $(CUDA_CPPFLAGS) $(GDR_COPY_CPPFLAGS) +@HAVE_GDR_COPY_TRUE@libuct_cuda_gdrcopy_la_CFLAGS = $(BASE_CFLAGS) +@HAVE_GDR_COPY_TRUE@libuct_cuda_gdrcopy_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_GDR_COPY_TRUE@ $(top_builddir)/src/uct/cuda/libuct_cuda.la + +@HAVE_GDR_COPY_TRUE@libuct_cuda_gdrcopy_la_LDFLAGS = $(CUDA_LDFLAGS) $(GDR_COPY_LDFLAGS) -version-info $(SOVERSION) +@HAVE_GDR_COPY_TRUE@noinst_HEADERS = \ +@HAVE_GDR_COPY_TRUE@ gdr_copy_md.h \ +@HAVE_GDR_COPY_TRUE@ gdr_copy_iface.h \ +@HAVE_GDR_COPY_TRUE@ gdr_copy_ep.h + +@HAVE_GDR_COPY_TRUE@libuct_cuda_gdrcopy_la_SOURCES = \ +@HAVE_GDR_COPY_TRUE@ gdr_copy_md.c \ +@HAVE_GDR_COPY_TRUE@ gdr_copy_iface.c \ +@HAVE_GDR_COPY_TRUE@ gdr_copy_ep.c + + +# Automake silent rules +@HAVE_GDR_COPY_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_GDR_COPY_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_GDR_COPY_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_GDR_COPY_TRUE@AM_V_LN_1 = true +@HAVE_GDR_COPY_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/cuda/gdr_copy/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/cuda/gdr_copy/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 
's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libuct_cuda_gdrcopy.la: $(libuct_cuda_gdrcopy_la_OBJECTS) $(libuct_cuda_gdrcopy_la_DEPENDENCIES) $(EXTRA_libuct_cuda_gdrcopy_la_DEPENDENCIES) + $(AM_V_CCLD)$(libuct_cuda_gdrcopy_la_LINK) $(am_libuct_cuda_gdrcopy_la_rpath) $(libuct_cuda_gdrcopy_la_OBJECTS) $(libuct_cuda_gdrcopy_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_md.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libuct_cuda_gdrcopy_la-gdr_copy_md.lo: gdr_copy_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_gdrcopy_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_gdrcopy_la_CFLAGS) $(CFLAGS) -MT libuct_cuda_gdrcopy_la-gdr_copy_md.lo -MD -MP -MF $(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_md.Tpo -c -o libuct_cuda_gdrcopy_la-gdr_copy_md.lo `test -f 'gdr_copy_md.c' || echo '$(srcdir)/'`gdr_copy_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_md.Tpo $(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gdr_copy_md.c' object='libuct_cuda_gdrcopy_la-gdr_copy_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_gdrcopy_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_gdrcopy_la_CFLAGS) $(CFLAGS) -c -o libuct_cuda_gdrcopy_la-gdr_copy_md.lo `test -f 'gdr_copy_md.c' || echo '$(srcdir)/'`gdr_copy_md.c + +libuct_cuda_gdrcopy_la-gdr_copy_iface.lo: gdr_copy_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_gdrcopy_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_gdrcopy_la_CFLAGS) $(CFLAGS) -MT libuct_cuda_gdrcopy_la-gdr_copy_iface.lo -MD -MP -MF $(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_iface.Tpo -c -o libuct_cuda_gdrcopy_la-gdr_copy_iface.lo `test -f 'gdr_copy_iface.c' || echo '$(srcdir)/'`gdr_copy_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_iface.Tpo $(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gdr_copy_iface.c' object='libuct_cuda_gdrcopy_la-gdr_copy_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_gdrcopy_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_gdrcopy_la_CFLAGS) $(CFLAGS) -c -o libuct_cuda_gdrcopy_la-gdr_copy_iface.lo `test -f 'gdr_copy_iface.c' || echo '$(srcdir)/'`gdr_copy_iface.c + +libuct_cuda_gdrcopy_la-gdr_copy_ep.lo: gdr_copy_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_gdrcopy_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_gdrcopy_la_CFLAGS) $(CFLAGS) -MT libuct_cuda_gdrcopy_la-gdr_copy_ep.lo -MD -MP -MF $(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_ep.Tpo -c -o libuct_cuda_gdrcopy_la-gdr_copy_ep.lo `test -f 'gdr_copy_ep.c' || echo '$(srcdir)/'`gdr_copy_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_ep.Tpo $(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gdr_copy_ep.c' object='libuct_cuda_gdrcopy_la-gdr_copy_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) 
$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cuda_gdrcopy_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cuda_gdrcopy_la_CFLAGS) $(CFLAGS) -c -o libuct_cuda_gdrcopy_la-gdr_copy_ep.lo `test -f 'gdr_copy_ep.c' || echo '$(srcdir)/'`gdr_copy_ep.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo 
"$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_GDR_COPY_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z 
"$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_ep.Plo + -rm -f ./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_iface.Plo + -rm -f ./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_md.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_ep.Plo + -rm -f ./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_iface.Plo + -rm -f ./$(DEPDIR)/libuct_cuda_gdrcopy_la-gdr_copy_md.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am 
info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + +@HAVE_GDR_COPY_TRUE@all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to create proper makefile dependencies +@HAVE_GDR_COPY_TRUE@$(local_la_modules): $(module_LTLIBRARIES) +@HAVE_GDR_COPY_TRUE@ $(AM_V_at)$(MKDIR_P) $(localmoduledir) +@HAVE_GDR_COPY_TRUE@ $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_GDR_COPY_TRUE@ (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ +@HAVE_GDR_COPY_TRUE@ done +@HAVE_GDR_COPY_TRUE@ @for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_GDR_COPY_TRUE@ $(AM_V_LN) $$lib; \ +@HAVE_GDR_COPY_TRUE@ done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/uct/cuda/gdr_copy/configure.m4 b/src/uct/cuda/gdr_copy/configure.m4 new file mode 100644 index 0000000..be2a17b --- /dev/null +++ b/src/uct/cuda/gdr_copy/configure.m4 @@ -0,0 +1,10 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+#
+
+UCX_CHECK_GDRCOPY
+
+AS_IF([test "x$gdrcopy_happy" = "xyes"], [uct_cuda_modules="${uct_cuda_modules}:gdrcopy"])
+AC_CONFIG_FILES([src/uct/cuda/gdr_copy/Makefile])
diff --git a/src/uct/cuda/gdr_copy/gdr_copy_ep.c b/src/uct/cuda/gdr_copy/gdr_copy_ep.c
new file mode 100644
index 0000000..4ed318a
--- /dev/null
+++ b/src/uct/cuda/gdr_copy/gdr_copy_ep.c
@@ -0,0 +1,105 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED.
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ * See file LICENSE for terms.
+ */
+
+#include "gdr_copy_ep.h"
+#include "gdr_copy_md.h"
+#include "gdr_copy_iface.h"
+
+/* NOTE(review): the five include targets below are blank in this copy of the
+ * patch; restore the header names before building. */
+#include
+#include
+#include
+#include
+#include
+
+
+/*
+ * Endpoint constructor: takes the owning interface from params->iface, runs
+ * UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS on the parameters and initializes the
+ * uct_base_ep_t super class with that interface.
+ */
+static UCS_CLASS_INIT_FUNC(uct_gdr_copy_ep_t, const uct_ep_params_t *params)
+{
+    uct_gdr_copy_iface_t *iface = ucs_derived_of(params->iface,
+                                                 uct_gdr_copy_iface_t);
+
+    UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(params);
+    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super);
+
+    return UCS_OK;
+}
+
+/* Endpoint destructor: the endpoint owns no resources of its own. */
+static UCS_CLASS_CLEANUP_FUNC(uct_gdr_copy_ep_t)
+{
+}
+
+UCS_CLASS_DEFINE(uct_gdr_copy_ep_t, uct_base_ep_t)
+UCS_CLASS_DEFINE_NEW_FUNC(uct_gdr_copy_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DEFINE_DELETE_FUNC(uct_gdr_copy_ep_t, uct_ep_t);
+
+/*
+ * Copy `length` bytes from `buffer` into the mapping referenced by `rkey`.
+ *
+ * `rkey` is interpreted as a pointer to uct_gdr_copy_key_t. The destination is
+ * key->bar_ptr advanced by the byte offset (remote_addr - key->vaddr).
+ * A zero `length` skips the copy but is still counted and traced.
+ *
+ * Returns UCS_OK on success, or UCS_ERR_IO_ERROR when the gdrcopy call
+ * returns non-zero.
+ */
+UCS_PROFILE_FUNC(ucs_status_t, uct_gdr_copy_ep_put_short,
+                 (tl_ep, buffer, length, remote_addr, rkey),
+                 uct_ep_h tl_ep, const void *buffer, unsigned length,
+                 uint64_t remote_addr, uct_rkey_t rkey)
+{
+    uct_gdr_copy_key_t *gdr_copy_key = (uct_gdr_copy_key_t *) rkey;
+    size_t bar_offset;
+    int ret;
+
+    if (ucs_likely(length)) {
+        bar_offset = remote_addr - gdr_copy_key->vaddr;
+#if HAVE_DECL_GDR_COPY_TO_MAPPING
+        ret = gdr_copy_to_mapping(gdr_copy_key->mh,
+                                  UCS_PTR_BYTE_OFFSET(gdr_copy_key->bar_ptr,
+                                                      bar_offset),
+                                  buffer, length);
+        if (ret) {
+            ucs_error("gdr_copy_to_mapping failed. ret:%d", ret);
+            return UCS_ERR_IO_ERROR;
+        }
+#else
+        /* Same byte-offset helper as the branch above, so the offset never
+         * depends on the declared pointer type of bar_ptr. */
+        ret = gdr_copy_to_bar(UCS_PTR_BYTE_OFFSET(gdr_copy_key->bar_ptr,
+                                                  bar_offset),
+                              buffer, length);
+        if (ret) {
+            ucs_error("gdr_copy_to_bar failed. ret:%d", ret);
+            return UCS_ERR_IO_ERROR;
+        }
+#endif
+    }
+
+    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length);
+    /* length is unsigned, so it is printed with %u */
+    ucs_trace_data("PUT_SHORT size %u from %p to %p",
+                   length, buffer, (void *)remote_addr);
+    return UCS_OK;
+}
+
+/*
+ * Copy `length` bytes from the mapping referenced by `rkey` into `buffer`.
+ *
+ * Mirror image of uct_gdr_copy_ep_put_short(): the source is key->bar_ptr
+ * advanced by (remote_addr - key->vaddr). A zero `length` skips the copy but
+ * is still counted and traced.
+ *
+ * Returns UCS_OK on success, or UCS_ERR_IO_ERROR when the gdrcopy call
+ * returns non-zero.
+ */
+UCS_PROFILE_FUNC(ucs_status_t, uct_gdr_copy_ep_get_short,
+                 (tl_ep, buffer, length, remote_addr, rkey),
+                 uct_ep_h tl_ep, void *buffer, unsigned length,
+                 uint64_t remote_addr, uct_rkey_t rkey)
+{
+    uct_gdr_copy_key_t *gdr_copy_key = (uct_gdr_copy_key_t *) rkey;
+    size_t bar_offset;
+    int ret;
+
+    if (ucs_likely(length)) {
+        bar_offset = remote_addr - gdr_copy_key->vaddr;
+#if HAVE_DECL_GDR_COPY_TO_MAPPING
+        ret = gdr_copy_from_mapping(gdr_copy_key->mh, buffer,
+                                    UCS_PTR_BYTE_OFFSET(gdr_copy_key->bar_ptr,
+                                                        bar_offset),
+                                    length);
+        if (ret) {
+            ucs_error("gdr_copy_from_mapping failed. ret:%d", ret);
+            return UCS_ERR_IO_ERROR;
+        }
+#else
+        /* Same byte-offset helper as the branch above, so the offset never
+         * depends on the declared pointer type of bar_ptr. */
+        ret = gdr_copy_from_bar(buffer,
+                                UCS_PTR_BYTE_OFFSET(gdr_copy_key->bar_ptr,
+                                                    bar_offset),
+                                length);
+        if (ret) {
+            ucs_error("gdr_copy_from_bar failed. ret:%d", ret);
+            return UCS_ERR_IO_ERROR;
+        }
+#endif
+    }
+
+    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, SHORT, length);
+    /* length is unsigned, so it is printed with %u */
+    ucs_trace_data("GET_SHORT size %u from %p to %p",
+                   length, (void *)remote_addr, buffer);
+    return UCS_OK;
+}
diff --git a/src/uct/cuda/gdr_copy/gdr_copy_ep.h b/src/uct/cuda/gdr_copy/gdr_copy_ep.h
new file mode 100644
index 0000000..faad8de
--- /dev/null
+++ b/src/uct/cuda/gdr_copy/gdr_copy_ep.h
@@ -0,0 +1,37 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED.
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_GDR_COPY_EP_H
+#define UCT_GDR_COPY_EP_H
+
+/* NOTE(review): the three include targets below are blank in this copy of the
+ * patch; restore the header names before building. */
+#include
+#include
+#include
+
+
+/* Endpoint address: a single integer identifier. */
+typedef struct uct_gdr_copy_ep_addr {
+    int ep_id;
+} uct_gdr_copy_ep_addr_t;
+
+
+/* gdr_copy endpoint object, derived from uct_base_ep_t. */
+typedef struct uct_gdr_copy_ep {
+    uct_base_ep_t super;          /* base endpoint; must stay the first member
+                                     for the ucs_derived_of() casts */
+    struct uct_gdr_copy_ep *next; /* link to another endpoint; NOTE(review):
+                                     no user of this field is visible in
+                                     gdr_copy_ep.c - confirm it is needed */
+} uct_gdr_copy_ep_t;
+
+
+/* Constructor / destructor entry points generated by the UCS class macros. */
+UCS_CLASS_DECLARE_NEW_FUNC(uct_gdr_copy_ep_t, uct_ep_t, const uct_ep_params_t *);
+
+UCS_CLASS_DECLARE_DELETE_FUNC(uct_gdr_copy_ep_t, uct_ep_t);
+
+/*
+ * Copy `length` bytes from `buffer` to the registered region described by
+ * `rkey`, at address `remote_addr`. Implemented in gdr_copy_ep.c.
+ */
+ucs_status_t uct_gdr_copy_ep_put_short(uct_ep_h tl_ep, const void *buffer,
+                                       unsigned length, uint64_t remote_addr,
+                                       uct_rkey_t rkey);
+
+/*
+ * Copy `length` bytes from the registered region described by `rkey`, at
+ * address `remote_addr`, into `buffer`. Implemented in gdr_copy_ep.c.
+ */
+ucs_status_t uct_gdr_copy_ep_get_short(uct_ep_h tl_ep, void *buffer,
+                                       unsigned length, uint64_t remote_addr,
+                                       uct_rkey_t rkey);
+
+#endif
diff --git a/src/uct/cuda/gdr_copy/gdr_copy_iface.c b/src/uct/cuda/gdr_copy/gdr_copy_iface.c
new file mode 100644
index 0000000..01b02a3
--- /dev/null
+++ b/src/uct/cuda/gdr_copy/gdr_copy_iface.c
@@ -0,0 +1,147 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "gdr_copy_iface.h"
+#include "gdr_copy_md.h"
+#include "gdr_copy_ep.h"
+
+/* NOTE(review): the three include targets below are blank in this copy of the
+ * patch; restore the header names before building. */
+#include
+#include
+#include
+
+
+/* Interface configuration: only embeds the generic uct_iface_config_table at
+ * the offset of the `super` member; there are no gdr_copy-specific options. */
+static ucs_config_field_t uct_gdr_copy_iface_config_table[] = {
+
+    {"", "", NULL,
+     ucs_offsetof(uct_gdr_copy_iface_config_t, super),
+     UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)},
+
+    {NULL}
+};
+
+/* Forward declaration for the delete function */
+static void UCS_CLASS_DELETE_FUNC_NAME(uct_gdr_copy_iface_t)(uct_iface_t*);
+
+/* Store this interface's id into the caller-provided address buffer. */
+static ucs_status_t uct_gdr_copy_iface_get_address(uct_iface_h tl_iface,
+                                                   uct_iface_addr_t *iface_addr)
+{
+    uct_gdr_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_gdr_copy_iface_t);
+
+    *(uct_gdr_copy_iface_addr_t*)iface_addr = iface->id;
+    return UCS_OK;
+}
+
+/*
+ * Return non-zero only when `iface_addr` is non-NULL and holds the same id as
+ * this interface. `dev_addr` is not examined.
+ */
+static int uct_gdr_copy_iface_is_reachable(const uct_iface_h tl_iface,
+                                           const uct_device_addr_t *dev_addr,
+                                           const uct_iface_addr_t *iface_addr)
+{
+    uct_gdr_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_gdr_copy_iface_t);
+    uct_gdr_copy_iface_addr_t *addr = (uct_gdr_copy_iface_addr_t*)iface_addr;
+
+    return (addr != NULL) && (iface->id == *addr);
+}
+
+/*
+ * Fill `iface_attr`: start from uct_base_iface_query() and then overwrite the
+ * address lengths, capability flags, per-operation limits and the
+ * latency/bandwidth estimates. Only short PUT and short GET are advertised;
+ * all bcopy, zcopy and active-message limits are zero. Always returns UCS_OK.
+ */
+static ucs_status_t uct_gdr_copy_iface_query(uct_iface_h tl_iface,
+                                             uct_iface_attr_t *iface_attr)
+{
+    uct_gdr_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_gdr_copy_iface_t);
+
+    uct_base_iface_query(&iface->super, iface_attr);
+
+    /* The interface address is the id; no device or endpoint address */
+    iface_attr->iface_addr_len          = sizeof(uct_gdr_copy_iface_addr_t);
+    iface_attr->device_addr_len         = 0;
+    iface_attr->ep_addr_len             = 0;
+    iface_attr->cap.flags               = UCT_IFACE_FLAG_CONNECT_TO_IFACE |
+                                          UCT_IFACE_FLAG_PUT_SHORT |
+                                          UCT_IFACE_FLAG_GET_SHORT;
+
+    iface_attr->cap.put.max_short       = UINT_MAX;
+    iface_attr->cap.put.max_bcopy       = 0;
+    iface_attr->cap.put.min_zcopy       = 0;
+    iface_attr->cap.put.max_zcopy       = 0;
+    iface_attr->cap.put.opt_zcopy_align = 1;
+    iface_attr->cap.put.align_mtu       = iface_attr->cap.put.opt_zcopy_align;
+    iface_attr->cap.put.max_iov         = 1;
+
+    iface_attr->cap.get.max_short       = UINT_MAX;
+    iface_attr->cap.get.max_bcopy       = 0;
+    iface_attr->cap.get.min_zcopy       = 0;
+    iface_attr->cap.get.max_zcopy       = 0;
+    iface_attr->cap.get.opt_zcopy_align = 1;
+    iface_attr->cap.get.align_mtu       = iface_attr->cap.get.opt_zcopy_align;
+    iface_attr->cap.get.max_iov         = 1;
+
+    iface_attr->cap.am.max_short        = 0;
+    iface_attr->cap.am.max_bcopy        = 0;
+    iface_attr->cap.am.min_zcopy        = 0;
+    iface_attr->cap.am.max_zcopy        = 0;
+    iface_attr->cap.am.opt_zcopy_align  = 1;
+    iface_attr->cap.am.align_mtu        = iface_attr->cap.am.opt_zcopy_align;
+    iface_attr->cap.am.max_hdr          = 0;
+    iface_attr->cap.am.max_iov          = 1;
+
+    iface_attr->latency.overhead        = 1e-6; /* 1 us */
+    iface_attr->latency.growth          = 0;
+    iface_attr->bandwidth.dedicated     = 0;
+    /* NOTE(review): presumably 6911 MiB/s expressed in bytes per second -
+     * confirm the unit and where the figure comes from */
+    iface_attr->bandwidth.shared        = 6911 * 1024.0 * 1024.0;
+    iface_attr->overhead                = 0;
+    iface_attr->priority                = 0;
+
+    return UCS_OK;
+}
+
+/* Operation table passed to the base interface in the constructor below.
+ * Pending, progress and device-address entries are generic stub functions. */
+static uct_iface_ops_t uct_gdr_copy_iface_ops = {
+    .ep_put_short             = uct_gdr_copy_ep_put_short,
+    .ep_get_short             = uct_gdr_copy_ep_get_short,
+    .ep_pending_add           = ucs_empty_function_return_busy,
+    .ep_pending_purge         = ucs_empty_function,
+    .ep_flush                 = uct_base_ep_flush,
+    .ep_fence                 = uct_base_ep_fence,
+    .ep_create                = UCS_CLASS_NEW_FUNC_NAME(uct_gdr_copy_ep_t),
+    .ep_destroy               = UCS_CLASS_DELETE_FUNC_NAME(uct_gdr_copy_ep_t),
+    .iface_flush              = uct_base_iface_flush,
+    .iface_fence              = uct_base_iface_fence,
+    .iface_progress_enable    = ucs_empty_function,
+    .iface_progress_disable   = ucs_empty_function,
+    .iface_progress           = ucs_empty_function_return_zero,
+    .iface_close              = UCS_CLASS_DELETE_FUNC_NAME(uct_gdr_copy_iface_t),
+    .iface_query              = uct_gdr_copy_iface_query,
+    .iface_get_device_address = (uct_iface_get_device_address_func_t)ucs_empty_function_return_success,
+    .iface_get_address        = uct_gdr_copy_iface_get_address,
+    .iface_is_reachable       = uct_gdr_copy_iface_is_reachable,
+};
+
+/*
+ * Interface constructor: initializes the base interface with the operation
+ * table above, rejects any device whose name does not begin with
+ * UCT_CUDA_DEV_NAME (UCS_ERR_NO_DEVICE), then generates the interface id.
+ */
+static UCS_CLASS_INIT_FUNC(uct_gdr_copy_iface_t, uct_md_h md, uct_worker_h worker,
+                           const uct_iface_params_t *params,
+                           const uct_iface_config_t *tl_config)
+{
+    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_gdr_copy_iface_ops, md, worker,
+                              params, tl_config UCS_STATS_ARG(params->stats_root)
+                              UCS_STATS_ARG("gdr_copy"));
+
+    /* Prefix comparison: only the first strlen(UCT_CUDA_DEV_NAME) characters
+     * of the requested device name are checked */
+    if (strncmp(params->mode.device.dev_name,
+                UCT_CUDA_DEV_NAME, strlen(UCT_CUDA_DEV_NAME)) != 0) {
+        ucs_error("No device was found: %s", params->mode.device.dev_name);
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    /* The id is what get_address() publishes and is_reachable() compares */
+    self->id = ucs_generate_uuid((uintptr_t)self);
+
+    return UCS_OK;
+}
+
+/* Interface destructor: nothing to release beyond the base class. */
+static UCS_CLASS_CLEANUP_FUNC(uct_gdr_copy_iface_t)
+{
+    /* tasks to tear down the domain */
+}
+
+UCS_CLASS_DEFINE(uct_gdr_copy_iface_t, uct_base_iface_t);
+UCS_CLASS_DEFINE_NEW_FUNC(uct_gdr_copy_iface_t, uct_iface_t, uct_md_h, uct_worker_h,
+                          const uct_iface_params_t*, const uct_iface_config_t*);
+static UCS_CLASS_DEFINE_DELETE_FUNC(uct_gdr_copy_iface_t, uct_iface_t);
+
+/* Register the transport under the name gdr_copy with the gdr_copy component;
+ * "GDR_COPY_" is the configuration prefix for the table defined above. */
+UCT_TL_DEFINE(&uct_gdr_copy_component, gdr_copy, uct_cuda_base_query_devices,
+              uct_gdr_copy_iface_t, "GDR_COPY_",
+              uct_gdr_copy_iface_config_table, uct_gdr_copy_iface_config_t);
diff --git a/src/uct/cuda/gdr_copy/gdr_copy_iface.h b/src/uct/cuda/gdr_copy/gdr_copy_iface.h
new file mode 100644
index 0000000..1d4875e
--- /dev/null
+++ b/src/uct/cuda/gdr_copy/gdr_copy_iface.h
@@ -0,0 +1,25 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCT_GDR_COPY_IFACE_H
+#define UCT_GDR_COPY_IFACE_H
+
+/* NOTE(review): the include target below is blank in this copy of the patch;
+ * restore the header name before building. */
+#include
+
+
+/* Interface address: a 64-bit id compared for equality by is_reachable(). */
+typedef uint64_t uct_gdr_copy_iface_addr_t;
+
+
+/* gdr_copy interface object, derived from uct_base_iface_t. */
+typedef struct uct_gdr_copy_iface {
+    uct_base_iface_t super;       /* base interface */
+    uct_gdr_copy_iface_addr_t id; /* id generated in the constructor */
+} uct_gdr_copy_iface_t;
+
+
+/* Interface configuration: only the generic interface options. */
+typedef struct uct_gdr_copy_iface_config {
+    uct_iface_config_t super;
+} uct_gdr_copy_iface_config_t;
+
+#endif
diff --git a/src/uct/cuda/gdr_copy/gdr_copy_md.c b/src/uct/cuda/gdr_copy/gdr_copy_md.c
new file mode 100644
index 0000000..7e10714
--- /dev/null
+++ b/src/uct/cuda/gdr_copy/gdr_copy_md.c
@@ -0,0 +1,438 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED.
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ * See file LICENSE for terms.
+ */
+
+#include "gdr_copy_md.h"
+
+/* NOTE(review): the ten include targets below are blank in this copy of the
+ * patch; restore the header names before building. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Default value substituted into the RCACHE_ADDR_ALIGN entry of the
+ * configuration table below */
+#define UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN 65536
+
+/* Memory-domain configuration: generic MD options, the registration-cache
+ * switch and its sub-table, and the registration cost estimate. */
+static ucs_config_field_t uct_gdr_copy_md_config_table[] = {
+    {"", "", NULL,
+     ucs_offsetof(uct_gdr_copy_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)},
+
+    {"RCACHE", "try", "Enable using memory registration cache",
+     ucs_offsetof(uct_gdr_copy_md_config_t, enable_rcache), UCS_CONFIG_TYPE_TERNARY},
+
+    {"", "RCACHE_ADDR_ALIGN=" UCS_PP_MAKE_STRING(UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN), NULL,
+     ucs_offsetof(uct_gdr_copy_md_config_t, rcache),
+     UCS_CONFIG_TYPE_TABLE(uct_md_config_rcache_table)},
+
+    {"MEM_REG_OVERHEAD", "16us", "Memory registration overhead", /* TODO take default from device */
+     ucs_offsetof(uct_gdr_copy_md_config_t, uc_reg_cost.overhead), UCS_CONFIG_TYPE_TIME},
+
+    {"MEM_REG_GROWTH", "0.06ns", "Memory registration growth rate", /* TODO take default from device */
+     ucs_offsetof(uct_gdr_copy_md_config_t, uc_reg_cost.growth), UCS_CONFIG_TYPE_TIME},
+
+    {NULL}
+};
+
+/*
+ * Fill `md_attr` with fixed capabilities: registration only (no allocation),
+ * CUDA memory type, and a packed rkey of sizeof(uct_gdr_copy_key_t).
+ * The registration cost is reported as zero; the MEM_REG_* configuration
+ * values are not consulted in this function. Always returns UCS_OK.
+ */
+static ucs_status_t uct_gdr_copy_md_query(uct_md_h md, uct_md_attr_t *md_attr)
+{
+    md_attr->cap.flags            = UCT_MD_FLAG_REG |
+                                    UCT_MD_FLAG_NEED_RKEY;
+    md_attr->cap.reg_mem_types    = UCS_BIT(UCS_MEMORY_TYPE_CUDA);
+    md_attr->cap.access_mem_type  = UCS_MEMORY_TYPE_CUDA;
+    md_attr->cap.detect_mem_types = 0;
+    md_attr->cap.max_alloc        = 0;
+    md_attr->cap.max_reg          = ULONG_MAX;
+    md_attr->rkey_packed_size     = sizeof(uct_gdr_copy_key_t);
+    md_attr->reg_cost.overhead    = 0;
+    md_attr->reg_cost.growth      = 0;
+    /* all bits set in the local CPU mask */
+    memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus));
+    return UCS_OK;
+}
+
+/*
+ * Serialize a memory handle into `rkey_buffer` as a uct_gdr_copy_key_t:
+ * the registered virtual address (info.va), the mapped bar_ptr and the
+ * gdrcopy handle. Always returns UCS_OK.
+ */
+static ucs_status_t uct_gdr_copy_mkey_pack(uct_md_h md, uct_mem_h memh,
+                                           void *rkey_buffer)
+{
+    uct_gdr_copy_key_t *packed   = (uct_gdr_copy_key_t *)rkey_buffer;
+    uct_gdr_copy_mem_t *mem_hndl = (uct_gdr_copy_mem_t *)memh;
+
+    packed->vaddr   = mem_hndl->info.va;
+    packed->bar_ptr = mem_hndl->bar_ptr;
+    packed->mh      = mem_hndl->mh;
+
+    return UCS_OK;
+}
+
+/*
+ * Unpack a key produced by uct_gdr_copy_mkey_pack() into a heap-allocated
+ * copy. The copy is returned through `rkey_p` and must be released with
+ * uct_gdr_copy_rkey_release(); `handle_p` is always set to NULL.
+ *
+ * Returns UCS_OK, or UCS_ERR_NO_MEMORY when the allocation fails.
+ */
+static ucs_status_t uct_gdr_copy_rkey_unpack(uct_component_t *component,
+                                             const void *rkey_buffer,
+                                             uct_rkey_t *rkey_p, void **handle_p)
+{
+    /* The packed buffer is only read, so keep it const-qualified */
+    const uct_gdr_copy_key_t *packed = (const uct_gdr_copy_key_t *)rkey_buffer;
+    uct_gdr_copy_key_t *key;
+
+    key = ucs_malloc(sizeof(uct_gdr_copy_key_t), "uct_gdr_copy_key_t");
+    if (NULL == key) {
+        ucs_error("failed to allocate memory for uct_gdr_copy_key_t");
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    key->vaddr   = packed->vaddr;
+    key->bar_ptr = packed->bar_ptr;
+    key->mh      = packed->mh;
+
+    *handle_p = NULL;
+    *rkey_p   = (uintptr_t)key;
+
+    return UCS_OK;
+}
+
+/* Free the key copy allocated by uct_gdr_copy_rkey_unpack(). */
+static ucs_status_t uct_gdr_copy_rkey_release(uct_component_t *component,
+                                              uct_rkey_t rkey, void *handle)
+{
+    ucs_assert(NULL == handle);
+    ucs_free((void *)rkey);
+    return UCS_OK;
+}
+
+/*
+ * Register `length` bytes at `address` with gdrcopy and fill `mem_hndl`.
+ *
+ * A zero `length` zeroes the handle and succeeds without calling gdrcopy.
+ * Otherwise the buffer is pinned, mapped, and its info queried, in that
+ * order. If a step fails, the steps already done are undone in reverse order
+ * and UCS_ERR_IO_ERROR is returned.
+ */
+UCS_PROFILE_FUNC(ucs_status_t, uct_gdr_copy_mem_reg_internal,
+                 (uct_md, address, length, flags, mem_hndl),
+                 uct_md_h uct_md, void *address, size_t length,
+                 unsigned flags, uct_gdr_copy_mem_t *mem_hndl)
+{
+    uct_gdr_copy_md_t *md = ucs_derived_of(uct_md, uct_gdr_copy_md_t);
+    CUdeviceptr d_ptr     = ((CUdeviceptr )(char *) address);
+    int ret;
+
+    if (!length) {
+        memset(mem_hndl, 0, sizeof(*mem_hndl));
+        return UCS_OK;
+    }
+
+    /* length is size_t, so it is printed with %zu below */
+    ret = gdr_pin_buffer(md->gdrcpy_ctx, d_ptr, length, 0, 0, &mem_hndl->mh);
+    if (ret) {
+        ucs_error("gdr_pin_buffer failed. length :%zu ret:%d", length, ret);
+        goto err;
+    }
+
+    ret = gdr_map(md->gdrcpy_ctx, mem_hndl->mh, &mem_hndl->bar_ptr, length);
+    if (ret) {
+        ucs_error("gdr_map failed. length :%zu ret:%d", length, ret);
+        goto unpin_buffer;
+    }
+
+    /* Recorded before gdr_get_info() so the unmap_buffer path can use it */
+    mem_hndl->reg_size = length;
+
+    ret = gdr_get_info(md->gdrcpy_ctx, mem_hndl->mh, &mem_hndl->info);
+    if (ret) {
+        ucs_error("gdr_get_info failed. ret:%d", ret);
+        goto unmap_buffer;
+    }
+
+    ucs_trace("registered memory:%p..%p length:%zu info.va:0x%"PRIx64" bar_ptr:%p",
+              address, UCS_PTR_BYTE_OFFSET(address, length), length,
+              mem_hndl->info.va, mem_hndl->bar_ptr);
+
+    return UCS_OK;
+
+unmap_buffer:
+    /* Cleanup failures are only warned about; the function still returns
+     * the I/O error */
+    ret = gdr_unmap(md->gdrcpy_ctx, mem_hndl->mh, mem_hndl->bar_ptr, mem_hndl->reg_size);
+    if (ret) {
+        ucs_warn("gdr_unmap failed. unpin_size:%lu ret:%d", mem_hndl->reg_size, ret);
+    }
+unpin_buffer:
+    ret = gdr_unpin_buffer(md->gdrcpy_ctx, mem_hndl->mh);
+    if (ret) {
+        ucs_warn("gdr_unpin_buffer failed. ret:%d", ret);
+    }
+err:
+    return UCS_ERR_IO_ERROR;
+}
+
+UCS_PROFILE_FUNC(ucs_status_t, uct_gdr_copy_mem_dereg_internal,
+                 (uct_md, mem_hndl),
+                 uct_md_h uct_md, uct_gdr_copy_mem_t *mem_hndl)
+{
+    uct_gdr_copy_md_t *md = ucs_derived_of(uct_md, uct_gdr_copy_md_t);
+    int ret;
+
+    ret = gdr_unmap(md->gdrcpy_ctx, mem_hndl->mh, mem_hndl->bar_ptr, mem_hndl->reg_size);
+    if (ret) {
+        ucs_error("gdr_unmap failed. unpin_size:%lu ret:%d", mem_hndl->reg_size, ret);
+        return UCS_ERR_IO_ERROR;
+    }
+
+    ret = gdr_unpin_buffer(md->gdrcpy_ctx, mem_hndl->mh);
+    if (ret) {
+        ucs_error("gdr_unpin_buffer failed. ret:%d", ret);
+        return UCS_ERR_IO_ERROR;
+    }
+
+    ucs_trace("deregistered memorory. 
info.va:0x%"PRIx64" bar_ptr:%p", + mem_hndl->info.va, mem_hndl->bar_ptr); + return UCS_OK; +} + +static ucs_status_t uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + uct_gdr_copy_mem_t *mem_hndl = NULL; + void *start, *end; + ucs_status_t status; + + mem_hndl = ucs_malloc(sizeof(uct_gdr_copy_mem_t), "gdr_copy handle"); + if (NULL == mem_hndl) { + ucs_error("failed to allocate memory for gdr_copy_mem_t"); + return UCS_ERR_NO_MEMORY; + } + + start = ucs_align_down_pow2_ptr(address, GPU_PAGE_SIZE); + end = ucs_align_up_pow2_ptr(UCS_PTR_BYTE_OFFSET(address, length), GPU_PAGE_SIZE); + ucs_assert_always(start <= end); + + status = uct_gdr_copy_mem_reg_internal(uct_md, start, + UCS_PTR_BYTE_DIFF(start, end), + 0, mem_hndl); + if (status != UCS_OK) { + ucs_free(mem_hndl); + return status; + } + + *memh_p = mem_hndl; + return UCS_OK; +} + +static ucs_status_t uct_gdr_copy_mem_dereg(uct_md_h uct_md, uct_mem_h memh) +{ + uct_gdr_copy_mem_t *mem_hndl = memh; + ucs_status_t status; + + status = uct_gdr_copy_mem_dereg_internal(uct_md, mem_hndl); + if (status != UCS_OK) { + ucs_warn("failed to deregister memory handle"); + } + + ucs_free(mem_hndl); + return status; +} + +static ucs_status_t +uct_gdr_copy_query_md_resources(uct_component_t *component, + uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p) +{ + gdr_t ctx; + + ctx = gdr_open(); + if (ctx == NULL) { + ucs_debug("could not open gdr copy. disabling gdr copy resource"); + return uct_md_query_empty_md_resource(resources_p, num_resources_p); + } + gdr_close(ctx); + + return uct_cuda_base_query_md_resources(component, resources_p, + num_resources_p); +} + +static void uct_gdr_copy_md_close(uct_md_h uct_md) +{ + uct_gdr_copy_md_t *md = ucs_derived_of(uct_md, uct_gdr_copy_md_t); + int ret; + + if (md->rcache != NULL) { + ucs_rcache_destroy(md->rcache); + } + + ret = gdr_close(md->gdrcpy_ctx); + if (ret) { + ucs_warn("failed to close gdrcopy. 
ret:%d", ret); + } + + ucs_free(md); +} + +static uct_md_ops_t md_ops = { + .close = uct_gdr_copy_md_close, + .query = uct_gdr_copy_md_query, + .mkey_pack = uct_gdr_copy_mkey_pack, + .mem_reg = uct_gdr_copy_mem_reg, + .mem_dereg = uct_gdr_copy_mem_dereg, + .detect_memory_type = ucs_empty_function_return_unsupported, +}; + +static inline uct_gdr_copy_rcache_region_t* +uct_gdr_copy_rache_region_from_memh(uct_mem_h memh) +{ + return ucs_container_of(memh, uct_gdr_copy_rcache_region_t, memh); +} + +static ucs_status_t +uct_gdr_copy_mem_rcache_reg(uct_md_h uct_md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + uct_gdr_copy_md_t *md = ucs_derived_of(uct_md, uct_gdr_copy_md_t); + ucs_rcache_region_t *rregion; + ucs_status_t status; + uct_gdr_copy_mem_t *memh; + + status = ucs_rcache_get(md->rcache, (void *)address, length, PROT_READ|PROT_WRITE, + &flags, &rregion); + if (status != UCS_OK) { + return status; + } + + ucs_assert(rregion->refcount > 0); + memh = &ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t)->memh; + *memh_p = memh; + return UCS_OK; +} + +static ucs_status_t uct_gdr_copy_mem_rcache_dereg(uct_md_h uct_md, uct_mem_h memh) +{ + uct_gdr_copy_md_t *md = ucs_derived_of(uct_md, uct_gdr_copy_md_t); + uct_gdr_copy_rcache_region_t *region = uct_gdr_copy_rache_region_from_memh(memh); + + ucs_rcache_region_put(md->rcache, &region->super); + return UCS_OK; +} + +static uct_md_ops_t md_rcache_ops = { + .close = uct_gdr_copy_md_close, + .query = uct_gdr_copy_md_query, + .mkey_pack = uct_gdr_copy_mkey_pack, + .mem_reg = uct_gdr_copy_mem_rcache_reg, + .mem_dereg = uct_gdr_copy_mem_rcache_dereg, + .detect_memory_type = ucs_empty_function_return_unsupported, +}; + +static ucs_status_t +uct_gdr_copy_rcache_mem_reg_cb(void *context, ucs_rcache_t *rcache, + void *arg, ucs_rcache_region_t *rregion, + uint16_t rcache_mem_reg_flags) +{ + uct_gdr_copy_md_t *md = context; + int *flags = arg; + uct_gdr_copy_rcache_region_t *region; + + region = 
ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t); + return uct_gdr_copy_mem_reg_internal(&md->super, (void*)region->super.super.start, + region->super.super.end - + region->super.super.start, + *flags, &region->memh); +} + +static void uct_gdr_copy_rcache_mem_dereg_cb(void *context, ucs_rcache_t *rcache, + ucs_rcache_region_t *rregion) +{ + uct_gdr_copy_md_t *md = context; + uct_gdr_copy_rcache_region_t *region; + + region = ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t); + (void)uct_gdr_copy_mem_dereg_internal(&md->super, &region->memh); +} + +static void uct_gdr_copy_rcache_dump_region_cb(void *context, ucs_rcache_t *rcache, + ucs_rcache_region_t *rregion, char *buf, + size_t max) +{ + uct_gdr_copy_rcache_region_t *region = ucs_derived_of(rregion, + uct_gdr_copy_rcache_region_t); + uct_gdr_copy_mem_t *memh = &region->memh; + + snprintf(buf, max, "bar ptr:%p", memh->bar_ptr); +} + +static ucs_rcache_ops_t uct_gdr_copy_rcache_ops = { + .mem_reg = uct_gdr_copy_rcache_mem_reg_cb, + .mem_dereg = uct_gdr_copy_rcache_mem_dereg_cb, + .dump_region = uct_gdr_copy_rcache_dump_region_cb +}; + +static ucs_status_t +uct_gdr_copy_md_open(uct_component_t *component, const char *md_name, + const uct_md_config_t *config, uct_md_h *md_p) +{ + const uct_gdr_copy_md_config_t *md_config = + ucs_derived_of(config, uct_gdr_copy_md_config_t); + ucs_status_t status; + uct_gdr_copy_md_t *md; + ucs_rcache_params_t rcache_params; + + md = ucs_malloc(sizeof(uct_gdr_copy_md_t), "uct_gdr_copy_md_t"); + if (NULL == md) { + ucs_error("failed to allocate memory for uct_gdr_copy_md_t"); + return UCS_ERR_NO_MEMORY; + } + + md->super.ops = &md_ops; + md->super.component = &uct_gdr_copy_component; + md->rcache = NULL; + md->reg_cost = md_config->uc_reg_cost; + + md->gdrcpy_ctx = gdr_open(); + if (md->gdrcpy_ctx == NULL) { + ucs_error("failed to open gdr copy"); + status = UCS_ERR_IO_ERROR; + goto err_free_md; + } + + if (md_config->enable_rcache != UCS_NO) { + rcache_params.region_struct_size = 
sizeof(uct_gdr_copy_rcache_region_t); + rcache_params.alignment = md_config->rcache.alignment; + rcache_params.max_alignment = UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN; + rcache_params.ucm_events = UCM_EVENT_MEM_TYPE_FREE; + rcache_params.ucm_event_priority = md_config->rcache.event_prio; + rcache_params.context = md; + rcache_params.ops = &uct_gdr_copy_rcache_ops; + status = ucs_rcache_create(&rcache_params, "gdr_copy", NULL, &md->rcache); + if (status == UCS_OK) { + md->super.ops = &md_rcache_ops; + md->reg_cost.overhead = 0; + md->reg_cost.growth = 0; + } else { + ucs_assert(md->rcache == NULL); + if (md_config->enable_rcache == UCS_YES) { + status = UCS_ERR_IO_ERROR; + goto err_close_gdr; + } else { + ucs_debug("could not create registration cache for: %s", + ucs_status_string(status)); + } + } + } + + *md_p = (uct_md_h) md; + status = UCS_OK; +out: + return status; +err_close_gdr: + gdr_close(md->gdrcpy_ctx); +err_free_md: + ucs_free(md); + goto out; +} + +uct_component_t uct_gdr_copy_component = { + .query_md_resources = uct_gdr_copy_query_md_resources, + .md_open = uct_gdr_copy_md_open, + .cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = uct_gdr_copy_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = uct_gdr_copy_rkey_release, + .name = "gdr_copy", + .md_config = { + .name = "GDR-copy memory domain", + .prefix = "GDR_COPY_", + .table = uct_gdr_copy_md_config_table, + .size = sizeof(uct_gdr_copy_md_config_t), + }, + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_gdr_copy_component), + .flags = 0 +}; +UCT_COMPONENT_REGISTER(&uct_gdr_copy_component); + diff --git a/src/uct/cuda/gdr_copy/gdr_copy_md.h b/src/uct/cuda/gdr_copy/gdr_copy_md.h new file mode 100644 index 0000000..596ed67 --- /dev/null +++ b/src/uct/cuda/gdr_copy/gdr_copy_md.h @@ -0,0 +1,71 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED. 
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * See file LICENSE for terms. + */ + +#ifndef UCT_GDR_COPY_MD_H +#define UCT_GDR_COPY_MD_H + +#include +#include +#include +#include "gdrapi.h" + + +extern uct_component_t uct_gdr_copy_component; + + +/** + * @brief gdr_copy MD descriptor + */ +typedef struct uct_gdr_copy_md { + uct_md_t super; /**< Domain info */ + gdr_t gdrcpy_ctx; /**< gdr copy context */ + ucs_rcache_t *rcache; /**< Registration cache (can be NULL) */ + uct_linear_growth_t reg_cost; /**< Memory registration cost */ +} uct_gdr_copy_md_t; + + +/** + * gdr copy domain configuration. + */ +typedef struct uct_gdr_copy_md_config { + uct_md_config_t super; + int enable_rcache;/**< Enable registration cache */ + uct_md_rcache_config_t rcache; /**< Registration cache config */ + uct_linear_growth_t uc_reg_cost; /**< Memory registration cost estimation + without using the cache */ +} uct_gdr_copy_md_config_t; + + +/** + * @brief gdr copy mem handle + */ +typedef struct uct_gdr_copy_mem { + gdr_mh_t mh; /**< Memory handle of GPU memory */ + gdr_info_t info; /**< Info of GPU memory mapping */ + void *bar_ptr; /**< BAR address of GPU mapping */ + size_t reg_size; /**< Size of mapping */ +} uct_gdr_copy_mem_t; + + +/** + * @brief gdr copy packed and remote key for put + */ +typedef struct uct_gdr_copy_key { + uint64_t vaddr; /**< Mapped GPU address */ + void *bar_ptr; /**< BAR address of GPU mapping */ + gdr_mh_t mh; /**< Memory handle of GPU memory */ +} uct_gdr_copy_key_t; + + +/** + * cuda memory region in the registration cache. + */ +typedef struct uct_gdr_copy_rcache_region { + ucs_rcache_region_t super; + uct_gdr_copy_mem_t memh; /**< mr exposed to the user as the memh */ +} uct_gdr_copy_rcache_region_t; + +#endif diff --git a/src/uct/ib/Makefile.am b/src/uct/ib/Makefile.am new file mode 100644 index 0000000..61f2cdd --- /dev/null +++ b/src/uct/ib/Makefile.am @@ -0,0 +1,149 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. 
ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +if HAVE_IB + +SUBDIRS = . cm rdmacm + +module_LTLIBRARIES = libuct_ib.la +libuct_ib_la_CPPFLAGS = $(BASE_CPPFLAGS) $(IBVERBS_CPPFLAGS) +libuct_ib_la_CFLAGS = $(BASE_CFLAGS) +libuct_ib_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/libuct.la +libuct_ib_la_LDFLAGS = $(IBVERBS_LDFLAGS) $(NUMA_LIBS) -version-info $(SOVERSION) +libmlx5_ver = $(shell (rpm -qf $(IBVERBS_DIR)/include/infiniband/mlx5_hw.h &>/dev/null && rpm -qf /usr/include/infiniband/mlx5_hw.h) | grep -v 'not owned' | head -1) + +noinst_HEADERS = \ + base/ib_device.h \ + base/ib_iface.h \ + base/ib_log.h \ + base/ib_md.h \ + base/ib_verbs.h + +libuct_ib_la_SOURCES = \ + base/ib_device.c \ + base/ib_iface.c \ + base/ib_log.c \ + base/ib_md.c + +# TODO separate module for mlx5 +if HAVE_MLX5_HW +libuct_ib_la_CPPFLAGS += -DUCT_IB_LIBMLX5_VER=\"$(libmlx5_ver)\" + +noinst_HEADERS += \ + mlx5/ib_mlx5_log.h \ + mlx5/ib_mlx5.h \ + mlx5/ib_mlx5.inl \ + mlx5/dv/ib_mlx5_dv.h \ + mlx5/dv/ib_mlx5_ifc.h \ + mlx5/exp/ib_mlx5_hw.h + +libuct_ib_la_SOURCES += \ + mlx5/ib_mlx5_log.c \ + mlx5/ib_mlx5.c + +if HAVE_EXP +noinst_HEADERS += \ + mlx5/exp/ib_exp.h + +libuct_ib_la_SOURCES += \ + mlx5/exp/ib_mlx5_hw.c \ + mlx5/exp/ib_exp.c \ + mlx5/exp/ib_exp_md.c +endif # HAVE_EXP + +if HAVE_MLX5_DV +libuct_ib_la_LDFLAGS += $(LIB_MLX5) +libuct_ib_la_SOURCES += \ + mlx5/dv/ib_mlx5_dv.c \ + mlx5/dv/ib_mlx5dv_md.c +endif # HAVE_MLX5_DV + +endif # HAVE_MLX5_HW + + +if HAVE_TL_RC +noinst_HEADERS += \ + rc/base/rc_def.h \ + rc/base/rc_ep.h \ + rc/base/rc_iface.h \ + rc/verbs/rc_verbs.h \ + rc/verbs/rc_verbs_impl.h + +libuct_ib_la_SOURCES += \ + rc/base/rc_ep.c \ + rc/base/rc_iface.c \ + rc/verbs/rc_verbs_ep.c \ + rc/verbs/rc_verbs_iface.c + +if HAVE_MLX5_HW +noinst_HEADERS += \ + rc/accel/rc_mlx5.h \ + rc/accel/rc_mlx5.inl \ + rc/accel/rc_mlx5_common.h + +libuct_ib_la_SOURCES += \ + rc/accel/rc_mlx5_ep.c \ + rc/accel/rc_mlx5_iface.c \ + 
rc/accel/rc_mlx5_common.c +endif # HAVE_MLX5_HW + +if HAVE_DEVX +libuct_ib_la_SOURCES += \ + rc/accel/rc_mlx5_devx.c +endif # HAVE_DEVX + +endif # HAVE_TL_RC + + +if HAVE_TL_DC +noinst_HEADERS += \ + dc/dc_mlx5_ep.h \ + dc/dc_mlx5.h + +libuct_ib_la_SOURCES += \ + dc/dc_mlx5_ep.c \ + dc/dc_mlx5.c + +if HAVE_DEVX +libuct_ib_la_SOURCES += \ + dc/dc_mlx5_devx.c +endif # HAVE_DEVX + +endif # HAVE_TL_DC + + +if HAVE_TL_UD +noinst_HEADERS += \ + ud/base/ud_iface_common.h \ + ud/base/ud_iface.h \ + ud/base/ud_ep.h \ + ud/base/ud_def.h \ + ud/base/ud_inl.h \ + ud/verbs/ud_verbs.h + +libuct_ib_la_SOURCES += \ + ud/base/ud_iface_common.c \ + ud/base/ud_iface.c \ + ud/base/ud_ep.c \ + ud/base/ud_log.c \ + ud/verbs/ud_verbs.c + + +if HAVE_MLX5_HW_UD +noinst_HEADERS += \ + ud/accel/ud_mlx5_common.h \ + ud/accel/ud_mlx5.h + +libuct_ib_la_SOURCES += \ + ud/accel/ud_mlx5_common.c \ + ud/accel/ud_mlx5.c +endif # HAVE_MLX5_HW_UD + +endif # HAVE_TL_UD + +include $(top_srcdir)/config/module.am + +endif # HAVE_IB diff --git a/src/uct/ib/Makefile.in b/src/uct/ib/Makefile.in new file mode 100644 index 0000000..7483dca --- /dev/null +++ b/src/uct/ib/Makefile.in @@ -0,0 +1,1612 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : 
+NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ + +# TODO separate module for mlx5 +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@am__append_1 = -DUCT_IB_LIBMLX5_VER=\"$(libmlx5_ver)\" +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@am__append_2 = \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/ib_mlx5_log.h \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/ib_mlx5.h \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/ib_mlx5.inl \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/dv/ib_mlx5_dv.h \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/dv/ib_mlx5_ifc.h \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/exp/ib_mlx5_hw.h + +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@am__append_3 = \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/ib_mlx5_log.c \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/ib_mlx5.c + +@HAVE_EXP_TRUE@@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@am__append_4 = \ +@HAVE_EXP_TRUE@@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/exp/ib_exp.h + +@HAVE_EXP_TRUE@@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@am__append_5 = \ +@HAVE_EXP_TRUE@@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/exp/ib_mlx5_hw.c \ +@HAVE_EXP_TRUE@@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/exp/ib_exp.c \ +@HAVE_EXP_TRUE@@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/exp/ib_exp_md.c + +@HAVE_IB_TRUE@@HAVE_MLX5_DV_TRUE@@HAVE_MLX5_HW_TRUE@am__append_6 = $(LIB_MLX5) +@HAVE_IB_TRUE@@HAVE_MLX5_DV_TRUE@@HAVE_MLX5_HW_TRUE@am__append_7 = \ +@HAVE_IB_TRUE@@HAVE_MLX5_DV_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/dv/ib_mlx5_dv.c \ +@HAVE_IB_TRUE@@HAVE_MLX5_DV_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/dv/ib_mlx5dv_md.c + +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@am__append_8 = \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/base/rc_def.h \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/base/rc_ep.h \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/base/rc_iface.h \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/verbs/rc_verbs.h \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/verbs/rc_verbs_impl.h + +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@am__append_9 = \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/base/rc_ep.c \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/base/rc_iface.c \ 
+@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/verbs/rc_verbs_ep.c \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/verbs/rc_verbs_iface.c + +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@am__append_10 = \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@ rc/accel/rc_mlx5.h \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@ rc/accel/rc_mlx5.inl \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@ rc/accel/rc_mlx5_common.h + +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@am__append_11 = \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@ rc/accel/rc_mlx5_ep.c \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@ rc/accel/rc_mlx5_iface.c \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@ rc/accel/rc_mlx5_common.c + +@HAVE_DEVX_TRUE@@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@am__append_12 = \ +@HAVE_DEVX_TRUE@@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/accel/rc_mlx5_devx.c + +@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@am__append_13 = \ +@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@ dc/dc_mlx5_ep.h \ +@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@ dc/dc_mlx5.h + +@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@am__append_14 = \ +@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@ dc/dc_mlx5_ep.c \ +@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@ dc/dc_mlx5.c + +@HAVE_DEVX_TRUE@@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@am__append_15 = \ +@HAVE_DEVX_TRUE@@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@ dc/dc_mlx5_devx.c + +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@am__append_16 = \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/base/ud_iface_common.h \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/base/ud_iface.h \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/base/ud_ep.h \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/base/ud_def.h \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/base/ud_inl.h \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/verbs/ud_verbs.h + +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@am__append_17 = \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/base/ud_iface_common.c \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/base/ud_iface.c \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/base/ud_ep.c \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/base/ud_log.c \ +@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ ud/verbs/ud_verbs.c + 
+@HAVE_IB_TRUE@@HAVE_MLX5_HW_UD_TRUE@@HAVE_TL_UD_TRUE@am__append_18 = \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_UD_TRUE@@HAVE_TL_UD_TRUE@ ud/accel/ud_mlx5_common.h \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_UD_TRUE@@HAVE_TL_UD_TRUE@ ud/accel/ud_mlx5.h + +@HAVE_IB_TRUE@@HAVE_MLX5_HW_UD_TRUE@@HAVE_TL_UD_TRUE@am__append_19 = \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_UD_TRUE@@HAVE_TL_UD_TRUE@ ud/accel/ud_mlx5_common.c \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_UD_TRUE@@HAVE_TL_UD_TRUE@ ud/accel/ud_mlx5.c + +subdir = src/uct/ib +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + 
$(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_IB_TRUE@libuct_ib_la_DEPENDENCIES = \ +@HAVE_IB_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_IB_TRUE@ $(top_builddir)/src/uct/libuct.la +am__libuct_ib_la_SOURCES_DIST = base/ib_device.c base/ib_iface.c \ + base/ib_log.c base/ib_md.c mlx5/ib_mlx5_log.c mlx5/ib_mlx5.c \ + mlx5/exp/ib_mlx5_hw.c mlx5/exp/ib_exp.c mlx5/exp/ib_exp_md.c \ + mlx5/dv/ib_mlx5_dv.c mlx5/dv/ib_mlx5dv_md.c rc/base/rc_ep.c \ + rc/base/rc_iface.c rc/verbs/rc_verbs_ep.c \ + rc/verbs/rc_verbs_iface.c rc/accel/rc_mlx5_ep.c \ + rc/accel/rc_mlx5_iface.c rc/accel/rc_mlx5_common.c \ + rc/accel/rc_mlx5_devx.c dc/dc_mlx5_ep.c dc/dc_mlx5.c \ + dc/dc_mlx5_devx.c ud/base/ud_iface_common.c ud/base/ud_iface.c \ + ud/base/ud_ep.c ud/base/ud_log.c ud/verbs/ud_verbs.c \ + ud/accel/ud_mlx5_common.c ud/accel/ud_mlx5.c +am__dirstamp = $(am__leading_dot)dirstamp +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@am__objects_1 = mlx5/libuct_ib_la-ib_mlx5_log.lo \ +@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/libuct_ib_la-ib_mlx5.lo +@HAVE_EXP_TRUE@@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@am__objects_2 = mlx5/exp/libuct_ib_la-ib_mlx5_hw.lo \ +@HAVE_EXP_TRUE@@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/exp/libuct_ib_la-ib_exp.lo \ +@HAVE_EXP_TRUE@@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/exp/libuct_ib_la-ib_exp_md.lo +@HAVE_IB_TRUE@@HAVE_MLX5_DV_TRUE@@HAVE_MLX5_HW_TRUE@am__objects_3 = mlx5/dv/libuct_ib_la-ib_mlx5_dv.lo \ +@HAVE_IB_TRUE@@HAVE_MLX5_DV_TRUE@@HAVE_MLX5_HW_TRUE@ mlx5/dv/libuct_ib_la-ib_mlx5dv_md.lo +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@am__objects_4 = \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/base/libuct_ib_la-rc_ep.lo \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/base/libuct_ib_la-rc_iface.lo \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/verbs/libuct_ib_la-rc_verbs_ep.lo \ +@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ rc/verbs/libuct_ib_la-rc_verbs_iface.lo 
+# Object lists for the optional transports of libuct_ib.  Each physical line
+# repeats its automake conditional guards, so a continuation line stays tied
+# to the same condition as the assignment it belongs to.
+@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@am__objects_5 = \
+@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@	rc/accel/libuct_ib_la-rc_mlx5_ep.lo \
+@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@	rc/accel/libuct_ib_la-rc_mlx5_iface.lo \
+@HAVE_IB_TRUE@@HAVE_MLX5_HW_TRUE@@HAVE_TL_RC_TRUE@	rc/accel/libuct_ib_la-rc_mlx5_common.lo
+@HAVE_DEVX_TRUE@@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@am__objects_6 = rc/accel/libuct_ib_la-rc_mlx5_devx.lo
+@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@am__objects_7 = \
+@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@	dc/libuct_ib_la-dc_mlx5_ep.lo \
+@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@	dc/libuct_ib_la-dc_mlx5.lo
+@HAVE_DEVX_TRUE@@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@am__objects_8 = dc/libuct_ib_la-dc_mlx5_devx.lo
+@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@am__objects_9 = \
+@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@	ud/base/libuct_ib_la-ud_iface_common.lo \
+@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@	ud/base/libuct_ib_la-ud_iface.lo \
+@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@	ud/base/libuct_ib_la-ud_ep.lo \
+@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@	ud/base/libuct_ib_la-ud_log.lo \
+@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@	ud/verbs/libuct_ib_la-ud_verbs.lo
+@HAVE_IB_TRUE@@HAVE_MLX5_HW_UD_TRUE@@HAVE_TL_UD_TRUE@am__objects_10 = \
+@HAVE_IB_TRUE@@HAVE_MLX5_HW_UD_TRUE@@HAVE_TL_UD_TRUE@	ud/accel/libuct_ib_la-ud_mlx5_common.lo \
+@HAVE_IB_TRUE@@HAVE_MLX5_HW_UD_TRUE@@HAVE_TL_UD_TRUE@	ud/accel/libuct_ib_la-ud_mlx5.lo
+# Always-built base objects followed by the optional groups am__objects_1..10.
+@HAVE_IB_TRUE@am_libuct_ib_la_OBJECTS = \
+@HAVE_IB_TRUE@	base/libuct_ib_la-ib_device.lo \
+@HAVE_IB_TRUE@	base/libuct_ib_la-ib_iface.lo \
+@HAVE_IB_TRUE@	base/libuct_ib_la-ib_log.lo \
+@HAVE_IB_TRUE@	base/libuct_ib_la-ib_md.lo \
+@HAVE_IB_TRUE@	$(am__objects_1) \
+@HAVE_IB_TRUE@	$(am__objects_2) \
+@HAVE_IB_TRUE@	$(am__objects_3) \
+@HAVE_IB_TRUE@	$(am__objects_4) \
+@HAVE_IB_TRUE@	$(am__objects_5) \
+@HAVE_IB_TRUE@	$(am__objects_6) \
+@HAVE_IB_TRUE@	$(am__objects_7) \
+@HAVE_IB_TRUE@	$(am__objects_8) \
+@HAVE_IB_TRUE@	$(am__objects_9) \
+@HAVE_IB_TRUE@	$(am__objects_10)
+libuct_ib_la_OBJECTS = $(am_libuct_ib_la_OBJECTS)
+# libtool verbosity: "--silent" at level 0, nothing at level 1.
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+# Link command for libuct_ib.la; "-o $@" is expanded inside the link recipe.
+libuct_ib_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
+	$(CCLD) $(libuct_ib_la_CFLAGS) $(CFLAGS) \
+	$(libuct_ib_la_LDFLAGS) $(LDFLAGS) -o $@
+@HAVE_IB_TRUE@am_libuct_ib_la_rpath = -rpath $(moduledir)
+# Silent-rule selectors: each AM_V_x picks its _0 or _1 variant.
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__maybe_remake_depfiles = depfiles
+# One dependency file per object, grouped by source directory.
+am__depfiles_remade = \
+	base/$(DEPDIR)/libuct_ib_la-ib_device.Plo \
+	base/$(DEPDIR)/libuct_ib_la-ib_iface.Plo \
+	base/$(DEPDIR)/libuct_ib_la-ib_log.Plo \
+	base/$(DEPDIR)/libuct_ib_la-ib_md.Plo \
+	dc/$(DEPDIR)/libuct_ib_la-dc_mlx5.Plo \
+	dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_devx.Plo \
+	dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_ep.Plo \
+	mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5.Plo \
+	mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5_log.Plo \
+	mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5_dv.Plo \
+	mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5dv_md.Plo \
+	mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp.Plo \
+	mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp_md.Plo \
+	mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_mlx5_hw.Plo \
+	rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_common.Plo \
+	rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_devx.Plo \
+	rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_ep.Plo \
+	rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_iface.Plo \
+	rc/base/$(DEPDIR)/libuct_ib_la-rc_ep.Plo \
+	rc/base/$(DEPDIR)/libuct_ib_la-rc_iface.Plo \
+	rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_ep.Plo \
+	rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_iface.Plo \
+	ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5.Plo \
+	ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5_common.Plo \
+	ud/base/$(DEPDIR)/libuct_ib_la-ud_ep.Plo \
+	ud/base/$(DEPDIR)/libuct_ib_la-ud_iface.Plo \
+	ud/base/$(DEPDIR)/libuct_ib_la-ud_iface_common.Plo \
+	ud/base/$(DEPDIR)/libuct_ib_la-ud_log.Plo \
+	ud/verbs/$(DEPDIR)/libuct_ib_la-ud_verbs.Plo
+am__mv = mv -f
+# Generic C compile and link command lines, plain and via libtool.
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile \
+	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
+	$(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(libuct_ib_la_SOURCES)
+DIST_SOURCES = $(am__libuct_ib_la_SOURCES_DIST)
+# Targets that are forwarded into every directory listed in SUBDIRS.
+RECURSIVE_TARGETS = \
+	all-recursive \
+	check-recursive \
+	cscopelist-recursive \
+	ctags-recursive \
+	dvi-recursive \
+	html-recursive \
+	info-recursive \
+	install-data-recursive \
+	install-dvi-recursive \
+	install-exec-recursive \
+	install-html-recursive \
+	install-info-recursive \
+	install-pdf-recursive \
+	install-ps-recursive \
+	install-recursive \
+	installcheck-recursive \
+	installdirs-recursive \
+	pdf-recursive \
+	ps-recursive \
+	tags-recursive \
+	uninstall-recursive
+# Shell fragment: fails when AM_UPDATE_INFO_DIR is n/no/NO, otherwise
+# succeeds only if "install-info --version" runs.
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+# Every header that may be listed in noinst_HEADERS, one directory per row.
+am__noinst_HEADERS_DIST = \
+	base/ib_device.h base/ib_iface.h base/ib_log.h base/ib_md.h base/ib_verbs.h \
+	mlx5/ib_mlx5_log.h mlx5/ib_mlx5.h mlx5/ib_mlx5.inl \
+	mlx5/dv/ib_mlx5_dv.h mlx5/dv/ib_mlx5_ifc.h \
+	mlx5/exp/ib_mlx5_hw.h mlx5/exp/ib_exp.h \
+	rc/base/rc_def.h rc/base/rc_ep.h rc/base/rc_iface.h \
+	rc/verbs/rc_verbs.h rc/verbs/rc_verbs_impl.h \
+	rc/accel/rc_mlx5.h rc/accel/rc_mlx5.inl rc/accel/rc_mlx5_common.h \
+	dc/dc_mlx5_ep.h dc/dc_mlx5.h \
+	ud/base/ud_iface_common.h ud/base/ud_iface.h ud/base/ud_ep.h \
+	ud/base/ud_def.h ud/base/ud_inl.h \
+	ud/verbs/ud_verbs.h \
+	ud/accel/ud_mlx5_common.h ud/accel/ud_mlx5.h
+HEADERS = $(noinst_HEADERS)
+RECURSIVE_CLEAN_TARGETS = \
+	mostlyclean-recursive \
+	clean-recursive \
+	distclean-recursive \
+	maintainer-clean-recursive
+am__recursive_targets = \
+  $(RECURSIVE_TARGETS) \
+  $(RECURSIVE_CLEAN_TARGETS) \
+  $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) \
+	TAGS CTAGS distdir distdir-am
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# awk filter: reads newline-separated strings on stdin and prints each
+# distinct one exactly once.  The output order is unspecified.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Shell fragment that sets $$unique to the de-duplicated tagged files,
+# prefixing $(srcdir)/ to any entry that does not exist in the build dir.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = . cm rdmacm
+am__DIST_COMMON = $(srcdir)/Makefile.in \
+	$(top_srcdir)/config/module.am \
+	$(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+# Shell fragment: consumes the caller's $$dir1 one path component at a time
+# while rewriting $$dir2, and leaves the result in $$reldir.
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+# Values filled in by configure (one NAME = @NAME@ line per substitution).
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BASE_CFLAGS = @BASE_CFLAGS@
+BASE_CPPFLAGS = @BASE_CPPFLAGS@
+BASE_CXXFLAGS = @BASE_CXXFLAGS@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@
+CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@
+CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@
+CUDA_CPPFLAGS = @CUDA_CPPFLAGS@
+CUDA_LDFLAGS = @CUDA_LDFLAGS@
+CXX = @CXX@
+CXX11FLAGS = @CXX11FLAGS@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+DX_BIBTEX = @DX_BIBTEX@
+DX_CONFIG = @DX_CONFIG@
+DX_DOCDIR = @DX_DOCDIR@
+DX_DOT = @DX_DOT@
+DX_DOXYGEN = @DX_DOXYGEN@
+DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ 
+PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ 
+# Remaining configure substitutions (installation directories and tree roots).
+localedir = @localedir@
+localmoduledir = @localmoduledir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+moduledir = @moduledir@
+modulesubdir = @modulesubdir@
+objdir = @objdir@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+shrext = @shrext@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+top_top_srcdir = @top_top_srcdir@
+# Description of the libuct_ib module.  Everything below is guarded by the
+# HAVE_IB automake conditional, so every physical line carries the guard.
+@HAVE_IB_TRUE@SUBDIRS = . cm rdmacm
+@HAVE_IB_TRUE@module_LTLIBRARIES = libuct_ib.la
+@HAVE_IB_TRUE@libuct_ib_la_CPPFLAGS = $(BASE_CPPFLAGS) \
+@HAVE_IB_TRUE@	$(IBVERBS_CPPFLAGS) $(am__append_1)
+@HAVE_IB_TRUE@libuct_ib_la_CFLAGS = $(BASE_CFLAGS)
+@HAVE_IB_TRUE@libuct_ib_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \
+@HAVE_IB_TRUE@	$(top_builddir)/src/uct/libuct.la
+
+@HAVE_IB_TRUE@libuct_ib_la_LDFLAGS = $(IBVERBS_LDFLAGS) $(NUMA_LIBS) \
+@HAVE_IB_TRUE@	-version-info $(SOVERSION) $(am__append_6)
+# Name of the RPM package that owns the mlx5_hw.h header, or empty when rpm
+# reports nothing usable ("not owned" lines are filtered out, first hit wins).
+# The first rpm call is only a guard, so its output is discarded with the
+# portable ">/dev/null 2>&1".  The bash-only "&>" must not be used here: a
+# POSIX /bin/sh such as dash parses "cmd &>file" as "cmd &" followed by a
+# bare redirection, which makes the "&&" guard always succeed.
+# Kept as a recursively expanded "=" so rpm only runs if the variable is
+# actually referenced.
+# NOTE(review): the guard probes $(IBVERBS_DIR)/include while the reported
+# package is looked up under /usr/include -- confirm this is intentional.
+@HAVE_IB_TRUE@libmlx5_ver = $(shell (rpm -qf $(IBVERBS_DIR)/include/infiniband/mlx5_hw.h >/dev/null 2>&1 && rpm -qf /usr/include/infiniband/mlx5_hw.h) | grep -v 'not owned' | head -1)
+# Headers and sources: the base files plus whatever the transport
+# conditionals appended through the am__append_N variables.
+@HAVE_IB_TRUE@noinst_HEADERS = base/ib_device.h base/ib_iface.h \
+@HAVE_IB_TRUE@	base/ib_log.h base/ib_md.h base/ib_verbs.h \
+@HAVE_IB_TRUE@	$(am__append_2) $(am__append_4) $(am__append_8) \
+@HAVE_IB_TRUE@	$(am__append_10) $(am__append_13) \
+@HAVE_IB_TRUE@	$(am__append_16) $(am__append_18)
+@HAVE_IB_TRUE@libuct_ib_la_SOURCES = base/ib_device.c base/ib_iface.c \
+@HAVE_IB_TRUE@	base/ib_log.c base/ib_md.c $(am__append_3) \
+@HAVE_IB_TRUE@	$(am__append_5) $(am__append_7) $(am__append_9) \
+@HAVE_IB_TRUE@	$(am__append_11) $(am__append_12) \
+@HAVE_IB_TRUE@	$(am__append_14) $(am__append_15) \
+@HAVE_IB_TRUE@	$(am__append_17) $(am__append_19)
+
+# Automake silent rules
+# Verbosity selector for an "LN" step: level 0 expands to an echo of the
+# tag, level 1 to the no-op "true".  Its users are outside this section.
+@HAVE_IB_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@)
+@HAVE_IB_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@)
+@HAVE_IB_TRUE@AM_V_LN_0 = echo " LN "
+@HAVE_IB_TRUE@AM_V_LN_1 = true
+# Each module library name prefixed with $(localmoduledir)/.
+@HAVE_IB_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES))
+# Default goal.
+all: all-recursive
+
+# Clear the suffix list, then enable only the suffixes used in this directory.
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+# Regenerate Makefile.in.  If a changed prerequisite is one of the configure
+# inputs, delegate to the top-level am--refresh; otherwise rerun automake
+# for this directory only.
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/ib/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign src/uct/ib/Makefile
+# Regenerate Makefile: a newer config.status triggers a full refresh,
+# anything else just re-runs config.status for this one file.
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+	esac;
+# Deliberately empty rule: no prerequisites and no recipe.
+$(top_srcdir)/config/module.am $(am__empty):
+
+# The configure machinery is always rebuilt from the top build directory.
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+# Install the module libraries that exist into $(DESTDIR)$(moduledir) via
+# libtool.  Nothing is installed when moduledir is empty.
+install-moduleLTLIBRARIES: $(module_LTLIBRARIES)
+	@$(NORMAL_INSTALL)
+	@list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \
+	}
+
+# Remove each installed module library through libtool's uninstall mode.
+uninstall-moduleLTLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \
+	done
+
+# Delete the built module libraries and the so_locations file next to each.
+clean-moduleLTLIBRARIES:
+	-test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES)
+	@list='$(module_LTLIBRARIES)'; \
+	locs=`for p in $$list; do echo $$p; done | \
+	      sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+	      sort -u`; \
+	test -z "$$locs" || { \
+	  echo rm -f $${locs}; \
+	  rm -f $${locs}; \
+	}
+# Directory stamps: create the object (or dependency) directory and touch
+# its stamp.  The objects depend on the stamps so the directories exist
+# before anything is compiled into them.
+base/$(am__dirstamp) base/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+base/libuct_ib_la-ib_device.lo \
+base/libuct_ib_la-ib_iface.lo \
+base/libuct_ib_la-ib_log.lo \
+base/libuct_ib_la-ib_md.lo: \
+	base/$(am__dirstamp) base/$(DEPDIR)/$(am__dirstamp)
+mlx5/$(am__dirstamp) mlx5/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+mlx5/libuct_ib_la-ib_mlx5_log.lo \
+mlx5/libuct_ib_la-ib_mlx5.lo: \
+	mlx5/$(am__dirstamp) mlx5/$(DEPDIR)/$(am__dirstamp)
+mlx5/exp/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+# Directory stamps and object prerequisites for mlx5/exp, mlx5/dv and the
+# rc transports.  Every stamp recipe creates the directory of its own
+# target and then touches the stamp.  Each object depends on both the
+# object-directory stamp and the dependency-directory stamp.
+mlx5/exp/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+mlx5/exp/libuct_ib_la-ib_mlx5_hw.lo \
+mlx5/exp/libuct_ib_la-ib_exp.lo \
+mlx5/exp/libuct_ib_la-ib_exp_md.lo: \
+	mlx5/exp/$(am__dirstamp) mlx5/exp/$(DEPDIR)/$(am__dirstamp)
+mlx5/dv/$(am__dirstamp) mlx5/dv/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+mlx5/dv/libuct_ib_la-ib_mlx5_dv.lo \
+mlx5/dv/libuct_ib_la-ib_mlx5dv_md.lo: \
+	mlx5/dv/$(am__dirstamp) mlx5/dv/$(DEPDIR)/$(am__dirstamp)
+rc/base/$(am__dirstamp) rc/base/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+rc/base/libuct_ib_la-rc_ep.lo \
+rc/base/libuct_ib_la-rc_iface.lo: \
+	rc/base/$(am__dirstamp) rc/base/$(DEPDIR)/$(am__dirstamp)
+rc/verbs/$(am__dirstamp) rc/verbs/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+rc/verbs/libuct_ib_la-rc_verbs_ep.lo \
+rc/verbs/libuct_ib_la-rc_verbs_iface.lo: \
+	rc/verbs/$(am__dirstamp) rc/verbs/$(DEPDIR)/$(am__dirstamp)
+rc/accel/$(am__dirstamp) rc/accel/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+rc/accel/libuct_ib_la-rc_mlx5_ep.lo \
+rc/accel/libuct_ib_la-rc_mlx5_iface.lo: \
+	rc/accel/$(am__dirstamp) rc/accel/$(DEPDIR)/$(am__dirstamp)
+# Object prerequisites and directory stamps for the remaining transports.
+# Each stamp recipe creates the directory of its own target and touches it.
+rc/accel/libuct_ib_la-rc_mlx5_common.lo \
+rc/accel/libuct_ib_la-rc_mlx5_devx.lo: \
+	rc/accel/$(am__dirstamp) rc/accel/$(DEPDIR)/$(am__dirstamp)
+dc/$(am__dirstamp) dc/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+dc/libuct_ib_la-dc_mlx5_ep.lo \
+dc/libuct_ib_la-dc_mlx5.lo \
+dc/libuct_ib_la-dc_mlx5_devx.lo: \
+	dc/$(am__dirstamp) dc/$(DEPDIR)/$(am__dirstamp)
+ud/base/$(am__dirstamp) ud/base/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+ud/base/libuct_ib_la-ud_iface_common.lo \
+ud/base/libuct_ib_la-ud_iface.lo \
+ud/base/libuct_ib_la-ud_ep.lo \
+ud/base/libuct_ib_la-ud_log.lo: \
+	ud/base/$(am__dirstamp) ud/base/$(DEPDIR)/$(am__dirstamp)
+ud/verbs/$(am__dirstamp) ud/verbs/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+ud/verbs/libuct_ib_la-ud_verbs.lo: \
+	ud/verbs/$(am__dirstamp) ud/verbs/$(DEPDIR)/$(am__dirstamp)
+ud/accel/$(am__dirstamp) ud/accel/$(DEPDIR)/$(am__dirstamp):
+	@$(MKDIR_P) $(@D)
+	@: > $@
+ud/accel/libuct_ib_la-ud_mlx5_common.lo \
+ud/accel/libuct_ib_la-ud_mlx5.lo: \
+	ud/accel/$(am__dirstamp) ud/accel/$(DEPDIR)/$(am__dirstamp)
+
+# Link the module from its objects and LIBADD libraries through libtool.
+libuct_ib.la: $(libuct_ib_la_OBJECTS) \
+		$(libuct_ib_la_DEPENDENCIES) \
+		$(EXTRA_libuct_ib_la_DEPENDENCIES)
+	$(AM_V_CCLD)$(libuct_ib_la_LINK) $(am_libuct_ib_la_rpath) $(libuct_ib_la_OBJECTS) $(libuct_ib_la_LIBADD) $(LIBS)
+
+# Remove compiled objects and libtool objects from every source directory.
+# The leading "-" keeps make going if an individual rm fails.
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+	-rm -f base/*.$(OBJEXT)
+	-rm -f base/*.lo
+	-rm -f dc/*.$(OBJEXT)
+	-rm -f dc/*.lo
+	-rm -f mlx5/*.$(OBJEXT)
+	-rm -f mlx5/*.lo
+	-rm -f mlx5/dv/*.$(OBJEXT)
+	-rm -f mlx5/dv/*.lo
+	-rm -f mlx5/exp/*.$(OBJEXT)
+	-rm -f mlx5/exp/*.lo
+	-rm -f rc/accel/*.$(OBJEXT)
+	-rm -f rc/accel/*.lo
+	-rm -f rc/base/*.$(OBJEXT)
+	-rm -f rc/base/*.lo
+	-rm -f rc/verbs/*.$(OBJEXT)
+	-rm -f rc/verbs/*.lo
+	-rm -f ud/accel/*.$(OBJEXT)
+	-rm -f ud/accel/*.lo
+	-rm -f ud/base/*.$(OBJEXT)
+	-rm -f ud/base/*.lo
+	-rm -f ud/verbs/*.$(OBJEXT)
+	-rm -f ud/verbs/*.lo
+
+distclean-compile:
+	-rm -f *.tab.c
+
+# Per-object dependency files, included only when dependency tracking is
+# enabled (AMDEP).  These lines are kept exactly as automake emitted them.
+# NOTE(review): the trailing "am--include-marker" is presumably consumed by
+# automake's own tooling -- do not reformat these lines.
+@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_ib_la-ib_device.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_ib_la-ib_iface.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_ib_la-ib_log.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_ib_la-ib_md.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@dc/$(DEPDIR)/libuct_ib_la-dc_mlx5.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_devx.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_ep.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5_log.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5_dv.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5dv_md.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp_md.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_mlx5_hw.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_common.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_devx.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_ep.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_iface.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@rc/base/$(DEPDIR)/libuct_ib_la-rc_ep.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@rc/base/$(DEPDIR)/libuct_ib_la-rc_iface.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_ep.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_iface.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5_common.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ud/base/$(DEPDIR)/libuct_ib_la-ud_ep.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ud/base/$(DEPDIR)/libuct_ib_la-ud_iface.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ud/base/$(DEPDIR)/libuct_ib_la-ud_iface_common.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ud/base/$(DEPDIR)/libuct_ib_la-ud_log.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@ud/verbs/$(DEPDIR)/libuct_ib_la-ud_verbs.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +base/libuct_ib_la-ib_device.lo: base/ib_device.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT base/libuct_ib_la-ib_device.lo -MD -MP -MF base/$(DEPDIR)/libuct_ib_la-ib_device.Tpo -c -o base/libuct_ib_la-ib_device.lo `test -f 'base/ib_device.c' || echo '$(srcdir)/'`base/ib_device.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_ib_la-ib_device.Tpo base/$(DEPDIR)/libuct_ib_la-ib_device.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/ib_device.c' object='base/libuct_ib_la-ib_device.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o base/libuct_ib_la-ib_device.lo `test -f 'base/ib_device.c' || echo '$(srcdir)/'`base/ib_device.c + +base/libuct_ib_la-ib_iface.lo: base/ib_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT base/libuct_ib_la-ib_iface.lo -MD -MP -MF base/$(DEPDIR)/libuct_ib_la-ib_iface.Tpo -c -o base/libuct_ib_la-ib_iface.lo `test -f 'base/ib_iface.c' || echo '$(srcdir)/'`base/ib_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_ib_la-ib_iface.Tpo base/$(DEPDIR)/libuct_ib_la-ib_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/ib_iface.c' object='base/libuct_ib_la-ib_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) 
$(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o base/libuct_ib_la-ib_iface.lo `test -f 'base/ib_iface.c' || echo '$(srcdir)/'`base/ib_iface.c + +base/libuct_ib_la-ib_log.lo: base/ib_log.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT base/libuct_ib_la-ib_log.lo -MD -MP -MF base/$(DEPDIR)/libuct_ib_la-ib_log.Tpo -c -o base/libuct_ib_la-ib_log.lo `test -f 'base/ib_log.c' || echo '$(srcdir)/'`base/ib_log.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_ib_la-ib_log.Tpo base/$(DEPDIR)/libuct_ib_la-ib_log.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/ib_log.c' object='base/libuct_ib_la-ib_log.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o base/libuct_ib_la-ib_log.lo `test -f 'base/ib_log.c' || echo '$(srcdir)/'`base/ib_log.c + +base/libuct_ib_la-ib_md.lo: base/ib_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT base/libuct_ib_la-ib_md.lo -MD -MP -MF base/$(DEPDIR)/libuct_ib_la-ib_md.Tpo -c -o base/libuct_ib_la-ib_md.lo `test -f 'base/ib_md.c' || echo '$(srcdir)/'`base/ib_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_ib_la-ib_md.Tpo base/$(DEPDIR)/libuct_ib_la-ib_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/ib_md.c' object='base/libuct_ib_la-ib_md.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o base/libuct_ib_la-ib_md.lo `test -f 'base/ib_md.c' || echo '$(srcdir)/'`base/ib_md.c + +mlx5/libuct_ib_la-ib_mlx5_log.lo: mlx5/ib_mlx5_log.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT mlx5/libuct_ib_la-ib_mlx5_log.lo -MD -MP -MF mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5_log.Tpo -c -o mlx5/libuct_ib_la-ib_mlx5_log.lo `test -f 'mlx5/ib_mlx5_log.c' || echo '$(srcdir)/'`mlx5/ib_mlx5_log.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5_log.Tpo mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5_log.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mlx5/ib_mlx5_log.c' object='mlx5/libuct_ib_la-ib_mlx5_log.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o mlx5/libuct_ib_la-ib_mlx5_log.lo `test -f 'mlx5/ib_mlx5_log.c' || echo '$(srcdir)/'`mlx5/ib_mlx5_log.c + +mlx5/libuct_ib_la-ib_mlx5.lo: mlx5/ib_mlx5.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT mlx5/libuct_ib_la-ib_mlx5.lo -MD -MP -MF mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5.Tpo -c -o 
mlx5/libuct_ib_la-ib_mlx5.lo `test -f 'mlx5/ib_mlx5.c' || echo '$(srcdir)/'`mlx5/ib_mlx5.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5.Tpo mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mlx5/ib_mlx5.c' object='mlx5/libuct_ib_la-ib_mlx5.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o mlx5/libuct_ib_la-ib_mlx5.lo `test -f 'mlx5/ib_mlx5.c' || echo '$(srcdir)/'`mlx5/ib_mlx5.c + +mlx5/exp/libuct_ib_la-ib_mlx5_hw.lo: mlx5/exp/ib_mlx5_hw.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT mlx5/exp/libuct_ib_la-ib_mlx5_hw.lo -MD -MP -MF mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_mlx5_hw.Tpo -c -o mlx5/exp/libuct_ib_la-ib_mlx5_hw.lo `test -f 'mlx5/exp/ib_mlx5_hw.c' || echo '$(srcdir)/'`mlx5/exp/ib_mlx5_hw.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_mlx5_hw.Tpo mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_mlx5_hw.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mlx5/exp/ib_mlx5_hw.c' object='mlx5/exp/libuct_ib_la-ib_mlx5_hw.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o mlx5/exp/libuct_ib_la-ib_mlx5_hw.lo `test -f 'mlx5/exp/ib_mlx5_hw.c' || echo 
'$(srcdir)/'`mlx5/exp/ib_mlx5_hw.c + +mlx5/exp/libuct_ib_la-ib_exp.lo: mlx5/exp/ib_exp.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT mlx5/exp/libuct_ib_la-ib_exp.lo -MD -MP -MF mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp.Tpo -c -o mlx5/exp/libuct_ib_la-ib_exp.lo `test -f 'mlx5/exp/ib_exp.c' || echo '$(srcdir)/'`mlx5/exp/ib_exp.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp.Tpo mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mlx5/exp/ib_exp.c' object='mlx5/exp/libuct_ib_la-ib_exp.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o mlx5/exp/libuct_ib_la-ib_exp.lo `test -f 'mlx5/exp/ib_exp.c' || echo '$(srcdir)/'`mlx5/exp/ib_exp.c + +mlx5/exp/libuct_ib_la-ib_exp_md.lo: mlx5/exp/ib_exp_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT mlx5/exp/libuct_ib_la-ib_exp_md.lo -MD -MP -MF mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp_md.Tpo -c -o mlx5/exp/libuct_ib_la-ib_exp_md.lo `test -f 'mlx5/exp/ib_exp_md.c' || echo '$(srcdir)/'`mlx5/exp/ib_exp_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp_md.Tpo mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mlx5/exp/ib_exp_md.c' object='mlx5/exp/libuct_ib_la-ib_exp_md.lo' libtool=yes 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o mlx5/exp/libuct_ib_la-ib_exp_md.lo `test -f 'mlx5/exp/ib_exp_md.c' || echo '$(srcdir)/'`mlx5/exp/ib_exp_md.c + +mlx5/dv/libuct_ib_la-ib_mlx5_dv.lo: mlx5/dv/ib_mlx5_dv.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT mlx5/dv/libuct_ib_la-ib_mlx5_dv.lo -MD -MP -MF mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5_dv.Tpo -c -o mlx5/dv/libuct_ib_la-ib_mlx5_dv.lo `test -f 'mlx5/dv/ib_mlx5_dv.c' || echo '$(srcdir)/'`mlx5/dv/ib_mlx5_dv.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5_dv.Tpo mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5_dv.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mlx5/dv/ib_mlx5_dv.c' object='mlx5/dv/libuct_ib_la-ib_mlx5_dv.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o mlx5/dv/libuct_ib_la-ib_mlx5_dv.lo `test -f 'mlx5/dv/ib_mlx5_dv.c' || echo '$(srcdir)/'`mlx5/dv/ib_mlx5_dv.c + +mlx5/dv/libuct_ib_la-ib_mlx5dv_md.lo: mlx5/dv/ib_mlx5dv_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT 
mlx5/dv/libuct_ib_la-ib_mlx5dv_md.lo -MD -MP -MF mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5dv_md.Tpo -c -o mlx5/dv/libuct_ib_la-ib_mlx5dv_md.lo `test -f 'mlx5/dv/ib_mlx5dv_md.c' || echo '$(srcdir)/'`mlx5/dv/ib_mlx5dv_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5dv_md.Tpo mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5dv_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mlx5/dv/ib_mlx5dv_md.c' object='mlx5/dv/libuct_ib_la-ib_mlx5dv_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o mlx5/dv/libuct_ib_la-ib_mlx5dv_md.lo `test -f 'mlx5/dv/ib_mlx5dv_md.c' || echo '$(srcdir)/'`mlx5/dv/ib_mlx5dv_md.c + +rc/base/libuct_ib_la-rc_ep.lo: rc/base/rc_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT rc/base/libuct_ib_la-rc_ep.lo -MD -MP -MF rc/base/$(DEPDIR)/libuct_ib_la-rc_ep.Tpo -c -o rc/base/libuct_ib_la-rc_ep.lo `test -f 'rc/base/rc_ep.c' || echo '$(srcdir)/'`rc/base/rc_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rc/base/$(DEPDIR)/libuct_ib_la-rc_ep.Tpo rc/base/$(DEPDIR)/libuct_ib_la-rc_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rc/base/rc_ep.c' object='rc/base/libuct_ib_la-rc_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) 
$(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o rc/base/libuct_ib_la-rc_ep.lo `test -f 'rc/base/rc_ep.c' || echo '$(srcdir)/'`rc/base/rc_ep.c + +rc/base/libuct_ib_la-rc_iface.lo: rc/base/rc_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT rc/base/libuct_ib_la-rc_iface.lo -MD -MP -MF rc/base/$(DEPDIR)/libuct_ib_la-rc_iface.Tpo -c -o rc/base/libuct_ib_la-rc_iface.lo `test -f 'rc/base/rc_iface.c' || echo '$(srcdir)/'`rc/base/rc_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rc/base/$(DEPDIR)/libuct_ib_la-rc_iface.Tpo rc/base/$(DEPDIR)/libuct_ib_la-rc_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rc/base/rc_iface.c' object='rc/base/libuct_ib_la-rc_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o rc/base/libuct_ib_la-rc_iface.lo `test -f 'rc/base/rc_iface.c' || echo '$(srcdir)/'`rc/base/rc_iface.c + +rc/verbs/libuct_ib_la-rc_verbs_ep.lo: rc/verbs/rc_verbs_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT rc/verbs/libuct_ib_la-rc_verbs_ep.lo -MD -MP -MF rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_ep.Tpo -c -o rc/verbs/libuct_ib_la-rc_verbs_ep.lo `test -f 'rc/verbs/rc_verbs_ep.c' || echo '$(srcdir)/'`rc/verbs/rc_verbs_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_ep.Tpo rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_ep.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rc/verbs/rc_verbs_ep.c' object='rc/verbs/libuct_ib_la-rc_verbs_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o rc/verbs/libuct_ib_la-rc_verbs_ep.lo `test -f 'rc/verbs/rc_verbs_ep.c' || echo '$(srcdir)/'`rc/verbs/rc_verbs_ep.c + +rc/verbs/libuct_ib_la-rc_verbs_iface.lo: rc/verbs/rc_verbs_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT rc/verbs/libuct_ib_la-rc_verbs_iface.lo -MD -MP -MF rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_iface.Tpo -c -o rc/verbs/libuct_ib_la-rc_verbs_iface.lo `test -f 'rc/verbs/rc_verbs_iface.c' || echo '$(srcdir)/'`rc/verbs/rc_verbs_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_iface.Tpo rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rc/verbs/rc_verbs_iface.c' object='rc/verbs/libuct_ib_la-rc_verbs_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o rc/verbs/libuct_ib_la-rc_verbs_iface.lo `test -f 'rc/verbs/rc_verbs_iface.c' || echo '$(srcdir)/'`rc/verbs/rc_verbs_iface.c + +rc/accel/libuct_ib_la-rc_mlx5_ep.lo: rc/accel/rc_mlx5_ep.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT rc/accel/libuct_ib_la-rc_mlx5_ep.lo -MD -MP -MF rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_ep.Tpo -c -o rc/accel/libuct_ib_la-rc_mlx5_ep.lo `test -f 'rc/accel/rc_mlx5_ep.c' || echo '$(srcdir)/'`rc/accel/rc_mlx5_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_ep.Tpo rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rc/accel/rc_mlx5_ep.c' object='rc/accel/libuct_ib_la-rc_mlx5_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o rc/accel/libuct_ib_la-rc_mlx5_ep.lo `test -f 'rc/accel/rc_mlx5_ep.c' || echo '$(srcdir)/'`rc/accel/rc_mlx5_ep.c + +rc/accel/libuct_ib_la-rc_mlx5_iface.lo: rc/accel/rc_mlx5_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT rc/accel/libuct_ib_la-rc_mlx5_iface.lo -MD -MP -MF rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_iface.Tpo -c -o rc/accel/libuct_ib_la-rc_mlx5_iface.lo `test -f 'rc/accel/rc_mlx5_iface.c' || echo '$(srcdir)/'`rc/accel/rc_mlx5_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_iface.Tpo rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rc/accel/rc_mlx5_iface.c' object='rc/accel/libuct_ib_la-rc_mlx5_iface.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o rc/accel/libuct_ib_la-rc_mlx5_iface.lo `test -f 'rc/accel/rc_mlx5_iface.c' || echo '$(srcdir)/'`rc/accel/rc_mlx5_iface.c + +rc/accel/libuct_ib_la-rc_mlx5_common.lo: rc/accel/rc_mlx5_common.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT rc/accel/libuct_ib_la-rc_mlx5_common.lo -MD -MP -MF rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_common.Tpo -c -o rc/accel/libuct_ib_la-rc_mlx5_common.lo `test -f 'rc/accel/rc_mlx5_common.c' || echo '$(srcdir)/'`rc/accel/rc_mlx5_common.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_common.Tpo rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_common.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rc/accel/rc_mlx5_common.c' object='rc/accel/libuct_ib_la-rc_mlx5_common.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o rc/accel/libuct_ib_la-rc_mlx5_common.lo `test -f 'rc/accel/rc_mlx5_common.c' || echo '$(srcdir)/'`rc/accel/rc_mlx5_common.c + +rc/accel/libuct_ib_la-rc_mlx5_devx.lo: rc/accel/rc_mlx5_devx.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT rc/accel/libuct_ib_la-rc_mlx5_devx.lo -MD -MP -MF rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_devx.Tpo -c -o rc/accel/libuct_ib_la-rc_mlx5_devx.lo `test -f 'rc/accel/rc_mlx5_devx.c' || echo '$(srcdir)/'`rc/accel/rc_mlx5_devx.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_devx.Tpo rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_devx.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rc/accel/rc_mlx5_devx.c' object='rc/accel/libuct_ib_la-rc_mlx5_devx.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o rc/accel/libuct_ib_la-rc_mlx5_devx.lo `test -f 'rc/accel/rc_mlx5_devx.c' || echo '$(srcdir)/'`rc/accel/rc_mlx5_devx.c + +dc/libuct_ib_la-dc_mlx5_ep.lo: dc/dc_mlx5_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT dc/libuct_ib_la-dc_mlx5_ep.lo -MD -MP -MF dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_ep.Tpo -c -o dc/libuct_ib_la-dc_mlx5_ep.lo `test -f 'dc/dc_mlx5_ep.c' || echo '$(srcdir)/'`dc/dc_mlx5_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_ep.Tpo dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dc/dc_mlx5_ep.c' object='dc/libuct_ib_la-dc_mlx5_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile 
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o dc/libuct_ib_la-dc_mlx5_ep.lo `test -f 'dc/dc_mlx5_ep.c' || echo '$(srcdir)/'`dc/dc_mlx5_ep.c + +dc/libuct_ib_la-dc_mlx5.lo: dc/dc_mlx5.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT dc/libuct_ib_la-dc_mlx5.lo -MD -MP -MF dc/$(DEPDIR)/libuct_ib_la-dc_mlx5.Tpo -c -o dc/libuct_ib_la-dc_mlx5.lo `test -f 'dc/dc_mlx5.c' || echo '$(srcdir)/'`dc/dc_mlx5.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) dc/$(DEPDIR)/libuct_ib_la-dc_mlx5.Tpo dc/$(DEPDIR)/libuct_ib_la-dc_mlx5.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dc/dc_mlx5.c' object='dc/libuct_ib_la-dc_mlx5.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o dc/libuct_ib_la-dc_mlx5.lo `test -f 'dc/dc_mlx5.c' || echo '$(srcdir)/'`dc/dc_mlx5.c + +dc/libuct_ib_la-dc_mlx5_devx.lo: dc/dc_mlx5_devx.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT dc/libuct_ib_la-dc_mlx5_devx.lo -MD -MP -MF dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_devx.Tpo -c -o dc/libuct_ib_la-dc_mlx5_devx.lo `test -f 'dc/dc_mlx5_devx.c' || echo '$(srcdir)/'`dc/dc_mlx5_devx.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_devx.Tpo dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_devx.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='dc/dc_mlx5_devx.c' object='dc/libuct_ib_la-dc_mlx5_devx.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o dc/libuct_ib_la-dc_mlx5_devx.lo `test -f 'dc/dc_mlx5_devx.c' || echo '$(srcdir)/'`dc/dc_mlx5_devx.c + +ud/base/libuct_ib_la-ud_iface_common.lo: ud/base/ud_iface_common.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT ud/base/libuct_ib_la-ud_iface_common.lo -MD -MP -MF ud/base/$(DEPDIR)/libuct_ib_la-ud_iface_common.Tpo -c -o ud/base/libuct_ib_la-ud_iface_common.lo `test -f 'ud/base/ud_iface_common.c' || echo '$(srcdir)/'`ud/base/ud_iface_common.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ud/base/$(DEPDIR)/libuct_ib_la-ud_iface_common.Tpo ud/base/$(DEPDIR)/libuct_ib_la-ud_iface_common.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ud/base/ud_iface_common.c' object='ud/base/libuct_ib_la-ud_iface_common.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o ud/base/libuct_ib_la-ud_iface_common.lo `test -f 'ud/base/ud_iface_common.c' || echo '$(srcdir)/'`ud/base/ud_iface_common.c + +ud/base/libuct_ib_la-ud_iface.lo: ud/base/ud_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT ud/base/libuct_ib_la-ud_iface.lo -MD -MP -MF ud/base/$(DEPDIR)/libuct_ib_la-ud_iface.Tpo -c -o ud/base/libuct_ib_la-ud_iface.lo `test -f 'ud/base/ud_iface.c' || echo '$(srcdir)/'`ud/base/ud_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ud/base/$(DEPDIR)/libuct_ib_la-ud_iface.Tpo ud/base/$(DEPDIR)/libuct_ib_la-ud_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ud/base/ud_iface.c' object='ud/base/libuct_ib_la-ud_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o ud/base/libuct_ib_la-ud_iface.lo `test -f 'ud/base/ud_iface.c' || echo '$(srcdir)/'`ud/base/ud_iface.c + +ud/base/libuct_ib_la-ud_ep.lo: ud/base/ud_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT ud/base/libuct_ib_la-ud_ep.lo -MD -MP -MF ud/base/$(DEPDIR)/libuct_ib_la-ud_ep.Tpo -c -o ud/base/libuct_ib_la-ud_ep.lo `test -f 'ud/base/ud_ep.c' || echo '$(srcdir)/'`ud/base/ud_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ud/base/$(DEPDIR)/libuct_ib_la-ud_ep.Tpo ud/base/$(DEPDIR)/libuct_ib_la-ud_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ud/base/ud_ep.c' object='ud/base/libuct_ib_la-ud_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o ud/base/libuct_ib_la-ud_ep.lo `test -f 'ud/base/ud_ep.c' || echo '$(srcdir)/'`ud/base/ud_ep.c + +ud/base/libuct_ib_la-ud_log.lo: ud/base/ud_log.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT ud/base/libuct_ib_la-ud_log.lo -MD -MP -MF ud/base/$(DEPDIR)/libuct_ib_la-ud_log.Tpo -c -o ud/base/libuct_ib_la-ud_log.lo `test -f 'ud/base/ud_log.c' || echo '$(srcdir)/'`ud/base/ud_log.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ud/base/$(DEPDIR)/libuct_ib_la-ud_log.Tpo ud/base/$(DEPDIR)/libuct_ib_la-ud_log.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ud/base/ud_log.c' object='ud/base/libuct_ib_la-ud_log.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o ud/base/libuct_ib_la-ud_log.lo `test -f 'ud/base/ud_log.c' || echo '$(srcdir)/'`ud/base/ud_log.c + +ud/verbs/libuct_ib_la-ud_verbs.lo: ud/verbs/ud_verbs.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT ud/verbs/libuct_ib_la-ud_verbs.lo -MD -MP -MF ud/verbs/$(DEPDIR)/libuct_ib_la-ud_verbs.Tpo -c -o ud/verbs/libuct_ib_la-ud_verbs.lo `test -f 'ud/verbs/ud_verbs.c' || echo '$(srcdir)/'`ud/verbs/ud_verbs.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ud/verbs/$(DEPDIR)/libuct_ib_la-ud_verbs.Tpo 
ud/verbs/$(DEPDIR)/libuct_ib_la-ud_verbs.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ud/verbs/ud_verbs.c' object='ud/verbs/libuct_ib_la-ud_verbs.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o ud/verbs/libuct_ib_la-ud_verbs.lo `test -f 'ud/verbs/ud_verbs.c' || echo '$(srcdir)/'`ud/verbs/ud_verbs.c + +ud/accel/libuct_ib_la-ud_mlx5_common.lo: ud/accel/ud_mlx5_common.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT ud/accel/libuct_ib_la-ud_mlx5_common.lo -MD -MP -MF ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5_common.Tpo -c -o ud/accel/libuct_ib_la-ud_mlx5_common.lo `test -f 'ud/accel/ud_mlx5_common.c' || echo '$(srcdir)/'`ud/accel/ud_mlx5_common.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5_common.Tpo ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5_common.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ud/accel/ud_mlx5_common.c' object='ud/accel/libuct_ib_la-ud_mlx5_common.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o ud/accel/libuct_ib_la-ud_mlx5_common.lo `test -f 'ud/accel/ud_mlx5_common.c' || echo '$(srcdir)/'`ud/accel/ud_mlx5_common.c + +ud/accel/libuct_ib_la-ud_mlx5.lo: ud/accel/ud_mlx5.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -MT ud/accel/libuct_ib_la-ud_mlx5.lo -MD -MP -MF ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5.Tpo -c -o ud/accel/libuct_ib_la-ud_mlx5.lo `test -f 'ud/accel/ud_mlx5.c' || echo '$(srcdir)/'`ud/accel/ud_mlx5.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5.Tpo ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ud/accel/ud_mlx5.c' object='ud/accel/libuct_ib_la-ud_mlx5.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_la_CFLAGS) $(CFLAGS) -c -o ud/accel/libuct_ib_la-ud_mlx5.lo `test -f 'ud/accel/ud_mlx5.c' || echo '$(srcdir)/'`ud/accel/ud_mlx5.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf base/.libs base/_libs + -rm -rf dc/.libs dc/_libs + -rm -rf mlx5/.libs mlx5/_libs + -rm -rf mlx5/dv/.libs mlx5/dv/_libs + -rm -rf mlx5/exp/.libs mlx5/exp/_libs + -rm -rf rc/accel/.libs rc/accel/_libs + -rm -rf rc/base/.libs rc/base/_libs + -rm -rf rc/verbs/.libs rc/verbs/_libs + -rm -rf ud/accel/.libs ud/accel/_libs + -rm -rf ud/base/.libs ud/base/_libs + -rm -rf ud/verbs/.libs ud/verbs/_libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. 
+# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +@HAVE_IB_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f base/$(DEPDIR)/$(am__dirstamp) + -rm -f base/$(am__dirstamp) + -rm -f dc/$(DEPDIR)/$(am__dirstamp) + -rm -f dc/$(am__dirstamp) + -rm -f mlx5/$(DEPDIR)/$(am__dirstamp) + -rm -f mlx5/$(am__dirstamp) + -rm -f mlx5/dv/$(DEPDIR)/$(am__dirstamp) + -rm -f mlx5/dv/$(am__dirstamp) + -rm -f mlx5/exp/$(DEPDIR)/$(am__dirstamp) + -rm -f mlx5/exp/$(am__dirstamp) + -rm -f rc/accel/$(DEPDIR)/$(am__dirstamp) + -rm -f rc/accel/$(am__dirstamp) + -rm -f rc/base/$(DEPDIR)/$(am__dirstamp) + -rm -f rc/base/$(am__dirstamp) + -rm -f rc/verbs/$(DEPDIR)/$(am__dirstamp) + -rm -f rc/verbs/$(am__dirstamp) + -rm -f ud/accel/$(DEPDIR)/$(am__dirstamp) + -rm -f ud/accel/$(am__dirstamp) + -rm -f ud/base/$(DEPDIR)/$(am__dirstamp) + -rm -f ud/base/$(am__dirstamp) + -rm -f ud/verbs/$(DEPDIR)/$(am__dirstamp) + -rm -f ud/verbs/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f base/$(DEPDIR)/libuct_ib_la-ib_device.Plo + -rm -f base/$(DEPDIR)/libuct_ib_la-ib_iface.Plo + -rm -f base/$(DEPDIR)/libuct_ib_la-ib_log.Plo + -rm -f base/$(DEPDIR)/libuct_ib_la-ib_md.Plo + -rm -f dc/$(DEPDIR)/libuct_ib_la-dc_mlx5.Plo + -rm -f dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_devx.Plo + -rm -f dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_ep.Plo + -rm -f mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5.Plo + -rm -f mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5_log.Plo + -rm -f mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5_dv.Plo + -rm -f mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5dv_md.Plo + -rm -f mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp.Plo + -rm -f mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp_md.Plo + -rm -f mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_mlx5_hw.Plo + -rm -f rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_common.Plo + -rm -f rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_devx.Plo + -rm -f rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_ep.Plo + -rm -f rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_iface.Plo + -rm -f rc/base/$(DEPDIR)/libuct_ib_la-rc_ep.Plo + -rm -f rc/base/$(DEPDIR)/libuct_ib_la-rc_iface.Plo + -rm -f rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_ep.Plo + -rm -f rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_iface.Plo + -rm -f ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5.Plo + -rm -f ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5_common.Plo + -rm -f ud/base/$(DEPDIR)/libuct_ib_la-ud_ep.Plo + -rm -f ud/base/$(DEPDIR)/libuct_ib_la-ud_iface.Plo + -rm -f ud/base/$(DEPDIR)/libuct_ib_la-ud_iface_common.Plo + -rm -f ud/base/$(DEPDIR)/libuct_ib_la-ud_log.Plo + -rm -f ud/verbs/$(DEPDIR)/libuct_ib_la-ud_verbs.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-recursive + 
+install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f base/$(DEPDIR)/libuct_ib_la-ib_device.Plo + -rm -f base/$(DEPDIR)/libuct_ib_la-ib_iface.Plo + -rm -f base/$(DEPDIR)/libuct_ib_la-ib_log.Plo + -rm -f base/$(DEPDIR)/libuct_ib_la-ib_md.Plo + -rm -f dc/$(DEPDIR)/libuct_ib_la-dc_mlx5.Plo + -rm -f dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_devx.Plo + -rm -f dc/$(DEPDIR)/libuct_ib_la-dc_mlx5_ep.Plo + -rm -f mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5.Plo + -rm -f mlx5/$(DEPDIR)/libuct_ib_la-ib_mlx5_log.Plo + -rm -f mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5_dv.Plo + -rm -f mlx5/dv/$(DEPDIR)/libuct_ib_la-ib_mlx5dv_md.Plo + -rm -f mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp.Plo + -rm -f mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_exp_md.Plo + -rm -f mlx5/exp/$(DEPDIR)/libuct_ib_la-ib_mlx5_hw.Plo + -rm -f rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_common.Plo + -rm -f rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_devx.Plo + -rm -f rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_ep.Plo + -rm -f rc/accel/$(DEPDIR)/libuct_ib_la-rc_mlx5_iface.Plo + -rm -f rc/base/$(DEPDIR)/libuct_ib_la-rc_ep.Plo + -rm -f rc/base/$(DEPDIR)/libuct_ib_la-rc_iface.Plo + -rm -f rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_ep.Plo + -rm -f rc/verbs/$(DEPDIR)/libuct_ib_la-rc_verbs_iface.Plo + -rm -f ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5.Plo + -rm -f ud/accel/$(DEPDIR)/libuct_ib_la-ud_mlx5_common.Plo + -rm -f ud/base/$(DEPDIR)/libuct_ib_la-ud_ep.Plo + -rm -f ud/base/$(DEPDIR)/libuct_ib_la-ud_iface.Plo + -rm -f ud/base/$(DEPDIR)/libuct_ib_la-ud_iface_common.Plo + -rm -f ud/base/$(DEPDIR)/libuct_ib_la-ud_log.Plo + -rm -f ud/verbs/$(DEPDIR)/libuct_ib_la-ud_verbs.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + 
mostlyclean: mostlyclean-recursive

mostlyclean-am: mostlyclean-compile mostlyclean-generic \
	mostlyclean-libtool

pdf: pdf-recursive

pdf-am:

ps: ps-recursive

ps-am:

uninstall-am: uninstall-moduleLTLIBRARIES

.MAKE: $(am__recursive_targets) install-am install-strip

.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \
	am--depfiles check check-am clean clean-generic clean-libtool \
	clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \
	distclean-compile distclean-generic distclean-libtool \
	distclean-tags distdir dvi dvi-am html html-am info info-am \
	install install-am install-data install-data-am install-dvi \
	install-dvi-am install-exec install-exec-am install-html \
	install-html-am install-info install-info-am install-man \
	install-moduleLTLIBRARIES install-pdf install-pdf-am \
	install-ps install-ps-am install-strip installcheck \
	installcheck-am installdirs installdirs-am maintainer-clean \
	maintainer-clean-generic mostlyclean mostlyclean-compile \
	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
	tags tags-am uninstall uninstall-am \
	uninstall-moduleLTLIBRARIES

.PRECIOUS: Makefile


@HAVE_IB_TRUE@all-local: $(local_la_modules)

# Create symbolic links for the built modules under $(localmoduledir).
# The *.la files are linked as well, to create proper makefile dependencies.
#
# The first loop creates the links and stops at the first failing link
# ("|| exit 1"), so a broken module directory is reported by this rule
# instead of by a later step. $(CURDIR) is used for the absolute path of the
# build directory, which avoids spawning a shell just to run "pwd".
# The second loop only reports each linked file through $(AM_V_LN).
@HAVE_IB_TRUE@$(local_la_modules): $(module_LTLIBRARIES)
@HAVE_IB_TRUE@	$(AM_V_at)$(MKDIR_P) "$(localmoduledir)"
@HAVE_IB_TRUE@	$(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \
@HAVE_IB_TRUE@	  (cd "$(localmoduledir)" && $(LN_RS) -nf "$(CURDIR)/$$lib") || exit 1; \
@HAVE_IB_TRUE@	done
@HAVE_IB_TRUE@	@for lib in *.la $(objdir)/*$(shrext)*; do \
@HAVE_IB_TRUE@	  $(AM_V_LN) $$lib; \
@HAVE_IB_TRUE@	done

# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT: diff --git a/src/uct/ib/base/ib_device.c b/src/uct/ib/base/ib_device.c new file mode 100644 index 0000000..2ad5e04 --- /dev/null +++ b/src/uct/ib/base/ib_device.c @@ -0,0 +1,1079 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include "ib_device.h" +#include "ib_md.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +typedef struct { + union ibv_gid gid; + struct { + uint8_t major; + uint8_t minor; + } roce_version; +} uct_ib_device_gid_info_t; + + +/* This table is according to "Encoding for RNR NAK Timer Field" + * in IBTA specification */ +const double uct_ib_qp_rnr_time_ms[] = { + 655.36, 0.01, 0.02, 0.03, 0.04, 0.06, 0.08, 0.12, + 0.16, 0.24, 0.32, 0.48, 0.64, 0.96, 1.28, 1.92, + 2.56, 3.84, 5.12, 7.68, 10.24, 15.36, 20.48, 30.72, + 40.96, 61.44, 81.92, 122.88, 163.84, 245.76, 327.68, 491.52 +}; + + +/* use both gid + lid data for key generarion (lid - ib based, gid - RoCE) */ +static UCS_F_ALWAYS_INLINE +khint32_t uct_ib_kh_ah_hash_func(struct ibv_ah_attr attr) +{ + return kh_int64_hash_func(attr.grh.dgid.global.subnet_prefix ^ + attr.grh.dgid.global.interface_id ^ + attr.dlid); +} + +static UCS_F_ALWAYS_INLINE +int uct_ib_kh_ah_hash_equal(struct ibv_ah_attr a, struct ibv_ah_attr b) +{ + return !memcmp(&a, &b, sizeof(a)); +} + +KHASH_IMPL(uct_ib_ah, struct ibv_ah_attr, struct ibv_ah*, 1, + uct_ib_kh_ah_hash_func, uct_ib_kh_ah_hash_equal) + + +#if ENABLE_STATS +static ucs_stats_class_t uct_ib_device_stats_class = { + .name = "", + .num_counters = UCT_IB_DEVICE_STAT_LAST, + .counter_names = { + [UCT_IB_DEVICE_STAT_ASYNC_EVENT] = "async_event" + } +}; +#endif + +static uct_ib_device_spec_t uct_ib_builtin_device_specs[] = { + {"ConnectX-3", {0x15b3, 4099}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX4_PRM, 10}, + {"ConnectX-3 Pro", {0x15b3, 4103}, + 
UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX4_PRM, 11}, + {"Connect-IB", {0x15b3, 4113}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V1, 20}, + {"ConnectX-4", {0x15b3, 4115}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V1, 30}, + {"ConnectX-4", {0x15b3, 4116}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V1, 29}, + {"ConnectX-4 LX", {0x15b3, 4117}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V1, 28}, + {"ConnectX-4 LX VF", {0x15b3, 4118}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V1, 28}, + {"ConnectX-5", {0x15b3, 4119}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V2, 38}, + {"ConnectX-5", {0x15b3, 4121}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V2, 40}, + {"ConnectX-5", {0x15b3, 4120}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V2, 39}, + {"ConnectX-5", {0x15b3, 41682}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V2, 37}, + {"ConnectX-5", {0x15b3, 4122}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V2, 36}, + {"ConnectX-6", {0x15b3, 4123}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V2, 50}, + {"ConnectX-6 VF", {0x15b3, 4124}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V2, 50}, + {"ConnectX-6 DX", {0x15b3, 4125}, + UCT_IB_DEVICE_FLAG_MELLANOX | UCT_IB_DEVICE_FLAG_MLX5_PRM | + UCT_IB_DEVICE_FLAG_DC_V2, 50}, + {"Generic HCA", {0, 0}, 0, 0}, + {NULL} +}; + +static void uct_ib_device_get_locality(const char *dev_name, + ucs_sys_cpuset_t *cpu_mask, + int *numa_node) +{ + char *p, buf[ucs_max(CPU_SETSIZE, 10)]; + ucs_status_t status; + ssize_t nread; + uint32_t 
word; + int base, k; + long n; + + /* Read list of CPUs close to the device */ + CPU_ZERO(cpu_mask); + nread = ucs_read_file(buf, sizeof(buf) - 1, 1, UCT_IB_DEVICE_SYSFS_FMT, + dev_name, "local_cpus"); + if (nread >= 0) { + buf[CPU_SETSIZE - 1] = '\0'; + base = 0; + do { + p = strrchr(buf, ','); + if (p == NULL) { + p = buf; + } else if (*p == ',') { + *(p++) = 0; + } + + word = strtoul(p, 0, 16); + for (k = 0; word; ++k, word >>= 1) { + if (word & 1) { + CPU_SET(base + k, cpu_mask); + } + } + base += 32; + } while ((base < CPU_SETSIZE) && (p != buf)); + } else { + /* If affinity file is not present, treat all CPUs as local */ + for (k = 0; k < CPU_SETSIZE; ++k) { + CPU_SET(k, cpu_mask); + } + } + + /* Read NUMA node number */ + status = ucs_read_file_number(&n, 1, + "/sys/class/infiniband/%s/device/numa_node", + dev_name); + *numa_node = (status == UCS_OK) ? n : -1; +} + +static void uct_ib_async_event_handler(int fd, void *arg) +{ + uct_ib_device_t *dev = arg; + struct ibv_async_event event; + ucs_log_level_t level; + char event_info[200]; + int ret; + + ret = ibv_get_async_event(dev->ibv_context, &event); + if (ret != 0) { + if (errno != EAGAIN) { + ucs_warn("ibv_get_async_event() failed: %m"); + } + return; + } + + switch (event.event_type) { + case IBV_EVENT_CQ_ERR: + snprintf(event_info, sizeof(event_info), "%s on CQ %p", + ibv_event_type_str(event.event_type), event.element.cq); + level = UCS_LOG_LEVEL_ERROR; + break; + case IBV_EVENT_QP_FATAL: + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_COMM_EST: + case IBV_EVENT_SQ_DRAINED: + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + snprintf(event_info, sizeof(event_info), "%s on QPN 0x%x", + ibv_event_type_str(event.event_type), event.element.qp->qp_num); + level = UCS_LOG_LEVEL_ERROR; + break; + case IBV_EVENT_QP_LAST_WQE_REACHED: + snprintf(event_info, sizeof(event_info), "SRQ-attached QP 0x%x was flushed", + event.element.qp->qp_num); + level = UCS_LOG_LEVEL_DEBUG; + 
break; + case IBV_EVENT_SRQ_ERR: + level = UCS_LOG_LEVEL_ERROR; + snprintf(event_info, sizeof(event_info), "%s on SRQ %p", + ibv_event_type_str(event.event_type), event.element.srq); + break; + case IBV_EVENT_SRQ_LIMIT_REACHED: + snprintf(event_info, sizeof(event_info), "%s on SRQ %p", + ibv_event_type_str(event.event_type), event.element.srq); + level = UCS_LOG_LEVEL_DEBUG; + break; + case IBV_EVENT_DEVICE_FATAL: + case IBV_EVENT_PORT_ERR: + snprintf(event_info, sizeof(event_info), "%s on port %d", + ibv_event_type_str(event.event_type), event.element.port_num); + level = UCS_LOG_LEVEL_ERROR; + break; + case IBV_EVENT_PORT_ACTIVE: +#if HAVE_DECL_IBV_EVENT_GID_CHANGE + case IBV_EVENT_GID_CHANGE: +#endif + case IBV_EVENT_LID_CHANGE: + case IBV_EVENT_PKEY_CHANGE: + case IBV_EVENT_SM_CHANGE: + case IBV_EVENT_CLIENT_REREGISTER: + snprintf(event_info, sizeof(event_info), "%s on port %d", + ibv_event_type_str(event.event_type), event.element.port_num); + level = UCS_LOG_LEVEL_WARN; + break; +#if HAVE_STRUCT_IBV_ASYNC_EVENT_ELEMENT_DCT + case IBV_EXP_EVENT_DCT_KEY_VIOLATION: + snprintf(event_info, sizeof(event_info), "%s on DCTN 0x%x", + "DCT key violation", event.element.dct->dct_num); + level = UCS_LOG_LEVEL_ERROR; + break; + case IBV_EXP_EVENT_DCT_ACCESS_ERR: + if (event.element.dct) { + snprintf(event_info, sizeof(event_info), "%s on DCTN 0x%x", + "DCT access error", event.element.dct->dct_num); + } else { + snprintf(event_info, sizeof(event_info), "%s on DCTN UNKNOWN", + "DCT access error"); + } + level = UCS_LOG_LEVEL_ERROR; + break; + case IBV_EXP_EVENT_DCT_REQ_ERR: + snprintf(event_info, sizeof(event_info), "%s on DCTN 0x%x", + "DCT requester error", event.element.dct->dct_num); + level = UCS_LOG_LEVEL_ERROR; + break; +#endif + default: + snprintf(event_info, sizeof(event_info), "%s (%d)", + ibv_event_type_str(event.event_type), event.event_type); + level = UCS_LOG_LEVEL_INFO; + break; + }; + + UCS_STATS_UPDATE_COUNTER(dev->stats, UCT_IB_DEVICE_STAT_ASYNC_EVENT, 
+1); + ucs_log(level, "IB Async event on %s: %s", uct_ib_device_name(dev), event_info); + ibv_ack_async_event(&event); +} + +static void uct_ib_device_get_ids(uct_ib_device_t *dev) +{ + long vendor_id, device_id; + + if ((ucs_read_file_number(&vendor_id, 1, UCT_IB_DEVICE_SYSFS_FMT, + uct_ib_device_name(dev), "vendor") == UCS_OK) && + (ucs_read_file_number(&device_id, 1, UCT_IB_DEVICE_SYSFS_FMT, + uct_ib_device_name(dev), "device") == UCS_OK)) { + dev->pci_id.vendor = vendor_id; + dev->pci_id.device = device_id; + ucs_debug("%s vendor_id: 0x%x device_id: %d", uct_ib_device_name(dev), + dev->pci_id.vendor, dev->pci_id.device); + } else { + dev->pci_id.vendor = 0; + dev->pci_id.device = 0; + ucs_warn("%s: could not read device/vendor id from sysfs, " + "performance may be affected", uct_ib_device_name(dev)); + } +} + +ucs_status_t uct_ib_device_query(uct_ib_device_t *dev, + struct ibv_device *ibv_device) +{ + ucs_status_t status; + uint8_t i; + int ret; + + status = uct_ib_query_device(dev->ibv_context, &dev->dev_attr); + if (status != UCS_OK) { + return status; + } + + /* Check device type*/ + switch (ibv_device->node_type) { + case IBV_NODE_SWITCH: + dev->first_port = 0; + dev->num_ports = 1; + break; + case IBV_NODE_CA: + default: + dev->first_port = 1; + dev->num_ports = IBV_DEV_ATTR(dev, phys_port_cnt); + break; + } + + if (dev->num_ports > UCT_IB_DEV_MAX_PORTS) { + ucs_error("%s has %d ports, but only up to %d are supported", + ibv_get_device_name(ibv_device), dev->num_ports, + UCT_IB_DEV_MAX_PORTS); + return UCS_ERR_UNSUPPORTED; + } + + /* Query all ports */ + for (i = 0; i < dev->num_ports; ++i) { + ret = ibv_query_port(dev->ibv_context, i + dev->first_port, + &dev->port_attr[i]); + if (ret != 0) { + ucs_error("ibv_query_port() returned %d: %m", ret); + return UCS_ERR_IO_ERROR; + } + } + + uct_ib_device_get_ids(dev); + + return UCS_OK; +} + +ucs_status_t uct_ib_device_init(uct_ib_device_t *dev, + struct ibv_device *ibv_device, int async_events + 
UCS_STATS_ARG(ucs_stats_node_t *stats_parent)) +{ + ucs_status_t status; + + dev->async_events = async_events; + + uct_ib_device_get_locality(ibv_get_device_name(ibv_device), &dev->local_cpus, + &dev->numa_node); + + status = UCS_STATS_NODE_ALLOC(&dev->stats, &uct_ib_device_stats_class, + stats_parent, "device"); + if (status != UCS_OK) { + goto err; + } + + status = ucs_sys_fcntl_modfl(dev->ibv_context->async_fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + goto err_release_stats; + } + + /* Register to IB async events */ + if (dev->async_events) { + status = ucs_async_set_event_handler(UCS_ASYNC_THREAD_LOCK_TYPE, + dev->ibv_context->async_fd, + UCS_EVENT_SET_EVREAD, + uct_ib_async_event_handler, dev, + NULL); + if (status != UCS_OK) { + goto err_release_stats; + } + } + + kh_init_inplace(uct_ib_ah, &dev->ah_hash); + ucs_spinlock_init(&dev->ah_lock); + + ucs_debug("initialized device '%s' (%s) with %d ports", uct_ib_device_name(dev), + ibv_node_type_str(ibv_device->node_type), + dev->num_ports); + return UCS_OK; + +err_release_stats: + UCS_STATS_NODE_FREE(dev->stats); +err: + return status; +} + +void uct_ib_device_cleanup_ah_cached(uct_ib_device_t *dev) +{ + struct ibv_ah *ah; + + kh_foreach_value(&dev->ah_hash, ah, ibv_destroy_ah(ah)); +} + +void uct_ib_device_cleanup(uct_ib_device_t *dev) +{ + ucs_status_t status; + + ucs_debug("destroying ib device %s", uct_ib_device_name(dev)); + + kh_destroy_inplace(uct_ib_ah, &dev->ah_hash); + + status = ucs_spinlock_destroy(&dev->ah_lock); + if (status != UCS_OK) { + ucs_warn("ucs_spinlock_destroy() failed (%d)", status); + } + + if (dev->async_events) { + ucs_async_remove_handler(dev->ibv_context->async_fd, 1); + } + UCS_STATS_NODE_FREE(dev->stats); +} + +static inline int uct_ib_device_spec_match(uct_ib_device_t *dev, + const uct_ib_device_spec_t *spec) +{ + return (spec->pci_id.vendor == dev->pci_id.vendor) && + (spec->pci_id.device == dev->pci_id.device); +} + +const uct_ib_device_spec_t* 
uct_ib_device_spec(uct_ib_device_t *dev) +{ + uct_ib_md_t *md = ucs_container_of(dev, uct_ib_md_t, dev); + uct_ib_device_spec_t *spec; + + /* search through devices specified in the configuration */ + for (spec = md->custom_devices.specs; + spec < md->custom_devices.specs + md->custom_devices.count; ++spec) { + if (uct_ib_device_spec_match(dev, spec)) { + return spec; + } + } + + /* search through built-in list of device specifications */ + spec = uct_ib_builtin_device_specs; + while ((spec->name != NULL) && !uct_ib_device_spec_match(dev, spec)) { + ++spec; + } + return spec; /* if no match is found, return the last entry, which contains + default settings for unknown devices */ +} + +static size_t uct_ib_device_get_ib_gid_index(uct_ib_md_t *md) +{ + if (md->config.gid_index == UCS_ULUNITS_AUTO) { + return UCT_IB_MD_DEFAULT_GID_INDEX; + } else { + return md->config.gid_index; + } +} + +static int uct_ib_device_is_iwarp(uct_ib_device_t *dev) +{ + return dev->ibv_context->device->transport_type == IBV_TRANSPORT_IWARP; +} + +ucs_status_t uct_ib_device_port_check(uct_ib_device_t *dev, uint8_t port_num, + unsigned flags) +{ + uct_ib_md_t *md = ucs_container_of(dev, uct_ib_md_t, dev); + const uct_ib_device_spec_t *dev_info; + uint8_t required_dev_flags; + ucs_status_t status; + union ibv_gid gid; + int is_roce_v2; + + if (port_num < dev->first_port || port_num >= dev->first_port + dev->num_ports) { + return UCS_ERR_NO_DEVICE; + } + + if (uct_ib_device_port_attr(dev, port_num)->state != IBV_PORT_ACTIVE) { + ucs_trace("%s:%d is not active (state: %d)", uct_ib_device_name(dev), + port_num, uct_ib_device_port_attr(dev, port_num)->state); + return UCS_ERR_UNREACHABLE; + } + + if (uct_ib_device_is_iwarp(dev)) { + /* TODO: enable it when support is ready */ + ucs_debug("iWarp device %s is not supported", uct_ib_device_name(dev)); + return UCS_ERR_UNSUPPORTED; + } + + if (!uct_ib_device_is_port_ib(dev, port_num) && (flags & UCT_IB_DEVICE_FLAG_LINK_IB)) { + ucs_debug("%s:%d is 
not IB link layer", uct_ib_device_name(dev), + port_num); + return UCS_ERR_UNSUPPORTED; + } + + if (flags & UCT_IB_DEVICE_FLAG_DC) { + if (!IBV_DEVICE_HAS_DC(dev)) { + ucs_trace("%s:%d does not support DC", uct_ib_device_name(dev), port_num); + return UCS_ERR_UNSUPPORTED; + } + } + + /* check generic device flags */ + dev_info = uct_ib_device_spec(dev); + required_dev_flags = flags & (UCT_IB_DEVICE_FLAG_MLX4_PRM | + UCT_IB_DEVICE_FLAG_MLX5_PRM); + if (!ucs_test_all_flags(dev_info->flags, required_dev_flags)) { + ucs_trace("%s:%d (%s) does not support flags 0x%x", uct_ib_device_name(dev), + port_num, dev_info->name, required_dev_flags); + return UCS_ERR_UNSUPPORTED; + } + + if (md->check_subnet_filter && uct_ib_device_is_port_ib(dev, port_num)) { + status = uct_ib_device_query_gid(dev, port_num, + uct_ib_device_get_ib_gid_index(md), &gid, + &is_roce_v2); + if (status) { + return status; + } + + ucs_assert(is_roce_v2 == 0); + if (md->subnet_filter != gid.global.subnet_prefix) { + ucs_trace("%s:%d subnet_prefix does not match", + uct_ib_device_name(dev), port_num); + return UCS_ERR_UNSUPPORTED; + } + } + + return UCS_OK; +} + +static int uct_ib_device_is_addr_ipv4_mcast(const struct in6_addr *raw, + const uint32_t addr_last_bits) +{ + /* IPv4 encoded multicast addresses */ + return (raw->s6_addr32[0] == htonl(0xff0e0000)) && + !(raw->s6_addr32[1] | addr_last_bits); +} + +static sa_family_t uct_ib_device_get_addr_family(union ibv_gid *gid, int gid_index) +{ + const struct in6_addr *raw = (struct in6_addr *)gid->raw; + const uint32_t addr_last_bits = raw->s6_addr32[2] ^ htonl(0x0000ffff); + char p[128]; + + ucs_debug("testing addr_family on gid index %d: %s", + gid_index, inet_ntop(AF_INET6, gid, p, sizeof(p))); + + if (!((raw->s6_addr32[0] | raw->s6_addr32[1]) | addr_last_bits) || + uct_ib_device_is_addr_ipv4_mcast(raw, addr_last_bits)) { + return AF_INET; + } else { + return AF_INET6; + } +} + +static ucs_status_t +uct_ib_device_query_gid_info(uct_ib_device_t *dev, 
uint8_t port_num, + unsigned gid_index, uct_ib_device_gid_info_t *info) +{ + int ret; + +#if HAVE_DECL_IBV_EXP_QUERY_GID_ATTR + struct ibv_exp_gid_attr attr; + + attr.comp_mask = IBV_EXP_QUERY_GID_ATTR_TYPE | IBV_EXP_QUERY_GID_ATTR_GID; + ret = ibv_exp_query_gid_attr(dev->ibv_context, port_num, gid_index, &attr); + if (ret == 0) { + info->gid = attr.gid; + switch (attr.type) { + case IBV_EXP_IB_ROCE_V1_GID_TYPE: + info->roce_version.major = 1; + info->roce_version.minor = 0; + return UCS_OK; + case IBV_EXP_ROCE_V1_5_GID_TYPE: + info->roce_version.major = 1; + info->roce_version.minor = 5; + return UCS_OK; + case IBV_EXP_ROCE_V2_GID_TYPE: + info->roce_version.major = 2; + info->roce_version.minor = 0; + return UCS_OK; + default: + ucs_error("Invalid GID[%d] type on %s:%d: %d", + gid_index, uct_ib_device_name(dev), port_num, attr.type); + return UCS_ERR_IO_ERROR; + } + } +#else +#define UCT_IB_SYSFS_GID_TYPE_FMT \ + "/sys/class/infiniband/%s/ports/%d/gid_attrs/types/%d" + char buf[16]; + + ret = ibv_query_gid(dev->ibv_context, port_num, gid_index, &info->gid); + if (ret == 0) { + ret = ucs_read_file(buf, sizeof(buf) - 1, 1, UCT_IB_SYSFS_GID_TYPE_FMT, + uct_ib_device_name(dev), port_num, gid_index); + if (ret > 0) { + if (!strncmp(buf, "IB/RoCE v1", 10)) { + info->roce_version.major = 1; + info->roce_version.minor = 0; + } else if (!strncmp(buf, "RoCE v2", 7)) { + info->roce_version.major = 2; + info->roce_version.minor = 0; + } else { + ucs_error("failed to parse gid type '%s' (dev=%s port=%d index=%d)", + buf, uct_ib_device_name(dev), port_num, gid_index); + return UCS_ERR_INVALID_PARAM; + } + } else { + info->roce_version.major = 1; + info->roce_version.minor = 0; + } + return UCS_OK; + } +#endif + ucs_error("ibv_query_gid(dev=%s port=%d index=%d) failed: %m", + uct_ib_device_name(dev), port_num, gid_index); + return UCS_ERR_INVALID_PARAM; +} + +int uct_ib_device_test_roce_gid_index(uct_ib_device_t *dev, uint8_t port_num, + const union ibv_gid *gid, + uint8_t 
gid_index) +{ + struct ibv_ah_attr ah_attr; + struct ibv_ah *ah; + + ucs_assert(uct_ib_device_is_port_roce(dev, port_num)); + + memset(&ah_attr, 0, sizeof(ah_attr)); + ah_attr.port_num = port_num; + ah_attr.is_global = 1; + ah_attr.grh.dgid = *gid; + ah_attr.grh.sgid_index = gid_index; + ah_attr.grh.hop_limit = 255; + + ah = ibv_create_ah(ucs_container_of(dev, uct_ib_md_t, dev)->pd, &ah_attr); + if (ah == NULL) { + return 0; /* gid entry is not operational */ + } + + ibv_destroy_ah(ah); + return 1; +} + +static ucs_status_t uct_ib_device_set_roce_gid_index(uct_ib_device_t *dev, + uint8_t port_num, + uint8_t *gid_index) +{ + static const uct_ib_roce_version_desc_t roce_prio[] = { + {2, 0, AF_INET}, + {2, 0, AF_INET6}, + {1, 0, AF_INET}, + {1, 0, AF_INET6} + }; + int gid_tbl_len = uct_ib_device_port_attr(dev, port_num)->gid_tbl_len; + ucs_status_t status = UCS_OK; + int priorities_arr_len = ucs_static_array_size(roce_prio); + uct_ib_device_gid_info_t gid_info; + int i, prio_idx; + + /* search for matching GID table entries, accroding to the order defined + * in priorities array + */ + for (prio_idx = 0; prio_idx < priorities_arr_len; prio_idx++) { + for (i = 0; i < gid_tbl_len; i++) { + status = uct_ib_device_query_gid_info(dev, port_num, i, &gid_info); + if (status != UCS_OK) { + goto out; + } + + if ((roce_prio[prio_idx].roce_major == gid_info.roce_version.major) && + (roce_prio[prio_idx].roce_minor == gid_info.roce_version.minor) && + (roce_prio[prio_idx].address_family == + uct_ib_device_get_addr_family(&gid_info.gid, i)) && + uct_ib_device_test_roce_gid_index(dev, port_num, &gid_info.gid, i)) { + + *gid_index = i; + goto out_print; + } + } + } + + *gid_index = UCT_IB_MD_DEFAULT_GID_INDEX; + +out_print: + ucs_debug("%s:%d using gid_index %d", uct_ib_device_name(dev), port_num, + *gid_index); +out: + return status; +} + +int uct_ib_device_is_port_ib(uct_ib_device_t *dev, uint8_t port_num) +{ +#if HAVE_DECL_IBV_LINK_LAYER_INFINIBAND + return 
uct_ib_device_port_attr(dev, port_num)->link_layer == IBV_LINK_LAYER_INFINIBAND; +#else + return 1; +#endif +} + +int uct_ib_device_is_port_roce(uct_ib_device_t *dev, uint8_t port_num) +{ + return IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_device_port_attr(dev, port_num)); +} + +ucs_status_t uct_ib_device_select_gid_index(uct_ib_device_t *dev, + uint8_t port_num, + size_t md_config_index, + uint8_t *gid_index) +{ + ucs_status_t status = UCS_OK; + + if (md_config_index == UCS_ULUNITS_AUTO) { + if (uct_ib_device_is_port_roce(dev, port_num)) { + status = uct_ib_device_set_roce_gid_index(dev, port_num, gid_index); + } else { + *gid_index = UCT_IB_MD_DEFAULT_GID_INDEX; + } + } else { + *gid_index = md_config_index; + } + + return status; +} + +const char *uct_ib_device_name(uct_ib_device_t *dev) +{ + return ibv_get_device_name(dev->ibv_context->device); +} + +size_t uct_ib_mtu_value(enum ibv_mtu mtu) +{ + switch (mtu) { + case IBV_MTU_256: + return 256; + case IBV_MTU_512: + return 512; + case IBV_MTU_1024: + return 1024; + case IBV_MTU_2048: + return 2048; + case IBV_MTU_4096: + return 4096; + } + ucs_fatal("Invalid MTU value (%d)", mtu); +} + +uint8_t uct_ib_to_qp_fabric_time(double time) +{ + double to; + + to = log(time / 4.096e-6) / log(2.0); + if (to < 1) { + return 1; /* Very small timeout */ + } else if ((long)(to + 0.5) >= UCT_IB_FABRIC_TIME_MAX) { + return 0; /* No timeout */ + } else { + return (long)(to + 0.5); + } +} + +uint8_t uct_ib_to_rnr_fabric_time(double time) +{ + double time_ms = time * UCS_MSEC_PER_SEC; + uint8_t index, next_index; + double avg_ms; + + for (index = 1; index < UCT_IB_FABRIC_TIME_MAX; index++) { + next_index = (index + 1) % UCT_IB_FABRIC_TIME_MAX; + + if (time_ms <= uct_ib_qp_rnr_time_ms[next_index]) { + avg_ms = (uct_ib_qp_rnr_time_ms[index] + + uct_ib_qp_rnr_time_ms[next_index]) * 0.5; + + if (time_ms < avg_ms) { + /* return previous index */ + return index; + } else { + /* return current index */ + return next_index; + } + } + } + + 
return 0; /* this is a special value that means the maximum value */ +} + +ucs_status_t uct_ib_modify_qp(struct ibv_qp *qp, enum ibv_qp_state state) +{ + struct ibv_qp_attr qp_attr; + + ucs_debug("modify QP 0x%x to state %d", qp->qp_num, state); + memset(&qp_attr, 0, sizeof(qp_attr)); + qp_attr.qp_state = state; + if (ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE)) { + ucs_warn("modify qp 0x%x to state %d failed: %m", qp->qp_num, state); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +ucs_status_t uct_ib_device_query_ports(uct_ib_device_t *dev, unsigned flags, + uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + uct_tl_device_resource_t *tl_devices; + unsigned num_tl_devices; + ucs_status_t status; + uint8_t port_num; + + /* Allocate resources array + * We may allocate more memory than really required, but it's not so bad. */ + tl_devices = ucs_calloc(dev->num_ports, sizeof(*tl_devices), "ib device resource"); + if (tl_devices == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + + /* Second pass: fill port information */ + num_tl_devices = 0; + for (port_num = dev->first_port; port_num < dev->first_port + dev->num_ports; + ++port_num) + { + /* Check port capabilities */ + status = uct_ib_device_port_check(dev, port_num, flags); + if (status != UCS_OK) { + ucs_trace("%s:%d does not support flags 0x%x: %s", + uct_ib_device_name(dev), port_num, flags, + ucs_status_string(status)); + continue; + } + + /* Save device information */ + ucs_snprintf_zero(tl_devices[num_tl_devices].name, + sizeof(tl_devices[num_tl_devices].name), + "%s:%d", uct_ib_device_name(dev), port_num); + tl_devices[num_tl_devices].type = UCT_DEVICE_TYPE_NET; + ++num_tl_devices; + } + + if (num_tl_devices == 0) { + ucs_debug("no compatible IB ports found for flags 0x%x", flags); + status = UCS_ERR_NO_DEVICE; + goto err_free; + } + + *num_tl_devices_p = num_tl_devices; + *tl_devices_p = tl_devices; + return UCS_OK; + +err_free: + ucs_free(tl_devices); +err: + return 
status; +} + +ucs_status_t uct_ib_device_find_port(uct_ib_device_t *dev, + const char *resource_dev_name, + uint8_t *p_port_num) +{ + const char *ibdev_name; + unsigned port_num; + size_t devname_len; + char *p; + + p = strrchr(resource_dev_name, ':'); + if (p == NULL) { + goto err; /* Wrong device name format */ + } + devname_len = p - resource_dev_name; + + ibdev_name = uct_ib_device_name(dev); + if ((strlen(ibdev_name) != devname_len) || + strncmp(ibdev_name, resource_dev_name, devname_len)) + { + goto err; /* Device name is wrong */ + } + + port_num = strtod(p + 1, &p); + if (*p != '\0') { + goto err; /* Failed to parse port number */ + } + if ((port_num < dev->first_port) || (port_num >= dev->first_port + dev->num_ports)) { + goto err; /* Port number out of range */ + } + + *p_port_num = port_num; + return UCS_OK; + +err: + ucs_error("%s: failed to find port", resource_dev_name); + return UCS_ERR_NO_DEVICE; +} + +ucs_status_t uct_ib_device_mtu(const char *dev_name, uct_md_h md, int *p_mtu) +{ + + uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev; + uint8_t port_num; + ucs_status_t status; + + status = uct_ib_device_find_port(dev, dev_name, &port_num); + if (status != UCS_OK) { + return status; + } + + *p_mtu = uct_ib_mtu_value(uct_ib_device_port_attr(dev, port_num)->active_mtu); + return UCS_OK; +} + +int uct_ib_device_is_gid_raw_empty(uint8_t *gid_raw) +{ + return (*(uint64_t *)gid_raw == 0) && (*(uint64_t *)(gid_raw + 8) == 0); +} + +ucs_status_t uct_ib_device_query_gid(uct_ib_device_t *dev, uint8_t port_num, + unsigned gid_index, union ibv_gid *gid, + int *is_roce_v2) +{ + uct_ib_device_gid_info_t gid_info; + ucs_status_t status; + + status = uct_ib_device_query_gid_info(dev, port_num, gid_index, &gid_info); + if (status != UCS_OK) { + return status; + } + + if (uct_ib_device_is_gid_raw_empty(gid_info.gid.raw)) { + ucs_error("Invalid gid[%d] on %s:%d", gid_index, + uct_ib_device_name(dev), port_num); + return UCS_ERR_INVALID_ADDR; + } + + *gid = 
gid_info.gid; + *is_roce_v2 = uct_ib_device_is_port_roce(dev, port_num) && + (gid_info.roce_version.major >= 2); + return UCS_OK; +} + +size_t uct_ib_device_odp_max_size(uct_ib_device_t *dev) +{ +#if HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_CAPS + const struct ibv_exp_device_attr *dev_attr = &dev->dev_attr; + uint32_t required_ud_odp_caps = IBV_EXP_ODP_SUPPORT_SEND; + uint32_t required_rc_odp_caps = IBV_EXP_ODP_SUPPORT_SEND | + IBV_EXP_ODP_SUPPORT_WRITE | + IBV_EXP_ODP_SUPPORT_READ; + + if (RUNNING_ON_VALGRIND || + !IBV_EXP_HAVE_ODP(dev_attr) || + !ucs_test_all_flags(IBV_EXP_ODP_CAPS(dev_attr, rc), required_rc_odp_caps) || + !ucs_test_all_flags(IBV_EXP_ODP_CAPS(dev_attr, ud), required_ud_odp_caps)) + { + return 0; + } + + if (IBV_DEVICE_HAS_DC(dev) +# if HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_CAPS_PER_TRANSPORT_CAPS_DC_ODP_CAPS + && !ucs_test_all_flags(IBV_EXP_ODP_CAPS(dev_attr, dc), required_rc_odp_caps) +# endif + ) + { + return 0; + } + +# if HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_MR_MAX_SIZE + return dev_attr->odp_mr_max_size; +# else + return 1ul << 28; /* Limit ODP to 256 MB by default */ +# endif /* HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_MR_MAX_SIZE */ + +#else + return 0; +#endif /* HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_CAPS */ +} + +const char *uct_ib_wc_status_str(enum ibv_wc_status wc_status) +{ + return ibv_wc_status_str(wc_status); +} + +static ucs_status_t uct_ib_device_create_ah(uct_ib_device_t *dev, + struct ibv_ah_attr *ah_attr, + struct ibv_pd *pd, + struct ibv_ah **ah_p) +{ + char buf[128]; + char *p, *endp; + struct ibv_ah *ah; + + ah = ibv_create_ah(pd, ah_attr); + if (ah == NULL) { + p = buf; + endp = buf + sizeof(buf); + snprintf(p, endp - p, "dlid=%d sl=%d port=%d src_path_bits=%d", + ah_attr->dlid, ah_attr->sl, + ah_attr->port_num, ah_attr->src_path_bits); + p += strlen(p); + + if (ah_attr->is_global) { + snprintf(p, endp - p, " dgid="); + p += strlen(p); + inet_ntop(AF_INET6, &ah_attr->grh.dgid, p, endp - p); + p += strlen(p); + snprintf(p, endp - p, " 
sgid_index=%d traffic_class=%d", + ah_attr->grh.sgid_index, ah_attr->grh.traffic_class); + } + + ucs_error("ibv_create_ah(%s) failed: %m", buf); + return UCS_ERR_INVALID_ADDR; + } + + *ah_p = ah; + return UCS_OK; +} + +ucs_status_t uct_ib_device_create_ah_cached(uct_ib_device_t *dev, + struct ibv_ah_attr *ah_attr, + struct ibv_pd *pd, + struct ibv_ah **ah_p) +{ + ucs_status_t status = UCS_OK; + khiter_t iter; + int ret; + + ucs_spin_lock(&dev->ah_lock); + + /* looking for existing AH with same attributes */ + iter = kh_get(uct_ib_ah, &dev->ah_hash, *ah_attr); + if (iter == kh_end(&dev->ah_hash)) { + /* new AH */ + status = uct_ib_device_create_ah(dev, ah_attr, pd, ah_p); + if (status != UCS_OK) { + goto unlock; + } + + /* store AH in hash */ + iter = kh_put(uct_ib_ah, &dev->ah_hash, *ah_attr, &ret); + + /* failed to store - rollback */ + if (iter == kh_end(&dev->ah_hash)) { + ibv_destroy_ah(*ah_p); + status = UCS_ERR_NO_MEMORY; + goto unlock; + } + + kh_value(&dev->ah_hash, iter) = *ah_p; + } else { + /* found existing AH */ + *ah_p = kh_value(&dev->ah_hash, iter); + } + +unlock: + ucs_spin_unlock(&dev->ah_lock); + return status; +} + +int uct_ib_get_cqe_size(int cqe_size_min) +{ + static int cqe_size_max = -1; + int cqe_size; + + if (cqe_size_max == -1) { +#ifdef __aarch64__ + char arm_board_vendor[128]; + ucs_aarch64_cpuid_t cpuid; + ucs_aarch64_cpuid(&cpuid); + + arm_board_vendor[0] = '\0'; + ucs_read_file(arm_board_vendor, sizeof(arm_board_vendor), 1, + "/sys/devices/virtual/dmi/id/board_vendor"); + ucs_debug("arm_board_vendor is '%s'", arm_board_vendor); + + cqe_size_max = ((strcasestr(arm_board_vendor, "Huawei")) && + (cpuid.implementer == 0x41) && (cpuid.architecture == 8) && + (cpuid.variant == 0) && (cpuid.part == 0xd08) && + (cpuid.revision == 2)) + ? 64 : 128; +#else + cqe_size_max = 128; +#endif + ucs_debug("max IB CQE size is %d", cqe_size_max); + } + + /* Set cqe size according to inline size and cache line size. 
*/ + cqe_size = ucs_max(cqe_size_min, UCS_SYS_CACHE_LINE_SIZE); + cqe_size = ucs_max(cqe_size, 64); /* at least 64 */ + cqe_size = ucs_min(cqe_size, cqe_size_max); + + return cqe_size; +} diff --git a/src/uct/ib/base/ib_device.h b/src/uct/ib/base/ib_device.h new file mode 100644 index 0000000..0b674f8 --- /dev/null +++ b/src/uct/ib/base/ib_device.h @@ -0,0 +1,322 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_IB_DEVICE_H +#define UCT_IB_DEVICE_H + +#include "ib_verbs.h" + +#include +#include +#include +#include +#include +#include + +#include + + +#define UCT_IB_QPN_ORDER 24 /* How many bits can be an IB QP number */ +#define UCT_IB_LRH_LEN 8 /* IB Local routing header */ +#define UCT_IB_GRH_LEN 40 /* IB GLobal routing header */ +#define UCT_IB_BTH_LEN 12 /* IB base transport header */ +#define UCT_IB_ROCE_LEN 14 /* Ethernet header - + 6B for Destination MAC + + 6B for Source MAC + 2B Type (RoCE) */ +#define UCT_IB_DETH_LEN 8 /* IB datagram header */ +#define UCT_IB_RETH_LEN 16 /* IB RDMA header */ +#define UCT_IB_ATOMIC_ETH_LEN 28 /* IB atomic header */ +#define UCT_IB_AETH_LEN 4 /* IB ack */ +#define UCT_IB_PAYLOAD_ALIGN 4 /* IB payload padding */ +#define UCT_IB_ICRC_LEN 4 /* IB invariant crc footer */ +#define UCT_IB_VCRC_LEN 2 /* IB variant crc footer */ +#define UCT_IB_DELIM_LEN 2 /* IB wire delimiter */ +#define UCT_IB_FDR_PACKET_GAP 64 /* Minimal FDR packet gap */ +#define UCT_IB_MAX_MESSAGE_SIZE (2UL << 30) /* Maximal IB message size */ +#define UCT_IB_PKEY_PARTITION_MASK 0x7fff /* IB partition number mask */ +#define UCT_IB_PKEY_MEMBERSHIP_MASK 0x8000 /* Full/send-only member */ +#define UCT_IB_DEV_MAX_PORTS 2 +#define UCT_IB_FABRIC_TIME_MAX 32 +#define UCT_IB_INVALID_RKEY 0xffffffffu +#define UCT_IB_KEY 0x1ee7a330 +#define UCT_IB_LINK_LOCAL_PREFIX be64toh(0xfe80000000000000ul) /* IBTA 4.1.1 12a */ +#define UCT_IB_SITE_LOCAL_PREFIX be64toh(0xfec0000000000000ul) /* 
IBTA 4.1.1 12b */ +#define UCT_IB_SITE_LOCAL_MASK be64toh(0xffffffffffff0000ul) /* IBTA 4.1.1 12b */ +#define UCT_IB_DEFAULT_ROCEV2_DSCP 106 /* Default DSCP for RoCE v2 */ +#define UCT_IB_DEVICE_SYSFS_FMT "/sys/class/infiniband/%s/device/%s" + + +enum { + UCT_IB_DEVICE_STAT_ASYNC_EVENT, + UCT_IB_DEVICE_STAT_LAST +}; + + +enum { + UCT_IB_DEVICE_FLAG_MLX4_PRM = UCS_BIT(1), /* Device supports mlx4 PRM */ + UCT_IB_DEVICE_FLAG_MLX5_PRM = UCS_BIT(2), /* Device supports mlx5 PRM */ + UCT_IB_DEVICE_FLAG_MELLANOX = UCS_BIT(3), /* Mellanox device */ + UCT_IB_DEVICE_FLAG_LINK_IB = UCS_BIT(5), /* Require only IB */ + UCT_IB_DEVICE_FLAG_DC_V1 = UCS_BIT(6), /* Device supports DC ver 1 */ + UCT_IB_DEVICE_FLAG_DC_V2 = UCS_BIT(7), /* Device supports DC ver 2 */ + UCT_IB_DEVICE_FLAG_AV = UCS_BIT(8), /* Device supports compact AV */ + UCT_IB_DEVICE_FLAG_DC = UCT_IB_DEVICE_FLAG_DC_V1 | + UCT_IB_DEVICE_FLAG_DC_V2, /* Device supports DC */ + UCT_IB_DEVICE_FLAG_ODP_IMPLICIT = UCS_BIT(9), +}; + + +/** + * Flags which specify which address fields are present + */ +enum { + UCT_IB_ADDRESS_FLAG_LID = UCS_BIT(0), + UCT_IB_ADDRESS_FLAG_IF_ID = UCS_BIT(1), + UCT_IB_ADDRESS_FLAG_SUBNET16 = UCS_BIT(2), + UCT_IB_ADDRESS_FLAG_SUBNET64 = UCS_BIT(3), + UCT_IB_ADDRESS_FLAG_GID = UCS_BIT(4), + UCT_IB_ADDRESS_FLAG_LINK_LAYER_IB = UCS_BIT(5), + UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH = UCS_BIT(6) +}; + + +/** + * IB network address + */ +typedef struct uct_ib_address { + uint8_t flags; + /* Following fields appear in this order (if specified by flags). + * The full gid always appears last: + * - uint16_t lid + * - uint64_t if_id + * - uint16_t subnet16 + * - uint64_t subnet64 + * For RoCE: + * - uint8_t gid[16] + */ +} UCS_S_PACKED uct_ib_address_t; + + +/** + * PCI identifier of a device + */ +typedef struct { + uint16_t vendor; + uint16_t device; +} uct_ib_pci_id_t; + + +/** + * IB device specification. 
+ */ +typedef struct uct_ib_device_spec { + const char *name; + uct_ib_pci_id_t pci_id; + unsigned flags; + uint8_t priority; +} uct_ib_device_spec_t; + + +KHASH_TYPE(uct_ib_ah, struct ibv_ah_attr, struct ibv_ah*); + +/** + * IB device (corresponds to HCA) + */ +typedef struct uct_ib_device { + struct ibv_context *ibv_context; /* Verbs context */ + uct_ib_device_attr dev_attr; /* Cached device attributes */ + uint8_t first_port; /* Number of first port (usually 1) */ + uint8_t num_ports; /* Amount of physical ports */ + ucs_sys_cpuset_t local_cpus; /* CPUs local to device */ + int numa_node; /* NUMA node of the device */ + int async_events; /* Whether async events are handled */ + int max_zcopy_log_sge; /* Maximum sges log for zcopy am */ + UCS_STATS_NODE_DECLARE(stats) + struct ibv_port_attr port_attr[UCT_IB_DEV_MAX_PORTS]; /* Cached port attributes */ + uct_ib_pci_id_t pci_id; + unsigned flags; + uint8_t atomic_arg_sizes; + uint8_t atomic_arg_sizes_be; + uint8_t ext_atomic_arg_sizes; + uint8_t ext_atomic_arg_sizes_be; + uint8_t pci_fadd_arg_sizes; + uint8_t pci_cswap_arg_sizes; + /* AH hash */ + khash_t(uct_ib_ah) ah_hash; + ucs_spinlock_t ah_lock; +} uct_ib_device_t; + + +/** + * RoCE version priorities + */ +typedef struct uct_ib_roce_version_desc { + uint8_t roce_major; + uint8_t roce_minor; + sa_family_t address_family; +} uct_ib_roce_version_desc_t; + + +extern const double uct_ib_qp_rnr_time_ms[]; + + +/** + * Check if a port on a device is active and supports the given flags. + */ +ucs_status_t uct_ib_device_port_check(uct_ib_device_t *dev, uint8_t port_num, + unsigned flags); + + +/* + * Helper function to list IB transport resources. + * + * @param dev IB device. + * @param flags Transport requirements from IB device (see UCT_IB_RESOURCE_FLAG_xx) + * @param devices_p Filled with a pointer to an array of devices. + * @param num_devices_p Filled with the number of devices. 
+ */ +ucs_status_t uct_ib_device_query_ports(uct_ib_device_t *dev, unsigned flags, + uct_tl_device_resource_t **devices_p, + unsigned *num_devices_p); + +ucs_status_t uct_ib_device_query(uct_ib_device_t *dev, + struct ibv_device *ibv_device); + +ucs_status_t uct_ib_device_init(uct_ib_device_t *dev, + struct ibv_device *ibv_device, int async_events + UCS_STATS_ARG(ucs_stats_node_t *stats_parent)); + +void uct_ib_device_cleanup(uct_ib_device_t *dev); + + +/** + * @return device specification. + */ +const uct_ib_device_spec_t* uct_ib_device_spec(uct_ib_device_t *dev); + + +/** + * Select the IB gid index to use. + * + * @param dev IB device. + * @param port_num Port number. + * @param md_config_index Gid index from the md configuration. + * @param ib_gid_index Filled with the selected gid index. + */ +ucs_status_t uct_ib_device_select_gid_index(uct_ib_device_t *dev, + uint8_t port_num, + size_t md_config_index, + uint8_t *ib_gid_index); + + +/** + * @return device name. + */ +const char *uct_ib_device_name(uct_ib_device_t *dev); + + +/** + * @return whether the port is InfiniBand + */ +int uct_ib_device_is_port_ib(uct_ib_device_t *dev, uint8_t port_num); + + +/** + * @return whether the port is RoCE + */ +int uct_ib_device_is_port_roce(uct_ib_device_t *dev, uint8_t port_num); + + +/** + * @return 1 if the gid_raw is 0, 0 otherwise. + */ +int uct_ib_device_is_gid_raw_empty(uint8_t *gid_raw); + + +/** + * Convert time-in-seconds to IB fabric QP time value + */ +uint8_t uct_ib_to_qp_fabric_time(double time); + + +/** + * Convert time-in-seconds to IB fabric RNR time value + */ +uint8_t uct_ib_to_rnr_fabric_time(double time); + + +/** + * @return MTU in bytes. + */ +size_t uct_ib_mtu_value(enum ibv_mtu mtu); + + +/** + * Modify QP to a given state and check for error + */ +ucs_status_t uct_ib_modify_qp(struct ibv_qp *qp, enum ibv_qp_state state); + + +/** + * find device mtu. This function can be used before ib + * interface is created. 
+ */ +ucs_status_t uct_ib_device_mtu(const char *dev_name, uct_md_h md, int *p_mtu); + +ucs_status_t uct_ib_device_find_port(uct_ib_device_t *dev, + const char *resource_dev_name, + uint8_t *p_port_num); + +size_t uct_ib_device_odp_max_size(uct_ib_device_t *dev); + +const char *uct_ib_wc_status_str(enum ibv_wc_status wc_status); + +ucs_status_t uct_ib_device_create_ah_cached(uct_ib_device_t *dev, + struct ibv_ah_attr *ah_attr, + struct ibv_pd *pd, + struct ibv_ah **ah_p); + +void uct_ib_device_cleanup_ah_cached(uct_ib_device_t *dev); + +static inline struct ibv_port_attr* +uct_ib_device_port_attr(uct_ib_device_t *dev, uint8_t port_num) +{ + return &dev->port_attr[port_num - dev->first_port]; +} + +static inline int uct_ib_device_has_pci_atomics(uct_ib_device_t *dev) +{ + return !!((dev->pci_fadd_arg_sizes | dev->pci_cswap_arg_sizes) & + (sizeof(uint32_t) | sizeof(uint64_t))); +} + +ucs_status_t uct_ib_device_query_gid(uct_ib_device_t *dev, uint8_t port_num, + unsigned gid_index, union ibv_gid *gid, + int *is_roce_v2); + +int uct_ib_device_test_roce_gid_index(uct_ib_device_t *dev, uint8_t port_num, + const union ibv_gid *gid, + uint8_t gid_index); + +int uct_ib_get_cqe_size(int cqe_size_min); + +static inline ucs_status_t uct_ib_poll_cq(struct ibv_cq *cq, unsigned *count, struct ibv_wc *wcs) +{ + int ret; + + ret = ibv_poll_cq(cq, *count, wcs); + if (ret <= 0) { + if (ucs_likely(ret == 0)) { + return UCS_ERR_NO_PROGRESS; + } + ucs_fatal("failed to poll receive CQ %d", ret); + } + + *count = ret; + return UCS_OK; +} + +#endif diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c new file mode 100644 index 0000000..05d7cd1 --- /dev/null +++ b/src/uct/ib/base/ib_iface.c @@ -0,0 +1,1241 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "ib_iface.h" +#include "ib_log.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static UCS_CONFIG_DEFINE_ARRAY(path_bits_spec, + sizeof(ucs_range_spec_t), + UCS_CONFIG_TYPE_RANGE_SPEC); + +const char *uct_ib_mtu_values[] = { + [UCT_IB_MTU_DEFAULT] = "default", + [UCT_IB_MTU_512] = "512", + [UCT_IB_MTU_1024] = "1024", + [UCT_IB_MTU_2048] = "2048", + [UCT_IB_MTU_4096] = "4096", + [UCT_IB_MTU_LAST] = NULL +}; + +enum { + UCT_IB_ADDRESS_TYPE_LINK_LOCAL, + UCT_IB_ADDRESS_TYPE_SITE_LOCAL, + UCT_IB_ADDRESS_TYPE_GLOBAL, + UCT_IB_ADDRESS_TYPE_ETH, + UCT_IB_ADDRESS_TYPE_LAST, + UCT_IB_IFACE_ADDRESS_TYPE_AUTO = UCT_IB_ADDRESS_TYPE_LAST, + UCT_IB_IFACE_ADDRESS_TYPE_LAST +}; + +static const char *uct_ib_iface_addr_types[] = { + [UCT_IB_ADDRESS_TYPE_LINK_LOCAL] = "ib_local", + [UCT_IB_ADDRESS_TYPE_SITE_LOCAL] = "ib_site_local", + [UCT_IB_ADDRESS_TYPE_GLOBAL] = "ib_global", + [UCT_IB_ADDRESS_TYPE_ETH] = "eth", + [UCT_IB_IFACE_ADDRESS_TYPE_AUTO] = "auto", + [UCT_IB_IFACE_ADDRESS_TYPE_LAST] = NULL +}; + +ucs_config_field_t uct_ib_iface_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_ib_iface_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + {"SEG_SIZE", "8192", + "Size of bounce buffers used for post_send and post_recv.", + ucs_offsetof(uct_ib_iface_config_t, seg_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {"TX_QUEUE_LEN", "256", + "Length of send queue in the QP.", + ucs_offsetof(uct_ib_iface_config_t, tx.queue_len), UCS_CONFIG_TYPE_UINT}, + + {"TX_MAX_BATCH", "16", + "Number of send WQEs to batch in one post-send list. 
Larger values reduce\n" + "the CPU usage, but increase the latency and pipelining between sender and\n" + "receiver.", + ucs_offsetof(uct_ib_iface_config_t, tx.max_batch), UCS_CONFIG_TYPE_UINT}, + + {"TX_MAX_POLL", "16", + "Max number of receive completions to pick during TX poll", + ucs_offsetof(uct_ib_iface_config_t, tx.max_poll), UCS_CONFIG_TYPE_UINT}, + + {"TX_MIN_INLINE", "64", + "Bytes to reserve in send WQE for inline data. Messages which are small\n" + "enough will be sent inline.", + ucs_offsetof(uct_ib_iface_config_t, tx.min_inline), UCS_CONFIG_TYPE_MEMUNITS}, + + {"TX_INLINE_RESP", "32", + "Bytes to reserve in send WQE for inline response. Responses which are small\n" + "enough, such as of atomic operations and small reads, will be received inline.", + ucs_offsetof(uct_ib_iface_config_t, tx.inl_resp), UCS_CONFIG_TYPE_MEMUNITS}, + + {"TX_MIN_SGE", "3", + "Number of SG entries to reserve in the send WQE.", + ucs_offsetof(uct_ib_iface_config_t, tx.min_sge), UCS_CONFIG_TYPE_UINT}, + +#if HAVE_DECL_IBV_EXP_CQ_MODERATION + {"TX_EVENT_MOD_COUNT", "0", + "Number of send completions for which an event would be generated (0 - disabled).", + ucs_offsetof(uct_ib_iface_config_t, tx.cq_moderation_count), UCS_CONFIG_TYPE_UINT}, + + {"TX_EVENT_MOD_PERIOD", "0us", + "Time period to generate send event (0 - disabled).", + ucs_offsetof(uct_ib_iface_config_t, tx.cq_moderation_period), UCS_CONFIG_TYPE_TIME}, + + {"RX_EVENT_MOD_COUNT", "0", + "Number of received messages for which an event would be generated (0 - disabled).", + ucs_offsetof(uct_ib_iface_config_t, rx.cq_moderation_count), UCS_CONFIG_TYPE_UINT}, + + {"RX_EVENT_MOD_PERIOD", "0us", + "Time period to generate receive event (0 - disabled).", + ucs_offsetof(uct_ib_iface_config_t, rx.cq_moderation_period), UCS_CONFIG_TYPE_TIME}, +#endif /* HAVE_DECL_IBV_EXP_CQ_MODERATION */ + + UCT_IFACE_MPOOL_CONFIG_FIELDS("TX_", -1, 1024, "send", + ucs_offsetof(uct_ib_iface_config_t, tx.mp), + "\nAttention: Setting this param with 
value != -1 is a dangerous thing\n" + "in RC/DC and could cause deadlock or performance degradation."), + + {"RX_QUEUE_LEN", "4096", + "Length of receive queue in the QPs.", + ucs_offsetof(uct_ib_iface_config_t, rx.queue_len), UCS_CONFIG_TYPE_UINT}, + + {"RX_MAX_BATCH", "16", + "How many post-receives to perform in one batch.", + ucs_offsetof(uct_ib_iface_config_t, rx.max_batch), UCS_CONFIG_TYPE_UINT}, + + {"RX_MAX_POLL", "16", + "Max number of receive completions to pick during RX poll", + ucs_offsetof(uct_ib_iface_config_t, rx.max_poll), UCS_CONFIG_TYPE_UINT}, + + {"RX_INLINE", "0", + "Number of bytes to request for inline receive. If the maximal supported size\n" + "is smaller, it will be used instead. If it is possible to support a larger\n" + "size than requested with the same hardware resources, it will be used instead.", + ucs_offsetof(uct_ib_iface_config_t, rx.inl), UCS_CONFIG_TYPE_MEMUNITS}, + + UCT_IFACE_MPOOL_CONFIG_FIELDS("RX_", -1, 0, "receive", + ucs_offsetof(uct_ib_iface_config_t, rx.mp), ""), + + {"ADDR_TYPE", "auto", + "Set the interface address type. \"auto\" mode detects the type according to\n" + "link layer type and IB subnet prefix.\n" + "Deprecated. To force use of global routing use IS_GLOBAL.", + ucs_offsetof(uct_ib_iface_config_t, addr_type), + UCS_CONFIG_TYPE_ENUM(uct_ib_iface_addr_types)}, + + {"IS_GLOBAL", "n", + "Force interface to use global routing.", + ucs_offsetof(uct_ib_iface_config_t, is_global), UCS_CONFIG_TYPE_BOOL}, + + {"SL", "0", + "IB Service Level / RoCEv2 Ethernet Priority.\n", + ucs_offsetof(uct_ib_iface_config_t, sl), UCS_CONFIG_TYPE_UINT}, + + {"TRAFFIC_CLASS", "auto", + "IB Traffic Class / RoCEv2 Differentiated Services Code Point (DSCP).\n" + "\"auto\" option selects 106 on RoCEv2 and 0 otherwise.", + ucs_offsetof(uct_ib_iface_config_t, traffic_class), UCS_CONFIG_TYPE_ULUNITS}, + + {"HOP_LIMIT", "255", + "IB Hop limit / RoCEv2 Time to Live. 
Should be between 0 and 255.\n", + ucs_offsetof(uct_ib_iface_config_t, hop_limit), UCS_CONFIG_TYPE_UINT}, + + {"LID_PATH_BITS", "0-17", + "List of IB Path bits separated by comma (a,b,c) " + "which will be the low portion of the LID, according to the LMC in the fabric.", + ucs_offsetof(uct_ib_iface_config_t, lid_path_bits), UCS_CONFIG_TYPE_ARRAY(path_bits_spec)}, + + {"PKEY", "auto", + "Which pkey value to use. Should be between 0 and 0x7fff.\n" + "\"auto\" option selects a first valid pkey value with full membership.", + ucs_offsetof(uct_ib_iface_config_t, pkey_value), UCS_CONFIG_TYPE_HEX}, + +#if HAVE_IBV_EXP_RES_DOMAIN + {"RESOURCE_DOMAIN", "y", + "Enable multiple resource domains (experimental).", + ucs_offsetof(uct_ib_iface_config_t, enable_res_domain), UCS_CONFIG_TYPE_BOOL}, +#endif + + + {NULL} +}; + +int uct_ib_iface_is_roce(uct_ib_iface_t *iface) +{ + return uct_ib_device_is_port_roce(uct_ib_iface_device(iface), + iface->config.port_num); +} + +int uct_ib_iface_is_ib(uct_ib_iface_t *iface) +{ + return uct_ib_device_is_port_ib(uct_ib_iface_device(iface), + iface->config.port_num); +} + +static void uct_ib_iface_recv_desc_init(uct_iface_h tl_iface, void *obj, uct_mem_h memh) +{ + uct_ib_iface_recv_desc_t *desc = obj; + uct_ib_mem_t *ib_memh = memh; + + desc->lkey = ib_memh->lkey; +} + +ucs_status_t uct_ib_iface_recv_mpool_init(uct_ib_iface_t *iface, + const uct_ib_iface_config_t *config, + const char *name, ucs_mpool_t *mp) +{ + unsigned grow; + + if (config->rx.queue_len < 1024) { + grow = 1024; + } else { + /* We want to have some free (+10%) elements to avoid mem pool expansion */ + grow = ucs_min( (int)(1.1 * config->rx.queue_len + 0.5), + config->rx.mp.max_bufs); + } + + return uct_iface_mpool_init(&iface->super, mp, + iface->config.rx_payload_offset + iface->config.seg_size, + iface->config.rx_hdr_offset, + UCS_SYS_CACHE_LINE_SIZE, + &config->rx.mp, grow, + uct_ib_iface_recv_desc_init, + name); +} + +void uct_ib_iface_release_desc(uct_recv_desc_t 
*self, void *desc) +{ + uct_ib_iface_t *iface = ucs_container_of(self, uct_ib_iface_t, release_desc); + void *ib_desc; + + ib_desc = UCS_PTR_BYTE_OFFSET(desc, -(ptrdiff_t)iface->config.rx_headroom_offset); + ucs_mpool_put_inline(ib_desc); +} + +size_t uct_ib_address_size(const union ibv_gid *gid, unsigned pack_flags) +{ + size_t size = sizeof(uct_ib_address_t); + + if (pack_flags & UCT_IB_ADDRESS_PACK_FLAG_ETH) { + /* Ethernet: address contains only raw GID */ + return size + sizeof(union ibv_gid); + } + + /* InfiniBand: address always contains LID */ + size += sizeof(uint16_t); /* lid */ + + if (pack_flags & UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID) { + /* Add GUID */ + UCS_STATIC_ASSERT(sizeof(gid->global.interface_id) == sizeof(uint64_t)); + size += sizeof(uint64_t); + } + + if (pack_flags & UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX) { + if ((gid->global.subnet_prefix & UCT_IB_SITE_LOCAL_MASK) == + UCT_IB_SITE_LOCAL_PREFIX) { + /* 16-bit subnet prefix */ + size += sizeof(uint16_t); + } else if (gid->global.subnet_prefix != UCT_IB_LINK_LOCAL_PREFIX) { + /* 64-bit subnet prefix */ + size += sizeof(uint64_t); + } + /* Note: if subnet prefix is LINK_LOCAL, no need to pack it because + * it's a well-known value defined by IB specification. 
+ */ + } + + return size; +} + +void uct_ib_address_pack(const union ibv_gid *gid, uint16_t lid, + unsigned pack_flags, uct_ib_address_t *ib_addr) +{ + void *ptr = ib_addr + 1; + + if (pack_flags & UCT_IB_ADDRESS_PACK_FLAG_ETH) { + /* RoCE, in this case we don't use the lid and set the GID flag */ + ib_addr->flags = UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH | + UCT_IB_ADDRESS_FLAG_GID; + /* uint8_t raw[16]; */ + memcpy(ptr, gid->raw, sizeof(gid->raw) * sizeof(uint8_t)); + return; + } + + /* IB, LID */ + ib_addr->flags = UCT_IB_ADDRESS_FLAG_LINK_LAYER_IB | + UCT_IB_ADDRESS_FLAG_LID; + ptr = ib_addr + 1; + *(uint16_t*)ptr = lid; + ptr = UCS_PTR_BYTE_OFFSET(ptr, sizeof(uint16_t)); + + if (pack_flags & UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID) { + /* Pack GUID */ + ib_addr->flags |= UCT_IB_ADDRESS_FLAG_IF_ID; + *(uint64_t*) ptr = gid->global.interface_id; + ptr = UCS_PTR_BYTE_OFFSET(ptr, sizeof(uint64_t)); + } + + if (pack_flags & UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX) { + if ((gid->global.subnet_prefix & UCT_IB_SITE_LOCAL_MASK) == + UCT_IB_SITE_LOCAL_PREFIX) { + /* Site-local */ + ib_addr->flags |= UCT_IB_ADDRESS_FLAG_SUBNET16; + *(uint16_t*)ptr = gid->global.subnet_prefix >> 48; + } else if (gid->global.subnet_prefix != UCT_IB_LINK_LOCAL_PREFIX) { + /* Global */ + ib_addr->flags |= UCT_IB_ADDRESS_FLAG_SUBNET64; + *(uint64_t*)ptr = gid->global.subnet_prefix; + } + } +} + +static unsigned uct_ib_iface_address_pack_flags(uct_ib_iface_t *iface) +{ + if (uct_ib_iface_is_roce(iface)) { + /* pack Ethernet address */ + return UCT_IB_ADDRESS_PACK_FLAG_ETH; + } else if (iface->config.force_global_addr) { + /* pack full IB address */ + return UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX | + UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID; + } else { + /* pack only subnet prefix for reachability test */ + return UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX; + } +} + +size_t uct_ib_iface_address_size(uct_ib_iface_t *iface) +{ + return uct_ib_address_size(&iface->gid, + uct_ib_iface_address_pack_flags(iface)); 
+} + +void uct_ib_iface_address_pack(uct_ib_iface_t *iface, const union ibv_gid *gid, + uint16_t lid, uct_ib_address_t *ib_addr) +{ + uct_ib_address_pack(gid, lid, uct_ib_iface_address_pack_flags(iface), + ib_addr); +} + +void uct_ib_address_unpack(const uct_ib_address_t *ib_addr, uint16_t *lid, + union ibv_gid *gid) +{ + const void *ptr = ib_addr + 1; + + *lid = 0; + + if (ib_addr->flags & UCT_IB_ADDRESS_FLAG_GID) { + memcpy(gid->raw, ptr, sizeof(gid->raw) * sizeof(uint8_t)); /* uint8_t raw[16]; */ + ucs_assert(ib_addr->flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH); + ucs_assert(!(ib_addr->flags & UCT_IB_ADDRESS_FLAG_LID)); + return; + } + + gid->global.subnet_prefix = UCT_IB_LINK_LOCAL_PREFIX; /* Default prefix */ + gid->global.interface_id = 0; + + if (ib_addr->flags & UCT_IB_ADDRESS_FLAG_LID) { + *lid = *(uint16_t*)ptr; + ptr = UCS_PTR_BYTE_OFFSET(ptr, sizeof(uint16_t)); + } + + if (ib_addr->flags & UCT_IB_ADDRESS_FLAG_IF_ID) { + gid->global.interface_id = *(uint64_t*)ptr; + ptr = UCS_PTR_BYTE_OFFSET(ptr, sizeof(uint64_t)); + } + + if (ib_addr->flags & UCT_IB_ADDRESS_FLAG_SUBNET16) { + gid->global.subnet_prefix = UCT_IB_SITE_LOCAL_PREFIX | + ((uint64_t) *(uint16_t*) ptr << 48); + ptr = UCS_PTR_BYTE_OFFSET(ptr, sizeof(uint16_t)); + ucs_assert(!(ib_addr->flags & UCT_IB_ADDRESS_FLAG_SUBNET64)); + } + + if (ib_addr->flags & UCT_IB_ADDRESS_FLAG_SUBNET64) { + gid->global.subnet_prefix = *(uint64_t*) ptr; + } +} + +const char *uct_ib_address_str(const uct_ib_address_t *ib_addr, char *buf, + size_t max) +{ + union ibv_gid gid; + uint16_t lid; + char *p, *endp; + + uct_ib_address_unpack(ib_addr, &lid, &gid); + + p = buf; + endp = buf + max; + if (lid != 0) { + snprintf(p, endp - p, "lid %d ", lid); + p += strlen(p); + } + inet_ntop(AF_INET6, &gid, p, endp - p); + + return buf; +} + +ucs_status_t uct_ib_iface_get_device_address(uct_iface_h tl_iface, + uct_device_addr_t *dev_addr) +{ + uct_ib_iface_t *iface = ucs_derived_of(tl_iface, uct_ib_iface_t); + + 
uct_ib_iface_address_pack(iface, &iface->gid, uct_ib_iface_port_attr(iface)->lid, + (void*)dev_addr); + return UCS_OK; +} + +int uct_ib_iface_is_reachable(const uct_iface_h tl_iface, const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + uct_ib_iface_t *iface = ucs_derived_of(tl_iface, uct_ib_iface_t); + int is_local_eth = uct_ib_iface_is_roce(iface); + const uct_ib_address_t *ib_addr = (const void*)dev_addr; + union ibv_gid gid; + uint16_t lid; + + uct_ib_address_unpack(ib_addr, &lid, &gid); + + if (!is_local_eth && (ib_addr->flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_IB)) { + /* same subnet prefix */ + return gid.global.subnet_prefix == iface->gid.global.subnet_prefix; + } else if (is_local_eth && (ib_addr->flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH)) { + /* there shouldn't be a lid and the gid flag should be on */ + ucs_assert(ib_addr->flags & UCT_IB_ADDRESS_FLAG_GID); + ucs_assert(!(ib_addr->flags & UCT_IB_ADDRESS_FLAG_LID)); + return 1; + } else { + /* local and remote have different link layers and therefore are unreachable */ + return 0; + } +} + +ucs_status_t uct_ib_iface_create_ah(uct_ib_iface_t *iface, + struct ibv_ah_attr *ah_attr, + struct ibv_ah **ah_p) +{ + return uct_ib_device_create_ah_cached(uct_ib_iface_device(iface), ah_attr, + uct_ib_iface_md(iface)->pd, ah_p); +} + +static ucs_status_t uct_ib_iface_init_pkey(uct_ib_iface_t *iface, + const uct_ib_iface_config_t *config) +{ + uct_ib_device_t *dev = uct_ib_iface_device(iface); + uint16_t pkey_tbl_len = uct_ib_iface_port_attr(iface)->pkey_tbl_len; + int pkey_found = 0; + uint16_t pkey_index, port_pkey, pkey; + + if ((config->pkey_value != UCS_HEXUNITS_AUTO) && + (config->pkey_value > UCT_IB_PKEY_PARTITION_MASK)) { + ucs_error("Requested pkey 0x%x is invalid, should be in the range 0..0x%x", + config->pkey_value, UCT_IB_PKEY_PARTITION_MASK); + return UCS_ERR_INVALID_PARAM; + } + + /* get the user's pkey value and find its index in the port's pkey table */ + for (pkey_index = 0; 
pkey_index < pkey_tbl_len; ++pkey_index) { + /* get the pkey values from the port's pkeys table */ + if (ibv_query_pkey(dev->ibv_context, iface->config.port_num, pkey_index, + &port_pkey)) + { + ucs_debug("ibv_query_pkey("UCT_IB_IFACE_FMT", index=%d) failed: %m", + UCT_IB_IFACE_ARG(iface), pkey_index); + continue; + } + + pkey = ntohs(port_pkey); + if (!(pkey & UCT_IB_PKEY_MEMBERSHIP_MASK)) { + /* if pkey = 0x0, just skip it w/o debug trace, because 0x0 + * means that there is no real pkey configured at this index */ + if (pkey) { + ucs_trace("skipping send-only pkey[%d]=0x%x on "UCT_IB_IFACE_FMT, + pkey_index, pkey, UCT_IB_IFACE_ARG(iface)); + } + continue; + } + + /* take only the lower 15 bits for the comparison */ + if ((config->pkey_value == UCS_HEXUNITS_AUTO) || + ((pkey & UCT_IB_PKEY_PARTITION_MASK) == config->pkey_value)) { + iface->pkey_index = pkey_index; + iface->pkey_value = pkey; + pkey_found = 1; + break; + } + } + + if (!pkey_found) { + if (config->pkey_value == UCS_HEXUNITS_AUTO) { + ucs_error("There is no valid pkey with full membership on " + UCT_IB_IFACE_FMT, UCT_IB_IFACE_ARG(iface)); + } else { + ucs_error("Unable to find specified pkey 0x%x on "UCT_IB_IFACE_FMT, + config->pkey_value, UCT_IB_IFACE_ARG(iface)); + } + + return UCS_ERR_INVALID_PARAM; + } + + ucs_debug("using pkey[%d] 0x%x on "UCT_IB_IFACE_FMT, iface->pkey_index, + iface->pkey_value, UCT_IB_IFACE_ARG(iface)); + return UCS_OK; +} + +static ucs_status_t uct_ib_iface_init_lmc(uct_ib_iface_t *iface, + const uct_ib_iface_config_t *config) +{ + unsigned i, j, num_path_bits; + unsigned first, last; + uint8_t lmc; + int step; + + if (config->lid_path_bits.count == 0) { + ucs_error("List of path bits must not be empty"); + return UCS_ERR_INVALID_PARAM; + } + + /* count the number of lid_path_bits */ + num_path_bits = 0; + for (i = 0; i < config->lid_path_bits.count; i++) { + num_path_bits += 1 + abs((int)(config->lid_path_bits.ranges[i].first - + config->lid_path_bits.ranges[i].last)); + } + 
+ iface->path_bits = ucs_calloc(1, num_path_bits * sizeof(*iface->path_bits), + "ib_path_bits"); + if (iface->path_bits == NULL) { + return UCS_ERR_NO_MEMORY; + } + + lmc = uct_ib_iface_port_attr(iface)->lmc; + + /* go over the list of values (ranges) for the lid_path_bits and set them */ + iface->path_bits_count = 0; + for (i = 0; i < config->lid_path_bits.count; ++i) { + + first = config->lid_path_bits.ranges[i].first; + last = config->lid_path_bits.ranges[i].last; + + /* range of values or one value */ + if (first < last) { + step = 1; + } else { + step = -1; + } + + /* fill the value/s */ + for (j = first; j != (last + step); j += step) { + if (j >= UCS_BIT(lmc)) { + ucs_debug("Not using value %d for path_bits - must be < 2^lmc (lmc=%d)", + j, lmc); + if (step == 1) { + break; + } else { + continue; + } + } + + ucs_assert(iface->path_bits_count < num_path_bits); + iface->path_bits[iface->path_bits_count] = j; + iface->path_bits_count++; + } + } + + return UCS_OK; +} + +void uct_ib_iface_fill_attr(uct_ib_iface_t *iface, uct_ib_qp_attr_t *attr) +{ + attr->ibv.send_cq = iface->cq[UCT_IB_DIR_TX]; + attr->ibv.recv_cq = iface->cq[UCT_IB_DIR_RX]; + + attr->ibv.srq = attr->srq; + attr->ibv.cap = attr->cap; + attr->ibv.qp_type = (enum ibv_qp_type)attr->qp_type; + attr->ibv.sq_sig_all = attr->sq_sig_all; + +#if HAVE_DECL_IBV_EXP_CREATE_QP + if (!(attr->ibv.comp_mask & IBV_EXP_QP_INIT_ATTR_PD)) { + attr->ibv.comp_mask = IBV_EXP_QP_INIT_ATTR_PD; + attr->ibv.pd = uct_ib_iface_md(iface)->pd; + } +#elif HAVE_DECL_IBV_CREATE_QP_EX + if (!(attr->ibv.comp_mask & IBV_QP_INIT_ATTR_PD)) { + attr->ibv.comp_mask = IBV_QP_INIT_ATTR_PD; + attr->ibv.pd = uct_ib_iface_md(iface)->pd; + } +#endif + + attr->port = iface->config.port_num; + + if (attr->qp_type == IBV_QPT_UD) { + return; + } + + /* MOFED requires this to enable IB spec atomic */ +#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE + if (uct_ib_iface_device(iface)->dev_attr.exp_atomic_cap == + IBV_EXP_ATOMIC_HCA_REPLY_BE) { + 
attr->ibv.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS; + attr->ibv.exp_create_flags = IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY; + } +#endif +} + +ucs_status_t uct_ib_iface_create_qp(uct_ib_iface_t *iface, + uct_ib_qp_attr_t *attr, + struct ibv_qp **qp_p) +{ + uct_ib_device_t *dev = uct_ib_iface_device(iface); + struct ibv_qp *qp; + + uct_ib_iface_fill_attr(iface, attr); + +#if HAVE_DECL_IBV_EXP_CREATE_QP + qp = ibv_exp_create_qp(dev->ibv_context, &attr->ibv); +#elif HAVE_DECL_IBV_CREATE_QP_EX + qp = ibv_create_qp_ex(dev->ibv_context, &attr->ibv); +#else + qp = ibv_create_qp(uct_ib_iface_md(iface)->pd, &attr->ibv); +#endif + if (qp == NULL) { + ucs_error("iface=%p: failed to create %s QP TX wr:%d sge:%d inl:%d RX wr:%d sge:%d inl %d: %m", + iface, uct_ib_qp_type_str(attr->qp_type), + attr->cap.max_send_wr, attr->cap.max_send_sge, attr->cap.max_inline_data, + attr->cap.max_recv_wr, attr->cap.max_recv_sge, attr->max_inl_recv); + return UCS_ERR_IO_ERROR; + } + + attr->cap = attr->ibv.cap; + *qp_p = qp; + + ucs_debug("iface=%p: created %s QP 0x%x on %s:%d TX wr:%d sge:%d inl:%d RX wr:%d sge:%d inl %d", + iface, uct_ib_qp_type_str(attr->qp_type), qp->qp_num, + uct_ib_device_name(dev), iface->config.port_num, + attr->cap.max_send_wr, attr->cap.max_send_sge, attr->cap.max_inline_data, + attr->cap.max_recv_wr, attr->cap.max_recv_sge, attr->max_inl_recv); + + return UCS_OK; +} + +ucs_status_t uct_ib_verbs_create_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector, int ignore_overrun, + size_t *inl, struct ibv_cq **cq_p) +{ + struct ibv_cq *cq; +#if HAVE_DECL_IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN + struct ibv_cq_init_attr_ex cq_attr = {}; + + cq_attr.cqe = cqe; + cq_attr.channel = channel; + cq_attr.comp_vector = comp_vector; + if (ignore_overrun) { + cq_attr.comp_mask = IBV_CQ_INIT_ATTR_MASK_FLAGS; + cq_attr.flags = IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN; + } + + cq = ibv_cq_ex_to_cq(ibv_create_cq_ex(context, &cq_attr)); + if (!cq && (errno == 
ENOSYS)) +#endif + { + *inl = 0; + cq = ibv_create_cq(context, cqe, NULL, channel, comp_vector); + } + + if (!cq) { + ucs_error("ibv_create_cq(cqe=%d) failed: %m", cqe); + return UCS_ERR_IO_ERROR; + } + + *cq_p = cq; + return UCS_OK; +} + +static ucs_status_t uct_ib_iface_create_cq(uct_ib_iface_t *iface, int cq_length, + size_t *inl, int preferred_cpu, + int flags, struct ibv_cq **cq_p) +{ + uct_ib_device_t *dev = uct_ib_iface_device(iface); + ucs_status_t status; +#if HAVE_DECL_IBV_EXP_SETENV && !HAVE_DECL_MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE + static const char *cqe_size_env_var = "MLX5_CQE_SIZE"; + const char *cqe_size_env_value; + size_t cqe_size = 64; + size_t cqe_size_min; + char cqe_size_buf[32]; + int env_var_added = 0; + int ret; + + cqe_size_min = (*inl > 32) ? 128 : 64; + cqe_size_env_value = getenv(cqe_size_env_var); + + if (cqe_size_env_value != NULL) { + cqe_size = atol(cqe_size_env_value); + if (cqe_size < cqe_size_min) { + ucs_error("%s is set to %zu, but at least %zu is required (inl: %zu)", + cqe_size_env_var, cqe_size, cqe_size_min, *inl); + return UCS_ERR_INVALID_PARAM; + } + } else { + cqe_size = uct_ib_get_cqe_size(cqe_size_min); + snprintf(cqe_size_buf, sizeof(cqe_size_buf),"%zu", cqe_size); + ucs_debug("%s: setting %s=%s", uct_ib_device_name(dev), cqe_size_env_var, + cqe_size_buf); + ret = ibv_exp_setenv(dev->ibv_context, cqe_size_env_var, cqe_size_buf, 1); + if (ret) { + ucs_error("ibv_exp_setenv(%s=%s) failed: %m", cqe_size_env_var, + cqe_size_buf); + return UCS_ERR_INVALID_PARAM; + } + + env_var_added = 1; + } +#endif + status = iface->ops->create_cq(dev->ibv_context, cq_length, + iface->comp_channel, preferred_cpu, + flags & UCT_IB_CQ_IGNORE_OVERRUN, inl, cq_p); + if (status != UCS_OK) { + goto out_unsetenv; + } + + status = UCS_OK; + +out_unsetenv: +#if HAVE_DECL_IBV_EXP_SETENV && !HAVE_DECL_MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE + *inl = cqe_size / 2; + if (env_var_added) { + /* if we created a new environment variable, remove it */ + ret = 
ibv_exp_unsetenv(dev->ibv_context, cqe_size_env_var); + if (ret) { + ucs_warn("unsetenv(%s) failed: %m", cqe_size_env_var); + } + } +#endif + return status; +} + + +static ucs_status_t uct_ib_iface_set_moderation(struct ibv_cq *cq, + unsigned count, double period_usec) +{ +#if HAVE_DECL_IBV_EXP_CQ_MODERATION + unsigned period = (unsigned)(period_usec * UCS_USEC_PER_SEC); + + if (count > UINT16_MAX) { + ucs_error("CQ moderation count is too high: %u, max value: %u", count, UINT16_MAX); + return UCS_ERR_INVALID_PARAM; + } else if (count == 0) { + /* in case if count value is 0 (unchanged default value) - set it to maximum + * possible value */ + count = UINT16_MAX; + } + + if (period > UINT16_MAX) { + ucs_error("CQ moderation period is too high: %u, max value: %uus", period, UINT16_MAX); + return UCS_ERR_INVALID_PARAM; + } else if (period == 0) { + /* in case if count value is 0 (unchanged default value) - set it to maximum + * possible value, the same behavior as counter */ + period = UINT16_MAX; + } + + if ((count < UINT16_MAX) || (period < UINT16_MAX)) { + struct ibv_exp_cq_attr cq_attr = { + .comp_mask = IBV_EXP_CQ_ATTR_MODERATION, + .moderation.cq_count = (uint16_t)(count), + .moderation.cq_period = (uint16_t)(period), + .cq_cap_flags = 0 + }; + if (ibv_exp_modify_cq(cq, &cq_attr, IBV_EXP_CQ_MODERATION)) { + ucs_error("ibv_exp_modify_cq(count=%d, period=%d) failed: %m", count, period); + return UCS_ERR_IO_ERROR; + } + } +#endif /* HAVE_DECL_IBV_EXP_CQ_MODERATION */ + + return UCS_OK; +} + +UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md, + uct_worker_h worker, const uct_iface_params_t *params, + const uct_ib_iface_config_t *config, + const uct_ib_iface_init_attr_t *init_attr) +{ + uct_ib_md_t *ib_md = ucs_derived_of(md, uct_ib_md_t); + uct_ib_device_t *dev = &ib_md->dev; + size_t rx_headroom = (params->field_mask & + UCT_IFACE_PARAM_FIELD_RX_HEADROOM) ? 
+ params->rx_headroom : 0; + ucs_cpu_set_t cpu_mask; + int preferred_cpu; + ucs_status_t status; + uint8_t port_num; + size_t inl; + + if (!(params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE)) { + return UCS_ERR_UNSUPPORTED; + } + + if (params->field_mask & UCT_IFACE_PARAM_FIELD_CPU_MASK) { + cpu_mask = params->cpu_mask; + } else { + memset(&cpu_mask, 0, sizeof(cpu_mask)); + } + + preferred_cpu = ucs_cpu_set_find_lcs(&cpu_mask); + + UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker, + params, &config->super + UCS_STATS_ARG(((params->field_mask & + UCT_IFACE_PARAM_FIELD_STATS_ROOT) && + (params->stats_root != NULL)) ? + params->stats_root : + dev->stats) + UCS_STATS_ARG(params->mode.device.dev_name)); + + status = uct_ib_device_find_port(dev, params->mode.device.dev_name, &port_num); + if (status != UCS_OK) { + goto err; + } + + self->ops = ops; + + self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) + + ucs_max(sizeof(uct_recv_desc_t) + + rx_headroom, + init_attr->rx_priv_len + + init_attr->rx_hdr_len); + self->config.rx_hdr_offset = self->config.rx_payload_offset - + init_attr->rx_hdr_len; + self->config.rx_headroom_offset = self->config.rx_payload_offset - + rx_headroom; + self->config.seg_size = init_attr->seg_size; + self->config.tx_max_poll = config->tx.max_poll; + self->config.rx_max_poll = config->rx.max_poll; + self->config.rx_max_batch = ucs_min(config->rx.max_batch, + config->rx.queue_len / 4); + self->config.port_num = port_num; + self->config.sl = config->sl; + self->config.hop_limit = config->hop_limit; + self->release_desc.cb = uct_ib_iface_release_desc; + self->config.enable_res_domain = config->enable_res_domain; + self->config.qp_type = init_attr->qp_type; + + if (ucs_derived_of(worker, uct_priv_worker_t)->thread_mode == UCS_THREAD_MODE_MULTI) { + ucs_error("IB transports do not support multi-threaded worker"); + return UCS_ERR_INVALID_PARAM; + } + + status = uct_ib_iface_init_pkey(self, config); + if (status != UCS_OK) 
{ + goto err; + } + + status = uct_ib_device_select_gid_index(dev, self->config.port_num, + ib_md->config.gid_index, + &self->config.gid_index); + if (status != UCS_OK) { + goto err; + } + + status = uct_ib_device_query_gid(dev, self->config.port_num, + self->config.gid_index, &self->gid, + &self->is_roce_v2); + if (status != UCS_OK) { + goto err; + } + + if (config->traffic_class == UCS_ULUNITS_AUTO) { + self->config.traffic_class = self->is_roce_v2 ? + UCT_IB_DEFAULT_ROCEV2_DSCP : 0; + } else { + self->config.traffic_class = config->traffic_class; + } + + status = uct_ib_iface_init_lmc(self, config); + if (status != UCS_OK) { + goto err; + } + + self->comp_channel = ibv_create_comp_channel(dev->ibv_context); + if (self->comp_channel == NULL) { + ucs_error("ibv_create_comp_channel() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err_cleanup; + } + + status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + goto err_destroy_comp_channel; + } + + inl = config->rx.inl; + status = uct_ib_iface_create_cq(self, init_attr->tx_cq_len, &inl, + preferred_cpu, init_attr->flags, + &self->cq[UCT_IB_DIR_TX]); + if (status != UCS_OK) { + goto err_destroy_comp_channel; + } + ucs_assert_always(inl <= UINT8_MAX); + self->config.max_inl_resp = inl; + + status = uct_ib_iface_set_moderation(self->cq[UCT_IB_DIR_TX], + config->tx.cq_moderation_count, + config->tx.cq_moderation_period); + if (status != UCS_OK) { + goto err_destroy_send_cq; + } + + inl = config->rx.inl; + status = uct_ib_iface_create_cq(self, init_attr->rx_cq_len, &inl, + preferred_cpu, init_attr->flags, + &self->cq[UCT_IB_DIR_RX]); + if (status != UCS_OK) { + goto err_destroy_send_cq; + } + + status = uct_ib_iface_set_moderation(self->cq[UCT_IB_DIR_RX], + config->rx.cq_moderation_count, + config->rx.cq_moderation_period); + if (status != UCS_OK) { + goto err_destroy_recv_cq; + } + + /* Address scope and size */ + if (uct_ib_iface_is_roce(self) || config->is_global || + /* check 
ADDR_TYPE for backward compatibility */ + (config->addr_type == UCT_IB_ADDRESS_TYPE_SITE_LOCAL) || + (config->addr_type == UCT_IB_ADDRESS_TYPE_GLOBAL)) { + self->config.force_global_addr = 1; + } else { + self->config.force_global_addr = 0; + } + + self->addr_size = uct_ib_iface_address_size(self); + + ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d", + self->config.rx_headroom_offset, self->config.rx_payload_offset, + self->config.rx_hdr_offset, self->config.seg_size); + + return UCS_OK; + +err_destroy_recv_cq: + ibv_destroy_cq(self->cq[UCT_IB_DIR_RX]); +err_destroy_send_cq: + ibv_destroy_cq(self->cq[UCT_IB_DIR_TX]); +err_destroy_comp_channel: + ibv_destroy_comp_channel(self->comp_channel); +err_cleanup: + ucs_free(self->path_bits); +err: + return status; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_ib_iface_t) +{ + int ret; + + ret = ibv_destroy_cq(self->cq[UCT_IB_DIR_RX]); + if (ret != 0) { + ucs_warn("ibv_destroy_cq(recv_cq) returned %d: %m", ret); + } + + ret = ibv_destroy_cq(self->cq[UCT_IB_DIR_TX]); + if (ret != 0) { + ucs_warn("ibv_destroy_cq(send_cq) returned %d: %m", ret); + } + + ret = ibv_destroy_comp_channel(self->comp_channel); + if (ret != 0) { + ucs_warn("ibv_destroy_comp_channel(comp_channel) returned %d: %m", ret); + } + + ucs_free(self->path_bits); +} + +UCS_CLASS_DEFINE(uct_ib_iface_t, uct_base_iface_t); + +int uct_ib_iface_prepare_rx_wrs(uct_ib_iface_t *iface, ucs_mpool_t *mp, + uct_ib_recv_wr_t *wrs, unsigned n) +{ + uct_ib_iface_recv_desc_t *desc; + unsigned count; + + count = 0; + while (count < n) { + UCT_TL_IFACE_GET_RX_DESC(&iface->super, mp, desc, break); + wrs[count].sg.addr = (uintptr_t)uct_ib_iface_recv_desc_hdr(iface, desc); + wrs[count].sg.length = iface->config.rx_payload_offset + iface->config.seg_size; + wrs[count].sg.lkey = desc->lkey; + wrs[count].ibwr.num_sge = 1; + wrs[count].ibwr.wr_id = (uintptr_t)desc; + wrs[count].ibwr.sg_list = &wrs[count].sg; + wrs[count].ibwr.next = &wrs[count + 
1].ibwr; + ++count; + } + + if (count > 0) { + wrs[count - 1].ibwr.next = NULL; + } + + return count; +} + +static ucs_status_t uct_ib_iface_get_numa_latency(uct_ib_iface_t *iface, + double *latency) +{ + uct_ib_device_t *dev = uct_ib_iface_device(iface); + uct_ib_md_t *md = uct_ib_iface_md(iface); + ucs_sys_cpuset_t temp_cpu_mask, process_affinity; +#if HAVE_NUMA + int distance, min_cpu_distance; + int cpu, num_cpus; +#endif + int ret; + + if (!md->config.prefer_nearest_device) { + *latency = 0; + return UCS_OK; + } + + ret = ucs_sys_getaffinity(&process_affinity); + if (ret) { + ucs_error("sched_getaffinity() failed: %m"); + return UCS_ERR_INVALID_PARAM; + } + +#if HAVE_NUMA + /* Try to estimate the extra device latency according to NUMA distance */ + if (dev->numa_node != -1) { + min_cpu_distance = INT_MAX; + num_cpus = ucs_min(CPU_SETSIZE, numa_num_configured_cpus()); + for (cpu = 0; cpu < num_cpus; ++cpu) { + if (!CPU_ISSET(cpu, &process_affinity)) { + continue; + } + distance = numa_distance(ucs_numa_node_of_cpu(cpu), dev->numa_node); + if (distance >= UCS_NUMA_MIN_DISTANCE) { + min_cpu_distance = ucs_min(min_cpu_distance, distance); + } + } + + if (min_cpu_distance != INT_MAX) { + /* set the extra latency to (numa_distance - 10) * 20nsec */ + *latency = (min_cpu_distance - UCS_NUMA_MIN_DISTANCE) * 20e-9; + return UCS_OK; + } + } +#endif + + /* Estimate the extra device latency according to its local CPUs mask */ + CPU_AND(&temp_cpu_mask, &dev->local_cpus, &process_affinity); + if (CPU_EQUAL(&process_affinity, &temp_cpu_mask)) { + *latency = 0; + } else { + *latency = 200e-9; + } + return UCS_OK; +} + +ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len, + uct_iface_attr_t *iface_attr) +{ + uct_ib_device_t *dev = uct_ib_iface_device(iface); + uct_ib_md_t *md = uct_ib_iface_md(iface); + static const unsigned ib_port_widths[] = { + [0] = 1, + [1] = 4, + [2] = 8, + [3] = 12, + [4] = 16 + }; + uint8_t active_width, active_speed, 
active_mtu, width_idx; + double encoding, signal_rate, wire_speed; + size_t mtu, width, extra_pkt_len; + ucs_status_t status; + double numa_latency; + + uct_base_iface_query(&iface->super, iface_attr); + + active_width = uct_ib_iface_port_attr(iface)->active_width; + active_speed = uct_ib_iface_port_attr(iface)->active_speed; + active_mtu = uct_ib_iface_port_attr(iface)->active_mtu; + + /* Get active width */ + width_idx = ucs_ilog2(active_width); + if (!ucs_is_pow2(active_width) || + (active_width < 1) || (width_idx > 4)) + { + ucs_error("Invalid active_width on %s:%d: %d", + UCT_IB_IFACE_ARG(iface), active_width); + return UCS_ERR_IO_ERROR; + } + + iface_attr->device_addr_len = iface->addr_size; + + switch (active_speed) { + case 1: /* SDR */ + iface_attr->latency.overhead = 5000e-9; + signal_rate = 2.5e9; + encoding = 8.0/10.0; + break; + case 2: /* DDR */ + iface_attr->latency.overhead = 2500e-9; + signal_rate = 5.0e9; + encoding = 8.0/10.0; + break; + case 4: + iface_attr->latency.overhead = 1300e-9; + if (uct_ib_iface_is_roce(iface)) { + /* 10/40g Eth */ + signal_rate = 10.3125e9; + encoding = 64.0/66.0; + } else { + /* QDR */ + signal_rate = 10.0e9; + encoding = 8.0/10.0; + } + break; + case 8: /* FDR10 */ + iface_attr->latency.overhead = 700e-9; + signal_rate = 10.3125e9; + encoding = 64.0/66.0; + break; + case 16: /* FDR */ + iface_attr->latency.overhead = 700e-9; + signal_rate = 14.0625e9; + encoding = 64.0/66.0; + break; + case 32: /* EDR / 100g Eth */ + iface_attr->latency.overhead = 600e-9; + signal_rate = 25.78125e9; + encoding = 64.0/66.0; + break; + case 64: /* 50g Eth */ + iface_attr->latency.overhead = 600e-9; + signal_rate = 25.78125e9 * 2; + encoding = 64.0/66.0; + break; + default: + ucs_error("Invalid active_speed on %s:%d: %d", + UCT_IB_IFACE_ARG(iface), active_speed); + return UCS_ERR_IO_ERROR; + } + + status = uct_ib_iface_get_numa_latency(iface, &numa_latency); + if (status != UCS_OK) { + return status; + } + + iface_attr->latency.overhead 
+= numa_latency; + iface_attr->latency.growth = 0; + + /* Wire speed calculation: Width * SignalRate * Encoding */ + width = ib_port_widths[width_idx]; + wire_speed = (width * signal_rate * encoding) / 8.0; + + /* Calculate packet overhead */ + mtu = ucs_min(uct_ib_mtu_value((enum ibv_mtu)active_mtu), + iface->config.seg_size); + + extra_pkt_len = UCT_IB_BTH_LEN + xport_hdr_len + UCT_IB_ICRC_LEN + UCT_IB_VCRC_LEN + UCT_IB_DELIM_LEN; + + if (uct_ib_iface_is_roce(iface)) { + extra_pkt_len += UCT_IB_GRH_LEN + UCT_IB_ROCE_LEN; + iface_attr->latency.overhead += 200e-9; + } else { + /* TODO check if UCT_IB_DELIM_LEN is present in RoCE as well */ + extra_pkt_len += UCT_IB_LRH_LEN; + } + + iface_attr->bandwidth.shared = ucs_min((wire_speed * mtu) / (mtu + extra_pkt_len), md->pci_bw); + iface_attr->bandwidth.dedicated = 0; + iface_attr->priority = uct_ib_device_spec(dev)->priority; + + return UCS_OK; +} + +ucs_status_t uct_ib_iface_event_fd_get(uct_iface_h tl_iface, int *fd_p) +{ + uct_ib_iface_t *iface = ucs_derived_of(tl_iface, uct_ib_iface_t); + *fd_p = iface->comp_channel->fd; + return UCS_OK; +} + +ucs_status_t uct_ib_iface_pre_arm(uct_ib_iface_t *iface) +{ + int res, send_cq_count, recv_cq_count; + struct ibv_cq *cq; + void *cq_context; + + send_cq_count = 0; + recv_cq_count = 0; + do { + res = ibv_get_cq_event(iface->comp_channel, &cq, &cq_context); + if (0 == res) { + if (iface->cq[UCT_IB_DIR_TX] == cq) { + iface->ops->event_cq(iface, UCT_IB_DIR_TX); + ++send_cq_count; + } + if (iface->cq[UCT_IB_DIR_RX] == cq) { + iface->ops->event_cq(iface, UCT_IB_DIR_RX); + ++recv_cq_count; + } + } + } while (res == 0); + + if (errno != EAGAIN) { + return UCS_ERR_IO_ERROR; + } + + if (send_cq_count > 0) { + ibv_ack_cq_events(iface->cq[UCT_IB_DIR_TX], send_cq_count); + } + + if (recv_cq_count > 0) { + ibv_ack_cq_events(iface->cq[UCT_IB_DIR_RX], recv_cq_count); + } + + /* avoid re-arming the interface if any events exists */ + if ((send_cq_count > 0) || (recv_cq_count > 0)) { + 
ucs_trace("arm_cq: got %d send and %d recv events, returning BUSY", + send_cq_count, recv_cq_count); + return UCS_ERR_BUSY; + } + + return UCS_OK; +} + +ucs_status_t uct_ib_iface_arm_cq(uct_ib_iface_t *iface, + uct_ib_dir_t dir, + int solicited_only) +{ + int ret; + + ret = ibv_req_notify_cq(iface->cq[dir], solicited_only); + if (ret != 0) { + ucs_error("ibv_req_notify_cq("UCT_IB_IFACE_FMT", %d, sol=%d) failed: %m", + UCT_IB_IFACE_ARG(iface), dir, solicited_only); + return UCS_ERR_IO_ERROR; + } + return UCS_OK; +} diff --git a/src/uct/ib/base/ib_iface.h b/src/uct/ib/base/ib_iface.h new file mode 100644 index 0000000..ea9c80e --- /dev/null +++ b/src/uct/ib/base/ib_iface.h @@ -0,0 +1,593 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_IB_IFACE_H +#define UCT_IB_IFACE_H + +#include "ib_md.h" + +#include +#include +#include +#include +#include +#include + +#define UCT_IB_MAX_IOV 8UL +#define UCT_IB_IFACE_NULL_RES_DOMAIN_KEY 0u +#define UCT_IB_MAX_ATOMIC_SIZE sizeof(uint64_t) + + +/* Forward declarations */ +typedef struct uct_ib_iface_config uct_ib_iface_config_t; +typedef struct uct_ib_iface_ops uct_ib_iface_ops_t; +typedef struct uct_ib_iface uct_ib_iface_t; + + +/** + * IB port/path MTU. + */ +typedef enum uct_ib_mtu { + UCT_IB_MTU_DEFAULT = 0, + UCT_IB_MTU_512 = 1, + UCT_IB_MTU_1024 = 2, + UCT_IB_MTU_2048 = 3, + UCT_IB_MTU_4096 = 4, + UCT_IB_MTU_LAST +} uct_ib_mtu_t; + + +/** + * Traffic direction. 
+ */ +typedef enum { + UCT_IB_DIR_RX, + UCT_IB_DIR_TX, + UCT_IB_DIR_NUM +} uct_ib_dir_t; + +enum { + UCT_IB_QPT_UNKNOWN, +#if HAVE_DC_EXP + UCT_IB_QPT_DCI = IBV_EXP_QPT_DC_INI, +#elif HAVE_DC_DV + UCT_IB_QPT_DCI = IBV_QPT_DRIVER, +#endif +}; + + +/** + * IB address packing flags + */ +enum { + UCT_IB_ADDRESS_PACK_FLAG_ETH = UCS_BIT(0), + UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID = UCS_BIT(1), + UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX = UCS_BIT(2) +}; + + +struct uct_ib_iface_config { + uct_iface_config_t super; + + size_t seg_size; /* Maximal size of copy-out sends */ + + struct { + unsigned queue_len; /* Queue length */ + unsigned max_batch; /* How many fragments can be batched to one post send */ + unsigned max_poll; /* How many wcs can be picked when polling tx cq */ + size_t min_inline; /* Inline space to reserve for sends */ + size_t inl_resp; /* Inline space to reserve for responses */ + unsigned min_sge; /* How many SG entries to support */ + uct_iface_mpool_config_t mp; + + /* Event moderation parameters */ + unsigned cq_moderation_count; + double cq_moderation_period; + } tx; + + struct { + unsigned queue_len; /* Queue length */ + unsigned max_batch; /* How many buffers can be batched to one post receive */ + unsigned max_poll; /* How many wcs can be picked when polling rx cq */ + size_t inl; /* Inline space to reserve in CQ/QP */ + uct_iface_mpool_config_t mp; + + /* Event moderation parameters */ + unsigned cq_moderation_count; + double cq_moderation_period; + } rx; + + /* Change the address type */ + int addr_type; + + /* Force global routing */ + int is_global; + + /* IB SL to use */ + unsigned sl; + + /* IB Traffic Class to use */ + unsigned long traffic_class; + + /* IB hop limit / TTL */ + unsigned hop_limit; + + /* Ranges of path bits */ + UCS_CONFIG_ARRAY_FIELD(ucs_range_spec_t, ranges) lid_path_bits; + + /* IB PKEY to use */ + unsigned pkey_value; + + /* Multiple resource domains */ + int enable_res_domain; +}; + + +typedef struct uct_ib_qp_attr { + 
int qp_type; + struct ibv_qp_cap cap; + int port; + struct ibv_srq *srq; + uint32_t srq_num; + unsigned sq_sig_all; + unsigned max_inl_recv; + unsigned max_inl_resp; +#if HAVE_DECL_IBV_EXP_CREATE_QP + struct ibv_exp_qp_init_attr ibv; +#elif HAVE_DECL_IBV_CREATE_QP_EX + struct ibv_qp_init_attr_ex ibv; +#else + struct ibv_qp_init_attr ibv; +#endif +} uct_ib_qp_attr_t; + + +typedef ucs_status_t (*uct_ib_iface_create_cq_func_t)(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector, int ignore_overrun, + size_t *inl, struct ibv_cq **cq_p); + +typedef ucs_status_t (*uct_ib_iface_arm_cq_func_t)(uct_ib_iface_t *iface, + uct_ib_dir_t dir, + int solicited_only); + +typedef void (*uct_ib_iface_event_cq_func_t)(uct_ib_iface_t *iface, + uct_ib_dir_t dir); + +typedef void (*uct_ib_iface_handle_failure_func_t)(uct_ib_iface_t *iface, void *arg, + ucs_status_t status); + +typedef ucs_status_t (*uct_ib_iface_set_ep_failed_func_t)(uct_ib_iface_t *iface, uct_ep_h ep, + ucs_status_t status); + + +struct uct_ib_iface_ops { + uct_iface_ops_t super; + uct_ib_iface_create_cq_func_t create_cq; + uct_ib_iface_arm_cq_func_t arm_cq; + uct_ib_iface_event_cq_func_t event_cq; + uct_ib_iface_handle_failure_func_t handle_failure; + uct_ib_iface_set_ep_failed_func_t set_ep_failed; +}; + + +struct uct_ib_iface { + uct_base_iface_t super; + + struct ibv_cq *cq[UCT_IB_DIR_NUM]; + struct ibv_comp_channel *comp_channel; + uct_recv_desc_t release_desc; + + uint8_t *path_bits; + unsigned path_bits_count; + uint16_t pkey_index; + uint16_t pkey_value; + uint8_t addr_size; + union ibv_gid gid; + int is_roce_v2; + + struct { + unsigned rx_payload_offset; /* offset from desc to payload */ + unsigned rx_hdr_offset; /* offset from desc to network header */ + unsigned rx_headroom_offset; /* offset from desc to user headroom */ + unsigned rx_max_batch; + unsigned rx_max_poll; + unsigned tx_max_poll; + unsigned seg_size; + uint8_t max_inl_resp; + uint8_t port_num; + uint8_t sl; 
+ uint8_t traffic_class; + uint8_t hop_limit; + uint8_t gid_index; /* IB GID index to use */ + uint8_t enable_res_domain; /* Disable multiple resource domains */ + uint8_t qp_type; + uint8_t force_global_addr; + size_t max_iov; /* Maximum buffers in IOV array */ + } config; + + uct_ib_iface_ops_t *ops; +}; + + +typedef struct uct_ib_fence_info { + uint16_t fence_beat; /* 16bit is enough because if it wraps around, + * it means the older ops are already completed + * because QP size is less than 64k */ +} uct_ib_fence_info_t; + + +enum { + UCT_IB_CQ_IGNORE_OVERRUN = UCS_BIT(0), + UCT_IB_TM_SUPPORTED = UCS_BIT(1) +}; + +typedef struct uct_ib_iface_init_attr { + unsigned rx_priv_len; /* Length of transport private data to reserve */ + unsigned rx_hdr_len; /* Length of transport network header */ + unsigned tx_cq_len; /* Send CQ length */ + unsigned rx_cq_len; /* Receive CQ length */ + size_t seg_size; /* Transport segment size */ + unsigned fc_req_size; /* Flow control request size */ + int qp_type; /* IB QP type */ + int flags; /* Various flags (see enum) */ +} uct_ib_iface_init_attr_t; + +UCS_CLASS_DECLARE(uct_ib_iface_t, uct_ib_iface_ops_t*, uct_md_h, uct_worker_h, + const uct_iface_params_t*, const uct_ib_iface_config_t*, + const uct_ib_iface_init_attr_t*); + +/* + * The offset to the payload is the maximum between user-requested headroom + * and transport-specific data/header. When the active message callback is invoked, + * it gets a pointer to the beginning of the headroom. + * The headroom can be either smaller (1) or larger (2) than the transport data. + * + * (1) + * + * + * | + * | + * uct_recv_desc_t | + * | | + * | am_callback/tag_unexp_callback + * | | + * +------+------+---+-----------+---------+ + * | LKey | ??? 
| D | Head Room | Payload | + * +------+------+---+--+--------+---------+ + * | LKey | TL data | TL hdr | Payload | + * +------+-------------+--------+---------+ + * | + * post_receive + * + * (2) + * am_callback/tag_unexp_callback + * | + * +------+---+------------------+---------+ + * | LKey | D | Head Room | Payload | + * +------+---+-----+---+--------+---------+ + * | LKey | TL data | ? | TL hdr | Payload | + * +------+---------+---+--------+---------+ + * | + * post_receive + * + * <--- rx_headroom --> + * <------- rx_payload_offset ---> + * <--- rx_hdr_offset --> + * + */ +typedef struct uct_ib_iface_recv_desc { + uint32_t lkey; +} UCS_S_PACKED uct_ib_iface_recv_desc_t; + + + +extern ucs_config_field_t uct_ib_iface_config_table[]; +extern const char *uct_ib_mtu_values[]; + + +/** + * Create memory pool of receive descriptors. + */ +ucs_status_t uct_ib_iface_recv_mpool_init(uct_ib_iface_t *iface, + const uct_ib_iface_config_t *config, + const char *name, ucs_mpool_t *mp); + +void uct_ib_iface_release_desc(uct_recv_desc_t *self, void *desc); + + +static UCS_F_ALWAYS_INLINE void +uct_ib_iface_invoke_am_desc(uct_ib_iface_t *iface, uint8_t am_id, void *data, + unsigned length, uct_ib_iface_recv_desc_t *ib_desc) +{ + void *desc = (char*)ib_desc + iface->config.rx_headroom_offset; + ucs_status_t status; + + status = uct_iface_invoke_am(&iface->super, am_id, data, length, + UCT_CB_PARAM_FLAG_DESC); + if (status == UCS_OK) { + ucs_mpool_put_inline(ib_desc); + } else { + uct_recv_desc(desc) = &iface->release_desc; + } +} + + +/** + * @return Whether the port used by this interface is RoCE + */ +int uct_ib_iface_is_roce(uct_ib_iface_t *iface); + + +/** + * @return Whether the port used by this interface is IB + */ +int uct_ib_iface_is_ib(uct_ib_iface_t *iface); + + +/** + * Get the expected size of IB packed address. + * + * @param [in] gid GID address to pack. + * @param [in] pack_flags Packing flags, UCT_IB_ADDRESS_PACK_FLAG_xx. 
+ * + * @return IB address size of the given link scope. + */ +size_t uct_ib_address_size(const union ibv_gid *gid, unsigned pack_flags); + + +/** + * @return IB address size of the given iface. + */ +size_t uct_ib_iface_address_size(uct_ib_iface_t *iface); + + +/** + * Pack IB address. + * + * @param [in] gid GID address to pack. + * @param [in] lid LID address to pack. + * @param [in] pack_flags Packing flags, UCT_IB_ADDRESS_PACK_FLAG_xx. + * @param [in/out] ib_addr Filled with packed ib address. Size of the structure + * must be at least what @ref uct_ib_address_size() + * returns for the given scope. + */ +void uct_ib_address_pack(const union ibv_gid *gid, uint16_t lid, + unsigned pack_flags, uct_ib_address_t *ib_addr); + + +/** + * Pack the IB address of the given iface. + * + * @param [in] iface Iface whose IB address to pack. + * @param [in] gid GID address to pack. + * @param [in] lid LID address to pack. + * @param [in/out] ib_addr Filled with packed ib address. Size of the structure + * must be at least what @ref uct_ib_address_size() + * returns for the given scope. + */ +void uct_ib_iface_address_pack(uct_ib_iface_t *iface, const union ibv_gid *gid, + uint16_t lid, uct_ib_address_t *ib_addr); + + +/** + * Unpack IB address. + * + * @param [in] ib_addr IB address to unpack. + * @param [out] lid Filled with address LID, or 0 if not present. + * @param [out] gid Filled with address GID, or 0 if not present. + */ +void uct_ib_address_unpack(const uct_ib_address_t *ib_addr, uint16_t *lid, + union ibv_gid *gid); + + +/** + * Convert IB address to a human-readable string. 
+ */ +const char *uct_ib_address_str(const uct_ib_address_t *ib_addr, char *buf, + size_t max); + +ucs_status_t uct_ib_iface_get_device_address(uct_iface_h tl_iface, + uct_device_addr_t *dev_addr); + +int uct_ib_iface_is_reachable(const uct_iface_h tl_iface, const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr); + +/* + * @param xport_hdr_len How many bytes this transport adds on top of IB header (LRH+BTH+iCRC+vCRC) + */ +ucs_status_t uct_ib_iface_query(uct_ib_iface_t *iface, size_t xport_hdr_len, + uct_iface_attr_t *iface_attr); + +static inline uct_ib_md_t* uct_ib_iface_md(uct_ib_iface_t *iface) +{ + return ucs_derived_of(iface->super.md, uct_ib_md_t); +} + +static inline uct_ib_device_t* uct_ib_iface_device(uct_ib_iface_t *iface) +{ + return &uct_ib_iface_md(iface)->dev; +} + +static inline struct ibv_port_attr* uct_ib_iface_port_attr(uct_ib_iface_t *iface) +{ + return uct_ib_device_port_attr(uct_ib_iface_device(iface), iface->config.port_num); +} + +static inline void* uct_ib_iface_recv_desc_hdr(uct_ib_iface_t *iface, + uct_ib_iface_recv_desc_t *desc) +{ + return (void*)((char *)desc + iface->config.rx_hdr_offset); +} + +typedef struct uct_ib_recv_wr { + struct ibv_recv_wr ibwr; + struct ibv_sge sg; +} uct_ib_recv_wr_t; + +/** + * prepare a list of n work requests that can be passed to + * ibv_post_recv() + * + * @return number of prepared wrs + */ +int uct_ib_iface_prepare_rx_wrs(uct_ib_iface_t *iface, ucs_mpool_t *mp, + uct_ib_recv_wr_t *wrs, unsigned n); + +ucs_status_t uct_ib_iface_create_ah(uct_ib_iface_t *iface, + struct ibv_ah_attr *ah_attr, + struct ibv_ah **ah_p); + +ucs_status_t uct_ib_iface_pre_arm(uct_ib_iface_t *iface); + +ucs_status_t uct_ib_iface_event_fd_get(uct_iface_h iface, int *fd_p); + +ucs_status_t uct_ib_iface_arm_cq(uct_ib_iface_t *iface, + uct_ib_dir_t dir, + int solicited_only); + +ucs_status_t uct_ib_verbs_create_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector, int 
ignore_overrun, + size_t *inl, struct ibv_cq **cq_p); + +ucs_status_t uct_ib_iface_create_qp(uct_ib_iface_t *iface, + uct_ib_qp_attr_t *attr, + struct ibv_qp **qp_p); + +void uct_ib_iface_fill_attr(uct_ib_iface_t *iface, + uct_ib_qp_attr_t *attr); + + +#define UCT_IB_IFACE_FMT \ + "%s:%d" +#define UCT_IB_IFACE_ARG(_iface) \ + uct_ib_device_name(uct_ib_iface_device(_iface)), (_iface)->config.port_num + + +#define UCT_IB_IFACE_VERBS_COMPLETION_ERR(_type, _iface, _i, _wc) \ + ucs_fatal("%s completion[%d] with error on %s/%p: %s, vendor_err 0x%x wr_id 0x%lx", \ + _type, _i, uct_ib_device_name(uct_ib_iface_device(_iface)), _iface, \ + uct_ib_wc_status_str(_wc[i].status), _wc[i].vendor_err, \ + _wc[i].wr_id); + +#define UCT_IB_IFACE_VERBS_FOREACH_RXWQE(_iface, _i, _hdr, _wc, _wc_count) \ + for (_i = 0; _i < _wc_count && ({ \ + if (ucs_unlikely(_wc[i].status != IBV_WC_SUCCESS)) { \ + UCT_IB_IFACE_VERBS_COMPLETION_ERR("receive", _iface, _i, _wc); \ + } \ + _hdr = (typeof(_hdr))uct_ib_iface_recv_desc_hdr(_iface, \ + (uct_ib_iface_recv_desc_t *)(uintptr_t)_wc[i].wr_id); \ + VALGRIND_MAKE_MEM_DEFINED(_hdr, _wc[i].byte_len); \ + 1; }); ++_i) + +#define UCT_IB_MAX_ZCOPY_LOG_SGE(_iface) \ + (uct_ib_iface_device(_iface)->max_zcopy_log_sge) + +/** + * Fill ibv_sge data structure by data provided in uct_iov_t + * The function avoids copying IOVs with zero length + * + * @return Number of elements in sge[] + */ +static UCS_F_ALWAYS_INLINE +size_t uct_ib_verbs_sge_fill_iov(struct ibv_sge *sge, const uct_iov_t *iov, + size_t iovcnt) +{ + size_t iov_it, sge_it = 0; + + for (iov_it = 0; iov_it < iovcnt; ++iov_it) { + sge[sge_it].length = uct_iov_get_length(&iov[iov_it]); + if (sge[sge_it].length > 0) { + sge[sge_it].addr = (uintptr_t)(iov[iov_it].buffer); + } else { + continue; /* to avoid zero length elements in sge */ + } + + if (iov[sge_it].memh == UCT_MEM_HANDLE_NULL) { + sge[sge_it].lkey = 0; + } else { + sge[sge_it].lkey = ((uct_ib_mem_t *)(iov[iov_it].memh))->lkey; + } + 
++sge_it; + } + + return sge_it; +} + + +static UCS_F_ALWAYS_INLINE +size_t uct_ib_iface_get_max_iov(uct_ib_iface_t *iface) +{ + return iface->config.max_iov; +} + + +static UCS_F_ALWAYS_INLINE +void uct_ib_iface_set_max_iov(uct_ib_iface_t *iface, size_t max_iov) +{ + size_t min_iov_requested; + + ucs_assert((ssize_t)max_iov > 0); + + min_iov_requested = ucs_max(max_iov, 1UL); /* max_iov mustn't be 0 */ + iface->config.max_iov = ucs_min(UCT_IB_MAX_IOV, min_iov_requested); +} + + +static UCS_F_ALWAYS_INLINE +void uct_ib_iface_fill_ah_attr_from_gid_lid(uct_ib_iface_t *iface, uint16_t lid, + const union ibv_gid *gid, + struct ibv_ah_attr *ah_attr) +{ + memset(ah_attr, 0, sizeof(*ah_attr)); + + ah_attr->sl = iface->config.sl; + ah_attr->src_path_bits = iface->path_bits[0]; + ah_attr->dlid = lid | iface->path_bits[0]; + ah_attr->port_num = iface->config.port_num; + ah_attr->grh.traffic_class = iface->config.traffic_class; + + if (iface->config.force_global_addr || + (iface->gid.global.subnet_prefix != gid->global.subnet_prefix)) { + ucs_assert_always(gid->global.interface_id != 0); + ah_attr->is_global = 1; + ah_attr->grh.dgid = *gid; + ah_attr->grh.sgid_index = iface->config.gid_index; + ah_attr->grh.hop_limit = iface->config.hop_limit; + } else { + ah_attr->is_global = 0; + } +} + +static UCS_F_ALWAYS_INLINE +void uct_ib_iface_fill_ah_attr_from_addr(uct_ib_iface_t *iface, + const uct_ib_address_t *ib_addr, + struct ibv_ah_attr *ah_attr) +{ + union ibv_gid gid; + uint16_t lid; + + ucs_assert(!uct_ib_iface_is_roce(iface) == + !(ib_addr->flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH)); + + uct_ib_address_unpack(ib_addr, &lid, &gid); + uct_ib_iface_fill_ah_attr_from_gid_lid(iface, lid, &gid, ah_attr); +} + +static UCS_F_ALWAYS_INLINE +size_t uct_ib_iface_hdr_size(size_t max_inline, size_t min_size) +{ + return (size_t)ucs_max((ssize_t)(max_inline - min_size), 0); +} + +static UCS_F_ALWAYS_INLINE void +uct_ib_fence_info_init(uct_ib_fence_info_t* fence) +{ + fence->fence_beat 
= 0; +} + +#endif diff --git a/src/uct/ib/base/ib_log.c b/src/uct/ib/base/ib_log.c new file mode 100644 index 0000000..fcb807f --- /dev/null +++ b/src/uct/ib/base/ib_log.c @@ -0,0 +1,343 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "ib_log.h" + +#include + + +const char *uct_ib_qp_type_str(int qp_type) +{ + switch (qp_type) { + case IBV_QPT_RC: + return "RC"; + case IBV_QPT_UD: + return "UD"; +#if HAVE_TL_DC + case UCT_IB_QPT_DCI: + return "DCI"; +#endif + default: + ucs_bug("invalid qp type: %d", qp_type); + return "unknown"; + } +} + +void uct_ib_log_dump_opcode(uct_ib_opcode_t *op, int signal, int fence, int se, + char *buf, size_t max) +{ + snprintf(buf, max, "%s %c%c%c", op->name, + signal ? 's' : '-', + fence ? 'f' : '-', + se ? 'e' : '-'); +} + +void uct_ib_log_dump_sg_list(uct_ib_iface_t *iface, uct_am_trace_type_t type, + struct ibv_sge *sg_list, int num_sge, + uint64_t inline_bitmap, + uct_log_data_dump_func_t data_dump, + char *buf, size_t max) +{ + char data[256]; + size_t total_len = 0; + size_t total_valid_len = 0; + char *s = buf; + char *ends = buf + max; + void *md = data; + size_t len; + int i; + + for (i = 0; i < num_sge; ++i) { + if (inline_bitmap & UCS_BIT(i)) { + snprintf(s, ends - s, " [inl len %u]", sg_list[i].length); /* sge length is an unsigned 32-bit value */ + } else { + snprintf(s, ends - s, " [va 0x%"PRIx64" len %u lkey 0x%x]", + sg_list[i].addr, sg_list[i].length, sg_list[i].lkey); + } + + s += strlen(s); + + if (data_dump) { + len = ucs_min(sg_list[i].length, + UCS_PTR_BYTE_DIFF(md, data) + sizeof(data)); + memcpy(md, (void*)sg_list[i].addr, len); + + md = UCS_PTR_BYTE_OFFSET(md, len); + total_len += len; + total_valid_len += sg_list[i].length; + } + } + + if (data_dump) { + data_dump(&iface->super, type, data, total_len, total_valid_len, s, ends - s); + } +} + +void uct_ib_log_dump_remote_addr(uint64_t remote_addr, uint32_t rkey, + char *buf, size_t max) +{ + snprintf(buf, max, " [rva 
0x%"PRIx64" rkey 0x%x]", remote_addr, rkey); +} + +void uct_ib_log_dump_atomic_fadd(uint64_t add, char *buf, size_t max) +{ + snprintf(buf, max, " [add %"PRIi64"]", add); /* 64-bit wide specifier, as in the masked variant below */ +} + +void uct_ib_log_dump_atomic_cswap(uint64_t compare, uint64_t swap, char *buf, size_t max) +{ + snprintf(buf, max, " [cmp %"PRIi64" swap %"PRIi64"]", compare, swap); /* 64-bit wide specifiers, as in the masked variant below */ +} + +void uct_ib_log_dump_atomic_masked_fadd(int argsize, uint64_t add, uint64_t boundary, + char *buf, size_t max) +{ + snprintf(buf, max, " [%dbit add %"PRIi64"/0x%"PRIx64"]", argsize * 8, add, boundary); +} + +void uct_ib_log_dump_atomic_masked_cswap(int argsize, uint64_t compare, uint64_t compare_mask, + uint64_t swap, uint64_t swap_mask, + char *buf, size_t max) +{ + snprintf(buf, max, " [%d bit cmp %"PRIi64"/0x%"PRIx64" swap %"PRIi64"/0x%"PRIx64"]", + argsize * 8, compare, compare_mask, swap, swap_mask); +} + +void uct_ib_log_dump_recv_completion(uct_ib_iface_t *iface, uint32_t local_qp, + uint32_t sender_qp, uint16_t sender_lid, + void *data, size_t length, + uct_log_data_dump_func_t data_dump, + char *buf, size_t max) +{ + char *s = buf; + char *ends = buf + max; + + snprintf(s, ends - s, "RECV qp 0x%x", local_qp); + s += strlen(s); + + if (iface->config.qp_type == IBV_QPT_UD) { + snprintf(s, ends - s, " [slid %d sqp 0x%x]", sender_lid, sender_qp); + s += strlen(s); + } + + snprintf(s, ends - s, " [va %p len %zu]", data, length); + s += strlen(s); + + if (data_dump != NULL) { + data_dump(&iface->super, UCT_AM_TRACE_TYPE_RECV, data, length, length, + s, ends - s); + } +} + +static void uct_ib_dump_wr_opcode(struct ibv_qp *qp, uint64_t wr_id, + uct_ib_opcode_t *op, int send_flags, + char *buf, size_t max) +{ + char *s = buf; + char *ends = buf + max; + + snprintf(s, ends - s, "QP 0x%x wrid 0x%"PRIx64" ", qp->qp_num, wr_id); + s += strlen(s); + + uct_ib_log_dump_opcode(op, + send_flags & IBV_SEND_SIGNALED, + send_flags & IBV_SEND_FENCE, + send_flags & IBV_SEND_SOLICITED, + s, ends - s); +} + +static void uct_ib_dump_wr(struct ibv_qp *qp, 
uct_ib_opcode_t *op, + struct ibv_send_wr *wr, char *buf, size_t max) +{ + char *s = buf; + char *ends = buf + max; + + if (qp->qp_type == IBV_QPT_UD) { + snprintf(s, ends - s, " [rqpn 0x%x ah %p]", wr->wr.ud.remote_qpn, + wr->wr.ud.ah); + s += strlen(s); + } + + if (op->flags & UCT_IB_OPCODE_FLAG_HAS_RADDR) { + uct_ib_log_dump_remote_addr(wr->wr.rdma.remote_addr, wr->wr.rdma.rkey, + s, ends - s); + s += strlen(s); + } + + if (op->flags & UCT_IB_OPCODE_FLAG_HAS_ATOMIC) { + uct_ib_log_dump_remote_addr(wr->wr.atomic.remote_addr, wr->wr.atomic.rkey, + s, ends - s); + s += strlen(s); + + if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD) { + uct_ib_log_dump_atomic_fadd(wr->wr.atomic.compare_add, s, ends - s); + } else if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) { + uct_ib_log_dump_atomic_cswap(wr->wr.atomic.compare_add, + wr->wr.atomic.swap, s, ends - s); + } + + /* do not forget `s += strlen(s);` here if you are + * processing more information for dumping below */ + } +} + +static void uct_ib_dump_send_wr(uct_ib_iface_t *iface, struct ibv_qp *qp, + struct ibv_send_wr *wr, int max_sge, + uct_log_data_dump_func_t data_dump, + char *buf, size_t max) +{ + static uct_ib_opcode_t opcodes[] = { + [IBV_WR_RDMA_WRITE] = { "RDMA_WRITE", UCT_IB_OPCODE_FLAG_HAS_RADDR }, + [IBV_WR_RDMA_READ] = { "RDMA_READ", UCT_IB_OPCODE_FLAG_HAS_RADDR }, + [IBV_WR_SEND] = { "SEND", 0 }, + [IBV_WR_SEND_WITH_IMM] = { "SEND_IMM", 0 }, + [IBV_WR_ATOMIC_CMP_AND_SWP] = { "CSWAP", UCT_IB_OPCODE_FLAG_HAS_ATOMIC }, + [IBV_WR_ATOMIC_FETCH_AND_ADD] = { "FETCH_ADD", UCT_IB_OPCODE_FLAG_HAS_ATOMIC }, + }; + + char *s = buf; + char *ends = buf + max; + uct_ib_opcode_t *op = &opcodes[wr->opcode]; + + uct_ib_dump_wr_opcode(qp, wr->wr_id, op, wr->send_flags, s, ends - s); + s += strlen(s); + + uct_ib_dump_wr(qp, op, wr, s, ends - s); + s += strlen(s); + + uct_ib_log_dump_sg_list(iface, UCT_AM_TRACE_TYPE_SEND, wr->sg_list, + ucs_min(wr->num_sge, max_sge), + (wr->send_flags & IBV_SEND_INLINE) ? 
-1 : 0, + data_dump, s, ends - s); +} + +void __uct_ib_log_post_send(const char *file, int line, const char *function, + uct_ib_iface_t *iface, struct ibv_qp *qp, + struct ibv_send_wr *wr, int max_sge, + uct_log_data_dump_func_t data_dump_cb) +{ + char buf[256] = {0}; + while (wr != NULL) { + uct_ib_dump_send_wr(iface, qp, wr, max_sge, data_dump_cb, buf, sizeof(buf) - 1); + uct_log_data(file, line, function, buf); + wr = wr->next; + } +} + +void __uct_ib_log_recv_completion(const char *file, int line, const char *function, + uct_ib_iface_t *iface, uint32_t l_qp, + uint32_t r_qp, uint16_t slid, void *data, + size_t length, + uct_log_data_dump_func_t packet_dump_cb) +{ + char buf[256] = {0}; + size_t len; + + len = length; + if (iface->config.qp_type == IBV_QPT_UD) { + len -= UCT_IB_GRH_LEN; + data = UCS_PTR_BYTE_OFFSET(data, UCT_IB_GRH_LEN); + } + uct_ib_log_dump_recv_completion(iface, l_qp, r_qp, slid, data, len, + packet_dump_cb, buf, sizeof(buf) - 1); + uct_log_data(file, line, function, buf); +} + +#if HAVE_DECL_IBV_EXP_POST_SEND +static void uct_ib_dump_exp_send_wr(uct_ib_iface_t *iface, struct ibv_qp *qp, + struct ibv_exp_send_wr *wr, int max_sge, + uct_log_data_dump_func_t data_dump_cb, + char *buf, size_t max) +{ + static uct_ib_opcode_t exp_opcodes[] = { +#if HAVE_DECL_IBV_EXP_WR_NOP + [IBV_EXP_WR_NOP] = { "NOP", 0}, +#endif + [IBV_EXP_WR_RDMA_WRITE] = { "RDMA_WRITE", UCT_IB_OPCODE_FLAG_HAS_RADDR }, + [IBV_EXP_WR_RDMA_READ] = { "RDMA_READ", UCT_IB_OPCODE_FLAG_HAS_RADDR }, + [IBV_EXP_WR_SEND] = { "SEND", 0 }, + [IBV_EXP_WR_SEND_WITH_IMM] = { "SEND_IMM", 0 }, + [IBV_EXP_WR_ATOMIC_CMP_AND_SWP] = { "CSWAP", UCT_IB_OPCODE_FLAG_HAS_ATOMIC }, + [IBV_EXP_WR_ATOMIC_FETCH_AND_ADD] = { "FETCH_ADD", UCT_IB_OPCODE_FLAG_HAS_ATOMIC }, +#if HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP + [IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP] = { "MASKED_CSWAP", + UCT_IB_OPCODE_FLAG_HAS_EXT_ATOMIC }, +#endif +#if HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD + 
[IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD] = { "MASKED_FETCH_ADD", + UCT_IB_OPCODE_FLAG_HAS_EXT_ATOMIC }, +#endif + }; + + char *s = buf; + char *ends = buf + max; + uct_ib_opcode_t *op = &exp_opcodes[wr->exp_opcode]; + + /* opcode in legacy mode */ + UCS_STATIC_ASSERT((int)IBV_SEND_SIGNALED == (int)IBV_EXP_SEND_SIGNALED); + UCS_STATIC_ASSERT((int)IBV_SEND_FENCE == (int)IBV_EXP_SEND_FENCE); + UCS_STATIC_ASSERT((int)IBV_SEND_SOLICITED == (int)IBV_EXP_SEND_SOLICITED); + uct_ib_dump_wr_opcode(qp, wr->wr_id, op, wr->exp_send_flags, s, ends - s); + s += strlen(s); + + /* TODO DC address handle */ + + /* WR data in legacy mode */ + UCS_STATIC_ASSERT((int)IBV_WR_ATOMIC_FETCH_AND_ADD == (int)IBV_EXP_WR_ATOMIC_FETCH_AND_ADD); + UCS_STATIC_ASSERT((int)IBV_WR_ATOMIC_CMP_AND_SWP == (int)IBV_EXP_WR_ATOMIC_CMP_AND_SWP); + UCS_STATIC_ASSERT(ucs_offsetof(struct ibv_send_wr, opcode) == + ucs_offsetof(struct ibv_exp_send_wr, exp_opcode)); + UCS_STATIC_ASSERT(ucs_offsetof(struct ibv_send_wr, wr) == + ucs_offsetof(struct ibv_exp_send_wr, wr)); + uct_ib_dump_wr(qp, op, (struct ibv_send_wr*)wr, s, ends - s); + s += strlen(s); + + /* Extended atomics */ +#if HAVE_IB_EXT_ATOMICS + if (op->flags & UCT_IB_OPCODE_FLAG_HAS_EXT_ATOMIC) { + uct_ib_log_dump_remote_addr(wr->ext_op.masked_atomics.remote_addr, + wr->ext_op.masked_atomics.rkey, + s, ends - s); + s += strlen(s); + + if (wr->exp_opcode == IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD) { + uct_ib_log_dump_atomic_masked_fadd(wr->ext_op.masked_atomics.log_arg_sz, + wr->ext_op.masked_atomics.wr_data.inline_data.op.fetch_add.add_val, + wr->ext_op.masked_atomics.wr_data.inline_data.op.fetch_add.field_boundary, + s, ends - s); + } else if (wr->exp_opcode == IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP) { + uct_ib_log_dump_atomic_masked_cswap(wr->ext_op.masked_atomics.log_arg_sz, + wr->ext_op.masked_atomics.wr_data.inline_data.op.cmp_swap.compare_val, + wr->ext_op.masked_atomics.wr_data.inline_data.op.cmp_swap.compare_mask, + 
wr->ext_op.masked_atomics.wr_data.inline_data.op.cmp_swap.swap_val, + wr->ext_op.masked_atomics.wr_data.inline_data.op.cmp_swap.swap_mask, + s, ends - s); + } + s += strlen(s); + } +#endif + + uct_ib_log_dump_sg_list(iface, UCT_AM_TRACE_TYPE_SEND, wr->sg_list, + ucs_min(wr->num_sge, max_sge), + (wr->exp_send_flags & IBV_EXP_SEND_INLINE) ? -1 : 0, + data_dump_cb, s, ends - s); +} + +void __uct_ib_log_exp_post_send(const char *file, int line, const char *function, + uct_ib_iface_t *iface, struct ibv_qp *qp, + struct ibv_exp_send_wr *wr, int max_sge, + uct_log_data_dump_func_t packet_dump_cb) +{ + char buf[256] = {0}; + while (wr != NULL) { + uct_ib_dump_exp_send_wr(iface, qp, wr, max_sge, packet_dump_cb, + buf, sizeof(buf) - 1); + uct_log_data(file, line, function, buf); + wr = wr->next; + } +} + +#endif diff --git a/src/uct/ib/base/ib_log.h b/src/uct/ib/base/ib_log.h new file mode 100644 index 0000000..bf7e466 --- /dev/null +++ b/src/uct/ib/base/ib_log.h @@ -0,0 +1,101 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCT_IB_LOG_H +#define UCT_IB_LOG_H + +#include "ib_verbs.h" +#include "ib_iface.h" + +#include +#include +#include + + +enum { + UCT_IB_OPCODE_FLAG_HAS_RADDR = UCS_BIT(0), + UCT_IB_OPCODE_FLAG_HAS_ATOMIC = UCS_BIT(1), + UCT_IB_OPCODE_FLAG_HAS_EXT_ATOMIC = UCS_BIT(2) +}; + + +typedef struct uct_ib_opcode { + const char *name; + uint32_t flags; +} uct_ib_opcode_t; + + +const char *uct_ib_qp_type_str(int qp_type); + +void uct_ib_log_dump_opcode(uct_ib_opcode_t *op, int signal, int fence, int se, + char *buf, size_t max); + +void uct_ib_log_dump_sg_list(uct_ib_iface_t *iface, uct_am_trace_type_t type, + struct ibv_sge *sg_list, int num_sge, + uint64_t inline_bitmap, + uct_log_data_dump_func_t data_dump, + char *buf, size_t max); + +void uct_ib_log_dump_remote_addr(uint64_t remote_addr, uint32_t rkey, + char *buf, size_t max); + +void uct_ib_log_dump_atomic_fadd(uint64_t add, char *buf, size_t max); + +void uct_ib_log_dump_atomic_cswap(uint64_t compare, uint64_t swap, char *buf, size_t max); + +void uct_ib_log_dump_atomic_masked_fadd(int argsize, uint64_t add, uint64_t boundary, + char *buf, size_t max); + +void uct_ib_log_dump_atomic_masked_cswap(int argsize, uint64_t compare, uint64_t compare_mask, + uint64_t swap, uint64_t swap_mask, + char *buf, size_t max); + +void uct_ib_log_dump_recv_completion(uct_ib_iface_t *iface, uint32_t local_qp, + uint32_t sender_qp, uint16_t sender_lid, + void *data, size_t length, + uct_log_data_dump_func_t data_dump, + char *buf, size_t max); + +void __uct_ib_log_post_send(const char *file, int line, const char *function, + uct_ib_iface_t *iface, struct ibv_qp *qp, + struct ibv_send_wr *wr, int max_sge, + uct_log_data_dump_func_t packet_dump_cb); + +void __uct_ib_log_recv_completion(const char *file, int line, const char *function, + uct_ib_iface_t *iface, uint32_t l_qp, + uint32_t r_qp, uint16_t slid, void *data, + size_t length, + uct_log_data_dump_func_t packet_dump_cb); + +#if HAVE_DECL_IBV_EXP_POST_SEND +void 
__uct_ib_log_exp_post_send(const char *file, int line, const char *function, + uct_ib_iface_t *iface, struct ibv_qp *qp, + struct ibv_exp_send_wr *wr, int max_sge, + uct_log_data_dump_func_t packet_dump_cb); +#endif + + +#define uct_ib_log_post_send(_iface, _qp, _wr, _max_sge, _dump_cb) \ + if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { \ + __uct_ib_log_post_send(__FILE__, __LINE__, __FUNCTION__, \ + _iface, _qp, _wr, _max_sge, _dump_cb); \ + } + +/* Suitable for both: regular and exp wcs */ +#define uct_ib_log_recv_completion(_iface, _wc, _data, _length, _dump_cb, ...) \ + if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { \ + __uct_ib_log_recv_completion(__FILE__, __LINE__, __FUNCTION__, \ + _iface, (_wc)->qp_num, (_wc)->src_qp, (_wc)->slid, \ + _data, _length, _dump_cb, ## __VA_ARGS__); \ + } + +#define uct_ib_log_exp_post_send(_iface, _qp, _wr, _max_sge,_dump_cb) \ + if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { \ + __uct_ib_log_exp_post_send(__FILE__, __LINE__, __FUNCTION__, \ + _iface, _qp, _wr, _max_sge, _dump_cb); \ + } + +#endif diff --git a/src/uct/ib/base/ib_md.c b/src/uct/ib/base/ib_md.c new file mode 100644 index 0000000..b030194 --- /dev/null +++ b/src/uct/ib/base/ib_md.c @@ -0,0 +1,1657 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * Copyright (C) The University of Tennessee and The University + * of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ib_md.h" +#include "ib_device.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#if HAVE_PTHREAD_NP_H +#include +#endif +#include +#include + + +#define UCT_IB_MD_RCACHE_DEFAULT_ALIGN 16 + +typedef struct uct_ib_md_pci_info { + double bw; /* bandwidth */ + uint16_t payload; /* payload used to data transfer */ + uint16_t overhead; /* PHY + data link layer + header + *CRC* */ + uint16_t nack; /* number of TLC before ACK */ + uint16_t ctrl; /* length of control TLP */ + uint16_t encoding; /* number of bits in symbol encoded, 8 - gen 1/2, 128 - gen 3 */ + uint16_t decoding; /* number of bits in symbol decoded, 10 - gen 1/2, 130 - gen 3 */ + const char *name; /* name of PCI generation */ +} uct_ib_md_pci_info_t; + +static UCS_CONFIG_DEFINE_ARRAY(pci_bw, + sizeof(ucs_config_bw_spec_t), + UCS_CONFIG_TYPE_BW_SPEC); + +static const char *uct_ib_devx_objs[] = { + [UCT_IB_DEVX_OBJ_RCQP] = "rcqp", + [UCT_IB_DEVX_OBJ_RCSRQ] = "rcsrq", + [UCT_IB_DEVX_OBJ_DCT] = "dct", + [UCT_IB_DEVX_OBJ_DCSRQ] = "dcsrq", + NULL +}; + +static ucs_config_field_t uct_ib_md_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_ib_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)}, + + {"REG_METHODS", "rcache,odp,direct", + "List of registration methods in order of preference. 
Supported methods are:\n" + " odp - implicit on-demand paging\n" + " rcache - userspace registration cache\n" + " direct - direct registration\n", + ucs_offsetof(uct_ib_md_config_t, reg_methods), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"", "RCACHE_ADDR_ALIGN=" UCS_PP_MAKE_STRING(UCT_IB_MD_RCACHE_DEFAULT_ALIGN), NULL, + ucs_offsetof(uct_ib_md_config_t, rcache), + UCS_CONFIG_TYPE_TABLE(uct_md_config_rcache_table)}, + + {"MEM_REG_OVERHEAD", "16us", "Memory registration overhead", /* TODO take default from device */ + ucs_offsetof(uct_ib_md_config_t, uc_reg_cost.overhead), UCS_CONFIG_TYPE_TIME}, + + {"MEM_REG_GROWTH", "0.06ns", "Memory registration growth rate", /* TODO take default from device */ + ucs_offsetof(uct_ib_md_config_t, uc_reg_cost.growth), UCS_CONFIG_TYPE_TIME}, + + {"FORK_INIT", "try", + "Initialize a fork-safe IB library with ibv_fork_init().", + ucs_offsetof(uct_ib_md_config_t, fork_init), UCS_CONFIG_TYPE_TERNARY}, + + {"ASYNC_EVENTS", "n", + "Enable listening for async events on the device", + ucs_offsetof(uct_ib_md_config_t, async_events), UCS_CONFIG_TYPE_BOOL}, + + {"ETH_PAUSE_ON", "y", + "Whether or not 'Pause Frame' is enabled on an Ethernet network.\n" + "Pause frame is a mechanism for temporarily stopping the transmission of data to\n" + "ensure zero loss under congestion on Ethernet family computer networks.\n" + "This parameter, if set to 'no', will disqualify IB transports that may not perform\n" + "well on a lossy fabric when working with RoCE.", + ucs_offsetof(uct_ib_md_config_t, ext.eth_pause), UCS_CONFIG_TYPE_BOOL}, + + {"ODP_NUMA_POLICY", "preferred", + "Override NUMA policy for ODP regions, to avoid extra page migrations.\n" + " - default: Do no change existing policy.\n" + " - preferred/bind:\n" + " Unless the memory policy of the current thread is MPOL_BIND, set the\n" + " policy of ODP regions to MPOL_PREFERRED/MPOL_BIND, respectively.\n" + " If the numa node mask of the current thread is not defined, use the numa\n" + " nodes which 
correspond to its cpu affinity mask.", + ucs_offsetof(uct_ib_md_config_t, ext.odp.numa_policy), + UCS_CONFIG_TYPE_ENUM(ucs_numa_policy_names)}, + + {"ODP_PREFETCH", "n", + "Force prefetch of memory regions created with ODP.\n", + ucs_offsetof(uct_ib_md_config_t, ext.odp.prefetch), UCS_CONFIG_TYPE_BOOL}, + + {"ODP_MAX_SIZE", "auto", + "Maximal memory region size to enable ODP for. 0 - disable.\n", + ucs_offsetof(uct_ib_md_config_t, ext.odp.max_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {"DEVICE_SPECS", "", + "Array of custom device specification. Each element is a string of the following format:\n" + " :[:name[:[:]]]\n" + "where:\n" + " - (mandatory) pci vendor id, integer or hexadecimal.\n" + " - (mandatory) pci device id, integer or hexadecimal.\n" + " - (optional) device name.\n" + " - (optional) empty, or a combination of:\n" + " '4' - mlx4 device\n" + " '5' - mlx5 device\n" + " 'd' - DC version 1 (Connect-IB, ConnectX-4)\n" + " 'D' - DC version 2 (ConnectX-5 and above)\n" + " 'a' - Compact address vector support\n" + " - (optional) device priority, integer.\n" + "\n" + "Example: The value '0x02c9:4115:ConnectX4:5d' would specify a device named ConnectX-4\n" + "to match vendor id 0x2c9, device id 4115, with DC version 1 support.", + ucs_offsetof(uct_ib_md_config_t, custom_devices), UCS_CONFIG_TYPE_STRING_ARRAY}, + + {"PREFER_NEAREST_DEVICE", "y", + "Prefer nearest device to cpu when selecting a device from NET_DEVICES list.\n", + ucs_offsetof(uct_ib_md_config_t, ext.prefer_nearest_device), UCS_CONFIG_TYPE_BOOL}, + + {"INDIRECT_ATOMIC", "y", + "Use indirect atomic\n", + ucs_offsetof(uct_ib_md_config_t, ext.enable_indirect_atomic), UCS_CONFIG_TYPE_BOOL}, + + {"GID_INDEX", "auto", + "Port GID index to use.", + ucs_offsetof(uct_ib_md_config_t, ext.gid_index), UCS_CONFIG_TYPE_ULUNITS}, + + {"SUBNET_PREFIX", "", + "Infiniband subnet prefix to filter ports by, empty means no filter. 
" + "Relevant for IB link layer only\n" + "For example a filter for the default subnet prefix can be specified as: fe80:0:0:0", + ucs_offsetof(uct_ib_md_config_t, subnet_prefix), UCS_CONFIG_TYPE_STRING}, + + {"GPU_DIRECT_RDMA", "try", + "Use GPU Direct RDMA for HCA to access GPU pages directly\n", + ucs_offsetof(uct_ib_md_config_t, ext.enable_gpudirect_rdma), UCS_CONFIG_TYPE_TERNARY}, + +#if HAVE_EXP_UMR + {"MAX_INLINE_KLM_LIST", "inf", + "When posting a UMR, KLM lists shorter or equal to this value will be posted as inline.\n" + "The actual maximal length is also limited by device capabilities.", + ucs_offsetof(uct_ib_md_config_t, ext.max_inline_klm_list), UCS_CONFIG_TYPE_UINT}, +#endif + + {"PCI_BW", "", + "Maximum effective data transfer rate of PCI bus connected to HCA\n", + ucs_offsetof(uct_ib_md_config_t, pci_bw), UCS_CONFIG_TYPE_ARRAY(pci_bw)}, + + {"MLX5_DEVX", "try", + "DEVX support\n", + ucs_offsetof(uct_ib_md_config_t, devx), UCS_CONFIG_TYPE_TERNARY}, + + {"MLX5_DEVX_OBJECTS", "rcqp,rcsrq,dct,dcsrq", + "Objects to be created by DevX\n", + ucs_offsetof(uct_ib_md_config_t, devx_objs), + UCS_CONFIG_TYPE_BITMAP(uct_ib_devx_objs)}, + + {"REG_MT_THRESH", "4G", + "Minimal MR size to be register using multiple parallel threads.\n" + "Number of threads used will be determined by number of CPUs which " + "registering thread is bound to by hard affinity.", + ucs_offsetof(uct_ib_md_config_t, ext.min_mt_reg), UCS_CONFIG_TYPE_MEMUNITS}, + + {"REG_MT_CHUNK", "2G", + "Size of single chunk used in multithreaded registration.\n" + "Must by power of 2.", + ucs_offsetof(uct_ib_md_config_t, ext.mt_reg_chunk), UCS_CONFIG_TYPE_MEMUNITS}, + + {"REG_MT_BIND", "n", + "Enable setting CPU affinity of memory registration threads.", + ucs_offsetof(uct_ib_md_config_t, ext.mt_reg_bind), UCS_CONFIG_TYPE_BOOL}, + + {NULL} +}; + +#if ENABLE_STATS +static ucs_stats_class_t uct_ib_md_stats_class = { + .name = "", + .num_counters = UCT_IB_MD_STAT_LAST, + .counter_names = { + 
[UCT_IB_MD_STAT_MEM_ALLOC] = "mem_alloc", + [UCT_IB_MD_STAT_MEM_REG] = "mem_reg" + } +}; +#endif + +static const uct_ib_md_pci_info_t uct_ib_md_pci_info[] = { + { /* GEN 1 */ + .bw = 2.5 * UCS_GBYTE / 8, + .payload = 512, + .overhead = 28, + .nack = 5, + .ctrl = 256, + .encoding = 8, + .decoding = 10, + .name = "gen1" + }, + { /* GEN 2 */ + .bw = 5.0 * UCS_GBYTE / 8, + .payload = 512, + .overhead = 28, + .nack = 5, + .ctrl = 256, + .encoding = 8, + .decoding = 10, + .name = "gen2" + }, + { /* GEN 3 */ + .bw = 8.0 * UCS_GBYTE / 8, + .payload = 512, + .overhead = 30, + .nack = 5, + .ctrl = 256, + .encoding = 128, + .decoding = 130, + .name = "gen3" + }, +}; + +UCS_LIST_HEAD(uct_ib_md_ops_list); + +typedef struct uct_ib_verbs_mem { + uct_ib_mem_t super; + struct ibv_mr *mr; +} uct_ib_verbs_mem_t; + +typedef struct { + pthread_t thread; + void *addr; + size_t len; + size_t chunk; + uint64_t access; + struct ibv_pd *pd; + struct ibv_mr **mr; +} uct_ib_md_mem_reg_thread_t; + +static void uct_ib_check_gpudirect_driver(uct_ib_md_t *md, uct_md_attr_t *md_attr, + const char *file, + ucs_memory_type_t mem_type) +{ + if (!access(file, F_OK)) { + md_attr->cap.reg_mem_types |= UCS_BIT(mem_type); + } + + ucs_debug("%s: %s GPUDirect RDMA is %s", + uct_ib_device_name(&md->dev), ucs_memory_type_names[mem_type], + md_attr->cap.reg_mem_types & UCS_BIT(mem_type) ? 
+ "enabled" : "disabled"); +} + +static ucs_status_t uct_ib_md_query(uct_md_h uct_md, uct_md_attr_t *md_attr) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + + md_attr->cap.max_alloc = ULONG_MAX; /* TODO query device */ + md_attr->cap.max_reg = ULONG_MAX; /* TODO query device */ + md_attr->cap.flags = UCT_MD_FLAG_REG | + UCT_MD_FLAG_NEED_MEMH | + UCT_MD_FLAG_NEED_RKEY | + UCT_MD_FLAG_ADVISE; + md_attr->cap.reg_mem_types = UCS_MEMORY_TYPES_CPU_ACCESSIBLE; + md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_HOST; + md_attr->cap.detect_mem_types = 0; + + if (md->config.enable_gpudirect_rdma != UCS_NO) { + /* check if GDR driver is loaded */ + uct_ib_check_gpudirect_driver(md, md_attr, + "/sys/kernel/mm/memory_peers/nv_mem/version", + UCS_MEMORY_TYPE_CUDA); + + /* check if ROCM KFD driver is loaded */ + uct_ib_check_gpudirect_driver(md, md_attr, "/dev/kfd", + UCS_MEMORY_TYPE_ROCM); + + if (!(md_attr->cap.reg_mem_types & ~UCS_BIT(UCS_MEMORY_TYPE_HOST)) && + (md->config.enable_gpudirect_rdma == UCS_YES)) { + ucs_error("%s: Couldn't enable GPUDirect RDMA. Please make sure" + " nv_peer_mem or amdgpu plugin installed correctly.", + uct_ib_device_name(&md->dev)); + return UCS_ERR_UNSUPPORTED; + } + } + + md_attr->rkey_packed_size = UCT_IB_MD_PACKED_RKEY_SIZE; + md_attr->reg_cost = md->reg_cost; + ucs_sys_cpuset_copy(&md_attr->local_cpus, &md->dev.local_cpus); + + return UCS_OK; +} + +static void uct_ib_md_print_mem_reg_err_msg(ucs_log_level_t level, void *address, + size_t length, uint64_t access) +{ + char msg[200] = {0}; + struct rlimit limit_info; + + ucs_snprintf_zero(msg, sizeof(msg), + "%s(address=%p, length=%zu, access=0x%lx) failed: %m", + ibv_reg_mr_func_name, address, length, access); + + /* Check the value of the max locked memory which is set on the system + * (ulimit -l) */ + if (!getrlimit(RLIMIT_MEMLOCK, &limit_info) && + (limit_info.rlim_cur != RLIM_INFINITY)) { + ucs_snprintf_zero(msg + strlen(msg), sizeof(msg) - strlen(msg), + ". 
Please set max locked memory (ulimit -l) to 'unlimited' " + "(current: %llu kbytes)", limit_info.rlim_cur / UCS_KBYTE); + } + + ucs_log(level, "%s", msg); +} + +void *uct_ib_md_mem_handle_thread_func(void *arg) +{ + uct_ib_md_mem_reg_thread_t *ctx = arg; + ucs_status_t status; + int mr_idx = 0; + size_t size = 0; + ucs_time_t UCS_V_UNUSED t0 = ucs_get_time(); + + while (ctx->len) { + size = ucs_min(ctx->len, ctx->chunk); + if (ctx->access != UCT_IB_MEM_DEREG) { + ctx->mr[mr_idx] = UCS_PROFILE_NAMED_CALL(ibv_reg_mr_func_name, + ibv_reg_mr, ctx->pd, + ctx->addr, size, + ctx->access); + if (ctx->mr[mr_idx] == NULL) { + return UCS_STATUS_PTR(UCS_ERR_IO_ERROR); + } + } else { + status = uct_ib_dereg_mr(ctx->mr[mr_idx]); + if (status != UCS_OK) { + return UCS_STATUS_PTR(status); + } + } + ctx->addr = UCS_PTR_BYTE_OFFSET(ctx->addr, size); + ctx->len -= size; + mr_idx++; + } + + ucs_trace("%s %p..%p took %f usec\n", + (ctx->access == UCT_IB_MEM_DEREG) ? "dereg_mr" : "reg_mr", + ctx->mr[0]->addr, + UCS_PTR_BYTE_OFFSET(ctx->mr[mr_idx-1]->addr, size), + ucs_time_to_usec(ucs_get_time() - t0)); + + return UCS_STATUS_PTR(UCS_OK); +} + +ucs_status_t +uct_ib_md_handle_mr_list_multithreaded(uct_ib_md_t *md, void *address, + size_t length, uint64_t access, + size_t chunk, struct ibv_mr **mrs) +{ + int thread_num_mrs, thread_num, thread_idx, mr_idx = 0, cpu_id = 0; + int mr_num = ucs_div_round_up(length, chunk); + ucs_status_t status; + void *thread_status; + ucs_sys_cpuset_t parent_set, thread_set; + uct_ib_md_mem_reg_thread_t *ctxs, *cur_ctx; + pthread_attr_t attr; + char UCS_V_UNUSED affinity_str[64]; + int ret; + + ret = pthread_getaffinity_np(pthread_self(), sizeof(ucs_sys_cpuset_t), + &parent_set); + if (ret != 0) { + ucs_error("pthread_getaffinity_np() failed: %m"); + return UCS_ERR_INVALID_PARAM; + } + + thread_num = ucs_min(CPU_COUNT(&parent_set), mr_num); + + ucs_trace("multithreaded handle %p..%p access %lx threads %d affinity %s\n", + address, UCS_PTR_BYTE_OFFSET(address, 
length), access, thread_num, + ucs_make_affinity_str(&parent_set, affinity_str, sizeof(affinity_str))); + + if (thread_num == 1) { + return UCS_ERR_UNSUPPORTED; + } + + ctxs = ucs_calloc(thread_num, sizeof(*ctxs), "ib mr ctxs"); + if (ctxs == NULL) { + return UCS_ERR_NO_MEMORY; + } + + pthread_attr_init(&attr); + + status = UCS_OK; + for (thread_idx = 0; thread_idx < thread_num; thread_idx++) { + /* calculate number of mrs for each thread so each one will + * get proportional amount */ + thread_num_mrs = ucs_div_round_up(mr_num - mr_idx, thread_num - thread_idx); + + cur_ctx = &ctxs[thread_idx]; + cur_ctx->pd = md->pd; + cur_ctx->addr = UCS_PTR_BYTE_OFFSET(address, mr_idx * chunk); + cur_ctx->len = ucs_min(thread_num_mrs * chunk, length - (mr_idx * chunk)); + cur_ctx->access = access; + cur_ctx->mr = &mrs[mr_idx]; + cur_ctx->chunk = chunk; + + if (md->config.mt_reg_bind) { + while (!CPU_ISSET(cpu_id, &parent_set)) { + cpu_id++; + } + + CPU_ZERO(&thread_set); + CPU_SET(cpu_id, &thread_set); + cpu_id++; + pthread_attr_setaffinity_np(&attr, sizeof(ucs_sys_cpuset_t), &thread_set); + } + + ret = pthread_create(&cur_ctx->thread, &attr, + uct_ib_md_mem_handle_thread_func, cur_ctx); + if (ret) { + ucs_error("pthread_create() failed: %m"); + status = UCS_ERR_IO_ERROR; + thread_num = thread_idx; + break; + } + + mr_idx += thread_num_mrs; + } + + for (thread_idx = 0; thread_idx < thread_num; thread_idx++) { + cur_ctx = &ctxs[thread_idx]; + pthread_join(cur_ctx->thread, &thread_status); + if (UCS_PTR_IS_ERR(thread_status)) { /* test this worker's own exit value so that its failure is reported to the caller */ + status = UCS_PTR_STATUS(thread_status); + } + } + + ucs_free(ctxs); + pthread_attr_destroy(&attr); + + if (status != UCS_OK) { + for (mr_idx = 0; mr_idx < mr_num; mr_idx++) { + /* coverity[check_return] */ + uct_ib_dereg_mr(mrs[mr_idx]); + } + } + + return status; +} + +static ucs_status_t uct_ib_md_reg_mr(uct_ib_md_t *md, void *address, + size_t length, uint64_t access, + int silent, uct_ib_mem_t *memh) +{ + ucs_log_level_t level = silent ? 
UCS_LOG_LEVEL_DEBUG : UCS_LOG_LEVEL_ERROR; + ucs_status_t status; + + if (length >= md->config.min_mt_reg) { + UCS_PROFILE_CODE("reg ksm") { + status = md->ops->reg_multithreaded(md, address, length, + access, memh); + } + + if (status != UCS_ERR_UNSUPPORTED) { + if (status == UCS_OK) { + memh->flags |= UCT_IB_MEM_MULTITHREADED; + } else { + uct_ib_md_print_mem_reg_err_msg(level, address, length, + access); + } + + return status; + } /* if unsupported - fall back to regular registration */ + } + + status = md->ops->reg_key(md, address, length, access, memh); + if (status != UCS_OK) { + uct_ib_md_print_mem_reg_err_msg(level, address, length, access); + return status; + } + + return UCS_OK; +} + +ucs_status_t uct_ib_reg_mr(struct ibv_pd *pd, void *addr, size_t length, + uint64_t access, struct ibv_mr **mr_p) +{ + struct ibv_mr *mr; +#if HAVE_DECL_IBV_EXP_REG_MR + struct ibv_exp_reg_mr_in in = {}; + + in.pd = pd; + in.addr = addr; + in.length = length; + in.exp_access = access; + mr = UCS_PROFILE_CALL(ibv_exp_reg_mr, &in); +#else + mr = UCS_PROFILE_CALL(ibv_reg_mr, pd, addr, length, access); +#endif + if (mr == NULL) { + return UCS_ERR_IO_ERROR; + } + + *mr_p = mr; + return UCS_OK; +} + +ucs_status_t uct_ib_dereg_mr(struct ibv_mr *mr) +{ + int ret; + + if (mr == NULL) { + return UCS_OK; + } + + ret = UCS_PROFILE_CALL(ibv_dereg_mr, mr); + if (ret != 0) { + ucs_error("ibv_dereg_mr() failed: %m"); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +ucs_status_t uct_ib_dereg_mrs(struct ibv_mr **mrs, size_t mr_num) +{ + ucs_status_t s, status = UCS_OK; + size_t i; /* same type as mr_num, so the loop comparison is not mixed-sign */ + + for (i = 0; i < mr_num; i++) { + s = uct_ib_dereg_mr(mrs[i]); + if (s != UCS_OK) { + status = s; + } + } + + return status; +} + +static ucs_status_t uct_ib_memh_dereg(uct_ib_md_t *md, uct_ib_mem_t *memh) +{ + ucs_status_t s1, s2; + + if (memh->flags & UCT_IB_MEM_MULTITHREADED) { + return md->ops->dereg_multithreaded(md, memh); + } + + s2 = UCS_OK; + if (memh->flags & UCT_IB_MEM_FLAG_ATOMIC_MR) { + s2 = 
md->ops->dereg_atomic_key(md, memh); + memh->flags &= ~UCT_IB_MEM_FLAG_ATOMIC_MR; + } + + s1 = md->ops->dereg_key(md, memh); + return (s1 != UCS_OK) ? s1 : s2; +} + +static void uct_ib_memh_free(uct_ib_mem_t *memh) +{ + ucs_free(memh); +} + +static uct_ib_mem_t *uct_ib_memh_alloc(uct_ib_md_t *md) +{ + return ucs_calloc(1, md->ops->memh_struct_size, "ib_memh"); +} + +static uint64_t uct_ib_md_access_flags(uct_ib_md_t *md, unsigned flags, + size_t length) +{ + uint64_t access = UCT_IB_MEM_ACCESS_FLAGS; + + if ((flags & UCT_MD_MEM_FLAG_NONBLOCK) && (length > 0) && + (length <= md->config.odp.max_size)) { + access |= IBV_ACCESS_ON_DEMAND; + } + return access; +} + +#if HAVE_NUMA +static ucs_status_t uct_ib_mem_set_numa_policy(uct_ib_md_t *md, void *address, + size_t length, uct_ib_mem_t *memh) +{ + int ret, old_policy, new_policy; + struct bitmask *nodemask; + uintptr_t start, end; + ucs_status_t status; + + if (!(memh->flags & UCT_IB_MEM_FLAG_ODP) || + (md->config.odp.numa_policy == UCS_NUMA_POLICY_DEFAULT) || + (numa_available() < 0)) + { + status = UCS_OK; + goto out; + } + + nodemask = numa_allocate_nodemask(); + if (nodemask == NULL) { + ucs_warn("Failed to allocate numa node mask"); + status = UCS_ERR_NO_MEMORY; + goto out; + } + + ret = get_mempolicy(&old_policy, numa_nodemask_p(nodemask), + numa_nodemask_size(nodemask), NULL, 0); + if (ret < 0) { + ucs_warn("get_mempolicy(maxnode=%zu) failed: %m", + numa_nodemask_size(nodemask)); + status = UCS_ERR_INVALID_PARAM; + goto out_free; + } + + switch (old_policy) { + case MPOL_DEFAULT: + /* if no policy is defined, use the numa node of the current cpu */ + numa_get_thread_node_mask(&nodemask); + break; + case MPOL_BIND: + /* if the current policy is BIND, keep it as-is */ + status = UCS_OK; + goto out_free; + default: + break; + } + + switch (md->config.odp.numa_policy) { + case UCS_NUMA_POLICY_BIND: + new_policy = MPOL_BIND; + break; + case UCS_NUMA_POLICY_PREFERRED: + new_policy = MPOL_PREFERRED; + break; + 
default: + ucs_error("unexpected numa policy %d", md->config.odp.numa_policy); + status = UCS_ERR_INVALID_PARAM; + goto out_free; + } + + if (new_policy != old_policy) { + start = ucs_align_down_pow2((uintptr_t)address, ucs_get_page_size()); + end = ucs_align_up_pow2((uintptr_t)address + length, + ucs_get_page_size()); + ucs_trace("0x%lx..0x%lx: changing numa policy from %d to %d, " + "nodemask[0]=0x%lx", start, end, old_policy, new_policy, + numa_nodemask_p(nodemask)[0]); + + ret = UCS_PROFILE_CALL(mbind, (void*)start, end - start, new_policy, + numa_nodemask_p(nodemask), + numa_nodemask_size(nodemask), 0); + if (ret < 0) { + ucs_warn("mbind(addr=0x%lx length=%ld policy=%d) failed: %m", + start, end - start, new_policy); + status = UCS_ERR_IO_ERROR; + goto out_free; + } + } + + status = UCS_OK; + +out_free: + numa_free_nodemask(nodemask); +out: + return status; +} +#else +static ucs_status_t uct_ib_mem_set_numa_policy(uct_ib_md_t *md, void *address, + size_t length, uct_ib_mem_t *memh) +{ + return UCS_OK; +} +#endif /* HAVE_NUMA */ + +static void uct_ib_mem_init(uct_ib_mem_t *memh, unsigned uct_flags, + uint64_t access) +{ + memh->flags = 0; + + /* coverity[dead_error_condition] */ + if (access & IBV_ACCESS_ON_DEMAND) { + memh->flags |= UCT_IB_MEM_FLAG_ODP; + } + + if (uct_flags & UCT_MD_MEM_ACCESS_REMOTE_ATOMIC) { + memh->flags |= UCT_IB_MEM_ACCESS_REMOTE_ATOMIC; + } +} + +static ucs_status_t uct_ib_mem_reg_internal(uct_md_h uct_md, void *address, + size_t length, unsigned flags, + int silent, uct_ib_mem_t *memh) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + ucs_status_t status; + uint64_t access; + + access = uct_ib_md_access_flags(md, flags, length); + uct_ib_mem_init(memh, flags, access); + status = uct_ib_md_reg_mr(md, address, length, access, silent, memh); + if (status != UCS_OK) { + return status; + } + + ucs_debug("registered memory %p..%p on %s lkey 0x%x rkey 0x%x " + "access 0x%lx flags 0x%x", address, + 
UCS_PTR_BYTE_OFFSET(address, length), + uct_ib_device_name(&md->dev), memh->lkey, memh->rkey, + access, flags); + + uct_ib_mem_set_numa_policy(md, address, length, memh); + + if (md->config.odp.prefetch) { + md->ops->mem_prefetch(md, memh, address, length); + } + + UCS_STATS_UPDATE_COUNTER(md->stats, UCT_IB_MD_STAT_MEM_REG, +1); + return UCS_OK; +} + +static ucs_status_t uct_ib_mem_reg(uct_md_h uct_md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + ucs_status_t status; + uct_ib_mem_t *memh; + + memh = uct_ib_memh_alloc(md); + if (memh == NULL) { + return UCS_ERR_NO_MEMORY; + } + + status = uct_ib_mem_reg_internal(uct_md, address, length, flags, 0, memh); + if (status != UCS_OK) { + uct_ib_memh_free(memh); + return status; + } + *memh_p = memh; + + return UCS_OK; +} + +static ucs_status_t uct_ib_mem_dereg(uct_md_h uct_md, uct_mem_h memh) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + uct_ib_mem_t *ib_memh = memh; + ucs_status_t status; + + status = uct_ib_memh_dereg(md, ib_memh); + uct_ib_memh_free(ib_memh); + return status; +} + +static ucs_status_t uct_ib_verbs_reg_key(uct_ib_md_t *md, void *address, + size_t length, uint64_t access, + uct_ib_mem_t *ib_memh) +{ + uct_ib_verbs_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_verbs_mem_t); + ucs_status_t status; + + status = uct_ib_reg_mr(md->pd, address, length, access, &memh->mr); + if (status != UCS_OK) { + return status; + } + + uct_ib_memh_init_from_mr(&memh->super, memh->mr); + return UCS_OK; +} + +static ucs_status_t uct_ib_verbs_dereg_key(uct_ib_md_t *md, uct_ib_mem_t *ib_memh) +{ + uct_ib_verbs_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_verbs_mem_t); + + return uct_ib_dereg_mr(memh->mr); +} + +static ucs_status_t +uct_ib_mem_advise(uct_md_h uct_md, uct_mem_h memh, void *addr, + size_t length, unsigned advice) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + + ucs_debug("memh %p advice %d", memh, 
advice); + if ((advice == UCT_MADV_WILLNEED) && !md->config.odp.prefetch) { + return md->ops->mem_prefetch(md, memh, addr, length); + } + + return UCS_OK; +} + +static ucs_status_t uct_ib_mkey_pack(uct_md_h uct_md, uct_mem_h uct_memh, + void *rkey_buffer) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + uct_ib_mem_t *memh = uct_memh; + uint32_t atomic_rkey; + ucs_status_t status; + + /* create umr only if a user requested atomic access to the + * memory region and the hardware supports it. + */ + if ((memh->flags & UCT_IB_MEM_ACCESS_REMOTE_ATOMIC) && + !(memh->flags & UCT_IB_MEM_FLAG_ATOMIC_MR) && + (memh != md->global_odp)) + { + /* create UMR on-demand */ + UCS_PROFILE_CODE("reg atomic key") { + status = md->ops->reg_atomic_key(md, memh); + } + if (status == UCS_OK) { + memh->flags |= UCT_IB_MEM_FLAG_ATOMIC_MR; + ucs_trace("created atomic key 0x%x for 0x%x", memh->atomic_rkey, + memh->lkey); + } else if (status != UCS_ERR_UNSUPPORTED) { + return status; + } + } + if (memh->flags & UCT_IB_MEM_FLAG_ATOMIC_MR) { + atomic_rkey = memh->atomic_rkey; + } else { + atomic_rkey = UCT_IB_INVALID_RKEY; + } + + uct_ib_md_pack_rkey(memh->rkey, atomic_rkey, rkey_buffer); + return UCS_OK; +} + +static ucs_status_t uct_ib_rkey_unpack(uct_component_t *component, + const void *rkey_buffer, uct_rkey_t *rkey_p, + void **handle_p) +{ + uint64_t packed_rkey = *(const uint64_t*)rkey_buffer; + + *rkey_p = packed_rkey; + *handle_p = NULL; + ucs_trace("unpacked rkey 0x%llx: direct 0x%x indirect 0x%x", + (unsigned long long)packed_rkey, + uct_ib_md_direct_rkey(*rkey_p), uct_ib_md_indirect_rkey(*rkey_p)); + return UCS_OK; +} + +static uct_md_ops_t uct_ib_md_ops = { + .close = uct_ib_md_close, + .query = uct_ib_md_query, + .mem_reg = uct_ib_mem_reg, + .mem_dereg = uct_ib_mem_dereg, + .mem_advise = uct_ib_mem_advise, + .mkey_pack = uct_ib_mkey_pack, + .detect_memory_type = ucs_empty_function_return_unsupported, +}; + +static inline uct_ib_rcache_region_t* 
uct_ib_rcache_region_from_memh(uct_mem_h memh) +{ + return ucs_container_of(memh, uct_ib_rcache_region_t, memh); +} + +static ucs_status_t uct_ib_mem_rcache_reg(uct_md_h uct_md, void *address, + size_t length, unsigned flags, + uct_mem_h *memh_p) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + ucs_rcache_region_t *rregion; + ucs_status_t status; + uct_ib_mem_t *memh; + + status = ucs_rcache_get(md->rcache, address, length, PROT_READ|PROT_WRITE, + &flags, &rregion); + if (status != UCS_OK) { + return status; + } + + ucs_assert(rregion->refcount > 0); + memh = &ucs_derived_of(rregion, uct_ib_rcache_region_t)->memh; + /* The original region was registered without atomic access + * so update the access flags. Actual umr creation will happen + * when uct_ib_mkey_pack() is called. + */ + if (flags & UCT_MD_MEM_ACCESS_REMOTE_ATOMIC) { + memh->flags |= UCT_IB_MEM_ACCESS_REMOTE_ATOMIC; + } + *memh_p = memh; + return UCS_OK; +} + +static ucs_status_t uct_ib_mem_rcache_dereg(uct_md_h uct_md, uct_mem_h memh) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + uct_ib_rcache_region_t *region = uct_ib_rcache_region_from_memh(memh); + + ucs_rcache_region_put(md->rcache, &region->super); + return UCS_OK; +} + +static uct_md_ops_t uct_ib_md_rcache_ops = { + .close = uct_ib_md_close, + .query = uct_ib_md_query, + .mem_reg = uct_ib_mem_rcache_reg, + .mem_dereg = uct_ib_mem_rcache_dereg, + .mem_advise = uct_ib_mem_advise, + .mkey_pack = uct_ib_mkey_pack, + .detect_memory_type = ucs_empty_function_return_unsupported, +}; + +static ucs_status_t uct_ib_rcache_mem_reg_cb(void *context, ucs_rcache_t *rcache, + void *arg, ucs_rcache_region_t *rregion, + uint16_t rcache_mem_reg_flags) +{ + uct_ib_rcache_region_t *region = ucs_derived_of(rregion, uct_ib_rcache_region_t); + uct_ib_md_t *md = context; + int *flags = arg; + int silent = (rcache_mem_reg_flags & UCS_RCACHE_MEM_REG_HIDE_ERRORS) || + (*flags & UCT_MD_MEM_FLAG_HIDE_ERRORS); + ucs_status_t status; + + status = 
uct_ib_mem_reg_internal(&md->super, (void*)region->super.super.start, + region->super.super.end - region->super.super.start, + *flags, silent, &region->memh); + if (status != UCS_OK) { + return status; + } + + return UCS_OK; +} + +static void uct_ib_rcache_mem_dereg_cb(void *context, ucs_rcache_t *rcache, + ucs_rcache_region_t *rregion) +{ + uct_ib_rcache_region_t *region = ucs_derived_of(rregion, uct_ib_rcache_region_t); + uct_ib_md_t *md = (uct_ib_md_t *)context; + + (void)uct_ib_memh_dereg(md, &region->memh); +} + +static void uct_ib_rcache_dump_region_cb(void *context, ucs_rcache_t *rcache, + ucs_rcache_region_t *rregion, char *buf, + size_t max) +{ + uct_ib_rcache_region_t *region = ucs_derived_of(rregion, uct_ib_rcache_region_t); + uct_ib_mem_t *memh = &region->memh; + + snprintf(buf, max, "lkey 0x%x rkey 0x%x atomic_rkey 0x%x", + memh->lkey, memh->rkey, + (memh->flags & UCT_IB_MEM_FLAG_ATOMIC_MR) ? memh->atomic_rkey : + UCT_IB_INVALID_RKEY + ); +} + +static ucs_rcache_ops_t uct_ib_rcache_ops = { + .mem_reg = uct_ib_rcache_mem_reg_cb, + .mem_dereg = uct_ib_rcache_mem_dereg_cb, + .dump_region = uct_ib_rcache_dump_region_cb +}; + +static ucs_status_t uct_ib_md_odp_query(uct_md_h uct_md, uct_md_attr_t *md_attr) +{ + ucs_status_t status; + + status = uct_ib_md_query(uct_md, md_attr); + if (status != UCS_OK) { + return status; + } + + /* ODP supports only host memory */ + md_attr->cap.reg_mem_types &= UCS_BIT(UCS_MEMORY_TYPE_HOST); + return UCS_OK; +} + +static ucs_status_t uct_ib_mem_global_odp_reg(uct_md_h uct_md, void *address, + size_t length, unsigned flags, + uct_mem_h *memh_p) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + uct_ib_mem_t *memh = md->global_odp; + + ucs_assert(md->global_odp != NULL); + if (flags & UCT_MD_MEM_FLAG_LOCK) { + return uct_ib_mem_reg(uct_md, address, length, flags, memh_p); + } + + if (md->config.odp.prefetch) { + md->ops->mem_prefetch(md, memh, address, length); + } + + /* cppcheck-suppress autoVariables */ + *memh_p = 
md->global_odp; + return UCS_OK; +} + +static ucs_status_t uct_ib_mem_global_odp_dereg(uct_md_h uct_md, uct_mem_h memh) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + + if (memh == md->global_odp) { + return UCS_OK; + } + + return uct_ib_mem_dereg(uct_md, memh); +} + +static uct_md_ops_t UCS_V_UNUSED uct_ib_md_global_odp_ops = { + .close = uct_ib_md_close, + .query = uct_ib_md_odp_query, + .mem_reg = uct_ib_mem_global_odp_reg, + .mem_dereg = uct_ib_mem_global_odp_dereg, + .mem_advise = uct_ib_mem_advise, + .mkey_pack = uct_ib_mkey_pack, + .detect_memory_type = ucs_empty_function_return_unsupported, +}; + +static ucs_status_t uct_ib_query_md_resources(uct_component_t *component, + uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p) +{ + UCS_MODULE_FRAMEWORK_DECLARE(uct_ib); + uct_md_resource_desc_t *resources; + struct ibv_device **device_list; + ucs_status_t status; + int i, num_devices; + + UCS_MODULE_FRAMEWORK_LOAD(uct_ib, 0); + + /* Get device list from driver */ + device_list = ibv_get_device_list(&num_devices); + if (device_list == NULL) { + ucs_debug("Failed to get IB device list, assuming no devices are present"); + *resources_p = NULL; + *num_resources_p = 0; + return UCS_OK; + } + + resources = ucs_calloc(num_devices, sizeof(*resources), "ib resources"); + if (resources == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out_free_device_list; + } + + for (i = 0; i < num_devices; ++i) { + ucs_snprintf_zero(resources[i].md_name, sizeof(resources[i].md_name), + "%s", ibv_get_device_name(device_list[i])); + } + + *resources_p = resources; + *num_resources_p = num_devices; + status = UCS_OK; + +out_free_device_list: + ibv_free_device_list(device_list); + return status; +} + +static void uct_ib_fork_warn() +{ + ucs_warn("IB: ibv_fork_init() was disabled or failed, yet a fork() has been issued."); + ucs_warn("IB: data corruption might occur when using registered memory."); +} + +static void uct_ib_fork_warn_enable() +{ + static volatile 
uint32_t enabled = 0; + int ret; + + if (ucs_atomic_cswap32(&enabled, 0, 1) != 0) { + return; + } + + ret = pthread_atfork(uct_ib_fork_warn, NULL, NULL); + if (ret) { + ucs_warn("registering fork() warning failed: %m"); + } +} + +static void uct_ib_md_release_device_config(uct_ib_md_t *md) +{ + unsigned i; + + for (i = 0; i < md->custom_devices.count; ++i) { + free((char*)md->custom_devices.specs[i].name); + } + ucs_free(md->custom_devices.specs); +} + +static ucs_status_t UCS_V_UNUSED +uct_ib_md_global_odp_init(uct_ib_md_t *md, uct_mem_h *memh_p) +{ + uct_ib_verbs_mem_t *global_odp; + ucs_status_t status; + + global_odp = (uct_ib_verbs_mem_t *)uct_ib_memh_alloc(md); + if (global_odp == NULL) { + return UCS_ERR_NO_MEMORY; + } + + status = uct_ib_reg_mr(md->pd, 0, UINT64_MAX, + UCT_IB_MEM_ACCESS_FLAGS | IBV_ACCESS_ON_DEMAND, + &global_odp->mr); + if (status != UCS_OK) { + ucs_debug("%s: failed to register global mr: %m", + uct_ib_device_name(&md->dev)); + goto err; + } + + uct_ib_memh_init_from_mr(&global_odp->super, global_odp->mr); + global_odp->super.flags = UCT_IB_MEM_FLAG_ODP; + *memh_p = global_odp; + return UCS_OK; + +err: + uct_ib_memh_free(&global_odp->super); + return status; +} + +static ucs_status_t +uct_ib_md_parse_reg_methods(uct_ib_md_t *md, uct_md_attr_t *md_attr, + const uct_ib_md_config_t *md_config) +{ + ucs_rcache_params_t rcache_params; + ucs_status_t status; + int i; + + for (i = 0; i < md_config->reg_methods.count; ++i) { + if (!strcasecmp(md_config->reg_methods.rmtd[i], "rcache")) { + rcache_params.region_struct_size = sizeof(ucs_rcache_region_t) + + md->ops->memh_struct_size; + rcache_params.alignment = md_config->rcache.alignment; + rcache_params.max_alignment = ucs_get_page_size(); + rcache_params.ucm_events = UCM_EVENT_VM_UNMAPPED; + if (md_attr->cap.reg_mem_types & ~UCS_BIT(UCS_MEMORY_TYPE_HOST)) { + rcache_params.ucm_events |= UCM_EVENT_MEM_TYPE_FREE; + } + rcache_params.ucm_event_priority = md_config->rcache.event_prio; + 
rcache_params.context = md; + rcache_params.ops = &uct_ib_rcache_ops; + + status = ucs_rcache_create(&rcache_params, uct_ib_device_name(&md->dev), + UCS_STATS_RVAL(md->stats), &md->rcache); + if (status != UCS_OK) { + ucs_debug("%s: failed to create registration cache: %s", + uct_ib_device_name(&md->dev), + ucs_status_string(status)); + continue; + } + + md->super.ops = &uct_ib_md_rcache_ops; + md->reg_cost.overhead = md_config->rcache.overhead; + md->reg_cost.growth = 0; /* It's close enough to 0 */ + ucs_debug("%s: using registration cache", + uct_ib_device_name(&md->dev)); + return UCS_OK; +#if HAVE_ODP_IMPLICIT + } else if (!strcasecmp(md_config->reg_methods.rmtd[i], "odp")) { + if (!(md->dev.flags & UCT_IB_DEVICE_FLAG_ODP_IMPLICIT)) { + ucs_debug("%s: on-demand-paging with global memory region is " + "not supported", uct_ib_device_name(&md->dev)); + continue; + } + + status = uct_ib_md_global_odp_init(md, &md->global_odp); + if (status != UCS_OK) { + continue; + } + + md->super.ops = &uct_ib_md_global_odp_ops; + md->reg_cost.overhead = 10e-9; + md->reg_cost.growth = 0; + ucs_debug("%s: using odp global key", uct_ib_device_name(&md->dev)); + return UCS_OK; +#endif + } else if (!strcmp(md_config->reg_methods.rmtd[i], "direct")) { + md->super.ops = &uct_ib_md_ops; + md->reg_cost = md_config->uc_reg_cost; + ucs_debug("%s: using direct registration", + uct_ib_device_name(&md->dev)); + return UCS_OK; + } + } + + return UCS_ERR_INVALID_PARAM; +} + +static ucs_status_t +uct_ib_md_parse_device_config(uct_ib_md_t *md, const uct_ib_md_config_t *md_config) +{ + uct_ib_device_spec_t *spec; + ucs_status_t status; + char *flags_str, *p; + unsigned i, count; + int nfields; + + count = md->custom_devices.count = md_config->custom_devices.count; + if (count == 0) { + md->custom_devices.specs = NULL; + md->custom_devices.count = 0; + return UCS_OK; + } + + md->custom_devices.specs = ucs_calloc(count, sizeof(*md->custom_devices.specs), + "ib_custom_devices"); + if 
(md->custom_devices.specs == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + + for (i = 0; i < count; ++i) { + spec = &md->custom_devices.specs[i]; + nfields = sscanf(md_config->custom_devices.spec[i], + "%hi:%hi:%m[^:]:%m[^:]:%hhu", + &spec->pci_id.vendor, &spec->pci_id.device, &spec->name, + &flags_str, &spec->priority); + if (nfields < 2) { + ucs_error("failed to parse device config '%s' (parsed: %d/%d)", + md_config->custom_devices.spec[i], nfields, 5); + status = UCS_ERR_INVALID_PARAM; + goto err_free; + } + + if (nfields >= 4) { + for (p = flags_str; *p != 0; ++p) { + if (*p == '4') { + spec->flags |= UCT_IB_DEVICE_FLAG_MLX4_PRM; + } else if (*p == '5') { + spec->flags |= UCT_IB_DEVICE_FLAG_MLX5_PRM; + } else if (*p == 'd') { + spec->flags |= UCT_IB_DEVICE_FLAG_DC_V1; + } else if (*p == 'D') { + spec->flags |= UCT_IB_DEVICE_FLAG_DC_V2; + } else if (*p == 'a') { + spec->flags |= UCT_IB_DEVICE_FLAG_AV; + } else { + ucs_error("invalid device flag: '%c'", *p); + free(flags_str); + status = UCS_ERR_INVALID_PARAM; + goto err_free; + } + } + free(flags_str); + } + + ucs_trace("added device '%s' vendor_id 0x%x device_id %d flags %c%c prio %d", + spec->name, spec->pci_id.vendor, spec->pci_id.device, + (spec->flags & UCT_IB_DEVICE_FLAG_MLX4_PRM) ? '4' : '-', + (spec->flags & UCT_IB_DEVICE_FLAG_MLX5_PRM) ? 
'5' : '-', + spec->priority); + } + + return UCS_OK; + +err_free: + uct_ib_md_release_device_config(md); +err: + return status; +} + +static void uct_ib_md_release_reg_method(uct_ib_md_t *md) +{ + if (md->rcache != NULL) { + ucs_rcache_destroy(md->rcache); + } + if (md->global_odp != NULL) { + uct_ib_mem_dereg(&md->super, md->global_odp); + } +} + +static ucs_status_t +uct_ib_md_parse_subnet_prefix(const char *subnet_prefix_str, + uint64_t *subnet_prefix) +{ + uint16_t pfx[4] = {0}; + uint64_t pfx64 = 0; + int res, i; + + res = sscanf(subnet_prefix_str, "%hx:%hx:%hx:%hx", + &pfx[0], &pfx[1], &pfx[2], &pfx[3]); + if (res != 4) { + ucs_error("subnet filter '%s' is invalid", subnet_prefix_str); + return UCS_ERR_INVALID_PARAM; + } + + for (i = 0; i < 4; i++) { + pfx64 = pfx[i] + (pfx64 << 16); + } + + *subnet_prefix = htobe64(pfx64); + return UCS_OK; +} + +static double uct_ib_md_read_pci_bw(struct ibv_device *ib_device) +{ + const char *pci_width_file_name = "current_link_width"; + const char *pci_speed_file_name = "current_link_speed"; + char pci_width_str[16]; + char pci_speed_str[16]; + char gts[16]; + const uct_ib_md_pci_info_t *p; + double bw, effective_bw; + unsigned width; + ssize_t len; + size_t i; + + len = ucs_read_file(pci_width_str, sizeof(pci_width_str) - 1, 1, + UCT_IB_DEVICE_SYSFS_FMT, ib_device->name, + pci_width_file_name); + if (len < 1) { + ucs_debug("failed to read file: " UCT_IB_DEVICE_SYSFS_FMT, + ib_device->name, pci_width_file_name); + return DBL_MAX; /* failed to read file */ + } + pci_width_str[len] = '\0'; + + len = ucs_read_file(pci_speed_str, sizeof(pci_speed_str) - 1, 1, + UCT_IB_DEVICE_SYSFS_FMT, ib_device->name, + pci_speed_file_name); + if (len < 1) { + ucs_debug("failed to read file: " UCT_IB_DEVICE_SYSFS_FMT, + ib_device->name, pci_speed_file_name); + return DBL_MAX; /* failed to read file */ + } + pci_speed_str[len] = '\0'; + + if (sscanf(pci_width_str, "%u", &width) < 1) { + ucs_debug("incorrect format of %s file: expected: , 
actual: %s\n", + pci_width_file_name, pci_width_str); + return DBL_MAX; + } + + if ((sscanf(pci_speed_str, "%lf%s", &bw, gts) < 2) || + strcasecmp("GT/s", ucs_strtrim(gts))) { + ucs_debug("incorrect format of %s file: expected: GT/s, actual: %s\n", + pci_speed_file_name, pci_speed_str); + return DBL_MAX; + } + + bw *= UCS_GBYTE / 8; /* gigabit -> gigabyte */ + + for (i = 0; i < ucs_static_array_size(uct_ib_md_pci_info); i++) { + if (bw < (uct_ib_md_pci_info[i].bw * 1.2)) { /* use 1.2 multiplex to avoid round issues */ + p = &uct_ib_md_pci_info[i]; /* use pointer to make equation shorter */ + /* coverity[overflow] */ + effective_bw = bw * width * + (p->payload * p->nack) / + (((p->payload + p->overhead) * p->nack) + p->ctrl) * + p->encoding / p->decoding; + ucs_trace("%s: pcie %ux %s, effective throughput %.3lfMB/s (%.3lfGb/s)", + ib_device->name, width, p->name, + (effective_bw / UCS_MBYTE), (effective_bw * 8 / UCS_GBYTE)); + return effective_bw; + } + } + + return DBL_MAX; +} + +static double uct_ib_md_pci_bw(const uct_ib_md_config_t *md_config, + struct ibv_device *ib_device) +{ + unsigned i; + + for (i = 0; i < md_config->pci_bw.count; i++) { + if (!strcmp(ib_device->name, md_config->pci_bw.device[i].name)) { + if (md_config->pci_bw.device[i].bw == UCS_BANDWIDTH_AUTO) { + break; /* read data from system */ + } + return md_config->pci_bw.device[i].bw; + } + } + + return uct_ib_md_read_pci_bw(ib_device); +} + +ucs_status_t uct_ib_md_open(uct_component_t *component, const char *md_name, + const uct_md_config_t *uct_md_config, uct_md_h *md_p) +{ + const uct_ib_md_config_t *md_config = ucs_derived_of(uct_md_config, uct_ib_md_config_t); + ucs_status_t status = UCS_ERR_UNSUPPORTED; + uct_ib_md_t *md = NULL; + struct ibv_device **ib_device_list, *ib_device; + uct_ib_md_ops_entry_t *md_ops_entry; + int i, num_devices, ret; + + ucs_trace("opening IB device %s", md_name); + +#if !HAVE_DEVX + if (md_config->devx == UCS_YES) { + ucs_error("DEVX requested but not supported"); 
+ status = UCS_ERR_NO_DEVICE; + goto out; + } +#endif + + /* Get device list from driver */ + ib_device_list = ibv_get_device_list(&num_devices); + if (ib_device_list == NULL) { + ucs_debug("Failed to get IB device list, assuming no devices are present"); + status = UCS_ERR_NO_DEVICE; + goto out; + } + + ib_device = NULL; + for (i = 0; i < num_devices; ++i) { + if (!strcmp(ibv_get_device_name(ib_device_list[i]), md_name)) { + ib_device = ib_device_list[i]; + break; + } + } + + if (ib_device == NULL) { + ucs_debug("IB device %s not found", md_name); + status = UCS_ERR_NO_DEVICE; + goto out_free_dev_list; + } + + if (md_config->fork_init != UCS_NO) { + ret = ibv_fork_init(); + if (ret) { + if (md_config->fork_init == UCS_YES) { + ucs_error("ibv_fork_init() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto out_free_dev_list; + } + ucs_debug("ibv_fork_init() failed: %m, continuing, but fork may be unsafe."); + uct_ib_fork_warn_enable(); + } + } else { + uct_ib_fork_warn_enable(); + } + + ucs_list_for_each(md_ops_entry, &uct_ib_md_ops_list, list) { + status = md_ops_entry->ops->open(ib_device, md_config, &md); + if (status == UCS_OK) { + ucs_debug("%s: md open by '%s' is successful", md_name, + md_ops_entry->name); + md->ops = md_ops_entry->ops; + break; + } else if (status != UCS_ERR_UNSUPPORTED) { + goto out_free_dev_list; + } + ucs_debug("%s: md open by '%s' failed, trying next", md_name, + md_ops_entry->name); + } + + if (status != UCS_OK) { + ucs_assert(status == UCS_ERR_UNSUPPORTED); + ucs_debug("Unsupported IB device %s", md_name); + goto out_free_dev_list; + } + + /* cppcheck-suppress autoVariables */ + *md_p = &md->super; + status = UCS_OK; + +out_free_dev_list: + ibv_free_device_list(ib_device_list); +out: + return status; +} + +ucs_status_t uct_ib_md_open_common(uct_ib_md_t *md, + struct ibv_device *ib_device, + const uct_ib_md_config_t *md_config) +{ + uct_md_attr_t md_attr; + ucs_status_t status; + + md->super.ops = &uct_ib_md_ops; + md->super.component = 
&uct_ib_component; + + if (md->config.odp.max_size == UCS_MEMUNITS_AUTO) { + md->config.odp.max_size = uct_ib_device_odp_max_size(&md->dev); + } + + /* Create statistics */ + status = UCS_STATS_NODE_ALLOC(&md->stats, &uct_ib_md_stats_class, + ucs_stats_get_root(), + "%s-%p", ibv_get_device_name(ib_device), md); + if (status != UCS_OK) { + goto err; + } + + status = uct_ib_device_init(&md->dev, ib_device, md_config->async_events + UCS_STATS_ARG(md->stats)); + if (status != UCS_OK) { + goto err_release_stats; + } + +#if HAVE_DECL_IBV_EXP_SETENV + ibv_exp_setenv(md->dev.ibv_context, "MLX_QP_ALLOC_TYPE", "ANON", 0); + ibv_exp_setenv(md->dev.ibv_context, "MLX_CQ_ALLOC_TYPE", "ANON", 0); +#endif + + if (strlen(md_config->subnet_prefix) > 0) { + status = uct_ib_md_parse_subnet_prefix(md_config->subnet_prefix, + &md->subnet_filter); + + if (status != UCS_OK) { + goto err_cleanup_device; + } + + md->check_subnet_filter = 1; + } + + /* Allocate memory domain */ + md->pd = ibv_alloc_pd(md->dev.ibv_context); + if (md->pd == NULL) { + ucs_error("ibv_alloc_pd() failed: %m"); + status = UCS_ERR_NO_MEMORY; + goto err_cleanup_device; + } + + status = uct_md_query(&md->super, &md_attr); + if (status != UCS_OK) { + goto err_dealloc_pd; + } + + status = uct_ib_md_parse_reg_methods(md, &md_attr, md_config); + if (status != UCS_OK) { + goto err_dealloc_pd; + } + + md->dev.max_zcopy_log_sge = INT_MAX; + if (md_attr.cap.reg_mem_types & ~UCS_BIT(UCS_MEMORY_TYPE_HOST)) { + md->dev.max_zcopy_log_sge = 1; + } + + md->pci_bw = uct_ib_md_pci_bw(md_config, ib_device); + return UCS_OK; + +err_dealloc_pd: + ibv_dealloc_pd(md->pd); +err_cleanup_device: + uct_ib_device_cleanup(&md->dev); +err_release_stats: + UCS_STATS_NODE_FREE(md->stats); +err: + return status; +} + +void uct_ib_md_close(uct_md_h uct_md) +{ + uct_ib_md_t *md = ucs_derived_of(uct_md, uct_ib_md_t); + + md->ops->cleanup(md); + uct_ib_md_release_device_config(md); + uct_ib_md_release_reg_method(md); + 
uct_ib_device_cleanup_ah_cached(&md->dev); + ibv_dealloc_pd(md->pd); + uct_ib_device_cleanup(&md->dev); + ibv_close_device(md->dev.ibv_context); + UCS_STATS_NODE_FREE(md->stats); + ucs_free(md); +} + +static uct_ib_md_ops_t uct_ib_verbs_md_ops; + +static ucs_status_t uct_ib_verbs_md_open(struct ibv_device *ibv_device, + const uct_ib_md_config_t *md_config, + uct_ib_md_t **p_md) +{ + uct_ib_device_t *dev; + ucs_status_t status; + uct_ib_md_t *md; + + md = ucs_calloc(1, sizeof(*md), "ib_md"); + if (md == NULL) { + return UCS_ERR_NO_MEMORY; + } + + /* Open verbs context */ + dev = &md->dev; + dev->ibv_context = ibv_open_device(ibv_device); + if (dev->ibv_context == NULL) { + ucs_error("ibv_open_device(%s) failed: %m", ibv_get_device_name(ibv_device)); + status = UCS_ERR_IO_ERROR; + goto err; + } + + md->config = md_config->ext; + + status = uct_ib_device_query(dev, ibv_device); + if (status != UCS_OK) { + goto err_free_context; + } + + if (UCT_IB_HAVE_ODP_IMPLICIT(&dev->dev_attr)) { + md->dev.flags |= UCT_IB_DEVICE_FLAG_ODP_IMPLICIT; + } + + if (IBV_EXP_HAVE_ATOMIC_HCA(&dev->dev_attr)) { + dev->atomic_arg_sizes = sizeof(uint64_t); + } + + md->ops = &uct_ib_verbs_md_ops; + status = uct_ib_md_parse_device_config(md, md_config); + if (status != UCS_OK) { + goto err_free_context; + } + + status = uct_ib_md_open_common(md, ibv_device, md_config); + if (status != UCS_OK) { + goto err_dev_cfg; + } + + md->dev.flags = uct_ib_device_spec(&md->dev)->flags; + *p_md = md; + return UCS_OK; + +err_dev_cfg: + uct_ib_md_release_device_config(md); +err_free_context: + ibv_close_device(dev->ibv_context); +err: + ucs_free(md); + return status; +} + +static uct_ib_md_ops_t uct_ib_verbs_md_ops = { + .open = uct_ib_verbs_md_open, + .cleanup = (uct_ib_md_cleanup_func_t)ucs_empty_function, + .memh_struct_size = sizeof(uct_ib_verbs_mem_t), + .reg_key = uct_ib_verbs_reg_key, + .dereg_key = uct_ib_verbs_dereg_key, + .reg_atomic_key = 
(uct_ib_md_reg_atomic_key_func_t)ucs_empty_function_return_unsupported, + .dereg_atomic_key = (uct_ib_md_dereg_atomic_key_func_t)ucs_empty_function_return_unsupported, + .reg_multithreaded = (uct_ib_md_reg_multithreaded_func_t)ucs_empty_function_return_unsupported, + .dereg_multithreaded = (uct_ib_md_dereg_multithreaded_func_t)ucs_empty_function_return_unsupported, + .mem_prefetch = (uct_ib_md_mem_prefetch_func_t)ucs_empty_function_return_success, +}; + +UCT_IB_MD_OPS(uct_ib_verbs_md_ops, 0); + +uct_component_t uct_ib_component = { + .query_md_resources = uct_ib_query_md_resources, + .md_open = uct_ib_md_open, + .cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = uct_ib_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = ucs_empty_function_return_success, + .name = "ib", + .md_config = { + .name = "IB memory domain", + .prefix = "IB_", + .table = uct_ib_md_config_table, + .size = sizeof(uct_ib_md_config_t), + }, + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_ib_component), + .flags = 0 +}; +UCT_COMPONENT_REGISTER(&uct_ib_component); diff --git a/src/uct/ib/base/ib_md.h b/src/uct/ib/base/ib_md.h new file mode 100644 index 0000000..fdb9409 --- /dev/null +++ b/src/uct/ib/base/ib_md.h @@ -0,0 +1,406 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * Copyright (C) The University of Tennessee and The University + * of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef UCT_IB_MD_H_ +#define UCT_IB_MD_H_ + +#include "ib_device.h" + +#include +#include +#include +#include + +#define UCT_IB_MD_MAX_MR_SIZE 0x80000000UL +#define UCT_IB_MD_PACKED_RKEY_SIZE sizeof(uint64_t) + +#define UCT_IB_MD_DEFAULT_GID_INDEX 0 /**< The gid index used by default for an IB/RoCE port */ + +#define UCT_IB_MEM_ACCESS_FLAGS (IBV_ACCESS_LOCAL_WRITE | \ + IBV_ACCESS_REMOTE_WRITE | \ + IBV_ACCESS_REMOTE_READ | \ + IBV_ACCESS_REMOTE_ATOMIC) + +#define UCT_IB_MEM_DEREG 0 + +/** + * IB MD statistics counters + */ +enum { + UCT_IB_MD_STAT_MEM_ALLOC, + UCT_IB_MD_STAT_MEM_REG, + UCT_IB_MD_STAT_LAST +}; + + +enum { + UCT_IB_MEM_FLAG_ODP = UCS_BIT(0), /**< The memory region has on + demand paging enabled */ + UCT_IB_MEM_FLAG_ATOMIC_MR = UCS_BIT(1), /**< The memory region has UMR + for the atomic access */ + UCT_IB_MEM_ACCESS_REMOTE_ATOMIC = UCS_BIT(2), /**< An atomic access was + requested for the memory + region */ + UCT_IB_MEM_MULTITHREADED = UCS_BIT(3), /**< The memory region registration + handled by chunks in parallel + threads */ +}; + +enum { + UCT_IB_DEVX_OBJ_RCQP, + UCT_IB_DEVX_OBJ_RCSRQ, + UCT_IB_DEVX_OBJ_DCT, + UCT_IB_DEVX_OBJ_DCSRQ +}; + +typedef struct uct_ib_md_ext_config { + int eth_pause; /**< Whether or not Pause Frame is + enabled on the Ethernet network */ + int prefer_nearest_device; /**< Give priority for near + device */ + int enable_indirect_atomic; /** Enable indirect atomic */ + int enable_gpudirect_rdma; /** Enable GPUDirect RDMA */ +#if HAVE_EXP_UMR + unsigned max_inline_klm_list; /* Maximal length of inline KLM list */ +#endif + + struct { + ucs_numa_policy_t numa_policy; /**< NUMA policy flags for ODP */ + int prefetch; /**< Auto-prefetch non-blocking memory + registrations / allocations */ + size_t max_size; /**< Maximal memory region size for ODP */ + } odp; + + size_t gid_index; /**< IB GID index to use */ + + size_t min_mt_reg; /**< Multi-threaded registration threshold */ + size_t mt_reg_chunk; /**< Multi-threaded 
registration chunk */ + int mt_reg_bind; /**< Multi-threaded registration bind to core */ +} uct_ib_md_ext_config_t; + + +typedef struct uct_ib_mem { + uint32_t lkey; + uint32_t rkey; + uint32_t atomic_rkey; + uint32_t flags; +} uct_ib_mem_t; + +/** + * IB memory domain. + */ +typedef struct uct_ib_md { + uct_md_t super; + ucs_rcache_t *rcache; /**< Registration cache (can be NULL) */ + uct_mem_h global_odp;/**< Implicit ODP memory handle */ + struct ibv_pd *pd; /**< IB memory domain */ + uct_ib_device_t dev; /**< IB device */ + uct_linear_growth_t reg_cost; /**< Memory registration cost */ + struct uct_ib_md_ops *ops; + UCS_STATS_NODE_DECLARE(stats) + uct_ib_md_ext_config_t config; /* IB external configuration */ + struct { + uct_ib_device_spec_t *specs; /* Custom device specifications */ + unsigned count; /* Number of custom devices */ + } custom_devices; + int check_subnet_filter; + uint64_t subnet_filter; + double pci_bw; +} uct_ib_md_t; + + +/** + * IB memory domain configuration. + */ +typedef struct uct_ib_md_config { + uct_md_config_t super; + + /** List of registration methods in order of preference */ + UCS_CONFIG_STRING_ARRAY_FIELD(rmtd) reg_methods; + + uct_md_rcache_config_t rcache; /**< Registration cache config */ + uct_linear_growth_t uc_reg_cost; /**< Memory registration cost estimation + without using the cache */ + unsigned fork_init; /**< Use ibv_fork_init() */ + int async_events; /**< Whether async events should be delivered */ + + uct_ib_md_ext_config_t ext; /**< External configuration */ + + UCS_CONFIG_STRING_ARRAY_FIELD(spec) custom_devices; /**< Custom device specifications */ + + char *subnet_prefix; /**< Filter of subnet_prefix for IB ports */ + + UCS_CONFIG_ARRAY_FIELD(ucs_config_bw_spec_t, device) pci_bw; /**< List of PCI BW for devices */ + + unsigned devx; /**< DEVX support */ + unsigned devx_objs; /**< Objects to be created by DevX */ +} uct_ib_md_config_t; + +/** + * Memory domain constructor. 
+ *
+ * @param [in]  ibv_device    IB device.
+ *
+ * @param [in]  md_config     Memory domain configuration parameters.
+ *
+ * @param [out] md_p          Handle to memory domain.
+ *
+ * @return UCS_OK on success or error code in case of failure.
+ */
+typedef ucs_status_t (*uct_ib_md_open_func_t)(struct ibv_device *ibv_device,
+                                              const uct_ib_md_config_t *md_config,
+                                              struct uct_ib_md **md_p);
+
+/**
+ * Memory domain destructor.
+ *
+ * @param [in]  md      Memory domain.
+ */
+typedef void (*uct_ib_md_cleanup_func_t)(struct uct_ib_md *);
+
+/**
+ * Memory domain method to register memory area.
+ *
+ * @param [in]  md      Memory domain.
+ *
+ * @param [in]  address Memory area start address.
+ *
+ * @param [in]  length  Memory area length.
+ *
+ * @param [in]  access  IB verbs registration access flags.
+ *
+ * @param [in]  memh    Memory region handle.
+ *                      Method should initialize lkey & rkey.
+ *
+ * @return UCS_OK on success or error code in case of failure.
+ */
+typedef ucs_status_t (*uct_ib_md_reg_key_func_t)(struct uct_ib_md *md,
+                                                 void *address, size_t length,
+                                                 uint64_t access,
+                                                 uct_ib_mem_t *memh);
+
+/**
+ * Memory domain method to deregister memory area.
+ *
+ * @param [in]  md      Memory domain.
+ *
+ * @param [in]  memh    Memory region handle registered with
+ *                      uct_ib_md_reg_key_func_t.
+ *
+ * @return UCS_OK on success or error code in case of failure.
+ */
+typedef ucs_status_t (*uct_ib_md_dereg_key_func_t)(struct uct_ib_md *md,
+                                                   uct_ib_mem_t *memh);
+
+/**
+ * Memory domain method to register memory area optimized for atomic ops.
+ *
+ * @param [in]  md      Memory domain.
+ *
+ * @param [in]  memh    Memory region handle registered for regular ops.
+ *                      Method should initialize atomic_rkey.
+ *
+ * @return UCS_OK on success or error code in case of failure.
+ */
+typedef ucs_status_t (*uct_ib_md_reg_atomic_key_func_t)(struct uct_ib_md *md,
+                                                        uct_ib_mem_t *memh);
+
+/**
+ * Memory domain method to release resources registered for atomic ops.
+ *
+ * @param [in]  md      Memory domain.
+ *
+ * @param [in]  memh    Memory region handle registered with
+ *                      uct_ib_md_reg_atomic_key_func_t.
+ *
+ * @return UCS_OK on success or error code in case of failure.
+ */
+typedef ucs_status_t (*uct_ib_md_dereg_atomic_key_func_t)(struct uct_ib_md *md,
+                                                          uct_ib_mem_t *memh);
+
+/**
+ * Memory domain method to register memory area using multiple threads.
+ *
+ * @param [in]  md      Memory domain.
+ *
+ * @param [in]  address Memory area start address.
+ *
+ * @param [in]  length  Memory area length.
+ *
+ * @param [in]  access  IB verbs registration access flags.
+ *
+ * @param [in]  memh    Memory region handle.
+ *                      Method should initialize lkey & rkey.
+ *
+ * @return UCS_OK on success or error code in case of failure.
+ */
+typedef ucs_status_t (*uct_ib_md_reg_multithreaded_func_t)(uct_ib_md_t *md,
+                                                           void *address,
+                                                           size_t length,
+                                                           uint64_t access,
+                                                           uct_ib_mem_t *memh);
+
+/**
+ * Memory domain method to deregister memory area registered by multiple threads.
+ *
+ * @param [in]  md      Memory domain.
+ *
+ * @param [in]  memh    Memory region handle registered with
+ *                      uct_ib_md_reg_multithreaded_func_t.
+ *
+ * @return UCS_OK on success or error code in case of failure.
+ */
+typedef ucs_status_t (*uct_ib_md_dereg_multithreaded_func_t)(uct_ib_md_t *md,
+                                                             uct_ib_mem_t *memh);
+
+/**
+ * Memory domain method to prefetch physical memory for virtual memory area.
+ *
+ * @param [in]  md      Memory domain.
+ *
+ * @param [in]  memh    Memory region handle.
+ *
+ * @param [in]  addr    Memory area start address.
+ *
+ * @param [in]  length  Memory area length.
+ *
+ * @return UCS_OK on success or error code in case of failure.
+ */
+typedef ucs_status_t (*uct_ib_md_mem_prefetch_func_t)(uct_ib_md_t *md,
+                                                      uct_ib_mem_t *memh,
+                                                      void *addr, size_t length);
+
+typedef struct uct_ib_md_ops {
+    uct_ib_md_open_func_t                open;
+    uct_ib_md_cleanup_func_t             cleanup;
+
+    size_t                               memh_struct_size;
+
+    uct_ib_md_reg_key_func_t             reg_key;
+    uct_ib_md_dereg_key_func_t           dereg_key;
+    uct_ib_md_reg_atomic_key_func_t      reg_atomic_key;
+    uct_ib_md_dereg_atomic_key_func_t    dereg_atomic_key;
+    uct_ib_md_reg_multithreaded_func_t   reg_multithreaded;
+    uct_ib_md_dereg_multithreaded_func_t dereg_multithreaded;
+    uct_ib_md_mem_prefetch_func_t        mem_prefetch;
+} uct_ib_md_ops_t;
+
+
+/**
+ * IB memory region in the registration cache.
+ */
+typedef struct uct_ib_rcache_region {
+    ucs_rcache_region_t  super;
+    uct_ib_mem_t         memh;      /**< mr exposed to the user as the memh */
+} uct_ib_rcache_region_t;
+
+
+/**
+ * IB MD ops registry entry. The ops' open() constructor should have the following logic:
+ * - probe provided IB device, may return UCS_ERR_UNSUPPORTED
+ * - allocate MD and IB context
+ * - setup atomic MR ops
+ * - determine device attributes and flags
+ */
+typedef struct uct_ib_md_ops_entry {
+    ucs_list_link_t             list;
+    const char                  *name;
+    uct_ib_md_ops_t             *ops;
+    int                         priority;
+} uct_ib_md_ops_entry_t;
+
+#define UCT_IB_MD_OPS(_md_ops, _priority) \
+    UCS_STATIC_INIT { \
+        extern ucs_list_link_t uct_ib_md_ops_list; \
+        static uct_ib_md_ops_entry_t *p, entry = { \
+            .name     = UCS_PP_MAKE_STRING(_md_ops), \
+            .ops      = &_md_ops, \
+            .priority = _priority, \
+        }; \
+        ucs_list_for_each(p, &uct_ib_md_ops_list, list) { \
+            if (p->priority < _priority) { \
+                ucs_list_insert_before(&p->list, &entry.list); \
+                return; \
+            } \
+        } \
+        ucs_list_add_tail(&uct_ib_md_ops_list, &entry.list); \
+    }
+
+extern uct_component_t uct_ib_component;
+
+
+static inline uint32_t uct_ib_md_direct_rkey(uct_rkey_t uct_rkey)
+{
+    return (uint32_t)uct_rkey; /* low 32 bits hold the direct key */
+}
+
+
+static inline uint32_t uct_ib_md_indirect_rkey(uct_rkey_t uct_rkey)
+{
+    return (uint32_t)(uct_rkey >> 32); /* high 32 bits hold the indirect key */
+}
+
+
+static UCS_F_ALWAYS_INLINE void
+uct_ib_md_pack_rkey(uint32_t rkey, uint32_t atomic_rkey, void *rkey_buffer)
+{
+    uint64_t *rkey_p = (uint64_t*)rkey_buffer;
+    *rkey_p = (((uint64_t)atomic_rkey) << 32) | rkey;
+    ucs_trace("packed rkey: direct 0x%x indirect 0x%x", rkey, atomic_rkey);
+}
+
+
+/**
+ * rkey is packed/unpacked in such a way that:
+ * low  32 bits contain a direct key
+ * high 32 bits contain either UCT_IB_INVALID_RKEY or a valid indirect key.
+ */
+static inline uint32_t uct_ib_resolve_atomic_rkey(uct_rkey_t uct_rkey,
+                                                  uint16_t atomic_mr_offset,
+                                                  uint64_t *remote_addr_p)
+{
+    uint32_t atomic_rkey = uct_ib_md_indirect_rkey(uct_rkey);
+    if (atomic_rkey == UCT_IB_INVALID_RKEY) {
+        return uct_ib_md_direct_rkey(uct_rkey);
+    } else {
+        *remote_addr_p += atomic_mr_offset;
+        return atomic_rkey;
+    }
+}
+
+
+static inline uint16_t uct_ib_md_atomic_offset(uint8_t atomic_mr_id)
+{
+    return 8 * atomic_mr_id;
+}
+
+static inline void uct_ib_memh_init_from_mr(uct_ib_mem_t *memh, struct ibv_mr *mr)
+{
+    memh->lkey = mr->lkey;
+    memh->rkey = mr->rkey;
+}
+
+ucs_status_t uct_ib_md_open(uct_component_t *component, const char *md_name,
+                            const uct_md_config_t *uct_md_config, uct_md_h *md_p);
+
+ucs_status_t uct_ib_md_open_common(uct_ib_md_t *md,
+                                   struct ibv_device *ib_device,
+                                   const uct_ib_md_config_t *md_config);
+
+void uct_ib_md_close(uct_md_h uct_md);
+
+ucs_status_t uct_ib_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+                           uint64_t access, struct ibv_mr **mr_p);
+ucs_status_t uct_ib_dereg_mr(struct ibv_mr *mr);
+ucs_status_t uct_ib_dereg_mrs(struct ibv_mr **mrs, size_t mr_num);
+
+ucs_status_t
+uct_ib_md_handle_mr_list_multithreaded(uct_ib_md_t *md, void *address,
+                                       size_t length, uint64_t access,
+                                       size_t chunk, struct ibv_mr **mrs);
+#endif
diff --git a/src/uct/ib/base/ib_verbs.h b/src/uct/ib/base/ib_verbs.h
new file mode 100644
index 0000000..63b9ce1
--- /dev/null
+++ b/src/uct/ib/base/ib_verbs.h
@@ -0,0 +1,304 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014.
ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#ifndef UCT_IB_VERBS_H +#define UCT_IB_VERBS_H + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#ifdef HAVE_VERBS_EXP_H +#include +#endif + +#include + +#include +#include + +#ifndef HAVE_VERBS_EXP_H +# define IBV_EXP_SEND_INLINE IBV_SEND_INLINE +# define IBV_EXP_SEND_SIGNALED IBV_SEND_SIGNALED +# define IBV_EXP_SEND_SOLICITED IBV_SEND_SOLICITED +# define IBV_EXP_SEND_FENCE IBV_SEND_FENCE +# define IBV_EXP_QP_STATE IBV_QP_STATE +# define IBV_EXP_QP_PKEY_INDEX IBV_QP_PKEY_INDEX +# define IBV_EXP_QP_PORT IBV_QP_PORT +# define IBV_EXP_QP_PATH_MTU IBV_QP_PATH_MTU +# define IBV_EXP_QP_TIMEOUT IBV_QP_TIMEOUT +# define IBV_EXP_QP_AV IBV_QP_AV +# define IBV_EXP_QP_RETRY_CNT IBV_QP_RETRY_CNT +# define IBV_EXP_QP_RNR_RETRY IBV_QP_RNR_RETRY +# define IBV_EXP_QP_MAX_QP_RD_ATOMIC IBV_QP_MAX_QP_RD_ATOMIC +# define IBV_EXP_ACCESS_REMOTE_WRITE IBV_ACCESS_REMOTE_WRITE +# define IBV_EXP_ACCESS_REMOTE_READ IBV_ACCESS_REMOTE_READ +# define IBV_EXP_ACCESS_REMOTE_ATOMIC IBV_ACCESS_REMOTE_ATOMIC +# define ibv_exp_reg_shared_mr ibv_reg_shared_mr_ex +# define ibv_exp_reg_shared_mr_in ibv_reg_shared_mr_in +# define ibv_exp_send_wr ibv_send_wr +# define exp_opcode opcode +# define ibv_exp_post_send ibv_post_send +# define IBV_EXP_WR_NOP IBV_WR_NOP +# define IBV_EXP_WR_SEND IBV_WR_SEND +# define IBV_EXP_WR_RDMA_WRITE IBV_WR_RDMA_WRITE +# define IBV_EXP_WR_RDMA_READ IBV_WR_RDMA_READ +# define IBV_EXP_WR_ATOMIC_FETCH_AND_ADD IBV_WR_ATOMIC_FETCH_AND_ADD +# define IBV_EXP_WR_ATOMIC_CMP_AND_SWP IBV_WR_ATOMIC_CMP_AND_SWP +# define ibv_exp_qp_init_attr ibv_qp_init_attr +# define ibv_exp_port_attr ibv_port_attr +# define ibv_exp_query_port ibv_query_port +# define exp_device_cap_flags device_cap_flags +# define ibv_exp_create_qp ibv_create_qp + +# define IBV_SHARED_MR_ACCESS_FLAGS(_shared_mr) ((_shared_mr)->exp_access) +# define 
IBV_EXP_DEVICE_ATTR_SET_COMP_MASK(_attr) + +#else +# define IBV_SHARED_MR_ACCESS_FLAGS(_shared_mr) ((_shared_mr)->access) +#endif /* HAVE_VERBS_EXP_H */ + +/* Read device properties */ +#if HAVE_DECL_IBV_EXP_QUERY_DEVICE + +# define IBV_DEV_ATTR(_dev, _attr) ((_dev)->dev_attr._attr) + +typedef struct ibv_exp_device_attr uct_ib_device_attr; + +static inline ucs_status_t uct_ib_query_device(struct ibv_context *ctx, + uct_ib_device_attr* attr) { + int ret; +#if HAVE_DECL_IBV_EXP_DEVICE_ATTR_RESERVED_2 + attr->comp_mask = 0xffffffff; + attr->comp_mask_2 = IBV_EXP_DEVICE_ATTR_RESERVED_2 - 1; +#else + attr->comp_mask = IBV_EXP_DEVICE_ATTR_RESERVED - 1; +#endif + ret = ibv_exp_query_device(ctx, attr); + if (ret != 0) { + ucs_error("ibv_exp_query_device(%s) returned %d: %m", + ibv_get_device_name(ctx->device), ret); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} +#elif HAVE_DECL_IBV_QUERY_DEVICE_EX + +# define IBV_DEV_ATTR(_dev, _attr) ((_dev)->dev_attr.orig_attr._attr) + +typedef struct ibv_device_attr_ex uct_ib_device_attr; + +static inline ucs_status_t uct_ib_query_device(struct ibv_context *ctx, + uct_ib_device_attr* attr) { + int ret; + + attr->comp_mask = 0; + ret = ibv_query_device_ex(ctx, NULL, attr); + if (ret != 0) { + ucs_error("ibv_query_device_ex(%s) returned %d: %m", + ibv_get_device_name(ctx->device), ret); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +#else + +# define IBV_DEV_ATTR(_dev, _attr) ((_dev)->dev_attr._attr) + +typedef struct ibv_device_attr uct_ib_device_attr; + +static inline ucs_status_t uct_ib_query_device(struct ibv_context *ctx, + uct_ib_device_attr* attr) { + int ret; + + ret = ibv_query_device(ctx, attr); + if (ret != 0) { + ucs_error("ibv_query_device(%s) returned %d: %m", + ibv_get_device_name(ctx->device), ret); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +#endif + + +/* + * Contiguous pages support + */ +#if HAVE_DECL_IBV_EXP_DEVICE_MR_ALLOCATE +# define IBV_EXP_HAVE_CONTIG_PAGES(_attr) 
((_attr)->exp_device_cap_flags & IBV_EXP_DEVICE_MR_ALLOCATE)
+#else
+# define IBV_EXP_HAVE_CONTIG_PAGES(_attr) 0
+#endif
+
+
+/*
+ * On-demand paging support
+ */
+#if HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_ODP_CAPS
+# define IBV_EXP_HAVE_ODP(_attr) ((_attr)->odp_caps.general_odp_caps & IBV_EXP_ODP_SUPPORT)
+# define IBV_EXP_ODP_CAPS(_attr, _xport) ((_attr)->odp_caps.per_transport_caps._xport##_odp_caps)
+#else
+# define IBV_EXP_HAVE_ODP(_attr) 0
+# define IBV_EXP_ODP_CAPS(_attr, _xport) 0
+#endif
+
+#if HAVE_ODP
+# if HAVE_VERBS_EXP_H
+# define IBV_ACCESS_ON_DEMAND IBV_EXP_ACCESS_ON_DEMAND
+# define ibv_reg_mr_func_name "ibv_exp_reg_mr"
+# else
+# define ibv_reg_mr_func_name "ibv_reg_mr"
+# endif
+#else
+# define IBV_ACCESS_ON_DEMAND 0
+# define ibv_reg_mr_func_name "ibv_reg_mr"
+#endif
+
+#if HAVE_ODP_IMPLICIT
+# if HAVE_VERBS_EXP_H
+# define UCT_IB_HAVE_ODP_IMPLICIT(_attr) ((_attr)->odp_caps.general_odp_caps & IBV_EXP_ODP_SUPPORT_IMPLICIT)
+# else
+# define UCT_IB_HAVE_ODP_IMPLICIT(_attr) ((_attr)->odp_caps.general_caps & IBV_ODP_SUPPORT_IMPLICIT)
+# endif
+#else
+# define UCT_IB_HAVE_ODP_IMPLICIT(_attr) 0
+#endif
+
+#if !HAVE_DECL_IBV_EXP_PREFETCH_WRITE_ACCESS
+# define IBV_EXP_PREFETCH_WRITE_ACCESS IBV_EXP_ACCESS_LOCAL_WRITE
+#endif
+
+/*
+ * DC support (macro argument is parenthesized so any pointer expression is safe)
+ */
+#define IBV_DEVICE_HAS_DC(dev) ((dev)->flags & UCT_IB_DEVICE_FLAG_DC)
+
+/*
+ * Adaptive Routing support
+ */
+#if HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT
+# define UCX_IB_DEV_IS_OOO_SUPPORTED(_ibdev, _transport) \
+    (((_ibdev)->dev_attr.comp_mask & IBV_EXP_DEVICE_ATTR_OOO_CAPS) && \
+    ((_ibdev)->dev_attr.ooo_caps._transport##_caps & IBV_EXP_OOO_SUPPORT_RW_DATA_PLACEMENT))
+#else
+# define UCX_IB_DEV_IS_OOO_SUPPORTED(_ibdev, _transport) 0
+#endif
+
+#if !HAVE_DECL_IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT
+# define IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT 0
+#endif
+
+#if !HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT
+# define IBV_EXP_QP_OOO_RW_DATA_PLACEMENT 0
+#endif
+
+
+/*
+ * Safe setenv
+ */
+#if HAVE_DECL_IBV_EXP_SETENV
+# define ibv_exp_unsetenv(_c, _n) 0 +#else +# define ibv_exp_setenv(_c, _n, _v, _o) setenv(_n, _v, _o) +# define ibv_exp_unsetenv(_c, _n) unsetenv(_n) +#endif + + +/* + * CQ overrun support + */ +#if HAVE_DECL_IBV_EXP_CQ_IGNORE_OVERRUN +static inline int ibv_exp_cq_ignore_overrun(struct ibv_cq *cq) +{ + struct ibv_exp_cq_attr cq_attr = {}; + cq_attr.comp_mask = IBV_EXP_CQ_ATTR_CQ_CAP_FLAGS; + cq_attr.cq_cap_flags = IBV_EXP_CQ_IGNORE_OVERRUN; + return ibv_exp_modify_cq(cq, &cq_attr, IBV_EXP_CQ_CAP_FLAGS); +} +#elif HAVE_DECL_IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN +static inline int ibv_exp_cq_ignore_overrun(struct ibv_cq *cq) { return 0; } +#else +static inline int ibv_exp_cq_ignore_overrun(struct ibv_cq *cq) +{ + errno = ENOSYS; + return -1; +} +#endif /* HAVE_IBV_EXP_CQ_IGNORE_OVERRUN */ + + +/* + * Atomics support + */ +#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE +# define IBV_EXP_HAVE_ATOMIC_HCA(_attr) ((_attr)->exp_atomic_cap == IBV_EXP_ATOMIC_HCA) +# define IBV_EXP_HAVE_ATOMIC_GLOB(_attr) ((_attr)->exp_atomic_cap == IBV_EXP_ATOMIC_GLOB) +# define IBV_EXP_HAVE_ATOMIC_HCA_REPLY_BE(_attr) ((_attr)->exp_atomic_cap == IBV_EXP_ATOMIC_HCA_REPLY_BE) +#elif HAVE_DECL_IBV_QUERY_DEVICE_EX +# define IBV_EXP_HAVE_ATOMIC_HCA(_attr) ((_attr)->orig_attr.atomic_cap == IBV_ATOMIC_HCA) +# define IBV_EXP_HAVE_ATOMIC_GLOB(_attr) ((_attr)->orig_attr.atomic_cap == IBV_ATOMIC_GLOB) +# define IBV_EXP_HAVE_ATOMIC_HCA_REPLY_BE(_attr) 0 +#else +# define IBV_EXP_HAVE_ATOMIC_HCA(_attr) ((_attr)->atomic_cap == IBV_ATOMIC_HCA) +# define IBV_EXP_HAVE_ATOMIC_GLOB(_attr) ((_attr)->atomic_cap == IBV_ATOMIC_GLOB) +# define IBV_EXP_HAVE_ATOMIC_HCA_REPLY_BE(_attr) 0 +#endif /* HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE */ + + +/* Ethernet link layer */ +#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET +# define IBV_PORT_IS_LINK_LAYER_ETHERNET(_attr) ((_attr)->link_layer == IBV_LINK_LAYER_ETHERNET) +#else +# define IBV_PORT_IS_LINK_LAYER_ETHERNET(_attr) 0 +#endif + + +typedef uint8_t uct_ib_uint24_t[3]; + +static inline 
void uct_ib_pack_uint24(uct_ib_uint24_t buf, const uint32_t qp_num) +{ + + buf[0] = (qp_num >> 0) & 0xFF; + buf[1] = (qp_num >> 8) & 0xFF; + buf[2] = (qp_num >> 16) & 0xFF; +} + +static inline uint32_t uct_ib_unpack_uint24(const uct_ib_uint24_t buf) +{ + return buf[0] | ((uint32_t)buf[1] << 8) | ((uint32_t)buf[2] << 16); +} + +static inline void uct_ib_destroy_qp(struct ibv_qp *qp) +{ + int ret; + + ret = ibv_destroy_qp(qp); + if (ret) { + ucs_warn("ibv_destroy_qp() failed: %m"); + } +} + +static inline void uct_ib_destroy_srq(struct ibv_srq *srq) +{ + int ret; + + ret = ibv_destroy_srq(srq); + if (ret) { + ucs_warn("ibv_destroy_srq() failed: %m"); + } +} + +typedef struct uct_ib_qpnum { + uct_ib_uint24_t qp_num; +} uct_ib_qpnum_t; + +#endif /* UCT_IB_VERBS_H */ diff --git a/src/uct/ib/cm/Makefile.am b/src/uct/ib/cm/Makefile.am new file mode 100644 index 0000000..bd0c26e --- /dev/null +++ b/src/uct/ib/cm/Makefile.am @@ -0,0 +1,24 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +if HAVE_TL_CM + +module_LTLIBRARIES = libuct_ib_cm.la +libuct_ib_cm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(IBVERBS_CPPFLAGS) +libuct_ib_cm_la_CFLAGS = $(BASE_CFLAGS) +libuct_ib_cm_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/ib/libuct_ib.la +libuct_ib_cm_la_LDFLAGS = $(IBVERBS_LDFLAGS) $(IBCM_LIBS) -version-info $(SOVERSION) + +noinst_HEADERS = \ + cm.h + +libuct_ib_cm_la_SOURCES = \ + cm_iface.c \ + cm_ep.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/uct/ib/cm/Makefile.in b/src/uct/ib/cm/Makefile.in new file mode 100644 index 0000000..20b6f7c --- /dev/null +++ b/src/uct/ib/cm/Makefile.in @@ -0,0 +1,871 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) 
has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/ib/cm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + 
$(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + 
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_TL_CM_TRUE@libuct_ib_cm_la_DEPENDENCIES = \ +@HAVE_TL_CM_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_TL_CM_TRUE@ $(top_builddir)/src/uct/ib/libuct_ib.la +am__libuct_ib_cm_la_SOURCES_DIST = cm_iface.c cm_ep.c +@HAVE_TL_CM_TRUE@am_libuct_ib_cm_la_OBJECTS = \ +@HAVE_TL_CM_TRUE@ libuct_ib_cm_la-cm_iface.lo \ +@HAVE_TL_CM_TRUE@ libuct_ib_cm_la-cm_ep.lo +libuct_ib_cm_la_OBJECTS = $(am_libuct_ib_cm_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_ib_cm_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libuct_ib_cm_la_CFLAGS) $(CFLAGS) $(libuct_ib_cm_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@HAVE_TL_CM_TRUE@am_libuct_ib_cm_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libuct_ib_cm_la-cm_ep.Plo \ + ./$(DEPDIR)/libuct_ib_cm_la-cm_iface.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
\ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_ib_cm_la_SOURCES) +DIST_SOURCES = $(am__libuct_ib_cm_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = cm.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ 
+ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = 
@ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = 
# Definition of the IB CM transport module (libuct_ib_cm.la).
# Every line is prefixed with the HAVE_TL_CM automake conditional, so the
# whole definition is active only when that conditional is true.
@HAVE_TL_CM_TRUE@module_LTLIBRARIES = libuct_ib_cm.la
@HAVE_TL_CM_TRUE@libuct_ib_cm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(IBVERBS_CPPFLAGS)
@HAVE_TL_CM_TRUE@libuct_ib_cm_la_CFLAGS = $(BASE_CFLAGS)
# The module is linked against libucs and the base IB transport library.
@HAVE_TL_CM_TRUE@libuct_ib_cm_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \
@HAVE_TL_CM_TRUE@                         $(top_builddir)/src/uct/ib/libuct_ib.la

# $(IBCM_LIBS) is passed through LDFLAGS together with the verbs linker flags.
@HAVE_TL_CM_TRUE@libuct_ib_cm_la_LDFLAGS = $(IBVERBS_LDFLAGS) $(IBCM_LIBS) -version-info $(SOVERSION)
@HAVE_TL_CM_TRUE@noinst_HEADERS = \
@HAVE_TL_CM_TRUE@	cm.h

@HAVE_TL_CM_TRUE@libuct_ib_cm_la_SOURCES = \
@HAVE_TL_CM_TRUE@	cm_iface.c \
@HAVE_TL_CM_TRUE@	cm_ep.c


# Automake silent rules
# AM_V_LN selects between the "_0" (echo a short LN tag) and "_1" (no-op
# `true`) variants according to the @AM_V@ / @AM_DEFAULT_V@ substitutions.
@HAVE_TL_CM_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@)
@HAVE_TL_CM_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@)
@HAVE_TL_CM_TRUE@AM_V_LN_0 = echo " LN "
@HAVE_TL_CM_TRUE@AM_V_LN_1 = true
# One path under $(localmoduledir) per module library in $(module_LTLIBRARIES).
@HAVE_TL_CM_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES))
# Install the libtool module libraries into $(DESTDIR)$(moduledir).
#  - The list is emptied when $(moduledir) is empty.
#  - Entries that do not exist as regular files are dropped (list2).
#  - When anything is left, the destination directory is created first
#    (aborting on failure) and libtool is run in --mode=install; both
#    commands are echoed before they run.
install-moduleLTLIBRARIES: $(module_LTLIBRARIES)
	@$(NORMAL_INSTALL)
	@list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \
	list2=; for p in $$list; do \
	  if test -f $$p; then \
	    list2="$$list2 $$p"; \
	  else :; fi; \
	done; \
	test -z "$$list2" || { \
	  echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \
	  $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \
	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \
	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \
	}
# Create a placeholder for each dependency file listed in
# $(am__depfiles_remade): make sure the target's directory exists, write a
# '# dummy' line to the temporary file "$@-t", then move it onto the target
# so the target only appears once it is fully written.
$(am__depfiles_remade):
	@$(MKDIR_P) $(@D)
	@echo '# dummy' >$@-t && $(am__mv) $@-t $@

# Aggregate target: building it builds every placeholder dependency file.
am--depfiles: $(am__depfiles_remade)
$(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libuct_ib_cm_la-cm_iface.lo: cm_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_cm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_cm_la_CFLAGS) $(CFLAGS) -MT libuct_ib_cm_la-cm_iface.lo -MD -MP -MF $(DEPDIR)/libuct_ib_cm_la-cm_iface.Tpo -c -o libuct_ib_cm_la-cm_iface.lo `test -f 'cm_iface.c' || echo '$(srcdir)/'`cm_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_ib_cm_la-cm_iface.Tpo $(DEPDIR)/libuct_ib_cm_la-cm_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cm_iface.c' object='libuct_ib_cm_la-cm_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_cm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_cm_la_CFLAGS) $(CFLAGS) -c -o libuct_ib_cm_la-cm_iface.lo `test -f 'cm_iface.c' || echo '$(srcdir)/'`cm_iface.c + +libuct_ib_cm_la-cm_ep.lo: cm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ib_cm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ib_cm_la_CFLAGS) $(CFLAGS) -MT libuct_ib_cm_la-cm_ep.lo -MD -MP -MF $(DEPDIR)/libuct_ib_cm_la-cm_ep.Tpo -c -o libuct_ib_cm_la-cm_ep.lo `test -f 'cm_ep.c' || echo '$(srcdir)/'`cm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
# Run $(CTAGS) over the tagged files of this directory.
# $$unique is a shell variable, presumably set by
# $(am__define_uniq_tagged_files) (defined outside this section -- verify).
# $(CTAGS) is not invoked when both $(CTAGS_ARGS) and $$unique are empty.
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
	$(am__define_uniq_tagged_files); \
	test -z "$(CTAGS_ARGS)$$unique" \
	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
	     $$unique
GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
# Create the installation directory used by this Makefile,
# $(DESTDIR)$(moduledir). The `test -z` guard skips an empty path.
installdirs:
	for dir in "$(DESTDIR)$(moduledir)"; do \
	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
	done
# distclean: everything distclean-am removes, plus the two dependency files
# of the module's objects and the generated Makefile itself.
# The leading '-' on each recipe line makes make ignore a failing rm.
distclean: distclean-am
	-rm -f ./$(DEPDIR)/libuct_ib_cm_la-cm_ep.Plo
	-rm -f ./$(DEPDIR)/libuct_ib_cm_la-cm_iface.Plo
	-rm -f Makefile
# Create symbolic links for the built modules under $(localmoduledir)
# Link also *.la files to create proper makefile dependencies
#
# The link source is given as an absolute path built from $(CURDIR), make's
# built-in record of its working directory, so no extra `pwd` shell is forked
# each time the recipe is expanded.
# The second loop only reports each linked file through the $(AM_V_LN)
# silent-rules helper.
@HAVE_TL_CM_TRUE@$(local_la_modules): $(module_LTLIBRARIES)
@HAVE_TL_CM_TRUE@	$(AM_V_at)$(MKDIR_P) $(localmoduledir)
@HAVE_TL_CM_TRUE@	$(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \
@HAVE_TL_CM_TRUE@		(cd $(localmoduledir) && $(LN_RS) -nf $(CURDIR)/$$lib); \
@HAVE_TL_CM_TRUE@	done
@HAVE_TL_CM_TRUE@	@for lib in *.la $(objdir)/*$(shrext)*; do \
@HAVE_TL_CM_TRUE@		$(AM_V_LN) $$lib; \
@HAVE_TL_CM_TRUE@	done
/**
 * IB CM interface
 *
 * Sends are tracked with two counters: num_outstanding is incremented when a
 * SIDR request is sent, while completions observed by the CM event handler
 * are only accumulated in num_completions. The progress callback subtracts
 * num_completions from num_outstanding and resets it, so that resources are
 * released from the progress context only.
 */
typedef struct uct_cm_iface {
    uct_ib_iface_t         super;
    uint32_t               service_id;  /* Service ID we're listening to */
    struct ib_cm_device    *cmdev;      /* CM device */
    struct ib_cm_id        *listen_id;  /* Listening "socket" */
    ucs_queue_head_t       notify_q;    /* Notification queue */
    uint32_t               num_outstanding; /* Number of outstanding sends */
    uint32_t               num_completions; /* Number of completed sends which
                                               were not yet subtracted from
                                               num_outstanding */
    ucs_queue_head_t       outstanding_q; /* Outstanding operations queue */
    uct_worker_cb_id_t     slow_prog_id;  /* Callback id for slowpath progress */

    struct {
        int                timeout_ms;      /* Used as timeout_ms of every
                                               SIDR request */
        uint32_t           max_outstanding; /* Sends are possible only while
                                               num_outstanding is below this */
        uint8_t            retry_count;     /* Used as max_cm_retries of every
                                               SIDR request */
    } config;
} uct_cm_iface_t;
/**
 * Enter the CM critical section: block the async context of the worker which
 * owns @a _iface.
 *
 * Expands to a single statement; the caller supplies the terminating
 * semicolon, which makes the macro safe under an unbraced if/else.
 */
#define uct_cm_enter(_iface) \
    do { \
        UCS_ASYNC_BLOCK(uct_cm_iface_worker(_iface)->async); \
    } while (0)


/**
 * Leave the CM critical section: unblock the worker's async context and then
 * check it for missed events.
 *
 * Both steps are wrapped in one do/while(0) statement, so the second one
 * cannot escape an unbraced if/else at the call site.
 */
#define uct_cm_leave(_iface) \
    do { \
        UCS_ASYNC_UNBLOCK(uct_cm_iface_worker(_iface)->async); \
        ucs_async_check_miss(uct_cm_iface_worker(_iface)->async); \
    } while (0)
/**
 * Initialize a CM endpoint from the endpoint creation parameters.
 *
 * The endpoint only stores the destination address: the LID and GID are
 * unpacked from params->dev_addr, and the destination service id is read as
 * a 32-bit value from params->iface_addr.
 *
 * @param params  Endpoint parameters; iface, dev_addr and iface_addr are used.
 *
 * @return UCS_OK on success. UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS() and
 *         UCS_CLASS_CALL_SUPER_INIT() are defined elsewhere and presumably
 *         return an error from this function on failure -- verify.
 */
static UCS_CLASS_INIT_FUNC(uct_cm_ep_t, const uct_ep_params_t *params)

{
    uct_cm_iface_t *iface = ucs_derived_of(params->iface, uct_cm_iface_t);

    UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(params);
    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super.super);

    uct_ib_address_unpack((const uct_ib_address_t*)params->dev_addr, &self->dlid,
                          &self->dgid);
    /* The interface address is the 32-bit service id of the remote side */
    self->dest_service_id = *(const uint32_t*)params->iface_addr;
    return UCS_OK;
}
/**
 * Send an active message as the private data of an IB CM SIDR request.
 *
 * The message (a uct_cm_hdr_t followed by the packed payload) is built in a
 * temporary buffer of IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE bytes, which is freed
 * before returning on every path. A temporary CM id is created for the
 * request and queued on the interface's outstanding queue.
 *
 * @param tl_ep    Endpoint to send on.
 * @param am_id    Active message id, stored in the header.
 * @param pack_cb  Callback which packs the payload right after the header and
 *                 returns the payload length.
 * @param arg      Argument passed to @a pack_cb.
 * @param flags    Not used by this function.
 *
 * @return The payload length on success; otherwise a ucs_status_t error:
 *         UCS_ERR_NO_RESOURCE when the interface has no TX resources,
 *         UCS_ERR_NO_MEMORY on allocation failure, UCS_ERR_IO_ERROR when a CM
 *         call fails, or the status of uct_cm_ep_fill_path_rec().
 *
 * NOTE(review): hdr->length is a uint8_t and the buffer size is fixed, so
 * pack_cb presumably must never produce more than
 * IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - sizeof(*hdr) bytes -- no check is made
 * here; confirm the limit is enforced by the caller.
 */
ssize_t uct_cm_ep_am_bcopy(uct_ep_h tl_ep, uint8_t am_id, uct_pack_callback_t pack_cb,
                           void *arg, unsigned flags)
{
    uct_cm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cm_iface_t);
    uct_cm_ep_t *ep = ucs_derived_of(tl_ep, uct_cm_ep_t);
    struct ib_cm_sidr_req_param req;
    struct ibv_sa_path_rec path;
    uct_cm_iface_op_t *op;
    ucs_status_t status;
    uct_cm_hdr_t *hdr;
    size_t payload_len;
    size_t total_len;
    int ret;

    UCT_CHECK_AM_ID(am_id);

    uct_cm_enter(iface);

    if (!uct_cm_iface_has_tx_resources(iface)) {
        status = UCS_ERR_NO_RESOURCE;
        goto err;
    }

    /* Allocate temporary contiguous buffer */
    hdr = ucs_malloc(IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, "cm_send_buf");
    if (hdr == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    /* The payload is packed right after the header */
    payload_len = pack_cb(hdr + 1, arg);
    hdr->am_id  = am_id;
    hdr->length = payload_len;
    total_len   = sizeof(*hdr) + payload_len;

    status = uct_cm_ep_fill_path_rec(ep, &path);
    if (status != UCS_OK) {
        goto err_free_hdr;
    }

    /* Fill SIDR request */
    memset(&req, 0, sizeof req);
    req.path             = &path;
    req.service_id       = ep->dest_service_id;
    req.timeout_ms       = iface->config.timeout_ms;
    req.private_data     = hdr;
    req.private_data_len = total_len;
    req.max_cm_retries   = iface->config.retry_count;

    op = ucs_malloc(sizeof *op, "cm_op");
    if (op == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err_free_hdr;
    }

    /* Mark the 'id' member of the union as the valid one */
    op->is_id = 1;

    /* Create temporary ID for this message. Will be released when getting REP. */
    ret = ib_cm_create_id(iface->cmdev, &op->id, NULL);
    if (ret) {
        ucs_error("ib_cm_create_id() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_op;
    }

    uct_cm_dump_path(&path);

    ret = ib_cm_send_sidr_req(op->id, &req);
    if (ret) {
        ucs_error("ib_cm_send_sidr_req() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_id;
    }

    /* The request is in flight: account for it until the reply is handled */
    ucs_queue_push(&iface->outstanding_q, &op->queue);
    ++iface->num_outstanding;
    ucs_trace("outs=%d", iface->num_outstanding);
    UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, payload_len);

    uct_cm_iface_trace_data(iface, UCT_AM_TRACE_TYPE_SEND, hdr,
                            "TX: SIDR_REQ [id %p{%u} dlid %d svc 0x%"PRIx64"]",
                            op->id, op->id->handle, ntohs(path.dlid),
                            (uint64_t)req.service_id);
    uct_cm_leave(iface);
    ucs_free(hdr);
    /* coverity[missing_unlock] */
    return payload_len;

    /* Error path: release resources in reverse order of acquisition */
err_destroy_id:
    ib_cm_destroy_id(op->id);
err_free_op:
    ucs_free(op);
err_free_hdr:
    ucs_free(hdr);
err:
    uct_cm_leave(iface);
    return status;
}
uct_cm_leave(iface); + return status; +} + +void uct_cm_ep_pending_purge(uct_ep_h tl_ep, uct_pending_purge_callback_t cb, + void *arg) +{ + uct_cm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cm_iface_t); + uct_cm_ep_t *ep = ucs_derived_of(tl_ep, uct_cm_ep_t); + uct_cm_pending_req_priv_t *priv; + + uct_cm_enter(iface); + uct_pending_queue_purge(priv, &iface->notify_q, priv->ep == ep, cb, arg); + uct_cm_leave(iface); +} + +ucs_status_t uct_cm_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp) +{ + uct_cm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cm_iface_t); + ucs_status_t status; + + uct_cm_enter(iface); + if (!uct_cm_iface_has_tx_resources(iface)) { + status = UCS_ERR_NO_RESOURCE; + } else { + status = uct_cm_iface_flush_do(iface, comp); + if (status == UCS_OK) { + UCT_TL_EP_STAT_FLUSH(ucs_derived_of(tl_ep, uct_base_ep_t)); + } else if (status == UCS_INPROGRESS) { + UCT_TL_EP_STAT_FLUSH_WAIT(ucs_derived_of(tl_ep, uct_base_ep_t)); + } + } + uct_cm_leave(iface); + + return status; +} diff --git a/src/uct/ib/cm/cm_iface.c b/src/uct/ib/cm/cm_iface.c new file mode 100644 index 0000000..598d0da --- /dev/null +++ b/src/uct/ib/cm/cm_iface.c @@ -0,0 +1,493 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
/**
 * Configuration table of the IB CM interface.
 *
 * Each entry maps a configuration name and its default value to a field of
 * uct_cm_iface_config_t. The table is terminated by a {NULL} entry.
 */
static ucs_config_field_t uct_cm_iface_config_table[] = {
  /* Nested table for the 'super' field, using uct_ib_iface_config_table with
   * the "IB_" name and the "RX_INLINE=0" default string */
  {"IB_", "RX_INLINE=0", NULL,
   ucs_offsetof(uct_cm_iface_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_ib_iface_config_table)},

  /* Stored as a time value in the 'timeout' field (a double) */
  {"TIMEOUT", "300ms", "Timeout for MAD layer",
   ucs_offsetof(uct_cm_iface_config_t, timeout), UCS_CONFIG_TYPE_TIME},

  {"RETRY_COUNT", "100", "Number of retries for MAD layer",
   ucs_offsetof(uct_cm_iface_config_t, retry_count), UCS_CONFIG_TYPE_UINT},

  /* Limit checked against the number of outstanding sends */
  {"MAX_OP", "1024", "Maximal number of outstanding SIDR operations",
   ucs_offsetof(uct_cm_iface_config_t, max_outstanding), UCS_CONFIG_TYPE_UINT},

  {NULL}
};
+ */ + if (ucs_queue_is_empty(&iface->outstanding_q) || + ucs_queue_head_elem_non_empty(&iface->outstanding_q, uct_cm_iface_op_t, queue)->is_id) + { + uct_worker_progress_unregister_safe(&uct_cm_iface_worker(iface)->super, + &iface->slow_prog_id); + } + + uct_cm_leave(iface); + + return count; +} + +ucs_status_t uct_cm_iface_flush_do(uct_cm_iface_t *iface, uct_completion_t *comp) +{ + uct_cm_iface_op_t *op; + + if (iface->num_outstanding == 0) { + return UCS_OK; + } + + /* If user request a completion callback, allocate a new operation and put + * it in the tail of the queue. It will be called when all operations which + * were sent before are completed. + */ + if (comp != NULL) { + op = ucs_malloc(sizeof *op, "cm_op"); + if (op == NULL) { + return UCS_ERR_NO_MEMORY; + } + + op->is_id = 0; + op->comp = comp; + ucs_queue_push(&iface->outstanding_q, &op->queue); + } + + sched_yield(); + return UCS_INPROGRESS; +} + +ucs_status_t uct_cm_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp) +{ + uct_cm_iface_t *iface = ucs_derived_of(tl_iface, uct_cm_iface_t); + ucs_status_t status; + + uct_cm_enter(iface); + status = uct_cm_iface_flush_do(iface, comp); + if (status == UCS_OK) { + UCT_TL_IFACE_STAT_FLUSH(ucs_derived_of(tl_iface, uct_base_iface_t)); + } else if (status == UCS_INPROGRESS){ + UCT_TL_IFACE_STAT_FLUSH_WAIT(ucs_derived_of(tl_iface, uct_base_iface_t)); + } + uct_cm_leave(iface); + + return status; +} + +static void uct_cm_iface_handle_sidr_req(uct_cm_iface_t *iface, + struct ib_cm_event *event) +{ + uct_cm_hdr_t *hdr = event->private_data; + struct ib_cm_sidr_rep_param rep; + int ret; + + VALGRIND_MAKE_MEM_DEFINED(hdr, sizeof(hdr)); + VALGRIND_MAKE_MEM_DEFINED(hdr + 1, hdr->length); + + uct_cm_iface_trace_data(iface, UCT_AM_TRACE_TYPE_RECV, hdr, "RX: SIDR_REQ"); + + /* Send reply */ + ucs_trace_data("TX: SIDR_REP [id %p{%u}]", event->cm_id, + event->cm_id->handle); + memset(&rep, 0, sizeof rep); + rep.status = IB_SIDR_SUCCESS; + ret = 
ib_cm_send_sidr_rep(event->cm_id, &rep); + if (ret) { + ucs_error("ib_cm_send_sidr_rep() failed: %m"); + } + + uct_iface_invoke_am(&iface->super.super, hdr->am_id, hdr + 1, hdr->length, 0); +} + +static void uct_cm_iface_outstanding_remove(uct_cm_iface_t* iface, + struct ib_cm_id* id) +{ + uct_cm_iface_op_t *op; + ucs_queue_iter_t iter; + + ucs_queue_for_each_safe(op, iter, &iface->outstanding_q, queue) { + if (op->is_id && (op->id == id)) { + ucs_queue_del_iter(&iface->outstanding_q, iter); + /* Must not release resources from the async context + * because it will break pending op ordering. + * For example bcopy() may succeed while there are queued + * pending ops: + * bcopy() -> no resources + * pending_add() -> ok + * <-- async event: resources available + * bcopy() --> ok. oops this is out of order send + * + * save the number and do actual release in the + * progress() context. + */ + ++iface->num_completions; + ucs_free(op); + return; + } + } + + ucs_fatal("outstanding cm id %p not found", id); +} + +static void uct_cm_iface_outstanding_purge(uct_cm_iface_t *iface) +{ + uct_cm_iface_op_t *op; + + ucs_queue_for_each_extract(op, &iface->outstanding_q, queue, 1) { + if (op->is_id) { + ib_cm_destroy_id(op->id); + } else { + uct_invoke_completion(op->comp, UCS_ERR_CANCELED); + } + ucs_free(op); + } + iface->num_outstanding = 0; +} + +static void uct_cm_iface_event_handler(int fd, void *arg) +{ + uct_cm_iface_t *iface = arg; + struct ib_cm_event *event; + struct ib_cm_id *id; + int destroy_id; + int ret; + + ucs_trace_func(""); + + for (;;) { + /* Fetch all events */ + ret = ib_cm_get_event(iface->cmdev, &event); + if (ret) { + if (errno != EAGAIN) { + ucs_warn("ib_cm_get_event() failed: %m"); + } + return; + } + + id = event->cm_id; + + /* Handle the event */ + switch (event->event) { + case IB_CM_SIDR_REQ_ERROR: + ucs_error("SIDR request error, status: %s", + ibv_wc_status_str(event->param.send_status)); + destroy_id = 1; + break; + case IB_CM_SIDR_REQ_RECEIVED: 
+ uct_cm_iface_handle_sidr_req(iface, event); + destroy_id = 1; /* Destroy the ID created by the driver */ + break; + case IB_CM_SIDR_REP_RECEIVED: + ucs_trace_data("RX: SIDR_REP [id %p{%u}]", id, id->handle); + uct_cm_iface_outstanding_remove(iface, id); + destroy_id = 1; /* Destroy the ID which was used for sending */ + break; + default: + ucs_warn("Unexpected CM event: %d", event->event); + destroy_id = 0; + break; + } + + /* Acknowledge CM event, remember the id, in case we would destroy it */ + ret = ib_cm_ack_event(event); + if (ret) { + ucs_warn("ib_cm_ack_event() failed: %m"); + } + + /* If there is an id which should be destroyed, do it now, after + * acknowledging all events. + */ + if (destroy_id) { + ret = ib_cm_destroy_id(id); + if (ret) { + ucs_error("ib_cm_destroy_id() failed: %m"); + } + } + + /* Register uct_cm_iface_progress on the worker after every handled event; the registration id is kept in slow_prog_id */ uct_worker_progress_register_safe(&uct_cm_iface_worker(iface)->super, + uct_cm_iface_progress, iface, 0, + &iface->slow_prog_id); + } +} + +/* Receive-descriptor release callback: recovers the owning uct_ib_iface_t from 'self' (its release_desc member) and frees the buffer that starts config.rx_headroom_offset bytes before 'desc'. */ static void uct_cm_iface_release_desc(uct_recv_desc_t *self, void *desc) +{ + uct_ib_iface_t *iface = ucs_container_of(self, uct_ib_iface_t, release_desc); + ucs_free(desc - iface->config.rx_headroom_offset); +} + +/* Constructor of uct_cm_iface_t: initializes the uct_ib_iface_t base with uct_cm_iface_ops, requires the worker to have an async context, opens the CM device, creates a listening CM id on a generated service id and installs uct_cm_iface_event_handler on the device fd. Returns UCS_OK or an error status. */ static UCS_CLASS_INIT_FUNC(uct_cm_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_cm_iface_config_t *config = ucs_derived_of(tl_config, uct_cm_iface_config_t); + uct_ib_iface_init_attr_t init_attr = {}; + ucs_status_t status; + int ret; + + ucs_trace_func(""); + + init_attr.tx_cq_len = 1; + init_attr.rx_cq_len = config->super.rx.queue_len; + /* segment size never exceeds the SIDR REQ private data size */ init_attr.seg_size = ucs_min(IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, + config->super.seg_size); + + UCS_CLASS_CALL_SUPER_INIT(uct_ib_iface_t, &uct_cm_iface_ops, md, worker, + params, &config->super, &init_attr); + + if (self->super.super.worker->async == NULL) { + ucs_error("cm must have async!=NULL"); + return UCS_ERR_INVALID_PARAM; + } + + self->num_outstanding = 0; + self->num_completions = 0; + 
self->service_id = 0; + /* scale by 1e3 and round to nearest (assumes config->timeout is in seconds - TODO confirm) */ self->config.timeout_ms = (int)(config->timeout * 1e3 + 0.5); + self->config.max_outstanding = config->max_outstanding; + /* clamp the configured retry count to UINT8_MAX */ self->config.retry_count = ucs_min(config->retry_count, UINT8_MAX); + /* NOTE(review): looks redundant with the ucs_queue_head_init(&self->notify_q) call that follows - confirm */ self->notify_q.head = NULL; + self->slow_prog_id = UCS_CALLBACKQ_ID_NULL; + ucs_queue_head_init(&self->notify_q); + ucs_queue_head_init(&self->outstanding_q); + + /* Redefine receive desc release callback */ + self->super.release_desc.cb = uct_cm_iface_release_desc; + + self->cmdev = ib_cm_open_device(uct_ib_iface_device(&self->super)->ibv_context); + if (self->cmdev == NULL) { + ucs_error("ib_cm_open_device() failed: %m. Check if ib_ucm.ko module is loaded."); + status = UCS_ERR_NO_DEVICE; + goto err; + } + + /* adjust the O_NONBLOCK file status flag of the CM device fd; the event handler treats EAGAIN from ib_cm_get_event() as "no more events" */ status = ucs_sys_fcntl_modfl(self->cmdev->fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + goto err_close_device; + } + + /* the interface itself is passed as the context of the listening id */ ret = ib_cm_create_id(self->cmdev, &self->listen_id, self); + if (ret) { + ucs_error("ib_cm_create_id() failed: %m"); + status = UCS_ERR_NO_DEVICE; + goto err_close_device; + } + + /* Keep generating service ids until ib_cm_listen() accepts one; only an EBUSY failure is retried */ do { + self->service_id = (uint32_t)(ucs_generate_uuid((uintptr_t)self) & + (~IB_CM_ASSIGN_SERVICE_ID_MASK)); + ret = ib_cm_listen(self->listen_id, self->service_id, 0); + if (ret) { + if (errno == EBUSY) { + /* The generated service id is already in use - try to + * generate another one. 
+ */ + ucs_debug("ib_cm service id 0x%x already in use, " + "trying another one", self->service_id); + continue; + } else { + ucs_error("ib_cm_listen(service_id=0x%x) failed: %m", + self->service_id); + status = UCS_ERR_INVALID_ADDR; + goto err_destroy_id; + } + } + } while (ret); + + if (self->super.super.worker->async->mode == UCS_ASYNC_MODE_SIGNAL) { + ucs_warn("ib_cm fd does not support SIGIO"); + } + + /* Install uct_cm_iface_event_handler for UCS_EVENT_SET_EVREAD events on the CM device fd, using the worker's async context and mode */ status = ucs_async_set_event_handler(self->super.super.worker->async->mode, + self->cmdev->fd, UCS_EVENT_SET_EVREAD, + uct_cm_iface_event_handler, self, + self->super.super.worker->async); + if (status != UCS_OK) { + ucs_error("failed to set event handler"); + goto err_destroy_id; + } + + ucs_debug("listening for SIDR service_id 0x%x on fd %d", self->service_id, + self->cmdev->fd); + return UCS_OK; + +/* Error unwinding: each label releases what was acquired before the failing step and falls through to the next one */ err_destroy_id: + ib_cm_destroy_id(self->listen_id); +err_close_device: + ib_cm_close_device(self->cmdev); +err: + return status; +} + +/* Destructor of uct_cm_iface_t: removes the async handler of the CM device fd, then, between uct_cm_enter() and uct_cm_leave(), purges the outstanding operations, destroys the listening id, closes the CM device and unregisters the progress callback. */ static UCS_CLASS_CLEANUP_FUNC(uct_cm_iface_t) +{ + + ucs_trace_func(""); + + ucs_async_remove_handler(self->cmdev->fd, 1); + + uct_cm_enter(self); + uct_cm_iface_outstanding_purge(self); + ib_cm_destroy_id(self->listen_id); + ib_cm_close_device(self->cmdev); + uct_worker_progress_unregister_safe(&uct_cm_iface_worker(self)->super, + &self->slow_prog_id); + uct_cm_leave(self); + + /* At this point all outstanding have been removed, and no further events + * can be added. 
+ */ +} + +UCS_CLASS_DEFINE(uct_cm_iface_t, uct_ib_iface_t); +static UCS_CLASS_DEFINE_NEW_FUNC(uct_cm_iface_t, uct_iface_t, uct_md_h, uct_worker_h, + const uct_iface_params_t*, const uct_iface_config_t*); +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_cm_iface_t, uct_iface_t); + +/* Fill 'iface_attr': starts from uct_ib_iface_query(), then sets the CM-specific overhead, the maximal bcopy active-message size, the address lengths and the capability flags. Returns the status of uct_ib_iface_query() if it fails, UCS_OK otherwise. */ static ucs_status_t uct_cm_iface_query(uct_iface_h tl_iface, + uct_iface_attr_t *iface_attr) +{ + uct_cm_iface_t *iface = ucs_derived_of(tl_iface, uct_cm_iface_t); + ucs_status_t status; + size_t mtu; + + status = uct_ib_iface_query(&iface->super, 32 /* TODO */, iface_attr); + if (status != UCS_OK) { + return status; + } + + iface_attr->overhead = 1200e-9; + + /* room left in the SIDR REQ private data after uct_cm_hdr_t, capped at UINT8_MAX */ mtu = ucs_min(IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - sizeof(uct_cm_hdr_t), + UINT8_MAX); + + iface_attr->cap.am.max_bcopy = mtu; + /* the interface address is the uint32_t service id written by uct_cm_iface_get_address() */ iface_attr->iface_addr_len = sizeof(uint32_t); + iface_attr->ep_addr_len = 0; + iface_attr->max_conn_priv = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_AM_DUP | + UCT_IFACE_FLAG_PENDING | + UCT_IFACE_FLAG_CB_ASYNC | + UCT_IFACE_FLAG_CONNECT_TO_IFACE; + return UCS_OK; +} + +/* Write the interface's service_id into 'iface_addr' as a uint32_t; always returns UCS_OK. */ static ucs_status_t uct_cm_iface_get_address(uct_iface_h tl_iface, + uct_iface_addr_t *iface_addr) +{ + uct_cm_iface_t *iface = ucs_derived_of(tl_iface, uct_cm_iface_t); + *(uint32_t*)iface_addr = iface->service_id; + return UCS_OK; +} + + +/* Operations table of the CM interface; progress enable/disable/progress entries are empty functions */ static uct_ib_iface_ops_t uct_cm_iface_ops = { + { + .ep_am_bcopy = uct_cm_ep_am_bcopy, + .ep_pending_add = uct_cm_ep_pending_add, + .ep_pending_purge = uct_cm_ep_pending_purge, + .ep_flush = uct_cm_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_cm_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_cm_ep_t), + .iface_flush = uct_cm_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_progress_enable = ucs_empty_function, + .iface_progress_disable = ucs_empty_function, + .iface_progress = ucs_empty_function_return_zero, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_cm_iface_t), + .iface_query = uct_cm_iface_query, + 
.iface_get_device_address = uct_ib_iface_get_device_address, + .iface_get_address = uct_cm_iface_get_address, + .iface_is_reachable = uct_ib_iface_is_reachable + }, + .create_cq = uct_ib_verbs_create_cq, + .arm_cq = (void*)ucs_empty_function_return_success, +}; + +/* Probe whether the CM device can be opened on the verbs context of 'ib_md'. Returns 1 if ib_cm_open_device() succeeds (the device is closed again immediately), 0 otherwise. */ static int uct_cm_is_module_loaded(uct_ib_md_t *ib_md) +{ + struct ib_cm_device *cmdev = NULL; + + cmdev = ib_cm_open_device(ib_md->dev.ibv_context); + if (cmdev == NULL) { + ucs_debug("ib_cm_open_device() for %s failed: %m. " + "Check if ib_ucm.ko module is loaded.", + uct_ib_device_name(&ib_md->dev)); + return 0; + } + + ib_cm_close_device(cmdev); + return 1; +} + +/* Report the transport devices of 'md': an empty list with UCS_OK when the CM device cannot be opened, otherwise whatever uct_ib_device_query_ports() returns for UCT_IB_DEVICE_FLAG_LINK_IB. */ static ucs_status_t +uct_cm_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + uct_ib_md_t *ib_md = ucs_derived_of(md, uct_ib_md_t); + + if (!uct_cm_is_module_loaded(ib_md)) { + *num_tl_devices_p = 0; + *tl_devices_p = NULL; + return UCS_OK; + } + + return uct_ib_device_query_ports(&ib_md->dev, UCT_IB_DEVICE_FLAG_LINK_IB, + tl_devices_p, num_tl_devices_p); +} + +UCT_TL_DEFINE(&uct_ib_component, cm, uct_cm_query_tl_devices, uct_cm_iface_t, + "CM_", uct_cm_iface_config_table, uct_cm_iface_config_t); diff --git a/src/uct/ib/cm/configure.m4 b/src/uct/ib/cm/configure.m4 new file mode 100644 index 0000000..b7bc041 --- /dev/null +++ b/src/uct/ib/cm/configure.m4 @@ -0,0 +1,33 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + +# +# CM (IB connection manager) Support +# +cm_happy="no" + +AC_ARG_WITH([cm], + [AC_HELP_STRING([--with-cm], [Compile with IB Connection Manager support])], + [], + [with_cm=guess]) + +AS_IF([test "x$with_cm" != xno], + [save_LIBS="$LIBS" + AC_CHECK_LIB([ibcm], [ib_cm_send_req], + [AC_SUBST(IBCM_LIBS, [-libcm]) + uct_ib_modules="${uct_ib_modules}:cm" + cm_happy="yes"], + [AS_IF([test "x$with_cm" = xyes], + [AC_MSG_ERROR([CM requested but lib ibcm not found])], + [AC_MSG_WARN([CM support not found, skipping])] + ) + ]) + LIBS="$save_LIBS"]) + +AM_CONDITIONAL([HAVE_TL_CM], [test "x$cm_happy" != xno]) +AC_CONFIG_FILES([src/uct/ib/cm/Makefile]) diff --git a/src/uct/ib/configure.m4 b/src/uct/ib/configure.m4 new file mode 100644 index 0000000..7169455 --- /dev/null +++ b/src/uct/ib/configure.m4 @@ -0,0 +1,444 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + + +AC_ARG_WITH([verbs], + [AC_HELP_STRING([--with-verbs(=DIR)], + [Build OpenFabrics support, adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])], + [], + [with_verbs=/usr]) + +AS_IF([test "x$with_verbs" = "xyes"], [with_verbs=/usr]) +AS_IF([test -d "$with_verbs"], [with_ib=yes; str="with verbs support from $with_verbs"], [with_ib=no; str="without verbs support"]) +AS_IF([test -d "$with_verbs/lib64"],[libsuff="64"],[libsuff=""]) + +AC_MSG_NOTICE([Compiling $str]) + +# +# RC Support +# +AC_ARG_WITH([rc], + [AC_HELP_STRING([--with-rc], [Compile with IB Reliable Connection support])], + [], + [with_rc=yes]) + + +# +# UD Support +# +AC_ARG_WITH([ud], + [AC_HELP_STRING([--with-ud], [Compile with IB Unreliable Datagram support])], + [], + [with_ud=yes]) + + +# +# DC Support +# +AC_ARG_WITH([dc], + [AC_HELP_STRING([--with-dc], [Compile with IB Dynamic Connection support])], + [], + [with_dc=yes]) + + +# +# mlx5 DV support +# +AC_ARG_WITH([mlx5-dv], + [AC_HELP_STRING([--with-mlx5-dv], [Compile with mlx5 Direct Verbs + support. Direct Verbs (DV) support provides additional + acceleration capabilities that are not available in a + regular mode.])]) + +# +# TM (IB Tag Matching) Support +# +AC_ARG_WITH([ib-hw-tm], + [AC_HELP_STRING([--with-ib-hw-tm], [Compile with IB Tag Matching support])], + [], + [with_ib_hw_tm=yes]) + + +# +# DM Support +# +AC_ARG_WITH([dm], + [AC_HELP_STRING([--with-dm], [Compile with Device Memory support])], + [], + [with_dm=yes]) + +# +# DEVX Support +# +AC_ARG_WITH([devx], [], [], [with_devx=check]) + +# +# Check basic IB support: User wanted at least one IB transport, and we found +# verbs header file and library. 
+# +AS_IF([test "x$with_ib" = "xyes"], + [ + save_LDFLAGS="$LDFLAGS" + save_CFLAGS="$CFLAGS" + save_CPPFLAGS="$CPPFLAGS" + AS_IF([test "x/usr" = "x$with_verbs"], + [], + [verbs_incl="-I$with_verbs/include" + verbs_libs="-L$with_verbs/lib$libsuff"]) + LDFLAGS="$verbs_libs $LDFLAGS" + CFLAGS="$verbs_incl $CFLAGS" + CPPFLAGS="$verbs_incl $CPPFLAGS" + AC_CHECK_HEADER([infiniband/verbs.h], [], + [AC_MSG_WARN([ibverbs header files not found]); with_ib=no]) + AC_CHECK_LIB([ibverbs], [ibv_get_device_list], + [ + AC_SUBST(IBVERBS_LDFLAGS, ["$verbs_libs -libverbs"]) + AC_SUBST(IBVERBS_DIR, ["$with_verbs"]) + AC_SUBST(IBVERBS_CPPFLAGS, ["$verbs_incl"]) + AC_SUBST(IBVERBS_CFLAGS, ["$verbs_incl"]) + ], + [AC_MSG_WARN([libibverbs not found]); with_ib=no]) + + have_ib_funcs=yes + LDFLAGS="$LDFLAGS $IBVERBS_LDFLAGS" + AC_CHECK_DECLS([ibv_wc_status_str, + ibv_event_type_str, + ibv_query_gid, + ibv_get_device_name, + ibv_create_srq, + ibv_get_async_event], + [], + [have_ib_funcs=no], + [#include ]) + AS_IF([test "x$have_ib_funcs" != xyes], + [AC_MSG_WARN([Some IB verbs are not found. 
Please make sure OFED version is 1.5 or above.]) + with_ib=no]) + + LDFLAGS="$save_LDFLAGS" + CFLAGS="$save_CFLAGS" + CPPFLAGS="$save_CPPFLAGS" + ],[:]) + +AS_IF([test "x$with_ib" = "xyes"], + [ + save_LDFLAGS="$LDFLAGS" + save_CFLAGS="$CFLAGS" + save_CPPFLAGS="$CPPFLAGS" + LDFLAGS="$IBVERBS_LDFLAGS $LDFLAGS" + CFLAGS="$IBVERBS_CFLAGS $CFLAGS" + CPPFLAGS="$IBVERBS_CPPFLAGS $CPPFLAGS" + AC_CHECK_HEADER([infiniband/verbs_exp.h], + [AC_DEFINE([HAVE_VERBS_EXP_H], 1, [IB experimental verbs]) + verbs_exp=yes], + [verbs_exp=no]) + + AC_CHECK_MEMBERS([struct ibv_exp_device_attr.exp_device_cap_flags, + struct ibv_exp_device_attr.odp_caps, + struct ibv_exp_device_attr.odp_caps.per_transport_caps.dc_odp_caps, + struct ibv_exp_device_attr.odp_mr_max_size, + struct ibv_exp_qp_init_attr.max_inl_recv, + struct ibv_async_event.element.dct], + [], [], [[#include ]]) + + AC_CHECK_DECLS([IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN], + [have_cq_io=yes], [], [[#include ]]) + + AC_CHECK_DECLS([IBV_EXP_CQ_IGNORE_OVERRUN], + [have_cq_io=yes], [], [[#include ]]) + + AS_IF([test "x$with_mlx5_dv" != xno], [ + AC_MSG_NOTICE([Checking for legacy bare-metal support]) + AC_CHECK_HEADERS([infiniband/mlx5_hw.h], + [with_mlx5_hw=yes + mlx5_include=mlx5_hw.h + AC_CHECK_DECLS([ + ibv_mlx5_exp_get_qp_info, + ibv_mlx5_exp_get_cq_info, + ibv_mlx5_exp_get_srq_info, + ibv_mlx5_exp_update_cq_ci, + MLX5_WQE_CTRL_SOLICITED], + [], [], [[#include ]]) + AC_CHECK_MEMBERS([struct mlx5_srq.cmd_qp], + [], [with_ib_hw_tm=no], + [[#include ]]) + AC_CHECK_MEMBERS([struct mlx5_ah.ibv_ah], + [has_get_av=yes], [], + [[#include ]]) + AC_CHECK_MEMBERS([struct ibv_mlx5_qp_info.bf.need_lock], + [], + [AC_MSG_WARN([Cannot use mlx5 QP because it assumes dedicated BF]) + AC_MSG_WARN([Please upgrade MellanoxOFED to 3.0 or above]) + with_mlx5_hw=no], + [[#include ]]) + AC_CHECK_DECLS([ + IBV_EXP_QP_INIT_ATTR_RES_DOMAIN, + IBV_EXP_RES_DOMAIN_THREAD_MODEL, + ibv_exp_create_res_domain, + ibv_exp_destroy_res_domain], + 
[AC_DEFINE([HAVE_IBV_EXP_RES_DOMAIN], 1, [IB resource domain]) + has_res_domain=yes], [], [[#include ]]) + ], [with_mlx5_hw=no]) + + AC_MSG_NOTICE([Checking for DV bare-metal support]) + + AC_CHECK_LIB([mlx5-rdmav2], [mlx5dv_query_device], + [AC_SUBST(LIB_MLX5, [-lmlx5-rdmav2])],[ + AC_CHECK_LIB([mlx5], [mlx5dv_query_device], + [AC_SUBST(LIB_MLX5, [-lmlx5])], + [with_mlx5_dv=no], [-libverbs])], [-libverbs]) + + AS_IF([test "x$with_mlx5_dv" != xno], [ + AC_CHECK_HEADERS([infiniband/mlx5dv.h], + [with_mlx5_hw=yes + with_mlx5_dv=yes + mlx5_include=mlx5dv.h], [], [ ])]) + + AS_IF([test "x$with_mlx5_dv" = "xyes" -a "x$have_cq_io" = "xyes" ], [ + AC_CHECK_DECLS([ + mlx5dv_init_obj, + mlx5dv_create_qp, + mlx5dv_is_supported, + MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE, + MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE], + [], [], [[#include ]]) + AC_CHECK_MEMBERS([struct mlx5dv_cq.cq_uar], + [], [], [[#include ]]) + AC_CHECK_DECLS([MLX5DV_OBJ_AH], [has_get_av=yes], + [], [[#include ]]) + AC_CHECK_DECLS([MLX5DV_DCTYPE_DCT], + [have_dc_dv=yes], [], [[#include ]]) + AC_CHECK_DECLS([ibv_alloc_td], + [has_res_domain=yes], [], [[#include ]])]) + + AC_CHECK_DECLS([ibv_alloc_td], + [has_res_domain=yes], [], [[#include ]])]) + + AS_IF([test "x$with_devx" != xno], [ + AC_CHECK_DECL(MLX5DV_CONTEXT_FLAGS_DEVX, [ + AC_DEFINE([HAVE_DEVX], [1], [DEVX support]) + have_devx=yes + ], [ + AS_IF([test "x$with_devx" != xcheck], + [AC_MSG_ERROR([devx requested but not found])]) + ], [[#include ]])]) + + AS_IF([test "x$has_res_domain" = "xyes" -a "x$have_cq_io" = "xyes" ], [], [ + with_mlx5_hw=no]) + + AS_IF([test "x$with_mlx5_hw" = "xyes"], + [AC_MSG_NOTICE([Compiling with mlx5 bare-metal support]) + AC_DEFINE([HAVE_MLX5_HW], 1, [mlx5 bare-metal support]) + AS_IF([test "x$has_get_av" = "xyes"], + [AC_DEFINE([HAVE_MLX5_HW_UD], 1, [mlx5 UD bare-metal support])])]) + + AC_CHECK_DECLS([IBV_LINK_LAYER_INFINIBAND, + IBV_LINK_LAYER_ETHERNET, + IBV_EVENT_GID_CHANGE, + ibv_create_qp_ex, + ibv_create_srq_ex], + [], [], 
[[#include ]]) + + # We shouldn't confuse upstream ibv_query_device_ex with + # legacy MOFED one, distinguish by arguments number + AC_CHECK_DECL(ibv_query_device_ex, [ + AC_TRY_COMPILE([#include ], + [ibv_query_device_ex(NULL, NULL, NULL)], + [AC_DEFINE([HAVE_DECL_IBV_QUERY_DEVICE_EX], 1, + [have upstream ibv_query_device_ex])])], + [], [[#include ]]) + + AC_CHECK_DECLS([IBV_EXP_ACCESS_ALLOCATE_MR, + IBV_EXP_ACCESS_ON_DEMAND, + IBV_EXP_DEVICE_MR_ALLOCATE, + IBV_EXP_WR_NOP, + IBV_EXP_DEVICE_DC_TRANSPORT, + IBV_EXP_ATOMIC_HCA_REPLY_BE, + IBV_EXP_PREFETCH_WRITE_ACCESS, + IBV_EXP_QP_OOO_RW_DATA_PLACEMENT, + IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT, + IBV_EXP_CQ_MODERATION, + IBV_EXP_DEVICE_ATTR_PCI_ATOMIC_CAPS, + ibv_exp_reg_mr, + ibv_exp_create_qp, + ibv_exp_prefetch_mr, + ibv_exp_create_srq, + ibv_exp_setenv, + ibv_exp_query_gid_attr, + ibv_exp_query_device], + [], [], [[#include ]]) + + AC_CHECK_DECLS([ibv_exp_post_send, + IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP, + IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD, + IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG, + IBV_EXP_SEND_EXT_ATOMIC_INLINE], + [], + [have_ext_atomics=no], + [[#include ]]) + + AC_CHECK_DECLS(IBV_EXP_DEVICE_ATTR_RESERVED_2, [], [], + [[#include ]]) + + # UMR support + AC_CHECK_DECLS(IBV_EXP_MR_INDIRECT_KLMS, + [AC_DEFINE([HAVE_EXP_UMR], 1, [IB UMR support])], + [], + [[#include ]]) + + AC_CHECK_DECLS(IBV_EXP_QP_CREATE_UMR, + [AC_DEFINE([HAVE_IBV_EXP_QP_CREATE_UMR], 1, [IB QP Create UMR support])], + [], + [[#include ]]) + + AC_CHECK_MEMBERS([struct ibv_exp_qp_init_attr.umr_caps], + [AC_DEFINE([HAVE_IBV_EXP_QP_CREATE_UMR_CAPS], 1, [Support UMR max caps v2])], + [], + [[#include ]]) + + AC_CHECK_DECLS(IBV_EXP_MR_FIXED_BUFFER_SIZE, + [AC_DEFINE([HAVE_EXP_UMR_KSM], 1, [IB UMR KSM support])], + [], + [[#include ]]) + + # Extended atomics + AS_IF([test "x$have_ext_atomics" != xno], + [AC_DEFINE([HAVE_IB_EXT_ATOMICS], 1, [IB extended atomics support])], + [AC_MSG_WARN([Compiling without extended atomics support])]) + + # Check 
for driver which exposes masked atomics endianity per size + AC_CHECK_MEMBER(struct ibv_exp_masked_atomic_params.masked_log_atomic_arg_sizes_network_endianness, + [AC_DEFINE([HAVE_MASKED_ATOMICS_ENDIANNESS], 1, [have masked atomic endianness])], + [], [[#include ]]) + + AC_CHECK_DECLS(IBV_EXP_ODP_SUPPORT_IMPLICIT, [], [], + [[#include ]]) + + AC_CHECK_DECLS(IBV_EXP_ACCESS_ON_DEMAND, [with_odp=yes], [], + [[#include ]]) + + AC_CHECK_DECLS(IBV_ACCESS_ON_DEMAND, [with_odp=yes], [], + [[#include ]]) + + AS_IF([test "x$with_odp" = "xyes" ], [ + AC_DEFINE([HAVE_ODP], 1, [ODP support]) + + AC_CHECK_DECLS(IBV_EXP_ODP_SUPPORT_IMPLICIT, [with_odp_i=yes], [], + [[#include ]]) + + AC_CHECK_DECLS(IBV_ODP_SUPPORT_IMPLICIT, [with_odp_i=yes], [], + [[#include ]]) + + AS_IF([test "x$with_odp_i" = "xyes" ], [ + AC_DEFINE([HAVE_ODP_IMPLICIT], 1, [Implicit ODP support])])]) + + AC_CHECK_DECLS(ibv_exp_prefetch_mr, [with_prefetch=yes], [], + [[#include ]]) + + AC_CHECK_DECLS(ibv_advise_mr, [with_prefetch=yes], [], + [[#include ]]) + + AS_IF([test "x$with_prefetch" = "xyes" ], [ + AC_DEFINE([HAVE_PREFETCH], 1, [Prefetch support])]) + + AC_CHECK_MEMBERS([struct mlx5_wqe_av.base, + struct mlx5_grh_av.rmac], + [], [], [[#include ]]) + + AC_CHECK_MEMBERS([struct mlx5_cqe64.ib_stride_index], + [], [], [[#include ]]) + + AC_DEFINE([HAVE_IB], 1, [IB support]) + + AC_CHECK_DECLS([IBV_EXP_QPT_DC_INI], + [have_dc_exp=yes], [], [[#include ]]) + + AS_IF([test "x$with_dc" != xno -a \( "x$have_dc_exp" = xyes -o "x$have_dc_dv" = xyes \) -a "x$with_mlx5_hw" = "xyes"], [ + AC_DEFINE([HAVE_TL_DC], 1, [DC transport support]) + AS_IF([test -n "$have_dc_dv"], + [AC_DEFINE([HAVE_DC_DV], 1, [DC DV support])], [ + AS_IF([test -n "$have_dc_exp"], + [AC_DEFINE([HAVE_DC_EXP], 1, [DC EXP support])])])], + [with_dc=no]) + + AS_IF([test "x$with_rc" != xno], + [AC_DEFINE([HAVE_TL_RC], 1, [RC transport support])]) + + AS_IF([test "x$with_ud" != xno], + [AC_DEFINE([HAVE_TL_UD], 1, [UD transport support])]) + + # XRQ 
with Tag Matching support + AS_IF([test "x$with_ib_hw_tm" != xno], + [AC_CHECK_HEADERS([infiniband/tm_types.h]) + AC_CHECK_MEMBER([struct ibv_exp_tmh.tag], [with_ib_hw_tm=exp], [], + [[#include ]]) + AC_CHECK_MEMBER([struct ibv_tmh.tag], [with_ib_hw_tm=upstream], [], + [[#include ]]) + ]) + AS_IF([test "x$with_ib_hw_tm" = xexp], + [AC_CHECK_MEMBERS([struct ibv_exp_create_srq_attr.dc_offload_params], [ + AC_DEFINE([IBV_HW_TM], 1, [IB Tag Matching support]) + ], [], [#include ]) + ]) + AS_IF([test "x$with_ib_hw_tm" = xupstream], + [AC_DEFINE([IBV_HW_TM], 1, [IB Tag Matching support]) + AC_CHECK_MEMBERS([struct ibv_tm_caps.flags], [], [], + [#include ])]) + + # Device Memory support + AS_IF([test "x$with_dm" != xno], [ + AC_CHECK_DECLS([ibv_exp_alloc_dm], + [AC_DEFINE([HAVE_IBV_DM], 1, [Device Memory support]) + AC_DEFINE([HAVE_IBV_EXP_DM], 1, [Device Memory support (EXP)])], + [], [[#include ]]) + AC_CHECK_DECLS([ibv_alloc_dm], + [AC_DEFINE([HAVE_IBV_DM], 1, [Device Memory support])], + [], [[#include ]])]) + + AC_CHECK_DECLS([ibv_cmd_modify_qp], + [], [], [[#include ]]) + + mlnx_valg_libdir=$with_verbs/lib${libsuff}/mlnx_ofed/valgrind + AC_MSG_NOTICE([Checking OFED valgrind libs $mlnx_valg_libdir]) + + AS_IF([test -d "$mlnx_valg_libdir"], + [AC_MSG_NOTICE([Added $mlnx_valg_libdir to valgrind LD_LIBRARY_PATH]) + valgrind_libpath="$mlnx_valg_libdir:$valgrind_libpath"]) + LDFLAGS="$save_LDFLAGS" + CFLAGS="$save_CFLAGS" + CPPFLAGS="$save_CPPFLAGS" + + uct_modules="${uct_modules}:ib" + ], + [ + with_dc=no + with_rc=no + with_ud=no + with_mlx5_hw=no + with_mlx5_dv=no + ]) + +# +# For automake +# +AM_CONDITIONAL([HAVE_IB], [test "x$with_ib" != xno]) +AM_CONDITIONAL([HAVE_TL_RC], [test "x$with_rc" != xno]) +AM_CONDITIONAL([HAVE_TL_DC], [test "x$with_dc" != xno]) +AM_CONDITIONAL([HAVE_DC_DV], [test -n "$have_dc_dv"]) +AM_CONDITIONAL([HAVE_DC_EXP], [test -n "$have_dc_exp"]) +AM_CONDITIONAL([HAVE_TL_UD], [test "x$with_ud" != xno]) +AM_CONDITIONAL([HAVE_MLX5_HW], [test 
"x$with_mlx5_hw" != xno]) +AM_CONDITIONAL([HAVE_MLX5_DV], [test "x$with_mlx5_dv" = xyes]) +AM_CONDITIONAL([HAVE_DEVX], [test -n "$have_devx"]) +AM_CONDITIONAL([HAVE_EXP], [test "x$verbs_exp" != xno]) +AM_CONDITIONAL([HAVE_MLX5_HW_UD], [test "x$with_mlx5_hw" != xno -a "x$has_get_av" != xno]) + +uct_ib_modules="" +m4_include([src/uct/ib/cm/configure.m4]) +m4_include([src/uct/ib/rdmacm/configure.m4]) +AC_DEFINE_UNQUOTED([uct_ib_MODULES], ["${uct_ib_modules}"], [IB loadable modules]) +AC_CONFIG_FILES([src/uct/ib/Makefile]) diff --git a/src/uct/ib/dc/dc_mlx5.c b/src/uct/ib/dc/dc_mlx5.c new file mode 100644 index 0000000..dcc67e9 --- /dev/null +++ b/src/uct/ib/dc/dc_mlx5.c @@ -0,0 +1,1271 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "dc_mlx5.h" +#include "dc_mlx5_ep.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define UCT_DC_MLX5_MAX_TX_CQ_LEN (16 * UCS_MBYTE) + + +/* Names of the TX policies, indexed by the UCT_DC_TX_POLICY_* values and terminated by NULL at UCT_DC_TX_POLICY_LAST; this is the value list of the TX_POLICY configuration field */ static const char *uct_dc_tx_policy_names[] = { + [UCT_DC_TX_POLICY_DCS] = "dcs", + [UCT_DC_TX_POLICY_DCS_QUOTA] = "dcs_quota", + [UCT_DC_TX_POLICY_RAND] = "rand", + [UCT_DC_TX_POLICY_LAST] = NULL +}; + +/* DC specific parameters, expecting DC_ prefix */ +ucs_config_field_t uct_dc_mlx5_iface_config_sub_table[] = { + {"RC_", "IB_TX_QUEUE_LEN=128;FC_ENABLE=y;", NULL, + ucs_offsetof(uct_dc_mlx5_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_rc_iface_common_config_table)}, + + {"RC_", "", NULL, + ucs_offsetof(uct_dc_mlx5_iface_config_t, rc_mlx5_common), + UCS_CONFIG_TYPE_TABLE(uct_rc_mlx5_common_config_table)}, + + {"UD_", "", NULL, + ucs_offsetof(uct_dc_mlx5_iface_config_t, ud_common), + UCS_CONFIG_TYPE_TABLE(uct_ud_iface_common_config_table)}, + + {"NUM_DCI", "8", + "Number of DC initiator QPs (DCI) used by the interface " + "(up to " UCS_PP_MAKE_STRING(UCT_DC_MLX5_IFACE_MAX_DCIS) ").", + ucs_offsetof(uct_dc_mlx5_iface_config_t, ndci), 
UCS_CONFIG_TYPE_UINT}, + + {"TX_POLICY", "dcs_quota", + "Specifies how DC initiator (DCI) is selected by the endpoint. The policies are:\n" + "\n" + "dcs The endpoint either uses already assigned DCI or one is allocated\n" + " in a LIFO order, and released once it has no outstanding operations.\n" + "\n" + "dcs_quota Same as \"dcs\" but in addition the DCI is scheduled for release\n" + " if it has sent more than quota, and there are endpoints waiting for a DCI.\n" + " The dci is released once it completes all outstanding operations.\n" + " This policy ensures that there will be no starvation among endpoints.\n" + "\n" + "rand Every endpoint is assigned with a randomly selected DCI.\n" + " Multiple endpoints may share the same DCI.", + ucs_offsetof(uct_dc_mlx5_iface_config_t, tx_policy), + UCS_CONFIG_TYPE_ENUM(uct_dc_tx_policy_names)}, + + {"RAND_DCI_SEED", "0", + "Seed for DCI allocation when \"rand\" dci policy is used (0 - use default).", + ucs_offsetof(uct_dc_mlx5_iface_config_t, rand_seed), UCS_CONFIG_TYPE_UINT}, + + {"QUOTA", "32", + "When \"dcs_quota\" policy is selected, how much to send from a DCI when\n" + "there are other endpoints waiting for it.", + ucs_offsetof(uct_dc_mlx5_iface_config_t, quota), UCS_CONFIG_TYPE_UINT}, + + {NULL} +}; + +/* Bundle of all parameters */ +ucs_config_field_t uct_dc_mlx5_iface_config_table[] = { + /* the DC sub-table is embedded at offset 0 under the "DC_" prefix */ {"DC_", "", NULL, 0, + UCS_CONFIG_TYPE_TABLE(uct_dc_mlx5_iface_config_sub_table)}, + + {"UD_", "", NULL, + ucs_offsetof(uct_dc_mlx5_iface_config_t, mlx5_ud), + UCS_CONFIG_TYPE_TABLE(uct_ud_mlx5_iface_common_config_table)}, + + {NULL} +}; + + +/* Create an endpoint from the device and interface addresses in 'params': resolves the address vector with uct_ud_mlx5_iface_get_av() and instantiates uct_dc_mlx5_grh_ep_t when the address is global, uct_dc_mlx5_ep_t otherwise. Returns UCS_ERR_INVALID_ADDR if the address vector cannot be resolved, else the status of UCS_CLASS_NEW(). */ static ucs_status_t +uct_dc_mlx5_ep_create_connected(const uct_ep_params_t *params, uct_ep_h* ep_p) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(params->iface, + uct_dc_mlx5_iface_t); + const uct_ib_address_t *ib_addr; + const uct_dc_mlx5_iface_addr_t *if_addr; + ucs_status_t status; + int is_global; + uct_ib_mlx5_base_av_t av; + struct mlx5_grh_av grh_av; + + ucs_trace_func(""); + + 
UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(params); + ib_addr = (const uct_ib_address_t *)params->dev_addr; + if_addr = (const uct_dc_mlx5_iface_addr_t *)params->iface_addr; + + status = uct_ud_mlx5_iface_get_av(&iface->super.super.super, &iface->ud_common, + ib_addr, &av, &grh_av, &is_global); + if (status != UCS_OK) { + /* NOTE(review): the original status is replaced by UCS_ERR_INVALID_ADDR here */ return UCS_ERR_INVALID_ADDR; + } + + if (is_global) { + return UCS_CLASS_NEW(uct_dc_mlx5_grh_ep_t, ep_p, iface, if_addr, &av, &grh_av); + } else { + return UCS_CLASS_NEW(uct_dc_mlx5_ep_t, ep_p, iface, if_addr, &av); + } +} + +/* Destroy an endpoint through uct_dc_mlx5_ep_cleanup(), passing the uct_dc_mlx5_ep_t class. */ static void uct_dc_mlx5_ep_destroy(uct_ep_h tl_ep) +{ + uct_dc_mlx5_ep_cleanup(tl_ep, &UCS_CLASS_NAME(uct_dc_mlx5_ep_t)); +} + +/* Fill 'iface_attr' through uct_rc_iface_query() and adjust it for DC: connect-to-iface instead of connect-to-ep, zero endpoint address length, interface address of size uct_dc_mlx5_iface_addr_t, extra latency overhead; error-handling flags are cleared for the random DCI policy. Returns the status of uct_rc_iface_query() if it fails, UCS_OK otherwise. */ static ucs_status_t uct_dc_mlx5_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_dc_mlx5_iface_t); + size_t max_am_inline = UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE); + size_t max_put_inline = UCT_IB_MLX5_PUT_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE); + ucs_status_t status; + +#if HAVE_IBV_DM + if (iface->super.dm.dm != NULL) { /* with device memory present, both inline limits become at least dm->seg_len */ + max_am_inline = ucs_max(iface->super.dm.dm->seg_len, + UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)); + max_put_inline = ucs_max(iface->super.dm.dm->seg_len, + UCT_IB_MLX5_PUT_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)); + } +#endif + + status = uct_rc_iface_query(&iface->super.super, iface_attr, + max_put_inline, + max_am_inline, + UCT_IB_MLX5_AM_ZCOPY_MAX_HDR(UCT_IB_MLX5_AV_FULL_SIZE), + UCT_IB_MLX5_AM_ZCOPY_MAX_IOV, + UCT_RC_MLX5_TM_EAGER_ZCOPY_MAX_IOV(UCT_IB_MLX5_AV_FULL_SIZE), + sizeof(uct_rc_mlx5_hdr_t)); + if (status != UCS_OK) { + return status; + } + + /* fixup flags and address lengths */ + iface_attr->cap.flags &= ~UCT_IFACE_FLAG_CONNECT_TO_EP; + iface_attr->cap.flags |= UCT_IFACE_FLAG_CONNECT_TO_IFACE; + iface_attr->ep_addr_len = 0; + iface_attr->max_conn_priv = 0; + iface_attr->iface_addr_len = sizeof(uct_dc_mlx5_iface_addr_t); + iface_attr->latency.overhead += 60e-9; /* connect packet + 
cqe */ + + uct_rc_mlx5_iface_common_query(&iface->super.super.super, iface_attr, + max_am_inline, UCT_IB_MLX5_AV_FULL_SIZE); + + /* Error handling is not supported with random dci policy + * TODO: Fix */ + if (uct_dc_mlx5_iface_is_dci_rand(iface)) { + iface_attr->cap.flags &= ~(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE | + UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF | + UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM); + } + + return UCS_OK; +} + +/* Enable progress by passing the interface's own 'progress' callback and 'flags' to uct_base_iface_progress_enable_cb(). */ static void uct_dc_mlx5_iface_progress_enable(uct_iface_h tl_iface, unsigned flags) +{ + uct_rc_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_iface_t); + + uct_base_iface_progress_enable_cb(&iface->super.super, iface->progress, flags); +} + +/* Report 'ep' as failed with 'status' through uct_set_ep_failed(), using the uct_dc_mlx5_ep_t class; returns its result. */ static ucs_status_t uct_dc_mlx5_ep_set_failed(uct_ib_iface_t *ib_iface, + uct_ep_h ep, ucs_status_t status) +{ + return uct_set_ep_failed(&UCS_CLASS_NAME(uct_dc_mlx5_ep_t), ep, + &ib_iface->super.super, status); +} + +/* Poll a single TX completion. Returns 0 if uct_ib_mlx5_poll_cq() yields no CQE; otherwise looks up the DCI by the QP number found in the CQE, updates the available count of its txqp, calls uct_dc_mlx5_iface_dci_put(), processes the completion, progresses pending operations and returns 1. */ static UCS_F_ALWAYS_INLINE unsigned +uct_dc_mlx5_poll_tx(uct_dc_mlx5_iface_t *iface) +{ + uint8_t dci; + struct mlx5_cqe64 *cqe; + uint32_t qp_num; + uint16_t hw_ci; + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + + cqe = uct_ib_mlx5_poll_cq(&iface->super.super.super, + &iface->super.cq[UCT_IB_DIR_TX]); + if (cqe == NULL) { + return 0; + } + UCS_STATS_UPDATE_COUNTER(iface->super.super.stats, UCT_RC_IFACE_STAT_TX_COMPLETION, 1); + + ucs_memory_cpu_load_fence(); + + /* sop_drop_qpn is converted from network byte order and masked with UCS_MASK(UCT_IB_QPN_ORDER) to obtain the QP number */ qp_num = ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER); + dci = uct_dc_mlx5_iface_dci_find(iface, qp_num); + txqp = &iface->tx.dcis[dci].txqp; + txwq = &iface->tx.dcis[dci].txwq; + hw_ci = ntohs(cqe->wqe_counter); + + ucs_trace_poll("dc iface %p tx_cqe: dci[%d] qpn 0x%x txqp %p hw_ci %d", + iface, dci, qp_num, txqp, hw_ci); + + uct_rc_txqp_available_set(txqp, uct_ib_mlx5_txwq_update_bb(txwq, hw_ci)); + ucs_assert(uct_rc_txqp_available(txqp) <= txwq->bb_max); + + uct_dc_mlx5_iface_dci_put(iface, dci); + uct_rc_mlx5_txqp_process_tx_cqe(txqp, cqe, hw_ci); + + uct_dc_mlx5_iface_progress_pending(iface); + return 1; +} + +static unsigned 
uct_dc_mlx5_iface_progress(void *arg) +{ + uct_dc_mlx5_iface_t *iface = arg; + unsigned count; + + /* RX is polled first (flags 0); TX completions are polled only when RX returned no work */ count = uct_rc_mlx5_iface_common_poll_rx(&iface->super, 0); + if (count > 0) { + return count; + } + return uct_dc_mlx5_poll_tx(iface); +} + +/* Same as uct_dc_mlx5_iface_progress, except that RX is polled with UCT_RC_MLX5_POLL_FLAG_TM. */ static unsigned uct_dc_mlx5_iface_progress_tm(void *arg) +{ + uct_dc_mlx5_iface_t *iface = arg; + unsigned count; + + count = uct_rc_mlx5_iface_common_poll_rx(&iface->super, + UCT_RC_MLX5_POLL_FLAG_TM); + if (count > 0) { + return count; + } + return uct_dc_mlx5_poll_tx(iface); +} + +/* Forward declaration of the interface delete function */ static void UCS_CLASS_DELETE_FUNC_NAME(uct_dc_mlx5_iface_t)(uct_iface_t*); + +/* Reset a DCI whose QP object is of verbs type (asserted): moves the QP to IBV_QPS_RESET with the CQ consumer indexes updated before and synced after the transition, cleans the TX CQ for this QP number and resets the work queue. Returns the status of the QP state change. */ ucs_status_t uct_dc_mlx5_iface_reset_dci(uct_dc_mlx5_iface_t *iface, + uct_dc_dci_t *dci) +{ + uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.super.super.super.md, + uct_ib_mlx5_md_t); + ucs_status_t status; + + ucs_assert(dci->txwq.super.type == UCT_IB_MLX5_OBJ_TYPE_VERBS); + ucs_debug("iface %p reset dci[%p]", iface, dci); + + /* Synchronize CQ index with the driver, since it would remove pending + * completions for this QP (both send and receive) during ibv_destroy_qp(). 
+ */ + uct_rc_mlx5_iface_common_update_cqs_ci(&iface->super, + &iface->super.super.super); + status = uct_ib_mlx5_modify_qp_state(md, &dci->txwq.super, IBV_QPS_RESET); + uct_rc_mlx5_iface_common_sync_cqs_ci(&iface->super, + &iface->super.super.super); + + uct_rc_mlx5_iface_commom_clean(&iface->super.cq[UCT_IB_DIR_TX], NULL, + dci->txwq.super.qp_num); + + /* Resume posting from the beginning of the QP */ + uct_ib_mlx5_txwq_reset(&dci->txwq); + + return status; +} + +/* CQ event callback: increments the cq_sn counter of the CQ that belongs to direction 'dir'. */ static void uct_dc_mlx5_iface_event_cq(uct_ib_iface_t *ib_iface, + uct_ib_dir_t dir) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ib_iface, uct_dc_mlx5_iface_t); + + iface->super.cq[dir].cq_sn++; +} + +/* Create and set up one DCI: creates the QP (with mlx5dv_create_qp() when HAVE_DC_DV is set, with uct_ib_mlx5_iface_create_qp() otherwise), initializes its txqp, connects it, initializes the work queue and stores the QP capabilities in '*cap'. Returns UCS_OK or the status of the failing step. */ static ucs_status_t uct_dc_mlx5_iface_create_qp(uct_dc_mlx5_iface_t *iface, + struct ibv_qp_cap *cap, + uct_dc_dci_t *dci) +{ + uct_ib_iface_t *ib_iface = &iface->super.super.super; + uct_ib_qp_attr_t attr = {}; + ucs_status_t status; +#if HAVE_DC_DV + uct_ib_device_t *dev = uct_ib_iface_device(ib_iface); + struct mlx5dv_qp_init_attr dv_attr = {}; + struct ibv_qp *qp; + + uct_rc_mlx5_iface_fill_attr(&iface->super, &attr, + iface->super.super.config.tx_qp_len, + &iface->super.rx.srq); + status = uct_ib_mlx5_iface_fill_attr(ib_iface, &dci->txwq.super, &attr); + if (status != UCS_OK) { + return status; + } + + uct_ib_iface_fill_attr(ib_iface, &attr); + attr.ibv.cap.max_recv_sge = 0; + + /* request a DC initiator QP that uses UCT_IB_KEY as the DCT access key */ dv_attr.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_DC; + dv_attr.dc_init_attr.dc_type = MLX5DV_DCTYPE_DCI; + dv_attr.dc_init_attr.dct_access_key = UCT_IB_KEY; + qp = mlx5dv_create_qp(dev->ibv_context, &attr.ibv, &dv_attr); + if (qp == NULL) { + ucs_error("mlx5dv_create_qp("UCT_IB_IFACE_FMT", DCI): failed: %m", + UCT_IB_IFACE_ARG(ib_iface)); + return UCS_ERR_IO_ERROR; + } + + dci->txwq.super.verbs.qp = qp; + dci->txwq.super.qp_num = dci->txwq.super.verbs.qp->qp_num; +#else + uct_rc_mlx5_iface_fill_attr(&iface->super, &attr, + iface->super.super.config.tx_qp_len, + &iface->super.rx.srq); + status = 
uct_ib_mlx5_iface_create_qp(ib_iface, &dci->txwq.super, &attr); + if (status != UCS_OK) { + return status; + } +#endif + + status = uct_rc_txqp_init(&dci->txqp, &iface->super.super, + dci->txwq.super.qp_num + UCS_STATS_ARG(iface->super.super.stats)); + if (status != UCS_OK) { + goto err_qp; + } + + status = uct_dc_mlx5_iface_dci_connect(iface, dci); + if (status != UCS_OK) { + goto err; + } + + dci->ep = NULL; +#if UCS_ENABLE_ASSERT + dci->flags = 0; +#endif + status = uct_ib_mlx5_txwq_init(iface->super.super.super.super.worker, + iface->super.tx.mmio_mode, &dci->txwq, + dci->txwq.super.verbs.qp); + if (status != UCS_OK) { + goto err; + } + + uct_rc_txqp_available_set(&dci->txqp, dci->txwq.bb_max); + *cap = attr.ibv.cap; + return UCS_OK; + +err: + uct_rc_txqp_cleanup(&dci->txqp); +err_qp: + ibv_destroy_qp(dci->txwq.super.verbs.qp); + return status; +} + +#if HAVE_DC_DV +ucs_status_t uct_dc_mlx5_iface_dci_connect(uct_dc_mlx5_iface_t *iface, + uct_dc_dci_t *dci) +{ + uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.super.super.super.md, + uct_ib_mlx5_md_t); + struct ibv_qp_attr attr; + long attr_mask; + + if (md->flags & UCT_IB_MLX5_MD_FLAG_DEVX) { + return uct_dc_mlx5_iface_devx_dci_connect(iface, &dci->txwq.super); + } + + ucs_assert(dci->txwq.super.type == UCT_IB_MLX5_OBJ_TYPE_VERBS); + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_INIT; + attr.pkey_index = iface->super.super.super.pkey_index; + attr.port_num = iface->super.super.super.config.port_num; + attr_mask = IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT; + + if (ibv_modify_qp(dci->txwq.super.verbs.qp, &attr, attr_mask)) { + ucs_error("ibv_modify_qp(DCI, INIT) failed : %m"); + return UCS_ERR_IO_ERROR; + } + + /* Move QP to the RTR state */ + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTR; + attr.path_mtu = iface->super.super.config.path_mtu; + attr.ah_attr.is_global = iface->super.super.super.config.force_global_addr; + attr.ah_attr.sl = iface->super.super.super.config.sl; 
+ /* ib_core expects valid ah_attr::port_num when IBV_QP_AV is set */ + attr.ah_attr.port_num = iface->super.super.super.config.port_num; + attr_mask = IBV_QP_STATE | + IBV_QP_PATH_MTU | + IBV_QP_AV; + + if (ibv_modify_qp(dci->txwq.super.verbs.qp, &attr, attr_mask)) { + ucs_error("ibv_modify_qp(DCI, RTR) failed : %m"); + return UCS_ERR_IO_ERROR; + } + + /* Move QP to the RTS state */ + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTS; + attr.timeout = iface->super.super.config.timeout; + attr.rnr_retry = iface->super.super.config.rnr_retry; + attr.retry_cnt = iface->super.super.config.retry_cnt; + attr.max_rd_atomic = iface->super.super.config.max_rd_atomic; + attr_mask = IBV_QP_STATE | + IBV_QP_SQ_PSN | + IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | + IBV_QP_MAX_QP_RD_ATOMIC; + + if (ibv_modify_qp(dci->txwq.super.verbs.qp, &attr, attr_mask)) { + ucs_error("ibv_modify_qp(DCI, RTS) failed : %m"); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +ucs_status_t uct_dc_mlx5_iface_create_dct(uct_dc_mlx5_iface_t *iface) +{ + uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.super.super.super.md, + uct_ib_mlx5_md_t); + uct_ib_device_t *dev = uct_ib_iface_device(&iface->super.super.super); + struct mlx5dv_qp_init_attr dv_init_attr = {}; + struct ibv_qp_init_attr_ex init_attr = {}; + struct ibv_qp_attr attr = {}; + int ret; + + if (md->flags & UCT_IB_MLX5_MD_FLAG_DEVX_DCT) { + return uct_dc_mlx5_iface_devx_create_dct(iface); + } + + init_attr.comp_mask = IBV_QP_INIT_ATTR_PD; + init_attr.pd = uct_ib_iface_md(&iface->super.super.super)->pd; + init_attr.recv_cq = iface->super.super.super.cq[UCT_IB_DIR_RX]; + /* DCT can't send, but send_cq has to point to a valid CQ */ + init_attr.send_cq = iface->super.super.super.cq[UCT_IB_DIR_RX]; + init_attr.srq = iface->super.rx.srq.verbs.srq; + init_attr.qp_type = IBV_QPT_DRIVER; + init_attr.cap.max_inline_data = iface->super.super.config.rx_inline; + + dv_init_attr.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_DC;
+ dv_init_attr.dc_init_attr.dc_type = MLX5DV_DCTYPE_DCT; + dv_init_attr.dc_init_attr.dct_access_key = UCT_IB_KEY; + + iface->rx.dct.verbs.qp = mlx5dv_create_qp(dev->ibv_context, + &init_attr, &dv_init_attr); + if (iface->rx.dct.verbs.qp == NULL) { + ucs_error("mlx5dv_create_qp(DCT) failed: %m"); + return UCS_ERR_INVALID_PARAM; + } + + attr.pkey_index = iface->super.super.super.pkey_index; + attr.qp_state = IBV_QPS_INIT; + attr.port_num = iface->super.super.super.config.port_num; + attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_ATOMIC; + + ret = ibv_modify_qp(iface->rx.dct.verbs.qp, &attr, IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_ACCESS_FLAGS); + + if (ret) { + ucs_error("error modifying DCT to INIT: %m"); + goto err; + } + + attr.qp_state = IBV_QPS_RTR; + attr.path_mtu = iface->super.super.config.path_mtu; + attr.min_rnr_timer = iface->super.super.config.min_rnr_timer; + attr.ah_attr.is_global = iface->super.super.super.config.force_global_addr; + attr.ah_attr.grh.hop_limit = iface->super.super.super.config.hop_limit; + attr.ah_attr.grh.traffic_class = iface->super.super.super.config.traffic_class; + attr.ah_attr.grh.sgid_index = iface->super.super.super.config.gid_index; + attr.ah_attr.port_num = iface->super.super.super.config.port_num; + + ret = ibv_modify_qp(iface->rx.dct.verbs.qp, &attr, IBV_QP_STATE | + IBV_QP_MIN_RNR_TIMER | + IBV_QP_AV | + IBV_QP_PATH_MTU); + if (ret) { + ucs_error("error modifying DCT to RTR: %m"); + goto err; + } + + iface->rx.dct.type = UCT_IB_MLX5_OBJ_TYPE_VERBS; + iface->rx.dct.qp_num = iface->rx.dct.verbs.qp->qp_num; + return UCS_OK; + +err: + uct_ib_destroy_qp(iface->rx.dct.verbs.qp); + return UCS_ERR_IO_ERROR; +} + +void uct_dc_mlx5_destroy_dct(uct_dc_mlx5_iface_t *iface) +{ + switch (iface->rx.dct.type) { + case UCT_IB_MLX5_OBJ_TYPE_VERBS: + uct_ib_destroy_qp(iface->rx.dct.verbs.qp); + break; + case UCT_IB_MLX5_OBJ_TYPE_DEVX: +#if HAVE_DEVX + 
mlx5dv_devx_obj_destroy(iface->rx.dct.devx.obj); +#endif + break; + case UCT_IB_MLX5_OBJ_TYPE_LAST: + break; + } +} +#endif + +static void uct_dc_mlx5_iface_cleanup_dcis(uct_dc_mlx5_iface_t *iface) +{ + int i; + + for (i = 0; i < iface->tx.ndci; i++) { + uct_ib_mlx5_txwq_cleanup(&iface->tx.dcis[i].txwq); + } +} + +#if HAVE_DC_EXP +static uint64_t +uct_dc_mlx5_iface_ooo_flag(uct_dc_mlx5_iface_t *iface, uint64_t flag, + char *str, uint32_t qp_num) +{ +#if HAVE_DECL_IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT && HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT + uct_ib_device_t *dev = uct_ib_iface_device(&iface->super.super.super); + + if (iface->super.super.config.ooo_rw && + UCX_IB_DEV_IS_OOO_SUPPORTED(dev, dc)) { + ucs_debug("enabling out-of-order support on %s%.0x dev %s", + str, qp_num, uct_ib_device_name(dev)); + return flag; + } + +#endif + return 0; +} +#endif + +static ucs_status_t +uct_dc_mlx5_init_rx(uct_rc_iface_t *rc_iface, + const uct_rc_iface_common_config_t *rc_config) +{ + uct_ib_mlx5_md_t *md = ucs_derived_of(rc_iface->super.super.md, uct_ib_mlx5_md_t); + uct_dc_mlx5_iface_config_t *config = ucs_derived_of(rc_config, uct_dc_mlx5_iface_config_t); + uct_dc_mlx5_iface_t *iface = ucs_derived_of(rc_iface, uct_dc_mlx5_iface_t); + struct ibv_srq_init_attr_ex srq_attr = {}; + ucs_status_t status; + + if (UCT_RC_MLX5_TM_ENABLED(&iface->super)) { + if (md->flags & UCT_IB_MLX5_MD_FLAG_DEVX_DC_SRQ) { + status = uct_rc_mlx5_devx_init_rx_tm(&iface->super, &config->super, + 1, UCT_DC_RNDV_HDR_LEN); + if (status != UCS_OK) { + goto err; + } + + status = uct_dc_mlx5_iface_devx_set_srq_dc_params(iface); + if (status != UCS_OK) { + goto err_free_srq; + } + } else { +#ifdef HAVE_STRUCT_IBV_EXP_CREATE_SRQ_ATTR_DC_OFFLOAD_PARAMS + struct ibv_exp_srq_dc_offload_params dc_op = {}; + + dc_op.timeout = rc_iface->config.timeout; + dc_op.path_mtu = rc_iface->config.path_mtu; + dc_op.pkey_index = rc_iface->super.pkey_index; + dc_op.sl = rc_iface->super.config.sl; + dc_op.dct_key = UCT_IB_KEY; + 
dc_op.ooo_caps = uct_dc_mlx5_iface_ooo_flag(iface, + IBV_EXP_OOO_SUPPORT_RW_DATA_PLACEMENT, + "TM XRQ", 0); + + srq_attr.comp_mask = IBV_EXP_CREATE_SRQ_DC_OFFLOAD_PARAMS; + srq_attr.dc_offload_params = &dc_op; +#endif + status = uct_rc_mlx5_init_rx_tm(&iface->super, &config->super, + &srq_attr, UCT_DC_RNDV_HDR_LEN); + if (status != UCS_OK) { + goto err; + } + } + + iface->super.super.progress = uct_dc_mlx5_iface_progress_tm; + return status; + } + + status = uct_rc_iface_init_rx(rc_iface, rc_config, + &iface->super.rx.srq.verbs.srq); + if (status != UCS_OK) { + goto err; + } + + status = uct_ib_mlx5_srq_init(&iface->super.rx.srq, + iface->super.rx.srq.verbs.srq, + iface->super.super.super.config.seg_size, + iface->super.tm.mp.num_strides); + if (status != UCS_OK) { + goto err_free_srq; + } + + iface->super.rx.srq.type = UCT_IB_MLX5_OBJ_TYPE_VERBS; + iface->super.super.progress = uct_dc_mlx5_iface_progress; + return UCS_OK; + +err_free_srq: + uct_rc_mlx5_destroy_srq(&iface->super.rx.srq); +err: + return status; +} + +void uct_dc_mlx5_cleanup_rx(uct_rc_iface_t *rc_iface) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(rc_iface, uct_dc_mlx5_iface_t); + + uct_rc_mlx5_destroy_srq(&iface->super.rx.srq); +} + +#if HAVE_DC_EXP +ucs_status_t uct_dc_mlx5_iface_create_dct(uct_dc_mlx5_iface_t *iface) +{ + struct ibv_exp_dct_init_attr init_attr; + + memset(&init_attr, 0, sizeof(init_attr)); + + init_attr.pd = uct_ib_iface_md(&iface->super.super.super)->pd; + init_attr.cq = iface->super.super.super.cq[UCT_IB_DIR_RX]; + init_attr.srq = iface->super.rx.srq.verbs.srq; + init_attr.dc_key = UCT_IB_KEY; + init_attr.port = iface->super.super.super.config.port_num; + init_attr.mtu = iface->super.super.config.path_mtu; + init_attr.access_flags = IBV_EXP_ACCESS_REMOTE_WRITE | + IBV_EXP_ACCESS_REMOTE_READ | + IBV_EXP_ACCESS_REMOTE_ATOMIC; + init_attr.min_rnr_timer = iface->super.super.config.min_rnr_timer; + init_attr.tclass = iface->super.super.super.config.traffic_class; + 
init_attr.hop_limit = iface->super.super.super.config.hop_limit; + init_attr.gid_index = iface->super.super.super.config.gid_index; + init_attr.inline_size = iface->super.super.config.rx_inline; + init_attr.pkey_index = iface->super.super.super.pkey_index; + init_attr.create_flags |= uct_dc_mlx5_iface_ooo_flag(iface, + IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT, + "DCT", 0); + iface->rx.dct.verbs.dct = ibv_exp_create_dct(uct_ib_iface_device(&iface->super.super.super)->ibv_context, + &init_attr); + if (iface->rx.dct.verbs.dct == NULL) { + ucs_error("failed to create DC target: %m"); + return UCS_ERR_INVALID_PARAM; + } + + iface->rx.dct.qp_num = iface->rx.dct.verbs.dct->dct_num; + return UCS_OK; +} + +/* take dc qp to rts state */ +ucs_status_t uct_dc_mlx5_iface_dci_connect(uct_dc_mlx5_iface_t *iface, + uct_dc_dci_t *dci) +{ + struct ibv_exp_qp_attr attr; + long attr_mask; + uint64_t ooo_qp_flag; + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_INIT; + attr.pkey_index = iface->super.super.super.pkey_index; + attr.port_num = iface->super.super.super.config.port_num; + attr.dct_key = UCT_IB_KEY; + attr_mask = IBV_EXP_QP_STATE | + IBV_EXP_QP_PKEY_INDEX | + IBV_EXP_QP_PORT | + IBV_EXP_QP_DC_KEY; + + if (ibv_exp_modify_qp(dci->txwq.super.verbs.qp, &attr, attr_mask)) { + ucs_error("ibv_exp_modify_qp(DCI, INIT) failed : %m"); + return UCS_ERR_IO_ERROR; + } + + /* Move QP to the RTR state */ + ooo_qp_flag = uct_dc_mlx5_iface_ooo_flag(iface, + IBV_EXP_QP_OOO_RW_DATA_PLACEMENT, + "DCI QP 0x", dci->txwq.super.qp_num); + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTR; + attr.path_mtu = iface->super.super.config.path_mtu; + attr.ah_attr.is_global = iface->super.super.super.config.force_global_addr; + attr.ah_attr.sl = iface->super.super.super.config.sl; + attr_mask = IBV_EXP_QP_STATE | + IBV_EXP_QP_PATH_MTU | + IBV_EXP_QP_AV | + ooo_qp_flag; + + if (ibv_exp_modify_qp(dci->txwq.super.verbs.qp, &attr, attr_mask)) { + ucs_error("ibv_exp_modify_qp(DCI, RTR) failed 
: %m"); + return UCS_ERR_IO_ERROR; + } + + /* Move QP to the RTS state */ + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IBV_QPS_RTS; + attr.timeout = iface->super.super.config.timeout; + attr.rnr_retry = iface->super.super.config.rnr_retry; + attr.retry_cnt = iface->super.super.config.retry_cnt; + attr.max_rd_atomic = iface->super.super.config.max_rd_atomic; + attr_mask = IBV_EXP_QP_STATE | + IBV_EXP_QP_TIMEOUT | + IBV_EXP_QP_RETRY_CNT | + IBV_EXP_QP_RNR_RETRY | + IBV_EXP_QP_MAX_QP_RD_ATOMIC; + + if (ibv_exp_modify_qp(dci->txwq.super.verbs.qp, &attr, attr_mask)) { + ucs_error("ibv_exp_modify_qp(DCI, RTS) failed : %m"); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +void uct_dc_mlx5_destroy_dct(uct_dc_mlx5_iface_t *iface) +{ + ibv_exp_destroy_dct(iface->rx.dct.verbs.dct); +} +#endif + +void uct_dc_mlx5_iface_dcis_destroy(uct_dc_mlx5_iface_t *iface, int max) +{ + int i; + for (i = 0; i < max; i++) { + uct_rc_txqp_cleanup(&iface->tx.dcis[i].txqp); + ucs_assert(iface->tx.dcis[i].txwq.super.type == UCT_IB_MLX5_OBJ_TYPE_VERBS); + uct_ib_destroy_qp(iface->tx.dcis[i].txwq.super.verbs.qp); + } +} + +static ucs_status_t uct_dc_mlx5_iface_create_dcis(uct_dc_mlx5_iface_t *iface) +{ + struct ibv_qp_cap cap = {}; + ucs_status_t status; + int i; + + ucs_debug("creating %d dci(s)", iface->tx.ndci); + + iface->tx.stack_top = 0; + for (i = 0; i < iface->tx.ndci; i++) { + ucs_assert(iface->super.super.super.config.qp_type == UCT_IB_QPT_DCI); + + status = uct_dc_mlx5_iface_create_qp(iface, &cap, &iface->tx.dcis[i]); + if (status != UCS_OK) { + goto err; + } + + iface->tx.dcis_stack[i] = i; + } + + iface->super.super.config.tx_qp_len = iface->tx.dcis[0].txwq.bb_max; + uct_ib_iface_set_max_iov(&iface->super.super.super, cap.max_send_sge); + return UCS_OK; + +err: + uct_dc_mlx5_iface_dcis_destroy(iface, i); + return status; +} + +void uct_dc_mlx5_iface_set_quota(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_iface_config_t *config) +{ + iface->tx.available_quota = 
iface->super.super.config.tx_qp_len - + ucs_min(iface->super.super.config.tx_qp_len, config->quota); +} + +void uct_dc_mlx5_iface_init_version(uct_dc_mlx5_iface_t *iface, uct_md_h md) +{ + uct_ib_device_t *dev; + unsigned ver; + + dev = &ucs_derived_of(md, uct_ib_md_t)->dev; + ver = uct_ib_device_spec(dev)->flags & UCT_IB_DEVICE_FLAG_DC; + ucs_assert(ver != UCT_IB_DEVICE_FLAG_DC); + + iface->version_flag = 0; + + if (ver & UCT_IB_DEVICE_FLAG_DC_V2) { + iface->version_flag = UCT_DC_MLX5_IFACE_ADDR_DC_V2; + } + + if (ver & UCT_IB_DEVICE_FLAG_DC_V1) { + iface->version_flag = UCT_DC_MLX5_IFACE_ADDR_DC_V1; + } +} + +int uct_dc_mlx5_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + uct_dc_mlx5_iface_addr_t *addr = (uct_dc_mlx5_iface_addr_t *)iface_addr; + uct_dc_mlx5_iface_t UCS_V_UNUSED *iface; + + iface = ucs_derived_of(tl_iface, uct_dc_mlx5_iface_t); + ucs_assert_always(iface_addr != NULL); + + return ((addr->flags & UCT_DC_MLX5_IFACE_ADDR_DC_VERS) == iface->version_flag) && + (UCT_DC_MLX5_IFACE_ADDR_TM_ENABLED(addr) == + UCT_RC_MLX5_TM_ENABLED(&iface->super)) && + uct_ib_iface_is_reachable(tl_iface, dev_addr, iface_addr); +} + +ucs_status_t +uct_dc_mlx5_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *iface_addr) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_iface_addr_t *addr = (uct_dc_mlx5_iface_addr_t *)iface_addr; + + uct_ib_pack_uint24(addr->qp_num, iface->rx.dct.qp_num); + addr->atomic_mr_id = uct_ib_mlx5_iface_get_atomic_mr_id(&iface->super.super.super); + addr->flags = iface->version_flag; + if (UCT_RC_MLX5_TM_ENABLED(&iface->super)) { + addr->flags |= UCT_DC_MLX5_IFACE_ADDR_HW_TM; + } + + return UCS_OK; +} + +static inline ucs_status_t uct_dc_mlx5_iface_flush_dcis(uct_dc_mlx5_iface_t *iface) +{ + int i; + + if (iface->tx.fc_grants) { + /* If some ep is waiting for grant it may have some pending + * operations, while all QP 
resources are available. */ + return UCS_INPROGRESS; + } + + for (i = 0; i < iface->tx.ndci; i++) { + if (uct_dc_mlx5_iface_flush_dci(iface, i) != UCS_OK) { + return UCS_INPROGRESS; + } + } + + return UCS_OK; +} + +ucs_status_t uct_dc_mlx5_iface_flush(uct_iface_h tl_iface, unsigned flags, uct_completion_t *comp) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_dc_mlx5_iface_t); + ucs_status_t status; + + if (comp != NULL) { + return UCS_ERR_UNSUPPORTED; + } + status = uct_dc_mlx5_iface_flush_dcis(iface); + if (status == UCS_OK) { + UCT_TL_IFACE_STAT_FLUSH(&iface->super.super.super.super); + } + else if (status == UCS_INPROGRESS) { + UCT_TL_IFACE_STAT_FLUSH_WAIT(&iface->super.super.super.super); + } + return status; +} + +ucs_status_t uct_dc_mlx5_iface_init_fc_ep(uct_dc_mlx5_iface_t *iface) +{ + ucs_status_t status; + uct_dc_mlx5_ep_t *ep; + + ep = ucs_malloc(sizeof(uct_dc_mlx5_ep_t), "fc_ep"); + if (ep == NULL) { + ucs_error("Failed to allocate FC ep"); + status = UCS_ERR_NO_MEMORY; + goto err; + } + /* We do not have any peer address at this point, so init basic subclasses + * only (for statistics, iface, etc) */ + status = UCS_CLASS_INIT(uct_base_ep_t, (void*)(&ep->super), + &iface->super.super.super.super); + if (status != UCS_OK) { + ucs_error("Failed to initialize fake FC ep, status: %s", + ucs_status_string(status)); + goto err_free; + } + + status = uct_dc_mlx5_ep_basic_init(iface, ep); + if (status != UCS_OK) { + ucs_error("FC ep init failed %s", ucs_status_string(status)); + goto err_cleanup; + } + + iface->tx.fc_ep = ep; + return UCS_OK; + +err_cleanup: + UCS_CLASS_CLEANUP(uct_base_ep_t, &ep->super); +err_free: + ucs_free(ep); +err: + return status; +} + +void uct_dc_mlx5_iface_cleanup_fc_ep(uct_dc_mlx5_iface_t *iface) +{ + uct_dc_mlx5_ep_pending_purge(&iface->tx.fc_ep->super.super, NULL, NULL); + ucs_arbiter_group_cleanup(&iface->tx.fc_ep->arb_group); + uct_rc_fc_cleanup(&iface->tx.fc_ep->fc); + UCS_CLASS_CLEANUP(uct_base_ep_t, 
iface->tx.fc_ep); + ucs_free(iface->tx.fc_ep); +} + +ucs_status_t uct_dc_mlx5_iface_fc_grant(uct_pending_req_t *self) +{ + uct_rc_fc_request_t *freq = ucs_derived_of(self, uct_rc_fc_request_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(freq->ep, uct_dc_mlx5_ep_t); + uct_rc_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_rc_iface_t); + ucs_status_t status; + + ucs_assert_always(iface->config.fc_enabled); + + status = uct_rc_fc_ctrl(&ep->super.super, UCT_RC_EP_FC_PURE_GRANT, freq); + if (status == UCS_OK) { + ucs_mpool_put(freq); + UCS_STATS_UPDATE_COUNTER(ep->fc.stats, UCT_RC_FC_STAT_TX_PURE_GRANT, 1); + } + return status; +} + +ucs_status_t uct_dc_mlx5_iface_fc_handler(uct_rc_iface_t *rc_iface, unsigned qp_num, + uct_rc_hdr_t *hdr, unsigned length, + uint32_t imm_data, uint16_t lid, unsigned flags) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(rc_iface, uct_dc_mlx5_iface_t); + uint8_t fc_hdr = uct_rc_fc_get_fc_hdr(hdr->am_id); + uct_dc_fc_request_t *dc_req; + int16_t cur_wnd; + ucs_status_t status; + uct_dc_mlx5_ep_t *ep; + + ucs_assert(rc_iface->config.fc_enabled); + + if (fc_hdr == UCT_RC_EP_FC_FLAG_HARD_REQ) { + ep = iface->tx.fc_ep; + UCS_STATS_UPDATE_COUNTER(ep->fc.stats, UCT_RC_FC_STAT_RX_HARD_REQ, 1); + + dc_req = ucs_mpool_get(&iface->super.super.tx.fc_mp); + if (ucs_unlikely(dc_req == NULL)) { + ucs_error("Failed to allocate FC request"); + return UCS_ERR_NO_MEMORY; + } + dc_req->super.super.func = uct_dc_mlx5_iface_fc_grant; + dc_req->super.ep = &ep->super.super; + dc_req->dct_num = imm_data; + dc_req->lid = lid; + dc_req->sender = *((uct_dc_fc_sender_data_t*)(hdr + 1)); + + status = uct_dc_mlx5_iface_fc_grant(&dc_req->super.super); + if (status == UCS_ERR_NO_RESOURCE){ + uct_dc_mlx5_ep_pending_common(iface, ep, &dc_req->super.super, 0, 1); + } else { + ucs_assertv_always(status == UCS_OK, + "Failed to send FC grant msg: %s", + ucs_status_string(status)); + } + } else if (fc_hdr == UCT_RC_EP_FC_PURE_GRANT) { + ep = *((uct_dc_mlx5_ep_t**)(hdr 
+ 1)); + + if (!(ep->flags & UCT_DC_MLX5_EP_FLAG_VALID)) { + /* Just remove ep now, no need to clear waiting for grant state + * (it was done in destroy_ep func) */ + uct_dc_mlx5_ep_release(ep); + return UCS_OK; + } + + cur_wnd = ep->fc.fc_wnd; + + /* Peer granted resources, so update wnd */ + ep->fc.fc_wnd = rc_iface->config.fc_wnd_size; + + /* Clear the flag for flush to complete */ + uct_dc_mlx5_ep_clear_fc_grant_flag(iface, ep); + + UCS_STATS_UPDATE_COUNTER(ep->fc.stats, UCT_RC_FC_STAT_RX_PURE_GRANT, 1); + UCS_STATS_SET_COUNTER(ep->fc.stats, UCT_RC_FC_STAT_FC_WND, ep->fc.fc_wnd); + + /* To preserve ordering we have to dispatch all pending + * operations if current fc_wnd is <= 0 */ + if (cur_wnd <= 0) { + if (ep->dci == UCT_DC_MLX5_EP_NO_DCI) { + ucs_arbiter_group_schedule(uct_dc_mlx5_iface_dci_waitq(iface), + &ep->arb_group); + } else { + /* Need to schedule fake ep in TX arbiter, because it + * might have been descheduled due to lack of FC window. */ + ucs_arbiter_group_schedule(uct_dc_mlx5_iface_tx_waitq(iface), + uct_dc_mlx5_ep_arb_group(iface, ep)); + } + + uct_dc_mlx5_iface_progress_pending(iface); + } + } + + return UCS_OK; +} + +void uct_dc_mlx5_iface_set_av_sport(uct_dc_mlx5_iface_t *iface, + uct_ib_mlx5_base_av_t *av, + uint32_t remote_dctn) +{ + uct_ib_mlx5_iface_set_av_sport(&iface->super.super.super, av, + remote_dctn, iface->rx.dct.qp_num); +} + +static void uct_dc_mlx5_iface_handle_failure(uct_ib_iface_t *ib_iface, + void *arg, ucs_status_t status) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ib_iface, uct_dc_mlx5_iface_t); + struct mlx5_cqe64 *cqe = arg; + uint32_t qp_num = ntohl(cqe->sop_drop_qpn) & + UCS_MASK(UCT_IB_QPN_ORDER); + uint8_t dci = uct_dc_mlx5_iface_dci_find(iface, qp_num); + uct_dc_mlx5_ep_t *ep; + ucs_log_level_t level; + + if (uct_dc_mlx5_iface_is_dci_rand(iface)) { + ep = NULL; + level = UCS_LOG_LEVEL_FATAL; /* error handling is not supported with rand dci */ + } else { + ep = uct_dc_mlx5_ep_from_dci(iface, dci); + level = 
ib_iface->super.config.failure_level; + } + + if (ep == NULL) { + uct_ib_mlx5_completion_with_err(ib_iface, arg, &iface->tx.dcis[dci].txwq, + level); + return; + } + + ep = uct_dc_mlx5_ep_from_dci(iface, dci); + uct_dc_mlx5_ep_handle_failure(ep, arg, status); +} + +static uct_rc_iface_ops_t uct_dc_mlx5_iface_ops = { + { + { + .ep_put_short = uct_dc_mlx5_ep_put_short, + .ep_put_bcopy = uct_dc_mlx5_ep_put_bcopy, + .ep_put_zcopy = uct_dc_mlx5_ep_put_zcopy, + .ep_get_bcopy = uct_dc_mlx5_ep_get_bcopy, + .ep_get_zcopy = uct_dc_mlx5_ep_get_zcopy, + .ep_am_short = uct_dc_mlx5_ep_am_short, + .ep_am_bcopy = uct_dc_mlx5_ep_am_bcopy, + .ep_am_zcopy = uct_dc_mlx5_ep_am_zcopy, + .ep_atomic_cswap64 = uct_dc_mlx5_ep_atomic_cswap64, + .ep_atomic_cswap32 = uct_dc_mlx5_ep_atomic_cswap32, + .ep_atomic64_post = uct_dc_mlx5_ep_atomic64_post, + .ep_atomic32_post = uct_dc_mlx5_ep_atomic32_post, + .ep_atomic64_fetch = uct_dc_mlx5_ep_atomic64_fetch, + .ep_atomic32_fetch = uct_dc_mlx5_ep_atomic32_fetch, + .ep_pending_add = uct_dc_mlx5_ep_pending_add, + .ep_pending_purge = uct_dc_mlx5_ep_pending_purge, + .ep_flush = uct_dc_mlx5_ep_flush, + .ep_fence = uct_dc_mlx5_ep_fence, +#if IBV_HW_TM + .ep_tag_eager_short = uct_dc_mlx5_ep_tag_eager_short, + .ep_tag_eager_bcopy = uct_dc_mlx5_ep_tag_eager_bcopy, + .ep_tag_eager_zcopy = uct_dc_mlx5_ep_tag_eager_zcopy, + .ep_tag_rndv_zcopy = uct_dc_mlx5_ep_tag_rndv_zcopy, + .ep_tag_rndv_request = uct_dc_mlx5_ep_tag_rndv_request, + .ep_tag_rndv_cancel = uct_rc_mlx5_ep_tag_rndv_cancel, + .iface_tag_recv_zcopy = uct_dc_mlx5_iface_tag_recv_zcopy, + .iface_tag_recv_cancel = uct_dc_mlx5_iface_tag_recv_cancel, +#endif + .iface_flush = uct_dc_mlx5_iface_flush, + .iface_fence = uct_rc_iface_fence, + .iface_progress_enable = uct_dc_mlx5_iface_progress_enable, + .iface_progress_disable = uct_base_iface_progress_disable, + .iface_progress = uct_rc_iface_do_progress, + .iface_event_fd_get = uct_ib_iface_event_fd_get, + .iface_event_arm = uct_rc_iface_event_arm, + 
.ep_create = uct_dc_mlx5_ep_create_connected, + .ep_destroy = uct_dc_mlx5_ep_destroy, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_dc_mlx5_iface_t), + .iface_query = uct_dc_mlx5_iface_query, + .iface_get_device_address = uct_ib_iface_get_device_address, + .iface_is_reachable = uct_dc_mlx5_iface_is_reachable, + .iface_get_address = uct_dc_mlx5_iface_get_address, + }, + .create_cq = uct_ib_mlx5_create_cq, + .arm_cq = uct_ib_iface_arm_cq, + .event_cq = uct_dc_mlx5_iface_event_cq, + .handle_failure = uct_dc_mlx5_iface_handle_failure, + .set_ep_failed = uct_dc_mlx5_ep_set_failed, + }, + .init_rx = uct_dc_mlx5_init_rx, + .cleanup_rx = uct_dc_mlx5_cleanup_rx, + .fc_ctrl = uct_dc_mlx5_ep_fc_ctrl, + .fc_handler = uct_dc_mlx5_iface_fc_handler, +}; + +static UCS_CLASS_INIT_FUNC(uct_dc_mlx5_iface_t, uct_md_h tl_md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_dc_mlx5_iface_config_t *config = ucs_derived_of(tl_config, + uct_dc_mlx5_iface_config_t); + uct_ib_mlx5_md_t *md = ucs_derived_of(tl_md, uct_ib_mlx5_md_t); + uct_ib_iface_init_attr_t init_attr = {}; + ucs_status_t status; + ucs_trace_func(""); + + if (config->ndci < 1) { + ucs_error("dc interface must have at least 1 dci (requested: %d)", + config->ndci); + return UCS_ERR_INVALID_PARAM; + } + + if (config->ndci > UCT_DC_MLX5_IFACE_MAX_DCIS) { + ucs_error("dc interface can have at most %d dcis (requested: %d)", + UCT_DC_MLX5_IFACE_MAX_DCIS, config->ndci); + return UCS_ERR_INVALID_PARAM; + } + + init_attr.qp_type = UCT_IB_QPT_DCI; + init_attr.flags = UCT_IB_CQ_IGNORE_OVERRUN; + init_attr.fc_req_size = sizeof(uct_dc_fc_request_t); + init_attr.rx_hdr_len = sizeof(uct_rc_mlx5_hdr_t); + + if (md->flags & UCT_IB_MLX5_MD_FLAG_DC_TM) { + init_attr.flags |= UCT_IB_TM_SUPPORTED; + } + + /* driver will round up to pow of 2 if needed */ + init_attr.tx_cq_len = config->super.super.tx.queue_len * + UCT_IB_MLX5_MAX_BB * config->ndci; + /* TODO check caps instead */ + if 
(ucs_roundup_pow2(init_attr.tx_cq_len) > UCT_DC_MLX5_MAX_TX_CQ_LEN) { + ucs_error("Can't allocate TX resources, try to decrease dcis number (%d)" + " or tx qp length (%d)", + config->ndci, config->super.super.tx.queue_len); + return UCS_ERR_INVALID_PARAM; + } + + UCS_CLASS_CALL_SUPER_INIT(uct_rc_mlx5_iface_common_t, + &uct_dc_mlx5_iface_ops, + tl_md, worker, params, &config->super, + &config->rc_mlx5_common, &init_attr); + + uct_dc_mlx5_iface_init_version(self, tl_md); + + self->tx.ndci = config->ndci; + self->tx.policy = (uct_dc_tx_policy_t)config->tx_policy; + self->tx.fc_grants = 0; + self->super.super.config.tx_moderation = 0; /* disable tx moderation for dcs */ + ucs_list_head_init(&self->tx.gc_list); + + self->tx.rand_seed = config->rand_seed ? config->rand_seed : time(NULL); + self->tx.pend_cb = uct_dc_mlx5_iface_is_dci_rand(self) ? + uct_dc_mlx5_iface_dci_do_rand_pending_tx : + uct_dc_mlx5_iface_dci_do_dcs_pending_tx; + + /* create DC target */ + status = uct_dc_mlx5_iface_create_dct(self); + if (status != UCS_OK) { + goto err; + } + + /* create DC initiators */ + status = uct_dc_mlx5_iface_create_dcis(self); + if (status != UCS_OK) { + goto err_destroy_dct; + } + + ucs_debug("dc iface %p: using '%s' policy with %d dcis and %d cqes, dct 0x%x", + self, uct_dc_tx_policy_names[self->tx.policy], self->tx.ndci, + init_attr.tx_cq_len, UCT_RC_MLX5_TM_ENABLED(&self->super) ? 
+ 0 : self->rx.dct.qp_num); + + /* Create fake endpoint which will be used for sending FC grants */ + uct_dc_mlx5_iface_init_fc_ep(self); + + ucs_arbiter_init(&self->tx.dci_arbiter); + + /* mlx5 init part */ + status = uct_ud_mlx5_iface_common_init(&self->super.super.super, + &self->ud_common, &config->mlx5_ud); + if (status != UCS_OK) { + goto err_destroy_dct; + } + + self->tx.available_quota = self->super.super.config.tx_qp_len - + ucs_min(self->super.super.config.tx_qp_len, config->quota); + /* Set max_iov for put_zcopy and get_zcopy */ + uct_ib_iface_set_max_iov(&self->super.super.super, + (UCT_IB_MLX5_MAX_SEND_WQE_SIZE - + sizeof(struct mlx5_wqe_raddr_seg) - + sizeof(struct mlx5_wqe_ctrl_seg) - + UCT_IB_MLX5_AV_FULL_SIZE) / + sizeof(struct mlx5_wqe_data_seg)); + + uct_rc_mlx5_iface_common_prepost_recvs(&self->super); + + ucs_debug("created dc iface %p", self); + + return UCS_OK; + +err_destroy_dct: + uct_dc_mlx5_destroy_dct(self); +err: + return status; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_dc_mlx5_iface_t) +{ + uct_dc_mlx5_ep_t *ep, *tmp; + + ucs_trace_func(""); + uct_base_iface_progress_disable(&self->super.super.super.super.super, + UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); + uct_dc_mlx5_iface_cleanup_dcis(self); + + uct_dc_mlx5_destroy_dct(self); + + ucs_list_for_each_safe(ep, tmp, &self->tx.gc_list, list) { + uct_dc_mlx5_ep_release(ep); + } + uct_dc_mlx5_iface_dcis_destroy(self, self->tx.ndci); + uct_dc_mlx5_iface_cleanup_fc_ep(self); + ucs_arbiter_cleanup(&self->tx.dci_arbiter); +} + +UCS_CLASS_DEFINE(uct_dc_mlx5_iface_t, uct_rc_mlx5_iface_common_t); + +static UCS_CLASS_DEFINE_NEW_FUNC(uct_dc_mlx5_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t*); + +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_dc_mlx5_iface_t, uct_iface_t); + +static ucs_status_t +uct_dc_mlx5_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + uct_ib_md_t *ib_md = ucs_derived_of(md, 
uct_ib_md_t); + int flags; + + flags = UCT_IB_DEVICE_FLAG_MLX5_PRM | UCT_IB_DEVICE_FLAG_DC | + (ib_md->config.eth_pause ? 0 : UCT_IB_DEVICE_FLAG_LINK_IB); + return uct_ib_device_query_ports(&ib_md->dev, flags, tl_devices_p, + num_tl_devices_p); +} + +UCT_TL_DEFINE(&uct_ib_component, dc_mlx5, uct_dc_mlx5_query_tl_devices, + uct_dc_mlx5_iface_t, "DC_MLX5_", uct_dc_mlx5_iface_config_table, + uct_dc_mlx5_iface_config_t); diff --git a/src/uct/ib/dc/dc_mlx5.h b/src/uct/ib/dc/dc_mlx5.h new file mode 100644 index 0000000..17a1d0a --- /dev/null +++ b/src/uct/ib/dc/dc_mlx5.h @@ -0,0 +1,343 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2016-2018. ALL RIGHTS RESERVED. + +* See file LICENSE for terms. +*/ + +#ifndef UCT_DC_IFACE_H +#define UCT_DC_IFACE_H + +#include +#include +#include +#include +#include +#include +#include + + +/* + * HW tag matching + */ +#if IBV_HW_TM +# if HAVE_INFINIBAND_TM_TYPES_H +/* upstream tm_types.h doesn't provide RAVH header */ +struct ibv_ravh { + uint32_t sl_dct; + uint32_t reserved; /* must be zero */ + uint64_t dc_access_key; +}; +# else +# define ibv_ravh ibv_exp_tmh_ravh +# endif +# define UCT_DC_RNDV_HDR_LEN (sizeof(struct ibv_rvh) + \ + sizeof(struct ibv_ravh)) +#else +# define UCT_DC_RNDV_HDR_LEN 0 +#endif + +#define UCT_DC_MLX5_IFACE_MAX_DCIS 16 + +#define UCT_DC_MLX5_IFACE_ADDR_TM_ENABLED(_addr) \ + (!!((_addr)->flags & UCT_DC_MLX5_IFACE_ADDR_HW_TM)) + +typedef struct uct_dc_mlx5_ep uct_dc_mlx5_ep_t; +typedef struct uct_dc_mlx5_iface uct_dc_mlx5_iface_t; + + +typedef enum { + UCT_DC_MLX5_IFACE_ADDR_HW_TM = UCS_BIT(0), + UCT_DC_MLX5_IFACE_ADDR_DC_V1 = UCS_BIT(1), + UCT_DC_MLX5_IFACE_ADDR_DC_V2 = UCS_BIT(2), + UCT_DC_MLX5_IFACE_ADDR_DC_VERS = UCT_DC_MLX5_IFACE_ADDR_DC_V1 | + UCT_DC_MLX5_IFACE_ADDR_DC_V2 +} uct_dc_mlx5_iface_addr_flags_t; + + +typedef struct uct_dc_mlx5_iface_addr { + uct_ib_uint24_t qp_num; + uint8_t atomic_mr_id; + uint8_t flags; +} UCS_S_PACKED uct_dc_mlx5_iface_addr_t; + + +/** + * dci policies: + * - fixed: all 
eps always use same dci no matter what + * - dcs: + * - ep uses already assigned dci or + * - free dci is assigned in LIFO (stack) order or + * - ep has no resources to transmit + * - on FULL completion (once there are no outstanding ops) + * dci is pushed to the stack of free dcis + * it is possible that ep will never release its dci: + * ep sends, gets some completion, sends more, repeat + * - dcs + quota: + * - same as dcs with following addition: + * - if dci can not tx, and there are eps waiting for dci + * allocation ep goes into tx_wait state + * - in tx_wait state: + * - ep can not transmit while there are eps + * waiting for dci allocation. This will break + * starvation. + * - if there are no eps that are waiting for dci allocation + * ep goes back to normal state + * - random + * - dci is chosen by random() % ndci + * - ep keeps using dci as long as it has outstanding sends + * + * Not implemented policies: + * + * - hash: + * - dci is allocated to ep by some hash function + * for example dlid % ndci + * + */ +typedef enum { + UCT_DC_TX_POLICY_DCS, + UCT_DC_TX_POLICY_DCS_QUOTA, + UCT_DC_TX_POLICY_RAND, + UCT_DC_TX_POLICY_LAST +} uct_dc_tx_policy_t; + + +typedef struct uct_dc_mlx5_iface_config { + uct_rc_iface_common_config_t super; + uct_rc_mlx5_iface_common_config_t rc_mlx5_common; + uct_ud_iface_common_config_t ud_common; + int ndci; + int tx_policy; + unsigned quota; + unsigned rand_seed; + uct_ud_mlx5_iface_common_config_t mlx5_ud; +} uct_dc_mlx5_iface_config_t; + + +typedef struct uct_dc_dci { + uct_rc_txqp_t txqp; /* DCI qp */ + uct_ib_mlx5_txwq_t txwq; /* DCI mlx5 wq */ + union { + uct_dc_mlx5_ep_t *ep; /* points to an endpoint that currently + owns the dci. Relevant only for dcs + and dcs quota policies. */ + ucs_arbiter_group_t arb_group; /* pending group, relevant for rand + policy. With rand, groups are not + descheduled until all elements + processed. Better have dci num + groups scheduled than ep num.
*/ + }; +#if UCS_ENABLE_ASSERT + uint8_t flags; /* debug state, @ref uct_dc_dci_state_t */ +#endif +} uct_dc_dci_t; + + +typedef struct uct_dc_fc_sender_data { + uint64_t ep; + struct { + int is_global; + union ibv_gid gid; + } UCS_S_PACKED global; +} UCS_S_PACKED uct_dc_fc_sender_data_t; + +typedef struct uct_dc_fc_request { + uct_rc_fc_request_t super; + uct_dc_fc_sender_data_t sender; + uint32_t dct_num; + + /* Lid can be stored either in BE or in LE order. The endianess depends + * on the transport (BE for mlx5 and LE for dc verbs) */ + uint16_t lid; +} uct_dc_fc_request_t; + + +struct uct_dc_mlx5_iface { + uct_rc_mlx5_iface_common_t super; + struct { + /* Array of dcis */ + uct_dc_dci_t dcis[UCT_DC_MLX5_IFACE_MAX_DCIS]; + + uint8_t ndci; /* Number of DCIs */ + uct_dc_tx_policy_t policy; /* dci selection algorithm */ + int16_t available_quota; /* if available tx is lower, let + another endpoint use the dci */ + + /* LIFO is only relevant for dcs allocation policy */ + uint8_t stack_top; /* dci stack top */ + uint8_t dcis_stack[UCT_DC_MLX5_IFACE_MAX_DCIS]; /* LIFO of indexes of available dcis */ + + ucs_arbiter_t dci_arbiter; + + /* Used to send grant messages for all peers */ + uct_dc_mlx5_ep_t *fc_ep; + + /* List of destroyed endpoints waiting for credit grant */ + ucs_list_link_t gc_list; + + /* Number of expected FC grants */ + unsigned fc_grants; + + /* Seed used for random dci allocation */ + unsigned rand_seed; + + ucs_arbiter_callback_t pend_cb; + } tx; + + struct { + uct_ib_mlx5_qp_t dct; + } rx; + + uint8_t version_flag; + + uct_ud_mlx5_iface_common_t ud_common; +}; + + +extern ucs_config_field_t uct_dc_mlx5_iface_config_table[]; + +ucs_status_t uct_dc_mlx5_iface_create_dct(uct_dc_mlx5_iface_t *iface); + +int uct_dc_mlx5_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr); + +ucs_status_t uct_dc_mlx5_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *iface_addr); + 
+ucs_status_t uct_dc_mlx5_iface_flush(uct_iface_h tl_iface, unsigned flags, uct_completion_t *comp); + +void uct_dc_mlx5_iface_set_quota(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_iface_config_t *config); + +ucs_status_t uct_dc_mlx5_iface_init_fc_ep(uct_dc_mlx5_iface_t *iface); + +void uct_dc_mlx5_iface_cleanup_fc_ep(uct_dc_mlx5_iface_t *iface); + +ucs_status_t uct_dc_mlx5_iface_fc_grant(uct_pending_req_t *self); + +ucs_status_t uct_dc_mlx5_iface_fc_handler(uct_rc_iface_t *rc_iface, unsigned qp_num, + uct_rc_hdr_t *hdr, unsigned length, + uint32_t imm_data, uint16_t lid, unsigned flags); + +void uct_dc_mlx5_iface_set_av_sport(uct_dc_mlx5_iface_t *iface, + uct_ib_mlx5_base_av_t *av, + uint32_t remote_dctn); + +void uct_dc_mlx5_destroy_dct(uct_dc_mlx5_iface_t *iface); + +void uct_dc_mlx5_iface_init_version(uct_dc_mlx5_iface_t *iface, uct_md_h md); + +ucs_status_t uct_dc_mlx5_iface_reset_dci(uct_dc_mlx5_iface_t *iface, + uct_dc_dci_t *dci); + +ucs_status_t uct_dc_mlx5_iface_dci_connect(uct_dc_mlx5_iface_t *iface, + uct_dc_dci_t *dci); + +void uct_dc_mlx5_iface_dcis_destroy(uct_dc_mlx5_iface_t *iface, int max); + +#if HAVE_DEVX +/* DEVX variants; the definitions are in dc_mlx5_devx.c */ +ucs_status_t uct_dc_mlx5_iface_devx_create_dct(uct_dc_mlx5_iface_t *iface); + +ucs_status_t uct_dc_mlx5_iface_devx_set_srq_dc_params(uct_dc_mlx5_iface_t *iface); + +ucs_status_t uct_dc_mlx5_iface_devx_dci_connect(uct_dc_mlx5_iface_t *iface, + uct_ib_mlx5_qp_t *qp); + +#else +/* HAVE_DEVX is not set: stubs that always return UCS_ERR_UNSUPPORTED */ +static UCS_F_MAYBE_UNUSED ucs_status_t +uct_dc_mlx5_iface_devx_create_dct(uct_dc_mlx5_iface_t *iface) +{ + return UCS_ERR_UNSUPPORTED; +} + +static UCS_F_MAYBE_UNUSED ucs_status_t +uct_dc_mlx5_iface_devx_set_srq_dc_params(uct_dc_mlx5_iface_t *iface) +{ + return UCS_ERR_UNSUPPORTED; +} + +static UCS_F_MAYBE_UNUSED ucs_status_t +uct_dc_mlx5_iface_devx_dci_connect(uct_dc_mlx5_iface_t *iface, + uct_ib_mlx5_qp_t *qp) +{ + return UCS_ERR_UNSUPPORTED; +} + +#endif +/* Fill a RAVH header: DCT number and UCT_IB_KEY converted to big-endian, reserved field zeroed */ +#if IBV_HW_TM +static UCS_F_ALWAYS_INLINE void +uct_dc_mlx5_iface_fill_ravh(struct ibv_ravh *ravh, uint32_t
dct_num) +{ + ravh->sl_dct = htobe32(dct_num); + ravh->dc_access_key = htobe64(UCT_IB_KEY); + ravh->reserved = 0; +} +#endif + +/* Returns the index in iface->tx.dcis[] of the DCI whose QP number is qp_num; calls ucs_fatal() if none matches. TODO: + * use a better search algorithm (perfect hash, bsearch, hash) ??? + * + * linear search is most probably the best way to go + * because the number of dcis is usually small + */ +static inline uint8_t uct_dc_mlx5_iface_dci_find(uct_dc_mlx5_iface_t *iface, uint32_t qp_num) +{ + uct_dc_dci_t *dcis = iface->tx.dcis; + int i, ndci = iface->tx.ndci; + + for (i = 0; i < ndci; i++) { + if (dcis[i].txwq.super.qp_num == qp_num) { + return i; + } + } + ucs_fatal("DCI (qpnum=%d) does not exist", qp_num); +} +/* Returns non-zero if the interface tx memory pool is not empty */ +static UCS_F_ALWAYS_INLINE int +uct_dc_mlx5_iface_has_tx_resources(uct_dc_mlx5_iface_t *iface) +{ + return !ucs_mpool_is_empty(&iface->super.super.tx.mp); +} +/* Returns non-zero if uct_rc_txqp_available() of the given dci's txqp is positive */ +static inline int uct_dc_mlx5_iface_dci_has_tx_resources(uct_dc_mlx5_iface_t *iface, uint8_t dci) +{ + return uct_rc_txqp_available(&iface->tx.dcis[dci].txqp) > 0; +} + +/* returns pending queue of eps waiting for tx resources */ +static inline ucs_arbiter_t *uct_dc_mlx5_iface_tx_waitq(uct_dc_mlx5_iface_t *iface) +{ + return &iface->tx.dci_arbiter; +} + +/* returns pending queue of eps waiting for the dci allocation */ +static inline ucs_arbiter_t *uct_dc_mlx5_iface_dci_waitq(uct_dc_mlx5_iface_t *iface) +{ + return &iface->super.super.tx.arbiter; +} +/* Returns non-zero if the dci's txqp has fewer available entries than the configured tx_qp_len */ +static inline int +uct_dc_mlx5_iface_dci_has_outstanding(uct_dc_mlx5_iface_t *iface, int dci) +{ + uct_rc_txqp_t *txqp; + + txqp = &iface->tx.dcis[dci].txqp; + return uct_rc_txqp_available(txqp) < (int16_t)iface->super.super.config.tx_qp_len; +} +/* Returns UCS_OK if the dci has nothing outstanding, UCS_INPROGRESS otherwise (asserts no unsignaled sends) */ +static inline ucs_status_t uct_dc_mlx5_iface_flush_dci(uct_dc_mlx5_iface_t *iface, int dci) +{ + + if (!uct_dc_mlx5_iface_dci_has_outstanding(iface, dci)) { + return UCS_OK; + } + ucs_trace_poll("dci %d is not flushed %d/%d", dci, + iface->tx.dcis[dci].txqp.available, + iface->super.super.config.tx_qp_len); + ucs_assertv(uct_rc_txqp_unsignaled(&iface->tx.dcis[dci].txqp) == 0, +
"unsignalled send is not supported!!!"); + return UCS_INPROGRESS; +} + +#endif diff --git a/src/uct/ib/dc/dc_mlx5_devx.c b/src/uct/ib/dc/dc_mlx5_devx.c new file mode 100644 index 0000000..dee3598 --- /dev/null +++ b/src/uct/ib/dc/dc_mlx5_devx.c @@ -0,0 +1,167 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "dc_mlx5.h" + +#include +#include +#include + +/* Create the DCT with mlx5dv_devx_obj_create() and record object, type and DCT number in iface->rx.dct; UCS_ERR_INVALID_PARAM on failure */ +ucs_status_t uct_dc_mlx5_iface_devx_create_dct(uct_dc_mlx5_iface_t *iface) +{ + uct_ib_device_t *dev = uct_ib_iface_device(&iface->super.super.super); + char in[UCT_IB_MLX5DV_ST_SZ_BYTES(create_dct_in)] = {}; + char out[UCT_IB_MLX5DV_ST_SZ_BYTES(create_dct_out)] = {}; + struct mlx5dv_pd dvpd = {}; + struct mlx5dv_cq dvcq = {}; + struct mlx5dv_srq dvsrq = {}; + struct mlx5dv_obj dv = {}; + int dvflags; + void *dctc; + + dvflags = MLX5DV_OBJ_PD | MLX5DV_OBJ_CQ; + dv.pd.in = uct_ib_iface_md(&iface->super.super.super)->pd; + dv.pd.out = &dvpd; + dv.cq.in = iface->super.super.super.cq[UCT_IB_DIR_RX]; + dv.cq.out = &dvcq; + + if (!UCT_RC_MLX5_TM_ENABLED(&iface->super)) { + dvflags |= MLX5DV_OBJ_SRQ; + dv.srq.in = iface->super.rx.srq.verbs.srq; + dv.srq.out = &dvsrq; + dvsrq.comp_mask = MLX5DV_SRQ_MASK_SRQN; + } + + mlx5dv_init_obj(&dv, dvflags); /* NOTE(review): return value is not checked; on failure dvpd/dvcq/dvsrq stay zero-initialized */ + + UCT_IB_MLX5DV_SET(create_dct_in, in, opcode, UCT_IB_MLX5_CMD_OP_CREATE_DCT); + dctc = UCT_IB_MLX5DV_ADDR_OF(create_dct_in, in, dct_context_entry); + UCT_IB_MLX5DV_SET(dctc, dctc, pd, dvpd.pdn); + if (UCT_RC_MLX5_TM_ENABLED(&iface->super)) { + UCT_IB_MLX5DV_SET(dctc, dctc, srqn_xrqn, iface->super.rx.srq.srq_num); + UCT_IB_MLX5DV_SET(dctc, dctc, offload_type, UCT_IB_MLX5_QPC_OFFLOAD_TYPE_RNDV); + } else { + UCT_IB_MLX5DV_SET(dctc, dctc, srqn_xrqn, dvsrq.srqn); + } + UCT_IB_MLX5DV_SET(dctc, dctc, cqn, dvcq.cqn); + UCT_IB_MLX5DV_SET64(dctc, dctc, dc_access_key, UCT_IB_KEY); + + UCT_IB_MLX5DV_SET(dctc, dctc, rre, true); + UCT_IB_MLX5DV_SET(dctc, dctc, rwe, true); + UCT_IB_MLX5DV_SET(dctc, dctc, rae,
true); + UCT_IB_MLX5DV_SET(dctc, dctc, cs_res, uct_ib_mlx5_qpc_cs_res( + iface->super.super.super.config.max_inl_resp)); + UCT_IB_MLX5DV_SET(dctc, dctc, atomic_mode, UCT_IB_MLX5_ATOMIC_MODE); + UCT_IB_MLX5DV_SET(dctc, dctc, pkey_index, iface->super.super.super.pkey_index); + UCT_IB_MLX5DV_SET(dctc, dctc, port, iface->super.super.super.config.port_num); + + UCT_IB_MLX5DV_SET(dctc, dctc, min_rnr_nak, iface->super.super.config.min_rnr_timer); + UCT_IB_MLX5DV_SET(dctc, dctc, tclass, iface->super.super.super.config.traffic_class); + UCT_IB_MLX5DV_SET(dctc, dctc, mtu, iface->super.super.config.path_mtu); + UCT_IB_MLX5DV_SET(dctc, dctc, my_addr_index, iface->super.super.super.config.gid_index); + UCT_IB_MLX5DV_SET(dctc, dctc, hop_limit, iface->super.super.super.config.hop_limit); + + iface->rx.dct.devx.obj = mlx5dv_devx_obj_create(dev->ibv_context, in, sizeof(in), + out, sizeof(out)); + if (iface->rx.dct.devx.obj == NULL) { + ucs_error("mlx5dv_devx_obj_create(DCT) failed, syndrome %x: %m", + UCT_IB_MLX5DV_GET(create_dct_out, out, syndrome)); + return UCS_ERR_INVALID_PARAM; + } + + iface->rx.dct.type = UCT_IB_MLX5_OBJ_TYPE_DEVX; + iface->rx.dct.qp_num = UCT_IB_MLX5DV_GET(create_dct_out, out, dctn); + return UCS_OK; +} +/* Issue RST2INIT, INIT2RTR and RTR2RTS modify-QP commands for a DCI QP; returns the first non-zero status */ +ucs_status_t +uct_dc_mlx5_iface_devx_dci_connect(uct_dc_mlx5_iface_t *iface, + uct_ib_mlx5_qp_t *qp) +{ + char in_2init[UCT_IB_MLX5DV_ST_SZ_BYTES(rst2init_qp_in)] = {}; + char out_2init[UCT_IB_MLX5DV_ST_SZ_BYTES(rst2init_qp_out)] = {}; + char in_2rtr[UCT_IB_MLX5DV_ST_SZ_BYTES(init2rtr_qp_in)] = {}; + char out_2rtr[UCT_IB_MLX5DV_ST_SZ_BYTES(init2rtr_qp_out)] = {}; + char in_2rts[UCT_IB_MLX5DV_ST_SZ_BYTES(rtr2rts_qp_in)] = {}; + char out_2rts[UCT_IB_MLX5DV_ST_SZ_BYTES(rtr2rts_qp_out)] = {}; + ucs_status_t status; + void *qpc; + + UCT_IB_MLX5DV_SET(rst2init_qp_in, in_2init, opcode, UCT_IB_MLX5_CMD_OP_RST2INIT_QP); + UCT_IB_MLX5DV_SET(rst2init_qp_in, in_2init, qpn, qp->qp_num); + + qpc = UCT_IB_MLX5DV_ADDR_OF(rst2init_qp_in, in_2init, qpc); +
UCT_IB_MLX5DV_SET(qpc, qpc, pm_state, UCT_IB_MLX5_QPC_PM_STATE_MIGRATED); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.vhca_port_num, iface->super.super.super.config.port_num); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.pkey_index, iface->super.super.super.pkey_index); + + status = uct_ib_mlx5_devx_modify_qp(qp, in_2init, sizeof(in_2init), + out_2init, sizeof(out_2init)); + if (status) { + return status; + } +/* second command: INIT2RTR */ + UCT_IB_MLX5DV_SET(init2rtr_qp_in, in_2rtr, opcode, UCT_IB_MLX5_CMD_OP_INIT2RTR_QP); + UCT_IB_MLX5DV_SET(init2rtr_qp_in, in_2rtr, qpn, qp->qp_num); + UCT_IB_MLX5DV_SET(init2rtr_qp_in, in_2rtr, opt_param_mask, 4); /* NOTE(review): magic value 4 - document which optional parameter it selects */ + + qpc = UCT_IB_MLX5DV_ADDR_OF(init2rtr_qp_in, in_2rtr, qpc); + UCT_IB_MLX5DV_SET(qpc, qpc, pm_state, UCT_IB_MLX5_QPC_PM_STATE_MIGRATED); + UCT_IB_MLX5DV_SET(qpc, qpc, mtu, iface->super.super.config.path_mtu); + UCT_IB_MLX5DV_SET(qpc, qpc, log_msg_max, UCT_IB_MLX5_LOG_MAX_MSG_SIZE); + UCT_IB_MLX5DV_SET(qpc, qpc, atomic_mode, UCT_IB_MLX5_ATOMIC_MODE); + UCT_IB_MLX5DV_SET(qpc, qpc, rae, true); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.sl, iface->super.super.super.config.sl); + + status = uct_ib_mlx5_devx_modify_qp(qp, in_2rtr, sizeof(in_2rtr), + out_2rtr, sizeof(out_2rtr)); + if (status) { + return status; + } +/* third command: RTR2RTS */ + UCT_IB_MLX5DV_SET(rtr2rts_qp_in, in_2rts, opcode, UCT_IB_MLX5_CMD_OP_RTR2RTS_QP); + UCT_IB_MLX5DV_SET(rtr2rts_qp_in, in_2rts, qpn, qp->qp_num); + + qpc = UCT_IB_MLX5DV_ADDR_OF(rtr2rts_qp_in, in_2rts, qpc); + UCT_IB_MLX5DV_SET(qpc, qpc, pm_state, UCT_IB_MLX5_QPC_PM_STATE_MIGRATED); + UCT_IB_MLX5DV_SET(qpc, qpc, log_sra_max, ucs_ilog2_or0(iface->super.super.config.max_rd_atomic)); + UCT_IB_MLX5DV_SET(qpc, qpc, retry_count, iface->super.super.config.retry_cnt); + UCT_IB_MLX5DV_SET(qpc, qpc, rnr_retry, iface->super.super.config.rnr_retry); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.ack_timeout, iface->super.super.config.timeout); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.log_rtm,
iface->super.super.config.exp_backoff); + + return uct_ib_mlx5_devx_modify_qp(qp, in_2rts, sizeof(in_2rts), + out_2rts, sizeof(out_2rts)); +} +/* Set DC parameters (pkey index, mtu, SLs, ack timeout, access key) on the interface SRQ DEVX object; UCS_ERR_IO_ERROR on failure */ +ucs_status_t uct_dc_mlx5_iface_devx_set_srq_dc_params(uct_dc_mlx5_iface_t *iface) +{ + char in[UCT_IB_MLX5DV_ST_SZ_BYTES(set_xrq_dc_params_entry_in)] = {}; + char out[UCT_IB_MLX5DV_ST_SZ_BYTES(set_xrq_dc_params_entry_out)] = {}; + int ret; + + UCT_IB_MLX5DV_SET(set_xrq_dc_params_entry_in, in, pkey_table_index, iface->super.super.super.pkey_index); + UCT_IB_MLX5DV_SET(set_xrq_dc_params_entry_in, in, mtu, iface->super.super.config.path_mtu); + UCT_IB_MLX5DV_SET(set_xrq_dc_params_entry_in, in, sl, iface->super.super.super.config.sl); + UCT_IB_MLX5DV_SET(set_xrq_dc_params_entry_in, in, reverse_sl, iface->super.super.super.config.sl); + UCT_IB_MLX5DV_SET(set_xrq_dc_params_entry_in, in, cnak_reverse_sl, iface->super.super.super.config.sl); + UCT_IB_MLX5DV_SET(set_xrq_dc_params_entry_in, in, ack_timeout, iface->super.super.config.timeout); + UCT_IB_MLX5DV_SET64(set_xrq_dc_params_entry_in, in, dc_access_key, UCT_IB_KEY); + UCT_IB_MLX5DV_SET(set_xrq_dc_params_entry_in, in, xrqn, iface->super.rx.srq.srq_num); + UCT_IB_MLX5DV_SET(set_xrq_dc_params_entry_in, in, opcode, + UCT_IB_MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY); + + ret = mlx5dv_devx_obj_modify(iface->super.rx.srq.devx.obj, in, sizeof(in), out, sizeof(out)); + if (ret) { + ucs_error("mlx5dv_devx_obj_modify(SET_XRQ_DC_PARAMS) failed, syndrome %x: %m", + UCT_IB_MLX5DV_GET(set_xrq_dc_params_entry_out, out, syndrome)); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + diff --git a/src/uct/ib/dc/dc_mlx5_ep.c b/src/uct/ib/dc/dc_mlx5_ep.c new file mode 100644 index 0000000..c395e47 --- /dev/null +++ b/src/uct/ib/dc/dc_mlx5_ep.c @@ -0,0 +1,1313 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms.
+*/ + +#include "dc_mlx5_ep.h" +#include "dc_mlx5.h" + +#include +#include +/* Loads into _txqp/_txwq the txqp and txwq of the DCI whose index is stored in (_ep)->dci */ +#define UCT_DC_MLX5_IFACE_TXQP_GET(_iface, _ep, _txqp, _txwq) \ +{ \ + uint8_t dci; \ + dci = (_ep)->dci; \ + _txqp = &(_iface)->tx.dcis[dci].txqp; \ + _txwq = &(_iface)->tx.dcis[dci].txwq; \ +} +/* Post 'buffer' with uct_rc_mlx5_txqp_dptr_post() on the ep's DCI (CQ_UPDATE always set) and add desc as a send op */ +static UCS_F_ALWAYS_INLINE void +uct_dc_mlx5_iface_bcopy_post(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep, + unsigned opcode, unsigned length, + /* RDMA */ uint64_t rdma_raddr, uct_rkey_t rdma_rkey, + uct_rc_iface_send_desc_t *desc, uint8_t send_flags, + uint32_t imm_val_be, const void *buffer, + uct_ib_log_sge_t *log_sge) +{ + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + + UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq); + desc->super.sn = txwq->sw_pi; + uct_rc_mlx5_txqp_dptr_post(&iface->super, UCT_IB_QPT_DCI, txqp, txwq, + opcode, buffer, length, &desc->lkey, + rdma_raddr, uct_ib_md_direct_rkey(rdma_rkey), + 0, 0, 0, 0, + &ep->av, uct_dc_mlx5_ep_get_grh(ep), + uct_ib_mlx5_wqe_av_size(&ep->av), + MLX5_WQE_CTRL_CQ_UPDATE | send_flags, imm_val_be, INT_MAX, + log_sge); + uct_rc_txqp_add_send_op(txqp, &desc->super); +} + +/* Post an iov list on the ep's DCI (CQ_UPDATE always set) and register comp under the sw_pi value read before posting */ +static UCS_F_ALWAYS_INLINE void +uct_dc_mlx5_iface_zcopy_post(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep, + unsigned opcode, const uct_iov_t *iov, size_t iovcnt, + /* SEND */ uint8_t am_id, const void *am_hdr, unsigned am_hdr_len, + /* RDMA */ uint64_t rdma_raddr, uct_rkey_t rdma_rkey, + /* TAG */ uct_tag_t tag, uint32_t app_ctx, uint32_t ib_imm_be, + uct_completion_t *comp, uint8_t send_flags) +{ + uint16_t sn; + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + + UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq); + + sn = txwq->sw_pi; + uct_rc_mlx5_txqp_dptr_post_iov(&iface->super, UCT_IB_QPT_DCI, txqp, + txwq, opcode, iov, iovcnt, + am_id, am_hdr, am_hdr_len, + rdma_raddr, uct_ib_md_direct_rkey(rdma_rkey), + tag, app_ctx, ib_imm_be, + &ep->av, uct_dc_mlx5_ep_get_grh(ep), + uct_ib_mlx5_wqe_av_size(&ep->av), + MLX5_WQE_CTRL_CQ_UPDATE | send_flags, +
UCT_IB_MAX_ZCOPY_LOG_SGE(&iface->super.super.super)); + + uct_rc_txqp_add_send_comp(&iface->super.super, txqp, comp, sn, + UCT_RC_IFACE_SEND_OP_FLAG_ZCOPY); +} +/* Post an atomic operation on the ep's DCI; the data pointer passed to the post routine is desc + 1 */ +static UCS_F_ALWAYS_INLINE void +uct_dc_mlx5_iface_atomic_post(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep, + unsigned opcode, uct_rc_iface_send_desc_t *desc, unsigned length, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t compare_mask, uint64_t compare, + uint64_t swap_mask, uint64_t swap_add) +{ + uint32_t ib_rkey = uct_ib_resolve_atomic_rkey(rkey, ep->atomic_mr_offset, + &remote_addr); + + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq); + + desc->super.sn = txwq->sw_pi; + uct_rc_mlx5_txqp_dptr_post(&iface->super, UCT_IB_QPT_DCI, txqp, txwq, + opcode, desc + 1, length, &desc->lkey, + remote_addr, ib_rkey, + compare_mask, compare, swap_mask, swap_add, + &ep->av, uct_dc_mlx5_ep_get_grh(ep), + uct_ib_mlx5_wqe_av_size(&ep->av), + MLX5_WQE_CTRL_CQ_UPDATE, 0, INT_MAX, NULL); + + UCT_TL_EP_STAT_ATOMIC(&ep->super); + uct_rc_txqp_add_send_op(txqp, &desc->super); +} +/* Non-fetching atomic: derive the mlx5 opcode and operands from (opcode, size, value) and post; returns UCS_OK once posted */ +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_dc_mlx5_ep_atomic_op_post(uct_ep_h tl_ep, unsigned opcode, unsigned size, + uint64_t value, uint64_t remote_addr, uct_rkey_t rkey) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + uct_rc_iface_send_desc_t *desc; + int op; + uint64_t compare_mask; + uint64_t compare; + uint64_t swap_mask; + uint64_t swap; + int ext; /* not used here */ + ucs_status_t status; + + UCT_DC_MLX5_CHECK_RES(iface, ep); + UCT_RC_MLX5_CHECK_ATOMIC_OPS(opcode, size, UCT_RC_MLX5_ATOMIC_OPS); + + status = uct_rc_mlx5_iface_common_atomic_data(opcode, size, value, &op, &compare_mask, + &compare, &swap_mask, &swap, &ext); + if (ucs_unlikely(UCS_STATUS_IS_ERR(status))) { + return status; + } + + UCT_RC_IFACE_GET_TX_ATOMIC_DESC(&iface->super.super, &iface->super.tx.atomic_desc_mp, desc);
+ uct_dc_mlx5_iface_atomic_post(iface, ep, op, desc, size, remote_addr, rkey, + compare_mask, compare, swap_mask, swap); + return UCS_OK; +} +/* Fetching atomic: obtains a fetch descriptor (given result and comp), posts it and returns UCS_INPROGRESS */ +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_dc_mlx5_ep_atomic_fop(uct_dc_mlx5_ep_t *ep, int opcode, void *result, int ext, + unsigned length, uint64_t remote_addr, uct_rkey_t rkey, + uint64_t compare_mask, uint64_t compare, + uint64_t swap_mask, uint64_t swap_add, uct_completion_t *comp) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_dc_mlx5_iface_t); + uct_rc_iface_send_desc_t *desc; + + UCT_DC_MLX5_CHECK_RES(iface, ep); + UCT_RC_IFACE_GET_TX_ATOMIC_FETCH_DESC(&iface->super.super, &iface->super.tx.atomic_desc_mp, + desc, uct_rc_iface_atomic_handler(&iface->super.super, + ext, length), + result, comp); + uct_dc_mlx5_iface_atomic_post(iface, ep, opcode, desc, length, remote_addr, rkey, + compare_mask, compare, swap_mask, swap_add); + return UCS_INPROGRESS; +} +/* Check a fetching atomic op, derive the mlx5 opcode and operands, and forward to uct_dc_mlx5_ep_atomic_fop() */ +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_dc_mlx5_ep_atomic_fop_post(uct_ep_h tl_ep, unsigned opcode, unsigned size, + uint64_t value, void *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + int op; + uint64_t compare_mask; + uint64_t compare; + uint64_t swap_mask; + uint64_t swap; + int ext; + ucs_status_t status; + + UCT_RC_MLX5_CHECK_ATOMIC_OPS(opcode, size, UCT_RC_MLX5_ATOMIC_FOPS); + + status = uct_rc_mlx5_iface_common_atomic_data(opcode, size, value, &op, &compare_mask, + &compare, &swap_mask, &swap, &ext); + if (ucs_unlikely(UCS_STATUS_IS_ERR(status))) { + return status; + } + + return uct_dc_mlx5_ep_atomic_fop(ep, op, result, ext, size, remote_addr, rkey, + compare_mask, compare, swap_mask, swap, comp); +} +/* 64-bit compare-and-swap (MLX5_OPCODE_ATOMIC_CS); compare and swap are converted with htobe64() */ +ucs_status_t uct_dc_mlx5_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, uint64_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t *result, uct_completion_t *comp) +{ + return
uct_dc_mlx5_ep_atomic_fop(ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t), + MLX5_OPCODE_ATOMIC_CS, result, 0, sizeof(uint64_t), + remote_addr, rkey, 0, htobe64(compare), UINT64_MAX, + htobe64(swap), comp); +} +/* 32-bit compare-and-swap, posted as MLX5_OPCODE_ATOMIC_MASKED_CS with compare mask UCS_MASK(32); operands converted with htonl() */ +ucs_status_t uct_dc_mlx5_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, uint32_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint32_t *result, uct_completion_t *comp) +{ + return uct_dc_mlx5_ep_atomic_fop(ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t), + MLX5_OPCODE_ATOMIC_MASKED_CS, result, 1, + sizeof(uint32_t), remote_addr, rkey, UCS_MASK(32), + htonl(compare), UINT64_MAX, htonl(swap), comp); +} +/* 32-bit non-fetching atomic; forwards to uct_dc_mlx5_ep_atomic_op_post() with size 4 */ +ucs_status_t uct_dc_mlx5_ep_atomic32_post(uct_ep_h ep, unsigned opcode, uint32_t value, + uint64_t remote_addr, uct_rkey_t rkey) +{ + return uct_dc_mlx5_ep_atomic_op_post(ep, opcode, sizeof(value), value, remote_addr, rkey); +} +/* 64-bit non-fetching atomic; forwards to uct_dc_mlx5_ep_atomic_op_post() with size 8 */ +ucs_status_t uct_dc_mlx5_ep_atomic64_post(uct_ep_h ep, unsigned opcode, uint64_t value, + uint64_t remote_addr, uct_rkey_t rkey) +{ + return uct_dc_mlx5_ep_atomic_op_post(ep, opcode, sizeof(value), value, remote_addr, rkey); +} +/* 64-bit fetching atomic; forwards to uct_dc_mlx5_ep_atomic_fop_post() */ +ucs_status_t uct_dc_mlx5_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint64_t value, uint64_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + return uct_dc_mlx5_ep_atomic_fop_post(ep, opcode, sizeof(value), value, result, + remote_addr, rkey, comp); +} +/* 32-bit fetching atomic; forwards to uct_dc_mlx5_ep_atomic_fop_post() */ +ucs_status_t uct_dc_mlx5_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint32_t value, uint32_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + return uct_dc_mlx5_ep_atomic_fop_post(ep, opcode, sizeof(value), value, result, + remote_addr, rkey, comp); +} +/* Fence via uct_rc_ep_fence(). NOTE(review): the dcis[ep->dci] address is formed even when ep->dci is UCT_DC_MLX5_EP_NO_DCI - confirm the callee does not use it in that case */ +ucs_status_t uct_dc_mlx5_ep_fence(uct_ep_h tl_ep, unsigned flags) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + + return uct_rc_ep_fence(tl_ep, &iface->tx.dcis[ep->dci].txwq.fi, + ep->dci !=
UCT_DC_MLX5_EP_NO_DCI); +} +/* Short active message posted inline (MLX5_OPCODE_SEND, SOLICITED) on the ep's DCI; updates the FC window and returns UCS_OK */ +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_dc_mlx5_ep_am_short_inline(uct_ep_h tl_ep, uint8_t id, uint64_t hdr, + const void *buffer, unsigned length) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + + UCT_RC_MLX5_CHECK_AM_SHORT(id, length, UCT_IB_MLX5_AV_FULL_SIZE); + UCT_DC_CHECK_RES_AND_FC(iface, ep); + + UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq); + + uct_rc_mlx5_txqp_inline_post(&iface->super, UCT_IB_QPT_DCI, + txqp, txwq, + MLX5_OPCODE_SEND, + buffer, length, id, hdr, 0, + 0, 0, + &ep->av, uct_dc_mlx5_ep_get_grh(ep), + uct_ib_mlx5_wqe_av_size(&ep->av), + MLX5_WQE_CTRL_SOLICITED, INT_MAX); + + UCT_RC_UPDATE_FC_WND(&iface->super.super, &ep->fc); + UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, sizeof(hdr) + length); + return UCS_OK; +} +/* DM path for short sends: uct_rc_mlx5_common_dm_make_data() prepares desc/buffer from cache and payload, then the data is posted */ +#if HAVE_IBV_DM +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_dc_mlx5_ep_short_dm(uct_dc_mlx5_ep_t *ep, uct_rc_mlx5_dm_copy_data_t *cache, + size_t hdr_len, const void *payload, unsigned length, + unsigned opcode, uint8_t fm_ce_se, + uint64_t rdma_raddr, uct_rkey_t rdma_rkey) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_dc_mlx5_iface_t); + uct_rc_iface_send_desc_t *desc; + void *buffer; + ucs_status_t status; + uct_ib_log_sge_t log_sge; + + status = uct_rc_mlx5_common_dm_make_data(&iface->super, cache, hdr_len, + payload, length, &desc, + &buffer, &log_sge); + if (ucs_unlikely(UCS_STATUS_IS_ERR(status))) { + return status; + } + + uct_dc_mlx5_iface_bcopy_post(iface, ep, opcode, + hdr_len + length, + rdma_raddr, rdma_rkey, + desc, fm_ce_se, 0, buffer, + log_sge.num_sge ?
&log_sge : NULL); + return UCS_OK; +} +#endif +/* Short AM: inline path when the message fits UCT_IB_MLX5_AM_MAX_SHORT or iface->super.dm.dm is unset (always without HAVE_IBV_DM), DM path otherwise */ +ucs_status_t uct_dc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr, + const void *buffer, unsigned length) +{ +#if HAVE_IBV_DM + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + ucs_status_t status; + uct_rc_mlx5_dm_copy_data_t cache; + + if (ucs_likely((sizeof(uct_rc_mlx5_am_short_hdr_t) + length <= + UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) || + !iface->super.dm.dm)) { +#endif + return uct_dc_mlx5_ep_am_short_inline(tl_ep, id, hdr, buffer, length); +#if HAVE_IBV_DM + } + + UCT_CHECK_AM_ID(id); + UCT_CHECK_LENGTH(length + sizeof(uct_rc_mlx5_am_short_hdr_t), 0, + iface->super.dm.seg_len, "am_short"); + UCT_DC_CHECK_RES_AND_FC(iface, ep); + + uct_rc_mlx5_am_hdr_fill(&cache.am_hdr.rc_hdr, id); + cache.am_hdr.am_hdr = hdr; + + status = uct_dc_mlx5_ep_short_dm(ep, &cache, sizeof(cache.am_hdr), buffer, length, + MLX5_OPCODE_SEND, + MLX5_WQE_CTRL_SOLICITED | MLX5_WQE_CTRL_CQ_UPDATE, + 0, 0); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, sizeof(cache.am_hdr) + length); + UCT_RC_UPDATE_FC_WND(&iface->super.super, &ep->fc); + return UCS_OK; +#endif +} +/* Buffered-copy AM: a tx descriptor is obtained and filled using pack_cb, then posted; returns the packed length */ +ssize_t uct_dc_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + uct_rc_iface_send_desc_t *desc; + size_t length; + + UCT_CHECK_AM_ID(id); + UCT_DC_CHECK_RES_AND_FC(iface, ep); + UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC(&iface->super.super, &iface->super.super.tx.mp, desc, + id, uct_rc_mlx5_am_hdr_fill, uct_rc_mlx5_hdr_t, + pack_cb, arg, &length); + + uct_dc_mlx5_iface_bcopy_post(iface, ep, MLX5_OPCODE_SEND, + sizeof(uct_rc_mlx5_hdr_t) + length, 0, 0, desc, + MLX5_WQE_CTRL_SOLICITED, 0, desc + 1, NULL); +
UCT_RC_UPDATE_FC_WND(&iface->super.super, &ep->fc); + UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, length); + return length; +} +/* Zero-copy AM: header plus iov are posted as MLX5_OPCODE_SEND (SOLICITED); comp is registered and UCS_INPROGRESS returned */ +ucs_status_t uct_dc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header, + unsigned header_length, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + + UCT_CHECK_IOV_SIZE(iovcnt, UCT_IB_MLX5_AM_ZCOPY_MAX_IOV, + "uct_dc_mlx5_ep_am_zcopy"); + UCT_RC_MLX5_CHECK_AM_ZCOPY(id, header_length, uct_iov_total_length(iov, iovcnt), + iface->super.super.super.config.seg_size, + UCT_IB_MLX5_AV_FULL_SIZE); + UCT_DC_CHECK_RES_AND_FC(iface, ep); + + uct_dc_mlx5_iface_zcopy_post(iface, ep, MLX5_OPCODE_SEND, iov, iovcnt, + id, header, header_length, 0, 0, 0ul, 0, 0, + comp, MLX5_WQE_CTRL_SOLICITED); + + UCT_RC_UPDATE_FC_WND(&iface->super.super, &ep->fc); + UCT_TL_EP_STAT_OP(&ep->super, AM, ZCOPY, header_length + + uct_iov_total_length(iov, iovcnt)); + + return UCS_INPROGRESS; +} +/* Short put posted inline as MLX5_OPCODE_RDMA_WRITE on the ep's DCI; returns UCS_OK once posted */ +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_dc_mlx5_ep_put_short_inline(uct_ep_h tl_ep, const void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + + UCT_RC_MLX5_CHECK_PUT_SHORT(length, UCT_IB_MLX5_AV_FULL_SIZE); + UCT_DC_MLX5_CHECK_RES(iface, ep); + + UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq); + uct_rc_mlx5_txqp_inline_post(&iface->super, UCT_IB_QPT_DCI, + txqp, txwq, + MLX5_OPCODE_RDMA_WRITE, + buffer, length, 0, 0, 0, + remote_addr, uct_ib_md_direct_rkey(rkey), + &ep->av, uct_dc_mlx5_ep_get_grh(ep), + uct_ib_mlx5_wqe_av_size(&ep->av), 0, INT_MAX); + + UCT_TL_EP_STAT_OP(&ep->super, PUT, SHORT, length); + + return UCS_OK; +} +/* Short put: inline path when length fits UCT_IB_MLX5_PUT_MAX_SHORT or iface->super.dm.dm is unset (always without HAVE_IBV_DM), DM path otherwise */ +ucs_status_t uct_dc_mlx5_ep_put_short(uct_ep_h
tl_ep, const void *payload, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey) +{ +#if HAVE_IBV_DM + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + ucs_status_t status; + + if (ucs_likely((length <= UCT_IB_MLX5_PUT_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) || + !iface->super.dm.dm)) { +#endif + return uct_dc_mlx5_ep_put_short_inline(tl_ep, payload, length, remote_addr, rkey); +#if HAVE_IBV_DM + } + + UCT_CHECK_LENGTH(length, 0, iface->super.dm.seg_len, "put_short"); + UCT_DC_MLX5_CHECK_RES(iface, ep); + status = uct_dc_mlx5_ep_short_dm(ep, NULL, 0, payload, length, + MLX5_OPCODE_RDMA_WRITE, + MLX5_WQE_CTRL_CQ_UPDATE, + remote_addr, rkey); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + UCT_TL_EP_STAT_OP(&ep->super, PUT, SHORT, length); + return UCS_OK; +#endif +} +/* Buffered-copy put: a tx descriptor is obtained and filled using pack_cb, then posted as RDMA_WRITE; returns the packed length */ +ssize_t uct_dc_mlx5_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb, + void *arg, uint64_t remote_addr, uct_rkey_t rkey) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + uct_rc_iface_send_desc_t *desc; + size_t length; + + UCT_DC_MLX5_CHECK_RES(iface, ep); + UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC(&iface->super.super, &iface->super.super.tx.mp, + desc, pack_cb, arg, length); + uct_dc_mlx5_iface_bcopy_post(iface, ep, MLX5_OPCODE_RDMA_WRITE, length, + remote_addr, rkey, desc, 0, 0, desc + 1, NULL); + UCT_TL_EP_STAT_OP(&ep->super, PUT, BCOPY, length); + return length; +} +/* Zero-copy put: the iov is posted as RDMA_WRITE; comp is registered and UCS_INPROGRESS returned */ +ucs_status_t uct_dc_mlx5_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + + UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(&iface->super.super.super), +
"uct_dc_mlx5_ep_put_zcopy"); + UCT_CHECK_LENGTH(uct_iov_total_length(iov, iovcnt), 0, UCT_IB_MAX_MESSAGE_SIZE, + "put_zcopy"); + UCT_DC_MLX5_CHECK_RES(iface, ep); + + uct_dc_mlx5_iface_zcopy_post(iface, ep, MLX5_OPCODE_RDMA_WRITE, iov, iovcnt, + 0, NULL, 0, remote_addr, rkey, 0ul, 0, 0, + comp, 0); + + UCT_TL_EP_STAT_OP(&ep->super, PUT, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + return UCS_INPROGRESS; +} +/* Buffered-copy get: posts RDMA_READ into a tx descriptor set up with unpack_cb, arg and comp; returns UCS_INPROGRESS */ +ucs_status_t uct_dc_mlx5_ep_get_bcopy(uct_ep_h tl_ep, + uct_unpack_callback_t unpack_cb, + void *arg, size_t length, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + uct_rc_iface_send_desc_t *desc; + + UCT_CHECK_LENGTH(length, 0, iface->super.super.super.config.seg_size, "get_bcopy"); + UCT_DC_MLX5_CHECK_RES(iface, ep); + UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC(&iface->super.super, &iface->super.super.tx.mp, + desc, unpack_cb, comp, arg, length); + uct_dc_mlx5_iface_bcopy_post(iface, ep, MLX5_OPCODE_RDMA_READ, length, + remote_addr, rkey, desc, 0, 0, desc + 1, NULL); + UCT_TL_EP_STAT_OP(&ep->super, GET, BCOPY, length); + return UCS_INPROGRESS; +} + +/* Zero-copy get (RDMA_READ); total iov length is checked with bounds max_inl_resp + 1 and UCT_IB_MAX_MESSAGE_SIZE; returns UCS_INPROGRESS */ +ucs_status_t uct_dc_mlx5_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + + UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(&iface->super.super.super), + "uct_dc_mlx5_ep_get_zcopy"); + UCT_CHECK_LENGTH(uct_iov_total_length(iov, iovcnt), + iface->super.super.super.config.max_inl_resp + 1, UCT_IB_MAX_MESSAGE_SIZE, + "get_zcopy"); + UCT_DC_MLX5_CHECK_RES(iface, ep); + + uct_dc_mlx5_iface_zcopy_post(iface, ep, MLX5_OPCODE_RDMA_READ, iov, iovcnt, + 0, NULL, 0, remote_addr, rkey, 0ul, 0, 0, + comp, 0); +
UCT_TL_EP_STAT_OP(&ep->super, GET, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + return UCS_INPROGRESS; +} +/* Flush the endpoint. With UCT_FLUSH_FLAG_CANCEL: purge pending requests and cancel via uct_dc_mlx5_ep_handle_failure(). Otherwise: UCS_OK if nothing is outstanding, UCS_ERR_NO_RESOURCE if it cannot progress, else comp is registered on the DCI txqp */ +ucs_status_t uct_dc_mlx5_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + ucs_status_t status; + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + + if (ucs_unlikely(flags & UCT_FLUSH_FLAG_CANCEL)) { + if (uct_dc_mlx5_iface_is_dci_rand(iface)) { + return UCS_ERR_UNSUPPORTED; + } + + uct_ep_pending_purge(tl_ep, NULL, 0); + if (ep->dci == UCT_DC_MLX5_EP_NO_DCI) { + /* No dci -> no WQEs -> HW is clean, nothing to cancel */ + return UCS_OK; + } + + uct_dc_mlx5_ep_handle_failure(ep, NULL, UCS_ERR_CANCELED); + return UCS_OK; + } + + if (!uct_dc_mlx5_iface_has_tx_resources(iface)) { + return UCS_ERR_NO_RESOURCE; + } + + if (ep->dci == UCT_DC_MLX5_EP_NO_DCI) { + if (!uct_dc_mlx5_iface_dci_can_alloc(iface)) { + return UCS_ERR_NO_RESOURCE; /* waiting for dci */ + } else { + UCT_TL_EP_STAT_FLUSH(&ep->super); /* no sends */ + return UCS_OK; + } + } + + if (!uct_dc_mlx5_iface_dci_ep_can_send(ep)) { + return UCS_ERR_NO_RESOURCE; /* cannot send */ + } + + status = uct_dc_mlx5_iface_flush_dci(iface, ep->dci); + if (status == UCS_OK) { + UCT_TL_EP_STAT_FLUSH(&ep->super); + return UCS_OK; /* all sends completed */ + } + + ucs_assert(status == UCS_INPROGRESS); + ucs_assert(ep->dci != UCT_DC_MLX5_EP_NO_DCI); + + UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq); + + return uct_rc_txqp_add_flush_comp(&iface->super.super, &ep->super, txqp, + comp, txwq->sig_pi); +} +/* Short eager tag send posted inline (IBV_TMH_EAGER, SOLICITED) on the ep's DCI; compiled only with IBV_HW_TM */ +#if IBV_HW_TM +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_dc_mlx5_ep_tag_eager_short_inline(uct_ep_h tl_ep, uct_tag_t tag, + const void *data, size_t length) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + +
UCT_CHECK_LENGTH(length + sizeof(struct ibv_tmh), 0, + UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE), + "uct_dc_mlx5_ep_tag_short"); + UCT_DC_MLX5_CHECK_RES(iface, ep); + + UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq); + + uct_rc_mlx5_txqp_tag_inline_post(&iface->super, UCT_IB_QPT_DCI, + txqp, txwq, MLX5_OPCODE_SEND, data, length, + NULL, tag, 0, IBV_TMH_EAGER, 0, + &ep->av, uct_dc_mlx5_ep_get_grh(ep), + uct_ib_mlx5_wqe_av_size(&ep->av), NULL, 0, + MLX5_WQE_CTRL_SOLICITED); + + UCT_TL_EP_STAT_OP(&ep->super, TAG, SHORT, length); + + return UCS_OK; +} + +ucs_status_t uct_dc_mlx5_ep_tag_eager_short(uct_ep_h tl_ep, uct_tag_t tag, + const void *data, size_t length) +{ +#if HAVE_IBV_DM + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + uct_rc_mlx5_dm_copy_data_t cache; + ucs_status_t status; + + if (ucs_likely((sizeof(struct ibv_tmh) + length <= + UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE)) || + !iface->super.dm.dm)) { +#endif + return uct_dc_mlx5_ep_tag_eager_short_inline(tl_ep, tag, data, length); +#if HAVE_IBV_DM + } + + UCT_CHECK_LENGTH(length + sizeof(struct ibv_tmh), 0, + iface->super.dm.seg_len, "tag_short"); + UCT_DC_MLX5_CHECK_RES(iface, ep); + + uct_rc_mlx5_fill_tmh(ucs_unaligned_ptr(&cache.tm_hdr), tag, 0, IBV_TMH_EAGER); + + status = uct_dc_mlx5_ep_short_dm(ep, &cache, sizeof(cache.tm_hdr), data, + length, MLX5_OPCODE_SEND, + MLX5_WQE_CTRL_SOLICITED | MLX5_WQE_CTRL_CQ_UPDATE, + 0, 0); + if (!UCS_STATUS_IS_ERR(status)) { + UCT_TL_EP_STAT_OP(&ep->super, TAG, SHORT, length); + } + + return status; +#endif +} + +ssize_t uct_dc_mlx5_ep_tag_eager_bcopy(uct_ep_h tl_ep, uct_tag_t tag, + uint64_t imm, + uct_pack_callback_t pack_cb, + void *arg, unsigned flags) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + uct_rc_iface_send_desc_t *desc; + uint32_t 
app_ctx, ib_imm; + int opcode; + size_t length; + + UCT_DC_MLX5_CHECK_RES(iface, ep); + + UCT_RC_MLX5_FILL_TM_IMM(imm, app_ctx, ib_imm, opcode, MLX5_OPCODE_SEND, + _IMM); + + UCT_RC_MLX5_IFACE_GET_TM_BCOPY_DESC(&iface->super.super, + iface->super.tm.bcopy_mp, + desc, tag, app_ctx, pack_cb, + arg, length); + + uct_dc_mlx5_iface_bcopy_post(iface, ep, opcode, + sizeof(struct ibv_tmh) + length, + 0, 0, desc, MLX5_WQE_CTRL_SOLICITED, ib_imm, + desc + 1, NULL); + + UCT_TL_EP_STAT_OP(&ep->super, TAG, BCOPY, length); + + return length; +} + +ucs_status_t uct_dc_mlx5_ep_tag_eager_zcopy(uct_ep_h tl_ep, uct_tag_t tag, + uint64_t imm, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + uint32_t app_ctx, ib_imm; + int opcode; + + UCT_CHECK_IOV_SIZE(iovcnt, UCT_RC_MLX5_TM_EAGER_ZCOPY_MAX_IOV(UCT_IB_MLX5_AV_FULL_SIZE), + "uct_dc_mlx5_ep_tag_eager_zcopy"); + UCT_RC_CHECK_ZCOPY_DATA(sizeof(struct ibv_tmh), + uct_iov_total_length(iov, iovcnt), + iface->super.tm.max_zcopy); + + UCT_DC_MLX5_CHECK_RES(iface, ep); + + UCT_RC_MLX5_FILL_TM_IMM(imm, app_ctx, ib_imm, opcode, MLX5_OPCODE_SEND, _IMM); + + uct_dc_mlx5_iface_zcopy_post(iface, ep, opcode|UCT_RC_MLX5_OPCODE_FLAG_TM, + iov, iovcnt, 0, "", 0, 0, 0, tag, app_ctx, + ib_imm, comp, MLX5_WQE_CTRL_SOLICITED); + + UCT_TL_EP_STAT_OP(&ep->super, TAG, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + + return UCS_INPROGRESS; +} + +ucs_status_ptr_t uct_dc_mlx5_ep_tag_rndv_zcopy(uct_ep_h tl_ep, uct_tag_t tag, + const void *header, + unsigned header_length, + const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + unsigned tm_hdr_len = sizeof(struct ibv_tmh) + + sizeof(struct ibv_rvh) + + 
sizeof(struct ibv_ravh); + struct ibv_ravh ravh; + uint32_t op_index; + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + + UCT_RC_MLX5_CHECK_RNDV_PARAMS(iovcnt, header_length, tm_hdr_len, + UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE), + iface->super.tm.max_rndv_data + + UCT_RC_MLX5_TMH_PRIV_LEN); + UCT_DC_CHECK_RES_PTR(iface, ep); + + op_index = uct_rc_mlx5_tag_get_op_id(&iface->super, comp); + + uct_dc_mlx5_iface_fill_ravh(&ravh, iface->rx.dct.qp_num); + + UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq); + + uct_rc_mlx5_txqp_tag_inline_post(&iface->super, UCT_IB_QPT_DCI, + txqp, txwq, MLX5_OPCODE_SEND, header, + header_length, iov, tag, op_index, + IBV_TMH_RNDV, 0, &ep->av, + uct_dc_mlx5_ep_get_grh(ep), + uct_ib_mlx5_wqe_av_size(&ep->av), &ravh, + sizeof(ravh), MLX5_WQE_CTRL_SOLICITED); + + return (ucs_status_ptr_t)((uint64_t)op_index); +} + +ucs_status_t uct_dc_mlx5_ep_tag_rndv_request(uct_ep_h tl_ep, uct_tag_t tag, + const void* header, + unsigned header_length, + unsigned flags) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + + UCT_CHECK_LENGTH(header_length + sizeof(struct ibv_tmh), 0, + UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE), + "tag_rndv_request"); + UCT_DC_MLX5_CHECK_RES(iface, ep); + + UCT_DC_MLX5_IFACE_TXQP_GET(iface, ep, txqp, txwq); + + uct_rc_mlx5_txqp_tag_inline_post(&iface->super, UCT_IB_QPT_DCI, + txqp, txwq, MLX5_OPCODE_SEND_IMM, header, + header_length, NULL, tag, 0, + IBV_TMH_EAGER, 0, &ep->av, + uct_dc_mlx5_ep_get_grh(ep), + uct_ib_mlx5_wqe_av_size(&ep->av), NULL, 0, + MLX5_WQE_CTRL_SOLICITED); + return UCS_OK; +} + +ucs_status_t uct_dc_mlx5_iface_tag_recv_zcopy(uct_iface_h tl_iface, + uct_tag_t tag, + uct_tag_t tag_mask, + const uct_iov_t *iov, + size_t iovcnt, + uct_tag_context_t *ctx) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_dc_mlx5_iface_t); + + return 
uct_rc_mlx5_iface_common_tag_recv(&iface->super, tag, tag_mask, + iov, iovcnt, ctx); +} + +ucs_status_t uct_dc_mlx5_iface_tag_recv_cancel(uct_iface_h tl_iface, + uct_tag_context_t *ctx, + int force) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_dc_mlx5_iface_t); + + return uct_rc_mlx5_iface_common_tag_recv_cancel(&iface->super, ctx, force); +} +#endif + +ucs_status_t uct_dc_mlx5_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op, + uct_rc_fc_request_t *req) +{ + uct_dc_mlx5_ep_t *dc_ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, + uct_dc_mlx5_iface_t); + uct_ib_iface_t *ib_iface = &iface->super.super.super; + struct ibv_ah_attr ah_attr = {.is_global = 0}; + uct_dc_fc_sender_data_t sender; + uct_dc_fc_request_t *dc_req; + struct mlx5_wqe_av mlx5_av; + uct_ib_mlx5_base_av_t av; + ucs_status_t status; + uintptr_t sender_ep; + struct ibv_ah *ah; + + UCT_DC_MLX5_TXQP_DECL(txqp, txwq); + + ucs_assert((sizeof(uint8_t) + sizeof(sender_ep)) <= + UCT_IB_MLX5_AV_FULL_SIZE); + + UCT_DC_MLX5_CHECK_RES(iface, dc_ep); + UCT_DC_MLX5_IFACE_TXQP_GET(iface, dc_ep, txqp, txwq); + + dc_req = ucs_derived_of(req, uct_dc_fc_request_t); + + if (op == UCT_RC_EP_FC_PURE_GRANT) { + ucs_assert(req != NULL); + + sender_ep = (uintptr_t)dc_req->sender.ep; + + /* TODO: look at common code with uct_ud_mlx5_iface_get_av */ + if (dc_req->sender.global.is_global) { + uct_ib_iface_fill_ah_attr_from_gid_lid(ib_iface, dc_req->lid, + ucs_unaligned_ptr(&dc_req->sender.global.gid), + &ah_attr); + + status = uct_ib_iface_create_ah(ib_iface, &ah_attr, &ah); + if (status != UCS_OK) { + return status; + } + + uct_ib_mlx5_get_av(ah, &mlx5_av); + } + + /* Note av initialization is copied from exp verbs */ + av.stat_rate_sl = ib_iface->config.sl; /* (attr->static_rate << 4) | attr->sl */ + av.fl_mlid = ib_iface->path_bits[0] & 0x7f; + + /* lid in dc_req is in BE already */ + av.rlid = uct_ib_iface_is_roce(ib_iface) ? 
0 : + (dc_req->lid | htons(ib_iface->path_bits[0])); + av.dqp_dct = htonl(dc_req->dct_num); + uct_dc_mlx5_iface_set_av_sport(iface, &av, dc_req->dct_num); + + if (!iface->ud_common.config.compact_av || ah_attr.is_global) { + av.dqp_dct |= UCT_IB_MLX5_EXTENDED_UD_AV; + } + + uct_rc_mlx5_txqp_inline_post(&iface->super, UCT_IB_QPT_DCI, + txqp, txwq, MLX5_OPCODE_SEND, + &av /*dummy*/, 0, op, sender_ep, 0, + 0, 0, + &av, ah_attr.is_global ? mlx5_av_grh(&mlx5_av) : NULL, + uct_ib_mlx5_wqe_av_size(&av), 0, INT_MAX); + } else { + ucs_assert(op == UCT_RC_EP_FC_FLAG_HARD_REQ); + sender.ep = (uint64_t)dc_ep; + sender.global.gid = ib_iface->gid; + sender.global.is_global = dc_ep->flags & UCT_DC_MLX5_EP_FLAG_GRH; + + UCS_STATS_UPDATE_COUNTER(dc_ep->fc.stats, + UCT_RC_FC_STAT_TX_HARD_REQ, 1); + + uct_rc_mlx5_txqp_inline_post(&iface->super, UCT_IB_QPT_DCI, + txqp, txwq, MLX5_OPCODE_SEND_IMM, + &sender.global, sizeof(sender.global), op, sender.ep, + iface->rx.dct.qp_num, + 0, 0, + &dc_ep->av, + uct_dc_mlx5_ep_get_grh(dc_ep), + uct_ib_mlx5_wqe_av_size(&dc_ep->av), + MLX5_WQE_CTRL_SOLICITED, INT_MAX); + } + + return UCS_OK; +} + + +UCS_CLASS_INIT_FUNC(uct_dc_mlx5_ep_t, uct_dc_mlx5_iface_t *iface, const uct_dc_mlx5_iface_addr_t *if_addr, + uct_ib_mlx5_base_av_t *av) +{ + uint32_t remote_dctn; + + ucs_trace_func(""); + + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super.super.super.super); + + self->atomic_mr_offset = uct_ib_md_atomic_offset(if_addr->atomic_mr_id); + remote_dctn = uct_ib_unpack_uint24(if_addr->qp_num); + + memcpy(&self->av, av, sizeof(*av)); + self->av.dqp_dct |= htonl(remote_dctn); + uct_dc_mlx5_iface_set_av_sport(iface, &self->av, remote_dctn); + + return uct_dc_mlx5_ep_basic_init(iface, self); +} + +static UCS_CLASS_CLEANUP_FUNC(uct_dc_mlx5_ep_t) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(self->super.super.iface, uct_dc_mlx5_iface_t); + + uct_dc_mlx5_ep_pending_purge(&self->super.super, NULL, NULL); + uct_rc_fc_cleanup(&self->fc); + + 
ucs_assert_always(self->flags & UCT_DC_MLX5_EP_FLAG_VALID); + + if ((self->dci == UCT_DC_MLX5_EP_NO_DCI) || + uct_dc_mlx5_iface_is_dci_rand(iface)) { + return; + } + + /* TODO: this is good for dcs policy only. + * Need to change if eps share dci + */ + ucs_arbiter_group_cleanup(uct_dc_mlx5_ep_arb_group(iface, self)); + ucs_assertv_always(uct_dc_mlx5_iface_dci_has_outstanding(iface, self->dci), + "iface (%p) ep (%p) dci leak detected: dci=%d", iface, + self, self->dci); + + /* we can handle it but well behaving app should not do this */ + ucs_debug("ep (%p) is destroyed with %d outstanding ops", + self, (int16_t)iface->super.super.config.tx_qp_len - + uct_rc_txqp_available(&iface->tx.dcis[self->dci].txqp)); + uct_rc_txqp_purge_outstanding(&iface->tx.dcis[self->dci].txqp, UCS_ERR_CANCELED, 1); + iface->tx.dcis[self->dci].ep = NULL; +} + +UCS_CLASS_DEFINE(uct_dc_mlx5_ep_t, uct_base_ep_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_dc_mlx5_ep_t, uct_ep_t, uct_dc_mlx5_iface_t *, + const uct_dc_mlx5_iface_addr_t *, + uct_ib_mlx5_base_av_t *); + +UCS_CLASS_INIT_FUNC(uct_dc_mlx5_grh_ep_t, uct_dc_mlx5_iface_t *iface, + const uct_dc_mlx5_iface_addr_t *if_addr, + uct_ib_mlx5_base_av_t *av, + struct mlx5_grh_av *grh_av) +{ + ucs_trace_func(""); + + UCS_CLASS_CALL_SUPER_INIT(uct_dc_mlx5_ep_t, iface, if_addr, av); + + self->super.flags |= UCT_DC_MLX5_EP_FLAG_GRH; + memcpy(&self->grh_av, grh_av, sizeof(*grh_av)); + return UCS_OK; +} + +UCS_CLASS_CLEANUP_FUNC(uct_dc_mlx5_grh_ep_t) +{ + ucs_trace_func(""); +} + +UCS_CLASS_DEFINE(uct_dc_mlx5_grh_ep_t, uct_dc_mlx5_ep_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_dc_mlx5_grh_ep_t, uct_ep_t, uct_dc_mlx5_iface_t *, + const uct_dc_mlx5_iface_addr_t *, + uct_ib_mlx5_base_av_t *, struct mlx5_grh_av *); + +void uct_dc_mlx5_ep_cleanup(uct_ep_h tl_ep, ucs_class_t *cls) +{ + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_dc_mlx5_iface_t); + + UCS_CLASS_CLEANUP_CALL(cls, ep); + 
+ if (uct_dc_mlx5_ep_fc_wait_for_grant(ep)) { + ucs_trace("not releasing dc_mlx5_ep %p - waiting for grant", ep); + ep->flags &= ~UCT_DC_MLX5_EP_FLAG_VALID; + /* No need to wait for grant on this ep anymore */ + uct_dc_mlx5_ep_clear_fc_grant_flag(iface, ep); + ucs_list_add_tail(&iface->tx.gc_list, &ep->list); + } else { + ucs_free(ep); + } +} +/* Free an ep that was already invalidated (VALID flag cleared): unlink its 'list' member (uct_dc_mlx5_ep_cleanup parks such eps on iface->tx.gc_list) and release its memory */ +void uct_dc_mlx5_ep_release(uct_dc_mlx5_ep_t *ep) +{ + ucs_assert_always(!(ep->flags & UCT_DC_MLX5_EP_FLAG_VALID)); + ucs_debug("release dc_mlx5_ep %p", ep); + ucs_list_del(&ep->list); + ucs_free(ep); +} +/* Queue pending request r on the ep's arbiter group (the per-DCI group with the rand policy, ep->arb_group otherwise) and schedule that group: for DCI allocation if the ep has no DCI, for TX otherwise. A non-zero push_to_head selects the push_head variant; flags is not used here. */ +void uct_dc_mlx5_ep_pending_common(uct_dc_mlx5_iface_t *iface, + uct_dc_mlx5_ep_t *ep, uct_pending_req_t *r, + unsigned flags, int push_to_head) +{ + int no_dci = (ep->dci == UCT_DC_MLX5_EP_NO_DCI); + ucs_arbiter_group_t *group; + /* the DC private data is overlaid on the request's priv area, so it must fit: measure the struct type, not the accessor function of the same base name */ + UCS_STATIC_ASSERT(sizeof(uct_dc_mlx5_pending_req_priv_t) <= + UCT_PENDING_REQ_PRIV_LEN); + + if (uct_dc_mlx5_iface_is_dci_rand(iface)) { + uct_dc_mlx5_pending_req_priv(r)->ep = ep; + group = uct_dc_mlx5_ep_rand_arb_group(iface, ep); + } else { + group = &ep->arb_group; + } + + if (push_to_head) { + uct_pending_req_arb_group_push_head(no_dci ? + uct_dc_mlx5_iface_dci_waitq(iface) : + uct_dc_mlx5_iface_tx_waitq(iface), + group, r); + } else { + uct_pending_req_arb_group_push(group, r); + } + + if (no_dci) { + /* no dci: + * Do not grab dci here. Instead put the group on dci allocation arbiter. + * This way we can assure fairness between all eps waiting for + * dci allocation. Relevant for dcs and dcs_quota policies.
+ */ + uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep); + } else { + uct_dc_mlx5_iface_dci_sched_tx(iface, ep); + } + + UCT_TL_EP_STAT_PEND(&ep->super); +} +/* Returns UCS_ERR_BUSY without queuing when the iface has TX resources and the ep either can be given a DCI (with a non-empty FC window) or can send on its current DCI; otherwise queues r through uct_dc_mlx5_ep_pending_common() with push_to_head = 0 and returns UCS_OK */ + +/* TODO: + currently pending code supports only dcs policy + support hash/random policies + */ +ucs_status_t uct_dc_mlx5_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *r, + unsigned flags) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + + /* ep can tx iff + * - iface has resources: cqe and tx skb + * - dci is either assigned or can be assigned + * - dci has resources + */ + if (uct_dc_mlx5_iface_has_tx_resources(iface)) { + if (ep->dci == UCT_DC_MLX5_EP_NO_DCI) { + if (uct_dc_mlx5_iface_dci_can_alloc(iface) && (ep->fc.fc_wnd > 0)) { + return UCS_ERR_BUSY; + } + } else { + if (uct_dc_mlx5_iface_dci_ep_can_send(ep)) { + return UCS_ERR_BUSY; + } + } + } + + uct_dc_mlx5_ep_pending_common(iface, ep, r, flags, 0); + + return UCS_OK; +} + +/** + * dispatch requests waiting for dci allocation + * Relevant for dcs and dcs_quota policies only.
+ */ +ucs_arbiter_cb_result_t +uct_dc_mlx5_iface_dci_do_pending_wait(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_dc_mlx5_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_dc_mlx5_ep_t, arb_group); + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_dc_mlx5_iface_t); + + ucs_assert(!uct_dc_mlx5_iface_is_dci_rand(iface)); + + if (ep->dci != UCT_DC_MLX5_EP_NO_DCI) { + return UCS_ARBITER_CB_RESULT_DESCHED_GROUP; + } + + if (!uct_dc_mlx5_iface_dci_can_alloc(iface)) { + return UCS_ARBITER_CB_RESULT_STOP; + } + uct_dc_mlx5_iface_dci_alloc(iface, ep); + ucs_assert_always(ep->dci != UCT_DC_MLX5_EP_NO_DCI); + uct_dc_mlx5_iface_dci_sched_tx(iface, ep); + return UCS_ARBITER_CB_RESULT_DESCHED_GROUP; +} + +ucs_arbiter_cb_result_t +uct_dc_mlx5_iface_dci_do_common_pending_tx(uct_dc_mlx5_ep_t *ep, + ucs_arbiter_elem_t *elem) +{ + uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv); + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_dc_mlx5_iface_t); + ucs_status_t status; + + if (!uct_dc_mlx5_iface_has_tx_resources(iface)) { + return UCS_ARBITER_CB_RESULT_STOP; + } + + ucs_trace_data("progressing pending request %p", req); + status = req->func(req); + ucs_trace_data("status returned from progress pending: %s", + ucs_status_string(status)); + + if (status == UCS_OK) { + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } else if (status == UCS_INPROGRESS) { + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } + + if (!uct_dc_mlx5_iface_dci_ep_can_send(ep)) { + return UCS_ARBITER_CB_RESULT_DESCHED_GROUP; + } + + ucs_assertv(!uct_dc_mlx5_iface_has_tx_resources(iface), + "pending callback returned error but send resources are available"); + return UCS_ARBITER_CB_RESULT_STOP; +} + +/** + * dispatch requests waiting for tx resources (dcs* DCI policies) + */ +ucs_arbiter_cb_result_t +uct_dc_mlx5_iface_dci_do_dcs_pending_tx(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) +{ + + 
uct_dc_mlx5_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), + uct_dc_mlx5_ep_t, arb_group); + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_dc_mlx5_iface_t); + ucs_arbiter_cb_result_t res; + + res = uct_dc_mlx5_iface_dci_do_common_pending_tx(ep, elem); + if (res == UCS_ARBITER_CB_RESULT_REMOVE_ELEM) { + /* For dcs* policies release dci if this is the last elem in the group + * and the dci has no outstanding operations. For example pending + * callback did not send anything. (uct_ep_flush or just return ok) + */ + if (ucs_arbiter_elem_is_last(&ep->arb_group, elem)) { + uct_dc_mlx5_iface_dci_free(iface, ep); + } + } + + return res; +} + +/** + * dispatch requests waiting for tx resources (rand DCI policy) + */ +ucs_arbiter_cb_result_t +uct_dc_mlx5_iface_dci_do_rand_pending_tx(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv); + uct_dc_mlx5_ep_t *ep = uct_dc_mlx5_pending_req_priv(req)->ep; + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_dc_mlx5_iface_t); + ucs_arbiter_cb_result_t res; + + res = uct_dc_mlx5_iface_dci_do_common_pending_tx(ep, elem); + if ((res == UCS_ARBITER_CB_RESULT_DESCHED_GROUP) && + uct_rc_fc_has_resources(&iface->super.super, &ep->fc)) { + /* We can't desched group with rand policy if non FC resources are + * missing, since it's never scheduled again. 
*/ + res = UCS_ARBITER_CB_RESULT_RESCHED_GROUP; + } + + return res; +} + +static ucs_arbiter_cb_result_t +uct_dc_mlx5_ep_abriter_purge_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, void *arg) +{ + uct_purge_cb_args_t *cb_args = arg; + void **priv_args = cb_args->arg; + uct_dc_mlx5_ep_t *ep = priv_args[0]; + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_dc_mlx5_iface_t); + uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv); + uct_rc_fc_request_t *freq; + + if (uct_dc_mlx5_iface_is_dci_rand(iface) && + (uct_dc_mlx5_pending_req_priv(req)->ep != ep)) { + /* element belongs to another ep - do not remove it */ + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } + + if (ucs_likely(req->func != uct_dc_mlx5_iface_fc_grant)){ + if (cb_args->cb != NULL) { + cb_args->cb(req, priv_args[1]); + } else { + ucs_debug("ep=%p cancelling user pending request %p", ep, req); + } + } else { + /* User callback should not be called for FC messages. + * Just return pending request memory to the pool */ + freq = ucs_derived_of(req, uct_rc_fc_request_t); + ucs_mpool_put(freq); + } + + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; +} + +void uct_dc_mlx5_ep_pending_purge(uct_ep_h tl_ep, uct_pending_purge_callback_t cb, void *arg) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_dc_mlx5_iface_t); + uct_dc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_dc_mlx5_ep_t); + void *priv_args[2] = {ep, arg}; + uct_purge_cb_args_t args = {cb, priv_args}; + + if (uct_dc_mlx5_iface_is_dci_rand(iface)) { + ucs_arbiter_group_purge(uct_dc_mlx5_iface_tx_waitq(iface), + uct_dc_mlx5_ep_rand_arb_group(iface, ep), + uct_dc_mlx5_ep_abriter_purge_cb, &args); + return; + } + + if (ep->dci == UCT_DC_MLX5_EP_NO_DCI) { + ucs_arbiter_group_purge(uct_dc_mlx5_iface_dci_waitq(iface), &ep->arb_group, + uct_dc_mlx5_ep_abriter_purge_cb, &args); + } else { + ucs_arbiter_group_purge(uct_dc_mlx5_iface_tx_waitq(iface), &ep->arb_group, + uct_dc_mlx5_ep_abriter_purge_cb, 
&args); + uct_dc_mlx5_iface_dci_free(iface, ep); + } +} +/* Flow-control bookkeeping for ep. FC enabled: apply UCT_RC_CHECK_FC_WND and, once fc_wnd equals the hard threshold with no grant pending, send a hard FC request, mark the ep as waiting for the grant and count it in iface->tx.fc_grants. FC disabled: keep fc_wnd at INT16_MAX. Returns UCS_OK or the failing uct_rc_fc_ctrl() status; NOTE(review): UCT_RC_CHECK_FC_WND is a macro defined elsewhere and may add its own early return - confirm. */ +ucs_status_t uct_dc_mlx5_ep_check_fc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) +{ + ucs_status_t status; + + if (iface->super.super.config.fc_enabled) { + UCT_RC_CHECK_FC_WND(&ep->fc, ep->super.stats); + if ((ep->fc.fc_wnd == iface->super.super.config.fc_hard_thresh) && + !uct_dc_mlx5_ep_fc_wait_for_grant(ep)) { + status = uct_rc_fc_ctrl(&ep->super.super, + UCT_RC_EP_FC_FLAG_HARD_REQ, + NULL); + if (status != UCS_OK) { + return status; + } + ep->fc.flags |= UCT_DC_MLX5_EP_FC_FLAG_WAIT_FOR_GRANT; + ++iface->tx.fc_grants; + } + } else { + /* Set fc_wnd to max, to send as much as possible without checks */ + ep->fc.fc_wnd = INT16_MAX; + } + return UCS_OK; +} +/* Handle an error or cancel on ep (non-rand DCI policies only, asserted): purge the DCI's outstanding ops with ep_status, restore cq_available and the txqp available count, release the DCI, call set_ep_failed unless ep is the iface FC ep, report the completion error unless ep_status is UCS_ERR_CANCELED, then call uct_dc_mlx5_iface_reset_dci() and uct_dc_mlx5_iface_dci_connect() on the DCI (fatal on failure). arg is only forwarded to uct_ib_mlx5_completion_with_err(). */ +void uct_dc_mlx5_ep_handle_failure(uct_dc_mlx5_ep_t *ep, void *arg, + ucs_status_t ep_status) +{ + uct_iface_h tl_iface = ep->super.super.iface; + uint8_t dci = ep->dci; + uct_ib_iface_t *ib_iface = ucs_derived_of(tl_iface, uct_ib_iface_t); + uct_dc_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_dc_mlx5_iface_t); + uct_rc_txqp_t *txqp = &iface->tx.dcis[dci].txqp; + uct_ib_mlx5_txwq_t *txwq = &iface->tx.dcis[dci].txwq; + int16_t outstanding; + ucs_status_t status; + + ucs_assert(!uct_dc_mlx5_iface_is_dci_rand(iface)); + + uct_rc_txqp_purge_outstanding(txqp, ep_status, 0); + + /* poll_cqe for mlx5 returns NULL in case of failure and the cq_available + is not updated for the error cqe and all outstanding wqes */ + outstanding = (int16_t)iface->super.super.config.tx_qp_len - + uct_rc_txqp_available(txqp); + iface->super.super.tx.cq_available += outstanding; + uct_rc_txqp_available_set(txqp, (int16_t)iface->super.super.config.tx_qp_len); + + /* since we removed all outstanding ops on the dci, it should be released */ + ucs_assert(ep->dci != UCT_DC_MLX5_EP_NO_DCI); + uct_dc_mlx5_iface_dci_put(iface, dci); + ucs_assert_always(ep->dci == UCT_DC_MLX5_EP_NO_DCI); + + if (uct_dc_mlx5_ep_fc_wait_for_grant(ep)) { + /* No need to wait for grant on this ep
anymore */ + uct_dc_mlx5_ep_clear_fc_grant_flag(iface, ep); + } + + if (ep == iface->tx.fc_ep) { + ucs_assert(ep_status != UCS_ERR_CANCELED); + /* Cannot handle errors on flow-control endpoint. + * Or shall we ignore them? + */ + ucs_debug("got error on DC flow-control endpoint, iface %p: %s", iface, + ucs_status_string(ep_status)); + } else { + status = ib_iface->ops->set_ep_failed(ib_iface, &ep->super.super, + ep_status); + if (status != UCS_OK) { + uct_ib_mlx5_completion_with_err(ib_iface, arg, + &iface->tx.dcis[dci].txwq, + UCS_LOG_LEVEL_FATAL); + return; + } + } + + if (ep_status != UCS_ERR_CANCELED) { + uct_ib_mlx5_completion_with_err(ib_iface, arg, &iface->tx.dcis[dci].txwq, + ib_iface->super.config.failure_level); + } + + status = uct_dc_mlx5_iface_reset_dci(iface, &iface->tx.dcis[dci]); + if (status != UCS_OK) { + ucs_fatal("iface %p failed to reset dci[%d] qpn 0x%x: %s", + iface, dci, txwq->super.qp_num, ucs_status_string(status)); + } + + status = uct_dc_mlx5_iface_dci_connect(iface, &iface->tx.dcis[dci]); + if (status != UCS_OK) { + ucs_fatal("iface %p failed to connect dci[%d] qpn 0x%x: %s", + iface, dci, txwq->super.qp_num, ucs_status_string(status)); + } +} diff --git a/src/uct/ib/dc/dc_mlx5_ep.h b/src/uct/ib/dc/dc_mlx5_ep.h new file mode 100644 index 0000000..8b71ad3 --- /dev/null +++ b/src/uct/ib/dc/dc_mlx5_ep.h @@ -0,0 +1,550 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2016-2020. ALL RIGHTS RESERVED. + +* See file LICENSE for terms. +*/ + +#ifndef UCT_DC_EP_H +#define UCT_DC_EP_H + +#include +#include +#include + +#include "dc_mlx5.h" + +#define UCT_DC_MLX5_EP_NO_DCI ((uint8_t)-1) + + +enum { + /* Indicates that FC grant has been requested, but is not received yet. + * Flush will not complete until an outgoing grant request is acked. + * It is needed to avoid the following cases: + * 1) Grant arrives for the recently deleted ep. + * 2) QP resources are available, but there are some pending requests.
*/ + UCT_DC_MLX5_EP_FC_FLAG_WAIT_FOR_GRANT = UCS_BIT(0) +}; + +struct uct_dc_mlx5_ep { + /* + * per value of 'flags': + * INVALID - 'list' is added to iface->tx.gc_list. + * Otherwise - 'super' and 'arb_group' are used. + */ + union { + struct { + uct_base_ep_t super; + ucs_arbiter_group_t arb_group; + }; + ucs_list_link_t list; + }; + + uint8_t dci; + uint8_t flags; + uint16_t atomic_mr_offset; + uct_rc_fc_t fc; + uct_ib_mlx5_base_av_t av; +}; + +typedef struct { + uct_dc_mlx5_ep_t super; + struct mlx5_grh_av grh_av; +} uct_dc_mlx5_grh_ep_t; + +typedef struct { + uct_pending_req_priv_arb_t arb; + uct_dc_mlx5_ep_t *ep; +} uct_dc_mlx5_pending_req_priv_t; + + +UCS_CLASS_DECLARE(uct_dc_mlx5_ep_t, uct_dc_mlx5_iface_t *, const uct_dc_mlx5_iface_addr_t *, + uct_ib_mlx5_base_av_t *); + +UCS_CLASS_DECLARE(uct_dc_mlx5_grh_ep_t, uct_dc_mlx5_iface_t *, + const uct_dc_mlx5_iface_addr_t *, + uct_ib_mlx5_base_av_t *, struct mlx5_grh_av *); + + +ucs_status_t uct_dc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *payload, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey); + +ssize_t uct_dc_mlx5_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb, + void *arg, uint64_t remote_addr, uct_rkey_t rkey); + +ucs_status_t uct_dc_mlx5_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +ucs_status_t uct_dc_mlx5_ep_get_bcopy(uct_ep_h tl_ep, + uct_unpack_callback_t unpack_cb, + void *arg, size_t length, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +ucs_status_t uct_dc_mlx5_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +ucs_status_t uct_dc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr, + const void *buffer, unsigned length); + +ssize_t uct_dc_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags); + +ucs_status_t 
uct_dc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header, + unsigned header_length, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp); + +ucs_status_t uct_dc_mlx5_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, uint64_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t *result, uct_completion_t *comp); + +ucs_status_t uct_dc_mlx5_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, uint32_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint32_t *result, uct_completion_t *comp); + +ucs_status_t uct_dc_mlx5_ep_atomic64_post(uct_ep_h ep, unsigned opcode, uint64_t value, + uint64_t remote_addr, uct_rkey_t rkey); + +ucs_status_t uct_dc_mlx5_ep_atomic32_post(uct_ep_h ep, unsigned opcode, uint32_t value, + uint64_t remote_addr, uct_rkey_t rkey); + +ucs_status_t uct_dc_mlx5_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint64_t value, uint64_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +ucs_status_t uct_dc_mlx5_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint32_t value, uint32_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +#if IBV_HW_TM +ucs_status_t uct_dc_mlx5_ep_tag_eager_short(uct_ep_h tl_ep, uct_tag_t tag, + const void *data, size_t length); + +ssize_t uct_dc_mlx5_ep_tag_eager_bcopy(uct_ep_h tl_ep, uct_tag_t tag, + uint64_t imm, + uct_pack_callback_t pack_cb, + void *arg, unsigned flags); + +ucs_status_t uct_dc_mlx5_ep_tag_eager_zcopy(uct_ep_h tl_ep, uct_tag_t tag, + uint64_t imm, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp); + +ucs_status_ptr_t uct_dc_mlx5_ep_tag_rndv_zcopy(uct_ep_h tl_ep, uct_tag_t tag, + const void *header, + unsigned header_length, + const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp); + +ucs_status_t uct_dc_mlx5_ep_tag_rndv_request(uct_ep_h tl_ep, uct_tag_t tag, + const void* header, + unsigned header_length, + unsigned flags); + 
+ucs_status_t uct_dc_mlx5_iface_tag_recv_zcopy(uct_iface_h tl_iface, + uct_tag_t tag, + uct_tag_t tag_mask, + const uct_iov_t *iov, + size_t iovcnt, + uct_tag_context_t *ctx); + +ucs_status_t uct_dc_mlx5_iface_tag_recv_cancel(uct_iface_h tl_iface, + uct_tag_context_t *ctx, + int force); +#endif + +ucs_status_t uct_dc_mlx5_ep_fence(uct_ep_h tl_ep, unsigned flags); + +ucs_status_t uct_dc_mlx5_ep_flush(uct_ep_h tl_ep, unsigned flags, uct_completion_t *comp); + +ucs_status_t uct_dc_mlx5_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op, + uct_rc_fc_request_t *req); + +ucs_arbiter_cb_result_t +uct_dc_mlx5_iface_dci_do_pending_wait(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg); + +ucs_arbiter_cb_result_t +uct_dc_mlx5_iface_dci_do_dcs_pending_tx(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg); + +ucs_arbiter_cb_result_t +uct_dc_mlx5_iface_dci_do_rand_pending_tx(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg); + +ucs_status_t uct_dc_mlx5_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *r, + unsigned flags); +void uct_dc_mlx5_ep_pending_purge(uct_ep_h tl_ep, uct_pending_purge_callback_t cb, void *arg); + +void uct_dc_mlx5_ep_pending_common(uct_dc_mlx5_iface_t *iface, + uct_dc_mlx5_ep_t *ep, uct_pending_req_t *r, + unsigned flags, int push_to_head); + +void uct_dc_mlx5_ep_cleanup(uct_ep_h tl_ep, ucs_class_t *cls); + +void uct_dc_mlx5_ep_release(uct_dc_mlx5_ep_t *ep); + +static UCS_F_ALWAYS_INLINE uct_dc_mlx5_pending_req_priv_t * +uct_dc_mlx5_pending_req_priv(uct_pending_req_t *req) +{ + return (uct_dc_mlx5_pending_req_priv_t *)&(req)->priv; +} + +static UCS_F_ALWAYS_INLINE int uct_dc_mlx5_iface_is_dci_rand(uct_dc_mlx5_iface_t *iface) +{ + return iface->tx.policy == UCT_DC_TX_POLICY_RAND; +} + +static UCS_F_ALWAYS_INLINE ucs_arbiter_group_t* +uct_dc_mlx5_ep_rand_arb_group(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) +{ + ucs_assert(uct_dc_mlx5_iface_is_dci_rand(iface) && + (ep->dci != UCT_DC_MLX5_EP_NO_DCI)); + /* If DCI random 
policy is used, DCI is always assigned to EP */ + return &iface->tx.dcis[ep->dci].arb_group; +} + +static UCS_F_ALWAYS_INLINE ucs_arbiter_group_t* +uct_dc_mlx5_ep_arb_group(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) +{ + return (uct_dc_mlx5_iface_is_dci_rand(iface)) ? + uct_dc_mlx5_ep_rand_arb_group(iface, ep) : &ep->arb_group; +} + +static UCS_F_ALWAYS_INLINE void +uct_dc_mlx5_iface_dci_sched_tx(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) +{ + if (uct_dc_mlx5_iface_is_dci_rand(iface)) { + ucs_arbiter_group_schedule(uct_dc_mlx5_iface_tx_waitq(iface), + uct_dc_mlx5_ep_rand_arb_group(iface, ep)); + } else if (uct_dc_mlx5_iface_dci_has_tx_resources(iface, ep->dci)) { + ucs_arbiter_group_schedule(uct_dc_mlx5_iface_tx_waitq(iface), + &ep->arb_group); + } +} + +static UCS_F_ALWAYS_INLINE uct_dc_mlx5_ep_t * +uct_dc_mlx5_ep_from_dci(uct_dc_mlx5_iface_t *iface, uint8_t dci) +{ + /* Can be used with dcs* policies only, with rand policy every dci may + * be used by many eps */ + ucs_assert(!uct_dc_mlx5_iface_is_dci_rand(iface)); + return iface->tx.dcis[dci].ep; +} + +static UCS_F_ALWAYS_INLINE void +uct_dc_mlx5_ep_clear_fc_grant_flag(uct_dc_mlx5_iface_t *iface, + uct_dc_mlx5_ep_t *ep) +{ + ucs_assert((ep->fc.flags & UCT_DC_MLX5_EP_FC_FLAG_WAIT_FOR_GRANT) && + iface->tx.fc_grants); + ep->fc.flags &= ~UCT_DC_MLX5_EP_FC_FLAG_WAIT_FOR_GRANT; + --iface->tx.fc_grants; +} + +enum uct_dc_mlx5_ep_flags { + UCT_DC_MLX5_EP_FLAG_TX_WAIT = UCS_BIT(0), /* ep is in the tx_wait state. See + description of the dcs+quota dci + selection policy above */ + UCT_DC_MLX5_EP_FLAG_GRH = UCS_BIT(1), /* ep has GRH address. 
Used by + dc_mlx5 endpoint */ + UCT_DC_MLX5_EP_FLAG_VALID = UCS_BIT(2) /* ep is a valid endpoint */ +}; + + +void uct_dc_mlx5_ep_handle_failure(uct_dc_mlx5_ep_t *ep, void *arg, + ucs_status_t status); + +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_dc_mlx5_ep_basic_init(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) +{ + ucs_arbiter_group_init(&ep->arb_group); + + if (uct_dc_mlx5_iface_is_dci_rand(iface)) { + /* coverity[dont_call] */ + ep->dci = rand_r(&iface->tx.rand_seed) % iface->tx.ndci; + } else { + ep->dci = UCT_DC_MLX5_EP_NO_DCI; + } + + /* valid = 1, global = 0, tx_wait = 0 */ + ep->flags = UCT_DC_MLX5_EP_FLAG_VALID; + + return uct_rc_fc_init(&ep->fc, iface->super.super.config.fc_wnd_size + UCS_STATS_ARG(ep->super.stats)); +} + +static UCS_F_ALWAYS_INLINE int +uct_dc_mlx5_iface_dci_can_alloc(uct_dc_mlx5_iface_t *iface) +{ + return iface->tx.stack_top < iface->tx.ndci; +} + +static UCS_F_ALWAYS_INLINE void +uct_dc_mlx5_iface_progress_pending(uct_dc_mlx5_iface_t *iface) +{ + do { + /** + * Pending op on the tx_waitq can complete with the UCS_OK + * status without actually sending anything on the dci. + * In this case pending ops on the waitq may never be + * scheduled. + * + * So we keep progressing pending while dci_waitq is not + * empty and it is possible to allocate a dci. + * NOTE: in case of rand dci allocation policy, dci_waitq is always + * empty. 
+ */ + if (uct_dc_mlx5_iface_dci_can_alloc(iface) && + !uct_dc_mlx5_iface_is_dci_rand(iface)) { + ucs_arbiter_dispatch(uct_dc_mlx5_iface_dci_waitq(iface), 1, + uct_dc_mlx5_iface_dci_do_pending_wait, NULL); + } + ucs_arbiter_dispatch(uct_dc_mlx5_iface_tx_waitq(iface), 1, + iface->tx.pend_cb, NULL); + + } while (ucs_unlikely(!ucs_arbiter_is_empty(uct_dc_mlx5_iface_dci_waitq(iface)) && + uct_dc_mlx5_iface_dci_can_alloc(iface))); +} + +static inline int uct_dc_mlx5_iface_dci_ep_can_send(uct_dc_mlx5_ep_t *ep) +{ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_dc_mlx5_iface_t); + return (!(ep->flags & UCT_DC_MLX5_EP_FLAG_TX_WAIT)) && + uct_rc_fc_has_resources(&iface->super.super, &ep->fc) && + uct_dc_mlx5_iface_dci_has_tx_resources(iface, ep->dci); +} + +static UCS_F_ALWAYS_INLINE +void uct_dc_mlx5_iface_schedule_dci_alloc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) +{ + /* If FC window is empty the group will be scheduled when + * grant is received */ + if (uct_rc_fc_has_resources(&iface->super.super, &ep->fc)) { + ucs_arbiter_group_schedule(uct_dc_mlx5_iface_dci_waitq(iface), &ep->arb_group); + } +} + +static inline void uct_dc_mlx5_iface_dci_put(uct_dc_mlx5_iface_t *iface, uint8_t dci) +{ + uct_dc_mlx5_ep_t *ep; + + if (uct_dc_mlx5_iface_is_dci_rand(iface)) { + return; + } + + ep = uct_dc_mlx5_ep_from_dci(iface, dci); + + ucs_assert(iface->tx.stack_top > 0); + + if (uct_dc_mlx5_iface_dci_has_outstanding(iface, dci)) { + if (iface->tx.policy == UCT_DC_TX_POLICY_DCS_QUOTA) { + /* in tx_wait state: + * - if there are no eps are waiting for dci allocation + * ep goes back to normal state + */ + if (ep->flags & UCT_DC_MLX5_EP_FLAG_TX_WAIT) { + if (!ucs_arbiter_is_empty(uct_dc_mlx5_iface_dci_waitq(iface))) { + return; + } + ep->flags &= ~UCT_DC_MLX5_EP_FLAG_TX_WAIT; + } + } + ucs_arbiter_group_schedule(uct_dc_mlx5_iface_tx_waitq(iface), &ep->arb_group); + return; + } + iface->tx.stack_top--; + iface->tx.dcis_stack[iface->tx.stack_top] = dci; 
+#if UCS_ENABLE_ASSERT + iface->tx.dcis[dci].flags = 0; +#endif + + if (ucs_unlikely(ep == NULL)) { + return; + } + + ucs_assert(uct_dc_mlx5_ep_from_dci(iface, dci)->dci != UCT_DC_MLX5_EP_NO_DCI); + ep->dci = UCT_DC_MLX5_EP_NO_DCI; + ep->flags &= ~UCT_DC_MLX5_EP_FLAG_TX_WAIT; + iface->tx.dcis[dci].ep = NULL; + + /* it is possible that dci is released while ep still has scheduled pending ops. + * move the group to the 'wait for dci alloc' state + */ + ucs_arbiter_group_desched(uct_dc_mlx5_iface_tx_waitq(iface), &ep->arb_group); + uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep); +} + +static inline void uct_dc_mlx5_iface_dci_alloc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) +{ + /* take a first available dci from stack. + * There is no need to check txqp because + * dci must have resources to transmit. + */ + ucs_assert(!uct_dc_mlx5_iface_is_dci_rand(iface)); + ep->dci = iface->tx.dcis_stack[iface->tx.stack_top]; + ucs_assert(ep->dci < iface->tx.ndci); + ucs_assert(uct_dc_mlx5_ep_from_dci(iface, ep->dci) == NULL); + ucs_assert(iface->tx.dcis[ep->dci].flags == 0); + iface->tx.dcis[ep->dci].ep = ep; + iface->tx.stack_top++; +} + +static inline void uct_dc_mlx5_iface_dci_free(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) +{ + uint8_t dci; + + if (uct_dc_mlx5_iface_is_dci_rand(iface)) { + return; + } + + dci = ep->dci; + + ucs_assert(dci != UCT_DC_MLX5_EP_NO_DCI); + ucs_assert(iface->tx.stack_top > 0); + + if (uct_dc_mlx5_iface_dci_has_outstanding(iface, dci)) { + return; + } + + iface->tx.stack_top--; + iface->tx.dcis_stack[iface->tx.stack_top] = dci; + iface->tx.dcis[dci].ep = NULL; +#if UCS_ENABLE_ASSERT + iface->tx.dcis[ep->dci].flags = 0; +#endif + + ep->dci = UCT_DC_MLX5_EP_NO_DCI; + ep->flags &= ~UCT_DC_MLX5_EP_FLAG_TX_WAIT; +} + +static inline ucs_status_t uct_dc_mlx5_iface_dci_get(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) +{ + uct_rc_txqp_t *txqp; + int16_t available; + + ucs_assert(!iface->super.super.config.tx_moderation); + + if 
(uct_dc_mlx5_iface_is_dci_rand(iface)) {
+        if (uct_dc_mlx5_iface_dci_has_tx_resources(iface, ep->dci)) {
+            return UCS_OK;
+        } else {
+            UCS_STATS_UPDATE_COUNTER(iface->tx.dcis[ep->dci].txqp.stats,
+                                     UCT_RC_TXQP_STAT_QP_FULL, 1);
+            goto out_no_res;
+        }
+    }
+
+    if (ep->dci != UCT_DC_MLX5_EP_NO_DCI) {
+        /* dci is already assigned - keep using it, unless the dcs+quota
+         * policy has already put the ep into the tx_wait state */
+        if ((iface->tx.policy == UCT_DC_TX_POLICY_DCS_QUOTA) &&
+            (ep->flags & UCT_DC_MLX5_EP_FLAG_TX_WAIT)) {
+            goto out_no_res;
+        }
+
+        /* if dci has sent more than quota, and there are eps waiting for dci
+         * allocation, ep goes into tx_wait state.
+         */
+        txqp      = &iface->tx.dcis[ep->dci].txqp;
+        available = uct_rc_txqp_available(txqp);
+        if ((iface->tx.policy == UCT_DC_TX_POLICY_DCS_QUOTA) &&
+            (available <= iface->tx.available_quota) &&
+            !ucs_arbiter_is_empty(uct_dc_mlx5_iface_dci_waitq(iface)))
+        {
+            ep->flags |= UCT_DC_MLX5_EP_FLAG_TX_WAIT;
+            goto out_no_res;
+        }
+
+        if (available <= 0) {
+            UCS_STATS_UPDATE_COUNTER(txqp->stats, UCT_RC_TXQP_STAT_QP_FULL, 1);
+            goto out_no_res;
+        }
+
+        return UCS_OK;
+    }
+
+    /* Do not alloc dci if no TX desc resources,
+     * otherwise this dci may never be released. */
+    if (uct_dc_mlx5_iface_dci_can_alloc(iface) &&
+        uct_dc_mlx5_iface_has_tx_resources(iface)) {
+        uct_dc_mlx5_iface_dci_alloc(iface, ep);
+        return UCS_OK;
+    }
+
+out_no_res:
+    /* we will have to wait until someone releases dci */
+    UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1);
+    return UCS_ERR_NO_RESOURCE;
+}
+/**
+ * @return Non-zero when the WAIT_FOR_GRANT bit is set in ep->fc.flags (the raw
+ *         masked value is returned, not a normalized 0/1).
+ */
+static UCS_F_ALWAYS_INLINE int uct_dc_mlx5_ep_fc_wait_for_grant(uct_dc_mlx5_ep_t *ep)
+{
+    return ep->fc.flags & UCT_DC_MLX5_EP_FC_FLAG_WAIT_FOR_GRANT;
+}
+
+/* Flow-control check used by UCT_DC_CHECK_RES_AND_FC; defined outside this
+ * header. */
+ucs_status_t uct_dc_mlx5_ep_check_fc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep);
+/**
+ * @return Pointer to the grh_av member reached by viewing @a ep as a
+ *         uct_dc_mlx5_grh_ep_t when the GRH flag is set, NULL otherwise.
+ */
+static inline struct mlx5_grh_av *uct_dc_mlx5_ep_get_grh(uct_dc_mlx5_ep_t *ep)
+{
+    return (ep->flags & UCT_DC_MLX5_EP_FLAG_GRH) ?
+           &(ucs_derived_of(ep, uct_dc_mlx5_grh_ep_t)->grh_av) : NULL;
+}
+
+/* Declare a TX QP pointer and a TX work-queue pointer with the given names. */
+#define UCT_DC_MLX5_TXQP_DECL(_txqp, _txwq) \
+    uct_rc_txqp_t *_txqp; \
+    uct_ib_mlx5_txwq_t *_txwq;
+
+/* Acquire a DCI for _ep; on failure return the status from the ENCLOSING
+ * function. Usable only in functions returning ucs_status_t. */
+#define UCT_DC_MLX5_CHECK_RES(_iface, _ep) \
+    { \
+        ucs_status_t status = uct_dc_mlx5_iface_dci_get(_iface, _ep); \
+        if (ucs_unlikely(status != UCS_OK)) { \
+            return status; \
+        } \
+    }
+
+/* Same as UCT_DC_MLX5_CHECK_RES, but the enclosing function returns the
+ * status wrapped by UCS_STATUS_PTR(). */
+#define UCT_DC_CHECK_RES_PTR(_iface, _ep) \
+    { \
+        ucs_status_t status = uct_dc_mlx5_iface_dci_get(_iface, _ep); \
+        if (ucs_unlikely(status != UCS_OK)) { \
+            return UCS_STATUS_PTR(status); \
+        } \
+    }
+
+
+/* First, check whether we have FC window. If hard threshold is reached, credit
+ * request will be sent by "fc_ctrl" as a separate message. TX resources
+ * are checked after FC, because the FC credit request may consume the last
+ * available TX resources.
+ * On FC failure the macro returns from the enclosing function; before doing
+ * so it asserts that an already assigned DCI (non-random policies) still has
+ * outstanding operations, reporting a DCI leak otherwise. */
+#define UCT_DC_CHECK_RES_AND_FC(_iface, _ep) \
+    { \
+        if (ucs_unlikely((_ep)->fc.fc_wnd <= \
+                         (_iface)->super.super.config.fc_hard_thresh)) { \
+            ucs_status_t status = uct_dc_mlx5_ep_check_fc(_iface, _ep); \
+            if (ucs_unlikely(status != UCS_OK)) { \
+                if (((_ep)->dci != UCT_DC_MLX5_EP_NO_DCI) && \
+                    !uct_dc_mlx5_iface_is_dci_rand(_iface)) { \
+                    ucs_assertv_always(uct_dc_mlx5_iface_dci_has_outstanding(_iface, (_ep)->dci), \
+                                       "iface (%p) ep (%p) dci leak detected: dci=%d", \
+                                       _iface, _ep, (_ep)->dci); \
+                } \
+                return status; \
+            } \
+        } \
+        UCT_DC_MLX5_CHECK_RES(_iface, _ep) \
+    }
+
+
+#endif
diff --git a/src/uct/ib/mlx5/dv/ib_mlx5_dv.c b/src/uct/ib/mlx5/dv/ib_mlx5_dv.c
new file mode 100644
index 0000000..4a5c245
--- /dev/null
+++ b/src/uct/ib/mlx5/dv/ib_mlx5_dv.c
@@ -0,0 +1,328 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "ib_mlx5_ifc.h"
+
+#include
+#include
+
+#if HAVE_DECL_MLX5DV_INIT_OBJ
+/**
+ * Query provider-internal object information through mlx5dv_init_obj().
+ *
+ * When built with HAVE_IBV_EXP_DM and MLX5DV_OBJ_DM is requested, the device
+ * memory information is additionally filled by uct_ib_mlx5_get_dm_info().
+ *
+ * @param obj   Wrapper holding the mlx5dv_obj in/out pointers.
+ * @param type  Bit mask of MLX5DV_OBJ_* values to query.
+ *
+ * @return UCS_OK on success, UCS_ERR_NO_DEVICE if any query failed.
+ */
+ucs_status_t uct_ib_mlx5dv_init_obj(uct_ib_mlx5dv_t *obj, uint64_t type)
+{
+    int ret;
+
+    ret = mlx5dv_init_obj(&obj->dv, type);
+#if HAVE_IBV_EXP_DM
+    if (!ret && (type & MLX5DV_OBJ_DM)) {
+        ret = uct_ib_mlx5_get_dm_info(obj->dv_dm.in, obj->dv_dm.out);
+    }
+#endif
+    if (ret != 0) {
+        ucs_error("DV failed to get mlx5 information. Type %lx.", type);
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    return UCS_OK;
+}
+#endif
+
+#if HAVE_DEVX
+/**
+ * Create an RC QP through DEVX and move it from RESET to INIT.
+ *
+ * @param iface  IB interface the QP belongs to.
+ * @param qp     QP object to fill (devx handles, qp_num, type).
+ * @param tx     TX work queue to initialize. When NULL no send-queue buffer is
+ *               allocated, the QP is created with the no_sq attribute and the
+ *               UAR reference is dropped before returning.
+ * @param attr   QP attributes; completed by uct_ib_iface_fill_attr() and, on
+ *               success, updated with the actual max_send_wr / max_recv_wr.
+ *
+ * @return UCS_OK on success. On failure every resource acquired so far is
+ *         released and an error status is returned: UCS_ERR_NO_MEMORY for the
+ *         allocation stages, UCS_ERR_IO_ERROR for the device query/command
+ *         stages, or the status carried by the UAR lookup.
+ */
+ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
+                                        uct_ib_mlx5_qp_t *qp,
+                                        uct_ib_mlx5_txwq_t *tx,
+                                        uct_ib_qp_attr_t *attr)
+{
+    uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.md, uct_ib_mlx5_md_t);
+    uct_ib_device_t *dev = &md->super.dev;
+    char in[UCT_IB_MLX5DV_ST_SZ_BYTES(create_qp_in)] = {};
+    char out[UCT_IB_MLX5DV_ST_SZ_BYTES(create_qp_out)] = {};
+    char in_2init[UCT_IB_MLX5DV_ST_SZ_BYTES(rst2init_qp_in)] = {};
+    char out_2init[UCT_IB_MLX5DV_ST_SZ_BYTES(rst2init_qp_out)] = {};
+    ucs_status_t status = UCS_ERR_NO_MEMORY;
+    struct mlx5dv_pd dvpd = {};
+    struct mlx5dv_cq dvscq = {};
+    struct mlx5dv_cq dvrcq = {};
+    struct mlx5dv_srq dvsrq = {};
+    struct mlx5dv_obj dv = {};
+    uct_ib_mlx5_devx_uar_t *uar;
+    int max_tx, max_rx, len_tx, len;
+    int wqe_size;
+    int dvflags;
+    void *qpc;
+    int ret;
+
+    uct_ib_iface_fill_attr(iface, attr);
+
+    uar = uct_worker_tl_data_get(iface->super.worker,
+                                 UCT_IB_MLX5_DEVX_UAR_KEY,
+                                 uct_ib_mlx5_devx_uar_t,
+                                 uct_ib_mlx5_devx_uar_cmp,
+                                 uct_ib_mlx5_devx_uar_init,
+                                 md, UCT_IB_MLX5_MMIO_MODE_BF_POST);
+    if (UCS_PTR_IS_ERR(uar)) {
+        status = UCS_PTR_STATUS(uar);
+        goto err;
+    }
+
+    /* upper bound of a single send WQE, used to size the send queue */
+    wqe_size = sizeof(struct mlx5_wqe_ctrl_seg) +
+               sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+               sizeof(struct mlx5_wqe_mkey_context_seg) +
+               ucs_max(sizeof(struct mlx5_wqe_umr_klm_seg), 64) +
+               ucs_max(attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg),
+                       ucs_align_up(sizeof(struct mlx5_wqe_inl_data_seg) +
+                                    attr->cap.max_inline_data, 16));
+    len_tx = ucs_roundup_pow2_or0(attr->cap.max_send_wr * wqe_size);
+    max_tx = len_tx / MLX5_SEND_WQE_BB;
+    max_rx = ucs_roundup_pow2_or0(attr->cap.max_recv_wr);
+    len = len_tx + max_rx * UCT_IB_MLX5_MAX_BB * UCT_IB_MLX5_WQE_SEG_SIZE;
+
+    if (tx != NULL) {
+        ret = ucs_posix_memalign(&qp->devx.wq_buf, ucs_get_page_size(), len,
+                                 "qp umem");
+        if (ret != 0) {
+            ucs_error("failed to allocate QP buffer of %d bytes: %m", len);
+            goto err_uar;
+        }
+
+        qp->devx.mem = mlx5dv_devx_umem_reg(dev->ibv_context, qp->devx.wq_buf, len, 0);
+        if (!qp->devx.mem) {
+            ucs_error("mlx5dv_devx_umem_reg() failed: %m");
+            goto err_free_buf;
+        }
+    } else {
+        qp->devx.wq_buf = qp->devx.mem = NULL;
+    }
+
+    qp->devx.dbrec = uct_ib_mlx5_get_dbrec(md);
+    if (!qp->devx.dbrec) {
+        goto err_free_mem;
+    }
+
+    dv.pd.in = attr->ibv.pd;
+    dv.pd.out = &dvpd;
+    dv.cq.in = attr->ibv.send_cq;
+    dv.cq.out = &dvscq;
+    dvflags = MLX5DV_OBJ_PD | MLX5DV_OBJ_CQ;
+
+    if (attr->srq) {
+        dv.srq.in = attr->srq;
+        dvflags |= MLX5DV_OBJ_SRQ;
+        dv.srq.out = &dvsrq;
+        dvsrq.comp_mask = MLX5DV_SRQ_MASK_SRQN;
+    } else {
+        dvsrq.srqn = attr->srq_num;
+    }
+
+    /* The PD/CQ/SRQ numbers read below are programmed into the QP context, so
+     * a failed query must abort instead of silently using zeroed values. */
+    ret = mlx5dv_init_obj(&dv, dvflags);
+    if (ret != 0) {
+        ucs_error("mlx5dv_init_obj(0x%x) failed: %d", dvflags, ret);
+        status = UCS_ERR_IO_ERROR;
+        goto err_free_db;
+    }
+
+    /* second query: same dv object, now pointed at the receive CQ */
+    dv.cq.in = attr->ibv.recv_cq;
+    dv.cq.out = &dvrcq;
+    ret = mlx5dv_init_obj(&dv, MLX5DV_OBJ_CQ);
+    if (ret != 0) {
+        ucs_error("mlx5dv_init_obj(recv CQ) failed: %d", ret);
+        status = UCS_ERR_IO_ERROR;
+        goto err_free_db;
+    }
+
+    UCT_IB_MLX5DV_SET(create_qp_in, in, opcode, UCT_IB_MLX5_CMD_OP_CREATE_QP);
+    qpc = UCT_IB_MLX5DV_ADDR_OF(create_qp_in, in, qpc);
+    UCT_IB_MLX5DV_SET(qpc, qpc, st, UCT_IB_MLX5_QPC_ST_RC);
+    UCT_IB_MLX5DV_SET(qpc, qpc, pm_state, UCT_IB_MLX5_QPC_PM_STATE_MIGRATED);
+    UCT_IB_MLX5DV_SET(qpc, qpc, pd, dvpd.pdn);
+    UCT_IB_MLX5DV_SET(qpc, qpc, uar_page, uar->uar->page_id);
+    UCT_IB_MLX5DV_SET(qpc, qpc, rq_type, !!dvsrq.srqn);
+    UCT_IB_MLX5DV_SET(qpc, qpc, srqn_rmpn_xrqn, dvsrq.srqn);
+    UCT_IB_MLX5DV_SET(qpc, qpc, cqn_snd, dvscq.cqn);
+    UCT_IB_MLX5DV_SET(qpc, qpc, cqn_rcv, dvrcq.cqn);
+    UCT_IB_MLX5DV_SET(qpc, qpc, log_sq_size, ucs_ilog2_or0(max_tx));
+    UCT_IB_MLX5DV_SET(qpc, qpc, log_rq_size, ucs_ilog2_or0(max_rx));
+    UCT_IB_MLX5DV_SET(qpc, qpc, cs_req, UCT_IB_MLX5_QPC_CS_REQ_UP_TO_64B);
+    UCT_IB_MLX5DV_SET(qpc, qpc, cs_res,
+                      uct_ib_mlx5_qpc_cs_res(attr->max_inl_resp));
+    UCT_IB_MLX5DV_SET64(qpc, qpc, dbr_addr, qp->devx.dbrec->offset);
+    UCT_IB_MLX5DV_SET(qpc, qpc, dbr_umem_id, qp->devx.dbrec->mem_id);
+
+    if (qp->devx.mem == NULL) {
+        /* no send queue was allocated (tx == NULL) */
+        UCT_IB_MLX5DV_SET(qpc, qpc, no_sq, true);
+        UCT_IB_MLX5DV_SET(qpc, qpc, offload_type, true);
+        UCT_IB_MLX5DV_SET(create_qp_in, in, wq_umem_id, md->zero_mem->umem_id);
+    } else {
+        UCT_IB_MLX5DV_SET(create_qp_in, in, wq_umem_id, qp->devx.mem->umem_id);
+    }
+
+    /* from here on failures are device command errors */
+    status = UCS_ERR_IO_ERROR;
+
+    qp->devx.obj = mlx5dv_devx_obj_create(dev->ibv_context, in, sizeof(in),
+                                          out, sizeof(out));
+    if (!qp->devx.obj) {
+        ucs_error("mlx5dv_devx_obj_create(QP) failed, syndrome %x: %m",
+                  UCT_IB_MLX5DV_GET(create_qp_out, out, syndrome));
+        goto err_free_db;
+    }
+
+    qp->qp_num = UCT_IB_MLX5DV_GET(create_qp_out, out, qpn);
+
+    qpc = UCT_IB_MLX5DV_ADDR_OF(rst2init_qp_in, in_2init, qpc);
+    UCT_IB_MLX5DV_SET(rst2init_qp_in, in_2init, opcode, UCT_IB_MLX5_CMD_OP_RST2INIT_QP);
+    UCT_IB_MLX5DV_SET(rst2init_qp_in, in_2init, qpn, qp->qp_num);
+    UCT_IB_MLX5DV_SET(qpc, qpc, pm_state, UCT_IB_MLX5_QPC_PM_STATE_MIGRATED);
+    UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port);
+    UCT_IB_MLX5DV_SET(qpc, qpc, rwe, true);
+
+    ret = mlx5dv_devx_obj_modify(qp->devx.obj, in_2init, sizeof(in_2init),
+                                 out_2init, sizeof(out_2init));
+    if (ret) {
+        ucs_error("mlx5dv_devx_obj_modify(2INIT_QP) failed, syndrome %x: %m",
+                  UCT_IB_MLX5DV_GET(rst2init_qp_out, out_2init, syndrome));
+        goto err_free;
+    }
+
+    qp->type = UCT_IB_MLX5_OBJ_TYPE_DEVX;
+
+    attr->cap.max_send_wr = max_tx;
+    attr->cap.max_recv_wr = max_rx;
+
+    if (tx != NULL) {
+        tx->reg    = &uar->super;
+        tx->qstart = qp->devx.wq_buf;
+        tx->qend   = UCS_PTR_BYTE_OFFSET(qp->devx.wq_buf, len_tx);
+        tx->dbrec  = &qp->devx.dbrec->db[MLX5_SND_DBR];
+        tx->bb_max = max_tx - 2 * UCT_IB_MLX5_MAX_BB;
+        uct_ib_mlx5_txwq_reset(tx);
+    } else {
+        uct_worker_tl_data_put(uar, uct_ib_mlx5_devx_uar_cleanup);
+    }
+
+    return UCS_OK;
+
+    /* unwind in reverse order of acquisition */
+err_free:
+    mlx5dv_devx_obj_destroy(qp->devx.obj);
+err_free_db:
+    uct_ib_mlx5_put_dbrec(qp->devx.dbrec);
+err_free_mem:
+    if (qp->devx.mem != NULL) {
+        mlx5dv_devx_umem_dereg(qp->devx.mem);
+    }
+err_free_buf:
+    ucs_free(qp->devx.wq_buf);
+err_uar:
+    uct_worker_tl_data_put(uar, uct_ib_mlx5_devx_uar_cleanup);
+err:
+    return status;
+}
+
+/**
+ * Issue a QP modify command, using the API that matches how the QP was
+ * created (verbs QP or DEVX object).
+ *
+ * @return UCS_OK on success, UCS_ERR_IO_ERROR if the command failed,
+ *         UCS_ERR_UNSUPPORTED for UCT_IB_MLX5_OBJ_TYPE_LAST.
+ */
+ucs_status_t uct_ib_mlx5_devx_modify_qp(uct_ib_mlx5_qp_t *qp,
+                                        const void *in, size_t inlen,
+                                        void *out, size_t outlen)
+{
+    int ret;
+
+    switch (qp->type) {
+    case UCT_IB_MLX5_OBJ_TYPE_VERBS:
+        ret = mlx5dv_devx_qp_modify(qp->verbs.qp, in, inlen, out, outlen);
+        if (ret) {
+            ucs_error("mlx5dv_devx_qp_modify(%x) failed, syndrome %x: %m",
+                      UCT_IB_MLX5DV_GET(modify_qp_in, in, opcode),
+                      UCT_IB_MLX5DV_GET(modify_qp_out, out, syndrome));
+            return UCS_ERR_IO_ERROR;
+        }
+        break;
+    case UCT_IB_MLX5_OBJ_TYPE_DEVX:
+        ret = mlx5dv_devx_obj_modify(qp->devx.obj, in, inlen, out, outlen);
+        if (ret) {
+            ucs_error("mlx5dv_devx_obj_modify(%x) failed, syndrome %x: %m",
+                      UCT_IB_MLX5DV_GET(modify_qp_in, in, opcode),
+                      UCT_IB_MLX5DV_GET(modify_qp_out, out, syndrome));
+            return UCS_ERR_IO_ERROR;
+        }
+        break;
+    case UCT_IB_MLX5_OBJ_TYPE_LAST:
+        return UCS_ERR_UNSUPPORTED;
+    }
+
+    return UCS_OK;
+}
+
+/**
+ * Move the QP to the ERR or RESET state.
+ *
+ * @return Status of uct_ib_mlx5_devx_modify_qp(), or UCS_ERR_UNSUPPORTED for
+ *         any other target state.
+ */
+ucs_status_t uct_ib_mlx5_devx_modify_qp_state(uct_ib_mlx5_qp_t *qp,
+                                              enum ibv_qp_state state)
+{
+    char in[UCT_IB_MLX5DV_ST_SZ_BYTES(modify_qp_in)] = {};
+    char out[UCT_IB_MLX5DV_ST_SZ_BYTES(modify_qp_out)] = {};
+
+    switch (state) {
+    case IBV_QPS_ERR:
+        UCT_IB_MLX5DV_SET(modify_qp_in, in, opcode, UCT_IB_MLX5_CMD_OP_2ERR_QP);
+        break;
+    case IBV_QPS_RESET:
+        UCT_IB_MLX5DV_SET(modify_qp_in, in, opcode, UCT_IB_MLX5_CMD_OP_2RST_QP);
+        break;
+    default:
+        return UCS_ERR_UNSUPPORTED;
+    }
+
+    UCT_IB_MLX5DV_SET(modify_qp_in, in, qpn, qp->qp_num);
+    return uct_ib_mlx5_devx_modify_qp(qp, in, sizeof(in), out, sizeof(out));
+}
+
+/* Destroy a DEVX QP and release its doorbell record, umem registration and
+ * work-queue buffer. */
+void
uct_ib_mlx5_devx_destroy_qp(uct_ib_mlx5_qp_t *qp)
+{
+    int ret = mlx5dv_devx_obj_destroy(qp->devx.obj);
+    if (ret) {
+        ucs_error("mlx5dv_devx_obj_destroy(QP) failed: %m");
+    }
+    uct_ib_mlx5_put_dbrec(qp->devx.dbrec); /* released even if destroy failed */
+    if (qp->devx.mem != NULL) {
+        mlx5dv_devx_umem_dereg(qp->devx.mem);
+    }
+    ucs_free(qp->devx.wq_buf);
+}
+#endif
+/**
+ * Arm the CQ for a completion notification by writing the arm doorbell record
+ * and then the 64-bit doorbell at offset MLX5_CQ_DOORBELL of the CQ's UAR.
+ *
+ * @param cq         CQ to arm; cq_sn, cq_ci, cq_num, dbrec and uar are read.
+ * @param solicited  Non-zero selects MLX5_CQ_DB_REQ_NOT_SOL, zero selects
+ *                   MLX5_CQ_DB_REQ_NOT.
+ *
+ * @return Always UCS_OK.
+ */
+ucs_status_t uct_ib_mlx5dv_arm_cq(uct_ib_mlx5_cq_t *cq, int solicited)
+{
+    uint64_t doorbell, sn_ci_cmd;
+    uint32_t sn, ci, cmd;
+
+    sn = cq->cq_sn & 3; /* low 2 bits, placed at bits 28..29 below */
+    ci = cq->cq_ci & 0xffffff; /* low 24 bits */
+    cmd = solicited ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT;
+    sn_ci_cmd = (sn << 28) | cmd | ci;
+
+    cq->dbrec[UCT_IB_MLX5_CQ_ARM_DB] = htobe32(sn_ci_cmd);
+
+    ucs_memory_cpu_fence(); /* fence between the dbrec store and the doorbell store */
+
+    doorbell = (sn_ci_cmd << 32) | cq->cq_num;
+
+    *(uint64_t *)((uint8_t *)cq->uar + MLX5_CQ_DOORBELL) = htobe64(doorbell);
+
+    ucs_memory_bus_store_fence();
+
+    return UCS_OK;
+}
+
+#if HAVE_DECL_MLX5DV_OBJ_AH
+/* Copy the provider's address vector of @a ah into @a av and set the
+ * UCT_IB_MLX5_EXTENDED_UD_AV bit in dqp_dct.
+ * NOTE(review): dv and dah are not initialized and the return value of
+ * mlx5dv_init_obj() is not checked, so dah.av is read even if the query
+ * failed - confirm whether that can happen for a valid AH. */
+void uct_ib_mlx5_get_av(struct ibv_ah *ah, struct mlx5_wqe_av *av)
+{
+    struct mlx5dv_obj dv;
+    struct mlx5dv_ah dah;
+
+    dv.ah.in = ah;
+    dv.ah.out = &dah;
+    mlx5dv_init_obj(&dv, MLX5DV_OBJ_AH);
+
+    *av = *(dah.av);
+    av->dqp_dct |= UCT_IB_MLX5_EXTENDED_UD_AV;
+}
+#elif !HAVE_INFINIBAND_MLX5_HW_H
+/* Fallback when MLX5DV_OBJ_AH is not available: reports a bug via ucs_bug()
+ * and leaves @a av untouched. */
+void uct_ib_mlx5_get_av(struct ibv_ah *ah, struct mlx5_wqe_av *av)
+{
+    ucs_bug("MLX5DV_OBJ_AH not supported");
+}
+#endif
+
+#if HAVE_DEVX
+/* Report in *compact_av (as 0/1) whether the interface's device has
+ * UCT_IB_DEVICE_FLAG_AV set. Always returns UCS_OK. */
+ucs_status_t uct_ib_mlx5_get_compact_av(uct_ib_iface_t *iface, int *compact_av)
+{
+    *compact_av = !!(uct_ib_iface_device(iface)->flags & UCT_IB_DEVICE_FLAG_AV);
+    return UCS_OK;
+}
+#endif
+
+diff --git a/src/uct/ib/mlx5/dv/ib_mlx5_dv.h b/src/uct/ib/mlx5/dv/ib_mlx5_dv.h
new file mode 100644
index 0000000..2d23f35
--- /dev/null
+++ b/src/uct/ib/mlx5/dv/ib_mlx5_dv.h
@@ -0,0 +1,116 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_IB_MLX5_DV_H_
+#define UCT_IB_MLX5_DV_H_
+
+#ifndef UCT_IB_MLX5_H_
+#  error "Never include directly; use instead."
+#endif
+
+#include
+#include
+
+/* Wrapper around mlx5dv_obj; with HAVE_IBV_EXP_DM it also carries the in/out
+ * pointers for the device-memory query, which that build handles itself. */
+typedef struct {
+    struct mlx5dv_obj dv;
+#if HAVE_IBV_EXP_DM
+    struct {
+        struct ibv_exp_dm *in;
+        struct mlx5dv_dm *out;
+    } dv_dm;
+#endif
+} uct_ib_mlx5dv_t;
+
+/* Queue pair */
+typedef struct {
+    struct mlx5dv_qp dv;
+} uct_ib_mlx5dv_qp_t;
+
+/* Shared receive queue */
+typedef struct {
+    struct mlx5dv_srq dv;
+} uct_ib_mlx5dv_srq_t;
+
+/* Completion queue */
+typedef struct {
+    struct mlx5dv_cq dv;
+} uct_ib_mlx5dv_cq_t;
+
+/**
+ * Get internal verbs information.
+ */
+ucs_status_t uct_ib_mlx5dv_init_obj(uct_ib_mlx5dv_t *obj, uint64_t type);
+
+/**
+ * Update CI to support req_notify_cq
+ */
+void uct_ib_mlx5_update_cq_ci(struct ibv_cq *cq, unsigned cq_ci);
+
+/**
+ * Retrieve CI from the driver
+ */
+unsigned uct_ib_mlx5_get_cq_ci(struct ibv_cq *cq);
+
+/**
+ * Get internal AV information.
+ */
+void uct_ib_mlx5_get_av(struct ibv_ah *ah, struct mlx5_wqe_av *av);
+
+/**
+ * Backports for legacy bare-metal support
+ */
+struct ibv_qp *uct_dv_get_cmd_qp(struct ibv_srq *srq);
+
+void *uct_dv_get_info_uar0(void *uar);
+
+/*
+ * DM backports: when only the experimental-verbs device-memory API exists,
+ * map the upstream ibv_* names used by the rest of the code onto ibv_exp_*.
+ */
+#if HAVE_IBV_EXP_DM
+#  define ibv_dm ibv_exp_dm
+#  define ibv_alloc_dm_attr ibv_exp_alloc_dm_attr
+#  define ibv_alloc_dm ibv_exp_alloc_dm
+#  define ibv_free_dm ibv_exp_free_dm
+
+struct mlx5dv_dm {
+    void *buf;
+    uint64_t length;
+    uint64_t comp_mask;
+};
+
+enum {
+    MLX5DV_OBJ_DM = 1 << 4,
+};
+/* Backport of ibv_reg_dm_mr() on top of ibv_exp_reg_mr().
+ * NOTE(review): dm_offset and access are accepted but never forwarded to
+ * ibv_exp_reg_mr() - confirm this is intended. */
+static struct ibv_mr * UCS_F_MAYBE_UNUSED
+ibv_reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *dm,
+              uint64_t dm_offset, size_t length, unsigned int access)
+{
+    struct ibv_exp_reg_mr_in mr_in = {};
+    mr_in.pd = pd;
+    mr_in.comp_mask = IBV_EXP_REG_MR_DM;
+    mr_in.dm = dm;
+    mr_in.length = length;
+    return ibv_exp_reg_mr(&mr_in);
+}
+
+/* Layout that uct_ib_mlx5_get_dm_info() casts an ibv_exp_dm pointer to in
+ * order to reach start_va; presumably mirrors the provider's private struct
+ * - verify against the installed driver. */
+typedef struct uct_mlx5_dm_va {
+    struct ibv_dm ibv_dm;
+    size_t length;
+    uint64_t *start_va;
+} uct_mlx5_dm_va_t;
+/* Fill dm_info->buf with the start address taken from the cast above; the
+ * other mlx5dv_dm fields are left untouched. Always returns UCS_OK. */
+static ucs_status_t UCS_F_MAYBE_UNUSED
+uct_ib_mlx5_get_dm_info(struct ibv_exp_dm *dm, struct mlx5dv_dm *dm_info)
+{
+    dm_info->buf = ((uct_mlx5_dm_va_t*)dm)->start_va;
+    return UCS_OK;
+}
+
+#  define UCT_IB_MLX5_DV_DM(_obj) _obj.dv_dm
+#else
+#  define UCT_IB_MLX5_DV_DM(_obj) _obj.dv.dm
+#endif
+
+#endif
diff --git a/src/uct/ib/mlx5/dv/ib_mlx5_ifc.h b/src/uct/ib/mlx5/dv/ib_mlx5_ifc.h
new file mode 100644
index 0000000..dd5100e
--- /dev/null
+++ b/src/uct/ib/mlx5/dv/ib_mlx5_ifc.h
@@ -0,0 +1,1337 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_IB_MLX5_IFC_H_
+#define UCT_IB_MLX5_IFC_H_
+
+#include
+
+#include
+#include
+#include
+/*
+ * Layout helpers. The uct_ib_mlx5_<type>_bits structs below are never
+ * instantiated as data: every field is a uint8_t array whose element count is
+ * the field width in BITS, so sizeof() yields a size in bits and offsetof()
+ * a bit offset. The macros convert those into dword/qword indexes, shifts and
+ * masks over a big-endian command buffer.
+ */
+#define __uct_nullp(_typ) ((struct uct_ib_mlx5_##_typ##_bits *)0)
+#define __uct_bit_sz(_typ, _fld) sizeof(__uct_nullp(_typ)->_fld)
+#define __uct_bit_off(_typ, _fld) (offsetof(struct uct_ib_mlx5_##_typ##_bits, _fld))
+#define __uct_dw_off(_typ, _fld) (__uct_bit_off(_typ, _fld) / 32)
+#define __uct_64_off(_typ, _fld) (__uct_bit_off(_typ, _fld) / 64)
+#define __uct_dw_bit_off(_typ, _fld) (32 - __uct_bit_sz(_typ, _fld) - (__uct_bit_off(_typ, _fld) & 0x1f))
+#define __uct_mask(_typ, _fld) ((uint32_t)((1ull << __uct_bit_sz(_typ, _fld)) - 1))
+#define __uct_dw_mask(_typ, _fld) (__uct_mask(_typ, _fld) << __uct_dw_bit_off(_typ, _fld))
+#define __uct_st_sz_bits(_typ) sizeof(struct uct_ib_mlx5_##_typ##_bits)
+
+#define UCT_IB_MLX5DV_FLD_SZ_BYTES(_typ, _fld) (__uct_bit_sz(_typ, _fld) / 8)
+#define UCT_IB_MLX5DV_ST_SZ_BYTES(_typ) (sizeof(struct uct_ib_mlx5_##_typ##_bits) / 8)
+#define UCT_IB_MLX5DV_ST_SZ_DW(_typ) (sizeof(struct uct_ib_mlx5_##_typ##_bits) / 32)
+#define UCT_IB_MLX5DV_ST_SZ_QW(_typ) (sizeof(struct uct_ib_mlx5_##_typ##_bits) / 64)
+#define UCT_IB_MLX5DV_UN_SZ_BYTES(_typ) (sizeof(union uct_ib_mlx5_##_typ##_bits) / 8)
+#define UCT_IB_MLX5DV_UN_SZ_DW(_typ) (sizeof(union uct_ib_mlx5_##_typ##_bits) / 32)
+#define UCT_IB_MLX5DV_BYTE_OFF(_typ, _fld) (__uct_bit_off(_typ, _fld) / 8)
+#define UCT_IB_MLX5DV_ADDR_OF(_typ, _p, _fld) ((char *)(_p) + UCT_IB_MLX5DV_BYTE_OFF(_typ, _fld))
+
+/* Insert a value into a field of at most 32 bits that does not cross a dword
+ * boundary: read-modify-write of the big-endian dword holding the field. The
+ * value is truncated to the field width. */
+#define UCT_IB_MLX5DV_SET(_typ, _p, _fld, _v) \
+    do { \
+        char *___p = _p; \
+        uint32_t ___v = _v; \
+        UCS_STATIC_ASSERT(__uct_st_sz_bits(_typ) % 32 == 0); \
+        *((__be32 *)(___p) + __uct_dw_off(_typ, _fld)) = \
+                htobe32((be32toh(*((__be32 *)(___p) + __uct_dw_off(_typ, _fld))) & \
+                        (~__uct_dw_mask(_typ, _fld))) | (((___v) & __uct_mask(_typ, _fld)) \
+                        << __uct_dw_bit_off(_typ, _fld))); \
+    } while (0)
+/* Extract a field of at most 32 bits, returned in host byte order. */
+#define UCT_IB_MLX5DV_GET(_typ, _p, _fld) \
+    ((be32toh(*((__be32 *)(_p) + \
+    __uct_dw_off(_typ, _fld))) >> __uct_dw_bit_off(_typ, _fld)) & \
+    __uct_mask(_typ, _fld))
+/* Store a field that is exactly 64 bits wide (checked at compile time). */
+#define UCT_IB_MLX5DV_SET64(_typ, _p, _fld, _v) \
+    do { \
+        UCS_STATIC_ASSERT(__uct_st_sz_bits(_typ) % 64 == 0); \
+        UCS_STATIC_ASSERT(__uct_bit_sz(_typ, _fld) == 64); \
+        *((__be64 *)(_p) + __uct_64_off(_typ, _fld)) = htobe64(_v); \
+    } while (0)
+/* Load a 64-bit field, returned in host byte order. */
+#define UCT_IB_MLX5DV_GET64(_typ, _p, _fld) \
+    be64toh(*((__be64 *)(_p) + __uct_64_off(_typ, _fld)))
+/* Values written to the "opcode" field of command input buffers */
+enum {
+    UCT_IB_MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
+    UCT_IB_MLX5_CMD_OP_CREATE_MKEY = 0x200,
+    UCT_IB_MLX5_CMD_OP_CREATE_QP = 0x500,
+    UCT_IB_MLX5_CMD_OP_RST2INIT_QP = 0x502,
+    UCT_IB_MLX5_CMD_OP_INIT2RTR_QP = 0x503,
+    UCT_IB_MLX5_CMD_OP_RTR2RTS_QP = 0x504,
+    UCT_IB_MLX5_CMD_OP_2ERR_QP = 0x507,
+    UCT_IB_MLX5_CMD_OP_2RST_QP = 0x50a,
+    UCT_IB_MLX5_CMD_OP_CREATE_DCT = 0x710,
+    UCT_IB_MLX5_CMD_OP_DRAIN_DCT = 0x712,
+    UCT_IB_MLX5_CMD_OP_CREATE_XRQ = 0x717,
+    UCT_IB_MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726
+};
+/* QUERY_HCA_CAP selector: maximum vs. current capability values */
+enum {
+    UCT_IB_MLX5_HCA_CAP_OPMOD_GET_MAX = 0,
+    UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR = 1
+};
+/* QUERY_HCA_CAP capability groups */
+enum {
+    UCT_IB_MLX5_CAP_GENERAL = 0,
+    UCT_IB_MLX5_CAP_ODP = 2,
+    UCT_IB_MLX5_CAP_ATOMIC = 3,
+};
+/* Bit layout of the general device capabilities */
+struct uct_ib_mlx5_cmd_hca_cap_bits {
+    uint8_t reserved_at_0[0x30];
+    uint8_t vhca_id[0x10];
+
+    uint8_t reserved_at_40[0x40];
+
+    uint8_t log_max_srq_sz[0x8];
+    uint8_t log_max_qp_sz[0x8];
+    uint8_t reserved_at_90[0xb];
+    uint8_t log_max_qp[0x5];
+
+    uint8_t reserved_at_a0[0xb];
+    uint8_t log_max_srq[0x5];
+    uint8_t reserved_at_b0[0x10];
+
+    uint8_t reserved_at_c0[0x8];
+
uint8_t log_max_cq_sz[0x8]; + uint8_t reserved_at_d0[0xb]; + uint8_t log_max_cq[0x5]; + + uint8_t log_max_eq_sz[0x8]; + uint8_t reserved_at_e8[0x2]; + uint8_t log_max_mkey[0x6]; + uint8_t reserved_at_f0[0x4]; + uint8_t cmd_on_behalf[0x1]; + uint8_t device_emulation_manager[0x1]; + uint8_t reserved_at_f6[0x6]; + uint8_t log_max_eq[0x4]; + + uint8_t max_indirection[0x8]; + uint8_t fixed_buffer_size[0x1]; + uint8_t log_max_mrw_sz[0x7]; + uint8_t force_teardown[0x1]; + uint8_t reserved_at_111[0x1]; + uint8_t log_max_bsf_list_size[0x6]; + uint8_t umr_extended_translation_offset[0x1]; + uint8_t null_mkey[0x1]; + uint8_t log_max_klm_list_size[0x6]; + + uint8_t reserved_at_120[0xa]; + uint8_t log_max_ra_req_dc[0x6]; + uint8_t reserved_at_130[0xa]; + uint8_t log_max_ra_res_dc[0x6]; + + uint8_t reserved_at_140[0xa]; + uint8_t log_max_ra_req_qp[0x6]; + uint8_t reserved_at_150[0xa]; + uint8_t log_max_ra_res_qp[0x6]; + + uint8_t end_pad[0x1]; + uint8_t cc_query_allowed[0x1]; + uint8_t cc_modify_allowed[0x1]; + uint8_t start_pad[0x1]; + uint8_t cache_line_128byte[0x1]; + uint8_t reserved_at_165[0xa]; + uint8_t qcam_reg[0x1]; + uint8_t gid_table_size[0x10]; + + uint8_t out_of_seq_cnt[0x1]; + uint8_t vport_counters[0x1]; + uint8_t retransmission_q_counters[0x1]; + uint8_t debug[0x1]; + uint8_t modify_rq_counter_set_id[0x1]; + uint8_t rq_delay_drop[0x1]; + uint8_t max_qp_cnt[0xa]; + uint8_t pkey_table_size[0x10]; + + uint8_t vport_group_manager[0x1]; + uint8_t vhca_group_manager[0x1]; + uint8_t ib_virt[0x1]; + uint8_t eth_virt[0x1]; + uint8_t vnic_env_queue_counters[0x1]; + uint8_t ets[0x1]; + uint8_t nic_flow_table[0x1]; + uint8_t eswitch_flow_table[0x1]; + uint8_t device_memory[0x1]; + uint8_t mcam_reg[0x1]; + uint8_t pcam_reg[0x1]; + uint8_t local_ca_ack_delay[0x5]; + uint8_t port_module_event[0x1]; + uint8_t enhanced_error_q_counters[0x1]; + uint8_t ports_check[0x1]; + uint8_t reserved_at_1b3[0x1]; + uint8_t disable_link_up[0x1]; + uint8_t beacon_led[0x1]; + uint8_t 
port_type[0x2]; + uint8_t num_ports[0x8]; + + uint8_t reserved_at_1c0[0x1]; + uint8_t pps[0x1]; + uint8_t pps_modify[0x1]; + uint8_t log_max_msg[0x5]; + uint8_t reserved_at_1c8[0x4]; + uint8_t max_tc[0x4]; + uint8_t reserved_at_1d0[0x1]; + uint8_t dcbx[0x1]; + uint8_t general_notification_event[0x1]; + uint8_t reserved_at_1d3[0x2]; + uint8_t fpga[0x1]; + uint8_t rol_s[0x1]; + uint8_t rol_g[0x1]; + uint8_t reserved_at_1d8[0x1]; + uint8_t wol_s[0x1]; + uint8_t wol_g[0x1]; + uint8_t wol_a[0x1]; + uint8_t wol_b[0x1]; + uint8_t wol_m[0x1]; + uint8_t wol_u[0x1]; + uint8_t wol_p[0x1]; + + uint8_t stat_rate_support[0x10]; + uint8_t reserved_at_1f0[0xc]; + uint8_t cqe_version[0x4]; + + uint8_t compact_address_vector[0x1]; + uint8_t striding_rq[0x1]; + uint8_t reserved_at_202[0x1]; + uint8_t ipoib_enhanced_offloads[0x1]; + uint8_t ipoib_basic_offloads[0x1]; + uint8_t reserved_at_205[0x1]; + uint8_t repeated_block_disabled[0x1]; + uint8_t umr_modify_entity_size_disabled[0x1]; + uint8_t umr_modify_atomic_disabled[0x1]; + uint8_t umr_indirect_mkey_disabled[0x1]; + uint8_t umr_fence[0x2]; + uint8_t reserved_at_20c[0x3]; + uint8_t drain_sigerr[0x1]; + uint8_t cmdif_checksum[0x2]; + uint8_t sigerr_cqe[0x1]; + uint8_t reserved_at_213[0x1]; + uint8_t wq_signature[0x1]; + uint8_t sctr_data_cqe[0x1]; + uint8_t reserved_at_216[0x1]; + uint8_t sho[0x1]; + uint8_t tph[0x1]; + uint8_t rf[0x1]; + uint8_t dct[0x1]; + uint8_t qos[0x1]; + uint8_t eth_net_offloads[0x1]; + uint8_t roce[0x1]; + uint8_t atomic[0x1]; + uint8_t reserved_at_21f[0x1]; + + uint8_t cq_oi[0x1]; + uint8_t cq_resize[0x1]; + uint8_t cq_moderation[0x1]; + uint8_t reserved_at_223[0x3]; + uint8_t cq_eq_remap[0x1]; + uint8_t pg[0x1]; + uint8_t block_lb_mc[0x1]; + uint8_t reserved_at_229[0x1]; + uint8_t scqe_break_moderation[0x1]; + uint8_t cq_period_start_from_cqe[0x1]; + uint8_t cd[0x1]; + uint8_t reserved_at_22d[0x1]; + uint8_t apm[0x1]; + uint8_t vector_calc[0x1]; + uint8_t umr_ptr_rlky[0x1]; + uint8_t imaicl[0x1]; + 
uint8_t reserved_at_232[0x4]; + uint8_t qkv[0x1]; + uint8_t pkv[0x1]; + uint8_t set_deth_sqpn[0x1]; + uint8_t reserved_at_239[0x3]; + uint8_t xrc[0x1]; + uint8_t ud[0x1]; + uint8_t uc[0x1]; + uint8_t rc[0x1]; + + uint8_t uar_4k[0x1]; + uint8_t reserved_at_241[0x9]; + uint8_t uar_sz[0x6]; + uint8_t reserved_at_250[0x8]; + uint8_t log_pg_sz[0x8]; + + uint8_t bf[0x1]; + uint8_t driver_version[0x1]; + uint8_t pad_tx_eth_packet[0x1]; + uint8_t reserved_at_263[0x8]; + uint8_t log_bf_reg_size[0x5]; + + uint8_t reserved_at_270[0xb]; + uint8_t lag_master[0x1]; + uint8_t num_lag_ports[0x4]; + + uint8_t reserved_at_280[0x10]; + uint8_t max_wqe_sz_sq[0x10]; + + uint8_t reserved_at_2a0[0x10]; + uint8_t max_wqe_sz_rq[0x10]; + + uint8_t max_flow_counter_31_16[0x10]; + uint8_t max_wqe_sz_sq_dc[0x10]; + + uint8_t reserved_at_2e0[0x7]; + uint8_t max_qp_mcg[0x19]; + + uint8_t reserved_at_300[0x18]; + uint8_t log_max_mcg[0x8]; + + uint8_t reserved_at_320[0x3]; + uint8_t log_max_transport_domain[0x5]; + uint8_t reserved_at_328[0x3]; + uint8_t log_max_pd[0x5]; + uint8_t reserved_at_330[0xb]; + uint8_t log_max_xrcd[0x5]; + + uint8_t nic_receive_steering_discard[0x1]; + uint8_t receive_discard_vport_down[0x1]; + uint8_t transmit_discard_vport_down[0x1]; + uint8_t reserved_at_343[0x5]; + uint8_t log_max_flow_counter_bulk[0x8]; + uint8_t max_flow_counter_15_0[0x10]; + + + uint8_t reserved_at_360[0x3]; + uint8_t log_max_rq[0x5]; + uint8_t reserved_at_368[0x3]; + uint8_t log_max_sq[0x5]; + uint8_t reserved_at_370[0x3]; + uint8_t log_max_tir[0x5]; + uint8_t reserved_at_378[0x3]; + uint8_t log_max_tis[0x5]; + + uint8_t basic_cyclic_rcv_wqe[0x1]; + uint8_t reserved_at_381[0x2]; + uint8_t log_max_rmp[0x5]; + uint8_t reserved_at_388[0x3]; + uint8_t log_max_rqt[0x5]; + uint8_t reserved_at_390[0x3]; + uint8_t log_max_rqt_size[0x5]; + uint8_t reserved_at_398[0x3]; + uint8_t log_max_tis_per_sq[0x5]; + + uint8_t ext_stride_num_range[0x1]; + uint8_t reserved_at_3a1[0x2]; + uint8_t 
log_max_stride_sz_rq[0x5]; + uint8_t reserved_at_3a8[0x3]; + uint8_t log_min_stride_sz_rq[0x5]; + uint8_t reserved_at_3b0[0x3]; + uint8_t log_max_stride_sz_sq[0x5]; + uint8_t reserved_at_3b8[0x3]; + uint8_t log_min_stride_sz_sq[0x5]; + + uint8_t hairpin[0x1]; + uint8_t reserved_at_3c1[0x2]; + uint8_t log_max_hairpin_queues[0x5]; + uint8_t reserved_at_3c8[0x3]; + uint8_t log_max_hairpin_wq_data_sz[0x5]; + uint8_t reserved_at_3d0[0x3]; + uint8_t log_max_hairpin_num_packets[0x5]; + uint8_t reserved_at_3d8[0x3]; + uint8_t log_max_wq_sz[0x5]; + + uint8_t nic_vport_change_event[0x1]; + uint8_t disable_local_lb_uc[0x1]; + uint8_t disable_local_lb_mc[0x1]; + uint8_t log_min_hairpin_wq_data_sz[0x5]; + uint8_t reserved_at_3e8[0x3]; + uint8_t log_max_vlan_list[0x5]; + uint8_t reserved_at_3f0[0x3]; + uint8_t log_max_current_mc_list[0x5]; + uint8_t reserved_at_3f8[0x3]; + uint8_t log_max_current_uc_list[0x5]; + + uint8_t general_obj_types[0x40]; + + uint8_t reserved_at_440[0x40]; + + uint8_t reserved_at_480[0x3]; + uint8_t log_max_l2_table[0x5]; + uint8_t reserved_at_488[0x8]; + uint8_t log_uar_page_sz[0x10]; + + uint8_t reserved_at_4a0[0x20]; + uint8_t device_frequency_mhz[0x20]; + uint8_t device_frequency_khz[0x20]; + + uint8_t reserved_at_500[0x20]; + uint8_t num_of_uars_per_page[0x20]; + uint8_t reserved_at_540[0x40]; + + uint8_t reserved_at_580[0x3d]; + uint8_t cqe_128_always[0x1]; + uint8_t cqe_compression_128[0x1]; + uint8_t cqe_compression[0x1]; + + uint8_t cqe_compression_timeout[0x10]; + uint8_t cqe_compression_max_num[0x10]; + + uint8_t reserved_at_5e0[0x10]; + uint8_t tag_matching[0x1]; + uint8_t rndv_offload_rc[0x1]; + uint8_t rndv_offload_dc[0x1]; + uint8_t log_tag_matching_list_sz[0x5]; + uint8_t reserved_at_5f8[0x3]; + uint8_t log_max_xrq[0x5]; + + uint8_t affiliate_nic_vport_criteria[0x8]; + uint8_t native_port_num[0x8]; + uint8_t num_vhca_ports[0x8]; + uint8_t reserved_at_618[0x6]; + uint8_t sw_owner_id[0x1]; + uint8_t reserved_at_61f[0x1e1]; +}; + +enum { + 
UCT_IB_MLX5_ATOMIC_OPS_CMP_SWAP = UCS_BIT(0), + UCT_IB_MLX5_ATOMIC_OPS_FETCH_ADD = UCS_BIT(1), + UCT_IB_MLX5_ATOMIC_OPS_MASKED_CMP_SWAP = UCS_BIT(2), + UCT_IB_MLX5_ATOMIC_OPS_MASKED_FETCH_ADD = UCS_BIT(3) +}; + +struct uct_ib_mlx5_atomic_caps_bits { + uint8_t reserved_at_0[0x40]; + + uint8_t atomic_req_8B_endianness_mode[0x2]; + uint8_t reserved_at_42[0x4]; + uint8_t supported_atomic_req_8B_endianness_mode_1[0x1]; + + uint8_t reserved_at_47[0x19]; + + uint8_t reserved_at_60[0x20]; + + uint8_t reserved_at_80[0x10]; + uint8_t atomic_operations[0x10]; + + uint8_t reserved_at_a0[0x10]; + uint8_t atomic_size_qp[0x10]; + + uint8_t reserved_at_c0[0x10]; + uint8_t atomic_size_dc[0x10]; + + uint8_t reserved_at_e0[0x1a0]; + + uint8_t fetch_add_pci_atomic[0x10]; + uint8_t swap_pci_atomic[0x10]; + uint8_t compare_swap_pci_atomic[0x10]; + uint8_t reserved_at_2b0[0x10]; + + uint8_t reserved_at_2c0[0x540]; +}; + +struct uct_ib_mlx5_odp_per_transport_service_cap_bits { + uint8_t send[0x1]; + uint8_t receive[0x1]; + uint8_t write[0x1]; + uint8_t read[0x1]; + uint8_t atomic[0x1]; + uint8_t srq_receive[0x1]; + uint8_t reserved_at_6[0x1a]; +}; + +struct uct_ib_mlx5_odp_cap_bits { + uint8_t reserved_at_0[0x40]; + + uint8_t sig[0x1]; + uint8_t reserved_at_41[0x1f]; + + uint8_t reserved_at_60[0x20]; + + struct uct_ib_mlx5_odp_per_transport_service_cap_bits rc_odp_caps; + + struct uct_ib_mlx5_odp_per_transport_service_cap_bits uc_odp_caps; + + struct uct_ib_mlx5_odp_per_transport_service_cap_bits ud_odp_caps; + + struct uct_ib_mlx5_odp_per_transport_service_cap_bits xrc_odp_caps; + + struct uct_ib_mlx5_odp_per_transport_service_cap_bits dc_odp_caps; + + uint8_t reserved_at_100[0x700]; +}; + +union uct_ib_mlx5_hca_cap_union_bits { + struct uct_ib_mlx5_cmd_hca_cap_bits cmd_hca_cap; + struct uct_ib_mlx5_odp_cap_bits odp_cap; + struct uct_ib_mlx5_atomic_caps_bits atomic_caps; + uint8_t reserved_at_0[0x8000]; +}; + +struct uct_ib_mlx5_query_hca_cap_out_bits { + uint8_t status[0x8]; + uint8_t 
reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x40]; + + union uct_ib_mlx5_hca_cap_union_bits capability; +}; + +struct uct_ib_mlx5_query_hca_cap_in_bits { + uint8_t opcode[0x10]; + uint8_t uid[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x40]; +}; + +enum { + UCT_IB_MLX5_MKC_ACCESS_MODE_PA = 0x0, + UCT_IB_MLX5_MKC_ACCESS_MODE_MTT = 0x1, + UCT_IB_MLX5_MKC_ACCESS_MODE_KLMS = 0x2, + UCT_IB_MLX5_MKC_ACCESS_MODE_KSM = 0x3, + UCT_IB_MLX5_MKC_ACCESS_MODE_MEMIC = 0x5 +}; + +struct uct_ib_mlx5_mkc_bits { + uint8_t reserved_at_0[0x1]; + uint8_t free[0x1]; + uint8_t reserved_at_2[0x1]; + uint8_t access_mode_4_2[0x3]; + uint8_t reserved_at_6[0x7]; + uint8_t relaxed_ordering_write[0x1]; + uint8_t reserved_at_e[0x1]; + uint8_t small_fence_on_rdma_read_response[0x1]; + uint8_t umr_en[0x1]; + uint8_t a[0x1]; + uint8_t rw[0x1]; + uint8_t rr[0x1]; + uint8_t lw[0x1]; + uint8_t lr[0x1]; + uint8_t access_mode_1_0[0x2]; + uint8_t reserved_at_18[0x8]; + + uint8_t qpn[0x18]; + uint8_t mkey_7_0[0x8]; + + uint8_t reserved_at_40[0x20]; + + uint8_t length64[0x1]; + uint8_t bsf_en[0x1]; + uint8_t sync_umr[0x1]; + uint8_t reserved_at_63[0x2]; + uint8_t expected_sigerr_count[0x1]; + uint8_t reserved_at_66[0x1]; + uint8_t en_rinval[0x1]; + uint8_t pd[0x18]; + + uint8_t start_addr[0x40]; + + uint8_t len[0x40]; + + uint8_t bsf_octword_size[0x20]; + + uint8_t reserved_at_120[0x80]; + + uint8_t translations_octword_size[0x20]; + + uint8_t reserved_at_1c0[0x1b]; + uint8_t log_entity_size[0x5]; + + uint8_t reserved_at_1e0[0x20]; +}; + +struct uct_ib_mlx5_create_mkey_in_bits { + uint8_t opcode[0x10]; + uint8_t uid[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x20]; + + uint8_t pg_access[0x1]; + uint8_t mkey_umem_valid[0x1]; + uint8_t cmd_on_behalf[0x1]; + uint8_t reserved_at_63[0xd]; + uint8_t function_id[0x10]; + + struct uct_ib_mlx5_mkc_bits memory_key_mkey_entry; + + uint8_t 
reserved_at_280[0x80]; + + uint8_t translations_octword_actual_size[0x20]; + + uint8_t mkey_umem_id[0x20]; + + uint8_t mkey_umem_offset[0x40]; + + uint8_t reserved_at_380[0x500]; + + uint8_t klm_pas_mtt[0][0x20]; +}; + +struct uct_ib_mlx5_klm_bits { + uint8_t byte_count[0x20]; + + uint8_t mkey[0x20]; + + uint8_t address[0x40]; +}; + +struct uct_ib_mlx5_create_mkey_out_bits { + uint8_t status[0x8]; + uint8_t reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x8]; + uint8_t mkey_index[0x18]; + + uint8_t reserved_at_60[0x20]; +}; + +struct uct_ib_mlx5_set_xrq_dc_params_entry_out_bits { + uint8_t status[0x8]; + uint8_t reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x40]; +}; + +struct uct_ib_mlx5_set_xrq_dc_params_entry_in_bits { + uint8_t opcode[0x10]; + uint8_t reserved_at_10[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x8]; + uint8_t xrqn[0x18]; + + uint8_t reserved_at_60[0x20]; + + uint8_t reserved_at_80[0x3]; + uint8_t ack_timeout[0x5]; + uint8_t reserved_at_88[0x4]; + uint8_t multi_path[0x1]; + uint8_t mtu[0x3]; + uint8_t pkey_table_index[0x10]; + + uint8_t reserved_at_a0[0xc]; + uint8_t cnak_reverse_sl[0x4]; + uint8_t reserved_at_b0[0x4]; + uint8_t reverse_sl[0x4]; + uint8_t reserved_at_b8[0x4]; + uint8_t sl[0x4]; + + uint8_t dc_access_key[0x40]; + + uint8_t reserved_at_100[0x80]; +}; + +enum { + UCT_IB_MLX5_DCTC_STATE_ACTIVE = 0x0, + UCT_IB_MLX5_DCTC_STATE_DRAINING = 0x1, + UCT_IB_MLX5_DCTC_STATE_DRAINED = 0x2 +}; + +enum { + UCT_IB_MLX5_DCTC_CS_RES_DISABLE = 0x0, + UCT_IB_MLX5_DCTC_CS_RES_NA = 0x1, + UCT_IB_MLX5_DCTC_CS_RES_UP_TO_64B = 0x2 +}; + +enum { + UCT_IB_MLX5_DCTC_MTU_256_BYTES = 0x1, + UCT_IB_MLX5_DCTC_MTU_512_BYTES = 0x2, + UCT_IB_MLX5_DCTC_MTU_1K_BYTES = 0x3, + UCT_IB_MLX5_DCTC_MTU_2K_BYTES = 0x4, + UCT_IB_MLX5_DCTC_MTU_4K_BYTES = 0x5 +}; + +struct uct_ib_mlx5_dctc_bits { + uint8_t reserved_at_0[0x4]; + uint8_t state[0x4]; + uint8_t 
reserved_at_8[0x10]; + uint8_t offload_type[0x4]; + uint8_t reserved_at_1c[0x4]; + + uint8_t reserved_at_20[0x8]; + uint8_t user_index[0x18]; + + uint8_t reserved_at_40[0x8]; + uint8_t cqn[0x18]; + + uint8_t counter_set_id[0x8]; + uint8_t atomic_mode[0x4]; + uint8_t rre[0x1]; + uint8_t rwe[0x1]; + uint8_t rae[0x1]; + uint8_t atomic_like_write_en[0x1]; + uint8_t latency_sensitive[0x1]; + uint8_t rlky[0x1]; + uint8_t free_ar[0x1]; + uint8_t reserved_at_73[0xd]; + + uint8_t reserved_at_80[0x8]; + uint8_t cs_res[0x8]; + uint8_t reserved_at_90[0x3]; + uint8_t min_rnr_nak[0x5]; + uint8_t reserved_at_98[0x8]; + + uint8_t reserved_at_a0[0x8]; + uint8_t srqn_xrqn[0x18]; + + uint8_t reserved_at_c0[0x8]; + uint8_t pd[0x18]; + + uint8_t tclass[0x8]; + uint8_t reserved_at_e8[0x4]; + uint8_t flow_label[0x14]; + + uint8_t dc_access_key[0x40]; + + uint8_t reserved_at_140[0x5]; + uint8_t mtu[0x3]; + uint8_t port[0x8]; + uint8_t pkey_index[0x10]; + + uint8_t reserved_at_160[0x8]; + uint8_t my_addr_index[0x8]; + uint8_t reserved_at_170[0x8]; + uint8_t hop_limit[0x8]; + + uint8_t dc_access_key_violation_count[0x20]; + + uint8_t reserved_at_1a0[0x14]; + uint8_t dei_cfi[0x1]; + uint8_t eth_prio[0x3]; + uint8_t ecn[0x2]; + uint8_t dscp[0x6]; + + uint8_t reserved_at_1c0[0x40]; +}; + +struct uct_ib_mlx5_create_dct_out_bits { + uint8_t status[0x8]; + uint8_t reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x8]; + uint8_t dctn[0x18]; + + uint8_t reserved_at_60[0x20]; +}; + +struct uct_ib_mlx5_create_dct_in_bits { + uint8_t opcode[0x10]; + uint8_t uid[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x40]; + + struct uct_ib_mlx5_dctc_bits dct_context_entry; + + uint8_t reserved_at_280[0x180]; +}; + +struct uct_ib_mlx5_drain_dct_out_bits { + uint8_t status[0x8]; + uint8_t reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x40]; +}; + +struct uct_ib_mlx5_drain_dct_in_bits { + uint8_t opcode[0x10]; + 
uint8_t uid[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x8]; + uint8_t dctn[0x18]; + + uint8_t reserved_at_60[0x20]; +}; + +struct uct_ib_mlx5_cmd_pas_bits { + uint8_t pa_h[0x20]; + + uint8_t pa_l[0x14]; + uint8_t reserved_at_34[0xc]; +}; + +enum { + UCT_IB_MLX5_WQ_WQ_TYPE_WQ_LINKED_LIST = 0x0, + UCT_IB_MLX5_WQ_WQ_TYPE_WQ_CYCLIC = 0x1 +}; + +enum { + UCT_IB_MLX5_WQ_END_PADDING_MODE_END_PAD_NONE = 0x0, + UCT_IB_MLX5_WQ_END_PADDING_MODE_END_PAD_ALIGN = 0x1 +}; + +struct uct_ib_mlx5_wq_bits { + uint8_t wq_type[0x4]; + uint8_t wq_signature[0x1]; + uint8_t end_padding_mode[0x2]; + uint8_t cd_slave[0x1]; + uint8_t reserved_at_8[0x18]; + + uint8_t hds_skip_first_sge[0x1]; + uint8_t log2_hds_buf_size[0x3]; + uint8_t reserved_at_24[0x7]; + uint8_t page_offset[0x5]; + uint8_t lwm[0x10]; + + uint8_t reserved_at_40[0x8]; + uint8_t pd[0x18]; + + uint8_t reserved_at_60[0x8]; + uint8_t uar_page[0x18]; + + uint8_t dbr_addr[0x40]; + + uint8_t hw_counter[0x20]; + + uint8_t sw_counter[0x20]; + + uint8_t reserved_at_100[0xc]; + uint8_t log_wq_stride[0x4]; + uint8_t reserved_at_110[0x3]; + uint8_t log_wq_pg_sz[0x5]; + uint8_t reserved_at_118[0x3]; + uint8_t log_wq_sz[0x5]; + + uint8_t dbr_umem_valid[0x1]; + uint8_t wq_umem_valid[0x1]; + uint8_t reserved_at_122[0x1]; + uint8_t log_hairpin_num_packets[0x5]; + uint8_t reserved_at_128[0x3]; + uint8_t log_hairpin_data_sz[0x5]; + uint8_t reserved_at_130[0x4]; + uint8_t log_wqe_num_of_strides[0x4]; + uint8_t two_byte_shift_en[0x1]; + uint8_t reserved_at_139[0x4]; + uint8_t log_wqe_stride_size[0x3]; + + uint8_t dbr_umem_id[0x20]; + + uint8_t wq_umem_id[0x20]; + + uint8_t reserved_at_180[0x480]; + + struct uct_ib_mlx5_cmd_pas_bits pas[0]; +}; + +enum { + UCT_IB_MLX5_XRQC_STATE_GOOD = 0x0, + UCT_IB_MLX5_XRQC_STATE_ERROR = 0x1 +}; + +enum { + UCT_IB_MLX5_XRQC_TOPOLOGY_NO_SPECIAL_TOPOLOGY = 0x0, + UCT_IB_MLX5_XRQC_TOPOLOGY_TAG_MATCHING = 0x1 +}; + +enum { + UCT_IB_MLX5_XRQC_OFFLOAD_RNDV = 0x1 +}; + 
+struct uct_ib_mlx5_tag_matching_topology_context_bits { + uint8_t log_matching_list_sz[0x4]; + uint8_t reserved_at_4[0xc]; + uint8_t append_next_index[0x10]; + + uint8_t sw_phase_cnt[0x10]; + uint8_t hw_phase_cnt[0x10]; + + uint8_t reserved_at_40[0x40]; +}; + +struct uct_ib_mlx5_xrqc_bits { + uint8_t state[0x4]; + uint8_t rlkey[0x1]; + uint8_t reserved_at_5[0xf]; + uint8_t topology[0x4]; + uint8_t reserved_at_18[0x4]; + uint8_t offload[0x4]; + + uint8_t reserved_at_20[0x8]; + uint8_t user_index[0x18]; + + uint8_t reserved_at_40[0x8]; + uint8_t cqn[0x18]; + + uint8_t reserved_at_60[0x1f]; + uint8_t dc[0x1]; + + uint8_t reserved_at_80[0x80]; + + struct uct_ib_mlx5_tag_matching_topology_context_bits tag_matching_topology_context; + + uint8_t reserved_at_180[0x280]; + + struct uct_ib_mlx5_wq_bits wq; +}; + +struct uct_ib_mlx5_create_xrq_out_bits { + uint8_t status[0x8]; + uint8_t reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x8]; + uint8_t xrqn[0x18]; + + uint8_t reserved_at_60[0x20]; +}; + +struct uct_ib_mlx5_create_xrq_in_bits { + uint8_t opcode[0x10]; + uint8_t uid[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x40]; + + struct uct_ib_mlx5_xrqc_bits xrq_context; +}; + +enum { + UCT_IB_MLX5_ADS_STAT_RATE_NO_LIMIT = 0x0, + UCT_IB_MLX5_ADS_STAT_RATE_2_5GBPS = 0x7, + UCT_IB_MLX5_ADS_STAT_RATE_10GBPS = 0x8, + UCT_IB_MLX5_ADS_STAT_RATE_30GBPS = 0x9, + UCT_IB_MLX5_ADS_STAT_RATE_5GBPS = 0xa, + UCT_IB_MLX5_ADS_STAT_RATE_20GBPS = 0xb, + UCT_IB_MLX5_ADS_STAT_RATE_40GBPS = 0xc, + UCT_IB_MLX5_ADS_STAT_RATE_60GBPS = 0xd, + UCT_IB_MLX5_ADS_STAT_RATE_80GBPS = 0xe, + UCT_IB_MLX5_ADS_STAT_RATE_120GBPS = 0xf +}; + +struct uct_ib_mlx5_ads_bits { + uint8_t fl[0x1]; + uint8_t free_ar[0x1]; + uint8_t reserved_at_2[0xe]; + uint8_t pkey_index[0x10]; + + uint8_t reserved_at_20[0x8]; + uint8_t grh[0x1]; + uint8_t mlid[0x7]; + uint8_t rlid[0x10]; + + uint8_t ack_timeout[0x5]; + uint8_t reserved_at_45[0x3]; + uint8_t 
src_addr_index[0x8]; + uint8_t log_rtm[0x4]; + uint8_t stat_rate[0x4]; + uint8_t hop_limit[0x8]; + + uint8_t reserved_at_60[0x4]; + uint8_t tclass[0x8]; + uint8_t flow_label[0x14]; + + uint8_t rgid_rip[16][0x8]; + + uint8_t reserved_at_100[0x4]; + uint8_t f_dscp[0x1]; + uint8_t f_ecn[0x1]; + uint8_t reserved_at_106[0x1]; + uint8_t f_eth_prio[0x1]; + uint8_t ecn[0x2]; + uint8_t dscp[0x6]; + uint8_t udp_sport[0x10]; + + uint8_t dei_cfi[0x1]; + uint8_t eth_prio[0x3]; + uint8_t sl[0x4]; + uint8_t vhca_port_num[0x8]; + uint8_t rmac_47_32[0x10]; + + uint8_t rmac_31_0[0x20]; +}; + +enum { + UCT_IB_MLX5_QPC_STATE_RST = 0x0, + UCT_IB_MLX5_QPC_STATE_INIT = 0x1, + UCT_IB_MLX5_QPC_STATE_RTR = 0x2, + UCT_IB_MLX5_QPC_STATE_RTS = 0x3, + UCT_IB_MLX5_QPC_STATE_SQER = 0x4, + UCT_IB_MLX5_QPC_STATE_ERR = 0x6, + UCT_IB_MLX5_QPC_STATE_SQD = 0x7, + UCT_IB_MLX5_QPC_STATE_SUSPENDED = 0x9 +}; + +enum { + UCT_IB_MLX5_QPC_ST_RC = 0x0, + UCT_IB_MLX5_QPC_ST_UC = 0x1, + UCT_IB_MLX5_QPC_ST_UD = 0x2, + UCT_IB_MLX5_QPC_ST_XRC = 0x3, + UCT_IB_MLX5_QPC_ST_DCI = 0x5, + UCT_IB_MLX5_QPC_ST_QP0 = 0x7, + UCT_IB_MLX5_QPC_ST_QP1 = 0x8, + UCT_IB_MLX5_QPC_ST_RAW_DATAGRAM = 0x9, + UCT_IB_MLX5_QPC_ST_REG_UMR = 0xc +}; + +enum { + UCT_IB_MLX5_QPC_PM_STATE_ARMED = 0x0, + UCT_IB_MLX5_QPC_PM_STATE_REARM = 0x1, + UCT_IB_MLX5_QPC_PM_STATE_RESERVED = 0x2, + UCT_IB_MLX5_QPC_PM_STATE_MIGRATED = 0x3 +}; + +enum { + UCT_IB_MLX5_QPC_OFFLOAD_TYPE_RNDV = 0x1 +}; + +enum { + UCT_IB_MLX5_QPC_END_PADDING_MODE_SCATTER_AS_IS = 0x0, + UCT_IB_MLX5_QPC_END_PADDING_MODE_PAD_TO_CACHE_LINE_ALIGNMENT = 0x1 +}; + +enum { + UCT_IB_MLX5_QPC_MTU_256_BYTES = 0x1, + UCT_IB_MLX5_QPC_MTU_512_BYTES = 0x2, + UCT_IB_MLX5_QPC_MTU_1K_BYTES = 0x3, + UCT_IB_MLX5_QPC_MTU_2K_BYTES = 0x4, + UCT_IB_MLX5_QPC_MTU_4K_BYTES = 0x5, + UCT_IB_MLX5_QPC_MTU_RAW_ETHERNET_QP = 0x7 +}; + +enum { + UCT_IB_MLX5_QPC_ATOMIC_MODE_IB_SPEC = 0x1, + UCT_IB_MLX5_QPC_ATOMIC_MODE_ONLY_8B = 0x2, + UCT_IB_MLX5_QPC_ATOMIC_MODE_UP_TO_8B = 0x3, + 
UCT_IB_MLX5_QPC_ATOMIC_MODE_UP_TO_16B = 0x4, + UCT_IB_MLX5_QPC_ATOMIC_MODE_UP_TO_32B = 0x5, + UCT_IB_MLX5_QPC_ATOMIC_MODE_UP_TO_64B = 0x6, + UCT_IB_MLX5_QPC_ATOMIC_MODE_UP_TO_128B = 0x7, + UCT_IB_MLX5_QPC_ATOMIC_MODE_UP_TO_256B = 0x8 +}; + /* Encodings produced by uct_ib_mlx5_qpc_cs_req() */ +enum { + UCT_IB_MLX5_QPC_CS_REQ_DISABLE = 0x0, + UCT_IB_MLX5_QPC_CS_REQ_UP_TO_32B = 0x11, + UCT_IB_MLX5_QPC_CS_REQ_UP_TO_64B = 0x22 +}; + /* Map a size to a CS_REQ encoding: larger than 32 selects UP_TO_64B, any other non-zero size selects UP_TO_32B, zero selects DISABLE. NOTE(review): size is presumably in bytes - confirm with callers */ +static inline unsigned uct_ib_mlx5_qpc_cs_req(unsigned size) +{ + return (size > 32) ? UCT_IB_MLX5_QPC_CS_REQ_UP_TO_64B : + size ? UCT_IB_MLX5_QPC_CS_REQ_UP_TO_32B : + UCT_IB_MLX5_QPC_CS_REQ_DISABLE; +} + /* Encodings produced by uct_ib_mlx5_qpc_cs_res(); note they differ numerically from the CS_REQ set */ +enum { + UCT_IB_MLX5_QPC_CS_RES_DISABLE = 0x0, + UCT_IB_MLX5_QPC_CS_RES_UP_TO_32B = 0x1, + UCT_IB_MLX5_QPC_CS_RES_UP_TO_64B = 0x2 +}; + /* Same size thresholds as uct_ib_mlx5_qpc_cs_req(), returning the CS_RES encodings instead */ +static inline unsigned uct_ib_mlx5_qpc_cs_res(unsigned size) +{ + return (size > 32) ? UCT_IB_MLX5_QPC_CS_RES_UP_TO_64B : + size ? UCT_IB_MLX5_QPC_CS_RES_UP_TO_32B : + UCT_IB_MLX5_QPC_CS_RES_DISABLE; +} + /* QP context layout: each array length is a field width in bits, as the reserved_at_<bit offset> names show */ +struct uct_ib_mlx5_qpc_bits { + uint8_t state[0x4]; + uint8_t lag_tx_port_affinity[0x4]; + uint8_t st[0x8]; + uint8_t reserved_at_10[0x3]; + uint8_t pm_state[0x2]; + uint8_t reserved_at_15[0x1]; + uint8_t req_e2e_credit_mode[0x2]; + uint8_t offload_type[0x4]; + uint8_t end_padding_mode[0x2]; + uint8_t reserved_at_1e[0x2]; + + uint8_t wq_signature[0x1]; + uint8_t block_lb_mc[0x1]; + uint8_t atomic_like_write_en[0x1]; + uint8_t latency_sensitive[0x1]; + uint8_t reserved_at_24[0x1]; + uint8_t drain_sigerr[0x1]; + uint8_t reserved_at_26[0x2]; + uint8_t pd[0x18]; + + uint8_t mtu[0x3]; + uint8_t log_msg_max[0x5]; + uint8_t reserved_at_48[0x1]; + uint8_t log_rq_size[0x4]; + uint8_t log_rq_stride[0x3]; + uint8_t no_sq[0x1]; + uint8_t log_sq_size[0x4]; + uint8_t reserved_at_55[0x6]; + uint8_t rlky[0x1]; + uint8_t ulp_stateless_offload_mode[0x4]; + + uint8_t counter_set_id[0x8]; + uint8_t uar_page[0x18]; + + uint8_t reserved_at_80[0x8]; + uint8_t user_index[0x18]; + + uint8_t reserved_at_a0[0x3]; + uint8_t log_page_size[0x5]; + uint8_t remote_qpn[0x18]; + + struct
uct_ib_mlx5_ads_bits primary_address_path; + + struct uct_ib_mlx5_ads_bits secondary_address_path; + + uint8_t log_ack_req_freq[0x4]; + uint8_t reserved_at_384[0x4]; + uint8_t log_sra_max[0x3]; + uint8_t reserved_at_38b[0x2]; + uint8_t retry_count[0x3]; + uint8_t rnr_retry[0x3]; + uint8_t reserved_at_393[0x1]; + uint8_t fre[0x1]; + uint8_t cur_rnr_retry[0x3]; + uint8_t cur_retry_count[0x3]; + uint8_t reserved_at_39b[0x5]; + + uint8_t reserved_at_3a0[0x20]; + + uint8_t reserved_at_3c0[0x8]; + uint8_t next_send_psn[0x18]; + + uint8_t reserved_at_3e0[0x8]; + uint8_t cqn_snd[0x18]; + + uint8_t reserved_at_400[0x8]; + uint8_t deth_sqpn[0x18]; + + uint8_t reserved_at_420[0x20]; + + uint8_t reserved_at_440[0x8]; + uint8_t last_acked_psn[0x18]; + + uint8_t reserved_at_460[0x8]; + uint8_t ssn[0x18]; + + uint8_t reserved_at_480[0x8]; + uint8_t log_rra_max[0x3]; + uint8_t reserved_at_48b[0x1]; + uint8_t atomic_mode[0x4]; + uint8_t rre[0x1]; + uint8_t rwe[0x1]; + uint8_t rae[0x1]; + uint8_t reserved_at_493[0x1]; + uint8_t page_offset[0x6]; + uint8_t reserved_at_49a[0x3]; + uint8_t cd_slave_receive[0x1]; + uint8_t cd_slave_send[0x1]; + uint8_t cd_master[0x1]; + + uint8_t reserved_at_4a0[0x3]; + uint8_t min_rnr_nak[0x5]; + uint8_t next_rcv_psn[0x18]; + + uint8_t reserved_at_4c0[0x8]; + uint8_t xrcd[0x18]; + + uint8_t reserved_at_4e0[0x8]; + uint8_t cqn_rcv[0x18]; + + uint8_t dbr_addr[0x40]; + + uint8_t q_key[0x20]; + + uint8_t reserved_at_560[0x5]; + uint8_t rq_type[0x3]; + uint8_t srqn_rmpn_xrqn[0x18]; + + uint8_t reserved_at_580[0x8]; + uint8_t rmsn[0x18]; + + uint8_t hw_sq_wqebb_counter[0x10]; + uint8_t sw_sq_wqebb_counter[0x10]; + + uint8_t hw_rq_counter[0x20]; + + uint8_t sw_rq_counter[0x20]; + + uint8_t reserved_at_600[0x20]; + + uint8_t reserved_at_620[0xf]; + uint8_t cgs[0x1]; + uint8_t cs_req[0x8]; + uint8_t cs_res[0x8]; + + uint8_t dc_access_key[0x40]; + + uint8_t reserved_at_680[0x3]; + uint8_t dbr_umem_valid[0x1]; + uint8_t reserved_at_684[0x1c]; + + uint8_t 
reserved_at_6a0[0x80]; + + uint8_t dbr_umem_id[0x20]; +}; + +struct uct_ib_mlx5_create_qp_out_bits { + uint8_t status[0x8]; + uint8_t reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x8]; + uint8_t qpn[0x18]; + + uint8_t reserved_at_60[0x20]; +}; + +struct uct_ib_mlx5_create_qp_in_bits { + uint8_t opcode[0x10]; + uint8_t uid[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x40]; + + uint8_t opt_param_mask[0x20]; + + uint8_t reserved_at_a0[0x20]; + + struct uct_ib_mlx5_qpc_bits qpc; + + uint8_t reserved_at_800[0x40]; + + uint8_t wq_umem_id[0x20]; + + uint8_t wq_umem_valid[0x1]; + uint8_t reserved_at_861[0x1f]; + + uint8_t pas[0][0x40]; +}; + +struct uct_ib_mlx5_init2rtr_qp_out_bits { + uint8_t status[0x8]; + uint8_t reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x40]; +}; + +struct uct_ib_mlx5_init2rtr_qp_in_bits { + uint8_t opcode[0x10]; + uint8_t uid[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x8]; + uint8_t qpn[0x18]; + + uint8_t reserved_at_60[0x20]; + + uint8_t opt_param_mask[0x20]; + + uint8_t reserved_at_a0[0x20]; + + struct uct_ib_mlx5_qpc_bits qpc; + + uint8_t reserved_at_800[0x80]; +}; + +struct uct_ib_mlx5_rtr2rts_qp_out_bits { + uint8_t status[0x8]; + uint8_t reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x40]; +}; + +struct uct_ib_mlx5_rtr2rts_qp_in_bits { + uint8_t opcode[0x10]; + uint8_t uid[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x8]; + uint8_t qpn[0x18]; + + uint8_t reserved_at_60[0x20]; + + uint8_t opt_param_mask[0x20]; + + uint8_t reserved_at_a0[0x20]; + + struct uct_ib_mlx5_qpc_bits qpc; + + uint8_t reserved_at_800[0x80]; +}; + +struct uct_ib_mlx5_rst2init_qp_out_bits { + uint8_t status[0x8]; + uint8_t reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x40]; +}; + +struct 
uct_ib_mlx5_rst2init_qp_in_bits { + uint8_t opcode[0x10]; + uint8_t uid[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x8]; + uint8_t qpn[0x18]; + + uint8_t reserved_at_60[0x20]; + + uint8_t opt_param_mask[0x20]; + + uint8_t reserved_at_a0[0x20]; + + struct uct_ib_mlx5_qpc_bits qpc; + + uint8_t reserved_at_800[0x80]; +}; + +struct uct_ib_mlx5_modify_qp_out_bits { + uint8_t status[0x8]; + uint8_t reserved_at_8[0x18]; + + uint8_t syndrome[0x20]; + + uint8_t reserved_at_40[0x40]; +}; + +struct uct_ib_mlx5_modify_qp_in_bits { + uint8_t opcode[0x10]; + uint8_t uid[0x10]; + + uint8_t reserved_at_20[0x10]; + uint8_t op_mod[0x10]; + + uint8_t reserved_at_40[0x8]; + uint8_t qpn[0x18]; + + uint8_t reserved_at_60[0x20]; +}; +#endif diff --git a/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c b/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c new file mode 100644 index 0000000..6376209 --- /dev/null +++ b/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c @@ -0,0 +1,962 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include +#include "ib_mlx5_ifc.h" + +#include +#include + /* State of a region registered as mr_num chunk MRs (mrs[]) that are tied together by KSM mkeys: length is the registered length, atomic_dvmr is the DevX mkey created for the atomic offset */ +typedef struct { + struct mlx5dv_devx_obj *atomic_dvmr; + int mr_num; + size_t length; + struct ibv_mr *mrs[]; +} uct_ib_mlx5_ksm_data_t; + /* Memory handle. HAVE_DEVX reshapes the unions: without it there is one union holding only mr; with it the first union is { mr, dvmr } and a second union { atomic_dvmr, ksm_data } follows, so mr and dvmr share storage */ +typedef struct uct_ib_mlx5_mem { + uct_ib_mem_t super; + union { + struct ibv_mr *mr; +#if HAVE_DEVX + struct mlx5dv_devx_obj *dvmr; + }; + union { + struct mlx5dv_devx_obj *atomic_dvmr; + uct_ib_mlx5_ksm_data_t *ksm_data; +#endif + }; +} uct_ib_mlx5_mem_t; + /* Header stored at the start of every dbrec pool page; mem is the DevX umem registration of that page */ +typedef struct uct_ib_mlx5_dbrec_page { + struct mlx5dv_devx_umem *mem; +} uct_ib_mlx5_dbrec_page_t; + + /* Register length bytes at address on md->pd through uct_ib_reg_mr() and initialize the handle from the resulting MR; returns the registration status on failure */ +static ucs_status_t uct_ib_mlx5_reg_key(uct_ib_md_t *md, void *address, + size_t length, uint64_t access, + uct_ib_mem_t *ib_memh) +{ + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + ucs_status_t status; + + status = uct_ib_reg_mr(md->pd, address, length, access, &memh->mr); + if (status != UCS_OK) { + return status; + } + + uct_ib_memh_init_from_mr(&memh->super, memh->mr); + + return UCS_OK; +} + /* Deregister the MR stored in the handle; md is unused */ +static ucs_status_t uct_ib_mlx5_dereg_key(uct_ib_md_t *md, uct_ib_mem_t *ib_memh) +{ + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + + return uct_ib_dereg_mr(memh->mr); +} + /* When built with HAVE_DECL_IBV_ADVISE_MR, issue a PREFETCH_WRITE advise with the FLUSH flag over addr/length for handles flagged UCT_IB_MEM_FLAG_ODP. Handles without that flag, and builds without ibv_advise_mr, return UCS_OK without doing anything */ +static ucs_status_t +uct_ib_mlx5_mem_prefetch(uct_ib_md_t *md, uct_ib_mem_t *ib_memh, void *addr, + size_t length) +{ +#if HAVE_DECL_IBV_ADVISE_MR + struct ibv_sge sg_list; + int ret; + + if (!(ib_memh->flags & UCT_IB_MEM_FLAG_ODP)) { + return UCS_OK; + } + + ucs_debug("memh %p prefetch %p length %zu", ib_memh, addr, length); + + sg_list.lkey = ib_memh->lkey; + sg_list.addr = (uintptr_t)addr; + sg_list.length = length; + + ret = UCS_PROFILE_CALL(ibv_advise_mr, md->pd, + IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE, + IB_UVERBS_ADVISE_MR_FLAG_FLUSH, &sg_list, 1); + if (ret) { /* any non-zero result is mapped to UCS_ERR_IO_ERROR */ + ucs_error("ibv_advise_mr(addr=%p length=%zu) returned %d: %m", + addr, length, ret); + return UCS_ERR_IO_ERROR; + } +#endif + return UCS_OK; +} + /* Return 1 if uct_ib_device_is_port_roce() reports any port of dev as RoCE, otherwise 0 */ +static int uct_ib_mlx5_has_roce_port(uct_ib_device_t *dev) +{ + int
port_num; + + for (port_num = dev->first_port; + port_num < dev->first_port + dev->num_ports; + port_num++) + { + if (uct_ib_device_is_port_roce(dev, port_num)) { + return 1; + } + } + + return 0; +} + +#if HAVE_DEVX + /* Size in bytes of a CREATE_MKEY input mailbox followed by list_size KLM entries */ +static size_t uct_ib_mlx5_calc_mkey_inlen(int list_size) +{ + return UCT_IB_MLX5DV_ST_SZ_BYTES(create_mkey_in) + + UCT_IB_MLX5DV_ST_SZ_BYTES(klm) * list_size; +} + /* Allocate a zero-filled CREATE_MKEY mailbox with room for list_size entries and return it in *in_p; the callers in this file release it with ucs_free(). Returns UCS_ERR_NO_MEMORY if the allocation fails */ +static ucs_status_t uct_ib_mlx5_alloc_mkey_inbox(int list_size, char **in_p) +{ + size_t inlen; + char *in; + + inlen = uct_ib_mlx5_calc_mkey_inlen(list_size); + in = ucs_calloc(1, inlen, "mkey mailbox"); + if (in == NULL) { + return UCS_ERR_NO_MEMORY; + } + + *in_p = in; + return UCS_OK; +} + /* Create a KSM-mode mkey through DevX. The caller passes a mailbox (in) whose translation entries are already filled; this function fills the command header and mkey context, then creates the object. On success *mr_p receives the DevX object and *mkey the key built from the returned mkey_index and the low byte of addr */ +static ucs_status_t uct_ib_mlx5_devx_reg_ksm(uct_ib_mlx5_md_t *md, + intptr_t addr, size_t length, + int list_size, size_t entity_size, + char *in, + struct mlx5dv_devx_obj **mr_p, + uint32_t *mkey) +{ + char out[UCT_IB_MLX5DV_ST_SZ_BYTES(create_mkey_out)] = {}; + struct mlx5dv_pd dvpd = {}; + struct mlx5dv_obj dv = {}; + struct mlx5dv_devx_obj *mr; + void *mkc; + + dv.pd.in = md->super.pd; + dv.pd.out = &dvpd; + mlx5dv_init_obj(&dv, MLX5DV_OBJ_PD); /* fills dvpd, whose pdn is written into the mkey context below */ + + UCT_IB_MLX5DV_SET(create_mkey_in, in, opcode, UCT_IB_MLX5_CMD_OP_CREATE_MKEY); + mkc = UCT_IB_MLX5DV_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + UCT_IB_MLX5DV_SET(mkc, mkc, access_mode_1_0, UCT_IB_MLX5_MKC_ACCESS_MODE_KSM); + UCT_IB_MLX5DV_SET(mkc, mkc, a, 1); + UCT_IB_MLX5DV_SET(mkc, mkc, rw, 1); + UCT_IB_MLX5DV_SET(mkc, mkc, rr, 1); + UCT_IB_MLX5DV_SET(mkc, mkc, lw, 1); + UCT_IB_MLX5DV_SET(mkc, mkc, lr, 1); + UCT_IB_MLX5DV_SET(mkc, mkc, pd, dvpd.pdn); + UCT_IB_MLX5DV_SET(mkc, mkc, translations_octword_size, list_size); + UCT_IB_MLX5DV_SET(mkc, mkc, log_entity_size, ucs_ilog2(entity_size)); /* NOTE(review): presumably entity_size must be a power of two - confirm */ + UCT_IB_MLX5DV_SET(mkc, mkc, qpn, 0xffffff); + UCT_IB_MLX5DV_SET(mkc, mkc, mkey_7_0, addr & 0xff); /* low key byte is taken from the low address byte */ + UCT_IB_MLX5DV_SET64(mkc, mkc, start_addr, addr); + UCT_IB_MLX5DV_SET64(mkc, mkc, len, length); + UCT_IB_MLX5DV_SET(create_mkey_in, in,
translations_octword_actual_size, list_size); + + mr = mlx5dv_devx_obj_create(md->super.dev.ibv_context, in, + uct_ib_mlx5_calc_mkey_inlen(list_size), + out, sizeof(out)); + if (mr == NULL) { /* every creation failure is reported as UCS_ERR_UNSUPPORTED */ + ucs_debug("mlx5dv_devx_obj_create(CREATE_MKEY, mode=KSM) failed, syndrome %x: %m", + UCT_IB_MLX5DV_GET(create_mkey_out, out, syndrome)); + return UCS_ERR_UNSUPPORTED; + } + + *mr_p = mr; + *mkey = (UCT_IB_MLX5DV_GET(create_mkey_out, out, mkey_index) << 8) | + (addr & 0xff); + + return UCS_OK; +} + /* Build one KLM entry per MR in ksm_data->mrs and create a KSM mkey over them. The mkey starts at the first MR address shifted by off, spans length bytes, and uses the first MR length as the entity size. Returns the mailbox allocation status or the status of uct_ib_mlx5_devx_reg_ksm(). NOTE(review): mrs[0] is read unconditionally, so mr_num is presumably at least 1 - confirm with callers */ +static ucs_status_t +uct_ib_mlx5_devx_reg_ksm_data(uct_ib_mlx5_md_t *md, + uct_ib_mlx5_ksm_data_t *ksm_data, + size_t length, off_t off, + struct mlx5dv_devx_obj **mr_p, + uint32_t *mkey) +{ + ucs_status_t status; + char *in; + void *klm; + int i; + + status = uct_ib_mlx5_alloc_mkey_inbox(ksm_data->mr_num, &in); + if (status != UCS_OK) { + return status; /* propagate the allocator status instead of a hard-coded code */ + } + + klm = UCT_IB_MLX5DV_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < ksm_data->mr_num; i++) { + UCT_IB_MLX5DV_SET64(klm, klm, address, (intptr_t)ksm_data->mrs[i]->addr); + UCT_IB_MLX5DV_SET(klm, klm, byte_count, ksm_data->mrs[i]->length); + UCT_IB_MLX5DV_SET(klm, klm, mkey, ksm_data->mrs[i]->lkey); + klm = UCS_PTR_BYTE_OFFSET(klm, UCT_IB_MLX5DV_ST_SZ_BYTES(klm)); /* advance to the next entry */ + } + + status = uct_ib_mlx5_devx_reg_ksm(md, (intptr_t)ksm_data->mrs[0]->addr + off, + length, ksm_data->mr_num, + ksm_data->mrs[0]->length, in, mr_p, mkey); + ucs_free(in); /* the mailbox is only needed for the duration of the command */ + return status; +} + /* Create the atomic-offset KSM mkey for a registered handle and store its key in atomic_rkey. Returns UCS_ERR_UNSUPPORTED when the MD lacks UCT_IB_MLX5_MD_FLAG_KSM */ +static ucs_status_t uct_ib_mlx5_devx_reg_atomic_key(uct_ib_md_t *ibmd, + uct_ib_mem_t *ib_memh) +{ + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + uct_ib_mlx5_md_t *md = ucs_derived_of(ibmd, uct_ib_mlx5_md_t); + off_t offset = uct_ib_md_atomic_offset(uct_ib_mlx5_md_get_atomic_mr_id(md)); + struct ibv_mr *mr = memh->mr; + size_t reg_length, length; + ucs_status_t status; + int list_size, i; + void *klm; + char *in; + intptr_t addr; + + if (!(md->flags & UCT_IB_MLX5_MD_FLAG_KSM)) { + return UCS_ERR_UNSUPPORTED; + } + + if
(memh->super.flags & UCT_IB_MEM_MULTITHREADED) { /* in this mode the mr/dvmr union holds the DevX mkey object, so the length is taken from ksm_data rather than read through memh->mr */ + return uct_ib_mlx5_devx_reg_ksm_data(md, memh->ksm_data, memh->ksm_data->length, + offset, &memh->ksm_data->atomic_dvmr, + &memh->super.atomic_rkey); + } + + reg_length = UCT_IB_MD_MAX_MR_SIZE; + addr = (intptr_t)mr->addr & ~(reg_length - 1); /* align the start down; assumes UCT_IB_MD_MAX_MR_SIZE is a power of two - TODO confirm */ + length = mr->length + (intptr_t)mr->addr - addr; /* MR length extended by the bytes added by the alignment */ + list_size = ucs_div_round_up(length, reg_length); + + status = uct_ib_mlx5_alloc_mkey_inbox(list_size, &in); + if (status != UCS_OK) { + return status; + } + + klm = UCT_IB_MLX5DV_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0; i < list_size; i++) { + if (i == list_size - 1) { /* last entry: remaining bytes; a plain remainder would give 0 when length is an exact multiple of reg_length */ + UCT_IB_MLX5DV_SET(klm, klm, byte_count, length - (i * reg_length)); + } else { + UCT_IB_MLX5DV_SET(klm, klm, byte_count, reg_length); + } + UCT_IB_MLX5DV_SET(klm, klm, mkey, mr->lkey); + UCT_IB_MLX5DV_SET64(klm, klm, address, addr + (i * reg_length)); + klm = UCS_PTR_BYTE_OFFSET(klm, UCT_IB_MLX5DV_ST_SZ_BYTES(klm)); + } + + status = uct_ib_mlx5_devx_reg_ksm(md, addr + offset, length, list_size, + reg_length, in, &memh->atomic_dvmr, + &memh->super.atomic_rkey); + if (status != UCS_OK) { + if (status == UCS_ERR_UNSUPPORTED) { + md->flags &= ~UCT_IB_MLX5_MD_FLAG_KSM; /* clear the flag so later calls on this MD return early */ + } + goto out; + } + + ucs_debug("KSM registered memory %p..%p offset 0x%lx on %s rkey 0x%x", + mr->addr, UCS_PTR_BYTE_OFFSET(mr->addr, mr->length), offset, + uct_ib_device_name(&md->super.dev), memh->super.atomic_rkey); +out: + ucs_free(in); + return status; +} + /* Destroy the atomic KSM mkey stored in memh->atomic_dvmr; returns UCS_ERR_IO_ERROR if the destroy call fails */ +static ucs_status_t uct_ib_mlx5_devx_dereg_atomic_key(uct_ib_md_t *ibmd, + uct_ib_mem_t *ib_memh) +{ + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + int ret; + + ret = mlx5dv_devx_obj_destroy(memh->atomic_dvmr); + if (ret != 0) { + ucs_error("mlx5dv_devx_obj_destroy(MKEY, ATOMIC KSM) failed: %m"); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + /* Register a region as chunk-sized MRs via uct_ib_md_handle_mr_list_multithreaded() and cover them with a single KSM mkey whose key becomes both lkey and rkey of the handle. Requires the KSM and INDIRECT_ATOMICS MD flags, otherwise returns UCS_ERR_UNSUPPORTED */ +static ucs_status_t uct_ib_mlx5_devx_reg_multithreaded(uct_ib_md_t *ibmd, + void *address, size_t length, + uint64_t access, + uct_ib_mem_t *ib_memh) +{ +
uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + uct_ib_mlx5_md_t *md = ucs_derived_of(ibmd, uct_ib_mlx5_md_t); + size_t chunk = md->super.config.mt_reg_chunk; + uct_ib_mlx5_ksm_data_t *ksm_data; + size_t ksm_data_size; + ucs_status_t status; + int mr_num; + + if (!(md->flags & UCT_IB_MLX5_MD_FLAG_KSM) || + !(md->flags & UCT_IB_MLX5_MD_FLAG_INDIRECT_ATOMICS)) { + return UCS_ERR_UNSUPPORTED; + } + + mr_num = ucs_div_round_up(length, chunk); /* number of chunk-sized pieces, rounded up */ + ksm_data_size = (mr_num * sizeof(*ksm_data->mrs)) + sizeof(*ksm_data); /* header followed by the flexible array of mr_num MR pointers */ + ksm_data = ucs_calloc(1, ksm_data_size, "ksm_data"); + if (!ksm_data) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + + ucs_trace("multithreaded register memory %p..%p chunks %d", + address, UCS_PTR_BYTE_OFFSET(address, length), mr_num); + + ksm_data->mr_num = mr_num; + status = uct_ib_md_handle_mr_list_multithreaded(ibmd, address, length, + access, chunk, ksm_data->mrs); + if (status != UCS_OK) { + goto err; + } + + status = uct_ib_mlx5_devx_reg_ksm_data(md, ksm_data, length, 0, + &memh->dvmr, &memh->super.lkey); + if (status != UCS_OK) { + goto err_dereg; + } + + ksm_data->length = length; + memh->ksm_data = ksm_data; + memh->super.rkey = memh->super.lkey; /* the KSM key serves as both local and remote key */ + return UCS_OK; + +err_dereg: /* NOTE(review): the result of this deregistration is ignored, while uct_ib_mlx5_devx_dereg_multithreaded() falls back to uct_ib_dereg_mrs() on UCS_ERR_UNSUPPORTED - confirm this path cannot leave MRs registered */ + uct_ib_md_handle_mr_list_multithreaded(ibmd, address, length, UCT_IB_MEM_DEREG, + chunk, ksm_data->mrs); +err: + ucs_free(ksm_data); + return status; +} + /* Tear down a multithreaded registration: destroy the atomic mkey when UCT_IB_MEM_FLAG_ATOMIC_MR is set, deregister the chunk MRs, destroy the KSM mkey and free ksm_data. Every step is attempted even after a failure; the status of the last failing step is returned */ +static ucs_status_t uct_ib_mlx5_devx_dereg_multithreaded(uct_ib_md_t *ibmd, + uct_ib_mem_t *ib_memh) +{ + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + size_t chunk = ibmd->config.mt_reg_chunk; + ucs_status_t s, status = UCS_OK; + int ret; + + if (memh->super.flags & UCT_IB_MEM_FLAG_ATOMIC_MR) { + ret = mlx5dv_devx_obj_destroy(memh->ksm_data->atomic_dvmr); + if (ret != 0) { + ucs_error("mlx5dv_devx_obj_destroy(MKEY, ATOMIC) failed: %m"); + status = UCS_ERR_IO_ERROR; + } + } + + s = uct_ib_md_handle_mr_list_multithreaded(ibmd, 0, memh->ksm_data->length, +
UCT_IB_MEM_DEREG, chunk, + memh->ksm_data->mrs); + if (s == UCS_ERR_UNSUPPORTED) { + s = uct_ib_dereg_mrs(memh->ksm_data->mrs, memh->ksm_data->mr_num); + if (s != UCS_OK) { + status = s; + } + } else if (s != UCS_OK) { + status = s; + } + + ret = mlx5dv_devx_obj_destroy(memh->dvmr); + if (ret != 0) { + ucs_error("mlx5dv_devx_obj_destroy(MKEY, KSM) failed: %m"); + status = UCS_ERR_IO_ERROR; + } + + ucs_free(memh->ksm_data); + + return status; +} + +static ucs_status_t uct_ib_mlx5_add_page(ucs_mpool_t *mp, size_t *size_p, void **page_p) +{ + uct_ib_mlx5_md_t *md = ucs_container_of(mp, uct_ib_mlx5_md_t, dbrec_pool); + uintptr_t ps = ucs_get_page_size(); + uct_ib_mlx5_dbrec_page_t *page; + size_t size = ucs_align_up(*size_p + sizeof(*page), ps); + int ret; + + ret = ucs_posix_memalign((void **)&page, ps, size, "devx dbrec"); + if (ret != 0) { + goto err; + } + + page->mem = mlx5dv_devx_umem_reg(md->super.dev.ibv_context, page, size, 0); + if (page->mem == NULL) { + goto err_free; + } + + *size_p = size; + *page_p = page + 1; + return UCS_OK; + +err_free: + ucs_free(page); +err: + return UCS_ERR_IO_ERROR; +} + +static void uct_ib_mlx5_init_dbrec(ucs_mpool_t *mp, void *obj, void *chunk) +{ + uct_ib_mlx5_dbrec_page_t *page = (uct_ib_mlx5_dbrec_page_t*)chunk - 1; + uct_ib_mlx5_dbrec_t *dbrec = obj; + + dbrec->mem_id = page->mem->umem_id; + dbrec->offset = UCS_PTR_BYTE_DIFF(chunk, obj) + sizeof(*page); +} + +static void uct_ib_mlx5_free_page(ucs_mpool_t *mp, void *chunk) +{ + uct_ib_mlx5_dbrec_page_t *page = (uct_ib_mlx5_dbrec_page_t*)chunk - 1; + mlx5dv_devx_umem_dereg(page->mem); + ucs_free(page); +} + +static ucs_mpool_ops_t uct_ib_mlx5_dbrec_ops = { + .chunk_alloc = uct_ib_mlx5_add_page, + .chunk_release = uct_ib_mlx5_free_page, + .obj_init = uct_ib_mlx5_init_dbrec, + .obj_cleanup = NULL +}; + +static UCS_F_MAYBE_UNUSED ucs_status_t +uct_ib_mlx5_devx_check_odp(uct_ib_mlx5_md_t *md, + const uct_ib_md_config_t *md_config, void *cap) +{ + char 
out[UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_out)] = {}; + char in[UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_in)] = {}; + void *odp; + int ret; + + if (md_config->devx_objs & UCS_BIT(UCT_IB_DEVX_OBJ_RCQP)) { + ucs_debug("%s: disable ODP because it's not supported for DevX QP", + uct_ib_device_name(&md->super.dev)); + goto no_odp; + } + + if (uct_ib_mlx5_has_roce_port(&md->super.dev)) { /* a single RoCE port disables ODP for the whole device */ + ucs_debug("%s: disable ODP on RoCE", uct_ib_device_name(&md->super.dev)); + goto no_odp; + } + + if (!UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, pg)) { + goto no_odp; + } + + odp = UCT_IB_MLX5DV_ADDR_OF(query_hca_cap_out, out, capability); /* query the current ODP capability set into the local out buffer */ + UCT_IB_MLX5DV_SET(query_hca_cap_in, in, opcode, UCT_IB_MLX5_CMD_OP_QUERY_HCA_CAP); + UCT_IB_MLX5DV_SET(query_hca_cap_in, in, op_mod, UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR | + (UCT_IB_MLX5_CAP_ODP << 1)); + ret = mlx5dv_devx_general_cmd(md->super.dev.ibv_context, in, sizeof(in), + out, sizeof(out)); + if (ret != 0) { + ucs_error("mlx5dv_devx_general_cmd(QUERY_HCA_CAP, ODP) failed: %m"); + return UCS_ERR_IO_ERROR; + } + /* required: UD send plus RC send, write and read */ + if (!UCT_IB_MLX5DV_GET(odp_cap, odp, ud_odp_caps.send) || + !UCT_IB_MLX5DV_GET(odp_cap, odp, rc_odp_caps.send) || + !UCT_IB_MLX5DV_GET(odp_cap, odp, rc_odp_caps.write) || + !UCT_IB_MLX5DV_GET(odp_cap, odp, rc_odp_caps.read)) { + goto no_odp; + } + /* when the device has the DC flag, DC send, write and read are required as well */ + if ((md->super.dev.flags & UCT_IB_DEVICE_FLAG_DC) && + (!UCT_IB_MLX5DV_GET(odp_cap, odp, dc_odp_caps.send) || + !UCT_IB_MLX5DV_GET(odp_cap, odp, dc_odp_caps.write) || + !UCT_IB_MLX5DV_GET(odp_cap, odp, dc_odp_caps.read))) { + goto no_odp; + } + /* an explicitly configured max_size is kept; only the auto value is replaced */ + if (md->super.config.odp.max_size == UCS_MEMUNITS_AUTO) { + if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, umr_extended_translation_offset)) { + md->super.config.odp.max_size = 1ul << 55; + } else { + md->super.config.odp.max_size = 1ul << 28; + } + } + /* implicit ODP is advertised only when all three capabilities are present */ + if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, fixed_buffer_size) && + UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, null_mkey) && + UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, umr_extended_translation_offset)) { + md->super.dev.flags
|= UCT_IB_DEVICE_FLAG_ODP_IMPLICIT; + } + + return UCS_OK; + +no_odp: + md->super.config.odp.max_size = 0; + return UCS_OK; +} + +static struct ibv_context * +uct_ib_mlx5_devx_open_device(struct ibv_device *ibv_device, + struct mlx5dv_context_attr *dv_attr) +{ + struct ibv_context *ctx; + struct ibv_cq *cq; + + ctx = mlx5dv_open_device(ibv_device, dv_attr); + if (ctx == NULL) { + return NULL; + } + + cq = ibv_create_cq(ctx, 1, NULL, NULL, 0); + if (cq == NULL) { + ibv_close_device(ctx); + return NULL; + } + + ibv_destroy_cq(cq); + return ctx; +} + +static uct_ib_md_ops_t uct_ib_mlx5_devx_md_ops; + +static ucs_status_t uct_ib_mlx5_devx_md_open(struct ibv_device *ibv_device, + const uct_ib_md_config_t *md_config, + uct_ib_md_t **p_md) +{ + char out[UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_out)] = {}; + char in[UCT_IB_MLX5DV_ST_SZ_BYTES(query_hca_cap_in)] = {}; + struct mlx5dv_context_attr dv_attr = {}; + ucs_status_t status = UCS_OK; + struct ibv_context *ctx; + uct_ib_device_t *dev; + uct_ib_mlx5_md_t *md; + void *cap; + int ret; + +#if HAVE_DECL_MLX5DV_IS_SUPPORTED + if (!mlx5dv_is_supported(ibv_device)) { + return UCS_ERR_UNSUPPORTED; + } +#endif + + if (md_config->devx == UCS_NO) { + return UCS_ERR_UNSUPPORTED; + } + + dv_attr.flags |= MLX5DV_CONTEXT_FLAGS_DEVX; + ctx = uct_ib_mlx5_devx_open_device(ibv_device, &dv_attr); + if (ctx == NULL) { + if (md_config->devx == UCS_YES) { + status = UCS_ERR_IO_ERROR; + ucs_error("DEVX requested but not supported by %s", + ibv_get_device_name(ibv_device)); + } else { + status = UCS_ERR_UNSUPPORTED; + ucs_debug("mlx5dv_open_device(%s) failed: %m", + ibv_get_device_name(ibv_device)); + } + goto err; + } + + md = ucs_calloc(1, sizeof(*md), "ib_mlx5_md"); + if (md == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_free_context; + } + + dev = &md->super.dev; + dev->ibv_context = ctx; + md->super.config = md_config->ext; + + status = uct_ib_device_query(dev, ibv_device); + if (status != UCS_OK) { + goto err_free; + } + + cap = 
UCT_IB_MLX5DV_ADDR_OF(query_hca_cap_out, out, capability); + UCT_IB_MLX5DV_SET(query_hca_cap_in, in, opcode, UCT_IB_MLX5_CMD_OP_QUERY_HCA_CAP); + UCT_IB_MLX5DV_SET(query_hca_cap_in, in, op_mod, UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR | + (UCT_IB_MLX5_CAP_GENERAL << 1)); + ret = mlx5dv_devx_general_cmd(ctx, in, sizeof(in), out, sizeof(out)); + if (ret != 0) { + if ((errno == EPERM) || (errno == EPROTONOSUPPORT) || + (errno == EOPNOTSUPP)) { + status = UCS_ERR_UNSUPPORTED; + ucs_debug("mlx5dv_devx_general_cmd(QUERY_HCA_CAP) failed: %m"); + } else { + ucs_error("mlx5dv_devx_general_cmd(QUERY_HCA_CAP) failed: %m"); + status = UCS_ERR_IO_ERROR; + } + goto err_free; + } + + if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, log_max_msg) != + UCT_IB_MLX5_LOG_MAX_MSG_SIZE) { + status = UCS_ERR_UNSUPPORTED; + ucs_debug("Unexpected QUERY_HCA_CAP.log_max_msg %d\n", + UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, log_max_msg)); + goto err_free; + } + + if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, dct)) { + dev->flags |= UCT_IB_DEVICE_FLAG_DC; + } + + if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, rndv_offload_dc)) { + md->flags |= UCT_IB_MLX5_MD_FLAG_DC_TM; + } + + if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, compact_address_vector)) { + dev->flags |= UCT_IB_DEVICE_FLAG_AV; + } + + if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, fixed_buffer_size)) { + md->flags |= UCT_IB_MLX5_MD_FLAG_KSM; + } + + if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, ext_stride_num_range)) { + /* TODO: check if need to check for XRQ (not RQ) MP support */ + md->flags |= UCT_IB_MLX5_MD_FLAG_MP_RQ; + } + + if (!UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, umr_modify_atomic_disabled)) { + md->flags |= UCT_IB_MLX5_MD_FLAG_INDIRECT_ATOMICS; + } + + status = uct_ib_mlx5_devx_check_odp(md, md_config, cap); + if (status != UCS_OK) { + goto err_free; + } + + if (UCT_IB_MLX5DV_GET(cmd_hca_cap, cap, atomic)) { + int ops = UCT_IB_MLX5_ATOMIC_OPS_CMP_SWAP | + UCT_IB_MLX5_ATOMIC_OPS_FETCH_ADD; + uint8_t arg_size; + int cap_ops, mode8b; + + UCT_IB_MLX5DV_SET(query_hca_cap_in, in, 
op_mod, UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR | + (UCT_IB_MLX5_CAP_ATOMIC << 1)); + ret = mlx5dv_devx_general_cmd(ctx, in, sizeof(in), out, sizeof(out)); + if (ret != 0) { + ucs_error("mlx5dv_devx_general_cmd(QUERY_HCA_CAP, ATOMIC) failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err_free; + } + + arg_size = UCT_IB_MLX5DV_GET(atomic_caps, cap, atomic_size_qp); + cap_ops = UCT_IB_MLX5DV_GET(atomic_caps, cap, atomic_operations); + mode8b = UCT_IB_MLX5DV_GET(atomic_caps, cap, atomic_req_8B_endianness_mode); + + if ((cap_ops & ops) == ops) { + dev->atomic_arg_sizes = sizeof(uint64_t); + if (!mode8b) { + dev->atomic_arg_sizes_be = sizeof(uint64_t); + } + } + + ops |= UCT_IB_MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | + UCT_IB_MLX5_ATOMIC_OPS_MASKED_FETCH_ADD; + + arg_size &= UCT_IB_MLX5DV_GET(query_hca_cap_out, out, + capability.atomic_caps.atomic_size_dc); + + if ((cap_ops & ops) == ops) { + dev->ext_atomic_arg_sizes = arg_size; + if (mode8b) { + arg_size &= ~(sizeof(uint64_t)); + } + dev->ext_atomic_arg_sizes_be = arg_size; + } + + dev->pci_fadd_arg_sizes = UCT_IB_MLX5DV_GET(atomic_caps, cap, fetch_add_pci_atomic) << 2; + dev->pci_cswap_arg_sizes = UCT_IB_MLX5DV_GET(atomic_caps, cap, compare_swap_pci_atomic) << 2; + } + + md->super.ops = &uct_ib_mlx5_devx_md_ops; + status = uct_ib_md_open_common(&md->super, ibv_device, md_config); + if (status != UCS_OK) { + goto err_free; + } + + ucs_spinlock_init(&md->dbrec_lock); + status = ucs_mpool_init(&md->dbrec_pool, 0, + sizeof(uct_ib_mlx5_dbrec_t), 0, + UCS_SYS_CACHE_LINE_SIZE, + ucs_get_page_size() / UCS_SYS_CACHE_LINE_SIZE - 1, + UINT_MAX, &uct_ib_mlx5_dbrec_ops, "devx dbrec"); + if (status != UCS_OK) { + goto err_free; + } + + ret = ucs_posix_memalign(&md->zero_buf, ucs_get_page_size(), + ucs_get_page_size(), "zero umem"); + if (ret != 0) { + ucs_error("failed to allocate zero buffer: %m"); + goto err_release_dbrec; + } + + md->zero_mem = mlx5dv_devx_umem_reg(dev->ibv_context, md->zero_buf, ucs_get_page_size(), 0); + if 
(!md->zero_mem) { + ucs_error("mlx5dv_devx_umem_reg() zero umem failed: %m"); + goto err_free_zero_buf; + } + + dev->flags |= UCT_IB_DEVICE_FLAG_MLX5_PRM; + md->flags |= UCT_IB_MLX5_MD_FLAG_DEVX; + md->flags |= UCT_IB_MLX5_MD_FLAGS_DEVX_OBJS(md_config->devx_objs); + *p_md = &md->super; + return status; + +err_free_zero_buf: + ucs_free(md->zero_buf); +err_release_dbrec: + ucs_mpool_cleanup(&md->dbrec_pool, 1); +err_free: + ucs_free(md); +err_free_context: + ibv_close_device(ctx); +err: + return status; +} + +void uct_ib_mlx5_devx_md_cleanup(uct_ib_md_t *ibmd) +{ + uct_ib_mlx5_md_t *md = ucs_derived_of(ibmd, uct_ib_mlx5_md_t); + ucs_status_t status; + + mlx5dv_devx_umem_dereg(md->zero_mem); + ucs_free(md->zero_buf); + ucs_mpool_cleanup(&md->dbrec_pool, 1); + status = ucs_spinlock_destroy(&md->dbrec_lock); + if (status != UCS_OK) { + ucs_warn("ucs_spinlock_destroy() failed (%d)", status); + } +} + +static uct_ib_md_ops_t uct_ib_mlx5_devx_md_ops = { + .open = uct_ib_mlx5_devx_md_open, + .cleanup = uct_ib_mlx5_devx_md_cleanup, + .memh_struct_size = sizeof(uct_ib_mlx5_mem_t), + .reg_key = uct_ib_mlx5_reg_key, + .dereg_key = uct_ib_mlx5_dereg_key, + .reg_atomic_key = uct_ib_mlx5_devx_reg_atomic_key, + .dereg_atomic_key = uct_ib_mlx5_devx_dereg_atomic_key, + .reg_multithreaded = uct_ib_mlx5_devx_reg_multithreaded, + .dereg_multithreaded = uct_ib_mlx5_devx_dereg_multithreaded, + .mem_prefetch = uct_ib_mlx5_mem_prefetch, +}; + +UCT_IB_MD_OPS(uct_ib_mlx5_devx_md_ops, 2); + +#endif + +static ucs_status_t uct_ib_mlx5dv_check_dc(uct_ib_device_t *dev) +{ + ucs_status_t status = UCS_OK; +#if HAVE_DC_DV + struct ibv_srq_init_attr srq_attr = {}; + struct ibv_context *ctx = dev->ibv_context; + struct ibv_qp_init_attr_ex qp_attr = {}; + struct mlx5dv_qp_init_attr dv_attr = {}; + struct ibv_qp_attr attr = {}; + struct ibv_srq *srq; + struct ibv_pd *pd; + struct ibv_cq *cq; + struct ibv_qp *qp; + int ret; + + ucs_debug("checking for DC support on %s", uct_ib_device_name(dev)); + + pd = 
ibv_alloc_pd(ctx); + if (pd == NULL) { + ucs_error("ibv_alloc_pd() failed: %m"); + return UCS_ERR_IO_ERROR; + } + + cq = ibv_create_cq(ctx, 1, NULL, NULL, 0); + if (cq == NULL) { + ucs_error("ibv_create_cq() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err_cq; + } + + srq_attr.attr.max_sge = 1; + srq_attr.attr.max_wr = 1; + srq = ibv_create_srq(pd, &srq_attr); + if (srq == NULL) { + ucs_error("ibv_create_srq() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err_srq; + } + + qp_attr.send_cq = cq; + qp_attr.recv_cq = cq; + qp_attr.qp_type = IBV_QPT_DRIVER; + qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD; + qp_attr.pd = pd; + qp_attr.srq = srq; + + dv_attr.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_DC; + dv_attr.dc_init_attr.dc_type = MLX5DV_DCTYPE_DCT; + dv_attr.dc_init_attr.dct_access_key = UCT_IB_KEY; + + /* create DCT qp successful means DC is supported */ + qp = mlx5dv_create_qp(ctx, &qp_attr, &dv_attr); + if (qp == NULL) { + ucs_debug("failed to create DCT on %s: %m", uct_ib_device_name(dev)); + goto err_qp; + } + + attr.qp_state = IBV_QPS_INIT; + attr.port_num = 1; + attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_ATOMIC; + ret = ibv_modify_qp(qp, &attr, IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_ACCESS_FLAGS); + if (ret != 0) { + ucs_debug("failed to ibv_modify_qp(DCT, INIT) on %s: %m", + uct_ib_device_name(dev)); + goto err; + } + + /* always set global address parameters, in case the port is RoCE or SRIOV */ + attr.qp_state = IBV_QPS_RTR; + attr.min_rnr_timer = 1; + attr.path_mtu = IBV_MTU_256; + attr.ah_attr.port_num = 1; + attr.ah_attr.sl = 0; + attr.ah_attr.is_global = 1; + attr.ah_attr.grh.hop_limit = 1; + attr.ah_attr.grh.traffic_class = 0; + attr.ah_attr.grh.sgid_index = 0; + + ret = ibv_modify_qp(qp, &attr, IBV_QP_STATE | + IBV_QP_MIN_RNR_TIMER | + IBV_QP_AV | + IBV_QP_PATH_MTU); + + if (ret == 0) { + ucs_debug("DC is supported on %s", uct_ib_device_name(dev)); + dev->flags |= 
UCT_IB_DEVICE_FLAG_DC; + } else { + ucs_debug("failed to ibv_modify_qp(DCT, RTR) on %s: %m", + uct_ib_device_name(dev)); + } + +err: + uct_ib_destroy_qp(qp); +err_qp: + uct_ib_destroy_srq(srq); +err_srq: + ibv_destroy_cq(cq); +err_cq: + ibv_dealloc_pd(pd); +#endif + return status; +} + +static uct_ib_md_ops_t uct_ib_mlx5_md_ops; + +static ucs_status_t uct_ib_mlx5dv_md_open(struct ibv_device *ibv_device, + const uct_ib_md_config_t *md_config, + uct_ib_md_t **p_md) +{ + ucs_status_t status = UCS_OK; + struct ibv_context *ctx; + uct_ib_device_t *dev; + uct_ib_mlx5_md_t *md; + +#if HAVE_DECL_MLX5DV_IS_SUPPORTED + if (!mlx5dv_is_supported(ibv_device)) { + return UCS_ERR_UNSUPPORTED; + } +#endif + + ctx = ibv_open_device(ibv_device); + if (ctx == NULL) { + ucs_debug("ibv_open_device(%s) failed: %m", ibv_get_device_name(ibv_device)); + status = UCS_ERR_UNSUPPORTED; + goto err; + } + + md = ucs_calloc(1, sizeof(*md), "ib_mlx5_md"); + if (md == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_free_context; + } + + dev = &md->super.dev; + dev->ibv_context = ctx; + md->super.config = md_config->ext; + + status = uct_ib_device_query(dev, ibv_device); + if (status != UCS_OK) { + goto err_free; + } + + if (!(uct_ib_device_spec(dev)->flags & UCT_IB_DEVICE_FLAG_MLX5_PRM)) { + status = UCS_ERR_UNSUPPORTED; + goto err_free; + } + + if (UCT_IB_HAVE_ODP_IMPLICIT(&dev->dev_attr) && + !uct_ib_mlx5_has_roce_port(dev)) { + dev->flags |= UCT_IB_DEVICE_FLAG_ODP_IMPLICIT; + } + + if (IBV_EXP_HAVE_ATOMIC_HCA(&dev->dev_attr)) { + dev->atomic_arg_sizes = sizeof(uint64_t); + } + + status = uct_ib_mlx5dv_check_dc(dev); + if (status != UCS_OK) { + goto err_free; + } + + md->super.ops = &uct_ib_mlx5_md_ops; + status = uct_ib_md_open_common(&md->super, ibv_device, md_config); + if (status != UCS_OK) { + goto err_free; + } + + dev->flags |= UCT_IB_DEVICE_FLAG_MLX5_PRM; + /* cppcheck-suppress autoVariables */ + *p_md = &md->super; + return UCS_OK; + +err_free: + ucs_free(md); +err_free_context: + 
ibv_close_device(ctx); +err: + return status; +} + +static uct_ib_md_ops_t uct_ib_mlx5_md_ops = { + .open = uct_ib_mlx5dv_md_open, + .cleanup = (uct_ib_md_cleanup_func_t)ucs_empty_function, + .memh_struct_size = sizeof(uct_ib_mlx5_mem_t), + .reg_key = uct_ib_mlx5_reg_key, + .dereg_key = uct_ib_mlx5_dereg_key, + .reg_atomic_key = (uct_ib_md_reg_atomic_key_func_t)ucs_empty_function_return_unsupported, + .dereg_atomic_key = (uct_ib_md_dereg_atomic_key_func_t)ucs_empty_function_return_unsupported, + .reg_multithreaded = (uct_ib_md_reg_multithreaded_func_t)ucs_empty_function_return_unsupported, + .dereg_multithreaded = (uct_ib_md_dereg_multithreaded_func_t)ucs_empty_function_return_unsupported, + .mem_prefetch = uct_ib_mlx5_mem_prefetch, +}; + +UCT_IB_MD_OPS(uct_ib_mlx5_md_ops, 1); + diff --git a/src/uct/ib/mlx5/exp/ib_exp.c b/src/uct/ib/mlx5/exp/ib_exp.c new file mode 100644 index 0000000..aea211d --- /dev/null +++ b/src/uct/ib/mlx5/exp/ib_exp.c @@ -0,0 +1,40 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include + +void uct_ib_exp_qp_fill_attr(uct_ib_iface_t *iface, uct_ib_qp_attr_t *attr) +{ +#if HAVE_DECL_IBV_EXP_CREATE_QP + if (!(attr->ibv.comp_mask & IBV_EXP_QP_INIT_ATTR_PD)) { + attr->ibv.comp_mask = IBV_EXP_QP_INIT_ATTR_PD; + attr->ibv.pd = uct_ib_iface_md(iface)->pd; + } +#endif + + if (attr->qp_type == IBV_QPT_UD) { + return; + } + +#if HAVE_IB_EXT_ATOMICS + attr->ibv.comp_mask |= IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG; + attr->ibv.max_atomic_arg = UCT_IB_MAX_ATOMIC_SIZE; +#endif + +#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE + if (uct_ib_iface_device(iface)->dev_attr.exp_atomic_cap == + IBV_EXP_ATOMIC_HCA_REPLY_BE) { + attr->ibv.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS; + attr->ibv.exp_create_flags = IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY; + } +#endif + +#if HAVE_STRUCT_IBV_EXP_QP_INIT_ATTR_MAX_INL_RECV + attr->ibv.comp_mask |= IBV_EXP_QP_INIT_ATTR_INL_RECV; + attr->ibv.max_inl_recv = attr->max_inl_recv; +#endif +} + diff --git a/src/uct/ib/mlx5/exp/ib_exp.h b/src/uct/ib/mlx5/exp/ib_exp.h new file mode 100644 index 0000000..ab1d923 --- /dev/null +++ b/src/uct/ib/mlx5/exp/ib_exp.h @@ -0,0 +1,16 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_IB_MLX5_EXP_H_ +#define UCT_IB_MLX5_EXP_H_ + +#if HAVE_VERBS_EXP_H +void uct_ib_exp_qp_fill_attr(uct_ib_iface_t *iface, uct_ib_qp_attr_t *attr); +#else +static inline void uct_ib_exp_qp_fill_attr(uct_ib_iface_t *iface, uct_ib_qp_attr_t *attr) { } +#endif + +#endif diff --git a/src/uct/ib/mlx5/exp/ib_exp_md.c b/src/uct/ib/mlx5/exp/ib_exp_md.c new file mode 100644 index 0000000..87bab04 --- /dev/null +++ b/src/uct/ib/mlx5/exp/ib_exp_md.c @@ -0,0 +1,717 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include + +#include +#include + + +typedef struct { + struct ibv_mr *atomic_mr; + int mr_num; + struct ibv_mr *mrs[]; +} uct_ib_mlx5_ksm_data_t; + +typedef struct uct_ib_mlx5_mem { + uct_ib_mem_t super; + struct ibv_mr *mr; +#if HAVE_EXP_UMR + union { + struct ibv_mr *atomic_mr; + uct_ib_mlx5_ksm_data_t *ksm_data; + }; +#endif +} uct_ib_mlx5_mem_t; + + +static ucs_status_t uct_ib_mlx5_reg_key(uct_ib_md_t *md, void *address, + size_t length, uint64_t access, + uct_ib_mem_t *ib_memh) +{ + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + ucs_status_t status; + + status = uct_ib_reg_mr(md->pd, address, length, access, &memh->mr); + if (status != UCS_OK) { + return status; + } + + uct_ib_memh_init_from_mr(&memh->super, memh->mr); + return UCS_OK; +} + +static ucs_status_t uct_ib_mlx5_dereg_key(uct_ib_md_t *md, uct_ib_mem_t *ib_memh) +{ + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + + return uct_ib_dereg_mr(memh->mr); +} + +static ucs_status_t +uct_ib_mlx5_mem_prefetch(uct_ib_md_t *md, uct_ib_mem_t *ib_memh, void *addr, + size_t length) +{ +#if HAVE_DECL_IBV_EXP_PREFETCH_MR + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + struct ibv_exp_prefetch_attr attr = {}; + int ret; + + if (!(memh->super.flags & UCT_IB_MEM_FLAG_ODP)) { + return UCS_OK; + } + + ucs_debug("memh %p prefetch %p length %zu", memh, addr, length); + + attr.flags = IBV_EXP_PREFETCH_WRITE_ACCESS; + attr.addr = addr; + attr.length = length; + + ret = UCS_PROFILE_CALL(ibv_exp_prefetch_mr, memh->mr, &attr); + if (ret) { + ucs_error("ibv_exp_prefetch_mr(addr=%p length=%zu) returned %d: %m", + addr, length, ret); + return UCS_ERR_IO_ERROR; + } +#endif + return UCS_OK; +} + +static ucs_status_t uct_ib_mlx5_exp_md_umr_qp_create(uct_ib_mlx5_md_t *md) +{ +#if HAVE_EXP_UMR + struct ibv_exp_qp_init_attr qp_init_attr; + struct ibv_qp_attr qp_attr; + uint8_t port_num; + int ret; + uct_ib_device_t *ibdev; + struct ibv_port_attr *port_attr; 
+ int is_roce_v2; + + ibdev = &md->super.dev; + + if (!(ibdev->dev_attr.exp_device_cap_flags & IBV_EXP_DEVICE_UMR) || + !md->super.config.enable_indirect_atomic) { + return UCS_ERR_UNSUPPORTED; + } + + /* TODO: fix port selection. It looks like active port should be used */ + port_num = ibdev->first_port; + port_attr = uct_ib_device_port_attr(ibdev, port_num); + + memset(&qp_init_attr, 0, sizeof(qp_init_attr)); + + md->umr_cq = ibv_create_cq(ibdev->ibv_context, 1, NULL, NULL, 0); + if (md->umr_cq == NULL) { + ucs_error("failed to create UMR CQ: %m"); + goto err; + } + + md->super.config.max_inline_klm_list = + ucs_min(md->super.config.max_inline_klm_list, + ibdev->dev_attr.umr_caps.max_send_wqe_inline_klms); + + qp_init_attr.qp_type = IBV_QPT_RC; + qp_init_attr.send_cq = md->umr_cq; + qp_init_attr.recv_cq = md->umr_cq; + qp_init_attr.cap.max_inline_data = 0; + qp_init_attr.cap.max_recv_sge = 1; + qp_init_attr.cap.max_send_sge = 1; + qp_init_attr.srq = NULL; + qp_init_attr.cap.max_recv_wr = 16; + qp_init_attr.cap.max_send_wr = 16; + qp_init_attr.pd = md->super.pd; + qp_init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD|IBV_EXP_QP_INIT_ATTR_MAX_INL_KLMS; + qp_init_attr.max_inl_recv = 0; + qp_init_attr.max_inl_send_klms = md->super.config.max_inline_klm_list; + +#if HAVE_IBV_EXP_QP_CREATE_UMR + qp_init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS; + qp_init_attr.exp_create_flags = IBV_EXP_QP_CREATE_UMR; +#endif + + md->umr_qp = ibv_exp_create_qp(ibdev->ibv_context, &qp_init_attr); + if (md->umr_qp == NULL) { + ucs_error("failed to create UMR QP: %m"); + goto err_destroy_cq; + } + + memset(&qp_attr, 0, sizeof(qp_attr)); + + /* Modify QP to INIT state */ + qp_attr.qp_state = IBV_QPS_INIT; + qp_attr.pkey_index = 0; + qp_attr.port_num = port_num; + qp_attr.qp_access_flags = UCT_IB_MEM_ACCESS_FLAGS; + ret = ibv_modify_qp(md->umr_qp, &qp_attr, + IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS); + if (ret) { + ucs_error("Failed to modify UMR QP to 
INIT: %m"); + goto err_destroy_qp; + } + + /* Modify to RTR */ + qp_attr.qp_state = IBV_QPS_RTR; + qp_attr.dest_qp_num = md->umr_qp->qp_num; + + memset(&qp_attr.ah_attr, 0, sizeof(qp_attr.ah_attr)); + qp_attr.ah_attr.port_num = port_num; + qp_attr.ah_attr.dlid = port_attr->lid; + qp_attr.ah_attr.is_global = 1; + if (uct_ib_device_query_gid(ibdev, port_num, UCT_IB_MD_DEFAULT_GID_INDEX, + &qp_attr.ah_attr.grh.dgid, &is_roce_v2) != UCS_OK) { + goto err_destroy_qp; + } + + qp_attr.rq_psn = 0; + qp_attr.path_mtu = IBV_MTU_512; + qp_attr.min_rnr_timer = 7; + qp_attr.max_dest_rd_atomic = 1; + ret = ibv_modify_qp(md->umr_qp, &qp_attr, + IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER); + if (ret) { + ucs_error("Failed to modify UMR QP to RTR: %m"); + goto err_destroy_qp; + } + + /* Modify to RTS */ + qp_attr.qp_state = IBV_QPS_RTS; + qp_attr.sq_psn = 0; + qp_attr.timeout = 7; + qp_attr.rnr_retry = 7; + qp_attr.retry_cnt = 7; + qp_attr.max_rd_atomic = 1; + ret = ibv_modify_qp(md->umr_qp, &qp_attr, + IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | + IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC); + if (ret) { + ucs_error("Failed to modify UMR QP to RTS: %m"); + goto err_destroy_qp; + } + + ucs_debug("initialized UMR QP 0x%x, max_inline_klm_list %u", + md->umr_qp->qp_num, md->super.config.max_inline_klm_list); + return UCS_OK; + +err_destroy_qp: + uct_ib_destroy_qp(md->umr_qp); +err_destroy_cq: + ibv_destroy_cq(md->umr_cq); +err: + return UCS_ERR_IO_ERROR; +#else + return UCS_ERR_UNSUPPORTED; +#endif +} + +#if HAVE_EXP_UMR +static ucs_status_t +uct_ib_mlx5_exp_reg_indirect_mr(uct_ib_mlx5_md_t *md, + void *addr, size_t length, + struct ibv_exp_mem_region *mem_reg, + int list_size, uint32_t create_flags, + uint32_t umr_type, struct ibv_mr **mr_p) +{ + struct ibv_exp_send_wr wr, *bad_wr; + struct ibv_exp_create_mr_in mrin; + ucs_status_t status; + struct ibv_mr 
*umr; + struct ibv_wc wc; + int ret; + + if (md->umr_qp == NULL) { + status = UCS_ERR_UNSUPPORTED; + goto err; + } + + /* Create and fill memory key */ + memset(&mrin, 0, sizeof(mrin)); + memset(&wr, 0, sizeof(wr)); + + mrin.pd = md->super.pd; + wr.exp_opcode = IBV_EXP_WR_UMR_FILL; + wr.exp_send_flags = IBV_EXP_SEND_SIGNALED; + wr.ext_op.umr.exp_access = UCT_IB_MEM_ACCESS_FLAGS; + + mrin.attr.create_flags = create_flags; + wr.ext_op.umr.umr_type = umr_type; + + mrin.attr.exp_access_flags = UCT_IB_MEM_ACCESS_FLAGS; + mrin.attr.max_klm_list_size = list_size; + + umr = ibv_exp_create_mr(&mrin); + if (!umr) { + ucs_error("ibv_exp_create_mr() failed: %m"); + status = UCS_ERR_NO_MEMORY; + goto err; + } + + wr.ext_op.umr.mem_list.mem_reg_list = mem_reg; + wr.ext_op.umr.base_addr = (uint64_t)(uintptr_t)addr; + wr.ext_op.umr.num_mrs = list_size; + wr.ext_op.umr.modified_mr = umr; + + /* If the list exceeds max inline size, allocate a container object */ + if (list_size > md->super.config.max_inline_klm_list) { + struct ibv_exp_mkey_list_container_attr in = { + .pd = md->super.pd, + .mkey_list_type = IBV_EXP_MKEY_LIST_TYPE_INDIRECT_MR, + .max_klm_list_size = list_size + }; + + wr.ext_op.umr.memory_objects = ibv_exp_alloc_mkey_list_memory(&in); + if (wr.ext_op.umr.memory_objects == NULL) { + ucs_error("ibv_exp_alloc_mkey_list_memory(list_size=%d) failed: %m", + list_size); + status = UCS_ERR_IO_ERROR; + goto err_free_umr; + } + } else { + wr.ext_op.umr.memory_objects = NULL; + wr.exp_send_flags |= IBV_EXP_SEND_INLINE; + } + + ucs_trace_data("UMR_FILL qp 0x%x lkey 0x%x base 0x%lx [addr %lx len %zu lkey 0x%x] list_size %d", + md->umr_qp->qp_num, wr.ext_op.umr.modified_mr->lkey, + wr.ext_op.umr.base_addr, mem_reg[0].base_addr, + mem_reg[0].length, mem_reg[0].mr->lkey, list_size); + + /* Post UMR */ + ret = ibv_exp_post_send(md->umr_qp, &wr, &bad_wr); + if (ret) { + ucs_error("ibv_exp_post_send(UMR_FILL) failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err_free_klm_container; + 
} + + /* Wait for send UMR completion */ + for (;;) { + ret = ibv_poll_cq(md->umr_cq, 1, &wc); + if (ret < 0) { + ucs_error("ibv_exp_poll_cq(umr_cq) failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err_free_klm_container; + } + if (ret == 1) { + if (wc.status != IBV_WC_SUCCESS) { + ucs_error("UMR_FILL completed with error: %s vendor_err %d", + ibv_wc_status_str(wc.status), wc.vendor_err); + status = UCS_ERR_IO_ERROR; + goto err_free_klm_container; + } + break; + } + } + + if (wr.ext_op.umr.memory_objects != NULL) { + ibv_exp_dealloc_mkey_list_memory(wr.ext_op.umr.memory_objects); + } + + umr->addr = addr; + umr->length = length; + ucs_debug("UMR registered memory %p..%p on %s lkey 0x%x rkey 0x%x", + umr->addr, UCS_PTR_BYTE_OFFSET(umr->addr, length), + uct_ib_device_name(&md->super.dev), + umr->lkey, umr->rkey); + + *mr_p = umr; + + return UCS_OK; + +err_free_klm_container: + if (wr.ext_op.umr.memory_objects != NULL) { + ibv_exp_dealloc_mkey_list_memory(wr.ext_op.umr.memory_objects); + } +err_free_umr: + UCS_PROFILE_CALL(ibv_dereg_mr, umr); +err: + return status; +} +#endif + +ucs_status_t uct_ib_mlx5_exp_reg_ksm(uct_ib_mlx5_md_t *md, + uct_ib_mlx5_ksm_data_t *ksm_data, + size_t length, off_t off, + struct ibv_mr **mr_p) +{ +#if HAVE_EXP_UMR_KSM + struct ibv_exp_mem_region *mem_reg; + ucs_status_t status; + int i; + + mem_reg = ucs_calloc(ksm_data->mr_num, sizeof(mem_reg[0]), "mem_reg"); + if (!mem_reg) { + return UCS_ERR_NO_MEMORY; + } + + for (i = 0; i < ksm_data->mr_num; i++) { + mem_reg[i].base_addr = (uint64_t) (uintptr_t) ksm_data->mrs[i]->addr; + mem_reg[i].length = ksm_data->mrs[i]->length; + mem_reg[i].mr = ksm_data->mrs[i]; + } + + status = uct_ib_mlx5_exp_reg_indirect_mr(md, ksm_data->mrs[0]->addr + off, + length, mem_reg, ksm_data->mr_num, + IBV_EXP_MR_FIXED_BUFFER_SIZE, + IBV_EXP_UMR_MR_LIST_FIXED_SIZE, + mr_p); + + ucs_free(mem_reg); + return status; +#else + return UCS_ERR_UNSUPPORTED; +#endif +} + +static ucs_status_t 
uct_ib_mlx5_exp_reg_atomic_key(uct_ib_md_t *ibmd, + uct_ib_mem_t *ib_memh) +{ +#if HAVE_EXP_UMR + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + uct_ib_mlx5_md_t *md = ucs_derived_of(ibmd, uct_ib_mlx5_md_t); + off_t offset = uct_ib_md_atomic_offset(uct_ib_mlx5_md_get_atomic_mr_id(md)); + struct ibv_exp_mem_region *mem_reg = NULL; + struct ibv_mr *mr = memh->mr; + uint32_t create_flags, umr_type; + ucs_status_t status; + struct ibv_mr *umr; + int i, list_size; + size_t reg_length; + + if (memh->super.flags & UCT_IB_MEM_MULTITHREADED) { + status = uct_ib_mlx5_exp_reg_ksm(md, memh->ksm_data, memh->mr->length, + offset, &memh->ksm_data->atomic_mr); + if (status == UCS_OK) { + memh->super.atomic_rkey = memh->ksm_data->atomic_mr->rkey; + } + + return status; + } + + reg_length = UCT_IB_MD_MAX_MR_SIZE; +#if HAVE_EXP_UMR_KSM + if ((md->super.dev.dev_attr.comp_mask & IBV_EXP_DEVICE_ATTR_COMP_MASK_2) && + (md->super.dev.dev_attr.comp_mask_2 & IBV_EXP_DEVICE_ATTR_UMR_FIXED_SIZE_CAPS) && + (md->super.dev.dev_attr.exp_device_cap_flags & IBV_EXP_DEVICE_UMR_FIXED_SIZE)) + { + reg_length = md->super.dev.dev_attr.umr_fixed_size_caps.max_entity_size; + list_size = ucs_div_round_up(mr->length, reg_length); + } else if (mr->length < reg_length) { + list_size = 1; + } else { + status = UCS_ERR_UNSUPPORTED; + goto err; + } + + if (list_size > 1) { + create_flags = IBV_EXP_MR_FIXED_BUFFER_SIZE; + umr_type = IBV_EXP_UMR_MR_LIST_FIXED_SIZE; + } else { + create_flags = IBV_EXP_MR_INDIRECT_KLMS; + umr_type = IBV_EXP_UMR_MR_LIST; + } +#else + if (mr->length >= reg_length) { + status = UCS_ERR_UNSUPPORTED; + goto err; + } + + list_size = 1; + create_flags = IBV_EXP_MR_INDIRECT_KLMS; + umr_type = IBV_EXP_UMR_MR_LIST; +#endif + + mem_reg = ucs_calloc(list_size, sizeof(mem_reg[0]), "mem_reg"); + if (!mem_reg) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + + for (i = 0; i < list_size; i++) { + mem_reg[i].base_addr = (uintptr_t) mr->addr + i * reg_length; + 
mem_reg[i].length = reg_length; + mem_reg[i].mr = mr; + } + + ucs_assert(list_size >= 1); + /* Trim the last entry to the tail of the region. When mr->length is an + * exact multiple of reg_length the remainder is 0, and the last entry + * must keep the full reg_length instead of becoming zero-length */ + if ((mr->length % reg_length) != 0) { + mem_reg[list_size - 1].length = mr->length % reg_length; + } + + status = uct_ib_mlx5_exp_reg_indirect_mr(md, mr->addr + offset, mr->length, + mem_reg, list_size, create_flags, + umr_type, &umr); + if (status != UCS_OK) { + goto err; + } + + memh->atomic_mr = umr; + memh->super.atomic_rkey = umr->rkey; + + /* reached on success as well: mem_reg is only a temporary descriptor list + * (NULL if allocation was never attempted) */ +err: + ucs_free(mem_reg); + return status; +#else + return UCS_ERR_UNSUPPORTED; +#endif +} + +static ucs_status_t uct_ib_mlx5_exp_dereg_atomic_key(uct_ib_md_t *ibmd, + uct_ib_mem_t *ib_memh) +{ +#if HAVE_EXP_UMR + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + int ret; + + ret = UCS_PROFILE_CALL(ibv_dereg_mr, memh->atomic_mr); + if (ret != 0) { + ucs_error("ibv_dereg_mr() failed: %m"); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +#else + return UCS_ERR_UNSUPPORTED; +#endif +} + +static ucs_status_t uct_ib_mlx5_exp_reg_multithreaded(uct_ib_md_t *ibmd, + void *address, size_t length, + uint64_t access, + uct_ib_mem_t *ib_memh) +{ +#if HAVE_EXP_UMR_KSM + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + uct_ib_mlx5_md_t *md = ucs_derived_of(ibmd, uct_ib_mlx5_md_t); + size_t chunk = md->super.config.mt_reg_chunk; + uct_ib_mlx5_ksm_data_t *ksm_data; + size_t ksm_data_size; + ucs_status_t status; + struct ibv_mr *umr; + int mr_num; + + if (!(md->super.dev.dev_attr.comp_mask & IBV_EXP_DEVICE_ATTR_COMP_MASK_2) || + !(md->super.dev.dev_attr.comp_mask_2 & IBV_EXP_DEVICE_ATTR_UMR_FIXED_SIZE_CAPS) || + !(md->super.dev.dev_attr.exp_device_cap_flags & IBV_EXP_DEVICE_UMR_FIXED_SIZE)) { + return UCS_ERR_UNSUPPORTED; + } + + mr_num = ucs_div_round_up(length, chunk); + ksm_data_size = (mr_num * sizeof(*ksm_data->mrs)) + sizeof(*ksm_data); + ksm_data = ucs_calloc(1, ksm_data_size, "ksm_data"); + if (!ksm_data) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + + ucs_trace("multithreaded register memory %p..%p chunks %d", + address, 
UCS_PTR_BYTE_OFFSET(address, length), mr_num); + + ksm_data->mr_num = mr_num; + status = uct_ib_md_handle_mr_list_multithreaded(ibmd, address, length, + access, chunk, ksm_data->mrs); + if (status != UCS_OK) { + goto err; + } + + status = uct_ib_mlx5_exp_reg_ksm(md, ksm_data, length, 0, &umr); + if (status != UCS_OK) { + goto err_dereg; + } + + memh->mr = umr; + memh->ksm_data = ksm_data; + memh->super.lkey = umr->lkey; + memh->super.rkey = umr->rkey; + return UCS_OK; + +err_dereg: + uct_ib_md_handle_mr_list_multithreaded(ibmd, address, length, UCT_IB_MEM_DEREG, + chunk, ksm_data->mrs); +err: + ucs_free(ksm_data); + return status; +#else + return UCS_ERR_UNSUPPORTED; +#endif +} + +static ucs_status_t uct_ib_mlx5_exp_dereg_multithreaded(uct_ib_md_t *ibmd, + uct_ib_mem_t *ib_memh) +{ +#if HAVE_EXP_UMR_KSM + uct_ib_mlx5_mem_t *memh = ucs_derived_of(ib_memh, uct_ib_mlx5_mem_t); + size_t chunk = ibmd->config.mt_reg_chunk; + ucs_status_t s, status = UCS_OK; + + if (memh->super.flags & UCT_IB_MEM_FLAG_ATOMIC_MR) { + s = uct_ib_dereg_mr(memh->ksm_data->atomic_mr); + if (s != UCS_OK) { + status = s; + } + } + + s = uct_ib_md_handle_mr_list_multithreaded(ibmd, memh->mr->addr, + memh->mr->length, + UCT_IB_MEM_DEREG, chunk, + memh->ksm_data->mrs); + if (s == UCS_ERR_UNSUPPORTED) { + s = uct_ib_dereg_mrs(memh->ksm_data->mrs, memh->ksm_data->mr_num); + if (s != UCS_OK) { + status = s; + } + } else if (s != UCS_OK) { + status = s; + } + + s = uct_ib_dereg_mr(memh->mr); + if (s != UCS_OK) { + status = s; + } + + ucs_free(memh->ksm_data); + + return status; +#else + return UCS_ERR_UNSUPPORTED; +#endif +} + +static uct_ib_md_ops_t uct_ib_mlx5_md_ops; + +static ucs_status_t uct_ib_mlx5_exp_md_open(struct ibv_device *ibv_device, + const uct_ib_md_config_t *md_config, + uct_ib_md_t **p_md) +{ + ucs_status_t status = UCS_OK; + struct ibv_context *ctx; + uct_ib_device_t *dev; + uct_ib_mlx5_md_t *md; + + ctx = ibv_open_device(ibv_device); + if (ctx == NULL) { + 
ucs_debug("ibv_open_device(%s) failed: %m", ibv_get_device_name(ibv_device)); + status = UCS_ERR_UNSUPPORTED; + goto err; + } + + md = ucs_calloc(1, sizeof(*md), "ib_mlx5_md"); + if (md == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_free_context; + } + + dev = &md->super.dev; + dev->ibv_context = ctx; + md->super.config = md_config->ext; + + status = uct_ib_device_query(dev, ibv_device); + if (status != UCS_OK) { + goto err_free; + } + + if (!(uct_ib_device_spec(dev)->flags & UCT_IB_DEVICE_FLAG_MLX5_PRM)) { + status = UCS_ERR_UNSUPPORTED; + goto err_free; + } + +#if HAVE_DECL_IBV_EXP_DEVICE_DC_TRANSPORT && HAVE_STRUCT_IBV_EXP_DEVICE_ATTR_EXP_DEVICE_CAP_FLAGS + if (dev->dev_attr.exp_device_cap_flags & IBV_EXP_DEVICE_DC_TRANSPORT) { + dev->flags |= UCT_IB_DEVICE_FLAG_DC; + } +#endif + +#if IBV_HW_TM + if (dev->dev_attr.tm_caps.capability_flags & IBV_EXP_TM_CAP_DC) { + md->flags |= UCT_IB_MLX5_MD_FLAG_DC_TM; + } +#endif + + if (UCT_IB_HAVE_ODP_IMPLICIT(&dev->dev_attr)) { + dev->flags |= UCT_IB_DEVICE_FLAG_ODP_IMPLICIT; + } + + if (IBV_EXP_HAVE_ATOMIC_HCA(&dev->dev_attr) || + IBV_EXP_HAVE_ATOMIC_GLOB(&dev->dev_attr) || + IBV_EXP_HAVE_ATOMIC_HCA_REPLY_BE(&dev->dev_attr)) + { +#ifdef HAVE_IB_EXT_ATOMICS + if (dev->dev_attr.comp_mask & IBV_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS) { + dev->ext_atomic_arg_sizes = dev->dev_attr.ext_atom.log_atomic_arg_sizes; + } +# if HAVE_MASKED_ATOMICS_ENDIANNESS + if (dev->dev_attr.comp_mask & IBV_EXP_DEVICE_ATTR_MASKED_ATOMICS) { + dev->ext_atomic_arg_sizes |= + dev->dev_attr.masked_atomic.masked_log_atomic_arg_sizes; + dev->ext_atomic_arg_sizes_be = + dev->dev_attr.masked_atomic.masked_log_atomic_arg_sizes_network_endianness; + } +# endif + dev->ext_atomic_arg_sizes &= UCS_MASK(dev->dev_attr.ext_atom.log_max_atomic_inline + 1); +#endif + dev->atomic_arg_sizes = sizeof(uint64_t); + if (IBV_EXP_HAVE_ATOMIC_HCA_REPLY_BE(&dev->dev_attr)) { + dev->atomic_arg_sizes_be = sizeof(uint64_t); + } + } + +#if 
HAVE_DECL_IBV_EXP_DEVICE_ATTR_PCI_ATOMIC_CAPS
+    dev->pci_fadd_arg_sizes = dev->dev_attr.pci_atomic_caps.fetch_add << 2;
+    dev->pci_cswap_arg_sizes = dev->dev_attr.pci_atomic_caps.compare_swap << 2;
+#endif
+
+    md->super.ops = &uct_ib_mlx5_md_ops;
+    status = uct_ib_md_open_common(&md->super, ibv_device, md_config);
+    if (status != UCS_OK) {
+        goto err_free;
+    }
+    /* UCS_ERR_UNSUPPORTED from UMR QP creation is tolerated; any other failure
+     * aborts the open.
+     * NOTE(review): err_free only frees md and closes the context - confirm
+     * that nothing set up by uct_ib_md_open_common() has to be released. */
+    status = uct_ib_mlx5_exp_md_umr_qp_create(md);
+    if (status != UCS_OK && status != UCS_ERR_UNSUPPORTED) {
+        goto err_free;
+    }
+
+    dev->flags |= UCT_IB_DEVICE_FLAG_MLX5_PRM;
+    *p_md = &md->super;
+    return UCS_OK;
+
+err_free:
+    ucs_free(md);
+err_free_context:
+    ibv_close_device(ctx);
+err:
+    return status;
+}
+/**
+ * Destroy the UMR QP and the UMR CQ of the MD, each only if it is non-NULL.
+ * Compiles to an empty function unless HAVE_EXP_UMR is set.
+ */
+void uct_ib_mlx5_exp_md_cleanup(uct_ib_md_t *ibmd)
+{
+#if HAVE_EXP_UMR
+    uct_ib_mlx5_md_t *md = ucs_derived_of(ibmd, uct_ib_mlx5_md_t);
+
+    if (md->umr_qp != NULL) {
+        uct_ib_destroy_qp(md->umr_qp);
+    }
+    if (md->umr_cq != NULL) {
+        ibv_destroy_cq(md->umr_cq);
+    }
+#endif
+}
+/* MD operations implemented in this file; installed into md->super.ops by
+ * uct_ib_mlx5_exp_md_open() */
+static uct_ib_md_ops_t uct_ib_mlx5_md_ops = {
+    .open = uct_ib_mlx5_exp_md_open,
+    .cleanup = uct_ib_mlx5_exp_md_cleanup,
+    .memh_struct_size = sizeof(uct_ib_mlx5_mem_t),
+    .reg_key = uct_ib_mlx5_reg_key,
+    .dereg_key = uct_ib_mlx5_dereg_key,
+    .reg_atomic_key = uct_ib_mlx5_exp_reg_atomic_key,
+    .dereg_atomic_key = uct_ib_mlx5_exp_dereg_atomic_key,
+    .reg_multithreaded = uct_ib_mlx5_exp_reg_multithreaded,
+    .dereg_multithreaded = uct_ib_mlx5_exp_dereg_multithreaded,
+    .mem_prefetch = uct_ib_mlx5_mem_prefetch,
+};
+
+UCT_IB_MD_OPS(uct_ib_mlx5_md_ops, 1);
+
diff --git a/src/uct/ib/mlx5/exp/ib_mlx5_hw.c b/src/uct/ib/mlx5/exp/ib_mlx5_hw.c
new file mode 100644
index 0000000..48430a3
--- /dev/null
+++ b/src/uct/ib/mlx5/exp/ib_mlx5_hw.c
@@ -0,0 +1,245 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#if HAVE_INFINIBAND_MLX5_HW_H
+
+#include "ib_mlx5_hw.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Since this file is intended to emulate DV using legacy mlx5_hw.h
+ * definitions, we include DV declarations. */
+#define UCT_IB_MLX5_H_
+#include
+/**
+ * Log an error about a failed libmlx5 object query.
+ *
+ * @param obj_name  Kind of object that could not be queried; callers in this
+ *                  file pass "qp", "srq" and "cq".
+ */
+static void UCS_F_MAYBE_UNUSED uct_ib_mlx5_obj_error(const char *obj_name)
+{
+    ucs_error("Failed to get mlx5 %s information. Please make sure the installed "
+              "libmlx5 version matches the one UCX was compiled with (%s)",
+              obj_name, UCT_IB_LIBMLX5_VER);
+}
+/**
+ * The functions up to the matching #endif are compiled only when
+ * mlx5dv_init_obj is not declared.
+ *
+ * uct_ib_mlx5_get_qp_info(): fill qp_info->dv for @a qp. If
+ * ibv_mlx5_exp_get_qp_info() is declared its result is copied; otherwise the
+ * fields are read directly from the driver's struct mlx5_qp, which is accepted
+ * only while sq.cur_post and rq.head are both 0.
+ *
+ * @return UCS_OK, or UCS_ERR_NO_DEVICE if the query fails or the QP is not in
+ *         the expected initial state.
+ */
+#if !HAVE_DECL_MLX5DV_INIT_OBJ
+ucs_status_t uct_ib_mlx5_get_qp_info(struct ibv_qp *qp, uct_ib_mlx5dv_qp_t *qp_info)
+{
+#if HAVE_DECL_IBV_MLX5_EXP_GET_QP_INFO
+    struct ibv_mlx5_qp_info ibv_qp_info;
+    int ret;
+
+    ret = ibv_mlx5_exp_get_qp_info(qp, &ibv_qp_info);
+    if (ret != 0) {
+        uct_ib_mlx5_obj_error("qp");
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    qp_info->dv.dbrec = ibv_qp_info.dbrec;
+    qp_info->dv.sq.buf = ibv_qp_info.sq.buf;
+    qp_info->dv.sq.wqe_cnt = ibv_qp_info.sq.wqe_cnt;
+    qp_info->dv.sq.stride = ibv_qp_info.sq.stride;
+    qp_info->dv.rq.buf = ibv_qp_info.rq.buf;
+    qp_info->dv.rq.wqe_cnt = ibv_qp_info.rq.wqe_cnt;
+    qp_info->dv.rq.stride = ibv_qp_info.rq.stride;
+    qp_info->dv.bf.reg = ibv_qp_info.bf.reg;
+    qp_info->dv.bf.size = ibv_qp_info.bf.size;
+#else
+    struct mlx5_qp *mqp = ucs_container_of(qp, struct mlx5_qp, verbs_qp.qp);
+
+    if ((mqp->sq.cur_post != 0) || (mqp->rq.head != 0)) {
+        ucs_warn("cur_post=%d head=%d need_lock=%d", mqp->sq.cur_post,
+                 mqp->rq.head, mqp->bf->need_lock);
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    qp_info->dv.qpn = qp->qp_num;
+    qp_info->dv.dbrec = mqp->db;
+    qp_info->dv.sq.buf = mqp->buf.buf + mqp->sq.offset;
+    qp_info->dv.sq.wqe_cnt = mqp->sq.wqe_cnt;
+    qp_info->dv.sq.stride = 1 << mqp->sq.wqe_shift; /* stride = 2^wqe_shift bytes */
+    qp_info->dv.rq.buf = mqp->buf.buf + mqp->rq.offset;
+    qp_info->dv.rq.wqe_cnt = mqp->rq.wqe_cnt;
+    qp_info->dv.rq.stride = 1 << mqp->rq.wqe_shift;
+
qp_info->dv.bf.reg = mqp->bf->reg; + + if (mqp->bf->uuarn > 0) { + qp_info->dv.bf.size = mqp->bf->buf_size; + } else { + qp_info->dv.bf.size = 0; /* No BF */ + } +#endif + return UCS_OK; +} + +ucs_status_t uct_ib_mlx5_get_srq_info(struct ibv_srq *srq, + uct_ib_mlx5dv_srq_t *srq_info) +{ +#if HAVE_DECL_IBV_MLX5_EXP_GET_SRQ_INFO + struct ibv_mlx5_srq_info ibv_srq_info; + int ret; + + ret = ibv_mlx5_exp_get_srq_info(srq, &ibv_srq_info); + if (ret != 0) { + uct_ib_mlx5_obj_error("srq"); + return UCS_ERR_NO_DEVICE; + } + + srq_info->dv.buf = ibv_srq_info.buf; + srq_info->dv.dbrec = ibv_srq_info.dbrec; + srq_info->dv.stride = ibv_srq_info.stride; + srq_info->dv.head = ibv_srq_info.head; + srq_info->dv.tail = ibv_srq_info.tail; +#else + struct mlx5_srq *msrq; + + if (srq->handle == LEGACY_XRC_SRQ_HANDLE) { + srq = (struct ibv_srq *)(((struct ibv_srq_legacy *)srq)->ibv_srq); + } + + msrq = ucs_container_of(srq, struct mlx5_srq, vsrq.srq); + + if (msrq->counter != 0) { + ucs_error("SRQ counter is not 0 (%d)", msrq->counter); + return UCS_ERR_NO_DEVICE; + } + + srq_info->dv.buf = msrq->buf.buf; + srq_info->dv.dbrec = msrq->db; + srq_info->dv.stride = 1 << msrq->wqe_shift; + srq_info->dv.head = msrq->head; + srq_info->dv.tail = msrq->tail; +#endif + return UCS_OK; +} + +static ucs_status_t uct_ib_mlx5_get_cq(struct ibv_cq *cq, uct_ib_mlx5dv_cq_t *mlx5_cq) +{ +#if HAVE_DECL_IBV_MLX5_EXP_GET_CQ_INFO + struct ibv_mlx5_cq_info ibv_cq_info; + int ret; + + ret = ibv_mlx5_exp_get_cq_info(cq, &ibv_cq_info); + if (ret != 0) { + uct_ib_mlx5_obj_error("cq"); + return UCS_ERR_NO_DEVICE; + } + + mlx5_cq->dv.buf = ibv_cq_info.buf; + mlx5_cq->dv.cqe_cnt = ibv_cq_info.cqe_cnt; + mlx5_cq->dv.cqn = ibv_cq_info.cqn; + mlx5_cq->dv.cqe_size = ibv_cq_info.cqe_size; +#else + struct mlx5_cq *mcq = ucs_container_of(cq, struct mlx5_cq, ibv_cq); + int ret; + + if (mcq->cons_index != 0) { + ucs_error("CQ consumer index is not 0 (%d)", mcq->cons_index); + return UCS_ERR_NO_DEVICE; + } + + mlx5_cq->dv.buf 
= mcq->active_buf->buf;
+    mlx5_cq->dv.cqe_cnt = mcq->ibv_cq.cqe + 1;
+    mlx5_cq->dv.cqn = mcq->cqn;
+    mlx5_cq->dv.cqe_size = mcq->cqe_sz;
+#endif
+    return UCS_OK;
+}
+/**
+ * Query the objects selected by the MLX5DV_OBJ_* bits in @a obj_type (QP, CQ,
+ * SRQ and, with HAVE_IBV_EXP_DM, DM) using the helpers in this file.
+ * Processing stops at the first failure.
+ *
+ * @return UCS_OK, or the status of the first query that failed.
+ */
+ucs_status_t uct_ib_mlx5dv_init_obj(uct_ib_mlx5dv_t *obj, uint64_t obj_type)
+{
+    ucs_status_t ret = UCS_OK;
+
+    if (obj_type & MLX5DV_OBJ_QP) {
+        ret = uct_ib_mlx5_get_qp_info(obj->dv.qp.in,
+                                      ucs_container_of(obj->dv.qp.out, uct_ib_mlx5dv_qp_t, dv));
+    }
+
+    if (!ret && (obj_type & MLX5DV_OBJ_CQ)) {
+        ret = uct_ib_mlx5_get_cq(obj->dv.cq.in,
+                                 ucs_container_of(obj->dv.cq.out, uct_ib_mlx5dv_cq_t, dv));
+    }
+
+    if (!ret && (obj_type & MLX5DV_OBJ_SRQ)) {
+        ret = uct_ib_mlx5_get_srq_info(obj->dv.srq.in,
+                                       ucs_container_of(obj->dv.srq.out, uct_ib_mlx5dv_srq_t, dv));
+    }
+
+#if HAVE_IBV_EXP_DM
+    if (!ret && (obj_type & MLX5DV_OBJ_DM)) {
+        ret = uct_ib_mlx5_get_dm_info(obj->dv_dm.in, obj->dv_dm.out);
+    }
+#endif
+
+    return ret;
+}
+#endif
+/**
+ * Set the consumer index of @a cq to @a cq_ci, through
+ * ibv_mlx5_exp_update_cq_ci() when declared, otherwise by writing the driver's
+ * struct mlx5_cq directly.
+ */
+void uct_ib_mlx5_update_cq_ci(struct ibv_cq *cq, unsigned cq_ci)
+{
+#if HAVE_DECL_IBV_MLX5_EXP_UPDATE_CQ_CI
+    ibv_mlx5_exp_update_cq_ci(cq, cq_ci);
+#else
+    struct mlx5_cq *mcq = ucs_container_of(cq, struct mlx5_cq, ibv_cq);
+    mcq->cons_index = cq_ci;
+#endif
+}
+/** Return the consumer index stored in the driver's struct mlx5_cq. */
+unsigned uct_ib_mlx5_get_cq_ci(struct ibv_cq *cq)
+{
+    struct mlx5_cq *mcq = ucs_container_of(cq, struct mlx5_cq, ibv_cq);
+    return mcq->cons_index;
+}
+/* uct_ib_mlx5_get_av(): copy the address vector embedded in the driver's
+ * struct mlx5_ah into *av. Compiled only when MLX5DV_OBJ_AH is not declared. */
+#if !HAVE_DECL_MLX5DV_OBJ_AH
+void uct_ib_mlx5_get_av(struct ibv_ah *ah, struct mlx5_wqe_av *av)
+{
+    memcpy(av, &ucs_container_of(ah, struct mlx5_ah, ibv_ah)->av, sizeof(*av));
+}
+#endif
+/**
+ * Return the command QP of @a srq (unwrapping a legacy XRC SRQ first).
+ *
+ * @return The QP, or NULL if the SRQ counter is not 0 or the build has no
+ *         HAVE_STRUCT_MLX5_SRQ_CMD_QP.
+ */
+struct ibv_qp *uct_dv_get_cmd_qp(struct ibv_srq *srq)
+{
+#if HAVE_STRUCT_MLX5_SRQ_CMD_QP
+    struct mlx5_srq *msrq;
+
+    if (srq->handle == LEGACY_XRC_SRQ_HANDLE) {
+        srq = (struct ibv_srq *)(((struct ibv_srq_legacy *)srq)->ibv_srq);
+    }
+
+    msrq = ucs_container_of(srq, struct mlx5_srq, vsrq.srq);
+    if (msrq->counter != 0) {
+        ucs_error("SRQ counter is not 0 (%d)", msrq->counter);
+        return NULL;
+    }
+
+    return &msrq->cmd_qp->verbs_qp.qp;
+#else
+    return NULL;
+#endif
+}
+/* Local view of one UAR entry; only 'regs' is read, by uct_dv_get_info_uar0().
+ * NOTE(review): presumably mirrors a libmlx5-internal layout - confirm. */
+struct mlx5_uar_data {
+    enum { __DUMMY } map_type;
+    void *regs;
+};
+/**
+ * Return the 'regs' pointer of the first struct mlx5_uar_data entry at @a uar
+ * when mlx5dv_init_obj is declared; otherwise NULL.
+ */
+void *uct_dv_get_info_uar0(void *uar)
+{
+#if HAVE_DECL_MLX5DV_INIT_OBJ
+    struct mlx5_uar_data *muar = uar;
+    return muar[0].regs;
+#else
+    return NULL;
+#endif
+}
+
+#endif
diff --git a/src/uct/ib/mlx5/exp/ib_mlx5_hw.h b/src/uct/ib/mlx5/exp/ib_mlx5_hw.h
new file mode 100644
index 0000000..320e46a
--- /dev/null
+++ b/src/uct/ib/mlx5/exp/ib_mlx5_hw.h
@@ -0,0 +1,79 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_IB_MLX5_HW_H_
+#define UCT_IB_MLX5_HW_H_
+
+#include
+/* DV backport: QP layout (doorbell record, send/receive queue buffers and the
+ * BlueFlame register) */
+struct mlx5dv_qp {
+    volatile uint32_t *dbrec;
+    struct {
+        void *buf;
+        uint32_t wqe_cnt;
+        uint32_t stride;
+    } sq;
+    struct {
+        void *buf;
+        uint32_t wqe_cnt;
+        uint32_t stride;
+    } rq;
+    struct {
+        void *reg;
+        uint32_t size;
+    } bf;
+    uint64_t comp_mask;
+};
+/* DV backport: CQ layout */
+struct mlx5dv_cq {
+    void *buf;
+    volatile uint32_t *dbrec;
+    uint32_t cqe_cnt;
+    uint32_t cqe_size;
+    void *cq_uar;
+/* DV backport will behave as DV with fixed CQ UAR */
+#undef HAVE_STRUCT_MLX5DV_CQ_CQ_UAR
+#define HAVE_STRUCT_MLX5DV_CQ_CQ_UAR 1
+    uint32_t cqn;
+    uint64_t comp_mask;
+};
+/* DV backport: SRQ layout */
+struct mlx5dv_srq {
+    void *buf;
+    volatile uint32_t *dbrec;
+    uint32_t stride;
+    uint32_t head;
+    uint32_t tail;
+    uint64_t comp_mask;
+};
+/* Input/output pairs for an init-obj query; one pair per object kind */
+struct mlx5dv_obj {
+    struct {
+        struct ibv_qp *in;
+        struct mlx5dv_qp *out;
+    } qp;
+    struct {
+        struct ibv_cq *in;
+        struct mlx5dv_cq *out;
+    } cq;
+    struct {
+        struct ibv_srq *in;
+        struct mlx5dv_srq *out;
+    } srq;
+    struct {
+        struct ibv_exp_wq *in;
+        struct mlx5dv_rwq *out;
+    } rwq;
+};
+/* Bit flags selecting which pairs of struct mlx5dv_obj are to be queried */
+enum mlx5dv_obj_type {
+    MLX5DV_OBJ_QP = 1 << 0,
+    MLX5DV_OBJ_CQ = 1 << 1,
+    MLX5DV_OBJ_SRQ = 1 << 2,
+    MLX5DV_OBJ_RWQ = 1 << 3,
+};
+
+#endif
diff --git a/src/uct/ib/mlx5/ib_mlx5.c b/src/uct/ib/mlx5/ib_mlx5.c
new file mode 100644
index 0000000..4243860
--- /dev/null
+++ b/src/uct/ib/mlx5/ib_mlx5.c
@@ -0,0 +1,660 @@
+/**
+* Copyright (C) Mellanox Technologies
Ltd. 2001-2014. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "ib_mlx5.h"
+#include "ib_mlx5.inl"
+#include "ib_mlx5_log.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Names of the uct_ib_mlx5_mmio_mode_t values, indexed by mode. Used for the
+ * MMIO_MODE configuration enum and when logging the selected mode. */
+static const char *uct_ib_mlx5_mmio_modes[] = {
+    [UCT_IB_MLX5_MMIO_MODE_BF_POST] = "bf_post",
+    [UCT_IB_MLX5_MMIO_MODE_BF_POST_MT] = "bf_post_mt",
+    [UCT_IB_MLX5_MMIO_MODE_DB] = "db",
+    [UCT_IB_MLX5_MMIO_MODE_AUTO] = "auto",
+    [UCT_IB_MLX5_MMIO_MODE_LAST] = NULL
+};
+/* Configuration fields that populate uct_ib_mlx5_iface_config_t */
+ucs_config_field_t uct_ib_mlx5_iface_config_table[] = {
+#if HAVE_IBV_DM
+    {"DM_SIZE", "2k",
+     "Device Memory segment size (0 - disabled)",
+     ucs_offsetof(uct_ib_mlx5_iface_config_t, dm.seg_len), UCS_CONFIG_TYPE_MEMUNITS},
+    {"DM_COUNT", "1",
+     "Device Memory segments count (0 - disabled)",
+     ucs_offsetof(uct_ib_mlx5_iface_config_t, dm.count), UCS_CONFIG_TYPE_UINT},
+#endif
+
+    {"MMIO_MODE", "auto",
+     "How to write to MMIO register when posting sends on a QP. One of the following:\n"
+     " bf_post - BlueFlame post, write the WQE fully to MMIO register.\n"
+     " bf_post_mt - Thread-safe BlueFlame, same as bf_post but same MMIO register can be used\n"
+     " by multiple threads.\n"
+     " db - Doorbell mode, write only 8 bytes to MMIO register, followed by a memory\n"
+     " store fence, which makes sure the doorbell goes out on the bus.\n"
+     " auto - Select best according to worker thread mode.",
+     ucs_offsetof(uct_ib_mlx5_iface_config_t, mmio_mode),
+     UCS_CONFIG_TYPE_ENUM(uct_ib_mlx5_mmio_modes)},
+
+    {NULL}
+};
+/**
+ * Create a completion queue.
+ *
+ * When MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE is declared the CQ is created with
+ * mlx5dv_create_cq(): a CQE size of 128 is requested if *inl is above 32 and
+ * 64 otherwise (passed through uct_ib_get_cqe_size()), and on success *inl is
+ * set to half of the CQE size that was used. Otherwise the call is forwarded
+ * to uct_ib_verbs_create_cq().
+ *
+ * @param [in]     ignore_overrun  Non-zero to create the CQ with the
+ *                                 ignore-overrun flag.
+ * @param [in,out] inl             Requested / resulting inline size.
+ * @param [out]    cq_p            Filled with the created CQ.
+ *
+ * @return UCS_OK, or UCS_ERR_IO_ERROR if mlx5dv_create_cq() fails.
+ */
+ucs_status_t uct_ib_mlx5_create_cq(struct ibv_context *context, int cqe,
+                                   struct ibv_comp_channel *channel,
+                                   int comp_vector, int ignore_overrun,
+                                   size_t *inl, struct ibv_cq **cq_p)
+{
+#if HAVE_DECL_MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE
+    struct ibv_cq *cq;
+    struct ibv_cq_init_attr_ex cq_attr = {};
+    struct mlx5dv_cq_init_attr dv_attr = {};
+
+    cq_attr.cqe = cqe;
+    cq_attr.channel = channel;
+    cq_attr.comp_vector = comp_vector;
+    if (ignore_overrun) {
+        cq_attr.comp_mask
= IBV_CQ_INIT_ATTR_MASK_FLAGS;
+        cq_attr.flags = IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN;
+    }
+    dv_attr.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE;
+    dv_attr.cqe_size = uct_ib_get_cqe_size(*inl > 32 ? 128 : 64);
+    cq = ibv_cq_ex_to_cq(mlx5dv_create_cq(context, &cq_attr, &dv_attr));
+    if (!cq) {
+        ucs_error("mlx5dv_create_cq(cqe=%d) failed: %m", cqe);
+        return UCS_ERR_IO_ERROR;
+    }
+
+    *cq_p = cq;
+    *inl = dv_attr.cqe_size / 2;
+    return UCS_OK;
+#else
+    return uct_ib_verbs_create_cq(context, cqe, channel, comp_vector,
+                                  ignore_overrun, inl, cq_p);
+#endif
+}
+/**
+ * Initialize @a mlx5_cq from the verbs CQ @a cq.
+ *
+ * Queries the CQ layout, resets the software consumer index and sequence
+ * number, offsets cq_buf by (cqe_size - sizeof(struct mlx5_cqe64)), asks the
+ * driver to ignore CQ overrun, and finally marks every CQE as HW-owned.
+ *
+ * @return UCS_OK; UCS_ERR_IO_ERROR if the CQ query fails; UCS_ERR_UNSUPPORTED
+ *         if ibv_exp_cq_ignore_overrun() fails.
+ */
+ucs_status_t uct_ib_mlx5_get_cq(struct ibv_cq *cq, uct_ib_mlx5_cq_t *mlx5_cq)
+{
+    uct_ib_mlx5dv_cq_t dcq = {};
+    uct_ib_mlx5dv_t obj = {};
+    struct mlx5_cqe64 *cqe;
+    unsigned cqe_size;
+    ucs_status_t status;
+    int ret, i;
+
+    obj.dv.cq.in = cq;
+    obj.dv.cq.out = &dcq.dv;
+    status = uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ);
+    if (status != UCS_OK) {
+        return UCS_ERR_IO_ERROR;
+    }
+
+    mlx5_cq->cq_buf = dcq.dv.buf;
+    mlx5_cq->cq_ci = 0;
+    mlx5_cq->cq_sn = 0;
+    mlx5_cq->cq_length = dcq.dv.cqe_cnt;
+    mlx5_cq->cq_num = dcq.dv.cqn;
+#if HAVE_STRUCT_MLX5DV_CQ_CQ_UAR
+    mlx5_cq->uar = dcq.dv.cq_uar;
+#else
+    /* coverity[var_deref_model] */
+    mlx5_cq->uar = uct_dv_get_info_uar0(dcq.dv.uar);
+#endif
+    mlx5_cq->dbrec = dcq.dv.dbrec;
+    cqe_size = dcq.dv.cqe_size;
+
+    /* Move buffer forward for 128b CQE, so we would get pointer to the 2nd
+     * 64b when polling.
+     */
+    mlx5_cq->cq_buf = UCS_PTR_BYTE_OFFSET(mlx5_cq->cq_buf,
+                                          cqe_size - sizeof(struct mlx5_cqe64));
+
+    ret = ibv_exp_cq_ignore_overrun(cq);
+    if (ret != 0) {
+        ucs_error("Failed to modify send CQ to ignore overrun: %s", strerror(ret));
+        return UCS_ERR_UNSUPPORTED;
+    }
+    /* The CQE size must be an exact power of two so it can be kept as a log2 */
+    mlx5_cq->cqe_size_log = ucs_ilog2(cqe_size);
+    ucs_assert_always((1ul << mlx5_cq->cqe_size_log) == cqe_size);
+
+    /* Set owner bit for all CQEs, so that CQE would look like it is in HW
+     * ownership.
In this case CQ polling functions will return immediately if
+     * no any CQE ready, there is no need to check opcode for
+     * MLX5_CQE_INVALID value anymore. */
+    for (i = 0; i < mlx5_cq->cq_length; ++i) {
+        cqe = uct_ib_mlx5_get_cqe(mlx5_cq, i);
+        cqe->op_own |= MLX5_CQE_OWNER_MASK;
+    }
+
+
+    return UCS_OK;
+}
+/**
+ * Comparison callback handed to uct_worker_tl_data_get() by
+ * uct_ib_mlx5_iface_get_res_domain().
+ *
+ * @return Non-zero if @a res_domain was created on the ibv context of @a md.
+ *         Always 1 when neither resource-domain API is available.
+ */
+static int
+uct_ib_mlx5_res_domain_cmp(uct_ib_mlx5_res_domain_t *res_domain,
+                           uct_ib_md_t *md, uct_priv_worker_t *worker)
+{
+#if HAVE_IBV_EXP_RES_DOMAIN
+    return res_domain->ibv_domain->context == md->dev.ibv_context;
+#elif HAVE_DECL_IBV_ALLOC_TD
+    return res_domain->pd->context == md->dev.ibv_context;
+#else
+    return 1;
+#endif
+}
+/**
+ * Initialization callback handed to uct_worker_tl_data_get().
+ *
+ * With HAVE_IBV_EXP_RES_DOMAIN a resource domain is created whose thread model
+ * follows worker->thread_mode. With HAVE_DECL_IBV_ALLOC_TD a thread domain and
+ * a parent domain are allocated only for UCS_THREAD_MODE_MULTI; for any other
+ * mode td is NULL and the MD's own pd is used. Without either API nothing is
+ * done.
+ *
+ * @return UCS_OK, or UCS_ERR_IO_ERROR if a verbs allocation fails.
+ */
+static ucs_status_t
+uct_ib_mlx5_res_domain_init(uct_ib_mlx5_res_domain_t *res_domain,
+                            uct_ib_md_t *md, uct_priv_worker_t *worker)
+{
+#if HAVE_IBV_EXP_RES_DOMAIN
+    struct ibv_exp_res_domain_init_attr attr;
+
+    attr.comp_mask = IBV_EXP_RES_DOMAIN_THREAD_MODEL |
+                     IBV_EXP_RES_DOMAIN_MSG_MODEL;
+    attr.msg_model = IBV_EXP_MSG_LOW_LATENCY;
+
+    switch (worker->thread_mode) {
+    case UCS_THREAD_MODE_SINGLE:
+        attr.thread_model = IBV_EXP_THREAD_SINGLE;
+        break;
+    case UCS_THREAD_MODE_SERIALIZED:
+        attr.thread_model = IBV_EXP_THREAD_UNSAFE;
+        break;
+    default:
+        attr.thread_model = IBV_EXP_THREAD_SAFE;
+        break;
+    }
+
+    res_domain->ibv_domain = ibv_exp_create_res_domain(md->dev.ibv_context, &attr);
+    if (res_domain->ibv_domain == NULL) {
+        ucs_error("ibv_exp_create_res_domain() on %s failed: %m",
+                  uct_ib_device_name(&md->dev));
+        return UCS_ERR_IO_ERROR;
+    }
+#elif HAVE_DECL_IBV_ALLOC_TD
+    struct ibv_parent_domain_init_attr attr;
+    struct ibv_td_init_attr td_attr;
+
+    if (worker->thread_mode == UCS_THREAD_MODE_MULTI) {
+        td_attr.comp_mask = 0;
+        res_domain->td = ibv_alloc_td(md->dev.ibv_context, &td_attr);
+        if (res_domain->td == NULL) {
+            ucs_error("ibv_alloc_td() on %s failed: %m",
+                      uct_ib_device_name(&md->dev));
+            return UCS_ERR_IO_ERROR;
+        }
+    } else {
+        res_domain->td = NULL;
+        res_domain->pd = md->pd;
+        return UCS_OK;
+    }
+
+    attr.td = res_domain->td;
+    attr.pd = md->pd;
+    attr.comp_mask = 0;
+    res_domain->pd = ibv_alloc_parent_domain(md->dev.ibv_context, &attr);
+    if (res_domain->pd == NULL) {
+        ucs_error("ibv_alloc_parent_domain() on %s failed: %m",
+                  uct_ib_device_name(&md->dev));
+        ibv_dealloc_td(res_domain->td);
+        return UCS_ERR_IO_ERROR;
+    }
+#endif
+    return UCS_OK;
+}
+/**
+ * Release what uct_ib_mlx5_res_domain_init() allocated. Failures are only
+ * logged as warnings. In the thread-domain variant nothing is released when
+ * td is NULL (the pd then belongs to the MD), and the td is left allocated if
+ * deallocating the parent domain fails.
+ */
+static void uct_ib_mlx5_res_domain_cleanup(uct_ib_mlx5_res_domain_t *res_domain)
+{
+#if HAVE_IBV_EXP_RES_DOMAIN
+    struct ibv_exp_destroy_res_domain_attr attr;
+    int ret;
+
+    attr.comp_mask = 0;
+    ret = ibv_exp_destroy_res_domain(res_domain->ibv_domain->context,
+                                     res_domain->ibv_domain, &attr);
+    if (ret != 0) {
+        ucs_warn("ibv_exp_destroy_res_domain() failed: %m");
+    }
+#elif HAVE_DECL_IBV_ALLOC_TD
+    int ret;
+
+    if (res_domain->td != NULL) {
+        ret = ibv_dealloc_pd(res_domain->pd);
+        if (ret != 0) {
+            ucs_warn("ibv_dealloc_pd() failed: %m");
+            return;
+        }
+
+        ret = ibv_dealloc_td(res_domain->td);
+        if (ret != 0) {
+            ucs_warn("ibv_dealloc_td() failed: %m");
+        }
+    }
+#endif
+}
+/**
+ * Obtain a resource domain for @a qp through uct_worker_tl_data_get() (keyed
+ * by UCT_IB_MLX5_RES_DOMAIN_KEY, using the cmp/init callbacks above) and mark
+ * the QP as a verbs object.
+ *
+ * @return UCS_OK, or the error status encoded in the returned pointer.
+ */
+ucs_status_t uct_ib_mlx5_iface_get_res_domain(uct_ib_iface_t *iface,
+                                              uct_ib_mlx5_qp_t *qp)
+{
+    qp->verbs.rd = uct_worker_tl_data_get(iface->super.worker,
+                                          UCT_IB_MLX5_RES_DOMAIN_KEY,
+                                          uct_ib_mlx5_res_domain_t,
+                                          uct_ib_mlx5_res_domain_cmp,
+                                          uct_ib_mlx5_res_domain_init,
+                                          uct_ib_iface_md(iface),
+                                          iface->super.worker);
+    if (UCS_PTR_IS_ERR(qp->verbs.rd)) {
+        return UCS_PTR_STATUS(qp->verbs.rd);
+    }
+
+    qp->type = UCT_IB_MLX5_OBJ_TYPE_VERBS;
+
+    return UCS_OK;
+}
+/** Give back the resource domain of a verbs QP; no-op for other QP types. */
+void uct_ib_mlx5_iface_put_res_domain(uct_ib_mlx5_qp_t *qp)
+{
+    if (qp->type == UCT_IB_MLX5_OBJ_TYPE_VERBS) {
+        uct_worker_tl_data_put(qp->verbs.rd, uct_ib_mlx5_res_domain_cleanup);
+    }
+}
+/**
+ * Create a verbs QP for @a iface: fill @a attr with
+ * uct_ib_mlx5_iface_fill_attr() and uct_ib_exp_qp_fill_attr(), create the QP
+ * into qp->verbs.qp and cache its number in qp->qp_num.
+ *
+ * @return UCS_OK, or the status of the first step that failed.
+ */
+ucs_status_t uct_ib_mlx5_iface_create_qp(uct_ib_iface_t *iface,
+                                         uct_ib_mlx5_qp_t *qp,
+                                         uct_ib_qp_attr_t *attr)
+{
+    ucs_status_t status;
+
+    status = uct_ib_mlx5_iface_fill_attr(iface, qp, attr);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    uct_ib_exp_qp_fill_attr(iface,
attr);
+    status = uct_ib_iface_create_qp(iface, attr, &qp->verbs.qp);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    qp->qp_num = qp->verbs.qp->qp_num;
+    return UCS_OK;
+}
+/**
+ * uct_ib_mlx5_get_compact_av(): compiled only without DevX.
+ *
+ * Creates an address handle for the interface's own device address, reads the
+ * driver's address vector for it, and sets *compact_av to 1 if the
+ * UCT_IB_MLX5_EXTENDED_UD_AV bit is clear in dqp_dct, otherwise to 0.
+ *
+ * @return UCS_OK, or the status of the address / AH step that failed.
+ */
+#if !HAVE_DEVX
+ucs_status_t uct_ib_mlx5_get_compact_av(uct_ib_iface_t *iface, int *compact_av)
+{
+    struct mlx5_wqe_av mlx5_av;
+    struct ibv_ah *ah;
+    uct_ib_address_t *ib_addr;
+    ucs_status_t status;
+    struct ibv_ah_attr ah_attr;
+
+    /* coverity[result_independent_of_operands] */
+    ib_addr = ucs_alloca((size_t)iface->addr_size);
+
+    status = uct_ib_iface_get_device_address(&iface->super.super,
+                                             (uct_device_addr_t*)ib_addr);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    uct_ib_iface_fill_ah_attr_from_addr(iface, ib_addr, &ah_attr);
+    ah_attr.is_global = iface->config.force_global_addr;
+    status = uct_ib_iface_create_ah(iface, &ah_attr, &ah);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    uct_ib_mlx5_get_av(ah, &mlx5_av);
+
+    /* copy MLX5_EXTENDED_UD_AV from the driver, if the flag is not present then
+     * the device supports compact address vector. */
+    *compact_av = !(mlx5_av_base(&mlx5_av)->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV);
+    return UCS_OK;
+}
+#endif
+/**
+ * Handle a CQE according to its opcode (op_own >> 4).
+ *
+ * Requester errors advance cq_ci, are logged at debug level and handed to
+ * iface->ops->handle_failure(). Responder errors advance cq_ci and are
+ * reported at fatal level. Any other opcode is left for the next poll.
+ */
+void uct_ib_mlx5_check_completion(uct_ib_iface_t *iface, uct_ib_mlx5_cq_t *cq,
+                                  struct mlx5_cqe64 *cqe)
+{
+    ucs_status_t status;
+
+    switch (cqe->op_own >> 4) {
+    case MLX5_CQE_REQ_ERR:
+        /* update ci before invoking error callback, since it can poll on cq */
+        UCS_STATIC_ASSERT(MLX5_CQE_REQ_ERR & (UCT_IB_MLX5_CQE_OP_OWN_ERR_MASK >> 4));
+        ++cq->cq_ci;
+        status = uct_ib_mlx5_completion_with_err(iface, (void*)cqe, NULL,
+                                                 UCS_LOG_LEVEL_DEBUG);
+        iface->ops->handle_failure(iface, cqe, status);
+        return;
+    case MLX5_CQE_RESP_ERR:
+        /* Local side failure - treat as fatal */
+        UCS_STATIC_ASSERT(MLX5_CQE_RESP_ERR & (UCT_IB_MLX5_CQE_OP_OWN_ERR_MASK >> 4));
+        ++cq->cq_ci;
+        uct_ib_mlx5_completion_with_err(iface, (void*)cqe, NULL,
+                                        UCS_LOG_LEVEL_FATAL);
+        return;
+    default:
+        /* CQE might have been updated by HW.
Skip it now, and it would be handled
+         * in next polling. */
+        return;
+    }
+}
+/**
+ * Comparison callback for the shared MMIO register objects: two addresses
+ * match if they are equal once the UCT_IB_MLX5_BF_REG_SIZE bit is masked out
+ * of both. The third argument is not used.
+ * NOTE(review): uct_ib_mlx5_txwq_init() passes the MMIO mode in that position
+ * (as for uct_ib_mlx5_mmio_init()), although the parameter is named bf_size.
+ */
+static int uct_ib_mlx5_mmio_cmp(uct_ib_mlx5_mmio_reg_t *reg, uintptr_t addr,
+                                unsigned bf_size)
+{
+    return (reg->addr.uint & ~UCT_IB_MLX5_BF_REG_SIZE) ==
+           (addr & ~UCT_IB_MLX5_BF_REG_SIZE);
+}
+/** Initialization callback: record the register address and MMIO mode. */
+static ucs_status_t uct_ib_mlx5_mmio_init(uct_ib_mlx5_mmio_reg_t *reg,
+                                          uintptr_t addr,
+                                          uct_ib_mlx5_mmio_mode_t mmio_mode)
+{
+    reg->addr.uint = addr;
+    reg->mode = mmio_mode;
+    return UCS_OK;
+}
+/** Cleanup callback for MMIO register objects; there is nothing to release. */
+static void uct_ib_mlx5_mmio_cleanup(uct_ib_mlx5_mmio_reg_t *reg)
+{
+}
+/** Comparison callback: a DevX UAR matches if it was allocated on md's ibv
+ *  context. The MMIO mode is not compared. */
+int uct_ib_mlx5_devx_uar_cmp(uct_ib_mlx5_devx_uar_t *uar,
+                             uct_ib_mlx5_md_t *md,
+                             uct_ib_mlx5_mmio_mode_t mmio_mode)
+{
+    return uar->ctx == md->super.dev.ibv_context;
+}
+/**
+ * Initialization callback: allocate a DevX UAR on md's ibv context and record
+ * its register address, the MMIO mode and the context.
+ *
+ * @return UCS_OK; UCS_ERR_NO_MEMORY if mlx5dv_devx_alloc_uar() fails;
+ *         UCS_ERR_UNSUPPORTED when built without HAVE_DEVX.
+ */
+ucs_status_t uct_ib_mlx5_devx_uar_init(uct_ib_mlx5_devx_uar_t *uar,
+                                       uct_ib_mlx5_md_t *md,
+                                       uct_ib_mlx5_mmio_mode_t mmio_mode)
+{
+#if HAVE_DEVX
+    uar->uar = mlx5dv_devx_alloc_uar(md->super.dev.ibv_context, 0);
+    if (uar->uar == NULL) {
+        ucs_error("mlx5dv_devx_alloc_uar() failed: %m");
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    uar->super.addr.ptr = uar->uar->reg_addr;
+    uar->super.mode = mmio_mode;
+    uar->ctx = md->super.dev.ibv_context;
+
+    return UCS_OK;
+#else
+    return UCS_ERR_UNSUPPORTED;
+#endif
+}
+/** Cleanup callback: free the DevX UAR (no-op without HAVE_DEVX). */
+void uct_ib_mlx5_devx_uar_cleanup(uct_ib_mlx5_devx_uar_t *uar)
+{
+#if HAVE_DEVX
+    mlx5dv_devx_free_uar(uar->uar);
+#endif
+}
+/**
+ * Attach a DevX UAR, obtained through uct_worker_tl_data_get() with the
+ * devx_uar cmp/init callbacks, to @a txwq and mark the work queue as a DevX
+ * object.
+ *
+ * @return UCS_OK, or the error status encoded in the returned pointer.
+ */
+ucs_status_t uct_ib_mlx5_txwq_init_devx(uct_priv_worker_t *worker,
+                                        uct_ib_mlx5_md_t *md,
+                                        uct_ib_mlx5_txwq_t *txwq,
+                                        uct_ib_mlx5_mmio_mode_t mode)
+{
+    uct_ib_mlx5_devx_uar_t *uar;
+
+    uar = uct_worker_tl_data_get(worker,
+                                 UCT_IB_MLX5_DEVX_UAR_KEY,
+                                 uct_ib_mlx5_devx_uar_t,
+                                 uct_ib_mlx5_devx_uar_cmp,
+                                 uct_ib_mlx5_devx_uar_init,
+                                 md, mode);
+    if (UCS_PTR_IS_ERR(uar)) {
+        return UCS_PTR_STATUS(uar);
+    }
+
+    txwq->reg = &uar->super;
+    txwq->super.type = UCT_IB_MLX5_OBJ_TYPE_DEVX;
+
+    return UCS_OK;
+}
+/**
+ * Reset the software state of a send work queue: rewind the current pointer to
+ * qstart, reset the producer indices and the fence info, and zero the queue
+ * memory between qstart and qend.
+ */
+void uct_ib_mlx5_txwq_reset(uct_ib_mlx5_txwq_t *txwq)
+{
+    txwq->curr = txwq->qstart;
+    txwq->sw_pi = 0;
+
txwq->prev_sw_pi = UINT16_MAX;
+#if UCS_ENABLE_ASSERT
+    txwq->hw_ci = 0xFFFF;
+#endif
+    uct_ib_fence_info_init(&txwq->fi);
+    memset(txwq->qstart, 0, UCS_PTR_BYTE_DIFF(txwq->qstart, txwq->qend));
+}
+/**
+ * Initialize @a txwq on top of the verbs QP @a verbs_qp.
+ *
+ * Queries the QP layout and rejects it unless the send stride equals
+ * MLX5_SEND_WQE_BB, the WQE count is a power of two and the BlueFlame size is
+ * either 0 or UCT_IB_MLX5_BF_REG_SIZE. Then selects the MMIO mode, obtains the
+ * MMIO register object shared through uct_worker_tl_data_get(), and sets up
+ * the doorbell record, bb_max and the software state.
+ *
+ * @param [in] cfg_mmio_mode  Configured mode; UCT_IB_MLX5_MMIO_MODE_AUTO lets
+ *                            the function choose.
+ *
+ * @return UCS_OK; UCS_ERR_IO_ERROR if the query fails or the layout is not
+ *         suitable; UCS_ERR_UNSUPPORTED if auto-selection meets a thread mode
+ *         other than single or serialized while BlueFlame is available; or
+ *         the error from obtaining the register object.
+ */
+ucs_status_t uct_ib_mlx5_txwq_init(uct_priv_worker_t *worker,
+                                   uct_ib_mlx5_mmio_mode_t cfg_mmio_mode,
+                                   uct_ib_mlx5_txwq_t *txwq,
+                                   struct ibv_qp *verbs_qp)
+{
+    uct_ib_mlx5_mmio_mode_t mmio_mode;
+    uct_ib_mlx5dv_qp_t qp_info = {};
+    uct_ib_mlx5dv_t obj = {};
+    ucs_status_t status;
+
+    obj.dv.qp.in = verbs_qp;
+    obj.dv.qp.out = &qp_info.dv;
+
+    status = uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_QP);
+    if (status != UCS_OK) {
+        return UCS_ERR_IO_ERROR;
+    }
+
+    if ((qp_info.dv.sq.stride != MLX5_SEND_WQE_BB) || !ucs_is_pow2(qp_info.dv.sq.wqe_cnt) ||
+        ((qp_info.dv.bf.size != 0) && (qp_info.dv.bf.size != UCT_IB_MLX5_BF_REG_SIZE)))
+    {
+        ucs_error("mlx5 device parameters not suitable for transport "
+                  "bf.size(%d) %d, sq.stride(%d) %d, wqe_cnt %d",
+                  UCT_IB_MLX5_BF_REG_SIZE, qp_info.dv.bf.size,
+                  MLX5_SEND_WQE_BB, qp_info.dv.sq.stride, qp_info.dv.sq.wqe_cnt);
+        return UCS_ERR_IO_ERROR;
+    }
+    /* An explicit mode is used as is. "auto" picks a BlueFlame variant by
+     * worker thread mode when a BF register is reported, doorbell otherwise */
+    if (cfg_mmio_mode != UCT_IB_MLX5_MMIO_MODE_AUTO) {
+        mmio_mode = cfg_mmio_mode;
+    } else if (qp_info.dv.bf.size > 0) {
+        if (worker->thread_mode == UCS_THREAD_MODE_SINGLE) {
+            mmio_mode = UCT_IB_MLX5_MMIO_MODE_BF_POST;
+        } else if (worker->thread_mode == UCS_THREAD_MODE_SERIALIZED) {
+            mmio_mode = UCT_IB_MLX5_MMIO_MODE_BF_POST_MT;
+        } else {
+            ucs_error("unsupported thread mode for mlx5: %d", worker->thread_mode);
+            return UCS_ERR_UNSUPPORTED;
+        }
+    } else {
+        mmio_mode = UCT_IB_MLX5_MMIO_MODE_DB;
+    }
+
+    ucs_debug("tx wq %d bytes [bb=%d, nwqe=%d] mmio_mode %s",
+              qp_info.dv.sq.stride * qp_info.dv.sq.wqe_cnt,
+              qp_info.dv.sq.stride, qp_info.dv.sq.wqe_cnt,
+              uct_ib_mlx5_mmio_modes[mmio_mode]);
+
+    txwq->qstart = qp_info.dv.sq.buf;
+    txwq->qend = UCS_PTR_BYTE_OFFSET(qp_info.dv.sq.buf,
+                                     qp_info.dv.sq.stride * qp_info.dv.sq.wqe_cnt);
+    txwq->reg = uct_worker_tl_data_get(worker,
+
UCT_IB_MLX5_WORKER_BF_KEY,
+                                       uct_ib_mlx5_mmio_reg_t,
+                                       uct_ib_mlx5_mmio_cmp,
+                                       uct_ib_mlx5_mmio_init,
+                                       (uintptr_t)qp_info.dv.bf.reg,
+                                       mmio_mode);
+    if (UCS_PTR_IS_ERR(txwq->reg)) {
+        return UCS_PTR_STATUS(txwq->reg);
+    }
+
+    /* cppcheck-suppress autoVariables */
+    txwq->dbrec = &qp_info.dv.dbrec[MLX5_SND_DBR];
+    /* need to reserve 2x because:
+     * - on completion we only get the index of last wqe and we do not
+     * really know how many bb is there (but no more than max bb)
+     * - on send we check that there is at least one bb. We know
+     * exact number of bbs once we actually are sending.
+     */
+    txwq->bb_max = qp_info.dv.sq.wqe_cnt - 2 * UCT_IB_MLX5_MAX_BB;
+    ucs_assert_always(txwq->bb_max > 0);
+
+    uct_ib_mlx5_txwq_reset(txwq);
+    return UCS_OK;
+}
+/**
+ * Release the MMIO register object of @a txwq according to the work queue
+ * type: a DevX UAR for DEVX; the resource domain plus the MMIO register for
+ * VERBS; and for LAST only the MMIO register, if one is set.
+ */
+void uct_ib_mlx5_txwq_cleanup(uct_ib_mlx5_txwq_t* txwq)
+{
+    uct_ib_mlx5_devx_uar_t *uar = ucs_derived_of(txwq->reg,
+                                                 uct_ib_mlx5_devx_uar_t);
+    switch (txwq->super.type) {
+    case UCT_IB_MLX5_OBJ_TYPE_DEVX:
+        uct_worker_tl_data_put(uar, uct_ib_mlx5_devx_uar_cleanup);
+        break;
+    case UCT_IB_MLX5_OBJ_TYPE_VERBS:
+        uct_ib_mlx5_iface_put_res_domain(&txwq->super);
+        uct_worker_tl_data_put(txwq->reg, uct_ib_mlx5_mmio_cleanup);
+        break;
+    case UCT_IB_MLX5_OBJ_TYPE_LAST:
+        if (txwq->reg != NULL) {
+            uct_worker_tl_data_put(txwq->reg, uct_ib_mlx5_mmio_cleanup);
+        }
+    }
+}
+/**
+ * Initialize @a rxwq from the receive queue of @a verbs_qp. The queue is
+ * accepted only if its WQE count is a power of two and its stride equals
+ * sizeof(struct mlx5_wqe_data_seg); the WQE memory is then zeroed.
+ *
+ * @return UCS_OK, or UCS_ERR_IO_ERROR if the query fails or the layout is
+ *         rejected.
+ */
+ucs_status_t uct_ib_mlx5_get_rxwq(struct ibv_qp *verbs_qp, uct_ib_mlx5_rxwq_t *rxwq)
+{
+    uct_ib_mlx5dv_qp_t qp_info = {};
+    uct_ib_mlx5dv_t obj = {};
+    ucs_status_t status;
+
+    obj.dv.qp.in = verbs_qp;
+    obj.dv.qp.out = &qp_info.dv;
+
+    status = uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_QP);
+    if (status != UCS_OK) {
+        return UCS_ERR_IO_ERROR;
+    }
+
+    if (!ucs_is_pow2(qp_info.dv.rq.wqe_cnt) ||
+        qp_info.dv.rq.stride != sizeof(struct mlx5_wqe_data_seg)) {
+        ucs_error("mlx5 rx wq [count=%d stride=%d] has invalid parameters",
+                  qp_info.dv.rq.wqe_cnt,
+                  qp_info.dv.rq.stride);
+        return UCS_ERR_IO_ERROR;
+    }
+    rxwq->wqes = qp_info.dv.rq.buf;
+    rxwq->rq_wqe_counter =
0;
+    rxwq->cq_wqe_counter = 0;
+    rxwq->mask = qp_info.dv.rq.wqe_cnt - 1;
+    /* cppcheck-suppress autoVariables */
+    rxwq->dbrec = &qp_info.dv.dbrec[MLX5_RCV_DBR];
+    memset(rxwq->wqes, 0, qp_info.dv.rq.wqe_cnt * sizeof(struct mlx5_wqe_data_seg));
+
+    return UCS_OK;
+}
+/**
+ * Initialize @a srq from the verbs SRQ @a verbs_srq.
+ *
+ * The SRQ is accepted only if its head is 0, its stride equals
+ * uct_ib_mlx5_srq_stride(sge_num) and (tail + 1) is a power of two. On success
+ * the buffer and doorbell pointers are stored and all WQEs are initialized
+ * with uct_ib_mlx5_srq_buff_init().
+ *
+ * @return UCS_OK; the query status if the query fails; UCS_ERR_NO_DEVICE if
+ *         one of the checks fails.
+ */
+ucs_status_t uct_ib_mlx5_srq_init(uct_ib_mlx5_srq_t *srq, struct ibv_srq *verbs_srq,
+                                  size_t sg_byte_count, int sge_num)
+{
+    uct_ib_mlx5dv_srq_t srq_info = {};
+    uct_ib_mlx5dv_t obj = {};
+    ucs_status_t status;
+    uint16_t stride;
+
+    obj.dv.srq.in = verbs_srq;
+    obj.dv.srq.out = &srq_info.dv;
+
+    status = uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_SRQ);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    if (srq_info.dv.head != 0) {
+        ucs_error("SRQ head is not 0 (%d)", srq_info.dv.head);
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    stride = uct_ib_mlx5_srq_stride(sge_num);
+    if (srq_info.dv.stride != stride) {
+        ucs_error("SRQ stride is not %u (%d), sgenum %d",
+                  stride, srq_info.dv.stride, sge_num);
+        return UCS_ERR_NO_DEVICE;
+    }
+    /* tail is later used as an index mask, so tail + 1 must be a power of 2 */
+    if (!ucs_is_pow2(srq_info.dv.tail + 1)) {
+        ucs_error("SRQ length is not power of 2 (%d)", srq_info.dv.tail + 1);
+        return UCS_ERR_NO_DEVICE;
+    }
+
+    srq->buf = srq_info.dv.buf;
+    srq->db = srq_info.dv.dbrec;
+    uct_ib_mlx5_srq_buff_init(srq, srq_info.dv.head, srq_info.dv.tail,
+                              sg_byte_count, sge_num);
+    return UCS_OK;
+}
+/**
+ * Reset the software indices of @a srq and initialize every WQE with index in
+ * [head, tail]: each WQE is linked to index (i + 1) & tail, its bookkeeping
+ * fields are cleared, and byte_count of each of its @a sge_num data segments
+ * is set to @a sg_byte_count. Both values are stored in network byte order.
+ */
+void uct_ib_mlx5_srq_buff_init(uct_ib_mlx5_srq_t *srq, uint32_t head,
+                               uint32_t tail, size_t sg_byte_count, int sge_num)
+{
+    uct_ib_mlx5_srq_seg_t *seg;
+    unsigned i, j;
+
+    srq->free_idx = tail;
+    srq->ready_idx = UINT16_MAX;
+    srq->sw_pi = UINT16_MAX;
+    srq->mask = tail;
+    srq->tail = tail;
+    srq->stride = uct_ib_mlx5_srq_stride(sge_num);
+
+    for (i = head; i <= tail; ++i) {
+        seg = uct_ib_mlx5_srq_get_wqe(srq, i);
+        seg->srq.next_wqe_index = htons((i + 1) & tail);
+        seg->srq.ptr_mask = 0;
+        seg->srq.free = 0;
+        seg->srq.desc = NULL;
+        seg->srq.strides = sge_num;
+        for (j = 0; j < sge_num; ++j) {
+            seg->dptr[j].byte_count = htonl(sg_byte_count);
+        }
+    }
+}
+/**
+ * Sanity check when tearing down a verbs SRQ: query the SRQ again and assert
+ * that the driver's tail still equals srq->tail. Does nothing for SRQs that
+ * are not of type UCT_IB_MLX5_OBJ_TYPE_VERBS.
+ */
+void uct_ib_mlx5_srq_cleanup(uct_ib_mlx5_srq_t *srq, struct ibv_srq *verbs_srq)
+{
+    uct_ib_mlx5dv_srq_t srq_info = {};
+    uct_ib_mlx5dv_t obj = {};
+    ucs_status_t status;
+
+    if (srq->type != UCT_IB_MLX5_OBJ_TYPE_VERBS) {
+        return;
+    }
+
+    /* check that the mlx5 driver did not modify the SRQ */
+    obj.dv.srq.in = verbs_srq;
+    obj.dv.srq.out = &srq_info.dv;
+
+    status = uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_SRQ);
+    ucs_assert_always(status == UCS_OK);
+    ucs_assertv_always(srq->tail == srq_info.dv.tail, "srq->tail=%d srq_info.tail=%d",
+                       srq->tail, srq_info.dv.tail);
+}
+
diff --git a/src/uct/ib/mlx5/ib_mlx5.h b/src/uct/ib/mlx5/ib_mlx5.h
new file mode 100644
index 0000000..09407c1
--- /dev/null
+++ b/src/uct/ib/mlx5/ib_mlx5.h
@@ -0,0 +1,623 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_IB_MLX5_H_
+#define UCT_IB_MLX5_H_
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/**
+ * When using a clang version that is higher than 3.0, the GNUC_MINOR is set
+ * to 2, which affects the offset of several fields that are used by UCX from
+ * the liblmlx5 library (from the mlx5_qp struct).
+ * According to libmlx5, resetting the GNUC_MINOR version to 3, will make the
+ * offset of these fields inside libmlx5 (when compiled with GCC) the same as
+ * the one used by UCX (when compiled with clang).
+ */
+#ifdef __clang__
+# define CLANG_VERSION ( __clang_major__ * 100 + __clang_minor__)
+# if CLANG_VERSION >= 300
+# undef __GNUC_MINOR__
+# define __GNUC_MINOR__ 3
+# endif
+#endif
+
+#if HAVE_INFINIBAND_MLX5DV_H
+# include
+#else
+# include
+# include
+#endif
+#include
+
+#include
+#include
+#include
+
+/* Sizes, worker thread-local data keys and hardware constants of the mlx5
+ * transports */
+#define UCT_IB_MLX5_WQE_SEG_SIZE 16 /* Size of a segment in a WQE */
+#define UCT_IB_MLX5_CQE64_MAX_INL 32 /* Inline scatter size in 64-byte CQE */
+#define UCT_IB_MLX5_CQE128_MAX_INL 64 /* Inline scatter size in 128-byte CQE */
+#define UCT_IB_MLX5_CQE64_SIZE_LOG 6
+#define UCT_IB_MLX5_CQE128_SIZE_LOG 7
+#define UCT_IB_MLX5_MAX_BB 4 /* MLX5_SEND_WQE_BB units per send WQE, see UCT_IB_MLX5_MAX_SEND_WQE_SIZE */
+#define UCT_IB_MLX5_WORKER_BF_KEY 0x00c1b7e8u /* uct_worker_tl_data_get() key: MMIO register */
+#define UCT_IB_MLX5_DEVX_UAR_KEY 0xdea1ab1eU /* uct_worker_tl_data_get() key: DevX UAR */
+#define UCT_IB_MLX5_RES_DOMAIN_KEY 0x1b1bda7aU /* uct_worker_tl_data_get() key: resource domain */
+#define UCT_IB_MLX5_WORKER_DM_KEY 0xacdf1245u
+#define UCT_IB_MLX5_EXTENDED_UD_AV 0x80 /* htonl(0x80000000) */
+#define UCT_IB_MLX5_AV_GRH_PRESENT 0x40 /* htonl(UCS_BIT(30)) */
+#define UCT_IB_MLX5_BF_REG_SIZE 256 /* Only non-zero BlueFlame size accepted by uct_ib_mlx5_txwq_init() */
+#define UCT_IB_MLX5_CQE_VENDOR_SYND_ODP 0x93
+#define UCT_IB_MLX5_CQE_OP_OWN_ERR_MASK 0x80
+#define UCT_IB_MLX5_MAX_SEND_WQE_SIZE (UCT_IB_MLX5_MAX_BB * MLX5_SEND_WQE_BB)
+#define UCT_IB_MLX5_CQ_SET_CI 0
+#define UCT_IB_MLX5_CQ_ARM_DB 1
+#define UCT_IB_MLX5_ROCE_SRC_PORT_MIN 0xC000
+#define UCT_IB_MLX5_LOG_MAX_MSG_SIZE 30
+#define UCT_IB_MLX5_ATOMIC_MODE 3
+#define UCT_IB_MLX5_CQE_FLAG_L3_IN_DATA UCS_BIT(28) /* GRH/IP in the receive buffer */
+#define UCT_IB_MLX5_CQE_FLAG_L3_IN_CQE UCS_BIT(29) /* GRH/IP in the CQE */
+
+
+#define UCT_IB_MLX5_OPMOD_EXT_ATOMIC(_log_arg_size) \
+    ((8) | ((_log_arg_size) - 2))
+/* Accessors for the base and GRH parts of struct mlx5_wqe_av. The first
+ * variant is used when the struct has a 'base' member, the second otherwise. */
+#if HAVE_STRUCT_MLX5_WQE_AV_BASE
+
+# define mlx5_av_base(_av) (&(_av)->base)
+# define mlx5_av_grh(_av) (&(_av)->grh_sec)
+# define UCT_IB_MLX5_AV_BASE_SIZE sizeof(struct mlx5_base_av)
+# define UCT_IB_MLX5_AV_FULL_SIZE sizeof(struct mlx5_wqe_av)
+
+#else
+
+# define mlx5_av_base(_av) (_av)
+/* do not use direct cast from address of reserved0 to avoid compilation warnings */
+# define mlx5_av_grh(_av) ((struct mlx5_grh_av *)(((char*)(_av)) + \
+                           ucs_offsetof(struct mlx5_wqe_av, reserved0[0])))
+# define UCT_IB_MLX5_AV_BASE_SIZE ucs_offsetof(struct mlx5_wqe_av, reserved0[0])
+# define UCT_IB_MLX5_AV_FULL_SIZE sizeof(struct mlx5_wqe_av)
+
+# define mlx5_base_av mlx5_wqe_av
+/* GRH part of the address vector, declared here because this branch is used
+ * when struct mlx5_wqe_av has no separate base/grh_sec members */
+struct mlx5_grh_av {
+    uint8_t reserved0[4];
+    uint8_t rmac[6];
+    uint8_t tclass;
+    uint8_t hop_limit;
+    uint32_t grh_gid_fl;
+    uint8_t rgid[16];
+};
+
+# define HAVE_STRUCT_MLX5_GRH_AV_RMAC 1
+
+#endif
+
+#if !(HAVE_MLX5_WQE_CTRL_SOLICITED)
+# define MLX5_WQE_CTRL_SOLICITED (1<<1)
+#endif
+
+#define UCT_IB_MLX5_WQE_CTRL_FLAG_FENCE (2<<5)
+#define UCT_IB_MLX5_WQE_CTRL_FLAG_STRONG_ORDER (3<<5)
+
+#define UCT_IB_MLX5_AM_ZCOPY_MAX_IOV 3UL
+/* Bytes left in a maximal send WQE after the control segment, an address
+ * vector of _av_size bytes and the inline-data segment header */
+#define UCT_IB_MLX5_AM_MAX_SHORT(_av_size) \
+    (UCT_IB_MLX5_MAX_SEND_WQE_SIZE - \
+     (sizeof(struct mlx5_wqe_ctrl_seg) + \
+      (_av_size) + \
+      sizeof(struct mlx5_wqe_inl_data_seg)))
+/* As above, minus room for UCT_IB_MLX5_AM_ZCOPY_MAX_IOV data segments */
+#define UCT_IB_MLX5_AM_ZCOPY_MAX_HDR(_av_size) \
+    (UCT_IB_MLX5_AM_MAX_SHORT(_av_size) - \
+     UCT_IB_MLX5_AM_ZCOPY_MAX_IOV * sizeof(struct mlx5_wqe_data_seg))
+/* As UCT_IB_MLX5_AM_MAX_SHORT, minus the remote-address segment */
+#define UCT_IB_MLX5_PUT_MAX_SHORT(_av_size) \
+    (UCT_IB_MLX5_AM_MAX_SHORT(_av_size) - sizeof(struct mlx5_wqe_raddr_seg))
+
+#define UCT_IB_MLX5_SRQ_STRIDE (sizeof(struct mlx5_wqe_srq_next_seg) + \
+                                sizeof(struct mlx5_wqe_data_seg))
+
+#define UCT_IB_MLX5_XRQ_MIN_UWQ_POST 33
+/* Place a bitmap of DevX object kinds above the fixed MD flag bits */
+#define UCT_IB_MLX5_MD_FLAGS_DEVX_OBJS(_devx_objs) \
+    ((_devx_objs) << UCT_IB_MLX5_MD_FLAG_DEVX_OBJS_SHIFT)
+
+#define UCT_IB_MLX5_MD_FLAG_DEVX_OBJS(_obj) \
+    UCT_IB_MLX5_MD_FLAGS_DEVX_OBJS(UCS_BIT(UCT_IB_DEVX_OBJ_ ## _obj))
+/* Bits of uct_ib_mlx5_md_t::flags */
+enum {
+    /* Device supports KSM */
+    UCT_IB_MLX5_MD_FLAG_KSM = UCS_BIT(0),
+    /* Device supports DEVX */
+    UCT_IB_MLX5_MD_FLAG_DEVX = UCS_BIT(1),
+    /* Device supports TM DC */
+    UCT_IB_MLX5_MD_FLAG_DC_TM = UCS_BIT(2),
+    /* Device supports MP RQ */
+    UCT_IB_MLX5_MD_FLAG_MP_RQ = UCS_BIT(3),
+    /* Device supports creation of indirect MR with atomics access rights */
+
UCT_IB_MLX5_MD_FLAG_INDIRECT_ATOMICS = UCS_BIT(4), + + /* Object to be created by DevX */ + UCT_IB_MLX5_MD_FLAG_DEVX_OBJS_SHIFT = 5, + UCT_IB_MLX5_MD_FLAG_DEVX_RC_QP = UCT_IB_MLX5_MD_FLAG_DEVX_OBJS(RCQP), + UCT_IB_MLX5_MD_FLAG_DEVX_RC_SRQ = UCT_IB_MLX5_MD_FLAG_DEVX_OBJS(RCSRQ), + UCT_IB_MLX5_MD_FLAG_DEVX_DCT = UCT_IB_MLX5_MD_FLAG_DEVX_OBJS(DCT), + UCT_IB_MLX5_MD_FLAG_DEVX_DC_SRQ = UCT_IB_MLX5_MD_FLAG_DEVX_OBJS(DCSRQ), +}; + + +enum { + UCT_IB_MLX5_SRQ_TOPO_LIST = 0x0, + UCT_IB_MLX5_SRQ_TOPO_CYCLIC = 0x1, + UCT_IB_MLX5_SRQ_TOPO_LIST_MP_RQ = 0x2, + UCT_IB_MLX5_SRQ_TOPO_CYCLIC_MP_RQ = 0x3 +}; + + +/** + * MLX5 IB memory domain. + */ +typedef struct uct_ib_mlx5_md { + uct_ib_md_t super; + uint32_t flags; + ucs_mpool_t dbrec_pool; + ucs_spinlock_t dbrec_lock; + struct ibv_qp *umr_qp; /* special QP for creating UMR */ + struct ibv_cq *umr_cq; /* special CQ for creating UMR */ + + void *zero_buf; + struct mlx5dv_devx_umem *zero_mem; +} uct_ib_mlx5_md_t; + + +typedef enum { + UCT_IB_MLX5_MMIO_MODE_BF_POST, /* BF without flush, can be used only from + one thread */ + UCT_IB_MLX5_MMIO_MODE_BF_POST_MT, /* BF with order, can be used by multiple + serialized threads */ + UCT_IB_MLX5_MMIO_MODE_DB, /* 8-byte doorbell (with the mandatory flush) */ + UCT_IB_MLX5_MMIO_MODE_AUTO, /* Auto-select according to driver/HW capabilities + and multi-thread support level */ + UCT_IB_MLX5_MMIO_MODE_LAST +} uct_ib_mlx5_mmio_mode_t; + + +typedef struct uct_ib_mlx5_iface_config { +#if HAVE_IBV_DM + struct { + size_t seg_len; + unsigned count; + } dm; +#endif + uct_ib_mlx5_mmio_mode_t mmio_mode; +} uct_ib_mlx5_iface_config_t; + + +/** + * MLX5 DoorBell record + */ +typedef struct uct_ib_mlx5_dbrec { + volatile uint32_t db[2]; + uint32_t mem_id; + size_t offset; + uct_ib_mlx5_md_t *md; +} uct_ib_mlx5_dbrec_t; + + +typedef enum { + UCT_IB_MLX5_OBJ_TYPE_VERBS, + UCT_IB_MLX5_OBJ_TYPE_DEVX, + UCT_IB_MLX5_OBJ_TYPE_LAST +} uct_ib_mlx5_obj_type_t; + + +/* Shared receive queue */ +typedef struct 
uct_ib_mlx5_srq { + uct_ib_mlx5_obj_type_t type; + int topo; /* linked-list or cyclic */ + uint32_t srq_num; + void *buf; + volatile uint32_t *db; + uint16_t free_idx; /* what is completed contiguously */ + uint16_t ready_idx; /* what is ready to be posted to hw */ + uint16_t sw_pi; /* what is posted to hw */ + uint16_t mask; + uint16_t tail; /* tail in the driver */ + uint16_t stride; + union { + struct { + struct ibv_srq *srq; + } verbs; +#if HAVE_DEVX + struct { + uct_ib_mlx5_dbrec_t *dbrec; + struct mlx5dv_devx_umem *mem; + struct mlx5dv_devx_obj *obj; + } devx; +#endif + }; +} uct_ib_mlx5_srq_t; + + +/* Completion queue */ +typedef struct uct_ib_mlx5_cq { + void *cq_buf; + unsigned cq_ci; + unsigned cq_sn; + unsigned cq_length; + unsigned cqe_size_log; + unsigned cq_num; + void *uar; + volatile uint32_t *dbrec; +} uct_ib_mlx5_cq_t; + + +/* Blue flame register */ +typedef struct uct_ib_mlx5_mmio_reg { + uct_worker_tl_data_t super; + union { + void *ptr; + uintptr_t uint; + } addr; + uct_ib_mlx5_mmio_mode_t mode; +} uct_ib_mlx5_mmio_reg_t; + + +typedef struct uct_ib_mlx5_devx_uar { + uct_ib_mlx5_mmio_reg_t super; +#if HAVE_DEVX + struct mlx5dv_devx_uar *uar; +#endif + struct ibv_context *ctx; +} uct_ib_mlx5_devx_uar_t; + + +/* resource domain */ +typedef struct uct_ib_mlx5_res_domain { + uct_worker_tl_data_t super; +#if HAVE_IBV_EXP_RES_DOMAIN + struct ibv_exp_res_domain *ibv_domain; +#elif HAVE_DECL_IBV_ALLOC_TD + struct ibv_td *td; + struct ibv_pd *pd; +#endif +} uct_ib_mlx5_res_domain_t; + + +/* MLX5 QP wrapper */ +typedef struct uct_ib_mlx5_qp { + uct_ib_mlx5_obj_type_t type; + uint32_t qp_num; + union { + struct { + union { + struct ibv_qp *qp; +#if HAVE_DC_EXP + struct ibv_exp_dct *dct; +#endif + }; + uct_ib_mlx5_res_domain_t *rd; + } verbs; +#if HAVE_DEVX + struct { + void *wq_buf; + uct_ib_mlx5_dbrec_t *dbrec; + struct mlx5dv_devx_umem *mem; + struct mlx5dv_devx_obj *obj; + } devx; +#endif + }; +} uct_ib_mlx5_qp_t; + + +/* Send work-queue */ +typedef 
struct uct_ib_mlx5_txwq { + uct_ib_mlx5_qp_t super; + uint16_t sw_pi; /* PI for next WQE */ + uint16_t prev_sw_pi; /* PI where last WQE *started* */ + uct_ib_mlx5_mmio_reg_t *reg; /* MMIO register written by uct_ib_mlx5_post_send() */ + void *curr; /* Where the next WQE is built; updated by uct_ib_mlx5_post_send() */ + volatile uint32_t *dbrec; /* Doorbell record; receives the new PI in network byte order */ + void *qstart; /* Start of the WQE buffer, wrap-around target */ + void *qend; /* End of the WQE buffer (exclusive) */ + uint16_t bb_max; /* Capacity term used by uct_ib_mlx5_txwq_update_bb() */ + uint16_t sig_pi; /* PI for last signaled WQE */ +#if UCS_ENABLE_ASSERT + uint16_t hw_ci; /* Last HW CI passed to uct_ib_mlx5_txwq_update_bb(); checked by uct_ib_mlx5_txwq_validate() */ +#endif + uct_ib_fence_info_t fi; +} uct_ib_mlx5_txwq_t; + + +/* Receive work-queue */ +typedef struct uct_ib_mlx5_rxwq { + /* producer index. It is updated when a new receive wqe is posted */ + uint16_t rq_wqe_counter; + /* consumer index. It is better to track it ourselves than to do ntohs() + * on the index in the cqe + */ + uint16_t cq_wqe_counter; + uint16_t mask; + volatile uint32_t *dbrec; + struct mlx5_wqe_data_seg *wqes; +} uct_ib_mlx5_rxwq_t; + + +/* Address-vector for link-local scope */ +typedef struct uct_ib_mlx5_base_av { + uint32_t dqp_dct; + uint8_t stat_rate_sl; + uint8_t fl_mlid; + uint16_t rlid; +} UCS_S_PACKED uct_ib_mlx5_base_av_t; + + +/* Error CQE layout, as read by uct_ib_mlx5_completion_with_err() */ +typedef struct uct_ib_mlx5_err_cqe { + uint8_t rsvd0[32]; + uint32_t srqn; + uint8_t rsvd1[16]; + uint8_t hw_err_synd; + uint8_t hw_synd_type; + uint8_t vendor_err_synd; + uint8_t syndrome; + uint32_t s_wqe_opcode_qpn; + uint16_t wqe_counter; + uint8_t signature; + uint8_t op_own; +} UCS_S_PACKED uct_ib_mlx5_err_cqe_t; + + +/** + * SRQ segment + * + * We add some SW book-keeping information in the unused HW fields: + * - desc - the receive descriptor. + * - strides - Number of available strides in this WQE. When it is 0, + * this segment can be reposted to the HW. Relevant for + * Multi-Packet SRQ only. + * - free - points to the next out-of-order completed segment. 
+ */ +typedef struct uct_rc_mlx5_srq_seg { + union { + struct mlx5_wqe_srq_next_seg mlx5_srq; + struct { + uint16_t ptr_mask; + uint16_t next_wqe_index; /* Network byte order */ + uint8_t signature; + uint8_t rsvd1[1]; + uint8_t strides; + uint8_t free; /* Released but not posted */ + uct_ib_iface_recv_desc_t *desc; /* Host byte order */ + } srq; + }; + struct mlx5_wqe_data_seg dptr[0]; +} uct_ib_mlx5_srq_seg_t; + + +struct uct_ib_mlx5_atomic_masked_cswap32_seg { + uint32_t swap; + uint32_t compare; + uint32_t swap_mask; + uint32_t compare_mask; +} UCS_S_PACKED; + + +struct uct_ib_mlx5_atomic_masked_fadd32_seg { + uint32_t add; + uint32_t filed_boundary; + uint32_t reserved[2]; +} UCS_S_PACKED; + + +struct uct_ib_mlx5_atomic_masked_cswap64_seg { + uint64_t swap; + uint64_t compare; +} UCS_S_PACKED; + + +struct uct_ib_mlx5_atomic_masked_fadd64_seg { + uint64_t add; + uint64_t filed_boundary; +} UCS_S_PACKED; + +/** + * Calculate unique id for atomic + */ +static inline uint8_t uct_ib_mlx5_md_get_atomic_mr_id(uct_ib_mlx5_md_t *md) +{ +#if HAVE_EXP_UMR + if ((md->umr_qp == NULL) || (md->umr_cq == NULL)) { + return 0; + } + /* Generate atomic UMR id. We want umrs for same virtual addresses to have + * different ids across processes. + * + * Usually parallel processes running on the same node as part of a single + * job will have consecutive PIDs. For example MPI ranks, slurm spawned tasks... 
+ */ + return getpid() % 256; +#else + return 0; +#endif +} + +static inline uint8_t uct_ib_mlx5_iface_get_atomic_mr_id(uct_ib_iface_t *iface) +{ + ucs_assert(ucs_derived_of(iface->super.md, uct_ib_md_t)->dev.flags & + UCT_IB_DEVICE_FLAG_MLX5_PRM); + return uct_ib_mlx5_md_get_atomic_mr_id(ucs_derived_of(iface->super.md, + uct_ib_mlx5_md_t)); +} + +ucs_status_t uct_ib_mlx5_iface_get_res_domain(uct_ib_iface_t *iface, + uct_ib_mlx5_qp_t *txwq); + +void uct_ib_mlx5_iface_put_res_domain(uct_ib_mlx5_qp_t *qp); + +ucs_status_t uct_ib_mlx5_iface_create_qp(uct_ib_iface_t *iface, + uct_ib_mlx5_qp_t *qp, + uct_ib_qp_attr_t *attr); + +/** + * Create CQ with DV + */ +ucs_status_t uct_ib_mlx5_create_cq(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector, int ignore_overrun, + size_t *inl, struct ibv_cq **cq_p); + +extern ucs_config_field_t uct_ib_mlx5_iface_config_table[]; + +/** + * Get internal CQ information. + */ +ucs_status_t uct_ib_mlx5_get_cq(struct ibv_cq *cq, uct_ib_mlx5_cq_t *mlx5_cq); + +/** + * Get flag indicating compact AV support. + */ +ucs_status_t uct_ib_mlx5_get_compact_av(uct_ib_iface_t *iface, int *compact_av); + +/** + * Requests completion notification. + */ +ucs_status_t uct_ib_mlx5dv_arm_cq(uct_ib_mlx5_cq_t *cq, int solicited); + +/** + * Check for completion with error. + */ +void uct_ib_mlx5_check_completion(uct_ib_iface_t *iface, uct_ib_mlx5_cq_t *cq, + struct mlx5_cqe64 *cqe); + +/** + * Initialize txwq structure. + */ +ucs_status_t uct_ib_mlx5_txwq_init(uct_priv_worker_t *worker, + uct_ib_mlx5_mmio_mode_t cfg_mmio_mode, + uct_ib_mlx5_txwq_t *txwq, struct ibv_qp *verbs_qp); + +ucs_status_t uct_ib_mlx5_txwq_init_devx(uct_priv_worker_t *worker, + uct_ib_mlx5_md_t *md, + uct_ib_mlx5_txwq_t *txwq, + uct_ib_mlx5_mmio_mode_t mode); + +void uct_ib_mlx5_txwq_cleanup(uct_ib_mlx5_txwq_t* txwq); + +/** + * Reset txwq contents and posting indices. 
+ */ +void uct_ib_mlx5_txwq_reset(uct_ib_mlx5_txwq_t *txwq); + +/** + * Initialize rxwq structure. + */ +ucs_status_t uct_ib_mlx5_get_rxwq(struct ibv_qp *qp, uct_ib_mlx5_rxwq_t *wq); + +/** + * Initialize srq structure. + */ +ucs_status_t uct_ib_mlx5_srq_init(uct_ib_mlx5_srq_t *srq, struct ibv_srq *verbs_srq, + size_t sg_byte_count, int num_sge); + +void uct_ib_mlx5_srq_buff_init(uct_ib_mlx5_srq_t *srq, uint32_t head, + uint32_t tail, size_t sg_byte_count, int num_sge); + +void uct_ib_mlx5_srq_cleanup(uct_ib_mlx5_srq_t *srq, struct ibv_srq *verbs_srq); + +/** + * DEVX UAR API + */ +int uct_ib_mlx5_devx_uar_cmp(uct_ib_mlx5_devx_uar_t *uar, + uct_ib_mlx5_md_t *md, + uct_ib_mlx5_mmio_mode_t mmio_mode); + +ucs_status_t uct_ib_mlx5_devx_uar_init(uct_ib_mlx5_devx_uar_t *uar, + uct_ib_mlx5_md_t *md, + uct_ib_mlx5_mmio_mode_t mmio_mode); + +void uct_ib_mlx5_devx_uar_cleanup(uct_ib_mlx5_devx_uar_t *uar); + +/** + * DEVX QP API + */ + +#if HAVE_DEVX + +ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface, + uct_ib_mlx5_qp_t *qp, + uct_ib_mlx5_txwq_t *tx, + uct_ib_qp_attr_t *attr); + +ucs_status_t uct_ib_mlx5_devx_modify_qp(uct_ib_mlx5_qp_t *qp, + const void *in, size_t inlen, + void *out, size_t outlen); + +ucs_status_t uct_ib_mlx5_devx_modify_qp_state(uct_ib_mlx5_qp_t *qp, + enum ibv_qp_state state); + +void uct_ib_mlx5_devx_destroy_qp(uct_ib_mlx5_qp_t *qp); + +#else + +/* + * DEVX is not available: the stubs below have the same signatures as the + * prototypes in the HAVE_DEVX branch, so a caller compiles in both + * configurations. Those returning a status report UCS_ERR_UNSUPPORTED. + */ + +static inline ucs_status_t +uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface, + uct_ib_mlx5_qp_t *qp, + uct_ib_mlx5_txwq_t *tx, + uct_ib_qp_attr_t *attr) +{ + return UCS_ERR_UNSUPPORTED; +} + +static inline ucs_status_t +uct_ib_mlx5_devx_modify_qp(uct_ib_mlx5_qp_t *qp, + const void *in, size_t inlen, + void *out, size_t outlen) +{ + return UCS_ERR_UNSUPPORTED; +} + +static inline ucs_status_t +uct_ib_mlx5_devx_modify_qp_state(uct_ib_mlx5_qp_t *qp, enum ibv_qp_state state) +{ + return UCS_ERR_UNSUPPORTED; +} + +static inline void uct_ib_mlx5_devx_destroy_qp(uct_ib_mlx5_qp_t *qp) { } + +#endif + +static inline uct_ib_mlx5_dbrec_t 
*uct_ib_mlx5_get_dbrec(uct_ib_mlx5_md_t *md) +{ + uct_ib_mlx5_dbrec_t *dbrec; + + ucs_spin_lock(&md->dbrec_lock); + dbrec = (uct_ib_mlx5_dbrec_t *)ucs_mpool_get_inline(&md->dbrec_pool); + ucs_spin_unlock(&md->dbrec_lock); + if (dbrec != NULL) { + dbrec->md = md; + } + + return dbrec; +} + +static inline void uct_ib_mlx5_put_dbrec(uct_ib_mlx5_dbrec_t *dbrec) +{ + uct_ib_mlx5_md_t *md = dbrec->md; + + ucs_spin_lock(&md->dbrec_lock); + ucs_mpool_put_inline(dbrec); + ucs_spin_unlock(&md->dbrec_lock); +} + +#endif diff --git a/src/uct/ib/mlx5/ib_mlx5.inl b/src/uct/ib/mlx5/ib_mlx5.inl new file mode 100644 index 0000000..222d53b --- /dev/null +++ b/src/uct/ib/mlx5/ib_mlx5.inl @@ -0,0 +1,583 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "ib_mlx5.h" + + +static UCS_F_ALWAYS_INLINE struct mlx5_cqe64* +uct_ib_mlx5_get_cqe(uct_ib_mlx5_cq_t *cq, unsigned index) +{ + return UCS_PTR_BYTE_OFFSET(cq->cq_buf, ((index & (cq->cq_length - 1)) << + cq->cqe_size_log)); +} + +static UCS_F_ALWAYS_INLINE int +uct_ib_mlx5_cqe_is_hw_owned(uint8_t op_own, unsigned index, unsigned mask) +{ + return (op_own & MLX5_CQE_OWNER_MASK) == !(index & mask); +} + +static UCS_F_ALWAYS_INLINE int +uct_ib_mlx5_cqe_stride_index(struct mlx5_cqe64* cqe) +{ +#if HAVE_STRUCT_MLX5_CQE64_IB_STRIDE_INDEX + return ntohs(cqe->ib_stride_index); +#else + uint16_t *stride = (uint16_t*)&cqe->rsvd20[2]; + return ntohs(*stride); +#endif +} + +static UCS_F_ALWAYS_INLINE int uct_ib_mlx5_srq_stride(int num_sge) +{ + int stride; + + stride = sizeof(struct mlx5_wqe_srq_next_seg) + + (num_sge * sizeof(struct mlx5_wqe_data_seg)); + + return ucs_roundup_pow2(stride); +} + +static UCS_F_ALWAYS_INLINE int +uct_ib_mlx5_srq_max_wrs(int rxq_len, int num_sge) +{ + return ucs_max(rxq_len / num_sge, UCT_IB_MLX5_XRQ_MIN_UWQ_POST); +} + +static UCS_F_ALWAYS_INLINE int +uct_ib_mlx5_cqe_is_grh_present(struct mlx5_cqe64* cqe) +{ + return cqe->flags_rqpn 
& htonl(UCT_IB_MLX5_CQE_FLAG_L3_IN_DATA | + UCT_IB_MLX5_CQE_FLAG_L3_IN_CQE); +} + +/* Return the address UCT_IB_GRH_LEN bytes before the CQE. Asserts that the CQE flags report the GRH as being in the CQE. */ static UCS_F_ALWAYS_INLINE void* +uct_ib_mlx5_gid_from_cqe(struct mlx5_cqe64* cqe) +{ + ucs_assert(uct_ib_mlx5_cqe_is_grh_present(cqe) == + htonl(UCT_IB_MLX5_CQE_FLAG_L3_IN_CQE)); /* GRH is in CQE */ + return UCS_PTR_BYTE_OFFSET(cqe, -UCT_IB_GRH_LEN); +} + +/* Poll the CQE at cq->cq_ci. Returns NULL when that CQE is still owned by HW, or when it is an error CQE (after passing it to uct_ib_mlx5_check_completion()); otherwise advances cq->cq_ci by one and returns the CQE. */ static UCS_F_ALWAYS_INLINE struct mlx5_cqe64* +uct_ib_mlx5_poll_cq(uct_ib_iface_t *iface, uct_ib_mlx5_cq_t *cq) +{ + struct mlx5_cqe64 *cqe; + unsigned index; + uint8_t op_own; + + index = cq->cq_ci; + cqe = uct_ib_mlx5_get_cqe(cq, index); + op_own = cqe->op_own; + + if (ucs_unlikely(uct_ib_mlx5_cqe_is_hw_owned(op_own, index, cq->cq_length))) { + return NULL; + } else if (ucs_unlikely(op_own & UCT_IB_MLX5_CQE_OP_OWN_ERR_MASK)) { + UCS_STATIC_ASSERT(MLX5_CQE_INVALID & (UCT_IB_MLX5_CQE_OP_OWN_ERR_MASK >> 4)); + ucs_assert((op_own >> 4) != MLX5_CQE_INVALID); + uct_ib_mlx5_check_completion(iface, cq, cqe); + return NULL; /* No CQE */ + } + + cq->cq_ci = index + 1; + return cqe; /* TODO optimize - let compiler know cqe is not null */ +} + + +/* Return bb_max - (prev_sw_pi - hw_ci) as a 16-bit value. In assert builds, also store hw_ci for uct_ib_mlx5_txwq_validate(). */ static UCS_F_ALWAYS_INLINE uint16_t +uct_ib_mlx5_txwq_update_bb(uct_ib_mlx5_txwq_t *wq, uint16_t hw_ci) +{ +#if UCS_ENABLE_ASSERT + wq->hw_ci = hw_ci; +#endif + return wq->bb_max - (wq->prev_sw_pi - hw_ci); +} + + +/* check that work queue has enough space for the new work request */ +static inline void +uct_ib_mlx5_txwq_validate(uct_ib_mlx5_txwq_t *wq, uint16_t num_bb) +{ + +#if UCS_ENABLE_ASSERT + uint16_t wqe_s, wqe_e; + uint16_t hw_ci, sw_pi; + uint16_t wqe_cnt; + int is_ok = 1; + + if (wq->hw_ci == 0xFFFF) { + return; + } + + wqe_cnt = UCS_PTR_BYTE_DIFF(wq->qstart, wq->qend) / MLX5_SEND_WQE_BB; + if (wqe_cnt < wq->bb_max) { + ucs_fatal("wqe count (%u) < bb_max (%u)", wqe_cnt, wq->bb_max); + } + + wqe_s = UCS_PTR_BYTE_DIFF(wq->qstart, wq->curr) / MLX5_SEND_WQE_BB; + wqe_e = (wqe_s + num_bb) % wqe_cnt; + + sw_pi = wq->prev_sw_pi % wqe_cnt; + hw_ci = wq->hw_ci % wqe_cnt; + + if (hw_ci <= sw_pi) { 
+ if (hw_ci <= wqe_s && wqe_s <= sw_pi) { + is_ok = 0; + } + if (hw_ci <= wqe_e && wqe_e <= sw_pi) { + is_ok = 0; + } + } + else { + if (!(sw_pi < wqe_s && wqe_s < hw_ci)) { + is_ok = 0; + } + if (!(sw_pi < wqe_e && wqe_e < hw_ci)) { + is_ok = 0; + } + } + if (!is_ok) { + ucs_fatal("tx wq overrun: hw_ci: %u sw_pi: %u cur: %u-%u num_bb: %u wqe_cnt: %u", + hw_ci, sw_pi, wqe_s, wqe_e, num_bb, wqe_cnt); + } +#endif +} + + +/** + * Copy data to inline segment, taking into account QP wrap-around. + * + * @param dest Inline data in the WQE to copy to. + * @param src Data to copy. + * @param length Data length. + * + */ +static UCS_F_ALWAYS_INLINE void +uct_ib_mlx5_inline_copy(void *restrict dest, const void *restrict src, unsigned + length, uct_ib_mlx5_txwq_t *wq) +{ + ptrdiff_t n; + + if (UCS_PTR_BYTE_OFFSET(dest, length) <= wq->qend) { + memcpy(dest, src, length); + } else { + n = UCS_PTR_BYTE_DIFF(dest, wq->qend); + memcpy(dest, src, n); + memcpy(wq->qstart, UCS_PTR_BYTE_OFFSET(src, n), length - n); + } +} + + +/* wrapping of 'seg' should not happen */ +static UCS_F_ALWAYS_INLINE void* +uct_ib_mlx5_txwq_wrap_none(uct_ib_mlx5_txwq_t *txwq, void *seg) +{ + ucs_assertv(((unsigned long)seg % UCT_IB_MLX5_WQE_SEG_SIZE) == 0, "seg=%p", seg); + ucs_assertv(seg >= txwq->qstart, "seg=%p qstart=%p", seg, txwq->qstart); + ucs_assertv(seg < txwq->qend, "seg=%p qend=%p", seg, txwq->qend); + return seg; +} + + +/* wrapping of 'seg' could happen, but only on exact 'qend' boundary */ +static UCS_F_ALWAYS_INLINE void * +uct_ib_mlx5_txwq_wrap_exact(uct_ib_mlx5_txwq_t *txwq, void *seg) +{ + ucs_assert(seg <= txwq->qend); + if (ucs_unlikely(seg == txwq->qend)) { + seg = txwq->qstart; + } + return uct_ib_mlx5_txwq_wrap_none(txwq, seg); +} + + +/* wrapping of 'seg' could happen, even past 'qend' boundary */ +static UCS_F_ALWAYS_INLINE void * +uct_ib_mlx5_txwq_wrap_any(uct_ib_mlx5_txwq_t *txwq, void *seg) +{ + if (ucs_unlikely(seg >= txwq->qend)) { + seg = UCS_PTR_BYTE_OFFSET(seg, 
-UCS_PTR_BYTE_DIFF(txwq->qstart, + txwq->qend)); + } + return uct_ib_mlx5_txwq_wrap_none(txwq, seg); +} + + +/* Wrapping of 'data' could happen, even past 'qend' boundary. + * Do not check for alignment. */ +static UCS_F_ALWAYS_INLINE void * +uct_ib_mlx5_txwq_wrap_data(uct_ib_mlx5_txwq_t *txwq, void *data) +{ + if (ucs_unlikely(data >= txwq->qend)) { + data = UCS_PTR_BYTE_OFFSET(data, -UCS_PTR_BYTE_DIFF(txwq->qstart, + txwq->qend)); + } + return data; +} + + +static UCS_F_ALWAYS_INLINE void +uct_ib_mlx5_ep_set_rdma_seg(struct mlx5_wqe_raddr_seg *raddr, uint64_t rdma_raddr, + uct_rkey_t rdma_rkey) +{ +#if defined(__SSE4_2__) + *(__m128i*)raddr = _mm_shuffle_epi8( + _mm_set_epi64x(rdma_rkey, rdma_raddr), + _mm_set_epi8(0, 0, 0, 0, /* reserved */ + 8, 9, 10, 11, /* rkey */ + 0, 1, 2, 3, 4, 5, 6, 7 /* rdma_raddr */ + )); +#elif defined(__ARM_NEON) + uint8x16_t table = {7, 6, 5, 4, 3, 2, 1, 0, /* rdma_raddr */ + 11, 10, 9, 8, /* rkey */ + 16,16,16,16}; /* reserved (set 0) */ + uint64x2_t data = {rdma_raddr, rdma_rkey}; + *(uint8x16_t *)raddr = vqtbl1q_u8((uint8x16_t)data, table); +#else + raddr->raddr = htobe64(rdma_raddr); + raddr->rkey = htonl(rdma_rkey); +#endif +} + + +static UCS_F_ALWAYS_INLINE void +uct_ib_mlx5_set_dgram_seg(struct mlx5_wqe_datagram_seg *seg, + uct_ib_mlx5_base_av_t *av, struct mlx5_grh_av *grh_av, + int qp_type) +{ + if (qp_type == IBV_QPT_UD) { + mlx5_av_base(&seg->av)->key.qkey.qkey = htonl(UCT_IB_KEY); +#if HAVE_TL_DC + } else if (qp_type == UCT_IB_QPT_DCI) { + mlx5_av_base(&seg->av)->key.dc_key = htobe64(UCT_IB_KEY); +#endif + } + mlx5_av_base(&seg->av)->dqp_dct = av->dqp_dct; + mlx5_av_base(&seg->av)->stat_rate_sl = av->stat_rate_sl; + mlx5_av_base(&seg->av)->fl_mlid = av->fl_mlid; + mlx5_av_base(&seg->av)->rlid = av->rlid; + + if (grh_av) { + ucs_assert(av->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV); +#if HAVE_STRUCT_MLX5_GRH_AV_RMAC + memcpy(mlx5_av_grh(&seg->av)->rmac, grh_av->rmac, + sizeof(mlx5_av_grh(&seg->av)->rmac)); +#endif + 
mlx5_av_grh(&seg->av)->tclass = grh_av->tclass; + mlx5_av_grh(&seg->av)->hop_limit = grh_av->hop_limit; + mlx5_av_grh(&seg->av)->grh_gid_fl = grh_av->grh_gid_fl; + memcpy(mlx5_av_grh(&seg->av)->rgid, grh_av->rgid, + sizeof(mlx5_av_grh(&seg->av)->rgid)); + } else if (av->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV) { + mlx5_av_grh(&seg->av)->grh_gid_fl = 0; + } +} + + +static UCS_F_ALWAYS_INLINE void +uct_ib_mlx5_set_ctrl_seg(struct mlx5_wqe_ctrl_seg* ctrl, uint16_t pi, + uint8_t opcode, uint8_t opmod, uint32_t qp_num, + uint8_t fm_ce_se, unsigned wqe_size) +{ + uint8_t ds; + + ucs_assert(((unsigned long)ctrl % UCT_IB_MLX5_WQE_SEG_SIZE) == 0); + ds = ucs_div_round_up(wqe_size, UCT_IB_MLX5_WQE_SEG_SIZE); +#if defined(__SSE4_2__) + *(__m128i *) ctrl = _mm_shuffle_epi8( + _mm_set_epi32(qp_num, ds, pi, + (opcode << 16) | (opmod << 8) | fm_ce_se), /* OR of constants */ + _mm_set_epi8(0, 0, 0, 0, /* immediate */ + 0, /* signal/fence_mode */ + 0, 0, /* reserved */ + 0, /* signature */ + 8, /* data size */ + 12, 13, 14, /* QP num */ + 2, /* opcode */ + 4, 5, /* sw_pi in BE */ + 1 /* opmod */ + )); +#elif defined(__ARM_NEON) + uint8x16_t table = {1, /* opmod */ + 5, 4, /* sw_pi in BE */ + 2, /* opcode */ + 14, 13, 12, /* QP num */ + 8, /* data size */ + 16, /* signature (set 0) */ + 16, 16, /* reserved (set 0) */ + 0, /* signal/fence_mode */ + 16, 16, 16, 16}; /* immediate (set to 0)*/ + uint32x4_t data = {(opcode << 16) | (opmod << 8) | (uint32_t)fm_ce_se, + pi, ds, qp_num}; + *(uint8x16_t *)ctrl = vqtbl1q_u8((uint8x16_t)data, table); +#else + ctrl->opmod_idx_opcode = (opcode << 24) | (htons(pi) << 8) | opmod; + ctrl->qpn_ds = htonl((qp_num << 8) | ds); + ctrl->fm_ce_se = fm_ce_se; +#endif +} + + +static UCS_F_ALWAYS_INLINE void +uct_ib_mlx5_set_ctrl_seg_with_imm(struct mlx5_wqe_ctrl_seg* ctrl, uint16_t pi, + uint8_t opcode, uint8_t opmod, uint32_t qp_num, + uint8_t fm_ce_se, unsigned wqe_size, uint32_t imm) +{ + uint8_t ds; + + ucs_assert(((unsigned long)ctrl % 
UCT_IB_MLX5_WQE_SEG_SIZE) == 0); + ds = ucs_div_round_up(wqe_size, UCT_IB_MLX5_WQE_SEG_SIZE); +#if defined(__SSE4_2__) + *(__m128i *) ctrl = _mm_shuffle_epi8( + _mm_set_epi32(qp_num, imm, (ds << 16) | pi, + (opcode << 16) | (opmod << 8) | fm_ce_se), /* OR of constants */ + _mm_set_epi8(11, 10, 9, 8, /* immediate */ + 0, /* signal/fence_mode */ + 0, 0, /* reserved */ + 0, /* signature */ + 6, /* data size */ + 12, 13, 14, /* QP num */ + 2, /* opcode */ + 4, 5, /* sw_pi in BE */ + 1 /* opmod */ + )); +#elif defined(__ARM_NEON) + uint8x16_t table = {1, /* opmod */ + 5, 4, /* sw_pi in BE */ + 2, /* opcode */ + 14, 13, 12, /* QP num */ + 6, /* data size */ + 16, /* signature (set 0) */ + 16, 16, /* reserved (set 0) */ + 0, /* signal/fence_mode */ + 8, 9, 10, 11}; /* immediate (bytes of 'imm', copied in memory order) */ + uint32x4_t data = {(opcode << 16) | (opmod << 8) | (uint32_t)fm_ce_se, + (ds << 16) | pi, imm, qp_num}; + *(uint8x16_t *)ctrl = vqtbl1q_u8((uint8x16_t)data, table); +#else + ctrl->opmod_idx_opcode = (opcode << 24) | (htons(pi) << 8) | opmod; + ctrl->qpn_ds = htonl((qp_num << 8) | ds); + ctrl->fm_ce_se = fm_ce_se; + ctrl->imm = imm; +#endif +} + + +/* Fill one data segment: byte count and lkey in network byte order, address as big-endian 64-bit. */ static UCS_F_ALWAYS_INLINE void +uct_ib_mlx5_set_data_seg(struct mlx5_wqe_data_seg *dptr, + const void *address, + unsigned length, uint32_t lkey) +{ + ucs_assert(((unsigned long)dptr % UCT_IB_MLX5_WQE_SEG_SIZE) == 0); + dptr->byte_count = htonl(length); + dptr->lkey = htonl(lkey); + dptr->addr = htobe64((uintptr_t)address); +} + + +/* Write one data segment per non-empty iov entry, wrapping 'dptr' around the queue end as needed. Returns the total size in bytes of the segments written. */ static UCS_F_ALWAYS_INLINE +unsigned uct_ib_mlx5_set_data_seg_iov(uct_ib_mlx5_txwq_t *txwq, + struct mlx5_wqe_data_seg *dptr, + const uct_iov_t *iov, size_t iovcnt) +{ + unsigned len = 0; + size_t iov_it; + + for (iov_it = 0; iov_it < iovcnt; ++iov_it) { + if (!iov[iov_it].length) { /* Skip zero-length iov entry */ + continue; + } + ucs_assert(iov[iov_it].memh != UCT_MEM_HANDLE_NULL); + + /* place data into the buffer */ + dptr = uct_ib_mlx5_txwq_wrap_any(txwq, dptr); + uct_ib_mlx5_set_data_seg(dptr, 
iov[iov_it].buffer, iov[iov_it].length, + ((uct_ib_mem_t*)iov[iov_it].memh)->lkey); + len += sizeof(*dptr); + ++dptr; + } + return len; +} + + +static UCS_F_ALWAYS_INLINE void uct_ib_mlx5_bf_copy_bb(void * restrict dst, + void * restrict src) +{ +#if defined( __SSE4_2__) + UCS_WORD_COPY(__m128i, dst, __m128i, src, MLX5_SEND_WQE_BB); +#elif defined(__ARM_NEON) + UCS_WORD_COPY(int16x8_t, dst, int16x8_t, src, MLX5_SEND_WQE_BB); +#else /* NO SIMD support */ + UCS_WORD_COPY(uint64_t, dst, uint64_t, src, MLX5_SEND_WQE_BB); +#endif +} + +static UCS_F_ALWAYS_INLINE +void *uct_ib_mlx5_bf_copy(void *dst, void *src, uint16_t num_bb, + const uct_ib_mlx5_txwq_t *wq) +{ + uint16_t n; + + for (n = 0; n < num_bb; ++n) { + uct_ib_mlx5_bf_copy_bb(dst, src); + dst = UCS_PTR_BYTE_OFFSET(dst, MLX5_SEND_WQE_BB); + src = UCS_PTR_BYTE_OFFSET(src, MLX5_SEND_WQE_BB); + if (ucs_unlikely(src == wq->qend)) { + src = wq->qstart; + } + } + return src; +} + +static UCS_F_ALWAYS_INLINE uint16_t +uct_ib_mlx5_post_send(uct_ib_mlx5_txwq_t *wq, + struct mlx5_wqe_ctrl_seg *ctrl, unsigned wqe_size) +{ + uint16_t sw_pi, num_bb, res_count; + void *src, *dst; + + ucs_assert(((unsigned long)ctrl % UCT_IB_MLX5_WQE_SEG_SIZE) == 0); + num_bb = ucs_div_round_up(wqe_size, MLX5_SEND_WQE_BB); + sw_pi = wq->sw_pi; + + uct_ib_mlx5_txwq_validate(wq, num_bb); + /* TODO Put memory store fence here too, to prevent WC being flushed after DBrec */ + ucs_memory_cpu_store_fence(); + + /* Write doorbell record */ + *wq->dbrec = htonl(sw_pi += num_bb); + + /* Make sure that doorbell record is written before ringing the doorbell */ + ucs_memory_bus_store_fence(); + + /* Set up copy pointers */ + dst = wq->reg->addr.ptr; + src = ctrl; + + ucs_assert(wqe_size <= UCT_IB_MLX5_BF_REG_SIZE); + ucs_assert(num_bb <= UCT_IB_MLX5_MAX_BB); + if (ucs_likely(wq->reg->mode == UCT_IB_MLX5_MMIO_MODE_BF_POST)) { + src = uct_ib_mlx5_bf_copy(dst, src, num_bb, wq); + ucs_memory_bus_cacheline_wc_flush(); + } else if (wq->reg->mode == 
UCT_IB_MLX5_MMIO_MODE_BF_POST_MT) { + src = uct_ib_mlx5_bf_copy(dst, src, num_bb, wq); + /* Make sure that HW observes WC writes in order, in case of multiple + * threads which use the same BF register in a serialized way + */ + ucs_memory_cpu_wc_fence(); + } else { + ucs_assert(wq->reg->mode == UCT_IB_MLX5_MMIO_MODE_DB); + *(volatile uint64_t*)dst = *(volatile uint64_t*)src; + ucs_memory_bus_store_fence(); + src = UCS_PTR_BYTE_OFFSET(src, num_bb * MLX5_SEND_WQE_BB); + src = uct_ib_mlx5_txwq_wrap_any(wq, src); + } + + /* We don't want the compiler to reorder instructions and hurt latency */ + ucs_compiler_fence(); + + /* + * Advance queue pointer. + * We return the number of BBs the *previous* WQE has consumed, since CQEs + * are reporting the index of the first BB rather than the last. We have + * reserved QP space for at least UCT_IB_MLX5_MAX_BB to accommodate. + * */ + ucs_assert(ctrl == wq->curr); + res_count = wq->sw_pi - wq->prev_sw_pi; + wq->curr = src; + wq->prev_sw_pi += res_count; + ucs_assert(wq->prev_sw_pi == wq->sw_pi); + wq->sw_pi = sw_pi; + + /* Flip BF register */ + wq->reg->addr.uint ^= UCT_IB_MLX5_BF_REG_SIZE; + return res_count; +} + + +static inline uct_ib_mlx5_srq_seg_t * +uct_ib_mlx5_srq_get_wqe(uct_ib_mlx5_srq_t *srq, uint16_t index) +{ + return UCS_PTR_BYTE_OFFSET(srq->buf, (index & srq->mask) * srq->stride); +} + +static inline uint16_t uct_ib_mlx5_calc_av_sport(uint32_t rqpn, uint32_t qpn) +{ + uint32_t flow_id = qpn ^ rqpn; + uint16_t sport = flow_id ^ (flow_id >> 16); + + return UCT_IB_MLX5_ROCE_SRC_PORT_MIN | sport; +} + +static inline void uct_ib_mlx5_iface_set_av_sport(uct_ib_iface_t *iface, + uct_ib_mlx5_base_av_t *av, + uint32_t rqpn, uint32_t qpn) +{ + if (!uct_ib_iface_is_roce(iface) || + (ntohs(av->rlid) >= UCT_IB_MLX5_ROCE_SRC_PORT_MIN)) { + return; + } + + av->rlid = htons(uct_ib_mlx5_calc_av_sport(qpn, rqpn)); +} + +static ucs_status_t UCS_F_MAYBE_UNUSED +uct_ib_mlx5_iface_fill_attr(uct_ib_iface_t *iface, + uct_ib_mlx5_qp_t 
*qp, + uct_ib_qp_attr_t *attr) +{ + ucs_status_t status; + + status = uct_ib_mlx5_iface_get_res_domain(iface, qp); + if (status) { + return status; + } + +#if HAVE_DECL_IBV_EXP_CREATE_QP + attr->ibv.comp_mask = IBV_EXP_QP_INIT_ATTR_PD; + attr->ibv.pd = uct_ib_iface_md(iface)->pd; +#elif HAVE_DECL_IBV_CREATE_QP_EX + attr->ibv.comp_mask = IBV_QP_INIT_ATTR_PD; + if (qp->verbs.rd->pd != NULL) { + attr->ibv.pd = qp->verbs.rd->pd; + } else { + attr->ibv.pd = uct_ib_iface_md(iface)->pd; + } +#endif + +#if HAVE_IBV_EXP_RES_DOMAIN + attr->ibv.comp_mask |= IBV_EXP_QP_INIT_ATTR_RES_DOMAIN; + attr->ibv.res_domain = qp->verbs.rd->ibv_domain; +#endif + + return UCS_OK; +} + +static void UCS_F_MAYBE_UNUSED +uct_ib_mlx5_destroy_qp(uct_ib_mlx5_qp_t *qp) +{ + switch (qp->type) { + case UCT_IB_MLX5_OBJ_TYPE_VERBS: + uct_ib_destroy_qp(qp->verbs.qp); + break; + case UCT_IB_MLX5_OBJ_TYPE_DEVX: + uct_ib_mlx5_devx_destroy_qp(qp); + break; + case UCT_IB_MLX5_OBJ_TYPE_LAST: + break; + } +} + +static ucs_status_t UCS_F_MAYBE_UNUSED +uct_ib_mlx5_modify_qp_state(uct_ib_mlx5_md_t *md, uct_ib_mlx5_qp_t *qp, + enum ibv_qp_state state) +{ + if (md->flags & UCT_IB_MLX5_MD_FLAG_DEVX) { + return uct_ib_mlx5_devx_modify_qp_state(qp, state); + } else { + return uct_ib_modify_qp(qp->verbs.qp, state); + } +} diff --git a/src/uct/ib/mlx5/ib_mlx5_log.c b/src/uct/ib/mlx5/ib_mlx5_log.c new file mode 100644 index 0000000..c5b659f --- /dev/null +++ b/src/uct/ib/mlx5/ib_mlx5_log.c @@ -0,0 +1,467 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/
+
+#include "ib_mlx5_log.h"
+
+#include
+#include
+
+
+static void uct_ib_mlx5_wqe_dump(uct_ib_iface_t *iface, void *wqe, void *qstart,
+                                 void *qend, int max_sge, int dump_qp,
+                                 uct_log_data_dump_func_t packet_dump_cb,
+                                 char *buffer, size_t max, uct_ib_log_sge_t *log_sge);
+
+/*
+ * Return a short name for the operation an error CQE refers to.
+ * Requester errors are classified by the WQE opcode stored in the top byte of
+ * s_wqe_opcode_qpn, responder errors are always "RECV", and anything else
+ * yields an empty string.
+ */
+static const char *uct_ib_mlx5_cqe_err_opcode(uct_ib_mlx5_err_cqe_t *ecqe)
+{
+    uint8_t wqe_err_opcode = ntohl(ecqe->s_wqe_opcode_qpn) >> 24;
+
+    switch (ecqe->op_own >> 4) {
+    case MLX5_CQE_REQ_ERR:
+        switch (wqe_err_opcode) {
+        case MLX5_OPCODE_RDMA_WRITE_IMM:
+        case MLX5_OPCODE_RDMA_WRITE:
+            return "RDMA_WRITE";
+        case MLX5_OPCODE_SEND_IMM:
+        case MLX5_OPCODE_SEND:
+        case MLX5_OPCODE_SEND_INVAL:
+            return "SEND";
+        case MLX5_OPCODE_RDMA_READ:
+            return "RDMA_READ";
+        case MLX5_OPCODE_ATOMIC_CS:
+            return "CSWAP";
+        case MLX5_OPCODE_ATOMIC_FA:
+            return "FETCH_ADD";
+        case MLX5_OPCODE_ATOMIC_MASKED_CS:
+            return "MASKED_CSWAP";
+        case MLX5_OPCODE_ATOMIC_MASKED_FA:
+            return "MASKED_FETCH_ADD";
+        default:
+            return "";
+        }
+    case MLX5_CQE_RESP_ERR:
+        return "RECV";
+    default:
+        return "";
+    }
+}
+
+/*
+ * Log an error completion and translate its syndrome to a UCS status.
+ *
+ * @param iface      Interface the CQE was polled from (used for log text).
+ * @param ecqe       Error CQE.
+ * @param txwq       Send work queue the CQE belongs to, or NULL. When given
+ *                   and the CQE is a requester error, the failed WQE is dumped.
+ * @param log_level  Level at which the message is emitted.
+ *
+ * @return UCS_ERR_ENDPOINT_TIMEOUT for transport-retry / RNR-retry / remote
+ *         abort syndromes, UCS_ERR_IO_ERROR otherwise. Flushed WQEs are only
+ *         traced and return UCS_ERR_IO_ERROR without a log message.
+ */
+ucs_status_t uct_ib_mlx5_completion_with_err(uct_ib_iface_t *iface,
+                                             uct_ib_mlx5_err_cqe_t *ecqe,
+                                             uct_ib_mlx5_txwq_t *txwq,
+                                             ucs_log_level_t log_level)
+{
+    ucs_status_t status = UCS_ERR_IO_ERROR;
+    char err_info[256]  = {};
+    char wqe_info[256]  = {};
+    uint16_t wqe_index;
+    uint32_t qp_num;
+    void *wqe;
+
+    wqe_index = ntohs(ecqe->wqe_counter);
+    qp_num    = ntohl(ecqe->s_wqe_opcode_qpn) & UCS_MASK(UCT_IB_QPN_ORDER);
+    if (txwq != NULL) {
+        /* Reduce the running counter to an index inside the queue buffer */
+        wqe_index %= UCS_PTR_BYTE_DIFF(txwq->qstart, txwq->qend) / MLX5_SEND_WQE_BB;
+    }
+
+    if (ecqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
+        ucs_trace("QP 0x%x wqe[%d] is flushed", qp_num, wqe_index);
+        return status;
+    }
+
+    switch (ecqe->syndrome) {
+    case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
+        snprintf(err_info, sizeof(err_info), "Local length");
+        break;
+    case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
+        snprintf(err_info, sizeof(err_info), "Local QP operation");
+        break;
+    case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
+        snprintf(err_info, sizeof(err_info), "Local protection");
+        break;
+    case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
+        /* Not reached: flushed WQEs return early above */
+        snprintf(err_info, sizeof(err_info), "WR flushed because QP in error state");
+        break;
+    case MLX5_CQE_SYNDROME_MW_BIND_ERR:
+        snprintf(err_info, sizeof(err_info), "Memory window bind");
+        break;
+    case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
+        snprintf(err_info, sizeof(err_info), "Bad response");
+        break;
+    case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
+        snprintf(err_info, sizeof(err_info), "Local access");
+        break;
+    case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
+        snprintf(err_info, sizeof(err_info), "Invalid request");
+        break;
+    case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+        snprintf(err_info, sizeof(err_info), "Remote access");
+        break;
+    case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
+        snprintf(err_info, sizeof(err_info), "Remote QP");
+        break;
+    case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+        snprintf(err_info, sizeof(err_info), "Transport retry count exceeded");
+        status = UCS_ERR_ENDPOINT_TIMEOUT;
+        break;
+    case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+        snprintf(err_info, sizeof(err_info), "Receive-no-ready retry count exceeded");
+        status = UCS_ERR_ENDPOINT_TIMEOUT;
+        break;
+    case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
+        snprintf(err_info, sizeof(err_info), "Remote side aborted");
+        status = UCS_ERR_ENDPOINT_TIMEOUT;
+        break;
+    default:
+        snprintf(err_info, sizeof(err_info), "Generic");
+        break;
+    }
+
+    if ((txwq != NULL) && ((ecqe->op_own >> 4) == MLX5_CQE_REQ_ERR)) {
+        wqe = UCS_PTR_BYTE_OFFSET(txwq->qstart, MLX5_SEND_WQE_BB * wqe_index);
+        uct_ib_mlx5_wqe_dump(iface, wqe, txwq->qstart, txwq->qend, INT_MAX, 0,
+                             NULL, wqe_info, sizeof(wqe_info) - 1, NULL);
+    } else {
+        snprintf(wqe_info, sizeof(wqe_info) - 1, "opcode %s",
+                 uct_ib_mlx5_cqe_err_opcode(ecqe));
+    }
+
+    ucs_log(log_level,
+            "%s on "UCT_IB_IFACE_FMT"/%s (synd 0x%x vend 0x%x hw_synd %d/%d)\n"
+            "%s QP 0x%x wqe[%d]: %s",
+            err_info, UCT_IB_IFACE_ARG(iface),
+            uct_ib_iface_is_roce(iface) ? "RoCE" : "IB",
+            ecqe->syndrome, ecqe->vendor_err_synd, ecqe->hw_synd_type >> 4,
+            ecqe->hw_err_synd, uct_ib_qp_type_str(iface->config.qp_type),
+            qp_num, wqe_index, wqe_info);
+    return status;
+}
+
+/*
+ * Decode one data segment at *dseg_p into sg_list[*index].
+ *
+ * An inline segment whose payload crosses qend is split into two entries
+ * (sg_list[*index] and sg_list[*index + 1]), so the caller must leave room
+ * for two entries. *index is advanced by the number of entries written,
+ * *is_inline is set accordingly, and *dseg_p is moved to the next segment,
+ * wrapping around the queue buffer.
+ *
+ * @return Number of UCT_IB_MLX5_WQE_SEG_SIZE units consumed.
+ */
+static unsigned uct_ib_mlx5_parse_dseg(void **dseg_p, void *qstart, void *qend,
+                                       struct ibv_sge *sg_list, int *index,
+                                       int *is_inline)
+{
+    struct mlx5_wqe_data_seg *dpseg;
+    struct mlx5_wqe_inl_data_seg *inl;
+    struct ibv_sge *sg = &sg_list[*index];
+    int byte_count;
+    void *addr;
+    int ds;
+
+    if (*dseg_p == qend) {
+        *dseg_p = qstart;
+    }
+    inl = *dseg_p;
+    if (inl->byte_count & htonl(MLX5_INLINE_SEG)) {
+        addr       = inl + 1;
+        sg->addr   = (uintptr_t)addr;
+        sg->lkey   = 0;
+        byte_count = ntohl(inl->byte_count) & ~MLX5_INLINE_SEG;
+        if (UCS_PTR_BYTE_OFFSET(addr, byte_count) > qend) {
+            /* Inline data wraps: second part starts at the queue beginning */
+            sg->length       = UCS_PTR_BYTE_DIFF(addr, qend);
+            (sg + 1)->addr   = (uintptr_t)qstart;
+            (sg + 1)->lkey   = 0;
+            (sg + 1)->length = byte_count - sg->length;
+            ++(*index);
+        } else {
+            sg->length       = byte_count;
+        }
+        *is_inline = 1;
+        ds         = ucs_div_round_up(sizeof(*inl) + byte_count,
+                                      UCT_IB_MLX5_WQE_SEG_SIZE);
+        ++(*index);
+    } else {
+        dpseg      = *dseg_p;
+        sg->addr   = be64toh(dpseg->addr);
+        sg->length = ntohl(dpseg->byte_count);
+        sg->lkey   = ntohl(dpseg->lkey);
+        *is_inline = 0;
+        ds         = 1;
+        ++(*index);
+    }
+
+    *dseg_p = UCS_PTR_BYTE_OFFSET(*dseg_p, ds * UCT_IB_MLX5_WQE_SEG_SIZE);
+    if (*dseg_p >= qend) {
+        *dseg_p = UCS_PTR_BYTE_OFFSET(*dseg_p, -UCS_PTR_BYTE_DIFF(qstart, qend));
+    }
+    return ds;
+}
+
+/*
+ * Read a 4- or 8-byte big-endian value at ptr and return it in host order.
+ * Any other size is read as a raw 64-bit value without conversion.
+ */
+static uint64_t network_to_host(void *ptr, int size)
+{
+    if (size == 4) {
+        return ntohl(*(uint32_t*)ptr);
+    } else if (size == 8) {
+        return be64toh(*(uint64_t*)ptr);
+    } else {
+        return *(uint64_t*)ptr;
+    }
+}
+
+/*
+ * Print the datagram (address vector) segment at seg into buf.
+ *
+ * @return Size in bytes of the address vector: UCT_IB_MLX5_AV_FULL_SIZE for an
+ *         extended AV, UCT_IB_MLX5_AV_BASE_SIZE otherwise.
+ */
+static size_t uct_ib_mlx5_dump_dgram(char *buf, size_t max, void *seg, int is_eth)
+{
+    struct mlx5_wqe_datagram_seg *dgseg = seg;
+    struct mlx5_base_av *base_av;
+    struct mlx5_grh_av *grh_av;
+    /* inet_ntop(AF_INET6) needs INET6_ADDRSTRLEN bytes; with a smaller buffer
+     * it fails and returns NULL for long GIDs, which would be passed to %s */
+    char gid_buf[INET6_ADDRSTRLEN];
+    int sgid_index;
+    char *p, *endp;
+
+    p       = buf;
+    endp    = buf + max - 1;
+    base_av = mlx5_av_base(&dgseg->av);
+
+    snprintf(p, endp - p, " [rqpn 0x%x",
+             ntohl(base_av->dqp_dct & ~UCT_IB_MLX5_EXTENDED_UD_AV));
+    p += strlen(p);
+
+    if (!is_eth) {
+        snprintf(p, endp - p, " rlid %d", ntohs(base_av->rlid));
+        p += strlen(p);
+    }
+
+    if (mlx5_av_base(&dgseg->av)->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV) {
+        grh_av = mlx5_av_grh(&dgseg->av);
+        if (is_eth || (grh_av->grh_gid_fl & UCT_IB_MLX5_AV_GRH_PRESENT)) {
+            if (is_eth) {
+                snprintf(p, endp - p, " rmac %02x:%02x:%02x:%02x:%02x:%02x",
+                         grh_av->rmac[0], grh_av->rmac[1], grh_av->rmac[2],
+                         grh_av->rmac[3], grh_av->rmac[4], grh_av->rmac[5]);
+                p += strlen(p);
+            }
+
+            sgid_index = (htonl(grh_av->grh_gid_fl) >> 20) & UCS_MASK(8);
+            snprintf(p, endp - p, " sgix %d dgid %s tc %d]", sgid_index,
+                     inet_ntop(AF_INET6, grh_av->rgid, gid_buf, sizeof(gid_buf)),
+                     grh_av->tclass);
+        } else {
+            snprintf(p, endp - p, "]");
+        }
+        return UCT_IB_MLX5_AV_FULL_SIZE;
+    } else {
+        snprintf(p, endp - p, "]");
+        return UCT_IB_MLX5_AV_BASE_SIZE;
+    }
+}
+
+/* Return nonzero if WQEs of this QP type carry an address-vector segment
+ * after the control segment (UD, and DCI when built with HAVE_TL_DC). */
+static int uct_ib_mlx5_is_qp_require_av_seg(int qp_type)
+{
+    if (qp_type == IBV_QPT_UD) {
+        return 1;
+    }
+#if HAVE_TL_DC
+    if (qp_type == UCT_IB_QPT_DCI) {
+        return 1;
+    }
+#endif
+    return 0;
+}
+
+/*
+ * Format a human-readable description of a send WQE into buffer.
+ *
+ * @param iface           Interface owning the queue (QP type, RoCE check).
+ * @param wqe             Start of the WQE (control segment).
+ * @param qstart, qend    Bounds of the cyclic work-queue buffer.
+ * @param max_sge         Maximal number of parsed data segments to print.
+ * @param dump_qp         If nonzero, prefix the output with QP number and
+ *                        WQE index.
+ * @param packet_dump_cb  Passed through to uct_ib_log_dump_sg_list().
+ * @param buffer, max     Output buffer and its usable size.
+ * @param log_sge         If not NULL, this scatter list is printed instead of
+ *                        parsing the data segments from the WQE.
+ */
+static void uct_ib_mlx5_wqe_dump(uct_ib_iface_t *iface, void *wqe, void *qstart,
+                                 void *qend, int max_sge, int dump_qp,
+                                 uct_log_data_dump_func_t packet_dump_cb,
+                                 char *buffer, size_t max, uct_ib_log_sge_t *log_sge)
+{
+    static uct_ib_opcode_t opcodes[] = {
+        [MLX5_OPCODE_NOP]              = { "NOP",        0 },
+        [MLX5_OPCODE_RDMA_WRITE]       = { "RDMA_WRITE", UCT_IB_OPCODE_FLAG_HAS_RADDR },
+        [MLX5_OPCODE_RDMA_READ]        = { "RDMA_READ",  UCT_IB_OPCODE_FLAG_HAS_RADDR },
+        [MLX5_OPCODE_SEND]             = { "SEND",       0 },
+        [MLX5_OPCODE_SEND_IMM]         = { "SEND_IMM",   0 },
+        [MLX5_OPCODE_ATOMIC_CS]        = { "CSWAP",      UCT_IB_OPCODE_FLAG_HAS_RADDR|UCT_IB_OPCODE_FLAG_HAS_ATOMIC },
+        [MLX5_OPCODE_ATOMIC_FA]        = { "FETCH_ADD",  UCT_IB_OPCODE_FLAG_HAS_RADDR|UCT_IB_OPCODE_FLAG_HAS_ATOMIC },
+        [MLX5_OPCODE_ATOMIC_MASKED_CS] = { "MASKED_CSWAP",
+                                           UCT_IB_OPCODE_FLAG_HAS_RADDR|UCT_IB_OPCODE_FLAG_HAS_EXT_ATOMIC },
+        [MLX5_OPCODE_ATOMIC_MASKED_FA] = { "MASKED_FETCH_ADD",
+                                           UCT_IB_OPCODE_FLAG_HAS_RADDR|UCT_IB_OPCODE_FLAG_HAS_EXT_ATOMIC },
+    };
+    /* Used for opcodes which are outside the table or have no entry in it */
+    static uct_ib_opcode_t unknown_opcode = { "UNKNOWN", 0 };
+
+    struct mlx5_wqe_ctrl_seg *ctrl = wqe;
+    /* NOTE(review): the control fields below are shifted without ntohl(),
+     * which looks like it relies on a little-endian host - confirm */
+    uint8_t opcode         = ctrl->opmod_idx_opcode >> 24;
+    uint8_t opmod          = ctrl->opmod_idx_opcode & 0xff;
+    uint32_t qp_num        = ntohl(ctrl->qpn_ds) >> 8;
+    int ds                 = ctrl->qpn_ds >> 24;
+    /* The opcode comes from the WQE and is not limited to the table size,
+     * and the table has gaps, so never index it unchecked */
+    uct_ib_opcode_t *op    = ((opcode < (sizeof(opcodes) / sizeof(opcodes[0]))) &&
+                              (opcodes[opcode].name != NULL)) ?
+                             &opcodes[opcode] : &unknown_opcode;
+    char *s                = buffer;
+    char *ends             = buffer + max;
+    struct ibv_sge sg_list[16];
+    uint64_t inline_bitmap;
+    int i, is_inline, is_eth;
+    size_t dg_size;
+    void *seg;
+
+    /* QP and WQE index */
+    if (dump_qp) {
+        snprintf(s, ends - s, "QP 0x%x [%03ld] ", qp_num,
+                 UCS_PTR_BYTE_DIFF(qstart, wqe) / MLX5_SEND_WQE_BB);
+        s += strlen(s);
+    }
+
+    /* Opcode and flags */
+    uct_ib_log_dump_opcode(op,
+                           ctrl->fm_ce_se & MLX5_WQE_CTRL_CQ_UPDATE,
+                           ctrl->fm_ce_se & MLX5_WQE_CTRL_FENCE,
+                           ctrl->fm_ce_se & (1 << 1),
+                           s, ends - s);
+    s += strlen(s);
+
+    /* Additional segments */
+    --ds;
+    seg = ctrl + 1;
+    if (seg == qend) {
+        seg = qstart;
+    }
+
+    if (uct_ib_mlx5_is_qp_require_av_seg(iface->config.qp_type)) {
+        is_eth  = uct_ib_iface_is_roce(iface);
+        dg_size = uct_ib_mlx5_dump_dgram(s, ends - s, seg, is_eth);
+        s += strlen(s);
+
+        seg = (char *)seg + dg_size;
+        ds -= ucs_div_round_up(dg_size, UCT_IB_MLX5_WQE_SEG_SIZE);
+    }
+    if (seg == qend) {
+        seg = qstart;
+    }
+
+    /* Remote address segment */
+    if (op->flags & UCT_IB_OPCODE_FLAG_HAS_RADDR) {
+        struct mlx5_wqe_raddr_seg *rseg = seg;
+        uct_ib_log_dump_remote_addr(be64toh(rseg->raddr), ntohl(rseg->rkey), s, ends - s);
+        s += strlen(s);
+
+        --ds;
+        seg = rseg + 1;
+        if (seg == qend) {
+            seg = qstart;
+        }
+    }
+
+    /* Atomic segment */
+    if (op->flags & UCT_IB_OPCODE_FLAG_HAS_ATOMIC) {
+        struct mlx5_wqe_atomic_seg *atomic = seg;
+        if (opcode == MLX5_OPCODE_ATOMIC_FA) {
+            uct_ib_log_dump_atomic_fadd(be64toh(atomic->swap_add), s, ends - s);
+        } else if (opcode == MLX5_OPCODE_ATOMIC_CS) {
+            uct_ib_log_dump_atomic_cswap(be64toh(atomic->compare),
+                                         be64toh(atomic->swap_add), s, ends - s);
+        }
+        s += strlen(s);
+
+        --ds;
+        seg = atomic + 1;
+        if (seg == qend) {
+            seg = qstart;
+        }
+    }
+
+    /* Extended atomic segment */
+    if (op->flags & UCT_IB_OPCODE_FLAG_HAS_EXT_ATOMIC) {
+        uint64_t add, boundary, compare, swap, compare_mask, swap_mask;
+        int size = 1 << ((opmod & 7) + 2);
+
+        if (opcode == MLX5_OPCODE_ATOMIC_MASKED_FA) {
+            add      = network_to_host(seg, size);
+            boundary = network_to_host(UCS_PTR_BYTE_OFFSET(seg, size), size);
+            seg      = UCS_PTR_BYTE_OFFSET(seg,
+                                           ucs_align_up_pow2(size * 2,
+                                                             UCT_IB_MLX5_WQE_SEG_SIZE));
+            ds      -= ucs_div_round_up(2 * size, UCT_IB_MLX5_WQE_SEG_SIZE);
+
+            uct_ib_log_dump_atomic_masked_fadd(size, add, boundary, s, ends - s);
+        } else if (opcode == MLX5_OPCODE_ATOMIC_MASKED_CS) {
+            swap    = network_to_host(seg, size);
+            compare = network_to_host(UCS_PTR_BYTE_OFFSET(seg, size), size);
+
+            seg = UCS_PTR_BYTE_OFFSET(seg, size * 2);
+            if (seg == qend) {
+                seg = qstart;
+            }
+
+            swap_mask    = network_to_host(seg, size);
+            compare_mask = network_to_host(UCS_PTR_BYTE_OFFSET(seg, size), size);
+            seg          = UCS_PTR_BYTE_OFFSET(seg, size * 2);
+            if (seg == qend) {
+                seg = qstart;
+            }
+
+            /* ds counts UCT_IB_MLX5_WQE_SEG_SIZE units, not bytes */
+            ucs_assert(((size * 4) % UCT_IB_MLX5_WQE_SEG_SIZE) == 0);
+            ds -= (size * 4) / UCT_IB_MLX5_WQE_SEG_SIZE;
+
+            uct_ib_log_dump_atomic_masked_cswap(size, compare, compare_mask, swap,
+                                                swap_mask, s, ends - s);
+        }
+        s += strlen(s);
+    }
+
+    /* Data segments*/
+    if (log_sge == NULL) {
+        i             = 0;
+        inline_bitmap = 0;
+
+        /* uct_ib_mlx5_parse_dseg() may fill two entries for a wrapped inline
+         * segment, so stop while there is still room for both */
+        while ((ds > 0) &&
+               ((i + 1) < (int)(sizeof(sg_list) / sizeof(sg_list[0])))) {
+            ds -= uct_ib_mlx5_parse_dseg(&seg, qstart, qend, sg_list, &i, &is_inline);
+            if (is_inline) {
+                inline_bitmap |= UCS_BIT(i-1);
+            }
+            s += strlen(s);
+        }
+    }
+
+    /* i and inline_bitmap are only read when log_sge is NULL */
+    uct_ib_log_dump_sg_list(iface, UCT_AM_TRACE_TYPE_SEND,
+                            log_sge ? log_sge->sg_list : sg_list,
+                            log_sge ? log_sge->num_sge : ucs_min(i, max_sge),
+                            log_sge ? log_sge->inline_bitmap : inline_bitmap,
+                            packet_dump_cb, s, ends - s);
+}
+
+/* Dump a send WQE (with QP number and index) and pass the text to
+ * uct_log_data(). Invoked through the uct_ib_mlx5_log_tx() macro. */
+void __uct_ib_mlx5_log_tx(const char *file, int line, const char *function,
+                          uct_ib_iface_t *iface, void *wqe, void *qstart,
+                          void *qend, int max_sge, uct_ib_log_sge_t *log_sge,
+                          uct_log_data_dump_func_t packet_dump_cb)
+{
+    char buf[256] = {0};
+    uct_ib_mlx5_wqe_dump(iface, wqe, qstart, qend, max_sge, 1, packet_dump_cb,
+                         buf, sizeof(buf) - 1, log_sge);
+    uct_log_data(file, line, function, buf);
+}
+
+/* Print the main fields of a CQE and pass the text to uct_log_data(). */
+void uct_ib_mlx5_cqe_dump(const char *file, int line, const char *function, struct mlx5_cqe64 *cqe)
+{
+    char buf[256] = {0};
+
+    snprintf(buf, sizeof(buf) - 1,
+             "CQE(op_own 0x%x) qp 0x%x sqp 0x%x slid %d bytes %d wqe_idx %d ",
+             (unsigned)cqe->op_own,
+             (unsigned)(ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER)),
+             (unsigned)(ntohl(cqe->flags_rqpn) & UCS_MASK(UCT_IB_QPN_ORDER)),
+             (unsigned)ntohs(cqe->slid),
+             (unsigned)ntohl(cqe->byte_cnt),
+             (unsigned)ntohs(cqe->wqe_counter));
+
+    uct_log_data(file, line, function, buf);
+}
+
+/* Dump a receive completion and pass the text to uct_log_data(). For UD QPs
+ * the first UCT_IB_GRH_LEN bytes of the data are skipped. Invoked through
+ * the uct_ib_mlx5_log_rx() macro. */
+void __uct_ib_mlx5_log_rx(const char *file, int line, const char *function,
+                          uct_ib_iface_t *iface, struct mlx5_cqe64 *cqe,
+                          void *data, uct_log_data_dump_func_t packet_dump_cb)
+{
+    char buf[256] = {0};
+    size_t length;
+
+    length = ntohl(cqe->byte_cnt);
+    if (iface->config.qp_type == IBV_QPT_UD) {
+        length -= UCT_IB_GRH_LEN;
+        data    = UCS_PTR_BYTE_OFFSET(data, UCT_IB_GRH_LEN);
+    }
+    uct_ib_log_dump_recv_completion(iface,
+                                    ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER),
+                                    ntohl(cqe->flags_rqpn) & UCS_MASK(UCT_IB_QPN_ORDER),
+                                    ntohs(cqe->slid),
+                                    data, length,
+                                    packet_dump_cb, buf, sizeof(buf) - 1);
+    uct_log_data(file, line, function, buf);
+}
+
diff --git a/src/uct/ib/mlx5/ib_mlx5_log.h b/src/uct/ib/mlx5/ib_mlx5_log.h
new file mode 100644
index 0000000..02ee779
--- /dev/null
+++ b/src/uct/ib/mlx5/ib_mlx5_log.h
@@ -0,0 +1,57 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_IB_MLX5_LOG_H_
+#define UCT_IB_MLX5_LOG_H_
+
+#include "ib_mlx5.h"
+
+#include
+
+
+/* Caller-provided scatter list which uct_ib_mlx5_log_tx() prints instead of
+ * parsing the data segments out of the WQE. Bit i of inline_bitmap marks
+ * sg_list[i] as inline data. */
+typedef struct uct_ib_log_sge {
+    int            num_sge;
+    uint64_t       inline_bitmap;
+    struct ibv_sge sg_list[2];
+} uct_ib_log_sge_t;
+
+/* Log an error CQE at log_level and return the matching UCS status.
+ * txwq may be NULL, in which case the failed WQE is not dumped. */
+ucs_status_t uct_ib_mlx5_completion_with_err(uct_ib_iface_t *iface,
+                                             uct_ib_mlx5_err_cqe_t *ecqe,
+                                             uct_ib_mlx5_txwq_t *txwq,
+                                             ucs_log_level_t log_level);
+
+
+/* Implementation of uct_ib_mlx5_log_tx(); call the macro instead */
+void __uct_ib_mlx5_log_tx(const char *file, int line, const char *function,
+                          uct_ib_iface_t *iface, void *wqe, void *qstart,
+                          void *qend, int max_sge, uct_ib_log_sge_t *log_sge,
+                          uct_log_data_dump_func_t packet_dump_cb);
+
+/* Implementation of uct_ib_mlx5_log_rx(); call the macro instead */
+void __uct_ib_mlx5_log_rx(const char *file, int line, const char *function,
+                          uct_ib_iface_t *iface, struct mlx5_cqe64 *cqe,
+                          void *data, uct_log_data_dump_func_t packet_dump_cb);
+
+/* Implementation of uct_ib_mlx5_log_cqe(); call the macro instead */
+void uct_ib_mlx5_cqe_dump(const char *file, int line, const char *function,
+                          struct mlx5_cqe64 *cqe);
+
+/*
+ * The logging macros below do their work only when TRACE_DATA logging is
+ * enabled. They expand to a bare "if" statement, so do not use them as the
+ * body of an unbraced if/else.
+ */
+#define uct_ib_mlx5_log_tx(_iface, _wqe, _qstart, _qend, _max_sge, _log_sge, _dump_cb) \
+    if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { \
+        __uct_ib_mlx5_log_tx(__FILE__, __LINE__, __FUNCTION__, \
+                             _iface, _wqe, _qstart, _qend, _max_sge, _log_sge, _dump_cb); \
+    }
+
+#define uct_ib_mlx5_log_rx(_iface, _cqe, _data, _dump_cb) \
+    if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { \
+        __uct_ib_mlx5_log_rx(__FILE__, __LINE__, __FUNCTION__, \
+                             _iface, _cqe, _data, _dump_cb); \
+    }
+
+/* Pass the macro argument itself; the previous expansion referenced a
+ * variable literally named "cqe" at the call site */
+#define uct_ib_mlx5_log_cqe(_cqe) \
+    if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { \
+        uct_ib_mlx5_cqe_dump(__FILE__, __LINE__, __FUNCTION__, \
+                             _cqe); \
+    }
+
+#endif
diff --git a/src/uct/ib/rc/accel/rc_mlx5.h b/src/uct/ib/rc/accel/rc_mlx5.h
new file mode 100644
index 0000000..6216acd
--- /dev/null
+++ b/src/uct/ib/rc/accel/rc_mlx5.h
@@ -0,0 +1,162 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_RC_MLX5_H
+#define UCT_RC_MLX5_H
+
+#include "rc_mlx5_common.h"
+
+#include
+#include
+#include
+#include
+#include
+
+
+/* Resource check for functions returning ucs_status_ptr_t: expands to the CQE
+ * check followed by the TX QP check, each passed
+ * UCS_STATUS_PTR(UCS_ERR_NO_RESOURCE) as the value to return. */
+#define UCT_RC_MLX5_CHECK_RES_PTR(_iface, _ep) \
+    UCT_RC_CHECK_CQE_RET(&(_iface)->super, &(_ep)->super, \
+                         UCS_STATUS_PTR(UCS_ERR_NO_RESOURCE)) \
+    UCT_RC_CHECK_TXQP_RET(&(_iface)->super, &(_ep)->super, \
+                          UCS_STATUS_PTR(UCS_ERR_NO_RESOURCE))
+
+
+/**
+ * RC remote endpoint
+ */
+typedef struct uct_rc_mlx5_ep {
+    uct_rc_ep_t              super;
+    struct {
+        uct_ib_mlx5_txwq_t   wq;
+    } tx;
+    uct_ib_mlx5_qp_t         tm_qp;
+    uct_rc_mlx5_mp_context_t mp;
+    uint16_t                 atomic_mr_offset;
+} uct_rc_mlx5_ep_t;
+
+/* Packed endpoint address layout */
+typedef struct uct_rc_mlx5_ep_address {
+    uct_ib_uint24_t  qp_num;
+    /* For RNDV TM enabling 2 QPs should be created, one is for sending WRs and
+     * another one for HW (device will use it for RDMA reads and sending RNDV
+     * Complete messages). */
+    uct_ib_uint24_t  tm_qp_num;
+    uint8_t          atomic_mr_id;
+} UCS_S_PACKED uct_rc_mlx5_ep_address_t;
+
+UCS_CLASS_DECLARE(uct_rc_mlx5_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DECLARE_NEW_FUNC(uct_rc_mlx5_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DECLARE_DELETE_FUNC(uct_rc_mlx5_ep_t, uct_ep_t);
+
+/* Called by the RX CQ polling code when the CQE at the consumer index has the
+ * error bit set in op_own */
+void uct_rc_mlx5_iface_check_rx_completion(uct_rc_mlx5_iface_common_t *iface,
+                                           struct mlx5_cqe64 *cqe);
+
+/* PUT operations */
+ucs_status_t uct_rc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *buffer, unsigned length,
+                                      uint64_t remote_addr, uct_rkey_t rkey);
+
+ssize_t uct_rc_mlx5_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
+                                 void *arg, uint64_t remote_addr, uct_rkey_t rkey);
+
+ucs_status_t uct_rc_mlx5_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                                      uint64_t remote_addr, uct_rkey_t rkey,
+                                      uct_completion_t *comp);
+
+/* GET operations */
+ucs_status_t uct_rc_mlx5_ep_get_bcopy(uct_ep_h tl_ep,
+                                      uct_unpack_callback_t unpack_cb,
+                                      void *arg, size_t length,
+                                      uint64_t remote_addr, uct_rkey_t rkey,
+                                      uct_completion_t *comp);
+
+ucs_status_t uct_rc_mlx5_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                                      uint64_t remote_addr, uct_rkey_t rkey,
+                                      uct_completion_t *comp);
+
+/* Active message operations */
+ucs_status_t uct_rc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t header,
+                                     const void *payload, unsigned length);
+
+ssize_t uct_rc_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
+                                uct_pack_callback_t pack_cb, void *arg,
+                                unsigned flags);
+
+ucs_status_t uct_rc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header,
+                                     unsigned header_length, const uct_iov_t *iov,
+                                     size_t iovcnt, unsigned flags,
+                                     uct_completion_t *comp);
+
+/* Atomic operations */
+ucs_status_t uct_rc_mlx5_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, uint64_t swap,
+                                           uint64_t remote_addr, uct_rkey_t rkey,
+                                           uint64_t *result, uct_completion_t *comp);
+
+ucs_status_t uct_rc_mlx5_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, uint32_t swap,
+                                           uint64_t remote_addr, uct_rkey_t rkey,
+                                           uint32_t *result, uct_completion_t *comp);
+
+ucs_status_t uct_rc_mlx5_ep_atomic64_post(uct_ep_h ep, unsigned opcode, uint64_t value,
+                                          uint64_t remote_addr, uct_rkey_t rkey);
+
+ucs_status_t uct_rc_mlx5_ep_atomic32_post(uct_ep_h ep, unsigned opcode, uint32_t value,
+                                          uint64_t remote_addr, uct_rkey_t rkey);
+
+ucs_status_t uct_rc_mlx5_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode,
+                                           uint64_t value, uint64_t *result,
+                                           uint64_t remote_addr, uct_rkey_t rkey,
+                                           uct_completion_t *comp);
+
+ucs_status_t uct_rc_mlx5_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode,
+                                           uint32_t value, uint32_t *result,
+                                           uint64_t remote_addr, uct_rkey_t rkey,
+                                           uct_completion_t *comp);
+
+/* Ordering, flush and flow control */
+ucs_status_t uct_rc_mlx5_ep_fence(uct_ep_h tl_ep, unsigned flags);
+
+ucs_status_t uct_rc_mlx5_ep_flush(uct_ep_h tl_ep, unsigned flags, uct_completion_t *comp);
+
+ucs_status_t uct_rc_mlx5_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op,
+                                    uct_rc_fc_request_t *req);
+
+/* QP creation, connection establishment and progress */
+ucs_status_t uct_rc_mlx5_iface_create_qp(uct_rc_mlx5_iface_common_t *iface,
+                                         uct_ib_mlx5_qp_t *qp,
+                                         uct_ib_mlx5_txwq_t *txwq,
+                                         uct_ib_qp_attr_t *attr);
+
+ucs_status_t uct_rc_mlx5_ep_connect_to_ep(uct_ep_h tl_ep,
+                                          const uct_device_addr_t *dev_addr,
+                                          const uct_ep_addr_t *ep_addr);
+
+unsigned uct_rc_mlx5_iface_progress(void *arg);
+
+/* Tag matching operations */
+ucs_status_t uct_rc_mlx5_ep_tag_eager_short(uct_ep_h tl_ep, uct_tag_t tag,
+                                            const void *data, size_t length);
+
+ssize_t uct_rc_mlx5_ep_tag_eager_bcopy(uct_ep_h tl_ep, uct_tag_t tag,
+                                       uint64_t imm,
+                                       uct_pack_callback_t pack_cb,
+                                       void *arg, unsigned flags);
+
+ucs_status_t uct_rc_mlx5_ep_tag_eager_zcopy(uct_ep_h tl_ep, uct_tag_t tag,
+                                            uint64_t imm, const uct_iov_t *iov,
+                                            size_t iovcnt, unsigned flags,
+                                            uct_completion_t *comp);
+
+ucs_status_ptr_t uct_rc_mlx5_ep_tag_rndv_zcopy(uct_ep_h tl_ep, uct_tag_t tag,
+                                               const void *header,
+                                               unsigned header_length,
+                                               const uct_iov_t *iov,
+                                               size_t iovcnt, unsigned flags,
+                                               uct_completion_t *comp);
+
+ucs_status_t uct_rc_mlx5_ep_tag_rndv_request(uct_ep_h tl_ep, uct_tag_t tag,
+                                             const void* header,
+                                             unsigned header_length,
+                                             unsigned flags);
+
+/* Endpoint address query and failure handling */
+ucs_status_t uct_rc_mlx5_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr);
+
+ucs_status_t uct_rc_mlx5_ep_handle_failure(uct_rc_mlx5_ep_t *ep,
+                                           ucs_status_t status);
+
+ucs_status_t uct_rc_mlx5_ep_set_failed(uct_ib_iface_t *iface, uct_ep_h ep,
+                                       ucs_status_t status);
+
+#endif
diff --git a/src/uct/ib/rc/accel/rc_mlx5.inl b/src/uct/ib/rc/accel/rc_mlx5.inl
new file mode 100644
index 0000000..1334ec1
--- /dev/null
+++ b/src/uct/ib/rc/accel/rc_mlx5.inl
@@ -0,0 +1,1637 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "rc_mlx5.h"
+#include "rc_mlx5_common.h"
+
+#include
+#include
+
+/* Declare local variables _ep and _iface, downcast from the generic endpoint
+ * handle _tl_ep and its interface */
+#define UCT_RC_MLX5_EP_DECL(_tl_ep, _iface, _ep) \
+    uct_rc_mlx5_ep_t *_ep = ucs_derived_of(_tl_ep, uct_rc_mlx5_ep_t); \
+    uct_rc_mlx5_iface_common_t *_iface = ucs_derived_of(_tl_ep->iface, \
+                                                        uct_rc_mlx5_iface_common_t)
+
+/* Credit the building blocks released up to hw_ci back to the TX QP and to
+ * the interface's TX CQ budget */
+static UCS_F_ALWAYS_INLINE void
+uct_rc_mlx5_common_update_tx_res(uct_rc_iface_t *rc_iface, uct_ib_mlx5_txwq_t *txwq,
+                                 uct_rc_txqp_t *txqp, uint16_t hw_ci)
+{
+    uint16_t bb_num;
+
+    bb_num = uct_ib_mlx5_txwq_update_bb(txwq, hw_ci) - uct_rc_txqp_available(txqp);
+
+    /* Must always have positive number of released resources. The first completion
+     * will report bb_num=1 (because prev_sw_pi is initialized to -1) and all the rest
+     * report the amount of BBs the previous WQE has consumed.
+     */
+    ucs_assertv(bb_num > 0, "hw_ci=%d prev_sw_pi=%d available=%d bb_num=%d",
+                hw_ci, txwq->prev_sw_pi, txqp->available, bb_num);
+
+    uct_rc_txqp_available_add(txqp, bb_num);
+    ucs_assert(uct_rc_txqp_available(txqp) <= txwq->bb_max);
+
+    rc_iface->tx.cq_available += bb_num;
+    ucs_assertv(rc_iface->tx.cq_available <= rc_iface->config.tx_cq_len,
+                "cq_available=%d tx_cq_len=%d bb_num=%d txwq=%p txqp=%p",
+                rc_iface->tx.cq_available, rc_iface->config.tx_cq_len, bb_num,
+                txwq, txqp);
+}
+
+/* Dispatch a TX completion: inline-scattered responses are completed from the
+ * CQE itself (for 64-byte scatter the data starts one CQE earlier), other
+ * completions go through the descriptor path */
+static UCS_F_ALWAYS_INLINE void
+uct_rc_mlx5_txqp_process_tx_cqe(uct_rc_txqp_t *txqp, struct mlx5_cqe64 *cqe,
+                                uint16_t hw_ci)
+{
+    if (cqe->op_own & MLX5_INLINE_SCATTER_32) {
+        uct_rc_txqp_completion_inl_resp(txqp, cqe, hw_ci);
+    } else if (cqe->op_own & MLX5_INLINE_SCATTER_64) {
+        uct_rc_txqp_completion_inl_resp(txqp, cqe - 1, hw_ci);
+    } else {
+        uct_rc_txqp_completion_desc(txqp, hw_ci);
+    }
+}
+
+/* Account an inline receive in the given statistics counter and mark the
+ * descriptor header area as undefined for valgrind */
+static UCS_F_ALWAYS_INLINE void
+uct_rc_mlx5_iface_common_rx_inline(uct_rc_mlx5_iface_common_t *iface,
+                                   uct_ib_iface_recv_desc_t *desc,
+                                   int stats_counter, unsigned byte_len)
+{
+    UCS_STATS_UPDATE_COUNTER(iface->stats, stats_counter, 1);
+    VALGRIND_MAKE_MEM_UNDEFINED(uct_ib_iface_recv_desc_hdr(&iface->super.super, desc),
+                                byte_len);
+}
+
+/* Point rx.pref_ptr at the header of the descriptor two entries past
+ * srq.free_idx, to be prefetched by the next CQ poll */
+static UCS_F_ALWAYS_INLINE void
+uct_rc_mlx5_srq_prefetch_setup(uct_rc_mlx5_iface_common_t *iface)
+{
+    unsigned wqe_ctr = iface->rx.srq.free_idx + 2;
+    uct_ib_mlx5_srq_seg_t *seg;
+
+    seg = uct_ib_mlx5_srq_get_wqe(&iface->rx.srq, wqe_ctr);
+    iface->rx.pref_ptr =
+            uct_ib_iface_recv_desc_hdr(&iface->super.super, seg->srq.desc);
+}
+
+/* Record release_desc on the receive descriptor and clear its bit in the
+ * segment's ptr_mask; called when a receive callback returned a status other
+ * than UCS_OK */
+static UCS_F_NOINLINE void
+uct_rc_mlx5_iface_hold_srq_desc(uct_rc_mlx5_iface_common_t *iface,
+                                uct_ib_mlx5_srq_seg_t *seg,
+                                struct mlx5_cqe64 *cqe, uint16_t wqe_ctr,
+                                ucs_status_t status, unsigned offset,
+                                uct_recv_desc_t *release_desc)
+{
+    void *udesc;
+    int stride_idx;
+    int desc_offset;
+
+    if (UCT_RC_MLX5_MP_ENABLED(iface)) {
+        /* stride_idx is valid in non inline CQEs only.
+         * We can assume that stride_idx is correct here, because CQE
+         * with data would always force upper layer to save the data and
+         * return UCS_OK from the corresponding callback. */
+        stride_idx = uct_ib_mlx5_cqe_stride_index(cqe);
+        ucs_assert(stride_idx < iface->tm.mp.num_strides);
+        ucs_assert(!(cqe->op_own & (MLX5_INLINE_SCATTER_32 |
+                                    MLX5_INLINE_SCATTER_64)));
+
+        udesc       = (void*)be64toh(seg->dptr[stride_idx].addr);
+        desc_offset = offset - iface->super.super.config.rx_hdr_offset;
+        udesc       = UCS_PTR_BYTE_OFFSET(udesc, desc_offset);
+        uct_recv_desc(udesc) = release_desc;
+        seg->srq.ptr_mask   &= ~UCS_BIT(stride_idx);
+    } else {
+        udesc                = UCS_PTR_BYTE_OFFSET(seg->srq.desc, offset);
+        uct_recv_desc(udesc) = release_desc;
+        seg->srq.ptr_mask   &= ~1;
+        seg->srq.desc        = NULL;
+    }
+}
+
+/* Return a consumed SRQ segment to the receive queue accounting. If status is
+ * not UCS_OK the descriptor is first handed over via
+ * uct_rc_mlx5_iface_hold_srq_desc(). */
+static UCS_F_ALWAYS_INLINE void
+uct_rc_mlx5_iface_release_srq_seg(uct_rc_mlx5_iface_common_t *iface,
+                                  uct_ib_mlx5_srq_seg_t *seg,
+                                  struct mlx5_cqe64 *cqe, uint16_t wqe_ctr,
+                                  ucs_status_t status, unsigned offset,
+                                  uct_recv_desc_t *release_desc)
+{
+    uint16_t wqe_index;
+    int seg_free;
+
+    /* Need to wrap wqe_ctr, because in case of cyclic srq topology
+     * it is wrapped around 0xFFFF regardless of real SRQ size.
+     * But it respects srq size when srq topology is a linked-list. */
+    wqe_index = wqe_ctr & iface->rx.srq.mask;
+
+    if (ucs_unlikely(status != UCS_OK)) {
+        uct_rc_mlx5_iface_hold_srq_desc(iface, seg, cqe, wqe_ctr, status,
+                                        offset, release_desc);
+    }
+
+    if (UCT_RC_MLX5_MP_ENABLED(iface)) {
+        if (--seg->srq.strides) {
+            /* Segment can't be freed until all strides are consumed */
+            return;
+        }
+        seg->srq.strides = iface->tm.mp.num_strides;
+    }
+
+    seg_free = (seg->srq.ptr_mask == UCS_MASK(iface->tm.mp.num_strides));
+
+    if (ucs_likely(seg_free && (wqe_index == ((iface->rx.srq.ready_idx + 1) &
+                                              iface->rx.srq.mask)))) {
+        /* If the descriptor was not used - if there are no "holes", we can just
+         * reuse it on the receive queue. Otherwise, ready pointer will stay behind
+         * until post_recv allocated more descriptors from the memory pool, fills
+         * the holes, and moves it forward.
+         */
+        ucs_assert(wqe_index == ((iface->rx.srq.free_idx + 1) & iface->rx.srq.mask));
+        ++iface->rx.srq.ready_idx;
+        ++iface->rx.srq.free_idx;
+    } else {
+        if (wqe_index == ((iface->rx.srq.free_idx + 1) & iface->rx.srq.mask)) {
+            ++iface->rx.srq.free_idx;
+        } else {
+            /* Mark the segment as out-of-order, post_recv will advance free */
+            seg->srq.free = 1;
+        }
+    }
+
+    ++iface->super.rx.srq.available;
+}
+
+/* Find or create the multi-packet context for _key in hash _h_ptr and update
+ * *_flags with FIRST / MORE. Note: on the paths where _last is set, this
+ * macro executes "return" from the enclosing function, yielding the
+ * interface's last_frag_ctx. */
+#define uct_rc_mlx5_iface_mp_hash_lookup(_h_name, _h_ptr, _key, _last, _flags, \
+                                         _iface) \
+    ({ \
+        uct_rc_mlx5_mp_context_t *ctx; \
+        khiter_t h_it; \
+        int ret; \
+        h_it = kh_get(_h_name, _h_ptr, _key); \
+        if (h_it == kh_end(_h_ptr)) { \
+            /* No data from this sender - this must be the first fragment */ \
+            *(_flags) |= UCT_CB_PARAM_FLAG_FIRST; \
+            if (ucs_likely(_last)) { \
+                /* fast path - single fragment message */ \
+                return &(_iface)->tm.mp.last_frag_ctx; \
+            } \
+            h_it = kh_put(_h_name, _h_ptr, _key, &ret); \
+            ucs_assert(ret != 0); \
+            ctx  = &kh_value(_h_ptr, h_it); \
+        } else { \
+            ctx = &kh_value(_h_ptr, h_it); \
+            if (_last) { \
+                (_iface)->tm.mp.last_frag_ctx = *ctx; \
+                kh_del(_h_name, _h_ptr, h_it); \
+                return &(_iface)->tm.mp.last_frag_ctx; \
+            } \
+        } \
+        *(_flags) |= UCT_CB_PARAM_FLAG_MORE; \
+        ctx; \
+    })
+
+/* Return the multi-packet context stored in the endpoint which owns the QP
+ * number from the CQE, setting FIRST / MORE in *flags according to the
+ * endpoint's mp.free state and the CQE's last-message bit */
+static UCS_F_ALWAYS_INLINE uct_rc_mlx5_mp_context_t*
+uct_rc_mlx5_iface_rx_mp_context_from_ep(uct_rc_mlx5_iface_common_t *iface,
+                                        struct mlx5_cqe64 *cqe, unsigned *flags)
+{
+    uint32_t qp_num      = ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER);
+    uct_rc_mlx5_ep_t *ep = ucs_derived_of(uct_rc_iface_lookup_ep(&iface->super,
+                                                                 qp_num),
+                                          uct_rc_mlx5_ep_t);
+    ucs_assert(ep != NULL);
+    if (ep->mp.free) {
+        *flags     |= UCT_CB_PARAM_FLAG_FIRST;
+        ep->mp.free = 0;
+    }
+
+    if (cqe->byte_cnt & htonl(UCT_RC_MLX5_MP_RQ_LAST_MSG_FIELD)) {
+        ucs_assert(!ep->mp.free);
+        ep->mp.free = 1;
+    } else {
+        *flags     |= UCT_CB_PARAM_FLAG_MORE;
+    }
+
+    return &ep->mp;
+}
+
+/* Return the multi-packet context looked up in the interface hash tables,
+ * keyed by sender GID + QP when the CQE carries a GRH, or by QP + SLID
+ * otherwise */
+static UCS_F_ALWAYS_INLINE uct_rc_mlx5_mp_context_t*
+uct_rc_mlx5_iface_rx_mp_context_from_hash(uct_rc_mlx5_iface_common_t *iface,
+                                          struct mlx5_cqe64 *cqe,
+                                          unsigned *flags)
+{
+    uct_rc_mlx5_mp_context_t *mp_ctx;
+    uct_rc_mlx5_mp_hash_key_t key_gid;
+    uint64_t key_lid;
+    void *gid;
+    int last;
+
+    last = cqe->byte_cnt & htonl(UCT_RC_MLX5_MP_RQ_LAST_MSG_FIELD);
+
+    if (uct_ib_mlx5_cqe_is_grh_present(cqe)) {
+        gid = uct_ib_mlx5_gid_from_cqe(cqe);
+        /* Use guid and QP as a key. No need to fetch just qp
+         * and convert to le. */
+        key_gid.guid   = *(uint64_t*)UCS_PTR_BYTE_OFFSET(gid, 8);
+        key_gid.qp_num = cqe->flags_rqpn;
+        mp_ctx         = uct_rc_mlx5_iface_mp_hash_lookup(uct_rc_mlx5_mp_hash_gid,
+                                                          &iface->tm.mp.hash_gid,
+                                                          key_gid, last, flags,
+                                                          iface);
+    } else {
+        /* Combine QP and SLID as a key. No need to fetch just qp
+         * and convert to le. */
+        key_lid = (uint64_t)cqe->flags_rqpn << 32 | cqe->slid;
+        mp_ctx  = uct_rc_mlx5_iface_mp_hash_lookup(uct_rc_mlx5_mp_hash_lid,
+                                                   &iface->tm.mp.hash_lid,
+                                                   key_lid, last, flags,
+                                                   iface);
+    }
+
+    ucs_assert(mp_ctx != NULL);
+    return mp_ctx;
+}
+
+/* Poll the RX CQ once. Returns the next software-owned CQE and advances the
+ * consumer index, or NULL if the CQE is still hardware-owned or is an error
+ * CQE (which is passed to uct_rc_mlx5_iface_check_rx_completion()). */
+static UCS_F_ALWAYS_INLINE struct mlx5_cqe64*
+uct_rc_mlx5_iface_poll_rx_cq(uct_rc_mlx5_iface_common_t *iface)
+{
+    uct_ib_mlx5_cq_t *cq = &iface->cq[UCT_IB_DIR_RX];
+    struct mlx5_cqe64 *cqe;
+    unsigned index;
+    uint8_t op_own;
+
+    /* Prefetch the descriptor if it was scheduled */
+    ucs_prefetch(iface->rx.pref_ptr);
+
+    index  = cq->cq_ci;
+    cqe    = uct_ib_mlx5_get_cqe(cq, index);
+    op_own = cqe->op_own;
+
+    if (ucs_unlikely(uct_ib_mlx5_cqe_is_hw_owned(op_own, index, cq->cq_length))) {
+        return NULL;
+    } else if (ucs_unlikely(op_own & UCT_IB_MLX5_CQE_OP_OWN_ERR_MASK)) {
+        uct_rc_mlx5_iface_check_rx_completion(iface, cqe);
+        return NULL;
+    }
+
+    cq->cq_ci = index + 1;
+    return cqe; /* TODO optimize - let compiler know cqe is not null */
+}
+
+/* Return a pointer to the received header for a CQE and set *flags:
+ * 0 when the data was scattered inline into the CQE, or
+ * UCT_CB_PARAM_FLAG_DESC when it resides in the receive descriptor */
+static UCS_F_ALWAYS_INLINE void*
+uct_rc_mlx5_iface_common_data(uct_rc_mlx5_iface_common_t *iface,
+                              struct mlx5_cqe64 *cqe,
+                              unsigned byte_len, unsigned *flags)
+{
+    uct_ib_mlx5_srq_seg_t *seg;
+    uct_ib_iface_recv_desc_t *desc;
+    void *hdr;
+
+    seg  = uct_ib_mlx5_srq_get_wqe(&iface->rx.srq, ntohs(cqe->wqe_counter));
+    desc = seg->srq.desc;
+
+    /* Get a pointer to AM or Tag header (after which comes the payload)
+     * Support cases of inline scatter by pointing directly to CQE.
+     */
+    if (cqe->op_own & MLX5_INLINE_SCATTER_32) {
+        hdr = cqe;
+        uct_rc_mlx5_iface_common_rx_inline(iface, desc,
+                                           UCT_RC_MLX5_IFACE_STAT_RX_INL_32,
+                                           byte_len);
+        *flags = 0;
+    } else if (cqe->op_own & MLX5_INLINE_SCATTER_64) {
+        hdr = cqe - 1;
+        uct_rc_mlx5_iface_common_rx_inline(iface, desc,
+                                           UCT_RC_MLX5_IFACE_STAT_RX_INL_64,
+                                           byte_len);
+        *flags = 0;
+    } else {
+        hdr = uct_ib_iface_recv_desc_hdr(&iface->super.super, desc);
+        VALGRIND_MAKE_MEM_DEFINED(hdr, byte_len);
+        *flags = UCT_CB_PARAM_FLAG_DESC;
+        /* Assuming that next packet likely will be non-inline,
+         * setup the next prefetch pointer
+         */
+        uct_rc_mlx5_srq_prefetch_setup(iface);
+    }
+
+    return hdr;
+}
+
+/* Tag-matching variant of uct_rc_mlx5_iface_common_data(). Without
+ * multi-packet support it delegates to the common function; otherwise it also
+ * resolves the multi-packet context into *context_p (from the endpoint when
+ * poll_flags has UCT_RC_MLX5_POLL_FLAG_HAS_EP, else from the hash) and reads
+ * the data from the stride indicated by the CQE. */
+static UCS_F_ALWAYS_INLINE void*
+uct_rc_mlx5_iface_tm_common_data(uct_rc_mlx5_iface_common_t *iface,
+                                 struct mlx5_cqe64 *cqe, unsigned byte_len,
+                                 unsigned *flags, int poll_flags,
+                                 uct_rc_mlx5_mp_context_t **context_p)
+{
+    uct_ib_mlx5_srq_seg_t *seg;
+    void *hdr;
+    int stride_idx;
+
+    if (!UCT_RC_MLX5_MP_ENABLED(iface)) {
+        /* uct_rc_mlx5_iface_common_data will initialize flags value */
+        hdr        = uct_rc_mlx5_iface_common_data(iface, cqe, byte_len, flags);
+        *flags    |= UCT_CB_PARAM_FLAG_FIRST;
+        *context_p = &iface->tm.mp.last_frag_ctx;
+        return hdr;
+    }
+
+    ucs_assert(byte_len <= UCT_RC_MLX5_MP_RQ_BYTE_CNT_FIELD_MASK);
+    *flags = 0;
+
+    if (poll_flags & UCT_RC_MLX5_POLL_FLAG_HAS_EP) {
+        *context_p = uct_rc_mlx5_iface_rx_mp_context_from_ep(iface, cqe, flags);
+    } else {
+        *context_p = uct_rc_mlx5_iface_rx_mp_context_from_hash(iface, cqe, flags);
+    }
+
+    /* Get a pointer to the tag header or the payload (if it is not the first
+     * fragment). */
+    if (cqe->op_own & MLX5_INLINE_SCATTER_32) {
+        hdr = cqe;
+        uct_rc_mlx5_iface_common_rx_inline(iface, NULL,
+                                           UCT_RC_MLX5_IFACE_STAT_RX_INL_32,
+                                           byte_len);
+    } else if (cqe->op_own & MLX5_INLINE_SCATTER_64) {
+        hdr = cqe - 1;
+        uct_rc_mlx5_iface_common_rx_inline(iface, NULL,
+                                           UCT_RC_MLX5_IFACE_STAT_RX_INL_64,
+                                           byte_len);
+    } else {
+        *flags    |= UCT_CB_PARAM_FLAG_DESC;
+        seg        = uct_ib_mlx5_srq_get_wqe(&iface->rx.srq, ntohs(cqe->wqe_counter));
+        stride_idx = uct_ib_mlx5_cqe_stride_index(cqe);
+        ucs_assert(stride_idx < iface->tm.mp.num_strides);
+        hdr        = (void*)be64toh(seg->dptr[stride_idx].addr);
+        VALGRIND_MAKE_MEM_DEFINED(hdr, byte_len);
+    }
+
+    return hdr;
+}
+
+/* Deliver a received active message: flow-control messages (am_id has
+ * UCT_RC_EP_FC_MASK bits set) go to the interface's fc_handler, all others to
+ * uct_iface_invoke_am(). The SRQ segment is released afterwards with the
+ * handler's status. */
+static UCS_F_ALWAYS_INLINE void
+uct_rc_mlx5_iface_common_am_handler(uct_rc_mlx5_iface_common_t *iface,
+                                    struct mlx5_cqe64 *cqe,
+                                    uct_rc_mlx5_hdr_t *hdr,
+                                    unsigned flags, unsigned byte_len)
+{
+    uint16_t wqe_ctr;
+    uct_rc_iface_ops_t *rc_ops;
+    uct_ib_mlx5_srq_seg_t *seg;
+    uint32_t qp_num;
+    ucs_status_t status;
+
+    wqe_ctr = ntohs(cqe->wqe_counter);
+    seg     = uct_ib_mlx5_srq_get_wqe(&iface->rx.srq, wqe_ctr);
+
+    uct_ib_mlx5_log_rx(&iface->super.super, cqe, hdr,
+                       uct_rc_mlx5_common_packet_dump);
+
+    if (ucs_unlikely(hdr->rc_hdr.am_id & UCT_RC_EP_FC_MASK)) {
+        qp_num = ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER);
+        rc_ops = ucs_derived_of(iface->super.super.ops, uct_rc_iface_ops_t);
+
+        /* coverity[overrun-buffer-val] */
+        status = rc_ops->fc_handler(&iface->super, qp_num, &hdr->rc_hdr,
+                                    byte_len - sizeof(*hdr),
+                                    cqe->imm_inval_pkey, cqe->slid, flags);
+    } else {
+        status = uct_iface_invoke_am(&iface->super.super.super, hdr->rc_hdr.am_id,
+                                     hdr + 1, byte_len - sizeof(*hdr),
+                                     flags);
+    }
+
+    uct_rc_mlx5_iface_release_srq_seg(iface, seg, cqe, wqe_ctr, status,
+                                      iface->tm.am_desc.offset,
+                                      &iface->tm.am_desc.super);
+}
+
+/* Build fm_ce_se for a signaled WQE: CQ update plus whatever
+ * uct_rc_ep_fm() returns for the given flag */
+static UCS_F_ALWAYS_INLINE uint8_t
+uct_rc_mlx5_ep_fm_cq_update(uct_rc_mlx5_iface_common_t *iface,
+                            uct_ib_mlx5_txwq_t *txwq, int flag)
+{
+    uint8_t fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+
+    fm_ce_se |= uct_rc_ep_fm(&iface->super, &txwq->fi, flag);
+
+    return fm_ce_se;
+}
+
+/* Fill the control segment (and the datagram segment for DCI), log the WQE,
+ * post it and update TX accounting. For DCI only txqp->available is reduced;
+ * for other QP types uct_rc_txqp_posted() is called. */
+static UCS_F_ALWAYS_INLINE void
+uct_rc_mlx5_common_post_send(uct_rc_mlx5_iface_common_t *iface, int qp_type,
+                             uct_rc_txqp_t *txqp, uct_ib_mlx5_txwq_t *txwq,
+                             uint8_t opcode, uint8_t opmod, uint8_t fm_ce_se,
+                             size_t wqe_size, uct_ib_mlx5_base_av_t *av,
+                             struct mlx5_grh_av *grh_av, uint32_t imm, int max_log_sge,
+                             uct_ib_log_sge_t *log_sge)
+{
+    struct mlx5_wqe_ctrl_seg *ctrl;
+    uint16_t res_count;
+
+    ctrl = txwq->curr;
+
+    if (opcode == MLX5_OPCODE_SEND_IMM) {
+        uct_ib_mlx5_set_ctrl_seg_with_imm(ctrl, txwq->sw_pi, opcode, opmod,
+                                          txwq->super.qp_num, fm_ce_se, wqe_size,
+                                          imm);
+    } else {
+        uct_ib_mlx5_set_ctrl_seg(ctrl, txwq->sw_pi, opcode, opmod,
+                                 txwq->super.qp_num, fm_ce_se, wqe_size);
+    }
+
+    ucs_assert(qp_type == iface->super.super.config.qp_type);
+
+#if HAVE_TL_DC
+    if (qp_type == UCT_IB_QPT_DCI) {
+        uct_ib_mlx5_set_dgram_seg((void*)(ctrl + 1), av, grh_av, qp_type);
+    }
+#endif
+
+    uct_ib_mlx5_log_tx(&iface->super.super, ctrl, txwq->qstart, txwq->qend,
+                       max_log_sge, log_sge,
+                       ((opcode == MLX5_OPCODE_SEND) || (opcode == MLX5_OPCODE_SEND_IMM)) ?
+                       uct_rc_mlx5_common_packet_dump : NULL);
+
+    res_count = uct_ib_mlx5_post_send(txwq, ctrl, wqe_size);
+    if (fm_ce_se & MLX5_WQE_CTRL_CQ_UPDATE) {
+        txwq->sig_pi = txwq->prev_sw_pi;
+    }
+
+#if HAVE_TL_DC
+    if (qp_type == UCT_IB_QPT_DCI) {
+        txqp->available -= res_count;
+        return;
+    }
+#endif
+
+    uct_rc_txqp_posted(txqp, &iface->super, res_count, fm_ce_se & MLX5_WQE_CTRL_CQ_UPDATE);
+}
+
+
+/*
+ * Generic function that setups and posts WQE with inline segment
+ * Parameters which are not relevant to the opcode are ignored.
+ *
+ *            +--------+-----+-------+--------+------------
+ * SEND       | CTRL   | INL | am_id | am_hdr | payload ...
+ *            +--------+-----+---+---+-+-------+-----------
+ * RDMA_WRITE | CTRL   | RADDR   | INL | payload ...
+ *            +--------+---------+-----+-------------------
+ *
+ * CTRL is mlx5_wqe_ctrl_seg for RC and
+ *         mlx5_wqe_ctrl_seg + mlx5_wqe_datagram_seg for DC
+ *
+ * NOTE: switch is optimized away during inlining because opcode
+ * is a compile time constant
+ */
+static UCS_F_ALWAYS_INLINE void
+uct_rc_mlx5_txqp_inline_post(uct_rc_mlx5_iface_common_t *iface, int qp_type,
+                             uct_rc_txqp_t *txqp, uct_ib_mlx5_txwq_t *txwq,
+                             unsigned opcode, const void *buffer, unsigned length,
+                  /* SEND */ uint8_t am_id, uint64_t am_hdr, uint32_t imm_val_be,
+                  /* RDMA */ uint64_t rdma_raddr, uct_rkey_t rdma_rkey,
+                  /* AV   */ uct_ib_mlx5_base_av_t *av, struct mlx5_grh_av *grh_av,
+                             size_t av_size, unsigned fm_ce_se, int max_log_sge)
+{
+    struct mlx5_wqe_ctrl_seg     *ctrl;
+    struct mlx5_wqe_raddr_seg    *raddr;
+    struct mlx5_wqe_inl_data_seg *inl;
+    uct_rc_mlx5_am_short_hdr_t   *am;
+    uct_rc_mlx5_hdr_t            *rc_hdr;
+    size_t wqe_size, ctrl_av_size;
+    void *next_seg;
+
+    ctrl         = txwq->curr;
+    ctrl_av_size = sizeof(*ctrl) + av_size;
+    next_seg     = UCS_PTR_BYTE_OFFSET(ctrl, ctrl_av_size);
+    next_seg     = uct_ib_mlx5_txwq_wrap_exact(txwq, next_seg);
+
+    switch (opcode) {
+    case MLX5_OPCODE_SEND_IMM:
+        /* Fall through to MLX5_OPCODE_SEND handler */
+    case MLX5_OPCODE_SEND:
+        /* Set inline segment which has AM id, AM header, and AM payload */
+        wqe_size         = ctrl_av_size + sizeof(*inl) + sizeof(*am) + length;
+        inl              = next_seg;
+        inl->byte_count  = htonl((length + sizeof(*am)) | MLX5_INLINE_SEG);
+        am               = (void*)(inl + 1);
+        am->am_hdr       = am_hdr;
+        uct_rc_mlx5_am_hdr_fill(&am->rc_hdr, am_id);
+        uct_ib_mlx5_inline_copy(am + 1, buffer, length, txwq);
+        fm_ce_se        |= uct_rc_iface_tx_moderation(&iface->super, txqp, MLX5_WQE_CTRL_CQ_UPDATE);
+        break;
+
+    case MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_RAW:
+        /* Send empty AM with just AM id (used by FC) */
+        wqe_size         = ctrl_av_size + sizeof(*inl) + sizeof(*rc_hdr);
+        inl              = next_seg;
+        inl->byte_count  = htonl(sizeof(*rc_hdr) | MLX5_INLINE_SEG);
+        rc_hdr           = (void*)(inl + 1);
+        uct_rc_mlx5_am_hdr_fill(rc_hdr, am_id);
+        fm_ce_se        |= uct_rc_iface_tx_moderation(&iface->super, txqp, MLX5_WQE_CTRL_CQ_UPDATE);
+        break;
+
+    case MLX5_OPCODE_RDMA_WRITE:
+        /* Set RDMA segment */
+        if (length == 0) {
+            wqe_size     = ctrl_av_size + sizeof(*raddr);
+        } else {
+            wqe_size     = ctrl_av_size + sizeof(*raddr) + sizeof(*inl) + length;
+        }
+        raddr            = next_seg;
+        uct_ib_mlx5_ep_set_rdma_seg(raddr, rdma_raddr, rdma_rkey);
+        inl              = uct_ib_mlx5_txwq_wrap_none(txwq, raddr + 1);
+        inl->byte_count  = htonl(length | MLX5_INLINE_SEG);
+        uct_ib_mlx5_inline_copy(inl + 1, buffer, length, txwq);
+        fm_ce_se        |= uct_rc_iface_tx_moderation(&iface->super, txqp, MLX5_WQE_CTRL_CQ_UPDATE);
+        break;
+
+    case MLX5_OPCODE_NOP:
+        /* Empty inline segment */
+        wqe_size         = sizeof(*ctrl) + av_size;
+        inl              = next_seg;
+        inl->byte_count  = htonl(MLX5_INLINE_SEG);
+        fm_ce_se        |= MLX5_WQE_CTRL_CQ_UPDATE | MLX5_WQE_CTRL_FENCE;
+        break;
+
+    default:
+        ucs_fatal("invalid send opcode");
+    }
+
+    /* opmod is always 0 and no caller-provided SGE list is logged here */
+    uct_rc_mlx5_common_post_send(iface, qp_type, txqp, txwq, opcode, 0, fm_ce_se,
+                                 wqe_size, av, grh_av, imm_val_be, max_log_sge, NULL);
+}
+
+/*
+ * Generic data-pointer posting function.
+ * Parameters which are not relevant to the opcode are ignored.
+ * + * +--------+-----+-------+ + * SEND | CTRL | INL | DPSEG | + * +--------+-----+---+---+----+ + * RDMA_WRITE | CTRL | RADDR | DPSEG | + * +--------+---------+--------+-------+ + * ATOMIC | CTRL | RADDR | ATOMIC | DPSEG | + * +--------+---------+--------+-------+ + * + * CTRL is mlx5_wqe_ctrl_seg for RC and + * mlx5_wqe_ctrl_seg + mlx5_wqe_datagram_seg for DC + * + * NOTE: switch is optimized away during inlining because opcode_flags + * is a compile time constant + */ +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_txqp_dptr_post(uct_rc_mlx5_iface_common_t *iface, int qp_type, + uct_rc_txqp_t *txqp, uct_ib_mlx5_txwq_t *txwq, + unsigned opcode_flags, const void *buffer, + unsigned length, uint32_t *lkey_p, + /* RDMA/ATOMIC */ uint64_t remote_addr, uct_rkey_t rkey, + /* ATOMIC */ uint64_t compare_mask, uint64_t compare, + /* ATOMIC */ uint64_t swap_mask, uint64_t swap_add, + /* AV */ uct_ib_mlx5_base_av_t *av, struct mlx5_grh_av *grh_av, + size_t av_size, uint8_t fm_ce_se, uint32_t imm_val_be, + int max_log_sge, uct_ib_log_sge_t *log_sge) +{ + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_raddr_seg *raddr; + struct mlx5_wqe_atomic_seg *atomic; + struct mlx5_wqe_data_seg *dptr; + struct uct_ib_mlx5_atomic_masked_cswap32_seg *masked_cswap32; + struct uct_ib_mlx5_atomic_masked_fadd32_seg *masked_fadd32; + struct uct_ib_mlx5_atomic_masked_cswap64_seg *masked_cswap64; + struct uct_ib_mlx5_atomic_masked_fadd64_seg *masked_fadd64; + size_t wqe_size, ctrl_av_size; + uint8_t opmod; + void *next_seg; + + if (!(fm_ce_se & MLX5_WQE_CTRL_CQ_UPDATE)) { + fm_ce_se |= uct_rc_iface_tx_moderation(&iface->super, txqp, MLX5_WQE_CTRL_CQ_UPDATE); + } + + opmod = 0; + ctrl = txwq->curr; + ctrl_av_size = sizeof(*ctrl) + av_size; + next_seg = UCS_PTR_BYTE_OFFSET(ctrl, ctrl_av_size); + next_seg = uct_ib_mlx5_txwq_wrap_exact(txwq, next_seg); + + switch (opcode_flags) { + case MLX5_OPCODE_SEND_IMM: /* Used by tag offload */ + case MLX5_OPCODE_SEND: + /* Data segment only */ + 
ucs_assert(length < (2ul << 30)); + + /* TODO: make proper check for all cases TM, MP, etc + * ucs_assert(length <= iface->super.super.config.seg_size); */ + + wqe_size = ctrl_av_size + sizeof(struct mlx5_wqe_data_seg); + uct_ib_mlx5_set_data_seg(next_seg, buffer, length, *lkey_p); + break; + + case MLX5_OPCODE_RDMA_READ: + fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; + /* Fall through */ + case MLX5_OPCODE_RDMA_WRITE: + /* Set RDMA segment */ + fm_ce_se |= uct_rc_ep_fm(&iface->super, &txwq->fi, + (opcode_flags == MLX5_OPCODE_RDMA_READ) ? + iface->config.atomic_fence_flag : + iface->config.put_fence_flag); + + ucs_assert(length <= UCT_IB_MAX_MESSAGE_SIZE); + + raddr = next_seg; + uct_ib_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey); + + /* Data segment */ + if (length == 0) { + wqe_size = ctrl_av_size + sizeof(*raddr); + } else { + /* dptr cannot wrap, because ctrl+av could be either 2 or 4 segs */ + dptr = uct_ib_mlx5_txwq_wrap_none(txwq, raddr + 1); + wqe_size = ctrl_av_size + sizeof(*raddr) + sizeof(*dptr); + uct_ib_mlx5_set_data_seg(dptr, buffer, length, *lkey_p); + } + break; + + case MLX5_OPCODE_ATOMIC_FA: + case MLX5_OPCODE_ATOMIC_CS: + fm_ce_se |= uct_rc_mlx5_ep_fm_cq_update(iface, txwq, + iface->config.atomic_fence_flag); + ucs_assert(length == sizeof(uint64_t)); + raddr = next_seg; + uct_ib_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey); + + /* atomic cannot wrap, because ctrl+av could be either 2 or 4 segs */ + atomic = uct_ib_mlx5_txwq_wrap_none(txwq, raddr + 1); + if (opcode_flags == MLX5_OPCODE_ATOMIC_CS) { + atomic->compare = compare; + } + atomic->swap_add = swap_add; + + dptr = uct_ib_mlx5_txwq_wrap_exact(txwq, atomic + 1); + uct_ib_mlx5_set_data_seg(dptr, buffer, length, *lkey_p); + wqe_size = ctrl_av_size + sizeof(*raddr) + sizeof(*atomic) + + sizeof(*dptr); + break; + + case MLX5_OPCODE_ATOMIC_MASKED_CS: + fm_ce_se |= uct_rc_mlx5_ep_fm_cq_update(iface, txwq, + iface->config.atomic_fence_flag); + raddr = next_seg; + uct_ib_mlx5_ep_set_rdma_seg(raddr, 
remote_addr, rkey); + + switch (length) { + case sizeof(uint32_t): + opmod = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(2); + masked_cswap32 = uct_ib_mlx5_txwq_wrap_none(txwq, raddr + 1); + masked_cswap32->swap = swap_add; + masked_cswap32->compare = compare; + masked_cswap32->swap_mask = swap_mask; + masked_cswap32->compare_mask = compare_mask; + dptr = uct_ib_mlx5_txwq_wrap_exact(txwq, masked_cswap32 + 1); + wqe_size = ctrl_av_size + sizeof(*raddr) + + sizeof(*masked_cswap32) + sizeof(*dptr); + break; + case sizeof(uint64_t): + opmod = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(3); /* Ext. atomic, size 2**3 */ + masked_cswap64 = uct_ib_mlx5_txwq_wrap_none(txwq, raddr + 1); + masked_cswap64->swap = swap_add; + masked_cswap64->compare = compare; + + /* 2nd half of masked_cswap64 can wrap */ + masked_cswap64 = uct_ib_mlx5_txwq_wrap_exact(txwq, masked_cswap64 + 1); + masked_cswap64->swap = swap_mask; + masked_cswap64->compare = compare_mask; + + dptr = uct_ib_mlx5_txwq_wrap_exact(txwq, masked_cswap64 + 1); + wqe_size = ctrl_av_size + sizeof(*raddr) + + 2 * sizeof(*masked_cswap64) + sizeof(*dptr); + break; + default: + ucs_fatal("invalid atomic type length %d", length); + } + uct_ib_mlx5_set_data_seg(dptr, buffer, length, *lkey_p); + break; + + case MLX5_OPCODE_ATOMIC_MASKED_FA: + fm_ce_se |= uct_rc_mlx5_ep_fm_cq_update(iface, txwq, iface->config.atomic_fence_flag); + raddr = next_seg; + uct_ib_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey); + + switch (length) { + case sizeof(uint32_t): + opmod = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(2); + masked_fadd32 = uct_ib_mlx5_txwq_wrap_none(txwq, raddr + 1); + masked_fadd32->add = swap_add; + masked_fadd32->filed_boundary = compare; + + dptr = uct_ib_mlx5_txwq_wrap_exact(txwq, masked_fadd32 + 1); + wqe_size = ctrl_av_size + sizeof(*raddr) + + sizeof(*masked_fadd32) + sizeof(*dptr); + break; + case sizeof(uint64_t): + opmod = UCT_IB_MLX5_OPMOD_EXT_ATOMIC(3); /* Ext. 
atomic, size 2**3 */ + masked_fadd64 = uct_ib_mlx5_txwq_wrap_none(txwq, raddr + 1); + masked_fadd64->add = swap_add; + masked_fadd64->filed_boundary = compare; + + dptr = uct_ib_mlx5_txwq_wrap_exact(txwq, masked_fadd64 + 1); + wqe_size = ctrl_av_size + sizeof(*raddr) + + sizeof(*masked_fadd64) + sizeof(*dptr); + break; + default: + ucs_fatal("invalid atomic type length %d", length); + } + uct_ib_mlx5_set_data_seg(dptr, buffer, length, *lkey_p); + break; + + default: + ucs_fatal("invalid send opcode"); + } + + uct_rc_mlx5_common_post_send(iface, qp_type, txqp, txwq, + (opcode_flags & UCT_RC_MLX5_OPCODE_MASK), opmod, + fm_ce_se, wqe_size, av, grh_av, imm_val_be, + max_log_sge, log_sge); +} + +static UCS_F_ALWAYS_INLINE +void uct_rc_mlx5_txqp_dptr_post_iov(uct_rc_mlx5_iface_common_t *iface, int qp_type, + uct_rc_txqp_t *txqp, uct_ib_mlx5_txwq_t *txwq, + unsigned opcode_flags, + /* IOV */ const uct_iov_t *iov, size_t iovcnt, + /* SEND */ uint8_t am_id, const void *am_hdr, unsigned am_hdr_len, + /* RDMA */ uint64_t remote_addr, uct_rkey_t rkey, + /* TAG */ uct_tag_t tag, uint32_t app_ctx, uint32_t ib_imm_be, + /* AV */ uct_ib_mlx5_base_av_t *av, struct mlx5_grh_av *grh_av, + size_t av_size, uint8_t fm_ce_se, int max_log_sge) +{ + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_raddr_seg *raddr; + struct mlx5_wqe_data_seg *dptr; + struct mlx5_wqe_inl_data_seg *inl; + uct_rc_mlx5_hdr_t *rch; + unsigned wqe_size, inl_seg_size, ctrl_av_size; + void *next_seg; + + if (!(fm_ce_se & MLX5_WQE_CTRL_CQ_UPDATE)) { + fm_ce_se |= uct_rc_iface_tx_moderation(&iface->super, txqp, MLX5_WQE_CTRL_CQ_UPDATE); + } + + ctrl = txwq->curr; + ctrl_av_size = sizeof(*ctrl) + av_size; + next_seg = UCS_PTR_BYTE_OFFSET(ctrl, ctrl_av_size); + next_seg = uct_ib_mlx5_txwq_wrap_exact(txwq, next_seg); + + switch (opcode_flags) { + case MLX5_OPCODE_SEND: + inl_seg_size = ucs_align_up_pow2(sizeof(*inl) + sizeof(*rch) + am_hdr_len, + UCT_IB_MLX5_WQE_SEG_SIZE); + + ucs_assert(uct_iov_total_length(iov, 
iovcnt) + sizeof(*rch) + am_hdr_len <= + iface->super.super.config.seg_size); + + /* Inline segment with AM ID and header */ + inl = next_seg; + inl->byte_count = htonl((sizeof(*rch) + am_hdr_len) | MLX5_INLINE_SEG); + rch = (uct_rc_mlx5_hdr_t *)(inl + 1); + + uct_rc_mlx5_am_hdr_fill(rch, am_id); + uct_ib_mlx5_inline_copy(rch + 1, am_hdr, am_hdr_len, txwq); + + /* Data segment with payload */ + dptr = (struct mlx5_wqe_data_seg *)((char *)inl + inl_seg_size); + wqe_size = ctrl_av_size + inl_seg_size + + uct_ib_mlx5_set_data_seg_iov(txwq, dptr, iov, iovcnt); + + ucs_assert(wqe_size <= UCT_IB_MLX5_MAX_SEND_WQE_SIZE); + break; + +#if IBV_HW_TM + case MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_TM: + case MLX5_OPCODE_SEND_IMM|UCT_RC_MLX5_OPCODE_FLAG_TM: + inl_seg_size = ucs_align_up_pow2(sizeof(*inl) + sizeof(struct ibv_tmh), + UCT_IB_MLX5_WQE_SEG_SIZE); + inl = next_seg; + inl->byte_count = htonl(sizeof(struct ibv_tmh) | MLX5_INLINE_SEG); + dptr = uct_ib_mlx5_txwq_wrap_exact(txwq, (char *)inl + inl_seg_size); + wqe_size = ctrl_av_size + inl_seg_size + + uct_ib_mlx5_set_data_seg_iov(txwq, dptr, iov, iovcnt); + + uct_rc_mlx5_fill_tmh((struct ibv_tmh*)(inl + 1), tag, app_ctx, + IBV_TMH_EAGER); + ucs_assert(wqe_size <= UCT_IB_MLX5_MAX_SEND_WQE_SIZE); + break; +#endif + + case MLX5_OPCODE_RDMA_READ: + fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; + /* Fall through */ + case MLX5_OPCODE_RDMA_WRITE: + /* Set RDMA segment */ + fm_ce_se |= uct_rc_ep_fm(&iface->super, &txwq->fi, + (opcode_flags == MLX5_OPCODE_RDMA_READ) ? 
+ iface->config.atomic_fence_flag : + iface->config.put_fence_flag); + + ucs_assert(uct_iov_total_length(iov, iovcnt) <= UCT_IB_MAX_MESSAGE_SIZE); + + raddr = next_seg; + uct_ib_mlx5_ep_set_rdma_seg(raddr, remote_addr, rkey); + + /* Data segment */ + wqe_size = ctrl_av_size + sizeof(*raddr) + + uct_ib_mlx5_set_data_seg_iov(txwq, (void*)(raddr + 1), + iov, iovcnt); + break; + + default: + ucs_fatal("invalid send opcode"); + } + + uct_rc_mlx5_common_post_send(iface, qp_type, txqp, txwq, + opcode_flags & UCT_RC_MLX5_OPCODE_MASK, + 0, fm_ce_se, wqe_size, av, grh_av, ib_imm_be, + max_log_sge, NULL); +} + +#if IBV_HW_TM +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_set_tm_seg(uct_ib_mlx5_txwq_t *txwq, + uct_rc_mlx5_wqe_tm_seg_t *tmseg, int op, int index, + uint32_t unexp_cnt, uint64_t tag, uint64_t mask, + unsigned tm_flags) +{ + tmseg->sw_cnt = htons(unexp_cnt); + tmseg->opcode = op << 4; + tmseg->flags = tm_flags; + + if (op == UCT_RC_MLX5_TM_OPCODE_NOP) { + return; + } + + tmseg->index = htons(index); + + if (op == UCT_RC_MLX5_TM_OPCODE_REMOVE) { + return; + } + + tmseg->append_tag = tag; + tmseg->append_mask = mask; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_release_tag_entry(uct_rc_mlx5_iface_common_t *iface, + uct_rc_mlx5_tag_entry_t *tag) +{ + if (!--tag->num_cqes) { + tag->next = NULL; + iface->tm.tail->next = tag; + iface->tm.tail = tag; + } +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_add_cmd_wq_op(uct_rc_mlx5_iface_common_t *iface, + uct_rc_mlx5_tag_entry_t *tag) +{ + uct_rc_mlx5_srq_op_t *op; + + op = iface->tm.cmd_wq.ops + + (iface->tm.cmd_wq.ops_tail++ & iface->tm.cmd_wq.ops_mask); + op->tag = tag; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_txqp_tag_inline_post(uct_rc_mlx5_iface_common_t *iface, int qp_type, + uct_rc_txqp_t *txqp, uct_ib_mlx5_txwq_t *txwq, + unsigned opcode, const void *buffer, unsigned length, + const uct_iov_t *iov, /* relevant for RNDV */ + uct_tag_t tag, uint32_t app_ctx, int tm_op, + uint32_t imm_val_be, 
uct_ib_mlx5_base_av_t *av, + struct mlx5_grh_av *grh_av, size_t av_size, + void *ravh, size_t ravh_len, unsigned fm_ce_se) +{ + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_inl_data_seg *inl; + size_t wqe_size, ctrl_av_size; + struct ibv_tmh *tmh; + struct ibv_rvh rvh; + unsigned tmh_data_len; + size_t tm_hdr_len; + void UCS_V_UNUSED *ravh_ptr; + void *data; + + ctrl = txwq->curr; + ctrl_av_size = sizeof(*ctrl) + av_size; /* can be 16, 32 or 64 bytes */ + inl = uct_ib_mlx5_txwq_wrap_exact(txwq, (char*)ctrl + ctrl_av_size); + tmh = (struct ibv_tmh*)(inl + 1); + + ucs_assert((opcode == MLX5_OPCODE_SEND_IMM) || (opcode == MLX5_OPCODE_SEND)); + + switch (tm_op) { + case IBV_TMH_EAGER: + wqe_size = ctrl_av_size + sizeof(*inl) + sizeof(*tmh) + length; + inl->byte_count = htonl((length + sizeof(*tmh)) | MLX5_INLINE_SEG); + data = tmh + 1; + tmh_data_len = 0; + break; + + case IBV_TMH_RNDV: + /* RVH can be wrapped */ + uct_rc_mlx5_fill_rvh(&rvh, iov->buffer, + ((uct_ib_mem_t*)iov->memh)->rkey, iov->length); + uct_ib_mlx5_inline_copy(tmh + 1, &rvh, sizeof(rvh), txwq); + + tm_hdr_len = sizeof(*tmh) + sizeof(rvh); +#if HAVE_TL_DC + if (qp_type == UCT_IB_QPT_DCI) { + /* RAVH can be wrapped as well */ + ravh_ptr = uct_ib_mlx5_txwq_wrap_data(txwq, (char*)tmh + + sizeof(*tmh) + sizeof(rvh)); + uct_ib_mlx5_inline_copy(ravh_ptr, ravh, ravh_len, txwq); + tm_hdr_len += ravh_len; + } +#endif + + tmh_data_len = uct_rc_mlx5_fill_tmh_priv_data(tmh, buffer, length, + iface->tm.max_rndv_data); + length -= tmh_data_len; /* Note: change length func parameter */ + wqe_size = ctrl_av_size + sizeof(*inl) + tm_hdr_len + length; + inl->byte_count = htonl((length + tm_hdr_len) | MLX5_INLINE_SEG); + data = uct_ib_mlx5_txwq_wrap_data(txwq, (char*)tmh + tm_hdr_len); + + break; + + default: + ucs_fatal("Invalid tag opcode: %d", tm_op); + break; + } + + ucs_assert(wqe_size <= UCT_IB_MLX5_MAX_SEND_WQE_SIZE); + + uct_rc_mlx5_fill_tmh(tmh, tag, app_ctx, tm_op); + + /* In case of RNDV first bytes of 
data could be stored in TMH */ + uct_ib_mlx5_inline_copy(data, (char*)buffer + tmh_data_len, length, txwq); + fm_ce_se |= uct_rc_iface_tx_moderation(&iface->super, txqp, MLX5_WQE_CTRL_CQ_UPDATE); + + uct_rc_mlx5_common_post_send(iface, qp_type, txqp, txwq, opcode, 0, fm_ce_se, + wqe_size, av, grh_av, imm_val_be, INT_MAX, NULL); +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_iface_common_post_srq_op(uct_rc_mlx5_cmd_wq_t *cmd_wq, + unsigned extra_wqe_size, unsigned op_code, + uint16_t next_idx, unsigned unexp_cnt, + uct_tag_t tag, uct_tag_t tag_mask, + unsigned tm_flags) +{ + uct_ib_mlx5_txwq_t *txwq = &cmd_wq->super; + struct mlx5_wqe_ctrl_seg *ctrl = txwq->curr; /* 16 bytes */ + uct_rc_mlx5_wqe_tm_seg_t *tm; /* 32 bytes */ + unsigned wqe_size; + + wqe_size = sizeof(*ctrl) + sizeof(*tm) + extra_wqe_size; + + tm = uct_ib_mlx5_txwq_wrap_none(txwq, ctrl + 1); + + uct_ib_mlx5_set_ctrl_seg(ctrl, txwq->sw_pi, UCT_RC_MLX5_OPCODE_TAG_MATCHING, + 0, txwq->super.qp_num, 0, wqe_size); + + uct_rc_mlx5_set_tm_seg(txwq, tm, op_code, next_idx, unexp_cnt, + tag, tag_mask, tm_flags); + + uct_ib_mlx5_post_send(txwq, ctrl, wqe_size); +} + + +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_rc_mlx5_iface_common_tag_recv(uct_rc_mlx5_iface_common_t *iface, + uct_tag_t tag, + uct_tag_t tag_mask, const uct_iov_t *iov, + size_t iovcnt, uct_tag_context_t *ctx) +{ + uct_rc_mlx5_ctx_priv_t *priv = uct_rc_mlx5_ctx_priv(ctx); + uct_ib_mlx5_txwq_t *txwq = &iface->tm.cmd_wq.super; + struct mlx5_wqe_data_seg *dptr; /* 16 bytes */ + uct_rc_mlx5_tag_entry_t *tag_entry; + uint16_t next_idx; + unsigned ctrl_size; + int ret; + + UCT_CHECK_IOV_SIZE(iovcnt, 1ul, "uct_rc_mlx5_iface_common_tag_recv"); + UCT_RC_MLX5_CHECK_TAG(iface); + + kh_put(uct_rc_mlx5_tag_addrs, &iface->tm.tag_addrs, iov->buffer, &ret); + if (ucs_unlikely(ret == 0)) { + /* Do not post the same buffer more than once (even with different tags) + * to avoid memory corruption. 
*/ + return UCS_ERR_ALREADY_EXISTS; + } + ucs_assert(ret > 0); + + ctrl_size = sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(uct_rc_mlx5_wqe_tm_seg_t); + tag_entry = iface->tm.head; + next_idx = tag_entry->next - iface->tm.list; + iface->tm.head = tag_entry->next; + tag_entry->next = NULL; + tag_entry->ctx = ctx; + tag_entry->num_cqes = 2; /* ADD and MSG_ARRIVED/CANCELED */ + + /* Save aux data (which will be needed in the following ops) in the context */ + priv->tag_handle = tag_entry - iface->tm.list; + priv->tag = tag; + priv->buffer = iov->buffer; /* Only one iov is supported so far */ + priv->length = iov->length; + + uct_rc_mlx5_add_cmd_wq_op(iface, tag_entry); + + dptr = uct_ib_mlx5_txwq_wrap_none(txwq, (char*)txwq->curr + ctrl_size); + uct_ib_mlx5_set_data_seg(dptr, iov->buffer, iov->length, + ((uct_ib_mem_t *)(iov->memh))->lkey); + + uct_rc_mlx5_iface_common_post_srq_op(&iface->tm.cmd_wq, sizeof(*dptr), + UCT_RC_MLX5_TM_OPCODE_APPEND, next_idx, + iface->tm.unexpected_cnt, tag, + tag_mask, + UCT_RC_MLX5_SRQ_FLAG_TM_CQE_REQ | + UCT_RC_MLX5_SRQ_FLAG_TM_SW_CNT); + + UCT_RC_MLX5_TM_STAT(iface, LIST_ADD); + + return UCS_OK; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_iface_tag_del_from_hash(uct_rc_mlx5_iface_common_t *iface, + void *buffer) +{ + khiter_t iter; + + iter = kh_get(uct_rc_mlx5_tag_addrs, &iface->tm.tag_addrs, buffer); + ucs_assert(iter != kh_end(&iface->tm.tag_addrs)); + kh_del(uct_rc_mlx5_tag_addrs, &iface->tm.tag_addrs, iter); +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_rc_mlx5_iface_common_tag_recv_cancel(uct_rc_mlx5_iface_common_t *iface, + uct_tag_context_t *ctx, int force) +{ + uct_rc_mlx5_ctx_priv_t *priv = uct_rc_mlx5_ctx_priv(ctx); + uint16_t index = priv->tag_handle; + uct_rc_mlx5_tag_entry_t *tag_entry; + unsigned flags; + + tag_entry = &iface->tm.list[index]; + + if (ucs_likely(force)) { + flags = UCT_RC_MLX5_SRQ_FLAG_TM_SW_CNT; + uct_rc_mlx5_release_tag_entry(iface, tag_entry); + uct_rc_mlx5_iface_tag_del_from_hash(iface, 
priv->buffer); + } else { + flags = UCT_RC_MLX5_SRQ_FLAG_TM_CQE_REQ | UCT_RC_MLX5_SRQ_FLAG_TM_SW_CNT; + uct_rc_mlx5_add_cmd_wq_op(iface, tag_entry); + } + + uct_rc_mlx5_iface_common_post_srq_op(&iface->tm.cmd_wq, 0, + UCT_RC_MLX5_TM_OPCODE_REMOVE, index, + iface->tm.unexpected_cnt, 0ul, 0ul, + flags); + + UCT_RC_MLX5_TM_STAT(iface, LIST_DEL); + + return UCS_OK; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_iface_handle_tm_list_op(uct_rc_mlx5_iface_common_t *iface, int opcode) +{ + uct_rc_mlx5_cmd_wq_t *cmd_wq; + uct_rc_mlx5_srq_op_t *op; + uct_tag_context_t *ctx; + uct_rc_mlx5_ctx_priv_t *priv; + + cmd_wq = &iface->tm.cmd_wq; + op = cmd_wq->ops + (cmd_wq->ops_head++ & cmd_wq->ops_mask); + uct_rc_mlx5_release_tag_entry(iface, op->tag); + + if (opcode == UCT_RC_MLX5_CQE_APP_OP_TM_REMOVE) { + ctx = op->tag->ctx; + priv = uct_rc_mlx5_ctx_priv(ctx); + uct_rc_mlx5_iface_tag_del_from_hash(iface, priv->buffer); + ctx->completed_cb(ctx, priv->tag, 0, priv->length, UCS_ERR_CANCELED); + } +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_iface_tag_consumed(uct_rc_mlx5_iface_common_t *iface, + struct mlx5_cqe64 *cqe, int opcode) +{ + struct ibv_tmh *tmh = (struct ibv_tmh*)cqe; + uct_rc_mlx5_tag_entry_t *tag; + uct_tag_context_t *ctx; + uct_rc_mlx5_ctx_priv_t *priv; + + /* coverity[tainted_data] */ + tag = &iface->tm.list[ntohs(cqe->app_info)]; + ctx = tag->ctx; + ctx->tag_consumed_cb(ctx); + + if (opcode == UCT_RC_MLX5_CQE_APP_OP_TM_CONSUMED) { + /* Need to save TMH info, which will be used when + * UCT_RC_MLX5_CQE_APP_OP_TM_EXPECTED CQE is received */ + priv = uct_rc_mlx5_ctx_priv(ctx); + priv->tag = tmh->tag; + priv->app_ctx = tmh->app_ctx; + } +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_iface_handle_expected(uct_rc_mlx5_iface_common_t *iface, struct mlx5_cqe64 *cqe, + uint64_t tag, uint32_t app_ctx) +{ + uint64_t imm_data; + uct_rc_mlx5_tag_entry_t *tag_entry; + uct_tag_context_t *ctx; + uct_rc_mlx5_ctx_priv_t *priv; + unsigned byte_len; + + /* 
coverity[tainted_data] */ + tag_entry = &iface->tm.list[ntohs(cqe->app_info)]; + ctx = tag_entry->ctx; + priv = uct_rc_mlx5_ctx_priv(tag_entry->ctx); + /* Tag expected CQEs use all bits of byte_cnt even if MP XRQ is configured */ + byte_len = ntohl(cqe->byte_cnt); + + uct_rc_mlx5_release_tag_entry(iface, tag_entry); + uct_rc_mlx5_iface_tag_del_from_hash(iface, priv->buffer); + + if (cqe->op_own & MLX5_INLINE_SCATTER_64) { + ucs_assert(byte_len <= priv->length); + memcpy(priv->buffer, cqe - 1, byte_len); + } else { + VALGRIND_MAKE_MEM_DEFINED(priv->buffer, byte_len); + } + + imm_data = uct_rc_mlx5_tag_imm_data_unpack(cqe->imm_inval_pkey, app_ctx, + (cqe->op_own >> 4) == + MLX5_CQE_RESP_SEND_IMM); + + if (UCT_RC_MLX5_TM_IS_SW_RNDV(cqe, imm_data)) { + ctx->rndv_cb(ctx, tag, priv->buffer, byte_len, UCS_OK); + UCT_RC_MLX5_TM_STAT(iface, RX_RNDV_REQ_EXP); + } else { + ctx->completed_cb(ctx, tag, imm_data, byte_len, UCS_OK); + UCT_RC_MLX5_TM_STAT(iface, RX_EXP); + } +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_iface_unexp_consumed(uct_rc_mlx5_iface_common_t *iface, + unsigned offset, uct_recv_desc_t *release_desc, + struct mlx5_cqe64 *cqe, ucs_status_t status, + uint16_t wqe_ctr) +{ + uct_ib_mlx5_srq_seg_t *seg; + + seg = uct_ib_mlx5_srq_get_wqe(&iface->rx.srq, wqe_ctr); + + uct_rc_mlx5_iface_release_srq_seg(iface, seg, cqe, wqe_ctr, + status, offset, release_desc); + + if (ucs_unlikely(!(iface->tm.unexpected_cnt % IBV_DEVICE_MAX_UNEXP_COUNT))) { + uct_rc_mlx5_iface_common_post_srq_op(&iface->tm.cmd_wq, 0, + UCT_RC_MLX5_TM_OPCODE_NOP, 0, + iface->tm.unexpected_cnt, 0ul, 0ul, + UCT_RC_MLX5_SRQ_FLAG_TM_SW_CNT); + + UCT_RC_MLX5_TM_STAT(iface, LIST_SYNC); + } +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_iface_tag_handle_unexp(uct_rc_mlx5_iface_common_t *iface, + struct mlx5_cqe64 *cqe, unsigned byte_len, + int poll_flags) +{ + struct ibv_tmh *tmh; + uint64_t imm_data; + ucs_status_t status; + unsigned flags; + uct_rc_mlx5_mp_context_t *msg_ctx; + + tmh = 
uct_rc_mlx5_iface_tm_common_data(iface, cqe, byte_len, &flags, + poll_flags, &msg_ctx); + + /* Fast path: single fragment eager message */ + if (ucs_likely(UCT_RC_MLX5_SINGLE_FRAG_MSG(flags) && + (tmh->opcode == IBV_TMH_EAGER) && + !UCT_RC_MLX5_TM_CQE_WITH_IMM(cqe))) { + status = iface->tm.eager_unexp.cb(iface->tm.eager_unexp.arg, tmh + 1, + byte_len - sizeof(*tmh), flags, + tmh->tag, 0, &msg_ctx->context); + + ++iface->tm.unexpected_cnt; + uct_rc_mlx5_iface_unexp_consumed(iface, iface->tm.eager_desc.offset, + &iface->tm.eager_desc.super, cqe, + status, ntohs(cqe->wqe_counter)); + + UCT_RC_MLX5_TM_STAT(iface, RX_EAGER_UNEXP); + return; + + } + + if (ucs_unlikely(!(flags & UCT_CB_PARAM_FLAG_FIRST))) { + /* Either middle or last fragment. Can pass zero tag, because it was + * already provided in the first fragment. If it is last fragment and + * CQE contains immediate value, construct user's immediate data using + * imm value and TMH->app_ctx (saved in message context when the first + * message arrived). Note, in case of send with immediate, only last + * fragment CQE contains immediate data. 
*/ + ucs_assert(!UCT_RC_MLX5_TM_CQE_WITH_IMM(cqe) || + !(flags & UCT_CB_PARAM_FLAG_MORE)); + imm_data = uct_rc_mlx5_tag_imm_data_unpack(cqe->imm_inval_pkey, + msg_ctx->app_ctx, + UCT_RC_MLX5_TM_CQE_WITH_IMM(cqe)); + status = iface->tm.eager_unexp.cb(iface->tm.eager_unexp.arg, tmh, + byte_len, flags, msg_ctx->tag, + imm_data, &msg_ctx->context); + + /* Do not increase unexpected_cnt count here, because it is counter per + * message rather than per every fragment */ + uct_rc_mlx5_iface_unexp_consumed(iface, + iface->super.super.config.rx_headroom_offset, + &iface->super.super.release_desc, + cqe, status, ntohs(cqe->wqe_counter)); + return; + } + + ++iface->tm.unexpected_cnt; + + if (ucs_unlikely(tmh->opcode == IBV_TMH_RNDV)) { + uct_rc_mlx5_handle_unexp_rndv(iface, tmh, tmh->tag, cqe, flags, byte_len); + return; + } + + ucs_assertv_always(tmh->opcode == IBV_TMH_EAGER, + "Unsupported packet arrived %d", tmh->opcode); + + /* Eager sync only, eager sync first or eager first. CQE can contain + immediate value if it is eager sync only or sw rndv messages */ + imm_data = uct_rc_mlx5_tag_imm_data_unpack(cqe->imm_inval_pkey, + tmh->app_ctx, + UCT_RC_MLX5_TM_CQE_WITH_IMM(cqe)); + + if (UCT_RC_MLX5_TM_CQE_WITH_IMM(cqe) && !imm_data) { + ucs_assert(UCT_RC_MLX5_SINGLE_FRAG_MSG(flags)); + /* Opcode is WITH_IMM, but imm_data is 0 - this must be SW RNDV */ + status = iface->tm.rndv_unexp.cb(iface->tm.rndv_unexp.arg, 0, tmh->tag, + tmh + 1, byte_len - sizeof(*tmh), + 0ul, 0, NULL); + + UCT_RC_MLX5_TM_STAT(iface, RX_RNDV_REQ_UNEXP); + } else { + + /* Save app_context to assemble eager immediate data when the last + fragment arrives (and contains imm value) */ + msg_ctx->app_ctx = tmh->app_ctx; + + /* Save tag to pass it with non-first fragments */ + msg_ctx->tag = tmh->tag; + + status = iface->tm.eager_unexp.cb(iface->tm.eager_unexp.arg, + tmh + 1, byte_len - sizeof(*tmh), + flags, tmh->tag, imm_data, + &msg_ctx->context); + + UCT_RC_MLX5_TM_STAT(iface, RX_EAGER_UNEXP); + } + + 
uct_rc_mlx5_iface_unexp_consumed(iface, iface->tm.eager_desc.offset, + &iface->tm.eager_desc.super, cqe, + status, ntohs(cqe->wqe_counter)); +} + +static UCS_F_NOINLINE void +uct_rc_mlx5_iface_handle_filler_cqe(uct_rc_mlx5_iface_common_t *iface, + struct mlx5_cqe64 *cqe) +{ + uct_ib_mlx5_srq_seg_t *seg; + + /* filler CQE is relevant for MP XRQ only */ + ucs_assert_always(UCT_RC_MLX5_MP_ENABLED(iface)); + + seg = uct_ib_mlx5_srq_get_wqe(&iface->rx.srq, ntohs(cqe->wqe_counter)); + + /* at least one stride should be in HW ownership when filler CQE arrives */ + ucs_assert(seg->srq.strides); + uct_rc_mlx5_iface_release_srq_seg(iface, seg, cqe, ntohs(cqe->wqe_counter), + UCS_OK, 0, NULL); +} +#endif /* IBV_HW_TM */ + +static UCS_F_ALWAYS_INLINE unsigned +uct_rc_mlx5_iface_common_poll_rx(uct_rc_mlx5_iface_common_t *iface, + int poll_flags) +{ + uct_ib_mlx5_srq_seg_t UCS_V_UNUSED *seg; + struct mlx5_cqe64 *cqe; + unsigned byte_len; + uint16_t max_batch; + unsigned count; + void *rc_hdr; + unsigned flags; +#if IBV_HW_TM + struct ibv_tmh *tmh; + uct_rc_mlx5_tag_entry_t *tag; + uct_tag_context_t *ctx; + uct_rc_mlx5_ctx_priv_t *priv; + uct_rc_mlx5_mp_context_t UCS_V_UNUSED *dummy_ctx; +#endif + + ucs_assert(uct_ib_mlx5_srq_get_wqe(&iface->rx.srq, + iface->rx.srq.mask)->srq.next_wqe_index == 0); + + cqe = uct_rc_mlx5_iface_poll_rx_cq(iface); + if (cqe == NULL) { + /* If no CQE - post receives */ + count = 0; + goto done; + } + + ucs_memory_cpu_load_fence(); + UCS_STATS_UPDATE_COUNTER(iface->super.stats, UCT_RC_IFACE_STAT_RX_COMPLETION, 1); + + byte_len = ntohl(cqe->byte_cnt) & UCT_RC_MLX5_MP_RQ_BYTE_CNT_FIELD_MASK; + count = 1; + + if (!(poll_flags & UCT_RC_MLX5_POLL_FLAG_TM)) { + rc_hdr = uct_rc_mlx5_iface_common_data(iface, cqe, byte_len, &flags); + uct_rc_mlx5_iface_common_am_handler(iface, cqe, rc_hdr, flags, byte_len); + goto done; + } + +#if IBV_HW_TM + ucs_assert(cqe->app == UCT_RC_MLX5_CQE_APP_TAG_MATCHING); + + if (ucs_unlikely(byte_len & UCT_RC_MLX5_MP_RQ_FILLER_CQE)) 
{ + /* TODO: Check if cqe->app_op is valid for filler CQE. Then this check + * could be done for specific CQE types only. */ + uct_rc_mlx5_iface_handle_filler_cqe(iface, cqe); + count = 0; + goto done; + } + + /* Should be a fast path, because small (latency-critical) messages + * are not supposed to be offloaded to the HW. */ + if (ucs_likely(cqe->app_op == UCT_RC_MLX5_CQE_APP_OP_TM_UNEXPECTED)) { + uct_rc_mlx5_iface_tag_handle_unexp(iface, cqe, byte_len, poll_flags); + goto done; + } + + switch (cqe->app_op) { + case UCT_RC_MLX5_CQE_APP_OP_TM_APPEND: + uct_rc_mlx5_iface_handle_tm_list_op(iface, + UCT_RC_MLX5_CQE_APP_OP_TM_APPEND); + break; + + case UCT_RC_MLX5_CQE_APP_OP_TM_REMOVE: + uct_rc_mlx5_iface_handle_tm_list_op(iface, + UCT_RC_MLX5_CQE_APP_OP_TM_REMOVE); + break; + + case UCT_RC_MLX5_CQE_APP_OP_TM_NO_TAG: + /* TODO: optimize */ + tmh = uct_rc_mlx5_iface_tm_common_data(iface, cqe, byte_len, &flags, + poll_flags, &dummy_ctx); + + /* With MP XRQ, AM can be single-fragment only */ + ucs_assert(UCT_RC_MLX5_SINGLE_FRAG_MSG(flags)); + + if (tmh->opcode == IBV_TMH_NO_TAG) { + uct_rc_mlx5_iface_common_am_handler(iface, cqe, + (uct_rc_mlx5_hdr_t*)tmh, + flags, byte_len); + } else { + ucs_assert(tmh->opcode == IBV_TMH_FIN); + uct_rc_mlx5_handle_rndv_fin(iface, tmh->app_ctx); + seg = uct_ib_mlx5_srq_get_wqe(&iface->rx.srq, + ntohs(cqe->wqe_counter)); + + uct_rc_mlx5_iface_release_srq_seg(iface, seg, cqe, + ntohs(cqe->wqe_counter), UCS_OK, + 0, NULL); + + UCT_RC_MLX5_TM_STAT(iface, RX_RNDV_FIN); + } + break; + + case UCT_RC_MLX5_CQE_APP_OP_TM_CONSUMED: + uct_rc_mlx5_iface_tag_consumed(iface, cqe, + UCT_RC_MLX5_CQE_APP_OP_TM_CONSUMED); + break; + + case UCT_RC_MLX5_CQE_APP_OP_TM_CONSUMED_MSG: + tmh = (struct ibv_tmh*)cqe; + + uct_rc_mlx5_iface_tag_consumed(iface, cqe, + UCT_RC_MLX5_CQE_APP_OP_TM_CONSUMED_MSG); + + uct_rc_mlx5_iface_handle_expected(iface, cqe, tmh->tag, tmh->app_ctx); + break; + + case UCT_RC_MLX5_CQE_APP_OP_TM_EXPECTED: + /* coverity[tainted_data] */ + 
tag = &iface->tm.list[ntohs(cqe->app_info)]; + ctx = tag->ctx; + priv = uct_rc_mlx5_ctx_priv(ctx); + uct_rc_mlx5_iface_handle_expected(iface, cqe, priv->tag, priv->app_ctx); + break; + + default: + ucs_fatal("Unsupported packet arrived %d", cqe->app_op); + break; + } +#endif + +done: + max_batch = iface->super.super.config.rx_max_batch; + if (ucs_unlikely(iface->super.rx.srq.available >= max_batch)) { + uct_rc_mlx5_iface_srq_post_recv(iface); + } + return count; +} + +#if HAVE_IBV_DM +/* DM memory should be written by 8 bytes to eliminate + * processor cache issues. To make this used uct_rc_mlx5_dm_copy_data_t + * datatype where first hdr_len bytes are filled by message header + * and tail is filled by head of message. */ +static void UCS_F_ALWAYS_INLINE +uct_rc_mlx5_iface_common_copy_to_dm(uct_rc_mlx5_dm_copy_data_t *cache, size_t hdr_len, + const void *payload, size_t length, void *dm, + uct_ib_log_sge_t *log_sge) +{ + typedef uint64_t misaligned_t UCS_V_ALIGNED(1); + + uint64_t padding = 0; /* init by 0 to suppress valgrind error */ + size_t head = (cache && hdr_len) ? 
ucs_min(length, sizeof(*cache) - hdr_len) : 0; + size_t body = ucs_align_down(length - head, sizeof(padding)); + size_t tail = length - (head + body); + char *dst = dm; + int i = 0; + + ucs_assert(sizeof(*cache) >= hdr_len); + ucs_assert(head + body + tail == length); + ucs_assert(tail < sizeof(padding)); + + /* copy head of payload to tail of cache */ + memcpy(cache->bytes + hdr_len, payload, head); + + UCS_STATIC_ASSERT(sizeof(*cache) == sizeof(cache->bytes)); + UCS_STATIC_ASSERT(sizeof(log_sge->sg_list) / sizeof(log_sge->sg_list[0]) >= 2); + + /* condition is static-evaluated */ + if (cache && hdr_len) { + /* atomically by 8 bytes copy data to DM */ + /* cache buffer must be aligned, so, source data type is aligned */ + UCS_WORD_COPY(volatile uint64_t, dst, uint64_t, cache->bytes, sizeof(cache->bytes)); + dst += sizeof(cache->bytes); + if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { + log_sge->sg_list[0].addr = (uint64_t)cache; + log_sge->sg_list[0].length = (uint32_t)hdr_len; + i++; + } + } + if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { + log_sge->sg_list[i].addr = (uint64_t)payload; + log_sge->sg_list[i].length = (uint32_t)length; + i++; + } + log_sge->num_sge = i; + + /* copy payload to DM */ + UCS_WORD_COPY(volatile uint64_t, dst, misaligned_t, + UCS_PTR_BYTE_OFFSET(payload, head), body); + if (tail) { + dst += body; + memcpy(&padding, UCS_PTR_BYTE_OFFSET(payload, head + body), tail); + /* use uint64_t for source datatype because it is aligned buffer on stack */ + UCS_WORD_COPY(volatile uint64_t, dst, uint64_t, &padding, sizeof(padding)); + } +} + +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_rc_mlx5_common_dm_make_data(uct_rc_mlx5_iface_common_t *iface, + uct_rc_mlx5_dm_copy_data_t *cache, + size_t hdr_len, const void *payload, + unsigned length, + uct_rc_iface_send_desc_t **desc_p, + void **buffer_p, uct_ib_log_sge_t *log_sge) +{ + uct_rc_iface_send_desc_t *desc; + void *buffer; + + ucs_assert(iface->dm.dm != NULL); + ucs_assert(log_sge != 
NULL); + + desc = ucs_mpool_get_inline(&iface->dm.dm->mp); + if (ucs_unlikely(desc == NULL)) { + /* in case if no resources available - fallback to bcopy */ + UCT_RC_IFACE_GET_TX_DESC(&iface->super, &iface->super.tx.mp, desc); + desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; + buffer = desc + 1; + + /* condition is static-evaluated, no performance penalty */ + if (cache && hdr_len) { + memcpy(buffer, cache->bytes, hdr_len); + } + memcpy(UCS_PTR_BYTE_OFFSET(buffer, hdr_len), payload, length); + log_sge->num_sge = 0; + } else { + /* desc must be partially initialized by mpool. + * hint to valgrind to make it defined */ + VALGRIND_MAKE_MEM_DEFINED(desc, sizeof(*desc)); + ucs_assert(desc->super.buffer != NULL); + buffer = (void*)UCS_PTR_BYTE_DIFF(iface->dm.dm->start_va, desc->super.buffer); + + uct_rc_mlx5_iface_common_copy_to_dm(cache, hdr_len, payload, + length, desc->super.buffer, log_sge); + if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { + log_sge->sg_list[0].lkey = log_sge->sg_list[1].lkey = desc->lkey; + log_sge->inline_bitmap = 0; + } + } + + *desc_p = desc; + *buffer_p = buffer; + return UCS_OK; +} +#endif + +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_rc_mlx5_iface_common_atomic_data(unsigned opcode, unsigned size, uint64_t value, + int *op, uint64_t *compare_mask, uint64_t *compare, + uint64_t *swap_mask, uint64_t *swap, int *ext) +{ + ucs_assert((size == sizeof(uint64_t)) || (size == sizeof(uint32_t))); + + switch (opcode) { + case UCT_ATOMIC_OP_ADD: + switch (size) { + case sizeof(uint64_t): + *op = MLX5_OPCODE_ATOMIC_FA; + *ext = 0; + break; + case sizeof(uint32_t): + *op = MLX5_OPCODE_ATOMIC_MASKED_FA; + *ext = 1; + break; + default: + ucs_assertv(0, "incorrect atomic size: %d", size); + return UCS_ERR_INVALID_PARAM; + } + *compare_mask = 0; + *compare = 0; + *swap_mask = 0; + *swap = UCT_RC_MLX5_TO_BE(value, size); + break; + case UCT_ATOMIC_OP_AND: + *op = MLX5_OPCODE_ATOMIC_MASKED_CS; + *compare_mask = 0; + *compare = 0; + *swap_mask 
= UCT_RC_MLX5_TO_BE(~value, size); + *swap = UCT_RC_MLX5_TO_BE(value, size); + *ext = 1; + break; + case UCT_ATOMIC_OP_OR: + *op = MLX5_OPCODE_ATOMIC_MASKED_CS; + *compare_mask = 0; + *compare = 0; + *swap_mask = UCT_RC_MLX5_TO_BE(value, size); + *swap = UCT_RC_MLX5_TO_BE(value, size); + *ext = 1; + break; + case UCT_ATOMIC_OP_XOR: + *op = MLX5_OPCODE_ATOMIC_MASKED_FA; + *compare_mask = 0; + *compare = UINT64_MAX; + *swap_mask = 0; + *swap = UCT_RC_MLX5_TO_BE(value, size); + *ext = 1; + break; + case UCT_ATOMIC_OP_SWAP: + *op = MLX5_OPCODE_ATOMIC_MASKED_CS; + *compare_mask = 0; + *compare = 0; + *swap_mask = UINT64_MAX; + *swap = UCT_RC_MLX5_TO_BE(value, size); + *ext = 1; + break; + default: + ucs_assertv(0, "incorrect atomic opcode: %d", opcode); + return UCS_ERR_UNSUPPORTED; + } + return UCS_OK; +} + diff --git a/src/uct/ib/rc/accel/rc_mlx5_common.c b/src/uct/ib/rc/accel/rc_mlx5_common.c new file mode 100644 index 0000000..fa565d7 --- /dev/null +++ b/src/uct/ib/rc/accel/rc_mlx5_common.c @@ -0,0 +1,1064 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + + +#include "rc_mlx5.inl" + +#include +#include +#include + + +#if HAVE_DEVX +static const char *uct_rc_mlx5_srq_topo_names[] = { + [UCT_RC_MLX5_SRQ_TOPO_LIST] = "list", + [UCT_RC_MLX5_SRQ_TOPO_CYCLIC] = "cyclic", + [UCT_RC_MLX5_SRQ_TOPO_AUTO] = "auto", + [UCT_RC_MLX5_SRQ_TOPO_LAST] = NULL +}; +#endif + + +ucs_config_field_t uct_rc_mlx5_common_config_table[] = { + {"IB_", "", NULL, + ucs_offsetof(uct_rc_mlx5_iface_common_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_ib_mlx5_iface_config_table)}, + + {"TX_MAX_BB", "-1", + "Limits the number of outstanding WQE building blocks. 
The actual limit is\n" + "a minimum between this value and the number of building blocks in the TX QP.\n" + "-1 means no limit.", + ucs_offsetof(uct_rc_mlx5_iface_common_config_t, tx_max_bb), UCS_CONFIG_TYPE_UINT}, + + {"TM_ENABLE", "n", + "Enable HW tag matching", + ucs_offsetof(uct_rc_mlx5_iface_common_config_t, tm.enable), UCS_CONFIG_TYPE_BOOL}, + + {"TM_LIST_SIZE", "1024", + "Limits the number of tags posted to the HW for matching. The actual limit \n" + "is a minimum between this value and the maximum value supported by the HW. \n" + "-1 means no limit.", + ucs_offsetof(uct_rc_mlx5_iface_common_config_t, tm.list_size), UCS_CONFIG_TYPE_UINT}, + + {"TM_SEG_SIZE", "48k", + "Maximal size of copy-out sends when tag-matching offload is enabled.", + ucs_offsetof(uct_rc_mlx5_iface_common_config_t, tm.seg_size), + UCS_CONFIG_TYPE_MEMUNITS}, + + {"TM_NUM_STRIDES", "auto", + "Number of strides used per single receive WQE for hardware tag-matching\n" + "unexpected messages. Can be 1, 8 or 16 only.", + ucs_offsetof(uct_rc_mlx5_iface_common_config_t, tm.mp_num_strides), + UCS_CONFIG_TYPE_ULUNITS}, + + {"TM_MAX_BCOPY", NULL, "", + ucs_offsetof(uct_rc_mlx5_iface_common_config_t, tm.seg_size), + UCS_CONFIG_TYPE_MEMUNITS}, + + {"EXP_BACKOFF", "0", + "Exponential Backoff Timeout Multiplier. ACK timeout will be multiplied \n" + "by 2^EXP_BACKOFF every consecutive retry.", + ucs_offsetof(uct_rc_mlx5_iface_common_config_t, exp_backoff), + UCS_CONFIG_TYPE_UINT}, + +#if HAVE_DEVX + {"SRQ_TOPO", "auto", + "SRQ topology type. 
The types are:\n" + "\n" + "list SRQ is organized as a buffer containing linked list of WQEs.\n" + "\n" + "cyclic SRQ is organized as a continuos array of WQEs.\n" + " Supported with tag offload only.\n" + "\n" + "auto The most optimal SRQ topology is selected automatically.", + ucs_offsetof(uct_rc_mlx5_iface_common_config_t, srq_topo), + UCS_CONFIG_TYPE_ENUM(uct_rc_mlx5_srq_topo_names)}, +#endif + + {NULL} +}; + + +unsigned uct_rc_mlx5_iface_srq_post_recv(uct_rc_mlx5_iface_common_t *iface) +{ + uct_ib_mlx5_srq_t *srq = &iface->rx.srq; + uct_rc_iface_t *rc_iface = &iface->super; + uct_ib_mlx5_srq_seg_t *seg; + uct_ib_iface_recv_desc_t *desc; + uint16_t count, index, next_index; + uint64_t desc_map; + void *hdr; + int i; + + /* Make sure the union is right */ + UCS_STATIC_ASSERT(ucs_offsetof(uct_ib_mlx5_srq_seg_t, mlx5_srq.next_wqe_index) == + ucs_offsetof(uct_ib_mlx5_srq_seg_t, srq.next_wqe_index)); + UCS_STATIC_ASSERT(ucs_offsetof(uct_ib_mlx5_srq_seg_t, dptr) == + sizeof(struct mlx5_wqe_srq_next_seg)); + + ucs_assert(UCS_CIRCULAR_COMPARE16(srq->ready_idx, <=, srq->free_idx)); + + index = srq->ready_idx; + for (;;) { + next_index = index + 1; + seg = uct_ib_mlx5_srq_get_wqe(srq, next_index); + if (UCS_CIRCULAR_COMPARE16(next_index, >, srq->free_idx)) { + if (!seg->srq.free) { + break; + } + + ucs_assert(next_index == (uint16_t)(srq->free_idx + 1)); + seg->srq.free = 0; + srq->free_idx = next_index; + } + + desc_map = ~seg->srq.ptr_mask & UCS_MASK(iface->tm.mp.num_strides); + ucs_for_each_bit(i, desc_map) { + UCT_TL_IFACE_GET_RX_DESC(&rc_iface->super.super, &rc_iface->rx.mp, + desc, goto out); + + /* Set receive data segment pointer. Length is pre-initialized. 
*/ + hdr = uct_ib_iface_recv_desc_hdr(&rc_iface->super, desc); + seg->srq.ptr_mask |= UCS_BIT(i); + seg->srq.desc = desc; /* Optimization for non-MP case (1 stride) */ + seg->dptr[i].lkey = htonl(desc->lkey); + seg->dptr[i].addr = htobe64((uintptr_t)hdr); + VALGRIND_MAKE_MEM_NOACCESS(hdr, rc_iface->super.config.seg_size); + } + + index = next_index; + } + +out: + count = index - srq->sw_pi; + ucs_assert(rc_iface->rx.srq.available >= count); + + if (count > 0) { + srq->ready_idx = index; + srq->sw_pi = index; + rc_iface->rx.srq.available -= count; + ucs_memory_cpu_store_fence(); + *srq->db = htonl(srq->sw_pi); + ucs_assert(uct_ib_mlx5_srq_get_wqe(srq, srq->mask)->srq.next_wqe_index == 0); + } + return count; +} + +void uct_rc_mlx5_iface_common_prepost_recvs(uct_rc_mlx5_iface_common_t *iface) +{ + iface->super.rx.srq.available = iface->super.rx.srq.quota; + iface->super.rx.srq.quota = 0; + uct_rc_mlx5_iface_srq_post_recv(iface); +} + +#define UCT_RC_MLX5_DEFINE_ATOMIC_LE_HANDLER(_bits) \ + void \ + uct_rc_mlx5_common_atomic##_bits##_le_handler(uct_rc_iface_send_op_t *op, \ + const void *resp) \ + { \ + uct_rc_iface_send_desc_t *desc = ucs_derived_of(op, uct_rc_iface_send_desc_t); \ + uint##_bits##_t *dest = desc->super.buffer; \ + const uint##_bits##_t *value = resp; \ + \ + VALGRIND_MAKE_MEM_DEFINED(value, sizeof(*value)); \ + if (resp == (desc + 1)) { \ + *dest = *value; /* response in desc buffer */ \ + } else if (_bits == 32) { \ + *dest = ntohl(*value); /* response in CQE as 32-bit value */ \ + } else if (_bits == 64) { \ + *dest = be64toh(*value); /* response in CQE as 64-bit value */ \ + } \ + \ + uct_invoke_completion(desc->super.user_comp, UCS_OK); \ + ucs_mpool_put(desc); \ + } + +UCT_RC_MLX5_DEFINE_ATOMIC_LE_HANDLER(32) +UCT_RC_MLX5_DEFINE_ATOMIC_LE_HANDLER(64) + +#if IBV_HW_TM +# if ENABLE_STATS +static ucs_stats_class_t uct_rc_mlx5_tag_stats_class = { + .name = "tag", + .num_counters = UCT_RC_MLX5_STAT_TAG_LAST, + .counter_names = { + 
[UCT_RC_MLX5_STAT_TAG_RX_EXP] = "rx_exp", + [UCT_RC_MLX5_STAT_TAG_RX_EAGER_UNEXP] = "rx_unexp_eager", + [UCT_RC_MLX5_STAT_TAG_RX_RNDV_UNEXP] = "rx_unexp_rndv", + [UCT_RC_MLX5_STAT_TAG_RX_RNDV_REQ_EXP] = "rx_exp_rndv_req", + [UCT_RC_MLX5_STAT_TAG_RX_RNDV_REQ_UNEXP] = "rx_unexp_rndv_req", + [UCT_RC_MLX5_STAT_TAG_RX_RNDV_FIN] = "rx_rndv_fin", + [UCT_RC_MLX5_STAT_TAG_LIST_ADD] = "tx_add_op", + [UCT_RC_MLX5_STAT_TAG_LIST_DEL] = "tx_del_op", + [UCT_RC_MLX5_STAT_TAG_LIST_SYNC] = "tx_sync_op" + } +}; +# endif + + +static ucs_status_t UCS_F_MAYBE_UNUSED +uct_rc_mlx5_devx_create_cmd_qp(uct_rc_mlx5_iface_common_t *iface) +{ + uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.super.super.md, + uct_ib_mlx5_md_t); + uct_ib_device_t *dev = &md->super.dev; + struct ibv_ah_attr ah_attr = {}; + uct_ib_qp_attr_t attr = {}; + ucs_status_t status; + + ucs_assert(iface->tm.cmd_wq.super.super.type == UCT_IB_MLX5_OBJ_TYPE_LAST); + + attr.cap.max_send_wr = iface->tm.cmd_qp_len; + attr.cap.max_send_sge = 1; + attr.ibv.pd = md->super.pd; + attr.ibv.send_cq = iface->super.super.cq[UCT_IB_DIR_RX]; + attr.ibv.recv_cq = iface->super.super.cq[UCT_IB_DIR_RX]; + attr.srq_num = iface->rx.srq.srq_num; + attr.port = dev->first_port; + status = uct_ib_mlx5_devx_create_qp(&iface->super.super, + &iface->tm.cmd_wq.super.super, + &iface->tm.cmd_wq.super, + &attr); + if (status != UCS_OK) { + return status; + } + + ah_attr.is_global = 1; + ah_attr.grh.dgid = iface->super.super.gid; + ah_attr.dlid = uct_ib_device_port_attr(dev, attr.port)->lid; + ah_attr.port_num = dev->first_port; + status = uct_rc_mlx5_iface_common_devx_connect_qp( + iface, &iface->tm.cmd_wq.super.super, + iface->tm.cmd_wq.super.super.qp_num, &ah_attr); + if (status != UCS_OK) { + goto err_destroy_qp; + } + + return UCS_OK; + +err_destroy_qp: + uct_ib_mlx5_devx_destroy_qp(&iface->tm.cmd_wq.super.super); + return status; +} + +static struct ibv_qp * UCS_F_MAYBE_UNUSED +uct_rc_mlx5_verbs_create_cmd_qp(uct_rc_mlx5_iface_common_t *iface) +{ + 
uct_ib_md_t *md = uct_ib_iface_md(&iface->super.super); + struct ibv_qp_init_attr qp_init_attr = {}; + struct ibv_qp_attr qp_attr = {}; + uct_ib_device_t *ibdev = &md->dev; + struct ibv_port_attr *port_attr; + ucs_status_t status; + struct ibv_qp *qp; + uint8_t port_num; + int ret; + + port_num = ibdev->first_port; + port_attr = uct_ib_device_port_attr(ibdev, port_num); + + status = uct_ib_mlx5_iface_get_res_domain(&iface->super.super, + &iface->tm.cmd_wq.super.super); + if (status != UCS_OK) { + goto err; + } + + qp_init_attr.qp_type = IBV_QPT_RC; + qp_init_attr.send_cq = iface->super.super.cq[UCT_IB_DIR_RX]; + qp_init_attr.recv_cq = iface->super.super.cq[UCT_IB_DIR_RX]; + qp_init_attr.cap.max_send_sge = 1; + qp_init_attr.srq = iface->rx.srq.verbs.srq; + qp_init_attr.cap.max_send_wr = iface->tm.cmd_qp_len; + + qp = ibv_create_qp(md->pd, &qp_init_attr); + if (qp == NULL) { + ucs_error("failed to create TM control QP: %m"); + goto err_rd; + } + + + /* Modify QP to INIT state */ + qp_attr.qp_state = IBV_QPS_INIT; + qp_attr.port_num = port_num; + ret = ibv_modify_qp(qp, &qp_attr, + IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS); + if (ret) { + ucs_error("Failed to modify TM control QP to INIT: %m"); + goto err_destroy_qp; + } + + /* Modify to RTR */ + qp_attr.qp_state = IBV_QPS_RTR; + qp_attr.dest_qp_num = qp->qp_num; + qp_attr.path_mtu = IBV_MTU_512; + qp_attr.ah_attr.port_num = port_num; + qp_attr.ah_attr.dlid = port_attr->lid; + qp_attr.ah_attr.is_global = 1; + qp_attr.ah_attr.grh.dgid = iface->super.super.gid; + ret = ibv_modify_qp(qp, &qp_attr, + IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER); + if (ret) { + ucs_error("Failed to modify TM control QP to RTR: %m"); + goto err_destroy_qp; + } + + /* Modify to RTS */ + qp_attr.qp_state = IBV_QPS_RTS; + ret = ibv_modify_qp(qp, &qp_attr, + IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | + IBV_QP_RETRY_CNT | 
IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC); + if (ret) { + ucs_error("Failed to modify TM control QP to RTS: %m"); + goto err_destroy_qp; + } + + iface->tm.cmd_wq.super.super.verbs.qp = qp; + return qp; + +err_destroy_qp: + uct_ib_destroy_qp(qp); +err_rd: + uct_ib_mlx5_iface_put_res_domain(&iface->tm.cmd_wq.super.super); +err: + return NULL; +} + +static ucs_status_t +uct_rc_mlx5_get_cmd_qp(uct_rc_mlx5_iface_common_t *iface) +{ + struct ibv_qp *qp; +#if HAVE_STRUCT_MLX5_SRQ_CMD_QP + iface->tm.cmd_wq.super.super.verbs.qp = NULL; + iface->tm.cmd_wq.super.super.verbs.rd = NULL; + iface->tm.cmd_wq.super.super.type = UCT_IB_MLX5_OBJ_TYPE_LAST; + qp = uct_dv_get_cmd_qp(iface->rx.srq.verbs.srq); +#else + if (iface->rx.srq.type == UCT_IB_MLX5_OBJ_TYPE_DEVX) { + return uct_rc_mlx5_devx_create_cmd_qp(iface); + } else { + qp = uct_rc_mlx5_verbs_create_cmd_qp(iface); + } +#endif + iface->tm.cmd_wq.super.super.qp_num = qp->qp_num; + return uct_ib_mlx5_txwq_init(iface->super.super.super.worker, + iface->tx.mmio_mode, + &iface->tm.cmd_wq.super, qp); +} +#endif + +ucs_status_t uct_rc_mlx5_iface_common_tag_init(uct_rc_mlx5_iface_common_t *iface) +{ + ucs_status_t status = UCS_OK; +#if IBV_HW_TM + int i; + + if (!UCT_RC_MLX5_TM_ENABLED(iface)) { + return UCS_OK; + } + + status = uct_rc_mlx5_get_cmd_qp(iface); + if (status != UCS_OK) { + goto err_tag_cleanup; + } + + iface->tm.cmd_wq.ops_mask = iface->tm.cmd_qp_len - 1; + iface->tm.cmd_wq.ops_head = iface->tm.cmd_wq.ops_tail = 0; + iface->tm.cmd_wq.ops = ucs_calloc(iface->tm.cmd_qp_len, + sizeof(uct_rc_mlx5_srq_op_t), + "srq tag ops"); + if (iface->tm.cmd_wq.ops == NULL) { + ucs_error("Failed to allocate memory for srq tm ops array"); + status = UCS_ERR_NO_MEMORY; + goto err_tag_cleanup; + } + + iface->tm.list = ucs_calloc(iface->tm.num_tags + 1, + sizeof(uct_rc_mlx5_tag_entry_t), "tm list"); + if (iface->tm.list == NULL) { + ucs_error("Failed to allocate memory for tag matching list"); + status = 
UCS_ERR_NO_MEMORY; + goto err_cmd_wq_free; + } + + for (i = 0; i < iface->tm.num_tags; ++i) { + iface->tm.list[i].next = &iface->tm.list[i + 1]; + } + + iface->tm.head = &iface->tm.list[0]; + iface->tm.tail = &iface->tm.list[i]; + + status = UCS_STATS_NODE_ALLOC(&iface->tm.stats, &uct_rc_mlx5_tag_stats_class, + iface->stats); + if (status != UCS_OK) { + ucs_error("Failed to allocate tag stats: %s", ucs_status_string(status)); + goto err_cmd_wq_free; + } + + return UCS_OK; + +err_cmd_wq_free: + ucs_free(iface->tm.cmd_wq.ops); +err_tag_cleanup: + uct_rc_mlx5_tag_cleanup(iface); +#endif + + return status; +} + +void uct_rc_mlx5_iface_common_tag_cleanup(uct_rc_mlx5_iface_common_t *iface) +{ + uct_rc_mlx5_mp_hash_key_t key_gid; + uint64_t key_lid; + void *recv_buffer; + + if (!UCT_RC_MLX5_TM_ENABLED(iface)) { + return; + } + + uct_ib_mlx5_destroy_qp(&iface->tm.cmd_wq.super.super); + uct_ib_mlx5_txwq_cleanup(&iface->tm.cmd_wq.super); + ucs_free(iface->tm.list); + ucs_free(iface->tm.cmd_wq.ops); + uct_rc_mlx5_tag_cleanup(iface); + + kh_foreach_key(&iface->tm.tag_addrs, recv_buffer, { + ucs_debug("destroying iface %p, with recv buffer %p offloaded to the HW", + iface, recv_buffer); + }); + kh_destroy_inplace(uct_rc_mlx5_tag_addrs, &iface->tm.tag_addrs); + + if (!UCT_RC_MLX5_MP_ENABLED(iface)) { + return; + } + + kh_foreach_key(&iface->tm.mp.hash_lid, key_lid, { + ucs_debug("destroying iface %p with partially received rx msg (key: %lu)", + iface, key_lid); + }); + kh_destroy_inplace(uct_rc_mlx5_mp_hash_lid, &iface->tm.mp.hash_lid); + + kh_foreach_key(&iface->tm.mp.hash_gid, key_gid, { + ucs_debug("destroying iface %p with partially received rx msg (key: %lu-%u)", + iface, key_gid.guid, key_gid.qp_num); + }); + kh_destroy_inplace(uct_rc_mlx5_mp_hash_gid, &iface->tm.mp.hash_gid); + + ucs_mpool_cleanup(&iface->tm.mp.tx_mp, 1); +} + +void uct_rc_mlx5_iface_fill_attr(uct_rc_mlx5_iface_common_t *iface, + uct_ib_qp_attr_t *qp_attr, + unsigned max_send_wr, + uct_ib_mlx5_srq_t *srq) 
+{ + switch (srq->type) { + case UCT_IB_MLX5_OBJ_TYPE_VERBS: + uct_rc_iface_fill_attr(&iface->super, qp_attr, max_send_wr, + srq->verbs.srq); + break; + case UCT_IB_MLX5_OBJ_TYPE_DEVX: + uct_rc_iface_fill_attr(&iface->super, qp_attr, max_send_wr, NULL); + qp_attr->srq_num = srq->srq_num; + break; + case UCT_IB_MLX5_OBJ_TYPE_LAST: + break; + } +} + +void uct_rc_mlx5_destroy_srq(uct_ib_mlx5_srq_t *srq) +{ + int UCS_V_UNUSED ret; + + switch (srq->type) { + case UCT_IB_MLX5_OBJ_TYPE_VERBS: + uct_ib_destroy_srq(srq->verbs.srq); + break; + case UCT_IB_MLX5_OBJ_TYPE_DEVX: +#if HAVE_DEVX + ret = mlx5dv_devx_obj_destroy(srq->devx.obj); + if (ret) { + ucs_warn("mlx5dv_devx_obj_destroy(SRQ) failed: %m"); + } + uct_ib_mlx5_put_dbrec(srq->devx.dbrec); + mlx5dv_devx_umem_dereg(srq->devx.mem); + ucs_free(srq->buf); +#endif + break; + case UCT_IB_MLX5_OBJ_TYPE_LAST: + break; + } +} + +void uct_rc_mlx5_release_desc(uct_recv_desc_t *self, void *desc) +{ + uct_rc_mlx5_release_desc_t *release = ucs_derived_of(self, + uct_rc_mlx5_release_desc_t); + void *ib_desc = (char*)desc - release->offset; + ucs_mpool_put_inline(ib_desc); +} + +#if IBV_HW_TM +/* tag is passed as parameter, because some (but not all!) transports may need + * to translate TMH to LE */ +void uct_rc_mlx5_handle_unexp_rndv(uct_rc_mlx5_iface_common_t *iface, + struct ibv_tmh *tmh, uct_tag_t tag, + struct mlx5_cqe64 *cqe, unsigned flags, + unsigned byte_len) +{ + uct_rc_mlx5_tmh_priv_data_t *priv = (uct_rc_mlx5_tmh_priv_data_t*)tmh->reserved; + uct_ib_md_t *ib_md = uct_ib_iface_md(&iface->super.super); + struct ibv_rvh *rvh; + unsigned tm_hdrs_len; + unsigned rndv_usr_hdr_len; + size_t rndv_data_len; + void *rndv_usr_hdr; + void *rb; + ucs_status_t status; + char packed_rkey[UCT_COMPONENT_NAME_MAX + UCT_IB_MD_PACKED_RKEY_SIZE]; + + rvh = (struct ibv_rvh*)(tmh + 1); + + /* RC uses two headers: TMH + RVH, DC uses three: TMH + RVH + RAVH. + * So, get actual RNDV hdrs len from offsets. 
*/ + tm_hdrs_len = sizeof(*tmh) + + (iface->tm.rndv_desc.offset - iface->tm.eager_desc.offset); + + rndv_usr_hdr = (char*)tmh + tm_hdrs_len; + rndv_usr_hdr_len = byte_len - tm_hdrs_len; + rndv_data_len = ntohl(rvh->len); + + /* Private TMH data may contain the first bytes of the user header, so it + needs to be copied before that. Thus, either RVH (rc) or RAVH (dc) + will be overwritten. That's why we saved rvh->length before. */ + ucs_assert(priv->length <= UCT_RC_MLX5_TMH_PRIV_LEN); + + /* When MP XRQ is configured, RTS is always a single fragment message */ + ucs_assert(UCT_RC_MLX5_SINGLE_FRAG_MSG(flags)); + + memcpy((char*)rndv_usr_hdr - priv->length, &priv->data, priv->length); + + /* Create "packed" rkey to pass it in the callback */ + rb = uct_md_fill_md_name(&ib_md->super, packed_rkey); + uct_ib_md_pack_rkey(ntohl(rvh->rkey), UCT_IB_INVALID_RKEY, rb); + + /* Do not pass flags to cb, because rkey is allocated on stack */ + status = iface->tm.rndv_unexp.cb(iface->tm.rndv_unexp.arg, 0, tag, + (char *)rndv_usr_hdr - priv->length, + rndv_usr_hdr_len + priv->length, + be64toh(rvh->va), rndv_data_len, + packed_rkey); + + uct_rc_mlx5_iface_unexp_consumed(iface, iface->tm.rndv_desc.offset, + &iface->tm.rndv_desc.super, cqe, + status, ntohs(cqe->wqe_counter)); + + UCT_RC_MLX5_TM_STAT(iface, RX_RNDV_UNEXP); +} +#endif + +#if HAVE_IBV_DM +static ucs_status_t +uct_rc_mlx5_iface_common_dm_mpool_chunk_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p) +{ + ucs_status_t status; + + status = ucs_mpool_chunk_malloc(mp, size_p, chunk_p); + if (status == UCS_OK) { + memset(*chunk_p, 0, *size_p); + } + + return status; +} + +static void uct_rc_mlx5_iface_common_dm_mp_obj_init(ucs_mpool_t *mp, void *obj, void *chunk) +{ + uct_mlx5_dm_data_t *dm = ucs_container_of(mp, uct_mlx5_dm_data_t, mp); + uct_rc_iface_send_desc_t* desc = (uct_rc_iface_send_desc_t*)obj; + + ucs_assert(desc->super.buffer == NULL); + ucs_assert(dm->seg_attached < dm->seg_count); + + desc->lkey = 
dm->mr->lkey; + desc->super.buffer = UCS_PTR_BYTE_OFFSET(dm->start_va, dm->seg_attached * dm->seg_len); + desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; + dm->seg_attached++; +} + +static ucs_mpool_ops_t uct_dm_iface_mpool_ops = { + .chunk_alloc = uct_rc_mlx5_iface_common_dm_mpool_chunk_malloc, + .chunk_release = ucs_mpool_chunk_free, + .obj_init = uct_rc_mlx5_iface_common_dm_mp_obj_init, + .obj_cleanup = NULL +}; + + +static int uct_rc_mlx5_iface_common_dm_device_cmp(uct_mlx5_dm_data_t *dm_data, + uct_rc_iface_t *iface, + const uct_ib_mlx5_iface_config_t *config) +{ + uct_ib_device_t *dev = uct_ib_iface_device(&iface->super); + + return dm_data->device->ibv_context == dev->ibv_context; +} + +static ucs_status_t +uct_rc_mlx5_iface_common_dm_tl_init(uct_mlx5_dm_data_t *data, + uct_rc_iface_t *iface, + const uct_ib_mlx5_iface_config_t *config) +{ + struct ibv_alloc_dm_attr dm_attr = {}; + struct mlx5dv_dm dvdm = {}; + uct_ib_mlx5dv_t obj = {}; + ucs_status_t status; + + data->seg_len = ucs_min(ucs_align_up(config->dm.seg_len, + sizeof(uct_rc_mlx5_dm_copy_data_t)), + iface->super.config.seg_size); + data->seg_count = config->dm.count; + data->seg_attached = 0; + data->device = uct_ib_iface_device(&iface->super); + + dm_attr.length = data->seg_len * data->seg_count; + dm_attr.comp_mask = 0; + data->dm = ibv_alloc_dm(data->device->ibv_context, &dm_attr); + + if (data->dm == NULL) { + /* TODO: prompt warning? 
*/ + ucs_debug("ibv_alloc_dm(dev=%s length=%zu) failed: %m", + uct_ib_device_name(data->device), dm_attr.length); + return UCS_ERR_NO_RESOURCE; + } + + data->mr = ibv_reg_dm_mr(uct_ib_iface_md(&iface->super)->pd, + data->dm, 0, dm_attr.length, + IBV_ACCESS_ZERO_BASED | + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_WRITE | + IBV_ACCESS_REMOTE_ATOMIC); + if (data->mr == NULL) { + ucs_warn("ibv_reg_mr_dm() error - On Device Memory registration failed, %d %m", errno); + status = UCS_ERR_NO_RESOURCE; + goto failed_mr; + } + + UCT_IB_MLX5_DV_DM(obj).in = data->dm; + UCT_IB_MLX5_DV_DM(obj).out = &dvdm; + uct_ib_mlx5dv_init_obj(&obj, MLX5DV_OBJ_DM); + data->start_va = dvdm.buf; + + status = ucs_mpool_init(&data->mp, 0, + sizeof(uct_rc_iface_send_desc_t), 0, UCS_SYS_CACHE_LINE_SIZE, + data->seg_count, data->seg_count, + &uct_dm_iface_mpool_ops, "mlx5_dm_desc"); + if (status != UCS_OK) { + goto failed_mpool; + } + + /* DM initialization may fail due to any reason, just + * free resources & continue without DM */ + return UCS_OK; + +failed_mpool: + ibv_dereg_mr(data->mr); +failed_mr: + ibv_free_dm(data->dm); + data->dm = NULL; + return status; +} + +static void uct_rc_mlx5_iface_common_dm_tl_cleanup(uct_mlx5_dm_data_t *data) +{ + ucs_assert(data->dm != NULL); + ucs_assert(data->mr != NULL); + + ucs_mpool_cleanup(&data->mp, 1); + ibv_dereg_mr(data->mr); + ibv_free_dm(data->dm); +} +#endif + +#if IBV_HW_TM + +void uct_rc_mlx5_init_rx_tm_common(uct_rc_mlx5_iface_common_t *iface, + const uct_rc_iface_common_config_t *config, + unsigned rndv_hdr_len) +{ + uct_ib_md_t *md = uct_ib_iface_md(&iface->super.super); + unsigned tmh_hdrs_len = sizeof(struct ibv_tmh) + rndv_hdr_len; + ucs_status_t status; + + iface->tm.eager_desc.super.cb = uct_rc_mlx5_release_desc; + iface->tm.rndv_desc.super.cb = uct_rc_mlx5_release_desc; + + if (UCT_RC_MLX5_MP_ENABLED(iface)) { + iface->tm.eager_desc.offset = sizeof(struct ibv_tmh) + + 
iface->super.super.config.rx_headroom_offset; + iface->tm.am_desc.offset = sizeof(uct_rc_mlx5_hdr_t) + + iface->super.super.config.rx_headroom_offset; + status = uct_iface_mpool_init(&iface->super.super.super, + &iface->tm.mp.tx_mp, + sizeof(uct_rc_iface_send_desc_t) + + iface->tm.max_bcopy, + sizeof(uct_rc_iface_send_desc_t), + UCS_SYS_CACHE_LINE_SIZE, + &config->super.tx.mp, + iface->super.config.tx_qp_len, + uct_rc_iface_send_desc_init, + "tag_eager_send_desc"); + if (status != UCS_OK) { + return; + } + + kh_init_inplace(uct_rc_mlx5_mp_hash_lid, &iface->tm.mp.hash_lid); + kh_init_inplace(uct_rc_mlx5_mp_hash_gid, &iface->tm.mp.hash_gid); + + iface->tm.bcopy_mp = &iface->tm.mp.tx_mp; + iface->tm.max_zcopy = uct_ib_iface_port_attr(&iface->super.super)->max_msg_sz; + + ucs_debug("MP WQ config: iface %p stride size %d, strides per WQE %d", + iface, iface->super.super.config.seg_size, + iface->tm.mp.num_strides); + } else { + iface->tm.eager_desc.offset = sizeof(struct ibv_tmh) - + sizeof(uct_rc_mlx5_hdr_t) + + iface->super.super.config.rx_headroom_offset; + iface->tm.bcopy_mp = &iface->super.tx.mp; + iface->tm.max_zcopy = iface->super.super.config.seg_size; + } + + iface->tm.rndv_desc.offset = iface->tm.eager_desc.offset + rndv_hdr_len; + + ucs_assert(IBV_DEVICE_TM_CAPS(&md->dev, max_rndv_hdr_size) >= tmh_hdrs_len); + iface->tm.max_rndv_data = IBV_DEVICE_TM_CAPS(&md->dev, max_rndv_hdr_size) - + tmh_hdrs_len; + + /* Init ptr array to store completions of RNDV operations. Index in + * ptr_array is used as operation ID and is passed in "app_context" + * of TM header. */ + ucs_ptr_array_init(&iface->tm.rndv_comps, 0, "rm_rndv_completions"); + + /* Set of addresses posted to the HW. Used to avoid posting of the same + * address more than once. 
*/ + kh_init_inplace(uct_rc_mlx5_tag_addrs, &iface->tm.tag_addrs); +} + +ucs_status_t uct_rc_mlx5_init_rx_tm(uct_rc_mlx5_iface_common_t *iface, + const uct_rc_iface_common_config_t *config, + struct ibv_srq_init_attr_ex *srq_attr, + unsigned rndv_hdr_len) +{ + uct_ib_md_t *md = uct_ib_iface_md(&iface->super.super); + ucs_status_t status; + + uct_rc_mlx5_init_rx_tm_common(iface, config, rndv_hdr_len); + + ucs_assert(iface->tm.mp.num_strides == 1); /* MP XRQ is supported with DEVX only */ +#if HAVE_DECL_IBV_EXP_CREATE_SRQ + /* Create TM-capable XRQ */ + srq_attr->base.attr.max_sge = 1; + srq_attr->base.attr.max_wr = ucs_max(UCT_IB_MLX5_XRQ_MIN_UWQ_POST, + config->super.rx.queue_len); + srq_attr->base.attr.srq_limit = 0; + srq_attr->base.srq_context = iface; + srq_attr->srq_type = IBV_EXP_SRQT_TAG_MATCHING; + srq_attr->pd = md->pd; + srq_attr->cq = iface->super.super.cq[UCT_IB_DIR_RX]; + srq_attr->tm_cap.max_num_tags = iface->tm.num_tags; + + uct_rc_mlx5_iface_tm_set_cmd_qp_len(iface); + srq_attr->tm_cap.max_ops = iface->tm.cmd_qp_len; + srq_attr->comp_mask |= IBV_EXP_CREATE_SRQ_CQ | + IBV_EXP_CREATE_SRQ_TM; + + iface->rx.srq.verbs.srq = ibv_exp_create_srq(md->dev.ibv_context, srq_attr); + if (iface->rx.srq.verbs.srq == NULL) { + ucs_error("ibv_exp_create_srq(device=%s) failed: %m", + uct_ib_device_name(&md->dev)); + return UCS_ERR_IO_ERROR; + } + + iface->super.rx.srq.quota = srq_attr->base.attr.max_wr; +#elif HAVE_DECL_IBV_CREATE_SRQ_EX + srq_attr->attr.max_sge = 1; + srq_attr->attr.max_wr = ucs_max(UCT_IB_MLX5_XRQ_MIN_UWQ_POST, + config->super.rx.queue_len); + srq_attr->attr.srq_limit = 0; + srq_attr->srq_context = iface; + srq_attr->srq_type = IBV_SRQT_TM; + srq_attr->pd = md->pd; + srq_attr->cq = iface->super.super.cq[UCT_IB_DIR_RX]; + srq_attr->tm_cap.max_num_tags = iface->tm.num_tags; + + uct_rc_mlx5_iface_tm_set_cmd_qp_len(iface); + srq_attr->tm_cap.max_ops = iface->tm.cmd_qp_len; + srq_attr->comp_mask |= IBV_SRQ_INIT_ATTR_TYPE | + IBV_SRQ_INIT_ATTR_PD | + 
IBV_SRQ_INIT_ATTR_CQ | + IBV_SRQ_INIT_ATTR_TM; + + iface->rx.srq.verbs.srq = ibv_create_srq_ex(md->dev.ibv_context, srq_attr); + if (iface->rx.srq.verbs.srq == NULL) { + ucs_error("ibv_create_srq_ex(device=%s) failed: %m", + uct_ib_device_name(&md->dev)); + return UCS_ERR_IO_ERROR; + } + + iface->super.rx.srq.quota = srq_attr->attr.max_wr; +#endif + + status = uct_ib_mlx5_srq_init(&iface->rx.srq, iface->rx.srq.verbs.srq, + iface->super.super.config.seg_size, + iface->tm.mp.num_strides); + if (status != UCS_OK) { + goto err_free_srq; + } + + iface->rx.srq.type = UCT_IB_MLX5_OBJ_TYPE_VERBS; + ucs_debug("Tag Matching enabled: tag list size %d", iface->tm.num_tags); + return UCS_OK; + +err_free_srq: + uct_ib_destroy_srq(iface->rx.srq.verbs.srq); + return status; +} +#endif + +void uct_rc_mlx5_tag_cleanup(uct_rc_mlx5_iface_common_t *iface) +{ +#if IBV_HW_TM + if (UCT_RC_MLX5_TM_ENABLED(iface)) { + ucs_ptr_array_cleanup(&iface->tm.rndv_comps); + UCS_STATS_NODE_FREE(iface->tm.stats); + } +#endif +} + +static void uct_rc_mlx5_tag_query(uct_rc_mlx5_iface_common_t *iface, + uct_iface_attr_t *iface_attr, + size_t max_inline, size_t max_iov) +{ +#if IBV_HW_TM + unsigned eager_hdr_size = sizeof(struct ibv_tmh); + struct ibv_port_attr* port_attr; + + if (!UCT_RC_MLX5_TM_ENABLED(iface)) { + return; + } + + iface_attr->cap.flags |= UCT_IFACE_FLAG_TAG_EAGER_BCOPY | + UCT_IFACE_FLAG_TAG_EAGER_ZCOPY | + UCT_IFACE_FLAG_TAG_RNDV_ZCOPY; + + if (max_inline >= eager_hdr_size) { + iface_attr->cap.tag.eager.max_short = max_inline - eager_hdr_size; + iface_attr->cap.flags |= UCT_IFACE_FLAG_TAG_EAGER_SHORT; + } + + port_attr = uct_ib_iface_port_attr(&iface->super.super); + iface_attr->cap.tag.rndv.max_zcopy = port_attr->max_msg_sz; + + /* TMH can carry 2 additional bytes of private data */ + iface_attr->cap.tag.rndv.max_hdr = iface->tm.max_rndv_data + + UCT_RC_MLX5_TMH_PRIV_LEN; + iface_attr->cap.tag.rndv.max_iov = 1; + iface_attr->cap.tag.recv.max_zcopy = port_attr->max_msg_sz; + 
iface_attr->cap.tag.recv.max_iov = 1; + iface_attr->cap.tag.recv.min_recv = 0; + iface_attr->cap.tag.recv.max_outstanding = iface->tm.num_tags; + iface_attr->cap.tag.eager.max_iov = max_iov; + iface_attr->cap.tag.eager.max_bcopy = iface->tm.max_bcopy - eager_hdr_size; + iface_attr->cap.tag.eager.max_zcopy = iface->tm.max_zcopy - eager_hdr_size; +#endif +} + +ucs_status_t +uct_rc_mlx5_iface_common_dm_init(uct_rc_mlx5_iface_common_t *iface, + uct_rc_iface_t *rc_iface, + const uct_ib_mlx5_iface_config_t *mlx5_config) +{ +#if HAVE_IBV_DM + if ((mlx5_config->dm.seg_len * mlx5_config->dm.count) == 0) { + goto fallback; + } + + iface->dm.dm = uct_worker_tl_data_get(iface->super.super.super.worker, + UCT_IB_MLX5_WORKER_DM_KEY, + uct_mlx5_dm_data_t, + uct_rc_mlx5_iface_common_dm_device_cmp, + uct_rc_mlx5_iface_common_dm_tl_init, + &iface->super, mlx5_config); + if (UCS_PTR_IS_ERR(iface->dm.dm)) { + goto fallback; + } + + ucs_assert(iface->dm.dm->dm != NULL); + iface->dm.seg_len = iface->dm.dm->seg_len; + return UCS_OK; + +fallback: + iface->dm.dm = NULL; +#endif + return UCS_OK; +} + +void uct_rc_mlx5_iface_common_dm_cleanup(uct_rc_mlx5_iface_common_t *iface) +{ +#if HAVE_IBV_DM + if (iface->dm.dm) { + uct_worker_tl_data_put(iface->dm.dm, uct_rc_mlx5_iface_common_dm_tl_cleanup); + } +#endif +} + +void uct_rc_mlx5_iface_common_query(uct_ib_iface_t *ib_iface, + uct_iface_attr_t *iface_attr, + size_t max_inline, size_t av_size) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(ib_iface, + uct_rc_mlx5_iface_common_t); + uct_ib_device_t *dev = uct_ib_iface_device(ib_iface); + + /* Atomics */ + iface_attr->cap.flags |= UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF | + UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM; + + if (uct_ib_device_has_pci_atomics(dev)) { + if (dev->pci_fadd_arg_sizes & sizeof(uint64_t)) { + iface_attr->cap.atomic64.op_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD); + iface_attr->cap.atomic64.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD); + } + if (dev->pci_cswap_arg_sizes & 
sizeof(uint64_t)) { + iface_attr->cap.atomic64.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_CSWAP); + } + + if (dev->pci_fadd_arg_sizes & sizeof(uint32_t)) { + iface_attr->cap.atomic32.op_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD); + iface_attr->cap.atomic32.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD); + } + if (dev->pci_cswap_arg_sizes & sizeof(uint32_t)) { + iface_attr->cap.atomic32.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_CSWAP); + } + iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_CPU; + } else { + if (dev->atomic_arg_sizes & sizeof(uint64_t)) { + iface_attr->cap.atomic64.op_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD); + iface_attr->cap.atomic64.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_CSWAP); + + iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_DEVICE; + } + + if (dev->ext_atomic_arg_sizes & sizeof(uint64_t)) { + iface_attr->cap.atomic64.op_flags |= UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR); + iface_attr->cap.atomic64.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR) | + UCS_BIT(UCT_ATOMIC_OP_SWAP); + iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_DEVICE; + } + + if (dev->ext_atomic_arg_sizes & sizeof(uint32_t)) { + iface_attr->cap.atomic32.op_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR); + iface_attr->cap.atomic32.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR) | + UCS_BIT(UCT_ATOMIC_OP_SWAP) | + UCS_BIT(UCT_ATOMIC_OP_CSWAP); + iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_DEVICE; + } + } + + /* Software overhead */ + iface_attr->overhead = 40e-9; + + /* Tag Offload */ + uct_rc_mlx5_tag_query(iface, iface_attr, max_inline, + UCT_RC_MLX5_TM_EAGER_ZCOPY_MAX_IOV(av_size)); +} + +void uct_rc_mlx5_iface_common_update_cqs_ci(uct_rc_mlx5_iface_common_t *iface, + uct_ib_iface_t *ib_iface) +{ +#if 
!HAVE_DECL_MLX5DV_INIT_OBJ + uct_ib_mlx5_update_cq_ci(ib_iface->cq[UCT_IB_DIR_TX], iface->cq[UCT_IB_DIR_TX].cq_ci); + uct_ib_mlx5_update_cq_ci(ib_iface->cq[UCT_IB_DIR_RX], iface->cq[UCT_IB_DIR_RX].cq_ci); +#endif +} + +void uct_rc_mlx5_iface_common_sync_cqs_ci(uct_rc_mlx5_iface_common_t *iface, + uct_ib_iface_t *ib_iface) +{ +#if !HAVE_DECL_MLX5DV_INIT_OBJ + iface->cq[UCT_IB_DIR_TX].cq_ci = uct_ib_mlx5_get_cq_ci(ib_iface->cq[UCT_IB_DIR_TX]); + iface->cq[UCT_IB_DIR_RX].cq_ci = uct_ib_mlx5_get_cq_ci(ib_iface->cq[UCT_IB_DIR_RX]); +#endif +} + +int uct_rc_mlx5_iface_commom_clean(uct_ib_mlx5_cq_t *mlx5_cq, + uct_ib_mlx5_srq_t *srq, uint32_t qpn) +{ + const size_t cqe_sz = 1ul << mlx5_cq->cqe_size_log; + struct mlx5_cqe64 *cqe, *dest; + uct_ib_mlx5_srq_seg_t *seg; + unsigned pi, idx; + uint8_t owner_bit; + int nfreed; + + pi = mlx5_cq->cq_ci; + for (;;) { + cqe = uct_ib_mlx5_get_cqe(mlx5_cq, pi); + if (uct_ib_mlx5_cqe_is_hw_owned(cqe->op_own, pi, mlx5_cq->cq_length)) { + break; + } + + ucs_assert((cqe->op_own >> 4) != MLX5_CQE_INVALID); + + ++pi; + if (pi == (mlx5_cq->cq_ci + mlx5_cq->cq_length - 1)) { + break; + } + } + + ucs_memory_cpu_load_fence(); + + /* Remove CQEs of the destroyed QP, so the driver would not see them and try + * to remove them itself, creating a mess with the free-list. 
+ */ + nfreed = 0; + while ((int)--pi - (int)mlx5_cq->cq_ci >= 0) { + cqe = uct_ib_mlx5_get_cqe(mlx5_cq, pi); + if ((ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER)) == qpn) { + idx = ntohs(cqe->wqe_counter); + if (srq) { + seg = uct_ib_mlx5_srq_get_wqe(srq, idx); + seg->srq.free = 1; + ucs_trace("cq %p: freed srq seg[%d] of qpn 0x%x", + mlx5_cq, idx, qpn); + } + ++nfreed; + } else if (nfreed) { + dest = uct_ib_mlx5_get_cqe(mlx5_cq, pi + nfreed); + owner_bit = dest->op_own & MLX5_CQE_OWNER_MASK; + memcpy(UCS_PTR_BYTE_OFFSET(dest + 1, -cqe_sz), + UCS_PTR_BYTE_OFFSET(cqe + 1, -cqe_sz), cqe_sz); + dest->op_own = (dest->op_own & ~MLX5_CQE_OWNER_MASK) | owner_bit; + } + } + + mlx5_cq->cq_ci += nfreed; + + return nfreed; +} + diff --git a/src/uct/ib/rc/accel/rc_mlx5_common.h b/src/uct/ib/rc/accel/rc_mlx5_common.h new file mode 100644 index 0000000..7bcf276 --- /dev/null +++ b/src/uct/ib/rc/accel/rc_mlx5_common.h @@ -0,0 +1,708 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCT_RC_MLX5_COMMON_H +#define UCT_RC_MLX5_COMMON_H + +#include +#include +#include +#include + + +/* + * HW tag matching + */ +#if IBV_HW_TM +# define UCT_RC_RNDV_HDR_LEN sizeof(struct ibv_rvh) +#else +# define UCT_RC_RNDV_HDR_LEN 0 +#endif + +#if IBV_HW_TM +# if HAVE_INFINIBAND_TM_TYPES_H +# include +# else +# define ibv_tmh ibv_exp_tmh +# define ibv_rvh ibv_exp_tmh_rvh +# define IBV_TM_CAP_RC IBV_EXP_TM_CAP_RC +# define IBV_TMH_EAGER IBV_EXP_TMH_EAGER +# define IBV_TMH_RNDV IBV_EXP_TMH_RNDV +# define IBV_TMH_FIN IBV_EXP_TMH_FIN +# define IBV_TMH_NO_TAG IBV_EXP_TMH_NO_TAG +# endif +# define IBV_DEVICE_TM_CAPS(_dev, _field) ((_dev)->dev_attr.tm_caps._field) +# define IBV_DEVICE_MP_MIN_NUM_STRIDES 8 +#else +# define IBV_TM_CAP_RC 0 +# define IBV_DEVICE_TM_CAPS(_dev, _field) 0 +# define IBV_DEVICE_MP_MIN_NUM_STRIDES 0 +#endif + +#if HAVE_STRUCT_IBV_TM_CAPS_FLAGS +# define IBV_DEVICE_TM_FLAGS(_dev) IBV_DEVICE_TM_CAPS(_dev, flags) +#else +# define IBV_DEVICE_TM_FLAGS(_dev) IBV_DEVICE_TM_CAPS(_dev, capability_flags) +#endif + +#define IBV_DEVICE_MAX_UNEXP_COUNT UCS_BIT(14) + +#if HAVE_DECL_IBV_EXP_CREATE_SRQ +# define ibv_srq_init_attr_ex ibv_exp_create_srq_attr +#endif + +#define UCT_RC_MLX5_OPCODE_FLAG_RAW 0x100 +#define UCT_RC_MLX5_OPCODE_FLAG_TM 0x200 +#define UCT_RC_MLX5_OPCODE_MASK 0xff +#define UCT_RC_MLX5_SINGLE_FRAG_MSG(_flags) \ + (((_flags) & UCT_CB_PARAM_FLAG_FIRST) && !((_flags) & UCT_CB_PARAM_FLAG_MORE)) + +#define UCT_RC_MLX5_CHECK_AM_ZCOPY(_id, _header_length, _length, _seg_size, _av_size) \ + UCT_CHECK_AM_ID(_id); \ + UCT_RC_CHECK_ZCOPY_DATA(_header_length, _length, _seg_size) \ + UCT_CHECK_LENGTH(sizeof(uct_rc_mlx5_hdr_t) + _header_length, 0, \ + UCT_IB_MLX5_AM_ZCOPY_MAX_HDR(_av_size), "am_zcopy header"); + + +#define UCT_RC_MLX5_CHECK_AM_SHORT(_id, _length, _av_size) \ + UCT_CHECK_AM_ID(_id); \ + UCT_CHECK_LENGTH(sizeof(uct_rc_mlx5_am_short_hdr_t) + _length, 0, \ + UCT_IB_MLX5_AM_MAX_SHORT(_av_size), "am_short"); + + +/* there is no 
need to do a special check for length == 0 because in that + * case wqe size is valid: inl + raddr + dgram + ctrl fit in 2 WQ BB + */ +#define UCT_RC_MLX5_CHECK_PUT_SHORT(_length, _av_size) \ + UCT_CHECK_LENGTH(_length, 0, UCT_IB_MLX5_PUT_MAX_SHORT(_av_size), "put_short") + +#define UCT_RC_MLX5_ATOMIC_OPS (UCS_BIT(UCT_ATOMIC_OP_ADD) | \ + UCS_BIT(UCT_ATOMIC_OP_AND) | \ + UCS_BIT(UCT_ATOMIC_OP_OR) | \ + UCS_BIT(UCT_ATOMIC_OP_XOR)) + +#define UCT_RC_MLX5_ATOMIC_FOPS (UCT_RC_MLX5_ATOMIC_OPS | UCS_BIT(UCT_ATOMIC_OP_SWAP)) + +#define UCT_RC_MLX5_CHECK_ATOMIC_OPS(_op, _size, _flags) \ + if (ucs_unlikely(!(UCS_BIT(_op) & (_flags)))) { \ + ucs_assertv(0, "incorrect opcode for atomic: %d", _op); \ + return UCS_ERR_UNSUPPORTED; \ + } else { \ + ucs_assert((_size == sizeof(uint64_t)) || (_size == sizeof(uint32_t))); \ + } + +#define UCT_RC_MLX5_TO_BE(_val, _size) \ + ((_size) == sizeof(uint64_t) ? htobe64(_val) : htobe32(_val)) + +#define UCT_RC_MLX5_DECLARE_ATOMIC_LE_HANDLER(_bits) \ + void \ + uct_rc_mlx5_common_atomic##_bits##_le_handler(uct_rc_iface_send_op_t *op, \ + const void *resp); + +UCT_RC_MLX5_DECLARE_ATOMIC_LE_HANDLER(32) +UCT_RC_MLX5_DECLARE_ATOMIC_LE_HANDLER(64) + +typedef enum { + UCT_RC_MLX5_SRQ_TOPO_LIST, + UCT_RC_MLX5_SRQ_TOPO_CYCLIC, + UCT_RC_MLX5_SRQ_TOPO_AUTO, + UCT_RC_MLX5_SRQ_TOPO_LAST +} uct_rc_mlx5_srq_topo_t; + +enum { + UCT_RC_MLX5_IFACE_STAT_RX_INL_32, + UCT_RC_MLX5_IFACE_STAT_RX_INL_64, + UCT_RC_MLX5_IFACE_STAT_LAST +}; + +enum { + UCT_RC_MLX5_TM_OPCODE_NOP = 0x00, + UCT_RC_MLX5_TM_OPCODE_APPEND = 0x01, + UCT_RC_MLX5_TM_OPCODE_REMOVE = 0x02 +}; + +/* TODO: Remove/replace this enum when mlx5dv.h is included */ +enum { + UCT_RC_MLX5_OPCODE_TAG_MATCHING = 0x28, + UCT_RC_MLX5_CQE_APP_TAG_MATCHING = 1, + + /* last packet flag for multi-packet RQs */ + UCT_RC_MLX5_MP_RQ_LAST_MSG_FIELD = 0x40000000, + + /* byte count mask for multi-packet RQs */ + UCT_RC_MLX5_MP_RQ_BYTE_CNT_FIELD_MASK = 0x0000FFFF, + + UCT_RC_MLX5_MP_RQ_NUM_STRIDES_FIELD_MASK = 
0x3FFF0000, + + /* filler cqe indicator */ + UCT_RC_MLX5_MP_RQ_FILLER_CQE = UCS_BIT(31), + + /* tag segment flags */ + UCT_RC_MLX5_SRQ_FLAG_TM_SW_CNT = (1 << 6), + UCT_RC_MLX5_SRQ_FLAG_TM_CQE_REQ = (1 << 7), + + /* tag CQE codes */ + UCT_RC_MLX5_CQE_APP_OP_TM_CONSUMED = 0x1, + UCT_RC_MLX5_CQE_APP_OP_TM_EXPECTED = 0x2, + UCT_RC_MLX5_CQE_APP_OP_TM_UNEXPECTED = 0x3, + UCT_RC_MLX5_CQE_APP_OP_TM_NO_TAG = 0x4, + UCT_RC_MLX5_CQE_APP_OP_TM_APPEND = 0x5, + UCT_RC_MLX5_CQE_APP_OP_TM_REMOVE = 0x6, + UCT_RC_MLX5_CQE_APP_OP_TM_CONSUMED_MSG = 0xA +}; + +enum { + UCT_RC_MLX5_POLL_FLAG_TM = UCS_BIT(0), + UCT_RC_MLX5_POLL_FLAG_HAS_EP = UCS_BIT(1) +}; + +#if IBV_HW_TM +# define UCT_RC_MLX5_TM_EAGER_ZCOPY_MAX_IOV(_av_size) \ + (UCT_IB_MLX5_AM_MAX_SHORT(_av_size + sizeof(struct ibv_tmh))/ \ + sizeof(struct mlx5_wqe_data_seg)) +# else +# define UCT_RC_MLX5_TM_EAGER_ZCOPY_MAX_IOV(_av_size) 0 +#endif /* IBV_HW_TM */ + +#define UCT_RC_MLX5_TM_CQE_WITH_IMM(_cqe64) \ + (((_cqe64)->op_own >> 4) == MLX5_CQE_RESP_SEND_IMM) + +#define UCT_RC_MLX5_TM_IS_SW_RNDV(_cqe64, _imm_data) \ + (ucs_unlikely(UCT_RC_MLX5_TM_CQE_WITH_IMM(_cqe64) && !(_imm_data))) + +#define UCT_RC_MLX5_CHECK_TAG(_mlx5_common_iface) \ + if (ucs_unlikely((_mlx5_common_iface)->tm.head->next == NULL)) { \ + return UCS_ERR_EXCEEDS_LIMIT; \ + } + + +typedef struct uct_rc_mlx5_hdr { + uint8_t tmh_opcode; /* TMH.opcode */ + uct_rc_hdr_t rc_hdr; +} UCS_S_PACKED uct_rc_mlx5_hdr_t; + +/* + * Short active message header (active message header is always 64 bit). + */ +typedef struct uct_rc_mlx5_am_short_hdr { + uct_rc_mlx5_hdr_t rc_hdr; + uint64_t am_hdr; +} UCS_S_PACKED uct_rc_mlx5_am_short_hdr_t; + + +/* TODO: Remove this struct when mlx5dv.h is included! 
*/ +typedef struct uct_rc_mlx5_wqe_tm_seg { + uint8_t opcode; + uint8_t flags; + uint16_t index; + uint8_t rsvd0[2]; + uint16_t sw_cnt; + uint8_t rsvd1[8]; + uint64_t append_tag; + uint64_t append_mask; +} uct_rc_mlx5_wqe_tm_seg_t; + + +/* Tag matching list entry */ +typedef struct uct_rc_mlx5_tag_entry { + struct uct_rc_mlx5_tag_entry *next; + uct_tag_context_t *ctx; /* the corresponding UCT context */ + unsigned num_cqes; /* how many CQEs are expected for this entry */ +} uct_rc_mlx5_tag_entry_t; + + +/* Pending operation on the command QP */ +typedef struct uct_rc_mlx5_srq_op { + uct_rc_mlx5_tag_entry_t *tag; +} uct_rc_mlx5_srq_op_t; + + +/* Command QP work-queue. All tag matching list operations are posted on it. */ +typedef struct uct_rc_mlx5_cmd_wq { + uct_ib_mlx5_txwq_t super; + uct_rc_mlx5_srq_op_t *ops; /* array of operations on command QP */ + int ops_head; /* points to the next operation to be completed */ + int ops_tail; /* points to the last added operation */ + int ops_mask; /* mask which bounds head and tail by + ops array size */ +} uct_rc_mlx5_cmd_wq_t; + + +/* Message context used with multi-packet XRQ */ +typedef struct uct_rc_mlx5_mp_context { + /* Storage for a per-message user-defined context. Must be passed unchanged + * to the user in uct_tag_unexp_eager_cb_t. */ + void *context; + + /* Tag is saved when first fragment (with TMH) arrives and then passed to + * the eager unexpected callback for subsequent fragments. */ + uct_tag_t tag; + + /* With MP XRQ immediate value is delivered with the last fragment, while + * TMH is present in the first fragment only. Need to save app_context + * from TMH in this field and construct immediate data for unexpected + * eager callback when the last message fragment arrives. */ + uint32_t app_ctx; + + /* Used when local EP can be found by sender QP number (rc_mlx5 tl). 
+ * When 0, it means that tag eager unexpected multi-fragmented message + * is being processed (not all fragments are delivered to the user via + * uct_tag_unexp_eager_cb_t callback yet). Otherwise, any incoming tag + * eager message should be either a single fragment message or the first + * fragment of multi-fragmented message. */ + uint8_t free; +} uct_rc_mlx5_mp_context_t; + + +typedef struct uct_rc_mlx5_mp_hash_key { + uint64_t guid; + uint32_t qp_num; +} uct_rc_mlx5_mp_hash_key_t; + + +static UCS_F_ALWAYS_INLINE int +uct_rc_mlx5_mp_hash_equal(uct_rc_mlx5_mp_hash_key_t key1, + uct_rc_mlx5_mp_hash_key_t key2) +{ + return (key1.qp_num == key2.qp_num) && (key1.guid == key2.guid); +} + + +static UCS_F_ALWAYS_INLINE khint32_t +uct_rc_mlx5_mp_hash_func(uct_rc_mlx5_mp_hash_key_t key) +{ + return kh_int64_hash_func(key.guid ^ key.qp_num); +} + + +KHASH_MAP_INIT_INT64(uct_rc_mlx5_mp_hash_lid, uct_rc_mlx5_mp_context_t); + + +KHASH_INIT(uct_rc_mlx5_mp_hash_gid, uct_rc_mlx5_mp_hash_key_t, + uct_rc_mlx5_mp_context_t, 1, uct_rc_mlx5_mp_hash_func, + uct_rc_mlx5_mp_hash_equal); + + +#if IBV_HW_TM +# define UCT_RC_MLX5_IFACE_GET_TM_BCOPY_DESC(_iface, _mp, _desc, _tag, _app_ctx, \ + _pack_cb, _arg, _length) \ + { \ + void *hdr; \ + UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc) \ + (_desc)->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; \ + hdr = (_desc) + 1; \ + uct_rc_mlx5_fill_tmh(hdr, _tag, _app_ctx, IBV_TMH_EAGER); \ + hdr = UCS_PTR_BYTE_OFFSET(hdr, sizeof(struct ibv_tmh)); \ + _length = _pack_cb(hdr, _arg); \ + } +#endif + +enum { + UCT_RC_MLX5_STAT_TAG_RX_EXP, + UCT_RC_MLX5_STAT_TAG_RX_EAGER_UNEXP, + UCT_RC_MLX5_STAT_TAG_RX_RNDV_UNEXP, + UCT_RC_MLX5_STAT_TAG_RX_RNDV_REQ_EXP, + UCT_RC_MLX5_STAT_TAG_RX_RNDV_REQ_UNEXP, + UCT_RC_MLX5_STAT_TAG_RX_RNDV_FIN, + UCT_RC_MLX5_STAT_TAG_LIST_ADD, + UCT_RC_MLX5_STAT_TAG_LIST_DEL, + UCT_RC_MLX5_STAT_TAG_LIST_SYNC, + UCT_RC_MLX5_STAT_TAG_LAST +}; + +typedef struct uct_rc_mlx5_tmh_priv_data { + uint8_t length; + uint16_t data; +} 
UCS_S_PACKED uct_rc_mlx5_tmh_priv_data_t; + +void uct_rc_mlx5_release_desc(uct_recv_desc_t *self, void *desc); + +typedef struct uct_rc_mlx5_release_desc { + uct_recv_desc_t super; + unsigned offset; +} uct_rc_mlx5_release_desc_t; + + +typedef struct uct_rc_mlx5_ctx_priv { + uint64_t tag; + void *buffer; + uint32_t app_ctx; + uint32_t length; + uint32_t tag_handle; +} uct_rc_mlx5_ctx_priv_t; + +#if HAVE_IBV_DM +typedef struct uct_mlx5_dm_data { + uct_worker_tl_data_t super; + ucs_mpool_t mp; + struct ibv_mr *mr; + struct ibv_dm *dm; + void *start_va; + size_t seg_len; + unsigned seg_count; + unsigned seg_attached; + uct_ib_device_t *device; +} uct_mlx5_dm_data_t; + +typedef union uct_rc_mlx5_dm_copy_data { + uct_rc_mlx5_am_short_hdr_t am_hdr; + struct ibv_tmh tm_hdr; + char bytes[sizeof(uint64_t) * 2]; +} UCS_S_PACKED uct_rc_mlx5_dm_copy_data_t; +#endif + +#define uct_rc_mlx5_tag_addr_hash(_ptr) kh_int64_hash_func((uintptr_t)(_ptr)) +KHASH_INIT(uct_rc_mlx5_tag_addrs, void*, char, 0, uct_rc_mlx5_tag_addr_hash, + kh_int64_hash_equal) + +typedef struct uct_rc_mlx5_iface_common { + uct_rc_iface_t super; + struct { + ucs_mpool_t atomic_desc_mp; + uct_ib_mlx5_mmio_mode_t mmio_mode; + uint16_t bb_max; /* limit number of outstanding WQE BBs */ + } tx; + struct { + uct_ib_mlx5_srq_t srq; + void *pref_ptr; + } rx; + uct_ib_mlx5_cq_t cq[UCT_IB_DIR_NUM]; + struct { + uct_rc_mlx5_cmd_wq_t cmd_wq; + uct_rc_mlx5_tag_entry_t *head; + uct_rc_mlx5_tag_entry_t *tail; + uct_rc_mlx5_tag_entry_t *list; + ucs_mpool_t *bcopy_mp; + khash_t(uct_rc_mlx5_tag_addrs) tag_addrs; + + ucs_ptr_array_t rndv_comps; + size_t max_bcopy; + size_t max_zcopy; + unsigned num_tags; + unsigned num_outstanding; + unsigned max_rndv_data; + uint16_t unexpected_cnt; + uint16_t cmd_qp_len; + uint8_t enabled; + struct { + uint8_t num_strides; + ucs_mpool_t tx_mp; + uct_rc_mlx5_mp_context_t last_frag_ctx; + khash_t(uct_rc_mlx5_mp_hash_lid) hash_lid; + khash_t(uct_rc_mlx5_mp_hash_gid) hash_gid; + } mp; + struct { + 
void *arg; /* User defined arg */ + uct_tag_unexp_eager_cb_t cb; /* Callback for unexpected eager messages */ + } eager_unexp; + + struct { + void *arg; /* User defined arg */ + uct_tag_unexp_rndv_cb_t cb; /* Callback for unexpected rndv messages */ + } rndv_unexp; + uct_rc_mlx5_release_desc_t eager_desc; + uct_rc_mlx5_release_desc_t rndv_desc; + uct_rc_mlx5_release_desc_t am_desc; + UCS_STATS_NODE_DECLARE(stats) + } tm; +#if HAVE_IBV_DM + struct { + uct_mlx5_dm_data_t *dm; + size_t seg_len; /* cached value to avoid double-pointer access */ + ucs_status_t (*am_short)(uct_ep_h tl_ep, uint8_t id, uint64_t hdr, + const void *payload, unsigned length); + ucs_status_t (*tag_short)(uct_ep_h tl_ep, uct_tag_t tag, + const void *data, size_t length); + } dm; +#endif + struct { + uint8_t atomic_fence_flag; + uint8_t put_fence_flag; + } config; + UCS_STATS_NODE_DECLARE(stats) +} uct_rc_mlx5_iface_common_t; + +/** + * Common RC/DC mlx5 interface configuration + */ +typedef struct uct_rc_mlx5_iface_common_config { + uct_ib_mlx5_iface_config_t super; + unsigned tx_max_bb; + struct { + int enable; + unsigned list_size; + size_t seg_size; + size_t mp_num_strides; + } tm; + unsigned exp_backoff; + uct_rc_mlx5_srq_topo_t srq_topo; +} uct_rc_mlx5_iface_common_config_t; + + +UCS_CLASS_DECLARE(uct_rc_mlx5_iface_common_t, + uct_rc_iface_ops_t*, + uct_md_h, uct_worker_h, + const uct_iface_params_t*, + uct_rc_iface_common_config_t*, + uct_rc_mlx5_iface_common_config_t*, + uct_ib_iface_init_attr_t*); + + +#define UCT_RC_MLX5_TM_STAT(_iface, _op) \ + UCS_STATS_UPDATE_COUNTER((_iface)->tm.stats, UCT_RC_MLX5_STAT_TAG_##_op, 1) + +#define UCT_RC_MLX5_TM_ENABLED(_iface) (_iface)->tm.enabled + +#define UCT_RC_MLX5_MP_ENABLED(_iface) ((_iface)->tm.mp.num_strides > 1) + +/* TMH can carry 2 bytes of data in its reserved field */ +#define UCT_RC_MLX5_TMH_PRIV_LEN ucs_field_sizeof(uct_rc_mlx5_tmh_priv_data_t, \ + data) + +#define UCT_RC_MLX5_CHECK_RNDV_PARAMS(_iovcnt, _header_len, _tm_len, \ + 
_max_inline, _max_rndv_hdr) \ + { /* each check reports the macro's own argument, never a caller-scope name */ \ + UCT_CHECK_PARAM_PTR((_iovcnt) <= 1ul, "Wrong iovcnt %lu", (_iovcnt)); \ + UCT_CHECK_PARAM_PTR(_header_len <= _max_rndv_hdr, \ + "Invalid header len %u", _header_len); \ + UCT_CHECK_PARAM_PTR((_header_len + _tm_len) <= _max_inline, \ + "Invalid RTS length %u", \ + _header_len + _tm_len); \ + } + +#define UCT_RC_MLX5_FILL_TM_IMM(_imm_data, _app_ctx, _ib_imm, _res_op, \ + _op, _imm_suffix) \ + if (_imm_data == 0) { \ + _res_op = _op; \ + _app_ctx = 0; \ + _ib_imm = 0; \ + } else { \ + _res_op = UCS_PP_TOKENPASTE(_op, _imm_suffix); \ + uct_rc_mlx5_tag_imm_data_pack(&(_ib_imm), &(_app_ctx), _imm_data); \ + } + +#define UCT_RC_MLX5_GET_TX_TM_DESC(_iface, _mp, _desc, _tag, _app_ctx, _hdr) \ + { /* fills an eager TMH right after the descriptor; _hdr is left pointing just past it */ \ + UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc) \ + _hdr = (_desc) + 1; \ + uct_rc_mlx5_fill_tmh(_hdr, _tag, _app_ctx, IBV_TMH_EAGER); \ + _hdr = UCS_PTR_BYTE_OFFSET(_hdr, sizeof(struct ibv_tmh)); \ + } + +#define UCT_RC_MLX5_GET_TM_BCOPY_DESC(_iface, _mp, _desc, _tag, _app_ctx, \ + _pack_cb, _arg, _length) \ + { \ + void *hdr; \ + UCT_RC_MLX5_GET_TX_TM_DESC(_iface, _mp, _desc, _tag, _app_ctx, hdr) \ + (_desc)->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; \ + _length = _pack_cb(hdr, _arg); \ + } + +#if IBV_HW_TM +void uct_rc_mlx5_handle_unexp_rndv(uct_rc_mlx5_iface_common_t *iface, + struct ibv_tmh *tmh, uct_tag_t tag, + struct mlx5_cqe64 *cqe, unsigned flags, + unsigned byte_len); + + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_fill_tmh(struct ibv_tmh *tmh, uct_tag_t tag, + uint32_t app_ctx, unsigned op) +{ + tmh->opcode = op; + tmh->app_ctx = app_ctx; + tmh->tag = tag; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_fill_rvh(struct ibv_rvh *rvh, const void *vaddr, + uint32_t rkey, uint32_t len) +{ + rvh->va = htobe64((uint64_t)vaddr); + rvh->rkey = htonl(rkey); + rvh->len = htonl(len); +} + +static UCS_F_ALWAYS_INLINE unsigned +uct_rc_mlx5_tag_get_op_id(uct_rc_mlx5_iface_common_t *iface, uct_completion_t *comp) +{ + uint32_t prev_ph; + return 
ucs_ptr_array_insert(&iface->tm.rndv_comps, comp, &prev_ph); +} + + +static UCS_F_ALWAYS_INLINE unsigned +uct_rc_mlx5_fill_tmh_priv_data(struct ibv_tmh *tmh, const void *hdr, + unsigned hdr_len, unsigned max_rndv_priv_data) +{ + uct_rc_mlx5_tmh_priv_data_t *priv = (uct_rc_mlx5_tmh_priv_data_t*)tmh->reserved; + + /* If header length is bigger than max_rndv_priv_data size, the first + * (hdr_len - max_rndv_priv_data) bytes of hdr are stored in the TMH reserved field. */ + if (hdr_len > max_rndv_priv_data) { + priv->length = hdr_len - max_rndv_priv_data; + ucs_assert(priv->length <= UCT_RC_MLX5_TMH_PRIV_LEN); + memcpy(&priv->data, (char*)hdr, priv->length); + } else { + priv->length = 0; + } + + return priv->length; +} +#endif + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_tag_imm_data_pack(uint32_t *ib_imm, uint32_t *app_ctx, + uint64_t imm_val) +{ + *ib_imm = (uint32_t)(imm_val & 0xFFFFFFFF); + *app_ctx = (uint32_t)(imm_val >> 32); +} + +static UCS_F_ALWAYS_INLINE uint64_t +uct_rc_mlx5_tag_imm_data_unpack(uint32_t ib_imm, uint32_t app_ctx, int is_imm) +{ + return is_imm ? 
(((uint64_t)app_ctx << 32) | ib_imm) : 0ul; +} + +static UCS_F_ALWAYS_INLINE uct_rc_mlx5_ctx_priv_t* +uct_rc_mlx5_ctx_priv(uct_tag_context_t *ctx) +{ + return (uct_rc_mlx5_ctx_priv_t*)ctx->priv; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_handle_rndv_fin(uct_rc_mlx5_iface_common_t *iface, uint32_t app_ctx) +{ + int found; + void *rndv_comp; + + found = ucs_ptr_array_lookup(&iface->tm.rndv_comps, app_ctx, rndv_comp); + ucs_assert_always(found > 0); + uct_invoke_completion((uct_completion_t*)rndv_comp, UCS_OK); + ucs_ptr_array_remove(&iface->tm.rndv_comps, app_ctx, 0); +} + +extern ucs_config_field_t uct_rc_mlx5_common_config_table[]; + +unsigned uct_rc_mlx5_iface_srq_post_recv(uct_rc_mlx5_iface_common_t *iface); + +void uct_rc_mlx5_iface_common_prepost_recvs(uct_rc_mlx5_iface_common_t *iface); + +ucs_status_t uct_rc_mlx5_iface_common_init(uct_rc_mlx5_iface_common_t *iface, + uct_rc_iface_t *rc_iface, + const uct_rc_iface_config_t *config, + const uct_ib_mlx5_iface_config_t *mlx5_config); + +void uct_rc_mlx5_iface_common_cleanup(uct_rc_mlx5_iface_common_t *iface); + +ucs_status_t uct_rc_mlx5_iface_common_dm_init(uct_rc_mlx5_iface_common_t *iface, + uct_rc_iface_t *rc_iface, + const uct_ib_mlx5_iface_config_t *mlx5_config); + +void uct_rc_mlx5_iface_common_dm_cleanup(uct_rc_mlx5_iface_common_t *iface); + +void uct_rc_mlx5_iface_common_query(uct_ib_iface_t *ib_iface, + uct_iface_attr_t *iface_attr, + size_t max_inline, size_t av_size); + +void uct_rc_mlx5_iface_common_update_cqs_ci(uct_rc_mlx5_iface_common_t *iface, + uct_ib_iface_t *ib_iface); + +void uct_rc_mlx5_iface_common_sync_cqs_ci(uct_rc_mlx5_iface_common_t *iface, + uct_ib_iface_t *ib_iface); + +int uct_rc_mlx5_iface_commom_clean(uct_ib_mlx5_cq_t *mlx5_cq, + uct_ib_mlx5_srq_t *srq, uint32_t qpn); + +static UCS_F_MAYBE_UNUSED void +uct_rc_mlx5_iface_tm_set_cmd_qp_len(uct_rc_mlx5_iface_common_t *iface) +{ + /* 2 ops for each tag (ADD + DEL) and extra ops for SYNC. 
*/ + iface->tm.cmd_qp_len = (2 * iface->tm.num_tags) + 2; +} + +#if IBV_HW_TM +void uct_rc_mlx5_init_rx_tm_common(uct_rc_mlx5_iface_common_t *iface, + const uct_rc_iface_common_config_t *config, + unsigned rndv_hdr_len); + +ucs_status_t uct_rc_mlx5_init_rx_tm(uct_rc_mlx5_iface_common_t *iface, + const uct_rc_iface_common_config_t *config, + struct ibv_srq_init_attr_ex *srq_init_attr, + unsigned rndv_hdr_len); +#else +static UCS_F_MAYBE_UNUSED ucs_status_t +uct_rc_mlx5_init_rx_tm(uct_rc_mlx5_iface_common_t *iface, + const uct_rc_iface_common_config_t *config, + struct ibv_srq_init_attr_ex *srq_init_attr, + unsigned rndv_hdr_len) +{ + return UCS_ERR_UNSUPPORTED; +} +#endif + +#if IBV_HW_TM && HAVE_DEVX +ucs_status_t uct_rc_mlx5_devx_init_rx_tm(uct_rc_mlx5_iface_common_t *iface, + const uct_rc_iface_common_config_t *config, + int dc, unsigned rndv_hdr_len); +#else +static UCS_F_MAYBE_UNUSED ucs_status_t +uct_rc_mlx5_devx_init_rx_tm(uct_rc_mlx5_iface_common_t *iface, + const uct_rc_iface_common_config_t *config, + int dc, unsigned rndv_hdr_len) +{ + return UCS_ERR_UNSUPPORTED; +} +#endif + +void uct_rc_mlx5_tag_cleanup(uct_rc_mlx5_iface_common_t *iface); + +ucs_status_t uct_rc_mlx5_iface_common_tag_init(uct_rc_mlx5_iface_common_t *iface); + +void uct_rc_mlx5_iface_common_tag_cleanup(uct_rc_mlx5_iface_common_t *iface); + +ucs_status_t uct_rc_mlx5_ep_tag_rndv_cancel(uct_ep_h tl_ep, void *op); + +void uct_rc_mlx5_common_packet_dump(uct_base_iface_t *iface, uct_am_trace_type_t type, + void *data, size_t length, size_t valid_length, + char *buffer, size_t max); + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_am_hdr_fill(uct_rc_mlx5_hdr_t *rch, uint8_t id) +{ +#if IBV_HW_TM + rch->tmh_opcode = IBV_TMH_NO_TAG; +#endif + rch->rc_hdr.am_id = id; +} + +#if HAVE_DEVX +ucs_status_t +uct_rc_mlx5_iface_common_devx_connect_qp(uct_rc_mlx5_iface_common_t *iface, + uct_ib_mlx5_qp_t *qp, + uint32_t dest_qp_num, + struct ibv_ah_attr *ah_attr); + +#else +static UCS_F_MAYBE_UNUSED 
ucs_status_t +uct_rc_mlx5_iface_common_devx_connect_qp(uct_rc_mlx5_iface_common_t *iface, + uct_ib_mlx5_qp_t *qp, + uint32_t dest_qp_num, + struct ibv_ah_attr *ah_attr) +{ + return UCS_ERR_UNSUPPORTED; +} +#endif + +void uct_rc_mlx5_iface_fill_attr(uct_rc_mlx5_iface_common_t *iface, + uct_ib_qp_attr_t *qp_attr, + unsigned max_send_wr, + uct_ib_mlx5_srq_t *srq); + +void uct_rc_mlx5_destroy_srq(uct_ib_mlx5_srq_t *srq); + +#endif diff --git a/src/uct/ib/rc/accel/rc_mlx5_devx.c b/src/uct/ib/rc/accel/rc_mlx5_devx.c new file mode 100644 index 0000000..164121d --- /dev/null +++ b/src/uct/ib/rc/accel/rc_mlx5_devx.c @@ -0,0 +1,204 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + + +#include "rc_mlx5.inl" + +#include +#include +#include +#include + + +ucs_status_t +uct_rc_mlx5_devx_init_rx_tm(uct_rc_mlx5_iface_common_t *iface, + const uct_rc_iface_common_config_t *config, + int dc, unsigned rndv_hdr_len) +{ + uct_ib_mlx5_md_t *md = ucs_derived_of(uct_ib_iface_md(&iface->super.super), uct_ib_mlx5_md_t); + uct_ib_device_t *dev = &md->super.dev; + char in[UCT_IB_MLX5DV_ST_SZ_BYTES(create_xrq_in)] = {}; + char out[UCT_IB_MLX5DV_ST_SZ_BYTES(create_xrq_out)] = {}; + ucs_status_t status = UCS_ERR_NO_MEMORY; + struct mlx5dv_pd dvpd = {}; + struct mlx5dv_cq dvcq = {}; + struct mlx5dv_obj dv = {}; + void *xrqc, *wq; + int len, ret, max, stride, log_num_of_strides; + + uct_rc_mlx5_init_rx_tm_common(iface, config, rndv_hdr_len); + + stride = uct_ib_mlx5_srq_stride(iface->tm.mp.num_strides); + max = uct_ib_mlx5_srq_max_wrs(config->super.rx.queue_len, + iface->tm.mp.num_strides); + max = ucs_roundup_pow2(max); + len = max * stride; + ret = posix_memalign(&iface->rx.srq.buf, ucs_get_page_size(), len); + if (ret) { + return status; + } + + iface->rx.srq.devx.mem = mlx5dv_devx_umem_reg(dev->ibv_context, + iface->rx.srq.buf, len, + IBV_ACCESS_LOCAL_WRITE); + if (!iface->rx.srq.devx.mem) { + goto err_free_buf; + } + + 
iface->rx.srq.devx.dbrec = uct_ib_mlx5_get_dbrec(md); + if (!iface->rx.srq.devx.dbrec) { + goto err_free_mem; + } + + iface->rx.srq.db = &iface->rx.srq.devx.dbrec->db[MLX5_RCV_DBR]; + dv.pd.in = uct_ib_iface_md(&iface->super.super)->pd; + dv.cq.in = iface->super.super.cq[UCT_IB_DIR_RX]; + dv.pd.out = &dvpd; + dv.cq.out = &dvcq; + mlx5dv_init_obj(&dv, MLX5DV_OBJ_PD | MLX5DV_OBJ_CQ); + + UCT_IB_MLX5DV_SET(create_xrq_in, in, opcode, UCT_IB_MLX5_CMD_OP_CREATE_XRQ); + xrqc = UCT_IB_MLX5DV_ADDR_OF(create_xrq_in, in, xrq_context); + + UCT_IB_MLX5DV_SET(xrqc, xrqc, topology, UCT_IB_MLX5_XRQC_TOPOLOGY_TAG_MATCHING); + UCT_IB_MLX5DV_SET(xrqc, xrqc, offload, UCT_IB_MLX5_XRQC_OFFLOAD_RNDV); + UCT_IB_MLX5DV_SET(xrqc, xrqc, tag_matching_topology_context.log_matching_list_sz, + ucs_ilog2(iface->tm.num_tags) + 1); + UCT_IB_MLX5DV_SET(xrqc, xrqc, dc, dc); + UCT_IB_MLX5DV_SET(xrqc, xrqc, cqn, dvcq.cqn); + + wq = UCT_IB_MLX5DV_ADDR_OF(xrqc, xrqc, wq); + + UCT_IB_MLX5DV_SET(wq, wq, wq_type, iface->rx.srq.topo); + UCT_IB_MLX5DV_SET(wq, wq, log_wq_sz, ucs_ilog2(max)); + UCT_IB_MLX5DV_SET(wq, wq, log_wq_stride, ucs_ilog2(stride)); + UCT_IB_MLX5DV_SET(wq, wq, pd, dvpd.pdn); + UCT_IB_MLX5DV_SET(wq, wq, dbr_umem_id, iface->rx.srq.devx.dbrec->mem_id); + UCT_IB_MLX5DV_SET64(wq, wq, dbr_addr, iface->rx.srq.devx.dbrec->offset); + UCT_IB_MLX5DV_SET(wq, wq, wq_umem_id, iface->rx.srq.devx.mem->umem_id); + + if (UCT_RC_MLX5_MP_ENABLED(iface)) { + /* Normalize to device's interface values (range of (-6) - 7) */ + log_num_of_strides = ucs_ilog2(iface->tm.mp.num_strides) - 9; + + UCT_IB_MLX5DV_SET(wq, wq, log_wqe_num_of_strides, log_num_of_strides & 0xF); + UCT_IB_MLX5DV_SET(wq, wq, log_wqe_stride_size, + (ucs_ilog2(iface->super.super.config.seg_size) - 6)); + } + + iface->rx.srq.devx.obj = mlx5dv_devx_obj_create(dev->ibv_context, in, sizeof(in), + out, sizeof(out)); + if (iface->rx.srq.devx.obj == NULL) { + ucs_error("mlx5dv_devx_obj_create(SRQ) failed, syndrome %x: %m", + 
UCT_IB_MLX5DV_GET(create_xrq_out, out, syndrome)); + status = UCS_ERR_IO_ERROR; + goto err_free; + } + + iface->rx.srq.type = UCT_IB_MLX5_OBJ_TYPE_DEVX; + iface->rx.srq.srq_num = UCT_IB_MLX5DV_GET(create_xrq_out, out, xrqn); + uct_rc_mlx5_iface_tm_set_cmd_qp_len(iface); + uct_ib_mlx5_srq_buff_init(&iface->rx.srq, 0, max - 1, + iface->super.super.config.seg_size, + iface->tm.mp.num_strides); + iface->super.rx.srq.quota = max - 1; + + return UCS_OK; + +err_free: + ucs_mpool_put_inline(iface->rx.srq.devx.dbrec); +err_free_mem: + mlx5dv_devx_umem_dereg(iface->rx.srq.devx.mem); +err_free_buf: + ucs_free(iface->rx.srq.buf); + return status; +} + +ucs_status_t +uct_rc_mlx5_iface_common_devx_connect_qp(uct_rc_mlx5_iface_common_t *iface, + uct_ib_mlx5_qp_t *qp, + uint32_t dest_qp_num, + struct ibv_ah_attr *ah_attr) +{ + char in_2rtr[UCT_IB_MLX5DV_ST_SZ_BYTES(init2rtr_qp_in)] = {}; + char out_2rtr[UCT_IB_MLX5DV_ST_SZ_BYTES(init2rtr_qp_out)] = {}; + char in_2rts[UCT_IB_MLX5DV_ST_SZ_BYTES(rtr2rts_qp_in)] = {}; + char out_2rts[UCT_IB_MLX5DV_ST_SZ_BYTES(rtr2rts_qp_out)] = {}; + struct mlx5_wqe_av mlx5_av; + ucs_status_t status; + struct ibv_ah *ah; + void *qpc; + + UCT_IB_MLX5DV_SET(init2rtr_qp_in, in_2rtr, opcode, UCT_IB_MLX5_CMD_OP_INIT2RTR_QP); + UCT_IB_MLX5DV_SET(init2rtr_qp_in, in_2rtr, qpn, qp->qp_num); + UCT_IB_MLX5DV_SET(init2rtr_qp_in, in_2rtr, opt_param_mask, 14); + + qpc = UCT_IB_MLX5DV_ADDR_OF(init2rtr_qp_in, in_2rtr, qpc); + UCT_IB_MLX5DV_SET(qpc, qpc, mtu, iface->super.config.path_mtu); + UCT_IB_MLX5DV_SET(qpc, qpc, log_msg_max, UCT_IB_MLX5_LOG_MAX_MSG_SIZE); + UCT_IB_MLX5DV_SET(qpc, qpc, remote_qpn, dest_qp_num); + if (uct_ib_iface_is_roce(&iface->super.super)) { + status = uct_ib_iface_create_ah(&iface->super.super, ah_attr, &ah); + if (status != UCS_OK) { + return status; + } + + uct_ib_mlx5_get_av(ah, &mlx5_av); + memcpy(UCT_IB_MLX5DV_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), + &mlx5_av.rmac, sizeof(mlx5_av.rmac)); + memcpy(UCT_IB_MLX5DV_ADDR_OF(qpc, 
qpc, primary_address_path.rgid_rip), + &mlx5_av.rgid, sizeof(mlx5_av.rgid)); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.hop_limit, mlx5_av.hop_limit); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.src_addr_index, ah_attr->grh.sgid_index); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.udp_sport, + uct_ib_mlx5_calc_av_sport(dest_qp_num, qp->qp_num)); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.eth_prio, iface->super.super.config.sl); + if (iface->super.super.is_roce_v2) { + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.dscp, + iface->super.super.config.traffic_class >> 2); + } + } else { + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.grh, ah_attr->is_global); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.rlid, ah_attr->dlid); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.mlid, ah_attr->src_path_bits & 0x7f); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.hop_limit, ah_attr->grh.hop_limit); + memcpy(UCT_IB_MLX5DV_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip), + &ah_attr->grh.dgid, + UCT_IB_MLX5DV_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip)); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.sl, iface->super.super.config.sl); + /* TODO add flow_label support */ + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.tclass, + iface->super.super.config.traffic_class); + } + + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.vhca_port_num, ah_attr->port_num); + UCT_IB_MLX5DV_SET(qpc, qpc, log_rra_max, ucs_ilog2_or0(iface->super.config.max_rd_atomic)); + UCT_IB_MLX5DV_SET(qpc, qpc, atomic_mode, UCT_IB_MLX5_ATOMIC_MODE); + UCT_IB_MLX5DV_SET(qpc, qpc, rre, true); + UCT_IB_MLX5DV_SET(qpc, qpc, rwe, true); + UCT_IB_MLX5DV_SET(qpc, qpc, rae, true); + UCT_IB_MLX5DV_SET(qpc, qpc, min_rnr_nak, iface->super.config.min_rnr_timer); + + status = uct_ib_mlx5_devx_modify_qp(qp, in_2rtr, sizeof(in_2rtr), + out_2rtr, sizeof(out_2rtr)); + if (status) { + return status; + } + + UCT_IB_MLX5DV_SET(rtr2rts_qp_in, in_2rts, opcode, 
UCT_IB_MLX5_CMD_OP_RTR2RTS_QP); + UCT_IB_MLX5DV_SET(rtr2rts_qp_in, in_2rts, qpn, qp->qp_num); + + qpc = UCT_IB_MLX5DV_ADDR_OF(rtr2rts_qp_in, in_2rts, qpc); + UCT_IB_MLX5DV_SET(qpc, qpc, log_sra_max, ucs_ilog2_or0(iface->super.config.max_rd_atomic)); + UCT_IB_MLX5DV_SET(qpc, qpc, retry_count, iface->super.config.retry_cnt); + UCT_IB_MLX5DV_SET(qpc, qpc, rnr_retry, iface->super.config.rnr_retry); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.ack_timeout, iface->super.config.timeout); + UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.log_rtm, iface->super.config.exp_backoff); + + return uct_ib_mlx5_devx_modify_qp(qp, in_2rts, sizeof(in_2rts), + out_2rts, sizeof(out_2rts)); +} + diff --git a/src/uct/ib/rc/accel/rc_mlx5_ep.c b/src/uct/ib/rc/accel/rc_mlx5_ep.c new file mode 100644 index 0000000..c17140f --- /dev/null +++ b/src/uct/ib/rc/accel/rc_mlx5_ep.c @@ -0,0 +1,989 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "rc_mlx5.h" +#if HAVE_DECL_IBV_CMD_MODIFY_QP +#include +#endif +#include +#include +#include +#include +#include /* For htonl */ + +#include "rc_mlx5.inl" + + +/* + * + * Helper function for buffer-copy post. + * Adds the descriptor to the callback queue. + */ +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_txqp_bcopy_post(uct_rc_mlx5_iface_common_t *iface, + uct_rc_txqp_t *txqp, uct_ib_mlx5_txwq_t *txwq, + unsigned opcode, unsigned length, + /* RDMA */ uint64_t rdma_raddr, uct_rkey_t rdma_rkey, + uint8_t fm_ce_se, uint32_t imm_val_be, + uct_rc_iface_send_desc_t *desc, const void *buffer, + uct_ib_log_sge_t *log_sge) +{ + desc->super.sn = txwq->sw_pi; + uct_rc_mlx5_txqp_dptr_post(iface, IBV_QPT_RC, txqp, txwq, + opcode, buffer, length, &desc->lkey, + rdma_raddr, uct_ib_md_direct_rkey(rdma_rkey), + 0, 0, 0, 0, + NULL, NULL, 0, fm_ce_se, imm_val_be, INT_MAX, log_sge); + uct_rc_txqp_add_send_op(txqp, &desc->super); +} + +/* + * Helper function for zero-copy post. 
+ * Adds user completion to the callback queue. + */ +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_rc_mlx5_ep_zcopy_post(uct_rc_mlx5_ep_t *ep, + unsigned opcode, const uct_iov_t *iov, size_t iovcnt, + /* SEND */ uint8_t am_id, const void *am_hdr, unsigned am_hdr_len, + /* RDMA */ uint64_t rdma_raddr, uct_rkey_t rdma_rkey, + /* TAG */ uct_tag_t tag, uint32_t app_ctx, uint32_t ib_imm_be, + int force_sig, uct_completion_t *comp) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(ep->super.super.super.iface, + uct_rc_mlx5_iface_common_t); + uint16_t sn; + + UCT_RC_CHECK_RES(&iface->super, &ep->super); + + sn = ep->tx.wq.sw_pi; + uct_rc_mlx5_txqp_dptr_post_iov(iface, IBV_QPT_RC, + &ep->super.txqp, &ep->tx.wq, + opcode, iov, iovcnt, + am_id, am_hdr, am_hdr_len, + rdma_raddr, uct_ib_md_direct_rkey(rdma_rkey), + tag, app_ctx, ib_imm_be, + NULL, NULL, 0, + (comp == NULL) ? force_sig : MLX5_WQE_CTRL_CQ_UPDATE, + UCT_IB_MAX_ZCOPY_LOG_SGE(&iface->super.super)); + + uct_rc_txqp_add_send_comp(&iface->super, &ep->super.txqp, comp, sn, + UCT_RC_IFACE_SEND_OP_FLAG_ZCOPY); + return UCS_INPROGRESS; +} + +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_rc_mlx5_ep_put_short_inline(uct_ep_h tl_ep, const void *buffer, unsigned length, + uint64_t remote_addr, uct_rkey_t rkey) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + UCT_RC_MLX5_CHECK_PUT_SHORT(length, 0); + UCT_RC_CHECK_RES(&iface->super, &ep->super); + + uct_rc_mlx5_txqp_inline_post(iface, IBV_QPT_RC, + &ep->super.txqp, &ep->tx.wq, + MLX5_OPCODE_RDMA_WRITE, + buffer, length, 0, 0, 0, + remote_addr, uct_ib_md_direct_rkey(rkey), + NULL, NULL, 0, 0, INT_MAX); + UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length); + return UCS_OK; +} + +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_rc_mlx5_ep_am_short_inline(uct_ep_h tl_ep, uint8_t id, uint64_t hdr, + const void *payload, unsigned length) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + UCT_RC_MLX5_CHECK_AM_SHORT(id, length, 0); + UCT_RC_CHECK_RES(&iface->super, &ep->super); + 
UCT_RC_CHECK_FC(&iface->super, &ep->super, id); + + uct_rc_mlx5_txqp_inline_post(iface, IBV_QPT_RC, + &ep->super.txqp, &ep->tx.wq, + MLX5_OPCODE_SEND, + payload, length, + id, hdr, 0, + 0, 0, + NULL, NULL, 0, + MLX5_WQE_CTRL_SOLICITED, + INT_MAX); + UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length); + UCT_RC_UPDATE_FC(&iface->super, &ep->super, id); + return UCS_OK; +} + +#if HAVE_IBV_DM +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_rc_mlx5_ep_short_dm(uct_rc_mlx5_ep_t *ep, uct_rc_mlx5_dm_copy_data_t *cache, + size_t hdr_len, const void *payload, unsigned length, + unsigned opcode, uint8_t fm_ce_se, + uint64_t rdma_raddr, uct_rkey_t rdma_rkey) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(ep->super.super.super.iface, + uct_rc_mlx5_iface_common_t); + uct_rc_iface_send_desc_t *desc; + void *buffer; + ucs_status_t status; + uct_ib_log_sge_t log_sge; + + status = uct_rc_mlx5_common_dm_make_data(iface, cache, hdr_len, payload, + length, &desc, &buffer, &log_sge); + if (ucs_unlikely(UCS_STATUS_IS_ERR(status))) { + return status; + } + + uct_rc_mlx5_txqp_bcopy_post(iface, &ep->super.txqp, &ep->tx.wq, + opcode, hdr_len + length, + rdma_raddr, rdma_rkey, fm_ce_se, + 0, desc, buffer, + log_sge.num_sge ? 
&log_sge : NULL); + return UCS_OK; +} +#endif + +ucs_status_t +uct_rc_mlx5_ep_put_short(uct_ep_h tl_ep, const void *buffer, unsigned length, + uint64_t remote_addr, uct_rkey_t rkey) +{ +#if HAVE_IBV_DM + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_mlx5_iface_common_t); + uct_rc_iface_t *rc_iface = &iface->super; + uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t); + ucs_status_t status; + + if (ucs_likely((length <= UCT_IB_MLX5_PUT_MAX_SHORT(0)) || !iface->dm.dm)) { +#endif + return uct_rc_mlx5_ep_put_short_inline(tl_ep, buffer, length, remote_addr, rkey); +#if HAVE_IBV_DM + } + + UCT_CHECK_LENGTH(length, 0, iface->dm.seg_len, "put_short"); + UCT_RC_CHECK_RES(rc_iface, &ep->super); + status = uct_rc_mlx5_ep_short_dm(ep, NULL, 0, buffer, length, + MLX5_OPCODE_RDMA_WRITE, + MLX5_WQE_CTRL_CQ_UPDATE, + remote_addr, rkey); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + + UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length); + return UCS_OK; +#endif +} + +ssize_t uct_rc_mlx5_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb, + void *arg, uint64_t remote_addr, uct_rkey_t rkey) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + uct_rc_iface_send_desc_t *desc; + size_t length; + + UCT_RC_CHECK_RES(&iface->super, &ep->super); + UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC(&iface->super, &iface->super.tx.mp, + desc, pack_cb, arg, length); + + uct_rc_mlx5_txqp_bcopy_post(iface, &ep->super.txqp, &ep->tx.wq, + MLX5_OPCODE_RDMA_WRITE, length, remote_addr, + rkey, MLX5_WQE_CTRL_CQ_UPDATE, 0, desc, desc + 1, + NULL); + UCT_TL_EP_STAT_OP(&ep->super.super, PUT, BCOPY, length); + return length; +} + +ucs_status_t uct_rc_mlx5_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uct_ib_iface_t UCS_V_UNUSED *iface = ucs_derived_of(tl_ep->iface, + uct_ib_iface_t); + uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, + uct_rc_mlx5_ep_t); + ucs_status_t status; + + 
UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(iface), + "uct_rc_mlx5_ep_put_zcopy"); + UCT_CHECK_LENGTH(uct_iov_total_length(iov, iovcnt), 0, UCT_IB_MAX_MESSAGE_SIZE, + "put_zcopy"); + + status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_RDMA_WRITE, iov, iovcnt, + 0, NULL, 0, remote_addr, rkey, 0ul, 0, 0, + MLX5_WQE_CTRL_CQ_UPDATE, comp); + UCT_TL_EP_STAT_OP_IF_SUCCESS(status, &ep->super.super, PUT, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + return status; +} + +ucs_status_t uct_rc_mlx5_ep_get_bcopy(uct_ep_h tl_ep, + uct_unpack_callback_t unpack_cb, + void *arg, size_t length, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + uct_rc_iface_send_desc_t *desc; + + UCT_CHECK_LENGTH(length, 0, iface->super.super.config.seg_size, "get_bcopy"); + UCT_RC_CHECK_RES(&iface->super, &ep->super); + UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC(&iface->super, &iface->super.tx.mp, desc, + unpack_cb, comp, arg, length); + + uct_rc_mlx5_txqp_bcopy_post(iface, &ep->super.txqp, &ep->tx.wq, + MLX5_OPCODE_RDMA_READ, length, remote_addr, + rkey, MLX5_WQE_CTRL_CQ_UPDATE, 0, desc, desc + 1, + NULL); + UCT_TL_EP_STAT_OP(&ep->super.super, GET, BCOPY, length); + return UCS_INPROGRESS; +} + +ucs_status_t uct_rc_mlx5_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + ucs_status_t status; + + UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(&iface->super.super), + "uct_rc_mlx5_ep_get_zcopy"); + UCT_CHECK_LENGTH(uct_iov_total_length(iov, iovcnt), + iface->super.super.config.max_inl_resp + 1, + UCT_IB_MAX_MESSAGE_SIZE, "get_zcopy"); + + status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_RDMA_READ, iov, iovcnt, + 0, NULL, 0, remote_addr, rkey, 0ul, 0, 0, + MLX5_WQE_CTRL_CQ_UPDATE, comp); + UCT_TL_EP_STAT_OP_IF_SUCCESS(status, &ep->super.super, GET, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + return 
status; +} + +ucs_status_t +uct_rc_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr, + const void *payload, unsigned length) +{ +#if HAVE_IBV_DM + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_mlx5_iface_common_t); + uct_rc_iface_t *rc_iface = &iface->super; + uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t); + ucs_status_t status; + uct_rc_mlx5_dm_copy_data_t cache; + + if (ucs_likely((sizeof(uct_rc_mlx5_am_short_hdr_t) + length <= UCT_IB_MLX5_AM_MAX_SHORT(0)) || + !iface->dm.dm)) { +#endif + return uct_rc_mlx5_ep_am_short_inline(tl_ep, id, hdr, payload, length); +#if HAVE_IBV_DM + } + + UCT_CHECK_LENGTH(length + sizeof(uct_rc_mlx5_am_short_hdr_t), 0, + iface->dm.seg_len, "am_short"); + UCT_CHECK_AM_ID(id); + UCT_RC_CHECK_RES(&iface->super, &ep->super); + UCT_RC_CHECK_FC(&iface->super, &ep->super, id); + + uct_rc_mlx5_am_hdr_fill(&cache.am_hdr.rc_hdr, id); + cache.am_hdr.am_hdr = hdr; + + status = uct_rc_mlx5_ep_short_dm(ep, &cache, sizeof(cache.am_hdr), payload, length, + MLX5_OPCODE_SEND, + MLX5_WQE_CTRL_SOLICITED | MLX5_WQE_CTRL_CQ_UPDATE, + 0, 0); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + + UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(cache.am_hdr) + length); + UCT_RC_UPDATE_FC(rc_iface, &ep->super, id); + return UCS_OK; +#endif +} + +ssize_t uct_rc_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + uct_rc_iface_send_desc_t *desc; + size_t length; + + UCT_CHECK_AM_ID(id); + UCT_RC_CHECK_RES(&iface->super, &ep->super); + UCT_RC_CHECK_FC(&iface->super, &ep->super, id); + UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC(&iface->super, &iface->super.tx.mp, desc, + id, uct_rc_mlx5_am_hdr_fill, uct_rc_mlx5_hdr_t, + pack_cb, arg, &length); + + uct_rc_mlx5_txqp_bcopy_post(iface, &ep->super.txqp, &ep->tx.wq, + MLX5_OPCODE_SEND, sizeof(uct_rc_mlx5_hdr_t) + length, + 0, 0, MLX5_WQE_CTRL_SOLICITED, 0, desc, desc + 
1, + NULL); + UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length); + UCT_RC_UPDATE_FC(&iface->super, &ep->super, id); + return length; +} + +ucs_status_t uct_rc_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header, + unsigned header_length, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + ucs_status_t status; + + UCT_CHECK_IOV_SIZE(iovcnt, UCT_IB_MLX5_AM_ZCOPY_MAX_IOV, + "uct_rc_mlx5_ep_am_zcopy"); + UCT_RC_MLX5_CHECK_AM_ZCOPY(id, header_length, uct_iov_total_length(iov, iovcnt), + iface->super.super.config.seg_size, 0); + UCT_RC_CHECK_FC(&iface->super, &ep->super, id); + + status = uct_rc_mlx5_ep_zcopy_post(ep, MLX5_OPCODE_SEND, iov, iovcnt, + id, header, header_length, 0, 0, 0ul, 0, 0, + MLX5_WQE_CTRL_SOLICITED, comp); + if (ucs_likely(status >= 0)) { + UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY, + header_length + uct_iov_total_length(iov, iovcnt)); + UCT_RC_UPDATE_FC(&iface->super, &ep->super, id); + } + return status; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_mlx5_ep_atomic_post(uct_ep_h tl_ep, unsigned opcode, + uct_rc_iface_send_desc_t *desc, unsigned length, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t compare_mask, uint64_t compare, + uint64_t swap_mask, uint64_t swap_add) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + uint32_t ib_rkey = uct_ib_resolve_atomic_rkey(rkey, ep->atomic_mr_offset, + &remote_addr); + + desc->super.sn = ep->tx.wq.sw_pi; + uct_rc_mlx5_txqp_dptr_post(iface, IBV_QPT_RC, + &ep->super.txqp, &ep->tx.wq, + opcode, desc + 1, length, &desc->lkey, + remote_addr, ib_rkey, + compare_mask, compare, swap_mask, swap_add, + NULL, NULL, 0, MLX5_WQE_CTRL_CQ_UPDATE, + 0, INT_MAX, NULL); + + UCT_TL_EP_STAT_ATOMIC(&ep->super.super); + uct_rc_txqp_add_send_op(&ep->super.txqp, &desc->super); +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_rc_mlx5_ep_atomic_fop(uct_ep_h tl_ep, int opcode, void *result, int ext, + unsigned length, uint64_t 
remote_addr, uct_rkey_t rkey, + uint64_t compare_mask, uint64_t compare, + uint64_t swap_mask, uint64_t swap_add, uct_completion_t *comp) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + uct_rc_iface_send_desc_t *desc; + + UCT_RC_CHECK_RES(&iface->super, &ep->super); + UCT_RC_IFACE_GET_TX_ATOMIC_FETCH_DESC(&iface->super, &iface->tx.atomic_desc_mp, + desc, uct_rc_iface_atomic_handler(&iface->super, ext, + length), + result, comp); + uct_rc_mlx5_ep_atomic_post(tl_ep, opcode, desc, length, remote_addr, rkey, + compare_mask, compare, swap_mask, swap_add); + return UCS_INPROGRESS; +} + +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_rc_mlx5_ep_atomic_op_post(uct_ep_h tl_ep, unsigned opcode, unsigned size, + uint64_t value, uint64_t remote_addr, uct_rkey_t rkey) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + uct_rc_iface_send_desc_t *desc; + int op; + uint64_t compare_mask; + uint64_t compare; + uint64_t swap_mask; + uint64_t swap; + int ext; /* not used here */ + ucs_status_t status; + + UCT_RC_CHECK_RES(&iface->super, &ep->super); + UCT_RC_MLX5_CHECK_ATOMIC_OPS(opcode, size, UCT_RC_MLX5_ATOMIC_OPS); + + status = uct_rc_mlx5_iface_common_atomic_data(opcode, size, value, &op, &compare_mask, + &compare, &swap_mask, &swap, &ext); + if (ucs_unlikely(UCS_STATUS_IS_ERR(status))) { + return status; + } + + UCT_RC_IFACE_GET_TX_ATOMIC_DESC(&iface->super, &iface->tx.atomic_desc_mp, desc); + + uct_rc_mlx5_ep_atomic_post(tl_ep, op, desc, size, remote_addr, rkey, + compare_mask, compare, swap_mask, swap); + return UCS_OK; +} + +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_rc_mlx5_ep_atomic_fop_post(uct_ep_h tl_ep, unsigned opcode, unsigned size, + uint64_t value, void *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + int op; + uint64_t compare_mask; + uint64_t compare; + uint64_t swap_mask; + uint64_t swap; + int ext; + ucs_status_t status; + + UCT_RC_MLX5_CHECK_ATOMIC_OPS(opcode, size, UCT_RC_MLX5_ATOMIC_FOPS); + + status = 
uct_rc_mlx5_iface_common_atomic_data(opcode, size, value, &op, &compare_mask, + &compare, &swap_mask, &swap, &ext); + if (ucs_unlikely(UCS_STATUS_IS_ERR(status))) { + return status; + } + + return uct_rc_mlx5_ep_atomic_fop(tl_ep, op, result, ext, size, remote_addr, rkey, + compare_mask, compare, swap_mask, swap, comp); +} + +ucs_status_t uct_rc_mlx5_ep_atomic32_post(uct_ep_h ep, unsigned opcode, uint32_t value, + uint64_t remote_addr, uct_rkey_t rkey) +{ + return uct_rc_mlx5_ep_atomic_op_post(ep, opcode, sizeof(value), value, remote_addr, rkey); +} + +ucs_status_t uct_rc_mlx5_ep_atomic64_post(uct_ep_h ep, unsigned opcode, uint64_t value, + uint64_t remote_addr, uct_rkey_t rkey) +{ + return uct_rc_mlx5_ep_atomic_op_post(ep, opcode, sizeof(value), value, remote_addr, rkey); +} + +ucs_status_t uct_rc_mlx5_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint64_t value, uint64_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + return uct_rc_mlx5_ep_atomic_fop_post(ep, opcode, sizeof(value), value, result, + remote_addr, rkey, comp); +} + +ucs_status_t uct_rc_mlx5_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint32_t value, uint32_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + return uct_rc_mlx5_ep_atomic_fop_post(ep, opcode, sizeof(value), value, result, + remote_addr, rkey, comp); +} + +ucs_status_t uct_rc_mlx5_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, uint64_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t *result, uct_completion_t *comp) +{ + return uct_rc_mlx5_ep_atomic_fop(tl_ep, MLX5_OPCODE_ATOMIC_CS, result, 0, sizeof(uint64_t), + remote_addr, rkey, 0, htobe64(compare), + UINT64_MAX, htobe64(swap), comp); +} + +ucs_status_t uct_rc_mlx5_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, uint32_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint32_t *result, uct_completion_t *comp) +{ + return uct_rc_mlx5_ep_atomic_fop(tl_ep, 
MLX5_OPCODE_ATOMIC_MASKED_CS, result, 1, + sizeof(uint32_t), remote_addr, rkey, UCS_MASK(32), + htonl(compare), UINT64_MAX, htonl(swap), comp); +} + +ucs_status_t uct_rc_mlx5_ep_fence(uct_ep_h tl_ep, unsigned flags) +{ + uct_rc_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_mlx5_ep_t); + + return uct_rc_ep_fence(tl_ep, &ep->tx.wq.fi, 1); +} + +ucs_status_t uct_rc_mlx5_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + ucs_status_t status; + uint16_t sn; + + if (ucs_unlikely(flags & UCT_FLUSH_FLAG_CANCEL)) { + uct_ep_pending_purge(&ep->super.super.super, NULL, 0); + uct_rc_mlx5_ep_handle_failure(ep, UCS_ERR_CANCELED); + return UCS_OK; + } + + status = uct_rc_ep_flush(&ep->super, ep->tx.wq.bb_max, flags); + if (status != UCS_INPROGRESS) { + return status; + } + + if (uct_rc_txqp_unsignaled(&ep->super.txqp) != 0) { + sn = ep->tx.wq.sw_pi; + UCT_RC_CHECK_RES(&iface->super, &ep->super); + uct_rc_mlx5_txqp_inline_post(iface, IBV_QPT_RC, + &ep->super.txqp, &ep->tx.wq, + MLX5_OPCODE_NOP, NULL, 0, + 0, 0, 0, + 0, 0, + NULL, NULL, 0, 0, + INT_MAX); + } else { + sn = ep->tx.wq.sig_pi; + } + + return uct_rc_txqp_add_flush_comp(&iface->super, &ep->super.super, + &ep->super.txqp, comp, sn); +} + +ucs_status_t uct_rc_mlx5_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op, + uct_rc_fc_request_t *req) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + + /* In RC only PURE grant is sent as a separate message. Other FC + * messages are bundled with AM. 
*/ + ucs_assert(op == UCT_RC_EP_FC_PURE_GRANT); + + UCT_RC_CHECK_RES(&iface->super, &ep->super); + uct_rc_mlx5_txqp_inline_post(iface, IBV_QPT_RC, + &ep->super.txqp, &ep->tx.wq, + MLX5_OPCODE_SEND|UCT_RC_MLX5_OPCODE_FLAG_RAW, + NULL, 0, + UCT_RC_EP_FC_PURE_GRANT, 0, 0, + 0, 0, + NULL, NULL, 0, 0, + INT_MAX); + return UCS_OK; +} + +ucs_status_t uct_rc_mlx5_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + uct_rc_mlx5_ep_address_t *rc_addr = (uct_rc_mlx5_ep_address_t*)addr; + + uct_ib_pack_uint24(rc_addr->qp_num, ep->tx.wq.super.qp_num); + rc_addr->atomic_mr_id = uct_ib_mlx5_iface_get_atomic_mr_id(&iface->super.super); + + if (UCT_RC_MLX5_TM_ENABLED(iface)) { + uct_ib_pack_uint24(rc_addr->tm_qp_num, ep->tm_qp.qp_num); + } + + return UCS_OK; +} + +void uct_rc_mlx5_common_packet_dump(uct_base_iface_t *iface, uct_am_trace_type_t type, + void *data, size_t length, size_t valid_length, + char *buffer, size_t max) +{ + uct_rc_mlx5_hdr_t *rch = data; + +#if IBV_HW_TM + if (rch->tmh_opcode != IBV_TMH_NO_TAG) { + struct ibv_tmh *tmh = ucs_unaligned_ptr(rch); + struct ibv_rvh *rvh = (void*)(tmh + 1); + uct_tag_t tag; + uint32_t app_ctx; + + tag = tmh->tag; + app_ctx = tmh->app_ctx; + + switch (rch->tmh_opcode) { + case IBV_TMH_EAGER: + snprintf(buffer, max, " EAGER tag %lx app_ctx %d", tag, app_ctx); + return; + case IBV_TMH_RNDV: + snprintf(buffer, max, " RNDV tag %lx app_ctx %d va 0x%lx len %d rkey %x", + tag, app_ctx, be64toh(rvh->va), ntohl(rvh->len), ntohl(rvh->rkey)); + return; + case IBV_TMH_FIN: + snprintf(buffer, max, " FIN tag %lx app_ctx %d", tag, app_ctx); + return; + default: + break; + } + } +#endif + + data = &rch->rc_hdr; + /* coverity[overrun-buffer-val] */ + uct_rc_ep_packet_dump(iface, type, data, length - UCS_PTR_BYTE_DIFF(rch, data), + valid_length, buffer, max); +} + +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_rc_mlx5_ep_connect_qp(uct_rc_mlx5_iface_common_t *iface, + uct_ib_mlx5_qp_t *qp, uint32_t qp_num, 
+ struct ibv_ah_attr *ah_attr) +{ + uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.super.super.md, uct_ib_mlx5_md_t); + + if (md->flags & UCT_IB_MLX5_MD_FLAG_DEVX) { + return uct_rc_mlx5_iface_common_devx_connect_qp(iface, qp, qp_num, ah_attr); + } else { + return uct_rc_iface_qp_connect(&iface->super, qp->verbs.qp, qp_num, ah_attr); + } +} + +ucs_status_t uct_rc_mlx5_ep_connect_to_ep(uct_ep_h tl_ep, + const uct_device_addr_t *dev_addr, + const uct_ep_addr_t *ep_addr) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + const uct_ib_address_t *ib_addr = (const uct_ib_address_t *)dev_addr; + const uct_rc_mlx5_ep_address_t *rc_addr = (const uct_rc_mlx5_ep_address_t*)ep_addr; + uint32_t qp_num; + struct ibv_ah_attr ah_attr; + ucs_status_t status; + + uct_ib_iface_fill_ah_attr_from_addr(&iface->super.super, ib_addr, &ah_attr); + + if (UCT_RC_MLX5_TM_ENABLED(iface)) { + /* For HW TM we need 2 QPs, one of which will be used by the device for + * RNDV offload (for issuing RDMA reads and sending RNDV ACK). No WQEs + * should be posted to the send side of the QP which is owned by device. */ + status = uct_rc_mlx5_ep_connect_qp(iface, &ep->tm_qp, + uct_ib_unpack_uint24(rc_addr->qp_num), + &ah_attr); + if (status != UCS_OK) { + return status; + } + + /* Need to connect local ep QP to the one owned by device + * (and bound to XRQ) on the peer. 
*/ + qp_num = uct_ib_unpack_uint24(rc_addr->tm_qp_num); + } else { + qp_num = uct_ib_unpack_uint24(rc_addr->qp_num); + } + + status = uct_rc_mlx5_ep_connect_qp(iface, &ep->tx.wq.super, + qp_num, &ah_attr); + if (status != UCS_OK) { + return status; + } + + ep->atomic_mr_offset = uct_ib_md_atomic_offset(rc_addr->atomic_mr_id); + + return UCS_OK; +} + +#if IBV_HW_TM + +ucs_status_t uct_rc_mlx5_ep_tag_rndv_cancel(uct_ep_h tl_ep, void *op) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(tl_ep->iface, + uct_rc_mlx5_iface_common_t); + + uint32_t op_index = (uint32_t)((uint64_t)op); + ucs_ptr_array_remove(&iface->tm.rndv_comps, op_index, 0); + return UCS_OK; +} + +static ucs_status_t UCS_F_ALWAYS_INLINE +uct_rc_mlx5_ep_tag_eager_short_inline(uct_ep_h tl_ep, uct_tag_t tag, + const void *data, size_t length) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + UCT_CHECK_LENGTH(length + sizeof(struct ibv_tmh), 0, + UCT_IB_MLX5_AM_MAX_SHORT(0), "tag_short"); + UCT_RC_CHECK_RES(&iface->super, &ep->super); + + uct_rc_mlx5_txqp_tag_inline_post(iface, IBV_QPT_RC, &ep->super.txqp, + &ep->tx.wq, MLX5_OPCODE_SEND, data, length, + NULL, tag, 0, IBV_TMH_EAGER, 0, NULL, + NULL, 0, NULL, 0, MLX5_WQE_CTRL_SOLICITED); + + UCT_TL_EP_STAT_OP(&ep->super.super, TAG, SHORT, length); + + return UCS_OK; +} + +ucs_status_t uct_rc_mlx5_ep_tag_eager_short(uct_ep_h tl_ep, uct_tag_t tag, + const void *data, size_t length) +{ +#if HAVE_IBV_DM + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + uct_rc_mlx5_dm_copy_data_t cache; + ucs_status_t status; + + if (ucs_likely((sizeof(struct ibv_tmh) + length <= UCT_IB_MLX5_AM_MAX_SHORT(0)) || + !iface->dm.dm)) { +#endif + return uct_rc_mlx5_ep_tag_eager_short_inline(tl_ep, tag, data, length); +#if HAVE_IBV_DM + } + + UCT_CHECK_LENGTH(length + sizeof(struct ibv_tmh), 0, + iface->dm.seg_len, "tag_short"); + UCT_RC_CHECK_RES(&iface->super, &ep->super); + + uct_rc_mlx5_fill_tmh(ucs_unaligned_ptr(&cache.tm_hdr), tag, 0, IBV_TMH_EAGER); + + status = 
uct_rc_mlx5_ep_short_dm(ep, &cache, sizeof(cache.tm_hdr), data, length, + MLX5_OPCODE_SEND, + MLX5_WQE_CTRL_SOLICITED | MLX5_WQE_CTRL_CQ_UPDATE, + 0, 0); + if (!UCS_STATUS_IS_ERR(status)) { + UCT_TL_EP_STAT_OP(&ep->super.super, TAG, SHORT, length); + } + + return status; +#endif +} + +ssize_t uct_rc_mlx5_ep_tag_eager_bcopy(uct_ep_h tl_ep, uct_tag_t tag, + uint64_t imm, + uct_pack_callback_t pack_cb, + void *arg, unsigned flags) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + uct_rc_iface_send_desc_t *desc; + uint32_t app_ctx, ib_imm; + int opcode; + size_t length; + + UCT_RC_CHECK_RES(&iface->super, &ep->super); + + UCT_RC_MLX5_FILL_TM_IMM(imm, app_ctx, ib_imm, opcode, MLX5_OPCODE_SEND, + _IMM); + + UCT_RC_MLX5_IFACE_GET_TM_BCOPY_DESC(&iface->super, iface->tm.bcopy_mp, + desc, tag, app_ctx, pack_cb, arg, length); + + uct_rc_mlx5_txqp_bcopy_post(iface, &ep->super.txqp, &ep->tx.wq, + opcode, sizeof(struct ibv_tmh) + length, + 0, 0, MLX5_WQE_CTRL_SOLICITED, ib_imm, + desc, desc + 1, NULL); + + UCT_TL_EP_STAT_OP(&ep->super.super, TAG, BCOPY, length); + + return length; +} + +ucs_status_t uct_rc_mlx5_ep_tag_eager_zcopy(uct_ep_h tl_ep, uct_tag_t tag, + uint64_t imm, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + uint32_t app_ctx, ib_imm; + int opcode; + + UCT_CHECK_IOV_SIZE(iovcnt, UCT_RC_MLX5_TM_EAGER_ZCOPY_MAX_IOV(0), + "uct_rc_mlx5_ep_tag_eager_zcopy"); + UCT_RC_CHECK_ZCOPY_DATA(sizeof(struct ibv_tmh), + uct_iov_total_length(iov, iovcnt), + iface->tm.max_zcopy); + + UCT_RC_MLX5_FILL_TM_IMM(imm, app_ctx, ib_imm, opcode, MLX5_OPCODE_SEND, + _IMM); + + UCT_TL_EP_STAT_OP(&ep->super.super, TAG, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + + return uct_rc_mlx5_ep_zcopy_post(ep, opcode|UCT_RC_MLX5_OPCODE_FLAG_TM, + iov, iovcnt, 0, "", 0, 0, 0, + tag, app_ctx, ib_imm, + MLX5_WQE_CTRL_SOLICITED, comp); +} + +ucs_status_ptr_t uct_rc_mlx5_ep_tag_rndv_zcopy(uct_ep_h tl_ep, uct_tag_t tag, + const void 
*header, + unsigned header_length, + const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + unsigned tm_hdr_len = sizeof(struct ibv_tmh) + + sizeof(struct ibv_rvh); + uint32_t op_index; + + UCT_RC_MLX5_CHECK_RNDV_PARAMS(iovcnt, header_length, tm_hdr_len, + UCT_IB_MLX5_AM_MAX_SHORT(0), + iface->tm.max_rndv_data + + UCT_RC_MLX5_TMH_PRIV_LEN); + UCT_RC_MLX5_CHECK_RES_PTR(iface, ep); + + op_index = uct_rc_mlx5_tag_get_op_id(iface, comp); + + uct_rc_mlx5_txqp_tag_inline_post(iface, IBV_QPT_RC, &ep->super.txqp, + &ep->tx.wq, MLX5_OPCODE_SEND, header, + header_length, iov, tag, op_index, + IBV_TMH_RNDV, 0, NULL, NULL, 0, + NULL, 0, MLX5_WQE_CTRL_SOLICITED); + + return (ucs_status_ptr_t)((uint64_t)op_index); +} + +ucs_status_t uct_rc_mlx5_ep_tag_rndv_request(uct_ep_h tl_ep, uct_tag_t tag, + const void* header, + unsigned header_length, + unsigned flags) +{ + UCT_RC_MLX5_EP_DECL(tl_ep, iface, ep); + UCT_CHECK_LENGTH(header_length + sizeof(struct ibv_tmh), 0, + UCT_IB_MLX5_AM_MAX_SHORT(0), "tag_rndv_request"); + UCT_RC_CHECK_RES(&iface->super, &ep->super); + + uct_rc_mlx5_txqp_tag_inline_post(iface, IBV_QPT_RC, &ep->super.txqp, + &ep->tx.wq, MLX5_OPCODE_SEND_IMM, header, + header_length, NULL, tag, 0, + IBV_TMH_EAGER, 0, NULL, NULL, 0, + NULL, 0, MLX5_WQE_CTRL_SOLICITED); + return UCS_OK; +} +#endif /* IBV_HW_TM */ + +UCS_CLASS_INIT_FUNC(uct_rc_mlx5_ep_t, const uct_ep_params_t *params) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(params->iface, + uct_rc_mlx5_iface_common_t); + uct_ib_qp_attr_t attr = {}; + ucs_status_t status; + + /* Need to create QP before super constructor to get QP number */ + uct_rc_mlx5_iface_fill_attr(iface, &attr, iface->super.config.tx_qp_len, + &iface->rx.srq); + uct_ib_exp_qp_fill_attr(&iface->super.super, &attr); + status = uct_rc_mlx5_iface_create_qp(iface, &self->tx.wq.super, &self->tx.wq, &attr); + if (status != UCS_OK) { + return status; + } + + 
UCS_CLASS_CALL_SUPER_INIT(uct_rc_ep_t, &iface->super, self->tx.wq.super.qp_num); + + if (self->tx.wq.super.type == UCT_IB_MLX5_OBJ_TYPE_VERBS) { + status = uct_rc_iface_qp_init(&iface->super, self->tx.wq.super.verbs.qp); + if (status != UCS_OK) { + goto err; + } + } + + uct_rc_iface_add_qp(&iface->super, &self->super, self->tx.wq.super.qp_num); + + if (UCT_RC_MLX5_TM_ENABLED(iface)) { + /* Send queue of this QP will be used by FW for HW RNDV. Driver requires + * such a QP to be initialized with zero send queue length. */ + memset(&attr, 0, sizeof(attr)); + uct_rc_mlx5_iface_fill_attr(iface, &attr, 0, &iface->rx.srq); + uct_ib_exp_qp_fill_attr(&iface->super.super, &attr); + status = uct_rc_mlx5_iface_create_qp(iface, &self->tm_qp, NULL, &attr); + if (status != UCS_OK) { + goto err; + } + + uct_rc_iface_add_qp(&iface->super, &self->super, self->tm_qp.qp_num); + } + + self->tx.wq.bb_max = ucs_min(self->tx.wq.bb_max, iface->tx.bb_max); + self->mp.free = 1; + uct_rc_txqp_available_set(&self->super.txqp, self->tx.wq.bb_max); + return UCS_OK; + +err: + uct_ib_mlx5_destroy_qp(&self->tx.wq.super); + return status; +} + +static void uct_rc_mlx5_ep_clean_qp(uct_rc_mlx5_ep_t *ep, uct_ib_mlx5_qp_t *qp) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(ep->super.super.super.iface, + uct_rc_mlx5_iface_common_t); + uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.super.super.md, + uct_ib_mlx5_md_t); + + /* Make the HW generate CQEs for all in-progress SRQ receives from the QP, + * so we clean them all before ibv_modify_qp() can see them. 
+     */
+#if HAVE_DECL_IBV_CMD_MODIFY_QP && !HAVE_DEVX
+    struct ibv_qp_attr qp_attr;
+    struct ibv_modify_qp cmd;
+    int ret;
+
+    /* Bypass mlx5 driver, and go directly to command interface, to avoid
+     * cleaning the CQ in mlx5 driver
+     */
+    memset(&qp_attr, 0, sizeof(qp_attr));
+    qp_attr.qp_state = IBV_QPS_RESET;
+    ret = ibv_cmd_modify_qp(qp->verbs.qp, &qp_attr, IBV_QP_STATE, &cmd, sizeof(cmd));
+    if (ret) {
+        ucs_warn("modify qp 0x%x to RESET failed: %m", qp->qp_num);
+    }
+#else /* !HAVE_DECL_IBV_CMD_MODIFY_QP || HAVE_DEVX */
+    (void)uct_ib_mlx5_modify_qp_state(md, qp, IBV_QPS_ERR);
+#endif /* HAVE_DECL_IBV_CMD_MODIFY_QP && !HAVE_DEVX */
+
+    iface->super.rx.srq.available += uct_rc_mlx5_iface_commom_clean(
+            &iface->cq[UCT_IB_DIR_RX],
+            &iface->rx.srq, qp->qp_num);
+
+    /* Synchronize CQ index with the driver, since it would remove pending
+     * completions for this QP (both send and receive) during ibv_destroy_qp().
+     */
+    uct_rc_mlx5_iface_common_update_cqs_ci(iface, &iface->super.super);
+    (void)uct_ib_mlx5_modify_qp_state(md, qp, IBV_QPS_RESET);
+    uct_rc_mlx5_iface_common_sync_cqs_ci(iface, &iface->super.super);
+}
+/*
+ * Endpoint destructor: cleans up the TX work queue, runs
+ * uct_rc_mlx5_ep_clean_qp() on the TX QP (and on the TM QP when tag matching
+ * is enabled), returns the TX credits that are still outstanding, then
+ * unregisters and destroys the QPs.
+ */
+static UCS_CLASS_CLEANUP_FUNC(uct_rc_mlx5_ep_t)
+{
+    uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(self->super.super.super.iface,
+                                                       uct_rc_mlx5_iface_common_t);
+
+    uct_ib_mlx5_txwq_cleanup(&self->tx.wq);
+    uct_rc_mlx5_ep_clean_qp(self, &self->tx.wq.super);
+#if IBV_HW_TM
+    if (UCT_RC_MLX5_TM_ENABLED(iface)) {
+        uct_rc_mlx5_ep_clean_qp(self, &self->tm_qp);
+        uct_ib_mlx5_iface_put_res_domain(&self->tm_qp);
+        uct_rc_iface_remove_qp(&iface->super, self->tm_qp.qp_num);
+        uct_ib_mlx5_destroy_qp(&self->tm_qp);
+    }
+#endif /* IBV_HW_TM */
+
+    ucs_assert(self->mp.free == 1);
+
+    /* Return all credits, in case the user called
+     * flush(UCT_FLUSH_FLAG_CANCEL) before ep_destroy: top the available
+     * count back up to bb_max.
+     */
+    uct_rc_txqp_available_add(&self->super.txqp,
+                              self->tx.wq.bb_max -
+                              uct_rc_txqp_available(&self->super.txqp));
+
+    uct_ib_mlx5_srq_cleanup(&iface->rx.srq, iface->rx.srq.verbs.srq);
+
+    uct_rc_iface_remove_qp(&iface->super, self->tx.wq.super.qp_num);
+    uct_ib_mlx5_destroy_qp(&self->tx.wq.super);
+}
+/*
+ * Handle a failed endpoint: purge all outstanding TX operations with the
+ * given status, give the WQEs that were still in flight
+ * (bb_max - available) back to the interface's tx.cq_available, and call the
+ * interface's set_ep_failed operation. Returns whatever that operation
+ * returns.
+ */
+ucs_status_t uct_rc_mlx5_ep_handle_failure(uct_rc_mlx5_ep_t *ep,
+                                           ucs_status_t status)
+{
+    uct_ib_iface_t *ib_iface = ucs_derived_of(ep->super.super.super.iface,
+                                              uct_ib_iface_t);
+    uct_rc_iface_t *rc_iface = ucs_derived_of(ib_iface, uct_rc_iface_t);
+
+    uct_rc_txqp_purge_outstanding(&ep->super.txqp, status, 0);
+    /* poll_cqe for mlx5 returns NULL in case of failure, and cq_available
+       is not updated for the error cqe and all outstanding wqes */
+    rc_iface->tx.cq_available += ep->tx.wq.bb_max -
+                                 uct_rc_txqp_available(&ep->super.txqp);
+    return ib_iface->ops->set_ep_failed(ib_iface, &ep->super.super.super,
+                                        status);
+}
+/*
+ * set_ep_failed implementation for this endpoint class: forwards to
+ * uct_set_ep_failed() with the uct_rc_mlx5_ep_t class and the base interface.
+ */
+ucs_status_t uct_rc_mlx5_ep_set_failed(uct_ib_iface_t *iface, uct_ep_h ep,
+                                       ucs_status_t status)
+{
+    return uct_set_ep_failed(&UCS_CLASS_NAME(uct_rc_mlx5_ep_t), ep,
+                             &iface->super.super, status);
+}
+
+UCS_CLASS_DEFINE(uct_rc_mlx5_ep_t, uct_rc_ep_t);
+UCS_CLASS_DEFINE_NEW_FUNC(uct_rc_mlx5_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DEFINE_DELETE_FUNC(uct_rc_mlx5_ep_t, uct_ep_t);
diff --git a/src/uct/ib/rc/accel/rc_mlx5_iface.c b/src/uct/ib/rc/accel/rc_mlx5_iface.c
new file mode 100644
index 0000000..5022965
--- /dev/null
+++ b/src/uct/ib/rc/accel/rc_mlx5_iface.c
@@ -0,0 +1,832 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "rc_mlx5.inl"
+
+
+enum {
+    UCT_RC_MLX5_IFACE_ADDR_TYPE_BASIC,
+
+    /* Tag Matching address. It additionaly contains QP number which
+     * is used for hardware offloads. 
*/ + UCT_RC_MLX5_IFACE_ADDR_TYPE_TM +}; + + +/** + * RC mlx5 interface configuration + */ +typedef struct uct_rc_mlx5_iface_config { + uct_rc_iface_config_t super; + uct_rc_mlx5_iface_common_config_t rc_mlx5_common; + /* TODO wc_mode, UAR mode SnB W/A... */ +} uct_rc_mlx5_iface_config_t; + + +/* Configuration table of the transport: embeds the generic RC table and the common RC mlx5 table, both registered under the RC_ prefix, mapped onto the two members of uct_rc_mlx5_iface_config_t. */ ucs_config_field_t uct_rc_mlx5_iface_config_table[] = { + {"RC_", "", NULL, + ucs_offsetof(uct_rc_mlx5_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_rc_iface_config_table)}, + + {"RC_", "", NULL, + ucs_offsetof(uct_rc_mlx5_iface_config_t, rc_mlx5_common), + UCS_CONFIG_TYPE_TABLE(uct_rc_mlx5_common_config_table)}, + + {NULL} +}; + + +/* Forward declaration; the table itself is defined further below in this file. */ static uct_rc_iface_ops_t uct_rc_mlx5_iface_ops; + +#if ENABLE_STATS +/* Statistics class of the iface, with the two rx_inl counters named below. */ ucs_stats_class_t uct_rc_mlx5_iface_stats_class = { + .name = "mlx5", + .num_counters = UCT_RC_MLX5_IFACE_STAT_LAST, + .counter_names = { + [UCT_RC_MLX5_IFACE_STAT_RX_INL_32] = "rx_inl_32", + [UCT_RC_MLX5_IFACE_STAT_RX_INL_64] = "rx_inl_64" + } +}; +#endif + +/* Examine an RX CQE viewed as an error CQE. A responder error whose syndrome is REMOTE_ABORTED and whose vendor syndrome is ODP releases the aborted SRQ segment (with UCS_OK) and advances the RX CQ consumer index; every other CQE is handed to uct_ib_mlx5_check_completion(). */ void uct_rc_mlx5_iface_check_rx_completion(uct_rc_mlx5_iface_common_t *iface, + struct mlx5_cqe64 *cqe) +{ + uct_ib_mlx5_cq_t *cq = &iface->cq[UCT_IB_DIR_RX]; + struct mlx5_err_cqe *ecqe = (void*)cqe; + uct_ib_mlx5_srq_seg_t *seg; + uint16_t wqe_ctr; + + ucs_memory_cpu_load_fence(); + + if (((ecqe->op_own >> 4) == MLX5_CQE_RESP_ERR) && + (ecqe->syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR) && + (ecqe->vendor_err_synd == UCT_IB_MLX5_CQE_VENDOR_SYND_ODP)) + { + /* Release the aborted segment */ + wqe_ctr = ntohs(ecqe->wqe_counter); + seg = uct_ib_mlx5_srq_get_wqe(&iface->rx.srq, wqe_ctr); + ++cq->cq_ci; + /* TODO: Check if ib_stride_index valid for error CQE */ + uct_rc_mlx5_iface_release_srq_seg(iface, seg, cqe, wqe_ctr, UCS_OK, + iface->super.super.config.rx_headroom_offset, + &iface->super.super.release_desc); + } else { + ucs_assert((ecqe->op_own >> 4) != MLX5_CQE_INVALID); + uct_ib_mlx5_check_completion(&iface->super.super, cq, cqe); + } +} + +/* Poll at most one TX completion: returns 0 when the TX CQ yields no CQE and 1 after a CQE has been processed. */ static UCS_F_ALWAYS_INLINE unsigned 
+uct_rc_mlx5_iface_poll_tx(uct_rc_mlx5_iface_common_t *iface) +{ + struct mlx5_cqe64 *cqe; + uct_rc_mlx5_ep_t *ep; + unsigned qp_num; + uint16_t hw_ci; + + cqe = uct_ib_mlx5_poll_cq(&iface->super.super, &iface->cq[UCT_IB_DIR_TX]); + if (cqe == NULL) { + return 0; + } + + UCS_STATS_UPDATE_COUNTER(iface->super.stats, UCT_RC_IFACE_STAT_TX_COMPLETION, 1); + + ucs_memory_cpu_load_fence(); + + /* Extract the QP number from the CQE and look up the endpoint that owns it. */ qp_num = ntohl(cqe->sop_drop_qpn) & UCS_MASK(UCT_IB_QPN_ORDER); + ep = ucs_derived_of(uct_rc_iface_lookup_ep(&iface->super, qp_num), uct_rc_mlx5_ep_t); + ucs_assert(ep != NULL); + hw_ci = ntohs(cqe->wqe_counter); + ucs_trace_poll("rc_mlx5 iface %p tx_cqe: ep %p qpn 0x%x hw_ci %d", iface, ep, + qp_num, hw_ci); + + uct_rc_mlx5_common_update_tx_res(&iface->super, &ep->tx.wq, &ep->super.txqp, + hw_ci); + uct_rc_mlx5_txqp_process_tx_cqe(&ep->super.txqp, cqe, hw_ci); + + /* Schedule the pending group of this endpoint and dispatch the TX arbiter with uct_rc_ep_process_pending as callback. */ ucs_arbiter_group_schedule(&iface->super.tx.arbiter, &ep->super.arb_group); + ucs_arbiter_dispatch(&iface->super.tx.arbiter, 1, uct_rc_ep_process_pending, NULL); + + return 1; +} + +/* Progress function used without tag matching: polls RX first (HAS_EP flag) and polls TX only when RX returned zero. Returns the RX count if non-zero, otherwise the TX poll result. */ unsigned uct_rc_mlx5_iface_progress(void *arg) +{ + uct_rc_mlx5_iface_common_t *iface = arg; + unsigned count; + + count = uct_rc_mlx5_iface_common_poll_rx(iface, UCT_RC_MLX5_POLL_FLAG_HAS_EP); + if (count > 0) { + return count; + } + return uct_rc_mlx5_iface_poll_tx(iface); +} + +/* iface_query implementation: fills iface_attr through uct_rc_iface_query() and uct_rc_mlx5_iface_common_query(), then adjusts latency growth and sets the endpoint and iface address lengths. When built with HAVE_IBV_DM and device memory is present, the inline limits become the larger of the DM segment length and the default short limit. Returns UCS_OK or the error from uct_rc_iface_query(). */ static ucs_status_t uct_rc_mlx5_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(tl_iface, uct_rc_mlx5_iface_common_t); + uct_rc_iface_t *rc_iface = &iface->super; + size_t max_am_inline = UCT_IB_MLX5_AM_MAX_SHORT(0); + size_t max_put_inline = UCT_IB_MLX5_PUT_MAX_SHORT(0); + ucs_status_t status; + +#if HAVE_IBV_DM + if (iface->dm.dm != NULL) { + max_am_inline = ucs_max(iface->dm.dm->seg_len, UCT_IB_MLX5_AM_MAX_SHORT(0)); + max_put_inline = ucs_max(iface->dm.dm->seg_len, UCT_IB_MLX5_PUT_MAX_SHORT(0)); + } +#endif + + status = uct_rc_iface_query(rc_iface, iface_attr, + max_put_inline, + 
max_am_inline, + UCT_IB_MLX5_AM_ZCOPY_MAX_HDR(0), + UCT_IB_MLX5_AM_ZCOPY_MAX_IOV, + UCT_RC_MLX5_TM_EAGER_ZCOPY_MAX_IOV(0), + sizeof(uct_rc_mlx5_hdr_t)); + if (status != UCS_OK) { + return status; + } + + uct_rc_mlx5_iface_common_query(&rc_iface->super, iface_attr, max_am_inline, 0); + iface_attr->latency.growth += 1e-9; /* 1 ns per each extra QP */ + iface_attr->ep_addr_len = sizeof(uct_rc_mlx5_ep_address_t); + /* The iface address is the single byte written by uct_rc_mlx5_iface_get_address(). */ iface_attr->iface_addr_len = sizeof(uint8_t); + return UCS_OK; +} + +/* arm_cq hook: with HAVE_DECL_MLX5DV_INIT_OBJ the CQ is armed through uct_ib_mlx5dv_arm_cq(); otherwise the consumer index is first pushed to the verbs CQ and the generic uct_ib_iface_arm_cq() is used. Returns the status of the arm call. */ static ucs_status_t uct_rc_mlx5_iface_arm_cq(uct_ib_iface_t *ib_iface, + uct_ib_dir_t dir, + int solicited_only) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(ib_iface, uct_rc_mlx5_iface_common_t); +#if HAVE_DECL_MLX5DV_INIT_OBJ + return uct_ib_mlx5dv_arm_cq(&iface->cq[dir], solicited_only); +#else + uct_ib_mlx5_update_cq_ci(iface->super.super.cq[dir], + iface->cq[dir].cq_ci); + return uct_ib_iface_arm_cq(ib_iface, dir, solicited_only); +#endif +} + +/* handle_failure hook: arg is the failed CQE. Looks up the endpoint by the QP number of the CQE and returns silently when there is none; otherwise snapshots the TX work queue, fails the endpoint through uct_rc_mlx5_ep_handle_failure() and reports the completion error. The log level stays FATAL unless the endpoint failure handling returns UCS_OK, in which case the configured failure_level is used. */ static void +uct_rc_mlx5_iface_handle_failure(uct_ib_iface_t *ib_iface, void *arg, + ucs_status_t status) +{ + struct mlx5_cqe64 *cqe = arg; + uct_rc_iface_t *iface = ucs_derived_of(ib_iface, uct_rc_iface_t); + unsigned qp_num = ntohl(cqe->sop_drop_qpn) & + UCS_MASK(UCT_IB_QPN_ORDER); + uct_rc_mlx5_ep_t *ep = ucs_derived_of(uct_rc_iface_lookup_ep(iface, + qp_num), + uct_rc_mlx5_ep_t); + ucs_log_level_t log_lvl = UCS_LOG_LEVEL_FATAL; + uct_ib_mlx5_txwq_t txwq_copy; + size_t txwq_size; + + if (!ep) { + return; + } + + /* Create a copy of RC txwq for completion error reporting, since the QP + * would be released by set_ep_failed()*/ + txwq_copy = ep->tx.wq; + txwq_size = UCS_PTR_BYTE_DIFF(ep->tx.wq.qstart, ep->tx.wq.qend); + txwq_copy.qstart = ucs_malloc(txwq_size, "rc_txwq_copy"); + /* If the allocation fails the snapshot is simply not filled; the reporting call below then receives NULL. */ if (txwq_copy.qstart != NULL) { + memcpy(txwq_copy.qstart, ep->tx.wq.qstart, txwq_size); + txwq_copy.qend = UCS_PTR_BYTE_OFFSET(txwq_copy.qstart, txwq_size); + } + + if (uct_rc_mlx5_ep_handle_failure(ep, status) == UCS_OK) { + log_lvl = 
ib_iface->super.config.failure_level; + } + + /* Report the error CQE; the work queue snapshot is passed only if its buffer was allocated. */ uct_ib_mlx5_completion_with_err(ib_iface, arg, + txwq_copy.qstart ? &txwq_copy : NULL, + log_lvl); + ucs_free(txwq_copy.qstart); +} + +/* iface_progress_enable implementation: pre-posts receives when UCT_PROGRESS_RECV is requested, then enables progress with the progress function stored in the RC iface. */ static void uct_rc_mlx5_iface_progress_enable(uct_iface_h tl_iface, unsigned flags) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(tl_iface, uct_rc_mlx5_iface_common_t); + + if (flags & UCT_PROGRESS_RECV) { + uct_rc_mlx5_iface_common_prepost_recvs(iface); + } + + uct_base_iface_progress_enable_cb(&iface->super.super.super, + iface->super.progress, flags); +} + +/* Create and initialize an RC QP for this iface. When built with HAVE_DECL_MLX5DV_CREATE_QP: if the MD has UCT_IB_MLX5_MD_FLAG_DEVX_RC_QP the whole job is delegated to uct_ib_mlx5_devx_create_qp(), otherwise the QP is created with mlx5dv_create_qp(). Without that build option uct_ib_mlx5_iface_create_qp() is used. Afterwards the QP is initialized with uct_rc_iface_qp_init() and, only if attr->cap.max_send_wr is non-zero, the TX work queue is initialized. Returns UCS_OK or an error status; on errors after creation the QP is destroyed. */ ucs_status_t uct_rc_mlx5_iface_create_qp(uct_rc_mlx5_iface_common_t *iface, + uct_ib_mlx5_qp_t *qp, + uct_ib_mlx5_txwq_t *txwq, + uct_ib_qp_attr_t *attr) +{ + uct_ib_iface_t *ib_iface = &iface->super.super; + ucs_status_t status; +#if HAVE_DECL_MLX5DV_CREATE_QP + uct_ib_mlx5_md_t *md = ucs_derived_of(ib_iface->super.md, + uct_ib_mlx5_md_t); + uct_ib_device_t *dev = &md->super.dev; + struct mlx5dv_qp_init_attr dv_attr = {}; + + if (md->flags & UCT_IB_MLX5_MD_FLAG_DEVX_RC_QP) { + return uct_ib_mlx5_devx_create_qp(ib_iface, qp, txwq, attr); + } + + status = uct_ib_mlx5_iface_fill_attr(ib_iface, qp, attr); + if (status != UCS_OK) { + return status; + } + + uct_ib_iface_fill_attr(ib_iface, attr); +#if HAVE_DECL_MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE + dv_attr.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; + dv_attr.create_flags = MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE; +#endif + qp->verbs.qp = mlx5dv_create_qp(dev->ibv_context, &attr->ibv, &dv_attr); + if (qp->verbs.qp == NULL) { + ucs_error("mlx5dv_create_qp("UCT_IB_IFACE_FMT"): failed: %m", + UCT_IB_IFACE_ARG(ib_iface)); + status = UCS_ERR_IO_ERROR; + goto err; + } + + qp->qp_num = qp->verbs.qp->qp_num; +#else + status = uct_ib_mlx5_iface_create_qp(ib_iface, qp, attr); + if (status != UCS_OK) { + goto err; + } +#endif + + status = uct_rc_iface_qp_init(&iface->super, qp->verbs.qp); + if (status != UCS_OK) { + goto err_destory_qp; + } + + /* A QP created without send work requests gets no TX work queue. */ if (attr->cap.max_send_wr) { + 
status = uct_ib_mlx5_txwq_init(iface->super.super.super.worker, + iface->tx.mmio_mode, txwq, + qp->verbs.qp); + if (status != UCS_OK) { + ucs_error("Failed to get mlx5 QP information"); + goto err_destory_qp; + } + } + + return UCS_OK; + +/* Reached only after the verbs QP exists: destroy it and fall through to return the error. */ err_destory_qp: + ibv_destroy_qp(qp->verbs.qp); +err: + return status; +} + +/* Progress function used when tag matching is enabled: same RX-then-TX order as uct_rc_mlx5_iface_progress(), but RX is polled with the TM flag added. */ static UCS_F_MAYBE_UNUSED unsigned uct_rc_mlx5_iface_progress_tm(void *arg) +{ + uct_rc_mlx5_iface_common_t *iface = arg; + unsigned count; + + count = uct_rc_mlx5_iface_common_poll_rx(iface, + UCT_RC_MLX5_POLL_FLAG_HAS_EP | + UCT_RC_MLX5_POLL_FLAG_TM); + if (count > 0) { + return count; + } + return uct_rc_mlx5_iface_poll_tx(iface); +} + +#if IBV_HW_TM +/* iface_tag_recv_zcopy entry point: casts the iface and forwards all arguments to uct_rc_mlx5_iface_common_tag_recv(). */ static ucs_status_t uct_rc_mlx5_iface_tag_recv_zcopy(uct_iface_h tl_iface, + uct_tag_t tag, + uct_tag_t tag_mask, + const uct_iov_t *iov, + size_t iovcnt, + uct_tag_context_t *ctx) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(tl_iface, uct_rc_mlx5_iface_common_t); + + return uct_rc_mlx5_iface_common_tag_recv(iface, tag, tag_mask, iov, + iovcnt, ctx); +} + +/* iface_tag_recv_cancel entry point: casts the iface and forwards to uct_rc_mlx5_iface_common_tag_recv_cancel(). */ static ucs_status_t uct_rc_mlx5_iface_tag_recv_cancel(uct_iface_h tl_iface, + uct_tag_context_t *ctx, + int force) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(tl_iface, uct_rc_mlx5_iface_common_t); + + return uct_rc_mlx5_iface_common_tag_recv_cancel(iface, ctx, force); +} +#endif + +/* Pre-initialization step, called by the common iface constructor before the base class is initialized. Decides whether HW tag matching is enabled and fills init_attr (RX CQ length, segment size) and the iface->tm fields (callbacks, number of tags, number of strides) accordingly. Returns UCS_OK, UCS_ERR_INVALID_PARAM for an unsupported stride count, or UCS_ERR_IO_ERROR when the port MTU cannot be obtained. */ static ucs_status_t uct_rc_mlx5_iface_preinit(uct_rc_mlx5_iface_common_t *iface, + uct_md_h tl_md, + uct_rc_iface_common_config_t *rc_config, + uct_rc_mlx5_iface_common_config_t *mlx5_config, + const uct_iface_params_t *params, + uct_ib_iface_init_attr_t *init_attr) +{ +#if IBV_HW_TM + uct_ib_mlx5_md_t *md = ucs_derived_of(tl_md, uct_ib_mlx5_md_t); + uct_ib_device_t UCS_V_UNUSED *dev = &md->super.dev; + struct ibv_tmh tmh; + int mtu; + ucs_status_t status; + + /* TM is used only if it is both enabled in the configuration and flagged as supported by the caller. */ iface->tm.enabled = mlx5_config->tm.enable && (init_attr->flags & + UCT_IB_TM_SUPPORTED); + if (!iface->tm.enabled) { + goto out_tm_disabled; + } + + /* Compile-time check that THM and 
uct_rc_mlx5_hdr_t are wire-compatible for the + * case of no-tag protocol. + */ + UCS_STATIC_ASSERT(sizeof(tmh.opcode) == sizeof(((uct_rc_mlx5_hdr_t*)0)->tmh_opcode)); + UCS_STATIC_ASSERT(ucs_offsetof(struct ibv_tmh, opcode) == + ucs_offsetof(uct_rc_mlx5_hdr_t, tmh_opcode)); + + UCS_STATIC_ASSERT(sizeof(uct_rc_mlx5_ctx_priv_t) <= UCT_TAG_PRIV_LEN); + + /* Each unexpected-message callback and argument is taken from params only when its field_mask bit is set; otherwise it is NULL. */ iface->tm.eager_unexp.cb = (params->field_mask & + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_CB) ? + params->eager_cb : NULL; + iface->tm.eager_unexp.arg = (params->field_mask & + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_ARG) ? + params->eager_arg : NULL; + iface->tm.rndv_unexp.cb = (params->field_mask & + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_CB) ? + params->rndv_cb : NULL; + iface->tm.rndv_unexp.arg = (params->field_mask & + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_ARG) ? + params->rndv_arg : NULL; + iface->tm.unexpected_cnt = 0; + iface->tm.num_outstanding = 0; + /* The tag list is bounded by both the device capability and the configured list size. */ iface->tm.num_tags = ucs_min(IBV_DEVICE_TM_CAPS(dev, max_num_tags), + mlx5_config->tm.list_size); + + /* There can be: + * - up to rx.queue_len RX CQEs + * - up to 3 CQEs for every posted tag: ADD, TM_CONSUMED and MSG_ARRIVED + * - one SYNC CQE per every IBV_DEVICE_MAX_UNEXP_COUNT unexpected receives */ + UCS_STATIC_ASSERT(IBV_DEVICE_MAX_UNEXP_COUNT); + init_attr->rx_cq_len = rc_config->super.rx.queue_len + iface->tm.num_tags * 3 + + rc_config->super.rx.queue_len / + IBV_DEVICE_MAX_UNEXP_COUNT; + init_attr->seg_size = ucs_max(mlx5_config->tm.seg_size, + rc_config->super.seg_size); + iface->tm.mp.num_strides = 1; + iface->tm.max_bcopy = init_attr->seg_size; + + /* Multi-Packet XRQ initialization */ + /* All three MD flags are required; otherwise the single-stride settings above are kept. */ if (!ucs_test_all_flags(md->flags, UCT_IB_MLX5_MD_FLAG_MP_RQ | + UCT_IB_MLX5_MD_FLAG_DEVX_RC_SRQ | + UCT_IB_MLX5_MD_FLAG_DEVX_RC_QP)) { + return UCS_OK; + } + + if ((mlx5_config->tm.mp_num_strides == UCS_ULUNITS_AUTO) || + (mlx5_config->tm.mp_num_strides == 1)) { + return UCS_OK; + /* TODO: make the following to be default when MP support is added to UCP + iface->tm.mp.num_strides = 
UCS_BIT(IBV_DEVICE_MP_MIN_LOG_NUM_STRIDES); */ + } else if ((mlx5_config->tm.mp_num_strides != 8) && + (mlx5_config->tm.mp_num_strides != 16)){ + ucs_error("invalid value of TM_NUM_STRIDES: %lu, must be 1,8 or 16", + mlx5_config->tm.mp_num_strides); + return UCS_ERR_INVALID_PARAM; + } + + status = uct_ib_device_mtu(params->mode.device.dev_name, tl_md, &mtu); + if (status != UCS_OK) { + /* NOTE(review): the original status is only logged; the caller always receives UCS_ERR_IO_ERROR. */ ucs_error("failed to get port MTU: %s", ucs_status_string(status)); + return UCS_ERR_IO_ERROR; + } + + /* Multi-packet mode: use the configured stride count and make the segment size equal to the port MTU. */ iface->tm.mp.num_strides = mlx5_config->tm.mp_num_strides; + init_attr->seg_size = mtu; + + return UCS_OK; + +out_tm_disabled: +#else + iface->tm.enabled = 0; +#endif + /* TM disabled, or not compiled in: RX CQ length and segment size come straight from the RC configuration, with a single stride. */ init_attr->rx_cq_len = rc_config->super.rx.queue_len; + init_attr->seg_size = rc_config->super.seg_size; + iface->tm.mp.num_strides = 1; + + return UCS_OK; +} + +/* init_rx hook. With tag matching enabled, the TM receive queue is created (DEVX variant if the MD has UCT_IB_MLX5_MD_FLAG_DEVX_RC_SRQ, verbs variant otherwise) and the TM progress function is selected. Without TM, a regular SRQ is created through uct_rc_iface_init_rx(), wrapped by uct_ib_mlx5_srq_init(), marked as a verbs object, and the plain progress function is selected. Returns UCS_OK or the first error encountered. */ static ucs_status_t +uct_rc_mlx5_iface_init_rx(uct_rc_iface_t *rc_iface, + const uct_rc_iface_common_config_t *rc_config) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(rc_iface, uct_rc_mlx5_iface_common_t); + uct_ib_mlx5_md_t *md = ucs_derived_of(rc_iface->super.super.md, uct_ib_mlx5_md_t); + struct ibv_srq_init_attr_ex srq_attr = {}; + ucs_status_t status; + + if (UCT_RC_MLX5_TM_ENABLED(iface)) { + if (md->flags & UCT_IB_MLX5_MD_FLAG_DEVX_RC_SRQ) { + status = uct_rc_mlx5_devx_init_rx_tm(iface, rc_config, 0, + UCT_RC_RNDV_HDR_LEN); + } else { + status = uct_rc_mlx5_init_rx_tm(iface, rc_config, &srq_attr, + UCT_RC_RNDV_HDR_LEN); + } + + if (status != UCS_OK) { + goto err; + } + + iface->super.progress = uct_rc_mlx5_iface_progress_tm; + return UCS_OK; + } + + /* MP XRQ is supported with HW TM only */ + ucs_assert(iface->tm.mp.num_strides == 1); + + status = uct_rc_iface_init_rx(rc_iface, rc_config, &iface->rx.srq.verbs.srq); + if (status != UCS_OK) { + goto err; + } + + status = uct_ib_mlx5_srq_init(&iface->rx.srq, iface->rx.srq.verbs.srq, + iface->super.super.config.seg_size, + iface->tm.mp.num_strides); + if (status != UCS_OK) { + goto 
err_free_srq; + } + + iface->rx.srq.type = UCT_IB_MLX5_OBJ_TYPE_VERBS; + iface->super.progress = uct_rc_mlx5_iface_progress; + return UCS_OK; + +/* NOTE(review): on this path rx.srq.type has not yet been set to VERBS (the constructor leaves it as UCT_IB_MLX5_OBJ_TYPE_LAST); confirm that uct_rc_mlx5_destroy_srq() still releases the verbs SRQ created above. */ err_free_srq: + uct_rc_mlx5_destroy_srq(&iface->rx.srq); +err: + return status; +} + +/* cleanup_rx hook: destroys the receive queue of the iface. */ static void uct_rc_mlx5_iface_cleanup_rx(uct_rc_iface_t *rc_iface) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(rc_iface, uct_rc_mlx5_iface_common_t); + + uct_rc_mlx5_destroy_srq(&iface->rx.srq); +} + +/* event_cq hook: increments the cq_sn counter of the CQ for the given direction. */ static void uct_rc_mlx5_iface_event_cq(uct_ib_iface_t *ib_iface, + uct_ib_dir_t dir) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(ib_iface, uct_rc_mlx5_iface_common_t); + + iface->cq[dir].cq_sn++; +} + +/* Address type of this iface: TM when tag matching is enabled, BASIC otherwise. */ static uint8_t uct_rc_mlx5_iface_get_address_type(uct_iface_h tl_iface) +{ + uct_rc_mlx5_iface_common_t *iface = ucs_derived_of(tl_iface, + uct_rc_mlx5_iface_common_t); + + return UCT_RC_MLX5_TM_ENABLED(iface) ? UCT_RC_MLX5_IFACE_ADDR_TYPE_TM : + UCT_RC_MLX5_IFACE_ADDR_TYPE_BASIC; +} + +/* iface_get_address implementation: writes the one-byte address type into addr and returns UCS_OK. */ static ucs_status_t uct_rc_mlx5_iface_get_address(uct_iface_h tl_iface, + uct_iface_addr_t *addr) +{ + *(uint8_t*)addr = uct_rc_mlx5_iface_get_address_type(tl_iface); + + return UCS_OK; +} + +/* iface_is_reachable implementation: returns 0 when an iface address is given and its type byte differs from the local address type; otherwise returns the result of the generic uct_ib_iface_is_reachable() check. */ int uct_rc_mlx5_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + uint8_t my_type = uct_rc_mlx5_iface_get_address_type(tl_iface); + + if ((iface_addr != NULL) && (my_type != *(uint8_t*)iface_addr)) { + return 0; + } + + return uct_ib_iface_is_reachable(tl_iface, dev_addr, iface_addr); +} + + +/* Select the SRQ topology. A cyclic topology is returned when the configuration asks for AUTO or CYCLIC and both tag matching and the DEVX MD flag are present; otherwise a list topology is returned. In both cases the MP_RQ variant is chosen when UCT_RC_MLX5_MP_ENABLED(iface) holds. */ static int uct_rc_mlx5_iface_srq_topo(uct_rc_mlx5_iface_common_t *iface, + uct_md_h md, + uct_rc_mlx5_iface_common_config_t *mlx5_config) +{ + uct_ib_mlx5_md_t *ib_md = ucs_derived_of(md, uct_ib_mlx5_md_t); + + /* Cyclic SRQ is supported with HW TM and DEVX only. 
*/ + if (((mlx5_config->srq_topo == UCT_RC_MLX5_SRQ_TOPO_AUTO) || + (mlx5_config->srq_topo == UCT_RC_MLX5_SRQ_TOPO_CYCLIC)) && + UCT_RC_MLX5_TM_ENABLED(iface) && + (ib_md->flags & UCT_IB_MLX5_MD_FLAG_DEVX)) { + + return UCT_RC_MLX5_MP_ENABLED(iface) ? + UCT_IB_MLX5_SRQ_TOPO_CYCLIC_MP_RQ : UCT_IB_MLX5_SRQ_TOPO_CYCLIC; + } + + return UCT_RC_MLX5_MP_ENABLED(iface) ? + UCT_IB_MLX5_SRQ_TOPO_LIST_MP_RQ : UCT_IB_MLX5_SRQ_TOPO_LIST; +} + +/* Constructor of the common RC mlx5 iface. Order of work: run uct_rc_mlx5_iface_preinit(), mark the SRQ and the TM command queue as not yet created, choose the SRQ topology and RX header length, initialize the uct_rc_iface_t base class, fetch the mlx5 TX and RX CQ objects, allocate the stats node, initialize tag matching and device memory, resolve the fence mode, create the atomic descriptor pool and select the atomic reply handlers. Returns UCS_OK or the first error status. */ UCS_CLASS_INIT_FUNC(uct_rc_mlx5_iface_common_t, + uct_rc_iface_ops_t *ops, + uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + uct_rc_iface_common_config_t *rc_config, + uct_rc_mlx5_iface_common_config_t *mlx5_config, + uct_ib_iface_init_attr_t *init_attr) +{ + uct_ib_device_t *dev; + ucs_status_t status; + + status = uct_rc_mlx5_iface_preinit(self, md, rc_config, mlx5_config, + params, init_attr); + if (status != UCS_OK) { + return status; + } + + /* OBJ_TYPE_LAST marks the SRQ and the TM command queue as not created yet; the SRQ type is set to VERBS by uct_rc_mlx5_iface_init_rx() on success. */ self->rx.srq.type = UCT_IB_MLX5_OBJ_TYPE_LAST; + self->rx.srq.topo = uct_rc_mlx5_iface_srq_topo(self, md, mlx5_config); + self->tm.cmd_wq.super.super.type = UCT_IB_MLX5_OBJ_TYPE_LAST; + /* With multi-packet receive enabled the RX header length is zero. */ init_attr->rx_hdr_len = UCT_RC_MLX5_MP_ENABLED(self) ? 
+ 0 : sizeof(uct_rc_mlx5_hdr_t); + + UCS_CLASS_CALL_SUPER_INIT(uct_rc_iface_t, ops, md, worker, params, + rc_config, init_attr); + + dev = uct_ib_iface_device(&self->super.super); + self->tx.mmio_mode = mlx5_config->super.mmio_mode; + /* The configured value is clamped to UINT16_MAX. */ self->tx.bb_max = ucs_min(mlx5_config->tx_max_bb, UINT16_MAX); + self->tm.am_desc.super.cb = uct_rc_mlx5_release_desc; + + if (!UCT_RC_MLX5_MP_ENABLED(self)) { + self->tm.am_desc.offset = self->super.super.config.rx_headroom_offset; + } + + /* No resources owned by this class exist yet, so early failures return directly. */ status = uct_ib_mlx5_get_cq(self->super.super.cq[UCT_IB_DIR_TX], + &self->cq[UCT_IB_DIR_TX]); + if (status != UCS_OK) { + return status; + } + + status = uct_ib_mlx5_get_cq(self->super.super.cq[UCT_IB_DIR_RX], + &self->cq[UCT_IB_DIR_RX]); + if (status != UCS_OK) { + return status; + } + + status = UCS_STATS_NODE_ALLOC(&self->stats, &uct_rc_mlx5_iface_stats_class, + self->super.stats); + if (status != UCS_OK) { + return status; + } + + /* From here on, failures unwind through the cleanup labels at the end of the function. */ status = uct_rc_mlx5_iface_common_tag_init(self); + if (status != UCS_OK) { + goto cleanup_stats; + } + + status = uct_rc_mlx5_iface_common_dm_init(self, &self->super, + &mlx5_config->super); + if (status != UCS_OK) { + goto cleanup_tm; + } + + self->super.config.fence_mode = (uct_rc_fence_mode_t)rc_config->fence_mode; + self->super.rx.srq.quota = self->rx.srq.mask + 1; + self->super.config.exp_backoff = mlx5_config->exp_backoff; + + /* Resolve the configured fence mode into WQE control flags. AUTO resolves to WEAK when the device has PCI atomics and to NONE otherwise. */ if ((rc_config->fence_mode == UCT_RC_FENCE_MODE_WEAK) || + ((rc_config->fence_mode == UCT_RC_FENCE_MODE_AUTO) && + uct_ib_device_has_pci_atomics(dev))) { + self->config.atomic_fence_flag = UCT_IB_MLX5_WQE_CTRL_FLAG_FENCE; + self->config.put_fence_flag = 0; + self->super.config.fence_mode = UCT_RC_FENCE_MODE_WEAK; + } else if (rc_config->fence_mode == UCT_RC_FENCE_MODE_STRONG) { + self->config.atomic_fence_flag = UCT_IB_MLX5_WQE_CTRL_FLAG_STRONG_ORDER; + self->config.put_fence_flag = UCT_IB_MLX5_WQE_CTRL_FLAG_STRONG_ORDER; + self->super.config.fence_mode = UCT_RC_FENCE_MODE_STRONG; + } else if ((rc_config->fence_mode == 
UCT_RC_FENCE_MODE_NONE) || + ((rc_config->fence_mode == UCT_RC_FENCE_MODE_AUTO) && + !uct_ib_device_has_pci_atomics(dev))) { + self->config.atomic_fence_flag = 0; + self->config.put_fence_flag = 0; + self->super.config.fence_mode = UCT_RC_FENCE_MODE_NONE; + } else { + /* Unknown fence mode. Device memory was already initialized above, so unwind starting from cleanup_dm (which then falls through to the TM and stats cleanup) instead of skipping the DM cleanup. */ ucs_error("incorrect fence value: %d", self->super.config.fence_mode); + status = UCS_ERR_INVALID_PARAM; + goto cleanup_dm; + } + + /* By default set to something that is always in cache */ + self->rx.pref_ptr = self; + + /* Memory pool of send descriptors for atomic operations: each element is a send descriptor followed by UCT_IB_MAX_ATOMIC_SIZE bytes, aligned to the cache line size. */ status = uct_iface_mpool_init(&self->super.super.super, + &self->tx.atomic_desc_mp, + sizeof(uct_rc_iface_send_desc_t) + UCT_IB_MAX_ATOMIC_SIZE, + sizeof(uct_rc_iface_send_desc_t) + UCT_IB_MAX_ATOMIC_SIZE, + UCS_SYS_CACHE_LINE_SIZE, + &rc_config->super.tx.mp, + self->super.config.tx_qp_len, + uct_rc_iface_send_desc_init, + "rc_mlx5_atomic_desc"); + if (status != UCS_OK) { + goto cleanup_dm; + } + + /* For little-endian atomic reply, override the default functions, to still + * treat the response as big-endian when it arrives in the CQE. 
+ */ + if (!(uct_ib_iface_device(&self->super.super)->atomic_arg_sizes_be & sizeof(uint64_t))) { + self->super.config.atomic64_handler = uct_rc_mlx5_common_atomic64_le_handler; + } + if (!(uct_ib_iface_device(&self->super.super)->ext_atomic_arg_sizes_be & sizeof(uint32_t))) { + self->super.config.atomic32_ext_handler = uct_rc_mlx5_common_atomic32_le_handler; + } + if (!(uct_ib_iface_device(&self->super.super)->ext_atomic_arg_sizes_be & sizeof(uint64_t))) { + self->super.config.atomic64_ext_handler = uct_rc_mlx5_common_atomic64_le_handler; + } + + return UCS_OK; + +/* Error unwinding: each label releases one resource and falls through to the next, in reverse order of initialization. */ cleanup_dm: + uct_rc_mlx5_iface_common_dm_cleanup(self); +cleanup_tm: + uct_rc_mlx5_iface_common_tag_cleanup(self); +cleanup_stats: + UCS_STATS_NODE_FREE(self->stats); + return status; +} + +/* Destructor of the common RC mlx5 iface: releases the atomic descriptor pool, device memory, tag matching state and the stats node. */ static UCS_CLASS_CLEANUP_FUNC(uct_rc_mlx5_iface_common_t) +{ + ucs_mpool_cleanup(&self->tx.atomic_desc_mp, 1); + uct_rc_mlx5_iface_common_dm_cleanup(self); + uct_rc_mlx5_iface_common_tag_cleanup(self); + UCS_STATS_NODE_FREE(self->stats); +} + +UCS_CLASS_DEFINE(uct_rc_mlx5_iface_common_t, uct_rc_iface_t); + +/* Concrete RC mlx5 iface type; it has no members beyond the common part. */ typedef struct { + uct_rc_mlx5_iface_common_t super; +} uct_rc_mlx5_iface_t; + +/* Constructor of the RC mlx5 iface. Builds init_attr (FC request size, CQ overrun ignored, RX header length, TX CQ length, RC QP type, plus UCT_IB_TM_SUPPORTED when the device reports TM flags), initializes the common base class with the transport ops table, then sets the TX moderation, the flow control thresholds and the maximal IOV count. Returns UCS_OK or an error status. */ UCS_CLASS_INIT_FUNC(uct_rc_mlx5_iface_t, + uct_md_h tl_md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_rc_mlx5_iface_config_t *config = ucs_derived_of(tl_config, + uct_rc_mlx5_iface_config_t); + uct_ib_mlx5_md_t UCS_V_UNUSED *md = ucs_derived_of(tl_md, uct_ib_mlx5_md_t); + uct_ib_iface_init_attr_t init_attr = {}; + ucs_status_t status; + + init_attr.fc_req_size = sizeof(uct_rc_fc_request_t); + init_attr.flags = UCT_IB_CQ_IGNORE_OVERRUN; + init_attr.rx_hdr_len = sizeof(uct_rc_mlx5_hdr_t); + init_attr.tx_cq_len = config->super.tx_cq_len; + init_attr.qp_type = IBV_QPT_RC; + + if (IBV_DEVICE_TM_FLAGS(&md->super.dev)) { + init_attr.flags |= UCT_IB_TM_SUPPORTED; + } + + UCS_CLASS_CALL_SUPER_INIT(uct_rc_mlx5_iface_common_t, &uct_rc_mlx5_iface_ops, + tl_md, worker, params, 
&config->super.super, + &config->rc_mlx5_common, &init_attr); + + /* TX CQ moderation is capped at a quarter of bb_max. */ self->super.super.config.tx_moderation = ucs_min(config->super.tx_cq_moderation, + self->super.tx.bb_max / 4); + + status = uct_rc_init_fc_thresh(&config->super, &self->super.super); + if (status != UCS_OK) { + return status; + } + + /* Set max_iov for put_zcopy and get_zcopy */ + /* Computed as the number of data segments that fit in UCT_IB_MLX5_MAX_SEND_WQE_SIZE after subtracting one remote-address segment and one control segment. */ uct_ib_iface_set_max_iov(&self->super.super.super, + (UCT_IB_MLX5_MAX_SEND_WQE_SIZE - + sizeof(struct mlx5_wqe_raddr_seg) - + sizeof(struct mlx5_wqe_ctrl_seg)) / + sizeof(struct mlx5_wqe_data_seg)); + + return UCS_OK; +} + +/* Destructor of the RC mlx5 iface: disables both send and receive progress. */ static UCS_CLASS_CLEANUP_FUNC(uct_rc_mlx5_iface_t) +{ + uct_base_iface_progress_disable(&self->super.super.super.super.super, + UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); +} + +UCS_CLASS_DEFINE(uct_rc_mlx5_iface_t, uct_rc_mlx5_iface_common_t); + +static UCS_CLASS_DEFINE_NEW_FUNC(uct_rc_mlx5_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t*); + +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_rc_mlx5_iface_t, uct_iface_t); + +/* Operation table of the RC mlx5 transport. It is nested three levels deep: the innermost initializer holds the endpoint and iface entry points, the middle one the CQ and failure hooks, and the outermost one the RX setup and flow control hooks. */ static uct_rc_iface_ops_t uct_rc_mlx5_iface_ops = { + { + { + .ep_put_short = uct_rc_mlx5_ep_put_short, + .ep_put_bcopy = uct_rc_mlx5_ep_put_bcopy, + .ep_put_zcopy = uct_rc_mlx5_ep_put_zcopy, + .ep_get_bcopy = uct_rc_mlx5_ep_get_bcopy, + .ep_get_zcopy = uct_rc_mlx5_ep_get_zcopy, + .ep_am_short = uct_rc_mlx5_ep_am_short, + .ep_am_bcopy = uct_rc_mlx5_ep_am_bcopy, + .ep_am_zcopy = uct_rc_mlx5_ep_am_zcopy, + .ep_atomic_cswap64 = uct_rc_mlx5_ep_atomic_cswap64, + .ep_atomic_cswap32 = uct_rc_mlx5_ep_atomic_cswap32, + .ep_atomic64_post = uct_rc_mlx5_ep_atomic64_post, + .ep_atomic32_post = uct_rc_mlx5_ep_atomic32_post, + .ep_atomic64_fetch = uct_rc_mlx5_ep_atomic64_fetch, + .ep_atomic32_fetch = uct_rc_mlx5_ep_atomic32_fetch, + .ep_pending_add = uct_rc_ep_pending_add, + .ep_pending_purge = uct_rc_ep_pending_purge, + .ep_flush = uct_rc_mlx5_ep_flush, + .ep_fence = uct_rc_mlx5_ep_fence, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_rc_mlx5_ep_t), + 
.ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_rc_mlx5_ep_t), + .ep_get_address = uct_rc_mlx5_ep_get_address, + .ep_connect_to_ep = uct_rc_mlx5_ep_connect_to_ep, +#if IBV_HW_TM + /* Tag matching entry points, present only when built with IBV_HW_TM. */ .ep_tag_eager_short = uct_rc_mlx5_ep_tag_eager_short, + .ep_tag_eager_bcopy = uct_rc_mlx5_ep_tag_eager_bcopy, + .ep_tag_eager_zcopy = uct_rc_mlx5_ep_tag_eager_zcopy, + .ep_tag_rndv_zcopy = uct_rc_mlx5_ep_tag_rndv_zcopy, + .ep_tag_rndv_request = uct_rc_mlx5_ep_tag_rndv_request, + .ep_tag_rndv_cancel = uct_rc_mlx5_ep_tag_rndv_cancel, + .iface_tag_recv_zcopy = uct_rc_mlx5_iface_tag_recv_zcopy, + .iface_tag_recv_cancel = uct_rc_mlx5_iface_tag_recv_cancel, +#endif + .iface_flush = uct_rc_iface_flush, + .iface_fence = uct_rc_iface_fence, + .iface_progress_enable = uct_rc_mlx5_iface_progress_enable, + .iface_progress_disable = uct_base_iface_progress_disable, + .iface_progress = uct_rc_iface_do_progress, + .iface_event_fd_get = uct_ib_iface_event_fd_get, + .iface_event_arm = uct_rc_iface_event_arm, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_rc_mlx5_iface_t), + .iface_query = uct_rc_mlx5_iface_query, + .iface_get_address = uct_rc_mlx5_iface_get_address, + .iface_get_device_address = uct_ib_iface_get_device_address, + .iface_is_reachable = uct_rc_mlx5_iface_is_reachable + }, + /* CQ and failure handling hooks. */ .create_cq = uct_ib_mlx5_create_cq, + .arm_cq = uct_rc_mlx5_iface_arm_cq, + .event_cq = uct_rc_mlx5_iface_event_cq, + .handle_failure = uct_rc_mlx5_iface_handle_failure, + .set_ep_failed = uct_rc_mlx5_ep_set_failed, + }, + /* RX setup, RX cleanup and flow control hooks. */ .init_rx = uct_rc_mlx5_iface_init_rx, + .cleanup_rx = uct_rc_mlx5_iface_cleanup_rx, + .fc_ctrl = uct_rc_mlx5_ep_fc_ctrl, + .fc_handler = uct_rc_iface_fc_handler, +}; + +/* Query the device ports usable by this transport. Always passes UCT_IB_DEVICE_FLAG_MLX5_PRM to uct_ib_device_query_ports() and adds UCT_IB_DEVICE_FLAG_LINK_IB unless the eth_pause option of the MD configuration is set. Returns the status of the port query. */ static ucs_status_t +uct_rc_mlx5_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + uct_ib_md_t *ib_md = ucs_derived_of(md, uct_ib_md_t); + int flags; + + flags = UCT_IB_DEVICE_FLAG_MLX5_PRM | + (ib_md->config.eth_pause ? 
0 : UCT_IB_DEVICE_FLAG_LINK_IB); + return uct_ib_device_query_ports(&ib_md->dev, flags, tl_devices_p, + num_tl_devices_p); +} + +/* Define the rc_mlx5 transport on the IB component, tying together the device query function, the iface class, the RC_MLX5_ configuration prefix and the configuration table and type. */ UCT_TL_DEFINE(&uct_ib_component, rc_mlx5, uct_rc_mlx5_query_tl_devices, + uct_rc_mlx5_iface_t, "RC_MLX5_", uct_rc_mlx5_iface_config_table, + uct_rc_mlx5_iface_config_t); diff --git a/src/uct/ib/rc/base/rc_def.h b/src/uct/ib/rc/base/rc_def.h new file mode 100644 index 0000000..33529e7 --- /dev/null +++ b/src/uct/ib/rc/base/rc_def.h @@ -0,0 +1,20 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef RC_DEF_H_ +#define RC_DEF_H_ + + +/* Typedef names for the RC transport structures; the structures themselves are not defined in this header. */ typedef struct uct_rc_ep uct_rc_ep_t; +typedef struct uct_rc_iface uct_rc_iface_t; +typedef struct uct_rc_iface_send_op uct_rc_iface_send_op_t; +typedef struct uct_rc_iface_send_desc uct_rc_iface_send_desc_t; +typedef struct uct_rc_iface_config uct_rc_iface_config_t; +typedef struct uct_rc_pending_priv uct_rc_pending_priv_t; +typedef struct uct_rc_txqp uct_rc_txqp_t; + + +#endif diff --git a/src/uct/ib/rc/base/rc_ep.c b/src/uct/ib/rc/base/rc_ep.c new file mode 100644 index 0000000..fdfea8c --- /dev/null +++ b/src/uct/ib/rc/base/rc_ep.c @@ -0,0 +1,418 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (c) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "rc_ep.h" +#include "rc_iface.h" + +#include +#include +#include +#include +#include + +#if ENABLE_STATS +/* Statistics class for the flow control counters of an endpoint. */ static ucs_stats_class_t uct_rc_fc_stats_class = { + .name = "rc_fc", + .num_counters = UCT_RC_FC_STAT_LAST, + .counter_names = { + [UCT_RC_FC_STAT_NO_CRED] = "no_cred", + [UCT_RC_FC_STAT_TX_GRANT] = "tx_grant", + [UCT_RC_FC_STAT_TX_PURE_GRANT] = "tx_pure_grant", + [UCT_RC_FC_STAT_TX_SOFT_REQ] = "tx_soft_req", + [UCT_RC_FC_STAT_TX_HARD_REQ] = "tx_hard_req", + [UCT_RC_FC_STAT_RX_GRANT] = "rx_grant", + [UCT_RC_FC_STAT_RX_PURE_GRANT] = "rx_pure_grant", + [UCT_RC_FC_STAT_RX_SOFT_REQ] = "rx_soft_req", + [UCT_RC_FC_STAT_RX_HARD_REQ] = "rx_hard_req", + [UCT_RC_FC_STAT_FC_WND] = "fc_wnd" + } +}; + +/* Statistics class for the TX QP counters. */ static ucs_stats_class_t uct_rc_txqp_stats_class = { + .name = "rc_txqp", + .num_counters = UCT_RC_TXQP_STAT_LAST, + .counter_names = { + [UCT_RC_TXQP_STAT_QP_FULL] = "qp_full", + [UCT_RC_TXQP_STAT_SIGNAL] = "signal" + } +}; +#endif + +/* Initialize the TX QP bookkeeping: zero the unsignaled counters and the available count, initialize the empty queue of outstanding operations, and allocate a stats node whose name is formatted from the QP number. The iface argument is not used in the body. Returns the status of the stats node allocation. */ ucs_status_t uct_rc_txqp_init(uct_rc_txqp_t *txqp, uct_rc_iface_t *iface, + uint32_t qp_num + UCS_STATS_ARG(ucs_stats_node_t* stats_parent)) +{ + txqp->unsignaled = 0; + txqp->unsignaled_store = 0; + txqp->unsignaled_store_count = 0; + txqp->available = 0; + ucs_queue_head_init(&txqp->outstanding); + + return UCS_STATS_NODE_ALLOC(&txqp->stats, &uct_rc_txqp_stats_class, + stats_parent, "-0x%x", qp_num); +} + +/* Release the TX QP bookkeeping: purge all outstanding operations with UCS_ERR_CANCELED (with logging enabled) and free the stats node. */ void uct_rc_txqp_cleanup(uct_rc_txqp_t *txqp) +{ + uct_rc_txqp_purge_outstanding(txqp, UCS_ERR_CANCELED, 1); + UCS_STATS_NODE_FREE(txqp->stats); +} + +/* Initialize the flow control state with the given window size and cleared flags, allocate its stats node and publish the window in the FC_WND counter. Returns UCS_OK or the stats allocation error. */ ucs_status_t uct_rc_fc_init(uct_rc_fc_t *fc, int16_t winsize + UCS_STATS_ARG(ucs_stats_node_t* stats_parent)) +{ + ucs_status_t status; + + fc->fc_wnd = winsize; + fc->flags = 0; + + status = UCS_STATS_NODE_ALLOC(&fc->stats, &uct_rc_fc_stats_class, + stats_parent); + if (status != UCS_OK) { + return status; + } + + UCS_STATS_SET_COUNTER(fc->stats, UCT_RC_FC_STAT_FC_WND, fc->fc_wnd); + + return UCS_OK; +} + +/* Release the flow control state; only the stats node needs freeing. */ void 
uct_rc_fc_cleanup(uct_rc_fc_t *fc) +{ + UCS_STATS_NODE_FREE(fc->stats); +} + +/* Constructor of the base RC endpoint: initializes the base endpoint class, the TX QP bookkeeping and the flow control state (window size taken from the iface configuration), initializes the pending arbiter group and adds the endpoint to the head of the ep_list of the iface. If flow control initialization fails, the TX QP state is cleaned up before returning the error. */ UCS_CLASS_INIT_FUNC(uct_rc_ep_t, uct_rc_iface_t *iface, uint32_t qp_num) +{ + ucs_status_t status; + + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super.super); + + status = uct_rc_txqp_init(&self->txqp, iface, qp_num + UCS_STATS_ARG(self->super.stats)); + if (status != UCS_OK) { + return status; + } + + status = uct_rc_fc_init(&self->fc, iface->config.fc_wnd_size + UCS_STATS_ARG(self->super.stats)); + if (status != UCS_OK) { + goto err_txqp_cleanup; + } + + /* Check that FC protocol fits AM id + * (just in case AM id space gets extended) */ + UCS_STATIC_ASSERT(UCT_RC_EP_FC_MASK < UINT8_MAX); + + ucs_arbiter_group_init(&self->arb_group); + + ucs_list_add_head(&iface->ep_list, &self->list); + return UCS_OK; + +err_txqp_cleanup: + uct_rc_txqp_cleanup(&self->txqp); + return status; +} + +/* Destructor of the base RC endpoint: removes it from the iface list, purges pending requests with no user callback, then releases the flow control and TX QP state. */ static UCS_CLASS_CLEANUP_FUNC(uct_rc_ep_t) +{ + ucs_debug("destroy rc ep %p", self); + + ucs_list_del(&self->list); + uct_rc_ep_pending_purge(&self->super.super, NULL, NULL); + uct_rc_fc_cleanup(&self->fc); + uct_rc_txqp_cleanup(&self->txqp); +} + +UCS_CLASS_DEFINE(uct_rc_ep_t, uct_base_ep_t) + +/* Write a textual description of an RC packet into buffer (at most max bytes). For a regular packet it prints the flow control flags and the AM id with the FC bits masked out, then appends the AM dump of the payload that follows the RC header. For a pure FC grant it writes a fixed string and does not invoke the AM tracer. The valid_length argument is not used in the body. */ void uct_rc_ep_packet_dump(uct_base_iface_t *iface, uct_am_trace_type_t type, + void *data, size_t length, size_t valid_length, + char *buffer, size_t max) +{ + uct_rc_hdr_t *rch = data; + uint8_t fc_hdr = uct_rc_fc_get_fc_hdr(rch->am_id); + uint8_t am_wo_fc; + + /* Do not invoke AM tracer for auxiliary pure FC_GRANT message */ + if (fc_hdr != UCT_RC_EP_FC_PURE_GRANT) { + am_wo_fc = rch->am_id & ~UCT_RC_EP_FC_MASK; /* mask out FC bits*/ + snprintf(buffer, max, " %c%c am %d ", + fc_hdr & UCT_RC_EP_FC_FLAG_SOFT_REQ ? 's' : + fc_hdr & UCT_RC_EP_FC_FLAG_HARD_REQ ? 'h' : '-', + fc_hdr & UCT_RC_EP_FC_FLAG_GRANT ? 
'g' : '-', + am_wo_fc); + /* Append the AM dump right after the prefix written above, within the remaining buffer space. */ uct_iface_dump_am(iface, type, am_wo_fc, rch + 1, length - sizeof(*rch), + buffer + strlen(buffer), max - strlen(buffer)); + } else { + snprintf(buffer, max, " FC pure grant am "); + } +} + +/* Completion handler for get_bcopy: passes the response to the unpack callback of the descriptor, invokes the user completion with UCS_OK and returns the descriptor to its memory pool. */ void uct_rc_ep_get_bcopy_handler(uct_rc_iface_send_op_t *op, const void *resp) +{ + uct_rc_iface_send_desc_t *desc = ucs_derived_of(op, uct_rc_iface_send_desc_t); + + VALGRIND_MAKE_MEM_DEFINED(resp, desc->super.length); + + desc->unpack_cb(desc->super.unpack_arg, resp, desc->super.length); + + uct_invoke_completion(desc->super.user_comp, UCS_OK); + + ucs_mpool_put(desc); +} + +/* Same as uct_rc_ep_get_bcopy_handler() except that no user completion is invoked. */ void uct_rc_ep_get_bcopy_handler_no_completion(uct_rc_iface_send_op_t *op, + const void *resp) +{ + uct_rc_iface_send_desc_t *desc = ucs_derived_of(op, uct_rc_iface_send_desc_t); + + VALGRIND_MAKE_MEM_DEFINED(resp, desc->super.length); + + desc->unpack_cb(desc->super.unpack_arg, resp, desc->super.length); + + ucs_mpool_put(desc); +} + +/* Completion handler for a send operation: invokes the user completion with UCS_OK and releases the operation through uct_rc_iface_put_send_op(). The resp argument is not used. */ void uct_rc_ep_send_op_completion_handler(uct_rc_iface_send_op_t *op, + const void *resp) +{ + uct_invoke_completion(op->user_comp, UCS_OK); + uct_rc_iface_put_send_op(op); +} + +/* Completion handler for a flush operation: invokes the user completion with UCS_OK and returns the operation to its memory pool. The resp argument is not used. */ void uct_rc_ep_flush_op_completion_handler(uct_rc_iface_send_op_t *op, + const void *resp) +{ + uct_invoke_completion(op->user_comp, UCS_OK); + ucs_mpool_put(op); +} + +/* ep_pending_add implementation. Returns UCS_ERR_BUSY without queuing when both the endpoint and the iface currently have TX resources. Otherwise the request is pushed to the arbiter group of the endpoint, and the group is scheduled right away if the endpoint itself still has resources; UCS_OK is returned. The flags argument is not used in the body. */ ucs_status_t uct_rc_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *n, + unsigned flags) +{ + uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t); + uct_rc_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_ep_t); + + if (uct_rc_ep_has_tx_resources(ep) && + uct_rc_iface_has_tx_resources(iface)) { + return UCS_ERR_BUSY; + } + + UCS_STATIC_ASSERT(sizeof(uct_pending_req_priv_arb_t) <= + UCT_PENDING_REQ_PRIV_LEN); + uct_pending_req_arb_group_push(&ep->arb_group, n); + UCT_TL_EP_STAT_PEND(&ep->super); + + if (uct_rc_ep_has_tx_resources(ep)) { + /* If we have ep (but not iface) resources, we need to schedule the ep */ + ucs_arbiter_group_schedule(&iface->tx.arbiter, &ep->arb_group); + } + + return UCS_OK; 
+} + /* Arbiter callback: runs req->func and maps its status to an arbiter result (UCS_OK removes the element, UCS_INPROGRESS moves on to the next group, any other status stops the arbiter or deschedules the group) */ +ucs_arbiter_cb_result_t uct_rc_ep_process_pending(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv); + uct_rc_iface_t *iface UCS_V_UNUSED; + ucs_status_t status; + uct_rc_ep_t *ep; + + ucs_trace_data("progressing pending request %p", req); + status = req->func(req); + ucs_trace_data("status returned from progress pending: %s", + ucs_status_string(status)); + + if (status == UCS_OK) { + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } else if (status == UCS_INPROGRESS) { + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } else { + ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_rc_ep_t, arb_group); + iface = ucs_derived_of(ep->super.super.iface, uct_rc_iface_t); + if (!uct_rc_iface_has_tx_resources(iface)) { + /* No iface resources */ + return UCS_ARBITER_CB_RESULT_STOP; + } else { + /* No ep resources */ + ucs_assertv(!uct_rc_ep_has_tx_resources(ep), + "pending callback returned error but send resources are available"); + return UCS_ARBITER_CB_RESULT_DESCHED_GROUP; + } + } +} + /* Arbiter purge callback: passes user requests to cb (when not NULL), returns internal FC grant requests to their mpool, and always removes the element */ +static ucs_arbiter_cb_result_t uct_rc_ep_abriter_purge_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_purge_cb_args_t *cb_args = arg; + uct_pending_purge_callback_t cb = cb_args->cb; + uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, + priv); + uct_rc_ep_t UCS_V_UNUSED *ep = ucs_container_of( + ucs_arbiter_elem_group(elem), + uct_rc_ep_t, arb_group); + uct_rc_fc_request_t *freq; + + /* Invoke user's callback only if it is not internal FC message */ + if (ucs_likely(req->func != uct_rc_ep_fc_grant)){ + if (cb != NULL) { + cb(req, cb_args->arg); + } else { + ucs_debug("ep=%p cancelling user pending request %p", ep, req); + } + } else { + freq = ucs_derived_of(req, uct_rc_fc_request_t); + ucs_mpool_put(freq); + } + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; +} + /* Purges all pending requests queued on the ep arbiter group, reporting each user request to cb */ +void uct_rc_ep_pending_purge(uct_ep_h tl_ep, uct_pending_purge_callback_t cb, + void 
*arg) +{ + uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t); + uct_rc_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_ep_t); + uct_purge_cb_args_t args = {cb, arg}; + + ucs_arbiter_group_purge(&iface->tx.arbiter, &ep->arb_group, + uct_rc_ep_abriter_purge_cb, &args); +} + /* Pending-request callback that sends a pure FC grant; the request is returned to its mpool only when the send succeeds, and the send status is returned to the caller */ +ucs_status_t uct_rc_ep_fc_grant(uct_pending_req_t *self) +{ + ucs_status_t status; + uct_rc_fc_request_t *freq = ucs_derived_of(self, uct_rc_fc_request_t); + uct_rc_ep_t *ep = ucs_derived_of(freq->ep, uct_rc_ep_t); + uct_rc_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_rc_iface_t); + + ucs_assert_always(iface->config.fc_enabled); + status = uct_rc_fc_ctrl(&ep->super.super, UCT_RC_EP_FC_PURE_GRANT, NULL); + if (status == UCS_OK) { + UCS_STATS_UPDATE_COUNTER(ep->fc.stats, UCT_RC_FC_STAT_TX_PURE_GRANT, 1); + ucs_mpool_put(freq); + } + return status; +} + /* Drains txqp->outstanding: optionally warns (is_log), completes any user completion with the given status, and releases every op according to its handler */ +void uct_rc_txqp_purge_outstanding(uct_rc_txqp_t *txqp, ucs_status_t status, + int is_log) +{ + uct_rc_iface_send_op_t *op; + uct_rc_iface_send_desc_t *desc; + + ucs_queue_for_each_extract(op, &txqp->outstanding, queue, 1) { + if (op->handler != (uct_rc_send_handler_t)ucs_mpool_put) { + if (is_log != 0) { + ucs_warn("destroying rc ep %p with uncompleted operation %p", + txqp, op); + } + + if (op->user_comp != NULL) { + /* This must be uct_rc_ep_get_bcopy_handler, + * uct_rc_ep_send_completion_proxy_handler, + * one of the atomic handlers, + * so invoke user completion */ + uct_invoke_completion(op->user_comp, status); + } + } + op->flags &= ~(UCT_RC_IFACE_SEND_OP_FLAG_INUSE | + UCT_RC_IFACE_SEND_OP_FLAG_ZCOPY); + if (op->handler == uct_rc_ep_send_op_completion_handler) { + uct_rc_iface_put_send_op(op); + } else if (op->handler == uct_rc_ep_flush_op_completion_handler) { + ucs_mpool_put(op); + } else { + /* Any other handler: the op is released as a send descriptor */ + desc = ucs_derived_of(op, uct_rc_iface_send_desc_t); + ucs_mpool_put(desc); + } + } +} + /* Flush status of an endpoint: UCS_ERR_NO_RESOURCE without tx resources, UCS_OK when the txqp available count equals max_available, UCS_INPROGRESS otherwise */ +ucs_status_t uct_rc_ep_flush(uct_rc_ep_t *ep, int16_t max_available, + unsigned flags) +{ + uct_rc_iface_t *iface = 
ucs_derived_of(ep->super.super.iface, + uct_rc_iface_t); + + if (!uct_rc_iface_has_tx_resources(iface) || + !uct_rc_ep_has_tx_resources(ep)) { + return UCS_ERR_NO_RESOURCE; + } + + if (uct_rc_txqp_available(&ep->txqp) == max_available) { + UCT_TL_EP_STAT_FLUSH(&ep->super); + return UCS_OK; + } + + return UCS_INPROGRESS; +} + /* Fails with UCS_ERR_NO_RESOURCE when no tx CQE is available; otherwise saves the current unsignaled count and sets it to RC_UNSIGNALED_INF */ +ucs_status_t uct_rc_ep_check_cqe(uct_rc_iface_t *iface, uct_rc_ep_t *ep) +{ + uct_rc_txqp_t *txqp; + + if (!uct_rc_iface_have_tx_cqe_avail(iface)) { + UCS_STATS_UPDATE_COUNTER(iface->stats, UCT_RC_IFACE_STAT_NO_CQE, 1); + UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1); + return UCS_ERR_NO_RESOURCE; + } + + txqp = &ep->txqp; + /* if unsignaled == RC_UNSIGNALED_INF this value was already saved and \ + next operation will be definitely signaled */ + if (txqp->unsignaled != RC_UNSIGNALED_INF) { + txqp->unsignaled_store_count++; + txqp->unsignaled_store += txqp->unsignaled; + txqp->unsignaled = RC_UNSIGNALED_INF; + } + + return UCS_OK; +} + /* Generates an atomic completion handler: stores the reply (byte-swapped from big-endian when _is_be is set) into desc->super.buffer, completes user_comp with UCS_OK and releases the descriptor */ +#define UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC(_num_bits, _is_be) \ + void UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(_num_bits, _is_be) \ + (uct_rc_iface_send_op_t *op, const void *resp) \ + { \ + uct_rc_iface_send_desc_t *desc = \ + ucs_derived_of(op, uct_rc_iface_send_desc_t); \ + const uint##_num_bits##_t *value = resp; \ + uint##_num_bits##_t *dest = desc->super.buffer; \ + \ + VALGRIND_MAKE_MEM_DEFINED(value, sizeof(*value)); \ + if (_is_be && (_num_bits == 32)) { \ + *dest = ntohl(*value); /* TODO swap in-place */ \ + } else if (_is_be && (_num_bits == 64)) { \ + *dest = be64toh(*value); \ + } else { \ + *dest = *value; \ + } \ + \ + uct_invoke_completion(desc->super.user_comp, UCS_OK); \ + ucs_mpool_put(desc); \ + } + /* Instantiate the handlers for 32/64-bit replies, without (0) and with (1) big-endian conversion */ +UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC(32, 0); +UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC(32, 1); +UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC(64, 0); +UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC(64, 1); + /* AM zcopy completion: completes user_comp with UCS_OK and releases the descriptor */ +void uct_rc_ep_am_zcopy_handler(uct_rc_iface_send_op_t *op, const void *resp) +{ + uct_rc_iface_send_desc_t 
*desc = ucs_derived_of(op, uct_rc_iface_send_desc_t); + uct_invoke_completion(desc->super.user_comp, UCS_OK); + ucs_mpool_put(desc); +} diff --git a/src/uct/ib/rc/base/rc_ep.h b/src/uct/ib/rc/base/rc_ep.h new file mode 100644 index 0000000..f772f05 --- /dev/null +++ b/src/uct/ib/rc/base/rc_ep.h @@ -0,0 +1,447 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_RC_EP_H +#define UCT_RC_EP_H + +#include "rc_iface.h" + +#include +#include + + /* Value of uct_rc_txqp::unsignaled which forces the next operation to be signaled */ +#define RC_UNSIGNALED_INF UINT16_MAX + /* Indices of the flow-control statistics counters (fc.stats) */ +enum { + UCT_RC_FC_STAT_NO_CRED, + UCT_RC_FC_STAT_TX_GRANT, + UCT_RC_FC_STAT_TX_PURE_GRANT, + UCT_RC_FC_STAT_TX_SOFT_REQ, + UCT_RC_FC_STAT_TX_HARD_REQ, + UCT_RC_FC_STAT_RX_GRANT, + UCT_RC_FC_STAT_RX_PURE_GRANT, + UCT_RC_FC_STAT_RX_SOFT_REQ, + UCT_RC_FC_STAT_RX_HARD_REQ, + UCT_RC_FC_STAT_FC_WND, + UCT_RC_FC_STAT_LAST +}; + /* Indices of the txqp statistics counters (txqp.stats) */ +enum { + UCT_RC_TXQP_STAT_QP_FULL, + UCT_RC_TXQP_STAT_SIGNAL, + UCT_RC_TXQP_STAT_LAST +}; + +/* + * Auxiliary AM ID bits used by FC protocol. + */ +enum { + /* Soft Credit Request: indicates that peer needs to piggy-back credits + * grant to counter AM (if any). Can be bundled with + * UCT_RC_EP_FC_FLAG_GRANT */ + UCT_RC_EP_FC_FLAG_SOFT_REQ = UCS_BIT(UCT_AM_ID_BITS), + + /* Hard Credit Request: indicates that wnd is close to being exhausted. + * The peer must send separate AM with credit grant as soon as it + * receives AM with this bit set. Can be bundled with + * UCT_RC_EP_FC_FLAG_GRANT */ + UCT_RC_EP_FC_FLAG_HARD_REQ = UCS_BIT((UCT_AM_ID_BITS) + 1), + + /* Credit Grant: ep should update its FC wnd as soon as it receives AM with + * this bit set. Can be bundled with either soft or hard request bits */ + UCT_RC_EP_FC_FLAG_GRANT = UCS_BIT((UCT_AM_ID_BITS) + 2), + + /* Special FC AM with Credit Grant: Just an empty message indicating + * credit grant. Can't be bundled with any other FC flag (as it consumes + * all 3 FC bits). 
*/ + UCT_RC_EP_FC_PURE_GRANT = (UCT_RC_EP_FC_FLAG_HARD_REQ | + UCT_RC_EP_FC_FLAG_SOFT_REQ | + UCT_RC_EP_FC_FLAG_GRANT) +}; + +/* + * FC protocol header mask + */ +#define UCT_RC_EP_FC_MASK UCT_RC_EP_FC_PURE_GRANT + +/* + * Macro to generate functions for AMO completions. + */ +#define UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(_num_bits, _is_be) \ + uct_rc_ep_atomic_handler_##_num_bits##_be##_is_be + +/* + * Check for send resources + */ +#define UCT_RC_CHECK_CQE_RET(_iface, _ep, _ret) \ + /* tx_moderation == 0 for TLs which don't support it */ \ + if (ucs_unlikely((_iface)->tx.cq_available <= \ + (signed)(_iface)->config.tx_moderation)) { \ + if (uct_rc_ep_check_cqe(_iface, _ep) != UCS_OK) { \ + return _ret; \ + } \ + } + /* Makes the calling function return _ret when the endpoint txqp has no available entries */ +#define UCT_RC_CHECK_TXQP_RET(_iface, _ep, _ret) \ + if (uct_rc_txqp_available(&(_ep)->txqp) <= 0) { \ + UCS_STATS_UPDATE_COUNTER((_ep)->txqp.stats, UCT_RC_TXQP_STAT_QP_FULL, 1); \ + UCS_STATS_UPDATE_COUNTER((_ep)->super.stats, UCT_EP_STAT_NO_RES, 1); \ + return _ret; \ + } + /* Makes the calling function return UCS_ERR_NO_RESOURCE when either the CQE or the txqp check fails */ +#define UCT_RC_CHECK_RES(_iface, _ep) \ + UCT_RC_CHECK_CQE_RET(_iface, _ep, UCS_ERR_NO_RESOURCE) \ + UCT_RC_CHECK_TXQP_RET(_iface, _ep, UCS_ERR_NO_RESOURCE) + +/* + * check for FC credits and add FC protocol bits (if any) + */ +#define UCT_RC_CHECK_FC_WND(_fc, _stats)\ + if ((_fc)->fc_wnd <= 0) { \ + UCS_STATS_UPDATE_COUNTER((_fc)->stats, UCT_RC_FC_STAT_NO_CRED, 1); \ + UCS_STATS_UPDATE_COUNTER(_stats, UCT_EP_STAT_NO_RES, 1); \ + return UCS_ERR_NO_RESOURCE; \ + } \ + + /* Consumes one FC credit and, when FC is enabled, records the new window in the statistics */ +#define UCT_RC_UPDATE_FC_WND(_iface, _fc) \ + { \ + /* For performance reasons, prefer to update fc_wnd unconditionally */ \ + (_fc)->fc_wnd--; \ + \ + if ((_iface)->config.fc_enabled) { \ + UCS_STATS_SET_COUNTER((_fc)->stats, UCT_RC_FC_STAT_FC_WND, \ + (_fc)->fc_wnd); \ + } \ + } + /* Pre-send FC check: at or below the soft threshold either verifies credits and adds request bits (FC enabled) or resets fc_wnd to the maximum (FC disabled); always ORs the FC bits of ep->fc.flags into _am_id */ +#define UCT_RC_CHECK_FC(_iface, _ep, _am_id) \ + { \ + if (ucs_unlikely((_ep)->fc.fc_wnd <= (_iface)->config.fc_soft_thresh)) { \ + if ((_iface)->config.fc_enabled) { \ + UCT_RC_CHECK_FC_WND(&(_ep)->fc, (_ep)->super.stats); \ + 
(_am_id) |= uct_rc_fc_req_moderation(&(_ep)->fc, _iface); \ + } else { \ + /* Set fc_wnd to max, to send as much as possible without checks */ \ + (_ep)->fc.fc_wnd = INT16_MAX; \ + } \ + } \ + (_am_id) |= uct_rc_fc_get_fc_hdr((_ep)->fc.flags); /* take grant bit */ \ + } + /* Post-send FC bookkeeping: counts the FC bits present in _fc_hdr, clears the pending ep FC flags and consumes one credit */ +#define UCT_RC_UPDATE_FC(_iface, _ep, _fc_hdr) \ + { \ + if ((_fc_hdr) & UCT_RC_EP_FC_FLAG_GRANT) { \ + UCS_STATS_UPDATE_COUNTER((_ep)->fc.stats, UCT_RC_FC_STAT_TX_GRANT, 1); \ + } \ + if ((_fc_hdr) & UCT_RC_EP_FC_FLAG_SOFT_REQ) { \ + UCS_STATS_UPDATE_COUNTER((_ep)->fc.stats, UCT_RC_FC_STAT_TX_SOFT_REQ, 1); \ + } else if ((_fc_hdr) & UCT_RC_EP_FC_FLAG_HARD_REQ) { \ + UCS_STATS_UPDATE_COUNTER((_ep)->fc.stats, UCT_RC_FC_STAT_TX_HARD_REQ, 1); \ + } \ + \ + (_ep)->fc.flags = 0; \ + \ + UCT_RC_UPDATE_FC_WND(_iface, &(_ep)->fc) \ + } + + +/* this is a common type for all rc and dc transports */ +struct uct_rc_txqp { + ucs_queue_head_t outstanding; /* queue of send ops added by uct_rc_txqp_add_send_op() and extracted on completion or purge */ + /* RC_UNSIGNALED_INF value forces signaled in moderation logic when + * CQ credits are close to zero (less than tx_moderation value) */ + uint16_t unsignaled; + /* Saved unsignaled value before it was set to inf to have possibility + * to return correct amount of CQ credits on TX completion */ + uint16_t unsignaled_store; + /* If unsignaled was stored several times to aggregative value, let's return + * credits only when this counter == 0 because it's impossible to return + * exact value on each signaled completion */ + uint16_t unsignaled_store_count; + int16_t available; /* decremented by res_count on every post (uct_rc_txqp_posted) */ + UCS_STATS_NODE_DECLARE(stats) +}; + /* Per-endpoint flow-control state */ +typedef struct uct_rc_fc { + /* Not more than fc_wnd active messages can be sent w/o acknowledgment */ + int16_t fc_wnd; + /* used only for FC protocol at this point (3 higher bits) */ + uint8_t flags; + UCS_STATS_NODE_DECLARE(stats) +} uct_rc_fc_t; + /* RC endpoint: base endpoint, tx queue state, link in the iface endpoint list, pending arbiter group and FC state */ +struct uct_rc_ep { + uct_base_ep_t super; + uct_rc_txqp_t txqp; + ucs_list_link_t list; + ucs_arbiter_group_t arb_group; + uct_rc_fc_t fc; +}; + +UCS_CLASS_DECLARE(uct_rc_ep_t, uct_rc_iface_t*, uint32_t); + 
+ /* Packed endpoint address: holds the QP number as uct_ib_uint24_t */ +typedef struct uct_rc_ep_address { + uct_ib_uint24_t qp_num; +} UCS_S_PACKED uct_rc_ep_address_t; + +void uct_rc_ep_packet_dump(uct_base_iface_t *iface, uct_am_trace_type_t type, + void *data, size_t length, size_t valid_length, + char *buffer, size_t max); + /* Send-completion handlers; each receives the completed op and a pointer to the response data */ +void uct_rc_ep_get_bcopy_handler(uct_rc_iface_send_op_t *op, const void *resp); + +void uct_rc_ep_get_bcopy_handler_no_completion(uct_rc_iface_send_op_t *op, + const void *resp); + +void uct_rc_ep_send_op_completion_handler(uct_rc_iface_send_op_t *op, + const void *resp); + +void uct_rc_ep_flush_op_completion_handler(uct_rc_iface_send_op_t *op, + const void *resp); + /* Pending-queue management of an endpoint */ +ucs_status_t uct_rc_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *n, + unsigned flags); + +void uct_rc_ep_pending_purge(uct_ep_h ep, uct_pending_purge_callback_t cb, + void*arg); + +ucs_arbiter_cb_result_t uct_rc_ep_process_pending(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg); + /* Flow-control state setup and teardown */ +ucs_status_t uct_rc_fc_init(uct_rc_fc_t *fc, int16_t winsize + UCS_STATS_ARG(ucs_stats_node_t* stats_parent)); +void uct_rc_fc_cleanup(uct_rc_fc_t *fc); + +ucs_status_t uct_rc_ep_fc_grant(uct_pending_req_t *self); + +void uct_rc_txqp_purge_outstanding(uct_rc_txqp_t *txqp, ucs_status_t status, + int is_log); + +ucs_status_t uct_rc_ep_flush(uct_rc_ep_t *ep, int16_t max_available, + unsigned flags); + +ucs_status_t uct_rc_ep_check_cqe(uct_rc_iface_t *iface, uct_rc_ep_t *ep); + /* Atomic reply handlers, named by value width (32/64) and big-endian flag (0/1) */ +void UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(32, 0)(uct_rc_iface_send_op_t *op, + const void *resp); +void UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(32, 1)(uct_rc_iface_send_op_t *op, + const void *resp); +void UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(64, 0)(uct_rc_iface_send_op_t *op, + const void *resp); +void UCT_RC_DEFINE_ATOMIC_HANDLER_FUNC_NAME(64, 1)(uct_rc_iface_send_op_t *op, + const void *resp); + /* Tx queue state setup and teardown */ +ucs_status_t uct_rc_txqp_init(uct_rc_txqp_t *txqp, uct_rc_iface_t *iface, + uint32_t qp_num + UCS_STATS_ARG(ucs_stats_node_t* stats_parent)); +void 
uct_rc_txqp_cleanup(uct_rc_txqp_t *txqp); + /* Accessors for txqp->available and txqp->unsignaled */ +static inline int16_t uct_rc_txqp_available(uct_rc_txqp_t *txqp) +{ + return txqp->available; +} + +static inline void uct_rc_txqp_available_add(uct_rc_txqp_t *txqp, int16_t val) +{ + txqp->available += val; +} + +static inline void uct_rc_txqp_available_set(uct_rc_txqp_t *txqp, int16_t val) +{ + txqp->available = val; +} + +static inline uint16_t uct_rc_txqp_unsignaled(uct_rc_txqp_t *txqp) +{ + return txqp->unsignaled; +} + /* Nonzero when FC credits remain or FC is disabled on the iface */ +static UCS_F_ALWAYS_INLINE +int uct_rc_fc_has_resources(uct_rc_iface_t *iface, uct_rc_fc_t *fc) +{ + /* When FC is disabled, fc_wnd may still become 0 because it's decremented + * unconditionally (for performance reasons) */ + return (fc->fc_wnd > 0) || !iface->config.fc_enabled; +} + /* Nonzero when the endpoint has both available txqp entries and FC resources */ +static UCS_F_ALWAYS_INLINE int uct_rc_ep_has_tx_resources(uct_rc_ep_t *ep) +{ + uct_rc_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_rc_iface_t); + + return (ep->txqp.available > 0) && uct_rc_fc_has_resources(iface, &ep->fc); +} + /* Marks op as in use and appends it to txqp->outstanding */ +static UCS_F_ALWAYS_INLINE void +uct_rc_txqp_add_send_op(uct_rc_txqp_t *txqp, uct_rc_iface_send_op_t *op) +{ + + /* NOTE: We insert the descriptor with the sequence number after the post, + * because when polling completions, we get the number of completions (rather + * than completion zero-based index). 
+ */ + ucs_assert(op != NULL); + ucs_assertv(!(op->flags & UCT_RC_IFACE_SEND_OP_FLAG_INUSE), "op=%p", op); + op->flags |= UCT_RC_IFACE_SEND_OP_FLAG_INUSE; + ucs_queue_push(&txqp->outstanding, &op->queue); +} + /* Stamps op with sequence number sn and queues it as outstanding */ +static UCS_F_ALWAYS_INLINE void +uct_rc_txqp_add_send_op_sn(uct_rc_txqp_t *txqp, uct_rc_iface_send_op_t *op, uint16_t sn) +{ + ucs_trace_poll("txqp %p add send op %p sn %d handler %s", txqp, op, sn, + ucs_debug_get_symbol_name((void*)op->handler)); + op->sn = sn; + uct_rc_txqp_add_send_op(txqp, op); +} + /* When comp is not NULL: takes a send op from the iface, attaches comp and the extra flags, and queues it with sequence number sn */ +static UCS_F_ALWAYS_INLINE void +uct_rc_txqp_add_send_comp(uct_rc_iface_t *iface, uct_rc_txqp_t *txqp, + uct_completion_t *comp, uint16_t sn, uint16_t flags) +{ + uct_rc_iface_send_op_t *op; + + if (comp == NULL) { + return; + } + + op = uct_rc_iface_get_send_op(iface); + op->user_comp = comp; + op->flags |= flags; + uct_rc_txqp_add_send_op_sn(txqp, op, sn); +} + /* Queues a flush completion op when comp is not NULL. Returns UCS_INPROGRESS, or UCS_ERR_NO_MEMORY when the op cannot be taken from iface->tx.flush_mp */ +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_rc_txqp_add_flush_comp(uct_rc_iface_t *iface, uct_base_ep_t *ep, + uct_rc_txqp_t *txqp, uct_completion_t *comp, + uint16_t sn) +{ + uct_rc_iface_send_op_t *op; + + if (comp != NULL) { + op = (uct_rc_iface_send_op_t*)ucs_mpool_get(&iface->tx.flush_mp); + if (ucs_unlikely(op == NULL)) { + ucs_error("Failed to allocate flush completion"); + return UCS_ERR_NO_MEMORY; + } + + op->flags = 0; + op->user_comp = comp; + uct_rc_txqp_add_send_op_sn(txqp, op, sn); + VALGRIND_MAKE_MEM_DEFINED(op, sizeof(*op)); /* handler set by mpool init */ + } + UCT_TL_EP_STAT_FLUSH_WAIT(ep); + + return UCS_INPROGRESS; +} + /* Clears the INUSE and ZCOPY flags of op and invokes its handler with resp */ +static UCS_F_ALWAYS_INLINE void +uct_rc_txqp_completion_op(uct_rc_iface_send_op_t *op, const void *resp) +{ + ucs_trace_poll("complete op %p sn %d handler %s", op, op->sn, + ucs_debug_get_symbol_name((void*)op->handler)); + ucs_assert(op->flags & UCT_RC_IFACE_SEND_OP_FLAG_INUSE); + op->flags &= ~(UCT_RC_IFACE_SEND_OP_FLAG_INUSE | + UCT_RC_IFACE_SEND_OP_FLAG_ZCOPY); + op->handler(op, resp); +} + +static UCS_F_ALWAYS_INLINE void 
+uct_rc_txqp_completion_desc(uct_rc_txqp_t *txqp, uint16_t sn) +{ + uct_rc_iface_send_op_t *op; + + ucs_trace_poll("txqp %p complete ops up to sn %d", txqp, sn); + ucs_queue_for_each_extract(op, &txqp->outstanding, queue, + UCS_CIRCULAR_COMPARE16(op->sn, <=, sn)) { + /* resp passed to the handler points just past the send descriptor */ + uct_rc_txqp_completion_op(op, ucs_derived_of(op, uct_rc_iface_send_desc_t) + 1); + } +} + /* Completes all outstanding ops with sequence number up to sn, passing the same resp pointer to every handler */ +static UCS_F_ALWAYS_INLINE void +uct_rc_txqp_completion_inl_resp(uct_rc_txqp_t *txqp, const void *resp, uint16_t sn) +{ + uct_rc_iface_send_op_t *op; + + ucs_trace_poll("txqp %p complete ops up to sn %d", txqp, sn); + ucs_queue_for_each_extract(op, &txqp->outstanding, queue, + UCS_CIRCULAR_COMPARE16(op->sn, <=, sn)) { + ucs_assert(!(op->flags & UCT_RC_IFACE_SEND_OP_FLAG_ZCOPY)); + uct_rc_txqp_completion_op(op, resp); + } +} + /* Returns flag once the unsignaled count has reached config.tx_moderation, otherwise 0 */ +static UCS_F_ALWAYS_INLINE uint8_t +uct_rc_iface_tx_moderation(uct_rc_iface_t *iface, uct_rc_txqp_t *txqp, uint8_t flag) +{ + return (txqp->unsignaled >= iface->config.tx_moderation) ? flag : 0; +} + /* Accounts for a posted operation: resets or increments the unsignaled counter and charges res_count credits to both the iface CQ and the txqp */ +static UCS_F_ALWAYS_INLINE void +uct_rc_txqp_posted(uct_rc_txqp_t *txqp, uct_rc_iface_t *iface, uint16_t res_count, + int signaled) +{ + if (signaled) { + ucs_assert(uct_rc_iface_have_tx_cqe_avail(iface)); + txqp->unsignaled = 0; + UCS_STATS_UPDATE_COUNTER(txqp->stats, UCT_RC_TXQP_STAT_SIGNAL, 1); + } else { + ucs_assert(txqp->unsignaled != RC_UNSIGNALED_INF); + ++txqp->unsignaled; + } + + /* reserve cq credits for every posted operation, + * in case it would complete with error */ + iface->tx.cq_available -= res_count; + txqp->available -= res_count; +} + /* Extracts the FC protocol bits from an AM id */ +static UCS_F_ALWAYS_INLINE uint8_t +uct_rc_fc_get_fc_hdr(uint8_t id) +{ + return id & UCT_RC_EP_FC_MASK; +} + /* Returns the hard or soft request flag when fc_wnd is exactly equal to the corresponding threshold, otherwise 0 */ +static UCS_F_ALWAYS_INLINE uint8_t +uct_rc_fc_req_moderation(uct_rc_fc_t *fc, uct_rc_iface_t *iface) +{ + return (fc->fc_wnd == iface->config.fc_hard_thresh) ? + UCT_RC_EP_FC_FLAG_HARD_REQ : + (fc->fc_wnd == iface->config.fc_soft_thresh) ? 
+ UCT_RC_EP_FC_FLAG_SOFT_REQ : 0; +} + /* Returns flag when the endpoint fence beat differs from the iface fence beat, otherwise 0; in both cases the endpoint beat is synchronized with the iface beat */ +static UCS_F_ALWAYS_INLINE int +uct_rc_ep_fm(uct_rc_iface_t *iface, uct_ib_fence_info_t* fi, int flag) +{ + int fence; + + /* a call to iface_fence increases beat, so if endpoint beat is not in + * sync with iface beat it means the endpoint did not post any WQE with + * fence flag yet */ + fence = (fi->fence_beat != iface->tx.fi.fence_beat) ? flag : 0; + fi->fence_beat = iface->tx.fi.fence_beat; + return fence; +} + /* Endpoint fence: when requested and the fence mode is not NONE, puts the endpoint beat one step behind the iface beat so that the next uct_rc_ep_fm() call returns its flag. Always returns UCS_OK */ +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_rc_ep_fence(uct_ep_h tl_ep, uct_ib_fence_info_t* fi, int fence) +{ + uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t); + + /* in case if fence is requested and enabled by configuration + * we need to schedule fence for next RDMA operation */ + if (fence && (iface->config.fence_mode != UCT_RC_FENCE_MODE_NONE)) { + fi->fence_beat = iface->tx.fi.fence_beat - 1; + } + + UCT_TL_EP_STAT_FENCE(ucs_derived_of(tl_ep, uct_base_ep_t)); + return UCS_OK; +} + +#endif diff --git a/src/uct/ib/rc/base/rc_iface.c b/src/uct/ib/rc/base/rc_iface.c new file mode 100644 index 0000000..7c48fb1 --- /dev/null +++ b/src/uct/ib/rc/base/rc_iface.c @@ -0,0 +1,889 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "rc_iface.h" +#include "rc_ep.h" + +#include +#include +#include +#include + + /* Names of the fence modes, indexed by UCT_RC_FENCE_MODE_*; used by the FENCE configuration entry */ +static const char *uct_rc_fence_mode_values[] = { + [UCT_RC_FENCE_MODE_NONE] = "none", + [UCT_RC_FENCE_MODE_WEAK] = "weak", + [UCT_RC_FENCE_MODE_STRONG] = "strong", + [UCT_RC_FENCE_MODE_AUTO] = "auto", + [UCT_RC_FENCE_MODE_LAST] = NULL +}; + /* Common configuration fields; embedded by uct_rc_iface_config_table under the "RC_" prefix */ +ucs_config_field_t uct_rc_iface_common_config_table[] = { + {"IB_", "RX_INLINE=64;RX_QUEUE_LEN=4095;SEG_SIZE=8256", NULL, + ucs_offsetof(uct_rc_iface_common_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_ib_iface_config_table)}, + + {"PATH_MTU", "default", + "Path MTU. 
\"default\" will select the best MTU for the device.", + ucs_offsetof(uct_rc_iface_common_config_t, path_mtu), + UCS_CONFIG_TYPE_ENUM(uct_ib_mtu_values)}, + + {"MAX_RD_ATOMIC", "4", + "Maximal number of outstanding read or atomic replies", + ucs_offsetof(uct_rc_iface_common_config_t, max_rd_atomic), UCS_CONFIG_TYPE_UINT}, + + {"TIMEOUT", "1.0s", + "Transport timeout", + ucs_offsetof(uct_rc_iface_common_config_t, tx.timeout), UCS_CONFIG_TYPE_TIME}, + + {"RETRY_COUNT", "7", + "Transport retries", + ucs_offsetof(uct_rc_iface_common_config_t, tx.retry_count), UCS_CONFIG_TYPE_UINT}, + + {"RNR_TIMEOUT", "1ms", + "RNR timeout", + ucs_offsetof(uct_rc_iface_common_config_t, tx.rnr_timeout), UCS_CONFIG_TYPE_TIME}, + + {"RNR_RETRY_COUNT", "7", + "RNR retries", + ucs_offsetof(uct_rc_iface_common_config_t, tx.rnr_retry_count), UCS_CONFIG_TYPE_UINT}, + + {"FC_ENABLE", "y", + "Enable flow control protocol to prevent sender from overwhelming the receiver,\n" + "thus avoiding RC RnR backoff timer.", + ucs_offsetof(uct_rc_iface_common_config_t, fc.enable), UCS_CONFIG_TYPE_BOOL}, + + {"FC_WND_SIZE", "512", + "The size of flow control window per endpoint. limits the number of AM\n" + "which can be sent w/o acknowledgment.", + ucs_offsetof(uct_rc_iface_common_config_t, fc.wnd_size), UCS_CONFIG_TYPE_UINT}, + + {"FC_HARD_THRESH", "0.25", + "Threshold for sending hard request for FC credits to the peer. This value\n" + "refers to the percentage of the FC_WND_SIZE value. 
(must be > 0 and < 1)", + ucs_offsetof(uct_rc_iface_common_config_t, fc.hard_thresh), UCS_CONFIG_TYPE_DOUBLE}, + +#if HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT + {"OOO_RW", "n", + "Enable out-of-order RDMA data placement", + ucs_offsetof(uct_rc_iface_common_config_t, ooo_rw), UCS_CONFIG_TYPE_BOOL}, +#endif + + {"FENCE", "auto", + "IB fence type when API fence requested:\n" + " none - fence is a no-op\n" + " weak - fence makes sure remote reads are ordered with respect to remote writes\n" + " strong - fence makes sure that subsequent remote operations start only after\n" + " previous remote operations complete\n" + " auto - select fence mode based on hardware capabilities", + ucs_offsetof(uct_rc_iface_common_config_t, fence_mode), + UCS_CONFIG_TYPE_ENUM(uct_rc_fence_mode_values)}, + + {NULL} +}; + + +/* Config relevant for rc_mlx5 and rc_verbs only (not for dc) */ +ucs_config_field_t uct_rc_iface_config_table[] = { + {"RC_", "MAX_NUM_EPS=256", NULL, + ucs_offsetof(uct_rc_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_rc_iface_common_config_table)}, + + {"FC_SOFT_THRESH", "0.5", + "Threshold for sending soft request for FC credits to the peer. This value\n" + "refers to the percentage of the FC_WND_SIZE value. (must be > HARD_THRESH and < 1)", + ucs_offsetof(uct_rc_iface_config_t, soft_thresh), UCS_CONFIG_TYPE_DOUBLE}, + + {"TX_CQ_MODERATION", "64", + "Maximum number of send WQEs which can be posted without requesting a completion.", + ucs_offsetof(uct_rc_iface_config_t, tx_cq_moderation), UCS_CONFIG_TYPE_UINT}, + + {"TX_CQ_LEN", "4096", + "Length of send completion queue. 
This limits the total number of outstanding signaled sends.", + ucs_offsetof(uct_rc_iface_config_t, tx_cq_len), UCS_CONFIG_TYPE_UINT}, + + {NULL} +}; + + +#if ENABLE_STATS +static ucs_stats_class_t uct_rc_iface_stats_class = { + .name = "rc_iface", + .num_counters = UCT_RC_IFACE_STAT_LAST, + .counter_names = { + [UCT_RC_IFACE_STAT_RX_COMPLETION] = "rx_completion", + [UCT_RC_IFACE_STAT_TX_COMPLETION] = "tx_completion", + [UCT_RC_IFACE_STAT_NO_CQE] = "no_cqe" + } +}; + +#endif /* ENABLE_STATS */ + + /* Memory pool ops with malloc-backed chunks and no per-object init or cleanup */ +static ucs_mpool_ops_t uct_rc_fc_pending_mpool_ops = { + .chunk_alloc = ucs_mpool_chunk_malloc, + .chunk_release = ucs_mpool_chunk_free, + .obj_init = NULL, + .obj_cleanup = NULL +}; + /* Memory pool object initializer for flush completion ops: presets the handler, clears the flags and records the owning iface */ +static void +uct_rc_iface_flush_comp_init(ucs_mpool_t *mp, void *obj, void *chunk) +{ + uct_rc_iface_t *iface = ucs_container_of(mp, uct_rc_iface_t, tx.flush_mp); + uct_rc_iface_send_op_t *op = obj; + + op->handler = uct_rc_ep_flush_op_completion_handler; + op->flags = 0; + op->iface = iface; +} + /* Memory pool ops for flush completion ops, initialized by uct_rc_iface_flush_comp_init */ +static ucs_mpool_ops_t uct_rc_flush_comp_mpool_ops = { + .chunk_alloc = ucs_mpool_chunk_malloc, + .chunk_release = ucs_mpool_chunk_free, + .obj_init = uct_rc_iface_flush_comp_init, + .obj_cleanup = NULL +}; + /* Fills iface_attr with the RC capabilities and limits on top of uct_ib_iface_query(); returns its error status on failure, UCS_OK otherwise */ +ucs_status_t uct_rc_iface_query(uct_rc_iface_t *iface, + uct_iface_attr_t *iface_attr, + size_t put_max_short, size_t max_inline, + size_t am_max_hdr, size_t am_max_iov, + size_t tag_max_iov, size_t tag_min_hdr) +{ + uct_ib_device_t *dev = uct_ib_iface_device(&iface->super); + ucs_status_t status; + + status = uct_ib_iface_query(&iface->super, + ucs_max(sizeof(uct_rc_hdr_t), UCT_IB_RETH_LEN), + iface_attr); + if (status != UCS_OK) { + return status; + } + + iface_attr->iface_addr_len = 0; + iface_attr->ep_addr_len = sizeof(uct_rc_ep_address_t); + iface_attr->max_conn_priv = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_AM_ZCOPY | + UCT_IFACE_FLAG_PUT_BCOPY | + UCT_IFACE_FLAG_PUT_ZCOPY | + UCT_IFACE_FLAG_GET_BCOPY | + UCT_IFACE_FLAG_GET_ZCOPY | + 
UCT_IFACE_FLAG_PENDING | + UCT_IFACE_FLAG_CONNECT_TO_EP | + UCT_IFACE_FLAG_CB_SYNC | + UCT_IFACE_FLAG_EVENT_SEND_COMP | + UCT_IFACE_FLAG_EVENT_RECV; + /* 64-bit atomics: PCI atomics (reported as CPU atomics) when the device has them, otherwise device atomics */ + if (uct_ib_device_has_pci_atomics(dev)) { + if (dev->pci_fadd_arg_sizes & sizeof(uint64_t)) { + iface_attr->cap.atomic64.op_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD); + iface_attr->cap.atomic64.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD); + } + if (dev->pci_cswap_arg_sizes & sizeof(uint64_t)) { + iface_attr->cap.atomic64.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_CSWAP); + } + iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_CPU; + } else { + if (dev->atomic_arg_sizes & sizeof(uint64_t)) { + /* TODO: remove deprecated flags */ + iface_attr->cap.flags |= UCT_IFACE_FLAG_ATOMIC_DEVICE; + + iface_attr->cap.atomic64.op_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD); + iface_attr->cap.atomic64.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_CSWAP); + } + } + /* Alignment hints: PCI max payload for zcopy, path MTU value for align_mtu */ + iface_attr->cap.put.opt_zcopy_align = UCS_SYS_PCI_MAX_PAYLOAD; + iface_attr->cap.get.opt_zcopy_align = UCS_SYS_PCI_MAX_PAYLOAD; + iface_attr->cap.am.opt_zcopy_align = UCS_SYS_PCI_MAX_PAYLOAD; + iface_attr->cap.put.align_mtu = uct_ib_mtu_value(iface->config.path_mtu); + iface_attr->cap.get.align_mtu = uct_ib_mtu_value(iface->config.path_mtu); + iface_attr->cap.am.align_mtu = uct_ib_mtu_value(iface->config.path_mtu); + + + /* PUT */ + iface_attr->cap.put.max_short = put_max_short; + iface_attr->cap.put.max_bcopy = iface->super.config.seg_size; + iface_attr->cap.put.min_zcopy = 0; + iface_attr->cap.put.max_zcopy = uct_ib_iface_port_attr(&iface->super)->max_msg_sz; + iface_attr->cap.put.max_iov = uct_ib_iface_get_max_iov(&iface->super); + + /* GET */ + iface_attr->cap.get.max_bcopy = iface->super.config.seg_size; + iface_attr->cap.get.min_zcopy = iface->super.config.max_inl_resp + 1; + iface_attr->cap.get.max_zcopy = uct_ib_iface_port_attr(&iface->super)->max_msg_sz; + iface_attr->cap.get.max_iov = uct_ib_iface_get_max_iov(&iface->super); + + /* AM */ + iface_attr->cap.am.max_short 
= uct_ib_iface_hdr_size(max_inline, tag_min_hdr); + iface_attr->cap.am.max_bcopy = iface->super.config.seg_size - tag_min_hdr; + iface_attr->cap.am.min_zcopy = 0; + iface_attr->cap.am.max_zcopy = iface->super.config.seg_size - tag_min_hdr; + iface_attr->cap.am.max_hdr = am_max_hdr - tag_min_hdr; + iface_attr->cap.am.max_iov = am_max_iov; + + /* Error Handling */ + iface_attr->cap.flags |= UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE; + /* Short operations are advertised only when their maximal size is nonzero */ + if (iface_attr->cap.am.max_short) { + iface_attr->cap.flags |= UCT_IFACE_FLAG_AM_SHORT; + } + + if (iface_attr->cap.put.max_short) { + iface_attr->cap.flags |= UCT_IFACE_FLAG_PUT_SHORT; + } + + return UCS_OK; +} + /* Registers ep under qp_num in the two-level iface->eps lookup table, allocating the second-level array on first use */ +void uct_rc_iface_add_qp(uct_rc_iface_t *iface, uct_rc_ep_t *ep, + unsigned qp_num) +{ + uct_rc_ep_t ***ptr, **memb; + + ptr = &iface->eps[qp_num >> UCT_RC_QP_TABLE_ORDER]; + if (*ptr == NULL) { + *ptr = ucs_calloc(UCS_BIT(UCT_RC_QP_TABLE_MEMB_ORDER), sizeof(**ptr), + "rc qp table"); + } + /* NOTE(review): the ucs_calloc() result is not checked before *ptr is indexed below */ + memb = &(*ptr)[qp_num & UCS_MASK(UCT_RC_QP_TABLE_MEMB_ORDER)]; + ucs_assert(*memb == NULL); + *memb = ep; +} + /* Clears the lookup table entry of qp_num; the second-level array is not released here */ +void uct_rc_iface_remove_qp(uct_rc_iface_t *iface, unsigned qp_num) +{ + uct_rc_ep_t **memb; + + memb = &iface->eps[qp_num >> UCT_RC_QP_TABLE_ORDER] + [qp_num & UCS_MASK(UCT_RC_QP_TABLE_MEMB_ORDER)]; + ucs_assert(*memb != NULL); + *memb = NULL; +} + /* Flushes every endpoint in iface->ep_list. Returns UCS_ERR_UNSUPPORTED for a non-NULL comp, UCS_INPROGRESS while any endpoint reports UCS_ERR_NO_RESOURCE or UCS_INPROGRESS, and any other endpoint error as is */ +ucs_status_t uct_rc_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp) +{ + uct_rc_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_iface_t); + ucs_status_t status; + unsigned count; + uct_rc_ep_t *ep; + + if (comp != NULL) { + return UCS_ERR_UNSUPPORTED; + } + + count = 0; + ucs_list_for_each(ep, &iface->ep_list, list) { + status = uct_ep_flush(&ep->super.super, 0, NULL); + if ((status == UCS_ERR_NO_RESOURCE) || (status == UCS_INPROGRESS)) { + ++count; + } else if (status != UCS_OK) { + return status; + } + } + + if (count != 0) { + UCT_TL_IFACE_STAT_FLUSH_WAIT(&iface->super.super); + return UCS_INPROGRESS; + } + + 
UCT_TL_IFACE_STAT_FLUSH(&iface->super.super); + return UCS_OK; +} + /* Send descriptor initializer: stores the lkey of the memory handle and clears the op flags */ +void uct_rc_iface_send_desc_init(uct_iface_h tl_iface, void *obj, uct_mem_h memh) +{ + uct_rc_iface_send_desc_t *desc = obj; + uct_ib_mem_t *ib_memh = memh; + + desc->lkey = ib_memh->lkey; + desc->super.flags = 0; +} + /* Selects iface->config.path_mtu: the user configuration when it is not "default", IBV_MTU_2048 for vendor 0x02c9 parts 4099/4100/4103/4104 whose port MTU is larger, otherwise the port active MTU */ +static void uct_rc_iface_set_path_mtu(uct_rc_iface_t *iface, + const uct_rc_iface_common_config_t *config) +{ + enum ibv_mtu port_mtu = uct_ib_iface_port_attr(&iface->super)->active_mtu; + uct_ib_device_t *dev = uct_ib_iface_device(&iface->super); + + /* MTU is set by user configuration */ + if (config->path_mtu != UCT_IB_MTU_DEFAULT) { + iface->config.path_mtu = (enum ibv_mtu)(config->path_mtu + (IBV_MTU_512 - UCT_IB_MTU_512)); + } else if ((port_mtu > IBV_MTU_2048) && (IBV_DEV_ATTR(dev, vendor_id) == 0x02c9) && + ((IBV_DEV_ATTR(dev, vendor_part_id) == 4099) || (IBV_DEV_ATTR(dev, vendor_part_id) == 4100) || + (IBV_DEV_ATTR(dev, vendor_part_id) == 4103) || (IBV_DEV_ATTR(dev, vendor_part_id) == 4104))) + { + /* On some devices optimal path_mtu is 2048 */ + iface->config.path_mtu = IBV_MTU_2048; + } else { + iface->config.path_mtu = port_mtu; + } +} + /* Validates the soft FC threshold factor (must be above the hard factor and below 1) and derives iface->config.fc_soft_thresh: at least 1 when FC is enabled, 0 when it is disabled */ +ucs_status_t uct_rc_init_fc_thresh(uct_rc_iface_config_t *config, + uct_rc_iface_t *iface) +{ + /* Check FC parameters correctness */ + if ((config->soft_thresh <= config->super.fc.hard_thresh) || + (config->soft_thresh >= 1)) { + ucs_error("The factor for soft FC threshold should be bigger" + " than FC_HARD_THRESH value and less than 1 (s=%f, h=%f)", + config->soft_thresh, config->super.fc.hard_thresh); + return UCS_ERR_INVALID_PARAM; + } + + if (config->super.fc.enable) { + iface->config.fc_soft_thresh = ucs_max((int)(iface->config.fc_wnd_size * + config->soft_thresh), 1); + } else { + iface->config.fc_soft_thresh = 0; + } + return UCS_OK; +} + /* Processes the FC bits of an incoming AM (grant, soft request, hard request) and then invokes the AM handler, except for a pure grant message */ +ucs_status_t uct_rc_iface_fc_handler(uct_rc_iface_t *iface, unsigned qp_num, + uct_rc_hdr_t *hdr, unsigned length, + uint32_t imm_data, uint16_t lid, unsigned flags) +{ + ucs_status_t status; 
+ int16_t cur_wnd; + uct_rc_fc_request_t *fc_req; + uct_rc_ep_t *ep = uct_rc_iface_lookup_ep(iface, qp_num); + uint8_t fc_hdr = uct_rc_fc_get_fc_hdr(hdr->am_id); + + ucs_assert(iface->config.fc_enabled); + + if (fc_hdr & UCT_RC_EP_FC_FLAG_GRANT) { + UCS_STATS_UPDATE_COUNTER(ep->fc.stats, UCT_RC_FC_STAT_RX_GRANT, 1); + + /* Got either grant flag or special FC grant message */ + cur_wnd = ep->fc.fc_wnd; + + /* Peer granted resources, so update wnd */ + ep->fc.fc_wnd = iface->config.fc_wnd_size; + UCS_STATS_SET_COUNTER(ep->fc.stats, UCT_RC_FC_STAT_FC_WND, ep->fc.fc_wnd); + + /* To preserve ordering we have to dispatch all pending + * operations if current fc_wnd is <= 0 + * (otherwise it will be dispatched by tx progress) */ + if (cur_wnd <= 0) { + ucs_arbiter_group_schedule(&iface->tx.arbiter, &ep->arb_group); + ucs_arbiter_dispatch(&iface->tx.arbiter, 1, + uct_rc_ep_process_pending, NULL); + } + if (fc_hdr == UCT_RC_EP_FC_PURE_GRANT) { + /* Special FC grant message can't be bundled with any other FC + * request. Stop processing this AM and do not invoke AM handler */ + UCS_STATS_UPDATE_COUNTER(ep->fc.stats, UCT_RC_FC_STAT_RX_PURE_GRANT, 1); + return UCS_OK; + } + } + + if (fc_hdr & UCT_RC_EP_FC_FLAG_SOFT_REQ) { + UCS_STATS_UPDATE_COUNTER(ep->fc.stats, UCT_RC_FC_STAT_RX_SOFT_REQ, 1); + + /* Got soft credit request. Mark ep that it needs to grant + * credits to the peer in outgoing AM (if any). */ + ep->fc.flags |= UCT_RC_EP_FC_FLAG_GRANT; + + } else if (fc_hdr & UCT_RC_EP_FC_FLAG_HARD_REQ) { + UCS_STATS_UPDATE_COUNTER(ep->fc.stats, UCT_RC_FC_STAT_RX_HARD_REQ, 1); + fc_req = ucs_mpool_get(&iface->tx.fc_mp); + if (ucs_unlikely(fc_req == NULL)) { + ucs_error("Failed to allocate FC request. " + "Grant will not be sent on ep %p", ep); + return UCS_ERR_NO_MEMORY; + } + fc_req->ep = &ep->super.super; + fc_req->super.func = uct_rc_ep_fc_grant; + + /* Got hard credit request. 
Send grant to the peer immediately */ + status = uct_rc_ep_fc_grant(&fc_req->super); + + if (status == UCS_ERR_NO_RESOURCE){ + /* force add request to group & schedule group to eliminate + * FC deadlock */ + uct_pending_req_arb_group_push_head(&iface->tx.arbiter, + &ep->arb_group, &fc_req->super); + ucs_arbiter_group_schedule(&iface->tx.arbiter, &ep->arb_group); + } else { + ucs_assertv_always(status == UCS_OK, "Failed to send FC grant msg: %s", + ucs_status_string(status)); + } + } + + return uct_iface_invoke_am(&iface->super.super, + (hdr->am_id & ~UCT_RC_EP_FC_MASK), + hdr + 1, length, flags); +} + +static ucs_status_t uct_rc_iface_tx_ops_init(uct_rc_iface_t *iface) +{ + const unsigned count = iface->config.tx_ops_count; + uct_rc_iface_send_op_t *op; + ucs_status_t status; + + iface->tx.ops_buffer = ucs_calloc(count, sizeof(*iface->tx.ops_buffer), + "rc_tx_ops"); + if (iface->tx.ops_buffer == NULL) { + return UCS_ERR_NO_MEMORY; + } + + iface->tx.free_ops = &iface->tx.ops_buffer[0]; + for (op = iface->tx.ops_buffer; op < iface->tx.ops_buffer + count; ++op) { + op->handler = uct_rc_ep_send_op_completion_handler; + op->flags = UCT_RC_IFACE_SEND_OP_FLAG_IFACE; + op->iface = iface; + op->next = (op == (iface->tx.ops_buffer + count - 1)) ? NULL : (op + 1); + } + + /* Create memory pool for flush completions. Can't just alloc a certain + * size buffer, because number of simultaneous flushes is not limited by + * CQ or QP resources. 
*/ + status = ucs_mpool_init(&iface->tx.flush_mp, 0, sizeof(*op), 0, + UCS_SYS_CACHE_LINE_SIZE, 256, + UINT_MAX, &uct_rc_flush_comp_mpool_ops, + "flush-comps-only"); + + return status; +} + +static void uct_rc_iface_tx_ops_cleanup(uct_rc_iface_t *iface) +{ + const unsigned total_count = iface->config.tx_ops_count; + uct_rc_iface_send_op_t *op; + unsigned free_count; + + free_count = 0; + for (op = iface->tx.free_ops; op != NULL; op = op->next) { + ++free_count; + ucs_assert(free_count <= total_count); + } + if (free_count != iface->config.tx_ops_count) { + ucs_warn("rc_iface %p: %u/%d send ops were not released", iface, + total_count- free_count, total_count); + } + ucs_free(iface->tx.ops_buffer); + + ucs_mpool_cleanup(&iface->tx.flush_mp, 1); +} + +unsigned uct_rc_iface_do_progress(uct_iface_h tl_iface) +{ + uct_rc_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_iface_t); + return iface->progress(iface); +} + +ucs_status_t uct_rc_iface_init_rx(uct_rc_iface_t *iface, + const uct_rc_iface_common_config_t *config, + struct ibv_srq **srq_p) +{ + struct ibv_srq_init_attr srq_init_attr; + struct ibv_pd *pd = uct_ib_iface_md(&iface->super)->pd; + struct ibv_srq *srq; + + srq_init_attr.attr.max_sge = 1; + srq_init_attr.attr.max_wr = config->super.rx.queue_len; + srq_init_attr.attr.srq_limit = 0; + srq_init_attr.srq_context = iface; + srq = ibv_create_srq(pd, &srq_init_attr); + if (srq == NULL) { + ucs_error("ibv_create_srq() failed: %m"); + return UCS_ERR_IO_ERROR; + } + iface->rx.srq.quota = srq_init_attr.attr.max_wr; + *srq_p = srq; + + return UCS_OK; +} + +static int uct_rc_iface_config_limit_value(const char *name, + int provided, int limit) +{ + if (provided > limit) { + ucs_warn("using maximal value for %s (%d) instead of %d", + name, limit, provided); + return limit; + } else { + return provided; + } +} + +UCS_CLASS_INIT_FUNC(uct_rc_iface_t, uct_rc_iface_ops_t *ops, uct_md_h md, + uct_worker_h worker, const uct_iface_params_t *params, + const 
uct_rc_iface_common_config_t *config, + uct_ib_iface_init_attr_t *init_attr) +{ + uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev; + ucs_status_t status; + + UCS_CLASS_CALL_SUPER_INIT(uct_ib_iface_t, &ops->super, md, worker, params, + &config->super, init_attr); + + self->tx.cq_available = init_attr->tx_cq_len - 1; + self->rx.srq.available = 0; + self->rx.srq.quota = 0; + self->config.tx_qp_len = config->super.tx.queue_len; + self->config.tx_min_sge = config->super.tx.min_sge; + self->config.tx_min_inline = config->super.tx.min_inline; + self->config.tx_ops_count = init_attr->tx_cq_len; + self->config.rx_inline = config->super.rx.inl; + self->config.min_rnr_timer = uct_ib_to_rnr_fabric_time(config->tx.rnr_timeout); + self->config.timeout = uct_ib_to_qp_fabric_time(config->tx.timeout); + self->config.rnr_retry = uct_rc_iface_config_limit_value( + "RNR_RETRY_COUNT", + config->tx.rnr_retry_count, + UCT_RC_QP_MAX_RETRY_COUNT); + self->config.retry_cnt = uct_rc_iface_config_limit_value( + "RETRY_COUNT", + config->tx.retry_count, + UCT_RC_QP_MAX_RETRY_COUNT); + self->config.max_rd_atomic = config->max_rd_atomic; + self->config.ooo_rw = config->ooo_rw; +#if UCS_ENABLE_ASSERT + self->config.tx_cq_len = init_attr->tx_cq_len; +#endif + + uct_ib_fence_info_init(&self->tx.fi); + uct_rc_iface_set_path_mtu(self, config); + memset(self->eps, 0, sizeof(self->eps)); + ucs_arbiter_init(&self->tx.arbiter); + ucs_list_head_init(&self->ep_list); + + /* Check FC parameters correctness */ + if ((config->fc.hard_thresh <= 0) || (config->fc.hard_thresh >= 1)) { + ucs_error("The factor for hard FC threshold should be > 0 and < 1 (%f)", + config->fc.hard_thresh); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + /* Create RX buffers mempool */ + status = uct_ib_iface_recv_mpool_init(&self->super, &config->super, + "rc_recv_desc", &self->rx.mp); + if (status != UCS_OK) { + goto err; + } + + /* Create TX buffers mempool */ + status = uct_iface_mpool_init(&self->super.super, + 
&self->tx.mp, + sizeof(uct_rc_iface_send_desc_t) + self->super.config.seg_size, + sizeof(uct_rc_iface_send_desc_t), + UCS_SYS_CACHE_LINE_SIZE, + &config->super.tx.mp, + self->config.tx_qp_len, + uct_rc_iface_send_desc_init, + "rc_send_desc"); + if (status != UCS_OK) { + goto err_destroy_rx_mp; + } + + /* Allocate tx operations */ + status = uct_rc_iface_tx_ops_init(self); + if (status != UCS_OK) { + goto err_destroy_tx_mp; + } + + /* Set atomic handlers according to atomic reply endianness */ + self->config.atomic64_handler = dev->atomic_arg_sizes_be & sizeof(uint64_t) ? + uct_rc_ep_atomic_handler_64_be1 : + uct_rc_ep_atomic_handler_64_be0; + self->config.atomic32_ext_handler = dev->ext_atomic_arg_sizes_be & sizeof(uint32_t) ? + uct_rc_ep_atomic_handler_32_be1 : + uct_rc_ep_atomic_handler_32_be0; + self->config.atomic64_ext_handler = dev->ext_atomic_arg_sizes_be & sizeof(uint64_t) ? + uct_rc_ep_atomic_handler_64_be1 : + uct_rc_ep_atomic_handler_64_be0; + + status = UCS_STATS_NODE_ALLOC(&self->stats, &uct_rc_iface_stats_class, + self->super.super.stats); + if (status != UCS_OK) { + goto err_cleanup_tx_ops; + } + + /* Initialize RX resources (SRQ) */ + status = ops->init_rx(self, config); + if (status != UCS_OK) { + goto err_destroy_stats; + } + + self->config.fc_enabled = config->fc.enable; + + if (self->config.fc_enabled) { + /* Assume that number of recv buffers is the same on all peers. + * Then FC window size is the same for all endpoints as well. + * TODO: Make wnd size to be a property of the particular interface. 
+ * We could distribute it via rc address then.*/ + self->config.fc_wnd_size = ucs_min(config->fc.wnd_size, + config->super.rx.queue_len); + self->config.fc_hard_thresh = ucs_max((int)(self->config.fc_wnd_size * + config->fc.hard_thresh), 1); + + /* Create mempool for pending requests for FC grant */ + status = ucs_mpool_init(&self->tx.fc_mp, + 0, + init_attr->fc_req_size, + 0, + 1, + 128, + UINT_MAX, + &uct_rc_fc_pending_mpool_ops, + "pending-fc-grants-only"); + if (status != UCS_OK) { + goto err_cleanup_rx; + } + } else { + self->config.fc_wnd_size = INT16_MAX; + self->config.fc_hard_thresh = 0; + } + + return UCS_OK; + +err_cleanup_rx: + ops->cleanup_rx(self); +err_destroy_stats: + UCS_STATS_NODE_FREE(self->stats); +err_cleanup_tx_ops: + uct_rc_iface_tx_ops_cleanup(self); +err_destroy_tx_mp: + ucs_mpool_cleanup(&self->tx.mp, 1); +err_destroy_rx_mp: + ucs_mpool_cleanup(&self->rx.mp, 1); +err: + return status; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_rc_iface_t) +{ + uct_rc_iface_ops_t *ops = ucs_derived_of(self->super.ops, uct_rc_iface_ops_t); + unsigned i; + + /* Release table. TODO release on-demand when removing ep. 
*/ + for (i = 0; i < UCT_RC_QP_TABLE_SIZE; ++i) { + ucs_free(self->eps[i]); + } + + if (!ucs_list_is_empty(&self->ep_list)) { + ucs_warn("some eps were not destroyed"); + } + + ucs_arbiter_cleanup(&self->tx.arbiter); + + UCS_STATS_NODE_FREE(self->stats); + + ops->cleanup_rx(self); + uct_rc_iface_tx_ops_cleanup(self); + ucs_mpool_cleanup(&self->tx.mp, 1); + ucs_mpool_cleanup(&self->rx.mp, 0); /* Cannot flush SRQ */ + if (self->config.fc_enabled) { + ucs_mpool_cleanup(&self->tx.fc_mp, 1); + } +} + +UCS_CLASS_DEFINE(uct_rc_iface_t, uct_ib_iface_t); + +void uct_rc_iface_fill_attr(uct_rc_iface_t *iface, + uct_ib_qp_attr_t *qp_init_attr, + unsigned max_send_wr, + struct ibv_srq *srq) +{ + qp_init_attr->srq = srq; + qp_init_attr->cap.max_send_wr = max_send_wr; + qp_init_attr->cap.max_recv_wr = 0; + qp_init_attr->cap.max_send_sge = iface->config.tx_min_sge; + qp_init_attr->cap.max_recv_sge = 1; + qp_init_attr->cap.max_inline_data = iface->config.tx_min_inline; + qp_init_attr->qp_type = iface->super.config.qp_type; + qp_init_attr->sq_sig_all = !iface->config.tx_moderation; + qp_init_attr->max_inl_recv = iface->config.rx_inline; + qp_init_attr->max_inl_resp = iface->super.config.max_inl_resp; +} + +ucs_status_t uct_rc_iface_qp_create(uct_rc_iface_t *iface, struct ibv_qp **qp_p, + uct_ib_qp_attr_t *attr, unsigned max_send_wr, + struct ibv_srq *srq) +{ + uct_rc_iface_fill_attr(iface, attr, max_send_wr, srq); + uct_ib_iface_fill_attr(&iface->super, attr); + + return uct_ib_iface_create_qp(&iface->super, attr, qp_p); +} + +ucs_status_t uct_rc_iface_qp_init(uct_rc_iface_t *iface, struct ibv_qp *qp) +{ + struct ibv_qp_attr qp_attr; + int ret; + + memset(&qp_attr, 0, sizeof(qp_attr)); + + qp_attr.qp_state = IBV_QPS_INIT; + qp_attr.pkey_index = iface->super.pkey_index; + qp_attr.port_num = iface->super.config.port_num; + qp_attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_ATOMIC; + ret = ibv_modify_qp(qp, 
&qp_attr, + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_ACCESS_FLAGS); + if (ret) { + ucs_error("error modifying QP to INIT: %m"); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +ucs_status_t uct_rc_iface_qp_connect(uct_rc_iface_t *iface, struct ibv_qp *qp, + const uint32_t dest_qp_num, + struct ibv_ah_attr *ah_attr) +{ +#if HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT + struct ibv_exp_qp_attr qp_attr; + uct_ib_device_t *dev; +#else + struct ibv_qp_attr qp_attr; +#endif + long qp_attr_mask; + int ret; + + memset(&qp_attr, 0, sizeof(qp_attr)); + + qp_attr.qp_state = IBV_QPS_RTR; + qp_attr.dest_qp_num = dest_qp_num; + qp_attr.rq_psn = 0; + qp_attr.path_mtu = iface->config.path_mtu; + qp_attr.max_dest_rd_atomic = iface->config.max_rd_atomic; + qp_attr.min_rnr_timer = iface->config.min_rnr_timer; + qp_attr.ah_attr = *ah_attr; + qp_attr_mask = IBV_QP_STATE | + IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER; + +#if HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT + dev = uct_ib_iface_device(&iface->super); + if (iface->config.ooo_rw && UCX_IB_DEV_IS_OOO_SUPPORTED(dev, rc)) { + ucs_debug("enabling out-of-order on RC QP %x dev %s", + qp->qp_num, uct_ib_device_name(dev)); + qp_attr_mask |= IBV_EXP_QP_OOO_RW_DATA_PLACEMENT; + } + ret = ibv_exp_modify_qp(qp, &qp_attr, qp_attr_mask); +#else + ret = ibv_modify_qp(qp, &qp_attr, qp_attr_mask); +#endif + if (ret) { + ucs_error("error modifying QP to RTR: %m"); + return UCS_ERR_IO_ERROR; + } + + qp_attr.qp_state = IBV_QPS_RTS; + qp_attr.sq_psn = 0; + qp_attr.timeout = iface->config.timeout; + qp_attr.rnr_retry = iface->config.rnr_retry; + qp_attr.retry_cnt = iface->config.retry_cnt; + qp_attr.max_rd_atomic = iface->config.max_rd_atomic; + qp_attr_mask = IBV_QP_STATE | + IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | + IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC; + +#if HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT + ret = 
ibv_exp_modify_qp(qp, &qp_attr, qp_attr_mask); +#else + ret = ibv_modify_qp(qp, &qp_attr, qp_attr_mask); +#endif + if (ret) { + ucs_error("error modifying QP to RTS: %m"); + return UCS_ERR_IO_ERROR; + } + + ucs_debug("connected rc qp 0x%x on "UCT_IB_IFACE_FMT" to lid %d(+%d) sl %d " + "remote_qp 0x%x mtu %zu timer %dx%d rnr %dx%d rd_atom %d", + qp->qp_num, UCT_IB_IFACE_ARG(&iface->super), ah_attr->dlid, + ah_attr->src_path_bits, ah_attr->sl, qp_attr.dest_qp_num, + uct_ib_mtu_value(qp_attr.path_mtu), qp_attr.timeout, + qp_attr.retry_cnt, qp_attr.min_rnr_timer, qp_attr.rnr_retry, + qp_attr.max_rd_atomic); + + return UCS_OK; +} + +ucs_status_t uct_rc_iface_common_event_arm(uct_iface_h tl_iface, + unsigned events, int force_rx_all) +{ + uct_rc_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_iface_t); + int arm_rx_solicited, arm_rx_all; + ucs_status_t status; + + status = uct_ib_iface_pre_arm(&iface->super); + if (status != UCS_OK) { + return status; + } + + if (events & UCT_EVENT_SEND_COMP) { + status = iface->super.ops->arm_cq(&iface->super, UCT_IB_DIR_TX, 0); + if (status != UCS_OK) { + return status; + } + } + + arm_rx_solicited = 0; + arm_rx_all = 0; + if (events & UCT_EVENT_RECV) { + arm_rx_solicited = 1; /* to wake up on active messages */ + } + if (((events & UCT_EVENT_SEND_COMP) && iface->config.fc_enabled) || + force_rx_all) { + arm_rx_all = 1; /* to wake up on FC grants (or if forced) */ + } + + if (arm_rx_solicited || arm_rx_all) { + status = iface->super.ops->arm_cq(&iface->super, UCT_IB_DIR_RX, + arm_rx_solicited && !arm_rx_all); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; + +} + +ucs_status_t uct_rc_iface_event_arm(uct_iface_h tl_iface, unsigned events) +{ + return uct_rc_iface_common_event_arm(tl_iface, events, 0); +} + +ucs_status_t uct_rc_iface_fence(uct_iface_h tl_iface, unsigned flags) +{ + uct_rc_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_iface_t); + + if (iface->config.fence_mode != UCT_RC_FENCE_MODE_NONE) { + 
iface->tx.fi.fence_beat++; + } + + UCT_TL_IFACE_STAT_FENCE(&iface->super.super); + return UCS_OK; +} + diff --git a/src/uct/ib/rc/base/rc_iface.h b/src/uct/ib/rc/base/rc_iface.h new file mode 100644 index 0000000..6d8ebfb --- /dev/null +++ b/src/uct/ib/rc/base/rc_iface.h @@ -0,0 +1,452 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_RC_IFACE_H +#define UCT_RC_IFACE_H + +#include "rc_def.h" + +#include +#include +#include +#include +#include +#include +#include + + +#define UCT_RC_QP_TABLE_ORDER 12 +#define UCT_RC_QP_TABLE_SIZE UCS_BIT(UCT_RC_QP_TABLE_ORDER) +#define UCT_RC_QP_TABLE_MEMB_ORDER (UCT_IB_QPN_ORDER - UCT_RC_QP_TABLE_ORDER) +#define UCT_RC_QP_MAX_RETRY_COUNT 7 + +#define UCT_RC_CHECK_AM_SHORT(_am_id, _length, _max_inline) \ + UCT_CHECK_AM_ID(_am_id); \ + UCT_CHECK_LENGTH(sizeof(uct_rc_am_short_hdr_t) + _length, 0, _max_inline, "am_short"); + +#define UCT_RC_CHECK_ZCOPY_DATA(_header_length, _length, _seg_size) \ + UCT_CHECK_LENGTH(_header_length + _length, 0, _seg_size, "am_zcopy payload"); \ + UCT_CHECK_LENGTH(_header_length + _length, 0, UCT_IB_MAX_MESSAGE_SIZE, "am_zcopy ib max message"); + +#define UCT_RC_CHECK_AM_ZCOPY(_id, _header_length, _length, _desc_size, _seg_size) \ + UCT_CHECK_AM_ID(_id); \ + UCT_RC_CHECK_ZCOPY_DATA(_header_length, _length, _seg_size) \ + UCT_CHECK_LENGTH(sizeof(uct_rc_hdr_t) + _header_length, 0, _desc_size, "am_zcopy header"); + + +#define UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc) \ + UCT_TL_IFACE_GET_TX_DESC(&(_iface)->super.super, _mp, _desc, \ + return UCS_ERR_NO_RESOURCE); + +#define UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC(_iface, _mp, _desc, _id, _pk_hdr_cb, \ + _hdr, _pack_cb, _arg, _length) ({ \ + _hdr *rch; \ + UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc) \ + (_desc)->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; \ + rch = (_hdr *)(_desc + 1); \ + _pk_hdr_cb(rch, _id); \ + *(_length) = _pack_cb(rch + 1, _arg); \ +}) + 
+#define UCT_RC_IFACE_GET_TX_AM_ZCOPY_DESC(_iface, _mp, _desc, \ + _id, _header, _header_length, _comp, _send_flags) \ + UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc); \ + uct_rc_zcopy_desc_set_comp(_desc, _comp, _send_flags); \ + uct_rc_zcopy_desc_set_header((uct_rc_hdr_t*)(_desc + 1), _id, _header, _header_length); + +#define UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC(_iface, _mp, _desc, _pack_cb, _arg, _length) \ + UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc) \ + (_desc)->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; \ + _length = _pack_cb(_desc + 1, _arg); \ + UCT_SKIP_ZERO_LENGTH(_length, _desc); + +#define UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC(_iface, _mp, _desc, _unpack_cb, _comp, _arg, _length) \ + UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc) \ + ucs_assert(_length <= (_iface)->super.config.seg_size); \ + _desc->super.handler = (_comp == NULL) ? \ + uct_rc_ep_get_bcopy_handler_no_completion : \ + uct_rc_ep_get_bcopy_handler; \ + _desc->super.unpack_arg = _arg; \ + _desc->super.user_comp = _comp; \ + _desc->super.length = _length; \ + _desc->unpack_cb = _unpack_cb; + + +#define UCT_RC_IFACE_GET_TX_ATOMIC_DESC(_iface, _mp, _desc) \ + UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc) \ + _desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; + +#define UCT_RC_IFACE_GET_TX_ATOMIC_FETCH_DESC(_iface, _mp, _desc, _handler, _result, _comp) \ + UCT_CHECK_PARAM(_comp != NULL, "completion must be non-NULL"); \ + UCT_RC_IFACE_GET_TX_DESC(_iface, _mp, _desc) \ + _desc->super.handler = _handler; \ + _desc->super.buffer = _result; \ + _desc->super.user_comp = _comp; + + +enum { + UCT_RC_IFACE_STAT_RX_COMPLETION, + UCT_RC_IFACE_STAT_TX_COMPLETION, + UCT_RC_IFACE_STAT_NO_CQE, + UCT_RC_IFACE_STAT_LAST +}; + + +/* flags for uct_rc_iface_send_op_t */ +enum { +#if UCS_ENABLE_ASSERT + UCT_RC_IFACE_SEND_OP_FLAG_ZCOPY = UCS_BIT(13), /* zcopy */ + UCT_RC_IFACE_SEND_OP_FLAG_IFACE = UCS_BIT(14), /* belongs to iface ops buffer */ + UCT_RC_IFACE_SEND_OP_FLAG_INUSE = UCS_BIT(15) /* queued 
on a txqp */ +#else + UCT_RC_IFACE_SEND_OP_FLAG_ZCOPY = 0, + UCT_RC_IFACE_SEND_OP_FLAG_IFACE = 0, + UCT_RC_IFACE_SEND_OP_FLAG_INUSE = 0 +#endif +}; + + +typedef void (*uct_rc_send_handler_t)(uct_rc_iface_send_op_t *op, const void *resp); + + +/** + * RC network header. + */ +typedef struct uct_rc_hdr { + uint8_t am_id; /* Active message ID */ +} UCS_S_PACKED uct_rc_hdr_t; + + +typedef struct uct_rc_fc_request { + uct_pending_req_t super; + uct_ep_t *ep; +} uct_rc_fc_request_t; + + +/** + * RC fence type. + */ +typedef enum uct_rc_fence_mode { + UCT_RC_FENCE_MODE_NONE, + UCT_RC_FENCE_MODE_WEAK, + UCT_RC_FENCE_MODE_STRONG, + UCT_RC_FENCE_MODE_AUTO, + UCT_RC_FENCE_MODE_LAST +} uct_rc_fence_mode_t; + + +/* Common configuration used for rc verbs, rcx and dc transports */ +typedef struct uct_rc_iface_common_config { + uct_ib_iface_config_t super; + uct_ib_mtu_t path_mtu; + unsigned max_rd_atomic; + int ooo_rw; /* Enable out-of-order RDMA data placement */ + int fence_mode; + + struct { + double timeout; + unsigned retry_count; + double rnr_timeout; + unsigned rnr_retry_count; + } tx; + + struct { + int enable; + double hard_thresh; + unsigned wnd_size; + } fc; +} uct_rc_iface_common_config_t; + + +/* RC specific configuration used for rc verbs and rcx transports only */ +struct uct_rc_iface_config { + uct_rc_iface_common_config_t super; + double soft_thresh; + unsigned tx_cq_moderation; /* How many TX messages are + batched to one CQE */ + unsigned tx_cq_len; +}; + + +typedef struct uct_rc_iface_ops { + uct_ib_iface_ops_t super; + ucs_status_t (*init_rx)(uct_rc_iface_t *iface, + const uct_rc_iface_common_config_t *config); + void (*cleanup_rx)(uct_rc_iface_t *iface); + ucs_status_t (*fc_ctrl)(uct_ep_t *ep, unsigned op, + uct_rc_fc_request_t *req); + ucs_status_t (*fc_handler)(uct_rc_iface_t *iface, unsigned qp_num, + uct_rc_hdr_t *hdr, unsigned length, + uint32_t imm_data, uint16_t lid, + unsigned flags); +} uct_rc_iface_ops_t; + + +typedef struct uct_rc_srq { + unsigned 
available; + unsigned quota; +} uct_rc_srq_t; + + +struct uct_rc_iface { + uct_ib_iface_t super; + + struct { + ucs_mpool_t mp; /* pool for send descriptors */ + ucs_mpool_t fc_mp; /* pool for FC grant pending requests */ + ucs_mpool_t flush_mp; /* pool for flush completions */ + /* Credits for completions. + * May be negative in case of mlx5 because we take "num_bb" credits per + * post to be able to calculate credits of outstanding ops on failure. + * In case of verbs TL we use WQE number, so 1 post always takes 1 + * credit */ + signed cq_available; + uct_rc_iface_send_op_t *free_ops; /* stack of free send operations */ + ucs_arbiter_t arbiter; + uct_rc_iface_send_op_t *ops_buffer; /* array holding the operations linked on free_ops */ + uct_ib_fence_info_t fi; + } tx; + + struct { + ucs_mpool_t mp; /* pool for receive descriptors */ + uct_rc_srq_t srq; + } rx; + + struct { + unsigned tx_qp_len; + unsigned tx_min_sge; + unsigned tx_min_inline; + unsigned tx_ops_count; + unsigned rx_inline; + uint16_t tx_moderation; + + /* Threshold to send "soft" FC credit request. The peer will try to + * piggy-back credits grant to the counter AM, if any. */ + int16_t fc_soft_thresh; + + /* Threshold to send "hard" credits request. The peer will grant + * credits in a separate AM as soon as it handles this request.
*/ + int16_t fc_hard_thresh; + + uint16_t fc_wnd_size; + uint8_t fc_enabled; + + uint8_t min_rnr_timer; + uint8_t timeout; + uint8_t rnr_retry; + uint8_t retry_cnt; + uint8_t max_rd_atomic; + enum ibv_mtu path_mtu; + /* Enable out-of-order RDMA data placement */ + uint8_t ooo_rw; +#if UCS_ENABLE_ASSERT + int tx_cq_len; +#endif + uct_rc_fence_mode_t fence_mode; + unsigned exp_backoff; + + /* Atomic callbacks */ + uct_rc_send_handler_t atomic64_handler; /* 64bit ib-spec */ + uct_rc_send_handler_t atomic32_ext_handler; /* 32bit extended */ + uct_rc_send_handler_t atomic64_ext_handler; /* 64bit extended */ + } config; + + UCS_STATS_NODE_DECLARE(stats) + + uct_rc_ep_t **eps[UCT_RC_QP_TABLE_SIZE]; + ucs_list_link_t ep_list; + + /* Progress function (either regular or TM aware) */ + ucs_callback_t progress; +}; +UCS_CLASS_DECLARE(uct_rc_iface_t, uct_rc_iface_ops_t*, uct_md_h, uct_worker_h, + const uct_iface_params_t*, const uct_rc_iface_common_config_t*, + uct_ib_iface_init_attr_t*); + + +struct uct_rc_iface_send_op { + union { + ucs_queue_elem_t queue; /* used when enqueued on a txqp */ + uct_rc_iface_send_op_t *next; /* used when on free list */ + }; + uct_rc_send_handler_t handler; + uint16_t sn; + uint16_t flags; + unsigned length; + union { + void *buffer; /* atomics / desc */ + void *unpack_arg; /* get_bcopy / desc */ + uct_rc_iface_t *iface; /* zcopy / op */ + }; + uct_completion_t *user_comp; +}; + + +struct uct_rc_iface_send_desc { + uct_rc_iface_send_op_t super; + uct_unpack_callback_t unpack_cb; + uint32_t lkey; +}; + + +/* + * Short active message header (active message header is always 64 bit). 
+ */ +typedef struct uct_rc_am_short_hdr { + uct_rc_hdr_t rc_hdr; + uint64_t am_hdr; +} UCS_S_PACKED uct_rc_am_short_hdr_t; + + +extern ucs_config_field_t uct_rc_iface_config_table[]; +extern ucs_config_field_t uct_rc_iface_common_config_table[]; + +unsigned uct_rc_iface_do_progress(uct_iface_h tl_iface); + +ucs_status_t uct_rc_iface_query(uct_rc_iface_t *iface, + uct_iface_attr_t *iface_attr, + size_t put_max_short, size_t max_inline, + size_t am_max_hdr, size_t am_max_iov, + size_t tag_max_iov, size_t tag_min_hdr); + +void uct_rc_iface_add_qp(uct_rc_iface_t *iface, uct_rc_ep_t *ep, + unsigned qp_num); + +void uct_rc_iface_remove_qp(uct_rc_iface_t *iface, unsigned qp_num); + +ucs_status_t uct_rc_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp); + +void uct_rc_iface_send_desc_init(uct_iface_h tl_iface, void *obj, uct_mem_h memh); + +void uct_rc_ep_am_zcopy_handler(uct_rc_iface_send_op_t *op, const void *resp); + +/** + * Creates an RC or DCI QP + */ +ucs_status_t uct_rc_iface_qp_create(uct_rc_iface_t *iface, struct ibv_qp **qp_p, + uct_ib_qp_attr_t *attr, unsigned max_send_wr, + struct ibv_srq *srq); + +void uct_rc_iface_fill_attr(uct_rc_iface_t *iface, + uct_ib_qp_attr_t *qp_init_attr, + unsigned max_send_wr, + struct ibv_srq *srq); + +ucs_status_t uct_rc_iface_qp_init(uct_rc_iface_t *iface, struct ibv_qp *qp); + +ucs_status_t uct_rc_iface_qp_connect(uct_rc_iface_t *iface, struct ibv_qp *qp, + const uint32_t qp_num, + struct ibv_ah_attr *ah_attr); + +ucs_status_t uct_rc_iface_fc_handler(uct_rc_iface_t *iface, unsigned qp_num, + uct_rc_hdr_t *hdr, unsigned length, + uint32_t imm_data, uint16_t lid, unsigned flags); + +ucs_status_t uct_rc_init_fc_thresh(uct_rc_iface_config_t *rc_cfg, + uct_rc_iface_t *iface); + +ucs_status_t uct_rc_iface_event_arm(uct_iface_h tl_iface, unsigned events); + +ucs_status_t uct_rc_iface_common_event_arm(uct_iface_h tl_iface, + unsigned events, int force_rx_all); + +ucs_status_t 
uct_rc_iface_init_rx(uct_rc_iface_t *iface, + const uct_rc_iface_common_config_t *config, + struct ibv_srq **p_srq); + +ucs_status_t uct_rc_iface_fence(uct_iface_h tl_iface, unsigned flags); + +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_rc_fc_ctrl(uct_ep_t *ep, unsigned op, uct_rc_fc_request_t *req) +{ + uct_rc_iface_t *iface = ucs_derived_of(ep->iface, uct_rc_iface_t); + uct_rc_iface_ops_t *ops = ucs_derived_of(iface->super.ops, + uct_rc_iface_ops_t); + return ops->fc_ctrl(ep, op, req); +} + +static inline uct_rc_ep_t *uct_rc_iface_lookup_ep(uct_rc_iface_t *iface, + unsigned qp_num) +{ + ucs_assert(qp_num < UCS_BIT(UCT_IB_QPN_ORDER)); + return iface->eps[qp_num >> UCT_RC_QP_TABLE_ORDER] + [qp_num & UCS_MASK(UCT_RC_QP_TABLE_MEMB_ORDER)]; +} + + +static UCS_F_ALWAYS_INLINE int +uct_rc_iface_have_tx_cqe_avail(uct_rc_iface_t* iface) +{ + return iface->tx.cq_available > 0; +} + +static UCS_F_ALWAYS_INLINE uct_rc_iface_send_op_t* +uct_rc_iface_get_send_op(uct_rc_iface_t *iface) +{ + uct_rc_iface_send_op_t *op; + op = iface->tx.free_ops; + iface->tx.free_ops = op->next; + return op; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_iface_put_send_op(uct_rc_iface_send_op_t *op) +{ + uct_rc_iface_t *iface = op->iface; + ucs_assert(op->flags == UCT_RC_IFACE_SEND_OP_FLAG_IFACE); + op->next = iface->tx.free_ops; + iface->tx.free_ops = op; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_am_hdr_fill(uct_rc_hdr_t *rch, uint8_t id) +{ + rch->am_id = id; +} + +static inline void uct_rc_zcopy_desc_set_comp(uct_rc_iface_send_desc_t *desc, + uct_completion_t *comp, + int *send_flags) +{ + if (comp == NULL) { + desc->super.handler = (uct_rc_send_handler_t)ucs_mpool_put; + *send_flags = 0; + } else { + desc->super.handler = uct_rc_ep_am_zcopy_handler; + desc->super.user_comp = comp; + *send_flags = IBV_SEND_SIGNALED; + } +} + +static inline void uct_rc_zcopy_desc_set_header(uct_rc_hdr_t *rch, + uint8_t id, const void *header, + unsigned header_length) +{ + uct_rc_am_hdr_fill(rch, id); + 
memcpy(rch + 1, header, header_length); +} + +static inline int uct_rc_iface_has_tx_resources(uct_rc_iface_t *iface) +{ + return uct_rc_iface_have_tx_cqe_avail(iface) && + !ucs_mpool_is_empty(&iface->tx.mp); +} + +static UCS_F_ALWAYS_INLINE uct_rc_send_handler_t +uct_rc_iface_atomic_handler(uct_rc_iface_t *iface, int ext, unsigned length) +{ + ucs_assert((length == sizeof(uint32_t)) || (length == sizeof(uint64_t))); + switch (length) { + case sizeof(uint32_t): + return iface->config.atomic32_ext_handler; + case sizeof(uint64_t): + return ext ? iface->config.atomic64_ext_handler : + iface->config.atomic64_handler; + } + return NULL; +} +#endif diff --git a/src/uct/ib/rc/verbs/rc_verbs.h b/src/uct/ib/rc/verbs/rc_verbs.h new file mode 100644 index 0000000..7d89617 --- /dev/null +++ b/src/uct/ib/rc/verbs/rc_verbs.h @@ -0,0 +1,139 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_RC_VERBS_H +#define UCT_RC_VERBS_H + +#include +#include +#include + +#define UCT_RC_VERBS_IFACE_FOREACH_TXWQE(_iface, _i, _wc, _num_wcs) \ + status = uct_ib_poll_cq((_iface)->super.cq[UCT_IB_DIR_TX], &_num_wcs, _wc); \ + if (status != UCS_OK) { \ + return 0; \ + } \ + UCS_STATS_UPDATE_COUNTER((_iface)->stats, \ + UCT_RC_IFACE_STAT_TX_COMPLETION, _num_wcs); \ + for (_i = 0; _i < _num_wcs; ++_i) + + +typedef struct uct_rc_verbs_txcnt { + uint16_t pi; /* producer (post_send) count */ + uint16_t ci; /* consumer (ibv_poll_cq) completion count */ +} uct_rc_verbs_txcnt_t; + +/** + * RC verbs communication context. + */ +typedef struct uct_rc_verbs_ep { + uct_rc_ep_t super; + uct_rc_verbs_txcnt_t txcnt; + uct_ib_fence_info_t fi; + struct ibv_qp *qp; +} uct_rc_verbs_ep_t; + + +/** + * RC verbs interface configuration. + */ +typedef struct uct_rc_verbs_iface_config { + uct_rc_iface_config_t super; + size_t max_am_hdr; + unsigned tx_max_wr; +} uct_rc_verbs_iface_config_t; + + +/** + * RC verbs interface. 
+ */
+typedef struct uct_rc_verbs_iface {
+    uct_rc_iface_t              super;          /* base RC interface (parent class) */
+    struct ibv_srq              *srq;           /* filled by uct_rc_iface_init_rx(); passed to every QP creation and used for receive posting */
+    struct ibv_send_wr          inl_am_wr;      /* pre-built inline IBV_WR_SEND with 2 SGEs, posted by am_short */
+    struct ibv_send_wr          inl_rwrite_wr;  /* pre-built inline IBV_WR_RDMA_WRITE with 1 SGE, posted by put_short */
+    struct ibv_sge              inl_sge[2];     /* SGE array shared by both inline WRs and by inline FC grants */
+    uct_rc_am_short_hdr_t       am_inl_hdr;     /* header storage for inline sends; its rc_hdr is also used for inline FC grants */
+    ucs_mpool_t                 short_desc_mp;  /* descriptors with short_desc_size payload: AM zcopy headers and atomics */
+    uct_rc_iface_send_desc_t    *fc_desc; /* FC grant descriptor; NULL unless max_inline < sizeof(uct_rc_hdr_t) */
+    struct {
+        size_t                  short_desc_size; /* max of sizeof(uct_rc_hdr_t), MAX_AM_HDR and UCT_IB_MAX_ATOMIC_SIZE */
+        size_t                  max_inline;      /* max_inline_data reported for a probe QP created at iface init */
+        unsigned                tx_max_wr;       /* min(TX_MAX_WR, tx_qp_len) */
+    } config;
+} uct_rc_verbs_iface_t;
+
+
+UCS_CLASS_DECLARE(uct_rc_verbs_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DECLARE_NEW_FUNC(uct_rc_verbs_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DECLARE_DELETE_FUNC(uct_rc_verbs_ep_t, uct_ep_t);
+
+ucs_status_t uct_rc_verbs_ep_put_short(uct_ep_h tl_ep, const void *buffer,
+                                       unsigned length, uint64_t remote_addr,
+                                       uct_rkey_t rkey); /* length is checked against config.max_inline */
+
+ssize_t uct_rc_verbs_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
+                                  void *arg, uint64_t remote_addr,
+                                  uct_rkey_t rkey);
+
+ucs_status_t uct_rc_verbs_ep_put_zcopy(uct_ep_h tl_ep,
+                                       const uct_iov_t *iov, size_t iovcnt,
+                                       uint64_t remote_addr, uct_rkey_t rkey,
+                                       uct_completion_t *comp);
+
+ucs_status_t uct_rc_verbs_ep_get_bcopy(uct_ep_h tl_ep,
+                                       uct_unpack_callback_t unpack_cb,
+                                       void *arg, size_t length,
+                                       uint64_t remote_addr, uct_rkey_t rkey,
+                                       uct_completion_t *comp);
+
+ucs_status_t uct_rc_verbs_ep_get_zcopy(uct_ep_h tl_ep,
+                                       const uct_iov_t *iov, size_t iovcnt,
+                                       uint64_t remote_addr, uct_rkey_t rkey,
+                                       uct_completion_t *comp);
+
+ucs_status_t uct_rc_verbs_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
+                                      const void *buffer, unsigned length);
+
+ssize_t uct_rc_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
+                                 uct_pack_callback_t pack_cb, void *arg,
+                                 unsigned flags);
+
+ucs_status_t uct_rc_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header,
+                                      unsigned header_length, const uct_iov_t *iov,
+                                      size_t iovcnt, unsigned flags,
+                                      uct_completion_t *comp);
+
+ucs_status_t uct_rc_verbs_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, uint64_t swap,
+                                            uint64_t remote_addr, uct_rkey_t rkey,
+                                            uint64_t *result, uct_completion_t *comp);
+
+ucs_status_t uct_rc_verbs_ep_atomic64_post(uct_ep_h tl_ep, unsigned opcode, uint64_t value,
+                                           uint64_t remote_addr, uct_rkey_t rkey); /* only UCT_ATOMIC_OP_ADD; otherwise UCS_ERR_UNSUPPORTED */
+
+ucs_status_t uct_rc_verbs_ep_atomic64_fetch(uct_ep_h tl_ep, uct_atomic_op_t opcode,
+                                            uint64_t value, uint64_t *result,
+                                            uint64_t remote_addr, uct_rkey_t rkey,
+                                            uct_completion_t *comp); /* only UCT_ATOMIC_OP_ADD; otherwise UCS_ERR_UNSUPPORTED */
+
+ucs_status_t uct_rc_verbs_ep_flush(uct_ep_h tl_ep, unsigned flags,
+                                   uct_completion_t *comp);
+
+ucs_status_t uct_rc_verbs_ep_fence(uct_ep_h tl_ep, unsigned flags);
+
+ucs_status_t uct_rc_verbs_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op,
+                                     uct_rc_fc_request_t *req); /* op is asserted to be UCT_RC_EP_FC_PURE_GRANT */
+
+ucs_status_t uct_rc_verbs_ep_handle_failure(uct_rc_verbs_ep_t *ep,
+                                            ucs_status_t status);
+
+ucs_status_t uct_rc_verbs_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr);
+
+ucs_status_t uct_rc_verbs_ep_connect_to_ep(uct_ep_h tl_ep,
+                                           const uct_device_addr_t *dev_addr,
+                                           const uct_ep_addr_t *ep_addr);
+
+#endif
diff --git a/src/uct/ib/rc/verbs/rc_verbs_ep.c b/src/uct/ib/rc/verbs/rc_verbs_ep.c
new file mode 100644
index 0000000..91393f9
--- /dev/null
+++ b/src/uct/ib/rc/verbs/rc_verbs_ep.c
@@ -0,0 +1,531 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "rc_verbs.h"
+#include "rc_verbs_impl.h"
+
+#include /* NOTE(review): header name is missing here (apparently lost in extraction) - restore it from upstream */
+#include /* NOTE(review): header name is missing here as well - restore it from upstream */
+
+void uct_rc_verbs_txcnt_init(uct_rc_verbs_txcnt_t *txcnt)
+{
+    txcnt->pi = txcnt->ci = 0; /* nothing posted (pi) and nothing completed (ci) yet */
+}
+
+static UCS_F_ALWAYS_INLINE void
+uct_rc_verbs_ep_post_send(uct_rc_verbs_iface_t* iface, uct_rc_verbs_ep_t* ep,
+                          struct ibv_send_wr *wr, int send_flags, int max_log_sge)
+{
+    struct ibv_send_wr *bad_wr;
+    int ret;
+
+    ucs_assertv(ep->qp->state == IBV_QPS_RTS, "QP 0x%x state is %d",
+                ep->qp->qp_num, ep->qp->state);
+
+    if (!(send_flags & IBV_SEND_SIGNALED)) { /* caller did not force a completion: let TX moderation decide */
+        send_flags |= uct_rc_iface_tx_moderation(&iface->super, &ep->super.txqp,
+                                                 IBV_SEND_SIGNALED);
+    }
+    if (wr->opcode == IBV_WR_RDMA_READ) { /* reads may get IBV_SEND_FENCE, as decided by uct_rc_ep_fm() */
+        send_flags |= uct_rc_ep_fm(&iface->super, &ep->fi, IBV_SEND_FENCE);
+    }
+
+    wr->send_flags = send_flags;
+    wr->wr_id      = uct_rc_txqp_unsignaled(&ep->super.txqp); /* TX poll derives the completed count from wr_id (wr_id + 1) */
+
+    uct_ib_log_post_send(&iface->super.super, ep->qp, wr, max_log_sge,
+                         (wr->opcode == IBV_WR_SEND) ? uct_rc_ep_packet_dump : NULL);
+
+    ret = ibv_post_send(ep->qp, wr, &bad_wr);
+    if (ret != 0) { /* a failed post is treated as unrecoverable */
+        ucs_fatal("ibv_post_send() returned %d (%m)", ret);
+    }
+
+    uct_rc_verbs_txqp_posted(&ep->super.txqp, &ep->txcnt, &iface->super, send_flags & IBV_SEND_SIGNALED); /* advances txcnt.pi by one */
+}
+
+/*
+ * Helper function for posting sends with a descriptor.
+ * User needs to fill: wr.opcode, wr.sg_list, wr.num_sge, first sge length, and
+ * operation-specific fields (e.g. rdma).
+ */
+static UCS_F_ALWAYS_INLINE void
+uct_rc_verbs_ep_post_send_desc(uct_rc_verbs_ep_t* ep, struct ibv_send_wr *wr,
+                               uct_rc_iface_send_desc_t *desc, int send_flags,
+                               int max_log_sge)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
+                                                 uct_rc_verbs_iface_t);
+    UCT_RC_VERBS_FILL_DESC_WR(wr, desc); /* macro defined elsewhere; presumably points the first SGE at the descriptor */
+    uct_rc_verbs_ep_post_send(iface, ep, wr, send_flags, max_log_sge);
+    uct_rc_txqp_add_send_op_sn(&ep->super.txqp, &desc->super, ep->txcnt.pi); /* track desc as outstanding, tagged with pi after this post */
+}
+
+static inline ucs_status_t
+uct_rc_verbs_ep_rdma_zcopy(uct_rc_verbs_ep_t *ep, const uct_iov_t *iov,
+                           size_t iovcnt, uint64_t remote_addr, uct_rkey_t rkey,
+                           uct_completion_t *comp, int opcode)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
+                                                 uct_rc_verbs_iface_t);
+    struct ibv_sge sge[UCT_IB_MAX_IOV];
+    struct ibv_send_wr wr;
+    size_t sge_cnt;
+
+    UCT_RC_CHECK_RES(&iface->super, &ep->super); /* macro defined elsewhere; presumably returns early when TX resources are exhausted */
+    sge_cnt = uct_ib_verbs_sge_fill_iov(sge, iov, iovcnt);
+    UCT_SKIP_ZERO_LENGTH(sge_cnt); /* macro defined elsewhere; presumably returns early when there is nothing to transfer */
+    UCT_RC_VERBS_FILL_RDMA_WR_IOV(wr, wr.opcode, (enum ibv_wr_opcode)opcode,
+                                  sge, sge_cnt, remote_addr, rkey);
+    wr.next = NULL;
+
+    uct_rc_verbs_ep_post_send(iface, ep, &wr, IBV_SEND_SIGNALED, INT_MAX); /* always signaled: completion is reported through comp */
+    uct_rc_txqp_add_send_comp(&iface->super, &ep->super.txqp, comp, ep->txcnt.pi,
+                              UCT_RC_IFACE_SEND_OP_FLAG_ZCOPY);
+    return UCS_INPROGRESS;
+}
+
+static UCS_F_ALWAYS_INLINE void
+uct_rc_verbs_ep_atomic_post(uct_rc_verbs_ep_t *ep, int opcode, uint64_t compare_add,
+                            uint64_t swap, uint64_t remote_addr, uct_rkey_t rkey,
+                            uct_rc_iface_send_desc_t *desc, int force_sig)
+{
+    struct ibv_send_wr wr;
+    struct ibv_sge sge;
+
+    UCT_RC_VERBS_FILL_ATOMIC_WR(wr, wr.opcode, sge, (enum ibv_wr_opcode)opcode,
+                                compare_add, swap, remote_addr,
+                                uct_ib_md_direct_rkey(rkey));
+    UCT_TL_EP_STAT_ATOMIC(&ep->super.super);
+    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, force_sig, INT_MAX); /* force_sig is passed through as the send flags */
+}
+
+static UCS_F_ALWAYS_INLINE ucs_status_t
+uct_rc_verbs_ep_atomic(uct_rc_verbs_ep_t *ep, int opcode, void *result,
+                       uint64_t compare_add, uint64_t swap, uint64_t remote_addr,
+                       uct_rkey_t rkey, uct_completion_t *comp)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
+                                                 uct_rc_verbs_iface_t);
+    uct_rc_iface_send_desc_t *desc;
+
+    UCT_RC_CHECK_RES(&iface->super, &ep->super);
+    UCT_RC_IFACE_GET_TX_ATOMIC_FETCH_DESC(&iface->super, &iface->short_desc_mp,
+                                          desc, iface->super.config.atomic64_handler,
+                                          result, comp);
+    uct_rc_verbs_ep_atomic_post(ep, opcode, compare_add, swap, remote_addr,
+                                rkey, desc, IBV_SEND_SIGNALED |
+                                uct_rc_ep_fm(&iface->super, &ep->fi, IBV_SEND_FENCE)); /* always signaled; fence flag as decided by uct_rc_ep_fm() */
+    return UCS_INPROGRESS;
+}
+
+ucs_status_t uct_rc_verbs_ep_put_short(uct_ep_h tl_ep, const void *buffer,
+                                       unsigned length, uint64_t remote_addr,
+                                       uct_rkey_t rkey)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t);
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+
+    UCT_CHECK_LENGTH(length, 0, iface->config.max_inline, "put_short"); /* payload must fit into inline data */
+
+    UCT_RC_CHECK_RES(&iface->super, &ep->super);
+    UCT_RC_VERBS_FILL_INL_PUT_WR(iface, remote_addr, rkey, buffer, length);
+    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length);
+    uct_rc_verbs_ep_post_send(iface, ep, &iface->inl_rwrite_wr,
+                              IBV_SEND_INLINE | IBV_SEND_SIGNALED, INT_MAX); /* reuses the iface-wide pre-built inline RDMA_WRITE WR */
+    return UCS_OK;
+}
+
+ssize_t uct_rc_verbs_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb,
+                                  void *arg, uint64_t remote_addr, uct_rkey_t rkey)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t);
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+    uct_rc_iface_send_desc_t *desc;
+    struct ibv_send_wr wr;
+    struct ibv_sge sge;
+    size_t length; /* set by the descriptor macro below; presumably the size returned by pack_cb */
+
+    UCT_RC_CHECK_RES(&iface->super, &ep->super);
+    UCT_RC_IFACE_GET_TX_PUT_BCOPY_DESC(&iface->super, &iface->super.tx.mp, desc,
+                                       pack_cb, arg, length);
+    UCT_RC_VERBS_FILL_RDMA_WR(wr, wr.opcode, IBV_WR_RDMA_WRITE, sge,
+                              length, remote_addr, rkey);
+    UCT_TL_EP_STAT_OP(&ep->super.super, PUT, BCOPY, length);
+    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED, INT_MAX);
+    return length;
+}
+
+ucs_status_t uct_rc_verbs_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                                       uint64_t remote_addr, uct_rkey_t rkey,
+                                       uct_completion_t *comp)
+{
+    uct_ib_iface_t UCS_V_UNUSED *iface = ucs_derived_of(tl_ep->iface,
+                                                        uct_ib_iface_t); /* only referenced by UCT_CHECK_IOV_SIZE below */
+    uct_rc_verbs_ep_t *ep               = ucs_derived_of(tl_ep,
+                                                        uct_rc_verbs_ep_t);
+    ucs_status_t status;
+
+    UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(iface),
+                       "uct_rc_verbs_ep_put_zcopy");
+    status = uct_rc_verbs_ep_rdma_zcopy(ep, iov, iovcnt, remote_addr,
+                                        rkey, comp, IBV_WR_RDMA_WRITE);
+    UCT_TL_EP_STAT_OP_IF_SUCCESS(status, &ep->super.super, PUT, ZCOPY,
+                                 uct_iov_total_length(iov, iovcnt));
+    return status;
+}
+
+ucs_status_t uct_rc_verbs_ep_get_bcopy(uct_ep_h tl_ep,
+                                       uct_unpack_callback_t unpack_cb,
+                                       void *arg, size_t length,
+                                       uint64_t remote_addr, uct_rkey_t rkey,
+                                       uct_completion_t *comp)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t);
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+    uct_rc_iface_send_desc_t *desc;
+    struct ibv_send_wr wr;
+    struct ibv_sge sge;
+
+    UCT_CHECK_LENGTH(length, 0, iface->super.super.config.seg_size, "get_bcopy"); /* data lands in a bounce descriptor of at most seg_size */
+    UCT_RC_CHECK_RES(&iface->super, &ep->super);
+    UCT_RC_IFACE_GET_TX_GET_BCOPY_DESC(&iface->super, &iface->super.tx.mp, desc,
+                                       unpack_cb, comp, arg, length);
+
+    UCT_RC_VERBS_FILL_RDMA_WR(wr, wr.opcode, IBV_WR_RDMA_READ, sge, length, remote_addr,
+                              rkey);
+
+    UCT_TL_EP_STAT_OP(&ep->super.super, GET, BCOPY, length);
+    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SIGNALED, INT_MAX);
+    return UCS_INPROGRESS;
+}
+
+ucs_status_t uct_rc_verbs_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                                       uint64_t remote_addr, uct_rkey_t rkey,
+                                       uct_completion_t *comp)
+{
+    uct_ib_iface_t UCS_V_UNUSED *iface = ucs_derived_of(tl_ep->iface,
+                                                        uct_ib_iface_t); /* only referenced by UCT_CHECK_IOV_SIZE below */
+    uct_rc_verbs_ep_t *ep               = ucs_derived_of(tl_ep,
+                                                        uct_rc_verbs_ep_t);
+    ucs_status_t status;
+
+    UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(iface),
+                       "uct_rc_verbs_ep_get_zcopy");
+    status = uct_rc_verbs_ep_rdma_zcopy(ep, iov, iovcnt, remote_addr,
+                                        rkey, comp, IBV_WR_RDMA_READ);
+    if (status == UCS_INPROGRESS) { /* count the operation only if it was actually posted */
+        UCT_TL_EP_STAT_OP(&ep->super.super, GET, ZCOPY,
+                          uct_iov_total_length(iov, iovcnt));
+    }
+    return status;
+}
+
+ucs_status_t uct_rc_verbs_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr,
+                                      const void *buffer, unsigned length)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t);
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+
+    UCT_RC_CHECK_AM_SHORT(id, length, iface->config.max_inline);
+    UCT_RC_CHECK_RES(&iface->super, &ep->super);
+    UCT_RC_CHECK_FC(&iface->super, &ep->super, id);
+    uct_rc_verbs_iface_fill_inl_am_sge(iface, id, hdr, buffer, length);
+    UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length); /* statistics include the 64-bit user header */
+    uct_rc_verbs_ep_post_send(iface, ep, &iface->inl_am_wr,
+                              IBV_SEND_INLINE | IBV_SEND_SOLICITED, INT_MAX); /* not forced signaled: post_send applies TX moderation */
+    UCT_RC_UPDATE_FC(&iface->super, &ep->super, id);
+
+    return UCS_OK;
+}
+
+ssize_t uct_rc_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
+                                 uct_pack_callback_t pack_cb, void *arg,
+                                 unsigned flags)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t);
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+    uct_rc_iface_send_desc_t *desc;
+    struct ibv_send_wr wr;
+    struct ibv_sge sge;
+    size_t length;
+
+    UCT_CHECK_AM_ID(id);
+
+    UCT_RC_CHECK_RES(&iface->super, &ep->super);
+    UCT_RC_CHECK_FC(&iface->super, &ep->super, id);
+    UCT_RC_IFACE_GET_TX_AM_BCOPY_DESC(&iface->super, &iface->super.tx.mp, desc,
+                                      id, uct_rc_am_hdr_fill, uct_rc_hdr_t,
+                                      pack_cb, arg, &length);
+    UCT_RC_VERBS_FILL_AM_BCOPY_WR(wr, sge, length + sizeof(uct_rc_hdr_t),
+                                  wr.opcode); /* wire length = packed payload + RC header */
+    UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length);
+    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, IBV_SEND_SOLICITED, INT_MAX);
+    UCT_RC_UPDATE_FC(&iface->super, &ep->super, id);
+
+    return length; /* payload length only, without the RC header */
+}
+
+ucs_status_t uct_rc_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header,
+                                      unsigned header_length, const uct_iov_t *iov,
+                                      size_t iovcnt, unsigned flags,
+                                      uct_completion_t *comp)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t);
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+    uct_rc_iface_send_desc_t *desc = NULL;
+    struct ibv_sge sge[UCT_IB_MAX_IOV]; /* First sge is reserved for the header */
+    struct ibv_send_wr wr;
+    int send_flags;
+    size_t sge_cnt;
+
+    UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(&iface->super.super) - 1,
+                       "uct_rc_verbs_ep_am_zcopy"); /* "- 1": one SGE is taken by the header */
+    UCT_RC_CHECK_AM_ZCOPY(id, header_length, uct_iov_total_length(iov, iovcnt),
+                          iface->config.short_desc_size,
+                          iface->super.super.config.seg_size);
+    UCT_RC_CHECK_RES(&iface->super, &ep->super);
+    UCT_RC_CHECK_FC(&iface->super, &ep->super, id);
+
+    UCT_RC_IFACE_GET_TX_AM_ZCOPY_DESC(&iface->super, &iface->short_desc_mp,
+                                      desc, id, header, header_length, comp,
+                                      &send_flags);
+    sge[0].length = sizeof(uct_rc_hdr_t) + header_length; /* RC header + user header; addr/lkey are not set here - presumably by UCT_RC_VERBS_FILL_DESC_WR */
+    sge_cnt = uct_ib_verbs_sge_fill_iov(sge + 1, iov, iovcnt);
+    UCT_RC_VERBS_FILL_AM_ZCOPY_WR_IOV(wr, sge, (sge_cnt + 1), wr.opcode);
+    UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY,
+                      (header_length + uct_iov_total_length(iov, iovcnt)));
+
+    uct_rc_verbs_ep_post_send_desc(ep, &wr, desc, send_flags | IBV_SEND_SOLICITED,
+                                   UCT_IB_MAX_ZCOPY_LOG_SGE(&iface->super.super));
+    UCT_RC_UPDATE_FC(&iface->super, &ep->super, id);
+
+    return UCS_INPROGRESS;
+}
+
+ucs_status_t uct_rc_verbs_ep_atomic64_post(uct_ep_h tl_ep, unsigned opcode, uint64_t value,
+                                           uint64_t remote_addr, uct_rkey_t rkey)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t);
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+    uct_rc_iface_send_desc_t *desc;
+
+    if (opcode != UCT_ATOMIC_OP_ADD) { /* ADD is the only operation implemented here */
+        return UCS_ERR_UNSUPPORTED;
+    }
+
+    /* TODO don't allocate descriptor - have dummy buffer */
+    UCT_RC_CHECK_RES(&iface->super, &ep->super);
+    UCT_RC_IFACE_GET_TX_ATOMIC_DESC(&iface->super, &iface->short_desc_mp, desc);
+
+    uct_rc_verbs_ep_atomic_post(ep,
+                                IBV_WR_ATOMIC_FETCH_AND_ADD, value, 0,
+                                remote_addr, rkey, desc,
+                                IBV_SEND_SIGNALED); /* posted as a fetching add; no result is handed back to the caller */
+    return UCS_OK;
+}
+
+ucs_status_t uct_rc_verbs_ep_atomic64_fetch(uct_ep_h tl_ep, uct_atomic_op_t opcode,
+                                            uint64_t value, uint64_t *result,
+                                            uint64_t remote_addr, uct_rkey_t rkey,
+                                            uct_completion_t *comp)
+{
+    if (opcode != UCT_ATOMIC_OP_ADD) { /* ADD is the only operation implemented here */
+        return UCS_ERR_UNSUPPORTED;
+    }
+
+    return uct_rc_verbs_ep_atomic(ucs_derived_of(tl_ep, uct_rc_verbs_ep_t),
+                                  IBV_WR_ATOMIC_FETCH_AND_ADD, result, value, 0,
+                                  remote_addr, rkey, comp);
+}
+
+ucs_status_t uct_rc_verbs_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, uint64_t swap,
+                                            uint64_t remote_addr, uct_rkey_t rkey,
+                                            uint64_t *result, uct_completion_t *comp)
+{
+    return uct_rc_verbs_ep_atomic(ucs_derived_of(tl_ep, uct_rc_verbs_ep_t),
+                                  IBV_WR_ATOMIC_CMP_AND_SWP, result, compare, swap,
+                                  remote_addr, rkey, comp);
+}
+
+ucs_status_t uct_rc_verbs_ep_flush(uct_ep_h tl_ep, unsigned flags,
+                                   uct_completion_t *comp)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_verbs_iface_t);
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+    ucs_status_t status;
+
+    if (ucs_unlikely(flags & UCT_FLUSH_FLAG_CANCEL)) { /* cancel: purge pending requests and fail outstanding operations with UCS_ERR_CANCELED */
+        uct_ep_pending_purge(&ep->super.super.super, NULL, 0);
+        uct_rc_verbs_ep_handle_failure(ep, UCS_ERR_CANCELED);
+        return UCS_OK;
+    }
+
+    status = uct_rc_ep_flush(&ep->super, iface->config.tx_max_wr, flags);
+    if (status != UCS_INPROGRESS) { /* only UCS_INPROGRESS continues below; any other status is final */
+        return status;
+    }
+
+    if (uct_rc_txqp_unsignaled(&ep->super.txqp) != 0) { /* unsignaled sends are outstanding: post a zero-length signaled write to force a completion */
+        status = uct_rc_verbs_ep_put_short(tl_ep, NULL, 0, 0, 0);
+        if (status != UCS_OK) {
+            return status;
+        }
+    }
+
+    return uct_rc_txqp_add_flush_comp(&iface->super, &ep->super.super,
+                                      &ep->super.txqp, comp, ep->txcnt.pi);
+}
+
+ucs_status_t uct_rc_verbs_ep_fence(uct_ep_h tl_ep, unsigned flags)
+{
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+
+    return uct_rc_ep_fence(tl_ep, &ep->fi, 1); /* flags is not used */
+}
+
+ucs_status_t uct_rc_verbs_ep_fc_ctrl(uct_ep_t *tl_ep, unsigned op,
+                                     uct_rc_fc_request_t *req)
+{
+    struct ibv_send_wr fc_wr;
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface,
+                                                 uct_rc_verbs_iface_t);
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+    uct_rc_hdr_t *hdr;
+    struct ibv_sge sge;
+    int flags;
+
+    if (!iface->fc_desc) { /* no dedicated descriptor: the header is sent inline from am_inl_hdr */
+        hdr                      = &iface->am_inl_hdr.rc_hdr;
+        hdr->am_id               = UCT_RC_EP_FC_PURE_GRANT;
+        fc_wr.sg_list            = iface->inl_sge;
+        iface->inl_sge[0].addr   = (uintptr_t)hdr;
+        iface->inl_sge[0].length = sizeof(*hdr);
+        flags                    = IBV_SEND_INLINE;
+    } else { /* header is sent from the descriptor reserved at iface init, using its lkey */
+        hdr           = (uct_rc_hdr_t*)(iface->fc_desc + 1);
+        sge.addr      = (uintptr_t)hdr;
+        sge.length    = sizeof(*hdr);
+        sge.lkey      = iface->fc_desc->lkey;
+        fc_wr.sg_list = &sge;
+        flags         = 0;
+    }
+
+    /* In RC only PURE grant is sent as a separate message. Other FC
+     * messages are bundled with AM. */
+    ucs_assert(op == UCT_RC_EP_FC_PURE_GRANT);
+
+    /* Do not check the FC window here, to avoid a head-to-head deadlock.
+     * A credit grant should be sent regardless of the FC window state. */
+    UCT_RC_CHECK_RES(&iface->super, &ep->super);
+
+    fc_wr.opcode  = IBV_WR_SEND; /* only the fields needed for a plain send are set; send_flags and wr_id are set in post_send */
+    fc_wr.next    = NULL;
+    fc_wr.num_sge = 1;
+
+    uct_rc_verbs_ep_post_send(iface, ep, &fc_wr, flags, INT_MAX);
+    return UCS_OK;
+}
+
+ucs_status_t uct_rc_verbs_ep_handle_failure(uct_rc_verbs_ep_t *ep,
+                                            ucs_status_t status)
+{
+    uct_rc_iface_t *iface = ucs_derived_of(ep->super.super.super.iface,
+                                           uct_rc_iface_t);
+
+    iface->tx.cq_available += ep->txcnt.pi - ep->txcnt.ci; /* NOTE(review): if pi/ci are narrow wrapping counters, pi - ci can go negative after promotion - confirm their type */
+    /* Reset CI to prevent cq_available overrun on ep_destroy */
+    ep->txcnt.ci = ep->txcnt.pi;
+    uct_rc_txqp_purge_outstanding(&ep->super.txqp, status, 0);
+
+    return iface->super.ops->set_ep_failed(&iface->super, &ep->super.super.super,
+                                           status);
+}
+
+ucs_status_t uct_rc_verbs_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr)
+{
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+    uct_rc_ep_address_t *rc_addr = (uct_rc_ep_address_t*)addr;
+
+    uct_ib_pack_uint24(rc_addr->qp_num, ep->qp->qp_num); /* the endpoint address is the QP number packed as 24 bits */
+    return UCS_OK;
+}
+
+ucs_status_t uct_rc_verbs_ep_connect_to_ep(uct_ep_h tl_ep, const uct_device_addr_t *dev_addr,
+                                           const uct_ep_addr_t *ep_addr)
+{
+    uct_rc_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_rc_verbs_ep_t);
+    uct_rc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rc_iface_t);
+    const uct_ib_address_t *ib_addr = (const uct_ib_address_t *)dev_addr;
+    const uct_rc_ep_address_t *rc_addr = (const uct_rc_ep_address_t*)ep_addr;
+    uint32_t qp_num;
+    struct ibv_ah_attr ah_attr;
+
+    uct_ib_iface_fill_ah_attr_from_addr(&iface->super, ib_addr, &ah_attr);
+    qp_num = uct_ib_unpack_uint24(rc_addr->qp_num); /* inverse of the packing done in ep_get_address */
+
+    return uct_rc_iface_qp_connect(iface, ep->qp, qp_num, &ah_attr);
+}
+
+UCS_CLASS_INIT_FUNC(uct_rc_verbs_ep_t, const uct_ep_params_t *params)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(params->iface, uct_rc_verbs_iface_t);
+    uct_ib_qp_attr_t attr = {};
+    ucs_status_t status;
+
+    status = uct_rc_iface_qp_create(&iface->super, &self->qp, &attr,
+                                    iface->super.config.tx_qp_len, iface->srq); /* created before the base class init, which needs qp_num */
+    if (status != UCS_OK) {
+        goto err;
+    }
+
+    UCS_CLASS_CALL_SUPER_INIT(uct_rc_ep_t, &iface->super, self->qp->qp_num); /* NOTE(review): if this fails, is self->qp released? confirm the macro's error path */
+
+    status = uct_rc_iface_qp_init(&iface->super, self->qp);
+    if (status != UCS_OK) {
+        goto err_qp_cleanup;
+    }
+
+    uct_rc_iface_add_qp(&iface->super, &self->super, self->qp->qp_num); /* makes the EP discoverable by qp_num (see uct_rc_iface_lookup_ep users) */
+
+    uct_rc_txqp_available_set(&self->super.txqp, iface->config.tx_max_wr);
+    uct_rc_verbs_txcnt_init(&self->txcnt);
+    uct_ib_fence_info_init(&self->fi);
+
+    return UCS_OK;
+
+err_qp_cleanup:
+    uct_ib_destroy_qp(self->qp);
+err:
+    return status;
+}
+
+static UCS_CLASS_CLEANUP_FUNC(uct_rc_verbs_ep_t)
+{
+    uct_rc_verbs_iface_t *iface = ucs_derived_of(self->super.super.super.iface,
+                                                 uct_rc_verbs_iface_t);
+
+    /* NOTE: usually, ci == pi here, but if user calls
+     *       flush(UCT_FLUSH_FLAG_CANCEL) then ep_destroy without next progress,
+     *       TX-completion handler is not able to return CQ credits because
+     *       the EP will not be found (base class destructor deletes itself from
+     *       iface->eps). So, let's return credits here since handle_failure
+     *       ignores not found EP. */
+    ucs_assert(self->txcnt.pi >= self->txcnt.ci); /* NOTE(review): holds only while pi has not wrapped past ci - confirm the counter width */
+    iface->super.tx.cq_available += self->txcnt.pi - self->txcnt.ci;
+    ucs_assert(iface->super.tx.cq_available < iface->super.config.tx_ops_count);
+    uct_rc_iface_remove_qp(&iface->super, self->qp->qp_num);
+    uct_ib_destroy_qp(self->qp);
+}
+
+UCS_CLASS_DEFINE(uct_rc_verbs_ep_t, uct_rc_ep_t);
+UCS_CLASS_DEFINE_NEW_FUNC(uct_rc_verbs_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DEFINE_DELETE_FUNC(uct_rc_verbs_ep_t, uct_ep_t);
diff --git a/src/uct/ib/rc/verbs/rc_verbs_iface.c b/src/uct/ib/rc/verbs/rc_verbs_iface.c
new file mode 100644
index 0000000..035c2fb
--- /dev/null
+++ b/src/uct/ib/rc/verbs/rc_verbs_iface.c
@@ -0,0 +1,435 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "rc_verbs.h" +#include "rc_verbs_impl.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static uct_rc_iface_ops_t uct_rc_verbs_iface_ops; + +static ucs_config_field_t uct_rc_verbs_iface_config_table[] = { + {"RC_", "", NULL, + ucs_offsetof(uct_rc_verbs_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_rc_iface_config_table)}, + + {"MAX_AM_HDR", "128", + "Buffer size to reserve for active message headers. If set to 0, the transport will\n" + "not support zero-copy active messages.", + ucs_offsetof(uct_rc_verbs_iface_config_t, max_am_hdr), UCS_CONFIG_TYPE_MEMUNITS}, + + {"TX_MAX_WR", "-1", + "Limits the number of outstanding posted work requests. The actual limit is\n" + "a minimum between this value and the TX queue length. -1 means no limit.", + ucs_offsetof(uct_rc_verbs_iface_config_t, tx_max_wr), UCS_CONFIG_TYPE_UINT}, + + {NULL} +}; + +static void uct_rc_verbs_handle_failure(uct_ib_iface_t *ib_iface, void *arg, + ucs_status_t status) +{ + struct ibv_wc *wc = arg; + uct_rc_iface_t *iface = ucs_derived_of(ib_iface, uct_rc_iface_t); + ucs_log_level_t log_lvl = UCS_LOG_LEVEL_FATAL; + uct_rc_verbs_ep_t *ep; + + ep = ucs_derived_of(uct_rc_iface_lookup_ep(iface, wc->qp_num), + uct_rc_verbs_ep_t); + if (!ep) { + return; + } + + if (uct_rc_verbs_ep_handle_failure(ep, status) == UCS_OK) { + log_lvl = iface->super.super.config.failure_level; + } + + ucs_log(log_lvl, + "send completion with error: %s qpn 0x%x wrid 0x%lx vendor_err 0x%x", + ibv_wc_status_str(wc->status), wc->qp_num, wc->wr_id, wc->vendor_err); +} + +static ucs_status_t uct_rc_verbs_ep_set_failed(uct_ib_iface_t *iface, + uct_ep_h ep, ucs_status_t status) +{ + return uct_set_ep_failed(&UCS_CLASS_NAME(uct_rc_verbs_ep_t), ep, + &iface->super.super, status); +} + +ucs_status_t uct_rc_verbs_wc_to_ucs_status(enum ibv_wc_status status) +{ + switch (status) + { + case IBV_WC_SUCCESS: + 
return UCS_OK; + case IBV_WC_RETRY_EXC_ERR: + case IBV_WC_RNR_RETRY_EXC_ERR: + return UCS_ERR_ENDPOINT_TIMEOUT; + default: + return UCS_ERR_IO_ERROR; + } +} + +static UCS_F_ALWAYS_INLINE unsigned +uct_rc_verbs_iface_poll_tx(uct_rc_verbs_iface_t *iface) +{ + uct_rc_verbs_ep_t *ep; + uint16_t count; + int i; + unsigned num_wcs = iface->super.super.config.tx_max_poll; + struct ibv_wc wc[num_wcs]; + ucs_status_t status; + + UCT_RC_VERBS_IFACE_FOREACH_TXWQE(&iface->super, i, wc, num_wcs) { + ep = ucs_derived_of(uct_rc_iface_lookup_ep(&iface->super, wc[i].qp_num), + uct_rc_verbs_ep_t); + if (ucs_unlikely((wc[i].status != IBV_WC_SUCCESS) || (ep == NULL))) { + status = uct_rc_verbs_wc_to_ucs_status(wc[i].status); + iface->super.super.ops->handle_failure(&iface->super.super, &wc[i], + status); + continue; + } + + count = uct_rc_verbs_txcq_get_comp_count(&wc[i], &ep->super.txqp); + ucs_trace_poll("rc_verbs iface %p tx_wc wrid 0x%lx ep %p qpn 0x%x count %d", + iface, wc[i].wr_id, ep, wc[i].qp_num, count); + uct_rc_verbs_txqp_completed(&ep->super.txqp, &ep->txcnt, count); + iface->super.tx.cq_available += count; + + uct_rc_txqp_completion_desc(&ep->super.txqp, ep->txcnt.ci); + ucs_arbiter_group_schedule(&iface->super.tx.arbiter, &ep->super.arb_group); + } + ucs_arbiter_dispatch(&iface->super.tx.arbiter, 1, uct_rc_ep_process_pending, NULL); + return num_wcs; +} + +static unsigned uct_rc_verbs_iface_progress(void *arg) +{ + uct_rc_verbs_iface_t *iface = arg; + unsigned count; + + count = uct_rc_verbs_iface_poll_rx_common(iface); + if (count > 0) { + return count; + } + + return uct_rc_verbs_iface_poll_tx(iface); +} + +static void uct_rc_verbs_iface_init_inl_wrs(uct_rc_verbs_iface_t *iface) +{ + memset(&iface->inl_am_wr, 0, sizeof(iface->inl_am_wr)); + iface->inl_am_wr.sg_list = iface->inl_sge; + iface->inl_am_wr.num_sge = 2; + iface->inl_am_wr.opcode = IBV_WR_SEND; + iface->inl_am_wr.send_flags = IBV_SEND_INLINE; + + memset(&iface->inl_rwrite_wr, 0, 
sizeof(iface->inl_rwrite_wr)); + iface->inl_rwrite_wr.sg_list = iface->inl_sge; + iface->inl_rwrite_wr.num_sge = 1; + iface->inl_rwrite_wr.opcode = IBV_WR_RDMA_WRITE; + iface->inl_rwrite_wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE; +} + +static ucs_status_t uct_rc_verbs_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) +{ + uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_verbs_iface_t); + ucs_status_t status; + + status = uct_rc_iface_query(&iface->super, iface_attr, + iface->config.max_inline, + iface->config.max_inline, + iface->config.short_desc_size, + uct_ib_iface_get_max_iov(&iface->super.super) - 1, + uct_ib_iface_get_max_iov(&iface->super.super) - 1, + sizeof(uct_rc_hdr_t)); + if (status != UCS_OK) { + return status; + } + + iface_attr->latency.growth += 1e-9; /* 1 ns per each extra QP */ + iface_attr->overhead = 75e-9; /* Software overhead */ + + return UCS_OK; +} + +static ucs_status_t +uct_rc_iface_verbs_init_rx(uct_rc_iface_t *rc_iface, + const uct_rc_iface_common_config_t *config) +{ + uct_rc_verbs_iface_t *iface = ucs_derived_of(rc_iface, uct_rc_verbs_iface_t); + + return uct_rc_iface_init_rx(rc_iface, config, &iface->srq); +} + +void uct_rc_iface_verbs_cleanup_rx(uct_rc_iface_t *rc_iface) +{ + uct_rc_verbs_iface_t *iface = ucs_derived_of(rc_iface, uct_rc_verbs_iface_t); + + /* TODO flush RX buffers */ + uct_ib_destroy_srq(iface->srq); +} + +static UCS_CLASS_INIT_FUNC(uct_rc_verbs_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_rc_verbs_iface_config_t *config = + ucs_derived_of(tl_config, uct_rc_verbs_iface_config_t); + ucs_status_t status; + uct_ib_iface_init_attr_t init_attr = {}; + uct_ib_qp_attr_t attr = {}; + struct ibv_qp *qp; + uct_rc_hdr_t *hdr; + + init_attr.fc_req_size = sizeof(uct_rc_fc_request_t); + init_attr.rx_hdr_len = sizeof(uct_rc_hdr_t); + init_attr.qp_type = IBV_QPT_RC; + init_attr.rx_cq_len = 
config->super.super.super.rx.queue_len; + init_attr.tx_cq_len = config->super.tx_cq_len; + init_attr.seg_size = config->super.super.super.seg_size; + + UCS_CLASS_CALL_SUPER_INIT(uct_rc_iface_t, &uct_rc_verbs_iface_ops, md, + worker, params, &config->super.super, &init_attr); + + self->config.tx_max_wr = ucs_min(config->tx_max_wr, + self->super.config.tx_qp_len); + self->super.config.tx_moderation = ucs_min(config->super.tx_cq_moderation, + self->config.tx_max_wr / 4); + self->super.config.fence_mode = (uct_rc_fence_mode_t)config->super.super.fence_mode; + self->super.progress = uct_rc_verbs_iface_progress; + + if ((config->super.super.fence_mode == UCT_RC_FENCE_MODE_WEAK) || + (config->super.super.fence_mode == UCT_RC_FENCE_MODE_AUTO)) { + self->super.config.fence_mode = UCT_RC_FENCE_MODE_WEAK; + } else if (config->super.super.fence_mode == UCT_RC_FENCE_MODE_NONE) { + self->super.config.fence_mode = UCT_RC_FENCE_MODE_NONE; + } else if (config->super.super.fence_mode == UCT_RC_FENCE_MODE_STRONG) { + /* TODO: for now strong fence mode is not supported by verbs */ + ucs_error("fence mode 'strong' is not supported by verbs"); + status = UCS_ERR_INVALID_PARAM; + goto err; + } else { + ucs_error("incorrect fence value: %d", self->super.config.fence_mode); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + memset(self->inl_sge, 0, sizeof(self->inl_sge)); + uct_rc_am_hdr_fill(&self->am_inl_hdr.rc_hdr, 0); + + /* Configuration */ + self->config.short_desc_size = ucs_max(sizeof(uct_rc_hdr_t), + config->max_am_hdr); + self->config.short_desc_size = ucs_max(UCT_IB_MAX_ATOMIC_SIZE, + self->config.short_desc_size); + + /* Create AM headers and Atomic mempool */ + status = uct_iface_mpool_init(&self->super.super.super, + &self->short_desc_mp, + sizeof(uct_rc_iface_send_desc_t) + + self->config.short_desc_size, + sizeof(uct_rc_iface_send_desc_t), + UCS_SYS_CACHE_LINE_SIZE, + &config->super.super.super.tx.mp, + self->super.config.tx_qp_len, + uct_rc_iface_send_desc_init, + 
"rc_verbs_short_desc"); + if (status != UCS_OK) { + goto err; + } + + uct_rc_verbs_iface_init_inl_wrs(self); + + /* Check FC parameters correctness */ + status = uct_rc_init_fc_thresh(&config->super, &self->super); + if (status != UCS_OK) { + goto err_common_cleanup; + } + + /* Create a dummy QP in order to find out max_inline */ + uct_ib_exp_qp_fill_attr(&self->super.super, &attr); + status = uct_rc_iface_qp_create(&self->super, &qp, &attr, + self->super.config.tx_qp_len, + self->srq); + if (status != UCS_OK) { + goto err_common_cleanup; + } + uct_ib_destroy_qp(qp); + + self->config.max_inline = attr.cap.max_inline_data; + uct_ib_iface_set_max_iov(&self->super.super, attr.cap.max_send_sge); + + if (self->config.max_inline < sizeof(*hdr)) { + self->fc_desc = ucs_mpool_get(&self->short_desc_mp); + ucs_assert_always(self->fc_desc != NULL); + hdr = (uct_rc_hdr_t*)(self->fc_desc + 1); + hdr->am_id = UCT_RC_EP_FC_PURE_GRANT; + } else { + self->fc_desc = NULL; + } + + return UCS_OK; + +err_common_cleanup: + ucs_mpool_cleanup(&self->short_desc_mp, 1); +err: + return status; +} + +ucs_status_t uct_rc_verbs_iface_common_prepost_recvs(uct_rc_verbs_iface_t *iface, + unsigned max) +{ + unsigned count; + + count = ucs_min(max, iface->super.rx.srq.quota); + iface->super.rx.srq.available += count; + iface->super.rx.srq.quota -= count; + while (iface->super.rx.srq.available > 0) { + if (uct_rc_verbs_iface_post_recv_common(iface, 1) == 0) { + ucs_error("failed to post receives"); + return UCS_ERR_NO_MEMORY; + } + } + return UCS_OK; +} + +void uct_rc_verbs_iface_common_progress_enable(uct_iface_h tl_iface, unsigned flags) +{ + uct_rc_verbs_iface_t *iface = ucs_derived_of(tl_iface, uct_rc_verbs_iface_t); + + if (flags & UCT_PROGRESS_RECV) { + /* ignore return value from prepost_recv, since it's not really possible + * to handle here, and some receives were already pre-posted during iface + * creation anyway. 
+ */ + uct_rc_verbs_iface_common_prepost_recvs(iface, UINT_MAX); + } + + uct_base_iface_progress_enable_cb(&iface->super.super.super, + iface->super.progress, + flags); +} + +unsigned uct_rc_verbs_iface_post_recv_always(uct_rc_verbs_iface_t *iface, unsigned max) +{ + struct ibv_recv_wr *bad_wr; + uct_ib_recv_wr_t *wrs; + unsigned count; + int ret; + + wrs = ucs_alloca(sizeof *wrs * max); + + count = uct_ib_iface_prepare_rx_wrs(&iface->super.super, &iface->super.rx.mp, + wrs, max); + if (ucs_unlikely(count == 0)) { + return 0; + } + + ret = ibv_post_srq_recv(iface->srq, &wrs[0].ibwr, &bad_wr); + if (ret != 0) { + ucs_fatal("ibv_post_srq_recv() returned %d: %m", ret); + } + iface->super.rx.srq.available -= count; + + return count; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_rc_verbs_iface_t) +{ + uct_base_iface_progress_disable(&self->super.super.super.super, + UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); + if (self->fc_desc != NULL) { + ucs_mpool_put(self->fc_desc); + } + ucs_mpool_cleanup(&self->short_desc_mp, 1); +} + +UCS_CLASS_DEFINE(uct_rc_verbs_iface_t, uct_rc_iface_t); +static UCS_CLASS_DEFINE_NEW_FUNC(uct_rc_verbs_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t*); +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_rc_verbs_iface_t, uct_iface_t); + +static uct_rc_iface_ops_t uct_rc_verbs_iface_ops = { + { + { + .ep_am_short = uct_rc_verbs_ep_am_short, + .ep_am_bcopy = uct_rc_verbs_ep_am_bcopy, + .ep_am_zcopy = uct_rc_verbs_ep_am_zcopy, + .ep_put_short = uct_rc_verbs_ep_put_short, + .ep_put_bcopy = uct_rc_verbs_ep_put_bcopy, + .ep_put_zcopy = uct_rc_verbs_ep_put_zcopy, + .ep_get_bcopy = uct_rc_verbs_ep_get_bcopy, + .ep_get_zcopy = uct_rc_verbs_ep_get_zcopy, + .ep_atomic_cswap64 = uct_rc_verbs_ep_atomic_cswap64, + .ep_atomic64_post = uct_rc_verbs_ep_atomic64_post, + .ep_atomic64_fetch = uct_rc_verbs_ep_atomic64_fetch, + .ep_atomic_cswap32 = (uct_ep_atomic_cswap32_func_t)ucs_empty_function_return_unsupported, + 
.ep_atomic32_post = (uct_ep_atomic32_post_func_t)ucs_empty_function_return_unsupported, + .ep_atomic32_fetch = (uct_ep_atomic32_fetch_func_t)ucs_empty_function_return_unsupported, + .ep_pending_add = uct_rc_ep_pending_add, + .ep_pending_purge = uct_rc_ep_pending_purge, + .ep_flush = uct_rc_verbs_ep_flush, + .ep_fence = uct_rc_verbs_ep_fence, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_rc_verbs_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_rc_verbs_ep_t), + .ep_get_address = uct_rc_verbs_ep_get_address, + .ep_connect_to_ep = uct_rc_verbs_ep_connect_to_ep, + .iface_flush = uct_rc_iface_flush, + .iface_fence = uct_rc_iface_fence, + .iface_progress_enable = uct_rc_verbs_iface_common_progress_enable, + .iface_progress_disable = uct_base_iface_progress_disable, + .iface_progress = uct_rc_iface_do_progress, + .iface_event_fd_get = uct_ib_iface_event_fd_get, + .iface_event_arm = uct_rc_iface_event_arm, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_rc_verbs_iface_t), + .iface_query = uct_rc_verbs_iface_query, + .iface_get_address = ucs_empty_function_return_success, + .iface_get_device_address = uct_ib_iface_get_device_address, + .iface_is_reachable = uct_ib_iface_is_reachable, + }, + .create_cq = uct_ib_verbs_create_cq, + .arm_cq = uct_ib_iface_arm_cq, + .event_cq = (uct_ib_iface_event_cq_func_t)ucs_empty_function, + .handle_failure = uct_rc_verbs_handle_failure, + .set_ep_failed = uct_rc_verbs_ep_set_failed, + }, + .init_rx = uct_rc_iface_verbs_init_rx, + .cleanup_rx = uct_rc_iface_verbs_cleanup_rx, + .fc_ctrl = uct_rc_verbs_ep_fc_ctrl, + .fc_handler = uct_rc_iface_fc_handler +}; + +static ucs_status_t +uct_rc_verbs_query_tl_devices(uct_md_h md, + uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + uct_ib_md_t *ib_md = ucs_derived_of(md, uct_ib_md_t); + int flags; + + flags = ib_md->config.eth_pause ? 
0 : UCT_IB_DEVICE_FLAG_LINK_IB; + return uct_ib_device_query_ports(&ib_md->dev, flags, tl_devices_p, + num_tl_devices_p); +} + +UCT_TL_DEFINE(&uct_ib_component, rc_verbs, uct_rc_verbs_query_tl_devices, + uct_rc_verbs_iface_t, "RC_VERBS_", uct_rc_verbs_iface_config_table, + uct_rc_verbs_iface_config_t); diff --git a/src/uct/ib/rc/verbs/rc_verbs_impl.h b/src/uct/ib/rc/verbs/rc_verbs_impl.h new file mode 100644 index 0000000..efe038e --- /dev/null +++ b/src/uct/ib/rc/verbs/rc_verbs_impl.h @@ -0,0 +1,211 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_RC_VERBS_IMPL_H +#define UCT_RC_VERBS_IMPL_H + +#include + +#include +#include + +ucs_status_t uct_rc_verbs_wc_to_ucs_status(enum ibv_wc_status status); + +static inline void +uct_rc_verbs_txqp_posted(uct_rc_txqp_t *txqp, uct_rc_verbs_txcnt_t *txcnt, + uct_rc_iface_t *iface, int signaled) +{ + txcnt->pi++; + uct_rc_txqp_posted(txqp, iface, 1, signaled); +} + +static inline void +uct_rc_verbs_txqp_completed(uct_rc_txqp_t *txqp, uct_rc_verbs_txcnt_t *txcnt, uint16_t count) +{ + txcnt->ci += count; + uct_rc_txqp_available_add(txqp, count); +} + +ucs_status_t uct_rc_verbs_iface_common_prepost_recvs(uct_rc_verbs_iface_t *iface, + unsigned max); + +void uct_rc_verbs_iface_common_progress_enable(uct_iface_h tl_iface, unsigned flags); + +unsigned uct_rc_verbs_iface_post_recv_always(uct_rc_verbs_iface_t *iface, unsigned max); + +static inline unsigned uct_rc_verbs_iface_post_recv_common(uct_rc_verbs_iface_t *iface, + int fill) +{ + unsigned batch = iface->super.super.config.rx_max_batch; + unsigned count; + + if (iface->super.rx.srq.available < batch) { + if (ucs_likely(fill == 0)) { + return 0; + } else { + count = iface->super.rx.srq.available; + } + } else { + count = batch; + } + return uct_rc_verbs_iface_post_recv_always(iface, count); +} + + +/* TODO: think of a better name */ +static inline int +uct_rc_verbs_txcq_get_comp_count(struct 
ibv_wc *wc, uct_rc_txqp_t *txqp) +{ + uint16_t count = 1; + + if (ucs_likely(wc->wr_id != RC_UNSIGNALED_INF)) { + return wc->wr_id + 1; + } + + ucs_assert(txqp->unsignaled_store != RC_UNSIGNALED_INF); + ucs_assert(txqp->unsignaled_store_count != 0); + + txqp->unsignaled_store_count--; + if (txqp->unsignaled_store_count == 0) { + count += txqp->unsignaled_store; + txqp->unsignaled_store = 0; + } + + return count; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_verbs_iface_handle_am(uct_rc_iface_t *iface, uct_rc_hdr_t *hdr, + uint64_t wr_id, uint32_t qp_num, uint32_t length, + uint32_t imm_data, uint32_t slid) +{ + uct_ib_iface_recv_desc_t *desc; + uct_rc_iface_ops_t *rc_ops; + ucs_status_t status; + void *udesc; + + desc = (uct_ib_iface_recv_desc_t *)wr_id; + if (ucs_unlikely(hdr->am_id & UCT_RC_EP_FC_MASK)) { + rc_ops = ucs_derived_of(iface->super.ops, uct_rc_iface_ops_t); + status = rc_ops->fc_handler(iface, qp_num, hdr, length - sizeof(*hdr), + imm_data, slid, UCT_CB_PARAM_FLAG_DESC); + } else { + status = uct_iface_invoke_am(&iface->super.super, hdr->am_id, hdr + 1, + length - sizeof(*hdr), UCT_CB_PARAM_FLAG_DESC); + } + if (ucs_likely(status == UCS_OK)) { + ucs_mpool_put_inline(desc); + } else { + udesc = (char*)desc + iface->super.config.rx_headroom_offset; + uct_recv_desc(udesc) = &iface->super.release_desc; + } +} + +static UCS_F_ALWAYS_INLINE unsigned +uct_rc_verbs_iface_poll_rx_common(uct_rc_verbs_iface_t *iface) +{ + uct_rc_hdr_t *hdr; + unsigned i; + ucs_status_t status; + unsigned num_wcs = iface->super.super.config.rx_max_poll; + struct ibv_wc wc[num_wcs]; + + status = uct_ib_poll_cq(iface->super.super.cq[UCT_IB_DIR_RX], &num_wcs, wc); + if (status != UCS_OK) { + num_wcs = 0; + goto out; + } + + UCT_IB_IFACE_VERBS_FOREACH_RXWQE(&iface->super.super, i, hdr, wc, num_wcs) { + uct_ib_log_recv_completion(&iface->super.super, &wc[i], hdr, wc[i].byte_len, + uct_rc_ep_packet_dump); + uct_rc_verbs_iface_handle_am(&iface->super, hdr, wc[i].wr_id, wc[i].qp_num, + 
wc[i].byte_len, wc[i].imm_data, wc[i].slid); + } + iface->super.rx.srq.available += num_wcs; + UCS_STATS_UPDATE_COUNTER(iface->super.stats, UCT_RC_IFACE_STAT_RX_COMPLETION, num_wcs); + +out: + uct_rc_verbs_iface_post_recv_common(iface, 0); + return num_wcs; +} + +static UCS_F_ALWAYS_INLINE void +uct_rc_verbs_iface_fill_inl_sge(uct_rc_verbs_iface_t *iface, const void *addr0, + unsigned len0, const void* addr1, unsigned len1) +{ + iface->inl_sge[0].addr = (uintptr_t)addr0; + iface->inl_sge[0].length = len0; + iface->inl_sge[1].addr = (uintptr_t)addr1; + iface->inl_sge[1].length = len1; +} + +static inline void +uct_rc_verbs_iface_fill_inl_am_sge(uct_rc_verbs_iface_t *iface, + uint8_t id, uint64_t hdr, + const void *buffer, unsigned length) +{ + uct_rc_am_short_hdr_t *am = &iface->am_inl_hdr; + am->rc_hdr.am_id = id; + am->am_hdr = hdr; + uct_rc_verbs_iface_fill_inl_sge(iface, am, sizeof(*am), buffer, length); +} + +#define UCT_RC_VERBS_FILL_SGE(_wr, _sge, _length) \ + _wr.sg_list = &_sge; \ + _wr.num_sge = 1; \ + _sge.length = _length; + +#define UCT_RC_VERBS_FILL_INL_PUT_WR(_iface, _raddr, _rkey, _buf, _len) \ + _iface->inl_rwrite_wr.wr.rdma.remote_addr = _raddr; \ + _iface->inl_rwrite_wr.wr.rdma.rkey = uct_ib_md_direct_rkey(_rkey); \ + _iface->inl_sge[0].addr = (uintptr_t)_buf; \ + _iface->inl_sge[0].length = _len; + +#define UCT_RC_VERBS_FILL_AM_BCOPY_WR(_wr, _sge, _length, _wr_opcode) \ + UCT_RC_VERBS_FILL_SGE(_wr, _sge, _length) \ + _wr_opcode = (typeof(_wr_opcode))IBV_WR_SEND; + +#define UCT_RC_VERBS_FILL_AM_ZCOPY_WR_IOV(_wr, _sge, _iovlen, _wr_opcode) \ + _wr.sg_list = _sge; \ + _wr.num_sge = _iovlen; \ + _wr_opcode = (typeof(_wr_opcode))IBV_WR_SEND; + +#define UCT_RC_VERBS_FILL_RDMA_WR(_wr, _wr_opcode, _opcode, \ + _sge, _length, _raddr, _rkey) \ + UCT_RC_VERBS_FILL_SGE(_wr, _sge, _length) \ + _wr.wr.rdma.remote_addr = _raddr; \ + _wr.wr.rdma.rkey = uct_ib_md_direct_rkey(_rkey); \ + _wr_opcode = _opcode; \ + +#define UCT_RC_VERBS_FILL_RDMA_WR_IOV(_wr, 
_wr_opcode, _opcode, _sge, _sgelen, \ + _raddr, _rkey) \ + _wr.wr.rdma.remote_addr = _raddr; \ + _wr.wr.rdma.rkey = uct_ib_md_direct_rkey(_rkey); \ + _wr.sg_list = _sge; \ + _wr.num_sge = _sgelen; \ + _wr_opcode = _opcode; + +#define UCT_RC_VERBS_FILL_DESC_WR(_wr, _desc) /* terminate _wr and point its first SGE at the memory right after _desc, using _desc's lkey */ \ + { \ + struct ibv_sge *sge; \ + (_wr)->next = NULL; \ + sge = (_wr)->sg_list; \ + sge->addr = (uintptr_t)((_desc) + 1); /* use the macro argument, not a caller variable that happens to be named desc */ \ + sge->lkey = (_desc)->lkey; \ + } + +#define UCT_RC_VERBS_FILL_ATOMIC_WR(_wr, _wr_opcode, _sge, _opcode, \ + _compare_add, _swap, _remote_addr, _rkey) \ + UCT_RC_VERBS_FILL_SGE(_wr, _sge, sizeof(uint64_t)) \ + _wr_opcode = _opcode; \ + _wr.wr.atomic.compare_add = _compare_add; \ + _wr.wr.atomic.swap = _swap; \ + _wr.wr.atomic.remote_addr = _remote_addr; \ + _wr.wr.atomic.rkey = _rkey; \ + + +#endif diff --git a/src/uct/ib/rdmacm/Makefile.am b/src/uct/ib/rdmacm/Makefile.am new file mode 100644 index 0000000..0e4ad24 --- /dev/null +++ b/src/uct/ib/rdmacm/Makefile.am @@ -0,0 +1,43 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + +if HAVE_RDMACM + +# rdmacm is under IB, but it's actually a uct module, because it defines its own +# memory domain component +module_LTLIBRARIES = libuct_rdmacm.la +libuct_rdmacm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(IBVERBS_CPPFLAGS) $(RDMACM_CPPFLAGS) +libuct_rdmacm_la_CFLAGS = $(BASE_CFLAGS) +libuct_rdmacm_la_LIBADD = $(RDMACM_LIBS) $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/libuct.la \ + $(top_builddir)/src/uct/ib/libuct_ib.la +libuct_rdmacm_la_LDFLAGS = $(IBVERBS_LDFLAGS) $(RDMACM_LDFLAGS) -version-info $(SOVERSION) + +noinst_HEADERS = \ + rdmacm_md.h \ + rdmacm_iface.h \ + rdmacm_ep.h \ + rdmacm_def.h + +libuct_rdmacm_la_SOURCES = \ + rdmacm_md.c \ + rdmacm_iface.c \ + rdmacm_ep.c + +if HAVE_RDMACM_QP_LESS +noinst_HEADERS += \ + rdmacm_cm.h \ + rdmacm_listener.h \ + rdmacm_cm_ep.h + +libuct_rdmacm_la_SOURCES += \ + rdmacm_cm.c \ + rdmacm_listener.c \ + rdmacm_cm_ep.c +endif # HAVE_RDMACM_QP_LESS + +include $(top_srcdir)/config/module.am + +endif # HAVE_RDMACM diff --git a/src/uct/ib/rdmacm/Makefile.in b/src/uct/ib/rdmacm/Makefile.in new file mode 100644 index 0000000..217107d --- /dev/null +++ b/src/uct/ib/rdmacm/Makefile.in @@ -0,0 +1,936 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. 
+# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : 
+PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@am__append_1 = \ +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@ rdmacm_cm.h \ +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@ rdmacm_listener.h \ +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@ rdmacm_cm_ep.h + +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@am__append_2 = \ +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@ rdmacm_cm.c \ +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@ rdmacm_listener.c \ +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@ rdmacm_cm_ep.c + +subdir = src/uct/ib/rdmacm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + 
$(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! 
-f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +am__DEPENDENCIES_1 = +@HAVE_RDMACM_TRUE@libuct_rdmacm_la_DEPENDENCIES = \ +@HAVE_RDMACM_TRUE@ $(am__DEPENDENCIES_1) \ +@HAVE_RDMACM_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_RDMACM_TRUE@ $(top_builddir)/src/uct/libuct.la \ +@HAVE_RDMACM_TRUE@ $(top_builddir)/src/uct/ib/libuct_ib.la +am__libuct_rdmacm_la_SOURCES_DIST = rdmacm_md.c rdmacm_iface.c \ + rdmacm_ep.c rdmacm_cm.c rdmacm_listener.c rdmacm_cm_ep.c +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@am__objects_1 = libuct_rdmacm_la-rdmacm_cm.lo \ +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@ libuct_rdmacm_la-rdmacm_listener.lo \ +@HAVE_RDMACM_QP_LESS_TRUE@@HAVE_RDMACM_TRUE@ libuct_rdmacm_la-rdmacm_cm_ep.lo +@HAVE_RDMACM_TRUE@am_libuct_rdmacm_la_OBJECTS = \ +@HAVE_RDMACM_TRUE@ libuct_rdmacm_la-rdmacm_md.lo \ +@HAVE_RDMACM_TRUE@ libuct_rdmacm_la-rdmacm_iface.lo \ +@HAVE_RDMACM_TRUE@ libuct_rdmacm_la-rdmacm_ep.lo \ +@HAVE_RDMACM_TRUE@ $(am__objects_1) +libuct_rdmacm_la_OBJECTS = $(am_libuct_rdmacm_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_rdmacm_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) \ + $(libuct_rdmacm_la_LDFLAGS) $(LDFLAGS) -o $@ +@HAVE_RDMACM_TRUE@am_libuct_rdmacm_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp 
+am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_cm.Plo \ + ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_cm_ep.Plo \ + ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_ep.Plo \ + ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_iface.Plo \ + ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_listener.Plo \ + ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_md.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_rdmacm_la_SOURCES) +DIST_SOURCES = $(am__libuct_rdmacm_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = rdmacm_md.h rdmacm_iface.h rdmacm_ep.h \ + rdmacm_def.h rdmacm_cm.h rdmacm_listener.h rdmacm_cm_ep.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. 
This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = 
@DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ 
+RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = 
@sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ + +# rdmacm is under IB, but it's actually a uct module, because it defines its own +# memory domain component +@HAVE_RDMACM_TRUE@module_LTLIBRARIES = libuct_rdmacm.la +@HAVE_RDMACM_TRUE@libuct_rdmacm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(IBVERBS_CPPFLAGS) $(RDMACM_CPPFLAGS) +@HAVE_RDMACM_TRUE@libuct_rdmacm_la_CFLAGS = $(BASE_CFLAGS) +@HAVE_RDMACM_TRUE@libuct_rdmacm_la_LIBADD = $(RDMACM_LIBS) $(top_builddir)/src/ucs/libucs.la \ +@HAVE_RDMACM_TRUE@ $(top_builddir)/src/uct/libuct.la \ +@HAVE_RDMACM_TRUE@ $(top_builddir)/src/uct/ib/libuct_ib.la + +@HAVE_RDMACM_TRUE@libuct_rdmacm_la_LDFLAGS = $(IBVERBS_LDFLAGS) $(RDMACM_LDFLAGS) -version-info $(SOVERSION) +@HAVE_RDMACM_TRUE@noinst_HEADERS = rdmacm_md.h rdmacm_iface.h \ +@HAVE_RDMACM_TRUE@ rdmacm_ep.h rdmacm_def.h $(am__append_1) +@HAVE_RDMACM_TRUE@libuct_rdmacm_la_SOURCES = rdmacm_md.c \ +@HAVE_RDMACM_TRUE@ rdmacm_iface.c rdmacm_ep.c $(am__append_2) + +# Automake silent rules +@HAVE_RDMACM_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_RDMACM_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_RDMACM_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_RDMACM_TRUE@AM_V_LN_1 = true +@HAVE_RDMACM_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/ib/rdmacm/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign 
src/uct/ib/rdmacm/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f 
$(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libuct_rdmacm.la: $(libuct_rdmacm_la_OBJECTS) $(libuct_rdmacm_la_DEPENDENCIES) $(EXTRA_libuct_rdmacm_la_DEPENDENCIES) + $(AM_V_CCLD)$(libuct_rdmacm_la_LINK) $(am_libuct_rdmacm_la_rpath) $(libuct_rdmacm_la_OBJECTS) $(libuct_rdmacm_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_rdmacm_la-rdmacm_cm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_rdmacm_la-rdmacm_cm_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_rdmacm_la-rdmacm_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_rdmacm_la-rdmacm_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_rdmacm_la-rdmacm_listener.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_rdmacm_la-rdmacm_md.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 
's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libuct_rdmacm_la-rdmacm_md.lo: rdmacm_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -MT libuct_rdmacm_la-rdmacm_md.lo -MD -MP -MF $(DEPDIR)/libuct_rdmacm_la-rdmacm_md.Tpo -c -o libuct_rdmacm_la-rdmacm_md.lo `test -f 'rdmacm_md.c' || echo '$(srcdir)/'`rdmacm_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_rdmacm_la-rdmacm_md.Tpo $(DEPDIR)/libuct_rdmacm_la-rdmacm_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rdmacm_md.c' object='libuct_rdmacm_la-rdmacm_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) 
$(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -c -o libuct_rdmacm_la-rdmacm_md.lo `test -f 'rdmacm_md.c' || echo '$(srcdir)/'`rdmacm_md.c + +libuct_rdmacm_la-rdmacm_iface.lo: rdmacm_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -MT libuct_rdmacm_la-rdmacm_iface.lo -MD -MP -MF $(DEPDIR)/libuct_rdmacm_la-rdmacm_iface.Tpo -c -o libuct_rdmacm_la-rdmacm_iface.lo `test -f 'rdmacm_iface.c' || echo '$(srcdir)/'`rdmacm_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_rdmacm_la-rdmacm_iface.Tpo $(DEPDIR)/libuct_rdmacm_la-rdmacm_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rdmacm_iface.c' object='libuct_rdmacm_la-rdmacm_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -c -o libuct_rdmacm_la-rdmacm_iface.lo `test -f 'rdmacm_iface.c' || echo '$(srcdir)/'`rdmacm_iface.c + +libuct_rdmacm_la-rdmacm_ep.lo: rdmacm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -MT libuct_rdmacm_la-rdmacm_ep.lo -MD -MP -MF $(DEPDIR)/libuct_rdmacm_la-rdmacm_ep.Tpo -c -o libuct_rdmacm_la-rdmacm_ep.lo `test -f 'rdmacm_ep.c' || echo '$(srcdir)/'`rdmacm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_rdmacm_la-rdmacm_ep.Tpo $(DEPDIR)/libuct_rdmacm_la-rdmacm_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rdmacm_ep.c' 
object='libuct_rdmacm_la-rdmacm_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -c -o libuct_rdmacm_la-rdmacm_ep.lo `test -f 'rdmacm_ep.c' || echo '$(srcdir)/'`rdmacm_ep.c + +libuct_rdmacm_la-rdmacm_cm.lo: rdmacm_cm.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -MT libuct_rdmacm_la-rdmacm_cm.lo -MD -MP -MF $(DEPDIR)/libuct_rdmacm_la-rdmacm_cm.Tpo -c -o libuct_rdmacm_la-rdmacm_cm.lo `test -f 'rdmacm_cm.c' || echo '$(srcdir)/'`rdmacm_cm.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_rdmacm_la-rdmacm_cm.Tpo $(DEPDIR)/libuct_rdmacm_la-rdmacm_cm.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rdmacm_cm.c' object='libuct_rdmacm_la-rdmacm_cm.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -c -o libuct_rdmacm_la-rdmacm_cm.lo `test -f 'rdmacm_cm.c' || echo '$(srcdir)/'`rdmacm_cm.c + +libuct_rdmacm_la-rdmacm_listener.lo: rdmacm_listener.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -MT libuct_rdmacm_la-rdmacm_listener.lo -MD -MP 
-MF $(DEPDIR)/libuct_rdmacm_la-rdmacm_listener.Tpo -c -o libuct_rdmacm_la-rdmacm_listener.lo `test -f 'rdmacm_listener.c' || echo '$(srcdir)/'`rdmacm_listener.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_rdmacm_la-rdmacm_listener.Tpo $(DEPDIR)/libuct_rdmacm_la-rdmacm_listener.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rdmacm_listener.c' object='libuct_rdmacm_la-rdmacm_listener.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -c -o libuct_rdmacm_la-rdmacm_listener.lo `test -f 'rdmacm_listener.c' || echo '$(srcdir)/'`rdmacm_listener.c + +libuct_rdmacm_la-rdmacm_cm_ep.lo: rdmacm_cm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -MT libuct_rdmacm_la-rdmacm_cm_ep.lo -MD -MP -MF $(DEPDIR)/libuct_rdmacm_la-rdmacm_cm_ep.Tpo -c -o libuct_rdmacm_la-rdmacm_cm_ep.lo `test -f 'rdmacm_cm_ep.c' || echo '$(srcdir)/'`rdmacm_cm_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_rdmacm_la-rdmacm_cm_ep.Tpo $(DEPDIR)/libuct_rdmacm_la-rdmacm_cm_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rdmacm_cm_ep.c' object='libuct_rdmacm_la-rdmacm_cm_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rdmacm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rdmacm_la_CFLAGS) $(CFLAGS) -c -o 
libuct_rdmacm_la-rdmacm_cm_ep.lo `test -f 'rdmacm_cm_ep.c' || echo '$(srcdir)/'`rdmacm_cm_ep.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed 
'/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_RDMACM_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_cm.Plo + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_cm_ep.Plo + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_ep.Plo + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_iface.Plo + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_listener.Plo + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_md.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_cm.Plo + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_cm_ep.Plo + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_ep.Plo + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_iface.Plo + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_listener.Plo + -rm -f ./$(DEPDIR)/libuct_rdmacm_la-rdmacm_md.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + 
check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + +@HAVE_RDMACM_TRUE@all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to create proper makefile dependencies +@HAVE_RDMACM_TRUE@$(local_la_modules): $(module_LTLIBRARIES) +@HAVE_RDMACM_TRUE@ $(AM_V_at)$(MKDIR_P) $(localmoduledir) +@HAVE_RDMACM_TRUE@ $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_RDMACM_TRUE@ (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ +@HAVE_RDMACM_TRUE@ done +@HAVE_RDMACM_TRUE@ @for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_RDMACM_TRUE@ $(AM_V_LN) $$lib; \ +@HAVE_RDMACM_TRUE@ done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/uct/ib/rdmacm/configure.m4 b/src/uct/ib/rdmacm/configure.m4 new file mode 100644 index 0000000..35f078f --- /dev/null +++ b/src/uct/ib/rdmacm/configure.m4 @@ -0,0 +1,63 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + +# +# Check for RDMACM support +# +rdmacm_happy="no" +rdmacm_qp_less_happy="no" +AC_ARG_WITH([rdmacm], + [AS_HELP_STRING([--with-rdmacm=(DIR)], [Enable the use of RDMACM (default is guess).])], + [], [with_rdmacm=guess]) + +AS_IF([test "x$with_rdmacm" != xno], + [AS_IF([test "x$with_rdmacm" = xguess -o "x$with_rdmacm" = xyes -o "x$with_rdmacm" = x], + [ucx_check_rdmacm_dir=/usr], + [ucx_check_rdmacm_dir=$with_rdmacm]) + + AS_IF([test -d "$ucx_check_rdmacm_dir/lib64"],[libsuff="64"],[libsuff=""]) + save_LDFLAGS="$LDFLAGS" + save_CPPFLAGS="$CPPFLAGS" + + AS_IF([test "$ucx_check_rdmacm_dir" != /usr], + [ + LDFLAGS="-L$ucx_check_rdmacm_dir/lib$libsuff $LDFLAGS" + CPPFLAGS="-I$ucx_check_rdmacm_dir/include $CPPFLAGS"]) + + AC_CHECK_HEADER([$ucx_check_rdmacm_dir/include/rdma/rdma_cma.h], + [ + AC_CHECK_LIB([rdmacm], [rdma_create_id], + [uct_modules="${uct_modules}:rdmacm" + rdmacm_happy="yes" + AS_IF([test "$ucx_check_rdmacm_dir" != /usr], + [ + AC_SUBST(RDMACM_CPPFLAGS, ["-I$ucx_check_rdmacm_dir/include"]) + AC_SUBST(RDMACM_LDFLAGS, ["-L$ucx_check_rdmacm_dir/lib$libsuff"])]) + AC_SUBST(RDMACM_LIBS, [-lrdmacm]) + # QP less support + AC_CHECK_DECLS([rdma_establish, rdma_init_qp_attr], + [rdmacm_qp_less_happy="yes" + AC_DEFINE([HAVE_RDMACM_QP_LESS], 1, [RDMACM QP less support])], + [], [#include <$ucx_check_rdmacm_dir/include/rdma/rdma_cma.h>]) + ], + [AC_MSG_WARN([RDMACM requested but librdmacm is not found]) + AC_MSG_ERROR([Please install librdmacm and librdmacm-devel or disable rdmacm support]) + ]) + ], + [ + AS_IF([test "x$with_rdmacm" != xguess], + [AC_MSG_ERROR([RDMACM requested but required file (rdma/rdma_cma.h) could not be found in $ucx_check_rdmacm_dir])], + [AC_MSG_WARN([RDMACM requested but required file (rdma/rdma_cma.h) could not be found in $ucx_check_rdmacm_dir])]) + ]) + + LDFLAGS="$save_LDFLAGS" + CPPFLAGS="$save_CPPFLAGS" + ] +) + +AM_CONDITIONAL([HAVE_RDMACM], [test "x$rdmacm_happy" != xno]) +AM_CONDITIONAL([HAVE_RDMACM_QP_LESS], [test 
"x$rdmacm_qp_less_happy" != xno]) +AC_CONFIG_FILES([src/uct/ib/rdmacm/Makefile]) diff --git a/src/uct/ib/rdmacm/rdmacm_cm.c b/src/uct/ib/rdmacm/rdmacm_cm.c new file mode 100644 index 0000000..9bca1d4 --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_cm.c @@ -0,0 +1,548 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" /* Defines HAVE_RDMACM_QP_LESS */ +#endif + +#include "rdmacm_cm_ep.h" +#include +#include + +#include +#include + + +ucs_status_t uct_rdmacm_cm_destroy_id(struct rdma_cm_id *id) +{ + ucs_trace("destroying cm_id %p", id); + + if (rdma_destroy_id(id)) { + ucs_warn("rdma_destroy_id() failed: %m"); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +ucs_status_t uct_rdmacm_cm_ack_event(struct rdma_cm_event *event) +{ + ucs_trace("ack event %p, cm_id %p", event, event->id); + + if (rdma_ack_cm_event(event)) { + ucs_warn("rdma_ack_cm_event failed on event %s: %m", + rdma_event_str(event->event)); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +ucs_status_t uct_rdmacm_cm_reject(struct rdma_cm_id *id) +{ + ucs_trace("reject on cm_id %p", id); + + if (rdma_reject(id, NULL, 0)) { + ucs_error("rdma_reject (id=%p) failed with error: %m", id); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +size_t uct_rdmacm_cm_get_max_conn_priv() +{ + return UCT_RDMACM_TCP_PRIV_DATA_LEN - sizeof(uct_rdmacm_priv_data_hdr_t); +} + +static ucs_status_t uct_rdmacm_cm_query(uct_cm_h cm, uct_cm_attr_t *cm_attr) +{ + if (cm_attr->field_mask & UCT_CM_ATTR_FIELD_MAX_CONN_PRIV) { + cm_attr->max_conn_priv = uct_rdmacm_cm_get_max_conn_priv(); + } + return UCS_OK; +} + +static void uct_rdmacm_cm_handle_event_addr_resolved(struct rdma_cm_event *event) +{ + struct sockaddr *remote_addr = rdma_get_peer_addr(event->id); + uct_rdmacm_cm_ep_t *cep = (uct_rdmacm_cm_ep_t *)event->id->context; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + char 
ep_str[UCT_RDMACM_EP_STRING_LEN]; + uct_cm_remote_data_t remote_data; + + ucs_assert(event->id == cep->id); + + ucs_trace("%s: rdma_resolve_route on cm_id %p", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + event->id); + + if (rdma_resolve_route(event->id, 1000 /* TODO */)) { + ucs_error("%s: rdma_resolve_route(to addr=%s) failed: %m", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + ucs_sockaddr_str(remote_addr, ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + remote_data.field_mask = 0; + uct_rdmacm_cm_ep_set_failed(cep, &remote_data, UCS_ERR_IO_ERROR); + } +} + +static void uct_rdmacm_cm_handle_event_route_resolved(struct rdma_cm_event *event) +{ + struct sockaddr *remote_addr = rdma_get_peer_addr(event->id); + uct_rdmacm_cm_ep_t *cep = (uct_rdmacm_cm_ep_t *)event->id->context; + uct_cm_remote_data_t remote_data; + ucs_status_t status; + struct rdma_conn_param conn_param; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + char ep_str[UCT_RDMACM_EP_STRING_LEN]; + + ucs_assert(event->id == cep->id); + + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.private_data = ucs_alloca(uct_rdmacm_cm_get_max_conn_priv() + + sizeof(uct_rdmacm_priv_data_hdr_t)); + + status = uct_rdmacm_cm_ep_conn_param_init(cep, &conn_param); + if (status != UCS_OK) { + remote_data.field_mask = 0; + uct_rdmacm_cm_ep_set_failed(cep, &remote_data, status); + return; + } + + ucs_trace("%s: rdma_connect, cm_id %p", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), cep->id); + + if (rdma_connect(cep->id, &conn_param)) { + ucs_error("%s: rdma_connect(to addr=%s) failed: %m", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + ucs_sockaddr_str(remote_addr, ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + remote_data.field_mask = 0; + uct_rdmacm_cm_ep_set_failed(cep, &remote_data, UCS_ERR_IO_ERROR); + } +} + +static ucs_status_t uct_rdmacm_cm_id_to_dev_addr(struct rdma_cm_id *cm_id, + uct_device_addr_t **dev_addr_p, + size_t *dev_addr_len_p) +{ + 
struct ibv_port_attr port_attr; + uct_ib_address_t *dev_addr; + struct ibv_qp_attr qp_attr; + size_t addr_length; + int qp_attr_mask; + char dev_name[UCT_DEVICE_NAME_MAX]; + unsigned address_pack_flags; + + /* get the qp attributes in order to modify the qp state. + * the ah_attr fields from them are required to extract the device address + * of the remote peer. + */ + qp_attr.qp_state = IBV_QPS_RTR; + if (rdma_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask)) { + ucs_error("rdma_init_qp_attr (id=%p, qp_state=%d) failed: %m", + cm_id, qp_attr.qp_state); + return UCS_ERR_IO_ERROR; + } + + if (ibv_query_port(cm_id->verbs, cm_id->port_num, &port_attr)) { + uct_rdmacm_cm_id_to_dev_name(cm_id, dev_name); + ucs_error("ibv_query_port (%s) failed: %m", dev_name); + return UCS_ERR_IO_ERROR; + } + + if (IBV_PORT_IS_LINK_LAYER_ETHERNET(&port_attr)) { + /* Ethernet address */ + ucs_assert(qp_attr.ah_attr.is_global); + address_pack_flags = UCT_IB_ADDRESS_PACK_FLAG_ETH; + } else if (qp_attr.ah_attr.is_global) { + /* IB global address */ + address_pack_flags = UCT_IB_ADDRESS_PACK_FLAG_INTERFACE_ID | + UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX; + } else { + /* IB local address - need just LID */ + address_pack_flags = 0; + } + + addr_length = uct_ib_address_size(&qp_attr.ah_attr.grh.dgid, + address_pack_flags); + + dev_addr = ucs_malloc(addr_length, "IB device address"); + if (dev_addr == NULL) { + ucs_error("failed to allocate IB device address"); + return UCS_ERR_NO_MEMORY; + } + + uct_ib_address_pack(&qp_attr.ah_attr.grh.dgid, qp_attr.ah_attr.dlid, + address_pack_flags, dev_addr); + + *dev_addr_p = (uct_device_addr_t *)dev_addr; + *dev_addr_len_p = addr_length; + return UCS_OK; +} + +static void uct_rdmacm_cm_handle_event_connect_request(struct rdma_cm_event *event) +{ + uct_rdmacm_priv_data_hdr_t *hdr = (uct_rdmacm_priv_data_hdr_t *) + event->param.conn.private_data; + uct_rdmacm_listener_t *listener = event->listen_id->context; + char dev_name[UCT_DEVICE_NAME_MAX]; + 
uct_device_addr_t *dev_addr; + size_t addr_length; + uct_cm_remote_data_t remote_data; + ucs_status_t status; + + ucs_assert(hdr->status == UCS_OK); + + uct_rdmacm_cm_id_to_dev_name(event->id, dev_name); + + status = uct_rdmacm_cm_id_to_dev_addr(event->id, &dev_addr, &addr_length); + if (status != UCS_OK) { + uct_rdmacm_cm_reject(event->id); + uct_rdmacm_cm_destroy_id(event->id); + return; + } + + remote_data.field_mask = UCT_CM_REMOTE_DATA_FIELD_DEV_ADDR | + UCT_CM_REMOTE_DATA_FIELD_DEV_ADDR_LENGTH | + UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA | + UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA_LENGTH; + remote_data.dev_addr = dev_addr; + remote_data.dev_addr_length = addr_length; + remote_data.conn_priv_data = hdr + 1; + remote_data.conn_priv_data_length = hdr->length; + + listener->conn_request_cb(&listener->super, listener->user_data, + dev_name, event, &remote_data); + ucs_free(dev_addr); +} + +static void uct_rdmacm_cm_handle_event_connect_response(struct rdma_cm_event *event) +{ + struct sockaddr *remote_addr = rdma_get_peer_addr(event->id); + uct_rdmacm_priv_data_hdr_t *hdr = (uct_rdmacm_priv_data_hdr_t *) + event->param.conn.private_data; + uct_rdmacm_cm_ep_t *cep = event->id->context; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + uct_device_addr_t *dev_addr; + size_t addr_length; + uct_cm_remote_data_t remote_data; + ucs_status_t status; + + ucs_assert(event->id == cep->id); + + /* Do not notify user on disconnected EP, RDMACM out of order case */ + if (cep->flags & UCT_RDMACM_CM_EP_GOT_DISCONNECT) { + return; + } + + remote_data.field_mask = UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA | + UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA_LENGTH; + remote_data.conn_priv_data = hdr + 1; + remote_data.conn_priv_data_length = hdr->length; + + status = uct_rdmacm_cm_id_to_dev_addr(event->id, &dev_addr, &addr_length); + if (status != UCS_OK) { + ucs_error("client (ep=%p id=%p) failed to process a connect response " + "from server %s.", cep, event->id, + ucs_sockaddr_str(remote_addr, 
ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + uct_rdmacm_cm_ep_set_failed(cep, &remote_data, status); + /* notify remote side about local error */ + rdma_disconnect(cep->id); + return; + } + + remote_data.field_mask |= UCT_CM_REMOTE_DATA_FIELD_DEV_ADDR | + UCT_CM_REMOTE_DATA_FIELD_DEV_ADDR_LENGTH; + remote_data.dev_addr = dev_addr; + remote_data.dev_addr_length = addr_length; + + uct_rdmacm_cm_ep_client_connect_cb(cep, &remote_data, + (ucs_status_t)hdr->status); + ucs_free(dev_addr); + + if (rdma_establish(event->id)) { + ucs_error("rdma_establish on ep %p (to server addr=%s) failed: %m", + cep, ucs_sockaddr_str(remote_addr, ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + uct_rdmacm_cm_ep_set_failed(cep, &remote_data, UCS_ERR_IO_ERROR); + } +} + +static void uct_rdmacm_cm_handle_event_established(struct rdma_cm_event *event) +{ + uct_rdmacm_cm_ep_t *cep = event->id->context; + + ucs_assert(event->id == cep->id); + /* do not call connect callback again, RDMACM out of order case */ + if (cep->flags & UCT_RDMACM_CM_EP_GOT_DISCONNECT) { + return; + } + + uct_rdmacm_cm_ep_server_connect_cb(cep, UCS_OK); +} + +static void uct_rdmacm_cm_handle_event_disconnected(struct rdma_cm_event *event) +{ + uct_rdmacm_cm_ep_t *cep = event->id->context; + struct sockaddr *remote_addr = rdma_get_peer_addr(event->id); + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + char ep_str[UCT_RDMACM_EP_STRING_LEN]; + uct_cm_remote_data_t remote_data; + + ucs_debug("%s: got disconnect event, status %d peer %s", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + event->status, ucs_sockaddr_str(remote_addr, ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + + cep->flags |= UCT_RDMACM_CM_EP_GOT_DISCONNECT; + /* calling error_cb instead of disconnect CB directly handles out-of-order + * disconnect event prior connect_response/connect_established event */ + remote_data.field_mask = 0; + uct_rdmacm_cm_ep_error_cb(cep, &remote_data, UCS_ERR_CONNECTION_RESET); +} + +static void 
uct_rdmacm_cm_handle_error_event(struct rdma_cm_event *event) +{ + uct_rdmacm_cm_ep_t *cep = event->id->context; + struct sockaddr *remote_addr = rdma_get_peer_addr(event->id); + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + char ep_str[UCT_RDMACM_EP_STRING_LEN]; + uct_cm_remote_data_t remote_data; + ucs_log_level_t log_level; + ucs_status_t status; + + if (event->event == RDMA_CM_EVENT_REJECTED) { + if (cep->flags & UCT_RDMACM_CM_EP_ON_SERVER) { + /* response was rejected by the client in the middle of + * connection establishment, so report connection reset */ + status = UCS_ERR_CONNECTION_RESET; + } else { + ucs_assert(cep->flags & UCT_RDMACM_CM_EP_ON_CLIENT); + status = UCS_ERR_REJECTED; + } + + log_level = UCS_LOG_LEVEL_DEBUG; + } else { + status = UCS_ERR_IO_ERROR; + log_level = UCS_LOG_LEVEL_ERROR; + } + + ucs_log(log_level, "%s: got error event %s, status %d peer %s", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + rdma_event_str(event->event), event->status, + ucs_sockaddr_str(remote_addr, ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + + remote_data.field_mask = 0; + uct_rdmacm_cm_ep_set_failed(cep, &remote_data, status); +} + +static void +uct_rdmacm_cm_process_event(uct_rdmacm_cm_t *cm, struct rdma_cm_event *event) +{ + struct sockaddr *remote_addr = rdma_get_peer_addr(event->id); + uint8_t ack_event = 1; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + + ucs_trace("rdmacm event (fd=%d cm_id %p cm %p event_channel %p): %s. 
Peer: %s.", + cm->ev_ch->fd, event->id, cm, cm->ev_ch, rdma_event_str(event->event), + ucs_sockaddr_str(remote_addr, ip_port_str, UCS_SOCKADDR_STRING_LEN)); + + /* The following applies for rdma_cm_id of type RDMA_PS_TCP only */ + ucs_assert(event->id->ps == RDMA_PS_TCP); + + /* Using https://linux.die.net/man/3/rdma_get_cm_event to distinguish + * between client and server events */ + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + /* Client side event */ + uct_rdmacm_cm_handle_event_addr_resolved(event); + break; + case RDMA_CM_EVENT_ROUTE_RESOLVED: + /* Client side event */ + uct_rdmacm_cm_handle_event_route_resolved(event); + break; + case RDMA_CM_EVENT_CONNECT_REQUEST: + /* Server side event */ + uct_rdmacm_cm_handle_event_connect_request(event); + /* The server will ack the event after accepting/rejecting the request + * (in ep_create). */ + ack_event = 0; + break; + case RDMA_CM_EVENT_CONNECT_RESPONSE: + /* Client side event */ + uct_rdmacm_cm_handle_event_connect_response(event); + break; + case RDMA_CM_EVENT_ESTABLISHED: + /* Server side event */ + uct_rdmacm_cm_handle_event_established(event); + break; + case RDMA_CM_EVENT_DISCONNECTED: + /* Client and Server side event */ + uct_rdmacm_cm_handle_event_disconnected(event); + break; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + /* This event is generated when the QP associated with the connection + * has exited its timewait state and is now ready to be re-used. + * After a QP has been disconnected, it is maintained in a timewait + * state to allow any in flight packets to exit the network. 
+ * After the timewait state has completed, the rdma_cm will report this event.*/ + break; + /* client error events */ + case RDMA_CM_EVENT_REJECTED: + case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_DEVICE_REMOVAL: + case RDMA_CM_EVENT_ADDR_CHANGE: + /* client and server error events */ + case RDMA_CM_EVENT_CONNECT_ERROR: + uct_rdmacm_cm_handle_error_event(event); + break; + default: + ucs_warn("unexpected RDMACM event: %s", rdma_event_str(event->event)); + break; + } + + if (ack_event) { + uct_rdmacm_cm_ack_event(event); + } +} + +static void uct_rdmacm_cm_event_handler(int fd, void *arg) +{ + uct_rdmacm_cm_t *cm = (uct_rdmacm_cm_t *)arg; + struct rdma_cm_event *event; + int ret; + + for (;;) { + /* Fetch an event */ + ret = rdma_get_cm_event(cm->ev_ch, &event); + if (ret) { + /* EAGAIN (in a non-blocking rdma_get_cm_event) means that + * there are no more events */ + if ((errno != EAGAIN) && (errno != EINTR)) { + ucs_warn("rdma_get_cm_event() failed: %m"); + } + + return; + } + + UCS_ASYNC_BLOCK(uct_rdmacm_cm_get_async(cm)); + uct_rdmacm_cm_process_event(cm, event); + UCS_ASYNC_UNBLOCK(uct_rdmacm_cm_get_async(cm)); + } +} + +static uct_cm_ops_t uct_rdmacm_cm_ops = { + .close = UCS_CLASS_DELETE_FUNC_NAME(uct_rdmacm_cm_t), + .cm_query = uct_rdmacm_cm_query, + .listener_create = UCS_CLASS_NEW_FUNC_NAME(uct_rdmacm_listener_t), + .listener_reject = uct_rdmacm_listener_reject, + .listener_query = uct_rdmacm_listener_query, + .listener_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_rdmacm_listener_t), + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_rdmacm_cm_ep_t) +}; + +static uct_iface_ops_t uct_rdmacm_cm_iface_ops = { + .ep_pending_purge = ucs_empty_function, + .ep_disconnect = uct_rdmacm_cm_ep_disconnect, + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_rdmacm_cm_ep_t), + .ep_put_short = (uct_ep_put_short_func_t)ucs_empty_function_return_unsupported, + .ep_put_bcopy = 
(uct_ep_put_bcopy_func_t)ucs_empty_function_return_unsupported, + .ep_get_bcopy = (uct_ep_get_bcopy_func_t)ucs_empty_function_return_unsupported, + .ep_am_short = (uct_ep_am_short_func_t)ucs_empty_function_return_unsupported, + .ep_am_bcopy = (uct_ep_am_bcopy_func_t)ucs_empty_function_return_unsupported, + .ep_atomic_cswap64 = (uct_ep_atomic_cswap64_func_t)ucs_empty_function_return_unsupported, + .ep_atomic64_post = (uct_ep_atomic64_post_func_t)ucs_empty_function_return_unsupported, + .ep_atomic64_fetch = (uct_ep_atomic64_fetch_func_t)ucs_empty_function_return_unsupported, + .ep_atomic_cswap32 = (uct_ep_atomic_cswap32_func_t)ucs_empty_function_return_unsupported, + .ep_atomic32_post = (uct_ep_atomic32_post_func_t)ucs_empty_function_return_unsupported, + .ep_atomic32_fetch = (uct_ep_atomic32_fetch_func_t)ucs_empty_function_return_unsupported, + .ep_pending_add = (uct_ep_pending_add_func_t)ucs_empty_function_return_unsupported, + .ep_flush = (uct_ep_flush_func_t)ucs_empty_function_return_success, + .ep_fence = (uct_ep_fence_func_t)ucs_empty_function_return_unsupported, + .ep_check = (uct_ep_check_func_t)ucs_empty_function_return_unsupported, + .ep_create = (uct_ep_create_func_t)ucs_empty_function_return_unsupported, + .iface_flush = (uct_iface_flush_func_t)ucs_empty_function_return_unsupported, + .iface_fence = (uct_iface_fence_func_t)ucs_empty_function_return_unsupported, + .iface_progress_enable = ucs_empty_function, + .iface_progress_disable = ucs_empty_function, + .iface_progress = (uct_iface_progress_func_t)ucs_empty_function_return_zero, + .iface_event_fd_get = (uct_iface_event_fd_get_func_t)ucs_empty_function_return_unsupported, + .iface_event_arm = (uct_iface_event_arm_func_t)ucs_empty_function_return_unsupported, + .iface_close = ucs_empty_function, + .iface_query = (uct_iface_query_func_t)ucs_empty_function_return_unsupported, + .iface_get_device_address = (uct_iface_get_device_address_func_t)ucs_empty_function_return_unsupported, + .iface_get_address = 
(uct_iface_get_address_func_t)ucs_empty_function_return_unsupported, + .iface_is_reachable = (uct_iface_is_reachable_func_t)ucs_empty_function_return_zero +}; + +UCS_CLASS_INIT_FUNC(uct_rdmacm_cm_t, uct_component_h component, + uct_worker_h worker, const uct_cm_config_t *config) +{ + uct_priv_worker_t *worker_priv; + ucs_status_t status; + + UCS_CLASS_CALL_SUPER_INIT(uct_cm_t, &uct_rdmacm_cm_ops, + &uct_rdmacm_cm_iface_ops, worker, component); + + self->ev_ch = rdma_create_event_channel(); + if (self->ev_ch == NULL) { + ucs_error("rdma_create_event_channel failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err; + } + + /* Set the event_channel fd to non-blocking mode + * (so that rdma_get_cm_event won't be blocking) */ + status = ucs_sys_fcntl_modfl(self->ev_ch->fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + status = UCS_ERR_IO_ERROR; + goto err_destroy_ev_ch; + } + + worker_priv = ucs_derived_of(worker, uct_priv_worker_t); + status = ucs_async_set_event_handler(worker_priv->async->mode, + self->ev_ch->fd, UCS_EVENT_SET_EVREAD, + uct_rdmacm_cm_event_handler, self, + worker_priv->async); + if (status != UCS_OK) { + goto err_destroy_ev_ch; + } + + ucs_debug("created rdmacm_cm %p with event_channel %p (fd=%d)", + self, self->ev_ch, self->ev_ch->fd); + + return UCS_OK; + +err_destroy_ev_ch: + rdma_destroy_event_channel(self->ev_ch); +err: + return status; +} + +UCS_CLASS_CLEANUP_FUNC(uct_rdmacm_cm_t) +{ + ucs_status_t status; + + status = ucs_async_remove_handler(self->ev_ch->fd, 1); + if (status != UCS_OK) { + ucs_warn("failed to remove event handler for fd %d: %s", + self->ev_ch->fd, ucs_status_string(status)); + } + + ucs_trace("destroying event_channel %p on cm %p", self->ev_ch, self); + rdma_destroy_event_channel(self->ev_ch); +} + +UCS_CLASS_DEFINE(uct_rdmacm_cm_t, uct_cm_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_rdmacm_cm_t, uct_cm_t, uct_component_h, + uct_worker_h, const uct_cm_config_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_rdmacm_cm_t, uct_cm_t); diff --git 
a/src/uct/ib/rdmacm/rdmacm_cm.h b/src/uct/ib/rdmacm/rdmacm_cm.h new file mode 100644 index 0000000..6a23671 --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_cm.h @@ -0,0 +1,41 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_RDMACM_CM_H +#define UCT_RDMACM_CM_H + +#include +#include "rdmacm_def.h" + + +/** + * An rdmacm connection manager + */ +typedef struct uct_rdmacm_cm { + uct_cm_t super; + struct rdma_event_channel *ev_ch; +} uct_rdmacm_cm_t; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_rdmacm_cm_t, uct_cm_t, uct_component_h, + uct_worker_h, const uct_cm_config_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_rdmacm_cm_t, uct_cm_t); + +static UCS_F_ALWAYS_INLINE ucs_async_context_t * +uct_rdmacm_cm_get_async(uct_rdmacm_cm_t *cm) +{ + uct_priv_worker_t *wpriv = ucs_derived_of(cm->super.iface.worker, + uct_priv_worker_t); + + return wpriv->async; +} + +ucs_status_t uct_rdmacm_cm_destroy_id(struct rdma_cm_id *id); + +ucs_status_t uct_rdmacm_cm_ack_event(struct rdma_cm_event *event); + +ucs_status_t uct_rdmacm_cm_reject(struct rdma_cm_id *id); + +#endif diff --git a/src/uct/ib/rdmacm/rdmacm_cm_ep.c b/src/uct/ib/rdmacm/rdmacm_cm_ep.c new file mode 100644 index 0000000..639524d --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_cm_ep.c @@ -0,0 +1,473 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "rdmacm_cm_ep.h" +#include "rdmacm_cm.h" +#include + + +const char* uct_rdmacm_cm_ep_str(uct_rdmacm_cm_ep_t *cep, char *str, + size_t max_len) +{ + char flags_buf[UCT_RDMACM_EP_FLAGS_STRING_LEN]; + + static const char *ep_flag_to_str[] = { + [ucs_ilog2(UCT_RDMACM_CM_EP_ON_CLIENT)] = "client", + [ucs_ilog2(UCT_RDMACM_CM_EP_ON_SERVER)] = "server", + [ucs_ilog2(UCT_RDMACM_CM_EP_CONN_CB_INVOKED)] = "connect_cb_invoked", + [ucs_ilog2(UCT_RDMACM_CM_EP_GOT_DISCONNECT)] = "got_disconnect", + [ucs_ilog2(UCT_RDMACM_CM_EP_DISCONNECTING)] = "disconnecting", + [ucs_ilog2(UCT_RDMACM_CM_EP_FAILED)] = "failed", + NULL + }; + + ucs_flags_str(flags_buf, sizeof(flags_buf), cep->flags, ep_flag_to_str); + ucs_snprintf_safe(str, max_len, "rdmacm_ep %p, status %s, flags %s", + cep, ucs_status_string(cep->status), flags_buf); + return str; +} + +void uct_rdmacm_cm_ep_client_connect_cb(uct_rdmacm_cm_ep_t *cep, + uct_cm_remote_data_t *remote_data, + ucs_status_t status) +{ + cep->flags |= UCT_RDMACM_CM_EP_CONN_CB_INVOKED; + uct_cm_ep_client_connect_cb(&cep->super, remote_data, status); +} + +void uct_rdmacm_cm_ep_server_connect_cb(uct_rdmacm_cm_ep_t *cep, + ucs_status_t status) +{ + cep->flags |= UCT_RDMACM_CM_EP_CONN_CB_INVOKED; + uct_cm_ep_server_connect_cb(&cep->super, status); +} + +void uct_rdmacm_cm_ep_error_cb(uct_rdmacm_cm_ep_t *cep, + uct_cm_remote_data_t *remote_data, + ucs_status_t status) +{ + if (cep->flags & UCT_RDMACM_CM_EP_FAILED) { + return; + } + + ucs_assert(status != UCS_OK); + cep->status = status; + + if (cep->flags & UCT_RDMACM_CM_EP_CONN_CB_INVOKED) { + /* already connected, so call disconnect callback */ + cep->super.disconnect_cb(&cep->super.super.super, cep->super.user_data); + } else if (cep->flags & UCT_RDMACM_CM_EP_ON_CLIENT) { + /* not connected yet, so call client side connect callback with err + * status */ + uct_rdmacm_cm_ep_client_connect_cb(cep, remote_data, status); + } else { + ucs_assert(cep->flags & UCT_RDMACM_CM_EP_ON_SERVER); + /* 
not connected yet, so call server side connect callback with err + * status */ + uct_rdmacm_cm_ep_server_connect_cb(cep, status); + } +} + +void uct_rdmacm_cm_ep_set_failed(uct_rdmacm_cm_ep_t *cep, + uct_cm_remote_data_t *remote_data, + ucs_status_t status) +{ + uct_rdmacm_cm_ep_error_cb(cep, remote_data, status); + cep->flags |= UCT_RDMACM_CM_EP_FAILED; +} + +static UCS_F_ALWAYS_INLINE +uct_rdmacm_cm_t *uct_rdmacm_cm_ep_get_cm(uct_rdmacm_cm_ep_t *cep) +{ + /* return the rdmacm connection manager this ep is using */ + return ucs_container_of(cep->super.super.super.iface, uct_rdmacm_cm_t, + super.iface); +} + +static UCS_F_ALWAYS_INLINE +ucs_async_context_t *uct_rdmacm_cm_ep_get_async(uct_rdmacm_cm_ep_t *cep) +{ + return uct_rdmacm_cm_get_async(uct_rdmacm_cm_ep_get_cm(cep)); +} + +static void uct_rdmacm_cm_ep_destroy_dummy_cq_qp(uct_rdmacm_cm_ep_t *cep) +{ + int ret; + + if (cep->qp != NULL) { + ret = ibv_destroy_qp(cep->qp); + if (ret != 0) { + ucs_warn("ibv_destroy_qp() returned %d: %m", ret); + } + } + + if (cep->cq != NULL) { + ret = ibv_destroy_cq(cep->cq); + if (ret != 0) { + ucs_warn("ibv_destroy_cq() returned %d: %m", ret); + } + } + + cep->qp = NULL; + cep->cq = NULL; +} + +static ucs_status_t uct_rdmacm_cm_create_dummy_cq_qp(struct rdma_cm_id *id, + struct ibv_cq **cq_p, + struct ibv_qp **qp_p) +{ + struct ibv_qp_init_attr qp_init_attr; + ucs_status_t status; + struct ibv_cq *cq; + struct ibv_qp *qp; + + /* Create a dummy completion queue */ + cq = ibv_create_cq(id->verbs, 1, NULL, NULL, 0); + if (cq == NULL) { + ucs_error("ibv_create_cq() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err; + } + + /* Create a dummy UD qp */ + memset(&qp_init_attr, 0, sizeof(qp_init_attr)); + qp_init_attr.send_cq = cq; + qp_init_attr.recv_cq = cq; + qp_init_attr.qp_type = IBV_QPT_UD; + qp_init_attr.cap.max_send_wr = 2; + qp_init_attr.cap.max_recv_wr = 2; + qp_init_attr.cap.max_send_sge = 1; + qp_init_attr.cap.max_recv_sge = 1; + + qp = ibv_create_qp(id->pd, 
&qp_init_attr); + if (qp == NULL) { + ucs_error("failed to create a dummy ud qp. %m"); + status = UCS_ERR_IO_ERROR; + goto err_destroy_cq; + } + + ucs_debug("created ud QP %p with qp_num: 0x%x and cq %p on rdmacm_id %p", + qp, qp->qp_num, cq, id); + + *cq_p = cq; + *qp_p = qp; + + return UCS_OK; + +err_destroy_cq: + ibv_destroy_cq(cq); +err: + return status; +} + +ucs_status_t +uct_rdamcm_cm_ep_set_qp_num(struct rdma_conn_param *conn_param, + uct_rdmacm_cm_ep_t *cep) +{ + ucs_status_t status; + struct ibv_qp *qp; + struct ibv_cq *cq; + + /* create a dummy qp in order to get a unique qp_num to provide to librdmacm */ + status = uct_rdmacm_cm_create_dummy_cq_qp(cep->id, &cq, &qp); + if (status != UCS_OK) { + return status; + } + + cep->cq = cq; + cep->qp = qp; + conn_param->qp_num = qp->qp_num; + return UCS_OK; +} + +ucs_status_t uct_rdmacm_cm_ep_conn_param_init(uct_rdmacm_cm_ep_t *cep, + struct rdma_conn_param *conn_param) +{ + uct_rdmacm_priv_data_hdr_t *hdr; + ucs_status_t status; + char dev_name[UCT_DEVICE_NAME_MAX]; + ssize_t priv_data_ret; + char ep_str[UCT_RDMACM_EP_STRING_LEN]; + + uct_rdmacm_cm_id_to_dev_name(cep->id, dev_name); + + /* Pack data to send inside rdmacm's conn_param to the remote peer */ + hdr = (uct_rdmacm_priv_data_hdr_t*)conn_param->private_data; + priv_data_ret = cep->super.priv_pack_cb(cep->super.user_data, + dev_name, hdr + 1); + + if (priv_data_ret < 0) { + ucs_assert(priv_data_ret > UCS_ERR_LAST); + status = (ucs_status_t)priv_data_ret; + ucs_error("%s: rdma_cm private data pack function failed with error: %s", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + ucs_status_string(status)); + + goto err; + } else if (priv_data_ret > uct_rdmacm_cm_get_max_conn_priv()) { + status = UCS_ERR_EXCEEDS_LIMIT; + ucs_error("%s: rdma_cm private data pack function returned %zd " + "(max: %zu)", uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + priv_data_ret, uct_rdmacm_cm_get_max_conn_priv()); + goto err; + } + + 
ucs_assert_always(priv_data_ret <= UINT8_MAX); + hdr->length = (uint8_t)priv_data_ret; + hdr->status = UCS_OK; + + status = uct_rdamcm_cm_ep_set_qp_num(conn_param, cep); + if (status != UCS_OK) { + goto err; + } + + conn_param->private_data_len = sizeof(*hdr) + hdr->length; + + return UCS_OK; + +err: + return status; +} + +static ucs_status_t uct_rdamcm_cm_ep_client_init(uct_rdmacm_cm_ep_t *cep, + const uct_ep_params_t *params) +{ + uct_rdmacm_cm_t *rdmacm_cm = uct_rdmacm_cm_ep_get_cm(cep); + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + char ep_str[UCT_RDMACM_EP_STRING_LEN]; + ucs_status_t status; + + cep->flags |= UCT_RDMACM_CM_EP_ON_CLIENT; + cep->super.client.connect_cb = params->sockaddr_connect_cb.client; + + ucs_trace("%s: rdma_create_id on client (rdmacm %p, event_channel=%p)", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + rdmacm_cm, rdmacm_cm->ev_ch); + + if (rdma_create_id(rdmacm_cm->ev_ch, &cep->id, cep, RDMA_PS_TCP)) { + ucs_error("rdma_create_id() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err; + } + + /* rdma_resolve_addr needs to be called last in the ep_create flow to + * prevent a race where there are uninitialized fields used when the + * RDMA_CM_EVENT_ROUTE_RESOLVED event is already received in the the async + * thread. Therefore, all ep fields have to be initialized before this + * function is called. 
*/ + ucs_trace("%s: rdma_resolve_addr on cm_id %p", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), cep->id); + if (rdma_resolve_addr(cep->id, NULL, (struct sockaddr *)params->sockaddr->addr, + 1000/* TODO */)) { + ucs_error("rdma_resolve_addr() to dst addr %s failed: %m", + ucs_sockaddr_str((struct sockaddr *)params->sockaddr->addr, + ip_port_str, UCS_SOCKADDR_STRING_LEN)); + status = UCS_ERR_IO_ERROR; + goto err_destroy_id; + } + + return UCS_OK; + +err_destroy_id: + uct_rdmacm_cm_destroy_id(cep->id); +err: + return status; +} + +static ucs_status_t uct_rdamcm_cm_ep_server_init(uct_rdmacm_cm_ep_t *cep, + const uct_ep_params_t *params) +{ + struct rdma_cm_event *event = (struct rdma_cm_event *)params->conn_request; + uct_rdmacm_cm_t *cm = uct_rdmacm_cm_ep_get_cm(cep); + struct rdma_conn_param conn_param; + ucs_status_t status; + char ep_str[UCT_RDMACM_EP_STRING_LEN]; + uct_cm_remote_data_t remote_data; + + cep->flags |= UCT_RDMACM_CM_EP_ON_SERVER; + + if (event->listen_id->channel != cm->ev_ch) { + /* the server will open the ep to the client on a different CM. 
+ * not the one on which its listener is listening on */ + if (rdma_migrate_id(event->id, cm->ev_ch)) { + ucs_error("failed to migrate id %p to event_channel %p (cm=%p)", + event->id, cm->ev_ch, cm); + uct_rdmacm_cm_reject(event->id); + status = UCS_ERR_IO_ERROR; + goto err_server_cb; + } + + ucs_debug("%s: migrated id %p from event_channel=%p to " + "new cm %p (event_channel=%p)", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + event->id, event->listen_id->channel, cm, cm->ev_ch); + } + + cep->super.server.connect_cb = params->sockaddr_connect_cb.server; + cep->id = event->id; + cep->id->context = cep; + + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.private_data = ucs_alloca(uct_rdmacm_cm_get_max_conn_priv() + + sizeof(uct_rdmacm_priv_data_hdr_t)); + + status = uct_rdmacm_cm_ep_conn_param_init(cep, &conn_param); + if (status != UCS_OK) { + uct_rdmacm_cm_reject(event->id); + goto err_server_cb; + } + + ucs_trace("%s: rdma_accept on cm_id %p", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + event->id); + + if (rdma_accept(event->id, &conn_param)) { + ucs_error("rdma_accept(on id=%p) failed: %m", event->id); + uct_rdmacm_cm_ep_destroy_dummy_cq_qp(cep); + status = UCS_ERR_IO_ERROR; + goto err_server_cb; + } + + uct_rdmacm_cm_ack_event(event); + return UCS_OK; + +err_server_cb: + remote_data.field_mask = 0; + uct_rdmacm_cm_ep_set_failed(cep, &remote_data, status); + uct_rdmacm_cm_destroy_id(event->id); + uct_rdmacm_cm_ack_event(event); + return status; +} + +ucs_status_t uct_rdmacm_cm_ep_disconnect(uct_ep_h ep, unsigned flags) +{ + uct_rdmacm_cm_ep_t *cep = ucs_derived_of(ep, uct_rdmacm_cm_ep_t); + char ep_str[UCT_RDMACM_EP_STRING_LEN]; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + ucs_status_t status; + + UCS_ASYNC_BLOCK(uct_rdmacm_cm_ep_get_async(cep)); + if (ucs_unlikely(cep->flags & UCT_RDMACM_CM_EP_FAILED)) { + ucs_error("%s: id=%p to peer %s", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + cep->id, 
ucs_sockaddr_str(rdma_get_peer_addr(cep->id), + ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + status = cep->status; + goto out; + } + + if (ucs_unlikely(cep->flags & UCT_RDMACM_CM_EP_DISCONNECTING)) { + if (cep->flags & UCT_RDMACM_CM_EP_GOT_DISCONNECT) { + ucs_error("%s: duplicate call of uct_ep_disconnect on a " + "disconnected ep (id=%p to peer %s)", + uct_rdmacm_cm_ep_str(cep, ep_str, + UCT_RDMACM_EP_STRING_LEN), + cep->id, ucs_sockaddr_str(rdma_get_peer_addr(cep->id), + ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + status = UCS_ERR_NOT_CONNECTED; + goto out; + } + + ucs_debug("%s: duplicate call of uct_ep_disconnect on an ep " + "that was not disconnected yet (id=%p to peer %s).", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + cep->id, ucs_sockaddr_str(rdma_get_peer_addr(cep->id), + ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + status = UCS_INPROGRESS; + goto out; + } + + if (!(cep->flags & UCT_RDMACM_CM_EP_CONN_CB_INVOKED)) { + ucs_debug("%s: calling uct_ep_disconnect on an ep that is not " + "connected yet (id=%p to peer %s)", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + cep->id, ucs_sockaddr_str(rdma_get_peer_addr(cep->id), + ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + status = UCS_ERR_BUSY; + goto out; + } + + cep->flags |= UCT_RDMACM_CM_EP_DISCONNECTING; + if (rdma_disconnect(cep->id)) { + ucs_error("%s: (id=%p) failed to disconnect from peer %p", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + cep->id, ucs_sockaddr_str(rdma_get_peer_addr(cep->id), ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + status = UCS_ERR_IO_ERROR; + goto out; + } + + ucs_debug("%s: (id=%p) disconnecting from peer :%s", + uct_rdmacm_cm_ep_str(cep, ep_str, UCT_RDMACM_EP_STRING_LEN), + cep->id, ucs_sockaddr_str(rdma_get_peer_addr(cep->id), ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + status = UCS_OK; + +out: + UCS_ASYNC_UNBLOCK(uct_rdmacm_cm_ep_get_async(cep)); + return status; +} + +UCS_CLASS_INIT_FUNC(uct_rdmacm_cm_ep_t, const 
uct_ep_params_t *params) +{ + ucs_status_t status; + char ep_str[UCT_RDMACM_EP_STRING_LEN]; + + UCS_CLASS_CALL_SUPER_INIT(uct_cm_base_ep_t, params); + + self->cq = NULL; + self->qp = NULL; + self->flags = 0; + self->status = UCS_OK; + + if (params->field_mask & UCT_EP_PARAM_FIELD_SOCKADDR) { + status = uct_rdamcm_cm_ep_client_init(self, params); + } else if (params->field_mask & UCT_EP_PARAM_FIELD_CONN_REQUEST) { + status = uct_rdamcm_cm_ep_server_init(self, params); + } else { + ucs_error("either UCT_EP_PARAM_FIELD_SOCKADDR or UCT_EP_PARAM_FIELD_CONN_REQUEST " + "has to be provided"); + status = UCS_ERR_INVALID_PARAM; + } + + if (status == UCS_OK) { + ucs_debug("%s: created an endpoint on rdmacm %p id: %p", + uct_rdmacm_cm_ep_str(self, ep_str, UCT_RDMACM_EP_STRING_LEN), + uct_rdmacm_cm_ep_get_cm(self), self->id); + } + + return status; +} + +UCS_CLASS_CLEANUP_FUNC(uct_rdmacm_cm_ep_t) +{ + uct_rdmacm_cm_t *rdmacm_cm = uct_rdmacm_cm_ep_get_cm(self); + uct_priv_worker_t *worker_priv = ucs_derived_of(rdmacm_cm->super.iface.worker, + uct_priv_worker_t); + char ep_str[UCT_RDMACM_EP_STRING_LEN]; + + ucs_trace("%s: destroy ep on cm %p (worker_priv=%p)", + uct_rdmacm_cm_ep_str(self, ep_str, UCT_RDMACM_EP_STRING_LEN), + rdmacm_cm, worker_priv); + + UCS_ASYNC_BLOCK(worker_priv->async); + + uct_rdmacm_cm_ep_destroy_dummy_cq_qp(self); + + /* rdma_destroy_id() cleans all events not yet reported on progress thread, + * so no events would be reported to the user after destroying the id */ + uct_rdmacm_cm_destroy_id(self->id); + + UCS_ASYNC_UNBLOCK(worker_priv->async); +} + +UCS_CLASS_DEFINE(uct_rdmacm_cm_ep_t, uct_base_ep_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_rdmacm_cm_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_rdmacm_cm_ep_t, uct_ep_t); diff --git a/src/uct/ib/rdmacm/rdmacm_cm_ep.h b/src/uct/ib/rdmacm/rdmacm_cm_ep.h new file mode 100644 index 0000000..e3357bb --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_cm_ep.h @@ -0,0 +1,63 @@ +/** +* Copyright (C) 
Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "rdmacm_listener.h" + + +/** + * RDMACM endpoint that is opened on a connection manager + */ +typedef struct uct_rdmacm_cm_ep { + uct_cm_base_ep_t super; + struct rdma_cm_id *id; /* The rdmacm id that is created per this ep */ + struct ibv_cq *cq; /* Dummy cq used for creating a dummy qp */ + struct ibv_qp *qp; /* Dummy qp used for generating a unique qp_num */ + uint8_t flags; + ucs_status_t status; +} uct_rdmacm_cm_ep_t; + +enum { + UCT_RDMACM_CM_EP_ON_CLIENT = UCS_BIT(0), + UCT_RDMACM_CM_EP_ON_SERVER = UCS_BIT(1), + UCT_RDMACM_CM_EP_CONN_CB_INVOKED = UCS_BIT(2), /* Connect callback was + invoked. */ + UCT_RDMACM_CM_EP_GOT_DISCONNECT = UCS_BIT(3), /* Got disconnect event. */ + UCT_RDMACM_CM_EP_DISCONNECTING = UCS_BIT(4), /* @ref uct_ep_disconnect was + called on the ep. */ + UCT_RDMACM_CM_EP_FAILED = UCS_BIT(5) /* The EP is in error state, + see @ref + uct_rdmacm_cm_ep_t::status.*/ +}; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_rdmacm_cm_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_rdmacm_cm_ep_t, uct_ep_t); + +ucs_status_t uct_rdmacm_cm_ep_disconnect(uct_ep_h ep, unsigned flags); + +ucs_status_t +uct_rdamcm_cm_ep_set_qp_num(struct rdma_conn_param *conn_param, + uct_rdmacm_cm_ep_t *cep); + +ucs_status_t uct_rdmacm_cm_ep_conn_param_init(uct_rdmacm_cm_ep_t *cep, + struct rdma_conn_param *conn_param); + +void uct_rdmacm_cm_ep_error_cb(uct_rdmacm_cm_ep_t *cep, + uct_cm_remote_data_t *remote_data, + ucs_status_t status); + +void uct_rdmacm_cm_ep_set_failed(uct_rdmacm_cm_ep_t *cep, + uct_cm_remote_data_t *remote_data, + ucs_status_t status); + +const char* uct_rdmacm_cm_ep_str(uct_rdmacm_cm_ep_t *cep, char *str, + size_t max_len); + +void uct_rdmacm_cm_ep_client_connect_cb(uct_rdmacm_cm_ep_t *cep, + uct_cm_remote_data_t *remote_data, + ucs_status_t status); + +void uct_rdmacm_cm_ep_server_connect_cb(uct_rdmacm_cm_ep_t *cep, + ucs_status_t 
status); diff --git a/src/uct/ib/rdmacm/rdmacm_def.h b/src/uct/ib/rdmacm/rdmacm_def.h new file mode 100644 index 0000000..6220be2 --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_def.h @@ -0,0 +1,58 @@ +/* + * * Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED. + * * See file LICENSE for terms. + * */ + +#ifndef UCT_RDMACM_H +#define UCT_RDMACM_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define UCT_RDMACM_TL_NAME "rdmacm" +#define UCT_RDMACM_UDP_PRIV_DATA_LEN 136 /** See rdma_accept(3) */ +#define UCT_RDMACM_TCP_PRIV_DATA_LEN 56 /** See rdma_connect(3) */ +#define UCT_RDMACM_EP_FLAGS_STRING_LEN 128 /** A string to hold the + representation of the ep flags */ +#define UCT_RDMACM_EP_STRING_LEN 192 /** A string to hold the ep info */ + +typedef struct uct_rdmacm_iface uct_rdmacm_iface_t; +typedef struct uct_rdmacm_ep uct_rdmacm_ep_t; + +typedef struct uct_rdmacm_priv_data_hdr { + uint8_t length; /* length of the private data */ + uint8_t status; +} uct_rdmacm_priv_data_hdr_t; + +typedef struct uct_rdmacm_ctx { + struct rdma_cm_id *cm_id; + uct_rdmacm_ep_t *ep; + ucs_list_link_t list; /* for list of used cm_ids */ +} uct_rdmacm_ctx_t; + +size_t uct_rdmacm_cm_get_max_conn_priv(); + +ucs_status_t uct_rdmacm_resolve_addr(struct rdma_cm_id *cm_id, + struct sockaddr *addr, int timeout_ms, + ucs_log_level_t log_level); + +ucs_status_t uct_rdmacm_ep_resolve_addr(uct_rdmacm_ep_t *ep); + +ucs_status_t uct_rdmacm_ep_set_cm_id(uct_rdmacm_iface_t *iface, uct_rdmacm_ep_t *ep); + +static inline void uct_rdmacm_cm_id_to_dev_name(struct rdma_cm_id *cm_id, char *dev_name) +{ + ucs_snprintf_zero(dev_name, UCT_DEVICE_NAME_MAX, "%s:%d", + ibv_get_device_name(cm_id->verbs->device), cm_id->port_num); +} + +#endif /* UCT_RDMACM_H */ diff --git a/src/uct/ib/rdmacm/rdmacm_ep.c b/src/uct/ib/rdmacm/rdmacm_ep.c new file mode 100644 index 0000000..41c7091 --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_ep.c @@ 
-0,0 +1,264 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#include "rdmacm_ep.h" + + +#define UCT_RDMACM_CB_FLAGS_CHECK(_flags) \ + do { \ + UCT_CB_FLAGS_CHECK(_flags); \ + if (!((_flags) & UCT_CB_FLAG_ASYNC)) { \ + return UCS_ERR_UNSUPPORTED; \ + } \ + } while (0) + + +ucs_status_t uct_rdmacm_ep_resolve_addr(uct_rdmacm_ep_t *ep) +{ + uct_rdmacm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_rdmacm_iface_t); + ucs_status_t status; + + UCS_ASYNC_BLOCK(iface->super.worker->async); + + status = uct_rdmacm_resolve_addr(ep->cm_id_ctx->cm_id, + (struct sockaddr *)&ep->remote_addr, + UCS_MSEC_PER_SEC * iface->config.addr_resolve_timeout, + UCS_LOG_LEVEL_ERROR); + + UCS_ASYNC_UNBLOCK(iface->super.worker->async); + return status; +} + +ucs_status_t uct_rdmacm_ep_set_cm_id(uct_rdmacm_iface_t *iface, uct_rdmacm_ep_t *ep) +{ + ucs_status_t status; + + UCS_ASYNC_BLOCK(iface->super.worker->async); + + /* create a cm_id for the client side */ + if (iface->cm_id_quota > 0) { + /* Create an id for this interface. Events associated with this id will be + * reported on the event_channel that was created on iface init. */ + ep->cm_id_ctx = ucs_malloc(sizeof(*ep->cm_id_ctx), "client cm_id_ctx"); + if (ep->cm_id_ctx == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out; + } + + if (rdma_create_id(iface->event_ch, &ep->cm_id_ctx->cm_id, + ep->cm_id_ctx, RDMA_PS_UDP)) { + ucs_error("rdma_create_id() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto out_free; + } + + ep->cm_id_ctx->ep = ep; + ucs_list_add_tail(&iface->used_cm_ids_list, &ep->cm_id_ctx->list); + iface->cm_id_quota--; + ucs_debug("ep %p, new cm_id %p. 
cm_id_in_quota %d", ep, + ep->cm_id_ctx->cm_id, iface->cm_id_quota); + status = UCS_OK; + goto out; + } else { + ep->cm_id_ctx = NULL; + status = UCS_ERR_NO_RESOURCE; + goto out; + } + +out_free: + ucs_free(ep->cm_id_ctx); +out: + UCS_ASYNC_UNBLOCK(iface->super.worker->async); + return status; +} + +static inline void uct_rdmacm_ep_add_to_pending(uct_rdmacm_iface_t *iface, uct_rdmacm_ep_t *ep) +{ + UCS_ASYNC_BLOCK(iface->super.worker->async); + ucs_list_add_tail(&iface->pending_eps_list, &ep->list_elem); + ep->is_on_pending = 1; + UCS_ASYNC_UNBLOCK(iface->super.worker->async); +} + +static UCS_CLASS_INIT_FUNC(uct_rdmacm_ep_t, const uct_ep_params_t *params) +{ + uct_rdmacm_iface_t *iface = ucs_derived_of(params->iface, + uct_rdmacm_iface_t); + const ucs_sock_addr_t *sockaddr = params->sockaddr; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + ucs_status_t status; + + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super); + + if (iface->is_server) { + /* TODO allow an interface to be used both for server and client */ + return UCS_ERR_UNSUPPORTED; + } + + if (!(params->field_mask & UCT_EP_PARAM_FIELD_SOCKADDR)) { + return UCS_ERR_INVALID_PARAM; + } + + UCT_RDMACM_CB_FLAGS_CHECK((params->field_mask & + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS) ? + params->sockaddr_cb_flags : 0); + + /* Initialize these fields before calling rdma_resolve_addr to avoid a race + * where they are used before being initialized (from the async thread + * - after an RDMA_CM_EVENT_ROUTE_RESOLVED event) */ + self->pack_cb = (params->field_mask & + UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB) ? + params->sockaddr_pack_cb : NULL; + self->pack_cb_arg = (params->field_mask & + UCT_EP_PARAM_FIELD_USER_DATA) ? + params->user_data : NULL; + self->pack_cb_flags = (params->field_mask & + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS) ? 
+ params->sockaddr_cb_flags : 0; + pthread_mutex_init(&self->ops_mutex, NULL); + ucs_queue_head_init(&self->ops); + + /* Save the remote address */ + if (sockaddr->addr->sa_family == AF_INET) { + memcpy(&self->remote_addr, sockaddr->addr, sizeof(struct sockaddr_in)); + } else if (sockaddr->addr->sa_family == AF_INET6) { + memcpy(&self->remote_addr, sockaddr->addr, sizeof(struct sockaddr_in6)); + } else { + ucs_error("rdmacm ep: unknown remote sa_family=%d", sockaddr->addr->sa_family); + status = UCS_ERR_IO_ERROR; + goto err; + } + + self->slow_prog_id = UCS_CALLBACKQ_ID_NULL; + + status = uct_rdmacm_ep_set_cm_id(iface, self); + if (status == UCS_ERR_NO_RESOURCE) { + goto add_to_pending; + } else if (status != UCS_OK) { + goto err; + } + + self->is_on_pending = 0; + + /* After rdma_resolve_addr(), the client will wait for an + * RDMA_CM_EVENT_ADDR_RESOLVED event on the event_channel + * to proceed with the connection establishment. + * This event will be retrieved from the event_channel by the async thread. + * All endpoints share the interface's event_channel. */ + status = uct_rdmacm_ep_resolve_addr(self); + if (status != UCS_OK) { + goto err; + } + + goto out; + +add_to_pending: + /* Add the ep to the pending queue of eps since there is no + * available cm_id for it */ + uct_rdmacm_ep_add_to_pending(iface, self); +out: + ucs_debug("created an RDMACM endpoint on iface %p. 
event_channel: %p, " + "iface cm_id: %p remote addr: %s", + iface, iface->event_ch, iface->cm_id, + ucs_sockaddr_str((struct sockaddr *)sockaddr->addr, + ip_port_str, UCS_SOCKADDR_STRING_LEN)); + self->status = UCS_INPROGRESS; + return UCS_OK; + +err: + pthread_mutex_destroy(&self->ops_mutex); + + return status; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_rdmacm_ep_t) +{ + uct_rdmacm_iface_t *iface = ucs_derived_of(self->super.super.iface, uct_rdmacm_iface_t); + uct_rdmacm_ctx_t *cm_id_ctx; + + ucs_debug("rdmacm_ep %p: destroying", self); + + UCS_ASYNC_BLOCK(iface->super.worker->async); + if (self->is_on_pending) { + ucs_list_del(&self->list_elem); + self->is_on_pending = 0; + } + + /* remove the slow progress function in case it was placed on the slow progress + * chain but wasn't invoked yet */ + uct_worker_progress_unregister_safe(&iface->super.worker->super, + &self->slow_prog_id); + + pthread_mutex_destroy(&self->ops_mutex); + if (!ucs_queue_is_empty(&self->ops)) { + ucs_warn("destroying endpoint %p with not completed operations", self); + } + + /* mark this ep as destroyed so that arriving events on it won't try to + * use it */ + if (self->cm_id_ctx != NULL) { + cm_id_ctx = self->cm_id_ctx->cm_id->context; + cm_id_ctx->ep = NULL; + ucs_debug("ep destroy: cm_id %p", cm_id_ctx->cm_id); + } + UCS_ASYNC_UNBLOCK(iface->super.worker->async); +} + +UCS_CLASS_DEFINE(uct_rdmacm_ep_t, uct_base_ep_t) +UCS_CLASS_DEFINE_NEW_FUNC(uct_rdmacm_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_rdmacm_ep_t, uct_ep_t); + +static unsigned uct_rdmacm_client_err_handle_progress(void *arg) +{ + uct_rdmacm_ep_t *rdmacm_ep = arg; + uct_rdmacm_iface_t *iface = ucs_derived_of(rdmacm_ep->super.super.iface, + uct_rdmacm_iface_t); + + ucs_trace_func("err_handle ep=%p", rdmacm_ep); + UCS_ASYNC_BLOCK(iface->super.worker->async); + + rdmacm_ep->slow_prog_id = UCS_CALLBACKQ_ID_NULL; + uct_set_ep_failed(&UCS_CLASS_NAME(uct_rdmacm_ep_t), &rdmacm_ep->super.super, + 
rdmacm_ep->super.super.iface, rdmacm_ep->status); + + UCS_ASYNC_UNBLOCK(iface->super.worker->async); + return 0; +} + +void uct_rdmacm_ep_set_failed(uct_iface_t *iface, uct_ep_h ep, ucs_status_t status) +{ + uct_rdmacm_iface_t *rdmacm_iface = ucs_derived_of(iface, uct_rdmacm_iface_t); + uct_rdmacm_ep_t *rdmacm_ep = ucs_derived_of(ep, uct_rdmacm_ep_t); + + if (rdmacm_iface->super.err_handler_flags & UCT_CB_FLAG_ASYNC) { + uct_set_ep_failed(&UCS_CLASS_NAME(uct_rdmacm_ep_t), &rdmacm_ep->super.super, + &rdmacm_iface->super.super, status); + } else { + /* invoke the error handling flow from the main thread */ + rdmacm_ep->status = status; + uct_worker_progress_register_safe(&rdmacm_iface->super.worker->super, + uct_rdmacm_client_err_handle_progress, + rdmacm_ep, UCS_CALLBACKQ_FLAG_ONESHOT, + &rdmacm_ep->slow_prog_id); + } +} + +/** + * Caller must lock ep->ops_mutex + */ +void uct_rdmacm_ep_invoke_completions(uct_rdmacm_ep_t *ep, ucs_status_t status) +{ + uct_rdmacm_ep_op_t *op; + + ucs_assert(pthread_mutex_trylock(&ep->ops_mutex) == EBUSY); + + ucs_queue_for_each_extract(op, &ep->ops, queue_elem, 1) { + pthread_mutex_unlock(&ep->ops_mutex); + uct_invoke_completion(op->user_comp, status); + ucs_free(op); + pthread_mutex_lock(&ep->ops_mutex); + } + /* coverity[missing_unlock] */ +} diff --git a/src/uct/ib/rdmacm/rdmacm_ep.h b/src/uct/ib/rdmacm/rdmacm_ep.h new file mode 100644 index 0000000..b2fb02e --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_ep.h @@ -0,0 +1,44 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#ifndef UCT_RDMACM_EP_H +#define UCT_RDMACM_EP_H + +#include "rdmacm_iface.h" + + +typedef struct uct_rdmacm_ep_op uct_rdmacm_ep_op_t; + +struct uct_rdmacm_ep_op { + ucs_queue_elem_t queue_elem; + uct_completion_t *user_comp; +}; + + +struct uct_rdmacm_ep { + uct_base_ep_t super; + uct_sockaddr_priv_pack_callback_t pack_cb; + void *pack_cb_arg; + uint32_t pack_cb_flags; + int is_on_pending; + + pthread_mutex_t ops_mutex; /* guards ops and status */ + ucs_queue_head_t ops; + ucs_status_t status; /* client EP status */ + + ucs_list_link_t list_elem; /* for the pending_eps_list */ + struct sockaddr_storage remote_addr; + uct_worker_cb_id_t slow_prog_id; + uct_rdmacm_ctx_t *cm_id_ctx; +}; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_rdmacm_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_rdmacm_ep_t, uct_ep_t); + +void uct_rdmacm_ep_set_failed(uct_iface_t *iface, uct_ep_h ep, ucs_status_t status); + +void uct_rdmacm_ep_invoke_completions(uct_rdmacm_ep_t *ep, ucs_status_t status); + +#endif diff --git a/src/uct/ib/rdmacm/rdmacm_iface.c b/src/uct/ib/rdmacm/rdmacm_iface.c new file mode 100644 index 0000000..e0b8440 --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_iface.c @@ -0,0 +1,631 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "rdmacm_iface.h" +#include "rdmacm_ep.h" +#include +#include + + +enum uct_rdmacm_process_event_flags { + UCT_RDMACM_PROCESS_EVENT_DESTROY_CM_ID_FLAG = UCS_BIT(0), + UCT_RDMACM_PROCESS_EVENT_ACK_EVENT_FLAG = UCS_BIT(1) +}; + +static ucs_config_field_t uct_rdmacm_iface_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_rdmacm_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + {"BACKLOG", "1024", + "Maximum number of pending connections for an rdma_cm_id.", + ucs_offsetof(uct_rdmacm_iface_config_t, backlog), UCS_CONFIG_TYPE_UINT}, + + {"CM_ID_QUOTA", "64", + "How many rdma_cm connections can progress simultaneously.", + ucs_offsetof(uct_rdmacm_iface_config_t, cm_id_quota), UCS_CONFIG_TYPE_UINT}, + + {NULL} +}; + +static UCS_CLASS_DECLARE_DELETE_FUNC(uct_rdmacm_iface_t, uct_iface_t); + +static ucs_status_t uct_rdmacm_iface_query(uct_iface_h tl_iface, + uct_iface_attr_t *iface_attr) +{ + uct_rdmacm_iface_t *rdmacm_iface = ucs_derived_of(tl_iface, uct_rdmacm_iface_t); + struct sockaddr *addr; + ucs_status_t status; + + uct_base_iface_query(&rdmacm_iface->super, iface_attr); + + iface_attr->iface_addr_len = sizeof(ucs_sock_addr_t); + iface_attr->device_addr_len = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR | + UCT_IFACE_FLAG_CB_ASYNC | + UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE; + /* User's private data size is UCT_RDMACM_UDP_PRIV_DATA_LEN minus room for + * the private_data header (to hold the length of the data) */ + iface_attr->max_conn_priv = UCT_RDMACM_MAX_CONN_PRIV; + + if (rdmacm_iface->is_server) { + addr = rdma_get_local_addr(rdmacm_iface->cm_id); + status = ucs_sockaddr_copy((struct sockaddr *)&iface_attr->listen_sockaddr, + addr); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +static ucs_status_t uct_rdmacm_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *iface_addr) +{ + ucs_sock_addr_t *rdmacm_addr = (ucs_sock_addr_t *)iface_addr; + + rdmacm_addr->addr = 
NULL; + rdmacm_addr->addrlen = 0; + return UCS_OK; +} + +static ucs_status_t uct_rdmacm_accept(struct rdma_cm_id *id) +{ + /* The server will not send any reply data back to the client */ + struct rdma_conn_param conn_param = {0}; + + /* Accepting the connection will generate the RDMA_CM_EVENT_ESTABLISHED + * event on the client side. */ + if (rdma_accept(id, &conn_param)) { + ucs_error("rdma_accept(to id=%p) failed: %m", id); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +static ucs_status_t uct_rdmacm_iface_accept(uct_iface_h tl_iface, + uct_conn_request_h conn_request) +{ + struct rdma_cm_event *event = conn_request; + ucs_status_t status; + + ucs_trace("accepting event %p with id %p", event, event->id); + status = uct_rdmacm_accept(event->id); + rdma_destroy_id(event->id); + rdma_ack_cm_event(event); + + return status; +} + +static ucs_status_t uct_rdmacm_iface_reject(uct_iface_h tl_iface, + uct_conn_request_h conn_request) +{ + struct rdma_cm_event *event = conn_request; + ucs_status_t status = UCS_OK; + uct_rdmacm_priv_data_hdr_t hdr = { + .length = 0, + .status = (uint8_t)UCS_ERR_REJECTED + }; + + ucs_trace("rejecting event %p with id %p", event, event->id); + if (rdma_reject(event->id, &hdr, sizeof(hdr))) { + ucs_warn("rdma_reject(id=%p) failed: %m", event->id); + status = UCS_ERR_IO_ERROR; + } + + rdma_destroy_id(event->id); + rdma_ack_cm_event(event); + return status; +} + +static ucs_status_t uct_rdmacm_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp) +{ + uct_rdmacm_ep_t *ep = ucs_derived_of(tl_ep, uct_rdmacm_ep_t); + ucs_status_t status; + uct_rdmacm_ep_op_t *op; + + pthread_mutex_lock(&ep->ops_mutex); + status = ep->status; + if ((status == UCS_INPROGRESS) && (comp != NULL)) { + op = ucs_malloc(sizeof(*op), "uct_rdmacm_ep_flush op"); + if (op != NULL) { + op->user_comp = comp; + ucs_queue_push(&ep->ops, &op->queue_elem); + } else { + status = UCS_ERR_NO_MEMORY; + } + } + pthread_mutex_unlock(&ep->ops_mutex); + + return 
status; +} + +static uct_iface_ops_t uct_rdmacm_iface_ops = { + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_rdmacm_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_rdmacm_ep_t), + .ep_flush = uct_rdmacm_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_pending_purge = ucs_empty_function, + .iface_accept = uct_rdmacm_iface_accept, + .iface_reject = uct_rdmacm_iface_reject, + .iface_progress_enable = (uct_iface_progress_enable_func_t)ucs_empty_function_return_success, + .iface_progress_disable = (uct_iface_progress_disable_func_t)ucs_empty_function_return_success, + .iface_progress = ucs_empty_function_return_zero, + .iface_flush = uct_base_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_rdmacm_iface_t), + .iface_query = uct_rdmacm_iface_query, + .iface_is_reachable = (uct_iface_is_reachable_func_t)ucs_empty_function_return_zero, + .iface_get_device_address = (uct_iface_get_device_address_func_t)ucs_empty_function_return_success, + .iface_get_address = uct_rdmacm_iface_get_address +}; + +ucs_status_t uct_rdmacm_resolve_addr(struct rdma_cm_id *cm_id, + struct sockaddr *addr, int timeout_ms, + ucs_log_level_t log_level) +{ + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + + if (rdma_resolve_addr(cm_id, NULL, addr, timeout_ms)) { + ucs_log(log_level, "rdma_resolve_addr(addr=%s) failed: %m", + ucs_sockaddr_str(addr, ip_port_str, UCS_SOCKADDR_STRING_LEN)); + return UCS_ERR_IO_ERROR; + } + return UCS_OK; +} + +void uct_rdmacm_iface_client_start_next_ep(uct_rdmacm_iface_t *iface) +{ + ucs_status_t status; + uct_rdmacm_ep_t *ep, *tmp; + + UCS_ASYNC_BLOCK(iface->super.worker->async); + + /* try to start an ep from the pending eps list */ + ucs_list_for_each_safe(ep, tmp, &iface->pending_eps_list, list_elem) { + status = uct_rdmacm_ep_set_cm_id(iface, ep); + if (status != UCS_OK) { + continue; + } + + ucs_list_del(&ep->list_elem); + ep->is_on_pending = 0; + + status = uct_rdmacm_ep_resolve_addr(ep); + if (status == UCS_OK) { + 
break; + } + + uct_rdmacm_ep_set_failed(&iface->super.super, &ep->super.super, status); + } + + UCS_ASYNC_UNBLOCK(iface->super.worker->async); +} + +static void uct_rdmacm_client_handle_failure(uct_rdmacm_iface_t *iface, + uct_rdmacm_ep_t *ep, + ucs_status_t status) +{ + ucs_assert(!iface->is_server); + if (ep != NULL) { + pthread_mutex_lock(&ep->ops_mutex); + uct_rdmacm_ep_set_failed(&iface->super.super, &ep->super.super, status); + uct_rdmacm_ep_invoke_completions(ep, status); + pthread_mutex_unlock(&ep->ops_mutex); + } +} + +static void uct_rdmacm_iface_process_conn_req(uct_rdmacm_iface_t *iface, + struct rdma_cm_event *event, + struct sockaddr *remote_addr) +{ + uct_rdmacm_priv_data_hdr_t *hdr; + + hdr = (uct_rdmacm_priv_data_hdr_t*) event->param.ud.private_data; + ucs_assert(hdr->status == UCS_OK); + + /* TODO check the iface's cb_flags to determine when to invoke this callback. + * currently only UCT_CB_FLAG_ASYNC is supported so the cb is invoked from here */ + iface->conn_request_cb(&iface->super.super, iface->conn_request_arg, + /* connection request*/ + event, + /* private data */ + UCS_PTR_BYTE_OFFSET(event->param.ud.private_data, + sizeof(uct_rdmacm_priv_data_hdr_t)), + /* length */ + hdr->length); +} + +/** + * Release a cm_id. This function should be called when the async context + * is locked. 
+ */ +static void uct_rdmacm_iface_release_cm_id(uct_rdmacm_iface_t *iface, + uct_rdmacm_ctx_t **cm_id_ctx_p) +{ + uct_rdmacm_ctx_t *cm_id_ctx = *cm_id_ctx_p; + + ucs_trace("destroying cm_id %p", cm_id_ctx->cm_id); + + ucs_list_del(&cm_id_ctx->list); + if (cm_id_ctx->ep != NULL) { + cm_id_ctx->ep->cm_id_ctx = NULL; + } + rdma_destroy_id(cm_id_ctx->cm_id); + ucs_free(cm_id_ctx); + iface->cm_id_quota++; + + *cm_id_ctx_p = NULL; +} + +static unsigned +uct_rdmacm_iface_process_event(uct_rdmacm_iface_t *iface, + struct rdma_cm_event *event) +{ + struct sockaddr *remote_addr = rdma_get_peer_addr(event->id); + uct_rdmacm_md_t *rdmacm_md = (uct_rdmacm_md_t *)iface->super.md; + unsigned ret_flags = UCT_RDMACM_PROCESS_EVENT_ACK_EVENT_FLAG; + uct_rdmacm_ep_t *ep = NULL; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + char dev_name[UCT_DEVICE_NAME_MAX]; + uct_rdmacm_priv_data_hdr_t *hdr; + struct rdma_conn_param conn_param; + uct_rdmacm_ctx_t *cm_id_ctx; + ssize_t priv_data_ret; + ucs_status_t status; + + if (iface->is_server) { + ucs_assert((iface->cm_id == event->id) || + ((event->event == RDMA_CM_EVENT_CONNECT_REQUEST) && + (iface->cm_id == event->listen_id))); + } else { + cm_id_ctx = event->id->context; + ep = cm_id_ctx->ep; + } + + ucs_trace("rdmacm event (fd=%d cm_id %p) on %s (ep=%p): %s. Peer: %s.", + iface->event_ch->fd, event->id, (iface->is_server ? 
"server" : "client"), + ep, rdma_event_str(event->event), + ucs_sockaddr_str(remote_addr, ip_port_str, UCS_SOCKADDR_STRING_LEN)); + + status = UCS_ERR_UNREACHABLE; + /* The following applies for rdma_cm_id of type RDMA_PS_UDP only */ + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + /* Client - resolve the route to the server */ + if (ep == NULL) { + /* received an event on an non-existing ep - an already destroyed ep */ + ret_flags |= UCT_RDMACM_PROCESS_EVENT_DESTROY_CM_ID_FLAG; + } else if (rdma_resolve_route(event->id, UCS_MSEC_PER_SEC * + rdmacm_md->addr_resolve_timeout)) { + ucs_error("rdma_resolve_route(to addr=%s) failed: %m", + ucs_sockaddr_str(remote_addr, ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + ret_flags |= UCT_RDMACM_PROCESS_EVENT_DESTROY_CM_ID_FLAG; + uct_rdmacm_client_handle_failure(iface, ep, UCS_ERR_INVALID_ADDR); + } + break; + + case RDMA_CM_EVENT_ROUTE_RESOLVED: + /* Client - send a connection request to the server */ + if (ep == NULL) { + /* received an event on an non-existing ep - an already destroyed ep */ + ret_flags |= UCT_RDMACM_PROCESS_EVENT_DESTROY_CM_ID_FLAG; + } else { + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.private_data = ucs_alloca(UCT_RDMACM_MAX_CONN_PRIV + + sizeof(uct_rdmacm_priv_data_hdr_t)); + + uct_rdmacm_cm_id_to_dev_name(ep->cm_id_ctx->cm_id, dev_name); + hdr = (uct_rdmacm_priv_data_hdr_t*)conn_param.private_data; + /* TODO check the ep's cb_flags to determine when to invoke this callback. + * currently only UCT_CB_FLAG_ASYNC is supported so the cb is invoked from here */ + priv_data_ret = ep->pack_cb(ep->pack_cb_arg, dev_name, hdr + 1); + if (priv_data_ret < 0) { + ucs_trace("rdmacm client (iface=%p cm_id=%p fd=%d) failed to fill " + "private data. 
status: %s", + iface, event->id, iface->event_ch->fd, + ucs_status_string((ucs_status_t)priv_data_ret)); + ret_flags |= UCT_RDMACM_PROCESS_EVENT_DESTROY_CM_ID_FLAG; + uct_rdmacm_client_handle_failure(iface, ep, (ucs_status_t)priv_data_ret); + break; + } + + hdr->length = (uint8_t)priv_data_ret; + hdr->status = UCS_OK; + /* The private_data starts with the header of the user's private data + * and then the private data itself */ + conn_param.private_data_len = sizeof(*hdr) + hdr->length; + + if (rdma_connect(event->id, &conn_param)) { + ucs_error("rdma_connect(to addr=%s) failed: %m", + ucs_sockaddr_str(remote_addr, ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + ret_flags |= UCT_RDMACM_PROCESS_EVENT_DESTROY_CM_ID_FLAG; + uct_rdmacm_client_handle_failure(iface, ep, + UCS_ERR_SOME_CONNECTS_FAILED); + } + } + break; + + case RDMA_CM_EVENT_CONNECT_REQUEST: + /* Server - handle a connection request from the client */ + ucs_assert(iface->is_server); + uct_rdmacm_iface_process_conn_req(iface, event, remote_addr); + ret_flags &= ~UCT_RDMACM_PROCESS_EVENT_ACK_EVENT_FLAG; + break; + + case RDMA_CM_EVENT_REJECTED: + /* Client - server rejected the connection request */ + ucs_warn("rdmacm connection request to %s rejected, id %p", + ucs_sockaddr_str(remote_addr, ip_port_str, + UCS_SOCKADDR_STRING_LEN), event->id); + + ret_flags |= UCT_RDMACM_PROCESS_EVENT_DESTROY_CM_ID_FLAG; + uct_rdmacm_client_handle_failure(iface, ep, UCS_ERR_REJECTED); + break; + + case RDMA_CM_EVENT_ESTABLISHED: + /* Client - connection is ready */ + ucs_assert(!iface->is_server); + ret_flags |= UCT_RDMACM_PROCESS_EVENT_DESTROY_CM_ID_FLAG; + if (ep != NULL) { + pthread_mutex_lock(&ep->ops_mutex); + ep->status = UCS_OK; + uct_rdmacm_ep_invoke_completions(ep, UCS_OK); + pthread_mutex_unlock(&ep->ops_mutex); + } + break; + + /* client error events */ + case RDMA_CM_EVENT_UNREACHABLE: + hdr = (uct_rdmacm_priv_data_hdr_t *)event->param.ud.private_data; + if ((hdr != NULL) && (event->param.ud.private_data_len > 0) 
&& + ((ucs_status_t)hdr->status == UCS_ERR_REJECTED)) { + ucs_assert(hdr->length == 0); + ucs_assert(event->param.ud.private_data_len >= sizeof(*hdr)); + ucs_assert(!iface->is_server); + status = UCS_ERR_REJECTED; + } + /* Fall through */ + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_CONNECT_RESPONSE: + /* client and server error events */ + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_DISCONNECTED: + /* Server/Client - connection was disconnected */ + if (status != UCS_ERR_REJECTED) { + ucs_error("received event %s. status = %d. Peer: %s.", + rdma_event_str(event->event), event->status, + ucs_sockaddr_str(remote_addr, ip_port_str, UCS_SOCKADDR_STRING_LEN)); + } + + if (!iface->is_server) { + ret_flags |= UCT_RDMACM_PROCESS_EVENT_DESTROY_CM_ID_FLAG; + uct_rdmacm_client_handle_failure(iface, ep, status); + } + break; + + default: + ucs_warn("unexpected RDMACM event: %d", event->event); + break; + } + + return ret_flags; +} + +static void uct_rdmacm_iface_event_handler(int fd, void *arg) +{ + uct_rdmacm_iface_t *iface = arg; + uct_rdmacm_ctx_t *cm_id_ctx = NULL; + struct rdma_cm_event *event; + unsigned proc_event_flags; + int ret; + + for (;;) { + /* Fetch an event */ + ret = rdma_get_cm_event(iface->event_ch, &event); + if (ret) { + /* EAGAIN (in a non-blocking rdma_get_cm_event) means that + * there are no more events */ + if (errno != EAGAIN) { + ucs_warn("rdma_get_cm_event() failed: %m"); + } + return; + } + + proc_event_flags = uct_rdmacm_iface_process_event(iface, event); + if (!iface->is_server) { + cm_id_ctx = (uct_rdmacm_ctx_t *)event->id->context; + } + + if (proc_event_flags & UCT_RDMACM_PROCESS_EVENT_ACK_EVENT_FLAG) { + ret = rdma_ack_cm_event(event); + if (ret) { + ucs_warn("rdma_ack_cm_event() failed: %m"); + } + } + + if ((proc_event_flags & UCT_RDMACM_PROCESS_EVENT_DESTROY_CM_ID_FLAG) && + (cm_id_ctx != NULL)) { + uct_rdmacm_iface_release_cm_id(iface, &cm_id_ctx); + 
/**
 * Initialize an rdmacm interface in server or client mode.
 *
 * Creates a non-blocking rdmacm event channel and registers
 * uct_rdmacm_iface_event_handler() for it on the worker's async context.
 * In server mode it also creates an RDMA_PS_UDP cm_id, binds it to the
 * requested sockaddr and starts listening on it.
 *
 * @param md         Memory domain; its address resolution timeout is copied.
 * @param worker     Worker; must have an async context.
 * @param params     Interface parameters (open mode, listen address, callbacks).
 * @param tl_config  Interface configuration (backlog, cm_id quota).
 *
 * @return UCS_OK on success. UCS_ERR_INVALID_PARAM if the worker has no async
 *         context, UCS_ERR_BUSY if the listen address is in use or not
 *         available, UCS_ERR_IO_ERROR on other rdmacm failures, or the error
 *         returned by ucs_sys_fcntl_modfl() / ucs_async_set_event_handler().
 */
static UCS_CLASS_INIT_FUNC(uct_rdmacm_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    uct_rdmacm_iface_config_t *config = ucs_derived_of(tl_config, uct_rdmacm_iface_config_t);
    char ip_port_str[UCS_SOCKADDR_STRING_LEN];
    uct_rdmacm_md_t *rdmacm_md;
    struct sockaddr *listen_addr;
    ucs_status_t status;

    UCT_CHECK_PARAM(params->field_mask & UCT_IFACE_PARAM_FIELD_OPEN_MODE,
                    "UCT_IFACE_PARAM_FIELD_OPEN_MODE is not defined");

    UCT_CHECK_PARAM((params->open_mode & UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER) ||
                    (params->open_mode & UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT),
                    "Invalid open mode %zu", params->open_mode);

    /* A server must be given the sockaddr to listen on */
    UCT_CHECK_PARAM(!(params->open_mode & UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER) ||
                    (params->field_mask & UCT_IFACE_PARAM_FIELD_SOCKADDR),
                    "UCT_IFACE_PARAM_FIELD_SOCKADDR is not defined for UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER");

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_rdmacm_iface_ops, md, worker,
                              params, tl_config
                              UCS_STATS_ARG((params->field_mask &
                                             UCT_IFACE_PARAM_FIELD_STATS_ROOT) ?
                                            params->stats_root : NULL)
                              UCS_STATS_ARG(UCT_RDMACM_TL_NAME));

    rdmacm_md = ucs_derived_of(self->super.md, uct_rdmacm_md_t);

    /* Events are delivered through the async context, so it is mandatory */
    if (self->super.worker->async == NULL) {
        ucs_error("rdmacm must have async != NULL");
        return UCS_ERR_INVALID_PARAM;
    }
    if (self->super.worker->async->mode == UCS_ASYNC_MODE_SIGNAL) {
        /* only a warning: initialization continues in signal mode */
        ucs_warn("rdmacm does not support SIGIO");
    }

    self->config.addr_resolve_timeout = rdmacm_md->addr_resolve_timeout;

    self->event_ch = rdma_create_event_channel();
    if (self->event_ch == NULL) {
        ucs_error("rdma_create_event_channel(open_mode=%zu) failed: %m",
                  params->open_mode);
        status = UCS_ERR_IO_ERROR;
        goto err;
    }

    /* Set the event_channel fd to non-blocking mode
     * (so that rdma_get_cm_event won't be blocking) */
    status = ucs_sys_fcntl_modfl(self->event_ch->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_destroy_event_channel;
    }

    if (params->open_mode & UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER) {
        self->is_server = 1;

        /* Create an id for this interface. Events associated with this id will be
         * reported on the event_channel that was previously created. */
        if (rdma_create_id(self->event_ch, &self->cm_id, NULL, RDMA_PS_UDP)) {
            ucs_error("rdma_create_id() failed: %m");
            status = UCS_ERR_IO_ERROR;
            goto err_destroy_event_channel;
        }

        listen_addr = (struct sockaddr *)params->mode.sockaddr.listen_sockaddr.addr;
        if (rdma_bind_addr(self->cm_id, listen_addr)) {
            /* errno is translated before logging, which could overwrite it */
            status = (errno == EADDRINUSE || errno == EADDRNOTAVAIL) ?
                     UCS_ERR_BUSY : UCS_ERR_IO_ERROR;
            ucs_error("rdma_bind_addr(addr=%s) failed: %m",
                      ucs_sockaddr_str(listen_addr, ip_port_str,
                                       UCS_SOCKADDR_STRING_LEN));
            goto err_destroy_id;
        }

        if (rdma_listen(self->cm_id, config->backlog)) {
            ucs_error("rdma_listen(cm_id:=%p event_channel=%p addr=%s) failed: %m",
                      self->cm_id, self->event_ch,
                      ucs_sockaddr_str(listen_addr, ip_port_str,
                                       UCS_SOCKADDR_STRING_LEN));
            status = UCS_ERR_IO_ERROR;
            goto err_destroy_id;
        }

        ucs_debug("rdma_cm id %p listening on %s:%d", self->cm_id,
                  ucs_sockaddr_str(listen_addr, ip_port_str,
                                   UCS_SOCKADDR_STRING_LEN),
                  ntohs(rdma_get_src_port(self->cm_id)));

        /* NOTE(review): this aborts the process on a caller-supplied flag
         * after the id is already listening - consider returning an error
         * through err_destroy_id instead */
        if (!(params->mode.sockaddr.cb_flags & UCT_CB_FLAG_ASYNC)) {
            ucs_fatal("Synchronous callback is not supported");
        }

        self->cb_flags         = params->mode.sockaddr.cb_flags;
        self->conn_request_cb  = params->mode.sockaddr.conn_request_cb;
        self->conn_request_arg = params->mode.sockaddr.conn_request_arg;
    } else {
        /* Client: no listening id on the iface */
        self->cm_id            = NULL;
        self->is_server        = 0;
    }

    self->cm_id_quota = config->cm_id_quota;
    ucs_list_head_init(&self->pending_eps_list);
    ucs_list_head_init(&self->used_cm_ids_list);

    /* Server and client register an event handler for incoming messages */
    status = ucs_async_set_event_handler(self->super.worker->async->mode,
                                         self->event_ch->fd, UCS_EVENT_SET_EVREAD,
                                         uct_rdmacm_iface_event_handler,
                                         self, self->super.worker->async);
    if (status != UCS_OK) {
        ucs_error("failed to set event handler");
        goto err_destroy_id;
    }


    ucs_debug("created an RDMACM iface %p. event_channel: %p, fd: %d, cm_id: %p",
              self, self->event_ch, self->event_ch->fd, self->cm_id);
    return UCS_OK;

    /* Cleanup ladder: each label releases what was acquired before the jump */
err_destroy_id:
    /* only the server created a cm_id; on a client this step is skipped */
    if (self->is_server) {
        rdma_destroy_id(self->cm_id);
    }
err_destroy_event_channel:
    rdma_destroy_event_channel(self->event_ch);
err:
    return status;
}
/**
 * Maximal size, in bytes, of the user's private data that can be sent with a
 * connection request: the rdmacm UDP private data area minus the header that
 * precedes the user's data.
 *
 * The whole replacement list is parenthesized so that the macro expands as a
 * single operand inside any larger expression.
 */
#define UCT_RDMACM_MAX_CONN_PRIV \
        ((UCT_RDMACM_UDP_PRIV_DATA_LEN) - (sizeof(uct_rdmacm_priv_data_hdr_t)))
+ params->user_data : NULL; + + if (rdma_create_id(rdmacm_cm->ev_ch, &self->id, self, RDMA_PS_TCP)) { + ucs_error("rdma_create_id() failed: %m"); + status = UCS_ERR_IO_ERROR; + goto err; + } + + if (rdma_bind_addr(self->id, (struct sockaddr *)saddr)) { + status = ((errno == EADDRINUSE) || (errno == EADDRNOTAVAIL)) ? + UCS_ERR_BUSY : UCS_ERR_IO_ERROR; + ucs_error("rdma_bind_addr(addr=%s) failed: %m", + ucs_sockaddr_str(saddr, ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + goto err_destroy_id; + } + + backlog = (params->field_mask & UCT_LISTENER_PARAM_FIELD_BACKLOG) ? + params->backlog : SOMAXCONN; + if (rdma_listen(self->id, backlog)) { + ucs_error("rdma_listen(id:=%p addr=%s backlog=%d) failed: %m", + self->id, ucs_sockaddr_str(saddr, ip_port_str, + UCS_SOCKADDR_STRING_LEN), + backlog); + status = UCS_ERR_IO_ERROR; + goto err_destroy_id; + } + + ucs_debug("created an RDMACM listener %p on cm %p with cm_id: %p. " + "listening on %s:%d", self, cm, self->id, + ucs_sockaddr_str(saddr, ip_port_str, UCS_SOCKADDR_STRING_LEN), + ntohs(rdma_get_src_port(self->id))); + + return UCS_OK; + +err_destroy_id: + uct_rdmacm_cm_destroy_id(self->id); +err: + return status; +} + +ucs_status_t uct_rdmacm_listener_reject(uct_listener_h listener, + uct_conn_request_h conn_request) +{ + uct_rdmacm_listener_t *rdmacm_listener = ucs_derived_of(listener, uct_rdmacm_listener_t); + struct rdma_cm_event *event = (struct rdma_cm_event *)conn_request; + + ucs_assert_always(rdmacm_listener->id == event->listen_id); + + uct_rdmacm_cm_reject(event->id); + + uct_rdmacm_cm_destroy_id(event->id); + + return uct_rdmacm_cm_ack_event(event); +} + +UCS_CLASS_CLEANUP_FUNC(uct_rdmacm_listener_t) +{ + uct_rdmacm_cm_destroy_id(self->id); +} + +ucs_status_t uct_rdmacm_listener_query(uct_listener_h listener, + uct_listener_attr_t *listener_attr) +{ + uct_rdmacm_listener_t *rdmacm_listener = ucs_derived_of(listener, + uct_rdmacm_listener_t); + struct sockaddr *addr; + ucs_status_t status; + + if 
(listener_attr->field_mask & UCT_LISTENER_ATTR_FIELD_SOCKADDR) { + addr = rdma_get_local_addr(rdmacm_listener->id); + status = ucs_sockaddr_copy((struct sockaddr *)&listener_attr->sockaddr, + addr); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +UCS_CLASS_DEFINE(uct_rdmacm_listener_t, uct_listener_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_rdmacm_listener_t, uct_listener_t, + uct_cm_h , const struct sockaddr *, socklen_t , + const uct_listener_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_rdmacm_listener_t, uct_listener_t); diff --git a/src/uct/ib/rdmacm/rdmacm_listener.h b/src/uct/ib/rdmacm/rdmacm_listener.h new file mode 100644 index 0000000..104d9b8 --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_listener.h @@ -0,0 +1,35 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "rdmacm_cm.h" + +/** + * An rdmacm listener for incoming connections requests on the server side. + */ +typedef struct uct_rdmacm_listener { + uct_listener_t super; + + /** The rdmacm id assiciated with the listener */ + struct rdma_cm_id *id; + + /** Callback to invoke upon receving a connection request from a client */ + uct_listener_conn_request_callback_t conn_request_cb; + + /** User's data to be passed as argument to the conn_request_cb */ + void *user_data; +} uct_rdmacm_listener_t; + + +UCS_CLASS_DECLARE_NEW_FUNC(uct_rdmacm_listener_t, uct_listener_t, + uct_cm_h , const struct sockaddr *, socklen_t , + const uct_listener_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_rdmacm_listener_t, uct_listener_t); + +ucs_status_t uct_rdmacm_listener_query(uct_listener_h listener, + uct_listener_attr_t *listener_attr); + +ucs_status_t uct_rdmacm_listener_reject(uct_listener_h listener, + uct_conn_request_h conn_request); diff --git a/src/uct/ib/rdmacm/rdmacm_md.c b/src/uct/ib/rdmacm/rdmacm_md.c new file mode 100644 index 0000000..9894359 --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_md.c @@ -0,0 +1,262 @@ 
+/** + * Copyright (C) Mellanox Technologies Ltd. 2017-219. ALL RIGHTS RESERVED. + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * See file LICENSE for terms. + */ + +#include "rdmacm_md.h" +#include "rdmacm_cm.h" + + +static ucs_config_field_t uct_rdmacm_md_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_rdmacm_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)}, + + {"ADDR_RESOLVE_TIMEOUT", "500ms", + "Time to wait for address resolution to complete", + ucs_offsetof(uct_rdmacm_md_config_t, addr_resolve_timeout), UCS_CONFIG_TYPE_TIME}, + + {NULL} +}; + +static void uct_rdmacm_md_close(uct_md_h md); + +static uct_md_ops_t uct_rdmacm_md_ops = { + .close = uct_rdmacm_md_close, + .query = uct_rdmacm_md_query, + .is_sockaddr_accessible = uct_rdmacm_is_sockaddr_accessible, + .detect_memory_type = ucs_empty_function_return_unsupported, +}; + +static void uct_rdmacm_md_close(uct_md_h md) +{ + uct_rdmacm_md_t *rdmacm_md = ucs_derived_of(md, uct_rdmacm_md_t); + ucs_free(rdmacm_md); +} + +ucs_status_t uct_rdmacm_md_query(uct_md_h md, uct_md_attr_t *md_attr) +{ + md_attr->cap.flags = UCT_MD_FLAG_SOCKADDR; + md_attr->cap.reg_mem_types = 0; + md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_HOST; + md_attr->cap.detect_mem_types = 0; + md_attr->cap.max_alloc = 0; + md_attr->cap.max_reg = 0; + md_attr->rkey_packed_size = 0; + md_attr->reg_cost.overhead = 0; + md_attr->reg_cost.growth = 0; + memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus)); + return UCS_OK; +} + +static enum rdma_cm_event_type +uct_rdmacm_get_event_type(struct rdma_event_channel *event_ch) +{ + enum rdma_cm_event_type event_type; + struct rdma_cm_event *event; + int ret; + + /* Fetch an event */ + ret = rdma_get_cm_event(event_ch, &event); + if (ret) { + ucs_warn("rdma_get_cm_event() failed: %m"); + return RDMA_CM_EVENT_ADDR_RESOLVED; + } + + event_type = event->event; + ret = rdma_ack_cm_event(event); + if (ret) { + ucs_warn("rdma_ack_cm_event() failed. 
event status: %d. %m.", event->status); + } + + return event_type; +} + +static int uct_rdmacm_is_addr_route_resolved(struct rdma_cm_id *cm_id, + struct sockaddr *addr, + int timeout_ms) +{ + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + enum rdma_cm_event_type event_type; + ucs_status_t status; + + status = uct_rdmacm_resolve_addr(cm_id, addr, timeout_ms, UCS_LOG_LEVEL_DEBUG); + if (status != UCS_OK) { + return 0; + } + + event_type = uct_rdmacm_get_event_type(cm_id->channel); + if (event_type != RDMA_CM_EVENT_ADDR_RESOLVED) { + ucs_debug("failed to resolve address (addr = %s). RDMACM event %s.", + ucs_sockaddr_str(addr, ip_port_str, UCS_SOCKADDR_STRING_LEN), + rdma_event_str(event_type)); + return 0; + } + + if (cm_id->verbs->device->transport_type == IBV_TRANSPORT_IWARP) { + ucs_debug("%s: iWarp support is not implemented", + ucs_sockaddr_str(addr, ip_port_str, UCS_SOCKADDR_STRING_LEN)); + return 0; + } + + if (rdma_resolve_route(cm_id, timeout_ms)) { + ucs_debug("rdma_resolve_route(addr = %s) failed: %m", + ucs_sockaddr_str(addr, ip_port_str, UCS_SOCKADDR_STRING_LEN)); + return 0; + } + + event_type = uct_rdmacm_get_event_type(cm_id->channel); + if (event_type != RDMA_CM_EVENT_ROUTE_RESOLVED) { + ucs_debug("failed to resolve route to addr = %s. 
/**
 * Check whether a sockaddr can be used through rdmacm.
 *
 * A temporary event channel and RDMA_PS_UDP cm_id are created for the check
 * and destroyed before returning.
 *
 * @param md        rdmacm memory domain; supplies the resolution timeout.
 * @param sockaddr  Address to check.
 * @param mode      UCT_SOCKADDR_ACC_LOCAL: the address must be bindable, and
 *                  unless it is INADDR_ANY it must also resolve.
 *                  UCT_SOCKADDR_ACC_REMOTE: address and route must resolve.
 *
 * @return 1 if the address is accessible, 0 otherwise (including an unknown
 *         mode or a failure to create the temporary rdmacm objects).
 */
int uct_rdmacm_is_sockaddr_accessible(uct_md_h md, const ucs_sock_addr_t *sockaddr,
                                      uct_sockaddr_accessibility_t mode)
{
    uct_rdmacm_md_t *rdmacm_md = ucs_derived_of(md, uct_rdmacm_md_t);
    struct rdma_event_channel *event_ch = NULL;
    struct rdma_cm_id *cm_id = NULL;
    int is_accessible = 0;
    char ip_port_str[UCS_SOCKADDR_STRING_LEN];

    if ((mode != UCT_SOCKADDR_ACC_LOCAL) && (mode != UCT_SOCKADDR_ACC_REMOTE)) {
        ucs_error("Unknown sockaddr accessibility mode %d", mode);
        return 0;
    }

    event_ch = rdma_create_event_channel();
    if (event_ch == NULL) {
        ucs_error("rdma_create_event_channel() failed: %m");
        goto out;
    }

    if (rdma_create_id(event_ch, &cm_id, NULL, RDMA_PS_UDP)) {
        ucs_error("rdma_create_id() failed: %m");
        goto out_destroy_event_channel;
    }

    if (mode == UCT_SOCKADDR_ACC_LOCAL) {
        /* Server side to check if can bind to the given sockaddr */
        if (rdma_bind_addr(cm_id, (struct sockaddr *)sockaddr->addr)) {
            ucs_debug("rdma_bind_addr(addr = %s) failed: %m",
                      ucs_sockaddr_str((struct sockaddr *)sockaddr->addr,
                                       ip_port_str, UCS_SOCKADDR_STRING_LEN));
            goto out_destroy_id;
        }

        /* INADDR_ANY is accepted once the bind succeeded; the resolution
         * step below is skipped for it */
        if (ucs_sockaddr_is_inaddr_any((struct sockaddr *)sockaddr->addr)) {
            is_accessible = 1;
            goto out_print;
        }
    }

    /* Client and server sides check if can access the given sockaddr.
     * The timeout needs to be passed in ms */
    is_accessible = uct_rdmacm_is_addr_route_resolved(cm_id,
                                                     (struct sockaddr *)sockaddr->addr,
                                                     UCS_MSEC_PER_SEC * rdmacm_md->addr_resolve_timeout);
    if (!is_accessible) {
        goto out_destroy_id;
    }

    /* reached only when the address was found accessible */
out_print:
    ucs_debug("address %s (port %d) is accessible from rdmacm_md %p with mode: %d",
              ucs_sockaddr_str((struct sockaddr *)sockaddr->addr, ip_port_str,
                               UCS_SOCKADDR_STRING_LEN),
              ntohs(rdma_get_src_port(cm_id)), rdmacm_md, mode);

    /* Cleanup ladder: falls through from the success path as well */
out_destroy_id:
    rdma_destroy_id(cm_id);
out_destroy_event_channel:
    rdma_destroy_event_channel(event_ch);
out:
    return is_accessible;
}
" + "Disabling the RDMACM resource"); + return uct_md_query_empty_md_resource(resources_p, num_resources_p); + + } + + rdma_destroy_event_channel(event_ch); + + return uct_md_query_single_md_resource(component, resources_p, + num_resources_p); +} + +static ucs_status_t +uct_rdmacm_md_open(uct_component_t *component, const char *md_name, + const uct_md_config_t *uct_md_config, uct_md_h *md_p) +{ + uct_rdmacm_md_config_t *md_config = ucs_derived_of(uct_md_config, + uct_rdmacm_md_config_t); + uct_rdmacm_md_t *md; + ucs_status_t status; + + md = ucs_malloc(sizeof(*md), "rdmacm_md"); + if (md == NULL) { + status = UCS_ERR_NO_MEMORY; + goto out; + } + + md->super.ops = &uct_rdmacm_md_ops; + md->super.component = &uct_rdmacm_component; + md->addr_resolve_timeout = md_config->addr_resolve_timeout; + + /* cppcheck-suppress autoVariables */ + *md_p = &md->super; + status = UCS_OK; + +out: + return status; +} + +uct_component_t uct_rdmacm_component = { + .query_md_resources = uct_rdmacm_query_md_resources, + .md_open = uct_rdmacm_md_open, +#if HAVE_RDMACM_QP_LESS + .cm_open = UCS_CLASS_NEW_FUNC_NAME(uct_rdmacm_cm_t), +#else + .cm_open = ucs_empty_function_return_unsupported, +#endif + .rkey_unpack = ucs_empty_function_return_unsupported, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = ucs_empty_function_return_success, + .name = "rdmacm", + .md_config = { + .name = "RDMA-CM memory domain", + .prefix = "IB_", + .table = uct_rdmacm_md_config_table, + .size = sizeof(uct_rdmacm_md_config_t), + }, + .cm_config = { + .name = "RDMA-CM connection manager", + .prefix = "RDMACM_", + .table = uct_cm_config_table, + .size = sizeof(uct_cm_config_t), + }, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_rdmacm_component), +#if HAVE_RDMACM_QP_LESS + .flags = UCT_COMPONENT_FLAG_CM +#else + .flags = 0 +#endif +}; +UCT_COMPONENT_REGISTER(&uct_rdmacm_component) diff --git a/src/uct/ib/rdmacm/rdmacm_md.h b/src/uct/ib/rdmacm/rdmacm_md.h new file mode 100644 index 
0000000..cd93010 --- /dev/null +++ b/src/uct/ib/rdmacm/rdmacm_md.h @@ -0,0 +1,38 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_RDMACM_MD_H_ +#define UCT_RDMACM_MD_H_ + +#include "rdmacm_def.h" +#include +#include +#include +#include + +/** + * RDMACM memory domain. + */ +typedef struct uct_rdmacm_md { + uct_md_t super; + double addr_resolve_timeout; +} uct_rdmacm_md_t; + +/** + * RDMACM memory domain configuration. + */ +typedef struct uct_rdmacm_md_config { + uct_md_config_t super; + double addr_resolve_timeout; +} uct_rdmacm_md_config_t; + +extern uct_component_t uct_rdmacm_component; + +ucs_status_t uct_rdmacm_md_query(uct_md_h md, uct_md_attr_t *md_attr); + +int uct_rdmacm_is_sockaddr_accessible(uct_md_h md, const ucs_sock_addr_t *sockaddr, + uct_sockaddr_accessibility_t mode); + +#endif diff --git a/src/uct/ib/ud/accel/ud_mlx5.c b/src/uct/ib/ud/accel/ud_mlx5.c new file mode 100644 index 0000000..bdedc3c --- /dev/null +++ b/src/uct/ib/ud/accel/ud_mlx5.c @@ -0,0 +1,841 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "ud_mlx5.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For htonl */ + +#include +#include +#include + +#include +#include +#include +#include + + +static ucs_config_field_t uct_ud_mlx5_iface_config_table[] = { + {"UD_", "", NULL, + ucs_offsetof(uct_ud_mlx5_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_ud_iface_config_table)}, + + {"IB_", "", NULL, + ucs_offsetof(uct_ud_mlx5_iface_config_t, mlx5_common), + UCS_CONFIG_TYPE_TABLE(uct_ib_mlx5_iface_config_table)}, + + {"UD_", "", NULL, + ucs_offsetof(uct_ud_mlx5_iface_config_t, ud_mlx5_common), + UCS_CONFIG_TYPE_TABLE(uct_ud_mlx5_iface_common_config_table)}, + + {NULL} +}; + +static UCS_F_ALWAYS_INLINE size_t +uct_ud_mlx5_ep_ctrl_av_size(uct_ud_mlx5_ep_t *ep) +{ + return sizeof(struct mlx5_wqe_ctrl_seg) + uct_ib_mlx5_wqe_av_size(&ep->av); +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_mlx5_post_send(uct_ud_mlx5_iface_t *iface, uct_ud_mlx5_ep_t *ep, + uint8_t se, struct mlx5_wqe_ctrl_seg *ctrl, size_t wqe_size, + int max_log_sge) +{ + struct mlx5_wqe_datagram_seg *dgram = (void*)(ctrl + 1); + + uct_ib_mlx5_set_ctrl_seg(ctrl, iface->tx.wq.sw_pi, MLX5_OPCODE_SEND, 0, + iface->super.qp->qp_num, + uct_ud_mlx5_tx_moderation(iface) | se, wqe_size); + uct_ib_mlx5_set_dgram_seg(dgram, &ep->av, ep->is_global ? 
&ep->grh_av : NULL, + IBV_QPT_UD); + + uct_ib_mlx5_log_tx(&iface->super.super, ctrl, iface->tx.wq.qstart, + iface->tx.wq.qend, max_log_sge, NULL, uct_ud_dump_packet); + iface->super.tx.available -= uct_ib_mlx5_post_send(&iface->tx.wq, ctrl, + wqe_size); + ucs_assert((int16_t)iface->tx.wq.bb_max >= iface->super.tx.available); +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_mlx5_ep_tx_skb(uct_ud_mlx5_iface_t *iface, uct_ud_mlx5_ep_t *ep, + uct_ud_send_skb_t *skb, uint8_t se, int max_log_sge) +{ + size_t ctrl_av_size = uct_ud_mlx5_ep_ctrl_av_size(ep); + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_data_seg *dptr; + + ctrl = iface->tx.wq.curr; + dptr = uct_ib_mlx5_txwq_wrap_exact(&iface->tx.wq, + UCS_PTR_BYTE_OFFSET(ctrl, ctrl_av_size)); + uct_ib_mlx5_set_data_seg(dptr, skb->neth, skb->len, skb->lkey); + UCT_UD_EP_HOOK_CALL_TX(&ep->super, skb->neth); + uct_ud_mlx5_post_send(iface, ep, se, ctrl, ctrl_av_size + sizeof(*dptr), max_log_sge); +} + +static inline void +uct_ud_mlx5_ep_tx_inl(uct_ud_mlx5_iface_t *iface, uct_ud_mlx5_ep_t *ep, + const void *buf, unsigned length, uint8_t se) +{ + size_t ctrl_av_size = uct_ud_mlx5_ep_ctrl_av_size(ep); + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_inl_data_seg *inl; + + ctrl = iface->tx.wq.curr; + inl = uct_ib_mlx5_txwq_wrap_exact(&iface->tx.wq, + UCS_PTR_BYTE_OFFSET(ctrl, ctrl_av_size)); + inl->byte_count = htonl(length | MLX5_INLINE_SEG); + uct_ib_mlx5_inline_copy(inl + 1, buf, length, &iface->tx.wq); + UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)buf); + uct_ud_mlx5_post_send(iface, ep, se, ctrl, + ctrl_av_size + sizeof(*inl) + length, INT_MAX); +} + + +static void uct_ud_mlx5_ep_tx_ctl_skb(uct_ud_ep_t *ud_ep, uct_ud_send_skb_t *skb, + int solicited) +{ + uct_ud_mlx5_iface_t *iface = ucs_derived_of(ud_ep->super.super.iface, + uct_ud_mlx5_iface_t); + uct_ud_mlx5_ep_t *ep = ucs_derived_of(ud_ep, uct_ud_mlx5_ep_t); + uint8_t se; + + se = solicited ? 
MLX5_WQE_CTRL_SOLICITED : 0; + if (skb->len >= iface->super.config.max_inline) { + uct_ud_mlx5_ep_tx_skb(iface, ep, skb, se, INT_MAX); + } else { + uct_ud_mlx5_ep_tx_inl(iface, ep, skb->neth, skb->len, se); + } +} + +static UCS_F_NOINLINE void +uct_ud_mlx5_iface_post_recv(uct_ud_mlx5_iface_t *iface) +{ + unsigned batch = iface->super.super.config.rx_max_batch; + struct mlx5_wqe_data_seg *rx_wqes; + uint16_t pi, next_pi, count; + uct_ib_iface_recv_desc_t *desc; + + rx_wqes = iface->rx.wq.wqes; + pi = iface->rx.wq.rq_wqe_counter & iface->rx.wq.mask; + + for (count = 0; count < batch; count ++) { + next_pi = (pi + 1) & iface->rx.wq.mask; + ucs_prefetch(rx_wqes + next_pi); + UCT_TL_IFACE_GET_RX_DESC(&iface->super.super.super, &iface->super.rx.mp, + desc, break); + rx_wqes[pi].lkey = htonl(desc->lkey); + rx_wqes[pi].addr = htobe64((uintptr_t)uct_ib_iface_recv_desc_hdr(&iface->super.super, desc)); + pi = next_pi; + } + if (ucs_unlikely(count == 0)) { + ucs_debug("iface(%p) failed to post receive wqes", iface); + return; + } + pi = iface->rx.wq.rq_wqe_counter + count; + iface->rx.wq.rq_wqe_counter = pi; + iface->super.rx.available -= count; + ucs_memory_cpu_fence(); + *iface->rx.wq.dbrec = htonl(pi); +} + +static UCS_CLASS_INIT_FUNC(uct_ud_mlx5_ep_t, uct_iface_h tl_iface) +{ + uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_ud_mlx5_iface_t); + ucs_trace_func(""); + UCS_CLASS_CALL_SUPER_INIT(uct_ud_ep_t, &iface->super); + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_ud_mlx5_ep_t) +{ + ucs_trace_func(""); +} + +UCS_CLASS_DEFINE(uct_ud_mlx5_ep_t, uct_ud_ep_t); +static UCS_CLASS_DEFINE_NEW_FUNC(uct_ud_mlx5_ep_t, uct_ep_t, uct_iface_h); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_ud_mlx5_ep_t, uct_ep_t); + + +static ucs_status_t +uct_ud_mlx5_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr, + const void *buffer, unsigned length) +{ + uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t); + uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, + 
uct_ud_mlx5_iface_t); + size_t ctrl_av_size = uct_ud_mlx5_ep_ctrl_av_size(ep); + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_inl_data_seg *inl; + uct_ud_am_short_hdr_t *am; + uct_ud_neth_t *neth; + uct_ud_send_skb_t *skb; + size_t wqe_size; + + /* data is written directly into the tx wqe, so it is impossible to use + * the common ud am code + */ + UCT_CHECK_AM_ID(id); + UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(hdr) + length, + 0, iface->super.config.max_inline, "am_short"); + + uct_ud_enter(&iface->super); + + skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super); + if (!skb) { + uct_ud_leave(&iface->super); + return UCS_ERR_NO_RESOURCE; + } + + ctrl = iface->tx.wq.curr; + /* Set inline segment which holds the network header (set up with the AM id), the AM header, and the AM payload */ + inl = uct_ib_mlx5_txwq_wrap_exact(&iface->tx.wq, + UCS_PTR_BYTE_OFFSET(ctrl, ctrl_av_size)); + wqe_size = length + sizeof(*am) + sizeof(*neth); /* inline data length only at this point, used for byte_count */ + inl->byte_count = htonl(wqe_size | MLX5_INLINE_SEG); + + /* assume that neth and am header fit into one bb */ + ucs_assert(sizeof(*am) + sizeof(*neth) < MLX5_SEND_WQE_BB); + neth = (void*)(inl + 1); + uct_ud_am_set_neth(neth, &ep->super, id); + + am = (void*)(neth + 1); + am->hdr = hdr; + uct_ib_mlx5_inline_copy(am + 1, buffer, length, &iface->tx.wq); + /* grow to the full WQE size: add the ctrl + address vector part and the inline segment header */ + wqe_size += ctrl_av_size + sizeof(*inl); + UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth); + uct_ud_mlx5_post_send(iface, ep, 0, ctrl, wqe_size, INT_MAX); + /* keep a copy of the headers in the skb; the payload is passed to uct_ud_iface_complete_tx_inl() separately */ + skb->len = sizeof(*neth) + sizeof(*am); + memcpy(skb->neth, neth, skb->len); + uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb, + (char *)skb->neth + skb->len, buffer, length); + UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length); + uct_ud_leave(&iface->super); + return UCS_OK; +} + /* Active message bcopy send: gets an skb from uct_ud_am_common(), packs the payload into it with pack_cb and posts it with uct_ud_mlx5_ep_tx_skb(). Returns the packed length, or the status of uct_ud_am_common() when that fails. */ +static ssize_t uct_ud_mlx5_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags) +{ + uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t); + uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, + 
uct_ud_mlx5_iface_t); + uct_ud_send_skb_t *skb; + ucs_status_t status; + size_t length; + + uct_ud_enter(&iface->super); + + status = uct_ud_am_common(&iface->super, &ep->super, id, &skb); + if (status != UCS_OK) { + uct_ud_leave(&iface->super); + return status; + } + + length = uct_ud_skb_bcopy(skb, pack_cb, arg); + UCT_UD_CHECK_BCOPY_LENGTH(&iface->super, length); + + uct_ud_mlx5_ep_tx_skb(iface, ep, skb, 0, INT_MAX); + uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb); + UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length); + uct_ud_leave(&iface->super); + return length; +} + +static ucs_status_t +uct_ud_mlx5_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header, + unsigned header_length, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, uct_completion_t *comp) +{ + uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t); + uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, + uct_ud_mlx5_iface_t); + size_t ctrl_av_size = uct_ud_mlx5_ep_ctrl_av_size(ep); + uct_ud_send_skb_t *skb; + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_inl_data_seg *inl; + uct_ud_neth_t *neth; + size_t inl_size, wqe_size; + + UCT_CHECK_AM_ID(id); + UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(&iface->super.super), + "uct_ud_mlx5_ep_am_zcopy"); + UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + header_length, 0, + UCT_IB_MLX5_AM_ZCOPY_MAX_HDR(UCT_IB_MLX5_AV_FULL_SIZE), + "am_zcopy header"); + UCT_UD_CHECK_ZCOPY_LENGTH(&iface->super, header_length, + uct_iov_total_length(iov, iovcnt)); + + uct_ud_enter(&iface->super); + + skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super); + if (!skb) { + uct_ud_leave(&iface->super); + return UCS_ERR_NO_RESOURCE; + } + + ctrl = iface->tx.wq.curr; + inl = uct_ib_mlx5_txwq_wrap_exact(&iface->tx.wq, + UCS_PTR_BYTE_OFFSET(ctrl, ctrl_av_size)); + inl_size = header_length + sizeof(*neth); + inl->byte_count = htonl(inl_size | MLX5_INLINE_SEG); + + neth = (void*)(inl + 1); + uct_ud_am_set_neth(neth, &ep->super, id); + /* 
force ACK_REQ because we want to call user completion ASAP */ + neth->packet_type |= UCT_UD_PACKET_FLAG_ACK_REQ; + /* the user header is copied inline right after the network header */ + uct_ib_mlx5_inline_copy(neth + 1, header, header_length, &iface->tx.wq); + /* the iov data segments are written at the next offset aligned to UCT_IB_MLX5_WQE_SEG_SIZE */ + wqe_size = ucs_align_up_pow2(ctrl_av_size + inl_size + sizeof(*inl), + UCT_IB_MLX5_WQE_SEG_SIZE); + wqe_size += uct_ib_mlx5_set_data_seg_iov(&iface->tx.wq, + UCS_PTR_BYTE_OFFSET(ctrl, wqe_size), + iov, iovcnt); + ucs_assert(wqe_size <= UCT_IB_MLX5_MAX_SEND_WQE_SIZE); + + UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth); + uct_ud_mlx5_post_send(iface, ep, 0, ctrl, wqe_size, + UCT_IB_MAX_ZCOPY_LOG_SGE(&iface->super.super)); + /* keep the network header and the user header in the skb; iov and comp are handed to uct_ud_am_set_zcopy_desc() */ + skb->len = sizeof(*neth) + header_length; + memcpy(skb->neth, neth, sizeof(*neth)); + memcpy(skb->neth + 1, header, header_length); + uct_ud_am_set_zcopy_desc(skb, iov, iovcnt, comp); + + uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb); + UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY, header_length + + uct_iov_total_length(iov, iovcnt)); + uct_ud_leave(&iface->super); + return UCS_INPROGRESS; +} + /* PUT short: sends the data inline behind a network header and a put header that carries remote_addr. Returns UCS_ERR_NO_RESOURCE when no tx skb is available. NOTE(review): rkey is not referenced in this function. */ +static ucs_status_t +uct_ud_mlx5_ep_put_short(uct_ep_h tl_ep, const void *buffer, unsigned length, + uint64_t remote_addr, uct_rkey_t rkey) +{ + uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t); + uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, + uct_ud_mlx5_iface_t); + size_t ctrl_av_size = uct_ud_mlx5_ep_ctrl_av_size(ep); + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_wqe_inl_data_seg *inl; + uct_ud_put_hdr_t *put_hdr; + uct_ud_neth_t *neth; + uct_ud_send_skb_t *skb; + size_t wqe_size; + + UCT_CHECK_LENGTH(sizeof(*neth) + sizeof(*put_hdr) + length, + 0, iface->super.config.max_inline, "put_short"); + + uct_ud_enter(&iface->super); + + skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super); + if (!skb) { + uct_ud_leave(&iface->super); + return UCS_ERR_NO_RESOURCE; + } + + ctrl = iface->tx.wq.curr; + /* Set inline segment which holds the network header, the put header, and the payload */ + inl = uct_ib_mlx5_txwq_wrap_exact(&iface->tx.wq, + 
UCS_PTR_BYTE_OFFSET(ctrl, ctrl_av_size)); + wqe_size = length + sizeof(*put_hdr) + sizeof(*neth); /* inline data length only at this point, used for byte_count */ + inl->byte_count = htonl(wqe_size | MLX5_INLINE_SEG); + + /* assume that neth and put header fit into one bb */ + ucs_assert(sizeof(*put_hdr) + sizeof(*neth) < MLX5_SEND_WQE_BB); + neth = (void*)(inl + 1); + uct_ud_neth_init_data(&ep->super, neth); + uct_ud_neth_set_type_put(&ep->super, neth); + uct_ud_neth_ack_req(&ep->super, neth); + + put_hdr = (uct_ud_put_hdr_t *)(neth+1); + put_hdr->rva = remote_addr; + + uct_ib_mlx5_inline_copy(put_hdr + 1, buffer, length, &iface->tx.wq); + /* grow to the full WQE size: add the ctrl + address vector part and the inline segment header */ + wqe_size += ctrl_av_size + sizeof(*inl); + UCT_UD_EP_HOOK_CALL_TX(&ep->super, neth); + uct_ud_mlx5_post_send(iface, ep, 0, ctrl, wqe_size, INT_MAX); + /* keep a copy of the headers in the skb; the payload is passed to uct_ud_iface_complete_tx_inl() separately */ + skb->len = sizeof(*neth) + sizeof(*put_hdr); + memcpy(skb->neth, neth, skb->len); + uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb, + (char *)skb->neth + skb->len, buffer, length); + UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length); + uct_ud_leave(&iface->super); + return UCS_OK; +} + /* Poll the RX completion queue once. Returns 1 if a completion was consumed and 0 otherwise. */ +static UCS_F_ALWAYS_INLINE unsigned +uct_ud_mlx5_iface_poll_rx(uct_ud_mlx5_iface_t *iface, int is_async) +{ + struct mlx5_cqe64 *cqe; + uint16_t ci; + uct_ib_iface_recv_desc_t *desc; + uint32_t len; + void *packet; + unsigned count; + ptrdiff_t rx_hdr_offset; + /* look up and prefetch the buffer at the current cq_wqe_counter slot before polling the CQ; desc lies rx_hdr_offset bytes before the packet */ + ci = iface->rx.wq.cq_wqe_counter & iface->rx.wq.mask; + packet = (void *)be64toh(iface->rx.wq.wqes[ci].addr); + ucs_prefetch(UCS_PTR_BYTE_OFFSET(packet, UCT_IB_GRH_LEN)); + rx_hdr_offset = iface->super.super.config.rx_hdr_offset; + desc = UCS_PTR_BYTE_OFFSET(packet, -rx_hdr_offset); + + cqe = uct_ib_mlx5_poll_cq(&iface->super.super, &iface->cq[UCT_IB_DIR_RX]); + if (cqe == NULL) { + count = 0; + goto out; + } + + ucs_memory_cpu_load_fence(); + + ucs_assert(0 == (cqe->op_own & + (MLX5_INLINE_SCATTER_32|MLX5_INLINE_SCATTER_64))); + ucs_assert(ntohs(cqe->wqe_counter) == iface->rx.wq.cq_wqe_counter); + + iface->super.rx.available++; + iface->rx.wq.cq_wqe_counter++; + count = 1; + len = 
ntohl(cqe->byte_cnt); + VALGRIND_MAKE_MEM_DEFINED(packet, len); + /* when the GRH check fails, the descriptor is released and the packet is not processed */ + if (!uct_ud_iface_check_grh(&iface->super, + UCS_PTR_BYTE_OFFSET(packet, UCT_IB_GRH_LEN), + uct_ib_mlx5_cqe_is_grh_present(cqe))) { + ucs_mpool_put_inline(desc); + goto out; + } + + uct_ib_mlx5_log_rx(&iface->super.super, cqe, packet, uct_ud_dump_packet); + /* coverity[tainted_data] */ + uct_ud_ep_process_rx(&iface->super, + (uct_ud_neth_t *)UCS_PTR_BYTE_OFFSET(packet, UCT_IB_GRH_LEN), + len - UCT_IB_GRH_LEN, + (uct_ud_recv_skb_t *)ucs_unaligned_ptr(desc), is_async); +out: + if (iface->super.rx.available >= iface->super.super.config.rx_max_batch) { + /* we need to try to post buffers always. Otherwise it is possible + * to run out of rx wqes if receiver is slow and there are always + * cqe to process + */ + uct_ud_mlx5_iface_post_recv(iface); + } + return count; +} + /* Poll the TX completion queue once; on a completion, tx.available is refreshed from the completed wqe_counter. Returns 1 if a completion was consumed and 0 otherwise. */ +static UCS_F_ALWAYS_INLINE unsigned +uct_ud_mlx5_iface_poll_tx(uct_ud_mlx5_iface_t *iface) +{ + struct mlx5_cqe64 *cqe; + + cqe = uct_ib_mlx5_poll_cq(&iface->super.super, &iface->cq[UCT_IB_DIR_TX]); + if (cqe == NULL) { + return 0; + } + + ucs_memory_cpu_load_fence(); + + uct_ib_mlx5_log_cqe(cqe); + iface->super.tx.available = uct_ib_mlx5_txwq_update_bb(&iface->tx.wq, + ntohs(cqe->wqe_counter)); + return 1; +} + /* Interface progress: dispatches zcopy completions and pending RX, polls RX until it is empty or rx_max_poll completions were handled (skipped unless the pending-RX dispatch returned UCS_OK), polls TX once and progresses pending operations. Returns the number of RX and TX completions polled. */ +static unsigned uct_ud_mlx5_iface_progress(uct_iface_h tl_iface) +{ + uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_iface, uct_ud_mlx5_iface_t); + ucs_status_t status; + unsigned n, count = 0; + + uct_ud_enter(&iface->super); + uct_ud_iface_dispatch_zcopy_comps(&iface->super); + + status = uct_ud_iface_dispatch_pending_rx(&iface->super); + if (ucs_likely(status == UCS_OK)) { + do { + n = uct_ud_mlx5_iface_poll_rx(iface, 0); + count += n; + } while ((n > 0) && (count < iface->super.super.config.rx_max_poll)); + } + + count += uct_ud_mlx5_iface_poll_tx(iface); + uct_ud_iface_progress_pending(&iface->super, 0); + uct_ud_leave(&iface->super); + return count; +} + /* Async progress: polls RX with is_async = 1 until it is empty (no rx_max_poll limit), polls TX once, then progresses pending operations. */ +static unsigned uct_ud_mlx5_iface_async_progress(uct_ud_iface_t 
*ud_iface) +{ + uct_ud_mlx5_iface_t *iface = ucs_derived_of(ud_iface, uct_ud_mlx5_iface_t); + unsigned n, count; + + count = 0; + do { + n = uct_ud_mlx5_iface_poll_rx(iface, 1); + count += n; + } while (n > 0); + + count += uct_ud_mlx5_iface_poll_tx(iface); + + uct_ud_iface_progress_pending(&iface->super, 1); + + return count; +} + +static ucs_status_t +uct_ud_mlx5_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) +{ + uct_ud_iface_t *iface = ucs_derived_of(tl_iface, uct_ud_iface_t); + ucs_status_t status; + + ucs_trace_func(""); + status = uct_ud_iface_query(iface, iface_attr); + if (status != UCS_OK) { + return status; + } + + iface_attr->overhead = 80e-9; /* Software overhead */ + iface_attr->cap.am.max_iov = uct_ib_iface_get_max_iov(&iface->super); + + iface_attr->cap.am.max_hdr = UCT_IB_MLX5_AM_ZCOPY_MAX_HDR(UCT_IB_MLX5_AV_FULL_SIZE) + - sizeof(uct_ud_neth_t); + + return UCS_OK; +} + +static ucs_status_t +uct_ud_mlx5_ep_create_ah(uct_ud_mlx5_iface_t *iface, uct_ud_mlx5_ep_t *ep, + const uct_ib_address_t *ib_addr, + const uct_ud_iface_addr_t *if_addr) +{ + ucs_status_t status; + uint32_t remote_qpn; + int is_global; + + status = uct_ud_mlx5_iface_get_av(&iface->super.super, &iface->ud_mlx5_common, + ib_addr, &ep->av, &ep->grh_av, &is_global); + if (status != UCS_OK) { + return status; + } + + remote_qpn = uct_ib_unpack_uint24(if_addr->qp_num); + ep->is_global = is_global; + ep->av.dqp_dct |= htonl(remote_qpn); + uct_ib_mlx5_iface_set_av_sport(&iface->super.super, &ep->av, + remote_qpn, iface->super.qp->qp_num); + return UCS_OK; +} + +static ucs_status_t +uct_ud_mlx5_ep_create_connected(uct_iface_h iface_h, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr, + uct_ep_h *new_ep_p) +{ + uct_ud_mlx5_iface_t *iface = ucs_derived_of(iface_h, uct_ud_mlx5_iface_t); + uct_ud_mlx5_ep_t *ep; + uct_ud_ep_t *new_ud_ep; + const uct_ud_iface_addr_t *if_addr = (const uct_ud_iface_addr_t *)iface_addr; + const uct_ib_address_t *ib_addr = 
(const uct_ib_address_t *)dev_addr; + uct_ud_send_skb_t *skb; + ucs_status_t status, status_ah; + + uct_ud_enter(&iface->super); + status = uct_ud_ep_create_connected_common(&iface->super, ib_addr, if_addr, + &new_ud_ep, &skb); + if (status != UCS_OK && + status != UCS_ERR_NO_RESOURCE && + status != UCS_ERR_ALREADY_EXISTS) { + uct_ud_leave(&iface->super); + return status; + } + + ep = ucs_derived_of(new_ud_ep, uct_ud_mlx5_ep_t); + /* cppcheck-suppress autoVariables */ + *new_ep_p = &ep->super.super.super; + if (status == UCS_ERR_ALREADY_EXISTS) { + uct_ud_leave(&iface->super); + return UCS_OK; + } + + status_ah = uct_ud_mlx5_ep_create_ah(iface, ep, ib_addr, if_addr); + if (status_ah != UCS_OK) { + uct_ud_ep_destroy_connected(&ep->super, ib_addr, if_addr); + *new_ep_p = NULL; + uct_ud_leave(&iface->super); + return status_ah; + } + + if (status == UCS_OK) { + uct_ud_mlx5_ep_tx_ctl_skb(&ep->super, skb, 1); + uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb); + ep->super.flags |= UCT_UD_EP_FLAG_CREQ_SENT; + } + + uct_ud_leave(&iface->super); + return UCS_OK; +} + +static ucs_status_t +uct_ud_mlx5_ep_create(const uct_ep_params_t* params, uct_ep_h *ep_p) +{ + if (ucs_test_all_flags(params->field_mask, UCT_EP_PARAM_FIELD_DEV_ADDR | + UCT_EP_PARAM_FIELD_IFACE_ADDR)) { + return uct_ud_mlx5_ep_create_connected(params->iface, params->dev_addr, + params->iface_addr, ep_p); + } + + return uct_ud_mlx5_ep_t_new(params->iface, ep_p); +} + + +static ucs_status_t +uct_ud_mlx5_ep_connect_to_ep(uct_ep_h tl_ep, + const uct_device_addr_t *dev_addr, + const uct_ep_addr_t *uct_ep_addr) +{ + ucs_status_t status; + uct_ud_mlx5_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_mlx5_ep_t); + uct_ud_mlx5_iface_t *iface = ucs_derived_of(tl_ep->iface, + uct_ud_mlx5_iface_t); + const uct_ud_ep_addr_t *ep_addr = (const uct_ud_ep_addr_t *)uct_ep_addr; + const uct_ib_address_t *ib_addr = (const uct_ib_address_t *)dev_addr; + + ucs_trace_func(""); + status = uct_ud_ep_connect_to_ep(&ep->super, 
ib_addr, ep_addr); + if (status != UCS_OK) { + return status; + } + + status = uct_ud_mlx5_ep_create_ah(iface, ep, ib_addr, (const uct_ud_iface_addr_t *)ep_addr); + if (status != UCS_OK) { + return status; + } + + return UCS_OK; +} + +static ucs_status_t uct_ud_mlx5_iface_arm_cq(uct_ib_iface_t *ib_iface, + uct_ib_dir_t dir, + int solicited) +{ + uct_ud_mlx5_iface_t *iface = ucs_derived_of(ib_iface, uct_ud_mlx5_iface_t); +#if HAVE_DECL_MLX5DV_INIT_OBJ + return uct_ib_mlx5dv_arm_cq(&iface->cq[dir], solicited); +#else + uct_ib_mlx5_update_cq_ci(iface->super.super.cq[dir], + iface->cq[dir].cq_ci); + return uct_ib_iface_arm_cq(ib_iface, dir, solicited); +#endif +} + +static ucs_status_t uct_ud_mlx5_ep_set_failed(uct_ib_iface_t *iface, + uct_ep_h ep, ucs_status_t status) +{ + return uct_set_ep_failed(&UCS_CLASS_NAME(uct_ud_mlx5_ep_t), ep, + &iface->super.super, status); +} + +static void uct_ud_mlx5_iface_event_cq(uct_ib_iface_t *ib_iface, + uct_ib_dir_t dir) +{ + uct_ud_mlx5_iface_t *iface = ucs_derived_of(ib_iface, uct_ud_mlx5_iface_t); + + iface->cq[dir].cq_sn++; +} + +static ucs_status_t uct_ud_mlx5_iface_create_qp(uct_ib_iface_t *ib_iface, + uct_ib_qp_attr_t *attr, + struct ibv_qp **qp_p) +{ + uct_ud_mlx5_iface_t *iface = ucs_derived_of(ib_iface, uct_ud_mlx5_iface_t); + uct_ib_mlx5_qp_t *qp = &iface->tx.wq.super; + ucs_status_t status; + + status = uct_ib_mlx5_iface_create_qp(ib_iface, qp, attr); + if (status != UCS_OK) { + return status; + } + + *qp_p = qp->verbs.qp; + return status; +} + +static void UCS_CLASS_DELETE_FUNC_NAME(uct_ud_mlx5_iface_t)(uct_iface_t*); + +static void uct_ud_mlx5_iface_handle_failure(uct_ib_iface_t *ib_iface, void *arg, + ucs_status_t status) +{ + uct_ud_mlx5_iface_t *iface = ucs_derived_of(ib_iface, uct_ud_mlx5_iface_t); + + if (status == UCS_ERR_ENDPOINT_TIMEOUT) { + uct_ud_iface_handle_failure(ib_iface, arg, status); + } else { + /* Local side failure - treat as fatal */ + uct_ib_mlx5_completion_with_err(ib_iface, arg, &iface->tx.wq, + 
UCS_LOG_LEVEL_FATAL); + } +} + +static uct_ud_iface_ops_t uct_ud_mlx5_iface_ops = { + { + { + .ep_put_short = uct_ud_mlx5_ep_put_short, + .ep_am_short = uct_ud_mlx5_ep_am_short, + .ep_am_bcopy = uct_ud_mlx5_ep_am_bcopy, + .ep_am_zcopy = uct_ud_mlx5_ep_am_zcopy, + .ep_pending_add = uct_ud_ep_pending_add, + .ep_pending_purge = uct_ud_ep_pending_purge, + .ep_flush = uct_ud_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_create = uct_ud_mlx5_ep_create, + .ep_destroy = uct_ud_ep_disconnect , + .ep_get_address = uct_ud_ep_get_address, + .ep_connect_to_ep = uct_ud_mlx5_ep_connect_to_ep, + .iface_flush = uct_ud_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_progress_enable = uct_ud_iface_progress_enable, + .iface_progress_disable = uct_base_iface_progress_disable, + .iface_progress = uct_ud_mlx5_iface_progress, + .iface_event_fd_get = uct_ib_iface_event_fd_get, + .iface_event_arm = uct_ud_iface_event_arm, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_ud_mlx5_iface_t), + .iface_query = uct_ud_mlx5_iface_query, + .iface_get_device_address = uct_ib_iface_get_device_address, + .iface_get_address = uct_ud_iface_get_address, + .iface_is_reachable = uct_ib_iface_is_reachable + }, + .create_cq = uct_ib_mlx5_create_cq, + .arm_cq = uct_ud_mlx5_iface_arm_cq, + .event_cq = uct_ud_mlx5_iface_event_cq, + .handle_failure = uct_ud_mlx5_iface_handle_failure, + .set_ep_failed = uct_ud_mlx5_ep_set_failed, + }, + .async_progress = uct_ud_mlx5_iface_async_progress, + .tx_skb = uct_ud_mlx5_ep_tx_ctl_skb, + .ep_free = UCS_CLASS_DELETE_FUNC_NAME(uct_ud_mlx5_ep_t), + .create_qp = uct_ud_mlx5_iface_create_qp, +}; + +static UCS_CLASS_INIT_FUNC(uct_ud_mlx5_iface_t, + uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_ud_mlx5_iface_config_t *config = ucs_derived_of(tl_config, + uct_ud_mlx5_iface_config_t); + uct_ib_iface_init_attr_t init_attr = {}; + ucs_status_t status; + int i; + + ucs_trace_func(""); + + 
init_attr.flags = UCT_IB_CQ_IGNORE_OVERRUN; + + self->tx.wq.super.type = UCT_IB_MLX5_OBJ_TYPE_LAST; + + UCS_CLASS_CALL_SUPER_INIT(uct_ud_iface_t, &uct_ud_mlx5_iface_ops, + md, worker, params, &config->super, &init_attr); + + uct_ib_iface_set_max_iov(&self->super.super, UCT_IB_MLX5_AM_ZCOPY_MAX_IOV); + self->super.config.max_inline = UCT_IB_MLX5_AM_MAX_SHORT(UCT_IB_MLX5_AV_FULL_SIZE); + + status = uct_ib_mlx5_get_cq(self->super.super.cq[UCT_IB_DIR_TX], &self->cq[UCT_IB_DIR_TX]); + if (status != UCS_OK) { + return status; + } + + status = uct_ib_mlx5_get_cq(self->super.super.cq[UCT_IB_DIR_RX], &self->cq[UCT_IB_DIR_RX]); + if (status != UCS_OK) { + return status; + } + + status = uct_ib_mlx5_txwq_init(self->super.super.super.worker, + config->mlx5_common.mmio_mode, &self->tx.wq, + self->super.qp); + if (status != UCS_OK) { + return status; + } + self->super.tx.available = self->tx.wq.bb_max; + + status = uct_ib_mlx5_get_rxwq(self->super.qp, &self->rx.wq); + if (status != UCS_OK) { + return status; + } + + status = uct_ud_mlx5_iface_common_init(&self->super.super, + &self->ud_mlx5_common, + &config->ud_mlx5_common); + if (status != UCS_OK) { + return status; + } + + /* write buffer sizes */ + for (i = 0; i <= self->rx.wq.mask; i++) { + self->rx.wq.wqes[i].byte_count = htonl(self->super.super.config.rx_payload_offset + + self->super.super.config.seg_size); + } + while (self->super.rx.available >= self->super.super.config.rx_max_batch) { + uct_ud_mlx5_iface_post_recv(self); + } + + status = uct_ud_iface_complete_init(&self->super); + if (status != UCS_OK) { + return status; + } + + return UCS_OK; +} + + +static UCS_CLASS_CLEANUP_FUNC(uct_ud_mlx5_iface_t) +{ + ucs_trace_func(""); + uct_ud_iface_remove_async_handlers(&self->super); + uct_ud_enter(&self->super); + UCT_UD_IFACE_DELETE_EPS(&self->super, uct_ud_mlx5_ep_t); + ucs_twheel_cleanup(&self->super.async.slow_timer); + uct_ib_mlx5_txwq_cleanup(&self->tx.wq); + uct_ud_leave(&self->super); +} + 
+UCS_CLASS_DEFINE(uct_ud_mlx5_iface_t, uct_ud_iface_t); + +static UCS_CLASS_DEFINE_NEW_FUNC(uct_ud_mlx5_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t*); + +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_ud_mlx5_iface_t, uct_iface_t); + /* Query transport devices: delegates to uct_ib_device_query_ports() on the IB device of the given MD, passing UCT_IB_DEVICE_FLAG_MLX5_PRM, and returns its status. */ +static ucs_status_t +uct_ud_mlx5_query_tl_devices(uct_md_h md, + uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + uct_ib_md_t *ib_md = ucs_derived_of(md, uct_ib_md_t); + return uct_ib_device_query_ports(&ib_md->dev, UCT_IB_DEVICE_FLAG_MLX5_PRM, + tl_devices_p, num_tl_devices_p); +} + /* Define the ud_mlx5 transport on uct_ib_component, with the "UD_MLX5_" configuration prefix and the config table declared at the top of this file. */ +UCT_TL_DEFINE(&uct_ib_component, ud_mlx5, uct_ud_mlx5_query_tl_devices, + uct_ud_mlx5_iface_t, "UD_MLX5_", uct_ud_mlx5_iface_config_table, + uct_ud_mlx5_iface_config_t); diff --git a/src/uct/ib/ud/accel/ud_mlx5.h b/src/uct/ib/ud/accel/ud_mlx5.h new file mode 100644 index 0000000..3cd43f1 --- /dev/null +++ b/src/uct/ib/ud/accel/ud_mlx5.h @@ -0,0 +1,54 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ +#ifndef UD_MLX5_H +#define UD_MLX5_H + +#include "ud_mlx5_common.h" + +#include +#include + + /* UD mlx5 endpoint: the base UD endpoint plus its mlx5 address vector; grh_av is passed to the datagram segment only when is_global is set. */ +typedef struct { + uct_ud_ep_t super; + uct_ib_mlx5_base_av_t av; + uint8_t is_global; + struct mlx5_grh_av grh_av; +} uct_ud_mlx5_ep_t; + + /* UD mlx5 interface configuration: generic UD, mlx5-common and UD-mlx5-common sections. */ +typedef struct { + uct_ud_iface_config_t super; + uct_ib_mlx5_iface_config_t mlx5_common; + uct_ud_mlx5_iface_common_config_t ud_mlx5_common; +} uct_ud_mlx5_iface_config_t; + + /* UD mlx5 interface: the base UD interface plus the mlx5 send and receive work queues and one mlx5 CQ per direction. */ +typedef struct { + uct_ud_iface_t super; + struct { + uct_ib_mlx5_txwq_t wq; + } tx; + struct { + uct_ib_mlx5_rxwq_t wq; + } rx; + uct_ib_mlx5_cq_t cq[UCT_IB_DIR_NUM]; + uct_ud_mlx5_iface_common_t ud_mlx5_common; +} uct_ud_mlx5_iface_t; + + /* Returns the control flag for the next send: MLX5_WQE_CTRL_CQ_UPDATE (resetting tx.unsignaled) once tx.unsignaled has reached UCT_UD_TX_MODERATION, otherwise increments tx.unsignaled and returns 0. */ +static inline unsigned uct_ud_mlx5_tx_moderation(uct_ud_mlx5_iface_t *iface) +{ + if (iface->super.tx.unsignaled >= UCT_UD_TX_MODERATION) { + iface->super.tx.unsignaled = 0; + return MLX5_WQE_CTRL_CQ_UPDATE; + } + iface->super.tx.unsignaled++; + return 0; +} + +#endif + diff --git a/src/uct/ib/ud/accel/ud_mlx5_common.c b/src/uct/ib/ud/accel/ud_mlx5_common.c new file mode 100644 index 0000000..bfd1c50 --- /dev/null +++ b/src/uct/ib/ud/accel/ud_mlx5_common.c @@ -0,0 +1,71 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#include "ud_mlx5_common.h" + + /* Common UD mlx5 interface configuration table; its only field is the COMPACT_AV boolean. */ +ucs_config_field_t uct_ud_mlx5_iface_common_config_table[] = { + {"COMPACT_AV", "yes", + "Enable compact address-vector optimization.", + ucs_offsetof(uct_ud_mlx5_iface_common_config_t, enable_compact_av), UCS_CONFIG_TYPE_BOOL}, + + {NULL} +}; + /* Initialize iface->config.compact_av: obtained from uct_ib_mlx5_get_compact_av() when enabled in the configuration (its status is returned), set to 0 otherwise. */ +ucs_status_t uct_ud_mlx5_iface_common_init(uct_ib_iface_t *ib_iface, + uct_ud_mlx5_iface_common_t *iface, + uct_ud_mlx5_iface_common_config_t *config) +{ + if (config->enable_compact_av) { + /* Check that compact AV is supported by the device */ + return uct_ib_mlx5_get_compact_av(ib_iface, &iface->config.compact_av); + } + + iface->config.compact_av = 0; + return UCS_OK; +} + /* Create an address handle for ib_addr and fill base_av from it; *is_global is set from the AH attributes and, for a global address, grh_av (which must not be NULL) is filled as well. On return base_av->dqp_dct holds only the EXTENDED_UD_AV flag, if it applies. Returns the status of uct_ib_iface_create_ah() on failure. */ +ucs_status_t uct_ud_mlx5_iface_get_av(uct_ib_iface_t *iface, + uct_ud_mlx5_iface_common_t *ud_common_iface, + const uct_ib_address_t *ib_addr, + uct_ib_mlx5_base_av_t *base_av, + struct mlx5_grh_av *grh_av, + int *is_global) +{ + ucs_status_t status; + struct ibv_ah *ah; + struct mlx5_wqe_av mlx5_av; + struct ibv_ah_attr ah_attr; + + uct_ib_iface_fill_ah_attr_from_addr(iface, ib_addr, &ah_attr); + status = uct_ib_iface_create_ah(iface, &ah_attr, &ah); + if (status != UCS_OK) { + return status; + } + *is_global = ah_attr.is_global; + + uct_ib_mlx5_get_av(ah, &mlx5_av); + + base_av->stat_rate_sl = mlx5_av_base(&mlx5_av)->stat_rate_sl; + base_av->fl_mlid = mlx5_av_base(&mlx5_av)->fl_mlid; + base_av->rlid = mlx5_av_base(&mlx5_av)->rlid; + base_av->dqp_dct = 0; + /* mark the AV as extended when compact AV is not enabled for this interface or the address is global */ + if (!ud_common_iface->config.compact_av || ah_attr.is_global) { + base_av->dqp_dct |= UCT_IB_MLX5_EXTENDED_UD_AV; + } + + ucs_assertv_always((UCT_IB_MLX5_AV_FULL_SIZE > UCT_IB_MLX5_AV_BASE_SIZE) || + (base_av->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV), + "compact address vector not supported, and EXTENDED_AV flag is missing"); + + if (*is_global) { + ucs_assert_always(grh_av != NULL); + memcpy(grh_av, mlx5_av_grh(&mlx5_av), sizeof(*grh_av)); + } + return UCS_OK; +} + diff --git a/src/uct/ib/ud/accel/ud_mlx5_common.h b/src/uct/ib/ud/accel/ud_mlx5_common.h new file 
mode 100644 index 0000000..e3d75ce --- /dev/null +++ b/src/uct/ib/ud/accel/ud_mlx5_common.h @@ -0,0 +1,50 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UD_MLX5_COMMON_H_ +#define UD_MLX5_COMMON_H_ + +#include + + +typedef struct uct_ud_mlx5_iface_common_config { + int enable_compact_av; +} uct_ud_mlx5_iface_common_config_t; + + +typedef struct uct_ud_mlx5_iface_common { + struct { + int compact_av; + } config; +} uct_ud_mlx5_iface_common_t; + + +extern ucs_config_field_t uct_ud_mlx5_iface_common_config_table[]; + + +static UCS_F_ALWAYS_INLINE size_t +uct_ib_mlx5_wqe_av_size(uct_ib_mlx5_base_av_t *av) +{ + return (av->dqp_dct & UCT_IB_MLX5_EXTENDED_UD_AV) ? + UCT_IB_MLX5_AV_FULL_SIZE : + UCT_IB_MLX5_AV_BASE_SIZE; +} + + +ucs_status_t uct_ud_mlx5_iface_common_init(uct_ib_iface_t *ib_iface, + uct_ud_mlx5_iface_common_t *iface, + uct_ud_mlx5_iface_common_config_t *config); + + +ucs_status_t uct_ud_mlx5_iface_get_av(uct_ib_iface_t *iface, + uct_ud_mlx5_iface_common_t *ud_common_iface, + const uct_ib_address_t *ib_addr, + uct_ib_mlx5_base_av_t *base_av, + struct mlx5_grh_av *grh_av, + int *is_global); + + +#endif diff --git a/src/uct/ib/ud/base/ud_def.h b/src/uct/ib/ud/base/ud_def.h new file mode 100644 index 0000000..1ac1c81 --- /dev/null +++ b/src/uct/ib/ud/base/ud_def.h @@ -0,0 +1,251 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UD_DEF_H_ +#define UD_DEF_H_ + +#include +#include +#include +#include +#include + + +#define UCT_UD_QP_HASH_SIZE 256 +#define UCT_UD_TX_MODERATION 64 +#define UCT_UD_MIN_INLINE 48 +#define UCT_UD_HASH_SIZE 997 +#define UCT_UD_RX_BATCH_MIN 8 + +#define UCT_UD_INITIAL_PSN 1 /* initial packet serial number */ +/* congestion avoidance settings. 
See ud_ep.h for details */ +#define UCT_UD_CA_AI_VALUE 1 /* window += AI_VALUE */ +#define UCT_UD_CA_MD_FACTOR 2 /* window = window/factor */ +#define UCT_UD_CA_DUP_ACK_CNT 2 /* TODO: not implemented yet */ +#define UCT_UD_RESENDS_PER_ACK 4 /* request an ACK on every Nth resent packet */ +#define UCT_UD_SKB_ALIGN UCS_SYS_CACHE_LINE_SIZE + +/* note that the ud tx window is [acked_psn+1, max_psn) + * and max_psn = acked_psn + cwnd + * so add 1 to the max/min window constants instead of doing this in the code + */ +#define UCT_UD_CA_MIN_WINDOW 2 +#define UCT_UD_CA_MAX_WINDOW 1025 + + +typedef uint16_t uct_ud_psn_t; +#define UCT_UD_PSN_COMPARE UCS_CIRCULAR_COMPARE16 +typedef struct uct_ud_iface uct_ud_iface_t; +typedef struct uct_ud_ep uct_ud_ep_t; +typedef struct uct_ud_ctl_hdr uct_ud_ctl_hdr_t; +typedef uct_ib_qpnum_t uct_ud_iface_addr_t; +typedef struct uct_ud_ep_addr uct_ud_ep_addr_t; +typedef struct uct_ud_iface_peer uct_ud_iface_peer_t; + +enum { + UCT_UD_PACKET_ACK_REQ_SHIFT = 25, + UCT_UD_PACKET_AM_ID_SHIFT = 27, + UCT_UD_PACKET_DEST_ID_SHIFT = 24, + UCT_UD_PACKET_PUT_SHIFT = 28, +}; + +enum { + UCT_UD_PACKET_FLAG_AM = UCS_BIT(24), + UCT_UD_PACKET_FLAG_ACK_REQ = UCS_BIT(25), + UCT_UD_PACKET_FLAG_ECN = UCS_BIT(26), + UCT_UD_PACKET_FLAG_NAK = UCS_BIT(27), + UCT_UD_PACKET_FLAG_PUT = UCS_BIT(28), + UCT_UD_PACKET_FLAG_CTL = UCS_BIT(29), + + UCT_UD_PACKET_AM_ID_MASK = UCS_MASK(UCT_UD_PACKET_AM_ID_SHIFT), + UCT_UD_PACKET_DEST_ID_MASK = UCS_MASK(UCT_UD_PACKET_DEST_ID_SHIFT), +}; + +enum { + UCT_UD_PACKET_CREQ = 1, + UCT_UD_PACKET_CREP = 2, +}; + +/* +network header layout + +A - ack request +E - explicit congestion notification (ecn) +N - negative acknowledgement +P - put emulation (will be disabled in the future) +C - control packet extended header + +Active message packet header + + 3 2 2 2 2 1 1 + 1 6 5 4 3 6 5 0 ++---------------------------------------------------------------+ +| am_id |E|A|1| dest_ep_id (24 bit) | 
++---------------------------------------------------------------+ +| ack_psn (16 bit) | psn (16 bit) | ++---------------------------------------------------------------+ + +Control packet header + + 3 2 2 2 2 2 2 2 1 1 + 1 9 8 7 6 5 4 3 6 5 0 ++---------------------------------------------------------------+ +|rsv|C|P|N|E|A|0| dest_ep_id (24 bit) | ++---------------------------------------------------------------+ +| ack_psn (16 bit) | psn (16 bit) | ++---------------------------------------------------------------+ + + // neth layout in human readable form + uint32_t dest_ep_id:24; + uint8_t is_am:1; + union { + struct { // am false + uint8_t ack_req:1; + uint8_t ecn:1; + uint8_t nak:1; + uint8_t put:1; + uint8_t ctl:1; + uint8_t reserved:2; + } ctl; + struct { // am true + uint8_t ack_req:1; + uint8_t ecn:1; + uint8_t am_id:5; + } am; + }; +*/ + +typedef struct uct_ud_neth { + uint32_t packet_type; + uct_ud_psn_t psn; + uct_ud_psn_t ack_psn; +} UCS_S_PACKED uct_ud_neth_t; + + +enum { + UCT_UD_SEND_SKB_FLAG_ACK_REQ = UCS_BIT(1), /* ACK was requested for this skb */ + UCT_UD_SEND_SKB_FLAG_COMP = UCS_BIT(2), /* This skb contains a completion */ + UCT_UD_SEND_SKB_FLAG_ZCOPY = UCS_BIT(3), /* This skb contains a zero-copy segment */ + UCT_UD_SEND_SKB_FLAG_ERR = UCS_BIT(4), /* This skb contains a status after failure */ + UCT_UD_SEND_SKB_FLAG_CANCEL = UCS_BIT(5) /* This skb contains a UCS_ERR_CANCEL status */ +}; + + +/* + * Send skb with completion layout: + * - if COMP skb flag is set, skb contains uct_ud_comp_desc_t after the payload + * - if ZCOPY skb flag is set, skb contains uct_ud_zcopy_desc_t after the payload. + * - otherwise, there is no additional data. 
+ */ +typedef struct uct_ud_send_skb { + ucs_queue_elem_t queue; /* in send window */ + uint32_t lkey; + uint16_t len; /* data size */ + uint8_t flags; + uint8_t status; /* used in case of failure */ + uct_ud_neth_t neth[0]; +} UCS_S_PACKED UCS_V_ALIGNED(UCT_UD_SKB_ALIGN) uct_ud_send_skb_t; + + +typedef struct uct_ud_comp_desc { + uct_completion_t *comp; + uct_ud_ep_t *ep; +} uct_ud_comp_desc_t; + + +/** + * Used to keep uct_iov_t buffers without datatype information. + */ +typedef struct uct_ud_iov { + void *buffer; /**< Data buffer */ + uint16_t length; /**< Length of the buffer in bytes */ +} UCS_S_PACKED uct_ud_iov_t; + + +typedef struct uct_ud_zcopy_desc { + uct_ud_comp_desc_t super; + uct_ud_iov_t iov[UCT_IB_MAX_IOV]; + uint16_t iovcnt; /* Count of the iov[] array valid elements */ +} uct_ud_zcopy_desc_t; + + +typedef struct uct_ud_send_skb_inl { + uct_ud_send_skb_t super; + char data[sizeof(uct_ud_neth_t)]; /* placeholder for super.neth */ +} uct_ud_send_skb_inl_t; + + +typedef struct uct_ud_recv_skb { + uct_ib_iface_recv_desc_t super; + union { + struct { + ucs_frag_list_elem_t elem; + } ooo; + struct { + ucs_queue_elem_t queue; + uint32_t len; + } am; + } u; +} uct_ud_recv_skb_t; + + +typedef struct uct_ud_am_short_hdr { + uint64_t hdr; +} UCS_S_PACKED uct_ud_am_short_hdr_t; + + +typedef struct uct_ud_put_hdr { + uint64_t rva; +} UCS_S_PACKED uct_ud_put_hdr_t; + + +struct uct_ud_iface_addr { + uct_ib_uint24_t qp_num; +}; + + +struct uct_ud_ep_addr { + uct_ud_iface_addr_t iface_addr; + uct_ib_uint24_t ep_id; +}; + + +static inline uint32_t uct_ud_neth_get_dest_id(uct_ud_neth_t *neth) +{ + return neth->packet_type & UCT_UD_PACKET_DEST_ID_MASK; +} + +static inline void uct_ud_neth_set_dest_id(uct_ud_neth_t *neth, uint32_t id) +{ + neth->packet_type |= id; +} + +static inline uint8_t uct_ud_neth_get_am_id(uct_ud_neth_t *neth) +{ + return neth->packet_type >> UCT_UD_PACKET_AM_ID_SHIFT; +} + +static inline void uct_ud_neth_set_am_id(uct_ud_neth_t *neth, uint8_t 
id) +{ + neth->packet_type |= ((uint32_t)id << UCT_UD_PACKET_AM_ID_SHIFT); /* widen first: id promotes to signed int, and shifting ids >= 16 left by 27 bits would overflow it */ +} + +static inline uct_ud_comp_desc_t *uct_ud_comp_desc(uct_ud_send_skb_t *skb) +{ + ucs_assert(skb->flags & (UCT_UD_SEND_SKB_FLAG_COMP | + UCT_UD_SEND_SKB_FLAG_ERR | + UCT_UD_SEND_SKB_FLAG_CANCEL)); + return (uct_ud_comp_desc_t*)((char *)skb->neth + skb->len); +} + +static inline uct_ud_zcopy_desc_t *uct_ud_zcopy_desc(uct_ud_send_skb_t *skb) +{ + ucs_assert(skb->flags & UCT_UD_SEND_SKB_FLAG_ZCOPY); + return (uct_ud_zcopy_desc_t*)((char *)skb->neth + skb->len); +} + + +#endif diff --git a/src/uct/ib/ud/base/ud_ep.c b/src/uct/ib/ud/base/ud_ep.c new file mode 100644 index 0000000..54a63eb --- /dev/null +++ b/src/uct/ib/ud/base/ud_ep.c @@ -0,0 +1,1304 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "ud_ep.h" +#include "ud_iface.h" +#include "ud_inl.h" +#include "ud_def.h" + +#include +#include +#include +#include +#include + + +/* Must be less than peer_timeout to avoid false positive errors taking into + * account timer resolution and not too small to avoid performance degradation + */ +#define UCT_UD_SLOW_TIMER_MAX_TICK(_iface) ((_iface)->config.peer_timeout / 3) + +static void uct_ud_ep_do_pending_ctl(uct_ud_ep_t *ep, uct_ud_iface_t *iface); + +static void uct_ud_peer_name(uct_ud_peer_name_t *peer) +{ + gethostname(peer->name, sizeof(peer->name)); + peer->pid = getpid(); +} + +static void uct_ud_ep_set_state(uct_ud_ep_t *ep, uint32_t state) +{ + ep->flags |= state; +} + +#if ENABLE_DEBUG_DATA +static void uct_ud_peer_copy(uct_ud_peer_name_t *dst, uct_ud_peer_name_t *src) +{ + memcpy(dst, src, sizeof(*src)); +} + +#else +#define uct_ud_peer_copy(dst, src) +#endif + + +static void uct_ud_ep_resend_start(uct_ud_iface_t *iface, uct_ud_ep_t *ep) +{ + ep->resend.max_psn = ep->tx.psn - 1; + ep->resend.psn = ep->tx.acked_psn + 1; + ep->resend.pos = ucs_queue_iter_begin(&ep->tx.window); + uct_ud_ep_ctl_op_add(iface, ep, 
UCT_UD_EP_OP_RESEND); +} + + +static void uct_ud_ep_resend_ack(uct_ud_iface_t *iface, uct_ud_ep_t *ep) +{ + if (UCT_UD_PSN_COMPARE(ep->tx.acked_psn, <, ep->resend.max_psn)) { + /* new ack arrived that acked something in our resend window. */ + if (UCT_UD_PSN_COMPARE(ep->resend.psn, <=, ep->tx.acked_psn)) { + ucs_debug("ep(%p): ack received during resend resend.psn=%d tx.acked_psn=%d", + ep, ep->resend.psn, ep->tx.acked_psn); + ep->resend.pos = ucs_queue_iter_begin(&ep->tx.window); + ep->resend.psn = ep->tx.acked_psn + 1; + } + uct_ud_ep_ctl_op_add(iface, ep, UCT_UD_EP_OP_RESEND); + } else { + /* everything in resend window was acked - no need to resend anymore */ + ep->resend.psn = ep->resend.max_psn + 1; + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_RESEND); + } +} + + +static void uct_ud_ep_ca_drop(uct_ud_ep_t *ep) +{ + ucs_debug("ep: %p ca drop@cwnd = %d in flight: %d", + ep, ep->ca.cwnd, (int)ep->tx.psn-(int)ep->tx.acked_psn-1); + ep->ca.cwnd /= UCT_UD_CA_MD_FACTOR; + if (ep->ca.cwnd < UCT_UD_CA_MIN_WINDOW) { + ep->ca.cwnd = UCT_UD_CA_MIN_WINDOW; + } + ep->tx.max_psn = ep->tx.acked_psn + ep->ca.cwnd; + if (UCT_UD_PSN_COMPARE(ep->tx.max_psn, >, ep->tx.psn)) { + /* do not send more until we get acks going */ + uct_ud_ep_tx_stop(ep); + } +} + +static UCS_F_ALWAYS_INLINE void uct_ud_ep_ca_ack(uct_ud_ep_t *ep) +{ + if (ep->ca.cwnd < ep->ca.wmax) { + ep->ca.cwnd += UCT_UD_CA_AI_VALUE; + } + ep->tx.max_psn = ep->tx.acked_psn + ep->ca.cwnd; +} + + +static void uct_ud_ep_reset(uct_ud_ep_t *ep) +{ + ep->tx.psn = UCT_UD_INITIAL_PSN; + ep->ca.cwnd = UCT_UD_CA_MIN_WINDOW; + ep->ca.wmax = ucs_derived_of(ep->super.super.iface, + uct_ud_iface_t)->config.max_window; + ep->tx.max_psn = ep->tx.psn + ep->ca.cwnd; + ep->tx.acked_psn = UCT_UD_INITIAL_PSN - 1; + ep->tx.pending.ops = UCT_UD_EP_OP_NONE; + ucs_queue_head_init(&ep->tx.window); + + ep->resend.pos = ucs_queue_iter_begin(&ep->tx.window); + ep->resend.psn = ep->tx.psn; + ep->resend.max_psn = ep->tx.acked_psn; + ep->rx_creq_count = 
0; + + ep->rx.acked_psn = UCT_UD_INITIAL_PSN - 1; + ucs_frag_list_init(ep->tx.psn-1, &ep->rx.ooo_pkts, 0 /*TODO: ooo support */ + UCS_STATS_ARG(ep->super.stats)); +} + +static ucs_status_t uct_ud_ep_free_by_timeout(uct_ud_ep_t *ep, + uct_ud_iface_t *iface) +{ + uct_ud_iface_ops_t *ops; + ucs_time_t diff; + + diff = ucs_twheel_get_time(&iface->async.slow_timer) - ep->close_time; + if (diff > iface->config.peer_timeout) { + ucs_debug("ud_ep %p is destroyed after %fs with timeout %fs\n", + ep, ucs_time_to_sec(diff), + ucs_time_to_sec(iface->config.peer_timeout)); + ops = ucs_derived_of(iface->super.ops, uct_ud_iface_ops_t); + ops->ep_free(&ep->super.super); + return UCS_OK; + } + return UCS_INPROGRESS; +} + +static void uct_ud_ep_slow_timer(ucs_wtimer_t *self) +{ + uct_ud_ep_t *ep = ucs_container_of(self, uct_ud_ep_t, slow_timer); + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_ud_iface_t); + ucs_time_t now; + ucs_time_t diff; + ucs_status_t status; + + UCT_UD_EP_HOOK_CALL_TIMER(ep); + + if (ucs_queue_is_empty(&ep->tx.window)) { + /* Do not free the EP until all scheduled communications are done. 
*/ + if (ep->flags & UCT_UD_EP_FLAG_DISCONNECTED) { + status = uct_ud_ep_free_by_timeout(ep, iface); + if (status == UCS_INPROGRESS) { + goto again; + } + } + return; + } + + now = ucs_twheel_get_time(&iface->async.slow_timer); + diff = now - ep->tx.send_time; + if (diff > iface->config.peer_timeout) { + ucs_debug("ep %p: timeout of %.2f sec, config::peer_timeout - %.2f sec", + ep, ucs_time_to_sec(diff), + ucs_time_to_sec(iface->config.peer_timeout)); + iface->super.ops->handle_failure(&iface->super, ep, + UCS_ERR_ENDPOINT_TIMEOUT); + return; + } else if (diff > 3*iface->async.slow_tick) { + ucs_trace("scheduling resend now: %lu send_time: %lu diff: %lu tick: %lu", + now, ep->tx.send_time, now - ep->tx.send_time, + ep->tx.slow_tick); + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_ACK_REQ); + uct_ud_ep_ca_drop(ep); + uct_ud_ep_resend_start(iface, ep); + } else if ((diff > iface->async.slow_tick) && uct_ud_ep_is_connected(ep)) { + /* It is possible that the sender is slow. + * Try to flush the window twice before going into + * full resend mode. 
+ */ + uct_ud_ep_ctl_op_add(iface, ep, UCT_UD_EP_OP_ACK_REQ); + } + +again: + /* Cool down the timer on rescheduling/resending */ + ep->tx.slow_tick *= iface->config.slow_timer_backoff; + ep->tx.slow_tick = ucs_min(ep->tx.slow_tick, + UCT_UD_SLOW_TIMER_MAX_TICK(iface)); + ucs_wtimer_add(&iface->async.slow_timer, &ep->slow_timer, ep->tx.slow_tick); +} + +UCS_CLASS_INIT_FUNC(uct_ud_ep_t, uct_ud_iface_t *iface) +{ + ucs_trace_func(""); + + memset(self, 0, sizeof(*self)); + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super.super); + + self->dest_ep_id = UCT_UD_EP_NULL_ID; + uct_ud_ep_reset(self); + ucs_list_head_init(&self->cep_list); + uct_ud_iface_add_ep(iface, self); + self->tx.slow_tick = iface->async.slow_tick; + ucs_wtimer_init(&self->slow_timer, uct_ud_ep_slow_timer); + ucs_arbiter_group_init(&self->tx.pending.group); + ucs_arbiter_elem_init(&self->tx.pending.elem); + + UCT_UD_EP_HOOK_INIT(self); + ucs_debug("created ep ep=%p iface=%p id=%d", self, iface, self->ep_id); + return UCS_OK; +} + +static ucs_arbiter_cb_result_t +uct_ud_ep_pending_cancel_cb(ucs_arbiter_t *arbiter, ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_ud_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), + uct_ud_ep_t, tx.pending.group); + uct_pending_req_t *req; + + /* we may have pending op on ep */ + if (&ep->tx.pending.elem == elem) { + /* return ignored by arbiter */ + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } + + /* uct user should not have anything pending */ + req = ucs_container_of(elem, uct_pending_req_t, priv); + ucs_warn("ep=%p removing user pending req=%p", ep, req); + + /* return ignored by arbiter */ + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_ud_ep_t) +{ + uct_ud_iface_t *iface = ucs_derived_of(self->super.super.iface, uct_ud_iface_t); + + ucs_trace_func("ep=%p id=%d conn_id=%d", self, self->ep_id, self->conn_id); + + ucs_wtimer_remove(&self->slow_timer); + uct_ud_iface_remove_ep(iface, self); + 
uct_ud_iface_cep_remove(self); + ucs_frag_list_cleanup(&self->rx.ooo_pkts); + + ucs_arbiter_group_purge(&iface->tx.pending_q, &self->tx.pending.group, + uct_ud_ep_pending_cancel_cb, 0); + + if (!ucs_queue_is_empty(&self->tx.window)) { + ucs_debug("ep=%p id=%d conn_id=%d has %d unacked packets", + self, self->ep_id, self->conn_id, + (int)ucs_queue_length(&self->tx.window)); + } + ucs_arbiter_group_cleanup(&self->tx.pending.group); +} + +UCS_CLASS_DEFINE(uct_ud_ep_t, uct_base_ep_t); + +void uct_ud_ep_clone(uct_ud_ep_t *old_ep, uct_ud_ep_t *new_ep) +{ + uct_ep_t *ep_h = &old_ep->super.super; + uct_iface_t *iface_h = ep_h->iface; + + uct_ud_iface_replace_ep(ucs_derived_of(iface_h, uct_ud_iface_t), old_ep, new_ep); + memcpy(new_ep, old_ep, sizeof(uct_ud_ep_t)); +} + +ucs_status_t uct_ud_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr) +{ + uct_ud_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_ep_t); + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ud_iface_t); + uct_ud_ep_addr_t *ep_addr = (uct_ud_ep_addr_t *)addr; + + uct_ib_pack_uint24(ep_addr->iface_addr.qp_num, iface->qp->qp_num); + uct_ib_pack_uint24(ep_addr->ep_id, ep->ep_id); + return UCS_OK; +} + +static ucs_status_t uct_ud_ep_connect_to_iface(uct_ud_ep_t *ep, + const uct_ib_address_t *ib_addr, + const uct_ud_iface_addr_t *if_addr) +{ + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ud_iface_t); + uct_ib_device_t UCS_V_UNUSED *dev = uct_ib_iface_device(&iface->super); + char buf[128]; + + ucs_frag_list_cleanup(&ep->rx.ooo_pkts); + uct_ud_ep_reset(ep); + + ucs_debug(UCT_IB_IFACE_FMT" lid %d qpn 0x%x epid %u ep %p connected to " + "IFACE %s qpn 0x%x", UCT_IB_IFACE_ARG(&iface->super), + dev->port_attr[iface->super.config.port_num - dev->first_port].lid, + iface->qp->qp_num, ep->ep_id, ep, + uct_ib_address_str(ib_addr, buf, sizeof(buf)), + uct_ib_unpack_uint24(if_addr->qp_num)); + + return UCS_OK; +} + +static ucs_status_t uct_ud_ep_disconnect_from_iface(uct_ep_h tl_ep) +{ + 
uct_ud_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_ep_t); + + ucs_frag_list_cleanup(&ep->rx.ooo_pkts); + uct_ud_ep_reset(ep); + ep->dest_ep_id = UCT_UD_EP_NULL_ID; + + return UCS_OK; +} + +ucs_status_t uct_ud_ep_create_connected_common(uct_ud_iface_t *iface, + const uct_ib_address_t *ib_addr, + const uct_ud_iface_addr_t *if_addr, + uct_ud_ep_t **new_ep_p, + uct_ud_send_skb_t **skb_p) +{ + uct_ep_params_t params; + ucs_status_t status; + uct_ud_ep_t *ep; + uct_ep_h new_ep_h; + + ep = uct_ud_iface_cep_lookup(iface, ib_addr, if_addr, UCT_UD_EP_CONN_ID_MAX); + if (ep) { /* an endpoint for this peer is already registered: hand it back instead of creating one */ + uct_ud_ep_set_state(ep, UCT_UD_EP_FLAG_CREQ_NOTSENT); + ep->flags &= ~UCT_UD_EP_FLAG_PRIVATE; + *new_ep_p = ep; + *skb_p = NULL; + return UCS_ERR_ALREADY_EXISTS; + } + + params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + params.iface = &iface->super.super.super; + status = uct_ep_create(&params, &new_ep_h); + if (status != UCS_OK) { + return status; + } + ep = ucs_derived_of(new_ep_h, uct_ud_ep_t); + + status = uct_ud_ep_connect_to_iface(ep, ib_addr, if_addr); + if (status != UCS_OK) { + return status; + } + + status = uct_ud_iface_cep_insert(iface, ib_addr, if_addr, ep, UCT_UD_EP_CONN_ID_MAX); + if (status != UCS_OK) { + goto err_cep_insert; + } + + *skb_p = uct_ud_ep_prepare_creq(ep); + if (!*skb_p) { /* no skb for the CREQ right now: queue it as a pending control op */ + status = UCS_ERR_NO_RESOURCE; + uct_ud_ep_ctl_op_add(iface, ep, UCT_UD_EP_OP_CREQ); + } + + *new_ep_p = ep; + return status; + +err_cep_insert: + uct_ud_ep_disconnect_from_iface(&ep->super.super); + return status; +} + +void uct_ud_ep_destroy_connected(uct_ud_ep_t *ep, + const uct_ib_address_t *ib_addr, + const uct_ud_iface_addr_t *if_addr) +{ + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ud_iface_t); + uct_ud_iface_cep_rollback(iface, ib_addr, if_addr, ep); + uct_ud_ep_disconnect_from_iface(&ep->super.super); +} + +ucs_status_t uct_ud_ep_connect_to_ep(uct_ud_ep_t *ep, + const uct_ib_address_t *ib_addr, + const uct_ud_ep_addr_t *ep_addr) +{ + uct_ud_iface_t *iface = 
ucs_derived_of(ep->super.super.iface, uct_ud_iface_t); + uct_ib_device_t UCS_V_UNUSED *dev = uct_ib_iface_device(&iface->super); + char buf[128]; + + ucs_assert_always(ep->dest_ep_id == UCT_UD_EP_NULL_ID); + ucs_trace_func(""); + + ep->dest_ep_id = uct_ib_unpack_uint24(ep_addr->ep_id); + + ucs_frag_list_cleanup(&ep->rx.ooo_pkts); + uct_ud_ep_reset(ep); + + ucs_debug(UCT_IB_IFACE_FMT" slid %d qpn 0x%x epid %u connected to %s qpn 0x%x " + "epid %u", UCT_IB_IFACE_ARG(&iface->super), + dev->port_attr[iface->super.config.port_num - dev->first_port].lid, + iface->qp->qp_num, ep->ep_id, + uct_ib_address_str(ib_addr, buf, sizeof(buf)), + uct_ib_unpack_uint24(ep_addr->iface_addr.qp_num), ep->dest_ep_id); + return UCS_OK; +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_iface_add_async_comp(uct_ud_iface_t *iface, uct_ud_ep_t *ep, + uct_ud_send_skb_t *skb, ucs_status_t status) +{ + uct_ud_comp_desc_t *cdesc; + + skb->status = status; + if (status != UCS_OK) { + if (!(skb->flags & UCT_UD_SEND_SKB_FLAG_COMP)) { + skb->len = 0; + } + + if (status == UCS_ERR_ENDPOINT_TIMEOUT) { + skb->flags |= UCT_UD_SEND_SKB_FLAG_ERR; + ++ep->tx.err_skb_count; + } else if (status == UCS_ERR_CANCELED) { + skb->flags |= UCT_UD_SEND_SKB_FLAG_CANCEL; + } + } + + cdesc = uct_ud_comp_desc(skb); + + /* don't call user completion from async context. instead, put + * it on a queue which will be progressed from main thread. 
+ */ + ucs_queue_push(&iface->tx.async_comp_q, &skb->queue); + cdesc->ep = ep; + ep->flags |= UCT_UD_EP_FLAG_ASYNC_COMPS; +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_ep_process_ack(uct_ud_iface_t *iface, uct_ud_ep_t *ep, + uct_ud_psn_t ack_psn, int is_async) +{ + uct_ud_send_skb_t *skb; + if (ucs_unlikely(UCT_UD_PSN_COMPARE(ack_psn, <=, ep->tx.acked_psn))) { + return; + } + + ep->tx.acked_psn = ack_psn; + + /* Release acknowledged skb's */ + ucs_queue_for_each_extract(skb, &ep->tx.window, queue, + UCT_UD_PSN_COMPARE(skb->neth->psn, <=, ack_psn)) { + if (ucs_unlikely(skb->flags & UCT_UD_SEND_SKB_FLAG_COMP)) { + if (ucs_unlikely(is_async)) { + uct_ud_iface_add_async_comp(iface, ep, skb, UCS_OK); + continue; + } + + uct_invoke_completion(uct_ud_comp_desc(skb)->comp, UCS_OK); + } + + skb->flags = 0; /* reset also ACK_REQ flag */ + ucs_mpool_put(skb); + } + + uct_ud_ep_ca_ack(ep); + + if (ucs_unlikely(UCT_UD_PSN_COMPARE(ep->resend.psn, <=, ep->resend.max_psn))) { + uct_ud_ep_resend_ack(iface, ep); + } + + ucs_arbiter_group_schedule(&iface->tx.pending_q, &ep->tx.pending.group); + + ep->tx.slow_tick = iface->async.slow_tick; + ep->tx.send_time = uct_ud_iface_get_async_time(iface); +} + +static inline void uct_ud_ep_rx_put(uct_ud_neth_t *neth, unsigned byte_len) +{ + uct_ud_put_hdr_t *put_hdr; + + put_hdr = (uct_ud_put_hdr_t *)(neth+1); + + memcpy((void *)put_hdr->rva, put_hdr+1, + byte_len - sizeof(*neth) - sizeof(*put_hdr)); +} + +static uct_ud_ep_t *uct_ud_ep_create_passive(uct_ud_iface_t *iface, uct_ud_ctl_hdr_t *ctl) +{ + uct_ep_params_t params; + uct_ud_ep_t *ep; + ucs_status_t status; + uct_ep_t *ep_h; + + /* create new endpoint on this iface; every failure in this function is treated as fatal (ucs_assert_always) */ + params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + params.iface = &iface->super.super.super; + status = uct_ep_create(&params, &ep_h); + ucs_assert_always(status == UCS_OK); + ep = ucs_derived_of(ep_h, uct_ud_ep_t); + + status = uct_ep_connect_to_ep(ep_h, (void*)uct_ud_creq_ib_addr(ctl), + (void*)&ctl->conn_req.ep_addr); + 
ucs_assert_always(status == UCS_OK); + + status = uct_ud_iface_cep_insert(iface, uct_ud_creq_ib_addr(ctl), + &ctl->conn_req.ep_addr.iface_addr, + ep, ctl->conn_req.conn_id); + ucs_assert_always(status == UCS_OK); + return ep; +} + +static void uct_ud_ep_rx_creq(uct_ud_iface_t *iface, uct_ud_neth_t *neth) +{ + uct_ud_ep_t *ep; + uct_ud_ctl_hdr_t *ctl = (uct_ud_ctl_hdr_t *)(neth + 1); + + ucs_assert_always(ctl->type == UCT_UD_PACKET_CREQ); + + ep = uct_ud_iface_cep_lookup(iface, uct_ud_creq_ib_addr(ctl), + &ctl->conn_req.ep_addr.iface_addr, + ctl->conn_req.conn_id); + if (!ep) { + ep = uct_ud_ep_create_passive(iface, ctl); + ucs_assert_always(ep != NULL); + ep->rx.ooo_pkts.head_sn = neth->psn; + uct_ud_peer_copy(&ep->peer, ucs_unaligned_ptr(&ctl->peer)); + uct_ud_ep_ctl_op_add(iface, ep, UCT_UD_EP_OP_CREP); + uct_ud_ep_set_state(ep, UCT_UD_EP_FLAG_PRIVATE); + } else { + if (ep->dest_ep_id == UCT_UD_EP_NULL_ID) { + /* simultaneous CREQ */ + ep->dest_ep_id = uct_ib_unpack_uint24(ctl->conn_req.ep_addr.ep_id); + ep->rx.ooo_pkts.head_sn = neth->psn; + uct_ud_peer_copy(&ep->peer, ucs_unaligned_ptr(&ctl->peer)); + ucs_debug("simultanuous CREQ ep=%p" + "(iface=%p conn_id=%d ep_id=%d, dest_ep_id=%d rx_psn=%u)", + ep, iface, ep->conn_id, ep->ep_id, + ep->dest_ep_id, ep->rx.ooo_pkts.head_sn); + if (UCT_UD_PSN_COMPARE(ep->tx.psn, >, UCT_UD_INITIAL_PSN)) { + /* our own creq was sent, treat incoming creq as ack and remove our own + * from tx window + */ + uct_ud_ep_process_ack(iface, ep, UCT_UD_INITIAL_PSN, 0); + } + uct_ud_ep_ctl_op_add(iface, ep, UCT_UD_EP_OP_CREP); + } + } + + ++ep->rx_creq_count; + + ucs_assert_always(ctl->conn_req.conn_id == ep->conn_id); + ucs_assert_always(uct_ib_unpack_uint24(ctl->conn_req.ep_addr.ep_id) == ep->dest_ep_id); + /* creq must always have same psn */ + ucs_assertv_always(ep->rx.ooo_pkts.head_sn == neth->psn, + "iface=%p ep=%p conn_id=%d ep_id=%d, dest_ep_id=%d rx_psn=%u " + "neth_psn=%u ep_flags=0x%x ctl_ops=0x%x rx_creq_count=%d", + iface, ep, 
ep->conn_id, ep->ep_id, ep->dest_ep_id, + ep->rx.ooo_pkts.head_sn, neth->psn, ep->flags, + ep->tx.pending.ops, ep->rx_creq_count); + /* schedule connection reply op */ + UCT_UD_EP_HOOK_CALL_RX(ep, neth, sizeof(*neth) + sizeof(*ctl)); + if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_CREQ)) { /* our own CREQ is still queued: remember it was not sent, since the op is removed right below */ + uct_ud_ep_set_state(ep, UCT_UD_EP_FLAG_CREQ_NOTSENT); + } + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_CREQ); + uct_ud_ep_set_state(ep, UCT_UD_EP_FLAG_CREQ_RCVD); +} + +static void uct_ud_ep_rx_ctl(uct_ud_iface_t *iface, uct_ud_ep_t *ep, + uct_ud_neth_t *neth, uct_ud_recv_skb_t *skb) +{ + uct_ud_ctl_hdr_t *ctl = (uct_ud_ctl_hdr_t*)(neth + 1); + + ucs_trace_func(""); + ucs_assert_always(ctl->type == UCT_UD_PACKET_CREP); + ucs_assert_always(ep->dest_ep_id == UCT_UD_EP_NULL_ID || + ep->dest_ep_id == ctl->conn_rep.src_ep_id); + + /* Discard duplicate CREP */ + if (UCT_UD_PSN_COMPARE(neth->psn, <, ep->rx.ooo_pkts.head_sn)) { + return; + } + + ep->rx.ooo_pkts.head_sn = neth->psn; + ep->dest_ep_id = ctl->conn_rep.src_ep_id; + ucs_arbiter_group_schedule(&iface->tx.pending_q, &ep->tx.pending.group); + uct_ud_peer_copy(&ep->peer, ucs_unaligned_ptr(&ctl->peer)); + uct_ud_ep_set_state(ep, UCT_UD_EP_FLAG_CREP_RCVD); +} + +uct_ud_send_skb_t *uct_ud_ep_prepare_creq(uct_ud_ep_t *ep) +{ + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ud_iface_t); + uct_ud_ctl_hdr_t *creq; + uct_ud_send_skb_t *skb; + uct_ud_neth_t *neth; + ucs_status_t status; + + ucs_assert_always(ep->dest_ep_id == UCT_UD_EP_NULL_ID); + ucs_assert_always(ep->ep_id != UCT_UD_EP_NULL_ID); + + /* CREQ should not be sent if CREP for the counter CREQ is scheduled + * (or sent already) */ + ucs_assertv_always(!uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_CREP) && + !(ep->flags & UCT_UD_EP_FLAG_CREP_SENT), + "iface=%p ep=%p conn_id=%d rx_psn=%u ep_flags=0x%x " + "ctl_ops=0x%x rx_creq_count=%d", + iface, ep, ep->conn_id, ep->rx.ooo_pkts.head_sn, + ep->flags, ep->tx.pending.ops, ep->rx_creq_count); + + skb = 
uct_ud_iface_get_tx_skb(iface, ep); + if (!skb) { + return NULL; + } + + neth = skb->neth; + uct_ud_neth_init_data(ep, neth); + + neth->packet_type = UCT_UD_EP_NULL_ID; + neth->packet_type |= UCT_UD_PACKET_FLAG_CTL; + + creq = (uct_ud_ctl_hdr_t *)(neth + 1); + + creq->type = UCT_UD_PACKET_CREQ; + creq->conn_req.conn_id = ep->conn_id; + + status = uct_ud_ep_get_address(&ep->super.super, + (void*)&creq->conn_req.ep_addr); + if (status != UCS_OK) { + return NULL; + } + + status = uct_ib_iface_get_device_address(&iface->super.super.super, + (uct_device_addr_t*)uct_ud_creq_ib_addr(creq)); + if (status != UCS_OK) { + return NULL; + } + + uct_ud_peer_name(ucs_unaligned_ptr(&creq->peer)); + + skb->len = sizeof(*neth) + sizeof(*creq) + iface->super.addr_size; + return skb; +} + +void uct_ud_ep_process_rx(uct_ud_iface_t *iface, uct_ud_neth_t *neth, unsigned byte_len, + uct_ud_recv_skb_t *skb, int is_async) +{ + uint32_t dest_id; + uint32_t is_am, am_id; + uct_ud_ep_t *ep = 0; /* todo: check why gcc complaints about uninitialized var */ + ucs_frag_list_ooo_type_t ooo_type; + + UCT_UD_IFACE_HOOK_CALL_RX(iface, neth, byte_len); + + dest_id = uct_ud_neth_get_dest_id(neth); + am_id = uct_ud_neth_get_am_id(neth); + is_am = neth->packet_type & UCT_UD_PACKET_FLAG_AM; + + if (ucs_unlikely(dest_id == UCT_UD_EP_NULL_ID)) { + /* must be connection request packet */ + uct_ud_ep_rx_creq(iface, neth); + goto out; + } else if (ucs_unlikely(!ucs_ptr_array_lookup(&iface->eps, dest_id, ep) || + ep->ep_id != dest_id)) + { + /* Drop the packet because it is + * allowed to do disconnect without flush/barrier. 
So it + * is possible to get packet for the ep that has been destroyed + */ + ucs_trace("RX: failed to find ep %d, dropping packet", dest_id); + goto out; + } + + ucs_assert(ep->ep_id != UCT_UD_EP_NULL_ID); + UCT_UD_EP_HOOK_CALL_RX(ep, neth, byte_len); + + uct_ud_ep_process_ack(iface, ep, neth->ack_psn, is_async); + + if (ucs_unlikely(neth->packet_type & UCT_UD_PACKET_FLAG_ACK_REQ)) { + uct_ud_ep_ctl_op_add(iface, ep, UCT_UD_EP_OP_ACK); + ucs_trace_data("ACK_REQ - schedule ack, head_sn=%d sn=%d", + ep->rx.ooo_pkts.head_sn, neth->psn); + } + + if (ucs_unlikely(!is_am)) { + if ((size_t)byte_len == sizeof(*neth)) { + goto out; + } + if (neth->packet_type & UCT_UD_PACKET_FLAG_CTL) { + uct_ud_ep_rx_ctl(iface, ep, neth, skb); + goto out; + } + } + + ooo_type = ucs_frag_list_insert(&ep->rx.ooo_pkts, &skb->u.ooo.elem, neth->psn); + if (ucs_unlikely(ooo_type != UCS_FRAG_LIST_INSERT_FAST)) { + if (ooo_type != UCS_FRAG_LIST_INSERT_DUP && + ooo_type != UCS_FRAG_LIST_INSERT_FAIL) { + ucs_fatal("Out of order is not implemented: got %d", ooo_type); + } + ucs_trace_data("DUP/OOB - schedule ack, head_sn=%d sn=%d", + ep->rx.ooo_pkts.head_sn, neth->psn); + uct_ud_ep_ctl_op_add(iface, ep, UCT_UD_EP_OP_ACK); + goto out; + } + + if (ucs_unlikely(!is_am && (neth->packet_type & UCT_UD_PACKET_FLAG_PUT))) { + /* TODO: remove once ucp implements put */ + uct_ud_ep_rx_put(neth, byte_len); + goto out; + } + + if (ucs_unlikely(is_async && + !(iface->super.super.am[am_id].flags & UCT_CB_FLAG_ASYNC))) { + skb->u.am.len = byte_len - sizeof(*neth); + ucs_queue_push(&iface->rx.pending_q, &skb->u.am.queue); + } else { + /* Avoid reordering with respect to pending operations, if user AM handler + * initiates sends from any endpoint created on the iface. + * This flag would be cleared after all incoming messages + * are processed. 
*/ + uct_ud_iface_raise_pending_async_ev(iface); + + uct_ib_iface_invoke_am_desc(&iface->super, am_id, neth + 1, + byte_len - sizeof(*neth), &skb->super); + } + return; + +out: + ucs_mpool_put(skb); +} + +ucs_status_t uct_ud_ep_flush_nolock(uct_ud_iface_t *iface, uct_ud_ep_t *ep, + uct_completion_t *comp) +{ + uct_ud_send_skb_t *skb; + uct_ud_psn_t psn; + + if (ucs_unlikely(!uct_ud_ep_is_connected(ep))) { + /* check for CREQ either being scheduled or sent and waiting for CREP ack */ + if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_CREQ) || + !ucs_queue_is_empty(&ep->tx.window)) + { + return UCS_ERR_NO_RESOURCE; /* connection in progress */ + } + + return UCS_OK; /* Nothing was ever sent */ + } + + if (!uct_ud_iface_can_tx(iface) || !uct_ud_iface_has_skbs(iface) || + uct_ud_ep_no_window(ep)) + { + /* iface/ep has no resources, prevent reordering with possible pending + * operations by not starting the flush. + */ + return UCS_ERR_NO_RESOURCE; + } + + if (ucs_queue_is_empty(&ep->tx.window)) { + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_ACK_REQ); + + /* Check if have pending async completions for this ep, + * if not - all was acknowledged, nothing is pending - return OK + * if yes - continue to add + * */ + if (!(ep->flags & UCT_UD_EP_FLAG_ASYNC_COMPS)) { + return UCS_OK; + } + + /* + * If we have pending async completion, and the user requested a callback, + * add a new async completion in the queue. + */ + if (comp != NULL) { + skb = ucs_mpool_get(&iface->tx.mp); + if (skb == NULL) { + return UCS_ERR_NO_RESOURCE; + } + + skb->flags = UCT_UD_SEND_SKB_FLAG_COMP; + skb->len = 0; + uct_ud_comp_desc(skb)->comp = comp; + uct_ud_comp_desc(skb)->ep = ep; + ucs_queue_push(&iface->tx.async_comp_q, &skb->queue); + } + } else { + skb = ucs_queue_tail_elem_non_empty(&ep->tx.window, uct_ud_send_skb_t, queue); + psn = skb->neth->psn; + if (!(skb->flags & UCT_UD_SEND_SKB_FLAG_ACK_REQ)) { + /* If we didn't ask for ACK on last skb, send an ACK_REQ message. 
+ * It will speed up the flush because we will not have to wait untill + * retransmit is triggered. + * Also, prevent from sending more control messages like this after + * first time by turning on the flag on the last skb. + */ + + /* Since the function can be called from the arbiter context it is + * impossible to schedule a control operation. So just raise a + * flag and if there is no other control send ACK_REQ directly. + * + * If there is other control arbiter will take care of it. + */ + ep->tx.pending.ops |= UCT_UD_EP_OP_ACK_REQ; + if (uct_ud_ep_ctl_op_check_ex(ep, UCT_UD_EP_OP_ACK_REQ)) { + uct_ud_ep_do_pending_ctl(ep, iface); + } + + skb->flags |= UCT_UD_SEND_SKB_FLAG_ACK_REQ; + } + + /* If the user requested a callback, add a dummy skb to the window which + * will be released when the current sequence number is acknowledged. + */ + if (comp != NULL) { + skb = ucs_mpool_get(&iface->tx.mp); + if (skb == NULL) { + return UCS_ERR_NO_RESOURCE; + } + + /* Add dummy skb to the window, which would call user completion + * callback when getting ACK. 
+ */ + skb->flags = UCT_UD_SEND_SKB_FLAG_COMP; + skb->len = sizeof(skb->neth[0]); + skb->neth->packet_type = 0; + skb->neth->psn = psn; + uct_ud_comp_desc(skb)->comp = comp; + ucs_assert(psn == (uct_ud_psn_t)(ep->tx.psn - 1)); + + uct_ud_neth_set_dest_id(skb->neth, UCT_UD_EP_NULL_ID); + ucs_queue_push(&ep->tx.window, &skb->queue); + ucs_trace_data("added dummy flush skb %p psn %d user_comp %p", skb, + skb->neth->psn, comp); + } + } + + return UCS_INPROGRESS; +} + +void uct_ud_tx_wnd_purge_outstanding(uct_ud_iface_t *iface, uct_ud_ep_t *ud_ep, + ucs_status_t status) +{ + uct_ud_send_skb_t *skb; + + uct_ud_ep_tx_stop(ud_ep); + + ucs_queue_for_each_extract(skb, &ud_ep->tx.window, queue, 1) { + uct_ud_iface_add_async_comp(iface, ud_ep, skb, status); + } +} + +ucs_status_t uct_ud_ep_flush(uct_ep_h ep_h, unsigned flags, + uct_completion_t *comp) +{ + ucs_status_t status; + uct_ud_ep_t *ep = ucs_derived_of(ep_h, uct_ud_ep_t); + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_ud_iface_t); + + uct_ud_enter(iface); + + if (ucs_unlikely(flags & UCT_FLUSH_FLAG_CANCEL)) { + uct_ud_tx_wnd_purge_outstanding(iface, ep, UCS_ERR_CANCELED); + uct_ud_iface_dispatch_zcopy_comps(iface); + uct_ep_pending_purge(ep_h, NULL, 0); + /* Open window after cancellation for next sending */ + uct_ud_ep_ca_ack(ep); + status = UCS_OK; + goto out; + } + + if (ucs_unlikely(uct_ud_iface_has_pending_async_ev(iface))) { + status = UCS_ERR_NO_RESOURCE; + goto out; + } + + status = uct_ud_ep_flush_nolock(iface, ep, comp); + if (status == UCS_OK) { + UCT_TL_EP_STAT_FLUSH(&ep->super); + } else if (status == UCS_INPROGRESS) { + UCT_TL_EP_STAT_FLUSH_WAIT(&ep->super); + } + +out: + uct_ud_leave(iface); + return status; +} + +static uct_ud_send_skb_t *uct_ud_ep_prepare_crep(uct_ud_ep_t *ep) +{ + uct_ud_send_skb_t *skb; + uct_ud_neth_t *neth; + uct_ud_ctl_hdr_t *crep; + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ud_iface_t); + + ucs_assert_always(ep->dest_ep_id != 
UCT_UD_EP_NULL_ID); + ucs_assert_always(ep->ep_id != UCT_UD_EP_NULL_ID); + + /* Check that CREQ is neither sheduled nor waiting for CREP ack */ + ucs_assertv_always(!uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_CREQ) && + ucs_queue_is_empty(&ep->tx.window), + "iface=%p ep=%p conn_id=%d ep_id=%d, dest_ep_id=%d rx_psn=%u " + "ep_flags=0x%x ctl_ops=0x%x rx_creq_count=%d", + iface, ep, ep->conn_id, ep->ep_id, ep->dest_ep_id, + ep->rx.ooo_pkts.head_sn, ep->flags, ep->tx.pending.ops, + ep->rx_creq_count); + + skb = uct_ud_iface_get_tx_skb(iface, ep); + if (!skb) { + return NULL; + } + + neth = skb->neth; + uct_ud_neth_init_data(ep, neth); + + neth->packet_type = ep->dest_ep_id; + neth->packet_type |= (UCT_UD_PACKET_FLAG_ACK_REQ|UCT_UD_PACKET_FLAG_CTL); + + crep = (uct_ud_ctl_hdr_t *)(neth + 1); + + crep->type = UCT_UD_PACKET_CREP; + crep->conn_rep.src_ep_id = ep->ep_id; + + uct_ud_peer_name(ucs_unaligned_ptr(&crep->peer)); + + skb->len = sizeof(*neth) + sizeof(*crep); + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_CREP); + return skb; +} + +static uct_ud_send_skb_t *uct_ud_ep_resend(uct_ud_ep_t *ep) +{ + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ud_iface_t); + uct_ud_send_skb_t *skb, *sent_skb; + ucs_queue_iter_t resend_pos; + uct_ud_zcopy_desc_t *zdesc; + size_t iov_it; + + /* check window */ + resend_pos = (void*)ep->resend.pos; + sent_skb = ucs_queue_iter_elem(sent_skb, resend_pos, queue); + if (sent_skb == NULL) { + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_RESEND); + return NULL; + } + + ucs_assert(((uintptr_t)sent_skb % UCT_UD_SKB_ALIGN) == 0); + if (UCT_UD_PSN_COMPARE(sent_skb->neth->psn, >=, ep->tx.max_psn)) { + ucs_debug("ep(%p): out of window(psn=%d/max_psn=%d) - can not resend more", + ep, sent_skb ? 
sent_skb->neth->psn : -1, ep->tx.max_psn); + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_RESEND); + return NULL; + } + + /* skip dummy skb created for non-blocking flush */ + if ((uct_ud_neth_get_dest_id(sent_skb->neth) == UCT_UD_EP_NULL_ID) && + !(sent_skb->neth->packet_type & UCT_UD_PACKET_FLAG_CTL)) + { + ep->resend.pos = ucs_queue_iter_next(resend_pos); + return NULL; + } + + /* creq/crep must remove creq packet from window */ + ucs_assertv_always(!(uct_ud_ep_is_connected(ep) && + (uct_ud_neth_get_dest_id(sent_skb->neth) == UCT_UD_EP_NULL_ID) && + !(sent_skb->neth->packet_type & UCT_UD_PACKET_FLAG_AM)), + "ep(%p): CREQ resend on endpoint which is already connected", ep); + + skb = uct_ud_iface_resend_skb_get(iface); + ucs_assert_always(skb != NULL); + + ep->resend.pos = ucs_queue_iter_next(resend_pos); + ep->resend.psn = sent_skb->neth->psn; + memcpy(skb->neth, sent_skb->neth, sent_skb->len); + skb->neth->ack_psn = ep->rx.acked_psn; + skb->len = sent_skb->len; + if (sent_skb->flags & UCT_UD_SEND_SKB_FLAG_ZCOPY) { + zdesc = uct_ud_zcopy_desc(sent_skb); + for (iov_it = 0; iov_it < zdesc->iovcnt; ++iov_it) { + if (zdesc->iov[iov_it].length > 0) { + memcpy((char *)skb->neth + skb->len, zdesc->iov[iov_it].buffer, + zdesc->iov[iov_it].length); + skb->len += zdesc->iov[iov_it].length; + } + } + } + /* force ack request on every Nth packet or on first packet in resend window */ + if ((skb->neth->psn % UCT_UD_RESENDS_PER_ACK) == 0 || + UCT_UD_PSN_COMPARE(skb->neth->psn, ==, ep->tx.acked_psn+1)) { + skb->neth->packet_type |= UCT_UD_PACKET_FLAG_ACK_REQ; + } else { + skb->neth->packet_type &= ~UCT_UD_PACKET_FLAG_ACK_REQ; + } + + ucs_debug("ep(%p): resending rt_psn %u rt_max_psn %u acked_psn %u max_psn %u ack_req %d", + ep, ep->resend.psn, ep->resend.max_psn, + ep->tx.acked_psn, ep->tx.max_psn, + skb->neth->packet_type&UCT_UD_PACKET_FLAG_ACK_REQ ? 
1 : 0); + + if (UCT_UD_PSN_COMPARE(ep->resend.psn, ==, ep->resend.max_psn)) { + ucs_debug("ep(%p): resending completed", ep); + ep->resend.psn = ep->resend.max_psn + 1; + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_RESEND); + } + + return skb; +} + +static void uct_ud_ep_do_pending_ctl(uct_ud_ep_t *ep, uct_ud_iface_t *iface) +{ + uct_ud_send_skb_t *skb; + int flag = 0; + + if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_CREQ)) { + skb = uct_ud_ep_prepare_creq(ep); + if (skb) { + flag = 1; + uct_ud_ep_set_state(ep, UCT_UD_EP_FLAG_CREQ_SENT); + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_CREQ); + } + } else if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_CREP)) { + skb = uct_ud_ep_prepare_crep(ep); + if (skb) { + flag = 1; + uct_ud_ep_set_state(ep, UCT_UD_EP_FLAG_CREP_SENT); + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_CREP); + } + } else if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_RESEND)) { + skb = uct_ud_ep_resend(ep); + } else if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_ACK)) { + if (uct_ud_ep_is_connected(ep)) { + if (iface->config.max_inline >= sizeof(uct_ud_neth_t)) { + skb = ucs_unaligned_ptr(&iface->tx.skb_inl.super); + } else { + skb = uct_ud_iface_resend_skb_get(iface); + skb->len = sizeof(uct_ud_neth_t); + } + uct_ud_neth_ctl_ack(ep, skb->neth); + } else { + /* Do not send ACKs if not connected yet. It may happen if + * CREQ and CREP from peer are lost. Need to wait for CREP + * resending by peer. 
*/ + skb = NULL; + } + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_ACK); + } else if (uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_ACK_REQ)) { + if (iface->config.max_inline >= sizeof(uct_ud_neth_t)) { + skb = ucs_unaligned_ptr(&iface->tx.skb_inl.super); + } else { + skb = uct_ud_iface_resend_skb_get(iface); + skb->len = sizeof(uct_ud_neth_t); + } + uct_ud_neth_ctl_ack_req(ep, skb->neth); + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_ACK_REQ); + } else if (uct_ud_ep_ctl_op_isany(ep)) { + ucs_fatal("unsupported pending op mask: %x", ep->tx.pending.ops); + } else { + skb = 0; + } + + if (!skb) { + /* no pending - nothing to do */ + return; + } + + VALGRIND_MAKE_MEM_DEFINED(skb, sizeof *skb); + ucs_derived_of(iface->super.ops, uct_ud_iface_ops_t)->tx_skb(ep, skb, flag); + if (flag) { + /* creq and crep allocate real skb, it must be put on window like + * a regular packet to ensure a retransmission. + */ + uct_ud_iface_complete_tx_skb(iface, ep, skb); + } else { + uct_ud_iface_resend_skb_put(iface, skb); + } +} + +static inline ucs_arbiter_cb_result_t +uct_ud_ep_ctl_op_next(uct_ud_ep_t *ep) +{ + if (uct_ud_ep_ctl_op_isany(ep)) { + /* can send more control - come here later */ + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } + /* no more control - nothing to do in + * this dispatch cycle. */ + return UCS_ARBITER_CB_RESULT_RESCHED_GROUP; +} + +/** + * pending operations are processed according to priority: + * - high prio control: + * - creq request + * - crep reply + * - resends + * - pending uct requests + * - low prio control: ack reply/ack requests + * + * Low priority control can be send along with user data, so + * there is a good chance that processing pending uct reqs will + * also deal with the low prio control. + * However we can not let pending uct req block control forever. 
+ */ +ucs_arbiter_cb_result_t +uct_ud_ep_do_pending(ucs_arbiter_t *arbiter, ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, + priv); + uct_ud_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), + uct_ud_ep_t, + tx.pending.group); + uct_ud_iface_t *iface = ucs_container_of(arbiter, uct_ud_iface_t, + tx.pending_q); + uintptr_t in_async_progress = (uintptr_t)arg; + int allow_callback; + int async_before_pending; + ucs_status_t status; + + /* check if we have global resources + * - tx_wqe + * - skb + * control messages does not need skb. + */ + if (!uct_ud_iface_can_tx(iface)) { + return UCS_ARBITER_CB_RESULT_STOP; + } + + /* here we rely on the fact that arbiter + * will start next dispatch cycle from the + * next group. + * So it is ok to stop if there is no ctl. + * However in worst case only one ctl per + * dispatch cycle will be send. + */ + if (!uct_ud_iface_has_skbs(iface) && !uct_ud_ep_ctl_op_isany(ep)) { + return UCS_ARBITER_CB_RESULT_STOP; + } + + /* we can desched group: iff + * - no control + * - no ep resources (connect or window) + **/ + + if (!uct_ud_ep_ctl_op_isany(ep) && + (!uct_ud_ep_is_connected(ep) || + uct_ud_ep_no_window(ep))) { + return UCS_ARBITER_CB_RESULT_DESCHED_GROUP; + } + + if (&ep->tx.pending.elem == elem) { + uct_ud_ep_do_pending_ctl(ep, iface); + if (uct_ud_ep_ctl_op_isany(ep)) { + /* there is still some ctl left. 
go to next group */ + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } else { + /* no more ctl - dummy elem can be removed */ + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } + } + + /* user pending can be send iff + * - not in async progress + * - there are no high priority pending control messages + */ + allow_callback = !in_async_progress || + (uct_ud_pending_req_priv(req)->flags & UCT_CB_FLAG_ASYNC); + if (allow_callback && !uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_CTL_HI_PRIO)) { + ucs_assert(!(ep->flags & UCT_UD_EP_FLAG_IN_PENDING)); + ep->flags |= UCT_UD_EP_FLAG_IN_PENDING; + async_before_pending = iface->tx.async_before_pending; + if (uct_ud_pending_req_priv(req)->flags & UCT_CB_FLAG_ASYNC) { + /* temporary reset the flag to unblock sends from async context */ + iface->tx.async_before_pending = 0; + } + status = req->func(req); + iface->tx.async_before_pending = async_before_pending; + ep->flags &= ~UCT_UD_EP_FLAG_IN_PENDING; + + if (status == UCS_INPROGRESS) { + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } else if (status != UCS_OK) { + /* avoid deadlock: send low priority ctl if user cb failed + * no need to check for low prio here because we + * already checked above. + */ + uct_ud_ep_do_pending_ctl(ep, iface); + return uct_ud_ep_ctl_op_next(ep); + } + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } + + /* try to send ctl messages */ + uct_ud_ep_do_pending_ctl(ep, iface); + if (in_async_progress) { + return uct_ud_ep_ctl_op_next(ep); + } else { + /* we still didn't process the current pending request because of hi-prio + * control messages, so cannot stop sending yet. If we stop, not all + * resources will be exhausted and out-of-order with pending can occur. 
+ * (pending control ops may be cleared by uct_ud_ep_do_pending_ctl) + */ + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } +} + +ucs_status_t uct_ud_ep_pending_add(uct_ep_h ep_h, uct_pending_req_t *req, + unsigned flags) +{ + uct_ud_ep_t *ep = ucs_derived_of(ep_h, uct_ud_ep_t); + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_ud_iface_t); + + uct_ud_enter(iface); + + /* if there was an async progress all 'send' ops return + * UCS_ERR_NO_RESOURCE. If we return UCS_ERR_BUSY there will + * be a deadlock. + * So we must skip a resource check and add a pending op in order to + * avoid a deadlock. + */ + if (ucs_unlikely(uct_ud_iface_has_pending_async_ev(iface))) { + goto add_req; + } + + if (uct_ud_iface_can_tx(iface) && + uct_ud_iface_has_skbs(iface) && + uct_ud_ep_is_connected(ep) && + !uct_ud_ep_no_window(ep)) { + + uct_ud_leave(iface); + return UCS_ERR_BUSY; + } + +add_req: + UCS_STATIC_ASSERT(sizeof(uct_ud_pending_req_priv_t) <= + UCT_PENDING_REQ_PRIV_LEN); + uct_ud_pending_req_priv(req)->flags = flags; + uct_pending_req_arb_group_push(&ep->tx.pending.group, req); + ucs_arbiter_group_schedule(&iface->tx.pending_q, &ep->tx.pending.group); + ucs_trace_data("ud ep %p: added pending req %p tx_psn %d acked_psn %d cwnd %d", + ep, req, ep->tx.psn, ep->tx.acked_psn, ep->ca.cwnd); + UCT_TL_EP_STAT_PEND(&ep->super); + + uct_ud_leave(iface); + return UCS_OK; +} + +static ucs_arbiter_cb_result_t +uct_ud_ep_pending_purge_cb(ucs_arbiter_t *arbiter, ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_ud_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), + uct_ud_ep_t, tx.pending.group); + uct_purge_cb_args_t *cb_args = arg; + uct_pending_purge_callback_t cb = cb_args->cb; + uct_pending_req_t *req; + + if (&ep->tx.pending.elem == elem) { + /* return ignored by arbiter */ + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } + req = ucs_container_of(elem, uct_pending_req_t, priv); + if (cb) { + cb(req, cb_args->arg); + } else { + ucs_debug("ep=%p cancelling user 
pending request %p", ep, req); + } + + /* return ignored by arbiter */ + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; +} + + +void uct_ud_ep_pending_purge(uct_ep_h ep_h, uct_pending_purge_callback_t cb, + void *arg) +{ + uct_ud_ep_t *ep = ucs_derived_of(ep_h, uct_ud_ep_t); + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_ud_iface_t); + uct_purge_cb_args_t args = {cb, arg}; + + uct_ud_enter(iface); + ucs_arbiter_group_purge(&iface->tx.pending_q, &ep->tx.pending.group, + uct_ud_ep_pending_purge_cb, &args); + if (uct_ud_ep_ctl_op_isany(ep)) { + ucs_arbiter_group_push_elem(&ep->tx.pending.group, + &ep->tx.pending.elem); + ucs_arbiter_group_schedule(&iface->tx.pending_q, &ep->tx.pending.group); + } + uct_ud_leave(iface); +} + +void uct_ud_ep_disconnect(uct_ep_h tl_ep) +{ + uct_ud_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_ep_t); + uct_ud_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ud_iface_t); + + ucs_debug("ep %p: disconnect", ep); + + /* cancel user pending */ + uct_ud_ep_pending_purge(tl_ep, NULL, NULL); + + /* schedule flush */ + uct_ud_ep_flush(tl_ep, 0, NULL); + + /* the EP will be destroyed by interface destroy or timeout in + * uct_ud_ep_slow_timer + */ + ep->close_time = ucs_twheel_get_time(&iface->async.slow_timer); + ep->flags |= UCT_UD_EP_FLAG_DISCONNECTED; + ucs_wtimer_add(&iface->async.slow_timer, &ep->slow_timer, + UCT_UD_SLOW_TIMER_MAX_TICK(iface)); +} diff --git a/src/uct/ib/ud/base/ud_ep.h b/src/uct/ib/ud/base/ud_ep.h new file mode 100644 index 0000000..55060cc --- /dev/null +++ b/src/uct/ib/ud/base/ud_ep.h @@ -0,0 +1,445 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCT_UD_EP_H +#define UCT_UD_EP_H + +#include "ud_def.h" + +#include +#include +#include +#include +#include +#include + +#define UCT_UD_EP_NULL_ID ((1<<24)-1) +#define UCT_UD_EP_ID_MAX UCT_UD_EP_NULL_ID +#define UCT_UD_EP_CONN_ID_MAX UCT_UD_EP_ID_MAX + +#if UCT_UD_EP_DEBUG_HOOKS +/* + Hooks that allow packet header inspection and rewriting. UCT user can + set functions that will be called just before packet is put on wire + and when packet is received. Packet will be discarded if RX function + returns status different from UCS_OK. + + Example: + + static ucs_status_t clear_ack_req(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + neth->packet_type &= ~UCT_UD_PACKET_FLAG_ACK_REQ; + return UCS_OK; + } + + uct_ep_t ep; + .... + // clear ack request bin on all outgoing packets + ucs_derived_of(ep, uct_ud_ep_t)->tx.tx_hook = clear_ack_req; + +*/ + +typedef ucs_status_t (*uct_ud_ep_hook_t)(uct_ud_ep_t *ep, uct_ud_neth_t *neth); + +#define UCT_UD_EP_HOOK_DECLARE(name) uct_ud_ep_hook_t name; + +#define UCT_UD_EP_HOOK_CALL_RX(ep, neth, len) \ + if ((ep)->rx.rx_hook(ep, neth) != UCS_OK) { \ + ucs_trace_data("RX: dropping packet"); \ + return; \ + } + +#define UCT_UD_EP_HOOK_CALL_TX(ep, neth) (ep)->tx.tx_hook(ep, neth); +#define UCT_UD_EP_HOOK_CALL_TIMER(ep) (ep)->timer_hook(ep, NULL); + +static inline ucs_status_t uct_ud_ep_null_hook(uct_ud_ep_t *ep, uct_ud_neth_t *neth) +{ + return UCS_OK; +} + +#define UCT_UD_EP_HOOK_INIT(ep) \ +do { \ + (ep)->tx.tx_hook = uct_ud_ep_null_hook; \ + (ep)->rx.rx_hook = uct_ud_ep_null_hook; \ + (ep)->timer_hook = uct_ud_ep_null_hook; \ +} while(0); + +#else + +#define UCT_UD_EP_HOOK_DECLARE(name) +#define UCT_UD_EP_HOOK_CALL_RX(ep, neth, len) +#define UCT_UD_EP_HOOK_CALL_TX(ep, neth) +#define UCT_UD_EP_HOOK_CALL_TIMER(ep) +#define UCT_UD_EP_HOOK_INIT(ep) + +#endif + + +/** + * Slow ep timer + * The purpose of the slow timer is to schedule resends and ack replies. + * The timer is a wheel timer. 
Timer wheel sweep is done on every async + * progress invocation. One tick usually happens once in 0.1 seconds. + * It is best to avoid taking time in the fast path. + * + * wheel_time is the time of last timer wheel sweep. + * on send: + * - try to start wheel timer. + * - send_time = wheel_time. That is sending a packet resets retransmission + * timeout. This does not allow infinite number of resets because number of + * outstanding packets is bound by the TX window size. + * on ack recv: + * - send_time = wheel_time. (advance last send time) + * on timer expiration: + * - if wheel_time - saved_time > 3*one_tick_time + * schedule resend + * send_time = wheel_time + * congestion avoidance decreases tx window + * - if window is not empty resched timer + * 3x is needed to avoid false resends because of errors in timekeeping + * + * Fast ep timer (Not implemented) + * + * The purpose of the fast timer is to detect packet loss as early as + * possible. The timer is a wheel timer. Fast timer sweep is done on + * CQ polling which happens either in explicit polling or in async + * progress. As a result fast timer resolution may vary. + * + * TODO: adaptive CHK algo description + * + * Fast timer is relatively expensive. It is best to disable if packet loss + * is not expected. Usual reasons for packet loss are: slow receiver, + * many to one traffic pattern. + */ + +/* Congestion avoidance and retransmits + * + * UD uses additive increase/multiplicative decrease algorithm + * See https://en.wikipedia.org/wiki/Additive_increase/multiplicative_decrease + * + * tx window is increased when ack is received and decreased when + * resend is scheduled. Ack must be a 'new' one, that is, it must + * acknowledge packets on window. Increasing window on ack does not cause + * exponential window increase because, unlike tcp, only two acks + * per window are sent.
+ * + * Todo: + * + * Consider triggering window decrease before resend timeout: + * - on ECN (explicit congestion notification) from receiver. ECN can + * be based on some heuristic. For example on number of rx completions + * that receiver picked from CQ. + * - upon receiving a 'duplicate ack' packet + * + * Consider using other algorithm (e.g. BIC/CUBIC) + */ + +/* + * Handling retransmits + * + * On slow timer timeout schedule a retransmit operation for + * [acked_psn+1, psn-1]. These values are saved as 'resend window' + * + * Resend operation will resend no more than the current cwnd + * If ack arrives when resend window is active it means that + * - something new in the resend window was acked. As a + * result a new resend operation will be scheduled. + * - either resend window or something beyond it was + * acked. It means that no more retransmissions are needed. + * Current 'resend window' is deactivated + * + * When retransmitting, ack is requested if: + * psn == acked_psn + 1 or + * psn % UCT_UD_RESENDS_PER_ACK == 0 + */ + +/* + * Endpoint pending control operations. The operations + * are executed in time of progress along with + * pending requests added by uct user.
+ */ +enum { + UCT_UD_EP_OP_NONE = 0, + UCT_UD_EP_OP_ACK = UCS_BIT(0), /* ack data */ + UCT_UD_EP_OP_ACK_REQ = UCS_BIT(1), /* request ack of sent packets */ + UCT_UD_EP_OP_RESEND = UCS_BIT(2), /* resend un acked packets */ + UCT_UD_EP_OP_CREP = UCS_BIT(3), /* send connection reply */ + UCT_UD_EP_OP_CREQ = UCS_BIT(4) /* send connection request */ +}; + +#define UCT_UD_EP_OP_CTL_LOW_PRIO (UCT_UD_EP_OP_ACK_REQ|UCT_UD_EP_OP_ACK) +#define UCT_UD_EP_OP_CTL_HI_PRIO (UCT_UD_EP_OP_CREQ|UCT_UD_EP_OP_CREP|UCT_UD_EP_OP_RESEND) + +typedef struct uct_ud_ep_pending_op { + ucs_arbiter_group_t group; + uint32_t ops; /* bitmask that describes what control ops are sceduled */ + ucs_arbiter_elem_t elem; +} uct_ud_ep_pending_op_t; + +enum { + UCT_UD_EP_STAT_TODO +}; + +/* TODO: optimize endpoint memory footprint */ +enum { + UCT_UD_EP_FLAG_ASYNC_COMPS = UCS_BIT(0), /* set if there are completions that + * were picked by async thread and queued */ + UCT_UD_EP_FLAG_DISCONNECTED = UCS_BIT(1), /* set if the endpoint was disconnected */ + UCT_UD_EP_FLAG_PRIVATE = UCS_BIT(2), /* EP is was created as internal */ + + /* debug flags */ + UCT_UD_EP_FLAG_CREQ_RCVD = UCS_BIT(3), /* CREQ message was received */ + UCT_UD_EP_FLAG_CREP_RCVD = UCS_BIT(4), /* CREP message was received */ + UCT_UD_EP_FLAG_CREQ_SENT = UCS_BIT(5), /* CREQ message was sent */ + UCT_UD_EP_FLAG_CREP_SENT = UCS_BIT(6), /* CREP message was sent */ + UCT_UD_EP_FLAG_CREQ_NOTSENT = UCS_BIT(7), /* CREQ message is NOT sent, because + connection establishment process + is driven by remote side. 
*/ + + /* Endpoint is currently executing the pending queue */ +#if UCS_ENABLE_ASSERT + UCT_UD_EP_FLAG_IN_PENDING = UCS_BIT(8) +#else + UCT_UD_EP_FLAG_IN_PENDING = 0 +#endif +}; + +typedef struct uct_ud_peer_name { + char name[16]; + int pid; +} uct_ud_peer_name_t; + +struct uct_ud_ep { + uct_base_ep_t super; + uint32_t ep_id; + uint32_t dest_ep_id; + struct { + uct_ud_psn_t psn; /* Next PSN to send */ + uct_ud_psn_t max_psn; /* Largest PSN that can be sent */ + uct_ud_psn_t acked_psn; /* last psn that was acked by remote side */ + uint16_t err_skb_count;/* number of failed SKBs on the ep */ + ucs_queue_head_t window; /* send window: [acked_psn+1, psn-1] */ + uct_ud_ep_pending_op_t pending; /* pending ops */ + ucs_time_t send_time; /* tx time of last packet */ + ucs_time_t slow_tick; /* timeout to trigger slow timer */ + UCS_STATS_NODE_DECLARE(stats) + UCT_UD_EP_HOOK_DECLARE(tx_hook) + } tx; + struct { + uct_ud_psn_t acked_psn; /* Last psn we acked */ + ucs_frag_list_t ooo_pkts; /* Out of order packets that can not be processed yet, + also keeps last psn we successfully received and processed */ + UCS_STATS_NODE_DECLARE(stats) + UCT_UD_EP_HOOK_DECLARE(rx_hook) + } rx; + struct { + uct_ud_psn_t wmax; + uct_ud_psn_t cwnd; + } ca; + struct UCS_S_PACKED { + ucs_queue_iter_t pos; /* points to the part of tx window that needs to be resent */ + uct_ud_psn_t psn; /* last psn that was retransmitted */ + uct_ud_psn_t max_psn; /* max psn that should be retransmitted */ + } resend; + ucs_list_link_t cep_list; + uint32_t conn_id; /* connection id. 
assigned in connect_to_iface() */ + uint16_t flags; + uint8_t rx_creq_count; /* TODO: remove when reason for DUP/OOO CREQ is found */ + ucs_wtimer_t slow_timer; + ucs_time_t close_time; /* timestamp of closure */ + UCS_STATS_NODE_DECLARE(stats) + UCT_UD_EP_HOOK_DECLARE(timer_hook) +#if ENABLE_DEBUG_DATA + uct_ud_peer_name_t peer; +#endif +}; + +UCS_CLASS_DECLARE(uct_ud_ep_t, uct_ud_iface_t*) + +/** + * UD pending request private data + */ +typedef struct { + uct_pending_req_priv_arb_t arb; + unsigned flags; +} uct_ud_pending_req_priv_t; + + +static UCS_F_ALWAYS_INLINE uct_ud_pending_req_priv_t * +uct_ud_pending_req_priv(uct_pending_req_t *req) +{ + return (uct_ud_pending_req_priv_t *)&(req)->priv; +} + + +void uct_ud_tx_wnd_purge_outstanding(uct_ud_iface_t *iface, uct_ud_ep_t *ud_ep, + ucs_status_t status); + +ucs_status_t uct_ud_ep_flush(uct_ep_h ep, unsigned flags, + uct_completion_t *comp); +/* internal flush */ +ucs_status_t uct_ud_ep_flush_nolock(uct_ud_iface_t *iface, uct_ud_ep_t *ep, + uct_completion_t *comp); + +ucs_status_t uct_ud_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr); + +ucs_status_t uct_ud_ep_connect_to_ep(uct_ud_ep_t *ep, + const uct_ib_address_t *ib_addr, + const uct_ud_ep_addr_t *ep_addr); + +ucs_status_t uct_ud_ep_pending_add(uct_ep_h ep, uct_pending_req_t *n, + unsigned flags); + +void uct_ud_ep_pending_purge(uct_ep_h ep, uct_pending_purge_callback_t cb, + void *arg); + +void uct_ud_ep_disconnect(uct_ep_h ep); + + +/* helper function to create/destroy new connected ep */ +ucs_status_t uct_ud_ep_create_connected_common(uct_ud_iface_t *iface, + const uct_ib_address_t *ib_addr, + const uct_ud_iface_addr_t *if_addr, + uct_ud_ep_t **new_ep_p, + uct_ud_send_skb_t **skb_p); + +void uct_ud_ep_destroy_connected(uct_ud_ep_t *ep, + const uct_ib_address_t *ib_addr, + const uct_ud_iface_addr_t *if_addr); + +uct_ud_send_skb_t *uct_ud_ep_prepare_creq(uct_ud_ep_t *ep); + +ucs_arbiter_cb_result_t +uct_ud_ep_do_pending(ucs_arbiter_t *arbiter, 
ucs_arbiter_elem_t *elem, + void *arg); + +void uct_ud_ep_clone(uct_ud_ep_t *old_ep, uct_ud_ep_t *new_ep); + +static UCS_F_ALWAYS_INLINE void +uct_ud_neth_set_type_am(uct_ud_ep_t *ep, uct_ud_neth_t *neth, uint8_t id) +{ + neth->packet_type = (id << UCT_UD_PACKET_AM_ID_SHIFT) | + ep->dest_ep_id | + UCT_UD_PACKET_FLAG_AM; +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_neth_set_type_put(uct_ud_ep_t *ep, uct_ud_neth_t *neth) +{ + neth->packet_type = ep->dest_ep_id | UCT_UD_PACKET_FLAG_PUT; +} + +void uct_ud_ep_process_rx(uct_ud_iface_t *iface, + uct_ud_neth_t *neth, unsigned byte_len, + uct_ud_recv_skb_t *skb, int is_async); + + +static UCS_F_ALWAYS_INLINE void +uct_ud_neth_ctl_ack(uct_ud_ep_t *ep, uct_ud_neth_t *neth) +{ + neth->psn = ep->tx.psn; + neth->ack_psn = ep->rx.acked_psn = ucs_frag_list_sn(&ep->rx.ooo_pkts); + neth->packet_type = ep->dest_ep_id; +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_neth_ctl_ack_req(uct_ud_ep_t *ep, uct_ud_neth_t *neth) +{ + neth->psn = ep->tx.psn; + neth->ack_psn = ep->rx.acked_psn = ucs_frag_list_sn(&ep->rx.ooo_pkts); + neth->packet_type = ep->dest_ep_id|UCT_UD_PACKET_FLAG_ACK_REQ; +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_neth_init_data(uct_ud_ep_t *ep, uct_ud_neth_t *neth) +{ + neth->psn = ep->tx.psn; + neth->ack_psn = ep->rx.acked_psn = ucs_frag_list_sn(&ep->rx.ooo_pkts); +} + + + +static inline int uct_ud_ep_compare(uct_ud_ep_t *a, uct_ud_ep_t *b) +{ + return a->conn_id - b->conn_id; +} + +static inline int uct_ud_ep_hash(uct_ud_ep_t *ep) +{ + return ep->conn_id % UCT_UD_HASH_SIZE; +} + +SGLIB_DEFINE_LIST_PROTOTYPES(uct_ud_ep_t, uct_ud_ep_compare, next) +SGLIB_DEFINE_HASHED_CONTAINER_PROTOTYPES(uct_ud_ep_t, UCT_UD_HASH_SIZE, uct_ud_ep_hash) + + +static UCS_F_ALWAYS_INLINE void +uct_ud_ep_ctl_op_del(uct_ud_ep_t *ep, uint32_t ops) +{ + ep->tx.pending.ops &= ~ops; +} + +static UCS_F_ALWAYS_INLINE int +uct_ud_ep_ctl_op_check(uct_ud_ep_t *ep, uint32_t op) +{ + return ep->tx.pending.ops & op; +} + +static UCS_F_ALWAYS_INLINE int 
+uct_ud_ep_ctl_op_isany(uct_ud_ep_t *ep) +{ + return ep->tx.pending.ops; +} + +static UCS_F_ALWAYS_INLINE int +uct_ud_ep_ctl_op_check_ex(uct_ud_ep_t *ep, uint32_t ops) +{ + /* check that at least one the given ops is set and + * all ops not given are not set */ + return (ep->tx.pending.ops & ops) && + ((ep->tx.pending.ops & ~ops) == 0); +} + + +/* TODO: relay on window check instead. max_psn = psn */ +static UCS_F_ALWAYS_INLINE int uct_ud_ep_is_connected(uct_ud_ep_t *ep) +{ + return ep->dest_ep_id != UCT_UD_EP_NULL_ID; +} + +static UCS_F_ALWAYS_INLINE int uct_ud_ep_no_window(uct_ud_ep_t *ep) +{ + /* max_psn can be decreased by CA, so check >= */ + return UCT_UD_PSN_COMPARE(ep->tx.psn, >=, ep->tx.max_psn); +} + +/* + * Request ACK once we sent 1/4 of the window or once we got to the window end + * or there is a pending ack request operation + */ +static UCS_F_ALWAYS_INLINE int uct_ud_ep_req_ack(uct_ud_ep_t *ep) +{ + uct_ud_psn_t acked_psn, max_psn, psn; + + max_psn = ep->tx.max_psn; + acked_psn = ep->tx.acked_psn; + psn = ep->tx.psn; + + return UCT_UD_PSN_COMPARE(psn, ==, ((acked_psn * 3 + max_psn) >> 2)) || + UCT_UD_PSN_COMPARE(psn + 1, ==, max_psn) || + uct_ud_ep_ctl_op_check(ep, UCT_UD_EP_OP_ACK_REQ); + +} + + +static UCS_F_ALWAYS_INLINE void +uct_ud_neth_ack_req(uct_ud_ep_t *ep, uct_ud_neth_t *neth) +{ + neth->packet_type |= uct_ud_ep_req_ack(ep) << UCT_UD_PACKET_ACK_REQ_SHIFT; + uct_ud_ep_ctl_op_del(ep, UCT_UD_EP_OP_ACK|UCT_UD_EP_OP_ACK_REQ); +} + +#endif diff --git a/src/uct/ib/ud/base/ud_iface.c b/src/uct/ib/ud/base/ud_iface.c new file mode 100644 index 0000000..af0b2f1 --- /dev/null +++ b/src/uct/ib/ud/base/ud_iface.c @@ -0,0 +1,962 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ud_iface.h" +#include "ud_ep.h" +#include "ud_inl.h" + +#include +#include +#include +#include +#include +#include +#include + + +#define UCT_UD_IPV4_ADDR_LEN sizeof(struct in_addr) +#define UCT_UD_IPV6_ADDR_LEN sizeof(struct in6_addr) + +#if ENABLE_STATS +static ucs_stats_class_t uct_ud_iface_stats_class = { + .name = "ud_iface", + .num_counters = UCT_UD_IFACE_STAT_LAST, + .counter_names = { + [UCT_UD_IFACE_STAT_RX_DROP] = "rx_drop" + } +}; +#endif + +/* cppcheck-suppress ctunullpointer */ +SGLIB_DEFINE_LIST_FUNCTIONS(uct_ud_iface_peer_t, uct_ud_iface_peer_cmp, next) +SGLIB_DEFINE_HASHED_CONTAINER_FUNCTIONS(uct_ud_iface_peer_t, + UCT_UD_HASH_SIZE, + uct_ud_iface_peer_hash) + +static void uct_ud_iface_free_resend_skbs(uct_ud_iface_t *iface); +static void uct_ud_iface_timer(int timer_id, void *arg); + +static void uct_ud_iface_free_pending_rx(uct_ud_iface_t *iface); +static void uct_ud_iface_free_async_comps(uct_ud_iface_t *iface); + + +void uct_ud_iface_cep_init(uct_ud_iface_t *iface) +{ + sglib_hashed_uct_ud_iface_peer_t_init(iface->peers); +} + +static void +uct_ud_iface_cep_cleanup_eps(uct_ud_iface_t *iface, uct_ud_iface_peer_t *peer) +{ + uct_ud_ep_t *ep, *tmp; + + ucs_list_for_each_safe(ep, tmp, &peer->ep_list, cep_list) { + if (ep->conn_id < peer->conn_id_last) { + /* active connection should already be cleaned by owner */ + ucs_warn("iface (%p) peer (qpn=%d lid=%d) cleanup with %d endpoints still active", + iface, peer->dst_qpn, peer->dlid, + (int)ucs_list_length(&peer->ep_list)); + continue; + } + ucs_list_del(&ep->cep_list); + ucs_trace("cep:ep_destroy(%p) conn_id %d", ep, ep->conn_id); + uct_ep_destroy(&ep->super.super); + } +} + +void uct_ud_iface_cep_cleanup(uct_ud_iface_t *iface) +{ + uct_ud_iface_peer_t *peer; + struct sglib_hashed_uct_ud_iface_peer_t_iterator it_peer; + + for (peer = sglib_hashed_uct_ud_iface_peer_t_it_init(&it_peer, + iface->peers); + peer != NULL; + peer = 
sglib_hashed_uct_ud_iface_peer_t_it_next(&it_peer)) { + + uct_ud_iface_cep_cleanup_eps(iface, peer); + free(peer); + } +} + +static uct_ud_iface_peer_t * +uct_ud_iface_cep_lookup_addr(uct_ud_iface_t *iface, uint16_t dlid, + const union ibv_gid *dgid, uint32_t dest_qpn) +{ + uct_ud_iface_peer_t key; + key.dlid = dlid; + key.dgid = *dgid; + key.dst_qpn = dest_qpn; + return sglib_hashed_uct_ud_iface_peer_t_find_member(iface->peers, &key); +} + +static uct_ud_iface_peer_t * +uct_ud_iface_cep_lookup_peer(uct_ud_iface_t *iface, + const uct_ib_address_t *src_ib_addr, + const uct_ud_iface_addr_t *src_if_addr) +{ + uint32_t dest_qpn = uct_ib_unpack_uint24(src_if_addr->qp_num); + union ibv_gid dgid; + uint16_t dlid; + + uct_ib_address_unpack(src_ib_addr, &dlid, &dgid); + return uct_ud_iface_cep_lookup_addr(iface, dlid, &dgid, dest_qpn); +} + +static uct_ud_ep_t * +uct_ud_iface_cep_lookup_ep(uct_ud_iface_peer_t *peer, uint32_t conn_id) +{ + uint32_t id; + uct_ud_ep_t *ep; + + if (conn_id != UCT_UD_EP_CONN_ID_MAX) { + id = conn_id; + } else { + id = peer->conn_id_last; + /* TODO: O(1) lookup in this case (new connection) */ + } + ucs_list_for_each(ep, &peer->ep_list, cep_list) { + if (ep->conn_id == id) { + return ep; + } + if (ep->conn_id < id) { + break; + } + } + return NULL; +} + +static uint32_t +uct_ud_iface_cep_getid(uct_ud_iface_peer_t *peer, uint32_t conn_id) +{ + uint32_t new_id; + + if (conn_id != UCT_UD_EP_CONN_ID_MAX) { + return conn_id; + } + new_id = peer->conn_id_last++; + return new_id; +} + +/* insert new ep that is connected to src_if_addr */ +ucs_status_t uct_ud_iface_cep_insert(uct_ud_iface_t *iface, + const uct_ib_address_t *src_ib_addr, + const uct_ud_iface_addr_t *src_if_addr, + uct_ud_ep_t *ep, uint32_t conn_id) +{ + uint32_t dest_qpn = uct_ib_unpack_uint24(src_if_addr->qp_num); + uct_ud_iface_peer_t *peer; + union ibv_gid dgid; + uct_ud_ep_t *cep; + uint16_t dlid; + + uct_ib_address_unpack(src_ib_addr, &dlid, &dgid); + peer = 
uct_ud_iface_cep_lookup_addr(iface, dlid, &dgid, dest_qpn); + if (peer == NULL) { + peer = malloc(sizeof *peer); + if (peer == NULL) { + return UCS_ERR_NO_MEMORY; + } + + peer->dlid = dlid; + peer->dgid = dgid; + peer->dst_qpn = dest_qpn; + sglib_hashed_uct_ud_iface_peer_t_add(iface->peers, peer); + ucs_list_head_init(&peer->ep_list); + peer->conn_id_last = 0; + } + + ep->conn_id = uct_ud_iface_cep_getid(peer, conn_id); + if (ep->conn_id == UCT_UD_EP_CONN_ID_MAX) { + return UCS_ERR_NO_RESOURCE; + } + + if (ucs_list_is_empty(&peer->ep_list)) { + ucs_list_add_head(&peer->ep_list, &ep->cep_list); + return UCS_OK; + } + ucs_list_for_each(cep, &peer->ep_list, cep_list) { + ucs_assert_always(cep->conn_id != ep->conn_id); + if (cep->conn_id < ep->conn_id) { + ucs_list_insert_before(&cep->cep_list, &ep->cep_list); + return UCS_OK; + } + } + return UCS_OK; +} + +void uct_ud_iface_cep_remove(uct_ud_ep_t *ep) +{ + if (ucs_list_is_empty(&ep->cep_list)) { + return; + } + ucs_trace("iface(%p) cep_remove:ep(%p)", ep->super.super.iface, ep); + ucs_list_del(&ep->cep_list); + ucs_list_head_init(&ep->cep_list); +} + +uct_ud_ep_t *uct_ud_iface_cep_lookup(uct_ud_iface_t *iface, + const uct_ib_address_t *src_ib_addr, + const uct_ud_iface_addr_t *src_if_addr, + uint32_t conn_id) +{ + uct_ud_iface_peer_t *peer; + uct_ud_ep_t *ep; + + peer = uct_ud_iface_cep_lookup_peer(iface, src_ib_addr, src_if_addr); + if (peer == NULL) { + return NULL; + } + + ep = uct_ud_iface_cep_lookup_ep(peer, conn_id); + if (ep && conn_id == UCT_UD_EP_CONN_ID_MAX) { + peer->conn_id_last++; + } + return ep; +} + +void uct_ud_iface_cep_rollback(uct_ud_iface_t *iface, + const uct_ib_address_t *src_ib_addr, + const uct_ud_iface_addr_t *src_if_addr, + uct_ud_ep_t *ep) +{ + uct_ud_iface_peer_t *peer; + + peer = uct_ud_iface_cep_lookup_peer(iface, src_ib_addr, src_if_addr); + ucs_assert_always(peer != NULL); + ucs_assert_always(peer->conn_id_last > 0); + ucs_assert_always(ep->conn_id + 1 == peer->conn_id_last); + 
ucs_assert_always(!ucs_list_is_empty(&peer->ep_list)); + ucs_assert_always(!ucs_list_is_empty(&ep->cep_list)); + + peer->conn_id_last--; + uct_ud_iface_cep_remove(ep); +} + +static void uct_ud_iface_send_skb_init(uct_iface_h tl_iface, void *obj, + uct_mem_h memh) +{ + uct_ud_send_skb_t *skb = obj; + uct_ib_mem_t *ib_memh = memh; + + skb->lkey = ib_memh->lkey; + skb->flags = 0; +} + +static ucs_status_t +uct_ud_iface_create_qp(uct_ud_iface_t *self, const uct_ud_iface_config_t *config) +{ + uct_ud_iface_ops_t *ops = ucs_derived_of(self->super.ops, uct_ud_iface_ops_t); + uct_ib_qp_attr_t qp_init_attr = {}; + struct ibv_qp_attr qp_attr; + static ucs_status_t status; + int ret; + + qp_init_attr.qp_type = IBV_QPT_UD; + qp_init_attr.sq_sig_all = 0; + qp_init_attr.cap.max_send_wr = config->super.tx.queue_len; + qp_init_attr.cap.max_recv_wr = config->super.rx.queue_len; + qp_init_attr.cap.max_send_sge = 2; + qp_init_attr.cap.max_recv_sge = 1; + qp_init_attr.cap.max_inline_data = ucs_max(config->super.tx.min_inline, + UCT_UD_MIN_INLINE); + + status = ops->create_qp(&self->super, &qp_init_attr, &self->qp); + if (status != UCS_OK) { + return status; + } + + self->config.max_inline = qp_init_attr.cap.max_inline_data; + uct_ib_iface_set_max_iov(&self->super, qp_init_attr.cap.max_send_sge); + + memset(&qp_attr, 0, sizeof(qp_attr)); + /* Modify QP to INIT state */ + qp_attr.qp_state = IBV_QPS_INIT; + qp_attr.pkey_index = self->super.pkey_index; + qp_attr.port_num = self->super.config.port_num; + qp_attr.qkey = UCT_IB_KEY; + ret = ibv_modify_qp(self->qp, &qp_attr, + IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY); + if (ret) { + ucs_error("Failed to modify UD QP to INIT: %m"); + goto err_destroy_qp; + } + + /* Modify to RTR */ + qp_attr.qp_state = IBV_QPS_RTR; + ret = ibv_modify_qp(self->qp, &qp_attr, IBV_QP_STATE); + if (ret) { + ucs_error("Failed to modify UD QP to RTR: %m"); + goto err_destroy_qp; + } + + /* Modify to RTS */ + qp_attr.qp_state = IBV_QPS_RTS; + 
qp_attr.sq_psn = 0; + ret = ibv_modify_qp(self->qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN); + if (ret) { + ucs_error("Failed to modify UD QP to RTS: %m"); + goto err_destroy_qp; + } + + return UCS_OK; +err_destroy_qp: + uct_ib_destroy_qp(self->qp); + return UCS_ERR_INVALID_PARAM; +} + +ucs_status_t uct_ud_iface_complete_init(uct_ud_iface_t *iface) +{ + ucs_async_context_t *async = iface->super.super.worker->async; + ucs_async_mode_t async_mode = async->mode; + ucs_status_t status; + + iface->tx.resend_skbs_quota = iface->tx.available; + + status = ucs_twheel_init(&iface->async.slow_timer, + iface->async.slow_tick / 4, + uct_ud_iface_get_async_time(iface)); + if (status != UCS_OK) { + goto err; + } + + status = ucs_async_add_timer(async_mode, iface->async.slow_tick, + uct_ud_iface_timer, iface, async, + &iface->async.timer_id); + if (status != UCS_OK) { + goto err_twheel_cleanup; + } + + return UCS_OK; + +err_twheel_cleanup: + ucs_twheel_cleanup(&iface->async.slow_timer); +err: + return status; +} + +void uct_ud_iface_remove_async_handlers(uct_ud_iface_t *iface) +{ + uct_base_iface_progress_disable(&iface->super.super.super, + UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); + ucs_async_remove_handler(iface->async.timer_id, 1); +} + +/* Calculate real GIDs len. Can be either 16 (RoCEv1 or RoCEv2/IPv6) + * or 4 (RoCEv2/IPv4). This len is used for packets filtering by DGIDs. + * + * According to Annex17_RoCEv2 (A17.4.5.2): + * "The first 40 bytes of user posted UD Receive Buffers are reserved for the L3 + * header of the incoming packet (as per the InfiniBand Spec Section 11.4.1.2). + * In RoCEv2, this area is filled up with the IP header. IPv6 header uses the + * entire 40 bytes. IPv4 headers use the 20 bytes in the second half of the + * reserved 40 bytes area (i.e. offset 20 from the beginning of the receive + * buffer). In this case, the content of the first 20 bytes is undefined." 
*/ +static void uct_ud_iface_calc_gid_len(uct_ud_iface_t *iface) +{ + uint16_t *local_gid_u16 = (uint16_t*)iface->super.gid.raw; + + /* Make sure that daddr in IPv4 resides in the last 4 bytes in GRH */ + UCS_STATIC_ASSERT((UCT_IB_GRH_LEN - (20 + offsetof(struct iphdr, daddr))) == + UCT_UD_IPV4_ADDR_LEN); + + /* Make sure that dgid resides in the last 16 bytes in GRH */ + UCS_STATIC_ASSERT((UCT_IB_GRH_LEN - offsetof(struct ibv_grh, dgid)) == + UCT_UD_IPV6_ADDR_LEN); + + /* IPv4 mapped to IPv6 looks like: 0000:0000:0000:0000:0000:ffff:????:????, + * so check for leading zeroes and verify that 11-12 bytes are 0xff. + * Otherwise either RoCEv1 or RoCEv2/IPv6 are used. */ + if (local_gid_u16[0] == 0x0000) { + ucs_assert_always(local_gid_u16[5] == 0xffff); + iface->config.gid_len = UCT_UD_IPV4_ADDR_LEN; + } else { + iface->config.gid_len = UCT_UD_IPV6_ADDR_LEN; + } +} + +UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md, + uct_worker_h worker, const uct_iface_params_t *params, + const uct_ud_iface_config_t *config, + uct_ib_iface_init_attr_t *init_attr) +{ + ucs_status_t status; + size_t data_size; + int mtu; + + UCT_CHECK_PARAM(params->field_mask & UCT_IFACE_PARAM_FIELD_OPEN_MODE, + "UCT_IFACE_PARAM_FIELD_OPEN_MODE is not defined"); + if (!(params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE)) { + ucs_error("only UCT_IFACE_OPEN_MODE_DEVICE is supported"); + return UCS_ERR_UNSUPPORTED; + } + + ucs_trace_func("%s: iface=%p ops=%p worker=%p rx_headroom=%zu", + params->mode.device.dev_name, self, ops, worker, + (params->field_mask & UCT_IFACE_PARAM_FIELD_RX_HEADROOM) ? 
+ params->rx_headroom : 0); + + if (config->super.tx.queue_len <= UCT_UD_TX_MODERATION) { + ucs_error("%s ud iface tx queue is too short (%d <= %d)", + params->mode.device.dev_name, + config->super.tx.queue_len, UCT_UD_TX_MODERATION); + return UCS_ERR_INVALID_PARAM; + } + + status = uct_ib_device_mtu(params->mode.device.dev_name, md, &mtu); + if (status != UCS_OK) { + return status; + } + + init_attr->rx_priv_len = sizeof(uct_ud_recv_skb_t) - + sizeof(uct_ib_iface_recv_desc_t); + init_attr->rx_hdr_len = UCT_IB_GRH_LEN + sizeof(uct_ud_neth_t); + init_attr->tx_cq_len = config->super.tx.queue_len; + init_attr->rx_cq_len = config->super.rx.queue_len; + init_attr->seg_size = ucs_min(mtu, config->super.seg_size); + init_attr->qp_type = IBV_QPT_UD; + + UCS_CLASS_CALL_SUPER_INIT(uct_ib_iface_t, &ops->super, md, worker, + params, &config->super, init_attr); + + if (self->super.super.worker->async == NULL) { + ucs_error("%s ud iface must have valid async context", params->mode.device.dev_name); + return UCS_ERR_INVALID_PARAM; + } + + self->tx.unsignaled = 0; + self->tx.available = config->super.tx.queue_len; + + self->rx.available = config->super.rx.queue_len; + self->rx.quota = 0; + self->config.tx_qp_len = config->super.tx.queue_len; + self->config.peer_timeout = ucs_time_from_sec(config->peer_timeout); + self->config.check_grh_dgid = config->dgid_check && + uct_ib_iface_is_roce(&self->super); + + if ((config->max_window < UCT_UD_CA_MIN_WINDOW) || + (config->max_window > UCT_UD_CA_MAX_WINDOW)) { + ucs_error("Max congestion avoidance window should be >= %d and <= %d (%d)", + UCT_UD_CA_MIN_WINDOW, UCT_UD_CA_MAX_WINDOW, config->max_window); + return UCS_ERR_INVALID_PARAM; + } + + self->config.max_window = config->max_window; + + if (config->slow_timer_tick <= 0.) 
{ + ucs_error("The slow timer tick should be > 0 (%lf)", + config->slow_timer_tick); + return UCS_ERR_INVALID_PARAM; + } else { + self->async.slow_tick = ucs_time_from_sec(config->slow_timer_tick); + } + + if (config->slow_timer_backoff < UCT_UD_MIN_TIMER_TIMER_BACKOFF) { + ucs_error("The slow timer back off must be >= %lf (%lf)", + UCT_UD_MIN_TIMER_TIMER_BACKOFF, config->slow_timer_backoff); + return UCS_ERR_INVALID_PARAM; + } else { + self->config.slow_timer_backoff = config->slow_timer_backoff; + } + + /* Redefine receive desc release callback */ + self->super.release_desc.cb = uct_ud_iface_release_desc; + + UCT_UD_IFACE_HOOK_INIT(self); + + if (uct_ud_iface_create_qp(self, config) != UCS_OK) { + return UCS_ERR_INVALID_PARAM; + } + + ucs_ptr_array_init(&self->eps, 0, "ud_eps"); + uct_ud_iface_cep_init(self); + + status = uct_ib_iface_recv_mpool_init(&self->super, &config->super, + "ud_recv_skb", &self->rx.mp); + if (status != UCS_OK) { + goto err_qp; + } + + self->rx.available = ucs_min(config->ud_common.rx_queue_len_init, + config->super.rx.queue_len); + self->rx.quota = config->super.rx.queue_len - self->rx.available; + ucs_mpool_grow(&self->rx.mp, self->rx.available); + + data_size = sizeof(uct_ud_ctl_hdr_t) + self->super.addr_size; + data_size = ucs_max(data_size, self->super.config.seg_size); + data_size = ucs_max(data_size, + sizeof(uct_ud_zcopy_desc_t) + self->config.max_inline); + + status = uct_iface_mpool_init(&self->super.super, &self->tx.mp, + sizeof(uct_ud_send_skb_t) + data_size, + sizeof(uct_ud_send_skb_t), + UCT_UD_SKB_ALIGN, + &config->super.tx.mp, self->config.tx_qp_len, + uct_ud_iface_send_skb_init, "ud_tx_skb"); + if (status != UCS_OK) { + goto err_rx_mpool; + } + + ucs_assert_always(data_size >= UCT_UD_MIN_INLINE); + + self->tx.skb = NULL; + self->tx.skb_inl.super.len = sizeof(uct_ud_neth_t); + + ucs_queue_head_init(&self->tx.resend_skbs); + self->tx.resend_skbs_quota = 0; + + ucs_arbiter_init(&self->tx.pending_q); + + 
ucs_queue_head_init(&self->tx.async_comp_q); + + ucs_queue_head_init(&self->rx.pending_q); + + self->tx.async_before_pending = 0; + + uct_ud_iface_calc_gid_len(self); + + status = UCS_STATS_NODE_ALLOC(&self->stats, &uct_ud_iface_stats_class, + self->super.super.stats); + if (status != UCS_OK) { + goto err_tx_mpool; + } + + return UCS_OK; + +err_tx_mpool: + ucs_mpool_cleanup(&self->tx.mp, 1); +err_rx_mpool: + ucs_mpool_cleanup(&self->rx.mp, 1); +err_qp: + uct_ib_destroy_qp(self->qp); + ucs_ptr_array_cleanup(&self->eps); + return status; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_ud_iface_t) +{ + ucs_trace_func(""); + + /* TODO: proper flush and connection termination */ + uct_ud_enter(self); + ucs_debug("iface(%p): cep cleanup", self); + uct_ud_iface_cep_cleanup(self); + uct_ud_iface_free_resend_skbs(self); + uct_ud_iface_free_async_comps(self); + ucs_mpool_cleanup(&self->tx.mp, 0); + /* TODO: qp to error state and cleanup all wqes */ + uct_ud_iface_free_pending_rx(self); + ucs_mpool_cleanup(&self->rx.mp, 0); + uct_ib_destroy_qp(self->qp); + ucs_debug("iface(%p): ptr_array cleanup", self); + ucs_ptr_array_cleanup(&self->eps); + ucs_arbiter_cleanup(&self->tx.pending_q); + UCS_STATS_NODE_FREE(self->stats); + uct_ud_leave(self); +} + +UCS_CLASS_DEFINE(uct_ud_iface_t, uct_ib_iface_t); + +ucs_config_field_t uct_ud_iface_config_table[] = { + {"IB_", "", NULL, + ucs_offsetof(uct_ud_iface_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_ib_iface_config_table)}, + + {"UD_", "", NULL, + ucs_offsetof(uct_ud_iface_config_t, ud_common), + UCS_CONFIG_TYPE_TABLE(uct_ud_iface_common_config_table)}, + + {"TIMEOUT", "5.0m", "Transport timeout", + ucs_offsetof(uct_ud_iface_config_t, peer_timeout), UCS_CONFIG_TYPE_TIME}, + {"SLOW_TIMER_TICK", "100ms", "Initial timeout for retransmissions", + ucs_offsetof(uct_ud_iface_config_t, slow_timer_tick), UCS_CONFIG_TYPE_TIME}, + {"SLOW_TIMER_BACKOFF", "2.0", + "Timeout multiplier for resending trigger (must be >= " + 
UCS_PP_MAKE_STRING(UCT_UD_MIN_TIMER_TIMER_BACKOFF) ")", + ucs_offsetof(uct_ud_iface_config_t, slow_timer_backoff), + UCS_CONFIG_TYPE_DOUBLE}, + {"ETH_DGID_CHECK", "y", + "Enable checking destination GID for incoming packets of Ethernet network.\n" + "Mismatched packets are silently dropped.", + ucs_offsetof(uct_ud_iface_config_t, dgid_check), UCS_CONFIG_TYPE_BOOL}, + + {"MAX_WINDOW", UCS_PP_MAKE_STRING(UCT_UD_CA_MAX_WINDOW), + "Max congestion avoidance window. Should be >= " + UCS_PP_MAKE_STRING(UCT_UD_CA_MIN_WINDOW) " and <= " + UCS_PP_MAKE_STRING(UCT_UD_CA_MAX_WINDOW), + ucs_offsetof(uct_ud_iface_config_t, max_window), UCS_CONFIG_TYPE_UINT}, + + {NULL} +}; + + +ucs_status_t uct_ud_iface_query(uct_ud_iface_t *iface, uct_iface_attr_t *iface_attr) +{ + ucs_status_t status; + + status = uct_ib_iface_query(&iface->super, + UCT_IB_DETH_LEN + sizeof(uct_ud_neth_t), + iface_attr); + if (status != UCS_OK) { + return status; + } + + iface_attr->cap.flags = UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_AM_ZCOPY | + UCT_IFACE_FLAG_CONNECT_TO_EP | + UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_PENDING | + UCT_IFACE_FLAG_CB_SYNC | + UCT_IFACE_FLAG_CB_ASYNC | + UCT_IFACE_FLAG_EVENT_SEND_COMP | + UCT_IFACE_FLAG_EVENT_RECV | + UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE; + + iface_attr->cap.am.max_short = uct_ib_iface_hdr_size(iface->config.max_inline, + sizeof(uct_ud_neth_t)); + iface_attr->cap.am.max_bcopy = iface->super.config.seg_size - sizeof(uct_ud_neth_t); + iface_attr->cap.am.min_zcopy = 0; + iface_attr->cap.am.max_zcopy = iface->super.config.seg_size - sizeof(uct_ud_neth_t); + iface_attr->cap.am.align_mtu = uct_ib_mtu_value(uct_ib_iface_port_attr(&iface->super)->active_mtu); + iface_attr->cap.am.opt_zcopy_align = UCS_SYS_PCI_MAX_PAYLOAD; + /* The first iov is reserved for the header */ + iface_attr->cap.am.max_iov = uct_ib_iface_get_max_iov(&iface->super) - 1; + + iface_attr->cap.put.max_short = uct_ib_iface_hdr_size(iface->config.max_inline, + sizeof(uct_ud_neth_t) + + 
sizeof(uct_ud_put_hdr_t)); + + iface_attr->iface_addr_len = sizeof(uct_ud_iface_addr_t); + iface_attr->ep_addr_len = sizeof(uct_ud_ep_addr_t); + iface_attr->max_conn_priv = 0; + + /* UD lacks of scatter to CQE support */ + iface_attr->latency.overhead += 10e-9; + + if (iface_attr->cap.am.max_short) { + iface_attr->cap.flags |= UCT_IFACE_FLAG_AM_SHORT; + } + + return UCS_OK; +} + +ucs_status_t +uct_ud_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *iface_addr) +{ + uct_ud_iface_t *iface = ucs_derived_of(tl_iface, uct_ud_iface_t); + uct_ud_iface_addr_t *addr = (uct_ud_iface_addr_t *)iface_addr; + + uct_ib_pack_uint24(addr->qp_num, iface->qp->qp_num); + + return UCS_OK; +} + +ucs_status_t uct_ud_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp) +{ + uct_ud_iface_t *iface = ucs_derived_of(tl_iface, uct_ud_iface_t); + uct_ud_ep_t *ep; + ucs_status_t status; + int i, count; + + ucs_trace_func(""); + + if (comp != NULL) { + return UCS_ERR_UNSUPPORTED; + } + + uct_ud_enter(iface); + + if (ucs_unlikely(uct_ud_iface_has_pending_async_ev(iface))) { + UCT_TL_IFACE_STAT_FLUSH_WAIT(&iface->super.super); + uct_ud_leave(iface); + return UCS_INPROGRESS; + } + + count = 0; + ucs_ptr_array_for_each(ep, i, &iface->eps) { + /* ud ep flush returns either ok or in progress */ + status = uct_ud_ep_flush_nolock(iface, ep, NULL); + if ((status == UCS_INPROGRESS) || (status == UCS_ERR_NO_RESOURCE)) { + ++count; + } + } + + uct_ud_leave(iface); + if (count != 0) { + UCT_TL_IFACE_STAT_FLUSH_WAIT(&iface->super.super); + return UCS_INPROGRESS; + } + + UCT_TL_IFACE_STAT_FLUSH(&iface->super.super); + return UCS_OK; +} + +void uct_ud_iface_add_ep(uct_ud_iface_t *iface, uct_ud_ep_t *ep) +{ + uint32_t prev_gen; + ep->ep_id = ucs_ptr_array_insert(&iface->eps, ep, &prev_gen); +} + +void uct_ud_iface_remove_ep(uct_ud_iface_t *iface, uct_ud_ep_t *ep) +{ + if (ep->ep_id != UCT_UD_EP_NULL_ID) { + ucs_trace("iface(%p) remove ep: %p id %d", iface, ep, ep->ep_id); + 
ucs_ptr_array_remove(&iface->eps, ep->ep_id, 0); + } +} + +void uct_ud_iface_replace_ep(uct_ud_iface_t *iface, + uct_ud_ep_t *old_ep, uct_ud_ep_t *new_ep) +{ + void *p; + ucs_assert_always(old_ep != new_ep); + ucs_assert_always(old_ep->ep_id != new_ep->ep_id); + p = ucs_ptr_array_replace(&iface->eps, old_ep->ep_id, new_ep); + ucs_assert_always(p == (void *)old_ep); + ucs_trace("replace_ep: old(%p) id=%d new(%p) id=%d", old_ep, old_ep->ep_id, new_ep, new_ep->ep_id); + ucs_ptr_array_remove(&iface->eps, new_ep->ep_id, 0); +} + + +uct_ud_send_skb_t *uct_ud_iface_resend_skb_get(uct_ud_iface_t *iface) +{ + ucs_queue_elem_t *elem; + uct_ud_send_skb_t *skb; + + /* grow reserved skb's queue on-demand */ + if (iface->tx.resend_skbs_quota > 0) { + skb = ucs_mpool_get(&iface->tx.mp); + if (skb == NULL) { + ucs_fatal("failed to allocate control skb"); + } + --iface->tx.resend_skbs_quota; + return skb; + } else { + elem = ucs_queue_pull(&iface->tx.resend_skbs); + ucs_assert(elem != NULL); + return ucs_container_of(elem, uct_ud_send_skb_t, queue); + } +} + + +static void uct_ud_iface_free_resend_skbs(uct_ud_iface_t *iface) +{ + uct_ud_send_skb_t *skb; + + iface->tx.resend_skbs_quota = 0; + ucs_queue_for_each_extract(skb, &iface->tx.resend_skbs, queue, 1) { + ucs_mpool_put(skb); + } +} + +static void uct_ud_ep_dispatch_err_comp(uct_ud_ep_t *ep, uct_ud_send_skb_t *skb) +{ + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ud_iface_t); + ucs_status_t status; + + ucs_assert(ep->tx.err_skb_count > 0); + --ep->tx.err_skb_count; + + if ((ep->tx.err_skb_count > 0) || (ep->flags & UCT_UD_EP_FLAG_DISCONNECTED)) { + return; + } + + if (ep->flags & UCT_UD_EP_FLAG_PRIVATE) { + uct_ep_destroy(&ep->super.super); + return; + } + + status = iface->super.ops->set_ep_failed(&iface->super, &ep->super.super, + (ucs_status_t)skb->status); + if (status != UCS_OK) { + ucs_fatal("transport error: %s", ucs_status_string(status)); + } +} + +void 
uct_ud_iface_dispatch_async_comps_do(uct_ud_iface_t *iface) +{ + uct_ud_comp_desc_t *cdesc; + uct_ud_send_skb_t *skb; + uct_ud_ep_t *ep; + + do { + skb = ucs_queue_pull_elem_non_empty(&iface->tx.async_comp_q, + uct_ud_send_skb_t, queue); + cdesc = uct_ud_comp_desc(skb); + ep = cdesc->ep; + + if (skb->flags & UCT_UD_SEND_SKB_FLAG_COMP) { + ucs_assert(!(ep->flags & UCT_UD_EP_FLAG_DISCONNECTED)); + uct_invoke_completion(cdesc->comp, (ucs_status_t)skb->status); + } + + if (ucs_unlikely(skb->flags & UCT_UD_SEND_SKB_FLAG_ERR)) { + uct_ud_ep_dispatch_err_comp(ep, skb); + } + + ep->flags &= ~UCT_UD_EP_FLAG_ASYNC_COMPS; + skb->flags = 0; + ucs_mpool_put(skb); + } while (!ucs_queue_is_empty(&iface->tx.async_comp_q)); +} + +static void uct_ud_iface_free_async_comps(uct_ud_iface_t *iface) +{ + uct_ud_send_skb_t *skb; + + while (!ucs_queue_is_empty(&iface->tx.async_comp_q)) { + skb = ucs_queue_pull_elem_non_empty(&iface->tx.async_comp_q, + uct_ud_send_skb_t, queue); + ucs_mpool_put(skb); + } +} + +ucs_status_t uct_ud_iface_dispatch_pending_rx_do(uct_ud_iface_t *iface) +{ + int count; + uct_ud_recv_skb_t *skb; + uct_ud_neth_t *neth; + unsigned max_poll = iface->super.config.rx_max_poll; + + count = 0; + do { + skb = ucs_queue_pull_elem_non_empty(&iface->rx.pending_q, uct_ud_recv_skb_t, u.am.queue); + neth = (uct_ud_neth_t *)((char *)uct_ib_iface_recv_desc_hdr(&iface->super, + (uct_ib_iface_recv_desc_t *)skb) + + UCT_IB_GRH_LEN); + uct_ib_iface_invoke_am_desc(&iface->super, + uct_ud_neth_get_am_id(neth), + neth + 1, + skb->u.am.len, + &skb->super); + count++; + if (count >= max_poll) { + return UCS_ERR_NO_RESOURCE; + } + } while (!ucs_queue_is_empty(&iface->rx.pending_q)); + + return UCS_OK; +} + +static void uct_ud_iface_free_pending_rx(uct_ud_iface_t *iface) +{ + uct_ud_recv_skb_t *skb; + + while (!ucs_queue_is_empty(&iface->rx.pending_q)) { + skb = ucs_queue_pull_elem_non_empty(&iface->rx.pending_q, uct_ud_recv_skb_t, u.am.queue); + ucs_mpool_put(skb); + } +} + +static inline 
void uct_ud_iface_async_progress(uct_ud_iface_t *iface) +{ + unsigned ev_count; + uct_ud_iface_ops_t *ops; + + ops = ucs_derived_of(iface->super.ops, uct_ud_iface_ops_t); + ev_count = ops->async_progress(iface); + if (ev_count > 0) { + uct_ud_iface_raise_pending_async_ev(iface); + } +} + +static void uct_ud_iface_timer(int timer_id, void *arg) +{ + uct_ud_iface_t *iface = arg; + ucs_time_t now; + + uct_ud_enter(iface); + now = uct_ud_iface_get_async_time(iface); + ucs_trace_async("iface(%p) slow_timer_sweep: now %lu", iface, now); + ucs_twheel_sweep(&iface->async.slow_timer, now); + uct_ud_iface_async_progress(iface); + uct_ud_leave(iface); +} + +void uct_ud_iface_release_desc(uct_recv_desc_t *self, void *desc) +{ + uct_ud_iface_t *iface = ucs_container_of(self, + uct_ud_iface_t, super.release_desc); + + uct_ud_enter(iface); + uct_ib_iface_release_desc(self, desc); + uct_ud_leave(iface); +} + +void uct_ud_iface_handle_failure(uct_ib_iface_t *iface, void *arg, + ucs_status_t status) +{ + uct_ud_tx_wnd_purge_outstanding(ucs_derived_of(iface, uct_ud_iface_t), + (uct_ud_ep_t *)arg, status); +} + +ucs_status_t uct_ud_iface_event_arm(uct_iface_h tl_iface, unsigned events) +{ + uct_ud_iface_t *iface = ucs_derived_of(tl_iface, uct_ud_iface_t); + ucs_status_t status; + + uct_ud_enter(iface); + + status = uct_ib_iface_pre_arm(&iface->super); + if (status != UCS_OK) { + goto out; + } + + /* Check if some receives were not delivered yet */ + if ((events & (UCT_EVENT_RECV | UCT_EVENT_RECV_SIG)) && + !ucs_queue_is_empty(&iface->rx.pending_q)) + { + status = UCS_ERR_BUSY; + goto out; + } + + /* Check if some send completions were not delivered yet */ + if ((events & UCT_EVENT_SEND_COMP) && + !ucs_queue_is_empty(&iface->tx.async_comp_q)) + { + status = UCS_ERR_BUSY; + goto out; + } + + if (events & UCT_EVENT_SEND_COMP) { + status = iface->super.ops->arm_cq(&iface->super, UCT_IB_DIR_TX, 0); + if (status != UCS_OK) { + goto out; + } + } + + if (events & (UCT_EVENT_SEND_COMP | 
UCT_EVENT_RECV)) { + /* we may get send completion through ACKs as well */ + status = iface->super.ops->arm_cq(&iface->super, UCT_IB_DIR_RX, 0); + if (status != UCS_OK) { + goto out; + } + } + + status = UCS_OK; +out: + uct_ud_leave(iface); + return status; +} + +void uct_ud_iface_progress_enable(uct_iface_h tl_iface, unsigned flags) +{ + uct_ud_iface_t *iface = ucs_derived_of(tl_iface, uct_ud_iface_t); + + if (flags & UCT_PROGRESS_RECV) { + uct_ud_enter(iface); + iface->rx.available += iface->rx.quota; + iface->rx.quota = 0; + /* let progress (possibly async) post the missing receives */ + uct_ud_leave(iface); + } + + uct_base_iface_progress_enable(tl_iface, flags); +} diff --git a/src/uct/ib/ud/base/ud_iface.h b/src/uct/ib/ud/base/ud_iface.h new file mode 100644 index 0000000..e4f9ff0 --- /dev/null +++ b/src/uct/ib/ud/base/ud_iface.h @@ -0,0 +1,477 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + + +#ifndef UCT_UD_IFACE_H +#define UCT_UD_IFACE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ud_def.h" +#include "ud_ep.h" +#include "ud_iface_common.h" + +BEGIN_C_DECLS + +#define UCT_UD_MIN_TIMER_TIMER_BACKOFF 1.0 + +/** @file ud_iface.h */ + +enum { + UCT_UD_IFACE_STAT_RX_DROP, + UCT_UD_IFACE_STAT_LAST +}; + +/* TODO: maybe tx_moderation can be defined at compile-time since tx completions are used only to know how much space is there in tx qp */ + +typedef struct uct_ud_iface_config { + uct_ib_iface_config_t super; + uct_ud_iface_common_config_t ud_common; + double peer_timeout; + double slow_timer_tick; + double slow_timer_backoff; + int dgid_check; + unsigned max_window; +} uct_ud_iface_config_t; + + +struct uct_ud_iface_peer { + uct_ud_iface_peer_t *next; + union ibv_gid dgid; + uint16_t dlid; + uint32_t dst_qpn; + uint32_t conn_id_last; + ucs_list_link_t ep_list; /* ep list ordered by connection id */ +}; + + +static 
inline int uct_ud_iface_peer_cmp(uct_ud_iface_peer_t *a, uct_ud_iface_peer_t *b) { + return (int)a->dst_qpn - (int)b->dst_qpn || + memcmp(a->dgid.raw, b->dgid.raw, sizeof(union ibv_gid)) || + (int)a->dlid - (int)b->dlid; +} + +static inline int uct_ud_iface_peer_hash(uct_ud_iface_peer_t *a) { + return (a->dlid + a->dgid.global.interface_id + a->dgid.global.subnet_prefix) + % UCT_UD_HASH_SIZE; +} + +SGLIB_DEFINE_LIST_PROTOTYPES(uct_ud_iface_peer_t, uct_ud_iface_peer_cmp, next) +SGLIB_DEFINE_HASHED_CONTAINER_PROTOTYPES(uct_ud_iface_peer_t, UCT_UD_HASH_SIZE, + uct_ud_iface_peer_hash) + + + +#if UCT_UD_EP_DEBUG_HOOKS + +typedef ucs_status_t (*uct_ud_iface_hook_t)(uct_ud_iface_t *iface, uct_ud_neth_t *neth); + +#define UCT_UD_IFACE_HOOK_DECLARE(_name) \ + uct_ud_iface_hook_t _name; + +#define UCT_UD_IFACE_HOOK_CALL_RX(_iface, _neth, _len) \ + if ((_iface)->rx.hook(_iface, _neth) != UCS_OK) { \ + ucs_trace_data("RX: dropping packet"); \ + return; \ + } + +#define UCT_UD_IFACE_HOOK_INIT(_iface) { \ + (_iface)->rx.hook = uct_ud_iface_null_hook; \ + } + +static inline ucs_status_t uct_ud_iface_null_hook(uct_ud_iface_t *iface, + uct_ud_neth_t *neth) +{ + return UCS_OK; +} + +#else + +#define UCT_UD_IFACE_HOOK_DECLARE(_name) +#define UCT_UD_IFACE_HOOK_CALL_RX(_iface, _neth, _len) +#define UCT_UD_IFACE_HOOK_INIT(_iface) + +#endif + +typedef struct uct_ud_iface_ops { + uct_ib_iface_ops_t super; + unsigned (*async_progress)(uct_ud_iface_t *iface); + void (*tx_skb)(uct_ud_ep_t *ep, uct_ud_send_skb_t *skb, + int solicited); + void (*ep_free)(uct_ep_h ep); + ucs_status_t (*create_qp)(uct_ib_iface_t *iface, uct_ib_qp_attr_t *attr, + struct ibv_qp **qp_p); +} uct_ud_iface_ops_t; + +struct uct_ud_iface { + uct_ib_iface_t super; + struct ibv_qp *qp; + struct { + ucs_mpool_t mp; + unsigned available; + unsigned quota; + ucs_queue_head_t pending_q; + UCT_UD_IFACE_HOOK_DECLARE(hook) + } rx; + struct { + uct_ud_send_skb_t *skb; /* ready to use skb */ + uct_ud_send_skb_inl_t skb_inl; + 
ucs_mpool_t mp; + /* got async events but pending queue was not dispatched */ + uint8_t async_before_pending; + int16_t available; + unsigned unsignaled; + /* pool of skbs that are reserved for retransmissions */ + ucs_queue_head_t resend_skbs; + unsigned resend_skbs_quota; + ucs_arbiter_t pending_q; + ucs_queue_head_t async_comp_q; + } tx; + struct { + ucs_time_t peer_timeout; + double slow_timer_backoff; + unsigned tx_qp_len; + unsigned max_inline; + int check_grh_dgid; + unsigned gid_len; + unsigned max_window; + } config; + + UCS_STATS_NODE_DECLARE(stats) + + ucs_ptr_array_t eps; + uct_ud_iface_peer_t *peers[UCT_UD_HASH_SIZE]; + struct { + ucs_twheel_t slow_timer; + ucs_time_t slow_tick; + int timer_id; + } async; +}; + +UCS_CLASS_DECLARE(uct_ud_iface_t, uct_ud_iface_ops_t*, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_ud_iface_config_t*, + uct_ib_iface_init_attr_t*) + +struct uct_ud_ctl_hdr { + uint8_t type; + uint8_t reserved[3]; + union { + struct { + uct_ud_ep_addr_t ep_addr; + uint32_t conn_id; + } conn_req; + struct { + uint32_t src_ep_id; + } conn_rep; + uint32_t data; + }; + uct_ud_peer_name_t peer; + /* For CREQ packet, IB address follows */ +} UCS_S_PACKED; + + +extern ucs_config_field_t uct_ud_iface_config_table[]; + +ucs_status_t uct_ud_iface_query(uct_ud_iface_t *iface, uct_iface_attr_t *iface_attr); +void uct_ud_iface_release_desc(uct_recv_desc_t *self, void *desc); + +ucs_status_t uct_ud_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *addr); + +void uct_ud_iface_add_ep(uct_ud_iface_t *iface, uct_ud_ep_t *ep); +void uct_ud_iface_remove_ep(uct_ud_iface_t *iface, uct_ud_ep_t *ep); +void uct_ud_iface_replace_ep(uct_ud_iface_t *iface, uct_ud_ep_t *old_ep, uct_ud_ep_t *new_ep); + +ucs_status_t uct_ud_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp); + +ucs_status_t uct_ud_iface_complete_init(uct_ud_iface_t *iface); + +void uct_ud_iface_remove_async_handlers(uct_ud_iface_t *iface); + +void 
uct_ud_dump_packet(uct_base_iface_t *iface, uct_am_trace_type_t type, + void *data, size_t length, size_t valid_length, + char *buffer, size_t max); + + +static UCS_F_ALWAYS_INLINE int uct_ud_iface_can_tx(uct_ud_iface_t *iface) +{ + return iface->tx.available > 0; +} + +static UCS_F_ALWAYS_INLINE int uct_ud_iface_has_skbs(uct_ud_iface_t *iface) +{ + return iface->tx.skb || !ucs_mpool_is_empty(&iface->tx.mp); +} + + +uct_ud_send_skb_t *uct_ud_iface_resend_skb_get(uct_ud_iface_t *iface); + +static inline void +uct_ud_iface_resend_skb_put(uct_ud_iface_t *iface, uct_ud_send_skb_t *skb) +{ + if (skb != ucs_unaligned_ptr(&iface->tx.skb_inl.super)) { + ucs_queue_push(&iface->tx.resend_skbs, &skb->queue); + } +} + +static inline uct_ib_address_t* uct_ud_creq_ib_addr(uct_ud_ctl_hdr_t *conn_req) +{ + ucs_assert(conn_req->type == UCT_UD_PACKET_CREQ); + return (uct_ib_address_t*)(conn_req + 1); +} + +static UCS_F_ALWAYS_INLINE void uct_ud_enter(uct_ud_iface_t *iface) +{ + UCS_ASYNC_BLOCK(iface->super.super.worker->async); +} + +static UCS_F_ALWAYS_INLINE void uct_ud_leave(uct_ud_iface_t *iface) +{ + UCS_ASYNC_UNBLOCK(iface->super.super.worker->async); +} + +static UCS_F_ALWAYS_INLINE int +uct_ud_iface_check_grh(uct_ud_iface_t *iface, void *grh_end, int is_grh_present) +{ + void *dest_gid, *local_gid; + + if (!iface->config.check_grh_dgid) { + return 1; + } + + if (ucs_unlikely(!is_grh_present)) { + ucs_warn("RoCE packet does not contain GRH"); + return 1; + } + + local_gid = (char*)iface->super.gid.raw + (16 - iface->config.gid_len); + dest_gid = (char*)grh_end - iface->config.gid_len; + + if (memcmp(local_gid, dest_gid, iface->config.gid_len)) { + UCS_STATS_UPDATE_COUNTER(iface->stats, UCT_UD_IFACE_STAT_RX_DROP, 1); + ucs_trace_data("Drop packet with wrong dgid"); + return 0; + } + + return 1; +} + +/* +management of connecting endpoints (cep) + +Such endpoint are created either by explicitely calling ep_create_connected() +or implicitely as a result of UD connection 
protocol. Calling +ep_create_connected() may reuse already existing endpoint that was implicitly +created. + +UD connection protocol + +The protocol allows connection establishment in an environment where UD packets +can be dropped, duplicated or reordered. The connection is done as a 3-way +handshake: + +1: CREQ (src_if_addr, src_ep_addr, conn_id) +Connection request. It includes source interface address, source ep address +and connection id. + +Connection id is essentially a counter of endpoints that are created by +ep_create_connected(). The counter is per destination interface. Purpose of +conn_id is to ensure order between multiple CREQ packets and to handle +simultaneous connection establishment, i.e. the case when both sides call +ep_create_connected(). The rule is that connected endpoints must have +the same conn_id. + +2: CREP (dest_ep_id) + +Connection reply. It includes id of destination endpoint and optionally ACK +request flag. From this point reliability is handled by UD protocol as +source and destination endpoint ids are known. + +Endpoint may be created upon reception of CREQ. It is possible that the +endpoint already exists because CREQ is retransmitted or because of +simultaneous connection. In any case endpoint connection id must be +equal to connection id in CREQ. + +3: ACK + +Ack on connection reply. It may be sent as part of the data packet. + +Implicit endpoints reuse + +Endpoints created upon receipt of a CREQ can be re-used when +application calls ep_create_connected(). + +Data structure + +Hash table and doubly linked sorted list: +hash(src_if_addr) -> peer ->ep (list sorted in descending order) + +List is used to save memory (8 bytes instead of 500-1000 bytes of hashtable) +In many cases list will provide fast lookup and insertion. +It is expected that most of connect requests will arrive in order. In +such case the insertion is O(1) because it is done to the head of the +list. Lookup is O(number of 'passive' eps) which is expected to be small. 
+
+TODO: add and maintain pointer to the list element with conn_id equal to
+conn_last_id. This will allow for O(1) endpoint lookup.
+
+Connection id assignment:
+
+    0 1 ... conn_last_id, +1, +2, ... UCT_UD_EP_CONN_ID_MAX
+
+Ids up to (not including) conn_last_id are already assigned to endpoints.
+Any endpoint with conn_id >= conn_last_id is created on receive of CREQ.
+There may be holes because CREQs are not received in order.
+
+Call to ep_create_connected() will try to reuse endpoint with
+conn_id = conn_last_id
+
+If there is no such endpoint, a new endpoint with id conn_last_id
+will be created.
+
+In both cases conn_last_id = conn_last_id + 1
+
+*/
+void uct_ud_iface_cep_init(uct_ud_iface_t *iface);
+
+/* find ep that is connected to (src_if, src_ep),
+ * if conn_id == UCT_UD_EP_CONN_ID_MAX then try to
+ * reuse ep with conn_id == conn_last_id
+ */
+uct_ud_ep_t *uct_ud_iface_cep_lookup(uct_ud_iface_t *iface,
+                                     const uct_ib_address_t *src_ib_addr,
+                                     const uct_ud_iface_addr_t *src_if_addr,
+                                     uint32_t conn_id);
+
+/* remove ep */
+void uct_ud_iface_cep_remove(uct_ud_ep_t *ep);
+
+/*
+ * rollback last ordered insert (conn_id == UCT_UD_EP_CONN_ID_MAX).
+ */
+void uct_ud_iface_cep_rollback(uct_ud_iface_t *iface,
+                               const uct_ib_address_t *src_ib_addr,
+                               const uct_ud_iface_addr_t *src_if_addr,
+                               uct_ud_ep_t *ep);
+
+/* insert new ep that is connected to src_if_addr */
+ucs_status_t uct_ud_iface_cep_insert(uct_ud_iface_t *iface,
+                                     const uct_ib_address_t *src_ib_addr,
+                                     const uct_ud_iface_addr_t *src_if_addr,
+                                     uct_ud_ep_t *ep, uint32_t conn_id);
+
+void uct_ud_iface_cep_cleanup(uct_ud_iface_t *iface);
+
+/* get time of the last async wakeup, as recorded in the worker's async
+ * context (worker->async->last_wakeup)
+ */
+static UCS_F_ALWAYS_INLINE ucs_time_t
+uct_ud_iface_get_async_time(uct_ud_iface_t *iface)
+{
+    return iface->super.super.worker->async->last_wakeup;
+}
+
+/* Dispatch the interface pending queue through uct_ud_ep_do_pending().
+ * is_async is forwarded to the callback as its opaque argument.
+ * A call with is_async == 0 also clears the tx.async_before_pending flag.
+ */
+static UCS_F_ALWAYS_INLINE void
+uct_ud_iface_progress_pending(uct_ud_iface_t *iface, const uintptr_t is_async)
+{
+    if (!is_async) {
+        iface->tx.async_before_pending = 0;
+    }
+
+    /* nothing can be sent now, leave the pending queue untouched */
+    if (!uct_ud_iface_can_tx(iface)) {
+        return;
+    }
+
+    ucs_arbiter_dispatch(&iface->tx.pending_q, 1, uct_ud_ep_do_pending,
+                         (void *)is_async);
+}
+
+/* return the current value of the tx.async_before_pending flag */
+static UCS_F_ALWAYS_INLINE int
+uct_ud_iface_has_pending_async_ev(uct_ud_iface_t *iface)
+{
+    return iface->tx.async_before_pending;
+}
+
+/* set tx.async_before_pending, but only if the pending queue is not empty */
+static UCS_F_ALWAYS_INLINE void
+uct_ud_iface_raise_pending_async_ev(uct_ud_iface_t *iface)
+{
+    if (!ucs_arbiter_is_empty(&iface->tx.pending_q)) {
+        iface->tx.async_before_pending = 1;
+    }
+}
+
+/* Go over all active eps and remove them.
Do it this way because class destructors are not
+ * virtual
+ */
+#define UCT_UD_IFACE_DELETE_EPS(_iface, _ep_type_t) \
+    { \
+        int _i; \
+        _ep_type_t *_ep; \
+        ucs_ptr_array_for_each(_ep, _i, &(_iface)->eps) { \
+            UCS_CLASS_DELETE(_ep_type_t, _ep); \
+        } \
+    }
+
+ucs_status_t uct_ud_iface_dispatch_pending_rx_do(uct_ud_iface_t *iface);
+
+void uct_ud_iface_handle_failure(uct_ib_iface_t *iface, void *arg,
+                                 ucs_status_t status);
+
+ucs_status_t uct_ud_iface_event_arm(uct_iface_h tl_iface, unsigned events);
+
+void uct_ud_iface_progress_enable(uct_iface_h tl_iface, unsigned flags);
+
+/* Fast-path wrapper: return UCS_OK right away when rx.pending_q is empty,
+ * otherwise defer to uct_ud_iface_dispatch_pending_rx_do().
+ */
+static UCS_F_ALWAYS_INLINE ucs_status_t
+uct_ud_iface_dispatch_pending_rx(uct_ud_iface_t *iface)
+{
+    if (ucs_likely(ucs_queue_is_empty(&iface->rx.pending_q))) {
+        return UCS_OK;
+    }
+    return uct_ud_iface_dispatch_pending_rx_do(iface);
+}
+
+void uct_ud_iface_dispatch_async_comps_do(uct_ud_iface_t *iface);
+
+/* Fast-path wrapper: do nothing when tx.async_comp_q is empty, otherwise
+ * defer to uct_ud_iface_dispatch_async_comps_do().
+ */
+static UCS_F_ALWAYS_INLINE void
+uct_ud_iface_dispatch_zcopy_comps(uct_ud_iface_t *iface)
+{
+    if (ucs_likely(ucs_queue_is_empty(&iface->tx.async_comp_q))) {
+        return;
+    }
+    uct_ud_iface_dispatch_async_comps_do(iface);
+}
+
+#if ENABLE_PARAMS_CHECK
+/*
+ * Check that UD network header + header_len + payload_len is within
+ * [0, active port MTU], delegating the actual check to UCT_CHECK_LENGTH().
+ *
+ * The macro is a single statement: it does not end with ';' so that it can be
+ * used as 'if (cond) UCT_UD_CHECK_LENGTH(...); else ...'. Length arguments are
+ * parenthesized so that expressions can be passed safely.
+ *
+ * NOTE(review): UCT_CHECK_LENGTH is defined outside this file and presumably
+ * returns from the enclosing function on failure - confirm. If so, callers
+ * that use these checks after uct_ud_enter() (e.g. uct_ud_verbs_ep_am_bcopy)
+ * would skip uct_ud_leave() on that path.
+ */
+#define UCT_UD_CHECK_LENGTH(iface, header_len, payload_len, msg) \
+    do { \
+        int mtu; \
+        mtu = uct_ib_mtu_value(uct_ib_iface_port_attr(&(iface)->super)->active_mtu); \
+        UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + (payload_len) + (header_len), \
+                         0, mtu, msg); \
+    } while (0)
+
+#define UCT_UD_CHECK_BCOPY_LENGTH(iface, len) \
+    UCT_UD_CHECK_LENGTH(iface, 0, len, "am_bcopy length")
+
+#define UCT_UD_CHECK_ZCOPY_LENGTH(iface, header_len, payload_len) \
+    UCT_UD_CHECK_LENGTH(iface, header_len, payload_len, "am_zcopy payload")
+
+#else
+#define UCT_UD_CHECK_ZCOPY_LENGTH(iface, header_len, payload_len)
+#define UCT_UD_CHECK_BCOPY_LENGTH(iface, len)
+#endif
+
+END_C_DECLS
+
+#endif
diff --git a/src/uct/ib/ud/base/ud_iface_common.c b/src/uct/ib/ud/base/ud_iface_common.c
new file mode 100644
index 0000000..940dbf9
--- /dev/null
+++
b/src/uct/ib/ud/base/ud_iface_common.c @@ -0,0 +1,23 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ud_iface_common.h" + +#include + + +ucs_config_field_t uct_ud_iface_common_config_table[] = { + {"RX_QUEUE_LEN_INIT", "128", + "Initial length of receive queue, before the interface is activated.", + ucs_offsetof(uct_ud_iface_common_config_t, rx_queue_len_init), + UCS_CONFIG_TYPE_UINT}, + + {NULL} +}; diff --git a/src/uct/ib/ud/base/ud_iface_common.h b/src/uct/ib/ud/base/ud_iface_common.h new file mode 100644 index 0000000..2d6e7de --- /dev/null +++ b/src/uct/ib/ud/base/ud_iface_common.h @@ -0,0 +1,24 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UD_IFACE_COMMON_H +#define UD_IFACE_COMMON_H + +#include + + +/** + * Common configuration for IB non peer-to-peer transports (UD and DC). + */ +typedef struct uct_ud_iface_common_config { + unsigned rx_queue_len_init; +} uct_ud_iface_common_config_t; + + +extern ucs_config_field_t uct_ud_iface_common_config_table[]; + + +#endif diff --git a/src/uct/ib/ud/base/ud_inl.h b/src/uct/ib/ud/base/ud_inl.h new file mode 100644 index 0000000..05971d3 --- /dev/null +++ b/src/uct/ib/ud/base/ud_inl.h @@ -0,0 +1,189 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +static UCS_F_ALWAYS_INLINE void +uct_ud_ep_ctl_op_schedule(uct_ud_iface_t *iface, uct_ud_ep_t *ep) +{ + ucs_arbiter_group_push_elem(&ep->tx.pending.group, + &ep->tx.pending.elem); + ucs_arbiter_group_schedule(&iface->tx.pending_q, &ep->tx.pending.group); +} + +/** + * schedule control operation. 
+ */ +static UCS_F_ALWAYS_INLINE void +uct_ud_ep_ctl_op_add(uct_ud_iface_t *iface, uct_ud_ep_t *ep, int op) +{ + ep->tx.pending.ops |= op; + uct_ud_ep_ctl_op_schedule(iface, ep); +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_ep_tx_stop(uct_ud_ep_t *ep) +{ + ep->tx.max_psn = ep->tx.psn; +} + +/* + * check iface resources:tx_queue and return + * prefetched/cached skb + * + * NOTE: caller must not return skb to mpool until it is + * removed from the cache + * skb is removed from cache by + * uct_ud_iface_complete_tx_inl() + * uct_ud_iface_complete_tx_skb() + * + * In case of error flow caller must do nothing with the skb + */ +static UCS_F_ALWAYS_INLINE +uct_ud_send_skb_t *uct_ud_iface_get_tx_skb(uct_ud_iface_t *iface, + uct_ud_ep_t *ep) +{ + uct_ud_send_skb_t *skb; + + if (ucs_unlikely(!uct_ud_iface_can_tx(iface))) { + UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1); + return NULL; + } + + skb = iface->tx.skb; + if (ucs_unlikely(skb == NULL)) { + skb = ucs_mpool_get(&iface->tx.mp); + if (skb == NULL) { + ucs_trace_data("iface=%p out of tx skbs", iface); + UCT_TL_IFACE_STAT_TX_NO_DESC(&iface->super.super); + return NULL; + } + iface->tx.skb = skb; + } + VALGRIND_MAKE_MEM_DEFINED(skb, sizeof *skb); + ucs_prefetch(skb->neth); + return skb; +} + +/* same as above but also check ep resources: window&connection state */ +static UCS_F_ALWAYS_INLINE uct_ud_send_skb_t * +uct_ud_ep_get_tx_skb(uct_ud_iface_t *iface, uct_ud_ep_t *ep) +{ + if (ucs_unlikely(!uct_ud_ep_is_connected(ep) || + uct_ud_ep_no_window(ep) || + uct_ud_iface_has_pending_async_ev(iface))) { + ucs_trace_poll("iface=%p ep=%p (%d->%d) no ep resources (psn=%u max_psn=%u)", + iface, ep, ep->ep_id, ep->dest_ep_id, + (unsigned)ep->tx.psn, + (unsigned)ep->tx.max_psn); + UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1); + return NULL; + } + + return uct_ud_iface_get_tx_skb(iface, ep); +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_am_set_zcopy_desc(uct_ud_send_skb_t *skb, const 
uct_iov_t *iov, size_t iovcnt, + uct_completion_t *comp) +{ + uct_ud_zcopy_desc_t *zdesc; + size_t iov_it_length; + size_t iov_it; + + skb->flags |= UCT_UD_SEND_SKB_FLAG_ZCOPY; + zdesc = uct_ud_zcopy_desc(skb); + zdesc->iovcnt = iovcnt; + for (iov_it = 0; iov_it < iovcnt; ++iov_it) { + iov_it_length = uct_iov_get_length(iov + iov_it); + ucs_assert(iov_it_length <= UINT16_MAX); + zdesc->iov[iov_it].buffer = iov[iov_it].buffer; + zdesc->iov[iov_it].length = iov_it_length; + } + if (comp != NULL) { + skb->flags |= UCT_UD_SEND_SKB_FLAG_COMP; + zdesc->super.comp = comp; + } +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_iface_complete_tx_inl(uct_ud_iface_t *iface, uct_ud_ep_t *ep, + uct_ud_send_skb_t *skb, void *data, + const void *buffer, unsigned length) +{ + iface->tx.skb = ucs_mpool_get(&iface->tx.mp); + ep->tx.psn++; + skb->len += length; + memcpy(data, buffer, length); + ucs_queue_push(&ep->tx.window, &skb->queue); + ep->tx.slow_tick = iface->async.slow_tick; + ucs_wtimer_add(&iface->async.slow_timer, &ep->slow_timer, + uct_ud_iface_get_async_time(iface) - + ucs_twheel_get_time(&iface->async.slow_timer) + + ep->tx.slow_tick); + ep->tx.send_time = uct_ud_iface_get_async_time(iface); +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_iface_complete_tx_skb(uct_ud_iface_t *iface, uct_ud_ep_t *ep, + uct_ud_send_skb_t *skb) +{ + iface->tx.skb = ucs_mpool_get(&iface->tx.mp); + ep->tx.psn++; + ucs_queue_push(&ep->tx.window, &skb->queue); + ep->tx.slow_tick = iface->async.slow_tick; + ucs_wtimer_add(&iface->async.slow_timer, &ep->slow_timer, + uct_ud_iface_get_async_time(iface) - + ucs_twheel_get_time(&iface->async.slow_timer) + + ep->tx.slow_tick); + ep->tx.send_time = uct_ud_iface_get_async_time(iface); +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_am_set_neth(uct_ud_neth_t *neth, uct_ud_ep_t *ep, uint8_t id) +{ + uct_ud_neth_init_data(ep, neth); + uct_ud_neth_set_type_am(ep, neth, id); + uct_ud_neth_ack_req(ep, neth); +} + +static UCS_F_ALWAYS_INLINE ucs_status_t 
+uct_ud_am_common(uct_ud_iface_t *iface, uct_ud_ep_t *ep, uint8_t id, + uct_ud_send_skb_t **skb_p) +{ + uct_ud_send_skb_t *skb; + + UCT_CHECK_AM_ID(id); + + skb = uct_ud_ep_get_tx_skb(iface, ep); + if (!skb) { + return UCS_ERR_NO_RESOURCE; + } + + /* either we are executing pending operations, or there are no any pending + * elements, or the only pending element is for sending control messages + * (we don't care about reordering with respect to control messages) + */ + ucs_assertv((ep->flags & UCT_UD_EP_FLAG_IN_PENDING) || + ucs_arbiter_group_is_empty(&ep->tx.pending.group) || + ucs_arbiter_elem_is_only(&ep->tx.pending.group, &ep->tx.pending.elem), + "out-of-order send detected for ep %p am %d ep_pending %d arbtail %p arbelem %p", + ep, id, (ep->flags & UCT_UD_EP_FLAG_IN_PENDING), + ep->tx.pending.group.tail, + &ep->tx.pending.elem); + uct_ud_am_set_neth(skb->neth, ep, id); + + *skb_p = skb; + return UCS_OK; +} + +static UCS_F_ALWAYS_INLINE size_t +uct_ud_skb_bcopy(uct_ud_send_skb_t *skb, uct_pack_callback_t pack_cb, void *arg) +{ + size_t payload_len; + + payload_len = pack_cb(skb->neth + 1, arg); + skb->len = sizeof(skb->neth[0]) + payload_len; + return payload_len; +} diff --git a/src/uct/ib/ud/base/ud_log.c b/src/uct/ib/ud/base/ud_log.c new file mode 100644 index 0000000..bd6429b --- /dev/null +++ b/src/uct/ib/ud/base/ud_log.c @@ -0,0 +1,69 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "ud_iface.h"
+#include "ud_ep.h"
+
+
+/**
+ * Format a human-readable description of a UD packet into @a buffer.
+ *
+ * @param iface        Interface, passed through to uct_iface_dump_am().
+ * @param type         Trace type, passed through to uct_iface_dump_am().
+ * @param data         Start of the packet; interpreted as uct_ud_neth_t
+ *                     followed by the packet-type specific header/payload.
+ * @param length       Packet length, including the network header.
+ * @param valid_length Not used by this function.
+ * @param buffer       Output buffer for the formatted string.
+ * @param max          Size of @a buffer in bytes; nothing is written when 0.
+ */
+void uct_ud_dump_packet(uct_base_iface_t *iface, uct_am_trace_type_t type,
+                        void *data, size_t length, size_t valid_length,
+                        char *buffer, size_t max)
+{
+    uct_ud_neth_t *neth = data;
+    uct_ud_put_hdr_t *puth;
+    uct_ud_ctl_hdr_t *ctlh;
+    char *p, *endp;
+    char buf[128];
+    int am_id;
+
+    /* snprintf() with size 0 writes nothing, so the strlen() calls below
+     * would read a buffer that was never terminated */
+    if (max == 0) {
+        return;
+    }
+
+    p    = buffer;
+    endp = buffer + max;
+
+    /* common part: destination id, psn, acked psn, ACK_REQ and ECN flags */
+    snprintf(p, endp - p, " dst %d psn %u apsn %u %c%c",
+             uct_ud_neth_get_dest_id(neth), neth->psn, neth->ack_psn,
+             (neth->packet_type & UCT_UD_PACKET_FLAG_ACK_REQ) ? 'r' : '-',
+             (neth->packet_type & UCT_UD_PACKET_FLAG_ECN) ? 'e' : '-');
+    p += strlen(p);
+
+    if (neth->packet_type & UCT_UD_PACKET_FLAG_AM) {
+        am_id = uct_ud_neth_get_am_id(neth);
+        snprintf(p, endp - p, " am %d ", am_id);
+        p += strlen(p);
+        uct_iface_dump_am(iface, type, am_id, neth + 1,
+                          length - sizeof(*neth), p, endp - p);
+    } else if (neth->packet_type & UCT_UD_PACKET_FLAG_NAK) {
+        snprintf(p, endp - p, " NAK");
+    } else if (neth->packet_type & UCT_UD_PACKET_FLAG_PUT) {
+        puth = (uct_ud_put_hdr_t *)(neth + 1);
+        snprintf(p, endp - p, " PUT: 0x%0lx", puth->rva);
+    } else if (neth->packet_type & UCT_UD_PACKET_FLAG_CTL) {
+        ctlh = (uct_ud_ctl_hdr_t *)(neth + 1);
+        switch (ctlh->type) {
+        case UCT_UD_PACKET_CREQ:
+            snprintf(p, endp - p, " CREQ from %s:%d qpn 0x%x %s epid %d cid %d",
+                     ctlh->peer.name, ctlh->peer.pid,
+                     uct_ib_unpack_uint24(ctlh->conn_req.ep_addr.iface_addr.qp_num),
+                     uct_ib_address_str(uct_ud_creq_ib_addr(ctlh), buf, sizeof(buf)),
+                     uct_ib_unpack_uint24(ctlh->conn_req.ep_addr.ep_id),
+                     ctlh->conn_req.conn_id);
+            break;
+        case UCT_UD_PACKET_CREP:
+            snprintf(p, endp - p, " CREP from %s:%d src_ep_id %d",
+                     ctlh->peer.name, ctlh->peer.pid,
+                     ctlh->conn_rep.src_ep_id);
+            break;
+        default:
+            /* one conversion per argument: the packet type must be consumed
+             * by %d, otherwise it is read as the %s string pointer */
+            snprintf(p, endp - p, " <unknown control packet %d> from %s:%d",
+                     ctlh->type, ctlh->peer.name, ctlh->peer.pid);
+            break;
+        }
+    }
+}
+
diff --git a/src/uct/ib/ud/verbs/ud_verbs.c
b/src/uct/ib/ud/verbs/ud_verbs.c new file mode 100644 index 0000000..d22a8b3 --- /dev/null +++ b/src/uct/ib/ud/verbs/ud_verbs.c @@ -0,0 +1,671 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For htonl */ + +#include + +#include +#include +#include + +#include "ud_verbs.h" +#include "ucs/sys/math.h" + +#include + +static UCS_F_NOINLINE void +uct_ud_verbs_iface_post_recv_always(uct_ud_verbs_iface_t *iface, int max); + +static inline void +uct_ud_verbs_iface_post_recv(uct_ud_verbs_iface_t *iface); + +static ucs_config_field_t uct_ud_verbs_iface_config_table[] = { + {"UD_", "", NULL, + 0, UCS_CONFIG_TYPE_TABLE(uct_ud_iface_config_table)}, + + {NULL} +}; + + +UCS_CLASS_INIT_FUNC(uct_ud_verbs_ep_t, const uct_ep_params_t *params) +{ + uct_ud_verbs_iface_t *iface = ucs_derived_of(params->iface, + uct_ud_verbs_iface_t); + + ucs_trace_func(""); + UCS_CLASS_CALL_SUPER_INIT(uct_ud_ep_t, &iface->super); + self->ah = NULL; + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_ud_verbs_ep_t) +{ + ucs_trace_func(""); +} + +UCS_CLASS_DEFINE(uct_ud_verbs_ep_t, uct_ud_ep_t); +static UCS_CLASS_DEFINE_NEW_FUNC(uct_ud_verbs_ep_t, uct_ep_t, + const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_ud_verbs_ep_t, uct_ep_t); + +static inline void +uct_ud_verbs_iface_fill_tx_wr(uct_ud_verbs_iface_t *iface, + uct_ud_verbs_ep_t *ep, + struct ibv_send_wr *wr, unsigned flags) +{ + if (iface->super.tx.unsignaled >= UCT_UD_TX_MODERATION) { + wr->send_flags = (flags|IBV_SEND_SIGNALED); + iface->super.tx.unsignaled = 0; + } else { + wr->send_flags = flags; + ++iface->super.tx.unsignaled; + } + wr->wr.ud.remote_qpn = ep->dest_qpn; + wr->wr.ud.ah = ep->ah; +} + +static inline void +uct_ud_verbs_ep_tx_inlv(uct_ud_verbs_iface_t *iface, uct_ud_verbs_ep_t *ep, + const void 
*buffer, unsigned length) +{ + int UCS_V_UNUSED ret; + struct ibv_send_wr *bad_wr; + + iface->tx.sge[1].addr = (uintptr_t)buffer; + iface->tx.sge[1].length = length; + uct_ud_verbs_iface_fill_tx_wr(iface, ep, &iface->tx.wr_inl, IBV_SEND_INLINE); + UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr); + ret = ibv_post_send(iface->super.qp, &iface->tx.wr_inl, &bad_wr); + ucs_assertv(ret == 0, "ibv_post_send() returned %d (%m)", ret); + uct_ib_log_post_send(&iface->super.super, iface->super.qp, &iface->tx.wr_inl, + INT_MAX, uct_ud_dump_packet); + --iface->super.tx.available; +} + +static inline void +uct_ud_verbs_ep_tx_skb(uct_ud_verbs_iface_t *iface, + uct_ud_verbs_ep_t *ep, uct_ud_send_skb_t *skb, unsigned flags) +{ + int UCS_V_UNUSED ret; + struct ibv_send_wr *bad_wr; + + iface->tx.sge[0].lkey = skb->lkey; + iface->tx.sge[0].length = skb->len; + iface->tx.sge[0].addr = (uintptr_t)skb->neth; + uct_ud_verbs_iface_fill_tx_wr(iface, ep, &iface->tx.wr_skb, flags); + UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr); + ret = ibv_post_send(iface->super.qp, &iface->tx.wr_skb, &bad_wr); + ucs_assertv(ret == 0, "ibv_post_send() returned %d (%m)", ret); + uct_ib_log_post_send(&iface->super.super, iface->super.qp, &iface->tx.wr_skb, + INT_MAX, uct_ud_dump_packet); + --iface->super.tx.available; +} + +static void uct_ud_verbs_ep_tx_ctl_skb(uct_ud_ep_t *ud_ep, uct_ud_send_skb_t *skb, + int solicited) +{ + uct_ud_verbs_iface_t *iface = ucs_derived_of(ud_ep->super.super.iface, + uct_ud_verbs_iface_t); + uct_ud_verbs_ep_t *ep = ucs_derived_of(ud_ep, uct_ud_verbs_ep_t); + unsigned flags = 0; + + if (skb->len < iface->super.config.max_inline) { + flags = IBV_SEND_INLINE; + } + if (solicited) { + flags |= IBV_SEND_SOLICITED; + } + uct_ud_verbs_ep_tx_skb(iface, ep, skb, flags); +} + +static +ucs_status_t uct_ud_verbs_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t hdr, + const void *buffer, unsigned length) +{ + uct_ud_verbs_ep_t *ep = 
ucs_derived_of(tl_ep, uct_ud_verbs_ep_t); + uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, + uct_ud_verbs_iface_t); + uct_ud_send_skb_t *skb; + uct_ud_am_short_hdr_t *am_hdr; + ucs_status_t status; + + UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(hdr) + length, + 0, iface->super.config.max_inline, "am_short"); + + uct_ud_enter(&iface->super); + + status = uct_ud_am_common(&iface->super, &ep->super, id, &skb); + if (status != UCS_OK) { + uct_ud_leave(&iface->super); + return status; + } + + am_hdr = (uct_ud_am_short_hdr_t *)(skb->neth+1); + am_hdr->hdr = hdr; + iface->tx.sge[0].length = sizeof(uct_ud_neth_t) + sizeof(*am_hdr); + iface->tx.sge[0].addr = (uintptr_t)skb->neth; + + uct_ud_verbs_ep_tx_inlv(iface, ep, buffer, length); + + skb->len = iface->tx.sge[0].length; + + uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb, + am_hdr+1, buffer, length); + UCT_TL_EP_STAT_OP(&ep->super.super, AM, SHORT, sizeof(hdr) + length); + uct_ud_leave(&iface->super); + return UCS_OK; +} + +static ssize_t uct_ud_verbs_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags) +{ + uct_ud_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t); + uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, + uct_ud_verbs_iface_t); + uct_ud_send_skb_t *skb; + ucs_status_t status; + size_t length; + + uct_ud_enter(&iface->super); + + status = uct_ud_am_common(&iface->super, &ep->super, id, &skb); + if (status != UCS_OK) { + uct_ud_leave(&iface->super); + return status; + } + + length = uct_ud_skb_bcopy(skb, pack_cb, arg); + UCT_UD_CHECK_BCOPY_LENGTH(&iface->super, length); + + uct_ud_verbs_ep_tx_skb(iface, ep, skb, 0); + uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb); + UCT_TL_EP_STAT_OP(&ep->super.super, AM, BCOPY, length); + uct_ud_leave(&iface->super); + return length; +} + +static ucs_status_t +uct_ud_verbs_ep_am_zcopy(uct_ep_h tl_ep, uint8_t id, const void *header, + unsigned header_length, const 
uct_iov_t *iov, + size_t iovcnt, unsigned flags, uct_completion_t *comp) +{ + uct_ud_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t); + uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, + uct_ud_verbs_iface_t); + uct_ud_send_skb_t *skb; + ucs_status_t status; + + UCT_CHECK_IOV_SIZE(iovcnt, uct_ib_iface_get_max_iov(&iface->super.super) - 1, + "uct_ud_verbs_ep_am_zcopy"); + + UCT_CHECK_LENGTH(sizeof(uct_ud_neth_t) + sizeof(uct_ud_zcopy_desc_t) + header_length, + 0, iface->super.super.config.seg_size, "am_zcopy header"); + + UCT_UD_CHECK_ZCOPY_LENGTH(&iface->super, header_length, + uct_iov_total_length(iov, iovcnt)); + + uct_ud_enter(&iface->super); + + status = uct_ud_am_common(&iface->super, &ep->super, id, &skb); + if (status != UCS_OK) { + uct_ud_leave(&iface->super); + return status; + } + /* force ACK_REQ because we want to call user completion ASAP */ + skb->neth->packet_type |= UCT_UD_PACKET_FLAG_ACK_REQ; + memcpy(skb->neth + 1, header, header_length); + skb->len = sizeof(uct_ud_neth_t) + header_length; + + iface->tx.wr_skb.num_sge = uct_ib_verbs_sge_fill_iov(iface->tx.sge + 1, + iov, iovcnt) + 1; + + uct_ud_verbs_ep_tx_skb(iface, ep, skb, 0); + iface->tx.wr_skb.num_sge = 1; + + uct_ud_am_set_zcopy_desc(skb, iov, iovcnt, comp); + uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb); + UCT_TL_EP_STAT_OP(&ep->super.super, AM, ZCOPY, header_length + + uct_iov_total_length(iov, iovcnt)); + uct_ud_leave(&iface->super); + return UCS_INPROGRESS; +} + +static +ucs_status_t uct_ud_verbs_ep_put_short(uct_ep_h tl_ep, + const void *buffer, unsigned length, + uint64_t remote_addr, uct_rkey_t rkey) +{ + uct_ud_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t); + uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_ep->iface, + uct_ud_verbs_iface_t); + uct_ud_send_skb_t *skb; + uct_ud_put_hdr_t *put_hdr; + uct_ud_neth_t *neth; + + UCT_CHECK_LENGTH(sizeof(*neth) + sizeof(*put_hdr) + length, + 0, iface->super.config.max_inline, "put_short"); + + 
uct_ud_enter(&iface->super); + + skb = uct_ud_ep_get_tx_skb(&iface->super, &ep->super); + if (!skb) { + uct_ud_leave(&iface->super); + return UCS_ERR_NO_RESOURCE; + } + + neth = skb->neth; + uct_ud_neth_init_data(&ep->super, neth); + uct_ud_neth_set_type_put(&ep->super, neth); + uct_ud_neth_ack_req(&ep->super, neth); + + put_hdr = (uct_ud_put_hdr_t *)(neth+1); + put_hdr->rva = remote_addr; + iface->tx.sge[0].addr = (uintptr_t)neth; + iface->tx.sge[0].length = sizeof(*neth) + sizeof(*put_hdr); + + uct_ud_verbs_ep_tx_inlv(iface, ep, buffer, length); + + skb->len = iface->tx.sge[0].length; + uct_ud_iface_complete_tx_inl(&iface->super, &ep->super, skb, + put_hdr+1, buffer, length); + UCT_TL_EP_STAT_OP(&ep->super.super, PUT, SHORT, length); + uct_ud_leave(&iface->super); + return UCS_OK; +} + + +static UCS_F_ALWAYS_INLINE unsigned +uct_ud_verbs_iface_poll_tx(uct_ud_verbs_iface_t *iface) +{ + struct ibv_wc wc; + int ret; + + ret = ibv_poll_cq(iface->super.super.cq[UCT_IB_DIR_TX], 1, &wc); + if (ucs_unlikely(ret < 0)) { + ucs_fatal("Failed to poll send CQ"); + return 0; + } + + if (ret == 0) { + return 0; + } + + if (ucs_unlikely(wc.status != IBV_WC_SUCCESS)) { + ucs_fatal("Send completion (wr_id=0x%0X with error: %s ", + (unsigned)wc.wr_id, ibv_wc_status_str(wc.status)); + return 0; + } + + iface->super.tx.available += UCT_UD_TX_MODERATION + 1; + return 1; +} + +static UCS_F_ALWAYS_INLINE unsigned +uct_ud_verbs_iface_poll_rx(uct_ud_verbs_iface_t *iface, int is_async) +{ + unsigned num_wcs = iface->super.super.config.rx_max_poll; + struct ibv_wc wc[num_wcs]; + ucs_status_t status; + void *packet; + int i; + + status = uct_ib_poll_cq(iface->super.super.cq[UCT_IB_DIR_RX], &num_wcs, wc); + if (status != UCS_OK) { + num_wcs = 0; + goto out; + } + + UCT_IB_IFACE_VERBS_FOREACH_RXWQE(&iface->super.super, i, packet, wc, num_wcs) { + if (!uct_ud_iface_check_grh(&iface->super, + UCS_PTR_BYTE_OFFSET(packet, UCT_IB_GRH_LEN), + wc[i].wc_flags & IBV_WC_GRH)) { + 
ucs_mpool_put_inline((void*)wc[i].wr_id); + continue; + } + uct_ib_log_recv_completion(&iface->super.super, &wc[i], packet, + wc[i].byte_len, uct_ud_dump_packet); + uct_ud_ep_process_rx(&iface->super, + (uct_ud_neth_t *)UCS_PTR_BYTE_OFFSET(packet, UCT_IB_GRH_LEN), + wc[i].byte_len - UCT_IB_GRH_LEN, + (uct_ud_recv_skb_t *)wc[i].wr_id, + is_async); + + } + iface->super.rx.available += num_wcs; +out: + uct_ud_verbs_iface_post_recv(iface); + return num_wcs; +} + +static ucs_status_t uct_ud_verbs_ep_set_failed(uct_ib_iface_t *iface, + uct_ep_h ep, ucs_status_t status) +{ + return uct_set_ep_failed(&UCS_CLASS_NAME(uct_ud_verbs_ep_t), ep, + &iface->super.super, status); +} + +static unsigned uct_ud_verbs_iface_async_progress(uct_ud_iface_t *ud_iface) +{ + uct_ud_verbs_iface_t *iface = ucs_derived_of(ud_iface, uct_ud_verbs_iface_t); + unsigned count, n; + + count = 0; + do { + n = uct_ud_verbs_iface_poll_rx(iface, 1); + count += n; + } while (n > 0); + + count += uct_ud_verbs_iface_poll_tx(iface); + + uct_ud_iface_progress_pending(&iface->super, 1); + return count; +} + +static unsigned uct_ud_verbs_iface_progress(uct_iface_h tl_iface) +{ + uct_ud_verbs_iface_t *iface = ucs_derived_of(tl_iface, uct_ud_verbs_iface_t); + ucs_status_t status; + unsigned count; + + uct_ud_enter(&iface->super); + uct_ud_iface_dispatch_zcopy_comps(&iface->super); + status = uct_ud_iface_dispatch_pending_rx(&iface->super); + if (status == UCS_OK) { + count = uct_ud_verbs_iface_poll_rx(iface, 0); + if (count == 0) { + count = uct_ud_verbs_iface_poll_tx(iface); + } + } else { + count = 0; + } + + uct_ud_iface_progress_pending(&iface->super, 0); + uct_ud_leave(&iface->super); + + return count; +} + +static ucs_status_t +uct_ud_verbs_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) +{ + uct_ud_iface_t *iface = ucs_derived_of(tl_iface, uct_ud_iface_t); + ucs_status_t status; + + ucs_trace_func(""); + status = uct_ud_iface_query(iface, iface_attr); + if (status != UCS_OK) { + return 
status; + } + + iface_attr->overhead = 105e-9; /* Software overhead */ + iface_attr->cap.am.max_hdr = uct_ib_iface_hdr_size(iface->super.config.seg_size, + sizeof(uct_ud_neth_t) + + sizeof(uct_ud_zcopy_desc_t)); + + return UCS_OK; +} + +static ucs_status_t +uct_ud_verbs_ep_create_connected(uct_iface_h iface_h, const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr, uct_ep_h *new_ep_p) +{ + uct_ud_verbs_iface_t *iface = ucs_derived_of(iface_h, uct_ud_verbs_iface_t); + uct_ib_iface_t *ib_iface = &iface->super.super; + uct_ud_verbs_ep_t *ep; + uct_ud_ep_t *new_ud_ep; + const uct_ib_address_t *ib_addr = (const uct_ib_address_t *)dev_addr; + const uct_ud_iface_addr_t *if_addr = (const uct_ud_iface_addr_t *)iface_addr; + uct_ud_send_skb_t *skb; + ucs_status_t status, status_ah; + struct ibv_ah_attr ah_attr; + + uct_ud_enter(&iface->super); + status = uct_ud_ep_create_connected_common(&iface->super, ib_addr, if_addr, + &new_ud_ep, &skb); + if (status != UCS_OK && + status != UCS_ERR_NO_RESOURCE && + status != UCS_ERR_ALREADY_EXISTS) { + uct_ud_leave(&iface->super); + return status; + } + + ep = ucs_derived_of(new_ud_ep, uct_ud_verbs_ep_t); + /* cppcheck-suppress autoVariables */ + *new_ep_p = &ep->super.super.super; + if (status == UCS_ERR_ALREADY_EXISTS) { + uct_ud_leave(&iface->super); + return UCS_OK; + } + + ucs_assert_always(ep->ah == NULL); + + uct_ib_iface_fill_ah_attr_from_addr(ib_iface, ib_addr, &ah_attr); + status_ah = uct_ib_iface_create_ah(ib_iface, &ah_attr, &ep->ah); + if (status_ah != UCS_OK) { + uct_ud_ep_destroy_connected(&ep->super, ib_addr, if_addr); + *new_ep_p = NULL; + uct_ud_leave(&iface->super); + return status_ah; + } + + ep->dest_qpn = uct_ib_unpack_uint24(if_addr->qp_num); + + if (status == UCS_OK) { + uct_ud_verbs_ep_tx_skb(iface, ep, skb, IBV_SEND_INLINE|IBV_SEND_SOLICITED); + uct_ud_iface_complete_tx_skb(&iface->super, &ep->super, skb); + ep->super.flags |= UCT_UD_EP_FLAG_CREQ_SENT; + } + uct_ud_leave(&iface->super); + return 
UCS_OK; +} + + +static ucs_status_t +uct_ud_verbs_ep_connect_to_ep(uct_ep_h tl_ep, + const uct_device_addr_t *dev_addr, + const uct_ep_addr_t *ep_addr) +{ + uct_ud_verbs_ep_t *ep = ucs_derived_of(tl_ep, uct_ud_verbs_ep_t); + uct_ib_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ib_iface_t); + const uct_ib_address_t *ib_addr = (const uct_ib_address_t *)dev_addr; + const uct_ud_ep_addr_t *ud_ep_addr = (const uct_ud_ep_addr_t *)ep_addr; + ucs_status_t status; + struct ibv_ah_attr ah_attr; + + status = uct_ud_ep_connect_to_ep(&ep->super, ib_addr, ud_ep_addr); + if (status != UCS_OK) { + return status; + } + ucs_assert_always(ep->ah == NULL); + ep->dest_qpn = uct_ib_unpack_uint24(ud_ep_addr->iface_addr.qp_num); + + uct_ib_iface_fill_ah_attr_from_addr(iface, ib_addr, &ah_attr); + return uct_ib_iface_create_ah(iface, &ah_attr, &ep->ah); +} + +static ucs_status_t +uct_ud_verbs_ep_create(const uct_ep_params_t *params, uct_ep_h *ep_p) +{ + if (ucs_test_all_flags(params->field_mask, UCT_EP_PARAM_FIELD_DEV_ADDR | + UCT_EP_PARAM_FIELD_IFACE_ADDR)) { + return uct_ud_verbs_ep_create_connected(params->iface, params->dev_addr, + params->iface_addr, ep_p); + } + + return uct_ud_verbs_ep_t_new(params, ep_p); +} + +static void UCS_CLASS_DELETE_FUNC_NAME(uct_ud_verbs_iface_t)(uct_iface_t*); + +static uct_ud_iface_ops_t uct_ud_verbs_iface_ops = { + { + { + .ep_put_short = uct_ud_verbs_ep_put_short, + .ep_am_short = uct_ud_verbs_ep_am_short, + .ep_am_bcopy = uct_ud_verbs_ep_am_bcopy, + .ep_am_zcopy = uct_ud_verbs_ep_am_zcopy, + .ep_pending_add = uct_ud_ep_pending_add, + .ep_pending_purge = uct_ud_ep_pending_purge, + .ep_flush = uct_ud_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_create = uct_ud_verbs_ep_create, + .ep_destroy = uct_ud_ep_disconnect, + .ep_get_address = uct_ud_ep_get_address, + .ep_connect_to_ep = uct_ud_verbs_ep_connect_to_ep, + .iface_flush = uct_ud_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_progress_enable = uct_ud_iface_progress_enable, + 
.iface_progress_disable = uct_base_iface_progress_disable, + .iface_progress = uct_ud_verbs_iface_progress, + .iface_event_fd_get = uct_ib_iface_event_fd_get, + .iface_event_arm = uct_ud_iface_event_arm, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_ud_verbs_iface_t), + .iface_query = uct_ud_verbs_iface_query, + .iface_get_device_address = uct_ib_iface_get_device_address, + .iface_get_address = uct_ud_iface_get_address, + .iface_is_reachable = uct_ib_iface_is_reachable + }, + .create_cq = uct_ib_verbs_create_cq, + .arm_cq = uct_ib_iface_arm_cq, + .event_cq = (uct_ib_iface_event_cq_func_t)ucs_empty_function, + .handle_failure = uct_ud_iface_handle_failure, + .set_ep_failed = uct_ud_verbs_ep_set_failed, + }, + .async_progress = uct_ud_verbs_iface_async_progress, + .tx_skb = uct_ud_verbs_ep_tx_ctl_skb, + .ep_free = UCS_CLASS_DELETE_FUNC_NAME(uct_ud_verbs_ep_t), + .create_qp = uct_ib_iface_create_qp, +}; + +static UCS_F_NOINLINE void +uct_ud_verbs_iface_post_recv_always(uct_ud_verbs_iface_t *iface, int max) +{ + struct ibv_recv_wr *bad_wr; + uct_ib_recv_wr_t *wrs; + unsigned count; + int ret; + + wrs = ucs_alloca(sizeof *wrs * max); + + count = uct_ib_iface_prepare_rx_wrs(&iface->super.super, &iface->super.rx.mp, + wrs, max); + if (count == 0) { + return; + } + + ret = ibv_post_recv(iface->super.qp, &wrs[0].ibwr, &bad_wr); + if (ret != 0) { + ucs_fatal("ibv_post_recv() returned %d: %m", ret); + } + iface->super.rx.available -= count; +} + +static UCS_F_ALWAYS_INLINE void +uct_ud_verbs_iface_post_recv(uct_ud_verbs_iface_t *iface) +{ + unsigned batch = iface->super.super.config.rx_max_batch; + + if (iface->super.rx.available < batch) + return; + + uct_ud_verbs_iface_post_recv_always(iface, batch); +} + +static UCS_CLASS_INIT_FUNC(uct_ud_verbs_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_ud_iface_config_t *config = ucs_derived_of(tl_config, + uct_ud_iface_config_t); + 
uct_ib_iface_init_attr_t init_attr = {}; + ucs_status_t status; + + ucs_trace_func(""); + + UCS_CLASS_CALL_SUPER_INIT(uct_ud_iface_t, &uct_ud_verbs_iface_ops, md, + worker, params, config, &init_attr); + + memset(&self->tx.wr_inl, 0, sizeof(self->tx.wr_inl)); + self->tx.wr_inl.opcode = IBV_WR_SEND; + self->tx.wr_inl.wr_id = 0xBEEBBEEB; + self->tx.wr_inl.wr.ud.remote_qkey = UCT_IB_KEY; + self->tx.wr_inl.imm_data = 0; + self->tx.wr_inl.next = 0; + self->tx.wr_inl.sg_list = self->tx.sge; + self->tx.wr_inl.num_sge = 2; + + memset(&self->tx.wr_skb, 0, sizeof(self->tx.wr_skb)); + self->tx.wr_skb.opcode = IBV_WR_SEND; + self->tx.wr_skb.wr_id = 0xFAAFFAAF; + self->tx.wr_skb.wr.ud.remote_qkey = UCT_IB_KEY; + self->tx.wr_skb.imm_data = 0; + self->tx.wr_skb.next = 0; + self->tx.wr_skb.sg_list = self->tx.sge; + self->tx.wr_skb.num_sge = 1; + + if (self->super.super.config.rx_max_batch < UCT_UD_RX_BATCH_MIN) { + ucs_warn("rx max batch is too low (%d < %d), performance may be impacted", + self->super.super.config.rx_max_batch, + UCT_UD_RX_BATCH_MIN); + } + + while (self->super.rx.available >= self->super.super.config.rx_max_batch) { + uct_ud_verbs_iface_post_recv(self); + } + + status = uct_ud_iface_complete_init(&self->super); + if (status != UCS_OK) { + return status; + } + + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_ud_verbs_iface_t) +{ + ucs_trace_func(""); + uct_ud_iface_remove_async_handlers(&self->super); + uct_ud_enter(&self->super); + UCT_UD_IFACE_DELETE_EPS(&self->super, uct_ud_verbs_ep_t); + ucs_twheel_cleanup(&self->super.async.slow_timer); + uct_ud_leave(&self->super); +} + +UCS_CLASS_DEFINE(uct_ud_verbs_iface_t, uct_ud_iface_t); + +static UCS_CLASS_DEFINE_NEW_FUNC(uct_ud_verbs_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t*); + +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_ud_verbs_iface_t, uct_iface_t); + +static ucs_status_t +uct_ud_verbs_query_tl_devices(uct_md_h md, + uct_tl_device_resource_t 
**tl_devices_p, + unsigned *num_tl_devices_p) +{ + uct_ib_md_t *ib_md = ucs_derived_of(md, uct_ib_md_t); + return uct_ib_device_query_ports(&ib_md->dev, 0, tl_devices_p, + num_tl_devices_p); +} + +UCT_TL_DEFINE(&uct_ib_component, ud_verbs, uct_ud_verbs_query_tl_devices, + uct_ud_verbs_iface_t, "UD_VERBS_", + uct_ud_verbs_iface_config_table, uct_ud_iface_config_t); diff --git a/src/uct/ib/ud/verbs/ud_verbs.h b/src/uct/ib/ud/verbs/ud_verbs.h new file mode 100644 index 0000000..502def8 --- /dev/null +++ b/src/uct/ib/ud/verbs/ud_verbs.h @@ -0,0 +1,34 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UD_VERBS_H +#define UD_VERBS_H + +#include + +#include +#include +#include + + +typedef struct { + uct_ud_ep_t super; + uint32_t dest_qpn; + struct ibv_ah *ah; +} uct_ud_verbs_ep_t; + +typedef struct { + uct_ud_iface_t super; + struct { + struct ibv_sge sge[UCT_IB_MAX_IOV]; + struct ibv_send_wr wr_inl; + struct ibv_send_wr wr_skb; + } tx; +} uct_ud_verbs_iface_t; + +UCS_CLASS_DECLARE(uct_ud_verbs_ep_t, const uct_ep_params_t *) + +#endif diff --git a/src/uct/rocm/Makefile.am b/src/uct/rocm/Makefile.am new file mode 100644 index 0000000..bd6bf65 --- /dev/null +++ b/src/uct/rocm/Makefile.am @@ -0,0 +1,47 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +if HAVE_ROCM + +SUBDIRS = . 
gdr + +module_LTLIBRARIES = libuct_rocm.la +libuct_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(ROCM_CPPFLAGS) +libuct_rocm_la_CFLAGS = $(BASE_CFLAGS) +libuct_rocm_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/libuct.la +libuct_rocm_la_LDFLAGS = $(ROCM_LDFLAGS) $(ROCM_LIBS) -version-info $(SOVERSION) + +noinst_HEADERS = \ + base/rocm_base.h + +libuct_rocm_la_SOURCES = \ + base/rocm_base.c + +noinst_HEADERS += \ + copy/rocm_copy_md.h \ + copy/rocm_copy_iface.h \ + copy/rocm_copy_ep.h + +libuct_rocm_la_SOURCES += \ + copy/rocm_copy_md.c \ + copy/rocm_copy_iface.c \ + copy/rocm_copy_ep.c + +noinst_HEADERS += \ + ipc/rocm_ipc_md.h \ + ipc/rocm_ipc_iface.h \ + ipc/rocm_ipc_ep.h \ + ipc/rocm_ipc_cache.h + +libuct_rocm_la_SOURCES += \ + ipc/rocm_ipc_md.c \ + ipc/rocm_ipc_iface.c \ + ipc/rocm_ipc_ep.c \ + ipc/rocm_ipc_cache.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/uct/rocm/Makefile.in b/src/uct/rocm/Makefile.in new file mode 100644 index 0000000..6301673 --- /dev/null +++ b/src/uct/rocm/Makefile.in @@ -0,0 +1,1115 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : 
+NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/rocm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + 
$(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_ROCM_TRUE@libuct_rocm_la_DEPENDENCIES = \ +@HAVE_ROCM_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_ROCM_TRUE@ $(top_builddir)/src/uct/libuct.la +am__libuct_rocm_la_SOURCES_DIST = base/rocm_base.c copy/rocm_copy_md.c \ + copy/rocm_copy_iface.c copy/rocm_copy_ep.c ipc/rocm_ipc_md.c \ + ipc/rocm_ipc_iface.c ipc/rocm_ipc_ep.c ipc/rocm_ipc_cache.c +am__dirstamp = $(am__leading_dot)dirstamp +@HAVE_ROCM_TRUE@am_libuct_rocm_la_OBJECTS = \ +@HAVE_ROCM_TRUE@ base/libuct_rocm_la-rocm_base.lo \ +@HAVE_ROCM_TRUE@ copy/libuct_rocm_la-rocm_copy_md.lo \ +@HAVE_ROCM_TRUE@ copy/libuct_rocm_la-rocm_copy_iface.lo \ +@HAVE_ROCM_TRUE@ copy/libuct_rocm_la-rocm_copy_ep.lo \ +@HAVE_ROCM_TRUE@ ipc/libuct_rocm_la-rocm_ipc_md.lo \ +@HAVE_ROCM_TRUE@ ipc/libuct_rocm_la-rocm_ipc_iface.lo \ +@HAVE_ROCM_TRUE@ ipc/libuct_rocm_la-rocm_ipc_ep.lo \ +@HAVE_ROCM_TRUE@ ipc/libuct_rocm_la-rocm_ipc_cache.lo +libuct_rocm_la_OBJECTS = $(am_libuct_rocm_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_rocm_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libuct_rocm_la_CFLAGS) $(CFLAGS) $(libuct_rocm_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@HAVE_ROCM_TRUE@am_libuct_rocm_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = 
base/$(DEPDIR)/libuct_rocm_la-rocm_base.Plo \ + copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_ep.Plo \ + copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_iface.Plo \ + copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_md.Plo \ + ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_cache.Plo \ + ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_ep.Plo \ + ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_iface.Plo \ + ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_md.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_rocm_la_SOURCES) +DIST_SOURCES = $(am__libuct_rocm_la_SOURCES_DIST) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = base/rocm_base.h copy/rocm_copy_md.h \ + copy/rocm_copy_iface.h copy/rocm_copy_ep.h ipc/rocm_ipc_md.h \ + ipc/rocm_ipc_iface.h ipc/rocm_ipc_ep.h 
ipc/rocm_ipc_cache.h +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = . 
gdr +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS 
= @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = 
@PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = 
@localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_ROCM_TRUE@SUBDIRS = . gdr +@HAVE_ROCM_TRUE@module_LTLIBRARIES = libuct_rocm.la +@HAVE_ROCM_TRUE@libuct_rocm_la_CPPFLAGS = $(BASE_CPPFLAGS) $(ROCM_CPPFLAGS) +@HAVE_ROCM_TRUE@libuct_rocm_la_CFLAGS = $(BASE_CFLAGS) +@HAVE_ROCM_TRUE@libuct_rocm_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_ROCM_TRUE@ $(top_builddir)/src/uct/libuct.la + +@HAVE_ROCM_TRUE@libuct_rocm_la_LDFLAGS = $(ROCM_LDFLAGS) $(ROCM_LIBS) -version-info $(SOVERSION) +@HAVE_ROCM_TRUE@noinst_HEADERS = base/rocm_base.h copy/rocm_copy_md.h \ +@HAVE_ROCM_TRUE@ copy/rocm_copy_iface.h copy/rocm_copy_ep.h \ +@HAVE_ROCM_TRUE@ ipc/rocm_ipc_md.h ipc/rocm_ipc_iface.h \ +@HAVE_ROCM_TRUE@ ipc/rocm_ipc_ep.h ipc/rocm_ipc_cache.h +@HAVE_ROCM_TRUE@libuct_rocm_la_SOURCES = base/rocm_base.c \ +@HAVE_ROCM_TRUE@ copy/rocm_copy_md.c copy/rocm_copy_iface.c \ +@HAVE_ROCM_TRUE@ copy/rocm_copy_ep.c ipc/rocm_ipc_md.c \ +@HAVE_ROCM_TRUE@ ipc/rocm_ipc_iface.c ipc/rocm_ipc_ep.c \ +@HAVE_ROCM_TRUE@ ipc/rocm_ipc_cache.c + +# Automake silent rules +@HAVE_ROCM_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_ROCM_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_ROCM_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_ROCM_TRUE@AM_V_LN_1 = true +@HAVE_ROCM_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: 
@MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/rocm/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/rocm/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + 
+uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +base/$(am__dirstamp): + @$(MKDIR_P) base + @: > base/$(am__dirstamp) +base/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) base/$(DEPDIR) + @: > base/$(DEPDIR)/$(am__dirstamp) +base/libuct_rocm_la-rocm_base.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +copy/$(am__dirstamp): + @$(MKDIR_P) copy + @: > copy/$(am__dirstamp) +copy/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) copy/$(DEPDIR) + @: > copy/$(DEPDIR)/$(am__dirstamp) +copy/libuct_rocm_la-rocm_copy_md.lo: copy/$(am__dirstamp) \ + copy/$(DEPDIR)/$(am__dirstamp) +copy/libuct_rocm_la-rocm_copy_iface.lo: copy/$(am__dirstamp) \ + copy/$(DEPDIR)/$(am__dirstamp) +copy/libuct_rocm_la-rocm_copy_ep.lo: copy/$(am__dirstamp) \ + copy/$(DEPDIR)/$(am__dirstamp) +ipc/$(am__dirstamp): + @$(MKDIR_P) ipc + @: > ipc/$(am__dirstamp) +ipc/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ipc/$(DEPDIR) + @: > ipc/$(DEPDIR)/$(am__dirstamp) +ipc/libuct_rocm_la-rocm_ipc_md.lo: ipc/$(am__dirstamp) \ + ipc/$(DEPDIR)/$(am__dirstamp) +ipc/libuct_rocm_la-rocm_ipc_iface.lo: ipc/$(am__dirstamp) \ + ipc/$(DEPDIR)/$(am__dirstamp) +ipc/libuct_rocm_la-rocm_ipc_ep.lo: ipc/$(am__dirstamp) \ + ipc/$(DEPDIR)/$(am__dirstamp) +ipc/libuct_rocm_la-rocm_ipc_cache.lo: ipc/$(am__dirstamp) \ + ipc/$(DEPDIR)/$(am__dirstamp) + +libuct_rocm.la: 
$(libuct_rocm_la_OBJECTS) $(libuct_rocm_la_DEPENDENCIES) $(EXTRA_libuct_rocm_la_DEPENDENCIES) + $(AM_V_CCLD)$(libuct_rocm_la_LINK) $(am_libuct_rocm_la_rpath) $(libuct_rocm_la_OBJECTS) $(libuct_rocm_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f base/*.$(OBJEXT) + -rm -f base/*.lo + -rm -f copy/*.$(OBJEXT) + -rm -f copy/*.lo + -rm -f ipc/*.$(OBJEXT) + -rm -f ipc/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_rocm_la-rocm_base.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_md.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_cache.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_md.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ 
$< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +base/libuct_rocm_la-rocm_base.lo: base/rocm_base.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -MT base/libuct_rocm_la-rocm_base.lo -MD -MP -MF base/$(DEPDIR)/libuct_rocm_la-rocm_base.Tpo -c -o base/libuct_rocm_la-rocm_base.lo `test -f 'base/rocm_base.c' || echo '$(srcdir)/'`base/rocm_base.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_rocm_la-rocm_base.Tpo base/$(DEPDIR)/libuct_rocm_la-rocm_base.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/rocm_base.c' object='base/libuct_rocm_la-rocm_base.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -c -o base/libuct_rocm_la-rocm_base.lo `test -f 'base/rocm_base.c' || echo '$(srcdir)/'`base/rocm_base.c + +copy/libuct_rocm_la-rocm_copy_md.lo: copy/rocm_copy_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -MT copy/libuct_rocm_la-rocm_copy_md.lo -MD -MP -MF copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_md.Tpo -c -o copy/libuct_rocm_la-rocm_copy_md.lo `test -f 'copy/rocm_copy_md.c' || echo '$(srcdir)/'`copy/rocm_copy_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_md.Tpo copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='copy/rocm_copy_md.c' object='copy/libuct_rocm_la-rocm_copy_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -c -o copy/libuct_rocm_la-rocm_copy_md.lo `test -f 'copy/rocm_copy_md.c' || echo '$(srcdir)/'`copy/rocm_copy_md.c + +copy/libuct_rocm_la-rocm_copy_iface.lo: copy/rocm_copy_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -MT copy/libuct_rocm_la-rocm_copy_iface.lo -MD -MP -MF copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_iface.Tpo -c -o copy/libuct_rocm_la-rocm_copy_iface.lo `test -f 'copy/rocm_copy_iface.c' || echo 
'$(srcdir)/'`copy/rocm_copy_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_iface.Tpo copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='copy/rocm_copy_iface.c' object='copy/libuct_rocm_la-rocm_copy_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -c -o copy/libuct_rocm_la-rocm_copy_iface.lo `test -f 'copy/rocm_copy_iface.c' || echo '$(srcdir)/'`copy/rocm_copy_iface.c + +copy/libuct_rocm_la-rocm_copy_ep.lo: copy/rocm_copy_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -MT copy/libuct_rocm_la-rocm_copy_ep.lo -MD -MP -MF copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_ep.Tpo -c -o copy/libuct_rocm_la-rocm_copy_ep.lo `test -f 'copy/rocm_copy_ep.c' || echo '$(srcdir)/'`copy/rocm_copy_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_ep.Tpo copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='copy/rocm_copy_ep.c' object='copy/libuct_rocm_la-rocm_copy_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -c -o copy/libuct_rocm_la-rocm_copy_ep.lo `test -f 'copy/rocm_copy_ep.c' || 
echo '$(srcdir)/'`copy/rocm_copy_ep.c + +ipc/libuct_rocm_la-rocm_ipc_md.lo: ipc/rocm_ipc_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -MT ipc/libuct_rocm_la-rocm_ipc_md.lo -MD -MP -MF ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_md.Tpo -c -o ipc/libuct_rocm_la-rocm_ipc_md.lo `test -f 'ipc/rocm_ipc_md.c' || echo '$(srcdir)/'`ipc/rocm_ipc_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_md.Tpo ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ipc/rocm_ipc_md.c' object='ipc/libuct_rocm_la-rocm_ipc_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -c -o ipc/libuct_rocm_la-rocm_ipc_md.lo `test -f 'ipc/rocm_ipc_md.c' || echo '$(srcdir)/'`ipc/rocm_ipc_md.c + +ipc/libuct_rocm_la-rocm_ipc_iface.lo: ipc/rocm_ipc_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -MT ipc/libuct_rocm_la-rocm_ipc_iface.lo -MD -MP -MF ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_iface.Tpo -c -o ipc/libuct_rocm_la-rocm_ipc_iface.lo `test -f 'ipc/rocm_ipc_iface.c' || echo '$(srcdir)/'`ipc/rocm_ipc_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_iface.Tpo ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ipc/rocm_ipc_iface.c' 
object='ipc/libuct_rocm_la-rocm_ipc_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -c -o ipc/libuct_rocm_la-rocm_ipc_iface.lo `test -f 'ipc/rocm_ipc_iface.c' || echo '$(srcdir)/'`ipc/rocm_ipc_iface.c + +ipc/libuct_rocm_la-rocm_ipc_ep.lo: ipc/rocm_ipc_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -MT ipc/libuct_rocm_la-rocm_ipc_ep.lo -MD -MP -MF ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_ep.Tpo -c -o ipc/libuct_rocm_la-rocm_ipc_ep.lo `test -f 'ipc/rocm_ipc_ep.c' || echo '$(srcdir)/'`ipc/rocm_ipc_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_ep.Tpo ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ipc/rocm_ipc_ep.c' object='ipc/libuct_rocm_la-rocm_ipc_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -c -o ipc/libuct_rocm_la-rocm_ipc_ep.lo `test -f 'ipc/rocm_ipc_ep.c' || echo '$(srcdir)/'`ipc/rocm_ipc_ep.c + +ipc/libuct_rocm_la-rocm_ipc_cache.lo: ipc/rocm_ipc_cache.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) 
$(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -MT ipc/libuct_rocm_la-rocm_ipc_cache.lo -MD -MP -MF ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_cache.Tpo -c -o ipc/libuct_rocm_la-rocm_ipc_cache.lo `test -f 'ipc/rocm_ipc_cache.c' || echo '$(srcdir)/'`ipc/rocm_ipc_cache.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_cache.Tpo ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_cache.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ipc/rocm_ipc_cache.c' object='ipc/libuct_rocm_la-rocm_ipc_cache.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_la_CFLAGS) $(CFLAGS) -c -o ipc/libuct_rocm_la-rocm_ipc_cache.lo `test -f 'ipc/rocm_ipc_cache.c' || echo '$(srcdir)/'`ipc/rocm_ipc_cache.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf base/.libs base/_libs + -rm -rf copy/.libs copy/_libs + -rm -rf ipc/.libs ipc/_libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +@HAVE_ROCM_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f base/$(DEPDIR)/$(am__dirstamp) + -rm -f base/$(am__dirstamp) + -rm -f copy/$(DEPDIR)/$(am__dirstamp) + -rm -f copy/$(am__dirstamp) + -rm -f ipc/$(DEPDIR)/$(am__dirstamp) + -rm -f ipc/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f base/$(DEPDIR)/libuct_rocm_la-rocm_base.Plo + -rm -f copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_ep.Plo + -rm -f copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_iface.Plo + -rm -f copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_md.Plo + -rm -f ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_cache.Plo + -rm -f ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_ep.Plo + -rm -f ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_iface.Plo + -rm -f ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_md.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + 
+install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f base/$(DEPDIR)/libuct_rocm_la-rocm_base.Plo + -rm -f copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_ep.Plo + -rm -f copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_iface.Plo + -rm -f copy/$(DEPDIR)/libuct_rocm_la-rocm_copy_md.Plo + -rm -f ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_cache.Plo + -rm -f ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_ep.Plo + -rm -f ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_iface.Plo + -rm -f ipc/$(DEPDIR)/libuct_rocm_la-rocm_ipc_md.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ + am--depfiles check check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + +@HAVE_ROCM_TRUE@all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to 
create proper makefile dependencies +@HAVE_ROCM_TRUE@$(local_la_modules): $(module_LTLIBRARIES) +@HAVE_ROCM_TRUE@ $(AM_V_at)$(MKDIR_P) $(localmoduledir) +@HAVE_ROCM_TRUE@ $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_ROCM_TRUE@ (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ +@HAVE_ROCM_TRUE@ done +@HAVE_ROCM_TRUE@ @for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_ROCM_TRUE@ $(AM_V_LN) $$lib; \ +@HAVE_ROCM_TRUE@ done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/uct/rocm/base/rocm_base.c b/src/uct/rocm/base/rocm_base.c new file mode 100644 index 0000000..2155168 --- /dev/null +++ b/src/uct/rocm/base/rocm_base.c @@ -0,0 +1,212 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#include "rocm_base.h" + +#include + +#include +#include + + +#define MAX_AGENTS 16 +static struct agents { + hsa_agent_t agents[MAX_AGENTS]; + int num; + hsa_agent_t gpu_agents[MAX_AGENTS]; + int num_gpu; +} uct_rocm_base_agents; + +int uct_rocm_base_get_gpu_agents(hsa_agent_t **agents) +{ + *agents = uct_rocm_base_agents.gpu_agents; + return uct_rocm_base_agents.num_gpu; +} + +/* + * hsa_iterate_agents() callback: appends each agent to + * uct_rocm_base_agents.agents[] and GPU agents also to gpu_agents[]. + * Returns HSA_STATUS_INFO_BREAK when the table already holds MAX_AGENTS entries. + */ +static hsa_status_t uct_rocm_hsa_agent_callback(hsa_agent_t agent, void* data) +{ + hsa_device_type_t device_type; + + /* Enforce the table bound at run time; an assertion alone does not + * protect builds where assertions are disabled. */ + if (uct_rocm_base_agents.num >= MAX_AGENTS) { + ucs_warn("found more than %d HSA agents, ignoring the rest", MAX_AGENTS); + return HSA_STATUS_INFO_BREAK; + } + + hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); + if (device_type == HSA_DEVICE_TYPE_CPU) { + ucs_trace("%d found cpu agent %lu", getpid(), agent.handle); + } + else if (device_type == HSA_DEVICE_TYPE_GPU) { + uint32_t bdfid = 0; + uct_rocm_base_agents.gpu_agents[uct_rocm_base_agents.num_gpu++] = agent; + hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &bdfid); + ucs_trace("%d found gpu agent %lu bdfid %x", getpid(), agent.handle, bdfid); + } + else { + ucs_trace("%d found unknown agent %lu",
getpid(), agent.handle); + } + + uct_rocm_base_agents.agents[uct_rocm_base_agents.num++] = agent; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t uct_rocm_base_init(void) +{ + static pthread_mutex_t rocm_init_mutex = PTHREAD_MUTEX_INITIALIZER; + static volatile int rocm_ucx_initialized = 0; + hsa_status_t status; + + if (pthread_mutex_lock(&rocm_init_mutex) == 0) { + if (rocm_ucx_initialized) { + status = HSA_STATUS_SUCCESS; + goto end; + } + } else { + ucs_error("Could not take mutex"); + status = HSA_STATUS_ERROR; + return status; + } + + memset(&uct_rocm_base_agents, 0, sizeof(uct_rocm_base_agents)); + + status = hsa_init(); + if (status != HSA_STATUS_SUCCESS) { + ucs_debug("Failure to open HSA connection: 0x%x", status); + goto end; + } + + status = hsa_iterate_agents(uct_rocm_hsa_agent_callback, NULL); + if (status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) { + ucs_debug("Failure to iterate HSA agents: 0x%x", status); + goto end; + } + + rocm_ucx_initialized = 1; + +end: + pthread_mutex_unlock(&rocm_init_mutex); + return status; +} + +ucs_status_t +uct_rocm_base_query_md_resources(uct_component_h component, + uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p) +{ + if (uct_rocm_base_init() != HSA_STATUS_SUCCESS) { + ucs_debug("could not initialize ROCm support"); + return uct_md_query_empty_md_resource(resources_p, num_resources_p); + } + + return uct_md_query_single_md_resource(component, resources_p, + num_resources_p); +} + +ucs_status_t uct_rocm_base_query_devices(uct_md_h md, + uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + return uct_single_device_resource(md, md->component->name, + UCT_DEVICE_TYPE_ACC, tl_devices_p, + num_tl_devices_p); +} + +hsa_agent_t uct_rocm_base_get_dev_agent(int dev_num) +{ + ucs_assert(dev_num < uct_rocm_base_agents.num); + return uct_rocm_base_agents.agents[dev_num]; +} + +int uct_rocm_base_get_dev_num(hsa_agent_t agent) +{ + int i; + + for (i = 0; i < 
uct_rocm_base_agents.num; i++) { + if (uct_rocm_base_agents.agents[i].handle == agent.handle) + return i; + } + ucs_assert(0); + return -1; +} + +int uct_rocm_base_is_gpu_agent(hsa_agent_t agent) +{ + int i; + + for (i = 0; i < uct_rocm_base_agents.num_gpu; i++) { + if (uct_rocm_base_agents.gpu_agents[i].handle == agent.handle) + return 1; + } + return 0; +} + +hsa_status_t uct_rocm_base_get_ptr_info(void *ptr, size_t size, + void **base_ptr, size_t *base_size, + hsa_agent_t *agent) +{ + hsa_status_t status; + hsa_amd_pointer_info_t info; + + info.size = sizeof(hsa_amd_pointer_info_t); + status = hsa_amd_pointer_info(ptr, &info, NULL, NULL, NULL); + if (status != HSA_STATUS_SUCCESS) { + ucs_error("get pointer info fail %p", ptr); + return status; + } + + if (info.type != HSA_EXT_POINTER_TYPE_HSA) + return HSA_STATUS_ERROR; + + *agent = info.agentOwner; + + if (base_ptr) + *base_ptr = info.agentBaseAddress; + if (base_size) + *base_size = info.sizeInBytes; + + return HSA_STATUS_SUCCESS; +} + +ucs_status_t uct_rocm_base_detect_memory_type(uct_md_h md, const void *addr, + size_t length, + ucs_memory_type_t *mem_type_p) +{ + hsa_status_t status; + hsa_amd_pointer_info_t info; + + if (addr == NULL) { + *mem_type_p = UCS_MEMORY_TYPE_HOST; + return UCS_OK; + } + + info.size = sizeof(hsa_amd_pointer_info_t); + status = hsa_amd_pointer_info((void*)addr, &info, NULL, NULL, NULL); + if ((status == HSA_STATUS_SUCCESS) && + (info.type == HSA_EXT_POINTER_TYPE_HSA)) { + hsa_device_type_t dev_type; + + status = hsa_agent_get_info(info.agentOwner, HSA_AGENT_INFO_DEVICE, &dev_type); + if ((status == HSA_STATUS_SUCCESS) && + (dev_type == HSA_DEVICE_TYPE_GPU)) { + *mem_type_p = UCS_MEMORY_TYPE_ROCM; + return UCS_OK; + } + } + + return UCS_ERR_INVALID_ADDR; +} + +UCS_MODULE_INIT() { + UCS_MODULE_FRAMEWORK_DECLARE(uct_rocm); + UCS_MODULE_FRAMEWORK_LOAD(uct_rocm, 0); + return UCS_OK; +} diff --git a/src/uct/rocm/base/rocm_base.h b/src/uct/rocm/base/rocm_base.h new file mode 100644 index 
0000000..d818b73 --- /dev/null +++ b/src/uct/rocm/base/rocm_base.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + + +#ifndef ROCM_BASE_H +#define ROCM_BASE_H + +#include +#include +#include + + +hsa_status_t uct_rocm_base_init(void); +ucs_status_t uct_rocm_base_query_md_resources(uct_component_h component, + uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p); +ucs_status_t uct_rocm_base_query_devices(uct_md_h md, + uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p); +hsa_agent_t uct_rocm_base_get_dev_agent(int dev_num); +int uct_rocm_base_is_gpu_agent(hsa_agent_t agent); +int uct_rocm_base_get_gpu_agents(hsa_agent_t **agents); +int uct_rocm_base_get_dev_num(hsa_agent_t agent); +hsa_status_t uct_rocm_base_get_ptr_info(void *ptr, size_t size, + void **base_ptr, size_t *base_size, + hsa_agent_t *agent); +ucs_status_t uct_rocm_base_detect_memory_type(uct_md_h md, const void *addr, + size_t length, + ucs_memory_type_t *mem_type_p); + +#endif diff --git a/src/uct/rocm/configure.m4 b/src/uct/rocm/configure.m4 new file mode 100644 index 0000000..f5179a5 --- /dev/null +++ b/src/uct/rocm/configure.m4 @@ -0,0 +1,13 @@ +# +# Copyright (C) Advanced Micro Devices, Inc. 2016 - 2018. ALL RIGHTS RESERVED. +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +UCX_CHECK_ROCM + +AS_IF([test "x$rocm_happy" = "xyes"], [uct_modules="${uct_modules}:rocm"]) +uct_rocm_modules="" +m4_include([src/uct/rocm/gdr/configure.m4]) +AC_DEFINE_UNQUOTED([uct_rocm_MODULES], ["${uct_rocm_modules}"], [ROCM loadable modules]) +AC_CONFIG_FILES([src/uct/rocm/Makefile]) diff --git a/src/uct/rocm/copy/rocm_copy_ep.c b/src/uct/rocm/copy/rocm_copy_ep.c new file mode 100644 index 0000000..b895676 --- /dev/null +++ b/src/uct/rocm/copy/rocm_copy_ep.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. 
ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#include "rocm_copy_ep.h" +#include "rocm_copy_iface.h" + +#include +#include +#include +#include + +#define uct_rocm_memcpy_h2d(_d,_s,_l) memcpy((_d),(_s),(_l)) +#define uct_rocm_memcpy_d2h(_d,_s,_l) ucs_memcpy_nontemporal((_d),(_s),(_l)) + +static UCS_CLASS_INIT_FUNC(uct_rocm_copy_ep_t, const uct_ep_params_t *params) +{ + uct_rocm_copy_iface_t *iface = ucs_derived_of(params->iface, uct_rocm_copy_iface_t); + + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super); + + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_rocm_copy_ep_t) +{ +} + +UCS_CLASS_DEFINE(uct_rocm_copy_ep_t, uct_base_ep_t) +UCS_CLASS_DEFINE_NEW_FUNC(uct_rocm_copy_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_rocm_copy_ep_t, uct_ep_t); + +#define uct_rocm_copy_trace_data(_remote_addr, _rkey, _fmt, ...) \ + ucs_trace_data(_fmt " to %"PRIx64"(%+ld)", ## __VA_ARGS__, (_remote_addr), \ + (_rkey)) + +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_rocm_copy_ep_zcopy(uct_ep_h tl_ep, + uint64_t remote_addr, + const uct_iov_t *iov, + int is_put) +{ + size_t size = uct_iov_get_length(iov); + + if (!size) { + return UCS_OK; + } + + if (is_put) + uct_rocm_memcpy_h2d((void *)remote_addr, iov->buffer, size); + else + uct_rocm_memcpy_d2h(iov->buffer, (void *)remote_addr, size); + + return UCS_OK; +} + +ucs_status_t uct_rocm_copy_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + ucs_status_t status; + + status = uct_rocm_copy_ep_zcopy(tl_ep, remote_addr, iov, 0); + + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + uct_rocm_copy_trace_data(remote_addr, rkey, "GET_ZCOPY [length %zu]", + uct_iov_total_length(iov, iovcnt)); + return status; +} + +ucs_status_t uct_rocm_copy_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t 
rkey, + uct_completion_t *comp) +{ + ucs_status_t status; + + status = uct_rocm_copy_ep_zcopy(tl_ep, remote_addr, iov, 1); /* last argument is is_put */ + + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + /* the trace label must name the PUT operation, matching the PUT stat counter above */ + uct_rocm_copy_trace_data(remote_addr, rkey, "PUT_ZCOPY [length %zu]", + uct_iov_total_length(iov, iovcnt)); + return status; +} + +/* Short PUT: copies length bytes from buffer to remote_addr with uct_rocm_memcpy_h2d; rkey is not used; always returns UCS_OK. */ +ucs_status_t uct_rocm_copy_ep_put_short(uct_ep_h tl_ep, const void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey) +{ + uct_rocm_memcpy_h2d((void *)remote_addr, buffer, length); + + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length); + ucs_trace_data("PUT_SHORT size %d from %p to %p", + length, buffer, (void *)remote_addr); + return UCS_OK; +} +/* Short GET: copies length bytes from remote_addr to buffer with uct_rocm_memcpy_d2h; rkey is not used; always returns UCS_OK. */ +ucs_status_t uct_rocm_copy_ep_get_short(uct_ep_h tl_ep, void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey) +{ + uct_rocm_memcpy_d2h(buffer, (void *)remote_addr, length); + + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, SHORT, length); + ucs_trace_data("GET_SHORT size %d from %p to %p", + length, (void *)remote_addr, buffer); + return UCS_OK; +} diff --git a/src/uct/rocm/copy/rocm_copy_ep.h b/src/uct/rocm/copy/rocm_copy_ep.h new file mode 100644 index 0000000..4b05570 --- /dev/null +++ b/src/uct/rocm/copy/rocm_copy_ep.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms.
+ */ + +#ifndef UCT_ROCM_COPY_EP_H +#define UCT_ROCM_COPY_EP_H + +#include +#include +#include + + +typedef struct uct_rocm_copy_ep_addr { + int ep_id; +} uct_rocm_copy_ep_addr_t; + +typedef struct uct_rocm_copy_ep { + uct_base_ep_t super; + struct uct_rocm_copy_ep *next; +} uct_rocm_copy_ep_t; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_rocm_copy_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_rocm_copy_ep_t, uct_ep_t); + +ucs_status_t uct_rocm_copy_ep_get_zcopy(uct_ep_h tl_ep, + const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +ucs_status_t uct_rocm_copy_ep_put_zcopy(uct_ep_h tl_ep, + const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +ucs_status_t uct_rocm_copy_ep_put_short(uct_ep_h tl_ep, const void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey); + +ucs_status_t uct_rocm_copy_ep_get_short(uct_ep_h tl_ep, void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey); + +#endif diff --git a/src/uct/rocm/copy/rocm_copy_iface.c b/src/uct/rocm/copy/rocm_copy_iface.c new file mode 100644 index 0000000..895cc8e --- /dev/null +++ b/src/uct/rocm/copy/rocm_copy_iface.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "rocm_copy_iface.h" +#include "rocm_copy_md.h" +#include "rocm_copy_ep.h" + +#include +#include +#include + + +static ucs_config_field_t uct_rocm_copy_iface_config_table[] = { + + {"", "", NULL, + ucs_offsetof(uct_rocm_copy_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + {NULL} +}; + +/* Forward declaration for the delete function */ +static void UCS_CLASS_DELETE_FUNC_NAME(uct_rocm_copy_iface_t)(uct_iface_t*); + + +static ucs_status_t uct_rocm_copy_iface_get_address(uct_iface_h tl_iface, + uct_iface_addr_t *iface_addr) +{ + uct_rocm_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_rocm_copy_iface_t); + + *(uct_rocm_copy_iface_addr_t*)iface_addr = iface->id; + return UCS_OK; +} + +static int uct_rocm_copy_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + uct_rocm_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_rocm_copy_iface_t); + uct_rocm_copy_iface_addr_t *addr = (uct_rocm_copy_iface_addr_t*)iface_addr; + + return (addr != NULL) && (iface->id == *addr); +} + +static ucs_status_t uct_rocm_copy_iface_query(uct_iface_h tl_iface, + uct_iface_attr_t *iface_attr) +{ + uct_rocm_copy_iface_t *iface = ucs_derived_of(tl_iface, uct_rocm_copy_iface_t); + + uct_base_iface_query(&iface->super, iface_attr); + + iface_attr->iface_addr_len = sizeof(uct_rocm_copy_iface_addr_t); + iface_attr->device_addr_len = 0; + iface_attr->ep_addr_len = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_GET_SHORT | + UCT_IFACE_FLAG_PUT_SHORT | + UCT_IFACE_FLAG_GET_ZCOPY | + UCT_IFACE_FLAG_PUT_ZCOPY | + UCT_IFACE_FLAG_PENDING; + + iface_attr->cap.put.max_short = UINT_MAX; + iface_attr->cap.put.max_bcopy = 0; + iface_attr->cap.put.min_zcopy = 0; + iface_attr->cap.put.max_zcopy = SIZE_MAX; + iface_attr->cap.put.opt_zcopy_align = 1; + iface_attr->cap.put.align_mtu = iface_attr->cap.put.opt_zcopy_align; + iface_attr->cap.put.max_iov = 1; + + 
iface_attr->cap.get.max_short = UINT_MAX; + iface_attr->cap.get.max_bcopy = 0; + iface_attr->cap.get.min_zcopy = 0; + iface_attr->cap.get.max_zcopy = SIZE_MAX; + iface_attr->cap.get.opt_zcopy_align = 1; + iface_attr->cap.get.align_mtu = iface_attr->cap.get.opt_zcopy_align; + iface_attr->cap.get.max_iov = 1; + + iface_attr->cap.am.max_short = 0; + iface_attr->cap.am.max_bcopy = 0; + iface_attr->cap.am.min_zcopy = 0; + iface_attr->cap.am.max_zcopy = 0; + iface_attr->cap.am.opt_zcopy_align = 1; + iface_attr->cap.am.align_mtu = iface_attr->cap.am.opt_zcopy_align; + iface_attr->cap.am.max_hdr = 0; + iface_attr->cap.am.max_iov = 1; + + iface_attr->latency.overhead = 10e-6; /* 10 us */ + iface_attr->latency.growth = 0; + iface_attr->bandwidth.dedicated = 6911 * 1024.0 * 1024.0; + iface_attr->bandwidth.shared = 0; + iface_attr->overhead = 0; + iface_attr->priority = 0; + + return UCS_OK; +} + +static uct_iface_ops_t uct_rocm_copy_iface_ops = { + .ep_get_short = uct_rocm_copy_ep_get_short, + .ep_put_short = uct_rocm_copy_ep_put_short, + .ep_get_zcopy = uct_rocm_copy_ep_get_zcopy, + .ep_put_zcopy = uct_rocm_copy_ep_put_zcopy, + .ep_pending_add = ucs_empty_function_return_busy, + .ep_pending_purge = ucs_empty_function, + .ep_flush = uct_base_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_rocm_copy_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_rocm_copy_ep_t), + .iface_flush = uct_base_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_progress_enable = ucs_empty_function, + .iface_progress_disable = ucs_empty_function, + .iface_progress = ucs_empty_function_return_zero, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_rocm_copy_iface_t), + .iface_query = uct_rocm_copy_iface_query, + .iface_get_device_address = (uct_iface_get_device_address_func_t)ucs_empty_function_return_success, + .iface_get_address = uct_rocm_copy_iface_get_address, + .iface_is_reachable = uct_rocm_copy_iface_is_reachable, +}; + +static 
UCS_CLASS_INIT_FUNC(uct_rocm_copy_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_rocm_copy_iface_ops, md, worker, + params, tl_config UCS_STATS_ARG(params->stats_root) + UCS_STATS_ARG(UCT_ROCM_COPY_TL_NAME)); + + self->id = ucs_generate_uuid((uintptr_t)self); + + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_rocm_copy_iface_t) +{ + +} + +UCS_CLASS_DEFINE(uct_rocm_copy_iface_t, uct_base_iface_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_rocm_copy_iface_t, uct_iface_t, uct_md_h, uct_worker_h, + const uct_iface_params_t*, const uct_iface_config_t*); +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_rocm_copy_iface_t, uct_iface_t); + +UCT_TL_DEFINE(&uct_rocm_copy_component, rocm_copy, + uct_rocm_base_query_devices, uct_rocm_copy_iface_t, + "ROCM_COPY_", uct_rocm_copy_iface_config_table, + uct_rocm_copy_iface_config_t); diff --git a/src/uct/rocm/copy/rocm_copy_iface.h b/src/uct/rocm/copy/rocm_copy_iface.h new file mode 100644 index 0000000..e1b4f06 --- /dev/null +++ b/src/uct/rocm/copy/rocm_copy_iface.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_ROCM_COPY_IFACE_H +#define UCT_ROCM_COPY_IFACE_H + +#include + +#define UCT_ROCM_COPY_TL_NAME "rocm_cpy" + +typedef uint64_t uct_rocm_copy_iface_addr_t; + +typedef struct uct_rocm_copy_iface { + uct_base_iface_t super; + uct_rocm_copy_iface_addr_t id; +} uct_rocm_copy_iface_t; + +typedef struct uct_rocm_copy_iface_config { + uct_iface_config_t super; +} uct_rocm_copy_iface_config_t; + +#endif diff --git a/src/uct/rocm/copy/rocm_copy_md.c b/src/uct/rocm/copy/rocm_copy_md.c new file mode 100644 index 0000000..155189a --- /dev/null +++ b/src/uct/rocm/copy/rocm_copy_md.c @@ -0,0 +1,154 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "rocm_copy_md.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +static ucs_config_field_t uct_rocm_copy_md_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_rocm_copy_md_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_md_config_table)}, + + {NULL} +}; + +static ucs_status_t uct_rocm_copy_md_query(uct_md_h md, uct_md_attr_t *md_attr) +{ + md_attr->cap.flags = UCT_MD_FLAG_REG; + md_attr->cap.reg_mem_types = UCS_BIT(UCS_MEMORY_TYPE_HOST); + md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_ROCM; + md_attr->cap.detect_mem_types = UCS_BIT(UCS_MEMORY_TYPE_ROCM) | + UCS_BIT(UCS_MEMORY_TYPE_ROCM_MANAGED); + md_attr->cap.max_alloc = 0; + md_attr->cap.max_reg = ULONG_MAX; + md_attr->rkey_packed_size = 0; + md_attr->reg_cost.overhead = 0; + md_attr->reg_cost.growth = 0; + memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus)); + return UCS_OK; +} + +static ucs_status_t uct_rocm_copy_mkey_pack(uct_md_h md, uct_mem_h memh, + void *rkey_buffer) +{ + return UCS_OK; +} + +static ucs_status_t uct_rocm_copy_rkey_unpack(uct_component_t *component, + const void *rkey_buffer, + uct_rkey_t *rkey_p, void **handle_p) +{ + *rkey_p = 0xdeadbeef; + *handle_p = NULL; + return UCS_OK; +} + +static ucs_status_t uct_rocm_copy_rkey_release(uct_component_t *component, + uct_rkey_t rkey, void *handle) +{ + return UCS_OK; +} + +static ucs_status_t uct_rocm_copy_mem_reg(uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + hsa_status_t status; + void *lock_addr; + + if(address == NULL) { + *memh_p = address; + return UCS_OK; + } + + status = hsa_amd_memory_lock(address, length, NULL, 0, &lock_addr); + if (status != HSA_STATUS_SUCCESS) { + return UCS_ERR_IO_ERROR; + } + + *memh_p = address; + return UCS_OK; +} + +static ucs_status_t uct_rocm_copy_mem_dereg(uct_md_h md, uct_mem_h memh) +{ + void *address = (void *)memh; + hsa_status_t status; + + if (address == NULL) { + return UCS_OK; + } + + status = 
hsa_amd_memory_unlock(address); + if (status != HSA_STATUS_SUCCESS) { + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +static void uct_rocm_copy_md_close(uct_md_h uct_md) { + uct_rocm_copy_md_t *md = ucs_derived_of(uct_md, uct_rocm_copy_md_t); + + ucs_free(md); +} + +static uct_md_ops_t md_ops = { + .close = uct_rocm_copy_md_close, + .query = uct_rocm_copy_md_query, + .mkey_pack = uct_rocm_copy_mkey_pack, + .mem_reg = uct_rocm_copy_mem_reg, + .mem_dereg = uct_rocm_copy_mem_dereg, + .detect_memory_type = uct_rocm_base_detect_memory_type +}; + +static ucs_status_t +uct_rocm_copy_md_open(uct_component_h component, const char *md_name, + const uct_md_config_t *md_config, uct_md_h *md_p) +{ + uct_rocm_copy_md_t *md; + + md = ucs_malloc(sizeof(uct_rocm_copy_md_t), "uct_rocm_copy_md_t"); + if (NULL == md) { + ucs_error("Failed to allocate memory for uct_rocm_copy_md_t"); + return UCS_ERR_NO_MEMORY; + } + + md->super.ops = &md_ops; + md->super.component = &uct_rocm_copy_component; + + *md_p = (uct_md_h) md; + return UCS_OK; +} + +uct_component_t uct_rocm_copy_component = { + .query_md_resources = uct_rocm_base_query_md_resources, + .md_open = uct_rocm_copy_md_open, + .cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = uct_rocm_copy_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = uct_rocm_copy_rkey_release, + .name = "rocm_cpy", + .md_config = { + .name = "ROCm-copy memory domain", + .prefix = "ROCM_COPY_", + .table = uct_rocm_copy_md_config_table, + .size = sizeof(uct_rocm_copy_md_config_t), + }, + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_rocm_copy_component), + .flags = 0 +}; +UCT_COMPONENT_REGISTER(&uct_rocm_copy_component); + diff --git a/src/uct/rocm/copy/rocm_copy_md.h b/src/uct/rocm/copy/rocm_copy_md.h new file mode 100644 index 0000000..642d202 --- /dev/null +++ b/src/uct/rocm/copy/rocm_copy_md.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) Advanced Micro 
Devices, Inc. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCT_ROCM_COPY_MD_H
+#define UCT_ROCM_COPY_MD_H
+
+#include
+
+
+/* Component object of the ROCm copy transport; defined in rocm_copy_md.c */
+extern uct_component_t uct_rocm_copy_component;
+
+/* ROCm copy memory domain: the base 'struct uct_md' with no extra fields */
+typedef struct uct_rocm_copy_md {
+    struct uct_md super;
+} uct_rocm_copy_md_t;
+
+/* Memory domain configuration: only the generic MD options, no
+ * ROCm-copy-specific fields */
+typedef struct uct_rocm_copy_md_config {
+    uct_md_config_t super;
+} uct_rocm_copy_md_config_t;
+
+#endif
diff --git a/src/uct/rocm/gdr/Makefile.am b/src/uct/rocm/gdr/Makefile.am
new file mode 100644
index 0000000..c491f04
--- /dev/null
+++ b/src/uct/rocm/gdr/Makefile.am
@@ -0,0 +1,29 @@
+#
+# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+
+# Everything below is defined only when both automake conditionals,
+# HAVE_ROCM and HAVE_GDR_COPY, are true.
+if HAVE_ROCM
+if HAVE_GDR_COPY
+
+# libuct_rocm_gdr.la is built as a libtool library of the "module" primary.
+# It is compiled with the base, ROCm and GDR copy preprocessor flags and
+# linked against libucs and libuct_rocm.
+module_LTLIBRARIES = libuct_rocm_gdr.la
+libuct_rocm_gdr_la_CPPFLAGS = $(BASE_CPPFLAGS) $(ROCM_CPPFLAGS) $(GDR_COPY_CPPFLAGS)
+libuct_rocm_gdr_la_CFLAGS = $(BASE_CFLAGS)
+libuct_rocm_gdr_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \
+    $(top_builddir)/src/uct/rocm/libuct_rocm.la
+libuct_rocm_gdr_la_LDFLAGS = $(ROCM_LDFLAGS) $(GDR_COPY_LDFLAGS) -version-info $(SOVERSION)
+
+# Headers listed under noinst_ are not installed.
+noinst_HEADERS = \
+    rocm_gdr_md.h \
+    rocm_gdr_iface.h \
+    rocm_gdr_ep.h
+
+libuct_rocm_gdr_la_SOURCES = \
+    rocm_gdr_md.c \
+    rocm_gdr_iface.c \
+    rocm_gdr_ep.c
+
+# Rules from config/module.am (its contents are not shown here; presumably
+# shared by all loadable modules - confirm).
+include $(top_srcdir)/config/module.am
+
+endif
+endif
diff --git a/src/uct/rocm/gdr/Makefile.in b/src/uct/rocm/gdr/Makefile.in
new file mode 100644
index 0000000..b093cd1
--- /dev/null
+++ b/src/uct/rocm/gdr/Makefile.in
@@ -0,0 +1,887 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; 
$(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/rocm/gdr +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + 
$(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@libuct_rocm_gdr_la_DEPENDENCIES = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ $(top_builddir)/src/uct/rocm/libuct_rocm.la +am__libuct_rocm_gdr_la_SOURCES_DIST = rocm_gdr_md.c rocm_gdr_iface.c \ + rocm_gdr_ep.c +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@am_libuct_rocm_gdr_la_OBJECTS = libuct_rocm_gdr_la-rocm_gdr_md.lo \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ libuct_rocm_gdr_la-rocm_gdr_iface.lo \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ libuct_rocm_gdr_la-rocm_gdr_ep.lo +libuct_rocm_gdr_la_OBJECTS = $(am_libuct_rocm_gdr_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_rocm_gdr_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libuct_rocm_gdr_la_CFLAGS) $(CFLAGS) \ + $(libuct_rocm_gdr_la_LDFLAGS) $(LDFLAGS) -o $@ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@am_libuct_rocm_gdr_la_rpath = \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_ep.Plo \ + ./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_iface.Plo \ + ./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_md.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) 
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_rocm_gdr_la_SOURCES) +DIST_SOURCES = $(am__libuct_rocm_gdr_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = rocm_gdr_md.h rocm_gdr_iface.h rocm_gdr_ep.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ 
+ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = 
@ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = 
@top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@module_LTLIBRARIES = libuct_rocm_gdr.la +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@libuct_rocm_gdr_la_CPPFLAGS = $(BASE_CPPFLAGS) $(ROCM_CPPFLAGS) $(GDR_COPY_CPPFLAGS) +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@libuct_rocm_gdr_la_CFLAGS = $(BASE_CFLAGS) +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@libuct_rocm_gdr_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ $(top_builddir)/src/uct/rocm/libuct_rocm.la + +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@libuct_rocm_gdr_la_LDFLAGS = $(ROCM_LDFLAGS) $(GDR_COPY_LDFLAGS) -version-info $(SOVERSION) +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@noinst_HEADERS = \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ rocm_gdr_md.h \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ rocm_gdr_iface.h \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ rocm_gdr_ep.h + +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@libuct_rocm_gdr_la_SOURCES = \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ rocm_gdr_md.c \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ rocm_gdr_iface.c \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ rocm_gdr_ep.c + + +# Automake silent rules +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@AM_V_LN_1 = true +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) 
--foreign src/uct/rocm/gdr/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/rocm/gdr/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f 
"$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libuct_rocm_gdr.la: $(libuct_rocm_gdr_la_OBJECTS) $(libuct_rocm_gdr_la_DEPENDENCIES) $(EXTRA_libuct_rocm_gdr_la_DEPENDENCIES) + $(AM_V_CCLD)$(libuct_rocm_gdr_la_LINK) $(am_libuct_rocm_gdr_la_rpath) $(libuct_rocm_gdr_la_OBJECTS) $(libuct_rocm_gdr_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_md.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libuct_rocm_gdr_la-rocm_gdr_md.lo: rocm_gdr_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_gdr_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_gdr_la_CFLAGS) $(CFLAGS) -MT libuct_rocm_gdr_la-rocm_gdr_md.lo -MD -MP -MF $(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_md.Tpo -c -o libuct_rocm_gdr_la-rocm_gdr_md.lo `test -f 'rocm_gdr_md.c' || echo '$(srcdir)/'`rocm_gdr_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_md.Tpo $(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rocm_gdr_md.c' object='libuct_rocm_gdr_la-rocm_gdr_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_gdr_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_gdr_la_CFLAGS) $(CFLAGS) -c -o libuct_rocm_gdr_la-rocm_gdr_md.lo `test -f 'rocm_gdr_md.c' || echo '$(srcdir)/'`rocm_gdr_md.c + 
+libuct_rocm_gdr_la-rocm_gdr_iface.lo: rocm_gdr_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_gdr_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_gdr_la_CFLAGS) $(CFLAGS) -MT libuct_rocm_gdr_la-rocm_gdr_iface.lo -MD -MP -MF $(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_iface.Tpo -c -o libuct_rocm_gdr_la-rocm_gdr_iface.lo `test -f 'rocm_gdr_iface.c' || echo '$(srcdir)/'`rocm_gdr_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_iface.Tpo $(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rocm_gdr_iface.c' object='libuct_rocm_gdr_la-rocm_gdr_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_gdr_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_gdr_la_CFLAGS) $(CFLAGS) -c -o libuct_rocm_gdr_la-rocm_gdr_iface.lo `test -f 'rocm_gdr_iface.c' || echo '$(srcdir)/'`rocm_gdr_iface.c + +libuct_rocm_gdr_la-rocm_gdr_ep.lo: rocm_gdr_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_gdr_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_gdr_la_CFLAGS) $(CFLAGS) -MT libuct_rocm_gdr_la-rocm_gdr_ep.lo -MD -MP -MF $(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_ep.Tpo -c -o libuct_rocm_gdr_la-rocm_gdr_ep.lo `test -f 'rocm_gdr_ep.c' || echo '$(srcdir)/'`rocm_gdr_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_ep.Tpo $(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rocm_gdr_ep.c' object='libuct_rocm_gdr_la-rocm_gdr_ep.lo' libtool=yes 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_rocm_gdr_la_CPPFLAGS) $(CPPFLAGS) $(libuct_rocm_gdr_la_CFLAGS) $(CFLAGS) -c -o libuct_rocm_gdr_la-rocm_gdr_ep.lo `test -f 'rocm_gdr_ep.c' || echo '$(srcdir)/'`rocm_gdr_ep.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 
's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_GDR_COPY_FALSE@all-local: +@HAVE_ROCM_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi 
+mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_ep.Plo + -rm -f ./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_iface.Plo + -rm -f ./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_md.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_ep.Plo + -rm -f ./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_iface.Plo + -rm -f ./$(DEPDIR)/libuct_rocm_gdr_la-rocm_gdr_md.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + 
distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to create proper makefile dependencies +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@$(local_la_modules): $(module_LTLIBRARIES) +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ $(AM_V_at)$(MKDIR_P) $(localmoduledir) +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ done +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ @for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ $(AM_V_LN) $$lib; \ +@HAVE_GDR_COPY_TRUE@@HAVE_ROCM_TRUE@ done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/uct/rocm/gdr/configure.m4 b/src/uct/rocm/gdr/configure.m4 new file mode 100644 index 0000000..23c32b4 --- /dev/null +++ b/src/uct/rocm/gdr/configure.m4 @@ -0,0 +1,10 @@ +# +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + +UCX_CHECK_GDRCOPY + +AS_IF([test "x$gdrcopy_happy" = "xyes" && test "x$rocm_happy" = "xyes"], + [uct_rocm_modules="${uct_rocm_modules}:gdr"]) +AC_CONFIG_FILES([src/uct/rocm/gdr/Makefile]) diff --git a/src/uct/rocm/gdr/rocm_gdr_ep.c b/src/uct/rocm/gdr/rocm_gdr_ep.c new file mode 100644 index 0000000..dbf2b1c --- /dev/null +++ b/src/uct/rocm/gdr/rocm_gdr_ep.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#include "rocm_gdr_ep.h" +#include "rocm_gdr_iface.h" + +#include +#include +#include + +#include + +static UCS_CLASS_INIT_FUNC(uct_rocm_gdr_ep_t, const uct_ep_params_t *params) +{ + uct_rocm_gdr_iface_t *iface = ucs_derived_of(params->iface, uct_rocm_gdr_iface_t); + + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super); + + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_rocm_gdr_ep_t) +{ +} + +UCS_CLASS_DEFINE(uct_rocm_gdr_ep_t, uct_base_ep_t) +UCS_CLASS_DEFINE_NEW_FUNC(uct_rocm_gdr_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_rocm_gdr_ep_t, uct_ep_t); + +#define uct_rocm_gdr_trace_data(_remote_addr, _rkey, _fmt, ...) \ + ucs_trace_data(_fmt " to %"PRIx64"(%+ld)", ## __VA_ARGS__, (_remote_addr), \ + (_rkey)) + +ucs_status_t uct_rocm_gdr_ep_put_short(uct_ep_h tl_ep, const void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey) +{ + int ret; + + if (ucs_likely(length)) { + ret = gdr_copy_to_bar((void *)remote_addr, buffer, length); + if (ret) { + ucs_error("gdr_copy_to_bar failed. 
ret:%d", ret); + return UCS_ERR_IO_ERROR; + } + } + + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length); + ucs_trace_data("PUT_SHORT size %d from %p to %p", + length, buffer, (void *)remote_addr); + return UCS_OK; +} + +ucs_status_t uct_rocm_gdr_ep_get_short(uct_ep_h tl_ep, void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey) +{ + int ret; + + if (ucs_likely(length)) { + ret = gdr_copy_from_bar(buffer, (void *)remote_addr, length); + if (ret) { + ucs_error("gdr_copy_from_bar failed. ret:%d", ret); + return UCS_ERR_IO_ERROR; + } + } + + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, SHORT, length); + ucs_trace_data("GET_SHORT size %d from %p to %p", + length, (void *)remote_addr, buffer); + return UCS_OK; +} diff --git a/src/uct/rocm/gdr/rocm_gdr_ep.h b/src/uct/rocm/gdr/rocm_gdr_ep.h new file mode 100644 index 0000000..fbff16b --- /dev/null +++ b/src/uct/rocm/gdr/rocm_gdr_ep.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#ifndef UCT_ROCM_GDR_EP_H +#define UCT_ROCM_GDR_EP_H + +#include +#include +#include + + +typedef struct uct_rocm_gdr_ep_addr { + int ep_id; +} uct_rocm_gdr_ep_addr_t; + +typedef struct uct_rocm_gdr_ep { + uct_base_ep_t super; + struct uct_rocm_gdr_ep *next; +} uct_rocm_gdr_ep_t; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_rocm_gdr_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_rocm_gdr_ep_t, uct_ep_t); + +ucs_status_t uct_rocm_gdr_ep_put_short(uct_ep_h tl_ep, const void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey); + +ucs_status_t uct_rocm_gdr_ep_get_short(uct_ep_h tl_ep, void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey); + +#endif diff --git a/src/uct/rocm/gdr/rocm_gdr_iface.c b/src/uct/rocm/gdr/rocm_gdr_iface.c new file mode 100644 index 0000000..8095a1b --- /dev/null +++ b/src/uct/rocm/gdr/rocm_gdr_iface.c @@ -0,0 +1,142 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "rocm_gdr_iface.h" +#include "rocm_gdr_md.h" +#include "rocm_gdr_ep.h" + +#include +#include +#include + + +static ucs_config_field_t uct_rocm_gdr_iface_config_table[] = { + + {"", "", NULL, + ucs_offsetof(uct_rocm_gdr_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + {NULL} +}; + +/* Forward declaration for the delete function */ +static void UCS_CLASS_DELETE_FUNC_NAME(uct_rocm_gdr_iface_t)(uct_iface_t*); + + +static ucs_status_t uct_rocm_gdr_iface_get_address(uct_iface_h tl_iface, + uct_iface_addr_t *iface_addr) +{ + uct_rocm_gdr_iface_t *iface = ucs_derived_of(tl_iface, uct_rocm_gdr_iface_t); + + *(uct_rocm_gdr_iface_addr_t*)iface_addr = iface->id; + return UCS_OK; +} + +static int uct_rocm_gdr_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + uct_rocm_gdr_iface_t *iface = ucs_derived_of(tl_iface, uct_rocm_gdr_iface_t); + uct_rocm_gdr_iface_addr_t *addr = (uct_rocm_gdr_iface_addr_t*)iface_addr; + + return (addr != NULL) && (iface->id == *addr); +} + +static ucs_status_t uct_rocm_gdr_iface_query(uct_iface_h tl_iface, + uct_iface_attr_t *iface_attr) +{ + uct_rocm_gdr_iface_t *iface = ucs_derived_of(tl_iface, uct_rocm_gdr_iface_t); + + uct_base_iface_query(&iface->super, iface_attr); + + iface_attr->iface_addr_len = sizeof(uct_rocm_gdr_iface_addr_t); + iface_attr->device_addr_len = 0; + iface_attr->ep_addr_len = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_GET_SHORT | + UCT_IFACE_FLAG_PUT_SHORT; + + iface_attr->cap.put.max_short = UINT_MAX; + iface_attr->cap.put.max_bcopy = 0; + iface_attr->cap.put.min_zcopy = 0; + iface_attr->cap.put.max_zcopy = 0; + iface_attr->cap.put.opt_zcopy_align = 1; + iface_attr->cap.put.align_mtu = iface_attr->cap.put.opt_zcopy_align; + iface_attr->cap.put.max_iov = 1; + + iface_attr->cap.get.max_short = UINT_MAX; + iface_attr->cap.get.max_bcopy = 0; + iface_attr->cap.get.min_zcopy = 
0; + iface_attr->cap.get.max_zcopy = 0; + iface_attr->cap.get.opt_zcopy_align = 1; + iface_attr->cap.get.align_mtu = iface_attr->cap.get.opt_zcopy_align; + iface_attr->cap.get.max_iov = 1; + + iface_attr->cap.am.max_short = 0; + iface_attr->cap.am.max_bcopy = 0; + iface_attr->cap.am.min_zcopy = 0; + iface_attr->cap.am.max_zcopy = 0; + iface_attr->cap.am.opt_zcopy_align = 1; + iface_attr->cap.am.align_mtu = iface_attr->cap.am.opt_zcopy_align; + iface_attr->cap.am.max_hdr = 0; + iface_attr->cap.am.max_iov = 1; + + iface_attr->latency.overhead = 1e-6; /* 1 us */ + iface_attr->latency.growth = 0; + iface_attr->bandwidth.dedicated = 0; + iface_attr->bandwidth.shared = 6911 * 1024.0 * 1024.0; + iface_attr->overhead = 0; + iface_attr->priority = 0; + + return UCS_OK; +} + +static uct_iface_ops_t uct_rocm_gdr_iface_ops = { + .ep_get_short = uct_rocm_gdr_ep_get_short, + .ep_put_short = uct_rocm_gdr_ep_put_short, + .ep_pending_add = ucs_empty_function_return_busy, + .ep_pending_purge = ucs_empty_function, + .ep_flush = uct_base_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_rocm_gdr_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_rocm_gdr_ep_t), + .iface_flush = uct_base_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_progress_enable = ucs_empty_function, + .iface_progress_disable = ucs_empty_function, + .iface_progress = ucs_empty_function_return_zero, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_rocm_gdr_iface_t), + .iface_query = uct_rocm_gdr_iface_query, + .iface_get_device_address = (uct_iface_get_device_address_func_t)ucs_empty_function_return_success, + .iface_get_address = uct_rocm_gdr_iface_get_address, + .iface_is_reachable = uct_rocm_gdr_iface_is_reachable, +}; + +static UCS_CLASS_INIT_FUNC(uct_rocm_gdr_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_rocm_gdr_iface_ops, md, worker, 
+ params, tl_config UCS_STATS_ARG(params->stats_root) + UCS_STATS_ARG(UCT_ROCM_GDR_TL_NAME)); + + self->id = ucs_generate_uuid((uintptr_t)self); + + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_rocm_gdr_iface_t) +{ + +} + +UCS_CLASS_DEFINE(uct_rocm_gdr_iface_t, uct_base_iface_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_rocm_gdr_iface_t, uct_iface_t, uct_md_h, uct_worker_h, + const uct_iface_params_t*, const uct_iface_config_t*); +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_rocm_gdr_iface_t, uct_iface_t); + +UCT_TL_DEFINE(&uct_rocm_gdr_component, rocm_gdr, uct_rocm_base_query_devices, + uct_rocm_gdr_iface_t, "ROCM_GDR_", + uct_rocm_gdr_iface_config_table, uct_rocm_gdr_iface_config_t); diff --git a/src/uct/rocm/gdr/rocm_gdr_iface.h b/src/uct/rocm/gdr/rocm_gdr_iface.h new file mode 100644 index 0000000..1e57e93 --- /dev/null +++ b/src/uct/rocm/gdr/rocm_gdr_iface.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_ROCM_GDR_IFACE_H +#define UCT_ROCM_GDR_IFACE_H + +#include + +#define UCT_ROCM_GDR_TL_NAME "rocm_gdr" + +typedef uint64_t uct_rocm_gdr_iface_addr_t; + +typedef struct uct_rocm_gdr_iface { + uct_base_iface_t super; + uct_rocm_gdr_iface_addr_t id; +} uct_rocm_gdr_iface_t; + +typedef struct uct_rocm_gdr_iface_config { + uct_iface_config_t super; +} uct_rocm_gdr_iface_config_t; + +#endif diff --git a/src/uct/rocm/gdr/rocm_gdr_md.c b/src/uct/rocm/gdr/rocm_gdr_md.c new file mode 100644 index 0000000..87a09b5 --- /dev/null +++ b/src/uct/rocm/gdr/rocm_gdr_md.c @@ -0,0 +1,157 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "rocm_gdr_md.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +static ucs_config_field_t uct_rocm_gdr_md_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_rocm_gdr_md_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_md_config_table)}, + + {NULL} +}; + +static ucs_status_t uct_rocm_gdr_md_query(uct_md_h md, uct_md_attr_t *md_attr) +{ + md_attr->cap.flags = UCT_MD_FLAG_REG | + UCT_MD_FLAG_NEED_RKEY; + md_attr->cap.reg_mem_types = UCS_BIT(UCS_MEMORY_TYPE_ROCM); + md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_ROCM; + md_attr->cap.detect_mem_types = 0; + md_attr->cap.max_alloc = 0; + md_attr->cap.max_reg = ULONG_MAX; + md_attr->rkey_packed_size = sizeof(uct_rocm_gdr_key_t); + md_attr->reg_cost.overhead = 0; + md_attr->reg_cost.growth = 0; + memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus)); + return UCS_OK; +} + +static ucs_status_t uct_rocm_gdr_mkey_pack(uct_md_h md, uct_mem_h memh, + void *rkey_buffer) +{ + uct_rocm_gdr_key_t *packed = (uct_rocm_gdr_key_t *)rkey_buffer; + //uct_rocm_gdr_mem_t *mem_hndl = (uct_rocm_gdr_mem_t *)memh; + packed->dummy = 0; + return UCS_OK; +} + +static ucs_status_t uct_rocm_gdr_rkey_unpack(uct_component_t *component, + const void *rkey_buffer, + uct_rkey_t *rkey_p, void **handle_p) +{ + //uct_rocm_gdr_key_t *packed = (uct_rocm_gdr_key_t *)rkey_buffer; + uct_rocm_gdr_key_t *key; + + key = ucs_malloc(sizeof(uct_rocm_gdr_key_t), "uct_rocm_gdr_key_t"); + if (NULL == key) { + ucs_error("failed to allocate memory for uct_rocm_gdr_key_t"); + return UCS_ERR_NO_MEMORY; + } + + key->dummy = 0; + + *handle_p = NULL; + *rkey_p = (uintptr_t)key; + + return UCS_OK; +} + +static ucs_status_t uct_rocm_gdr_rkey_release(uct_component_t *component, + uct_rkey_t rkey, void *handle) +{ + ucs_assert(NULL == handle); + ucs_free((void *)rkey); + return UCS_OK; +} + +static ucs_status_t uct_rocm_gdr_mem_reg(uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ 
+ uct_rocm_gdr_mem_t *mem_hndl = NULL; + + mem_hndl = ucs_malloc(sizeof(uct_rocm_gdr_mem_t), "rocm_gdr handle"); + if (NULL == mem_hndl) { + ucs_error("failed to allocate memory for rocm_gdr_mem_t"); + return UCS_ERR_NO_MEMORY; + } + + *memh_p = mem_hndl; + return UCS_OK; +} + +static ucs_status_t uct_rocm_gdr_mem_dereg(uct_md_h md, uct_mem_h memh) +{ + uct_rocm_gdr_mem_t *mem_hndl = memh; + + ucs_free(mem_hndl); + return UCS_OK; +} + +static void uct_rocm_gdr_md_close(uct_md_h uct_md) { + uct_rocm_gdr_md_t *md = ucs_derived_of(uct_md, uct_rocm_gdr_md_t); + + ucs_free(md); +} + +static uct_md_ops_t md_ops = { + .close = uct_rocm_gdr_md_close, + .query = uct_rocm_gdr_md_query, + .mkey_pack = uct_rocm_gdr_mkey_pack, + .mem_reg = uct_rocm_gdr_mem_reg, + .mem_dereg = uct_rocm_gdr_mem_dereg, + .detect_memory_type = ucs_empty_function_return_unsupported, +}; + +static ucs_status_t +uct_rocm_gdr_md_open(uct_component_h component, const char *md_name, + const uct_md_config_t *md_config, uct_md_h *md_p) +{ + uct_rocm_gdr_md_t *md; + + md = ucs_malloc(sizeof(uct_rocm_gdr_md_t), "uct_rocm_gdr_md_t"); + if (NULL == md) { + ucs_error("Failed to allocate memory for uct_rocm_gdr_md_t"); + return UCS_ERR_NO_MEMORY; + } + + md->super.ops = &md_ops; + md->super.component = &uct_rocm_gdr_component; + + *md_p = (uct_md_h) md; + return UCS_OK; +} + +uct_component_t uct_rocm_gdr_component = { + .query_md_resources = uct_md_query_single_md_resource, + .md_open = uct_rocm_gdr_md_open, + .cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = uct_rocm_gdr_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = uct_rocm_gdr_rkey_release, + .name = "rocm_gdr", + .md_config = { + .name = "ROCm-gdr memory domain", + .prefix = "ROCM_GDR_", + .table = uct_rocm_gdr_md_config_table, + .size = sizeof(uct_rocm_gdr_md_config_t), + }, + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_rocm_gdr_component), + 
.flags = 0 +}; +UCT_COMPONENT_REGISTER(&uct_rocm_gdr_component); + diff --git a/src/uct/rocm/gdr/rocm_gdr_md.h b/src/uct/rocm/gdr/rocm_gdr_md.h new file mode 100644 index 0000000..f91faca --- /dev/null +++ b/src/uct/rocm/gdr/rocm_gdr_md.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_ROCM_GDR_MD_H +#define UCT_ROCM_GDR_MD_H + +#include + + +extern uct_component_t uct_rocm_gdr_component; + +typedef struct uct_rocm_gdr_md { + struct uct_md super; +} uct_rocm_gdr_md_t; + +typedef struct uct_rocm_gdr_md_config { + uct_md_config_t super; +} uct_rocm_gdr_md_config_t; + +typedef struct uct_rocm_gdr_mem { + int dummy; +} uct_rocm_gdr_mem_t; + +typedef struct uct_rocm_gdr_key { + int dummy; +} uct_rocm_gdr_key_t; + + +#endif diff --git a/src/uct/rocm/ipc/rocm_ipc_cache.c b/src/uct/rocm/ipc/rocm_ipc_cache.c new file mode 100644 index 0000000..cacc39f --- /dev/null +++ b/src/uct/rocm/ipc/rocm_ipc_cache.c @@ -0,0 +1,239 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "rocm_ipc_cache.h" + +#include +#include +#include +#include +#include + +static ucs_pgt_dir_t *uct_rocm_ipc_cache_pgt_dir_alloc(const ucs_pgtable_t *pgtable) +{ + void *ptr; + int ret; + + ret = ucs_posix_memalign(&ptr, + ucs_max(sizeof(void *), UCS_PGT_ENTRY_MIN_ALIGN), + sizeof(ucs_pgt_dir_t), "rocm_ipc_cache_pgdir"); + return (ret == 0) ? 
ptr : NULL; +} + +static void uct_rocm_ipc_cache_pgt_dir_release(const ucs_pgtable_t *pgtable, + ucs_pgt_dir_t *dir) +{ + ucs_free(dir); +} + +static void +uct_rocm_ipc_cache_region_collect_callback(const ucs_pgtable_t *pgtable, + ucs_pgt_region_t *pgt_region, + void *arg) +{ + ucs_list_link_t *list = arg; + uct_rocm_ipc_cache_region_t *region; + + region = ucs_derived_of(pgt_region, uct_rocm_ipc_cache_region_t); + ucs_list_add_tail(list, &region->list); +} + +static void uct_rocm_ipc_cache_purge(uct_rocm_ipc_cache_t *cache) +{ + uct_rocm_ipc_cache_region_t *region, *tmp; + ucs_list_link_t region_list; + + ucs_list_head_init(&region_list); + ucs_pgtable_purge(&cache->pgtable, uct_rocm_ipc_cache_region_collect_callback, + &region_list); + + ucs_list_for_each_safe(region, tmp, &region_list, list) { + if (hsa_amd_ipc_memory_detach(region->mapped_addr) != HSA_STATUS_SUCCESS) { + ucs_fatal("failed to unmap addr:%p", region->mapped_addr); + } + + ucs_free(region); + } + + ucs_trace("%s: rocm ipc cache purged", cache->name); +} + +static void uct_rocm_ipc_cache_invalidate_regions(uct_rocm_ipc_cache_t *cache, + void *from, void *to) +{ + ucs_list_link_t region_list; + ucs_status_t status; + uct_rocm_ipc_cache_region_t *region, *tmp; + + ucs_list_head_init(&region_list); + ucs_pgtable_search_range(&cache->pgtable, (ucs_pgt_addr_t)from, + (ucs_pgt_addr_t)to, + uct_rocm_ipc_cache_region_collect_callback, + &region_list); + ucs_list_for_each_safe(region, tmp, &region_list, list) { + status = ucs_pgtable_remove(&cache->pgtable, &region->super); + if (status != UCS_OK) { + ucs_error("failed to remove address:%p from cache (%s)", + (void *)region->key.address, ucs_status_string(status)); + } + + if (hsa_amd_ipc_memory_detach(region->mapped_addr) != HSA_STATUS_SUCCESS) { + ucs_fatal("failed to unmap addr:%p", region->mapped_addr); + } + ucs_free(region); + } + ucs_trace("%s: closed memhandles in the range [%p..%p]", + cache->name, from, to); +} + +ucs_status_t uct_rocm_ipc_cache_map_memhandle(void *arg,
uct_rocm_ipc_key_t *key, + void **mapped_addr) +{ + uct_rocm_ipc_cache_t *cache = (uct_rocm_ipc_cache_t *)arg; + ucs_status_t status; + ucs_pgt_region_t *pgt_region; + uct_rocm_ipc_cache_region_t *region; + hsa_status_t hsa_status; + int ret; + + pthread_rwlock_wrlock(&cache->lock); /* exclusive lock: this path may remove and insert page-table regions */ + pgt_region = UCS_PROFILE_CALL(ucs_pgtable_lookup, + &cache->pgtable, key->address); + if (ucs_likely(pgt_region != NULL)) { + region = ucs_derived_of(pgt_region, uct_rocm_ipc_cache_region_t); + if (memcmp(&key->ipc, &region->key.ipc, sizeof(key->ipc)) == 0) { + /*cache hit */ + ucs_trace("%s: rocm_ipc cache hit addr:%p size:%lu region:" + UCS_PGT_REGION_FMT, cache->name, (void *)key->address, + key->length, UCS_PGT_REGION_ARG(&region->super)); + + *mapped_addr = region->mapped_addr; + pthread_rwlock_unlock(&cache->lock); + return UCS_OK; + } else { + ucs_trace("%s: rocm_ipc cache remove stale region:" + UCS_PGT_REGION_FMT " new_addr:%p new_size:%lu", + cache->name, UCS_PGT_REGION_ARG(&region->super), + (void *)key->address, key->length); + + status = ucs_pgtable_remove(&cache->pgtable, &region->super); + if (status != UCS_OK) { + ucs_error("%s: failed to remove address:%p from cache", + cache->name, (void *)key->address); + goto err; + } + + if (hsa_amd_ipc_memory_detach(region->mapped_addr) != HSA_STATUS_SUCCESS) { + ucs_fatal("failed to unmap addr:%p", region->mapped_addr); + } + + ucs_free(region); + } + } + + hsa_status = hsa_amd_ipc_memory_attach(&key->ipc, key->length, 0, NULL, mapped_addr); + if (ucs_unlikely(hsa_status != HSA_STATUS_SUCCESS)) { + ucs_fatal("%s: failed to open ipc mem handle.
addr:%p len:%lu", + cache->name, (void *)key->address, key->length); + } + + /*create new cache entry */ + ret = ucs_posix_memalign((void **)&region, + ucs_max(sizeof(void *), UCS_PGT_ENTRY_MIN_ALIGN), + sizeof(uct_rocm_ipc_cache_region_t), + "uct_rocm_ipc_cache_region"); + if (ret != 0) { + ucs_warn("failed to allocate uct_rocm_ipc_cache region"); + status = UCS_ERR_NO_MEMORY; + goto err; + } + + region->super.start = ucs_align_down_pow2(key->address, UCS_PGT_ADDR_ALIGN); + region->super.end = ucs_align_up_pow2(key->address + key->length, UCS_PGT_ADDR_ALIGN); + region->key = *key; + region->mapped_addr = *mapped_addr; + + status = UCS_PROFILE_CALL(ucs_pgtable_insert, + &cache->pgtable, &region->super); + if (status == UCS_ERR_ALREADY_EXISTS) { + /* overlapped region means memory freed at source. remove and try insert */ + uct_rocm_ipc_cache_invalidate_regions(cache, + (void *)region->super.start, + (void *)region->super.end); + status = UCS_PROFILE_CALL(ucs_pgtable_insert, + &cache->pgtable, &region->super); + } + if (status != UCS_OK) { + + ucs_error("%s: failed to insert region:"UCS_PGT_REGION_FMT" size:%lu :%s", + cache->name, UCS_PGT_REGION_ARG(&region->super), key->length, + ucs_status_string(status)); + ucs_free(region); + goto err; + } + + ucs_trace("%s: rocm_ipc cache new region:"UCS_PGT_REGION_FMT" size:%lu", + cache->name, UCS_PGT_REGION_ARG(&region->super), key->length); + + pthread_rwlock_unlock(&cache->lock); + return UCS_OK; +err: + pthread_rwlock_unlock(&cache->lock); + return status; +} + +ucs_status_t uct_rocm_ipc_create_cache(uct_rocm_ipc_cache_t **cache, + const char *name) +{ + ucs_status_t status; + uct_rocm_ipc_cache_t *cache_desc; + int ret; + + cache_desc = ucs_malloc(sizeof(uct_rocm_ipc_cache_t), "uct_rocm_ipc_cache_t"); + if (cache_desc == NULL) { + ucs_error("failed to allocate memory for rocm_ipc cache"); + return UCS_ERR_NO_MEMORY; + } + + ret = pthread_rwlock_init(&cache_desc->lock, NULL); + if (ret) { + ucs_error("pthread_rwlock_init() failed:
%m"); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + status = ucs_pgtable_init(&cache_desc->pgtable, + uct_rocm_ipc_cache_pgt_dir_alloc, + uct_rocm_ipc_cache_pgt_dir_release); + if (status != UCS_OK) { + goto err_destroy_rwlock; + } + + cache_desc->name = strdup(name); + if (cache_desc->name == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err_destroy_rwlock; + } + + *cache = cache_desc; + return UCS_OK; + +err_destroy_rwlock: + pthread_rwlock_destroy(&cache_desc->lock); +err: + ucs_free(cache_desc); /* pairs with the ucs_malloc() of cache_desc, as in uct_rocm_ipc_destroy_cache() */ + return status; +} + +void uct_rocm_ipc_destroy_cache(uct_rocm_ipc_cache_t *cache) +{ + uct_rocm_ipc_cache_purge(cache); + ucs_pgtable_cleanup(&cache->pgtable); + pthread_rwlock_destroy(&cache->lock); + free(cache->name); + ucs_free(cache); +} diff --git a/src/uct/rocm/ipc/rocm_ipc_cache.h b/src/uct/rocm/ipc/rocm_ipc_cache.h new file mode 100644 index 0000000..fefce99 --- /dev/null +++ b/src/uct/rocm/ipc/rocm_ipc_cache.h @@ -0,0 +1,36 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms.
+ */ + +#ifndef UCT_ROCM_IPC_CACHE_H_ +#define UCT_ROCM_IPC_CACHE_H_ + +#include +#include +#include "rocm_ipc_md.h" + + +typedef struct uct_rocm_ipc_cache_region { + ucs_pgt_region_t super; /**< Base class - page table region */ + ucs_list_link_t list; /**< List element */ + uct_rocm_ipc_key_t key; /**< Remote memory key */ + void *mapped_addr; /**< Local mapped address */ +} uct_rocm_ipc_cache_region_t; + +typedef struct uct_rocm_ipc_cache { + pthread_rwlock_t lock; /**< protects the page table */ + ucs_pgtable_t pgtable; /**< Page table to hold the regions */ + char *name; /**< Name */ +} uct_rocm_ipc_cache_t; + +ucs_status_t uct_rocm_ipc_create_cache(uct_rocm_ipc_cache_t **cache, + const char *name); + +void uct_rocm_ipc_destroy_cache(uct_rocm_ipc_cache_t *cache); + +ucs_status_t uct_rocm_ipc_cache_map_memhandle(void *arg, uct_rocm_ipc_key_t *key, + void **mapped_addr); +#endif diff --git a/src/uct/rocm/ipc/rocm_ipc_ep.c b/src/uct/rocm/ipc/rocm_ipc_ep.c new file mode 100644 index 0000000..d4da78d --- /dev/null +++ b/src/uct/rocm/ipc/rocm_ipc_ep.c @@ -0,0 +1,156 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms.
+ */ + +#include "rocm_ipc_ep.h" +#include "rocm_ipc_iface.h" +#include "rocm_ipc_md.h" + +#include + +static UCS_CLASS_INIT_FUNC(uct_rocm_ipc_ep_t, const uct_ep_params_t *params) +{ + uct_rocm_ipc_iface_t *iface = ucs_derived_of(params->iface, uct_rocm_ipc_iface_t); + char target_name[64]; + ucs_status_t status; + + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super); + + self->remote_pid = *(const pid_t*)params->iface_addr; + + snprintf(target_name, sizeof(target_name), "dest:%d", self->remote_pid); /* reuse the pid read above; no const-dropping cast */ + status = uct_rocm_ipc_create_cache(&self->remote_memh_cache, target_name); + if (status != UCS_OK) { + ucs_error("could not create rocm ipc cache: %s", + ucs_status_string(status)); + return status; + } + + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_rocm_ipc_ep_t) +{ + uct_rocm_ipc_destroy_cache(self->remote_memh_cache); +} + +UCS_CLASS_DEFINE(uct_rocm_ipc_ep_t, uct_base_ep_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_rocm_ipc_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_rocm_ipc_ep_t, uct_ep_t); + +#define uct_rocm_ipc_trace_data(_remote_addr, _rkey, _fmt, ...)
\ + ucs_trace_data(_fmt " to %"PRIx64"(%+ld)", ## __VA_ARGS__, (_remote_addr), \ + (_rkey)) + +ucs_status_t uct_rocm_ipc_ep_zcopy(uct_ep_h tl_ep, + uint64_t remote_addr, + const uct_iov_t *iov, + uct_rocm_ipc_key_t *key, + uct_completion_t *comp, + int is_put) +{ + uct_rocm_ipc_ep_t *ep = ucs_derived_of(tl_ep, uct_rocm_ipc_ep_t); + hsa_status_t status; + hsa_agent_t local_agent; + size_t size = uct_iov_get_length(iov); + ucs_status_t ret = UCS_OK; + void *base_addr, *local_addr = iov->buffer; + uct_rocm_ipc_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_rocm_ipc_iface_t); + void *remote_base_addr, *remote_copy_addr; + void *dst_addr, *src_addr; + uct_rocm_ipc_signal_desc_t *rocm_ipc_signal; + + /* no data to deliver */ + if (!size) + return UCS_OK; + + if ((remote_addr < key->address) || + (remote_addr + size > key->address + key->length)) { + ucs_error("remote addr %lx/%lx out of range %lx/%lx", + remote_addr, size, key->address, key->length); + return UCS_ERR_INVALID_PARAM; + } + + status = uct_rocm_base_get_ptr_info(local_addr, size, &base_addr, + NULL, &local_agent); + if (status != HSA_STATUS_SUCCESS) { + ucs_error("local addr %p/%lx is not ROCM memory", local_addr, size); + return UCS_ERR_INVALID_ADDR; + } + + ret = uct_rocm_ipc_cache_map_memhandle((void *)ep->remote_memh_cache, key, + &remote_base_addr); + if (ret != UCS_OK) { + ucs_error("fail to attach ipc mem %p %d\n", (void *)key->address, ret); + return ret; + } + + remote_copy_addr = UCS_PTR_BYTE_OFFSET(remote_base_addr, + remote_addr - key->address); + if (is_put) { + dst_addr = remote_copy_addr; + src_addr = local_addr; + } + else { + dst_addr = local_addr; + src_addr = remote_copy_addr; + } + + rocm_ipc_signal = ucs_mpool_get(&iface->signal_pool); + hsa_signal_store_screlease(rocm_ipc_signal->signal, 1); + + status = hsa_amd_memory_async_copy(dst_addr, local_agent, + src_addr, local_agent, + size, 0, NULL, + rocm_ipc_signal->signal); + + if (status != HSA_STATUS_SUCCESS) { + ucs_error("copy 
error"); + ucs_mpool_put(rocm_ipc_signal); + return UCS_ERR_IO_ERROR; + } + + rocm_ipc_signal->comp = comp; + rocm_ipc_signal->mapped_addr = remote_base_addr; + ucs_queue_push(&iface->signal_queue, &rocm_ipc_signal->queue); + + ucs_trace("rocm async copy issued :%p remote:%p, local:%p len:%ld", + rocm_ipc_signal, (void *)remote_addr, local_addr, size); + + return UCS_INPROGRESS; +} + +ucs_status_t uct_rocm_ipc_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + ucs_status_t ret; + uct_rocm_ipc_key_t *key = (uct_rocm_ipc_key_t *)rkey; + + ret = uct_rocm_ipc_ep_zcopy(tl_ep, remote_addr, iov, key, comp, 1); + + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + uct_rocm_ipc_trace_data(remote_addr, rkey, "PUT_ZCOPY [length %zu]", + uct_iov_total_length(iov, iovcnt)); + + return ret; +} + +ucs_status_t uct_rocm_ipc_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + ucs_status_t ret; + uct_rocm_ipc_key_t *key = (uct_rocm_ipc_key_t *)rkey; + + ret = uct_rocm_ipc_ep_zcopy(tl_ep, remote_addr, iov, key, comp, 0); + + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, + uct_iov_total_length(iov, iovcnt)); + uct_rocm_ipc_trace_data(remote_addr, rkey, "GET_ZCOPY [length %zu]", + uct_iov_total_length(iov, iovcnt)); + + return ret; +} diff --git a/src/uct/rocm/ipc/rocm_ipc_ep.h b/src/uct/rocm/ipc/rocm_ipc_ep.h new file mode 100644 index 0000000..0accb54 --- /dev/null +++ b/src/uct/rocm/ipc/rocm_ipc_ep.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#ifndef ROCM_IPC_EP_H +#define ROCM_IPC_EP_H + +#include +#include +#include + +#include "rocm_ipc_cache.h" + +typedef struct uct_rocm_ipc_ep { + uct_base_ep_t super; + pid_t remote_pid; + uct_rocm_ipc_cache_t *remote_memh_cache; +} uct_rocm_ipc_ep_t; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_rocm_ipc_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_rocm_ipc_ep_t, uct_ep_t); + +ucs_status_t uct_rocm_ipc_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); +ucs_status_t uct_rocm_ipc_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +#endif diff --git a/src/uct/rocm/ipc/rocm_ipc_iface.c b/src/uct/rocm/ipc/rocm_ipc_iface.c new file mode 100644 index 0000000..faa84f3 --- /dev/null +++ b/src/uct/rocm/ipc/rocm_ipc_iface.c @@ -0,0 +1,243 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "rocm_ipc_iface.h" +#include "rocm_ipc_md.h" +#include "rocm_ipc_ep.h" + +#include +#include +#include +#include + + +static ucs_config_field_t uct_rocm_ipc_iface_config_table[] = { + + {"", "", NULL, + ucs_offsetof(uct_rocm_ipc_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + {NULL} +}; + +static uint64_t uct_rocm_ipc_iface_node_guid(uct_base_iface_t *iface) +{ + return ucs_machine_guid() * + ucs_string_to_id(iface->md->component->name); +} + +ucs_status_t uct_rocm_ipc_iface_get_device_address(uct_iface_t *tl_iface, + uct_device_addr_t *addr) +{ + uct_base_iface_t *iface = ucs_derived_of(tl_iface, uct_base_iface_t); + + *(uint64_t*)addr = uct_rocm_ipc_iface_node_guid(iface); + return UCS_OK; +} + +static ucs_status_t uct_rocm_ipc_iface_get_address(uct_iface_h tl_iface, + uct_iface_addr_t *iface_addr) +{ + *(pid_t*)iface_addr = getpid(); + return UCS_OK; +} + +static int uct_rocm_ipc_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + uct_rocm_ipc_iface_t *iface = ucs_derived_of(tl_iface, uct_rocm_ipc_iface_t); + + return ((uct_rocm_ipc_iface_node_guid(&iface->super) == + *((const uint64_t *)dev_addr)) && ((getpid() != *(pid_t *)iface_addr))); +} + +static ucs_status_t uct_rocm_ipc_iface_query(uct_iface_h tl_iface, + uct_iface_attr_t *iface_attr) +{ + uct_rocm_ipc_iface_t *iface = ucs_derived_of(tl_iface, uct_rocm_ipc_iface_t); + + uct_base_iface_query(&iface->super, iface_attr); + + iface_attr->cap.put.min_zcopy = 0; + iface_attr->cap.put.max_zcopy = SIZE_MAX; + iface_attr->cap.put.opt_zcopy_align = sizeof(uint32_t); + iface_attr->cap.put.align_mtu = iface_attr->cap.put.opt_zcopy_align; + iface_attr->cap.put.max_iov = 1; + + iface_attr->cap.get.min_zcopy = 0; + iface_attr->cap.get.max_zcopy = SIZE_MAX; + iface_attr->cap.get.opt_zcopy_align = sizeof(uint32_t); + iface_attr->cap.get.align_mtu = iface_attr->cap.get.opt_zcopy_align; + 
iface_attr->cap.get.max_iov = 1; + + iface_attr->iface_addr_len = sizeof(pid_t); + iface_attr->device_addr_len = sizeof(uint64_t); + iface_attr->ep_addr_len = 0; + iface_attr->max_conn_priv = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_GET_ZCOPY | + UCT_IFACE_FLAG_PUT_ZCOPY | + UCT_IFACE_FLAG_PENDING | + UCT_IFACE_FLAG_CONNECT_TO_IFACE; + + /* TODO: get accurate info */ + iface_attr->latency.overhead = 80e-9; /* 80 ns */ + iface_attr->latency.growth = 0; + iface_attr->bandwidth.dedicated = 10240 * 1024.0 * 1024.0; /* 10240 MB*/ + iface_attr->bandwidth.shared = 0; + iface_attr->overhead = 0.4e-6; /* 0.4 us */ + + return UCS_OK; +} + +static UCS_CLASS_DECLARE_DELETE_FUNC(uct_rocm_ipc_iface_t, uct_iface_t); + +static ucs_status_t +uct_rocm_ipc_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp) +{ + uct_rocm_ipc_iface_t *iface = ucs_derived_of(tl_iface, uct_rocm_ipc_iface_t); + + if (comp != NULL) { + return UCS_ERR_UNSUPPORTED; + } + + if (ucs_queue_is_empty(&iface->signal_queue)) { + UCT_TL_IFACE_STAT_FLUSH(ucs_derived_of(tl_iface, uct_base_iface_t)); + return UCS_OK; + } + + UCT_TL_IFACE_STAT_FLUSH_WAIT(ucs_derived_of(tl_iface, uct_base_iface_t)); + return UCS_INPROGRESS; +} + +static unsigned uct_rocm_ipc_iface_progress(uct_iface_h tl_iface) +{ + uct_rocm_ipc_iface_t *iface = ucs_derived_of(tl_iface, uct_rocm_ipc_iface_t); + static const unsigned max_signals = 16; + unsigned count = 0; + uct_rocm_ipc_signal_desc_t *rocm_ipc_signal; + ucs_queue_iter_t iter; + + ucs_queue_for_each_safe(rocm_ipc_signal, iter, &iface->signal_queue, queue) { + if (hsa_signal_load_scacquire(rocm_ipc_signal->signal) != 0) { + continue; + } + + ucs_queue_del_iter(&iface->signal_queue, iter); + if (rocm_ipc_signal->comp != NULL) { + uct_invoke_completion(rocm_ipc_signal->comp, UCS_OK); + } + + ucs_trace_poll("ROCM_IPC Signal Done :%p", rocm_ipc_signal); + ucs_mpool_put(rocm_ipc_signal); + count++; + + if (count >= max_signals) { + break; + } + } + + return count; +} 
+/*
+ * Operations table of the ROCm IPC interface; it is passed to the
+ * uct_base_iface_t constructor by the class init function below.
+ */
+static uct_iface_ops_t uct_rocm_ipc_iface_ops = {
+    .ep_put_zcopy = uct_rocm_ipc_ep_put_zcopy,
+    .ep_get_zcopy = uct_rocm_ipc_ep_get_zcopy,
+    .ep_pending_add = ucs_empty_function_return_busy,
+    .ep_pending_purge = ucs_empty_function,
+    .ep_flush = uct_base_ep_flush,
+    .ep_fence = uct_base_ep_fence,
+    .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_rocm_ipc_ep_t),
+    .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_rocm_ipc_ep_t),
+    .iface_flush = uct_rocm_ipc_iface_flush,
+    .iface_fence = uct_base_iface_fence,
+    .iface_progress_enable = uct_base_iface_progress_enable,
+    .iface_progress_disable = uct_base_iface_progress_disable,
+    .iface_progress = uct_rocm_ipc_iface_progress,
+    .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_rocm_ipc_iface_t),
+    .iface_query = uct_rocm_ipc_iface_query,
+    .iface_get_address = uct_rocm_ipc_iface_get_address,
+    .iface_get_device_address = uct_rocm_ipc_iface_get_device_address,
+    .iface_is_reachable = uct_rocm_ipc_iface_is_reachable
+};
+/*
+ * Memory-pool obj_init hook: zeroes a new signal descriptor and creates its
+ * HSA signal. Aborts through ucs_fatal() if the signal cannot be created.
+ */
+static void uct_rocm_ipc_signal_desc_init(ucs_mpool_t *mp, void *obj, void *chunk)
+{
+    uct_rocm_ipc_signal_desc_t *base = (uct_rocm_ipc_signal_desc_t *)obj;
+    hsa_status_t status;
+
+    memset(base, 0, sizeof(*base));
+    status = hsa_signal_create(1, 0, NULL, &base->signal);
+    if (status != HSA_STATUS_SUCCESS) {
+        ucs_fatal("fail to create signal");
+    }
+}
+/*
+ * Memory-pool obj_cleanup hook: destroys the descriptor's HSA signal. Aborts
+ * through ucs_fatal() if the signal cannot be destroyed.
+ */
+static void uct_rocm_ipc_signal_desc_cleanup(ucs_mpool_t *mp, void *obj)
+{
+    uct_rocm_ipc_signal_desc_t *base = (uct_rocm_ipc_signal_desc_t *)obj;
+    hsa_status_t status;
+
+    status = hsa_signal_destroy(base->signal);
+    if (status != HSA_STATUS_SUCCESS) {
+        ucs_fatal("fail to destroy signal");
+    }
+}
+/* Pool callbacks: malloc-backed chunks plus the two descriptor hooks above. */
+static ucs_mpool_ops_t uct_rocm_ipc_signal_desc_mpool_ops = {
+    .chunk_alloc = ucs_mpool_chunk_malloc,
+    .chunk_release = ucs_mpool_chunk_free,
+    .obj_init = uct_rocm_ipc_signal_desc_init,
+    .obj_cleanup = uct_rocm_ipc_signal_desc_cleanup,
+};
+/*
+ * Interface constructor: initializes the base iface with the operations
+ * table above, creates the signal descriptor pool and the empty queue of
+ * in-flight copies.
+ *
+ * Returns UCS_OK, or the status of ucs_mpool_init() on failure.
+ *
+ * NOTE(review): the roles annotated on the ucs_mpool_init() arguments follow
+ * the UCX 1.8 prototype as remembered - confirm against ucs/datastruct/mpool.h.
+ */
+static UCS_CLASS_INIT_FUNC(uct_rocm_ipc_iface_t, uct_md_h md, uct_worker_h worker,
+                           const uct_iface_params_t *params,
+                           const uct_iface_config_t *tl_config)
+{
+    ucs_status_t status;
+
+    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_rocm_ipc_iface_ops, md, worker,
+                              params, tl_config UCS_STATS_ARG(params->stats_root)
+                              UCS_STATS_ARG(UCT_ROCM_IPC_TL_NAME));
+
+    status = ucs_mpool_init(&self->signal_pool,
+                            0,                                   /* presumably priv_size */
+                            sizeof(uct_rocm_ipc_signal_desc_t),  /* element size */
+                            0,                                   /* presumably align_offset */
+                            UCS_SYS_CACHE_LINE_SIZE,             /* alignment */
+                            128,                                 /* presumably elements per chunk */
+                            1024,                                /* presumably maximum number of elements */
+                            &uct_rocm_ipc_signal_desc_mpool_ops,
+                            "ROCM_IPC signal objects");
+    if (status != UCS_OK) {
+        ucs_error("rocm/ipc signal mpool creation failed");
+        return status;
+    }
+
+    ucs_queue_head_init(&self->signal_queue);
+
+    return UCS_OK;
+}
+
+/* Interface destructor: disables send/recv progress, then tears down the pool. */
+static UCS_CLASS_CLEANUP_FUNC(uct_rocm_ipc_iface_t)
+{
+    uct_base_iface_progress_disable(&self->super.super,
+                                    UCT_PROGRESS_SEND | UCT_PROGRESS_RECV);
+    ucs_mpool_cleanup(&self->signal_pool, 1);
+}
+
+UCS_CLASS_DEFINE(uct_rocm_ipc_iface_t, uct_base_iface_t);
+
+static UCS_CLASS_DEFINE_NEW_FUNC(uct_rocm_ipc_iface_t, uct_iface_t, uct_md_h,
+                                 uct_worker_h, const uct_iface_params_t*,
+                                 const uct_iface_config_t *);
+static UCS_CLASS_DEFINE_DELETE_FUNC(uct_rocm_ipc_iface_t, uct_iface_t);
+/* Registers the "rocm_ipc" transport with the ROCm IPC component. */
+UCT_TL_DEFINE(&uct_rocm_ipc_component, rocm_ipc, uct_rocm_base_query_devices,
+              uct_rocm_ipc_iface_t, "ROCM_IPC_",
+              uct_rocm_ipc_iface_config_table, uct_rocm_ipc_iface_config_t);
diff --git a/src/uct/rocm/ipc/rocm_ipc_iface.h b/src/uct/rocm/ipc/rocm_ipc_iface.h
new file mode 100644
index 0000000..3e0ba08
--- /dev/null
+++ b/src/uct/rocm/ipc/rocm_ipc_iface.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+
+#ifndef ROCM_IPC_IFACE_H
+#define ROCM_IPC_IFACE_H
+/* NOTE(review): both #include operands below were stripped in extraction. */
+#include
+
+#include
+
+#define UCT_ROCM_IPC_TL_NAME "rocm_ipc"
+/* Bookkeeping for one in-flight asynchronous copy (a signal_pool element). */
+typedef struct uct_rocm_ipc_signal_desc {
+    hsa_signal_t signal;        /* completion signal given to hsa_amd_memory_async_copy() */
+    void *mapped_addr;          /* local mapping of the peer region used by the copy */
+    uct_completion_t *comp;     /* user completion invoked by the progress function; may be NULL */
+    ucs_queue_elem_t queue;     /* link in uct_rocm_ipc_iface_t::signal_queue */
+} uct_rocm_ipc_signal_desc_t;
+/* ROCm IPC interface object. */
+typedef struct uct_rocm_ipc_iface {
+    uct_base_iface_t super;         /* base UCT interface */
+    ucs_mpool_t signal_pool;        /* pool of uct_rocm_ipc_signal_desc_t */
+    ucs_queue_head_t signal_queue;  /* descriptors of copies not yet reaped */
+} uct_rocm_ipc_iface_t;
+/* Interface configuration: only the generic UCT iface settings. */
+typedef struct uct_rocm_ipc_iface_config {
+    uct_iface_config_t super;
+} uct_rocm_ipc_iface_config_t;
+
+#endif
diff --git a/src/uct/rocm/ipc/rocm_ipc_md.c b/src/uct/rocm/ipc/rocm_ipc_md.c
new file mode 100644
index 0000000..6b53907
--- /dev/null
+++ b/src/uct/rocm/ipc/rocm_ipc_md.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "rocm_ipc_md.h"
+/* NOTE(review): the #include operand below was stripped in extraction. */
+#include
+
+/* Memory-domain configuration: only the generic UCT md table is inherited. */
+static ucs_config_field_t uct_rocm_ipc_md_config_table[] = {
+    {"", "", NULL,
+     ucs_offsetof(uct_rocm_ipc_md_config_t, super),
+     UCS_CONFIG_TYPE_TABLE(uct_md_config_table)},
+
+    {NULL}
+};
+/*
+ * Fills md_attr: registration-only domain for ROCm memory whose packed rkey
+ * is a whole uct_rocm_ipc_key_t. Always returns UCS_OK.
+ */
+static ucs_status_t uct_rocm_ipc_md_query(uct_md_h md, uct_md_attr_t *md_attr)
+{
+    md_attr->rkey_packed_size = sizeof(uct_rocm_ipc_key_t);
+    md_attr->cap.flags = UCT_MD_FLAG_REG |
+                         UCT_MD_FLAG_NEED_RKEY;
+    md_attr->cap.reg_mem_types = UCS_BIT(UCS_MEMORY_TYPE_ROCM);
+    md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_ROCM;
+    md_attr->cap.detect_mem_types = 0;
+    md_attr->cap.max_alloc = 0;
+    md_attr->cap.max_reg = ULONG_MAX;
+
+    /* TODO: get accurate number */
+    md_attr->reg_cost.overhead = 9e-9;
+    md_attr->reg_cost.growth = 0;
+
+    memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus)); /* set every bit of the CPU mask */
+    return UCS_OK;
+}
+/*
+ * Packs a memory handle: copies the whole uct_rocm_ipc_key_t behind memh
+ * into rkey_buffer. Always returns UCS_OK.
+ */
+static ucs_status_t uct_rocm_ipc_mkey_pack(uct_md_h md, uct_mem_h memh,
+                                           void *rkey_buffer)
+{
+    uct_rocm_ipc_key_t *packed = (uct_rocm_ipc_key_t *) rkey_buffer;
+    uct_rocm_ipc_key_t *key = (uct_rocm_ipc_key_t *) memh;
+
+    *packed = *key;
+
+    return UCS_OK;
+}
+
+static hsa_status_t uct_rocm_ipc_pack_key(void *address, size_t length, + uct_rocm_ipc_key_t *key) +{ + hsa_status_t status; + hsa_agent_t agent; + void *base_ptr; + size_t size; + + status = uct_rocm_base_get_ptr_info(address, length, &base_ptr, &size, &agent); + if (status != HSA_STATUS_SUCCESS) { + ucs_error("pack none ROCM ptr %p/%lx", address, length); + return status; + } + + status = hsa_amd_ipc_memory_create(base_ptr, size, &key->ipc); + if (status != HSA_STATUS_SUCCESS) { + ucs_error("Failed to create ipc for %p/%lx", address, length); + return status; + } + + key->address = (uintptr_t)base_ptr; + key->length = size; + key->dev_num = uct_rocm_base_get_dev_num(agent); + + return HSA_STATUS_SUCCESS; +} + +static ucs_status_t uct_rocm_ipc_mem_reg(uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + uct_rocm_ipc_key_t *key; + hsa_status_t status; + + key = ucs_malloc(sizeof(*key), "uct_rocm_ipc_key_t"); + if (NULL == key) { + ucs_error("Failed to allocate memory for uct_rocm_ipc_key_t"); + return UCS_ERR_NO_MEMORY; + } + + status = uct_rocm_ipc_pack_key(address, length, key); + if (status != HSA_STATUS_SUCCESS) { + ucs_free(key); + return UCS_ERR_INVALID_ADDR; + } + + *memh_p = key; + + return UCS_OK; +} + +static ucs_status_t uct_rocm_ipc_mem_dereg(uct_md_h md, uct_mem_h memh) +{ + uct_rocm_ipc_key_t *key = (uct_rocm_ipc_key_t *)memh; + + ucs_free(key); + return UCS_OK; +} + +static ucs_status_t +uct_rocm_ipc_md_open(uct_component_h component, const char *md_name, + const uct_md_config_t *uct_md_config, uct_md_h *md_p) +{ + static uct_md_ops_t md_ops = { + .close = (uct_md_close_func_t)ucs_empty_function, + .query = uct_rocm_ipc_md_query, + .mkey_pack = uct_rocm_ipc_mkey_pack, + .mem_reg = uct_rocm_ipc_mem_reg, + .mem_dereg = uct_rocm_ipc_mem_dereg, + .detect_memory_type = ucs_empty_function_return_unsupported, + }; + static uct_md_t md = { + .ops = &md_ops, + .component = &uct_rocm_ipc_component, + }; + + *md_p = &md; + 
return UCS_OK; +} + +static ucs_status_t uct_rocm_ipc_rkey_unpack(uct_component_t *component, + const void *rkey_buffer, + uct_rkey_t *rkey_p, void **handle_p) +{ + uct_rocm_ipc_key_t *packed = (uct_rocm_ipc_key_t *)rkey_buffer; + uct_rocm_ipc_key_t *key; + + key = ucs_malloc(sizeof(uct_rocm_ipc_key_t), "uct_rocm_ipc_key_t"); + if (NULL == key) { + ucs_error("Failed to allocate memory for uct_rocm_ipc_key_t"); + return UCS_ERR_NO_MEMORY; + } + + *key = *packed; + *handle_p = NULL; + *rkey_p = (uintptr_t)key; + + return UCS_OK; +} + +static ucs_status_t uct_rocm_ipc_rkey_release(uct_component_t *component, + uct_rkey_t rkey, void *handle) +{ + ucs_assert(NULL == handle); + ucs_free((void *)rkey); + return UCS_OK; +} + +uct_component_t uct_rocm_ipc_component = { + .query_md_resources = uct_rocm_base_query_md_resources, + .md_open = uct_rocm_ipc_md_open, + .cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = uct_rocm_ipc_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = uct_rocm_ipc_rkey_release, + .name = "rocm_ipc", + .md_config = { + .name = "ROCm-IPC memory domain", + .prefix = "ROCM_IPC_MD_", + .table = uct_rocm_ipc_md_config_table, + .size = sizeof(uct_rocm_ipc_md_config_t), + }, + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_rocm_ipc_component), + .flags = 0 +}; +UCT_COMPONENT_REGISTER(&uct_rocm_ipc_component); + diff --git a/src/uct/rocm/ipc/rocm_ipc_md.h b/src/uct/rocm/ipc/rocm_ipc_md.h new file mode 100644 index 0000000..ebe4698 --- /dev/null +++ b/src/uct/rocm/ipc/rocm_ipc_md.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */
+
+#ifndef ROCM_IPC_MD_H
+#define ROCM_IPC_MD_H
+/* NOTE(review): both #include operands below were stripped in extraction. */
+#include
+#include
+
+/* Component object defined in rocm_ipc_md.c. */
+extern uct_component_t uct_rocm_ipc_component;
+/* ROCm IPC memory domain: adds nothing to the base uct_md. */
+typedef struct uct_rocm_ipc_md {
+    struct uct_md super;
+} uct_rocm_ipc_md_t;
+/* Memory-domain configuration: only the generic UCT md settings. */
+typedef struct uct_rocm_ipc_md_config {
+    uct_md_config_t super;
+} uct_rocm_ipc_md_config_t;
+/* Memory key; the same struct serves as memory handle, packed rkey and unpacked rkey. */
+typedef struct uct_rocm_ipc_key {
+    hsa_amd_ipc_memory_t ipc;   /* IPC handle filled by hsa_amd_ipc_memory_create() */
+    uintptr_t address;          /* base address of the allocation in the registering process */
+    size_t length;              /* size of that allocation */
+    int dev_num;                /* value of uct_rocm_base_get_dev_num() for the owning agent */
+} uct_rocm_ipc_key_t;
+
+#endif
diff --git a/src/uct/sm/Makefile.am b/src/uct/sm/Makefile.am
new file mode 100644
index 0000000..189f97c
--- /dev/null
+++ b/src/uct/sm/Makefile.am
@@ -0,0 +1,6 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+
+SUBDIRS = cma knem mm
diff --git a/src/uct/sm/Makefile.in b/src/uct/sm/Makefile.in
new file mode 100644
index 0000000..8ac405f
--- /dev/null
+++ b/src/uct/sm/Makefile.in
@@ -0,0 +1,764 @@
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/sm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + 
$(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = 
$(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = 
@DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ 
+MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = 
@host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +SUBDIRS = cma knem mm +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/sm/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/sm/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + 
install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/uct/sm/base/sm_ep.c b/src/uct/sm/base/sm_ep.c new file mode 100644 index 0000000..1fecff3 --- /dev/null +++ b/src/uct/sm/base/sm_ep.c @@ -0,0 +1,229 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sm_ep.h" + +#include + + /* Trace helper: emits the caller message followed by the remote address in hex and the rkey printed as a signed value. */ +#define uct_sm_ep_trace_data(_remote_addr, _rkey, _fmt, ...) \ + ucs_trace_data(_fmt " to 0x%"PRIx64"(%+ld)", ## __VA_ARGS__, (_remote_addr), \ + (_rkey)) + /* Copy length bytes from buffer to the address rkey + remote_addr with memcpy (skipped when length is 0), invoke UCT_TL_EP_STAT_OP for PUT/SHORT and return UCS_OK. */ +ucs_status_t uct_sm_ep_put_short(uct_ep_h tl_ep, const void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey) +{ + if (ucs_likely(length != 0)) { + memcpy((void *)(rkey + remote_addr), buffer, length); + uct_sm_ep_trace_data(remote_addr, rkey, "PUT_SHORT [buffer %p size %u]", + buffer, length); + } else { + ucs_trace_data("PUT_SHORT [zero-length]"); + } + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length); + return UCS_OK; +} + /* Invoke pack_cb with the address rkey + remote_addr as destination and arg as its argument, invoke UCT_TL_EP_STAT_OP for PUT/BCOPY and return the length reported by pack_cb. */ +ssize_t uct_sm_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb, + void *arg, uint64_t remote_addr, uct_rkey_t rkey) +{ + size_t length; + + length = pack_cb((void *)(rkey + remote_addr), arg); + uct_sm_ep_trace_data(remote_addr, rkey, "PUT_BCOPY [arg %p size %zu]", + arg, length); + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, BCOPY, length); + return length; +} + /* Invoke unpack_cb on length bytes located at rkey + remote_addr (skipped when length is 0), invoke UCT_TL_EP_STAT_OP for GET/BCOPY and return UCS_OK; comp is not referenced. */ +ucs_status_t uct_sm_ep_get_bcopy(uct_ep_h tl_ep, uct_unpack_callback_t unpack_cb, + void *arg, size_t length, + uint64_t 
remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ /* a zero-length request skips the unpack callback */ + if (ucs_likely(0 != length)) { + unpack_cb(arg, (void *)(rkey + remote_addr), length); + uct_sm_ep_trace_data(remote_addr, rkey, "GET_BCOPY [length %zu]", length); + } else { + ucs_trace_data("GET_BCOPY [zero-length]"); + } + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, BCOPY, length); + return UCS_OK; +} + /* Apply the ADD, AND, OR or XOR operation selected by opcode, with operand value, to the 32-bit word at rkey + remote_addr. Any other opcode triggers ucs_assertv and returns UCS_ERR_UNSUPPORTED. */ +ucs_status_t uct_sm_ep_atomic32_post(uct_ep_h ep, unsigned opcode, uint32_t value, + uint64_t remote_addr, uct_rkey_t rkey) +{ + uint32_t *ptr = (uint32_t *)(rkey + remote_addr); + switch (opcode) { + case UCT_ATOMIC_OP_ADD: + ucs_atomic_add32(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_ADD32 [value %"PRIu32"]", value); + break; + case UCT_ATOMIC_OP_AND: + ucs_atomic_and32(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_AND32 [value %"PRIu32"]", value); + break; + case UCT_ATOMIC_OP_OR: + ucs_atomic_or32(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_OR32 [value %"PRIu32"]", value); + break; + case UCT_ATOMIC_OP_XOR: + ucs_atomic_xor32(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_XOR32 [value %"PRIu32"]", value); + break; + default: /* opcodes other than the four above are rejected */ + ucs_assertv(0, "incorrect opcode: %d", opcode); + return UCS_ERR_UNSUPPORTED; + } + + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(ep, uct_base_ep_t)); + return UCS_OK; +} + /* 64-bit counterpart of the function above: apply the ADD, AND, OR or XOR operation selected by opcode to the 64-bit word at rkey + remote_addr. */ +ucs_status_t uct_sm_ep_atomic64_post(uct_ep_h ep, unsigned opcode, uint64_t value, + uint64_t remote_addr, uct_rkey_t rkey) +{ + uint64_t *ptr = (uint64_t *)(rkey + remote_addr); + switch (opcode) { + case UCT_ATOMIC_OP_ADD: + ucs_atomic_add64(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_ADD64 [value %"PRIu64"]", value); + break; + case UCT_ATOMIC_OP_AND: + ucs_atomic_and64(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_AND64 [value %"PRIu64"]", value); + break; + case UCT_ATOMIC_OP_OR: + ucs_atomic_or64(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_OR64 [value %"PRIu64"]", 
value); + break; + case UCT_ATOMIC_OP_XOR: + ucs_atomic_xor64(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_XOR64 [value %"PRIu64"]", value); + break; + default: + ucs_assertv(0, "incorrect opcode: %d", opcode); + return UCS_ERR_UNSUPPORTED; + } + + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(ep, uct_base_ep_t)); + return UCS_OK; +} + /* Apply the ADD, AND, OR, XOR or SWAP operation selected by opcode to the 64-bit word at rkey + remote_addr and store the value returned by the ucs_atomic helper in *result. Any other opcode triggers ucs_assertv and returns UCS_ERR_UNSUPPORTED; comp is not referenced. */ +ucs_status_t uct_sm_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint64_t value, uint64_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uint64_t *ptr = (uint64_t *)(rkey + remote_addr); + switch (opcode) { + case UCT_ATOMIC_OP_ADD: + *result = ucs_atomic_fadd64(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_FADD64 [value %"PRIu64 + " result %"PRIu64"]", value, *result); + break; + case UCT_ATOMIC_OP_AND: + *result = ucs_atomic_fand64(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_FAND64 [value %"PRIu64 + " result %"PRIu64"]", value, *result); + break; + case UCT_ATOMIC_OP_OR: + *result = ucs_atomic_for64(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_FOR64 [value %"PRIu64 + " result %"PRIu64"]", value, *result); + break; + case UCT_ATOMIC_OP_XOR: + *result = ucs_atomic_fxor64(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_FXOR64 [value %"PRIu64 + " result %"PRIu64"]", value, *result); + break; + case UCT_ATOMIC_OP_SWAP: + *result = ucs_atomic_swap64(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_SWAP64 [value %"PRIu64 + " result %"PRIu64"]", value, *result); + break; + default: /* opcodes other than the five above are rejected */ + ucs_assertv(0, "incorrect opcode: %d", opcode); + return UCS_ERR_UNSUPPORTED; + } + + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(ep, uct_base_ep_t)); + return UCS_OK; +} + /* 32-bit counterpart of the function above: the same five operations applied to the 32-bit word at rkey + remote_addr, with the helper return value stored in *result. */ +ucs_status_t uct_sm_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint32_t value, uint32_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uint32_t *ptr = (uint32_t *)(rkey + remote_addr); + switch (opcode) { + case 
UCT_ATOMIC_OP_ADD: + *result = ucs_atomic_fadd32(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_FADD32 [value %"PRIu32 + " result %"PRIu32"]", value, *result); + break; + case UCT_ATOMIC_OP_AND: + *result = ucs_atomic_fand32(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_FAND32 [value %"PRIu32 + " result %"PRIu32"]", value, *result); + break; + case UCT_ATOMIC_OP_OR: + *result = ucs_atomic_for32(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_FOR32 [value %"PRIu32 + " result %"PRIu32"]", value, *result); + break; + case UCT_ATOMIC_OP_XOR: + *result = ucs_atomic_fxor32(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_FXOR32 [value %"PRIu32 + " result %"PRIu32"]", value, *result); + break; + case UCT_ATOMIC_OP_SWAP: + *result = ucs_atomic_swap32(ptr, value); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_SWAP32 [value %"PRIu32 + " result %"PRIu32"]", value, *result); + break; + default: + ucs_assertv(0, "incorrect opcode: %d", opcode); + return UCS_ERR_UNSUPPORTED; + } + + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(ep, uct_base_ep_t)); + return UCS_OK; +} + /* Call ucs_atomic_cswap64 on the 64-bit word at rkey + remote_addr with compare and swap, store its return value in *result and return UCS_OK; comp is not referenced. */ +ucs_status_t uct_sm_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, + uint64_t swap, uint64_t remote_addr, + uct_rkey_t rkey, uint64_t *result, + uct_completion_t *comp) +{ + uint64_t *ptr = (uint64_t *)(rkey + remote_addr); + *result = ucs_atomic_cswap64(ptr, compare, swap); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_CSWAP64 [compare %"PRIu64 + " swap %"PRIu64" result %"PRIu64"]", compare, swap, + *result); + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t)); + return UCS_OK; +} + /* 32-bit counterpart of the function above, built on ucs_atomic_cswap32. */ +ucs_status_t uct_sm_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, + uint32_t swap, uint64_t remote_addr, + uct_rkey_t rkey, uint32_t *result, + uct_completion_t *comp) +{ + uint32_t *ptr = (uint32_t *)(rkey + remote_addr); + *result = ucs_atomic_cswap32(ptr, compare, swap); + uct_sm_ep_trace_data(remote_addr, rkey, "ATOMIC_CSWAP32 [compare %"PRIu32 + 
" swap %"PRIu32" result %"PRIu32"]", compare, swap, + *result); + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t)); + return UCS_OK; +} diff --git a/src/uct/sm/base/sm_ep.h b/src/uct/sm/base/sm_ep.h new file mode 100644 index 0000000..78454fb --- /dev/null +++ b/src/uct/sm/base/sm_ep.h @@ -0,0 +1,44 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#ifndef UCT_SM_EP_H +#define UCT_SM_EP_H + +#include "uct/base/uct_iface.h" + + /* Endpoint operations implemented in sm_ep.c; each one accesses memory at the address rkey + remote_addr. */ +ucs_status_t uct_sm_ep_put_short(uct_ep_h tl_ep, const void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey); +ssize_t uct_sm_ep_put_bcopy(uct_ep_h ep, uct_pack_callback_t pack_cb, + void *arg, uint64_t remote_addr, uct_rkey_t rkey); + +ucs_status_t uct_sm_ep_get_bcopy(uct_ep_h ep, uct_unpack_callback_t unpack_cb, + void *arg, size_t length, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +ucs_status_t uct_sm_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, + uint64_t swap, uint64_t remote_addr, + uct_rkey_t rkey, uint64_t *result, + uct_completion_t *comp); +ucs_status_t uct_sm_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, + uint32_t swap, uint64_t remote_addr, + uct_rkey_t rkey, uint32_t *result, + uct_completion_t *comp); +ucs_status_t uct_sm_ep_atomic64_post(uct_ep_h ep, unsigned opcode, uint64_t value, + uint64_t remote_addr, uct_rkey_t rkey); +ucs_status_t uct_sm_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint64_t value, uint64_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); +ucs_status_t uct_sm_ep_atomic32_post(uct_ep_h ep, unsigned opcode, uint32_t value, + uint64_t remote_addr, uct_rkey_t rkey); +ucs_status_t uct_sm_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint32_t value, uint32_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +#endif diff --git a/src/uct/sm/base/sm_iface.c b/src/uct/sm/base/sm_iface.c new 
file mode 100644 index 0000000..feb5a01 --- /dev/null +++ b/src/uct/sm/base/sm_iface.c @@ -0,0 +1,169 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sm_iface.h" + +#include +#include +#include +#include +#include + + /* Bit 63 of the device id: set when the address also carries an IPC namespace id. */ +#define UCS_SM_IFACE_ADDR_FLAG_EXT UCS_BIT(63) + + /* Basic device address: only the system id. */ +typedef struct { + uint64_t id; +} ucs_sm_iface_base_device_addr_t; + /* Extended device address: the basic address followed by the IPC namespace id. */ +typedef struct { + ucs_sm_iface_base_device_addr_t super; + ucs_sys_ns_t ipc_ns; +} ucs_sm_iface_ext_device_addr_t; + + /* Configuration table: the base iface table plus the BW option, whose default is 12179MBs. */ +ucs_config_field_t uct_sm_iface_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_sm_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + {"BW", "12179MBs", + "Effective memory bandwidth", + ucs_offsetof(uct_sm_iface_config_t, bandwidth), UCS_CONFIG_TYPE_BW}, + + {NULL} +}; + /* Delegates to uct_single_device_resource() with UCT_SM_DEVICE_NAME and UCT_DEVICE_TYPE_SHM. */ +ucs_status_t +uct_sm_base_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + return uct_single_device_resource(md, UCT_SM_DEVICE_NAME, + UCT_DEVICE_TYPE_SHM, tl_devices_p, + num_tl_devices_p); +} + + +/* Return a 64-bit system id: the XOR of the two halves reported by ucs_sys_get_boot_id(), or ucs_machine_guid() when the boot id cannot be read */ +static uint64_t uct_sm_iface_get_system_id() +{ + uint64_t high; + uint64_t low; + ucs_status_t status; + + status = ucs_sys_get_boot_id(&high, &low); + if (status == UCS_OK) { + return high ^ low; + } + + return ucs_machine_guid(); +} + /* Fill addr with the system id (bit 63 cleared); when the process is not in the default IPC namespace, set bit 63 and also store the IPC namespace id. The ipc_ns field is written only in that case. */ +ucs_status_t UCS_F_NOOPTIMIZE /* GCC failed to compile it in release mode */ +uct_sm_iface_get_device_address(uct_iface_t *tl_iface, uct_device_addr_t *addr) +{ + ucs_sm_iface_ext_device_addr_t *ext_addr = (void*)addr; + + ext_addr->super.id = uct_sm_iface_get_system_id() & ~UCS_SM_IFACE_ADDR_FLAG_EXT; + + if (!ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_IPC)) { + ext_addr->super.id |= UCS_SM_IFACE_ADDR_FLAG_EXT; + ext_addr->ipc_ns = ucs_sys_get_ns(UCS_SYS_NS_TYPE_IPC); + } + + return 
UCS_OK; +} + /* Return 1 when dev_addr describes a peer with the same system id and the same IPC namespace as this process, 0 otherwise (also 0 when the local address cannot be obtained); iface_addr is not referenced. */ +int uct_sm_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + ucs_sm_iface_ext_device_addr_t *ext_addr = (void*)dev_addr; + ucs_sm_iface_ext_device_addr_t my_addr = {}; + ucs_status_t status; + + status = uct_sm_iface_get_device_address(tl_iface, + (uct_device_addr_t*)&my_addr); + if (status != UCS_OK) { + ucs_error("failed to get device address"); + return 0; + } + + /* do not merge these evaluations into a single 'if' due + * to a clang compilation warning */ + /* check that both processes are on the same host and that + * both of them are in the default (or both in a non-default) IPC namespace */ + if (ext_addr->super.id != my_addr.super.id) { + return 0; + } + + if (!(ext_addr->super.id & UCS_SM_IFACE_ADDR_FLAG_EXT)) { + return 1; /* both processes are in the default IPC namespace */ + } + + /* both processes are in a non-default IPC namespace - return 1 if the + * namespace IDs are the same */ + return ext_addr->ipc_ns == my_addr.ipc_ns; +} + /* Interface-level fence: call ucs_memory_cpu_fence(), invoke UCT_TL_IFACE_STAT_FENCE and return UCS_OK; flags is not referenced. */ +ucs_status_t uct_sm_iface_fence(uct_iface_t *tl_iface, unsigned flags) +{ + ucs_memory_cpu_fence(); + UCT_TL_IFACE_STAT_FENCE(ucs_derived_of(tl_iface, uct_base_iface_t)); + return UCS_OK; +} + /* Endpoint-level fence: call ucs_memory_cpu_fence(), invoke UCT_TL_EP_STAT_FENCE and return UCS_OK; flags is not referenced. */ +ucs_status_t uct_sm_ep_fence(uct_ep_t *tl_ep, unsigned flags) +{ + ucs_memory_cpu_fence(); + UCT_TL_EP_STAT_FENCE(ucs_derived_of(tl_ep, uct_base_ep_t)); + return UCS_OK; +} + /* Size of the device address: the basic struct in the default IPC namespace, the extended struct otherwise. */ +size_t uct_sm_iface_get_device_addr_len() +{ + return ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_IPC) ? 
+ sizeof(ucs_sm_iface_base_device_addr_t) : + sizeof(ucs_sm_iface_ext_device_addr_t); +} + /* Class constructor: requires the OPEN_MODE field and the DEVICE open mode (otherwise returns UCS_ERR_UNSUPPORTED), initializes the uct_base_iface_t parent and copies the configured bandwidth into self. */ +UCS_CLASS_INIT_FUNC(uct_sm_iface_t, uct_iface_ops_t *ops, uct_md_h md, + uct_worker_h worker, const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_sm_iface_config_t *sm_config = ucs_derived_of(tl_config, + uct_sm_iface_config_t); + + UCT_CHECK_PARAM(params->field_mask & UCT_IFACE_PARAM_FIELD_OPEN_MODE, + "UCT_IFACE_PARAM_FIELD_OPEN_MODE is not defined"); + if (!(params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE)) { + ucs_error("only UCT_IFACE_OPEN_MODE_DEVICE is supported"); + return UCS_ERR_UNSUPPORTED; + } + + UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, ops, md, worker, params, + tl_config + UCS_STATS_ARG((params->field_mask & + UCT_IFACE_PARAM_FIELD_STATS_ROOT) ? + params->stats_root : NULL) + UCS_STATS_ARG(params->mode.device.dev_name)); + + self->config.bandwidth = sm_config->bandwidth; + + return UCS_OK; +} + /* Class destructor: the body is empty, nothing is released here. */ +static UCS_CLASS_CLEANUP_FUNC(uct_sm_iface_t) +{ +} + +UCS_CLASS_DEFINE(uct_sm_iface_t, uct_base_iface_t); diff --git a/src/uct/sm/base/sm_iface.h b/src/uct/sm/base/sm_iface.h new file mode 100644 index 0000000..f745d19 --- /dev/null +++ b/src/uct/sm/base/sm_iface.h @@ -0,0 +1,58 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef SM_IFACE_H_ +#define SM_IFACE_H_ + +#include +#include +#include +#include + + /* Cap used by uct_sm_get_max_iov() and the device name passed to uct_single_device_resource() in sm_iface.c. */ +#define UCT_SM_MAX_IOV 16 +#define UCT_SM_DEVICE_NAME "memory" + + +extern ucs_config_field_t uct_sm_iface_config_table[]; + /* Interface configuration: the generic iface configuration plus the memory bandwidth. */ +typedef struct uct_sm_iface_common_config { + uct_iface_config_t super; + double bandwidth; /* Memory bandwidth in bytes per second */ +} uct_sm_iface_config_t; + /* Interface object: the base iface plus the bandwidth copied from the configuration by the class constructor. */ +typedef struct uct_sm_iface { + uct_base_iface_t super; + struct { + double bandwidth; /* Memory bandwidth in bytes per second */ + } config; +} uct_sm_iface_t; + + +ucs_status_t +uct_sm_base_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p); + +ucs_status_t uct_sm_iface_get_device_address(uct_iface_t *tl_iface, + uct_device_addr_t *addr); + +int uct_sm_iface_is_reachable(const uct_iface_h tl_iface, const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr); + +ucs_status_t uct_sm_iface_fence(uct_iface_t *tl_iface, unsigned flags); + +size_t uct_sm_iface_get_device_addr_len(); + +ucs_status_t uct_sm_ep_fence(uct_ep_t *tl_ep, unsigned flags); + /* Return the smaller of UCT_SM_MAX_IOV and ucs_iov_get_max(). */ +static UCS_F_ALWAYS_INLINE size_t uct_sm_get_max_iov() { + return ucs_min(UCT_SM_MAX_IOV, ucs_iov_get_max()); +} + +UCS_CLASS_DECLARE(uct_sm_iface_t, uct_iface_ops_t*, uct_md_h, uct_worker_h, + const uct_iface_params_t*, const uct_iface_config_t*); + +#endif diff --git a/src/uct/sm/cma/Makefile.am b/src/uct/sm/cma/Makefile.am new file mode 100644 index 0000000..fef4895 --- /dev/null +++ b/src/uct/sm/cma/Makefile.am @@ -0,0 +1,27 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + +if HAVE_CMA + +module_LTLIBRARIES = libuct_cma.la +libuct_cma_la_CFLAGS = $(BASE_CFLAGS) +libuct_cma_la_CPPFLAGS = $(BASE_CPPFLAGS) +libuct_cma_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/libuct.la +libuct_cma_la_LDFLAGS = -version-info $(SOVERSION) + +noinst_HEADERS = \ + cma_iface.h \ + cma_ep.h \ + cma_md.h + +libuct_cma_la_SOURCES = \ + cma_iface.c \ + cma_ep.c \ + cma_md.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/uct/sm/cma/Makefile.in b/src/uct/sm/cma/Makefile.in new file mode 100644 index 0000000..5802396 --- /dev/null +++ b/src/uct/sm/cma/Makefile.in @@ -0,0 +1,883 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/sm/cma +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + 
$(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d 
+CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_CMA_TRUE@libuct_cma_la_DEPENDENCIES = \ +@HAVE_CMA_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_CMA_TRUE@ $(top_builddir)/src/uct/libuct.la +am__libuct_cma_la_SOURCES_DIST = cma_iface.c cma_ep.c cma_md.c +@HAVE_CMA_TRUE@am_libuct_cma_la_OBJECTS = libuct_cma_la-cma_iface.lo \ +@HAVE_CMA_TRUE@ libuct_cma_la-cma_ep.lo libuct_cma_la-cma_md.lo +libuct_cma_la_OBJECTS = $(am_libuct_cma_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_cma_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libuct_cma_la_CFLAGS) \ + $(CFLAGS) $(libuct_cma_la_LDFLAGS) $(LDFLAGS) -o $@ +@HAVE_CMA_TRUE@am_libuct_cma_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libuct_cma_la-cma_ep.Plo \ + ./$(DEPDIR)/libuct_cma_la-cma_iface.Plo \ + ./$(DEPDIR)/libuct_cma_la-cma_md.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; 
+am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_cma_la_SOURCES) +DIST_SOURCES = $(am__libuct_cma_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = cma_iface.h cma_ep.h cma_md.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ 
+ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = 
@ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = 
@top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_CMA_TRUE@module_LTLIBRARIES = libuct_cma.la +@HAVE_CMA_TRUE@libuct_cma_la_CFLAGS = $(BASE_CFLAGS) +@HAVE_CMA_TRUE@libuct_cma_la_CPPFLAGS = $(BASE_CPPFLAGS) +@HAVE_CMA_TRUE@libuct_cma_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_CMA_TRUE@ $(top_builddir)/src/uct/libuct.la + +@HAVE_CMA_TRUE@libuct_cma_la_LDFLAGS = -version-info $(SOVERSION) +@HAVE_CMA_TRUE@noinst_HEADERS = \ +@HAVE_CMA_TRUE@ cma_iface.h \ +@HAVE_CMA_TRUE@ cma_ep.h \ +@HAVE_CMA_TRUE@ cma_md.h + +@HAVE_CMA_TRUE@libuct_cma_la_SOURCES = \ +@HAVE_CMA_TRUE@ cma_iface.c \ +@HAVE_CMA_TRUE@ cma_ep.c \ +@HAVE_CMA_TRUE@ cma_md.c + + +# Automake silent rules +@HAVE_CMA_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_CMA_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_CMA_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_CMA_TRUE@AM_V_LN_1 = true +@HAVE_CMA_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/sm/cma/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/sm/cma/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 
's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libuct_cma.la: $(libuct_cma_la_OBJECTS) $(libuct_cma_la_DEPENDENCIES) $(EXTRA_libuct_cma_la_DEPENDENCIES) + $(AM_V_CCLD)$(libuct_cma_la_LINK) $(am_libuct_cma_la_rpath) $(libuct_cma_la_OBJECTS) $(libuct_cma_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_cma_la-cma_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_cma_la-cma_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_cma_la-cma_md.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ 
$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libuct_cma_la-cma_iface.lo: cma_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cma_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cma_la_CFLAGS) $(CFLAGS) -MT libuct_cma_la-cma_iface.lo -MD -MP -MF $(DEPDIR)/libuct_cma_la-cma_iface.Tpo -c -o libuct_cma_la-cma_iface.lo `test -f 'cma_iface.c' || echo '$(srcdir)/'`cma_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_cma_la-cma_iface.Tpo $(DEPDIR)/libuct_cma_la-cma_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cma_iface.c' object='libuct_cma_la-cma_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cma_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cma_la_CFLAGS) $(CFLAGS) -c -o libuct_cma_la-cma_iface.lo `test -f 'cma_iface.c' || echo '$(srcdir)/'`cma_iface.c + +libuct_cma_la-cma_ep.lo: cma_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cma_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cma_la_CFLAGS) $(CFLAGS) -MT libuct_cma_la-cma_ep.lo -MD -MP -MF $(DEPDIR)/libuct_cma_la-cma_ep.Tpo -c -o libuct_cma_la-cma_ep.lo `test -f 'cma_ep.c' || echo 
'$(srcdir)/'`cma_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_cma_la-cma_ep.Tpo $(DEPDIR)/libuct_cma_la-cma_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cma_ep.c' object='libuct_cma_la-cma_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cma_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cma_la_CFLAGS) $(CFLAGS) -c -o libuct_cma_la-cma_ep.lo `test -f 'cma_ep.c' || echo '$(srcdir)/'`cma_ep.c + +libuct_cma_la-cma_md.lo: cma_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cma_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cma_la_CFLAGS) $(CFLAGS) -MT libuct_cma_la-cma_md.lo -MD -MP -MF $(DEPDIR)/libuct_cma_la-cma_md.Tpo -c -o libuct_cma_la-cma_md.lo `test -f 'cma_md.c' || echo '$(srcdir)/'`cma_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_cma_la-cma_md.Tpo $(DEPDIR)/libuct_cma_la-cma_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cma_md.c' object='libuct_cma_la-cma_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_cma_la_CPPFLAGS) $(CPPFLAGS) $(libuct_cma_la_CFLAGS) $(CFLAGS) -c -o libuct_cma_la-cma_md.lo `test -f 'cma_md.c' || echo '$(srcdir)/'`cma_md.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; 
\ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_CMA_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libuct_cma_la-cma_ep.Plo + -rm -f ./$(DEPDIR)/libuct_cma_la-cma_iface.Plo + -rm -f ./$(DEPDIR)/libuct_cma_la-cma_md.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libuct_cma_la-cma_ep.Plo + -rm -f ./$(DEPDIR)/libuct_cma_la-cma_iface.Plo + -rm -f ./$(DEPDIR)/libuct_cma_la-cma_md.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic 
mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + +@HAVE_CMA_TRUE@all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to create proper makefile dependencies +@HAVE_CMA_TRUE@$(local_la_modules): $(module_LTLIBRARIES) +@HAVE_CMA_TRUE@ $(AM_V_at)$(MKDIR_P) $(localmoduledir) +@HAVE_CMA_TRUE@ $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_CMA_TRUE@ (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ +@HAVE_CMA_TRUE@ done +@HAVE_CMA_TRUE@ @for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_CMA_TRUE@ $(AM_V_LN) $$lib; \ +@HAVE_CMA_TRUE@ done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/uct/sm/cma/cma_ep.c b/src/uct/sm/cma/cma_ep.c new file mode 100644 index 0000000..7c079e9 --- /dev/null +++ b/src/uct/sm/cma/cma_ep.c @@ -0,0 +1,164 @@ +/** +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+#include <sys/uio.h>
+
+#include "cma_ep.h"
+#include <ucs/debug/log.h>
+#include <ucs/sys/iovec.h>
+
+/* Common signature of process_vm_readv() and process_vm_writev() */
+typedef ssize_t (*uct_cma_ep_zcopy_fn_t)(pid_t, const struct iovec *,
+                                         unsigned long, const struct iovec *,
+                                         unsigned long, unsigned long);
+
+/* Endpoint constructor: stores the peer PID taken from the iface address */
+static UCS_CLASS_INIT_FUNC(uct_cma_ep_t, const uct_ep_params_t *params)
+{
+    uct_cma_iface_t *iface = ucs_derived_of(params->iface, uct_cma_iface_t);
+
+    UCT_CHECK_PARAM(params->field_mask & UCT_EP_PARAM_FIELD_IFACE_ADDR,
+                    "UCT_EP_PARAM_FIELD_IFACE_ADDR is not defined");
+
+    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super.super);
+    /* Strip the PID-namespace flag bit so that only the PID remains */
+    self->remote_pid = *(const pid_t*)params->iface_addr &
+                       ~UCT_CMA_IFACE_ADDR_FLAG_PID_NS;
+    return UCS_OK;
+}
+
+static UCS_CLASS_CLEANUP_FUNC(uct_cma_ep_t)
+{
+    /* No op */
+}
+
+UCS_CLASS_DEFINE(uct_cma_ep_t, uct_base_ep_t)
+UCS_CLASS_DEFINE_NEW_FUNC(uct_cma_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DEFINE_DELETE_FUNC(uct_cma_ep_t, uct_ep_t);
+
+
+/* Data-trace helper: appends the remote address and rkey to the message */
+#define uct_cma_trace_data(_remote_addr, _rkey, _fmt, ...) \
+     ucs_trace_data(_fmt " to %"PRIx64"(%+ld)", ## __VA_ARGS__, (_remote_addr), \
+                    (_rkey))
+
+/*
+ * Transfers remote_iov->iov_len bytes between local_iov[] and the single
+ * remote segment, calling fn_p again after every partial transfer. Fails
+ * with UCS_ERR_IO_ERROR when fn_p reports an error or moves no data.
+ */
+static UCS_F_ALWAYS_INLINE
+ucs_status_t uct_cma_ep_do_zcopy(uct_cma_ep_t *ep, struct iovec *local_iov,
+                                 size_t local_iov_cnt, struct iovec *remote_iov,
+                                 uct_cma_ep_zcopy_fn_t fn_p, const char *fn_name)
+{
+    size_t local_iov_idx               = 0;
+    size_t UCS_V_UNUSED remote_iov_idx = 0;
+    ssize_t ret;
+
+    do {
+        ret = fn_p(ep->remote_pid, &local_iov[local_iov_idx],
+                   local_iov_cnt - local_iov_idx, remote_iov, 1, 0);
+        /* ret == 0 makes no progress, so retrying would loop forever */
+        if (ucs_unlikely(ret <= 0)) {
+            ucs_error("%s(pid=%d length=%zu) returned %zd: %m",
+                      fn_name, ep->remote_pid, remote_iov->iov_len, ret);
+            return UCS_ERR_IO_ERROR;
+        }
+
+        ucs_assert((size_t)ret <= remote_iov->iov_len);
+        ucs_iov_advance(local_iov, local_iov_cnt, &local_iov_idx, ret);
+        ucs_iov_advance(remote_iov, 1, &remote_iov_idx, ret);
+    } while (remote_iov->iov_len);
+
+    return UCS_OK;
+}
+
+/* Transfers iov[] in batches of at most UCT_SM_MAX_IOV entries; remote_addr
+ * is where the first batch starts in the peer's address space */
+static UCS_F_ALWAYS_INLINE
+ucs_status_t uct_cma_ep_common_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov,
+                                     size_t iovcnt, uint64_t remote_addr,
+                                     uct_completion_t *comp,
+                                     uct_cma_ep_zcopy_fn_t fn_p,
+                                     const char *fn_name)
+{
+    uct_cma_ep_t *ep = ucs_derived_of(tl_ep, uct_cma_ep_t);
+    size_t iov_idx   = 0;
+    ucs_status_t status;
+    size_t local_iov_cnt;
+    size_t length;
+    size_t cur_iov_cnt;
+    struct iovec local_iov[UCT_SM_MAX_IOV];
+    struct iovec remote_iov;
+
+    /* Set once: later batches rely on do_zcopy having advanced iov_base */
+    remote_iov.iov_base = (void*)remote_addr;
+
+    while (iov_idx < iovcnt) {
+        cur_iov_cnt   = ucs_min(iovcnt - iov_idx, UCT_SM_MAX_IOV);
+        local_iov_cnt = uct_iovec_fill_iov(local_iov, &iov[iov_idx],
+                                           cur_iov_cnt, &length);
+        ucs_assert(local_iov_cnt <= cur_iov_cnt);
+
+        iov_idx += cur_iov_cnt;
+        ucs_assert(iov_idx <= iovcnt);
+
+        if (!length) {
+            continue; /* Nothing to deliver */
+        }
+
+        remote_iov.iov_len = length;
+
+        status = uct_cma_ep_do_zcopy(ep, local_iov, local_iov_cnt,
+                                     &remote_iov, fn_p, fn_name);
+        if (ucs_unlikely(status != UCS_OK)) {
+            return status;
+        }
+    }
+
+    return UCS_OK;
+}
+
+/* PUT: writes the local iov[] into the peer via process_vm_writev() */
+ucs_status_t uct_cma_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                                  uint64_t remote_addr, uct_rkey_t rkey,
+                                  uct_completion_t *comp)
+{
+    UCT_CHECK_IOV_SIZE(iovcnt, uct_sm_get_max_iov(), "uct_cma_ep_put_zcopy");
+
+    ucs_status_t ret = uct_cma_ep_common_zcopy(tl_ep, iov, iovcnt, remote_addr,
+                                               comp, process_vm_writev,
+                                               "process_vm_writev");
+
+    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, ZCOPY,
+                      uct_iov_total_length(iov, iovcnt));
+    uct_cma_trace_data(remote_addr, rkey, "PUT_ZCOPY [length %zu]",
+                       uct_iov_total_length(iov, iovcnt));
+    return ret;
+}
+
+/* GET: reads from the peer into the local iov[] via process_vm_readv() */
+ucs_status_t uct_cma_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                                  uint64_t remote_addr, uct_rkey_t rkey,
+                                  uct_completion_t *comp)
+{
+    UCT_CHECK_IOV_SIZE(iovcnt, uct_sm_get_max_iov(), "uct_cma_ep_get_zcopy");
+
+    ucs_status_t ret = uct_cma_ep_common_zcopy(tl_ep, iov, iovcnt, remote_addr,
+                                               comp, process_vm_readv,
+                                               "process_vm_readv");
+
+    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY,
+                      uct_iov_total_length(iov, iovcnt));
+    uct_cma_trace_data(remote_addr, rkey, "GET_ZCOPY [length %zu]",
+                       uct_iov_total_length(iov, iovcnt));
+    return ret;
+}
diff --git a/src/uct/sm/cma/cma_ep.h b/src/uct/sm/cma/cma_ep.h
new file mode 100644
index 0000000..14d479b
--- /dev/null
+++ b/src/uct/sm/cma/cma_ep.h
@@ -0,0 +1,28 @@
+/**
+* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_CMA_EP_H
+#define UCT_CMA_EP_H
+
+#include "cma_iface.h"
+
+#include
+
+
+typedef struct uct_cma_ep {
+    uct_base_ep_t super;
+    pid_t         remote_pid;
+} uct_cma_ep_t;
+
+UCS_CLASS_DECLARE_NEW_FUNC(uct_cma_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DECLARE_DELETE_FUNC(uct_cma_ep_t, uct_ep_t);
+ucs_status_t uct_cma_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                                  uint64_t remote_addr, uct_rkey_t rkey,
+                                  uct_completion_t *comp);
+ucs_status_t uct_cma_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                                  uint64_t remote_addr, uct_rkey_t rkey,
+                                  uct_completion_t *comp);
+#endif
diff --git a/src/uct/sm/cma/cma_iface.c b/src/uct/sm/cma/cma_iface.c
new file mode 100644
index 0000000..7c94f2e
--- /dev/null
+++ b/src/uct/sm/cma/cma_iface.c
@@ -0,0 +1,179 @@
+/**
+ * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "cma_md.h"
+#include "cma_iface.h"
+#include "cma_ep.h"
+
+#include
+#include
+
+
+/* Interface address in the default PID namespace: the PID only */
+typedef struct {
+    pid_t id;
+} ucs_cma_iface_base_device_addr_t;
+
+/* Interface address outside the default PID namespace: the PID (with
+ * UCT_CMA_IFACE_ADDR_FLAG_PID_NS set) followed by the namespace id */
+typedef struct {
+    ucs_cma_iface_base_device_addr_t super;
+    ucs_sys_ns_t                     pid_ns;
+} ucs_cma_iface_ext_device_addr_t;
+
+
+static ucs_config_field_t uct_cma_iface_config_table[] = {
+    {"SM_", "ALLOC=huge,thp,mmap,heap;BW=11145MBs", NULL,
+     ucs_offsetof(uct_cma_iface_config_t, super),
+     UCS_CONFIG_TYPE_TABLE(uct_sm_iface_config_table)},
+
+    {NULL}
+};
+
+/*
+ * Write this process' interface address into 'addr': the PID and, when the
+ * process is not in the default PID namespace, the namespace flag and id.
+ * pid_ns is written only in that case, which matches the iface_addr_len
+ * reported by uct_cma_iface_query().
+ */
+static ucs_status_t uct_cma_iface_get_address(uct_iface_t *tl_iface,
+                                              uct_iface_addr_t *addr)
+{
+    ucs_cma_iface_ext_device_addr_t *iface_addr = (void*)addr;
+    pid_t pid                                   = getpid();
+
+    /* The namespace flag bit must not collide with a valid PID */
+    ucs_assert(!(pid & UCT_CMA_IFACE_ADDR_FLAG_PID_NS));
+
+    iface_addr->super.id = pid;
+    if (!ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_PID)) {
+        iface_addr->super.id |= UCT_CMA_IFACE_ADDR_FLAG_PID_NS;
+        iface_addr->pid_ns    = ucs_sys_get_ns(UCS_SYS_NS_TYPE_PID);
+    }
+    return UCS_OK;
+}
+
+/*
+ * Report the interface attributes: zero-copy PUT/GET only, connect-to-iface,
+ * and an address length that depends on the PID namespace of the process.
+ */
+static ucs_status_t uct_cma_iface_query(uct_iface_h tl_iface,
+                                        uct_iface_attr_t *iface_attr)
+{
+    uct_cma_iface_t *iface = ucs_derived_of(tl_iface, uct_cma_iface_t);
+
+    uct_base_iface_query(&iface->super.super, iface_attr);
+
+    /* default values for all shared memory transports */
+    iface_attr->cap.put.min_zcopy       = 0;
+    iface_attr->cap.put.max_zcopy       = SIZE_MAX;
+    iface_attr->cap.put.opt_zcopy_align = 1;
+    iface_attr->cap.put.align_mtu       = iface_attr->cap.put.opt_zcopy_align;
+    iface_attr->cap.put.max_iov         = uct_sm_get_max_iov();
+
+    iface_attr->cap.get.min_zcopy       = 0;
+    iface_attr->cap.get.max_zcopy       = SIZE_MAX;
+    iface_attr->cap.get.opt_zcopy_align = 1;
+    iface_attr->cap.get.align_mtu       = iface_attr->cap.get.opt_zcopy_align;
+    iface_attr->cap.get.max_iov         = uct_sm_get_max_iov();
+
+    iface_attr->cap.am.max_iov          = 1;
+    iface_attr->cap.am.opt_zcopy_align  = 1;
+    iface_attr->cap.am.align_mtu        = iface_attr->cap.am.opt_zcopy_align;
+
+    iface_attr->iface_addr_len          = ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_PID) ?
+                                          sizeof(ucs_cma_iface_base_device_addr_t) :
+                                          sizeof(ucs_cma_iface_ext_device_addr_t);
+    iface_attr->device_addr_len         = uct_sm_iface_get_device_addr_len();
+    iface_attr->ep_addr_len             = 0;
+    iface_attr->max_conn_priv           = 0;
+    iface_attr->cap.flags               = UCT_IFACE_FLAG_GET_ZCOPY |
+                                          UCT_IFACE_FLAG_PUT_ZCOPY |
+                                          UCT_IFACE_FLAG_PENDING   |
+                                          UCT_IFACE_FLAG_CONNECT_TO_IFACE;
+    iface_attr->latency.overhead        = 80e-9; /* 80 ns */
+    iface_attr->latency.growth          = 0;
+    iface_attr->bandwidth.dedicated     = iface->super.config.bandwidth;
+    iface_attr->bandwidth.shared        = 0;
+    iface_attr->overhead                = 0.4e-6; /* 0.4 us */
+
+    return UCS_OK;
+}
+
+/*
+ * A peer is reachable if uct_sm_iface_is_reachable() accepts it and both
+ * processes share a PID namespace: either the ids match (peer address carries
+ * the namespace flag) or both are in the default namespace.
+ */
+static int
+uct_cma_iface_is_reachable(const uct_iface_h tl_iface,
+                           const uct_device_addr_t *dev_addr,
+                           const uct_iface_addr_t *tl_iface_addr)
+{
+    const ucs_cma_iface_ext_device_addr_t *iface_addr = (const void*)tl_iface_addr;
+
+    if (!uct_sm_iface_is_reachable(tl_iface, dev_addr, tl_iface_addr)) {
+        return 0;
+    }
+
+    if (iface_addr->super.id & UCT_CMA_IFACE_ADDR_FLAG_PID_NS) {
+        return ucs_sys_get_ns(UCS_SYS_NS_TYPE_PID) == iface_addr->pid_ns;
+    }
+
+    return ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_PID);
+}
+
+static UCS_CLASS_DECLARE_DELETE_FUNC(uct_cma_iface_t, uct_iface_t);
+
+/* Only zero-copy PUT/GET are implemented; pending and progress are stubs */
+static uct_iface_ops_t uct_cma_iface_ops = {
+    .ep_put_zcopy             = uct_cma_ep_put_zcopy,
+    .ep_get_zcopy             = uct_cma_ep_get_zcopy,
+    .ep_pending_add           = ucs_empty_function_return_busy,
+    .ep_pending_purge         = ucs_empty_function,
+    .ep_flush                 = uct_base_ep_flush,
+    .ep_fence                 = uct_sm_ep_fence,
+    .ep_create                = UCS_CLASS_NEW_FUNC_NAME(uct_cma_ep_t),
+    .ep_destroy               = UCS_CLASS_DELETE_FUNC_NAME(uct_cma_ep_t),
+    .iface_flush              = uct_base_iface_flush,
+    .iface_fence              = uct_sm_iface_fence,
+    .iface_progress_enable    = ucs_empty_function,
+    .iface_progress_disable   = ucs_empty_function,
+    .iface_progress           = ucs_empty_function_return_zero,
+    .iface_close              = UCS_CLASS_DELETE_FUNC_NAME(uct_cma_iface_t),
+    .iface_query              = uct_cma_iface_query,
+    .iface_get_address        = uct_cma_iface_get_address,
+    .iface_get_device_address = uct_sm_iface_get_device_address,
+    .iface_is_reachable       = uct_cma_iface_is_reachable
+};
+
+static UCS_CLASS_INIT_FUNC(uct_cma_iface_t, uct_md_h md, uct_worker_h worker,
+                           const uct_iface_params_t *params,
+                           const uct_iface_config_t *tl_config)
+{
+    UCS_CLASS_CALL_SUPER_INIT(uct_sm_iface_t, &uct_cma_iface_ops, md,
+                              worker, params, tl_config);
+
+    return UCS_OK;
+}
+
+static UCS_CLASS_CLEANUP_FUNC(uct_cma_iface_t)
+{
+}
+
+/* NOTE(review): the superclass named here is uct_base_iface_t, while the
+ * struct embeds uct_sm_iface_t and the init function calls the
+ * uct_sm_iface_t initializer - confirm this is intended */
+UCS_CLASS_DEFINE(uct_cma_iface_t, uct_base_iface_t);
+
+static UCS_CLASS_DEFINE_NEW_FUNC(uct_cma_iface_t, uct_iface_t, uct_md_h,
+                                 uct_worker_h, const uct_iface_params_t*,
+                                 const uct_iface_config_t *);
+static UCS_CLASS_DEFINE_DELETE_FUNC(uct_cma_iface_t, uct_iface_t);
+
+UCT_TL_DEFINE(&uct_cma_component, cma, uct_sm_base_query_tl_devices,
+              uct_cma_iface_t, "CMA_", uct_cma_iface_config_table,
+              uct_cma_iface_config_t);
diff --git a/src/uct/sm/cma/cma_iface.h b/src/uct/sm/cma/cma_iface.h
new file mode 100644
index 0000000..f14a046
--- /dev/null
+++
b/src/uct/sm/cma/cma_iface.h
@@ -0,0 +1,27 @@
+/**
+ * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCT_CMA_IFACE_H
+#define UCT_CMA_IFACE_H
+
+#include
+#include
+
+
+#define UCT_CMA_IFACE_ADDR_FLAG_PID_NS UCS_BIT(31) /* use PID NS in address */
+
+
+typedef struct uct_cma_iface_config {
+    uct_sm_iface_config_t super;
+} uct_cma_iface_config_t;
+
+
+typedef struct uct_cma_iface {
+    uct_sm_iface_t super;
+} uct_cma_iface_t;
+
+
+#endif
diff --git a/src/uct/sm/cma/cma_md.c b/src/uct/sm/cma/cma_md.c
new file mode 100644
index 0000000..211c0b0
--- /dev/null
+++ b/src/uct/sm/cma/cma_md.c
@@ -0,0 +1,204 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+* Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
+#include "cma_md.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#if HAVE_SYS_CAPABILITY_H
+# include
+#endif
+
+
+/*
+ * Check whether the Yama ptrace_scope setting allows CMA to be used.
+ * Returns 1 when CMA is usable (also when the ptrace_scope file cannot be
+ * read), 0 otherwise.
+ */
+static int uct_cma_test_ptrace_scope(void)
+{
+    static const char *ptrace_scope_file = "/proc/sys/kernel/yama/ptrace_scope";
+    const char *extra_info_str;
+    int cma_supported;
+    char buffer[32];
+    ssize_t nread;
+    char *value;
+
+    /* Check if ptrace_scope allows using CMA.
+     * See https://www.kernel.org/doc/Documentation/security/Yama.txt
+     */
+    nread = ucs_read_file(buffer, sizeof(buffer) - 1, 1, "%s", ptrace_scope_file);
+    if (nread < 0) {
+        /* Cannot read file - assume that Yama security module is not enabled */
+        ucs_debug("could not read '%s' - assuming Yama security is not enforced",
+                  ptrace_scope_file);
+        return 1;
+    }
+
+    ucs_assert(nread < sizeof(buffer));
+    extra_info_str = "";
+    cma_supported  = 0;
+    buffer[nread]  = '\0';
+    value          = ucs_strtrim(buffer);
+    if (!strcmp(value, "0")) {
+        /* ptrace scope 0 allow attaching within same UID */
+        cma_supported = 1;
+    } else if (!strcmp(value, "1")) {
+        /* ptrace scope 1 allows attaching with explicit permission by prctl() */
+#if HAVE_DECL_PR_SET_PTRACER
+        int ret = prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
+        if (!ret) {
+            extra_info_str = ", enabled PR_SET_PTRACER_ANY";
+            cma_supported  = 1;
+        } else {
+            extra_info_str = " and prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) failed";
+        }
+#else
+        extra_info_str = " but no PR_SET_PTRACER";
+#endif
+    } else if (!strcmp(value, "2")) {
+        /* ptrace scope 2 means only a process with CAP_SYS_PTRACE can attach */
+#if HAVE_SYS_CAPABILITY_H
+        ucs_status_t status;
+        uint32_t ecap;
+
+        status = ucs_sys_get_proc_cap(&ecap);
+        UCS_STATIC_ASSERT(CAP_SYS_PTRACE < 32);
+        /* CAP_SYS_PTRACE is a capability number, not a mask: test the matching
+         * bit of ecap (the static assert keeps the shift within 32 bits) */
+        if ((status == UCS_OK) && (ecap & UCS_BIT(CAP_SYS_PTRACE))) {
+            extra_info_str = ", process has CAP_SYS_PTRACE";
+            cma_supported  = 1;
+        } else
+#endif
+            extra_info_str = " but no CAP_SYS_PTRACE";
+    } else {
+        /* ptrace scope 3 means attach is completely disabled on the system */
+    }
+
+    /* coverity[result_independent_of_operands] */
+    ucs_log(cma_supported ? UCS_LOG_LEVEL_TRACE : UCS_LOG_LEVEL_DEBUG,
+            "ptrace_scope is %s%s, CMA is %ssupported",
+            value, extra_info_str, cma_supported ? "" : "un");
+    return cma_supported;
+}
+
+/*
+ * Probe process_vm_writev() by copying a 64-bit value within this process.
+ * Returns 1 if all bytes were delivered, 0 otherwise.
+ */
+static int uct_cma_test_writev(void)
+{
+    uint64_t test_dst       = 0;
+    uint64_t test_src       = 0;
+    struct iovec local_iov  = {.iov_base = &test_src,
+                               .iov_len  = sizeof(test_src)};
+    struct iovec remote_iov = {.iov_base = &test_dst,
+                               .iov_len  = sizeof(test_dst)};
+    ssize_t delivered;
+
+    delivered = process_vm_writev(getpid(), &local_iov, 1, &remote_iov, 1, 0);
+    if (delivered != sizeof(test_dst)) {
+        ucs_debug("CMA is disabled: "
+                  "process_vm_writev delivered %zd instead of %zu",
+                  delivered, sizeof(test_dst));
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Expose the single CMA memory domain only if both probes above succeed */
+static ucs_status_t
+uct_cma_query_md_resources(uct_component_t *component,
+                           uct_md_resource_desc_t **resources_p,
+                           unsigned *num_resources_p)
+{
+    if (uct_cma_test_writev() && uct_cma_test_ptrace_scope()) {
+        return uct_md_query_single_md_resource(component, resources_p,
+                                               num_resources_p);
+    } else {
+        return uct_md_query_empty_md_resource(resources_p, num_resources_p);
+    }
+}
+
+/* Registration does nothing except return a dummy non-NULL handle */
+static ucs_status_t uct_cma_mem_reg(uct_md_h md, void *address, size_t length,
+                                    unsigned flags, uct_mem_h *memh_p)
+{
+    /* For testing we have to make sure that
+     * memh_h != UCT_MEM_HANDLE_NULL
+     * otherwise gtest is not happy */
+    UCS_STATIC_ASSERT((uint64_t)0xdeadbeef != (uint64_t)UCT_MEM_HANDLE_NULL);
+    *memh_p = (void *) 0xdeadbeef;
+    return UCS_OK;
+}
+
+/* Hand out the single, statically allocated memory domain object */
+static ucs_status_t
+uct_cma_md_open(uct_component_t *component, const char *md_name,
+                const uct_md_config_t *md_config, uct_md_h *md_p)
+{
+    static uct_md_ops_t md_ops = {
+        .close              = (uct_md_close_func_t)ucs_empty_function,
+        .query              = uct_cma_md_query,
+        .mem_alloc          = (uct_md_mem_alloc_func_t)ucs_empty_function_return_success,
+        .mem_free           = (uct_md_mem_free_func_t)ucs_empty_function_return_success,
+        .mkey_pack          = (uct_md_mkey_pack_func_t)ucs_empty_function_return_success,
+        .mem_reg            = uct_cma_mem_reg,
+        .mem_dereg          = (uct_md_mem_dereg_func_t)ucs_empty_function_return_success,
+        .detect_memory_type = ucs_empty_function_return_unsupported,
+    };
+    static uct_md_t md = {
+        .ops       = &md_ops,
+        .component = &uct_cma_component
+    };
+
+    *md_p = &md;
+    return UCS_OK;
+}
+
+/* Report memory domain capabilities: registration only, no allocation */
+ucs_status_t uct_cma_md_query(uct_md_h md, uct_md_attr_t *md_attr)
+{
+    md_attr->rkey_packed_size     = 0;
+    md_attr->cap.flags            = UCT_MD_FLAG_REG;
+    md_attr->cap.reg_mem_types    = UCS_MEMORY_TYPES_CPU_ACCESSIBLE;
+    md_attr->cap.access_mem_type  = UCS_MEMORY_TYPE_HOST;
+    md_attr->cap.detect_mem_types = 0;
+    md_attr->cap.max_alloc        = 0;
+    md_attr->cap.max_reg          = ULONG_MAX;
+    md_attr->reg_cost.overhead    = 9e-9;
+    md_attr->reg_cost.growth      = 0;
+
+    memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus));
+    return UCS_OK;
+}
+
+uct_component_t uct_cma_component = {
+    .query_md_resources = uct_cma_query_md_resources,
+    .md_open            = uct_cma_md_open,
+    .cm_open            = ucs_empty_function_return_unsupported,
+    .rkey_unpack        = uct_md_stub_rkey_unpack,
+    .rkey_ptr           = ucs_empty_function_return_unsupported,
+    .rkey_release       = ucs_empty_function_return_success,
+    .name               = "cma",
+    .md_config          = UCT_MD_DEFAULT_CONFIG_INITIALIZER,
+    .cm_config          = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY,
+    .tl_list            = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_cma_component),
+    .flags              = 0
+};
+UCT_COMPONENT_REGISTER(&uct_cma_component);
diff --git a/src/uct/sm/cma/cma_md.h b/src/uct/sm/cma/cma_md.h
new file mode 100644
index 0000000..9ab2449
--- /dev/null
+++ b/src/uct/sm/cma/cma_md.h
@@ -0,0 +1,23 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+* Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_CMA_MD_H_
+#define UCT_CMA_MD_H_
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+extern uct_component_t uct_cma_component;
+
+ucs_status_t uct_cma_md_query(uct_md_h md, uct_md_attr_t *md_attr);
+
+#endif
diff --git a/src/uct/sm/cma/configure.m4 b/src/uct/sm/cma/configure.m4
new file mode 100644
index 0000000..1faf02d
--- /dev/null
+++ b/src/uct/sm/cma/configure.m4
@@ -0,0 +1,27 @@
+#
+# Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+
+# CMA is enabled by default (--disable-cma turns it off) and is built only
+# when sys/uio.h and process_vm_readv() are both available.
+cma_happy="no"
+AC_ARG_ENABLE([cma],
+              [AS_HELP_STRING([--enable-cma],
+                              [Enable Cross Memory Attach])],
+              [],
+              [enable_cma=yes])
+
+AS_IF([test "x$enable_cma" != xno],
+      [AC_CHECK_HEADERS([sys/uio.h],
+                        [AC_CHECK_FUNC([process_vm_readv],
+                                       [cma_happy="yes"],
+                                       [cma_happy="no"])
+                         AS_IF([test "x$cma_happy" = "xyes"],
+                               [uct_modules="${uct_modules}:cma"])
+                        ])
+      ]
+)
+
+AM_CONDITIONAL([HAVE_CMA], [test "x$cma_happy" != xno])
+AC_CONFIG_FILES([src/uct/sm/cma/Makefile])
diff --git a/src/uct/sm/configure.m4 b/src/uct/sm/configure.m4
new file mode 100644
index 0000000..4dfe1ca
--- /dev/null
+++ b/src/uct/sm/configure.m4
@@ -0,0 +1,10 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+
+m4_include([src/uct/sm/cma/configure.m4])
+m4_include([src/uct/sm/knem/configure.m4])
+m4_include([src/uct/sm/mm/configure.m4])
+
+AC_CONFIG_FILES([src/uct/sm/Makefile])
diff --git a/src/uct/sm/knem/Makefile.am b/src/uct/sm/knem/Makefile.am
new file mode 100644
index 0000000..46029c1
--- /dev/null
+++ b/src/uct/sm/knem/Makefile.am
@@ -0,0 +1,27 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+# + +if HAVE_KNEM + +module_LTLIBRARIES = libuct_knem.la +libuct_knem_la_CFLAGS = $(BASE_CFLAGS) +libuct_knem_la_CPPFLAGS = $(BASE_CPPFLAGS) $(KNEM_CPPFLAGS) +libuct_knem_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/libuct.la +libuct_knem_la_LDFLAGS = -version-info $(SOVERSION) $(UCT_MODULE_LDFLAGS) + +noinst_HEADERS = \ + knem_ep.h \ + knem_iface.h \ + knem_md.h + +libuct_knem_la_SOURCES = \ + knem_ep.c \ + knem_iface.c \ + knem_md.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/uct/sm/knem/Makefile.in b/src/uct/sm/knem/Makefile.in new file mode 100644 index 0000000..1b0e7c6 --- /dev/null +++ b/src/uct/sm/knem/Makefile.in @@ -0,0 +1,885 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/sm/knem +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + 
$(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d 
+CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_KNEM_TRUE@libuct_knem_la_DEPENDENCIES = \ +@HAVE_KNEM_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_KNEM_TRUE@ $(top_builddir)/src/uct/libuct.la +am__libuct_knem_la_SOURCES_DIST = knem_ep.c knem_iface.c knem_md.c +@HAVE_KNEM_TRUE@am_libuct_knem_la_OBJECTS = libuct_knem_la-knem_ep.lo \ +@HAVE_KNEM_TRUE@ libuct_knem_la-knem_iface.lo \ +@HAVE_KNEM_TRUE@ libuct_knem_la-knem_md.lo +libuct_knem_la_OBJECTS = $(am_libuct_knem_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_knem_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libuct_knem_la_CFLAGS) $(CFLAGS) $(libuct_knem_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@HAVE_KNEM_TRUE@am_libuct_knem_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libuct_knem_la-knem_ep.Plo \ + ./$(DEPDIR)/libuct_knem_la-knem_iface.Plo \ + ./$(DEPDIR)/libuct_knem_la-knem_md.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = 
$(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_knem_la_SOURCES) +DIST_SOURCES = $(am__libuct_knem_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = knem_ep.h knem_iface.h knem_md.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ 
+ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = 
@ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = 
@top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_KNEM_TRUE@module_LTLIBRARIES = libuct_knem.la +@HAVE_KNEM_TRUE@libuct_knem_la_CFLAGS = $(BASE_CFLAGS) +@HAVE_KNEM_TRUE@libuct_knem_la_CPPFLAGS = $(BASE_CPPFLAGS) $(KNEM_CPPFLAGS) +@HAVE_KNEM_TRUE@libuct_knem_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_KNEM_TRUE@ $(top_builddir)/src/uct/libuct.la + +@HAVE_KNEM_TRUE@libuct_knem_la_LDFLAGS = -version-info $(SOVERSION) $(UCT_MODULE_LDFLAGS) +@HAVE_KNEM_TRUE@noinst_HEADERS = \ +@HAVE_KNEM_TRUE@ knem_ep.h \ +@HAVE_KNEM_TRUE@ knem_iface.h \ +@HAVE_KNEM_TRUE@ knem_md.h + +@HAVE_KNEM_TRUE@libuct_knem_la_SOURCES = \ +@HAVE_KNEM_TRUE@ knem_ep.c \ +@HAVE_KNEM_TRUE@ knem_iface.c \ +@HAVE_KNEM_TRUE@ knem_md.c + + +# Automake silent rules +@HAVE_KNEM_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_KNEM_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_KNEM_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_KNEM_TRUE@AM_V_LN_1 = true +@HAVE_KNEM_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/sm/knem/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/sm/knem/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 
's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libuct_knem.la: $(libuct_knem_la_OBJECTS) $(libuct_knem_la_DEPENDENCIES) $(EXTRA_libuct_knem_la_DEPENDENCIES) + $(AM_V_CCLD)$(libuct_knem_la_LINK) $(am_libuct_knem_la_rpath) $(libuct_knem_la_OBJECTS) $(libuct_knem_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_knem_la-knem_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_knem_la-knem_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_knem_la-knem_md.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ 
$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libuct_knem_la-knem_ep.lo: knem_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_knem_la_CPPFLAGS) $(CPPFLAGS) $(libuct_knem_la_CFLAGS) $(CFLAGS) -MT libuct_knem_la-knem_ep.lo -MD -MP -MF $(DEPDIR)/libuct_knem_la-knem_ep.Tpo -c -o libuct_knem_la-knem_ep.lo `test -f 'knem_ep.c' || echo '$(srcdir)/'`knem_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_knem_la-knem_ep.Tpo $(DEPDIR)/libuct_knem_la-knem_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='knem_ep.c' object='libuct_knem_la-knem_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_knem_la_CPPFLAGS) $(CPPFLAGS) $(libuct_knem_la_CFLAGS) $(CFLAGS) -c -o libuct_knem_la-knem_ep.lo `test -f 'knem_ep.c' || echo '$(srcdir)/'`knem_ep.c + +libuct_knem_la-knem_iface.lo: knem_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_knem_la_CPPFLAGS) $(CPPFLAGS) $(libuct_knem_la_CFLAGS) $(CFLAGS) -MT libuct_knem_la-knem_iface.lo -MD -MP -MF $(DEPDIR)/libuct_knem_la-knem_iface.Tpo -c -o libuct_knem_la-knem_iface.lo `test -f 
'knem_iface.c' || echo '$(srcdir)/'`knem_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_knem_la-knem_iface.Tpo $(DEPDIR)/libuct_knem_la-knem_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='knem_iface.c' object='libuct_knem_la-knem_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_knem_la_CPPFLAGS) $(CPPFLAGS) $(libuct_knem_la_CFLAGS) $(CFLAGS) -c -o libuct_knem_la-knem_iface.lo `test -f 'knem_iface.c' || echo '$(srcdir)/'`knem_iface.c + +libuct_knem_la-knem_md.lo: knem_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_knem_la_CPPFLAGS) $(CPPFLAGS) $(libuct_knem_la_CFLAGS) $(CFLAGS) -MT libuct_knem_la-knem_md.lo -MD -MP -MF $(DEPDIR)/libuct_knem_la-knem_md.Tpo -c -o libuct_knem_la-knem_md.lo `test -f 'knem_md.c' || echo '$(srcdir)/'`knem_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_knem_la-knem_md.Tpo $(DEPDIR)/libuct_knem_la-knem_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='knem_md.c' object='libuct_knem_la-knem_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_knem_la_CPPFLAGS) $(CPPFLAGS) $(libuct_knem_la_CFLAGS) $(CFLAGS) -c -o libuct_knem_la-knem_md.lo `test -f 'knem_md.c' || echo '$(srcdir)/'`knem_md.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique 
+tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_KNEM_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libuct_knem_la-knem_ep.Plo + -rm -f ./$(DEPDIR)/libuct_knem_la-knem_iface.Plo + -rm -f ./$(DEPDIR)/libuct_knem_la-knem_md.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libuct_knem_la-knem_ep.Plo + -rm -f ./$(DEPDIR)/libuct_knem_la-knem_iface.Plo + -rm -f ./$(DEPDIR)/libuct_knem_la-knem_md.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + 
maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + +@HAVE_KNEM_TRUE@all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to create proper makefile dependencies +@HAVE_KNEM_TRUE@$(local_la_modules): $(module_LTLIBRARIES) +@HAVE_KNEM_TRUE@ $(AM_V_at)$(MKDIR_P) $(localmoduledir) +@HAVE_KNEM_TRUE@ $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_KNEM_TRUE@ (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ +@HAVE_KNEM_TRUE@ done +@HAVE_KNEM_TRUE@ @for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_KNEM_TRUE@ $(AM_V_LN) $$lib; \ +@HAVE_KNEM_TRUE@ done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/uct/sm/knem/configure.m4 b/src/uct/sm/knem/configure.m4 new file mode 100644 index 0000000..e1e9f64 --- /dev/null +++ b/src/uct/sm/knem/configure.m4 @@ -0,0 +1,37 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + +knem_happy="no" + +AC_ARG_WITH([knem], + [AS_HELP_STRING([--with-knem=(DIR)], [Enable the use of KNEM (default is guess).])], + [], [with_knem=guess]) + +AS_IF([test "x$with_knem" != xno], + [AS_IF([test "x$with_knem" = "xguess" -o "x$with_knem" = xyes -o "x$with_knem" = "x"], + [AC_MSG_NOTICE([KNEM path was not found, guessing ...]) + ucx_check_knem_include_dir=$(pkg-config --cflags knem)], + [ucx_check_knem_include_dir=-I$with_knem/include]) + + save_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$ucx_check_knem_include_dir $CPPFLAGS" + + AC_CHECK_DECL([KNEM_CMD_GET_INFO], + [AC_SUBST([KNEM_CPPFLAGS], [$ucx_check_knem_include_dir]) + uct_modules="${uct_modules}:knem" + knem_happy="yes"], + [AS_IF([test "x$with_knem" != xguess], + [AC_MSG_ERROR([KNEM requested but required file (knem_io.h) could not be found])], + [AC_MSG_WARN([KNEM requested but required file (knem_io.h) could not be found])])], + [[#include <knem_io.h>]]) + + CPPFLAGS="$save_CPPFLAGS" + + ], + [AC_MSG_WARN([KNEM was explicitly disabled])] +) + +AM_CONDITIONAL([HAVE_KNEM], [test "x$knem_happy" != xno]) +AC_CONFIG_FILES([src/uct/sm/knem/Makefile]) diff --git a/src/uct/sm/knem/knem_ep.c b/src/uct/sm/knem/knem_ep.c new file mode 100644 index 0000000..9a39a8c --- /dev/null +++ b/src/uct/sm/knem/knem_ep.c @@ -0,0 +1,119 @@ +/** + * Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include + +#include "knem_ep.h" +#include "knem_md.h" +#include +#include + +static UCS_CLASS_INIT_FUNC(uct_knem_ep_t, const uct_ep_params_t *params) +{ + uct_knem_iface_t *iface = ucs_derived_of(params->iface, uct_knem_iface_t); + + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super.super); + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_knem_ep_t) +{ + /* No op */ +} + +UCS_CLASS_DEFINE(uct_knem_ep_t, uct_base_ep_t) +UCS_CLASS_DEFINE_NEW_FUNC(uct_knem_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_knem_ep_t, uct_ep_t); + + +#define uct_knem_trace_data(_remote_addr, _rkey, _fmt, ...) \ + ucs_trace_data(_fmt " to %"PRIx64"(%+ld)", ## __VA_ARGS__, (_remote_addr), \ + (_rkey)) + +#define UCT_KNEM_ZERO_LENGTH_POST(len) \ + if (0 == len) { \ + ucs_trace_data("Zero length request: skip it"); \ + return UCS_OK; \ + } + +static inline ucs_status_t uct_knem_rma(uct_ep_h tl_ep, const uct_iov_t *iov, + size_t iovcnt, uint64_t remote_addr, + uct_knem_key_t *key, int write) +{ + uct_knem_iface_t *knem_iface = ucs_derived_of(tl_ep->iface, uct_knem_iface_t); + int knem_fd = knem_iface->knem_md->knem_fd; + size_t knem_iov_it = 0; + struct knem_cmd_inline_copy icopy; + struct knem_cmd_param_iovec knem_iov[UCT_SM_MAX_IOV]; + int rc; + size_t iov_it; + + for (iov_it = 0; iov_it < ucs_min(UCT_SM_MAX_IOV, iovcnt); ++iov_it) { + knem_iov[knem_iov_it].base = (uintptr_t)iov[iov_it].buffer; + knem_iov[knem_iov_it].len = uct_iov_get_length(iov + iov_it); + /* Skip zero length buffers */ + if (knem_iov[knem_iov_it].len != 0) { + ++knem_iov_it; + } + } + + UCT_KNEM_ZERO_LENGTH_POST(knem_iov_it); + + icopy.local_iovec_array = (uintptr_t)knem_iov; + icopy.local_iovec_nr = knem_iov_it; + icopy.remote_cookie = key->cookie; + ucs_assert(remote_addr >= key->address); + icopy.current_status = 0; + icopy.remote_offset = remote_addr - key->address; + /* if 0 then, READ from the remote region into my local segments + * if 1 then, WRITE to the remote 
region from my local segment */ + icopy.write = write; + /* TBD: add check and support for KNEM_FLAG_DMA */ + icopy.flags = 0; + + ucs_assert(knem_fd > -1); + rc = ioctl(knem_fd, KNEM_CMD_INLINE_COPY, &icopy); + if (ucs_unlikely((rc < 0) || (icopy.current_status != KNEM_STATUS_SUCCESS))) { + ucs_error("KNEM inline copy failed, ioctl() return value - %d, " + "copy status - %d: %m", rc, icopy.current_status); + return UCS_ERR_IO_ERROR; + } + + uct_knem_trace_data(remote_addr, (uintptr_t)key, "%s [length %zu]", + write?"PUT_ZCOPY":"GET_ZCOPY", + uct_iov_total_length(iov, iovcnt)); + return UCS_OK; +} + +ucs_status_t uct_knem_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uct_knem_key_t *key = (uct_knem_key_t *)rkey; + ucs_status_t status; + + UCT_CHECK_IOV_SIZE(iovcnt, uct_sm_get_max_iov(), "uct_knem_ep_put_zcopy"); + + status = uct_knem_rma(tl_ep, iov, iovcnt, remote_addr, key, 1); + UCT_TL_EP_STAT_OP_IF_SUCCESS(status, ucs_derived_of(tl_ep, uct_base_ep_t), + PUT, ZCOPY, uct_iov_total_length(iov, iovcnt)); + return status; +} + +ucs_status_t uct_knem_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uct_knem_key_t *key = (uct_knem_key_t *)rkey; + ucs_status_t status; + + UCT_CHECK_IOV_SIZE(iovcnt, uct_sm_get_max_iov(), "uct_knem_ep_get_zcopy"); + + status = uct_knem_rma(tl_ep, iov, iovcnt, remote_addr, key, 0); + UCT_TL_EP_STAT_OP_IF_SUCCESS(status, ucs_derived_of(tl_ep, uct_base_ep_t), + GET, ZCOPY, uct_iov_total_length(iov, iovcnt)); + return status; +} diff --git a/src/uct/sm/knem/knem_ep.h b/src/uct/sm/knem/knem_ep.h new file mode 100644 index 0000000..d105cfb --- /dev/null +++ b/src/uct/sm/knem/knem_ep.h @@ -0,0 +1,25 @@ +/** + * Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. 
+ * See file LICENSE for terms. + */ + +#ifndef UCT_KNEM_EP_H +#define UCT_KNEM_EP_H + +#include "knem_iface.h" + + +typedef struct uct_knem_ep { + uct_base_ep_t super; +} uct_knem_ep_t; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_knem_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_knem_ep_t, uct_ep_t); +ucs_status_t uct_knem_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); +ucs_status_t uct_knem_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); +#endif diff --git a/src/uct/sm/knem/knem_iface.c b/src/uct/sm/knem/knem_iface.c new file mode 100644 index 0000000..2115e99 --- /dev/null +++ b/src/uct/sm/knem/knem_iface.c @@ -0,0 +1,113 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "knem_md.h" +#include "knem_iface.h" +#include "knem_ep.h" + +#include +#include + + +static ucs_config_field_t uct_knem_iface_config_table[] = { + {"SM_", "BW=13862MBs", NULL, + ucs_offsetof(uct_knem_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_sm_iface_config_table)}, + + {NULL} +}; + +static ucs_status_t uct_knem_iface_query(uct_iface_h tl_iface, + uct_iface_attr_t *iface_attr) +{ + uct_knem_iface_t *iface = ucs_derived_of(tl_iface, uct_knem_iface_t); + + uct_base_iface_query(&iface->super.super, iface_attr); + + /* default values for all shared memory transports */ + iface_attr->cap.put.min_zcopy = 0; + iface_attr->cap.put.max_zcopy = SIZE_MAX; + iface_attr->cap.put.opt_zcopy_align = 1; + iface_attr->cap.put.align_mtu = iface_attr->cap.put.opt_zcopy_align; + iface_attr->cap.put.max_iov = uct_sm_get_max_iov(); + + iface_attr->cap.get.min_zcopy = 0; + iface_attr->cap.get.max_zcopy = SIZE_MAX; + iface_attr->cap.get.opt_zcopy_align = 1; + iface_attr->cap.get.align_mtu = iface_attr->cap.get.opt_zcopy_align; + iface_attr->cap.get.max_iov = uct_sm_get_max_iov(); + + iface_attr->cap.am.max_iov = 1; + iface_attr->cap.am.opt_zcopy_align = 1; + iface_attr->cap.am.align_mtu = iface_attr->cap.am.opt_zcopy_align; + + iface_attr->iface_addr_len = 0; + iface_attr->device_addr_len = uct_sm_iface_get_device_addr_len(); + iface_attr->ep_addr_len = 0; + iface_attr->max_conn_priv = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_GET_ZCOPY | + UCT_IFACE_FLAG_PUT_ZCOPY | + UCT_IFACE_FLAG_PENDING | + UCT_IFACE_FLAG_CONNECT_TO_IFACE; + iface_attr->latency.overhead = 80e-9; /* 80 ns */ + iface_attr->latency.growth = 0; + iface_attr->bandwidth.shared = iface->super.config.bandwidth; + iface_attr->bandwidth.dedicated = 0; + iface_attr->overhead = 0.25e-6; /* 0.25 us */ + + return UCS_OK; +} + +static UCS_CLASS_DECLARE_DELETE_FUNC(uct_knem_iface_t, uct_iface_t); + +static uct_iface_ops_t uct_knem_iface_ops = { + .ep_put_zcopy = uct_knem_ep_put_zcopy, + .ep_get_zcopy = 
uct_knem_ep_get_zcopy, + .ep_pending_add = (uct_ep_pending_add_func_t)ucs_empty_function_return_busy, + .ep_pending_purge = (uct_ep_pending_purge_func_t)ucs_empty_function, + .ep_flush = uct_base_ep_flush, + .ep_fence = uct_sm_ep_fence, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_knem_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_knem_ep_t), + .iface_fence = uct_sm_iface_fence, + .iface_progress_enable = ucs_empty_function, + .iface_progress_disable = ucs_empty_function, + .iface_progress = ucs_empty_function_return_zero, + .iface_flush = uct_base_iface_flush, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_knem_iface_t), + .iface_query = uct_knem_iface_query, + .iface_get_device_address = uct_sm_iface_get_device_address, + .iface_get_address = (uct_iface_get_address_func_t)ucs_empty_function_return_success, + .iface_is_reachable = uct_sm_iface_is_reachable +}; + +static UCS_CLASS_INIT_FUNC(uct_knem_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + UCS_CLASS_CALL_SUPER_INIT(uct_sm_iface_t, &uct_knem_iface_ops, md, + worker, params, tl_config); + self->knem_md = (uct_knem_md_t *)md; + uct_sm_get_max_iov(); /* to initialize ucs_iov_get_max static variable */ + + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_knem_iface_t) +{ + /* No OP */ +} + +UCS_CLASS_DEFINE(uct_knem_iface_t, uct_sm_iface_t); /* superclass must be the type initialized by UCS_CLASS_CALL_SUPER_INIT in the init function */ + +static UCS_CLASS_DEFINE_NEW_FUNC(uct_knem_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t *); +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_knem_iface_t, uct_iface_t); + +UCT_TL_DEFINE(&uct_knem_component, knem, uct_sm_base_query_tl_devices, + uct_knem_iface_t, "KNEM_", uct_knem_iface_config_table, + uct_knem_iface_config_t); diff --git a/src/uct/sm/knem/knem_iface.h b/src/uct/sm/knem/knem_iface.h new file mode 100644 index 0000000..faa6a93 --- /dev/null +++ b/src/uct/sm/knem/knem_iface.h @@ -0,0 +1,27 @@ +/** + * Copyright (c) 
UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_KNEM_IFACE_H +#define UCT_KNEM_IFACE_H + +#include "knem_md.h" + +#include +#include + + +typedef struct uct_knem_iface_config { + uct_sm_iface_config_t super; +} uct_knem_iface_config_t; + + +typedef struct uct_knem_iface { + uct_sm_iface_t super; + uct_knem_md_t *knem_md; +} uct_knem_iface_t; + + +#endif diff --git a/src/uct/sm/knem/knem_md.c b/src/uct/sm/knem/knem_md.c new file mode 100644 index 0000000..f9f5290 --- /dev/null +++ b/src/uct/sm/knem/knem_md.c @@ -0,0 +1,401 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "knem_md.h" +#include "knem_io.h" + +#include +#include +#include +#include + + +static ucs_config_field_t uct_knem_md_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_knem_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)}, + + {"RCACHE", "try", "Enable using memory registration cache", + ucs_offsetof(uct_knem_md_config_t, rcache_enable), UCS_CONFIG_TYPE_TERNARY}, + + {"", "", NULL, + ucs_offsetof(uct_knem_md_config_t, rcache), + UCS_CONFIG_TYPE_TABLE(uct_md_config_rcache_table)}, + + {NULL} +}; + +ucs_status_t uct_knem_md_query(uct_md_h uct_md, uct_md_attr_t *md_attr) +{ + uct_knem_md_t *md = ucs_derived_of(uct_md, uct_knem_md_t); + + md_attr->rkey_packed_size = sizeof(uct_knem_key_t); + md_attr->cap.flags = UCT_MD_FLAG_REG | + UCT_MD_FLAG_NEED_RKEY; + md_attr->cap.reg_mem_types = UCS_MEMORY_TYPES_CPU_ACCESSIBLE; + md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_HOST; + md_attr->cap.detect_mem_types = 0; + md_attr->cap.max_alloc = 0; + md_attr->cap.max_reg = ULONG_MAX; + md_attr->reg_cost = md->reg_cost; + + memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus)); + return UCS_OK; +} + 
+/*
+ * Probe for a usable KNEM driver and report the MD resource accordingly.
+ * /dev/knem is opened only for the probe and is closed on every path.
+ * If the device cannot be opened, KNEM_CMD_GET_INFO fails, or the driver ABI
+ * differs from KNEM_ABI_VERSION, the result of
+ * uct_md_query_empty_md_resource() is returned; otherwise the result of
+ * uct_md_query_single_md_resource().
+ */
+static ucs_status_t
+uct_knem_query_md_resources(uct_component_t *component,
+                            uct_md_resource_desc_t **resources_p,
+                            unsigned *num_resources_p)
+{
+    int fd;
+    int rc;
+    struct knem_cmd_info info;
+
+    memset(&info, 0, sizeof(struct knem_cmd_info));
+
+    fd = open("/dev/knem", O_RDWR);
+    if (fd < 0) {
+        ucs_debug("could not open the KNEM device file at /dev/knem: %m. Disabling knem resource");
+        goto out_empty;
+    }
+
+    rc = ioctl(fd, KNEM_CMD_GET_INFO, &info);
+    if (rc < 0) {
+        ucs_debug("KNEM get info failed. not using knem, err = %d %m", rc);
+        goto out_empty_close_fd;
+    }
+
+    if (KNEM_ABI_VERSION != info.abi) {
+        ucs_error("KNEM ABI mismatch: KNEM_ABI_VERSION: %d, Driver binary interface version: %d",
+                  KNEM_ABI_VERSION, info.abi);
+        goto out_empty_close_fd;
+    }
+
+    /* We have to close it since it is not clear
+     * if it will be selected in future */
+    close(fd);
+    return uct_md_query_single_md_resource(component, resources_p, num_resources_p);
+
+out_empty_close_fd:
+    close(fd);
+out_empty:
+    return uct_md_query_empty_md_resource(resources_p, num_resources_p);
+}
+
+/* Tear down a KNEM MD: destroy the registration cache if one was created,
+ * close the /dev/knem descriptor and free the MD object. */
+static void uct_knem_md_close(uct_md_h md)
+{
+    uct_knem_md_t *knem_md = ucs_derived_of(md, uct_knem_md_t);
+    if (knem_md->rcache != NULL) {
+        ucs_rcache_destroy(knem_md->rcache);
+    }
+    close(knem_md->knem_fd);
+    ucs_free(knem_md);
+}
+
+/*
+ * Create a KNEM region for the single iovec {address, length} with
+ * PROT_READ | PROT_WRITE protection and store the returned cookie and the base
+ * address in *key.
+ * Returns UCS_ERR_IO_ERROR if the ioctl fails; the failure is logged only when
+ * 'silent' is zero. The 'flags' argument is not read by this function.
+ */
+static ucs_status_t uct_knem_mem_reg_internal(uct_md_h md, void *address, size_t length,
+                                              unsigned flags, unsigned silent,
+                                              uct_knem_key_t *key)
+{
+    int rc;
+    struct knem_cmd_create_region create;
+    struct knem_cmd_param_iovec knem_iov[1];
+    uct_knem_md_t *knem_md = (uct_knem_md_t *)md;
+    int knem_fd = knem_md->knem_fd;
+
+    ucs_assert_always(knem_fd > -1);
+
+    knem_iov[0].base = (uintptr_t) address;
+    knem_iov[0].len = length;
+
+    memset(&create, 0, sizeof(struct knem_cmd_create_region));
+    create.iovec_array = (uintptr_t) &knem_iov[0];
+    create.iovec_nr = 1;
+    create.flags = 0;
+    create.protection = PROT_READ | PROT_WRITE;
+
+    rc = ioctl(knem_fd, KNEM_CMD_CREATE_REGION, &create);
+    if (rc < 0) {
+        if (!silent) {
+            /* do not report error in silent mode: it is called from rcache
+             * internals, rcache will try to register memory again with
+             * more accurate data */
+            ucs_error("KNEM create region failed: %m");
+        }
+        return UCS_ERR_IO_ERROR;
+    }
+
+    ucs_assert_always(create.cookie != 0);
+    key->cookie = create.cookie;
+    key->address = (uintptr_t)address;
+
+    return UCS_OK;
+}
+
+/* mem_reg without a registration cache: allocate a key, register the range
+ * (errors are logged, silent == 0) and hand the key back as the memh.
+ * The key is freed again if registration fails. */
+static ucs_status_t uct_knem_mem_reg(uct_md_h md, void *address, size_t length,
+                                     unsigned flags, uct_mem_h *memh_p)
+{
+    uct_knem_key_t *key;
+    ucs_status_t status;
+
+    key = ucs_malloc(sizeof(uct_knem_key_t), "uct_knem_key_t");
+    if (NULL == key) {
+        ucs_error("Failed to allocate memory for uct_knem_key_t");
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    status = uct_knem_mem_reg_internal(md, address, length, flags, 0, key);
+    if (status == UCS_OK) {
+        *memh_p = key;
+    } else {
+        ucs_free(key);
+    }
+    return status;
+}
+
+/* Destroy the KNEM region identified by key->cookie.
+ * NOTE: a failing ioctl is only logged; the function always returns UCS_OK. */
+static ucs_status_t uct_knem_mem_dereg_internal(uct_md_h md, uct_knem_key_t *key)
+{
+    int rc;
+    uct_knem_md_t *knem_md = (uct_knem_md_t *)md;
+    int knem_fd = knem_md->knem_fd;
+
+    ucs_assert_always(knem_fd > -1);
+    ucs_assert_always(key->cookie != 0);
+    ucs_assert_always(key->address != 0);
+
+    rc = ioctl(knem_fd, KNEM_CMD_DESTROY_REGION, &key->cookie);
+    if (rc < 0) {
+        ucs_error("KNEM destroy region failed, err = %m");
+    }
+
+    return UCS_OK;
+}
+
+/* mem_dereg without a registration cache: destroy the region and free the key
+ * that uct_knem_mem_reg() allocated. */
+static ucs_status_t uct_knem_mem_dereg(uct_md_h md, uct_mem_h memh)
+{
+    uct_knem_key_t *key = (uct_knem_key_t *)memh;
+    ucs_status_t status;
+
+    status = uct_knem_mem_dereg_internal(md, key);
+    if (status == UCS_OK) {
+        ucs_free(key);
+    }
+
+    return status;
+}
+
+/* Pack the key behind memh into rkey_buffer: cookie and base address are
+ * copied as a uct_knem_key_t. */
+static ucs_status_t uct_knem_rkey_pack(uct_md_h md, uct_mem_h memh,
+                                       void *rkey_buffer)
+{
+    uct_knem_key_t *packed = (uct_knem_key_t*)rkey_buffer;
+    uct_knem_key_t *key = (uct_knem_key_t *)memh;
+    packed->cookie = (uint64_t)key->cookie;
+    packed->address = (uintptr_t)key->address;
+    ucs_trace("packed rkey: cookie 0x%"PRIx64" address %"PRIxPTR,
+              key->cookie, key->address);
+    return UCS_OK;
+}
+
+/* Unpack a packed key: a heap copy of the uct_knem_key_t is returned through
+ * *rkey_p (as an integer-cast pointer) and *handle_p is set to NULL. The copy
+ * is released by uct_knem_rkey_release(). */
+static ucs_status_t uct_knem_rkey_unpack(uct_component_t *component,
+                                         const void *rkey_buffer,
+                                         uct_rkey_t *rkey_p, void **handle_p)
+{
+    uct_knem_key_t *packed = (uct_knem_key_t *)rkey_buffer;
+    uct_knem_key_t *key;
+
+    key = ucs_malloc(sizeof(uct_knem_key_t), "uct_knem_key_t");
+    if (NULL == key) {
+        ucs_error("Failed to allocate memory for uct_knem_key_t");
+        return UCS_ERR_NO_MEMORY;
+    }
+    key->cookie = packed->cookie;
+    key->address = packed->address;
+    *handle_p = NULL;
+    *rkey_p = (uintptr_t)key;
+    ucs_trace("unpacked rkey: key %p cookie 0x%"PRIx64" address %"PRIxPTR,
+              key, key->cookie, key->address);
+    return UCS_OK;
+}
+
+/* Free the key copy created by uct_knem_rkey_unpack(); handle must be NULL. */
+static ucs_status_t uct_knem_rkey_release(uct_component_t *component,
+                                          uct_rkey_t rkey, void *handle)
+{
+    ucs_assert(NULL == handle);
+    ucs_free((void *)rkey);
+    return UCS_OK;
+}
+
+/* MD operations used when no registration cache is attached
+ * (installed by default in uct_knem_md_open). */
+static uct_md_ops_t md_ops = {
+    .close = uct_knem_md_close,
+    .query = uct_knem_md_query,
+    .mkey_pack = uct_knem_rkey_pack,
+    .mem_reg = uct_knem_mem_reg,
+    .mem_dereg = uct_knem_mem_dereg,
+    .detect_memory_type = ucs_empty_function_return_unsupported,
+};
+
+/* With the rcache, a memh is the address of the 'key' member embedded in a
+ * uct_knem_rcache_region_t; recover the enclosing region from it. */
+static inline uct_knem_rcache_region_t* uct_knem_rcache_region_from_memh(uct_mem_h memh)
+{
+    return ucs_container_of(memh, uct_knem_rcache_region_t, key);
+}
+
+/* mem_reg through the registration cache: look up / create a region via
+ * ucs_rcache_get() and expose its embedded key as the memh. &flags is passed
+ * as the opaque argument of ucs_rcache_get(). */
+static ucs_status_t uct_knem_mem_rcache_reg(uct_md_h uct_md, void *address,
+                                            size_t length, unsigned flags,
+                                            uct_mem_h *memh_p)
+{
+    uct_knem_md_t *md = ucs_derived_of(uct_md, uct_knem_md_t);
+    ucs_rcache_region_t *rregion;
+    ucs_status_t status;
+
+    status = ucs_rcache_get(md->rcache, address, length, PROT_READ|PROT_WRITE,
+                            &flags, &rregion);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    ucs_assert(rregion->refcount > 0);
+    *memh_p = &ucs_derived_of(rregion, uct_knem_rcache_region_t)->key;
+    return UCS_OK;
+}
+
+/* mem_dereg through the registration cache: hand the region that owns memh
+ * back to the cache. */
+static ucs_status_t uct_knem_mem_rcache_dereg(uct_md_h uct_md, uct_mem_h memh)
+{
+    uct_knem_md_t *md = ucs_derived_of(uct_md,
uct_knem_md_t);
+    uct_knem_rcache_region_t *region = uct_knem_rcache_region_from_memh(memh);
+
+    /* presumably releases the reference obtained through ucs_rcache_get() */
+    ucs_rcache_region_put(md->rcache, &region->super);
+    return UCS_OK;
+}
+
+/* MD operations used when a registration cache was created successfully
+ * (installed by uct_knem_md_open); only mem_reg/mem_dereg differ from md_ops. */
+static uct_md_ops_t uct_knem_md_rcache_ops = {
+    .close = uct_knem_md_close,
+    .query = uct_knem_md_query,
+    .mkey_pack = uct_knem_rkey_pack,
+    .mem_reg = uct_knem_mem_rcache_reg,
+    .mem_dereg = uct_knem_mem_rcache_dereg,
+    .detect_memory_type = ucs_empty_function_return_unsupported,
+};
+
+
+/*
+ * rcache "mem_reg" callback: register the region's [start, end) range with
+ * KNEM and store the resulting key inside the region.
+ * 'context' is the uct_knem_md_t set as rcache_params.context; 'arg' is
+ * dereferenced as the registration flags (presumably the &flags passed to
+ * ucs_rcache_get() by uct_knem_mem_rcache_reg - confirm against rcache).
+ * Error logging is suppressed when UCS_RCACHE_MEM_REG_HIDE_ERRORS is set.
+ */
+static ucs_status_t uct_knem_rcache_mem_reg_cb(void *context, ucs_rcache_t *rcache,
+                                               void *arg, ucs_rcache_region_t *rregion,
+                                               uint16_t rcache_mem_reg_flags)
+{
+    uct_knem_rcache_region_t *region = ucs_derived_of(rregion, uct_knem_rcache_region_t);
+    uct_knem_md_t *md = context;
+    int *flags = arg;
+
+    return uct_knem_mem_reg_internal(&md->super, (void*)region->super.super.start,
+                                     region->super.super.end - region->super.super.start,
+                                     *flags,
+                                     rcache_mem_reg_flags & UCS_RCACHE_MEM_REG_HIDE_ERRORS,
+                                     &region->key);
+}
+
+/* rcache "mem_dereg" callback: destroy the KNEM region whose key is embedded
+ * in the cached region. */
+static void uct_knem_rcache_mem_dereg_cb(void *context, ucs_rcache_t *rcache,
+                                         ucs_rcache_region_t *rregion)
+{
+    uct_knem_rcache_region_t *region = ucs_derived_of(rregion, uct_knem_rcache_region_t);
+    uct_knem_md_t *md = context;
+
+    uct_knem_mem_dereg_internal(&md->super, &region->key);
+}
+
+/* rcache "dump_region" callback: format the region's KNEM cookie and base
+ * address into buf, writing at most 'max' bytes (snprintf semantics). */
+static void uct_knem_rcache_dump_region_cb(void *context, ucs_rcache_t *rcache,
+                                           ucs_rcache_region_t *rregion, char *buf,
+                                           size_t max)
+{
+    uct_knem_rcache_region_t *region = ucs_derived_of(rregion, uct_knem_rcache_region_t);
+    uct_knem_key_t *key = &region->key;
+
+    snprintf(buf, max, "cookie %"PRIu64" addr %p", key->cookie, (void*)key->address);
+}
+
+/* Callback table handed to ucs_rcache_create() through rcache_params.ops. */
+static ucs_rcache_ops_t uct_knem_rcache_ops = {
+    .mem_reg = uct_knem_rcache_mem_reg_cb,
+    .mem_dereg = uct_knem_rcache_mem_dereg_cb,
+    .dump_region = uct_knem_rcache_dump_region_cb
+};
+
+/*
+ * Open the KNEM memory domain: allocate the MD object, open /dev/knem and,
+ * unless rcache_enable is UCS_NO, try to attach a registration cache.
+ * On success *md_p receives the new MD.
+ */
+static ucs_status_t
+uct_knem_md_open(uct_component_t *component, const char *md_name,
+                 const uct_md_config_t *uct_md_config, uct_md_h *md_p)
+{
+    const
uct_knem_md_config_t *md_config = ucs_derived_of(uct_md_config, uct_knem_md_config_t); + uct_knem_md_t *knem_md; + ucs_rcache_params_t rcache_params; + ucs_status_t status; + + knem_md = ucs_malloc(sizeof(uct_knem_md_t), "uct_knem_md_t"); + if (NULL == knem_md) { + ucs_error("Failed to allocate memory for uct_knem_md_t"); + return UCS_ERR_NO_MEMORY; + } + + knem_md->super.ops = &md_ops; + knem_md->super.component = &uct_knem_component; + knem_md->reg_cost.overhead = 1200.0e-9; + knem_md->reg_cost.growth = 0.007e-9; + knem_md->rcache = NULL; + + knem_md->knem_fd = open("/dev/knem", O_RDWR); + if (knem_md->knem_fd < 0) { + ucs_error("Could not open the KNEM device file at /dev/knem: %m."); + free(knem_md); + return UCS_ERR_IO_ERROR; + } + + if (md_config->rcache_enable != UCS_NO) { + rcache_params.region_struct_size = sizeof(uct_knem_rcache_region_t); + rcache_params.alignment = md_config->rcache.alignment; + rcache_params.max_alignment = ucs_get_page_size(); + rcache_params.ucm_events = UCM_EVENT_VM_UNMAPPED; + rcache_params.ucm_event_priority = md_config->rcache.event_prio; + rcache_params.context = knem_md; + rcache_params.ops = &uct_knem_rcache_ops; + status = ucs_rcache_create(&rcache_params, "knem rcache device", + ucs_stats_get_root(), &knem_md->rcache); + if (status == UCS_OK) { + knem_md->super.ops = &uct_knem_md_rcache_ops; + knem_md->reg_cost.overhead = md_config->rcache.overhead; + knem_md->reg_cost.growth = 0; /* It's close enough to 0 */ + } else { + ucs_assert(knem_md->rcache == NULL); + if (md_config->rcache_enable == UCS_YES) { + ucs_error("Failed to create registration cache: %s", + ucs_status_string(status)); + uct_knem_md_close(&knem_md->super); + return status; + } else { + ucs_debug("Could not create registration cache: %s", + ucs_status_string(status)); + } + } + } + + *md_p = (uct_md_h)knem_md; + return UCS_OK; +} + +uct_component_t uct_knem_component = { + .query_md_resources = uct_knem_query_md_resources, + .md_open = uct_knem_md_open, + 
.cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = uct_knem_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = uct_knem_rkey_release, + .name = "knem", + .md_config = { + .name = "KNEM memory domain", + .prefix = "KNEM_", + .table = uct_knem_md_config_table, + .size = sizeof(uct_knem_md_config_t), + }, + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_knem_component), + .flags = 0 +}; +UCT_COMPONENT_REGISTER(&uct_knem_component); diff --git a/src/uct/sm/knem/knem_md.h b/src/uct/sm/knem/knem_md.h new file mode 100644 index 0000000..051078a --- /dev/null +++ b/src/uct/sm/knem/knem_md.h @@ -0,0 +1,55 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef UCT_KNEM_MD_H_ +#define UCT_KNEM_MD_H_ + +#include +#include +#include +#include +#include + +extern uct_component_t uct_knem_component; +ucs_status_t uct_knem_md_query(uct_md_h md, uct_md_attr_t *md_attr); + +/** + * @brief KNEM MD descriptor + */ +typedef struct uct_knem_md { + struct uct_md super; /**< Domain info */ + int knem_fd; /**< File descriptor for /dev/knem */ + ucs_rcache_t *rcache; /**< Registration cache (can be NULL) */ + uct_linear_growth_t reg_cost; /**< Memory registration cost */ +} uct_knem_md_t; + +/** + * @brief KNEM packed and remote key + */ +typedef struct uct_knem_key { + uint64_t cookie; /**< knem cookie */ + uintptr_t address; /**< base addr for the registration */ +} uct_knem_key_t; + +/** + * KNEM memory domain configuration. + */ +typedef struct uct_knem_md_config { + uct_md_config_t super; + ucs_ternary_value_t rcache_enable; + uct_md_rcache_config_t rcache; +} uct_knem_md_config_t; + +/** + * KNEM memory region in the registration cache. 
+ */ +typedef struct uct_knem_rcache_region { + ucs_rcache_region_t super; + uct_knem_key_t key; /**< exposed to the user as the memh */ +} uct_knem_rcache_region_t; + +#endif diff --git a/src/uct/sm/mm/Makefile.am b/src/uct/sm/mm/Makefile.am new file mode 100644 index 0000000..515d552 --- /dev/null +++ b/src/uct/sm/mm/Makefile.am @@ -0,0 +1,6 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +SUBDIRS = xpmem diff --git a/src/uct/sm/mm/Makefile.in b/src/uct/sm/mm/Makefile.in new file mode 100644 index 0000000..0bb6566 --- /dev/null +++ b/src/uct/sm/mm/Makefile.in @@ -0,0 +1,764 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/sm/mm +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + 
$(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = 
$(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = 
@DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ 
+MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = 
@host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +SUBDIRS = xpmem +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/sm/mm/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/sm/mm/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \ + check-am clean clean-generic clean-libtool cscopelist-am ctags \ + ctags-am distclean distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + 
install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/uct/sm/mm/base/mm_ep.c b/src/uct/sm/mm/base/mm_ep.c new file mode 100644 index 0000000..4bc3dc3 --- /dev/null +++ b/src/uct/sm/mm/base/mm_ep.c @@ -0,0 +1,458 @@ +/** +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include "mm_ep.h" + +#include + + +/* send modes */ +typedef enum { + UCT_MM_SEND_AM_BCOPY, + UCT_MM_SEND_AM_SHORT, +} uct_mm_send_op_t; + + +/* Check if the resources on the remote peer are available for sending to it. + * i.e. check if the remote receive FIFO has room in it. + * return 1 if can send. + * return 0 if can't send. 
+ */ +#define UCT_MM_EP_IS_ABLE_TO_SEND(_head, _tail, _fifo_size) \ + ucs_likely(((_head) - (_tail)) < (_fifo_size)) + +/* Slow path of uct_mm_ep_get_remote_seg(): insert seg_id into ep->remote_segs and attach the segment through the mapper mem_attach call; on success *address_p receives the attached address */ +static UCS_F_NOINLINE ucs_status_t +uct_mm_ep_attach_remote_seg(uct_mm_ep_t *ep, uct_mm_seg_id_t seg_id, + size_t length, void **address_p) +{ + uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_mm_iface_t); + uct_mm_remote_seg_t *remote_seg; + ucs_status_t status; + khiter_t khiter; + int khret; + + khiter = kh_put(uct_mm_remote_seg, &ep->remote_segs, seg_id, &khret); + if (khret == -1) { /* the key could not be inserted */ + ucs_error("failed to add remote segment to mm ep hash"); + return UCS_ERR_NO_MEMORY; + } + + /* we expect the key would either be never used (=1) or deleted (=2) */ + ucs_assert_always((khret == 1) || (khret == 2)); + + remote_seg = &kh_val(&ep->remote_segs, khiter); + + status = uct_mm_iface_mapper_call(iface, mem_attach, seg_id, length, + ep->remote_iface_addr, remote_seg); + if (status != UCS_OK) { /* attach failed: drop the hash entry added above */ + kh_del(uct_mm_remote_seg, &ep->remote_segs, khiter); + return status; + } + + *address_p = remote_seg->address; + ucs_debug("mm_ep %p: attached remote segment id 0x%"PRIx64" at %p cookie %p", + ep, seg_id, remote_seg->address, remote_seg->cookie); + return UCS_OK; +} +/* Return in *address_p the local address of remote segment seg_id, attaching the segment on first use */ +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_mm_ep_get_remote_seg(uct_mm_ep_t *ep, uct_mm_seg_id_t seg_id, size_t length, + void **address_p) +{ + khiter_t khiter; + + /* fast path - segment is already present */ + khiter = kh_get(uct_mm_remote_seg, &ep->remote_segs, seg_id); + if (ucs_likely(khiter != kh_end(&ep->remote_segs))) { + *address_p = kh_val(&ep->remote_segs, khiter).address; + return UCS_OK; + } + + /* slow path - attach new segment */ + return uct_mm_ep_attach_remote_seg(ep, seg_id, length, address_p); +} + + +/* send a signal to remote interface using Unix-domain socket */ +static void uct_mm_ep_signal_remote(uct_mm_ep_t *ep) +{ + uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t); + char dummy = 0; /* one-byte wakeup datagram */ + int ret; + + for (;;) { + ret = 
sendto(iface->signal_fd, &dummy, sizeof(dummy), 0, + (const struct sockaddr*)&ep->signal.sockaddr, + ep->signal.addrlen); + if (ucs_unlikely(ret < 0)) { + if (errno == EINTR) { + /* Interrupted system call - retry */ + continue; + } else if ((errno == EAGAIN) || (errno == ECONNREFUSED)) { + /* If we failed to signal because buffer is full - ignore the error + * since it means the remote side would get a signal anyway. + * If the remote side is not there - ignore the error as well. + */ + ucs_trace("failed to send wakeup signal: %m"); + return; + } else { + ucs_warn("failed to send wakeup signal: %m"); + return; + } + } else { + ucs_assert(ret == sizeof(dummy)); + ucs_trace("sent wakeup from socket %d to %p", iface->signal_fd, + (const struct sockaddr*)&ep->signal.sockaddr); + return; + } + } +} +/* Endpoint constructor: copies the md-specific part of the remote interface address, attaches the remote receive FIFO and caches its tail and signaling socket address */ +static UCS_CLASS_INIT_FUNC(uct_mm_ep_t, const uct_ep_params_t *params) +{ + uct_mm_iface_t *iface = ucs_derived_of(params->iface, uct_mm_iface_t); + uct_mm_md_t *md = ucs_derived_of(iface->super.super.md, uct_mm_md_t); + const uct_mm_iface_addr_t *addr = (const void *)params->iface_addr; + ucs_status_t status; + void *fifo_ptr; + + UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(params); + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super.super); + + kh_init_inplace(uct_mm_remote_seg, &self->remote_segs); + ucs_arbiter_group_init(&self->arb_group); + + /* save remote md address */ + if (md->iface_addr_len > 0) { + self->remote_iface_addr = ucs_malloc(md->iface_addr_len, "mm_md_addr"); + if (self->remote_iface_addr == NULL) { + status = UCS_ERR_NO_MEMORY; + goto err; + } + /* the md-specific bytes follow the uct_mm_iface_addr_t header, hence addr + 1 */ + memcpy(self->remote_iface_addr, addr + 1, md->iface_addr_len); + } else { + self->remote_iface_addr = NULL; + } + + /* Attach the remote FIFO, use the same method as bcopy descriptors */ + status = uct_mm_ep_get_remote_seg(self, addr->fifo_seg_id, + UCT_MM_GET_FIFO_SIZE(iface), &fifo_ptr); + if (status != UCS_OK) { + ucs_error("mm ep failed to connect to remote FIFO id 0x%"PRIx64": %s", + addr->fifo_seg_id, 
ucs_status_string(status)); + goto err_free_md_addr; + } + + /* Initialize remote FIFO control structure */ + uct_mm_iface_set_fifo_ptrs(fifo_ptr, &self->fifo_ctl, &self->fifo_elems); + self->cached_tail = self->fifo_ctl->tail; + self->signal.addrlen = self->fifo_ctl->signal_addrlen; + self->signal.sockaddr = self->fifo_ctl->signal_sockaddr; + + ucs_debug("created mm ep %p, connected to remote FIFO id 0x%"PRIx64, + self, addr->fifo_seg_id); + + return UCS_OK; + +err_free_md_addr: + ucs_free(self->remote_iface_addr); +err: /* NOTE(review): remote_segs is not destroyed on the error paths - confirm kh_put left nothing allocated */ + return status; +} +/* Endpoint destructor: purges pending requests, detaches every attached remote segment, then frees the md address copy and the segment hash */ +static UCS_CLASS_CLEANUP_FUNC(uct_mm_ep_t) +{ + uct_mm_iface_t *iface = ucs_derived_of(self->super.super.iface, uct_mm_iface_t); + uct_mm_remote_seg_t remote_seg; + + uct_mm_ep_pending_purge(&self->super.super, NULL, NULL); + + kh_foreach_value(&self->remote_segs, remote_seg, { + uct_mm_iface_mapper_call(iface, mem_detach, &remote_seg); + }) + + ucs_free(self->remote_iface_addr); + kh_destroy_inplace(uct_mm_remote_seg, &self->remote_segs); +} + +UCS_CLASS_DEFINE(uct_mm_ep_t, uct_base_ep_t) +UCS_CLASS_DEFINE_NEW_FUNC(uct_mm_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_mm_ep_t, uct_ep_t); + +/* Try to claim the FIFO element at head by atomically advancing the remote head from head to head+1 */ +static inline ucs_status_t uct_mm_ep_get_remote_elem(uct_mm_ep_t *ep, uint64_t head, + uct_mm_fifo_element_t **elem) +{ + uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t); + uint64_t elem_index; /* the fifo elem's index in the fifo. 
*/ + /* must be smaller than fifo size */ + uint64_t returned_val; + + elem_index = ep->fifo_ctl->head & iface->fifo_mask; /* slot of the current remote head */ + *elem = UCT_MM_IFACE_GET_FIFO_ELEM(iface, ep->fifo_elems, elem_index); + + /* try to get ownership of the head element */ + returned_val = ucs_atomic_cswap64(ucs_unaligned_ptr(&ep->fifo_ctl->head), head, head+1); + if (returned_val != head) { /* the remote head no longer equals head: nothing was claimed, the caller retries */ + return UCS_ERR_NO_RESOURCE; + } + + return UCS_OK; +} +/* Refresh ep->cached_tail from the remote FIFO control structure, after a load fence */ +static inline void uct_mm_ep_update_cached_tail(uct_mm_ep_t *ep) +{ + ucs_memory_cpu_load_fence(); + ep->cached_tail = ep->fifo_ctl->tail; +} + +/* A common mm active message sending function. + * The first parameter selects the kind of send to perform. + * send_op = UCT_MM_SEND_AM_SHORT - perform AM short sending + * send_op = UCT_MM_SEND_AM_BCOPY - perform AM bcopy sending + */ +static UCS_F_ALWAYS_INLINE ssize_t +uct_mm_ep_am_common_send(uct_mm_send_op_t send_op, uct_mm_ep_t *ep, + uct_mm_iface_t *iface, uint8_t am_id, size_t length, + uint64_t header, const void *payload, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags) +{ + uct_mm_fifo_element_t *elem; + ucs_status_t status; + void *base_address; + uint8_t elem_flags; + uint64_t head; + + UCT_CHECK_AM_ID(am_id); + +retry: + head = ep->fifo_ctl->head; + /* check if there is room in the remote process's receive FIFO to write */ + if (!UCT_MM_EP_IS_ABLE_TO_SEND(head, ep->cached_tail, iface->config.fifo_size)) { + if (!ucs_arbiter_group_is_empty(&ep->arb_group)) { + /* pending isn't empty. 
don't send now to prevent out-of-order sending */ + UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1); + return UCS_ERR_NO_RESOURCE; + } else { + /* pending is empty */ + /* update the local copy of the tail to its actual value on the remote peer */ + uct_mm_ep_update_cached_tail(ep); + if (!UCT_MM_EP_IS_ABLE_TO_SEND(head, ep->cached_tail, iface->config.fifo_size)) { + UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1); + return UCS_ERR_NO_RESOURCE; + } + } + } + + status = uct_mm_ep_get_remote_elem(ep, head, &elem); + if (status != UCS_OK) { + ucs_assert(status == UCS_ERR_NO_RESOURCE); + ucs_trace_poll("couldn't get an available FIFO element. retrying"); + goto retry; + } + + switch (send_op) { + case UCT_MM_SEND_AM_SHORT: + /* write to the remote FIFO */ + uct_am_short_fill_data(elem + 1, header, payload, length); + + elem_flags = UCT_MM_FIFO_ELEM_FLAG_INLINE; + elem->length = length + sizeof(header); + + uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND, am_id, + elem + 1, length + sizeof(header), "TX: AM_SHORT"); + UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, sizeof(header) + length); + break; + case UCT_MM_SEND_AM_BCOPY: + /* write to the remote descriptor */ + /* get the base_address: local ptr to remote memory chunk after attaching to it */ + status = uct_mm_ep_get_remote_seg(ep, elem->desc.seg_id, + elem->desc.seg_size, &base_address); + if (ucs_unlikely(status != UCS_OK)) { /* NOTE(review): the head was already advanced by uct_mm_ep_get_remote_elem(); returning here leaves the claimed element unwritten - confirm the receiver can cope */ + return status; + } + + length = pack_cb(UCS_PTR_BYTE_OFFSET(base_address, + elem->desc.offset), + arg); + elem_flags = 0; + elem->length = length; + + uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND, am_id, + UCS_PTR_BYTE_OFFSET(base_address, elem->desc.offset), + length, "TX: AM_BCOPY"); + UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, length); + break; + } + + elem->am_id = am_id; + + /* memory barrier - make sure that the memory is flushed before setting the + * 'writing is complete' flag which the reader checks */ + 
ucs_memory_cpu_store_fence(); + + /* set the owner bit to indicate that the writing is complete. + * the owner bit flips after every FIFO wraparound */ + if (head & iface->config.fifo_size) { + elem_flags |= UCT_MM_FIFO_ELEM_FLAG_OWNER; + } + elem->flags = elem_flags; + + if (ucs_unlikely(flags & UCT_SEND_FLAG_SIGNALED)) { + uct_mm_ep_signal_remote(ep); + } + + switch (send_op) { /* AM short reports a status, AM bcopy reports the packed length */ + case UCT_MM_SEND_AM_SHORT: + return UCS_OK; + case UCT_MM_SEND_AM_BCOPY: + return length; + default: + return UCS_ERR_INVALID_PARAM; + } +} +/* Send a short active message (64-bit header followed by payload) inline in a FIFO element; flags are passed as 0, so this path never signals the remote side */ +ucs_status_t uct_mm_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t header, + const void *payload, unsigned length) +{ + uct_mm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_mm_iface_t); + uct_mm_ep_t *ep = ucs_derived_of(tl_ep, uct_mm_ep_t); + + UCT_CHECK_LENGTH(length + sizeof(header), 0, + iface->config.fifo_elem_size - sizeof(uct_mm_fifo_element_t), + "am_short"); + + return (ucs_status_t)uct_mm_ep_am_common_send(UCT_MM_SEND_AM_SHORT, ep, + iface, id, length, header, + payload, NULL, NULL, 0); +} +/* Send a bcopy active message: pack_cb writes the data into the receive descriptor of the claimed FIFO element; returns the packed length or an error status */ +ssize_t uct_mm_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id, uct_pack_callback_t pack_cb, + void *arg, unsigned flags) +{ + uct_mm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_mm_iface_t); + uct_mm_ep_t *ep = ucs_derived_of(tl_ep, uct_mm_ep_t); + + return uct_mm_ep_am_common_send(UCT_MM_SEND_AM_BCOPY, ep, iface, id, 0, 0, + NULL, pack_cb, arg, flags); +} +/* Nonzero when the remote FIFO has room according to the cached (possibly stale) tail */ +static inline int uct_mm_ep_has_tx_resources(uct_mm_ep_t *ep) +{ + uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t); + return UCT_MM_EP_IS_ABLE_TO_SEND(ep->fifo_ctl->head, ep->cached_tail, + iface->config.fifo_size); +} +/* Queue a pending request on the endpoint arbiter group, unless send resources are available */ +ucs_status_t uct_mm_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *n, + unsigned flags) +{ + uct_mm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_mm_iface_t); + uct_mm_ep_t *ep = ucs_derived_of(tl_ep, uct_mm_ep_t); + + /* check if resources became available */ + if (uct_mm_ep_has_tx_resources(ep)) { + 
ucs_assert(ucs_arbiter_group_is_empty(&ep->arb_group)); + return UCS_ERR_BUSY; /* resources are available, so the request is not queued */ + } + + UCS_STATIC_ASSERT(sizeof(uct_pending_req_priv_arb_t) <= + UCT_PENDING_REQ_PRIV_LEN); + uct_pending_req_arb_group_push(&ep->arb_group, n); + /* add the ep's group to the arbiter */ + ucs_arbiter_group_schedule(&iface->arbiter, &ep->arb_group); + UCT_TL_EP_STAT_PEND(&ep->super); + + return UCS_OK; +} +/* Arbiter callback: refresh the cached tail and, if the remote FIFO has room, run the pending request; the result tells the arbiter whether to remove or keep the element */ +ucs_arbiter_cb_result_t uct_mm_ep_process_pending(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv); + ucs_status_t status; + uct_mm_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_mm_ep_t, arb_group); + + /* update the local tail with its actual value from the remote peer + * making sure that the pending sends would use the real tail value */ + uct_mm_ep_update_cached_tail(ep); + + if (!uct_mm_ep_has_tx_resources(ep)) { + return UCS_ARBITER_CB_RESULT_RESCHED_GROUP; + } + + ucs_trace_data("progressing pending request %p", req); + status = req->func(req); + ucs_trace_data("status returned from progress pending: %s", + ucs_status_string(status)); + + if (status == UCS_OK) { + /* sent successfully. remove from the arbiter */ + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } else if (status == UCS_INPROGRESS) { + /* sent but not completed, keep in the arbiter */ + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } else { + /* couldn't send. 
keep this request in the arbiter until the next time + * this function is called */ + return UCS_ARBITER_CB_RESULT_RESCHED_GROUP; + } +} +/* Arbiter purge callback: hands the request to the user callback, or warns when none was given; the element is always removed. NOTE(review): abriter in the name looks like a typo for arbiter */ +static ucs_arbiter_cb_result_t uct_mm_ep_abriter_purge_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv); + uct_purge_cb_args_t *cb_args = arg; + uct_pending_purge_callback_t cb = cb_args->cb; + uct_mm_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), + uct_mm_ep_t, arb_group); + if (cb != NULL) { + cb(req, cb_args->arg); + } else { + ucs_warn("ep=%p canceling user pending request %p", ep, req); + } + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; +} +/* Remove every pending request of this endpoint from the arbiter, passing each one to cb (which may be NULL) */ +void uct_mm_ep_pending_purge(uct_ep_h tl_ep, uct_pending_purge_callback_t cb, + void *arg) +{ + uct_mm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_mm_iface_t); + uct_mm_ep_t *ep = ucs_derived_of(tl_ep, uct_mm_ep_t); + uct_purge_cb_args_t args = {cb, arg}; + + ucs_arbiter_group_purge(&iface->arbiter, &ep->arb_group, + uct_mm_ep_abriter_purge_cb, &args); +} +/* Flush succeeds only when the remote FIFO has room according to the (possibly refreshed) cached tail; flags and comp are not used here */ +ucs_status_t uct_mm_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp) +{ + uct_mm_ep_t *ep = ucs_derived_of(tl_ep, uct_mm_ep_t); + + if (!uct_mm_ep_has_tx_resources(ep)) { + if (!ucs_arbiter_group_is_empty(&ep->arb_group)) { + return UCS_ERR_NO_RESOURCE; + } else { + uct_mm_ep_update_cached_tail(ep); + if (!uct_mm_ep_has_tx_resources(ep)) { + return UCS_ERR_NO_RESOURCE; + } + } + } + + ucs_memory_cpu_store_fence(); + UCT_TL_EP_STAT_FLUSH(&ep->super); + return UCS_OK; +} diff --git a/src/uct/sm/mm/base/mm_ep.h b/src/uct/sm/mm/base/mm_ep.h new file mode 100644 index 0000000..88b97c7 --- /dev/null +++ b/src/uct/sm/mm/base/mm_ep.h @@ -0,0 +1,70 @@ +/** +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#ifndef UCT_MM_EP_H +#define UCT_MM_EP_H + +#include "mm_iface.h" + +#include + +/* Hash map from remote segment id to its attach information. NOTE(review): the key type is uintptr_t while the 64-bit hash helpers are used - presumably assumes a 64-bit uintptr_t, confirm */ +KHASH_INIT(uct_mm_remote_seg, uintptr_t, uct_mm_remote_seg_t, 1, + kh_int64_hash_func, kh_int64_hash_equal) + + +/** + * MM transport endpoint + */ +typedef struct uct_mm_ep { + uct_base_ep_t super; + + /* Remote peer */ + uct_mm_fifo_ctl_t *fifo_ctl; /* pointer to the destination's ctl struct in the receive fifo */ + void *fifo_elems; /* fifo elements (destination's receive fifo) */ + + uint64_t cached_tail; /* the sender's own copy of the remote FIFO's tail. + it is not always updated with the actual remote tail value */ + + /* mapped remote memory chunks to which remote descriptors belong + * (after attaching to them) */ + khash_t(uct_mm_remote_seg) remote_segs; + + void *remote_iface_addr; /* remote md-specific address, can be NULL */ + + ucs_arbiter_group_t arb_group; /* the group that holds this ep's pending operations */ + + /* Used for signaling remote side wakeup */ + struct { + struct sockaddr_un sockaddr; /* address of signaling socket */ + socklen_t addrlen; /* address length of signaling socket */ + } signal; +} uct_mm_ep_t; + +/* Endpoint constructor and destructor entry points */ +UCS_CLASS_DECLARE_NEW_FUNC(uct_mm_ep_t, uct_ep_t,const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_mm_ep_t, uct_ep_t); +/* Active message sends: short (inline in the FIFO element) and bcopy (packed into a receive descriptor) */ +ucs_status_t uct_mm_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t header, + const void *payload, unsigned length); +ssize_t uct_mm_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id, uct_pack_callback_t pack_cb, + void *arg, unsigned flags); + +ucs_status_t uct_mm_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp); +/* Pending-request queue management */ +ucs_status_t uct_mm_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *n, + unsigned flags); + +void uct_mm_ep_pending_purge(uct_ep_h ep, uct_pending_purge_callback_t cb, + void *arg); +/* Arbiter dispatch callback used by the interface progress function */ +ucs_arbiter_cb_result_t uct_mm_ep_process_pending(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg); + +#endif diff --git a/src/uct/sm/mm/base/mm_iface.c 
b/src/uct/sm/mm/base/mm_iface.c new file mode 100644 index 0000000..ab9f7c5 --- /dev/null +++ b/src/uct/sm/mm/base/mm_iface.c @@ -0,0 +1,647 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "mm_iface.h" +#include "mm_ep.h" + +#include +#include +#include +#include +#include +#include +#include + + +/* Maximal number of events to clear from the signaling pipe in single call */ +#define UCT_MM_IFACE_MAX_SIG_EVENTS 32 + +/* Configuration fields of the MM interface: name, default value, help text, storage offset and parser type */ +ucs_config_field_t uct_mm_iface_config_table[] = { + {"SM_", "ALLOC=md,mmap,heap", NULL, + ucs_offsetof(uct_mm_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_sm_iface_config_table)}, + + {"FIFO_SIZE", "64", + "Size of the receive FIFO in the memory-map UCTs.", + ucs_offsetof(uct_mm_iface_config_t, fifo_size), UCS_CONFIG_TYPE_UINT}, + + {"SEG_SIZE", "8256", + "Size of send/receive buffers for copy-out sends.", + ucs_offsetof(uct_mm_iface_config_t, seg_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {"FIFO_RELEASE_FACTOR", "0.5", + "Frequency of resource releasing on the receiver's side in the MM UCT.\n" + "This value refers to the percentage of the FIFO size. (must be >= 0 and < 1).", + ucs_offsetof(uct_mm_iface_config_t, release_fifo_factor), UCS_CONFIG_TYPE_DOUBLE}, + + UCT_IFACE_MPOOL_CONFIG_FIELDS("RX_", -1, 512, "receive", + ucs_offsetof(uct_mm_iface_config_t, mp), ""), + + {"FIFO_HUGETLB", "no", + "Enable using huge pages for internal shared memory buffers.\n" 
+ "Possible values are:\n" + " y - Allocate memory using huge pages only.\n" + " n - Allocate memory using regular pages only.\n" + " try - Try to allocate memory using huge pages and if it fails, allocate regular pages.", + ucs_offsetof(uct_mm_iface_config_t, hugetlb_mode), UCS_CONFIG_TYPE_TERNARY}, + + {"FIFO_ELEM_SIZE", "128", + "Size of the FIFO element size (data + header) in the MM UCTs.", + ucs_offsetof(uct_mm_iface_config_t, fifo_elem_size), UCS_CONFIG_TYPE_UINT}, + + {NULL} +}; +/* Pack the interface address: the receive FIFO segment id followed by the md-specific part written by iface_addr_pack */ +static ucs_status_t uct_mm_iface_get_address(uct_iface_t *tl_iface, + uct_iface_addr_t *addr) +{ + uct_mm_iface_t *iface = ucs_derived_of(tl_iface, uct_mm_iface_t); + uct_mm_md_t *md = ucs_derived_of(iface->super.super.md, + uct_mm_md_t); + uct_mm_iface_addr_t *iface_addr = (void*)addr; + uct_mm_seg_t *seg = iface->recv_fifo_mem.memh; + + iface_addr->fifo_seg_id = seg->seg_id; + return uct_mm_md_mapper_ops(md)->iface_addr_pack(md, iface_addr + 1); +} +/* Reachable only if the generic shared-memory check passes and the md mapper accepts the remote FIFO segment id and md-specific address */ +static int +uct_mm_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *tl_iface_addr) +{ + uct_mm_iface_t *iface = ucs_derived_of(tl_iface, uct_mm_iface_t); + uct_mm_md_t *md = ucs_derived_of(iface->super.super.md, + uct_mm_md_t); + uct_mm_iface_addr_t *iface_addr = (void*)tl_iface_addr; + + if (!uct_sm_iface_is_reachable(tl_iface, dev_addr, tl_iface_addr)) { + return 0; + } + + return uct_mm_md_mapper_ops(md)->is_reachable(md, iface_addr->fifo_seg_id, + iface_addr + 1); +} +/* Release callback for receive descriptors: desc points just past the uct_mm_recv_desc_t header, so step back to the mpool object before returning it */ +void uct_mm_iface_release_desc(uct_recv_desc_t *self, void *desc) +{ + void *mm_desc; + + mm_desc = UCS_PTR_BYTE_OFFSET(desc, -sizeof(uct_mm_recv_desc_t)); + ucs_mpool_put(mm_desc); +} +/* Interface flush: completion objects are not supported; otherwise only a store fence is issued */ +ucs_status_t uct_mm_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp) +{ + if (comp != NULL) { + return UCS_ERR_UNSUPPORTED; + } + + ucs_memory_cpu_store_fence(); + UCT_TL_IFACE_STAT_FLUSH(ucs_derived_of(tl_iface, uct_base_iface_t)); + return UCS_OK; +} + +static ucs_status_t 
uct_mm_iface_query(uct_iface_h tl_iface, + uct_iface_attr_t *iface_attr) +{ /* Fill iface_attr with the capabilities, address lengths and performance estimates of this interface */ + uct_mm_iface_t *iface = ucs_derived_of(tl_iface, uct_mm_iface_t); + uct_mm_md_t *md = ucs_derived_of(iface->super.super.md, uct_mm_md_t); + + uct_base_iface_query(&iface->super.super, iface_attr); + + /* default values for all shared memory transports */ + iface_attr->cap.put.max_short = UINT_MAX; + iface_attr->cap.put.max_bcopy = SIZE_MAX; + iface_attr->cap.put.min_zcopy = 0; + iface_attr->cap.put.max_zcopy = SIZE_MAX; + iface_attr->cap.put.opt_zcopy_align = UCS_SYS_CACHE_LINE_SIZE; + iface_attr->cap.put.align_mtu = iface_attr->cap.put.opt_zcopy_align; + iface_attr->cap.put.max_iov = 1; + + iface_attr->cap.get.max_bcopy = SIZE_MAX; + iface_attr->cap.get.min_zcopy = 0; + iface_attr->cap.get.max_zcopy = SIZE_MAX; + iface_attr->cap.get.opt_zcopy_align = UCS_SYS_CACHE_LINE_SIZE; + iface_attr->cap.get.align_mtu = iface_attr->cap.get.opt_zcopy_align; + iface_attr->cap.get.max_iov = 1; + /* an inline message must fit in one FIFO element after its header */ + iface_attr->cap.am.max_short = iface->config.fifo_elem_size - + sizeof(uct_mm_fifo_element_t); + iface_attr->cap.am.max_bcopy = iface->config.seg_size; + iface_attr->cap.am.min_zcopy = 0; + iface_attr->cap.am.max_zcopy = 0; + iface_attr->cap.am.opt_zcopy_align = UCS_SYS_CACHE_LINE_SIZE; + iface_attr->cap.am.align_mtu = iface_attr->cap.am.opt_zcopy_align; + iface_attr->cap.am.max_iov = 1; + /* the interface address is the fixed header plus the md-specific part */ + iface_attr->iface_addr_len = sizeof(uct_mm_iface_addr_t) + + md->iface_addr_len; + iface_attr->device_addr_len = uct_sm_iface_get_device_addr_len(); + iface_attr->ep_addr_len = 0; + iface_attr->max_conn_priv = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_PUT_SHORT | + UCT_IFACE_FLAG_PUT_BCOPY | + UCT_IFACE_FLAG_ATOMIC_CPU | + UCT_IFACE_FLAG_GET_BCOPY | + UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_PENDING | + UCT_IFACE_FLAG_CB_SYNC | + UCT_IFACE_FLAG_EVENT_SEND_COMP | + UCT_IFACE_FLAG_EVENT_RECV_SIG | + UCT_IFACE_FLAG_CONNECT_TO_IFACE; + /* 32-bit and 64-bit atomics advertise the same operation sets */ + iface_attr->cap.atomic32.op_flags = + 
iface_attr->cap.atomic64.op_flags = UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR); + iface_attr->cap.atomic32.fop_flags = + iface_attr->cap.atomic64.fop_flags = UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR) | + UCS_BIT(UCT_ATOMIC_OP_SWAP) | + UCS_BIT(UCT_ATOMIC_OP_CSWAP); + + iface_attr->latency.overhead = 80e-9; /* 80 ns */ + iface_attr->latency.growth = 0; + iface_attr->bandwidth.dedicated = iface->super.config.bandwidth; + iface_attr->bandwidth.shared = 0; + iface_attr->overhead = 10e-9; /* 10 ns */ + iface_attr->priority = 0; + + return UCS_OK; +} +/* Publish read_index as the FIFO tail, but only when the bits selected by fifo_release_factor_mask are all zero */ +static inline void uct_mm_progress_fifo_tail(uct_mm_iface_t *iface) +{ + /* don't progress the tail every time - release in batches. improves performance */ + if (iface->read_index & iface->fifo_release_factor_mask) { + return; + } + + iface->recv_fifo_ctl->tail = iface->read_index; +} +/* Point a FIFO element at a receive descriptor: iface->last_recv_desc when need_new_desc is 0, otherwise a fresh one taken from the mpool */ +ucs_status_t uct_mm_assign_desc_to_fifo_elem(uct_mm_iface_t *iface, + uct_mm_fifo_element_t *elem, + unsigned need_new_desc) +{ + uct_mm_recv_desc_t *desc; + + if (!need_new_desc) { + desc = iface->last_recv_desc; + } else { + UCT_TL_IFACE_GET_RX_DESC(&iface->super.super, &iface->recv_desc_mp, desc, + return UCS_ERR_NO_RESOURCE); + } + + elem->desc = desc->info; + elem->desc_data = UCS_PTR_BYTE_OFFSET(desc + 1, iface->rx_headroom); + return UCS_OK; +} +/* Deliver one FIFO element to the active message handler, either inline from the element or from its receive descriptor */ +static inline ucs_status_t uct_mm_iface_process_recv(uct_mm_iface_t *iface, + uct_mm_fifo_element_t* elem) +{ + ucs_status_t status; + void *data; + + if (ucs_likely(elem->flags & UCT_MM_FIFO_ELEM_FLAG_INLINE)) { + /* read short (inline) messages from the FIFO elements */ + uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV, + elem->am_id, elem + 1, elem->length, "RX: AM_SHORT"); + status = uct_mm_iface_invoke_am(iface, elem->am_id, elem + 1, + elem->length, 0); + } else { + /* read bcopy messages from the receive descriptors 
*/ + data = elem->desc_data; + VALGRIND_MAKE_MEM_DEFINED(data, elem->length); + + uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV, + elem->am_id, data, elem->length, "RX: AM_BCOPY"); + + status = uct_mm_iface_invoke_am(iface, elem->am_id, data, elem->length, + UCT_CB_PARAM_FLAG_DESC); + if (status != UCS_OK) { /* the handler did not return UCS_OK (the caller treats this as the descriptor staying in use), so the element takes iface->last_recv_desc */ + /* assign a new receive descriptor to this FIFO element.*/ + uct_mm_assign_desc_to_fifo_elem(iface, elem, 0); + } + } + return status; +} +/* Poll the receive FIFO once: process the element at read_index if its owner bit shows it was written; returns the number of elements processed (0 or 1) */ +static inline unsigned uct_mm_iface_poll_fifo(uct_mm_iface_t *iface) +{ + uint64_t read_index_loc, read_index; + uct_mm_fifo_element_t* read_index_elem; + ucs_status_t status; + + /* check the memory pool to make sure that there is a new descriptor available */ + if (ucs_unlikely(iface->last_recv_desc == NULL)) { + UCT_TL_IFACE_GET_RX_DESC(&iface->super.super, &iface->recv_desc_mp, + iface->last_recv_desc, return 0); + } + + read_index = iface->read_index; + read_index_loc = (read_index & iface->fifo_mask); + /* the fifo_element which the read_index points to */ + read_index_elem = UCT_MM_IFACE_GET_FIFO_ELEM(iface, iface->recv_fifo_elems, + read_index_loc); + + /* check the read_index to see if there is a new item to read (checking the owner bit) */ + if (((read_index >> iface->fifo_shift) & 1) == ((read_index_elem->flags) & 1)) { /* assumes UCT_MM_FIFO_ELEM_FLAG_OWNER is bit 0 - TODO confirm */ + + /* read from read_index_elem */ + ucs_memory_cpu_load_fence(); + ucs_assert(iface->read_index <= iface->recv_fifo_ctl->head); + + status = uct_mm_iface_process_recv(iface, read_index_elem); + if (status != UCS_OK) { + /* the last_recv_desc is in use. get a new descriptor for it */ + UCT_TL_IFACE_GET_RX_DESC(&iface->super.super, &iface->recv_desc_mp, + iface->last_recv_desc, ucs_debug("recv mpool is empty")); + } + + /* raise the read_index. 
*/ + iface->read_index++; + + uct_mm_progress_fifo_tail(iface); + + return 1; + } else { + return 0; + } +} +/* Progress function: polls the receive FIFO once and dispatches pending sends; returns the receive count only */ +unsigned uct_mm_iface_progress(void *arg) +{ + uct_mm_iface_t *iface = arg; + unsigned count; + + /* progress receive */ + count = uct_mm_iface_poll_fifo(iface); + + /* progress the pending sends (if there are any) */ + ucs_arbiter_dispatch(&iface->arbiter, 1, uct_mm_ep_process_pending, NULL); + + return count; +} +/* Expose the signaling socket as the interface event file descriptor */ +static ucs_status_t uct_mm_iface_event_fd_get(uct_iface_h tl_iface, int *fd_p) +{ + *fd_p = ucs_derived_of(tl_iface, uct_mm_iface_t)->signal_fd; + return UCS_OK; +} +/* Arm the event fd by draining it: UCS_ERR_BUSY if a signal was read or the read was interrupted, UCS_OK if nothing was queued. The events argument is not used */ +static ucs_status_t uct_mm_iface_event_fd_arm(uct_iface_h tl_iface, + unsigned events) +{ + uct_mm_iface_t *iface = ucs_derived_of(tl_iface, uct_mm_iface_t); + char dummy[UCT_MM_IFACE_MAX_SIG_EVENTS]; /* pop multiple signals at once */ + int ret; + /* NOTE(review): if signal_fd is a datagram socket, one recvfrom() returns a single datagram - confirm that several signals are really popped per call */ + ret = recvfrom(iface->signal_fd, &dummy, sizeof(dummy), 0, NULL, 0); + if (ret > 0) { + return UCS_ERR_BUSY; + } else if (ret == -1) { + if (errno == EAGAIN) { + return UCS_OK; + } else if (errno == EINTR) { + return UCS_ERR_BUSY; + } else { + ucs_error("failed to retrieve message from signal pipe: %m"); + return UCS_ERR_IO_ERROR; + } + } else { + ucs_assert(ret == 0); + return UCS_OK; + } +} + +static UCS_CLASS_DECLARE_DELETE_FUNC(uct_mm_iface_t, uct_iface_t); +/* Operation table of the MM transport: put/get/atomics use the generic uct_sm_* handlers, active messages and pending handling use the uct_mm_* ones */ +static uct_iface_ops_t uct_mm_iface_ops = { + .ep_put_short = uct_sm_ep_put_short, + .ep_put_bcopy = uct_sm_ep_put_bcopy, + .ep_get_bcopy = uct_sm_ep_get_bcopy, + .ep_am_short = uct_mm_ep_am_short, + .ep_am_bcopy = uct_mm_ep_am_bcopy, + .ep_atomic_cswap64 = uct_sm_ep_atomic_cswap64, + .ep_atomic64_post = uct_sm_ep_atomic64_post, + .ep_atomic64_fetch = uct_sm_ep_atomic64_fetch, + .ep_atomic_cswap32 = uct_sm_ep_atomic_cswap32, + .ep_atomic32_post = uct_sm_ep_atomic32_post, + .ep_atomic32_fetch = uct_sm_ep_atomic32_fetch, + .ep_pending_add = uct_mm_ep_pending_add, + .ep_pending_purge = uct_mm_ep_pending_purge, + .ep_flush = uct_mm_ep_flush, + .ep_fence = uct_sm_ep_fence, + 
.ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_mm_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_mm_ep_t), + .iface_flush = uct_mm_iface_flush, + .iface_fence = uct_sm_iface_fence, + .iface_progress_enable = uct_base_iface_progress_enable, + .iface_progress_disable = uct_base_iface_progress_disable, + .iface_progress = (uct_iface_progress_func_t)uct_mm_iface_progress, + .iface_event_fd_get = uct_mm_iface_event_fd_get, + .iface_event_arm = uct_mm_iface_event_fd_arm, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_mm_iface_t), + .iface_query = uct_mm_iface_query, + .iface_get_device_address = uct_sm_iface_get_device_address, + .iface_get_address = uct_mm_iface_get_address, + .iface_is_reachable = uct_mm_iface_is_reachable +}; +/* Mpool object initializer: records in desc->info the segment id, segment size and payload offset that a sender needs to locate this descriptor */ +static void uct_mm_iface_recv_desc_init(uct_iface_h tl_iface, void *obj, + uct_mem_h memh) +{ + uct_mm_iface_t *iface = ucs_derived_of(tl_iface, uct_mm_iface_t); + uct_mm_recv_desc_t *desc = obj; + uct_mm_seg_t *seg = memh; + size_t offset; + + if (seg->length > UINT_MAX) { /* length does not fit in an unsigned int: store placeholder values instead */ + ucs_error("mm: shared memory segment length cannot exceed %u", UINT_MAX); + desc->info.seg_id = UINT64_MAX; + desc->info.seg_size = 0; + desc->info.offset = 0; + return; + } + /* offset of the payload (past the descriptor header and the rx headroom) from the segment start */ + offset = UCS_PTR_BYTE_DIFF(seg->address, desc + 1) + iface->rx_headroom; + ucs_assert(offset <= UINT_MAX); + + desc->info.seg_id = seg->seg_id; + desc->info.seg_size = seg->length; + desc->info.offset = offset; +} +/* Return to the mpool the receive descriptors referenced by the first num_elems FIFO elements */ +static void uct_mm_iface_free_rx_descs(uct_mm_iface_t *iface, unsigned num_elems) +{ + uct_mm_fifo_element_t *elem; + uct_mm_recv_desc_t *desc; + unsigned i; + + for (i = 0; i < num_elems; i++) { + elem = UCT_MM_IFACE_GET_FIFO_ELEM(iface, iface->recv_fifo_elems, i); + desc = (uct_mm_recv_desc_t*)UCS_PTR_BYTE_OFFSET(elem->desc_data, + -iface->rx_headroom) - 1; + ucs_mpool_put(desc); + } +} +/* Compute, from the start of the FIFO memory, the cache-line aligned control structure and the address of the first FIFO element */ +void uct_mm_iface_set_fifo_ptrs(void *fifo_mem, uct_mm_fifo_ctl_t **fifo_ctl_p, + void **fifo_elems_p) +{ + uct_mm_fifo_ctl_t *fifo_ctl; + + /* initiate the uct_mm_fifo_ctl struct, holding 
the head and the tail */ + fifo_ctl = (uct_mm_fifo_ctl_t*)ucs_align_up_pow2 + ((uintptr_t)fifo_mem, UCS_SYS_CACHE_LINE_SIZE); + + /* Make sure head and tail are cache-aligned, and not on same cacheline, to + * avoid false-sharing. + */ + ucs_assert_always( + (((uintptr_t)&fifo_ctl->head) % UCS_SYS_CACHE_LINE_SIZE) == 0); + ucs_assert_always( + (((uintptr_t)&fifo_ctl->tail) % UCS_SYS_CACHE_LINE_SIZE) == 0); + ucs_assert_always( + ((uintptr_t)&fifo_ctl->tail - (uintptr_t)&fifo_ctl->head) >= UCS_SYS_CACHE_LINE_SIZE); + + /* initiate the pointer to the beginning of the first FIFO element */ + *fifo_ctl_p = fifo_ctl; + *fifo_elems_p = UCS_PTR_BYTE_OFFSET(fifo_ctl, UCT_MM_FIFO_CTL_SIZE); +} +/* Create the non-blocking datagram socket used for wakeup signals and publish its bound address in the receive FIFO control area */ +static ucs_status_t uct_mm_iface_create_signal_fd(uct_mm_iface_t *iface) +{ + ucs_status_t status; + socklen_t addrlen; + struct sockaddr_un bind_addr; + int ret; + + /* Create a UNIX domain socket to send and receive wakeup signal from remote processes */ + iface->signal_fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (iface->signal_fd < 0) { + ucs_error("Failed to create unix domain socket for signal: %m"); + status = UCS_ERR_IO_ERROR; + goto err; + } + + /* Set the signal socket to non-blocking mode */ + status = ucs_sys_fcntl_modfl(iface->signal_fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + goto err_close; + } + + /* Bind the signal socket to automatic address */ + bind_addr.sun_family = AF_UNIX; + memset(bind_addr.sun_path, 0, sizeof(bind_addr.sun_path)); + ret = bind(iface->signal_fd, (struct sockaddr*)&bind_addr, sizeof(sa_family_t)); /* an address length of only sizeof(sa_family_t) requests an autobound address (Linux behaviour, see unix(7)) */ + if (ret < 0) { + ucs_error("Failed to auto-bind unix domain socket: %m"); + status = UCS_ERR_IO_ERROR; + goto err_close; + } + + /* Share the socket address on the FIFO control area, so we would not have + * to enlarge the interface address size. 
+ */ + addrlen = sizeof(struct sockaddr_un); + memset(&iface->recv_fifo_ctl->signal_sockaddr, 0, addrlen); + ret = getsockname(iface->signal_fd, + (struct sockaddr *)ucs_unaligned_ptr(&iface->recv_fifo_ctl->signal_sockaddr), + &addrlen); + if (ret < 0) { + ucs_error("Failed to retrieve unix domain socket address: %m"); + status = UCS_ERR_IO_ERROR; + goto err_close; + } + + iface->recv_fifo_ctl->signal_addrlen = addrlen; + return UCS_OK; + +err_close: + close(iface->signal_fd); +err: + return status; +} + +static void uct_mm_iface_log_created(uct_mm_iface_t *iface) +{ + uct_mm_seg_t UCS_V_UNUSED *seg = iface->recv_fifo_mem.memh; + + ucs_debug("created mm iface %p FIFO id 0x%lx va %p size %zu (%u x %u elems)", + iface, seg->seg_id, seg->address, seg->length, + iface->config.fifo_elem_size, iface->config.fifo_size); +} + +static UCS_CLASS_INIT_FUNC(uct_mm_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_mm_iface_config_t *mm_config = + ucs_derived_of(tl_config, uct_mm_iface_config_t); + uct_mm_fifo_element_t* fifo_elem_p; + ucs_status_t status; + unsigned i; + + UCS_CLASS_CALL_SUPER_INIT(uct_sm_iface_t, &uct_mm_iface_ops, md, + worker, params, tl_config); + + if (ucs_derived_of(worker, uct_priv_worker_t)->thread_mode == UCS_THREAD_MODE_MULTI) { + ucs_error("Shared memory transport does not support multi-threaded worker"); + return UCS_ERR_INVALID_PARAM; + } + + /* check that the fifo size, from the user, is a power of two and bigger than 1 */ + if ((mm_config->fifo_size <= 1) || ucs_is_pow2(mm_config->fifo_size) != 1) { + ucs_error("The MM FIFO size must be a power of two and bigger than 1."); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + /* check the value defining the FIFO batch release */ + if ((mm_config->release_fifo_factor < 0) || (mm_config->release_fifo_factor >= 1)) { + ucs_error("The MM release FIFO factor must be: (0 =< factor < 1)."); + status = UCS_ERR_INVALID_PARAM; + 
goto err; + } + + /* check the value defining the size of the FIFO element */ + if (mm_config->fifo_elem_size <= sizeof(uct_mm_fifo_element_t)) { + ucs_error("The UCX_MM_FIFO_ELEM_SIZE parameter (%u) must be larger " + "than the FIFO element header size (%zu bytes).", + mm_config->fifo_elem_size, sizeof(uct_mm_fifo_element_t)); + status = UCS_ERR_INVALID_PARAM; + goto err; + } + + self->config.fifo_size = mm_config->fifo_size; + self->config.fifo_elem_size = mm_config->fifo_elem_size; + self->config.seg_size = mm_config->seg_size; + /* cppcheck-suppress internalAstError */ + self->fifo_release_factor_mask = UCS_MASK(ucs_ilog2(ucs_max((int) + (mm_config->fifo_size * mm_config->release_fifo_factor), + 1))); + self->fifo_mask = mm_config->fifo_size - 1; + self->fifo_shift = ucs_count_trailing_zero_bits(mm_config->fifo_size); + self->rx_headroom = (params->field_mask & + UCT_IFACE_PARAM_FIELD_RX_HEADROOM) ? + params->rx_headroom : 0; + self->release_desc.cb = uct_mm_iface_release_desc; + + /* Allocate the receive FIFO */ + status = uct_iface_mem_alloc(&self->super.super.super, + UCT_MM_GET_FIFO_SIZE(self), + UCT_MD_MEM_ACCESS_ALL, "mm_recv_fifo", + &self->recv_fifo_mem); + if (status != UCS_OK) { + ucs_error("mm_iface failed to allocate receive FIFO"); + return status; + } + + uct_mm_iface_set_fifo_ptrs(self->recv_fifo_mem.address, + &self->recv_fifo_ctl, &self->recv_fifo_elems); + self->recv_fifo_ctl->head = 0; + self->recv_fifo_ctl->tail = 0; + self->read_index = 0; + + /* create a unix file descriptor to receive event notifications */ + status = uct_mm_iface_create_signal_fd(self); + if (status != UCS_OK) { + goto err_free_fifo; + } + + /* create a memory pool for receive descriptors */ + status = uct_iface_mpool_init(&self->super.super, + &self->recv_desc_mp, + sizeof(uct_mm_recv_desc_t) + self->rx_headroom + + self->config.seg_size, + sizeof(uct_mm_recv_desc_t), + UCS_SYS_CACHE_LINE_SIZE, + &mm_config->mp, + 512, + uct_mm_iface_recv_desc_init, + "mm_recv_desc"); + 
if (status != UCS_OK) { + ucs_error("failed to create a receive descriptor memory pool for the MM transport"); + goto err_close_signal_fd; + } + + ucs_mpool_grow(&self->recv_desc_mp, mm_config->fifo_size * 2); + + /* set the first receive descriptor */ + self->last_recv_desc = ucs_mpool_get(&self->recv_desc_mp); + VALGRIND_MAKE_MEM_DEFINED(self->last_recv_desc, sizeof(*(self->last_recv_desc))); + if (self->last_recv_desc == NULL) { + ucs_error("failed to get the first receive descriptor"); + status = UCS_ERR_NO_RESOURCE; + goto destroy_recv_mpool; + } + + /* initiate the owner bit in all the FIFO elements and assign a receive descriptor + * per every FIFO element */ + for (i = 0; i < mm_config->fifo_size; i++) { + fifo_elem_p = UCT_MM_IFACE_GET_FIFO_ELEM(self, self->recv_fifo_elems, i); + fifo_elem_p->flags = UCT_MM_FIFO_ELEM_FLAG_OWNER; + + status = uct_mm_assign_desc_to_fifo_elem(self, fifo_elem_p, 1); + if (status != UCS_OK) { + ucs_error("failed to allocate a descriptor for MM"); + goto destroy_descs; + } + } + + ucs_arbiter_init(&self->arbiter); + uct_mm_iface_log_created(self); + + return UCS_OK; + +destroy_descs: + uct_mm_iface_free_rx_descs(self, i); + ucs_mpool_put(self->last_recv_desc); +destroy_recv_mpool: + ucs_mpool_cleanup(&self->recv_desc_mp, 1); +err_close_signal_fd: + close(self->signal_fd); +err_free_fifo: + uct_iface_mem_free(&self->recv_fifo_mem); +err: + return status; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_mm_iface_t) +{ + uct_base_iface_progress_disable(&self->super.super.super, + UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); + + /* return all the descriptors that are now 'assigned' to the FIFO, + * to their mpool */ + uct_mm_iface_free_rx_descs(self, self->config.fifo_size); + + ucs_mpool_put(self->last_recv_desc); + ucs_mpool_cleanup(&self->recv_desc_mp, 1); + close(self->signal_fd); + uct_iface_mem_free(&self->recv_fifo_mem); + ucs_arbiter_cleanup(&self->arbiter); +} + +UCS_CLASS_DEFINE(uct_mm_iface_t, uct_base_iface_t); + 
+UCS_CLASS_DEFINE_NEW_FUNC(uct_mm_iface_t, uct_iface_t, uct_md_h, uct_worker_h, + const uct_iface_params_t*, const uct_iface_config_t*); +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_mm_iface_t, uct_iface_t); diff --git a/src/uct/sm/mm/base/mm_iface.h b/src/uct/sm/mm/base/mm_iface.h new file mode 100644 index 0000000..b008562 --- /dev/null +++ b/src/uct/sm/mm/base/mm_iface.h @@ -0,0 +1,248 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_MM_IFACE_H +#define UCT_MM_IFACE_H + +#include "mm_md.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +enum { + UCT_MM_FIFO_ELEM_FLAG_OWNER = UCS_BIT(0), /* new/old info */ + UCT_MM_FIFO_ELEM_FLAG_INLINE = UCS_BIT(1), /* if inline or not */ +}; + + +#define UCT_MM_FIFO_CTL_SIZE \ + ucs_align_up(sizeof(uct_mm_fifo_ctl_t), UCS_SYS_CACHE_LINE_SIZE) + + +#define UCT_MM_GET_FIFO_SIZE(_iface) \ + (UCT_MM_FIFO_CTL_SIZE + \ + ((_iface)->config.fifo_size * (_iface)->config.fifo_elem_size) + \ + (UCS_SYS_CACHE_LINE_SIZE - 1)) + + +#define UCT_MM_IFACE_GET_FIFO_ELEM(_iface, _fifo, _index) \ + ((uct_mm_fifo_element_t*) \ + UCS_PTR_BYTE_OFFSET(_fifo, (_index) * (_iface)->config.fifo_elem_size)) + + +#define uct_mm_iface_mapper_call(_iface, _func, ...) 
\ + ({ \ + uct_mm_md_t *md = ucs_derived_of((_iface)->super.super.md, uct_mm_md_t); \ + uct_mm_md_mapper_call(md, _func, ## __VA_ARGS__); \ + }) + + +/** + * MM interface configuration + */ +typedef struct uct_mm_iface_config { + uct_sm_iface_config_t super; + size_t seg_size; /* Size of the receive + * descriptor (for payload) */ + unsigned fifo_size; /* Size of the receive FIFO */ + double release_fifo_factor; /* Tail index update frequency */ + ucs_ternary_value_t hugetlb_mode; /* Enable using huge pages for + * shared memory buffers */ + unsigned fifo_elem_size; /* Size of the FIFO element size */ + uct_iface_mpool_config_t mp; +} uct_mm_iface_config_t; + + +/** + * MM interface address + */ +typedef struct uct_mm_iface_addr { + uct_mm_seg_id_t fifo_seg_id; /* Shared memory identifier of FIFO */ + /* mapper-specific iface address follows */ +} UCS_S_PACKED uct_mm_iface_addr_t; + + +/** + * MM FIFO control segment + */ +typedef struct uct_mm_fifo_ctl { + /* 1st cacheline */ + volatile uint64_t head; /* Where to write next */ + socklen_t signal_addrlen; /* Address length of signaling socket */ + struct sockaddr_un signal_sockaddr;/* Address of signaling socket */ + UCS_CACHELINE_PADDING(uint64_t, + socklen_t, + struct sockaddr_un); + + /* 2nd cacheline */ + volatile uint64_t tail; /* How much was consumed */ +} UCS_S_PACKED UCS_V_ALIGNED(UCS_SYS_CACHE_LINE_SIZE) uct_mm_fifo_ctl_t; + + +/** + * MM receive descriptor info in the shared FIFO + */ +typedef struct uct_mm_desc_info { + uct_mm_seg_id_t seg_id; /* shared memory segment id */ + unsigned seg_size; /* size of the shared memory segment */ + unsigned offset; /* offset inside the shared memory + segment */ +} UCS_S_PACKED uct_mm_desc_info_t; + + +/** + * MM FIFO element + */ +typedef struct uct_mm_fifo_element { + uint8_t flags; /* UCT_MM_FIFO_ELEM_FLAG_xx */ + uint8_t am_id; /* active message id */ + uint16_t length; /* length of actual data written + by producer */ + uct_mm_desc_info_t desc; /* remote 
receive descriptor + parameters for am_bcopy */ + void *desc_data; /* pointer to receive descriptor, + valid only on receiver */ + + /* the data follows here (in case of inline messaging) */ +} UCS_S_PACKED uct_mm_fifo_element_t; + + +/* + * MM receive descriptor: + * + * +--------------------+---------------+-----------+ + * | uct_mm_recv_desc_t | user-defined | data | + * | (info + rdesc) | rx headroom | (payload) | + * +--------------------+---------------+-----------+ + */ +typedef struct uct_mm_recv_desc { + uct_mm_desc_info_t info; /* descriptor information for the + remote side which writes to it */ + uct_recv_desc_t recv; /* has to be in the end */ +} uct_mm_recv_desc_t; + + +/** + * MM transport interface + */ +typedef struct uct_mm_iface { + uct_sm_iface_t super; + + /* Receive FIFO */ + uct_allocated_memory_t recv_fifo_mem; + + uct_mm_fifo_ctl_t *recv_fifo_ctl; /* pointer to the struct at the */ + /* beginning of the receive fifo */ + /* which holds the head and the tail. */ + /* this struct is cache line aligned and */ + /* doesn't necessarily start where */ + /* shared_mem starts */ + void *recv_fifo_elems; /* pointer to the first fifo element + in the receive fifo */ + uint64_t read_index; /* actual reading location */ + + uint8_t fifo_shift; /* = log2(fifo_size) */ + unsigned fifo_mask; /* = 2^fifo_shift - 1 */ + uint64_t fifo_release_factor_mask; + + ucs_mpool_t recv_desc_mp; + uct_mm_recv_desc_t *last_recv_desc; /* next receive descriptor to use */ + + int signal_fd; /* Unix socket for receiving remote signal */ + + size_t rx_headroom; + ucs_arbiter_t arbiter; + uct_recv_desc_t release_desc; + + struct { + unsigned fifo_size; + unsigned fifo_elem_size; + unsigned seg_size; /* size of the receive descriptor (for payload)*/ + } config; +} uct_mm_iface_t; + + +/* + * Define a memory-mapper transport for MM. + * + * @param _name Component name token + * @param _md_ops Memory domain operations, of type uct_mm_md_mapper_ops_t. 
+ * @param _rkey_unpack Remote key unpack function + * @param _rkey_release Remote key release function + * @param _cfg_prefix Prefix for configuration variables. + */ +#define UCT_MM_TL_DEFINE(_name, _md_ops, _rkey_unpack, _rkey_release, \ + _cfg_prefix) \ + \ + UCT_MM_COMPONENT_DEFINE(uct_##_name##_component, _name, _md_ops, \ + _rkey_unpack, _rkey_release, _cfg_prefix) \ + \ + UCT_TL_DEFINE(&(uct_##_name##_component).super, \ + _name, \ + uct_sm_base_query_tl_devices, \ + uct_mm_iface_t, \ + "MM_", \ + uct_mm_iface_config_table, \ + uct_mm_iface_config_t); + + +extern ucs_config_field_t uct_mm_iface_config_table[]; + + +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_mm_iface_invoke_am(uct_mm_iface_t *iface, uint8_t am_id, void *data, + unsigned length, unsigned flags) +{ + ucs_status_t status; + void *desc; + + status = uct_iface_invoke_am(&iface->super.super, am_id, data, length, + flags); + + if (status == UCS_INPROGRESS) { + desc = (void *)((uintptr_t)data - iface->rx_headroom); + /* save the release_desc for later release of this desc */ + uct_recv_desc(desc) = &iface->release_desc; + } + + return status; +} + + +/** + * Set aligned pointers of the FIFO according to the beginning of the allocated + * memory. + * @param [in] fifo_mem Pointer to the beginning of the allocated memory. + * @param [out] fifo_ctl_p Pointer to the FIFO control structure. + * @param [out] fifo_elems Pointer to the array of FIFO elements. 
+ */ +void uct_mm_iface_set_fifo_ptrs(void *fifo_mem, uct_mm_fifo_ctl_t **fifo_ctl_p, + void **fifo_elems_p); + + +UCS_CLASS_DECLARE_NEW_FUNC(uct_mm_iface_t, uct_iface_t, uct_md_h, uct_worker_h, + const uct_iface_params_t*, const uct_iface_config_t*); + + +void uct_mm_iface_release_desc(uct_recv_desc_t *self, void *desc); + + +ucs_status_t uct_mm_flush(); + + +unsigned uct_mm_iface_progress(void *arg); + + +#endif diff --git a/src/uct/sm/mm/base/mm_md.c b/src/uct/sm/mm/base/mm_md.c new file mode 100644 index 0000000..0461805 --- /dev/null +++ b/src/uct/sm/mm/base/mm_md.c @@ -0,0 +1,148 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "mm_md.h" + +#include +#include +#include + + +ucs_config_field_t uct_mm_md_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_mm_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)}, + + {"HUGETLB_MODE", "try", + "Enable using huge pages for internal buffers. 
" + "Possible values are:\n" + " y - Allocate memory using huge pages only.\n" + " n - Allocate memory using regular pages only.\n" + " try - Try to allocate memory using huge pages and if it fails, allocate regular pages.\n", + ucs_offsetof(uct_mm_md_config_t, hugetlb_mode), UCS_CONFIG_TYPE_TERNARY}, + + {NULL} +}; + +ucs_status_t uct_mm_query_md_resources(uct_component_t *component, + uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p) +{ + ucs_status_t status; + + status = uct_mm_mdc_mapper_ops(component)->query(); + switch (status) { + case UCS_OK: + return uct_md_query_single_md_resource(component, resources_p, + num_resources_p); + case UCS_ERR_UNSUPPORTED: + return uct_md_query_empty_md_resource(resources_p, num_resources_p); + default: + return status; + } +} + +ucs_status_t uct_mm_seg_new(void *address, size_t length, uct_mm_seg_t **seg_p) +{ + uct_mm_seg_t *seg; + + seg = ucs_malloc(sizeof(*seg), "mm_seg"); + if (seg == NULL) { + ucs_error("failed to allocate mm segment"); + return UCS_ERR_NO_MEMORY; + } + + seg->address = address; + seg->length = length; + seg->seg_id = 0; + *seg_p = seg; + return UCS_OK; +} + +void uct_mm_md_query(uct_md_h md, uct_md_attr_t *md_attr, int support_alloc) +{ + memset(md_attr, 0, sizeof(*md_attr)); + + md_attr->cap.flags = UCT_MD_FLAG_RKEY_PTR | + UCT_MD_FLAG_NEED_RKEY; + md_attr->cap.max_reg = 0; + md_attr->cap.max_alloc = 0; + md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_HOST; + md_attr->cap.detect_mem_types = 0; + + if (support_alloc) { + md_attr->cap.flags |= UCT_MD_FLAG_ALLOC | UCT_MD_FLAG_FIXED; + md_attr->cap.max_alloc = ULONG_MAX; + } + + memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus)); +} + +ucs_status_t uct_mm_rkey_ptr(uct_component_t *component, uct_rkey_t rkey, + void *handle, uint64_t raddr, void **laddr_p) +{ + /* rkey stores offset from the remote va */ + *laddr_p = UCS_PTR_BYTE_OFFSET(raddr, (ptrdiff_t)rkey); + return UCS_OK; +} + +ucs_status_t uct_mm_md_open(uct_component_t 
*component, const char *md_name, + const uct_md_config_t *config, uct_md_h *md_p) +{ + uct_mm_component_t *mmc = ucs_derived_of(component, uct_mm_component_t); + ucs_status_t status; + uct_mm_md_t *md; + + md = ucs_malloc(sizeof(*md), "uct_mm_md_t"); + if (md == NULL) { + ucs_error("Failed to allocate memory for uct_mm_md_t"); + status = UCS_ERR_NO_MEMORY; + goto err; + } + + md->config = ucs_malloc(mmc->super.md_config.size, "mm_md config"); + if (md->config == NULL) { + ucs_error("Failed to allocate memory for mm_md config"); + status = UCS_ERR_NO_MEMORY; + goto err_free_mm_md; + } + + status = ucs_config_parser_clone_opts(config, md->config, + mmc->super.md_config.table); + if (status != UCS_OK) { + ucs_error("Failed to clone opts"); + goto err_free_mm_md_config; + } + + md->super.ops = &mmc->md_ops->super; + md->super.component = &mmc->super; + md->iface_addr_len = mmc->md_ops->iface_addr_length(md); + + /* cppcheck-suppress autoVariables */ + *md_p = &md->super; + return UCS_OK; + +err_free_mm_md_config: + ucs_free(md->config); +err_free_mm_md: + ucs_free(md); +err: + return status; +} + +void uct_mm_md_close(uct_md_h md) +{ + uct_mm_md_t *mm_md = ucs_derived_of(md, uct_mm_md_t); + + ucs_config_parser_release_opts(mm_md->config, + md->component->md_config.table); + ucs_free(mm_md->config); + ucs_free(mm_md); +} diff --git a/src/uct/sm/mm/base/mm_md.h b/src/uct/sm/mm/base/mm_md.h new file mode 100644 index 0000000..a74f573 --- /dev/null +++ b/src/uct/sm/mm/base/mm_md.h @@ -0,0 +1,206 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_MM_MD_H_ +#define UCT_MM_MD_H_ + +#include +#include +#include +#include + + +/* Memory mapper segment unique id, used for both FIFO and bcopy descriptors. 
+ * The exact structure depends on specific mapper */ +typedef uint64_t uct_mm_seg_id_t; + + +/** + * Local memory segment structure. + * The mappers must implement memory allocation functions so that they will + * return this structure as uct_memh. + */ +typedef struct uct_mm_seg { + uct_mm_seg_id_t seg_id; /* Shared memory ID */ + void *address; /* Virtual address */ + size_t length; /* Size of the memory */ +} uct_mm_seg_t; + + +/* + * Descriptor of remote attached memory + */ +typedef struct uct_mm_remote_seg { + void *address; /* Local address of attached memory */ + void *cookie; /* Mapper-specific data */ +} uct_mm_remote_seg_t; + + +/** + * MM memory domain configuration + */ +typedef struct uct_mm_md_config { + uct_md_config_t super; + ucs_ternary_value_t hugetlb_mode; /* Enable using huge pages */ +} uct_mm_md_config_t; + + +/** + * MM memory domain + */ +typedef struct uct_mm_md { + uct_md_t super; + uct_mm_md_config_t *config; /* Clone of MD configuration */ + size_t iface_addr_len; /* As returned from + uct_mm_md_mapper_ops_t::iface_addr_length */ +} uct_mm_md_t; + + +/* Check if available on current machine */ +typedef ucs_status_t (*uct_mm_mapper_query_func_t)(); + + +/* Return the size of memory-domain specific iface address (e.g mmap path) */ +typedef size_t (*uct_mm_mapper_iface_addr_length_func_t)(uct_mm_md_t *md); + + +/* Pack interface address. Holds common information for all memory segments + * allocated on the same interface. 'buffer' must be at least the size returned + * from iface_addr_length() + */ +typedef ucs_status_t +(*uct_mm_mapper_iface_addr_pack_func_t)(uct_mm_md_t *md, void *buffer); + + +/* Attach memory allocated by mem_alloc(). seg_id is from 'uct_mm_seg_t' + * structure, and iface_addr is from iface_addr_pack() on the remote process + * + * This function is used only for active messages memory (FIFO and receive + * descriptors). 
+ */ +typedef ucs_status_t +(*uct_mm_mapper_mem_attach_func_t)(uct_mm_md_t *md, uct_mm_seg_id_t seg_id, + size_t length, const void *iface_addr, + uct_mm_remote_seg_t *rseg); + + +/* Check if memory may be attached using mem_attach. seg_id is from + * 'uct_mm_seg_t' structure, and iface_addr is from iface_addr_pack() on the + * remote process + */ +typedef int +(*uct_mm_mapper_is_reachable_func_t)(uct_mm_md_t *md, uct_mm_seg_id_t seg_id, + const void *iface_addr); + + +/* Clean up the remote segment handle created by mem_attach() */ +typedef void +(*uct_mm_mapper_mem_detach_func_t)(uct_mm_md_t *md, + const uct_mm_remote_seg_t *rseg); + + +/* + * Memory mapper operations - used to implement MD and TL functionality + */ +typedef struct uct_mm_mapper_ops { + uct_md_ops_t super; + uct_mm_mapper_query_func_t query; + uct_mm_mapper_iface_addr_length_func_t iface_addr_length; + uct_mm_mapper_iface_addr_pack_func_t iface_addr_pack; + uct_mm_mapper_mem_attach_func_t mem_attach; + uct_mm_mapper_mem_detach_func_t mem_detach; + uct_mm_mapper_is_reachable_func_t is_reachable; +} uct_mm_md_mapper_ops_t; + + +/** + * Memory mapper component + */ +typedef struct uct_mm_component { + uct_component_t super; + uct_mm_md_mapper_ops_t *md_ops; +} uct_mm_component_t; + + +/* Extract mapper ops from MM component */ +#define uct_mm_mdc_mapper_ops(_component) \ + (ucs_derived_of(_component, uct_mm_component_t)->md_ops) + + +/* Extract mapper ops from MM memory domain */ +#define uct_mm_md_mapper_ops(_md) \ + ucs_derived_of((_md)->super.ops, uct_mm_md_mapper_ops_t) + + +/* Call mapper operation */ +#define uct_mm_md_mapper_call(_md, _func, ...) \ + uct_mm_md_mapper_ops(_md)->_func(_md, ## __VA_ARGS__) + + +/* + * Define a memory-mapper component for MM. + * + * @param _var Variable for MM component. + * @param _name String which is the component name. + * @param _md_ops Mapper operations, of type uct_mm_mapper_ops_t. + * @param _cfg_prefix Prefix for configuration environment vars. 
+ */ +#define UCT_MM_COMPONENT_DEFINE(_var, _name, _md_ops, _rkey_unpack, \ + _rkey_release, _cfg_prefix) \ + \ + static uct_mm_component_t _var = { \ + .super = { \ + .query_md_resources = uct_mm_query_md_resources, \ + .md_open = uct_mm_md_open, \ + .cm_open = ucs_empty_function_return_unsupported, \ + .rkey_unpack = _rkey_unpack, \ + .rkey_ptr = uct_mm_rkey_ptr, \ + .rkey_release = _rkey_release, \ + .name = #_name, \ + .md_config = { \ + .name = #_name " memory domain", \ + .prefix = _cfg_prefix, \ + .table = uct_##_name##_md_config_table, \ + .size = sizeof(uct_##_name##_md_config_t), \ + }, \ + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, \ + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER( \ + &(_var).super), \ + .flags = 0, \ + }, \ + .md_ops = (_md_ops) \ + }; \ + UCT_COMPONENT_REGISTER(&(_var).super); \ + + +extern ucs_config_field_t uct_mm_md_config_table[]; + + +ucs_status_t uct_mm_query_md_resources(uct_component_t *component, + uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p); + +ucs_status_t uct_mm_seg_new(void *address, size_t length, uct_mm_seg_t **seg_p); + +void uct_mm_md_query(uct_md_h md, uct_md_attr_t *md_attr, int support_alloc); + +ucs_status_t uct_mm_rkey_ptr(uct_component_t *component, uct_rkey_t rkey, + void *handle, uint64_t raddr, void **laddr_p); + +ucs_status_t uct_mm_md_open(uct_component_t *component, const char *md_name, + const uct_md_config_t *config, uct_md_h *md_p); + +void uct_mm_md_close(uct_md_h md); + +static inline void +uct_mm_md_make_rkey(void *local_address, uintptr_t remote_address, + uct_rkey_t *rkey_p) +{ + *rkey_p = (uintptr_t)local_address - remote_address; +} + +#endif diff --git a/src/uct/sm/mm/configure.m4 b/src/uct/sm/mm/configure.m4 new file mode 100644 index 0000000..7c72db6 --- /dev/null +++ b/src/uct/sm/mm/configure.m4 @@ -0,0 +1,8 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + +m4_include([src/uct/sm/mm/xpmem/configure.m4]) + +AC_CONFIG_FILES([src/uct/sm/mm/Makefile]) diff --git a/src/uct/sm/mm/posix/mm_posix.c b/src/uct/sm/mm/posix/mm_posix.c new file mode 100644 index 0000000..76357ad --- /dev/null +++ b/src/uct/sm/mm/posix/mm_posix.c @@ -0,0 +1,675 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + + +/* File open flags */ +#define UCT_POSIX_SHM_CREATE_FLAGS (O_CREAT | O_EXCL | O_RDWR) /* shm create flags */ +#define UCT_POSIX_SHM_OPEN_MODE 0600 /* shm open/create mode */ + +/* Memory mapping parameters */ +#define UCT_POSIX_MMAP_PROT (PROT_READ | PROT_WRITE) + +/* Shared memory segment flags */ +#define UCT_POSIX_SEG_FLAG_PROCFS UCS_BIT(63) /* use procfs mode: mmid encodes an + open fd symlink from procfs */ +#define UCT_POSIX_SEG_FLAG_SHM_OPEN UCS_BIT(62) /* use shm_open() rather than open() */ +#define UCT_POSIX_SEG_FLAG_HUGETLB UCS_BIT(61) /* use MAP_HUGETLB */ +#define UCT_POSIX_SEG_FLAG_PID_NS UCS_BIT(60) /* use PID NS in address */ +#define UCT_POSIX_SEG_FLAGS_MASK (UCT_POSIX_SEG_FLAG_PROCFS | \ + UCT_POSIX_SEG_FLAG_SHM_OPEN | \ + UCT_POSIX_SEG_FLAG_PID_NS | \ + UCT_POSIX_SEG_FLAG_HUGETLB) +#define UCT_POSIX_SEG_MMID_MASK (~UCT_POSIX_SEG_FLAGS_MASK) + +/* Packing mmid for procfs mode */ +#define UCT_POSIX_PROCFS_MMID_FD_BITS 30 /* how many bits for file descriptor */ +#define UCT_POSIX_PROCFS_MMID_PID_BITS 30 /* how many bits for pid */ + +/* Filesystem paths */ +#define UCT_POSIX_SHM_OPEN_DIR "/dev/shm" /* directory path for shm_open() */ +#define UCT_POSIX_FILE_FMT "/ucx_shm_posix_%"PRIx64 +#define UCT_POSIX_PROCFS_FILE_FMT "/proc/%d/fd/%d" /* file pattern for procfs mode */ + + +typedef struct 
uct_posix_md_config { + uct_mm_md_config_t super; + char *dir; + int use_proc_link; +} uct_posix_md_config_t; + +typedef struct uct_posix_packed_rkey { + uint64_t seg_id; /* flags + mmid */ + uintptr_t address; + size_t length; +} UCS_S_PACKED uct_posix_packed_rkey_t; + + +static ucs_config_field_t uct_posix_md_config_table[] = { + {"MM_", "", NULL, + ucs_offsetof(uct_posix_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_mm_md_config_table)}, + + {"DIR", UCT_POSIX_SHM_OPEN_DIR, + "The path to the backing file. If it's equal to " UCT_POSIX_SHM_OPEN_DIR " then \n" + "shm_open() is used. Otherwise, open() is used.", + ucs_offsetof(uct_posix_md_config_t, dir), UCS_CONFIG_TYPE_STRING}, + + {"USE_PROC_LINK", "y", "Use /proc/<pid>/fd/<fd> to share posix file.\n" + " y - Use /proc/<pid>/fd/<fd> to share posix file.\n" + " n - Use original file path to share posix file.\n", + ucs_offsetof(uct_posix_md_config_t, use_proc_link), UCS_CONFIG_TYPE_BOOL}, + + {NULL} +}; + +static int uct_posix_use_shm_open(const uct_posix_md_config_t *posix_config) +{ + return !strcmp(posix_config->dir, UCT_POSIX_SHM_OPEN_DIR); +} + +static size_t uct_posix_iface_addr_length(uct_mm_md_t *md) +{ + const uct_posix_md_config_t *posix_config = + ucs_derived_of(md->config, uct_posix_md_config_t); + + /* if shm_open is requested, the path to the backing file is /dev/shm + * by default. however, if shm_open isn't used, the size of the path to the + * requested backing file is needed so that the user would know how much + * space to allocate for the rkey. + */ + if (posix_config->use_proc_link) { + return ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_PID) ? 0 : sizeof(ucs_sys_ns_t); + } + + return uct_posix_use_shm_open(posix_config) ? 
+ 0 : (strlen(posix_config->dir) + 1); +} + +static ucs_status_t uct_posix_md_query(uct_md_h tl_md, uct_md_attr_t *md_attr) +{ + uct_mm_md_t *md = ucs_derived_of(tl_md, uct_mm_md_t); + + uct_mm_md_query(&md->super, md_attr, 1); + md_attr->rkey_packed_size = sizeof(uct_posix_packed_rkey_t) + + uct_posix_iface_addr_length(md); + return UCS_OK; +} + +static uint64_t uct_posix_mmid_procfs_pack(int fd) +{ + pid_t pid = getpid(); + + UCS_STATIC_ASSERT(UCS_MASK(UCT_POSIX_PROCFS_MMID_PID_BITS + + UCT_POSIX_PROCFS_MMID_FD_BITS) == + UCT_POSIX_SEG_MMID_MASK); + + ucs_assert(pid <= UCS_MASK(UCT_POSIX_PROCFS_MMID_PID_BITS)); + ucs_assert(fd <= UCS_MASK(UCT_POSIX_PROCFS_MMID_FD_BITS)); + return pid | ((uint64_t)fd << UCT_POSIX_PROCFS_MMID_PID_BITS); +} + +static void uct_posix_mmid_procfs_unpack(uint64_t mmid, int *pid_p, int *fd_p) +{ + *fd_p = mmid >> UCT_POSIX_PROCFS_MMID_PID_BITS; + *pid_p = mmid & UCS_MASK(UCT_POSIX_PROCFS_MMID_PID_BITS); +} + +static ucs_status_t uct_posix_test_mem(int shm_fd, size_t length) +{ + const size_t chunk_size = 64 * UCS_KBYTE; + size_t size_to_write, remaining; + ssize_t single_write; + ucs_status_t status; + int *buf; + + buf = ucs_malloc(chunk_size, "write buffer"); + if (buf == NULL) { + ucs_error("Failed to allocate memory for testing space for backing file."); + status = UCS_ERR_NO_MEMORY; + goto out; + } + + memset(buf, 0, chunk_size); + if (lseek(shm_fd, 0, SEEK_SET) < 0) { + ucs_error("lseek failed. %m"); + status = UCS_ERR_IO_ERROR; + goto out_free_buf; + } + + remaining = length; + while (remaining > 0) { + size_to_write = ucs_min(remaining, chunk_size); + single_write = write(shm_fd, buf, size_to_write); + + if (single_write < 0) { + switch(errno) { + case ENOSPC: + ucs_error("Not enough memory to write total of %zu bytes. " + "Please check that /dev/shm or the directory you specified has " + "more available memory.", length); + status = UCS_ERR_NO_MEMORY; + break; + default: + ucs_error("Failed to write %zu bytes. 
%m", size_to_write); + status = UCS_ERR_IO_ERROR; + } + goto out_free_buf; + } + + remaining -= single_write; + } + + status = UCS_OK; + +out_free_buf: + ucs_free(buf); +out: + return status; +} + +ucs_status_t uct_posix_open_check_result(const char *func, const char *file_name, + int open_flags, int ret, int *fd_p) +{ + if (ret >= 0) { + *fd_p = ret; + return UCS_OK; + } else if (errno == EEXIST) { + return UCS_ERR_ALREADY_EXISTS; + } else { + ucs_error("%s(file_name=%s flags=0x%x) failed: %m", func, file_name, + open_flags); + return UCS_ERR_SHMEM_SEGMENT; + } +} + +static ucs_status_t uct_posix_shm_open(uint64_t mmid, int open_flags, int *fd_p) +{ + char file_name[NAME_MAX]; + int ret; + + ucs_snprintf_safe(file_name, sizeof(file_name), UCT_POSIX_FILE_FMT, mmid); + ret = shm_open(file_name, open_flags | O_RDWR, UCT_POSIX_SHM_OPEN_MODE); + return uct_posix_open_check_result("shm_open", file_name, open_flags, ret, + fd_p); +} + +static ucs_status_t uct_posix_file_open(const char *dir, uint64_t mmid, + int open_flags, int* fd_p) +{ + char file_path[PATH_MAX]; + int ret; + + ucs_snprintf_safe(file_path, sizeof(file_path), "%s" UCT_POSIX_FILE_FMT, + dir, mmid); + ret = open(file_path, open_flags | O_RDWR, UCT_POSIX_SHM_OPEN_MODE); + return uct_posix_open_check_result("open", file_path, open_flags, ret, fd_p); +} + +static ucs_status_t uct_posix_procfs_open(int pid, int peer_fd, int* fd_p) +{ + char file_path[PATH_MAX]; + int ret; + + ucs_snprintf_safe(file_path, sizeof(file_path), UCT_POSIX_PROCFS_FILE_FMT, + pid, peer_fd); + ret = open(file_path, O_RDWR, UCT_POSIX_SHM_OPEN_MODE); + return uct_posix_open_check_result("open", file_path, 0, ret, fd_p); +} + +static ucs_status_t uct_posix_unlink(uct_mm_md_t *md, uint64_t seg_id) +{ + uct_posix_md_config_t *posix_config = ucs_derived_of(md->config, + uct_posix_md_config_t); + char file_path[PATH_MAX]; + int ret; + + if (seg_id & UCT_POSIX_SEG_FLAG_SHM_OPEN) { + ucs_snprintf_safe(file_path, sizeof(file_path), 
UCT_POSIX_FILE_FMT, + seg_id & UCT_POSIX_SEG_MMID_MASK); + ret = shm_unlink(file_path); + if (ret < 0) { + ucs_error("shm_unlink(%s) failed: %m", file_path); + return UCS_ERR_SHMEM_SEGMENT; + } + } else { + ucs_snprintf_safe(file_path, sizeof(file_path), "%s" UCT_POSIX_FILE_FMT, + posix_config->dir, seg_id & UCT_POSIX_SEG_MMID_MASK); + ret = unlink(file_path); + if (ret < 0) { + ucs_error("unlink(%s) failed: %m", file_path); + return UCS_ERR_SHMEM_SEGMENT; + } + } + + return UCS_OK; +} + +static ucs_status_t +uct_posix_mmap(void **address_p, size_t *length_p, int flags, int fd, + const char *alloc_name, ucs_log_level_t err_level) +{ + size_t aligned_length; + void *result; + + aligned_length = ucs_align_up_pow2(*length_p, ucs_get_page_size()); + +#ifdef MAP_HUGETLB + if (flags & MAP_HUGETLB) { + ssize_t huge_page_size = ucs_get_huge_page_size(); + size_t huge_aligned_length; + + if (huge_page_size <= 0) { + ucs_debug("huge pages are not supported on the system"); + return UCS_ERR_NO_MEMORY; /* Huge pages not supported */ + } + + huge_aligned_length = ucs_align_up_pow2(aligned_length, huge_page_size); + if (huge_aligned_length > (2 * aligned_length)) { + return UCS_ERR_EXCEEDS_LIMIT; /* Do not align up by more than 2x */ + } + + aligned_length = huge_aligned_length; + } +#endif + + result = ucs_mmap(*address_p, aligned_length, UCT_POSIX_MMAP_PROT, + MAP_SHARED | flags, fd, 0 UCS_MEMTRACK_VAL); + if (result == MAP_FAILED) { + ucs_log(err_level, + "shared memory mmap(addr=%p, length=%zu, flags=%s%s, fd=%d) failed: %m", + *address_p, aligned_length, + (flags & MAP_FIXED) ? " FIXED" : "", +#ifdef MAP_HUGETLB + (flags & MAP_HUGETLB) ? 
" HUGETLB" : "", +#else + "", +#endif + fd); + return UCS_ERR_SHMEM_SEGMENT; + } + + *address_p = result; + *length_p = aligned_length; + + return UCS_OK; +} + +static ucs_status_t uct_posix_munmap(void *address, size_t length) +{ + int ret; + + ret = ucs_munmap(address, length); + if (ret != 0) { + ucs_warn("shared memory munmap(address=%p, length=%zu) failed: %m", + address, length); + return UCS_ERR_SHMEM_SEGMENT; + } + + return UCS_OK; +} + +static ucs_status_t +uct_posix_mem_attach_common(uct_mm_seg_id_t seg_id, size_t length, + const char *dir, uct_mm_remote_seg_t *rseg) +{ + uint64_t mmid = seg_id & UCT_POSIX_SEG_MMID_MASK; + int pid, peer_fd, fd; + ucs_status_t status; + int mmap_flags; + + ucs_assert(length > 0); + rseg->cookie = (void*)length; + + if (seg_id & UCT_POSIX_SEG_FLAG_PROCFS) { + uct_posix_mmid_procfs_unpack(mmid, &pid, &peer_fd); + status = uct_posix_procfs_open(pid, peer_fd, &fd); + } else if (seg_id & UCT_POSIX_SEG_FLAG_SHM_OPEN) { + status = uct_posix_shm_open(mmid, 0, &fd); + } else { + ucs_assert(dir != NULL); /* for coverity */ + status = uct_posix_file_open(dir, mmid, 0, &fd); + } + if (status != UCS_OK) { + return status; + } + +#ifdef MAP_HUGETLB + mmap_flags = (seg_id & UCT_POSIX_SEG_FLAG_HUGETLB) ? 
MAP_HUGETLB : 0; +#else + mmap_flags = 0; +#endif + rseg->address = NULL; + status = uct_posix_mmap(&rseg->address, &length, mmap_flags, fd, + "posix_attach", UCS_LOG_LEVEL_ERROR); + close(fd); + return status; +} + +static int +uct_posix_is_reachable(uct_mm_md_t *md, uct_mm_seg_id_t seg_id, + const void *iface_addr) +{ + if (seg_id & UCT_POSIX_SEG_FLAG_PID_NS) { + return ucs_sys_get_ns(UCS_SYS_NS_TYPE_PID) == *(const ucs_sys_ns_t*)iface_addr; + } + + return ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_PID); +} + +static ucs_status_t uct_posix_mem_detach_common(const uct_mm_remote_seg_t *rseg) +{ + return uct_posix_munmap(rseg->address, (size_t)rseg->cookie); +} + +static ucs_status_t +uct_posix_segment_open(uct_mm_md_t *md, uct_mm_seg_id_t *seg_id_p, int *fd_p) +{ + uct_posix_md_config_t *posix_config = ucs_derived_of(md->config, + uct_posix_md_config_t); + uint64_t mmid, flags; + ucs_status_t status; + unsigned rand_seed; + + /* Generate random 32-bit shared memory id and make sure it's not used + * already by opening the file with O_CREAT|O_EXCL */ + rand_seed = ucs_generate_uuid((uintptr_t)md); + for (;;) { + mmid = rand_r(&rand_seed); + ucs_assert(mmid <= UCT_POSIX_SEG_MMID_MASK); + if (uct_posix_use_shm_open(posix_config)) { + flags = UCT_POSIX_SEG_FLAG_SHM_OPEN; + status = uct_posix_shm_open(mmid, UCT_POSIX_SHM_CREATE_FLAGS, fd_p); + } else { + flags = 0; + status = uct_posix_file_open(posix_config->dir, mmid, + UCT_POSIX_SHM_CREATE_FLAGS, fd_p); + } + if (status == UCS_OK) { + *seg_id_p = mmid | flags; + return UCS_OK; /* found unique file name */ + } else if (status != UCS_ERR_ALREADY_EXISTS) { + return status; /* unexpected error (e.g permission denied) */ + } + /* file exists, retry */ + } +} + +static ucs_status_t +uct_posix_mem_alloc(uct_md_h tl_md, size_t *length_p, void **address_p, + unsigned flags, const char *alloc_name, uct_mem_h *memh_p) +{ + uct_mm_md_t *md = ucs_derived_of(tl_md, uct_mm_md_t); + uct_posix_md_config_t *posix_config = 
ucs_derived_of(md->config, + uct_posix_md_config_t); + ucs_status_t status; + uct_mm_seg_t *seg; + int force_hugetlb; + int mmap_flags; + void *address; + int fd; + + status = uct_mm_seg_new(*address_p, *length_p, &seg); + if (status != UCS_OK) { + goto err; + } + + status = uct_posix_segment_open(md, &seg->seg_id, &fd); + if (status != UCS_OK) { + goto err_free_seg; + } + + /* Check if the location of the backing file has enough memory for the + * needed size by trying to write there before calling mmap */ + status = uct_posix_test_mem(fd, seg->length); + if (status != UCS_OK) { + goto err_close; + } + + /* If using procfs link instead of mmid, remove the original file and update + * seg->seg_id */ + if (posix_config->use_proc_link) { + status = uct_posix_unlink(md, seg->seg_id); + if (status != UCS_OK) { + goto err_close; + } + + /* Replace mmid by pid+fd. Keep previous SHM_OPEN flag for mkey_pack() */ + seg->seg_id = uct_posix_mmid_procfs_pack(fd) | + (seg->seg_id & UCT_POSIX_SEG_FLAG_SHM_OPEN) | + UCT_POSIX_SEG_FLAG_PROCFS | + (ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_PID) ? 0 : + UCT_POSIX_SEG_FLAG_PID_NS); + } + + /* mmap the shared memory segment that was created by shm_open */ + if (flags & UCT_MD_MEM_FLAG_FIXED) { + mmap_flags = MAP_FIXED; + } else { + seg->address = NULL; + mmap_flags = 0; + } + + /* try HUGETLB mmap */ + address = MAP_FAILED; + if (posix_config->super.hugetlb_mode != UCS_NO) { + force_hugetlb = (posix_config->super.hugetlb_mode == UCS_YES); +#ifdef MAP_HUGETLB + status = uct_posix_mmap(&seg->address, &seg->length, + mmap_flags | MAP_HUGETLB, fd, alloc_name, + force_hugetlb ? 
UCS_LOG_LEVEL_ERROR : + UCS_LOG_LEVEL_DEBUG); +#else + status = UCS_ERR_SHMEM_SEGMENT; + if (force_hugetlb) { + ucs_error("shared memory allocation failed: " + "MAP_HUGETLB is not supported on the system"); + } +#endif + if ((status != UCS_OK) && force_hugetlb) { + goto err_close; + } else if (status == UCS_OK) { + seg->seg_id |= UCT_POSIX_SEG_FLAG_HUGETLB; + } + } + + /* fallback to regular mmap */ + if (address == MAP_FAILED) { + ucs_assert(posix_config->super.hugetlb_mode != UCS_YES); + status = uct_posix_mmap(&seg->address, &seg->length, mmap_flags, fd, + alloc_name, UCS_LOG_LEVEL_ERROR); + if (status != UCS_OK) { + goto err_close; + } + } + + /* create new memory segment */ + ucs_debug("allocated posix shared memory at %p length %zu", seg->address, + seg->length); + + if (!posix_config->use_proc_link) { + /* closing the file here since the peers will open it by file system path */ + close(fd); + } + + *address_p = seg->address; + *length_p = seg->length; + *memh_p = seg; + return UCS_OK; + +err_close: + close(fd); + if (!(seg->seg_id & UCT_POSIX_SEG_FLAG_PROCFS)) { + uct_posix_unlink(md, seg->seg_id); + } +err_free_seg: + ucs_free(seg); +err: + return status; +} + +static ucs_status_t uct_posix_mem_free(uct_md_h tl_md, uct_mem_h memh) +{ + uct_mm_md_t *md = ucs_derived_of(tl_md, uct_mm_md_t); + uct_mm_seg_t *seg = memh; + ucs_status_t status; + int fd, dummy_pid; + + status = uct_posix_munmap(seg->address, seg->length); + if (status != UCS_OK) { + return status; + } + + if (seg->seg_id & UCT_POSIX_SEG_FLAG_PROCFS) { + uct_posix_mmid_procfs_unpack(seg->seg_id & UCT_POSIX_SEG_MMID_MASK, + &dummy_pid, &fd); + ucs_assert(dummy_pid == getpid()); + close(fd); + } else { + status = uct_posix_unlink(md, seg->seg_id); + if (status != UCS_OK) { + return status; + } + } + + ucs_free(seg); + return UCS_OK; +} + +static void uct_posix_copy_dir(uct_mm_md_t *md, void *buffer) +{ + const uct_posix_md_config_t *posix_config = + ucs_derived_of(md->config, 
uct_posix_md_config_t); + + memcpy(buffer, posix_config->dir, strlen(posix_config->dir) + 1); /* includes the terminating NUL */ +} +/* Pack the iface address: PID namespace (proc link outside the default namespace), directory (file mode), or nothing */ +static ucs_status_t uct_posix_iface_addr_pack(uct_mm_md_t *md, void *buffer) +{ + const uct_posix_md_config_t *posix_config = + ucs_derived_of(md->config, uct_posix_md_config_t); + + if (posix_config->use_proc_link) { + if (!ucs_sys_ns_is_default(UCS_SYS_NS_TYPE_PID)) { + *(ucs_sys_ns_t*)buffer = ucs_sys_get_ns(UCS_SYS_NS_TYPE_PID); + } + return UCS_OK; + } + + if (!uct_posix_use_shm_open(posix_config)) { + uct_posix_copy_dir(md, buffer); + } + + return UCS_OK; +} +/* Pack segment id, address and length; the directory is appended when the segment is a plain file */ +static ucs_status_t +uct_posix_md_mkey_pack(uct_md_h tl_md, uct_mem_h memh, void *rkey_buffer) +{ + uct_mm_md_t *md = ucs_derived_of(tl_md, uct_mm_md_t); + uct_mm_seg_t *seg = memh; + uct_posix_packed_rkey_t *packed_rkey = rkey_buffer; + + packed_rkey->seg_id = seg->seg_id; + packed_rkey->address = (uintptr_t)seg->address; + packed_rkey->length = seg->length; + if (!(seg->seg_id & UCT_POSIX_SEG_FLAG_SHM_OPEN) && + !(seg->seg_id & UCT_POSIX_SEG_FLAG_PROCFS)) { + uct_posix_copy_dir(md, packed_rkey + 1); + } + + return UCS_OK; +} +/* Mapper attach callback; iface_addr is forwarded as the directory argument of the common attach */ +static ucs_status_t uct_posix_mem_attach(uct_mm_md_t *md, uct_mm_seg_id_t seg_id, + size_t length, const void *iface_addr, + uct_mm_remote_seg_t *remote_seg) +{ + return uct_posix_mem_attach_common(seg_id, length, iface_addr, remote_seg); +} +/* Mapper detach callback; the status of the common detach is not propagated */ +static void uct_posix_mem_detach(uct_mm_md_t *md, const uct_mm_remote_seg_t *rseg) +{ + uct_posix_mem_detach_common(rseg); +} +/* Attach to the segment described by a packed rkey; the new descriptor is returned in *handle_p */ +static ucs_status_t +uct_posix_rkey_unpack(uct_component_t *component, const void *rkey_buffer, + uct_rkey_t *rkey_p, void **handle_p) +{ + const uct_posix_packed_rkey_t *packed_rkey = rkey_buffer; + uct_mm_remote_seg_t *rseg; + ucs_status_t status; + + rseg = ucs_malloc(sizeof(*rseg), "posix_remote_seg"); + if (rseg == NULL) { + ucs_error("failed to allocate posix remote segment descriptor"); + return UCS_ERR_NO_MEMORY; + } + + status = uct_posix_mem_attach_common(packed_rkey->seg_id, + packed_rkey->length, 
(const char*)(packed_rkey + 1), rseg); /* directory string follows the packed rkey */ + if (status != UCS_OK) { + ucs_free(rseg); + return status; + } + + uct_mm_md_make_rkey(rseg->address, packed_rkey->address, rkey_p); + *handle_p = rseg; + return UCS_OK; +} +/* Detach the remote segment and free its descriptor; the descriptor is not freed if unmapping fails */ +static ucs_status_t +uct_posix_rkey_release(uct_component_t *component, uct_rkey_t rkey, void *handle) +{ + uct_mm_remote_seg_t *rseg = handle; + ucs_status_t status; + + status = uct_posix_mem_detach_common(rseg); + if (status != UCS_OK) { + return status; + } + + ucs_free(rseg); + return UCS_OK; +} +/* POSIX implementation of the mm mapper operations */ +static uct_mm_md_mapper_ops_t uct_posix_md_ops = { + .super = { + .close = uct_mm_md_close, + .query = uct_posix_md_query, + .mem_alloc = uct_posix_mem_alloc, + .mem_free = uct_posix_mem_free, + .mem_advise = (uct_md_mem_advise_func_t)ucs_empty_function_return_unsupported, + .mem_reg = (uct_md_mem_reg_func_t)ucs_empty_function_return_unsupported, + .mem_dereg = (uct_md_mem_dereg_func_t)ucs_empty_function_return_unsupported, + .mkey_pack = uct_posix_md_mkey_pack, + .is_sockaddr_accessible = (uct_md_is_sockaddr_accessible_func_t)ucs_empty_function_return_zero, + .detect_memory_type = (uct_md_detect_memory_type_func_t)ucs_empty_function_return_unsupported + }, + .query = (uct_mm_mapper_query_func_t) + ucs_empty_function_return_success, + .iface_addr_length = uct_posix_iface_addr_length, + .iface_addr_pack = uct_posix_iface_addr_pack, + .mem_attach = uct_posix_mem_attach, + .mem_detach = uct_posix_mem_detach, + .is_reachable = uct_posix_is_reachable +}; + +UCT_MM_TL_DEFINE(posix, &uct_posix_md_ops, uct_posix_rkey_unpack, + uct_posix_rkey_release, "POSIX_") diff --git a/src/uct/sm/mm/sysv/mm_sysv.c b/src/uct/sm/mm/sysv/mm_sysv.c new file mode 100644 index 0000000..cd84ab4 --- /dev/null +++ b/src/uct/sm/mm/sysv/mm_sysv.c @@ -0,0 +1,198 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +/* NOTE(review): the header names of the five #include lines above are missing here - restore them from the original file */ + +#define UCT_MM_SYSV_PERM (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) +#define UCT_MM_SYSV_MSTR (UCT_MM_SYSV_PERM | IPC_CREAT | IPC_EXCL) +/* Packed remote key: SysV segment id and the owner's mapping address */ +typedef struct uct_sysv_packed_rkey { + uint32_t shmid; + uintptr_t owner_ptr; +} UCS_S_PACKED uct_sysv_packed_rkey_t; + +typedef struct uct_sysv_md_config { + uct_mm_md_config_t super; +} uct_sysv_md_config_t; + +static ucs_config_field_t uct_sysv_md_config_table[] = { + {"MM_", "", NULL, + ucs_offsetof(uct_sysv_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_mm_md_config_table)}, + + {NULL} +}; +/* Fill the common mm md attributes, then set the SysV packed rkey size */ +static ucs_status_t uct_sysv_md_query(uct_md_h md, uct_md_attr_t *md_attr) +{ + uct_mm_md_query(md, md_attr, 1); + md_attr->rkey_packed_size = sizeof(uct_sysv_packed_rkey_t); + return UCS_OK; +} +/* shmat() the segment 'shmid' without an address hint and return the mapping in *address_p */ +static ucs_status_t uct_sysv_mem_attach_common(int shmid, void **address_p) +{ + void *address; + + address = shmat(shmid, NULL, 0); + if (address == MAP_FAILED) { + ucs_error("shmat(shmid=%d) failed: %m", shmid); + *address_p = NULL; /* GCC 8.3.1 reports error without it */ + return UCS_ERR_SHMEM_SEGMENT; + } + + *address_p = address; + ucs_trace("attached remote segment %d at address %p", (int)shmid, address); + return UCS_OK; +} +/* Allocate a SysV segment: SHM_HUGETLB first unless hugetlb mode is UCS_NO, regular pages unless it is UCS_YES */ +static ucs_status_t +uct_sysv_mem_alloc(uct_md_h tl_md, size_t *length_p, void **address_p, + unsigned flags, const char *alloc_name, uct_mem_h *memh_p) +{ + uct_mm_md_t *md = ucs_derived_of(tl_md, uct_mm_md_t); + ucs_status_t status; + uct_mm_seg_t *seg; + int shmid; + + status = uct_mm_seg_new(*address_p, *length_p, &seg); + if (status != UCS_OK) { + return status; + } + +#ifdef SHM_HUGETLB + if (md->config->hugetlb_mode != UCS_NO) { + status = ucs_sysv_alloc(&seg->length, seg->length * 2, &seg->address, + UCT_MM_SYSV_MSTR | SHM_HUGETLB, alloc_name, + &shmid); + if (status == UCS_OK) { + goto out_ok; + } + + ucs_debug("mm failed to allocate %zu bytes with hugetlb", seg->length); + } +#else + 
status = UCS_ERR_UNSUPPORTED; /* returned as-is when hugetlb mode is UCS_YES */ +#endif + + if (md->config->hugetlb_mode != UCS_YES) { + status = ucs_sysv_alloc(&seg->length, SIZE_MAX, &seg->address, + UCT_MM_SYSV_MSTR, alloc_name, &shmid); + if (status == UCS_OK) { + goto out_ok; + } + + ucs_debug("mm failed to allocate %zu bytes without hugetlb", seg->length); + } + + ucs_error("failed to allocate %zu bytes with mm for %s", seg->length, + alloc_name); + ucs_free(seg); + return status; + +out_ok: + seg->seg_id = shmid; + *address_p = seg->address; + *length_p = seg->length; + *memh_p = seg; + return UCS_OK; +} +/* Call ucs_sysv_free() on the segment address, then free the descriptor; the descriptor is kept on failure */ +static ucs_status_t uct_sysv_mem_free(uct_md_h tl_md, uct_mem_h memh) +{ + uct_mm_seg_t *seg = memh; + ucs_status_t status; + + status = ucs_sysv_free(seg->address); + if (status != UCS_OK) { + return status; + } + + ucs_free(seg); + return UCS_OK; +} +/* Pack the segment id as shmid together with the owner's mapping address */ +static ucs_status_t +uct_sysv_md_mkey_pack(uct_md_h md, uct_mem_h memh, void *rkey_buffer) +{ + uct_sysv_packed_rkey_t *packed_rkey = rkey_buffer; + const uct_mm_seg_t *seg = memh; + + packed_rkey->shmid = seg->seg_id; + packed_rkey->owner_ptr = (uintptr_t)seg->address; + return UCS_OK; +} +/* Mapper attach callback; only seg_id is used, length and iface_addr are ignored */ +static ucs_status_t uct_sysv_mem_attach(uct_mm_md_t *md, uct_mm_seg_id_t seg_id, + size_t length, const void *iface_addr, + uct_mm_remote_seg_t *rseg) +{ + return uct_sysv_mem_attach_common(seg_id, &rseg->address); +} +/* Mapper detach callback; the status of ucs_sysv_free() is not propagated */ +static void uct_sysv_mem_detach(uct_mm_md_t *md, const uct_mm_remote_seg_t *rseg) +{ + ucs_sysv_free(rseg->address); +} +/* Attach to the segment named by the packed shmid; the attached address doubles as the handle */ +static ucs_status_t +uct_sysv_rkey_unpack(uct_component_t *component, const void *rkey_buffer, + uct_rkey_t *rkey_p, void **handle_p) +{ + const uct_sysv_packed_rkey_t *packed_rkey = rkey_buffer; + ucs_status_t status; + void *address; + + status = uct_sysv_mem_attach_common(packed_rkey->shmid, &address); + if (status != UCS_OK) { + return status; + } + + *handle_p = address; + uct_mm_md_make_rkey(address, packed_rkey->owner_ptr, rkey_p); + return UCS_OK; +} +/* Release a remote key: 'handle' is the address stored by uct_sysv_rkey_unpack() */ +static ucs_status_t +uct_sysv_rkey_release(uct_component_t 
*component, uct_rkey_t rkey, void *handle) +{ + return ucs_sysv_free(handle); +} +/* SysV implementation of the mm mapper operations; iface address length is zero and is_reachable always returns one */ +static uct_mm_md_mapper_ops_t uct_sysv_md_ops = { + .super = { + .close = uct_mm_md_close, + .query = uct_sysv_md_query, + .mem_alloc = uct_sysv_mem_alloc, + .mem_free = uct_sysv_mem_free, + .mem_advise = (uct_md_mem_advise_func_t)ucs_empty_function_return_unsupported, + .mem_reg = (uct_md_mem_reg_func_t)ucs_empty_function_return_unsupported, + .mem_dereg = (uct_md_mem_dereg_func_t)ucs_empty_function_return_unsupported, + .mkey_pack = uct_sysv_md_mkey_pack, + .is_sockaddr_accessible = (uct_md_is_sockaddr_accessible_func_t)ucs_empty_function_return_zero, + .detect_memory_type = (uct_md_detect_memory_type_func_t)ucs_empty_function_return_unsupported + }, + .query = (uct_mm_mapper_query_func_t) + ucs_empty_function_return_success, + .iface_addr_length = (uct_mm_mapper_iface_addr_length_func_t) + ucs_empty_function_return_zero_int64, + .iface_addr_pack = (uct_mm_mapper_iface_addr_pack_func_t) + ucs_empty_function_return_success, + .mem_attach = uct_sysv_mem_attach, + .mem_detach = uct_sysv_mem_detach, + .is_reachable = (uct_mm_mapper_is_reachable_func_t)ucs_empty_function_return_one +}; + +UCT_MM_TL_DEFINE(sysv, &uct_sysv_md_ops, uct_sysv_rkey_unpack, + uct_sysv_rkey_release, "SYSV_") diff --git a/src/uct/sm/mm/xpmem/Makefile.am b/src/uct/sm/mm/xpmem/Makefile.am new file mode 100644 index 0000000..cb1fa8e --- /dev/null +++ b/src/uct/sm/mm/xpmem/Makefile.am @@ -0,0 +1,19 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# Copyright (C) UChicago Argonne, LLC. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. 
+# + +if HAVE_XPMEM + +module_LTLIBRARIES = libuct_xpmem.la +libuct_xpmem_la_CFLAGS = $(BASE_CFLAGS) $(XPMEM_CFLAGS) +libuct_xpmem_la_CPPFLAGS = $(BASE_CPPFLAGS) +libuct_xpmem_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/libuct.la +libuct_xpmem_la_LDFLAGS = $(XPMEM_LIBS) -version-info $(SOVERSION) +libuct_xpmem_la_SOURCES = mm_xpmem.c + +include $(top_srcdir)/config/module.am + +endif diff --git a/src/uct/sm/mm/xpmem/Makefile.in b/src/uct/sm/mm/xpmem/Makefile.in new file mode 100644 index 0000000..dce3963 --- /dev/null +++ b/src/uct/sm/mm/xpmem/Makefile.in @@ -0,0 +1,850 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# Copyright (C) UChicago Argonne, LLC. 2019. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/sm/mm/xpmem +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + 
$(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = 
$(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_XPMEM_TRUE@libuct_xpmem_la_DEPENDENCIES = \ +@HAVE_XPMEM_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_XPMEM_TRUE@ $(top_builddir)/src/uct/libuct.la +am__libuct_xpmem_la_SOURCES_DIST = mm_xpmem.c +@HAVE_XPMEM_TRUE@am_libuct_xpmem_la_OBJECTS = \ +@HAVE_XPMEM_TRUE@ libuct_xpmem_la-mm_xpmem.lo +libuct_xpmem_la_OBJECTS = $(am_libuct_xpmem_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_xpmem_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libuct_xpmem_la_CFLAGS) $(CFLAGS) $(libuct_xpmem_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@HAVE_XPMEM_TRUE@am_libuct_xpmem_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libuct_xpmem_la-mm_xpmem.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link 
$(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_xpmem_la_SOURCES) +DIST_SOURCES = $(am__libuct_xpmem_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ 
+CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS 
= @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ 
+build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_XPMEM_TRUE@module_LTLIBRARIES = libuct_xpmem.la +@HAVE_XPMEM_TRUE@libuct_xpmem_la_CFLAGS = $(BASE_CFLAGS) $(XPMEM_CFLAGS) +@HAVE_XPMEM_TRUE@libuct_xpmem_la_CPPFLAGS = $(BASE_CPPFLAGS) +@HAVE_XPMEM_TRUE@libuct_xpmem_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_XPMEM_TRUE@ $(top_builddir)/src/uct/libuct.la + +@HAVE_XPMEM_TRUE@libuct_xpmem_la_LDFLAGS = $(XPMEM_LIBS) -version-info $(SOVERSION) +@HAVE_XPMEM_TRUE@libuct_xpmem_la_SOURCES = mm_xpmem.c + +# Automake silent rules +@HAVE_XPMEM_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_XPMEM_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_XPMEM_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_XPMEM_TRUE@AM_V_LN_1 = true +@HAVE_XPMEM_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am 
$(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/sm/mm/xpmem/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/sm/mm/xpmem/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + 
@$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libuct_xpmem.la: $(libuct_xpmem_la_OBJECTS) $(libuct_xpmem_la_DEPENDENCIES) $(EXTRA_libuct_xpmem_la_DEPENDENCIES) + $(AM_V_CCLD)$(libuct_xpmem_la_LINK) $(am_libuct_xpmem_la_rpath) $(libuct_xpmem_la_OBJECTS) $(libuct_xpmem_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libuct_xpmem_la-mm_xpmem.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) 
$$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libuct_xpmem_la-mm_xpmem.lo: mm_xpmem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_xpmem_la_CPPFLAGS) $(CPPFLAGS) $(libuct_xpmem_la_CFLAGS) $(CFLAGS) -MT libuct_xpmem_la-mm_xpmem.lo -MD -MP -MF $(DEPDIR)/libuct_xpmem_la-mm_xpmem.Tpo -c -o libuct_xpmem_la-mm_xpmem.lo `test -f 'mm_xpmem.c' || echo '$(srcdir)/'`mm_xpmem.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libuct_xpmem_la-mm_xpmem.Tpo $(DEPDIR)/libuct_xpmem_la-mm_xpmem.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mm_xpmem.c' object='libuct_xpmem_la-mm_xpmem.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_xpmem_la_CPPFLAGS) $(CPPFLAGS) $(libuct_xpmem_la_CFLAGS) $(CFLAGS) -c -o libuct_xpmem_la-mm_xpmem.lo `test -f 'mm_xpmem.c' || echo '$(srcdir)/'`mm_xpmem.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm 
-rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if 
test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_XPMEM_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am
+# Standard automake entry points: each public target below depends on its "-am" counterpart, which lists the real work for this directory.
+clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \
+	mostlyclean-am
+# distclean also deletes the generated dependency file and the Makefile itself; the leading '-' tells make to ignore rm failures.
+distclean: distclean-am
+	-rm -f ./$(DEPDIR)/libuct_xpmem_la-mm_xpmem.Plo
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+# The dvi/html/info/pdf/ps "-am" targets have neither prerequisites nor recipes in this directory.
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+# Data installation for this directory consists of installing the module libtool libraries.
+install-data-am: install-moduleLTLIBRARIES
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+# maintainer-clean removes the same two generated files as distclean, after maintainer-clean-am has run.
+maintainer-clean: maintainer-clean-am
+	-rm -f ./$(DEPDIR)/libuct_xpmem_la-mm_xpmem.Plo
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+# Uninstalling mirrors install-data-am: only the module libraries are removed.
+uninstall-am: uninstall-moduleLTLIBRARIES
+# install-am and install-strip have recipes that re-invoke $(MAKE); NOTE(review): .MAKE is presumably the special target that tells some make implementations so - confirm in the automake manual.
+.MAKE: install-am install-strip
+# Every target named here is a command name rather than a file to be built.
+.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \
+	check-am clean clean-generic clean-libtool \
+	clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \
+	distclean-compile distclean-generic distclean-libtool \
+	distclean-tags distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-moduleLTLIBRARIES install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am \
+	uninstall-moduleLTLIBRARIES
+# GNU make does not delete a .PRECIOUS target when it is interrupted while rebuilding it.
+.PRECIOUS: Makefile
+
+
+@HAVE_XPMEM_TRUE@all-local: $(local_la_modules)
+
+# Create symbolic links for the built modules under $(localmoduledir)
+# Link also *.la files to create proper makefile dependencies
+@HAVE_XPMEM_TRUE@$(local_la_modules): $(module_LTLIBRARIES)
+@HAVE_XPMEM_TRUE@	$(AM_V_at)$(MKDIR_P) $(localmoduledir)
+@HAVE_XPMEM_TRUE@	$(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \
+@HAVE_XPMEM_TRUE@		(cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \
+@HAVE_XPMEM_TRUE@	done
+@HAVE_XPMEM_TRUE@	@for lib in *.la $(objdir)/*$(shrext)*; do \
+@HAVE_XPMEM_TRUE@		$(AM_V_LN) $$lib; \
+@HAVE_XPMEM_TRUE@	done
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/uct/sm/mm/xpmem/configure.m4 b/src/uct/sm/mm/xpmem/configure.m4
new file mode 100644
index 0000000..48f8c08
--- /dev/null
+++ b/src/uct/sm/mm/xpmem/configure.m4
@@ -0,0 +1,42 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+# Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+# Locate XPMEM and set XPMEM_CFLAGS / XPMEM_LIBS; xpmem_happy ends up "yes" only when a usable installation was found.
+xpmem_happy="no"
+AC_ARG_WITH([xpmem],
+           [AS_HELP_STRING([--with-xpmem=(DIR)], [Enable the use of XPMEM (default is guess).])],
+           [], [with_xpmem=guess])
+# Unless --with-xpmem=no was given: when the value is not a directory, ask pkg-config for cray-xpmem (only if PKG_CONFIG is set), else search a few fixed directories for xpmem.h.
+AS_IF([test "x$with_xpmem" != "xno"],
+      [AS_IF([test ! -d "$with_xpmem"],
+             [
+              AC_MSG_NOTICE([XPMEM - failed to open the requested location ($with_xpmem), guessing ...])
+              AS_IF([test -n "$PKG_CONFIG" && $PKG_CONFIG --exists cray-xpmem],
+                    [
+                     xpmem_happy=yes
+                     AC_SUBST(XPMEM_CFLAGS, [`$PKG_CONFIG --cflags cray-xpmem`])
+                     AC_SUBST(XPMEM_LIBS, [`$PKG_CONFIG --libs cray-xpmem`])
+                    ],
+                    [
+                     # If cray-xpmem module not found in pkg-config, try to search
+                     xpmem_header=$(find /opt/xpmem /usr/local/include /usr/local/xpmem -name xpmem.h 2>/dev/null|head -1)
+                     AS_IF([test -f "$xpmem_header"],
+                           [with_xpmem=$(dirname "$xpmem_header" | head -1 | sed -e s,/include,,g)])
+                    ])
+             ])
+      ])
+# The two conditions below are joined with && because the -a operator of test is obsolescent and not portable.
+# Verify XPMEM header file
+AS_IF([test "x$xpmem_happy" = "xno" && test -d "$with_xpmem"],
+      [AC_CHECK_HEADER([$with_xpmem/include/xpmem.h],
+                       [AC_SUBST(XPMEM_CFLAGS, "-I$with_xpmem/include")
+                        AC_SUBST(XPMEM_LIBS, "-L$with_xpmem/lib -lxpmem")
+                        xpmem_happy="yes"],
+                       [AC_MSG_WARN([cray-xpmem header was not found in $with_xpmem])])
+      ])
+# On success append xpmem to uct_modules; HAVE_XPMEM gates the rules in this directory's Makefile.
+AS_IF([test "x$xpmem_happy" = "xyes"], [uct_modules="${uct_modules}:xpmem"])
+AM_CONDITIONAL([HAVE_XPMEM], [test "x$xpmem_happy" != "xno"])
+AC_CONFIG_FILES([src/uct/sm/mm/xpmem/Makefile])
diff --git a/src/uct/sm/mm/xpmem/mm_xpmem.c b/src/uct/sm/mm/xpmem/mm_xpmem.c
new file mode 100644
index 0000000..1508055
--- /dev/null
+++ b/src/uct/sm/mm/xpmem/mm_xpmem.c
@@ -0,0 +1,550 @@
+/**
+ * Copyright (c) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED.
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+ * Copyright (c) Los Alamos National Security, LLC. 2016. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */ + +#include "xpmem.h" + +#include +#include +#include +#include +#include +#include +#include +#include + + +/* XPMEM memory domain configuration */ +typedef struct uct_xpmem_md_config { + uct_mm_md_config_t super; +} uct_xpmem_md_config_t; + +/* Remote process memory */ +typedef struct uct_xpmem_remote_mem { + xpmem_apid_t apid; + xpmem_segid_t xsegid; + ucs_rcache_t *rcache; + int refcount; +} uct_xpmem_remote_mem_t; + +/* Cache entry for remote memory region */ +typedef struct uct_xpmem_remote_region { + ucs_rcache_region_t super; + void *attach_address; + uct_xpmem_remote_mem_t *rmem; +} uct_xpmem_remote_region_t; + +typedef struct uct_xpmem_iface_addr { + xpmem_segid_t xsegid; +} UCS_S_PACKED uct_xpmem_iface_addr_t; + +typedef struct uct_xpmem_packed_rkey { + xpmem_segid_t xsegid; + uintptr_t address; + size_t length; +} UCS_S_PACKED uct_xpmem_packed_rkey_t; + +KHASH_INIT(xpmem_remote_mem, xpmem_segid_t, uct_xpmem_remote_mem_t*, 1, + kh_int64_hash_func, kh_int64_hash_equal) + +/* Global XPMEM segment which maps the entire process virtual address space */ +static ucs_init_once_t uct_xpmem_global_seg_init_once = UCS_INIT_ONCE_INITIALIZER; +static xpmem_segid_t uct_xpmem_global_xsegid = -1; + +/* Hash of remote regions */ +static khash_t(xpmem_remote_mem) uct_xpmem_remote_mem_hash; +static ucs_spinlock_t uct_xpmem_remote_mem_lock; + +static ucs_config_field_t uct_xpmem_md_config_table[] = { + {"MM_", "", NULL, + ucs_offsetof(uct_xpmem_md_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_mm_md_config_table)}, + + {NULL} +}; + +UCS_STATIC_INIT { + ucs_spinlock_init(&uct_xpmem_remote_mem_lock); + kh_init_inplace(xpmem_remote_mem, &uct_xpmem_remote_mem_hash); +} + +UCS_STATIC_CLEANUP { + uct_xpmem_remote_mem_t *rmem; + ucs_status_t status; + + kh_foreach_value(&uct_xpmem_remote_mem_hash, rmem, { + ucs_warn("remote segment id %lx apid %lx is not released, refcount %d", + (unsigned long)rmem->xsegid, (unsigned long)rmem->apid, + rmem->refcount); + }) + 
kh_destroy_inplace(xpmem_remote_mem, &uct_xpmem_remote_mem_hash); + + status = ucs_spinlock_destroy(&uct_xpmem_remote_mem_lock); + if (status != UCS_OK) { + ucs_warn("ucs_spinlock_destroy() failed: %s", ucs_status_string(status)); + } +} + +static ucs_status_t uct_xpmem_query() +{ + int version; + + version = xpmem_version(); + if (version < 0) { + ucs_debug("xpmem_version() returned %d (%m), xpmem is unavailable", + version); + return UCS_ERR_UNSUPPORTED; + } + + ucs_debug("xpmem version: %d", version); + return UCS_OK; +} + +static ucs_status_t uct_xpmem_md_query(uct_md_h md, uct_md_attr_t *md_attr) +{ + uct_mm_md_query(md, md_attr, 0); + + md_attr->cap.flags |= UCT_MD_FLAG_REG; + md_attr->reg_cost.overhead = 60.0e-9; + md_attr->reg_cost.growth = 0; + md_attr->cap.max_reg = ULONG_MAX; + md_attr->cap.reg_mem_types = UCS_MEMORY_TYPES_CPU_ACCESSIBLE; + md_attr->rkey_packed_size = sizeof(uct_xpmem_packed_rkey_t); + + return UCS_OK; +} + +static UCS_F_ALWAYS_INLINE size_t +uct_xpmem_rcache_region_length(uct_xpmem_remote_region_t *xpmem_region) +{ + return xpmem_region->super.super.end - xpmem_region->super.super.start; +} + +static ucs_status_t +uct_xpmem_rcache_mem_reg(void *context, ucs_rcache_t *rcache, void *arg, + ucs_rcache_region_t *region, uint16_t flags) +{ + uct_xpmem_remote_mem_t *rmem = context; + uct_xpmem_remote_region_t *xpmem_region = + ucs_derived_of(region, uct_xpmem_remote_region_t); + struct xpmem_addr addr; + size_t length; + + addr.apid = rmem->apid; + addr.offset = xpmem_region->super.super.start; + length = uct_xpmem_rcache_region_length(xpmem_region); + + xpmem_region->attach_address = xpmem_attach(addr, length, NULL); + VALGRIND_MAKE_MEM_DEFINED(&xpmem_region->attach_address, + sizeof(xpmem_region->attach_address)); + if (xpmem_region->attach_address == MAP_FAILED) { + ucs_error("failed to attach xpmem apid 0x%lx offset 0x%lx length %zu: %m", + (unsigned long)addr.apid, addr.offset, length); + return UCS_ERR_IO_ERROR; + } + + 
xpmem_region->rmem = rmem; + + ucs_trace("xpmem attached apid 0x%lx offset 0x%lx length %zu at %p", + (unsigned long)addr.apid, addr.offset, length, + xpmem_region->attach_address); + + VALGRIND_MAKE_MEM_DEFINED(xpmem_region->attach_address, length); + return UCS_OK; +} + +static void uct_xpmem_rcache_mem_dereg(void *context, ucs_rcache_t *rcache, + ucs_rcache_region_t *region) +{ + uct_xpmem_remote_region_t *xpmem_region = + ucs_derived_of(region, uct_xpmem_remote_region_t); + int ret; + + ucs_trace("xpmem detaching address %p", xpmem_region->attach_address); + + ret = xpmem_detach(xpmem_region->attach_address); + if (ret < 0) { + ucs_warn("Failed to xpmem_detach: %m"); + } + + xpmem_region->attach_address = NULL; + xpmem_region->rmem = NULL; +} + +static void uct_xpmem_rcache_dump_region(void *context, ucs_rcache_t *rcache, + ucs_rcache_region_t *region, char *buf, + size_t max) +{ + uct_xpmem_remote_mem_t *rmem = context; + uct_xpmem_remote_region_t *xpmem_region = + ucs_derived_of(region, uct_xpmem_remote_region_t); + + snprintf(buf, max, "apid 0x%lx attach_addr %p rmem %p", + (unsigned long)rmem->apid, xpmem_region->attach_address, rmem); +} + +static ucs_rcache_ops_t uct_xpmem_rcache_ops = { + .mem_reg = uct_xpmem_rcache_mem_reg, + .mem_dereg = uct_xpmem_rcache_mem_dereg, + .dump_region = uct_xpmem_rcache_dump_region +}; + +static UCS_F_NOINLINE ucs_status_t +uct_xpmem_make_global_xsegid(xpmem_segid_t *xsegid_p) +{ + /* double-checked locking */ + UCS_INIT_ONCE(&uct_xpmem_global_seg_init_once) { + if (uct_xpmem_global_xsegid < 0) { + uct_xpmem_global_xsegid = xpmem_make(0, XPMEM_MAXADDR_SIZE, + XPMEM_PERMIT_MODE, (void*)0600); + VALGRIND_MAKE_MEM_DEFINED(&uct_xpmem_global_xsegid, + sizeof(uct_xpmem_global_xsegid)); + } + } + + if (uct_xpmem_global_xsegid < 0) { + ucs_error("xpmem failed to register process address space: %m"); + return UCS_ERR_IO_ERROR; + } + + ucs_debug("xpmem registered global segment id 0x%lx", + (unsigned long)uct_xpmem_global_xsegid); + 
*xsegid_p = uct_xpmem_global_xsegid; + return UCS_OK; +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_xpmem_get_global_xsegid(xpmem_segid_t *xsegid_p) +{ + if (ucs_unlikely(uct_xpmem_global_xsegid < 0)) { + return uct_xpmem_make_global_xsegid(xsegid_p); + } + + *xsegid_p = uct_xpmem_global_xsegid; + return UCS_OK; +} + +/* lock must be held */ +static UCS_F_NOINLINE ucs_status_t +uct_xpmem_rmem_add(xpmem_segid_t xsegid, uct_xpmem_remote_mem_t **rmem_p) +{ + ucs_rcache_params_t rcache_params; + uct_xpmem_remote_mem_t *rmem; + ucs_status_t status; + khiter_t khiter; + int khret; + + rmem = ucs_malloc(sizeof(*rmem), "xpmem_rmem"); + if (rmem == NULL) { + ucs_error("failed to allocate xpmem rmem"); + status = UCS_ERR_NO_MEMORY; + goto err; + } + + rmem->refcount = 0; + rmem->xsegid = xsegid; + + rmem->apid = xpmem_get(xsegid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL); + VALGRIND_MAKE_MEM_DEFINED(&rmem->apid, sizeof(rmem->apid)); + if (rmem->apid < 0) { + ucs_error("xpmem_get(segid=0x%lx) failed: %m", (unsigned long)xsegid); + status = UCS_ERR_SHMEM_SEGMENT; + goto err_free; + } + + rcache_params.region_struct_size = sizeof(uct_xpmem_remote_region_t); + rcache_params.alignment = ucs_get_page_size(); + rcache_params.max_alignment = ucs_get_page_size(); + rcache_params.ucm_events = 0; + rcache_params.ucm_event_priority = 0; + rcache_params.ops = &uct_xpmem_rcache_ops; + rcache_params.context = rmem; + + status = ucs_rcache_create(&rcache_params, "xpmem_remote_mem", + ucs_stats_get_root(), &rmem->rcache); + if (status != UCS_OK) { + ucs_error("failed to create xpmem remote cache: %s", + ucs_status_string(status)); + goto err_release_seg; + } + + khiter = kh_put(xpmem_remote_mem, &uct_xpmem_remote_mem_hash, xsegid, + &khret); + ucs_assertv_always((khret == 1) || (khret == 2), "khret=%d", khret); + ucs_assert_always (khiter != kh_end(&uct_xpmem_remote_mem_hash)); + kh_val(&uct_xpmem_remote_mem_hash, khiter) = rmem; + + ucs_trace("xpmem attached to remote segment id 0x%lx apid 
0x%lx rcache %p", + (unsigned long)xsegid, (unsigned long)rmem->apid, rmem->rcache); + + *rmem_p = rmem; + return UCS_OK; + +err_release_seg: + xpmem_release(rmem->apid); +err_free: + ucs_free(rmem); +err: + return status; +} + +/* lock must be held */ +static UCS_F_NOINLINE void +uct_xpmem_rmem_del(uct_xpmem_remote_mem_t *rmem) +{ + khiter_t khiter; + int ret; + + ucs_assert(rmem->refcount == 0); + + ucs_trace("detaching remote segment rmem %p apid %lx", rmem, + (unsigned long)rmem->apid); + + khiter = kh_get(xpmem_remote_mem, &uct_xpmem_remote_mem_hash, rmem->xsegid); + ucs_assert(kh_val(&uct_xpmem_remote_mem_hash, khiter) == rmem); + kh_del(xpmem_remote_mem, &uct_xpmem_remote_mem_hash, khiter); + + ucs_rcache_destroy(rmem->rcache); + + ret = xpmem_release(rmem->apid); + if (ret) { + ucs_warn("xpmem_release(apid=0x%lx) failed: %m", + (unsigned long)rmem->apid); + } + + ucs_free(rmem); +} + +static ucs_status_t +uct_xpmem_rmem_get(xpmem_segid_t xsegid, uct_xpmem_remote_mem_t **rmem_p) +{ + uct_xpmem_remote_mem_t *rmem; + ucs_status_t status; + khiter_t khiter; + + ucs_spin_lock(&uct_xpmem_remote_mem_lock); + + khiter = kh_get(xpmem_remote_mem, &uct_xpmem_remote_mem_hash, xsegid); + if (ucs_likely(khiter != kh_end(&uct_xpmem_remote_mem_hash))) { + rmem = kh_val(&uct_xpmem_remote_mem_hash, khiter); + } else { + status = uct_xpmem_rmem_add(xsegid, &rmem); + if (status != UCS_OK) { + *rmem_p = NULL; + goto out_unlock; + } + } + + ++rmem->refcount; + *rmem_p = rmem; + status = UCS_OK; + +out_unlock: + ucs_spin_unlock(&uct_xpmem_remote_mem_lock); + return status; +} + +static void uct_xpmem_rmem_put(uct_xpmem_remote_mem_t *rmem) +{ + ucs_spin_lock(&uct_xpmem_remote_mem_lock); + if (--rmem->refcount == 0) { + uct_xpmem_rmem_del(rmem); + } + ucs_spin_unlock(&uct_xpmem_remote_mem_lock); +} + +static ucs_status_t +uct_xpmem_mem_attach_common(xpmem_segid_t xsegid, uintptr_t remote_address, + size_t length, uct_xpmem_remote_region_t **region_p) +{ + ucs_rcache_region_t 
*rcache_region; + uct_xpmem_remote_mem_t *rmem; + uintptr_t start, end; + ucs_status_t status; + + status = uct_xpmem_rmem_get(xsegid, &rmem); + if (status != UCS_OK) { + goto err; + } + + start = ucs_align_down_pow2(remote_address, ucs_get_page_size()); + end = ucs_align_up_pow2 (remote_address + length, ucs_get_page_size()); + + status = ucs_rcache_get(rmem->rcache, (void*)start, end - start, + PROT_READ|PROT_WRITE, NULL, &rcache_region); + if (status != UCS_OK) { + goto err_rmem_put; + } + + *region_p = ucs_derived_of(rcache_region, uct_xpmem_remote_region_t); + return UCS_OK; + +err_rmem_put: + uct_xpmem_rmem_put(rmem); +err: + return status; +} + +static void uct_xpmem_mem_detach_common(uct_xpmem_remote_region_t *xpmem_region) +{ + uct_xpmem_remote_mem_t *rmem = xpmem_region->rmem; + + ucs_rcache_region_put(rmem->rcache, &xpmem_region->super); + uct_xpmem_rmem_put(rmem); +} + +static ucs_status_t uct_xmpem_mem_reg(uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + ucs_status_t status; + uct_mm_seg_t *seg; + + status = uct_mm_seg_new(address, length, &seg); + if (status != UCS_OK) { + return status; + } + + seg->seg_id = (uintptr_t)address; /* to be used by mem_attach */ + *memh_p = seg; + return UCS_OK; +} + +static ucs_status_t uct_xmpem_mem_dereg(uct_md_h md, uct_mem_h memh) +{ + uct_mm_seg_t *seg = memh; + ucs_free(seg); + return UCS_OK; +} + +static ucs_status_t +uct_xpmem_mkey_pack(uct_md_h md, uct_mem_h memh, void *rkey_buffer) +{ + uct_mm_seg_t *seg = memh; + uct_xpmem_packed_rkey_t *packed_rkey = rkey_buffer; + xpmem_segid_t xsegid; + ucs_status_t status; + + ucs_assert((uintptr_t)seg->address == seg->seg_id); /* sanity */ + + status = uct_xpmem_get_global_xsegid(&xsegid); + if (status != UCS_OK) { + return status; + } + + packed_rkey->xsegid = xsegid; + packed_rkey->address = (uintptr_t)seg->address; + packed_rkey->length = seg->length; + return UCS_OK; +} + +static size_t uct_xpmem_iface_addr_length(uct_mm_md_t *md) 
+{ + return sizeof(uct_xpmem_iface_addr_t); +} + +static ucs_status_t uct_xpmem_iface_addr_pack(uct_mm_md_t *md, void *buffer) +{ + uct_xpmem_iface_addr_t *xpmem_iface_addr = buffer; + xpmem_segid_t xsegid; + ucs_status_t status; + + status = uct_xpmem_get_global_xsegid(&xsegid); + if (status != UCS_OK) { + return status; + } + + xpmem_iface_addr->xsegid = xsegid; + return UCS_OK; +} + +static ucs_status_t uct_xpmem_mem_attach(uct_mm_md_t *md, uct_mm_seg_id_t seg_id, + size_t length, const void *iface_addr, + uct_mm_remote_seg_t *rseg) +{ + const uct_xpmem_iface_addr_t *xpmem_iface_addr = iface_addr; + uintptr_t remote_address = seg_id; + uct_xpmem_remote_region_t *xpmem_region; + ucs_status_t status; + ptrdiff_t offset; + + ucs_assert(xpmem_iface_addr != NULL); + status = uct_xpmem_mem_attach_common(xpmem_iface_addr->xsegid, + remote_address, length, &xpmem_region); + if (status != UCS_OK) { + return status; + } + + /* In order to obtain the local access address of the remote segment + * (rseg->address), we need to calculate its offset from the beginning of the + * region on remote side (offset), and then add it to the local base address + * of the attached region (xpmem_region->attach_address). 
+ */ + offset = remote_address - xpmem_region->super.super.start; + rseg->address = UCS_PTR_BYTE_OFFSET(xpmem_region->attach_address, offset); + rseg->cookie = xpmem_region; + + return UCS_OK; +} + +static void uct_xpmem_mem_detach(uct_mm_md_t *md, + const uct_mm_remote_seg_t *rseg) +{ + uct_xpmem_mem_detach_common(rseg->cookie); +} + +static ucs_status_t +uct_xpmem_rkey_unpack(uct_component_t *component, const void *rkey_buffer, + uct_rkey_t *rkey_p, void **handle_p) +{ + const uct_xpmem_packed_rkey_t *packed_rkey = rkey_buffer; + uct_xpmem_remote_region_t *xpmem_region; + ucs_status_t status; + + status = uct_xpmem_mem_attach_common(packed_rkey->xsegid, + packed_rkey->address, + packed_rkey->length, + &xpmem_region); + if (status != UCS_OK) { + return status; + } + + uct_mm_md_make_rkey(xpmem_region->attach_address, + xpmem_region->super.super.start, rkey_p); + *handle_p = xpmem_region; + + return UCS_OK; +} + +static ucs_status_t +uct_xpmem_rkey_release(uct_component_t *component, uct_rkey_t rkey, void *handle) +{ + uct_xpmem_mem_detach_common(handle); + return UCS_OK; +} + +static uct_mm_md_mapper_ops_t uct_xpmem_md_ops = { + .super = { + .close = uct_mm_md_close, + .query = uct_xpmem_md_query, + .mem_alloc = (uct_md_mem_alloc_func_t)ucs_empty_function_return_unsupported, + .mem_free = (uct_md_mem_free_func_t)ucs_empty_function_return_unsupported, + .mem_advise = (uct_md_mem_advise_func_t)ucs_empty_function_return_unsupported, + .mem_reg = uct_xmpem_mem_reg, + .mem_dereg = uct_xmpem_mem_dereg, + .mkey_pack = uct_xpmem_mkey_pack, + .is_sockaddr_accessible = (uct_md_is_sockaddr_accessible_func_t)ucs_empty_function_return_zero, + .detect_memory_type = (uct_md_detect_memory_type_func_t)ucs_empty_function_return_unsupported + }, + .query = uct_xpmem_query, + .iface_addr_length = uct_xpmem_iface_addr_length, + .iface_addr_pack = uct_xpmem_iface_addr_pack, + .mem_attach = uct_xpmem_mem_attach, + .mem_detach = uct_xpmem_mem_detach, + .is_reachable = 
(uct_mm_mapper_is_reachable_func_t)ucs_empty_function_return_one +}; + +UCT_MM_TL_DEFINE(xpmem, &uct_xpmem_md_ops, uct_xpmem_rkey_unpack, + uct_xpmem_rkey_release, "XPMEM_") diff --git a/src/uct/sm/self/self.c b/src/uct/sm/self/self.c new file mode 100644 index 0000000..b5e1790 --- /dev/null +++ b/src/uct/sm/self/self.c @@ -0,0 +1,392 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "self.h" + +#include +#include +#include +#include +#include +#include "self.h" + + +#define UCT_SELF_NAME "self" + +#define UCT_SELF_IFACE_SEND_BUFFER_GET(_iface) \ + ({ /* use buffers from mpool to avoid buffer re-usage */ \ + /* till operation completes */ \ + void *ptr = ucs_mpool_get_inline(&(_iface)->msg_mp); \ + if (ucs_unlikely(ptr == NULL)) { \ + return UCS_ERR_NO_MEMORY; \ + } \ + ptr; \ + }) + + +/* Forward declarations */ +static uct_iface_ops_t uct_self_iface_ops; +static uct_component_t uct_self_component; + + +static ucs_config_field_t uct_self_iface_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_self_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + {"SEG_SIZE", "8k", + "Size of copy-out buffer", + ucs_offsetof(uct_self_iface_config_t, seg_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {NULL} +}; + + +static ucs_status_t uct_self_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *attr) +{ + uct_self_iface_t *iface = ucs_derived_of(tl_iface, uct_self_iface_t); + + ucs_trace_func("iface=%p", iface); + + uct_base_iface_query(&iface->super, attr); + + attr->iface_addr_len = sizeof(uct_self_iface_addr_t); + attr->device_addr_len = 0; + attr->ep_addr_len = 0; + attr->max_conn_priv = 0; + attr->cap.flags = UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_PUT_SHORT | + UCT_IFACE_FLAG_PUT_BCOPY | + UCT_IFACE_FLAG_GET_BCOPY | + UCT_IFACE_FLAG_ATOMIC_CPU | + 
UCT_IFACE_FLAG_PENDING | + UCT_IFACE_FLAG_CB_SYNC | + UCT_IFACE_FLAG_EP_CHECK; + + attr->cap.atomic32.op_flags = + attr->cap.atomic64.op_flags = UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR); + attr->cap.atomic32.fop_flags = + attr->cap.atomic64.fop_flags = UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR) | + UCS_BIT(UCT_ATOMIC_OP_SWAP) | + UCS_BIT(UCT_ATOMIC_OP_CSWAP); + + attr->cap.put.max_short = UINT_MAX; + attr->cap.put.max_bcopy = SIZE_MAX; + attr->cap.put.min_zcopy = 0; + attr->cap.put.max_zcopy = 0; + attr->cap.put.opt_zcopy_align = 1; + attr->cap.put.align_mtu = attr->cap.put.opt_zcopy_align; + attr->cap.put.max_iov = 1; + + attr->cap.get.max_bcopy = SIZE_MAX; + attr->cap.get.min_zcopy = 0; + attr->cap.get.max_zcopy = 0; + attr->cap.get.opt_zcopy_align = 1; + attr->cap.get.align_mtu = attr->cap.get.opt_zcopy_align; + attr->cap.get.max_iov = 1; + + attr->cap.am.max_short = iface->send_size; + attr->cap.am.max_bcopy = iface->send_size; + attr->cap.am.min_zcopy = 0; + attr->cap.am.max_zcopy = 0; + attr->cap.am.opt_zcopy_align = 1; + attr->cap.am.align_mtu = attr->cap.am.opt_zcopy_align; + attr->cap.am.max_hdr = 0; + attr->cap.am.max_iov = 1; + + attr->latency.overhead = 0; + attr->latency.growth = 0; + attr->bandwidth.dedicated = 6911 * 1024.0 * 1024.0; + attr->bandwidth.shared = 0; + attr->overhead = 10e-9; + attr->priority = 0; + + return UCS_OK; +} + +static ucs_status_t uct_self_iface_get_address(uct_iface_h tl_iface, + uct_iface_addr_t *addr) +{ + const uct_self_iface_t *iface = ucs_derived_of(tl_iface, uct_self_iface_t); + + *(uct_self_iface_addr_t*)addr = iface->id; + return UCS_OK; +} + +static int uct_self_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + const uct_self_iface_t *iface = ucs_derived_of(tl_iface, uct_self_iface_t); + 
const uct_self_iface_addr_t *addr = (const uct_self_iface_addr_t*)iface_addr; + + return (addr != NULL) && (iface->id == *addr); +} + +static void uct_self_iface_sendrecv_am(uct_self_iface_t *iface, uint8_t am_id, + void *buffer, size_t length, const char *title) +{ + ucs_status_t UCS_V_UNUSED status; + + uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_SEND, am_id, + buffer, length, "TX: AM_%s", title); + uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV, am_id, + buffer, length, "RX: AM_%s", title); + + status = uct_iface_invoke_am(&iface->super, am_id, buffer, + length, 0); + ucs_assert(status == UCS_OK); + ucs_mpool_put_inline(buffer); +} + +static ucs_mpool_ops_t uct_self_iface_mpool_ops = { + .chunk_alloc = ucs_mpool_chunk_malloc, + .chunk_release = ucs_mpool_chunk_free, + .obj_init = NULL, + .obj_cleanup = NULL +}; + +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_self_iface_t, uct_iface_t); + +static UCS_CLASS_INIT_FUNC(uct_self_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_self_iface_config_t *config = ucs_derived_of(tl_config, + uct_self_iface_config_t); + ucs_status_t status; + + UCT_CHECK_PARAM(params->field_mask & UCT_IFACE_PARAM_FIELD_OPEN_MODE, + "UCT_IFACE_PARAM_FIELD_OPEN_MODE is not defined"); + if (!(params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE)) { + ucs_error("Self transport supports only UCT_IFACE_OPEN_MODE_DEVICE"); + return UCS_ERR_UNSUPPORTED; + } + + if (ucs_derived_of(worker, uct_priv_worker_t)->thread_mode == UCS_THREAD_MODE_MULTI) { + ucs_error("Self transport does not support multi-threaded worker"); + return UCS_ERR_INVALID_PARAM; + } + + UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_self_iface_ops, md, worker, + params, tl_config + UCS_STATS_ARG((params->field_mask & + UCT_IFACE_PARAM_FIELD_STATS_ROOT) ? 
+ params->stats_root : NULL) + UCS_STATS_ARG(UCT_SELF_NAME)); + + self->id = ucs_generate_uuid((uintptr_t)self); + self->send_size = config->seg_size; + + status = ucs_mpool_init(&self->msg_mp, 0, self->send_size, 0, + UCS_SYS_CACHE_LINE_SIZE, + 2, /* 2 elements are enough for most of communications */ + UINT_MAX, &uct_self_iface_mpool_ops, "self_msg_desc"); + + if (UCS_STATUS_IS_ERR(status)) { + return status; + } + + ucs_debug("created self iface id 0x%lx send_size %zu", self->id, + self->send_size); + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_self_iface_t) +{ + ucs_mpool_cleanup(&self->msg_mp, 1); +} + +UCS_CLASS_DEFINE(uct_self_iface_t, uct_base_iface_t); +static UCS_CLASS_DEFINE_NEW_FUNC(uct_self_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t*); + +static ucs_status_t +uct_self_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + return uct_single_device_resource(md, UCT_SM_DEVICE_NAME, + UCT_DEVICE_TYPE_SELF, + tl_devices_p, num_tl_devices_p); +} + +static UCS_CLASS_INIT_FUNC(uct_self_ep_t, const uct_ep_params_t *params) +{ + uct_self_iface_t *iface = ucs_derived_of(params->iface, uct_self_iface_t); + + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super) + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_self_ep_t) +{ +} + +UCS_CLASS_DEFINE(uct_self_ep_t, uct_base_ep_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_self_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_self_ep_t, uct_ep_t); + + +ucs_status_t uct_self_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t header, + const void *payload, unsigned length) +{ + uct_self_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_self_iface_t); + uct_self_ep_t UCS_V_UNUSED *ep = ucs_derived_of(tl_ep, uct_self_ep_t); + size_t total_length; + void *send_buffer; + + UCT_CHECK_AM_ID(id); + + total_length = length + sizeof(header); + UCT_CHECK_LENGTH(total_length, 0, 
iface->send_size, "am_short"); + + send_buffer = UCT_SELF_IFACE_SEND_BUFFER_GET(iface); + uct_am_short_fill_data(send_buffer, header, payload, length); + + UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, total_length); + uct_self_iface_sendrecv_am(iface, id, send_buffer, total_length, "SHORT"); + return UCS_OK; +} + +ssize_t uct_self_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags) +{ + uct_self_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_self_iface_t); + uct_self_ep_t UCS_V_UNUSED *ep = ucs_derived_of(tl_ep, uct_self_ep_t); + size_t length; + void *send_buffer; + + UCT_CHECK_AM_ID(id); + + send_buffer = UCT_SELF_IFACE_SEND_BUFFER_GET(iface); + length = pack_cb(send_buffer, arg); + + UCT_CHECK_LENGTH(length, 0, iface->send_size, "am_bcopy"); + UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, length); + + uct_self_iface_sendrecv_am(iface, id, send_buffer, length, "BCOPY"); + return length; +} + +static uct_iface_ops_t uct_self_iface_ops = { + .ep_put_short = uct_sm_ep_put_short, + .ep_put_bcopy = uct_sm_ep_put_bcopy, + .ep_get_bcopy = uct_sm_ep_get_bcopy, + .ep_am_short = uct_self_ep_am_short, + .ep_am_bcopy = uct_self_ep_am_bcopy, + .ep_atomic_cswap64 = uct_sm_ep_atomic_cswap64, + .ep_atomic64_post = uct_sm_ep_atomic64_post, + .ep_atomic64_fetch = uct_sm_ep_atomic64_fetch, + .ep_atomic_cswap32 = uct_sm_ep_atomic_cswap32, + .ep_atomic32_post = uct_sm_ep_atomic32_post, + .ep_atomic32_fetch = uct_sm_ep_atomic32_fetch, + .ep_flush = uct_base_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_check = ucs_empty_function_return_success, + .ep_pending_add = ucs_empty_function_return_busy, + .ep_pending_purge = ucs_empty_function, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_self_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_self_ep_t), + .iface_flush = uct_base_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_progress_enable = ucs_empty_function, + .iface_progress_disable = ucs_empty_function, + .iface_progress = 
ucs_empty_function_return_zero, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_self_iface_t), + .iface_query = uct_self_iface_query, + .iface_get_device_address = ucs_empty_function_return_success, + .iface_get_address = uct_self_iface_get_address, + .iface_is_reachable = uct_self_iface_is_reachable +}; + +UCT_TL_DEFINE(&uct_self_component, self, uct_self_query_tl_devices, uct_self_iface_t, + "SELF_", uct_self_iface_config_table, uct_self_iface_config_t); + +static ucs_status_t uct_self_md_query(uct_md_h md, uct_md_attr_t *attr) +{ + /* Dummy memory registration provided. No real memory handling exists */ + attr->cap.flags = UCT_MD_FLAG_REG | + UCT_MD_FLAG_NEED_RKEY; /* TODO ignore rkey in rma/amo ops */ + attr->cap.reg_mem_types = UCS_MEMORY_TYPES_CPU_ACCESSIBLE; + attr->cap.detect_mem_types = 0; + attr->cap.access_mem_type = UCS_MEMORY_TYPE_HOST; + attr->cap.max_alloc = 0; + attr->cap.max_reg = ULONG_MAX; + attr->rkey_packed_size = 0; /* uct_md_query adds UCT_COMPONENT_NAME_MAX to this */ + attr->reg_cost.overhead = 0; + attr->reg_cost.growth = 0; + memset(&attr->local_cpus, 0xff, sizeof(attr->local_cpus)); + return UCS_OK; +} + +static ucs_status_t uct_self_mem_reg(uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + /* We have to emulate memory registration. 
Return dummy pointer */ + *memh_p = (void *) 0xdeadbeef; + return UCS_OK; +} + +static ucs_status_t uct_self_md_open(uct_component_t *component, const char *md_name, + const uct_md_config_t *config, uct_md_h *md_p) +{ + static uct_md_ops_t md_ops = { + .close = ucs_empty_function, + .query = uct_self_md_query, + .mkey_pack = ucs_empty_function_return_success, + .mem_reg = uct_self_mem_reg, + .mem_dereg = ucs_empty_function_return_success, + .detect_memory_type = ucs_empty_function_return_unsupported + }; + static uct_md_t md = { + .ops = &md_ops, + .component = &uct_self_component + }; + + *md_p = &md; + return UCS_OK; +} + +static ucs_status_t uct_self_md_rkey_unpack(uct_component_t *component, + const void *rkey_buffer, uct_rkey_t *rkey_p, + void **handle_p) +{ + /** + * Pseudo stub function for the key unpacking + * Need rkey == 0 due to work with same process to reuse uct_base_[put|get|atomic]* + */ + *rkey_p = 0; + *handle_p = NULL; + return UCS_OK; +} + +static uct_component_t uct_self_component = { + .query_md_resources = uct_md_query_single_md_resource, + .md_open = uct_self_md_open, + .cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = uct_self_md_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = ucs_empty_function_return_success, + .name = UCT_SELF_NAME, + .md_config = UCT_MD_DEFAULT_CONFIG_INITIALIZER, + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_self_component), + .flags = 0 +}; +UCT_COMPONENT_REGISTER(&uct_self_component); diff --git a/src/uct/sm/self/self.h b/src/uct/sm/self/self.h new file mode 100644 index 0000000..f9a4b61 --- /dev/null +++ b/src/uct/sm/self/self.h @@ -0,0 +1,36 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * Copyright (C) ARM Ltd. 2016-2017. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#ifndef UCT_SELF_H +#define UCT_SELF_H + +#include +#include + + +typedef uint64_t uct_self_iface_addr_t; + + +typedef struct uct_self_iface_config { + uct_iface_config_t super; + size_t seg_size; /* Maximal send size */ +} uct_self_iface_config_t; + + +typedef struct uct_self_iface { + uct_base_iface_t super; + uct_self_iface_addr_t id; /* Unique identifier for the instance */ + size_t send_size; /* Maximum size for payload */ + ucs_mpool_t msg_mp; /* Messages memory pool */ +} uct_self_iface_t; + + +typedef struct uct_self_ep { + uct_base_ep_t super; +} uct_self_ep_t; + + +#endif diff --git a/src/uct/tcp/sockcm/sockcm_def.h b/src/uct/tcp/sockcm/sockcm_def.h new file mode 100644 index 0000000..81195a0 --- /dev/null +++ b/src/uct/tcp/sockcm/sockcm_def.h @@ -0,0 +1,44 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED. + * Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_SOCKCM_H +#define UCT_SOCKCM_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define UCT_SOCKCM_TL_NAME "sockcm" +#define UCT_SOCKCM_PRIV_DATA_LEN 2048 + +typedef struct uct_sockcm_iface uct_sockcm_iface_t; +typedef struct uct_sockcm_ep uct_sockcm_ep_t; + +typedef struct uct_sockcm_conn_param { + ssize_t length; + int fd; + char private_data[UCT_SOCKCM_PRIV_DATA_LEN]; +} uct_sockcm_conn_param_t; + +typedef struct uct_sockcm_ctx { + int sock_fd; + size_t recv_len; + uct_sockcm_iface_t *iface; + uct_sockcm_conn_param_t conn_param; + ucs_list_link_t list; +} uct_sockcm_ctx_t; + +ucs_status_t uct_sockcm_ep_set_sock_id(uct_sockcm_ep_t *ep); +void uct_sockcm_ep_put_sock_id(uct_sockcm_ctx_t *sock_id_ctx); + +#endif /* UCT_SOCKCM_H */ diff --git a/src/uct/tcp/sockcm/sockcm_ep.c b/src/uct/tcp/sockcm/sockcm_ep.c new file mode 100644 index 0000000..8882e51 --- /dev/null +++ b/src/uct/tcp/sockcm/sockcm_ep.c @@ -0,0 +1,398 @@ +/** + * Copyright (C) 
Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED. + * Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#include "sockcm_ep.h" +#include +#include +#include +#include + +#define UCT_SOCKCM_CB_FLAGS_CHECK(_flags) \ + do { \ + UCT_CB_FLAGS_CHECK(_flags); \ + if (!((_flags) & UCT_CB_FLAG_ASYNC)) { \ + return UCS_ERR_UNSUPPORTED; \ + } \ + } while (0) + +ucs_status_t uct_sockcm_ep_set_sock_id(uct_sockcm_ep_t *ep) +{ + ucs_status_t status; + struct sockaddr *dest_addr = NULL; + + ep->sock_id_ctx = ucs_malloc(sizeof(*ep->sock_id_ctx), "client sock_id_ctx"); + if (ep->sock_id_ctx == NULL) { + return UCS_ERR_NO_MEMORY; + } + + dest_addr = (struct sockaddr *) &(ep->remote_addr); + + status = ucs_socket_create(dest_addr->sa_family, SOCK_STREAM, + &ep->sock_id_ctx->sock_fd); + if (status != UCS_OK) { + ucs_debug("unable to create client socket for sockcm"); + ucs_free(ep->sock_id_ctx); + return status; + } + + return UCS_OK; +} + +void uct_sockcm_ep_put_sock_id(uct_sockcm_ctx_t *sock_id_ctx) +{ + close(sock_id_ctx->sock_fd); + ucs_free(sock_id_ctx); +} + +ucs_status_t uct_sockcm_ep_send_client_info(uct_sockcm_ep_t *ep) +{ + uct_sockcm_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_sockcm_iface_t); + ucs_status_t status; + uct_sockcm_conn_param_t conn_param; + char dev_name[UCT_DEVICE_NAME_MAX]; + + memset(&conn_param, 0, sizeof(uct_sockcm_conn_param_t)); + + /* get interface name associated with the connected client fd; use that for pack_cb */ + status = ucs_sockaddr_get_ifname(ep->sock_id_ctx->sock_fd, dev_name, + UCT_DEVICE_NAME_MAX); + if (UCS_OK != status) { + goto out; + } + + conn_param.length = ep->pack_cb(ep->pack_cb_arg, dev_name, + (void*)conn_param.private_data); + if (conn_param.length < 0) { + ucs_error("sockcm client (iface=%p, ep = %p) failed to fill " + "private data. 
status: %s", + iface, ep, ucs_status_string((ucs_status_t)conn_param.length)); + status = UCS_ERR_IO_ERROR; + goto out; + } + + ucs_assert(conn_param.length <= UCT_SOCKCM_PRIV_DATA_LEN); + + status = ucs_socket_send(ep->sock_id_ctx->sock_fd, &conn_param, + sizeof(uct_sockcm_conn_param_t), NULL, NULL); + +out: + return status; +} + +static const char* +uct_sockcm_ep_conn_state_str(uct_sockcm_ep_conn_state_t state) +{ + switch (state) { + case UCT_SOCKCM_EP_CONN_STATE_SOCK_CONNECTING: + return "UCT_SOCKCM_EP_CONN_STATE_SOCK_CONNECTING"; + case UCT_SOCKCM_EP_CONN_STATE_INFO_SENT: + return "UCT_SOCKCM_EP_CONN_STATE_INFO_SENT"; + case UCT_SOCKCM_EP_CONN_STATE_CLOSED: + return "UCT_SOCKCM_EP_CONN_STATE_CLOSED"; + case UCT_SOCKCM_EP_CONN_STATE_CONNECTED: + return "UCT_SOCKCM_EP_CONN_STATE_CONNECTED"; + default: + ucs_fatal("invaild sockcm endpoint state %d", state); + } +} + +static void uct_sockcm_change_state(uct_sockcm_ep_t *ep, + uct_sockcm_ep_conn_state_t conn_state, + ucs_status_t status) +{ + uct_sockcm_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_sockcm_iface_t); + + pthread_mutex_lock(&ep->ops_mutex); + ucs_debug("changing ep with status %s from state %s to state %s, status %s", + ucs_status_string(ep->status), + uct_sockcm_ep_conn_state_str(ep->conn_state), + uct_sockcm_ep_conn_state_str(conn_state), + ucs_status_string(status)); + if ((ep->status != UCS_OK) && + (ep->conn_state == UCT_SOCKCM_EP_CONN_STATE_CLOSED)) { + /* Do not handle failure twice for closed EP */ + pthread_mutex_unlock(&ep->ops_mutex); + return; + } + + ep->status = status; + ep->conn_state = conn_state; + + if (conn_state == UCT_SOCKCM_EP_CONN_STATE_CLOSED) { + uct_sockcm_ep_set_failed(&iface->super.super, &ep->super.super, status); + } + + uct_sockcm_ep_invoke_completions(ep, status); + pthread_mutex_unlock(&ep->ops_mutex); +} + +static void uct_sockcm_handle_sock_connect(uct_sockcm_ep_t *ep) +{ + char sockaddr_str[UCS_SOCKADDR_STRING_LEN]; + int fd = 
ep->sock_id_ctx->sock_fd; + ucs_status_t status; + + if (!ucs_socket_is_connected(fd)) { + ucs_error("failed to connect to %s", + ucs_sockaddr_str((struct sockaddr*)&ep->remote_addr, + sockaddr_str, sizeof(sockaddr_str))); + uct_sockcm_change_state(ep, UCT_SOCKCM_EP_CONN_STATE_CLOSED, + UCS_ERR_UNREACHABLE); + goto err; + } + + status = uct_sockcm_ep_send_client_info(ep); + if (status != UCS_OK) { + ucs_error("failed to send client info: %s", ucs_status_string(status)); + uct_sockcm_change_state(ep, UCT_SOCKCM_EP_CONN_STATE_CLOSED, status); + goto err; + } + + ep->conn_state = UCT_SOCKCM_EP_CONN_STATE_INFO_SENT; + + /* Call current handler when server responds to sent message */ + if (UCS_OK != ucs_async_modify_handler(fd, UCS_EVENT_SET_EVREAD)) { + ucs_error("failed to modify async handler for fd %d", fd); + uct_sockcm_change_state(ep, UCT_SOCKCM_EP_CONN_STATE_CLOSED, + UCS_ERR_IO_ERROR); + goto err; + } + + return; + +err: + status = ucs_async_modify_handler(fd, 0); + if (status != UCS_OK) { + ucs_debug("unable to modify handler"); + } +} + +static void uct_sockcm_handle_info_sent(uct_sockcm_ep_t *ep) +{ + ucs_status_t status; + size_t recv_len; + char notif_val; + + recv_len = sizeof(notif_val); + status = ucs_socket_recv_nb(ep->sock_id_ctx->sock_fd, ¬if_val, + &recv_len, NULL, NULL); + if (UCS_ERR_NO_PROGRESS == status) { + /* will call recv again when ready */ + return; + } + + ucs_async_remove_handler(ep->sock_id_ctx->sock_fd, 0); + + if (UCS_OK != status) { + /* receive notif failed, close the connection */ + uct_sockcm_change_state(ep, UCT_SOCKCM_EP_CONN_STATE_CLOSED, status); + return; + } + + if (notif_val == UCT_SOCKCM_IFACE_NOTIFY_ACCEPT) { + ucs_debug("event_handler OK after accept"); + uct_sockcm_change_state(ep, UCT_SOCKCM_EP_CONN_STATE_CONNECTED, UCS_OK); + } else { + ucs_debug("event_handler REJECTED after reject"); + uct_sockcm_change_state(ep, UCT_SOCKCM_EP_CONN_STATE_CLOSED, + UCS_ERR_REJECTED); + } +} + +static void 
uct_sockcm_ep_event_handler(int fd, void *arg) +{ + uct_sockcm_ep_t *ep = (uct_sockcm_ep_t *) arg; + + switch (ep->conn_state) { + case UCT_SOCKCM_EP_CONN_STATE_SOCK_CONNECTING: + uct_sockcm_handle_sock_connect(ep); + break; + case UCT_SOCKCM_EP_CONN_STATE_INFO_SENT: + uct_sockcm_handle_info_sent(ep); + break; + case UCT_SOCKCM_EP_CONN_STATE_CONNECTED: + if (UCS_OK != ucs_async_modify_handler(fd, 0)) { + ucs_warn("unable to turn off event notifications on %d", fd); + } + uct_sockcm_change_state(ep, UCT_SOCKCM_EP_CONN_STATE_CONNECTED, UCS_OK); + break; + case UCT_SOCKCM_EP_CONN_STATE_CLOSED: + default: + ucs_debug("handling closed/default state, ep %p fd %d", ep, fd); + uct_sockcm_change_state(ep, UCT_SOCKCM_EP_CONN_STATE_CLOSED, + UCS_ERR_IO_ERROR); + break; + } +} + +static UCS_CLASS_INIT_FUNC(uct_sockcm_ep_t, const uct_ep_params_t *params) +{ + const ucs_sock_addr_t *sockaddr = params->sockaddr; + uct_sockcm_iface_t *iface = NULL; + struct sockaddr *param_sockaddr = NULL; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + ucs_status_t status; + size_t sockaddr_len; + + iface = ucs_derived_of(params->iface, uct_sockcm_iface_t); + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super); + + if (iface->is_server) { + return UCS_ERR_UNSUPPORTED; + } + + if (!(params->field_mask & UCT_EP_PARAM_FIELD_SOCKADDR)) { + return UCS_ERR_INVALID_PARAM; + } + + UCT_SOCKCM_CB_FLAGS_CHECK((params->field_mask & + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS) ? + params->sockaddr_cb_flags : 0); + + self->pack_cb = (params->field_mask & + UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB) ? + params->sockaddr_pack_cb : NULL; + self->pack_cb_arg = (params->field_mask & + UCT_EP_PARAM_FIELD_USER_DATA) ? + params->user_data : NULL; + self->pack_cb_flags = (params->field_mask & + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS) ? 
+ params->sockaddr_cb_flags : 0; + pthread_mutex_init(&self->ops_mutex, NULL); + ucs_queue_head_init(&self->ops); + + param_sockaddr = (struct sockaddr *) sockaddr->addr; + if (UCS_OK != ucs_sockaddr_sizeof(param_sockaddr, &sockaddr_len)) { + ucs_error("sockcm ep: unknown remote sa_family=%d", + sockaddr->addr->sa_family); + status = UCS_ERR_IO_ERROR; + goto err; + } + + memcpy(&self->remote_addr, param_sockaddr, sockaddr_len); + + self->slow_prog_id = UCS_CALLBACKQ_ID_NULL; + + status = uct_sockcm_ep_set_sock_id(self); + if (status != UCS_OK) { + goto err; + } + + status = ucs_sys_fcntl_modfl(self->sock_id_ctx->sock_fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + goto sock_err; + } + + status = ucs_socket_connect(self->sock_id_ctx->sock_fd, param_sockaddr); + if (UCS_STATUS_IS_ERR(status)) { + self->conn_state = UCT_SOCKCM_EP_CONN_STATE_CLOSED; + goto sock_err; + } + + self->conn_state = UCT_SOCKCM_EP_CONN_STATE_SOCK_CONNECTING; + self->status = UCS_INPROGRESS; + + /* set ep->status before event handler call to avoid simultaneous writes to state*/ + status = ucs_async_set_event_handler(iface->super.worker->async->mode, + self->sock_id_ctx->sock_fd, + UCS_EVENT_SET_EVWRITE, + uct_sockcm_ep_event_handler, + self, iface->super.worker->async); + if (status != UCS_OK) { + goto sock_err; + } + + ucs_debug("created an SOCKCM endpoint on iface %p, " + "remote addr: %s", iface, + ucs_sockaddr_str(param_sockaddr, + ip_port_str, UCS_SOCKADDR_STRING_LEN)); + return UCS_OK; + +sock_err: + uct_sockcm_ep_put_sock_id(self->sock_id_ctx); +err: + ucs_debug("error in sock connect"); + pthread_mutex_destroy(&self->ops_mutex); + + return status; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_sockcm_ep_t) +{ + uct_sockcm_iface_t *iface = ucs_derived_of(self->super.super.iface, + uct_sockcm_iface_t); + + ucs_debug("sockcm_ep %p: destroying", self); + + UCS_ASYNC_BLOCK(iface->super.worker->async); + + ucs_async_remove_handler(self->sock_id_ctx->sock_fd, 1); + 
uct_sockcm_ep_put_sock_id(self->sock_id_ctx); + + uct_worker_progress_unregister_safe(&iface->super.worker->super, + &self->slow_prog_id); + + pthread_mutex_destroy(&self->ops_mutex); + if (!ucs_queue_is_empty(&self->ops)) { + ucs_warn("destroying endpoint %p with not completed operations", self); + } + + UCS_ASYNC_UNBLOCK(iface->super.worker->async); +} + +UCS_CLASS_DEFINE(uct_sockcm_ep_t, uct_base_ep_t) +UCS_CLASS_DEFINE_NEW_FUNC(uct_sockcm_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_sockcm_ep_t, uct_ep_t); + +static unsigned uct_sockcm_client_err_handle_progress(void *arg) +{ + uct_sockcm_ep_t *sockcm_ep = arg; + uct_sockcm_iface_t *iface = ucs_derived_of(sockcm_ep->super.super.iface, + uct_sockcm_iface_t); + + ucs_trace_func("err_handle ep=%p", sockcm_ep); + UCS_ASYNC_BLOCK(iface->super.worker->async); + + sockcm_ep->slow_prog_id = UCS_CALLBACKQ_ID_NULL; + uct_set_ep_failed(&UCS_CLASS_NAME(uct_sockcm_ep_t), &sockcm_ep->super.super, + sockcm_ep->super.super.iface, sockcm_ep->status); + + UCS_ASYNC_UNBLOCK(iface->super.worker->async); + return 0; +} + +void uct_sockcm_ep_set_failed(uct_iface_t *iface, uct_ep_h ep, ucs_status_t status) +{ + uct_sockcm_iface_t *sockcm_iface = ucs_derived_of(iface, uct_sockcm_iface_t); + uct_sockcm_ep_t *sockcm_ep = ucs_derived_of(ep, uct_sockcm_ep_t); + + if (sockcm_iface->super.err_handler_flags & UCT_CB_FLAG_ASYNC) { + uct_set_ep_failed(&UCS_CLASS_NAME(uct_sockcm_ep_t), &sockcm_ep->super.super, + &sockcm_iface->super.super, status); + } else { + sockcm_ep->status = status; + uct_worker_progress_register_safe(&sockcm_iface->super.worker->super, + uct_sockcm_client_err_handle_progress, + sockcm_ep, UCS_CALLBACKQ_FLAG_ONESHOT, + &sockcm_ep->slow_prog_id); + } +} + +void uct_sockcm_ep_invoke_completions(uct_sockcm_ep_t *ep, ucs_status_t status) +{ + uct_sockcm_ep_op_t *op; + + ucs_assert(pthread_mutex_trylock(&ep->ops_mutex) == EBUSY); + + ucs_queue_for_each_extract(op, &ep->ops, queue_elem, 1) { + 
pthread_mutex_unlock(&ep->ops_mutex); + uct_invoke_completion(op->user_comp, status); + ucs_free(op); + pthread_mutex_lock(&ep->ops_mutex); + } +} diff --git a/src/uct/tcp/sockcm/sockcm_ep.h b/src/uct/tcp/sockcm/sockcm_ep.h new file mode 100644 index 0000000..468eef3 --- /dev/null +++ b/src/uct/tcp/sockcm/sockcm_ep.h @@ -0,0 +1,49 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED. + * Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_SOCKCM_EP_H +#define UCT_SOCKCM_EP_H + +#include "sockcm_iface.h" + +typedef struct uct_sockcm_ep_op uct_sockcm_ep_op_t; + +typedef enum uct_sockcm_ep_conn_state { + UCT_SOCKCM_EP_CONN_STATE_SOCK_CONNECTING, + UCT_SOCKCM_EP_CONN_STATE_INFO_SENT, + UCT_SOCKCM_EP_CONN_STATE_CLOSED, + UCT_SOCKCM_EP_CONN_STATE_CONNECTED +} uct_sockcm_ep_conn_state_t; + +struct uct_sockcm_ep_op { + ucs_queue_elem_t queue_elem; + uct_completion_t *user_comp; +}; + +struct uct_sockcm_ep { + uct_base_ep_t super; + uct_sockaddr_priv_pack_callback_t pack_cb; + void *pack_cb_arg; + uint32_t pack_cb_flags; + uct_sockcm_ep_conn_state_t conn_state; + + pthread_mutex_t ops_mutex; /* guards ops and status */ + ucs_queue_head_t ops; + ucs_status_t status; /* client EP status */ + + struct sockaddr_storage remote_addr; + uct_worker_cb_id_t slow_prog_id; + uct_sockcm_ctx_t *sock_id_ctx; +}; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_sockcm_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_sockcm_ep_t, uct_ep_t); + +void uct_sockcm_ep_set_failed(uct_iface_t *iface, uct_ep_h ep, ucs_status_t status); + +void uct_sockcm_ep_invoke_completions(uct_sockcm_ep_t *ep, ucs_status_t status); + +#endif diff --git a/src/uct/tcp/sockcm/sockcm_iface.c b/src/uct/tcp/sockcm/sockcm_iface.c new file mode 100644 index 0000000..7110615 --- /dev/null +++ b/src/uct/tcp/sockcm/sockcm_iface.c @@ -0,0 +1,426 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017-2019. 
ALL RIGHTS RESERVED. + * Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#include "sockcm_iface.h" +#include "sockcm_ep.h" + +#include +#include +#include +#include + + +enum uct_sockcm_process_event_flags { + UCT_SOCKCM_PROCESS_EVENT_DESTROY_SOCK_ID_FLAG = UCS_BIT(0), + UCT_SOCKCM_PROCESS_EVENT_ACK_EVENT_FLAG = UCS_BIT(1) +}; + +static ucs_config_field_t uct_sockcm_iface_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_sockcm_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + {"BACKLOG", "1024", + "Maximum number of pending connections for a listening socket.", + ucs_offsetof(uct_sockcm_iface_config_t, backlog), UCS_CONFIG_TYPE_UINT}, + + {NULL} +}; + +static UCS_CLASS_DECLARE_DELETE_FUNC(uct_sockcm_iface_t, uct_iface_t); + +static ucs_status_t uct_sockcm_iface_query(uct_iface_h tl_iface, + uct_iface_attr_t *iface_attr) +{ + uct_sockcm_iface_t *iface = ucs_derived_of(tl_iface, uct_sockcm_iface_t); + struct sockaddr_storage addr; + ucs_status_t status; + + uct_base_iface_query(&iface->super, iface_attr); + + iface_attr->iface_addr_len = sizeof(ucs_sock_addr_t); + iface_attr->device_addr_len = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR | + UCT_IFACE_FLAG_CB_ASYNC | + UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE; + iface_attr->max_conn_priv = UCT_SOCKCM_MAX_CONN_PRIV; + + if (iface->is_server) { + socklen_t len = sizeof(struct sockaddr_storage); + if (getsockname(iface->listen_fd, (struct sockaddr *)&addr, &len)) { + ucs_error("sockcm_iface: getsockname failed %m"); + return UCS_ERR_IO_ERROR; + } + + status = ucs_sockaddr_copy((struct sockaddr *)&iface_attr->listen_sockaddr, + (const struct sockaddr *)&addr); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +static ucs_status_t uct_sockcm_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *iface_addr) +{ + ucs_sock_addr_t *sockcm_addr = (ucs_sock_addr_t *)iface_addr; + + 
sockcm_addr->addr = NULL; + sockcm_addr->addrlen = 0; + return UCS_OK; +} + +static ucs_status_t uct_sockcm_iface_notify_client(int notif_val, + uct_conn_request_h conn_request) +{ + char notif = notif_val; + int fd; + + fd = ((uct_sockcm_ctx_t *) conn_request)->sock_fd; + + return ucs_socket_send(fd, ¬if, sizeof(notif), NULL, NULL); +} + +static ucs_status_t uct_sockcm_iface_accept(uct_iface_h tl_iface, + uct_conn_request_h conn_request) +{ + return uct_sockcm_iface_notify_client(UCT_SOCKCM_IFACE_NOTIFY_ACCEPT, conn_request); +} + +static ucs_status_t uct_sockcm_iface_reject(uct_iface_h tl_iface, + uct_conn_request_h conn_request) +{ + return uct_sockcm_iface_notify_client(UCT_SOCKCM_IFACE_NOTIFY_REJECT, conn_request); +} + +static ucs_status_t uct_sockcm_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp) +{ + uct_sockcm_ep_t *ep = ucs_derived_of(tl_ep, uct_sockcm_ep_t); + ucs_status_t status; + uct_sockcm_ep_op_t *op; + + pthread_mutex_lock(&ep->ops_mutex); + status = ep->status; + if ((status == UCS_INPROGRESS) && (comp != NULL)) { + op = ucs_malloc(sizeof(*op), "uct_sockcm_ep_flush op"); + if (op != NULL) { + op->user_comp = comp; + ucs_queue_push(&ep->ops, &op->queue_elem); + } else { + status = UCS_ERR_NO_MEMORY; + } + } + pthread_mutex_unlock(&ep->ops_mutex); + + return status; +} + + +static uct_iface_ops_t uct_sockcm_iface_ops = { + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_sockcm_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_sockcm_ep_t), + .ep_flush = uct_sockcm_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_pending_purge = ucs_empty_function, + .iface_accept = uct_sockcm_iface_accept, + .iface_reject = uct_sockcm_iface_reject, + .iface_progress_enable = (uct_iface_progress_enable_func_t)ucs_empty_function_return_success, + .iface_progress_disable = (uct_iface_progress_disable_func_t)ucs_empty_function_return_success, + .iface_progress = ucs_empty_function_return_zero, + .iface_flush = uct_base_iface_flush, + .iface_fence = 
uct_base_iface_fence, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_sockcm_iface_t), + .iface_query = uct_sockcm_iface_query, + .iface_is_reachable = (uct_iface_is_reachable_func_t)ucs_empty_function_return_zero, + .iface_get_device_address = (uct_iface_get_device_address_func_t)ucs_empty_function_return_success, + .iface_get_address = uct_sockcm_iface_get_address +}; + +static ucs_status_t uct_sockcm_iface_process_conn_req(uct_sockcm_ctx_t *sock_id_ctx) +{ + uct_sockcm_iface_t *iface = sock_id_ctx->iface; + uct_sockcm_conn_param_t *conn_param = &sock_id_ctx->conn_param; + + ucs_debug("process conn req conn_param = %p, conn_param->length = %ld", + conn_param, conn_param->length); + iface->conn_request_cb(&iface->super.super, iface->conn_request_arg, sock_id_ctx, + conn_param->private_data, conn_param->length); + return UCS_OK; +} + +static void uct_sockcm_iface_recv_handler(int fd, void *arg) +{ + uct_sockcm_ctx_t *sock_id_ctx = (uct_sockcm_ctx_t *) arg; + ucs_status_t status; + size_t recv_len; + + /* attempt another receive only if initial receive was not successful */ + recv_len = sizeof(uct_sockcm_conn_param_t) - sock_id_ctx->recv_len; + if (recv_len == 0) { + goto out_remove_handler; + } + + status = ucs_socket_recv_nb(sock_id_ctx->sock_fd, + UCS_PTR_BYTE_OFFSET(&sock_id_ctx->conn_param, + sock_id_ctx->recv_len), + &recv_len, NULL, NULL); + if ((status == UCS_ERR_CANCELED) || (status == UCS_ERR_IO_ERROR)) { + ucs_warn("recv failed in recv handler"); + /* TODO: clean up resources allocated for client endpoint? */ + return; + } + + sock_id_ctx->recv_len += ((UCS_ERR_NO_PROGRESS == status) ? 
0 : recv_len); + if (sock_id_ctx->recv_len != sizeof(uct_sockcm_conn_param_t)) { + /* handler should be notified when remaining pieces show up */ + return; + } + + if (UCS_OK != uct_sockcm_iface_process_conn_req((uct_sockcm_ctx_t*)arg)) { + ucs_error("unable to process connection request"); + } + +out_remove_handler: + status = ucs_async_modify_handler(fd, 0); + if (status != UCS_OK) { + ucs_debug("unable to modify handler"); + } +} + +static void uct_sockcm_iface_event_handler(int fd, void *arg) +{ + size_t recv_len = 0; + uct_sockcm_iface_t *iface = arg; + uct_sockcm_ctx_t *sock_id_ctx = NULL; + struct sockaddr peer_addr; + socklen_t addrlen; + int accept_fd; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + ucs_status_t status; + + addrlen = sizeof(struct sockaddr); + accept_fd = accept(iface->listen_fd, (struct sockaddr*)&peer_addr, &addrlen); + if (accept_fd == -1) { + if ((errno == EAGAIN) || (errno == EINTR)) { + ucs_debug("accept(fd=%d) failed: %m", iface->listen_fd); + } else { + /* accept failed here, let the client try again */ + ucs_warn("accept(fd=%d) failed with non-recoverable error %m", + iface->listen_fd); + } + return; + } + + ucs_debug("sockcm_iface %p: accepted connection from %s at fd %d %m", iface, + ucs_sockaddr_str(&peer_addr, ip_port_str, + UCS_SOCKADDR_STRING_LEN), accept_fd); + + /* Unlike rdmacm, socket connect/accept does not permit exchange of + * connection parameters but we need to use send/recv on top of that + * We simulate that with an explicit receive */ + + sock_id_ctx = ucs_malloc(sizeof(uct_sockcm_ctx_t), "accepted sock_id_ctx"); + if (sock_id_ctx == NULL) { + ucs_error("sockcm_listener: unable to create mem for accepted fd"); + close(accept_fd); + return; + } + + sock_id_ctx->recv_len = 0; + sock_id_ctx->sock_fd = accept_fd; + sock_id_ctx->iface = iface; + + status = ucs_sys_fcntl_modfl(sock_id_ctx->sock_fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + ucs_error("sockcm_listener: unable make accepted fd non-blocking"); + goto 
err; + } + + recv_len = sizeof(sock_id_ctx->conn_param); + + status = ucs_socket_recv_nb(accept_fd, &sock_id_ctx->conn_param, &recv_len, + NULL, NULL); + if (UCS_OK != status) { + sock_id_ctx->recv_len = ((UCS_ERR_NO_PROGRESS == status) ? 0: recv_len); + status = ucs_async_set_event_handler(iface->super.worker->async->mode, + sock_id_ctx->sock_fd, + UCS_EVENT_SET_EVREAD, + uct_sockcm_iface_recv_handler, + sock_id_ctx, + iface->super.worker->async); + if (status != UCS_OK) { + ucs_fatal("sockcm_listener: unable to create handler for new connection"); + goto err; + } + ucs_debug("assigning recv handler for message from client"); + } else { + ucs_debug("not assigning recv handler for message from client"); + if (UCS_OK != uct_sockcm_iface_process_conn_req(sock_id_ctx)) { + ucs_error("Unable to process connection request"); + } + } + + UCS_ASYNC_BLOCK(iface->super.worker->async); + ucs_list_add_tail(&iface->used_sock_ids_list, &sock_id_ctx->list); + UCS_ASYNC_UNBLOCK(iface->super.worker->async); + + return; + +err: + uct_sockcm_ep_put_sock_id(sock_id_ctx); + return; +} + +static UCS_CLASS_INIT_FUNC(uct_sockcm_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_sockcm_iface_config_t *config = ucs_derived_of(tl_config, + uct_sockcm_iface_config_t); + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + ucs_status_t status; + struct sockaddr *param_sockaddr; + int param_sockaddr_len; + + UCT_CHECK_PARAM(params->field_mask & UCT_IFACE_PARAM_FIELD_OPEN_MODE, + "UCT_IFACE_PARAM_FIELD_OPEN_MODE is not defined"); + + UCT_CHECK_PARAM((params->open_mode & UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER) || + (params->open_mode & UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT), + "Invalid open mode %zu", params->open_mode); + + UCT_CHECK_PARAM(!(params->open_mode & UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER) || + (params->field_mask & UCT_IFACE_PARAM_FIELD_SOCKADDR), + "UCT_IFACE_PARAM_FIELD_SOCKADDR is not defined " + "for 
UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER"); + + UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_sockcm_iface_ops, md, worker, + params, tl_config + UCS_STATS_ARG((params->field_mask & + UCT_IFACE_PARAM_FIELD_STATS_ROOT) ? + params->stats_root : NULL) + UCS_STATS_ARG(UCT_SOCKCM_TL_NAME)); + + if (self->super.worker->async == NULL) { + ucs_error("sockcm must have async != NULL"); + return UCS_ERR_INVALID_PARAM; + } + if (self->super.worker->async->mode == UCS_ASYNC_MODE_SIGNAL) { + ucs_warn("sockcm does not support SIGIO"); + } + + self->listen_fd = -1; + + if (params->open_mode & UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER) { + + if (!(params->mode.sockaddr.cb_flags & UCT_CB_FLAG_ASYNC)) { + return UCS_ERR_INVALID_PARAM; + } + + param_sockaddr = (struct sockaddr *)params->mode.sockaddr.listen_sockaddr.addr; + param_sockaddr_len = params->mode.sockaddr.listen_sockaddr.addrlen; + + status = ucs_socket_create(param_sockaddr->sa_family, SOCK_STREAM, + &self->listen_fd); + if (status != UCS_OK) { + return status; + } + + status = ucs_sys_fcntl_modfl(self->listen_fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + goto err_close_sock; + } + + if (0 > bind(self->listen_fd, param_sockaddr, param_sockaddr_len)) { + ucs_error("bind(fd=%d) failed: %m", self->listen_fd); + status = (errno == EADDRINUSE) ? 
UCS_ERR_BUSY : UCS_ERR_IO_ERROR; + goto err_close_sock; + } + + if (0 > listen(self->listen_fd, config->backlog)) { + ucs_error("listen(fd=%d; backlog=%d)", self->listen_fd, + config->backlog); + status = UCS_ERR_IO_ERROR; + goto err_close_sock; + } + + status = ucs_async_set_event_handler(self->super.worker->async->mode, + self->listen_fd, + UCS_EVENT_SET_EVREAD | + UCS_EVENT_SET_EVERR, + uct_sockcm_iface_event_handler, + self, self->super.worker->async); + if (status != UCS_OK) { + goto err_close_sock; + } + + ucs_debug("iface (%p) sockcm id %d listening on %s", self, + self->listen_fd, + ucs_sockaddr_str(param_sockaddr, ip_port_str, + UCS_SOCKADDR_STRING_LEN)); + + self->cb_flags = params->mode.sockaddr.cb_flags; + self->conn_request_cb = params->mode.sockaddr.conn_request_cb; + self->conn_request_arg = params->mode.sockaddr.conn_request_arg; + self->is_server = 1; + } else { + self->is_server = 0; + } + + ucs_list_head_init(&self->used_sock_ids_list); + + return UCS_OK; + + err_close_sock: + close(self->listen_fd); + return status; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_sockcm_iface_t) +{ + uct_sockcm_ctx_t *sock_id_ctx; + + if (self->is_server) { + if (-1 != self->listen_fd) { + ucs_debug("cleaning listen_fd = %d", self->listen_fd); + ucs_async_remove_handler(self->listen_fd, 1); + close(self->listen_fd); + } + } + + UCS_ASYNC_BLOCK(self->super.worker->async); + + while (!ucs_list_is_empty(&self->used_sock_ids_list)) { + sock_id_ctx = ucs_list_extract_head(&self->used_sock_ids_list, + uct_sockcm_ctx_t, list); + ucs_debug("cleaning server fd = %d", sock_id_ctx->sock_fd); + ucs_async_remove_handler(sock_id_ctx->sock_fd, 1); + uct_sockcm_ep_put_sock_id(sock_id_ctx); + } + + UCS_ASYNC_UNBLOCK(self->super.worker->async); +} + +UCS_CLASS_DEFINE(uct_sockcm_iface_t, uct_base_iface_t); +static UCS_CLASS_DEFINE_NEW_FUNC(uct_sockcm_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t *, + const uct_iface_config_t *); +static 
UCS_CLASS_DEFINE_DELETE_FUNC(uct_sockcm_iface_t, uct_iface_t); + +static ucs_status_t +uct_sockcm_query_tl_devices(uct_md_h md, uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + *num_tl_devices_p = 0; + *tl_devices_p = NULL; + return UCS_OK; +} + +UCT_TL_DEFINE(&uct_sockcm_component, sockcm, uct_sockcm_query_tl_devices, + uct_sockcm_iface_t, "SOCKCM_", uct_sockcm_iface_config_table, + uct_sockcm_iface_config_t); diff --git a/src/uct/tcp/sockcm/sockcm_iface.h b/src/uct/tcp/sockcm/sockcm_iface.h new file mode 100644 index 0000000..e39fd0f --- /dev/null +++ b/src/uct/tcp/sockcm/sockcm_iface.h @@ -0,0 +1,41 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED. + * Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_SOCKCM_IFACE_H +#define UCT_SOCKCM_IFACE_H + +#include "sockcm_def.h" +#include "sockcm_md.h" + +#define UCT_SOCKCM_MAX_CONN_PRIV \ + (UCT_SOCKCM_PRIV_DATA_LEN - sizeof(ssize_t)) + + +typedef enum uct_sockcm_iface_notify { + UCT_SOCKCM_IFACE_NOTIFY_ACCEPT = 0, + UCT_SOCKCM_IFACE_NOTIFY_REJECT +} uct_sockcm_iface_notify_t; + +typedef struct uct_sockcm_iface_config { + uct_iface_config_t super; + unsigned backlog; +} uct_sockcm_iface_config_t; + +struct uct_sockcm_iface { + uct_base_iface_t super; + + int listen_fd; + + uint8_t is_server; + /* Fields used only for server side */ + void *conn_request_arg; + uct_sockaddr_conn_request_callback_t conn_request_cb; + uint32_t cb_flags; + + /* Field used only for client side */ + ucs_list_link_t used_sock_ids_list; +}; +#endif diff --git a/src/uct/tcp/sockcm/sockcm_md.c b/src/uct/tcp/sockcm/sockcm_md.c new file mode 100644 index 0000000..527fb6a --- /dev/null +++ b/src/uct/tcp/sockcm/sockcm_md.c @@ -0,0 +1,140 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED. + * Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "sockcm_md.h" + +#define UCT_SOCKCM_NAME "sockcm" + +static ucs_config_field_t uct_sockcm_md_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_sockcm_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)}, + {NULL} +}; + +static void uct_sockcm_md_close(uct_md_h md); + +static uct_md_ops_t uct_sockcm_md_ops = { + .close = uct_sockcm_md_close, + .query = uct_sockcm_md_query, + .is_sockaddr_accessible = uct_sockcm_is_sockaddr_accessible, + .detect_memory_type = ucs_empty_function_return_unsupported, +}; + +static void uct_sockcm_md_close(uct_md_h md) +{ + uct_sockcm_md_t *sockcm_md = ucs_derived_of(md, uct_sockcm_md_t); + ucs_free(sockcm_md); +} + +ucs_status_t uct_sockcm_md_query(uct_md_h md, uct_md_attr_t *md_attr) +{ + md_attr->cap.flags = UCT_MD_FLAG_SOCKADDR; + md_attr->cap.reg_mem_types = 0; + md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_HOST; + md_attr->cap.detect_mem_types = 0; + md_attr->cap.max_alloc = 0; + md_attr->cap.max_reg = 0; + md_attr->rkey_packed_size = 0; + md_attr->reg_cost.overhead = 0; + md_attr->reg_cost.growth = 0; + memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus)); + return UCS_OK; +} + +int uct_sockcm_is_sockaddr_accessible(uct_md_h md, const ucs_sock_addr_t *sockaddr, + uct_sockaddr_accessibility_t mode) +{ + struct sockaddr *param_sockaddr = NULL; + int is_accessible = 0; + int sock_fd = -1; + size_t sockaddr_len = 0; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + + param_sockaddr = (struct sockaddr *) sockaddr->addr; + + if ((mode != UCT_SOCKADDR_ACC_LOCAL) && (mode != UCT_SOCKADDR_ACC_REMOTE)) { + ucs_error("Unknown sockaddr accessibility mode %d", mode); + return 0; + } + + sock_fd = socket(param_sockaddr->sa_family, SOCK_STREAM, 0); + if (-1 == sock_fd) { + return 0; + } + + if (UCS_OK != ucs_sockaddr_sizeof(param_sockaddr, &sockaddr_len)) { + ucs_debug("family != AF_INET and != AF_INET6"); + goto out_destroy_id; + } + + if (mode == UCT_SOCKADDR_ACC_LOCAL) { + ucs_debug("addr_len = 
%ld", (long int) sockaddr_len); + + if (bind(sock_fd, param_sockaddr, sockaddr_len)) { + ucs_debug("bind(addr = %s) failed: %m", + ucs_sockaddr_str((struct sockaddr *)sockaddr->addr, + ip_port_str, UCS_SOCKADDR_STRING_LEN)); + goto out_destroy_id; + } + + if (ucs_sockaddr_is_inaddr_any(param_sockaddr)) { + is_accessible = 1; + goto out_print; + } + } + + is_accessible = 1; /* if UCT_SOCKADDR_ACC_REMOTE == mode*/ + + out_print: + ucs_debug("address %s is accessible from sockcm_md %p with mode: %d", + ucs_sockaddr_str(param_sockaddr, ip_port_str, + UCS_SOCKADDR_STRING_LEN), + ucs_derived_of(md, uct_sockcm_md_t), mode); + + out_destroy_id: + close(sock_fd); + + return is_accessible; +} + +static ucs_status_t +uct_sockcm_md_open(uct_component_t *component, const char *md_name, + const uct_md_config_t *config, uct_md_h *md_p) +{ + uct_sockcm_md_t *md; + + md = ucs_malloc(sizeof(*md), "sockcm_md"); + if (md == NULL) { + return UCS_ERR_NO_MEMORY; + } + + md->super.ops = &uct_sockcm_md_ops; + md->super.component = &uct_sockcm_component; + + /* cppcheck-suppress autoVariables */ + *md_p = &md->super; + return UCS_OK; +} + +uct_component_t uct_sockcm_component = { + .query_md_resources = uct_md_query_single_md_resource, + .md_open = uct_sockcm_md_open, + .cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = ucs_empty_function_return_unsupported, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = ucs_empty_function_return_unsupported, + .name = UCT_SOCKCM_NAME, + .md_config = { + .name = "Sock-CM memory domain", + .prefix = "SOCKCM_", + .table = uct_sockcm_md_config_table, + .size = sizeof(uct_sockcm_md_config_t), + }, + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_sockcm_component), + .flags = 0 +}; +UCT_COMPONENT_REGISTER(&uct_sockcm_component) diff --git a/src/uct/tcp/sockcm/sockcm_md.h b/src/uct/tcp/sockcm/sockcm_md.h new file mode 100644 index 0000000..7b7cfa6 --- /dev/null +++ 
b/src/uct/tcp/sockcm/sockcm_md.h @@ -0,0 +1,37 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED. + * Copyright (C) NVIDIA Corporation. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_SOCKCM_MD_H_ +#define UCT_SOCKCM_MD_H_ + +#include "sockcm_def.h" +#include +#include +#include + +/* + * SOCKCM memory domain. + */ +typedef struct uct_sockcm_md { + uct_md_t super; +} uct_sockcm_md_t; + +/* + * SOCKCM memory domain configuration. + */ +typedef struct uct_sockcm_md_config { + uct_md_config_t super; +} uct_sockcm_md_config_t; + +extern uct_component_t uct_sockcm_component; + +ucs_status_t uct_sockcm_md_query(uct_md_h md, uct_md_attr_t *md_attr); + +int uct_sockcm_is_sockaddr_accessible(uct_md_h md, + const ucs_sock_addr_t *sockaddr, + uct_sockaddr_accessibility_t mode); + +#endif diff --git a/src/uct/tcp/tcp.h b/src/uct/tcp/tcp.h new file mode 100644 index 0000000..64afc7e --- /dev/null +++ b/src/uct/tcp/tcp.h @@ -0,0 +1,509 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#ifndef UCT_TCP_MD_H +#define UCT_TCP_MD_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define UCT_TCP_NAME "tcp" + +#define UCT_TCP_CONFIG_PREFIX "TCP_" + +/* Magic number that is used by TCP to identify its peers */ +#define UCT_TCP_MAGIC_NUMBER 0xCAFEBABE12345678lu + +/* Maximum number of events to wait on event set */ +#define UCT_TCP_MAX_EVENTS 16 + +/* How long should be string to keep [%s:%s] string + * where %s value can be -/Tx/Rx */ +#define UCT_TCP_EP_CTX_CAPS_STR_MAX 8 + +/* How many IOVs are needed to keep AM/PUT Zcopy service data + * (TCP protocol and user's AM (or PUT) headers) */ +#define UCT_TCP_EP_ZCOPY_SERVICE_IOV_COUNT 2 + +/* How many IOVs are needed to do AM Short + * (TCP protocol and user's AM headers, payload) */ +#define UCT_TCP_EP_AM_SHORTV_IOV_COUNT 3 + +/* Maximum size of a data that can be sent by PUT Zcopy + * operation */ +#define UCT_TCP_EP_PUT_ZCOPY_MAX SIZE_MAX + +/* Length of a data that is used by PUT protocol */ +#define UCT_TCP_EP_PUT_SERVICE_LENGTH (sizeof(uct_tcp_am_hdr_t) + \ + sizeof(uct_tcp_ep_put_req_hdr_t)) + +#define UCT_TCP_CONFIG_MAX_CONN_RETRIES "MAX_CONN_RETRIES" + + +/** + * TCP context type + */ +typedef enum uct_tcp_ep_ctx_type { + /* EP is connected to a peer to send data. This EP is managed + * by a user and TCP mustn't free this EP even if connection + * is broken. */ + UCT_TCP_EP_CTX_TYPE_TX, + /* EP is connected to a peer to receive data. If only RX is set + * on a given EP, it is hidden from a user (i.e. the user is unable + * to do any operation on that EP) and TCP is responsible to + * free memory allocating for this EP. */ + UCT_TCP_EP_CTX_TYPE_RX, + + /* Additional flags that controls EP behavior: */ + /* - Zcopy TX operation is in progress on a given EP. */ + UCT_TCP_EP_CTX_TYPE_ZCOPY_TX, + /* - PUT RX operation is in progress on a given EP. 
*/ + UCT_TCP_EP_CTX_TYPE_PUT_RX, + /* - PUT TX operation is waiting for an ACK on a given EP */ + UCT_TCP_EP_CTX_TYPE_PUT_TX_WAITING_ACK, + /* - PUT RX operation is waiting for resources to send an ACK + * for received PUT operations on a given EP */ + UCT_TCP_EP_CTX_TYPE_PUT_RX_SENDING_ACK +} uct_tcp_ep_ctx_type_t; + + +/** + * TCP endpoint connection state + */ +typedef enum uct_tcp_ep_conn_state { + /* EP is unable to communicate with a peer's EP - connections establishment + * was unsuccessful or detected hangup during communications. */ + UCT_TCP_EP_CONN_STATE_CLOSED, + /* EP is connecting to a peer's EP, i.e. connect() was called on non-blocking + * socket and returned this call returned that an operation is in progress. + * After it is done, it sends `UCT_TCP_CM_CONN_REQ` to the peer. + * All AM operations return `UCS_ERR_NO_RESOURCE` error to a caller. */ + UCT_TCP_EP_CONN_STATE_CONNECTING, + /* EP is receiving the magic number in order to verify a peer. EP is moved + * to this state after accept() completed. */ + UCT_TCP_EP_CONN_STATE_RECV_MAGIC_NUMBER, + /* EP is accepting connection from a peer, i.e. accept() returns socket fd + * on which a connection was accepted, this EP was created using this socket + * fd and the magic number was received and verified by EP and now it is + * waiting for `UCT_TCP_CM_CONN_REQ` from a peer. */ + UCT_TCP_EP_CONN_STATE_ACCEPTING, + /* EP is waiting for `UCT_TCP_CM_CONN_ACK` message from a peer after sending + * `UCT_TCP_CM_CONN_REQ`. + * All AM operations return `UCS_ERR_NO_RESOURCE` error to a caller. */ + UCT_TCP_EP_CONN_STATE_WAITING_ACK, + /* EP is waiting for a connection and `UCT_TCP_CM_CONN_REQ` message from + * a peer after simultaneous connection resolution between them. This EP + * is a "winner" of the resolution, but no RX capability on this PR (i.e. + * no `UCT_TCP_CM_CONN_REQ` message was received from the peer). EP is moved + * to `UCT_TCP_EP_CONN_STATE_CONNECTED` state upon receiving this message. 
+ * All AM operations return `UCS_ERR_NO_RESOURCE` error to a caller. */ + UCT_TCP_EP_CONN_STATE_WAITING_REQ, + /* EP is connected to a peer and they can communicate with each other. */ + UCT_TCP_EP_CONN_STATE_CONNECTED +} uct_tcp_ep_conn_state_t; + +/* Forward declaration */ +typedef struct uct_tcp_ep uct_tcp_ep_t; + +typedef unsigned (*uct_tcp_ep_progress_t)(uct_tcp_ep_t *ep); + +static inline int uct_tcp_khash_sockaddr_in_equal(struct sockaddr_in sa1, + struct sockaddr_in sa2) +{ + ucs_status_t status; + int cmp; + + cmp = ucs_sockaddr_cmp((const struct sockaddr*)&sa1, + (const struct sockaddr*)&sa2, + &status); + ucs_assert(status == UCS_OK); + return !cmp; +} + +static inline uint32_t uct_tcp_khash_sockaddr_in_hash(struct sockaddr_in sa) +{ + ucs_status_t UCS_V_UNUSED status; + size_t addr_size; + + status = ucs_sockaddr_sizeof((const struct sockaddr*)&sa, + &addr_size); + ucs_assert(status == UCS_OK); + return ucs_crc32(0, (const void *)&sa, addr_size); +} + +KHASH_INIT(uct_tcp_cm_eps, struct sockaddr_in, ucs_list_link_t*, + 1, uct_tcp_khash_sockaddr_in_hash, uct_tcp_khash_sockaddr_in_equal); + + +/** + * TCP Connection Manager state + */ +typedef struct uct_tcp_cm_state { + const char *name; /* CM state name */ + uct_tcp_ep_progress_t tx_progress; /* TX progress function */ + uct_tcp_ep_progress_t rx_progress; /* RX progress function */ +} uct_tcp_cm_state_t; + + +/** + * TCP Connection Manager event + */ +typedef enum uct_tcp_cm_conn_event { + /* Connection request from a EP that has TX capability to a EP that + * has to be able to receive AM data (i.e. has to have RX capability). */ + UCT_TCP_CM_CONN_REQ = UCS_BIT(0), + /* Connection acknowledgment from a EP that accepts a connection from + * initiator of a connection request. */ + UCT_TCP_CM_CONN_ACK = UCS_BIT(1), + /* Request for waiting of a connection request. + * The message is not sent separately (only along with a connection + * acknowledgment.) 
*/ + UCT_TCP_CM_CONN_WAIT_REQ = UCS_BIT(2), + /* Connection acknowledgment + Connection request. The message is sent + * from a EP that accepts remote connection when it was in + * `UCT_TCP_EP_CONN_STATE_CONNECTING` state (i.e. original + * `UCT_TCP_CM_CONN_REQ` wasn't sent yet) and want to have RX capability + * on a peer's EP in order to send AM data. */ + UCT_TCP_CM_CONN_ACK_WITH_REQ = (UCT_TCP_CM_CONN_REQ | + UCT_TCP_CM_CONN_ACK), + /* Connection acknowledgment + Request for waiting of a connection request. + * The message is sent from a EP that accepts remote connection when it was + * in `UCT_TCP_EP_CONN_STATE_WAITING_ACK` state (i.e. original + * `UCT_TCP_CM_CONN_REQ` was sent) and want to have RX capability on a + * peer's EP in order to send AM data. */ + UCT_TCP_CM_CONN_ACK_WITH_WAIT_REQ = (UCT_TCP_CM_CONN_WAIT_REQ | + UCT_TCP_CM_CONN_ACK) +} uct_tcp_cm_conn_event_t; + + +/** + * TCP connection request packet + */ +typedef struct uct_tcp_cm_conn_req_pkt { + uct_tcp_cm_conn_event_t event; /* Connection event ID */ + struct sockaddr_in iface_addr; /* Socket address of UCT local iface */ +} UCS_S_PACKED uct_tcp_cm_conn_req_pkt_t; + + +/** + * TCP active message header + */ +typedef struct uct_tcp_am_hdr { + uint8_t am_id; /* UCT AM ID of an AM operation */ + uint32_t length; /* Length of data sent in an AM operation */ +} UCS_S_PACKED uct_tcp_am_hdr_t; + + +/** + * AM IDs reserved for TCP protocols + */ +typedef enum uct_tcp_ep_am_id { + /* AM ID reserved for TCP internal Connection Manager messages */ + UCT_TCP_EP_CM_AM_ID = UCT_AM_ID_MAX, + /* AM ID reserved for TCP internal PUT REQ message */ + UCT_TCP_EP_PUT_REQ_AM_ID = UCT_AM_ID_MAX + 1, + /* AM ID reserved for TCP internal PUT ACK message */ + UCT_TCP_EP_PUT_ACK_AM_ID = UCT_AM_ID_MAX + 2 +} uct_tcp_ep_am_id_t; + + +/** + * TCP PUT request header + */ +typedef struct uct_tcp_ep_put_req_hdr { + uint64_t addr; /* Address of a remote memory buffer */ + size_t length; /* Length of a remote memory buffer */ 
+ uint32_t sn; /* Sequence number of the current PUT operation */ +} UCS_S_PACKED uct_tcp_ep_put_req_hdr_t; + + +/** + * TCP PUT acknowledge header + */ +typedef struct uct_tcp_ep_put_ack_hdr { + uint32_t sn; /* Sequence number of the last acked PUT operation */ +} UCS_S_PACKED uct_tcp_ep_put_ack_hdr_t; + + +/** + * TCP PUT completion + */ +typedef struct uct_tcp_ep_put_completion { + uct_completion_t *comp; /* User's completion passed to + * uct_ep_flush */ + uint32_t wait_put_sn; /* Sequence number of the last unacked + * PUT operations that was in-progress + * when uct_ep_flush was called */ + ucs_queue_elem_t elem; /* Element to insert completion into + * TCP EP PUT operation pending queue */ +} uct_tcp_ep_put_completion_t; + + +/** + * TCP endpoint communication context + */ +typedef struct uct_tcp_ep_ctx { + uint32_t put_sn; /* Sequence number of last sent + * or received PUT operation */ + void *buf; /* Partial send/recv data */ + size_t length; /* How much data in the buffer */ + size_t offset; /* How much data was sent (TX) or was + * handled after receiving (RX) */ +} uct_tcp_ep_ctx_t; + + +/** + * TCP AM/PUT Zcopy communication context mapped to + * buffer from TCP EP context + */ +typedef struct uct_tcp_ep_zcopy_tx { + uct_tcp_am_hdr_t super; /* UCT TCP AM header */ + uct_completion_t *comp; /* Local UCT completion object */ + size_t iov_index; /* Current IOV index */ + size_t iov_cnt; /* Number of IOVs that should be sent */ + struct iovec iov[0]; /* IOVs that should be sent */ +} uct_tcp_ep_zcopy_tx_t; + + +/** + * TCP endpoint + */ +struct uct_tcp_ep { + uct_base_ep_t super; + uint8_t ctx_caps; /* Which contexts are supported */ + int fd; /* Socket file descriptor */ + uct_tcp_ep_conn_state_t conn_state; /* State of connection with peer */ + unsigned conn_retries; /* Number of connection attempts done */ + int events; /* Current notifications */ + uct_tcp_ep_ctx_t tx; /* TX resources */ + uct_tcp_ep_ctx_t rx; /* RX resources */ + struct sockaddr_in 
peer_addr; /* Remote iface addr */ + ucs_queue_head_t pending_q; /* Pending operations */ + ucs_queue_head_t put_comp_q; /* Flush completions waiting for + * outstanding PUTs acknowledgment */ + ucs_list_link_t list; /* List element to insert into TCP EP list */ +}; + + +/** + * TCP interface + */ +typedef struct uct_tcp_iface { + uct_base_iface_t super; /* Parent class */ + int listen_fd; /* Server socket */ + khash_t(uct_tcp_cm_eps) ep_cm_map; /* Map of endpoints that don't + * have one of the context cap */ + ucs_list_link_t ep_list; /* List of endpoints */ + char if_name[IFNAMSIZ]; /* Network interface name */ + ucs_sys_event_set_t *event_set; /* Event set identifier */ + ucs_mpool_t tx_mpool; /* TX memory pool */ + ucs_mpool_t rx_mpool; /* RX memory pool */ + size_t outstanding; /* How much data in the EP send buffers + * + how many non-blocking connections + * are in progress + how many EPs are + * waiting for PUT Zcopy operation ACKs + * (0/1 for each EP) */ + + struct { + size_t tx_seg_size; /* TX AM buffer size */ + size_t rx_seg_size; /* RX AM buffer size */ + size_t sendv_thresh; /* Minimum size of user's payload from which + * non-blocking vector send should be used */ + struct { + size_t max_iov; /* Maximum supported IOVs limited by + * user configuration and service buffers + * (TCP protocol and user's AM headers) */ + size_t max_hdr; /* Maximum supported AM Zcopy header */ + size_t hdr_offset; /* Offset in TX buffer to empty space that + * can be used for AM Zcopy header */ + } zcopy; + struct sockaddr_in ifaddr; /* Network address */ + struct sockaddr_in netmask; /* Network address mask */ + int prefer_default; /* Prefer default gateway */ + int put_enable; /* Enable PUT Zcopy operation support */ + int conn_nb; /* Use non-blocking connect() */ + unsigned max_poll; /* Number of events to poll per socket*/ + unsigned max_conn_retries; /* How many connection establishment attmepts + * should be done if dropped connection was + * detected due to lack 
of system resources */ + } config; + + struct { + int nodelay; /* TCP_NODELAY */ + size_t sndbuf; /* SO_SNDBUF */ + size_t rcvbuf; /* SO_RCVBUF */ + } sockopt; +} uct_tcp_iface_t; + + +/** + * TCP interface configuration + */ +typedef struct uct_tcp_iface_config { + uct_iface_config_t super; + size_t tx_seg_size; + size_t rx_seg_size; + size_t max_iov; + size_t sendv_thresh; + int prefer_default; + int put_enable; + int conn_nb; + unsigned max_poll; + unsigned max_conn_retries; + int sockopt_nodelay; + size_t sockopt_sndbuf; + size_t sockopt_rcvbuf; + uct_iface_mpool_config_t tx_mpool; + uct_iface_mpool_config_t rx_mpool; +} uct_tcp_iface_config_t; + + +extern uct_component_t uct_tcp_component; +extern const char *uct_tcp_address_type_names[]; +extern const uct_tcp_cm_state_t uct_tcp_ep_cm_state[]; +extern const uct_tcp_ep_progress_t uct_tcp_ep_progress_rx_cb[]; + +ucs_status_t uct_tcp_netif_caps(const char *if_name, double *latency_p, + double *bandwidth_p); + +ucs_status_t uct_tcp_netif_inaddr(const char *if_name, struct sockaddr_in *ifaddr, + struct sockaddr_in *netmask); + +ucs_status_t uct_tcp_netif_is_default(const char *if_name, int *result_p); + +int uct_tcp_sockaddr_cmp(const struct sockaddr *sa1, + const struct sockaddr *sa2); + +ucs_status_t uct_tcp_iface_set_sockopt(uct_tcp_iface_t *iface, int fd); + +size_t uct_tcp_iface_get_max_iov(const uct_tcp_iface_t *iface); + +size_t uct_tcp_iface_get_max_zcopy_header(const uct_tcp_iface_t *iface); + +void uct_tcp_iface_add_ep(uct_tcp_ep_t *ep); + +void uct_tcp_iface_remove_ep(uct_tcp_ep_t *ep); + +ucs_status_t uct_tcp_ep_handle_dropped_connect(uct_tcp_ep_t *ep, int io_errno); + +ucs_status_t uct_tcp_ep_init(uct_tcp_iface_t *iface, int fd, + const struct sockaddr_in *dest_addr, + uct_tcp_ep_t **ep_p); + +ucs_status_t uct_tcp_ep_create(const uct_ep_params_t *params, + uct_ep_h *ep_p); + +const char *uct_tcp_ep_ctx_caps_str(uint8_t ep_ctx_caps, char *str_buffer); + +void uct_tcp_ep_change_ctx_caps(uct_tcp_ep_t *ep, 
uint8_t new_caps); + +ucs_status_t uct_tcp_ep_add_ctx_cap(uct_tcp_ep_t *ep, + uct_tcp_ep_ctx_type_t cap); + +ucs_status_t uct_tcp_ep_remove_ctx_cap(uct_tcp_ep_t *ep, + uct_tcp_ep_ctx_type_t cap); + +ucs_status_t uct_tcp_ep_move_ctx_cap(uct_tcp_ep_t *from_ep, uct_tcp_ep_t *to_ep, + uct_tcp_ep_ctx_type_t ctx_cap); + +void uct_tcp_ep_destroy_internal(uct_ep_h tl_ep); + +void uct_tcp_ep_destroy(uct_ep_h tl_ep); + +void uct_tcp_ep_set_failed(uct_tcp_ep_t *ep); + +unsigned uct_tcp_ep_is_self(const uct_tcp_ep_t *ep); + +void uct_tcp_ep_remove(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep); + +void uct_tcp_ep_add(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep); + +void uct_tcp_ep_mod_events(uct_tcp_ep_t *ep, int add, int remove); + +void uct_tcp_ep_pending_queue_dispatch(uct_tcp_ep_t *ep); + +ucs_status_t uct_tcp_ep_am_short(uct_ep_h uct_ep, uint8_t am_id, uint64_t header, + const void *payload, unsigned length); + +ssize_t uct_tcp_ep_am_bcopy(uct_ep_h uct_ep, uint8_t am_id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags); + +ucs_status_t uct_tcp_ep_am_zcopy(uct_ep_h uct_ep, uint8_t am_id, const void *header, + unsigned header_length, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp); + +ucs_status_t uct_tcp_ep_put_zcopy(uct_ep_h uct_ep, const uct_iov_t *iov, + size_t iovcnt, uint64_t remote_addr, + uct_rkey_t rkey, uct_completion_t *comp); + +ucs_status_t uct_tcp_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *req, + unsigned flags); + +void uct_tcp_ep_pending_purge(uct_ep_h tl_ep, uct_pending_purge_callback_t cb, + void *arg); + +ucs_status_t uct_tcp_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp); + +ucs_status_t uct_tcp_cm_send_event(uct_tcp_ep_t *ep, uct_tcp_cm_conn_event_t event); + +unsigned uct_tcp_cm_handle_conn_pkt(uct_tcp_ep_t **ep_p, void *pkt, uint32_t length); + +unsigned uct_tcp_cm_conn_progress(uct_tcp_ep_t *ep); + +uct_tcp_ep_conn_state_t +uct_tcp_cm_set_conn_state(uct_tcp_ep_t *ep, + 
uct_tcp_ep_conn_state_t new_conn_state); + +void uct_tcp_cm_change_conn_state(uct_tcp_ep_t *ep, + uct_tcp_ep_conn_state_t new_conn_state); + +ucs_status_t uct_tcp_cm_add_ep(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep); + +void uct_tcp_cm_remove_ep(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep); + +uct_tcp_ep_t *uct_tcp_cm_search_ep(uct_tcp_iface_t *iface, + const struct sockaddr_in *peer_addr, + uct_tcp_ep_ctx_type_t with_ctx_type); + +void uct_tcp_cm_purge_ep(uct_tcp_ep_t *ep); + +ucs_status_t uct_tcp_cm_handle_incoming_conn(uct_tcp_iface_t *iface, + const struct sockaddr_in *peer_addr, + int fd); + +ucs_status_t uct_tcp_cm_conn_start(uct_tcp_ep_t *ep); + +static inline void uct_tcp_iface_outstanding_inc(uct_tcp_iface_t *iface) +{ + iface->outstanding++; +} + +static inline void uct_tcp_iface_outstanding_dec(uct_tcp_iface_t *iface) +{ + ucs_assert(iface->outstanding > 0); + iface->outstanding--; +} + +/** + * Query for active network devices under /sys/class/net, as determined by + * ucs_netif_is_active(). 'md' parameter is not used, and is added for + * compatibility with uct_tl_t::query_devices definition. + */ +ucs_status_t uct_tcp_query_devices(uct_md_h md, + uct_tl_device_resource_t **devices_p, + unsigned *num_devices_p); + +#endif diff --git a/src/uct/tcp/tcp_cm.c b/src/uct/tcp/tcp_cm.c new file mode 100644 index 0000000..45e357d --- /dev/null +++ b/src/uct/tcp/tcp_cm.c @@ -0,0 +1,624 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "tcp.h" + +#include + + +void uct_tcp_cm_change_conn_state(uct_tcp_ep_t *ep, + uct_tcp_ep_conn_state_t new_conn_state) +{ + int full_log = 1; + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + char str_local_addr[UCS_SOCKADDR_STRING_LEN]; + char str_remote_addr[UCS_SOCKADDR_STRING_LEN]; + char str_ctx_caps[UCT_TCP_EP_CTX_CAPS_STR_MAX]; + uct_tcp_ep_conn_state_t old_conn_state; + + old_conn_state = ep->conn_state; + ep->conn_state = new_conn_state; + + switch(ep->conn_state) { + case UCT_TCP_EP_CONN_STATE_CONNECTING: + case UCT_TCP_EP_CONN_STATE_WAITING_ACK: + if (old_conn_state == UCT_TCP_EP_CONN_STATE_CLOSED) { + uct_tcp_iface_outstanding_inc(iface); + } else { + ucs_assert((ep->conn_state == UCT_TCP_EP_CONN_STATE_CONNECTING) || + (old_conn_state == UCT_TCP_EP_CONN_STATE_CONNECTING)); + } + break; + case UCT_TCP_EP_CONN_STATE_WAITING_REQ: + ucs_assert(old_conn_state == UCT_TCP_EP_CONN_STATE_WAITING_ACK); + break; + case UCT_TCP_EP_CONN_STATE_CONNECTED: + ucs_assert((old_conn_state == UCT_TCP_EP_CONN_STATE_CONNECTING) || + (old_conn_state == UCT_TCP_EP_CONN_STATE_WAITING_ACK) || + (old_conn_state == UCT_TCP_EP_CONN_STATE_ACCEPTING) || + (old_conn_state == UCT_TCP_EP_CONN_STATE_WAITING_REQ)); + if ((old_conn_state == UCT_TCP_EP_CONN_STATE_WAITING_ACK) || + (old_conn_state == UCT_TCP_EP_CONN_STATE_WAITING_REQ) || + /* It may happen when a peer is going to use this EP with socket + * from accepted connection in case of handling simultaneous + * connection establishment */ + (old_conn_state == UCT_TCP_EP_CONN_STATE_CONNECTING)) { + uct_tcp_iface_outstanding_dec(iface); + } + if (ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX)) { + /* Progress possibly pending TX operations */ + uct_tcp_ep_pending_queue_dispatch(ep); + } + break; + case UCT_TCP_EP_CONN_STATE_CLOSED: + ucs_assert(old_conn_state != UCT_TCP_EP_CONN_STATE_CLOSED); + if ((old_conn_state == UCT_TCP_EP_CONN_STATE_CONNECTING) || + (old_conn_state == 
UCT_TCP_EP_CONN_STATE_WAITING_ACK) || + (old_conn_state == UCT_TCP_EP_CONN_STATE_WAITING_REQ)) { + uct_tcp_iface_outstanding_dec(iface); + } else if ((old_conn_state == UCT_TCP_EP_CONN_STATE_ACCEPTING) || + (old_conn_state == UCT_TCP_EP_CONN_STATE_RECV_MAGIC_NUMBER)) { + /* Since ep::peer_addr is 0'ed, we have to print w/o peer's address */ + full_log = 0; + } + break; + default: + ucs_assert((ep->conn_state == UCT_TCP_EP_CONN_STATE_ACCEPTING) || + (ep->conn_state == UCT_TCP_EP_CONN_STATE_RECV_MAGIC_NUMBER)); + /* Since ep::peer_addr is 0'ed and client's + * has already been logged, print w/o peer's address */ + full_log = 0; + break; + } + + if (full_log) { + ucs_debug("tcp_ep %p: %s -> %s for the [%s]<->[%s] connection %s", + ep, uct_tcp_ep_cm_state[old_conn_state].name, + uct_tcp_ep_cm_state[ep->conn_state].name, + ucs_sockaddr_str((const struct sockaddr*)&iface->config.ifaddr, + str_local_addr, UCS_SOCKADDR_STRING_LEN), + ucs_sockaddr_str((const struct sockaddr*)&ep->peer_addr, + str_remote_addr, UCS_SOCKADDR_STRING_LEN), + uct_tcp_ep_ctx_caps_str(ep->ctx_caps, str_ctx_caps)); + } else { + ucs_debug("tcp_ep %p: %s -> %s", + ep, uct_tcp_ep_cm_state[old_conn_state].name, + uct_tcp_ep_cm_state[ep->conn_state].name); + } +} + +static ucs_status_t uct_tcp_cm_io_err_handler_cb(void *arg, int io_errno) +{ + return uct_tcp_ep_handle_dropped_connect((uct_tcp_ep_t*)arg, + io_errno); +} + +/* `fmt_str` parameter has to contain "%s" to write event type */ +static void uct_tcp_cm_trace_conn_pkt(const uct_tcp_ep_t *ep, + ucs_log_level_t log_level, + const char *fmt_str, + uct_tcp_cm_conn_event_t event) +{ + char event_str[64] = { 0 }; + char str_addr[UCS_SOCKADDR_STRING_LEN], msg[128], *p; + + p = event_str; + if (event & UCT_TCP_CM_CONN_REQ) { + ucs_snprintf_zero(event_str, sizeof(event_str), "%s", + UCS_PP_MAKE_STRING(UCT_TCP_CM_CONN_REQ)); + p += strlen(event_str); + } + + if (event & UCT_TCP_CM_CONN_WAIT_REQ) { + ucs_assert(p == event_str); + ucs_snprintf_zero(event_str, 
sizeof(event_str), "%s", + UCS_PP_MAKE_STRING(UCT_TCP_CM_CONN_WAIT_REQ)); + p += strlen(event_str); + } + + if (event & UCT_TCP_CM_CONN_ACK) { + if (p != event_str) { + ucs_snprintf_zero(p, sizeof(event_str) - (p - event_str), " | "); + p += strlen(p); + } + ucs_snprintf_zero(p, sizeof(event_str) - (p - event_str), "%s", + UCS_PP_MAKE_STRING(UCT_TCP_CM_CONN_ACK)); + p += strlen(p); /* `p` points at the text just appended */ + } + + if (event_str == p) { + ucs_snprintf_zero(event_str, sizeof(event_str), "UNKNOWN (%d)", event); + log_level = UCS_LOG_LEVEL_ERROR; + } + + ucs_snprintf_zero(msg, sizeof(msg), fmt_str, event_str); + + ucs_log(log_level, "tcp_ep %p: %s %s", ep, msg, + ucs_sockaddr_str((const struct sockaddr*)&ep->peer_addr, + str_addr, UCS_SOCKADDR_STRING_LEN)); +} + +ucs_status_t uct_tcp_cm_send_event(uct_tcp_ep_t *ep, uct_tcp_cm_conn_event_t event) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + size_t magic_number_length = 0; + void *pkt_buf; + size_t pkt_length, cm_pkt_length; + uct_tcp_cm_conn_req_pkt_t *conn_pkt; + uct_tcp_cm_conn_event_t *pkt_event; + uct_tcp_am_hdr_t *pkt_hdr; + ucs_status_t status; + + ucs_assertv(!(event & ~(UCT_TCP_CM_CONN_REQ | + UCT_TCP_CM_CONN_ACK | + UCT_TCP_CM_CONN_WAIT_REQ)), + "ep=%p", ep); + ucs_assertv(!(ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX)) || + (ep->conn_state != UCT_TCP_EP_CONN_STATE_CONNECTED), + "ep=%p", ep); + + pkt_length = sizeof(*pkt_hdr); + if (event == UCT_TCP_CM_CONN_REQ) { + cm_pkt_length = sizeof(*conn_pkt); + + if (ep->conn_state == UCT_TCP_EP_CONN_STATE_CONNECTING) { + magic_number_length = sizeof(uint64_t); + } + } else { + cm_pkt_length = sizeof(event); + } + + pkt_length += cm_pkt_length + magic_number_length; + pkt_buf = ucs_alloca(pkt_length); + pkt_hdr = (uct_tcp_am_hdr_t*)(UCS_PTR_BYTE_OFFSET(pkt_buf, + magic_number_length)); + pkt_hdr->am_id = UCT_AM_ID_MAX; + pkt_hdr->length = cm_pkt_length; + + if (event == UCT_TCP_CM_CONN_REQ) { + if (ep->conn_state == 
UCT_TCP_EP_CONN_STATE_CONNECTING) { + ucs_assert(magic_number_length == sizeof(uint64_t)); + *(uint64_t*)pkt_buf = UCT_TCP_MAGIC_NUMBER; + } + + conn_pkt = (uct_tcp_cm_conn_req_pkt_t*)(pkt_hdr + 1); + conn_pkt->event = UCT_TCP_CM_CONN_REQ; + conn_pkt->iface_addr = iface->config.ifaddr; + } else { + pkt_event = (uct_tcp_cm_conn_event_t*)(pkt_hdr + 1); + *pkt_event = event; + } + + status = ucs_socket_send(ep->fd, pkt_buf, pkt_length, + uct_tcp_cm_io_err_handler_cb, ep); + if (status != UCS_OK) { + uct_tcp_cm_trace_conn_pkt(ep, UCS_LOG_LEVEL_ERROR, + "unable to send %s to", event); + } else { + uct_tcp_cm_trace_conn_pkt(ep, UCS_LOG_LEVEL_TRACE, + "%s sent to", event); + } + return status; +} + +ucs_status_t uct_tcp_cm_add_ep(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep) +{ + ucs_list_link_t *ep_list; + khiter_t iter; + int ret; + + iter = kh_get(uct_tcp_cm_eps, &iface->ep_cm_map, ep->peer_addr); + if (iter == kh_end(&iface->ep_cm_map)) { + ep_list = ucs_malloc(sizeof(*ep_list), "tcp_ep_cm_map_entry"); + if (ep_list == NULL) { + return UCS_ERR_NO_MEMORY; + } + + ucs_list_head_init(ep_list); + iter = kh_put(uct_tcp_cm_eps, &iface->ep_cm_map, ep->peer_addr, &ret); + if (ret == -1) { + /* The hash could not be extended, so `iter` is not a valid slot: + * release the list instead of storing it */ + ucs_free(ep_list); + return UCS_ERR_NO_MEMORY; + } + + kh_value(&iface->ep_cm_map, iter) = ep_list; + + ucs_debug("tcp_iface %p: %p list added to map", iface, ep_list); + } else { + ep_list = kh_value(&iface->ep_cm_map, iter); + ucs_assertv(!ucs_list_is_empty(ep_list), "iface=%p", iface); + } + + uct_tcp_iface_remove_ep(ep); + + ucs_list_add_tail(ep_list, &ep->list); + ucs_debug("tcp_iface %p: tcp_ep %p added to %p list", + iface, ep, ep_list); + + return UCS_OK; +} + +void uct_tcp_cm_remove_ep(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep) +{ + ucs_list_link_t *ep_list; + khiter_t iter; + + iter = kh_get(uct_tcp_cm_eps, &iface->ep_cm_map, ep->peer_addr); + ucs_assertv(iter != kh_end(&iface->ep_cm_map), "iface=%p", iface); + + ep_list = kh_value(&iface->ep_cm_map, iter); + ucs_assertv(!ucs_list_is_empty(ep_list), "iface=%p", iface); + + ucs_list_del(&ep->list); + 
ucs_debug("tcp_iface %p: tcp_ep %p removed from %p list", + iface, ep, ep_list); + + uct_tcp_iface_add_ep(ep); + + if (ucs_list_is_empty(ep_list)) { + kh_del(uct_tcp_cm_eps, &iface->ep_cm_map, iter); + ucs_debug("tcp_iface %p: %p list removed from map", + iface, ep_list); + ucs_free(ep_list); + } +} + +uct_tcp_ep_t *uct_tcp_cm_search_ep(uct_tcp_iface_t *iface, + const struct sockaddr_in *peer_addr, + uct_tcp_ep_ctx_type_t with_ctx_type) +{ + uct_tcp_ep_t *ep; + ucs_list_link_t *ep_list; + khiter_t iter; + + iter = kh_get(uct_tcp_cm_eps, &iface->ep_cm_map, *peer_addr); + if (iter != kh_end(&iface->ep_cm_map)) { + ep_list = kh_value(&iface->ep_cm_map, iter); + ucs_assertv(!ucs_list_is_empty(ep_list), "iface=%p", iface); + + ucs_list_for_each(ep, ep_list, list) { + if (ep->ctx_caps & UCS_BIT(with_ctx_type)) { + return ep; + } + } + } + + return NULL; +} + +void uct_tcp_cm_purge_ep(uct_tcp_ep_t *ep) +{ + /* Move from a khash's EP list to iface's EP list */ + ucs_list_del(&ep->list); + uct_tcp_ep_change_ctx_caps(ep, 0); + uct_tcp_iface_add_ep(ep); +} + +static unsigned +uct_tcp_cm_simult_conn_accept_remote_conn(uct_tcp_ep_t *accept_ep, + uct_tcp_ep_t *connect_ep) +{ + uct_tcp_cm_conn_event_t event; + ucs_status_t status; + + /* 1. Close the allocated socket `fd` to avoid reading any + * events for this socket and assign the socket `fd` returned + * from `accept()` to the found EP */ + uct_tcp_ep_mod_events(connect_ep, 0, connect_ep->events); + ucs_assertv(connect_ep->events == 0, + "Requested epoll events must be 0-ed for ep=%p", connect_ep); + + close(connect_ep->fd); + connect_ep->fd = accept_ep->fd; + + /* 2. Migrate RX from the EP allocated during accepting connection to + * the found EP */ + status = uct_tcp_ep_move_ctx_cap(accept_ep, connect_ep, + UCT_TCP_EP_CTX_TYPE_RX); + if (status != UCS_OK) { + return 0; + } + + /* 3. 
The EP allocated during accepting connection has to be destroyed + * upon return from this function (set its socket `fd` to -1 prior + * to avoid closing this socket) */ + uct_tcp_ep_mod_events(accept_ep, 0, UCS_EVENT_SET_EVREAD); + accept_ep->fd = -1; + accept_ep = NULL; + + /* 4. Send ACK to the peer */ + event = UCT_TCP_CM_CONN_ACK; + + /* 5. - If found EP is still connecting, tie REQ with ACK and send + * it to the peer using new socket fd to ensure that the peer + * will be able to receive the data from us + * - If found EP is waiting ACK, tie WAIT_REQ with ACK and send + * it to the peer using new socket fd to ensure that the peer + * will wait for REQ and after receiving the REQ, peer will + * be able to receive the data from us */ + if (connect_ep->conn_state == UCT_TCP_EP_CONN_STATE_CONNECTING) { + event |= UCT_TCP_CM_CONN_REQ; + } else if (connect_ep->conn_state == UCT_TCP_EP_CONN_STATE_WAITING_ACK) { + event |= UCT_TCP_CM_CONN_WAIT_REQ; + } + + status = uct_tcp_cm_send_event(connect_ep, event); + if (status != UCS_OK) { + return 0; + } + /* 6. 
Now fully connected to the peer */ + uct_tcp_ep_mod_events(connect_ep, UCS_EVENT_SET_EVREAD, 0); + uct_tcp_cm_change_conn_state(connect_ep, UCT_TCP_EP_CONN_STATE_CONNECTED); + + return 1; +} + +static unsigned uct_tcp_cm_handle_simult_conn(uct_tcp_iface_t *iface, + uct_tcp_ep_t *accept_ep, + uct_tcp_ep_t *connect_ep) +{ + int accept_conn = 0; + unsigned progress_count = 0; + ucs_status_t status; + int cmp; + + if ((connect_ep->conn_state != UCT_TCP_EP_CONN_STATE_CONNECTED) && + (connect_ep->conn_state != UCT_TCP_EP_CONN_STATE_WAITING_REQ)) { + cmp = ucs_sockaddr_cmp((const struct sockaddr*)&connect_ep->peer_addr, + (const struct sockaddr*)&iface->config.ifaddr, + &status); + if (status != UCS_OK) { + return 0; + } + + /* Accept connection from a peer if our iface + * address is greater than peer's one */ + accept_conn = (cmp < 0); + } + + if (!accept_conn) { + /* Migrate RX from the EP allocated during accepting connection to + * the found EP. */ + status = uct_tcp_ep_move_ctx_cap(accept_ep, connect_ep, + UCT_TCP_EP_CTX_TYPE_RX); + if (status != UCS_OK) { + return 0; + } + + if (connect_ep->conn_state == UCT_TCP_EP_CONN_STATE_WAITING_REQ) { + uct_tcp_cm_change_conn_state(connect_ep, UCT_TCP_EP_CONN_STATE_CONNECTED); + } + + uct_tcp_ep_mod_events(connect_ep, UCS_EVENT_SET_EVREAD, 0); + } else /* our iface address less than remote && we are not connected */ { + /* Accept the remote connection and close the current one */ + ucs_assertv(cmp != 0, "peer addresses for accepted tcp_ep %p and " + "found tcp_ep %p mustn't be equal", accept_ep, connect_ep); + progress_count = uct_tcp_cm_simult_conn_accept_remote_conn(accept_ep, + connect_ep); + } + + return progress_count; +} + +static unsigned +uct_tcp_cm_handle_conn_req(uct_tcp_ep_t **ep_p, + const uct_tcp_cm_conn_req_pkt_t *cm_req_pkt) +{ + uct_tcp_ep_t *ep = *ep_p; + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + unsigned progress_count = 0; + ucs_status_t status; + uct_tcp_ep_t 
*peer_ep; + + ep->peer_addr = cm_req_pkt->iface_addr; + uct_tcp_cm_trace_conn_pkt(ep, UCS_LOG_LEVEL_TRACE, + "%s received from", UCT_TCP_CM_CONN_REQ); + + status = uct_tcp_ep_add_ctx_cap(ep, UCT_TCP_EP_CTX_TYPE_RX); + if (status != UCS_OK) { + goto out; + } + + if (ep->conn_state == UCT_TCP_EP_CONN_STATE_CONNECTED) { + return 0; + } + + ucs_assertv(!(ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX)), + "ep %p mustn't have TX cap", ep); + + if (!uct_tcp_ep_is_self(ep) && + (peer_ep = uct_tcp_cm_search_ep(iface, &ep->peer_addr, + UCT_TCP_EP_CTX_TYPE_TX))) { + progress_count = uct_tcp_cm_handle_simult_conn(iface, ep, peer_ep); + ucs_assert(!(ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX))); + goto out; + } else { + /* Just accept this connection and make it operational for RX events */ + status = uct_tcp_cm_send_event(ep, UCT_TCP_CM_CONN_ACK); + if (status != UCS_OK) { + goto out; + } + + uct_tcp_cm_change_conn_state(ep, UCT_TCP_EP_CONN_STATE_CONNECTED); + + progress_count = 1; + } + + return progress_count; + +out: + if (!(ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX))) { + uct_tcp_ep_destroy_internal(&ep->super.super); + *ep_p = NULL; + } + return progress_count; +} + +void uct_tcp_cm_handle_conn_ack(uct_tcp_ep_t *ep, uct_tcp_cm_conn_event_t cm_event, + uct_tcp_ep_conn_state_t new_conn_state) +{ + uct_tcp_cm_trace_conn_pkt(ep, UCS_LOG_LEVEL_TRACE, + "%s received from", cm_event); + if (ep->conn_state != new_conn_state) { + uct_tcp_cm_change_conn_state(ep, new_conn_state); + } +} + +unsigned uct_tcp_cm_handle_conn_pkt(uct_tcp_ep_t **ep_p, void *pkt, uint32_t length) +{ + ucs_status_t status; + uct_tcp_cm_conn_event_t cm_event; + uct_tcp_cm_conn_req_pkt_t *cm_req_pkt; + uct_tcp_ep_conn_state_t new_conn_state; + + ucs_assertv(length >= sizeof(cm_event), "ep=%p", *ep_p); + + cm_event = *((uct_tcp_cm_conn_event_t*)pkt); + + switch (cm_event) { + case UCT_TCP_CM_CONN_REQ: + /* Don't trace received CM packet here, because + * EP doesn't contain the peer address */ + 
ucs_assertv(length == sizeof(*cm_req_pkt), "ep=%p", *ep_p); + cm_req_pkt = (uct_tcp_cm_conn_req_pkt_t*)pkt; + return uct_tcp_cm_handle_conn_req(ep_p, cm_req_pkt); + case UCT_TCP_CM_CONN_ACK_WITH_WAIT_REQ: + if (!((*ep_p)->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_RX))) { + new_conn_state = UCT_TCP_EP_CONN_STATE_WAITING_REQ; + } else { + new_conn_state = UCT_TCP_EP_CONN_STATE_CONNECTED; + } + uct_tcp_cm_handle_conn_ack(*ep_p, cm_event, new_conn_state); + return 0; + case UCT_TCP_CM_CONN_ACK_WITH_REQ: + status = uct_tcp_ep_add_ctx_cap(*ep_p, UCT_TCP_EP_CTX_TYPE_RX); + if (status != UCS_OK) { + return 0; + } + /* fall through */ + case UCT_TCP_CM_CONN_ACK: + uct_tcp_cm_handle_conn_ack(*ep_p, cm_event, + UCT_TCP_EP_CONN_STATE_CONNECTED); + return 0; + case UCT_TCP_CM_CONN_WAIT_REQ: + ucs_error("tcp_ep %p: CM event for waiting REQ (%d) " + "must be sent along with ACK", *ep_p, cm_event); + return 0; + } + + ucs_error("tcp_ep %p: unknown CM event received %d", *ep_p, cm_event); + return 0; +} + +static ucs_status_t uct_tcp_cm_conn_complete(uct_tcp_ep_t *ep, + unsigned *progress_count_p) +{ + ucs_status_t status; + + status = uct_tcp_cm_send_event(ep, UCT_TCP_CM_CONN_REQ); + if (status != UCS_OK) { + goto out; + } + + uct_tcp_cm_change_conn_state(ep, UCT_TCP_EP_CONN_STATE_WAITING_ACK); + uct_tcp_ep_mod_events(ep, UCS_EVENT_SET_EVREAD, 0); + + ucs_assertv((ep->tx.length == 0) && (ep->tx.offset == 0) && + (ep->tx.buf == NULL), "ep=%p", ep); +out: + if (progress_count_p != NULL) { + *progress_count_p = (status == UCS_OK); + } + return status; +} + +unsigned uct_tcp_cm_conn_progress(uct_tcp_ep_t *ep) +{ + unsigned progress_count; + + if (!ucs_socket_is_connected(ep->fd)) { + ucs_error("tcp_ep %p: connection establishment for " + "socket fd %d was unsuccessful", ep, ep->fd); + goto err; + } + + uct_tcp_cm_conn_complete(ep, &progress_count); + return progress_count; + +err: + uct_tcp_ep_set_failed(ep); + return 0; +} + +ucs_status_t uct_tcp_cm_conn_start(uct_tcp_ep_t *ep) +{ + 
uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + ucs_status_t status; + + if (ep->conn_retries++ > iface->config.max_conn_retries) { + ucs_error("tcp_ep %p: reached maximum number of connection retries " + "(%u)", ep, iface->config.max_conn_retries); + return UCS_ERR_TIMED_OUT; + } + + uct_tcp_cm_change_conn_state(ep, UCT_TCP_EP_CONN_STATE_CONNECTING); + + status = ucs_socket_connect(ep->fd, (const struct sockaddr*)&ep->peer_addr); + if (UCS_STATUS_IS_ERR(status)) { + return status; + } else if (status == UCS_INPROGRESS) { + uct_tcp_ep_mod_events(ep, UCS_EVENT_SET_EVWRITE, 0); + return UCS_OK; + } + + ucs_assert(status == UCS_OK); + + if (!iface->config.conn_nb) { + status = ucs_sys_fcntl_modfl(ep->fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + return status; + } + } + + return uct_tcp_cm_conn_complete(ep, NULL); +} + +/* This function is called from async thread */ +ucs_status_t uct_tcp_cm_handle_incoming_conn(uct_tcp_iface_t *iface, + const struct sockaddr_in *peer_addr, + int fd) +{ + char str_local_addr[UCS_SOCKADDR_STRING_LEN]; + char str_remote_addr[UCS_SOCKADDR_STRING_LEN]; + ucs_status_t status; + uct_tcp_ep_t *ep; + + if (!ucs_socket_is_connected(fd)) { + ucs_warn("tcp_iface %p: connection establishment for socket fd %d " + "from %s to %s was unsuccessful", iface, fd, + ucs_sockaddr_str((const struct sockaddr*)peer_addr, + str_remote_addr, UCS_SOCKADDR_STRING_LEN), + ucs_sockaddr_str((const struct sockaddr*)&iface->config.ifaddr, + str_local_addr, UCS_SOCKADDR_STRING_LEN)); + return UCS_ERR_UNREACHABLE; + } + + status = uct_tcp_ep_init(iface, fd, NULL, &ep); + if (status != UCS_OK) { + return status; + } + + uct_tcp_cm_change_conn_state(ep, UCT_TCP_EP_CONN_STATE_RECV_MAGIC_NUMBER); + uct_tcp_ep_mod_events(ep, UCS_EVENT_SET_EVREAD, 0); + + ucs_debug("tcp_iface %p: accepted connection from " + "%s on %s to tcp_ep %p (fd %d)", iface, + ucs_sockaddr_str((const struct sockaddr*)peer_addr, + str_remote_addr, 
UCS_SOCKADDR_STRING_LEN), + ucs_sockaddr_str((const struct sockaddr*)&iface->config.ifaddr, + str_local_addr, UCS_SOCKADDR_STRING_LEN), + ep, fd); + return UCS_OK; +} diff --git a/src/uct/tcp/tcp_ep.c b/src/uct/tcp/tcp_ep.c new file mode 100644 index 0000000..a957329 --- /dev/null +++ b/src/uct/tcp/tcp_ep.c @@ -0,0 +1,1546 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "tcp.h" + +#include + + +/* Forward declarations */ +static unsigned uct_tcp_ep_progress_data_tx(uct_tcp_ep_t *ep); +static unsigned uct_tcp_ep_progress_data_rx(uct_tcp_ep_t *ep); +static unsigned uct_tcp_ep_progress_magic_number_rx(uct_tcp_ep_t *ep); + +const uct_tcp_cm_state_t uct_tcp_ep_cm_state[] = { + [UCT_TCP_EP_CONN_STATE_CLOSED] = { + .name = "CLOSED", + .tx_progress = (uct_tcp_ep_progress_t)ucs_empty_function_return_zero, + .rx_progress = (uct_tcp_ep_progress_t)ucs_empty_function_return_zero + }, + [UCT_TCP_EP_CONN_STATE_CONNECTING] = { + .name = "CONNECTING", + .tx_progress = uct_tcp_cm_conn_progress, + .rx_progress = uct_tcp_ep_progress_data_rx + }, + [UCT_TCP_EP_CONN_STATE_WAITING_ACK] = { + .name = "WAITING_ACK", + .tx_progress = (uct_tcp_ep_progress_t)ucs_empty_function_return_zero, + .rx_progress = uct_tcp_ep_progress_data_rx + }, + [UCT_TCP_EP_CONN_STATE_RECV_MAGIC_NUMBER] = { + .name = "RECV_MAGIC_NUMBER", + .tx_progress = (uct_tcp_ep_progress_t)ucs_empty_function_return_zero, + .rx_progress = uct_tcp_ep_progress_magic_number_rx + }, + [UCT_TCP_EP_CONN_STATE_ACCEPTING] = { + .name = "ACCEPTING", + .tx_progress = (uct_tcp_ep_progress_t)ucs_empty_function_return_zero, + .rx_progress = uct_tcp_ep_progress_data_rx + }, + [UCT_TCP_EP_CONN_STATE_WAITING_REQ] = { + .name = "WAITING_REQ", + .tx_progress = (uct_tcp_ep_progress_t)ucs_empty_function_return_zero, + .rx_progress = uct_tcp_ep_progress_data_rx + }, + [UCT_TCP_EP_CONN_STATE_CONNECTED] = { + 
.name = "CONNECTED", + .tx_progress = uct_tcp_ep_progress_data_tx, + .rx_progress = uct_tcp_ep_progress_data_rx + } +}; + +static inline int uct_tcp_ep_ctx_buf_empty(uct_tcp_ep_ctx_t *ctx) +{ + ucs_assert((ctx->length == 0) || (ctx->buf != NULL)); + + return ctx->length == 0; +} + +static inline int uct_tcp_ep_ctx_buf_need_progress(uct_tcp_ep_ctx_t *ctx) +{ + ucs_assert(ctx->offset <= ctx->length); + + return ctx->offset < ctx->length; +} + +static inline ucs_status_t uct_tcp_ep_check_tx_res(uct_tcp_ep_t *ep) +{ + if (ucs_unlikely(ep->conn_state != UCT_TCP_EP_CONN_STATE_CONNECTED)) { + if (ep->conn_state == UCT_TCP_EP_CONN_STATE_CLOSED) { + return UCS_ERR_UNREACHABLE; + } + + ucs_assertv((ep->conn_state == UCT_TCP_EP_CONN_STATE_CONNECTING) || + (ep->conn_state == UCT_TCP_EP_CONN_STATE_WAITING_ACK) || + (ep->conn_state == UCT_TCP_EP_CONN_STATE_WAITING_REQ), + "ep=%p", ep); + return UCS_ERR_NO_RESOURCE; + } + + return uct_tcp_ep_ctx_buf_empty(&ep->tx) ? UCS_OK : UCS_ERR_NO_RESOURCE; +} + +static inline void uct_tcp_ep_ctx_rewind(uct_tcp_ep_ctx_t *ctx) +{ + ctx->offset = 0; + ctx->length = 0; +} + +static inline void uct_tcp_ep_ctx_init(uct_tcp_ep_ctx_t *ctx) +{ + ctx->put_sn = UINT32_MAX; + ctx->buf = NULL; + uct_tcp_ep_ctx_rewind(ctx); +} + +static inline void uct_tcp_ep_ctx_reset(uct_tcp_ep_ctx_t *ctx) +{ + ucs_mpool_put_inline(ctx->buf); + ctx->buf = NULL; + uct_tcp_ep_ctx_rewind(ctx); +} + +static void uct_tcp_ep_addr_cleanup(struct sockaddr_in *sock_addr) +{ + memset(sock_addr, 0, sizeof(*sock_addr)); +} + +static void uct_tcp_ep_addr_init(struct sockaddr_in *sock_addr, + const struct sockaddr_in *peer_addr) +{ + /* TODO: handle IPv4 and IPv6 */ + if (peer_addr == NULL) { + uct_tcp_ep_addr_cleanup(sock_addr); + } else { + *sock_addr = *peer_addr; + } +} + +static void uct_tcp_ep_close_fd(int *fd_p) +{ + if (*fd_p != -1) { + close(*fd_p); + *fd_p = -1; + } +} + +unsigned uct_tcp_ep_is_self(const uct_tcp_ep_t *ep) +{ + uct_tcp_iface_t *iface = 
ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + ucs_status_t status; + int cmp; + + cmp = ucs_sockaddr_cmp((const struct sockaddr*)&ep->peer_addr, + (const struct sockaddr*)&iface->config.ifaddr, + &status); + ucs_assertv(status == UCS_OK, "ep=%p", ep); + return !cmp; +} + +static void uct_tcp_ep_cleanup(uct_tcp_ep_t *ep) +{ + uct_tcp_ep_addr_cleanup(&ep->peer_addr); + + if (ep->tx.buf != NULL) { + uct_tcp_ep_ctx_reset(&ep->tx); + } + + if (ep->rx.buf != NULL) { + uct_tcp_ep_ctx_reset(&ep->rx); + } + + if (ep->events && (ep->fd != -1)) { + uct_tcp_ep_mod_events(ep, 0, ep->events); + } + + uct_tcp_ep_close_fd(&ep->fd); +} + +static UCS_CLASS_INIT_FUNC(uct_tcp_ep_t, uct_tcp_iface_t *iface, + int fd, const struct sockaddr_in *dest_addr) +{ + ucs_status_t status; + + ucs_assertv(fd >= 0, "iface=%p", iface); + + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super) + + uct_tcp_ep_addr_init(&self->peer_addr, dest_addr); + + uct_tcp_ep_ctx_init(&self->tx); + uct_tcp_ep_ctx_init(&self->rx); + + self->events = 0; + self->conn_retries = 0; + self->fd = fd; + self->ctx_caps = 0; + self->conn_state = UCT_TCP_EP_CONN_STATE_CLOSED; + + ucs_list_head_init(&self->list); + ucs_queue_head_init(&self->pending_q); + ucs_queue_head_init(&self->put_comp_q); + + /* Make a socket non-blocking if an EP is created during accepting + * a connection or non-blocking connection mode is requested */ + if ((dest_addr == NULL) || iface->config.conn_nb) { + status = ucs_sys_fcntl_modfl(self->fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + goto err_cleanup; + } + } + + status = uct_tcp_iface_set_sockopt(iface, self->fd); + if (status != UCS_OK) { + goto err_cleanup; + } + + uct_tcp_iface_add_ep(self); + + ucs_debug("tcp_ep %p: created on iface %p, fd %d", self, iface, self->fd); + return UCS_OK; + +err_cleanup: + /* need to be closed by this function caller */ + self->fd = -1; + uct_tcp_ep_cleanup(self); + return status; +} + +const char *uct_tcp_ep_ctx_caps_str(uint8_t ep_ctx_caps, 
char *str_buffer) +{ + ucs_snprintf_zero(str_buffer, UCT_TCP_EP_CTX_CAPS_STR_MAX, "[%s:%s]", + (ep_ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX)) ? + "Tx" : "-", + (ep_ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_RX)) ? + "Rx" : "-"); + return str_buffer; +} + +void uct_tcp_ep_change_ctx_caps(uct_tcp_ep_t *ep, uint8_t new_caps) +{ + char str_prev_ctx_caps[UCT_TCP_EP_CTX_CAPS_STR_MAX]; + char str_cur_ctx_caps[UCT_TCP_EP_CTX_CAPS_STR_MAX]; + + if (ep->ctx_caps != new_caps) { + ucs_trace("tcp_ep %p: ctx caps changed %s -> %s", ep, + uct_tcp_ep_ctx_caps_str(ep->ctx_caps, str_prev_ctx_caps), + uct_tcp_ep_ctx_caps_str(new_caps, str_cur_ctx_caps)); + ep->ctx_caps = new_caps; + } +} + +ucs_status_t uct_tcp_ep_add_ctx_cap(uct_tcp_ep_t *ep, + uct_tcp_ep_ctx_type_t cap) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + uint8_t prev_caps = ep->ctx_caps; + + uct_tcp_ep_change_ctx_caps(ep, ep->ctx_caps | UCS_BIT(cap)); + if (!uct_tcp_ep_is_self(ep) && (prev_caps != ep->ctx_caps)) { + if (!prev_caps) { + return uct_tcp_cm_add_ep(iface, ep); + } else if (ucs_test_all_flags(ep->ctx_caps, + (UCS_BIT(UCT_TCP_EP_CTX_TYPE_RX) | + UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX)))) { + uct_tcp_cm_remove_ep(iface, ep); + } + } + + return UCS_OK; +} + +ucs_status_t uct_tcp_ep_remove_ctx_cap(uct_tcp_ep_t *ep, + uct_tcp_ep_ctx_type_t cap) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + uint8_t prev_caps = ep->ctx_caps; + + uct_tcp_ep_change_ctx_caps(ep, ep->ctx_caps & ~UCS_BIT(cap)); + if (!uct_tcp_ep_is_self(ep)) { + if (ucs_test_all_flags(prev_caps, + (UCS_BIT(UCT_TCP_EP_CTX_TYPE_RX) | + UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX)))) { + return uct_tcp_cm_add_ep(iface, ep); + } else if (!ep->ctx_caps) { + uct_tcp_cm_remove_ep(iface, ep); + } + } + + return UCS_OK; +} + +ucs_status_t uct_tcp_ep_move_ctx_cap(uct_tcp_ep_t *from_ep, uct_tcp_ep_t *to_ep, + uct_tcp_ep_ctx_type_t ctx_cap) +{ + ucs_status_t status; + + status = 
uct_tcp_ep_remove_ctx_cap(from_ep, ctx_cap); + if (status != UCS_OK) { + return status; + } + + return uct_tcp_ep_add_ctx_cap(to_ep, ctx_cap); +} + +static UCS_CLASS_CLEANUP_FUNC(uct_tcp_ep_t) +{ + uct_tcp_iface_t UCS_V_UNUSED *iface = + ucs_derived_of(self->super.super.iface, uct_tcp_iface_t); + uct_tcp_ep_put_completion_t *put_comp; + + uct_tcp_ep_mod_events(self, 0, self->events); + + if (self->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX)) { + uct_tcp_ep_remove_ctx_cap(self, UCT_TCP_EP_CTX_TYPE_TX); + } + + if (self->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_RX)) { + uct_tcp_ep_remove_ctx_cap(self, UCT_TCP_EP_CTX_TYPE_RX); + } + + ucs_assertv(!self->ctx_caps, "ep=%p", self); + + ucs_queue_for_each_extract(put_comp, &self->put_comp_q, elem, 1) { + ucs_free(put_comp); + } + + uct_tcp_iface_remove_ep(self); + + if (self->conn_state != UCT_TCP_EP_CONN_STATE_CLOSED) { + uct_tcp_cm_change_conn_state(self, UCT_TCP_EP_CONN_STATE_CLOSED); + } + + uct_tcp_ep_cleanup(self); + + ucs_debug("tcp_ep %p: destroyed on iface %p", self, iface); +} + +UCS_CLASS_DEFINE(uct_tcp_ep_t, uct_base_ep_t); + +UCS_CLASS_DEFINE_NAMED_NEW_FUNC(uct_tcp_ep_init, uct_tcp_ep_t, uct_tcp_ep_t, + uct_tcp_iface_t*, int, + const struct sockaddr_in*) +UCS_CLASS_DEFINE_NAMED_DELETE_FUNC(uct_tcp_ep_destroy_internal, + uct_tcp_ep_t, uct_ep_t) + +void uct_tcp_ep_destroy(uct_ep_h tl_ep) +{ + uct_tcp_ep_t *ep = ucs_derived_of(tl_ep, uct_tcp_ep_t); + + if ((ep->conn_state == UCT_TCP_EP_CONN_STATE_CONNECTED) && + ucs_test_all_flags(ep->ctx_caps, + UCS_BIT(UCT_TCP_EP_CTX_TYPE_RX) | + UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX))) { + /* remove TX capability, but still will be able to receive data */ + uct_tcp_ep_remove_ctx_cap(ep, UCT_TCP_EP_CTX_TYPE_TX); + } else { + uct_tcp_ep_destroy_internal(tl_ep); + } +} + +void uct_tcp_ep_set_failed(uct_tcp_ep_t *ep) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + + if (ep->conn_state != UCT_TCP_EP_CONN_STATE_CLOSED) { + 
uct_tcp_cm_change_conn_state(ep, UCT_TCP_EP_CONN_STATE_CLOSED); + } + + uct_set_ep_failed(&UCS_CLASS_NAME(uct_tcp_ep_t), + &ep->super.super, &iface->super.super, + UCS_ERR_UNREACHABLE); +} + +static ucs_status_t +uct_tcp_ep_create_socket_and_connect(uct_tcp_iface_t *iface, + const struct sockaddr_in *dest_addr, + uct_tcp_ep_t **ep_p) +{ + uct_tcp_ep_t *ep = NULL; + ucs_status_t status; + int fd; + + /* if EP is already allocated, dest_addr can be NULL */ + ucs_assert((*ep_p != NULL) || (dest_addr != NULL)); + + status = ucs_socket_create(AF_INET, SOCK_STREAM, &fd); + if (status != UCS_OK) { + return status; + } + + if (*ep_p == NULL) { + status = uct_tcp_ep_init(iface, fd, dest_addr, &ep); + if (status != UCS_OK) { + goto err_close_fd; + } + + /* EP is responsible for this socket fd from now */ + fd = -1; + } else { + ep = *ep_p; + ep->fd = fd; + } + + status = uct_tcp_cm_conn_start(ep); + if (status != UCS_OK) { + goto err_ep_destroy; + } + + if (*ep_p == NULL) { + *ep_p = ep; + } + + return UCS_OK; + +err_ep_destroy: + if (*ep_p == NULL) { + uct_tcp_ep_destroy_internal(&ep->super.super); + } +err_close_fd: + /* fd has to be valid in case of valid EP has been + * passed to this function */ + ucs_assert((*ep_p == NULL) || (fd != -1)); + uct_tcp_ep_close_fd(&fd); + return status; +} + +static ucs_status_t uct_tcp_ep_create_connected(uct_tcp_iface_t *iface, + const struct sockaddr_in *dest_addr, + uct_tcp_ep_t **ep_p) +{ + ucs_status_t status; + + status = uct_tcp_ep_create_socket_and_connect(iface, dest_addr, ep_p); + if (status != UCS_OK) { + return status; + } + + status = uct_tcp_ep_add_ctx_cap(*ep_p, UCT_TCP_EP_CTX_TYPE_TX); + if (status != UCS_OK) { + goto err_ep_destroy; + } + + return UCS_OK; + +err_ep_destroy: + uct_tcp_ep_destroy_internal(&(*ep_p)->super.super); + return status; +} + +ucs_status_t uct_tcp_ep_create(const uct_ep_params_t *params, + uct_ep_h *ep_p) +{ + uct_tcp_iface_t *iface = ucs_derived_of(params->iface, uct_tcp_iface_t); + uct_tcp_ep_t 
*ep = NULL; + struct sockaddr_in dest_addr; + ucs_status_t status; + + UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(params); + memset(&dest_addr, 0, sizeof(dest_addr)); + /* TODO: handle AF_INET6 */ + dest_addr.sin_family = AF_INET; + dest_addr.sin_port = *(in_port_t*)params->iface_addr; + dest_addr.sin_addr = *(struct in_addr*)params->dev_addr; + + do { + ep = uct_tcp_cm_search_ep(iface, &dest_addr, + UCT_TCP_EP_CTX_TYPE_RX); + if (ep) { + ucs_assert(!(ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX))); + /* Found EP with RX ctx, try to send the connection request + * to the remote peer, if it successful - assign TX to this EP + * and return the EP to the user, otherwise - destroy this EP + * and try to search another EP w/o TX capability or create + * new EP */ + status = uct_tcp_cm_send_event(ep, UCT_TCP_CM_CONN_REQ); + if (status != UCS_OK) { + uct_tcp_ep_destroy_internal(&ep->super.super); + ep = NULL; + } else { + status = uct_tcp_ep_add_ctx_cap(ep, UCT_TCP_EP_CTX_TYPE_TX); + if (status != UCS_OK) { + return status; + } + } + } else { + status = uct_tcp_ep_create_connected(iface, &dest_addr, &ep); + break; + } + } while (ep == NULL); + + if (status == UCS_OK) { + /* cppcheck-suppress autoVariables */ + *ep_p = &ep->super.super; + } + return status; +} + +void uct_tcp_ep_mod_events(uct_tcp_ep_t *ep, int add, int remove) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + int old_events = ep->events; + int new_events = (ep->events | add) & ~remove; + ucs_status_t status; + + if (new_events != ep->events) { + ep->events = new_events; + ucs_trace("tcp_ep %p: set events to %c%c", ep, + (new_events & UCS_EVENT_SET_EVREAD) ? 'r' : '-', + (new_events & UCS_EVENT_SET_EVWRITE) ? 
'w' : '-'); + if (new_events == 0) { + status = ucs_event_set_del(iface->event_set, ep->fd); + } else if (old_events != 0) { + status = ucs_event_set_mod(iface->event_set, ep->fd, + (ucs_event_set_type_t)ep->events, + (void *)ep); + } else { + status = ucs_event_set_add(iface->event_set, ep->fd, + (ucs_event_set_type_t)ep->events, + (void *)ep); + } + if (status != UCS_OK) { + ucs_fatal("unable to modify event set for tcp_ep %p (fd=%d)", ep, + ep->fd); + } + } +} + +static inline void uct_tcp_ep_handle_put_ack(uct_tcp_ep_t *ep, + uct_tcp_ep_put_ack_hdr_t *put_ack) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + uct_tcp_ep_put_completion_t *put_comp; + + if (put_ack->sn == ep->tx.put_sn) { + /* Since there are no other PUT operations in-flight, can remove flag + * and decrement iface outstanding operations counter */ + ucs_assert(ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_TX_WAITING_ACK)); + ep->ctx_caps &= ~UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_TX_WAITING_ACK); + uct_tcp_iface_outstanding_dec(iface); + } + + ucs_queue_for_each_extract(put_comp, &ep->put_comp_q, elem, + (UCS_CIRCULAR_COMPARE32(put_comp->wait_put_sn, + <=, put_ack->sn))) { + uct_invoke_completion(put_comp->comp, UCS_OK); + ucs_free(put_comp); + } +} + +void uct_tcp_ep_pending_queue_dispatch(uct_tcp_ep_t *ep) +{ + uct_pending_req_priv_queue_t *priv; + + uct_pending_queue_dispatch(priv, &ep->pending_q, + uct_tcp_ep_ctx_buf_empty(&ep->tx)); + if (uct_tcp_ep_ctx_buf_empty(&ep->tx)) { + ucs_assert(ucs_queue_is_empty(&ep->pending_q)); + uct_tcp_ep_mod_events(ep, 0, UCS_EVENT_SET_EVWRITE); + } +} + +static void uct_tcp_ep_handle_disconnected(uct_tcp_ep_t *ep, + uct_tcp_ep_ctx_t *ctx) +{ + ucs_debug("tcp_ep %p: remote disconnected", ep); + + uct_tcp_ep_ctx_reset(ctx); + + if (ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_TX)) { + if (ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_RX)) { + uct_tcp_ep_remove_ctx_cap(ep, UCT_TCP_EP_CTX_TYPE_RX); + } + + uct_tcp_ep_mod_events(ep, 
0, ep->events); + uct_tcp_ep_close_fd(&ep->fd); + } else if ((ep->ctx_caps == 0) || + (ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_RX))) { + /* If the EP supports RX only or no capabilities set, destroy it */ + uct_tcp_ep_destroy_internal(&ep->super.super); + } +} + +static inline ssize_t uct_tcp_ep_send(uct_tcp_ep_t *ep) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + size_t sent_length; + ucs_status_t status; + + ucs_assert(ep->tx.length > ep->tx.offset); + sent_length = ep->tx.length - ep->tx.offset; + + status = ucs_socket_send_nb(ep->fd, UCS_PTR_BYTE_OFFSET(ep->tx.buf, ep->tx.offset), + &sent_length, NULL, NULL); + if (ucs_unlikely((status != UCS_OK) && + (status != UCS_ERR_NO_PROGRESS))) { + return status; + } + + iface->outstanding -= sent_length; + ep->tx.offset += sent_length; + + ucs_assert(sent_length <= SSIZE_MAX); + + return sent_length; +} + +static inline void uct_tcp_ep_comp_zcopy(uct_tcp_ep_t *ep, + uct_completion_t *comp, + ucs_status_t status) +{ + ep->ctx_caps &= ~UCS_BIT(UCT_TCP_EP_CTX_TYPE_ZCOPY_TX); + if (comp != NULL) { + uct_invoke_completion(comp, status); + } +} + +static inline ssize_t uct_tcp_ep_sendv(uct_tcp_ep_t *ep) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + uct_tcp_ep_zcopy_tx_t *ctx = (uct_tcp_ep_zcopy_tx_t*)ep->tx.buf; + size_t sent_length; + ucs_status_t status; + + ucs_assertv(ep->tx.offset < ep->tx.length, "ep=%p", ep); + + status = ucs_socket_sendv_nb(ep->fd, &ctx->iov[ctx->iov_index], + ctx->iov_cnt - ctx->iov_index, + &sent_length, NULL, NULL); + + if (ucs_unlikely(status != UCS_OK)) { + if (status == UCS_ERR_NO_PROGRESS) { + ucs_assert(sent_length == 0); + return 0; + } + + uct_tcp_ep_comp_zcopy(ep, ctx->comp, status); + return status; + } + + ep->tx.offset += sent_length; + iface->outstanding -= sent_length; + + if (ep->tx.offset != ep->tx.length) { + ucs_iov_advance(ctx->iov, ctx->iov_cnt, + &ctx->iov_index, sent_length); + } else { + 
uct_tcp_ep_comp_zcopy(ep, ctx->comp, UCS_OK); + } + + ucs_assert(sent_length <= SSIZE_MAX); + return sent_length; +} + +ucs_status_t uct_tcp_ep_handle_dropped_connect(uct_tcp_ep_t *ep, int io_errno) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + ucs_status_t status; + + /* if connection establishment fails, the system limits + * may not be big enough */ + if (((ep->conn_state == UCT_TCP_EP_CONN_STATE_CONNECTING) || + (ep->conn_state == UCT_TCP_EP_CONN_STATE_WAITING_ACK) || + (ep->conn_state == UCT_TCP_EP_CONN_STATE_WAITING_REQ)) && + ((io_errno == ECONNRESET) || (io_errno == ECONNREFUSED) || + /* connection establishment procedure timed out */ + (io_errno == ETIMEDOUT))) { + uct_tcp_ep_mod_events(ep, 0, ep->events); + uct_tcp_ep_close_fd(&ep->fd); + + uct_tcp_cm_change_conn_state(ep, UCT_TCP_EP_CONN_STATE_CLOSED); + + status = uct_tcp_ep_create_socket_and_connect(iface, NULL, &ep); + if (status != UCS_OK) { + ucs_error("try to increase \"net.core.somaxconn\", " + "\"net.core.netdev_max_backlog\", " + "\"net.ipv4.tcp_max_syn_backlog\" to the maximum value " + "on the remote node or increase %s%s%s (=%u)", + UCS_CONFIG_PREFIX, UCT_TCP_CONFIG_PREFIX, + UCT_TCP_CONFIG_MAX_CONN_RETRIES, + iface->config.max_conn_retries); + } + + return status; + } + + return UCS_ERR_IO_ERROR; +} + +static ucs_status_t uct_tcp_ep_io_err_handler_cb(void *arg, int io_errno) +{ + uct_tcp_ep_t *ep = (uct_tcp_ep_t*)arg; + uct_tcp_iface_t UCS_V_UNUSED *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + char str_local_addr[UCS_SOCKADDR_STRING_LEN]; + char str_remote_addr[UCS_SOCKADDR_STRING_LEN]; + + if ((io_errno == ECONNRESET) && + ((ep->conn_state == UCT_TCP_EP_CONN_STATE_ACCEPTING) || + ((ep->conn_state == UCT_TCP_EP_CONN_STATE_CONNECTED) && + (ep->ctx_caps == UCS_BIT(UCT_TCP_EP_CTX_TYPE_RX)) /* only RX cap */))) { + ucs_debug("tcp_ep %p: detected %d (%s) error, the [%s <-> %s] " + "connection was dropped by the peer", + ep, 
io_errno, strerror(io_errno), + ucs_sockaddr_str((const struct sockaddr*)&iface->config.ifaddr, + str_local_addr, UCS_SOCKADDR_STRING_LEN), + ucs_sockaddr_str((const struct sockaddr*)&ep->peer_addr, + str_remote_addr, UCS_SOCKADDR_STRING_LEN)); + return UCS_OK; + } + + return uct_tcp_ep_handle_dropped_connect(ep, io_errno); +} + +static inline void uct_tcp_ep_handle_recv_err(uct_tcp_ep_t *ep, + ucs_status_t status) +{ + if (status == UCS_ERR_NO_PROGRESS) { + /* If no data were read to the allocated buffer, + * we can safely reset it for futher re-use and to + * avoid overwriting this buffer, because `rx::length == 0` */ + if (ep->rx.length == 0) { + uct_tcp_ep_ctx_reset(&ep->rx); + } + } else { + uct_tcp_ep_handle_disconnected(ep, &ep->rx); + } +} + +static inline unsigned uct_tcp_ep_recv(uct_tcp_ep_t *ep, size_t recv_length) +{ + uct_tcp_iface_t UCS_V_UNUSED *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + ucs_status_t status; + + ucs_assertv(recv_length, "ep=%p", ep); + + status = ucs_socket_recv_nb(ep->fd, UCS_PTR_BYTE_OFFSET(ep->rx.buf, + ep->rx.length), + &recv_length, uct_tcp_ep_io_err_handler_cb, ep); + if (ucs_unlikely(status != UCS_OK)) { + uct_tcp_ep_handle_recv_err(ep, status); + return 0; + } + + ucs_assertv(recv_length, "ep=%p", ep); + + ep->rx.length += recv_length; + ucs_trace_data("tcp_ep %p: recvd %zu bytes", ep, recv_length); + ucs_assert(ep->rx.length <= (iface->config.rx_seg_size * 2)); + + return 1; +} + +/* Forward declaration - the function depends on AM send + * functions implemented below */ +static void uct_tcp_ep_post_put_ack(uct_tcp_ep_t *ep); + +static unsigned uct_tcp_ep_progress_data_tx(uct_tcp_ep_t *ep) +{ + unsigned ret = 0; + ssize_t offset; + + ucs_trace_func("ep=%p", ep); + + if (uct_tcp_ep_ctx_buf_need_progress(&ep->tx)) { + offset = (!(ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_ZCOPY_TX)) ? 
+ uct_tcp_ep_send(ep) : uct_tcp_ep_sendv(ep)); + if (ucs_unlikely(offset < 0)) { + uct_tcp_ep_handle_disconnected(ep, &ep->tx); + return 1; + } + + ret = (offset > 0); + + ucs_trace_data("ep %p fd %d sent %zu/%zu bytes, moved by offset %zd", + ep, ep->fd, ep->tx.offset, ep->tx.length, offset); + + if (!uct_tcp_ep_ctx_buf_need_progress(&ep->tx)) { + uct_tcp_ep_ctx_reset(&ep->tx); + } + } + + if (ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_RX_SENDING_ACK)) { + uct_tcp_ep_post_put_ack(ep); + } + + if (!ucs_queue_is_empty(&ep->pending_q)) { + uct_tcp_ep_pending_queue_dispatch(ep); + return ret; + } + + if (uct_tcp_ep_ctx_buf_empty(&ep->tx)) { + ucs_assert(ucs_queue_is_empty(&ep->pending_q)); + uct_tcp_ep_mod_events(ep, 0, UCS_EVENT_SET_EVWRITE); + } + + return ret; +} + +static inline void +uct_tcp_ep_comp_recv_am(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep, + uct_tcp_am_hdr_t *hdr) +{ + uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV, hdr->am_id, + hdr + 1, hdr->length, + "RECV: ep %p fd %d received %zu/%zu bytes", + ep, ep->fd, ep->rx.offset, ep->rx.length); + uct_iface_invoke_am(&iface->super, hdr->am_id, hdr + 1, hdr->length, 0); +} + +static inline ucs_status_t +uct_tcp_ep_put_rx_advance(uct_tcp_ep_t *ep, uct_tcp_ep_put_req_hdr_t *put_req, + size_t recv_length) +{ + ucs_assert(!(ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_RX_SENDING_ACK))); + ucs_assert(recv_length <= put_req->length); + put_req->addr += recv_length; + put_req->length -= recv_length; + + if (!put_req->length) { + uct_tcp_ep_post_put_ack(ep); + + /* EP's ctx_caps doesn't have UCT_TCP_EP_CTX_TYPE_PUT_RX flag + * set in case of entire PUT payload was received through + * AM protocol */ + if (ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_RX)) { + ep->ctx_caps &= ~UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_RX); + uct_tcp_ep_ctx_reset(&ep->rx); + } + + return UCS_OK; + } + + return UCS_INPROGRESS; +} + +static inline void uct_tcp_ep_handle_put_req(uct_tcp_ep_t *ep, + uct_tcp_ep_put_req_hdr_t *put_req, 
+ size_t extra_recvd_length) +{ + size_t copied_length; + ucs_status_t status; + + ucs_assert(put_req->addr || !put_req->length); + + copied_length = ucs_min(put_req->length, extra_recvd_length); + memcpy((void*)(uintptr_t)put_req->addr, + UCS_PTR_BYTE_OFFSET(ep->rx.buf, ep->rx.offset), + copied_length); + ep->rx.offset += copied_length; + ep->rx.put_sn = put_req->sn; + + /* Remove the flag that indicates that EP is sending PUT RX ACK in order + * to not ack the uncompleted PUT RX operation for which PUT REQ is being + * handled here. ACK for both operations will be sent after the completion + * of the last received PUT operation */ + ep->ctx_caps &= ~UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_RX_SENDING_ACK); + + status = uct_tcp_ep_put_rx_advance(ep, put_req, copied_length); + if (status == UCS_OK) { + return; + } + + ucs_assert(ep->rx.offset == ep->rx.length); + uct_tcp_ep_ctx_rewind(&ep->rx); + /* Since RX buffer and PUT request can be ovelapped, use memmove() */ + memmove(ep->rx.buf, put_req, sizeof(*put_req)); + ep->ctx_caps |= UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_RX); +} + +static unsigned uct_tcp_ep_progress_am_rx(uct_tcp_ep_t *ep) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + unsigned handled = 0; + uct_tcp_am_hdr_t *hdr; + size_t recv_length; + size_t remainder; + + ucs_trace_func("ep=%p", ep); + + if (!uct_tcp_ep_ctx_buf_need_progress(&ep->rx)) { + ucs_assert(ep->rx.buf == NULL); + ep->rx.buf = ucs_mpool_get_inline(&iface->rx_mpool); + if (ucs_unlikely(ep->rx.buf == NULL)) { + ucs_warn("tcp_ep %p: unable to get a buffer from RX memory pool", ep); + return 0; + } + + /* post the entire AM buffer */ + recv_length = iface->config.rx_seg_size; + } else if (ep->rx.length < sizeof(*hdr)) { + ucs_assert((ep->rx.buf != NULL) && (ep->rx.offset == 0)); + + /* do partial receive of the remaining part of the hdr + * and post the entire AM buffer */ + recv_length = iface->config.rx_seg_size - ep->rx.length; + } else { + 
ucs_assert((ep->rx.buf != NULL) && + ((ep->rx.length - ep->rx.offset) >= sizeof(*hdr))); + + /* do partial receive of the remaining user data */ + hdr = UCS_PTR_BYTE_OFFSET(ep->rx.buf, ep->rx.offset); + recv_length = hdr->length - (ep->rx.length - ep->rx.offset - sizeof(*hdr)); + } + + if (!uct_tcp_ep_recv(ep, recv_length)) { + goto out; + } + + /* Parse received active messages */ + while (uct_tcp_ep_ctx_buf_need_progress(&ep->rx)) { + remainder = ep->rx.length - ep->rx.offset; + if (remainder < sizeof(*hdr)) { + /* Move the partially received hdr to the beginning of the buffer */ + memmove(ep->rx.buf, UCS_PTR_BYTE_OFFSET(ep->rx.buf, ep->rx.offset), + remainder); + ep->rx.offset = 0; + ep->rx.length = remainder; + handled++; + goto out; + } + + hdr = UCS_PTR_BYTE_OFFSET(ep->rx.buf, ep->rx.offset); + ucs_assertv(hdr->length <= (iface->config.rx_seg_size - sizeof(*hdr)), + "tcp_ep %p (conn state - %s): %u vs %zu", + ep, uct_tcp_ep_cm_state[ep->conn_state].name, hdr->length, + (iface->config.rx_seg_size - sizeof(*hdr))); + + if (remainder < (sizeof(*hdr) + hdr->length)) { + handled++; + goto out; + } + + /* Full message was received */ + ep->rx.offset += sizeof(*hdr) + hdr->length; + ucs_assert(ep->rx.offset <= ep->rx.length); + + if (ucs_likely(hdr->am_id < UCT_AM_ID_MAX)) { + uct_tcp_ep_comp_recv_am(iface, ep, hdr); + handled++; + } else if (hdr->am_id == UCT_TCP_EP_PUT_REQ_AM_ID) { + ucs_assert(hdr->length == sizeof(uct_tcp_ep_put_req_hdr_t)); + uct_tcp_ep_handle_put_req(ep, (uct_tcp_ep_put_req_hdr_t*)(hdr + 1), + ep->rx.length - ep->rx.offset); + handled++; + if (ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_RX)) { + /* It means that PUT RX is in progress and EP RX buffer + * is used to keep PUT header. 
So, we don't need to + * release a EP RX buffer */ + goto out; + } + } else if (hdr->am_id == UCT_TCP_EP_PUT_ACK_AM_ID) { + ucs_assert(hdr->length == sizeof(uint32_t)); + uct_tcp_ep_handle_put_ack(ep, (uct_tcp_ep_put_ack_hdr_t*)(hdr + 1)); + handled++; + } else { + ucs_assert(hdr->am_id == UCT_TCP_EP_CM_AM_ID); + handled += 1 + uct_tcp_cm_handle_conn_pkt(&ep, hdr + 1, hdr->length); + /* coverity[check_after_deref] */ + if (ep == NULL) { + goto out; + } + } + + ucs_assert(ep != NULL); + } + + uct_tcp_ep_ctx_reset(&ep->rx); + +out: + return handled; +} + +static inline ucs_status_t +uct_tcp_ep_am_prepare(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep, + uint8_t am_id, uct_tcp_am_hdr_t **hdr) +{ + ucs_status_t status; + + status = uct_tcp_ep_check_tx_res(ep); + if (ucs_unlikely(status != UCS_OK)) { + if (ucs_likely(status == UCS_ERR_NO_RESOURCE)) { + goto err_no_res; + } + return status; + } + + ucs_assertv(ep->tx.buf == NULL, "ep=%p", ep); + + ep->tx.buf = ucs_mpool_get_inline(&iface->tx_mpool); + if (ucs_unlikely(ep->tx.buf == NULL)) { + goto err_no_res; + } + + *hdr = ep->tx.buf; + (*hdr)->am_id = am_id; + + return UCS_OK; + +err_no_res: + if (ep->fd != -1) { + uct_tcp_ep_mod_events(ep, UCS_EVENT_SET_EVWRITE, 0); + } + UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1); + return UCS_ERR_NO_RESOURCE; +} + +static unsigned uct_tcp_ep_progress_put_rx(uct_tcp_ep_t *ep) +{ + uct_tcp_ep_put_req_hdr_t *put_req; + size_t recv_length; + ucs_status_t status; + + put_req = (uct_tcp_ep_put_req_hdr_t*)ep->rx.buf; + recv_length = put_req->length; + status = ucs_socket_recv_nb(ep->fd, (void*)(uintptr_t)put_req->addr, + &recv_length, + uct_tcp_ep_io_err_handler_cb, ep); + if (ucs_unlikely(status != UCS_OK)) { + uct_tcp_ep_handle_recv_err(ep, status); + return 0; + } + + ucs_assertv(recv_length, "ep=%p", ep); + + uct_tcp_ep_put_rx_advance(ep, put_req, recv_length); + + return 1; +} + +static unsigned uct_tcp_ep_progress_data_rx(uct_tcp_ep_t *ep) +{ + if (!(ep->ctx_caps & 
UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_RX))) { + return uct_tcp_ep_progress_am_rx(ep); + } else { + return uct_tcp_ep_progress_put_rx(ep); + } +} + +static unsigned uct_tcp_ep_progress_magic_number_rx(uct_tcp_ep_t *ep) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + char str_local_addr[UCS_SOCKADDR_STRING_LEN]; + char str_remote_addr[UCS_SOCKADDR_STRING_LEN]; + size_t recv_length, prev_length; + uint64_t magic_number; + + if (ep->rx.buf == NULL) { + ep->rx.buf = ucs_mpool_get_inline(&iface->rx_mpool); + if (ucs_unlikely(ep->rx.buf == NULL)) { + ucs_warn("tcp_ep %p: unable to get a buffer from RX memory pool", ep); + return 0; + } + } + + prev_length = ep->rx.length; + recv_length = sizeof(magic_number) - ep->rx.length; + + if (!uct_tcp_ep_recv(ep, recv_length) || + (ep->rx.length < sizeof(magic_number))) { + return ((ep->rx.length - prev_length) > 0); + } + + magic_number = *(uint64_t*)ep->rx.buf; + + if (magic_number != UCT_TCP_MAGIC_NUMBER) { + /* Silently close this connection and destroy its EP */ + ucs_debug("tcp_iface %p (%s): received wrong magic number (expected: " + "%zu, received: %zu) for ep=%p (fd=%d) from %s", iface, + ucs_sockaddr_str((const struct sockaddr*)&iface->config.ifaddr, + str_local_addr, UCS_SOCKADDR_STRING_LEN), + UCT_TCP_MAGIC_NUMBER, magic_number, ep, + ep->fd, ucs_socket_getname_str(ep->fd, str_remote_addr, + UCS_SOCKADDR_STRING_LEN)); + goto err; + } + + uct_tcp_ep_ctx_reset(&ep->rx); + + uct_tcp_cm_change_conn_state(ep, UCT_TCP_EP_CONN_STATE_ACCEPTING); + + return 1; + +err: + uct_tcp_ep_destroy_internal(&ep->super.super); + return 0; +} + +static inline void +uct_tcp_ep_set_outstanding_zcopy(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep, + uct_tcp_ep_zcopy_tx_t *ctx, const void *header, + unsigned header_length, uct_completion_t *comp) +{ + ctx->comp = comp; + ep->ctx_caps |= UCS_BIT(UCT_TCP_EP_CTX_TYPE_ZCOPY_TX); + + if ((header_length != 0) && + /* check whether a user's header was sent or not */ + 
(ep->tx.offset < (sizeof(uct_tcp_am_hdr_t) + header_length))) { + ucs_assert(header_length <= iface->config.zcopy.max_hdr); + /* if the user's header wasn't sent completely, copy it to + * the EP TX buffer (after Zcopy context and IOVs) for + * retransmission. iov_len is already set to the proper value */ + ctx->iov[1].iov_base = UCS_PTR_BYTE_OFFSET(ep->tx.buf, + iface->config.zcopy.hdr_offset); + memcpy(ctx->iov[1].iov_base, header, header_length); + } + + ctx->iov_index = 0; + ucs_iov_advance(ctx->iov, ctx->iov_cnt, &ctx->iov_index, ep->tx.offset); + uct_tcp_ep_mod_events(ep, UCS_EVENT_SET_EVWRITE, 0); +} + +static inline ucs_status_t +uct_tcp_ep_am_send(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep, + const uct_tcp_am_hdr_t *hdr) +{ + ssize_t offset; + + ep->tx.length = sizeof(*hdr) + hdr->length; + iface->outstanding += ep->tx.length; + + offset = uct_tcp_ep_send(ep); + if (ucs_unlikely(offset < 0)) { + return (ucs_status_t)offset; + } + + uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_SEND, hdr->am_id, + hdr + 1, hdr->length, "SEND: ep %p fd %d sent " + "%zu/%zu bytes, moved by offset %zd", + ep, ep->fd, ep->tx.offset, ep->tx.length, offset); + + if (ucs_likely(!uct_tcp_ep_ctx_buf_need_progress(&ep->tx))) { + uct_tcp_ep_ctx_reset(&ep->tx); + } else { + uct_tcp_ep_mod_events(ep, UCS_EVENT_SET_EVWRITE, 0); + } + + return UCS_OK; +} + +static const void* +uct_tcp_ep_am_sendv_get_trace_payload(uct_tcp_am_hdr_t *hdr, + const void *header, + const struct iovec *payload_iov, + int short_sendv) +{ + if (!short_sendv) { + return header; + } + + /* If user requested trace data, we copy header and payload + * to EP TX buffer in order to trace correct data */ + uct_am_short_fill_data(hdr + 1, *(const uint64_t*)header, + payload_iov->iov_base, payload_iov->iov_len); + return (hdr + 1); +} + +static inline ucs_status_t +uct_tcp_ep_am_sendv(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep, + int short_sendv, uct_tcp_am_hdr_t *hdr, + size_t send_limit, const void *header, + struct 
iovec *iov, size_t iov_cnt) +{ + ucs_status_t status; + + ep->tx.length += hdr->length + sizeof(*hdr); + + ucs_assertv(ep->tx.length <= send_limit, "ep=%p", ep); + + status = ucs_socket_sendv_nb(ep->fd, iov, iov_cnt, + &ep->tx.offset, NULL, NULL); + + uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_SEND, hdr->am_id, + /* the function will be invoked only in case of + * data tracing is enabled */ + uct_tcp_ep_am_sendv_get_trace_payload(hdr, header, + &iov[2], short_sendv), + hdr->length, "SEND: ep %p fd %d sent %zu/%zu bytes, " + "moved by offset %zu, iov cnt %zu " + "[addr %p len %zu] [addr %p len %zu]", + ep, ep->fd, ep->tx.offset, ep->tx.length, + ep->tx.offset, iov_cnt, + /* print user-defined header or + * first iovec with a payload */ + ((iov_cnt > 1) ? iov[1].iov_base : NULL), + ((iov_cnt > 1) ? iov[1].iov_len : 0), + /* print first/second iovec with a payload */ + ((iov_cnt > 2) ? iov[2].iov_base : NULL), + ((iov_cnt > 2) ? iov[2].iov_len : 0)); + + iface->outstanding += ep->tx.length - ep->tx.offset; + + return status; +} + +static void uct_tcp_ep_post_put_ack(uct_tcp_ep_t *ep) +{ + uct_tcp_am_hdr_t *hdr = NULL; + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + uct_tcp_ep_put_ack_hdr_t *put_ack; + ucs_status_t status; + + /* Make sure that we are sending nothing through this EP at the moment. 
+ * This check is needed to avoid mixing AM/PUT data sent from this EP + * and this PUT ACK message */ + status = uct_tcp_ep_am_prepare(iface, ep, + UCT_TCP_EP_PUT_ACK_AM_ID, &hdr); + if (status != UCS_OK) { + if (status == UCS_ERR_NO_RESOURCE) { + ep->ctx_caps |= UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_RX_SENDING_ACK); + } else { + ucs_error("tcp_ep %p: failed to prepare AM data", ep); + } + return; + } + + /* Send PUT ACK to confirm completing PUT operations with + * the last received sequence number == ep::rx::put_sn */ + ucs_assertv(hdr != NULL, "ep=%p", ep); + hdr->length = sizeof(*put_ack); + put_ack = (uct_tcp_ep_put_ack_hdr_t*)(hdr + 1); + put_ack->sn = ep->rx.put_sn; + + uct_tcp_ep_am_send(iface, ep, hdr); + + /* If sending PUT ACK was OK, always remove SENDING ACK flag + * as the function can be called from outstanding progress */ + ep->ctx_caps &= ~UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_RX_SENDING_ACK); +} + +ucs_status_t uct_tcp_ep_am_short(uct_ep_h uct_ep, uint8_t am_id, uint64_t header, + const void *payload, unsigned length) +{ + uct_tcp_ep_t *ep = ucs_derived_of(uct_ep, uct_tcp_ep_t); + uct_tcp_iface_t *iface = ucs_derived_of(uct_ep->iface, uct_tcp_iface_t); + uct_tcp_am_hdr_t *hdr = NULL; + struct iovec iov[UCT_TCP_EP_AM_SHORTV_IOV_COUNT]; + uint32_t payload_length; + size_t offset; + ucs_status_t status; + + UCT_CHECK_LENGTH(length + sizeof(header), 0, + iface->config.tx_seg_size - sizeof(uct_tcp_am_hdr_t), + "am_short"); + UCT_CHECK_AM_ID(am_id); + + status = uct_tcp_ep_am_prepare(iface, ep, am_id, &hdr); + if (status != UCS_OK) { + return status; + } + + ucs_assertv(hdr != NULL, "ep=%p", ep); + + /* Save the length of the payload, because hdr (ep::buf) + * can be released inside `uct_tcp_ep_am_send` call */ + hdr->length = payload_length = length + sizeof(header); + + if (length <= iface->config.sendv_thresh) { + uct_am_short_fill_data(hdr + 1, header, payload, length); + status = uct_tcp_ep_am_send(iface, ep, hdr); + if (ucs_unlikely(status != UCS_OK)) { + 
uct_tcp_ep_ctx_reset(&ep->tx); + return status; + } + + UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, payload_length); + } else { + iov[0].iov_base = hdr; + iov[0].iov_len = sizeof(*hdr); + + iov[1].iov_base = &header; + iov[1].iov_len = sizeof(header); + + iov[2].iov_base = (void*)payload; + iov[2].iov_len = length; + + status = uct_tcp_ep_am_sendv(iface, ep, 1, hdr, + iface->config.tx_seg_size, &header, + iov, UCT_TCP_EP_AM_SHORTV_IOV_COUNT); + if ((status == UCS_OK) || (status == UCS_ERR_NO_PROGRESS)) { + UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, payload_length); + + if (uct_tcp_ep_ctx_buf_need_progress(&ep->tx)) { + /* Copy only user's header and payload to the TX buffer, + * TCP AM header is placed at the beginning of the buffer */ + offset = ((ep->tx.offset >= sizeof(*hdr)) ? + (ep->tx.offset - sizeof(*hdr)) : 0); + + ucs_iov_copy(&iov[1], UCT_TCP_EP_AM_SHORTV_IOV_COUNT - 1, + offset, UCS_PTR_BYTE_OFFSET(hdr + 1, offset), + ep->tx.length - sizeof(*hdr) - offset, + UCS_IOV_COPY_TO_BUF); + uct_tcp_ep_mod_events(ep, UCS_EVENT_SET_EVWRITE, 0); + return UCS_OK; + } + + ucs_assert(status == UCS_OK); + } + + uct_tcp_ep_ctx_reset(&ep->tx); + } + + return status; +} + +ssize_t uct_tcp_ep_am_bcopy(uct_ep_h uct_ep, uint8_t am_id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags) +{ + uct_tcp_ep_t *ep = ucs_derived_of(uct_ep, uct_tcp_ep_t); + uct_tcp_iface_t *iface = ucs_derived_of(uct_ep->iface, uct_tcp_iface_t); + uct_tcp_am_hdr_t *hdr = NULL; + uint32_t payload_length; + ucs_status_t status; + + UCT_CHECK_AM_ID(am_id); + + status = uct_tcp_ep_am_prepare(iface, ep, am_id, &hdr); + if (status != UCS_OK) { + return status; + } + + ucs_assertv(hdr != NULL, "ep=%p", ep); + + /* Save the length of the payload, because hdr (ep::buf) + * can be released inside `uct_tcp_ep_am_send` call */ + hdr->length = payload_length = pack_cb(hdr + 1, arg); + + status = uct_tcp_ep_am_send(iface, ep, hdr); + if (ucs_unlikely(status != UCS_OK)) { + uct_tcp_ep_ctx_reset(&ep->tx); + return 
status; + } + + UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, payload_length); + + return payload_length; +} + +static inline ucs_status_t +uct_tcp_ep_prepare_zcopy(uct_tcp_iface_t *iface, uct_tcp_ep_t *ep, uint8_t am_id, + const void *header, unsigned header_length, + const uct_iov_t *iov, size_t iovcnt, const char *name, + size_t *zcopy_payload_p, uct_tcp_ep_zcopy_tx_t **ctx_p) +{ + uct_tcp_am_hdr_t *hdr = NULL; + uct_tcp_ep_zcopy_tx_t *ctx; + ucs_status_t status; + + UCT_CHECK_IOV_SIZE(iovcnt, iface->config.zcopy.max_iov, name); + UCT_CHECK_LENGTH(header_length, 0, iface->config.zcopy.max_hdr, name); + + status = uct_tcp_ep_am_prepare(iface, ep, am_id, &hdr); + if (ucs_unlikely(status != UCS_OK)) { + return status; + } + + ucs_assertv(hdr != NULL, "ep=%p", ep); + + ctx = ucs_derived_of(hdr, uct_tcp_ep_zcopy_tx_t); + ctx->iov_cnt = 0; + + /* TCP transport header */ + ctx->iov[ctx->iov_cnt].iov_base = hdr; + ctx->iov[ctx->iov_cnt].iov_len = sizeof(*hdr); + ctx->iov_cnt++; + + /* User-defined or TCP internal protocol header */ + if (header_length != 0) { + ucs_assert(header != NULL); + ctx->iov[ctx->iov_cnt].iov_base = (void*)header; + ctx->iov[ctx->iov_cnt].iov_len = header_length; + ctx->iov_cnt++; + } + + /* User-defined payload */ + ctx->iov_cnt += uct_iovec_fill_iov(&ctx->iov[ctx->iov_cnt], iov, + iovcnt, zcopy_payload_p); + + *ctx_p = ctx; + + return UCS_OK; +} + +ucs_status_t uct_tcp_ep_am_zcopy(uct_ep_h uct_ep, uint8_t am_id, const void *header, + unsigned header_length, const uct_iov_t *iov, + size_t iovcnt, unsigned flags, + uct_completion_t *comp) +{ + uct_tcp_ep_t *ep = ucs_derived_of(uct_ep, uct_tcp_ep_t); + uct_tcp_iface_t *iface = ucs_derived_of(uct_ep->iface, uct_tcp_iface_t); + uct_tcp_ep_zcopy_tx_t *ctx = NULL; + size_t payload_length; + ucs_status_t status; + + UCT_CHECK_LENGTH(header_length + uct_iov_total_length(iov, iovcnt), 0, + iface->config.rx_seg_size - sizeof(uct_tcp_am_hdr_t), + "am_zcopy"); + UCT_CHECK_AM_ID(am_id); + + status = 
uct_tcp_ep_prepare_zcopy(iface, ep, am_id, header, header_length, + iov, iovcnt, "am_zcopy", &payload_length, + &ctx); + if (ucs_unlikely(status != UCS_OK)) { + return status; + } + + ctx->super.length = payload_length + header_length; + + status = uct_tcp_ep_am_sendv(iface, ep, 0, &ctx->super, + iface->config.rx_seg_size, + header, ctx->iov, ctx->iov_cnt); + if (ucs_unlikely((status != UCS_OK) && (status != UCS_ERR_NO_PROGRESS))) { + goto out; + } + + UCT_TL_EP_STAT_OP(&ep->super, AM, ZCOPY, ctx->super.length); + + if (uct_tcp_ep_ctx_buf_need_progress(&ep->tx)) { + uct_tcp_ep_set_outstanding_zcopy(iface, ep, ctx, header, + header_length, comp); + return UCS_INPROGRESS; + } + + ucs_assert(status == UCS_OK); + +out: + uct_tcp_ep_ctx_reset(&ep->tx); + return status; +} + +ucs_status_t uct_tcp_ep_put_zcopy(uct_ep_h uct_ep, const uct_iov_t *iov, + size_t iovcnt, uint64_t remote_addr, + uct_rkey_t rkey, uct_completion_t *comp) +{ + uct_tcp_ep_t *ep = ucs_derived_of(uct_ep, uct_tcp_ep_t); + uct_tcp_iface_t *iface = ucs_derived_of(uct_ep->iface, uct_tcp_iface_t); + uct_tcp_ep_zcopy_tx_t *ctx = NULL; + uct_tcp_ep_put_req_hdr_t put_req = {0}; /* Suppress Cppcheck false-positive */ + ucs_status_t status; + + UCT_CHECK_LENGTH(sizeof(put_req) + uct_iov_total_length(iov, iovcnt), 0, + UCT_TCP_EP_PUT_ZCOPY_MAX - sizeof(uct_tcp_am_hdr_t), + "put_zcopy"); + + status = uct_tcp_ep_prepare_zcopy(iface, ep, UCT_TCP_EP_PUT_REQ_AM_ID, + &put_req, sizeof(put_req), + iov, iovcnt, "put_zcopy", + /* Set a payload length directly to the + * TX length, since PUT Zcopy doesn't + * set the payload length to TCP AM hdr */ + &ep->tx.length, &ctx); + if (ucs_unlikely(status != UCS_OK)) { + return status; + } + + ctx->super.length = sizeof(put_req); + put_req.addr = remote_addr; + put_req.length = ep->tx.length; + put_req.sn = ep->tx.put_sn + 1; + + status = uct_tcp_ep_am_sendv(iface, ep, 0, &ctx->super, UCT_TCP_EP_PUT_ZCOPY_MAX, + &put_req, ctx->iov, ctx->iov_cnt); + if (ucs_unlikely((status != 
UCS_OK) && (status != UCS_ERR_NO_PROGRESS))) { + goto out; + } + + ep->tx.put_sn++; + + if (!(ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_TX_WAITING_ACK))) { + /* Add UCT_TCP_EP_CTX_TYPE_PUT_TX_WAITING_ACK flag and increment iface + * outstanding operations counter in order to ensure returning + * UCS_INPROGRESS from flush functions and do progressing. + * UCT_TCP_EP_CTX_TYPE_PUT_TX_WAITING_ACK flag has to be removed upon PUT + * ACK message receiving if there are no other PUT operations in-flight */ + ep->ctx_caps |= UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_TX_WAITING_ACK); + uct_tcp_iface_outstanding_inc(iface); + } + + UCT_TL_EP_STAT_OP(&ep->super, PUT, ZCOPY, put_req.length); + + if (uct_tcp_ep_ctx_buf_need_progress(&ep->tx)) { + uct_tcp_ep_set_outstanding_zcopy(iface, ep, ctx, &put_req, + sizeof(put_req), comp); + return UCS_INPROGRESS; + } + + ucs_assert(status == UCS_OK); + +out: + uct_tcp_ep_ctx_reset(&ep->tx); + return status; +} + +ucs_status_t uct_tcp_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *req, + unsigned flags) +{ + uct_tcp_ep_t *ep = ucs_derived_of(tl_ep, uct_tcp_ep_t); + + if (uct_tcp_ep_check_tx_res(ep) == UCS_OK) { + return UCS_ERR_BUSY; + } + + uct_pending_req_queue_push(&ep->pending_q, req); + UCT_TL_EP_STAT_PEND(&ep->super); + return UCS_OK; +} + +void uct_tcp_ep_pending_purge(uct_ep_h tl_ep, uct_pending_purge_callback_t cb, + void *arg) +{ + uct_tcp_ep_t *ep = ucs_derived_of(tl_ep, uct_tcp_ep_t); + uct_pending_req_priv_queue_t UCS_V_UNUSED *priv; + + uct_pending_queue_purge(priv, &ep->pending_q, 1, cb, arg); +} + +ucs_status_t uct_tcp_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp) +{ + uct_tcp_ep_t *ep = ucs_derived_of(tl_ep, uct_tcp_ep_t); + uct_tcp_ep_put_completion_t *put_comp; + + if (uct_tcp_ep_check_tx_res(ep) == UCS_ERR_NO_RESOURCE) { + UCT_TL_EP_STAT_FLUSH_WAIT(&ep->super); + return UCS_ERR_NO_RESOURCE; + } + + if (ep->ctx_caps & UCS_BIT(UCT_TCP_EP_CTX_TYPE_PUT_TX_WAITING_ACK)) { + if (comp != NULL) { + put_comp 
= ucs_calloc(1, sizeof(*put_comp), "put completion"); + if (put_comp == NULL) { + return UCS_ERR_NO_MEMORY; + } + + put_comp->wait_put_sn = ep->tx.put_sn; + put_comp->comp = comp; + ucs_queue_push(&ep->put_comp_q, &put_comp->elem); + } + + return UCS_INPROGRESS; + } + + UCT_TL_EP_STAT_FLUSH(&ep->super); + return UCS_OK; +} + diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c new file mode 100644 index 0000000..01efc85 --- /dev/null +++ b/src/uct/tcp/tcp_iface.c @@ -0,0 +1,675 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "tcp.h" + +#include +#include +#include +#include +#include +#include +#include + + +static ucs_config_field_t uct_tcp_iface_config_table[] = { + {"", "MAX_NUM_EPS=256", NULL, + ucs_offsetof(uct_tcp_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + {"TX_SEG_SIZE", "8kb", + "Size of send copy-out buffer", + ucs_offsetof(uct_tcp_iface_config_t, tx_seg_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {"RX_SEG_SIZE", "64kb", + "Size of receive copy-out buffer", + ucs_offsetof(uct_tcp_iface_config_t, rx_seg_size), UCS_CONFIG_TYPE_MEMUNITS}, + + {"MAX_IOV", "6", + "Maximum IOV count that can contain user-defined payload in a single\n" + "call to non-blocking vector socket send", + ucs_offsetof(uct_tcp_iface_config_t, max_iov), UCS_CONFIG_TYPE_ULONG}, + + {"SENDV_THRESH", "2kb", + "Threshold for switching from send() to sendmsg() for short active messages", + ucs_offsetof(uct_tcp_iface_config_t, sendv_thresh), UCS_CONFIG_TYPE_MEMUNITS}, + + {"PREFER_DEFAULT", "y", + "Give higher priority to the default network interface on the host", + ucs_offsetof(uct_tcp_iface_config_t, prefer_default), UCS_CONFIG_TYPE_BOOL}, + + {"PUT_ENABLE", "y", + "Enable PUT Zcopy support", + ucs_offsetof(uct_tcp_iface_config_t, put_enable), 
UCS_CONFIG_TYPE_BOOL}, + + {"CONN_NB", "n", + "Enable non-blocking connection establishment. It may improve startup " + "time, but can lead to connection resets due to high load on TCP/IP stack", + ucs_offsetof(uct_tcp_iface_config_t, conn_nb), UCS_CONFIG_TYPE_BOOL}, + + {"MAX_POLL", UCS_PP_MAKE_STRING(UCT_TCP_MAX_EVENTS), + "Number of times to poll on a ready socket. 0 - no polling, -1 - until drained", + ucs_offsetof(uct_tcp_iface_config_t, max_poll), UCS_CONFIG_TYPE_UINT}, + + {UCT_TCP_CONFIG_MAX_CONN_RETRIES, "25", + "How many connection establishment attmepts should be done if dropped " + "connection was detected due to lack of system resources", + ucs_offsetof(uct_tcp_iface_config_t, max_conn_retries), UCS_CONFIG_TYPE_UINT}, + + {"NODELAY", "y", + "Set TCP_NODELAY socket option to disable Nagle algorithm. Setting this\n" + "option usually provides better performance", + ucs_offsetof(uct_tcp_iface_config_t, sockopt_nodelay), UCS_CONFIG_TYPE_BOOL}, + + {"SNDBUF", "auto", + "Socket send buffer size", + ucs_offsetof(uct_tcp_iface_config_t, sockopt_sndbuf), UCS_CONFIG_TYPE_MEMUNITS}, + + {"RCVBUF", "auto", + "Socket receive buffer size", + ucs_offsetof(uct_tcp_iface_config_t, sockopt_rcvbuf), UCS_CONFIG_TYPE_MEMUNITS}, + + UCT_IFACE_MPOOL_CONFIG_FIELDS("TX_", -1, 8, "send", + ucs_offsetof(uct_tcp_iface_config_t, tx_mpool), ""), + + UCT_IFACE_MPOOL_CONFIG_FIELDS("RX_", -1, 8, "receive", + ucs_offsetof(uct_tcp_iface_config_t, rx_mpool), ""), + + {NULL} +}; + + +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_tcp_iface_t, uct_iface_t); + +static ucs_status_t uct_tcp_iface_get_device_address(uct_iface_h tl_iface, + uct_device_addr_t *addr) +{ + uct_tcp_iface_t *iface = ucs_derived_of(tl_iface, uct_tcp_iface_t); + + *(struct in_addr*)addr = iface->config.ifaddr.sin_addr; + return UCS_OK; +} + +static ucs_status_t uct_tcp_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *addr) +{ + uct_tcp_iface_t *iface = ucs_derived_of(tl_iface, uct_tcp_iface_t); + + *(in_port_t*)addr 
= iface->config.ifaddr.sin_port; + return UCS_OK; +} + +static int uct_tcp_iface_is_reachable(const uct_iface_h tl_iface, + const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr) +{ + /* We always report that a peer is reachable. connect() call will + * fail if the peer is unreachable when creating UCT/TCP EP */ + return 1; +} + +static ucs_status_t uct_tcp_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *attr) +{ + uct_tcp_iface_t *iface = ucs_derived_of(tl_iface, uct_tcp_iface_t); + size_t am_buf_size = iface->config.tx_seg_size - sizeof(uct_tcp_am_hdr_t); + ucs_status_t status; + int is_default; + + uct_base_iface_query(&iface->super, attr); + + status = uct_tcp_netif_caps(iface->if_name, &attr->latency.overhead, + &attr->bandwidth.shared); + if (status != UCS_OK) { + return status; + } + + attr->iface_addr_len = sizeof(in_port_t); + attr->device_addr_len = sizeof(struct in_addr); + attr->cap.flags = UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_PENDING | + UCT_IFACE_FLAG_CB_SYNC | + UCT_IFACE_FLAG_EVENT_SEND_COMP | + UCT_IFACE_FLAG_EVENT_RECV; + + attr->cap.am.max_short = am_buf_size; + attr->cap.am.max_bcopy = am_buf_size; + + if (iface->config.zcopy.max_iov > UCT_TCP_EP_ZCOPY_SERVICE_IOV_COUNT) { + /* AM */ + attr->cap.am.max_iov = iface->config.zcopy.max_iov - + UCT_TCP_EP_ZCOPY_SERVICE_IOV_COUNT; + attr->cap.am.max_zcopy = iface->config.rx_seg_size - + sizeof(uct_tcp_am_hdr_t); + attr->cap.am.max_hdr = iface->config.zcopy.max_hdr; + attr->cap.am.opt_zcopy_align = 1; + attr->cap.flags |= UCT_IFACE_FLAG_AM_ZCOPY; + + if (iface->config.put_enable) { + /* PUT */ + attr->cap.put.max_iov = iface->config.zcopy.max_iov - + UCT_TCP_EP_ZCOPY_SERVICE_IOV_COUNT; + attr->cap.put.max_zcopy = UCT_TCP_EP_PUT_ZCOPY_MAX - + UCT_TCP_EP_PUT_SERVICE_LENGTH; + attr->cap.put.opt_zcopy_align = 1; + attr->cap.flags |= UCT_IFACE_FLAG_PUT_ZCOPY; + } + } + + attr->bandwidth.dedicated = 0; + 
attr->latency.growth = 0; + attr->overhead = 50e-6; /* 50 usec */ + + if (iface->config.prefer_default) { + status = uct_tcp_netif_is_default(iface->if_name, &is_default); + if (status != UCS_OK) { + return status; + } + + attr->priority = is_default ? 0 : 1; + } else { + attr->priority = 0; + } + + return UCS_OK; +} + +static ucs_status_t uct_tcp_iface_event_fd_get(uct_iface_h tl_iface, int *fd_p) +{ + uct_tcp_iface_t *iface = ucs_derived_of(tl_iface, uct_tcp_iface_t); + + return ucs_event_set_fd_get(iface->event_set, fd_p); +} + +static void uct_tcp_iface_handle_events(void *callback_data, + int events, void *arg) +{ + unsigned *count = (unsigned*)arg; + uct_tcp_ep_t *ep = (uct_tcp_ep_t*)callback_data; + + ucs_assertv(ep->conn_state != UCT_TCP_EP_CONN_STATE_CLOSED, "ep=%p", ep); + + if (events & UCS_EVENT_SET_EVREAD) { + *count += uct_tcp_ep_cm_state[ep->conn_state].rx_progress(ep); + } + if (events & UCS_EVENT_SET_EVWRITE) { + *count += uct_tcp_ep_cm_state[ep->conn_state].tx_progress(ep); + } +} + +unsigned uct_tcp_iface_progress(uct_iface_h tl_iface) +{ + uct_tcp_iface_t *iface = ucs_derived_of(tl_iface, uct_tcp_iface_t); + unsigned max_events = iface->config.max_poll; + unsigned count = 0; + unsigned read_events; + ucs_status_t status; + + do { + read_events = ucs_min(ucs_sys_event_set_max_wait_events, max_events); + status = ucs_event_set_wait(iface->event_set, &read_events, + 0, uct_tcp_iface_handle_events, + (void *)&count); + max_events -= read_events; + ucs_trace_poll("iface=%p ucs_event_set_wait() returned %d: " + "read events=%u, total=%u", + iface, status, read_events, + iface->config.max_poll - max_events); + } while ((max_events > 0) && (read_events == UCT_TCP_MAX_EVENTS) && + ((status == UCS_OK) || (status == UCS_INPROGRESS))); + + return count; +} + +static ucs_status_t uct_tcp_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp) +{ + uct_tcp_iface_t *iface = ucs_derived_of(tl_iface, uct_tcp_iface_t); + + if (comp != NULL) { 
+ return UCS_ERR_UNSUPPORTED; + } + + if (iface->outstanding) { + UCT_TL_IFACE_STAT_FLUSH_WAIT(&iface->super); + return UCS_INPROGRESS; + } + + UCT_TL_IFACE_STAT_FLUSH(&iface->super); + return UCS_OK; +} + +static void uct_tcp_iface_listen_close(uct_tcp_iface_t *iface) +{ + if (iface->listen_fd != -1) { + close(iface->listen_fd); + iface->listen_fd = -1; + } +} + +static void uct_tcp_iface_connect_handler(int listen_fd, void *arg) +{ + uct_tcp_iface_t *iface = arg; + struct sockaddr_in peer_addr; + socklen_t addrlen; + ucs_status_t status; + int fd; + + ucs_assert(listen_fd == iface->listen_fd); + + for (;;) { + addrlen = sizeof(peer_addr); + status = ucs_socket_accept(iface->listen_fd, (struct sockaddr*)&peer_addr, + &addrlen, &fd); + if (status != UCS_OK) { + if (status != UCS_ERR_NO_PROGRESS) { + uct_tcp_iface_listen_close(iface); + } + return; + } + ucs_assert(fd != -1); + + status = uct_tcp_cm_handle_incoming_conn(iface, &peer_addr, fd); + if (status != UCS_OK) { + close(fd); + return; + } + } +} +/* Apply the iface's TCP_NODELAY and (unless "auto") SO_SNDBUF/SO_RCVBUF settings to socket fd; returns the first failing status */ +ucs_status_t uct_tcp_iface_set_sockopt(uct_tcp_iface_t *iface, int fd) +{ + ucs_status_t status; + int bufsize; /* option value is passed as an int; sockopt.sndbuf/rcvbuf are presumably wider (size_t) - confirm in tcp.h */ + + status = ucs_socket_setopt(fd, IPPROTO_TCP, TCP_NODELAY, + (const void*)&iface->sockopt.nodelay, sizeof(int)); + if (status != UCS_OK) { + return status; + } + + if (iface->sockopt.sndbuf != UCS_MEMUNITS_AUTO) { + bufsize = (int)iface->sockopt.sndbuf; + status = ucs_socket_setopt(fd, SOL_SOCKET, SO_SNDBUF, + (const void*)&bufsize, sizeof(bufsize)); + if (status != UCS_OK) { + return status; + } + } + + if (iface->sockopt.rcvbuf != UCS_MEMUNITS_AUTO) { + bufsize = (int)iface->sockopt.rcvbuf; + status = ucs_socket_setopt(fd, SOL_SOCKET, SO_RCVBUF, + (const void*)&bufsize, sizeof(bufsize)); + if (status != UCS_OK) { + return status; + } + } + + return UCS_OK; +} + +static uct_iface_ops_t uct_tcp_iface_ops = { + .ep_am_short = uct_tcp_ep_am_short, + .ep_am_bcopy = uct_tcp_ep_am_bcopy, + .ep_am_zcopy = uct_tcp_ep_am_zcopy, + .ep_put_zcopy = uct_tcp_ep_put_zcopy, + .ep_pending_add = uct_tcp_ep_pending_add, +
.ep_pending_purge = uct_tcp_ep_pending_purge, + .ep_flush = uct_tcp_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_create = uct_tcp_ep_create, + .ep_destroy = uct_tcp_ep_destroy, + .iface_flush = uct_tcp_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_progress_enable = uct_base_iface_progress_enable, + .iface_progress_disable = uct_base_iface_progress_disable, + .iface_progress = uct_tcp_iface_progress, + .iface_event_fd_get = uct_tcp_iface_event_fd_get, + .iface_event_arm = ucs_empty_function_return_success, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_tcp_iface_t), + .iface_query = uct_tcp_iface_query, + .iface_get_address = uct_tcp_iface_get_address, + .iface_get_device_address = uct_tcp_iface_get_device_address, + .iface_is_reachable = uct_tcp_iface_is_reachable +}; + +static ucs_status_t uct_tcp_iface_listener_init(uct_tcp_iface_t *iface) +{ + struct sockaddr_in bind_addr = iface->config.ifaddr; + socklen_t socklen = sizeof(bind_addr); + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + ucs_status_t status; + int ret; + + bind_addr.sin_port = 0; /* use a random port */ + status = ucs_socket_server_init((struct sockaddr *)&bind_addr, + sizeof(bind_addr), ucs_socket_max_conn(), + &iface->listen_fd); + if (status != UCS_OK) { + goto err; + } + + /* Get the port which was selected for the socket */ + ret = getsockname(iface->listen_fd, (struct sockaddr *)&bind_addr, &socklen); + if (ret < 0) { + ucs_error("getsockname(fd=%d) failed: %m", iface->listen_fd); + status = UCS_ERR_IO_ERROR; + goto err_close_sock; + } + + iface->config.ifaddr.sin_port = bind_addr.sin_port; + + /* Register event handler for incoming connections */ + status = ucs_async_set_event_handler(iface->super.worker->async->mode, + iface->listen_fd, + UCS_EVENT_SET_EVREAD | + UCS_EVENT_SET_EVERR, + uct_tcp_iface_connect_handler, iface, + iface->super.worker->async); + if (status != UCS_OK) { + goto err_close_sock; + } + + ucs_debug("tcp_iface %p: listening for connections (fd=%d) on 
%s", + iface, iface->listen_fd, ucs_sockaddr_str((struct sockaddr *)&bind_addr, + ip_port_str, sizeof(ip_port_str))); + return UCS_OK; + +err_close_sock: + close(iface->listen_fd); +err: + return status; +} + +static ucs_mpool_ops_t uct_tcp_mpool_ops = { + ucs_mpool_chunk_malloc, + ucs_mpool_chunk_free, + NULL, + NULL +}; + +static UCS_CLASS_INIT_FUNC(uct_tcp_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_tcp_iface_config_t *config = ucs_derived_of(tl_config, + uct_tcp_iface_config_t); + ucs_status_t status; + + UCT_CHECK_PARAM(params->field_mask & UCT_IFACE_PARAM_FIELD_OPEN_MODE, + "UCT_IFACE_PARAM_FIELD_OPEN_MODE is not defined"); + if (!(params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE)) { + ucs_error("only UCT_IFACE_OPEN_MODE_DEVICE is supported"); + return UCS_ERR_UNSUPPORTED; + } + + if (ucs_derived_of(worker, uct_priv_worker_t)->thread_mode == UCS_THREAD_MODE_MULTI) { + ucs_error("TCP transport does not support multi-threaded worker"); + return UCS_ERR_INVALID_PARAM; + } + + UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &uct_tcp_iface_ops, md, worker, + params, tl_config + UCS_STATS_ARG((params->field_mask & + UCT_IFACE_PARAM_FIELD_STATS_ROOT) ? 
+ params->stats_root : NULL) + UCS_STATS_ARG(params->mode.device.dev_name)); + + ucs_strncpy_zero(self->if_name, params->mode.device.dev_name, + sizeof(self->if_name)); + self->outstanding = 0; + self->config.tx_seg_size = config->tx_seg_size + + sizeof(uct_tcp_am_hdr_t); + self->config.rx_seg_size = config->rx_seg_size + + sizeof(uct_tcp_am_hdr_t); + + if (ucs_iov_get_max() >= UCT_TCP_EP_AM_SHORTV_IOV_COUNT) { + self->config.sendv_thresh = config->sendv_thresh; + } else { + /* AM Short with non-blocking vector send can't be used */ + self->config.sendv_thresh = UCS_MEMUNITS_INF; + } + + /* Maximum IOV count allowed by user's configuration (considering TCP + * protocol and user's AM headers that use 1st and 2nd IOVs + * correspondingly) and system constraints */ + self->config.zcopy.max_iov = ucs_min(config->max_iov + + UCT_TCP_EP_ZCOPY_SERVICE_IOV_COUNT, + ucs_iov_get_max()); + /* Use a remaining part of TX segment for AM Zcopy header */ + self->config.zcopy.hdr_offset = (sizeof(uct_tcp_ep_zcopy_tx_t) + + sizeof(struct iovec) * + self->config.zcopy.max_iov); + if ((self->config.zcopy.hdr_offset > self->config.tx_seg_size) && + (self->config.zcopy.max_iov > UCT_TCP_EP_ZCOPY_SERVICE_IOV_COUNT)) { + ucs_error("AM Zcopy context (%zu) must be <= TX segment size (%zu). 
" + "It can be adjusted by decreasing maximum IOV count (%zu)", + self->config.zcopy.hdr_offset, self->config.tx_seg_size, + self->config.zcopy.max_iov); + return UCS_ERR_INVALID_PARAM; + } + + self->config.zcopy.max_hdr = self->config.tx_seg_size - + self->config.zcopy.hdr_offset; + self->config.prefer_default = config->prefer_default; + self->config.put_enable = config->put_enable; + self->config.conn_nb = config->conn_nb; + self->config.max_poll = config->max_poll; + self->config.max_conn_retries = config->max_conn_retries; + self->sockopt.nodelay = config->sockopt_nodelay; + self->sockopt.sndbuf = config->sockopt_sndbuf; + self->sockopt.rcvbuf = config->sockopt_rcvbuf; + ucs_list_head_init(&self->ep_list); + kh_init_inplace(uct_tcp_cm_eps, &self->ep_cm_map); + + if (self->config.tx_seg_size > self->config.rx_seg_size) { + ucs_error("RX segment size (%zu) must be >= TX segment size (%zu)", + self->config.rx_seg_size, self->config.tx_seg_size); + return UCS_ERR_INVALID_PARAM; + } + + status = ucs_mpool_init(&self->tx_mpool, 0, self->config.tx_seg_size, + 0, UCS_SYS_CACHE_LINE_SIZE, + (config->tx_mpool.bufs_grow == 0) ? + 32 : config->tx_mpool.bufs_grow, + config->tx_mpool.max_bufs, + &uct_tcp_mpool_ops, "uct_tcp_iface_tx_buf_mp"); + if (status != UCS_OK) { + goto err; + } + + status = ucs_mpool_init(&self->rx_mpool, 0, self->config.rx_seg_size * 2, + 0, UCS_SYS_CACHE_LINE_SIZE, + (config->rx_mpool.bufs_grow == 0) ? 
+ 32 : config->rx_mpool.bufs_grow, + config->rx_mpool.max_bufs, + &uct_tcp_mpool_ops, "uct_tcp_iface_rx_buf_mp"); + if (status != UCS_OK) { + goto err_cleanup_tx_mpool; + } + + status = uct_tcp_netif_inaddr(self->if_name, &self->config.ifaddr, + &self->config.netmask); + if (status != UCS_OK) { + goto err_cleanup_rx_mpool; + } + + status = ucs_event_set_create(&self->event_set); + if (status != UCS_OK) { + status = UCS_ERR_IO_ERROR; + goto err_cleanup_rx_mpool; + } + + status = uct_tcp_iface_listener_init(self); + if (status != UCS_OK) { + goto err_cleanup_event_set; + } + + return UCS_OK; + +err_cleanup_event_set: + ucs_event_set_cleanup(self->event_set); +err_cleanup_rx_mpool: + ucs_mpool_cleanup(&self->rx_mpool, 1); +err_cleanup_tx_mpool: + ucs_mpool_cleanup(&self->tx_mpool, 1); +err: + return status; +} + +static void uct_tcp_iface_ep_list_cleanup(uct_tcp_iface_t *iface, + ucs_list_link_t *ep_list) +{ + uct_tcp_ep_t *ep, *tmp; + + ucs_list_for_each_safe(ep, tmp, ep_list, list) { + uct_tcp_cm_purge_ep(ep); + uct_tcp_ep_destroy_internal(&ep->super.super); + } +} + +static void uct_tcp_iface_eps_cleanup(uct_tcp_iface_t *iface) +{ + ucs_list_link_t *ep_list; + + uct_tcp_iface_ep_list_cleanup(iface, &iface->ep_list); + + kh_foreach_value(&iface->ep_cm_map, ep_list, { + uct_tcp_iface_ep_list_cleanup(iface, ep_list); + ucs_free(ep_list); + }); + + kh_destroy_inplace(uct_tcp_cm_eps, &iface->ep_cm_map); +} + +void uct_tcp_iface_add_ep(uct_tcp_ep_t *ep) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + UCS_ASYNC_BLOCK(iface->super.worker->async); + ucs_list_add_tail(&iface->ep_list, &ep->list); + UCS_ASYNC_UNBLOCK(iface->super.worker->async); +} + +void uct_tcp_iface_remove_ep(uct_tcp_ep_t *ep) +{ + uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_tcp_iface_t); + UCS_ASYNC_BLOCK(iface->super.worker->async); + ucs_list_del(&ep->list); + UCS_ASYNC_UNBLOCK(iface->super.worker->async); +} + +static 
UCS_CLASS_CLEANUP_FUNC(uct_tcp_iface_t) +{ + ucs_status_t status; + + ucs_debug("tcp_iface %p: destroying", self); + + uct_base_iface_progress_disable(&self->super.super, + UCT_PROGRESS_SEND | + UCT_PROGRESS_RECV); + + status = ucs_async_remove_handler(self->listen_fd, 1); + if (status != UCS_OK) { + ucs_warn("failed to remove handler for server socket fd=%d", self->listen_fd); + } + + uct_tcp_iface_eps_cleanup(self); + + ucs_mpool_cleanup(&self->rx_mpool, 1); + ucs_mpool_cleanup(&self->tx_mpool, 1); + + uct_tcp_iface_listen_close(self); + ucs_event_set_cleanup(self->event_set); +} + +UCS_CLASS_DEFINE(uct_tcp_iface_t, uct_base_iface_t); +static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t*); + +ucs_status_t uct_tcp_query_devices(uct_md_h md, + uct_tl_device_resource_t **devices_p, + unsigned *num_devices_p) +{ + uct_tl_device_resource_t *devices, *tmp; + static const char *netdev_dir = "/sys/class/net"; + struct dirent *entry; + unsigned num_devices; + ucs_status_t status; + DIR *dir; + + dir = opendir(netdev_dir); + if (dir == NULL) { + ucs_error("opendir(%s) failed: %m", netdev_dir); + status = UCS_ERR_IO_ERROR; + goto out; + } + + devices = NULL; + num_devices = 0; + for (;;) { + errno = 0; + entry = readdir(dir); + if (entry == NULL) { + if (errno != 0) { + ucs_error("readdir(%s) failed: %m", netdev_dir); + ucs_free(devices); + status = UCS_ERR_IO_ERROR; + goto out_closedir; + } + break; /* no more items */ + } + + /* According to the sysfs(5) manual page, all of entries + * has to be a symbolic link representing one of the real + * or virtual networking devices that are visible in the + * network namespace of the process that is accessing the + * directory. Let's avoid checking files that are not a + * symbolic link, e.g. "." and ".." 
entries */ + if (entry->d_type != DT_LNK) { + continue; + } + + if (!ucs_netif_is_active(entry->d_name)) { + continue; + } + + tmp = ucs_realloc(devices, sizeof(*devices) * (num_devices + 1), + "tcp devices"); + if (tmp == NULL) { + ucs_free(devices); + status = UCS_ERR_NO_MEMORY; + goto out_closedir; + } + devices = tmp; + + ucs_snprintf_zero(devices[num_devices].name, + sizeof(devices[num_devices].name), + "%s", entry->d_name); + devices[num_devices].type = UCT_DEVICE_TYPE_NET; + ++num_devices; + } + + *num_devices_p = num_devices; + *devices_p = devices; + status = UCS_OK; + +out_closedir: + closedir(dir); +out: + return status; +} + +UCT_TL_DEFINE(&uct_tcp_component, tcp, uct_tcp_query_devices, uct_tcp_iface_t, + UCT_TCP_CONFIG_PREFIX, uct_tcp_iface_config_table, + uct_tcp_iface_config_t); diff --git a/src/uct/tcp/tcp_listener.c b/src/uct/tcp/tcp_listener.c new file mode 100644 index 0000000..9326f43 --- /dev/null +++ b/src/uct/tcp/tcp_listener.c @@ -0,0 +1,184 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "tcp_sockcm_ep.h" + +#include +#include + + +static void uct_tcp_listener_conn_req_handler(int fd, void *arg) +{ + uct_tcp_listener_t *listener = (uct_tcp_listener_t *)arg; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + struct sockaddr_storage client_addr; + ucs_async_context_t *async_ctx; + uct_tcp_sockcm_ep_t *ep; + uct_ep_params_t params; + ucs_status_t status; + socklen_t addrlen; + int conn_fd; + + ucs_assert(fd == listener->listen_fd); + + addrlen = sizeof(struct sockaddr_storage); + status = ucs_socket_accept(listener->listen_fd, + (struct sockaddr*)&client_addr, + &addrlen, &conn_fd); + if (status != UCS_OK) { + return; + } + + ucs_assert(conn_fd != -1); + + ucs_trace("server accepted a connection request (fd=%d) from client %s", + conn_fd, ucs_sockaddr_str((struct sockaddr*)&client_addr, + ip_port_str, UCS_SOCKADDR_STRING_LEN)); + + /* Set the accept_fd to non-blocking mode + * (so that send/recv won't be blocking) */ + status = ucs_sys_fcntl_modfl(conn_fd, O_NONBLOCK, 0); + if (status != UCS_OK) { + goto err; + } + + /* create the server's endpoint here. 
uct_ep_create() will return this one */ + params.field_mask = UCT_EP_PARAM_FIELD_CM | + UCT_EP_PARAM_FIELD_CONN_REQUEST | + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS; + params.cm = listener->super.cm; + params.conn_request = NULL; + params.sockaddr_cb_flags = UCT_CB_FLAG_ASYNC; + + status = UCS_CLASS_NEW(uct_tcp_sockcm_ep_t, &ep, &params); + if (status != UCS_OK) { + ucs_error("failed to create a new tcp_sockcm ep"); + goto err; + } + + /* coverity[uninit_use] */ + ep->fd = conn_fd; + ep->state |= UCT_TCP_SOCKCM_EP_CONNECTED; + ep->listener = listener; + + /* Adding the ep to a list on the cm for cleanup purposes */ + ucs_list_add_tail(&listener->sockcm->ep_list, &ep->list); + + async_ctx = listener->super.cm->iface.worker->async; + status = ucs_async_set_event_handler(async_ctx->mode, conn_fd, + UCS_EVENT_SET_EVREAD | + UCS_EVENT_SET_EVERR, + uct_tcp_sa_data_handler, + ep, async_ctx); + if (status != UCS_OK) { + goto err_delete_ep; + } + + return; + +err_delete_ep: + UCS_CLASS_DELETE(uct_tcp_sockcm_ep_t, ep); +err: + close(conn_fd); +} + +UCS_CLASS_INIT_FUNC(uct_tcp_listener_t, uct_cm_h cm, + const struct sockaddr *saddr, socklen_t socklen, + const uct_listener_params_t *params) +{ + ucs_async_context_t *async_ctx; + char ip_port_str[UCS_SOCKADDR_STRING_LEN]; + ucs_status_t status; + int backlog; + + UCS_CLASS_CALL_SUPER_INIT(uct_listener_t, cm); + + self->sockcm = ucs_derived_of(cm, uct_tcp_sockcm_t); + self->conn_request_cb = params->conn_request_cb; + self->user_data = (params->field_mask & UCT_LISTENER_PARAM_FIELD_USER_DATA) ? + params->user_data : NULL; + backlog = (params->field_mask & UCT_LISTENER_PARAM_FIELD_BACKLOG) ?
+ params->backlog : ucs_socket_max_conn(); + + status = ucs_socket_server_init(saddr, socklen, backlog, &self->listen_fd); + if (status != UCS_OK) { + goto err; + } + + async_ctx = self->sockcm->super.iface.worker->async; + status = ucs_async_set_event_handler(async_ctx->mode, self->listen_fd, + UCS_EVENT_SET_EVREAD | + UCS_EVENT_SET_EVERR, + uct_tcp_listener_conn_req_handler, self, + async_ctx); + if (status != UCS_OK) { + goto err_close_socket; + } + + ucs_debug("created a TCP listener %p on cm %p with fd: %d " + "listening on %s", self, cm, self->listen_fd, + ucs_sockaddr_str(saddr, ip_port_str, UCS_SOCKADDR_STRING_LEN)); + + return UCS_OK; + +err_close_socket: + close(self->listen_fd); +err: + return status; +} + +UCS_CLASS_CLEANUP_FUNC(uct_tcp_listener_t) +{ + ucs_status_t status; + + status = ucs_async_remove_handler(self->listen_fd, 1); + if (status != UCS_OK) { + ucs_warn("failed to remove event handler for fd %d: %s", + self->listen_fd, ucs_status_string(status)); + } + + close(self->listen_fd); +} + +ucs_status_t uct_tcp_listener_reject(uct_listener_h listener, + uct_conn_request_h conn_request) +{ + return UCS_ERR_NOT_IMPLEMENTED; +} + +ucs_status_t uct_tcp_listener_query(uct_listener_h listener, + uct_listener_attr_t *listener_attr) +{ + uct_tcp_listener_t *tcp_listener = ucs_derived_of(listener, + uct_tcp_listener_t); + struct sockaddr_storage addr; + ucs_status_t status; + socklen_t sock_len; + + if (listener_attr->field_mask & UCT_LISTENER_ATTR_FIELD_SOCKADDR) { + sock_len = sizeof(struct sockaddr_storage); + if (getsockname(tcp_listener->listen_fd, (struct sockaddr *)&addr, + &sock_len)) { + ucs_error("getsockname failed (listener=%p) %m", tcp_listener); + return UCS_ERR_IO_ERROR; + } + + status = ucs_sockaddr_copy((struct sockaddr *)&listener_attr->sockaddr, + (const struct sockaddr *)&addr); + if (status != UCS_OK) { + return status; + } + + } + + return UCS_OK; +} + +UCS_CLASS_DEFINE(uct_tcp_listener_t, uct_listener_t); 
+UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_listener_t, uct_listener_t, + uct_cm_h , const struct sockaddr *, socklen_t , + const uct_listener_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_tcp_listener_t, uct_listener_t); diff --git a/src/uct/tcp/tcp_listener.h b/src/uct/tcp/tcp_listener.h new file mode 100644 index 0000000..c49ee40 --- /dev/null +++ b/src/uct/tcp/tcp_listener.h @@ -0,0 +1,37 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "tcp_sockcm.h" + +/** + * An TCP listener for incoming connections requests on the server side. + */ +typedef struct uct_tcp_listener { + uct_listener_t super; + + int listen_fd; + + uct_tcp_sockcm_t *sockcm; + + /** Callback to invoke upon receving a connection request from a client */ + uct_listener_conn_request_callback_t conn_request_cb; + + /** User's data to be passed as argument to the conn_request_cb */ + void *user_data; +} uct_tcp_listener_t; + + +UCS_CLASS_DECLARE_NEW_FUNC(uct_tcp_listener_t, uct_listener_t, + uct_cm_h , const struct sockaddr *, socklen_t , + const uct_listener_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_tcp_listener_t, uct_listener_t); + +ucs_status_t uct_tcp_listener_query(uct_listener_h listener, + uct_listener_attr_t *listener_attr); + +ucs_status_t uct_tcp_listener_reject(uct_listener_h listener, + uct_conn_request_h conn_request); + diff --git a/src/uct/tcp/tcp_md.c b/src/uct/tcp/tcp_md.c new file mode 100644 index 0000000..e2c24f8 --- /dev/null +++ b/src/uct/tcp/tcp_md.c @@ -0,0 +1,92 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "tcp.h" +#include "tcp_sockcm.h" +#include + + +static ucs_status_t uct_tcp_md_query(uct_md_h md, uct_md_attr_t *attr) +{ + /* Dummy memory registration provided. 
No real memory handling exists */ + attr->cap.flags = UCT_MD_FLAG_REG | + UCT_MD_FLAG_NEED_RKEY; /* TODO ignore rkey in rma/amo ops */ + attr->cap.max_alloc = 0; + attr->cap.reg_mem_types = UCS_MEMORY_TYPES_CPU_ACCESSIBLE; + attr->cap.access_mem_type = UCS_MEMORY_TYPE_HOST; + attr->cap.detect_mem_types = 0; + attr->cap.max_reg = ULONG_MAX; + attr->rkey_packed_size = 0; + attr->reg_cost.overhead = 0; + attr->reg_cost.growth = 0; + memset(&attr->local_cpus, 0xff, sizeof(attr->local_cpus)); + return UCS_OK; +} + +static ucs_status_t uct_tcp_md_mem_reg(uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + /* We have to emulate memory registration. Return dummy pointer */ + *memh_p = (void*)0xdeadbeef; + return UCS_OK; +} + +static ucs_status_t +uct_tcp_md_open(uct_component_t *component, const char *md_name, + const uct_md_config_t *md_config, uct_md_h *md_p) +{ + static uct_md_ops_t md_ops = { + .close = ucs_empty_function, + .query = uct_tcp_md_query, + .mkey_pack = ucs_empty_function_return_success, + .mem_reg = uct_tcp_md_mem_reg, + .mem_dereg = ucs_empty_function_return_success, + .detect_memory_type = ucs_empty_function_return_unsupported + }; + static uct_md_t md = { + .ops = &md_ops, + .component = &uct_tcp_component + }; + + *md_p = &md; + return UCS_OK; +} + +static ucs_status_t uct_tcp_md_rkey_unpack(uct_component_t *component, + const void *rkey_buffer, + uct_rkey_t *rkey_p, void **handle_p) +{ + /** + * Pseudo stub function for the key unpacking + * Need rkey == 0 due to work with same process to reuse uct_base_[put|get|atomic]* + */ + *rkey_p = 0; + *handle_p = NULL; + return UCS_OK; +} + +uct_component_t uct_tcp_component = { + .query_md_resources = uct_md_query_single_md_resource, + .md_open = uct_tcp_md_open, + .cm_open = UCS_CLASS_NEW_FUNC_NAME(uct_tcp_sockcm_t), + .rkey_unpack = uct_tcp_md_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = ucs_empty_function_return_success, + .name = 
UCT_TCP_NAME, + .md_config = UCT_MD_DEFAULT_CONFIG_INITIALIZER, + .cm_config = { + .name = "TCP-SOCKCM connection manager", + .prefix = "TCP_", + .table = uct_tcp_sockcm_config_table, + .size = sizeof(uct_tcp_sockcm_config_t), + }, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_tcp_component), + .flags = UCT_COMPONENT_FLAG_CM +}; +UCT_COMPONENT_REGISTER(&uct_tcp_component) diff --git a/src/uct/tcp/tcp_net.c b/src/uct/tcp/tcp_net.c new file mode 100644 index 0000000..8daffb2 --- /dev/null +++ b/src/uct/tcp/tcp_net.c @@ -0,0 +1,171 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "tcp.h" + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +typedef ssize_t (*uct_tcp_io_func_t)(int fd, void *data, size_t size, int flags); + + +ucs_status_t uct_tcp_netif_caps(const char *if_name, double *latency_p, + double *bandwidth_p) +{ + struct ethtool_cmd edata; + uint32_t speed_mbps; + ucs_status_t status; + struct ifreq ifr; + size_t ll_headers; + int speed_known; + short ether_type; + size_t mtu; + + memset(&ifr, 0, sizeof(ifr)); + + speed_known = 0; + edata.cmd = ETHTOOL_GSET; + ifr.ifr_data = (void*)&edata; + status = ucs_netif_ioctl(if_name, SIOCETHTOOL, &ifr); + if (status == UCS_OK) { +#if HAVE_DECL_ETHTOOL_CMD_SPEED + speed_mbps = ethtool_cmd_speed(&edata); +#else + speed_mbps = edata.speed; +#endif +#if HAVE_DECL_SPEED_UNKNOWN + speed_known = speed_mbps != (uint32_t)SPEED_UNKNOWN; +#else + speed_known = (speed_mbps != 0) && ((uint16_t)speed_mbps != (uint16_t)-1); +#endif + } + + if (!speed_known) { + speed_mbps = 100; + ucs_debug("speed of %s is UNKNOWN, assuming %d Mbps", if_name, speed_mbps); + } + + status = ucs_netif_ioctl(if_name, SIOCGIFHWADDR, &ifr); + if (status == UCS_OK) { + ether_type = ifr.ifr_addr.sa_family; + } else { + ether_type = ARPHRD_ETHER; + } + + status = 
ucs_netif_ioctl(if_name, SIOCGIFMTU, &ifr); + if (status == UCS_OK) { + mtu = ifr.ifr_mtu; + } else { + mtu = 1500; + } + + switch (ether_type) { + case ARPHRD_ETHER: + /* https://en.wikipedia.org/wiki/Ethernet_frame */ + ll_headers = 7 + /* preamble */ + 1 + /* start-of-frame */ + ETH_HLEN + /* src MAC + dst MAC + ethertype */ + ETH_FCS_LEN + /* CRC */ + 12; /* inter-packet gap */ + break; + case ARPHRD_INFINIBAND: + ll_headers = /* LRH */ 8 + + /* GRH */ 40 + + /* BTH */ 12 + + /* DETH */ 8 + + /* IPoIB */ 4 + 20 + + /* ICRC */ 4 + + /* VCRC */ 2 + + /* DELIM */ 2; + break; + default: + ll_headers = 0; + break; + } + + /* https://w3.siemens.com/mcms/industrial-communication/en/rugged-communication/Documents/AN8.pdf */ + *latency_p = 576.0 / (speed_mbps * 1e6) + 5.2e-6; + *bandwidth_p = (speed_mbps * 1e6) / 8 * + (mtu - 40) / (mtu + ll_headers); /* TCP/IP header is 40 bytes */ + return UCS_OK; +} + +ucs_status_t uct_tcp_netif_inaddr(const char *if_name, struct sockaddr_in *ifaddr, + struct sockaddr_in *netmask) +{ + ucs_status_t status; + struct ifreq ifra, ifrnm; + + status = ucs_netif_ioctl(if_name, SIOCGIFADDR, &ifra); + if (status != UCS_OK) { + return status; + } + + if (netmask != NULL) { + status = ucs_netif_ioctl(if_name, SIOCGIFNETMASK, &ifrnm); + if (status != UCS_OK) { + return status; + } + } + + if ((ifra.ifr_addr.sa_family != AF_INET) ) { + ucs_error("%s address is not INET", if_name); + return UCS_ERR_INVALID_ADDR; + } + + memcpy(ifaddr, (struct sockaddr_in*)&ifra.ifr_addr, sizeof(*ifaddr)); + if (netmask != NULL) { + memcpy(netmask, (struct sockaddr_in*)&ifrnm.ifr_addr, sizeof(*netmask)); + } + + return UCS_OK; +} + +ucs_status_t uct_tcp_netif_is_default(const char *if_name, int *result_p) +{ + static const char *filename = "/proc/net/route"; + in_addr_t netmask; + char name[128]; + char str[128]; + FILE *f; + int ret; + + f = fopen(filename, "r"); + if (f == NULL) { + ucs_debug("failed to open '%s': %m", filename); + return UCS_ERR_IO_ERROR; + } + 
+ /* Columns of the route table file; only Iface and Mask are assigned by sscanf() below: + Iface Destination Gateway Flags RefCnt Use Metric Mask MTU Window IRTT + */ + *result_p = 0; /* report "not default" unless a zero-mask route for if_name is found */ + while (fgets(str, sizeof(str), f) != NULL) { + ret = sscanf(str, "%s %*x %*x %*d %*d %*d %*d %x", name, &netmask); + if ((ret == 2) && !strcmp(name, if_name) && (netmask == 0)) { + *result_p = 1; + break; + } + + /* Skip rest of the line */ + while ((strchr(str, '\n') == NULL) && (fgets(str, sizeof(str), f) != NULL)); + } + + fclose(f); + return UCS_OK; +} diff --git a/src/uct/tcp/tcp_sockcm.c b/src/uct/tcp/tcp_sockcm.c new file mode 100644 index 0000000..fa49efb --- /dev/null +++ b/src/uct/tcp/tcp_sockcm.c @@ -0,0 +1,181 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "tcp_sockcm_ep.h" + +#include +#include + + +ucs_config_field_t uct_tcp_sockcm_config_table[] = { + {"", "", NULL, + ucs_offsetof(uct_tcp_sockcm_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_cm_config_table)}, + + {"PRIV_DATA_LEN", "2048", + "TCP CM private data length", + ucs_offsetof(uct_tcp_sockcm_config_t, priv_data_len), UCS_CONFIG_TYPE_MEMUNITS}, + + {NULL} +}; + +static ucs_status_t uct_tcp_sockcm_query(uct_cm_h cm, uct_cm_attr_t *cm_attr) +{ + uct_tcp_sockcm_t *tcp_sockcm = ucs_derived_of(cm, uct_tcp_sockcm_t); + + if (cm_attr->field_mask & UCT_CM_ATTR_FIELD_MAX_CONN_PRIV) { + cm_attr->max_conn_priv = tcp_sockcm->priv_data_len; + } + + return UCS_OK; +} + +static uct_cm_ops_t uct_tcp_sockcm_ops = { + .close = UCS_CLASS_DELETE_FUNC_NAME(uct_tcp_sockcm_t), + .cm_query = uct_tcp_sockcm_query, + .listener_create = UCS_CLASS_NEW_FUNC_NAME(uct_tcp_listener_t), + .listener_reject = uct_tcp_listener_reject, + .listener_query = uct_tcp_listener_query, + .listener_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_tcp_listener_t), + .ep_create = uct_tcp_sockcm_ep_create +}; + +static void uct_tcp_close_ep(uct_tcp_sockcm_ep_t *ep) +{ + ucs_list_del(&ep->list); + ucs_async_remove_handler(ep->fd, 1); + close(ep->fd); + ep->fd = -1; +
UCS_CLASS_DELETE(uct_tcp_sockcm_ep_t, ep); +} + +void uct_tcp_sa_data_handler(int fd, void *arg) +{ + uct_tcp_sockcm_ep_t *ep = (uct_tcp_sockcm_ep_t*)arg; + ucs_status_t status; + + ucs_assertv(ep->fd == fd, "ep->fd %d fd %d, ep_state %d", ep->fd, fd, ep->state); + + if (!ucs_socket_is_connected(fd)) { + ucs_debug("fd %d is not connected. ep state: %d", fd, ep->state); + /* coverity[check_return] */ + ucs_async_modify_handler(fd, 0); + return; + } + + switch (ep->state) { + case UCT_TCP_SOCKCM_EP_ON_CLIENT: + /* connect() completed, send data to the server */ + ep->state |= UCT_TCP_SOCKCM_EP_CONNECTED; + + status = uct_tcp_sockcm_ep_send_priv_data(ep); + if (status != UCS_OK) { + ucs_async_modify_handler(fd, 0); + } + break; + case UCT_TCP_SOCKCM_EP_ON_CLIENT | UCT_TCP_SOCKCM_EP_CONN_SENDING: + /* can send, progress the sending */ + status = uct_tcp_sockcm_ep_progress_send(ep); + if (status != UCS_OK) { + ucs_async_modify_handler(fd, 0); + } + break; + case UCT_TCP_SOCKCM_EP_ON_CLIENT | UCT_TCP_SOCKCM_EP_CONN_SENT: + /* finished sending. TODO recv data from the server */ + ucs_async_modify_handler(fd, 0); + break; + case UCT_TCP_SOCKCM_EP_ON_SERVER | UCT_TCP_SOCKCM_EP_CONNECTED: + /* receive data from the client */ + status = uct_tcp_sockcm_ep_recv(ep); + if (status != UCS_OK) { + uct_tcp_close_ep(ep); + } + break; + case UCT_TCP_SOCKCM_EP_ON_SERVER | UCT_TCP_SOCKCM_EP_CONN_RECEIVING: + /* can read, progress the receving */ + status = uct_tcp_sockcm_ep_progress_recv(ep); + if (status != UCS_OK) { + uct_tcp_close_ep(ep); + } + break; + case UCT_TCP_SOCKCM_EP_ON_SERVER | UCT_TCP_SOCKCM_EP_CONN_RECEIVED: + /* finished recv, can send. 
TODO send data to the client */ + ucs_async_modify_handler(fd, 0); + break; + default: + ucs_error("unexpected event on client ep %p (state=%d)", ep, ep->state); + } +} + +static uct_iface_ops_t uct_tcp_sockcm_iface_ops = { + .ep_pending_purge = (uct_ep_pending_purge_func_t)ucs_empty_function, + .ep_disconnect = uct_tcp_sockcm_ep_disconnect, + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_tcp_sockcm_ep_t), + .ep_put_short = (uct_ep_put_short_func_t)ucs_empty_function_return_unsupported, + .ep_put_bcopy = (uct_ep_put_bcopy_func_t)ucs_empty_function_return_unsupported, + .ep_get_bcopy = (uct_ep_get_bcopy_func_t)ucs_empty_function_return_unsupported, + .ep_am_short = (uct_ep_am_short_func_t)ucs_empty_function_return_unsupported, + .ep_am_bcopy = (uct_ep_am_bcopy_func_t)ucs_empty_function_return_unsupported, + .ep_atomic_cswap64 = (uct_ep_atomic_cswap64_func_t)ucs_empty_function_return_unsupported, + .ep_atomic64_post = (uct_ep_atomic64_post_func_t)ucs_empty_function_return_unsupported, + .ep_atomic64_fetch = (uct_ep_atomic64_fetch_func_t)ucs_empty_function_return_unsupported, + .ep_atomic_cswap32 = (uct_ep_atomic_cswap32_func_t)ucs_empty_function_return_unsupported, + .ep_atomic32_post = (uct_ep_atomic32_post_func_t)ucs_empty_function_return_unsupported, + .ep_atomic32_fetch = (uct_ep_atomic32_fetch_func_t)ucs_empty_function_return_unsupported, + .ep_pending_add = (uct_ep_pending_add_func_t)ucs_empty_function_return_unsupported, + .ep_flush = (uct_ep_flush_func_t)ucs_empty_function_return_unsupported, + .ep_fence = (uct_ep_fence_func_t)ucs_empty_function_return_unsupported, + .ep_check = (uct_ep_check_func_t)ucs_empty_function_return_unsupported, + .ep_create = (uct_ep_create_func_t)ucs_empty_function_return_unsupported, + .iface_flush = (uct_iface_flush_func_t)ucs_empty_function_return_unsupported, + .iface_fence = (uct_iface_fence_func_t)ucs_empty_function_return_unsupported, + .iface_progress_enable = ucs_empty_function, + .iface_progress_disable = 
ucs_empty_function, + .iface_progress = (uct_iface_progress_func_t)ucs_empty_function_return_zero, + .iface_event_fd_get = (uct_iface_event_fd_get_func_t)ucs_empty_function_return_unsupported, + .iface_event_arm = (uct_iface_event_arm_func_t)ucs_empty_function_return_unsupported, + .iface_close = ucs_empty_function, + .iface_query = (uct_iface_query_func_t)ucs_empty_function_return_unsupported, + .iface_get_device_address = (uct_iface_get_device_address_func_t)ucs_empty_function_return_unsupported, + .iface_get_address = (uct_iface_get_address_func_t)ucs_empty_function_return_unsupported, + .iface_is_reachable = (uct_iface_is_reachable_func_t)ucs_empty_function_return_zero +}; + +UCS_CLASS_INIT_FUNC(uct_tcp_sockcm_t, uct_component_h component, + uct_worker_h worker, const uct_cm_config_t *config) +{ + uct_tcp_sockcm_config_t *cm_config = ucs_derived_of(config, + uct_tcp_sockcm_config_t); + + UCS_CLASS_CALL_SUPER_INIT(uct_cm_t, &uct_tcp_sockcm_ops, + &uct_tcp_sockcm_iface_ops, worker, component); + + self->priv_data_len = cm_config->priv_data_len - + sizeof(uct_tcp_sockcm_priv_data_hdr_t); + + ucs_list_head_init(&self->ep_list); + + ucs_debug("created tcp_sockcm %p", self); + + return UCS_OK; +} + +UCS_CLASS_CLEANUP_FUNC(uct_tcp_sockcm_t) +{ + uct_tcp_sockcm_ep_t *ep, *tmp; + + UCS_ASYNC_BLOCK(self->super.iface.worker->async); + + ucs_list_for_each_safe(ep, tmp, &self->ep_list, list) { + uct_tcp_close_ep(ep); + } + + UCS_ASYNC_UNBLOCK(self->super.iface.worker->async); +} + +UCS_CLASS_DEFINE(uct_tcp_sockcm_t, uct_cm_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_sockcm_t, uct_cm_t, uct_component_h, + uct_worker_h, const uct_cm_config_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_tcp_sockcm_t, uct_cm_t); diff --git a/src/uct/tcp/tcp_sockcm.h b/src/uct/tcp/tcp_sockcm.h new file mode 100644 index 0000000..c450196 --- /dev/null +++ b/src/uct/tcp/tcp_sockcm.h @@ -0,0 +1,41 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/
+
+/* NOTE(review): the header name is missing after the #include below -
+ * presumably lost when this patch text was extracted; restore it from the
+ * upstream file. */
+#include
+
+
+/* forward declaration of the endpoint type, which is defined in tcp_sockcm_ep.h */
+typedef struct uct_tcp_sockcm_ep uct_tcp_sockcm_ep_t;
+
+
+/**
+ * A TCP connection manager
+ */
+typedef struct uct_tcp_sockcm {
+    uct_cm_t        super;
+    size_t          priv_data_len; /** Maximal length of the user's private
+                                       data: the configured length minus the
+                                       size of the private data header */
+    ucs_list_link_t ep_list;       /** List of endpoints */
+} uct_tcp_sockcm_t;
+
+/**
+ * TCP SOCKCM configuration.
+ */
+typedef struct uct_tcp_sockcm_config {
+    uct_cm_config_t super;
+    size_t          priv_data_len; /** Configured private data length; the
+                                       size of the private data header is
+                                       subtracted from it when the connection
+                                       manager is created */
+} uct_tcp_sockcm_config_t;
+
+
+/**
+ * Header that precedes the user's private data in the ep's send/recv buffer.
+ */
+typedef struct uct_tcp_sockcm_priv_data_hdr {
+    size_t length; /** Length of the private data */
+} uct_tcp_sockcm_priv_data_hdr_t;
+
+extern ucs_config_field_t uct_tcp_sockcm_config_table[];
+
+UCS_CLASS_DECLARE_NEW_FUNC(uct_tcp_sockcm_t, uct_cm_t, uct_component_h,
+                           uct_worker_h, const uct_cm_config_t *);
+UCS_CLASS_DECLARE_DELETE_FUNC(uct_tcp_sockcm_t, uct_cm_t);
+
+/* async event handler for the socket of an endpoint; arg is the endpoint */
+void uct_tcp_sa_data_handler(int fd, void *arg);
diff --git a/src/uct/tcp/tcp_sockcm_ep.c b/src/uct/tcp/tcp_sockcm_ep.c
new file mode 100644
index 0000000..2213e61
--- /dev/null
+++ b/src/uct/tcp/tcp_sockcm_ep.c
@@ -0,0 +1,415 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "tcp_sockcm_ep.h"
+/* NOTE(review): the header names are missing after the four #include
+ * directives below - presumably lost when this patch text was extracted;
+ * restore them from the upstream file. */
+#include
+#include
+#include
+#include
+
+
+/**
+ * @return The TCP sockcm connection manager that owns the iface of the given ep.
+ */
+static UCS_F_ALWAYS_INLINE
+uct_tcp_sockcm_t *uct_tcp_sockcm_ep_get_cm(uct_tcp_sockcm_ep_t *cep)
+{
+    /* return the tcp sockcm connection manager this ep is using */
+    return ucs_container_of(cep->super.super.super.iface, uct_tcp_sockcm_t,
+                            super.iface);
+}
+
+/**
+ * Disconnect is not implemented for the TCP sockcm endpoint yet.
+ *
+ * @return Always UCS_ERR_NOT_IMPLEMENTED.
+ */
+ucs_status_t uct_tcp_sockcm_ep_disconnect(uct_ep_h ep, unsigned flags)
+{
+    return UCS_ERR_NOT_IMPLEMENTED;
+}
+
+/**
+ * Reset the send/recv progress of the ep: both the offset and the length are
+ * set to 0. The data buffer itself is left untouched.
+ */
+static void uct_tcp_sockcm_ep_init_comm_ctx(uct_tcp_sockcm_ep_t *cep)
+{
+    cep->comm_ctx.offset = 0;
+    cep->comm_ctx.length = 0;
+}
+
+/**
+ * Handle a disconnection of the remote peer: reset the send/recv progress and
+ * invoke the server's or the client's connect callback with the given status.
+ *
+ * @param [in] cep     Endpoint whose peer disconnected.
+ * @param [in] status  Error status to report; asserted not to be UCS_OK.
+ */
+static void uct_tcp_sockcm_ep_handle_disconnect(uct_tcp_sockcm_ep_t *cep,
+                                                ucs_status_t status)
+{
+    uct_cm_remote_data_t remote_data;
+
+    /* remote peer disconnected */
+    ucs_debug("ep %p (fd=%d): remote peer disconnected", cep, cep->fd);
+    uct_tcp_sockcm_ep_init_comm_ctx(cep);
+
+    ucs_assert(status != UCS_OK);
+    if (cep->state & UCT_TCP_SOCKCM_EP_ON_SERVER) {
+        uct_cm_ep_server_connect_cb(&cep->super, status);
+    } else {
+        ucs_assert(cep->state & UCT_TCP_SOCKCM_EP_ON_CLIENT);
+        /* no remote data is available, pass an empty field mask */
+        remote_data.field_mask = 0;
+        uct_cm_ep_client_connect_cb(&cep->super, &remote_data, status);
+    }
+
+    /* TODO handle disconnect if the ep already invoked the connect_cb */
+}
+
+/**
+ * @return Nonzero if the current offset reached the expected length, which is
+ *         asserted to be nonzero.
+ */
+static int uct_tcp_sockcm_ep_is_tx_rx_done(uct_tcp_sockcm_ep_t *cep)
+{
+    ucs_assert((cep->comm_ctx.length != 0));
+    return (cep->comm_ctx.offset == cep->comm_ctx.length);
+}
+
+/**
+ * Send, without blocking, as much as possible of the data that is still
+ * pending in the ep's buffer (from the current offset up to the length).
+ *
+ * Once all of the data was sent the ep is marked as DATA_SENT, the send/recv
+ * progress is reset and the event handler of the fd is modified to
+ * UCS_EVENT_SET_EVREAD.
+ *
+ * @param [in] cep  Client endpoint, asserted to be connected and to have
+ *                  pending data.
+ *
+ * @return UCS_OK if the send succeeded or made no progress, otherwise the
+ *         error returned by the send. On UCS_ERR_NOT_CONNECTED the disconnect
+ *         handling is invoked first.
+ */
+ucs_status_t uct_tcp_sockcm_ep_progress_send(uct_tcp_sockcm_ep_t *cep)
+{
+    ucs_status_t status;
+    size_t sent_length;
+
+    ucs_assert(ucs_test_all_flags(cep->state, UCT_TCP_SOCKCM_EP_ON_CLIENT |
+                                              UCT_TCP_SOCKCM_EP_CONNECTED));
+    ucs_assert(cep->comm_ctx.offset < cep->comm_ctx.length);
+
+    /* in: number of bytes left to send. presumably updated by the call below
+     * to the number of bytes that were actually sent - TODO confirm against
+     * ucs_socket_send_nb() */
+    sent_length = cep->comm_ctx.length - cep->comm_ctx.offset;
+
+    status = ucs_socket_send_nb(cep->fd,
+                                UCS_PTR_BYTE_OFFSET(cep->comm_ctx.buf,
+                                                    cep->comm_ctx.offset),
+                                &sent_length, NULL, NULL);
+    if ((status != UCS_OK) && (status != UCS_ERR_NO_PROGRESS)) {
+        if (status == UCS_ERR_NOT_CONNECTED) {
+            uct_tcp_sockcm_ep_handle_disconnect(cep, status);
+        } else {
+            ucs_error("ep %p failed to send client's data (len=%zu offset=%zu)",
+                      cep, cep->comm_ctx.length, cep->comm_ctx.offset);
+        }
+        return status;
+    }
+
+    cep->comm_ctx.offset += sent_length;
+    ucs_assert(cep->comm_ctx.offset <= cep->comm_ctx.length);
+    cep->state |= UCT_TCP_SOCKCM_EP_SENDING;
+
+    if (uct_tcp_sockcm_ep_is_tx_rx_done(cep)) {
+        cep->state |= UCT_TCP_SOCKCM_EP_DATA_SENT;
+        uct_tcp_sockcm_ep_init_comm_ctx(cep);
+
+        /* wait for a reply from the peer */
+        status = ucs_async_modify_handler(cep->fd, UCS_EVENT_SET_EVREAD);
+        if (status != UCS_OK) {
+            /* NOTE(review): this failure is only logged, UCS_OK is still
+             * returned below - confirm that this is intended */
+            ucs_error("failed to modify %d event handler to "
+                      "UCS_EVENT_SET_EVREAD: %s", cep->fd,
+                      ucs_status_string(status));
+        }
+    }
+
+    return UCS_OK;
+}
+
+/**
+ * Pack the client's private data into the ep's buffer, right after the
+ * private data header, and start sending the header and the data to the
+ * server.
+ *
+ * @param [in] cep  Client endpoint whose socket is connected.
+ *
+ * @return The status of the first send attempt, or an error if the interface
+ *         name could not be obtained, if the user's pack callback failed or
+ *         if it returned more than the maximal private data length
+ *         (UCS_ERR_EXCEEDS_LIMIT).
+ */
+ucs_status_t uct_tcp_sockcm_ep_send_priv_data(uct_tcp_sockcm_ep_t *cep)
+{
+    char ifname_str[UCT_DEVICE_NAME_MAX];
+    uct_tcp_sockcm_priv_data_hdr_t *hdr;
+    ssize_t priv_data_ret;
+    ucs_status_t status;
+
+    /* get interface name associated with the connected client fd */
+    status = ucs_sockaddr_get_ifname(cep->fd, ifname_str, sizeof(ifname_str));
+    if (UCS_OK != status) {
+        goto out;
+    }
+
+    /* the header is at the start of the buffer, the user's data follows it */
+    hdr           = (uct_tcp_sockcm_priv_data_hdr_t*)cep->comm_ctx.buf;
+    priv_data_ret = cep->super.priv_pack_cb(cep->super.user_data, ifname_str,
+                                            hdr + 1);
+    if (priv_data_ret < 0) {
+        /* a negative return value carries a ucs_status_t error code */
+        ucs_assert(priv_data_ret > UCS_ERR_LAST);
+        status = (ucs_status_t)priv_data_ret;
+        ucs_error("tcp_sockcm private data pack function failed with error: %s",
+                  ucs_status_string(status));
+        goto out;
+    } else if (priv_data_ret > (uct_tcp_sockcm_ep_get_cm(cep)->priv_data_len)) {
+        status = UCS_ERR_EXCEEDS_LIMIT;
+        ucs_error("tcp_sockcm private data pack function returned %zd "
+                  "(max: %zu)", priv_data_ret,
+                  uct_tcp_sockcm_ep_get_cm(cep)->priv_data_len);
+        goto out;
+    }
+
+    hdr->length          = priv_data_ret;
+    cep->comm_ctx.length = sizeof(*hdr) + hdr->length;
+
+    status = uct_tcp_sockcm_ep_progress_send(cep);
+
+out:
+    return
status;
+}
+
+/**
+ * Invoke the listener's connection request callback on the server side, after
+ * the client's private data was fully received into the ep's buffer.
+ *
+ * The callback gets the local interface name, the ep itself (as the
+ * connection request) and the remote data: the peer's socket address as the
+ * device address and the received private data. The ep is removed from the
+ * list it is linked on before the callback is invoked.
+ *
+ * @param [in] cep  Server endpoint that completed receiving the data.
+ *
+ * @return UCS_OK, or the error from obtaining the interface name or the
+ *         peer's address (the callback is not invoked in that case).
+ */
+static ucs_status_t uct_tcp_sockcm_ep_server_invoke_conn_req_cb(uct_tcp_sockcm_ep_t *cep)
+{
+    uct_tcp_sockcm_priv_data_hdr_t *hdr = (uct_tcp_sockcm_priv_data_hdr_t *)
+                                          cep->comm_ctx.buf;
+    struct sockaddr_storage remote_dev_addr = {0};
+    /* NOTE(review): passed uninitialized to ucs_socket_getpeername() -
+     * presumably that function sets it before using it; confirm */
+    socklen_t remote_dev_addr_len;
+    char peer_str[UCS_SOCKADDR_STRING_LEN];
+    char ifname_str[UCT_DEVICE_NAME_MAX];
+    uct_cm_remote_data_t remote_data;
+    ucs_status_t status;
+
+    /* get the local interface name associated with the connected fd */
+    status = ucs_sockaddr_get_ifname(cep->fd, ifname_str, UCT_DEVICE_NAME_MAX);
+    if (UCS_OK != status) {
+        return status;
+    }
+
+    /* get the device address of the remote peer associated with the connected fd */
+    status = ucs_socket_getpeername(cep->fd, &remote_dev_addr, &remote_dev_addr_len);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    remote_data.field_mask            = UCT_CM_REMOTE_DATA_FIELD_DEV_ADDR |
+                                        UCT_CM_REMOTE_DATA_FIELD_DEV_ADDR_LENGTH |
+                                        UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA |
+                                        UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA_LENGTH;
+    remote_data.dev_addr              = (uct_device_addr_t *)&remote_dev_addr;
+    remote_data.dev_addr_length       = remote_dev_addr_len;
+    /* the private data starts right after the header in the buffer */
+    remote_data.conn_priv_data        = hdr + 1;
+    remote_data.conn_priv_data_length = hdr->length;
+
+    ucs_debug("fd %d: remote_data: (field_mask=%zu) dev_addr: %s (length=%zu), "
+              "conn_priv_data_length=%zu", cep->fd, remote_data.field_mask,
+              ucs_sockaddr_str((const struct sockaddr*)remote_data.dev_addr,
+                               peer_str, UCS_SOCKADDR_STRING_LEN),
+              remote_data.dev_addr_length, remote_data.conn_priv_data_length);
+
+    /* the endpoint, passed as the conn_request to the callback, will be passed
+     * to uct_ep_create() which will be invoked by the user and therefore moving
+     * over to its responsibility.
+     */
+    ucs_list_del(&cep->list);
+    cep->listener->conn_request_cb(&cep->listener->super, cep->listener->user_data,
+                                   ifname_str, cep, &remote_data);
+
+    return UCS_OK;
+}
+
+/**
+ * Complete the receiving of the client's data on the server: mark the ep as
+ * DATA_RECEIVED, reset the send/recv progress, invoke the connection request
+ * callback and modify the event handler of the fd to UCS_EVENT_SET_EVWRITE.
+ *
+ * @param [in] cep  Server endpoint that received all of the client's data.
+ *
+ * @return UCS_OK, or the error of the first step that failed.
+ */
+ucs_status_t uct_tcp_sockcm_ep_handle_data_received(uct_tcp_sockcm_ep_t *cep)
+{
+    ucs_status_t status;
+
+    cep->state |= UCT_TCP_SOCKCM_EP_DATA_RECEIVED;
+    /* only the offset and the length are reset, the received data stays in
+     * the buffer and is read by the callback invocation below */
+    uct_tcp_sockcm_ep_init_comm_ctx(cep);
+
+    status = uct_tcp_sockcm_ep_server_invoke_conn_req_cb(cep);
+    if (status != UCS_OK) {
+        goto out;
+    }
+
+    status = ucs_async_modify_handler(cep->fd, UCS_EVENT_SET_EVWRITE);
+    if (status != UCS_OK) {
+        ucs_error("failed to modify %d event handler to UCS_EVENT_SET_EVWRITE: %s",
+                  cep->fd, ucs_status_string(status));
+        goto out;
+    }
+
+out:
+    return status;
+}
+
+/**
+ * Receive, without blocking, into the ep's buffer starting at the current
+ * offset, and advance the offset by the number of bytes that were received.
+ *
+ * @return UCS_OK if the receive succeeded or made no progress, otherwise the
+ *         error returned by the receive. On UCS_ERR_NOT_CONNECTED the
+ *         disconnect handling is invoked first.
+ */
+static ucs_status_t uct_tcp_sockcm_ep_recv_nb(uct_tcp_sockcm_ep_t *cep)
+{
+    size_t recv_length;
+    ucs_status_t status;
+
+    /* ask for everything that still fits in the buffer (header + maximal
+     * private data), since the actual length is known only once the header
+     * arrived */
+    recv_length = uct_tcp_sockcm_ep_get_cm(cep)->priv_data_len +
+                  sizeof(uct_tcp_sockcm_priv_data_hdr_t) - cep->comm_ctx.offset;
+    status = ucs_socket_recv_nb(cep->fd, UCS_PTR_BYTE_OFFSET(cep->comm_ctx.buf,
+                                                             cep->comm_ctx.offset),
+                                &recv_length, NULL, NULL);
+    if ((status != UCS_OK) && (status != UCS_ERR_NO_PROGRESS)) {
+        if (status == UCS_ERR_NOT_CONNECTED) {
+            uct_tcp_sockcm_ep_handle_disconnect(cep, status);
+        } else {
+            ucs_error("ep %p (fd=%d) failed to recv client's data (offset=%zu)",
+                      cep, cep->fd, cep->comm_ctx.offset);
+        }
+        return status;
+    }
+
+    cep->comm_ctx.offset += recv_length;
+    /* the expected length is 0 as long as the header was not parsed yet */
+    ucs_assertv((cep->comm_ctx.length ?
+                 cep->comm_ctx.offset <= cep->comm_ctx.length : 1), "%zu > %zu",
+                cep->comm_ctx.offset, cep->comm_ctx.length);
+    return UCS_OK;
+}
+
+/**
+ * Continue receiving the client's data on the server, after the private data
+ * header was already received, and complete the receive flow once all of the
+ * expected data arrived.
+ *
+ * @param [in] cep  Server endpoint in the receiving state.
+ *
+ * @return UCS_OK, or the error of the receive or of the completion handling.
+ */
+ucs_status_t uct_tcp_sockcm_ep_progress_recv(uct_tcp_sockcm_ep_t *cep)
+{
+    ucs_status_t status;
+
+    status = uct_tcp_sockcm_ep_recv_nb(cep);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    if (uct_tcp_sockcm_ep_is_tx_rx_done(cep)) {
+        status = uct_tcp_sockcm_ep_handle_data_received(cep);
+    }
+
+    return status;
+}
+
+/**
+ * Start receiving the client's data on the server.
+ *
+ * As long as less than a whole private data header was received, nothing else
+ * is done. Once the header is available, the expected length is taken from
+ * it, the ep moves to the receiving state and, if all of the data already
+ * arrived, the receive flow is completed.
+ *
+ * @param [in] cep  Connected server endpoint.
+ *
+ * @return UCS_OK, the error of the receive or of the completion handling, or
+ *         UCS_ERR_EXCEEDS_LIMIT if the peer announced more private data than
+ *         the connection manager accepts.
+ */
+ucs_status_t uct_tcp_sockcm_ep_recv(uct_tcp_sockcm_ep_t *cep)
+{
+    uct_tcp_sockcm_priv_data_hdr_t *hdr;
+    ucs_status_t status;
+
+    status = uct_tcp_sockcm_ep_recv_nb(cep);
+    if (status != UCS_OK) {
+        goto out;
+    }
+
+    /* the header did not fully arrive yet, wait for the next event */
+    if (cep->comm_ctx.offset < sizeof(*hdr)) {
+        goto out;
+    }
+
+    hdr = (uct_tcp_sockcm_priv_data_hdr_t *)cep->comm_ctx.buf;
+
+    /* The length is set by the remote peer, so it cannot be trusted. The
+     * receive never goes beyond the ep's buffer, so a larger length could
+     * never be reached by the offset (and adding it to the header size could
+     * wrap around). Fail instead of waiting for data that cannot arrive. */
+    if (hdr->length > uct_tcp_sockcm_ep_get_cm(cep)->priv_data_len) {
+        ucs_error("ep %p (fd=%d): received private data length %zu exceeds "
+                  "the maximum %zu", cep, cep->fd, hdr->length,
+                  uct_tcp_sockcm_ep_get_cm(cep)->priv_data_len);
+        status = UCS_ERR_EXCEEDS_LIMIT;
+        goto out;
+    }
+
+    cep->comm_ctx.length = sizeof(*hdr) + hdr->length;
+    ucs_assertv(cep->comm_ctx.offset <= cep->comm_ctx.length , "%zu > %zu",
+                cep->comm_ctx.offset, cep->comm_ctx.length);
+
+    cep->state |= UCT_TCP_SOCKCM_EP_RECEIVING;
+
+    if (uct_tcp_sockcm_ep_is_tx_rx_done(cep)) {
+        status = uct_tcp_sockcm_ep_handle_data_received(cep);
+    }
+
+out:
+    return status;
+}
+
+/**
+ * Server side part of the ep's constructor: mark the ep as a server ep and
+ * keep the user's server connect callback.
+ *
+ * @return Always UCS_OK.
+ */
+static ucs_status_t uct_tcp_sockcm_ep_server_init(uct_tcp_sockcm_ep_t *cep,
+                                                  const uct_ep_params_t *params)
+{
+    cep->state |= UCT_TCP_SOCKCM_EP_ON_SERVER;
+    cep->super.server.connect_cb = params->sockaddr_connect_cb.server;
+    return UCS_OK;
+}
+
+/**
+ * Client side part of the ep's constructor: mark the ep as a client ep, keep
+ * the user's client connect callback, create a non-blocking socket, start
+ * connecting it to the server's address and register the socket with the
+ * async event handler for write events.
+ *
+ * @return UCS_OK, or an error, in which case the socket (if it was created)
+ *         is closed.
+ */
+static ucs_status_t uct_tcp_sockcm_ep_client_init(uct_tcp_sockcm_ep_t *cep,
+                                                  const uct_ep_params_t *params)
+{
+    uct_tcp_sockcm_t *tcp_sockcm = uct_tcp_sockcm_ep_get_cm(cep);
+    char ip_port_str[UCS_SOCKADDR_STRING_LEN];
+    const struct sockaddr *server_addr;
+    ucs_async_context_t *async_ctx;
+    ucs_status_t status;
+
+    cep->state |= UCT_TCP_SOCKCM_EP_ON_CLIENT;
+    cep->super.client.connect_cb = params->sockaddr_connect_cb.client;
+
+    server_addr = params->sockaddr->addr;
+    status = ucs_socket_create(server_addr->sa_family, SOCK_STREAM, &cep->fd);
+    if (status != UCS_OK) {
+        goto err;
+    }
+
+    /* Set the fd to non-blocking mode. (so that connect() won't be blocking) */
+    status = ucs_sys_fcntl_modfl(cep->fd, O_NONBLOCK, 0);
+    if (status != UCS_OK) {
+        status = UCS_ERR_IO_ERROR;
+        goto err_close_socket;
+    }
+
+    /* try to connect to the server */
+    status = ucs_socket_connect(cep->fd, server_addr);
+    if (UCS_STATUS_IS_ERR(status)) {
+        goto err_close_socket;
+    }
+    ucs_assert((status == UCS_OK) || (status == UCS_INPROGRESS));
+
+    /* the completion of connect() is reported as a write event on the fd */
+    async_ctx = tcp_sockcm->super.iface.worker->async;
+    status    = ucs_async_set_event_handler(async_ctx->mode, cep->fd,
+                                            UCS_EVENT_SET_EVWRITE,
+                                            uct_tcp_sa_data_handler, cep,
+                                            async_ctx);
+    if (status != UCS_OK) {
+        goto err_close_socket;
+    }
+
+    ucs_debug("created a TCP SOCKCM endpoint (fd=%d) on tcp cm %p, "
+              "remote addr: %s", cep->fd, tcp_sockcm,
+              ucs_sockaddr_str(server_addr, ip_port_str, UCS_SOCKADDR_STRING_LEN));
+
+    return status;
+
+err_close_socket:
+    close(cep->fd);
+err:
+    return status;
+}
+
+/**
+ * Constructor of a TCP sockcm endpoint.
+ *
+ * Allocates the ep's send/recv buffer (private data header + maximal private
+ * data length) and runs the client side initialization if a sockaddr was
+ * provided in the parameters, or the server side initialization otherwise.
+ *
+ * @param [in] params  Endpoint creation parameters.
+ *
+ * @return UCS_OK, UCS_ERR_NO_MEMORY if the buffer could not be allocated, or
+ *         the error of the client/server initialization; the buffer is
+ *         released in that case.
+ */
+UCS_CLASS_INIT_FUNC(uct_tcp_sockcm_ep_t, const uct_ep_params_t *params)
+{
+    ucs_status_t status;
+
+    UCS_CLASS_CALL_SUPER_INIT(uct_cm_base_ep_t, params);
+
+    uct_tcp_sockcm_ep_init_comm_ctx(self);
+    self->state        = 0;
+    self->comm_ctx.buf = ucs_malloc(uct_tcp_sockcm_ep_get_cm(self)->priv_data_len +
+                                    sizeof(uct_tcp_sockcm_priv_data_hdr_t),
+                                    "tcp_sockcm priv data");
+    if (self->comm_ctx.buf == NULL) {
+        ucs_error("failed to allocate memory for the ep's send/recv buf");
+        return UCS_ERR_NO_MEMORY;
+    }
+
+    if (params->field_mask & UCT_EP_PARAM_FIELD_SOCKADDR) {
+        status = uct_tcp_sockcm_ep_client_init(self, params);
+    } else {
+        status = uct_tcp_sockcm_ep_server_init(self, params);
+    }
+
+    if (status != UCS_OK) {
+        /* When a constructor fails, the cleanup function of that same class
+         * is not invoked, so the buffer has to be released here or it would
+         * leak. The pointer is cleared to keep any later release harmless. */
+        ucs_free(self->comm_ctx.buf);
+        self->comm_ctx.buf = NULL;
+        return status;
+    }
+
+    ucs_debug("created an endpoint on tcp_sockcm %p id: %d state: %d",
+              uct_tcp_sockcm_ep_get_cm(self), self->fd, self->state);
+
+    return UCS_OK;
+}
+
+/**
+ * Create a client endpoint (when a sockaddr is provided), or hand over to the
+ * user the server endpoint that was passed to it as a connection request.
+ *
+ * @param [in]  params  Endpoint creation parameters.
+ * @param [out] ep_p    Filled with the endpoint.
+ *
+ * @return UCS_OK, an endpoint construction error, or UCS_ERR_INVALID_PARAM if
+ *         neither a sockaddr nor a connection request was provided.
+ */
+ucs_status_t uct_tcp_sockcm_ep_create(const uct_ep_params_t *params, uct_ep_h *ep_p)
+{
+    uct_tcp_sockcm_ep_t *tcp_ep;
+
+    if (params->field_mask &
UCT_EP_PARAM_FIELD_SOCKADDR) {
+        /* create a new endpoint for the client side */
+        return UCS_CLASS_NEW(uct_tcp_sockcm_ep_t, ep_p, params);
+    } else if (params->field_mask & UCT_EP_PARAM_FIELD_CONN_REQUEST) {
+        /* the server's endpoint was already created by the listener, return it */
+        tcp_ep = (uct_tcp_sockcm_ep_t*)(params->conn_request);
+        *ep_p = &tcp_ep->super.super.super;
+        return UCS_OK;
+    } else {
+        ucs_error("either UCT_EP_PARAM_FIELD_SOCKADDR or UCT_EP_PARAM_FIELD_CONN_REQUEST "
+                  "has to be provided");
+        return UCS_ERR_INVALID_PARAM;
+    }
+}
+
+/**
+ * Destructor of a TCP sockcm endpoint: with the worker's async context
+ * blocked, release the send/recv buffer, remove the async handler of the ep's
+ * fd and close the fd.
+ */
+UCS_CLASS_CLEANUP_FUNC(uct_tcp_sockcm_ep_t)
+{
+    uct_tcp_sockcm_t *tcp_sockcm = uct_tcp_sockcm_ep_get_cm(self);
+
+    UCS_ASYNC_BLOCK(tcp_sockcm->super.iface.worker->async);
+
+    ucs_free(self->comm_ctx.buf);
+
+    /* NOTE(review): the handler is removed before the fd is compared to -1,
+     * and nothing in this file sets the fd to -1 - confirm where (if at all)
+     * an invalid fd is marked */
+    ucs_async_remove_handler(self->fd, 1);
+
+    if (self->fd != -1) {
+        close(self->fd);
+    }
+    UCS_ASYNC_UNBLOCK(tcp_sockcm->super.iface.worker->async);
+}
+
+/* NOTE(review): the class is defined with uct_base_ep_t as its super class,
+ * while its constructor calls the super init of uct_cm_base_ep_t - confirm
+ * that this difference is intended */
+UCS_CLASS_DEFINE(uct_tcp_sockcm_ep_t, uct_base_ep_t);
+UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_sockcm_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DEFINE_DELETE_FUNC(uct_tcp_sockcm_ep_t, uct_ep_t);
diff --git a/src/uct/tcp/tcp_sockcm_ep.h b/src/uct/tcp/tcp_sockcm_ep.h
new file mode 100644
index 0000000..ed5e9a9
--- /dev/null
+++ b/src/uct/tcp/tcp_sockcm_ep.h
@@ -0,0 +1,72 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "tcp_listener.h"
+
+
+/**
+ * State flags of a TCP sockcm endpoint. The ep's state is a combination of
+ * the single-bit flags; the CONN_* values are the combinations that are
+ * expected at the different stages of the private data exchange.
+ */
+typedef enum uct_tcp_sockcm_ep_state {
+    UCT_TCP_SOCKCM_EP_ON_SERVER        = UCS_BIT(0),  /* ep is on the server side */
+    UCT_TCP_SOCKCM_EP_ON_CLIENT        = UCS_BIT(1),  /* ep is on the client side */
+    UCT_TCP_SOCKCM_EP_CONNECTED        = UCS_BIT(2),  /* connect()/accept()
+                                                         completed successfully */
+    UCT_TCP_SOCKCM_EP_SENDING          = UCS_BIT(3),  /* ep is sending data */
+    UCT_TCP_SOCKCM_EP_DATA_SENT        = UCS_BIT(4),  /* ep finished sending the data */
+    UCT_TCP_SOCKCM_EP_RECEIVING        = UCS_BIT(5),  /* ep is receiving data */
+    UCT_TCP_SOCKCM_EP_DATA_RECEIVED    = UCS_BIT(6),  /* ep finished receiving the data */
+
+    /* a connected ep is sending data */
+    UCT_TCP_SOCKCM_EP_CONN_SENDING     = UCT_TCP_SOCKCM_EP_CONNECTED |
+                                         UCT_TCP_SOCKCM_EP_SENDING,
+
+    /* a connected ep completed the data sending */
+    UCT_TCP_SOCKCM_EP_CONN_SENT        = UCT_TCP_SOCKCM_EP_CONNECTED |
+                                         UCT_TCP_SOCKCM_EP_SENDING |
+                                         UCT_TCP_SOCKCM_EP_DATA_SENT,
+
+    /* a connected ep is receiving data */
+    UCT_TCP_SOCKCM_EP_CONN_RECEIVING   = UCT_TCP_SOCKCM_EP_CONNECTED |
+                                         UCT_TCP_SOCKCM_EP_RECEIVING,
+
+    /* a connected ep completed the data receiving */
+    UCT_TCP_SOCKCM_EP_CONN_RECEIVED    = UCT_TCP_SOCKCM_EP_CONNECTED |
+                                         UCT_TCP_SOCKCM_EP_RECEIVING |
+                                         UCT_TCP_SOCKCM_EP_DATA_RECEIVED
+} uct_tcp_sockcm_ep_state_t;
+
+
+/**
+ * TCP SOCKCM endpoint that is opened on a connection manager
+ */
+struct uct_tcp_sockcm_ep {
+    uct_cm_base_ep_t   super;
+    int                fd;        /* the fd of the socket on the ep */
+    uint16_t           state;     /* ep state (uct_tcp_sockcm_ep_state_t) */
+    uct_tcp_listener_t *listener; /* the listener the ep belongs to - used on the server side */
+    ucs_list_link_t    list;      /* list item on the cm ep_list - used on the server side */
+    struct {
+        void           *buf;      /* Data buffer to send/recv */
+        size_t         length;    /* How much data to send/recv */
+        size_t         offset;    /* Next offset to send/recv */
+    } comm_ctx;
+};
+
+UCS_CLASS_DECLARE(uct_tcp_sockcm_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DECLARE_NEW_FUNC(uct_tcp_sockcm_ep_t,
uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DECLARE_DELETE_FUNC(uct_tcp_sockcm_ep_t, uct_ep_t);
+
+/* create a client ep, or return the server ep that was passed as a connection request */
+ucs_status_t uct_tcp_sockcm_ep_create(const uct_ep_params_t *params, uct_ep_h* ep_p);
+
+/* not implemented yet */
+ucs_status_t uct_tcp_sockcm_ep_disconnect(uct_ep_h ep, unsigned flags);
+
+/* client: pack the private data and start sending it to the server */
+ucs_status_t uct_tcp_sockcm_ep_send_priv_data(uct_tcp_sockcm_ep_t *cep);
+
+/* client: continue sending the data that is pending on the ep */
+ucs_status_t uct_tcp_sockcm_ep_progress_send(uct_tcp_sockcm_ep_t *cep);
+
+/* server: start receiving the client's data (header first) */
+ucs_status_t uct_tcp_sockcm_ep_recv(uct_tcp_sockcm_ep_t *cep);
+
+/* server: continue receiving the client's data */
+ucs_status_t uct_tcp_sockcm_ep_progress_recv(uct_tcp_sockcm_ep_t *cep);
+
+/* NOTE(review): no definition of this function appears in tcp_sockcm_ep.c -
+ * confirm that it is defined elsewhere or drop the declaration */
+size_t uct_tcp_sockcm_ep_get_priv_data_len(uct_tcp_sockcm_ep_t *cep);
diff --git a/src/uct/ugni/Makefile.am b/src/uct/ugni/Makefile.am
new file mode 100644
index 0000000..42993f6
--- /dev/null
+++ b/src/uct/ugni/Makefile.am
@@ -0,0 +1,43 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+
+# Everything below applies only when the HAVE_CRAY_UGNI conditional is true
+# (presumably set by configure when Cray uGNI is available).
+if HAVE_CRAY_UGNI
+
+# The uGNI transport is built as a libtool module that links against libucs
+# and libuct.
+module_LTLIBRARIES       = libuct_ugni.la
+libuct_ugni_la_CPPFLAGS = $(BASE_CPPFLAGS)
+libuct_ugni_la_CFLAGS   = $(BASE_CFLAGS) $(CRAY_UGNI_CFLAGS)
+libuct_ugni_la_LIBADD   = $(top_builddir)/src/ucs/libucs.la \
+                          $(top_builddir)/src/uct/libuct.la
+libuct_ugni_la_LDFLAGS  = $(CRAY_UGNI_LIBS) -version-info $(SOVERSION)
+
+# Headers that are used for the build but are not installed.
+noinst_HEADERS = \
+    base/ugni_def.h \
+    base/ugni_types.h \
+    base/ugni_md.h \
+    base/ugni_device.h \
+    base/ugni_iface.h \
+    base/ugni_ep.h \
+    rdma/ugni_rdma_iface.h \
+    rdma/ugni_rdma_ep.h \
+    udt/ugni_udt_iface.h \
+    udt/ugni_udt_ep.h \
+    smsg/ugni_smsg_iface.h \
+    smsg/ugni_smsg_ep.h
+
+# Sources of the module: the common base and the rdma, udt and smsg parts.
+libuct_ugni_la_SOURCES = \
+    base/ugni_md.c \
+    base/ugni_device.c \
+    base/ugni_iface.c \
+    base/ugni_ep.c \
+    rdma/ugni_rdma_iface.c \
+    rdma/ugni_rdma_ep.c \
+    udt/ugni_udt_iface.c \
+    udt/ugni_udt_ep.c \
+    smsg/ugni_smsg_iface.c \
+    smsg/ugni_smsg_ep.c
+
+# Rules shared by the project's loadable modules.
+include $(top_srcdir)/config/module.am
+
+endif
diff --git a/src/uct/ugni/Makefile.in b/src/uct/ugni/Makefile.in
new file mode 100644
index 0000000..22c4d92
--- /dev/null
+++
b/src/uct/ugni/Makefile.in @@ -0,0 +1,1058 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/uct/ugni +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + 
$(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d 
+CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +@HAVE_CRAY_UGNI_TRUE@libuct_ugni_la_DEPENDENCIES = \ +@HAVE_CRAY_UGNI_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_CRAY_UGNI_TRUE@ $(top_builddir)/src/uct/libuct.la +am__libuct_ugni_la_SOURCES_DIST = base/ugni_md.c base/ugni_device.c \ + base/ugni_iface.c base/ugni_ep.c rdma/ugni_rdma_iface.c \ + rdma/ugni_rdma_ep.c udt/ugni_udt_iface.c udt/ugni_udt_ep.c \ + smsg/ugni_smsg_iface.c smsg/ugni_smsg_ep.c +am__dirstamp = $(am__leading_dot)dirstamp +@HAVE_CRAY_UGNI_TRUE@am_libuct_ugni_la_OBJECTS = \ +@HAVE_CRAY_UGNI_TRUE@ base/libuct_ugni_la-ugni_md.lo \ +@HAVE_CRAY_UGNI_TRUE@ base/libuct_ugni_la-ugni_device.lo \ +@HAVE_CRAY_UGNI_TRUE@ base/libuct_ugni_la-ugni_iface.lo \ +@HAVE_CRAY_UGNI_TRUE@ base/libuct_ugni_la-ugni_ep.lo \ +@HAVE_CRAY_UGNI_TRUE@ rdma/libuct_ugni_la-ugni_rdma_iface.lo \ +@HAVE_CRAY_UGNI_TRUE@ rdma/libuct_ugni_la-ugni_rdma_ep.lo \ +@HAVE_CRAY_UGNI_TRUE@ udt/libuct_ugni_la-ugni_udt_iface.lo \ +@HAVE_CRAY_UGNI_TRUE@ udt/libuct_ugni_la-ugni_udt_ep.lo \ +@HAVE_CRAY_UGNI_TRUE@ smsg/libuct_ugni_la-ugni_smsg_iface.lo \ +@HAVE_CRAY_UGNI_TRUE@ smsg/libuct_ugni_la-ugni_smsg_ep.lo +libuct_ugni_la_OBJECTS = $(am_libuct_ugni_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libuct_ugni_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libuct_ugni_la_CFLAGS) $(CFLAGS) $(libuct_ugni_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@HAVE_CRAY_UGNI_TRUE@am_libuct_ugni_la_rpath = -rpath $(moduledir) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = 
$(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = base/$(DEPDIR)/libuct_ugni_la-ugni_device.Plo \ + base/$(DEPDIR)/libuct_ugni_la-ugni_ep.Plo \ + base/$(DEPDIR)/libuct_ugni_la-ugni_iface.Plo \ + base/$(DEPDIR)/libuct_ugni_la-ugni_md.Plo \ + rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_ep.Plo \ + rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_iface.Plo \ + smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_ep.Plo \ + smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_iface.Plo \ + udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_ep.Plo \ + udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_iface.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libuct_ugni_la_SOURCES) +DIST_SOURCES = $(am__libuct_ugni_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = base/ugni_def.h base/ugni_types.h \ + base/ugni_md.h base/ugni_device.h base/ugni_iface.h \ + base/ugni_ep.h rdma/ugni_rdma_iface.h rdma/ugni_rdma_ep.h \ + udt/ugni_udt_iface.h udt/ugni_udt_ep.h smsg/ugni_smsg_iface.h \ + smsg/ugni_smsg_ep.h +HEADERS = $(noinst_HEADERS) +am__tagged_files = 
$(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = 
@DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = 
@OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ 
+localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_CRAY_UGNI_TRUE@module_LTLIBRARIES = libuct_ugni.la +@HAVE_CRAY_UGNI_TRUE@libuct_ugni_la_CPPFLAGS = $(BASE_CPPFLAGS) +@HAVE_CRAY_UGNI_TRUE@libuct_ugni_la_CFLAGS = $(BASE_CFLAGS) $(CRAY_UGNI_CFLAGS) +@HAVE_CRAY_UGNI_TRUE@libuct_ugni_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_CRAY_UGNI_TRUE@ $(top_builddir)/src/uct/libuct.la + +@HAVE_CRAY_UGNI_TRUE@libuct_ugni_la_LDFLAGS = $(CRAY_UGNI_LIBS) -version-info $(SOVERSION) +@HAVE_CRAY_UGNI_TRUE@noinst_HEADERS = \ +@HAVE_CRAY_UGNI_TRUE@ base/ugni_def.h \ +@HAVE_CRAY_UGNI_TRUE@ base/ugni_types.h \ +@HAVE_CRAY_UGNI_TRUE@ base/ugni_md.h \ +@HAVE_CRAY_UGNI_TRUE@ base/ugni_device.h \ +@HAVE_CRAY_UGNI_TRUE@ base/ugni_iface.h \ +@HAVE_CRAY_UGNI_TRUE@ base/ugni_ep.h \ +@HAVE_CRAY_UGNI_TRUE@ rdma/ugni_rdma_iface.h \ +@HAVE_CRAY_UGNI_TRUE@ rdma/ugni_rdma_ep.h \ +@HAVE_CRAY_UGNI_TRUE@ udt/ugni_udt_iface.h \ +@HAVE_CRAY_UGNI_TRUE@ udt/ugni_udt_ep.h \ +@HAVE_CRAY_UGNI_TRUE@ smsg/ugni_smsg_iface.h \ +@HAVE_CRAY_UGNI_TRUE@ smsg/ugni_smsg_ep.h + +@HAVE_CRAY_UGNI_TRUE@libuct_ugni_la_SOURCES = \ +@HAVE_CRAY_UGNI_TRUE@ base/ugni_md.c \ +@HAVE_CRAY_UGNI_TRUE@ base/ugni_device.c \ +@HAVE_CRAY_UGNI_TRUE@ base/ugni_iface.c \ +@HAVE_CRAY_UGNI_TRUE@ base/ugni_ep.c \ +@HAVE_CRAY_UGNI_TRUE@ rdma/ugni_rdma_iface.c \ +@HAVE_CRAY_UGNI_TRUE@ rdma/ugni_rdma_ep.c \ +@HAVE_CRAY_UGNI_TRUE@ udt/ugni_udt_iface.c \ 
+@HAVE_CRAY_UGNI_TRUE@ udt/ugni_udt_ep.c \ +@HAVE_CRAY_UGNI_TRUE@ smsg/ugni_smsg_iface.c \ +@HAVE_CRAY_UGNI_TRUE@ smsg/ugni_smsg_ep.c + + +# Automake silent rules +@HAVE_CRAY_UGNI_TRUE@AM_V_LN = $(AM_V_LN_@AM_V@) +@HAVE_CRAY_UGNI_TRUE@AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +@HAVE_CRAY_UGNI_TRUE@AM_V_LN_0 = echo " LN " +@HAVE_CRAY_UGNI_TRUE@AM_V_LN_1 = true +@HAVE_CRAY_UGNI_TRUE@local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/uct/ugni/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/uct/ugni/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 
's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +base/$(am__dirstamp): + @$(MKDIR_P) base + @: > base/$(am__dirstamp) +base/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) base/$(DEPDIR) + @: > base/$(DEPDIR)/$(am__dirstamp) +base/libuct_ugni_la-ugni_md.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +base/libuct_ugni_la-ugni_device.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +base/libuct_ugni_la-ugni_iface.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +base/libuct_ugni_la-ugni_ep.lo: base/$(am__dirstamp) \ + base/$(DEPDIR)/$(am__dirstamp) +rdma/$(am__dirstamp): + @$(MKDIR_P) rdma + @: > rdma/$(am__dirstamp) +rdma/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) rdma/$(DEPDIR) + @: > rdma/$(DEPDIR)/$(am__dirstamp) +rdma/libuct_ugni_la-ugni_rdma_iface.lo: rdma/$(am__dirstamp) \ + rdma/$(DEPDIR)/$(am__dirstamp) +rdma/libuct_ugni_la-ugni_rdma_ep.lo: rdma/$(am__dirstamp) \ + rdma/$(DEPDIR)/$(am__dirstamp) +udt/$(am__dirstamp): + @$(MKDIR_P) udt + @: > udt/$(am__dirstamp) +udt/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) udt/$(DEPDIR) + @: > udt/$(DEPDIR)/$(am__dirstamp) +udt/libuct_ugni_la-ugni_udt_iface.lo: udt/$(am__dirstamp) \ + udt/$(DEPDIR)/$(am__dirstamp) +udt/libuct_ugni_la-ugni_udt_ep.lo: udt/$(am__dirstamp) \ + udt/$(DEPDIR)/$(am__dirstamp) +smsg/$(am__dirstamp): + @$(MKDIR_P) smsg + @: > smsg/$(am__dirstamp) +smsg/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) smsg/$(DEPDIR) + @: > smsg/$(DEPDIR)/$(am__dirstamp) +smsg/libuct_ugni_la-ugni_smsg_iface.lo: smsg/$(am__dirstamp) \ + smsg/$(DEPDIR)/$(am__dirstamp) +smsg/libuct_ugni_la-ugni_smsg_ep.lo: smsg/$(am__dirstamp) \ + smsg/$(DEPDIR)/$(am__dirstamp) + +libuct_ugni.la: $(libuct_ugni_la_OBJECTS) $(libuct_ugni_la_DEPENDENCIES) $(EXTRA_libuct_ugni_la_DEPENDENCIES) + $(AM_V_CCLD)$(libuct_ugni_la_LINK) $(am_libuct_ugni_la_rpath) $(libuct_ugni_la_OBJECTS) $(libuct_ugni_la_LIBADD) $(LIBS) + 
+mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f base/*.$(OBJEXT) + -rm -f base/*.lo + -rm -f rdma/*.$(OBJEXT) + -rm -f rdma/*.lo + -rm -f smsg/*.$(OBJEXT) + -rm -f smsg/*.lo + -rm -f udt/*.$(OBJEXT) + -rm -f udt/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_ugni_la-ugni_device.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_ugni_la-ugni_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_ugni_la-ugni_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@base/$(DEPDIR)/libuct_ugni_la-ugni_md.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_iface.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_ep.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_iface.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +base/libuct_ugni_la-ugni_md.lo: base/ugni_md.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -MT base/libuct_ugni_la-ugni_md.lo -MD -MP -MF base/$(DEPDIR)/libuct_ugni_la-ugni_md.Tpo -c -o base/libuct_ugni_la-ugni_md.lo `test -f 'base/ugni_md.c' || echo '$(srcdir)/'`base/ugni_md.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_ugni_la-ugni_md.Tpo base/$(DEPDIR)/libuct_ugni_la-ugni_md.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/ugni_md.c' object='base/libuct_ugni_la-ugni_md.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -c -o base/libuct_ugni_la-ugni_md.lo `test -f 'base/ugni_md.c' || echo '$(srcdir)/'`base/ugni_md.c + +base/libuct_ugni_la-ugni_device.lo: base/ugni_device.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -MT base/libuct_ugni_la-ugni_device.lo -MD -MP -MF base/$(DEPDIR)/libuct_ugni_la-ugni_device.Tpo -c -o base/libuct_ugni_la-ugni_device.lo `test -f 'base/ugni_device.c' || echo '$(srcdir)/'`base/ugni_device.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_ugni_la-ugni_device.Tpo base/$(DEPDIR)/libuct_ugni_la-ugni_device.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/ugni_device.c' object='base/libuct_ugni_la-ugni_device.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -c -o base/libuct_ugni_la-ugni_device.lo `test -f 'base/ugni_device.c' || echo '$(srcdir)/'`base/ugni_device.c + +base/libuct_ugni_la-ugni_iface.lo: base/ugni_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -MT base/libuct_ugni_la-ugni_iface.lo -MD -MP -MF base/$(DEPDIR)/libuct_ugni_la-ugni_iface.Tpo -c -o base/libuct_ugni_la-ugni_iface.lo `test -f 'base/ugni_iface.c' || echo 
'$(srcdir)/'`base/ugni_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_ugni_la-ugni_iface.Tpo base/$(DEPDIR)/libuct_ugni_la-ugni_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/ugni_iface.c' object='base/libuct_ugni_la-ugni_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -c -o base/libuct_ugni_la-ugni_iface.lo `test -f 'base/ugni_iface.c' || echo '$(srcdir)/'`base/ugni_iface.c + +base/libuct_ugni_la-ugni_ep.lo: base/ugni_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -MT base/libuct_ugni_la-ugni_ep.lo -MD -MP -MF base/$(DEPDIR)/libuct_ugni_la-ugni_ep.Tpo -c -o base/libuct_ugni_la-ugni_ep.lo `test -f 'base/ugni_ep.c' || echo '$(srcdir)/'`base/ugni_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) base/$(DEPDIR)/libuct_ugni_la-ugni_ep.Tpo base/$(DEPDIR)/libuct_ugni_la-ugni_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='base/ugni_ep.c' object='base/libuct_ugni_la-ugni_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -c -o base/libuct_ugni_la-ugni_ep.lo `test -f 'base/ugni_ep.c' || echo '$(srcdir)/'`base/ugni_ep.c + +rdma/libuct_ugni_la-ugni_rdma_iface.lo: rdma/ugni_rdma_iface.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -MT rdma/libuct_ugni_la-ugni_rdma_iface.lo -MD -MP -MF rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_iface.Tpo -c -o rdma/libuct_ugni_la-ugni_rdma_iface.lo `test -f 'rdma/ugni_rdma_iface.c' || echo '$(srcdir)/'`rdma/ugni_rdma_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_iface.Tpo rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rdma/ugni_rdma_iface.c' object='rdma/libuct_ugni_la-ugni_rdma_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -c -o rdma/libuct_ugni_la-ugni_rdma_iface.lo `test -f 'rdma/ugni_rdma_iface.c' || echo '$(srcdir)/'`rdma/ugni_rdma_iface.c + +rdma/libuct_ugni_la-ugni_rdma_ep.lo: rdma/ugni_rdma_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -MT rdma/libuct_ugni_la-ugni_rdma_ep.lo -MD -MP -MF rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_ep.Tpo -c -o rdma/libuct_ugni_la-ugni_rdma_ep.lo `test -f 'rdma/ugni_rdma_ep.c' || echo '$(srcdir)/'`rdma/ugni_rdma_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_ep.Tpo rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='rdma/ugni_rdma_ep.c' object='rdma/libuct_ugni_la-ugni_rdma_ep.lo' libtool=yes 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -c -o rdma/libuct_ugni_la-ugni_rdma_ep.lo `test -f 'rdma/ugni_rdma_ep.c' || echo '$(srcdir)/'`rdma/ugni_rdma_ep.c + +udt/libuct_ugni_la-ugni_udt_iface.lo: udt/ugni_udt_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -MT udt/libuct_ugni_la-ugni_udt_iface.lo -MD -MP -MF udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_iface.Tpo -c -o udt/libuct_ugni_la-ugni_udt_iface.lo `test -f 'udt/ugni_udt_iface.c' || echo '$(srcdir)/'`udt/ugni_udt_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_iface.Tpo udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='udt/ugni_udt_iface.c' object='udt/libuct_ugni_la-ugni_udt_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -c -o udt/libuct_ugni_la-ugni_udt_iface.lo `test -f 'udt/ugni_udt_iface.c' || echo '$(srcdir)/'`udt/ugni_udt_iface.c + +udt/libuct_ugni_la-ugni_udt_ep.lo: udt/ugni_udt_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) 
$(libuct_ugni_la_CFLAGS) $(CFLAGS) -MT udt/libuct_ugni_la-ugni_udt_ep.lo -MD -MP -MF udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_ep.Tpo -c -o udt/libuct_ugni_la-ugni_udt_ep.lo `test -f 'udt/ugni_udt_ep.c' || echo '$(srcdir)/'`udt/ugni_udt_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_ep.Tpo udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='udt/ugni_udt_ep.c' object='udt/libuct_ugni_la-ugni_udt_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -c -o udt/libuct_ugni_la-ugni_udt_ep.lo `test -f 'udt/ugni_udt_ep.c' || echo '$(srcdir)/'`udt/ugni_udt_ep.c + +smsg/libuct_ugni_la-ugni_smsg_iface.lo: smsg/ugni_smsg_iface.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -MT smsg/libuct_ugni_la-ugni_smsg_iface.lo -MD -MP -MF smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_iface.Tpo -c -o smsg/libuct_ugni_la-ugni_smsg_iface.lo `test -f 'smsg/ugni_smsg_iface.c' || echo '$(srcdir)/'`smsg/ugni_smsg_iface.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_iface.Tpo smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_iface.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='smsg/ugni_smsg_iface.c' object='smsg/libuct_ugni_la-ugni_smsg_iface.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -c -o smsg/libuct_ugni_la-ugni_smsg_iface.lo `test -f 'smsg/ugni_smsg_iface.c' || echo '$(srcdir)/'`smsg/ugni_smsg_iface.c + +smsg/libuct_ugni_la-ugni_smsg_ep.lo: smsg/ugni_smsg_ep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -MT smsg/libuct_ugni_la-ugni_smsg_ep.lo -MD -MP -MF smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_ep.Tpo -c -o smsg/libuct_ugni_la-ugni_smsg_ep.lo `test -f 'smsg/ugni_smsg_ep.c' || echo '$(srcdir)/'`smsg/ugni_smsg_ep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_ep.Tpo smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_ep.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='smsg/ugni_smsg_ep.c' object='smsg/libuct_ugni_la-ugni_smsg_ep.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libuct_ugni_la_CPPFLAGS) $(CPPFLAGS) $(libuct_ugni_la_CFLAGS) $(CFLAGS) -c -o smsg/libuct_ugni_la-ugni_smsg_ep.lo `test -f 'smsg/ugni_smsg_ep.c' || echo '$(srcdir)/'`smsg/ugni_smsg_ep.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf base/.libs base/_libs + -rm -rf rdma/.libs rdma/_libs + -rm -rf smsg/.libs smsg/_libs + -rm -rf udt/.libs udt/_libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + 
test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +@HAVE_CRAY_UGNI_FALSE@all-local: +all-am: Makefile $(LTLIBRARIES) $(HEADERS) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f base/$(DEPDIR)/$(am__dirstamp) + -rm -f base/$(am__dirstamp) + -rm -f rdma/$(DEPDIR)/$(am__dirstamp) + -rm -f rdma/$(am__dirstamp) + -rm -f smsg/$(DEPDIR)/$(am__dirstamp) + -rm -f smsg/$(am__dirstamp) + -rm -f udt/$(DEPDIR)/$(am__dirstamp) + -rm -f udt/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f base/$(DEPDIR)/libuct_ugni_la-ugni_device.Plo + -rm -f base/$(DEPDIR)/libuct_ugni_la-ugni_ep.Plo + -rm -f base/$(DEPDIR)/libuct_ugni_la-ugni_iface.Plo + -rm -f base/$(DEPDIR)/libuct_ugni_la-ugni_md.Plo + -rm -f rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_ep.Plo + -rm -f rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_iface.Plo + -rm -f smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_ep.Plo + -rm -f smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_iface.Plo + -rm -f udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_ep.Plo + -rm -f udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_iface.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f base/$(DEPDIR)/libuct_ugni_la-ugni_device.Plo + -rm -f base/$(DEPDIR)/libuct_ugni_la-ugni_ep.Plo + -rm -f base/$(DEPDIR)/libuct_ugni_la-ugni_iface.Plo + -rm -f base/$(DEPDIR)/libuct_ugni_la-ugni_md.Plo + -rm -f rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_ep.Plo + -rm -f rdma/$(DEPDIR)/libuct_ugni_la-ugni_rdma_iface.Plo + -rm -f smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_ep.Plo + -rm -f smsg/$(DEPDIR)/libuct_ugni_la-ugni_smsg_iface.Plo + -rm -f udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_ep.Plo + -rm -f udt/$(DEPDIR)/libuct_ugni_la-ugni_udt_iface.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + 
mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: Makefile + + +@HAVE_CRAY_UGNI_TRUE@all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to create proper makefile dependencies +@HAVE_CRAY_UGNI_TRUE@$(local_la_modules): $(module_LTLIBRARIES) +@HAVE_CRAY_UGNI_TRUE@ $(AM_V_at)$(MKDIR_P) $(localmoduledir) +@HAVE_CRAY_UGNI_TRUE@ $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_CRAY_UGNI_TRUE@ (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ +@HAVE_CRAY_UGNI_TRUE@ done +@HAVE_CRAY_UGNI_TRUE@ @for lib in *.la $(objdir)/*$(shrext)*; do \ +@HAVE_CRAY_UGNI_TRUE@ $(AM_V_LN) $$lib; \ +@HAVE_CRAY_UGNI_TRUE@ done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT: diff --git a/src/uct/ugni/base/ugni_def.h b/src/uct/ugni/base/ugni_def.h new file mode 100644 index 0000000..4edab3b --- /dev/null +++ b/src/uct/ugni/base/ugni_def.h @@ -0,0 +1,77 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2017. ALL RIGHTS RESERVED. + * Copyright (c) Los Alamos National Security, LLC. 2018. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_UGNI_DEF_H +#define UCT_UGNI_DEF_H + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include + +#define UCT_UGNI_MD_NAME "ugni" +#define UCT_UGNI_HASH_SIZE 256 +#define UCT_UGNI_MAX_DEVICES 2 +#define UCT_UGNI_LOCAL_CQ 8192 +#define UCT_UGNI_RKEY_MAGIC 0xdeadbeefLL +#define UCT_UGNI_MAX_TYPE_NAME 10 +#define LEN_64 (sizeof(uint64_t)) +#define LEN_32 (sizeof(uint32_t)) +#define UGNI_GET_ALIGN 4 +#define UCT_UGNI_INIT_FLUSH 1 +#define UCT_UGNI_INIT_FLUSH_REQ 2 + +#define UCT_UGNI_ZERO_LENGTH_POST(len) \ +if (0 == (len)) { /* argument parenthesized so any expression can be passed */ \ + ucs_trace_data("Zero length request: skip it"); \ + return UCS_OK; \ +} + +#define uct_ugni_enter_async(x) \ +do {\ + ucs_trace_async("Taking lock on worker %p", (x)->super.worker); \ + UCS_ASYNC_BLOCK((x)->super.worker->async); \ +} while(0) + +#define uct_ugni_leave_async(x) \ +do {\ + ucs_trace_async("Releasing lock on worker %p", (x)->super.worker); \ + UCS_ASYNC_UNBLOCK((x)->super.worker->async); \ +} while(0) + +#if ENABLE_MT +#define uct_ugni_check_lock_needed(_cdm) (UCS_THREAD_MODE_MULTI == (_cdm)->thread_mode) /* parenthesized: safe inside larger expressions */ +#define uct_ugni_cdm_init_lock(_cdm) ucs_spinlock_init(&(_cdm)->lock) +#define uct_ugni_cdm_destroy_lock(_cdm) \ + do { \ + ucs_status_t status; \ + \ + status = ucs_spinlock_destroy(&(_cdm)->lock); \ + if (status != UCS_OK) {\ + ucs_warn("ucs_spinlock_destroy() failed (%d)", status); \ + } \ + } while(0) +#define uct_ugni_cdm_lock(_cdm) \ +if (uct_ugni_check_lock_needed(_cdm)) { \ + ucs_trace_async("Taking lock"); \ + ucs_spin_lock(&(_cdm)->lock); \ +} +#define uct_ugni_cdm_unlock(_cdm) \ +if (uct_ugni_check_lock_needed(_cdm)) 
{ \ + ucs_trace_async("Releasing lock"); \ + ucs_spin_unlock(&(_cdm)->lock); \ +} +#else +#define uct_ugni_cdm_init_lock(x) UCS_OK +#define uct_ugni_cdm_destroy_lock(x) {} +#define uct_ugni_cdm_lock(x) +#define uct_ugni_cdm_unlock(x) +#define uct_ugni_check_lock_needed(x) 0 +#endif + +#endif diff --git a/src/uct/ugni/base/ugni_device.c b/src/uct/ugni/base/ugni_device.c new file mode 100644 index 0000000..95af351 --- /dev/null +++ b/src/uct/ugni/base/ugni_device.c @@ -0,0 +1,513 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * Copyright (c) Los Alamos National Security, LLC. 2018. ALL RIGHTS RESERVED. + * Copyright (c) Triad National Security, LLC. 2018. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#include "ugni_device.h" +#include "ugni_md.h" +#include "ugni_iface.h" +#include +#include +#include +#include + +/** + * @brief Static information about UGNI job + * + * This is static information about Cray's job. + * The information is static and does not change after job launch. + * Therefore, the information is only fetched once. 
+ */ +typedef struct uct_ugni_job_info { + uint8_t ptag; /**< Protection tag */ + uint32_t cookie; /**< Unique identifier generated by the PMI system */ + int num_devices; /**< Number of devices */ + uct_ugni_device_t devices[UCT_UGNI_MAX_DEVICES]; /**< Array of devices */ + int initialized; /**< Info status */ +} uct_ugni_job_info_t; + +static uct_ugni_job_info_t job_info = { + .num_devices = -1, +}; + +uint32_t ugni_domain_counter = 0; + +void uct_ugni_device_get_resource(uct_ugni_device_t *dev, + uct_tl_device_resource_t *tl_device) +{ + ucs_snprintf_zero(tl_device->name, sizeof(tl_device->name), "%s", dev->fname); + tl_device->type = UCT_DEVICE_TYPE_NET; +} + +ucs_status_t uct_ugni_query_devices(uct_md_h md, + uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p) +{ + uct_tl_device_resource_t *resources; + int num_devices = job_info.num_devices; + uct_ugni_device_t *devs = job_info.devices; + int i; + ucs_status_t status = UCS_OK; + + resources = ucs_calloc(job_info.num_devices, sizeof(*resources), + "resource desc"); + if (NULL == resources) { + ucs_error("Failed to allocate memory"); + num_devices = 0; + resources = NULL; + status = UCS_ERR_NO_MEMORY; + goto error; + } + + for (i = 0; i < job_info.num_devices; i++) { + uct_ugni_device_get_resource(&devs[i], &resources[i]); + } + +error: + *num_tl_devices_p = num_devices; + *tl_devices_p = resources; + + return status; +} + +static ucs_status_t get_cookie(uint32_t *cookie) +{ + char *cookie_str; + char *cookie_token; + + cookie_str = getenv("PMI_GNI_COOKIE"); + if (NULL == cookie_str) { + ucs_error("getenv PMI_GNI_COOKIE failed"); + return UCS_ERR_IO_ERROR; + } + + cookie_token = strtok(cookie_str, ":"); + if (NULL == cookie_token) { + ucs_error("Failed to read PMI_GNI_COOKIE token"); + return UCS_ERR_IO_ERROR; + } + + *cookie = (uint32_t) atoi(cookie_token); + return UCS_OK; +} + +static ucs_status_t get_ptag(uint8_t *ptag) +{ + char *ptag_str; + char *ptag_token; + + ptag_str = 
getenv("PMI_GNI_PTAG"); + if (NULL == ptag_str) { + ucs_error("getenv PMI_GNI_PTAG failed"); + return UCS_ERR_IO_ERROR; + } + + ptag_token = strtok(ptag_str, ":"); + if (NULL == ptag_token) { + ucs_error("Failed to read PMI_GNI_PTAG token"); + return UCS_ERR_IO_ERROR; + } + + *ptag = (uint8_t) atoi(ptag_token); + return UCS_OK; +} + +static ucs_status_t uct_ugni_fetch_pmi() +{ + int spawned = 0, + rc; + + if(job_info.initialized) { + return UCS_OK; + } + + if (NULL == getenv ("PMI_GNI_COOKIE")) { + /* Fetch information from Cray's PMI if needed */ + rc = PMI_Init(&spawned); + if (PMI_SUCCESS != rc) { + ucs_error("PMI_Init failed, Error status: %d", rc); + return UCS_ERR_IO_ERROR; + } + ucs_debug("PMI spawned %d", spawned); + } + + rc = get_ptag(&job_info.ptag); + if (UCS_OK != rc) { + ucs_error("get_ptag failed, Error status: %d", rc); + return rc; + } + ucs_debug("PMI ptag %d", job_info.ptag); + + rc = get_cookie(&job_info.cookie); + if (UCS_OK != rc) { + ucs_error("get_cookie failed, Error status: %d", rc); + return rc; + } + ucs_debug("PMI cookie %d", job_info.cookie); + + /* ptag and cookie are cached; later calls return early */ + job_info.initialized = 1; + ucs_debug("UGNI job info was activated"); + return UCS_OK; +} + +static uct_ugni_job_info_t *uct_ugni_get_job_info() +{ + ucs_status_t status; + + status = uct_ugni_fetch_pmi(); + if (UCS_OK != status) { + ucs_error("Could not fetch PMI info."); + return NULL; + } + return &job_info; +} + +ucs_status_t init_device_list() +{ + ucs_status_t status = UCS_OK; + int i, num_active_devices; + int *dev_ids = NULL; + gni_return_t ugni_rc = GNI_RC_SUCCESS; + uct_ugni_job_info_t *inf = NULL; + + /* check if devices were already initialized */ + + inf = uct_ugni_get_job_info(); + if (NULL == inf) { + ucs_error("Unable to get Cray PMI info"); + status = UCS_ERR_IO_ERROR; + goto err_zero; + } + + if (-1 != inf->num_devices) { + ucs_debug("The device list is already initialized"); + status = UCS_OK; + goto err_zero; + } + + ugni_rc = 
GNI_GetNumLocalDevices(&inf->num_devices); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_GetNumLocalDevices failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + status = UCS_ERR_NO_DEVICE; + goto err_zero; + } + + if (0 == inf->num_devices) { + ucs_debug("UGNI No device found"); + status = UCS_OK; + goto err_zero; + } + + if (inf->num_devices >= UCT_UGNI_MAX_DEVICES) { + ucs_error("UGNI, number of discovered devices (%d) " \ + "is above the maximum supported devices (%d)", + inf->num_devices, UCT_UGNI_MAX_DEVICES); + status = UCS_ERR_UNSUPPORTED; + goto err_zero; + } + + dev_ids = ucs_calloc(inf->num_devices, sizeof(int), "ugni device ids"); + if (NULL == dev_ids) { + ucs_error("Failed to allocate memory"); + status = UCS_ERR_NO_MEMORY; + goto err_zero; + } + + ugni_rc = GNI_GetLocalDeviceIds(inf->num_devices, dev_ids); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_GetLocalDeviceIds failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + status = UCS_ERR_NO_DEVICE; + goto err_dev_id; + } + + num_active_devices = 0; + for (i = 0; i < inf->num_devices; i++) { + status = uct_ugni_device_create(dev_ids[i], num_active_devices, &inf->devices[i]); + if (status != UCS_OK) { + ucs_warn("Failed to initialize ugni device %d (%s), ignoring it", + i, ucs_status_string(status)); + } else { + ++num_active_devices; + } + } + + if (num_active_devices != inf->num_devices) { + ucs_warn("Error in detection devices"); + status = UCS_ERR_NO_DEVICE; + goto err_dev_id; + } + + ucs_debug("Initialized UGNI component with %d devices", inf->num_devices); + +err_dev_id: + ucs_free(dev_ids); +err_zero: + return status; +} + +uct_ugni_device_t *uct_ugni_device_by_name(const char *dev_name) +{ + uct_ugni_device_t *dev; + unsigned dev_index; + + if ((NULL == dev_name)) { + ucs_error("Bad parameter. 
Device name is set to NULL"); + return NULL; + } + + for (dev_index = 0; dev_index < job_info.num_devices; ++dev_index) { + dev = &job_info.devices[dev_index]; + if ((strlen(dev_name) == strlen(dev->fname)) && + (0 == strncmp(dev_name, dev->fname, strlen(dev->fname)))) { + ucs_debug("Device found: %s", dev_name); + return dev; + } + } + + /* Device not found */ + ucs_error("Cannot find: %s", dev_name); + return NULL; +} + +static ucs_status_t get_nic_address(uct_ugni_device_t *dev_p) +{ + int alps_addr = -1; + int alps_dev_id = -1; + int i; + char *token, *pmi_env; + + pmi_env = getenv("PMI_GNI_DEV_ID"); + if (NULL == pmi_env) { + gni_return_t ugni_rc; + ugni_rc = GNI_CdmGetNicAddress(dev_p->device_id, &dev_p->address, + &dev_p->cpu_id); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_CdmGetNicAddress failed, device %d, Error status: %s %d", + dev_p->device_id, gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_NO_DEVICE; + } + CPU_SET(dev_p->cpu_id, &(dev_p->cpu_mask)); + ucs_debug("(GNI) NIC address: %d", dev_p->address); + } else { + while ((token = strtok(pmi_env, ":")) != NULL) { + alps_dev_id = atoi(token); + if (alps_dev_id == dev_p->device_id) { + break; + } + pmi_env = NULL; + } + ucs_assert(alps_dev_id != -1); + + pmi_env = getenv("PMI_GNI_LOC_ADDR"); + ucs_assert(NULL != pmi_env); + i = 0; + while ((token = strtok(pmi_env, ":")) != NULL) { + if (i == alps_dev_id) { + alps_addr = atoi(token); + break; + } + pmi_env = NULL; + ++i; + } + ucs_assert(alps_addr != -1); + dev_p->address = alps_addr; + ucs_debug("(PMI) NIC address: %d", dev_p->address); + } + return UCS_OK; +} + +ucs_status_t uct_ugni_device_create(int dev_id, int index, uct_ugni_device_t *dev_p) +{ + ucs_status_t status; + gni_return_t ugni_rc; + + dev_p->device_id = (uint32_t)dev_id; + + status = get_nic_address(dev_p); + if (UCS_OK != status) { + ucs_error("Failed to get NIC address"); + return status; + } + + ugni_rc = GNI_GetDeviceType(&dev_p->type); + if (GNI_RC_SUCCESS != ugni_rc) { + 
ucs_error("GNI_GetDeviceType failed, device %d, Error status: %s %d", + dev_id, gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_NO_DEVICE; + } + + switch (dev_p->type) { + case GNI_DEVICE_GEMINI: + ucs_snprintf_zero(dev_p->type_name, sizeof(dev_p->type_name), "%s", + "GEMINI"); + break; + case GNI_DEVICE_ARIES: + ucs_snprintf_zero(dev_p->type_name, sizeof(dev_p->type_name), "%s", + "ARIES"); + break; + default: + ucs_snprintf_zero(dev_p->type_name, sizeof(dev_p->type_name), "%s", + "UNKNOWN"); + } + + ucs_snprintf_zero(dev_p->fname, sizeof(dev_p->fname), "%s:%d", + dev_p->type_name, index); + + return UCS_OK; +} + +void uct_ugni_device_destroy(uct_ugni_device_t *dev) +{ +} + +ucs_status_t uct_ugni_iface_get_dev_address(uct_iface_t *tl_iface, uct_device_addr_t *addr) +{ + uct_ugni_iface_t *iface = ucs_derived_of(tl_iface, uct_ugni_iface_t); + uct_devaddr_ugni_t *ugni_dev_addr = (uct_devaddr_ugni_t *)addr; + uct_ugni_device_t *dev = uct_ugni_iface_device(iface); + + ugni_dev_addr->nic_addr = dev->address; + + return UCS_OK; +} + +static int uct_ugni_next_power_of_two_inclusive (int value) +{ + int i, j, bit; + + for (i = 3, bit = 31 ; i >= 0 ; --i) { + if (!(value & (0xff << (i << 3)))) { + /* short circuit. no set bits present in this byte */ + bit -= 8; + continue; + } + + for (j = 7 ; j >= 0 ; --j, --bit) { + int tmp = (1 << bit); + if (value & tmp) { + return (value == tmp) ? 
bit : bit + 1; + } + } + } + + return 0; +} + +ucs_status_t uct_ugni_create_cdm(uct_ugni_cdm_t *cdm, uct_ugni_device_t *device, ucs_thread_mode_t thread_mode) +{ + uct_ugni_job_info_t *job_info; + int modes; + gni_return_t ugni_rc; + ucs_status_t status = UCS_OK; + int pid_max = 32768, free_bits; + FILE *fh; + + job_info = uct_ugni_get_job_info(); + if (NULL == job_info) { + return UCS_ERR_IO_ERROR; + } + + fh = fopen ("/proc/sys/kernel/pid_max", "r"); + if (NULL != fh) { + fscanf (fh, "%d", &pid_max); + fclose (fh); + } + + /* determine how many free bits we have in the PID space (10 (64-bit) or more (32-bit)) */ + free_bits = 31 - (uct_ugni_next_power_of_two_inclusive (pid_max) - 1); + + cdm->thread_mode = thread_mode; + cdm->dev = device; + /* don't colide with the btl/ugni CDM space if used in the same process. this is done by setting the + * highest bit in the CDM identifier */ + cdm->domain_id = 0x80000000ul | ((getpid () << free_bits) + ucs_atomic_fadd32(&ugni_domain_counter, 1)); + ucs_debug("Creating new command domain with id 0x%08x (0x80000000ul | ((%d << %d) + %d))", + cdm->domain_id, getpid (), free_bits, ugni_domain_counter); + modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED | + GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL | GNI_CDM_MODE_FMA_SHARED; + ugni_rc = GNI_CdmCreate(cdm->domain_id, job_info->ptag, job_info->cookie, + modes, &cdm->cdm_handle); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_CdmCreate failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_NO_DEVICE; + } + + ugni_rc = GNI_CdmAttach(cdm->cdm_handle, device->device_id, + &cdm->address, &cdm->nic_handle); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_CdmAttach failed, Error status: %s\n" + "Created domain 0x%08x", + gni_err_str[ugni_rc], cdm->domain_id); + uct_ugni_destroy_cdm(cdm); + return UCS_ERR_NO_DEVICE; + } + + status = uct_ugni_cdm_init_lock(cdm); + if (UCS_OK != status) { + ucs_error("Couldn't initalize 
CDM lock."); + } + + if (UCS_OK == status) { + ucs_debug("Made ugni cdm. nic_addr = %i domain_id = 0x%08x", device->address, cdm->domain_id); + } + return status; +} + +ucs_status_t uct_ugni_create_md_cdm(uct_ugni_cdm_t *cdm) +{ + return uct_ugni_create_cdm(cdm, &job_info.devices[0], UCS_THREAD_MODE_MULTI); +} + +ucs_status_t uct_ugni_destroy_cdm(uct_ugni_cdm_t *cdm) +{ + gni_return_t ugni_rc; + + uct_ugni_cdm_destroy_lock(cdm); + + ucs_trace_func("cdm=%p", cdm); + ugni_rc = GNI_CdmDestroy(cdm->cdm_handle); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_CdmDestroy error status: %s (%d)", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_IO_ERROR; + } + return UCS_OK; +} + +ucs_status_t uct_ugni_create_cq(gni_cq_handle_t *cq, unsigned cq_size, uct_ugni_cdm_t *cdm) +{ + gni_return_t ugni_rc; + + ugni_rc = GNI_CqCreate(cdm->nic_handle, UCT_UGNI_LOCAL_CQ, 0, + GNI_CQ_NOBLOCK, + NULL, NULL, cq); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_CqCreate failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_NO_DEVICE; + } + + return UCS_OK; +} + +ucs_status_t uct_ugni_destroy_cq(gni_cq_handle_t cq, uct_ugni_cdm_t *cdm) +{ + gni_return_t ugni_rc; + + ugni_rc = GNI_CqDestroy(cq); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_warn("GNI_CqDestroy failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} diff --git a/src/uct/ugni/base/ugni_device.h b/src/uct/ugni/base/ugni_device.h new file mode 100644 index 0000000..3c1e702 --- /dev/null +++ b/src/uct/ugni/base/ugni_device.h @@ -0,0 +1,29 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#ifndef UCT_UGNI_DEVICE_H +#define UCT_UGNI_DEVICE_H + +#include "ugni_types.h" +#include + +ucs_status_t uct_ugni_device_create(int dev_id, int index, uct_ugni_device_t *dev_p); +void uct_ugni_device_destroy(uct_ugni_device_t *dev); +void uct_ugni_device_get_resource(uct_ugni_device_t *dev, + uct_tl_device_resource_t *tl_device); +ucs_status_t uct_ugni_iface_get_dev_address(uct_iface_t *tl_iface, uct_device_addr_t *addr); +ucs_status_t uct_ugni_create_cdm(uct_ugni_cdm_t *cdm, uct_ugni_device_t *device, ucs_thread_mode_t thread_mode); +ucs_status_t uct_ugni_destroy_cdm(uct_ugni_cdm_t *cdm); +uct_ugni_device_t *uct_ugni_device_by_name(const char *dev_name); +ucs_status_t uct_ugni_query_devices(uct_md_h md, + uct_tl_device_resource_t **tl_devices_p, + unsigned *num_tl_devices_p); +ucs_status_t init_device_list(); +ucs_status_t uct_ugni_create_md_cdm(uct_ugni_cdm_t *cdm); +ucs_status_t uct_ugni_create_cq(gni_cq_handle_t *cq, unsigned cq_size, uct_ugni_cdm_t *cdm); +ucs_status_t uct_ugni_destroy_cq(gni_cq_handle_t cq, uct_ugni_cdm_t *cdm); + +#endif diff --git a/src/uct/ugni/base/ugni_ep.c b/src/uct/ugni/base/ugni_ep.c new file mode 100644 index 0000000..6950808 --- /dev/null +++ b/src/uct/ugni/base/ugni_ep.c @@ -0,0 +1,254 @@ +/** + * Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "ugni_ep.h" +#include "ugni_iface.h" + +#include + +SGLIB_DEFINE_LIST_FUNCTIONS(uct_ugni_ep_t, uct_ugni_ep_compare, next); +SGLIB_DEFINE_HASHED_CONTAINER_FUNCTIONS(uct_ugni_ep_t, UCT_UGNI_HASH_SIZE, uct_ugni_ep_hash); + +ucs_status_t uct_ugni_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *n, + unsigned flags){ + uct_ugni_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_iface_t); + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + + UCS_STATIC_ASSERT(sizeof(ucs_arbiter_elem_t) <= UCT_PENDING_REQ_PRIV_LEN); + uct_ugni_enter_async(iface); + uct_pending_req_arb_group_push(&ep->arb_group, n); + ucs_arbiter_group_schedule(&iface->arbiter, &ep->arb_group); + UCT_TL_EP_STAT_PEND(&ep->super); + uct_ugni_leave_async(iface); + return UCS_OK; +} + +ucs_arbiter_cb_result_t uct_ugni_ep_process_pending(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg){ + uct_ugni_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_ugni_ep_t, arb_group); + uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv); + ucs_status_t rc; + + ep->arb_sched = 1; + ucs_trace_data("progressing pending request %p", req); + rc = req->func(req); + ep->arb_sched = 0; + ucs_trace_data("status returned from progress pending: %s", + ucs_status_string(rc)); + + if (UCS_OK == rc) { + /* sent successfully. remove from the arbiter */ + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } else if (UCS_INPROGRESS == rc) { + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } else { + /* couldn't send. 
keep this request in the arbiter until the next time + * this function is called */ + return UCS_ARBITER_CB_RESULT_RESCHED_GROUP; + } +} + +ucs_arbiter_cb_result_t uct_ugni_ep_abriter_purge_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_ugni_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_ugni_ep_t, arb_group); + uct_pending_req_t *req = ucs_container_of(elem, uct_pending_req_t, priv); + uct_purge_cb_args_t *cb_args = arg; + + if (NULL != arg) { + cb_args->cb(req, cb_args->arg); + } else { + ucs_warn("ep=%p cancelling user pending request %p", ep, req); + } + + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; +} + +void uct_ugni_ep_pending_purge(uct_ep_h tl_ep, + uct_pending_purge_callback_t cb, + void *arg){ + uct_ugni_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_iface_t); + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_purge_cb_args_t args = {cb, arg}; + + ucs_arbiter_group_purge(&iface->arbiter, &ep->arb_group, + uct_ugni_ep_abriter_purge_cb, &args); +} + + +static uct_ugni_flush_group_t *uct_ugni_new_flush_group(uct_ugni_iface_t *iface) +{ + return ucs_mpool_get(&iface->flush_pool); +} + +static void uct_ugni_put_flush_group(uct_ugni_flush_group_t *group) +{ + ucs_mpool_put(group); +} + +static void uct_ugni_flush_cb(uct_completion_t *self, ucs_status_t status) +{ + uct_ugni_flush_group_t *group = ucs_container_of(self, uct_ugni_flush_group_t, flush_comp); + + ucs_trace("group=%p, parent=%p, user_comp=%p", group, group->parent, group->user_comp); + uct_invoke_completion(group->user_comp, UCS_OK); + uct_ugni_check_flush(group->parent); + uct_ugni_put_flush_group(group); +} + +static uintptr_t uct_ugni_safe_swap_pointers(void *address, uintptr_t new_value) +{ + if (sizeof(void*) == 4) { + return ucs_atomic_swap32(address, new_value); + } else { + return ucs_atomic_swap64(address, new_value); + } +} + +static ucs_status_t uct_ugni_add_flush_comp(uct_ugni_ep_t *ep, unsigned flags, + uct_completion_t 
*comp) +{ + uct_ugni_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ugni_iface_t); + uct_ugni_flush_group_t *new_group, *present_group; + + if (!uct_ugni_ep_can_send(ep)) { + return UCS_ERR_NO_RESOURCE; + } + + if (NULL == comp) { + return UCS_INPROGRESS; + } + + new_group = uct_ugni_new_flush_group(iface); + new_group->flush_comp.count = UCT_UGNI_INIT_FLUSH_REQ; +#ifdef DEBUG + new_group->flush_comp.func = NULL; + new_group->parent = NULL; +#endif + present_group = (uct_ugni_flush_group_t*)uct_ugni_safe_swap_pointers(&ep->flush_group, + (uintptr_t)new_group); + present_group->flush_comp.func = uct_ugni_flush_cb; + present_group->user_comp = comp; + present_group->parent = new_group; + uct_invoke_completion(&present_group->flush_comp, UCS_OK); + return UCS_INPROGRESS; +} + +ucs_status_t uct_ugni_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp) +{ + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + ucs_status_t status = UCS_INPROGRESS; + + ucs_trace_func("tl_ep=%p, flags=%x, comp=%p", tl_ep, flags, comp); + + if (uct_ugni_ep_can_flush(ep)) { + UCT_TL_EP_STAT_FLUSH(ucs_derived_of(tl_ep, uct_base_ep_t)); + return UCS_OK; + } + status = uct_ugni_add_flush_comp(ep, flags, comp); + if (UCS_INPROGRESS == status) { + UCT_TL_EP_STAT_FLUSH_WAIT(ucs_derived_of(tl_ep, uct_base_ep_t)); + } + return status; +} + +ucs_status_t ugni_connect_ep(uct_ugni_ep_t *ep, + uct_ugni_iface_t *iface, + const uct_sockaddr_ugni_t *iface_addr, + const uct_devaddr_ugni_t *ugni_dev_addr) +{ + gni_return_t ugni_rc; + + uct_ugni_cdm_lock(&iface->cdm); + ugni_rc = GNI_EpBind(ep->ep, ugni_dev_addr->nic_addr, iface_addr->domain_id); + uct_ugni_cdm_unlock(&iface->cdm); + if (GNI_RC_SUCCESS != ugni_rc) { + uct_ugni_cdm_lock(&iface->cdm); + (void)GNI_EpDestroy(ep->ep); + uct_ugni_cdm_unlock(&iface->cdm); + ucs_error("GNI_EpBind failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_UNREACHABLE; + } + + ucs_debug("Binding ep %p to address 
(%d %d)", ep, ugni_dev_addr->nic_addr, + iface_addr->domain_id); + + ep->flush_group->flush_comp.count = UCT_UGNI_INIT_FLUSH; + + return UCS_OK; +} + +/* Endpoint definition */ +UCS_CLASS_INIT_FUNC(uct_ugni_ep_t, const uct_ep_params_t *params) +{ + uct_ugni_iface_t *iface = ucs_derived_of(params->iface, uct_ugni_iface_t); + ucs_status_t rc = UCS_OK; + gni_return_t ugni_rc; + uint32_t *big_hash; + + self->arb_sched = 0; + UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super); + self->flush_group = uct_ugni_new_flush_group(iface); +#ifdef DEBUG + self->flush_group->flush_comp.func = NULL; + self->flush_group->parent = NULL; +#endif + uct_ugni_cdm_lock(&iface->cdm); + ugni_rc = GNI_EpCreate(uct_ugni_iface_nic_handle(iface), iface->local_cq, &self->ep); + uct_ugni_cdm_unlock(&iface->cdm); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_CdmCreate failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_NO_DEVICE; + } + ucs_arbiter_group_init(&self->arb_group); + big_hash = (void *)&self->ep; + self->hash_key = big_hash[0]; + if (uct_ugni_check_device_type(iface, GNI_DEVICE_ARIES)) { + self->hash_key &= 0x00FFFFFF; + } + ucs_debug("Adding ep hash %x to iface %p", self->hash_key, iface); + sglib_hashed_uct_ugni_ep_t_add(iface->eps, self); + + return rc; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_ugni_ep_t) +{ + uct_ugni_iface_t *iface = ucs_derived_of(self->super.super.iface, + uct_ugni_iface_t); + gni_return_t ugni_rc; + + ucs_debug("Removinig ep hash %x from iface %p", self->hash_key, iface); + + ucs_arbiter_group_purge(&iface->arbiter, &self->arb_group, + uct_ugni_ep_abriter_purge_cb, NULL); + uct_ugni_cdm_lock(&iface->cdm); + ugni_rc = GNI_EpDestroy(self->ep); + uct_ugni_cdm_unlock(&iface->cdm); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_warn("GNI_EpDestroy failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + } + sglib_hashed_uct_ugni_ep_t_delete(iface->eps, self); + uct_ugni_ep_pending_purge(&self->super.super, NULL, NULL); + 
uct_ugni_put_flush_group(self->flush_group); +} + +UCS_CLASS_DEFINE(uct_ugni_ep_t, uct_base_ep_t) +UCS_CLASS_DEFINE_NEW_FUNC(uct_ugni_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_ugni_ep_t, uct_ep_t); + +uct_ugni_ep_t *uct_ugni_iface_lookup_ep(uct_ugni_iface_t *iface, uintptr_t hash_key) +{ + uct_ugni_ep_t tmp; + tmp.hash_key = hash_key; + return sglib_hashed_uct_ugni_ep_t_find_member(iface->eps, &tmp); +} diff --git a/src/uct/ugni/base/ugni_ep.h b/src/uct/ugni/base/ugni_ep.h new file mode 100644 index 0000000..1e5115a --- /dev/null +++ b/src/uct/ugni/base/ugni_ep.h @@ -0,0 +1,66 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_UGNI_EP_H +#define UCT_UGNI_EP_H + +#include "ugni_def.h" +#include "ugni_types.h" +#include +#include +#include +#include + +static inline int32_t uct_ugni_ep_compare(uct_ugni_ep_t *ep1, uct_ugni_ep_t *ep2) +{ + return ep1->hash_key - ep2->hash_key; +} + +static inline unsigned uct_ugni_ep_hash(uct_ugni_ep_t *ep) +{ + return ep->hash_key; +} + +SGLIB_DEFINE_LIST_PROTOTYPES(uct_ugni_ep_t, uct_ugni_ep_compare, next); +SGLIB_DEFINE_HASHED_CONTAINER_PROTOTYPES(uct_ugni_ep_t, UCT_UGNI_HASH_SIZE, uct_ugni_ep_hash); + +UCS_CLASS_DECLARE(uct_ugni_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_NEW_FUNC(uct_ugni_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_ugni_ep_t, uct_ep_t); + +uct_ugni_ep_t *uct_ugni_iface_lookup_ep(uct_ugni_iface_t *iface, uintptr_t hash_key); +ucs_status_t ugni_connect_ep(uct_ugni_ep_t *ep, + uct_ugni_iface_t *iface, + const uct_sockaddr_ugni_t *iface_addr, + const uct_devaddr_ugni_t *dev_addr); +ucs_status_t uct_ugni_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *n, + unsigned flags); +void uct_ugni_ep_pending_purge(uct_ep_h tl_ep, uct_pending_purge_callback_t cb, + void *arg); +ucs_arbiter_cb_result_t uct_ugni_ep_process_pending(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, 
+ void *arg); +ucs_arbiter_cb_result_t uct_ugni_ep_abriter_purge_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg); +ucs_status_t uct_ugni_ep_flush(uct_ep_h tl_ep, unsigned flags, + uct_completion_t *comp); + +static inline int uct_ugni_ep_can_send(uct_ugni_ep_t *ep) +{ + return (ucs_arbiter_group_is_empty(&ep->arb_group) || ep->arb_sched) ? 1 : 0; +} + +static inline int uct_ugni_ep_can_flush(uct_ugni_ep_t *ep) +{ + return (ep->flush_group->flush_comp.count == 1 && uct_ugni_ep_can_send(ep)) ? 1 : 0; +} + +static inline void uct_ugni_check_flush(uct_ugni_flush_group_t *flush_group) +{ + uct_invoke_completion(&flush_group->flush_comp, UCS_OK); +} + +#endif diff --git a/src/uct/ugni/base/ugni_iface.c b/src/uct/ugni/base/ugni_iface.c new file mode 100644 index 0000000..8a27b27 --- /dev/null +++ b/src/uct/ugni/base/ugni_iface.c @@ -0,0 +1,123 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#include "ugni_types.h" +#include "ugni_md.h" +#include "ugni_device.h" +#include "ugni_ep.h" +#include "ugni_iface.h" + +ucs_status_t uct_ugni_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp) +{ + uct_ugni_iface_t *iface = ucs_derived_of(tl_iface, uct_ugni_iface_t); + + if (comp != NULL) { + return UCS_ERR_UNSUPPORTED; + } + + if (0 == iface->outstanding) { + UCT_TL_IFACE_STAT_FLUSH(ucs_derived_of(tl_iface, uct_base_iface_t)); + return UCS_OK; + } + + UCT_TL_IFACE_STAT_FLUSH_WAIT(ucs_derived_of(tl_iface, uct_base_iface_t)); + return UCS_INPROGRESS; +} + +ucs_status_t uct_ugni_iface_get_address(uct_iface_h tl_iface, + uct_iface_addr_t *addr) +{ + uct_ugni_iface_t *iface = ucs_derived_of(tl_iface, uct_ugni_iface_t); + uct_sockaddr_ugni_t *iface_addr = (uct_sockaddr_ugni_t*)addr; + + iface_addr->domain_id = iface->cdm.domain_id; + return UCS_OK; +} + +int uct_ugni_iface_is_reachable(uct_iface_h tl_iface, const uct_device_addr_t *dev_addr, const uct_iface_addr_t *iface_addr) 
+{ + return 1; +} + +static ucs_mpool_ops_t uct_ugni_flush_mpool_ops = { + .chunk_alloc = ucs_mpool_chunk_malloc, + .chunk_release = ucs_mpool_chunk_free, + .obj_init = NULL, + .obj_cleanup = NULL +}; + +void uct_ugni_cleanup_base_iface(uct_ugni_iface_t *iface) +{ + ucs_arbiter_cleanup(&iface->arbiter); + ucs_mpool_cleanup(&iface->flush_pool, 1); + uct_ugni_destroy_cq(iface->local_cq, &iface->cdm); + uct_ugni_destroy_cdm(&iface->cdm); +} + +UCS_CLASS_INIT_FUNC(uct_ugni_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + uct_iface_ops_t *uct_ugni_iface_ops, + const uct_iface_config_t *tl_config + UCS_STATS_ARG(ucs_stats_node_t *stats_parent)) +{ + uct_ugni_device_t *dev; + ucs_status_t status; + uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t); + unsigned grow = (config->mpool.bufs_grow == 0) ? 128 : config->mpool.bufs_grow; + + ucs_assert(params->open_mode & UCT_IFACE_OPEN_MODE_DEVICE); + + UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, uct_ugni_iface_ops, md, worker, + params, tl_config UCS_STATS_ARG(params->stats_root) + UCS_STATS_ARG(UCT_UGNI_MD_NAME)); + dev = uct_ugni_device_by_name(params->mode.device.dev_name); + if (NULL == dev) { + ucs_error("No device was found: %s", params->mode.device.dev_name); + return UCS_ERR_NO_DEVICE; + } + status = uct_ugni_create_cdm(&self->cdm, dev, self->super.worker->thread_mode); + if (UCS_OK != status) { + ucs_error("Failed to UGNI NIC, Error status: %d", status); + return status; + } + status = uct_ugni_create_cq(&self->local_cq, UCT_UGNI_LOCAL_CQ, &self->cdm); + if (UCS_OK != status) { + goto clean_cdm; + } + self->outstanding = 0; + sglib_hashed_uct_ugni_ep_t_init(self->eps); + ucs_arbiter_init(&self->arbiter); + status = ucs_mpool_init(&self->flush_pool, + 0, + sizeof(uct_ugni_flush_group_t), + 0, /* alignment offset */ + UCS_SYS_CACHE_LINE_SIZE, /* alignment */ + grow, /* grow */ + config->mpool.max_bufs, /* max buffers */ + &uct_ugni_flush_mpool_ops, + 
"UGNI-DESC-ONLY"); + if (UCS_OK != status) { + ucs_error("Could not init iface"); + goto clean_cq; + } + return status; +clean_cq: + uct_ugni_destroy_cq(self->local_cq, &self->cdm); +clean_cdm: + uct_ugni_destroy_cdm(&self->cdm); + return status; +} + +UCS_CLASS_DEFINE_NEW_FUNC(uct_ugni_iface_t, uct_iface_t, uct_md_h, uct_worker_h, + const uct_iface_params_t*, uct_iface_ops_t *, + const uct_iface_config_t * UCS_STATS_ARG(ucs_stats_node_t *)); + +static UCS_CLASS_CLEANUP_FUNC(uct_ugni_iface_t) +{ + uct_ugni_cleanup_base_iface(self); +} + +UCS_CLASS_DEFINE(uct_ugni_iface_t, uct_base_iface_t); diff --git a/src/uct/ugni/base/ugni_iface.h b/src/uct/ugni/base/ugni_iface.h new file mode 100644 index 0000000..ea6768e --- /dev/null +++ b/src/uct/ugni/base/ugni_iface.h @@ -0,0 +1,28 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_UGNI_IFACE_H +#define UCT_UGNI_IFACE_H + +#include "ugni_types.h" +#include +#include + +UCS_CLASS_DECLARE(uct_ugni_iface_t, uct_md_h, uct_worker_h, + const uct_iface_params_t*, uct_iface_ops_t*, + const uct_iface_config_t* UCS_STATS_ARG(ucs_stats_node_t*)) + +ucs_status_t uct_ugni_iface_flush(uct_iface_h tl_iface, unsigned flags, + uct_completion_t *comp); +ucs_status_t uct_ugni_iface_get_address(uct_iface_h tl_iface, uct_iface_addr_t *addr); +int uct_ugni_iface_is_reachable(uct_iface_h tl_iface, const uct_device_addr_t *dev_addr, + const uct_iface_addr_t *iface_addr); +void uct_ugni_base_desc_init(ucs_mpool_t *mp, void *obj, void *chunk); +void uct_ugni_base_desc_key_init(uct_iface_h iface, void *obj, uct_mem_h memh); +void uct_ugni_cleanup_base_iface(uct_ugni_iface_t *iface); +#define uct_ugni_iface_device(_iface) ((_iface)->cdm.dev) +#define uct_ugni_iface_nic_handle(_iface) ((_iface)->cdm.nic_handle) +#define uct_ugni_check_device_type(_iface, _type) ((_iface)->cdm.dev->type == _type) +#endif diff --git a/src/uct/ugni/base/ugni_md.c b/src/uct/ugni/base/ugni_md.c new 
file mode 100644 index 0000000..ba93c4d --- /dev/null +++ b/src/uct/ugni/base/ugni_md.c @@ -0,0 +1,239 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#include "ugni_device.h" +#include "ugni_iface.h" +#include "ugni_md.h" + +/* Forward declarations */ + +UCS_CONFIG_DEFINE_ARRAY(ugni_alloc_methods, sizeof(uct_alloc_method_t), + UCS_CONFIG_TYPE_ENUM(uct_alloc_method_names)); + +pthread_mutex_t uct_ugni_global_lock = PTHREAD_MUTEX_INITIALIZER; + +/* For Cray devices we have only one MD */ +static ucs_status_t +uct_ugni_query_md_resources(uct_component_h component, + uct_md_resource_desc_t **resources_p, + unsigned *num_resources_p) +{ + if (getenv("PMI_GNI_PTAG") == NULL) { + return uct_md_query_empty_md_resource(resources_p, num_resources_p); + } + + return uct_md_query_single_md_resource(component, resources_p, + num_resources_p); +} + +static ucs_status_t uct_ugni_md_query(uct_md_h md, uct_md_attr_t *md_attr) +{ + md_attr->rkey_packed_size = 3 * sizeof(uint64_t); + md_attr->cap.flags = UCT_MD_FLAG_REG | + UCT_MD_FLAG_NEED_MEMH | + UCT_MD_FLAG_NEED_RKEY; + md_attr->cap.reg_mem_types = UCS_MEMORY_TYPES_CPU_ACCESSIBLE; + md_attr->cap.access_mem_type = UCS_MEMORY_TYPE_HOST; + md_attr->cap.detect_mem_types = 0; + md_attr->cap.max_alloc = 0; + md_attr->cap.max_reg = ULONG_MAX; + md_attr->reg_cost.overhead = 1000.0e-9; + md_attr->reg_cost.growth = 0.007e-9; + memset(&md_attr->local_cpus, 0xff, sizeof(md_attr->local_cpus)); + return UCS_OK; +} + +static ucs_status_t uct_ugni_mem_reg(uct_md_h md, void *address, size_t length, + unsigned flags, uct_mem_h *memh_p) +{ + ucs_status_t status; + gni_return_t ugni_rc; + uct_ugni_md_t *ugni_md = ucs_derived_of(md, uct_ugni_md_t); + gni_mem_handle_t * mem_hndl = NULL; + + if (0 == length) { + ucs_error("Unexpected length %zu", length); + return UCS_ERR_INVALID_PARAM; + } + + mem_hndl = 
ucs_malloc(sizeof(gni_mem_handle_t), "gni_mem_handle_t"); + if (NULL == mem_hndl) { + ucs_error("Failed to allocate memory for gni_mem_handle_t"); + status = UCS_ERR_NO_MEMORY; + goto mem_err; + } + + uct_ugni_cdm_lock(&ugni_md->cdm); + ugni_rc = GNI_MemRegister(ugni_md->cdm.nic_handle, (uint64_t)address, + length, NULL, + GNI_MEM_READWRITE | GNI_MEM_RELAXED_PI_ORDERING, + -1, mem_hndl); + uct_ugni_cdm_unlock(&ugni_md->cdm); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_MemRegister failed (addr %p, size %zu), Error status: %s %d", + address, length, gni_err_str[ugni_rc], ugni_rc); + status = UCS_ERR_IO_ERROR; + goto mem_err; + } + + ucs_debug("Memory registration address %p, len %lu, keys [%"PRIx64" %"PRIx64"]", + address, length, mem_hndl->qword1, mem_hndl->qword2); + *memh_p = mem_hndl; + return UCS_OK; + +mem_err: + free(mem_hndl); + return status; +} + +static ucs_status_t uct_ugni_mem_dereg(uct_md_h md, uct_mem_h memh) +{ + uct_ugni_md_t *ugni_md = ucs_derived_of(md, uct_ugni_md_t); + gni_mem_handle_t *mem_hndl = (gni_mem_handle_t *) memh; + gni_return_t ugni_rc; + ucs_status_t status = UCS_OK; + + uct_ugni_cdm_lock(&ugni_md->cdm); + ugni_rc = GNI_MemDeregister(ugni_md->cdm.nic_handle, mem_hndl); + uct_ugni_cdm_unlock(&ugni_md->cdm); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_MemDeregister failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + status = UCS_ERR_IO_ERROR; + } + ucs_free(mem_hndl); + + return status; +} + +static ucs_status_t uct_ugni_rkey_pack(uct_md_h md, uct_mem_h memh, + void *rkey_buffer) +{ + gni_mem_handle_t *mem_hndl = (gni_mem_handle_t *) memh; + uint64_t *ptr = rkey_buffer; + + ptr[0] = UCT_UGNI_RKEY_MAGIC; + ptr[1] = mem_hndl->qword1; + ptr[2] = mem_hndl->qword2; + ucs_debug("Packed [ %"PRIx64" %"PRIx64" %"PRIx64"]", ptr[0], ptr[1], ptr[2]); + return UCS_OK; +} + +static ucs_status_t uct_ugni_rkey_release(uct_component_t *component, + uct_rkey_t rkey, void *handle) +{ + ucs_assert(NULL == handle); + 
ucs_free((void *)rkey); + return UCS_OK; +} + +static ucs_status_t uct_ugni_rkey_unpack(uct_component_t *component, + const void *rkey_buffer, + uct_rkey_t *rkey_p, void **handle_p) +{ + const uint64_t *ptr = rkey_buffer; + gni_mem_handle_t *mem_hndl = NULL; + uint64_t magic = 0; + + ucs_debug("Unpacking [ %"PRIx64" %"PRIx64" %"PRIx64"]", ptr[0], ptr[1], ptr[2]); + magic = ptr[0]; + if (magic != UCT_UGNI_RKEY_MAGIC) { + ucs_error("Failed to identify key. Expected %llx but received %"PRIx64"", + UCT_UGNI_RKEY_MAGIC, magic); + return UCS_ERR_UNSUPPORTED; + } + + mem_hndl = ucs_malloc(sizeof(gni_mem_handle_t), "gni_mem_handle_t"); + if (NULL == mem_hndl) { + ucs_error("Failed to allocate memory for gni_mem_handle_t"); + return UCS_ERR_NO_MEMORY; + } + + mem_hndl->qword1 = ptr[1]; + mem_hndl->qword2 = ptr[2]; + *rkey_p = (uintptr_t)mem_hndl; + *handle_p = NULL; + return UCS_OK; +} + +static void uct_ugni_md_close(uct_md_h md) +{ + uct_ugni_md_t *ugni_md = ucs_derived_of(md, uct_ugni_md_t); + + pthread_mutex_lock(&uct_ugni_global_lock); + ugni_md->ref_count--; + if (!ugni_md->ref_count) { + ucs_debug("Tearing down MD CDM"); + uct_ugni_destroy_cdm(&ugni_md->cdm); + } + pthread_mutex_unlock(&uct_ugni_global_lock); +} + +static ucs_status_t +uct_ugni_md_open(uct_component_h component,const char *md_name, + const uct_md_config_t *md_config, uct_md_h *md_p) +{ + ucs_status_t status = UCS_OK; + + pthread_mutex_lock(&uct_ugni_global_lock); + static uct_md_ops_t md_ops = { + .close = uct_ugni_md_close, + .query = uct_ugni_md_query, + .mem_alloc = (void*)ucs_empty_function, + .mem_free = (void*)ucs_empty_function, + .mem_reg = uct_ugni_mem_reg, + .mem_dereg = uct_ugni_mem_dereg, + .mkey_pack = uct_ugni_rkey_pack, + .detect_memory_type = ucs_empty_function_return_unsupported, + }; + + static uct_ugni_md_t md = { + .super.ops = &md_ops, + .super.component = &uct_ugni_component, + .ref_count = 0 + }; + + *md_p = &md.super; + + if (!md.ref_count) { + status = init_device_list(); + 
if (UCS_OK != status) { + ucs_error("Failed to init device list, Error status: %d", status); + goto error; + } + status = uct_ugni_create_md_cdm(&md.cdm); + if (UCS_OK != status) { + ucs_error("Failed to UGNI NIC, Error status: %d", status); + goto error; + } + } + + md.ref_count++; + +error: + pthread_mutex_unlock(&uct_ugni_global_lock); + return status; +} + +uct_component_t uct_ugni_component = { + .query_md_resources = uct_ugni_query_md_resources, + .md_open = uct_ugni_md_open, + .cm_open = ucs_empty_function_return_unsupported, + .rkey_unpack = uct_ugni_rkey_unpack, + .rkey_ptr = ucs_empty_function_return_unsupported, + .rkey_release = uct_ugni_rkey_release, + .name = UCT_UGNI_MD_NAME, + .md_config = { + .name = "UGNI memory domain", + .prefix = "UGNI_", + .table = uct_md_config_table, + .size = sizeof(uct_md_config_t), + }, + .cm_config = UCS_CONFIG_EMPTY_GLOBAL_LIST_ENTRY, + .tl_list = UCT_COMPONENT_TL_LIST_INITIALIZER(&uct_ugni_component), + .flags = 0 +}; +UCT_COMPONENT_REGISTER(&uct_ugni_component); diff --git a/src/uct/ugni/base/ugni_md.h b/src/uct/ugni/base/ugni_md.h new file mode 100644 index 0000000..c5a6a73 --- /dev/null +++ b/src/uct/ugni/base/ugni_md.h @@ -0,0 +1,15 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_UGNI_CONTEXT_H +#define UCT_UGNI_CONTEXT_H + +#include "ugni_types.h" +#include "ugni_def.h" +#include + +extern uct_component_t uct_ugni_component; + +#endif diff --git a/src/uct/ugni/base/ugni_types.h b/src/uct/ugni/base/ugni_types.h new file mode 100644 index 0000000..93f055c --- /dev/null +++ b/src/uct/ugni/base/ugni_types.h @@ -0,0 +1,99 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * Copyright (c) Los Alamos National Security, LLC. 2018. ALL RIGHTS RESERVED. + * Copyright (c) Triad National Security, LLC. 2018. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#ifndef UCT_UGNI_TYPES_H +#define UCT_UGNI_TYPES_H + +#include "ugni_def.h" + +#include +#include +#include +#include + + +typedef struct uct_ugni_device { + gni_nic_device_t type; /**< Device type */ + char type_name[UCT_UGNI_MAX_TYPE_NAME]; /**< Device type name */ + char fname[UCT_DEVICE_NAME_MAX];/**< Device full name */ + uint32_t device_id; /**< Device id */ + uint32_t address; /**< Device address */ + uint32_t cpu_id; /**< CPU attached directly + to the device */ + ucs_sys_cpuset_t cpu_mask; /**< CPU mask */ + /* TBD - reference counter */ +} uct_ugni_device_t; + +typedef struct uct_ugni_cdm { + gni_cdm_handle_t cdm_handle; /**< Ugni communication domain */ + gni_nic_handle_t nic_handle; /**< Ugni NIC handle */ + uct_ugni_device_t *dev; /**< Ugni device the cdm is connected to */ + ucs_thread_mode_t thread_mode; + uint32_t address; + uint32_t domain_id; + +#if ENABLE_MT + ucs_spinlock_t lock; /**< Device lock */ +#endif +} uct_ugni_cdm_t; + +/** + * @brief UGNI Memory domain + * + * Ugni does not define MD, instead I use + * device handle that "simulates" the MD. + * Memory that is registered with one device handle + * can be accessed with any other. 
+ */ +typedef struct uct_ugni_md { + uct_md_t super; /**< Domain info */ + uct_ugni_cdm_t cdm; /**< Communication domain for memory registration*/ + int ref_count; /**< UGNI Domain ref count */ +} uct_ugni_md_t; + +typedef struct uct_devaddr_ugni_t { + uint32_t nic_addr; +} UCS_S_PACKED uct_devaddr_ugni_t; + +typedef struct uct_sockaddr_ugni { + uint32_t domain_id; +} UCS_S_PACKED uct_sockaddr_ugni_t; + +typedef struct uct_ugni_flush_group { + uct_completion_t flush_comp; /**< Completion for outstanding requests + flush_comp.count is used to track outstanding sends*/ + uct_completion_t *user_comp; /**< User completion struct */ + struct uct_ugni_flush_group *parent; /**< Used to signal the next flush_group that this group is done*/ +} uct_ugni_flush_group_t; + +typedef struct uct_ugni_ep { + uct_base_ep_t super; + gni_ep_handle_t ep; /**< Endpoint for ugni api */ + uct_ugni_flush_group_t *flush_group; /**< Flush group new sends are added to */ + uint32_t hash_key; /**< Hash to look up EPs with */ + uint32_t arb_sched; /**< Flag to make sure we don't recursively block sends*/ + ucs_arbiter_group_t arb_group; /**< Our group in the pending send arbiter */ + struct uct_ugni_ep *next; +} uct_ugni_ep_t; + +typedef struct uct_ugni_iface { + uct_base_iface_t super; + uct_ugni_cdm_t cdm; /**< Ugni communication domain and handles */ + gni_cq_handle_t local_cq; /**< Completion queue */ + uct_ugni_ep_t *eps[UCT_UGNI_HASH_SIZE]; /**< Array of QPs */ + unsigned outstanding; /**< Counter for outstanding packets + on the interface */ + ucs_arbiter_t arbiter; /**< arbiter structure for pending operations */ + ucs_mpool_t flush_pool; /**< Memory pool for flush objects */ +} uct_ugni_iface_t; + +typedef struct uct_ugni_iface_config { + uct_iface_config_t super; + uct_iface_mpool_config_t mpool; +} uct_ugni_iface_config_t; + +#endif diff --git a/src/uct/ugni/configure.m4 b/src/uct/ugni/configure.m4 new file mode 100644 index 0000000..a2979a8 --- /dev/null +++ 
b/src/uct/ugni/configure.m4
@@ -0,0 +1,38 @@
+#
+# Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED.
+# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+#
+
+# Cray UGNI transport detection.
+# --with-ugni=no skips the check entirely. Any other explicit value turns a
+# missing cray-ugni or cray-pmi pkg-config module into a configure error,
+# while the default value, guess, just leaves the transport disabled.
+# The DIR part of the option is not consulted below; detection relies on
+# $PKG_CONFIG only.
+cray_ugni_supported=no
+
+AC_ARG_WITH([ugni],
+            [AS_HELP_STRING([--with-ugni(=DIR)], [Build Cray UGNI support])],
+            [],
+            [with_ugni=guess])
+
+AS_IF([test "x$with_ugni" != "xno"],
+      [AC_MSG_CHECKING([cray-ugni])
+       AS_IF([$PKG_CONFIG --exists cray-ugni cray-pmi],
+             [AC_MSG_RESULT([yes])
+              AC_SUBST([CRAY_UGNI_CFLAGS], [`$PKG_CONFIG --cflags cray-ugni cray-pmi`])
+              AC_SUBST([CRAY_UGNI_LIBS],   [`$PKG_CONFIG --libs   cray-ugni cray-pmi`])
+              uct_modules="${uct_modules}:ugni"
+              cray_ugni_supported=yes
+              AC_DEFINE([HAVE_TL_UGNI], [1], [Defined if UGNI transport exists])
+             ],
+             [AC_MSG_RESULT([no])
+              AS_IF([test "x$with_ugni" != "xguess"],
+                    [AC_MSG_ERROR([UGNI support was requested but cray-ugni and cray-pmi packages cannot be found])])
+             ])
+      ])
+
+
+AM_CONDITIONAL([HAVE_CRAY_UGNI], [test "x$cray_ugni_supported" = xyes])
+AC_CONFIG_FILES([src/uct/ugni/Makefile])
diff --git a/src/uct/ugni/rdma/ugni_rdma_ep.c b/src/uct/ugni/rdma/ugni_rdma_ep.c
new file mode 100644
index 0000000..3f0de8e
--- /dev/null
+++ b/src/uct/ugni/rdma/ugni_rdma_ep.c
@@ -0,0 +1,785 @@
+/**
+* Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED.
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+* See file LICENSE for terms.
+*/ + +#include "ugni_rdma_ep.h" +#include "ugni_rdma_iface.h" +#include + +#define UCT_CHECK_PARAM_IOV(_iov, _iovcnt, _buffer, _length, _memh) \ + UCT_CHECK_PARAM(1 == _iovcnt, "iov[iovcnt] has to be 1 at this time"); \ + void *_buffer = _iov[0].buffer; \ + size_t _length = _iov[0].length; \ + uct_mem_h _memh = _iov[0].memh; + +/* Endpoint operations */ +static inline void uct_ugni_invoke_orig_comp(uct_ugni_rdma_fetch_desc_t *fma, ucs_status_t status) +{ + if (ucs_likely(NULL != fma->orig_comp_cb)) { + uct_invoke_completion(fma->orig_comp_cb, status); + } +} + +static inline void uct_ugni_format_fma(uct_ugni_base_desc_t *fma, gni_post_type_t type, + const void *buffer, uint64_t remote_addr, + uct_rkey_t rkey, unsigned length, uct_ugni_ep_t *ep, + uct_completion_t *comp, + uct_unpack_callback_t unpack_cb) +{ + fma->desc.type = type; + fma->desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; + fma->desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; + fma->desc.local_addr = (uint64_t)buffer; + fma->desc.remote_addr = remote_addr; + fma->desc.remote_mem_hndl = *(gni_mem_handle_t *)rkey; + fma->desc.length = length; + fma->flush_group = ep->flush_group; + fma->comp_cb = comp; + fma->unpack_cb = unpack_cb; +} + +static inline void uct_ugni_format_fma_amo(uct_ugni_rdma_fetch_desc_t *amo, gni_post_type_t type, + gni_fma_cmd_type_t amo_op, + uint64_t first_operand, uint64_t second_operand, + void *buffer, uint64_t remote_addr, + uct_rkey_t rkey, unsigned length, uct_ugni_ep_t *ep, + uct_completion_t *comp, + uct_completion_callback_t unpack_cb, void *arg) +{ + if (NULL != comp) { + amo->orig_comp_cb = comp; + comp = &amo->tmp; + amo->tmp.func = unpack_cb; + amo->tmp.count = 1; + } + + uct_ugni_format_fma(&amo->super, GNI_POST_AMO, buffer, remote_addr, + rkey, length, ep, comp, NULL); + + amo->super.desc.amo_cmd = amo_op; + amo->super.desc.first_operand = first_operand; + amo->super.desc.second_operand = second_operand; + amo->user_buffer = arg; +} + +static inline void 
uct_ugni_format_rdma(uct_ugni_base_desc_t *rdma, gni_post_type_t type, + const void *buffer, uint64_t remote_addr, + uct_mem_h memh, uct_rkey_t rkey, + unsigned length, uct_ugni_ep_t *ep, + gni_cq_handle_t cq, + uct_completion_t *comp) +{ + rdma->desc.type = type; + rdma->desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT; + rdma->desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE; + rdma->desc.local_addr = (uint64_t) buffer; + rdma->desc.local_mem_hndl = *(gni_mem_handle_t *)memh; + rdma->desc.remote_addr = remote_addr; + rdma->desc.remote_mem_hndl = *(gni_mem_handle_t *)rkey; + rdma->desc.length = length; + rdma->desc.src_cq_hndl = cq; + rdma->flush_group = ep->flush_group; + rdma->comp_cb = comp; +} + +static inline ucs_status_t uct_ugni_post_rdma(uct_ugni_rdma_iface_t *iface, + uct_ugni_ep_t *ep, + uct_ugni_base_desc_t *rdma) +{ + gni_return_t ugni_rc; + + if (ucs_unlikely(!uct_ugni_ep_can_send(ep))) { + ucs_mpool_put(rdma); + return UCS_ERR_NO_RESOURCE; + } + uct_ugni_cdm_lock(&iface->super.cdm); + ugni_rc = GNI_PostRdma(ep->ep, &rdma->desc); + uct_ugni_cdm_unlock(&iface->super.cdm); + if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) { + ucs_mpool_put(rdma); + if(GNI_RC_ERROR_RESOURCE == ugni_rc || GNI_RC_ERROR_NOMEM == ugni_rc) { + ucs_debug("GNI_PostRdma failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_NO_RESOURCE; + } else { + ucs_error("GNI_PostRdma failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_IO_ERROR; + } + } + + ++rdma->flush_group->flush_comp.count; + ++iface->super.outstanding; + + return UCS_INPROGRESS; +} + +static inline ssize_t uct_ugni_post_fma(uct_ugni_rdma_iface_t *iface, + uct_ugni_ep_t *ep, + uct_ugni_base_desc_t *fma, + ssize_t ok_status) +{ + gni_return_t ugni_rc; + + if (ucs_unlikely(!uct_ugni_ep_can_send(ep))) { + ucs_mpool_put(fma); + return UCS_ERR_NO_RESOURCE; + } + uct_ugni_cdm_lock(&iface->super.cdm); + ugni_rc = GNI_PostFma(ep->ep, &fma->desc); + uct_ugni_cdm_unlock(&iface->super.cdm); + if 
(ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) { + ucs_mpool_put(fma); + if(GNI_RC_ERROR_RESOURCE == ugni_rc || GNI_RC_ERROR_NOMEM == ugni_rc) { + ucs_debug("GNI_PostFma failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_NO_RESOURCE; + } else { + ucs_error("GNI_PostFma failed, Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_IO_ERROR; + } + } + + ++fma->flush_group->flush_comp.count; + ++iface->super.outstanding; + + return ok_status; +} + +ucs_status_t uct_ugni_ep_put_short(uct_ep_h tl_ep, const void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey) +{ + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + uct_ugni_base_desc_t *fma; + + UCT_SKIP_ZERO_LENGTH(length); + UCT_CHECK_LENGTH(length, 0, iface->config.fma_seg_size, "put_short"); + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, + fma, return UCS_ERR_NO_RESOURCE); + uct_ugni_format_fma(fma, GNI_POST_FMA_PUT, buffer, + remote_addr, rkey, length, ep, NULL, NULL); + ucs_trace_data("Posting PUT Short, GNI_PostFma of size %"PRIx64" from %p to " + "%p, with [%"PRIx64" %"PRIx64"]", + fma->desc.length, + (void *)fma->desc.local_addr, + (void *)fma->desc.remote_addr, + fma->desc.remote_mem_hndl.qword1, + fma->desc.remote_mem_hndl.qword2); + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, SHORT, length); + return uct_ugni_post_fma(iface, ep, fma, UCS_OK); +} + +ssize_t uct_ugni_ep_put_bcopy(uct_ep_h tl_ep, uct_pack_callback_t pack_cb, + void *arg, uint64_t remote_addr, uct_rkey_t rkey) +{ + /* Since custom pack function is used + * we have to allocate separate memory to pack + * the info and pass it to FMA + * something like: + * pack_cb(desc + 1, arg, length); */ + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + uct_ugni_base_desc_t 
*fma;
+    size_t length;
+
+    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_buffer,
+                             fma, return UCS_ERR_NO_RESOURCE);
+
+    /* The payload is packed into the space right after the descriptor */
+    length = pack_cb(fma + 1, arg);
+    UCT_SKIP_ZERO_LENGTH(length, fma);
+    if (ucs_unlikely(length > iface->config.fma_seg_size)) {
+        /* Checked by hand instead of UCT_CHECK_LENGTH(): the descriptor taken
+         * above has to go back to the pool before bailing out.
+         * NOTE(review): this check is now unconditional - confirm that is
+         * acceptable for builds where parameter checking is compiled out. */
+        ucs_error("Invalid put_bcopy length: %zu (expected: <= %zu)",
+                  length, (size_t)iface->config.fma_seg_size);
+        ucs_mpool_put(fma);
+        return UCS_ERR_INVALID_PARAM;
+    }
+    uct_ugni_format_fma(fma, GNI_POST_FMA_PUT, fma + 1,
+                        remote_addr, rkey, length, ep, NULL, NULL);
+    ucs_trace_data("Posting PUT BCOPY, GNI_PostFma of size %"PRIx64" from %p to "
+                   "%p, with [%"PRIx64" %"PRIx64"]",
+                   fma->desc.length,
+                   (void *)fma->desc.local_addr,
+                   (void *)fma->desc.remote_addr,
+                   fma->desc.remote_mem_hndl.qword1,
+                   fma->desc.remote_mem_hndl.qword2);
+    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, BCOPY, length);
+    /* On a successful post the packed length is the return value */
+    return uct_ugni_post_fma(iface, ep, fma, length);
+}
+
+/*
+ * Zero-copy PUT through GNI_PostRdma.
+ * Only a single IOV entry is accepted (enforced by UCT_CHECK_PARAM_IOV, which
+ * also declares buffer, length and memh). Returns whatever
+ * uct_ugni_post_rdma() returns, UCS_INPROGRESS once the request is posted;
+ * comp is stored in the descriptor by uct_ugni_format_rdma().
+ */
+ucs_status_t uct_ugni_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt,
+                                   uint64_t remote_addr, uct_rkey_t rkey,
+                                   uct_completion_t *comp)
+{
+    uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t);
+    uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t);
+    uct_ugni_base_desc_t *rdma;
+
+    UCT_CHECK_PARAM_IOV(iov, iovcnt, buffer, length, memh);
+    UCT_SKIP_ZERO_LENGTH(length);
+    UCT_CHECK_LENGTH(length, 0, iface->config.rdma_max_size, "put_zcopy");
+    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, rdma,
+                             return UCS_ERR_NO_RESOURCE);
+    /* Setup Callback */
+    uct_ugni_format_rdma(rdma, GNI_POST_RDMA_PUT, buffer, remote_addr, memh,
+                         rkey, length, ep, iface->super.local_cq, comp);
+
+    ucs_trace_data("Posting PUT ZCOPY, GNI_PostRdma of size %"PRIx64" from %p to %p, with [%"PRIx64" %"PRIx64"]",
+                   rdma->desc.length,
+                   (void *)rdma->desc.local_addr,
+                   (void *)rdma->desc.remote_addr,
+                   rdma->desc.remote_mem_hndl.qword1,
+                   rdma->desc.remote_mem_hndl.qword2);
+    UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), PUT, ZCOPY, length);
+    return uct_ugni_post_rdma(iface, ep, rdma);
+}
+
+static void
uct_ugni_amo_unpack64(uct_completion_t *self, ucs_status_t status) +{ + uct_ugni_rdma_fetch_desc_t *fma = (uct_ugni_rdma_fetch_desc_t *) + ucs_container_of(self, uct_ugni_rdma_fetch_desc_t, tmp); + + /* Call the orignal callback and skip padding */ + *(uint64_t *)fma->user_buffer = *(uint64_t *)(fma + 1); + uct_ugni_invoke_orig_comp(fma, status); +} + +ucs_status_t uct_ugni_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, uint64_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t *result, uct_completion_t *comp) +{ + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + uct_ugni_rdma_fetch_desc_t *fma; + + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma, + return UCS_ERR_NO_RESOURCE); + uct_ugni_format_fma_amo(fma, GNI_POST_AMO, GNI_FMA_ATOMIC_CSWAP, + compare, swap, fma + 1, remote_addr, + rkey, LEN_64, ep, comp, uct_ugni_amo_unpack64, (void *)result); + ucs_trace_data("Posting AMO CSWAP, GNI_PostFma of size %"PRIx64" value" + "%"PRIx64" compare %"PRIx64" to %p, with [%"PRIx64" %"PRIx64"]", + fma->super.desc.length, swap, compare, + (void *)fma->super.desc.remote_addr, + fma->super.desc.remote_mem_hndl.qword1, + fma->super.desc.remote_mem_hndl.qword2); + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t)); + return uct_ugni_post_fma(iface, ep, &fma->super, UCS_INPROGRESS); +} + +static void uct_ugni_amo_unpack32(uct_completion_t *self, ucs_status_t status) +{ + uct_ugni_rdma_fetch_desc_t *fma = (uct_ugni_rdma_fetch_desc_t *) + ucs_container_of(self, uct_ugni_rdma_fetch_desc_t, tmp); + + /* Call the orignal callback and skip padding */ + *(uint32_t *)fma->user_buffer = *(uint32_t *)(fma + 1); + uct_ugni_invoke_orig_comp(fma, status); +} + +ucs_status_t uct_ugni_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, uint32_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint32_t *result, uct_completion_t *comp) +{ + uct_ugni_ep_t 
*ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + uct_ugni_rdma_fetch_desc_t *fma; + + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma, + return UCS_ERR_NO_RESOURCE); + uct_ugni_format_fma_amo(fma, GNI_POST_AMO, GNI_FMA_ATOMIC2_FCSWAP_S, + (uint64_t)compare, (uint64_t)swap, fma + 1, remote_addr, + rkey, LEN_32, ep, comp, uct_ugni_amo_unpack32, (void *)result); + ucs_trace_data("Posting AMO CSWAP, GNI_PostFma of size %"PRIx64" value" + "%"PRIx32" compare %"PRIx32" to %p, with [%"PRIx64" %"PRIx64"]", + fma->super.desc.length, swap, compare, + (void *)fma->super.desc.remote_addr, + fma->super.desc.remote_mem_hndl.qword1, + fma->super.desc.remote_mem_hndl.qword2); + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t)); + return uct_ugni_post_fma(iface, ep, &fma->super, UCS_INPROGRESS); +} + +ucs_status_t uct_ugni_ep_atomic_op32(uct_ep_h tl_ep, uint32_t op, + uint64_t remote_addr, uct_rkey_t rkey, + gni_fma_cmd_type_t op_type, char *op_str) +{ + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + uct_ugni_rdma_fetch_desc_t *fma; + + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma, + return UCS_ERR_NO_RESOURCE); + uct_ugni_format_fma_amo(fma, GNI_POST_AMO, op_type, + (uint64_t)op, 0, NULL, remote_addr, + rkey, LEN_32, ep, NULL, NULL, NULL); + ucs_trace_data("Posting AMO %s, GNI_PostFma of size %"PRIx64" value" + "%"PRIx32" to %p, with [%"PRIx64" %"PRIx64"]", + op_str, fma->super.desc.length, op, + (void *)fma->super.desc.remote_addr, + fma->super.desc.remote_mem_hndl.qword1, + fma->super.desc.remote_mem_hndl.qword2); + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t)); + return uct_ugni_post_fma(iface, ep, &fma->super, UCS_OK); +} + +ucs_status_t uct_ugni_ep_atomic32_post(uct_ep_h ep, unsigned opcode, uint32_t value, + uint64_t 
remote_addr, uct_rkey_t rkey) +{ + switch (opcode) { + case UCT_ATOMIC_OP_ADD: + return uct_ugni_ep_atomic_op32(ep, value, remote_addr, rkey, + GNI_FMA_ATOMIC2_IADD_S, "ADD"); + case UCT_ATOMIC_OP_XOR: + return uct_ugni_ep_atomic_op32(ep, value, remote_addr, rkey, + GNI_FMA_ATOMIC2_XOR_S, "XOR"); + case UCT_ATOMIC_OP_AND: + return uct_ugni_ep_atomic_op32(ep, value, remote_addr, rkey, + GNI_FMA_ATOMIC2_AND_S, "AND"); + case UCT_ATOMIC_OP_OR: + return uct_ugni_ep_atomic_op32(ep, value, remote_addr, rkey, + GNI_FMA_ATOMIC2_OR_S, "OR"); + default: + ucs_assertv(0, "incorrect opcode for atomic: %d", opcode); + return UCS_ERR_UNSUPPORTED; + } +} + +ucs_status_t uct_ugni_ep_atomic_op64(uct_ep_h tl_ep, uint64_t op, + uint64_t remote_addr, uct_rkey_t rkey, + gni_fma_cmd_type_t op_type, char *op_str) +{ + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + uct_ugni_rdma_fetch_desc_t *fma; + + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma, + return UCS_ERR_NO_RESOURCE); + uct_ugni_format_fma_amo(fma, GNI_POST_AMO, op_type, + op, 0, NULL, remote_addr, + rkey, LEN_64, ep, NULL, NULL, NULL); + ucs_trace_data("Posting AMO %s, GNI_PostFma of size %"PRIx64" value" + "%"PRIx64" to %p, with [%"PRIx64" %"PRIx64"]", + op_str, fma->super.desc.length, op, + (void *)fma->super.desc.remote_addr, + fma->super.desc.remote_mem_hndl.qword1, + fma->super.desc.remote_mem_hndl.qword2); + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t)); + return uct_ugni_post_fma(iface, ep, &fma->super, UCS_OK); +} + +ucs_status_t uct_ugni_ep_atomic64_post(uct_ep_h ep, unsigned opcode, uint64_t value, + uint64_t remote_addr, uct_rkey_t rkey) +{ + switch (opcode) { + case UCT_ATOMIC_OP_ADD: + return uct_ugni_ep_atomic_op64(ep, value, remote_addr, rkey, + GNI_FMA_ATOMIC_ADD, "ADD"); + case UCT_ATOMIC_OP_XOR: + return uct_ugni_ep_atomic_op64(ep, value, remote_addr, rkey, + 
GNI_FMA_ATOMIC_XOR, "XOR"); + case UCT_ATOMIC_OP_AND: + return uct_ugni_ep_atomic_op64(ep, value, remote_addr, rkey, + GNI_FMA_ATOMIC_AND, "AND"); + case UCT_ATOMIC_OP_OR: + return uct_ugni_ep_atomic_op64(ep, value, remote_addr, rkey, + GNI_FMA_ATOMIC_OR, "OR"); + default: + ucs_assertv(0, "incorrect opcode for atomic: %d", opcode); + return UCS_ERR_UNSUPPORTED; + } +} + +ucs_status_t uct_ugni_ep_atomic_fop64(uct_ep_h tl_ep, uint64_t op, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t *result, uct_completion_t *comp, + gni_fma_cmd_type_t op_type, char *op_str) +{ + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + uct_ugni_rdma_fetch_desc_t *fma; + + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma, + return UCS_ERR_NO_RESOURCE); + uct_ugni_format_fma_amo(fma, GNI_POST_AMO, op_type, + op, 0, fma + 1, remote_addr, + rkey, LEN_64, ep, comp, uct_ugni_amo_unpack64, (void *)result); + ucs_trace_data("Posting AMO %s, GNI_PostFma of size %"PRIx64" value" + "%"PRIx64" to %p, with [%"PRIx64" %"PRIx64"]", + op_str, fma->super.desc.length, op, + (void *)fma->super.desc.remote_addr, + fma->super.desc.remote_mem_hndl.qword1, + fma->super.desc.remote_mem_hndl.qword2); + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t)); + return uct_ugni_post_fma(iface, ep, &fma->super, UCS_INPROGRESS); +} + +ucs_status_t uct_ugni_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint64_t value, uint64_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + switch (opcode) { + case UCT_ATOMIC_OP_ADD: + return uct_ugni_ep_atomic_fop64(ep, value, remote_addr, rkey, result, comp, + GNI_FMA_ATOMIC_FADD, "FADD"); + case UCT_ATOMIC_OP_SWAP: + return uct_ugni_ep_atomic_fop64(ep, value, remote_addr, rkey, result, comp, + GNI_FMA_ATOMIC2_FSWAP, "FSWAP"); + case UCT_ATOMIC_OP_XOR: + return uct_ugni_ep_atomic_fop64(ep, value, remote_addr, 
rkey, result, comp, + GNI_FMA_ATOMIC_FXOR, "FXOR"); + case UCT_ATOMIC_OP_AND: + return uct_ugni_ep_atomic_fop64(ep, value, remote_addr, rkey, result, comp, + GNI_FMA_ATOMIC_FAND, "FAND"); + case UCT_ATOMIC_OP_OR: + return uct_ugni_ep_atomic_fop64(ep, value, remote_addr, rkey, result, comp, + GNI_FMA_ATOMIC_FOR, "FOR"); + default: + ucs_assertv(0, "incorrect opcode for atomic: %d", opcode); + return UCS_ERR_UNSUPPORTED; + } +} + +ucs_status_t uct_ugni_ep_atomic_fop32(uct_ep_h tl_ep, uint32_t op, + uint64_t remote_addr, uct_rkey_t rkey, + uint32_t *result, uct_completion_t *comp, + gni_fma_cmd_type_t op_type, char *op_str) +{ + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + uct_ugni_rdma_fetch_desc_t *fma; + + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_famo, fma, + return UCS_ERR_NO_RESOURCE); + uct_ugni_format_fma_amo(fma, GNI_POST_AMO, op_type, + (uint64_t)op, 0, fma + 1, remote_addr, + rkey, LEN_32, ep, comp, uct_ugni_amo_unpack32, (void *)result); + ucs_trace_data("Posting AMO %s, GNI_PostFma of size %"PRIx64" value" + "%"PRIx32" to %p, with [%"PRIx64" %"PRIx64"]", + op_str, fma->super.desc.length, op, + (void *)fma->super.desc.remote_addr, + fma->super.desc.remote_mem_hndl.qword1, + fma->super.desc.remote_mem_hndl.qword2); + UCT_TL_EP_STAT_ATOMIC(ucs_derived_of(tl_ep, uct_base_ep_t)); + return uct_ugni_post_fma(iface, ep, &fma->super, UCS_INPROGRESS); +} + +ucs_status_t uct_ugni_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint32_t value, uint32_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + switch (opcode) { + case UCT_ATOMIC_OP_ADD: + return uct_ugni_ep_atomic_fop32(ep, value, remote_addr, rkey, result, comp, + GNI_FMA_ATOMIC2_FIADD_S, "ADD"); + case UCT_ATOMIC_OP_SWAP: + return uct_ugni_ep_atomic_fop32(ep, value, remote_addr, rkey, result, comp, + GNI_FMA_ATOMIC2_FSWAP_S, "SWAP"); + case 
UCT_ATOMIC_OP_XOR: + return uct_ugni_ep_atomic_fop32(ep, value, remote_addr, rkey, result, comp, + GNI_FMA_ATOMIC2_FXOR_S, "XOR"); + case UCT_ATOMIC_OP_AND: + return uct_ugni_ep_atomic_fop32(ep, value, remote_addr, rkey, result, comp, + GNI_FMA_ATOMIC2_FAND_S, "AND"); + case UCT_ATOMIC_OP_OR: + return uct_ugni_ep_atomic_fop32(ep, value, remote_addr, rkey, result, comp, + GNI_FMA_ATOMIC2_FOR_S, "OR"); + default: + ucs_assertv(0, "incorrect opcode for atomic: %d", opcode); + return UCS_ERR_UNSUPPORTED; + } +} + +static void uct_ugni_unalign_fma_get_cb(uct_completion_t *self, ucs_status_t status) +{ + uct_ugni_rdma_fetch_desc_t *fma = (uct_ugni_rdma_fetch_desc_t *) + ucs_container_of(self, uct_ugni_rdma_fetch_desc_t, tmp); + + /* Call the orignal callback and skip padding */ + fma->super.unpack_cb(fma->user_buffer, (char *)(fma + 1) + fma->padding, + fma->super.desc.length - fma->padding - fma->tail); + + uct_ugni_invoke_orig_comp(fma, status); +} + +static inline void uct_ugni_format_get_fma(uct_ugni_rdma_fetch_desc_t *fma, + uint64_t remote_addr, uct_rkey_t rkey, + unsigned length, uct_ugni_ep_t *ep, + uct_completion_t *user_comp, + uct_completion_t *internal_comp, + uct_unpack_callback_t unpack_cb, + void *arg) +{ + uint64_t addr; + void *buffer; + unsigned align_length; + + fma->padding = ucs_padding_pow2(remote_addr, UGNI_GET_ALIGN); + fma->orig_comp_cb = user_comp; + /* Make sure that the address is always aligned */ + addr = remote_addr - fma->padding; + buffer = (fma + 1); + fma->user_buffer = arg; + /* Make sure that the length is always aligned */ + align_length = ucs_check_if_align_pow2(length + fma->padding, UGNI_GET_ALIGN) ? 
+ ucs_align_up_pow2((length + fma->padding), UGNI_GET_ALIGN):length + fma->padding; + fma->tail = align_length - length - fma->padding; + ucs_assert(ucs_check_if_align_pow2(addr, UGNI_GET_ALIGN)==0); + ucs_assert(ucs_check_if_align_pow2(align_length, UGNI_GET_ALIGN)==0); + uct_ugni_format_fma(&fma->super, GNI_POST_FMA_GET, buffer, addr, rkey, align_length, + ep, internal_comp, unpack_cb); +} + +static inline void uct_ugni_format_unaligned_rdma(uct_ugni_rdma_fetch_desc_t *rdma, + const void *buffer, uint64_t remote_addr, + uct_mem_h memh, uct_rkey_t rkey, + unsigned length, uct_ugni_ep_t *ep, + gni_cq_handle_t cq, + uct_completion_t *composed_comp) +{ + uint64_t addr; + unsigned align_len; + char *local_buffer; + size_t local_padding, remote_padding; + + addr = ucs_align_up_pow2((uint64_t)buffer, UGNI_GET_ALIGN); + local_padding = addr - (uint64_t)buffer; + local_buffer = (char *)addr; + + addr = ucs_align_down(remote_addr, UGNI_GET_ALIGN); + remote_padding = remote_addr - addr; + + rdma->padding = local_padding + remote_padding; + align_len = ucs_align_up(length + rdma->padding, UGNI_GET_ALIGN); + rdma->tail = align_len - (length + rdma->padding); + + uct_ugni_format_rdma(&(rdma->super), GNI_POST_RDMA_GET, local_buffer, addr, memh, rkey, + align_len, ep, cq, composed_comp); +} + +ucs_status_t uct_ugni_ep_get_bcopy(uct_ep_h tl_ep, + uct_unpack_callback_t unpack_cb, + void *arg, size_t length, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + uct_ugni_rdma_fetch_desc_t *fma; + + UCT_SKIP_ZERO_LENGTH(length); + UCT_CHECK_LENGTH(ucs_align_up_pow2(length, UGNI_GET_ALIGN), 0, + iface->config.fma_seg_size, "get_bcopy"); + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer, + fma, return UCS_ERR_NO_RESOURCE); + + fma->tmp.func = uct_ugni_unalign_fma_get_cb; + fma->tmp.count = 1; + 
uct_ugni_format_get_fma(fma, + remote_addr, rkey, length, + ep, comp, + &fma->tmp, + unpack_cb, arg); + + ucs_trace_data("Posting GET BCOPY, GNI_PostFma of size %"PRIx64" (%lu) from %p to " + "%p, with [%"PRIx64" %"PRIx64"]", + fma->super.desc.length, length, + (void *)fma->super.desc.local_addr, + (void *)fma->super.desc.remote_addr, + fma->super.desc.remote_mem_hndl.qword1, + fma->super.desc.remote_mem_hndl.qword2); + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, BCOPY, length); + return uct_ugni_post_fma(iface, ep, &fma->super, UCS_INPROGRESS); +} + +static void assemble_composed_unaligned(uct_completion_t *self, ucs_status_t status) +{ + uct_ugni_rdma_fetch_desc_t *fma_head = (uct_ugni_rdma_fetch_desc_t *) + ucs_container_of(self, uct_ugni_rdma_fetch_desc_t, tmp); + void *buffer = fma_head->user_buffer; + uct_ugni_rdma_fetch_desc_t *rdma = fma_head->head; + + if(fma_head->head == NULL){ + memcpy(buffer, (char *)(fma_head + 1) + fma_head->padding, + fma_head->super.desc.length - fma_head->padding - fma_head->tail); + } else { + memmove(buffer, buffer + rdma->padding, rdma->super.desc.length); + memcpy(buffer + rdma->super.desc.length - rdma->padding, + (char *)(fma_head + 1) + rdma->tail, + fma_head->super.desc.length - (fma_head->tail + rdma->tail)); + } + uct_ugni_invoke_orig_comp(fma_head, status); +} + +static void free_composed_desc(void *arg) +{ + uct_ugni_rdma_fetch_desc_t *desc = (uct_ugni_rdma_fetch_desc_t*)arg; + uct_ugni_rdma_fetch_desc_t *fma = ucs_container_of(desc->super.comp_cb, uct_ugni_rdma_fetch_desc_t, tmp); + uct_ugni_rdma_fetch_desc_t *rdma = fma->head; + + if (0 == --rdma->tmp.count) { + fma->super.free_cb = rdma->super.free_cb = ucs_mpool_put; + ucs_mpool_put(fma); + ucs_mpool_put(rdma); + } +} + +static ucs_status_t uct_ugni_ep_get_composed_fma_rdma(uct_ep_h tl_ep, void *buffer, size_t length, + uct_mem_h memh, uint64_t remote_addr, + uct_rkey_t rkey, uct_completion_t *comp) +{ + uct_ugni_rdma_fetch_desc_t *fma = NULL; + 
uct_ugni_rdma_fetch_desc_t *rdma = NULL; + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + size_t fma_length, rdma_length, aligned_fma_remote_start; + uint64_t fma_remote_start, rdma_remote_start; + ucs_status_t post_result; + + + rdma_length = length - iface->config.fma_seg_size; + fma_length = iface->config.fma_seg_size; + + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get_buffer, + fma, return UCS_ERR_NO_RESOURCE); + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc_get, + rdma, {ucs_mpool_put(fma);return UCS_ERR_NO_RESOURCE;}); + + rdma_remote_start = remote_addr; + fma_remote_start = rdma_remote_start + rdma_length; + aligned_fma_remote_start = ucs_align_up_pow2(fma_remote_start, UGNI_GET_ALIGN); + /* The FMA completion is used to signal when both descs have completed. */ + fma->tmp.count = 2; + fma->tmp.func = assemble_composed_unaligned; + /* The RDMA completion is used to signal when both descs have been freed */ + rdma->tmp.count = 2; + uct_ugni_format_get_fma(fma, aligned_fma_remote_start, rkey, fma_length, ep, comp, &fma->tmp, NULL, NULL); + fma->tail = aligned_fma_remote_start - fma_remote_start; + uct_ugni_format_unaligned_rdma(rdma, buffer, rdma_remote_start, memh, rkey, + rdma_length+fma->tail, ep, iface->super.local_cq, + &fma->tmp); + fma->head = rdma; + rdma->head = fma; + fma->user_buffer = rdma->user_buffer = buffer; + fma->super.free_cb = rdma->super.free_cb = free_composed_desc; + + ucs_trace_data("Posting split GET ZCOPY, GNI_PostFma of size %"PRIx64" (%lu) from %p to " + "%p, with [%"PRIx64" %"PRIx64"] and GNI_PostRdma of size %"PRIx64" (%lu)" + " from %p to %p, with [%"PRIx64" %"PRIx64"]", + fma->super.desc.length, length, + (void *)fma->super.desc.local_addr, + (void *)fma->super.desc.remote_addr, + fma->super.desc.remote_mem_hndl.qword1, + fma->super.desc.remote_mem_hndl.qword2, + rdma->super.desc.length, 
length, + (void *)rdma->super.desc.local_addr, + (void *)rdma->super.desc.remote_addr, + rdma->super.desc.remote_mem_hndl.qword1, + rdma->super.desc.remote_mem_hndl.qword2); + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, length); + post_result = uct_ugni_post_fma(iface, ep, &(fma->super), UCS_INPROGRESS); + if(post_result != UCS_OK && post_result != UCS_INPROGRESS){ + ucs_mpool_put(rdma); + return post_result; + } + return uct_ugni_post_rdma(iface, ep, &(rdma->super)); +} + +static ucs_status_t uct_ugni_ep_get_composed(uct_ep_h tl_ep, void *buffer, size_t length, + uct_mem_h memh, uint64_t remote_addr, + uct_rkey_t rkey, uct_completion_t *comp) +{ + uint64_t aligned_remote = ucs_align_down(remote_addr, UGNI_GET_ALIGN); + uint64_t remote_padding = remote_addr - aligned_remote; + uint64_t fetch_length = length + remote_padding; + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + + if(fetch_length < iface->config.fma_seg_size) { + return uct_ugni_ep_get_bcopy(tl_ep, + (uct_unpack_callback_t)memcpy, + buffer, length, + remote_addr, rkey, + comp); + } + + return uct_ugni_ep_get_composed_fma_rdma(tl_ep, buffer, length, memh, + remote_addr, rkey, comp); +} + +ucs_status_t uct_ugni_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t iovcnt, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp) +{ + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_rdma_iface_t); + uct_ugni_base_desc_t *rdma; + + UCT_CHECK_PARAM_IOV(iov, iovcnt, buffer, length, memh); + UCT_SKIP_ZERO_LENGTH(length); + UCT_CHECK_LENGTH(ucs_align_up_pow2(length, UGNI_GET_ALIGN), 0, + iface->config.rdma_max_size, "get_zcopy"); + + /* Special flow for an unalign data */ + if (ucs_unlikely((uct_ugni_check_device_type(&iface->super, GNI_DEVICE_GEMINI) && + ucs_check_if_align_pow2((uintptr_t)buffer, UGNI_GET_ALIGN)) || + ucs_check_if_align_pow2(remote_addr, 
UGNI_GET_ALIGN) || + ucs_check_if_align_pow2(length, UGNI_GET_ALIGN))) { + return uct_ugni_ep_get_composed(tl_ep, buffer, length, memh, + remote_addr, rkey, comp); + } + + /* Everything is perfectly aligned */ + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, rdma, + return UCS_ERR_NO_RESOURCE); + + /* Setup Callback */ + uct_ugni_format_rdma(rdma, GNI_POST_RDMA_GET, buffer, remote_addr, memh, rkey, + ucs_align_up_pow2(length, UGNI_GET_ALIGN), ep, iface->super.local_cq, comp); + + ucs_trace_data("Posting GET ZCOPY, GNI_PostRdma of size %"PRIx64" (%lu) " + "from %p to %p, with [%"PRIx64" %"PRIx64"]", + rdma->desc.length, length, + (void *)rdma->desc.local_addr, + (void *)rdma->desc.remote_addr, + rdma->desc.remote_mem_hndl.qword1, + rdma->desc.remote_mem_hndl.qword2); + UCT_TL_EP_STAT_OP(ucs_derived_of(tl_ep, uct_base_ep_t), GET, ZCOPY, length); + return uct_ugni_post_rdma(iface, ep, rdma); +} + +UCS_CLASS_INIT_FUNC(uct_ugni_rdma_ep_t, const uct_ep_params_t *params) +{ + UCS_CLASS_CALL_SUPER_INIT(uct_ugni_ep_t, params); + UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(params); + ucs_status_t rc; + + uct_ugni_iface_t *iface = ucs_derived_of(params->iface, uct_ugni_iface_t); + const uct_sockaddr_ugni_t *iface_addr = (const uct_sockaddr_ugni_t*)params->iface_addr; + const uct_devaddr_ugni_t *ugni_dev_addr = (const uct_devaddr_ugni_t *)params->dev_addr; + + ucs_debug("Connecting RDMA ep %p", self); + rc = ugni_connect_ep(&self->super, iface, iface_addr, ugni_dev_addr); + + if (UCS_OK != rc) { + ucs_error("Could not connect ep %p", self); + return rc; + } + + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_ugni_rdma_ep_t) +{ + return; +} + +UCS_CLASS_DEFINE(uct_ugni_rdma_ep_t, uct_ugni_ep_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_ugni_rdma_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_ugni_rdma_ep_t, uct_ep_t); + +ucs_status_t uct_ugni_ep_am_short(uct_ep_h ep, uint8_t id, uint64_t header, + const void *payload, unsigned length) +{ + 
return UCS_ERR_UNSUPPORTED; +} diff --git a/src/uct/ugni/rdma/ugni_rdma_ep.h b/src/uct/ugni/rdma/ugni_rdma_ep.h new file mode 100644 index 0000000..72c2d83 --- /dev/null +++ b/src/uct/ugni/rdma/ugni_rdma_ep.h @@ -0,0 +1,74 @@ +/** +* Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED. +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#ifndef UCT_UGNI_RDMA_EP_H +#define UCT_UGNI_RDMA_EP_H + +#include +#include +#include +#include + + +ucs_status_t uct_ugni_ep_put_short(uct_ep_h tl_ep, const void *buffer, + unsigned length, uint64_t remote_addr, + uct_rkey_t rkey); +ssize_t uct_ugni_ep_put_bcopy(uct_ep_h ep, uct_pack_callback_t pack_cb, + void *arg, uint64_t remote_addr, uct_rkey_t rkey); +ucs_status_t uct_ugni_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t size, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); +ucs_status_t uct_ugni_ep_am_short(uct_ep_h ep, uint8_t id, uint64_t header, + const void *payload, unsigned length); +ucs_status_t uct_ugni_ep_atomic_add64(uct_ep_h tl_ep, uint64_t add, + uint64_t remote_addr, uct_rkey_t rkey); +ucs_status_t uct_ugni_ep_atomic_fadd64(uct_ep_h tl_ep, uint64_t add, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t *result, uct_completion_t *comp); +ucs_status_t uct_ugni_ep_atomic_swap64(uct_ep_h tl_ep, uint64_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t *result, uct_completion_t *comp); +ucs_status_t uct_ugni_ep_atomic_cswap64(uct_ep_h tl_ep, uint64_t compare, uint64_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint64_t *result, uct_completion_t *comp); +ucs_status_t uct_ugni_ep_atomic_add32(uct_ep_h tl_ep, uint32_t add, + uint64_t remote_addr, uct_rkey_t rkey); +ucs_status_t uct_ugni_ep_atomic_fadd32(uct_ep_h tl_ep, uint32_t add, + uint64_t remote_addr, uct_rkey_t rkey, + uint32_t *result, uct_completion_t *comp); +ucs_status_t uct_ugni_ep_atomic_swap32(uct_ep_h tl_ep, uint32_t swap, + uint64_t 
remote_addr, uct_rkey_t rkey, + uint32_t *result, uct_completion_t *comp); +ucs_status_t uct_ugni_ep_atomic_cswap32(uct_ep_h tl_ep, uint32_t compare, uint32_t swap, + uint64_t remote_addr, uct_rkey_t rkey, + uint32_t *result, uct_completion_t *comp); +ucs_status_t uct_ugni_ep_atomic64_post(uct_ep_h ep, unsigned opcode, uint64_t value, + uint64_t remote_addr, uct_rkey_t rkey); +ucs_status_t uct_ugni_ep_atomic32_post(uct_ep_h ep, unsigned opcode, uint32_t value, + uint64_t remote_addr, uct_rkey_t rkey); +ucs_status_t uct_ugni_ep_atomic64_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint64_t value, uint64_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); +ucs_status_t uct_ugni_ep_atomic32_fetch(uct_ep_h ep, uct_atomic_op_t opcode, + uint32_t value, uint32_t *result, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); +ucs_status_t uct_ugni_ep_get_bcopy(uct_ep_h tl_ep, + uct_unpack_callback_t unpack_cb, + void *arg, size_t length, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); +ucs_status_t uct_ugni_ep_get_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, size_t size, + uint64_t remote_addr, uct_rkey_t rkey, + uct_completion_t *comp); + +typedef struct uct_ugni_rdma_ep { + uct_ugni_ep_t super; +} uct_ugni_rdma_ep_t; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_ugni_rdma_ep_t, uct_ep_t, const uct_ep_params_t *); +#endif diff --git a/src/uct/ugni/rdma/ugni_rdma_iface.c b/src/uct/ugni/rdma/ugni_rdma_iface.c new file mode 100644 index 0000000..16f7e31 --- /dev/null +++ b/src/uct/ugni/rdma/ugni_rdma_iface.c @@ -0,0 +1,373 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "ugni_rdma_ep.h" +#include "ugni_rdma_iface.h" +#include +#include +#include + +static ucs_config_field_t uct_ugni_rdma_iface_config_table[] = { + /* This tuning controls the allocation priorities for bouncing buffers */ + { "", "ALLOC=huge,mmap,heap", NULL, + ucs_offsetof(uct_ugni_rdma_iface_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + UCT_IFACE_MPOOL_CONFIG_FIELDS("RDMA", -1, 0, "rdma", + ucs_offsetof(uct_ugni_rdma_iface_config_t, mpool), + "\nAttention: Setting this param with value != -1 is a dangerous thing\n" + "and could cause deadlock or performance degradation."), + + {NULL} +}; + +static ucs_status_t uct_ugni_rdma_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) +{ + uct_ugni_rdma_iface_t *iface = ucs_derived_of(tl_iface, uct_ugni_rdma_iface_t); + + uct_base_iface_query(&iface->super.super, iface_attr); + + iface_attr->cap.put.max_short = iface->config.fma_seg_size; + iface_attr->cap.put.max_bcopy = iface->config.fma_seg_size; + iface_attr->cap.put.min_zcopy = 0; + iface_attr->cap.put.max_zcopy = iface->config.rdma_max_size; + iface_attr->cap.put.opt_zcopy_align = 1; + iface_attr->cap.put.align_mtu = iface_attr->cap.put.opt_zcopy_align; + iface_attr->cap.put.max_iov = 1; + + iface_attr->cap.get.max_bcopy = iface->config.fma_seg_size - 8; /* alignment offset 4 (addr)+ 4 (len)*/ + iface_attr->cap.get.min_zcopy = 0; + iface_attr->cap.get.max_zcopy = iface->config.rdma_max_size; + iface_attr->cap.get.opt_zcopy_align = 1; + iface_attr->cap.get.align_mtu = iface_attr->cap.get.opt_zcopy_align; + iface_attr->cap.get.max_iov = 1; + + iface_attr->cap.am.max_iov = 1; + iface_attr->cap.am.opt_zcopy_align = 1; + iface_attr->cap.am.align_mtu = iface_attr->cap.am.opt_zcopy_align; + + iface_attr->device_addr_len = sizeof(uct_devaddr_ugni_t); + iface_attr->iface_addr_len = sizeof(uct_sockaddr_ugni_t); + iface_attr->ep_addr_len = 0; + iface_attr->max_conn_priv = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_PUT_SHORT | 
+ UCT_IFACE_FLAG_PUT_BCOPY | + UCT_IFACE_FLAG_PUT_ZCOPY | + UCT_IFACE_FLAG_GET_BCOPY | + UCT_IFACE_FLAG_GET_ZCOPY | + UCT_IFACE_FLAG_CONNECT_TO_IFACE | + UCT_IFACE_FLAG_ATOMIC_DEVICE | + UCT_IFACE_FLAG_PENDING; + + iface_attr->cap.atomic64.op_flags = UCS_BIT(UCT_ATOMIC_OP_ADD)| + UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR); + + iface_attr->cap.atomic64.fop_flags = UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR) | + UCS_BIT(UCT_ATOMIC_OP_SWAP) | + UCS_BIT(UCT_ATOMIC_OP_CSWAP); + + + if (uct_ugni_check_device_type(&iface->super, GNI_DEVICE_ARIES)) { + iface_attr->cap.flags |= UCT_IFACE_FLAG_PUT_SHORT; + + iface_attr->cap.atomic64.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_SWAP); + iface_attr->cap.atomic32.op_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR); + iface_attr->cap.atomic32.fop_flags |= UCS_BIT(UCT_ATOMIC_OP_ADD) | + UCS_BIT(UCT_ATOMIC_OP_AND) | + UCS_BIT(UCT_ATOMIC_OP_OR) | + UCS_BIT(UCT_ATOMIC_OP_XOR) | + UCS_BIT(UCT_ATOMIC_OP_SWAP) | + UCS_BIT(UCT_ATOMIC_OP_CSWAP); + } + iface_attr->overhead = 80e-9; /* 80 ns */ + iface_attr->latency.overhead = 900e-9; /* 900 ns */ + iface_attr->latency.growth = 0; + iface_attr->bandwidth.dedicated = 6911 * pow(1024,2); /* bytes */ + iface_attr->bandwidth.shared = 0; + iface_attr->priority = 0; + + return UCS_OK; +} + +void uct_ugni_base_desc_init(ucs_mpool_t *mp, void *obj, void *chunk) +{ + uct_ugni_base_desc_t *base = (uct_ugni_base_desc_t *) obj; + /* zero base descriptor */ + memset(base, 0 , sizeof(*base)); + base->free_cb = ucs_mpool_put; +} + +void uct_ugni_base_desc_key_init(uct_iface_h iface, void *obj, uct_mem_h memh) +{ + uct_ugni_base_desc_t *base = (uct_ugni_base_desc_t *)obj; + /* call base initialization */ + uct_ugni_base_desc_init(NULL, obj, NULL); + /* set local keys */ + base->desc.local_mem_hndl = *(gni_mem_handle_t 
*)memh; +} + +unsigned uct_ugni_progress(void *arg) +{ + gni_cq_entry_t event_data = 0; + gni_post_descriptor_t *event_post_desc_ptr; + uct_ugni_base_desc_t *desc; + uct_ugni_iface_t * iface = (uct_ugni_iface_t *)arg; + gni_return_t ugni_rc; + unsigned count = 0; + + while (1) { + uct_ugni_cdm_lock(&iface->cdm); + ugni_rc = GNI_CqGetEvent(iface->local_cq, &event_data); + if (GNI_RC_NOT_DONE == ugni_rc) { + uct_ugni_cdm_unlock(&iface->cdm); + break; + } + if ((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)) { + uct_ugni_cdm_unlock(&iface->cdm); + ucs_error("GNI_CqGetEvent falied. Error status %s %d ", + gni_err_str[ugni_rc], ugni_rc); + return count; + } + + ugni_rc = GNI_GetCompleted(iface->local_cq, event_data, &event_post_desc_ptr); + uct_ugni_cdm_unlock(&iface->cdm); + if (GNI_RC_SUCCESS != ugni_rc && GNI_RC_TRANSACTION_ERROR != ugni_rc) { + ucs_error("GNI_GetCompleted falied. Error status %s %d", + gni_err_str[ugni_rc], ugni_rc); + return count; + } + + desc = (uct_ugni_base_desc_t *)event_post_desc_ptr; + ucs_trace_async("Completion received on %p", desc); + if (NULL != desc->comp_cb) { + uct_invoke_completion(desc->comp_cb, UCS_OK); + } + desc->free_cb(desc); + iface->outstanding--; + uct_ugni_check_flush(desc->flush_group); + ++count; + } + /* have a go a processing the pending queue */ + ucs_arbiter_dispatch(&iface->arbiter, 1, uct_ugni_ep_process_pending, NULL); + return count; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_ugni_rdma_iface_t) +{ + uct_worker_progress_remove(self->super.super.worker, &self->super.super.prog); + ucs_mpool_cleanup(&self->free_desc_get_buffer, 1); + ucs_mpool_cleanup(&self->free_desc_get, 1); + ucs_mpool_cleanup(&self->free_desc_famo, 1); + ucs_mpool_cleanup(&self->free_desc_buffer, 1); + ucs_mpool_cleanup(&self->free_desc, 1); +} + +static UCS_CLASS_DEFINE_DELETE_FUNC(uct_ugni_rdma_iface_t, uct_iface_t); + +static uct_iface_ops_t uct_ugni_aries_rdma_iface_ops = { + .ep_put_short = uct_ugni_ep_put_short, + 
.ep_put_bcopy = uct_ugni_ep_put_bcopy, + .ep_put_zcopy = uct_ugni_ep_put_zcopy, + .ep_get_bcopy = uct_ugni_ep_get_bcopy, + .ep_get_zcopy = uct_ugni_ep_get_zcopy, + .ep_am_short = uct_ugni_ep_am_short, + .ep_atomic_cswap64 = uct_ugni_ep_atomic_cswap64, + .ep_atomic_cswap32 = uct_ugni_ep_atomic_cswap32, + .ep_atomic64_post = uct_ugni_ep_atomic64_post, + .ep_atomic32_post = uct_ugni_ep_atomic32_post, + .ep_atomic64_fetch = uct_ugni_ep_atomic64_fetch, + .ep_atomic32_fetch = uct_ugni_ep_atomic32_fetch, + .ep_pending_add = uct_ugni_ep_pending_add, + .ep_pending_purge = uct_ugni_ep_pending_purge, + .ep_flush = uct_ugni_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_ugni_rdma_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_ugni_ep_t), + .iface_flush = uct_ugni_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_progress_enable = ucs_empty_function, + .iface_progress_disable = ucs_empty_function, + .iface_progress = (void*)uct_ugni_progress, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_ugni_rdma_iface_t), + .iface_query = uct_ugni_rdma_iface_query, + .iface_get_device_address = uct_ugni_iface_get_dev_address, + .iface_get_address = uct_ugni_iface_get_address, + .iface_is_reachable = uct_ugni_iface_is_reachable +}; + +static uct_iface_ops_t uct_ugni_gemini_rdma_iface_ops = { + .ep_put_short = uct_ugni_ep_put_short, + .ep_put_bcopy = uct_ugni_ep_put_bcopy, + .ep_put_zcopy = uct_ugni_ep_put_zcopy, + .ep_get_bcopy = uct_ugni_ep_get_bcopy, + .ep_get_zcopy = uct_ugni_ep_get_zcopy, + .ep_am_short = uct_ugni_ep_am_short, + .ep_atomic_cswap64 = uct_ugni_ep_atomic_cswap64, + .ep_pending_add = uct_ugni_ep_pending_add, + .ep_pending_purge = uct_ugni_ep_pending_purge, + .ep_flush = uct_ugni_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_ugni_rdma_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_ugni_ep_t), + .iface_flush = uct_ugni_iface_flush, + .iface_fence = uct_base_iface_fence, + 
.iface_progress_enable = ucs_empty_function, + .iface_progress_disable = ucs_empty_function, + .iface_progress = (void*)uct_ugni_progress, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_ugni_rdma_iface_t), + .iface_query = uct_ugni_rdma_iface_query, + .iface_get_device_address = uct_ugni_iface_get_dev_address, + .iface_get_address = uct_ugni_iface_get_address, + .iface_is_reachable = uct_ugni_iface_is_reachable +}; + +static ucs_mpool_ops_t uct_ugni_rdma_desc_mpool_ops = { + .chunk_alloc = ucs_mpool_hugetlb_malloc, + .chunk_release = ucs_mpool_hugetlb_free, + .obj_init = uct_ugni_base_desc_init, + .obj_cleanup = NULL +}; + +static uct_iface_ops_t *uct_ugni_rdma_choose_ops_by_device(uct_ugni_device_t *dev) +{ + switch(dev->type) { + case GNI_DEVICE_GEMINI: + return &uct_ugni_gemini_rdma_iface_ops; + case GNI_DEVICE_ARIES: + return &uct_ugni_aries_rdma_iface_ops; + default: + ucs_error("Unexpected device found in uct_ugni_rdma_choose_ops_by_device." + "unexpected device type %s", dev->type_name); + return NULL; + } +} + +static UCS_CLASS_INIT_FUNC(uct_ugni_rdma_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_ugni_rdma_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_rdma_iface_config_t); + ucs_status_t status; + uct_ugni_device_t *dev = uct_ugni_device_by_name(params->mode.device.dev_name); + uct_iface_ops_t *ops; + + ops = uct_ugni_rdma_choose_ops_by_device(dev); + if (NULL == ops) { + status = UCS_ERR_NO_DEVICE; + goto exit; + } + UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params, ops, + &config->super UCS_STATS_ARG(NULL)); + /* Setting initial configuration */ + self->config.fma_seg_size = UCT_UGNI_MAX_FMA; + self->config.rdma_max_size = UCT_UGNI_MAX_RDMA; + + status = ucs_mpool_init(&self->free_desc, + 0, + sizeof(uct_ugni_base_desc_t), + 0, /* alignment offset */ + UCS_SYS_CACHE_LINE_SIZE, /* alignment */ + 128, /* grow */ + config->mpool.max_bufs, /* max 
buffers */ + &uct_ugni_rdma_desc_mpool_ops, + "UGNI-DESC-ONLY"); + if (UCS_OK != status) { + ucs_error("Mpool creation failed"); + goto exit; + } + + status = ucs_mpool_init(&self->free_desc_get, + 0, + sizeof(uct_ugni_rdma_fetch_desc_t), + 0, /* alignment offset */ + UCS_SYS_CACHE_LINE_SIZE, /* alignment */ + 128 , /* grow */ + config->mpool.max_bufs, /* max buffers */ + &uct_ugni_rdma_desc_mpool_ops, + "UGNI-GET-DESC-ONLY"); + if (UCS_OK != status) { + ucs_error("Mpool creation failed"); + goto clean_desc; + } + + status = ucs_mpool_init(&self->free_desc_buffer, + 0, + sizeof(uct_ugni_base_desc_t) + self->config.fma_seg_size, + sizeof(uct_ugni_base_desc_t), /* alignment offset */ + UCS_SYS_CACHE_LINE_SIZE, /* alignment */ + 128 , /* grow */ + config->mpool.max_bufs, /* max buffers */ + &uct_ugni_rdma_desc_mpool_ops, + "UGNI-DESC-BUFFER"); + if (UCS_OK != status) { + ucs_error("Mpool creation failed"); + goto clean_desc_get; + } + + status = uct_iface_mpool_init(&self->super.super, + &self->free_desc_famo, + sizeof(uct_ugni_rdma_fetch_desc_t) + 8, + sizeof(uct_ugni_rdma_fetch_desc_t),/* alignment offset */ + UCS_SYS_CACHE_LINE_SIZE, /* alignment */ + &config->mpool, /* mpool config */ + 128 , /* grow */ + uct_ugni_base_desc_key_init, /* memory/key init */ + "UGNI-DESC-FAMO"); + if (UCS_OK != status) { + ucs_error("Mpool creation failed"); + goto clean_buffer; + } + + status = uct_iface_mpool_init(&self->super.super, + &self->free_desc_get_buffer, + sizeof(uct_ugni_rdma_fetch_desc_t) + + self->config.fma_seg_size, + sizeof(uct_ugni_rdma_fetch_desc_t), /* alignment offset */ + UCS_SYS_CACHE_LINE_SIZE, /* alignment */ + &config->mpool, /* mpool config */ + 128 , /* grow */ + uct_ugni_base_desc_key_init, /* memory/key init */ + "UGNI-DESC-GET"); + if (UCS_OK != status) { + ucs_error("Mpool creation failed"); + goto clean_famo; + } + + /* TBD: eventually the uct_ugni_progress has to be moved to + * rdma layer so each ugni layer will have own progress */ + 
uct_worker_progress_add_safe(self->super.super.worker, uct_ugni_progress, self, + &self->super.super.prog); + return UCS_OK; + +clean_famo: + ucs_mpool_cleanup(&self->free_desc_famo, 1); +clean_buffer: + ucs_mpool_cleanup(&self->free_desc_buffer, 1); +clean_desc_get: + ucs_mpool_cleanup(&self->free_desc_get, 1); +clean_desc: + ucs_mpool_cleanup(&self->free_desc, 1); +exit: + uct_ugni_cleanup_base_iface(&self->super); + ucs_error("Failed to activate interface"); + return status; +} + +UCS_CLASS_DEFINE(uct_ugni_rdma_iface_t, uct_ugni_iface_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_ugni_rdma_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t*); + +UCT_TL_DEFINE(&uct_ugni_component, ugni_rdma, uct_ugni_query_devices, + uct_ugni_rdma_iface_t, "UGNI_RDMA_", + uct_ugni_rdma_iface_config_table, uct_ugni_rdma_iface_config_t); diff --git a/src/uct/ugni/rdma/ugni_rdma_iface.h b/src/uct/ugni/rdma/ugni_rdma_iface.h new file mode 100644 index 0000000..c40c74e --- /dev/null +++ b/src/uct/ugni/rdma/ugni_rdma_iface.h @@ -0,0 +1,67 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#ifndef UCT_UGNI_RDMA_IFACE_H +#define UCT_UGNI_RDMA_IFACE_H + +#include +#include +#include + + +#define UCT_UGNI_MAX_FMA 2048 +#define UCT_UGNI_MAX_RDMA (512*1024*1024); + +typedef struct uct_ugni_rdma_iface { + uct_ugni_iface_t super; /**< Super type */ + ucs_mpool_t free_desc; /**< Pool of FMA descriptors for + requests without bouncing buffers */ + ucs_mpool_t free_desc_get; /**< Pool of FMA descriptors for + unaligned get requests without + bouncing buffers */ + ucs_mpool_t free_desc_buffer; /**< Pool of FMA descriptors for + requests with bouncing buffer*/ + ucs_mpool_t free_desc_famo; /**< Pool of FMA descriptors for + 64/32 bit fetched-atomic operations + (registered memory) */ + ucs_mpool_t free_desc_get_buffer; /**< Pool of FMA descriptors for + FMA_SIZE fetch operations + (registered memory) */ + struct { + unsigned fma_seg_size; /**< FMA Segment size */ + unsigned rdma_max_size; /**< Max RDMA size */ + } config; +} uct_ugni_rdma_iface_t; + +typedef struct uct_ugni_rdma_iface_config { + uct_iface_config_t super; + uct_iface_mpool_config_t mpool; +} uct_ugni_rdma_iface_config_t; + +typedef void (*ugni_desc_free_cb_t)(void *desc); + +typedef struct uct_ugni_base_desc { + gni_post_descriptor_t desc; + uct_completion_t *comp_cb; + uct_unpack_callback_t unpack_cb; + uct_ugni_flush_group_t *flush_group; + ugni_desc_free_cb_t free_cb; +} uct_ugni_base_desc_t; + +typedef struct uct_ugni_rdma_fetch_desc { + uct_ugni_base_desc_t super; + uct_completion_t tmp; + uct_completion_t *orig_comp_cb; + size_t padding; + + /* Handling unalined composed get messages */ + struct uct_ugni_rdma_fetch_desc* head; /**< Pointer to the head descriptor + that manages the completion of the operation */ + void *user_buffer; /**< Pointer to user's buffer, here to ensure it's always available for composed messages */ + size_t tail; /**< Tail parameter to specify how many bytes at the end of a fma/rdma are garbage*/ +} uct_ugni_rdma_fetch_desc_t; + +#endif diff --git 
a/src/uct/ugni/smsg/ugni_smsg_ep.c b/src/uct/ugni/smsg/ugni_smsg_ep.c new file mode 100644 index 0000000..22694ec --- /dev/null +++ b/src/uct/ugni/smsg/ugni_smsg_ep.c @@ -0,0 +1,305 @@ +/** + * Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#include "ugni_smsg_ep.h" +#include "ugni_smsg_iface.h" +#include +#include + +SGLIB_DEFINE_LIST_FUNCTIONS(uct_ugni_smsg_desc_t, uct_ugni_smsg_desc_compare, next); +SGLIB_DEFINE_HASHED_CONTAINER_FUNCTIONS(uct_ugni_smsg_desc_t, UCT_UGNI_HASH_SIZE, uct_ugni_smsg_desc_hash); + +static void compact_smsg_attr(gni_smsg_attr_t *smsg_attr, uct_ugni_compact_smsg_attr_t *smsg_compact_attr) +{ + smsg_compact_attr->msg_buffer = smsg_attr->msg_buffer; + smsg_compact_attr->mbox_offset = smsg_attr->mbox_offset; + smsg_compact_attr->mem_hndl = smsg_attr->mem_hndl; +} + +static void uncompact_smsg_attr(uct_ugni_smsg_iface_t *iface, uct_ugni_compact_smsg_attr_t *smsg_compact_attr, gni_smsg_attr_t *smsg_attr) +{ + smsg_attr->mem_hndl = smsg_compact_attr->mem_hndl; + smsg_attr->msg_buffer = smsg_compact_attr->msg_buffer; + smsg_attr->msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; + smsg_attr->buff_size = iface->bytes_per_mbox; + smsg_attr->mbox_offset = smsg_compact_attr->mbox_offset; + smsg_attr->mbox_maxcredit = iface->config.smsg_max_credit; + smsg_attr->msg_maxsize = iface->config.smsg_seg_size; +} + +static void uct_ugni_smsg_mbox_init(uct_ugni_smsg_iface_t *iface, uct_ugni_smsg_mbox_t *mbox_info){ + void *mbox_data = (void *)(mbox_info+1); + + mbox_info->mbox_attr.mem_hndl = mbox_info->gni_mem; + mbox_info->mbox_attr.msg_buffer = mbox_data; + mbox_info->mbox_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; + mbox_info->mbox_attr.buff_size = iface->bytes_per_mbox; + mbox_info->mbox_attr.mbox_offset = 0; + mbox_info->mbox_attr.mbox_maxcredit = iface->config.smsg_max_credit; + mbox_info->mbox_attr.msg_maxsize = 
iface->config.smsg_seg_size; +} + +static ucs_status_t uct_ugni_smsg_mbox_reg(uct_ugni_smsg_iface_t *iface, uct_ugni_smsg_mbox_t *mbox) +{ + gni_return_t ugni_rc; + void *address = (mbox+1); + + if (0 == iface->bytes_per_mbox) { + ucs_error("Unexpected length %zu", iface->bytes_per_mbox); + return UCS_ERR_INVALID_PARAM; + } + + uct_ugni_cdm_lock(&iface->super.cdm); + ugni_rc = GNI_MemRegister(uct_ugni_iface_nic_handle(&iface->super), (uint64_t)address, + iface->bytes_per_mbox, iface->remote_cq, + GNI_MEM_READWRITE, + -1, &(mbox->gni_mem)); + uct_ugni_cdm_unlock(&iface->super.cdm); + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_MemRegister failed (addr %p, size %zu), Error status: %s %d", + address, iface->bytes_per_mbox, gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_IO_ERROR; + } + + mbox->base_address = (uintptr_t)address; + + return UCS_OK; +} + +static ucs_status_t uct_ugni_smsg_mbox_dereg(uct_ugni_smsg_iface_t *iface, uct_ugni_smsg_mbox_t *mbox){ + gni_return_t ugni_rc; + + uct_ugni_cdm_lock(&iface->super.cdm); + ugni_rc = GNI_MemDeregister(uct_ugni_iface_nic_handle(&iface->super), &mbox->gni_mem); + uct_ugni_cdm_unlock(&iface->super.cdm); + + if (GNI_RC_SUCCESS != ugni_rc) { + ucs_error("GNI_MemDeregister failed Error status: %s %d", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_IO_ERROR; + } + + return UCS_OK; +} + +UCS_CLASS_INIT_FUNC(uct_ugni_smsg_ep_t, const uct_ep_params_t *params) +{ + uct_ugni_smsg_iface_t *iface = ucs_derived_of(params->iface, uct_ugni_smsg_iface_t); + void *mbox; + + UCS_CLASS_CALL_SUPER_INIT(uct_ugni_ep_t, params); + ucs_debug("Setting up SMSG ep"); + + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_mbox, + mbox, return UCS_ERR_NO_RESOURCE); + + self->smsg_attr = (uct_ugni_smsg_mbox_t *)mbox; + + uct_ugni_smsg_mbox_reg(iface, self->smsg_attr); + uct_ugni_smsg_mbox_init(iface, self->smsg_attr); + compact_smsg_attr(&self->smsg_attr->mbox_attr, &self->smsg_compact_attr); + + return UCS_OK; +} + +static 
UCS_CLASS_CLEANUP_FUNC(uct_ugni_smsg_ep_t) +{ + uct_ugni_smsg_iface_t *iface = ucs_derived_of(self->super.super.super.iface, uct_ugni_smsg_iface_t); + ucs_status_t status; + + do { + status = iface->super.super.super.ops.ep_flush(&self->super.super.super, 0, NULL); + } while(UCS_OK != status); + + progress_remote_cq(iface); + uct_ugni_smsg_mbox_dereg(iface, self->smsg_attr); + ucs_mpool_put(self->smsg_attr); +} + +UCS_CLASS_DEFINE(uct_ugni_smsg_ep_t, uct_ugni_ep_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_ugni_smsg_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DEFINE_DELETE_FUNC(uct_ugni_smsg_ep_t, uct_ep_t); + +ucs_status_t uct_ugni_smsg_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr) { + + uct_ugni_smsg_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t); + uct_sockaddr_smsg_ugni_t *ep_addr = (uct_sockaddr_smsg_ugni_t*)addr; + + ucs_status_t rc; + + rc = uct_ugni_iface_get_address(tl_ep->iface, (uct_iface_addr_t*)addr); + + if(UCS_OK != rc){ + return rc; + } + + ep_addr->ep_hash = ep->super.hash_key; + memcpy(&ep_addr->smsg_compact_attr, &ep->smsg_compact_attr, sizeof(ep_addr->smsg_compact_attr)); + + return UCS_OK; +} + +ucs_status_t uct_ugni_smsg_ep_connect_to_ep(uct_ep_h tl_ep, + const uct_device_addr_t *dev_addr, + const uct_ep_addr_t *ep_addr) +{ + uct_ugni_smsg_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t); + uct_ugni_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_iface_t); + const uct_sockaddr_smsg_ugni_t *iface_addr = (const uct_sockaddr_smsg_ugni_t*)ep_addr; + const uct_devaddr_ugni_t *ugni_dev_addr = (const uct_devaddr_ugni_t *)dev_addr; + gni_smsg_attr_t *local_attr = (gni_smsg_attr_t*)&ep->smsg_attr->mbox_attr; + uct_ugni_compact_smsg_attr_t *compact_remote_attr = (uct_ugni_compact_smsg_attr_t *)&iface_addr->smsg_compact_attr; + gni_smsg_attr_t remote_attr; + gni_return_t gni_rc; + ucs_status_t rc = UCS_OK; + uint32_t ep_hash; + + uncompact_smsg_attr(ucs_derived_of(iface, uct_ugni_smsg_iface_t), + compact_remote_attr, &remote_attr); 
+ rc = ugni_connect_ep(&ep->super, iface, &iface_addr->super, ugni_dev_addr); + + if(UCS_OK != rc){ + ucs_error("Could not connect ep in smsg"); + return rc; + } + uct_ugni_cdm_lock(&iface->cdm); + gni_rc = GNI_SmsgInit(ep->super.ep, local_attr, &remote_attr); + uct_ugni_cdm_unlock(&iface->cdm); + + if(GNI_RC_SUCCESS != gni_rc){ + ucs_error("Failed to initalize smsg. %s [%i]", gni_err_str[gni_rc], gni_rc); + if(GNI_RC_INVALID_PARAM == gni_rc){ + return UCS_ERR_INVALID_PARAM; + } else { + return UCS_ERR_NO_MEMORY; + } + } + + ep_hash = (uint32_t)iface_addr->ep_hash; + uct_ugni_cdm_lock(&iface->cdm); + gni_rc = GNI_EpSetEventData(ep->super.ep, iface->cdm.domain_id, ep_hash); + uct_ugni_cdm_unlock(&iface->cdm); + + if(GNI_RC_SUCCESS != gni_rc){ + ucs_error("Could not set GNI_EpSetEventData!"); + } + return rc; +} + +static UCS_F_ALWAYS_INLINE ucs_status_t +uct_ugni_smsg_ep_am_common_send(uct_ugni_smsg_ep_t *ep, uct_ugni_smsg_iface_t *iface, + uint8_t am_id, unsigned header_length, void *header, + unsigned payload_length, void *payload, uct_ugni_smsg_desc_t *desc) +{ + gni_return_t gni_rc; + + if (ucs_unlikely(!uct_ugni_ep_can_send(&ep->super))) { + goto exit_no_res; + } + + desc->msg_id = iface->smsg_id++; + desc->flush_group = ep->super.flush_group; + uct_ugni_cdm_lock(&iface->super.cdm); + gni_rc = GNI_SmsgSendWTag(ep->super.ep, header, header_length, + payload, payload_length, desc->msg_id, am_id); + uct_ugni_cdm_unlock(&iface->super.cdm); + if(GNI_RC_SUCCESS != gni_rc){ + goto exit_no_res; + } + + ++desc->flush_group->flush_comp.count; + ++iface->super.outstanding; + + sglib_hashed_uct_ugni_smsg_desc_t_add(iface->smsg_list, desc); + + return UCS_OK; + +exit_no_res: + ucs_trace("Smsg send failed."); + ucs_mpool_put(desc); + UCS_STATS_UPDATE_COUNTER(ep->super.super.stats, UCT_EP_STAT_NO_RES, 1); + return UCS_ERR_NO_RESOURCE; +} + +ucs_status_t uct_ugni_smsg_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t header, + const void *payload, unsigned length) +{ + + 
uct_ugni_smsg_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_smsg_iface_t); + uct_ugni_smsg_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t); + uct_ugni_smsg_header_t *smsg_header; + uct_ugni_smsg_desc_t *desc; + ucs_status_t rc; + + UCT_CHECK_AM_ID(id); + UCT_CHECK_LENGTH(length, 0, iface->config.smsg_seg_size - + (sizeof(smsg_header) + sizeof(header)), "am_short"); + + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, + desc, return UCS_ERR_NO_RESOURCE); + + ucs_trace_data("AM_SHORT [%p] am_id: %d buf=%p length=%u", + iface, id, payload, length); + + smsg_header = (uct_ugni_smsg_header_t *)(desc+1); + smsg_header->length = length + sizeof(header); + + uct_am_short_fill_data(smsg_header + 1, header, payload, length); + + uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND, + id, smsg_header + 1, length, "TX: AM_SHORT"); + + rc = uct_ugni_smsg_ep_am_common_send(ep, iface, id, + sizeof(uct_ugni_smsg_header_t), + smsg_header, smsg_header->length, + smsg_header + 1, desc); + + UCT_TL_EP_STAT_OP_IF_SUCCESS(rc, ucs_derived_of(tl_ep, uct_base_ep_t), AM, + SHORT, sizeof(header) + length); + + return rc; +} + +ssize_t uct_ugni_smsg_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id, + uct_pack_callback_t pack_cb, + void *arg, unsigned flags) +{ + uct_ugni_smsg_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_smsg_iface_t); + uct_ugni_smsg_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t); + ssize_t packed; + uct_ugni_smsg_desc_t *desc; + ucs_status_t rc; + void *smsg_data; + uct_ugni_smsg_header_t *smsg_header; + + UCT_CHECK_AM_ID(id); + + UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc, + desc, return UCS_ERR_NO_RESOURCE); + + ucs_trace_data("AM_BCOPY [%p] am_id: %d send request %p", + iface, id, arg); + + smsg_header = (uct_ugni_smsg_header_t *)(desc+1); + smsg_data = (void*)(smsg_header+1); + + packed = pack_cb(smsg_data, arg); + + smsg_header->length = packed; + + UCT_CHECK_LENGTH(packed, 0, iface->config.smsg_seg_size 
- + 0, "am_bcopy"); + + uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND, + id, smsg_data, packed, "TX: AM_BCOPY"); + + rc = uct_ugni_smsg_ep_am_common_send(ep, iface, id, sizeof(uct_ugni_smsg_header_t), + smsg_header, packed, smsg_data, desc); + + UCT_TL_EP_STAT_OP_IF_SUCCESS(rc, ucs_derived_of(ep, uct_base_ep_t), AM, BCOPY, packed); + + return (UCS_OK == rc) ? packed : rc; +} diff --git a/src/uct/ugni/smsg/ugni_smsg_ep.h b/src/uct/ugni/smsg/ugni_smsg_ep.h new file mode 100644 index 0000000..e201bdf --- /dev/null +++ b/src/uct/ugni/smsg/ugni_smsg_ep.h @@ -0,0 +1,76 @@ +/** + * Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_UGNI_SMSG_EP_H +#define UCT_UGNI_SMSG_EP_H + +#include +#include +#include +#include +#include +#include +#include + +#define UCT_UGNI_SMSG_ANY 0 + +typedef struct uct_ugni_compact_smsg_attr { + gni_mem_handle_t mem_hndl; + void *msg_buffer; + uint32_t mbox_offset; +} UCS_S_PACKED uct_ugni_compact_smsg_attr_t; + +typedef struct uct_sockaddr_smsg_ugni { + uct_sockaddr_ugni_t super; + uct_ugni_compact_smsg_attr_t smsg_compact_attr; + uint32_t ep_hash; +} UCS_S_PACKED uct_sockaddr_smsg_ugni_t; + +typedef struct uct_ugni_mbox_handle { + gni_mem_handle_t gni_mem; + uintptr_t base_address; + gni_smsg_attr_t mbox_attr; +} uct_ugni_smsg_mbox_t; + +typedef struct uct_ugni_smsg_ep { + uct_ugni_ep_t super; + uct_ugni_smsg_mbox_t *smsg_attr; + uct_ugni_compact_smsg_attr_t smsg_compact_attr; +} uct_ugni_smsg_ep_t; + +typedef struct uct_ugni_smsg_desc { + uint32_t msg_id; + uct_ugni_flush_group_t *flush_group; + struct uct_ugni_smsg_desc *next; +} uct_ugni_smsg_desc_t; + +UCS_CLASS_DECLARE_NEW_FUNC(uct_ugni_smsg_ep_t, uct_ep_t, const uct_ep_params_t *); +UCS_CLASS_DECLARE_DELETE_FUNC(uct_ugni_smsg_ep_t, uct_ep_t); + +ucs_status_t uct_ugni_smsg_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t header, + 
const void *payload, unsigned length); +ssize_t uct_ugni_smsg_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id, + uct_pack_callback_t pack_cb, void *arg, + unsigned flags); +ucs_status_t uct_ugni_smsg_ep_get_address(uct_ep_h tl_ep, uct_ep_addr_t *addr); +ucs_status_t uct_ugni_smsg_ep_connect_to_ep(uct_ep_h tl_ep, + const uct_device_addr_t *dev_addr, + const uct_ep_addr_t *ep_addr); + +static inline uint32_t uct_ugni_smsg_desc_compare(uct_ugni_smsg_desc_t *smsg1, uct_ugni_smsg_desc_t *smsg2) +{ + return smsg1->msg_id - smsg2->msg_id; +} + +static inline unsigned uct_ugni_smsg_desc_hash(uct_ugni_smsg_desc_t *smsg) +{ + return smsg->msg_id; +} + +SGLIB_DEFINE_LIST_PROTOTYPES(uct_ugni_smsg_desc_t, uct_ugni_smsg_desc_compare, next); +SGLIB_DEFINE_HASHED_CONTAINER_PROTOTYPES(uct_ugni_smsg_desc_t, UCT_UGNI_HASH_SIZE, uct_ugni_smsg_desc_hash); + +#endif diff --git a/src/uct/ugni/smsg/ugni_smsg_iface.c b/src/uct/ugni/smsg/ugni_smsg_iface.c new file mode 100644 index 0000000..9a309c2 --- /dev/null +++ b/src/uct/ugni/smsg/ugni_smsg_iface.c @@ -0,0 +1,376 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include "ugni_smsg_iface.h" +#include "ugni_smsg_ep.h" +#include +#include +#include +#include + + +static ucs_config_field_t uct_ugni_smsg_iface_config_table[] = { + {"", "ALLOC=huge,thp,mmap,heap", NULL, + ucs_offsetof(uct_ugni_iface_config_t, super), + UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)}, + + UCT_IFACE_MPOOL_CONFIG_FIELDS("SMSG", -1, 0, "smsg", + ucs_offsetof(uct_ugni_iface_config_t, mpool), + "\nAttention: Setting this param with value != -1 is a dangerous thing\n" + "and could cause deadlock or performance degradation."), + + {NULL} +}; + +static ucs_status_t progress_local_cq(uct_ugni_smsg_iface_t *iface){ + gni_return_t ugni_rc; + gni_cq_entry_t event_data; + uct_ugni_smsg_desc_t message_data; + uct_ugni_smsg_desc_t *message_pointer; + + uct_ugni_cdm_lock(&iface->super.cdm); + ugni_rc = GNI_CqGetEvent(iface->super.local_cq, &event_data); + uct_ugni_cdm_unlock(&iface->super.cdm); + if(GNI_RC_NOT_DONE == ugni_rc){ + return UCS_OK; + } + + if((GNI_RC_SUCCESS != ugni_rc && !event_data) || GNI_CQ_OVERRUN(event_data)){ + /* TODO: handle overruns */ + ucs_error("Error posting data. CQ overrun = %d", (int)GNI_CQ_OVERRUN(event_data)); + return UCS_ERR_NO_RESOURCE; + } + + message_data.msg_id = GNI_CQ_GET_MSG_ID(event_data); + message_pointer = sglib_hashed_uct_ugni_smsg_desc_t_find_member(iface->smsg_list,&message_data); + ucs_assert(NULL != message_pointer); + uct_ugni_check_flush(message_pointer->flush_group); + iface->super.outstanding--; + sglib_hashed_uct_ugni_smsg_desc_t_delete(iface->smsg_list,message_pointer); + ucs_mpool_put(message_pointer); + return UCS_INPROGRESS; +} + +static void process_mbox(uct_ugni_smsg_iface_t *iface, uct_ugni_smsg_ep_t *ep){ + uint8_t tag; + void *data_ptr; + gni_return_t ugni_rc; + uct_ugni_smsg_header_t *header; + void *user_data; + + /* Only one thread at a time can process mboxes for the iface. After it's done + then everyone's messages have been drained. 
*/ + if (!ucs_spin_trylock(&iface->mbox_lock)) { + return; + } + while(1){ + tag = GNI_SMSG_ANY_TAG; + uct_ugni_cdm_lock(&iface->super.cdm); + ugni_rc = GNI_SmsgGetNextWTag(ep->super.ep, (void **)&data_ptr, &tag); + uct_ugni_cdm_unlock(&iface->super.cdm); + /* Yes, GNI_RC_NOT_DONE means that you're done with the smsg mailbox */ + if(GNI_RC_NOT_DONE == ugni_rc){ + break; + } + if(GNI_RC_SUCCESS != ugni_rc){ + ucs_error("Unhandled smsg error: %s %d", gni_err_str[ugni_rc], ugni_rc); + break; + } + if(NULL == data_ptr){ + ucs_error("Empty data pointer in smsg."); + break; + } + header = (uct_ugni_smsg_header_t *)data_ptr; + user_data = (void *)(header + 1); + + uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV, + tag, user_data, header->length, "RX: AM"); + + uct_iface_invoke_am(&iface->super.super, tag, user_data, + header->length, 0); + uct_ugni_cdm_lock(&iface->super.cdm); + ugni_rc = GNI_SmsgRelease(ep->super.ep); + uct_ugni_cdm_unlock(&iface->super.cdm); + if(GNI_RC_SUCCESS != ugni_rc){ + ucs_error("Unhandled smsg error in GNI_SmsgRelease: %s %d", gni_err_str[ugni_rc], ugni_rc); + break; + } + } + ucs_spin_unlock(&iface->mbox_lock); +} + +static void uct_ugni_smsg_handle_remote_overflow(uct_ugni_smsg_iface_t *iface){ + gni_return_t ugni_rc; + gni_cq_entry_t event_data; + struct sglib_hashed_uct_ugni_ep_t_iterator ep_iterator; + uct_ugni_ep_t *current_ep; + uct_ugni_smsg_ep_t *ep; + + /* We don't know which EP dropped a completion entry, so flush everything */ + uct_ugni_cdm_lock(&iface->super.cdm); + do{ + ugni_rc = GNI_CqGetEvent(iface->remote_cq, &event_data); + } while(GNI_RC_NOT_DONE != ugni_rc); + uct_ugni_cdm_unlock(&iface->super.cdm); + current_ep = sglib_hashed_uct_ugni_ep_t_it_init(&ep_iterator, iface->super.eps); + + while(NULL != current_ep){ + ep = ucs_derived_of(current_ep, uct_ugni_smsg_ep_t); + process_mbox(iface, ep); + current_ep = sglib_hashed_uct_ugni_ep_t_it_next(&ep_iterator); + } +} + +ucs_status_t 
progress_remote_cq(uct_ugni_smsg_iface_t *iface) +{ + gni_return_t ugni_rc; + gni_cq_entry_t event_data; + uct_ugni_ep_t tl_ep; + uct_ugni_ep_t *ugni_ep; + uct_ugni_smsg_ep_t *ep; + + uct_ugni_cdm_lock(&iface->super.cdm); + ugni_rc = GNI_CqGetEvent(iface->remote_cq, &event_data); + uct_ugni_cdm_unlock(&iface->super.cdm); + if(GNI_RC_NOT_DONE == ugni_rc){ + return UCS_OK; + } + + if (GNI_RC_SUCCESS != ugni_rc || !GNI_CQ_STATUS_OK(event_data) || GNI_CQ_OVERRUN(event_data)) { + if(GNI_RC_ERROR_RESOURCE == ugni_rc || (GNI_RC_SUCCESS == ugni_rc && GNI_CQ_OVERRUN(event_data))){ + ucs_debug("Detected remote CQ overrun. ungi_rc = %d [%s]", ugni_rc, gni_err_str[ugni_rc]); + uct_ugni_smsg_handle_remote_overflow(iface); + return UCS_OK; + } + ucs_error("GNI_CqGetEvent falied with unhandled error. Error status %s %d ", + gni_err_str[ugni_rc], ugni_rc); + return UCS_ERR_IO_ERROR; + } + + tl_ep.hash_key = GNI_CQ_GET_INST_ID(event_data); + ugni_ep = sglib_hashed_uct_ugni_ep_t_find_member(iface->super.eps, &tl_ep); + ep = ucs_derived_of(ugni_ep, uct_ugni_smsg_ep_t); + + process_mbox(iface, ep); + return UCS_INPROGRESS; +} + +UCS_CLASS_DEFINE_DELETE_FUNC(uct_ugni_smsg_iface_t, uct_iface_t); + +static unsigned uct_ugni_smsg_progress(void *arg) +{ + uct_ugni_smsg_iface_t *iface = (uct_ugni_smsg_iface_t *)arg; + ucs_status_t status; + unsigned count = 0; + + do { + ++count; + status = progress_local_cq(iface); + } while(status == UCS_INPROGRESS); + do { + ++count; + status = progress_remote_cq(iface); + } while(status == UCS_INPROGRESS); + + /* have a go a processing the pending queue */ + + ucs_arbiter_dispatch(&iface->super.arbiter, iface->config.smsg_max_credit, + uct_ugni_ep_process_pending, NULL); + return count - 2; +} + +static ucs_status_t uct_ugni_smsg_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) +{ + uct_ugni_smsg_iface_t *iface = ucs_derived_of(tl_iface, uct_ugni_smsg_iface_t); + + uct_base_iface_query(&iface->super.super, iface_attr); + + 
iface_attr->cap.am.max_short = iface->config.smsg_seg_size-sizeof(uint64_t); + iface_attr->cap.am.max_bcopy = iface->config.smsg_seg_size; + iface_attr->cap.am.opt_zcopy_align = 1; + iface_attr->cap.am.align_mtu = iface_attr->cap.am.opt_zcopy_align; + iface_attr->device_addr_len = sizeof(uct_devaddr_ugni_t); + iface_attr->iface_addr_len = sizeof(uct_sockaddr_ugni_t); + iface_attr->ep_addr_len = sizeof(uct_sockaddr_smsg_ugni_t); + iface_attr->max_conn_priv = 0; + iface_attr->cap.flags = UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_CONNECT_TO_EP | + UCT_IFACE_FLAG_CB_SYNC | + UCT_IFACE_FLAG_PENDING; + + iface_attr->overhead = 1e-6; /* 1 usec */ + iface_attr->latency.overhead = 40e-6; /* 40 usec */ + iface_attr->latency.growth = 0; + iface_attr->bandwidth.dedicated = pow(1024, 2); /* bytes */ + iface_attr->bandwidth.shared = 0; + iface_attr->priority = 0; + + return UCS_OK; +} + + +static UCS_CLASS_CLEANUP_FUNC(uct_ugni_smsg_iface_t) +{ + ucs_status_t status; + + uct_worker_progress_remove(self->super.super.worker, &self->super.super.prog); + ucs_mpool_cleanup(&self->free_desc, 1); + ucs_mpool_cleanup(&self->free_mbox, 1); + uct_ugni_destroy_cq(self->remote_cq, &self->super.cdm); + + status = ucs_spinlock_destroy(&self->mbox_lock); + if (status != UCS_OK) { + ucs_warn("ucs_spinlock_destroy() failed (%d)", status); + } +} + +static uct_iface_ops_t uct_ugni_smsg_iface_ops = { + .ep_am_short = uct_ugni_smsg_ep_am_short, + .ep_am_bcopy = uct_ugni_smsg_ep_am_bcopy, + .ep_pending_add = uct_ugni_ep_pending_add, + .ep_pending_purge = uct_ugni_ep_pending_purge, + .ep_flush = uct_ugni_ep_flush, + .ep_fence = uct_base_ep_fence, + .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_ugni_smsg_ep_t), + .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_ugni_smsg_ep_t), + .ep_get_address = uct_ugni_smsg_ep_get_address, + .ep_connect_to_ep = uct_ugni_smsg_ep_connect_to_ep, + .iface_flush = uct_ugni_iface_flush, + .iface_fence = uct_base_iface_fence, + .iface_progress_enable 
= ucs_empty_function, + .iface_progress_disable = ucs_empty_function, + .iface_progress = (void*)uct_ugni_smsg_progress, + .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_ugni_smsg_iface_t), + .iface_query = uct_ugni_smsg_iface_query, + .iface_get_device_address = uct_ugni_iface_get_dev_address, + .iface_get_address = uct_ugni_iface_get_address, + .iface_is_reachable = uct_ugni_iface_is_reachable +}; + +static ucs_mpool_ops_t uct_ugni_smsg_desc_mpool_ops = { + .chunk_alloc = ucs_mpool_hugetlb_malloc, + .chunk_release = ucs_mpool_hugetlb_free, + .obj_init = uct_ugni_base_desc_init, + .obj_cleanup = NULL +}; + +static ucs_mpool_ops_t uct_ugni_smsg_mbox_mpool_ops = { + .chunk_alloc = ucs_mpool_chunk_mmap, + .chunk_release = ucs_mpool_chunk_munmap, + .obj_init = NULL, + .obj_cleanup = NULL +}; + +static UCS_CLASS_INIT_FUNC(uct_ugni_smsg_iface_t, uct_md_h md, uct_worker_h worker, + const uct_iface_params_t *params, + const uct_iface_config_t *tl_config) +{ + uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t); + ucs_status_t status; + gni_return_t ugni_rc; + unsigned int bytes_per_mbox; + gni_smsg_attr_t smsg_attr; + + UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params, + &uct_ugni_smsg_iface_ops, + &config->super UCS_STATS_ARG(NULL)); + + /* Setting initial configuration */ + self->config.smsg_seg_size = 2048; + self->config.rx_headroom = params->rx_headroom; + self->config.smsg_max_retransmit = 16; + self->config.smsg_max_credit = 8; + self->smsg_id = 0; + + smsg_attr.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT; + smsg_attr.mbox_maxcredit = self->config.smsg_max_credit; + smsg_attr.msg_maxsize = self->config.smsg_seg_size; + status = ucs_spinlock_init(&self->mbox_lock); + if (UCS_OK != status) { + goto exit; + } + + status = uct_ugni_create_cq(&self->remote_cq, 40000, &self->super.cdm); + if (UCS_OK != status) { + goto clean_lock; + } + ugni_rc = GNI_SmsgBufferSizeNeeded(&(smsg_attr), &bytes_per_mbox); + 
self->bytes_per_mbox = ucs_align_up_pow2(bytes_per_mbox, ucs_get_page_size()); + + if (ugni_rc != GNI_RC_SUCCESS) { + ucs_error("Smsg buffer size calculation failed"); + status = UCS_ERR_INVALID_PARAM; + goto clean_cq; + } + + status = ucs_mpool_init(&self->free_desc, + 0, + self->config.smsg_seg_size + sizeof(uct_ugni_smsg_desc_t), + 0, + UCS_SYS_CACHE_LINE_SIZE, /* alignment */ + 128 , /* grow */ + config->mpool.max_bufs, /* max buffers */ + &uct_ugni_smsg_desc_mpool_ops, + "UGNI-SMSG-DESC"); + + if (UCS_OK != status) { + ucs_error("Desc Mpool creation failed"); + goto clean_cq; + } + + status = ucs_mpool_init(&self->free_mbox, + 0, + self->bytes_per_mbox + sizeof(uct_ugni_smsg_mbox_t), + sizeof(uct_ugni_smsg_mbox_t), + UCS_SYS_CACHE_LINE_SIZE, /* alignment */ + 128, /* grow */ + config->mpool.max_bufs, /* max buffers */ + &uct_ugni_smsg_mbox_mpool_ops, + "UGNI-SMSG-MBOX"); + + if (UCS_OK != status) { + ucs_error("Mbox Mpool creation failed"); + goto clean_mbox; + } + + ugni_rc = GNI_SmsgSetMaxRetrans(uct_ugni_iface_nic_handle(&self->super), self->config.smsg_max_retransmit); + + if (ugni_rc != GNI_RC_SUCCESS) { + ucs_error("Smsg setting max retransmit count failed."); + status = UCS_ERR_INVALID_PARAM; + goto clean_desc; + } + + /* TBD: eventually the uct_ugni_progress has to be moved to + * udt layer so each ugni layer will have own progress */ + uct_worker_progress_add_safe(self->super.super.worker, uct_ugni_smsg_progress, + self, &self->super.super.prog); + + return UCS_OK; + + clean_desc: + ucs_mpool_cleanup(&self->free_desc, 1); + clean_mbox: + ucs_mpool_cleanup(&self->free_mbox, 1); + clean_cq: + uct_ugni_destroy_cq(self->remote_cq, &self->super.cdm); + clean_lock: + ucs_spinlock_destroy(&self->mbox_lock); + exit: + uct_ugni_cleanup_base_iface(&self->super); + ucs_error("Failed to activate interface"); + return status; +} + +UCS_CLASS_DEFINE(uct_ugni_smsg_iface_t, uct_ugni_iface_t); +UCS_CLASS_DEFINE_NEW_FUNC(uct_ugni_smsg_iface_t, uct_iface_t, uct_md_h, + 
uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t *); + +UCT_TL_DEFINE(&uct_ugni_component, ugni_smsg, uct_ugni_query_devices, + uct_ugni_smsg_iface_t, "UGNI_SMSG_", + uct_ugni_smsg_iface_config_table, uct_ugni_iface_config_t); diff --git a/src/uct/ugni/smsg/ugni_smsg_iface.h b/src/uct/ugni/smsg/ugni_smsg_iface.h new file mode 100644 index 0000000..876748d --- /dev/null +++ b/src/uct/ugni/smsg/ugni_smsg_iface.h @@ -0,0 +1,42 @@ +/** + * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED. + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * See file LICENSE for terms. + */ + +#ifndef UCT_UGNI_SMSG_IFACE_H +#define UCT_UGNI_SMSG_IFACE_H + +#include "ugni_smsg_ep.h" +#include +#include +#include +#include + +#define SMSG_MAX_SIZE 65535 + +typedef struct uct_ugni_smsg_iface { + uct_ugni_iface_t super; /**< Super type */ + gni_cq_handle_t remote_cq; /**< Remote completion queue */ + ucs_mpool_t free_desc; /**< Pool of FMA descriptors for + requests without bouncing buffers */ + ucs_mpool_t free_mbox; /**< Pool of mboxes for use with smsg */ + uint32_t smsg_id; /**< Id number to uniquely identify smsgs in the cq */ + struct { + unsigned smsg_seg_size; /**< Max SMSG size */ + size_t rx_headroom; /**< The size of user defined header for am */ + uint16_t smsg_max_retransmit; + uint16_t smsg_max_credit; /**< Max credits for smsg boxes */ + } config; + size_t bytes_per_mbox; + uct_ugni_smsg_desc_t *smsg_list[UCT_UGNI_HASH_SIZE]; /**< A list of descriptors currently outstanding */ + ucs_spinlock_t mbox_lock; /**< Lock for processing SMSG mboxes */ +} uct_ugni_smsg_iface_t; + +typedef struct uct_ugni_smsg_header { + uint32_t length; +} uct_ugni_smsg_header_t; + +ucs_status_t progress_remote_cq(uct_ugni_smsg_iface_t *iface); + +#endif diff --git a/src/uct/ugni/udt/ugni_udt_ep.c b/src/uct/ugni/udt/ugni_udt_ep.c new file mode 100644 index 0000000..a08f680 --- /dev/null +++ b/src/uct/ugni/udt/ugni_udt_ep.c @@ -0,0 +1,248 @@ 
+/** +* Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED. +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include "ugni_udt_ep.h" +#include "ugni_udt_iface.h" +#include +#include + +#define uct_ugni_udt_can_send(_ep) ((uct_ugni_ep_can_send(&_ep->super)) && (_ep->posted_desc == NULL)) + +ucs_status_t uct_ugni_udt_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *n, + unsigned flags) +{ + uct_ugni_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_iface_t); + uct_ugni_udt_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_udt_ep_t); + ucs_status_t status; + + if (uct_ugni_udt_can_send(ep)) { + return UCS_ERR_BUSY; + } + + status = uct_ugni_ep_pending_add(tl_ep, n, flags); + if (UCS_OK == status) { + uct_worker_progress_add_safe(iface->super.worker, uct_ugni_udt_progress, + iface, &iface->super.prog); + } + return status; +} + +ucs_arbiter_cb_result_t uct_ugni_udt_ep_process_pending(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) +{ + ucs_arbiter_cb_result_t result; + uct_ugni_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_ugni_ep_t, arb_group); + uct_ugni_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ugni_iface_t); + + result = uct_ugni_ep_process_pending(arbiter, elem, arg); + if (UCS_ARBITER_CB_RESULT_REMOVE_ELEM == result) { + uct_worker_progress_remove(iface->super.worker, &iface->super.prog); + } + return result; +} + +static ucs_arbiter_cb_result_t uct_ugni_udt_ep_abriter_purge_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) +{ + uct_ugni_ep_t *ep = ucs_container_of(ucs_arbiter_elem_group(elem), uct_ugni_ep_t, arb_group); + uct_ugni_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_ugni_iface_t); + ucs_arbiter_cb_result_t result; + + result = uct_ugni_ep_abriter_purge_cb(arbiter, elem, arg); + if (UCS_ARBITER_CB_RESULT_REMOVE_ELEM == result) { + uct_worker_progress_remove(iface->super.worker, &iface->super.prog); 
+ } + return result; +} + +void uct_ugni_udt_ep_pending_purge(uct_ep_h tl_ep, + uct_pending_purge_callback_t cb, + void *arg) +{ + uct_ugni_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_iface_t); + uct_ugni_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_ep_t); + uct_purge_cb_args_t args = {cb, arg}; + + ucs_arbiter_group_purge(&iface->arbiter, &ep->arb_group, + uct_ugni_udt_ep_abriter_purge_cb, &args); +} + +static UCS_CLASS_INIT_FUNC(uct_ugni_udt_ep_t, const uct_ep_params_t *params) +{ + UCS_CLASS_CALL_SUPER_INIT(uct_ugni_ep_t, params); + UCT_EP_PARAMS_CHECK_DEV_IFACE_ADDRS(params); + uct_ugni_iface_t *iface = ucs_derived_of(params->iface, uct_ugni_iface_t); + const uct_sockaddr_ugni_t *iface_addr = (const uct_sockaddr_ugni_t*)params->iface_addr; + const uct_devaddr_ugni_t *ugni_dev_addr = (const uct_devaddr_ugni_t *)params->dev_addr; + ucs_status_t rc; + + ucs_debug("Connecting UDT ep %p", self); + rc = ugni_connect_ep(&self->super, iface, iface_addr, ugni_dev_addr); + + if (UCS_OK != rc) { + ucs_error("Could not connect ep %p", self); + return rc; + } + + self->posted_desc = NULL; + return UCS_OK; +} + +static UCS_CLASS_CLEANUP_FUNC(uct_ugni_udt_ep_t) +{ + uct_ugni_udt_iface_t *iface = ucs_derived_of(self->super.super.super.iface, uct_ugni_udt_iface_t); + gni_return_t ugni_rc; + uint32_t rem_addr, rem_id; + gni_post_state_t post_state; + + if (self->posted_desc) { + ucs_debug("Cleaning outstanding request"); + uct_ugni_cdm_lock(&iface->super.cdm); + ugni_rc = GNI_EpPostDataCancelById(self->super.ep, self->super.hash_key); + uct_ugni_cdm_unlock(&iface->super.cdm); + if (GNI_RC_SUCCESS != ugni_rc) { + if (GNI_RC_NO_MATCH == ugni_rc) { + /* We raced with the async thread, it recieved and cleaned up this reply. It's fine. 
*/
+                return;
+            }
+            ucs_error("GNI_EpPostDataCancel failed, Error status: %s %d",
+                      gni_err_str[ugni_rc], ugni_rc);
+            return;
+        }
+        uct_ugni_cdm_lock(&iface->super.cdm);
+        ugni_rc = GNI_EpPostDataWaitById(self->super.ep, self->super.hash_key, 100, &post_state, &rem_addr, &rem_id);
+        uct_ugni_cdm_unlock(&iface->super.cdm);
+        if (GNI_RC_SUCCESS != ugni_rc) {
+            ucs_warn("GNI_EpPostDataWaitById failed, Error status: %s %d",
+                     gni_err_str[ugni_rc], ugni_rc);
+            return;
+        }
+        iface->super.outstanding--;
+        ucs_mpool_put(self->posted_desc);
+    }
+}
+
+UCS_CLASS_DEFINE(uct_ugni_udt_ep_t, uct_ugni_ep_t);
+UCS_CLASS_DEFINE_NEW_FUNC(uct_ugni_udt_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DEFINE_DELETE_FUNC(uct_ugni_udt_ep_t, uct_ep_t);
+
+enum {
+    UCT_UGNI_UDT_AM_BCOPY, /* 0: payload produced by a pack callback */
+    UCT_UGNI_UDT_AM_SHORT, /* 1: 8-byte header + caller payload */
+};
+
+/* Common active-message send path shared by the am_short and am_bcopy entry points.
+ * is_short selects the mode: UCT_UGNI_UDT_AM_SHORT copies header + payload into the
+ * descriptor, UCT_UGNI_UDT_AM_BCOPY lets pack_cb(dest, arg) fill it.
+ * Returns UCS_OK (short) or the packed length (bcopy) once the datagram is posted;
+ * UCS_ERR_NO_RESOURCE if the ep cannot send or no descriptor is free.
+ */
+static UCS_F_ALWAYS_INLINE ssize_t
+uct_ugni_udt_ep_am_common_send(const unsigned is_short, uct_ugni_udt_ep_t *ep, uct_ugni_udt_iface_t *iface,
+                               uint8_t am_id, unsigned length, uint64_t header,
+                               const void *payload, uct_pack_callback_t pack_cb, void *arg)
+{
+    gni_return_t ugni_rc;
+    uint16_t msg_length;
+    uct_ugni_udt_desc_t *desc;
+    uct_ugni_udt_header_t *sheader,
+                          *rheader;
+    ssize_t packed_length;
+
+    UCT_CHECK_AM_ID(am_id);
+    if (ucs_unlikely(!uct_ugni_udt_can_send(ep))) {
+        UCS_STATS_UPDATE_COUNTER(ep->super.super.stats, UCT_EP_STAT_NO_RES, 1);
+        return UCS_ERR_NO_RESOURCE;
+    }
+
+    ep->desc_flush_group = ep->super.flush_group;
+
+    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
+                             desc, return UCS_ERR_NO_RESOURCE);
+
+    rheader = uct_ugni_udt_get_rheader(desc, iface);
+    rheader->type = UCT_UGNI_UDT_EMPTY; /* receive half starts empty until the peer's data lands */
+
+    sheader = uct_ugni_udt_get_sheader(desc, iface);
+
+    if (is_short) {
+        uct_am_short_fill_data(uct_ugni_udt_get_spayload(desc, iface),
+                               header, payload, length);
+        sheader->length = length + sizeof(header);
+        msg_length = sheader->length + sizeof(*sheader);
+        UCT_TL_EP_STAT_OP(ucs_derived_of(ep, uct_base_ep_t), AM, SHORT, sizeof(header) + length);
+    } else {
+        packed_length = pack_cb((void *)uct_ugni_udt_get_spayload(desc, iface),
+                                arg);
+        sheader->length = packed_length;
+        msg_length = sheader->length + sizeof(*sheader);
+        UCT_TL_EP_STAT_OP(ucs_derived_of(ep, uct_base_ep_t), AM, BCOPY, packed_length);
+    }
+    /* Trace the bytes actually staged: 'length' is always 0 on the bcopy path */
+    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND, am_id,
+                       uct_ugni_udt_get_spayload(desc, iface), sheader->length,
+                       is_short ? "TX: AM_SHORT" : "TX: AM_BCOPY");
+
+    sheader->am_id = am_id;
+    sheader->type = UCT_UGNI_UDT_PAYLOAD;
+
+    ucs_assertv(msg_length <= GNI_DATAGRAM_MAXSIZE, "msg_length=%u", msg_length);
+
+    uct_ugni_cdm_lock(&iface->super.cdm);
+    ugni_rc = GNI_EpPostDataWId(ep->super.ep,
+                                sheader, msg_length,
+                                rheader, (uint16_t)iface->config.udt_seg_size,
+                                ep->super.hash_key);
+    uct_ugni_cdm_unlock(&iface->super.cdm);
+
+    UCT_UGNI_UDT_CHECK_RC(ugni_rc, desc); /* on failure: releases desc and returns an error status */
+
+    ep->posted_desc = desc;
+    ++ep->desc_flush_group->flush_comp.count;
+    ++iface->super.outstanding;
+
+    return is_short ? UCS_OK : packed_length;
+}
+/* Short AM: sends the 8-byte header plus 'length' payload bytes in one UDT datagram. */
+ucs_status_t uct_ugni_udt_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t header,
+                                      const void *payload, unsigned length)
+{
+    uct_ugni_udt_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_udt_iface_t);
+    uct_ugni_udt_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_udt_ep_t);
+    ucs_status_t status;
+    /* Validate before blocking: UCT_CHECK_LENGTH may return early and must not leave async blocked */
+    UCT_CHECK_LENGTH(length, 0,
+                     iface->config.udt_seg_size - sizeof(header) - sizeof(uct_ugni_udt_header_t), "am_short");
+    UCS_ASYNC_BLOCK(iface->super.super.worker->async);
+    ucs_trace_data("AM_SHORT [%p] am_id: %d buf=%p length=%u",
+                   iface, id, payload, length);
+    status = (ucs_status_t)uct_ugni_udt_ep_am_common_send(UCT_UGNI_UDT_AM_SHORT, ep, iface, id, length,
+                                                          header, payload, NULL, NULL);
+
+    UCS_ASYNC_UNBLOCK(iface->super.super.worker->async);
+
+    return status;
+}
+/* Bcopy AM: pack_cb fills the payload; returns the packed length or the send path's error status. */
+ssize_t uct_ugni_udt_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
+                                 uct_pack_callback_t pack_cb,
+                                 void *arg, unsigned flags)
+{
+    uct_ugni_udt_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_udt_iface_t);
+    uct_ugni_udt_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_udt_ep_t);
+    ssize_t packed_length; /* ssize_t, matching the return type: a ucs_status_t must not carry a length */
+    UCS_ASYNC_BLOCK(iface->super.super.worker->async);
+
+    ucs_trace_data("AM_BCOPY [%p] am_id: %d buf=%p",
+                   iface, id, arg );
+    packed_length = uct_ugni_udt_ep_am_common_send(UCT_UGNI_UDT_AM_BCOPY, ep, iface, id, 0,
+                                                   0, NULL, pack_cb, arg);
+    UCS_ASYNC_UNBLOCK(iface->super.super.worker->async);
+
+    return packed_length;
+}
diff --git a/src/uct/ugni/udt/ugni_udt_ep.h b/src/uct/ugni/udt/ugni_udt_ep.h
new file mode 100644
index 0000000..a071e21
--- /dev/null
+++ b/src/uct/ugni/udt/ugni_udt_ep.h
@@ -0,0 +1,43 @@
+/**
+* Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED.
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_UGNI_UDT_EP_H
+#define UCT_UGNI_UDT_EP_H
+
+#include
+#include
+#include
+#include
+#include
+
+#define UCT_UGNI_UDT_ANY 0 /* datagram id of the wildcard (any-source) post */
+#define UCT_UGNI_UDT_CANCEL 1 /* datagram id posted at teardown to stop the device thread */
+
+struct uct_ugni_udt_desc;
+
+typedef struct uct_ugni_udt_ep {
+    uct_ugni_ep_t super; /* base uGNI endpoint */
+    struct uct_ugni_udt_desc *posted_desc; /* descriptor of the datagram currently posted; NULL once completed */
+    uct_ugni_flush_group_t *desc_flush_group; /* flush group captured when posted_desc was posted */
+} uct_ugni_udt_ep_t;
+
+ucs_status_t uct_ugni_udt_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t header,
+                                      const void *payload, unsigned length);
+ssize_t uct_ugni_udt_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
+                                 uct_pack_callback_t pack_cb, void *arg,
+                                 unsigned flags);
+ucs_status_t uct_ugni_udt_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *n,
+                                         unsigned flags);
+ucs_arbiter_cb_result_t uct_ugni_udt_ep_process_pending(ucs_arbiter_t *arbiter,
+                                                        ucs_arbiter_elem_t *elem,
+                                                        void *arg);
+void uct_ugni_udt_ep_pending_purge(uct_ep_h tl_ep,
+                                   uct_pending_purge_callback_t cb,
+                                   void *arg);
+UCS_CLASS_DECLARE_NEW_FUNC(uct_ugni_udt_ep_t, uct_ep_t, const uct_ep_params_t *);
+UCS_CLASS_DECLARE_DELETE_FUNC(uct_ugni_udt_ep_t, uct_ep_t);
+
+#endif
diff --git a/src/uct/ugni/udt/ugni_udt_iface.c b/src/uct/ugni/udt/ugni_udt_iface.c
new file mode 100644
index 0000000..aaf82ba
--- /dev/null
+++ b/src/uct/ugni/udt/ugni_udt_iface.c
@@ -0,0 +1,486 @@
+/**
+ * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED.
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#include "ugni_udt_iface.h"
+#include "ugni_udt_ep.h"
+#include
+#include
+#include
+
+
+static ucs_config_field_t uct_ugni_udt_iface_config_table[] = {
+    {"", "ALLOC=huge,thp,mmap,heap", NULL,
+     ucs_offsetof(uct_ugni_iface_config_t, super),
+     UCS_CONFIG_TYPE_TABLE(uct_iface_config_table)},
+
+    UCT_IFACE_MPOOL_CONFIG_FIELDS("UDT", -1, 0, "udt",
+                                  ucs_offsetof(uct_ugni_iface_config_t, mpool),
+                                  "\nAttention: Setting this param with value != -1 is a dangerous thing\n"
+                                  "and could cause deadlock or performance degradation."),
+
+    {NULL}
+};
+/* Hand the received payload of 'desc' to the registered AM handler; returns the handler's status. */
+static ucs_status_t processs_datagram(uct_ugni_udt_iface_t *iface, uct_ugni_udt_desc_t *desc)
+{
+    ucs_status_t status;
+    uct_ugni_udt_header_t *header;
+    void *payload;
+
+    header = uct_ugni_udt_get_rheader(desc, iface);
+    payload = uct_ugni_udt_get_rpayload(desc, iface);
+    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV,
+                       header->am_id, payload, header->length, "RX: AM");
+    status = uct_iface_invoke_am(&iface->super.super, header->am_id, payload,
+                                 header->length, UCT_CB_PARAM_FLAG_DESC);
+    return status;
+}
+/* Complete the post identified by 'id'. UCS_INPROGRESS: a payload awaits processing; UCS_OK: plain ack. */
+static ucs_status_t recieve_datagram(uct_ugni_udt_iface_t *iface, uint64_t id, uct_ugni_udt_ep_t **ep_out)
+{
+    uint32_t rem_addr, rem_id;
+    gni_post_state_t post_state;
+    gni_return_t ugni_rc;
+    uct_ugni_udt_ep_t *ep;
+    gni_ep_handle_t gni_ep;
+    uct_ugni_udt_desc_t *desc;
+    uct_ugni_udt_header_t *header;
+
+    ucs_trace_func("iface=%p, id=%lx", iface, id);
+
+    if (UCT_UGNI_UDT_ANY == id) {
+        ep = NULL; /* wildcard datagram: no endpoint is associated with it */
+        gni_ep = iface->ep_any;
+        desc = iface->desc_any;
+    } else {
+        ep = ucs_derived_of(uct_ugni_iface_lookup_ep(&iface->super, id),
+                            uct_ugni_udt_ep_t);
+        gni_ep = ep->super.ep;
+        desc = ep->posted_desc;
+    }
+
+    *ep_out = ep;
+    uct_ugni_cdm_lock(&iface->super.cdm);
+    ugni_rc = GNI_EpPostDataWaitById(gni_ep, id, -1, &post_state, &rem_addr, &rem_id);
+    uct_ugni_cdm_unlock(&iface->super.cdm);
+    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
+        ucs_error("GNI_EpPostDataWaitById, id=%lu Error status: %s %d",
+                  id, gni_err_str[ugni_rc], ugni_rc);
+        return UCS_ERR_IO_ERROR;
+    }
+    if (GNI_POST_TERMINATED == post_state) {
+        return UCS_ERR_CANCELED;
+    }
+
+    if (GNI_POST_COMPLETED != post_state) {
+        ucs_error("GNI_EpPostDataWaitById gave unexpected response: %u", post_state);
+        return UCS_ERR_IO_ERROR;
+    }
+
+    if (UCT_UGNI_UDT_ANY != id) {
+        --iface->super.outstanding; /* only endpoint posts are counted as outstanding */
+    }
+
+    header = uct_ugni_udt_get_rheader(desc, iface);
+
+    ucs_trace("Got datagram id: %lu type: %i len: %i am_id: %i", id, header->type, header->length, header->am_id);
+
+    if (UCT_UGNI_UDT_PAYLOAD != header->type) {
+        /* ack message, no data */
+        ucs_assert_always(NULL != ep);
+        ucs_mpool_put(ep->posted_desc);
+        uct_ugni_check_flush(ep->desc_flush_group);
+        ep->posted_desc = NULL;
+        return UCS_OK;
+    }
+
+    return UCS_INPROGRESS;
+}
+/* Device thread: waits in uGNI for a datagram, then wakes the async handler through event_pipe. */
+static void *uct_ugni_udt_device_thread(void *arg)
+{
+    uct_ugni_udt_iface_t *iface = (uct_ugni_udt_iface_t *)arg;
+    gni_return_t ugni_rc;
+    uint64_t id;
+
+    while (1) {
+        pthread_mutex_lock(&iface->device_lock);
+        while (iface->events_ready) { /* sleep until the handler has drained the previous batch */
+            pthread_cond_wait(&iface->device_condition, &iface->device_lock);
+        }
+        pthread_mutex_unlock(&iface->device_lock);
+        ugni_rc = GNI_PostdataProbeWaitById(uct_ugni_udt_iface_nic_handle(iface),-1,&id);
+        if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
+            ucs_error("GNI_PostDataProbeWaitById, Error status: %s %d\n",
+                      gni_err_str[ugni_rc], ugni_rc);
+            continue;
+        }
+        if (ucs_unlikely(UCT_UGNI_UDT_CANCEL == id)) {
+            /* When the iface is torn down, it will post and cancel a datagram with a
+             * magic cookie as its id that tells us to shut down.
+             */
+            break;
+        }
+        iface->events_ready = 1;
+        ucs_trace("Recieved a new datagram");
+        ucs_async_pipe_push(&iface->event_pipe);
+    }
+
+    return NULL;
+}
+/* Progress entry point: dispatches pending requests through the arbiter; always returns 0. */
+unsigned uct_ugni_udt_progress(void *arg)
+{
+    uct_ugni_udt_iface_t * iface = (uct_ugni_udt_iface_t *)arg;
+
+    uct_ugni_enter_async(&iface->super);
+    ucs_arbiter_dispatch(&iface->super.arbiter, 1, uct_ugni_udt_ep_process_pending, NULL);
+    uct_ugni_leave_async(&iface->super);
+    return 0;
+}
+/* Release callback for descriptors the AM handler kept: reset the headers and return it to the mpool. */
+static void uct_ugni_udt_iface_release_desc(uct_recv_desc_t *self, void *desc)
+{
+    uct_ugni_udt_desc_t *ugni_desc;
+    uct_ugni_udt_iface_t *iface = ucs_container_of(self, uct_ugni_udt_iface_t,
+                                                   release_desc);
+
+    ugni_desc = (uct_ugni_udt_desc_t *)((uct_recv_desc_t *)desc - 1);
+    ucs_assert_always(NULL != ugni_desc);
+    uct_ugni_udt_reset_desc(ugni_desc, iface);
+    ucs_mpool_put(ugni_desc);
+}
+/* Fill iface_attr with the UDT capabilities, address sizes and fixed performance estimates. */
+static ucs_status_t uct_ugni_udt_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr)
+{
+    uct_ugni_udt_iface_t *iface = ucs_derived_of(tl_iface, uct_ugni_udt_iface_t);
+
+    uct_base_iface_query(&iface->super.super, iface_attr);
+
+    iface_attr->cap.am.max_short = iface->config.udt_seg_size -
+                                   sizeof(uct_ugni_udt_header_t);
+    iface_attr->cap.am.max_bcopy = iface->config.udt_seg_size -
+                                   sizeof(uct_ugni_udt_header_t);
+    iface_attr->cap.am.opt_zcopy_align = 1;
+    iface_attr->cap.am.align_mtu = iface_attr->cap.am.opt_zcopy_align;
+    iface_attr->device_addr_len = sizeof(uct_devaddr_ugni_t);
+    iface_attr->iface_addr_len = sizeof(uct_sockaddr_ugni_t);
+    iface_attr->ep_addr_len = 0;
+    iface_attr->max_conn_priv = 0;
+    iface_attr->cap.flags = UCT_IFACE_FLAG_AM_SHORT |
+                            UCT_IFACE_FLAG_AM_BCOPY |
+                            UCT_IFACE_FLAG_CONNECT_TO_IFACE |
+                            UCT_IFACE_FLAG_PENDING |
+                            UCT_IFACE_FLAG_CB_ASYNC;
+
+    iface_attr->overhead = 1e-6; /* 1 usec */
+    iface_attr->latency.overhead = 40e-6; /* 40 usec */
+    iface_attr->latency.growth = 0;
+    iface_attr->bandwidth.dedicated = pow(1024, 2); /* bytes */
+    iface_attr->bandwidth.shared = 0;
+    iface_attr->priority = 0;
+
+    return UCS_OK;
+}
+/* Async handler for event_pipe: drains every completed datagram, then re-arms the device thread. */
+void uct_ugni_proccess_datagram_pipe(int event_id, void *arg) {
+    uct_ugni_udt_iface_t *iface = (uct_ugni_udt_iface_t *)arg;
+    uct_ugni_udt_ep_t *ep;
+    uct_ugni_udt_desc_t *datagram;
+    ucs_status_t status;
+    void *user_desc;
+    gni_return_t ugni_rc;
+    uint64_t id;
+
+    ucs_trace_func("");
+
+    uct_ugni_cdm_lock(&iface->super.cdm);
+    ugni_rc = GNI_PostDataProbeById(uct_ugni_udt_iface_nic_handle(iface), &id);
+    uct_ugni_cdm_unlock(&iface->super.cdm);
+    while (GNI_RC_SUCCESS == ugni_rc) {
+        status = recieve_datagram(iface, id, &ep);
+        if (UCS_INPROGRESS == status) {
+            if (ep != NULL){
+                ucs_trace_data("Processing reply");
+                datagram = ep->posted_desc;
+                status = processs_datagram(iface, datagram);
+                if (UCS_OK != status) { /* handler kept the descriptor: arm its release callback */
+                    user_desc = uct_ugni_udt_get_user_desc(datagram, iface);
+                    uct_recv_desc(user_desc) = &iface->release_desc;
+                } else {
+                    ucs_mpool_put(datagram);
+                }
+                ep->posted_desc = NULL;
+                uct_ugni_check_flush(ep->desc_flush_group);
+            } else {
+                ucs_trace_data("Processing wildcard");
+                datagram = iface->desc_any;
+                status = processs_datagram(iface, datagram);
+                if (UCS_OK != status) { /* handler kept desc_any: replace it before re-posting */
+                    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
+                                             iface->desc_any, iface->desc_any=NULL);
+                    user_desc = uct_ugni_udt_get_user_desc(datagram, iface);
+                    uct_recv_desc(user_desc) = &iface->release_desc;
+                }
+                status = (NULL != iface->desc_any) ? uct_ugni_udt_ep_any_post(iface) : UCS_ERR_NO_MEMORY;
+                if (UCS_OK != status) {
+                    /* Can't continue without a posted wildcard receive (post failed or no free descriptor) */
+                    ucs_error("Failed to post wildcard request");
+                    return;
+                }
+            }
+        }
+        uct_ugni_cdm_lock(&iface->super.cdm);
+        ugni_rc = GNI_PostDataProbeById(uct_ugni_udt_iface_nic_handle(iface), &id);
+        uct_ugni_cdm_unlock(&iface->super.cdm);
+    }
+
+    ucs_async_pipe_drain(&iface->event_pipe);
+    pthread_mutex_lock(&iface->device_lock);
+    iface->events_ready = 0;
+    pthread_mutex_unlock(&iface->device_lock);
+    ucs_trace("Signaling device thread to resume monitoring");
+    pthread_cond_signal(&iface->device_condition);
+
+}
+/* Cancel the wildcard post, wait out a still-pending cancel, then destroy ep_any (all under the cdm lock). */
+static void uct_ugni_udt_clean_wildcard(uct_ugni_udt_iface_t *iface)
+{
+    gni_post_state_t state;
+    uint32_t peer_addr, peer_id;
+    gni_return_t rc;
+
+    uct_ugni_cdm_lock(&iface->super.cdm);
+    rc = GNI_EpPostDataCancelById(iface->ep_any, UCT_UGNI_UDT_ANY);
+    if (GNI_RC_SUCCESS != rc) {
+        uct_ugni_cdm_unlock(&iface->super.cdm);
+        ucs_error("GNI_EpPostDataCancel failed, Error status: %s %d", gni_err_str[rc], rc);
+        return;
+    }
+
+    rc = GNI_EpPostDataTestById(iface->ep_any, UCT_UGNI_UDT_ANY, &state, &peer_addr, &peer_id);
+    if (GNI_RC_SUCCESS == rc) {
+        if (GNI_POST_PENDING == state) {
+            /* the wait result is not checked; rc is overwritten by GNI_EpDestroy below */
+            rc = GNI_EpPostDataWaitById(iface->ep_any, UCT_UGNI_UDT_ANY, -1, &state, &peer_addr, &peer_id);
+        }
+    } else if (GNI_RC_NO_MATCH != rc) {
+        uct_ugni_cdm_unlock(&iface->super.cdm);
+        ucs_error("GNI_EpPostDataTestById failed, Error status: %s %d", gni_err_str[rc], rc);
+        return;
+    }
+
+    rc = GNI_EpDestroy(iface->ep_any);
+    if (GNI_RC_SUCCESS != rc) {
+        ucs_error("GNI_EpDestroy failed, Error status: %s %d\n", gni_err_str[rc], rc);
+    }
+
+    uct_ugni_cdm_unlock(&iface->super.cdm);
+}
+
+/* Before this function is called, you MUST
+ * A) Deregister the datagram processing function from the async thread.
+ * B) Cancel the wildcard datagram.
+ * C) Drain all other messages from the queue.
+ */
+static inline void uct_ugni_udt_terminate_thread(uct_ugni_udt_iface_t *iface)
+{
+    gni_return_t ugni_rc;
+    gni_ep_handle_t ep;
+
+    uct_ugni_cdm_lock(&iface->super.cdm);
+    ugni_rc = GNI_EpCreate(uct_ugni_udt_iface_nic_handle(iface), iface->super.local_cq, &ep);
+    if (GNI_RC_SUCCESS != ugni_rc) {
+        uct_ugni_cdm_unlock(&iface->super.cdm);
+        ucs_error("GNI_EpCreate, Error status: %s %d",
+                  gni_err_str[ugni_rc], ugni_rc);
+        return;
+    }
+    ugni_rc = GNI_EpBind(ep, iface->super.cdm.dev->address, iface->super.cdm.domain_id);
+    if (GNI_RC_SUCCESS != ugni_rc) {
+        GNI_EpDestroy(ep);
+        uct_ugni_cdm_unlock(&iface->super.cdm);
+        ucs_error("GNI_EpBind failed, Error status: %s %d",
+                  gni_err_str[ugni_rc], ugni_rc);
+        return;
+    }
+    ugni_rc = GNI_EpPostDataWId(ep,
+                                NULL, 0,
+                                NULL, 0,
+                                UCT_UGNI_UDT_CANCEL); /* the id the device thread treats as "shut down" */
+    if (GNI_RC_SUCCESS != ugni_rc) {
+        ucs_error("Couldn't send cancel message to UGNI interface! %s %d",
+                  gni_err_str[ugni_rc], ugni_rc);
+    }
+    /* When the gni_ep is destroyed the above post will be canceled */
+    ugni_rc = GNI_EpDestroy(ep);
+    uct_ugni_cdm_unlock(&iface->super.cdm);
+    if (GNI_RC_SUCCESS != ugni_rc) {
+        ucs_error("GNI_EpDestroy failed, Error status: %s %d\n",
+                  gni_err_str[ugni_rc], ugni_rc);
+    }
+}
+/* Tear down: cancel the wildcard post, stop the async handler and device thread, free resources. */
+static UCS_CLASS_CLEANUP_FUNC(uct_ugni_udt_iface_t)
+{
+    uct_ugni_enter_async(&self->super);
+    uct_ugni_udt_clean_wildcard(self);
+    ucs_async_remove_handler(ucs_async_pipe_rfd(&self->event_pipe),1);
+    if (self->events_ready) {
+        uct_ugni_proccess_datagram_pipe(ucs_async_pipe_rfd(&self->event_pipe),self);
+    }
+    uct_ugni_udt_terminate_thread(self);
+    pthread_join(self->event_thread, NULL); /* the thread's return value is not used */
+    ucs_async_pipe_destroy(&self->event_pipe);
+    ucs_mpool_put(self->desc_any);
+    ucs_mpool_cleanup(&self->free_desc, 1);
+    pthread_mutex_destroy(&self->device_lock);
+    pthread_cond_destroy(&self->device_condition); /* pairs with pthread_cond_init in the init function */
+    uct_ugni_leave_async(&self->super);
+}
+
+static UCS_CLASS_DEFINE_DELETE_FUNC(uct_ugni_udt_iface_t, uct_iface_t);
+
+static uct_iface_ops_t uct_ugni_udt_iface_ops = {
+    .ep_am_short = uct_ugni_udt_ep_am_short,
+    .ep_am_bcopy = uct_ugni_udt_ep_am_bcopy,
+    .ep_pending_add = uct_ugni_udt_ep_pending_add,
+    .ep_pending_purge = uct_ugni_udt_ep_pending_purge,
+    .ep_flush = uct_ugni_ep_flush,
+    .ep_fence = uct_base_ep_fence,
+    .ep_create = UCS_CLASS_NEW_FUNC_NAME(uct_ugni_udt_ep_t),
+    .ep_destroy = UCS_CLASS_DELETE_FUNC_NAME(uct_ugni_udt_ep_t),
+    .iface_flush = uct_ugni_iface_flush,
+    .iface_fence = uct_base_iface_fence,
+    .iface_progress_enable = ucs_empty_function,
+    .iface_progress_disable = ucs_empty_function,
+    .iface_progress = (void*)uct_ugni_udt_progress,
+    .iface_close = UCS_CLASS_DELETE_FUNC_NAME(uct_ugni_udt_iface_t),
+    .iface_query = uct_ugni_udt_iface_query,
+    .iface_get_address = uct_ugni_iface_get_address,
+    .iface_get_device_address = uct_ugni_iface_get_dev_address,
+    .iface_is_reachable = uct_ugni_iface_is_reachable
+};
+
+static ucs_mpool_ops_t uct_ugni_udt_desc_mpool_ops = {
+    .chunk_alloc = ucs_mpool_hugetlb_malloc,
+    .chunk_release = ucs_mpool_hugetlb_free,
+    .obj_init = NULL,
+    .obj_cleanup = NULL
+};
+/* Set up the UDT iface: descriptor pool, wildcard receive, async pipe handler and device thread. */
+static UCS_CLASS_INIT_FUNC(uct_ugni_udt_iface_t, uct_md_h md, uct_worker_h worker,
+                           const uct_iface_params_t *params,
+                           const uct_iface_config_t *tl_config)
+{
+    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
+    ucs_status_t status;
+    uct_ugni_udt_desc_t *desc;
+    gni_return_t ugni_rc;
+    int rc;
+
+    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params,
+                              &uct_ugni_udt_iface_ops,
+                              &config->super UCS_STATS_ARG(NULL));
+
+    /* Setting initial configuration */
+    self->config.udt_seg_size = GNI_DATAGRAM_MAXSIZE;
+    self->config.rx_headroom = params->rx_headroom;
+    self->release_desc.cb = uct_ugni_udt_iface_release_desc;
+
+    status = ucs_async_pipe_create(&self->event_pipe);
+    if (UCS_OK != status) {
+        ucs_error("Pipe creation failed");
+        goto exit;
+    }
+
+    status = ucs_mpool_init(&self->free_desc,
+                            0,
+                            uct_ugni_udt_get_diff(self) + self->config.udt_seg_size * 2,
+                            uct_ugni_udt_get_diff(self),
+                            UCS_SYS_CACHE_LINE_SIZE, /* alignment */
+                            128, /* grow */
+                            config->mpool.max_bufs, /* max buffers */
+                            &uct_ugni_udt_desc_mpool_ops,
+                            "UGNI-UDT-DESC");
+
+    if (UCS_OK != status) {
+        ucs_error("Mpool creation failed");
+        goto clean_pipe;
+    }
+
+    ugni_rc = GNI_EpCreate(uct_ugni_udt_iface_nic_handle(self), NULL, &self->ep_any);
+    if (GNI_RC_SUCCESS != ugni_rc) {
+        ucs_error("GNI_EpCreate failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc);
+        status = UCS_ERR_NO_DEVICE;
+        goto clean_free_desc;
+    }
+    status = UCS_ERR_NO_MEMORY; /* reported if no descriptor can be taken below (was UCS_OK) */
+    UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc,
+                             desc, goto clean_ep);
+    ucs_debug("First wildcard desc is %p", desc);
+
+    /* Init any desc */
+    self->desc_any = desc;
+    status = uct_ugni_udt_ep_any_post(self);
+    if (UCS_OK != status) {
+        /* Can't continue; the failed post already returned desc_any to the mpool (UCT_UGNI_UDT_CHECK_RC) */
+        ucs_error("Failed to post wildcard request");
+        goto clean_ep;
+    }
+
+    status = ucs_async_set_event_handler(self->super.super.worker->async->mode,
+                                         ucs_async_pipe_rfd(&self->event_pipe),
+                                         UCS_EVENT_SET_EVREAD,
+                                         uct_ugni_proccess_datagram_pipe,
+                                         self, self->super.super.worker->async);
+    if (UCS_OK != status) {
+        goto clean_cancel_desc;
+    }
+
+    pthread_mutex_init(&self->device_lock, NULL);
+    pthread_cond_init(&self->device_condition, NULL);
+    self->events_ready = 0;
+
+    rc = pthread_create(&self->event_thread, NULL, uct_ugni_udt_device_thread, self);
+    if(0 != rc) {
+        status = UCS_ERR_IO_ERROR; /* status still held UCS_OK from the handler registration */
+        goto clean_remove_event;
+    }
+
+    return UCS_OK;
+
+ clean_remove_event:
+    pthread_cond_destroy(&self->device_condition);
+    pthread_mutex_destroy(&self->device_lock);
+    ucs_async_remove_handler(ucs_async_pipe_rfd(&self->event_pipe), 1); /* pipe itself is destroyed at clean_pipe */
+ clean_cancel_desc:
+    uct_ugni_udt_clean_wildcard(self); /* cancels the wildcard post and destroys ep_any */
+    ucs_mpool_put(self->desc_any);
+    goto clean_free_desc; /* skip clean_ep: ep_any must not be destroyed twice */
+ clean_ep:
+    ugni_rc = GNI_EpDestroy(self->ep_any);
+    if (GNI_RC_SUCCESS != ugni_rc) {
+        ucs_warn("GNI_EpDestroy failed, Error status: %s %d", gni_err_str[ugni_rc], ugni_rc);
+    }
+ clean_free_desc:
+    ucs_mpool_cleanup(&self->free_desc, 1);
+ clean_pipe:
+    ucs_async_pipe_destroy(&self->event_pipe);
+ exit:
+    uct_ugni_cleanup_base_iface(&self->super);
+    ucs_error("Failed to activate interface");
+    return status;
+}
+
+UCS_CLASS_DEFINE(uct_ugni_udt_iface_t, uct_ugni_iface_t);
+UCS_CLASS_DEFINE_NEW_FUNC(uct_ugni_udt_iface_t, uct_iface_t, uct_md_h,
+                          uct_worker_h, const uct_iface_params_t*,
+                          const uct_iface_config_t*);
+
+UCT_TL_DEFINE(&uct_ugni_component, ugni_udt, uct_ugni_query_devices,
+              uct_ugni_udt_iface_t, "UGNI_UDT_",
+              uct_ugni_udt_iface_config_table, uct_ugni_iface_config_t);
diff --git a/src/uct/ugni/udt/ugni_udt_iface.h b/src/uct/ugni/udt/ugni_udt_iface.h
new file mode 100644
index 0000000..ca3283b
--- /dev/null
+++ b/src/uct/ugni/udt/ugni_udt_iface.h
@@ -0,0 +1,108 @@
+/**
+ * Copyright (c) UT-Battelle, LLC. 2014-2017. ALL RIGHTS RESERVED.
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+ * See file LICENSE for terms.
+ */
+
+#ifndef UCT_UGNI_UDT_IFACE_H
+#define UCT_UGNI_UDT_IFACE_H
+
+#include "ugni_udt_ep.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+typedef void uct_ugni_udt_desc_t;
+
+typedef struct uct_ugni_udt_iface {
+    uct_ugni_iface_t super; /**< Super type */
+    ucs_mpool_t free_desc; /**< Pool of FMA descriptors for
+                                requests without bouncing buffers */
+    gni_ep_handle_t ep_any; /**< Unbound endpoint that accept any datagram
+                                 messages */
+    uct_ugni_udt_desc_t *desc_any; /**< Segment that accepts datagram from any source */
+    uct_recv_desc_t release_desc; /**< Callback for receive desc release */
+    struct {
+        unsigned udt_seg_size; /**< Max UDT size */
+        size_t rx_headroom; /**< The size of user defined header for am */
+    } config;
+
+    pthread_t event_thread; /**< Device thread that waits for incoming datagrams */
+    pthread_mutex_t device_lock; /**< Mutex used together with device_condition */
+    pthread_cond_t device_condition; /**< Signaled when the device thread may resume probing */
+    int events_ready; /**< Non-zero while datagrams await the async handler */
+    ucs_async_pipe_t event_pipe; /**< Wakes the async handler when datagrams arrive */
+} uct_ugni_udt_iface_t;
+
+enum {
+    UCT_UGNI_UDT_EMPTY = 0,
+    UCT_UGNI_UDT_PAYLOAD = 1
+};
+
+typedef struct uct_ugni_udt_header {
+    uint8_t type; /* UCT_UGNI_UDT_EMPTY or UCT_UGNI_UDT_PAYLOAD */
+    uint8_t am_id; /* active message id */
+    uint8_t length; /* payload bytes that follow this header */
+} uct_ugni_udt_header_t;
+
+unsigned 
uct_ugni_udt_progress(void *arg);
+/* Descriptor layout: [pad of get_diff() bytes][recv header + payload, GNI_DATAGRAM_MAXSIZE][send header + payload] */
+#define uct_ugni_udt_get_offset(i) ((size_t)(ucs_max(sizeof(uct_ugni_udt_header_t), ((i)->config.rx_headroom + \
+                                                     sizeof(uct_recv_desc_t)))))
+
+#define uct_ugni_udt_get_diff(i) ((size_t)(uct_ugni_udt_get_offset(i) - sizeof(uct_ugni_udt_header_t)))
+
+#define uct_ugni_udt_get_rheader(d, i) ((uct_ugni_udt_header_t *)((char *)(d) + uct_ugni_udt_get_diff(i)))
+#define uct_ugni_udt_get_sheader(d, i) ((uct_ugni_udt_header_t *)((char *)uct_ugni_udt_get_rheader(d, i) + GNI_DATAGRAM_MAXSIZE))
+
+#define uct_ugni_udt_get_rpayload(d, i) (uct_ugni_udt_get_rheader(d, i) + 1)
+#define uct_ugni_udt_get_spayload(d, i) (uct_ugni_udt_get_sheader(d, i) + 1)
+#define uct_ugni_udt_get_user_desc(d, i) ((char *)uct_ugni_udt_get_rpayload(d, i) - (i)->config.rx_headroom)
+/* On a failed post: log, return desc to its mpool, and RETURN from the calling function. */
+#define UCT_UGNI_UDT_CHECK_RC(rc, desc) \
+if (ucs_unlikely(GNI_RC_SUCCESS != (rc))) { \
+    if (GNI_RC_ERROR_RESOURCE == (rc) || GNI_RC_ERROR_NOMEM == (rc)) { \
+        ucs_debug("GNI_EpPostDataWId failed, Error status: %s %d", \
+                  gni_err_str[(rc)], (rc)); \
+        ucs_mpool_put(desc); \
+        return UCS_ERR_NO_RESOURCE; \
+    } else { \
+        ucs_error("GNI_EpPostDataWId failed, Error status: %s %d", \
+                  gni_err_str[(rc)], (rc)); \
+        ucs_mpool_put(desc); \
+        return UCS_ERR_IO_ERROR; \
+    } \
+}
+
+#define uct_ugni_udt_iface_nic_handle(_iface) uct_ugni_iface_nic_handle(&(_iface)->super)
+/* Zero the send and receive headers of a descriptor before it is (re)used. */
+static inline void uct_ugni_udt_reset_desc(uct_ugni_udt_desc_t *desc, uct_ugni_udt_iface_t *iface)
+{
+    uct_ugni_udt_header_t *sheader = uct_ugni_udt_get_sheader(desc, iface);
+    uct_ugni_udt_header_t *rheader = uct_ugni_udt_get_rheader(desc, iface);
+
+    memset(sheader, 0, sizeof(*sheader));
+    memset(rheader, 0, sizeof(*rheader));
+}
+/* (Re)post the wildcard receive on ep_any using desc_any; on failure desc_any is put back to the mpool. */
+static inline ucs_status_t uct_ugni_udt_ep_any_post(uct_ugni_udt_iface_t *iface)
+{
+    gni_return_t ugni_rc;
+
+    uct_ugni_udt_reset_desc(iface->desc_any, iface);
+    uct_ugni_cdm_lock(&iface->super.cdm);
+    ugni_rc = GNI_EpPostDataWId(iface->ep_any,
+                                uct_ugni_udt_get_sheader(iface->desc_any, iface),
+                                iface->config.udt_seg_size,
+                                uct_ugni_udt_get_rheader(iface->desc_any, iface),
+                                iface->config.udt_seg_size,
+                                UCT_UGNI_UDT_ANY);
+    uct_ugni_cdm_unlock(&iface->super.cdm);
+    UCT_UGNI_UDT_CHECK_RC(ugni_rc, iface->desc_any);
+    return UCS_OK;
+}
+
+#endif
diff --git a/test/apps/Makefile.am b/test/apps/Makefile.am
new file mode 100644
index 0000000..c6258b6
--- /dev/null
+++ b/test/apps/Makefile.am
@@ -0,0 +1,52 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+# Copyright (C) ARM Ltd. 2017. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+#
+
+if HAVE_CXX11
+SUBDIRS = sockaddr
+endif
+
+noinst_PROGRAMS = \
+    test_ucp_dlopen \
+    test_ucs_dlopen \
+    test_link_map \
+    test_dlopen_cfg_print
+
+objdir = $(shell sed -n -e 's/^objdir=\(.*\)$$/\1/p' $(LIBTOOL))
+
+test_ucs_dlopen_SOURCES = test_ucs_dlopen.c
+test_ucs_dlopen_CPPFLAGS = $(BASE_CPPFLAGS) \
+    -DLIB_PATH=$(abs_top_builddir)/src/ucs/$(objdir)/libucs.so
+test_ucs_dlopen_CFLAGS = $(BASE_CFLAGS)
+test_ucs_dlopen_LDADD = -ldl
+
+test_ucp_dlopen_SOURCES = test_ucp_dlopen.c
+test_ucp_dlopen_CPPFLAGS = $(BASE_CPPFLAGS) \
+    -DLIB_PATH=$(abs_top_builddir)/src/ucp/$(objdir)/libucp.so
+test_ucp_dlopen_CFLAGS = $(BASE_CFLAGS)
+test_ucp_dlopen_LDADD = -ldl
+
+test_link_map_SOURCES = test_link_map.c
+test_link_map_CPPFLAGS = $(BASE_CPPFLAGS)
+test_link_map_CFLAGS = $(BASE_CFLAGS)
+test_link_map_LDADD = -ldl $(top_builddir)/src/ucp/libucp.la
+
+test_dlopen_cfg_print_SOURCES = test_dlopen_cfg_print.c
+test_dlopen_cfg_print_CPPFLAGS = $(BASE_CPPFLAGS) -g \
+    -DUCS_LIB_PATH=$(abs_top_builddir)/src/ucs/$(objdir)/libucs.so \
+    -DUCT_LIB_PATH=$(abs_top_builddir)/src/uct/$(objdir)/libuct.so
+test_dlopen_cfg_print_CFLAGS = $(BASE_CFLAGS)
+test_dlopen_cfg_print_LDADD = -ldl
+
+if HAVE_TCMALLOC
+noinst_PROGRAMS += test_tcmalloc
+test_tcmalloc_SOURCES = test_tcmalloc.c
+test_tcmalloc_CPPFLAGS = $(BASE_CPPFLAGS)
+test_tcmalloc_CFLAGS = 
$(BASE_CFLAGS) +test_tcmalloc_LDADD = -ldl $(TCMALLOC_LIB) \ + $(top_builddir)/src/ucp/libucp.la +endif diff --git a/test/apps/Makefile.in b/test/apps/Makefile.in new file mode 100644 index 0000000..aa8fcd0 --- /dev/null +++ b/test/apps/Makefile.in @@ -0,0 +1,1035 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +# Copyright (C) ARM Ltd. 2017. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +noinst_PROGRAMS = test_ucp_dlopen$(EXEEXT) test_ucs_dlopen$(EXEEXT) \ + test_link_map$(EXEEXT) test_dlopen_cfg_print$(EXEEXT) \ + $(am__EXEEXT_1) +@HAVE_TCMALLOC_TRUE@am__append_1 = test_tcmalloc +subdir = 
test/apps +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = 
$(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@HAVE_TCMALLOC_TRUE@am__EXEEXT_1 = test_tcmalloc$(EXEEXT) +PROGRAMS = $(noinst_PROGRAMS) +am_test_dlopen_cfg_print_OBJECTS = \ + test_dlopen_cfg_print-test_dlopen_cfg_print.$(OBJEXT) +test_dlopen_cfg_print_OBJECTS = $(am_test_dlopen_cfg_print_OBJECTS) +test_dlopen_cfg_print_DEPENDENCIES = +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +test_dlopen_cfg_print_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(test_dlopen_cfg_print_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +am_test_link_map_OBJECTS = test_link_map-test_link_map.$(OBJEXT) +test_link_map_OBJECTS = $(am_test_link_map_OBJECTS) +test_link_map_DEPENDENCIES = $(top_builddir)/src/ucp/libucp.la +test_link_map_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(test_link_map_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__test_tcmalloc_SOURCES_DIST = test_tcmalloc.c +@HAVE_TCMALLOC_TRUE@am_test_tcmalloc_OBJECTS = \ +@HAVE_TCMALLOC_TRUE@ test_tcmalloc-test_tcmalloc.$(OBJEXT) +test_tcmalloc_OBJECTS = $(am_test_tcmalloc_OBJECTS) +@HAVE_TCMALLOC_TRUE@test_tcmalloc_DEPENDENCIES = \ +@HAVE_TCMALLOC_TRUE@ $(top_builddir)/src/ucp/libucp.la +test_tcmalloc_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(test_tcmalloc_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am_test_ucp_dlopen_OBJECTS = \ + test_ucp_dlopen-test_ucp_dlopen.$(OBJEXT) +test_ucp_dlopen_OBJECTS = $(am_test_ucp_dlopen_OBJECTS) +test_ucp_dlopen_DEPENDENCIES = +test_ucp_dlopen_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + 
$(test_ucp_dlopen_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am_test_ucs_dlopen_OBJECTS = \ + test_ucs_dlopen-test_ucs_dlopen.$(OBJEXT) +test_ucs_dlopen_OBJECTS = $(am_test_ucs_dlopen_OBJECTS) +test_ucs_dlopen_DEPENDENCIES = +test_ucs_dlopen_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(test_ucs_dlopen_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = \ + ./$(DEPDIR)/test_dlopen_cfg_print-test_dlopen_cfg_print.Po \ + ./$(DEPDIR)/test_link_map-test_link_map.Po \ + ./$(DEPDIR)/test_tcmalloc-test_tcmalloc.Po \ + ./$(DEPDIR)/test_ucp_dlopen-test_ucp_dlopen.Po \ + ./$(DEPDIR)/test_ucs_dlopen-test_ucs_dlopen.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(test_dlopen_cfg_print_SOURCES) $(test_link_map_SOURCES) \ + 
$(test_tcmalloc_SOURCES) $(test_ucp_dlopen_SOURCES) \ + $(test_ucs_dlopen_SOURCES) +DIST_SOURCES = $(test_dlopen_cfg_print_SOURCES) \ + $(test_link_map_SOURCES) $(am__test_tcmalloc_SOURCES_DIST) \ + $(test_ucp_dlopen_SOURCES) $(test_ucs_dlopen_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = sockaddr +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ 
+DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = 
@MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = 
@host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = $(shell sed -n -e 's/^objdir=\(.*\)$$/\1/p' $(LIBTOOL)) +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_CXX11_TRUE@SUBDIRS = sockaddr +test_ucs_dlopen_SOURCES = test_ucs_dlopen.c +test_ucs_dlopen_CPPFLAGS = $(BASE_CPPFLAGS) \ + -DLIB_PATH=$(abs_top_builddir)/src/ucs/$(objdir)/libucs.so + +test_ucs_dlopen_CFLAGS = $(BASE_CFLAGS) +test_ucs_dlopen_LDADD = -ldl +test_ucp_dlopen_SOURCES = test_ucp_dlopen.c +test_ucp_dlopen_CPPFLAGS = $(BASE_CPPFLAGS) \ + -DLIB_PATH=$(abs_top_builddir)/src/ucp/$(objdir)/libucp.so + +test_ucp_dlopen_CFLAGS = $(BASE_CFLAGS) +test_ucp_dlopen_LDADD = -ldl +test_link_map_SOURCES = test_link_map.c +test_link_map_CPPFLAGS = $(BASE_CPPFLAGS) +test_link_map_CFLAGS = $(BASE_CFLAGS) +test_link_map_LDADD = -ldl $(top_builddir)/src/ucp/libucp.la +test_dlopen_cfg_print_SOURCES = test_dlopen_cfg_print.c +test_dlopen_cfg_print_CPPFLAGS = $(BASE_CPPFLAGS) -g \ + -DUCS_LIB_PATH=$(abs_top_builddir)/src/ucs/$(objdir)/libucs.so \ + -DUCT_LIB_PATH=$(abs_top_builddir)/src/uct/$(objdir)/libuct.so + +test_dlopen_cfg_print_CFLAGS = $(BASE_CFLAGS) +test_dlopen_cfg_print_LDADD = -ldl +@HAVE_TCMALLOC_TRUE@test_tcmalloc_SOURCES = test_tcmalloc.c +@HAVE_TCMALLOC_TRUE@test_tcmalloc_CPPFLAGS = 
$(BASE_CPPFLAGS) +@HAVE_TCMALLOC_TRUE@test_tcmalloc_CFLAGS = $(BASE_CFLAGS) +@HAVE_TCMALLOC_TRUE@test_tcmalloc_LDADD = -ldl $(TCMALLOC_LIB) \ +@HAVE_TCMALLOC_TRUE@ $(top_builddir)/src/ucp/libucp.la + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/apps/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign test/apps/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +test_dlopen_cfg_print$(EXEEXT): $(test_dlopen_cfg_print_OBJECTS) $(test_dlopen_cfg_print_DEPENDENCIES) $(EXTRA_test_dlopen_cfg_print_DEPENDENCIES) + @rm -f 
test_dlopen_cfg_print$(EXEEXT) + $(AM_V_CCLD)$(test_dlopen_cfg_print_LINK) $(test_dlopen_cfg_print_OBJECTS) $(test_dlopen_cfg_print_LDADD) $(LIBS) + +test_link_map$(EXEEXT): $(test_link_map_OBJECTS) $(test_link_map_DEPENDENCIES) $(EXTRA_test_link_map_DEPENDENCIES) + @rm -f test_link_map$(EXEEXT) + $(AM_V_CCLD)$(test_link_map_LINK) $(test_link_map_OBJECTS) $(test_link_map_LDADD) $(LIBS) + +test_tcmalloc$(EXEEXT): $(test_tcmalloc_OBJECTS) $(test_tcmalloc_DEPENDENCIES) $(EXTRA_test_tcmalloc_DEPENDENCIES) + @rm -f test_tcmalloc$(EXEEXT) + $(AM_V_CCLD)$(test_tcmalloc_LINK) $(test_tcmalloc_OBJECTS) $(test_tcmalloc_LDADD) $(LIBS) + +test_ucp_dlopen$(EXEEXT): $(test_ucp_dlopen_OBJECTS) $(test_ucp_dlopen_DEPENDENCIES) $(EXTRA_test_ucp_dlopen_DEPENDENCIES) + @rm -f test_ucp_dlopen$(EXEEXT) + $(AM_V_CCLD)$(test_ucp_dlopen_LINK) $(test_ucp_dlopen_OBJECTS) $(test_ucp_dlopen_LDADD) $(LIBS) + +test_ucs_dlopen$(EXEEXT): $(test_ucs_dlopen_OBJECTS) $(test_ucs_dlopen_DEPENDENCIES) $(EXTRA_test_ucs_dlopen_DEPENDENCIES) + @rm -f test_ucs_dlopen$(EXEEXT) + $(AM_V_CCLD)$(test_ucs_dlopen_LINK) $(test_ucs_dlopen_OBJECTS) $(test_ucs_dlopen_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_dlopen_cfg_print-test_dlopen_cfg_print.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_link_map-test_link_map.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_tcmalloc-test_tcmalloc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_ucp_dlopen-test_ucp_dlopen.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_ucs_dlopen-test_ucs_dlopen.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ 
$(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +test_dlopen_cfg_print-test_dlopen_cfg_print.o: test_dlopen_cfg_print.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_dlopen_cfg_print_CPPFLAGS) $(CPPFLAGS) $(test_dlopen_cfg_print_CFLAGS) $(CFLAGS) -MT test_dlopen_cfg_print-test_dlopen_cfg_print.o -MD -MP -MF $(DEPDIR)/test_dlopen_cfg_print-test_dlopen_cfg_print.Tpo -c -o test_dlopen_cfg_print-test_dlopen_cfg_print.o `test -f 'test_dlopen_cfg_print.c' || echo 
'$(srcdir)/'`test_dlopen_cfg_print.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_dlopen_cfg_print-test_dlopen_cfg_print.Tpo $(DEPDIR)/test_dlopen_cfg_print-test_dlopen_cfg_print.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_dlopen_cfg_print.c' object='test_dlopen_cfg_print-test_dlopen_cfg_print.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_dlopen_cfg_print_CPPFLAGS) $(CPPFLAGS) $(test_dlopen_cfg_print_CFLAGS) $(CFLAGS) -c -o test_dlopen_cfg_print-test_dlopen_cfg_print.o `test -f 'test_dlopen_cfg_print.c' || echo '$(srcdir)/'`test_dlopen_cfg_print.c + +test_dlopen_cfg_print-test_dlopen_cfg_print.obj: test_dlopen_cfg_print.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_dlopen_cfg_print_CPPFLAGS) $(CPPFLAGS) $(test_dlopen_cfg_print_CFLAGS) $(CFLAGS) -MT test_dlopen_cfg_print-test_dlopen_cfg_print.obj -MD -MP -MF $(DEPDIR)/test_dlopen_cfg_print-test_dlopen_cfg_print.Tpo -c -o test_dlopen_cfg_print-test_dlopen_cfg_print.obj `if test -f 'test_dlopen_cfg_print.c'; then $(CYGPATH_W) 'test_dlopen_cfg_print.c'; else $(CYGPATH_W) '$(srcdir)/test_dlopen_cfg_print.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_dlopen_cfg_print-test_dlopen_cfg_print.Tpo $(DEPDIR)/test_dlopen_cfg_print-test_dlopen_cfg_print.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_dlopen_cfg_print.c' object='test_dlopen_cfg_print-test_dlopen_cfg_print.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_dlopen_cfg_print_CPPFLAGS) $(CPPFLAGS) $(test_dlopen_cfg_print_CFLAGS) $(CFLAGS) -c -o test_dlopen_cfg_print-test_dlopen_cfg_print.obj `if test -f 
'test_dlopen_cfg_print.c'; then $(CYGPATH_W) 'test_dlopen_cfg_print.c'; else $(CYGPATH_W) '$(srcdir)/test_dlopen_cfg_print.c'; fi` + +test_link_map-test_link_map.o: test_link_map.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_link_map_CPPFLAGS) $(CPPFLAGS) $(test_link_map_CFLAGS) $(CFLAGS) -MT test_link_map-test_link_map.o -MD -MP -MF $(DEPDIR)/test_link_map-test_link_map.Tpo -c -o test_link_map-test_link_map.o `test -f 'test_link_map.c' || echo '$(srcdir)/'`test_link_map.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_link_map-test_link_map.Tpo $(DEPDIR)/test_link_map-test_link_map.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_link_map.c' object='test_link_map-test_link_map.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_link_map_CPPFLAGS) $(CPPFLAGS) $(test_link_map_CFLAGS) $(CFLAGS) -c -o test_link_map-test_link_map.o `test -f 'test_link_map.c' || echo '$(srcdir)/'`test_link_map.c + +test_link_map-test_link_map.obj: test_link_map.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_link_map_CPPFLAGS) $(CPPFLAGS) $(test_link_map_CFLAGS) $(CFLAGS) -MT test_link_map-test_link_map.obj -MD -MP -MF $(DEPDIR)/test_link_map-test_link_map.Tpo -c -o test_link_map-test_link_map.obj `if test -f 'test_link_map.c'; then $(CYGPATH_W) 'test_link_map.c'; else $(CYGPATH_W) '$(srcdir)/test_link_map.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_link_map-test_link_map.Tpo $(DEPDIR)/test_link_map-test_link_map.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_link_map.c' object='test_link_map-test_link_map.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(test_link_map_CPPFLAGS) $(CPPFLAGS) $(test_link_map_CFLAGS) $(CFLAGS) -c -o test_link_map-test_link_map.obj `if test -f 'test_link_map.c'; then $(CYGPATH_W) 'test_link_map.c'; else $(CYGPATH_W) '$(srcdir)/test_link_map.c'; fi` + +test_tcmalloc-test_tcmalloc.o: test_tcmalloc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_tcmalloc_CPPFLAGS) $(CPPFLAGS) $(test_tcmalloc_CFLAGS) $(CFLAGS) -MT test_tcmalloc-test_tcmalloc.o -MD -MP -MF $(DEPDIR)/test_tcmalloc-test_tcmalloc.Tpo -c -o test_tcmalloc-test_tcmalloc.o `test -f 'test_tcmalloc.c' || echo '$(srcdir)/'`test_tcmalloc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_tcmalloc-test_tcmalloc.Tpo $(DEPDIR)/test_tcmalloc-test_tcmalloc.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_tcmalloc.c' object='test_tcmalloc-test_tcmalloc.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_tcmalloc_CPPFLAGS) $(CPPFLAGS) $(test_tcmalloc_CFLAGS) $(CFLAGS) -c -o test_tcmalloc-test_tcmalloc.o `test -f 'test_tcmalloc.c' || echo '$(srcdir)/'`test_tcmalloc.c + +test_tcmalloc-test_tcmalloc.obj: test_tcmalloc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_tcmalloc_CPPFLAGS) $(CPPFLAGS) $(test_tcmalloc_CFLAGS) $(CFLAGS) -MT test_tcmalloc-test_tcmalloc.obj -MD -MP -MF $(DEPDIR)/test_tcmalloc-test_tcmalloc.Tpo -c -o test_tcmalloc-test_tcmalloc.obj `if test -f 'test_tcmalloc.c'; then $(CYGPATH_W) 'test_tcmalloc.c'; else $(CYGPATH_W) '$(srcdir)/test_tcmalloc.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_tcmalloc-test_tcmalloc.Tpo $(DEPDIR)/test_tcmalloc-test_tcmalloc.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_tcmalloc.c' object='test_tcmalloc-test_tcmalloc.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_tcmalloc_CPPFLAGS) $(CPPFLAGS) $(test_tcmalloc_CFLAGS) $(CFLAGS) -c -o test_tcmalloc-test_tcmalloc.obj `if test -f 'test_tcmalloc.c'; then $(CYGPATH_W) 'test_tcmalloc.c'; else $(CYGPATH_W) '$(srcdir)/test_tcmalloc.c'; fi` + +test_ucp_dlopen-test_ucp_dlopen.o: test_ucp_dlopen.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_ucp_dlopen_CPPFLAGS) $(CPPFLAGS) $(test_ucp_dlopen_CFLAGS) $(CFLAGS) -MT test_ucp_dlopen-test_ucp_dlopen.o -MD -MP -MF $(DEPDIR)/test_ucp_dlopen-test_ucp_dlopen.Tpo -c -o test_ucp_dlopen-test_ucp_dlopen.o `test -f 'test_ucp_dlopen.c' || echo '$(srcdir)/'`test_ucp_dlopen.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_ucp_dlopen-test_ucp_dlopen.Tpo $(DEPDIR)/test_ucp_dlopen-test_ucp_dlopen.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_ucp_dlopen.c' object='test_ucp_dlopen-test_ucp_dlopen.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_ucp_dlopen_CPPFLAGS) $(CPPFLAGS) $(test_ucp_dlopen_CFLAGS) $(CFLAGS) -c -o test_ucp_dlopen-test_ucp_dlopen.o `test -f 'test_ucp_dlopen.c' || echo '$(srcdir)/'`test_ucp_dlopen.c + +test_ucp_dlopen-test_ucp_dlopen.obj: test_ucp_dlopen.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_ucp_dlopen_CPPFLAGS) $(CPPFLAGS) $(test_ucp_dlopen_CFLAGS) $(CFLAGS) -MT test_ucp_dlopen-test_ucp_dlopen.obj -MD -MP -MF $(DEPDIR)/test_ucp_dlopen-test_ucp_dlopen.Tpo -c -o test_ucp_dlopen-test_ucp_dlopen.obj `if test -f 'test_ucp_dlopen.c'; then $(CYGPATH_W) 'test_ucp_dlopen.c'; else $(CYGPATH_W) '$(srcdir)/test_ucp_dlopen.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/test_ucp_dlopen-test_ucp_dlopen.Tpo $(DEPDIR)/test_ucp_dlopen-test_ucp_dlopen.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_ucp_dlopen.c' object='test_ucp_dlopen-test_ucp_dlopen.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_ucp_dlopen_CPPFLAGS) $(CPPFLAGS) $(test_ucp_dlopen_CFLAGS) $(CFLAGS) -c -o test_ucp_dlopen-test_ucp_dlopen.obj `if test -f 'test_ucp_dlopen.c'; then $(CYGPATH_W) 'test_ucp_dlopen.c'; else $(CYGPATH_W) '$(srcdir)/test_ucp_dlopen.c'; fi` + +test_ucs_dlopen-test_ucs_dlopen.o: test_ucs_dlopen.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_ucs_dlopen_CPPFLAGS) $(CPPFLAGS) $(test_ucs_dlopen_CFLAGS) $(CFLAGS) -MT test_ucs_dlopen-test_ucs_dlopen.o -MD -MP -MF $(DEPDIR)/test_ucs_dlopen-test_ucs_dlopen.Tpo -c -o test_ucs_dlopen-test_ucs_dlopen.o `test -f 'test_ucs_dlopen.c' || echo '$(srcdir)/'`test_ucs_dlopen.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_ucs_dlopen-test_ucs_dlopen.Tpo $(DEPDIR)/test_ucs_dlopen-test_ucs_dlopen.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_ucs_dlopen.c' object='test_ucs_dlopen-test_ucs_dlopen.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_ucs_dlopen_CPPFLAGS) $(CPPFLAGS) $(test_ucs_dlopen_CFLAGS) $(CFLAGS) -c -o test_ucs_dlopen-test_ucs_dlopen.o `test -f 'test_ucs_dlopen.c' || echo '$(srcdir)/'`test_ucs_dlopen.c + +test_ucs_dlopen-test_ucs_dlopen.obj: test_ucs_dlopen.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_ucs_dlopen_CPPFLAGS) $(CPPFLAGS) $(test_ucs_dlopen_CFLAGS) $(CFLAGS) -MT test_ucs_dlopen-test_ucs_dlopen.obj -MD -MP -MF 
$(DEPDIR)/test_ucs_dlopen-test_ucs_dlopen.Tpo -c -o test_ucs_dlopen-test_ucs_dlopen.obj `if test -f 'test_ucs_dlopen.c'; then $(CYGPATH_W) 'test_ucs_dlopen.c'; else $(CYGPATH_W) '$(srcdir)/test_ucs_dlopen.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_ucs_dlopen-test_ucs_dlopen.Tpo $(DEPDIR)/test_ucs_dlopen-test_ucs_dlopen.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_ucs_dlopen.c' object='test_ucs_dlopen-test_ucs_dlopen.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_ucs_dlopen_CPPFLAGS) $(CPPFLAGS) $(test_ucs_dlopen_CFLAGS) $(CFLAGS) -c -o test_ucs_dlopen-test_ucs_dlopen.obj `if test -f 'test_ucs_dlopen.c'; then $(CYGPATH_W) 'test_ucs_dlopen.c'; else $(CYGPATH_W) '$(srcdir)/test_ucs_dlopen.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(PROGRAMS) +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/test_dlopen_cfg_print-test_dlopen_cfg_print.Po + -rm -f ./$(DEPDIR)/test_link_map-test_link_map.Po + -rm -f ./$(DEPDIR)/test_tcmalloc-test_tcmalloc.Po + -rm -f ./$(DEPDIR)/test_ucp_dlopen-test_ucp_dlopen.Po + -rm -f ./$(DEPDIR)/test_ucs_dlopen-test_ucs_dlopen.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/test_dlopen_cfg_print-test_dlopen_cfg_print.Po + -rm -f ./$(DEPDIR)/test_link_map-test_link_map.Po + -rm -f ./$(DEPDIR)/test_tcmalloc-test_tcmalloc.Po + -rm -f ./$(DEPDIR)/test_ucp_dlopen-test_ucp_dlopen.Po + -rm -f ./$(DEPDIR)/test_ucs_dlopen-test_ucs_dlopen.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile 
mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-am clean clean-generic clean-libtool \ + clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/test/apps/sockaddr/Makefile.am b/test/apps/sockaddr/Makefile.am new file mode 100644 index 0000000..7ce7a01 --- /dev/null +++ b/test/apps/sockaddr/Makefile.am @@ -0,0 +1,23 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +noinst_PROGRAMS = sa + +noinst_HEADERS = \ + sa_base.h \ + sa_tcp.h \ + sa_util.h + +sa_CXXFLAGS = \ + -std=c++11 -g -Wall -Werror + +sa_CPPFLAGS = $(BASE_CPPFLAGS) + +sa_SOURCES = \ + sa_base.cc \ + sa_main.cc \ + sa_tcp.cc \ + sa_util.cc diff --git a/test/apps/sockaddr/Makefile.in b/test/apps/sockaddr/Makefile.in new file mode 100644 index 0000000..acf37be --- /dev/null +++ b/test/apps/sockaddr/Makefile.in @@ -0,0 +1,829 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. 
+# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ 
+ test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +noinst_PROGRAMS = sa$(EXEEXT) +subdir = test/apps/sockaddr +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + 
$(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +PROGRAMS = $(noinst_PROGRAMS) +am_sa_OBJECTS = sa-sa_base.$(OBJEXT) sa-sa_main.$(OBJEXT) \ + sa-sa_tcp.$(OBJEXT) sa-sa_util.$(OBJEXT) +sa_OBJECTS = $(am_sa_OBJECTS) +sa_LDADD = $(LDADD) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +sa_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(sa_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles 
+am__depfiles_remade = ./$(DEPDIR)/sa-sa_base.Po \ + ./$(DEPDIR)/sa-sa_main.Po ./$(DEPDIR)/sa-sa_tcp.Po \ + ./$(DEPDIR)/sa-sa_util.Po +am__mv = mv -f +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +SOURCES = $(sa_SOURCES) +DIST_SOURCES = $(sa_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ 
+EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = 
@ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ 
+top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +noinst_HEADERS = \ + sa_base.h \ + sa_tcp.h \ + sa_util.h + +sa_CXXFLAGS = \ + -std=c++11 -g -Wall -Werror + +sa_CPPFLAGS = $(BASE_CPPFLAGS) +sa_SOURCES = \ + sa_base.cc \ + sa_main.cc \ + sa_tcp.cc \ + sa_util.cc + +all: all-am + +.SUFFIXES: +.SUFFIXES: .cc .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/apps/sockaddr/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign test/apps/sockaddr/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +sa$(EXEEXT): $(sa_OBJECTS) $(sa_DEPENDENCIES) $(EXTRA_sa_DEPENDENCIES) + @rm -f 
sa$(EXEEXT) + $(AM_V_CXXLD)$(sa_LINK) $(sa_OBJECTS) $(sa_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sa-sa_base.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sa-sa_main.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sa-sa_tcp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sa-sa_util.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.cc.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cc.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo 
$$depbase.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +sa-sa_base.o: sa_base.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -MT sa-sa_base.o -MD -MP -MF $(DEPDIR)/sa-sa_base.Tpo -c -o sa-sa_base.o `test -f 'sa_base.cc' || echo '$(srcdir)/'`sa_base.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sa-sa_base.Tpo $(DEPDIR)/sa-sa_base.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='sa_base.cc' object='sa-sa_base.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -c -o sa-sa_base.o `test -f 'sa_base.cc' || echo '$(srcdir)/'`sa_base.cc + +sa-sa_base.obj: sa_base.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -MT sa-sa_base.obj -MD -MP -MF $(DEPDIR)/sa-sa_base.Tpo -c -o sa-sa_base.obj `if test -f 'sa_base.cc'; then $(CYGPATH_W) 'sa_base.cc'; else $(CYGPATH_W) '$(srcdir)/sa_base.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sa-sa_base.Tpo $(DEPDIR)/sa-sa_base.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='sa_base.cc' object='sa-sa_base.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -c -o sa-sa_base.obj `if test -f 'sa_base.cc'; then $(CYGPATH_W) 'sa_base.cc'; else $(CYGPATH_W) 
'$(srcdir)/sa_base.cc'; fi` + +sa-sa_main.o: sa_main.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -MT sa-sa_main.o -MD -MP -MF $(DEPDIR)/sa-sa_main.Tpo -c -o sa-sa_main.o `test -f 'sa_main.cc' || echo '$(srcdir)/'`sa_main.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sa-sa_main.Tpo $(DEPDIR)/sa-sa_main.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='sa_main.cc' object='sa-sa_main.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -c -o sa-sa_main.o `test -f 'sa_main.cc' || echo '$(srcdir)/'`sa_main.cc + +sa-sa_main.obj: sa_main.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -MT sa-sa_main.obj -MD -MP -MF $(DEPDIR)/sa-sa_main.Tpo -c -o sa-sa_main.obj `if test -f 'sa_main.cc'; then $(CYGPATH_W) 'sa_main.cc'; else $(CYGPATH_W) '$(srcdir)/sa_main.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sa-sa_main.Tpo $(DEPDIR)/sa-sa_main.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='sa_main.cc' object='sa-sa_main.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -c -o sa-sa_main.obj `if test -f 'sa_main.cc'; then $(CYGPATH_W) 'sa_main.cc'; else $(CYGPATH_W) '$(srcdir)/sa_main.cc'; fi` + +sa-sa_tcp.o: sa_tcp.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -MT sa-sa_tcp.o -MD -MP -MF $(DEPDIR)/sa-sa_tcp.Tpo -c -o sa-sa_tcp.o 
`test -f 'sa_tcp.cc' || echo '$(srcdir)/'`sa_tcp.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sa-sa_tcp.Tpo $(DEPDIR)/sa-sa_tcp.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='sa_tcp.cc' object='sa-sa_tcp.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -c -o sa-sa_tcp.o `test -f 'sa_tcp.cc' || echo '$(srcdir)/'`sa_tcp.cc + +sa-sa_tcp.obj: sa_tcp.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -MT sa-sa_tcp.obj -MD -MP -MF $(DEPDIR)/sa-sa_tcp.Tpo -c -o sa-sa_tcp.obj `if test -f 'sa_tcp.cc'; then $(CYGPATH_W) 'sa_tcp.cc'; else $(CYGPATH_W) '$(srcdir)/sa_tcp.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sa-sa_tcp.Tpo $(DEPDIR)/sa-sa_tcp.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='sa_tcp.cc' object='sa-sa_tcp.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -c -o sa-sa_tcp.obj `if test -f 'sa_tcp.cc'; then $(CYGPATH_W) 'sa_tcp.cc'; else $(CYGPATH_W) '$(srcdir)/sa_tcp.cc'; fi` + +sa-sa_util.o: sa_util.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -MT sa-sa_util.o -MD -MP -MF $(DEPDIR)/sa-sa_util.Tpo -c -o sa-sa_util.o `test -f 'sa_util.cc' || echo '$(srcdir)/'`sa_util.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sa-sa_util.Tpo $(DEPDIR)/sa-sa_util.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='sa_util.cc' object='sa-sa_util.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -c -o sa-sa_util.o `test -f 'sa_util.cc' || echo '$(srcdir)/'`sa_util.cc + +sa-sa_util.obj: sa_util.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -MT sa-sa_util.obj -MD -MP -MF $(DEPDIR)/sa-sa_util.Tpo -c -o sa-sa_util.obj `if test -f 'sa_util.cc'; then $(CYGPATH_W) 'sa_util.cc'; else $(CYGPATH_W) '$(srcdir)/sa_util.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sa-sa_util.Tpo $(DEPDIR)/sa-sa_util.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='sa_util.cc' object='sa-sa_util.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(sa_CPPFLAGS) $(CPPFLAGS) $(sa_CXXFLAGS) $(CXXFLAGS) -c -o sa-sa_util.obj `if test -f 'sa_util.cc'; then $(CYGPATH_W) 'sa_util.cc'; else $(CYGPATH_W) '$(srcdir)/sa_util.cc'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z 
"$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(HEADERS) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/sa-sa_base.Po + -rm -f ./$(DEPDIR)/sa-sa_main.Po + -rm -f ./$(DEPDIR)/sa-sa_tcp.Po + -rm -f ./$(DEPDIR)/sa-sa_util.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/sa-sa_base.Po + -rm -f ./$(DEPDIR)/sa-sa_main.Po + -rm -f ./$(DEPDIR)/sa-sa_tcp.Po + -rm -f ./$(DEPDIR)/sa-sa_util.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am 
uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/test/apps/sockaddr/sa_base.cc b/test/apps/sockaddr/sa_base.cc new file mode 100644 index 0000000..dfb7389 --- /dev/null +++ b/test/apps/sockaddr/sa_base.cc @@ -0,0 +1,37 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "sa_base.h" +#include "sa_tcp.h" +#include "sa_util.h" + +#include + + +connection::~connection() { +} + +void connection::set_id(uint64_t id) { + m_id = id; +} + +uint64_t connection::id() const { + return m_id; +} + +worker::~worker() { +} + +std::shared_ptr worker::make(const std::string& mode, + const struct sockaddr *listen_addr, + socklen_t addrlen) +{ + if (mode == "tcp") { + return std::make_shared(listen_addr, addrlen); + } else { + throw error("invalid mode: " + mode); + } +} diff --git a/test/apps/sockaddr/sa_base.h b/test/apps/sockaddr/sa_base.h new file mode 100644 index 0000000..531f044 --- /dev/null +++ b/test/apps/sockaddr/sa_base.h @@ -0,0 +1,68 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef SA_BASE_H_ +#define SA_BASE_H_ + +#include "sa_util.h" + +#include +#include +#include +#include +#include + + +/* interface for classes which generate events */ +class event_source { +public: + virtual void add_to_evpoll(evpoll_set& evpoll) = 0; +}; + + +/* one data connection */ +class connection : public event_source { +public: + virtual ~connection(); + + virtual size_t send(const char *buffer, size_t size) = 0; + + virtual size_t recv(char *buffer, size_t size) = 0; + + virtual bool is_closed() const = 0; + + uint64_t id() const; + +protected: + void set_id(uint64_t id); + +private: + uint64_t m_id; +}; + +typedef std::shared_ptr conn_ptr_t; + + +/* communication context */ +class worker : public event_source { +public: + typedef std::function conn_handler_t; + typedef std::function data_handler_t; + + virtual ~worker(); + + virtual conn_ptr_t connect(const struct sockaddr *addr, socklen_t addrlen) = 0; + + virtual void wait(const evpoll_set& evpoll, conn_handler_t conn_handler, + data_handler_t data_handler, int timeout_ms) = 0; + + /* factory function to create workers of given type */ + static std::shared_ptr make(const std::string& mode, + const struct sockaddr *listen_addr, + socklen_t addrlen); +}; + +#endif diff --git a/test/apps/sockaddr/sa_main.cc b/test/apps/sockaddr/sa_main.cc new file mode 100644 index 0000000..958c1ec --- /dev/null +++ b/test/apps/sockaddr/sa_main.cc @@ -0,0 +1,421 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#include "sa_base.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +class application { +public: + class usage_exception : public error { + public: + usage_exception(const std::string& message = ""); + }; + + application(int argc, char **argv); + + int run(); + + static void usage(const std::string& error); + +private: + typedef struct { + std::string hostname; + int port; + } dest_t; + + typedef std::vector dest_vec_t; + + enum connection_type { + CONNECTION_CLIENT, + CONNECTION_SERVER + }; + + struct params { + params() : port(0), + total_conns(1000), + conn_ratio(1.5), + request_size(32), + response_size(1024) { + } + + std::string mode; + int port; + int total_conns; + double conn_ratio; + size_t request_size; + size_t response_size; + dest_vec_t dests; + }; + + struct connection_state { + conn_ptr_t conn_ptr; + connection_type conn_type; + size_t bytes_sent; + size_t bytes_recvd; + std::string send_data; + std::string recv_data; + }; + + typedef std::shared_ptr conn_state_ptr_t; + typedef std::map conn_map_t; + + void parse_hostfile(const std::string& filename); + + void initiate_connections(); + + int max_conns_inflight() const; + + void create_worker(); + + void add_connection(conn_ptr_t conn_ptr, connection_type conn_type); + + conn_ptr_t connect(const dest_t& dst); + + void advance_connection(conn_state_ptr_t s, uint32_t events); + + void connection_completed(conn_state_ptr_t s); + + static void pton(const dest_t& dst, struct sockaddr_storage& saddr, + socklen_t &addrlen); + + template + friend typename O::__basic_ostream& operator<<(O& os, connection_type conn_type); + + params m_params; + std::shared_ptr m_worker; + evpoll_set m_evpoll; + conn_map_t m_connections; + int m_num_conns_inflight; + int m_num_conns_started; +}; + + +application::usage_exception::usage_exception(const std::string& message) : + error(message) { +}; + +application::application(int argc, char **argv) : 
m_num_conns_inflight(0), + m_num_conns_started(0) { + int c; + + while ( (c = getopt(argc, argv, "p:f:m:r:n:S:s:vh")) != -1 ) { + switch (c) { + case 'p': + m_params.port = atoi(optarg); + break; + case 'f': + parse_hostfile(optarg); + break; + case 'm': + m_params.mode = optarg; + break; + case 'r': + m_params.conn_ratio = atof(optarg); + break; + case 'n': + m_params.total_conns = atoi(optarg); + break; + case 'S': + m_params.request_size = atoi(optarg); + break; + case 's': + m_params.response_size = atoi(optarg); + break; + case 'v': + log::more_verbose(); + break; + default: + throw usage_exception(); + } + } + + if (m_params.mode.empty()) { + throw usage_exception("missing mode argument"); + } + + if (m_params.dests.empty()) { + throw usage_exception("no remote destinations specified"); + } + + if (m_params.port == 0) { + throw usage_exception("local port not specified"); + } +} + +int application::run() { + LOG_INFO << "starting application with " + << max_conns_inflight() << " simultaneous connections, " + << m_params.total_conns << " total"; + + create_worker(); + + /* keep polling while there are connections left to start or active ones to finish */ + while ((m_num_conns_started < m_params.total_conns) || !m_connections.empty()) { + initiate_connections(); + m_worker->wait(m_evpoll, + [this](conn_ptr_t conn) { + LOG_DEBUG << "accepted new connection"; + add_connection(conn, CONNECTION_SERVER); + }, + [this](uint64_t conn_id, uint32_t events) { + LOG_DEBUG << "new event on connection id " + << conn_id << " events " + << ((events & EPOLLIN ) ? "i" : "") + << ((events & EPOLLOUT) ? "o" : "") + << ((events & EPOLLERR) ? 
"e" : "") + ; + advance_connection(m_connections.at(conn_id), events); + }, + -1); + } + + LOG_INFO << "all connections completed"; + + m_worker.reset(); + return 0; +} + +void application::create_worker() { + struct sockaddr_in inaddr_any; + memset(&inaddr_any, 0, sizeof(inaddr_any)); + inaddr_any.sin_family = AF_INET; + inaddr_any.sin_port = htons(m_params.port); + inaddr_any.sin_addr.s_addr = INADDR_ANY; + + m_worker = worker::make(m_params.mode, reinterpret_cast(&inaddr_any), + sizeof(inaddr_any)); + m_worker->add_to_evpoll(m_evpoll); +} + +std::shared_ptr application::connect(const dest_t& dst) { + struct sockaddr_storage saddr; + socklen_t addrlen; + pton(dst, saddr, addrlen); + return m_worker->connect(reinterpret_cast(&saddr), + addrlen); +} + +template +typename O::__basic_ostream& operator<<(O& os, application::connection_type conn_type) { + switch (conn_type) { + case application::CONNECTION_CLIENT: + return os << "client"; + case application::CONNECTION_SERVER: + return os << "server"; + default: + return os; + } +} + +void application::add_connection(conn_ptr_t conn_ptr, connection_type conn_type) { + auto s = std::make_shared(); + s->conn_type = conn_type; + s->conn_ptr = conn_ptr; + s->bytes_sent = 0; + s->bytes_recvd = 0; + + switch (s->conn_type) { + case CONNECTION_CLIENT: + s->send_data.assign(m_params.request_size, 'r'); + s->recv_data.resize(m_params.response_size); + break; + case CONNECTION_SERVER: + s->send_data.resize(m_params.response_size); + s->recv_data.resize(m_params.request_size); + break; + } + + LOG_DEBUG << "add " << conn_type << " connection with id " << conn_ptr->id(); + conn_ptr->add_to_evpoll(m_evpoll); + m_connections[conn_ptr->id()] = s; + advance_connection(s, 0); +} + +void application::initiate_connections() { + int max = max_conns_inflight(); + while ((m_num_conns_started < m_params.total_conns) && (m_num_conns_inflight < max)) { + /* coverity[dont_call] */ + const dest_t& dest = m_params.dests[::rand() % 
m_params.dests.size()]; + ++m_num_conns_started; + ++m_num_conns_inflight; + LOG_DEBUG << "connecting to " << dest.hostname << ":" << dest.port; + add_connection(connect(dest), CONNECTION_CLIENT); + } +} + +int application::max_conns_inflight() const { + return m_params.conn_ratio * m_params.dests.size() + 0.5; +} + +void application::advance_connection(conn_state_ptr_t s, uint32_t events) { + LOG_DEBUG << "advance " << s->conn_type << " connection id " << s->conn_ptr->id() + << " total sent " << s->bytes_sent << ", received " << s->bytes_recvd; + switch (s->conn_type) { + case CONNECTION_CLIENT: + if (s->bytes_sent < m_params.request_size) { + /* more data should be sent */ + size_t nsent = s->conn_ptr->send(&s->send_data[s->bytes_sent], + m_params.request_size - s->bytes_sent); + LOG_DEBUG << "sent " << nsent << " bytes on connection id " + << s->conn_ptr->id(); + s->bytes_sent += nsent; + } + if (events & EPOLLIN) { + size_t nrecv = s->conn_ptr->recv(&s->recv_data[s->bytes_recvd], + m_params.response_size - s->bytes_recvd); + LOG_DEBUG << "received " << nrecv << " bytes on connection id " + << s->conn_ptr->id(); + s->bytes_recvd += nrecv; + } + if (s->bytes_recvd == m_params.response_size) { + connection_completed(s); + } + break; + case CONNECTION_SERVER: + if (events & EPOLLIN) { + size_t nrecv = s->conn_ptr->recv(&s->recv_data[s->bytes_recvd], + m_params.request_size - s->bytes_recvd); + LOG_DEBUG << "received " << nrecv << " bytes on connection id " + << s->conn_ptr->id(); + s->bytes_recvd += nrecv; + } + if ((s->bytes_recvd == m_params.request_size) && + (s->bytes_sent < m_params.response_size)) { + /* more data should be sent */ + size_t nsent = s->conn_ptr->send(&s->send_data[s->bytes_sent], + m_params.response_size - s->bytes_sent); + LOG_DEBUG << "sent " << nsent << " bytes on connection id " + << s->conn_ptr->id(); + s->bytes_sent += nsent; + } + if (s->conn_ptr->is_closed()) { + connection_completed(s); + } + break; + } +} + +void 
application::connection_completed(conn_state_ptr_t s) { + LOG_DEBUG << "completed " << s->conn_type << " connection id " << s->conn_ptr->id(); + m_connections.erase(s->conn_ptr->id()); + --m_num_conns_inflight; +} + +void application::pton(const dest_t& dst, struct sockaddr_storage& saddr, + socklen_t &addrlen) { + + struct hostent *he = gethostbyname(dst.hostname.c_str()); + if (he == NULL || he->h_addr_list == NULL) { + throw error("host " + dst.hostname + " not found: "+ hstrerror(h_errno)); + } + + memset(&saddr, 0, sizeof(saddr)); + saddr.ss_family = he->h_addrtype; + + void *addr; + int addr_datalen = 0; + switch (saddr.ss_family) { + case AF_INET: + reinterpret_cast(&saddr)->sin_port = + htons(dst.port); + /* cppcheck-suppress internalAstError */ + addr = &reinterpret_cast(&saddr)->sin_addr; + addrlen = sizeof(struct sockaddr_in); + addr_datalen = sizeof(struct in_addr); + break; + case AF_INET6: + reinterpret_cast(&saddr)->sin6_port = + htons(dst.port); + addr = &reinterpret_cast(&saddr)->sin6_addr; + addrlen = sizeof(struct sockaddr_in6); + addr_datalen = sizeof(struct in6_addr); + break; + default: + throw error("unsupported address family"); + } + + if (he->h_length != addr_datalen) { + throw error("mismatching address length"); + } + + memcpy(addr, he->h_addr_list[0], addr_datalen); +} + +void application::usage(const std::string& error) { + if (!error.empty()) { + std::cout << "Error: " << error << std::endl; + std::cout << std::endl; + } + + params defaults; + std::cout << "Usage: ./sa [ options ]" << std::endl; + std::cout << "Options:" << std::endl; + std::cout << " -m Application mode (tcp)" << std::endl; + std::cout << " -p Local port number to listen on" << std::endl; + std::cout << " -f File with list of hosts and ports to connect to" << std::endl; + std::cout << " Each line in the file is formatter as follows:" << std::endl; + std::cout << "
" << std::endl; + std::cout << " -r How many in-flight connection to hold as multiple" << std::endl; + std::cout << " of number of possible destinations (" << defaults.conn_ratio << ")" << std::endl; + std::cout << " -n How many total exchanges to perform (" << defaults.total_conns << ")" << std::endl; + std::cout << " -S Request message size, in bytes (" << defaults.request_size << ")" << std::endl; + std::cout << " -s Response message size, in bytes (" << defaults.response_size << ")" << std::endl; + std::cout << " -v Increase verbosity level (may be specified several times)" << std::endl; +} + +void application::parse_hostfile(const std::string& filename) { + std::ifstream f(filename.c_str()); + if (!f) { + throw error("failed to open '" + filename + "'"); + } + + /* + * Each line in the file contains 2 whitespace-separated tokens: host-name + * and port number. + */ + std::string line; + int lineno = 1; + while (std::getline(f, line)) { + std::stringstream ss(line); + if (line.empty()) { + continue; + } + + dest_t dest; + if ((ss >> dest.hostname) && (ss >> dest.port)) { + m_params.dests.push_back(dest); + } else { + std::stringstream errss; + errss << "syntax error in file '" << filename << "' line " << lineno << + " near `" << line << "'"; + throw error(errss.str()); + } + ++lineno; + } +} + +int main(int argc, char **argv) +{ + try { + application app(argc, argv); + return app.run(); + } catch (application::usage_exception& e) { + application::usage(e.what()); + return -127; + } catch (error& e) { + std::cerr << "Error: " << e.what() << std::endl; + } +} diff --git a/test/apps/sockaddr/sa_tcp.cc b/test/apps/sockaddr/sa_tcp.cc new file mode 100644 index 0000000..7d15776 --- /dev/null +++ b/test/apps/sockaddr/sa_tcp.cc @@ -0,0 +1,127 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#include "sa_tcp.h" + +#include +#include +#include +#include +#include +#include +#include + + +tcp_socket::tcp_socket() : file_desc(create_socket()) { +} + +tcp_socket::tcp_socket(int fd) : file_desc(fd) { +} + +tcp_socket::~tcp_socket() { +} + +int tcp_socket::create_socket() { + int fd = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (fd < 0) { + throw sys_error("failed to create tcp socket", errno); + } + return fd; +} + +tcp_connection::tcp_connection(const struct sockaddr *addr, socklen_t addrlen) : + m_is_closed(false) { + initialize(); + int ret = ::connect(m_socket, addr, addrlen); + if ((ret < 0) && (errno != EINPROGRESS)) { + throw sys_error("failed to connect tcp socket", errno); + } +} + +tcp_connection::tcp_connection(int fd) : m_socket(fd), m_is_closed(false) { + initialize(); +} + +void tcp_connection::initialize() { + int ret = fcntl(m_socket, F_SETFL, fcntl(m_socket, F_GETFL) | O_NONBLOCK); + if (ret < 0) { + throw sys_error("failed to set tcp socket to nonblocking", errno); + } + + set_id(m_socket); +} + +void tcp_connection::add_to_evpoll(evpoll_set& evpoll) { + evpoll.add(m_socket, EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLET); +} + +size_t tcp_connection::send(const char *buffer, size_t size) { + ssize_t ret = ::send(m_socket, buffer, size, 0); + if (ret < 0) { + if (errno != EAGAIN) { + throw sys_error("failed to send on tcp socket", errno); + } + return 0; + } + return ret; +} + +size_t tcp_connection::recv(char *buffer, size_t size) { + ssize_t ret = ::recv(m_socket, buffer, size, 0); + if (ret < 0) { + if (errno != EAGAIN) { + throw sys_error("failed to receive from tcp socket", errno); + } + return 0; + } + if (ret == 0) { + m_is_closed = true; + } + return ret; +} + +bool tcp_connection::is_closed() const { + return m_is_closed; +} + +tcp_worker::tcp_worker(const struct sockaddr *listen_addr, socklen_t addrlen) { + int retb = ::bind(m_server_socket, listen_addr, addrlen); + if (retb != 0) { + throw sys_error("failed to bind tcp 
socket", errno); + } + + int retl = ::listen(m_server_socket, 1024); + if (retl != 0) { + throw sys_error("failed to listen on tcp socket", errno); + } +} + +void tcp_worker::add_to_evpoll(evpoll_set& evpoll) { + evpoll.add(m_server_socket, EPOLLIN | EPOLLERR); +} + +void tcp_worker::wait(const evpoll_set& evpoll, conn_handler_t conn_handler, + data_handler_t data_handler, int timeout_ms) { + std::vector events; + evpoll.wait(events, timeout_ms); + for (auto ev : events) { + if (ev.fd == m_server_socket) { + int ret = accept(m_server_socket, NULL, NULL); + if (ret < 0) { + throw sys_error("failed to accept", errno); + } + auto conn = std::make_shared(ret); + conn_handler(conn); + } else { + data_handler(ev.fd, ev.ev_flags); + } + } +} + +std::shared_ptr tcp_worker::connect(const struct sockaddr *addr, + socklen_t addrlen) { + return std::make_shared(addr, addrlen); +} diff --git a/test/apps/sockaddr/sa_tcp.h b/test/apps/sockaddr/sa_tcp.h new file mode 100644 index 0000000..f8c3717 --- /dev/null +++ b/test/apps/sockaddr/sa_tcp.h @@ -0,0 +1,64 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef SA_TCP_H_ +#define SA_TCP_H_ + +#include "sa_base.h" +#include "sa_util.h" + + +class tcp_socket : public file_desc { +public: + tcp_socket(); + + tcp_socket(int fd); + + virtual ~tcp_socket(); + +private: + static int create_socket(); +}; + + +class tcp_connection : public connection { +public: + tcp_connection(const struct sockaddr *addr, socklen_t addrlen); + + tcp_connection(int fd); + + virtual void add_to_evpoll(evpoll_set& evpoll); + + virtual size_t send(const char *buffer, size_t size); + + virtual size_t recv(char *buffer, size_t size); + + virtual bool is_closed() const; + +private: + void initialize(); + + tcp_socket m_socket; + bool m_is_closed; +}; + + +class tcp_worker : public worker { +public: + tcp_worker(const struct sockaddr *listen_addr, socklen_t addrlen); + + virtual void add_to_evpoll(evpoll_set& evpoll); + + virtual conn_ptr_t connect(const struct sockaddr *addr, socklen_t addrlen); + + virtual void wait(const evpoll_set& evpoll, conn_handler_t conn_handler, + data_handler_t data_handler, int timeout_ms); + +private: + tcp_socket m_server_socket; +}; + +#endif diff --git a/test/apps/sockaddr/sa_util.cc b/test/apps/sockaddr/sa_util.cc new file mode 100644 index 0000000..9e2a97e --- /dev/null +++ b/test/apps/sockaddr/sa_util.cc @@ -0,0 +1,124 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#include "sa_util.h" + +#include +#include +#include +#include +#include + + +error::error(const std::string& message) : m_message(message) { +} + +error::~error() throw() { +} + +const char* error::what() const throw() { + return m_message.c_str(); +} + +sys_error::~sys_error() throw() { +} + +sys_error::sys_error(const std::string& message, int errn) : + error(message + ": " + strerror(errn) + " (" + std::to_string(errn) + ")") { +} + +file_desc::file_desc(int fd) : m_fd(fd) { +} + +file_desc::~file_desc() { + int ret = ::close(m_fd); + if (ret < 0) { + fprintf(stderr, "Warning: failed to close fd %d: %m", m_fd); + } +} + +file_desc::operator int() const { + return m_fd; +} + +evpoll_set::evpoll_set() : file_desc(create_epfd()) { +} + +void evpoll_set::add(int fd, uint32_t ev_flags) { + struct epoll_event ev; + memset(&ev, 0, sizeof(ev)); + ev.events = ev_flags; + ev.data.fd = fd; + int ret = ::epoll_ctl(*this, EPOLL_CTL_ADD, fd, &ev); + if (ret != 0) { + throw sys_error("failed to add fd to epoll", errno); + } +} + +void evpoll_set::wait(std::vector& events, int timeout_ms) const { + static const size_t maxevents = 32; + struct epoll_event ev_array[maxevents]; + + LOG_DEBUG << "epoll_wait with timeout " << timeout_ms << " milliseconds"; + int ret = epoll_wait(*this, ev_array, maxevents, timeout_ms); + if (ret < 0) { + if (errno != EINTR) { + throw sys_error("epoll_wait failed", errno); + } + } else { + for (int i = 0; i < ret; ++i) { + event ev = { ev_array[i].data.fd, ev_array[i].events }; + events.push_back(ev); + } + } +} + +int evpoll_set::create_epfd() { + int fd = epoll_create(1); + if (fd < 0) { + throw sys_error("failed to create epoll set", errno); + } + return fd; +} + +log::level_t log::m_log_level = INFO; + +log::log(log::level_t level, const std::string& file, int line) : + m_enabled(level <= m_log_level) { + if (m_enabled) { + struct timeval tv; + gettimeofday(&tv, NULL); + + char cstr[64]; + snprintf(cstr, sizeof(cstr), "[%lu.%06lu] 
%12s:%-5d", + tv.tv_sec, tv.tv_usec, basename(file.c_str()), line); + m_msg << cstr << " " << level_str(level) << " "; + } +} + +log::~log() { + if (m_enabled) { + m_msg << std::endl; + std::cout << m_msg.str() << std::flush; + } +} + +std::string log::level_str(log::level_t level) { + switch (level) { + case INFO: + return "INFO "; + case DEBUG: + return "DEBUG"; + default: + throw error("invalid log level"); + } +} + +void log::more_verbose() { + if (m_log_level == INFO) { + m_log_level = DEBUG; + } +} diff --git a/test/apps/sockaddr/sa_util.h b/test/apps/sockaddr/sa_util.h new file mode 100644 index 0000000..7cd46bc --- /dev/null +++ b/test/apps/sockaddr/sa_util.h @@ -0,0 +1,107 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifndef SA_UTIL_H_ +#define SA_UTIL_H_ + +#include +#include +#include +#include + + +/* runtime error exception */ +class error : public std::exception { +public: + error(const std::string& message); + + virtual ~error() throw(); + + virtual const char* what() const throw(); + +private: + std::string m_message; +}; + + +/* system error exception */ +class sys_error : public error { +public: + virtual ~sys_error() throw(); + + sys_error(const std::string& message, int errn); +}; + + +/* file descriptor wrapper which closes the file automatically */ +class file_desc { +public: + file_desc(int fd); + + virtual ~file_desc(); + + operator int() const; + +private: + file_desc(const file_desc&); + + const file_desc& operator=(const file_desc&); + + int m_fd; +}; + + +/* event poll set */ +class evpoll_set : public file_desc { +public: + struct event { + int fd; + uint32_t ev_flags; + }; + + evpoll_set(); + + void add(int fd, uint32_t ev_flags); + + void wait(std::vector& events, int timeout_ms = -1) const; + +private: + static int create_epfd(); +}; + +#define LOG_INFO \ + log(log::INFO, __FILE__, __LINE__) +#define LOG_DEBUG \ + log(log::DEBUG, __FILE__, __LINE__) + +/* 
logger */ +class log { +public: + typedef enum { + INFO, + DEBUG + } level_t; + + log(level_t level, const std::string& file, int line); + ~log(); + + template + log& operator<<(const T& value) { + m_msg << value; + return *this; + } + + static void more_verbose(); + +private: + static std::string level_str(level_t level); + + static level_t m_log_level; + const bool m_enabled; + std::ostringstream m_msg; +}; + +#endif diff --git a/test/apps/test_dlopen_cfg_print.c b/test/apps/test_dlopen_cfg_print.c new file mode 100644 index 0000000..137f999 --- /dev/null +++ b/test/apps/test_dlopen_cfg_print.c @@ -0,0 +1,56 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include +#include +#include + +#define _QUOTE(x) #x +#define QUOTE(x) _QUOTE(x) + + +static void* do_dlopen_or_exit(const char *filename) +{ + void *handle; + + (void)dlerror(); + printf("opening '%s'\n", filename); + handle = dlopen(filename, RTLD_LAZY); + if (handle == NULL) { + fprintf(stderr, "failed to open %s: %s\n", filename, + dlerror()); + exit(1); + } + + return handle; +} + +int main(int argc, char **argv) +{ + typedef void (*print_all_opts_func_t)(FILE*, int); + + const char *ucs_filename = QUOTE(UCS_LIB_PATH); + const char *uct_filename = QUOTE(UCT_LIB_PATH); + void *ucs_handle, *uct_handle; + int i; + + /* unload and reload uct while ucs is loaded + * would fail if uct global vars are kept on global lists in ucs */ + ucs_handle = do_dlopen_or_exit(ucs_filename); + for (i = 0; i < 2; ++i) { + uct_handle = do_dlopen_or_exit(uct_filename); + dlclose(uct_handle); + } + + /* print all config table, to force going over the global list in ucs */ + print_all_opts_func_t print_all_opts = + (print_all_opts_func_t)dlsym(ucs_handle, "ucs_config_parser_print_all_opts"); + print_all_opts(stdout, 0); + dlclose(ucs_handle); + + printf("done\n"); + return 0; +} diff --git a/test/apps/test_link_map.c b/test/apps/test_link_map.c new file 
mode 100644 index 0000000..6683333 --- /dev/null +++ b/test/apps/test_link_map.c @@ -0,0 +1,41 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include + + +int main(int argc, char **argv) +{ + ucp_params_t params; + ucs_status_t status; + ucp_context_h context; + + params.field_mask = UCP_PARAM_FIELD_FEATURES; + params.features = UCP_FEATURE_TAG; + + status = ucp_init(&params, NULL, &context); + if (status != UCS_OK) { + return -1; + } + + /* This could segfault if libucm_cuda.so is marked as linker nodelete but + * could not be loaded due to libcuda dependency, because of a corrupted + * link_map in the program. + */ + dlopen("libgcc_s.so.1", RTLD_LAZY); + + ucp_cleanup(context); + + printf("SUCCESS\n"); + return 0; +} + diff --git a/test/apps/test_tcmalloc.c b/test/apps/test_tcmalloc.c new file mode 100644 index 0000000..a987ed7 --- /dev/null +++ b/test/apps/test_tcmalloc.c @@ -0,0 +1,33 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include +#include +#include + + +int main(int argc, char **argv) +{ + ucp_params_t params; + ucs_status_t status; + ucp_context_h context; + + params.field_mask = UCP_PARAM_FIELD_FEATURES; + params.features = UCP_FEATURE_TAG; + + status = ucp_init(&params, NULL, &context); + if (status != UCS_OK) { + return -1; + } + + dlopen("libselinux.so", RTLD_LAZY); + + ucp_cleanup(context); + + printf("SUCCESS\n"); + return 0; +} + diff --git a/test/apps/test_ucp_dlopen.c b/test/apps/test_ucp_dlopen.c new file mode 100644 index 0000000..0cd3147 --- /dev/null +++ b/test/apps/test_ucp_dlopen.c @@ -0,0 +1,120 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <sys/mman.h>
+
+#include <ucp/api/ucp.h>
+
+#define _QUOTE(x) #x
+#define QUOTE(x) _QUOTE(x)
+
+/* Create, print and destroy a UCP context through entry points resolved from 'handle'; returns 0 or -1. */
+int test_ucp_init(void *handle)
+{
+    typedef ucs_status_t (*ucp_init_version_func_t)(unsigned, unsigned,
+                                                    const ucp_params_t *,
+                                                    const ucp_config_t *,
+                                                    ucp_context_h *);
+    typedef void (*ucp_context_print_info_func_t)(const ucp_context_h, FILE*);
+    typedef void (*ucp_cleanup_func_t)(ucp_context_h);
+
+    ucp_init_version_func_t ucp_init_version_f;
+    ucp_context_print_info_func_t ucp_context_print_info_f;
+    ucp_cleanup_func_t ucp_cleanup_f;
+    ucp_params_t ucp_params;
+    ucs_status_t status;
+    ucp_context_h ucph;
+    /* look up every entry point by name with dlsym() */
+    ucp_init_version_f = (ucp_init_version_func_t)dlsym(handle,
+                                                        "ucp_init_version");
+    ucp_cleanup_f = (ucp_cleanup_func_t)dlsym(handle, "ucp_cleanup");
+    ucp_context_print_info_f = (ucp_context_print_info_func_t)dlsym(handle,
+                                                                    "ucp_context_print_info");
+
+    if (!ucp_init_version_f || !ucp_cleanup_f || !ucp_context_print_info_f) {
+        fprintf(stderr, "failed to get UCP function pointers\n");
+        return -1;
+    }
+    /* only 'features' is filled in; field_mask marks it as the sole valid field */
+    ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES;
+    ucp_params.features = UCP_FEATURE_RMA;
+    status = ucp_init_version_f(UCP_API_MAJOR, UCP_API_MINOR, &ucp_params,
+                                NULL, &ucph);
+    if (status != UCS_OK) {
+        fprintf(stderr, "ucp_init_version() failed\n");
+        return -1;
+    }
+
+    ucp_context_print_info_f(ucph, stdout);
+    ucp_cleanup_f(ucph);
+
+    return 0;
+}
+
+int main(int argc, char **argv)
+{
+    const char *filename = QUOTE(LIB_PATH);
+    void *handle;
+    void *ptr1, *ptr2;
+    size_t alloc_size;
+    long ret;
+
+    /* get page size */
+    ret = sysconf(_SC_PAGESIZE);
+    if (ret < 0) {
+        fprintf(stderr, "sysconf(_SC_PAGESIZE) failed: %m\n");
+        return -1;
+    }
+    alloc_size = ret;
+
+    /* allocate some memory before the library is loaded: one heap block, one anonymous mapping */
+    ptr1 = malloc(alloc_size);
+    if (!ptr1) {
+        fprintf(stderr, "malloc() failed\n");
+        return -1;
+    }
+
+    ptr2 = mmap(NULL, alloc_size, PROT_READ|PROT_WRITE,
+                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if (ptr2 == MAP_FAILED) {
+        fprintf(stderr, "mmap() failed: %m\n");
+        ret = -1;
+        goto failed_mmap;
+    }
+
+    /* load ucp; dlopen() failures are described by dlerror(), not by errno */
+    printf("opening '%s'\n", filename);
+    handle = dlopen(filename, RTLD_NOW | RTLD_LOCAL);
+    if (handle == NULL) {
+        fprintf(stderr, "failed to open %s: %s\n", filename, dlerror());
+        ret = -1;
+        goto failed_dlopen;
+    }
+
+    /* init ucp */
+    ret = test_ucp_init(handle);
+
+    /* unload ucp */
+    dlclose(handle);
+
+failed_dlopen:
+    /* release the memory - could break if UCM is unloaded */
+    munmap(ptr2, alloc_size);
+failed_mmap:
+    free(ptr1);
+
+    printf("done\n");
+    return ret;
+}
+
diff --git a/test/apps/test_ucs_dlopen.c b/test/apps/test_ucs_dlopen.c
new file mode 100644
index 0000000..87ef1af
--- /dev/null
+++ b/test/apps/test_ucs_dlopen.c
@@ -0,0 +1,113 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <sys/mman.h>
+
+#include <ucm/api/ucm.h>
+
+#define _QUOTE(x) #x
+#define QUOTE(x) _QUOTE(x)
+
+/* Event callback with an empty body; this test only needs something to register. */
+static void vm_unmap_cb(ucm_event_type_t event_type, ucm_event_t *event,
+                        void *arg)
+{
+}
+
+int test_ucm_set_event_handler(void *handle)
+{
+    typedef ucs_status_t (*ucm_set_event_handler_func_t)(int events,
+                                                         int priority,
+                                                         ucm_event_callback_t cb,
+                                                         void *arg);
+
+    ucm_set_event_handler_func_t ucm_set_event_handler_f;
+    ucs_status_t status;
+    /* clear any pending error so that dlerror() below reports on this dlsym() call */
+    dlerror();
+    ucm_set_event_handler_f = (ucm_set_event_handler_func_t)dlsym(handle,
+                                                                  "ucm_set_event_handler");
+    if (ucm_set_event_handler_f == NULL) {
+        fprintf(stderr, "failed to resolve ucm_set_event_handler(): %s\n",
+                dlerror());
+        return -1;
+    }
+
+    status = ucm_set_event_handler_f(UCM_EVENT_VM_UNMAPPED, 0, vm_unmap_cb,
+                                     NULL);
+    if (status != UCS_OK) {
+        fprintf(stderr, "ucm_set_event_handler() failed\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+int main(int argc, char **argv)
+{
+    const char *filename = QUOTE(LIB_PATH);
+    void *handle;
+    void *ptr1, *ptr2;
+    size_t alloc_size;
+    long ret;
+
+    /* get page size */
+    ret = sysconf(_SC_PAGESIZE);
+    if (ret < 0) {
+        fprintf(stderr, "sysconf(_SC_PAGESIZE) failed: %m\n");
+        return -1;
+    }
+    alloc_size = ret;
+
+    /* allocate some memory before the library is loaded: one heap block, one anonymous mapping */
+    ptr1 = malloc(alloc_size);
+    if (!ptr1) {
+        fprintf(stderr, "malloc() failed\n");
+        return -1;
+    }
+
+    ptr2 = mmap(NULL, alloc_size, PROT_READ|PROT_WRITE,
+                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if (ptr2 == MAP_FAILED) {
+        fprintf(stderr, "mmap() failed: %m\n");
+        ret = -1;
+        goto failed_mmap;
+    }
+
+    /* load ucm; the dlerror() call before dlopen() discards any stale error text */
+    printf("opening '%s'\n", filename);
+    dlerror();
+    handle = dlopen(filename, RTLD_NOW);
+    if (handle == NULL) {
+        fprintf(stderr, "failed to open %s: %s\n", filename, dlerror());
+        ret = -1;
+        goto failed_dlopen;
+    }
+
+    /* init ucm */
+    ret = test_ucm_set_event_handler(handle);
+
+    /* unload the library */
+    dlclose(handle);
+
+failed_dlopen:
+    /* release the memory - could break if UCM is unloaded */
+    munmap(ptr2, alloc_size);
+failed_mmap:
+    free(ptr1);
+
+    printf("done\n");
+    return ret;
+}
+
diff --git a/test/examples/Makefile.am b/test/examples/Makefile.am
new file mode 100644
index 0000000..eb22561
--- /dev/null
+++ b/test/examples/Makefile.am
@@ -0,0 +1,72 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+#
+# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+# See file LICENSE for terms.
+# + +examplesdir = $(pkgdatadir)/examples +dist_examples_DATA = \ + ucx_hello_world.h \ + ucp_hello_world.c \ + uct_hello_world.c \ + ucx_profiling.c \ + ucp_client_server.c + +if HAVE_CUDA +EXAMPLE_CUDA_LDFLAGS = $(CUDA_LDFLAGS) +# cuda.h couldn't be compiled with -pedantic flag +EXAMPLE_CUDA_CFLAGS = +EXAMPLE_CUDA_CPPFLAGS = $(CUDA_CPPFLAGS) -DHAVE_CUDA +else +EXAMPLE_CUDA_LDFLAGS = +EXAMPLE_CUDA_CFLAGS = $(CFLAGS_PEDANTIC) +EXAMPLE_CUDA_CPPFLAGS = +endif + +EXAMPLE_CCLD_FLAGS = -lucs -I$(includedir) -L$(libdir) -Wall -Werror -Wl,-rpath,$(libdir) \ + $(EXAMPLE_CUDA_LDFLAGS) $(EXAMPLE_CUDA_CPPFLAGS) + +installcheck-local: + @echo "INSTALLCHECK: Compiling examples with installed library" + $(CC) -o uct_hello_world $(examplesdir)/uct_hello_world.c -luct $(EXAMPLE_CCLD_FLAGS) + $(CC) -o ucp_hello_world $(examplesdir)/ucp_hello_world.c -lucp $(EXAMPLE_CCLD_FLAGS) + $(CC) -o ucp_client_server $(examplesdir)/ucp_client_server.c -lucp $(EXAMPLE_CCLD_FLAGS) + $(CC) -o ucx_profiling $(examplesdir)/ucx_profiling.c -lm $(EXAMPLE_CCLD_FLAGS) + $(RM) *.o uct_hello_world ucp_hello_world ucp_client_server ucx_profiling + +if HAVE_EXAMPLES + +noinst_PROGRAMS = \ + ucp_hello_world \ + uct_hello_world \ + ucx_profiling \ + ucp_client_server + +ucp_hello_world_SOURCES = ucp_hello_world.c +ucp_hello_world_CFLAGS = $(BASE_CFLAGS) $(EXAMPLE_CUDA_CFLAGS) +ucp_hello_world_CPPFLAGS = $(BASE_CPPFLAGS) $(EXAMPLE_CUDA_CPPFLAGS) +ucp_hello_world_LDADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/ucp/libucp.la \ + $(EXAMPLE_CUDA_LDFLAGS) + +uct_hello_world_SOURCES = uct_hello_world.c +uct_hello_world_CFLAGS = $(BASE_CFLAGS) $(EXAMPLE_CUDA_CFLAGS) +uct_hello_world_CPPFLAGS = $(BASE_CPPFLAGS) $(EXAMPLE_CUDA_CPPFLAGS) +uct_hello_world_LDADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/libuct.la \ + $(EXAMPLE_CUDA_LDFLAGS) + +ucp_client_server_SOURCES = ucp_client_server.c +ucp_client_server_CFLAGS = $(BASE_CFLAGS) $(CFLAGS_PEDANTIC) +ucp_client_server_CPPFLAGS = 
$(BASE_CPPFLAGS) +ucp_client_server_LDADD = $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/ucp/libucp.la + +ucx_profiling_SOURCES = ucx_profiling.c +ucx_profiling_CFLAGS = $(BASE_CFLAGS) +ucx_profiling_CPPFLAGS = $(BASE_CPPFLAGS) +ucx_profiling_LDADD = $(top_builddir)/src/ucs/libucs.la +ucx_profiling_LDFLAGS = -lm + +endif diff --git a/test/examples/Makefile.in b/test/examples/Makefile.in new file mode 100644 index 0000000..74180ef --- /dev/null +++ b/test/examples/Makefile.in @@ -0,0 +1,978 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@HAVE_EXAMPLES_TRUE@noinst_PROGRAMS = ucp_hello_world$(EXEEXT) \ +@HAVE_EXAMPLES_TRUE@ uct_hello_world$(EXEEXT) \ +@HAVE_EXAMPLES_TRUE@ ucx_profiling$(EXEEXT) \ +@HAVE_EXAMPLES_TRUE@ ucp_client_server$(EXEEXT) 
+subdir = test/examples +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac 
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(dist_examples_DATA) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +PROGRAMS = $(noinst_PROGRAMS) +am__ucp_client_server_SOURCES_DIST = ucp_client_server.c +@HAVE_EXAMPLES_TRUE@am_ucp_client_server_OBJECTS = ucp_client_server-ucp_client_server.$(OBJEXT) +ucp_client_server_OBJECTS = $(am_ucp_client_server_OBJECTS) +@HAVE_EXAMPLES_TRUE@ucp_client_server_DEPENDENCIES = \ +@HAVE_EXAMPLES_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_EXAMPLES_TRUE@ $(top_builddir)/src/ucp/libucp.la +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +ucp_client_server_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(ucp_client_server_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am__ucp_hello_world_SOURCES_DIST = ucp_hello_world.c +@HAVE_EXAMPLES_TRUE@am_ucp_hello_world_OBJECTS = \ +@HAVE_EXAMPLES_TRUE@ ucp_hello_world-ucp_hello_world.$(OBJEXT) +ucp_hello_world_OBJECTS = $(am_ucp_hello_world_OBJECTS) +am__DEPENDENCIES_1 = +@HAVE_CUDA_TRUE@am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) +@HAVE_EXAMPLES_TRUE@ucp_hello_world_DEPENDENCIES = \ +@HAVE_EXAMPLES_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_EXAMPLES_TRUE@ $(top_builddir)/src/ucp/libucp.la \ +@HAVE_EXAMPLES_TRUE@ $(am__DEPENDENCIES_2) +ucp_hello_world_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(ucp_hello_world_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am__uct_hello_world_SOURCES_DIST = uct_hello_world.c +@HAVE_EXAMPLES_TRUE@am_uct_hello_world_OBJECTS = \ +@HAVE_EXAMPLES_TRUE@ uct_hello_world-uct_hello_world.$(OBJEXT) +uct_hello_world_OBJECTS = $(am_uct_hello_world_OBJECTS) 
+@HAVE_EXAMPLES_TRUE@uct_hello_world_DEPENDENCIES = \ +@HAVE_EXAMPLES_TRUE@ $(top_builddir)/src/ucs/libucs.la \ +@HAVE_EXAMPLES_TRUE@ $(top_builddir)/src/uct/libuct.la \ +@HAVE_EXAMPLES_TRUE@ $(am__DEPENDENCIES_2) +uct_hello_world_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(uct_hello_world_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am__ucx_profiling_SOURCES_DIST = ucx_profiling.c +@HAVE_EXAMPLES_TRUE@am_ucx_profiling_OBJECTS = \ +@HAVE_EXAMPLES_TRUE@ ucx_profiling-ucx_profiling.$(OBJEXT) +ucx_profiling_OBJECTS = $(am_ucx_profiling_OBJECTS) +@HAVE_EXAMPLES_TRUE@ucx_profiling_DEPENDENCIES = \ +@HAVE_EXAMPLES_TRUE@ $(top_builddir)/src/ucs/libucs.la +ucx_profiling_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(ucx_profiling_CFLAGS) \ + $(CFLAGS) $(ucx_profiling_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = \ + ./$(DEPDIR)/ucp_client_server-ucp_client_server.Po \ + ./$(DEPDIR)/ucp_hello_world-ucp_hello_world.Po \ + ./$(DEPDIR)/uct_hello_world-uct_hello_world.Po \ + ./$(DEPDIR)/ucx_profiling-ucx_profiling.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) 
+am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(ucp_client_server_SOURCES) $(ucp_hello_world_SOURCES) \ + $(uct_hello_world_SOURCES) $(ucx_profiling_SOURCES) +DIST_SOURCES = $(am__ucp_client_server_SOURCES_DIST) \ + $(am__ucp_hello_world_SOURCES_DIST) \ + $(am__uct_hello_world_SOURCES_DIST) \ + $(am__ucx_profiling_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(examplesdir)" +DATA = $(dist_examples_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = 
@DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ 
+MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = 
@host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +examplesdir = $(pkgdatadir)/examples +dist_examples_DATA = \ + ucx_hello_world.h \ + ucp_hello_world.c \ + uct_hello_world.c \ + ucx_profiling.c \ + ucp_client_server.c + +@HAVE_CUDA_FALSE@EXAMPLE_CUDA_LDFLAGS = +@HAVE_CUDA_TRUE@EXAMPLE_CUDA_LDFLAGS = $(CUDA_LDFLAGS) +@HAVE_CUDA_FALSE@EXAMPLE_CUDA_CFLAGS = $(CFLAGS_PEDANTIC) +# cuda.h couldn't be compiled with -pedantic flag +@HAVE_CUDA_TRUE@EXAMPLE_CUDA_CFLAGS = +@HAVE_CUDA_FALSE@EXAMPLE_CUDA_CPPFLAGS = +@HAVE_CUDA_TRUE@EXAMPLE_CUDA_CPPFLAGS = $(CUDA_CPPFLAGS) -DHAVE_CUDA +EXAMPLE_CCLD_FLAGS = -lucs -I$(includedir) -L$(libdir) -Wall -Werror -Wl,-rpath,$(libdir) \ + $(EXAMPLE_CUDA_LDFLAGS) $(EXAMPLE_CUDA_CPPFLAGS) + +@HAVE_EXAMPLES_TRUE@ucp_hello_world_SOURCES = ucp_hello_world.c +@HAVE_EXAMPLES_TRUE@ucp_hello_world_CFLAGS = $(BASE_CFLAGS) $(EXAMPLE_CUDA_CFLAGS) +@HAVE_EXAMPLES_TRUE@ucp_hello_world_CPPFLAGS = $(BASE_CPPFLAGS) $(EXAMPLE_CUDA_CPPFLAGS) +@HAVE_EXAMPLES_TRUE@ucp_hello_world_LDADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_EXAMPLES_TRUE@ $(top_builddir)/src/ucp/libucp.la \ +@HAVE_EXAMPLES_TRUE@ $(EXAMPLE_CUDA_LDFLAGS) + +@HAVE_EXAMPLES_TRUE@uct_hello_world_SOURCES = uct_hello_world.c 
+@HAVE_EXAMPLES_TRUE@uct_hello_world_CFLAGS = $(BASE_CFLAGS) $(EXAMPLE_CUDA_CFLAGS) +@HAVE_EXAMPLES_TRUE@uct_hello_world_CPPFLAGS = $(BASE_CPPFLAGS) $(EXAMPLE_CUDA_CPPFLAGS) +@HAVE_EXAMPLES_TRUE@uct_hello_world_LDADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_EXAMPLES_TRUE@ $(top_builddir)/src/uct/libuct.la \ +@HAVE_EXAMPLES_TRUE@ $(EXAMPLE_CUDA_LDFLAGS) + +@HAVE_EXAMPLES_TRUE@ucp_client_server_SOURCES = ucp_client_server.c +@HAVE_EXAMPLES_TRUE@ucp_client_server_CFLAGS = $(BASE_CFLAGS) $(CFLAGS_PEDANTIC) +@HAVE_EXAMPLES_TRUE@ucp_client_server_CPPFLAGS = $(BASE_CPPFLAGS) +@HAVE_EXAMPLES_TRUE@ucp_client_server_LDADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_EXAMPLES_TRUE@ $(top_builddir)/src/ucp/libucp.la + +@HAVE_EXAMPLES_TRUE@ucx_profiling_SOURCES = ucx_profiling.c +@HAVE_EXAMPLES_TRUE@ucx_profiling_CFLAGS = $(BASE_CFLAGS) +@HAVE_EXAMPLES_TRUE@ucx_profiling_CPPFLAGS = $(BASE_CPPFLAGS) +@HAVE_EXAMPLES_TRUE@ucx_profiling_LDADD = $(top_builddir)/src/ucs/libucs.la +@HAVE_EXAMPLES_TRUE@ucx_profiling_LDFLAGS = -lm +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/examples/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign test/examples/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +ucp_client_server$(EXEEXT): $(ucp_client_server_OBJECTS) $(ucp_client_server_DEPENDENCIES) $(EXTRA_ucp_client_server_DEPENDENCIES) + @rm -f ucp_client_server$(EXEEXT) + $(AM_V_CCLD)$(ucp_client_server_LINK) $(ucp_client_server_OBJECTS) $(ucp_client_server_LDADD) $(LIBS) + +ucp_hello_world$(EXEEXT): $(ucp_hello_world_OBJECTS) $(ucp_hello_world_DEPENDENCIES) $(EXTRA_ucp_hello_world_DEPENDENCIES) + @rm -f ucp_hello_world$(EXEEXT) + $(AM_V_CCLD)$(ucp_hello_world_LINK) $(ucp_hello_world_OBJECTS) $(ucp_hello_world_LDADD) $(LIBS) + +uct_hello_world$(EXEEXT): $(uct_hello_world_OBJECTS) $(uct_hello_world_DEPENDENCIES) $(EXTRA_uct_hello_world_DEPENDENCIES) + @rm -f uct_hello_world$(EXEEXT) + $(AM_V_CCLD)$(uct_hello_world_LINK) $(uct_hello_world_OBJECTS) $(uct_hello_world_LDADD) $(LIBS) + +ucx_profiling$(EXEEXT): $(ucx_profiling_OBJECTS) $(ucx_profiling_DEPENDENCIES) $(EXTRA_ucx_profiling_DEPENDENCIES) + @rm -f ucx_profiling$(EXEEXT) + $(AM_V_CCLD)$(ucx_profiling_LINK) 
$(ucx_profiling_OBJECTS) $(ucx_profiling_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucp_client_server-ucp_client_server.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucp_hello_world-ucp_hello_world.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uct_hello_world-uct_hello_world.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ucx_profiling-ucx_profiling.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) 
$$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +ucp_client_server-ucp_client_server.o: ucp_client_server.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucp_client_server_CPPFLAGS) $(CPPFLAGS) $(ucp_client_server_CFLAGS) $(CFLAGS) -MT ucp_client_server-ucp_client_server.o -MD -MP -MF $(DEPDIR)/ucp_client_server-ucp_client_server.Tpo -c -o ucp_client_server-ucp_client_server.o `test -f 'ucp_client_server.c' || echo '$(srcdir)/'`ucp_client_server.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucp_client_server-ucp_client_server.Tpo $(DEPDIR)/ucp_client_server-ucp_client_server.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ucp_client_server.c' object='ucp_client_server-ucp_client_server.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucp_client_server_CPPFLAGS) $(CPPFLAGS) $(ucp_client_server_CFLAGS) $(CFLAGS) -c -o ucp_client_server-ucp_client_server.o `test -f 'ucp_client_server.c' || echo '$(srcdir)/'`ucp_client_server.c + +ucp_client_server-ucp_client_server.obj: ucp_client_server.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucp_client_server_CPPFLAGS) $(CPPFLAGS) $(ucp_client_server_CFLAGS) $(CFLAGS) -MT ucp_client_server-ucp_client_server.obj -MD -MP -MF $(DEPDIR)/ucp_client_server-ucp_client_server.Tpo -c -o ucp_client_server-ucp_client_server.obj `if test -f 'ucp_client_server.c'; then $(CYGPATH_W) 'ucp_client_server.c'; else $(CYGPATH_W) '$(srcdir)/ucp_client_server.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucp_client_server-ucp_client_server.Tpo 
$(DEPDIR)/ucp_client_server-ucp_client_server.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ucp_client_server.c' object='ucp_client_server-ucp_client_server.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucp_client_server_CPPFLAGS) $(CPPFLAGS) $(ucp_client_server_CFLAGS) $(CFLAGS) -c -o ucp_client_server-ucp_client_server.obj `if test -f 'ucp_client_server.c'; then $(CYGPATH_W) 'ucp_client_server.c'; else $(CYGPATH_W) '$(srcdir)/ucp_client_server.c'; fi` + +ucp_hello_world-ucp_hello_world.o: ucp_hello_world.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucp_hello_world_CPPFLAGS) $(CPPFLAGS) $(ucp_hello_world_CFLAGS) $(CFLAGS) -MT ucp_hello_world-ucp_hello_world.o -MD -MP -MF $(DEPDIR)/ucp_hello_world-ucp_hello_world.Tpo -c -o ucp_hello_world-ucp_hello_world.o `test -f 'ucp_hello_world.c' || echo '$(srcdir)/'`ucp_hello_world.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucp_hello_world-ucp_hello_world.Tpo $(DEPDIR)/ucp_hello_world-ucp_hello_world.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ucp_hello_world.c' object='ucp_hello_world-ucp_hello_world.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucp_hello_world_CPPFLAGS) $(CPPFLAGS) $(ucp_hello_world_CFLAGS) $(CFLAGS) -c -o ucp_hello_world-ucp_hello_world.o `test -f 'ucp_hello_world.c' || echo '$(srcdir)/'`ucp_hello_world.c + +ucp_hello_world-ucp_hello_world.obj: ucp_hello_world.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucp_hello_world_CPPFLAGS) $(CPPFLAGS) $(ucp_hello_world_CFLAGS) $(CFLAGS) -MT ucp_hello_world-ucp_hello_world.obj -MD -MP -MF 
$(DEPDIR)/ucp_hello_world-ucp_hello_world.Tpo -c -o ucp_hello_world-ucp_hello_world.obj `if test -f 'ucp_hello_world.c'; then $(CYGPATH_W) 'ucp_hello_world.c'; else $(CYGPATH_W) '$(srcdir)/ucp_hello_world.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucp_hello_world-ucp_hello_world.Tpo $(DEPDIR)/ucp_hello_world-ucp_hello_world.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ucp_hello_world.c' object='ucp_hello_world-ucp_hello_world.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucp_hello_world_CPPFLAGS) $(CPPFLAGS) $(ucp_hello_world_CFLAGS) $(CFLAGS) -c -o ucp_hello_world-ucp_hello_world.obj `if test -f 'ucp_hello_world.c'; then $(CYGPATH_W) 'ucp_hello_world.c'; else $(CYGPATH_W) '$(srcdir)/ucp_hello_world.c'; fi` + +uct_hello_world-uct_hello_world.o: uct_hello_world.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(uct_hello_world_CPPFLAGS) $(CPPFLAGS) $(uct_hello_world_CFLAGS) $(CFLAGS) -MT uct_hello_world-uct_hello_world.o -MD -MP -MF $(DEPDIR)/uct_hello_world-uct_hello_world.Tpo -c -o uct_hello_world-uct_hello_world.o `test -f 'uct_hello_world.c' || echo '$(srcdir)/'`uct_hello_world.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uct_hello_world-uct_hello_world.Tpo $(DEPDIR)/uct_hello_world-uct_hello_world.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uct_hello_world.c' object='uct_hello_world-uct_hello_world.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(uct_hello_world_CPPFLAGS) $(CPPFLAGS) $(uct_hello_world_CFLAGS) $(CFLAGS) -c -o uct_hello_world-uct_hello_world.o `test -f 'uct_hello_world.c' || echo '$(srcdir)/'`uct_hello_world.c + 
+uct_hello_world-uct_hello_world.obj: uct_hello_world.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(uct_hello_world_CPPFLAGS) $(CPPFLAGS) $(uct_hello_world_CFLAGS) $(CFLAGS) -MT uct_hello_world-uct_hello_world.obj -MD -MP -MF $(DEPDIR)/uct_hello_world-uct_hello_world.Tpo -c -o uct_hello_world-uct_hello_world.obj `if test -f 'uct_hello_world.c'; then $(CYGPATH_W) 'uct_hello_world.c'; else $(CYGPATH_W) '$(srcdir)/uct_hello_world.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uct_hello_world-uct_hello_world.Tpo $(DEPDIR)/uct_hello_world-uct_hello_world.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uct_hello_world.c' object='uct_hello_world-uct_hello_world.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(uct_hello_world_CPPFLAGS) $(CPPFLAGS) $(uct_hello_world_CFLAGS) $(CFLAGS) -c -o uct_hello_world-uct_hello_world.obj `if test -f 'uct_hello_world.c'; then $(CYGPATH_W) 'uct_hello_world.c'; else $(CYGPATH_W) '$(srcdir)/uct_hello_world.c'; fi` + +ucx_profiling-ucx_profiling.o: ucx_profiling.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_profiling_CPPFLAGS) $(CPPFLAGS) $(ucx_profiling_CFLAGS) $(CFLAGS) -MT ucx_profiling-ucx_profiling.o -MD -MP -MF $(DEPDIR)/ucx_profiling-ucx_profiling.Tpo -c -o ucx_profiling-ucx_profiling.o `test -f 'ucx_profiling.c' || echo '$(srcdir)/'`ucx_profiling.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_profiling-ucx_profiling.Tpo $(DEPDIR)/ucx_profiling-ucx_profiling.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ucx_profiling.c' object='ucx_profiling-ucx_profiling.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(ucx_profiling_CPPFLAGS) $(CPPFLAGS) $(ucx_profiling_CFLAGS) $(CFLAGS) -c -o ucx_profiling-ucx_profiling.o `test -f 'ucx_profiling.c' || echo '$(srcdir)/'`ucx_profiling.c + +ucx_profiling-ucx_profiling.obj: ucx_profiling.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_profiling_CPPFLAGS) $(CPPFLAGS) $(ucx_profiling_CFLAGS) $(CFLAGS) -MT ucx_profiling-ucx_profiling.obj -MD -MP -MF $(DEPDIR)/ucx_profiling-ucx_profiling.Tpo -c -o ucx_profiling-ucx_profiling.obj `if test -f 'ucx_profiling.c'; then $(CYGPATH_W) 'ucx_profiling.c'; else $(CYGPATH_W) '$(srcdir)/ucx_profiling.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ucx_profiling-ucx_profiling.Tpo $(DEPDIR)/ucx_profiling-ucx_profiling.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='ucx_profiling.c' object='ucx_profiling-ucx_profiling.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ucx_profiling_CPPFLAGS) $(CPPFLAGS) $(ucx_profiling_CFLAGS) $(CFLAGS) -c -o ucx_profiling-ucx_profiling.obj `if test -f 'ucx_profiling.c'; then $(CYGPATH_W) 'ucx_profiling.c'; else $(CYGPATH_W) '$(srcdir)/ucx_profiling.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-dist_examplesDATA: $(dist_examples_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_examples_DATA)'; test -n "$(examplesdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(examplesdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(examplesdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(examplesdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(examplesdir)" || exit $$?; \ + done + +uninstall-dist_examplesDATA: + 
@$(NORMAL_UNINSTALL) + @list='$(dist_examples_DATA)'; test -n "$(examplesdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(examplesdir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo 
"$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(DATA) +installdirs: + for dir in "$(DESTDIR)$(examplesdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/ucp_client_server-ucp_client_server.Po + -rm -f ./$(DEPDIR)/ucp_hello_world-ucp_hello_world.Po + -rm -f ./$(DEPDIR)/uct_hello_world-uct_hello_world.Po + -rm -f ./$(DEPDIR)/ucx_profiling-ucx_profiling.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-dist_examplesDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: installcheck-local + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/ucp_client_server-ucp_client_server.Po + -rm -f ./$(DEPDIR)/ucp_hello_world-ucp_hello_world.Po + -rm -f ./$(DEPDIR)/uct_hello_world-uct_hello_world.Po + -rm -f ./$(DEPDIR)/ucx_profiling-ucx_profiling.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-dist_examplesDATA + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstPROGRAMS cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dist_examplesDATA install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + 
install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installcheck-local \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-dist_examplesDATA + +.PRECIOUS: Makefile + + +installcheck-local: + @echo "INSTALLCHECK: Compiling examples with installed library" + $(CC) -o uct_hello_world $(examplesdir)/uct_hello_world.c -luct $(EXAMPLE_CCLD_FLAGS) + $(CC) -o ucp_hello_world $(examplesdir)/ucp_hello_world.c -lucp $(EXAMPLE_CCLD_FLAGS) + $(CC) -o ucp_client_server $(examplesdir)/ucp_client_server.c -lucp $(EXAMPLE_CCLD_FLAGS) + $(CC) -o ucx_profiling $(examplesdir)/ucx_profiling.c -lm $(EXAMPLE_CCLD_FLAGS) + $(RM) *.o uct_hello_world ucp_hello_world ucp_client_server ucx_profiling + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/test/examples/ucp_client_server.c b/test/examples/ucp_client_server.c new file mode 100644 index 0000000..e3b0c2d --- /dev/null +++ b/test/examples/ucp_client_server.c @@ -0,0 +1,805 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +/* + * UCP client - server example utility + * ----------------------------------------------- + * + * Server side: + * + * ./ucp_client_server + * + * Client side: + * + * ./ucp_client_server -a <server-ip> + * + * Notes: + * + * - The server will listen to incoming connection requests on INADDR_ANY. + * - The client needs to pass the IP address of the server side to connect to + * as an argument to the test. + * - Currently, the passed IP needs to be an IPoIB or a RoCE address. + * - The port which the server side would listen on can be modified with the + * '-p' option and should be used on both sides. The default port to use is + * 13337.
+ */
+#include <ucp/api/ucp.h>
+
+#include <stdint.h>    /* uint16_t, UINT16_MAX */
+#include <stdio.h>     /* printf, fprintf, snprintf */
+#include <string.h>    /* memset */
+#include <strings.h>   /* strcasecmp */
+#include <arpa/inet.h> /* inet_addr */
+#include <unistd.h>    /* getopt */
+#include <stdlib.h>    /* atoi */
+
+/* Message buffer size in bytes; sizeof() also counts the terminating NUL */
+#define TEST_STRING_LEN        sizeof(test_message)
+/* Port used when '-p' is not given */
+#define DEFAULT_PORT           13337
+/* Sizes of the buffers used to print the listener's IP address and port */
+#define IP_STRING_LEN          50
+#define PORT_STRING_LEN        8
+/* Tag value used by both sides of the Tag-Matching exchange */
+#define TAG                    0xCAFE
+/* Name of the default communication type, as printed in messages */
+#define COMM_TYPE_DEFAULT      "STREAM"
+/* Besides the first and last iteration, print every PRINT_INTERVAL-th one */
+#define PRINT_INTERVAL         2000
+#define DEFAULT_NUM_ITERATIONS 1
+
+const char test_message[]   = "UCX Client-Server Hello World";
+static uint16_t server_port = DEFAULT_PORT;
+static int num_iterations   = DEFAULT_NUM_ITERATIONS;
+
+
+/**
+ * Communication API selected with the '-c' option.
+ */
+typedef enum {
+    CLIENT_SERVER_SEND_RECV_STREAM  = UCS_BIT(0),
+    CLIENT_SERVER_SEND_RECV_TAG     = UCS_BIT(1),
+    CLIENT_SERVER_SEND_RECV_DEFAULT = CLIENT_SERVER_SEND_RECV_STREAM
+} send_recv_type_t;
+
+
+/**
+ * Server's application context to be used in the user's connection request
+ * callback.
+ * It holds the server's listener and the handle to an incoming connection request.
+ */
+typedef struct ucx_server_ctx {
+    volatile ucp_conn_request_h conn_request;
+    ucp_listener_h              listener;
+} ucx_server_ctx_t;
+
+
+/**
+ * Request context shared by the stream and tag callbacks. Holds a value to
+ * indicate whether or not the request is completed.
+ */
+typedef struct test_req {
+    int complete;
+} test_req_t;
+
+
+/**
+ * The callback on the receiving side, which is invoked upon receiving the
+ * tag message. It only marks the request as completed; 'status' and 'info'
+ * are not inspected here.
+ */
+static void tag_recv_cb(void *request, ucs_status_t status,
+                        ucp_tag_recv_info_t *info)
+{
+    test_req_t *req = request;
+
+    req->complete = 1;
+}
+
+/**
+ * The callback on the receiving side, which is invoked upon receiving the
+ * stream message. It only marks the request as completed.
+ */
+static void stream_recv_cb(void *request, ucs_status_t status, size_t length)
+{
+    test_req_t *req = request;
+
+    req->complete = 1;
+}
+
+/**
+ * The callback on the sending side, which is invoked after finishing sending
+ * the message. It only marks the request as completed.
+ */
+static void send_cb(void *request, ucs_status_t status)
+{
+    test_req_t *req = request;
+
+    req->complete = 1;
+}
+
+/**
+ * Error handling callback.
+ */
+static void err_cb(void *arg, ucp_ep_h ep, ucs_status_t status)
+{
+    printf("error handling callback was invoked with status %d (%s)\n",
+           status, ucs_status_string(status));
+}
+
+/**
+ * Set an address for the server to listen on: the given IP address, or
+ * INADDR_ANY when no address was given, on the configured server port.
+ *
+ * @param address_str  Dotted IPv4 address string, or NULL for INADDR_ANY.
+ * @param listen_addr  Filled with the resulting AF_INET socket address.
+ */
+void set_listen_addr(const char *address_str, struct sockaddr_in *listen_addr)
+{
+    memset(listen_addr, 0, sizeof(struct sockaddr_in));
+    listen_addr->sin_family      = AF_INET;
+    /* NOTE(review): the result of inet_addr() is not checked for failure */
+    listen_addr->sin_addr.s_addr = (address_str) ? inet_addr(address_str) : INADDR_ANY;
+    listen_addr->sin_port        = htons(server_port);
+}
+
+/**
+ * Set an address to connect to. A given IP address on a well known port.
+ *
+ * @param address_str   Dotted IPv4 address string of the server.
+ * @param connect_addr  Filled with the resulting AF_INET socket address.
+ */
+void set_connect_addr(const char *address_str, struct sockaddr_in *connect_addr)
+{
+    memset(connect_addr, 0, sizeof(struct sockaddr_in));
+    connect_addr->sin_family      = AF_INET;
+    /* NOTE(review): the result of inet_addr() is not checked for failure */
+    connect_addr->sin_addr.s_addr = inet_addr(address_str);
+    connect_addr->sin_port        = htons(server_port);
+}
+
+/**
+ * Initialize the client side. Create an endpoint from the client side to be
+ * connected to the remote server (to the given IP).
+ *
+ * @param ucp_worker  Worker on which the endpoint is created.
+ * @param ip          IP address string of the server.
+ * @param client_ep   Filled with the created endpoint on success.
+ *
+ * @return The status returned by ucp_ep_create().
+ */
+static ucs_status_t start_client(ucp_worker_h ucp_worker, const char *ip,
+                                 ucp_ep_h *client_ep)
+{
+    ucp_ep_params_t ep_params;
+    struct sockaddr_in connect_addr;
+    ucs_status_t status;
+
+    set_connect_addr(ip, &connect_addr);
+
+    /*
+     * Endpoint field mask bits:
+     * UCP_EP_PARAM_FIELD_FLAGS             - Use the value of the 'flags' field.
+     * UCP_EP_PARAM_FIELD_SOCK_ADDR         - Use a remote sockaddr to connect
+     *                                        to the remote peer.
+     * UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE - Error handling mode - this flag
+     *                                        is temporarily required since the
+     *                                        endpoint will be closed with
+     *                                        UCP_EP_CLOSE_MODE_FORCE which
+     *                                        requires this mode.
+     *                                        Once UCP_EP_CLOSE_MODE_FORCE is
+     *                                        removed, the error handling mode
+     *                                        will be removed.
+     */
+    ep_params.field_mask       = UCP_EP_PARAM_FIELD_FLAGS |
+                                 UCP_EP_PARAM_FIELD_SOCK_ADDR |
+                                 UCP_EP_PARAM_FIELD_ERR_HANDLER |
+                                 UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE;
+    ep_params.err_mode         = UCP_ERR_HANDLING_MODE_PEER;
+    ep_params.err_handler.cb   = err_cb;
+    ep_params.err_handler.arg  = NULL;
+    ep_params.flags            = UCP_EP_PARAMS_FLAGS_CLIENT_SERVER;
+    ep_params.sockaddr.addr    = (struct sockaddr*)&connect_addr;
+    ep_params.sockaddr.addrlen = sizeof(connect_addr);
+
+    status = ucp_ep_create(ucp_worker, &ep_params, client_ep);
+    if (status != UCS_OK) {
+        fprintf(stderr, "failed to connect to %s (%s)\n", ip, ucs_status_string(status));
+    }
+
+    return status;
+}
+
+/**
+ * Print the received message on the server side or the sent data on the client
+ * side.
+ *
+ * @param is_server     Non-zero to print the server-side report.
+ * @param recv_message  Received text; printed on the server side only.
+ * @param current_iter  Zero-based iteration index (printed one-based).
+ */
+static void print_result(int is_server, char *recv_message, int current_iter)
+{
+    if (is_server) {
+        printf("Server: iteration #%d\n", (current_iter + 1));
+        printf("UCX data message was received\n");
+        printf("\n\n----- UCP TEST SUCCESS -------\n\n");
+        printf("%s", recv_message);
+        printf("\n\n------------------------------\n\n");
+    } else {
+        printf("Client: iteration #%d\n", (current_iter + 1));
+        printf("\n\n-----------------------------------------\n\n");
+        /* TEST_STRING_LEN is a sizeof() result, i.e. a size_t: print with %zu */
+        printf("Client sent message: \n%s.\nlength: %zu\n",
+               test_message, TEST_STRING_LEN);
+        printf("\n-----------------------------------------\n\n");
+    }
+}
+
+/**
+ * Progress the request until it completes.
+ */
+static ucs_status_t request_wait(ucp_worker_h ucp_worker, test_req_t *request)
+{
+    ucs_status_t result;
+
+    /* A NULL request means the operation already completed in place */
+    if (request == NULL) {
+        return UCS_OK;
+    }
+
+    /* An error-encoded pointer carries the failure status itself */
+    if (UCS_PTR_IS_ERR(request)) {
+        return UCS_PTR_STATUS(request);
+    }
+
+    /* Drive the worker until the completion callback flags the request */
+    while (!request->complete) {
+        ucp_worker_progress(ucp_worker);
+    }
+
+    result = ucp_request_check_status(request);
+
+    /* Reset the flag before releasing: the request memory may be reused */
+    request->complete = 0;
+    ucp_request_free(request);
+
+    return result;
+}
+
+/**
+ * Wait for the given request and report the outcome.
+ * Returns 0 on success and -1 when the request did not complete with UCS_OK.
+ * On success the result is printed for the first, the last and every
+ * PRINT_INTERVAL-th iteration.
+ */
+static int request_finalize(ucp_worker_h ucp_worker, test_req_t *request,
+                            int is_server, char *recv_message, int current_iter)
+{
+    const ucs_status_t wait_status = request_wait(ucp_worker, request);
+    int should_print;
+
+    if (wait_status != UCS_OK) {
+        fprintf(stderr, "unable to %s UCX message (%s)\n",
+                is_server ? "receive": "send", ucs_status_string(wait_status));
+        return -1;
+    }
+
+    should_print = (current_iter == 0) ||
+                   (current_iter == (num_iterations - 1)) ||
+                   (((current_iter + 1) % (PRINT_INTERVAL)) == 0);
+    if (should_print) {
+        print_result(is_server, recv_message, current_iter);
+    }
+
+    return 0;
+}
+
+/**
+ * Send and receive a message using the Stream API.
+ * The client sends a message to the server and waits until the send it completed.
+ * The server receives a message from the client and waits for its completion.
+ */
+static int send_recv_stream(ucp_worker_h ucp_worker, ucp_ep_h ep, int is_server,
+                            int current_iter)
+{
+    char rx_buf[TEST_STRING_LEN] = "";
+    size_t rx_len;
+    test_req_t *req;
+
+    if (is_server) {
+        /* Server side: post a stream receive for the whole message */
+        req = ucp_stream_recv_nb(ep, rx_buf, 1,
+                                 ucp_dt_make_contig(TEST_STRING_LEN),
+                                 stream_recv_cb, &rx_len,
+                                 UCP_STREAM_RECV_FLAG_WAITALL);
+    } else {
+        /* Client side: post a stream send of the test message */
+        req = ucp_stream_send_nb(ep, test_message, 1,
+                                 ucp_dt_make_contig(TEST_STRING_LEN),
+                                 send_cb, 0);
+    }
+
+    return request_finalize(ucp_worker, req, is_server, rx_buf, current_iter);
+}
+
+/**
+ * Exchange one message through the Tag-Matching API.
+ * On the client the test message is sent with the TAG tag; on the server a
+ * receive for the same tag is posted on the worker. Either side then waits
+ * for its request to complete.
+ */
+static int send_recv_tag(ucp_worker_h ucp_worker, ucp_ep_h ep, int is_server,
+                         int current_iter)
+{
+    char rx_buf[TEST_STRING_LEN] = "";
+    test_req_t *req;
+
+    if (is_server) {
+        /* Server side: post a tagged receive on the worker */
+        req = ucp_tag_recv_nb(ucp_worker, rx_buf, 1,
+                              ucp_dt_make_contig(TEST_STRING_LEN),
+                              TAG, 0, tag_recv_cb);
+    } else {
+        /* Client side: post a tagged send of the test message */
+        req = ucp_tag_send_nb(ep, test_message, 1,
+                              ucp_dt_make_contig(TEST_STRING_LEN), TAG,
+                              send_cb);
+    }
+
+    return request_finalize(ucp_worker, req, is_server, rx_buf, current_iter);
+}
+
+/**
+ * Close the given endpoint.
+ * Currently closing the endpoint with UCP_EP_CLOSE_MODE_FORCE since we currently
+ * cannot rely on the client side to be present during the server's endpoint
+ * closing process.
+ */
+static void ep_close(ucp_worker_h ucp_worker, ucp_ep_h ep)
+{
+    void *pending = ucp_ep_close_nb(ep, UCP_EP_CLOSE_MODE_FORCE);
+
+    if (!UCS_PTR_IS_PTR(pending)) {
+        /* No request to wait for: either closed already or failed outright */
+        if (UCS_PTR_STATUS(pending) != UCS_OK) {
+            fprintf(stderr, "failed to close ep %p\n", (void*)ep);
+        }
+        return;
+    }
+
+    /* Progress at least once, then keep going while the close is pending */
+    for (;;) {
+        ucp_worker_progress(ucp_worker);
+        if (ucp_request_check_status(pending) != UCS_INPROGRESS) {
+            break;
+        }
+    }
+
+    ucp_request_free(pending);
+}
+
+/**
+ * Request initializer handed to UCX: clears the completion flag of a newly
+ * set up user request.
+ */
+static void request_init(void *request)
+{
+    ((test_req_t*)request)->complete = 0;
+}
+
+/**
+ * Write the command line help of this utility to stderr.
+ */
+static void usage()
+{
+    FILE *out = stderr;
+
+    fprintf(out, "Usage: ucp_client_server [parameters]\n");
+    fprintf(out, "UCP client-server example utility\n");
+    fprintf(out, "\nParameters are:\n");
+    fprintf(out, " -a Set IP address of the server "
+                 "(required for client and should not be specified "
+                 "for the server)\n");
+    fprintf(out, " -l Set IP address where server listens "
+                 "(If not specified, server uses INADDR_ANY; "
+                 "Irrelevant at client)\n");
+    fprintf(out, " -p Port number to listen/connect to (default = %d). "
+                 "0 on the server side means select a random port and print it\n",
+            DEFAULT_PORT);
+    fprintf(out, " -c Communication type for the client and server. "
+                 " Valid values are:\n"
+                 " 'stream' : Stream API\n"
+                 " 'tag' : Tag API\n"
+                 " If not specified, %s API will be used.\n", COMM_TYPE_DEFAULT);
+    fprintf(out, " -i Number of iterations to run. Client and server must "
+                 "have the same value. (default = %d).\n",
+            num_iterations);
+    fprintf(out, "\n");
+}
+
+/**
+ * Parse the command line arguments.
+ */
+static int parse_cmd(int argc, char *const argv[], char **server_addr,
+                     char **listen_addr, send_recv_type_t *send_recv_type)
+{
+    int opt;
+    int port_arg;
+
+    /* Keep getopt quiet; an unrecognized option ends up in usage() below */
+    opterr = 0;
+
+    for (;;) {
+        opt = getopt(argc, argv, "a:l:p:c:i:");
+        if (opt == -1) {
+            return 0;
+        }
+
+        if (opt == 'a') {
+            *server_addr = optarg;
+        } else if (opt == 'l') {
+            *listen_addr = optarg;
+        } else if (opt == 'c') {
+            if (strcasecmp(optarg, "stream") == 0) {
+                *send_recv_type = CLIENT_SERVER_SEND_RECV_STREAM;
+            } else if (strcasecmp(optarg, "tag") == 0) {
+                *send_recv_type = CLIENT_SERVER_SEND_RECV_TAG;
+            } else {
+                fprintf(stderr, "Wrong communication type %s. "
+                        "Using %s as default\n", optarg, COMM_TYPE_DEFAULT);
+                *send_recv_type = CLIENT_SERVER_SEND_RECV_DEFAULT;
+            }
+        } else if (opt == 'p') {
+            port_arg = atoi(optarg);
+            if ((port_arg < 0) || (port_arg > UINT16_MAX)) {
+                fprintf(stderr, "Wrong server port number %d\n", port_arg);
+                return -1;
+            }
+            server_port = port_arg;
+        } else if (opt == 'i') {
+            num_iterations = atoi(optarg);
+        } else {
+            usage();
+            return -1;
+        }
+    }
+}
+
+/**
+ * Format the IP address of an AF_INET / AF_INET6 socket address into ip_str.
+ * Returns ip_str, or a constant error string for any other address family.
+ */
+static char* sockaddr_get_ip_str(const struct sockaddr_storage *sock_addr,
+                                 char *ip_str, size_t max_size)
+{
+    struct sockaddr_in v4;
+    struct sockaddr_in6 v6;
+
+    if (sock_addr->ss_family == AF_INET) {
+        memcpy(&v4, sock_addr, sizeof(struct sockaddr_in));
+        inet_ntop(AF_INET, &v4.sin_addr, ip_str, max_size);
+        return ip_str;
+    }
+
+    if (sock_addr->ss_family == AF_INET6) {
+        memcpy(&v6, sock_addr, sizeof(struct sockaddr_in6));
+        inet_ntop(AF_INET6, &v6.sin6_addr, ip_str, max_size);
+        return ip_str;
+    }
+
+    return "Invalid address family";
+}
+
+/**
+ * Format the port of an AF_INET / AF_INET6 socket address, converted to host
+ * byte order, into port_str.
+ * Returns port_str, or a constant error string for any other address family.
+ */
+static char* sockaddr_get_port_str(const struct sockaddr_storage *sock_addr,
+                                   char *port_str, size_t max_size)
+{
+    struct sockaddr_in v4;
+    struct sockaddr_in6 v6;
+    uint16_t port_net;
+
+    if (sock_addr->ss_family == AF_INET) {
+        memcpy(&v4, sock_addr, sizeof(struct sockaddr_in));
+        port_net = v4.sin_port;
+    } else if (sock_addr->ss_family == AF_INET6) {
+        memcpy(&v6, sock_addr, sizeof(struct sockaddr_in6));
+        port_net = v6.sin6_port;
+    } else {
+        return "Invalid address family";
+    }
+
+    snprintf(port_str, max_size, "%d", ntohs(port_net));
+    return port_str;
+}
+
+/**
+ * Run a single send/receive iteration with the selected communication API.
+ * Returns the result of the selected routine, or -1 for an unknown type.
+ */
+static int client_server_communication(ucp_worker_h worker, ucp_ep_h ep,
+                                       send_recv_type_t send_recv_type,
+                                       int is_server, int current_iter)
+{
+    if (send_recv_type == CLIENT_SERVER_SEND_RECV_STREAM) {
+        /* Stream API path */
+        return send_recv_stream(worker, ep, is_server, current_iter);
+    }
+
+    if (send_recv_type == CLIENT_SERVER_SEND_RECV_TAG) {
+        /* Tag-Matching API path */
+        return send_recv_tag(worker, ep, is_server, current_iter);
+    }
+
+    fprintf(stderr, "unknown send-recv type %d\n", send_recv_type);
+    return -1;
+}
+
+/**
+ * Create a single-threaded ucp worker on the given ucp context.
+ * Returns 0 on success, -1 if the worker could not be created.
+ */
+static int init_worker(ucp_context_h ucp_context, ucp_worker_h *ucp_worker)
+{
+    ucp_worker_params_t params;
+    ucs_status_t create_status;
+
+    memset(&params, 0, sizeof(params));
+    params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
+    params.thread_mode = UCS_THREAD_MODE_SINGLE;
+
+    create_status = ucp_worker_create(ucp_context, &params, ucp_worker);
+    if (create_status == UCS_OK) {
+        return 0;
+    }
+
+    fprintf(stderr, "failed to ucp_worker_create (%s)\n", ucs_status_string(create_status));
+    return -1;
+}
+
+/**
+ * Connection request callback of the server's listener.
+ * The first pending request is stored in the server context; any request that
+ * arrives while one is already stored is rejected.
+ */
+static void server_conn_handle_cb(ucp_conn_request_h conn_request, void *arg)
+{
+    ucx_server_ctx_t *server_ctx = arg;
+    ucs_status_t reject_status;
+
+    if (server_ctx->conn_request == NULL) {
+        server_ctx->conn_request = conn_request;
+        return;
+    }
+
+    /* A client is already being served, so turn this one away */
+    printf("Rejecting a connection request. "
+           "Only one client at a time is supported.\n");
+    reject_status = ucp_listener_reject(server_ctx->listener, conn_request);
+    if (reject_status != UCS_OK) {
+        fprintf(stderr, "server failed to reject a connection request: (%s)\n",
+                ucs_status_string(reject_status));
+    }
+}
+
+/**
+ * Create the server's endpoint to the client from a connection request.
+ * The endpoint is created on the data worker, which is not the worker the
+ * listener was created on. Returns the status of ucp_ep_create().
+ */
+static ucs_status_t server_create_ep(ucp_worker_h data_worker,
+                                     ucp_conn_request_h conn_request,
+                                     ucp_ep_h *server_ep)
+{
+    ucp_ep_params_t params;
+    ucs_status_t create_status;
+
+    params.field_mask      = UCP_EP_PARAM_FIELD_ERR_HANDLER |
+                             UCP_EP_PARAM_FIELD_CONN_REQUEST;
+    params.err_handler.cb  = err_cb;
+    params.err_handler.arg = NULL;
+    params.conn_request    = conn_request;
+
+    create_status = ucp_ep_create(data_worker, &params, server_ep);
+    if (create_status != UCS_OK) {
+        fprintf(stderr, "failed to create an endpoint on the server: (%s)\n",
+                ucs_status_string(create_status));
+    }
+
+    return create_status;
+}
+
+/**
+ * Initialize the server side. The server starts listening on the set address.
+ */
+static ucs_status_t start_server(ucp_worker_h ucp_worker,
+                                 ucx_server_ctx_t *context,
+                                 ucp_listener_h *listener_p, const char *ip)
+{
+    struct sockaddr_in listen_addr;
+    ucp_listener_params_t params;
+    ucp_listener_attr_t attr;
+    ucs_status_t status;
+    char ip_str[IP_STRING_LEN];
+    char port_str[PORT_STRING_LEN];
+
+    set_listen_addr(ip, &listen_addr);
+
+    params.field_mask       = UCP_LISTENER_PARAM_FIELD_SOCK_ADDR |
+                              UCP_LISTENER_PARAM_FIELD_CONN_HANDLER;
+    params.sockaddr.addr    = (const struct sockaddr*)&listen_addr;
+    params.sockaddr.addrlen = sizeof(listen_addr);
+    params.conn_handler.cb  = server_conn_handle_cb;
+    params.conn_handler.arg = context;
+
+    /* Create a listener on the server side to listen on the given address. */
+    status = ucp_listener_create(ucp_worker, &params, listener_p);
+    if (status != UCS_OK) {
+        fprintf(stderr, "failed to listen (%s)\n", ucs_status_string(status));
+        goto out;
+    }
+
+    /* Query the created listener to get the port it is listening on. */
+    attr.field_mask = UCP_LISTENER_ATTR_FIELD_SOCKADDR;
+    status = ucp_listener_query(*listener_p, &attr);
+    if (status != UCS_OK) {
+        fprintf(stderr, "failed to query the listener (%s)\n",
+                ucs_status_string(status));
+        /* The listener is unusable for the caller, release it here */
+        ucp_listener_destroy(*listener_p);
+        goto out;
+    }
+
+    fprintf(stderr, "server is listening on IP %s port %s\n",
+            sockaddr_get_ip_str(&attr.sockaddr, ip_str, IP_STRING_LEN),
+            sockaddr_get_port_str(&attr.sockaddr, port_str, PORT_STRING_LEN));
+
+    printf("Waiting for connection...\n");
+
+out:
+    return status;
+}
+
+/**
+ * Run num_iterations send/receive iterations over the given endpoint.
+ * Stops at the first failing iteration.
+ *
+ * @return 0 if all iterations succeeded, otherwise the failing iteration's
+ *         non-zero result.
+ */
+static int client_server_do_work(ucp_worker_h ucp_worker, ucp_ep_h ep,
+                                 send_recv_type_t send_recv_type, int is_server)
+{
+    int i, ret = 0;
+
+    for (i = 0; i < num_iterations; i++) {
+        ret = client_server_communication(ucp_worker, ep, send_recv_type,
+                                          is_server, i);
+        if (ret != 0) {
+            fprintf(stderr, "%s failed on iteration #%d\n",
+                    (is_server ? "server": "client"), i);
+            goto out;
+        }
+    }
+
+out:
+    return ret;
+}
+
+/**
+ * Server main loop: listen on the connection worker and serve clients one at
+ * a time on a separate data worker.
+ * The loop only ends on an error, so the function returns a non-zero value
+ * after releasing the objects it created.
+ *
+ * @param ucp_context     Context on which the data worker is created.
+ * @param ucp_worker      Connection worker the listener is created on.
+ * @param listen_addr     IP address string to listen on, or NULL.
+ * @param send_recv_type  Communication API to use for the data exchange.
+ */
+static int run_server(ucp_context_h ucp_context, ucp_worker_h ucp_worker,
+                      char *listen_addr, send_recv_type_t send_recv_type)
+{
+    ucx_server_ctx_t context;
+    ucp_worker_h ucp_data_worker;
+    ucp_ep_h server_ep;
+    ucs_status_t status;
+    int ret;
+
+    /* Create a data worker (to be used for data exchange between the server
+     * and the client after the connection between them was established) */
+    ret = init_worker(ucp_context, &ucp_data_worker);
+    if (ret != 0) {
+        goto err;
+    }
+
+    /* Initialize the server's context. */
+    context.conn_request = NULL;
+
+    /* Create a listener on the worker created at first. The 'connection
+     * worker' - used for connection establishment between client and server.
+     * This listener will stay open for listening to incoming connection
+     * requests from the client */
+    status = start_server(ucp_worker, &context, &context.listener, listen_addr);
+    if (status != UCS_OK) {
+        ret = -1;
+        goto err_worker;
+    }
+
+    /* Server is always up listening */
+    while (1) {
+        /* Wait for the server to receive a connection request from the client.
+         * If there are multiple clients for which the server's connection request
+         * callback is invoked, i.e. several clients are trying to connect in
+         * parallel, the server will handle only the first one and reject the rest */
+        while (context.conn_request == NULL) {
+            ucp_worker_progress(ucp_worker);
+        }
+
+        /* Server creates an ep to the client on the data worker.
+         * This is not the worker the listener was created on.
+         * The client side should have initiated the connection, leading
+         * to this ep's creation */
+        status = server_create_ep(ucp_data_worker, context.conn_request,
+                                  &server_ep);
+        if (status != UCS_OK) {
+            ret = -1;
+            goto err_listener;
+        }
+
+        /* The server waits for all the iterations to complete before moving on
+         * to the next client */
+        ret = client_server_do_work(ucp_data_worker, server_ep, send_recv_type,
+                                    1);
+        if (ret != 0) {
+            goto err_ep;
+        }
+
+        /* Close the endpoint to the client */
+        ep_close(ucp_data_worker, server_ep);
+
+        /* Reinitialize the server's context to be used for the next client */
+        context.conn_request = NULL;
+
+        printf("Waiting for connection...\n");
+    }
+
+err_ep:
+    ep_close(ucp_data_worker, server_ep);
+err_listener:
+    ucp_listener_destroy(context.listener);
+err_worker:
+    ucp_worker_destroy(ucp_data_worker);
+err:
+    return ret;
+}
+
+/**
+ * Client flow: connect to the server, run all the iterations and close the
+ * endpoint.
+ *
+ * @return -1 if the endpoint could not be created, otherwise the result of
+ *         client_server_do_work().
+ */
+static int run_client(ucp_worker_h ucp_worker, char *server_addr,
+                      send_recv_type_t send_recv_type)
+{
+    ucp_ep_h client_ep;
+    ucs_status_t status;
+    int ret;
+
+    status = start_client(ucp_worker, server_addr, &client_ep);
+    if (status != UCS_OK) {
+        fprintf(stderr, "failed to start client (%s)\n", ucs_status_string(status));
+        ret = -1;
+        goto out;
+    }
+
+    ret = client_server_do_work(ucp_worker, client_ep, send_recv_type, 0);
+
+    /* Close the endpoint to the server */
+    ep_close(ucp_worker, client_ep);
+
+out:
+    return ret;
+}
+
+/**
+ * Initialize the UCP context and worker.
+ */ +static int init_context(ucp_context_h *ucp_context, ucp_worker_h *ucp_worker) +{ + /* UCP objects */ + ucp_params_t ucp_params; + ucs_status_t status; + int ret = 0; + + memset(&ucp_params, 0, sizeof(ucp_params)); + + /* UCP initialization */ + ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_REQUEST_SIZE | + UCP_PARAM_FIELD_REQUEST_INIT; + ucp_params.features = UCP_FEATURE_STREAM | UCP_FEATURE_TAG; + ucp_params.request_size = sizeof(test_req_t); + ucp_params.request_init = request_init; + + status = ucp_init(&ucp_params, NULL, ucp_context); + if (status != UCS_OK) { + fprintf(stderr, "failed to ucp_init (%s)\n", ucs_status_string(status)); + ret = -1; + goto err; + } + + ret = init_worker(*ucp_context, ucp_worker); + if (ret != 0) { + goto err_cleanup; + } + + return ret; + +err_cleanup: + ucp_cleanup(*ucp_context); +err: + return ret; +} + + +int main(int argc, char **argv) +{ + send_recv_type_t send_recv_type = CLIENT_SERVER_SEND_RECV_DEFAULT; + char *server_addr = NULL; + char *listen_addr = NULL; + int ret; + + /* UCP objects */ + ucp_context_h ucp_context; + ucp_worker_h ucp_worker; + + ret = parse_cmd(argc, argv, &server_addr, &listen_addr, &send_recv_type); + if (ret != 0) { + goto err; + } + + /* Initialize the UCX required objects */ + ret = init_context(&ucp_context, &ucp_worker); + if (ret != 0) { + goto err; + } + + /* Client-Server initialization */ + if (server_addr == NULL) { + /* Server side */ + ret = run_server(ucp_context, ucp_worker, listen_addr, send_recv_type); + } else { + /* Client side */ + ret = run_client(ucp_worker, server_addr, send_recv_type); + } + + ucp_worker_destroy(ucp_worker); + ucp_cleanup(ucp_context); +err: + return ret; +} diff --git a/test/examples/ucp_hello_world.c b/test/examples/ucp_hello_world.c new file mode 100644 index 0000000..e8ce1b8 --- /dev/null +++ b/test/examples/ucp_hello_world.c @@ -0,0 +1,634 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. 
+* Copyright (C) Advanced Micro Devices, Inc. 2018. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef HAVE_CONFIG_H +# define HAVE_CONFIG_H /* Force using config.h, so test would fail if header + actually tries to use it */ +#endif + +/* + * UCP hello world client / server example utility + * ----------------------------------------------- + * + * Server side: + * + * ./ucp_hello_world + * + * Client side: + * + * ./ucp_hello_world -n + * + * Notes: + * + * - Client acquires Server UCX address via TCP socket + * + * + * Author: + * + * Ilya Nelkenbaum + * Sergey Shalnov 7-June-2016 + */ + +#include "ucx_hello_world.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* getopt */ +#include /* isprint */ +#include /* pthread_self */ +#include /* errno */ +#include +#include /* raise */ + +struct msg { + uint64_t data_len; +}; + +struct ucx_context { + int completed; +}; + +enum ucp_test_mode_t { + TEST_MODE_PROBE, + TEST_MODE_WAIT, + TEST_MODE_EVENTFD +} ucp_test_mode = TEST_MODE_PROBE; + +static struct err_handling { + ucp_err_handling_mode_t ucp_err_mode; + int failure; +} err_handling_opt; + +static ucs_status_t client_status = UCS_OK; +static uint16_t server_port = 13337; +static long test_string_length = 16; +static const ucp_tag_t tag = 0x1337a880u; +static const ucp_tag_t tag_mask = UINT64_MAX; +static ucp_address_t *local_addr; +static ucp_address_t *peer_addr; + +static size_t local_addr_len; +static size_t peer_addr_len; + +static ucs_status_t parse_cmd(int argc, char * const argv[], char **server_name); + +static void set_msg_data_len(struct msg *msg, uint64_t data_len) +{ + mem_type_memcpy(&msg->data_len, &data_len, sizeof(data_len)); +} + +static void request_init(void *request) +{ + struct ucx_context *ctx = (struct ucx_context *) request; + ctx->completed = 0; +} + +static void send_handler(void *request, ucs_status_t status) +{ + struct ucx_context *context = (struct 
ucx_context *) request; + + context->completed = 1; + + printf("[0x%x] send handler called with status %d (%s)\n", + (unsigned int)pthread_self(), status, ucs_status_string(status)); +} + +static void failure_handler(void *arg, ucp_ep_h ep, ucs_status_t status) +{ + ucs_status_t *arg_status = (ucs_status_t *)arg; + + printf("[0x%x] failure handler called with status %d (%s)\n", + (unsigned int)pthread_self(), status, ucs_status_string(status)); + + *arg_status = status; +} + +static void recv_handler(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info) +{ + struct ucx_context *context = (struct ucx_context *) request; + + context->completed = 1; + + printf("[0x%x] receive handler called with status %d (%s), length %lu\n", + (unsigned int)pthread_self(), status, ucs_status_string(status), + info->length); +} + +static void wait(ucp_worker_h ucp_worker, struct ucx_context *context) +{ + while (context->completed == 0) { + ucp_worker_progress(ucp_worker); + } +} + +static ucs_status_t test_poll_wait(ucp_worker_h ucp_worker) +{ + int err = 0; + ucs_status_t ret = UCS_ERR_NO_MESSAGE; + int epoll_fd_local = 0; + int epoll_fd = 0; + ucs_status_t status; + struct epoll_event ev; + ev.data.u64 = 0; + + status = ucp_worker_get_efd(ucp_worker, &epoll_fd); + CHKERR_JUMP(UCS_OK != status, "ucp_worker_get_efd", err); + + /* It is recommended to copy original fd */ + epoll_fd_local = epoll_create(1); + + ev.data.fd = epoll_fd; + ev.events = EPOLLIN; + err = epoll_ctl(epoll_fd_local, EPOLL_CTL_ADD, epoll_fd, &ev); + CHKERR_JUMP(err < 0, "add original socket to the new epoll\n", err_fd); + + /* Need to prepare ucp_worker before epoll_wait */ + status = ucp_worker_arm(ucp_worker); + if (status == UCS_ERR_BUSY) { /* some events are arrived already */ + ret = UCS_OK; + goto err_fd; + } + CHKERR_JUMP(status != UCS_OK, "ucp_worker_arm\n", err_fd); + + do { + err = epoll_wait(epoll_fd_local, &ev, 1, -1); + } while ((err == -1) && (errno == EINTR)); + + ret = UCS_OK; + 
+err_fd: + close(epoll_fd_local); + +err: + return ret; +} + +static int run_ucx_client(ucp_worker_h ucp_worker) +{ + ucp_tag_recv_info_t info_tag; + ucp_tag_message_h msg_tag; + ucs_status_t status; + ucp_ep_h server_ep; + ucp_ep_params_t ep_params; + struct msg *msg = 0; + struct ucx_context *request = 0; + size_t msg_len = 0; + int ret = -1; + char *str; + + /* Send client UCX address to server */ + ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS | + UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE; + ep_params.address = peer_addr; + ep_params.err_mode = err_handling_opt.ucp_err_mode; + + status = ucp_ep_create(ucp_worker, &ep_params, &server_ep); + CHKERR_JUMP(status != UCS_OK, "ucp_ep_create\n", err); + + msg_len = sizeof(*msg) + local_addr_len; + msg = malloc(msg_len); + CHKERR_JUMP(msg == NULL, "allocate memory\n", err_ep); + memset(msg, 0, msg_len); + + msg->data_len = local_addr_len; + memcpy(msg + 1, local_addr, local_addr_len); + + request = ucp_tag_send_nb(server_ep, msg, msg_len, + ucp_dt_make_contig(1), tag, + send_handler); + if (UCS_PTR_IS_ERR(request)) { + fprintf(stderr, "unable to send UCX address message\n"); + free(msg); + goto err_ep; + } else if (UCS_PTR_IS_PTR(request)) { + wait(ucp_worker, request); + request->completed = 0; /* Reset request state before recycling it */ + ucp_request_release(request); + } + + free(msg); + + if (err_handling_opt.failure) { + fprintf(stderr, "Emulating unexpected failure on client side\n"); + raise(SIGKILL); + } + + /* Receive test string from server */ + for (;;) { + + /* Probing incoming events in non-block mode */ + msg_tag = ucp_tag_probe_nb(ucp_worker, tag, tag_mask, 1, &info_tag); + if (msg_tag != NULL) { + /* Message arrived */ + break; + } else if (ucp_worker_progress(ucp_worker)) { + /* Some events were polled; try again without going to sleep */ + continue; + } + + /* If we got here, ucp_worker_progress() returned 0, so we can sleep. 
+ * Following blocked methods used to polling internal file descriptor + * to make CPU idle and don't spin loop + */ + if (ucp_test_mode == TEST_MODE_WAIT) { + /* Polling incoming events*/ + status = ucp_worker_wait(ucp_worker); + CHKERR_JUMP(status != UCS_OK, "ucp_worker_wait\n", err_ep); + } else if (ucp_test_mode == TEST_MODE_EVENTFD) { + status = test_poll_wait(ucp_worker); + CHKERR_JUMP(status != UCS_OK, "test_poll_wait\n", err_ep); + } + } + + msg = mem_type_malloc(info_tag.length); + CHKERR_JUMP(msg == NULL, "allocate memory\n", err_ep); + + request = ucp_tag_msg_recv_nb(ucp_worker, msg, info_tag.length, + ucp_dt_make_contig(1), msg_tag, + recv_handler); + + if (UCS_PTR_IS_ERR(request)) { + fprintf(stderr, "unable to receive UCX data message (%u)\n", + UCS_PTR_STATUS(request)); + free(msg); + goto err_ep; + } else { + /* ucp_tag_msg_recv_nb() cannot return NULL */ + assert(UCS_PTR_IS_PTR(request)); + wait(ucp_worker, request); + request->completed = 0; + ucp_request_release(request); + printf("UCX data message was received\n"); + } + + str = calloc(1, test_string_length); + if (str != NULL) { + mem_type_memcpy(str, msg + 1, test_string_length); + printf("\n\n----- UCP TEST SUCCESS ----\n\n"); + printf("%s", str); + printf("\n\n---------------------------\n\n"); + free(str); + } else { + fprintf(stderr, "Memory allocation failed\n"); + goto err_ep; + } + + mem_type_free(msg); + + ret = 0; + +err_ep: + ucp_ep_destroy(server_ep); + +err: + return ret; +} + +static void flush_callback(void *request, ucs_status_t status) +{ +} + +static ucs_status_t flush_ep(ucp_worker_h worker, ucp_ep_h ep) +{ + void *request; + + request = ucp_ep_flush_nb(ep, 0, flush_callback); + if (request == NULL) { + return UCS_OK; + } else if (UCS_PTR_IS_ERR(request)) { + return UCS_PTR_STATUS(request); + } else { + ucs_status_t status; + do { + ucp_worker_progress(worker); + status = ucp_request_check_status(request); + } while (status == UCS_INPROGRESS); + ucp_request_release(request); 
+ return status; + } +} + +static int run_ucx_server(ucp_worker_h ucp_worker) +{ + ucp_tag_recv_info_t info_tag; + ucp_tag_message_h msg_tag; + ucs_status_t status; + ucp_ep_h client_ep; + ucp_ep_params_t ep_params; + struct msg *msg = 0; + struct ucx_context *request = 0; + size_t msg_len = 0; + int ret; + + /* Receive client UCX address */ + do { + /* Progressing before probe to update the state */ + ucp_worker_progress(ucp_worker); + + /* Probing incoming events in non-block mode */ + msg_tag = ucp_tag_probe_nb(ucp_worker, tag, tag_mask, 1, &info_tag); + } while (msg_tag == NULL); + + msg = malloc(info_tag.length); + CHKERR_ACTION(msg == NULL, "allocate memory\n", ret = -1; goto err); + request = ucp_tag_msg_recv_nb(ucp_worker, msg, info_tag.length, + ucp_dt_make_contig(1), msg_tag, recv_handler); + + if (UCS_PTR_IS_ERR(request)) { + fprintf(stderr, "unable to receive UCX address message (%s)\n", + ucs_status_string(UCS_PTR_STATUS(request))); + free(msg); + ret = -1; + goto err; + } else { + /* ucp_tag_msg_recv_nb() cannot return NULL */ + assert(UCS_PTR_IS_PTR(request)); + wait(ucp_worker, request); + request->completed = 0; + ucp_request_release(request); + printf("UCX address message was received\n"); + } + + peer_addr_len = msg->data_len; + peer_addr = malloc(peer_addr_len); + if (peer_addr == NULL) { + fprintf(stderr, "unable to allocate memory for peer address\n"); + free(msg); + ret = -1; + goto err; + } + + memcpy(peer_addr, msg + 1, peer_addr_len); + + free(msg); + + /* Send test string to client */ + ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS | + UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE | + UCP_EP_PARAM_FIELD_ERR_HANDLER | + UCP_EP_PARAM_FIELD_USER_DATA; + ep_params.address = peer_addr; + ep_params.err_mode = err_handling_opt.ucp_err_mode; + ep_params.err_handler.cb = failure_handler; + ep_params.err_handler.arg = NULL; + ep_params.user_data = &client_status; + + status = ucp_ep_create(ucp_worker, &ep_params, &client_ep); + 
CHKERR_ACTION(status != UCS_OK, "ucp_ep_create\n", ret = -1; goto err); + + msg_len = sizeof(*msg) + test_string_length; + msg = mem_type_malloc(msg_len); + CHKERR_ACTION(msg == NULL, "allocate memory\n", ret = -1; goto err_ep); + mem_type_memset(msg, 0, msg_len); + + set_msg_data_len(msg, msg_len - sizeof(*msg)); + ret = generate_test_string((char *)(msg + 1), test_string_length); + CHKERR_JUMP(ret < 0, "generate test string", err_free_mem_type_msg); + + request = ucp_tag_send_nb(client_ep, msg, msg_len, + ucp_dt_make_contig(1), tag, + send_handler); + if (UCS_PTR_IS_ERR(request)) { + fprintf(stderr, "unable to send UCX data message\n"); + ret = -1; + goto err_free_mem_type_msg; + } else if (UCS_PTR_IS_PTR(request)) { + printf("UCX data message was scheduled for send\n"); + wait(ucp_worker, request); + request->completed = 0; + ucp_request_release(request); + } + + status = flush_ep(ucp_worker, client_ep); + printf("flush_ep completed with status %d (%s)\n", + status, ucs_status_string(status)); + + ret = 0; + +err_free_mem_type_msg: + mem_type_free(msg); +err_ep: + ucp_ep_destroy(client_ep); +err: + return ret; +} + +static int run_test(const char *client_target_name, ucp_worker_h ucp_worker) +{ + if (client_target_name != NULL) { + return run_ucx_client(ucp_worker); + } else { + return run_ucx_server(ucp_worker); + } +} + +int main(int argc, char **argv) +{ + /* UCP temporary vars */ + ucp_params_t ucp_params; + ucp_worker_params_t worker_params; + ucp_config_t *config; + ucs_status_t status; + + /* UCP handler objects */ + ucp_context_h ucp_context; + ucp_worker_h ucp_worker; + + /* OOB connection vars */ + uint64_t addr_len = 0; + char *client_target_name = NULL; + int oob_sock = -1; + int ret = -1; + + memset(&ucp_params, 0, sizeof(ucp_params)); + memset(&worker_params, 0, sizeof(worker_params)); + + /* Parse the command line */ + status = parse_cmd(argc, argv, &client_target_name); + CHKERR_JUMP(status != UCS_OK, "parse_cmd\n", err); + + /* UCP 
initialization */ + status = ucp_config_read(NULL, NULL, &config); + CHKERR_JUMP(status != UCS_OK, "ucp_config_read\n", err); + + ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_REQUEST_SIZE | + UCP_PARAM_FIELD_REQUEST_INIT; + ucp_params.features = UCP_FEATURE_TAG; + if (ucp_test_mode == TEST_MODE_WAIT || ucp_test_mode == TEST_MODE_EVENTFD) { + ucp_params.features |= UCP_FEATURE_WAKEUP; + } + ucp_params.request_size = sizeof(struct ucx_context); + ucp_params.request_init = request_init; + + status = ucp_init(&ucp_params, config, &ucp_context); + + ucp_config_print(config, stdout, NULL, UCS_CONFIG_PRINT_CONFIG); + + ucp_config_release(config); + CHKERR_JUMP(status != UCS_OK, "ucp_init\n", err); + + worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; + worker_params.thread_mode = UCS_THREAD_MODE_SINGLE; + + status = ucp_worker_create(ucp_context, &worker_params, &ucp_worker); + CHKERR_JUMP(status != UCS_OK, "ucp_worker_create\n", err_cleanup); + + status = ucp_worker_get_address(ucp_worker, &local_addr, &local_addr_len); + CHKERR_JUMP(status != UCS_OK, "ucp_worker_get_address\n", err_worker); + + printf("[0x%x] local address length: %lu\n", + (unsigned int)pthread_self(), local_addr_len); + + /* OOB connection establishment */ + if (client_target_name) { + peer_addr_len = local_addr_len; + + oob_sock = client_connect(client_target_name, server_port); + CHKERR_JUMP(oob_sock < 0, "client_connect\n", err_addr); + + ret = recv(oob_sock, &addr_len, sizeof(addr_len), MSG_WAITALL); + CHKERR_JUMP_RETVAL(ret != (int)sizeof(addr_len), + "receive address length\n", err_addr, ret); + + peer_addr_len = addr_len; + peer_addr = malloc(peer_addr_len); + CHKERR_JUMP(!peer_addr, "allocate memory\n", err_addr); + + ret = recv(oob_sock, peer_addr, peer_addr_len, MSG_WAITALL); + CHKERR_JUMP_RETVAL(ret != (int)peer_addr_len, + "receive address\n", err_peer_addr, ret); + } else { + oob_sock = server_connect(server_port); + CHKERR_JUMP(oob_sock < 0, 
"server_connect\n", err_peer_addr); + + addr_len = local_addr_len; + ret = send(oob_sock, &addr_len, sizeof(addr_len), 0); + CHKERR_JUMP_RETVAL(ret != (int)sizeof(addr_len), + "send address length\n", err_peer_addr, ret); + + ret = send(oob_sock, local_addr, local_addr_len, 0); + CHKERR_JUMP_RETVAL(ret != (int)local_addr_len, "send address\n", + err_peer_addr, ret); + } + + ret = run_test(client_target_name, ucp_worker); + + if (!ret && !err_handling_opt.failure) { + /* Make sure remote is disconnected before destroying local worker */ + ret = barrier(oob_sock); + } + close(oob_sock); + +err_peer_addr: + free(peer_addr); + +err_addr: + ucp_worker_release_address(ucp_worker, local_addr); + +err_worker: + ucp_worker_destroy(ucp_worker); + +err_cleanup: + ucp_cleanup(ucp_context); + +err: + return ret; +} + +ucs_status_t parse_cmd(int argc, char * const argv[], char **server_name) +{ + int c = 0, index = 0; + opterr = 0; + + err_handling_opt.ucp_err_mode = UCP_ERR_HANDLING_MODE_NONE; + err_handling_opt.failure = 0; + + while ((c = getopt(argc, argv, "wfben:p:s:m:h")) != -1) { + switch (c) { + case 'w': + ucp_test_mode = TEST_MODE_WAIT; + break; + case 'f': + ucp_test_mode = TEST_MODE_EVENTFD; + break; + case 'b': + ucp_test_mode = TEST_MODE_PROBE; + break; + case 'e': + err_handling_opt.ucp_err_mode = UCP_ERR_HANDLING_MODE_PEER; + err_handling_opt.failure = 1; + break; + case 'n': + *server_name = optarg; + break; + case 'p': + server_port = atoi(optarg); + if (server_port <= 0) { + fprintf(stderr, "Wrong server port number %d\n", server_port); + return UCS_ERR_UNSUPPORTED; + } + break; + case 's': + test_string_length = atol(optarg); + if (test_string_length <= 0) { + fprintf(stderr, "Wrong string size %ld\n", test_string_length); + return UCS_ERR_UNSUPPORTED; + } + break; + case 'm': + test_mem_type = parse_mem_type(optarg); + if (test_mem_type == UCS_MEMORY_TYPE_LAST) { + return UCS_ERR_UNSUPPORTED; + } + break; + case '?': + if (optopt == 's') { + fprintf(stderr, 
"Option -%c requires an argument.\n", optopt); + } else if (isprint (optopt)) { + fprintf(stderr, "Unknown option `-%c'.\n", optopt); + } else { + fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt); + } + /* Fall through */ + case 'h': + default: + fprintf(stderr, "Usage: ucp_hello_world [parameters]\n"); + fprintf(stderr, "UCP hello world client/server example utility\n"); + fprintf(stderr, "\nParameters are:\n"); + fprintf(stderr, " -w Select test mode \"wait\" to test " + "ucp_worker_wait function\n"); + fprintf(stderr, " -f Select test mode \"event fd\" to test " + "ucp_worker_get_efd function with later poll\n"); + fprintf(stderr, " -b Select test mode \"busy polling\" to test " + "ucp_tag_probe_nb and ucp_worker_progress (default)\n"); + fprintf(stderr, " -e Emulate unexpected failure on server side" + "and handle an error on client side with enabled " + "UCP_ERR_HANDLING_MODE_PEER\n"); + print_common_help(); + fprintf(stderr, "\n"); + return UCS_ERR_UNSUPPORTED; + } + } + fprintf(stderr, "INFO: UCP_HELLO_WORLD mode = %d server = %s port = %d\n", + ucp_test_mode, *server_name, server_port); + + for (index = optind; index < argc; index++) { + fprintf(stderr, "WARNING: Non-option argument %s\n", argv[index]); + } + return UCS_OK; +} diff --git a/test/examples/uct_hello_world.c b/test/examples/uct_hello_world.c new file mode 100644 index 0000000..73fa383 --- /dev/null +++ b/test/examples/uct_hello_world.c @@ -0,0 +1,759 @@ +/** +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* Copyright (C) Mellanox Technologies Ltd. 2015-2019. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#include "ucx_hello_world.h" +#include + +#include + +#include +#include + +typedef enum { + FUNC_AM_SHORT, + FUNC_AM_BCOPY, + FUNC_AM_ZCOPY +} func_am_t; + +typedef struct { + int is_uct_desc; +} recv_desc_t; + +typedef struct { + char *server_name; + uint16_t server_port; + func_am_t func_am_type; + const char *dev_name; + const char *tl_name; + long test_strlen; +} cmd_args_t; + +typedef struct { + uct_iface_attr_t iface_attr; /* Interface attributes: capabilities and limitations */ + uct_iface_h iface; /* Communication interface context */ + uct_md_attr_t md_attr; /* Memory domain attributes: capabilities and limitations */ + uct_md_h md; /* Memory domain */ + uct_worker_h worker; /* Workers represent allocated resources in a communication thread */ +} iface_info_t; + +/* Helper data type for am_short */ +typedef struct { + uint64_t header; + char *payload; + size_t len; +} am_short_args_t; + +/* Helper data type for am_bcopy */ +typedef struct { + char *data; + size_t len; +} am_bcopy_args_t; + +/* Helper data type for am_zcopy */ +typedef struct { + uct_completion_t uct_comp; + uct_md_h md; + uct_mem_h memh; +} zcopy_comp_t; + +static void* desc_holder = NULL; + +static char *func_am_t_str(func_am_t func_am_type) +{ + switch (func_am_type) { + case FUNC_AM_SHORT: + return "uct_ep_am_short"; + case FUNC_AM_BCOPY: + return "uct_ep_am_bcopy"; + case FUNC_AM_ZCOPY: + return "uct_ep_am_zcopy"; + } + return NULL; +} + +static size_t func_am_max_size(func_am_t func_am_type, + const uct_iface_attr_t *attr) +{ + switch (func_am_type) { + case FUNC_AM_SHORT: + return attr->cap.am.max_short; + case FUNC_AM_BCOPY: + return attr->cap.am.max_bcopy; + case FUNC_AM_ZCOPY: + return attr->cap.am.max_zcopy; + } + return 0; +} + +/* Helper function for am_short */ +void am_short_params_pack(char *buf, size_t len, am_short_args_t *args) +{ + args->header = *(uint64_t *)buf; + if (len > sizeof(args->header)) { + args->payload = (buf + sizeof(args->header)); + args->len = 
len - sizeof(args->header); + } else { + args->payload = NULL; + args->len = 0; + } +} + +ucs_status_t do_am_short(iface_info_t *if_info, uct_ep_h ep, uint8_t id, + const cmd_args_t *cmd_args, char *buf) +{ + ucs_status_t status; + am_short_args_t send_args; + + am_short_params_pack(buf, cmd_args->test_strlen, &send_args); + + do { + /* Send active message to remote endpoint */ + status = uct_ep_am_short(ep, id, send_args.header, send_args.payload, + send_args.len); + uct_worker_progress(if_info->worker); + } while (status == UCS_ERR_NO_RESOURCE); + + return status; +} + +/* Pack callback for am_bcopy */ +size_t am_bcopy_data_pack_cb(void *dest, void *arg) +{ + am_bcopy_args_t *bc_args = arg; + mem_type_memcpy(dest, bc_args->data, bc_args->len); + return bc_args->len; +} + +ucs_status_t do_am_bcopy(iface_info_t *if_info, uct_ep_h ep, uint8_t id, + const cmd_args_t *cmd_args, char *buf) +{ + am_bcopy_args_t args; + ssize_t len; + + args.data = buf; + args.len = cmd_args->test_strlen; + + /* Send active message to remote endpoint */ + do { + len = uct_ep_am_bcopy(ep, id, am_bcopy_data_pack_cb, &args, 0); + uct_worker_progress(if_info->worker); + } while (len == UCS_ERR_NO_RESOURCE); + /* Negative len is an error code */ + return (len >= 0) ? 
UCS_OK : (ucs_status_t)len; +} + +/* Completion callback for am_zcopy */ +void zcopy_completion_cb(uct_completion_t *self, ucs_status_t status) +{ + zcopy_comp_t *comp = (zcopy_comp_t *)self; + assert((comp->uct_comp.count == 0) && (status == UCS_OK)); + if (comp->memh != UCT_MEM_HANDLE_NULL) { + uct_md_mem_dereg(comp->md, comp->memh); + } + desc_holder = (void *)0xDEADBEEF; +} + +ucs_status_t do_am_zcopy(iface_info_t *if_info, uct_ep_h ep, uint8_t id, + const cmd_args_t *cmd_args, char *buf) +{ + ucs_status_t status = UCS_OK; + uct_mem_h memh; + uct_iov_t iov; + zcopy_comp_t comp; + + if (if_info->md_attr.cap.flags & UCT_MD_FLAG_NEED_MEMH) { + status = uct_md_mem_reg(if_info->md, buf, cmd_args->test_strlen, + UCT_MD_MEM_ACCESS_RMA, &memh); + } else { + memh = UCT_MEM_HANDLE_NULL; + } + + iov.buffer = buf; + iov.length = cmd_args->test_strlen; + iov.memh = memh; + iov.stride = 0; + iov.count = 1; + + comp.uct_comp.func = zcopy_completion_cb; + comp.uct_comp.count = 1; + comp.md = if_info->md; + comp.memh = memh; + + if (status == UCS_OK) { + do { + status = uct_ep_am_zcopy(ep, id, NULL, 0, &iov, 1, 0, + (uct_completion_t *)&comp); + uct_worker_progress(if_info->worker); + } while (status == UCS_ERR_NO_RESOURCE); + + if (status == UCS_INPROGRESS) { + while (!desc_holder) { + /* Explicitly progress outstanding active message request */ + uct_worker_progress(if_info->worker); + } + status = UCS_OK; + } + } + return status; +} +static void print_strings(const char *label, const char *local_str, + const char *remote_str, size_t length) +{ + fprintf(stdout, "\n\n----- UCT TEST SUCCESS ----\n\n"); + fprintf(stdout, "[%s] %s sent %s", label, local_str, remote_str); + fprintf(stdout, "\n\n---------------------------\n"); + fflush(stdout); +} + +/* Callback to handle receive active message */ +static ucs_status_t hello_world(void *arg, void *data, size_t length, + unsigned flags) +{ + func_am_t func_am_type = *(func_am_t *)arg; + recv_desc_t *rdesc; + + 
print_strings("callback", func_am_t_str(func_am_type), data, length); + + if (flags & UCT_CB_PARAM_FLAG_DESC) { + rdesc = (recv_desc_t *)data - 1; + /* Hold descriptor to release later and return UCS_INPROGRESS */ + rdesc->is_uct_desc = 1; + desc_holder = rdesc; + return UCS_INPROGRESS; + } + + /* We need to copy-out data and return UCS_OK if want to use the data + * outside the callback */ + rdesc = malloc(sizeof(*rdesc) + length); + CHKERR_ACTION(rdesc == NULL, "allocate memory\n", return UCS_ERR_NO_MEMORY); + rdesc->is_uct_desc = 0; + memcpy(rdesc + 1, data, length); + desc_holder = rdesc; + return UCS_OK; +} + +/* Init the transport by its name */ +static ucs_status_t init_iface(char *dev_name, char *tl_name, + func_am_t func_am_type, + iface_info_t *iface_p) +{ + ucs_status_t status; + uct_iface_config_t *config; /* Defines interface configuration options */ + uct_iface_params_t params; + + params.field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_DEVICE | + UCT_IFACE_PARAM_FIELD_STATS_ROOT | + UCT_IFACE_PARAM_FIELD_RX_HEADROOM | + UCT_IFACE_PARAM_FIELD_CPU_MASK; + params.open_mode = UCT_IFACE_OPEN_MODE_DEVICE; + params.mode.device.tl_name = tl_name; + params.mode.device.dev_name = dev_name; + params.stats_root = NULL; + params.rx_headroom = sizeof(recv_desc_t); + + UCS_CPU_ZERO(&params.cpu_mask); + /* Read transport-specific interface configuration */ + status = uct_md_iface_config_read(iface_p->md, tl_name, NULL, NULL, &config); + CHKERR_JUMP(UCS_OK != status, "setup iface_config", error_ret); + + /* Open communication interface */ + assert(iface_p->iface == NULL); + status = uct_iface_open(iface_p->md, iface_p->worker, &params, config, + &iface_p->iface); + uct_config_release(config); + CHKERR_JUMP(UCS_OK != status, "open temporary interface", error_ret); + + /* Enable progress on the interface */ + uct_iface_progress_enable(iface_p->iface, + UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); + + /* Get interface attributes */ + status = 
uct_iface_query(iface_p->iface, &iface_p->iface_attr); + CHKERR_JUMP(UCS_OK != status, "query iface", error_iface); + + /* Check if current device and transport support required active messages */ + if ((func_am_type == FUNC_AM_SHORT) && + (iface_p->iface_attr.cap.flags & UCT_IFACE_FLAG_AM_SHORT)) { + if (test_mem_type != UCS_MEMORY_TYPE_CUDA) { + return UCS_OK; + } else { + fprintf(stderr, "AM short protocol doesn't support CUDA memory"); + } + } + + if ((func_am_type == FUNC_AM_BCOPY) && + (iface_p->iface_attr.cap.flags & UCT_IFACE_FLAG_AM_BCOPY)) { + return UCS_OK; + } + + if ((func_am_type == FUNC_AM_ZCOPY) && + (iface_p->iface_attr.cap.flags & UCT_IFACE_FLAG_AM_ZCOPY)) { + return UCS_OK; + } + +error_iface: + uct_iface_close(iface_p->iface); + iface_p->iface = NULL; +error_ret: + return UCS_ERR_UNSUPPORTED; +} + +/* Device and transport to be used are determined by minimum latency */ +static ucs_status_t dev_tl_lookup(const cmd_args_t *cmd_args, + iface_info_t *iface_p) +{ + uct_tl_resource_desc_t *tl_resources = NULL; /* Communication resource descriptor */ + unsigned num_tl_resources = 0; /* Number of transport resources resource objects created */ + uct_component_h *components; + unsigned num_components; + unsigned cmpt_index; + uct_component_attr_t component_attr; + unsigned md_index; + unsigned tl_index; + uct_md_config_t *md_config; + ucs_status_t status; + + status = uct_query_components(&components, &num_components); + CHKERR_JUMP(UCS_OK != status, "query for components", error_ret); + + for (cmpt_index = 0; cmpt_index < num_components; ++cmpt_index) { + + component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT; + status = uct_component_query(components[cmpt_index], &component_attr); + CHKERR_JUMP(UCS_OK != status, "query component attributes", + release_component_list); + + component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES; + component_attr.md_resources = alloca(sizeof(*component_attr.md_resources) * + 
component_attr.md_resource_count); + status = uct_component_query(components[cmpt_index], &component_attr); + CHKERR_JUMP(UCS_OK != status, "query for memory domain resources", + release_component_list); + + iface_p->iface = NULL; + + /* Iterate through memory domain resources */ + for (md_index = 0; md_index < component_attr.md_resource_count; ++md_index) { + status = uct_md_config_read(components[cmpt_index], NULL, NULL, + &md_config); + CHKERR_JUMP(UCS_OK != status, "read MD config", + release_component_list); + + status = uct_md_open(components[cmpt_index], + component_attr.md_resources[md_index].md_name, + md_config, &iface_p->md); + uct_config_release(md_config); + CHKERR_JUMP(UCS_OK != status, "open memory domains", + release_component_list); + + status = uct_md_query(iface_p->md, &iface_p->md_attr); + CHKERR_JUMP(UCS_OK != status, "query iface", + close_md); + + status = uct_md_query_tl_resources(iface_p->md, &tl_resources, + &num_tl_resources); + CHKERR_JUMP(UCS_OK != status, "query transport resources", close_md); + + /* Go through each available transport and find the proper name */ + for (tl_index = 0; tl_index < num_tl_resources; ++tl_index) { + if (!strcmp(cmd_args->dev_name, tl_resources[tl_index].dev_name) && + !strcmp(cmd_args->tl_name, tl_resources[tl_index].tl_name)) { + if (!(iface_p->md_attr.cap.reg_mem_types & UCS_BIT(test_mem_type))) { + fprintf(stderr, "Unsupported memory type %s by " + UCT_TL_RESOURCE_DESC_FMT" on %s MD\n", + ucs_memory_type_names[test_mem_type], + UCT_TL_RESOURCE_DESC_ARG(&tl_resources[tl_index]), + component_attr.md_resources[md_index].md_name); + status = UCS_ERR_UNSUPPORTED; + break; + } + + status = init_iface(tl_resources[tl_index].dev_name, + tl_resources[tl_index].tl_name, + cmd_args->func_am_type, iface_p); + if (status != UCS_OK) { + break; + } + + fprintf(stdout, "Using "UCT_TL_RESOURCE_DESC_FMT"\n", + UCT_TL_RESOURCE_DESC_ARG(&tl_resources[tl_index])); + goto release_tl_resources; + } + } + 
+release_tl_resources: + uct_release_tl_resource_list(tl_resources); + if ((status == UCS_OK) && + (tl_index < num_tl_resources)) { + goto release_component_list; + } + + tl_resources = NULL; + num_tl_resources = 0; + uct_md_close(iface_p->md); + } + } + + fprintf(stderr, "No supported (dev/tl) found (%s/%s)\n", + cmd_args->dev_name, cmd_args->tl_name); + status = UCS_ERR_UNSUPPORTED; + +release_component_list: + uct_release_component_list(components); +error_ret: + return status; +close_md: + uct_md_close(iface_p->md); + goto release_component_list; +} + +int print_err_usage() +{ + const char func_template[] = " -%c Select \"%s\" function to send the message%s\n"; + + fprintf(stderr, "Usage: uct_hello_world [parameters]\n"); + fprintf(stderr, "UCT hello world client/server example utility\n"); + fprintf(stderr, "\nParameters are:\n"); + fprintf(stderr, func_template, 'i', func_am_t_str(FUNC_AM_SHORT), " (default)"); + fprintf(stderr, func_template, 'b', func_am_t_str(FUNC_AM_BCOPY), ""); + fprintf(stderr, func_template, 'z', func_am_t_str(FUNC_AM_ZCOPY), ""); + fprintf(stderr, " -d Select device name\n"); + fprintf(stderr, " -t Select transport layer\n"); + print_common_help(); + fprintf(stderr, "\nExample:\n"); + fprintf(stderr, " Server: uct_hello_world -d eth0 -t tcp\n"); + fprintf(stderr, " Client: uct_hello_world -d eth0 -t tcp -n localhost\n"); + + return UCS_ERR_UNSUPPORTED; +} + +int parse_cmd(int argc, char * const argv[], cmd_args_t *args) +{ + int c = 0, index = 0; + + assert(args); + memset(args, 0, sizeof(*args)); + + /* Defaults */ + args->server_port = 13337; + args->func_am_type = FUNC_AM_SHORT; + args->test_strlen = 16; + + opterr = 0; + while ((c = getopt(argc, argv, "ibzd:t:n:p:s:m:h")) != -1) { + switch (c) { + case 'i': + args->func_am_type = FUNC_AM_SHORT; + break; + case 'b': + args->func_am_type = FUNC_AM_BCOPY; + break; + case 'z': + args->func_am_type = FUNC_AM_ZCOPY; + break; + case 'd': + args->dev_name = optarg; + break; + case 't': + 
args->tl_name = optarg; + break; + case 'n': + args->server_name = optarg; + break; + case 'p': + args->server_port = atoi(optarg); + if (args->server_port <= 0) { + fprintf(stderr, "Wrong server port number %d\n", + args->server_port); + return UCS_ERR_UNSUPPORTED; + } + break; + case 's': + args->test_strlen = atol(optarg); + if (args->test_strlen <= 0) { + fprintf(stderr, "Wrong string size %ld\n", args->test_strlen); + return UCS_ERR_UNSUPPORTED; + } + break; + case 'm': + test_mem_type = parse_mem_type(optarg); + if (test_mem_type == UCS_MEMORY_TYPE_LAST) { + return UCS_ERR_UNSUPPORTED; + } + break; + case '?': + if (optopt == 's') { + fprintf(stderr, "Option -%c requires an argument.\n", optopt); + } else if (isprint (optopt)) { + fprintf(stderr, "Unknown option `-%c'.\n", optopt); + } else { + fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt); + } + case 'h': + default: + return print_err_usage(); + } + } + fprintf(stderr, "INFO: UCT_HELLO_WORLD AM function = %s server = %s port = %d\n", + func_am_t_str(args->func_am_type), args->server_name, + args->server_port); + + for (index = optind; index < argc; index++) { + fprintf(stderr, "WARNING: Non-option argument %s\n", argv[index]); + } + + if (args->dev_name == NULL) { + fprintf(stderr, "WARNING: device is not set\n"); + return print_err_usage(); + } + + if (args->tl_name == NULL) { + fprintf(stderr, "WARNING: transport layer is not set\n"); + return print_err_usage(); + } + + return UCS_OK; +} + +/* The caller is responsible to free *rbuf */ +int sendrecv(int sock, const void *sbuf, size_t slen, void **rbuf) +{ + int ret = 0; + size_t rlen = 0; + *rbuf = NULL; + + ret = send(sock, &slen, sizeof(slen), 0); + if ((ret < 0) || (ret != sizeof(slen))) { + fprintf(stderr, "failed to send buffer length\n"); + return -1; + } + + ret = send(sock, sbuf, slen, 0); + if (ret != (int)slen) { + fprintf(stderr, "failed to send buffer, return value %d\n", ret); + return -1; + } + + ret = recv(sock, &rlen, 
sizeof(rlen), MSG_WAITALL); + if ((ret != sizeof(rlen)) || (rlen > (SIZE_MAX / 2))) { + fprintf(stderr, + "failed to receive device address length, return value %d\n", + ret); + return -1; + } + + *rbuf = calloc(1, rlen); + if (!*rbuf) { + fprintf(stderr, "failed to allocate receive buffer\n"); + return -1; + } + + ret = recv(sock, *rbuf, rlen, MSG_WAITALL); + if (ret != (int)rlen) { + fprintf(stderr, "failed to receive device address, return value %d\n", + ret); + return -1; + } + + return 0; +} +/* Entry point: parse options, open the selected UCT interface, exchange addresses with the peer over the OOB socket, create/connect an endpoint, then either send one active message (client, -n given) or progress the worker until desc_holder is set (server). Returns 0 on success or when the setup is unsupported, otherwise the failing status. */ +int main(int argc, char **argv) +{ + uct_device_addr_t *peer_dev = NULL; + uct_iface_addr_t *peer_iface = NULL; + uct_ep_addr_t *own_ep = NULL; + uct_ep_addr_t *peer_ep = NULL; + uint8_t id = 0; + int oob_sock = -1; /* OOB connection socket */ + ucs_status_t status = UCS_OK; /* status codes for UCS */ + uct_device_addr_t *own_dev; + uct_iface_addr_t *own_iface; + uct_ep_h ep; /* Remote endpoint */ + ucs_async_context_t *async; /* Async event context manages + times and fd notifications */ + cmd_args_t cmd_args; + iface_info_t if_info; + uct_ep_params_t ep_params; + int res; + + /* Parse the command line */ + if (parse_cmd(argc, argv, &cmd_args)) { + status = UCS_ERR_INVALID_PARAM; + goto out; + } + + /* Initialize context + * It is better to use different contexts for different workers */ + status = ucs_async_context_create(UCS_ASYNC_MODE_THREAD_SPINLOCK, &async); + CHKERR_JUMP(UCS_OK != status, "init async context", out); + + /* Create a worker object */ + status = uct_worker_create(async, UCS_THREAD_MODE_SINGLE, &if_info.worker); + CHKERR_JUMP(UCS_OK != status, "create worker", out_cleanup_async); + + /* Search for the desired transport */ + status = dev_tl_lookup(&cmd_args, &if_info); + CHKERR_JUMP(UCS_OK != status, "find supported device and transport", + out_destroy_worker); + + own_dev = (uct_device_addr_t*)calloc(1, if_info.iface_attr.device_addr_len); + CHKERR_ACTION(NULL == own_dev, "allocate memory for dev addr", + status = UCS_ERR_NO_MEMORY; goto out_destroy_iface); + + own_iface = 
(uct_iface_addr_t*)calloc(1, if_info.iface_attr.iface_addr_len); + CHKERR_ACTION(NULL == own_iface, "allocate memory for if addr", + status = UCS_ERR_NO_MEMORY; goto out_free_dev_addrs); + + /* Get device address */ + status = uct_iface_get_device_address(if_info.iface, own_dev); + CHKERR_JUMP(UCS_OK != status, "get device address", out_free_if_addrs); + + if (cmd_args.server_name) { + oob_sock = client_connect(cmd_args.server_name, cmd_args.server_port); + } else { + oob_sock = server_connect(cmd_args.server_port); + } + CHKERR_ACTION(oob_sock < 0, "OOB connect", + status = UCS_ERR_IO_ERROR; goto out_free_if_addrs); /* no socket was opened, so skip close() */ + + res = sendrecv(oob_sock, own_dev, if_info.iface_attr.device_addr_len, + (void **)&peer_dev); + CHKERR_ACTION(0 != res, "device exchange", + status = UCS_ERR_NO_MESSAGE; goto out_close_oob_sock); + + res = uct_iface_is_reachable(if_info.iface, peer_dev, NULL); /* 0 means unreachable; must not be stored in status, where 0 is UCS_OK */ + CHKERR_ACTION(0 == res, "reach the peer", status = UCS_ERR_UNREACHABLE; goto out_close_oob_sock); + + /* Get interface address */ + if (if_info.iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + status = uct_iface_get_address(if_info.iface, own_iface); + CHKERR_JUMP(UCS_OK != status, "get interface address", + out_close_oob_sock); + + status = (ucs_status_t)sendrecv(oob_sock, own_iface, if_info.iface_attr.iface_addr_len, + (void **)&peer_iface); + CHKERR_JUMP(0 != status, "ifaces exchange", out_close_oob_sock); + } + + ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + ep_params.iface = if_info.iface; + if (if_info.iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { + own_ep = (uct_ep_addr_t*)calloc(1, if_info.iface_attr.ep_addr_len); + CHKERR_ACTION(NULL == own_ep, "allocate memory for ep addrs", + status = UCS_ERR_NO_MEMORY; goto out_close_oob_sock); + + /* Create new endpoint */ + status = uct_ep_create(&ep_params, &ep); + CHKERR_JUMP(UCS_OK != status, "create endpoint", out_free_ep_addrs); + + /* Get endpoint address */ + status = uct_ep_get_address(ep, own_ep); + CHKERR_JUMP(UCS_OK != status, "get endpoint address", 
out_free_ep); + + status = (ucs_status_t)sendrecv(oob_sock, own_ep, if_info.iface_attr.ep_addr_len, + (void **)&peer_ep); + CHKERR_JUMP(0 != status, "EPs exchange", out_free_ep); + + /* Connect endpoint to a remote endpoint; a connect error must not be overwritten by later calls */ + status = uct_ep_connect_to_ep(ep, peer_dev, peer_ep); + if ((UCS_OK == status) && barrier(oob_sock)) { + status = UCS_ERR_IO_ERROR; + } + CHKERR_JUMP(UCS_OK != status, "connect endpoint", out_free_ep); + } else if (if_info.iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + /* Create an endpoint which is connected to a remote interface */ + ep_params.field_mask |= UCT_EP_PARAM_FIELD_DEV_ADDR | + UCT_EP_PARAM_FIELD_IFACE_ADDR; + ep_params.dev_addr = peer_dev; + ep_params.iface_addr = peer_iface; + status = uct_ep_create(&ep_params, &ep); + CHKERR_JUMP(UCS_OK != status, "create endpoint", out_free_ep_addrs); + } else { + status = UCS_ERR_UNSUPPORTED; + goto out_free_ep_addrs; + } + + if (cmd_args.test_strlen > func_am_max_size(cmd_args.func_am_type, &if_info.iface_attr)) { + status = UCS_ERR_UNSUPPORTED; + fprintf(stderr, "Test string is too long: %ld, max supported: %lu\n", + cmd_args.test_strlen, + func_am_max_size(cmd_args.func_am_type, &if_info.iface_attr)); + goto out_free_ep; + } + + /* Set active message handler */ + status = uct_iface_set_am_handler(if_info.iface, id, hello_world, + &cmd_args.func_am_type, 0); + CHKERR_JUMP(UCS_OK != status, "set callback", out_free_ep); + + if (cmd_args.server_name) { + char *str = (char *)mem_type_malloc(cmd_args.test_strlen); + CHKERR_ACTION(str == NULL, "allocate memory", + status = UCS_ERR_NO_MEMORY; goto out_free_ep); + res = generate_test_string(str, cmd_args.test_strlen); + CHKERR_ACTION(res < 0, "generate test string", + status = UCS_ERR_NO_MEMORY; mem_type_free(str); goto out_free_ep); /* str is block-local: release it before leaving */ + + /* Send active message to remote endpoint */ + if (cmd_args.func_am_type == FUNC_AM_SHORT) { + status = do_am_short(&if_info, ep, id, &cmd_args, str); + } else if (cmd_args.func_am_type == FUNC_AM_BCOPY) { + status = do_am_bcopy(&if_info, ep, id, &cmd_args, str); + } else if 
(cmd_args.func_am_type == FUNC_AM_ZCOPY) { + status = do_am_zcopy(&if_info, ep, id, &cmd_args, str); + } + + mem_type_free(str); + CHKERR_JUMP(UCS_OK != status, "send active msg", out_free_ep); + } else { + recv_desc_t *rdesc; + + while (desc_holder == NULL) { + /* Explicitly progress any outstanding active message requests */ + uct_worker_progress(if_info.worker); + } + + rdesc = desc_holder; + print_strings("main", func_am_t_str(cmd_args.func_am_type), + (char *)(rdesc + 1), cmd_args.test_strlen); + + if (rdesc->is_uct_desc) { + /* Release descriptor because callback returns UCS_INPROGRESS */ + uct_iface_release_desc(rdesc); + } else { + free(rdesc); + } + } + + if (barrier(oob_sock)) { + status = UCS_ERR_IO_ERROR; + } + +out_free_ep: + uct_ep_destroy(ep); +out_free_ep_addrs: + free(own_ep); + free(peer_ep); +out_close_oob_sock: + close(oob_sock); +out_free_if_addrs: + free(own_iface); + free(peer_iface); +out_free_dev_addrs: + free(own_dev); + free(peer_dev); +out_destroy_iface: + uct_iface_close(if_info.iface); + uct_md_close(if_info.md); +out_destroy_worker: + uct_worker_destroy(if_info.worker); +out_cleanup_async: + ucs_async_context_destroy(async); +out: + return (status == UCS_ERR_UNSUPPORTED) ? UCS_OK : status; +} diff --git a/test/examples/ucx_hello_world.h b/test/examples/ucx_hello_world.h new file mode 100644 index 0000000..ecfd7f8 --- /dev/null +++ b/test/examples/ucx_hello_world.h @@ -0,0 +1,303 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCX_HELLO_WORLD_H +#define UCX_HELLO_WORLD_H + +#include + +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_CUDA +# include +# include +#endif + +/* If _cond is true, print "Failed to <_msg>" to stderr and execute _action. */ +#define CHKERR_ACTION(_cond, _msg, _action) \ + do { \ + if (_cond) { \ + fprintf(stderr, "Failed to %s\n", _msg); \ + _action; \ + } \ + } while (0) + +/* CHKERR_ACTION whose action is a jump to _label. */ +#define CHKERR_JUMP(_cond, _msg, _label) \ + CHKERR_ACTION(_cond, _msg, goto _label) + +/* Like CHKERR_JUMP, but the message also reports the integer _retval. */ +#define CHKERR_JUMP_RETVAL(_cond, _msg, _label, _retval) \ + do { \ + if (_cond) { \ + fprintf(stderr, "Failed to %s, return value %d\n", _msg, _retval); \ + goto _label; \ + } \ + } while (0) + +/* Memory type used by the mem_type_*() helpers below; host memory unless changed. */ +static ucs_memory_type_t test_mem_type = UCS_MEMORY_TYPE_HOST; + +/* Evaluate a CUDA runtime call and log (but do not propagate) a failure. */ +#define CUDA_FUNC(_func) \ + do { \ + cudaError_t _result = (_func); \ + if (cudaSuccess != _result) { \ + fprintf(stderr, "%s failed: %s\n", \ + #_func, cudaGetErrorString(_result)); \ + } \ + } while(0) + +/* Allocate length bytes of the memory type selected by test_mem_type. ptr starts as NULL because CUDA_FUNC only logs errors: a failed CUDA allocation must not hand an uninitialized pointer back to the caller. */ +void *mem_type_malloc(size_t length) +{ + void *ptr = NULL; + + switch (test_mem_type) { + case UCS_MEMORY_TYPE_HOST: + ptr = malloc(length); + break; +#ifdef HAVE_CUDA + case UCS_MEMORY_TYPE_CUDA: + CUDA_FUNC(cudaMalloc(&ptr, length)); + break; + case UCS_MEMORY_TYPE_CUDA_MANAGED: + CUDA_FUNC(cudaMallocManaged(&ptr, length, cudaMemAttachGlobal)); + break; +#endif + default: + fprintf(stderr, "Unsupported memory type: %d\n", test_mem_type); + ptr = NULL; + break; + } + + return ptr; +} +/* Release memory obtained from mem_type_malloc() for the current test_mem_type. */ +void mem_type_free(void *address) +{ + switch (test_mem_type) { + case UCS_MEMORY_TYPE_HOST: + free(address); + break; +#ifdef HAVE_CUDA + case UCS_MEMORY_TYPE_CUDA: + case UCS_MEMORY_TYPE_CUDA_MANAGED: + CUDA_FUNC(cudaFree(address)); + break; +#endif + default: + fprintf(stderr, "Unsupported memory type: %d\n", test_mem_type); + break; + } +} +/* Copy count bytes with the routine matching test_mem_type; returns dst. */ +void *mem_type_memcpy(void *dst, const void *src, size_t count) +{ + switch (test_mem_type) { + case UCS_MEMORY_TYPE_HOST: + memcpy(dst, src, count); + break; +#ifdef HAVE_CUDA + case UCS_MEMORY_TYPE_CUDA: + case UCS_MEMORY_TYPE_CUDA_MANAGED: + 
CUDA_FUNC(cudaMemcpy(dst, src, count, cudaMemcpyDefault)); + break; +#endif + default: + fprintf(stderr, "Unsupported memory type: %d\n", test_mem_type); + break; + } + + return dst; +} +/* Fill count bytes at dst with value using the routine matching test_mem_type; returns dst. */ +void *mem_type_memset(void *dst, int value, size_t count) +{ + switch (test_mem_type) { + case UCS_MEMORY_TYPE_HOST: + memset(dst, value, count); + break; +#ifdef HAVE_CUDA + case UCS_MEMORY_TYPE_CUDA: + case UCS_MEMORY_TYPE_CUDA_MANAGED: + CUDA_FUNC(cudaMemset(dst, value, count)); + break; +#endif + default: + fprintf(stderr, "Unsupported memory type: %d\n", test_mem_type); + break; + } + + return dst; +} +/* Return 1 if this build can use mem_type, 0 otherwise: host is always available, the CUDA types only when built with HAVE_CUDA. The switch must test the mem_type argument, not the global test_mem_type, or every query is answered for the currently selected type. */ +int check_mem_type_support(ucs_memory_type_t mem_type) +{ + switch (mem_type) { + case UCS_MEMORY_TYPE_HOST: + return 1; + case UCS_MEMORY_TYPE_CUDA: + case UCS_MEMORY_TYPE_CUDA_MANAGED: +#ifdef HAVE_CUDA + return 1; +#else + return 0; +#endif + default: + fprintf(stderr, "Unsupported memory type: %d\n", mem_type); + break; + } + + return 0; +} +/* Map a -m option value ("host", "cuda", "cuda-managed") to its memory type; returns UCS_MEMORY_TYPE_LAST after printing a message when the name is unknown or unsupported. */ +ucs_memory_type_t parse_mem_type(const char *opt_arg) +{ + if (!strcmp(opt_arg, "host")) { + return UCS_MEMORY_TYPE_HOST; + } else if (!strcmp(opt_arg, "cuda") && + check_mem_type_support(UCS_MEMORY_TYPE_CUDA)) { + return UCS_MEMORY_TYPE_CUDA; + } else if (!strcmp(opt_arg, "cuda-managed") && + check_mem_type_support(UCS_MEMORY_TYPE_CUDA_MANAGED)) { + return UCS_MEMORY_TYPE_CUDA_MANAGED; + } else { + fprintf(stderr, "Unsupported memory type: \"%s\".\n", opt_arg); + } + + return UCS_MEMORY_TYPE_LAST; +} +/* Print help for the options shared by the hello-world examples; CUDA memory types are listed only when supported. */ +void print_common_help() +{ + fprintf(stderr, " -n name Set node name or IP address " + "of the server (required for client and should be ignored " + "for server)\n"); + fprintf(stderr, " -p port Set alternative server port (default:13337)\n"); + fprintf(stderr, " -s size Set test string length (default:16)\n"); + fprintf(stderr, " -m memory type of messages\n"); + fprintf(stderr, " host - system memory (default)\n"); + if (check_mem_type_support(UCS_MEMORY_TYPE_CUDA)) { + fprintf(stderr, " cuda - NVIDIA GPU memory\n"); + } + 
if (check_mem_type_support(UCS_MEMORY_TYPE_CUDA_MANAGED)) { + fprintf(stderr, " cuda-managed - NVIDIA GPU managed/unified memory\n"); + } +} +/* Server side of the OOB channel: listen on server_port on any local IPv4 address, accept one connection, close the listening socket and return the connected fd (-1 on failure). */ +int server_connect(uint16_t server_port) +{ + struct sockaddr_in inaddr; + int lsock = -1; + int dsock = -1; + int optval = 1; + int ret; + + lsock = socket(AF_INET, SOCK_STREAM, 0); + CHKERR_JUMP(lsock < 0, "open server socket", err); + + optval = 1; + ret = setsockopt(lsock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); + CHKERR_JUMP(ret < 0, "server setsockopt()", err_sock); + + inaddr.sin_family = AF_INET; + inaddr.sin_port = htons(server_port); + inaddr.sin_addr.s_addr = INADDR_ANY; + memset(inaddr.sin_zero, 0, sizeof(inaddr.sin_zero)); + ret = bind(lsock, (struct sockaddr*)&inaddr, sizeof(inaddr)); + CHKERR_JUMP(ret < 0, "bind server", err_sock); + + ret = listen(lsock, 0); + CHKERR_JUMP(ret < 0, "listen server", err_sock); + + fprintf(stdout, "Waiting for connection...\n"); + + /* Accept next connection */ + dsock = accept(lsock, NULL, NULL); + CHKERR_JUMP(dsock < 0, "accept server", err_sock); + + close(lsock); + + return dsock; + +err_sock: + close(lsock); + +err: + return -1; +} +/* Client side of the OOB channel: resolve server with gethostbyname(), connect to server_port on its first address and return the connected fd (-1 on failure). */ +int client_connect(const char *server, uint16_t server_port) +{ + struct sockaddr_in conn_addr; + struct hostent *he; + int connfd; + int ret; + + connfd = socket(AF_INET, SOCK_STREAM, 0); + CHKERR_JUMP(connfd < 0, "open client socket", err); + + he = gethostbyname(server); + CHKERR_JUMP((he == NULL || he->h_addr_list == NULL), "found a host", err_conn); + + conn_addr.sin_family = he->h_addrtype; + conn_addr.sin_port = htons(server_port); + + memcpy(&conn_addr.sin_addr, he->h_addr_list[0], he->h_length); + memset(conn_addr.sin_zero, 0, sizeof(conn_addr.sin_zero)); + + ret = connect(connfd, (struct sockaddr*)&conn_addr, sizeof(conn_addr)); + CHKERR_JUMP(ret < 0, "connect client", err_conn); + + return connfd; + +err_conn: + close(connfd); +err: + return -1; +} +/* Synchronize with the peer by sending and then receiving one int over oob_sock. */ +static int barrier(int oob_sock) +{ + int dummy = 0; + ssize_t res; + 
+ res = send(oob_sock, &dummy, sizeof(dummy), 0); + if (res < 0) { + return res; + } + + res = recv(oob_sock, &dummy, sizeof(dummy), MSG_WAITALL); + + /* number of received bytes should be the same as sent */ + return !(res == sizeof(dummy)); +} +/* Fill str (size bytes of the current test memory type) with a repeating 'A'..'Z' pattern followed by a zero byte, staged through a zeroed host buffer. Returns 0, or -1 if the staging buffer cannot be allocated. */ +static int generate_test_string(char *str, int size) +{ + char *tmp_str; + int i; + + tmp_str = calloc(1, size); + CHKERR_ACTION(tmp_str == NULL, "allocate memory", return -1); /* the macro appends the newline itself */ + + for (i = 0; i < (size - 1); ++i) { + tmp_str[i] = 'A' + (i % 26); + } + + mem_type_memcpy(str, tmp_str, size); + + free(tmp_str); + return 0; +} + +#endif /* UCX_HELLO_WORLD_H */ diff --git a/test/examples/ucx_profiling.c b/test/examples/ucx_profiling.c new file mode 100644 index 0000000..38c3eb6 --- /dev/null +++ b/test/examples/ucx_profiling.c @@ -0,0 +1,43 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include +#include +#include + + +/* calc_pi() would be profiled */ +UCS_PROFILE_FUNC(double, calc_pi, (count), int count) { + double pi_d_4; + int n; + + pi_d_4 = 0.0; + + /* Profile a block of code */ + UCS_PROFILE_CODE("leibnitz") { + for (n = 0; n < count; ++n) { + pi_d_4 += pow(-1.0, n) / (2 * n + 1); + + /* create a timestamp for each step */ + UCS_PROFILE_SAMPLE("step"); + } + } + + return pi_d_4 * 4.0; +} + +/* print_pi() would be profiled */ +UCS_PROFILE_FUNC_VOID(print_pi, (pi), double pi) { + /* Call printf() and profile it */ + UCS_PROFILE_CALL(printf, "PI estimation is %.10f\n", pi); +} + +int main(int argc, char **argv) +{ + double pi = calc_pi(10); + print_pi(pi); + return 0; +} diff --git a/test/gtest/Makefile.am b/test/gtest/Makefile.am new file mode 100644 index 0000000..2ea4972 --- /dev/null +++ b/test/gtest/Makefile.am @@ -0,0 +1,311 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. 
+# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +# Copyright (C) Los Alamos National Security, LLC. 2018 ALL RIGHTS RESERVED. +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +if HAVE_GTEST + +# Set default configuration for running tests +UCX_HANDLE_ERRORS ?= freeze +UCX_LOG_LEVEL ?= warn +UCX_LOG_PRINT_ENABLE ?= y +GTEST_FILTER ?= * +GTEST_EXTRA_ARGS ?= +LAUNCHER ?= +VALGRIND_EXTRA_ARGS ?= + +SUBDIRS = ucs/test_module ucm/test_dlopen + +export UCX_HANDLE_ERRORS +export UCX_LOG_LEVEL +export UCX_LOG_PRINT_ENABLE + +GTEST_ARGS = \ + --gtest_filter=$(GTEST_FILTER) \ + $(GTEST_EXTRA_ARGS) + +VALGRIND_ARGS = \ + --tool=memcheck \ + --leak-check=full \ + --track-origins=yes \ + --fair-sched=try \ + --num-callers=25 \ + --error-exitcode=1 \ + --child-silent-after-fork=yes \ + --suppressions=$(top_srcdir)/contrib/valgrind.supp \ + $(VALGRIND_EXTRA_ARGS) + +noinst_PROGRAMS = gtest + +gtestdir = $(includedir) +gtest_LDADD = \ + $(top_builddir)/src/ucs/libucs.la \ + $(top_builddir)/src/uct/libuct.la \ + $(top_builddir)/src/ucm/libucm.la \ + $(top_builddir)/src/ucp/libucp.la \ + $(top_builddir)/src/tools/perf/lib/libucxperf.la \ + $(OPENMP_CFLAGS) \ + $(GTEST_LIBS) + + +gtest_CPPFLAGS = \ + $(BASE_CPPFLAGS) \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/test \ + -I$(top_builddir)/src \ + -I$(top_srcdir)/test/gtest \ + $(GTEST_CPPFLAGS) \ + $(OPENMP_CFLAGS) + +gtest_LDFLAGS = $(GTEST_LDFLAGS) -no-install -Wl,-dynamic-list-data +gtest_CFLAGS = $(BASE_CFLAGS) +gtest_CXXFLAGS = \ + $(BASE_CXXFLAGS) $(GTEST_CXXFLAGS) \ + -DGTEST_UCM_HOOK_LIB_DIR="\"${abs_builddir}/ucm/test_dlopen/.libs\"" + +gtest_SOURCES = \ + common/gtest-all.cc \ + common/main.cc \ + common/test_gtest_cmn.cc \ + common/mem_buffer.cc \ + common/test_helpers.cc \ + common/test_obj_size.cc \ + common/test_watchdog.cc \ + common/test_perf.cc \ + common/test.cc \ + \ + 
ucm/malloc_hook.cc \ + \ + uct/test_amo.cc \ + uct/test_amo_add_xor.cc \ + uct/test_amo_and_or.cc \ + uct/test_amo_cswap.cc \ + uct/test_amo_fadd_fxor.cc \ + uct/test_amo_fand_for.cc \ + uct/test_amo_swap.cc \ + uct/test_event.cc \ + uct/test_fence.cc \ + uct/test_flush.cc \ + uct/test_many2one_am.cc \ + uct/test_md.cc \ + uct/test_mm.cc \ + uct/test_mem.cc \ + uct/test_p2p_am.cc \ + uct/test_p2p_err.cc \ + uct/test_p2p_mix.cc \ + uct/test_p2p_rma.cc \ + uct/test_pending.cc \ + uct/test_progress.cc \ + uct/test_uct_ep.cc \ + uct/test_uct_perf.cc \ + uct/test_zcopy_comp.cc \ + uct/uct_p2p_test.cc \ + uct/uct_test.cc \ + uct/test_stats.cc \ + ucs/test_event_set.cc \ + ucs/test_stats_filter.cc \ + uct/test_peer_failure.cc \ + uct/test_tag.cc \ + uct/tcp/test_tcp.cc \ + \ + ucp/test_ucp_am.cc \ + ucp/test_ucp_stream.cc \ + ucp/test_ucp_peer_failure.cc \ + ucp/test_ucp_atomic.cc \ + ucp/test_ucp_dt.cc \ + ucp/test_ucp_memheap.cc \ + ucp/test_ucp_mmap.cc \ + ucp/test_ucp_mem_type.cc \ + ucp/test_ucp_perf.cc \ + ucp/test_ucp_rma.cc \ + ucp/test_ucp_rma_mt.cc \ + ucp/test_ucp_tag_cancel.cc \ + ucp/test_ucp_tag_match.cc \ + ucp/test_ucp_tag_offload.cc \ + ucp/test_ucp_tag_mt.cc \ + ucp/test_ucp_tag_perf.cc \ + ucp/test_ucp_tag_probe.cc \ + ucp/test_ucp_tag_xfer.cc \ + ucp/test_ucp_tag_mem_type.cc \ + ucp/test_ucp_tag.cc \ + ucp/test_ucp_context.cc \ + ucp/test_ucp_wireup.cc \ + ucp/test_ucp_wakeup.cc \ + ucp/test_ucp_fence.cc \ + ucp/test_ucp_sockaddr.cc \ + ucp/ucp_test.cc \ + ucp/ucp_datatype.cc \ + \ + ucs/test_algorithm.cc \ + ucs/test_arbiter.cc \ + ucs/test_async.cc \ + ucs/test_callbackq.cc \ + ucs/test_class.cc \ + ucs/test_config.cc \ + ucs/test_datatype.cc \ + ucs/test_debug.cc \ + ucs/test_memtrack.cc \ + ucs/test_math.cc \ + ucs/test_mpmc.cc \ + ucs/test_mpool.cc \ + ucs/test_pgtable.cc \ + ucs/test_profile.cc \ + ucs/test_rcache.cc \ + ucs/test_memtype_cache.cc \ + ucs/test_stats.cc \ + ucs/test_strided_alloc.cc \ + ucs/test_string.cc \ + ucs/test_sys.cc \ + 
ucs/test_sock.cc \ + ucs/test_time.cc \ + ucs/test_twheel.cc \ + ucs/test_frag_list.cc \ + ucs/test_type.cc \ + ucs/test_log.cc \ + ucs/arch/test_x86_64.cc + +if HAVE_IB +gtest_SOURCES += \ + uct/ib/test_ib.cc \ + uct/ib/test_ib_md.cc \ + uct/ib/test_cq_moderation.cc \ + uct/ib/test_ib_xfer.cc \ + uct/ib/test_ib_pkey.cc +gtest_CPPFLAGS += \ + $(IBVERBS_CPPFLAGS) +gtest_LDADD += \ + $(IBVERBS_LDFLAGS) \ + $(top_builddir)/src/uct/ib/libuct_ib.la +if HAVE_DEVX +gtest_SOURCES += \ + uct/ib/test_devx.cc +endif +if HAVE_TL_UD +gtest_SOURCES += \ + uct/ib/ud_base.cc \ + uct/ib/test_ud.cc \ + uct/ib/test_ud_slow_timer.cc \ + uct/ib/test_ud_pending.cc \ + uct/ib/test_ud_ds.cc +endif +if HAVE_TL_RC +gtest_SOURCES += \ + uct/ib/test_rc.cc +endif +if HAVE_TL_DC +gtest_SOURCES += \ + uct/ib/test_dc.cc +endif +if HAVE_RDMACM +gtest_SOURCES += \ + uct/ib/test_sockaddr.cc +endif +endif # HAVE_IB + +if HAVE_CUDA +gtest_SOURCES += \ + ucm/cuda_hooks.cc +gtest_CPPFLAGS += \ + $(CUDA_CPPFLAGS) +gtest_LDADD += \ + $(CUDA_LDFLAGS) \ + $(top_builddir)/src/uct/cuda/libuct_cuda.la +endif + +if HAVE_HIP +if HAVE_GNUXX11 +gtest_SOURCES += \ + ucm/rocm_hooks.cc +gtest_CPPFLAGS += \ + $(HIP_CPPFLAGS) +gtest_CXXFLAGS += \ + $(HIP_CXXFLAGS) +gtest_LDADD += \ + $(HIP_LDFLAGS) \ + $(HIP_LIBS) \ + $(top_builddir)/src/uct/rocm/libuct_rocm.la +endif +endif + +noinst_HEADERS = \ + common/gtest.h \ + common/mem_buffer.h \ + common/test.h \ + common/test_helpers.h \ + common/test_perf.h \ + common/tap.h \ + \ + uct/ib/test_rc.h \ + uct/ib/ud_base.h \ + uct/ib/test_ib.h \ + uct/test_amo.h \ + uct/test_p2p_mix.h \ + uct/test_p2p_rma.h \ + uct/uct_p2p_test.h \ + uct/uct_test.h \ + uct/test_md.h \ + \ + ucp/test_ucp_atomic.h \ + ucp/test_ucp_memheap.h \ + ucp/test_ucp_tag.h \ + ucp/ucp_test.h \ + ucp/ucp_datatype.h +# Command-style targets defined in this file; declared phony so a file of the same name cannot mask them. +.PHONY: help list test test_gdb test_valgrind ucx + + +all-local: gtest + +ucx: + $(MAKE) -C $(top_builddir) + + +help: + @echo + @echo "Targets:" + @echo " list : List unit tests." 
+ @echo " test : Run unit tests." + @echo " test_gdb : Run unit tests with GDB." + @echo " test_valgrind : Run unit tests with Valgrind." + @echo + @echo "Environment variables:" + @echo " GTEST_FILTER : Unit tests filter (\"$(GTEST_FILTER)\")" + @echo " GTEST_EXTRA_ARGS : Additional arguments for gtest (\"$(GTEST_EXTRA_ARGS)\")" + @echo " LAUNCHER : Custom launcher for gtest executable (\"$(LAUNCHER)\")" + @echo " VALGRIND_EXTRA_ARGS : Additional arguments for Valgrind (\"$(VALGRIND_EXTRA_ARGS)\")" + @echo + +# +# List unit tests +# +list: gtest + $(abs_builddir)/gtest --gtest_list_tests $(GTEST_ARGS) + +# +# Run unit tests +# +test: ucx gtest + @rm -f core.* + $(LAUNCHER) stdbuf -e0 -o0 $(abs_builddir)/gtest $(GTEST_ARGS) + +# +# Run unit tests with GDB +# (printf, not 'echo -e', which is not portable across /bin/sh; GDB_ARGS goes before --args, after which everything belongs to the debuggee) +test_gdb: ucx gtest + printf 'r\ninit-if-undefined $$_exitcode=-1\nif $$_exitcode>=0\n\tq\nend\n' > .gdbcommands + $(LAUNCHER) env UCX_HANDLE_ERRORS=none \ + gdb -x .gdbcommands $(GDB_ARGS) --args \ + $(abs_builddir)/gtest $(GTEST_ARGS) + +# +# Run unit tests with valgrind +# +test_valgrind: ucx gtest + $(LAUNCHER) env LD_LIBRARY_PATH="$(VALGRIND_LIBPATH):${LD_LIBRARY_PATH}" \ + stdbuf -e0 -o0 valgrind $(VALGRIND_ARGS) $(abs_builddir)/gtest $(GTEST_ARGS) +endif diff --git a/test/gtest/Makefile.in b/test/gtest/Makefile.in new file mode 100644 index 0000000..50ec363 --- /dev/null +++ b/test/gtest/Makefile.in @@ -0,0 +1,3578 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED. +# Copyright (C) Los Alamos National Security, LLC. 2018 ALL RIGHTS RESERVED. +# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; 
$(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@HAVE_GTEST_TRUE@noinst_PROGRAMS = gtest$(EXEEXT) +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@am__append_1 = \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/test_ib.cc \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/test_ib_md.cc \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/test_cq_moderation.cc \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/test_ib_xfer.cc \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/test_ib_pkey.cc + +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@am__append_2 = \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ $(IBVERBS_CPPFLAGS) + +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@am__append_3 = \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ $(IBVERBS_LDFLAGS) \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ $(top_builddir)/src/uct/ib/libuct_ib.la + +@HAVE_DEVX_TRUE@@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@am__append_4 = \ +@HAVE_DEVX_TRUE@@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/test_devx.cc + +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@am__append_5 = \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ uct/ib/ud_base.cc \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ uct/ib/test_ud.cc \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ uct/ib/test_ud_slow_timer.cc \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ uct/ib/test_ud_pending.cc \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ uct/ib/test_ud_ds.cc + +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@am__append_6 = \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@ uct/ib/test_rc.cc + 
+@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@am__append_7 = \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@ uct/ib/test_dc.cc + +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_RDMACM_TRUE@am__append_8 = \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_RDMACM_TRUE@ uct/ib/test_sockaddr.cc + +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@am__append_9 = \ +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@ ucm/cuda_hooks.cc + +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@am__append_10 = \ +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@ $(CUDA_CPPFLAGS) + +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@am__append_11 = \ +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@ $(CUDA_LDFLAGS) \ +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@ $(top_builddir)/src/uct/cuda/libuct_cuda.la + +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@am__append_12 = \ +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@ ucm/rocm_hooks.cc + +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@am__append_13 = \ +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@ $(HIP_CPPFLAGS) + +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@am__append_14 = \ +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@ $(HIP_CXXFLAGS) + +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@am__append_15 = \ +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@ $(HIP_LDFLAGS) \ +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@ $(HIP_LIBS) \ +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@ $(top_builddir)/src/uct/rocm/libuct_rocm.la + +subdir = test/gtest +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 
$(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__noinst_HEADERS_DIST) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +PROGRAMS = $(noinst_PROGRAMS) +am__gtest_SOURCES_DIST = common/gtest-all.cc common/main.cc \ + common/test_gtest_cmn.cc common/mem_buffer.cc \ + common/test_helpers.cc common/test_obj_size.cc \ + common/test_watchdog.cc common/test_perf.cc common/test.cc \ + ucm/malloc_hook.cc uct/test_amo.cc 
uct/test_amo_add_xor.cc \ + uct/test_amo_and_or.cc uct/test_amo_cswap.cc \ + uct/test_amo_fadd_fxor.cc uct/test_amo_fand_for.cc \ + uct/test_amo_swap.cc uct/test_event.cc uct/test_fence.cc \ + uct/test_flush.cc uct/test_many2one_am.cc uct/test_md.cc \ + uct/test_mm.cc uct/test_mem.cc uct/test_p2p_am.cc \ + uct/test_p2p_err.cc uct/test_p2p_mix.cc uct/test_p2p_rma.cc \ + uct/test_pending.cc uct/test_progress.cc uct/test_uct_ep.cc \ + uct/test_uct_perf.cc uct/test_zcopy_comp.cc \ + uct/uct_p2p_test.cc uct/uct_test.cc uct/test_stats.cc \ + ucs/test_event_set.cc ucs/test_stats_filter.cc \ + uct/test_peer_failure.cc uct/test_tag.cc uct/tcp/test_tcp.cc \ + ucp/test_ucp_am.cc ucp/test_ucp_stream.cc \ + ucp/test_ucp_peer_failure.cc ucp/test_ucp_atomic.cc \ + ucp/test_ucp_dt.cc ucp/test_ucp_memheap.cc \ + ucp/test_ucp_mmap.cc ucp/test_ucp_mem_type.cc \ + ucp/test_ucp_perf.cc ucp/test_ucp_rma.cc \ + ucp/test_ucp_rma_mt.cc ucp/test_ucp_tag_cancel.cc \ + ucp/test_ucp_tag_match.cc ucp/test_ucp_tag_offload.cc \ + ucp/test_ucp_tag_mt.cc ucp/test_ucp_tag_perf.cc \ + ucp/test_ucp_tag_probe.cc ucp/test_ucp_tag_xfer.cc \ + ucp/test_ucp_tag_mem_type.cc ucp/test_ucp_tag.cc \ + ucp/test_ucp_context.cc ucp/test_ucp_wireup.cc \ + ucp/test_ucp_wakeup.cc ucp/test_ucp_fence.cc \ + ucp/test_ucp_sockaddr.cc ucp/ucp_test.cc ucp/ucp_datatype.cc \ + ucs/test_algorithm.cc ucs/test_arbiter.cc ucs/test_async.cc \ + ucs/test_callbackq.cc ucs/test_class.cc ucs/test_config.cc \ + ucs/test_datatype.cc ucs/test_debug.cc ucs/test_memtrack.cc \ + ucs/test_math.cc ucs/test_mpmc.cc ucs/test_mpool.cc \ + ucs/test_pgtable.cc ucs/test_profile.cc ucs/test_rcache.cc \ + ucs/test_memtype_cache.cc ucs/test_stats.cc \ + ucs/test_strided_alloc.cc ucs/test_string.cc ucs/test_sys.cc \ + ucs/test_sock.cc ucs/test_time.cc ucs/test_twheel.cc \ + ucs/test_frag_list.cc ucs/test_type.cc ucs/test_log.cc \ + ucs/arch/test_x86_64.cc uct/ib/test_ib.cc uct/ib/test_ib_md.cc \ + uct/ib/test_cq_moderation.cc uct/ib/test_ib_xfer.cc \ 
+ uct/ib/test_ib_pkey.cc uct/ib/test_devx.cc uct/ib/ud_base.cc \ + uct/ib/test_ud.cc uct/ib/test_ud_slow_timer.cc \ + uct/ib/test_ud_pending.cc uct/ib/test_ud_ds.cc \ + uct/ib/test_rc.cc uct/ib/test_dc.cc uct/ib/test_sockaddr.cc \ + ucm/cuda_hooks.cc ucm/rocm_hooks.cc +am__dirstamp = $(am__leading_dot)dirstamp +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@am__objects_1 = \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/gtest-test_ib.$(OBJEXT) \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/gtest-test_ib_md.$(OBJEXT) \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/gtest-test_cq_moderation.$(OBJEXT) \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/gtest-test_ib_xfer.$(OBJEXT) \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ uct/ib/gtest-test_ib_pkey.$(OBJEXT) +@HAVE_DEVX_TRUE@@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@am__objects_2 = uct/ib/gtest-test_devx.$(OBJEXT) +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@am__objects_3 = uct/ib/gtest-ud_base.$(OBJEXT) \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ uct/ib/gtest-test_ud.$(OBJEXT) \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ uct/ib/gtest-test_ud_slow_timer.$(OBJEXT) \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ uct/ib/gtest-test_ud_pending.$(OBJEXT) \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_UD_TRUE@ uct/ib/gtest-test_ud_ds.$(OBJEXT) +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_RC_TRUE@am__objects_4 = uct/ib/gtest-test_rc.$(OBJEXT) +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_TL_DC_TRUE@am__objects_5 = uct/ib/gtest-test_dc.$(OBJEXT) +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@@HAVE_RDMACM_TRUE@am__objects_6 = uct/ib/gtest-test_sockaddr.$(OBJEXT) +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@am__objects_7 = ucm/gtest-cuda_hooks.$(OBJEXT) +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@am__objects_8 = ucm/gtest-rocm_hooks.$(OBJEXT) +@HAVE_GTEST_TRUE@am_gtest_OBJECTS = common/gtest-gtest-all.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ common/gtest-main.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ common/gtest-test_gtest_cmn.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ common/gtest-mem_buffer.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ 
common/gtest-test_helpers.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ common/gtest-test_obj_size.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ common/gtest-test_watchdog.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ common/gtest-test_perf.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ common/gtest-test.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucm/gtest-malloc_hook.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_amo.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_amo_add_xor.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_amo_and_or.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_amo_cswap.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_amo_fadd_fxor.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_amo_fand_for.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_amo_swap.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_event.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_fence.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_flush.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_many2one_am.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_md.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_mm.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_mem.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_p2p_am.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_p2p_err.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_p2p_mix.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_p2p_rma.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_pending.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_progress.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_uct_ep.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_uct_perf.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_zcopy_comp.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-uct_p2p_test.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-uct_test.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_stats.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_event_set.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_stats_filter.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_peer_failure.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/gtest-test_tag.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ uct/tcp/gtest-test_tcp.$(OBJEXT) \ 
+@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_am.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_stream.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_peer_failure.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_atomic.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_dt.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_memheap.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_mmap.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_mem_type.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_perf.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_rma.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_rma_mt.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_tag_cancel.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_tag_match.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_tag_offload.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_tag_mt.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_tag_perf.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_tag_probe.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_tag_xfer.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_tag_mem_type.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_tag.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_context.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_wireup.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_wakeup.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_fence.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-test_ucp_sockaddr.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-ucp_test.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucp/gtest-ucp_datatype.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_algorithm.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_arbiter.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_async.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_callbackq.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_class.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_config.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_datatype.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_debug.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ 
ucs/gtest-test_memtrack.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_math.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_mpmc.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_mpool.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_pgtable.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_profile.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_rcache.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_memtype_cache.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_stats.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_strided_alloc.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_string.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_sys.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_sock.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_time.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_twheel.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_frag_list.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_type.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/gtest-test_log.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ ucs/arch/gtest-test_x86_64.$(OBJEXT) \ +@HAVE_GTEST_TRUE@ $(am__objects_1) $(am__objects_2) \ +@HAVE_GTEST_TRUE@ $(am__objects_3) $(am__objects_4) \ +@HAVE_GTEST_TRUE@ $(am__objects_5) $(am__objects_6) \ +@HAVE_GTEST_TRUE@ $(am__objects_7) $(am__objects_8) +gtest_OBJECTS = $(am_gtest_OBJECTS) +am__DEPENDENCIES_1 = +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@am__DEPENDENCIES_2 = \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ $(am__DEPENDENCIES_1) \ +@HAVE_GTEST_TRUE@@HAVE_IB_TRUE@ $(top_builddir)/src/uct/ib/libuct_ib.la +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@am__DEPENDENCIES_3 = \ +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@ $(am__DEPENDENCIES_1) \ +@HAVE_CUDA_TRUE@@HAVE_GTEST_TRUE@ $(top_builddir)/src/uct/cuda/libuct_cuda.la +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) \ +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@ $(am__DEPENDENCIES_1) \ +@HAVE_GNUXX11_TRUE@@HAVE_GTEST_TRUE@@HAVE_HIP_TRUE@ $(top_builddir)/src/uct/rocm/libuct_rocm.la +@HAVE_GTEST_TRUE@gtest_DEPENDENCIES = \ +@HAVE_GTEST_TRUE@ 
$(top_builddir)/src/ucs/libucs.la \ +@HAVE_GTEST_TRUE@ $(top_builddir)/src/uct/libuct.la \ +@HAVE_GTEST_TRUE@ $(top_builddir)/src/ucm/libucm.la \ +@HAVE_GTEST_TRUE@ $(top_builddir)/src/ucp/libucp.la \ +@HAVE_GTEST_TRUE@ $(top_builddir)/src/tools/perf/lib/libucxperf.la \ +@HAVE_GTEST_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2) \ +@HAVE_GTEST_TRUE@ $(am__DEPENDENCIES_3) $(am__DEPENDENCIES_4) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +gtest_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(gtest_CXXFLAGS) \ + $(CXXFLAGS) $(gtest_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = common/$(DEPDIR)/gtest-gtest-all.Po \ + common/$(DEPDIR)/gtest-main.Po \ + common/$(DEPDIR)/gtest-mem_buffer.Po \ + common/$(DEPDIR)/gtest-test.Po \ + common/$(DEPDIR)/gtest-test_gtest_cmn.Po \ + common/$(DEPDIR)/gtest-test_helpers.Po \ + common/$(DEPDIR)/gtest-test_obj_size.Po \ + common/$(DEPDIR)/gtest-test_perf.Po \ + common/$(DEPDIR)/gtest-test_watchdog.Po \ + ucm/$(DEPDIR)/gtest-cuda_hooks.Po \ + ucm/$(DEPDIR)/gtest-malloc_hook.Po \ + ucm/$(DEPDIR)/gtest-rocm_hooks.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_am.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_atomic.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_context.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_dt.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_fence.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_mem_type.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_memheap.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_mmap.Po \ + 
ucp/$(DEPDIR)/gtest-test_ucp_peer_failure.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_perf.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_rma.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_rma_mt.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_sockaddr.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_stream.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_tag.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_tag_cancel.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_tag_match.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_tag_mem_type.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_tag_mt.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_tag_offload.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_tag_perf.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_tag_probe.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_tag_xfer.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_wakeup.Po \ + ucp/$(DEPDIR)/gtest-test_ucp_wireup.Po \ + ucp/$(DEPDIR)/gtest-ucp_datatype.Po \ + ucp/$(DEPDIR)/gtest-ucp_test.Po \ + ucs/$(DEPDIR)/gtest-test_algorithm.Po \ + ucs/$(DEPDIR)/gtest-test_arbiter.Po \ + ucs/$(DEPDIR)/gtest-test_async.Po \ + ucs/$(DEPDIR)/gtest-test_callbackq.Po \ + ucs/$(DEPDIR)/gtest-test_class.Po \ + ucs/$(DEPDIR)/gtest-test_config.Po \ + ucs/$(DEPDIR)/gtest-test_datatype.Po \ + ucs/$(DEPDIR)/gtest-test_debug.Po \ + ucs/$(DEPDIR)/gtest-test_event_set.Po \ + ucs/$(DEPDIR)/gtest-test_frag_list.Po \ + ucs/$(DEPDIR)/gtest-test_log.Po \ + ucs/$(DEPDIR)/gtest-test_math.Po \ + ucs/$(DEPDIR)/gtest-test_memtrack.Po \ + ucs/$(DEPDIR)/gtest-test_memtype_cache.Po \ + ucs/$(DEPDIR)/gtest-test_mpmc.Po \ + ucs/$(DEPDIR)/gtest-test_mpool.Po \ + ucs/$(DEPDIR)/gtest-test_pgtable.Po \ + ucs/$(DEPDIR)/gtest-test_profile.Po \ + ucs/$(DEPDIR)/gtest-test_rcache.Po \ + ucs/$(DEPDIR)/gtest-test_sock.Po \ + ucs/$(DEPDIR)/gtest-test_stats.Po \ + ucs/$(DEPDIR)/gtest-test_stats_filter.Po \ + ucs/$(DEPDIR)/gtest-test_strided_alloc.Po \ + ucs/$(DEPDIR)/gtest-test_string.Po \ + ucs/$(DEPDIR)/gtest-test_sys.Po \ + ucs/$(DEPDIR)/gtest-test_time.Po \ + ucs/$(DEPDIR)/gtest-test_twheel.Po \ + ucs/$(DEPDIR)/gtest-test_type.Po \ + ucs/arch/$(DEPDIR)/gtest-test_x86_64.Po \ + 
uct/$(DEPDIR)/gtest-test_amo.Po \ + uct/$(DEPDIR)/gtest-test_amo_add_xor.Po \ + uct/$(DEPDIR)/gtest-test_amo_and_or.Po \ + uct/$(DEPDIR)/gtest-test_amo_cswap.Po \ + uct/$(DEPDIR)/gtest-test_amo_fadd_fxor.Po \ + uct/$(DEPDIR)/gtest-test_amo_fand_for.Po \ + uct/$(DEPDIR)/gtest-test_amo_swap.Po \ + uct/$(DEPDIR)/gtest-test_event.Po \ + uct/$(DEPDIR)/gtest-test_fence.Po \ + uct/$(DEPDIR)/gtest-test_flush.Po \ + uct/$(DEPDIR)/gtest-test_many2one_am.Po \ + uct/$(DEPDIR)/gtest-test_md.Po uct/$(DEPDIR)/gtest-test_mem.Po \ + uct/$(DEPDIR)/gtest-test_mm.Po \ + uct/$(DEPDIR)/gtest-test_p2p_am.Po \ + uct/$(DEPDIR)/gtest-test_p2p_err.Po \ + uct/$(DEPDIR)/gtest-test_p2p_mix.Po \ + uct/$(DEPDIR)/gtest-test_p2p_rma.Po \ + uct/$(DEPDIR)/gtest-test_peer_failure.Po \ + uct/$(DEPDIR)/gtest-test_pending.Po \ + uct/$(DEPDIR)/gtest-test_progress.Po \ + uct/$(DEPDIR)/gtest-test_stats.Po \ + uct/$(DEPDIR)/gtest-test_tag.Po \ + uct/$(DEPDIR)/gtest-test_uct_ep.Po \ + uct/$(DEPDIR)/gtest-test_uct_perf.Po \ + uct/$(DEPDIR)/gtest-test_zcopy_comp.Po \ + uct/$(DEPDIR)/gtest-uct_p2p_test.Po \ + uct/$(DEPDIR)/gtest-uct_test.Po \ + uct/ib/$(DEPDIR)/gtest-test_cq_moderation.Po \ + uct/ib/$(DEPDIR)/gtest-test_dc.Po \ + uct/ib/$(DEPDIR)/gtest-test_devx.Po \ + uct/ib/$(DEPDIR)/gtest-test_ib.Po \ + uct/ib/$(DEPDIR)/gtest-test_ib_md.Po \ + uct/ib/$(DEPDIR)/gtest-test_ib_pkey.Po \ + uct/ib/$(DEPDIR)/gtest-test_ib_xfer.Po \ + uct/ib/$(DEPDIR)/gtest-test_rc.Po \ + uct/ib/$(DEPDIR)/gtest-test_sockaddr.Po \ + uct/ib/$(DEPDIR)/gtest-test_ud.Po \ + uct/ib/$(DEPDIR)/gtest-test_ud_ds.Po \ + uct/ib/$(DEPDIR)/gtest-test_ud_pending.Po \ + uct/ib/$(DEPDIR)/gtest-test_ud_slow_timer.Po \ + uct/ib/$(DEPDIR)/gtest-ud_base.Po \ + uct/tcp/$(DEPDIR)/gtest-test_tcp.Po +am__mv = mv -f +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +SOURCES = $(gtest_SOURCES) +DIST_SOURCES = $(am__gtest_SOURCES_DIST) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__noinst_HEADERS_DIST = common/gtest.h common/mem_buffer.h \ + common/test.h common/test_helpers.h common/test_perf.h \ + common/tap.h uct/ib/test_rc.h uct/ib/ud_base.h \ + uct/ib/test_ib.h uct/test_amo.h uct/test_p2p_mix.h \ + uct/test_p2p_rma.h uct/uct_p2p_test.h uct/uct_test.h \ + uct/test_md.h ucp/test_ucp_atomic.h ucp/test_ucp_memheap.h \ + ucp/test_ucp_tag.h ucp/ucp_test.h ucp/ucp_datatype.h +HEADERS = $(noinst_HEADERS) +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) 
$(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = ucs/test_module ucm/test_dlopen +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = 
@CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ 
+JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = 
@am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +@HAVE_GTEST_TRUE@SUBDIRS = ucs/test_module ucm/test_dlopen +@HAVE_GTEST_TRUE@GTEST_ARGS = \ +@HAVE_GTEST_TRUE@ --gtest_filter=$(GTEST_FILTER) \ +@HAVE_GTEST_TRUE@ $(GTEST_EXTRA_ARGS) + +@HAVE_GTEST_TRUE@VALGRIND_ARGS = \ +@HAVE_GTEST_TRUE@ --tool=memcheck \ +@HAVE_GTEST_TRUE@ --leak-check=full \ +@HAVE_GTEST_TRUE@ --track-origins=yes \ +@HAVE_GTEST_TRUE@ --fair-sched=try \ +@HAVE_GTEST_TRUE@ --num-callers=25 \ +@HAVE_GTEST_TRUE@ --error-exitcode=1 \ +@HAVE_GTEST_TRUE@ --child-silent-after-fork=yes \ +@HAVE_GTEST_TRUE@ --suppressions=$(top_srcdir)/contrib/valgrind.supp \ +@HAVE_GTEST_TRUE@ $(VALGRIND_EXTRA_ARGS) + 
+@HAVE_GTEST_TRUE@gtestdir = $(includedir) +@HAVE_GTEST_TRUE@gtest_LDADD = $(top_builddir)/src/ucs/libucs.la \ +@HAVE_GTEST_TRUE@ $(top_builddir)/src/uct/libuct.la \ +@HAVE_GTEST_TRUE@ $(top_builddir)/src/ucm/libucm.la \ +@HAVE_GTEST_TRUE@ $(top_builddir)/src/ucp/libucp.la \ +@HAVE_GTEST_TRUE@ $(top_builddir)/src/tools/perf/lib/libucxperf.la \ +@HAVE_GTEST_TRUE@ $(OPENMP_CFLAGS) $(GTEST_LIBS) \ +@HAVE_GTEST_TRUE@ $(am__append_3) $(am__append_11) \ +@HAVE_GTEST_TRUE@ $(am__append_15) +@HAVE_GTEST_TRUE@gtest_CPPFLAGS = $(BASE_CPPFLAGS) -I$(top_srcdir)/src \ +@HAVE_GTEST_TRUE@ -I$(top_srcdir)/test -I$(top_builddir)/src \ +@HAVE_GTEST_TRUE@ -I$(top_srcdir)/test/gtest $(GTEST_CPPFLAGS) \ +@HAVE_GTEST_TRUE@ $(OPENMP_CFLAGS) $(am__append_2) \ +@HAVE_GTEST_TRUE@ $(am__append_10) $(am__append_13) +@HAVE_GTEST_TRUE@gtest_LDFLAGS = $(GTEST_LDFLAGS) -no-install -Wl,-dynamic-list-data +@HAVE_GTEST_TRUE@gtest_CFLAGS = $(BASE_CFLAGS) +@HAVE_GTEST_TRUE@gtest_CXXFLAGS = $(BASE_CXXFLAGS) $(GTEST_CXXFLAGS) \ +@HAVE_GTEST_TRUE@ -DGTEST_UCM_HOOK_LIB_DIR="\"${abs_builddir}/ucm/test_dlopen/.libs\"" \ +@HAVE_GTEST_TRUE@ $(am__append_14) +@HAVE_GTEST_TRUE@gtest_SOURCES = common/gtest-all.cc common/main.cc \ +@HAVE_GTEST_TRUE@ common/test_gtest_cmn.cc common/mem_buffer.cc \ +@HAVE_GTEST_TRUE@ common/test_helpers.cc \ +@HAVE_GTEST_TRUE@ common/test_obj_size.cc \ +@HAVE_GTEST_TRUE@ common/test_watchdog.cc common/test_perf.cc \ +@HAVE_GTEST_TRUE@ common/test.cc ucm/malloc_hook.cc \ +@HAVE_GTEST_TRUE@ uct/test_amo.cc uct/test_amo_add_xor.cc \ +@HAVE_GTEST_TRUE@ uct/test_amo_and_or.cc uct/test_amo_cswap.cc \ +@HAVE_GTEST_TRUE@ uct/test_amo_fadd_fxor.cc \ +@HAVE_GTEST_TRUE@ uct/test_amo_fand_for.cc uct/test_amo_swap.cc \ +@HAVE_GTEST_TRUE@ uct/test_event.cc uct/test_fence.cc \ +@HAVE_GTEST_TRUE@ uct/test_flush.cc uct/test_many2one_am.cc \ +@HAVE_GTEST_TRUE@ uct/test_md.cc uct/test_mm.cc uct/test_mem.cc \ +@HAVE_GTEST_TRUE@ uct/test_p2p_am.cc uct/test_p2p_err.cc \ +@HAVE_GTEST_TRUE@ 
uct/test_p2p_mix.cc uct/test_p2p_rma.cc \ +@HAVE_GTEST_TRUE@ uct/test_pending.cc uct/test_progress.cc \ +@HAVE_GTEST_TRUE@ uct/test_uct_ep.cc uct/test_uct_perf.cc \ +@HAVE_GTEST_TRUE@ uct/test_zcopy_comp.cc uct/uct_p2p_test.cc \ +@HAVE_GTEST_TRUE@ uct/uct_test.cc uct/test_stats.cc \ +@HAVE_GTEST_TRUE@ ucs/test_event_set.cc \ +@HAVE_GTEST_TRUE@ ucs/test_stats_filter.cc \ +@HAVE_GTEST_TRUE@ uct/test_peer_failure.cc uct/test_tag.cc \ +@HAVE_GTEST_TRUE@ uct/tcp/test_tcp.cc ucp/test_ucp_am.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_stream.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_peer_failure.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_atomic.cc ucp/test_ucp_dt.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_memheap.cc ucp/test_ucp_mmap.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_mem_type.cc ucp/test_ucp_perf.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_rma.cc ucp/test_ucp_rma_mt.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_tag_cancel.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_tag_match.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_tag_offload.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_tag_mt.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_tag_perf.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_tag_probe.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_tag_xfer.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_tag_mem_type.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_tag.cc ucp/test_ucp_context.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_wireup.cc ucp/test_ucp_wakeup.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_fence.cc \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_sockaddr.cc ucp/ucp_test.cc \ +@HAVE_GTEST_TRUE@ ucp/ucp_datatype.cc ucs/test_algorithm.cc \ +@HAVE_GTEST_TRUE@ ucs/test_arbiter.cc ucs/test_async.cc \ +@HAVE_GTEST_TRUE@ ucs/test_callbackq.cc ucs/test_class.cc \ +@HAVE_GTEST_TRUE@ ucs/test_config.cc ucs/test_datatype.cc \ +@HAVE_GTEST_TRUE@ ucs/test_debug.cc ucs/test_memtrack.cc \ +@HAVE_GTEST_TRUE@ ucs/test_math.cc ucs/test_mpmc.cc \ +@HAVE_GTEST_TRUE@ ucs/test_mpool.cc ucs/test_pgtable.cc \ +@HAVE_GTEST_TRUE@ ucs/test_profile.cc ucs/test_rcache.cc \ +@HAVE_GTEST_TRUE@ ucs/test_memtype_cache.cc 
ucs/test_stats.cc \ +@HAVE_GTEST_TRUE@ ucs/test_strided_alloc.cc ucs/test_string.cc \ +@HAVE_GTEST_TRUE@ ucs/test_sys.cc ucs/test_sock.cc \ +@HAVE_GTEST_TRUE@ ucs/test_time.cc ucs/test_twheel.cc \ +@HAVE_GTEST_TRUE@ ucs/test_frag_list.cc ucs/test_type.cc \ +@HAVE_GTEST_TRUE@ ucs/test_log.cc ucs/arch/test_x86_64.cc \ +@HAVE_GTEST_TRUE@ $(am__append_1) $(am__append_4) \ +@HAVE_GTEST_TRUE@ $(am__append_5) $(am__append_6) \ +@HAVE_GTEST_TRUE@ $(am__append_7) $(am__append_8) \ +@HAVE_GTEST_TRUE@ $(am__append_9) $(am__append_12) +@HAVE_GTEST_TRUE@noinst_HEADERS = \ +@HAVE_GTEST_TRUE@ common/gtest.h \ +@HAVE_GTEST_TRUE@ common/mem_buffer.h \ +@HAVE_GTEST_TRUE@ common/test.h \ +@HAVE_GTEST_TRUE@ common/test_helpers.h \ +@HAVE_GTEST_TRUE@ common/test_perf.h \ +@HAVE_GTEST_TRUE@ common/tap.h \ +@HAVE_GTEST_TRUE@ \ +@HAVE_GTEST_TRUE@ uct/ib/test_rc.h \ +@HAVE_GTEST_TRUE@ uct/ib/ud_base.h \ +@HAVE_GTEST_TRUE@ uct/ib/test_ib.h \ +@HAVE_GTEST_TRUE@ uct/test_amo.h \ +@HAVE_GTEST_TRUE@ uct/test_p2p_mix.h \ +@HAVE_GTEST_TRUE@ uct/test_p2p_rma.h \ +@HAVE_GTEST_TRUE@ uct/uct_p2p_test.h \ +@HAVE_GTEST_TRUE@ uct/uct_test.h \ +@HAVE_GTEST_TRUE@ uct/test_md.h \ +@HAVE_GTEST_TRUE@ \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_atomic.h \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_memheap.h \ +@HAVE_GTEST_TRUE@ ucp/test_ucp_tag.h \ +@HAVE_GTEST_TRUE@ ucp/ucp_test.h \ +@HAVE_GTEST_TRUE@ ucp/ucp_datatype.h + +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .cc .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/gtest/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign test/gtest/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +common/$(am__dirstamp): + @$(MKDIR_P) common + @: > common/$(am__dirstamp) +common/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) common/$(DEPDIR) + @: > common/$(DEPDIR)/$(am__dirstamp) +common/gtest-gtest-all.$(OBJEXT): common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/gtest-main.$(OBJEXT): common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/gtest-test_gtest_cmn.$(OBJEXT): common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/gtest-mem_buffer.$(OBJEXT): common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/gtest-test_helpers.$(OBJEXT): common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/gtest-test_obj_size.$(OBJEXT): common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/gtest-test_watchdog.$(OBJEXT): common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +common/gtest-test_perf.$(OBJEXT): common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) 
+common/gtest-test.$(OBJEXT): common/$(am__dirstamp) \ + common/$(DEPDIR)/$(am__dirstamp) +ucm/$(am__dirstamp): + @$(MKDIR_P) ucm + @: > ucm/$(am__dirstamp) +ucm/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ucm/$(DEPDIR) + @: > ucm/$(DEPDIR)/$(am__dirstamp) +ucm/gtest-malloc_hook.$(OBJEXT): ucm/$(am__dirstamp) \ + ucm/$(DEPDIR)/$(am__dirstamp) +uct/$(am__dirstamp): + @$(MKDIR_P) uct + @: > uct/$(am__dirstamp) +uct/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) uct/$(DEPDIR) + @: > uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_amo.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_amo_add_xor.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_amo_and_or.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_amo_cswap.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_amo_fadd_fxor.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_amo_fand_for.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_amo_swap.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_event.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_fence.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_flush.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_many2one_am.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_md.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_mm.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_mem.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_p2p_am.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_p2p_err.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_p2p_mix.$(OBJEXT): uct/$(am__dirstamp) \ + 
uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_p2p_rma.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_pending.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_progress.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_uct_ep.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_uct_perf.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_zcopy_comp.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-uct_p2p_test.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-uct_test.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_stats.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +ucs/$(am__dirstamp): + @$(MKDIR_P) ucs + @: > ucs/$(am__dirstamp) +ucs/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ucs/$(DEPDIR) + @: > ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_event_set.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_stats_filter.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_peer_failure.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/gtest-test_tag.$(OBJEXT): uct/$(am__dirstamp) \ + uct/$(DEPDIR)/$(am__dirstamp) +uct/tcp/$(am__dirstamp): + @$(MKDIR_P) uct/tcp + @: > uct/tcp/$(am__dirstamp) +uct/tcp/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) uct/tcp/$(DEPDIR) + @: > uct/tcp/$(DEPDIR)/$(am__dirstamp) +uct/tcp/gtest-test_tcp.$(OBJEXT): uct/tcp/$(am__dirstamp) \ + uct/tcp/$(DEPDIR)/$(am__dirstamp) +ucp/$(am__dirstamp): + @$(MKDIR_P) ucp + @: > ucp/$(am__dirstamp) +ucp/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ucp/$(DEPDIR) + @: > ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_am.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_stream.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) 
+ucp/gtest-test_ucp_peer_failure.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_atomic.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_dt.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_memheap.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_mmap.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_mem_type.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_perf.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_rma.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_rma_mt.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_tag_cancel.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_tag_match.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_tag_offload.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_tag_mt.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_tag_perf.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_tag_probe.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_tag_xfer.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_tag_mem_type.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_tag.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_context.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_wireup.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_wakeup.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_fence.$(OBJEXT): 
ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-test_ucp_sockaddr.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-ucp_test.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucp/gtest-ucp_datatype.$(OBJEXT): ucp/$(am__dirstamp) \ + ucp/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_algorithm.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_arbiter.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_async.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_callbackq.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_class.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_config.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_datatype.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_debug.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_memtrack.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_math.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_mpmc.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_mpool.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_pgtable.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_profile.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_rcache.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_memtype_cache.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_stats.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_strided_alloc.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_string.$(OBJEXT): ucs/$(am__dirstamp) \ + 
ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_sys.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_sock.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_time.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_twheel.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_frag_list.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_type.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/gtest-test_log.$(OBJEXT): ucs/$(am__dirstamp) \ + ucs/$(DEPDIR)/$(am__dirstamp) +ucs/arch/$(am__dirstamp): + @$(MKDIR_P) ucs/arch + @: > ucs/arch/$(am__dirstamp) +ucs/arch/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) ucs/arch/$(DEPDIR) + @: > ucs/arch/$(DEPDIR)/$(am__dirstamp) +ucs/arch/gtest-test_x86_64.$(OBJEXT): ucs/arch/$(am__dirstamp) \ + ucs/arch/$(DEPDIR)/$(am__dirstamp) +uct/ib/$(am__dirstamp): + @$(MKDIR_P) uct/ib + @: > uct/ib/$(am__dirstamp) +uct/ib/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) uct/ib/$(DEPDIR) + @: > uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_ib.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_ib_md.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_cq_moderation.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_ib_xfer.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_ib_pkey.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_devx.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-ud_base.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_ud.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_ud_slow_timer.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) 
+uct/ib/gtest-test_ud_pending.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_ud_ds.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_rc.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_dc.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +uct/ib/gtest-test_sockaddr.$(OBJEXT): uct/ib/$(am__dirstamp) \ + uct/ib/$(DEPDIR)/$(am__dirstamp) +ucm/gtest-cuda_hooks.$(OBJEXT): ucm/$(am__dirstamp) \ + ucm/$(DEPDIR)/$(am__dirstamp) +ucm/gtest-rocm_hooks.$(OBJEXT): ucm/$(am__dirstamp) \ + ucm/$(DEPDIR)/$(am__dirstamp) + +gtest$(EXEEXT): $(gtest_OBJECTS) $(gtest_DEPENDENCIES) $(EXTRA_gtest_DEPENDENCIES) + @rm -f gtest$(EXEEXT) + $(AM_V_CXXLD)$(gtest_LINK) $(gtest_OBJECTS) $(gtest_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f common/*.$(OBJEXT) + -rm -f ucm/*.$(OBJEXT) + -rm -f ucp/*.$(OBJEXT) + -rm -f ucs/*.$(OBJEXT) + -rm -f ucs/arch/*.$(OBJEXT) + -rm -f uct/*.$(OBJEXT) + -rm -f uct/ib/*.$(OBJEXT) + -rm -f uct/tcp/*.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/gtest-gtest-all.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/gtest-main.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/gtest-mem_buffer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/gtest-test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/gtest-test_gtest_cmn.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/gtest-test_helpers.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/gtest-test_obj_size.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/gtest-test_perf.Po@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@common/$(DEPDIR)/gtest-test_watchdog.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucm/$(DEPDIR)/gtest-cuda_hooks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucm/$(DEPDIR)/gtest-malloc_hook.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucm/$(DEPDIR)/gtest-rocm_hooks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_am.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_atomic.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_context.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_dt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_fence.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_mem_type.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_memheap.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_mmap.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_peer_failure.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_perf.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_rma.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_rma_mt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_sockaddr.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_stream.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@ucp/$(DEPDIR)/gtest-test_ucp_tag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_tag_cancel.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_tag_match.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_tag_mem_type.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_tag_mt.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_tag_offload.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_tag_perf.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_tag_probe.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_tag_xfer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_wakeup.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-test_ucp_wireup.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-ucp_datatype.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucp/$(DEPDIR)/gtest-ucp_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_algorithm.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_arbiter.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_async.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_callbackq.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_class.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@ucs/$(DEPDIR)/gtest-test_config.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_datatype.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_debug.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_event_set.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_frag_list.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_log.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_math.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_memtrack.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_memtype_cache.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_mpmc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_mpool.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_pgtable.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_profile.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_rcache.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_sock.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_stats.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_stats_filter.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_strided_alloc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_string.Po@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_sys.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_time.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_twheel.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/$(DEPDIR)/gtest-test_type.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@ucs/arch/$(DEPDIR)/gtest-test_x86_64.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_amo.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_amo_add_xor.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_amo_and_or.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_amo_cswap.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_amo_fadd_fxor.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_amo_fand_for.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_amo_swap.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_event.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_fence.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_flush.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_many2one_am.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_md.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_mem.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_mm.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_p2p_am.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_p2p_err.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_p2p_mix.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_p2p_rma.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_peer_failure.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_pending.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_progress.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_stats.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_tag.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_uct_ep.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_uct_perf.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-test_zcopy_comp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-uct_p2p_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/$(DEPDIR)/gtest-uct_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_cq_moderation.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_dc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_devx.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_ib.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@uct/ib/$(DEPDIR)/gtest-test_ib_md.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_ib_pkey.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_ib_xfer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_rc.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_sockaddr.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_ud.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_ud_ds.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_ud_pending.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-test_ud_slow_timer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/ib/$(DEPDIR)/gtest-ud_base.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@uct/tcp/$(DEPDIR)/gtest-test_tcp.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.cc.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ 
`$(CYGPATH_W) '$<'` &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cc.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +common/gtest-gtest-all.o: common/gtest-all.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-gtest-all.o -MD -MP -MF common/$(DEPDIR)/gtest-gtest-all.Tpo -c -o common/gtest-gtest-all.o `test -f 'common/gtest-all.cc' || echo '$(srcdir)/'`common/gtest-all.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-gtest-all.Tpo common/$(DEPDIR)/gtest-gtest-all.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/gtest-all.cc' object='common/gtest-gtest-all.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-gtest-all.o `test -f 'common/gtest-all.cc' || echo '$(srcdir)/'`common/gtest-all.cc + +common/gtest-gtest-all.obj: common/gtest-all.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-gtest-all.obj -MD -MP -MF common/$(DEPDIR)/gtest-gtest-all.Tpo -c -o common/gtest-gtest-all.obj `if test -f 'common/gtest-all.cc'; then $(CYGPATH_W) 'common/gtest-all.cc'; else $(CYGPATH_W) '$(srcdir)/common/gtest-all.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-gtest-all.Tpo common/$(DEPDIR)/gtest-gtest-all.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/gtest-all.cc' object='common/gtest-gtest-all.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-gtest-all.obj `if test -f 'common/gtest-all.cc'; then $(CYGPATH_W) 'common/gtest-all.cc'; else $(CYGPATH_W) '$(srcdir)/common/gtest-all.cc'; fi` + +common/gtest-main.o: common/main.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-main.o -MD -MP -MF common/$(DEPDIR)/gtest-main.Tpo -c -o common/gtest-main.o `test -f 'common/main.cc' || echo '$(srcdir)/'`common/main.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-main.Tpo common/$(DEPDIR)/gtest-main.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/main.cc' object='common/gtest-main.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-main.o `test -f 'common/main.cc' || echo '$(srcdir)/'`common/main.cc + +common/gtest-main.obj: common/main.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-main.obj -MD -MP -MF common/$(DEPDIR)/gtest-main.Tpo -c -o common/gtest-main.obj `if test -f 'common/main.cc'; then $(CYGPATH_W) 'common/main.cc'; else $(CYGPATH_W) '$(srcdir)/common/main.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-main.Tpo common/$(DEPDIR)/gtest-main.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/main.cc' object='common/gtest-main.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-main.obj `if test -f 'common/main.cc'; then $(CYGPATH_W) 'common/main.cc'; else $(CYGPATH_W) '$(srcdir)/common/main.cc'; fi` + +common/gtest-test_gtest_cmn.o: common/test_gtest_cmn.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test_gtest_cmn.o -MD -MP -MF common/$(DEPDIR)/gtest-test_gtest_cmn.Tpo -c -o common/gtest-test_gtest_cmn.o `test -f 'common/test_gtest_cmn.cc' || echo '$(srcdir)/'`common/test_gtest_cmn.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test_gtest_cmn.Tpo common/$(DEPDIR)/gtest-test_gtest_cmn.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test_gtest_cmn.cc' object='common/gtest-test_gtest_cmn.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test_gtest_cmn.o `test -f 'common/test_gtest_cmn.cc' || echo '$(srcdir)/'`common/test_gtest_cmn.cc + +common/gtest-test_gtest_cmn.obj: 
common/test_gtest_cmn.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test_gtest_cmn.obj -MD -MP -MF common/$(DEPDIR)/gtest-test_gtest_cmn.Tpo -c -o common/gtest-test_gtest_cmn.obj `if test -f 'common/test_gtest_cmn.cc'; then $(CYGPATH_W) 'common/test_gtest_cmn.cc'; else $(CYGPATH_W) '$(srcdir)/common/test_gtest_cmn.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test_gtest_cmn.Tpo common/$(DEPDIR)/gtest-test_gtest_cmn.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test_gtest_cmn.cc' object='common/gtest-test_gtest_cmn.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test_gtest_cmn.obj `if test -f 'common/test_gtest_cmn.cc'; then $(CYGPATH_W) 'common/test_gtest_cmn.cc'; else $(CYGPATH_W) '$(srcdir)/common/test_gtest_cmn.cc'; fi` + +common/gtest-mem_buffer.o: common/mem_buffer.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-mem_buffer.o -MD -MP -MF common/$(DEPDIR)/gtest-mem_buffer.Tpo -c -o common/gtest-mem_buffer.o `test -f 'common/mem_buffer.cc' || echo '$(srcdir)/'`common/mem_buffer.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-mem_buffer.Tpo common/$(DEPDIR)/gtest-mem_buffer.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/mem_buffer.cc' object='common/gtest-mem_buffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) 
$(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-mem_buffer.o `test -f 'common/mem_buffer.cc' || echo '$(srcdir)/'`common/mem_buffer.cc + +common/gtest-mem_buffer.obj: common/mem_buffer.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-mem_buffer.obj -MD -MP -MF common/$(DEPDIR)/gtest-mem_buffer.Tpo -c -o common/gtest-mem_buffer.obj `if test -f 'common/mem_buffer.cc'; then $(CYGPATH_W) 'common/mem_buffer.cc'; else $(CYGPATH_W) '$(srcdir)/common/mem_buffer.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-mem_buffer.Tpo common/$(DEPDIR)/gtest-mem_buffer.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/mem_buffer.cc' object='common/gtest-mem_buffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-mem_buffer.obj `if test -f 'common/mem_buffer.cc'; then $(CYGPATH_W) 'common/mem_buffer.cc'; else $(CYGPATH_W) '$(srcdir)/common/mem_buffer.cc'; fi` + +common/gtest-test_helpers.o: common/test_helpers.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test_helpers.o -MD -MP -MF common/$(DEPDIR)/gtest-test_helpers.Tpo -c -o common/gtest-test_helpers.o `test -f 'common/test_helpers.cc' || echo '$(srcdir)/'`common/test_helpers.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test_helpers.Tpo common/$(DEPDIR)/gtest-test_helpers.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test_helpers.cc' object='common/gtest-test_helpers.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test_helpers.o `test -f 'common/test_helpers.cc' || echo '$(srcdir)/'`common/test_helpers.cc + +common/gtest-test_helpers.obj: common/test_helpers.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test_helpers.obj -MD -MP -MF common/$(DEPDIR)/gtest-test_helpers.Tpo -c -o common/gtest-test_helpers.obj `if test -f 'common/test_helpers.cc'; then $(CYGPATH_W) 'common/test_helpers.cc'; else $(CYGPATH_W) '$(srcdir)/common/test_helpers.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test_helpers.Tpo common/$(DEPDIR)/gtest-test_helpers.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test_helpers.cc' object='common/gtest-test_helpers.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test_helpers.obj `if test -f 'common/test_helpers.cc'; then $(CYGPATH_W) 'common/test_helpers.cc'; else $(CYGPATH_W) '$(srcdir)/common/test_helpers.cc'; fi` + +common/gtest-test_obj_size.o: common/test_obj_size.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test_obj_size.o -MD -MP -MF common/$(DEPDIR)/gtest-test_obj_size.Tpo -c -o common/gtest-test_obj_size.o `test -f 'common/test_obj_size.cc' || echo '$(srcdir)/'`common/test_obj_size.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test_obj_size.Tpo common/$(DEPDIR)/gtest-test_obj_size.Po 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test_obj_size.cc' object='common/gtest-test_obj_size.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test_obj_size.o `test -f 'common/test_obj_size.cc' || echo '$(srcdir)/'`common/test_obj_size.cc + +common/gtest-test_obj_size.obj: common/test_obj_size.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test_obj_size.obj -MD -MP -MF common/$(DEPDIR)/gtest-test_obj_size.Tpo -c -o common/gtest-test_obj_size.obj `if test -f 'common/test_obj_size.cc'; then $(CYGPATH_W) 'common/test_obj_size.cc'; else $(CYGPATH_W) '$(srcdir)/common/test_obj_size.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test_obj_size.Tpo common/$(DEPDIR)/gtest-test_obj_size.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test_obj_size.cc' object='common/gtest-test_obj_size.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test_obj_size.obj `if test -f 'common/test_obj_size.cc'; then $(CYGPATH_W) 'common/test_obj_size.cc'; else $(CYGPATH_W) '$(srcdir)/common/test_obj_size.cc'; fi` + +common/gtest-test_watchdog.o: common/test_watchdog.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test_watchdog.o -MD -MP -MF common/$(DEPDIR)/gtest-test_watchdog.Tpo -c -o common/gtest-test_watchdog.o 
`test -f 'common/test_watchdog.cc' || echo '$(srcdir)/'`common/test_watchdog.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test_watchdog.Tpo common/$(DEPDIR)/gtest-test_watchdog.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test_watchdog.cc' object='common/gtest-test_watchdog.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test_watchdog.o `test -f 'common/test_watchdog.cc' || echo '$(srcdir)/'`common/test_watchdog.cc + +common/gtest-test_watchdog.obj: common/test_watchdog.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test_watchdog.obj -MD -MP -MF common/$(DEPDIR)/gtest-test_watchdog.Tpo -c -o common/gtest-test_watchdog.obj `if test -f 'common/test_watchdog.cc'; then $(CYGPATH_W) 'common/test_watchdog.cc'; else $(CYGPATH_W) '$(srcdir)/common/test_watchdog.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test_watchdog.Tpo common/$(DEPDIR)/gtest-test_watchdog.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test_watchdog.cc' object='common/gtest-test_watchdog.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test_watchdog.obj `if test -f 'common/test_watchdog.cc'; then $(CYGPATH_W) 'common/test_watchdog.cc'; else $(CYGPATH_W) '$(srcdir)/common/test_watchdog.cc'; fi` + +common/gtest-test_perf.o: common/test_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test_perf.o -MD -MP -MF common/$(DEPDIR)/gtest-test_perf.Tpo -c -o common/gtest-test_perf.o `test -f 'common/test_perf.cc' || echo '$(srcdir)/'`common/test_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test_perf.Tpo common/$(DEPDIR)/gtest-test_perf.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test_perf.cc' object='common/gtest-test_perf.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test_perf.o `test -f 'common/test_perf.cc' || echo '$(srcdir)/'`common/test_perf.cc + +common/gtest-test_perf.obj: common/test_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test_perf.obj -MD -MP -MF common/$(DEPDIR)/gtest-test_perf.Tpo -c -o common/gtest-test_perf.obj `if test -f 'common/test_perf.cc'; then $(CYGPATH_W) 'common/test_perf.cc'; else $(CYGPATH_W) '$(srcdir)/common/test_perf.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test_perf.Tpo common/$(DEPDIR)/gtest-test_perf.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test_perf.cc' object='common/gtest-test_perf.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test_perf.obj `if test -f 'common/test_perf.cc'; then $(CYGPATH_W) 'common/test_perf.cc'; else $(CYGPATH_W) '$(srcdir)/common/test_perf.cc'; fi` + +common/gtest-test.o: 
common/test.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test.o -MD -MP -MF common/$(DEPDIR)/gtest-test.Tpo -c -o common/gtest-test.o `test -f 'common/test.cc' || echo '$(srcdir)/'`common/test.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test.Tpo common/$(DEPDIR)/gtest-test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test.cc' object='common/gtest-test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test.o `test -f 'common/test.cc' || echo '$(srcdir)/'`common/test.cc + +common/gtest-test.obj: common/test.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT common/gtest-test.obj -MD -MP -MF common/$(DEPDIR)/gtest-test.Tpo -c -o common/gtest-test.obj `if test -f 'common/test.cc'; then $(CYGPATH_W) 'common/test.cc'; else $(CYGPATH_W) '$(srcdir)/common/test.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/gtest-test.Tpo common/$(DEPDIR)/gtest-test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='common/test.cc' object='common/gtest-test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o common/gtest-test.obj `if test -f 'common/test.cc'; then $(CYGPATH_W) 'common/test.cc'; else $(CYGPATH_W) '$(srcdir)/common/test.cc'; fi` + +ucm/gtest-malloc_hook.o: ucm/malloc_hook.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucm/gtest-malloc_hook.o -MD -MP -MF ucm/$(DEPDIR)/gtest-malloc_hook.Tpo -c -o ucm/gtest-malloc_hook.o `test -f 'ucm/malloc_hook.cc' || echo '$(srcdir)/'`ucm/malloc_hook.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucm/$(DEPDIR)/gtest-malloc_hook.Tpo ucm/$(DEPDIR)/gtest-malloc_hook.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucm/malloc_hook.cc' object='ucm/gtest-malloc_hook.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucm/gtest-malloc_hook.o `test -f 'ucm/malloc_hook.cc' || echo '$(srcdir)/'`ucm/malloc_hook.cc + +ucm/gtest-malloc_hook.obj: ucm/malloc_hook.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucm/gtest-malloc_hook.obj -MD -MP -MF ucm/$(DEPDIR)/gtest-malloc_hook.Tpo -c -o ucm/gtest-malloc_hook.obj `if test -f 'ucm/malloc_hook.cc'; then $(CYGPATH_W) 'ucm/malloc_hook.cc'; else $(CYGPATH_W) '$(srcdir)/ucm/malloc_hook.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucm/$(DEPDIR)/gtest-malloc_hook.Tpo ucm/$(DEPDIR)/gtest-malloc_hook.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucm/malloc_hook.cc' object='ucm/gtest-malloc_hook.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucm/gtest-malloc_hook.obj `if test -f 'ucm/malloc_hook.cc'; then $(CYGPATH_W) 'ucm/malloc_hook.cc'; else $(CYGPATH_W) '$(srcdir)/ucm/malloc_hook.cc'; fi` + +uct/gtest-test_amo.o: 
uct/test_amo.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo.Tpo -c -o uct/gtest-test_amo.o `test -f 'uct/test_amo.cc' || echo '$(srcdir)/'`uct/test_amo.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo.Tpo uct/$(DEPDIR)/gtest-test_amo.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo.cc' object='uct/gtest-test_amo.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo.o `test -f 'uct/test_amo.cc' || echo '$(srcdir)/'`uct/test_amo.cc + +uct/gtest-test_amo.obj: uct/test_amo.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo.Tpo -c -o uct/gtest-test_amo.obj `if test -f 'uct/test_amo.cc'; then $(CYGPATH_W) 'uct/test_amo.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo.Tpo uct/$(DEPDIR)/gtest-test_amo.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo.cc' object='uct/gtest-test_amo.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo.obj `if test -f 'uct/test_amo.cc'; then $(CYGPATH_W) 'uct/test_amo.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo.cc'; fi` + +uct/gtest-test_amo_add_xor.o: uct/test_amo_add_xor.cc 
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_add_xor.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_add_xor.Tpo -c -o uct/gtest-test_amo_add_xor.o `test -f 'uct/test_amo_add_xor.cc' || echo '$(srcdir)/'`uct/test_amo_add_xor.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_add_xor.Tpo uct/$(DEPDIR)/gtest-test_amo_add_xor.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_add_xor.cc' object='uct/gtest-test_amo_add_xor.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_add_xor.o `test -f 'uct/test_amo_add_xor.cc' || echo '$(srcdir)/'`uct/test_amo_add_xor.cc + +uct/gtest-test_amo_add_xor.obj: uct/test_amo_add_xor.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_add_xor.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_add_xor.Tpo -c -o uct/gtest-test_amo_add_xor.obj `if test -f 'uct/test_amo_add_xor.cc'; then $(CYGPATH_W) 'uct/test_amo_add_xor.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_add_xor.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_add_xor.Tpo uct/$(DEPDIR)/gtest-test_amo_add_xor.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_add_xor.cc' object='uct/gtest-test_amo_add_xor.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_add_xor.obj 
`if test -f 'uct/test_amo_add_xor.cc'; then $(CYGPATH_W) 'uct/test_amo_add_xor.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_add_xor.cc'; fi` + +uct/gtest-test_amo_and_or.o: uct/test_amo_and_or.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_and_or.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_and_or.Tpo -c -o uct/gtest-test_amo_and_or.o `test -f 'uct/test_amo_and_or.cc' || echo '$(srcdir)/'`uct/test_amo_and_or.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_and_or.Tpo uct/$(DEPDIR)/gtest-test_amo_and_or.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_and_or.cc' object='uct/gtest-test_amo_and_or.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_and_or.o `test -f 'uct/test_amo_and_or.cc' || echo '$(srcdir)/'`uct/test_amo_and_or.cc + +uct/gtest-test_amo_and_or.obj: uct/test_amo_and_or.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_and_or.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_and_or.Tpo -c -o uct/gtest-test_amo_and_or.obj `if test -f 'uct/test_amo_and_or.cc'; then $(CYGPATH_W) 'uct/test_amo_and_or.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_and_or.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_and_or.Tpo uct/$(DEPDIR)/gtest-test_amo_and_or.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_and_or.cc' object='uct/gtest-test_amo_and_or.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_and_or.obj `if test -f 'uct/test_amo_and_or.cc'; then $(CYGPATH_W) 'uct/test_amo_and_or.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_and_or.cc'; fi` + +uct/gtest-test_amo_cswap.o: uct/test_amo_cswap.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_cswap.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_cswap.Tpo -c -o uct/gtest-test_amo_cswap.o `test -f 'uct/test_amo_cswap.cc' || echo '$(srcdir)/'`uct/test_amo_cswap.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_cswap.Tpo uct/$(DEPDIR)/gtest-test_amo_cswap.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_cswap.cc' object='uct/gtest-test_amo_cswap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_cswap.o `test -f 'uct/test_amo_cswap.cc' || echo '$(srcdir)/'`uct/test_amo_cswap.cc + +uct/gtest-test_amo_cswap.obj: uct/test_amo_cswap.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_cswap.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_cswap.Tpo -c -o uct/gtest-test_amo_cswap.obj `if test -f 'uct/test_amo_cswap.cc'; then $(CYGPATH_W) 'uct/test_amo_cswap.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_cswap.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_cswap.Tpo uct/$(DEPDIR)/gtest-test_amo_cswap.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_cswap.cc' 
object='uct/gtest-test_amo_cswap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_cswap.obj `if test -f 'uct/test_amo_cswap.cc'; then $(CYGPATH_W) 'uct/test_amo_cswap.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_cswap.cc'; fi` + +uct/gtest-test_amo_fadd_fxor.o: uct/test_amo_fadd_fxor.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_fadd_fxor.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_fadd_fxor.Tpo -c -o uct/gtest-test_amo_fadd_fxor.o `test -f 'uct/test_amo_fadd_fxor.cc' || echo '$(srcdir)/'`uct/test_amo_fadd_fxor.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_fadd_fxor.Tpo uct/$(DEPDIR)/gtest-test_amo_fadd_fxor.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_fadd_fxor.cc' object='uct/gtest-test_amo_fadd_fxor.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_fadd_fxor.o `test -f 'uct/test_amo_fadd_fxor.cc' || echo '$(srcdir)/'`uct/test_amo_fadd_fxor.cc + +uct/gtest-test_amo_fadd_fxor.obj: uct/test_amo_fadd_fxor.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_fadd_fxor.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_fadd_fxor.Tpo -c -o uct/gtest-test_amo_fadd_fxor.obj `if test -f 'uct/test_amo_fadd_fxor.cc'; then $(CYGPATH_W) 'uct/test_amo_fadd_fxor.cc'; else $(CYGPATH_W) 
'$(srcdir)/uct/test_amo_fadd_fxor.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_fadd_fxor.Tpo uct/$(DEPDIR)/gtest-test_amo_fadd_fxor.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_fadd_fxor.cc' object='uct/gtest-test_amo_fadd_fxor.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_fadd_fxor.obj `if test -f 'uct/test_amo_fadd_fxor.cc'; then $(CYGPATH_W) 'uct/test_amo_fadd_fxor.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_fadd_fxor.cc'; fi` + +uct/gtest-test_amo_fand_for.o: uct/test_amo_fand_for.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_fand_for.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_fand_for.Tpo -c -o uct/gtest-test_amo_fand_for.o `test -f 'uct/test_amo_fand_for.cc' || echo '$(srcdir)/'`uct/test_amo_fand_for.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_fand_for.Tpo uct/$(DEPDIR)/gtest-test_amo_fand_for.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_fand_for.cc' object='uct/gtest-test_amo_fand_for.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_fand_for.o `test -f 'uct/test_amo_fand_for.cc' || echo '$(srcdir)/'`uct/test_amo_fand_for.cc + +uct/gtest-test_amo_fand_for.obj: uct/test_amo_fand_for.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) 
$(CXXFLAGS) -MT uct/gtest-test_amo_fand_for.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_fand_for.Tpo -c -o uct/gtest-test_amo_fand_for.obj `if test -f 'uct/test_amo_fand_for.cc'; then $(CYGPATH_W) 'uct/test_amo_fand_for.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_fand_for.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_fand_for.Tpo uct/$(DEPDIR)/gtest-test_amo_fand_for.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_fand_for.cc' object='uct/gtest-test_amo_fand_for.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_fand_for.obj `if test -f 'uct/test_amo_fand_for.cc'; then $(CYGPATH_W) 'uct/test_amo_fand_for.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_fand_for.cc'; fi` + +uct/gtest-test_amo_swap.o: uct/test_amo_swap.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_swap.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_swap.Tpo -c -o uct/gtest-test_amo_swap.o `test -f 'uct/test_amo_swap.cc' || echo '$(srcdir)/'`uct/test_amo_swap.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_swap.Tpo uct/$(DEPDIR)/gtest-test_amo_swap.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_swap.cc' object='uct/gtest-test_amo_swap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_swap.o `test -f 'uct/test_amo_swap.cc' || echo '$(srcdir)/'`uct/test_amo_swap.cc + 
+uct/gtest-test_amo_swap.obj: uct/test_amo_swap.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_amo_swap.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_amo_swap.Tpo -c -o uct/gtest-test_amo_swap.obj `if test -f 'uct/test_amo_swap.cc'; then $(CYGPATH_W) 'uct/test_amo_swap.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_swap.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_amo_swap.Tpo uct/$(DEPDIR)/gtest-test_amo_swap.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_amo_swap.cc' object='uct/gtest-test_amo_swap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_amo_swap.obj `if test -f 'uct/test_amo_swap.cc'; then $(CYGPATH_W) 'uct/test_amo_swap.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_amo_swap.cc'; fi` + +uct/gtest-test_event.o: uct/test_event.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_event.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_event.Tpo -c -o uct/gtest-test_event.o `test -f 'uct/test_event.cc' || echo '$(srcdir)/'`uct/test_event.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_event.Tpo uct/$(DEPDIR)/gtest-test_event.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_event.cc' object='uct/gtest-test_event.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_event.o 
`test -f 'uct/test_event.cc' || echo '$(srcdir)/'`uct/test_event.cc + +uct/gtest-test_event.obj: uct/test_event.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_event.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_event.Tpo -c -o uct/gtest-test_event.obj `if test -f 'uct/test_event.cc'; then $(CYGPATH_W) 'uct/test_event.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_event.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_event.Tpo uct/$(DEPDIR)/gtest-test_event.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_event.cc' object='uct/gtest-test_event.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_event.obj `if test -f 'uct/test_event.cc'; then $(CYGPATH_W) 'uct/test_event.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_event.cc'; fi` + +uct/gtest-test_fence.o: uct/test_fence.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_fence.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_fence.Tpo -c -o uct/gtest-test_fence.o `test -f 'uct/test_fence.cc' || echo '$(srcdir)/'`uct/test_fence.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_fence.Tpo uct/$(DEPDIR)/gtest-test_fence.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_fence.cc' object='uct/gtest-test_fence.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o 
uct/gtest-test_fence.o `test -f 'uct/test_fence.cc' || echo '$(srcdir)/'`uct/test_fence.cc + +uct/gtest-test_fence.obj: uct/test_fence.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_fence.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_fence.Tpo -c -o uct/gtest-test_fence.obj `if test -f 'uct/test_fence.cc'; then $(CYGPATH_W) 'uct/test_fence.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_fence.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_fence.Tpo uct/$(DEPDIR)/gtest-test_fence.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_fence.cc' object='uct/gtest-test_fence.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_fence.obj `if test -f 'uct/test_fence.cc'; then $(CYGPATH_W) 'uct/test_fence.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_fence.cc'; fi` + +uct/gtest-test_flush.o: uct/test_flush.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_flush.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_flush.Tpo -c -o uct/gtest-test_flush.o `test -f 'uct/test_flush.cc' || echo '$(srcdir)/'`uct/test_flush.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_flush.Tpo uct/$(DEPDIR)/gtest-test_flush.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_flush.cc' object='uct/gtest-test_flush.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) 
$(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_flush.o `test -f 'uct/test_flush.cc' || echo '$(srcdir)/'`uct/test_flush.cc + +uct/gtest-test_flush.obj: uct/test_flush.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_flush.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_flush.Tpo -c -o uct/gtest-test_flush.obj `if test -f 'uct/test_flush.cc'; then $(CYGPATH_W) 'uct/test_flush.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_flush.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_flush.Tpo uct/$(DEPDIR)/gtest-test_flush.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_flush.cc' object='uct/gtest-test_flush.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_flush.obj `if test -f 'uct/test_flush.cc'; then $(CYGPATH_W) 'uct/test_flush.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_flush.cc'; fi` + +uct/gtest-test_many2one_am.o: uct/test_many2one_am.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_many2one_am.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_many2one_am.Tpo -c -o uct/gtest-test_many2one_am.o `test -f 'uct/test_many2one_am.cc' || echo '$(srcdir)/'`uct/test_many2one_am.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_many2one_am.Tpo uct/$(DEPDIR)/gtest-test_many2one_am.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_many2one_am.cc' object='uct/gtest-test_many2one_am.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ 
$(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_many2one_am.o `test -f 'uct/test_many2one_am.cc' || echo '$(srcdir)/'`uct/test_many2one_am.cc + +uct/gtest-test_many2one_am.obj: uct/test_many2one_am.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_many2one_am.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_many2one_am.Tpo -c -o uct/gtest-test_many2one_am.obj `if test -f 'uct/test_many2one_am.cc'; then $(CYGPATH_W) 'uct/test_many2one_am.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_many2one_am.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_many2one_am.Tpo uct/$(DEPDIR)/gtest-test_many2one_am.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_many2one_am.cc' object='uct/gtest-test_many2one_am.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_many2one_am.obj `if test -f 'uct/test_many2one_am.cc'; then $(CYGPATH_W) 'uct/test_many2one_am.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_many2one_am.cc'; fi` + +uct/gtest-test_md.o: uct/test_md.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_md.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_md.Tpo -c -o uct/gtest-test_md.o `test -f 'uct/test_md.cc' || echo '$(srcdir)/'`uct/test_md.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_md.Tpo uct/$(DEPDIR)/gtest-test_md.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_md.cc' object='uct/gtest-test_md.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_md.o `test -f 'uct/test_md.cc' || echo '$(srcdir)/'`uct/test_md.cc + +uct/gtest-test_md.obj: uct/test_md.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_md.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_md.Tpo -c -o uct/gtest-test_md.obj `if test -f 'uct/test_md.cc'; then $(CYGPATH_W) 'uct/test_md.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_md.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_md.Tpo uct/$(DEPDIR)/gtest-test_md.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_md.cc' object='uct/gtest-test_md.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_md.obj `if test -f 'uct/test_md.cc'; then $(CYGPATH_W) 'uct/test_md.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_md.cc'; fi` + +uct/gtest-test_mm.o: uct/test_mm.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_mm.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_mm.Tpo -c -o uct/gtest-test_mm.o `test -f 'uct/test_mm.cc' || echo '$(srcdir)/'`uct/test_mm.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_mm.Tpo uct/$(DEPDIR)/gtest-test_mm.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_mm.cc' object='uct/gtest-test_mm.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_mm.o `test -f 'uct/test_mm.cc' || echo '$(srcdir)/'`uct/test_mm.cc + +uct/gtest-test_mm.obj: uct/test_mm.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_mm.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_mm.Tpo -c -o uct/gtest-test_mm.obj `if test -f 'uct/test_mm.cc'; then $(CYGPATH_W) 'uct/test_mm.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_mm.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_mm.Tpo uct/$(DEPDIR)/gtest-test_mm.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_mm.cc' object='uct/gtest-test_mm.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_mm.obj `if test -f 'uct/test_mm.cc'; then $(CYGPATH_W) 'uct/test_mm.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_mm.cc'; fi` + +uct/gtest-test_mem.o: uct/test_mem.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_mem.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_mem.Tpo -c -o uct/gtest-test_mem.o `test -f 'uct/test_mem.cc' || echo '$(srcdir)/'`uct/test_mem.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_mem.Tpo uct/$(DEPDIR)/gtest-test_mem.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_mem.cc' object='uct/gtest-test_mem.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ 
$(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_mem.o `test -f 'uct/test_mem.cc' || echo '$(srcdir)/'`uct/test_mem.cc + +uct/gtest-test_mem.obj: uct/test_mem.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_mem.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_mem.Tpo -c -o uct/gtest-test_mem.obj `if test -f 'uct/test_mem.cc'; then $(CYGPATH_W) 'uct/test_mem.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_mem.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_mem.Tpo uct/$(DEPDIR)/gtest-test_mem.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_mem.cc' object='uct/gtest-test_mem.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_mem.obj `if test -f 'uct/test_mem.cc'; then $(CYGPATH_W) 'uct/test_mem.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_mem.cc'; fi` + +uct/gtest-test_p2p_am.o: uct/test_p2p_am.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_p2p_am.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_p2p_am.Tpo -c -o uct/gtest-test_p2p_am.o `test -f 'uct/test_p2p_am.cc' || echo '$(srcdir)/'`uct/test_p2p_am.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_p2p_am.Tpo uct/$(DEPDIR)/gtest-test_p2p_am.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_p2p_am.cc' object='uct/gtest-test_p2p_am.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ 
$(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_p2p_am.o `test -f 'uct/test_p2p_am.cc' || echo '$(srcdir)/'`uct/test_p2p_am.cc + +uct/gtest-test_p2p_am.obj: uct/test_p2p_am.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_p2p_am.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_p2p_am.Tpo -c -o uct/gtest-test_p2p_am.obj `if test -f 'uct/test_p2p_am.cc'; then $(CYGPATH_W) 'uct/test_p2p_am.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_p2p_am.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_p2p_am.Tpo uct/$(DEPDIR)/gtest-test_p2p_am.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_p2p_am.cc' object='uct/gtest-test_p2p_am.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_p2p_am.obj `if test -f 'uct/test_p2p_am.cc'; then $(CYGPATH_W) 'uct/test_p2p_am.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_p2p_am.cc'; fi` + +uct/gtest-test_p2p_err.o: uct/test_p2p_err.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_p2p_err.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_p2p_err.Tpo -c -o uct/gtest-test_p2p_err.o `test -f 'uct/test_p2p_err.cc' || echo '$(srcdir)/'`uct/test_p2p_err.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_p2p_err.Tpo uct/$(DEPDIR)/gtest-test_p2p_err.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_p2p_err.cc' object='uct/gtest-test_p2p_err.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) 
$(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_p2p_err.o `test -f 'uct/test_p2p_err.cc' || echo '$(srcdir)/'`uct/test_p2p_err.cc + +uct/gtest-test_p2p_err.obj: uct/test_p2p_err.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_p2p_err.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_p2p_err.Tpo -c -o uct/gtest-test_p2p_err.obj `if test -f 'uct/test_p2p_err.cc'; then $(CYGPATH_W) 'uct/test_p2p_err.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_p2p_err.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_p2p_err.Tpo uct/$(DEPDIR)/gtest-test_p2p_err.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_p2p_err.cc' object='uct/gtest-test_p2p_err.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_p2p_err.obj `if test -f 'uct/test_p2p_err.cc'; then $(CYGPATH_W) 'uct/test_p2p_err.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_p2p_err.cc'; fi` + +uct/gtest-test_p2p_mix.o: uct/test_p2p_mix.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_p2p_mix.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_p2p_mix.Tpo -c -o uct/gtest-test_p2p_mix.o `test -f 'uct/test_p2p_mix.cc' || echo '$(srcdir)/'`uct/test_p2p_mix.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_p2p_mix.Tpo uct/$(DEPDIR)/gtest-test_p2p_mix.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_p2p_mix.cc' object='uct/gtest-test_p2p_mix.o' 
libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_p2p_mix.o `test -f 'uct/test_p2p_mix.cc' || echo '$(srcdir)/'`uct/test_p2p_mix.cc + +uct/gtest-test_p2p_mix.obj: uct/test_p2p_mix.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_p2p_mix.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_p2p_mix.Tpo -c -o uct/gtest-test_p2p_mix.obj `if test -f 'uct/test_p2p_mix.cc'; then $(CYGPATH_W) 'uct/test_p2p_mix.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_p2p_mix.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_p2p_mix.Tpo uct/$(DEPDIR)/gtest-test_p2p_mix.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_p2p_mix.cc' object='uct/gtest-test_p2p_mix.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_p2p_mix.obj `if test -f 'uct/test_p2p_mix.cc'; then $(CYGPATH_W) 'uct/test_p2p_mix.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_p2p_mix.cc'; fi` + +uct/gtest-test_p2p_rma.o: uct/test_p2p_rma.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_p2p_rma.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_p2p_rma.Tpo -c -o uct/gtest-test_p2p_rma.o `test -f 'uct/test_p2p_rma.cc' || echo '$(srcdir)/'`uct/test_p2p_rma.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_p2p_rma.Tpo uct/$(DEPDIR)/gtest-test_p2p_rma.Po 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_p2p_rma.cc' object='uct/gtest-test_p2p_rma.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_p2p_rma.o `test -f 'uct/test_p2p_rma.cc' || echo '$(srcdir)/'`uct/test_p2p_rma.cc + +uct/gtest-test_p2p_rma.obj: uct/test_p2p_rma.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_p2p_rma.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_p2p_rma.Tpo -c -o uct/gtest-test_p2p_rma.obj `if test -f 'uct/test_p2p_rma.cc'; then $(CYGPATH_W) 'uct/test_p2p_rma.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_p2p_rma.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_p2p_rma.Tpo uct/$(DEPDIR)/gtest-test_p2p_rma.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_p2p_rma.cc' object='uct/gtest-test_p2p_rma.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_p2p_rma.obj `if test -f 'uct/test_p2p_rma.cc'; then $(CYGPATH_W) 'uct/test_p2p_rma.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_p2p_rma.cc'; fi` + +uct/gtest-test_pending.o: uct/test_pending.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_pending.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_pending.Tpo -c -o uct/gtest-test_pending.o `test -f 'uct/test_pending.cc' || echo '$(srcdir)/'`uct/test_pending.cc +@am__fastdepCXX_TRUE@ 
$(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_pending.Tpo uct/$(DEPDIR)/gtest-test_pending.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_pending.cc' object='uct/gtest-test_pending.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_pending.o `test -f 'uct/test_pending.cc' || echo '$(srcdir)/'`uct/test_pending.cc + +uct/gtest-test_pending.obj: uct/test_pending.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_pending.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_pending.Tpo -c -o uct/gtest-test_pending.obj `if test -f 'uct/test_pending.cc'; then $(CYGPATH_W) 'uct/test_pending.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_pending.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_pending.Tpo uct/$(DEPDIR)/gtest-test_pending.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_pending.cc' object='uct/gtest-test_pending.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_pending.obj `if test -f 'uct/test_pending.cc'; then $(CYGPATH_W) 'uct/test_pending.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_pending.cc'; fi` + +uct/gtest-test_progress.o: uct/test_progress.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_progress.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_progress.Tpo -c -o uct/gtest-test_progress.o `test -f 
'uct/test_progress.cc' || echo '$(srcdir)/'`uct/test_progress.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_progress.Tpo uct/$(DEPDIR)/gtest-test_progress.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_progress.cc' object='uct/gtest-test_progress.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_progress.o `test -f 'uct/test_progress.cc' || echo '$(srcdir)/'`uct/test_progress.cc + +uct/gtest-test_progress.obj: uct/test_progress.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_progress.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_progress.Tpo -c -o uct/gtest-test_progress.obj `if test -f 'uct/test_progress.cc'; then $(CYGPATH_W) 'uct/test_progress.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_progress.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_progress.Tpo uct/$(DEPDIR)/gtest-test_progress.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_progress.cc' object='uct/gtest-test_progress.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_progress.obj `if test -f 'uct/test_progress.cc'; then $(CYGPATH_W) 'uct/test_progress.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_progress.cc'; fi` + +uct/gtest-test_uct_ep.o: uct/test_uct_ep.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT 
uct/gtest-test_uct_ep.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_uct_ep.Tpo -c -o uct/gtest-test_uct_ep.o `test -f 'uct/test_uct_ep.cc' || echo '$(srcdir)/'`uct/test_uct_ep.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_uct_ep.Tpo uct/$(DEPDIR)/gtest-test_uct_ep.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_uct_ep.cc' object='uct/gtest-test_uct_ep.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_uct_ep.o `test -f 'uct/test_uct_ep.cc' || echo '$(srcdir)/'`uct/test_uct_ep.cc + +uct/gtest-test_uct_ep.obj: uct/test_uct_ep.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_uct_ep.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_uct_ep.Tpo -c -o uct/gtest-test_uct_ep.obj `if test -f 'uct/test_uct_ep.cc'; then $(CYGPATH_W) 'uct/test_uct_ep.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_uct_ep.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_uct_ep.Tpo uct/$(DEPDIR)/gtest-test_uct_ep.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_uct_ep.cc' object='uct/gtest-test_uct_ep.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_uct_ep.obj `if test -f 'uct/test_uct_ep.cc'; then $(CYGPATH_W) 'uct/test_uct_ep.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_uct_ep.cc'; fi` + +uct/gtest-test_uct_perf.o: uct/test_uct_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_uct_perf.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_uct_perf.Tpo -c -o uct/gtest-test_uct_perf.o `test -f 'uct/test_uct_perf.cc' || echo '$(srcdir)/'`uct/test_uct_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_uct_perf.Tpo uct/$(DEPDIR)/gtest-test_uct_perf.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_uct_perf.cc' object='uct/gtest-test_uct_perf.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_uct_perf.o `test -f 'uct/test_uct_perf.cc' || echo '$(srcdir)/'`uct/test_uct_perf.cc + +uct/gtest-test_uct_perf.obj: uct/test_uct_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_uct_perf.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_uct_perf.Tpo -c -o uct/gtest-test_uct_perf.obj `if test -f 'uct/test_uct_perf.cc'; then $(CYGPATH_W) 'uct/test_uct_perf.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_uct_perf.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_uct_perf.Tpo uct/$(DEPDIR)/gtest-test_uct_perf.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_uct_perf.cc' object='uct/gtest-test_uct_perf.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_uct_perf.obj `if test -f 'uct/test_uct_perf.cc'; then $(CYGPATH_W) 'uct/test_uct_perf.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_uct_perf.cc'; fi` + 
+uct/gtest-test_zcopy_comp.o: uct/test_zcopy_comp.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_zcopy_comp.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_zcopy_comp.Tpo -c -o uct/gtest-test_zcopy_comp.o `test -f 'uct/test_zcopy_comp.cc' || echo '$(srcdir)/'`uct/test_zcopy_comp.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_zcopy_comp.Tpo uct/$(DEPDIR)/gtest-test_zcopy_comp.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_zcopy_comp.cc' object='uct/gtest-test_zcopy_comp.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_zcopy_comp.o `test -f 'uct/test_zcopy_comp.cc' || echo '$(srcdir)/'`uct/test_zcopy_comp.cc + +uct/gtest-test_zcopy_comp.obj: uct/test_zcopy_comp.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_zcopy_comp.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_zcopy_comp.Tpo -c -o uct/gtest-test_zcopy_comp.obj `if test -f 'uct/test_zcopy_comp.cc'; then $(CYGPATH_W) 'uct/test_zcopy_comp.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_zcopy_comp.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_zcopy_comp.Tpo uct/$(DEPDIR)/gtest-test_zcopy_comp.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_zcopy_comp.cc' object='uct/gtest-test_zcopy_comp.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o 
uct/gtest-test_zcopy_comp.obj `if test -f 'uct/test_zcopy_comp.cc'; then $(CYGPATH_W) 'uct/test_zcopy_comp.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_zcopy_comp.cc'; fi` + +uct/gtest-uct_p2p_test.o: uct/uct_p2p_test.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-uct_p2p_test.o -MD -MP -MF uct/$(DEPDIR)/gtest-uct_p2p_test.Tpo -c -o uct/gtest-uct_p2p_test.o `test -f 'uct/uct_p2p_test.cc' || echo '$(srcdir)/'`uct/uct_p2p_test.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-uct_p2p_test.Tpo uct/$(DEPDIR)/gtest-uct_p2p_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/uct_p2p_test.cc' object='uct/gtest-uct_p2p_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-uct_p2p_test.o `test -f 'uct/uct_p2p_test.cc' || echo '$(srcdir)/'`uct/uct_p2p_test.cc + +uct/gtest-uct_p2p_test.obj: uct/uct_p2p_test.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-uct_p2p_test.obj -MD -MP -MF uct/$(DEPDIR)/gtest-uct_p2p_test.Tpo -c -o uct/gtest-uct_p2p_test.obj `if test -f 'uct/uct_p2p_test.cc'; then $(CYGPATH_W) 'uct/uct_p2p_test.cc'; else $(CYGPATH_W) '$(srcdir)/uct/uct_p2p_test.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-uct_p2p_test.Tpo uct/$(DEPDIR)/gtest-uct_p2p_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/uct_p2p_test.cc' object='uct/gtest-uct_p2p_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-uct_p2p_test.obj `if test -f 'uct/uct_p2p_test.cc'; then $(CYGPATH_W) 'uct/uct_p2p_test.cc'; else $(CYGPATH_W) '$(srcdir)/uct/uct_p2p_test.cc'; fi` + +uct/gtest-uct_test.o: uct/uct_test.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-uct_test.o -MD -MP -MF uct/$(DEPDIR)/gtest-uct_test.Tpo -c -o uct/gtest-uct_test.o `test -f 'uct/uct_test.cc' || echo '$(srcdir)/'`uct/uct_test.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-uct_test.Tpo uct/$(DEPDIR)/gtest-uct_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/uct_test.cc' object='uct/gtest-uct_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-uct_test.o `test -f 'uct/uct_test.cc' || echo '$(srcdir)/'`uct/uct_test.cc + +uct/gtest-uct_test.obj: uct/uct_test.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-uct_test.obj -MD -MP -MF uct/$(DEPDIR)/gtest-uct_test.Tpo -c -o uct/gtest-uct_test.obj `if test -f 'uct/uct_test.cc'; then $(CYGPATH_W) 'uct/uct_test.cc'; else $(CYGPATH_W) '$(srcdir)/uct/uct_test.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-uct_test.Tpo uct/$(DEPDIR)/gtest-uct_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/uct_test.cc' object='uct/gtest-uct_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-uct_test.obj `if test -f 'uct/uct_test.cc'; then $(CYGPATH_W) 'uct/uct_test.cc'; else $(CYGPATH_W) '$(srcdir)/uct/uct_test.cc'; fi` + +uct/gtest-test_stats.o: uct/test_stats.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_stats.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_stats.Tpo -c -o uct/gtest-test_stats.o `test -f 'uct/test_stats.cc' || echo '$(srcdir)/'`uct/test_stats.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_stats.Tpo uct/$(DEPDIR)/gtest-test_stats.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_stats.cc' object='uct/gtest-test_stats.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_stats.o `test -f 'uct/test_stats.cc' || echo '$(srcdir)/'`uct/test_stats.cc + +uct/gtest-test_stats.obj: uct/test_stats.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_stats.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_stats.Tpo -c -o uct/gtest-test_stats.obj `if test -f 'uct/test_stats.cc'; then $(CYGPATH_W) 'uct/test_stats.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_stats.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_stats.Tpo uct/$(DEPDIR)/gtest-test_stats.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_stats.cc' object='uct/gtest-test_stats.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_stats.obj `if test -f 'uct/test_stats.cc'; then $(CYGPATH_W) 'uct/test_stats.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_stats.cc'; fi` + +ucs/gtest-test_event_set.o: ucs/test_event_set.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_event_set.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_event_set.Tpo -c -o ucs/gtest-test_event_set.o `test -f 'ucs/test_event_set.cc' || echo '$(srcdir)/'`ucs/test_event_set.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_event_set.Tpo ucs/$(DEPDIR)/gtest-test_event_set.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_event_set.cc' object='ucs/gtest-test_event_set.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_event_set.o `test -f 'ucs/test_event_set.cc' || echo '$(srcdir)/'`ucs/test_event_set.cc + +ucs/gtest-test_event_set.obj: ucs/test_event_set.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_event_set.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_event_set.Tpo -c -o ucs/gtest-test_event_set.obj `if test -f 'ucs/test_event_set.cc'; then $(CYGPATH_W) 'ucs/test_event_set.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_event_set.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_event_set.Tpo ucs/$(DEPDIR)/gtest-test_event_set.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_event_set.cc' object='ucs/gtest-test_event_set.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_event_set.obj `if test -f 'ucs/test_event_set.cc'; then $(CYGPATH_W) 'ucs/test_event_set.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_event_set.cc'; fi` + +ucs/gtest-test_stats_filter.o: ucs/test_stats_filter.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_stats_filter.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_stats_filter.Tpo -c -o ucs/gtest-test_stats_filter.o `test -f 'ucs/test_stats_filter.cc' || echo '$(srcdir)/'`ucs/test_stats_filter.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_stats_filter.Tpo ucs/$(DEPDIR)/gtest-test_stats_filter.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_stats_filter.cc' object='ucs/gtest-test_stats_filter.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_stats_filter.o `test -f 'ucs/test_stats_filter.cc' || echo '$(srcdir)/'`ucs/test_stats_filter.cc + +ucs/gtest-test_stats_filter.obj: ucs/test_stats_filter.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_stats_filter.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_stats_filter.Tpo -c -o ucs/gtest-test_stats_filter.obj `if test -f 'ucs/test_stats_filter.cc'; then $(CYGPATH_W) 'ucs/test_stats_filter.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_stats_filter.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
ucs/$(DEPDIR)/gtest-test_stats_filter.Tpo ucs/$(DEPDIR)/gtest-test_stats_filter.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_stats_filter.cc' object='ucs/gtest-test_stats_filter.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_stats_filter.obj `if test -f 'ucs/test_stats_filter.cc'; then $(CYGPATH_W) 'ucs/test_stats_filter.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_stats_filter.cc'; fi` + +uct/gtest-test_peer_failure.o: uct/test_peer_failure.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_peer_failure.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_peer_failure.Tpo -c -o uct/gtest-test_peer_failure.o `test -f 'uct/test_peer_failure.cc' || echo '$(srcdir)/'`uct/test_peer_failure.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_peer_failure.Tpo uct/$(DEPDIR)/gtest-test_peer_failure.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_peer_failure.cc' object='uct/gtest-test_peer_failure.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_peer_failure.o `test -f 'uct/test_peer_failure.cc' || echo '$(srcdir)/'`uct/test_peer_failure.cc + +uct/gtest-test_peer_failure.obj: uct/test_peer_failure.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_peer_failure.obj -MD -MP -MF 
uct/$(DEPDIR)/gtest-test_peer_failure.Tpo -c -o uct/gtest-test_peer_failure.obj `if test -f 'uct/test_peer_failure.cc'; then $(CYGPATH_W) 'uct/test_peer_failure.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_peer_failure.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_peer_failure.Tpo uct/$(DEPDIR)/gtest-test_peer_failure.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_peer_failure.cc' object='uct/gtest-test_peer_failure.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_peer_failure.obj `if test -f 'uct/test_peer_failure.cc'; then $(CYGPATH_W) 'uct/test_peer_failure.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_peer_failure.cc'; fi` + +uct/gtest-test_tag.o: uct/test_tag.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_tag.o -MD -MP -MF uct/$(DEPDIR)/gtest-test_tag.Tpo -c -o uct/gtest-test_tag.o `test -f 'uct/test_tag.cc' || echo '$(srcdir)/'`uct/test_tag.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_tag.Tpo uct/$(DEPDIR)/gtest-test_tag.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_tag.cc' object='uct/gtest-test_tag.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_tag.o `test -f 'uct/test_tag.cc' || echo '$(srcdir)/'`uct/test_tag.cc + +uct/gtest-test_tag.obj: uct/test_tag.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) 
$(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/gtest-test_tag.obj -MD -MP -MF uct/$(DEPDIR)/gtest-test_tag.Tpo -c -o uct/gtest-test_tag.obj `if test -f 'uct/test_tag.cc'; then $(CYGPATH_W) 'uct/test_tag.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_tag.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/$(DEPDIR)/gtest-test_tag.Tpo uct/$(DEPDIR)/gtest-test_tag.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/test_tag.cc' object='uct/gtest-test_tag.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/gtest-test_tag.obj `if test -f 'uct/test_tag.cc'; then $(CYGPATH_W) 'uct/test_tag.cc'; else $(CYGPATH_W) '$(srcdir)/uct/test_tag.cc'; fi` + +uct/tcp/gtest-test_tcp.o: uct/tcp/test_tcp.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/tcp/gtest-test_tcp.o -MD -MP -MF uct/tcp/$(DEPDIR)/gtest-test_tcp.Tpo -c -o uct/tcp/gtest-test_tcp.o `test -f 'uct/tcp/test_tcp.cc' || echo '$(srcdir)/'`uct/tcp/test_tcp.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/tcp/$(DEPDIR)/gtest-test_tcp.Tpo uct/tcp/$(DEPDIR)/gtest-test_tcp.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/tcp/test_tcp.cc' object='uct/tcp/gtest-test_tcp.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/tcp/gtest-test_tcp.o `test -f 'uct/tcp/test_tcp.cc' || echo '$(srcdir)/'`uct/tcp/test_tcp.cc + +uct/tcp/gtest-test_tcp.obj: uct/tcp/test_tcp.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/tcp/gtest-test_tcp.obj -MD -MP -MF uct/tcp/$(DEPDIR)/gtest-test_tcp.Tpo -c -o uct/tcp/gtest-test_tcp.obj `if test -f 'uct/tcp/test_tcp.cc'; then $(CYGPATH_W) 'uct/tcp/test_tcp.cc'; else $(CYGPATH_W) '$(srcdir)/uct/tcp/test_tcp.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/tcp/$(DEPDIR)/gtest-test_tcp.Tpo uct/tcp/$(DEPDIR)/gtest-test_tcp.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/tcp/test_tcp.cc' object='uct/tcp/gtest-test_tcp.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/tcp/gtest-test_tcp.obj `if test -f 'uct/tcp/test_tcp.cc'; then $(CYGPATH_W) 'uct/tcp/test_tcp.cc'; else $(CYGPATH_W) '$(srcdir)/uct/tcp/test_tcp.cc'; fi` + +ucp/gtest-test_ucp_am.o: ucp/test_ucp_am.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_am.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_am.Tpo -c -o ucp/gtest-test_ucp_am.o `test -f 'ucp/test_ucp_am.cc' || echo '$(srcdir)/'`ucp/test_ucp_am.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_am.Tpo ucp/$(DEPDIR)/gtest-test_ucp_am.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_am.cc' object='ucp/gtest-test_ucp_am.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_am.o `test -f 'ucp/test_ucp_am.cc' || echo '$(srcdir)/'`ucp/test_ucp_am.cc + +ucp/gtest-test_ucp_am.obj: ucp/test_ucp_am.cc 
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_am.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_am.Tpo -c -o ucp/gtest-test_ucp_am.obj `if test -f 'ucp/test_ucp_am.cc'; then $(CYGPATH_W) 'ucp/test_ucp_am.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_am.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_am.Tpo ucp/$(DEPDIR)/gtest-test_ucp_am.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_am.cc' object='ucp/gtest-test_ucp_am.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_am.obj `if test -f 'ucp/test_ucp_am.cc'; then $(CYGPATH_W) 'ucp/test_ucp_am.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_am.cc'; fi` + +ucp/gtest-test_ucp_stream.o: ucp/test_ucp_stream.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_stream.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_stream.Tpo -c -o ucp/gtest-test_ucp_stream.o `test -f 'ucp/test_ucp_stream.cc' || echo '$(srcdir)/'`ucp/test_ucp_stream.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_stream.Tpo ucp/$(DEPDIR)/gtest-test_ucp_stream.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_stream.cc' object='ucp/gtest-test_ucp_stream.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_stream.o `test -f 
'ucp/test_ucp_stream.cc' || echo '$(srcdir)/'`ucp/test_ucp_stream.cc + +ucp/gtest-test_ucp_stream.obj: ucp/test_ucp_stream.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_stream.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_stream.Tpo -c -o ucp/gtest-test_ucp_stream.obj `if test -f 'ucp/test_ucp_stream.cc'; then $(CYGPATH_W) 'ucp/test_ucp_stream.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_stream.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_stream.Tpo ucp/$(DEPDIR)/gtest-test_ucp_stream.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_stream.cc' object='ucp/gtest-test_ucp_stream.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_stream.obj `if test -f 'ucp/test_ucp_stream.cc'; then $(CYGPATH_W) 'ucp/test_ucp_stream.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_stream.cc'; fi` + +ucp/gtest-test_ucp_peer_failure.o: ucp/test_ucp_peer_failure.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_peer_failure.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_peer_failure.Tpo -c -o ucp/gtest-test_ucp_peer_failure.o `test -f 'ucp/test_ucp_peer_failure.cc' || echo '$(srcdir)/'`ucp/test_ucp_peer_failure.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_peer_failure.Tpo ucp/$(DEPDIR)/gtest-test_ucp_peer_failure.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_peer_failure.cc' object='ucp/gtest-test_ucp_peer_failure.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) 
$(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_peer_failure.o `test -f 'ucp/test_ucp_peer_failure.cc' || echo '$(srcdir)/'`ucp/test_ucp_peer_failure.cc + +ucp/gtest-test_ucp_peer_failure.obj: ucp/test_ucp_peer_failure.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_peer_failure.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_peer_failure.Tpo -c -o ucp/gtest-test_ucp_peer_failure.obj `if test -f 'ucp/test_ucp_peer_failure.cc'; then $(CYGPATH_W) 'ucp/test_ucp_peer_failure.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_peer_failure.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_peer_failure.Tpo ucp/$(DEPDIR)/gtest-test_ucp_peer_failure.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_peer_failure.cc' object='ucp/gtest-test_ucp_peer_failure.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_peer_failure.obj `if test -f 'ucp/test_ucp_peer_failure.cc'; then $(CYGPATH_W) 'ucp/test_ucp_peer_failure.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_peer_failure.cc'; fi` + +ucp/gtest-test_ucp_atomic.o: ucp/test_ucp_atomic.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_atomic.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_atomic.Tpo -c -o ucp/gtest-test_ucp_atomic.o `test -f 'ucp/test_ucp_atomic.cc' || echo '$(srcdir)/'`ucp/test_ucp_atomic.cc +@am__fastdepCXX_TRUE@ 
$(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_atomic.Tpo ucp/$(DEPDIR)/gtest-test_ucp_atomic.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_atomic.cc' object='ucp/gtest-test_ucp_atomic.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_atomic.o `test -f 'ucp/test_ucp_atomic.cc' || echo '$(srcdir)/'`ucp/test_ucp_atomic.cc + +ucp/gtest-test_ucp_atomic.obj: ucp/test_ucp_atomic.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_atomic.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_atomic.Tpo -c -o ucp/gtest-test_ucp_atomic.obj `if test -f 'ucp/test_ucp_atomic.cc'; then $(CYGPATH_W) 'ucp/test_ucp_atomic.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_atomic.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_atomic.Tpo ucp/$(DEPDIR)/gtest-test_ucp_atomic.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_atomic.cc' object='ucp/gtest-test_ucp_atomic.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_atomic.obj `if test -f 'ucp/test_ucp_atomic.cc'; then $(CYGPATH_W) 'ucp/test_ucp_atomic.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_atomic.cc'; fi` + +ucp/gtest-test_ucp_dt.o: ucp/test_ucp_dt.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_dt.o -MD -MP -MF 
ucp/$(DEPDIR)/gtest-test_ucp_dt.Tpo -c -o ucp/gtest-test_ucp_dt.o `test -f 'ucp/test_ucp_dt.cc' || echo '$(srcdir)/'`ucp/test_ucp_dt.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_dt.Tpo ucp/$(DEPDIR)/gtest-test_ucp_dt.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_dt.cc' object='ucp/gtest-test_ucp_dt.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_dt.o `test -f 'ucp/test_ucp_dt.cc' || echo '$(srcdir)/'`ucp/test_ucp_dt.cc + +ucp/gtest-test_ucp_dt.obj: ucp/test_ucp_dt.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_dt.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_dt.Tpo -c -o ucp/gtest-test_ucp_dt.obj `if test -f 'ucp/test_ucp_dt.cc'; then $(CYGPATH_W) 'ucp/test_ucp_dt.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_dt.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_dt.Tpo ucp/$(DEPDIR)/gtest-test_ucp_dt.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_dt.cc' object='ucp/gtest-test_ucp_dt.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_dt.obj `if test -f 'ucp/test_ucp_dt.cc'; then $(CYGPATH_W) 'ucp/test_ucp_dt.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_dt.cc'; fi` + +ucp/gtest-test_ucp_memheap.o: ucp/test_ucp_memheap.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) 
$(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_memheap.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_memheap.Tpo -c -o ucp/gtest-test_ucp_memheap.o `test -f 'ucp/test_ucp_memheap.cc' || echo '$(srcdir)/'`ucp/test_ucp_memheap.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_memheap.Tpo ucp/$(DEPDIR)/gtest-test_ucp_memheap.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_memheap.cc' object='ucp/gtest-test_ucp_memheap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_memheap.o `test -f 'ucp/test_ucp_memheap.cc' || echo '$(srcdir)/'`ucp/test_ucp_memheap.cc + +ucp/gtest-test_ucp_memheap.obj: ucp/test_ucp_memheap.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_memheap.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_memheap.Tpo -c -o ucp/gtest-test_ucp_memheap.obj `if test -f 'ucp/test_ucp_memheap.cc'; then $(CYGPATH_W) 'ucp/test_ucp_memheap.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_memheap.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_memheap.Tpo ucp/$(DEPDIR)/gtest-test_ucp_memheap.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_memheap.cc' object='ucp/gtest-test_ucp_memheap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_memheap.obj `if test -f 'ucp/test_ucp_memheap.cc'; then $(CYGPATH_W) 'ucp/test_ucp_memheap.cc'; else $(CYGPATH_W) 
'$(srcdir)/ucp/test_ucp_memheap.cc'; fi` + +ucp/gtest-test_ucp_mmap.o: ucp/test_ucp_mmap.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_mmap.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_mmap.Tpo -c -o ucp/gtest-test_ucp_mmap.o `test -f 'ucp/test_ucp_mmap.cc' || echo '$(srcdir)/'`ucp/test_ucp_mmap.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_mmap.Tpo ucp/$(DEPDIR)/gtest-test_ucp_mmap.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_mmap.cc' object='ucp/gtest-test_ucp_mmap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_mmap.o `test -f 'ucp/test_ucp_mmap.cc' || echo '$(srcdir)/'`ucp/test_ucp_mmap.cc + +ucp/gtest-test_ucp_mmap.obj: ucp/test_ucp_mmap.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_mmap.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_mmap.Tpo -c -o ucp/gtest-test_ucp_mmap.obj `if test -f 'ucp/test_ucp_mmap.cc'; then $(CYGPATH_W) 'ucp/test_ucp_mmap.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_mmap.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_mmap.Tpo ucp/$(DEPDIR)/gtest-test_ucp_mmap.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_mmap.cc' object='ucp/gtest-test_ucp_mmap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o 
ucp/gtest-test_ucp_mmap.obj `if test -f 'ucp/test_ucp_mmap.cc'; then $(CYGPATH_W) 'ucp/test_ucp_mmap.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_mmap.cc'; fi` + +ucp/gtest-test_ucp_mem_type.o: ucp/test_ucp_mem_type.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_mem_type.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_mem_type.Tpo -c -o ucp/gtest-test_ucp_mem_type.o `test -f 'ucp/test_ucp_mem_type.cc' || echo '$(srcdir)/'`ucp/test_ucp_mem_type.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_mem_type.Tpo ucp/$(DEPDIR)/gtest-test_ucp_mem_type.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_mem_type.cc' object='ucp/gtest-test_ucp_mem_type.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_mem_type.o `test -f 'ucp/test_ucp_mem_type.cc' || echo '$(srcdir)/'`ucp/test_ucp_mem_type.cc + +ucp/gtest-test_ucp_mem_type.obj: ucp/test_ucp_mem_type.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_mem_type.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_mem_type.Tpo -c -o ucp/gtest-test_ucp_mem_type.obj `if test -f 'ucp/test_ucp_mem_type.cc'; then $(CYGPATH_W) 'ucp/test_ucp_mem_type.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_mem_type.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_mem_type.Tpo ucp/$(DEPDIR)/gtest-test_ucp_mem_type.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_mem_type.cc' object='ucp/gtest-test_ucp_mem_type.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_mem_type.obj `if test -f 'ucp/test_ucp_mem_type.cc'; then $(CYGPATH_W) 'ucp/test_ucp_mem_type.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_mem_type.cc'; fi` + +ucp/gtest-test_ucp_perf.o: ucp/test_ucp_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_perf.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_perf.Tpo -c -o ucp/gtest-test_ucp_perf.o `test -f 'ucp/test_ucp_perf.cc' || echo '$(srcdir)/'`ucp/test_ucp_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_perf.Tpo ucp/$(DEPDIR)/gtest-test_ucp_perf.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_perf.cc' object='ucp/gtest-test_ucp_perf.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_perf.o `test -f 'ucp/test_ucp_perf.cc' || echo '$(srcdir)/'`ucp/test_ucp_perf.cc + +ucp/gtest-test_ucp_perf.obj: ucp/test_ucp_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_perf.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_perf.Tpo -c -o ucp/gtest-test_ucp_perf.obj `if test -f 'ucp/test_ucp_perf.cc'; then $(CYGPATH_W) 'ucp/test_ucp_perf.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_perf.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_perf.Tpo ucp/$(DEPDIR)/gtest-test_ucp_perf.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='ucp/test_ucp_perf.cc' object='ucp/gtest-test_ucp_perf.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_perf.obj `if test -f 'ucp/test_ucp_perf.cc'; then $(CYGPATH_W) 'ucp/test_ucp_perf.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_perf.cc'; fi` + +ucp/gtest-test_ucp_rma.o: ucp/test_ucp_rma.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_rma.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_rma.Tpo -c -o ucp/gtest-test_ucp_rma.o `test -f 'ucp/test_ucp_rma.cc' || echo '$(srcdir)/'`ucp/test_ucp_rma.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_rma.Tpo ucp/$(DEPDIR)/gtest-test_ucp_rma.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_rma.cc' object='ucp/gtest-test_ucp_rma.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_rma.o `test -f 'ucp/test_ucp_rma.cc' || echo '$(srcdir)/'`ucp/test_ucp_rma.cc + +ucp/gtest-test_ucp_rma.obj: ucp/test_ucp_rma.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_rma.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_rma.Tpo -c -o ucp/gtest-test_ucp_rma.obj `if test -f 'ucp/test_ucp_rma.cc'; then $(CYGPATH_W) 'ucp/test_ucp_rma.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_rma.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
ucp/$(DEPDIR)/gtest-test_ucp_rma.Tpo ucp/$(DEPDIR)/gtest-test_ucp_rma.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_rma.cc' object='ucp/gtest-test_ucp_rma.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_rma.obj `if test -f 'ucp/test_ucp_rma.cc'; then $(CYGPATH_W) 'ucp/test_ucp_rma.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_rma.cc'; fi` + +ucp/gtest-test_ucp_rma_mt.o: ucp/test_ucp_rma_mt.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_rma_mt.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_rma_mt.Tpo -c -o ucp/gtest-test_ucp_rma_mt.o `test -f 'ucp/test_ucp_rma_mt.cc' || echo '$(srcdir)/'`ucp/test_ucp_rma_mt.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_rma_mt.Tpo ucp/$(DEPDIR)/gtest-test_ucp_rma_mt.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_rma_mt.cc' object='ucp/gtest-test_ucp_rma_mt.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_rma_mt.o `test -f 'ucp/test_ucp_rma_mt.cc' || echo '$(srcdir)/'`ucp/test_ucp_rma_mt.cc + +ucp/gtest-test_ucp_rma_mt.obj: ucp/test_ucp_rma_mt.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_rma_mt.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_rma_mt.Tpo -c -o ucp/gtest-test_ucp_rma_mt.obj `if test -f 
'ucp/test_ucp_rma_mt.cc'; then $(CYGPATH_W) 'ucp/test_ucp_rma_mt.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_rma_mt.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_rma_mt.Tpo ucp/$(DEPDIR)/gtest-test_ucp_rma_mt.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_rma_mt.cc' object='ucp/gtest-test_ucp_rma_mt.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_rma_mt.obj `if test -f 'ucp/test_ucp_rma_mt.cc'; then $(CYGPATH_W) 'ucp/test_ucp_rma_mt.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_rma_mt.cc'; fi` + +ucp/gtest-test_ucp_tag_cancel.o: ucp/test_ucp_tag_cancel.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_cancel.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_cancel.Tpo -c -o ucp/gtest-test_ucp_tag_cancel.o `test -f 'ucp/test_ucp_tag_cancel.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_cancel.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_cancel.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_cancel.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_cancel.cc' object='ucp/gtest-test_ucp_tag_cancel.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_cancel.o `test -f 'ucp/test_ucp_tag_cancel.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_cancel.cc + +ucp/gtest-test_ucp_tag_cancel.obj: ucp/test_ucp_tag_cancel.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_cancel.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_cancel.Tpo -c -o ucp/gtest-test_ucp_tag_cancel.obj `if test -f 'ucp/test_ucp_tag_cancel.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_cancel.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_cancel.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_cancel.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_cancel.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_cancel.cc' object='ucp/gtest-test_ucp_tag_cancel.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_cancel.obj `if test -f 'ucp/test_ucp_tag_cancel.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_cancel.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_cancel.cc'; fi` + +ucp/gtest-test_ucp_tag_match.o: ucp/test_ucp_tag_match.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_match.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_match.Tpo -c -o ucp/gtest-test_ucp_tag_match.o `test -f 'ucp/test_ucp_tag_match.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_match.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_match.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_match.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_match.cc' object='ucp/gtest-test_ucp_tag_match.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_match.o `test -f 'ucp/test_ucp_tag_match.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_match.cc + +ucp/gtest-test_ucp_tag_match.obj: ucp/test_ucp_tag_match.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_match.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_match.Tpo -c -o ucp/gtest-test_ucp_tag_match.obj `if test -f 'ucp/test_ucp_tag_match.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_match.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_match.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_match.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_match.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_match.cc' object='ucp/gtest-test_ucp_tag_match.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_match.obj `if test -f 'ucp/test_ucp_tag_match.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_match.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_match.cc'; fi` + +ucp/gtest-test_ucp_tag_offload.o: ucp/test_ucp_tag_offload.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_offload.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_offload.Tpo -c -o ucp/gtest-test_ucp_tag_offload.o `test -f 'ucp/test_ucp_tag_offload.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_offload.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_offload.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_offload.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='ucp/test_ucp_tag_offload.cc' object='ucp/gtest-test_ucp_tag_offload.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_offload.o `test -f 'ucp/test_ucp_tag_offload.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_offload.cc + +ucp/gtest-test_ucp_tag_offload.obj: ucp/test_ucp_tag_offload.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_offload.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_offload.Tpo -c -o ucp/gtest-test_ucp_tag_offload.obj `if test -f 'ucp/test_ucp_tag_offload.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_offload.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_offload.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_offload.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_offload.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_offload.cc' object='ucp/gtest-test_ucp_tag_offload.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_offload.obj `if test -f 'ucp/test_ucp_tag_offload.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_offload.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_offload.cc'; fi` + +ucp/gtest-test_ucp_tag_mt.o: ucp/test_ucp_tag_mt.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_mt.o -MD -MP -MF 
ucp/$(DEPDIR)/gtest-test_ucp_tag_mt.Tpo -c -o ucp/gtest-test_ucp_tag_mt.o `test -f 'ucp/test_ucp_tag_mt.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_mt.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_mt.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_mt.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_mt.cc' object='ucp/gtest-test_ucp_tag_mt.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_mt.o `test -f 'ucp/test_ucp_tag_mt.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_mt.cc + +ucp/gtest-test_ucp_tag_mt.obj: ucp/test_ucp_tag_mt.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_mt.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_mt.Tpo -c -o ucp/gtest-test_ucp_tag_mt.obj `if test -f 'ucp/test_ucp_tag_mt.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_mt.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_mt.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_mt.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_mt.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_mt.cc' object='ucp/gtest-test_ucp_tag_mt.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_mt.obj `if test -f 'ucp/test_ucp_tag_mt.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_mt.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_mt.cc'; fi` + +ucp/gtest-test_ucp_tag_perf.o: ucp/test_ucp_tag_perf.cc 
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_perf.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_perf.Tpo -c -o ucp/gtest-test_ucp_tag_perf.o `test -f 'ucp/test_ucp_tag_perf.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_perf.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_perf.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_perf.cc' object='ucp/gtest-test_ucp_tag_perf.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_perf.o `test -f 'ucp/test_ucp_tag_perf.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_perf.cc + +ucp/gtest-test_ucp_tag_perf.obj: ucp/test_ucp_tag_perf.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_perf.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_perf.Tpo -c -o ucp/gtest-test_ucp_tag_perf.obj `if test -f 'ucp/test_ucp_tag_perf.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_perf.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_perf.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_perf.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_perf.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_perf.cc' object='ucp/gtest-test_ucp_tag_perf.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o 
ucp/gtest-test_ucp_tag_perf.obj `if test -f 'ucp/test_ucp_tag_perf.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_perf.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_perf.cc'; fi` + +ucp/gtest-test_ucp_tag_probe.o: ucp/test_ucp_tag_probe.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_probe.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_probe.Tpo -c -o ucp/gtest-test_ucp_tag_probe.o `test -f 'ucp/test_ucp_tag_probe.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_probe.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_probe.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_probe.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_probe.cc' object='ucp/gtest-test_ucp_tag_probe.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_probe.o `test -f 'ucp/test_ucp_tag_probe.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_probe.cc + +ucp/gtest-test_ucp_tag_probe.obj: ucp/test_ucp_tag_probe.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_probe.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_probe.Tpo -c -o ucp/gtest-test_ucp_tag_probe.obj `if test -f 'ucp/test_ucp_tag_probe.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_probe.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_probe.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_probe.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_probe.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_probe.cc' object='ucp/gtest-test_ucp_tag_probe.obj' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_probe.obj `if test -f 'ucp/test_ucp_tag_probe.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_probe.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_probe.cc'; fi` + +ucp/gtest-test_ucp_tag_xfer.o: ucp/test_ucp_tag_xfer.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_xfer.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_xfer.Tpo -c -o ucp/gtest-test_ucp_tag_xfer.o `test -f 'ucp/test_ucp_tag_xfer.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_xfer.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_xfer.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_xfer.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_xfer.cc' object='ucp/gtest-test_ucp_tag_xfer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_xfer.o `test -f 'ucp/test_ucp_tag_xfer.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_xfer.cc + +ucp/gtest-test_ucp_tag_xfer.obj: ucp/test_ucp_tag_xfer.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_xfer.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_xfer.Tpo -c -o ucp/gtest-test_ucp_tag_xfer.obj `if test -f 'ucp/test_ucp_tag_xfer.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_xfer.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_xfer.cc'; fi` +@am__fastdepCXX_TRUE@ 
$(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_xfer.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_xfer.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_xfer.cc' object='ucp/gtest-test_ucp_tag_xfer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_xfer.obj `if test -f 'ucp/test_ucp_tag_xfer.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_xfer.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_xfer.cc'; fi` + +ucp/gtest-test_ucp_tag_mem_type.o: ucp/test_ucp_tag_mem_type.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag_mem_type.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_mem_type.Tpo -c -o ucp/gtest-test_ucp_tag_mem_type.o `test -f 'ucp/test_ucp_tag_mem_type.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_mem_type.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_mem_type.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_mem_type.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_mem_type.cc' object='ucp/gtest-test_ucp_tag_mem_type.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_mem_type.o `test -f 'ucp/test_ucp_tag_mem_type.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag_mem_type.cc + +ucp/gtest-test_ucp_tag_mem_type.obj: ucp/test_ucp_tag_mem_type.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) 
-MT ucp/gtest-test_ucp_tag_mem_type.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag_mem_type.Tpo -c -o ucp/gtest-test_ucp_tag_mem_type.obj `if test -f 'ucp/test_ucp_tag_mem_type.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_mem_type.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_mem_type.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag_mem_type.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag_mem_type.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag_mem_type.cc' object='ucp/gtest-test_ucp_tag_mem_type.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag_mem_type.obj `if test -f 'ucp/test_ucp_tag_mem_type.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag_mem_type.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag_mem_type.cc'; fi` + +ucp/gtest-test_ucp_tag.o: ucp/test_ucp_tag.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag.Tpo -c -o ucp/gtest-test_ucp_tag.o `test -f 'ucp/test_ucp_tag.cc' || echo '$(srcdir)/'`ucp/test_ucp_tag.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag.cc' object='ucp/gtest-test_ucp_tag.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag.o `test -f 'ucp/test_ucp_tag.cc' || echo 
'$(srcdir)/'`ucp/test_ucp_tag.cc + +ucp/gtest-test_ucp_tag.obj: ucp/test_ucp_tag.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_tag.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_tag.Tpo -c -o ucp/gtest-test_ucp_tag.obj `if test -f 'ucp/test_ucp_tag.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_tag.Tpo ucp/$(DEPDIR)/gtest-test_ucp_tag.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_tag.cc' object='ucp/gtest-test_ucp_tag.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_tag.obj `if test -f 'ucp/test_ucp_tag.cc'; then $(CYGPATH_W) 'ucp/test_ucp_tag.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_tag.cc'; fi` + +ucp/gtest-test_ucp_context.o: ucp/test_ucp_context.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_context.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_context.Tpo -c -o ucp/gtest-test_ucp_context.o `test -f 'ucp/test_ucp_context.cc' || echo '$(srcdir)/'`ucp/test_ucp_context.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_context.Tpo ucp/$(DEPDIR)/gtest-test_ucp_context.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_context.cc' object='ucp/gtest-test_ucp_context.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_context.o `test -f 'ucp/test_ucp_context.cc' || echo '$(srcdir)/'`ucp/test_ucp_context.cc + +ucp/gtest-test_ucp_context.obj: ucp/test_ucp_context.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_context.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_context.Tpo -c -o ucp/gtest-test_ucp_context.obj `if test -f 'ucp/test_ucp_context.cc'; then $(CYGPATH_W) 'ucp/test_ucp_context.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_context.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_context.Tpo ucp/$(DEPDIR)/gtest-test_ucp_context.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_context.cc' object='ucp/gtest-test_ucp_context.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_context.obj `if test -f 'ucp/test_ucp_context.cc'; then $(CYGPATH_W) 'ucp/test_ucp_context.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_context.cc'; fi` + +ucp/gtest-test_ucp_wireup.o: ucp/test_ucp_wireup.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_wireup.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_wireup.Tpo -c -o ucp/gtest-test_ucp_wireup.o `test -f 'ucp/test_ucp_wireup.cc' || echo '$(srcdir)/'`ucp/test_ucp_wireup.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_wireup.Tpo ucp/$(DEPDIR)/gtest-test_ucp_wireup.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_wireup.cc' object='ucp/gtest-test_ucp_wireup.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_wireup.o `test -f 'ucp/test_ucp_wireup.cc' || echo '$(srcdir)/'`ucp/test_ucp_wireup.cc + +ucp/gtest-test_ucp_wireup.obj: ucp/test_ucp_wireup.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_wireup.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_wireup.Tpo -c -o ucp/gtest-test_ucp_wireup.obj `if test -f 'ucp/test_ucp_wireup.cc'; then $(CYGPATH_W) 'ucp/test_ucp_wireup.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_wireup.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_wireup.Tpo ucp/$(DEPDIR)/gtest-test_ucp_wireup.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_wireup.cc' object='ucp/gtest-test_ucp_wireup.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_wireup.obj `if test -f 'ucp/test_ucp_wireup.cc'; then $(CYGPATH_W) 'ucp/test_ucp_wireup.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_wireup.cc'; fi` + +ucp/gtest-test_ucp_wakeup.o: ucp/test_ucp_wakeup.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_wakeup.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_wakeup.Tpo -c -o ucp/gtest-test_ucp_wakeup.o `test -f 'ucp/test_ucp_wakeup.cc' || echo '$(srcdir)/'`ucp/test_ucp_wakeup.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
ucp/$(DEPDIR)/gtest-test_ucp_wakeup.Tpo ucp/$(DEPDIR)/gtest-test_ucp_wakeup.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_wakeup.cc' object='ucp/gtest-test_ucp_wakeup.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_wakeup.o `test -f 'ucp/test_ucp_wakeup.cc' || echo '$(srcdir)/'`ucp/test_ucp_wakeup.cc + +ucp/gtest-test_ucp_wakeup.obj: ucp/test_ucp_wakeup.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_wakeup.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_wakeup.Tpo -c -o ucp/gtest-test_ucp_wakeup.obj `if test -f 'ucp/test_ucp_wakeup.cc'; then $(CYGPATH_W) 'ucp/test_ucp_wakeup.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_wakeup.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_wakeup.Tpo ucp/$(DEPDIR)/gtest-test_ucp_wakeup.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_wakeup.cc' object='ucp/gtest-test_ucp_wakeup.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_wakeup.obj `if test -f 'ucp/test_ucp_wakeup.cc'; then $(CYGPATH_W) 'ucp/test_ucp_wakeup.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_wakeup.cc'; fi` + +ucp/gtest-test_ucp_fence.o: ucp/test_ucp_fence.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_fence.o -MD -MP -MF 
ucp/$(DEPDIR)/gtest-test_ucp_fence.Tpo -c -o ucp/gtest-test_ucp_fence.o `test -f 'ucp/test_ucp_fence.cc' || echo '$(srcdir)/'`ucp/test_ucp_fence.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_fence.Tpo ucp/$(DEPDIR)/gtest-test_ucp_fence.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_fence.cc' object='ucp/gtest-test_ucp_fence.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_fence.o `test -f 'ucp/test_ucp_fence.cc' || echo '$(srcdir)/'`ucp/test_ucp_fence.cc + +ucp/gtest-test_ucp_fence.obj: ucp/test_ucp_fence.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_fence.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_fence.Tpo -c -o ucp/gtest-test_ucp_fence.obj `if test -f 'ucp/test_ucp_fence.cc'; then $(CYGPATH_W) 'ucp/test_ucp_fence.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_fence.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_fence.Tpo ucp/$(DEPDIR)/gtest-test_ucp_fence.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_fence.cc' object='ucp/gtest-test_ucp_fence.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_fence.obj `if test -f 'ucp/test_ucp_fence.cc'; then $(CYGPATH_W) 'ucp/test_ucp_fence.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_fence.cc'; fi` + +ucp/gtest-test_ucp_sockaddr.o: ucp/test_ucp_sockaddr.cc +@am__fastdepCXX_TRUE@ 
$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_sockaddr.o -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_sockaddr.Tpo -c -o ucp/gtest-test_ucp_sockaddr.o `test -f 'ucp/test_ucp_sockaddr.cc' || echo '$(srcdir)/'`ucp/test_ucp_sockaddr.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_sockaddr.Tpo ucp/$(DEPDIR)/gtest-test_ucp_sockaddr.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_sockaddr.cc' object='ucp/gtest-test_ucp_sockaddr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-test_ucp_sockaddr.o `test -f 'ucp/test_ucp_sockaddr.cc' || echo '$(srcdir)/'`ucp/test_ucp_sockaddr.cc + +ucp/gtest-test_ucp_sockaddr.obj: ucp/test_ucp_sockaddr.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-test_ucp_sockaddr.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-test_ucp_sockaddr.Tpo -c -o ucp/gtest-test_ucp_sockaddr.obj `if test -f 'ucp/test_ucp_sockaddr.cc'; then $(CYGPATH_W) 'ucp/test_ucp_sockaddr.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_sockaddr.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-test_ucp_sockaddr.Tpo ucp/$(DEPDIR)/gtest-test_ucp_sockaddr.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/test_ucp_sockaddr.cc' object='ucp/gtest-test_ucp_sockaddr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o 
ucp/gtest-test_ucp_sockaddr.obj `if test -f 'ucp/test_ucp_sockaddr.cc'; then $(CYGPATH_W) 'ucp/test_ucp_sockaddr.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/test_ucp_sockaddr.cc'; fi` + +ucp/gtest-ucp_test.o: ucp/ucp_test.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-ucp_test.o -MD -MP -MF ucp/$(DEPDIR)/gtest-ucp_test.Tpo -c -o ucp/gtest-ucp_test.o `test -f 'ucp/ucp_test.cc' || echo '$(srcdir)/'`ucp/ucp_test.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-ucp_test.Tpo ucp/$(DEPDIR)/gtest-ucp_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/ucp_test.cc' object='ucp/gtest-ucp_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-ucp_test.o `test -f 'ucp/ucp_test.cc' || echo '$(srcdir)/'`ucp/ucp_test.cc + +ucp/gtest-ucp_test.obj: ucp/ucp_test.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-ucp_test.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-ucp_test.Tpo -c -o ucp/gtest-ucp_test.obj `if test -f 'ucp/ucp_test.cc'; then $(CYGPATH_W) 'ucp/ucp_test.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/ucp_test.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-ucp_test.Tpo ucp/$(DEPDIR)/gtest-ucp_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/ucp_test.cc' object='ucp/gtest-ucp_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o 
ucp/gtest-ucp_test.obj `if test -f 'ucp/ucp_test.cc'; then $(CYGPATH_W) 'ucp/ucp_test.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/ucp_test.cc'; fi` + +ucp/gtest-ucp_datatype.o: ucp/ucp_datatype.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-ucp_datatype.o -MD -MP -MF ucp/$(DEPDIR)/gtest-ucp_datatype.Tpo -c -o ucp/gtest-ucp_datatype.o `test -f 'ucp/ucp_datatype.cc' || echo '$(srcdir)/'`ucp/ucp_datatype.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-ucp_datatype.Tpo ucp/$(DEPDIR)/gtest-ucp_datatype.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/ucp_datatype.cc' object='ucp/gtest-ucp_datatype.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-ucp_datatype.o `test -f 'ucp/ucp_datatype.cc' || echo '$(srcdir)/'`ucp/ucp_datatype.cc + +ucp/gtest-ucp_datatype.obj: ucp/ucp_datatype.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucp/gtest-ucp_datatype.obj -MD -MP -MF ucp/$(DEPDIR)/gtest-ucp_datatype.Tpo -c -o ucp/gtest-ucp_datatype.obj `if test -f 'ucp/ucp_datatype.cc'; then $(CYGPATH_W) 'ucp/ucp_datatype.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/ucp_datatype.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucp/$(DEPDIR)/gtest-ucp_datatype.Tpo ucp/$(DEPDIR)/gtest-ucp_datatype.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucp/ucp_datatype.cc' object='ucp/gtest-ucp_datatype.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucp/gtest-ucp_datatype.obj `if test -f 'ucp/ucp_datatype.cc'; then $(CYGPATH_W) 'ucp/ucp_datatype.cc'; else $(CYGPATH_W) '$(srcdir)/ucp/ucp_datatype.cc'; fi` + +ucs/gtest-test_algorithm.o: ucs/test_algorithm.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_algorithm.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_algorithm.Tpo -c -o ucs/gtest-test_algorithm.o `test -f 'ucs/test_algorithm.cc' || echo '$(srcdir)/'`ucs/test_algorithm.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_algorithm.Tpo ucs/$(DEPDIR)/gtest-test_algorithm.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_algorithm.cc' object='ucs/gtest-test_algorithm.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_algorithm.o `test -f 'ucs/test_algorithm.cc' || echo '$(srcdir)/'`ucs/test_algorithm.cc + +ucs/gtest-test_algorithm.obj: ucs/test_algorithm.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_algorithm.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_algorithm.Tpo -c -o ucs/gtest-test_algorithm.obj `if test -f 'ucs/test_algorithm.cc'; then $(CYGPATH_W) 'ucs/test_algorithm.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_algorithm.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_algorithm.Tpo ucs/$(DEPDIR)/gtest-test_algorithm.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_algorithm.cc' object='ucs/gtest-test_algorithm.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) 
$(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_algorithm.obj `if test -f 'ucs/test_algorithm.cc'; then $(CYGPATH_W) 'ucs/test_algorithm.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_algorithm.cc'; fi` + +ucs/gtest-test_arbiter.o: ucs/test_arbiter.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_arbiter.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_arbiter.Tpo -c -o ucs/gtest-test_arbiter.o `test -f 'ucs/test_arbiter.cc' || echo '$(srcdir)/'`ucs/test_arbiter.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_arbiter.Tpo ucs/$(DEPDIR)/gtest-test_arbiter.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_arbiter.cc' object='ucs/gtest-test_arbiter.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_arbiter.o `test -f 'ucs/test_arbiter.cc' || echo '$(srcdir)/'`ucs/test_arbiter.cc + +ucs/gtest-test_arbiter.obj: ucs/test_arbiter.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_arbiter.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_arbiter.Tpo -c -o ucs/gtest-test_arbiter.obj `if test -f 'ucs/test_arbiter.cc'; then $(CYGPATH_W) 'ucs/test_arbiter.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_arbiter.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_arbiter.Tpo ucs/$(DEPDIR)/gtest-test_arbiter.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_arbiter.cc' 
object='ucs/gtest-test_arbiter.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_arbiter.obj `if test -f 'ucs/test_arbiter.cc'; then $(CYGPATH_W) 'ucs/test_arbiter.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_arbiter.cc'; fi` + +ucs/gtest-test_async.o: ucs/test_async.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_async.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_async.Tpo -c -o ucs/gtest-test_async.o `test -f 'ucs/test_async.cc' || echo '$(srcdir)/'`ucs/test_async.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_async.Tpo ucs/$(DEPDIR)/gtest-test_async.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_async.cc' object='ucs/gtest-test_async.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_async.o `test -f 'ucs/test_async.cc' || echo '$(srcdir)/'`ucs/test_async.cc + +ucs/gtest-test_async.obj: ucs/test_async.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_async.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_async.Tpo -c -o ucs/gtest-test_async.obj `if test -f 'ucs/test_async.cc'; then $(CYGPATH_W) 'ucs/test_async.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_async.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_async.Tpo ucs/$(DEPDIR)/gtest-test_async.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='ucs/test_async.cc' object='ucs/gtest-test_async.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_async.obj `if test -f 'ucs/test_async.cc'; then $(CYGPATH_W) 'ucs/test_async.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_async.cc'; fi` + +ucs/gtest-test_callbackq.o: ucs/test_callbackq.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_callbackq.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_callbackq.Tpo -c -o ucs/gtest-test_callbackq.o `test -f 'ucs/test_callbackq.cc' || echo '$(srcdir)/'`ucs/test_callbackq.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_callbackq.Tpo ucs/$(DEPDIR)/gtest-test_callbackq.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_callbackq.cc' object='ucs/gtest-test_callbackq.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_callbackq.o `test -f 'ucs/test_callbackq.cc' || echo '$(srcdir)/'`ucs/test_callbackq.cc + +ucs/gtest-test_callbackq.obj: ucs/test_callbackq.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_callbackq.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_callbackq.Tpo -c -o ucs/gtest-test_callbackq.obj `if test -f 'ucs/test_callbackq.cc'; then $(CYGPATH_W) 'ucs/test_callbackq.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_callbackq.cc'; fi` +@am__fastdepCXX_TRUE@ 
$(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_callbackq.Tpo ucs/$(DEPDIR)/gtest-test_callbackq.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_callbackq.cc' object='ucs/gtest-test_callbackq.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_callbackq.obj `if test -f 'ucs/test_callbackq.cc'; then $(CYGPATH_W) 'ucs/test_callbackq.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_callbackq.cc'; fi` + +ucs/gtest-test_class.o: ucs/test_class.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_class.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_class.Tpo -c -o ucs/gtest-test_class.o `test -f 'ucs/test_class.cc' || echo '$(srcdir)/'`ucs/test_class.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_class.Tpo ucs/$(DEPDIR)/gtest-test_class.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_class.cc' object='ucs/gtest-test_class.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_class.o `test -f 'ucs/test_class.cc' || echo '$(srcdir)/'`ucs/test_class.cc + +ucs/gtest-test_class.obj: ucs/test_class.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_class.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_class.Tpo -c -o ucs/gtest-test_class.obj `if test -f 'ucs/test_class.cc'; then $(CYGPATH_W) 'ucs/test_class.cc'; else $(CYGPATH_W) 
'$(srcdir)/ucs/test_class.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_class.Tpo ucs/$(DEPDIR)/gtest-test_class.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_class.cc' object='ucs/gtest-test_class.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_class.obj `if test -f 'ucs/test_class.cc'; then $(CYGPATH_W) 'ucs/test_class.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_class.cc'; fi` + +ucs/gtest-test_config.o: ucs/test_config.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_config.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_config.Tpo -c -o ucs/gtest-test_config.o `test -f 'ucs/test_config.cc' || echo '$(srcdir)/'`ucs/test_config.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_config.Tpo ucs/$(DEPDIR)/gtest-test_config.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_config.cc' object='ucs/gtest-test_config.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_config.o `test -f 'ucs/test_config.cc' || echo '$(srcdir)/'`ucs/test_config.cc + +ucs/gtest-test_config.obj: ucs/test_config.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_config.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_config.Tpo -c -o ucs/gtest-test_config.obj `if test -f 'ucs/test_config.cc'; then 
$(CYGPATH_W) 'ucs/test_config.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_config.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_config.Tpo ucs/$(DEPDIR)/gtest-test_config.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_config.cc' object='ucs/gtest-test_config.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_config.obj `if test -f 'ucs/test_config.cc'; then $(CYGPATH_W) 'ucs/test_config.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_config.cc'; fi` + +ucs/gtest-test_datatype.o: ucs/test_datatype.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_datatype.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_datatype.Tpo -c -o ucs/gtest-test_datatype.o `test -f 'ucs/test_datatype.cc' || echo '$(srcdir)/'`ucs/test_datatype.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_datatype.Tpo ucs/$(DEPDIR)/gtest-test_datatype.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_datatype.cc' object='ucs/gtest-test_datatype.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_datatype.o `test -f 'ucs/test_datatype.cc' || echo '$(srcdir)/'`ucs/test_datatype.cc + +ucs/gtest-test_datatype.obj: ucs/test_datatype.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_datatype.obj -MD -MP -MF 
ucs/$(DEPDIR)/gtest-test_datatype.Tpo -c -o ucs/gtest-test_datatype.obj `if test -f 'ucs/test_datatype.cc'; then $(CYGPATH_W) 'ucs/test_datatype.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_datatype.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_datatype.Tpo ucs/$(DEPDIR)/gtest-test_datatype.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_datatype.cc' object='ucs/gtest-test_datatype.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_datatype.obj `if test -f 'ucs/test_datatype.cc'; then $(CYGPATH_W) 'ucs/test_datatype.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_datatype.cc'; fi` + +ucs/gtest-test_debug.o: ucs/test_debug.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_debug.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_debug.Tpo -c -o ucs/gtest-test_debug.o `test -f 'ucs/test_debug.cc' || echo '$(srcdir)/'`ucs/test_debug.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_debug.Tpo ucs/$(DEPDIR)/gtest-test_debug.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_debug.cc' object='ucs/gtest-test_debug.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_debug.o `test -f 'ucs/test_debug.cc' || echo '$(srcdir)/'`ucs/test_debug.cc + +ucs/gtest-test_debug.obj: ucs/test_debug.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) 
$(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_debug.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_debug.Tpo -c -o ucs/gtest-test_debug.obj `if test -f 'ucs/test_debug.cc'; then $(CYGPATH_W) 'ucs/test_debug.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_debug.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_debug.Tpo ucs/$(DEPDIR)/gtest-test_debug.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_debug.cc' object='ucs/gtest-test_debug.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_debug.obj `if test -f 'ucs/test_debug.cc'; then $(CYGPATH_W) 'ucs/test_debug.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_debug.cc'; fi` + +ucs/gtest-test_memtrack.o: ucs/test_memtrack.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_memtrack.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_memtrack.Tpo -c -o ucs/gtest-test_memtrack.o `test -f 'ucs/test_memtrack.cc' || echo '$(srcdir)/'`ucs/test_memtrack.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_memtrack.Tpo ucs/$(DEPDIR)/gtest-test_memtrack.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_memtrack.cc' object='ucs/gtest-test_memtrack.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_memtrack.o `test -f 'ucs/test_memtrack.cc' || echo '$(srcdir)/'`ucs/test_memtrack.cc + +ucs/gtest-test_memtrack.obj: ucs/test_memtrack.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_memtrack.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_memtrack.Tpo -c -o ucs/gtest-test_memtrack.obj `if test -f 'ucs/test_memtrack.cc'; then $(CYGPATH_W) 'ucs/test_memtrack.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_memtrack.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_memtrack.Tpo ucs/$(DEPDIR)/gtest-test_memtrack.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_memtrack.cc' object='ucs/gtest-test_memtrack.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_memtrack.obj `if test -f 'ucs/test_memtrack.cc'; then $(CYGPATH_W) 'ucs/test_memtrack.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_memtrack.cc'; fi` + +ucs/gtest-test_math.o: ucs/test_math.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_math.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_math.Tpo -c -o ucs/gtest-test_math.o `test -f 'ucs/test_math.cc' || echo '$(srcdir)/'`ucs/test_math.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_math.Tpo ucs/$(DEPDIR)/gtest-test_math.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_math.cc' object='ucs/gtest-test_math.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_math.o `test -f 'ucs/test_math.cc' || echo '$(srcdir)/'`ucs/test_math.cc + +ucs/gtest-test_math.obj: 
ucs/test_math.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_math.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_math.Tpo -c -o ucs/gtest-test_math.obj `if test -f 'ucs/test_math.cc'; then $(CYGPATH_W) 'ucs/test_math.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_math.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_math.Tpo ucs/$(DEPDIR)/gtest-test_math.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_math.cc' object='ucs/gtest-test_math.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_math.obj `if test -f 'ucs/test_math.cc'; then $(CYGPATH_W) 'ucs/test_math.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_math.cc'; fi` + +ucs/gtest-test_mpmc.o: ucs/test_mpmc.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_mpmc.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_mpmc.Tpo -c -o ucs/gtest-test_mpmc.o `test -f 'ucs/test_mpmc.cc' || echo '$(srcdir)/'`ucs/test_mpmc.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_mpmc.Tpo ucs/$(DEPDIR)/gtest-test_mpmc.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_mpmc.cc' object='ucs/gtest-test_mpmc.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_mpmc.o `test -f 'ucs/test_mpmc.cc' || echo '$(srcdir)/'`ucs/test_mpmc.cc + +ucs/gtest-test_mpmc.obj: 
ucs/test_mpmc.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_mpmc.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_mpmc.Tpo -c -o ucs/gtest-test_mpmc.obj `if test -f 'ucs/test_mpmc.cc'; then $(CYGPATH_W) 'ucs/test_mpmc.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_mpmc.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_mpmc.Tpo ucs/$(DEPDIR)/gtest-test_mpmc.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_mpmc.cc' object='ucs/gtest-test_mpmc.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_mpmc.obj `if test -f 'ucs/test_mpmc.cc'; then $(CYGPATH_W) 'ucs/test_mpmc.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_mpmc.cc'; fi` + +ucs/gtest-test_mpool.o: ucs/test_mpool.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_mpool.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_mpool.Tpo -c -o ucs/gtest-test_mpool.o `test -f 'ucs/test_mpool.cc' || echo '$(srcdir)/'`ucs/test_mpool.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_mpool.Tpo ucs/$(DEPDIR)/gtest-test_mpool.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_mpool.cc' object='ucs/gtest-test_mpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_mpool.o `test -f 'ucs/test_mpool.cc' || echo '$(srcdir)/'`ucs/test_mpool.cc + 
+ucs/gtest-test_mpool.obj: ucs/test_mpool.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_mpool.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_mpool.Tpo -c -o ucs/gtest-test_mpool.obj `if test -f 'ucs/test_mpool.cc'; then $(CYGPATH_W) 'ucs/test_mpool.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_mpool.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_mpool.Tpo ucs/$(DEPDIR)/gtest-test_mpool.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_mpool.cc' object='ucs/gtest-test_mpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_mpool.obj `if test -f 'ucs/test_mpool.cc'; then $(CYGPATH_W) 'ucs/test_mpool.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_mpool.cc'; fi` + +ucs/gtest-test_pgtable.o: ucs/test_pgtable.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_pgtable.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_pgtable.Tpo -c -o ucs/gtest-test_pgtable.o `test -f 'ucs/test_pgtable.cc' || echo '$(srcdir)/'`ucs/test_pgtable.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_pgtable.Tpo ucs/$(DEPDIR)/gtest-test_pgtable.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_pgtable.cc' object='ucs/gtest-test_pgtable.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_pgtable.o `test -f 
'ucs/test_pgtable.cc' || echo '$(srcdir)/'`ucs/test_pgtable.cc + +ucs/gtest-test_pgtable.obj: ucs/test_pgtable.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_pgtable.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_pgtable.Tpo -c -o ucs/gtest-test_pgtable.obj `if test -f 'ucs/test_pgtable.cc'; then $(CYGPATH_W) 'ucs/test_pgtable.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_pgtable.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_pgtable.Tpo ucs/$(DEPDIR)/gtest-test_pgtable.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_pgtable.cc' object='ucs/gtest-test_pgtable.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_pgtable.obj `if test -f 'ucs/test_pgtable.cc'; then $(CYGPATH_W) 'ucs/test_pgtable.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_pgtable.cc'; fi` + +ucs/gtest-test_profile.o: ucs/test_profile.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_profile.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_profile.Tpo -c -o ucs/gtest-test_profile.o `test -f 'ucs/test_profile.cc' || echo '$(srcdir)/'`ucs/test_profile.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_profile.Tpo ucs/$(DEPDIR)/gtest-test_profile.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_profile.cc' object='ucs/gtest-test_profile.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) 
$(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_profile.o `test -f 'ucs/test_profile.cc' || echo '$(srcdir)/'`ucs/test_profile.cc + +ucs/gtest-test_profile.obj: ucs/test_profile.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_profile.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_profile.Tpo -c -o ucs/gtest-test_profile.obj `if test -f 'ucs/test_profile.cc'; then $(CYGPATH_W) 'ucs/test_profile.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_profile.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_profile.Tpo ucs/$(DEPDIR)/gtest-test_profile.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_profile.cc' object='ucs/gtest-test_profile.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_profile.obj `if test -f 'ucs/test_profile.cc'; then $(CYGPATH_W) 'ucs/test_profile.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_profile.cc'; fi` + +ucs/gtest-test_rcache.o: ucs/test_rcache.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_rcache.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_rcache.Tpo -c -o ucs/gtest-test_rcache.o `test -f 'ucs/test_rcache.cc' || echo '$(srcdir)/'`ucs/test_rcache.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_rcache.Tpo ucs/$(DEPDIR)/gtest-test_rcache.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_rcache.cc' object='ucs/gtest-test_rcache.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ 
$(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_rcache.o `test -f 'ucs/test_rcache.cc' || echo '$(srcdir)/'`ucs/test_rcache.cc + +ucs/gtest-test_rcache.obj: ucs/test_rcache.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_rcache.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_rcache.Tpo -c -o ucs/gtest-test_rcache.obj `if test -f 'ucs/test_rcache.cc'; then $(CYGPATH_W) 'ucs/test_rcache.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_rcache.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_rcache.Tpo ucs/$(DEPDIR)/gtest-test_rcache.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_rcache.cc' object='ucs/gtest-test_rcache.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_rcache.obj `if test -f 'ucs/test_rcache.cc'; then $(CYGPATH_W) 'ucs/test_rcache.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_rcache.cc'; fi` + +ucs/gtest-test_memtype_cache.o: ucs/test_memtype_cache.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_memtype_cache.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_memtype_cache.Tpo -c -o ucs/gtest-test_memtype_cache.o `test -f 'ucs/test_memtype_cache.cc' || echo '$(srcdir)/'`ucs/test_memtype_cache.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_memtype_cache.Tpo ucs/$(DEPDIR)/gtest-test_memtype_cache.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_memtype_cache.cc' object='ucs/gtest-test_memtype_cache.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_memtype_cache.o `test -f 'ucs/test_memtype_cache.cc' || echo '$(srcdir)/'`ucs/test_memtype_cache.cc + +ucs/gtest-test_memtype_cache.obj: ucs/test_memtype_cache.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_memtype_cache.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_memtype_cache.Tpo -c -o ucs/gtest-test_memtype_cache.obj `if test -f 'ucs/test_memtype_cache.cc'; then $(CYGPATH_W) 'ucs/test_memtype_cache.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_memtype_cache.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_memtype_cache.Tpo ucs/$(DEPDIR)/gtest-test_memtype_cache.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_memtype_cache.cc' object='ucs/gtest-test_memtype_cache.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_memtype_cache.obj `if test -f 'ucs/test_memtype_cache.cc'; then $(CYGPATH_W) 'ucs/test_memtype_cache.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_memtype_cache.cc'; fi` + +ucs/gtest-test_stats.o: ucs/test_stats.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_stats.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_stats.Tpo -c -o ucs/gtest-test_stats.o `test -f 'ucs/test_stats.cc' || echo '$(srcdir)/'`ucs/test_stats.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
ucs/$(DEPDIR)/gtest-test_stats.Tpo ucs/$(DEPDIR)/gtest-test_stats.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_stats.cc' object='ucs/gtest-test_stats.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_stats.o `test -f 'ucs/test_stats.cc' || echo '$(srcdir)/'`ucs/test_stats.cc + +ucs/gtest-test_stats.obj: ucs/test_stats.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_stats.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_stats.Tpo -c -o ucs/gtest-test_stats.obj `if test -f 'ucs/test_stats.cc'; then $(CYGPATH_W) 'ucs/test_stats.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_stats.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_stats.Tpo ucs/$(DEPDIR)/gtest-test_stats.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_stats.cc' object='ucs/gtest-test_stats.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_stats.obj `if test -f 'ucs/test_stats.cc'; then $(CYGPATH_W) 'ucs/test_stats.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_stats.cc'; fi` + +ucs/gtest-test_strided_alloc.o: ucs/test_strided_alloc.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_strided_alloc.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_strided_alloc.Tpo -c -o ucs/gtest-test_strided_alloc.o `test -f 'ucs/test_strided_alloc.cc' || echo 
'$(srcdir)/'`ucs/test_strided_alloc.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_strided_alloc.Tpo ucs/$(DEPDIR)/gtest-test_strided_alloc.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_strided_alloc.cc' object='ucs/gtest-test_strided_alloc.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_strided_alloc.o `test -f 'ucs/test_strided_alloc.cc' || echo '$(srcdir)/'`ucs/test_strided_alloc.cc + +ucs/gtest-test_strided_alloc.obj: ucs/test_strided_alloc.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_strided_alloc.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_strided_alloc.Tpo -c -o ucs/gtest-test_strided_alloc.obj `if test -f 'ucs/test_strided_alloc.cc'; then $(CYGPATH_W) 'ucs/test_strided_alloc.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_strided_alloc.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_strided_alloc.Tpo ucs/$(DEPDIR)/gtest-test_strided_alloc.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_strided_alloc.cc' object='ucs/gtest-test_strided_alloc.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_strided_alloc.obj `if test -f 'ucs/test_strided_alloc.cc'; then $(CYGPATH_W) 'ucs/test_strided_alloc.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_strided_alloc.cc'; fi` + +ucs/gtest-test_string.o: ucs/test_string.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_string.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_string.Tpo -c -o ucs/gtest-test_string.o `test -f 'ucs/test_string.cc' || echo '$(srcdir)/'`ucs/test_string.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_string.Tpo ucs/$(DEPDIR)/gtest-test_string.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_string.cc' object='ucs/gtest-test_string.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_string.o `test -f 'ucs/test_string.cc' || echo '$(srcdir)/'`ucs/test_string.cc + +ucs/gtest-test_string.obj: ucs/test_string.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_string.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_string.Tpo -c -o ucs/gtest-test_string.obj `if test -f 'ucs/test_string.cc'; then $(CYGPATH_W) 'ucs/test_string.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_string.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_string.Tpo ucs/$(DEPDIR)/gtest-test_string.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_string.cc' object='ucs/gtest-test_string.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_string.obj `if test -f 'ucs/test_string.cc'; then $(CYGPATH_W) 'ucs/test_string.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_string.cc'; fi` + +ucs/gtest-test_sys.o: ucs/test_sys.cc 
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_sys.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_sys.Tpo -c -o ucs/gtest-test_sys.o `test -f 'ucs/test_sys.cc' || echo '$(srcdir)/'`ucs/test_sys.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_sys.Tpo ucs/$(DEPDIR)/gtest-test_sys.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_sys.cc' object='ucs/gtest-test_sys.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_sys.o `test -f 'ucs/test_sys.cc' || echo '$(srcdir)/'`ucs/test_sys.cc + +ucs/gtest-test_sys.obj: ucs/test_sys.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_sys.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_sys.Tpo -c -o ucs/gtest-test_sys.obj `if test -f 'ucs/test_sys.cc'; then $(CYGPATH_W) 'ucs/test_sys.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_sys.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_sys.Tpo ucs/$(DEPDIR)/gtest-test_sys.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_sys.cc' object='ucs/gtest-test_sys.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_sys.obj `if test -f 'ucs/test_sys.cc'; then $(CYGPATH_W) 'ucs/test_sys.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_sys.cc'; fi` + +ucs/gtest-test_sock.o: ucs/test_sock.cc +@am__fastdepCXX_TRUE@ 
$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_sock.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_sock.Tpo -c -o ucs/gtest-test_sock.o `test -f 'ucs/test_sock.cc' || echo '$(srcdir)/'`ucs/test_sock.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_sock.Tpo ucs/$(DEPDIR)/gtest-test_sock.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_sock.cc' object='ucs/gtest-test_sock.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_sock.o `test -f 'ucs/test_sock.cc' || echo '$(srcdir)/'`ucs/test_sock.cc + +ucs/gtest-test_sock.obj: ucs/test_sock.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_sock.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_sock.Tpo -c -o ucs/gtest-test_sock.obj `if test -f 'ucs/test_sock.cc'; then $(CYGPATH_W) 'ucs/test_sock.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_sock.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_sock.Tpo ucs/$(DEPDIR)/gtest-test_sock.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_sock.cc' object='ucs/gtest-test_sock.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_sock.obj `if test -f 'ucs/test_sock.cc'; then $(CYGPATH_W) 'ucs/test_sock.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_sock.cc'; fi` + +ucs/gtest-test_time.o: ucs/test_time.cc +@am__fastdepCXX_TRUE@ 
$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_time.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_time.Tpo -c -o ucs/gtest-test_time.o `test -f 'ucs/test_time.cc' || echo '$(srcdir)/'`ucs/test_time.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_time.Tpo ucs/$(DEPDIR)/gtest-test_time.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_time.cc' object='ucs/gtest-test_time.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_time.o `test -f 'ucs/test_time.cc' || echo '$(srcdir)/'`ucs/test_time.cc + +ucs/gtest-test_time.obj: ucs/test_time.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_time.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_time.Tpo -c -o ucs/gtest-test_time.obj `if test -f 'ucs/test_time.cc'; then $(CYGPATH_W) 'ucs/test_time.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_time.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_time.Tpo ucs/$(DEPDIR)/gtest-test_time.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_time.cc' object='ucs/gtest-test_time.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_time.obj `if test -f 'ucs/test_time.cc'; then $(CYGPATH_W) 'ucs/test_time.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_time.cc'; fi` + +ucs/gtest-test_twheel.o: ucs/test_twheel.cc +@am__fastdepCXX_TRUE@ 
$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_twheel.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_twheel.Tpo -c -o ucs/gtest-test_twheel.o `test -f 'ucs/test_twheel.cc' || echo '$(srcdir)/'`ucs/test_twheel.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_twheel.Tpo ucs/$(DEPDIR)/gtest-test_twheel.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_twheel.cc' object='ucs/gtest-test_twheel.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_twheel.o `test -f 'ucs/test_twheel.cc' || echo '$(srcdir)/'`ucs/test_twheel.cc + +ucs/gtest-test_twheel.obj: ucs/test_twheel.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_twheel.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_twheel.Tpo -c -o ucs/gtest-test_twheel.obj `if test -f 'ucs/test_twheel.cc'; then $(CYGPATH_W) 'ucs/test_twheel.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_twheel.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_twheel.Tpo ucs/$(DEPDIR)/gtest-test_twheel.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_twheel.cc' object='ucs/gtest-test_twheel.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_twheel.obj `if test -f 'ucs/test_twheel.cc'; then $(CYGPATH_W) 'ucs/test_twheel.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_twheel.cc'; fi` + 
+ucs/gtest-test_frag_list.o: ucs/test_frag_list.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_frag_list.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_frag_list.Tpo -c -o ucs/gtest-test_frag_list.o `test -f 'ucs/test_frag_list.cc' || echo '$(srcdir)/'`ucs/test_frag_list.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_frag_list.Tpo ucs/$(DEPDIR)/gtest-test_frag_list.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_frag_list.cc' object='ucs/gtest-test_frag_list.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_frag_list.o `test -f 'ucs/test_frag_list.cc' || echo '$(srcdir)/'`ucs/test_frag_list.cc + +ucs/gtest-test_frag_list.obj: ucs/test_frag_list.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_frag_list.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_frag_list.Tpo -c -o ucs/gtest-test_frag_list.obj `if test -f 'ucs/test_frag_list.cc'; then $(CYGPATH_W) 'ucs/test_frag_list.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_frag_list.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_frag_list.Tpo ucs/$(DEPDIR)/gtest-test_frag_list.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_frag_list.cc' object='ucs/gtest-test_frag_list.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o 
ucs/gtest-test_frag_list.obj `if test -f 'ucs/test_frag_list.cc'; then $(CYGPATH_W) 'ucs/test_frag_list.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_frag_list.cc'; fi` + +ucs/gtest-test_type.o: ucs/test_type.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_type.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_type.Tpo -c -o ucs/gtest-test_type.o `test -f 'ucs/test_type.cc' || echo '$(srcdir)/'`ucs/test_type.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_type.Tpo ucs/$(DEPDIR)/gtest-test_type.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_type.cc' object='ucs/gtest-test_type.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_type.o `test -f 'ucs/test_type.cc' || echo '$(srcdir)/'`ucs/test_type.cc + +ucs/gtest-test_type.obj: ucs/test_type.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_type.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_type.Tpo -c -o ucs/gtest-test_type.obj `if test -f 'ucs/test_type.cc'; then $(CYGPATH_W) 'ucs/test_type.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_type.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_type.Tpo ucs/$(DEPDIR)/gtest-test_type.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_type.cc' object='ucs/gtest-test_type.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) 
$(CXXFLAGS) -c -o ucs/gtest-test_type.obj `if test -f 'ucs/test_type.cc'; then $(CYGPATH_W) 'ucs/test_type.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_type.cc'; fi` + +ucs/gtest-test_log.o: ucs/test_log.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_log.o -MD -MP -MF ucs/$(DEPDIR)/gtest-test_log.Tpo -c -o ucs/gtest-test_log.o `test -f 'ucs/test_log.cc' || echo '$(srcdir)/'`ucs/test_log.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_log.Tpo ucs/$(DEPDIR)/gtest-test_log.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_log.cc' object='ucs/gtest-test_log.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/gtest-test_log.o `test -f 'ucs/test_log.cc' || echo '$(srcdir)/'`ucs/test_log.cc + +ucs/gtest-test_log.obj: ucs/test_log.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/gtest-test_log.obj -MD -MP -MF ucs/$(DEPDIR)/gtest-test_log.Tpo -c -o ucs/gtest-test_log.obj `if test -f 'ucs/test_log.cc'; then $(CYGPATH_W) 'ucs/test_log.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_log.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/$(DEPDIR)/gtest-test_log.Tpo ucs/$(DEPDIR)/gtest-test_log.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/test_log.cc' object='ucs/gtest-test_log.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o 
ucs/gtest-test_log.obj `if test -f 'ucs/test_log.cc'; then $(CYGPATH_W) 'ucs/test_log.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/test_log.cc'; fi` + +ucs/arch/gtest-test_x86_64.o: ucs/arch/test_x86_64.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/arch/gtest-test_x86_64.o -MD -MP -MF ucs/arch/$(DEPDIR)/gtest-test_x86_64.Tpo -c -o ucs/arch/gtest-test_x86_64.o `test -f 'ucs/arch/test_x86_64.cc' || echo '$(srcdir)/'`ucs/arch/test_x86_64.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/arch/$(DEPDIR)/gtest-test_x86_64.Tpo ucs/arch/$(DEPDIR)/gtest-test_x86_64.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/arch/test_x86_64.cc' object='ucs/arch/gtest-test_x86_64.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/arch/gtest-test_x86_64.o `test -f 'ucs/arch/test_x86_64.cc' || echo '$(srcdir)/'`ucs/arch/test_x86_64.cc + +ucs/arch/gtest-test_x86_64.obj: ucs/arch/test_x86_64.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucs/arch/gtest-test_x86_64.obj -MD -MP -MF ucs/arch/$(DEPDIR)/gtest-test_x86_64.Tpo -c -o ucs/arch/gtest-test_x86_64.obj `if test -f 'ucs/arch/test_x86_64.cc'; then $(CYGPATH_W) 'ucs/arch/test_x86_64.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/arch/test_x86_64.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucs/arch/$(DEPDIR)/gtest-test_x86_64.Tpo ucs/arch/$(DEPDIR)/gtest-test_x86_64.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucs/arch/test_x86_64.cc' object='ucs/arch/gtest-test_x86_64.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucs/arch/gtest-test_x86_64.obj `if test -f 'ucs/arch/test_x86_64.cc'; then $(CYGPATH_W) 'ucs/arch/test_x86_64.cc'; else $(CYGPATH_W) '$(srcdir)/ucs/arch/test_x86_64.cc'; fi` + +uct/ib/gtest-test_ib.o: uct/ib/test_ib.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ib.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ib.Tpo -c -o uct/ib/gtest-test_ib.o `test -f 'uct/ib/test_ib.cc' || echo '$(srcdir)/'`uct/ib/test_ib.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ib.Tpo uct/ib/$(DEPDIR)/gtest-test_ib.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ib.cc' object='uct/ib/gtest-test_ib.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ib.o `test -f 'uct/ib/test_ib.cc' || echo '$(srcdir)/'`uct/ib/test_ib.cc + +uct/ib/gtest-test_ib.obj: uct/ib/test_ib.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ib.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ib.Tpo -c -o uct/ib/gtest-test_ib.obj `if test -f 'uct/ib/test_ib.cc'; then $(CYGPATH_W) 'uct/ib/test_ib.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ib.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ib.Tpo uct/ib/$(DEPDIR)/gtest-test_ib.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ib.cc' object='uct/ib/gtest-test_ib.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ib.obj `if test -f 'uct/ib/test_ib.cc'; then $(CYGPATH_W) 'uct/ib/test_ib.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ib.cc'; fi` + +uct/ib/gtest-test_ib_md.o: uct/ib/test_ib_md.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ib_md.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ib_md.Tpo -c -o uct/ib/gtest-test_ib_md.o `test -f 'uct/ib/test_ib_md.cc' || echo '$(srcdir)/'`uct/ib/test_ib_md.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ib_md.Tpo uct/ib/$(DEPDIR)/gtest-test_ib_md.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ib_md.cc' object='uct/ib/gtest-test_ib_md.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ib_md.o `test -f 'uct/ib/test_ib_md.cc' || echo '$(srcdir)/'`uct/ib/test_ib_md.cc + +uct/ib/gtest-test_ib_md.obj: uct/ib/test_ib_md.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ib_md.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ib_md.Tpo -c -o uct/ib/gtest-test_ib_md.obj `if test -f 'uct/ib/test_ib_md.cc'; then $(CYGPATH_W) 'uct/ib/test_ib_md.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ib_md.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ib_md.Tpo uct/ib/$(DEPDIR)/gtest-test_ib_md.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='uct/ib/test_ib_md.cc' object='uct/ib/gtest-test_ib_md.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ib_md.obj `if test -f 'uct/ib/test_ib_md.cc'; then $(CYGPATH_W) 'uct/ib/test_ib_md.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ib_md.cc'; fi` + +uct/ib/gtest-test_cq_moderation.o: uct/ib/test_cq_moderation.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_cq_moderation.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_cq_moderation.Tpo -c -o uct/ib/gtest-test_cq_moderation.o `test -f 'uct/ib/test_cq_moderation.cc' || echo '$(srcdir)/'`uct/ib/test_cq_moderation.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_cq_moderation.Tpo uct/ib/$(DEPDIR)/gtest-test_cq_moderation.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_cq_moderation.cc' object='uct/ib/gtest-test_cq_moderation.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_cq_moderation.o `test -f 'uct/ib/test_cq_moderation.cc' || echo '$(srcdir)/'`uct/ib/test_cq_moderation.cc + +uct/ib/gtest-test_cq_moderation.obj: uct/ib/test_cq_moderation.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_cq_moderation.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_cq_moderation.Tpo -c -o uct/ib/gtest-test_cq_moderation.obj `if test -f 
'uct/ib/test_cq_moderation.cc'; then $(CYGPATH_W) 'uct/ib/test_cq_moderation.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_cq_moderation.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_cq_moderation.Tpo uct/ib/$(DEPDIR)/gtest-test_cq_moderation.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_cq_moderation.cc' object='uct/ib/gtest-test_cq_moderation.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_cq_moderation.obj `if test -f 'uct/ib/test_cq_moderation.cc'; then $(CYGPATH_W) 'uct/ib/test_cq_moderation.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_cq_moderation.cc'; fi` + +uct/ib/gtest-test_ib_xfer.o: uct/ib/test_ib_xfer.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ib_xfer.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ib_xfer.Tpo -c -o uct/ib/gtest-test_ib_xfer.o `test -f 'uct/ib/test_ib_xfer.cc' || echo '$(srcdir)/'`uct/ib/test_ib_xfer.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ib_xfer.Tpo uct/ib/$(DEPDIR)/gtest-test_ib_xfer.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ib_xfer.cc' object='uct/ib/gtest-test_ib_xfer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ib_xfer.o `test -f 'uct/ib/test_ib_xfer.cc' || echo '$(srcdir)/'`uct/ib/test_ib_xfer.cc + +uct/ib/gtest-test_ib_xfer.obj: uct/ib/test_ib_xfer.cc +@am__fastdepCXX_TRUE@ 
$(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ib_xfer.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ib_xfer.Tpo -c -o uct/ib/gtest-test_ib_xfer.obj `if test -f 'uct/ib/test_ib_xfer.cc'; then $(CYGPATH_W) 'uct/ib/test_ib_xfer.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ib_xfer.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ib_xfer.Tpo uct/ib/$(DEPDIR)/gtest-test_ib_xfer.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ib_xfer.cc' object='uct/ib/gtest-test_ib_xfer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ib_xfer.obj `if test -f 'uct/ib/test_ib_xfer.cc'; then $(CYGPATH_W) 'uct/ib/test_ib_xfer.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ib_xfer.cc'; fi` + +uct/ib/gtest-test_ib_pkey.o: uct/ib/test_ib_pkey.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ib_pkey.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ib_pkey.Tpo -c -o uct/ib/gtest-test_ib_pkey.o `test -f 'uct/ib/test_ib_pkey.cc' || echo '$(srcdir)/'`uct/ib/test_ib_pkey.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ib_pkey.Tpo uct/ib/$(DEPDIR)/gtest-test_ib_pkey.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ib_pkey.cc' object='uct/ib/gtest-test_ib_pkey.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o 
uct/ib/gtest-test_ib_pkey.o `test -f 'uct/ib/test_ib_pkey.cc' || echo '$(srcdir)/'`uct/ib/test_ib_pkey.cc + +uct/ib/gtest-test_ib_pkey.obj: uct/ib/test_ib_pkey.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ib_pkey.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ib_pkey.Tpo -c -o uct/ib/gtest-test_ib_pkey.obj `if test -f 'uct/ib/test_ib_pkey.cc'; then $(CYGPATH_W) 'uct/ib/test_ib_pkey.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ib_pkey.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ib_pkey.Tpo uct/ib/$(DEPDIR)/gtest-test_ib_pkey.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ib_pkey.cc' object='uct/ib/gtest-test_ib_pkey.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ib_pkey.obj `if test -f 'uct/ib/test_ib_pkey.cc'; then $(CYGPATH_W) 'uct/ib/test_ib_pkey.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ib_pkey.cc'; fi` + +uct/ib/gtest-test_devx.o: uct/ib/test_devx.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_devx.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_devx.Tpo -c -o uct/ib/gtest-test_devx.o `test -f 'uct/ib/test_devx.cc' || echo '$(srcdir)/'`uct/ib/test_devx.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_devx.Tpo uct/ib/$(DEPDIR)/gtest-test_devx.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_devx.cc' object='uct/ib/gtest-test_devx.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_devx.o `test -f 'uct/ib/test_devx.cc' || echo '$(srcdir)/'`uct/ib/test_devx.cc + +uct/ib/gtest-test_devx.obj: uct/ib/test_devx.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_devx.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_devx.Tpo -c -o uct/ib/gtest-test_devx.obj `if test -f 'uct/ib/test_devx.cc'; then $(CYGPATH_W) 'uct/ib/test_devx.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_devx.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_devx.Tpo uct/ib/$(DEPDIR)/gtest-test_devx.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_devx.cc' object='uct/ib/gtest-test_devx.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_devx.obj `if test -f 'uct/ib/test_devx.cc'; then $(CYGPATH_W) 'uct/ib/test_devx.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_devx.cc'; fi` + +uct/ib/gtest-ud_base.o: uct/ib/ud_base.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-ud_base.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-ud_base.Tpo -c -o uct/ib/gtest-ud_base.o `test -f 'uct/ib/ud_base.cc' || echo '$(srcdir)/'`uct/ib/ud_base.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-ud_base.Tpo uct/ib/$(DEPDIR)/gtest-ud_base.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/ud_base.cc' object='uct/ib/gtest-ud_base.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-ud_base.o `test -f 'uct/ib/ud_base.cc' || echo '$(srcdir)/'`uct/ib/ud_base.cc + +uct/ib/gtest-ud_base.obj: uct/ib/ud_base.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-ud_base.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-ud_base.Tpo -c -o uct/ib/gtest-ud_base.obj `if test -f 'uct/ib/ud_base.cc'; then $(CYGPATH_W) 'uct/ib/ud_base.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/ud_base.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-ud_base.Tpo uct/ib/$(DEPDIR)/gtest-ud_base.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/ud_base.cc' object='uct/ib/gtest-ud_base.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-ud_base.obj `if test -f 'uct/ib/ud_base.cc'; then $(CYGPATH_W) 'uct/ib/ud_base.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/ud_base.cc'; fi` + +uct/ib/gtest-test_ud.o: uct/ib/test_ud.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ud.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ud.Tpo -c -o uct/ib/gtest-test_ud.o `test -f 'uct/ib/test_ud.cc' || echo '$(srcdir)/'`uct/ib/test_ud.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ud.Tpo uct/ib/$(DEPDIR)/gtest-test_ud.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ud.cc' object='uct/ib/gtest-test_ud.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ud.o `test -f 'uct/ib/test_ud.cc' || echo '$(srcdir)/'`uct/ib/test_ud.cc + +uct/ib/gtest-test_ud.obj: uct/ib/test_ud.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ud.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ud.Tpo -c -o uct/ib/gtest-test_ud.obj `if test -f 'uct/ib/test_ud.cc'; then $(CYGPATH_W) 'uct/ib/test_ud.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ud.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ud.Tpo uct/ib/$(DEPDIR)/gtest-test_ud.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ud.cc' object='uct/ib/gtest-test_ud.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ud.obj `if test -f 'uct/ib/test_ud.cc'; then $(CYGPATH_W) 'uct/ib/test_ud.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ud.cc'; fi` + +uct/ib/gtest-test_ud_slow_timer.o: uct/ib/test_ud_slow_timer.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ud_slow_timer.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ud_slow_timer.Tpo -c -o uct/ib/gtest-test_ud_slow_timer.o `test -f 'uct/ib/test_ud_slow_timer.cc' || echo '$(srcdir)/'`uct/ib/test_ud_slow_timer.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ud_slow_timer.Tpo uct/ib/$(DEPDIR)/gtest-test_ud_slow_timer.Po 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ud_slow_timer.cc' object='uct/ib/gtest-test_ud_slow_timer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ud_slow_timer.o `test -f 'uct/ib/test_ud_slow_timer.cc' || echo '$(srcdir)/'`uct/ib/test_ud_slow_timer.cc + +uct/ib/gtest-test_ud_slow_timer.obj: uct/ib/test_ud_slow_timer.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ud_slow_timer.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ud_slow_timer.Tpo -c -o uct/ib/gtest-test_ud_slow_timer.obj `if test -f 'uct/ib/test_ud_slow_timer.cc'; then $(CYGPATH_W) 'uct/ib/test_ud_slow_timer.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ud_slow_timer.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ud_slow_timer.Tpo uct/ib/$(DEPDIR)/gtest-test_ud_slow_timer.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ud_slow_timer.cc' object='uct/ib/gtest-test_ud_slow_timer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ud_slow_timer.obj `if test -f 'uct/ib/test_ud_slow_timer.cc'; then $(CYGPATH_W) 'uct/ib/test_ud_slow_timer.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ud_slow_timer.cc'; fi` + +uct/ib/gtest-test_ud_pending.o: uct/ib/test_ud_pending.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT 
uct/ib/gtest-test_ud_pending.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ud_pending.Tpo -c -o uct/ib/gtest-test_ud_pending.o `test -f 'uct/ib/test_ud_pending.cc' || echo '$(srcdir)/'`uct/ib/test_ud_pending.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ud_pending.Tpo uct/ib/$(DEPDIR)/gtest-test_ud_pending.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ud_pending.cc' object='uct/ib/gtest-test_ud_pending.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ud_pending.o `test -f 'uct/ib/test_ud_pending.cc' || echo '$(srcdir)/'`uct/ib/test_ud_pending.cc + +uct/ib/gtest-test_ud_pending.obj: uct/ib/test_ud_pending.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ud_pending.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ud_pending.Tpo -c -o uct/ib/gtest-test_ud_pending.obj `if test -f 'uct/ib/test_ud_pending.cc'; then $(CYGPATH_W) 'uct/ib/test_ud_pending.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ud_pending.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ud_pending.Tpo uct/ib/$(DEPDIR)/gtest-test_ud_pending.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ud_pending.cc' object='uct/ib/gtest-test_ud_pending.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ud_pending.obj `if test -f 'uct/ib/test_ud_pending.cc'; then $(CYGPATH_W) 'uct/ib/test_ud_pending.cc'; else 
$(CYGPATH_W) '$(srcdir)/uct/ib/test_ud_pending.cc'; fi` + +uct/ib/gtest-test_ud_ds.o: uct/ib/test_ud_ds.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ud_ds.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ud_ds.Tpo -c -o uct/ib/gtest-test_ud_ds.o `test -f 'uct/ib/test_ud_ds.cc' || echo '$(srcdir)/'`uct/ib/test_ud_ds.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ud_ds.Tpo uct/ib/$(DEPDIR)/gtest-test_ud_ds.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ud_ds.cc' object='uct/ib/gtest-test_ud_ds.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_ud_ds.o `test -f 'uct/ib/test_ud_ds.cc' || echo '$(srcdir)/'`uct/ib/test_ud_ds.cc + +uct/ib/gtest-test_ud_ds.obj: uct/ib/test_ud_ds.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_ud_ds.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_ud_ds.Tpo -c -o uct/ib/gtest-test_ud_ds.obj `if test -f 'uct/ib/test_ud_ds.cc'; then $(CYGPATH_W) 'uct/ib/test_ud_ds.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ud_ds.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_ud_ds.Tpo uct/ib/$(DEPDIR)/gtest-test_ud_ds.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_ud_ds.cc' object='uct/ib/gtest-test_ud_ds.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) 
-c -o uct/ib/gtest-test_ud_ds.obj `if test -f 'uct/ib/test_ud_ds.cc'; then $(CYGPATH_W) 'uct/ib/test_ud_ds.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_ud_ds.cc'; fi` + +uct/ib/gtest-test_rc.o: uct/ib/test_rc.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_rc.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_rc.Tpo -c -o uct/ib/gtest-test_rc.o `test -f 'uct/ib/test_rc.cc' || echo '$(srcdir)/'`uct/ib/test_rc.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_rc.Tpo uct/ib/$(DEPDIR)/gtest-test_rc.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_rc.cc' object='uct/ib/gtest-test_rc.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_rc.o `test -f 'uct/ib/test_rc.cc' || echo '$(srcdir)/'`uct/ib/test_rc.cc + +uct/ib/gtest-test_rc.obj: uct/ib/test_rc.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_rc.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_rc.Tpo -c -o uct/ib/gtest-test_rc.obj `if test -f 'uct/ib/test_rc.cc'; then $(CYGPATH_W) 'uct/ib/test_rc.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_rc.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_rc.Tpo uct/ib/$(DEPDIR)/gtest-test_rc.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_rc.cc' object='uct/ib/gtest-test_rc.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) 
$(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_rc.obj `if test -f 'uct/ib/test_rc.cc'; then $(CYGPATH_W) 'uct/ib/test_rc.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_rc.cc'; fi` + +uct/ib/gtest-test_dc.o: uct/ib/test_dc.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_dc.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_dc.Tpo -c -o uct/ib/gtest-test_dc.o `test -f 'uct/ib/test_dc.cc' || echo '$(srcdir)/'`uct/ib/test_dc.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_dc.Tpo uct/ib/$(DEPDIR)/gtest-test_dc.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_dc.cc' object='uct/ib/gtest-test_dc.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_dc.o `test -f 'uct/ib/test_dc.cc' || echo '$(srcdir)/'`uct/ib/test_dc.cc + +uct/ib/gtest-test_dc.obj: uct/ib/test_dc.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_dc.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_dc.Tpo -c -o uct/ib/gtest-test_dc.obj `if test -f 'uct/ib/test_dc.cc'; then $(CYGPATH_W) 'uct/ib/test_dc.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_dc.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_dc.Tpo uct/ib/$(DEPDIR)/gtest-test_dc.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_dc.cc' object='uct/ib/gtest-test_dc.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_dc.obj `if test -f 'uct/ib/test_dc.cc'; then $(CYGPATH_W) 'uct/ib/test_dc.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_dc.cc'; fi` + +uct/ib/gtest-test_sockaddr.o: uct/ib/test_sockaddr.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_sockaddr.o -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_sockaddr.Tpo -c -o uct/ib/gtest-test_sockaddr.o `test -f 'uct/ib/test_sockaddr.cc' || echo '$(srcdir)/'`uct/ib/test_sockaddr.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_sockaddr.Tpo uct/ib/$(DEPDIR)/gtest-test_sockaddr.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_sockaddr.cc' object='uct/ib/gtest-test_sockaddr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_sockaddr.o `test -f 'uct/ib/test_sockaddr.cc' || echo '$(srcdir)/'`uct/ib/test_sockaddr.cc + +uct/ib/gtest-test_sockaddr.obj: uct/ib/test_sockaddr.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT uct/ib/gtest-test_sockaddr.obj -MD -MP -MF uct/ib/$(DEPDIR)/gtest-test_sockaddr.Tpo -c -o uct/ib/gtest-test_sockaddr.obj `if test -f 'uct/ib/test_sockaddr.cc'; then $(CYGPATH_W) 'uct/ib/test_sockaddr.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_sockaddr.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) uct/ib/$(DEPDIR)/gtest-test_sockaddr.Tpo uct/ib/$(DEPDIR)/gtest-test_sockaddr.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='uct/ib/test_sockaddr.cc' object='uct/ib/gtest-test_sockaddr.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o uct/ib/gtest-test_sockaddr.obj `if test -f 'uct/ib/test_sockaddr.cc'; then $(CYGPATH_W) 'uct/ib/test_sockaddr.cc'; else $(CYGPATH_W) '$(srcdir)/uct/ib/test_sockaddr.cc'; fi` + +ucm/gtest-cuda_hooks.o: ucm/cuda_hooks.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucm/gtest-cuda_hooks.o -MD -MP -MF ucm/$(DEPDIR)/gtest-cuda_hooks.Tpo -c -o ucm/gtest-cuda_hooks.o `test -f 'ucm/cuda_hooks.cc' || echo '$(srcdir)/'`ucm/cuda_hooks.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucm/$(DEPDIR)/gtest-cuda_hooks.Tpo ucm/$(DEPDIR)/gtest-cuda_hooks.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucm/cuda_hooks.cc' object='ucm/gtest-cuda_hooks.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucm/gtest-cuda_hooks.o `test -f 'ucm/cuda_hooks.cc' || echo '$(srcdir)/'`ucm/cuda_hooks.cc + +ucm/gtest-cuda_hooks.obj: ucm/cuda_hooks.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucm/gtest-cuda_hooks.obj -MD -MP -MF ucm/$(DEPDIR)/gtest-cuda_hooks.Tpo -c -o ucm/gtest-cuda_hooks.obj `if test -f 'ucm/cuda_hooks.cc'; then $(CYGPATH_W) 'ucm/cuda_hooks.cc'; else $(CYGPATH_W) '$(srcdir)/ucm/cuda_hooks.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucm/$(DEPDIR)/gtest-cuda_hooks.Tpo ucm/$(DEPDIR)/gtest-cuda_hooks.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucm/cuda_hooks.cc' 
object='ucm/gtest-cuda_hooks.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucm/gtest-cuda_hooks.obj `if test -f 'ucm/cuda_hooks.cc'; then $(CYGPATH_W) 'ucm/cuda_hooks.cc'; else $(CYGPATH_W) '$(srcdir)/ucm/cuda_hooks.cc'; fi` + +ucm/gtest-rocm_hooks.o: ucm/rocm_hooks.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucm/gtest-rocm_hooks.o -MD -MP -MF ucm/$(DEPDIR)/gtest-rocm_hooks.Tpo -c -o ucm/gtest-rocm_hooks.o `test -f 'ucm/rocm_hooks.cc' || echo '$(srcdir)/'`ucm/rocm_hooks.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucm/$(DEPDIR)/gtest-rocm_hooks.Tpo ucm/$(DEPDIR)/gtest-rocm_hooks.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='ucm/rocm_hooks.cc' object='ucm/gtest-rocm_hooks.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucm/gtest-rocm_hooks.o `test -f 'ucm/rocm_hooks.cc' || echo '$(srcdir)/'`ucm/rocm_hooks.cc + +ucm/gtest-rocm_hooks.obj: ucm/rocm_hooks.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -MT ucm/gtest-rocm_hooks.obj -MD -MP -MF ucm/$(DEPDIR)/gtest-rocm_hooks.Tpo -c -o ucm/gtest-rocm_hooks.obj `if test -f 'ucm/rocm_hooks.cc'; then $(CYGPATH_W) 'ucm/rocm_hooks.cc'; else $(CYGPATH_W) '$(srcdir)/ucm/rocm_hooks.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) ucm/$(DEPDIR)/gtest-rocm_hooks.Tpo ucm/$(DEPDIR)/gtest-rocm_hooks.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='ucm/rocm_hooks.cc' object='ucm/gtest-rocm_hooks.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(gtest_CPPFLAGS) $(CPPFLAGS) $(gtest_CXXFLAGS) $(CXXFLAGS) -c -o ucm/gtest-rocm_hooks.obj `if test -f 'ucm/rocm_hooks.cc'; then $(CYGPATH_W) 'ucm/rocm_hooks.cc'; else $(CYGPATH_W) '$(srcdir)/ucm/rocm_hooks.cc'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. 
+$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! 
-f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d 
"$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +@HAVE_GTEST_FALSE@all-local: +all-am: Makefile $(PROGRAMS) $(HEADERS) all-local +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) 
INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f common/$(DEPDIR)/$(am__dirstamp) + -rm -f common/$(am__dirstamp) + -rm -f ucm/$(DEPDIR)/$(am__dirstamp) + -rm -f ucm/$(am__dirstamp) + -rm -f ucp/$(DEPDIR)/$(am__dirstamp) + -rm -f ucp/$(am__dirstamp) + -rm -f ucs/$(DEPDIR)/$(am__dirstamp) + -rm -f ucs/$(am__dirstamp) + -rm -f ucs/arch/$(DEPDIR)/$(am__dirstamp) + -rm -f ucs/arch/$(am__dirstamp) + -rm -f uct/$(DEPDIR)/$(am__dirstamp) + -rm -f uct/$(am__dirstamp) + -rm -f uct/ib/$(DEPDIR)/$(am__dirstamp) + -rm -f uct/ib/$(am__dirstamp) + -rm -f uct/tcp/$(DEPDIR)/$(am__dirstamp) + -rm -f uct/tcp/$(am__dirstamp) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f common/$(DEPDIR)/gtest-gtest-all.Po + -rm -f common/$(DEPDIR)/gtest-main.Po + -rm -f common/$(DEPDIR)/gtest-mem_buffer.Po + -rm -f common/$(DEPDIR)/gtest-test.Po + -rm -f common/$(DEPDIR)/gtest-test_gtest_cmn.Po + -rm -f common/$(DEPDIR)/gtest-test_helpers.Po + -rm -f common/$(DEPDIR)/gtest-test_obj_size.Po + -rm -f common/$(DEPDIR)/gtest-test_perf.Po + -rm -f common/$(DEPDIR)/gtest-test_watchdog.Po + -rm -f ucm/$(DEPDIR)/gtest-cuda_hooks.Po + -rm -f ucm/$(DEPDIR)/gtest-malloc_hook.Po + -rm -f ucm/$(DEPDIR)/gtest-rocm_hooks.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_am.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_atomic.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_context.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_dt.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_fence.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_mem_type.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_memheap.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_mmap.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_peer_failure.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_perf.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_rma.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_rma_mt.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_sockaddr.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_stream.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_cancel.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_match.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_mem_type.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_mt.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_offload.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_perf.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_probe.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_xfer.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_wakeup.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_wireup.Po + -rm -f ucp/$(DEPDIR)/gtest-ucp_datatype.Po + -rm -f ucp/$(DEPDIR)/gtest-ucp_test.Po + -rm -f 
ucs/$(DEPDIR)/gtest-test_algorithm.Po + -rm -f ucs/$(DEPDIR)/gtest-test_arbiter.Po + -rm -f ucs/$(DEPDIR)/gtest-test_async.Po + -rm -f ucs/$(DEPDIR)/gtest-test_callbackq.Po + -rm -f ucs/$(DEPDIR)/gtest-test_class.Po + -rm -f ucs/$(DEPDIR)/gtest-test_config.Po + -rm -f ucs/$(DEPDIR)/gtest-test_datatype.Po + -rm -f ucs/$(DEPDIR)/gtest-test_debug.Po + -rm -f ucs/$(DEPDIR)/gtest-test_event_set.Po + -rm -f ucs/$(DEPDIR)/gtest-test_frag_list.Po + -rm -f ucs/$(DEPDIR)/gtest-test_log.Po + -rm -f ucs/$(DEPDIR)/gtest-test_math.Po + -rm -f ucs/$(DEPDIR)/gtest-test_memtrack.Po + -rm -f ucs/$(DEPDIR)/gtest-test_memtype_cache.Po + -rm -f ucs/$(DEPDIR)/gtest-test_mpmc.Po + -rm -f ucs/$(DEPDIR)/gtest-test_mpool.Po + -rm -f ucs/$(DEPDIR)/gtest-test_pgtable.Po + -rm -f ucs/$(DEPDIR)/gtest-test_profile.Po + -rm -f ucs/$(DEPDIR)/gtest-test_rcache.Po + -rm -f ucs/$(DEPDIR)/gtest-test_sock.Po + -rm -f ucs/$(DEPDIR)/gtest-test_stats.Po + -rm -f ucs/$(DEPDIR)/gtest-test_stats_filter.Po + -rm -f ucs/$(DEPDIR)/gtest-test_strided_alloc.Po + -rm -f ucs/$(DEPDIR)/gtest-test_string.Po + -rm -f ucs/$(DEPDIR)/gtest-test_sys.Po + -rm -f ucs/$(DEPDIR)/gtest-test_time.Po + -rm -f ucs/$(DEPDIR)/gtest-test_twheel.Po + -rm -f ucs/$(DEPDIR)/gtest-test_type.Po + -rm -f ucs/arch/$(DEPDIR)/gtest-test_x86_64.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_add_xor.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_and_or.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_cswap.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_fadd_fxor.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_fand_for.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_swap.Po + -rm -f uct/$(DEPDIR)/gtest-test_event.Po + -rm -f uct/$(DEPDIR)/gtest-test_fence.Po + -rm -f uct/$(DEPDIR)/gtest-test_flush.Po + -rm -f uct/$(DEPDIR)/gtest-test_many2one_am.Po + -rm -f uct/$(DEPDIR)/gtest-test_md.Po + -rm -f uct/$(DEPDIR)/gtest-test_mem.Po + -rm -f uct/$(DEPDIR)/gtest-test_mm.Po + -rm -f uct/$(DEPDIR)/gtest-test_p2p_am.Po + -rm -f 
uct/$(DEPDIR)/gtest-test_p2p_err.Po + -rm -f uct/$(DEPDIR)/gtest-test_p2p_mix.Po + -rm -f uct/$(DEPDIR)/gtest-test_p2p_rma.Po + -rm -f uct/$(DEPDIR)/gtest-test_peer_failure.Po + -rm -f uct/$(DEPDIR)/gtest-test_pending.Po + -rm -f uct/$(DEPDIR)/gtest-test_progress.Po + -rm -f uct/$(DEPDIR)/gtest-test_stats.Po + -rm -f uct/$(DEPDIR)/gtest-test_tag.Po + -rm -f uct/$(DEPDIR)/gtest-test_uct_ep.Po + -rm -f uct/$(DEPDIR)/gtest-test_uct_perf.Po + -rm -f uct/$(DEPDIR)/gtest-test_zcopy_comp.Po + -rm -f uct/$(DEPDIR)/gtest-uct_p2p_test.Po + -rm -f uct/$(DEPDIR)/gtest-uct_test.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_cq_moderation.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_dc.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_devx.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ib.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ib_md.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ib_pkey.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ib_xfer.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_rc.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_sockaddr.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ud.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ud_ds.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ud_pending.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ud_slow_timer.Po + -rm -f uct/ib/$(DEPDIR)/gtest-ud_base.Po + -rm -f uct/tcp/$(DEPDIR)/gtest-test_tcp.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f common/$(DEPDIR)/gtest-gtest-all.Po + -rm -f common/$(DEPDIR)/gtest-main.Po + -rm -f 
common/$(DEPDIR)/gtest-mem_buffer.Po + -rm -f common/$(DEPDIR)/gtest-test.Po + -rm -f common/$(DEPDIR)/gtest-test_gtest_cmn.Po + -rm -f common/$(DEPDIR)/gtest-test_helpers.Po + -rm -f common/$(DEPDIR)/gtest-test_obj_size.Po + -rm -f common/$(DEPDIR)/gtest-test_perf.Po + -rm -f common/$(DEPDIR)/gtest-test_watchdog.Po + -rm -f ucm/$(DEPDIR)/gtest-cuda_hooks.Po + -rm -f ucm/$(DEPDIR)/gtest-malloc_hook.Po + -rm -f ucm/$(DEPDIR)/gtest-rocm_hooks.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_am.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_atomic.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_context.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_dt.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_fence.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_mem_type.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_memheap.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_mmap.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_peer_failure.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_perf.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_rma.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_rma_mt.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_sockaddr.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_stream.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_cancel.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_match.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_mem_type.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_mt.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_offload.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_perf.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_probe.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_tag_xfer.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_wakeup.Po + -rm -f ucp/$(DEPDIR)/gtest-test_ucp_wireup.Po + -rm -f ucp/$(DEPDIR)/gtest-ucp_datatype.Po + -rm -f ucp/$(DEPDIR)/gtest-ucp_test.Po + -rm -f ucs/$(DEPDIR)/gtest-test_algorithm.Po + -rm -f ucs/$(DEPDIR)/gtest-test_arbiter.Po + -rm -f ucs/$(DEPDIR)/gtest-test_async.Po + -rm -f ucs/$(DEPDIR)/gtest-test_callbackq.Po + -rm -f ucs/$(DEPDIR)/gtest-test_class.Po + -rm -f 
ucs/$(DEPDIR)/gtest-test_config.Po + -rm -f ucs/$(DEPDIR)/gtest-test_datatype.Po + -rm -f ucs/$(DEPDIR)/gtest-test_debug.Po + -rm -f ucs/$(DEPDIR)/gtest-test_event_set.Po + -rm -f ucs/$(DEPDIR)/gtest-test_frag_list.Po + -rm -f ucs/$(DEPDIR)/gtest-test_log.Po + -rm -f ucs/$(DEPDIR)/gtest-test_math.Po + -rm -f ucs/$(DEPDIR)/gtest-test_memtrack.Po + -rm -f ucs/$(DEPDIR)/gtest-test_memtype_cache.Po + -rm -f ucs/$(DEPDIR)/gtest-test_mpmc.Po + -rm -f ucs/$(DEPDIR)/gtest-test_mpool.Po + -rm -f ucs/$(DEPDIR)/gtest-test_pgtable.Po + -rm -f ucs/$(DEPDIR)/gtest-test_profile.Po + -rm -f ucs/$(DEPDIR)/gtest-test_rcache.Po + -rm -f ucs/$(DEPDIR)/gtest-test_sock.Po + -rm -f ucs/$(DEPDIR)/gtest-test_stats.Po + -rm -f ucs/$(DEPDIR)/gtest-test_stats_filter.Po + -rm -f ucs/$(DEPDIR)/gtest-test_strided_alloc.Po + -rm -f ucs/$(DEPDIR)/gtest-test_string.Po + -rm -f ucs/$(DEPDIR)/gtest-test_sys.Po + -rm -f ucs/$(DEPDIR)/gtest-test_time.Po + -rm -f ucs/$(DEPDIR)/gtest-test_twheel.Po + -rm -f ucs/$(DEPDIR)/gtest-test_type.Po + -rm -f ucs/arch/$(DEPDIR)/gtest-test_x86_64.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_add_xor.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_and_or.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_cswap.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_fadd_fxor.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_fand_for.Po + -rm -f uct/$(DEPDIR)/gtest-test_amo_swap.Po + -rm -f uct/$(DEPDIR)/gtest-test_event.Po + -rm -f uct/$(DEPDIR)/gtest-test_fence.Po + -rm -f uct/$(DEPDIR)/gtest-test_flush.Po + -rm -f uct/$(DEPDIR)/gtest-test_many2one_am.Po + -rm -f uct/$(DEPDIR)/gtest-test_md.Po + -rm -f uct/$(DEPDIR)/gtest-test_mem.Po + -rm -f uct/$(DEPDIR)/gtest-test_mm.Po + -rm -f uct/$(DEPDIR)/gtest-test_p2p_am.Po + -rm -f uct/$(DEPDIR)/gtest-test_p2p_err.Po + -rm -f uct/$(DEPDIR)/gtest-test_p2p_mix.Po + -rm -f uct/$(DEPDIR)/gtest-test_p2p_rma.Po + -rm -f uct/$(DEPDIR)/gtest-test_peer_failure.Po + -rm -f uct/$(DEPDIR)/gtest-test_pending.Po + -rm -f 
uct/$(DEPDIR)/gtest-test_progress.Po + -rm -f uct/$(DEPDIR)/gtest-test_stats.Po + -rm -f uct/$(DEPDIR)/gtest-test_tag.Po + -rm -f uct/$(DEPDIR)/gtest-test_uct_ep.Po + -rm -f uct/$(DEPDIR)/gtest-test_uct_perf.Po + -rm -f uct/$(DEPDIR)/gtest-test_zcopy_comp.Po + -rm -f uct/$(DEPDIR)/gtest-uct_p2p_test.Po + -rm -f uct/$(DEPDIR)/gtest-uct_test.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_cq_moderation.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_dc.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_devx.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ib.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ib_md.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ib_pkey.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ib_xfer.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_rc.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_sockaddr.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ud.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ud_ds.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ud_pending.Po + -rm -f uct/ib/$(DEPDIR)/gtest-test_ud_slow_timer.Po + -rm -f uct/ib/$(DEPDIR)/gtest-ud_base.Po + -rm -f uct/tcp/$(DEPDIR)/gtest-test_tcp.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \ + am--depfiles check check-am clean clean-generic clean-libtool \ + clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am 
installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am + +.PRECIOUS: Makefile + + +# Set default configuration for running tests +@HAVE_GTEST_TRUE@UCX_HANDLE_ERRORS ?= freeze +@HAVE_GTEST_TRUE@UCX_LOG_LEVEL ?= warn +@HAVE_GTEST_TRUE@UCX_LOG_PRINT_ENABLE ?= y +@HAVE_GTEST_TRUE@GTEST_FILTER ?= * +@HAVE_GTEST_TRUE@GTEST_EXTRA_ARGS ?= +@HAVE_GTEST_TRUE@LAUNCHER ?= +@HAVE_GTEST_TRUE@VALGRIND_EXTRA_ARGS ?= + +@HAVE_GTEST_TRUE@export UCX_HANDLE_ERRORS +@HAVE_GTEST_TRUE@export UCX_LOG_LEVEL +@HAVE_GTEST_TRUE@export UCX_LOG_PRINT_ENABLE +# The command-style targets below create no file of their own name, so declare them phony +@HAVE_GTEST_TRUE@.PHONY: help list test test_gdb test_valgrind gdb valgrind fix_rpath ucx + +@HAVE_GTEST_TRUE@all-local: gtest + +@HAVE_GTEST_TRUE@ucx: +@HAVE_GTEST_TRUE@ $(MAKE) -C $(top_builddir) + +@HAVE_GTEST_TRUE@help: +@HAVE_GTEST_TRUE@ @echo +@HAVE_GTEST_TRUE@ @echo "Targets:" +@HAVE_GTEST_TRUE@ @echo " list : List unit tests." +@HAVE_GTEST_TRUE@ @echo " test : Run unit tests." +@HAVE_GTEST_TRUE@ @echo " test_gdb : Run unit tests with GDB." +@HAVE_GTEST_TRUE@ @echo " test_valgrind : Run unit tests with Valgrind." 
+@HAVE_GTEST_TRUE@ @echo +@HAVE_GTEST_TRUE@ @echo "Environment variables:" +@HAVE_GTEST_TRUE@ @echo " GTEST_FILTER : Unit tests filter (\"$(GTEST_FILTER)\")" +@HAVE_GTEST_TRUE@ @echo " GTEST_EXTRA_ARGS : Additional arguments for gtest (\"$(GTEST_EXTRA_ARGS)\")" +@HAVE_GTEST_TRUE@ @echo " LAUNCHER : Custom launcher for gtest executable (\"$(LAUNCHER)\")" +@HAVE_GTEST_TRUE@ @echo " VALGRIND_EXTRA_ARGS : Additional arguments for Valgrind (\"$(VALGRIND_EXTRA_ARGS)\")" +@HAVE_GTEST_TRUE@ @echo + +# +# List unit tests +# +@HAVE_GTEST_TRUE@list: gtest +@HAVE_GTEST_TRUE@ $(abs_builddir)/gtest --gtest_list_tests $(GTEST_ARGS) + +# +# Run unit tests +# +@HAVE_GTEST_TRUE@test: ucx gtest +@HAVE_GTEST_TRUE@ @rm -f core.* +@HAVE_GTEST_TRUE@ $(LAUNCHER) stdbuf -e0 -o0 $(abs_builddir)/gtest $(GTEST_ARGS) + +# +# Run unit tests with GDB (the gdb script is written with printf because "echo -e" is not portable to POSIX sh) +# +@HAVE_GTEST_TRUE@test_gdb: ucx gtest +@HAVE_GTEST_TRUE@ printf 'r\ninit-if-undefined $$_exitcode=-1\nif $$_exitcode>=0\n\tq\nend\n' > .gdbcommands +@HAVE_GTEST_TRUE@ $(LAUNCHER) env UCX_HANDLE_ERRORS=none \ +@HAVE_GTEST_TRUE@ gdb -x .gdbcommands --args $(GDB_ARGS) \ +@HAVE_GTEST_TRUE@ $(abs_builddir)/gtest $(GTEST_ARGS) + +# +# Run unit tests with valgrind +# +@HAVE_GTEST_TRUE@test_valgrind: ucx gtest +@HAVE_GTEST_TRUE@ $(LAUNCHER) env LD_LIBRARY_PATH="$(VALGRIND_LIBPATH):${LD_LIBRARY_PATH}" \ +@HAVE_GTEST_TRUE@ stdbuf -e0 -o0 valgrind $(VALGRIND_ARGS) $(abs_builddir)/gtest $(GTEST_ARGS) + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/test/gtest/common/gtest-all.cc b/test/gtest/common/gtest-all.cc new file mode 100644 index 0000000..fa67e68 --- /dev/null +++ b/test/gtest/common/gtest-all.cc @@ -0,0 +1,9608 @@ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: mheule@google.com (Markus Heule) +// +// Google C++ Testing Framework (Google Test) +// +// Sometimes it's desirable to build Google Test by compiling a single file. +// This file serves this purpose. + +// This line ensures that gtest.h can be compiled on its own, even +// when it's fused. +#include "gtest.h" + +// The following lines pull in the real gtest *.cc files. +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) + +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// Utilities for testing Google Test itself and code that uses Google Test +// (e.g. frameworks built on top of Google Test). + +#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_ +#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_ + + +namespace testing { + +// This helper class can be used to mock out Google Test failure reporting +// so that we can test Google Test or code that builds on Google Test. +// +// An object of this class appends a TestPartResult object to the +// TestPartResultArray object given in the constructor whenever a Google Test +// failure is reported. It can either intercept only failures that are +// generated in the same thread that created this object or it can intercept +// all generated failures. 
The scope of this mock object can be controlled with +// the second argument to the two arguments constructor. +class GTEST_API_ ScopedFakeTestPartResultReporter + : public TestPartResultReporterInterface { + public: + // The two possible mocking modes of this object. + enum InterceptMode { + INTERCEPT_ONLY_CURRENT_THREAD, // Intercepts only thread local failures. + INTERCEPT_ALL_THREADS // Intercepts all failures. + }; + + // The c'tor sets this object as the test part result reporter used + // by Google Test. The 'result' parameter specifies where to report the + // results. This reporter will only catch failures generated in the current + // thread. DEPRECATED + explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result); + + // Same as above, but you can choose the interception scope of this object. + ScopedFakeTestPartResultReporter(InterceptMode intercept_mode, + TestPartResultArray* result); + + // The d'tor restores the previous test part result reporter. + virtual ~ScopedFakeTestPartResultReporter(); + + // Appends the TestPartResult object to the TestPartResultArray + // received in the constructor. + // + // This method is from the TestPartResultReporterInterface + // interface. + virtual void ReportTestPartResult(const TestPartResult& result); + private: + void Init(); + + const InterceptMode intercept_mode_; + TestPartResultReporterInterface* old_reporter_; + TestPartResultArray* const result_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter); +}; + +namespace internal { + +// A helper class for implementing EXPECT_FATAL_FAILURE() and +// EXPECT_NONFATAL_FAILURE(). Its destructor verifies that the given +// TestPartResultArray contains exactly one failure that has the given +// type and contains the given substring. If that's not the case, a +// non-fatal failure will be generated. +class GTEST_API_ SingleFailureChecker { + public: + // The constructor remembers the arguments. 
+ SingleFailureChecker(const TestPartResultArray* results, + TestPartResult::Type type, + const string& substr); + ~SingleFailureChecker(); + private: + const TestPartResultArray* const results_; + const TestPartResult::Type type_; + const string substr_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker); +}; + +} // namespace internal + +} // namespace testing + +// A set of macros for testing Google Test assertions or code that's expected +// to generate Google Test fatal failures. It verifies that the given +// statement will cause exactly one fatal Google Test failure with 'substr' +// being part of the failure message. +// +// There are two different versions of this macro. EXPECT_FATAL_FAILURE only +// affects and considers failures generated in the current thread and +// EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads. +// +// The verification of the assertion is done correctly even when the statement +// throws an exception or aborts the current function. +// +// Known restrictions: +// - 'statement' cannot reference local non-static variables or +// non-static members of the current object. +// - 'statement' cannot return a value. +// - You cannot stream a failure message to this macro. +// +// Note that even though the implementations of the following two +// macros are much alike, we cannot refactor them to use a common +// helper macro, due to some peculiarity in how the preprocessor +// works. The AcceptsMacroThatExpandsToUnprotectedComma test in +// gtest_unittest.cc will fail to compile if we do that. 
+#define EXPECT_FATAL_FAILURE(statement, substr) \ + do { \ + class GTestExpectFatalFailureHelper {\ + public:\ + static void Execute() { statement; }\ + };\ + ::testing::TestPartResultArray gtest_failures;\ + ::testing::internal::SingleFailureChecker gtest_checker(\ + &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\ + {\ + ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ + ::testing::ScopedFakeTestPartResultReporter:: \ + INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\ + GTestExpectFatalFailureHelper::Execute();\ + }\ + } while (::testing::internal::AlwaysFalse()) + +#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \ + do { \ + class GTestExpectFatalFailureHelper {\ + public:\ + static void Execute() { statement; }\ + };\ + ::testing::TestPartResultArray gtest_failures;\ + ::testing::internal::SingleFailureChecker gtest_checker(\ + &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\ + {\ + ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ + ::testing::ScopedFakeTestPartResultReporter:: \ + INTERCEPT_ALL_THREADS, &gtest_failures);\ + GTestExpectFatalFailureHelper::Execute();\ + }\ + } while (::testing::internal::AlwaysFalse()) + +// A macro for testing Google Test assertions or code that's expected to +// generate Google Test non-fatal failures. It asserts that the given +// statement will cause exactly one non-fatal Google Test failure with 'substr' +// being part of the failure message. +// +// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only +// affects and considers failures generated in the current thread and +// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads. +// +// 'statement' is allowed to reference local variables and members of +// the current object. +// +// The verification of the assertion is done correctly even when the statement +// throws an exception or aborts the current function. 
+// +// Known restrictions: +// - You cannot stream a failure message to this macro. +// +// Note that even though the implementations of the following two +// macros are much alike, we cannot refactor them to use a common +// helper macro, due to some peculiarity in how the preprocessor +// works. If we do that, the code won't compile when the user gives +// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that +// expands to code containing an unprotected comma. The +// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc +// catches that. +// +// For the same reason, we have to write +// if (::testing::internal::AlwaysTrue()) { statement; } +// instead of +// GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) +// to avoid an MSVC warning on unreachable code. +#define EXPECT_NONFATAL_FAILURE(statement, substr) \ + do {\ + ::testing::TestPartResultArray gtest_failures;\ + ::testing::internal::SingleFailureChecker gtest_checker(\ + &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \ + (substr));\ + {\ + ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ + ::testing::ScopedFakeTestPartResultReporter:: \ + INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\ + if (::testing::internal::AlwaysTrue()) { statement; }\ + }\ + } while (::testing::internal::AlwaysFalse()) + +#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \ + do {\ + ::testing::TestPartResultArray gtest_failures;\ + ::testing::internal::SingleFailureChecker gtest_checker(\ + &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \ + (substr));\ + {\ + ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\ + ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \ + &gtest_failures);\ + if (::testing::internal::AlwaysTrue()) { statement; }\ + }\ + } while (::testing::internal::AlwaysFalse()) + +#endif // GTEST_INCLUDE_GTEST_GTEST_SPI_H_ + +#include <ctype.h> +#include <math.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <wchar.h> +#include <wctype.h> + 
+#include <algorithm> +#include <iomanip> +#include <limits> +#include <ostream> // NOLINT +#include <sstream> +#include <vector> + +#if GTEST_OS_LINUX + +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +# define GTEST_HAS_GETTIMEOFDAY_ 1 + +# include <fcntl.h> // NOLINT +# include <limits.h> // NOLINT +# include <sched.h> // NOLINT +// Declares vsnprintf(). This header is not available on Windows. +# include <strings.h> // NOLINT +# include <sys/mman.h> // NOLINT +# include <sys/time.h> // NOLINT +# include <unistd.h> // NOLINT +# include <string> + +#elif GTEST_OS_SYMBIAN +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include <sys/time.h> // NOLINT + +#elif GTEST_OS_ZOS +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include <sys/time.h> // NOLINT + +// On z/OS we additionally need strings.h for strcasecmp. +# include <strings.h> // NOLINT + +#elif GTEST_OS_WINDOWS_MOBILE // We are on Windows CE. + +# include <windows.h> // NOLINT + +#elif GTEST_OS_WINDOWS // We are on Windows proper. + +# include <io.h> // NOLINT +# include <sys/timeb.h> // NOLINT +# include <sys/types.h> // NOLINT +# include <sys/stat.h> // NOLINT + +# if GTEST_OS_WINDOWS_MINGW +// MinGW has gettimeofday() but not _ftime64(). +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +// TODO(kenton@google.com): There are other ways to get the time on +// Windows, like GetTickCount() or GetSystemTimeAsFileTime(). MinGW +// supports these. consider using them instead. +# define GTEST_HAS_GETTIMEOFDAY_ 1 +# include <sys/time.h> // NOLINT +# endif // GTEST_OS_WINDOWS_MINGW + +// cpplint thinks that the header is already included, so we want to +// silence it. +# include <windows.h> // NOLINT + +#else + +// Assume other platforms have gettimeofday(). +// TODO(kenton@google.com): Use autoconf to detect availability of +// gettimeofday(). +# define GTEST_HAS_GETTIMEOFDAY_ 1 + +// cpplint thinks that the header is already included, so we want to +// silence it. 
+# include <sys/time.h> // NOLINT +# include <unistd.h> // NOLINT + +#endif // GTEST_OS_LINUX + +#if GTEST_HAS_EXCEPTIONS +# include <stdexcept> +#endif + +#if GTEST_CAN_STREAM_RESULTS_ +# include <arpa/inet.h> // NOLINT +# include <netdb.h> // NOLINT +#endif + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION_ 1 +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Utility functions and classes used by the Google C++ testing framework. +// +// Author: wan@google.com (Zhanyong Wan) +// +// This file contains purely Google Test's internal implementation. Please +// DO NOT #INCLUDE IT IN A USER PROGRAM. + +#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_ +#define GTEST_SRC_GTEST_INTERNAL_INL_H_ + +// GTEST_IMPLEMENTATION_ is defined to 1 iff the current translation unit is +// part of Google Test's implementation; otherwise it's undefined. +#if !GTEST_IMPLEMENTATION_ +// A user is trying to include this from his code - just say no. +# error "gtest-internal-inl.h is part of Google Test's internal implementation." +# error "It must not be included except by Google Test itself." +#endif // GTEST_IMPLEMENTATION_ + +#ifndef _WIN32_WCE +# include +#endif // !_WIN32_WCE +#include +#include // For strtoll/_strtoul64/malloc/free. +#include // For memmove. + +#include +#include +#include + + +#if GTEST_CAN_STREAM_RESULTS_ +# include // NOLINT +# include // NOLINT +#endif + +#if GTEST_OS_WINDOWS +# include // NOLINT +#endif // GTEST_OS_WINDOWS + + +namespace testing { + +// Declares the flags. +// +// We don't want the users to modify this flag in the code, but want +// Google Test's own unit tests to be able to access it. Therefore we +// declare it here as opposed to in gtest.h. 
+GTEST_DECLARE_bool_(death_test_use_fork); + +namespace internal { + +// The value of GetTestTypeId() as seen from within the Google Test +// library. This is solely for testing GetTestTypeId(). +GTEST_API_ extern const TypeId kTestTypeIdInGoogleTest; + +// Names of the flags (needed for parsing Google Test flags). +const char kAlsoRunDisabledTestsFlag[] = "also_run_disabled_tests"; +const char kBreakOnFailureFlag[] = "break_on_failure"; +const char kCatchExceptionsFlag[] = "catch_exceptions"; +const char kColorFlag[] = "color"; +const char kFilterFlag[] = "filter"; +const char kListTestsFlag[] = "list_tests"; +const char kOutputFlag[] = "output"; +const char kPrintTimeFlag[] = "print_time"; +const char kRandomSeedFlag[] = "random_seed"; +const char kRepeatFlag[] = "repeat"; +const char kShuffleFlag[] = "shuffle"; +const char kStackTraceDepthFlag[] = "stack_trace_depth"; +const char kStreamResultToFlag[] = "stream_result_to"; +const char kThrowOnFailureFlag[] = "throw_on_failure"; + +// A valid random seed must be in [1, kMaxRandomSeed]. +const int kMaxRandomSeed = 99999; + +// g_help_flag is true iff the --help flag or an equivalent form is +// specified on the command line. +GTEST_API_ extern bool g_help_flag; + +// Returns the current time in milliseconds. +GTEST_API_ TimeInMillis GetTimeInMillis(); + +// Returns true iff Google Test should use colors in the output. +GTEST_API_ bool ShouldUseColor(bool stdout_is_tty); + +// Formats the given time in milliseconds as seconds. +GTEST_API_ std::string FormatTimeInMillisAsSeconds(TimeInMillis ms); + +// Converts the given time in milliseconds to a date string in the ISO 8601 +// format, without the timezone information. N.B.: due to the use the +// non-reentrant localtime() function, this function is not thread safe. Do +// not use it in any code that can be called from multiple threads. 
+GTEST_API_ std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms); + +// Parses a string for an Int32 flag, in the form of "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +GTEST_API_ bool ParseInt32Flag( + const char* str, const char* flag, Int32* value); + +// Returns a random seed in range [1, kMaxRandomSeed] based on the +// given --gtest_random_seed flag value. +inline int GetRandomSeedFromFlag(Int32 random_seed_flag) { + const unsigned int raw_seed = (random_seed_flag == 0) ? + static_cast(GetTimeInMillis()) : + static_cast(random_seed_flag); + + // Normalizes the actual seed to range [1, kMaxRandomSeed] such that + // it's easy to type. + const int normalized_seed = + static_cast((raw_seed - 1U) % + static_cast(kMaxRandomSeed)) + 1; + return normalized_seed; +} + +// Returns the first valid random seed after 'seed'. The behavior is +// undefined if 'seed' is invalid. The seed after kMaxRandomSeed is +// considered to be 1. +inline int GetNextRandomSeed(int seed) { + GTEST_CHECK_(1 <= seed && seed <= kMaxRandomSeed) + << "Invalid random seed " << seed << " - must be in [1, " + << kMaxRandomSeed << "]."; + const int next_seed = seed + 1; + return (next_seed > kMaxRandomSeed) ? 1 : next_seed; +} + +// This class saves the values of all Google Test flags in its c'tor, and +// restores them in its d'tor. +class GTestFlagSaver { + public: + // The c'tor. 
+ GTestFlagSaver() { + also_run_disabled_tests_ = GTEST_FLAG(also_run_disabled_tests); + break_on_failure_ = GTEST_FLAG(break_on_failure); + catch_exceptions_ = GTEST_FLAG(catch_exceptions); + color_ = GTEST_FLAG(color); + death_test_style_ = GTEST_FLAG(death_test_style); + death_test_use_fork_ = GTEST_FLAG(death_test_use_fork); + filter_ = GTEST_FLAG(filter); + internal_run_death_test_ = GTEST_FLAG(internal_run_death_test); + list_tests_ = GTEST_FLAG(list_tests); + output_ = GTEST_FLAG(output); + print_time_ = GTEST_FLAG(print_time); + random_seed_ = GTEST_FLAG(random_seed); + repeat_ = GTEST_FLAG(repeat); + shuffle_ = GTEST_FLAG(shuffle); + stack_trace_depth_ = GTEST_FLAG(stack_trace_depth); + stream_result_to_ = GTEST_FLAG(stream_result_to); + throw_on_failure_ = GTEST_FLAG(throw_on_failure); + } + + // The d'tor is not virtual. DO NOT INHERIT FROM THIS CLASS. + ~GTestFlagSaver() { + GTEST_FLAG(also_run_disabled_tests) = also_run_disabled_tests_; + GTEST_FLAG(break_on_failure) = break_on_failure_; + GTEST_FLAG(catch_exceptions) = catch_exceptions_; + GTEST_FLAG(color) = color_; + GTEST_FLAG(death_test_style) = death_test_style_; + GTEST_FLAG(death_test_use_fork) = death_test_use_fork_; + GTEST_FLAG(filter) = filter_; + GTEST_FLAG(internal_run_death_test) = internal_run_death_test_; + GTEST_FLAG(list_tests) = list_tests_; + GTEST_FLAG(output) = output_; + GTEST_FLAG(print_time) = print_time_; + GTEST_FLAG(random_seed) = random_seed_; + GTEST_FLAG(repeat) = repeat_; + GTEST_FLAG(shuffle) = shuffle_; + GTEST_FLAG(stack_trace_depth) = stack_trace_depth_; + GTEST_FLAG(stream_result_to) = stream_result_to_; + GTEST_FLAG(throw_on_failure) = throw_on_failure_; + } + + private: + // Fields for saving the original values of flags. 
+ bool also_run_disabled_tests_; + bool break_on_failure_; + bool catch_exceptions_; + std::string color_; + std::string death_test_style_; + bool death_test_use_fork_; + std::string filter_; + std::string internal_run_death_test_; + bool list_tests_; + std::string output_; + bool print_time_; + internal::Int32 random_seed_; + internal::Int32 repeat_; + bool shuffle_; + internal::Int32 stack_trace_depth_; + std::string stream_result_to_; + bool throw_on_failure_; +} GTEST_ATTRIBUTE_UNUSED_; + +// Converts a Unicode code point to a narrow string in UTF-8 encoding. +// code_point parameter is of type UInt32 because wchar_t may not be +// wide enough to contain a code point. +// If the code_point is not a valid Unicode code point +// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted +// to "(Invalid Unicode 0xXXXXXXXX)". +GTEST_API_ std::string CodePointToUtf8(UInt32 code_point); + +// Converts a wide string to a narrow string in UTF-8 encoding. +// The wide string is assumed to have the following encoding: +// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS) +// UTF-32 if sizeof(wchar_t) == 4 (on Linux) +// Parameter str points to a null-terminated wide string. +// Parameter num_chars may additionally limit the number +// of wchar_t characters processed. -1 is used when the entire string +// should be processed. +// If the string contains code points that are not valid Unicode code points +// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output +// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF-16 encoding +// and contains invalid UTF-16 surrogate pairs, values in those pairs +// will be encoded as individual Unicode characters from the Basic +// Multilingual Plane. +GTEST_API_ std::string WideStringToUtf8(const wchar_t* str, int num_chars); + +// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file +// if the variable is present.
If a file already exists at this location, this +// function will write over it. If the variable is present, but the file cannot +// be created, prints an error and exits. +void WriteToShardStatusFileIfNeeded(); + +// Checks whether sharding is enabled by examining the relevant +// environment variable values. If the variables are present, +// but inconsistent (e.g., shard_index >= total_shards), prints +// an error and exits. If in_subprocess_for_death_test, sharding is +// disabled because it must only be applied to the original test +// process. Otherwise, we could filter out death tests we intended to execute. +GTEST_API_ bool ShouldShard(const char* total_shards_str, + const char* shard_index_str, + bool in_subprocess_for_death_test); + +// Parses the environment variable var as an Int32. If it is unset, +// returns default_val. If it is not an Int32, prints an error and +// and aborts. +GTEST_API_ Int32 Int32FromEnvOrDie(const char* env_var, Int32 default_val); + +// Given the total number of shards, the shard index, and the test id, +// returns true iff the test should be run on this shard. The test id is +// some arbitrary but unique non-negative integer assigned to each test +// method. Assumes that 0 <= shard_index < total_shards. +GTEST_API_ bool ShouldRunTestOnShard( + int total_shards, int shard_index, int test_id); + +// STL container utilities. + +// Returns the number of elements in the given container that satisfy +// the given predicate. +template +inline int CountIf(const Container& c, Predicate predicate) { + // Implemented as an explicit loop since std::count_if() in libCstd on + // Solaris has a non-standard signature. + int count = 0; + for (typename Container::const_iterator it = c.begin(); it != c.end(); ++it) { + if (predicate(*it)) + ++count; + } + return count; +} + +// Applies a function/functor to each element in the container. 
+template <class Container, typename Functor> +void ForEach(const Container& c, Functor functor) { + std::for_each(c.begin(), c.end(), functor); +} + +// Returns the i-th element of the vector, or default_value if i is not +// in range [0, v.size()). +template <typename E> +inline E GetElementOr(const std::vector<E>& v, int i, E default_value) { + return (i < 0 || i >= static_cast<int>(v.size())) ? default_value : v[i]; +} + +// Performs an in-place shuffle of a range of the vector's elements. +// 'begin' and 'end' are element indices as an STL-style range; +// i.e. [begin, end) are shuffled, where 'end' == size() means to +// shuffle to the end of the vector. +template <typename E> +void ShuffleRange(internal::Random* random, int begin, int end, + std::vector<E>* v) { + const int size = static_cast<int>(v->size()); + GTEST_CHECK_(0 <= begin && begin <= size) + << "Invalid shuffle range start " << begin << ": must be in range [0, " + << size << "]."; + GTEST_CHECK_(begin <= end && end <= size) + << "Invalid shuffle range finish " << end << ": must be in range [" + << begin << ", " << size << "]."; + + // Fisher-Yates shuffle, from + // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle + for (int range_width = end - begin; range_width >= 2; range_width--) { + const int last_in_range = begin + range_width - 1; + const int selected = begin + random->Generate(range_width); + std::swap((*v)[selected], (*v)[last_in_range]); + } +} + +// Performs an in-place shuffle of the vector's elements. +template <typename E> +inline void Shuffle(internal::Random* random, std::vector<E>* v) { + ShuffleRange(random, 0, static_cast<int>(v->size()), v); +} + +// A function for deleting an object. Handy for being used as a +// functor. +template <typename T> +static void Delete(T* x) { + delete x; +} + +// A predicate that checks the key of a TestProperty against a known key. +// +// TestPropertyKeyIs is copyable. +class TestPropertyKeyIs { + public: + // Constructor. + // + // TestPropertyKeyIs has NO default constructor.
+ explicit TestPropertyKeyIs(const std::string& key) : key_(key) {} + + // Returns true iff the key of the given test property equals key_. + bool operator()(const TestProperty& test_property) const { + return test_property.key() == key_; + } + + private: + std::string key_; +}; + +// Class UnitTestOptions. +// +// This class contains functions for processing options the user +// specifies when running the tests. It has only static members. +// +// In most cases, the user can specify an option using either an +// environment variable or a command line flag. E.g. you can set the +// test filter using either GTEST_FILTER or --gtest_filter. If both +// the variable and the flag are present, the latter overrides the +// former. +class GTEST_API_ UnitTestOptions { + public: + // Functions for processing the gtest_output flag. + + // Returns the output format, or "" for normal printed output. + static std::string GetOutputFormat(); + + // Returns the absolute path of the requested output file, or the + // default (test_detail.xml in the original working directory) if + // none was explicitly specified. + static std::string GetAbsolutePathToOutputFile(); + + // Functions for processing the gtest_filter flag. + + // Returns true iff the wildcard pattern matches the string. The + // first ':' or '\0' character in pattern marks the end of it. + // + // This recursive algorithm isn't very efficient, but is clear and + // works well enough for matching test names, which are short. + static bool PatternMatchesString(const char *pattern, const char *str); + + // Returns true iff the user-specified filter matches the test case + // name and the test name. + static bool FilterMatchesTest(const std::string &test_case_name, + const std::string &test_name); + +#if GTEST_OS_WINDOWS + // Function for supporting the gtest_catch_exceptions flag. + + // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the + // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise.
+ // This function is useful as an __except condition. + static int GTestShouldProcessSEH(DWORD exception_code); +#endif // GTEST_OS_WINDOWS + + // Returns true if "name" matches the ':' separated list of glob-style + // filters in "filter". + static bool MatchesFilter(const std::string& name, const char* filter); +}; + +// Returns the current application's name, removing directory path if that +// is present. Used by UnitTestOptions::GetOutputFile. +GTEST_API_ FilePath GetCurrentExecutableName(); + +// The role interface for getting the OS stack trace as a string. +class OsStackTraceGetterInterface { + public: + OsStackTraceGetterInterface() {} + virtual ~OsStackTraceGetterInterface() {} + + // Returns the current OS stack trace as an std::string. Parameters: + // + // max_depth - the maximum number of stack frames to be included + // in the trace. + // skip_count - the number of top frames to be skipped; doesn't count + // against max_depth. + virtual string CurrentStackTrace(int max_depth, int skip_count) = 0; + + // UponLeavingGTest() should be called immediately before Google Test calls + // user code. It saves some information about the current stack that + // CurrentStackTrace() will use to find and hide Google Test stack frames. + virtual void UponLeavingGTest() = 0; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetterInterface); +}; + +// A working implementation of the OsStackTraceGetterInterface interface. +class OsStackTraceGetter : public OsStackTraceGetterInterface { + public: + OsStackTraceGetter() : caller_frame_(NULL) {} + + virtual string CurrentStackTrace(int max_depth, int skip_count) + GTEST_LOCK_EXCLUDED_(mutex_); + + virtual void UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_); + + // This string is inserted in place of stack frames that are part of + // Google Test's implementation. 
+ static const char* const kElidedFramesMarker; + + private: + Mutex mutex_; // protects all internal state + + // We save the stack frame below the frame that calls user code. + // We do this because the address of the frame immediately below + // the user code changes between the call to UponLeavingGTest() + // and any calls to CurrentStackTrace() from within the user code. + void* caller_frame_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter); +}; + +// Information about a Google Test trace point. +struct TraceInfo { + const char* file; + int line; + std::string message; +}; + +// This is the default global test part result reporter used in UnitTestImpl. +// This class should only be used by UnitTestImpl. +class DefaultGlobalTestPartResultReporter + : public TestPartResultReporterInterface { + public: + explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test); + // Implements the TestPartResultReporterInterface. Reports the test part + // result in the current test. + virtual void ReportTestPartResult(const TestPartResult& result); + + private: + UnitTestImpl* const unit_test_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultGlobalTestPartResultReporter); +}; + +// This is the default per thread test part result reporter used in +// UnitTestImpl. This class should only be used by UnitTestImpl. +class DefaultPerThreadTestPartResultReporter + : public TestPartResultReporterInterface { + public: + explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test); + // Implements the TestPartResultReporterInterface. The implementation just + // delegates to the current global test part result reporter of *unit_test_. + virtual void ReportTestPartResult(const TestPartResult& result); + + private: + UnitTestImpl* const unit_test_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultPerThreadTestPartResultReporter); +}; + +// The private implementation of the UnitTest class. 
We don't protect +// the methods under a mutex, as this class is not accessible by a +// user and the UnitTest class that delegates work to this class does +// proper locking. +class GTEST_API_ UnitTestImpl { + public: + explicit UnitTestImpl(UnitTest* parent); + virtual ~UnitTestImpl(); + + // There are two different ways to register your own TestPartResultReporter. + // You can register your own reporter to listen either only for test results + // from the current thread or for results from all threads. + // By default, each per-thread test result reporter just passes a new + // TestPartResult to the global test result reporter, which registers the + // test part result for the currently running test. + + // Returns the global test part result reporter. + TestPartResultReporterInterface* GetGlobalTestPartResultReporter(); + + // Sets the global test part result reporter. + void SetGlobalTestPartResultReporter( + TestPartResultReporterInterface* reporter); + + // Returns the test part result reporter for the current thread. + TestPartResultReporterInterface* GetTestPartResultReporterForCurrentThread(); + + // Sets the test part result reporter for the current thread. + void SetTestPartResultReporterForCurrentThread( + TestPartResultReporterInterface* reporter); + + // Gets the number of successful test cases. + int successful_test_case_count() const; + + // Gets the number of failed test cases. + int failed_test_case_count() const; + + // Gets the number of all test cases. + int total_test_case_count() const; + + // Gets the number of all test cases that contain at least one test + // that should run. + int test_case_to_run_count() const; + + // Gets the number of successful tests. + int successful_test_count() const; + + // Gets the number of failed tests. + int failed_test_count() const; + + // Gets the number of disabled tests that will be reported in the XML report. + int reportable_disabled_test_count() const; + + // Gets the number of disabled tests.
+ int disabled_test_count() const; + + // Gets the number of tests to be printed in the XML report. + int reportable_test_count() const; + + // Gets the number of all tests. + int total_test_count() const; + + // Gets the number of tests that should run. + int test_to_run_count() const; + + // Gets the time of the test program start, in ms from the start of the + // UNIX epoch. + TimeInMillis start_timestamp() const { return start_timestamp_; } + + // Gets the elapsed time, in milliseconds. + TimeInMillis elapsed_time() const { return elapsed_time_; } + + // Returns true iff the unit test passed (i.e. all test cases passed). + bool Passed() const { return !Failed(); } + + // Returns true iff the unit test failed (i.e. some test case failed + // or something outside of all tests failed). + bool Failed() const { + return failed_test_case_count() > 0 || ad_hoc_test_result()->Failed(); + } + + // Gets the i-th test case among all the test cases. i can range from 0 to + // total_test_case_count() - 1. If i is not in that range, returns NULL. + // Like GetMutableTestCase(), position i is first mapped through + // test_case_indices_ and the mapped index selects the element of + // test_cases_, so both accessors return the same test case for the same i. + const TestCase* GetTestCase(int i) const { + const int index = GetElementOr(test_case_indices_, i, -1); + return index < 0 ? NULL : test_cases_[index]; + } + + // Gets the i-th test case among all the test cases. i can range from 0 to + // total_test_case_count() - 1. If i is not in that range, returns NULL. + TestCase* GetMutableTestCase(int i) { + const int index = GetElementOr(test_case_indices_, i, -1); + return index < 0 ? NULL : test_cases_[index]; + } + + // Provides access to the event listener list. + TestEventListeners* listeners() { return &listeners_; } + + // Returns the TestResult for the test that's currently running, or + // the TestResult for the ad hoc test if no test is running. + TestResult* current_test_result(); + + // Returns the TestResult for the ad hoc test. + const TestResult* ad_hoc_test_result() const { return &ad_hoc_test_result_; } + + // Sets the OS stack trace getter.
+ // + // Does nothing if the input and the current OS stack trace getter + // are the same; otherwise, deletes the old getter and makes the + // input the current getter. + void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter); + + // Returns the current OS stack trace getter if it is not NULL; + // otherwise, creates an OsStackTraceGetter, makes it the current + // getter, and returns it. + OsStackTraceGetterInterface* os_stack_trace_getter(); + + // Returns the current OS stack trace as an std::string. + // + // The maximum number of stack frames to be included is specified by + // the gtest_stack_trace_depth flag. The skip_count parameter + // specifies the number of top frames to be skipped, which doesn't + // count against the number of frames to be included. + // + // For example, if Foo() calls Bar(), which in turn calls + // CurrentOsStackTraceExceptTop(1), Foo() will be included in the + // trace but Bar() and CurrentOsStackTraceExceptTop() won't. + std::string CurrentOsStackTraceExceptTop(int skip_count) GTEST_NO_INLINE_; + + // Finds and returns a TestCase with the given name. If one doesn't + // exist, creates one and returns it. + // + // Arguments: + // + // test_case_name: name of the test case + // type_param: the name of the test's type parameter, or NULL if + // this is not a typed or a type-parameterized test. + // set_up_tc: pointer to the function that sets up the test case + // tear_down_tc: pointer to the function that tears down the test case + TestCase* GetTestCase(const char* test_case_name, + const char* type_param, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc); + + // Adds a TestInfo to the unit test. 
+ // + // Arguments: + // + // set_up_tc: pointer to the function that sets up the test case + // tear_down_tc: pointer to the function that tears down the test case + // test_info: the TestInfo object + void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc, + TestInfo* test_info) { + // In order to support thread-safe death tests, we need to + // remember the original working directory when the test program + // was first invoked. We cannot do this in RUN_ALL_TESTS(), as + // the user may have changed the current directory before calling + // RUN_ALL_TESTS(). Therefore we capture the current directory in + // AddTestInfo(), which is called to register a TEST or TEST_F + // before main() is reached. + if (original_working_dir_.IsEmpty()) { + original_working_dir_.Set(FilePath::GetCurrentDir()); + GTEST_CHECK_(!original_working_dir_.IsEmpty()) + << "Failed to get the current working directory."; + } + + GetTestCase(test_info->test_case_name(), + test_info->type_param(), + set_up_tc, + tear_down_tc)->AddTestInfo(test_info); + } + +#if GTEST_HAS_PARAM_TEST + // Returns ParameterizedTestCaseRegistry object used to keep track of + // value-parameterized tests and instantiate and register them. + internal::ParameterizedTestCaseRegistry& parameterized_test_registry() { + return parameterized_test_registry_; + } +#endif // GTEST_HAS_PARAM_TEST + + // Sets the TestCase object for the test that's currently running. + void set_current_test_case(TestCase* a_current_test_case) { + current_test_case_ = a_current_test_case; + } + + // Sets the TestInfo object for the test that's currently running. If + // current_test_info is NULL, the assertion results will be stored in + // ad_hoc_test_result_. 
+ void set_current_test_info(TestInfo* a_current_test_info) { + current_test_info_ = a_current_test_info; + } + + // Registers all parameterized tests defined using TEST_P and + // INSTANTIATE_TEST_CASE_P, creating regular tests for each test/parameter + // combination. This method can be called more than once; it has guards + // protecting from registering the tests more than once. If + // value-parameterized tests are disabled, RegisterParameterizedTests is + // present but does nothing. + void RegisterParameterizedTests(); + + // Runs all tests in this UnitTest object, prints the result, and + // returns true if all tests are successful. If any exception is + // thrown during a test, this test is considered to be failed, but + // the rest of the tests will still be run. + bool RunAllTests(); + + // Clears the results of all tests, except the ad hoc tests. + void ClearNonAdHocTestResult() { + ForEach(test_cases_, TestCase::ClearTestCaseResult); + } + + // Clears the results of ad-hoc test assertions. + void ClearAdHocTestResult() { + ad_hoc_test_result_.Clear(); + } + + // Adds a TestProperty to the current TestResult object when invoked in a + // context of a test or a test case, or to the global property set. If the + // result already contains a property with the same key, the value will be + // updated. + void RecordProperty(const TestProperty& test_property); + + enum ReactionToSharding { + HONOR_SHARDING_PROTOCOL, + IGNORE_SHARDING_PROTOCOL + }; + + // Matches the full name of each test against the user-specified + // filter to decide whether the test should run, then records the + // result in each TestCase and TestInfo object. + // If shard_tests == HONOR_SHARDING_PROTOCOL, further filters tests + // based on sharding variables in the environment. + // Returns the number of tests that should run. + int FilterTests(ReactionToSharding shard_tests); + + // Prints the names of the tests matching the user-specified filter flag.
+ void ListTestsMatchingFilter(); + + const TestCase* current_test_case() const { return current_test_case_; } + TestInfo* current_test_info() { return current_test_info_; } + const TestInfo* current_test_info() const { return current_test_info_; } + + // Returns the vector of environments that need to be set-up/torn-down + // before/after the tests are run. + std::vector<Environment*>& environments() { return environments_; } + + // Getters for the per-thread Google Test trace stack. + std::vector<TraceInfo>& gtest_trace_stack() { + return *(gtest_trace_stack_.pointer()); + } + const std::vector<TraceInfo>& gtest_trace_stack() const { + return gtest_trace_stack_.get(); + } + +#if GTEST_HAS_DEATH_TEST + void InitDeathTestSubprocessControlInfo() { + internal_run_death_test_flag_.reset(ParseInternalRunDeathTestFlag()); + } + // Returns a pointer to the parsed --gtest_internal_run_death_test + // flag, or NULL if that flag was not specified. + // This information is useful only in a death test child process. + // Must not be called before a call to InitGoogleTest. + const InternalRunDeathTestFlag* internal_run_death_test_flag() const { + return internal_run_death_test_flag_.get(); + } + + // Returns a pointer to the current death test factory. + internal::DeathTestFactory* death_test_factory() { + return death_test_factory_.get(); + } + + void SuppressTestEventsIfInSubprocess(); + + friend class ReplaceDeathTestFactory; +#endif // GTEST_HAS_DEATH_TEST + + // Initializes the event listener performing XML output as specified by + // UnitTestOptions. Must not be called before InitGoogleTest. + void ConfigureXmlOutput(); + +#if GTEST_CAN_STREAM_RESULTS_ + // Initializes the event listener for streaming test results to a socket. + // Must not be called before InitGoogleTest. + void ConfigureStreamingOutput(); +#endif + + // Performs initialization dependent upon flag values obtained in + // ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to + // ParseGoogleTestFlagsOnly.
In case a user neglects to call InitGoogleTest + // this function is also called from RunAllTests. Since this function can be + // called more than once, it has to be idempotent. + void PostFlagParsingInit(); + + // Gets the random seed used at the start of the current test iteration. + int random_seed() const { return random_seed_; } + + // Gets the random number generator. + internal::Random* random() { return &random_; } + + // Shuffles all test cases, and the tests within each test case, + // making sure that death tests are still run first. + void ShuffleTests(); + + // Restores the test cases and tests to their order before the first shuffle. + void UnshuffleTests(); + + // Returns the value of GTEST_FLAG(catch_exceptions) at the moment + // UnitTest::Run() starts. + bool catch_exceptions() const { return catch_exceptions_; } + + private: + friend class ::testing::UnitTest; + + // Used by UnitTest::Run() to capture the state of + // GTEST_FLAG(catch_exceptions) at the moment it starts. + void set_catch_exceptions(bool value) { catch_exceptions_ = value; } + + // The UnitTest object that owns this implementation object. + UnitTest* const parent_; + + // The working directory when the first TEST() or TEST_F() was + // executed. + internal::FilePath original_working_dir_; + + // The default test part result reporters. + DefaultGlobalTestPartResultReporter default_global_test_part_result_reporter_; + DefaultPerThreadTestPartResultReporter + default_per_thread_test_part_result_reporter_; + + // Points to (but doesn't own) the global test part result reporter. + TestPartResultReporterInterface* global_test_part_result_repoter_; + + // Protects read and write access to global_test_part_result_repoter_ + // (the pointer declared just above; note the spelling of its name). + internal::Mutex global_test_part_result_reporter_mutex_; + + // Points to (but doesn't own) the per-thread test part result reporter.
+ internal::ThreadLocal<TestPartResultReporterInterface*> + per_thread_test_part_result_reporter_; + + // The vector of environments that need to be set-up/torn-down + // before/after the tests are run. + std::vector<Environment*> environments_; + + // The vector of TestCases in their original order. It owns the + // elements in the vector. + std::vector<TestCase*> test_cases_; + + // Provides a level of indirection for the test case list to allow + // easy shuffling and restoring the test case order. The i-th + // element of this vector is the index of the i-th test case in the + // shuffled order. + std::vector<int> test_case_indices_; + +#if GTEST_HAS_PARAM_TEST + // ParameterizedTestRegistry object used to register value-parameterized + // tests. + internal::ParameterizedTestCaseRegistry parameterized_test_registry_; + + // Indicates whether RegisterParameterizedTests() has been called already. + bool parameterized_tests_registered_; +#endif // GTEST_HAS_PARAM_TEST + + // Index of the last death test case registered. Initially -1. + int last_death_test_case_; + + // This points to the TestCase for the currently running test. It + // changes as Google Test goes through one test case after another. + // When no test is running, this is set to NULL and Google Test + // stores assertion results in ad_hoc_test_result_. Initially NULL. + TestCase* current_test_case_; + + // This points to the TestInfo for the currently running test. It + // changes as Google Test goes through one test after another. When + // no test is running, this is set to NULL and Google Test stores + // assertion results in ad_hoc_test_result_. Initially NULL. + TestInfo* current_test_info_; + + // Normally, a user only writes assertions inside a TEST or TEST_F, + // or inside a function called by a TEST or TEST_F. Since Google + // Test keeps track of which test is currently running, it can + // associate such an assertion with the test it belongs to.
+ // + // If an assertion is encountered when no TEST or TEST_F is running, + // Google Test attributes the assertion result to an imaginary "ad hoc" + // test, and records the result in ad_hoc_test_result_. + TestResult ad_hoc_test_result_; + + // The list of event listeners that can be used to track events inside + // Google Test. + TestEventListeners listeners_; + + // The OS stack trace getter. Will be deleted when the UnitTest + // object is destructed. By default, an OsStackTraceGetter is used, + // but the user can set this field to use a custom getter if that is + // desired. + OsStackTraceGetterInterface* os_stack_trace_getter_; + + // True iff PostFlagParsingInit() has been called. + bool post_flag_parse_init_performed_; + + // The random number seed used at the beginning of the test run. + int random_seed_; + + // Our random number generator. + internal::Random random_; + + // The time of the test program start, in ms from the start of the + // UNIX epoch. + TimeInMillis start_timestamp_; + + // How long the test took to run, in milliseconds. + TimeInMillis elapsed_time_; + +#if GTEST_HAS_DEATH_TEST + // The decomposed components of the gtest_internal_run_death_test flag, + // parsed when RUN_ALL_TESTS is called. + internal::scoped_ptr<InternalRunDeathTestFlag> internal_run_death_test_flag_; + internal::scoped_ptr<internal::DeathTestFactory> death_test_factory_; +#endif // GTEST_HAS_DEATH_TEST + + // A per-thread stack of traces created by the SCOPED_TRACE() macro. + internal::ThreadLocal<std::vector<TraceInfo> > gtest_trace_stack_; + + // The value of GTEST_FLAG(catch_exceptions) at the moment RunAllTests() + // starts. + bool catch_exceptions_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTestImpl); +}; // class UnitTestImpl + +// Convenience function for accessing the global UnitTest +// implementation object. +inline UnitTestImpl* GetUnitTestImpl() { + return UnitTest::GetInstance()->impl(); +} + +#if GTEST_USES_SIMPLE_RE + +// Internal helper functions for implementing the simple regular +// expression matcher.
+GTEST_API_ bool IsInSet(char ch, const char* str); +GTEST_API_ bool IsAsciiDigit(char ch); +GTEST_API_ bool IsAsciiPunct(char ch); +GTEST_API_ bool IsRepeat(char ch); +GTEST_API_ bool IsAsciiWhiteSpace(char ch); +GTEST_API_ bool IsAsciiWordChar(char ch); +GTEST_API_ bool IsValidEscape(char ch); +GTEST_API_ bool AtomMatchesChar(bool escaped, char pattern, char ch); +GTEST_API_ bool ValidateRegex(const char* regex); +GTEST_API_ bool MatchRegexAtHead(const char* regex, const char* str); +GTEST_API_ bool MatchRepetitionAndRegexAtHead( + bool escaped, char ch, char repeat, const char* regex, const char* str); +GTEST_API_ bool MatchRegexAnywhere(const char* regex, const char* str); + +#endif // GTEST_USES_SIMPLE_RE + +// Parses the command line for Google Test flags, without initializing +// other parts of Google Test. +GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, char** argv); +GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv); + +#if GTEST_HAS_DEATH_TEST + +// Returns the message describing the last system error, regardless of the +// platform. +GTEST_API_ std::string GetLastErrnoDescription(); + +# if GTEST_OS_WINDOWS +// Provides leak-safe Windows kernel handle ownership. +class AutoHandle { + public: + AutoHandle() : handle_(INVALID_HANDLE_VALUE) {} + explicit AutoHandle(HANDLE handle) : handle_(handle) {} + + ~AutoHandle() { Reset(); } + + HANDLE Get() const { return handle_; } + void Reset() { Reset(INVALID_HANDLE_VALUE); } + void Reset(HANDLE handle) { + if (handle != handle_) { + if (handle_ != INVALID_HANDLE_VALUE) + ::CloseHandle(handle_); + handle_ = handle; + } + } + + private: + HANDLE handle_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle); +}; +# endif // GTEST_OS_WINDOWS + +// Attempts to parse a string into a positive integer pointed to by the +// number parameter. Returns true if that is possible. +// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use +// it here. 
+template +bool ParseNaturalNumber(const ::std::string& str, Integer* number) { + // Fail fast if the given string does not begin with a digit; + // this bypasses strtoXXX's "optional leading whitespace and plus + // or minus sign" semantics, which are undesirable here. + if (str.empty() || !IsDigit(str[0])) { + return false; + } + errno = 0; + + char* end; + // BiggestConvertible is the largest integer type that system-provided + // string-to-number conversion routines can return. + +# if GTEST_OS_WINDOWS && !defined(__GNUC__) + + // MSVC and C++ Builder define __int64 instead of the standard long long. + typedef unsigned __int64 BiggestConvertible; + const BiggestConvertible parsed = _strtoui64(str.c_str(), &end, 10); + +# else + + typedef unsigned long long BiggestConvertible; // NOLINT + const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10); + +# endif // GTEST_OS_WINDOWS && !defined(__GNUC__) + + const bool parse_success = *end == '\0' && errno == 0; + + // TODO(vladl@google.com): Convert this to compile time assertion when it is + // available. + GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed)); + + const Integer result = static_cast(parsed); + if (parse_success && static_cast(result) == parsed) { + *number = result; + return true; + } + return false; +} +#endif // GTEST_HAS_DEATH_TEST + +// TestResult contains some private methods that should be hidden from +// Google Test user but are required for testing. This class allow our tests +// to access them. +// +// This class is supplied only for the purpose of testing Google Test's own +// constructs. Do not use it in user tests, either directly or indirectly. 
+class TestResultAccessor { + public: + static void RecordProperty(TestResult* test_result, + const std::string& xml_element, + const TestProperty& property) { + test_result->RecordProperty(xml_element, property); + } + + static void ClearTestPartResults(TestResult* test_result) { + test_result->ClearTestPartResults(); + } + + static const std::vector& test_part_results( + const TestResult& test_result) { + return test_result.test_part_results(); + } +}; + +#if GTEST_CAN_STREAM_RESULTS_ + +// Streams test results to the given port on the given host machine. +class StreamingListener : public EmptyTestEventListener { + public: + // Abstract base class for writing strings to a socket. + class AbstractSocketWriter { + public: + virtual ~AbstractSocketWriter() {} + + // Sends a string to the socket. + virtual void Send(const string& message) = 0; + + // Closes the socket. + virtual void CloseConnection() {} + + // Sends a string and a newline to the socket. + void SendLn(const string& message) { + Send(message + "\n"); + } + }; + + // Concrete class for actually writing strings to a socket. + class SocketWriter : public AbstractSocketWriter { + public: + SocketWriter(const string& host, const string& port) + : sockfd_(-1), host_name_(host), port_num_(port) { + MakeConnection(); + } + + virtual ~SocketWriter() { + if (sockfd_ != -1) + CloseConnection(); + } + + // Sends a string to the socket. + virtual void Send(const string& message) { + GTEST_CHECK_(sockfd_ != -1) + << "Send() can be called only when there is a connection."; + + const int len = static_cast(message.length()); + if (write(sockfd_, message.c_str(), len) != len) { + GTEST_LOG_(WARNING) + << "stream_result_to: failed to stream to " + << host_name_ << ":" << port_num_; + } + } + + private: + // Creates a client socket and connects to the server. + void MakeConnection(); + + // Closes the socket. 
+ void CloseConnection() { + GTEST_CHECK_(sockfd_ != -1) + << "CloseConnection() can be called only when there is a connection."; + + close(sockfd_); + sockfd_ = -1; + } + + int sockfd_; // socket file descriptor + const string host_name_; + const string port_num_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(SocketWriter); + }; // class SocketWriter + + // Escapes '=', '&', '%', and '\n' characters in str as "%xx". + static string UrlEncode(const char* str); + + StreamingListener(const string& host, const string& port) + : socket_writer_(new SocketWriter(host, port)) { Start(); } + + explicit StreamingListener(AbstractSocketWriter* socket_writer) + : socket_writer_(socket_writer) { Start(); } + + void OnTestProgramStart(const UnitTest& /* unit_test */) { + SendLn("event=TestProgramStart"); + } + + void OnTestProgramEnd(const UnitTest& unit_test) { + // Note that Google Test current only report elapsed time for each + // test iteration, not for the entire test program. + SendLn("event=TestProgramEnd&passed=" + FormatBool(unit_test.Passed())); + + // Notify the streaming server to stop. 
+ socket_writer_->CloseConnection(); + } + + void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) { + SendLn("event=TestIterationStart&iteration=" + + StreamableToString(iteration)); + } + + void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) { + SendLn("event=TestIterationEnd&passed=" + + FormatBool(unit_test.Passed()) + "&elapsed_time=" + + StreamableToString(unit_test.elapsed_time()) + "ms"); + } + + void OnTestCaseStart(const TestCase& test_case) { + SendLn(std::string("event=TestCaseStart&name=") + test_case.name()); + } + + void OnTestCaseEnd(const TestCase& test_case) { + SendLn("event=TestCaseEnd&passed=" + FormatBool(test_case.Passed()) + + "&elapsed_time=" + StreamableToString(test_case.elapsed_time()) + + "ms"); + } + + void OnTestStart(const TestInfo& test_info) { + SendLn(std::string("event=TestStart&name=") + test_info.name()); + } + + void OnTestEnd(const TestInfo& test_info) { + SendLn("event=TestEnd&passed=" + + FormatBool((test_info.result())->Passed()) + + "&elapsed_time=" + + StreamableToString((test_info.result())->elapsed_time()) + "ms"); + } + + void OnTestPartResult(const TestPartResult& test_part_result) { + const char* file_name = test_part_result.file_name(); + if (file_name == NULL) + file_name = ""; + SendLn("event=TestPartResult&file=" + UrlEncode(file_name) + + "&line=" + StreamableToString(test_part_result.line_number()) + + "&message=" + UrlEncode(test_part_result.message())); + } + + private: + // Sends the given message and a newline to the socket. + void SendLn(const string& message) { socket_writer_->SendLn(message); } + + // Called at the start of streaming to notify the receiver what + // protocol we are using. + void Start() { SendLn("gtest_streaming_protocol_version=1.0"); } + + string FormatBool(bool value) { return value ? 
"1" : "0"; } + + const scoped_ptr socket_writer_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener); +}; // class StreamingListener + +#endif // GTEST_CAN_STREAM_RESULTS_ + +} // namespace internal +} // namespace testing + +#endif // GTEST_SRC_GTEST_INTERNAL_INL_H_ +#undef GTEST_IMPLEMENTATION_ + +#if GTEST_OS_WINDOWS +# define vsnprintf _vsnprintf +#endif // GTEST_OS_WINDOWS + +namespace testing { + +using internal::CountIf; +using internal::ForEach; +using internal::GetElementOr; +using internal::Shuffle; + +// Constants. + +// A test whose test case name or test name matches this filter is +// disabled and not run. +static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*"; + +// A test case whose name matches this filter is considered a death +// test case and will be run before test cases whose name doesn't +// match this filter. +static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*"; + +// A test filter that matches everything. +static const char kUniversalFilter[] = "*"; + +// The default output file for XML output. +static const char kDefaultOutputFile[] = "test_detail.xml"; + +// The environment variable name for the test shard index. +static const char kTestShardIndex[] = "GTEST_SHARD_INDEX"; +// The environment variable name for the total number of test shards. +static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS"; +// The environment variable name for the test shard status file. +static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE"; + +namespace internal { + +// The text used in failure messages to indicate the start of the +// stack trace. +const char kStackTraceMarker[] = "\nStack trace:\n"; + +// g_help_flag is true iff the --help flag or an equivalent form is +// specified on the command line. 
+bool g_help_flag = false;
+
+}  // namespace internal
+
+// Returns the filter used when the user supplies none: kUniversalFilter.
+static const char* GetDefaultFilter() {
+  return kUniversalFilter;
+}
+
+// Flag definitions.  Each GTEST_DEFINE_*_ invocation takes the flag name,
+// its default value, and its help text.
+// NOTE(review): the *FromGTestEnv() helpers are defined elsewhere; they
+// presumably take the default from an environment variable derived from
+// the flag name and fall back to the second argument -- confirm.
+
+GTEST_DEFINE_bool_(
+    also_run_disabled_tests,
+    internal::BoolFromGTestEnv("also_run_disabled_tests", false),
+    "Run disabled tests too, in addition to the tests normally being run.");
+
+GTEST_DEFINE_bool_(
+    break_on_failure,
+    internal::BoolFromGTestEnv("break_on_failure", false),
+    "True iff a failed assertion should be a debugger break-point.");
+
+GTEST_DEFINE_bool_(
+    catch_exceptions,
+    internal::BoolFromGTestEnv("catch_exceptions", true),
+    "True iff " GTEST_NAME_
+    " should catch exceptions and treat them as test failures.");
+
+GTEST_DEFINE_string_(
+    color,
+    internal::StringFromGTestEnv("color", "auto"),
+    "Whether to use colors in the output. Valid values: yes, no, "
+    "and auto. 'auto' means to use colors if the output is "
+    "being sent to a terminal and the TERM environment variable "
+    "is set to a terminal type that supports colors.");
+
+GTEST_DEFINE_string_(
+    filter,
+    internal::StringFromGTestEnv("filter", GetDefaultFilter()),
+    "A colon-separated list of glob (not regex) patterns "
+    "for filtering the tests to run, optionally followed by a "
+    "'-' and a : separated list of negative patterns (tests to "
+    "exclude). A test is run if it matches one of the positive "
+    "patterns and does not match any of the negative patterns.");
+
+// Unlike most flags here, list_tests has a fixed default of false.
+GTEST_DEFINE_bool_(list_tests, false,
+                   "List all tests without running them.");
+
+GTEST_DEFINE_string_(
+    output,
+    internal::StringFromGTestEnv("output", ""),
+    "A format (currently must be \"xml\"), optionally followed "
+    "by a colon and an output file name or directory. A directory "
+    "is indicated by a trailing pathname separator. "
+    "Examples: \"xml:filename.xml\", \"xml::directoryname/\". "
+    "If a directory is specified, output files will be created "
+    "within that directory, with file-names based on the test "
+    "executable's name and, if necessary, made unique by adding "
+    "digits.");
+
+GTEST_DEFINE_bool_(
+    print_time,
+    internal::BoolFromGTestEnv("print_time", true),
+    "True iff " GTEST_NAME_
+    " should display elapsed time in text output.");
+
+GTEST_DEFINE_int32_(
+    random_seed,
+    internal::Int32FromGTestEnv("random_seed", 0),
+    "Random number seed to use when shuffling test orders. Must be in range "
+    "[1, 99999], or 0 to use a seed based on the current time.");
+
+GTEST_DEFINE_int32_(
+    repeat,
+    internal::Int32FromGTestEnv("repeat", 1),
+    "How many times to repeat each test. Specify a negative number "
+    "for repeating forever. Useful for shaking out flaky tests.");
+
+// Like list_tests, this flag has a fixed default of false.
+GTEST_DEFINE_bool_(
+    show_internal_stack_frames, false,
+    "True iff " GTEST_NAME_ " should include internal stack frames when "
+    "printing test failure stack traces.");
+
+GTEST_DEFINE_bool_(
+    shuffle,
+    internal::BoolFromGTestEnv("shuffle", false),
+    "True iff " GTEST_NAME_
+    " should randomize tests' order on every run.");
+
+GTEST_DEFINE_int32_(
+    stack_trace_depth,
+    internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth),
+    "The maximum number of stack frames to print when an "
+    "assertion fails. The valid range is 0 through 100, inclusive.");
+
+GTEST_DEFINE_string_(
+    stream_result_to,
+    internal::StringFromGTestEnv("stream_result_to", ""),
+    "This flag specifies the host name and the port number on which to stream "
+    "test results. Example: \"localhost:555\". The flag is effective only on "
+    "Linux.");
+
+GTEST_DEFINE_bool_(
+    throw_on_failure,
+    internal::BoolFromGTestEnv("throw_on_failure", false),
+    "When this flag is specified, a failed assertion will throw an exception "
+    "if exceptions are enabled or exit the program with a non-zero code "
+    "otherwise.");
+
+namespace internal {
+
+// Generates a random number from [0, range), using a Linear
+// Congruential Generator (LCG).  Crashes if 'range' is 0 or greater
+// than kMaxRange.
+//
+// Note that the generator state is advanced before 'range' is validated.
+UInt32 Random::Generate(UInt32 range) {
+  // These constants are the same as are used in glibc's rand(3).
+  state_ = (1103515245U*state_ + 12345U) % kMaxRange;
+
+  GTEST_CHECK_(range > 0)
+      << "Cannot generate a number in the range [0, 0).";
+  GTEST_CHECK_(range <= kMaxRange)
+      << "Generation of a number in [0, " << range << ") was requested, "
+      << "but this can only generate numbers in [0, " << kMaxRange << ").";
+
+  // Converting via modulus introduces a bit of downward bias, but
+  // it's simple, and a linear congruential generator isn't too good
+  // to begin with.
+  return state_ % range;
+}
+
+// GTestIsInitialized() returns true iff the user has initialized
+// Google Test.  Useful for catching the user mistake of not initializing
+// Google Test before calling RUN_ALL_TESTS().
+//
+// A user must call testing::InitGoogleTest() to initialize Google
+// Test.  g_init_gtest_count is set to the number of times
+// InitGoogleTest() has been called.  We don't protect this variable
+// under a mutex as it is only accessed in the main thread.
+GTEST_API_ int g_init_gtest_count = 0;
+static bool GTestIsInitialized() { return g_init_gtest_count != 0; }
+
+// Iterates over a vector of TestCases, keeping a running sum of the
+// results of calling a given int-returning method on each.
+// Returns the sum.
+static int SumOverTestCaseList(const std::vector& case_list, + int (TestCase::*method)() const) { + int sum = 0; + for (size_t i = 0; i < case_list.size(); i++) { + sum += (case_list[i]->*method)(); + } + return sum; +} + +// Returns true iff the test case passed. +static bool TestCasePassed(const TestCase* test_case) { + return test_case->should_run() && test_case->Passed(); +} + +// Returns true iff the test case failed. +static bool TestCaseFailed(const TestCase* test_case) { + return test_case->should_run() && test_case->Failed(); +} + +// Returns true iff test_case contains at least one test that should +// run. +static bool ShouldRunTestCase(const TestCase* test_case) { + return test_case->should_run(); +} + +// AssertHelper constructor. +AssertHelper::AssertHelper(TestPartResult::Type type, + const char* file, + int line, + const char* message) + : data_(new AssertHelperData(type, file, line, message)) { +} + +AssertHelper::~AssertHelper() { + delete data_; +} + +// Message assignment, for assertion streaming support. +void AssertHelper::operator=(const Message& message) const { + UnitTest::GetInstance()-> + AddTestPartResult(data_->type, data_->file, data_->line, + AppendUserMessage(data_->message, message), + UnitTest::GetInstance()->impl() + ->CurrentOsStackTraceExceptTop(1) + // Skips the stack frame for this function itself. + ); // NOLINT +} + +// Mutex for linked pointers. +GTEST_API_ GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex); + +// Application pathname gotten in InitGoogleTest. +std::string g_executable_path; + +// Returns the current application's name, removing directory path if that +// is present. +FilePath GetCurrentExecutableName() { + FilePath result; + +#if GTEST_OS_WINDOWS + result.Set(FilePath(g_executable_path).RemoveExtension("exe")); +#else + result.Set(FilePath(g_executable_path)); +#endif // GTEST_OS_WINDOWS + + return result.RemoveDirectoryName(); +} + +// Functions for processing the gtest_output flag. 
+
+// Returns the format part of the gtest_output flag: the text before the
+// first ':' or, when there is no ':', the whole flag value.  An empty
+// flag value yields "", which stands for normal printed output.
+std::string UnitTestOptions::GetOutputFormat() {
+  const char* const flag_text = GTEST_FLAG(output).c_str();
+  if (flag_text == NULL) {
+    return std::string("");
+  }
+
+  const char* const separator = strchr(flag_text, ':');
+  if (separator != NULL) {
+    return std::string(flag_text, separator - flag_text);
+  }
+  return std::string(flag_text);
+}
+
+// Returns the path of the output file requested through the gtest_output
+// flag, or the default file inside the original working directory when
+// the flag names no file.  A relative path is resolved against the
+// original working directory; a directory is turned into a unique file
+// name inside it.
+std::string UnitTestOptions::GetAbsolutePathToOutputFile() {
+  const char* const flag_text = GTEST_FLAG(output).c_str();
+  if (flag_text == NULL) {
+    return "";
+  }
+
+  const char* const separator = strchr(flag_text, ':');
+  if (separator == NULL) {
+    // Only a format was given; use the default file name.
+    const internal::FilePath default_path = internal::FilePath::ConcatPaths(
+        internal::FilePath(
+            UnitTest::GetInstance()->original_working_dir()),
+        internal::FilePath(kDefaultOutputFile));
+    return default_path.string();
+  }
+
+  // Everything after the ':' is the requested file or directory.
+  const char* const path_text = separator + 1;
+  internal::FilePath target(path_text);
+  if (!target.IsAbsolutePath()) {
+    // TODO(wan@google.com): on Windows \some\path is not an absolute
+    // path (as its meaning depends on the current drive), yet the
+    // following logic for turning it into an absolute path is wrong.
+    // Fix it.
+    target = internal::FilePath::ConcatPaths(
+        internal::FilePath(UnitTest::GetInstance()->original_working_dir()),
+        internal::FilePath(path_text));
+  }
+
+  if (target.IsDirectory()) {
+    // Derive a unique file name from the executable name and the format.
+    const internal::FilePath unique_file(
+        internal::FilePath::GenerateUniqueFileName(
+            target, internal::GetCurrentExecutableName(),
+            GetOutputFormat().c_str()));
+    return unique_file.string();
+  }
+  return target.string();
+}
+
+// Returns true iff the wildcard pattern matches the string.  The
+// first ':' or '\0' character in pattern marks the end of it.
+//
+// This recursive algorithm isn't very efficient, but is clear and
+// works well enough for matching test names, which are short.
+bool UnitTestOptions::PatternMatchesString(const char *pattern, + const char *str) { + switch (*pattern) { + case '\0': + case ':': // Either ':' or '\0' marks the end of the pattern. + return *str == '\0'; + case '?': // Matches any single character. + return *str != '\0' && PatternMatchesString(pattern + 1, str + 1); + case '*': // Matches any string (possibly empty) of characters. + return (*str != '\0' && PatternMatchesString(pattern, str + 1)) || + PatternMatchesString(pattern + 1, str); + default: // Non-special character. Matches itself. + return *pattern == *str && + PatternMatchesString(pattern + 1, str + 1); + } +} + +bool UnitTestOptions::MatchesFilter( + const std::string& name, const char* filter) { + const char *cur_pattern = filter; + for (;;) { + if (PatternMatchesString(cur_pattern, name.c_str())) { + return true; + } + + // Finds the next pattern in the filter. + cur_pattern = strchr(cur_pattern, ':'); + + // Returns if no more pattern can be found. + if (cur_pattern == NULL) { + return false; + } + + // Skips the pattern separater (the ':' character). + cur_pattern++; + } +} + +// Returns true iff the user-specified filter matches the test case +// name and the test name. +bool UnitTestOptions::FilterMatchesTest(const std::string &test_case_name, + const std::string &test_name) { + const std::string& full_name = test_case_name + "." 
+ test_name.c_str(); + + // Split --gtest_filter at '-', if there is one, to separate into + // positive filter and negative filter portions + const char* const p = GTEST_FLAG(filter).c_str(); + const char* const dash = strchr(p, '-'); + std::string positive; + std::string negative; + if (dash == NULL) { + positive = GTEST_FLAG(filter).c_str(); // Whole string is a positive filter + negative = ""; + } else { + positive = std::string(p, dash); // Everything up to the dash + negative = std::string(dash + 1); // Everything after the dash + if (positive.empty()) { + // Treat '-test1' as the same as '*-test1' + positive = kUniversalFilter; + } + } + + // A filter is a colon-separated list of patterns. It matches a + // test if any pattern in it matches the test. + return (MatchesFilter(full_name, positive.c_str()) && + !MatchesFilter(full_name, negative.c_str())); +} + +#if GTEST_HAS_SEH +// Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the +// given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. +// This function is useful as an __except condition. +int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) { + // Google Test should handle a SEH exception if: + // 1. the user wants it to, AND + // 2. this is not a breakpoint exception, AND + // 3. this is not a C++ exception (VC++ implements them via SEH, + // apparently). + // + // SEH exception code for C++ exceptions. + // (see http://support.microsoft.com/kb/185294 for more information). + const DWORD kCxxExceptionCode = 0xe06d7363; + + bool should_handle = true; + + if (!GTEST_FLAG(catch_exceptions)) + should_handle = false; + else if (exception_code == EXCEPTION_BREAKPOINT) + should_handle = false; + else if (exception_code == kCxxExceptionCode) + should_handle = false; + + return should_handle ? 
EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH; +} +#endif // GTEST_HAS_SEH + +} // namespace internal + +// The c'tor sets this object as the test part result reporter used by +// Google Test. The 'result' parameter specifies where to report the +// results. Intercepts only failures from the current thread. +ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( + TestPartResultArray* result) + : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD), + result_(result) { + Init(); +} + +// The c'tor sets this object as the test part result reporter used by +// Google Test. The 'result' parameter specifies where to report the +// results. +ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( + InterceptMode intercept_mode, TestPartResultArray* result) + : intercept_mode_(intercept_mode), + result_(result) { + Init(); +} + +void ScopedFakeTestPartResultReporter::Init() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + if (intercept_mode_ == INTERCEPT_ALL_THREADS) { + old_reporter_ = impl->GetGlobalTestPartResultReporter(); + impl->SetGlobalTestPartResultReporter(this); + } else { + old_reporter_ = impl->GetTestPartResultReporterForCurrentThread(); + impl->SetTestPartResultReporterForCurrentThread(this); + } +} + +// The d'tor restores the test part result reporter used by Google Test +// before. +ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + if (intercept_mode_ == INTERCEPT_ALL_THREADS) { + impl->SetGlobalTestPartResultReporter(old_reporter_); + } else { + impl->SetTestPartResultReporterForCurrentThread(old_reporter_); + } +} + +// Increments the test part result count and remembers the result. +// This method is from the TestPartResultReporterInterface interface. 
+void ScopedFakeTestPartResultReporter::ReportTestPartResult(
+    const TestPartResult& result) {
+  result_->Append(result);
+}
+
+namespace internal {
+
+// Returns the type ID of ::testing::Test.  We should always call this
+// instead of GetTypeId< ::testing::Test>() to get the type ID of
+// testing::Test.  This is to work around a suspected linker bug when
+// using Google Test as a framework on Mac OS X.  The bug causes
+// GetTypeId< ::testing::Test>() to return different values depending
+// on whether the call is from the Google Test framework itself or
+// from user test code.  GetTestTypeId() is guaranteed to always
+// return the same value, as it always calls GetTypeId<>() from the
+// gtest.cc, which is within the Google Test framework.
+TypeId GetTestTypeId() {
+  return GetTypeId<Test>();
+}
+
+// The value of GetTestTypeId() as seen from within the Google Test
+// library.  This is solely for testing GetTestTypeId().
+extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId();
+
+// This predicate-formatter checks that 'results' contains a test part
+// failure of the given type and that the failure message contains the
+// given substring.
+//
+// The three leading expression-text parameters are unused.  Checks are
+// made in order: exactly one result, then matching type, then substring;
+// the first one that fails produces the returned AssertionFailure.
+AssertionResult HasOneFailure(const char* /* results_expr */,
+                              const char* /* type_expr */,
+                              const char* /* substr_expr */,
+                              const TestPartResultArray& results,
+                              TestPartResult::Type type,
+                              const string& substr) {
+  const std::string expected(type == TestPartResult::kFatalFailure ?
+                        "1 fatal failure" :
+                        "1 non-fatal failure");
+  Message msg;
+  if (results.size() != 1) {
+    // Wrong number of results: list every one that was recorded.
+    msg << "Expected: " << expected << "\n"
+        << " Actual: " << results.size() << " failures";
+    for (int i = 0; i < results.size(); i++) {
+      msg << "\n" << results.GetTestPartResult(i);
+    }
+    return AssertionFailure() << msg;
+  }
+
+  const TestPartResult& r = results.GetTestPartResult(0);
+  if (r.type() != type) {
+    return AssertionFailure() << "Expected: " << expected << "\n"
+                              << " Actual:\n"
+                              << r;
+  }
+
+  if (strstr(r.message(), substr.c_str()) == NULL) {
+    return AssertionFailure() << "Expected: " << expected << " containing \""
+                              << substr << "\"\n"
+                              << " Actual:\n"
+                              << r;
+  }
+
+  return AssertionSuccess();
+}
+
+// The constructor of SingleFailureChecker remembers where to look up
+// test part results, what type of failure we expect, and what
+// substring the failure message should contain.
+SingleFailureChecker:: SingleFailureChecker(
+    const TestPartResultArray* results,
+    TestPartResult::Type type,
+    const string& substr)
+    : results_(results),
+      type_(type),
+      substr_(substr) {}
+
+// The destructor of SingleFailureChecker verifies that the given
+// TestPartResultArray contains exactly one failure that has the given
+// type and contains the given substring.  If that's not the case, a
+// non-fatal failure will be generated.
+SingleFailureChecker::~SingleFailureChecker() {
+  EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_);
+}
+
+// Remembers the UnitTestImpl that results will be reported to.
+DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter(
+    UnitTestImpl* unit_test) : unit_test_(unit_test) {}
+
+// Adds the result to the current test result, then forwards it to the
+// event listeners through the repeater.
+void DefaultGlobalTestPartResultReporter::ReportTestPartResult(
+    const TestPartResult& result) {
+  unit_test_->current_test_result()->AddTestPartResult(result);
+  unit_test_->listeners()->repeater()->OnTestPartResult(result);
+}
+
+// Remembers the UnitTestImpl whose global reporter will be used.
+DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter(
+    UnitTestImpl* unit_test) : unit_test_(unit_test) {}
+
+// Forwards the result to whichever global reporter is installed at the
+// time of the call.
+void DefaultPerThreadTestPartResultReporter::ReportTestPartResult(
+    const TestPartResult& result) {
+  unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result);
+}
+
+// Returns the global test part result reporter.  The read is done while
+// holding global_test_part_result_reporter_mutex_.
+TestPartResultReporterInterface*
+UnitTestImpl::GetGlobalTestPartResultReporter() {
+  internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
+  return global_test_part_result_repoter_;
+}
+
+// Sets the global test part result reporter.  The write is done while
+// holding global_test_part_result_reporter_mutex_.
+void UnitTestImpl::SetGlobalTestPartResultReporter(
+    TestPartResultReporterInterface* reporter) {
+  internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
+  global_test_part_result_repoter_ = reporter;
+}
+
+// Returns the test part result reporter for the current thread.
+TestPartResultReporterInterface*
+UnitTestImpl::GetTestPartResultReporterForCurrentThread() {
+  return per_thread_test_part_result_reporter_.get();
+}
+
+// Sets the test part result reporter for the current thread.
+void UnitTestImpl::SetTestPartResultReporterForCurrentThread(
+    TestPartResultReporterInterface* reporter) {
+  per_thread_test_part_result_reporter_.set(reporter);
+}
+
+// Gets the number of successful test cases.
+int UnitTestImpl::successful_test_case_count() const {
+  return CountIf(test_cases_, TestCasePassed);
+}
+
+// Gets the number of failed test cases.
+int UnitTestImpl::failed_test_case_count() const { + return CountIf(test_cases_, TestCaseFailed); +} + +// Gets the number of all test cases. +int UnitTestImpl::total_test_case_count() const { + return static_cast(test_cases_.size()); +} + +// Gets the number of all test cases that contain at least one test +// that should run. +int UnitTestImpl::test_case_to_run_count() const { + return CountIf(test_cases_, ShouldRunTestCase); +} + +// Gets the number of successful tests. +int UnitTestImpl::successful_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::successful_test_count); +} + +// Gets the number of failed tests. +int UnitTestImpl::failed_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count); +} + +// Gets the number of disabled tests that will be reported in the XML report. +int UnitTestImpl::reportable_disabled_test_count() const { + return SumOverTestCaseList(test_cases_, + &TestCase::reportable_disabled_test_count); +} + +// Gets the number of disabled tests. +int UnitTestImpl::disabled_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count); +} + +// Gets the number of tests to be printed in the XML report. +int UnitTestImpl::reportable_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::reportable_test_count); +} + +// Gets the number of all tests. +int UnitTestImpl::total_test_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::total_test_count); +} + +// Gets the number of tests that should run. +int UnitTestImpl::test_to_run_count() const { + return SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count); +} + +// Returns the current OS stack trace as an std::string. +// +// The maximum number of stack frames to be included is specified by +// the gtest_stack_trace_depth flag. 
The skip_count parameter +// specifies the number of top frames to be skipped, which doesn't +// count against the number of frames to be included. +// +// For example, if Foo() calls Bar(), which in turn calls +// CurrentOsStackTraceExceptTop(1), Foo() will be included in the +// trace but Bar() and CurrentOsStackTraceExceptTop() won't. +std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) { + (void)skip_count; + return ""; +} + +// Returns the current time in milliseconds. +TimeInMillis GetTimeInMillis() { +#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__) + // Difference between 1970-01-01 and 1601-01-01 in milliseconds. + // http://analogous.blogspot.com/2005/04/epoch.html + const TimeInMillis kJavaEpochToWinFileTimeDelta = + static_cast(116444736UL) * 100000UL; + const DWORD kTenthMicrosInMilliSecond = 10000; + + SYSTEMTIME now_systime; + FILETIME now_filetime; + ULARGE_INTEGER now_int64; + // TODO(kenton@google.com): Shouldn't this just use + // GetSystemTimeAsFileTime()? + GetSystemTime(&now_systime); + if (SystemTimeToFileTime(&now_systime, &now_filetime)) { + now_int64.LowPart = now_filetime.dwLowDateTime; + now_int64.HighPart = now_filetime.dwHighDateTime; + now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) - + kJavaEpochToWinFileTimeDelta; + return now_int64.QuadPart; + } + return 0; +#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_ + __timeb64 now; + +# ifdef _MSC_VER + + // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996 + // (deprecated function) there. + // TODO(kenton@google.com): Use GetTickCount()? Or use + // SystemTimeToFileTime() +# pragma warning(push) // Saves the current warning state. +# pragma warning(disable:4996) // Temporarily disables warning 4996. + _ftime64(&now); +# pragma warning(pop) // Restores the warning state. 
+# else + + _ftime64(&now); + +# endif // _MSC_VER + + return static_cast(now.time) * 1000 + now.millitm; +#elif GTEST_HAS_GETTIMEOFDAY_ + struct timeval now; + gettimeofday(&now, NULL); + return static_cast(now.tv_sec) * 1000 + now.tv_usec / 1000; +#else +# error "Don't know how to get the current time on your system." +#endif +} + +// Utilities + +// class String. + +#if GTEST_OS_WINDOWS_MOBILE +// Creates a UTF-16 wide string from the given ANSI string, allocating +// memory using new. The caller is responsible for deleting the return +// value using delete[]. Returns the wide string, or NULL if the +// input is NULL. +LPCWSTR String::AnsiToUtf16(const char* ansi) { + if (!ansi) return NULL; + const int length = strlen(ansi); + const int unicode_length = + MultiByteToWideChar(CP_ACP, 0, ansi, length, + NULL, 0); + WCHAR* unicode = new WCHAR[unicode_length + 1]; + MultiByteToWideChar(CP_ACP, 0, ansi, length, + unicode, unicode_length); + unicode[unicode_length] = 0; + return unicode; +} + +// Creates an ANSI string from the given wide string, allocating +// memory using new. The caller is responsible for deleting the return +// value using delete[]. Returns the ANSI string, or NULL if the +// input is NULL. +const char* String::Utf16ToAnsi(LPCWSTR utf16_str) { + if (!utf16_str) return NULL; + const int ansi_length = + WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, + NULL, 0, NULL, NULL); + char* ansi = new char[ansi_length + 1]; + WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, + ansi, ansi_length, NULL, NULL); + ansi[ansi_length] = 0; + return ansi; +} + +#endif // GTEST_OS_WINDOWS_MOBILE + +// Compares two C strings. Returns true iff they have the same content. +// +// Unlike strcmp(), this function can handle NULL argument(s). A NULL +// C string is considered different to any non-NULL C string, +// including the empty string. 
+bool String::CStringEquals(const char * lhs, const char * rhs) { + if ( lhs == NULL ) return rhs == NULL; + + if ( rhs == NULL ) return false; + + return strcmp(lhs, rhs) == 0; +} + +#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING + +// Converts an array of wide chars to a narrow string using the UTF-8 +// encoding, and streams the result to the given Message object. +static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length, + Message* msg) { + for (size_t i = 0; i != length; ) { // NOLINT + if (wstr[i] != L'\0') { + *msg << WideStringToUtf8(wstr + i, static_cast(length - i)); + while (i != length && wstr[i] != L'\0') + i++; + } else { + *msg << '\0'; + i++; + } + } +} + +#endif // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING + +} // namespace internal + +// Constructs an empty Message. +// We allocate the stringstream separately because otherwise each use of +// ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's +// stack frame leading to huge stack frames in some cases; gcc does not reuse +// the stack space. +Message::Message() : ss_(new ::std::stringstream) { + // By default, we want there to be enough precision when printing + // a double to a Message. + *ss_ << std::setprecision(std::numeric_limits::digits10 + 2); +} + +// These two overloads allow streaming a wide C string to a Message +// using the UTF-8 encoding. +Message& Message::operator <<(const wchar_t* wide_c_str) { + return *this << internal::String::ShowWideCString(wide_c_str); +} +Message& Message::operator <<(wchar_t* wide_c_str) { + return *this << internal::String::ShowWideCString(wide_c_str); +} + +#if GTEST_HAS_STD_WSTRING +// Converts the given wide string to a narrow string using the UTF-8 +// encoding, and streams the result to this Message object. 
+Message& Message::operator <<(const ::std::wstring& wstr) {
+  const wchar_t* const chars = wstr.c_str();
+  internal::StreamWideCharsToMessage(chars, wstr.length(), this);
+  return *this;
+}
+#endif  // GTEST_HAS_STD_WSTRING
+
+#if GTEST_HAS_GLOBAL_WSTRING
+// Streams a ::wstring to this Message after converting it to a narrow
+// UTF-8 string.
+Message& Message::operator <<(const ::wstring& wstr) {
+  const wchar_t* const chars = wstr.c_str();
+  internal::StreamWideCharsToMessage(chars, wstr.length(), this);
+  return *this;
+}
+#endif  // GTEST_HAS_GLOBAL_WSTRING
+
+// Returns everything streamed to this Message so far as an std::string,
+// with each '\0' character in the buffer rendered as "\\0".
+std::string Message::GetString() const {
+  ::std::stringstream* const buffer = ss_.get();
+  return internal::StringStreamToString(buffer);
+}
+
+// AssertionResult copy constructor.
+// Used in EXPECT_TRUE/FALSE(assertion_result).
+// The message, when present, is duplicated rather than shared.
+AssertionResult::AssertionResult(const AssertionResult& other)
+    : success_(other.success_),
+      message_(other.message_.get() == NULL ?
+               static_cast< ::std::string*>(NULL) :
+               new ::std::string(*other.message_)) {
+}
+
+// Produces the logical negation of this result, carrying the message (if
+// any) over unchanged.  Used with EXPECT/ASSERT_FALSE.
+AssertionResult AssertionResult::operator!() const {
+  AssertionResult inverted(!success_);
+  if (message_.get() != NULL) {
+    inverted << *message_;
+  }
+  return inverted;
+}
+
+// Makes a successful assertion result.
+AssertionResult AssertionSuccess() {
+  return AssertionResult(true);
+}
+
+// Makes a failed assertion result.
+AssertionResult AssertionFailure() {
+  return AssertionResult(false);
+}
+
+// Makes a failed assertion result with the given failure message.
+// Deprecated; use AssertionFailure() << message.
+AssertionResult AssertionFailure(const Message& message) {
+  return AssertionResult(false) << message;
+}
+
+namespace internal {
+
+// Constructs and returns the message for an equality assertion
+// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
+//
+// The first four parameters are the expressions used in the assertion
+// and their values, as strings.  For example, for ASSERT_EQ(foo, bar)
+// where foo is 5 and bar is 6, we have:
+//
+//   expected_expression: "foo"
+//   actual_expression:   "bar"
+//   expected_value:      "5"
+//   actual_value:        "6"
+//
+// The ignoring_case parameter is true iff the assertion is a
+// *_STRCASEEQ*.  When it's true, the string " (ignoring case)" will
+// be inserted into the message.
+AssertionResult EqFailure(const char* expected_expression,
+                          const char* actual_expression,
+                          const std::string& expected_value,
+                          const std::string& actual_value,
+                          bool ignoring_case) {
+  Message text;
+
+  // The value line is omitted when it would merely repeat the expression
+  // text (e.g. for a literal).
+  text << "Value of: " << actual_expression;
+  const bool show_actual_value = actual_value != actual_expression;
+  if (show_actual_value) {
+    text << "\n Actual: " << actual_value;
+  }
+
+  text << "\nExpected: " << expected_expression;
+  if (ignoring_case) {
+    text << " (ignoring case)";
+  }
+  const bool show_expected_value = expected_value != expected_expression;
+  if (show_expected_value) {
+    text << "\nWhich is: " << expected_value;
+  }
+
+  return AssertionFailure() << text;
+}
+
+// Builds the failure message for Boolean assertions such as EXPECT_TRUE.
+// The assertion's own message, when non-empty, is appended in parentheses
+// after the actual value.
+std::string GetBoolAssertionFailureMessage(
+    const AssertionResult& assertion_result,
+    const char* expression_text,
+    const char* actual_predicate_value,
+    const char* expected_predicate_value) {
+  const char* const detail = assertion_result.message();
+  const bool has_detail = detail[0] != '\0';
+
+  Message text;
+  text << "Value of: " << expression_text
+       << "\n Actual: " << actual_predicate_value;
+  if (has_detail) {
+    text << " (" << detail << ")";
+  }
+  text << "\nExpected: " << expected_predicate_value;
+  return text.GetString();
+}
+
+// Helper function for implementing ASSERT_NEAR.
+AssertionResult DoubleNearPredFormat(const char* expr1, + const char* expr2, + const char* abs_error_expr, + double val1, + double val2, + double abs_error) { + const double diff = fabs(val1 - val2); + if (diff <= abs_error) return AssertionSuccess(); + + // TODO(wan): do not print the value of an expression if it's + // already a literal. + return AssertionFailure() + << "The difference between " << expr1 << " and " << expr2 + << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n" + << expr1 << " evaluates to " << val1 << ",\n" + << expr2 << " evaluates to " << val2 << ", and\n" + << abs_error_expr << " evaluates to " << abs_error << "."; +} + + +// Helper template for implementing FloatLE() and DoubleLE(). +template +AssertionResult FloatingPointLE(const char* expr1, + const char* expr2, + RawType val1, + RawType val2) { + // Returns success if val1 is less than val2, + if (val1 < val2) { + return AssertionSuccess(); + } + + // or if val1 is almost equal to val2. + const FloatingPoint lhs(val1), rhs(val2); + if (lhs.AlmostEquals(rhs)) { + return AssertionSuccess(); + } + + // Note that the above two checks will both fail if either val1 or + // val2 is NaN, as the IEEE floating-point standard requires that + // any predicate involving a NaN must return false. + + ::std::stringstream val1_ss; + val1_ss << std::setprecision(std::numeric_limits::digits10 + 2) + << val1; + + ::std::stringstream val2_ss; + val2_ss << std::setprecision(std::numeric_limits::digits10 + 2) + << val2; + + return AssertionFailure() + << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n" + << " Actual: " << StringStreamToString(&val1_ss) << " vs " + << StringStreamToString(&val2_ss); +} + +} // namespace internal + +// Asserts that val1 is less than, or almost equal to, val2. Fails +// otherwise. In particular, it fails if either val1 or val2 is NaN. 
+AssertionResult FloatLE(const char* expr1, const char* expr2, + float val1, float val2) { + return internal::FloatingPointLE(expr1, expr2, val1, val2); +} + +// Asserts that val1 is less than, or almost equal to, val2. Fails +// otherwise. In particular, it fails if either val1 or val2 is NaN. +AssertionResult DoubleLE(const char* expr1, const char* expr2, + double val1, double val2) { + return internal::FloatingPointLE(expr1, expr2, val1, val2); +} + +namespace internal { + +// The helper function for {ASSERT|EXPECT}_EQ with int or enum +// arguments. +AssertionResult CmpHelperEQ(const char* expected_expression, + const char* actual_expression, + BiggestInt expected, + BiggestInt actual) { + if (expected == actual) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + FormatForComparisonFailureMessage(expected, actual), + FormatForComparisonFailureMessage(actual, expected), + false); +} + +// A macro for implementing the helper functions needed to implement +// ASSERT_?? and EXPECT_?? with integer or enum arguments. It is here +// just to avoid copy-and-paste of similar code. +#define GTEST_IMPL_CMP_HELPER_(op_name, op)\ +AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \ + BiggestInt val1, BiggestInt val2) {\ + if (val1 op val2) {\ + return AssertionSuccess();\ + } else {\ + return AssertionFailure() \ + << "Expected: (" << expr1 << ") " #op " (" << expr2\ + << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\ + << " vs " << FormatForComparisonFailureMessage(val2, val1);\ + }\ +} + +// Implements the helper function for {ASSERT|EXPECT}_NE with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER_(NE, !=) +// Implements the helper function for {ASSERT|EXPECT}_LE with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER_(LE, <=) +// Implements the helper function for {ASSERT|EXPECT}_LT with int or +// enum arguments. 
+GTEST_IMPL_CMP_HELPER_(LT, < ) +// Implements the helper function for {ASSERT|EXPECT}_GE with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER_(GE, >=) +// Implements the helper function for {ASSERT|EXPECT}_GT with int or +// enum arguments. +GTEST_IMPL_CMP_HELPER_(GT, > ) + +#undef GTEST_IMPL_CMP_HELPER_ + +// The helper function for {ASSERT|EXPECT}_STREQ. +AssertionResult CmpHelperSTREQ(const char* expected_expression, + const char* actual_expression, + const char* expected, + const char* actual) { + if (String::CStringEquals(expected, actual)) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + PrintToString(expected), + PrintToString(actual), + false); +} + +// The helper function for {ASSERT|EXPECT}_STRCASEEQ. +AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression, + const char* actual_expression, + const char* expected, + const char* actual) { + if (String::CaseInsensitiveCStringEquals(expected, actual)) { + return AssertionSuccess(); + } + + return EqFailure(expected_expression, + actual_expression, + PrintToString(expected), + PrintToString(actual), + true); +} + +// The helper function for {ASSERT|EXPECT}_STRNE. +AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2) { + if (!String::CStringEquals(s1, s2)) { + return AssertionSuccess(); + } else { + return AssertionFailure() << "Expected: (" << s1_expression << ") != (" + << s2_expression << "), actual: \"" + << s1 << "\" vs \"" << s2 << "\""; + } +} + +// The helper function for {ASSERT|EXPECT}_STRCASENE. 
+AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
+                                   const char* s2_expression,
+                                   const char* s1,
+                                   const char* s2) {
+  if (!String::CaseInsensitiveCStringEquals(s1, s2)) {
+    return AssertionSuccess();
+  } else {
+    return AssertionFailure()
+        << "Expected: (" << s1_expression << ") != ("
+        << s2_expression << ") (ignoring case), actual: \""
+        << s1 << "\" vs \"" << s2 << "\"";
+  }
+}
+
+}  // namespace internal
+
+namespace {
+
+// Helper functions for implementing IsSubString() and IsNotSubstring().
+
+// This group of overloaded functions return true iff needle is a
+// substring of haystack.  NULL is considered a substring of itself
+// only.
+
+bool IsSubstringPred(const char* needle, const char* haystack) {
+  if (needle == NULL || haystack == NULL)
+    return needle == haystack;
+
+  return strstr(haystack, needle) != NULL;
+}
+
+bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
+  if (needle == NULL || haystack == NULL)
+    return needle == haystack;
+
+  return wcsstr(haystack, needle) != NULL;
+}
+
+// StringType here can be either ::std::string or ::std::wstring.
+template <typename StringType>
+bool IsSubstringPred(const StringType& needle,
+                     const StringType& haystack) {
+  return haystack.find(needle) != StringType::npos;
+}
+
+// This function implements either IsSubstring() or IsNotSubstring(),
+// depending on the value of the expected_to_be_substring parameter.
+// StringType here can be const char*, const wchar_t*, ::std::string,
+// or ::std::wstring.
+template <typename StringType>
+AssertionResult IsSubstringImpl(
+    bool expected_to_be_substring,
+    const char* needle_expr, const char* haystack_expr,
+    const StringType& needle, const StringType& haystack) {
+  if (IsSubstringPred(needle, haystack) == expected_to_be_substring)
+    return AssertionSuccess();
+
+  // Wide strings are shown with an L"..." prefix in the message.
+  const bool is_wide_string = sizeof(needle[0]) > 1;
+  const char* const begin_string_quote = is_wide_string ? "L\"" : "\"";
+  return AssertionFailure()
+      << "Value of: " << needle_expr << "\n"
+      << " Actual: " << begin_string_quote << needle << "\"\n"
+      << "Expected: " << (expected_to_be_substring ? "" : "not ")
+      << "a substring of " << haystack_expr << "\n"
+      << "Which is: " << begin_string_quote << haystack << "\"";
+}
+
+}  // namespace
+
+// IsSubstring() and IsNotSubstring() check whether needle is a
+// substring of haystack (NULL is considered a substring of itself
+// only), and return an appropriate error message when they fail.
+
+AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const char* needle, const char* haystack) {
+  return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const wchar_t* needle, const wchar_t* haystack) {
+  return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const char* needle, const char* haystack) {
+  return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const wchar_t* needle, const wchar_t* haystack) {
+  return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::string& needle, const ::std::string& haystack) {
+  return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::string& needle, const ::std::string& haystack) {
+  return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+
+#if GTEST_HAS_STD_WSTRING
+AssertionResult IsSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::wstring& needle, const ::std::wstring& haystack) {
+  return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
+}
+
+AssertionResult IsNotSubstring(
+    const char* needle_expr, const char* haystack_expr,
+    const ::std::wstring& needle, const ::std::wstring& haystack) {
+  return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
+}
+#endif  // GTEST_HAS_STD_WSTRING
+
+namespace internal {
+
+#if GTEST_OS_WINDOWS
+
+namespace {
+
+// Helper function for IsHRESULT{SuccessFailure} predicates
+//
+// Builds the failure result: `expr` is the source text of the checked
+// expression, `expected` the verb describing the expectation, and `hr` the
+// HRESULT actually observed.
+AssertionResult HRESULTFailureHelper(const char* expr,
+                                     const char* expected,
+                                     long hr) {  // NOLINT
+# if GTEST_OS_WINDOWS_MOBILE
+
+  // Windows CE doesn't support FormatMessage.
+  const char error_text[] = "";
+
+# else
+
+  // Looks up the human-readable system message for the HRESULT code
+  // and since we're not passing any params to FormatMessage, we don't
+  // want inserts expanded.
+  const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM |
+                       FORMAT_MESSAGE_IGNORE_INSERTS;
+  const DWORD kBufSize = 4096;
+  // Gets the system's human readable message string for this HRESULT.
+  char error_text[kBufSize] = { '\0' };
+  DWORD message_length = ::FormatMessageA(kFlags,
+                                          0,  // no source, we're asking system
+                                          hr,  // the error
+                                          0,  // no line width restrictions
+                                          error_text,  // output buffer
+                                          kBufSize,  // buf size
+                                          NULL);  // no arguments for inserts
+  // Trims tailing white space (FormatMessage leaves a trailing CR-LF)
+  for (; message_length && IsSpace(error_text[message_length - 1]);
+          --message_length) {
+    error_text[message_length - 1] = '\0';
+  }
+
+# endif  // GTEST_OS_WINDOWS_MOBILE
+
+  const std::string error_hex("0x" + String::FormatHexInt(hr));
+  return ::testing::AssertionFailure()
+      << "Expected: " << expr << " " << expected << ".\n"
+      << " Actual: " << error_hex << " " << error_text << "\n";
+}
+
+}  // namespace
+
+AssertionResult IsHRESULTSuccess(const char* expr, long hr) {  // NOLINT
+  if (SUCCEEDED(hr)) {
+    return AssertionSuccess();
+  }
+  return HRESULTFailureHelper(expr, "succeeds", hr);
+}
+
+AssertionResult IsHRESULTFailure(const char* expr, long hr) {  // NOLINT
+  if (FAILED(hr)) {
+    return AssertionSuccess();
+  }
+  return HRESULTFailureHelper(expr, "fails", hr);
+}
+
+#endif  // GTEST_OS_WINDOWS
+
+// Utility functions for encoding Unicode text (wide strings) in
+// UTF-8.
+
+// A Unicode code-point can have upto 21 bits, and is encoded in UTF-8
+// like this:
+//
+// Code-point length   Encoding
+//   0 -  7 bits       0xxxxxxx
+//   8 - 11 bits       110xxxxx 10xxxxxx
+//  12 - 16 bits       1110xxxx 10xxxxxx 10xxxxxx
+//  17 - 21 bits       11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+// The maximum code-point a one-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint1 = (static_cast<UInt32>(1) <<  7) - 1;
+
+// The maximum code-point a two-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint2 = (static_cast<UInt32>(1) << (5 + 6)) - 1;
+
+// The maximum code-point a three-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint3 = (static_cast<UInt32>(1) << (4 + 2*6)) - 1;
+
+// The maximum code-point a four-byte UTF-8 sequence can represent.
+const UInt32 kMaxCodePoint4 = (static_cast<UInt32>(1) << (3 + 3*6)) - 1;
+
+// Chops off the n lowest bits from a bit pattern.  Returns the n
+// lowest bits.  As a side effect, the original bit pattern will be
+// shifted to the right by n bits.
+inline UInt32 ChopLowBits(UInt32* bits, int n) {
+  const UInt32 low_bits = *bits & ((static_cast<UInt32>(1) << n) - 1);
+  *bits >>= n;
+  return low_bits;
+}
+
+// Converts a Unicode code point to a narrow string in UTF-8 encoding.
+// code_point parameter is of type UInt32 because wchar_t may not be
+// wide enough to contain a code point.
+// If the code_point is not a valid Unicode code point
+// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
+// to "(Invalid Unicode 0xXXXXXXXX)".
+std::string CodePointToUtf8(UInt32 code_point) {
+  if (code_point > kMaxCodePoint4) {
+    return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")";
+  }
+
+  // Each branch fills the buffer from the last byte to the first:
+  // ChopLowBits() peels six payload bits per continuation byte, leaving
+  // only the lead byte's bits in code_point.
+  char str[5];  // Big enough for the largest valid code point.
+  if (code_point <= kMaxCodePoint1) {
+    str[1] = '\0';
+    str[0] = static_cast<char>(code_point);                          // 0xxxxxxx
+  } else if (code_point <= kMaxCodePoint2) {
+    str[2] = '\0';
+    str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[0] = static_cast<char>(0xC0 | code_point);                   // 110xxxxx
+  } else if (code_point <= kMaxCodePoint3) {
+    str[3] = '\0';
+    str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[0] = static_cast<char>(0xE0 | code_point);                   // 1110xxxx
+  } else {  // code_point <= kMaxCodePoint4
+    str[4] = '\0';
+    str[3] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
+    str[0] = static_cast<char>(0xF0 | code_point);                   // 11110xxx
+  }
+  return str;
+}
+
+// The following two functions only make sense if the the system
+// uses UTF-16 for wide string encoding.
All supported systems +// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16. + +// Determines if the arguments constitute UTF-16 surrogate pair +// and thus should be combined into a single Unicode code point +// using CreateCodePointFromUtf16SurrogatePair. +inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) { + return sizeof(wchar_t) == 2 && + (first & 0xFC00) == 0xD800 && (second & 0xFC00) == 0xDC00; +} + +// Creates a Unicode code point from UTF16 surrogate pair. +inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first, + wchar_t second) { + const UInt32 mask = (1 << 10) - 1; + return (sizeof(wchar_t) == 2) ? + (((first & mask) << 10) | (second & mask)) + 0x10000 : + // This function should not be called when the condition is + // false, but we provide a sensible default in case it is. + static_cast(first); +} + +// Converts a wide string to a narrow string in UTF-8 encoding. +// The wide string is assumed to have the following encoding: +// UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS) +// UTF-32 if sizeof(wchar_t) == 4 (on Linux) +// Parameter str points to a null-terminated wide string. +// Parameter num_chars may additionally limit the number +// of wchar_t characters processed. -1 is used when the entire string +// should be processed. +// If the string contains code points that are not valid Unicode code points +// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output +// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding +// and contains invalid UTF-16 surrogate pairs, values in those pairs +// will be encoded as individual Unicode characters from Basic Normal Plane. 
+std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
+  if (num_chars == -1)
+    num_chars = static_cast<int>(wcslen(str));
+
+  ::std::stringstream stream;
+  for (int i = 0; i < num_chars; ++i) {
+    UInt32 unicode_code_point;
+
+    if (str[i] == L'\0') {
+      // Stop at the terminator even if num_chars allows more.
+      break;
+    } else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) {
+      unicode_code_point = CreateCodePointFromUtf16SurrogatePair(str[i],
+                                                                 str[i + 1]);
+      i++;  // The pair consumed two wchar_t units.
+    } else {
+      unicode_code_point = static_cast<UInt32>(str[i]);
+    }
+
+    stream << CodePointToUtf8(unicode_code_point);
+  }
+  return StringStreamToString(&stream);
+}
+
+// Converts a wide C string to an std::string using the UTF-8 encoding.
+// NULL will be converted to "(null)".
+std::string String::ShowWideCString(const wchar_t * wide_c_str) {
+  if (wide_c_str == NULL)  return "(null)";
+
+  return internal::WideStringToUtf8(wide_c_str, -1);
+}
+
+// Compares two wide C strings.  Returns true iff they have the same
+// content.
+//
+// Unlike wcscmp(), this function can handle NULL argument(s).  A NULL
+// C string is considered different to any non-NULL C string,
+// including the empty string.
+bool String::WideCStringEquals(const wchar_t * lhs, const wchar_t * rhs) {
+  if (lhs == NULL) return rhs == NULL;
+
+  if (rhs == NULL) return false;
+
+  return wcscmp(lhs, rhs) == 0;
+}
+
+// Helper function for *_STREQ on wide strings.
+AssertionResult CmpHelperSTREQ(const char* expected_expression,
+                               const char* actual_expression,
+                               const wchar_t* expected,
+                               const wchar_t* actual) {
+  if (String::WideCStringEquals(expected, actual)) {
+    return AssertionSuccess();
+  }
+
+  return EqFailure(expected_expression,
+                   actual_expression,
+                   PrintToString(expected),
+                   PrintToString(actual),
+                   false);
+}
+
+// Helper function for *_STRNE on wide strings.
+AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const wchar_t* s1, + const wchar_t* s2) { + if (!String::WideCStringEquals(s1, s2)) { + return AssertionSuccess(); + } + + return AssertionFailure() << "Expected: (" << s1_expression << ") != (" + << s2_expression << "), actual: " + << PrintToString(s1) + << " vs " << PrintToString(s2); +} + +// Compares two C strings, ignoring case. Returns true iff they have +// the same content. +// +// Unlike strcasecmp(), this function can handle NULL argument(s). A +// NULL C string is considered different to any non-NULL C string, +// including the empty string. +bool String::CaseInsensitiveCStringEquals(const char * lhs, const char * rhs) { + if (lhs == NULL) + return rhs == NULL; + if (rhs == NULL) + return false; + return posix::StrCaseCmp(lhs, rhs) == 0; +} + + // Compares two wide C strings, ignoring case. Returns true iff they + // have the same content. + // + // Unlike wcscasecmp(), this function can handle NULL argument(s). + // A NULL C string is considered different to any non-NULL wide C string, + // including the empty string. + // NB: The implementations on different platforms slightly differ. + // On windows, this method uses _wcsicmp which compares according to LC_CTYPE + // environment variable. On GNU platform this method uses wcscasecmp + // which compares according to LC_CTYPE category of the current locale. + // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the + // current locale. +bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs, + const wchar_t* rhs) { + if (lhs == NULL) return rhs == NULL; + + if (rhs == NULL) return false; + +#if GTEST_OS_WINDOWS + return _wcsicmp(lhs, rhs) == 0; +#elif GTEST_OS_LINUX && !GTEST_OS_LINUX_ANDROID + return wcscasecmp(lhs, rhs) == 0; +#else + // Android, Mac OS X and Cygwin don't define wcscasecmp. + // Other unknown OSes may not define it either. 
+ wint_t left, right; + do { + left = towlower(*lhs++); + right = towlower(*rhs++); + } while (left && left == right); + return left == right; +#endif // OS selector +} + +// Returns true iff str ends with the given suffix, ignoring case. +// Any string is considered to end with an empty suffix. +bool String::EndsWithCaseInsensitive( + const std::string& str, const std::string& suffix) { + const size_t str_len = str.length(); + const size_t suffix_len = suffix.length(); + return (str_len >= suffix_len) && + CaseInsensitiveCStringEquals(str.c_str() + str_len - suffix_len, + suffix.c_str()); +} + +// Formats an int value as "%02d". +std::string String::FormatIntWidth2(int value) { + std::stringstream ss; + ss << std::setfill('0') << std::setw(2) << value; + return ss.str(); +} + +// Formats an int value as "%X". +std::string String::FormatHexInt(int value) { + std::stringstream ss; + ss << std::hex << std::uppercase << value; + return ss.str(); +} + +// Formats a byte as "%02X". +std::string String::FormatByte(unsigned char value) { + std::stringstream ss; + ss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase + << static_cast(value); + return ss.str(); +} + +// Converts the buffer in a stringstream to an std::string, converting NUL +// bytes to "\\0" along the way. +std::string StringStreamToString(::std::stringstream* ss) { + const ::std::string& str = ss->str(); + const char* const start = str.c_str(); + const char* const end = start + str.length(); + + std::string result; + result.reserve(2 * (end - start)); + for (const char* ch = start; ch != end; ++ch) { + if (*ch == '\0') { + result += "\\0"; // Replaces NUL with "\\0"; + } else { + result += *ch; + } + } + + return result; +} + +// Appends the user-supplied message to the Google-Test-generated message. +std::string AppendUserMessage(const std::string& gtest_msg, + const Message& user_msg) { + // Appends the user message if it's non-empty. 
+ const std::string user_msg_string = user_msg.GetString(); + if (user_msg_string.empty()) { + return gtest_msg; + } + + return gtest_msg + "\n" + user_msg_string; +} + +} // namespace internal + +// class TestResult + +// Creates an empty TestResult. +TestResult::TestResult() + : death_test_count_(0), + elapsed_time_(0) { +} + +// D'tor. +TestResult::~TestResult() { +} + +// Returns the i-th test part result among all the results. i can +// range from 0 to total_part_count() - 1. If i is not in that range, +// aborts the program. +const TestPartResult& TestResult::GetTestPartResult(int i) const { + if (i < 0 || i >= total_part_count()) + internal::posix::Abort(); + return test_part_results_.at(i); +} + +// Returns the i-th test property. i can range from 0 to +// test_property_count() - 1. If i is not in that range, aborts the +// program. +const TestProperty& TestResult::GetTestProperty(int i) const { + if (i < 0 || i >= test_property_count()) + internal::posix::Abort(); + return test_properties_.at(i); +} + +// Clears the test part results. +void TestResult::ClearTestPartResults() { + test_part_results_.clear(); +} + +// Adds a test part result to the list. +void TestResult::AddTestPartResult(const TestPartResult& test_part_result) { + test_part_results_.push_back(test_part_result); +} + +// Adds a test property to the list. If a property with the same key as the +// supplied property is already represented, the value of this test_property +// replaces the old value for that key. 
+void TestResult::RecordProperty(const std::string& xml_element,
+                                const TestProperty& test_property) {
+  // Properties whose key is reserved for xml_element are rejected;
+  // ValidateTestProperty() has already reported the failure.
+  if (!ValidateTestProperty(xml_element, test_property)) {
+    return;
+  }
+  internal::MutexLock lock(&test_properites_mutex_);
+  const std::vector<TestProperty>::iterator property_with_matching_key =
+      std::find_if(test_properties_.begin(), test_properties_.end(),
+                   internal::TestPropertyKeyIs(test_property.key()));
+  if (property_with_matching_key == test_properties_.end()) {
+    test_properties_.push_back(test_property);
+    return;
+  }
+  property_with_matching_key->SetValue(test_property.value());
+}
+
+// The list of reserved attributes used in the testsuites element of XML
+// output.
+static const char* const kReservedTestSuitesAttributes[] = {
+  "disabled",
+  "errors",
+  "failures",
+  "name",
+  "random_seed",
+  "tests",
+  "time",
+  "timestamp"
+};
+
+// The list of reserved attributes used in the testsuite element of XML
+// output.
+static const char* const kReservedTestSuiteAttributes[] = {
+  "disabled",
+  "errors",
+  "failures",
+  "name",
+  "tests",
+  "time"
+};
+
+// The list of reserved attributes used in the testcase element of XML output.
+static const char* const kReservedTestCaseAttributes[] = {
+  "classname",
+  "name",
+  "status",
+  "time",
+  "type_param",
+  "value_param"
+};
+
+// Copies a fixed-size array of C strings into a vector of std::string.
+// kSize is deduced from the array argument.
+template <int kSize>
+std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
+  return std::vector<std::string>(array, array + kSize);
+}
+
+// Returns the attribute names reserved for the given XML element
+// ("testsuites", "testsuite" or "testcase").  Any other element name trips
+// GTEST_CHECK_.
+static std::vector<std::string> GetReservedAttributesForElement(
+    const std::string& xml_element) {
+  if (xml_element == "testsuites") {
+    return ArrayAsVector(kReservedTestSuitesAttributes);
+  } else if (xml_element == "testsuite") {
+    return ArrayAsVector(kReservedTestSuiteAttributes);
+  } else if (xml_element == "testcase") {
+    return ArrayAsVector(kReservedTestCaseAttributes);
+  } else {
+    GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element;
+  }
+  // This code is unreachable but some compilers may not realizes that.
+  return std::vector<std::string>();
+}
+
+// Joins the words into a quoted, human-readable list.  A ", " separator is
+// used only when there are more than two words, and "and " is placed before
+// the last word.
+static std::string FormatWordList(const std::vector<std::string>& words) {
+  Message word_list;
+  for (size_t i = 0; i < words.size(); ++i) {
+    if (i > 0 && words.size() > 2) {
+      word_list << ", ";
+    }
+    if (i == words.size() - 1) {
+      word_list << "and ";
+    }
+    word_list << "'" << words[i] << "'";
+  }
+  return word_list.GetString();
+}
+
+// Returns true if property_name is not one of reserved_names; otherwise
+// records a test failure listing the reserved names and returns false.
+bool ValidateTestPropertyName(const std::string& property_name,
+                              const std::vector<std::string>& reserved_names) {
+  if (std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
+          reserved_names.end()) {
+    ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
+                  << " (" << FormatWordList(reserved_names)
+                  << " are reserved by " << GTEST_NAME_ << ")";
+    return false;
+  }
+  return true;
+}
+
+// Adds a failure if the key is a reserved attribute of the element named
+// xml_element.  Returns true if the property is valid.
+bool TestResult::ValidateTestProperty(const std::string& xml_element,
+                                      const TestProperty& test_property) {
+  return ValidateTestPropertyName(test_property.key(),
+                                  GetReservedAttributesForElement(xml_element));
+}
+
+// Clears the object.
+void TestResult::Clear() {
+  test_part_results_.clear();
+  test_properties_.clear();
+  death_test_count_ = 0;
+  elapsed_time_ = 0;
+}
+
+// Returns true iff the test failed.
+bool TestResult::Failed() const {
+  for (int i = 0; i < total_part_count(); ++i) {
+    if (GetTestPartResult(i).failed())
+      return true;
+  }
+  return false;
+}
+
+// Returns true iff the test part fatally failed.
+static bool TestPartFatallyFailed(const TestPartResult& result) {
+  return result.fatally_failed();
+}
+
+// Returns true iff the test fatally failed.
+bool TestResult::HasFatalFailure() const {
+  return CountIf(test_part_results_, TestPartFatallyFailed) > 0;
+}
+
+// Returns true iff the test part non-fatally failed.
+static bool TestPartNonfatallyFailed(const TestPartResult& result) { + return result.nonfatally_failed(); +} + +// Returns true iff the test has a non-fatal failure. +bool TestResult::HasNonfatalFailure() const { + return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0; +} + +// Gets the number of all test parts. This is the sum of the number +// of successful test parts and the number of failed test parts. +int TestResult::total_part_count() const { + return static_cast(test_part_results_.size()); +} + +// Returns the number of the test properties. +int TestResult::test_property_count() const { + return static_cast(test_properties_.size()); +} + +// class Test + +// Creates a Test object. + +// The c'tor saves the values of all Google Test flags. +Test::Test() + : gtest_flag_saver_(new internal::GTestFlagSaver) { +} + +// The d'tor restores the values of all Google Test flags. +Test::~Test() { + delete gtest_flag_saver_; +} + +// Sets up the test fixture. +// +// A sub-class may override this. +void Test::SetUp() { +} + +// Tears down the test fixture. +// +// A sub-class may override this. +void Test::TearDown() { +} + +// Allows user supplied key value pairs to be recorded for later output. +void Test::RecordProperty(const std::string& key, const std::string& value) { + UnitTest::GetInstance()->RecordProperty(key, value); +} + +// Allows user supplied key value pairs to be recorded for later output. +void Test::RecordProperty(const std::string& key, int value) { + Message value_message; + value_message << value; + RecordProperty(key, value_message.GetString().c_str()); +} + +namespace internal { + +void ReportFailureInUnknownLocation(TestPartResult::Type result_type, + const std::string& message) { + // This function is a friend of UnitTest and as such has access to + // AddTestPartResult. + UnitTest::GetInstance()->AddTestPartResult( + result_type, + NULL, // No info about the source file where the exception occurred. 
+ -1, // We have no info on which line caused the exception. + message, + ""); // No stack trace, either. +} + +} // namespace internal + +// Google Test requires all tests in the same test case to use the same test +// fixture class. This function checks if the current test has the +// same fixture class as the first test in the current test case. If +// yes, it returns true; otherwise it generates a Google Test failure and +// returns false. +bool Test::HasSameFixtureClass() { + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + const TestCase* const test_case = impl->current_test_case(); + + // Info about the first test in the current test case. + const TestInfo* const first_test_info = test_case->test_info_list()[0]; + const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_; + const char* const first_test_name = first_test_info->name(); + + // Info about the current test. + const TestInfo* const this_test_info = impl->current_test_info(); + const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_; + const char* const this_test_name = this_test_info->name(); + + if (this_fixture_id != first_fixture_id) { + // Is the first test defined using TEST? + const bool first_is_TEST = first_fixture_id == internal::GetTestTypeId(); + // Is this test defined using TEST? + const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId(); + + if (first_is_TEST || this_is_TEST) { + // The user mixed TEST and TEST_F in this test case - we'll tell + // him/her how to fix it. + + // Gets the name of the TEST and the name of the TEST_F. Note + // that first_is_TEST and this_is_TEST cannot both be true, as + // the fixture IDs are different for the two tests. + const char* const TEST_name = + first_is_TEST ? first_test_name : this_test_name; + const char* const TEST_F_name = + first_is_TEST ? 
this_test_name : first_test_name; + + ADD_FAILURE() + << "All tests in the same test case must use the same test fixture\n" + << "class, so mixing TEST_F and TEST in the same test case is\n" + << "illegal. In test case " << this_test_info->test_case_name() + << ",\n" + << "test " << TEST_F_name << " is defined using TEST_F but\n" + << "test " << TEST_name << " is defined using TEST. You probably\n" + << "want to change the TEST to TEST_F or move it to another test\n" + << "case."; + } else { + // The user defined two fixture classes with the same name in + // two namespaces - we'll tell him/her how to fix it. + ADD_FAILURE() + << "All tests in the same test case must use the same test fixture\n" + << "class. However, in test case " + << this_test_info->test_case_name() << ",\n" + << "you defined test " << first_test_name + << " and test " << this_test_name << "\n" + << "using two different test fixture classes. This can happen if\n" + << "the two classes are from different namespaces or translation\n" + << "units and have the same name. You should probably rename one\n" + << "of the classes to put the tests into different test cases."; + } + return false; + } + + return true; +} + +#if GTEST_HAS_SEH + +// Adds an "exception thrown" fatal failure to the current test. This +// function returns its result via an output parameter pointer because VC++ +// prohibits creation of objects with destructors on stack in functions +// using __try (see error C2712). +static std::string* FormatSehExceptionMessage(DWORD exception_code, + const char* location) { + Message message; + message << "SEH exception with code 0x" << std::setbase(16) << + exception_code << std::setbase(10) << " thrown in " << location << "."; + + return new std::string(message.GetString()); +} + +#endif // GTEST_HAS_SEH + +namespace internal { + +#if GTEST_HAS_EXCEPTIONS + +// Adds an "exception thrown" fatal failure to the current test. 
+static std::string FormatCxxExceptionMessage(const char* description, + const char* location) { + Message message; + if (description != NULL) { + message << "C++ exception with description \"" << description << "\""; + } else { + message << "Unknown C++ exception"; + } + message << " thrown in " << location << "."; + + return message.GetString(); +} + +static std::string PrintTestPartResultToString( + const TestPartResult& test_part_result); + +GoogleTestFailureException::GoogleTestFailureException( + const TestPartResult& failure) + : ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {} + +#endif // GTEST_HAS_EXCEPTIONS + +// We put these helper functions in the internal namespace as IBM's xlC +// compiler rejects the code if they were declared static. + +// Runs the given method and handles SEH exceptions it throws, when +// SEH is supported; returns the 0-value for type Result in case of an +// SEH exception. (Microsoft compilers cannot handle SEH and C++ +// exceptions in the same function. Therefore, we provide a separate +// wrapper function for handling SEH exceptions.) +template +Result HandleSehExceptionsInMethodIfSupported( + T* object, Result (T::*method)(), const char* location) { +#if GTEST_HAS_SEH + __try { + return (object->*method)(); + } __except (internal::UnitTestOptions::GTestShouldProcessSEH( // NOLINT + GetExceptionCode())) { + // We create the exception message on the heap because VC++ prohibits + // creation of objects with destructors on stack in functions using __try + // (see error C2712). 
+ std::string* exception_message = FormatSehExceptionMessage( + GetExceptionCode(), location); + internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure, + *exception_message); + delete exception_message; + return static_cast(0); + } +#else + (void)location; + return (object->*method)(); +#endif // GTEST_HAS_SEH +} + +// Runs the given method and catches and reports C++ and/or SEH-style +// exceptions, if they are supported; returns the 0-value for type +// Result in case of an SEH exception. +template +Result HandleExceptionsInMethodIfSupported( + T* object, Result (T::*method)(), const char* location) { + // NOTE: The user code can affect the way in which Google Test handles + // exceptions by setting GTEST_FLAG(catch_exceptions), but only before + // RUN_ALL_TESTS() starts. It is technically possible to check the flag + // after the exception is caught and either report or re-throw the + // exception based on the flag's value: + // + // try { + // // Perform the test method. + // } catch (...) { + // if (GTEST_FLAG(catch_exceptions)) + // // Report the exception as failure. + // else + // throw; // Re-throws the original exception. + // } + // + // However, the purpose of this flag is to allow the program to drop into + // the debugger when the exception is thrown. On most platforms, once the + // control enters the catch block, the exception origin information is + // lost and the debugger will stop the program at the point of the + // re-throw in this function -- instead of at the point of the original + // throw statement in the code under test. For this reason, we perform + // the check early, sacrificing the ability to affect Google Test's + // exception handling in the method where the exception is thrown. 
+ if (internal::GetUnitTestImpl()->catch_exceptions()) { +#if GTEST_HAS_EXCEPTIONS + try { + return HandleSehExceptionsInMethodIfSupported(object, method, location); + } catch (const internal::GoogleTestFailureException&) { // NOLINT + // This exception type can only be thrown by a failed Google + // Test assertion with the intention of letting another testing + // framework catch it. Therefore we just re-throw it. + throw; + } catch (const std::exception& e) { // NOLINT + internal::ReportFailureInUnknownLocation( + TestPartResult::kFatalFailure, + FormatCxxExceptionMessage(e.what(), location)); + } catch (...) { // NOLINT + internal::ReportFailureInUnknownLocation( + TestPartResult::kFatalFailure, + FormatCxxExceptionMessage(NULL, location)); + } + return static_cast(0); +#else + return HandleSehExceptionsInMethodIfSupported(object, method, location); +#endif // GTEST_HAS_EXCEPTIONS + } else { + return (object->*method)(); + } +} + +} // namespace internal + +// Runs the test and updates the test result. +void Test::Run() { + if (!HasSameFixtureClass()) return; + + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()"); + // We will run the test only if SetUp() was successful. + if (!HasFatalFailure()) { + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + this, &Test::TestBody, "the test body"); + } + + // However, we want to clean up as much as possible. Hence we will + // always call TearDown(), even if SetUp() or the test body has + // failed. + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + this, &Test::TearDown, "TearDown()"); +} + +// Returns true iff the current test has a fatal failure. 
+bool Test::HasFatalFailure() { + return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure(); +} + +// Returns true iff the current test has a non-fatal failure. +bool Test::HasNonfatalFailure() { + return internal::GetUnitTestImpl()->current_test_result()-> + HasNonfatalFailure(); +} + +// class TestInfo + +// Constructs a TestInfo object. It assumes ownership of the test factory +// object. +TestInfo::TestInfo(const std::string& a_test_case_name, + const std::string& a_name, + const char* a_type_param, + const char* a_value_param, + internal::TypeId fixture_class_id, + internal::TestFactoryBase* factory) + : test_case_name_(a_test_case_name), + name_(a_name), + type_param_(a_type_param ? new std::string(a_type_param) : NULL), + value_param_(a_value_param ? new std::string(a_value_param) : NULL), + fixture_class_id_(fixture_class_id), + should_run_(false), + is_disabled_(false), + matches_filter_(false), + factory_(factory), + result_() {} + +// Destructs a TestInfo object. +TestInfo::~TestInfo() { delete factory_; } + +namespace internal { + +// Creates a new TestInfo object and registers it with Google Test; +// returns the created object. +// +// Arguments: +// +// test_case_name: name of the test case +// name: name of the test +// type_param: the name of the test's type parameter, or NULL if +// this is not a typed or a type-parameterized test. +// value_param: text representation of the test's value parameter, +// or NULL if this is not a value-parameterized test. +// fixture_class_id: ID of the test fixture class +// set_up_tc: pointer to the function that sets up the test case +// tear_down_tc: pointer to the function that tears down the test case +// factory: pointer to the factory that creates a test object. +// The newly created TestInfo instance will assume +// ownership of the factory object. 
+TestInfo* MakeAndRegisterTestInfo( + const char* test_case_name, + const char* name, + const char* type_param, + const char* value_param, + TypeId fixture_class_id, + SetUpTestCaseFunc set_up_tc, + TearDownTestCaseFunc tear_down_tc, + TestFactoryBase* factory) { + TestInfo* const test_info = + new TestInfo(test_case_name, name, type_param, value_param, + fixture_class_id, factory); + GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info); + return test_info; +} + +#if GTEST_HAS_PARAM_TEST +void ReportInvalidTestCaseType(const char* test_case_name, + const char* file, int line) { + Message errors; + errors + << "Attempted redefinition of test case " << test_case_name << ".\n" + << "All tests in the same test case must use the same test fixture\n" + << "class. However, in test case " << test_case_name << ", you tried\n" + << "to define a test using a fixture class different from the one\n" + << "used earlier. This can happen if the two fixture classes are\n" + << "from different namespaces and have the same name. You should\n" + << "probably rename one of the classes to put the tests into different\n" + << "test cases."; + + fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(), + errors.GetString().c_str()); +} +#endif // GTEST_HAS_PARAM_TEST + +} // namespace internal + +#if 0 /* supress compilation issue for icc: was declared but never referenced*/ +namespace { + +// A predicate that checks the test name of a TestInfo against a known +// value. +// +// This is used for implementation of the TestCase class only. We put +// it in the anonymous namespace to prevent polluting the outer +// namespace. +// +// TestNameIs is copyable. +class TestNameIs { + public: + // Constructor. + // + // TestNameIs has NO default constructor. + explicit TestNameIs(const char* name) + : name_(name) {} + + // Returns true iff the test name of test_info matches name_. 
+ bool operator()(const TestInfo * test_info) const { + return test_info && test_info->name() == name_; + } + + private: + std::string name_; +}; + +} // namespace +#endif + +namespace internal { + +// This method expands all parameterized tests registered with macros TEST_P +// and INSTANTIATE_TEST_CASE_P into regular tests and registers those. +// This will be done just once during the program runtime. +void UnitTestImpl::RegisterParameterizedTests() { +#if GTEST_HAS_PARAM_TEST + if (!parameterized_tests_registered_) { + parameterized_test_registry_.RegisterTests(); + parameterized_tests_registered_ = true; + } +#endif +} + +} // namespace internal + +// Creates the test object, runs it, records its result, and then +// deletes it. +void TestInfo::Run() { + if (!should_run_) return; + + // Tells UnitTest where to store test result. + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + impl->set_current_test_info(this); + + TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater(); + + // Notifies the unit test event listeners that a test is about to start. + repeater->OnTestStart(*this); + + const TimeInMillis start = internal::GetTimeInMillis(); + + impl->os_stack_trace_getter()->UponLeavingGTest(); + + // Creates the test object. + Test* const test = internal::HandleExceptionsInMethodIfSupported( + factory_, &internal::TestFactoryBase::CreateTest, + "the test fixture's constructor"); + + // Runs the test only if the test object was created and its + // constructor didn't generate a fatal failure. + if ((test != NULL) && !Test::HasFatalFailure()) { + // This doesn't throw as all user code that can throw are wrapped into + // exception handling code. + test->Run(); + } + + // Deletes the test object. 
+ impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + test, &Test::DeleteSelf_, "the test fixture's destructor"); + + result_.set_elapsed_time(internal::GetTimeInMillis() - start); + + // Notifies the unit test event listener that a test has just finished. + repeater->OnTestEnd(*this); + + // Tells UnitTest to stop associating assertion results to this + // test. + impl->set_current_test_info(NULL); +} + +// class TestCase + +// Gets the number of successful tests in this test case. +int TestCase::successful_test_count() const { + return CountIf(test_info_list_, TestPassed); +} + +// Gets the number of failed tests in this test case. +int TestCase::failed_test_count() const { + return CountIf(test_info_list_, TestFailed); +} + +// Gets the number of disabled tests that will be reported in the XML report. +int TestCase::reportable_disabled_test_count() const { + return CountIf(test_info_list_, TestReportableDisabled); +} + +// Gets the number of disabled tests in this test case. +int TestCase::disabled_test_count() const { + return CountIf(test_info_list_, TestDisabled); +} + +// Gets the number of tests to be printed in the XML report. +int TestCase::reportable_test_count() const { + return CountIf(test_info_list_, TestReportable); +} + +// Get the number of tests in this test case that should run. +int TestCase::test_to_run_count() const { + return CountIf(test_info_list_, ShouldRunTest); +} + +// Gets the number of all tests. +int TestCase::total_test_count() const { + return static_cast(test_info_list_.size()); +} + +// Creates a TestCase with the given name. +// +// Arguments: +// +// name: name of the test case +// a_type_param: the name of the test case's type parameter, or NULL if +// this is not a typed or a type-parameterized test case. 
+// set_up_tc: pointer to the function that sets up the test case +// tear_down_tc: pointer to the function that tears down the test case +TestCase::TestCase(const char* a_name, const char* a_type_param, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc) + : name_(a_name), + type_param_(a_type_param ? new std::string(a_type_param) : NULL), + set_up_tc_(set_up_tc), + tear_down_tc_(tear_down_tc), + should_run_(false), + elapsed_time_(0) { +} + +// Destructor of TestCase. +TestCase::~TestCase() { + // Deletes every Test in the collection. + ForEach(test_info_list_, internal::Delete); +} + +// Returns the i-th test among all the tests. i can range from 0 to +// total_test_count() - 1. If i is not in that range, returns NULL. +const TestInfo* TestCase::GetTestInfo(int i) const { + const int index = GetElementOr(test_indices_, i, -1); + return index < 0 ? NULL : test_info_list_[index]; +} + +// Returns the i-th test among all the tests. i can range from 0 to +// total_test_count() - 1. If i is not in that range, returns NULL. +TestInfo* TestCase::GetMutableTestInfo(int i) { + const int index = GetElementOr(test_indices_, i, -1); + return index < 0 ? NULL : test_info_list_[index]; +} + +// Adds a test to this test case. Will delete the test upon +// destruction of the TestCase object. +void TestCase::AddTestInfo(TestInfo * test_info) { + test_info_list_.push_back(test_info); + test_indices_.push_back(static_cast(test_indices_.size())); +} + +// Runs every test in this TestCase. 
+void TestCase::Run() { + if (!should_run_) return; + + internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); + impl->set_current_test_case(this); + + TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater(); + + repeater->OnTestCaseStart(*this); + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + this, &TestCase::RunSetUpTestCase, "SetUpTestCase()"); + + const internal::TimeInMillis start = internal::GetTimeInMillis(); + for (int i = 0; i < total_test_count(); i++) { + GetMutableTestInfo(i)->Run(); + } + elapsed_time_ = internal::GetTimeInMillis() - start; + + impl->os_stack_trace_getter()->UponLeavingGTest(); + internal::HandleExceptionsInMethodIfSupported( + this, &TestCase::RunTearDownTestCase, "TearDownTestCase()"); + + repeater->OnTestCaseEnd(*this); + impl->set_current_test_case(NULL); +} + +// Clears the results of all tests in this test case. +void TestCase::ClearResult() { + ad_hoc_test_result_.Clear(); + ForEach(test_info_list_, TestInfo::ClearTestResult); +} + +// Shuffles the tests in this test case. +void TestCase::ShuffleTests(internal::Random* random) { + Shuffle(random, &test_indices_); +} + +// Restores the test order to before the first shuffle. +void TestCase::UnshuffleTests() { + for (size_t i = 0; i < test_indices_.size(); i++) { + test_indices_[i] = static_cast(i); + } +} + +// Formats a countable noun. Depending on its quantity, either the +// singular form or the plural form is used. e.g. +// +// FormatCountableNoun(1, "formula", "formuli") returns "1 formula". +// FormatCountableNoun(5, "book", "books") returns "5 books". +static std::string FormatCountableNoun(int count, + const char * singular_form, + const char * plural_form) { + return internal::StreamableToString(count) + " " + + (count == 1 ? singular_form : plural_form); +} + +// Formats the count of tests. 
+static std::string FormatTestCount(int test_count) { + return FormatCountableNoun(test_count, "test", "tests"); +} + +// Formats the count of test cases. +static std::string FormatTestCaseCount(int test_case_count) { + return FormatCountableNoun(test_case_count, "test case", "test cases"); +} + +// Converts a TestPartResult::Type enum to human-friendly string +// representation. Both kNonFatalFailure and kFatalFailure are translated +// to "Failure", as the user usually doesn't care about the difference +// between the two when viewing the test result. +static const char * TestPartResultTypeToString(TestPartResult::Type type) { + switch (type) { + case TestPartResult::kSuccess: + return "Success"; + + case TestPartResult::kNonFatalFailure: + case TestPartResult::kFatalFailure: +#ifdef _MSC_VER + return "error: "; +#else + return "Failure\n"; +#endif + default: + return "Unknown result type"; + } +} + +namespace internal { + +// Prints a TestPartResult to an std::string. +static std::string PrintTestPartResultToString( + const TestPartResult& test_part_result) { + return (Message() + << internal::FormatFileLocation(test_part_result.file_name(), + test_part_result.line_number()) + << " " << TestPartResultTypeToString(test_part_result.type()) + << test_part_result.message()).GetString(); +} + +// Prints a TestPartResult. +static void PrintTestPartResult(const TestPartResult& test_part_result) { + const std::string& result = + PrintTestPartResultToString(test_part_result); + printf("%s\n", result.c_str()); + fflush(stdout); + // If the test program runs in Visual Studio or a debugger, the + // following statements add the test part result message to the Output + // window such that the user can double-click on it to jump to the + // corresponding source code location; otherwise they do nothing. 
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE + // We don't call OutputDebugString*() on Windows Mobile, as printing + // to stdout is done by OutputDebugString() there already - we don't + // want the same message printed twice. + ::OutputDebugStringA(result.c_str()); + ::OutputDebugStringA("\n"); +#endif +} + +// class PrettyUnitTestResultPrinter + +enum GTestColor { + COLOR_DEFAULT, + COLOR_RED, + COLOR_GREEN, + COLOR_YELLOW +}; + +#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE + +// Returns the character attribute for the given color. +WORD GetColorAttribute(GTestColor color) { + switch (color) { + case COLOR_RED: return FOREGROUND_RED; + case COLOR_GREEN: return FOREGROUND_GREEN; + case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN; + default: return 0; + } +} + +#else + +// Returns the ANSI color code for the given color. COLOR_DEFAULT is +// an invalid input. +const char* GetAnsiColorCode(GTestColor color) { + switch (color) { + case COLOR_RED: return "1"; + case COLOR_GREEN: return "2"; + case COLOR_YELLOW: return "3"; + default: return NULL; + }; +} + +#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE + +// Returns true iff Google Test should use colors in the output. +bool ShouldUseColor(bool stdout_is_tty) { + const char* const gtest_color = GTEST_FLAG(color).c_str(); + + if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) { +#if GTEST_OS_WINDOWS + // On Windows the TERM variable is usually not set, but the + // console there does support colors. + return stdout_is_tty; +#else + // On non-Windows platforms, we rely on the TERM variable. 
+ const char* const term = posix::GetEnv("TERM"); + const bool term_supports_color = + String::CStringEquals(term, "xterm") || + String::CStringEquals(term, "xterm-color") || + String::CStringEquals(term, "xterm-256color") || + String::CStringEquals(term, "screen") || + String::CStringEquals(term, "screen-256color") || + String::CStringEquals(term, "linux") || + String::CStringEquals(term, "cygwin"); + return stdout_is_tty && term_supports_color; +#endif // GTEST_OS_WINDOWS + } + + return String::CaseInsensitiveCStringEquals(gtest_color, "yes") || + String::CaseInsensitiveCStringEquals(gtest_color, "true") || + String::CaseInsensitiveCStringEquals(gtest_color, "t") || + String::CStringEquals(gtest_color, "1"); + // We take "yes", "true", "t", and "1" as meaning "yes". If the + // value is neither one of these nor "auto", we treat it as "no" to + // be conservative. +} + +// Helpers for printing colored strings to stdout. Note that on Windows, we +// cannot simply emit special characters and have the terminal change colors. +// This routine must actually emit the characters rather than return a string +// that would be colored when printed, as can be done on Linux. +void ColoredPrintf(GTestColor color, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + +#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS || GTEST_OS_IOS + const bool use_color = false; +#else + static const bool in_color_mode = + ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0); + const bool use_color = in_color_mode && (color != COLOR_DEFAULT); +#endif // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS + // The '!= 0' comparison is necessary to satisfy MSVC 7.1. + + if (!use_color) { + vprintf(fmt, args); + va_end(args); + return; + } + +#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE + const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); + + // Gets the current text color. 
+ CONSOLE_SCREEN_BUFFER_INFO buffer_info; + GetConsoleScreenBufferInfo(stdout_handle, &buffer_info); + const WORD old_color_attrs = buffer_info.wAttributes; + + // We need to flush the stream buffers into the console before each + // SetConsoleTextAttribute call lest it affect the text that is already + // printed but has not yet reached the console. + fflush(stdout); + SetConsoleTextAttribute(stdout_handle, + GetColorAttribute(color) | FOREGROUND_INTENSITY); + vprintf(fmt, args); + + fflush(stdout); + // Restores the text color. + SetConsoleTextAttribute(stdout_handle, old_color_attrs); +#else + printf("\033[0;3%sm", GetAnsiColorCode(color)); + vprintf(fmt, args); + printf("\033[m"); // Resets the terminal to default. +#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE + va_end(args); +} + +// Text printed in Google Test's text output and --gunit_list_tests +// output to label the type parameter and value parameter for a test. +static const char kTypeParamLabel[] = "TypeParam"; +static const char kValueParamLabel[] = "GetParam()"; + +void PrintFullTestCommentIfPresent(const TestInfo& test_info) { + const char* const type_param = test_info.type_param(); + const char* const value_param = test_info.value_param(); + + if (type_param != NULL || value_param != NULL) { + printf(", where "); + if (type_param != NULL) { + printf("%s = %s", kTypeParamLabel, type_param); + if (value_param != NULL) + printf(" and "); + } + if (value_param != NULL) { + printf("%s = %s", kValueParamLabel, value_param); + } + } +} + +// This class implements the TestEventListener interface. +// +// Class PrettyUnitTestResultPrinter is copyable. +class PrettyUnitTestResultPrinter : public TestEventListener { + public: + PrettyUnitTestResultPrinter() {} + static void PrintTestName(const char * test_case, const char * test) { + printf("%s.%s", test_case, test); + } + + // The following methods override what's in the TestEventListener class. 
+ virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {} + virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration); + virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test); + virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {} + virtual void OnTestCaseStart(const TestCase& test_case); + virtual void OnTestStart(const TestInfo& test_info); + virtual void OnTestPartResult(const TestPartResult& result); + virtual void OnTestEnd(const TestInfo& test_info); + virtual void OnTestCaseEnd(const TestCase& test_case); + virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test); + virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {} + virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); + virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {} + + private: + static void PrintFailedTests(const UnitTest& unit_test); +}; + + // Fired before each iteration of tests starts. +void PrettyUnitTestResultPrinter::OnTestIterationStart( + const UnitTest& unit_test, int iteration) { + if (GTEST_FLAG(repeat) != 1) + printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1); + + const char* const filter = GTEST_FLAG(filter).c_str(); + + // Prints the filter if it's not *. This reminds the user that some + // tests may be skipped. 
+ if (!String::CStringEquals(filter, kUniversalFilter)) { + ColoredPrintf(COLOR_YELLOW, + "Note: %s filter = %s\n", GTEST_NAME_, filter); + } + + if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) { + const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1); + ColoredPrintf(COLOR_YELLOW, + "Note: This is test shard %d of %s.\n", + static_cast(shard_index) + 1, + internal::posix::GetEnv(kTestTotalShards)); + } + + if (GTEST_FLAG(shuffle)) { + ColoredPrintf(COLOR_YELLOW, + "Note: Randomizing tests' orders with a seed of %d .\n", + unit_test.random_seed()); + } + + ColoredPrintf(COLOR_GREEN, "[==========] "); + printf("Running %s from %s.\n", + FormatTestCount(unit_test.test_to_run_count()).c_str(), + FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str()); + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart( + const UnitTest& /*unit_test*/) { + ColoredPrintf(COLOR_GREEN, "[----------] "); + printf("Global test environment set-up.\n"); + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) { + const std::string counts = + FormatCountableNoun(test_case.test_to_run_count(), "test", "tests"); + ColoredPrintf(COLOR_GREEN, "[----------] "); + printf("%s from %s", counts.c_str(), test_case.name()); + if (test_case.type_param() == NULL) { + printf("\n"); + } else { + printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param()); + } + fflush(stdout); +} + +void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) { + ColoredPrintf(COLOR_GREEN, "[ RUN ] "); + PrintTestName(test_info.test_case_name(), test_info.name()); + if (test_info.type_param() != NULL) { + printf(" <%s>", test_info.type_param()); + } + if (test_info.value_param() != NULL) { + printf(" <%s>", test_info.value_param()); + } + printf("\n"); + fflush(stdout); +} + +// Called after an assertion failure. 
+void PrettyUnitTestResultPrinter::OnTestPartResult(
+    const TestPartResult& result) {
+  // Successful parts produce no console output; only report the rest.
+  if (result.type() != TestPartResult::kSuccess) {
+    // Emit the assertion's own message (e.g. expected this and got that).
+    PrintTestPartResult(result);
+    fflush(stdout);
+  }
+}
+
+// Called after a test finishes: prints the pass/fail banner, the test name
+// and, when the print_time flag is set, the elapsed time in milliseconds.
+void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
+  const TestResult* const outcome = test_info.result();
+
+  if (outcome->Passed())
+    ColoredPrintf(COLOR_GREEN, "[ OK ] ");
+  else
+    ColoredPrintf(COLOR_RED, "[ FAILED ] ");
+
+  PrintTestName(test_info.test_case_name(), test_info.name());
+  if (outcome->Failed()) {
+    PrintFullTestCommentIfPresent(test_info);
+  }
+
+  // Terminate the line, with or without the timing suffix.
+  if (!GTEST_FLAG(print_time)) {
+    printf("\n");
+  } else {
+    const std::string elapsed_ms =
+        internal::StreamableToString(outcome->elapsed_time());
+    printf(" (%s ms)\n", elapsed_ms.c_str());
+  }
+  fflush(stdout);
+}
+
+// Called after all tests of a test case have run.  The summary line is
+// printed only when the print_time flag is set.
+void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) {
+  if (GTEST_FLAG(print_time)) {
+    const std::string test_count_text =
+        FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
+    ColoredPrintf(COLOR_GREEN, "[----------] ");
+    const std::string elapsed_ms =
+        internal::StreamableToString(test_case.elapsed_time());
+    printf("%s from %s (%s ms total)\n\n",
+           test_count_text.c_str(), test_case.name(), elapsed_ms.c_str());
+    fflush(stdout);
+  }
+}
+
+// Announces the start of the global environment tear-down phase.
+void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
+    const UnitTest& /*unit_test*/) {
+  ColoredPrintf(COLOR_GREEN, "[----------] ");
+  printf("Global test environment tear-down\n");
+  fflush(stdout);
+}
+
+// Internal helper for printing the list of failed tests.
+void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) { + const int failed_test_count = unit_test.failed_test_count(); + if (failed_test_count == 0) { + return; + } + + for (int i = 0; i < unit_test.total_test_case_count(); ++i) { + const TestCase& test_case = *unit_test.GetTestCase(i); + if (!test_case.should_run() || (test_case.failed_test_count() == 0)) { + continue; + } + for (int j = 0; j < test_case.total_test_count(); ++j) { + const TestInfo& test_info = *test_case.GetTestInfo(j); + if (!test_info.should_run() || test_info.result()->Passed()) { + continue; + } + ColoredPrintf(COLOR_RED, "[ FAILED ] "); + printf("%s.%s", test_case.name(), test_info.name()); + PrintFullTestCommentIfPresent(test_info); + printf("\n"); + } + } +} + +void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, + int /*iteration*/) { + ColoredPrintf(COLOR_GREEN, "[==========] "); + printf("%s from %s ran.", + FormatTestCount(unit_test.test_to_run_count()).c_str(), + FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str()); + if (GTEST_FLAG(print_time)) { + printf(" (%s ms total)", + internal::StreamableToString(unit_test.elapsed_time()).c_str()); + } + printf("\n"); + ColoredPrintf(COLOR_GREEN, "[ PASSED ] "); + printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str()); + + int num_failures = unit_test.failed_test_count(); + if (!unit_test.Passed()) { + const int failed_test_count = unit_test.failed_test_count(); + ColoredPrintf(COLOR_RED, "[ FAILED ] "); + printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str()); + PrintFailedTests(unit_test); + printf("\n%2d FAILED %s\n", num_failures, + num_failures == 1 ? "TEST" : "TESTS"); + } + + int num_disabled = unit_test.reportable_disabled_test_count(); + if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) { + if (!num_failures) { + printf("\n"); // Add a spacer if no FAILURE banner is displayed. 
+ } + ColoredPrintf(COLOR_YELLOW, + " YOU HAVE %d DISABLED %s\n\n", + num_disabled, + num_disabled == 1 ? "TEST" : "TESTS"); + } + // Ensure that Google Test output is printed before, e.g., heapchecker output. + fflush(stdout); +} + +// End PrettyUnitTestResultPrinter + +// class TestEventRepeater +// +// This class forwards events to other event listeners. +class TestEventRepeater : public TestEventListener { + public: + TestEventRepeater() : forwarding_enabled_(true) {} + virtual ~TestEventRepeater(); + void Append(TestEventListener *listener); + TestEventListener* Release(TestEventListener* listener); + + // Controls whether events will be forwarded to listeners_. Set to false + // in death test child processes. + bool forwarding_enabled() const { return forwarding_enabled_; } + void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; } + + virtual void OnTestProgramStart(const UnitTest& unit_test); + virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration); + virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test); + virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test); + virtual void OnTestCaseStart(const TestCase& test_case); + virtual void OnTestStart(const TestInfo& test_info); + virtual void OnTestPartResult(const TestPartResult& result); + virtual void OnTestEnd(const TestInfo& test_info); + virtual void OnTestCaseEnd(const TestCase& test_case); + virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test); + virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test); + virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); + virtual void OnTestProgramEnd(const UnitTest& unit_test); + + private: + // Controls whether events will be forwarded to listeners_. Set to false + // in death test child processes. + bool forwarding_enabled_; + // The list of listeners that receive events. 
+ std::vector listeners_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventRepeater); +}; + +TestEventRepeater::~TestEventRepeater() { + ForEach(listeners_, Delete); +} + +void TestEventRepeater::Append(TestEventListener *listener) { + listeners_.push_back(listener); +} + +// TODO(vladl@google.com): Factor the search functionality into Vector::Find. +TestEventListener* TestEventRepeater::Release(TestEventListener *listener) { + for (size_t i = 0; i < listeners_.size(); ++i) { + if (listeners_[i] == listener) { + listeners_.erase(listeners_.begin() + i); + return listener; + } + } + + return NULL; +} + +// Since most methods are very similar, use macros to reduce boilerplate. +// This defines a member that forwards the call to all listeners. +#define GTEST_REPEATER_METHOD_(Name, Type) \ +void TestEventRepeater::Name(const Type& parameter) { \ + if (forwarding_enabled_) { \ + for (size_t i = 0; i < listeners_.size(); i++) { \ + listeners_[i]->Name(parameter); \ + } \ + } \ +} +// This defines a member that forwards the call to all listeners in reverse +// order. 
+#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \ +void TestEventRepeater::Name(const Type& parameter) { \ + if (forwarding_enabled_) { \ + for (int i = static_cast(listeners_.size()) - 1; i >= 0; i--) { \ + listeners_[i]->Name(parameter); \ + } \ + } \ +} + +GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest) +GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest) +GTEST_REPEATER_METHOD_(OnTestCaseStart, TestCase) +GTEST_REPEATER_METHOD_(OnTestStart, TestInfo) +GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult) +GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest) +GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest) +GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest) +GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo) +GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestCase) +GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest) + +#undef GTEST_REPEATER_METHOD_ +#undef GTEST_REVERSE_REPEATER_METHOD_ + +void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test, + int iteration) { + if (forwarding_enabled_) { + for (size_t i = 0; i < listeners_.size(); i++) { + listeners_[i]->OnTestIterationStart(unit_test, iteration); + } + } +} + +void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test, + int iteration) { + if (forwarding_enabled_) { + for (int i = static_cast(listeners_.size()) - 1; i >= 0; i--) { + listeners_[i]->OnTestIterationEnd(unit_test, iteration); + } + } +} + +// End TestEventRepeater + +// This class generates an XML output file. +class XmlUnitTestResultPrinter : public EmptyTestEventListener { + public: + explicit XmlUnitTestResultPrinter(const char* output_file); + + virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); + + private: + // Is c a whitespace character that is normalized to a space character + // when it appears in an XML attribute value? 
+ static bool IsNormalizableWhitespace(char c) { + return c == 0x9 || c == 0xA || c == 0xD; + } + + // May c appear in a well-formed XML document? + static bool IsValidXmlCharacter(char c) { + return IsNormalizableWhitespace(c) || c >= 0x20; + } + + // Returns an XML-escaped copy of the input string str. If + // is_attribute is true, the text is meant to appear as an attribute + // value, and normalizable whitespace is preserved by replacing it + // with character references. + static std::string EscapeXml(const std::string& str, bool is_attribute); + + // Returns the given string with all characters invalid in XML removed. + static std::string RemoveInvalidXmlCharacters(const std::string& str); + + // Convenience wrapper around EscapeXml when str is an attribute value. + static std::string EscapeXmlAttribute(const std::string& str) { + return EscapeXml(str, true); + } + + // Convenience wrapper around EscapeXml when str is not an attribute value. + static std::string EscapeXmlText(const char* str) { + return EscapeXml(str, false); + } + + // Verifies that the given attribute belongs to the given element and + // streams the attribute as XML. + static void OutputXmlAttribute(std::ostream* stream, + const std::string& element_name, + const std::string& name, + const std::string& value); + + // Streams an XML CDATA section, escaping invalid CDATA sequences as needed. + static void OutputXmlCDataSection(::std::ostream* stream, const char* data); + + // Streams an XML representation of a TestInfo object. + static void OutputXmlTestInfo(::std::ostream* stream, + const char* test_case_name, + const TestInfo& test_info); + + // Prints an XML representation of a TestCase object + static void PrintXmlTestCase(::std::ostream* stream, + const TestCase& test_case); + + // Prints an XML summary of unit_test to output stream out. 
+ static void PrintXmlUnitTest(::std::ostream* stream, + const UnitTest& unit_test); + + // Produces a string representing the test properties in a result as space + // delimited XML attributes based on the property key="value" pairs. + // When the std::string is not empty, it includes a space at the beginning, + // to delimit this attribute from prior attributes. + static std::string TestPropertiesAsXmlAttributes(const TestResult& result); + + // The output file. + const std::string output_file_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(XmlUnitTestResultPrinter); +}; + +// Creates a new XmlUnitTestResultPrinter. +XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file) + : output_file_(output_file) { + if (output_file_.c_str() == NULL || output_file_.empty()) { + fprintf(stderr, "XML output file may not be null\n"); + fflush(stderr); + exit(EXIT_FAILURE); + } +} + +// Called after the unit test ends. +void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, + int /*iteration*/) { + FILE* xmlout = NULL; + FilePath output_file(output_file_); + FilePath output_dir(output_file.RemoveFileName()); + + if (output_dir.CreateDirectoriesRecursively()) { + xmlout = posix::FOpen(output_file_.c_str(), "w"); + } + if (xmlout == NULL) { + // TODO(wan): report the reason of the failure. + // + // We don't do it for now as: + // + // 1. There is no urgent need for it. + // 2. It's a bit involved to make the errno variable thread-safe on + // all three operating systems (Linux, Windows, and Mac OS). + // 3. To interpret the meaning of errno in a thread-safe way, + // we need the strerror_r() function, which is not available on + // Windows. 
+ fprintf(stderr, + "Unable to open file \"%s\"\n", + output_file_.c_str()); + fflush(stderr); + exit(EXIT_FAILURE); + } + std::stringstream stream; + PrintXmlUnitTest(&stream, unit_test); + fprintf(xmlout, "%s", StringStreamToString(&stream).c_str()); + fclose(xmlout); +} + +// Returns an XML-escaped copy of the input string str. If is_attribute +// is true, the text is meant to appear as an attribute value, and +// normalizable whitespace is preserved by replacing it with character +// references. +// +// Invalid XML characters in str, if any, are stripped from the output. +// It is expected that most, if not all, of the text processed by this +// module will consist of ordinary English text. +// If this module is ever modified to produce version 1.1 XML output, +// most invalid characters can be retained using character references. +// TODO(wan): It might be nice to have a minimally invasive, human-readable +// escaping scheme for invalid characters, rather than dropping them. +std::string XmlUnitTestResultPrinter::EscapeXml( + const std::string& str, bool is_attribute) { + Message m; + + for (size_t i = 0; i < str.size(); ++i) { + const char ch = str[i]; + switch (ch) { + case '<': + m << "<"; + break; + case '>': + m << ">"; + break; + case '&': + m << "&"; + break; + case '\'': + if (is_attribute) + m << "'"; + else + m << '\''; + break; + case '"': + if (is_attribute) + m << """; + else + m << '"'; + break; + default: + if (IsValidXmlCharacter(ch)) { + if (is_attribute && IsNormalizableWhitespace(ch)) + m << "&#x" << String::FormatByte(static_cast(ch)) + << ";"; + else + m << ch; + } + break; + } + } + + return m.GetString(); +} + +// Returns the given string with all characters invalid in XML removed. +// Currently invalid characters are dropped from the string. An +// alternative is to replace them with certain characters such as . or ?. 
+std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
+    const std::string& str) {
+  std::string output;
+  output.reserve(str.size());
+  for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
+    if (IsValidXmlCharacter(*it))
+      output.push_back(*it);
+
+  return output;
+}
+
+// The following routines generate an XML representation of a UnitTest
+// object.
+//
+// This is how Google Test concepts map to the DTD:
+//
+// <testsuites name="AllTests">        <-- corresponds to a UnitTest object
+//   <testsuite name="testcase-name">  <-- corresponds to a TestCase object
+//     <testcase name="test-name">     <-- corresponds to a TestInfo object
+//       <failure message="...">...</failure>
+//       <failure message="...">...</failure>
+//       <failure message="...">...</failure>
+//                                     <-- individual assertion failures
+//     </testcase>
+//   </testsuite>
+// </testsuites>
+
+// Formats the given time in milliseconds as seconds.
+std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) {
+  ::std::stringstream ss;
+  ss << ms/1000.0;
+  return ss.str();
+}
+
+// Converts the given epoch time in milliseconds to a date string in the ISO
+// 8601 format, without the timezone information.  Returns an empty string
+// when localtime() cannot convert the value.
+std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) {
+  // Using non-reentrant version as localtime_r is not portable.
+  time_t seconds = static_cast<time_t>(ms / 1000);
+#ifdef _MSC_VER
+# pragma warning(push)          // Saves the current warning state.
+# pragma warning(disable:4996)  // Temporarily disables warning 4996
+                                // (function or variable may be unsafe).
+  const struct tm* const time_struct = localtime(&seconds);  // NOLINT
+# pragma warning(pop)           // Restores the warning state again.
+#else
+  const struct tm* const time_struct = localtime(&seconds);  // NOLINT
+#endif
+  if (time_struct == NULL)
+    return "";  // Invalid ms value
+
+  // YYYY-MM-DDThh:mm:ss
+  return StreamableToString(time_struct->tm_year + 1900) + "-" +
+      String::FormatIntWidth2(time_struct->tm_mon + 1) + "-" +
+      String::FormatIntWidth2(time_struct->tm_mday) + "T" +
+      String::FormatIntWidth2(time_struct->tm_hour) + ":" +
+      String::FormatIntWidth2(time_struct->tm_min) + ":" +
+      String::FormatIntWidth2(time_struct->tm_sec);
+}
+
+// Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
+//
+// A CDATA section cannot contain the terminator "]]>".  Each occurrence in
+// data therefore closes the current section, emits the '>' as an entity and
+// opens a new section for the remaining text.
+void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream,
+                                                     const char* data) {
+  const char* segment = data;
+  *stream << "<![CDATA[";
+  for (;;) {
+    const char* const next_segment = strstr(segment, "]]>");
+    if (next_segment != NULL) {
+      stream->write(
+          segment, static_cast<std::streamsize>(next_segment - segment));
+      *stream << "]]>]]&gt;<![CDATA[";
+      segment = next_segment + strlen("]]>");
+    } else {
+      *stream << segment;
+      break;
+    }
+  }
+  *stream << "]]>";
+}
+
+// Streams ` name="value"` with the value XML-escaped, after checking that
+// name is one of the attributes allowed for element_name.
+void XmlUnitTestResultPrinter::OutputXmlAttribute(
+    std::ostream* stream,
+    const std::string& element_name,
+    const std::string& name,
+    const std::string& value) {
+  const std::vector<std::string>& allowed_names =
+      GetReservedAttributesForElement(element_name);
+
+  GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
+                   allowed_names.end())
+      << "Attribute " << name << " is not allowed for element <" << element_name
+      << ">.";
+
+  *stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\"";
+}
+
+// Prints an XML representation of a TestInfo object.
+// TODO(wan): There is also value in printing properties with the plain printer.
+void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream, + const char* test_case_name, + const TestInfo& test_info) { + const TestResult& result = *test_info.result(); + const std::string kTestcase = "testcase"; + + *stream << " \n"; + } + const string location = internal::FormatCompilerIndependentFileLocation( + part.file_name(), part.line_number()); + const string summary = location + "\n" + part.summary(); + *stream << " "; + const string detail = location + "\n" + part.message(); + OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str()); + *stream << "\n"; + } + } + + if (failures == 0) + *stream << " />\n"; + else + *stream << " \n"; +} + +// Prints an XML representation of a TestCase object +void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream, + const TestCase& test_case) { + const std::string kTestsuite = "testsuite"; + *stream << " <" << kTestsuite; + OutputXmlAttribute(stream, kTestsuite, "name", test_case.name()); + OutputXmlAttribute(stream, kTestsuite, "tests", + StreamableToString(test_case.reportable_test_count())); + OutputXmlAttribute(stream, kTestsuite, "failures", + StreamableToString(test_case.failed_test_count())); + OutputXmlAttribute( + stream, kTestsuite, "disabled", + StreamableToString(test_case.reportable_disabled_test_count())); + OutputXmlAttribute(stream, kTestsuite, "errors", "0"); + OutputXmlAttribute(stream, kTestsuite, "time", + FormatTimeInMillisAsSeconds(test_case.elapsed_time())); + *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result()) + << ">\n"; + + for (int i = 0; i < test_case.total_test_count(); ++i) { + if (test_case.GetTestInfo(i)->is_reportable()) + OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i)); + } + *stream << " \n"; +} + +// Prints an XML summary of unit_test to output stream out. 
+void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream,
+                                                const UnitTest& unit_test) {
+  const std::string kTestsuites = "testsuites";
+
+  // XML prolog, then the opening tag of the root element; its attributes
+  // are appended one by one below.
+  *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+  *stream << "<" << kTestsuites;
+
+  OutputXmlAttribute(stream, kTestsuites, "tests",
+                     StreamableToString(unit_test.reportable_test_count()));
+  OutputXmlAttribute(stream, kTestsuites, "failures",
+                     StreamableToString(unit_test.failed_test_count()));
+  OutputXmlAttribute(
+      stream, kTestsuites, "disabled",
+      StreamableToString(unit_test.reportable_disabled_test_count()));
+  OutputXmlAttribute(stream, kTestsuites, "errors", "0");
+  OutputXmlAttribute(
+      stream, kTestsuites, "timestamp",
+      FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
+  OutputXmlAttribute(stream, kTestsuites, "time",
+                     FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));
+
+  // The seed is only reported when test shuffling is enabled.
+  if (GTEST_FLAG(shuffle)) {
+    OutputXmlAttribute(stream, kTestsuites, "random_seed",
+                       StreamableToString(unit_test.random_seed()));
+  }
+
+  *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result());
+
+  OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
+  *stream << ">\n";
+
+  // One child element per test case that has something to report.
+  for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
+    if (unit_test.GetTestCase(i)->reportable_test_count() > 0)
+      PrintXmlTestCase(stream, *unit_test.GetTestCase(i));
+  }
+  *stream << "</" << kTestsuites << ">\n";
+}
+
+// Produces a string representing the test properties in a result as space
+// delimited XML attributes based on the property key="value" pairs.
+std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes( + const TestResult& result) { + Message attributes; + for (int i = 0; i < result.test_property_count(); ++i) { + const TestProperty& property = result.GetTestProperty(i); + attributes << " " << property.key() << "=" + << "\"" << EscapeXmlAttribute(property.value()) << "\""; + } + return attributes.GetString(); +} + +// End XmlUnitTestResultPrinter + +#if GTEST_CAN_STREAM_RESULTS_ + +// Checks if str contains '=', '&', '%' or '\n' characters. If yes, +// replaces them by "%xx" where xx is their hexadecimal value. For +// example, replaces "=" with "%3D". This algorithm is O(strlen(str)) +// in both time and space -- important as the input str may contain an +// arbitrarily long test failure message and stack trace. +string StreamingListener::UrlEncode(const char* str) { + string result; + result.reserve(strlen(str) + 1); + for (char ch = *str; ch != '\0'; ch = *++str) { + switch (ch) { + case '%': + case '=': + case '&': + case '\n': + result.append("%" + String::FormatByte(static_cast(ch))); + break; + default: + result.push_back(ch); + break; + } + } + return result; +} + +void StreamingListener::SocketWriter::MakeConnection() { + GTEST_CHECK_(sockfd_ == -1) + << "MakeConnection() can't be called when there is already a connection."; + + addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; // To allow both IPv4 and IPv6 addresses. + hints.ai_socktype = SOCK_STREAM; + addrinfo* servinfo = NULL; + + // Use the getaddrinfo() to get a linked list of IP addresses for + // the given host name. + const int error_num = getaddrinfo( + host_name_.c_str(), port_num_.c_str(), &hints, &servinfo); + if (error_num != 0) { + GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: " + << gai_strerror(error_num); + } + + // Loop through all the results and connect to the first we can. 
+ for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != NULL; + cur_addr = cur_addr->ai_next) { + sockfd_ = socket( + cur_addr->ai_family, cur_addr->ai_socktype, cur_addr->ai_protocol); + if (sockfd_ != -1) { + // Connect the client socket to the server socket. + if (connect(sockfd_, cur_addr->ai_addr, cur_addr->ai_addrlen) == -1) { + close(sockfd_); + sockfd_ = -1; + } + } + } + + freeaddrinfo(servinfo); // all done with this structure + + if (sockfd_ == -1) { + GTEST_LOG_(WARNING) << "stream_result_to: failed to connect to " + << host_name_ << ":" << port_num_; + } +} + +// End of class Streaming Listener +#endif // GTEST_CAN_STREAM_RESULTS__ + +// Class ScopedTrace + +// Pushes the given source file location and message onto a per-thread +// trace stack maintained by Google Test. +ScopedTrace::ScopedTrace(const char* file, int line, const Message& message) + GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) { + TraceInfo trace; + trace.file = file; + trace.line = line; + trace.message = message.GetString(); + + UnitTest::GetInstance()->PushGTestTrace(trace); +} + +// Pops the info pushed by the c'tor. +ScopedTrace::~ScopedTrace() + GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) { + UnitTest::GetInstance()->PopGTestTrace(); +} + + +// class OsStackTraceGetter + +// Returns the current OS stack trace as an std::string. Parameters: +// +// max_depth - the maximum number of stack frames to be included +// in the trace. +// skip_count - the number of top frames to be skipped; doesn't count +// against max_depth. +// +string OsStackTraceGetter::CurrentStackTrace(int /* max_depth */, + int /* skip_count */) + GTEST_LOCK_EXCLUDED_(mutex_) { + return ""; +} + +void OsStackTraceGetter::UponLeavingGTest() + GTEST_LOCK_EXCLUDED_(mutex_) { +} + +const char* const +OsStackTraceGetter::kElidedFramesMarker = + "... " GTEST_NAME_ " internal frames ..."; + +// A helper class that creates the premature-exit file in its +// constructor and deletes the file in its destructor. 
+class ScopedPrematureExitFile { + public: + explicit ScopedPrematureExitFile(const char* premature_exit_filepath) + : premature_exit_filepath_(premature_exit_filepath) { + // If a path to the premature-exit file is specified... + if (premature_exit_filepath != NULL && *premature_exit_filepath != '\0') { + // create the file with a single "0" character in it. I/O + // errors are ignored as there's nothing better we can do and we + // don't want to fail the test because of this. + FILE* pfile = posix::FOpen(premature_exit_filepath, "w"); + fwrite("0", 1, 1, pfile); + fclose(pfile); + } + } + + ~ScopedPrematureExitFile() { + if (premature_exit_filepath_ != NULL && *premature_exit_filepath_ != '\0') { + (void)remove(premature_exit_filepath_); + } + } + + private: + const char* const premature_exit_filepath_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile); +}; + +} // namespace internal + +// class TestEventListeners + +TestEventListeners::TestEventListeners() + : repeater_(new internal::TestEventRepeater()), + default_result_printer_(NULL), + default_xml_generator_(NULL) { +} + +TestEventListeners::~TestEventListeners() { delete repeater_; } + +// Returns the standard listener responsible for the default console +// output. Can be removed from the listeners list to shut down default +// console output. Note that removing this object from the listener list +// with Release transfers its ownership to the user. +void TestEventListeners::Append(TestEventListener* listener) { + repeater_->Append(listener); +} + +// Removes the given event listener from the list and returns it. It then +// becomes the caller's responsibility to delete the listener. Returns +// NULL if the listener is not found in the list. 
+TestEventListener* TestEventListeners::Release(TestEventListener* listener) {
+  // Forget a default listener that is being handed back to the caller.
+  if (listener == default_result_printer_) {
+    default_result_printer_ = NULL;
+  } else if (listener == default_xml_generator_) {
+    default_xml_generator_ = NULL;
+  }
+  return repeater_->Release(listener);
+}
+
+// Returns repeater that broadcasts the TestEventListener events to all
+// subscribers.
+TestEventListener* TestEventListeners::repeater() {
+  return repeater_;
+}
+
+// Replaces the default result printer with the provided listener.  The
+// previous default printer is taken out of the list and deleted, and the
+// new listener is appended unless it is NULL.  Passing the listener that
+// is already the default is a no-op.
+void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) {
+  if (default_result_printer_ == listener)
+    return;
+
+  // It is an error to pass this method a listener that is already in the
+  // list.
+  TestEventListener* const previous = Release(default_result_printer_);
+  delete previous;
+  default_result_printer_ = listener;
+  if (listener != NULL) {
+    Append(listener);
+  }
+}
+
+// Replaces the default XML generator with the provided listener.  The
+// previous default generator is taken out of the list and deleted, and the
+// new listener is appended unless it is NULL.  Passing the listener that
+// is already the default is a no-op.
+void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) {
+  if (default_xml_generator_ == listener)
+    return;
+
+  // It is an error to pass this method a listener that is already in the
+  // list.
+  TestEventListener* const previous = Release(default_xml_generator_);
+  delete previous;
+  default_xml_generator_ = listener;
+  if (listener != NULL) {
+    Append(listener);
+  }
+}
+
+// Controls whether events will be forwarded by the repeater to the
+// listeners in the list.
+bool TestEventListeners::EventForwardingEnabled() const { + return repeater_->forwarding_enabled(); +} + +void TestEventListeners::SuppressEventForwarding() { + repeater_->set_forwarding_enabled(false); +} + +// class UnitTest + +// Gets the singleton UnitTest object. The first time this method is +// called, a UnitTest object is constructed and returned. Consecutive +// calls will return the same object. +// +// We don't protect this under mutex_ as a user is not supposed to +// call this before main() starts, from which point on the return +// value will never change. +UnitTest* UnitTest::GetInstance() { + // When compiled with MSVC 7.1 in optimized mode, destroying the + // UnitTest object upon exiting the program messes up the exit code, + // causing successful tests to appear failed. We have to use a + // different implementation in this case to bypass the compiler bug. + // This implementation makes the compiler happy, at the cost of + // leaking the UnitTest object. + + // CodeGear C++Builder insists on a public destructor for the + // default implementation. Use this implementation to keep good OO + // design with private destructor. + +#if (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__) + static UnitTest* const instance = new UnitTest; + return instance; +#else + static UnitTest instance; + return &instance; +#endif // (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__) +} + +// Gets the number of successful test cases. +int UnitTest::successful_test_case_count() const { + return impl()->successful_test_case_count(); +} + +// Gets the number of failed test cases. +int UnitTest::failed_test_case_count() const { + return impl()->failed_test_case_count(); +} + +// Gets the number of all test cases. +int UnitTest::total_test_case_count() const { + return impl()->total_test_case_count(); +} + +// Gets the number of all test cases that contain at least one test +// that should run. 
+int UnitTest::test_case_to_run_count() const {
+  const int count = impl()->test_case_to_run_count();
+  return count;
+}
+
+// Gets the number of successful tests.
+int UnitTest::successful_test_count() const {
+  const int count = impl()->successful_test_count();
+  return count;
+}
+
+// Gets the number of failed tests.
+int UnitTest::failed_test_count() const {
+  const int count = impl()->failed_test_count();
+  return count;
+}
+
+// Gets the number of disabled tests that will be reported in the XML report.
+int UnitTest::reportable_disabled_test_count() const {
+  const int count = impl()->reportable_disabled_test_count();
+  return count;
+}
+
+// Gets the number of disabled tests.
+int UnitTest::disabled_test_count() const {
+  const int count = impl()->disabled_test_count();
+  return count;
+}
+
+// Gets the number of tests to be printed in the XML report.
+int UnitTest::reportable_test_count() const {
+  const int count = impl()->reportable_test_count();
+  return count;
+}
+
+// Gets the number of all tests.
+int UnitTest::total_test_count() const {
+  const int count = impl()->total_test_count();
+  return count;
+}
+
+// Gets the number of tests that should run.
+int UnitTest::test_to_run_count() const {
+  const int count = impl()->test_to_run_count();
+  return count;
+}
+
+// Gets the time of the test program start, in ms from the start of the
+// UNIX epoch.
+internal::TimeInMillis UnitTest::start_timestamp() const {
+  const internal::TimeInMillis started_at = impl()->start_timestamp();
+  return started_at;
+}
+
+// Gets the elapsed time, in milliseconds.
+internal::TimeInMillis UnitTest::elapsed_time() const {
+  const internal::TimeInMillis elapsed = impl()->elapsed_time();
+  return elapsed;
+}
+
+// Returns true iff the unit test passed (i.e. all test cases passed).
+bool UnitTest::Passed() const {
+  const bool passed = impl()->Passed();
+  return passed;
+}
+
+// Returns true iff the unit test failed (i.e. some test case failed
+// or something outside of all tests failed).
+bool UnitTest::Failed() const {
+  const bool failed = impl()->Failed();
+  return failed;
+}
+
+// Gets the i-th test case among all the test cases. i can range from 0 to
+// total_test_case_count() - 1. If i is not in that range, returns NULL.
+const TestCase* UnitTest::GetTestCase(int i) const {
+  const TestCase* const found = impl()->GetTestCase(i);
+  return found;
+}
+
+// Returns the TestResult containing information on test failures and
+// properties logged outside of individual test cases.
+const TestResult& UnitTest::ad_hoc_test_result() const {
+  const TestResult* const ad_hoc = impl()->ad_hoc_test_result();
+  return *ad_hoc;
+}
+
+// Mutable counterpart of GetTestCase(): forwards the index to the
+// implementation object's GetMutableTestCase().
+TestCase* UnitTest::GetMutableTestCase(int i) {
+  TestCase* const found = impl()->GetMutableTestCase(i);
+  return found;
+}
+
+// Returns the list of event listeners that can be used to track events
+// inside Google Test.
+TestEventListeners& UnitTest::listeners() {
+  TestEventListeners* const all_listeners = impl()->listeners();
+  return *all_listeners;
+}
+
+// Registers and returns a global test environment.  When a test
+// program is run, all global test environments will be set-up in the
+// order they were registered.  After all tests in the program have
+// finished, all global test environments will be torn-down in the
+// *reverse* order they were registered.
+//
+// The UnitTest object takes ownership of the given environment.
+// A NULL environment is not registered and NULL is returned.
+//
+// We don't protect this under mutex_, as we only support calling it
+// from the main thread.
+Environment* UnitTest::AddEnvironment(Environment* env) {
+  if (env != NULL) {
+    impl_->environments().push_back(env);
+  }
+  return env;
+}
+
+// Adds a TestPartResult to the current TestResult object.  All Google Test
+// assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call
+// this to report their results.  The user code should use the
+// assertion macros instead of calling this directly.
+void UnitTest::AddTestPartResult(
+    TestPartResult::Type result_type,
+    const char* file_name,
+    int line_number,
+    const std::string& message,
+    const std::string& os_stack_trace) GTEST_LOCK_EXCLUDED_(mutex_) {
+  Message msg;
+  msg << message;
+
+  internal::MutexLock lock(&mutex_);
+  if (impl_->gtest_trace_stack().size() > 0) {
+    msg << "\n" << GTEST_NAME_ << " trace:";
+
+    // Walk the SCOPED_TRACE stack from the innermost (last pushed) entry
+    // outwards.  The index is a signed int so that the "i > 0" countdown
+    // terminates cleanly.
+    for (int i = static_cast<int>(impl_->gtest_trace_stack().size());
+         i > 0; --i) {
+      const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1];
+      msg << "\n" << internal::FormatFileLocation(trace.file, trace.line)
+          << " " << trace.message;
+    }
+  }
+
+  // std::string::c_str() never returns NULL, so emptiness is the only
+  // condition that needs checking.
+  if (!os_stack_trace.empty()) {
+    msg << internal::kStackTraceMarker << os_stack_trace;
+  }
+
+  const TestPartResult result =
+    TestPartResult(result_type, file_name, line_number,
+                   msg.GetString().c_str());
+  impl_->GetTestPartResultReporterForCurrentThread()->
+      ReportTestPartResult(result);
+
+  if (result_type != TestPartResult::kSuccess) {
+    // gtest_break_on_failure takes precedence over
+    // gtest_throw_on_failure.  This allows a user to set the latter
+    // in the code (perhaps in order to use Google Test assertions
+    // with another testing framework) and specify the former on the
+    // command line for debugging.
+    if (GTEST_FLAG(break_on_failure)) {
+#if GTEST_OS_WINDOWS
+      // Using DebugBreak on Windows allows gtest to still break into a debugger
+      // when a failure happens and both the --gtest_break_on_failure and
+      // the --gtest_catch_exceptions flags are specified.
+      DebugBreak();
+#else
+      // Dereference NULL through a volatile pointer to prevent the compiler
+      // from removing. We use this rather than abort() or __builtin_trap() for
+      // portability: Symbian doesn't implement abort() well, and some debuggers
+      // don't correctly trap abort().
+      *static_cast<volatile int*>(NULL) = 1;
+#endif  // GTEST_OS_WINDOWS
+    } else if (GTEST_FLAG(throw_on_failure)) {
+#if GTEST_HAS_EXCEPTIONS
+      // coverity[fun_call_w_exception]: uncaught exceptions cause nonzero exit anyway, so don't warn.
+      // NOTE(review): with the throw below commented out, this branch does
+      // nothing when exceptions are enabled, i.e. throw_on_failure has no
+      // effect in such builds.  Confirm that this is intended.
+      //throw internal::GoogleTestFailureException(result);
+#else
+      // We cannot call abort() as it generates a pop-up in debug mode
+      // that cannot be suppressed in VC 7.1 or below.
+      exit(1);
+#endif
+    }
+  }
+}
+
+// Adds a TestProperty to the current TestResult object when invoked from
+// inside a test, to current TestCase's ad_hoc_test_result_ when invoked
+// from SetUpTestCase or TearDownTestCase, or to the global property set
+// when invoked elsewhere.  If the result already contains a property with
+// the same key, the value will be updated.
+void UnitTest::RecordProperty(const std::string& key,
+                              const std::string& value) {
+  impl_->RecordProperty(TestProperty(key, value));
+}
+
+// Runs all tests in this UnitTest object and prints the result.
+// Returns 0 if successful, or 1 otherwise.
+//
+// We don't protect this under mutex_, as we only support calling it
+// from the main thread.
+int UnitTest::Run() {
+  const bool in_death_test_child_process =
+      internal::GTEST_FLAG(internal_run_death_test).length() > 0;
+
+  // Google Test implements this protocol for catching that a test
+  // program exits before returning control to Google Test:
+  //
+  //   1. Upon start, Google Test creates a file whose absolute path
+  //      is specified by the environment variable
+  //      TEST_PREMATURE_EXIT_FILE.
+  //   2. When Google Test has finished its work, it deletes the file.
+  //
+  // This allows a test runner to set TEST_PREMATURE_EXIT_FILE before
+  // running a Google-Test-based test program and check the existence
+  // of the file at the end of the test execution to see if it has
+  // exited prematurely.
+
+  // If we are in the child process of a death test, don't
+  // create/delete the premature exit file, as doing so is unnecessary
+  // and will confuse the parent process.  Otherwise, create/delete
+  // the file upon entering/leaving this function.  If the program
+  // somehow exits before this function has a chance to return, the
+  // premature-exit file will be left undeleted, causing a test runner
+  // that understands the premature-exit-file protocol to report the
+  // test as having failed.
+  const internal::ScopedPrematureExitFile premature_exit_file(
+      in_death_test_child_process ?
+      NULL : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE"));
+
+  // Captures the value of GTEST_FLAG(catch_exceptions).  This value will be
+  // used for the duration of the program.
+  impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions));
+
+#if GTEST_HAS_SEH
+  // Either the user wants Google Test to catch exceptions thrown by the
+  // tests or this is executing in the context of death test child
+  // process. In either case the user does not want to see pop-up dialogs
+  // about crashes - they are expected.
+  if (impl()->catch_exceptions() || in_death_test_child_process) {
+# if !GTEST_OS_WINDOWS_MOBILE
+    // SetErrorMode doesn't exist on CE.
+    SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT |
+                 SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX);
+# endif  // !GTEST_OS_WINDOWS_MOBILE
+
+# if (defined(_MSC_VER) || GTEST_OS_WINDOWS_MINGW) && !GTEST_OS_WINDOWS_MOBILE
+    // Death test children can be terminated with _abort().  On Windows,
+    // _abort() can show a dialog with a warning message.  This forces the
+    // abort message to go to stderr instead.
+    _set_error_mode(_OUT_TO_STDERR);
+# endif
+
+# if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
+    // In the debug version, Visual Studio pops up a separate dialog
+    // offering a choice to debug the aborted program. We need to suppress
+    // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement
+    // executed. Google Test will notify the user of any unexpected
+    // failure via stderr.
+    //
+    // VC++ doesn't define _set_abort_behavior() prior to the version 8.0.
+    // Users of prior VC versions shall suffer the agony and pain of
+    // clicking through the countless debug dialogs.
+    // TODO(vladl@google.com): find a way to suppress the abort dialog() in the
+    // debug mode when compiled with VC 7.1 or lower.
+    if (!GTEST_FLAG(break_on_failure))
+      _set_abort_behavior(
+          0x0,                                    // Clear the following flags:
+          _WRITE_ABORT_MSG | _CALL_REPORTFAULT);  // pop-up window, core dump.
+# endif
+  }
+#endif  // GTEST_HAS_SEH
+
+  // RunAllTests() returns true on success; map that to the 0/1 exit code.
+  return internal::HandleExceptionsInMethodIfSupported(
+      impl(),
+      &internal::UnitTestImpl::RunAllTests,
+      "auxiliary test code (environments or event listeners)") ? 0 : 1;
+}
+
+// Returns the working directory when the first TEST() or TEST_F() was
+// executed.
+const char* UnitTest::original_working_dir() const {
+  return impl_->original_working_dir_.c_str();
+}
+
+// Returns the TestCase object for the test that's currently running,
+// or NULL if no test is running.
+const TestCase* UnitTest::current_test_case() const
+    GTEST_LOCK_EXCLUDED_(mutex_) {
+  internal::MutexLock lock(&mutex_);
+  return impl_->current_test_case();
+}
+
+// Returns the TestInfo object for the test that's currently running,
+// or NULL if no test is running.
+const TestInfo* UnitTest::current_test_info() const
+    GTEST_LOCK_EXCLUDED_(mutex_) {
+  internal::MutexLock lock(&mutex_);
+  return impl_->current_test_info();
+}
+
+// Returns the random seed used at the start of the current test run.
+int UnitTest::random_seed() const { return impl_->random_seed(); }
+
+#if GTEST_HAS_PARAM_TEST
+// Returns ParameterizedTestCaseRegistry object used to keep track of
+// value-parameterized tests and instantiate and register them.
+internal::ParameterizedTestCaseRegistry&
+    UnitTest::parameterized_test_registry()
+        GTEST_LOCK_EXCLUDED_(mutex_) {
+  return impl_->parameterized_test_registry();
+}
+#endif  // GTEST_HAS_PARAM_TEST
+
+// Creates an empty UnitTest.
+UnitTest::UnitTest() {
+  impl_ = new internal::UnitTestImpl(this);
+}
+
+// Destructor of UnitTest.
+UnitTest::~UnitTest() {
+  delete impl_;
+}
+
+// Pushes a trace defined by SCOPED_TRACE() on to the per-thread
+// Google Test trace stack.
+void UnitTest::PushGTestTrace(const internal::TraceInfo& trace)
+    GTEST_LOCK_EXCLUDED_(mutex_) {
+  internal::MutexLock lock(&mutex_);
+  impl_->gtest_trace_stack().push_back(trace);
+}
+
+// Pops a trace from the per-thread Google Test trace stack.
+void UnitTest::PopGTestTrace()
+    GTEST_LOCK_EXCLUDED_(mutex_) {
+  internal::MutexLock lock(&mutex_);
+  impl_->gtest_trace_stack().pop_back();
+}
+
+namespace internal {
+
+// Builds the implementation object for the given parent UnitTest and
+// installs the default result printer as an event listener.
+UnitTestImpl::UnitTestImpl(UnitTest* parent)
+    : parent_(parent),
+#ifdef _MSC_VER
+# pragma warning(push)                    // Saves the current warning state.
+# pragma warning(disable:4355)            // Temporarily disables warning 4355
+                                         // (using this in initializer).
+      default_global_test_part_result_reporter_(this),
+      default_per_thread_test_part_result_reporter_(this),
+# pragma warning(pop)                     // Restores the warning state again.
+#else
+      default_global_test_part_result_reporter_(this),
+      default_per_thread_test_part_result_reporter_(this),
+#endif  // _MSC_VER
+      global_test_part_result_repoter_(
+          &default_global_test_part_result_reporter_),
+      per_thread_test_part_result_reporter_(
+          &default_per_thread_test_part_result_reporter_),
+#if GTEST_HAS_PARAM_TEST
+      parameterized_test_registry_(),
+      parameterized_tests_registered_(false),
+#endif  // GTEST_HAS_PARAM_TEST
+      last_death_test_case_(-1),
+      current_test_case_(NULL),
+      current_test_info_(NULL),
+      ad_hoc_test_result_(),
+      os_stack_trace_getter_(NULL),
+      post_flag_parse_init_performed_(false),
+      random_seed_(0),  // Will be overridden by the flag before first use.
+      random_(0),  // Will be reseeded before first use.
+      start_timestamp_(0),
+      elapsed_time_(0),
+#if GTEST_HAS_DEATH_TEST
+      death_test_factory_(new DefaultDeathTestFactory),
+#endif
+      // Will be overridden by the flag before first use.
+      catch_exceptions_(false) {
+  listeners()->SetDefaultResultPrinter(new PrettyUnitTestResultPrinter);
+}
+
+// Releases everything this object owns: the test cases, the registered
+// environments and the OS stack trace getter.
+UnitTestImpl::~UnitTestImpl() {
+  // Deletes every TestCase.
+  ForEach(test_cases_, internal::Delete<TestCase>);
+
+  // Deletes every Environment.
+  ForEach(environments_, internal::Delete<Environment>);
+
+  delete os_stack_trace_getter_;
+}
+
+// Adds a TestProperty to the current TestResult object when invoked in a
+// context of a test, to current test case's ad_hoc_test_result when invoked
+// from SetUpTestCase/TearDownTestCase, or to the global property set
+// otherwise.  If the result already contains a property with the same key,
+// the value will be updated.
+void UnitTestImpl::RecordProperty(const TestProperty& test_property) {
+  std::string xml_element;
+  TestResult* test_result;  // TestResult appropriate for property recording.
+
+  // Pick the innermost scope that is currently active: a running test,
+  // then a running test case, then the whole program.
+  if (current_test_info_ != NULL) {
+    xml_element = "testcase";
+    test_result = &(current_test_info_->result_);
+  } else if (current_test_case_ != NULL) {
+    xml_element = "testsuite";
+    test_result = &(current_test_case_->ad_hoc_test_result_);
+  } else {
+    xml_element = "testsuites";
+    test_result = &ad_hoc_test_result_;
+  }
+  test_result->RecordProperty(xml_element, test_property);
+}
+
+#if GTEST_HAS_DEATH_TEST
+// Disables event forwarding if the control is currently in a death test
+// subprocess. Must not be called before InitGoogleTest.
+void UnitTestImpl::SuppressTestEventsIfInSubprocess() {
+  if (internal_run_death_test_flag_.get() != NULL)
+    listeners()->SuppressEventForwarding();
+}
+#endif  // GTEST_HAS_DEATH_TEST
+
+// Initializes event listeners performing XML output as specified by
+// UnitTestOptions. Must not be called before InitGoogleTest.
+void UnitTestImpl::ConfigureXmlOutput() {
+  const std::string& output_format = UnitTestOptions::GetOutputFormat();
+  if (output_format == "xml") {
+    listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter(
+        UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
+  } else if (output_format != "") {
+    // Any other non-empty format is reported and otherwise ignored.
+    printf("WARNING: unrecognized output format \"%s\" ignored.\n",
+           output_format.c_str());
+    fflush(stdout);
+  }
+}
+
+#if GTEST_CAN_STREAM_RESULTS_
+// Initializes event listeners for streaming test results in string form.
+// Must not be called before InitGoogleTest.
+void UnitTestImpl::ConfigureStreamingOutput() {
+  const std::string& target = GTEST_FLAG(stream_result_to);
+  if (target.empty()) {
+    return;  // Streaming was not requested.
+  }
+
+  // The part before the first ':' and the part after it become the two
+  // arguments of the StreamingListener.
+  const size_t colon = target.find(':');
+  if (colon == std::string::npos) {
+    printf("WARNING: unrecognized streaming target \"%s\" ignored.\n",
+           target.c_str());
+    fflush(stdout);
+    return;
+  }
+
+  const std::string before_colon = target.substr(0, colon);
+  const std::string after_colon = target.substr(colon + 1);
+  listeners()->Append(new StreamingListener(before_colon, after_colon));
+}
+#endif  // GTEST_CAN_STREAM_RESULTS_
+
+// Performs the initialization that depends on flag values obtained in
+// ParseGoogleTestFlagsOnly. InitGoogleTest calls it right after
+// ParseGoogleTestFlagsOnly, and RunAllTests calls it again in case the
+// user neglected to call InitGoogleTest. Because it can therefore be
+// called more than once, it has to be idempotent.
+void UnitTestImpl::PostFlagParsingInit() {
+  // Only the first call does any work.
+  if (post_flag_parse_init_performed_) {
+    return;
+  }
+  post_flag_parse_init_performed_ = true;
+
+#if GTEST_HAS_DEATH_TEST
+  InitDeathTestSubprocessControlInfo();
+  SuppressTestEventsIfInSubprocess();
+#endif  // GTEST_HAS_DEATH_TEST
+
+  // Registering parameterized tests here makes them available to the
+  // UnitTest reflection API without running RUN_ALL_TESTS.
+  RegisterParameterizedTests();
+
+  // Configuring the XML listeners here lets users shut down the default
+  // XML output before invoking RUN_ALL_TESTS.
+  ConfigureXmlOutput();
+
+#if GTEST_CAN_STREAM_RESULTS_
+  // Sets up the listeners that stream test results to the specified server.
+  ConfigureStreamingOutput();
+#endif  // GTEST_CAN_STREAM_RESULTS_
+}
+
+// A predicate that checks the name of a TestCase against a known
+// value.
+//
+// This is used for implementation of the UnitTest class only. We put
+// it in the anonymous namespace to prevent polluting the outer
+// namespace.
+//
+// TestCaseNameIs is copyable.
+class TestCaseNameIs {
+ public:
+  // Constructor.
+  explicit TestCaseNameIs(const std::string& name)
+      : name_(name) {}
+
+  // Returns true iff the name of test_case matches name_.
+  bool operator()(const TestCase* test_case) const {
+    return test_case != NULL && strcmp(test_case->name(), name_.c_str()) == 0;
+  }
+
+ private:
+  std::string name_;
+};
+
+// Finds and returns a TestCase with the given name.  If one doesn't
+// exist, creates one and returns it.  It's the CALLER'S
+// RESPONSIBILITY to ensure that this function is only called WHEN THE
+// TESTS ARE NOT SHUFFLED.
+//
+// Arguments:
+//
+//   test_case_name: name of the test case
+//   type_param:     the name of the test case's type parameter, or NULL if
+//                   this is not a typed or a type-parameterized test case.
+//   set_up_tc:      pointer to the function that sets up the test case
+//   tear_down_tc:   pointer to the function that tears down the test case
+TestCase* UnitTestImpl::GetTestCase(const char* test_case_name,
+                                    const char* type_param,
+                                    Test::SetUpTestCaseFunc set_up_tc,
+                                    Test::TearDownTestCaseFunc tear_down_tc) {
+  // Can we find a TestCase with the given name?
+  const std::vector<TestCase*>::const_iterator test_case =
+      std::find_if(test_cases_.begin(), test_cases_.end(),
+                   TestCaseNameIs(test_case_name));
+
+  if (test_case != test_cases_.end())
+    return *test_case;
+
+  // No.  Let's create one.
+  TestCase* const new_test_case =
+      new TestCase(test_case_name, type_param, set_up_tc, tear_down_tc);
+
+  // Is this a death test case?
+  if (internal::UnitTestOptions::MatchesFilter(test_case_name,
+                                               kDeathTestCaseFilter)) {
+    // Yes.  Inserts the test case after the last death test case
+    // defined so far.  This only works when the test cases haven't
+    // been shuffled.  Otherwise we may end up running a death test
+    // after a non-death test.
+    ++last_death_test_case_;
+    test_cases_.insert(test_cases_.begin() + last_death_test_case_,
+                       new_test_case);
+  } else {
+    // No.  Appends to the end of the list.
+    test_cases_.push_back(new_test_case);
+  }
+
+  // The new entry's index equals the number of indices recorded so far.
+  test_case_indices_.push_back(static_cast<int>(test_case_indices_.size()));
+  return new_test_case;
+}
+
+// Helpers for setting up / tearing down the given environment.  They
+// are for use in the ForEach() function.
+static void SetUpEnvironment(Environment* env) { env->SetUp(); }
+static void TearDownEnvironment(Environment* env) { env->TearDown(); }
+
+// Runs all tests in this UnitTest object, prints the result, and
+// returns true if all tests are successful.  If any exception is
+// thrown during a test, the test is considered to be failed, but the
+// rest of the tests will still be run.
+//
+// When parameterized tests are enabled, it expands and registers
+// parameterized tests first in RegisterParameterizedTests().
+// All other functions called from RunAllTests() may safely assume that
+// parameterized tests are ready to be counted and run.
+bool UnitTestImpl::RunAllTests() {
+  // Makes sure InitGoogleTest() was called.
+  if (!GTestIsInitialized()) {
+    printf("%s",
+           "\nThis test program did NOT call ::testing::InitGoogleTest "
+           "before calling RUN_ALL_TESTS(). Please fix it.\n");
+    return false;
+  }
+
+  // Do not run any test if the --help flag was specified.
+  if (g_help_flag)
+    return true;
+
+  // Repeats the call to the post-flag parsing initialization in case the
+  // user didn't call InitGoogleTest.
+  PostFlagParsingInit();
+
+  // Even if sharding is not on, test runners may want to use the
+  // GTEST_SHARD_STATUS_FILE to query whether the test supports the sharding
+  // protocol.
+  internal::WriteToShardStatusFileIfNeeded();
+
+  // True iff we are in a subprocess for running a thread-safe-style
+  // death test.
+  bool in_subprocess_for_death_test = false;
+
+#if GTEST_HAS_DEATH_TEST
+  in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL);
+#endif  // GTEST_HAS_DEATH_TEST
+
+  const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex,
+                                        in_subprocess_for_death_test);
+
+  // Compares the full test names with the filter to decide which
+  // tests to run.
+  const bool has_tests_to_run = FilterTests(should_shard
+                                              ? HONOR_SHARDING_PROTOCOL
+                                              : IGNORE_SHARDING_PROTOCOL) > 0;
+
+  // Lists the tests and exits if the --gtest_list_tests flag was specified.
+  if (GTEST_FLAG(list_tests)) {
+    // This must be called *after* FilterTests() has been called.
+    ListTestsMatchingFilter();
+    return true;
+  }
+
+  random_seed_ = GTEST_FLAG(shuffle) ?
+      GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0;
+
+  // True iff at least one test has failed.
+  bool failed = false;
+
+  TestEventListener* repeater = listeners()->repeater();
+
+  start_timestamp_ = GetTimeInMillis();
+  repeater->OnTestProgramStart(*parent_);
+
+  // How many times to repeat the tests?  We don't want to repeat them
+  // when we are inside the subprocess of a death test.
+  const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat);
+  // Repeats forever if the repeat count is negative.
+  const bool forever = repeat < 0;
+  for (int i = 0; forever || i != repeat; i++) {
+    // We want to preserve failures generated by ad-hoc test
+    // assertions executed before RUN_ALL_TESTS().
+    ClearNonAdHocTestResult();
+
+    const TimeInMillis start = GetTimeInMillis();
+
+    // Shuffles test cases and tests if requested.
+    if (has_tests_to_run && GTEST_FLAG(shuffle)) {
+      random()->Reseed(random_seed_);
+      // This should be done before calling OnTestIterationStart(),
+      // such that a test event listener can see the actual test order
+      // in the event.
+      ShuffleTests();
+    }
+
+    // Tells the unit test event listeners that the tests are about to start.
+    repeater->OnTestIterationStart(*parent_, i);
+
+    // Runs each test case if there is at least one test to run.
+    if (has_tests_to_run) {
+      // Sets up all environments beforehand.
+      repeater->OnEnvironmentsSetUpStart(*parent_);
+      ForEach(environments_, SetUpEnvironment);
+      repeater->OnEnvironmentsSetUpEnd(*parent_);
+
+      // Runs the tests only if there was no fatal failure during global
+      // set-up.
+      if (!Test::HasFatalFailure()) {
+        for (int test_index = 0; test_index < total_test_case_count();
+             test_index++) {
+          GetMutableTestCase(test_index)->Run();
+        }
+      }
+
+      // Tears down all environments in reverse order afterwards.
+      repeater->OnEnvironmentsTearDownStart(*parent_);
+      std::for_each(environments_.rbegin(), environments_.rend(),
+                    TearDownEnvironment);
+      repeater->OnEnvironmentsTearDownEnd(*parent_);
+    }
+
+    elapsed_time_ = GetTimeInMillis() - start;
+
+    // Tells the unit test event listener that the tests have just finished.
+    repeater->OnTestIterationEnd(*parent_, i);
+
+    // Remembers a failure of this iteration; it is never reset, so one
+    // failing iteration makes the whole run fail.
+    if (!Passed()) {
+      failed = true;
+    }
+
+    // Restores the original test order after the iteration.  This
+    // allows the user to quickly repro a failure that happens in the
+    // N-th iteration without repeating the first (N - 1) iterations.
+    // This is not enclosed in "if (GTEST_FLAG(shuffle)) { ... }", in
+    // case the user somehow changes the value of the flag somewhere
+    // (it's always safe to unshuffle the tests).
+    UnshuffleTests();
+
+    if (GTEST_FLAG(shuffle)) {
+      // Picks a new random seed for each iteration.
+      random_seed_ = GetNextRandomSeed(random_seed_);
+    }
+  }
+
+  repeater->OnTestProgramEnd(*parent_);
+
+  return !failed;
+}
+
+// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
+// if the variable is present. If a file already exists at this location, this
+// function will write over it. If the variable is present, but the file cannot
+// be created, prints an error and exits.
+void WriteToShardStatusFileIfNeeded() {
+  const char* const test_shard_file = posix::GetEnv(kTestShardStatusFile);
+  if (test_shard_file != NULL) {
+    // Opening for writing is enough to create (or truncate) the file;
+    // nothing is written into it.
+    FILE* const file = posix::FOpen(test_shard_file, "w");
+    if (file == NULL) {
+      ColoredPrintf(COLOR_RED,
+                    "Could not write to the test shard status file \"%s\" "
+                    "specified by the %s environment variable.\n",
+                    test_shard_file, kTestShardStatusFile);
+      fflush(stdout);
+      exit(EXIT_FAILURE);
+    }
+    fclose(file);
+  }
+}
+
+// Checks whether sharding is enabled by examining the relevant
+// environment variable values.  If the variables are present,
+// but inconsistent (i.e., shard_index >= total_shards), prints
+// an error and exits.  If in_subprocess_for_death_test, sharding is
+// disabled because it must only be applied to the original test
+// process. Otherwise, we could filter out death tests we intended to execute.
+//
+// The diagnostics are built at run time, so they are passed to
+// ColoredPrintf() as "%s" arguments and never as the format string itself.
+bool ShouldShard(const char* total_shards_env,
+                 const char* shard_index_env,
+                 bool in_subprocess_for_death_test) {
+  if (in_subprocess_for_death_test) {
+    return false;
+  }
+
+  // -1 stands for "variable not set".
+  const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1);
+  const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1);
+
+  if (total_shards == -1 && shard_index == -1) {
+    return false;
+  } else if (total_shards == -1 && shard_index != -1) {
+    const Message msg = Message()
+      << "Invalid environment variables: you have "
+      << kTestShardIndex << " = " << shard_index
+      << ", but have left " << kTestTotalShards << " unset.\n";
+    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+    fflush(stdout);
+    exit(EXIT_FAILURE);
+  } else if (total_shards != -1 && shard_index == -1) {
+    const Message msg = Message()
+      << "Invalid environment variables: you have "
+      << kTestTotalShards << " = " << total_shards
+      << ", but have left " << kTestShardIndex << " unset.\n";
+    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+    fflush(stdout);
+    exit(EXIT_FAILURE);
+  } else if (shard_index < 0 || shard_index >= total_shards) {
+    const Message msg = Message()
+      << "Invalid environment variables: we require 0 <= "
+      << kTestShardIndex << " < " << kTestTotalShards
+      << ", but you have " << kTestShardIndex << "=" << shard_index
+      << ", " << kTestTotalShards << "=" << total_shards << ".\n";
+    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+    fflush(stdout);
+    exit(EXIT_FAILURE);
+  }
+
+  // A single shard means there is nothing to split.
+  return total_shards > 1;
+}
+
+// Parses the environment variable var as an Int32. If it is unset,
+// returns default_val. If it is not an Int32, prints an error
+// and aborts.
+Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) {
+  const char* str_val = posix::GetEnv(var);
+  if (str_val == NULL) {
+    return default_val;
+  }
+
+  Int32 result;
+  if (!ParseInt32(Message() << "The value of environment variable " << var,
+                  str_val, &result)) {
+    exit(EXIT_FAILURE);
+  }
+  return result;
+}
+
+// Given the total number of shards, the shard index, and the test id,
+// returns true iff the test should be run on this shard. The test id is
+// some arbitrary but unique non-negative integer assigned to each test
+// method. Assumes that 0 <= shard_index < total_shards.
+bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
+  return (test_id % total_shards) == shard_index;
+}
+
+// Compares the name of each test with the user-specified filter to
+// decide whether the test should be run, then records the result in
+// each TestCase and TestInfo object.
+// If shard_tests == true, further filters tests based on sharding
+// variables in the environment - see
+// http://code.google.com/p/googletest/wiki/GoogleTestAdvancedGuide.
+// Returns the number of tests that should run.
+int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
+  const Int32 total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ?
+      Int32FromEnvOrDie(kTestTotalShards, -1) : -1;
+  const Int32 shard_index = shard_tests == HONOR_SHARDING_PROTOCOL ?
+      Int32FromEnvOrDie(kTestShardIndex, -1) : -1;
+
+  // num_runnable_tests are the number of tests that will
+  // run across all shards (i.e., match filter and are not disabled).
+  // num_selected_tests are the number of tests to be run on
+  // this shard.
+  int num_runnable_tests = 0;
+  int num_selected_tests = 0;
+  for (size_t i = 0; i < test_cases_.size(); i++) {
+    TestCase* const test_case = test_cases_[i];
+    const std::string &test_case_name = test_case->name();
+    test_case->set_should_run(false);
+
+    for (size_t j = 0; j < test_case->test_info_list().size(); j++) {
+      TestInfo* const test_info = test_case->test_info_list()[j];
+      const std::string test_name(test_info->name());
+      // A test is disabled if test case name or test name matches
+      // kDisableTestFilter.
+      const bool is_disabled =
+          internal::UnitTestOptions::MatchesFilter(test_case_name,
+                                                   kDisableTestFilter) ||
+          internal::UnitTestOptions::MatchesFilter(test_name,
+                                                   kDisableTestFilter);
+      test_info->is_disabled_ = is_disabled;
+
+      const bool matches_filter =
+          internal::UnitTestOptions::FilterMatchesTest(test_case_name,
+                                                       test_name);
+      test_info->matches_filter_ = matches_filter;
+
+      const bool is_runnable =
+          (GTEST_FLAG(also_run_disabled_tests) || !is_disabled) &&
+          matches_filter;
+
+      // The running count of runnable tests doubles as the test id used
+      // to assign tests to shards.
+      const bool is_selected = is_runnable &&
+          (shard_tests == IGNORE_SHARDING_PROTOCOL ||
+           ShouldRunTestOnShard(total_shards, shard_index,
+                                num_runnable_tests));
+
+      num_runnable_tests += is_runnable;
+      num_selected_tests += is_selected;
+
+      test_info->should_run_ = is_selected;
+      test_case->set_should_run(test_case->should_run() || is_selected);
+    }
+  }
+  return num_selected_tests;
+}
+
+// Prints the given C-string on a single line by replacing all '\n'
+// characters with string "\\n".  If the output takes more than
+// max_length characters, only prints the first max_length characters
+// and "...".
+static void PrintOnOneLine(const char* str, int max_length) {
+  if (str == NULL) {
+    return;  // Nothing to print.
+  }
+
+  // `printed` counts output characters; an escaped newline counts as two.
+  int printed = 0;
+  for (const char* cursor = str; *cursor != '\0'; ++cursor) {
+    if (printed >= max_length) {
+      printf("...");
+      return;
+    }
+    if (*cursor != '\n') {
+      printf("%c", *cursor);
+      printed += 1;
+    } else {
+      printf("\\n");
+      printed += 2;
+    }
+  }
+}
+
+// Prints the names of the tests that match the user-specified filter
+// flag: one header line per test case, then one line per matching test.
+// NOTE(review): this dump appears to have had runs of whitespace
+// collapsed; confirm that the spacing inside the string literals below
+// matches the intended output.
+void UnitTestImpl::ListTestsMatchingFilter() {
+  // Upper bound on the characters printed for each type/value parameter.
+  const int kMaxParamLength = 250;
+
+  for (size_t case_idx = 0; case_idx < test_cases_.size(); case_idx++) {
+    const TestCase* const test_case = test_cases_[case_idx];
+    bool header_printed = false;
+
+    for (size_t test_idx = 0; test_idx < test_case->test_info_list().size();
+         test_idx++) {
+      const TestInfo* const test_info = test_case->test_info_list()[test_idx];
+      if (!test_info->matches_filter_) {
+        continue;
+      }
+
+      // The test case header goes out once, before its first matching test.
+      if (!header_printed) {
+        header_printed = true;
+        printf("%s.", test_case->name());
+        if (test_case->type_param() != NULL) {
+          printf(" # %s = ", kTypeParamLabel);
+          // The type parameter is kept on a single line so that the
+          // output is easy to parse by a program.
+          PrintOnOneLine(test_case->type_param(), kMaxParamLength);
+        }
+        printf("\n");
+      }
+
+      printf(" %s", test_info->name());
+      if (test_info->value_param() != NULL) {
+        printf(" # %s = ", kValueParamLabel);
+        // The value parameter is kept on a single line for the same reason.
+        PrintOnOneLine(test_info->value_param(), kMaxParamLength);
+      }
+      printf("\n");
+    }
+  }
+  fflush(stdout);
+}
+
+// Sets the OS stack trace getter.
+//
+// Does nothing if the input and the current OS stack trace getter are
+// the same; otherwise, deletes the old getter and makes the input the
+// current getter.
+void UnitTestImpl::set_os_stack_trace_getter( + OsStackTraceGetterInterface* getter) { + if (os_stack_trace_getter_ != getter) { + delete os_stack_trace_getter_; + os_stack_trace_getter_ = getter; + } +} + +// Returns the current OS stack trace getter if it is not NULL; +// otherwise, creates an OsStackTraceGetter, makes it the current +// getter, and returns it. +OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() { + if (os_stack_trace_getter_ == NULL) { + os_stack_trace_getter_ = new OsStackTraceGetter; + } + + return os_stack_trace_getter_; +} + +// Returns the TestResult for the test that's currently running, or +// the TestResult for the ad hoc test if no test is running. +TestResult* UnitTestImpl::current_test_result() { + return current_test_info_ ? + &(current_test_info_->result_) : &ad_hoc_test_result_; +} + +// Shuffles all test cases, and the tests within each test case, +// making sure that death tests are still run first. +void UnitTestImpl::ShuffleTests() { + // Shuffles the death test cases. + ShuffleRange(random(), 0, last_death_test_case_ + 1, &test_case_indices_); + + // Shuffles the non-death test cases. + ShuffleRange(random(), last_death_test_case_ + 1, + static_cast(test_cases_.size()), &test_case_indices_); + + // Shuffles the tests inside each test case. + for (size_t i = 0; i < test_cases_.size(); i++) { + test_cases_[i]->ShuffleTests(random()); + } +} + +// Restores the test cases and tests to their order before the first shuffle. +void UnitTestImpl::UnshuffleTests() { + for (size_t i = 0; i < test_cases_.size(); i++) { + // Unshuffles the tests in each test case. + test_cases_[i]->UnshuffleTests(); + // Resets the index of each test case. + test_case_indices_[i] = static_cast(i); + } +} + +// Returns the current OS stack trace as an std::string. +// +// The maximum number of stack frames to be included is specified by +// the gtest_stack_trace_depth flag. 
The skip_count parameter +// specifies the number of top frames to be skipped, which doesn't +// count against the number of frames to be included. +// +// For example, if Foo() calls Bar(), which in turn calls +// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in +// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't. +std::string GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/, + int skip_count) { + // We pass skip_count + 1 to skip this wrapper function in addition + // to what the user really wants to skip. + return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(skip_count + 1); +} + +// Used by the GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_ macro to +// suppress unreachable code warnings. +namespace { +class ClassUniqueToAlwaysTrue {}; +} + +bool IsTrue(bool condition) { return condition; } + +bool AlwaysTrue() { +#if GTEST_HAS_EXCEPTIONS + // This condition is always false so AlwaysTrue() never actually throws, + // but it makes the compiler think that it may throw. + if (IsTrue(false)) + throw ClassUniqueToAlwaysTrue(); +#endif // GTEST_HAS_EXCEPTIONS + return true; +} + +// If *pstr starts with the given prefix, modifies *pstr to be right +// past the prefix and returns true; otherwise leaves *pstr unchanged +// and returns false. None of pstr, *pstr, and prefix can be NULL. +bool SkipPrefix(const char* prefix, const char** pstr) { + const size_t prefix_len = strlen(prefix); + if (strncmp(*pstr, prefix, prefix_len) == 0) { + *pstr += prefix_len; + return true; + } + return false; +} + +// Parses a string as a command line flag. The string should have +// the format "--flag=value". When def_optional is true, the "=value" +// part can be omitted. +// +// Returns the value of the flag, or NULL if the parsing failed. +const char* ParseFlagValue(const char* str, + const char* flag, + bool def_optional) { + // str and flag must not be NULL. 
+ if (str == NULL || flag == NULL) return NULL; + + // The flag must start with "--" followed by GTEST_FLAG_PREFIX_. + const std::string flag_str = std::string("--") + GTEST_FLAG_PREFIX_ + flag; + const size_t flag_len = flag_str.length(); + if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL; + + // Skips the flag name. + const char* flag_end = str + flag_len; + + // When def_optional is true, it's OK to not have a "=value" part. + if (def_optional && (flag_end[0] == '\0')) { + return flag_end; + } + + // If def_optional is true and there are more characters after the + // flag name, or if def_optional is false, there must be a '=' after + // the flag name. + if (flag_end[0] != '=') return NULL; + + // Returns the string after "=". + return flag_end + 1; +} + +// Parses a string for a bool flag, in the form of either +// "--flag=value" or "--flag". +// +// In the former case, the value is taken as true as long as it does +// not start with '0', 'f', or 'F'. +// +// In the latter case, the value is taken as true. +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseBoolFlag(const char* str, const char* flag, bool* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, true); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Converts the string value to a bool. + *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F'); + return true; +} + +// Parses a string for an Int32 flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseInt32Flag(const char* str, const char* flag, Int32* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. 
+ if (value_str == NULL) return false; + + // Sets *value to the value of the flag. + return ParseInt32(Message() << "The value of flag --" << flag, + value_str, value); +} + +// Parses a string for a string flag, in the form of +// "--flag=value". +// +// On success, stores the value of the flag in *value, and returns +// true. On failure, returns false without changing *value. +bool ParseStringFlag(const char* str, const char* flag, std::string* value) { + // Gets the value of the flag as a string. + const char* const value_str = ParseFlagValue(str, flag, false); + + // Aborts if the parsing failed. + if (value_str == NULL) return false; + + // Sets *value to the value of the flag. + *value = value_str; + return true; +} + +// Determines whether a string has a prefix that Google Test uses for its +// flags, i.e., starts with GTEST_FLAG_PREFIX_ or GTEST_FLAG_PREFIX_DASH_. +// If Google Test detects that a command line flag has its prefix but is not +// recognized, it will print its help message. Flags starting with +// GTEST_INTERNAL_PREFIX_ followed by "internal_" are considered Google Test +// internal flags and do not trigger the help message. +static bool HasGoogleTestFlagPrefix(const char* str) { + return (SkipPrefix("--", &str) || + SkipPrefix("-", &str) || + SkipPrefix("/", &str)) && + !SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str) && + (SkipPrefix(GTEST_FLAG_PREFIX_, &str) || + SkipPrefix(GTEST_FLAG_PREFIX_DASH_, &str)); +} + +// Prints a string containing code-encoded text. The following escape +// sequences can be used in the string to control the text color: +// +// @@ prints a single '@' character. +// @R changes the color to red. +// @G changes the color to green. +// @Y changes the color to yellow. +// @D changes to the default terminal text color. +// +// TODO(wan@google.com): Write tests for this once we add stdout +// capturing to Google Test. 
+static void PrintColorEncoded(const char* str) { + GTestColor color = COLOR_DEFAULT; // The current color. + + // Conceptually, we split the string into segments divided by escape + // sequences. Then we print one segment at a time. At the end of + // each iteration, the str pointer advances to the beginning of the + // next segment. + for (;;) { + const char* p = strchr(str, '@'); + if (p == NULL) { + ColoredPrintf(color, "%s", str); + return; + } + + ColoredPrintf(color, "%s", std::string(str, p).c_str()); + + const char ch = p[1]; + str = p + 2; + if (ch == '@') { + ColoredPrintf(color, "@"); + } else if (ch == 'D') { + color = COLOR_DEFAULT; + } else if (ch == 'R') { + color = COLOR_RED; + } else if (ch == 'G') { + color = COLOR_GREEN; + } else if (ch == 'Y') { + color = COLOR_YELLOW; + } else { + --str; + } + } +} + +static const char kColorEncodedHelpMessage[] = +"This program contains tests written using " GTEST_NAME_ ". You can use the\n" +"following command line flags to control its behavior:\n" +"\n" +"Test Selection:\n" +" @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n" +" List the names of all tests instead of running them. The name of\n" +" TEST(Foo, Bar) is \"Foo.Bar\".\n" +" @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSTIVE_PATTERNS" + "[@G-@YNEGATIVE_PATTERNS]@D\n" +" Run only the tests whose name matches one of the positive patterns but\n" +" none of the negative patterns. '?' 
matches any single character; '*'\n" +" matches any substring; ':' separates two patterns.\n" +" @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n" +" Run all disabled tests too.\n" +"\n" +"Test Execution:\n" +" @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n" +" Run the tests repeatedly; use a negative count to repeat forever.\n" +" @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n" +" Randomize tests' orders on every iteration.\n" +" @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n" +" Random number seed to use for shuffling test orders (between 1 and\n" +" 99999, or 0 to use a seed based on the current time).\n" +"\n" +"Test Output:\n" +" @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n" +" Enable/disable colored output. The default is @Gauto@D.\n" +" -@G-" GTEST_FLAG_PREFIX_ "print_time=0@D\n" +" Don't print the elapsed time of each test.\n" +" @G--" GTEST_FLAG_PREFIX_ "output=xml@Y[@G:@YDIRECTORY_PATH@G" + GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n" +" Generate an XML report in the given directory or with the given file\n" +" name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n" +#if GTEST_CAN_STREAM_RESULTS_ +" @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n" +" Stream test results to the given server.\n" +#endif // GTEST_CAN_STREAM_RESULTS_ +"\n" +"Assertion Behavior:\n" +#if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS +" @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n" +" Set the default death test style.\n" +#endif // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS +" @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n" +" Turn assertion failures into debugger break-points.\n" +" @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n" +" Turn assertion failures into C++ exceptions.\n" +" @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n" +" Do not report exceptions as test failures. 
Instead, allow them\n" +" to crash the program or throw a pop-up (on Windows).\n" +"\n" +"Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set " + "the corresponding\n" +"environment variable of a flag (all letters in upper-case). For example, to\n" +"disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_ + "color=no@D or set\n" +"the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n" +"\n" +"For more information, please read the " GTEST_NAME_ " documentation at\n" +"@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_ "\n" +"(not one in your own code or tests), please report it to\n" +"@G<" GTEST_DEV_EMAIL_ ">@D.\n"; + +// Parses the command line for Google Test flags, without initializing +// other parts of Google Test. The type parameter CharType can be +// instantiated to either char or wchar_t. +template +void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) { + for (int i = 1; i < *argc; i++) { + const std::string arg_string = StreamableToString(argv[i]); + const char* const arg = arg_string.c_str(); + + using internal::ParseBoolFlag; + using internal::ParseInt32Flag; + using internal::ParseStringFlag; + + // Do we see a Google Test flag? 
+ if (ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag, + >EST_FLAG(also_run_disabled_tests)) || + ParseBoolFlag(arg, kBreakOnFailureFlag, + >EST_FLAG(break_on_failure)) || + ParseBoolFlag(arg, kCatchExceptionsFlag, + >EST_FLAG(catch_exceptions)) || + ParseStringFlag(arg, kColorFlag, >EST_FLAG(color)) || + ParseStringFlag(arg, kDeathTestStyleFlag, + >EST_FLAG(death_test_style)) || + ParseBoolFlag(arg, kDeathTestUseFork, + >EST_FLAG(death_test_use_fork)) || + ParseStringFlag(arg, kFilterFlag, >EST_FLAG(filter)) || + ParseStringFlag(arg, kInternalRunDeathTestFlag, + >EST_FLAG(internal_run_death_test)) || + ParseBoolFlag(arg, kListTestsFlag, >EST_FLAG(list_tests)) || + ParseStringFlag(arg, kOutputFlag, >EST_FLAG(output)) || + ParseBoolFlag(arg, kPrintTimeFlag, >EST_FLAG(print_time)) || + ParseInt32Flag(arg, kRandomSeedFlag, >EST_FLAG(random_seed)) || + ParseInt32Flag(arg, kRepeatFlag, >EST_FLAG(repeat)) || + ParseBoolFlag(arg, kShuffleFlag, >EST_FLAG(shuffle)) || + ParseInt32Flag(arg, kStackTraceDepthFlag, + >EST_FLAG(stack_trace_depth)) || + ParseStringFlag(arg, kStreamResultToFlag, + >EST_FLAG(stream_result_to)) || + ParseBoolFlag(arg, kThrowOnFailureFlag, + >EST_FLAG(throw_on_failure)) + ) { + // Yes. Shift the remainder of the argv list left by one. Note + // that argv has (*argc + 1) elements, the last one always being + // NULL. The following loop moves the trailing NULL element as + // well. + for (int j = i; j != *argc; j++) { + argv[j] = argv[j + 1]; + } + + // Decrements the argument count. + (*argc)--; + + // We also need to decrement the iterator as we just removed + // an element. + i--; + } else if (arg_string == "--help" || arg_string == "-h" || + arg_string == "-?" || arg_string == "/?" || + HasGoogleTestFlagPrefix(arg)) { + // Both help flag and unrecognized Google Test flags (excluding + // internal ones) trigger help display. 
+ g_help_flag = true; + } + } + + if (g_help_flag) { + // We print the help here instead of in RUN_ALL_TESTS(), as the + // latter may not be called at all if the user is using Google + // Test with another testing framework. + PrintColorEncoded(kColorEncodedHelpMessage); + } +} + +// Parses the command line for Google Test flags, without initializing +// other parts of Google Test. +void ParseGoogleTestFlagsOnly(int* argc, char** argv) { + ParseGoogleTestFlagsOnlyImpl(argc, argv); +} +void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) { + ParseGoogleTestFlagsOnlyImpl(argc, argv); +} + +// The internal implementation of InitGoogleTest(). +// +// The type parameter CharType can be instantiated to either char or +// wchar_t. +template +void InitGoogleTestImpl(int* argc, CharType** argv) { + g_init_gtest_count++; + + // We don't want to run the initialization code twice. + if (g_init_gtest_count != 1) return; + + if (*argc <= 0) return; + + internal::g_executable_path = internal::StreamableToString(argv[0]); + +#if GTEST_HAS_DEATH_TEST + + g_argvs.clear(); + for (int i = 0; i != *argc; i++) { + g_argvs.push_back(StreamableToString(argv[i])); + } + +#endif // GTEST_HAS_DEATH_TEST + + ParseGoogleTestFlagsOnly(argc, argv); + GetUnitTestImpl()->PostFlagParsingInit(); +} + +} // namespace internal + +// Initializes Google Test. This must be called before calling +// RUN_ALL_TESTS(). In particular, it parses a command line for the +// flags that Google Test recognizes. Whenever a Google Test flag is +// seen, it is removed from argv, and *argc is decremented. +// +// No value is returned. Instead, the Google Test flag variables are +// updated. +// +// Calling the function for the second time has no user-visible effect. +void InitGoogleTest(int* argc, char** argv) { + internal::InitGoogleTestImpl(argc, argv); +} + +// This overloaded version can be used in Windows programs compiled in +// UNICODE mode. 
+void InitGoogleTest(int* argc, wchar_t** argv) {
+  // Forwards to the internal implementation with the wide-character argv.
+  internal::InitGoogleTestImpl(argc, argv);
+}
+
+}  // namespace testing
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan), vladl@google.com (Vlad Losev)
+//
+// This file implements death tests.
+ + +#if GTEST_HAS_DEATH_TEST + +# if GTEST_OS_MAC +# include +# endif // GTEST_OS_MAC + +# include +# include +# include + +# if GTEST_OS_LINUX +# include +# endif // GTEST_OS_LINUX + +# include + +# if GTEST_OS_WINDOWS +# include +# else +# include +# include +# endif // GTEST_OS_WINDOWS + +# if GTEST_OS_QNX +# include +# endif // GTEST_OS_QNX + +#endif // GTEST_HAS_DEATH_TEST + + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION_ 1 +#undef GTEST_IMPLEMENTATION_ + +namespace testing { + +// Constants. + +// The default death test style. +static const char kDefaultDeathTestStyle[] = "fast"; + +GTEST_DEFINE_string_( + death_test_style, + internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle), + "Indicates how to run a death test in a forked child process: " + "\"threadsafe\" (child process re-executes the test binary " + "from the beginning, running only the specific death test) or " + "\"fast\" (child process runs the death test immediately " + "after forking)."); + +GTEST_DEFINE_bool_( + death_test_use_fork, + internal::BoolFromGTestEnv("death_test_use_fork", false), + "Instructs to use fork()/_exit() instead of clone() in death tests. " + "Ignored and always uses fork() on POSIX systems where clone() is not " + "implemented. Useful when running under valgrind or similar tools if " + "those do not support clone(). Valgrind 3.3.1 will just fail if " + "it sees an unsupported combination of clone() flags. " + "It is not recommended to use this flag w/o valgrind though it will " + "work in 99% of the cases. 
Once valgrind is fixed, this flag will " + "most likely be removed."); + +namespace internal { +GTEST_DEFINE_string_( + internal_run_death_test, "", + "Indicates the file, line number, temporal index of " + "the single death test to run, and a file descriptor to " + "which a success code may be sent, all separated by " + "the '|' characters. This flag is specified if and only if the current " + "process is a sub-process launched for running a thread-safe " + "death test. FOR INTERNAL USE ONLY."); +} // namespace internal + +#if GTEST_HAS_DEATH_TEST + +namespace internal { + +// Valid only for fast death tests. Indicates the code is running in the +// child process of a fast style death test. +static bool g_in_fast_death_test_child = false; + +// Returns a Boolean value indicating whether the caller is currently +// executing in the context of the death test child process. Tools such as +// Valgrind heap checkers may need this to modify their behavior in death +// tests. IMPORTANT: This is an internal utility. Using it may break the +// implementation of death tests. User code MUST NOT use it. +bool InDeathTestChild() { +# if GTEST_OS_WINDOWS + + // On Windows, death tests are thread-safe regardless of the value of the + // death_test_style flag. + return !GTEST_FLAG(internal_run_death_test).empty(); + +# else + + if (GTEST_FLAG(death_test_style) == "threadsafe") + return !GTEST_FLAG(internal_run_death_test).empty(); + else + return g_in_fast_death_test_child; +#endif +} + +} // namespace internal + +// ExitedWithCode constructor. +ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) { +} + +// ExitedWithCode function-call operator. +bool ExitedWithCode::operator()(int exit_status) const { +# if GTEST_OS_WINDOWS + + return exit_status == exit_code_; + +# else + + return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_; + +# endif // GTEST_OS_WINDOWS +} + +# if !GTEST_OS_WINDOWS +// KilledBySignal constructor. 
+KilledBySignal::KilledBySignal(int signum) : signum_(signum) { +} + +// KilledBySignal function-call operator. +bool KilledBySignal::operator()(int exit_status) const { + return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_; +} +# endif // !GTEST_OS_WINDOWS + +namespace internal { + +// Utilities needed for death tests. + +// Generates a textual description of a given exit code, in the format +// specified by wait(2). +static std::string ExitSummary(int exit_code) { + Message m; + +# if GTEST_OS_WINDOWS + + m << "Exited with exit status " << exit_code; + +# else + + if (WIFEXITED(exit_code)) { + m << "Exited with exit status " << WEXITSTATUS(exit_code); + } else if (WIFSIGNALED(exit_code)) { + m << "Terminated by signal " << WTERMSIG(exit_code); + } +# ifdef WCOREDUMP + if (WCOREDUMP(exit_code)) { + m << " (core dumped)"; + } +# endif +# endif // GTEST_OS_WINDOWS + + return m.GetString(); +} + +// Returns true if exit_status describes a process that was terminated +// by a signal, or exited normally with a nonzero exit code. +bool ExitedUnsuccessfully(int exit_status) { + return !ExitedWithCode(0)(exit_status); +} + +# if !GTEST_OS_WINDOWS +// Generates a textual failure message when a death test finds more than +// one thread running, or cannot determine the number of threads, prior +// to executing the given statement. It is the responsibility of the +// caller not to pass a thread_count of 1. +static std::string DeathTestThreadWarning(size_t thread_count) { + Message msg; + msg << "Death tests use fork(), which is unsafe particularly" + << " in a threaded context. For this test, " << GTEST_NAME_ << " "; + if (thread_count == 0) + msg << "couldn't detect the number of threads."; + else + msg << "detected " << thread_count << " threads."; + return msg.GetString(); +} +# endif // !GTEST_OS_WINDOWS + +// Flag characters for reporting a death test that did not die. 
+static const char kDeathTestLived = 'L';
+static const char kDeathTestReturned = 'R';
+static const char kDeathTestThrew = 'T';
+static const char kDeathTestInternalError = 'I';
+
+// An enumeration describing all of the possible ways that a death test can
+// conclude. DIED means that the process died while executing the test
+// code; LIVED means that process lived beyond the end of the test code;
+// RETURNED means that the test statement attempted to execute a return
+// statement, which is not allowed; THREW means that the test statement
+// returned control by throwing an exception. IN_PROGRESS means the test
+// has not yet concluded.
+// TODO(vladl@google.com): Unify names and possibly values for
+// AbortReason, DeathTestOutcome, and flag characters above.
+enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW };
+
+// Routine for aborting the program which is safe to call from an
+// exec-style death test child process, in which case the error
+// message is propagated back to the parent process. Otherwise, the
+// message is simply printed to stderr. In either case, the program
+// then exits with status 1.
+void DeathTestAbort(const std::string& message) {
+  // On a POSIX system, this function may be called from a threadsafe-style
+  // death test child process, which operates on a very small stack. Use
+  // the heap for any additional non-minuscule memory requirements.
+  const InternalRunDeathTestFlag* const flag =
+      GetUnitTestImpl()->internal_run_death_test_flag();
+  if (flag != NULL) {
+    FILE* parent = posix::FDOpen(flag->write_fd(), "w");
+    // If the stream cannot be opened there is no channel to the parent;
+    // skip the report rather than writing through a NULL FILE*, and still
+    // exit with status 1.
+    if (parent != NULL) {
+      fputc(kDeathTestInternalError, parent);
+      fprintf(parent, "%s", message.c_str());
+      fflush(parent);
+    }
+    _exit(1);
+  } else {
+    fprintf(stderr, "%s", message.c_str());
+    fflush(stderr);
+    posix::Abort();
+  }
+}
+
+// A replacement for CHECK that calls DeathTestAbort if the assertion
+// fails.
+# define GTEST_DEATH_TEST_CHECK_(expression) \
+  do { \
+    if (!::testing::internal::IsTrue(expression)) { \
+      DeathTestAbort( \
+          ::std::string("CHECK failed: File ") + __FILE__ +  ", line " \
+          + ::testing::internal::StreamableToString(__LINE__) + ": " \
+          + #expression); \
+    } \
+  } while (::testing::internal::AlwaysFalse())
+
+// This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for
+// evaluating any system call that fulfills two conditions: it must return
+// -1 on failure, and set errno to EINTR when it is interrupted and
+// should be tried again. The macro expands to a loop that repeatedly
+// evaluates the expression as long as it evaluates to -1 and sets
+// errno to EINTR. If the expression evaluates to -1 but errno is
+// something other than EINTR, DeathTestAbort is called.
+//
+// Note that the expression may be evaluated more than once, and that its
+// result is stored in an int.
+# define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \
+  do { \
+    int gtest_retval; \
+    do { \
+      gtest_retval = (expression); \
+    } while (gtest_retval == -1 && errno == EINTR); \
+    if (gtest_retval == -1) { \
+      DeathTestAbort( \
+          ::std::string("CHECK failed: File ") + __FILE__ + ", line " \
+          + ::testing::internal::StreamableToString(__LINE__) + ": " \
+          + #expression + " != -1"); \
+    } \
+  } while (::testing::internal::AlwaysFalse())
+
+// Returns the message describing the last system error in errno.
+// An errno of 0 yields the empty string.
+std::string GetLastErrnoDescription() {
+    return errno == 0 ? "" : posix::StrError(errno);
+}
+
+// This is called from a death test parent process to read a failure
+// message from the death test child process and log it with the FATAL
+// severity. On Windows, the message is read from a pipe handle. On other
+// platforms, it is read from a file descriptor.
+static void FailFromInternalError(int fd) {
+  Message error;
+  char buffer[256];
+  int num_read;
+
+  // Reads at most 255 bytes at a time so that one byte of the buffer is
+  // always left for the terminating '\0'; a read interrupted by a signal
+  // (EINTR) is retried.
+  do {
+    while ((num_read = posix::Read(fd, buffer, 255)) > 0) {
+      buffer[num_read] = '\0';
+      error << buffer;
+    }
+  } while (num_read == -1 && errno == EINTR);
+
+  if (num_read == 0) {
+    GTEST_LOG_(FATAL) << error.GetString();
+  } else {
+    const int last_error = errno;
+    GTEST_LOG_(FATAL) << "Error while reading death test internal: "
+                      << GetLastErrnoDescription() << " [" << last_error << "]";
+  }
+}
+
+// Death test constructor. Increments the running death test count
+// for the current test.
+DeathTest::DeathTest() {
+  TestInfo* const info = GetUnitTestImpl()->current_test_info();
+  if (info == NULL) {
+    DeathTestAbort("Cannot run a death test outside of a TEST or "
+                   "TEST_F construct");
+  }
+}
+
+// Creates and returns a death test by dispatching to the current
+// death test factory.
+bool DeathTest::Create(const char* statement, const RE* regex,
+                       const char* file, int line, DeathTest** test) {
+  return GetUnitTestImpl()->death_test_factory()->Create(
+      statement, regex, file, line, test);
+}
+
+// Returns the message stored by set_last_death_test_message().
+const char* DeathTest::LastMessage() {
+  return last_death_test_message_.c_str();
+}
+
+void DeathTest::set_last_death_test_message(const std::string& message) {
+  last_death_test_message_ = message;
+}
+
+std::string DeathTest::last_death_test_message_;
+
+// Provides cross platform implementation for some death functionality.
+class DeathTestImpl : public DeathTest {
+ protected:
+  DeathTestImpl(const char* a_statement, const RE* a_regex)
+      : statement_(a_statement),
+        regex_(a_regex),
+        spawned_(false),
+        status_(-1),
+        outcome_(IN_PROGRESS),
+        read_fd_(-1),
+        write_fd_(-1) {}
+
+  // read_fd_ is expected to be closed and cleared by a derived class.
+  ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }
+
+  void Abort(AbortReason reason);
+  virtual bool Passed(bool status_ok);
+
+  const char* statement() const { return statement_; }
+  const RE* regex() const { return regex_; }
+  bool spawned() const { return spawned_; }
+  void set_spawned(bool is_spawned) { spawned_ = is_spawned; }
+  int status() const { return status_; }
+  void set_status(int a_status) { status_ = a_status; }
+  DeathTestOutcome outcome() const { return outcome_; }
+  void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; }
+  int read_fd() const { return read_fd_; }
+  void set_read_fd(int fd) { read_fd_ = fd; }
+  int write_fd() const { return write_fd_; }
+  void set_write_fd(int fd) { write_fd_ = fd; }
+
+  // Called in the parent process only. Reads the result code of the death
+  // test child process via a pipe, interprets it to set the outcome_
+  // member, and closes read_fd_. Outputs diagnostics and terminates in
+  // case of unexpected codes.
+  void ReadAndInterpretStatusByte();
+
+ private:
+  // The textual content of the code this object is testing. This class
+  // doesn't own this string and should not attempt to delete it.
+  const char* const statement_;
+  // The regular expression which test output must match. DeathTestImpl
+  // doesn't own this object and should not attempt to delete it.
+  const RE* const regex_;
+  // True if the death test child process has been successfully spawned.
+  bool spawned_;
+  // The exit status of the child process.
+  int status_;
+  // How the death test concluded.
+  DeathTestOutcome outcome_;
+  // Descriptor to the read end of the pipe to the child process. It is
+  // always -1 in the child process. The child keeps its write end of the
+  // pipe in write_fd_.
+  int read_fd_;
+  // Descriptor to the child's write end of the pipe to the parent process.
+  // It is always -1 in the parent process. The parent keeps its end of the
+  // pipe in read_fd_.
+  int write_fd_;
+};
+
+// Called in the parent process only. Reads the result code of the death
+// test child process via a pipe, interprets it to set the outcome_
+// member, and closes read_fd_. Outputs diagnostics and terminates in
+// case of unexpected codes.
+void DeathTestImpl::ReadAndInterpretStatusByte() {
+  char flag;
+  int bytes_read;
+
+  // The read() here blocks until data is available (signifying the
+  // failure of the death test) or until the pipe is closed (signifying
+  // its success), so it's okay to call this in the parent before
+  // the child process has exited.
+  do {
+    bytes_read = posix::Read(read_fd(), &flag, 1);
+  } while (bytes_read == -1 && errno == EINTR);
+
+  if (bytes_read == 0) {
+    set_outcome(DIED);
+  } else if (bytes_read == 1) {
+    switch (flag) {
+      case kDeathTestReturned:
+        set_outcome(RETURNED);
+        break;
+      case kDeathTestThrew:
+        set_outcome(THREW);
+        break;
+      case kDeathTestLived:
+        set_outcome(LIVED);
+        break;
+      case kDeathTestInternalError:
+        FailFromInternalError(read_fd());  // Does not return.
+        break;
+      default:
+        // The byte is cast to a number so that it is logged as its numeric
+        // value rather than as a character.
+        GTEST_LOG_(FATAL) << "Death test child process reported "
+                          << "unexpected status byte ("
+                          << static_cast<unsigned int>(flag) << ")";
+    }
+  } else {
+    GTEST_LOG_(FATAL) << "Read from death test child process failed: "
+                      << GetLastErrnoDescription();
+  }
+  GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd()));
+  set_read_fd(-1);
+}
+
+// Signals that the death test code which should have exited, didn't.
+// Should be called only in a death test child process.
+// Writes a status byte to the child's status file descriptor, then
+// calls _exit(1).
+void DeathTestImpl::Abort(AbortReason reason) {
+  // The parent process considers the death test to be a failure if
+  // it finds any data in our pipe. So, here we write a single flag byte
+  // to the pipe, then exit.
+  const char status_ch =
+      reason == TEST_DID_NOT_DIE ? kDeathTestLived :
+      reason == TEST_THREW_EXCEPTION ? kDeathTestThrew : kDeathTestReturned;
+
+  GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1));
+  // We are leaking the descriptor here because on some platforms (i.e.,
+  // when built as Windows DLL), destructors of global objects will still
+  // run after calling _exit(). On such systems, write_fd_ will be
+  // indirectly closed from the destructor of UnitTestImpl, causing double
+  // close if it is also closed here. On debug configurations, double close
+  // may assert. As there are no in-process buffers to flush here, we are
+  // relying on the OS to close the descriptor after the process terminates
+  // when the destructors are not run.
+  _exit(1);  // Exits w/o any normal exit hooks (we were supposed to crash)
+}
+
+// Returns an indented copy of stderr output for a death test.
+// This makes distinguishing death test output lines from regular log lines
+// much easier.
+static ::std::string FormatDeathTestOutput(const ::std::string& output) {
+  ::std::string ret;
+  // Every line of the output, including a final one without a trailing
+  // newline (possibly empty), gets the "[ DEATH ] " prefix.
+  for (size_t at = 0; ; ) {
+    const size_t line_end = output.find('\n', at);
+    ret += "[ DEATH ] ";
+    if (line_end == ::std::string::npos) {
+      ret += output.substr(at);
+      break;
+    }
+    ret += output.substr(at, line_end + 1 - at);
+    at = line_end + 1;
+  }
+  return ret;
+}
+
+// Assesses the success or failure of a death test, using both private
+// members which have previously been set, and one argument:
+//
+// Private data members:
+//   outcome: An enumeration describing how the death test
+//            concluded: DIED, LIVED, THREW, or RETURNED. The death test
+//            fails in the latter three cases.
+//   status:  The exit status of the child process. On *nix, it is in the
+//            format specified by wait(2). On Windows, this is the
+//            value supplied to the ExitProcess() API or a numeric code
+//            of the exception that terminated the program.
+//   regex:   A regular expression object to be applied to
+//            the test's captured standard error output; the death test
+//            fails if it does not match.
+//
+// Argument:
+//   status_ok: true if exit_status is acceptable in the context of
+//              this particular death test, which fails if it is false
+//
+// Returns true iff all of the above conditions are met. Otherwise, the
+// first failing condition, in the order given above, is the one that is
+// reported. Also sets the last death test message string.
+bool DeathTestImpl::Passed(bool status_ok) {
+  // Without a spawned child there is nothing to assess.
+  if (!spawned())
+    return false;
+
+  const std::string error_message = GetCapturedStderr();
+
+  bool success = false;
+  Message buffer;
+
+  // The buffer is filled with a description of the failure; on success
+  // only this header line is recorded.
+  buffer << "Death test: " << statement() << "\n";
+  switch (outcome()) {
+    case LIVED:
+      buffer << " Result: failed to die.\n"
+             << " Error msg:\n" << FormatDeathTestOutput(error_message);
+      break;
+    case THREW:
+      buffer << " Result: threw an exception.\n"
+             << " Error msg:\n" << FormatDeathTestOutput(error_message);
+      break;
+    case RETURNED:
+      buffer << " Result: illegal return in test statement.\n"
+             << " Error msg:\n" << FormatDeathTestOutput(error_message);
+      break;
+    case DIED:
+      // The exit status is checked first; the regex is applied to the
+      // captured stderr only when the status is acceptable.
+      if (status_ok) {
+        const bool matched = RE::PartialMatch(error_message.c_str(), *regex());
+        if (matched) {
+          success = true;
+        } else {
+          buffer << " Result: died but not with expected error.\n"
+                 << " Expected: " << regex()->pattern() << "\n"
+                 << "Actual msg:\n" << FormatDeathTestOutput(error_message);
+        }
+      } else {
+        buffer << " Result: died but not with expected exit code:\n"
+               << " " << ExitSummary(status()) << "\n"
+               << "Actual msg:\n" << FormatDeathTestOutput(error_message);
+      }
+      break;
+    case IN_PROGRESS:
+    default:
+      GTEST_LOG_(FATAL)
+          << "DeathTest::Passed somehow called before conclusion of test";
+  }
+
+  DeathTest::set_last_death_test_message(buffer.GetString());
+  return success;
+}
+
+# if GTEST_OS_WINDOWS
+// WindowsDeathTest implements death tests on Windows.
Due to the +// specifics of starting new processes on Windows, death tests there are +// always threadsafe, and Google Test considers the +// --gtest_death_test_style=fast setting to be equivalent to +// --gtest_death_test_style=threadsafe there. +// +// A few implementation notes: Like the Linux version, the Windows +// implementation uses pipes for child-to-parent communication. But due to +// the specifics of pipes on Windows, some extra steps are required: +// +// 1. The parent creates a communication pipe and stores handles to both +// ends of it. +// 2. The parent starts the child and provides it with the information +// necessary to acquire the handle to the write end of the pipe. +// 3. The child acquires the write end of the pipe and signals the parent +// using a Windows event. +// 4. Now the parent can release the write end of the pipe on its side. If +// this is done before step 3, the object's reference count goes down to +// 0 and it is destroyed, preventing the child from acquiring it. The +// parent now has to release it, or read operations on the read end of +// the pipe will not return when the child terminates. +// 5. The parent reads child's output through the pipe (outcome code and +// any possible error messages) from the pipe, and its stderr and then +// determines whether to fail the test. +// +// Note: to distinguish Win32 API calls from the local method and function +// calls, the former are explicitly resolved in the global namespace. +// +class WindowsDeathTest : public DeathTestImpl { + public: + WindowsDeathTest(const char* a_statement, + const RE* a_regex, + const char* file, + int line) + : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {} + + // All of these virtual functions are inherited from DeathTest. + virtual int Wait(); + virtual TestRole AssumeRole(); + + private: + // The name of the file in which the death test is located. + const char* const file_; + // The line number on which the death test is located. 
+ const int line_; + // Handle to the write end of the pipe to the child process. + AutoHandle write_handle_; + // Child process handle. + AutoHandle child_handle_; + // Event the child process uses to signal the parent that it has + // acquired the handle to the write end of the pipe. After seeing this + // event the parent can release its own handles to make sure its + // ReadFile() calls return when the child terminates. + AutoHandle event_handle_; +}; + +// Waits for the child in a death test to exit, returning its exit +// status, or 0 if no child process exists. As a side effect, sets the +// outcome data member. +int WindowsDeathTest::Wait() { + if (!spawned()) + return 0; + + // Wait until the child either signals that it has acquired the write end + // of the pipe or it dies. + const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() }; + switch (::WaitForMultipleObjects(2, + wait_handles, + FALSE, // Waits for any of the handles. + INFINITE)) { + case WAIT_OBJECT_0: + case WAIT_OBJECT_0 + 1: + break; + default: + GTEST_DEATH_TEST_CHECK_(false); // Should not get here. + } + + // The child has acquired the write end of the pipe or exited. + // We release the handle on our side and continue. + write_handle_.Reset(); + event_handle_.Reset(); + + ReadAndInterpretStatusByte(); + + // Waits for the child process to exit if it haven't already. This + // returns immediately if the child has already exited, regardless of + // whether previous calls to WaitForMultipleObjects synchronized on this + // handle or not. + GTEST_DEATH_TEST_CHECK_( + WAIT_OBJECT_0 == ::WaitForSingleObject(child_handle_.Get(), + INFINITE)); + DWORD status_code; + GTEST_DEATH_TEST_CHECK_( + ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE); + child_handle_.Reset(); + set_status(static_cast(status_code)); + return status(); +} + +// The AssumeRole process for a Windows death test. 
It creates a child +// process with the same executable as the current process to run the +// death test. The child process is given the --gtest_filter and +// --gtest_internal_run_death_test flags such that it knows to run the +// current death test only. +DeathTest::TestRole WindowsDeathTest::AssumeRole() { + const UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const TestInfo* const info = impl->current_test_info(); + const int death_test_index = info->result()->death_test_count(); + + if (flag != NULL) { + // ParseInternalRunDeathTestFlag() has performed all the necessary + // processing. + set_write_fd(flag->write_fd()); + return EXECUTE_TEST; + } + + // WindowsDeathTest uses an anonymous pipe to communicate results of + // a death test. + SECURITY_ATTRIBUTES handles_are_inheritable = { + sizeof(SECURITY_ATTRIBUTES), NULL, TRUE }; + HANDLE read_handle, write_handle; + GTEST_DEATH_TEST_CHECK_( + ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable, + 0) // Default buffer size. + != FALSE); + set_read_fd(::_open_osfhandle(reinterpret_cast(read_handle), + O_RDONLY)); + write_handle_.Reset(write_handle); + event_handle_.Reset(::CreateEvent( + &handles_are_inheritable, + TRUE, // The event will automatically reset to non-signaled state. + FALSE, // The initial state is non-signalled. + NULL)); // The even is unnamed. + GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL); + const std::string filter_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" + + info->test_case_name() + "." 
+ info->name(); + const std::string internal_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + + "=" + file_ + "|" + StreamableToString(line_) + "|" + + StreamableToString(death_test_index) + "|" + + StreamableToString(static_cast(::GetCurrentProcessId())) + + // size_t has the same width as pointers on both 32-bit and 64-bit + // Windows platforms. + // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx. + "|" + StreamableToString(reinterpret_cast(write_handle)) + + "|" + StreamableToString(reinterpret_cast(event_handle_.Get())); + + char executable_path[_MAX_PATH + 1]; // NOLINT + GTEST_DEATH_TEST_CHECK_( + _MAX_PATH + 1 != ::GetModuleFileNameA(NULL, + executable_path, + _MAX_PATH)); + + std::string command_line = + std::string(::GetCommandLineA()) + " " + filter_flag + " \"" + + internal_flag + "\""; + + DeathTest::set_last_death_test_message(""); + + CaptureStderr(); + // Flush the log buffers since the log streams are shared with the child. + FlushInfoLog(); + + // The child process will share the standard handles with the parent. + STARTUPINFOA startup_info; + memset(&startup_info, 0, sizeof(STARTUPINFO)); + startup_info.dwFlags = STARTF_USESTDHANDLES; + startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE); + startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE); + startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE); + + PROCESS_INFORMATION process_info; + GTEST_DEATH_TEST_CHECK_(::CreateProcessA( + executable_path, + const_cast(command_line.c_str()), + NULL, // Retuned process handle is not inheritable. + NULL, // Retuned thread handle is not inheritable. + TRUE, // Child inherits all inheritable handles (for write_handle_). + 0x0, // Default creation flags. + NULL, // Inherit the parent's environment. 
+ UnitTest::GetInstance()->original_working_dir(), + &startup_info, + &process_info) != FALSE); + child_handle_.Reset(process_info.hProcess); + ::CloseHandle(process_info.hThread); + set_spawned(true); + return OVERSEE_TEST; +} +# else // We are not on Windows. + +// ForkingDeathTest provides implementations for most of the abstract +// methods of the DeathTest interface. Only the AssumeRole method is +// left undefined. +class ForkingDeathTest : public DeathTestImpl { + public: + ForkingDeathTest(const char* statement, const RE* regex); + + // All of these virtual functions are inherited from DeathTest. + virtual int Wait(); + + protected: + void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; } + + private: + // PID of child process during death test; 0 in the child process itself. + pid_t child_pid_; +}; + +// Constructs a ForkingDeathTest. +ForkingDeathTest::ForkingDeathTest(const char* a_statement, const RE* a_regex) + : DeathTestImpl(a_statement, a_regex), + child_pid_(-1) {} + +// Waits for the child in a death test to exit, returning its exit +// status, or 0 if no child process exists. As a side effect, sets the +// outcome data member. +int ForkingDeathTest::Wait() { + if (!spawned()) + return 0; + + ReadAndInterpretStatusByte(); + + int status_value; + GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0)); + set_status(status_value); + return status_value; +} + +// A concrete death test class that forks, then immediately runs the test +// in the child process. +class NoExecDeathTest : public ForkingDeathTest { + public: + NoExecDeathTest(const char* a_statement, const RE* a_regex) : + ForkingDeathTest(a_statement, a_regex) { } + virtual TestRole AssumeRole(); +}; + +// The AssumeRole process for a fork-and-run death test. It implements a +// straightforward fork, with a simple pipe to transmit the status byte. 
+DeathTest::TestRole NoExecDeathTest::AssumeRole() { + const size_t thread_count = GetThreadCount(); + if (thread_count != 1) { + GTEST_LOG_(WARNING) << DeathTestThreadWarning(thread_count); + } + + int pipe_fd[2]; + GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); + + DeathTest::set_last_death_test_message(""); + CaptureStderr(); + // When we fork the process below, the log file buffers are copied, but the + // file descriptors are shared. We flush all log files here so that closing + // the file descriptors in the child process doesn't throw off the + // synchronization between descriptors and buffers in the parent process. + // This is as close to the fork as possible to avoid a race condition in case + // there are multiple threads running before the death test, and another + // thread writes to the log file. + FlushInfoLog(); + + const pid_t child_pid = fork(); + GTEST_DEATH_TEST_CHECK_(child_pid != -1); + set_child_pid(child_pid); + if (child_pid == 0) { + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0])); + set_write_fd(pipe_fd[1]); + // Redirects all logging to stderr in the child process to prevent + // concurrent writes to the log files. We capture stderr in the parent + // process and append the child process' output to a log. + LogToStderr(); + // Event forwarding to the listeners of event listener API mush be shut + // down in death test subprocesses. + GetUnitTestImpl()->listeners()->SuppressEventForwarding(); + g_in_fast_death_test_child = true; + return EXECUTE_TEST; + } else { + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1])); + set_read_fd(pipe_fd[0]); + set_spawned(true); + return OVERSEE_TEST; + } +} + +// A concrete death test class that forks and re-executes the main +// program from the beginning, with command-line flags set that cause +// only this specific death test to be run. 
+class ExecDeathTest : public ForkingDeathTest { + public: + ExecDeathTest(const char* a_statement, const RE* a_regex, + const char* file, int line) : + ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { } + virtual TestRole AssumeRole(); + private: + static ::std::vector + GetArgvsForDeathTestChildProcess() { + ::std::vector args = GetInjectableArgvs(); + return args; + } + // The name of the file in which the death test is located. + const char* const file_; + // The line number on which the death test is located. + const int line_; +}; + +// Utility class for accumulating command-line arguments. +class Arguments { + public: + Arguments() { + args_.push_back(NULL); + } + + ~Arguments() { + for (std::vector::iterator i = args_.begin(); i != args_.end(); + ++i) { + free(*i); + } + } + void AddArgument(const char* argument) { + args_.insert(args_.end() - 1, posix::StrDup(argument)); + } + + template + void AddArguments(const ::std::vector& arguments) { + for (typename ::std::vector::const_iterator i = arguments.begin(); + i != arguments.end(); + ++i) { + args_.insert(args_.end() - 1, posix::StrDup(i->c_str())); + } + } + char* const* Argv() { + return &args_[0]; + } + + private: + std::vector args_; +}; + +// A struct that encompasses the arguments to the child process of a +// threadsafe-style death test process. +struct ExecDeathTestArgs { + char* const* argv; // Command-line arguments for the child's call to exec + int close_fd; // File descriptor to close; the read end of a pipe +}; + +# if GTEST_OS_MAC +inline char** GetEnviron() { + // When Google Test is built as a framework on MacOS X, the environ variable + // is unavailable. Apple's documentation (man environ) recommends using + // _NSGetEnviron() instead. + return *_NSGetEnviron(); +} +# else +// Some POSIX platforms expect you to declare environ. extern "C" makes +// it reside in the global namespace. 
+extern "C" char** environ; +inline char** GetEnviron() { return environ; } +# endif // GTEST_OS_MAC + +# if !GTEST_OS_QNX +// The main function for a threadsafe-style death test child process. +// This function is called in a clone()-ed process and thus must avoid +// any potentially unsafe operations like malloc or libc functions. +static int ExecDeathTestChildMain(void* child_arg) { + ExecDeathTestArgs* const args = static_cast(child_arg); + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd)); + + // We need to execute the test program in the same environment where + // it was originally invoked. Therefore we change to the original + // working directory first. + const char* const original_dir = + UnitTest::GetInstance()->original_working_dir(); + // We can safely call chdir() as it's a direct system call. + if (chdir(original_dir) != 0) { + DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " + + GetLastErrnoDescription()); + return EXIT_FAILURE; + } + + // We can safely call execve() as it's a direct system call. We + // cannot use execvp() as it's a libc function and thus potentially + // unsafe. Since execve() doesn't search the PATH, the user must + // invoke the test program via a valid path that contains at least + // one path separator. + execve(args->argv[0], args->argv, GetEnviron()); + DeathTestAbort(std::string("execve(") + args->argv[0] + ", ...) in " + + original_dir + " failed: " + + GetLastErrnoDescription()); + return EXIT_FAILURE; +} +# endif // !GTEST_OS_QNX + +// Two utility routines that together determine the direction the stack +// grows. +// This could be accomplished more elegantly by a single recursive +// function, but we want to guard against the unlikely possibility of +// a smart compiler optimizing the recursion away. +// +// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining +// StackLowerThanAddress into StackGrowsDown, which then doesn't give +// correct answer. 
+void StackLowerThanAddress(const void* ptr, bool* result) GTEST_NO_INLINE_; +void StackLowerThanAddress(const void* ptr, bool* result) { + int dummy; + *result = (&dummy < ptr); +} + +bool StackGrowsDown() { + int dummy; + bool result; + StackLowerThanAddress(&dummy, &result); + return result; +} + +// Spawns a child process with the same executable as the current process in +// a thread-safe manner and instructs it to run the death test. The +// implementation uses fork(2) + exec. On systems where clone(2) is +// available, it is used instead, being slightly more thread-safe. On QNX, +// fork supports only single-threaded environments, so this function uses +// spawn(2) there instead. The function dies with an error message if +// anything goes wrong. +static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) { + ExecDeathTestArgs args = { argv, close_fd }; + pid_t child_pid = -1; + +# if GTEST_OS_QNX + // Obtains the current directory and sets it to be closed in the child + // process. + const int cwd_fd = open(".", O_RDONLY); + GTEST_DEATH_TEST_CHECK_(cwd_fd != -1); + GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(cwd_fd, F_SETFD, FD_CLOEXEC)); + // We need to execute the test program in the same environment where + // it was originally invoked. Therefore we change to the original + // working directory first. + const char* const original_dir = + UnitTest::GetInstance()->original_working_dir(); + // We can safely call chdir() as it's a direct system call. + if (chdir(original_dir) != 0) { + DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " + + GetLastErrnoDescription()); + return EXIT_FAILURE; + } + + int fd_flags; + // Set close_fd to be closed after spawn. + GTEST_DEATH_TEST_CHECK_SYSCALL_(fd_flags = fcntl(close_fd, F_GETFD)); + GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(close_fd, F_SETFD, + fd_flags | FD_CLOEXEC)); + struct inheritance inherit = {0}; + // spawn is a system call. 
+ child_pid = spawn(args.argv[0], 0, NULL, &inherit, args.argv, GetEnviron()); + // Restores the current working directory. + GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1); + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd)); + +# else // GTEST_OS_QNX +# if GTEST_OS_LINUX + // When a SIGPROF signal is received while fork() or clone() are executing, + // the process may hang. To avoid this, we ignore SIGPROF here and re-enable + // it after the call to fork()/clone() is complete. + struct sigaction saved_sigprof_action; + struct sigaction ignore_sigprof_action; + memset(&ignore_sigprof_action, 0, sizeof(ignore_sigprof_action)); + sigemptyset(&ignore_sigprof_action.sa_mask); + ignore_sigprof_action.sa_handler = SIG_IGN; + GTEST_DEATH_TEST_CHECK_SYSCALL_(sigaction( + SIGPROF, &ignore_sigprof_action, &saved_sigprof_action)); +# endif // GTEST_OS_LINUX + +# if GTEST_HAS_CLONE + const bool use_fork = GTEST_FLAG(death_test_use_fork); + + if (!use_fork) { + static const bool stack_grows_down = StackGrowsDown(); + const size_t stack_size = getpagesize(); + // MMAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead. + void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED); + + // Maximum stack alignment in bytes: For a downward-growing stack, this + // amount is subtracted from size of the stack space to get an address + // that is within the stack space and is aligned on all systems we care + // about. As far as I know there is no ABI with stack alignment greater + // than 64. We assume stack and stack_size already have alignment of + // kMaxStackAlignment. + const size_t kMaxStackAlignment = 64; + void* const stack_top = + static_cast(stack) + + (stack_grows_down ? 
stack_size - kMaxStackAlignment : 0); + GTEST_DEATH_TEST_CHECK_(stack_size > kMaxStackAlignment && + reinterpret_cast(stack_top) % kMaxStackAlignment == 0); + + child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args); + + GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1); + } +# else + const bool use_fork = true; +# endif // GTEST_HAS_CLONE + + if (use_fork && (child_pid = fork()) == 0) { + ExecDeathTestChildMain(&args); + _exit(0); + } +# endif // GTEST_OS_QNX +# if GTEST_OS_LINUX + GTEST_DEATH_TEST_CHECK_SYSCALL_( + sigaction(SIGPROF, &saved_sigprof_action, NULL)); +# endif // GTEST_OS_LINUX + + GTEST_DEATH_TEST_CHECK_(child_pid != -1); + return child_pid; +} + +// The AssumeRole process for a fork-and-exec death test. It re-executes the +// main program from the beginning, setting the --gtest_filter +// and --gtest_internal_run_death_test flags to cause only the current +// death test to be re-run. +DeathTest::TestRole ExecDeathTest::AssumeRole() { + const UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const TestInfo* const info = impl->current_test_info(); + const int death_test_index = info->result()->death_test_count(); + + if (flag != NULL) { + set_write_fd(flag->write_fd()); + return EXECUTE_TEST; + } + + int pipe_fd[2]; + GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); + // Clear the close-on-exec flag on the write end of the pipe, lest + // it be closed when the child process does an exec: + GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1); + + const std::string filter_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" + + info->test_case_name() + "." 
+ info->name(); + const std::string internal_flag = + std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "=" + + file_ + "|" + StreamableToString(line_) + "|" + + StreamableToString(death_test_index) + "|" + + StreamableToString(pipe_fd[1]); + Arguments args; + args.AddArguments(GetArgvsForDeathTestChildProcess()); + args.AddArgument(filter_flag.c_str()); + args.AddArgument(internal_flag.c_str()); + + DeathTest::set_last_death_test_message(""); + + CaptureStderr(); + // See the comment in NoExecDeathTest::AssumeRole for why the next line + // is necessary. + FlushInfoLog(); + + const pid_t child_pid = ExecDeathTestSpawnChild(args.Argv(), pipe_fd[0]); + GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1])); + set_child_pid(child_pid); + set_read_fd(pipe_fd[0]); + set_spawned(true); + return OVERSEE_TEST; +} + +# endif // !GTEST_OS_WINDOWS + +// Creates a concrete DeathTest-derived class that depends on the +// --gtest_death_test_style flag, and sets the pointer pointed to +// by the "test" argument to its address. If the test should be +// skipped, sets that pointer to NULL. Returns true, unless the +// flag is set to an invalid value. 
+bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex, + const char* file, int line, + DeathTest** test) { + UnitTestImpl* const impl = GetUnitTestImpl(); + const InternalRunDeathTestFlag* const flag = + impl->internal_run_death_test_flag(); + const int death_test_index = impl->current_test_info() + ->increment_death_test_count(); + + if (flag != NULL) { + if (death_test_index > flag->index()) { + DeathTest::set_last_death_test_message( + "Death test count (" + StreamableToString(death_test_index) + + ") somehow exceeded expected maximum (" + + StreamableToString(flag->index()) + ")"); + return false; + } + + if (!(flag->file() == file && flag->line() == line && + flag->index() == death_test_index)) { + *test = NULL; + return true; + } + } + +# if GTEST_OS_WINDOWS + + if (GTEST_FLAG(death_test_style) == "threadsafe" || + GTEST_FLAG(death_test_style) == "fast") { + *test = new WindowsDeathTest(statement, regex, file, line); + } + +# else + + if (GTEST_FLAG(death_test_style) == "threadsafe") { + *test = new ExecDeathTest(statement, regex, file, line); + } else if (GTEST_FLAG(death_test_style) == "fast") { + *test = new NoExecDeathTest(statement, regex); + } + +# endif // GTEST_OS_WINDOWS + + else { // NOLINT - this is more readable than unbalanced brackets inside #if. + DeathTest::set_last_death_test_message( + "Unknown death test style \"" + GTEST_FLAG(death_test_style) + + "\" encountered"); + return false; + } + + return true; +} + +// Splits a given string on a given delimiter, populating a given +// vector with the fields. GTEST_HAS_DEATH_TEST implies that we have +// ::std::string, so we can use it here. 
+static void SplitString(const ::std::string& str, char delimiter, + ::std::vector< ::std::string>* dest) { + ::std::vector< ::std::string> parsed; + ::std::string::size_type pos = 0; + while (::testing::internal::AlwaysTrue()) { + const ::std::string::size_type colon = str.find(delimiter, pos); + if (colon == ::std::string::npos) { + parsed.push_back(str.substr(pos)); + break; + } else { + parsed.push_back(str.substr(pos, colon - pos)); + pos = colon + 1; + } + } + dest->swap(parsed); +} + +# if GTEST_OS_WINDOWS +// Recreates the pipe and event handles from the provided parameters, +// signals the event, and returns a file descriptor wrapped around the pipe +// handle. This function is called in the child process only. +int GetStatusFileDescriptor(unsigned int parent_process_id, + size_t write_handle_as_size_t, + size_t event_handle_as_size_t) { + AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE, + FALSE, // Non-inheritable. + parent_process_id)); + if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) { + DeathTestAbort("Unable to open parent process " + + StreamableToString(parent_process_id)); + } + + // TODO(vladl@google.com): Replace the following check with a + // compile-time assertion when available. + GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t)); + + const HANDLE write_handle = + reinterpret_cast(write_handle_as_size_t); + HANDLE dup_write_handle; + + // The newly initialized handle is accessible only in in the parent + // process. To obtain one accessible within the child, we need to use + // DuplicateHandle. + if (!::DuplicateHandle(parent_process_handle.Get(), write_handle, + ::GetCurrentProcess(), &dup_write_handle, + 0x0, // Requested privileges ignored since + // DUPLICATE_SAME_ACCESS is used. + FALSE, // Request non-inheritable handler. 
+ DUPLICATE_SAME_ACCESS)) { + DeathTestAbort("Unable to duplicate the pipe handle " + + StreamableToString(write_handle_as_size_t) + + " from the parent process " + + StreamableToString(parent_process_id)); + } + + const HANDLE event_handle = reinterpret_cast(event_handle_as_size_t); + HANDLE dup_event_handle; + + if (!::DuplicateHandle(parent_process_handle.Get(), event_handle, + ::GetCurrentProcess(), &dup_event_handle, + 0x0, + FALSE, + DUPLICATE_SAME_ACCESS)) { + DeathTestAbort("Unable to duplicate the event handle " + + StreamableToString(event_handle_as_size_t) + + " from the parent process " + + StreamableToString(parent_process_id)); + } + + const int write_fd = + ::_open_osfhandle(reinterpret_cast(dup_write_handle), O_APPEND); + if (write_fd == -1) { + DeathTestAbort("Unable to convert pipe handle " + + StreamableToString(write_handle_as_size_t) + + " to a file descriptor"); + } + + // Signals the parent that the write end of the pipe has been acquired + // so the parent can release its own write end. + ::SetEvent(dup_event_handle); + + return write_fd; +} +# endif // GTEST_OS_WINDOWS + +// Returns a newly created InternalRunDeathTestFlag object with fields +// initialized from the GTEST_FLAG(internal_run_death_test) flag if +// the flag is specified; otherwise returns NULL. +InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() { + if (GTEST_FLAG(internal_run_death_test) == "") return NULL; + + // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we + // can use it here. 
+ int line = -1; + int index = -1; + ::std::vector< ::std::string> fields; + SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &fields); + int write_fd = -1; + +# if GTEST_OS_WINDOWS + + unsigned int parent_process_id = 0; + size_t write_handle_as_size_t = 0; + size_t event_handle_as_size_t = 0; + + if (fields.size() != 6 + || !ParseNaturalNumber(fields[1], &line) + || !ParseNaturalNumber(fields[2], &index) + || !ParseNaturalNumber(fields[3], &parent_process_id) + || !ParseNaturalNumber(fields[4], &write_handle_as_size_t) + || !ParseNaturalNumber(fields[5], &event_handle_as_size_t)) { + DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + + GTEST_FLAG(internal_run_death_test)); + } + write_fd = GetStatusFileDescriptor(parent_process_id, + write_handle_as_size_t, + event_handle_as_size_t); +# else + + if (fields.size() != 4 + || !ParseNaturalNumber(fields[1], &line) + || !ParseNaturalNumber(fields[2], &index) + || !ParseNaturalNumber(fields[3], &write_fd)) { + DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + + GTEST_FLAG(internal_run_death_test)); + } + +# endif // GTEST_OS_WINDOWS + + return new InternalRunDeathTestFlag(fields[0], line, index, write_fd); +} + +} // namespace internal + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: keith.ray@gmail.com (Keith Ray) + + +#include + +#if GTEST_OS_WINDOWS_MOBILE +# include +#elif GTEST_OS_WINDOWS +# include +# include +#elif GTEST_OS_SYMBIAN +// Symbian OpenC has PATH_MAX in sys/syslimits.h +# include +#else +# include +# include // Some Linux distributions define PATH_MAX here. +#endif // GTEST_OS_WINDOWS_MOBILE + +#if GTEST_OS_WINDOWS +# define GTEST_PATH_MAX_ _MAX_PATH +#elif defined(PATH_MAX) +# define GTEST_PATH_MAX_ PATH_MAX +#elif defined(_XOPEN_PATH_MAX) +# define GTEST_PATH_MAX_ _XOPEN_PATH_MAX +#else +# define GTEST_PATH_MAX_ _POSIX_PATH_MAX +#endif // GTEST_OS_WINDOWS + + +namespace testing { +namespace internal { + +#if GTEST_OS_WINDOWS +// On Windows, '\\' is the standard path separator, but many tools and the +// Windows API also accept '/' as an alternate path separator. Unless otherwise +// noted, a file path can contain either kind of path separators, or a mixture +// of them. 
+const char kPathSeparator = '\\'; +const char kAlternatePathSeparator = '/'; +const char kPathSeparatorString[] = "\\"; +const char kAlternatePathSeparatorString[] = "/"; +# if GTEST_OS_WINDOWS_MOBILE +// Windows CE doesn't have a current directory. You should not use +// the current directory in tests on Windows CE, but this at least +// provides a reasonable fallback. +const char kCurrentDirectoryString[] = "\\"; +// Windows CE doesn't define INVALID_FILE_ATTRIBUTES +const DWORD kInvalidFileAttributes = 0xffffffff; +# else +const char kCurrentDirectoryString[] = ".\\"; +# endif // GTEST_OS_WINDOWS_MOBILE +#else +const char kPathSeparator = '/'; +const char kCurrentDirectoryString[] = "./"; +#endif // GTEST_OS_WINDOWS + +// Returns whether the given character is a valid path separator. +static bool IsPathSeparator(char c) { +#if GTEST_HAS_ALT_PATH_SEP_ + return (c == kPathSeparator) || (c == kAlternatePathSeparator); +#else + return c == kPathSeparator; +#endif +} + +// Returns the current working directory, or "" if unsuccessful. +FilePath FilePath::GetCurrentDir() { +#if GTEST_OS_WINDOWS_MOBILE + // Windows CE doesn't have a current directory, so we just return + // something reasonable. + return FilePath(kCurrentDirectoryString); +#elif GTEST_OS_WINDOWS + char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; + return FilePath(_getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd); +#else + char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; + return FilePath(getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd); +#endif // GTEST_OS_WINDOWS_MOBILE +} + +// Returns a copy of the FilePath with the case-insensitive extension removed. +// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns +// FilePath("dir/file"). If a case-insensitive extension is not +// found, returns a copy of the original FilePath. 
+FilePath FilePath::RemoveExtension(const char* extension) const { + const std::string dot_extension = std::string(".") + extension; + if (String::EndsWithCaseInsensitive(pathname_, dot_extension)) { + return FilePath(pathname_.substr( + 0, pathname_.length() - dot_extension.length())); + } + return *this; +} + +// Returns a pointer to the last occurence of a valid path separator in +// the FilePath. On Windows, for example, both '/' and '\' are valid path +// separators. Returns NULL if no path separator was found. +const char* FilePath::FindLastPathSeparator() const { + const char* const last_sep = strrchr(c_str(), kPathSeparator); +#if GTEST_HAS_ALT_PATH_SEP_ + const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator); + // Comparing two pointers of which only one is NULL is undefined. + if (last_alt_sep != NULL && + (last_sep == NULL || last_alt_sep > last_sep)) { + return last_alt_sep; + } +#endif + return last_sep; +} + +// Returns a copy of the FilePath with the directory part removed. +// Example: FilePath("path/to/file").RemoveDirectoryName() returns +// FilePath("file"). If there is no directory part ("just_a_file"), it returns +// the FilePath unmodified. If there is no file part ("just_a_dir/") it +// returns an empty FilePath (""). +// On Windows platform, '\' is the path separator, otherwise it is '/'. +FilePath FilePath::RemoveDirectoryName() const { + const char* const last_sep = FindLastPathSeparator(); + return last_sep ? FilePath(last_sep + 1) : *this; +} + +// RemoveFileName returns the directory path with the filename removed. +// Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". +// If the FilePath is "a_file" or "/a_file", RemoveFileName returns +// FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does +// not have a file, like "just/a/dir/", it returns the FilePath unmodified. +// On Windows platform, '\' is the path separator, otherwise it is '/'. 
+FilePath FilePath::RemoveFileName() const { + const char* const last_sep = FindLastPathSeparator(); + std::string dir; + if (last_sep) { + dir = std::string(c_str(), last_sep + 1 - c_str()); + } else { + dir = kCurrentDirectoryString; + } + return FilePath(dir); +} + +// Helper functions for naming files in a directory for xml output. + +// Given directory = "dir", base_name = "test", number = 0, +// extension = "xml", returns "dir/test.xml". If number is greater +// than zero (e.g., 12), returns "dir/test_12.xml". +// On Windows platform, uses \ as the separator rather than /. +FilePath FilePath::MakeFileName(const FilePath& directory, + const FilePath& base_name, + int number, + const char* extension) { + std::string file; + if (number == 0) { + file = base_name.string() + "." + extension; + } else { + file = base_name.string() + "_" + StreamableToString(number) + + "." + extension; + } + return ConcatPaths(directory, FilePath(file)); +} + +// Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml". +// On Windows, uses \ as the separator rather than /. +FilePath FilePath::ConcatPaths(const FilePath& directory, + const FilePath& relative_path) { + if (directory.IsEmpty()) + return relative_path; + const FilePath dir(directory.RemoveTrailingPathSeparator()); + return FilePath(dir.string() + kPathSeparator + relative_path.string()); +} + +// Returns true if pathname describes something findable in the file-system, +// either a file, directory, or whatever. +bool FilePath::FileOrDirectoryExists() const { +#if GTEST_OS_WINDOWS_MOBILE + LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str()); + const DWORD attributes = GetFileAttributes(unicode); + delete [] unicode; + return attributes != kInvalidFileAttributes; +#else + posix::StatStruct file_stat; + return posix::Stat(pathname_.c_str(), &file_stat) == 0; +#endif // GTEST_OS_WINDOWS_MOBILE +} + +// Returns true if pathname describes a directory in the file-system +// that exists. 
+bool FilePath::DirectoryExists() const { + bool result = false; +#if GTEST_OS_WINDOWS + // Don't strip off trailing separator if path is a root directory on + // Windows (like "C:\\"). + const FilePath& path(IsRootDirectory() ? *this : + RemoveTrailingPathSeparator()); +#else + const FilePath& path(*this); +#endif + +#if GTEST_OS_WINDOWS_MOBILE + LPCWSTR unicode = String::AnsiToUtf16(path.c_str()); + const DWORD attributes = GetFileAttributes(unicode); + delete [] unicode; + if ((attributes != kInvalidFileAttributes) && + (attributes & FILE_ATTRIBUTE_DIRECTORY)) { + result = true; + } +#else + posix::StatStruct file_stat; + result = posix::Stat(path.c_str(), &file_stat) == 0 && + posix::IsDir(file_stat); +#endif // GTEST_OS_WINDOWS_MOBILE + + return result; +} + +// Returns true if pathname describes a root directory. (Windows has one +// root directory per disk drive.) +bool FilePath::IsRootDirectory() const { +#if GTEST_OS_WINDOWS + // TODO(wan@google.com): on Windows a network share like + // \\server\share can be a root directory, although it cannot be the + // current directory. Handle this properly. + return pathname_.length() == 3 && IsAbsolutePath(); +#else + return pathname_.length() == 1 && IsPathSeparator(pathname_.c_str()[0]); +#endif +} + +// Returns true if pathname describes an absolute path. +bool FilePath::IsAbsolutePath() const { + const char* const name = pathname_.c_str(); +#if GTEST_OS_WINDOWS + return pathname_.length() >= 3 && + ((name[0] >= 'a' && name[0] <= 'z') || + (name[0] >= 'A' && name[0] <= 'Z')) && + name[1] == ':' && + IsPathSeparator(name[2]); +#else + return IsPathSeparator(name[0]); +#endif +} + +// Returns a pathname for a file that does not currently exist. The pathname +// will be directory/base_name.extension or +// directory/base_name_.extension if directory/base_name.extension +// already exists. The number will be incremented until a pathname is found +// that does not already exist. 
+// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. +// There could be a race condition if two or more processes are calling this +// function at the same time -- they could both pick the same filename. +FilePath FilePath::GenerateUniqueFileName(const FilePath& directory, + const FilePath& base_name, + const char* extension) { + FilePath full_pathname; + int number = 0; + do { + full_pathname.Set(MakeFileName(directory, base_name, number++, extension)); + } while (full_pathname.FileOrDirectoryExists()); + return full_pathname; +} + +// Returns true if FilePath ends with a path separator, which indicates that +// it is intended to represent a directory. Returns false otherwise. +// This does NOT check that a directory (or file) actually exists. +bool FilePath::IsDirectory() const { + return !pathname_.empty() && + IsPathSeparator(pathname_.c_str()[pathname_.length() - 1]); +} + +// Create directories so that path exists. Returns true if successful or if +// the directories already exist; returns false if unable to create directories +// for any reason. +bool FilePath::CreateDirectoriesRecursively() const { + if (!this->IsDirectory()) { + return false; + } + + if (pathname_.length() == 0 || this->DirectoryExists()) { + return true; + } + + const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName()); + return parent.CreateDirectoriesRecursively() && this->CreateFolder(); +} + +// Create the directory so that path exists. Returns true if successful or +// if the directory already exists; returns false if unable to create the +// directory for any reason, including if the parent directory does not +// exist. Not named "CreateDirectory" because that's a macro on Windows. +bool FilePath::CreateFolder() const { +#if GTEST_OS_WINDOWS_MOBILE + FilePath removed_sep(this->RemoveTrailingPathSeparator()); + LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str()); + int result = CreateDirectory(unicode, NULL) ? 
0 : -1; + delete [] unicode; +#elif GTEST_OS_WINDOWS + int result = _mkdir(pathname_.c_str()); +#else + int result = mkdir(pathname_.c_str(), 0777); +#endif // GTEST_OS_WINDOWS_MOBILE + + if (result == -1) { + return this->DirectoryExists(); // An error is OK if the directory exists. + } + return true; // No error. +} + +// If input name has a trailing separator character, remove it and return the +// name, otherwise return the name string unmodified. +// On Windows platform, uses \ as the separator, other platforms use /. +FilePath FilePath::RemoveTrailingPathSeparator() const { + return IsDirectory() + ? FilePath(pathname_.substr(0, pathname_.length() - 1)) + : *this; +} + +// Removes any redundant separators that might be in the pathname. +// For example, "bar///foo" becomes "bar/foo". Does not eliminate other +// redundancies that might be in a pathname involving "." or "..". +// TODO(wan@google.com): handle Windows network shares (e.g. \\server\share). +void FilePath::Normalize() { + if (pathname_.c_str() == NULL) { + pathname_ = ""; + return; + } + const char* src = pathname_.c_str(); + char* const dest = new char[pathname_.length() + 1]; + char* dest_ptr = dest; + memset(dest_ptr, 0, pathname_.length() + 1); + + while (*src != '\0') { + *dest_ptr = *src; + if (!IsPathSeparator(*src)) { + src++; + } else { +#if GTEST_HAS_ALT_PATH_SEP_ + if (*dest_ptr == kAlternatePathSeparator) { + *dest_ptr = kPathSeparator; + } +#endif + while (IsPathSeparator(*src)) + src++; + } + dest_ptr++; + } + *dest_ptr = '\0'; + pathname_ = dest; + delete[] dest; +} + +} // namespace internal +} // namespace testing +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + + +#include +#include +#include +#include + +#if GTEST_OS_WINDOWS_MOBILE +# include // For TerminateProcess() +#elif GTEST_OS_WINDOWS +# include +# include +#else +# include +#endif // GTEST_OS_WINDOWS_MOBILE + +#if GTEST_OS_MAC +# include +# include +# include +#endif // GTEST_OS_MAC + +#if GTEST_OS_QNX +# include +# include +#endif // GTEST_OS_QNX + + +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. 
+#define GTEST_IMPLEMENTATION_ 1 +#undef GTEST_IMPLEMENTATION_ + +namespace testing { +namespace internal { + +#if defined(_MSC_VER) || defined(__BORLANDC__) +// MSVC and C++Builder do not provide a definition of STDERR_FILENO. +const int kStdOutFileno = 1; +const int kStdErrFileno = 2; +#else +const int kStdOutFileno = STDOUT_FILENO; +const int kStdErrFileno = STDERR_FILENO; +#endif // _MSC_VER + +#if GTEST_OS_MAC + +// Returns the number of threads running in the process, or 0 to indicate that +// we cannot detect it. +size_t GetThreadCount() { + const task_t task = mach_task_self(); + mach_msg_type_number_t thread_count; + thread_act_array_t thread_list; + const kern_return_t status = task_threads(task, &thread_list, &thread_count); + if (status == KERN_SUCCESS) { + // task_threads allocates resources in thread_list and we need to free them + // to avoid leaks. + vm_deallocate(task, + reinterpret_cast(thread_list), + sizeof(thread_t) * thread_count); + return static_cast(thread_count); + } else { + return 0; + } +} + +#elif GTEST_OS_QNX + +// Returns the number of threads running in the process, or 0 to indicate that +// we cannot detect it. +size_t GetThreadCount() { + const int fd = open("/proc/self/as", O_RDONLY); + if (fd < 0) { + return 0; + } + procfs_info process_info; + const int status = + devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), NULL); + close(fd); + if (status == EOK) { + return static_cast(process_info.num_threads); + } else { + return 0; + } +} + +#else + +size_t GetThreadCount() { + // There's no portable way to detect the number of threads, so we just + // return 0 to indicate that we cannot detect it. + return 0; +} + +#endif // GTEST_OS_MAC + +#if GTEST_USES_POSIX_RE + +// Implements RE. Currently only needed for death tests. + +RE::~RE() { + if (is_valid_) { + // regfree'ing an invalid regex might crash because the content + // of the regex is undefined. 
Since the regex's are essentially + // the same, one cannot be valid (or invalid) without the other + // being so too. + regfree(&partial_regex_); + regfree(&full_regex_); + } + free(const_cast(pattern_)); +} + +// Returns true iff regular expression re matches the entire str. +bool RE::FullMatch(const char* str, const RE& re) { + if (!re.is_valid_) return false; + + regmatch_t match; + return regexec(&re.full_regex_, str, 1, &match, 0) == 0; +} + +// Returns true iff regular expression re matches a substring of str +// (including str itself). +bool RE::PartialMatch(const char* str, const RE& re) { + if (!re.is_valid_) return false; + + regmatch_t match; + return regexec(&re.partial_regex_, str, 1, &match, 0) == 0; +} + +// Initializes an RE from its string representation. +void RE::Init(const char* regex) { + pattern_ = posix::StrDup(regex); + + // Reserves enough bytes to hold the regular expression used for a + // full match. + const size_t full_regex_len = strlen(regex) + 10; + char* const full_pattern = new char[full_regex_len]; + + snprintf(full_pattern, full_regex_len, "^(%s)$", regex); + is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0; + // We want to call regcomp(&partial_regex_, ...) even if the + // previous expression returns false. Otherwise partial_regex_ may + // not be properly initialized can may cause trouble when it's + // freed. + // + // Some implementation of POSIX regex (e.g. on at least some + // versions of Cygwin) doesn't accept the empty string as a valid + // regex. We change it to an equivalent form "()" to be safe. + if (is_valid_) { + const char* const partial_regex = (*regex == '\0') ? 
"()" : regex; + is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0; + } + EXPECT_TRUE(is_valid_) + << "Regular expression \"" << regex + << "\" is not a valid POSIX Extended regular expression."; + + delete[] full_pattern; +} + +#elif GTEST_USES_SIMPLE_RE + +// Returns true iff ch appears anywhere in str (excluding the +// terminating '\0' character). +bool IsInSet(char ch, const char* str) { + return ch != '\0' && strchr(str, ch) != NULL; +} + +// Returns true iff ch belongs to the given classification. Unlike +// similar functions in , these aren't affected by the +// current locale. +bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; } +bool IsAsciiPunct(char ch) { + return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"); +} +bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); } +bool IsAsciiWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); } +bool IsAsciiWordChar(char ch) { + return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || + ('0' <= ch && ch <= '9') || ch == '_'; +} + +// Returns true iff "\\c" is a supported escape sequence. +bool IsValidEscape(char c) { + return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW")); +} + +// Returns true iff the given atom (specified by escaped and pattern) +// matches ch. The result is undefined if the atom is invalid. +bool AtomMatchesChar(bool escaped, char pattern_char, char ch) { + if (escaped) { // "\\p" where p is pattern_char. + switch (pattern_char) { + case 'd': return IsAsciiDigit(ch); + case 'D': return !IsAsciiDigit(ch); + case 'f': return ch == '\f'; + case 'n': return ch == '\n'; + case 'r': return ch == '\r'; + case 's': return IsAsciiWhiteSpace(ch); + case 'S': return !IsAsciiWhiteSpace(ch); + case 't': return ch == '\t'; + case 'v': return ch == '\v'; + case 'w': return IsAsciiWordChar(ch); + case 'W': return !IsAsciiWordChar(ch); + } + return IsAsciiPunct(pattern_char) && pattern_char == ch; + } + + return (pattern_char == '.' 
&& ch != '\n') || pattern_char == ch; +} + +// Helper function used by ValidateRegex() to format error messages. +std::string FormatRegexSyntaxError(const char* regex, int index) { + return (Message() << "Syntax error at index " << index + << " in simple regular expression \"" << regex << "\": ").GetString(); +} + +// Generates non-fatal failures and returns false if regex is invalid; +// otherwise returns true. +bool ValidateRegex(const char* regex) { + if (regex == NULL) { + // TODO(wan@google.com): fix the source file location in the + // assertion failures to match where the regex is used in user + // code. + ADD_FAILURE() << "NULL is not a valid simple regular expression."; + return false; + } + + bool is_valid = true; + + // True iff ?, *, or + can follow the previous atom. + bool prev_repeatable = false; + for (int i = 0; regex[i]; i++) { + if (regex[i] == '\\') { // An escape sequence + i++; + if (regex[i] == '\0') { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) + << "'\\' cannot appear at the end."; + return false; + } + + if (!IsValidEscape(regex[i])) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) + << "invalid escape sequence \"\\" << regex[i] << "\"."; + is_valid = false; + } + prev_repeatable = true; + } else { // Not an escape sequence. 
+ const char ch = regex[i]; + + if (ch == '^' && i > 0) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'^' can only appear at the beginning."; + is_valid = false; + } else if (ch == '$' && regex[i + 1] != '\0') { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'$' can only appear at the end."; + is_valid = false; + } else if (IsInSet(ch, "()[]{}|")) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'" << ch << "' is unsupported."; + is_valid = false; + } else if (IsRepeat(ch) && !prev_repeatable) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'" << ch << "' can only follow a repeatable token."; + is_valid = false; + } + + prev_repeatable = !IsInSet(ch, "^$?*+"); + } + } + + return is_valid; +} + +// Matches a repeated regex atom followed by a valid simple regular +// expression. The regex atom is defined as c if escaped is false, +// or \c otherwise. repeat is the repetition meta character (?, *, +// or +). The behavior is undefined if str contains too many +// characters to be indexable by size_t, in which case the test will +// probably time out anyway. We are fine with this limitation as +// std::string has it too. +bool MatchRepetitionAndRegexAtHead( + bool escaped, char c, char repeat, const char* regex, + const char* str) { + const size_t min_count = (repeat == '+') ? 1 : 0; + const size_t max_count = (repeat == '?') ? 1 : + static_cast(-1) - 1; + // We cannot call numeric_limits::max() as it conflicts with the + // max() macro on Windows. + + for (size_t i = 0; i <= max_count; ++i) { + // We know that the atom matches each of the first i characters in str. + if (i >= min_count && MatchRegexAtHead(regex, str + i)) { + // We have enough matches at the head, and the tail matches too. + // Since we only care about *whether* the pattern matches str + // (as opposed to *how* it matches), there is no need to find a + // greedy match. 
+ return true; + } + if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i])) + return false; + } + return false; +} + +// Returns true iff regex matches a prefix of str. regex must be a +// valid simple regular expression and not start with "^", or the +// result is undefined. +bool MatchRegexAtHead(const char* regex, const char* str) { + if (*regex == '\0') // An empty regex matches a prefix of anything. + return true; + + // "$" only matches the end of a string. Note that regex being + // valid guarantees that there's nothing after "$" in it. + if (*regex == '$') + return *str == '\0'; + + // Is the first thing in regex an escape sequence? + const bool escaped = *regex == '\\'; + if (escaped) + ++regex; + if (IsRepeat(regex[1])) { + // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so + // here's an indirect recursion. It terminates as the regex gets + // shorter in each recursion. + return MatchRepetitionAndRegexAtHead( + escaped, regex[0], regex[1], regex + 2, str); + } else { + // regex isn't empty, isn't "$", and doesn't start with a + // repetition. We match the first atom of regex with the first + // character of str and recurse. + return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) && + MatchRegexAtHead(regex + 1, str + 1); + } +} + +// Returns true iff regex matches any substring of str. regex must be +// a valid simple regular expression, or the result is undefined. +// +// The algorithm is recursive, but the recursion depth doesn't exceed +// the regex length, so we won't need to worry about running out of +// stack space normally. In rare cases the time complexity can be +// exponential with respect to the regex length + the string length, +// but usually it's must faster (often close to linear). +bool MatchRegexAnywhere(const char* regex, const char* str) { + if (regex == NULL || str == NULL) + return false; + + if (*regex == '^') + return MatchRegexAtHead(regex + 1, str); + + // A successful match can be anywhere in str. 
+ do { + if (MatchRegexAtHead(regex, str)) + return true; + } while (*str++ != '\0'); + return false; +} + +// Implements the RE class. + +RE::~RE() { + free(const_cast(pattern_)); + free(const_cast(full_pattern_)); +} + +// Returns true iff regular expression re matches the entire str. +bool RE::FullMatch(const char* str, const RE& re) { + return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str); +} + +// Returns true iff regular expression re matches a substring of str +// (including str itself). +bool RE::PartialMatch(const char* str, const RE& re) { + return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str); +} + +// Initializes an RE from its string representation. +void RE::Init(const char* regex) { + pattern_ = full_pattern_ = NULL; + if (regex != NULL) { + pattern_ = posix::StrDup(regex); + } + + is_valid_ = ValidateRegex(regex); + if (!is_valid_) { + // No need to calculate the full pattern when the regex is invalid. + return; + } + + const size_t len = strlen(regex); + // Reserves enough bytes to hold the regular expression used for a + // full match: we need space to prepend a '^', append a '$', and + // terminate the string with '\0'. + char* buffer = static_cast(malloc(len + 3)); + full_pattern_ = buffer; + + if (*regex != '^') + *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'. + + // We don't use snprintf or strncpy, as they trigger a warning when + // compiled with VC++ 8.0. + memcpy(buffer, regex, len); + buffer += len; + + if (len == 0 || regex[len - 1] != '$') + *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'. + + *buffer = '\0'; +} + +#endif // GTEST_USES_POSIX_RE + +const char kUnknownFile[] = "unknown file"; + +// Formats a source file path and a line number as they would appear +// in an error message from the compiler used to compile this code. +GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) { + const std::string file_name(file == NULL ? 
kUnknownFile : file); + + if (line < 0) { + return file_name + ":"; + } +#ifdef _MSC_VER + return file_name + "(" + StreamableToString(line) + "):"; +#else + return file_name + ":" + StreamableToString(line) + ":"; +#endif // _MSC_VER +} + +// Formats a file location for compiler-independent XML output. +// Although this function is not platform dependent, we put it next to +// FormatFileLocation in order to contrast the two functions. +// Note that FormatCompilerIndependentFileLocation() does NOT append colon +// to the file location it produces, unlike FormatFileLocation(). +GTEST_API_ ::std::string FormatCompilerIndependentFileLocation( + const char* file, int line) { + const std::string file_name(file == NULL ? kUnknownFile : file); + + if (line < 0) + return file_name; + else + return file_name + ":" + StreamableToString(line); +} + + +GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line) + : severity_(severity) { + const char* const marker = + severity == GTEST_INFO ? "[ INFO ]" : + severity == GTEST_WARNING ? "[WARNING]" : + severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]"; + GetStream() << ::std::endl << marker << " " + << FormatFileLocation(file, line).c_str() << ": "; +} + +// Flushes the buffers and, if severity is GTEST_FATAL, aborts the program. +GTestLog::~GTestLog() { + GetStream() << ::std::endl; + if (severity_ == GTEST_FATAL) { + fflush(stderr); + posix::Abort(); + } +} +// Disable Microsoft deprecation warnings for POSIX functions called from +// this class (creat, dup, dup2, and close) +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4996) +#endif // _MSC_VER + +#if GTEST_HAS_STREAM_REDIRECTION + +// Object that captures an output stream (stdout/stderr). +class CapturedStream { + public: + // The ctor redirects the stream to a temporary file. 
+ explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) { +# if GTEST_OS_WINDOWS + char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT + char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT + + ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path); + const UINT success = ::GetTempFileNameA(temp_dir_path, + "gtest_redir", + 0, // Generate unique file name. + temp_file_path); + GTEST_CHECK_(success != 0) + << "Unable to create a temporary file in " << temp_dir_path; + const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE); + GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file " + << temp_file_path; + filename_ = temp_file_path; +# else + // There's no guarantee that a test has write access to the current + // directory, so we create the temporary file in the /tmp directory + // instead. We use /tmp on most systems, and /sdcard on Android. + // That's because Android doesn't have /tmp. +# if GTEST_OS_LINUX_ANDROID + // Note: Android applications are expected to call the framework's + // Context.getExternalStorageDirectory() method through JNI to get + // the location of the world-writable SD Card directory. However, + // this requires a Context handle, which cannot be retrieved + // globally from native code. Doing so also precludes running the + // code as part of a regular standalone executable, which doesn't + // run in a Dalvik process (e.g. when running it through 'adb shell'). + // + // The location /sdcard is directly accessible from native code + // and is the only location (unofficially) supported by the Android + // team. It's generally a symlink to the real SD Card mount point + // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or + // other OEM-customized locations. Never rely on these, and always + // use /sdcard. 
+ char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX"; +# else + char name_template[] = "/tmp/captured_stream.XXXXXX"; +# endif // GTEST_OS_LINUX_ANDROID + /* coverity[secure_temp] */ + const int captured_fd = mkstemp(name_template); + GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file"; + filename_ = name_template; +# endif // GTEST_OS_WINDOWS + fflush(NULL); + dup2(captured_fd, fd_); + close(captured_fd); + } + + ~CapturedStream() { + (void) remove(filename_.c_str()); + if (uncaptured_fd_!= -1) { + close(uncaptured_fd_); + uncaptured_fd_ = -1; + } + } + + std::string GetCapturedString() { + if (uncaptured_fd_ != -1) { + // Restores the original stream. + fflush(NULL); + dup2(uncaptured_fd_, fd_); + close(uncaptured_fd_); + uncaptured_fd_ = -1; + } + + FILE* const file = posix::FOpen(filename_.c_str(), "r"); + const std::string content = ReadEntireFile(file); + posix::FClose(file); + return content; + } + + private: + // Reads the entire content of a file as an std::string. + static std::string ReadEntireFile(FILE* file); + + // Returns the size (in bytes) of a file. + static size_t GetFileSize(FILE* file); + + const int fd_; // A stream to capture. + int uncaptured_fd_; + // Name of the temporary file holding the stderr output. + ::std::string filename_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream); +}; + +// Returns the size (in bytes) of a file. +size_t CapturedStream::GetFileSize(FILE* file) { + (void) fseek(file, 0, SEEK_END); + long size = ftell(file); + GTEST_CHECK_(size >= 0) << "Unable to get file size"; + return static_cast(size); +} + +// Reads the entire content of a file as a string. 
+std::string CapturedStream::ReadEntireFile(FILE* file) { + const size_t file_size = GetFileSize(file); + char* const buffer = new char[file_size]; + + size_t bytes_last_read = 0; // # of bytes read in the last fread() + size_t bytes_read = 0; // # of bytes read so far + + fseek(file, 0, SEEK_SET); + + // Keeps reading the file until we cannot read further or the + // pre-determined file size is reached. + do { + bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file); + bytes_read += bytes_last_read; + } while (bytes_last_read > 0 && bytes_read < file_size); + + const std::string content(buffer, bytes_read); + delete[] buffer; + + return content; +} + +# ifdef _MSC_VER +# pragma warning(pop) +# endif // _MSC_VER + +static CapturedStream* g_captured_stderr = NULL; +static CapturedStream* g_captured_stdout = NULL; + +// Starts capturing an output stream (stdout/stderr). +void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) { + if (*stream != NULL) { + GTEST_LOG_(FATAL) << "Only one " << stream_name + << " capturer can exist at a time."; + } + *stream = new CapturedStream(fd); +} + +// Stops capturing the output stream and returns the captured string. +std::string GetCapturedStream(CapturedStream** captured_stream) { + const std::string content = (*captured_stream)->GetCapturedString(); + + delete *captured_stream; + *captured_stream = NULL; + + return content; +} + +// Starts capturing stdout. +void CaptureStdout() { + CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout); +} + +// Starts capturing stderr. +void CaptureStderr() { + CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr); +} + +// Stops capturing stdout and returns the captured string. +std::string GetCapturedStdout() { + return GetCapturedStream(&g_captured_stdout); +} + +// Stops capturing stderr and returns the captured string. 
+std::string GetCapturedStderr() {
+  return GetCapturedStream(&g_captured_stderr);
+}
+
+#endif  // GTEST_HAS_STREAM_REDIRECTION
+
+#if GTEST_HAS_DEATH_TEST
+
+// A copy of all command line arguments.  Set by InitGoogleTest().
+::std::vector<testing::internal::string> g_argvs;
+
+static const ::std::vector<testing::internal::string>* g_injected_test_argvs =
+                                        NULL;  // Owned.
+
+// Installs argvs as the injected argument vector.  A previously injected
+// vector is deleted unless it is the same object as argvs.
+void SetInjectableArgvs(const ::std::vector<testing::internal::string>* argvs) {
+  if (g_injected_test_argvs != argvs)
+    delete g_injected_test_argvs;
+  g_injected_test_argvs = argvs;
+}
+
+// Returns the injected argument vector if one is set, g_argvs otherwise.
+const ::std::vector<testing::internal::string>& GetInjectableArgvs() {
+  if (g_injected_test_argvs != NULL) {
+    return *g_injected_test_argvs;
+  }
+  return g_argvs;
+}
+#endif  // GTEST_HAS_DEATH_TEST
+
+#if GTEST_OS_WINDOWS_MOBILE
+namespace posix {
+void Abort() {
+  DebugBreak();
+  TerminateProcess(GetCurrentProcess(), 1);
+}
+}  // namespace posix
+#endif  // GTEST_OS_WINDOWS_MOBILE
+
+// Returns the name of the environment variable corresponding to the
+// given flag.  For example, FlagToEnvVar("foo") will return
+// "GTEST_FOO" in the open-source version.
+static std::string FlagToEnvVar(const char* flag) {
+  const std::string full_flag =
+      (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
+
+  Message env_var;
+  for (size_t i = 0; i != full_flag.length(); i++) {
+    env_var << ToUpper(full_flag.c_str()[i]);
+  }
+
+  return env_var.GetString();
+}
+
+// Parses 'str' for a 32-bit signed integer.  If successful, writes
+// the result to *value and returns true; otherwise leaves *value
+// unchanged and returns false.
+bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
+  // Parses the environment variable as a decimal integer.
+  char* end = NULL;
+  const long long_value = strtol(str, &end, 10);  // NOLINT
+
+  // Has strtol() consumed all characters in the string?
+  if (*end != '\0') {
+    // No - an invalid character was encountered.
+    Message msg;
+    msg << "WARNING: " << src_text
+        << " is expected to be a 32-bit integer, but actually"
+        << " has value \"" << str << "\".\n";
+    printf("%s", msg.GetString().c_str());
+    fflush(stdout);
+    return false;
+  }
+
+  // Is the parsed value in the range of an Int32?
+  const Int32 result = static_cast<Int32>(long_value);
+  if (long_value == LONG_MAX || long_value == LONG_MIN ||
+      // The parsed value overflows as a long.  (strtol() returns
+      // LONG_MAX or LONG_MIN when the input overflows.)
+      result != long_value
+      // The parsed value overflows as an Int32.
+      ) {
+    Message msg;
+    msg << "WARNING: " << src_text
+        << " is expected to be a 32-bit integer, but actually"
+        << " has value " << str << ", which overflows.\n";
+    printf("%s", msg.GetString().c_str());
+    fflush(stdout);
+    return false;
+  }
+
+  *value = result;
+  return true;
+}
+
+// Reads and returns the Boolean environment variable corresponding to
+// the given flag; if it's not set, returns default_value.
+//
+// The value is considered true iff it's not "0".
+bool BoolFromGTestEnv(const char* flag, bool default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const string_value = posix::GetEnv(env_var.c_str());
+  return string_value == NULL ?
+      default_value : strcmp(string_value, "0") != 0;
+}
+
+// Reads and returns a 32-bit integer stored in the environment
+// variable corresponding to the given flag; if it isn't set or
+// doesn't represent a valid 32-bit integer, returns default_value.
+Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const string_value = posix::GetEnv(env_var.c_str());
+  if (string_value == NULL) {
+    // The environment variable is not set.
+    return default_value;
+  }
+
+  Int32 result = default_value;
+  if (!ParseInt32(Message() << "Environment variable " << env_var,
+                  string_value, &result)) {
+    printf("The default value %s is used.\n",
+           (Message() << default_value).GetString().c_str());
+    fflush(stdout);
+    return default_value;
+  }
+
+  return result;
+}
+
+// Reads and returns the string environment variable corresponding to
+// the given flag; if it's not set, returns default_value.
+const char* StringFromGTestEnv(const char* flag, const char* default_value) {
+  const std::string env_var = FlagToEnvVar(flag);
+  const char* const value = posix::GetEnv(env_var.c_str());
+  return value == NULL ? default_value : value;
+}
+
+}  // namespace internal
+}  // namespace testing
+// Copyright 2007, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+// Google Test - The Google C++ Testing Framework
+//
+// This file implements a universal value printer that can print a
+// value of any type T:
+//
+//   void ::testing::internal::UniversalPrinter<T>::Print(value, ostream_ptr);
+//
+// It uses the << operator when possible, and prints the bytes in the
+// object otherwise.  A user can override its behavior for a class
+// type Foo by defining either operator<<(::std::ostream&, const Foo&)
+// or void PrintTo(const Foo&, ::std::ostream*) in the namespace that
+// defines Foo.
+
+#include <ctype.h>
+#include <stdio.h>
+#include <ostream>  // NOLINT
+#include <string>
+
+namespace testing {
+
+namespace {
+
+using ::std::ostream;
+
+// Prints a segment of bytes in the given object.
+void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start,
+                                size_t count, ostream* os) {
+  char text[5] = "";
+  for (size_t i = 0; i != count; i++) {
+    const size_t j = start + i;
+    if (i != 0) {
+      // Organizes the bytes into groups of 2 for easy parsing by
+      // human.
+      if ((j % 2) == 0)
+        *os << ' ';
+      else
+        *os << '-';
+    }
+    GTEST_SNPRINTF_(text, sizeof(text), "%02X", obj_bytes[j]);
+    *os << text;
+  }
+}
+
+// Prints the bytes in the given value to the given ostream.
+void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
+                              ostream* os) {
+  // Tells the user how big the object is.
+  *os << count << "-byte object <";
+
+  const size_t kThreshold = 132;
+  const size_t kChunkSize = 64;
+  // If the object size is bigger than kThreshold, we'll have to omit
+  // some details by printing only the first and the last kChunkSize
+  // bytes.
+  // TODO(wan): let the user control the threshold using a flag.
+  if (count < kThreshold) {
+    PrintByteSegmentInObjectTo(obj_bytes, 0, count, os);
+  } else {
+    PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os);
+    *os << " ... ";
+    // Rounds up to 2-byte boundary.
+    const size_t resume_pos = (count - kChunkSize + 1)/2*2;
+    PrintByteSegmentInObjectTo(obj_bytes, resume_pos, count - resume_pos, os);
+  }
+  *os << ">";
+}
+
+}  // namespace
+
+namespace internal2 {
+
+// Delegates to PrintBytesInObjectToImpl() to print the bytes in the
+// given object.  The delegation simplifies the implementation, which
+// uses the << operator and thus is easier done outside of the
+// ::testing::internal namespace, which contains a << operator that
+// sometimes conflicts with the one in STL.
+void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
+                          ostream* os) {
+  PrintBytesInObjectToImpl(obj_bytes, count, os);
+}
+
+}  // namespace internal2
+
+namespace internal {
+
+// Depending on the value of a char (or wchar_t), we print it in one
+// of three formats:
+//   - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
+//   - as a hexadecimal escape sequence (e.g. '\x7F'), or
+//   - as a special escape sequence (e.g. '\r', '\n').
+enum CharFormat {
+  kAsIs,
+  kHexEscape,
+  kSpecialEscape
+};
+
+// Returns true if c is a printable ASCII character.  We test the
+// value of c directly instead of calling isprint(), which is buggy on
+// Windows Mobile.
+inline bool IsPrintableAscii(wchar_t c) {
+  return 0x20 <= c && c <= 0x7E;
+}
+
+// Prints a wide or narrow char c as a character literal without the
+// quotes, escaping it when necessary; returns how c was formatted.
+// The template argument UnsignedChar is the unsigned version of Char,
+// which is the type of c.
+template <typename UnsignedChar, typename Char>
+static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
+  switch (static_cast<wchar_t>(c)) {
+    case L'\0':
+      *os << "\\0";
+      break;
+    case L'\'':
+      *os << "\\'";
+      break;
+    case L'\\':
+      *os << "\\\\";
+      break;
+    case L'\a':
+      *os << "\\a";
+      break;
+    case L'\b':
+      *os << "\\b";
+      break;
+    case L'\f':
+      *os << "\\f";
+      break;
+    case L'\n':
+      *os << "\\n";
+      break;
+    case L'\r':
+      *os << "\\r";
+      break;
+    case L'\t':
+      *os << "\\t";
+      break;
+    case L'\v':
+      *os << "\\v";
+      break;
+    default:
+      if (IsPrintableAscii(c)) {
+        *os << static_cast<char>(c);
+        return kAsIs;
+      } else {
+        // Cast through the unsigned type so the hex code is not
+        // sign-extended.
+        *os << "\\x" + String::FormatHexInt(static_cast<UnsignedChar>(c));
+        return kHexEscape;
+      }
+  }
+  return kSpecialEscape;
+}
+
+// Prints a wchar_t c as if it's part of a string literal, escaping it when
+// necessary; returns how c was formatted.
+static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
+  switch (c) {
+    case L'\'':
+      *os << "'";
+      return kAsIs;
+    case L'"':
+      *os << "\\\"";
+      return kSpecialEscape;
+    default:
+      return PrintAsCharLiteralTo<wchar_t>(c, os);
+  }
+}
+
+// Prints a char c as if it's part of a string literal, escaping it when
+// necessary; returns how c was formatted.
+static CharFormat PrintAsStringLiteralTo(char c, ostream* os) {
+  return PrintAsStringLiteralTo(
+      static_cast<wchar_t>(static_cast<unsigned char>(c)), os);
+}
+
+// Prints a wide or narrow character c and its code.  '\0' is printed
+// as "'\\0'", other unprintable characters are also properly escaped
+// using the standard C++ escape sequence.  The template argument
+// UnsignedChar is the unsigned version of Char, which is the type of c.
+template <typename UnsignedChar, typename Char>
+void PrintCharAndCodeTo(Char c, ostream* os) {
+  // First, print c as a literal in the most readable form we can find.
+  *os << ((sizeof(c) > 1) ? "L'" : "'");
+  const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os);
+  *os << "'";
+
+  // To aid user debugging, we also print c's code in decimal, unless
+  // it's 0 (in which case c was printed as '\\0', making the code
+  // obvious).
+  if (c == 0)
+    return;
+  *os << " (" << static_cast<int>(c);
+
+  // For more convenience, we print c's code again in hexadecimal,
+  // unless c was already printed in the form '\x##' or the code is in
+  // [1, 9].
+  if (format == kHexEscape || (1 <= c && c <= 9)) {
+    // Do nothing.
+  } else {
+    *os << ", 0x" << String::FormatHexInt(static_cast<UnsignedChar>(c));
+  }
+  *os << ")";
+}
+
+// Prints an unsigned char as a character literal followed by its code.
+void PrintTo(unsigned char c, ::std::ostream* os) {
+  PrintCharAndCodeTo<unsigned char>(c, os);
+}
+// Prints a signed char as a character literal followed by its code.
+void PrintTo(signed char c, ::std::ostream* os) {
+  PrintCharAndCodeTo<unsigned char>(c, os);
+}
+
+// Prints a wchar_t as a symbol if it is printable or as its internal
+// code otherwise and also as its code.  L'\0' is printed as "L'\\0'".
+void PrintTo(wchar_t wc, ostream* os) {
+  PrintCharAndCodeTo<wchar_t>(wc, os);
+}
+
+// Prints the given array of characters to the ostream.  CharType must be either
+// char or wchar_t.
+// The array starts at begin, the length is len, it may include '\0' characters
+// and may not be NUL-terminated.
+template <typename CharType>
+static void PrintCharsAsStringTo(
+    const CharType* begin, size_t len, ostream* os) {
+  const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\"";
+  *os << kQuoteBegin;
+  bool is_previous_hex = false;
+  for (size_t index = 0; index < len; ++index) {
+    const CharType cur = begin[index];
+    if (is_previous_hex && IsXDigit(cur)) {
+      // Previous character is of '\x..' form and this character can be
+      // interpreted as another hexadecimal digit in its number. Break string to
+      // disambiguate.
+      *os << "\" " << kQuoteBegin;
+    }
+    is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape;
+  }
+  *os << "\"";
+}
+
+// Prints a (const) char/wchar_t array of 'len' elements, starting at address
+// 'begin'.  CharType must be either char or wchar_t.
+template <typename CharType>
+static void UniversalPrintCharArray(
+    const CharType* begin, size_t len, ostream* os) {
+  // The code
+  //   const char kFoo[] = "foo";
+  // generates an array of 4, not 3, elements, with the last one being '\0'.
+  //
+  // Therefore when printing a char array, we don't print the last element if
+  // it's '\0', such that the output matches the string literal as it's
+  // written in the source code.
+  if (len > 0 && begin[len - 1] == '\0') {
+    PrintCharsAsStringTo(begin, len - 1, os);
+    return;
+  }
+
+  // If, however, the last element in the array is not '\0', e.g.
+  //    const char kFoo[] = { 'f', 'o', 'o' };
+  // we must print the entire array.  We also print a message to indicate
+  // that the array is not NUL-terminated.
+  PrintCharsAsStringTo(begin, len, os);
+  *os << " (no terminating NUL)";
+}
+
+// Prints a (const) char array of 'len' elements, starting at address 'begin'.
+void UniversalPrintArray(const char* begin, size_t len, ostream* os) {
+  UniversalPrintCharArray(begin, len, os);
+}
+
+// Prints a (const) wchar_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) {
+  UniversalPrintCharArray(begin, len, os);
+}
+
+// Prints the given C string to the ostream: "NULL" for a null pointer,
+// otherwise the pointer value followed by the quoted, escaped characters.
+void PrintTo(const char* s, ostream* os) {
+  if (s == NULL) {
+    *os << "NULL";
+  } else {
+    // Cast to const void* so the stream prints the address, not the text.
+    *os << ImplicitCast_<const void*>(s) << " pointing to ";
+    PrintCharsAsStringTo(s, strlen(s), os);
+  }
+}
+
+// MSVC compiler can be configured to define wchar_t as a typedef
+// of unsigned short. Defining an overload for const wchar_t* in that case
+// would cause pointers to unsigned shorts be printed as wide strings,
+// possibly accessing more memory than intended and causing invalid
+// memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when
+// wchar_t is implemented as a native type.
+#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
+// Prints the given wide C string to the ostream.
+void PrintTo(const wchar_t* s, ostream* os) {
+  if (s == NULL) {
+    *os << "NULL";
+  } else {
+    // Cast to const void* so the stream prints the address, not the text.
+    *os << ImplicitCast_<const void*>(s) << " pointing to ";
+    PrintCharsAsStringTo(s, wcslen(s), os);
+  }
+}
+#endif  // wchar_t is native
+
+// Prints a ::string object.
+#if GTEST_HAS_GLOBAL_STRING
+void PrintStringTo(const ::string& s, ostream* os) {
+  PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif  // GTEST_HAS_GLOBAL_STRING
+
+// Prints a ::std::string object as a quoted, escaped string literal.
+void PrintStringTo(const ::std::string& s, ostream* os) {
+  PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+
+// Prints a ::wstring object.
+#if GTEST_HAS_GLOBAL_WSTRING
+void PrintWideStringTo(const ::wstring& s, ostream* os) {
+  PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif  // GTEST_HAS_GLOBAL_WSTRING
+
+#if GTEST_HAS_STD_WSTRING
+// Prints a ::std::wstring object as a quoted, escaped wide string literal.
+void PrintWideStringTo(const ::std::wstring& s, ostream* os) {
+  PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif  // GTEST_HAS_STD_WSTRING
+
+}  // namespace internal
+
+}  // namespace testing
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mheule@google.com (Markus Heule)
+//
+// The Google C++ Testing Framework (Google Test)
+
+
+// Indicates that this translation unit is part of Google Test's
+// implementation.  It must come before gtest-internal-inl.h is
+// included, or there will be a compiler error.  This trick is to
+// prevent a user from accidentally including gtest-internal-inl.h in
+// his code.
+#define GTEST_IMPLEMENTATION_ 1
+#undef GTEST_IMPLEMENTATION_
+
+namespace testing {
+
+using internal::GetUnitTestImpl;
+
+// Gets the summary of the failure message by omitting the stack trace
+// in it.  The whole message is returned when it contains no stack trace
+// marker.
+std::string TestPartResult::ExtractSummary(const char* message) {
+  const char* const stack_trace = strstr(message, internal::kStackTraceMarker);
+  return stack_trace == NULL ? message :
+      std::string(message, stack_trace);
+}
+
+// Prints a TestPartResult object.
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
+  return os
+      << result.file_name() << ":" << result.line_number() << ": "
+      << (result.type() == TestPartResult::kSuccess ? "Success" :
+          result.type() == TestPartResult::kFatalFailure ? "Fatal failure" :
+          "Non-fatal failure") << ":\n"
+      << result.message() << std::endl;
+}
+
+// Appends a TestPartResult to the array.
+void TestPartResultArray::Append(const TestPartResult& result) {
+  array_.push_back(result);
+}
+
+// Returns the TestPartResult at the given index (0-based).
+// Aborts the program, after printing a diagnostic, when index is out of
+// range.
+const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const {
+  if (index < 0 || index >= size()) {
+    printf("\nInvalid index (%d) into TestPartResultArray.\n", index);
+    // Flush before aborting so the buffered diagnostic is not lost.
+    fflush(stdout);
+    internal::posix::Abort();
+  }
+
+  return array_[index];
+}
+
+// Returns the number of TestPartResult objects in the array.
+int TestPartResultArray::size() const {
+  return static_cast<int>(array_.size());
+}
+
+namespace internal {
+
+// Remembers the current thread's test part result reporter and installs
+// this object in its place.
+HasNewFatalFailureHelper::HasNewFatalFailureHelper()
+    : has_new_fatal_failure_(false),
+      original_reporter_(GetUnitTestImpl()->
+                         GetTestPartResultReporterForCurrentThread()) {
+  GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this);
+}
+
+// Restores the reporter that was active when this object was constructed.
+HasNewFatalFailureHelper::~HasNewFatalFailureHelper() {
+  GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(
+      original_reporter_);
+}
+
+// Records whether a fatal failure has been seen, then forwards the result
+// to the original reporter.
+void HasNewFatalFailureHelper::ReportTestPartResult(
+    const TestPartResult& result) {
+  if (result.fatally_failed())
+    has_new_fatal_failure_ = true;
+  original_reporter_->ReportTestPartResult(result);
+}
+
+}  // namespace internal
+
+}  // namespace testing
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+
+
+namespace testing {
+namespace internal {
+
+#if GTEST_HAS_TYPED_TEST_P
+
+// Skips to the first non-space char in str. Returns an empty string if str
+// contains only whitespace characters.
+static const char* SkipSpaces(const char* str) {
+  while (IsSpace(*str))
+    str++;
+  return str;
+}
+
+// Verifies that registered_tests match the test names in
+// defined_test_names_; returns registered_tests if successful, or
+// aborts the program otherwise.
+//
+// file and line are only used to locate the error message.  Three kinds of
+// mismatch are reported: a name listed more than once, a listed name that
+// was never defined, and a defined name that was not listed.
+const char* TypedTestCasePState::VerifyRegisteredTestNames(
+    const char* file, int line, const char* registered_tests) {
+  typedef ::std::set<const char*>::const_iterator DefinedTestIter;
+  registered_ = true;
+
+  // Skip initial whitespace in registered_tests since some
+  // preprocessors prefix stringized literals with whitespace.
+  registered_tests = SkipSpaces(registered_tests);
+
+  Message errors;
+  ::std::set<std::string> tests;
+  for (const char* names = registered_tests; names != NULL;
+       names = SkipComma(names)) {
+    const std::string name = GetPrefixUntilComma(names);
+    if (tests.count(name) != 0) {
+      errors << "Test " << name << " is listed more than once.\n";
+      continue;
+    }
+
+    // defined_test_names_ holds C strings, so compare by content rather
+    // than by pointer.
+    bool found = false;
+    for (DefinedTestIter it = defined_test_names_.begin();
+         it != defined_test_names_.end();
+         ++it) {
+      if (name == *it) {
+        found = true;
+        break;
+      }
+    }
+
+    if (found) {
+      tests.insert(name);
+    } else {
+      errors << "No test named " << name
+             << " can be found in this test case.\n";
+    }
+  }
+
+  for (DefinedTestIter it = defined_test_names_.begin();
+       it != defined_test_names_.end();
+       ++it) {
+    if (tests.count(*it) == 0) {
+      errors << "You forgot to list test " << *it << ".\n";
+    }
+  }
+
+  const std::string& errors_str = errors.GetString();
+  if (errors_str != "") {
+    fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
+            errors_str.c_str());
+    fflush(stderr);
+    posix::Abort();
+  }
+
+  return registered_tests;
+}
+
+#endif  // GTEST_HAS_TYPED_TEST_P
+
+}  // namespace internal
+}  // namespace testing
diff --git a/test/gtest/common/gtest.h b/test/gtest/common/gtest.h
new file mode 100644
index 0000000..6c12067
--- /dev/null
+++ b/test/gtest/common/gtest.h
@@ -0,0 +1,20062 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc.
nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: wan@google.com (Zhanyong Wan)
+//
+// The Google C++ Testing Framework (Google Test)
+//
+// This header file defines the public API for Google Test.  It should be
+// included by any test program that uses Google Test.
+//
+// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
+// leave some internal implementation details in this header file.
+// They are clearly marked by comments like this:
+//
+//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
+//
+// Such code is NOT meant to be used by a user directly, and is subject
+// to CHANGE WITHOUT NOTICE.  Therefore DO NOT DEPEND ON IT in a user
+// program!
+//
+// Acknowledgment: Google Test borrowed the idea of automatic test
+// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
+// easyUnit framework.
+
+#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
+#define GTEST_INCLUDE_GTEST_GTEST_H_
+
+#include <limits>
+#include <ostream>
+#include <vector>
+
+// Copyright 2005, Google Inc.
+// All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file declares functions and macros used internally by +// Google Test. They are subject to change without notice. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ + +// Copyright 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan) +// +// Low-level types and utilities for porting Google Test to various +// platforms. They are subject to change without notice. DO NOT USE +// THEM IN USER CODE. +// +// This file is fundamental to Google Test. All other Google Test source +// files are expected to #include this. Therefore, it cannot #include +// any other Google Test header. 
+ +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ + +// The user can define the following macros in the build script to +// control Google Test's behavior. If the user doesn't define a macro +// in this list, Google Test will define it. +// +// GTEST_HAS_CLONE - Define it to 1/0 to indicate that clone(2) +// is/isn't available. +// GTEST_HAS_EXCEPTIONS - Define it to 1/0 to indicate that exceptions +// are enabled. +// GTEST_HAS_GLOBAL_STRING - Define it to 1/0 to indicate that ::string +// is/isn't available (some systems define +// ::string, which is different to std::string). +// GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring +// is/isn't available (some systems define +// ::wstring, which is different to std::wstring). +// GTEST_HAS_POSIX_RE - Define it to 1/0 to indicate that POSIX regular +// expressions are/aren't available. +// GTEST_HAS_PTHREAD - Define it to 1/0 to indicate that <pthread.h> +// is/isn't available. +// GTEST_HAS_RTTI - Define it to 1/0 to indicate that RTTI is/isn't +// enabled. +// GTEST_HAS_STD_WSTRING - Define it to 1/0 to indicate that +// std::wstring does/doesn't work (Google Test can +// be used where std::wstring is unavailable). +// GTEST_HAS_TR1_TUPLE - Define it to 1/0 to indicate tr1::tuple +// is/isn't available. +// GTEST_HAS_SEH - Define it to 1/0 to indicate whether the +// compiler supports Microsoft's "Structured +// Exception Handling". +// GTEST_HAS_STREAM_REDIRECTION +// - Define it to 1/0 to indicate whether the +// platform supports I/O stream redirection using +// dup() and dup2(). +// GTEST_USE_OWN_TR1_TUPLE - Define it to 1/0 to indicate whether Google +// Test's own tr1 tuple implementation should be +// used. Unused when the user sets +// GTEST_HAS_TR1_TUPLE to 0. +// GTEST_LANG_CXX11 - Define it to 1/0 to indicate that Google Test +// is building in C++11/C++98 mode.
+// GTEST_LINKED_AS_SHARED_LIBRARY +// - Define to 1 when compiling tests that use +// Google Test as a shared library (known as +// DLL on Windows). +// GTEST_CREATE_SHARED_LIBRARY +// - Define to 1 when compiling Google Test itself +// as a shared library. + +// This header defines the following utilities: +// +// Macros indicating the current platform (defined to 1 if compiled on +// the given platform; otherwise undefined): +// GTEST_OS_AIX - IBM AIX +// GTEST_OS_CYGWIN - Cygwin +// GTEST_OS_HPUX - HP-UX +// GTEST_OS_LINUX - Linux +// GTEST_OS_LINUX_ANDROID - Google Android +// GTEST_OS_MAC - Mac OS X +// GTEST_OS_IOS - iOS +// GTEST_OS_IOS_SIMULATOR - iOS simulator +// GTEST_OS_NACL - Google Native Client (NaCl) +// GTEST_OS_OPENBSD - OpenBSD +// GTEST_OS_QNX - QNX +// GTEST_OS_SOLARIS - Sun Solaris +// GTEST_OS_SYMBIAN - Symbian +// GTEST_OS_WINDOWS - Windows (Desktop, MinGW, or Mobile) +// GTEST_OS_WINDOWS_DESKTOP - Windows Desktop +// GTEST_OS_WINDOWS_MINGW - MinGW +// GTEST_OS_WINDOWS_MOBILE - Windows Mobile +// GTEST_OS_ZOS - z/OS +// +// Among the platforms, Cygwin, Linux, Mac OS X, and Windows have the +// most stable support. Since core members of the Google Test project +// don't have access to other platforms, support for them may be less +// stable. If you notice any problems on your platform, please notify +// googletestframework@googlegroups.com (patches for fixing them are +// even more welcome!). +// +// Note that it is possible that none of the GTEST_OS_* macros are defined. +// +// Macros indicating available Google Test features (defined to 1 if +// the corresponding feature is supported; otherwise undefined): +// GTEST_HAS_COMBINE - the Combine() function (for value-parameterized +// tests) +// GTEST_HAS_DEATH_TEST - death tests +// GTEST_HAS_PARAM_TEST - value-parameterized tests +// GTEST_HAS_TYPED_TEST - typed tests +// GTEST_HAS_TYPED_TEST_P - type-parameterized tests +// GTEST_USES_POSIX_RE - enhanced POSIX regex is used.
Do not confuse with +// GTEST_HAS_POSIX_RE (see above) which users can +// define themselves. +// GTEST_USES_SIMPLE_RE - our own simple regex is used; +// the above two are mutually exclusive. +// GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ(). +// +// Macros for basic C++ coding: +// GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning. +// GTEST_ATTRIBUTE_UNUSED_ - declares that a class' instances or a +// variable don't have to be used. +// GTEST_DISALLOW_ASSIGN_ - disables operator=. +// GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=. +// GTEST_MUST_USE_RESULT_ - declares that a function's result must be used. +// +// Synchronization: +// Mutex, MutexLock, ThreadLocal, GetThreadCount() +// - synchronization primitives. +// GTEST_IS_THREADSAFE - defined to 1 to indicate that the above +// synchronization primitives have real implementations +// and Google Test is thread-safe; or 0 otherwise. +// +// Template meta programming: +// is_pointer - as in TR1; needed on Symbian and IBM XL C/C++ only. +// IteratorTraits - partial implementation of std::iterator_traits, which +// is not available in libCstd when compiled with Sun C++. +// +// Smart pointers: +// scoped_ptr - as in TR2. +// +// Regular expressions: +// RE - a simple regular expression class using the POSIX +// Extended Regular Expression syntax on UNIX-like +// platforms, or a reduced regular expression syntax on +// other platforms, including Windows. +// +// Logging: +// GTEST_LOG_() - logs messages at the specified severity level. +// LogToStderr() - directs all log messages to stderr. +// FlushInfoLog() - flushes informational log messages. +// +// Stdout and stderr capturing: +// CaptureStdout() - starts capturing stdout. +// GetCapturedStdout() - stops capturing stdout and returns the captured +// string. +// CaptureStderr() - starts capturing stderr. +// GetCapturedStderr() - stops capturing stderr and returns the captured +// string. 
+// +// Integer types: +// TypeWithSize - maps an integer to an int type. +// Int32, UInt32, Int64, UInt64, TimeInMillis +// - integers of known sizes. +// BiggestInt - the biggest signed integer type. +// +// Command-line utilities: +// GTEST_FLAG() - references a flag. +// GTEST_DECLARE_*() - declares a flag. +// GTEST_DEFINE_*() - defines a flag. +// GetInjectableArgvs() - returns the command line as a vector of strings. +// +// Environment variable utilities: +// GetEnv() - gets the value of an environment variable. +// BoolFromGTestEnv() - parses a bool environment variable. +// Int32FromGTestEnv() - parses an Int32 environment variable. +// StringFromGTestEnv() - parses a string environment variable. + +#include // for isspace, etc +#include // for ptrdiff_t +#include +#include +#include +#ifndef _WIN32_WCE +# include +# include +#endif // !_WIN32_WCE + +#if defined __APPLE__ +# include +# include +#endif + +#include // NOLINT +#include // NOLINT +#include // NOLINT + +// Developer mailing-list address, flag-name prefixes, and project name/URL. +// The address is spelled with a single '@' so that it is a usable e-mail +// address. +#define GTEST_DEV_EMAIL_ "googletestframework@googlegroups.com" +#define GTEST_FLAG_PREFIX_ "gtest_" +#define GTEST_FLAG_PREFIX_DASH_ "gtest-" +#define GTEST_FLAG_PREFIX_UPPER_ "GTEST_" +#define GTEST_NAME_ "Google Test" +#define GTEST_PROJECT_URL_ "http://code.google.com/p/googletest/" + +// Determines the version of gcc that is used to compile this. +#ifdef __GNUC__ +// 40302 means version 4.3.2. +# define GTEST_GCC_VER_ \ + (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) +#endif // __GNUC__ + +// Determines the platform on which Google Test is compiled. 
+// The #if/#elif chain below selects at most one primary GTEST_OS_* macro; +// the nested checks may define a more specific macro in addition (for +// example GTEST_OS_WINDOWS_MINGW, GTEST_OS_IOS or GTEST_OS_LINUX_ANDROID). +#ifdef __CYGWIN__ +# define GTEST_OS_CYGWIN 1 +#elif defined __SYMBIAN32__ +# define GTEST_OS_SYMBIAN 1 +#elif defined _WIN32 +# define GTEST_OS_WINDOWS 1 +# ifdef _WIN32_WCE +# define GTEST_OS_WINDOWS_MOBILE 1 +# elif defined(__MINGW__) || defined(__MINGW32__) +# define GTEST_OS_WINDOWS_MINGW 1 +# else +# define GTEST_OS_WINDOWS_DESKTOP 1 +# endif // _WIN32_WCE +#elif defined __APPLE__ +# define GTEST_OS_MAC 1 +# if TARGET_OS_IPHONE +# define GTEST_OS_IOS 1 +# if TARGET_IPHONE_SIMULATOR +# define GTEST_OS_IOS_SIMULATOR 1 +# endif +# endif +#elif defined __linux__ +# define GTEST_OS_LINUX 1 +# if defined __ANDROID__ +# define GTEST_OS_LINUX_ANDROID 1 +# endif +#elif defined __MVS__ +# define GTEST_OS_ZOS 1 +#elif defined(__sun) && defined(__SVR4) +# define GTEST_OS_SOLARIS 1 +#elif defined(_AIX) +# define GTEST_OS_AIX 1 +#elif defined(__hpux) +# define GTEST_OS_HPUX 1 +#elif defined __native_client__ +# define GTEST_OS_NACL 1 +#elif defined __OpenBSD__ +# define GTEST_OS_OPENBSD 1 +#elif defined __QNX__ +# define GTEST_OS_QNX 1 +#endif // __CYGWIN__ + +#ifndef GTEST_LANG_CXX11 +// gcc and clang define __GXX_EXPERIMENTAL_CXX0X__ when +// -std={c,gnu}++{0x,11} is passed. The C++11 standard specifies a +// value for __cplusplus, and recent versions of clang, gcc, and +// probably other compilers set that too in C++11 mode. +# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L +// Compiling in at least C++11 mode. +# define GTEST_LANG_CXX11 1 +# else +# define GTEST_LANG_CXX11 0 +# endif +#endif + +// Brings in definitions for functions used in the testing::internal::posix +// namespace (read, write, close, chdir, isatty, stat). We do not currently +// use them on Windows Mobile. +#if !GTEST_OS_WINDOWS +// This assumes that non-Windows OSes provide unistd.h. For OSes where this +// is not the case, we need to include headers that provide the functions +// mentioned above. 
+# include +# include +#elif !GTEST_OS_WINDOWS_MOBILE +# include +# include +#endif + +#if GTEST_OS_LINUX_ANDROID +// Used to define __ANDROID_API__ matching the target NDK API level. +# include // NOLINT +#endif + +// Defines this to true iff Google Test can use POSIX regular expressions. +#ifndef GTEST_HAS_POSIX_RE +# if GTEST_OS_LINUX_ANDROID +// On Android, <regex.h> is only available starting with Gingerbread. +# define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9) +# else +# define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS) +# endif +#endif + +#if GTEST_HAS_POSIX_RE + +// On some platforms, <regex.h> needs someone to define size_t, and +// won't compile otherwise. We can #include it here as we already +// included <stdlib.h>, which is guaranteed to define size_t through +// <stddef.h>. +# include // NOLINT + +# define GTEST_USES_POSIX_RE 1 + +#elif GTEST_OS_WINDOWS + +// <regex.h> is not available on Windows. Use our own simple regex +// implementation instead. +# define GTEST_USES_SIMPLE_RE 1 + +#else + +// <regex.h> may not be available on this platform. Use our own +// simple regex implementation instead. +# define GTEST_USES_SIMPLE_RE 1 + +#endif // GTEST_HAS_POSIX_RE + +#ifndef GTEST_HAS_EXCEPTIONS +// The user didn't tell us whether exceptions are enabled, so we need +// to figure it out. +# if defined(_MSC_VER) || defined(__BORLANDC__) +// MSVC's and C++Builder's implementations of the STL use the _HAS_EXCEPTIONS +// macro to enable exceptions, so we'll do the same. +// Assumes that exceptions are enabled by default. +# ifndef _HAS_EXCEPTIONS +# define _HAS_EXCEPTIONS 1 +# endif // _HAS_EXCEPTIONS +# define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS +# elif defined(__GNUC__) && __EXCEPTIONS +// gcc defines __EXCEPTIONS to 1 iff exceptions are enabled. +# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__SUNPRO_CC) +// Sun Pro CC supports exceptions. However, there is no compile-time way of +// detecting whether they are enabled or not. Therefore, we assume that +// they are enabled unless the user tells us otherwise. 
+# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__IBMCPP__) && __EXCEPTIONS +// xlC defines __EXCEPTIONS to 1 iff exceptions are enabled. +# define GTEST_HAS_EXCEPTIONS 1 +# elif defined(__HP_aCC) +// Exception handling is in effect by default in HP aCC compiler. It has to +// be turned off by +noeh compiler option if desired. +# define GTEST_HAS_EXCEPTIONS 1 +# else +// For other compilers, we assume exceptions are disabled to be +// conservative. +# define GTEST_HAS_EXCEPTIONS 0 +# endif // defined(_MSC_VER) || defined(__BORLANDC__) +#endif // GTEST_HAS_EXCEPTIONS + +#if !defined(GTEST_HAS_STD_STRING) +// Even though we don't use this macro any longer, we keep it in case +// some clients still depend on it. +# define GTEST_HAS_STD_STRING 1 +#elif !GTEST_HAS_STD_STRING +// The user told us that ::std::string isn't available. +# error "Google Test cannot be used where ::std::string isn't available." +#endif // !defined(GTEST_HAS_STD_STRING) + +#ifndef GTEST_HAS_GLOBAL_STRING +// The user didn't tell us whether ::string is available, so we need +// to figure it out. + +# define GTEST_HAS_GLOBAL_STRING 0 + +#endif // GTEST_HAS_GLOBAL_STRING + +#ifndef GTEST_HAS_STD_WSTRING +// The user didn't tell us whether ::std::wstring is available, so we need +// to figure it out. +// TODO(wan@google.com): uses autoconf to detect whether ::std::wstring +// is available. + +// Cygwin 1.7 and below doesn't support ::std::wstring. +// Solaris' libc++ doesn't support it either. Android has +// no support for it at least as recent as Froyo (2.2). +# define GTEST_HAS_STD_WSTRING \ + (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS)) + +#endif // GTEST_HAS_STD_WSTRING + +#ifndef GTEST_HAS_GLOBAL_WSTRING +// The user didn't tell us whether ::wstring is available, so we need +// to figure it out. +# define GTEST_HAS_GLOBAL_WSTRING \ + (GTEST_HAS_STD_WSTRING && GTEST_HAS_GLOBAL_STRING) +#endif // GTEST_HAS_GLOBAL_WSTRING + +// Determines whether RTTI is available. 
+#ifndef GTEST_HAS_RTTI +// The user didn't tell us whether RTTI is enabled, so we need to +// figure it out. + +# ifdef _MSC_VER + +# ifdef _CPPRTTI // MSVC defines this macro iff RTTI is enabled. +# define GTEST_HAS_RTTI 1 +# else +# define GTEST_HAS_RTTI 0 +# endif + +// Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled. +# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302) + +# ifdef __GXX_RTTI +// When building against STLport with the Android NDK and with +// -frtti -fno-exceptions, the build fails at link time with undefined +// references to __cxa_bad_typeid. Not sure if STL or toolchain bug, +// so disable RTTI when detected. +# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) && \ + !defined(__EXCEPTIONS) +# define GTEST_HAS_RTTI 0 +# else +# define GTEST_HAS_RTTI 1 +# endif // GTEST_OS_LINUX_ANDROID && __STLPORT_MAJOR && !__EXCEPTIONS +# else +# define GTEST_HAS_RTTI 0 +# endif // __GXX_RTTI + +// Clang defines __GXX_RTTI starting with version 3.0, but its manual recommends +// using has_feature instead. has_feature(cxx_rtti) is supported since 2.7, the +// first version with C++ support. +# elif defined(__clang__) + +# define GTEST_HAS_RTTI __has_feature(cxx_rtti) + +// Starting with version 9.0 IBM Visual Age defines __RTTI_ALL__ to 1 if +// both the typeid and dynamic_cast features are present. +# elif defined(__IBMCPP__) && (__IBMCPP__ >= 900) + +# ifdef __RTTI_ALL__ +# define GTEST_HAS_RTTI 1 +# else +# define GTEST_HAS_RTTI 0 +# endif + +# else + +// For all other compilers, we assume RTTI is enabled. +# define GTEST_HAS_RTTI 1 + +# endif // _MSC_VER + +#endif // GTEST_HAS_RTTI + +// It's this header's responsibility to #include <typeinfo> when RTTI +// is enabled. +#if GTEST_HAS_RTTI +# include +#endif + +// Determines whether Google Test can use the pthreads library. +#ifndef GTEST_HAS_PTHREAD +// The user didn't tell us explicitly, so we assume pthreads support is +// available on Linux and Mac. 
+// +// To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0 +// to your compiler flags. +# define GTEST_HAS_PTHREAD (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX \ + || GTEST_OS_QNX) +#endif // GTEST_HAS_PTHREAD + +#if GTEST_HAS_PTHREAD +// gtest-port.h guarantees to #include <pthread.h> when GTEST_HAS_PTHREAD is +// true. +# include // NOLINT + +// For timespec and nanosleep, used below. +# include // NOLINT +#endif + +// Determines whether Google Test can use tr1/tuple. You can define +// this macro to 0 to prevent Google Test from using tuple (any +// feature depending on tuple will be disabled in this mode). +#ifndef GTEST_HAS_TR1_TUPLE +# if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) +// STLport, provided with the Android NDK, has neither <tr1/tuple> nor <tuple>. +# define GTEST_HAS_TR1_TUPLE 0 +# else +// The user didn't tell us not to do it, so we assume it's OK. +# define GTEST_HAS_TR1_TUPLE 1 +# endif +#endif // GTEST_HAS_TR1_TUPLE + +// Determines whether Google Test's own tr1 tuple implementation +// should be used. +#ifndef GTEST_USE_OWN_TR1_TUPLE +// The user didn't tell us, so we need to figure it out. + +// We use our own TR1 tuple if we aren't sure the user has an +// implementation of it already. At this time, libstdc++ 4.0.0+ and +// MSVC 2010 are the only mainstream standard libraries that come +// with a TR1 tuple implementation. NVIDIA's CUDA NVCC compiler +// pretends to be GCC by defining __GNUC__ and friends, but cannot +// compile GCC's tuple implementation. MSVC 2008 (9.0) provides TR1 +// tuple in a 323 MB Feature Pack download, which we cannot assume the +// user has. QNX's QCC compiler is a modified GCC but it doesn't +// support TR1 tuple. libc++ only provides std::tuple, in C++11 mode, +// and it can be used with some compilers that define __GNUC__. 
+# if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000) \ + && !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) || _MSC_VER >= 1600 +# define GTEST_ENV_HAS_TR1_TUPLE_ 1 +# endif + +// C++11 specifies that <tuple> provides std::tuple. Use that if gtest is used +// in C++11 mode and libstdc++ isn't very old (binaries targeting OS X 10.6 +// can build with clang but need to use gcc4.2's libstdc++). +# if GTEST_LANG_CXX11 && (!defined(__GLIBCXX__) || __GLIBCXX__ > 20110325) +# define GTEST_ENV_HAS_STD_TUPLE_ 1 +# endif + +// Google Test's own tuple is used only when neither a TR1 tuple nor a +// C++11 std::tuple was detected above. +# if GTEST_ENV_HAS_TR1_TUPLE_ || GTEST_ENV_HAS_STD_TUPLE_ +# define GTEST_USE_OWN_TR1_TUPLE 0 +# else +# define GTEST_USE_OWN_TR1_TUPLE 1 +# endif + +#endif // GTEST_USE_OWN_TR1_TUPLE + +// To avoid conditional compilation everywhere, we make it +// gtest-port.h's responsibility to #include the header implementing +// tr1/tuple. +#if GTEST_HAS_TR1_TUPLE + +# if GTEST_USE_OWN_TR1_TUPLE +// This file was GENERATED by command: +// pump.py gtest-tuple.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2009 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Implements a subset of TR1 tuple needed by Google Test and Google Mock. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ + +#include // For ::std::pair. + +// The compiler used in Symbian has a bug that prevents us from declaring the +// tuple template as a friend (it complains that tuple is redefined). This +// hack bypasses the bug by declaring the members that should otherwise be +// private as public. +// Sun Studio versions < 12 also have the above bug. +// +// The macro is expanded inside each non-empty tuple class, right before the +// CopyFrom() helper and the fN_ data members, so it also determines the +// access level of those members. +#if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590) +# define GTEST_DECLARE_TUPLE_AS_FRIEND_ public: +#else +# define GTEST_DECLARE_TUPLE_AS_FRIEND_ \ + template friend class tuple; \ + private: +#endif + +// GTEST_n_TUPLE_(T) is the type of an n-tuple. 
+#define GTEST_0_TUPLE_(T) tuple<> +#define GTEST_1_TUPLE_(T) tuple +#define GTEST_2_TUPLE_(T) tuple +#define GTEST_3_TUPLE_(T) tuple +#define GTEST_4_TUPLE_(T) tuple +#define GTEST_5_TUPLE_(T) tuple +#define GTEST_6_TUPLE_(T) tuple +#define GTEST_7_TUPLE_(T) tuple +#define GTEST_8_TUPLE_(T) tuple +#define GTEST_9_TUPLE_(T) tuple +#define GTEST_10_TUPLE_(T) tuple + +// GTEST_n_TYPENAMES_(T) declares a list of n typenames. +// For example, GTEST_2_TYPENAMES_(T) expands to "typename T0, typename T1". +#define GTEST_0_TYPENAMES_(T) +#define GTEST_1_TYPENAMES_(T) typename T##0 +#define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1 +#define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2 +#define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3 +#define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4 +#define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4, typename T##5 +#define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4, typename T##5, typename T##6 +#define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4, typename T##5, typename T##6, typename T##7 +#define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4, typename T##5, typename T##6, \ + typename T##7, typename T##8 +#define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ + typename T##3, typename T##4, typename T##5, typename T##6, \ + typename T##7, typename T##8, typename T##9 + +// In theory, defining stuff in the ::std namespace is undefined +// behavior. We can do this as we are playing the role of a standard +// library vendor. +namespace std { +namespace tr1 { + +template +class tuple; + +// Anything in namespace gtest_internal is Google Test's INTERNAL +// IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code. 
+namespace gtest_internal { + +// ByRef<T>::type is T if T is a reference; otherwise it's const T&. +template +struct ByRef { typedef const T& type; }; // NOLINT +template +struct ByRef { typedef T& type; }; // NOLINT + +// A handy wrapper for ByRef. +#define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef::type + +// AddRef<T>::type is T if T is a reference; otherwise it's T&. This +// is the same as tr1::add_reference<T>::type. +template +struct AddRef { typedef T& type; }; // NOLINT +template +struct AddRef { typedef T& type; }; // NOLINT + +// A handy wrapper for AddRef. +#define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef::type + +// A helper for implementing get<k>(). +template class Get; + +// A helper for implementing tuple_element<k, T>. kIndexValid is true +// iff k < the number of fields in tuple type T. +template +struct TupleElement; + +template +struct TupleElement { + typedef T0 type; +}; + +template +struct TupleElement { + typedef T1 type; +}; + +template +struct TupleElement { + typedef T2 type; +}; + +template +struct TupleElement { + typedef T3 type; +}; + +template +struct TupleElement { + typedef T4 type; +}; + +template +struct TupleElement { + typedef T5 type; +}; + +template +struct TupleElement { + typedef T6 type; +}; + +template +struct TupleElement { + typedef T7 type; +}; + +template +struct TupleElement { + typedef T8 type; +}; + +template +struct TupleElement { + typedef T9 type; +}; + +} // namespace gtest_internal + +template <> +class tuple<> { + public: + tuple() {} + tuple(const tuple& /* t */) {} + tuple& operator=(const tuple& /* t */) { return *this; } +}; + +template +class GTEST_1_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0) : f0_(f0) {} + + tuple(const tuple& t) : f0_(t.f0_) {} + + template + tuple(const GTEST_1_TUPLE_(U)& t) : f0_(t.f0_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& 
operator=(const GTEST_1_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_1_TUPLE_(U)& t) { + f0_ = t.f0_; + return *this; + } + + T0 f0_; +}; + +template +class GTEST_2_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1) : f0_(f0), + f1_(f1) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_) {} + + template + tuple(const GTEST_2_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_) {} + template + tuple(const ::std::pair& p) : f0_(p.first), f1_(p.second) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_2_TUPLE_(U)& t) { + return CopyFrom(t); + } + template + tuple& operator=(const ::std::pair& p) { + f0_ = p.first; + f1_ = p.second; + return *this; + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_2_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + return *this; + } + + T0 f0_; + T1 f1_; +}; + +template +class GTEST_3_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2) : f0_(f0), f1_(f1), f2_(f2) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {} + + template + tuple(const GTEST_3_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_3_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_3_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; +}; + +template +class GTEST_4_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, 
GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3) : f0_(f0), f1_(f1), f2_(f2), + f3_(f3) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {} + + template + tuple(const GTEST_4_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_4_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_4_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; +}; + +template +class GTEST_5_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, + GTEST_BY_REF_(T4) f4) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_) {} + + template + tuple(const GTEST_5_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_5_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_5_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; +}; + +template +class GTEST_6_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), + f5_(f5) {} + + tuple(const tuple& t) : f0_(t.f0_), 
f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_) {} + + template + tuple(const GTEST_6_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_6_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_6_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; +}; + +template +class GTEST_7_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6) : f0_(f0), f1_(f1), f2_(f2), + f3_(f3), f4_(f4), f5_(f5), f6_(f6) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {} + + template + tuple(const GTEST_7_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_7_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_7_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; +}; + +template +class GTEST_8_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, 
GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, + GTEST_BY_REF_(T7) f7) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), + f5_(f5), f6_(f6), f7_(f7) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {} + + template + tuple(const GTEST_8_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_8_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_8_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + f7_ = t.f7_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; + T7 f7_; +}; + +template +class GTEST_9_TUPLE_(T) { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7, + GTEST_BY_REF_(T8) f8) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), + f5_(f5), f6_(f6), f7_(f7), f8_(f8) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {} + + template + tuple(const GTEST_9_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_9_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_9_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + 
f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + f7_ = t.f7_; + f8_ = t.f8_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; + T7 f7_; + T8 f8_; +}; + +template +class tuple { + public: + template friend class gtest_internal::Get; + + tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_(), + f9_() {} + + explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, + GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, + GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7, + GTEST_BY_REF_(T8) f8, GTEST_BY_REF_(T9) f9) : f0_(f0), f1_(f1), f2_(f2), + f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8), f9_(f9) {} + + tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), + f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {} + + template + tuple(const GTEST_10_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), + f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), + f9_(t.f9_) {} + + tuple& operator=(const tuple& t) { return CopyFrom(t); } + + template + tuple& operator=(const GTEST_10_TUPLE_(U)& t) { + return CopyFrom(t); + } + + GTEST_DECLARE_TUPLE_AS_FRIEND_ + + template + tuple& CopyFrom(const GTEST_10_TUPLE_(U)& t) { + f0_ = t.f0_; + f1_ = t.f1_; + f2_ = t.f2_; + f3_ = t.f3_; + f4_ = t.f4_; + f5_ = t.f5_; + f6_ = t.f6_; + f7_ = t.f7_; + f8_ = t.f8_; + f9_ = t.f9_; + return *this; + } + + T0 f0_; + T1 f1_; + T2 f2_; + T3 f3_; + T4 f4_; + T5 f5_; + T6 f6_; + T7 f7_; + T8 f8_; + T9 f9_; +}; + +// 6.1.3.2 Tuple creation functions. + +// Known limitations: we don't support passing an +// std::tr1::reference_wrapper to make_tuple(). And we don't +// implement tie(). 
+ +// make_tuple() is overloaded for 0 through 10 arguments; each non-empty +// overload passes its arguments, in order, to the constructor of the tuple +// it returns. +inline tuple<> make_tuple() { return tuple<>(); } + +template +inline GTEST_1_TUPLE_(T) make_tuple(const T0& f0) { + return GTEST_1_TUPLE_(T)(f0); +} + +template +inline GTEST_2_TUPLE_(T) make_tuple(const T0& f0, const T1& f1) { + return GTEST_2_TUPLE_(T)(f0, f1); +} + +template +inline GTEST_3_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2) { + return GTEST_3_TUPLE_(T)(f0, f1, f2); +} + +template +inline GTEST_4_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3) { + return GTEST_4_TUPLE_(T)(f0, f1, f2, f3); +} + +template +inline GTEST_5_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4) { + return GTEST_5_TUPLE_(T)(f0, f1, f2, f3, f4); +} + +template +inline GTEST_6_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5) { + return GTEST_6_TUPLE_(T)(f0, f1, f2, f3, f4, f5); +} + +template +inline GTEST_7_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6) { + return GTEST_7_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6); +} + +template +inline GTEST_8_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7) { + return GTEST_8_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7); +} + +template +inline GTEST_9_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7, + const T8& f8) { + return GTEST_9_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8); +} + +template +inline GTEST_10_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, + const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7, + const T8& f8, const T9& f9) { + return GTEST_10_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9); +} + +// 6.1.3.3 Tuple helper classes. 
+ +// tuple_size reports the number of fields of a tuple type through its +// static member 'value'; the definitions below cover sizes 0 through 10. +template struct tuple_size; + +template +struct tuple_size { + static const int value = 0; +}; + +template +struct tuple_size { + static const int value = 1; +}; + +template +struct tuple_size { + static const int value = 2; +}; + +template +struct tuple_size { + static const int value = 3; +}; + +template +struct tuple_size { + static const int value = 4; +}; + +template +struct tuple_size { + static const int value = 5; +}; + +template +struct tuple_size { + static const int value = 6; +}; + +template +struct tuple_size { + static const int value = 7; +}; + +template +struct tuple_size { + static const int value = 8; +}; + +template +struct tuple_size { + static const int value = 9; +}; + +template +struct tuple_size { + static const int value = 10; +}; + +// tuple_element exposes the type of field k of Tuple as 'type', by +// delegating to gtest_internal::TupleElement with a flag telling whether +// k is smaller than the tuple's size. +template +struct tuple_element { + typedef typename gtest_internal::TupleElement< + k < (tuple_size::value), k, Tuple>::type type; +}; + +#define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element::type + +// 6.1.3.4 Element access. 
+ +// Get<k>::Field() and Get<k>::ConstField() return the fk_ member of the +// tuple passed to them, for k from 0 to 9; the get() functions that follow +// this namespace call them. +namespace gtest_internal { + +template <> +class Get<0> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple)) + Field(Tuple& t) { return t.f0_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple)) + ConstField(const Tuple& t) { return t.f0_; } +}; + +template <> +class Get<1> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple)) + Field(Tuple& t) { return t.f1_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple)) + ConstField(const Tuple& t) { return t.f1_; } +}; + +template <> +class Get<2> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple)) + Field(Tuple& t) { return t.f2_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple)) + ConstField(const Tuple& t) { return t.f2_; } +}; + +template <> +class Get<3> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple)) + Field(Tuple& t) { return t.f3_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple)) + ConstField(const Tuple& t) { return t.f3_; } +}; + +template <> +class Get<4> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple)) + Field(Tuple& t) { return t.f4_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple)) + ConstField(const Tuple& t) { return t.f4_; } +}; + +template <> +class Get<5> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple)) + Field(Tuple& t) { return t.f5_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple)) + ConstField(const Tuple& t) { return t.f5_; } +}; + +template <> +class Get<6> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple)) + Field(Tuple& t) { return t.f6_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple)) + ConstField(const Tuple& t) { return t.f6_; } +}; + +template <> +class Get<7> { + public: + template + 
static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple)) + Field(Tuple& t) { return t.f7_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple)) + ConstField(const Tuple& t) { return t.f7_; } +}; + +template <> +class Get<8> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple)) + Field(Tuple& t) { return t.f8_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple)) + ConstField(const Tuple& t) { return t.f8_; } +}; + +template <> +class Get<9> { + public: + template + static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple)) + Field(Tuple& t) { return t.f9_; } // NOLINT + + template + static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple)) + ConstField(const Tuple& t) { return t.f9_; } +}; + +} // namespace gtest_internal + +template +GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T))) +get(GTEST_10_TUPLE_(T)& t) { + return gtest_internal::Get::Field(t); +} + +template +GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T))) +get(const GTEST_10_TUPLE_(T)& t) { + return gtest_internal::Get::ConstField(t); +} + +// 6.1.3.5 Relational operators + +// We only implement == and !=, as we don't have a need for the rest yet. + +namespace gtest_internal { + +// SameSizeTuplePrefixComparator::Eq(t1, t2) returns true if the +// first k fields of t1 equals the first k fields of t2. +// SameSizeTuplePrefixComparator(k1, k2) would be a compiler error if +// k1 != k2. 
+template +struct SameSizeTuplePrefixComparator; + +template <> +struct SameSizeTuplePrefixComparator<0, 0> { + template + static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) { + return true; + } +}; + +template +struct SameSizeTuplePrefixComparator { + template + static bool Eq(const Tuple1& t1, const Tuple2& t2) { + return SameSizeTuplePrefixComparator::Eq(t1, t2) && + ::std::tr1::get(t1) == ::std::tr1::get(t2); + } +}; + +} // namespace gtest_internal + +template +inline bool operator==(const GTEST_10_TUPLE_(T)& t, + const GTEST_10_TUPLE_(U)& u) { + return gtest_internal::SameSizeTuplePrefixComparator< + tuple_size::value, + tuple_size::value>::Eq(t, u); +} + +template +inline bool operator!=(const GTEST_10_TUPLE_(T)& t, + const GTEST_10_TUPLE_(U)& u) { return !(t == u); } + +// 6.1.4 Pairs. +// Unimplemented. + +} // namespace tr1 +} // namespace std + +#undef GTEST_0_TUPLE_ +#undef GTEST_1_TUPLE_ +#undef GTEST_2_TUPLE_ +#undef GTEST_3_TUPLE_ +#undef GTEST_4_TUPLE_ +#undef GTEST_5_TUPLE_ +#undef GTEST_6_TUPLE_ +#undef GTEST_7_TUPLE_ +#undef GTEST_8_TUPLE_ +#undef GTEST_9_TUPLE_ +#undef GTEST_10_TUPLE_ + +#undef GTEST_0_TYPENAMES_ +#undef GTEST_1_TYPENAMES_ +#undef GTEST_2_TYPENAMES_ +#undef GTEST_3_TYPENAMES_ +#undef GTEST_4_TYPENAMES_ +#undef GTEST_5_TYPENAMES_ +#undef GTEST_6_TYPENAMES_ +#undef GTEST_7_TYPENAMES_ +#undef GTEST_8_TYPENAMES_ +#undef GTEST_9_TYPENAMES_ +#undef GTEST_10_TYPENAMES_ + +#undef GTEST_DECLARE_TUPLE_AS_FRIEND_ +#undef GTEST_BY_REF_ +#undef GTEST_ADD_REF_ +#undef GTEST_TUPLE_ELEMENT_ + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ +# elif GTEST_ENV_HAS_STD_TUPLE_ +# include +// C++11 puts its tuple into the ::std namespace rather than +// ::std::tr1. gtest expects tuple to live in ::std::tr1, so put it there. +// This causes undefined behavior, but supported compilers react in +// the way we intend. 
+namespace std { +namespace tr1 { +using ::std::get; +using ::std::make_tuple; +using ::std::tuple; +using ::std::tuple_element; +using ::std::tuple_size; +} +} + +# elif GTEST_OS_SYMBIAN + +// On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to +// use STLport's tuple implementation, which unfortunately doesn't +// work as the copy of STLport distributed with Symbian is incomplete. +// By making sure BOOST_HAS_TR1_TUPLE is undefined, we force Boost to +// use its own tuple implementation. +# ifdef BOOST_HAS_TR1_TUPLE +# undef BOOST_HAS_TR1_TUPLE +# endif // BOOST_HAS_TR1_TUPLE + +// This prevents , which defines +// BOOST_HAS_TR1_TUPLE, from being #included by Boost's . +# define BOOST_TR1_DETAIL_CONFIG_HPP_INCLUDED +# include + +# elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40000) +// GCC 4.0+ implements tr1/tuple in the header. This does +// not conform to the TR1 spec, which requires the header to be . + +# if !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302 +// Until version 4.3.2, gcc has a bug that causes , +// which is #included by , to not compile when RTTI is +// disabled. _TR1_FUNCTIONAL is the header guard for +// . Hence the following #define is a hack to prevent +// from being included. +# define _TR1_FUNCTIONAL 1 +# include +# undef _TR1_FUNCTIONAL // Allows the user to #include + // if he chooses to. +# else +# include // NOLINT +# endif // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302 + +# else +// If the compiler is not GCC 4.0+, we assume the user is using a +// spec-conforming TR1 implementation. +# include // NOLINT +# endif // GTEST_USE_OWN_TR1_TUPLE + +#endif // GTEST_HAS_TR1_TUPLE + +// Determines whether clone(2) is supported. +// Usually it will only be available on Linux, excluding +// Linux on the Itanium architecture. +// Also see http://linux.die.net/man/2/clone. +#ifndef GTEST_HAS_CLONE +// The user didn't tell us, so we need to figure it out. 
+ +# if GTEST_OS_LINUX && !defined(__ia64__) +# if GTEST_OS_LINUX_ANDROID +// On Android, clone() is only available on ARM starting with Gingerbread. +# if defined(__arm__) && __ANDROID_API__ >= 9 +# define GTEST_HAS_CLONE 1 +# else +# define GTEST_HAS_CLONE 0 +# endif +# else +# define GTEST_HAS_CLONE 1 +# endif +# else +# define GTEST_HAS_CLONE 0 +# endif // GTEST_OS_LINUX && !defined(__ia64__) + +#endif // GTEST_HAS_CLONE + +// Determines whether to support stream redirection. This is used to test +// output correctness and to implement death tests. +#ifndef GTEST_HAS_STREAM_REDIRECTION +// By default, we assume that stream redirection is supported on all +// platforms except known mobile ones. +# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN +# define GTEST_HAS_STREAM_REDIRECTION 0 +# else +# define GTEST_HAS_STREAM_REDIRECTION 1 +# endif // !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_SYMBIAN +#endif // GTEST_HAS_STREAM_REDIRECTION + +// Determines whether to support death tests. +// Google Test does not support death tests for VC 7.1 and earlier as +// abort() in a VC 7.1 application compiled as GUI in debug config +// pops up a dialog window that cannot be suppressed programmatically. +#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \ + (GTEST_OS_MAC && !GTEST_OS_IOS) || GTEST_OS_IOS_SIMULATOR || \ + (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \ + GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX || \ + GTEST_OS_OPENBSD || GTEST_OS_QNX) +# define GTEST_HAS_DEATH_TEST 1 +# include // NOLINT +#endif + +// We don't support MSVC 7.1 with exceptions disabled now. Therefore +// all the compilers we care about are adequate for supporting +// value-parameterized tests. +#define GTEST_HAS_PARAM_TEST 1 + +// Determines whether to support type-driven tests. + +// Typed tests need and variadic macros, which GCC, VC++ 8.0, +// Sun Pro CC, IBM Visual Age, and HP aCC support. 
+#if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__SUNPRO_CC) || \ + defined(__IBMCPP__) || defined(__HP_aCC) +# define GTEST_HAS_TYPED_TEST 1 +# define GTEST_HAS_TYPED_TEST_P 1 +#endif + +// Determines whether to support Combine(). This only makes sense when +// value-parameterized tests are enabled. The implementation doesn't +// work on Sun Studio since it doesn't understand templated conversion +// operators. +#if GTEST_HAS_PARAM_TEST && GTEST_HAS_TR1_TUPLE && !defined(__SUNPRO_CC) +# define GTEST_HAS_COMBINE 1 +#endif + +// Determines whether the system compiler uses UTF-16 for encoding wide strings. +#define GTEST_WIDE_STRING_USES_UTF16_ \ + (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_SYMBIAN || GTEST_OS_AIX) + +// Determines whether test results can be streamed to a socket. +#if GTEST_OS_LINUX +# define GTEST_CAN_STREAM_RESULTS_ 1 +#endif + +// Defines some utility macros. + +// The GNU compiler emits a warning if nested "if" statements are followed by +// an "else" statement and braces are not used to explicitly disambiguate the +// "else" binding. This leads to problems with code like: +// +// if (gate) +// ASSERT_*(condition) << "Some message"; +// +// The "switch (0) case 0:" idiom is used to suppress this. +#ifdef __INTEL_COMPILER +# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ +#else +# define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default: // NOLINT +#endif + +// Use this annotation at the end of a struct/class definition to +// prevent the compiler from optimizing away instances that are never +// used. This is useful when all interesting logic happens inside the +// c'tor and / or d'tor. Example: +// +// struct Foo { +// Foo() { ... } +// } GTEST_ATTRIBUTE_UNUSED_; +// +// Also use it after a variable or parameter declaration to tell the +// compiler the variable/parameter does not have to be used. 
+#if defined(__GNUC__) && !defined(COMPILER_ICC) +# define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused)) +#else +# define GTEST_ATTRIBUTE_UNUSED_ +#endif + +// A macro to disallow operator= +// This should be used in the private: declarations for a class. +#define GTEST_DISALLOW_ASSIGN_(type)\ + void operator=(type const &) + +// A macro to disallow copy constructor and operator= +// This should be used in the private: declarations for a class. +#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type)\ + type(type const &);\ + GTEST_DISALLOW_ASSIGN_(type) + +// Tell the compiler to warn about unused return values for functions declared +// with this macro. The macro should be used on function declarations +// following the argument list: +// +// Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_; +#if defined(__GNUC__) && (GTEST_GCC_VER_ >= 30400) && !defined(COMPILER_ICC) +# define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result)) +#else +# define GTEST_MUST_USE_RESULT_ +#endif // __GNUC__ && (GTEST_GCC_VER_ >= 30400) && !COMPILER_ICC + +// Determine whether the compiler supports Microsoft's Structured Exception +// Handling. This is supported by several Windows compilers but generally +// does not exist on any other system. +#ifndef GTEST_HAS_SEH +// The user didn't tell us, so we need to figure it out. + +# if defined(_MSC_VER) || defined(__BORLANDC__) +// These two compilers are known to support SEH. +# define GTEST_HAS_SEH 1 +# else +// Assume no SEH. +# define GTEST_HAS_SEH 0 +# endif + +#endif // GTEST_HAS_SEH + +#ifdef _MSC_VER + +# if GTEST_LINKED_AS_SHARED_LIBRARY +# define GTEST_API_ __declspec(dllimport) +# elif GTEST_CREATE_SHARED_LIBRARY +# define GTEST_API_ __declspec(dllexport) +# endif + +#endif // _MSC_VER + +#ifndef GTEST_API_ +# define GTEST_API_ +#endif + +#ifdef __GNUC__ +// Ask the compiler to never inline a given function. 
+# define GTEST_NO_INLINE_ __attribute__((noinline)) +#else +# define GTEST_NO_INLINE_ +#endif + +// _LIBCPP_VERSION is defined by the libc++ library from the LLVM project. +#if defined(__GLIBCXX__) || defined(_LIBCPP_VERSION) +# define GTEST_HAS_CXXABI_H_ 1 +#else +# define GTEST_HAS_CXXABI_H_ 0 +#endif + +namespace testing { + +class Message; + +namespace internal { + +// A secret type that Google Test users don't know about. It has no +// definition on purpose. Therefore it's impossible to create a +// Secret object, which is what we want. +class Secret; + +// The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time +// expression is true. For example, you could use it to verify the +// size of a static array: +// +// GTEST_COMPILE_ASSERT_(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES, +// content_type_names_incorrect_size); +// +// or to make sure a struct is smaller than a certain size: +// +// GTEST_COMPILE_ASSERT_(sizeof(foo) < 128, foo_too_large); +// +// The second argument to the macro is the name of the variable. If +// the expression is false, most compilers will issue a warning/error +// containing the name of the variable. + +template +struct CompileAssert { +}; + +#define GTEST_COMPILE_ASSERT_(expr, msg) \ + typedef ::testing::internal::CompileAssert<(static_cast(expr))> \ + msg[static_cast(expr) ? 1 : -1] GTEST_ATTRIBUTE_UNUSED_ + +// Implementation details of GTEST_COMPILE_ASSERT_: +// +// - GTEST_COMPILE_ASSERT_ works by defining an array type that has -1 +// elements (and thus is invalid) when the expression is false. +// +// - The simpler definition +// +// #define GTEST_COMPILE_ASSERT_(expr, msg) typedef char msg[(expr) ? 1 : -1] +// +// does not work, as gcc supports variable-length arrays whose sizes +// are determined at run-time (this is gcc's extension and not part +// of the C++ standard). 
As a result, gcc fails to reject the +// following code with the simple definition: +// +// int foo; +// GTEST_COMPILE_ASSERT_(foo, msg); // not supposed to compile as foo is +// // not a compile-time constant. +// +// - By using the type CompileAssert<(bool(expr))>, we ensures that +// expr is a compile-time constant. (Template arguments must be +// determined at compile-time.) +// +// - The outter parentheses in CompileAssert<(bool(expr))> are necessary +// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written +// +// CompileAssert +// +// instead, these compilers will refuse to compile +// +// GTEST_COMPILE_ASSERT_(5 > 0, some_message); +// +// (They seem to think the ">" in "5 > 0" marks the end of the +// template argument list.) +// +// - The array size is (bool(expr) ? 1 : -1), instead of simply +// +// ((expr) ? 1 : -1). +// +// This is to avoid running into a bug in MS VC 7.1, which +// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. + +// StaticAssertTypeEqHelper is used by StaticAssertTypeEq defined in gtest.h. +// +// This template is declared, but intentionally undefined. +template +struct StaticAssertTypeEqHelper; + +template +struct StaticAssertTypeEqHelper {}; + +#if GTEST_HAS_GLOBAL_STRING +typedef ::string string; +#else +typedef ::std::string string; +#endif // GTEST_HAS_GLOBAL_STRING + +#if GTEST_HAS_GLOBAL_WSTRING +typedef ::wstring wstring; +#elif GTEST_HAS_STD_WSTRING +typedef ::std::wstring wstring; +#endif // GTEST_HAS_GLOBAL_WSTRING + +// A helper for suppressing warnings on constant condition. It just +// returns 'condition'. +GTEST_API_ bool IsTrue(bool condition); + +// Defines scoped_ptr. + +// This implementation of scoped_ptr is PARTIAL - it only contains +// enough stuff to satisfy Google Test's need. 
+template +class scoped_ptr { + public: + typedef T element_type; + + explicit scoped_ptr(T* p = NULL) : ptr_(p) {} + ~scoped_ptr() { reset(); } + + T& operator*() const { return *ptr_; } + T* operator->() const { return ptr_; } + T* get() const { return ptr_; } + + T* release() { + T* const ptr = ptr_; + ptr_ = NULL; + return ptr; + } + + void reset(T* p = NULL) { + if (p != ptr_) { + if (IsTrue(sizeof(T) > 0)) { // Makes sure T is a complete type. + delete ptr_; + } + ptr_ = p; + } + } + + private: + T* ptr_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr); +}; + +// Defines RE. + +// A simple C++ wrapper for . It uses the POSIX Extended +// Regular Expression syntax. +class GTEST_API_ RE { + public: + // A copy constructor is required by the Standard to initialize object + // references from r-values. + RE(const RE& other) { Init(other.pattern()); } + + // Constructs an RE from a string. + RE(const ::std::string& regex) { Init(regex.c_str()); } // NOLINT + +#if GTEST_HAS_GLOBAL_STRING + + RE(const ::string& regex) { Init(regex.c_str()); } // NOLINT + +#endif // GTEST_HAS_GLOBAL_STRING + + RE(const char* regex) { Init(regex); } // NOLINT + ~RE(); + + // Returns the string representation of the regex. + const char* pattern() const { return pattern_; } + + // FullMatch(str, re) returns true iff regular expression re matches + // the entire str. + // PartialMatch(str, re) returns true iff regular expression re + // matches a substring of str (including str itself). + // + // TODO(wan@google.com): make FullMatch() and PartialMatch() work + // when str contains NUL characters. 
+ static bool FullMatch(const ::std::string& str, const RE& re) { + return FullMatch(str.c_str(), re); + } + static bool PartialMatch(const ::std::string& str, const RE& re) { + return PartialMatch(str.c_str(), re); + } + +#if GTEST_HAS_GLOBAL_STRING + + static bool FullMatch(const ::string& str, const RE& re) { + return FullMatch(str.c_str(), re); + } + static bool PartialMatch(const ::string& str, const RE& re) { + return PartialMatch(str.c_str(), re); + } + +#endif // GTEST_HAS_GLOBAL_STRING + + static bool FullMatch(const char* str, const RE& re); + static bool PartialMatch(const char* str, const RE& re); + + private: + void Init(const char* regex); + + // We use a const char* instead of an std::string, as Google Test used to be + // used where std::string is not available. TODO(wan@google.com): change to + // std::string. + const char* pattern_; + bool is_valid_; + +#if GTEST_USES_POSIX_RE + + regex_t full_regex_; // For FullMatch(). + regex_t partial_regex_; // For PartialMatch(). + +#else // GTEST_USES_SIMPLE_RE + + const char* full_pattern_; // For FullMatch(); + +#endif + + GTEST_DISALLOW_ASSIGN_(RE); +}; + +// Formats a source file path and a line number as they would appear +// in an error message from the compiler used to compile this code. +GTEST_API_ ::std::string FormatFileLocation(const char* file, int line); + +// Formats a file location for compiler-independent XML output. +// Although this function is not platform dependent, we put it next to +// FormatFileLocation in order to contrast the two functions. +GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file, + int line); + +// Defines logging utilities: +// GTEST_LOG_(severity) - logs messages at the specified severity level. The +// message itself is streamed into the macro. +// LogToStderr() - directs all log messages to stderr. +// FlushInfoLog() - flushes informational log messages. 
+ +enum GTestLogSeverity { + GTEST_INFO, + GTEST_WARNING, + GTEST_ERROR, + GTEST_FATAL +}; + +// Formats log entry severity, provides a stream object for streaming the +// log message, and terminates the message with a newline when going out of +// scope. +class GTEST_API_ GTestLog { + public: + GTestLog(GTestLogSeverity severity, const char* file, int line); + + // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program. + ~GTestLog(); + + ::std::ostream& GetStream() { return ::std::cerr; } + + private: + const GTestLogSeverity severity_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog); +}; + +#define GTEST_LOG_(severity) \ + ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \ + __FILE__, __LINE__).GetStream() + +inline void LogToStderr() {} +inline void FlushInfoLog() { fflush(NULL); } + +// INTERNAL IMPLEMENTATION - DO NOT USE. +// +// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition +// is not satisfied. +// Synopsys: +// GTEST_CHECK_(boolean_condition); +// or +// GTEST_CHECK_(boolean_condition) << "Additional message"; +// +// This checks the condition and if the condition is not satisfied +// it prints message about the condition violation, including the +// condition itself, plus additional message streamed into it, if any, +// and then it aborts the program. It aborts the program irrespective of +// whether it is built in the debug mode or not. +#define GTEST_CHECK_(condition) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::IsTrue(condition)) \ + ; \ + else \ + GTEST_LOG_(FATAL) << "Condition " #condition " failed. " + +// An all-mode assert to verify that the given POSIX-style function +// call returns 0 (indicating success). Known limitation: this +// doesn't expand to a balanced 'if' statement, so enclose the macro +// in {} if you need to use it as the only statement in an 'if' +// branch. 
// Evaluates posix_call once; a non-zero result is logged at FATAL severity
// together with the stringified call.  The message starts with a space so
// that the call text and "failed" are not run together in the log line.
#define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \
  if (const int gtest_error = (posix_call)) \
    GTEST_LOG_(FATAL) << #posix_call << " failed with error " \
                      << gtest_error

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Use ImplicitCast_ as a safe version of static_cast for upcasting in
// the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a
// const Foo*).  When you use ImplicitCast_, the compiler checks that
// the cast is safe.  Such explicit ImplicitCast_s are necessary in
// surprisingly many situations where C++ demands an exact type match
// instead of an argument type convertible to a target type.
//
// The syntax for using ImplicitCast_ is the same as for static_cast:
//
//   ImplicitCast_<ToType>(expr)
//
// ImplicitCast_ would have been part of the C++ standard library,
// but the proposal was submitted too late.  It will probably make
// its way into the language in the future.
//
// This relatively ugly name is intentional. It prevents clashes with
// similar functions users may have (e.g., implicit_cast). The internal
// namespace alone is not enough because the function can be found by ADL.
//
// NOTE(review): the template parameter list was stripped from the dump this
// was recovered from and is reconstructed here -- confirm against upstream.
// The conversion happens implicitly at the call site when the argument is
// bound to the by-value parameter; the body just hands it back.
template<typename To>
inline To ImplicitCast_(To x) { return x; }

// When you upcast (that is, cast a pointer from type Foo to type
// SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts
// always succeed.  When you downcast (that is, cast a pointer from
// type Foo to type SubclassOfFoo), static_cast<> isn't safe, because
// how do you know the pointer is really of type SubclassOfFoo?  It
// could be a bare Foo, or of type DifferentSubclassOfFoo.  Thus,
// when you downcast, you should use this macro.  In debug mode, we
// use dynamic_cast<> to double-check the downcast is legal (we die
// if it's not).  In normal mode, we do the efficient static_cast<>
// instead.  Thus, it's important to test in debug mode to make sure
// the cast is legal!
//    This is the only place in the code we should use dynamic_cast<>.
+// In particular, you SHOULDN'T be using dynamic_cast<> in order to +// do RTTI (eg code like this: +// if (dynamic_cast(foo)) HandleASubclass1Object(foo); +// if (dynamic_cast(foo)) HandleASubclass2Object(foo); +// You should design the code some other way not to need this. +// +// This relatively ugly name is intentional. It prevents clashes with +// similar functions users may have (e.g., down_cast). The internal +// namespace alone is not enough because the function can be found by ADL. +template // use like this: DownCast_(foo); +inline To DownCast_(From* f) { // so we only accept pointers + // Ensures that To is a sub-type of From *. This test is here only + // for compile-time type checking, and has no overhead in an + // optimized build at run-time, as it will be optimized away + // completely. + if (false) { + const To to = NULL; + ::testing::internal::ImplicitCast_(to); + } + +#if GTEST_HAS_RTTI + // RTTI: debug mode only! + GTEST_CHECK_(f == NULL || dynamic_cast(f) != NULL); +#endif + return static_cast(f); +} + +// Downcasts the pointer of type Base to Derived. +// Derived must be a subclass of Base. The parameter MUST +// point to a class of type Derived, not any subclass of it. +// When RTTI is available, the function performs a runtime +// check to enforce this. +template +Derived* CheckedDowncastToActualType(Base* base) { +#if GTEST_HAS_RTTI + GTEST_CHECK_(typeid(*base) == typeid(Derived)); + return dynamic_cast(base); // NOLINT +#else + return static_cast(base); // Poor man's downcast. +#endif +} + +#if GTEST_HAS_STREAM_REDIRECTION + +// Defines the stderr capturer: +// CaptureStdout - starts capturing stdout. +// GetCapturedStdout - stops capturing stdout and returns the captured string. +// CaptureStderr - starts capturing stderr. +// GetCapturedStderr - stops capturing stderr and returns the captured string. 
+// +GTEST_API_ void CaptureStdout(); +GTEST_API_ std::string GetCapturedStdout(); +GTEST_API_ void CaptureStderr(); +GTEST_API_ std::string GetCapturedStderr(); + +#endif // GTEST_HAS_STREAM_REDIRECTION + + +#if GTEST_HAS_DEATH_TEST + +const ::std::vector& GetInjectableArgvs(); +void SetInjectableArgvs(const ::std::vector* + new_argvs); + +// A copy of all command line arguments. Set by InitGoogleTest(). +extern ::std::vector g_argvs; + +#endif // GTEST_HAS_DEATH_TEST + +// Defines synchronization primitives. + +#if GTEST_HAS_PTHREAD + +// Sleeps for (roughly) n milli-seconds. This function is only for +// testing Google Test's own constructs. Don't use it in user tests, +// either directly or indirectly. +inline void SleepMilliseconds(int n) { + const timespec time = { + 0, // 0 seconds. + n * 1000L * 1000L, // And n ms. + }; + nanosleep(&time, NULL); +} + +// Allows a controller thread to pause execution of newly created +// threads until notified. Instances of this class must be created +// and destroyed in the controller thread. +// +// This class is only for testing Google Test's own constructs. Do not +// use it in user tests, either directly or indirectly. +class Notification { + public: + Notification() : notified_(false) { + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL)); + } + ~Notification() { + pthread_mutex_destroy(&mutex_); + } + + // Notifies all threads created with this notification to start. Must + // be called from the controller thread. + void Notify() { + pthread_mutex_lock(&mutex_); + notified_ = true; + pthread_mutex_unlock(&mutex_); + } + + // Blocks until the controller thread notifies. Must be called from a test + // thread. 
+ void WaitForNotification() { + for (;;) { + pthread_mutex_lock(&mutex_); + const bool notified = notified_; + pthread_mutex_unlock(&mutex_); + if (notified) + break; + SleepMilliseconds(10); + } + } + + private: + pthread_mutex_t mutex_; + bool notified_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification); +}; + +// As a C-function, ThreadFuncWithCLinkage cannot be templated itself. +// Consequently, it cannot select a correct instantiation of ThreadWithParam +// in order to call its Run(). Introducing ThreadWithParamBase as a +// non-templated base class for ThreadWithParam allows us to bypass this +// problem. +class ThreadWithParamBase { + public: + virtual ~ThreadWithParamBase() {} + virtual void Run() = 0; +}; + +// pthread_create() accepts a pointer to a function type with the C linkage. +// According to the Standard (7.5/1), function types with different linkages +// are different even if they are otherwise identical. Some compilers (for +// example, SunStudio) treat them as different types. Since class methods +// cannot be defined with C-linkage we need to define a free C-function to +// pass into pthread_create(). +extern "C" inline void* ThreadFuncWithCLinkage(void* thread) { + static_cast(thread)->Run(); + return NULL; +} + +// Helper class for testing Google Test's multi-threading constructs. +// To use it, write: +// +// void ThreadFunc(int param) { /* Do things with param */ } +// Notification thread_can_start; +// ... +// // The thread_can_start parameter is optional; you can supply NULL. +// ThreadWithParam thread(&ThreadFunc, 5, &thread_can_start); +// thread_can_start.Notify(); +// +// These classes are only for testing Google Test's own constructs. Do +// not use them in user tests, either directly or indirectly. 
+template +class ThreadWithParam : public ThreadWithParamBase { + public: + typedef void (*UserThreadFunc)(T); + + ThreadWithParam( + UserThreadFunc func, T param, Notification* thread_can_start) + : func_(func), + param_(param), + thread_can_start_(thread_can_start), + finished_(false) { + ThreadWithParamBase* const base = this; + // The thread can be created only after all fields except thread_ + // have been initialized. + GTEST_CHECK_POSIX_SUCCESS_( + pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base)); + } + ~ThreadWithParam() { Join(); } + + void Join() { + if (!finished_) { + GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, 0)); + finished_ = true; + } + } + + virtual void Run() { + if (thread_can_start_ != NULL) + thread_can_start_->WaitForNotification(); + func_(param_); + } + + private: + const UserThreadFunc func_; // User-supplied thread function. + const T param_; // User-supplied parameter to the thread function. + // When non-NULL, used to block execution until the controller thread + // notifies. + Notification* const thread_can_start_; + bool finished_; // true iff we know that the thread function has finished. + pthread_t thread_; // The native thread object. + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam); +}; + +// MutexBase and Mutex implement mutex on pthreads-based platforms. They +// are used in conjunction with class MutexLock: +// +// Mutex mutex; +// ... +// MutexLock lock(&mutex); // Acquires the mutex and releases it at the end +// // of the current scope. +// +// MutexBase implements behavior for both statically and dynamically +// allocated mutexes. Do not use MutexBase directly. Instead, write +// the following to define a static mutex: +// +// GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex); +// +// You can forward declare a static mutex like this: +// +// GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex); +// +// To create a dynamic mutex, just define an object of type Mutex. +class MutexBase { + public: + // Acquires this mutex. 
+ void Lock() { + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_)); + owner_ = pthread_self(); + has_owner_ = true; + } + + // Releases this mutex. + void Unlock() { + // Since the lock is being released the owner_ field should no longer be + // considered valid. We don't protect writing to has_owner_ here, as it's + // the caller's responsibility to ensure that the current thread holds the + // mutex when this is called. + // coverity[missing_lock] + has_owner_ = false; + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_)); + } + + // Does nothing if the current thread holds the mutex. Otherwise, crashes + // with high probability. + void AssertHeld() const { + GTEST_CHECK_(has_owner_ && pthread_equal(owner_, pthread_self())) + << "The current thread is not holding the mutex @" << this; + } + + // A static mutex may be used before main() is entered. It may even + // be used before the dynamic initialization stage. Therefore we + // must be able to initialize a static mutex object at link time. + // This means MutexBase has to be a POD and its member variables + // have to be public. + public: + pthread_mutex_t mutex_; // The underlying pthread mutex. + // has_owner_ indicates whether the owner_ field below contains a valid thread + // ID and is therefore safe to inspect (e.g., to use in pthread_equal()). All + // accesses to the owner_ field should be protected by a check of this field. + // An alternative might be to memset() owner_ to all zeros, but there's no + // guarantee that a zero'd pthread_t is necessarily invalid or even different + // from pthread_self(). + bool has_owner_; + pthread_t owner_; // The thread holding the mutex. +}; + +// Forward-declares a static mutex. +# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ + extern ::testing::internal::MutexBase mutex + +// Defines and statically (i.e. at link time) initializes a static mutex. 
+// The initialization list here does not explicitly initialize each field, +// instead relying on default initialization for the unspecified fields. In +// particular, the owner_ field (a pthread_t) is not explicitly initialized. +// This allows initialization to work whether pthread_t is a scalar or struct. +// The flag -Wmissing-field-initializers must not be specified for this to work. +# define GTEST_DEFINE_STATIC_MUTEX_(mutex) \ + ::testing::internal::MutexBase mutex = { PTHREAD_MUTEX_INITIALIZER, false } + +// The Mutex class can only be used for mutexes created at runtime. It +// shares its API with MutexBase otherwise. +class Mutex : public MutexBase { + public: + Mutex() { + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL)); + has_owner_ = false; + } + ~Mutex() { + GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_)); + } + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex); +}; + +// We cannot name this class MutexLock as the ctor declaration would +// conflict with a macro named MutexLock, which is defined on some +// platforms. Hence the typedef trick below. +class GTestMutexLock { + public: + explicit GTestMutexLock(MutexBase* mutex) + : mutex_(mutex) { mutex_->Lock(); } + + ~GTestMutexLock() { mutex_->Unlock(); } + + private: + MutexBase* const mutex_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock); +}; + +typedef GTestMutexLock MutexLock; + +// Helpers for ThreadLocal. + +// pthread_key_create() requires DeleteThreadLocalValue() to have +// C-linkage. Therefore it cannot be templatized to access +// ThreadLocal. Hence the need for class +// ThreadLocalValueHolderBase. +class ThreadLocalValueHolderBase { + public: + virtual ~ThreadLocalValueHolderBase() {} +}; + +// Called by pthread to delete thread-local data stored by +// pthread_setspecific(). +extern "C" inline void DeleteThreadLocalValue(void* value_holder) { + delete static_cast(value_holder); +} + +// Implements thread-local storage on pthreads-based systems. 
+// +// // Thread 1 +// ThreadLocal tl(100); // 100 is the default value for each thread. +// +// // Thread 2 +// tl.set(150); // Changes the value for thread 2 only. +// EXPECT_EQ(150, tl.get()); +// +// // Thread 1 +// EXPECT_EQ(100, tl.get()); // In thread 1, tl has the original value. +// tl.set(200); +// EXPECT_EQ(200, tl.get()); +// +// The template type argument T must have a public copy constructor. +// In addition, the default ThreadLocal constructor requires T to have +// a public default constructor. +// +// An object managed for a thread by a ThreadLocal instance is deleted +// when the thread exits. Or, if the ThreadLocal instance dies in +// that thread, when the ThreadLocal dies. It's the user's +// responsibility to ensure that all other threads using a ThreadLocal +// have exited when it dies, or the per-thread objects for those +// threads will not be deleted. +// +// Google Test only uses global ThreadLocal objects. That means they +// will die after main() has returned. Therefore, no per-thread +// object managed by Google Test will be leaked as long as all threads +// using Google Test have exited when main() returns. +template +class ThreadLocal { + public: + ThreadLocal() : key_(CreateKey()), + default_() {} + explicit ThreadLocal(const T& value) : key_(CreateKey()), + default_(value) {} + + ~ThreadLocal() { + // Destroys the managed object for the current thread, if any. + DeleteThreadLocalValue(pthread_getspecific(key_)); + + // Releases resources associated with the key. This will *not* + // delete managed objects for other threads. + GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_)); + } + + T* pointer() { return GetOrCreateValue(); } + const T* pointer() const { return GetOrCreateValue(); } + const T& get() const { return *pointer(); } + void set(const T& value) { *pointer() = value; } + + private: + // Holds a value of type T. 
+ class ValueHolder : public ThreadLocalValueHolderBase { + public: + explicit ValueHolder(const T& value) : value_(value) {} + + T* pointer() { return &value_; } + + private: + T value_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder); + }; + + static pthread_key_t CreateKey() { + pthread_key_t key; + // When a thread exits, DeleteThreadLocalValue() will be called on + // the object managed for that thread. + GTEST_CHECK_POSIX_SUCCESS_( + pthread_key_create(&key, &DeleteThreadLocalValue)); + return key; + } + + T* GetOrCreateValue() const { + ThreadLocalValueHolderBase* const holder = + static_cast(pthread_getspecific(key_)); + if (holder != NULL) { + return CheckedDowncastToActualType(holder)->pointer(); + } + + ValueHolder* const new_holder = new ValueHolder(default_); + ThreadLocalValueHolderBase* const holder_base = new_holder; + GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base)); + return new_holder->pointer(); + } + + // A key pthreads uses for looking up per-thread values. + const pthread_key_t key_; + const T default_; // The default value for each thread. + + GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal); +}; + +# define GTEST_IS_THREADSAFE 1 + +#else // GTEST_HAS_PTHREAD + +// A dummy implementation of synchronization primitives (mutex, lock, +// and thread-local variable). Necessary for compiling Google Test where +// mutex is not supported - using Google Test in multiple threads is not +// supported on such platforms. 
+ +class Mutex { + public: + Mutex() {} + void Lock() {} + void Unlock() {} + void AssertHeld() const {} +}; + +# define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ + extern ::testing::internal::Mutex mutex + +# define GTEST_DEFINE_STATIC_MUTEX_(mutex) ::testing::internal::Mutex mutex + +class GTestMutexLock { + public: + explicit GTestMutexLock(Mutex*) {} // NOLINT +}; + +typedef GTestMutexLock MutexLock; + +template +class ThreadLocal { + public: + ThreadLocal() : value_() {} + explicit ThreadLocal(const T& value) : value_(value) {} + T* pointer() { return &value_; } + const T* pointer() const { return &value_; } + const T& get() const { return value_; } + void set(const T& value) { value_ = value; } + private: + T value_; +}; + +// The above synchronization primitives have dummy implementations. +// Therefore Google Test is not thread-safe. +# define GTEST_IS_THREADSAFE 0 + +#endif // GTEST_HAS_PTHREAD + +// Returns the number of threads running in the process, or 0 to indicate that +// we cannot detect it. +GTEST_API_ size_t GetThreadCount(); + +// Passing non-POD classes through ellipsis (...) crashes the ARM +// compiler and generates a warning in Sun Studio. The Nokia Symbian +// and the IBM XL C/C++ compiler try to instantiate a copy constructor +// for objects passed through ellipsis (...), failing for uncopyable +// objects. We define this to ensure that only POD is passed through +// ellipsis on these systems. +#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || defined(__SUNPRO_CC) +// We lose support for NULL detection where the compiler doesn't like +// passing non-POD classes through ellipsis (...). +# define GTEST_ELLIPSIS_NEEDS_POD_ 1 +#else +# define GTEST_CAN_COMPARE_NULL 1 +#endif + +// The Nokia Symbian and IBM XL C/C++ compilers cannot decide between +// const T& and const T* in a function template. These compilers +// _can_ decide between class template specializations for T and T*, +// so a tr1::type_traits-like is_pointer works. 
#if defined(__SYMBIAN32__) || defined(__IBMCPP__)
# define GTEST_NEEDS_IS_POINTER_ 1
#endif

// Compile-time boolean constant; `type` names the instantiation itself and
// `value` carries the boolean.
template <bool bool_value>
struct bool_constant {
  typedef bool_constant<bool_value> type;
  static const bool value = bool_value;
};
// Out-of-class definition of the static member declared above.
template <bool bool_value> const bool bool_constant<bool_value>::value;

typedef bool_constant<false> false_type;
typedef bool_constant<true> true_type;

// is_pointer<T>::value is true for T of the form U* and false otherwise.
template <typename T>
struct is_pointer : public false_type {};

template <typename T>
struct is_pointer<T*> : public true_type {};

// Maps an iterator type to the type of the elements it refers to. Class-type
// iterators supply a nested value_type; raw pointers are handled by the
// partial specializations below.
template <typename Iterator>
struct IteratorTraits {
  typedef typename Iterator::value_type value_type;
};

template <typename T>
struct IteratorTraits<T*> {
  typedef T value_type;
};

template <typename T>
struct IteratorTraits<const T*> {
  typedef T value_type;
};

#if GTEST_OS_WINDOWS
# define GTEST_PATH_SEP_ "\\"
# define GTEST_HAS_ALT_PATH_SEP_ 1
// The biggest signed integer type the compiler supports.
typedef __int64 BiggestInt;
#else
# define GTEST_PATH_SEP_ "/"
# define GTEST_HAS_ALT_PATH_SEP_ 0
typedef long long BiggestInt;  // NOLINT
#endif  // GTEST_OS_WINDOWS

// Utilities for char.

// isspace(int ch) and friends accept an unsigned char or EOF. char
// may be signed, depending on the compiler (or compiler flags).
// Therefore we need to cast a char to unsigned char before calling
// isspace(), etc.
+ +inline bool IsAlpha(char ch) { + return isalpha(static_cast(ch)) != 0; +} +inline bool IsAlNum(char ch) { + return isalnum(static_cast(ch)) != 0; +} +inline bool IsDigit(char ch) { + return isdigit(static_cast(ch)) != 0; +} +inline bool IsLower(char ch) { + return islower(static_cast(ch)) != 0; +} +inline bool IsSpace(char ch) { + return isspace(static_cast(ch)) != 0; +} +inline bool IsUpper(char ch) { + return isupper(static_cast(ch)) != 0; +} +inline bool IsXDigit(char ch) { + return isxdigit(static_cast(ch)) != 0; +} +inline bool IsXDigit(wchar_t ch) { + const unsigned char low_byte = static_cast(ch); + return ch == low_byte && isxdigit(low_byte) != 0; +} + +inline char ToLower(char ch) { + return static_cast(tolower(static_cast(ch))); +} +inline char ToUpper(char ch) { + return static_cast(toupper(static_cast(ch))); +} + +// The testing::internal::posix namespace holds wrappers for common +// POSIX functions. These wrappers hide the differences between +// Windows/MSVC and POSIX systems. Since some compilers define these +// standard functions as macros, the wrapper cannot have the same name +// as the wrapped function. + +namespace posix { + +// Functions with a different name on Windows. 
+ +#if GTEST_OS_WINDOWS + +typedef struct _stat StatStruct; + +# ifdef __BORLANDC__ +inline int IsATTY(int fd) { return isatty(fd); } +inline int StrCaseCmp(const char* s1, const char* s2) { + return stricmp(s1, s2); +} +inline char* StrDup(const char* src) { return strdup(src); } +# else // !__BORLANDC__ +# if GTEST_OS_WINDOWS_MOBILE +inline int IsATTY(int /* fd */) { return 0; } +# else +inline int IsATTY(int fd) { return _isatty(fd); } +# endif // GTEST_OS_WINDOWS_MOBILE +inline int StrCaseCmp(const char* s1, const char* s2) { + return _stricmp(s1, s2); +} +inline char* StrDup(const char* src) { return _strdup(src); } +# endif // __BORLANDC__ + +# if GTEST_OS_WINDOWS_MOBILE +inline int FileNo(FILE* file) { return reinterpret_cast(_fileno(file)); } +// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this +// time and thus not defined there. +# else +inline int FileNo(FILE* file) { return _fileno(file); } +inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); } +inline int RmDir(const char* dir) { return _rmdir(dir); } +inline bool IsDir(const StatStruct& st) { + return (_S_IFDIR & st.st_mode) != 0; +} +# endif // GTEST_OS_WINDOWS_MOBILE + +#else + +typedef struct stat StatStruct; + +inline int FileNo(FILE* file) { return fileno(file); } +inline int IsATTY(int fd) { return isatty(fd); } +inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); } +inline int StrCaseCmp(const char* s1, const char* s2) { + return strcasecmp(s1, s2); +} +inline char* StrDup(const char* src) { return strdup(src); } +inline int RmDir(const char* dir) { return rmdir(dir); } +inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); } + +#endif // GTEST_OS_WINDOWS + +// Functions deprecated by MSVC 8.0. + +#ifdef _MSC_VER +// Temporarily disable warning 4996 (deprecated function). 
+# pragma warning(push) +# pragma warning(disable:4996) +#endif + +inline const char* StrNCpy(char* dest, const char* src, size_t n) { + return strncpy(dest, src, n); +} + +// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and +// StrError() aren't needed on Windows CE at this time and thus not +// defined there. + +#if !GTEST_OS_WINDOWS_MOBILE +inline int ChDir(const char* dir) { return chdir(dir); } +#endif +inline FILE* FOpen(const char* path, const char* mode) { + return fopen(path, mode); +} +#if !GTEST_OS_WINDOWS_MOBILE +inline FILE *FReopen(const char* path, const char* mode, FILE* stream) { + return freopen(path, mode, stream); +} +inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); } +#endif +inline int FClose(FILE* fp) { return fclose(fp); } +#if !GTEST_OS_WINDOWS_MOBILE +inline int Read(int fd, void* buf, unsigned int count) { + return static_cast(read(fd, buf, count)); +} +inline int Write(int fd, const void* buf, unsigned int count) { + return static_cast(write(fd, buf, count)); +} +inline int Close(int fd) { return close(fd); } +inline const char* StrError(int errnum) { return strerror(errnum); } +#endif +inline const char* GetEnv(const char* name) { +#if GTEST_OS_WINDOWS_MOBILE + // We are on Windows CE, which has no environment variables. + return NULL; +#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9) + // Environment variables which we programmatically clear will be set to the + // empty string rather than unset (NULL). Handle that case. + const char* const env = getenv(name); + return (env != NULL && env[0] != '\0') ? env : NULL; +#else + return getenv(name); +#endif +} + +#ifdef _MSC_VER +# pragma warning(pop) // Restores the warning state. +#endif + +#if GTEST_OS_WINDOWS_MOBILE +// Windows CE has no C library. The abort() function is used in +// several places in Google Test. This implementation provides a reasonable +// imitation of standard behaviour. 
+void Abort(); +#else +inline void Abort() { abort(); } +#endif // GTEST_OS_WINDOWS_MOBILE + +} // namespace posix + +// MSVC "deprecates" snprintf and issues warnings wherever it is used. In +// order to avoid these warnings, we need to use _snprintf or _snprintf_s on +// MSVC-based platforms. We map the GTEST_SNPRINTF_ macro to the appropriate +// function in order to achieve that. We use macro definition here because +// snprintf is a variadic function. +#if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE +// MSVC 2005 and above support variadic macros. +# define GTEST_SNPRINTF_(buffer, size, format, ...) \ + _snprintf_s(buffer, size, size, format, __VA_ARGS__) +#elif defined(_MSC_VER) +// Windows CE does not define _snprintf_s and MSVC prior to 2005 doesn't +// complain about _snprintf. +# define GTEST_SNPRINTF_ _snprintf +#else +# define GTEST_SNPRINTF_ snprintf +#endif + +// The maximum number a BiggestInt can represent. This definition +// works no matter BiggestInt is represented in one's complement or +// two's complement. +// +// We cannot rely on numeric_limits in STL, as __int64 and long long +// are not part of standard C++ and numeric_limits doesn't need to be +// defined for them. +const BiggestInt kMaxBiggestInt = + ~(static_cast(1) << (8*sizeof(BiggestInt) - 1)); + +// This template class serves as a compile-time function from size to +// type. It maps a size in bytes to a primitive type with that +// size. e.g. +// +// TypeWithSize<4>::UInt +// +// is typedef-ed to be unsigned int (unsigned integer made up of 4 +// bytes). +// +// Such functionality should belong to STL, but I cannot find it +// there. +// +// Google Test uses this class in the implementation of floating-point +// comparison. +// +// For now it only handles UInt (unsigned int) as that's all Google Test +// needs. Other types can be easily added in the future if need +// arises. 
template <size_t size>
class TypeWithSize {
 public:
  // This prevents the user from using TypeWithSize<N> with incorrect
  // values of N.
  typedef void UInt;
};

// The specialization for size 4.
template <>
class TypeWithSize<4> {
 public:
  // unsigned int has size 4 in both gcc and MSVC.
  //
  // As base/basictypes.h doesn't compile on Windows, we cannot use
  // uint32, uint64, and etc here.
  typedef int Int;
  typedef unsigned int UInt;
};

// The specialization for size 8.
template <>
class TypeWithSize<8> {
 public:
#if GTEST_OS_WINDOWS
  typedef __int64 Int;
  typedef unsigned __int64 UInt;
#else
  typedef long long Int;  // NOLINT
  typedef unsigned long long UInt;  // NOLINT
#endif  // GTEST_OS_WINDOWS
};

// Integer types of known sizes.
typedef TypeWithSize<4>::Int Int32;
typedef TypeWithSize<4>::UInt UInt32;
typedef TypeWithSize<8>::Int Int64;
typedef TypeWithSize<8>::UInt UInt64;
typedef TypeWithSize<8>::Int TimeInMillis;  // Represents time in milliseconds.

// Utilities for command line flags and environment variables.

// Macro for referencing flags.
#define GTEST_FLAG(name) FLAGS_gtest_##name

// Macros for declaring flags.
#define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name)
#define GTEST_DECLARE_int32_(name) \
    GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name)
#define GTEST_DECLARE_string_(name) \
    GTEST_API_ extern ::std::string GTEST_FLAG(name)

// Macros for defining flags. The doc argument is not used by the expansion.
#define GTEST_DEFINE_bool_(name, default_val, doc) \
    GTEST_API_ bool GTEST_FLAG(name) = (default_val)
#define GTEST_DEFINE_int32_(name, default_val, doc) \
    GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val)
#define GTEST_DEFINE_string_(name, default_val, doc) \
    GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val)

// Thread annotations (expand to nothing).
#define GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)
#define GTEST_LOCK_EXCLUDED_(locks)

// Parses 'str' for a 32-bit signed integer.
If successful, writes the result +// to *value and returns true; otherwise leaves *value unchanged and returns +// false. +// TODO(chandlerc): Find a better way to refactor flag and environment parsing +// out of both gtest-port.cc and gtest.cc to avoid exporting this utility +// function. +bool ParseInt32(const Message& src_text, const char* str, Int32* value); + +// Parses a bool/Int32/string from the environment variable +// corresponding to the given Google Test flag. +bool BoolFromGTestEnv(const char* flag, bool default_val); +GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val); +const char* StringFromGTestEnv(const char* flag, const char* default_val); + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ + +#if GTEST_OS_LINUX +# include +# include +# include +# include +#endif // GTEST_OS_LINUX + +#if GTEST_HAS_EXCEPTIONS +# include +#endif + +#include +#include +#include +#include +#include +#include + +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines the Message class. +// +// IMPORTANT NOTE: Due to limitation of the C++ language, we have to +// leave some internal implementation details in this header file. +// They are clearly marked by comments like this: +// +// // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +// +// Such code is NOT meant to be used by a user directly, and is subject +// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user +// program! + +#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ +#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ + +#include + + +// Ensures that there is at least one operator<< in the global namespace. +// See Message& operator<<(...) below for why. +void operator<<(const testing::internal::Secret&, int); + +namespace testing { + +// The Message class works like an ostream repeater. +// +// Typical usage: +// +// 1. You stream a bunch of values to a Message object. +// It will remember the text in a stringstream. +// 2. Then you stream the Message object to an ostream. 
+// This causes the text in the Message to be streamed +// to the ostream. +// +// For example; +// +// testing::Message foo; +// foo << 1 << " != " << 2; +// std::cout << foo; +// +// will print "1 != 2". +// +// Message is not intended to be inherited from. In particular, its +// destructor is not virtual. +// +// Note that stringstream behaves differently in gcc and in MSVC. You +// can stream a NULL char pointer to it in the former, but not in the +// latter (it causes an access violation if you do). The Message +// class hides this difference by treating a NULL char pointer as +// "(null)". +class GTEST_API_ Message { + private: + // The type of basic IO manipulators (endl, ends, and flush) for + // narrow streams. + typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&); + + public: + // Constructs an empty Message. + Message(); + + // Copy constructor. + Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT + *ss_ << msg.GetString(); + } + + // Constructs a Message from a C-string. + explicit Message(const char* str) : ss_(new ::std::stringstream) { + *ss_ << str; + } + +#if GTEST_OS_SYMBIAN + // Streams a value (either a pointer or not) to this object. + template + inline Message& operator <<(const T& value) { + StreamHelper(typename internal::is_pointer::type(), value); + return *this; + } +#else + // Streams a non-pointer value to this object. + template + inline Message& operator <<(const T& val) { + // Some libraries overload << for STL containers. These + // overloads are defined in the global namespace instead of ::std. + // + // C++'s symbol lookup rule (i.e. Koenig lookup) says that these + // overloads are visible in either the std namespace or the global + // namespace, but not other namespaces, including the testing + // namespace which Google Test's Message class is in. 
+ // + // To allow STL containers (and other types that has a << operator + // defined in the global namespace) to be used in Google Test + // assertions, testing::Message must access the custom << operator + // from the global namespace. With this using declaration, + // overloads of << defined in the global namespace and those + // visible via Koenig lookup are both exposed in this function. + using ::operator <<; + *ss_ << val; + return *this; + } + + // Streams a pointer value to this object. + // + // This function is an overload of the previous one. When you + // stream a pointer to a Message, this definition will be used as it + // is more specialized. (The C++ Standard, section + // [temp.func.order].) If you stream a non-pointer, then the + // previous definition will be used. + // + // The reason for this overload is that streaming a NULL pointer to + // ostream is undefined behavior. Depending on the compiler, you + // may get "0", "(nil)", "(null)", or an access violation. To + // ensure consistent result across compilers, we always treat NULL + // as "(null)". + template + inline Message& operator <<(T* const& pointer) { // NOLINT + if (pointer == NULL) { + *ss_ << "(null)"; + } else { + *ss_ << pointer; + } + return *this; + } +#endif // GTEST_OS_SYMBIAN + + // Since the basic IO manipulators are overloaded for both narrow + // and wide streams, we have to provide this specialized definition + // of operator <<, even though its body is the same as the + // templatized version above. Without this definition, streaming + // endl or other basic IO manipulators to Message will confuse the + // compiler. + Message& operator <<(BasicNarrowIoManip val) { + *ss_ << val; + return *this; + } + + // Instead of 1/0, we want to see true/false for bool values. + Message& operator <<(bool b) { + return *this << (b ? "true" : "false"); + } + + // These two overloads allow streaming a wide C string to a Message + // using the UTF-8 encoding. 
+ Message& operator <<(const wchar_t* wide_c_str); + Message& operator <<(wchar_t* wide_c_str); + +#if GTEST_HAS_STD_WSTRING + // Converts the given wide string to a narrow string using the UTF-8 + // encoding, and streams the result to this Message object. + Message& operator <<(const ::std::wstring& wstr); +#endif // GTEST_HAS_STD_WSTRING + +#if GTEST_HAS_GLOBAL_WSTRING + // Converts the given wide string to a narrow string using the UTF-8 + // encoding, and streams the result to this Message object. + Message& operator <<(const ::wstring& wstr); +#endif // GTEST_HAS_GLOBAL_WSTRING + + // Gets the text streamed to this object so far as an std::string. + // Each '\0' character in the buffer is replaced with "\\0". + // + // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + std::string GetString() const; + + private: + +#if GTEST_OS_SYMBIAN + // These are needed as the Nokia Symbian Compiler cannot decide between + // const T& and const T* in a function template. The Nokia compiler _can_ + // decide between class template specializations for T and T*, so a + // tr1::type_traits-like is_pointer works, and we can overload on that. + template + inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) { + if (pointer == NULL) { + *ss_ << "(null)"; + } else { + *ss_ << pointer; + } + } + template + inline void StreamHelper(internal::false_type /*is_pointer*/, + const T& value) { + // See the comments in Message& operator <<(const T&) above for why + // we need this using statement. + using ::operator <<; + *ss_ << value; + } +#endif // GTEST_OS_SYMBIAN + + // We'll hold the text streamed to this object here. + const internal::scoped_ptr< ::std::stringstream> ss_; + + // We declare (but don't implement) this to prevent the compiler + // from implementing the assignment operator. + void operator=(const Message&); +}; + +// Streams a Message to an ostream. 
+inline std::ostream& operator <<(std::ostream& os, const Message& sb) { + return os << sb.GetString(); +} + +namespace internal { + +// Converts a streamable value to an std::string. A NULL pointer is +// converted to "(null)". When the input value is a ::string, +// ::std::string, ::wstring, or ::std::wstring object, each NUL +// character in it is replaced with "\\0". +template +std::string StreamableToString(const T& streamable) { + return (Message() << streamable).GetString(); +} + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file declares the String class and functions used internally by +// Google Test. They are subject to change without notice. They should not used +// by code external to Google Test. +// +// This header file is #included by . +// It should not be #included by other files. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ + +#ifdef __BORLANDC__ +// string.h is not guaranteed to provide strcpy on C++ Builder. +# include +#endif + +#include +#include + + +namespace testing { +namespace internal { + +// String - an abstract class holding static string utilities. +class GTEST_API_ String { + public: + // Static utility methods + + // Clones a 0-terminated C string, allocating memory using new. The + // caller is responsible for deleting the return value using + // delete[]. Returns the cloned string, or NULL if the input is + // NULL. + // + // This is different from strdup() in string.h, which allocates + // memory using malloc(). + static const char* CloneCString(const char* c_str); + +#if GTEST_OS_WINDOWS_MOBILE + // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be + // able to pass strings to Win32 APIs on CE we need to convert them + // to 'Unicode', UTF-16. 
+ + // Creates a UTF-16 wide string from the given ANSI string, allocating + // memory using new. The caller is responsible for deleting the return + // value using delete[]. Returns the wide string, or NULL if the + // input is NULL. + // + // The wide string is created using the ANSI codepage (CP_ACP) to + // match the behaviour of the ANSI versions of Win32 calls and the + // C runtime. + static LPCWSTR AnsiToUtf16(const char* c_str); + + // Creates an ANSI string from the given wide string, allocating + // memory using new. The caller is responsible for deleting the return + // value using delete[]. Returns the ANSI string, or NULL if the + // input is NULL. + // + // The returned string is created using the ANSI codepage (CP_ACP) to + // match the behaviour of the ANSI versions of Win32 calls and the + // C runtime. + static const char* Utf16ToAnsi(LPCWSTR utf16_str); +#endif + + // Compares two C strings. Returns true iff they have the same content. + // + // Unlike strcmp(), this function can handle NULL argument(s). A + // NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool CStringEquals(const char* lhs, const char* rhs); + + // Converts a wide C string to a String using the UTF-8 encoding. + // NULL will be converted to "(null)". If an error occurred during + // the conversion, "(failed to convert from wide string)" is + // returned. + static std::string ShowWideCString(const wchar_t* wide_c_str); + + // Compares two wide C strings. Returns true iff they have the same + // content. + // + // Unlike wcscmp(), this function can handle NULL argument(s). A + // NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs); + + // Compares two C strings, ignoring case. Returns true iff they + // have the same content. + // + // Unlike strcasecmp(), this function can handle NULL argument(s). 
+ // A NULL C string is considered different to any non-NULL C string, + // including the empty string. + static bool CaseInsensitiveCStringEquals(const char* lhs, + const char* rhs); + + // Compares two wide C strings, ignoring case. Returns true iff they + // have the same content. + // + // Unlike wcscasecmp(), this function can handle NULL argument(s). + // A NULL C string is considered different to any non-NULL wide C string, + // including the empty string. + // NB: The implementations on different platforms slightly differ. + // On windows, this method uses _wcsicmp which compares according to LC_CTYPE + // environment variable. On GNU platform this method uses wcscasecmp + // which compares according to LC_CTYPE category of the current locale. + // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the + // current locale. + static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs, + const wchar_t* rhs); + + // Returns true iff the given string ends with the given suffix, ignoring + // case. Any string is considered to end with an empty suffix. + static bool EndsWithCaseInsensitive( + const std::string& str, const std::string& suffix); + + // Formats an int value as "%02d". + static std::string FormatIntWidth2(int value); // "%02d" for width == 2 + + // Formats an int value as "%X". + static std::string FormatHexInt(int value); + + // Formats a byte as "%02X". + static std::string FormatByte(unsigned char value); + + private: + String(); // Not meant to be instantiated. +}; // class String + +// Gets the content of the stringstream's buffer as an std::string. Each '\0' +// character in the buffer is replaced with "\\0". +GTEST_API_ std::string StringStreamToString(::std::stringstream* stream); + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ +// Copyright 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: keith.ray@gmail.com (Keith Ray) +// +// Google Test filepath utilities +// +// This header file declares classes and functions used internally by +// Google Test. They are subject to change without notice. +// +// This file is #included in <gtest/internal/gtest-internal.h>. +// Do not include this header file separately!
+ +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ + + +namespace testing { +namespace internal { + +// FilePath - a class for file and directory pathname manipulation which +// handles platform-specific conventions (like the pathname separator). +// Used for helper functions for naming files in a directory for xml output. +// Except for Set methods, all methods are const or static, which provides an +// "immutable value object" -- useful for peace of mind. +// A FilePath with a value ending in a path separator ("like/this/") represents +// a directory, otherwise it is assumed to represent a file. In either case, +// it may or may not represent an actual file or directory in the file system. +// Names are NOT checked for syntax correctness -- no checking for illegal +// characters, malformed paths, etc. + +class GTEST_API_ FilePath { + public: + FilePath() : pathname_("") { } + FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { } + + explicit FilePath(const std::string& pathname) : pathname_(pathname) { + Normalize(); + } + + FilePath& operator=(const FilePath& rhs) { + Set(rhs); + return *this; + } + + void Set(const FilePath& rhs) { + pathname_ = rhs.pathname_; + } + + const std::string& string() const { return pathname_; } + const char* c_str() const { return pathname_.c_str(); } + + // Returns the current working directory, or "" if unsuccessful. + static FilePath GetCurrentDir(); + + // Given directory = "dir", base_name = "test", number = 0, + // extension = "xml", returns "dir/test.xml". If number is greater + // than zero (e.g., 12), returns "dir/test_12.xml". + // On Windows platform, uses \ as the separator rather than /. + static FilePath MakeFileName(const FilePath& directory, + const FilePath& base_name, + int number, + const char* extension); + + // Given directory = "dir", relative_path = "test.xml", + // returns "dir/test.xml".
+ // On Windows, uses \ as the separator rather than /. + static FilePath ConcatPaths(const FilePath& directory, + const FilePath& relative_path); + + // Returns a pathname for a file that does not currently exist. The pathname + // will be directory/base_name.extension or + // directory/base_name_<number>.extension if directory/base_name.extension + // already exists. The number will be incremented until a pathname is found + // that does not already exist. + // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. + // There could be a race condition if two or more processes are calling this + // function at the same time -- they could both pick the same filename. + static FilePath GenerateUniqueFileName(const FilePath& directory, + const FilePath& base_name, + const char* extension); + + // Returns true iff the path is "". + bool IsEmpty() const { return pathname_.empty(); } + + // If input name has a trailing separator character, removes it and returns + // the name, otherwise return the name string unmodified. + // On Windows platform, uses \ as the separator, other platforms use /. + FilePath RemoveTrailingPathSeparator() const; + + // Returns a copy of the FilePath with the directory part removed. + // Example: FilePath("path/to/file").RemoveDirectoryName() returns + // FilePath("file"). If there is no directory part ("just_a_file"), it returns + // the FilePath unmodified. If there is no file part ("just_a_dir/") it + // returns an empty FilePath (""). + // On Windows platform, '\' is the path separator, otherwise it is '/'. + FilePath RemoveDirectoryName() const; + + // RemoveFileName returns the directory path with the filename removed. + // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". + // If the FilePath is "a_file" or "/a_file", RemoveFileName returns + // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does + // not have a file, like "just/a/dir/", it returns the FilePath unmodified.
+ // On Windows platform, '\' is the path separator, otherwise it is '/'. + FilePath RemoveFileName() const; + + // Returns a copy of the FilePath with the case-insensitive extension removed. + // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns + // FilePath("dir/file"). If a case-insensitive extension is not + // found, returns a copy of the original FilePath. + FilePath RemoveExtension(const char* extension) const; + + // Creates directories so that path exists. Returns true if successful or if + // the directories already exist; returns false if unable to create + // directories for any reason. Will also return false if the FilePath does + // not represent a directory (that is, it doesn't end with a path separator). + bool CreateDirectoriesRecursively() const; + + // Create the directory so that path exists. Returns true if successful or + // if the directory already exists; returns false if unable to create the + // directory for any reason, including if the parent directory does not + // exist. Not named "CreateDirectory" because that's a macro on Windows. + bool CreateFolder() const; + + // Returns true if FilePath describes something in the file-system, + // either a file, directory, or whatever, and that something exists. + bool FileOrDirectoryExists() const; + + // Returns true if pathname describes a directory in the file-system + // that exists. + bool DirectoryExists() const; + + // Returns true if FilePath ends with a path separator, which indicates that + // it is intended to represent a directory. Returns false otherwise. + // This does NOT check that a directory (or file) actually exists. + bool IsDirectory() const; + + // Returns true if pathname describes a root directory. (Windows has one + // root directory per disk drive.) + bool IsRootDirectory() const; + + // Returns true if pathname describes an absolute path. + bool IsAbsolutePath() const; + + private: + // Replaces multiple consecutive separators with a single separator.
+ // For example, "bar///foo" becomes "bar/foo". Does not eliminate other + // redundancies that might be in a pathname involving "." or "..". + // + // A pathname with multiple consecutive separators may occur either through + // user error or as a result of some scripts or APIs that generate a pathname + // with a trailing separator. On other platforms the same API or script + // may NOT generate a pathname with a trailing "/". Then elsewhere that + // pathname may have another "/" and pathname components added to it, + // without checking for the separator already being there. + // The script language and operating system may allow paths like "foo//bar" + // but some of the functions in FilePath will not handle that correctly. In + // particular, RemoveTrailingPathSeparator() only removes one separator, and + // it is called in CreateDirectoriesRecursively() assuming that it will change + // a pathname from directory syntax (trailing separator) to filename syntax. + // + // On Windows this method also replaces the alternate path separator '/' with + // the primary path separator '\\', so that for example "bar\\/\\foo" becomes + // "bar\\foo". + + void Normalize(); + + // Returns a pointer to the last occurrence of a valid path separator in + // the FilePath. On Windows, for example, both '/' and '\' are valid path + // separators. Returns NULL if no path separator was found. + const char* FindLastPathSeparator() const; + + std::string pathname_; +}; // class FilePath + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ +// This file was GENERATED by command: +// pump.py gtest-type-util.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2008 Google Inc. +// All Rights Reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Type utilities needed for implementing typed and type-parameterized +// tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND! +// +// Currently we support at most 50 types in a list, and at most 50 +// type-parameterized tests in one type-parameterized test case. +// Please contact googletestframework@googlegroups.com if you need +// more. 
+ +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ + + +// #ifdef __GNUC__ is too general here. It is possible to use gcc without using +// libstdc++ (which is where cxxabi.h comes from). +# if GTEST_HAS_CXXABI_H_ +# include <cxxabi.h> +# elif defined(__HP_aCC) +# include <acxx_demangle.h> +# endif // GTEST_HAS_CXXABI_H_ + +namespace testing { +namespace internal { + +// GetTypeName<T>() returns a human-readable name of type T. +// NB: This function is also used in Google Mock, so don't move it inside of +// the typed-test-only section below. +template <typename T> +std::string GetTypeName() { +# if GTEST_HAS_RTTI + + const char* const name = typeid(T).name(); +# if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC) + int status = 0; + // gcc's implementation of typeid(T).name() mangles the type name, + // so we have to demangle it. +# if GTEST_HAS_CXXABI_H_ + using abi::__cxa_demangle; +# endif // GTEST_HAS_CXXABI_H_ + char* const readable_name = __cxa_demangle(name, 0, 0, &status); + const std::string name_str(status == 0 ? readable_name : name); + free(readable_name); + return name_str; +# else + return name; +# endif // GTEST_HAS_CXXABI_H_ || __HP_aCC + +# else + + return "<type>"; + +# endif // GTEST_HAS_RTTI +} + +#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P + +// AssertTypeEq<T1, T2>::type is defined iff T1 and T2 are the same +// type. This can be used as a compile-time assertion to ensure that +// two types are equal. + +template <typename T1, typename T2> +struct AssertTypeEq; + +template <typename T> +struct AssertTypeEq<T, T> { + typedef bool type; +}; + +// A unique type used as the default value for the arguments of class +// template Types. This allows us to simulate variadic templates +// (e.g. Types<int>, Types<int, double>, and etc), which C++ doesn't +// support directly. +struct None {}; + +// The following family of struct and struct templates are used to +// represent type lists. In particular, TypesN<T1, T2, ..., TN> +// represents a type list with N types (T1, T2, ..., and TN) in it.
+// Except for Types0, every struct in the family has two member types: +// Head for the first type in the list, and Tail for the rest of the +// list. + +// The empty type list. +struct Types0 {}; + +// Type lists of length 1, 2, 3, and so on. + +template +struct Types1 { + typedef T1 Head; + typedef Types0 Tail; +}; +template +struct Types2 { + typedef T1 Head; + typedef Types1 Tail; +}; + +template +struct Types3 { + typedef T1 Head; + typedef Types2 Tail; +}; + +template +struct Types4 { + typedef T1 Head; + typedef Types3 Tail; +}; + +template +struct Types5 { + typedef T1 Head; + typedef Types4 Tail; +}; + +template +struct Types6 { + typedef T1 Head; + typedef Types5 Tail; +}; + +template +struct Types7 { + typedef T1 Head; + typedef Types6 Tail; +}; + +template +struct Types8 { + typedef T1 Head; + typedef Types7 Tail; +}; + +template +struct Types9 { + typedef T1 Head; + typedef Types8 Tail; +}; + +template +struct Types10 { + typedef T1 Head; + typedef Types9 Tail; +}; + +template +struct Types11 { + typedef T1 Head; + typedef Types10 Tail; +}; + +template +struct Types12 { + typedef T1 Head; + typedef Types11 Tail; +}; + +template +struct Types13 { + typedef T1 Head; + typedef Types12 Tail; +}; + +template +struct Types14 { + typedef T1 Head; + typedef Types13 Tail; +}; + +template +struct Types15 { + typedef T1 Head; + typedef Types14 Tail; +}; + +template +struct Types16 { + typedef T1 Head; + typedef Types15 Tail; +}; + +template +struct Types17 { + typedef T1 Head; + typedef Types16 Tail; +}; + +template +struct Types18 { + typedef T1 Head; + typedef Types17 Tail; +}; + +template +struct Types19 { + typedef T1 Head; + typedef Types18 Tail; +}; + +template +struct Types20 { + typedef T1 Head; + typedef Types19 Tail; +}; + +template +struct Types21 { + typedef T1 Head; + typedef Types20 Tail; +}; + +template +struct Types22 { + typedef T1 Head; + typedef Types21 Tail; +}; + +template +struct Types23 { + typedef T1 Head; + typedef Types22 Tail; +}; + 
+template +struct Types24 { + typedef T1 Head; + typedef Types23 Tail; +}; + +template +struct Types25 { + typedef T1 Head; + typedef Types24 Tail; +}; + +template +struct Types26 { + typedef T1 Head; + typedef Types25 Tail; +}; + +template +struct Types27 { + typedef T1 Head; + typedef Types26 Tail; +}; + +template +struct Types28 { + typedef T1 Head; + typedef Types27 Tail; +}; + +template +struct Types29 { + typedef T1 Head; + typedef Types28 Tail; +}; + +template +struct Types30 { + typedef T1 Head; + typedef Types29 Tail; +}; + +template +struct Types31 { + typedef T1 Head; + typedef Types30 Tail; +}; + +template +struct Types32 { + typedef T1 Head; + typedef Types31 Tail; +}; + +template +struct Types33 { + typedef T1 Head; + typedef Types32 Tail; +}; + +template +struct Types34 { + typedef T1 Head; + typedef Types33 Tail; +}; + +template +struct Types35 { + typedef T1 Head; + typedef Types34 Tail; +}; + +template +struct Types36 { + typedef T1 Head; + typedef Types35 Tail; +}; + +template +struct Types37 { + typedef T1 Head; + typedef Types36 Tail; +}; + +template +struct Types38 { + typedef T1 Head; + typedef Types37 Tail; +}; + +template +struct Types39 { + typedef T1 Head; + typedef Types38 Tail; +}; + +template +struct Types40 { + typedef T1 Head; + typedef Types39 Tail; +}; + +template +struct Types41 { + typedef T1 Head; + typedef Types40 Tail; +}; + +template +struct Types42 { + typedef T1 Head; + typedef Types41 Tail; +}; + +template +struct Types43 { + typedef T1 Head; + typedef Types42 Tail; +}; + +template +struct Types44 { + typedef T1 Head; + typedef Types43 Tail; +}; + +template +struct Types45 { + typedef T1 Head; + typedef Types44 Tail; +}; + +template +struct Types46 { + typedef T1 Head; + typedef Types45 Tail; +}; + +template +struct Types47 { + typedef T1 Head; + typedef Types46 Tail; +}; + +template +struct Types48 { + typedef T1 Head; + typedef Types47 Tail; +}; + +template +struct Types49 { + typedef T1 Head; + typedef Types48 Tail; +}; 
+ +template +struct Types50 { + typedef T1 Head; + typedef Types49 Tail; +}; + + +} // namespace internal + +// We don't want to require the users to write TypesN<...> directly, +// as that would require them to count the length. Types<...> is much +// easier to write, but generates horrible messages when there is a +// compiler error, as gcc insists on printing out each template +// argument, even if it has the default value (this means Types +// will appear as Types in the compiler +// errors). +// +// Our solution is to combine the best part of the two approaches: a +// user would write Types, and Google Test will translate +// that to TypesN internally to make error messages +// readable. The translation is done by the 'type' member of the +// Types template. +template +struct Types { + typedef internal::Types50 type; +}; + +template <> +struct Types { + typedef internal::Types0 type; +}; +template +struct Types { + typedef internal::Types1 type; +}; +template +struct Types { + typedef internal::Types2 type; +}; +template +struct Types { + typedef internal::Types3 type; +}; +template +struct Types { + typedef internal::Types4 type; +}; +template +struct Types { + typedef internal::Types5 type; +}; +template +struct Types { + typedef internal::Types6 type; +}; +template +struct Types { + typedef internal::Types7 type; +}; +template +struct Types { + typedef internal::Types8 type; +}; +template +struct Types { + typedef internal::Types9 type; +}; +template +struct Types { + typedef internal::Types10 type; +}; +template +struct Types { + typedef internal::Types11 type; +}; +template +struct Types { + typedef internal::Types12 type; +}; +template +struct Types { + typedef internal::Types13 type; +}; +template +struct Types { + typedef internal::Types14 type; +}; +template +struct Types { + typedef internal::Types15 type; +}; +template +struct Types { + typedef internal::Types16 type; +}; +template +struct Types { + typedef internal::Types17 type; +}; +template 
+struct Types { + typedef internal::Types18 type; +}; +template +struct Types { + typedef internal::Types19 type; +}; +template +struct Types { + typedef internal::Types20 type; +}; +template +struct Types { + typedef internal::Types21 type; +}; +template +struct Types { + typedef internal::Types22 type; +}; +template +struct Types { + typedef internal::Types23 type; +}; +template +struct Types { + typedef internal::Types24 type; +}; +template +struct Types { + typedef internal::Types25 type; +}; +template +struct Types { + typedef internal::Types26 type; +}; +template +struct Types { + typedef internal::Types27 type; +}; +template +struct Types { + typedef internal::Types28 type; +}; +template +struct Types { + typedef internal::Types29 type; +}; +template +struct Types { + typedef internal::Types30 type; +}; +template +struct Types { + typedef internal::Types31 type; +}; +template +struct Types { + typedef internal::Types32 type; +}; +template +struct Types { + typedef internal::Types33 type; +}; +template +struct Types { + typedef internal::Types34 type; +}; +template +struct Types { + typedef internal::Types35 type; +}; +template +struct Types { + typedef internal::Types36 type; +}; +template +struct Types { + typedef internal::Types37 type; +}; +template +struct Types { + typedef internal::Types38 type; +}; +template +struct Types { + typedef internal::Types39 type; +}; +template +struct Types { + typedef internal::Types40 type; +}; +template +struct Types { + typedef internal::Types41 type; +}; +template +struct Types { + typedef internal::Types42 type; +}; +template +struct Types { + typedef internal::Types43 type; +}; +template +struct Types { + typedef internal::Types44 type; +}; +template +struct Types { + typedef internal::Types45 type; +}; +template +struct Types { + typedef internal::Types46 type; +}; +template +struct Types { + typedef internal::Types47 type; +}; +template +struct Types { + typedef internal::Types48 type; +}; +template +struct Types { 
+ typedef internal::Types49 type; +}; + +namespace internal { + +# define GTEST_TEMPLATE_ template class + +// The template "selector" struct TemplateSel is used to +// represent Tmpl, which must be a class template with one type +// parameter, as a type. TemplateSel::Bind::type is defined +// as the type Tmpl. This allows us to actually instantiate the +// template "selected" by TemplateSel. +// +// This trick is necessary for simulating typedef for class templates, +// which C++ doesn't support directly. +template +struct TemplateSel { + template + struct Bind { + typedef Tmpl type; + }; +}; + +# define GTEST_BIND_(TmplSel, T) \ + TmplSel::template Bind::type + +// A unique struct template used as the default value for the +// arguments of class template Templates. This allows us to simulate +// variadic templates (e.g. Templates, Templates, +// and etc), which C++ doesn't support directly. +template +struct NoneT {}; + +// The following family of struct and struct templates are used to +// represent template lists. In particular, TemplatesN represents a list of N templates (T1, T2, ..., and TN). Except +// for Templates0, every struct in the family has two member types: +// Head for the selector of the first template in the list, and Tail +// for the rest of the list. + +// The empty template list. +struct Templates0 {}; + +// Template lists of length 1, 2, 3, and so on. 
+ +template +struct Templates1 { + typedef TemplateSel Head; + typedef Templates0 Tail; +}; +template +struct Templates2 { + typedef TemplateSel Head; + typedef Templates1 Tail; +}; + +template +struct Templates3 { + typedef TemplateSel Head; + typedef Templates2 Tail; +}; + +template +struct Templates4 { + typedef TemplateSel Head; + typedef Templates3 Tail; +}; + +template +struct Templates5 { + typedef TemplateSel Head; + typedef Templates4 Tail; +}; + +template +struct Templates6 { + typedef TemplateSel Head; + typedef Templates5 Tail; +}; + +template +struct Templates7 { + typedef TemplateSel Head; + typedef Templates6 Tail; +}; + +template +struct Templates8 { + typedef TemplateSel Head; + typedef Templates7 Tail; +}; + +template +struct Templates9 { + typedef TemplateSel Head; + typedef Templates8 Tail; +}; + +template +struct Templates10 { + typedef TemplateSel Head; + typedef Templates9 Tail; +}; + +template +struct Templates11 { + typedef TemplateSel Head; + typedef Templates10 Tail; +}; + +template +struct Templates12 { + typedef TemplateSel Head; + typedef Templates11 Tail; +}; + +template +struct Templates13 { + typedef TemplateSel Head; + typedef Templates12 Tail; +}; + +template +struct Templates14 { + typedef TemplateSel Head; + typedef Templates13 Tail; +}; + +template +struct Templates15 { + typedef TemplateSel Head; + typedef Templates14 Tail; +}; + +template +struct Templates16 { + typedef TemplateSel Head; + typedef Templates15 Tail; +}; + +template +struct Templates17 { + typedef TemplateSel Head; + typedef Templates16 Tail; +}; + +template +struct Templates18 { + typedef TemplateSel Head; + typedef Templates17 Tail; +}; + +template +struct Templates19 { + typedef TemplateSel Head; + typedef Templates18 Tail; +}; + +template +struct Templates20 { + typedef TemplateSel Head; + typedef Templates19 Tail; +}; + +template +struct Templates21 { + typedef TemplateSel Head; + typedef Templates20 Tail; +}; + +template +struct Templates22 { + typedef 
TemplateSel Head; + typedef Templates21 Tail; +}; + +template +struct Templates23 { + typedef TemplateSel Head; + typedef Templates22 Tail; +}; + +template +struct Templates24 { + typedef TemplateSel Head; + typedef Templates23 Tail; +}; + +template +struct Templates25 { + typedef TemplateSel Head; + typedef Templates24 Tail; +}; + +template +struct Templates26 { + typedef TemplateSel Head; + typedef Templates25 Tail; +}; + +template +struct Templates27 { + typedef TemplateSel Head; + typedef Templates26 Tail; +}; + +template +struct Templates28 { + typedef TemplateSel Head; + typedef Templates27 Tail; +}; + +template +struct Templates29 { + typedef TemplateSel Head; + typedef Templates28 Tail; +}; + +template +struct Templates30 { + typedef TemplateSel Head; + typedef Templates29 Tail; +}; + +template +struct Templates31 { + typedef TemplateSel Head; + typedef Templates30 Tail; +}; + +template +struct Templates32 { + typedef TemplateSel Head; + typedef Templates31 Tail; +}; + +template +struct Templates33 { + typedef TemplateSel Head; + typedef Templates32 Tail; +}; + +template +struct Templates34 { + typedef TemplateSel Head; + typedef Templates33 Tail; +}; + +template +struct Templates35 { + typedef TemplateSel Head; + typedef Templates34 Tail; +}; + +template +struct Templates36 { + typedef TemplateSel Head; + typedef Templates35 Tail; +}; + +template +struct Templates37 { + typedef TemplateSel Head; + typedef Templates36 Tail; +}; + +template +struct Templates38 { + typedef TemplateSel Head; + typedef Templates37 Tail; +}; + +template +struct Templates39 { + typedef TemplateSel Head; + typedef Templates38 Tail; +}; + +template +struct Templates40 { + typedef TemplateSel Head; + typedef Templates39 Tail; +}; + +template +struct Templates41 { + typedef TemplateSel Head; + typedef Templates40 Tail; +}; + +template +struct Templates42 { + typedef TemplateSel Head; + typedef Templates41 Tail; +}; + +template +struct Templates43 { + typedef TemplateSel Head; + 
typedef Templates42 Tail; +}; + +template +struct Templates44 { + typedef TemplateSel Head; + typedef Templates43 Tail; +}; + +template +struct Templates45 { + typedef TemplateSel Head; + typedef Templates44 Tail; +}; + +template +struct Templates46 { + typedef TemplateSel Head; + typedef Templates45 Tail; +}; + +template +struct Templates47 { + typedef TemplateSel Head; + typedef Templates46 Tail; +}; + +template +struct Templates48 { + typedef TemplateSel Head; + typedef Templates47 Tail; +}; + +template +struct Templates49 { + typedef TemplateSel Head; + typedef Templates48 Tail; +}; + +template +struct Templates50 { + typedef TemplateSel Head; + typedef Templates49 Tail; +}; + + +// We don't want to require the users to write TemplatesN<...> directly, +// as that would require them to count the length. Templates<...> is much +// easier to write, but generates horrible messages when there is a +// compiler error, as gcc insists on printing out each template +// argument, even if it has the default value (this means Templates +// will appear as Templates in the compiler +// errors). +// +// Our solution is to combine the best part of the two approaches: a +// user would write Templates, and Google Test will translate +// that to TemplatesN internally to make error messages +// readable. The translation is done by the 'type' member of the +// Templates template. 
+template +struct Templates { + typedef Templates50 type; +}; + +template <> +struct Templates { + typedef Templates0 type; +}; +template +struct Templates { + typedef Templates1 type; +}; +template +struct Templates { + typedef Templates2 type; +}; +template +struct Templates { + typedef Templates3 type; +}; +template +struct Templates { + typedef Templates4 type; +}; +template +struct Templates { + typedef Templates5 type; +}; +template +struct Templates { + typedef Templates6 type; +}; +template +struct Templates { + typedef Templates7 type; +}; +template +struct Templates { + typedef Templates8 type; +}; +template +struct Templates { + typedef Templates9 type; +}; +template +struct Templates { + typedef Templates10 type; +}; +template +struct Templates { + typedef Templates11 type; +}; +template +struct Templates { + typedef Templates12 type; +}; +template +struct Templates { + typedef Templates13 type; +}; +template +struct Templates { + typedef Templates14 type; +}; +template +struct Templates { + typedef Templates15 type; +}; +template +struct Templates { + typedef Templates16 type; +}; +template +struct Templates { + typedef Templates17 type; +}; +template +struct Templates { + typedef Templates18 type; +}; +template +struct Templates { + typedef Templates19 type; +}; +template +struct Templates { + typedef Templates20 type; +}; +template +struct Templates { + typedef Templates21 type; +}; +template +struct Templates { + typedef Templates22 type; +}; +template +struct Templates { + typedef Templates23 type; +}; +template +struct Templates { + typedef Templates24 type; +}; +template +struct Templates { + typedef Templates25 type; +}; +template +struct Templates { + typedef Templates26 type; +}; +template +struct Templates { + typedef Templates27 type; +}; +template +struct Templates { + typedef Templates28 type; +}; +template +struct Templates { + typedef Templates29 type; +}; +template +struct Templates { + typedef Templates30 type; +}; +template +struct 
Templates { + typedef Templates31 type; +}; +template +struct Templates { + typedef Templates32 type; +}; +template +struct Templates { + typedef Templates33 type; +}; +template +struct Templates { + typedef Templates34 type; +}; +template +struct Templates { + typedef Templates35 type; +}; +template +struct Templates { + typedef Templates36 type; +}; +template +struct Templates { + typedef Templates37 type; +}; +template +struct Templates { + typedef Templates38 type; +}; +template +struct Templates { + typedef Templates39 type; +}; +template +struct Templates { + typedef Templates40 type; +}; +template +struct Templates { + typedef Templates41 type; +}; +template +struct Templates { + typedef Templates42 type; +}; +template +struct Templates { + typedef Templates43 type; +}; +template +struct Templates { + typedef Templates44 type; +}; +template +struct Templates { + typedef Templates45 type; +}; +template +struct Templates { + typedef Templates46 type; +}; +template +struct Templates { + typedef Templates47 type; +}; +template +struct Templates { + typedef Templates48 type; +}; +template +struct Templates { + typedef Templates49 type; +}; + +// The TypeList template makes it possible to use either a single type +// or a Types<...> list in TYPED_TEST_CASE() and +// INSTANTIATE_TYPED_TEST_CASE_P(). + +template +struct TypeList { + typedef Types1 type; +}; + +template +struct TypeList > { + typedef typename Types::type type; +}; + +#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ + +// Due to C++ preprocessor weirdness, we need double indirection to +// concatenate two tokens when one of them is __LINE__. Writing +// +// foo ## __LINE__ +// +// will result in the token foo__LINE__, instead of foo followed by +// the current line number. 
For more details, see +// http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6 +#define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar) +#define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar + +class ProtocolMessage; +namespace proto2 { class Message; } + +namespace testing { + +// Forward declarations. + +class AssertionResult; // Result of an assertion. +class Message; // Represents a failure message. +class Test; // Represents a test. +class TestInfo; // Information about a test. +class TestPartResult; // Result of a test part. +class UnitTest; // A collection of test cases. + +template +::std::string PrintToString(const T& value); + +namespace internal { + +struct TraceInfo; // Information about a trace point. +class ScopedTrace; // Implements scoped trace. +class TestInfoImpl; // Opaque implementation of TestInfo +class UnitTestImpl; // Opaque implementation of UnitTest + +// How many times InitGoogleTest() has been called. +GTEST_API_ extern int g_init_gtest_count; + +// The text used in failure messages to indicate the start of the +// stack trace. +GTEST_API_ extern const char kStackTraceMarker[]; + +// Two overloaded helpers for checking at compile time whether an +// expression is a null pointer literal (i.e. NULL or any 0-valued +// compile-time integral constant). Their return values have +// different sizes, so we can use sizeof() to test which version is +// picked by the compiler. These helpers have no implementations, as +// we only need their signatures. +// +// Given IsNullLiteralHelper(x), the compiler will pick the first +// version if x can be implicitly converted to Secret*, and pick the +// second version otherwise. Since Secret is a secret and incomplete +// type, the only expression a user can write that has type Secret* is +// a null pointer literal. Therefore, we know that x is a null +// pointer literal if and only if the first version is picked by the +// compiler. 
+char IsNullLiteralHelper(Secret* p); +char (&IsNullLiteralHelper(...))[2]; // NOLINT + +// A compile-time bool constant that is true if and only if x is a +// null pointer literal (i.e. NULL or any 0-valued compile-time +// integral constant). +#ifdef GTEST_ELLIPSIS_NEEDS_POD_ +// We lose support for NULL detection where the compiler doesn't like +// passing non-POD classes through ellipsis (...). +# define GTEST_IS_NULL_LITERAL_(x) false +#else +# define GTEST_IS_NULL_LITERAL_(x) \ + (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1) +#endif // GTEST_ELLIPSIS_NEEDS_POD_ + +// Appends the user-supplied message to the Google-Test-generated message. +GTEST_API_ std::string AppendUserMessage( + const std::string& gtest_msg, const Message& user_msg); + +#if GTEST_HAS_EXCEPTIONS + +// This exception is thrown by (and only by) a failed Google Test +// assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions +// are enabled). We derive it from std::runtime_error, which is for +// errors presumably detectable only at run time. Since +// std::runtime_error inherits from std::exception, many testing +// frameworks know how to extract and print the message inside it. +class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error { + public: + explicit GoogleTestFailureException(const TestPartResult& failure); +}; + +#endif // GTEST_HAS_EXCEPTIONS + +// A helper class for creating scoped traces in user programs. +class GTEST_API_ ScopedTrace { + public: + // The c'tor pushes the given source file location and message onto + // a trace stack maintained by Google Test. + ScopedTrace(const char* file, int line, const Message& message); + + // The d'tor pops the info pushed by the c'tor. + // + // Note that the d'tor is not virtual in order to be efficient. + // Don't inherit from ScopedTrace! 
+ ~ScopedTrace(); + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace); +} GTEST_ATTRIBUTE_UNUSED_; // A ScopedTrace object does its job in its + // c'tor and d'tor. Therefore it doesn't + // need to be used otherwise. + +// Constructs and returns the message for an equality assertion +// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure. +// +// The first four parameters are the expressions used in the assertion +// and their values, as strings. For example, for ASSERT_EQ(foo, bar) +// where foo is 5 and bar is 6, we have: +// +// expected_expression: "foo" +// actual_expression: "bar" +// expected_value: "5" +// actual_value: "6" +// +// The ignoring_case parameter is true iff the assertion is a +// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will +// be inserted into the message. +GTEST_API_ AssertionResult EqFailure(const char* expected_expression, + const char* actual_expression, + const std::string& expected_value, + const std::string& actual_value, + bool ignoring_case); + +// Constructs a failure message for Boolean assertions such as EXPECT_TRUE. +GTEST_API_ std::string GetBoolAssertionFailureMessage( + const AssertionResult& assertion_result, + const char* expression_text, + const char* actual_predicate_value, + const char* expected_predicate_value); + +// This template class represents an IEEE floating-point number +// (either single-precision or double-precision, depending on the +// template parameters). +// +// The purpose of this class is to do more sophisticated number +// comparison. (Due to round-off error, etc, it's very unlikely that +// two floating-points will be equal exactly. Hence a naive +// comparison by the == operation often doesn't work.) +// +// Format of IEEE floating-point: +// +// The most-significant bit being the leftmost, an IEEE +// floating-point looks like +// +// sign_bit exponent_bits fraction_bits +// +// Here, sign_bit is a single bit that designates the sign of the +// number. 
+// +// For float, there are 8 exponent bits and 23 fraction bits. +// +// For double, there are 11 exponent bits and 52 fraction bits. +// +// More details can be found at +// http://en.wikipedia.org/wiki/IEEE_floating-point_standard. +// +// Template parameter: +// +// RawType: the raw floating-point type (either float or double) +template +class FloatingPoint { + public: + // Defines the unsigned integer type that has the same size as the + // floating point number. + typedef typename TypeWithSize::UInt Bits; + + // Constants. + + // # of bits in a number. + static const size_t kBitCount = 8*sizeof(RawType); + + // # of fraction bits in a number. + static const size_t kFractionBitCount = + std::numeric_limits::digits - 1; + + // # of exponent bits in a number. + static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount; + + // The mask for the sign bit. + static const Bits kSignBitMask = static_cast(1) << (kBitCount - 1); + + // The mask for the fraction bits. + static const Bits kFractionBitMask = + ~static_cast(0) >> (kExponentBitCount + 1); + + // The mask for the exponent bits. + static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask); + + // How many ULP's (Units in the Last Place) we want to tolerate when + // comparing two numbers. The larger the value, the more error we + // allow. A 0 value means that two numbers must be exactly the same + // to be considered equal. + // + // The maximum error of a single floating-point operation is 0.5 + // units in the last place. On Intel CPU's, all floating-point + // calculations are done with 80-bit precision, while double has 64 + // bits. Therefore, 4 should be enough for ordinary use. + // + // See the following article for more details on ULP: + // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ + static const size_t kMaxUlps = 4; + + // Constructs a FloatingPoint from a raw floating-point number. 
+ // + // On an Intel CPU, passing a non-normalized NAN (Not a Number) + // around may change its bits, although the new value is guaranteed + // to be also a NAN. Therefore, don't expect this constructor to + // preserve the bits in x when x is a NAN. + explicit FloatingPoint(const RawType& x) { u_.value_ = x; } + + // Static methods + + // Reinterprets a bit pattern as a floating-point number. + // + // This function is needed to test the AlmostEquals() method. + static RawType ReinterpretBits(const Bits bits) { + FloatingPoint fp(0); + fp.u_.bits_ = bits; + return fp.u_.value_; + } + + // Returns the floating-point number that represents positive infinity. + static RawType Infinity() { + return ReinterpretBits(kExponentBitMask); + } + + // Returns the maximum representable finite floating-point number. + static RawType Max(); + + // Non-static methods + + // Returns the bits that represent this number. + const Bits &bits() const { return u_.bits_; } + + // Returns the exponent bits of this number. + Bits exponent_bits() const { return kExponentBitMask & u_.bits_; } + + // Returns the fraction bits of this number. + Bits fraction_bits() const { return kFractionBitMask & u_.bits_; } + + // Returns the sign bit of this number. + Bits sign_bit() const { return kSignBitMask & u_.bits_; } + + // Returns true iff this is NAN (not a number). + bool is_nan() const { + // It's a NAN if the exponent bits are all ones and the fraction + // bits are not entirely zeros. + return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0); + } + + // Returns true iff this number is at most kMaxUlps ULP's away from + // rhs. In particular, this function: + // + // - returns false if either number is (or both are) NAN. + // - treats really large numbers as almost equal to infinity. + // - thinks +0.0 and -0.0 are 0 ULP's apart.
+ bool AlmostEquals(const FloatingPoint& rhs) const { + // The IEEE standard says that any comparison operation involving + // a NAN must return false. + if (is_nan() || rhs.is_nan()) return false; + + return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_) + <= kMaxUlps; + } + + private: + // The data type used to store the actual floating-point number. + union FloatingPointUnion { + RawType value_; // The raw floating-point number. + Bits bits_; // The bits that represent the number. + }; + + // Converts an integer from the sign-and-magnitude representation to + // the biased representation. More precisely, let N be 2 to the + // power of (kBitCount - 1), an integer x is represented by the + // unsigned number x + N. + // + // For instance, + // + // -N + 1 (the most negative number representable using + // sign-and-magnitude) is represented by 1; + // 0 is represented by N; and + // N - 1 (the biggest number representable using + // sign-and-magnitude) is represented by 2N - 1. + // + // Read http://en.wikipedia.org/wiki/Signed_number_representations + // for more details on signed number representations. + static Bits SignAndMagnitudeToBiased(const Bits &sam) { + if (kSignBitMask & sam) { + // sam represents a negative number. + return ~sam + 1; + } else { + // sam represents a positive number. + return kSignBitMask | sam; + } + } + + // Given two numbers in the sign-and-magnitude representation, + // returns the distance between them as an unsigned number. + static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1, + const Bits &sam2) { + const Bits biased1 = SignAndMagnitudeToBiased(sam1); + const Bits biased2 = SignAndMagnitudeToBiased(sam2); + return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1); + } + + FloatingPointUnion u_; +}; + +// We cannot use std::numeric_limits::max() as it clashes with the max() +// macro defined by . 
+template <> +inline float FloatingPoint::Max() { return FLT_MAX; } +template <> +inline double FloatingPoint::Max() { return DBL_MAX; } + +// Typedefs the instances of the FloatingPoint template class that we +// care to use. +typedef FloatingPoint Float; +typedef FloatingPoint Double; + +// In order to catch the mistake of putting tests that use different +// test fixture classes in the same test case, we need to assign +// unique IDs to fixture classes and compare them. The TypeId type is +// used to hold such IDs. The user should treat TypeId as an opaque +// type: the only operation allowed on TypeId values is to compare +// them for equality using the == operator. +typedef const void* TypeId; + +template +class TypeIdHelper { + public: + // dummy_ must not have a const type. Otherwise an overly eager + // compiler (e.g. MSVC 7.1 & 8.0) may try to merge + // TypeIdHelper::dummy_ for different Ts as an "optimization". + static bool dummy_; +}; + +template +bool TypeIdHelper::dummy_ = false; + +// GetTypeId() returns the ID of type T. Different values will be +// returned for different types. Calling the function twice with the +// same type argument is guaranteed to return the same ID. +template +TypeId GetTypeId() { + // The compiler is required to allocate a different + // TypeIdHelper::dummy_ variable for each T used to instantiate + // the template. Therefore, the address of dummy_ is guaranteed to + // be unique. + return &(TypeIdHelper::dummy_); +} + +// Returns the type ID of ::testing::Test. Always call this instead +// of GetTypeId< ::testing::Test>() to get the type ID of +// ::testing::Test, as the latter may give the wrong result due to a +// suspected linker bug when compiling Google Test as a Mac OS X +// framework. +GTEST_API_ TypeId GetTestTypeId(); + +// Defines the abstract factory interface that creates instances +// of a Test object. +class TestFactoryBase { + public: + virtual ~TestFactoryBase() {} + + // Creates a test instance to run. 
The instance is both created and destroyed + // within TestInfoImpl::Run() + virtual Test* CreateTest() = 0; + + protected: + TestFactoryBase() {} + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase); +}; + +// This class provides implementation of TestFactoryBase interface. +// It is used in TEST and TEST_F macros. +template +class TestFactoryImpl : public TestFactoryBase { + public: + virtual Test* CreateTest() { return new TestClass; } +}; + +#if GTEST_OS_WINDOWS + +// Predicate-formatters for implementing the HRESULT checking macros +// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED} +// We pass a long instead of HRESULT to avoid causing an +// include dependency for the HRESULT type. +GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr, + long hr); // NOLINT +GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr, + long hr); // NOLINT + +#endif // GTEST_OS_WINDOWS + +// Types of SetUpTestCase() and TearDownTestCase() functions. +typedef void (*SetUpTestCaseFunc)(); +typedef void (*TearDownTestCaseFunc)(); + +// Creates a new TestInfo object and registers it with Google Test; +// returns the created object. +// +// Arguments: +// +// test_case_name: name of the test case +// name: name of the test +// type_param the name of the test's type parameter, or NULL if +// this is not a typed or a type-parameterized test. +// value_param text representation of the test's value parameter, +// or NULL if this is not a value-parameterized test. +// fixture_class_id: ID of the test fixture class +// set_up_tc: pointer to the function that sets up the test case +// tear_down_tc: pointer to the function that tears down the test case +// factory: pointer to the factory that creates a test object. +// The newly created TestInfo instance will assume +// ownership of the factory object.
+GTEST_API_ TestInfo* MakeAndRegisterTestInfo( + const char* test_case_name, + const char* name, + const char* type_param, + const char* value_param, + TypeId fixture_class_id, + SetUpTestCaseFunc set_up_tc, + TearDownTestCaseFunc tear_down_tc, + TestFactoryBase* factory); + +// If *pstr starts with the given prefix, modifies *pstr to be right +// past the prefix and returns true; otherwise leaves *pstr unchanged +// and returns false. None of pstr, *pstr, and prefix can be NULL. +GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr); + +#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P + +// State of the definition of a type-parameterized test case. +class GTEST_API_ TypedTestCasePState { + public: + TypedTestCasePState() : registered_(false) {} + + // Adds the given test name to defined_test_names_ and return true + // if the test case hasn't been registered; otherwise aborts the + // program. + bool AddTestName(const char* file, int line, const char* case_name, + const char* test_name) { + if (registered_) { + fprintf(stderr, "%s Test %s must be defined before " + "REGISTER_TYPED_TEST_CASE_P(%s, ...).\n", + FormatFileLocation(file, line).c_str(), test_name, case_name); + fflush(stderr); + posix::Abort(); + } + defined_test_names_.insert(test_name); + return true; + } + + // Verifies that registered_tests match the test names in + // defined_test_names_; returns registered_tests if successful, or + // aborts the program otherwise. + const char* VerifyRegisteredTestNames( + const char* file, int line, const char* registered_tests); + + private: + bool registered_; + ::std::set defined_test_names_; +}; + +// Skips to the first non-space char after the first comma in 'str'; +// returns NULL if no comma is found in 'str'. 
+inline const char* SkipComma(const char* str) { + const char* comma = strchr(str, ','); + if (comma == NULL) { + return NULL; + } + while (IsSpace(*(++comma))) {} + return comma; +} + +// Returns the prefix of 'str' before the first comma in it; returns +// the entire string if it contains no comma. +inline std::string GetPrefixUntilComma(const char* str) { + const char* comma = strchr(str, ','); + return comma == NULL ? str : std::string(str, comma); +} + +// TypeParameterizedTest::Register() +// registers a list of type-parameterized tests with Google Test. The +// return value is insignificant - we just need to return something +// such that we can call this function in a namespace scope. +// +// Implementation note: The GTEST_TEMPLATE_ macro declares a template +// template parameter. It's defined in gtest-type-util.h. +template +class TypeParameterizedTest { + public: + // 'index' is the index of the test in the type list 'Types' + // specified in INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase, + // Types). Valid values for 'index' are [0, N - 1] where N is the + // length of Types. + static bool Register(const char* prefix, const char* case_name, + const char* test_names, int index) { + typedef typename Types::Head Type; + typedef Fixture FixtureClass; + typedef typename GTEST_BIND_(TestSel, Type) TestClass; + + // First, registers the first type-parameterized test in the type + // list. + MakeAndRegisterTestInfo( + (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name + "/" + + StreamableToString(index)).c_str(), + GetPrefixUntilComma(test_names).c_str(), + GetTypeName().c_str(), + NULL, // No value parameter. + GetTypeId(), + TestClass::SetUpTestCase, + TestClass::TearDownTestCase, + new TestFactoryImpl); + + // Next, recurses (at compile time) with the tail of the type list. + return TypeParameterizedTest + ::Register(prefix, case_name, test_names, index + 1); + } +}; + +// The base case for the compile time recursion. 
+template +class TypeParameterizedTest { + public: + static bool Register(const char* /*prefix*/, const char* /*case_name*/, + const char* /*test_names*/, int /*index*/) { + return true; + } +}; + +// TypeParameterizedTestCase::Register() +// registers *all combinations* of 'Tests' and 'Types' with Google +// Test. The return value is insignificant - we just need to return +// something such that we can call this function in a namespace scope. +template +class TypeParameterizedTestCase { + public: + static bool Register(const char* prefix, const char* case_name, + const char* test_names) { + typedef typename Tests::Head Head; + + // First, register the first test in 'Test' for each type in 'Types'. + TypeParameterizedTest::Register( + prefix, case_name, test_names, 0); + + // Next, recurses (at compile time) with the tail of the test list. + return TypeParameterizedTestCase + ::Register(prefix, case_name, SkipComma(test_names)); + } +}; + +// The base case for the compile time recursion. +template +class TypeParameterizedTestCase { + public: + static bool Register(const char* /*prefix*/, const char* /*case_name*/, + const char* /*test_names*/) { + return true; + } +}; + +#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P + +// Returns the current OS stack trace as an std::string. +// +// The maximum number of stack frames to be included is specified by +// the gtest_stack_trace_depth flag. The skip_count parameter +// specifies the number of top frames to be skipped, which doesn't +// count against the number of frames to be included. +// +// For example, if Foo() calls Bar(), which in turn calls +// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in +// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't. +GTEST_API_ std::string GetCurrentOsStackTraceExceptTop( + UnitTest* unit_test, int skip_count); + +// Helpers for suppressing warnings on unreachable code or constant +// condition. + +// Always returns true. 
+GTEST_API_ bool AlwaysTrue(); + +// Always returns false. +inline bool AlwaysFalse() { return !AlwaysTrue(); } + +// Helper for suppressing false warning from Clang on a const char* +// variable declared in a conditional expression always being NULL in +// the else branch. +struct GTEST_API_ ConstCharPtr { + ConstCharPtr(const char* str) : value(str) {} + operator bool() const { return true; } + const char* value; +}; + +// A simple Linear Congruential Generator for generating random +// numbers with a uniform distribution. Unlike rand() and srand(), it +// doesn't use global state (and therefore can't interfere with user +// code). Unlike rand_r(), it's portable. An LCG isn't very random, +// but it's good enough for our purposes. +class GTEST_API_ Random { + public: + static const UInt32 kMaxRange = 1u << 31; + + explicit Random(UInt32 seed) : state_(seed) {} + + void Reseed(UInt32 seed) { state_ = seed; } + + // Generates a random number from [0, range). Crashes if 'range' is + // 0 or greater than kMaxRange. + UInt32 Generate(UInt32 range); + + private: + UInt32 state_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(Random); +}; + +// Defining a variable of type CompileAssertTypesEqual will cause a +// compiler error iff T1 and T2 are different types. +template +struct CompileAssertTypesEqual; + +template +struct CompileAssertTypesEqual { +}; + +// Removes the reference from a type if it is a reference type, +// otherwise leaves it unchanged. This is the same as +// tr1::remove_reference, which is not widely available yet. +template +struct RemoveReference { typedef T type; }; // NOLINT +template +struct RemoveReference { typedef T type; }; // NOLINT + +// A handy wrapper around RemoveReference that works when the argument +// T depends on template parameters. +#define GTEST_REMOVE_REFERENCE_(T) \ + typename ::testing::internal::RemoveReference::type + +// Removes const from a type if it is a const type, otherwise leaves +// it unchanged. 
This is the same as tr1::remove_const, which is not +// widely available yet. +template +struct RemoveConst { typedef T type; }; // NOLINT +template +struct RemoveConst { typedef T type; }; // NOLINT + +// MSVC 8.0, Sun C++, and IBM XL C++ have a bug which causes the above +// definition to fail to remove the const in 'const int[3]' and 'const +// char[3][4]'. The following specialization works around the bug. +template +struct RemoveConst { + typedef typename RemoveConst::type type[N]; +}; + +#if defined(_MSC_VER) && _MSC_VER < 1400 +// This is the only specialization that allows VC++ 7.1 to remove const in +// 'const int[3] and 'const int[3][4]'. However, it causes trouble with GCC +// and thus needs to be conditionally compiled. +template +struct RemoveConst { + typedef typename RemoveConst::type type[N]; +}; +#endif + +// A handy wrapper around RemoveConst that works when the argument +// T depends on template parameters. +#define GTEST_REMOVE_CONST_(T) \ + typename ::testing::internal::RemoveConst::type + +// Turns const U&, U&, const U, and U all into U. +#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \ + GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T)) + +// Adds reference to a type if it is not a reference type, +// otherwise leaves it unchanged. This is the same as +// tr1::add_reference, which is not widely available yet. +template +struct AddReference { typedef T& type; }; // NOLINT +template +struct AddReference { typedef T& type; }; // NOLINT + +// A handy wrapper around AddReference that works when the argument T +// depends on template parameters. +#define GTEST_ADD_REFERENCE_(T) \ + typename ::testing::internal::AddReference::type + +// Adds a reference to const on top of T as necessary. For example, +// it transforms +// +// char ==> const char& +// const char ==> const char& +// char& ==> const char& +// const char& ==> const char& +// +// The argument T must depend on some template parameters. 
+#define GTEST_REFERENCE_TO_CONST_(T) \ + GTEST_ADD_REFERENCE_(const GTEST_REMOVE_REFERENCE_(T)) + +// ImplicitlyConvertible::value is a compile-time bool +// constant that's true iff type From can be implicitly converted to +// type To. +template +class ImplicitlyConvertible { + private: + // We need the following helper functions only for their types. + // They have no implementations. + + // MakeFrom() is an expression whose type is From. We cannot simply + // use From(), as the type From may not have a public default + // constructor. + static From MakeFrom(); + + // These two functions are overloaded. Given an expression + // Helper(x), the compiler will pick the first version if x can be + // implicitly converted to type To; otherwise it will pick the + // second version. + // + // The first version returns a value of size 1, and the second + // version returns a value of size 2. Therefore, by checking the + // size of Helper(x), which can be done at compile time, we can tell + // which version of Helper() is used, and hence whether x can be + // implicitly converted to type To. + static char Helper(To); + static char (&Helper(...))[2]; // NOLINT + + // We have to put the 'public' section after the 'private' section, + // or MSVC refuses to compile the code. + public: + // MSVC warns about implicitly converting from double to int for + // possible loss of data, so we need to temporarily disable the + // warning. +#ifdef _MSC_VER +# pragma warning(push) // Saves the current warning state. +# pragma warning(disable:4244) // Temporarily disables warning 4244. + + static const bool value = + sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1; +# pragma warning(pop) // Restores the warning state. +#elif defined(__BORLANDC__) + // C++Builder cannot use member overload resolution during template + // instantiation. The simplest workaround is to use its C++0x type traits + // functions (C++Builder 2009 and above only). 
+ static const bool value = __is_convertible(From, To); +#else + static const bool value = + sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1; +#endif // _MSC_VER +}; +template +const bool ImplicitlyConvertible::value; + +// IsAProtocolMessage::value is a compile-time bool constant that's +// true iff T is type ProtocolMessage, proto2::Message, or a subclass +// of those. +template +struct IsAProtocolMessage + : public bool_constant< + ImplicitlyConvertible::value || + ImplicitlyConvertible::value> { +}; + +// When the compiler sees expression IsContainerTest(0), if C is an +// STL-style container class, the first overload of IsContainerTest +// will be viable (since both C::iterator* and C::const_iterator* are +// valid types and NULL can be implicitly converted to them). It will +// be picked over the second overload as 'int' is a perfect match for +// the type of argument 0. If C::iterator or C::const_iterator is not +// a valid type, the first overload is not viable, and the second +// overload will be picked. Therefore, we can determine whether C is +// a container class by checking the type of IsContainerTest(0). +// The value of the expression is insignificant. +// +// Note that we look for both C::iterator and C::const_iterator. The +// reason is that C++ injects the name of a class as a member of the +// class itself (e.g. you can refer to class iterator as either +// 'iterator' or 'iterator::iterator'). If we look for C::iterator +// only, for example, we would mistakenly think that a class named +// iterator is an STL container. +// +// Also note that the simpler approach of overloading +// IsContainerTest(typename C::const_iterator*) and +// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
+typedef int IsContainer; +template +IsContainer IsContainerTest(int /* dummy */, + typename C::iterator* /* it */ = NULL, + typename C::const_iterator* /* const_it */ = NULL) { + return 0; +} + +typedef char IsNotContainer; +template +IsNotContainer IsContainerTest(long /* dummy */) { return '\0'; } + +// EnableIf::type is void when 'Cond' is true, and +// undefined when 'Cond' is false. To use SFINAE to make a function +// overload only apply when a particular expression is true, add +// "typename EnableIf::type* = 0" as the last parameter. +template struct EnableIf; +template<> struct EnableIf { typedef void type; }; // NOLINT + +// Utilities for native arrays. + +// ArrayEq() compares two k-dimensional native arrays using the +// elements' operator==, where k can be any integer >= 0. When k is +// 0, ArrayEq() degenerates into comparing a single pair of values. + +template +bool ArrayEq(const T* lhs, size_t size, const U* rhs); + +// This generic version is used when k is 0. +template +inline bool ArrayEq(const T& lhs, const U& rhs) { return lhs == rhs; } + +// This overload is used when k >= 1. +template +inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) { + return internal::ArrayEq(lhs, N, rhs); +} + +// This helper reduces code bloat. If we instead put its logic inside +// the previous ArrayEq() function, arrays with different sizes would +// lead to different copies of the template code. +template +bool ArrayEq(const T* lhs, size_t size, const U* rhs) { + for (size_t i = 0; i != size; i++) { + if (!internal::ArrayEq(lhs[i], rhs[i])) + return false; + } + return true; +} + +// Finds the first element in the iterator range [begin, end) that +// equals elem. Element may be a native array type itself. 
+template +Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) { + for (Iter it = begin; it != end; ++it) { + if (internal::ArrayEq(*it, elem)) + return it; + } + return end; +} + +// CopyArray() copies a k-dimensional native array using the elements' +// operator=, where k can be any integer >= 0. When k is 0, +// CopyArray() degenerates into copying a single value. + +template +void CopyArray(const T* from, size_t size, U* to); + +// This generic version is used when k is 0. +template +inline void CopyArray(const T& from, U* to) { *to = from; } + +// This overload is used when k >= 1. +template +inline void CopyArray(const T(&from)[N], U(*to)[N]) { + internal::CopyArray(from, N, *to); +} + +// This helper reduces code bloat. If we instead put its logic inside +// the previous CopyArray() function, arrays with different sizes +// would lead to different copies of the template code. +template +void CopyArray(const T* from, size_t size, U* to) { + for (size_t i = 0; i != size; i++) { + internal::CopyArray(from[i], to + i); + } +} + +// The relation between an NativeArray object (see below) and the +// native array it represents. +enum RelationToSource { + kReference, // The NativeArray references the native array. + kCopy // The NativeArray makes a copy of the native array and + // owns the copy. +}; + +// Adapts a native array to a read-only STL-style container. Instead +// of the complete STL container concept, this adaptor only implements +// members useful for Google Mock's container matchers. New members +// should be added as needed. To simplify the implementation, we only +// support Element being a raw type (i.e. having no top-level const or +// reference modifier). It's the client's responsibility to satisfy +// this requirement. Element can be an array type itself (hence +// multi-dimensional arrays are supported). +template +class NativeArray { + public: + // STL-style container typedefs. 
+ typedef Element value_type; + typedef Element* iterator; + typedef const Element* const_iterator; + + // Constructs from a native array. + NativeArray(const Element* array, size_t count, RelationToSource relation) { + Init(array, count, relation); + } + + // Copy constructor. + NativeArray(const NativeArray& rhs) { + Init(rhs.array_, rhs.size_, rhs.relation_to_source_); + } + + ~NativeArray() { + // Ensures that the user doesn't instantiate NativeArray with a + // const or reference type. + static_cast<void>(StaticAssertTypeEqHelper<Element, + GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>()); + if (relation_to_source_ == kCopy) + delete[] array_; + } + + // STL-style container methods. + size_t size() const { return size_; } + const_iterator begin() const { return array_; } + const_iterator end() const { return array_ + size_; } + bool operator==(const NativeArray& rhs) const { + return size() == rhs.size() && + ArrayEq(begin(), size(), rhs.begin()); + } + + private: + // Initializes this object; makes a copy of the input array if + // 'relation' is kCopy.
+ void Init(const Element* array, size_t a_size, RelationToSource relation) { + if (relation == kReference) { + array_ = array; + } else { + Element* const copy = new Element[a_size]; + CopyArray(array, a_size, copy); + array_ = copy; + } + size_ = a_size; + relation_to_source_ = relation; + } + + const Element* array_; + size_t size_; + RelationToSource relation_to_source_; + + GTEST_DISALLOW_ASSIGN_(NativeArray); +}; + +} // namespace internal +} // namespace testing + +#define GTEST_MESSAGE_AT_(file, line, message, result_type) \ + ::testing::internal::AssertHelper(result_type, file, line, message) \ + = ::testing::Message() + +#define GTEST_MESSAGE_(message, result_type) \ + GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type) + +#define GTEST_FATAL_FAILURE_(message) \ + return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure) + +#define GTEST_NONFATAL_FAILURE_(message) \ + GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure) + +#define GTEST_SUCCESS_(message) \ + GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess) + +// Suppresses MSVC warnings 4072 (unreachable code) for the code following +// statement if it returns or throws (or doesn't return or throw in some +// situations). +#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \ + if (::testing::internal::AlwaysTrue()) { statement; } + +#define GTEST_TEST_THROW_(statement, expected_exception, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::ConstCharPtr gtest_msg = "") { \ + bool gtest_caught_expected = false; \ + try { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } \ + catch (expected_exception const&) { \ + gtest_caught_expected = true; \ + } \ + catch (...) 
{ \ + gtest_msg.value = \ + "Expected: " #statement " throws an exception of type " \ + #expected_exception ".\n Actual: it throws a different type."; \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \ + } \ + if (!gtest_caught_expected) { \ + gtest_msg.value = \ + "Expected: " #statement " throws an exception of type " \ + #expected_exception ".\n Actual: it throws nothing."; \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__): \ + fail(gtest_msg.value) + +#define GTEST_TEST_NO_THROW_(statement, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + try { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } \ + catch (...) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \ + fail("Expected: " #statement " doesn't throw an exception.\n" \ + " Actual: it throws.") + +#define GTEST_TEST_ANY_THROW_(statement, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + bool gtest_caught_any = false; \ + try { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } \ + catch (...) { \ + gtest_caught_any = true; \ + } \ + if (!gtest_caught_any) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \ + fail("Expected: " #statement " throws an exception.\n" \ + " Actual: it doesn't.") + + +// Implements Boolean test assertions such as EXPECT_TRUE. expression can be +// either a boolean expression or an AssertionResult. text is a textual +// represenation of expression as it was passed into the EXPECT_TRUE. 
+#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (const ::testing::AssertionResult gtest_ar_ = \ + ::testing::AssertionResult(expression)) \ + ; \ + else \ + fail(::testing::internal::GetBoolAssertionFailureMessage(\ + gtest_ar_, text, #actual, #expected).c_str()) + +#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \ + fail("Expected: " #statement " doesn't generate new fatal " \ + "failures in the current thread.\n" \ + " Actual: it does.") + +// Expands to the name of the class that implements the given test. +#define GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ + test_case_name##_##test_name##_Test + +// Helper macro for defining tests. 
+#define GTEST_TEST_(test_case_name, test_name, parent_class, parent_id)\ +class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\ + public:\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\ + private:\ + virtual void TestBody();\ + static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\ + GTEST_DISALLOW_COPY_AND_ASSIGN_(\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\ +};\ +\ +::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\ + ::test_info_ =\ + ::testing::internal::MakeAndRegisterTestInfo(\ + #test_case_name, #test_name, NULL, NULL, \ + (parent_id), \ + parent_class::SetUpTestCase, \ + parent_class::TearDownTestCase, \ + new ::testing::internal::TestFactoryImpl<\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\ +void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines the public API for death tests. It is +// #included by gtest.h so a user doesn't need to include this +// directly. + +#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ + +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) +// +// The Google C++ Testing Framework (Google Test) +// +// This header file defines internal utilities needed for implementing +// death tests. They are subject to change without notice. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ + + +#include <stdio.h> + +namespace testing { +namespace internal { + +GTEST_DECLARE_string_(internal_run_death_test); + +// Names of the flags (needed for parsing Google Test flags). +const char kDeathTestStyleFlag[] = "death_test_style"; +const char kDeathTestUseFork[] = "death_test_use_fork"; +const char kInternalRunDeathTestFlag[] = "internal_run_death_test"; + +#if GTEST_HAS_DEATH_TEST + +// DeathTest is a class that hides much of the complexity of the +// GTEST_DEATH_TEST_ macro. It is abstract; its static Create method +// returns a concrete class that depends on the prevailing death test +// style, as defined by the --gtest_death_test_style and/or +// --gtest_internal_run_death_test flags.
+ +// In describing the results of death tests, these terms are used with +// the corresponding definitions: +// +// exit status: The integer exit information in the format specified +// by wait(2) +// exit code: The integer code passed to exit(3), _exit(2), or +// returned from main() +class GTEST_API_ DeathTest { + public: + // Create returns false if there was an error determining the + // appropriate action to take for the current death test; for example, + // if the gtest_death_test_style flag is set to an invalid value. + // The LastMessage method will return a more detailed message in that + // case. Otherwise, the DeathTest pointer pointed to by the "test" + // argument is set. If the death test should be skipped, the pointer + // is set to NULL; otherwise, it is set to the address of a new concrete + // DeathTest object that controls the execution of the current test. + static bool Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test); + DeathTest(); + virtual ~DeathTest() { } + + // A helper class that aborts a death test when it's deleted. + class ReturnSentinel { + public: + explicit ReturnSentinel(DeathTest* test) : test_(test) { } + ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); } + private: + DeathTest* const test_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel); + } GTEST_ATTRIBUTE_UNUSED_; + + // An enumeration of possible roles that may be taken when a death + // test is encountered. EXECUTE means that the death test logic should + // be executed immediately. OVERSEE means that the program should prepare + // the appropriate environment for a child process to execute the death + // test, then wait for it to complete. + enum TestRole { OVERSEE_TEST, EXECUTE_TEST }; + + // An enumeration of the three reasons that a test might be aborted. + enum AbortReason { + TEST_ENCOUNTERED_RETURN_STATEMENT, + TEST_THREW_EXCEPTION, + TEST_DID_NOT_DIE + }; + + // Assumes one of the above roles. 
+ virtual TestRole AssumeRole() = 0; + + // Waits for the death test to finish and returns its status. + virtual int Wait() = 0; + + // Returns true if the death test passed; that is, the test process + // exited during the test, its exit status matches a user-supplied + // predicate, and its stderr output matches a user-supplied regular + // expression. + // The user-supplied predicate may be a macro expression rather + // than a function pointer or functor, or else Wait and Passed could + // be combined. + virtual bool Passed(bool exit_status_ok) = 0; + + // Signals that the death test did not die as expected. + virtual void Abort(AbortReason reason) = 0; + + // Returns a human-readable outcome message regarding the outcome of + // the last death test. + static const char* LastMessage(); + + static void set_last_death_test_message(const std::string& message); + + private: + // A string containing a description of the outcome of the last death test. + static std::string last_death_test_message_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest); +}; + +// Factory interface for death tests. May be mocked out for testing. +class DeathTestFactory { + public: + virtual ~DeathTestFactory() { } + virtual bool Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test) = 0; +}; + +// A concrete DeathTestFactory implementation for normal use. +class DefaultDeathTestFactory : public DeathTestFactory { + public: + virtual bool Create(const char* statement, const RE* regex, + const char* file, int line, DeathTest** test); +}; + +// Returns true if exit_status describes a process that was terminated +// by a signal, or exited normally with a nonzero exit code. +GTEST_API_ bool ExitedUnsuccessfully(int exit_status); + +// Traps C++ exceptions escaping statement and reports them as test +// failures. Note that trapping SEH exceptions is not implemented here. 
+# if GTEST_HAS_EXCEPTIONS +# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \ + try { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } catch (const ::std::exception& gtest_exception) { \ + fprintf(\ + stderr, \ + "\n%s: Caught std::exception-derived exception escaping the " \ + "death test statement. Exception message: %s\n", \ + ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \ + gtest_exception.what()); \ + fflush(stderr); \ + death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \ + } catch (...) { \ + death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \ + } + +# else +# define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) + +# endif + +// This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*, +// ASSERT_EXIT*, and EXPECT_EXIT*. +# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + const ::testing::internal::RE& gtest_regex = (regex); \ + ::testing::internal::DeathTest* gtest_dt; \ + if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \ + __FILE__, __LINE__, &gtest_dt)) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \ + } \ + if (gtest_dt != NULL) { \ + ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \ + gtest_dt_ptr(gtest_dt); \ + switch (gtest_dt->AssumeRole()) { \ + case ::testing::internal::DeathTest::OVERSEE_TEST: \ + if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \ + goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \ + } \ + break; \ + case ::testing::internal::DeathTest::EXECUTE_TEST: { \ + ::testing::internal::DeathTest::ReturnSentinel \ + gtest_sentinel(gtest_dt); \ + GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \ + gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \ + break; \ + } \ + default: \ + break; \
+ } \ + } \ + } else \ + GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \ + fail(::testing::internal::DeathTest::LastMessage()) +// The symbol "fail" here expands to something into which a message +// can be streamed. + +// This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in +// NDEBUG mode. In this case we need the statements to be executed, the regex is +// ignored, and the macro must accept a streamed message even though the message +// is never printed. +# define GTEST_EXECUTE_STATEMENT_(statement, regex) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + } else \ + ::testing::Message() + +// A class representing the parsed contents of the +// --gtest_internal_run_death_test flag, as it existed when +// RUN_ALL_TESTS was called. +class InternalRunDeathTestFlag { + public: + InternalRunDeathTestFlag(const std::string& a_file, + int a_line, + int an_index, + int a_write_fd) + : file_(a_file), line_(a_line), index_(an_index), + write_fd_(a_write_fd) {} + + ~InternalRunDeathTestFlag() { + if (write_fd_ >= 0) + posix::Close(write_fd_); + } + + const std::string& file() const { return file_; } + int line() const { return line_; } + int index() const { return index_; } + int write_fd() const { return write_fd_; } + + private: + std::string file_; + int line_; + int index_; + int write_fd_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag); +}; + +// Returns a newly created InternalRunDeathTestFlag object with fields +// initialized from the GTEST_FLAG(internal_run_death_test) flag if +// the flag is specified; otherwise returns NULL. +InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag(); + +#else // GTEST_HAS_DEATH_TEST + +// This macro is used for implementing macros such as +// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where +// death tests are not supported. 
Those macros must compile on such systems +// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on +// systems that support death tests. This allows one to write such a macro +// on a system that does not support death tests and be sure that it will +// compile on a death-test supporting system. +// +// Parameters: +// statement - A statement that a macro such as EXPECT_DEATH would test +// for program termination. This macro has to make sure this +// statement is compiled but not executed, to ensure that +// EXPECT_DEATH_IF_SUPPORTED compiles with a certain +// parameter iff EXPECT_DEATH compiles with it. +// regex - A regex that a macro such as EXPECT_DEATH would use to test +// the output of statement. This parameter has to be +// compiled but not evaluated by this macro, to ensure that +// this macro only accepts expressions that a macro such as +// EXPECT_DEATH would accept. +// terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED +// and a return statement for ASSERT_DEATH_IF_SUPPORTED. +// This ensures that ASSERT_DEATH_IF_SUPPORTED will not +// compile inside functions where ASSERT_DEATH doesn't +// compile. +// +// The branch that has an always false condition is used to ensure that +// statement and regex are compiled (and thus syntactically correct) but +// never executed. The unreachable code macro protects the terminator +// statement from generating an 'unreachable code' warning in case +// statement unconditionally returns or throws. The Message constructor at +// the end allows the syntax of streaming additional messages into the +// macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH. 
+# define GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, terminator) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (::testing::internal::AlwaysTrue()) { \ + GTEST_LOG_(WARNING) \ + << "Death tests are not supported on this platform.\n" \ + << "Statement '" #statement "' cannot be verified."; \ + } else if (::testing::internal::AlwaysFalse()) { \ + ::testing::internal::RE::PartialMatch(".*", (regex)); \ + GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ + terminator; \ + } else \ + ::testing::Message() + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ + +namespace testing { + +// This flag controls the style of death tests. Valid values are "threadsafe", +// meaning that the death test child process will re-execute the test binary +// from the start, running only a single death test, or "fast", +// meaning that the child process will execute the test logic immediately +// after forking. +GTEST_DECLARE_string_(death_test_style); + +#if GTEST_HAS_DEATH_TEST + +namespace internal { + +// Returns a Boolean value indicating whether the caller is currently +// executing in the context of the death test child process. Tools such as +// Valgrind heap checkers may need this to modify their behavior in death +// tests. IMPORTANT: This is an internal utility. Using it may break the +// implementation of death tests. User code MUST NOT use it. +GTEST_API_ bool InDeathTestChild(); + +} // namespace internal + +// The following macros are useful for writing death tests. + +// Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is +// executed: +// +// 1. It generates a warning if there is more than one active +// thread. This is because it's safe to fork() or clone() only +// when there is a single thread. +// +// 2. 
The parent process clone()s a sub-process and runs the death +// test in it; the sub-process exits with code 0 at the end of the +// death test, if it hasn't exited already. +// +// 3. The parent process waits for the sub-process to terminate. +// +// 4. The parent process checks the exit code and error message of +// the sub-process. +// +// Examples: +// +// ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number"); +// for (int i = 0; i < 5; i++) { +// EXPECT_DEATH(server.ProcessRequest(i), +// "Invalid request .* in ProcessRequest()") +// << "Failed to die on request " << i; +// } +// +// ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting"); +// +// bool KilledBySIGHUP(int exit_code) { +// return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP; +// } +// +// ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!"); +// +// On the regular expressions used in death tests: +// +// On POSIX-compliant systems (*nix), we use the <regex.h> library, +// which uses the POSIX extended regex syntax. +// +// On other platforms (e.g. Windows), we only support a simple regex +// syntax implemented as part of Google Test. This limited +// implementation should be enough most of the time when writing +// death tests; though it lacks many features you can find in PCRE +// or POSIX extended regex syntax. For example, we don't support +// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and +// repetition count ("x{5,7}"), among others. +// +// Below is the syntax that we do support. We chose it to be a +// subset of both PCRE and POSIX extended regex, so it's easy to +// learn wherever you come from. In the following: 'A' denotes a +// literal character, period (.), or a single \\ escape sequence; +// 'x' and 'y' denote regular expressions; 'm' and 'n' are for +// natural numbers.
+// +// c matches any literal character c +// \\d matches any decimal digit +// \\D matches any character that's not a decimal digit +// \\f matches \f +// \\n matches \n +// \\r matches \r +// \\s matches any ASCII whitespace, including \n +// \\S matches any character that's not a whitespace +// \\t matches \t +// \\v matches \v +// \\w matches any letter, _, or decimal digit +// \\W matches any character that \\w doesn't match +// \\c matches any literal character c, which must be a punctuation +// . matches any single character except \n +// A? matches 0 or 1 occurrences of A +// A* matches 0 or many occurrences of A +// A+ matches 1 or many occurrences of A +// ^ matches the beginning of a string (not that of each line) +// $ matches the end of a string (not that of each line) +// xy matches x followed by y +// +// If you accidentally use PCRE or POSIX extended regex features +// not implemented by us, you will get a run-time failure. In that +// case, please try to rewrite your regular expression within the +// above syntax. +// +// This implementation is *not* meant to be as highly tuned or robust +// as a compiled regex library, but should perform well enough for a +// death test, which already incurs significant overhead by launching +// a child process. +// +// Known caveats: +// +// A "threadsafe" style death test obtains the path to the test +// program from argv[0] and re-executes it in the sub-process. For +// simplicity, the current implementation doesn't search the PATH +// when launching the sub-process. This means that the user must +// invoke the test program via a path that contains at least one +// path separator (e.g. path/to/foo_test and +// /absolute/path/to/bar_test are fine, but foo_test is not). This +// is rarely a problem as people usually don't put the test binary +// directory in PATH. +// +// TODO(wan@google.com): make thread-safe death tests search the PATH. 
+ +// Asserts that a given statement causes the program to exit, with an +// integer exit status that satisfies predicate, and emitting error output +// that matches regex. +# define ASSERT_EXIT(statement, predicate, regex) \ + GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_) + +// Like ASSERT_EXIT, but continues on to successive tests in the +// test case, if any: +# define EXPECT_EXIT(statement, predicate, regex) \ + GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_) + +// Asserts that a given statement causes the program to exit, either by +// explicitly exiting with a nonzero exit code or being killed by a +// signal, and emitting error output that matches regex. +# define ASSERT_DEATH(statement, regex) \ + ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex) + +// Like ASSERT_DEATH, but continues on to successive tests in the +// test case, if any: +# define EXPECT_DEATH(statement, regex) \ + EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex) + +// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*: + +// Tests that an exit code describes a normal exit with a given exit code. +class GTEST_API_ ExitedWithCode { + public: + explicit ExitedWithCode(int exit_code); + bool operator()(int exit_status) const; + private: + // No implementation - assignment is unsupported. + void operator=(const ExitedWithCode& other); + + const int exit_code_; +}; + +# if !GTEST_OS_WINDOWS +// Tests that an exit code describes an exit due to termination by a +// given signal. +class GTEST_API_ KilledBySignal { + public: + explicit KilledBySignal(int signum); + bool operator()(int exit_status) const; + private: + const int signum_; +}; +# endif // !GTEST_OS_WINDOWS + +// EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode. 
+// The death testing framework causes this to have interesting semantics, +// since the sideeffects of the call are only visible in opt mode, and not +// in debug mode. +// +// In practice, this can be used to test functions that utilize the +// LOG(DFATAL) macro using the following style: +// +// int DieInDebugOr12(int* sideeffect) { +// if (sideeffect) { +// *sideeffect = 12; +// } +// LOG(DFATAL) << "death"; +// return 12; +// } +// +// TEST(TestCase, TestDieOr12WorksInDgbAndOpt) { +// int sideeffect = 0; +// // Only asserts in dbg. +// EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death"); +// +// #ifdef NDEBUG +// // opt-mode has sideeffect visible. +// EXPECT_EQ(12, sideeffect); +// #else +// // dbg-mode no visible sideeffect. +// EXPECT_EQ(0, sideeffect); +// #endif +// } +// +// This will assert that DieInDebugOr12() crashes in debug +// mode, usually due to a DCHECK or LOG(DFATAL), but returns the +// appropriate fallback value (12 in this case) in opt mode. If you +// need to test that a function has appropriate side-effects in opt +// mode, include assertions against the side-effects. A general +// pattern for this is: +// +// EXPECT_DEBUG_DEATH({ +// // Side-effects here will have an effect after this statement in +// // opt mode, but none in debug mode.
+// EXPECT_EQ(12, DieInDebugOr12(&sideeffect)); +// }, "death"); +// +# ifdef NDEBUG + +# define EXPECT_DEBUG_DEATH(statement, regex) \ + GTEST_EXECUTE_STATEMENT_(statement, regex) + +# define ASSERT_DEBUG_DEATH(statement, regex) \ + GTEST_EXECUTE_STATEMENT_(statement, regex) + +# else + +# define EXPECT_DEBUG_DEATH(statement, regex) \ + EXPECT_DEATH(statement, regex) + +# define ASSERT_DEBUG_DEATH(statement, regex) \ + ASSERT_DEATH(statement, regex) + +# endif // NDEBUG for EXPECT_DEBUG_DEATH +#endif // GTEST_HAS_DEATH_TEST + +// EXPECT_DEATH_IF_SUPPORTED(statement, regex) and +// ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if +// death tests are supported; otherwise they just issue a warning. This is +// useful when you are combining death test assertions with normal test +// assertions in one test. +#if GTEST_HAS_DEATH_TEST +# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ + EXPECT_DEATH(statement, regex) +# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ + ASSERT_DEATH(statement, regex) +#else +# define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ + GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, ) +# define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ + GTEST_UNSUPPORTED_DEATH_TEST_(statement, regex, return) +#endif + +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ +// This file was GENERATED by command: +// pump.py gtest-param-test.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: vladl@google.com (Vlad Losev) +// +// Macros and functions for implementing parameterized tests +// in Google C++ Testing Framework (Google Test) +// +// This file is generated by a SCRIPT. DO NOT EDIT BY HAND! +// +#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ + + +// Value-parameterized tests allow you to test your code with different +// parameters without writing multiple copies of the same test. +// +// Here is how you use value-parameterized tests: + +#if 0 + +// To write value-parameterized tests, first you should define a fixture +// class. 
 It is usually derived from testing::TestWithParam<T> (see below for +// another inheritance scheme that's sometimes useful in more complicated +// class hierarchies), where T is the type of your parameter values. +// TestWithParam<T> is itself derived from testing::Test. T can be any +// copyable type. If it's a raw pointer, you are responsible for managing the +// lifespan of the pointed values. + +class FooTest : public ::testing::TestWithParam<const char*> { + // You can implement all the usual class fixture members here. +}; + +// Then, use the TEST_P macro to define as many parameterized tests +// for this fixture as you want. The _P suffix is for "parameterized" +// or "pattern", whichever you prefer to think. + +TEST_P(FooTest, DoesBlah) { + // Inside a test, access the test parameter with the GetParam() method + // of the TestWithParam<T> class: + EXPECT_TRUE(foo.Blah(GetParam())); + ... +} + +TEST_P(FooTest, HasBlahBlah) { + ... +} + +// Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test +// case with any set of parameters you want. Google Test defines a number +// of functions for generating test parameters. They return what we call +// (surprise!) parameter generators. Here is a summary of them, which +// are all in the testing namespace: +// +// +// Range(begin, end [, step]) - Yields values {begin, begin+step, +// begin+step+step, ...}. The values do not +// include end. step defaults to 1. +// Values(v1, v2, ..., vN) - Yields values {v1, v2, ..., vN}. +// ValuesIn(container) - Yields values from a C-style array, an STL +// ValuesIn(begin,end) container, or an iterator range [begin, end). +// Bool() - Yields sequence {false, true}. +// Combine(g1, g2, ..., gN) - Yields all combinations (the Cartesian product +// for the math savvy) of the values generated +// by the N generators. +// +// For more details, see comments at the definitions of these functions below +// in this file.
+// +// The following statement will instantiate tests from the FooTest test case +// each with parameter values "meeny", "miny", and "moe". + +INSTANTIATE_TEST_CASE_P(InstantiationName, + FooTest, + Values("meeny", "miny", "moe")); + +// To distinguish different instances of the pattern, (yes, you +// can instantiate it more than once) the first argument to the +// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the +// actual test case name. Remember to pick unique prefixes for different +// instantiations. The tests from the instantiation above will have +// these names: +// +// * InstantiationName/FooTest.DoesBlah/0 for "meeny" +// * InstantiationName/FooTest.DoesBlah/1 for "miny" +// * InstantiationName/FooTest.DoesBlah/2 for "moe" +// * InstantiationName/FooTest.HasBlahBlah/0 for "meeny" +// * InstantiationName/FooTest.HasBlahBlah/1 for "miny" +// * InstantiationName/FooTest.HasBlahBlah/2 for "moe" +// +// You can use these names in --gtest_filter. +// +// This statement will instantiate all tests from FooTest again, each +// with parameter values "cat" and "dog": + +const char* pets[] = {"cat", "dog"}; +INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets)); + +// The tests from the instantiation above will have these names: +// +// * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat" +// * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog" +// * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat" +// * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog" +// +// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests +// in the given test case, whether their definitions come before or +// AFTER the INSTANTIATE_TEST_CASE_P statement. +// +// Please also note that generator expressions (including parameters to the +// generators) are evaluated in InitGoogleTest(), after main() has started.
+// This allows the user on one hand, to adjust generator parameters in order +// to dynamically determine a set of tests to run and on the other hand, +// give the user a chance to inspect the generated tests with Google Test +// reflection API before RUN_ALL_TESTS() is executed. +// +// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc +// for more examples. +// +// In the future, we plan to publish the API for defining new parameter +// generators. But for now this interface remains part of the internal +// implementation and is subject to change. +// +// +// A parameterized test fixture must be derived from testing::Test and from +// testing::WithParamInterface, where T is the type of the parameter +// values. Inheriting from TestWithParam satisfies that requirement because +// TestWithParam inherits from both Test and WithParamInterface. In more +// complicated hierarchies, however, it is occasionally useful to inherit +// separately from Test and WithParamInterface. For example: + +class BaseTest : public ::testing::Test { + // You can inherit all the usual members for a non-parameterized test + // fixture here. +}; + +class DerivedTest : public BaseTest, public ::testing::WithParamInterface { + // The usual test fixture members go here too. +}; + +TEST_F(BaseTest, HasFoo) { + // This is an ordinary non-parameterized test. +} + +TEST_P(DerivedTest, DoesBlah) { + // GetParam works just the same here as if you inherit from TestWithParam. + EXPECT_TRUE(foo.Blah(GetParam())); +} + +#endif // 0 + + +#if !GTEST_OS_SYMBIAN +# include +#endif + +// scripts/fuse_gtest.py depends on gtest's own header being #included +// *unconditionally*. Therefore these #includes cannot be moved +// inside #if GTEST_HAS_PARAM_TEST. +// Copyright 2008 Google Inc. +// All Rights Reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// Type and function utilities for implementing parameterized tests. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ + +#include +#include +#include + +// scripts/fuse_gtest.py depends on gtest's own header being #included +// *unconditionally*. Therefore these #includes cannot be moved +// inside #if GTEST_HAS_PARAM_TEST. +// Copyright 2003 Google Inc. 
+// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Dan Egnor (egnor@google.com) +// +// A "smart" pointer type with reference tracking. Every pointer to a +// particular object is kept on a circular linked list. When the last pointer +// to an object is destroyed or reassigned, the object is deleted. +// +// Used properly, this deletes the object when the last reference goes away. +// There are several caveats: +// - Like all reference counting schemes, cycles lead to leaks. 
+// - Each smart pointer is actually two pointers (8 bytes instead of 4). +// - Every time a pointer is assigned, the entire list of pointers to that +// object is traversed. This class is therefore NOT SUITABLE when there +// will often be more than two or three pointers to a particular object. +// - References are only tracked as long as linked_ptr<> objects are copied. +// If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS +// will happen (double deletion). +// +// A good use of this class is storing object references in STL containers. +// You can safely put linked_ptr<> in a vector<>. +// Other uses may not be as good. +// +// Note: If you use an incomplete type with linked_ptr<>, the class +// *containing* linked_ptr<> must have a constructor and destructor (even +// if they do nothing!). +// +// Bill Gibbons suggested we use something like this. +// +// Thread Safety: +// Unlike other linked_ptr implementations, in this implementation +// a linked_ptr object is thread-safe in the sense that: +// - it's safe to copy linked_ptr objects concurrently, +// - it's safe to copy *from* a linked_ptr and read its underlying +// raw pointer (e.g. via get()) concurrently, and +// - it's safe to write to two linked_ptrs that point to the same +// shared object concurrently. +// TODO(wan@google.com): rename this to safe_linked_ptr to avoid +// confusion with normal linked_ptr. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ + +#include +#include + + +namespace testing { +namespace internal { + +// Protects copying of all linked_ptr objects. +GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex); + +// This is used internally by all instances of linked_ptr<>. It needs to be +// a non-template class because different types of linked_ptr<> can refer to +// the same object (linked_ptr(obj) vs linked_ptr(obj)). 
+// So, it needs to be possible for different types of linked_ptr to participate +// in the same circular linked list, so we need a single class type here. +// +// DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr. +class linked_ptr_internal { + public: + // Create a new circle that includes only this instance. + void join_new() { + next_ = this; + } + + // Many linked_ptr operations may change p.link_ for some linked_ptr + // variable p in the same circle as this object. Therefore we need + // to prevent two such operations from occurring concurrently. + // + // Note that different types of linked_ptr objects can coexist in a + // circle (e.g. linked_ptr, linked_ptr, and + // linked_ptr). Therefore we must use a single mutex to + // protect all linked_ptr objects. This can create serious + // contention in production code, but is acceptable in a testing + // framework. + + // Join an existing circle. + void join(linked_ptr_internal const* ptr) + GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) { + MutexLock lock(&g_linked_ptr_mutex); + + linked_ptr_internal const* p = ptr; + while (p->next_ != ptr) p = p->next_; + p->next_ = this; + next_ = ptr; + } + + // Leave whatever circle we're part of. Returns true if we were the + // last member of the circle. Once this is done, you can join() another. + bool depart() + GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) { + MutexLock lock(&g_linked_ptr_mutex); + + if (next_ == this) return true; + linked_ptr_internal const* p = next_; + while (p->next_ != this) p = p->next_; + p->next_ = next_; + return false; + } + + private: + mutable linked_ptr_internal const* next_; +}; + +template +class linked_ptr { + public: + typedef T element_type; + + // Take over ownership of a raw pointer. This should happen as soon as + // possible after the object is created. + explicit linked_ptr(T* ptr = NULL) { capture(ptr); } + ~linked_ptr() { depart(); } + + // Copy an existing linked_ptr<>, adding ourselves to the list of references. 
+ template linked_ptr(linked_ptr const& ptr) { copy(&ptr); } + linked_ptr(linked_ptr const& ptr) { // NOLINT + assert(&ptr != this); + copy(&ptr); + } + + // Assignment releases the old value and acquires the new. + template linked_ptr& operator=(linked_ptr const& ptr) { + depart(); + copy(&ptr); + return *this; + } + + linked_ptr& operator=(linked_ptr const& ptr) { + if (&ptr != this) { + depart(); + copy(&ptr); + } + return *this; + } + + // Smart pointer members. + void reset(T* ptr = NULL) { + depart(); + capture(ptr); + } + T* get() const { return value_; } + T* operator->() const { return value_; } + T& operator*() const { return *value_; } + + bool operator==(T* p) const { return value_ == p; } + bool operator!=(T* p) const { return value_ != p; } + template + bool operator==(linked_ptr const& ptr) const { + return value_ == ptr.get(); + } + template + bool operator!=(linked_ptr const& ptr) const { + return value_ != ptr.get(); + } + + private: + template + friend class linked_ptr; + + T* value_; + linked_ptr_internal link_; + + void depart() { + if (link_.depart()) delete value_; + } + + void capture(T* ptr) { + value_ = ptr; + link_.join_new(); + } + + template void copy(linked_ptr const* ptr) { + value_ = ptr->get(); + if (value_) + link_.join(&ptr->link_); + else + link_.join_new(); + } +}; + +template inline +bool operator==(T* ptr, const linked_ptr& x) { + return ptr == x.get(); +} + +template inline +bool operator!=(T* ptr, const linked_ptr& x) { + return ptr != x.get(); +} + +// A function to convert T* into linked_ptr +// Doing e.g. make_linked_ptr(new FooBarBaz(arg)) is a shorter notation +// for linked_ptr >(new FooBarBaz(arg)) +template +linked_ptr make_linked_ptr(T* ptr) { + return linked_ptr(ptr); +} + +} // namespace internal +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ +// Copyright 2007, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +// Google Test - The Google C++ Testing Framework +// +// This file implements a universal value printer that can print a +// value of any type T: +// +// void ::testing::internal::UniversalPrinter::Print(value, ostream_ptr); +// +// A user can teach this function how to print a class type T by +// defining either operator<<() or PrintTo() in the namespace that +// defines T. 
More specifically, the FIRST defined function in the +// following list will be used (assuming T is defined in namespace +// foo): +// +// 1. foo::PrintTo(const T&, ostream*) +// 2. operator<<(ostream&, const T&) defined in either foo or the +// global namespace. +// +// If none of the above is defined, it will print the debug string of +// the value if it is a protocol buffer, or print the raw bytes in the +// value otherwise. +// +// To aid debugging: when T is a reference type, the address of the +// value is also printed; when T is a (const) char pointer, both the +// pointer value and the NUL-terminated string it points to are +// printed. +// +// We also provide some convenient wrappers: +// +// // Prints a value to a string. For a (const or not) char +// // pointer, the NUL-terminated string (but not the pointer) is +// // printed. +// std::string ::testing::PrintToString(const T& value); +// +// // Prints a value tersely: for a reference type, the referenced +// // value (but not the address) is printed; for a (const or not) char +// // pointer, the NUL-terminated string (but not the pointer) is +// // printed. +// void ::testing::internal::UniversalTersePrint(const T& value, ostream*); +// +// // Prints value using the type inferred by the compiler. The difference +// // from UniversalTersePrint() is that this function prints both the +// // pointer and the NUL-terminated string for a (const or not) char pointer. +// void ::testing::internal::UniversalPrint(const T& value, ostream*); +// +// // Prints the fields of a tuple tersely to a string vector, one +// // element for each field. Tuple support must be enabled in +// // gtest-port.h. +// std::vector UniversalTersePrintTupleFieldsToStrings( +// const Tuple& value); +// +// Known limitation: +// +// The print primitives print the elements of an STL-style container +// using the compiler-inferred type of *iter where iter is a +// const_iterator of the container. 
When const_iterator is an input +// iterator but not a forward iterator, this inferred type may not +// match value_type, and the print output may be incorrect. In +// practice, this is rarely a problem as for most containers +// const_iterator is a forward iterator. We'll fix this if there's an +// actual need for it. Note that this fix cannot rely on value_type +// being defined as many user-defined container types don't have +// value_type. + +#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ +#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ + +#include // NOLINT +#include +#include +#include +#include + +namespace testing { + +// Definitions in the 'internal' and 'internal2' name spaces are +// subject to change without notice. DO NOT USE THEM IN USER CODE! +namespace internal2 { + +// Prints the given number of bytes in the given object to the given +// ostream. +GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes, + size_t count, + ::std::ostream* os); + +// For selecting which printer to use when a given type has neither << +// nor PrintTo(). +enum TypeKind { + kProtobuf, // a protobuf type + kConvertibleToInteger, // a type implicitly convertible to BiggestInt + // (e.g. a named or unnamed enum type) + kOtherType // anything else +}; + +// TypeWithoutFormatter::PrintValue(value, os) is called +// by the universal printer to print a value of type T when neither +// operator<< nor PrintTo() is defined for T, where kTypeKind is the +// "kind" of T as defined by enum TypeKind. +template +class TypeWithoutFormatter { + public: + // This default version is called when kTypeKind is kOtherType. + static void PrintValue(const T& value, ::std::ostream* os) { + PrintBytesInObjectTo(reinterpret_cast(&value), + sizeof(value), os); + } +}; + +// We print a protobuf using its ShortDebugString() when the string +// doesn't exceed this many characters; otherwise we print it using +// DebugString() for better readability. 
+const size_t kProtobufOneLinerMaxLength = 50;
+
+// Specialization used for protocol messages: prints the short debug string
+// when it fits in kProtobufOneLinerMaxLength characters, otherwise the full
+// debug string on its own lines, wrapped in <...>.
+template <typename T>
+class TypeWithoutFormatter<T, kProtobuf> {
+ public:
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    const ::testing::internal::string short_str = value.ShortDebugString();
+    const ::testing::internal::string pretty_str =
+        short_str.length() <= kProtobufOneLinerMaxLength ?
+        short_str : ("\n" + value.DebugString());
+    *os << ("<" + pretty_str + ">");
+  }
+};
+
+template <typename T>
+class TypeWithoutFormatter<T, kConvertibleToInteger> {
+ public:
+  // Since T has no << operator or PrintTo() but can be implicitly
+  // converted to BiggestInt, we print it as a BiggestInt.
+  //
+  // Most likely T is an enum type (either named or unnamed), in which
+  // case printing it as an integer is the desired behavior. In case
+  // T is not an enum, printing it as an integer is the best we can do
+  // given that it has no user-defined printer.
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    const internal::BiggestInt kBigInt = value;
+    *os << kBigInt;
+  }
+};
+
+// Prints the given value to the given ostream. If the value is a
+// protocol message, its debug string is printed; if it's an enum or
+// of a type implicitly convertible to BiggestInt, it's printed as an
+// integer; otherwise the bytes in the value are printed. This is
+// what UniversalPrinter<T>::Print() does when it knows nothing about
+// type T and T has neither << operator nor PrintTo().
+//
+// A user can override this behavior for a class type Foo by defining
+// a << operator in the namespace where Foo is defined.
+//
+// We put this operator in namespace 'internal2' instead of 'internal'
+// to simplify the implementation, as much code in 'internal' needs to
+// use << in STL, which would conflict with our own << were it defined
+// in 'internal'.
+//
+// Note that this operator<< takes a generic std::basic_ostream<Char,
+// CharTraits> type instead of the more restricted std::ostream.
If +// we define it to take an std::ostream instead, we'll get an +// "ambiguous overloads" compiler error when trying to print a type +// Foo that supports streaming to std::basic_ostream, as the compiler cannot tell whether +// operator<<(std::ostream&, const T&) or +// operator<<(std::basic_stream, const Foo&) is more +// specific. +template +::std::basic_ostream& operator<<( + ::std::basic_ostream& os, const T& x) { + TypeWithoutFormatter::value ? kProtobuf : + internal::ImplicitlyConvertible::value ? + kConvertibleToInteger : kOtherType)>::PrintValue(x, &os); + return os; +} + +} // namespace internal2 +} // namespace testing + +// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up +// magic needed for implementing UniversalPrinter won't work. +namespace testing_internal { + +// Used to print a value that is not an STL-style container when the +// user doesn't define PrintTo() for it. +template +void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) { + // With the following statement, during unqualified name lookup, + // testing::internal2::operator<< appears as if it was declared in + // the nearest enclosing namespace that contains both + // ::testing_internal and ::testing::internal2, i.e. the global + // namespace. For more details, refer to the C++ Standard section + // 7.3.4-1 [namespace.udir]. This allows us to fall back onto + // testing::internal2::operator<< in case T doesn't come with a << + // operator. + // + // We cannot write 'using ::testing::internal2::operator<<;', which + // gcc 3.3 fails to compile due to a compiler bug. + using namespace ::testing::internal2; // NOLINT + + // Assuming T is defined in namespace foo, in the next statement, + // the compiler will consider all of: + // + // 1. foo::operator<< (thanks to Koenig look-up), + // 2. ::operator<< (as the current namespace is enclosed in ::), + // 3. testing::internal2::operator<< (thanks to the using statement above). 
+ // + // The operator<< whose type matches T best will be picked. + // + // We deliberately allow #2 to be a candidate, as sometimes it's + // impossible to define #1 (e.g. when foo is ::std, defining + // anything in it is undefined behavior unless you are a compiler + // vendor.). + *os << value; +} + +} // namespace testing_internal + +namespace testing { +namespace internal { + +// UniversalPrinter::Print(value, ostream_ptr) prints the given +// value to the given ostream. The caller must ensure that +// 'ostream_ptr' is not NULL, or the behavior is undefined. +// +// We define UniversalPrinter as a class template (as opposed to a +// function template), as we need to partially specialize it for +// reference types, which cannot be done with function templates. +template +class UniversalPrinter; + +template +void UniversalPrint(const T& value, ::std::ostream* os); + +// Used to print an STL-style container when the user doesn't define +// a PrintTo() for it. +template +void DefaultPrintTo(IsContainer /* dummy */, + false_type /* is not a pointer */, + const C& container, ::std::ostream* os) { + const size_t kMaxCount = 32; // The maximum number of elements to print. + *os << '{'; + size_t count = 0; + for (typename C::const_iterator it = container.begin(); + it != container.end(); ++it, ++count) { + if (count > 0) { + *os << ','; + if (count == kMaxCount) { // Enough has been printed. + *os << " ..."; + break; + } + } + *os << ' '; + // We cannot call PrintTo(*it, os) here as PrintTo() doesn't + // handle *it being a native array. + internal::UniversalPrint(*it, os); + } + + if (count > 0) { + *os << ' '; + } + *os << '}'; +} + +// Used to print a pointer that is neither a char pointer nor a member +// pointer, when the user doesn't define PrintTo() for it. (A member +// variable pointer or member function pointer doesn't really point to +// a location in the address space. Their representation is +// implementation-defined. 
Therefore they will be printed as raw +// bytes.) +template +void DefaultPrintTo(IsNotContainer /* dummy */, + true_type /* is a pointer */, + T* p, ::std::ostream* os) { + if (p == NULL) { + *os << "NULL"; + } else { + // C++ doesn't allow casting from a function pointer to any object + // pointer. + // + // IsTrue() silences warnings: "Condition is always true", + // "unreachable code". + if (IsTrue(ImplicitlyConvertible::value)) { + // T is not a function type. We just call << to print p, + // relying on ADL to pick up user-defined << for their pointer + // types, if any. + *os << p; + } else { + // T is a function type, so '*os << p' doesn't do what we want + // (it just prints p as bool). We want to print p as a const + // void*. However, we cannot cast it to const void* directly, + // even using reinterpret_cast, as earlier versions of gcc + // (e.g. 3.4.5) cannot compile the cast when p is a function + // pointer. Casting to UInt64 first solves the problem. + *os << reinterpret_cast( + reinterpret_cast(p)); + } + } +} + +// Used to print a non-container, non-pointer value when the user +// doesn't define PrintTo() for it. +template +void DefaultPrintTo(IsNotContainer /* dummy */, + false_type /* is not a pointer */, + const T& value, ::std::ostream* os) { + ::testing_internal::DefaultPrintNonContainerTo(value, os); +} + +// Prints the given value using the << operator if it has one; +// otherwise prints the bytes in it. This is what +// UniversalPrinter::Print() does when PrintTo() is not specialized +// or overloaded for type T. +// +// A user can override this behavior for a class type Foo by defining +// an overload of PrintTo() in the namespace where Foo is defined. We +// give the user this option as sometimes defining a << operator for +// Foo is not desirable (e.g. the coding style may prevent doing it, +// or there is already a << operator but it doesn't do what the user +// wants). 
+template <typename T>
+void PrintTo(const T& value, ::std::ostream* os) {
+  // DefaultPrintTo() is overloaded. The types of its first two
+  // arguments determine which version will be picked. If T is an
+  // STL-style container, the version for container will be called; if
+  // T is a pointer, the pointer version will be called; otherwise the
+  // generic version will be called.
+  //
+  // Note that we check for container types here, before we check
+  // for protocol message types in our operator<<. The rationale is:
+  //
+  // For protocol messages, we want to give people a chance to
+  // override Google Mock's format by defining a PrintTo() or
+  // operator<<. For STL containers, other formats can be
+  // incompatible with Google Mock's format for the container
+  // elements; therefore we check for container types here to ensure
+  // that our format is used.
+  //
+  // The second argument of DefaultPrintTo() is needed to bypass a bug
+  // in Symbian's C++ compiler that prevents it from picking the right
+  // overload between:
+  //
+  //   PrintTo(const T& x, ...);
+  //   PrintTo(T* x, ...);
+  DefaultPrintTo(IsContainerTest<T>(0), is_pointer<T>(), value, os);
+}
+
+// The following list of PrintTo() overloads tells
+// UniversalPrinter<T>::Print() how to print standard types (built-in
+// types, strings, plain arrays, and pointers).
+
+// Overloads for various char types.
+GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
+GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
+inline void PrintTo(char c, ::std::ostream* os) {
+  // When printing a plain char, we always treat it as unsigned. This
+  // way, the output won't be affected by whether the compiler thinks
+  // char is signed or not.
+  PrintTo(static_cast<unsigned char>(c), os);
+}
+
+// Overloads for other simple built-in types.
+inline void PrintTo(bool x, ::std::ostream* os) {
+  *os << (x ? "true" : "false");
+}
+
+// Overload for wchar_t type.
+// Prints a wchar_t as a symbol if it is printable or as its internal +// code otherwise and also as its decimal code (except for L'\0'). +// The L'\0' char is printed as "L'\\0'". The decimal code is printed +// as signed integer when wchar_t is implemented by the compiler +// as a signed type and is printed as an unsigned integer when wchar_t +// is implemented as an unsigned type. +GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os); + +// Overloads for C strings. +GTEST_API_ void PrintTo(const char* s, ::std::ostream* os); +inline void PrintTo(char* s, ::std::ostream* os) { + PrintTo(ImplicitCast_(s), os); +} + +// signed/unsigned char is often used for representing binary data, so +// we print pointers to it as void* to be safe. +inline void PrintTo(const signed char* s, ::std::ostream* os) { + PrintTo(ImplicitCast_(s), os); +} +inline void PrintTo(signed char* s, ::std::ostream* os) { + PrintTo(ImplicitCast_(s), os); +} +inline void PrintTo(const unsigned char* s, ::std::ostream* os) { + PrintTo(ImplicitCast_(s), os); +} +inline void PrintTo(unsigned char* s, ::std::ostream* os) { + PrintTo(ImplicitCast_(s), os); +} + +// MSVC can be configured to define wchar_t as a typedef of unsigned +// short. It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native +// type. When wchar_t is a typedef, defining an overload for const +// wchar_t* would cause unsigned short* be printed as a wide string, +// possibly causing invalid memory accesses. +#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) +// Overloads for wide C strings +GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os); +inline void PrintTo(wchar_t* s, ::std::ostream* os) { + PrintTo(ImplicitCast_(s), os); +} +#endif + +// Overload for C arrays. Multi-dimensional arrays are printed +// properly. + +// Prints the given number of elements in an array, without printing +// the curly braces. 
+template <typename T>
+void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
+  // Precondition: count >= 1. a[0] is printed unconditionally and the
+  // loop below terminates on i == count, starting from i == 1.
+  UniversalPrint(a[0], os);
+  for (size_t i = 1; i != count; i++) {
+    *os << ", ";
+    UniversalPrint(a[i], os);
+  }
+}
+
+// Overloads for ::string and ::std::string.
+#if GTEST_HAS_GLOBAL_STRING
+GTEST_API_ void PrintStringTo(const ::string&s, ::std::ostream* os);
+inline void PrintTo(const ::string& s, ::std::ostream* os) {
+  PrintStringTo(s, os);
+}
+#endif  // GTEST_HAS_GLOBAL_STRING
+
+GTEST_API_ void PrintStringTo(const ::std::string&s, ::std::ostream* os);
+inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
+  PrintStringTo(s, os);
+}
+
+// Overloads for ::wstring and ::std::wstring.
+#if GTEST_HAS_GLOBAL_WSTRING
+GTEST_API_ void PrintWideStringTo(const ::wstring&s, ::std::ostream* os);
+inline void PrintTo(const ::wstring& s, ::std::ostream* os) {
+  PrintWideStringTo(s, os);
+}
+#endif  // GTEST_HAS_GLOBAL_WSTRING
+
+#if GTEST_HAS_STD_WSTRING
+GTEST_API_ void PrintWideStringTo(const ::std::wstring&s, ::std::ostream* os);
+inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
+  PrintWideStringTo(s, os);
+}
+#endif  // GTEST_HAS_STD_WSTRING
+
+#if GTEST_HAS_TR1_TUPLE
+// Overload for ::std::tr1::tuple. Needed for printing function arguments,
+// which are packed as tuples.
+
+// Helper function for printing a tuple. T must be instantiated with
+// a tuple type.
+template <typename T>
+void PrintTupleTo(const T& t, ::std::ostream* os);
+
+// Overloaded PrintTo() for tuples of various arities. We support
+// tuples of up-to 10 fields. The following implementation works
+// regardless of whether tr1::tuple is implemented using the
+// non-standard variadic template feature or not.
+
+inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1>
+void PrintTo(const ::std::tr1::tuple<T1>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2>
+void PrintTo(const ::std::tr1::tuple<T1, T2>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4>& t, ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7, typename T8>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7, typename T8, typename T9>
+void PrintTo(const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>& t,
+             ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+          typename T6, typename T7, typename T8, typename T9, typename T10>
+void PrintTo(
+    const ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>& t,
+    ::std::ostream* os) {
+  PrintTupleTo(t, os);
+}
+#endif  // GTEST_HAS_TR1_TUPLE
+
+// Overload for std::pair.
+template <typename T1, typename T2>
+void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
+  *os << '(';
+  // We cannot use UniversalPrint(value.first, os) here, as T1 may be
+  // a reference type. The same for printing value.second.
+  UniversalPrinter<T1>::Print(value.first, os);
+  *os << ", ";
+  UniversalPrinter<T2>::Print(value.second, os);
+  *os << ')';
+}
+
+// Implements printing a non-reference type T by letting the compiler
+// pick the right overload of PrintTo() for T.
+template <typename T>
+class UniversalPrinter {
+ public:
+  // MSVC warns about adding const to a function type, so we want to
+  // disable the warning.
+#ifdef _MSC_VER
+# pragma warning(push)          // Saves the current warning state.
+# pragma warning(disable:4180)  // Temporarily disables warning 4180.
+#endif  // _MSC_VER
+
+  // Note: we deliberately don't call this PrintTo(), as that name
+  // conflicts with ::testing::internal::PrintTo in the body of the
+  // function.
+  static void Print(const T& value, ::std::ostream* os) {
+    // By default, ::testing::internal::PrintTo() is used for printing
+    // the value.
+    //
+    // Thanks to Koenig look-up, if T is a class and has its own
+    // PrintTo() function defined in its namespace, that function will
+    // be visible here. Since it is more specific than the generic ones
+    // in ::testing::internal, it will be picked by the compiler in the
+    // following statement - exactly what we want.
+    PrintTo(value, os);
+  }
+
+#ifdef _MSC_VER
+# pragma warning(pop)           // Restores the warning state.
+#endif  // _MSC_VER
+};
+
+// UniversalPrintArray(begin, len, os) prints an array of 'len'
+// elements, starting at address 'begin'.
+template <typename T>
+void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
+  if (len == 0) {
+    *os << "{}";
+  } else {
+    *os << "{ ";
+    const size_t kThreshold = 18;
+    const size_t kChunkSize = 8;
+    // If the array has more than kThreshold elements, we'll have to
+    // omit some details by printing only the first and the last
+    // kChunkSize elements.
+    // TODO(wan@google.com): let the user control the threshold using a flag.
+    if (len <= kThreshold) {
+      PrintRawArrayTo(begin, len, os);
+    } else {
+      PrintRawArrayTo(begin, kChunkSize, os);
+      *os << ", ..., ";
+      PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os);
+    }
+    *os << " }";
+  }
+}
+// This overload prints a (const) char array compactly.
+GTEST_API_ void UniversalPrintArray(
+    const char* begin, size_t len, ::std::ostream* os);
+
+// This overload prints a (const) wchar_t array compactly.
+GTEST_API_ void UniversalPrintArray(
+    const wchar_t* begin, size_t len, ::std::ostream* os);
+
+// Implements printing an array type T[N].
+template <typename T, size_t N>
+class UniversalPrinter<T[N]> {
+ public:
+  // Prints the given array, omitting some elements when there are too
+  // many.
+  static void Print(const T (&a)[N], ::std::ostream* os) {
+    UniversalPrintArray(a, N, os);
+  }
+};
+
+// Implements printing a reference type T&.
+template <typename T>
+class UniversalPrinter<T&> {
+ public:
+  // MSVC warns about adding const to a function type, so we want to
+  // disable the warning.
+#ifdef _MSC_VER
+# pragma warning(push)          // Saves the current warning state.
+# pragma warning(disable:4180)  // Temporarily disables warning 4180.
+#endif  // _MSC_VER
+
+  static void Print(const T& value, ::std::ostream* os) {
+    // Prints the address of the value. We use reinterpret_cast here
+    // as static_cast doesn't compile when T is a function type.
+    *os << "@" << reinterpret_cast<const void*>(&value) << " ";
+
+    // Then prints the value itself.
+    UniversalPrint(value, os);
+  }
+
+#ifdef _MSC_VER
+# pragma warning(pop)           // Restores the warning state.
+#endif  // _MSC_VER
+};
+
+// Prints a value tersely: for a reference type, the referenced value
+// (but not the address) is printed; for a (const) char pointer, the
+// NUL-terminated string (but not the pointer) is printed.
+
+template <typename T>
+class UniversalTersePrinter {
+ public:
+  static void Print(const T& value, ::std::ostream* os) {
+    UniversalPrint(value, os);
+  }
+};
+template <typename T>
+class UniversalTersePrinter<T&> {
+ public:
+  static void Print(const T& value, ::std::ostream* os) {
+    UniversalPrint(value, os);
+  }
+};
+template <typename T, size_t N>
+class UniversalTersePrinter<T[N]> {
+ public:
+  static void Print(const T (&value)[N], ::std::ostream* os) {
+    UniversalPrinter<T[N]>::Print(value, os);
+  }
+};
+template <>
+class UniversalTersePrinter<const char*> {
+ public:
+  static void Print(const char* str, ::std::ostream* os) {
+    if (str == NULL) {
+      *os << "NULL";
+    } else {
+      UniversalPrint(string(str), os);
+    }
+  }
+};
+template <>
+class UniversalTersePrinter<char*> {
+ public:
+  static void Print(char* str, ::std::ostream* os) {
+    UniversalTersePrinter<const char*>::Print(str, os);
+  }
+};
+
+#if GTEST_HAS_STD_WSTRING
+template <>
+class UniversalTersePrinter<const wchar_t*> {
+ public:
+  static void Print(const wchar_t* str, ::std::ostream* os) {
+    if (str == NULL) {
+      *os << "NULL";
+    } else {
+      UniversalPrint(::std::wstring(str), os);
+    }
+  }
+};
+#endif
+
+template <>
+class UniversalTersePrinter<wchar_t*> {
+ public:
+  static void Print(wchar_t* str, ::std::ostream* os) {
+    UniversalTersePrinter<const wchar_t*>::Print(str, os);
+  }
+};
+
+template <typename T>
+void UniversalTersePrint(const T& value, ::std::ostream* os) {
+  UniversalTersePrinter<T>::Print(value, os);
+}
+
+// Prints a value using the type inferred by the compiler. The
+// difference between this and UniversalTersePrint() is that for a
+// (const) char pointer, this prints both the pointer and the
+// NUL-terminated string.
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os) {
+  // A workaround for the bug in VC++ 7.1 that prevents us from instantiating
+  // UniversalPrinter with T directly.
+  typedef T T1;
+  UniversalPrinter<T1>::Print(value, os);
+}
+
+#if GTEST_HAS_TR1_TUPLE
+typedef ::std::vector<string> Strings;
+
+// This helper template allows PrintTo() for tuples and
+// UniversalTersePrintTupleFieldsToStrings() to be defined by
+// induction on the number of tuple fields. The idea is that
+// TuplePrefixPrinter<N>::PrintPrefixTo(t, os) prints the first N
+// fields in tuple t, and can be defined in terms of
+// TuplePrefixPrinter<N - 1>.
+
+// The inductive case.
+template <size_t N>
+struct TuplePrefixPrinter {
+  // Prints the first N fields of a tuple.
+  template <typename Tuple>
+  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
+    TuplePrefixPrinter<N - 1>::PrintPrefixTo(t, os);
+    *os << ", ";
+    UniversalPrinter<typename ::std::tr1::tuple_element<N - 1, Tuple>::type>
+        ::Print(::std::tr1::get<N - 1>(t), os);
+  }
+
+  // Tersely prints the first N fields of a tuple to a string vector,
+  // one element for each field.
+  template <typename Tuple>
+  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
+    TuplePrefixPrinter<N - 1>::TersePrintPrefixToStrings(t, strings);
+    ::std::stringstream ss;
+    UniversalTersePrint(::std::tr1::get<N - 1>(t), &ss);
+    strings->push_back(ss.str());
+  }
+};
+
+// Base cases.
+template <>
+struct TuplePrefixPrinter<0> {
+  template <typename Tuple>
+  static void PrintPrefixTo(const Tuple&, ::std::ostream*) {}
+
+  template <typename Tuple>
+  static void TersePrintPrefixToStrings(const Tuple&, Strings*) {}
+};
+// We have to specialize the entire TuplePrefixPrinter<> class
+// template here, even though the definition of
+// TersePrintPrefixToStrings() is the same as the generic version, as
+// Embarcadero (formerly CodeGear, formerly Borland) C++ doesn't
+// support specializing a method template of a class template.
+template <>
+struct TuplePrefixPrinter<1> {
+  template <typename Tuple>
+  static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) {
+    UniversalPrinter<typename ::std::tr1::tuple_element<0, Tuple>::type>::
+        Print(::std::tr1::get<0>(t), os);
+  }
+
+  template <typename Tuple>
+  static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) {
+    ::std::stringstream ss;
+    UniversalTersePrint(::std::tr1::get<0>(t), &ss);
+    strings->push_back(ss.str());
+  }
+};
+
+// Helper function for printing a tuple. T must be instantiated with
+// a tuple type.
+template <typename T>
+void PrintTupleTo(const T& t, ::std::ostream* os) {
+  *os << "(";
+  TuplePrefixPrinter< ::std::tr1::tuple_size<T>::value>::
+      PrintPrefixTo(t, os);
+  *os << ")";
+}
+
+// Prints the fields of a tuple tersely to a string vector, one
+// element for each field. See the comment before
+// UniversalTersePrint() for how we define "tersely".
+template <typename Tuple>
+Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
+  Strings result;
+  TuplePrefixPrinter< ::std::tr1::tuple_size<Tuple>::value>::
+      TersePrintPrefixToStrings(value, &result);
+  return result;
+}
+#endif  // GTEST_HAS_TR1_TUPLE
+
+}  // namespace internal
+
+// Returns the terse printed form of value as a string.
+template <typename T>
+::std::string PrintToString(const T& value) {
+  ::std::stringstream ss;
+  internal::UniversalTersePrinter<T>::Print(value, &ss);
+  return ss.str();
+}
+
+}  // namespace testing
+
+#endif  // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+
+#if GTEST_HAS_PARAM_TEST
+
+namespace testing {
+namespace internal {
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Outputs a message explaining invalid registration of different
+// fixture class for the same test case. This may happen when
+// TEST_P macro is used to define two tests with the same name
+// but in different namespaces.
+GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name,
+                                          const char* file, int line);
+
+template <typename> class ParamGeneratorInterface;
+template <typename> class ParamGenerator;
+
+// Interface for iterating over elements provided by an implementation
+// of ParamGeneratorInterface<T>.
+template <typename T>
+class ParamIteratorInterface {
+ public:
+  virtual ~ParamIteratorInterface() {}
+  // A pointer to the base generator instance.
+  // Used only for the purposes of iterator comparison
+  // to make sure that two iterators belong to the same generator.
+  virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
+  // Advances iterator to point to the next element
+  // provided by the generator. The caller is responsible
+  // for not calling Advance() on an iterator equal to
+  // BaseGenerator()->End().
+  virtual void Advance() = 0;
+  // Clones the iterator object. Used for implementing copy semantics
+  // of ParamIterator<T>.
+  virtual ParamIteratorInterface* Clone() const = 0;
+  // Dereferences the current iterator and provides (read-only) access
+  // to the pointed value. It is the caller's responsibility not to call
+  // Current() on an iterator equal to BaseGenerator()->End().
+  // Used for implementing ParamGenerator<T>::operator*().
+  virtual const T* Current() const = 0;
+  // Determines whether the given iterator and other point to the same
+  // element in the sequence generated by the generator.
+  // Used for implementing ParamGenerator<T>::operator==().
+  virtual bool Equals(const ParamIteratorInterface& other) const = 0;
+};
+
+// Class iterating over elements provided by an implementation of
+// ParamGeneratorInterface<T>. It wraps ParamIteratorInterface<T>
+// and implements the const forward iterator concept.
+template <typename T>
+class ParamIterator {
+ public:
+  typedef T value_type;
+  typedef const T& reference;
+  typedef ptrdiff_t difference_type;
+
+  // ParamIterator assumes ownership of the impl_ pointer.
+  ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
+  ParamIterator& operator=(const ParamIterator& other) {
+    if (this != &other)
+      impl_.reset(other.impl_->Clone());
+    return *this;
+  }
+
+  const T& operator*() const { return *impl_->Current(); }
+  const T* operator->() const { return impl_->Current(); }
+  // Prefix version of operator++.
+  ParamIterator& operator++() {
+    impl_->Advance();
+    return *this;
+  }
+  // Postfix version of operator++.
+  ParamIterator operator++(int /*unused*/) {
+    // Clone before advancing so the returned iterator keeps the old position.
+    ParamIteratorInterface<T>* clone = impl_->Clone();
+    impl_->Advance();
+    return ParamIterator(clone);
+  }
+  bool operator==(const ParamIterator& other) const {
+    return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
+  }
+  bool operator!=(const ParamIterator& other) const {
+    return !(*this == other);
+  }
+
+ private:
+  friend class ParamGenerator<T>;
+  explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
+  scoped_ptr<ParamIteratorInterface<T> > impl_;
+};
+
+// ParamGeneratorInterface<T> is the binary interface to access generators
+// defined in other translation units.
+template <typename T>
+class ParamGeneratorInterface {
+ public:
+  typedef T ParamType;
+
+  virtual ~ParamGeneratorInterface() {}
+
+  // Generator interface definition
+  virtual ParamIteratorInterface<T>* Begin() const = 0;
+  virtual ParamIteratorInterface<T>* End() const = 0;
+};
+
+// Wraps ParamGeneratorInterface<T> and provides general generator syntax
+// compatible with the STL Container concept.
+// This class implements copy initialization semantics and the contained
+// ParamGeneratorInterface<T> instance is shared among all copies
+// of the original object. This is possible because that instance is immutable.
+template <typename T>
+class ParamGenerator {
+ public:
+  typedef ParamIterator<T> iterator;
+
+  explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
+  ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}
+
+  ParamGenerator& operator=(const ParamGenerator& other) {
+    impl_ = other.impl_;
+    return *this;
+  }
+
+  iterator begin() const { return iterator(impl_->Begin()); }
+  iterator end() const { return iterator(impl_->End()); }
+
+ private:
+  linked_ptr<const ParamGeneratorInterface<T> > impl_;
+};
+
+// Generates values from a range of two comparable values. Can be used to
+// generate sequences of user-defined types that implement operator+() and
+// operator<().
+// This class is used in the Range() function.
+template <typename T, typename IncrementT>
+class RangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+  RangeGenerator(T begin, T end, IncrementT step)
+      : begin_(begin), end_(end),
+        step_(step), end_index_(CalculateEndIndex(begin, end, step)) {}
+  virtual ~RangeGenerator() {}
+
+  virtual ParamIteratorInterface<T>* Begin() const {
+    return new Iterator(this, begin_, 0, step_);
+  }
+  virtual ParamIteratorInterface<T>* End() const {
+    return new Iterator(this, end_, end_index_, step_);
+  }
+
+ private:
+  class Iterator : public ParamIteratorInterface<T> {
+   public:
+    Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
+             IncrementT step)
+        : base_(base), value_(value), index_(index), step_(step) {}
+    virtual ~Iterator() {}
+
+    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
+      return base_;
+    }
+    virtual void Advance() {
+      value_ = value_ + step_;
+      index_++;
+    }
+    virtual ParamIteratorInterface<T>* Clone() const {
+      return new Iterator(*this);
+    }
+    virtual const T* Current() const { return &value_; }
+    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
+      // Having the same base generator guarantees that the other
+      // iterator is of the same type and we can downcast.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      // Iterators are compared by index, not by value.
+      const int other_index =
+          CheckedDowncastToActualType<const Iterator>(&other)->index_;
+      return index_ == other_index;
+    }
+
+   private:
+    Iterator(const Iterator& other)
+        : ParamIteratorInterface<T>(),
+          base_(other.base_), value_(other.value_), index_(other.index_),
+          step_(other.step_) {}
+
+    // No implementation - assignment is unsupported.
+    void operator=(const Iterator& other);
+
+    const ParamGeneratorInterface<T>* const base_;
+    T value_;
+    int index_;
+    const IncrementT step_;
+  };  // class RangeGenerator::Iterator
+
+  // Counts the values in [begin, end) reachable by repeatedly adding step.
+  // The loop only terminates if adding step eventually makes i < end false.
+  static int CalculateEndIndex(const T& begin,
+                               const T& end,
+                               const IncrementT& step) {
+    int end_index = 0;
+    for (T i = begin; i < end; i = i + step)
+      end_index++;
+    return end_index;
+  }
+
+  // No implementation - assignment is unsupported.
+  void operator=(const RangeGenerator& other);
+
+  const T begin_;
+  const T end_;
+  const IncrementT step_;
+  // The index for the end() iterator. All the elements in the generated
+  // sequence are indexed (0-based) to aid iterator comparison.
+  const int end_index_;
+};  // class RangeGenerator
+
+
+// Generates values from a pair of STL-style iterators. Used in the
+// ValuesIn() function. The elements are copied from the source range
+// since the source can be located on the stack, and the generator
+// is likely to persist beyond that stack frame.
+template <typename T>
+class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
+ public:
+  template <typename ForwardIterator>
+  ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end)
+      : container_(begin, end) {}
+  virtual ~ValuesInIteratorRangeGenerator() {}
+
+  virtual ParamIteratorInterface<T>* Begin() const {
+    return new Iterator(this, container_.begin());
+  }
+  virtual ParamIteratorInterface<T>* End() const {
+    return new Iterator(this, container_.end());
+  }
+
+ private:
+  typedef typename ::std::vector<T> ContainerType;
+
+  class Iterator : public ParamIteratorInterface<T> {
+   public:
+    Iterator(const ParamGeneratorInterface<T>* base,
+             typename ContainerType::const_iterator iterator)
+        : base_(base), iterator_(iterator) {}
+    virtual ~Iterator() {}
+
+    virtual const ParamGeneratorInterface<T>* BaseGenerator() const {
+      return base_;
+    }
+    virtual void Advance() {
+      ++iterator_;
+      // Drop the cached copy; Current() re-creates it on demand.
+      value_.reset();
+    }
+    virtual ParamIteratorInterface<T>* Clone() const {
+      return new Iterator(*this);
+    }
+    // We need to use cached value referenced by iterator_ because *iterator_
+    // can return a temporary object (and of type other than T), so just
+    // having "return &*iterator_;" doesn't work.
+    // value_ is updated here and not in Advance() because Advance()
+    // can advance iterator_ beyond the end of the range, and we cannot
+    // detect that fact. The client code, on the other hand, is
+    // responsible for not calling Current() on an out-of-range iterator.
+    virtual const T* Current() const {
+      if (value_.get() == NULL)
+        value_.reset(new T(*iterator_));
+      return value_.get();
+    }
+    virtual bool Equals(const ParamIteratorInterface<T>& other) const {
+      // Having the same base generator guarantees that the other
+      // iterator is of the same type and we can downcast.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      return iterator_ ==
+          CheckedDowncastToActualType<const Iterator>(&other)->iterator_;
+    }
+
+   private:
+    // Copies the position only; the cached value_ starts out empty.
+    Iterator(const Iterator& other)
+          // The explicit constructor call suppresses a false warning
+          // emitted by gcc when supplied with the -Wextra option.
+        : ParamIteratorInterface<T>(),
+          base_(other.base_),
+          iterator_(other.iterator_) {}
+
+    const ParamGeneratorInterface<T>* const base_;
+    typename ContainerType::const_iterator iterator_;
+    // A cached value of *iterator_. We keep it here to allow access by
+    // pointer in the wrapping iterator's operator->().
+    // value_ needs to be mutable to be accessed in Current().
+    // Use of scoped_ptr helps manage cached value's lifetime,
+    // which is bound by the lifespan of the iterator itself.
+    mutable scoped_ptr<const T> value_;
+  };  // class ValuesInIteratorRangeGenerator::Iterator
+
+  // No implementation - assignment is unsupported.
+  void operator=(const ValuesInIteratorRangeGenerator& other);
+
+  const ContainerType container_;
+};  // class ValuesInIteratorRangeGenerator
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Stores a parameter value and later creates tests parameterized with that
+// value.
+template <class TestClass>
+class ParameterizedTestFactory : public TestFactoryBase {
+ public:
+  typedef typename TestClass::ParamType ParamType;
+  explicit ParameterizedTestFactory(ParamType parameter) :
+      parameter_(parameter) {}
+  virtual Test* CreateTest() {
+    // Hands the test class a pointer to the stored parameter before
+    // constructing the test object.
+    TestClass::SetParam(&parameter_);
+    return new TestClass();
+  }
+
+ private:
+  const ParamType parameter_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactoryBase is a base class for meta-factories that create
+// test factories for passing into MakeAndRegisterTestInfo function.
+template <class ParamType>
+class TestMetaFactoryBase {
+ public:
+  virtual ~TestMetaFactoryBase() {}
+
+  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// TestMetaFactory creates test factories for passing into
+// MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives
+// ownership of test factory pointer, same factory object cannot be passed
+// into that method twice. But ParameterizedTestCaseInfo is going to call
+// it for each Test/Parameter value combination. Thus it needs meta factory
+// creator class.
+template <class TestCase>
+class TestMetaFactory
+    : public TestMetaFactoryBase<typename TestCase::ParamType> {
+ public:
+  typedef typename TestCase::ParamType ParamType;
+
+  TestMetaFactory() {}
+
+  virtual TestFactoryBase* CreateTestFactory(ParamType parameter) {
+    return new ParameterizedTestFactory<TestCase>(parameter);
+  }
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestCaseInfoBase is a generic interface
+// to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase
+// accumulates test information provided by TEST_P macro invocations
+// and generators provided by INSTANTIATE_TEST_CASE_P macro invocations
+// and uses that information to register all resulting test instances
+// in RegisterTests method. The ParameterizedTestCaseRegistry class holds
+// a collection of pointers to the ParameterizedTestCaseInfo objects
+// and calls RegisterTests() on each of them when asked.
+class ParameterizedTestCaseInfoBase {
+ public:
+  virtual ~ParameterizedTestCaseInfoBase() {}
+
+  // Base part of test case name for display purposes.
+  virtual const string& GetTestCaseName() const = 0;
+  // Test case id to verify identity.
+  virtual TypeId GetTestCaseTypeId() const = 0;
+  // UnitTest class invokes this method to register tests in this
+  // test case right before running them in RUN_ALL_TESTS macro.
+  // This method should not be called more than once on any single
+  // instance of a ParameterizedTestCaseInfoBase derived class.
+  virtual void RegisterTests() = 0;
+
+ protected:
+  ParameterizedTestCaseInfoBase() {}
+
+ private:
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase);
+};
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestCaseInfo accumulates tests obtained from TEST_P
+// macro invocations for a particular test case and generators
+// obtained from INSTANTIATE_TEST_CASE_P macro invocations for that
+// test case. It registers tests with all values generated by all
+// generators when asked.
+template <class TestCase>
+class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase {
+ public:
+  // ParamType and GeneratorCreationFunc are private types but are required
+  // for declarations of public methods AddTestPattern() and
+  // AddTestCaseInstantiation().
+  typedef typename TestCase::ParamType ParamType;
+  // A function that returns an instance of appropriate generator type.
+  typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
+
+  explicit ParameterizedTestCaseInfo(const char* name)
+      : test_case_name_(name) {}
+
+  // Test case base name for display purposes.
+  virtual const string& GetTestCaseName() const { return test_case_name_; }
+  // Test case id to verify identity.
+  virtual TypeId GetTestCaseTypeId() const { return GetTypeId<TestCase>(); }
+  // TEST_P macro uses AddTestPattern() to record information
+  // about a single test in a TestInfo structure.
+  // test_case_name is the base name of the test case (without invocation
+  // prefix). test_base_name is the name of an individual test without
+  // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
+  // test case base name and DoBar is test base name.
+  void AddTestPattern(const char* test_case_name,
+                      const char* test_base_name,
+                      TestMetaFactoryBase<ParamType>* meta_factory) {
+    tests_.push_back(linked_ptr<TestInfo>(new TestInfo(test_case_name,
+                                                       test_base_name,
+                                                       meta_factory)));
+  }
+  // INSTANTIATE_TEST_CASE_P macro uses AddTestCaseInstantiation() to record
+  // information about a generator.
+  int AddTestCaseInstantiation(const string& instantiation_name,
+                               GeneratorCreationFunc* func,
+                               const char* /* file */,
+                               int /* line */) {
+    instantiations_.push_back(::std::make_pair(instantiation_name, func));
+    return 0;  // Return value used only to run this method in namespace scope.
+  }
+  // UnitTest class invokes this method to register tests in this test case
+  // right before running tests in RUN_ALL_TESTS macro.
+  // This method should not be called more than once on any single
+  // instance of a ParameterizedTestCaseInfoBase derived class.
+  // UnitTest has a guard to prevent from calling this method more than once.
+  virtual void RegisterTests() {
+    for (typename TestInfoContainer::iterator test_it = tests_.begin();
+         test_it != tests_.end(); ++test_it) {
+      linked_ptr<TestInfo> test_info = *test_it;
+      for (typename InstantiationContainer::iterator gen_it =
+               instantiations_.begin(); gen_it != instantiations_.end();
+               ++gen_it) {
+        const string& instantiation_name = gen_it->first;
+        ParamGenerator<ParamType> generator((*gen_it->second)());
+
+        // Registered test case name is "<instantiation>/<base name>", or
+        // just the base name when the instantiation name is empty.
+        string test_case_name;
+        if ( !instantiation_name.empty() )
+          test_case_name = instantiation_name + "/";
+        test_case_name += test_info->test_case_base_name;
+
+        int i = 0;
+        for (typename ParamGenerator<ParamType>::iterator param_it =
+                 generator.begin();
+             param_it != generator.end(); ++param_it, ++i) {
+          Message test_name_stream;
+          test_name_stream << test_info->test_base_name << "/" << i;
+          MakeAndRegisterTestInfo(
+              test_case_name.c_str(),
+              test_name_stream.GetString().c_str(),
+              NULL,  // No type parameter.
+              PrintToString(*param_it).c_str(),
+              GetTestCaseTypeId(),
+              TestCase::SetUpTestCase,
+              TestCase::TearDownTestCase,
+              test_info->test_meta_factory->CreateTestFactory(*param_it));
+        }  // for param_it
+      }  // for gen_it
+    }  // for test_it
+  }  // RegisterTests
+
+ private:
+  // TestInfo structure keeps information about a single test registered
+  // with TEST_P macro.
+  struct TestInfo {
+    TestInfo(const char* a_test_case_base_name,
+             const char* a_test_base_name,
+             TestMetaFactoryBase<ParamType>* a_test_meta_factory) :
+        test_case_base_name(a_test_case_base_name),
+        test_base_name(a_test_base_name),
+        test_meta_factory(a_test_meta_factory) {}
+
+    const string test_case_base_name;
+    const string test_base_name;
+    const scoped_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
+  };
+  typedef ::std::vector<linked_ptr<TestInfo> > TestInfoContainer;
+  // Keeps pairs of <Instantiation name, Sequence generator creation function>
+  // received from INSTANTIATE_TEST_CASE_P macros.
+  typedef ::std::vector<std::pair<string, GeneratorCreationFunc*> >
+      InstantiationContainer;
+
+  const string test_case_name_;
+  TestInfoContainer tests_;
+  InstantiationContainer instantiations_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo);
+};  // class ParameterizedTestCaseInfo
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// ParameterizedTestCaseRegistry contains a map of ParameterizedTestCaseInfoBase
+// classes accessed by test case names. TEST_P and INSTANTIATE_TEST_CASE_P
+// macros use it to locate their corresponding ParameterizedTestCaseInfo
+// descriptors.
+class ParameterizedTestCaseRegistry {
+ public:
+  ParameterizedTestCaseRegistry() {}
+  ~ParameterizedTestCaseRegistry() {
+    // The registry deletes every info object it holds.
+    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
+         it != test_case_infos_.end(); ++it) {
+      delete *it;
+    }
+  }
+
+  // Looks up or creates and returns a structure containing information about
+  // tests and instantiations of a particular test case.
+  template <class TestCase>
+  ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
+      const char* test_case_name,
+      const char* file,
+      int line) {
+    ParameterizedTestCaseInfo<TestCase>* typed_test_info = NULL;
+    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
+         it != test_case_infos_.end(); ++it) {
+      if ((*it)->GetTestCaseName() == test_case_name) {
+        if ((*it)->GetTestCaseTypeId() != GetTypeId<TestCase>()) {
+          // Complain about incorrect usage of Google Test facilities
+          // and terminate the program since we cannot guarantee correct
+          // test case setup and tear-down in this case.
+          ReportInvalidTestCaseType(test_case_name, file, line);
+          posix::Abort();
+        } else {
+          // At this point we are sure that the object we found is of the same
+          // type we are looking for, so we downcast it to that type
+          // without further checks.
+          typed_test_info = CheckedDowncastToActualType<
+              ParameterizedTestCaseInfo<TestCase> >(*it);
+        }
+        break;
+      }
+    }
+    if (typed_test_info == NULL) {
+      typed_test_info = new ParameterizedTestCaseInfo<TestCase>(test_case_name);
+      test_case_infos_.push_back(typed_test_info);
+    }
+    return typed_test_info;
+  }
+  void RegisterTests() {
+    for (TestCaseInfoContainer::iterator it = test_case_infos_.begin();
+         it != test_case_infos_.end(); ++it) {
+      (*it)->RegisterTests();
+    }
+  }
+
+ private:
+  typedef ::std::vector<ParameterizedTestCaseInfoBase*> TestCaseInfoContainer;
+
+  TestCaseInfoContainer test_case_infos_;
+
+  GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry);
+};
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  //  GTEST_HAS_PARAM_TEST
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+// This file was GENERATED by command:
+//     pump.py gtest-param-util-generated.h.pump
+// DO NOT EDIT BY HAND!!!
+
+// Copyright 2008 Google Inc.
+// All Rights Reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: vladl@google.com (Vlad Losev) + +// Type and function utilities for implementing parameterized tests. +// This file is generated by a SCRIPT. DO NOT EDIT BY HAND! +// +// Currently Google Test supports at most 50 arguments in Values, +// and at most 10 arguments in Combine. Please contact +// googletestframework@googlegroups.com if you need more. 
+// Please note that the number of arguments to Combine is limited +// by the maximum arity of the implementation of tr1::tuple which is +// currently set at 10. + +#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ +#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ + +// scripts/fuse_gtest.py depends on gtest's own header being #included +// *unconditionally*. Therefore these #includes cannot be moved +// inside #if GTEST_HAS_PARAM_TEST. + +#if GTEST_HAS_PARAM_TEST + +namespace testing { + +// Forward declarations of ValuesIn(), which is implemented in +// include/gtest/gtest-param-test.h. +template +internal::ParamGenerator< + typename ::testing::internal::IteratorTraits::value_type> +ValuesIn(ForwardIterator begin, ForwardIterator end); + +template +internal::ParamGenerator ValuesIn(const T (&array)[N]); + +template +internal::ParamGenerator ValuesIn( + const Container& container); + +namespace internal { + +// Used in the Values() function to provide polymorphic capabilities. +template +class ValueArray1 { + public: + explicit ValueArray1(T1 v1) : v1_(v1) {} + + template + operator ParamGenerator() const { return ValuesIn(&v1_, &v1_ + 1); } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray1& other); + + const T1 v1_; +}; + +template +class ValueArray2 { + public: + ValueArray2(T1 v1, T2 v2) : v1_(v1), v2_(v2) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray2& other); + + const T1 v1_; + const T2 v2_; +}; + +template +class ValueArray3 { + public: + ValueArray3(T1 v1, T2 v2, T3 v3) : v1_(v1), v2_(v2), v3_(v3) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray3& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; +}; + +template +class ValueArray4 { + public: + ValueArray4(T1 v1, T2 v2, T3 v3, T4 v4) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray4& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; +}; + +template +class ValueArray5 { + public: + ValueArray5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray5& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; +}; + +template +class ValueArray6 { + public: + ValueArray6(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray6& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; +}; + +template +class ValueArray7 { + public: + ValueArray7(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray7& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; +}; + +template +class ValueArray8 { + public: + ValueArray8(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray8& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; +}; + +template +class ValueArray9 { + public: + ValueArray9(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray9& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; +}; + +template +class ValueArray10 { + public: + ValueArray10(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray10& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; +}; + +template +class ValueArray11 { + public: + ValueArray11(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray11& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; +}; + +template +class ValueArray12 { + public: + ValueArray12(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray12& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; +}; + +template +class ValueArray13 { + public: + ValueArray13(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray13& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; +}; + +template +class ValueArray14 { + public: + ValueArray14(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray14& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; +}; + +template +class ValueArray15 { + public: + ValueArray15(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray15& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; +}; + +template +class ValueArray16 { + public: + ValueArray16(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray16& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; +}; + +template +class ValueArray17 { + public: + ValueArray17(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, + T17 v17) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray17& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; +}; + +template +class ValueArray18 { + public: + ValueArray18(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray18& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; +}; + +template +class ValueArray19 { + public: + ValueArray19(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray19& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; +}; + +template +class ValueArray20 { + public: + ValueArray20(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray20& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; +}; + +template +class ValueArray21 { + public: + ValueArray21(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray21& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; +}; + +template +class ValueArray22 { + public: + ValueArray22(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray22& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; +}; + +template +class ValueArray23 { + public: + ValueArray23(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray23& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; +}; + +template +class ValueArray24 { + public: + ValueArray24(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), + v22_(v22), v23_(v23), v24_(v24) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray24& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; +}; + +template +class ValueArray25 { + public: + ValueArray25(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, + T25 v25) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray25& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; +}; + +template +class ValueArray26 { + public: + ValueArray26(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray26& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; +}; + +template +class ValueArray27 { + public: + ValueArray27(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), + v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), + v26_(v26), v27_(v27) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray27& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; +}; + +template +class ValueArray28 { + public: + ValueArray28(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), + v25_(v25), v26_(v26), v27_(v27), v28_(v28) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray28& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; +}; + +template +class ValueArray29 { + public: + ValueArray29(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), + v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray29& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; +}; + +template +class ValueArray30 { + public: + ValueArray30(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray30& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; +}; + +template +class ValueArray31 { + public: + ValueArray31(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is 
unsupported. + void operator=(const ValueArray31& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; +}; + +template +class ValueArray32 { + public: + ValueArray32(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), + v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), + v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_)}; + return 
ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray32& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; +}; + +template +class ValueArray33 { + public: + ValueArray33(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, + T33 v33) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), 
static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray33& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; +}; + +template +class ValueArray34 { + public: + ValueArray34(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), 
static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray34& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; +}; + +template +class ValueArray35 { + public: + ValueArray35(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), + v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), + v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), + v32_(v32), v33_(v33), v34_(v34), v35_(v35) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), 
+ static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray35& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; +}; + +template +class ValueArray36 { + public: + ValueArray36(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), + v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), + v31_(v31), 
v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray36& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; +}; + +template +class ValueArray37 { + public: + ValueArray37(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37) : v1_(v1), 
v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), + v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), + v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), + v36_(v36), v37_(v37) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray37& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; +}; + +template +class ValueArray38 { + public: + ValueArray38(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), + v35_(v35), v36_(v36), v37_(v37), v38_(v38) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), 
static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray38& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; +}; + +template +class ValueArray39 { + public: + ValueArray39(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), + v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39) {} + + template + operator ParamGenerator() 
const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray39& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; +}; + +template +class ValueArray40 { + public: + ValueArray40(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, 
T38 v38, T39 v39, T40 v40) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), + v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), + v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), + v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), + v40_(v40) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray40& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; +}; + +template +class ValueArray41 { + public: + ValueArray41(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, + T41 v41) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), + v39_(v39), v40_(v40), v41_(v41) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), 
static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_), static_cast(v41_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray41& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; +}; + +template +class ValueArray42 { + public: + ValueArray42(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + 
v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), + v39_(v39), v40_(v40), v41_(v41), v42_(v42) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_), static_cast(v41_), + static_cast(v42_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray42& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; +}; + +template +class ValueArray43 { + public: + ValueArray43(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), + v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), + v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), + v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), + v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), + v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), + v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), 
static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_), static_cast(v41_), + static_cast(v42_), static_cast(v43_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray43& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; +}; + +template +class ValueArray44 { + public: + ValueArray44(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), 
v5_(v5), + v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), + v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), + v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), + v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), + v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), + v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), + v43_(v43), v44_(v44) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_), static_cast(v41_), + static_cast(v42_), static_cast(v43_), static_cast(v44_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray44& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; +}; + +template +class ValueArray45 { + public: + ValueArray45(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), + v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), + v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), + v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), + v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), + v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), + v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), + v42_(v42), v43_(v43), v44_(v44), v45_(v45) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + 
static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_), static_cast(v41_), + static_cast(v42_), static_cast(v43_), static_cast(v44_), + static_cast(v45_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray45& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; +}; + +template +class ValueArray46 { + public: + ValueArray46(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, 
T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) : v1_(v1), v2_(v2), v3_(v3), + v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), + v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), + v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_), static_cast(v41_), + static_cast(v42_), static_cast(v43_), static_cast(v44_), + static_cast(v45_), static_cast(v46_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray46& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; + const T46 v46_; +}; + +template +class ValueArray47 { + public: + ValueArray47(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) : v1_(v1), v2_(v2), + v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), + v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), + v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), + v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), + v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), + v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), + v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46), + v47_(v47) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), 
static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_), static_cast(v41_), + static_cast(v42_), static_cast(v43_), static_cast(v44_), + static_cast(v45_), static_cast(v46_), static_cast(v47_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. + void operator=(const ValueArray47& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; + const T46 v46_; + const T47 v47_; +}; + +template +class ValueArray48 { + public: + ValueArray48(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 
v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48) : v1_(v1), + v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), + v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), + v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), + v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), + v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), + v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), + v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), + v46_(v46), v47_(v47), v48_(v48) {} + + template + operator ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_), static_cast(v41_), + static_cast(v42_), static_cast(v43_), static_cast(v44_), + static_cast(v45_), static_cast(v46_), static_cast(v47_), + static_cast(v48_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray48& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; + const T46 v46_; + const T47 v47_; + const T48 v48_; +}; + +template +class ValueArray49 { + public: + ValueArray49(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, + T49 v49) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), + v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), + v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49) {} + + template + operator ParamGenerator() const { + const T array[] = 
{static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_), static_cast(v41_), + static_cast(v42_), static_cast(v43_), static_cast(v44_), + static_cast(v45_), static_cast(v46_), static_cast(v47_), + static_cast(v48_), static_cast(v49_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray49& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; + const T46 v46_; + const T47 v47_; + const T48 v48_; + const T49 v49_; +}; + +template +class ValueArray50 { + public: + ValueArray50(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49, + T50 v50) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), + v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), + v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), + v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), + v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), + v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), + v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), + v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49), v50_(v50) {} + + template + operator 
ParamGenerator() const { + const T array[] = {static_cast(v1_), static_cast(v2_), + static_cast(v3_), static_cast(v4_), static_cast(v5_), + static_cast(v6_), static_cast(v7_), static_cast(v8_), + static_cast(v9_), static_cast(v10_), static_cast(v11_), + static_cast(v12_), static_cast(v13_), static_cast(v14_), + static_cast(v15_), static_cast(v16_), static_cast(v17_), + static_cast(v18_), static_cast(v19_), static_cast(v20_), + static_cast(v21_), static_cast(v22_), static_cast(v23_), + static_cast(v24_), static_cast(v25_), static_cast(v26_), + static_cast(v27_), static_cast(v28_), static_cast(v29_), + static_cast(v30_), static_cast(v31_), static_cast(v32_), + static_cast(v33_), static_cast(v34_), static_cast(v35_), + static_cast(v36_), static_cast(v37_), static_cast(v38_), + static_cast(v39_), static_cast(v40_), static_cast(v41_), + static_cast(v42_), static_cast(v43_), static_cast(v44_), + static_cast(v45_), static_cast(v46_), static_cast(v47_), + static_cast(v48_), static_cast(v49_), static_cast(v50_)}; + return ValuesIn(array); + } + + private: + // No implementation - assignment is unsupported. 
+ void operator=(const ValueArray50& other); + + const T1 v1_; + const T2 v2_; + const T3 v3_; + const T4 v4_; + const T5 v5_; + const T6 v6_; + const T7 v7_; + const T8 v8_; + const T9 v9_; + const T10 v10_; + const T11 v11_; + const T12 v12_; + const T13 v13_; + const T14 v14_; + const T15 v15_; + const T16 v16_; + const T17 v17_; + const T18 v18_; + const T19 v19_; + const T20 v20_; + const T21 v21_; + const T22 v22_; + const T23 v23_; + const T24 v24_; + const T25 v25_; + const T26 v26_; + const T27 v27_; + const T28 v28_; + const T29 v29_; + const T30 v30_; + const T31 v31_; + const T32 v32_; + const T33 v33_; + const T34 v34_; + const T35 v35_; + const T36 v36_; + const T37 v37_; + const T38 v38_; + const T39 v39_; + const T40 v40_; + const T41 v41_; + const T42 v42_; + const T43 v43_; + const T44 v44_; + const T45 v45_; + const T46 v46_; + const T47 v47_; + const T48 v48_; + const T49 v49_; + const T50 v50_; +}; + +# if GTEST_HAS_COMBINE +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Generates values from the Cartesian product of values produced +// by the argument generators. 
+// +template +class CartesianProductGenerator2 + : public ParamGeneratorInterface< ::std::tr1::tuple > { + public: + typedef ::std::tr1::tuple ParamType; + + CartesianProductGenerator2(const ParamGenerator& g1, + const ParamGenerator& g2) + : g1_(g1), g2_(g2) {} + virtual ~CartesianProductGenerator2() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current2_; + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return ¤t_value_; } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." 
<< std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + ParamType current_value_; + }; // class CartesianProductGenerator2::Iterator + + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductGenerator2& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; +}; // class CartesianProductGenerator2 + + +template +class CartesianProductGenerator3 + : public ParamGeneratorInterface< ::std::tr1::tuple > { + public: + typedef ::std::tr1::tuple ParamType; + + CartesianProductGenerator3(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3) + : g1_(g1), g2_(g2), g3_(g3) {} + virtual ~CartesianProductGenerator3() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. 
+ virtual void Advance() { + assert(!AtEnd()); + ++current3_; + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return ¤t_value_; } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_; + } + + // No implementation - assignment is unsupported. 
+ void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + ParamType current_value_; + }; // class CartesianProductGenerator3::Iterator + + // No implementation - assignment is unsupported. + void operator=(const CartesianProductGenerator3& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; +}; // class CartesianProductGenerator3 + + +template +class CartesianProductGenerator4 + : public ParamGeneratorInterface< ::std::tr1::tuple > { + public: + typedef ::std::tr1::tuple ParamType; + + CartesianProductGenerator4(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {} + virtual ~CartesianProductGenerator4() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename 
ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current4_; + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return ¤t_value_; } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). 
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. 
+ const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + ParamType current_value_; + }; // class CartesianProductGenerator4::Iterator + + // No implementation - assignment is unsupported. + void operator=(const CartesianProductGenerator4& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; +}; // class CartesianProductGenerator4 + + +template +class CartesianProductGenerator5 + : public ParamGeneratorInterface< ::std::tr1::tuple > { + public: + typedef ::std::tr1::tuple ParamType; + + CartesianProductGenerator5(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {} + virtual ~CartesianProductGenerator5() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, 
+ const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current5_; + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return &current_value_; } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). 
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. 
+ const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + const typename ParamGenerator::iterator begin5_; + const typename ParamGenerator::iterator end5_; + typename ParamGenerator::iterator current5_; + ParamType current_value_; + }; // class CartesianProductGenerator5::Iterator + + // No implementation - assignment is unsupported. + void operator=(const CartesianProductGenerator5& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; + const ParamGenerator g5_; +}; // class CartesianProductGenerator5 + + +template +class CartesianProductGenerator6 + : public ParamGeneratorInterface< ::std::tr1::tuple > { + public: + typedef ::std::tr1::tuple ParamType; + + CartesianProductGenerator6(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5, + const ParamGenerator& g6) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {} + virtual ~CartesianProductGenerator6() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + 
public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5, + const ParamGenerator& g6, + const typename ParamGenerator::iterator& current6) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current6_; + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return &current_value_; } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. 
+ GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. 
+ // current[i]_ is the actual traversing iterator. + const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + const typename ParamGenerator::iterator begin5_; + const typename ParamGenerator::iterator end5_; + typename ParamGenerator::iterator current5_; + const typename ParamGenerator::iterator begin6_; + const typename ParamGenerator::iterator end6_; + typename ParamGenerator::iterator current6_; + ParamType current_value_; + }; // class CartesianProductGenerator6::Iterator + + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductGenerator6& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; + const ParamGenerator g5_; + const ParamGenerator g6_; +}; // class CartesianProductGenerator6 + + +template +class CartesianProductGenerator7 + : public ParamGeneratorInterface< ::std::tr1::tuple > { + public: + typedef ::std::tr1::tuple ParamType; + + CartesianProductGenerator7(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5, + const ParamGenerator& g6, const ParamGenerator& g7) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {} + virtual ~CartesianProductGenerator7() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5, + const ParamGenerator& g6, + const typename ParamGenerator::iterator& current6, + const ParamGenerator& g7, + const typename ParamGenerator::iterator& current7) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + 
begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current7_; + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return &current_value_; } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). 
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_; + } + + // No implementation - assignment is unsupported. + void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. 
+ const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + const typename ParamGenerator::iterator begin5_; + const typename ParamGenerator::iterator end5_; + typename ParamGenerator::iterator current5_; + const typename ParamGenerator::iterator begin6_; + const typename ParamGenerator::iterator end6_; + typename ParamGenerator::iterator current6_; + const typename ParamGenerator::iterator begin7_; + const typename ParamGenerator::iterator end7_; + typename ParamGenerator::iterator current7_; + ParamType current_value_; + }; // class CartesianProductGenerator7::Iterator + + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductGenerator7& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; + const ParamGenerator g5_; + const ParamGenerator g6_; + const ParamGenerator g7_; +}; // class CartesianProductGenerator7 + + +template +class CartesianProductGenerator8 + : public ParamGeneratorInterface< ::std::tr1::tuple > { + public: + typedef ::std::tr1::tuple ParamType; + + CartesianProductGenerator8(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5, + const ParamGenerator& g6, const ParamGenerator& g7, + const ParamGenerator& g8) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), + g8_(g8) {} + virtual ~CartesianProductGenerator8() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin(), g8_, g8_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, + g8_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5, + const ParamGenerator& g6, + const typename ParamGenerator::iterator& current6, + const ParamGenerator& g7, + const typename ParamGenerator::iterator& current7, + const ParamGenerator& g8, + const 
typename ParamGenerator::iterator& current8) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7), + begin8_(g8.begin()), end8_(g8.end()), current8_(current8) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. + virtual void Advance() { + assert(!AtEnd()); + ++current8_; + if (current8_ == end8_) { + current8_ = begin8_; + ++current7_; + } + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return &current_value_; } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." 
<< std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). + return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_ && + current8_ == typed_other->current8_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_), + begin8_(other.begin8_), + end8_(other.end8_), + current8_(other.current8_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_, *current8_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_ || + current8_ == end8_; + } + + // No implementation - assignment is unsupported. 
+ void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + const typename ParamGenerator::iterator begin5_; + const typename ParamGenerator::iterator end5_; + typename ParamGenerator::iterator current5_; + const typename ParamGenerator::iterator begin6_; + const typename ParamGenerator::iterator end6_; + typename ParamGenerator::iterator current6_; + const typename ParamGenerator::iterator begin7_; + const typename ParamGenerator::iterator end7_; + typename ParamGenerator::iterator current7_; + const typename ParamGenerator::iterator begin8_; + const typename ParamGenerator::iterator end8_; + typename ParamGenerator::iterator current8_; + ParamType current_value_; + }; // class CartesianProductGenerator8::Iterator + + // No implementation - assignment is unsupported. 
+ void operator=(const CartesianProductGenerator8& other); + + const ParamGenerator g1_; + const ParamGenerator g2_; + const ParamGenerator g3_; + const ParamGenerator g4_; + const ParamGenerator g5_; + const ParamGenerator g6_; + const ParamGenerator g7_; + const ParamGenerator g8_; +}; // class CartesianProductGenerator8 + + +template +class CartesianProductGenerator9 + : public ParamGeneratorInterface< ::std::tr1::tuple > { + public: + typedef ::std::tr1::tuple ParamType; + + CartesianProductGenerator9(const ParamGenerator& g1, + const ParamGenerator& g2, const ParamGenerator& g3, + const ParamGenerator& g4, const ParamGenerator& g5, + const ParamGenerator& g6, const ParamGenerator& g7, + const ParamGenerator& g8, const ParamGenerator& g9) + : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), + g9_(g9) {} + virtual ~CartesianProductGenerator9() {} + + virtual ParamIteratorInterface* Begin() const { + return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, + g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, + g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin()); + } + virtual ParamIteratorInterface* End() const { + return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), + g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, + g8_.end(), g9_, g9_.end()); + } + + private: + class Iterator : public ParamIteratorInterface { + public: + Iterator(const ParamGeneratorInterface* base, + const ParamGenerator& g1, + const typename ParamGenerator::iterator& current1, + const ParamGenerator& g2, + const typename ParamGenerator::iterator& current2, + const ParamGenerator& g3, + const typename ParamGenerator::iterator& current3, + const ParamGenerator& g4, + const typename ParamGenerator::iterator& current4, + const ParamGenerator& g5, + const typename ParamGenerator::iterator& current5, + const ParamGenerator& g6, + const typename ParamGenerator::iterator& current6, + const ParamGenerator& 
g7, + const typename ParamGenerator::iterator& current7, + const ParamGenerator& g8, + const typename ParamGenerator::iterator& current8, + const ParamGenerator& g9, + const typename ParamGenerator::iterator& current9) + : base_(base), + begin1_(g1.begin()), end1_(g1.end()), current1_(current1), + begin2_(g2.begin()), end2_(g2.end()), current2_(current2), + begin3_(g3.begin()), end3_(g3.end()), current3_(current3), + begin4_(g4.begin()), end4_(g4.end()), current4_(current4), + begin5_(g5.begin()), end5_(g5.end()), current5_(current5), + begin6_(g6.begin()), end6_(g6.end()), current6_(current6), + begin7_(g7.begin()), end7_(g7.end()), current7_(current7), + begin8_(g8.begin()), end8_(g8.end()), current8_(current8), + begin9_(g9.begin()), end9_(g9.end()), current9_(current9) { + ComputeCurrentValue(); + } + virtual ~Iterator() {} + + virtual const ParamGeneratorInterface* BaseGenerator() const { + return base_; + } + // Advance should not be called on beyond-of-range iterators + // so no component iterators must be beyond end of range, either. 
+ virtual void Advance() { + assert(!AtEnd()); + ++current9_; + if (current9_ == end9_) { + current9_ = begin9_; + ++current8_; + } + if (current8_ == end8_) { + current8_ = begin8_; + ++current7_; + } + if (current7_ == end7_) { + current7_ = begin7_; + ++current6_; + } + if (current6_ == end6_) { + current6_ = begin6_; + ++current5_; + } + if (current5_ == end5_) { + current5_ = begin5_; + ++current4_; + } + if (current4_ == end4_) { + current4_ = begin4_; + ++current3_; + } + if (current3_ == end3_) { + current3_ = begin3_; + ++current2_; + } + if (current2_ == end2_) { + current2_ = begin2_; + ++current1_; + } + ComputeCurrentValue(); + } + virtual ParamIteratorInterface* Clone() const { + return new Iterator(*this); + } + virtual const ParamType* Current() const { return &current_value_; } + virtual bool Equals(const ParamIteratorInterface& other) const { + // Having the same base generator guarantees that the other + // iterator is of the same type and we can downcast. + GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) + << "The program attempted to compare iterators " + << "from different generators." << std::endl; + const Iterator* typed_other = + CheckedDowncastToActualType(&other); + // We must report iterators equal if they both point beyond their + // respective ranges. That can happen in a variety of fashions, + // so we have to consult AtEnd(). 
+ return (AtEnd() && typed_other->AtEnd()) || + ( + current1_ == typed_other->current1_ && + current2_ == typed_other->current2_ && + current3_ == typed_other->current3_ && + current4_ == typed_other->current4_ && + current5_ == typed_other->current5_ && + current6_ == typed_other->current6_ && + current7_ == typed_other->current7_ && + current8_ == typed_other->current8_ && + current9_ == typed_other->current9_); + } + + private: + Iterator(const Iterator& other) + : base_(other.base_), + begin1_(other.begin1_), + end1_(other.end1_), + current1_(other.current1_), + begin2_(other.begin2_), + end2_(other.end2_), + current2_(other.current2_), + begin3_(other.begin3_), + end3_(other.end3_), + current3_(other.current3_), + begin4_(other.begin4_), + end4_(other.end4_), + current4_(other.current4_), + begin5_(other.begin5_), + end5_(other.end5_), + current5_(other.current5_), + begin6_(other.begin6_), + end6_(other.end6_), + current6_(other.current6_), + begin7_(other.begin7_), + end7_(other.end7_), + current7_(other.current7_), + begin8_(other.begin8_), + end8_(other.end8_), + current8_(other.current8_), + begin9_(other.begin9_), + end9_(other.end9_), + current9_(other.current9_) { + ComputeCurrentValue(); + } + + void ComputeCurrentValue() { + if (!AtEnd()) + current_value_ = ParamType(*current1_, *current2_, *current3_, + *current4_, *current5_, *current6_, *current7_, *current8_, + *current9_); + } + bool AtEnd() const { + // We must report iterator past the end of the range when either of the + // component iterators has reached the end of its range. + return + current1_ == end1_ || + current2_ == end2_ || + current3_ == end3_ || + current4_ == end4_ || + current5_ == end5_ || + current6_ == end6_ || + current7_ == end7_ || + current8_ == end8_ || + current9_ == end9_; + } + + // No implementation - assignment is unsupported. 
+ void operator=(const Iterator& other); + + const ParamGeneratorInterface* const base_; + // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. + // current[i]_ is the actual traversing iterator. + const typename ParamGenerator::iterator begin1_; + const typename ParamGenerator::iterator end1_; + typename ParamGenerator::iterator current1_; + const typename ParamGenerator::iterator begin2_; + const typename ParamGenerator::iterator end2_; + typename ParamGenerator::iterator current2_; + const typename ParamGenerator::iterator begin3_; + const typename ParamGenerator::iterator end3_; + typename ParamGenerator::iterator current3_; + const typename ParamGenerator::iterator begin4_; + const typename ParamGenerator::iterator end4_; + typename ParamGenerator::iterator current4_; + const typename ParamGenerator::iterator begin5_; + const typename ParamGenerator::iterator end5_; + typename ParamGenerator::iterator current5_; + const typename ParamGenerator::iterator begin6_; + const typename ParamGenerator::iterator end6_; + typename ParamGenerator::iterator current6_; + const typename ParamGenerator::iterator begin7_; + const typename ParamGenerator::iterator end7_; + typename ParamGenerator::iterator current7_; + const typename ParamGenerator::iterator begin8_; + const typename ParamGenerator::iterator end8_; + typename ParamGenerator::iterator current8_; + const typename ParamGenerator::iterator begin9_; + const typename ParamGenerator::iterator end9_; + typename ParamGenerator::iterator current9_; + ParamType current_value_; + }; // class CartesianProductGenerator9::Iterator + + // No implementation - assignment is unsupported. 
+  void operator=(const CartesianProductGenerator9& other);
+
+  // NOTE(review): T1..T9 are assumed to be the template parameters of
+  // CartesianProductGenerator9, whose header lies above this part of the
+  // patch - confirm against that header.
+  const ParamGenerator<T1> g1_;
+  const ParamGenerator<T2> g2_;
+  const ParamGenerator<T3> g3_;
+  const ParamGenerator<T4> g4_;
+  const ParamGenerator<T5> g5_;
+  const ParamGenerator<T6> g6_;
+  const ParamGenerator<T7> g7_;
+  const ParamGenerator<T8> g8_;
+  const ParamGenerator<T9> g9_;
+};  // class CartesianProductGenerator9
+
+
+// Generator whose elements are the Cartesian product of ten component
+// generators, delivered as ten-element ::std::tr1::tuple values. The
+// component generators are stored by value.
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+    typename T6, typename T7, typename T8, typename T9, typename T10>
+class CartesianProductGenerator10
+    : public ParamGeneratorInterface< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
+        T7, T8, T9, T10> > {
+ public:
+  typedef ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> ParamType;
+
+  CartesianProductGenerator10(const ParamGenerator<T1>& g1,
+      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3,
+      const ParamGenerator<T4>& g4, const ParamGenerator<T5>& g5,
+      const ParamGenerator<T6>& g6, const ParamGenerator<T7>& g7,
+      const ParamGenerator<T8>& g8, const ParamGenerator<T9>& g9,
+      const ParamGenerator<T10>& g10)
+      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
+          g9_(g9), g10_(g10) {}
+  virtual ~CartesianProductGenerator10() {}
+
+  // Returns a new heap-allocated iterator with every component at begin().
+  virtual ParamIteratorInterface<ParamType>* Begin() const {
+    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
+        g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_,
+        g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin(), g10_, g10_.begin());
+  }
+  // Returns a new heap-allocated iterator with every component at end().
+  virtual ParamIteratorInterface<ParamType>* End() const {
+    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(),
+        g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_,
+        g8_.end(), g9_, g9_.end(), g10_, g10_.end());
+  }
+
+ private:
+  class Iterator : public ParamIteratorInterface<ParamType> {
+   public:
+    // Records [begin, end) of every component generator plus the starting
+    // position passed for it, then builds the first tuple.
+    Iterator(const ParamGeneratorInterface<ParamType>* base,
+      const ParamGenerator<T1>& g1,
+      const typename ParamGenerator<T1>::iterator& current1,
+      const ParamGenerator<T2>& g2,
+      const typename ParamGenerator<T2>::iterator& current2,
+      const ParamGenerator<T3>& g3,
+      const typename ParamGenerator<T3>::iterator& current3,
+      const ParamGenerator<T4>& g4,
+      const typename ParamGenerator<T4>::iterator& current4,
+      const ParamGenerator<T5>& g5,
+      const typename ParamGenerator<T5>::iterator& current5,
+      const ParamGenerator<T6>& g6,
+      const typename ParamGenerator<T6>::iterator& current6,
+      const ParamGenerator<T7>& g7,
+      const typename ParamGenerator<T7>::iterator& current7,
+      const ParamGenerator<T8>& g8,
+      const typename ParamGenerator<T8>::iterator& current8,
+      const ParamGenerator<T9>& g9,
+      const typename ParamGenerator<T9>::iterator& current9,
+      const ParamGenerator<T10>& g10,
+      const typename ParamGenerator<T10>::iterator& current10)
+        : base_(base),
+          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
+          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
+          begin3_(g3.begin()), end3_(g3.end()), current3_(current3),
+          begin4_(g4.begin()), end4_(g4.end()), current4_(current4),
+          begin5_(g5.begin()), end5_(g5.end()), current5_(current5),
+          begin6_(g6.begin()), end6_(g6.end()), current6_(current6),
+          begin7_(g7.begin()), end7_(g7.end()), current7_(current7),
+          begin8_(g8.begin()), end8_(g8.end()), current8_(current8),
+          begin9_(g9.begin()), end9_(g9.end()), current9_(current9),
+          begin10_(g10.begin()), end10_(g10.end()), current10_(current10)    {
+      ComputeCurrentValue();
+    }
+    virtual ~Iterator() {}
+
+    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
+      return base_;
+    }
+    // Advance should not be called on beyond-of-range iterators
+    // so no component iterators must be beyond end of range, either.
+    virtual void Advance() {
+      assert(!AtEnd());
+      // Odometer-style increment: the last component moves fastest. A
+      // component that reaches its end wraps back to its begin and carries
+      // into the component before it. current1_ is never wrapped, so once it
+      // reaches end1_ the iterator is past-the-end (see AtEnd()).
+      ++current10_;
+      if (current10_ == end10_) {
+        current10_ = begin10_;
+        ++current9_;
+      }
+      if (current9_ == end9_) {
+        current9_ = begin9_;
+        ++current8_;
+      }
+      if (current8_ == end8_) {
+        current8_ = begin8_;
+        ++current7_;
+      }
+      if (current7_ == end7_) {
+        current7_ = begin7_;
+        ++current6_;
+      }
+      if (current6_ == end6_) {
+        current6_ = begin6_;
+        ++current5_;
+      }
+      if (current5_ == end5_) {
+        current5_ = begin5_;
+        ++current4_;
+      }
+      if (current4_ == end4_) {
+        current4_ = begin4_;
+        ++current3_;
+      }
+      if (current3_ == end3_) {
+        current3_ = begin3_;
+        ++current2_;
+      }
+      if (current2_ == end2_) {
+        current2_ = begin2_;
+        ++current1_;
+      }
+      ComputeCurrentValue();
+    }
+    // Returns a new heap-allocated copy of this iterator.
+    virtual ParamIteratorInterface<ParamType>* Clone() const {
+      return new Iterator(*this);
+    }
+    // Returns the address of the tuple cached by ComputeCurrentValue().
+    virtual const ParamType* Current() const { return &current_value_; }
+    virtual bool Equals(const ParamIteratorInterface<ParamType>& other) const {
+      // Having the same base generator guarantees that the other
+      // iterator is of the same type and we can downcast.
+      GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
+          << "The program attempted to compare iterators "
+          << "from different generators." << std::endl;
+      const Iterator* typed_other =
+          CheckedDowncastToActualType<const Iterator>(&other);
+      // We must report iterators equal if they both point beyond their
+      // respective ranges. That can happen in a variety of fashions,
+      // so we have to consult AtEnd().
+      return (AtEnd() && typed_other->AtEnd()) ||
+         (
+          current1_ == typed_other->current1_ &&
+          current2_ == typed_other->current2_ &&
+          current3_ == typed_other->current3_ &&
+          current4_ == typed_other->current4_ &&
+          current5_ == typed_other->current5_ &&
+          current6_ == typed_other->current6_ &&
+          current7_ == typed_other->current7_ &&
+          current8_ == typed_other->current8_ &&
+          current9_ == typed_other->current9_ &&
+          current10_ == typed_other->current10_);
+    }
+
+   private:
+    // Copy constructor used by Clone().
+    Iterator(const Iterator& other)
+        : base_(other.base_),
+        begin1_(other.begin1_),
+        end1_(other.end1_),
+        current1_(other.current1_),
+        begin2_(other.begin2_),
+        end2_(other.end2_),
+        current2_(other.current2_),
+        begin3_(other.begin3_),
+        end3_(other.end3_),
+        current3_(other.current3_),
+        begin4_(other.begin4_),
+        end4_(other.end4_),
+        current4_(other.current4_),
+        begin5_(other.begin5_),
+        end5_(other.end5_),
+        current5_(other.current5_),
+        begin6_(other.begin6_),
+        end6_(other.end6_),
+        current6_(other.current6_),
+        begin7_(other.begin7_),
+        end7_(other.end7_),
+        current7_(other.current7_),
+        begin8_(other.begin8_),
+        end8_(other.end8_),
+        current8_(other.current8_),
+        begin9_(other.begin9_),
+        end9_(other.end9_),
+        current9_(other.current9_),
+        begin10_(other.begin10_),
+        end10_(other.end10_),
+        current10_(other.current10_) {
+      ComputeCurrentValue();
+    }
+
+    // Rebuilds current_value_ from the component iterators. Nothing is
+    // dereferenced when the iterator is past-the-end.
+    void ComputeCurrentValue() {
+      if (!AtEnd())
+        current_value_ = ParamType(*current1_, *current2_, *current3_,
+            *current4_, *current5_, *current6_, *current7_, *current8_,
+            *current9_, *current10_);
+    }
+    bool AtEnd() const {
+      // We must report iterator past the end of the range when either of the
+      // component iterators has reached the end of its range.
+      return
+          current1_ == end1_ ||
+          current2_ == end2_ ||
+          current3_ == end3_ ||
+          current4_ == end4_ ||
+          current5_ == end5_ ||
+          current6_ == end6_ ||
+          current7_ == end7_ ||
+          current8_ == end8_ ||
+          current9_ == end9_ ||
+          current10_ == end10_;
+    }
+
+    // No implementation - assignment is unsupported.
+    void operator=(const Iterator& other);
+
+    const ParamGeneratorInterface<ParamType>* const base_;
+    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
+    // current[i]_ is the actual traversing iterator.
+    const typename ParamGenerator<T1>::iterator begin1_;
+    const typename ParamGenerator<T1>::iterator end1_;
+    typename ParamGenerator<T1>::iterator current1_;
+    const typename ParamGenerator<T2>::iterator begin2_;
+    const typename ParamGenerator<T2>::iterator end2_;
+    typename ParamGenerator<T2>::iterator current2_;
+    const typename ParamGenerator<T3>::iterator begin3_;
+    const typename ParamGenerator<T3>::iterator end3_;
+    typename ParamGenerator<T3>::iterator current3_;
+    const typename ParamGenerator<T4>::iterator begin4_;
+    const typename ParamGenerator<T4>::iterator end4_;
+    typename ParamGenerator<T4>::iterator current4_;
+    const typename ParamGenerator<T5>::iterator begin5_;
+    const typename ParamGenerator<T5>::iterator end5_;
+    typename ParamGenerator<T5>::iterator current5_;
+    const typename ParamGenerator<T6>::iterator begin6_;
+    const typename ParamGenerator<T6>::iterator end6_;
+    typename ParamGenerator<T6>::iterator current6_;
+    const typename ParamGenerator<T7>::iterator begin7_;
+    const typename ParamGenerator<T7>::iterator end7_;
+    typename ParamGenerator<T7>::iterator current7_;
+    const typename ParamGenerator<T8>::iterator begin8_;
+    const typename ParamGenerator<T8>::iterator end8_;
+    typename ParamGenerator<T8>::iterator current8_;
+    const typename ParamGenerator<T9>::iterator begin9_;
+    const typename ParamGenerator<T9>::iterator end9_;
+    typename ParamGenerator<T9>::iterator current9_;
+    const typename ParamGenerator<T10>::iterator begin10_;
+    const typename ParamGenerator<T10>::iterator end10_;
+    typename ParamGenerator<T10>::iterator current10_;
+    ParamType current_value_;
+  };  // class CartesianProductGenerator10::Iterator
+
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductGenerator10& other);
+
+  const ParamGenerator<T1> g1_;
+  const ParamGenerator<T2> g2_;
+  const ParamGenerator<T3> g3_;
+  const ParamGenerator<T4> g4_;
+  const ParamGenerator<T5> g5_;
+  const ParamGenerator<T6> g6_;
+  const ParamGenerator<T7> g7_;
+  const ParamGenerator<T8> g8_;
+  const ParamGenerator<T9> g9_;
+  const ParamGenerator<T10> g10_;
+};  // class CartesianProductGenerator10
+
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
+// Helper classes providing Combine() with polymorphic features. They allow
+// casting CartesianProductGeneratorN<T> to ParamGenerator<U> if T is
+// convertible to U.
+//
+// Each holder stores copies of its generators. The templated conversion
+// operator converts every stored generator to ParamGenerator<Ti> with
+// static_cast and wraps them in a new CartesianProductGeneratorN.
+template <class Generator1, class Generator2>
+class CartesianProductHolder2 {
+ public:
+  CartesianProductHolder2(const Generator1& g1, const Generator2& g2)
+      : g1_(g1), g2_(g2) {}
+  template <typename T1, typename T2>
+  operator ParamGenerator< ::std::tr1::tuple<T1, T2> >() const {
+    return ParamGenerator< ::std::tr1::tuple<T1, T2> >(
+        new CartesianProductGenerator2<T1, T2>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder2& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+};  // class CartesianProductHolder2
+
+template <class Generator1, class Generator2, class Generator3>
+class CartesianProductHolder3 {
+ public:
+  CartesianProductHolder3(const Generator1& g1, const Generator2& g2,
+      const Generator3& g3)
+      : g1_(g1), g2_(g2), g3_(g3) {}
+  template <typename T1, typename T2, typename T3>
+  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3> >() const {
+    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3> >(
+        new CartesianProductGenerator3<T1, T2, T3>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_),
+        static_cast<ParamGenerator<T3> >(g3_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder3& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+  const Generator3 g3_;
+};  // class CartesianProductHolder3
+
+// Holder for four generators; converts to a ParamGenerator of 4-tuples.
+template <class Generator1, class Generator2, class Generator3,
+    class Generator4>
+class CartesianProductHolder4 {
+ public:
+  CartesianProductHolder4(const Generator1& g1, const Generator2& g2,
+      const Generator3& g3, const Generator4& g4)
+      : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {}
+  template <typename T1, typename T2, typename T3, typename T4>
+  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> >() const {
+    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4> >(
+        new CartesianProductGenerator4<T1, T2, T3, T4>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_),
+        static_cast<ParamGenerator<T3> >(g3_),
+        static_cast<ParamGenerator<T4> >(g4_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder4& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+  const Generator3 g3_;
+  const Generator4 g4_;
+};  // class CartesianProductHolder4
+
+// Holder for five generators; converts to a ParamGenerator of 5-tuples.
+template <class Generator1, class Generator2, class Generator3,
+    class Generator4, class Generator5>
+class CartesianProductHolder5 {
+ public:
+  CartesianProductHolder5(const Generator1& g1, const Generator2& g2,
+      const Generator3& g3, const Generator4& g4, const Generator5& g5)
+      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {}
+  template <typename T1, typename T2, typename T3, typename T4, typename T5>
+  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >() const {
+    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5> >(
+        new CartesianProductGenerator5<T1, T2, T3, T4, T5>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_),
+        static_cast<ParamGenerator<T3> >(g3_),
+        static_cast<ParamGenerator<T4> >(g4_),
+        static_cast<ParamGenerator<T5> >(g5_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder5& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+  const Generator3 g3_;
+  const Generator4 g4_;
+  const Generator5 g5_;
+};  // class CartesianProductHolder5
+
+// Holder for six generators; converts to a ParamGenerator of 6-tuples.
+template <class Generator1, class Generator2, class Generator3,
+    class Generator4, class Generator5, class Generator6>
+class CartesianProductHolder6 {
+ public:
+  CartesianProductHolder6(const Generator1& g1, const Generator2& g2,
+      const Generator3& g3, const Generator4& g4, const Generator5& g5,
+      const Generator6& g6)
+      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {}
+  template <typename T1, typename T2, typename T3, typename T4, typename T5,
+      typename T6>
+  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >() const {
+    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6> >(
+        new CartesianProductGenerator6<T1, T2, T3, T4, T5, T6>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_),
+        static_cast<ParamGenerator<T3> >(g3_),
+        static_cast<ParamGenerator<T4> >(g4_),
+        static_cast<ParamGenerator<T5> >(g5_),
+        static_cast<ParamGenerator<T6> >(g6_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder6& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+  const Generator3 g3_;
+  const Generator4 g4_;
+  const Generator5 g5_;
+  const Generator6 g6_;
+};  // class CartesianProductHolder6
+
+// Holder for seven generators; converts to a ParamGenerator of 7-tuples.
+template <class Generator1, class Generator2, class Generator3,
+    class Generator4, class Generator5, class Generator6, class Generator7>
+class CartesianProductHolder7 {
+ public:
+  CartesianProductHolder7(const Generator1& g1, const Generator2& g2,
+      const Generator3& g3, const Generator4& g4, const Generator5& g5,
+      const Generator6& g6, const Generator7& g7)
+      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {}
+  template <typename T1, typename T2, typename T3, typename T4, typename T5,
+      typename T6, typename T7>
+  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6,
+      T7> >() const {
+    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7> >(
+        new CartesianProductGenerator7<T1, T2, T3, T4, T5, T6, T7>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_),
+        static_cast<ParamGenerator<T3> >(g3_),
+        static_cast<ParamGenerator<T4> >(g4_),
+        static_cast<ParamGenerator<T5> >(g5_),
+        static_cast<ParamGenerator<T6> >(g6_),
+        static_cast<ParamGenerator<T7> >(g7_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder7& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+  const Generator3 g3_;
+  const Generator4 g4_;
+  const Generator5 g5_;
+  const Generator6 g6_;
+  const Generator7 g7_;
+};  // class CartesianProductHolder7
+
+// Holder for eight generators; converts to a ParamGenerator of 8-tuples.
+template <class Generator1, class Generator2, class Generator3,
+    class Generator4, class Generator5, class Generator6, class Generator7,
+    class Generator8>
+class CartesianProductHolder8 {
+ public:
+  CartesianProductHolder8(const Generator1& g1, const Generator2& g2,
+      const Generator3& g3, const Generator4& g4, const Generator5& g5,
+      const Generator6& g6, const Generator7& g7, const Generator8& g8)
+      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7),
+          g8_(g8) {}
+  template <typename T1, typename T2, typename T3, typename T4, typename T5,
+      typename T6, typename T7, typename T8>
+  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7,
+      T8> >() const {
+    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8> >(
+        new CartesianProductGenerator8<T1, T2, T3, T4, T5, T6, T7, T8>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_),
+        static_cast<ParamGenerator<T3> >(g3_),
+        static_cast<ParamGenerator<T4> >(g4_),
+        static_cast<ParamGenerator<T5> >(g5_),
+        static_cast<ParamGenerator<T6> >(g6_),
+        static_cast<ParamGenerator<T7> >(g7_),
+        static_cast<ParamGenerator<T8> >(g8_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder8& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+  const Generator3 g3_;
+  const Generator4 g4_;
+  const Generator5 g5_;
+  const Generator6 g6_;
+  const Generator7 g7_;
+  const Generator8 g8_;
+};  // class CartesianProductHolder8
+
+// Holder for nine generators; converts to a ParamGenerator of 9-tuples.
+template <class Generator1, class Generator2, class Generator3,
+    class Generator4, class Generator5, class Generator6, class Generator7,
+    class Generator8, class Generator9>
+class CartesianProductHolder9 {
+ public:
+  CartesianProductHolder9(const Generator1& g1, const Generator2& g2,
+      const Generator3& g3, const Generator4& g4, const Generator5& g5,
+      const Generator6& g6, const Generator7& g7, const Generator8& g8,
+      const Generator9& g9)
+      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
+          g9_(g9) {}
+  template <typename T1, typename T2, typename T3, typename T4, typename T5,
+      typename T6, typename T7, typename T8, typename T9>
+  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
+      T9> >() const {
+    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
+        T9> >(
+        new CartesianProductGenerator9<T1, T2, T3, T4, T5, T6, T7, T8, T9>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_),
+        static_cast<ParamGenerator<T3> >(g3_),
+        static_cast<ParamGenerator<T4> >(g4_),
+        static_cast<ParamGenerator<T5> >(g5_),
+        static_cast<ParamGenerator<T6> >(g6_),
+        static_cast<ParamGenerator<T7> >(g7_),
+        static_cast<ParamGenerator<T8> >(g8_),
+        static_cast<ParamGenerator<T9> >(g9_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder9& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+  const Generator3 g3_;
+  const Generator4 g4_;
+  const Generator5 g5_;
+  const Generator6 g6_;
+  const Generator7 g7_;
+  const Generator8 g8_;
+  const Generator9 g9_;
+};  // class CartesianProductHolder9
+
+// Holder for ten generators; converts to a ParamGenerator of 10-tuples.
+template <class Generator1, class Generator2, class Generator3,
+    class Generator4, class Generator5, class Generator6, class Generator7,
+    class Generator8, class Generator9, class Generator10>
+class CartesianProductHolder10 {
+ public:
+  CartesianProductHolder10(const Generator1& g1, const Generator2& g2,
+      const Generator3& g3, const Generator4& g4, const Generator5& g5,
+      const Generator6& g6, const Generator7& g7, const Generator8& g8,
+      const Generator9& g9, const Generator10& g10)
+      : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8),
+          g9_(g9), g10_(g10) {}
+  template <typename T1, typename T2, typename T3, typename T4, typename T5,
+      typename T6, typename T7, typename T8, typename T9, typename T10>
+  operator ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
+      T9, T10> >() const {
+    return ParamGenerator< ::std::tr1::tuple<T1, T2, T3, T4, T5, T6, T7, T8,
+        T9, T10> >(
+        new CartesianProductGenerator10<T1, T2, T3, T4, T5, T6, T7, T8, T9,
+            T10>(
+        static_cast<ParamGenerator<T1> >(g1_),
+        static_cast<ParamGenerator<T2> >(g2_),
+        static_cast<ParamGenerator<T3> >(g3_),
+        static_cast<ParamGenerator<T4> >(g4_),
+        static_cast<ParamGenerator<T5> >(g5_),
+        static_cast<ParamGenerator<T6> >(g6_),
+        static_cast<ParamGenerator<T7> >(g7_),
+        static_cast<ParamGenerator<T8> >(g8_),
+        static_cast<ParamGenerator<T9> >(g9_),
+        static_cast<ParamGenerator<T10> >(g10_)));
+  }
+
+ private:
+  // No implementation - assignment is unsupported.
+  void operator=(const CartesianProductHolder10& other);
+
+  const Generator1 g1_;
+  const Generator2 g2_;
+  const Generator3 g3_;
+  const Generator4 g4_;
+  const Generator5 g5_;
+  const Generator6 g6_;
+  const Generator7 g7_;
+  const Generator8 g8_;
+  const Generator9 g9_;
+  const Generator10 g10_;
+};  // class CartesianProductHolder10
+
+# endif  // GTEST_HAS_COMBINE
+
+}  // namespace internal
+}  // namespace testing
+
+#endif  //  GTEST_HAS_PARAM_TEST
+
+#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_
+
+#if GTEST_HAS_PARAM_TEST
+
+namespace testing {
+
+// Functions producing parameter generators.
+//
+// Google Test uses these generators to produce parameters for value-
+// parameterized tests. When a parameterized test case is instantiated
+// with a particular generator, Google Test creates and runs tests
+// for each element in the sequence produced by the generator.
+//
+// In the following sample, tests from test case FooTest are instantiated
+// each three times with parameter values 3, 5, and 8:
+//
+// class FooTest : public TestWithParam<int> { ... };
+//
+// TEST_P(FooTest, TestThis) {
+// }
+// TEST_P(FooTest, TestThat) {
+// }
+// INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8));
+//
+
+// Range() returns generators providing sequences of values in a range.
+//
+// Synopsis:
+// Range(start, end)
+//   - returns a generator producing a sequence of values {start, start+1,
+//     start+2, ..., }.
+// Range(start, end, step)
+//   - returns a generator producing a sequence of values {start, start+step,
+//     start+step+step, ..., }.
+// Notes:
+//   * The generated sequences never include end. For example, Range(1, 5)
+//     returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2)
+//     returns a generator producing {1, 3, 5, 7}.
+//   * start and end must have the same type. That type may be any integral or
+//     floating-point type or a user defined type satisfying these conditions:
+//     * It must be assignable (have operator=() defined).
+//     * It must have operator+() (operator+(int-compatible type) for
+//       two-operand version).
+//     * It must have operator<() defined.
+//     Elements in the resulting sequences will also have that type.
+//   * Condition start < end must be satisfied in order for resulting sequences
+//     to contain any elements.
+//
+template <typename T, typename IncrementT>
+internal::ParamGenerator<T> Range(T start, T end, IncrementT step) {
+  return internal::ParamGenerator<T>(
+      new internal::RangeGenerator<T, IncrementT>(start, end, step));
+}
+
+// Two-argument form: forwards to the three-argument overload with step 1.
+template <typename T>
+internal::ParamGenerator<T> Range(T start, T end) {
+  return Range(start, end, 1);
+}
+
+// ValuesIn() function allows generation of tests with parameters coming from
+// a container.
+//
+// Synopsis:
+// ValuesIn(const T (&array)[N])
+//   - returns a generator producing sequences with elements from
+//     a C-style array.
+// ValuesIn(const Container& container)
+//   - returns a generator producing sequences with elements from
+//     an STL-style container.
+// ValuesIn(Iterator begin, Iterator end)
+//   - returns a generator producing sequences with elements from
+//     a range [begin, end) defined by a pair of STL-style iterators. These
+//     iterators can also be plain C pointers.
+//
+// Please note that ValuesIn copies the values from the containers
+// passed in and keeps them to generate tests in RUN_ALL_TESTS().
+//
+// Examples:
+//
+// This instantiates tests from test case StringTest
+// each with C-string values of "foo", "bar", and "baz":
+//
+// const char* strings[] = {"foo", "bar", "baz"};
+// INSTANTIATE_TEST_CASE_P(StringSequence, StringTest, ValuesIn(strings));
+//
+// This instantiates tests from test case StlStringTest
+// each with STL strings with values "a" and "b":
+//
+// ::std::vector< ::std::string> GetParameterStrings() {
+//   ::std::vector< ::std::string> v;
+//   v.push_back("a");
+//   v.push_back("b");
+//   return v;
+// }
+//
+// INSTANTIATE_TEST_CASE_P(CharSequence,
+//                         StlStringTest,
+//                         ValuesIn(GetParameterStrings()));
+//
+//
+// This will also instantiate tests from CharTest
+// each with parameter values 'a' and 'b':
+//
+// ::std::list<char> GetParameterChars() {
+//   ::std::list<char> list;
+//   list.push_back('a');
+//   list.push_back('b');
+//   return list;
+// }
+// ::std::list<char> l = GetParameterChars();
+// INSTANTIATE_TEST_CASE_P(CharSequence2,
+//                         CharTest,
+//                         ValuesIn(l.begin(), l.end()));
+//
+// Iterator-pair form: the element type is taken from
+// IteratorTraits<ForwardIterator>::value_type.
+template <typename ForwardIterator>
+internal::ParamGenerator<
+  typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
+ValuesIn(ForwardIterator begin, ForwardIterator end) {
+  typedef typename ::testing::internal::IteratorTraits<ForwardIterator>
+      ::value_type ParamType;
+  return internal::ParamGenerator<ParamType>(
+      new internal::ValuesInIteratorRangeGenerator<ParamType>(begin, end));
+}
+
+template +internal::ParamGenerator ValuesIn(const T (&array)[N]) { + return ValuesIn(array, array + N); +} + +template +internal::ParamGenerator ValuesIn( + const Container& container) { + return ValuesIn(container.begin(), container.end()); +} + +// Values() allows generating tests from explicitly specified list of +// parameters. +// +// Synopsis: +// Values(T v1, T v2, ..., T vN) +// - returns a generator producing sequences with elements v1, v2, ..., vN. +// +// For example, this instantiates tests from test case BarTest each +// with values "one", "two", and "three": +// +// INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three")); +// +// This instantiates tests from test case BazTest each with values 1, 2, 3.5. +// The exact type of values will depend on the type of parameter in BazTest. +// +// INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5)); +// +// Currently, Values() supports from 1 to 50 parameters. +// +template +internal::ValueArray1 Values(T1 v1) { + return internal::ValueArray1(v1); +} + +template +internal::ValueArray2 Values(T1 v1, T2 v2) { + return internal::ValueArray2(v1, v2); +} + +template +internal::ValueArray3 Values(T1 v1, T2 v2, T3 v3) { + return internal::ValueArray3(v1, v2, v3); +} + +template +internal::ValueArray4 Values(T1 v1, T2 v2, T3 v3, T4 v4) { + return internal::ValueArray4(v1, v2, v3, v4); +} + +template +internal::ValueArray5 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5) { + return internal::ValueArray5(v1, v2, v3, v4, v5); +} + +template +internal::ValueArray6 Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6) { + return internal::ValueArray6(v1, v2, v3, v4, v5, v6); +} + +template +internal::ValueArray7 Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6, T7 v7) { + return internal::ValueArray7(v1, v2, v3, v4, v5, + v6, v7); +} + +template +internal::ValueArray8 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) { + return internal::ValueArray8(v1, v2, v3, v4, + v5, v6, 
v7, v8); +} + +template +internal::ValueArray9 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) { + return internal::ValueArray9(v1, v2, v3, + v4, v5, v6, v7, v8, v9); +} + +template +internal::ValueArray10 Values(T1 v1, + T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) { + return internal::ValueArray10(v1, + v2, v3, v4, v5, v6, v7, v8, v9, v10); +} + +template +internal::ValueArray11 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11) { + return internal::ValueArray11(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11); +} + +template +internal::ValueArray12 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12) { + return internal::ValueArray12(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12); +} + +template +internal::ValueArray13 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13) { + return internal::ValueArray13(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13); +} + +template +internal::ValueArray14 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) { + return internal::ValueArray14(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14); +} + +template +internal::ValueArray15 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) { + return internal::ValueArray15(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); +} + +template +internal::ValueArray16 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16) { + return internal::ValueArray16(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16); +} + +template +internal::ValueArray17 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 
v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17) { + return internal::ValueArray17(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, + v11, v12, v13, v14, v15, v16, v17); +} + +template +internal::ValueArray18 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, + T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18) { + return internal::ValueArray18(v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18); +} + +template +internal::ValueArray19 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, + T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, + T15 v15, T16 v16, T17 v17, T18 v18, T19 v19) { + return internal::ValueArray19(v1, v2, v3, v4, v5, v6, v7, v8, + v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19); +} + +template +internal::ValueArray20 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20) { + return internal::ValueArray20(v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20); +} + +template +internal::ValueArray21 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21) { + return internal::ValueArray21(v1, v2, v3, v4, v5, v6, + v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21); +} + +template +internal::ValueArray22 Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22) { + return internal::ValueArray22(v1, v2, v3, v4, + v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22); +} + +template +internal::ValueArray23 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, 
T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23) { + return internal::ValueArray23(v1, v2, v3, + v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23); +} + +template +internal::ValueArray24 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24) { + return internal::ValueArray24(v1, v2, + v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24); +} + +template +internal::ValueArray25 Values(T1 v1, + T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, + T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, + T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25) { + return internal::ValueArray25(v1, + v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, + v18, v19, v20, v21, v22, v23, v24, v25); +} + +template +internal::ValueArray26 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26) { + return internal::ValueArray26(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26); +} + +template +internal::ValueArray27 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27) { + return internal::ValueArray27(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, + v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27); +} + +template +internal::ValueArray28 Values(T1 v1, T2 v2, T3 
v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28) { + return internal::ValueArray28(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, + v28); +} + +template +internal::ValueArray29 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29) { + return internal::ValueArray29(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, + v27, v28, v29); +} + +template +internal::ValueArray30 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, + T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, + T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) { + return internal::ValueArray30(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, + v26, v27, v28, v29, v30); +} + +template +internal::ValueArray31 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) { + return internal::ValueArray31(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, + v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, + v25, v26, v27, v28, v29, v30, v31); +} + +template +internal::ValueArray32 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 
v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32) { + return internal::ValueArray32(v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32); +} + +template +internal::ValueArray33 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, + T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33) { + return internal::ValueArray33(v1, v2, v3, v4, v5, v6, v7, v8, + v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32, v33); +} + +template +internal::ValueArray34 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, + T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, + T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, + T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, + T31 v31, T32 v32, T33 v33, T34 v34) { + return internal::ValueArray34(v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, + v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34); +} + +template +internal::ValueArray35 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, + T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, + T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35) { + return internal::ValueArray35(v1, v2, v3, v4, v5, v6, + v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, + v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35); +} + 
+template +internal::ValueArray36 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, + T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, + T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36) { + return internal::ValueArray36(v1, v2, v3, v4, + v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, + v34, v35, v36); +} + +template +internal::ValueArray37 Values(T1 v1, T2 v2, T3 v3, + T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, + T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, + T37 v37) { + return internal::ValueArray37(v1, v2, v3, + v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, + v34, v35, v36, v37); +} + +template +internal::ValueArray38 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, + T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, + T37 v37, T38 v38) { + return internal::ValueArray38(v1, v2, + v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, + v33, v34, v35, v36, v37, v38); +} + +template +internal::ValueArray39 Values(T1 v1, T2 v2, + T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, + T21 v21, T22 v22, T23 v23, 
T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, + T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, + T37 v37, T38 v38, T39 v39) { + return internal::ValueArray39(v1, + v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, + v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + v32, v33, v34, v35, v36, v37, v38, v39); +} + +template +internal::ValueArray40 Values(T1 v1, + T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, + T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, + T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, + T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, + T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) { + return internal::ValueArray40(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, + v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40); +} + +template +internal::ValueArray41 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41) { + return internal::ValueArray41(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, + v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, + v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41); +} + +template +internal::ValueArray42 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 
v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42) { + return internal::ValueArray42(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, + v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, + v42); +} + +template +internal::ValueArray43 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43) { + return internal::ValueArray43(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, + v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, + v41, v42, v43); +} + +template +internal::ValueArray44 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, + T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, + T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, + T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, + T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, + T42 v42, T43 v43, T44 v44) { + return internal::ValueArray44(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, + v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, + v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, + v40, v41, v42, v43, v44); +} + +template +internal::ValueArray45 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, + T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, + T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, + T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, + T33 v33, T34 
v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, + T41 v41, T42 v42, T43 v43, T44 v44, T45 v45) { + return internal::ValueArray45(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, + v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, + v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, + v39, v40, v41, v42, v43, v44, v45); +} + +template +internal::ValueArray46 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, + T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) { + return internal::ValueArray46(v1, v2, v3, v4, v5, v6, v7, v8, v9, + v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, + v38, v39, v40, v41, v42, v43, v44, v45, v46); +} + +template +internal::ValueArray47 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, + T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, + T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) { + return internal::ValueArray47(v1, v2, v3, v4, v5, v6, v7, v8, + v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, + v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, + v38, v39, v40, v41, v42, v43, v44, v45, v46, v47); +} + +template +internal::ValueArray48 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, + T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, + T16 v16, T17 v17, T18 v18, 
T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, + T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, + T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, + T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, + T48 v48) { + return internal::ValueArray48(v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, + v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, + v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48); +} + +template +internal::ValueArray49 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, + T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, + T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, + T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, + T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, + T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, + T47 v47, T48 v48, T49 v49) { + return internal::ValueArray49(v1, v2, v3, v4, v5, v6, + v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, + v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, + v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49); +} + +template +internal::ValueArray50 Values(T1 v1, T2 v2, T3 v3, T4 v4, + T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, + T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, + T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, + T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, + T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, + T46 v46, T47 v47, T48 v48, T49 v49, T50 v50) { + return internal::ValueArray50(v1, v2, v3, v4, + v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, + v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, + v34, 
v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, + v48, v49, v50); +} + +// Bool() allows generating tests with parameters in a set of (false, true). +// +// Synopsis: +// Bool() +// - returns a generator producing sequences with elements {false, true}. +// +// It is useful when testing code that depends on Boolean flags. Combinations +// of multiple flags can be tested when several Bool()'s are combined using +// Combine() function. +// +// In the following example all tests in the test case FlagDependentTest +// will be instantiated twice with parameters false and true. +// +// class FlagDependentTest : public testing::TestWithParam { +// virtual void SetUp() { +// external_flag = GetParam(); +// } +// } +// INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool()); +// +inline internal::ParamGenerator Bool() { + return Values(false, true); +} + +# if GTEST_HAS_COMBINE +// Combine() allows the user to combine two or more sequences to produce +// values of a Cartesian product of those sequences' elements. +// +// Synopsis: +// Combine(gen1, gen2, ..., genN) +// - returns a generator producing sequences with elements coming from +// the Cartesian product of elements from the sequences generated by +// gen1, gen2, ..., genN. The sequence elements will have a type of +// tuple where T1, T2, ..., TN are the types +// of elements from sequences produces by gen1, gen2, ..., genN. +// +// Combine can have up to 10 arguments. This number is currently limited +// by the maximum number of elements in the tuple implementation used by Google +// Test. 
+// +// Example: +// +// This will instantiate tests in test case AnimalTest each one with +// the parameter values tuple("cat", BLACK), tuple("cat", WHITE), +// tuple("dog", BLACK), and tuple("dog", WHITE): +// +// enum Color { BLACK, GRAY, WHITE }; +// class AnimalTest +// : public testing::TestWithParam > {...}; +// +// TEST_P(AnimalTest, AnimalLooksNice) {...} +// +// INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest, +// Combine(Values("cat", "dog"), +// Values(BLACK, WHITE))); +// +// This will instantiate tests in FlagDependentTest with all variations of two +// Boolean flags: +// +// class FlagDependentTest +// : public testing::TestWithParam > { +// virtual void SetUp() { +// // Assigns external_flag_1 and external_flag_2 values from the tuple. +// tie(external_flag_1, external_flag_2) = GetParam(); +// } +// }; +// +// TEST_P(FlagDependentTest, TestFeature1) { +// // Test your code using external_flag_1 and external_flag_2 here. +// } +// INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest, +// Combine(Bool(), Bool())); +// +template +internal::CartesianProductHolder2 Combine( + const Generator1& g1, const Generator2& g2) { + return internal::CartesianProductHolder2( + g1, g2); +} + +template +internal::CartesianProductHolder3 Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3) { + return internal::CartesianProductHolder3( + g1, g2, g3); +} + +template +internal::CartesianProductHolder4 Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4) { + return internal::CartesianProductHolder4( + g1, g2, g3, g4); +} + +template +internal::CartesianProductHolder5 Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5) { + return internal::CartesianProductHolder5( + g1, g2, g3, g4, g5); +} + +template +internal::CartesianProductHolder6 Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const 
Generator4& g4, const Generator5& g5, const Generator6& g6) { + return internal::CartesianProductHolder6( + g1, g2, g3, g4, g5, g6); +} + +template +internal::CartesianProductHolder7 Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5, const Generator6& g6, + const Generator7& g7) { + return internal::CartesianProductHolder7( + g1, g2, g3, g4, g5, g6, g7); +} + +template +internal::CartesianProductHolder8 Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5, const Generator6& g6, + const Generator7& g7, const Generator8& g8) { + return internal::CartesianProductHolder8( + g1, g2, g3, g4, g5, g6, g7, g8); +} + +template +internal::CartesianProductHolder9 Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5, const Generator6& g6, + const Generator7& g7, const Generator8& g8, const Generator9& g9) { + return internal::CartesianProductHolder9( + g1, g2, g3, g4, g5, g6, g7, g8, g9); +} + +template +internal::CartesianProductHolder10 Combine( + const Generator1& g1, const Generator2& g2, const Generator3& g3, + const Generator4& g4, const Generator5& g5, const Generator6& g6, + const Generator7& g7, const Generator8& g8, const Generator9& g9, + const Generator10& g10) { + return internal::CartesianProductHolder10( + g1, g2, g3, g4, g5, g6, g7, g8, g9, g10); +} +# endif // GTEST_HAS_COMBINE + + + +# define TEST_P(test_case_name, test_name) \ + class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ + : public test_case_name { \ + public: \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \ + virtual void TestBody(); \ + private: \ + static int AddToRegistry() { \ + ::testing::UnitTest::GetInstance()->parameterized_test_registry(). 
\ + GetTestCasePatternHolder(\ + #test_case_name, __FILE__, __LINE__)->AddTestPattern(\ + #test_case_name, \ + #test_name, \ + new ::testing::internal::TestMetaFactory< \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \ + return 0; \ + } \ + static int gtest_registering_dummy_; \ + GTEST_DISALLOW_COPY_AND_ASSIGN_(\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \ + }; \ + int GTEST_TEST_CLASS_NAME_(test_case_name, \ + test_name)::gtest_registering_dummy_ = \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \ + void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() + +# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \ + ::testing::internal::ParamGenerator \ + gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \ + int gtest_##prefix##test_case_name##_dummy_ = \ + ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ + GetTestCasePatternHolder(\ + #test_case_name, __FILE__, __LINE__)->AddTestCaseInstantiation(\ + #prefix, \ + >est_##prefix##test_case_name##_EvalGenerator_, \ + __FILE__, __LINE__) + +} // namespace testing + +#endif // GTEST_HAS_PARAM_TEST + +#endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ +// Copyright 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) +// +// Google C++ Testing Framework definitions useful in production code. + +#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_ +#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_ + +// When you need to test the private or protected members of a class, +// use the FRIEND_TEST macro to declare your tests as friends of the +// class. For example: +// +// class MyClass { +// private: +// void MyMethod(); +// FRIEND_TEST(MyClassTest, MyMethod); +// }; +// +// class MyClassTest : public testing::Test { +// // ... +// }; +// +// TEST_F(MyClassTest, MyMethod) { +// // Can call MyClass::MyMethod() here. +// } + +#define FRIEND_TEST(test_case_name, test_name)\ +friend class test_case_name##_##test_name##_Test + +#endif // GTEST_INCLUDE_GTEST_GTEST_PROD_H_ +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: mheule@google.com (Markus Heule) +// + +#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ +#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ + +#include +#include + +namespace testing { + +// A copyable object representing the result of a test part (i.e. an +// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCESS()). +// +// Don't inherit from TestPartResult as its destructor is not virtual. +class GTEST_API_ TestPartResult { + public: + // The possible outcomes of a test part (i.e. an assertion or an + // explicit SUCCEED(), FAIL(), or ADD_FAILURE()). + enum Type { + kSuccess, // Succeeded. + kNonFatalFailure, // Failed but the test can continue. + kFatalFailure // Failed and the test should be terminated. + }; + + // C'tor. 
TestPartResult does NOT have a default constructor. + // Always use this constructor (with parameters) to create a + // TestPartResult object. + TestPartResult(Type a_type, + const char* a_file_name, + int a_line_number, + const char* a_message) + : type_(a_type), + file_name_(a_file_name == NULL ? "" : a_file_name), + line_number_(a_line_number), + summary_(ExtractSummary(a_message)), + message_(a_message) { + } + + // Gets the outcome of the test part. + Type type() const { return type_; } + + // Gets the name of the source file where the test part took place, or + // NULL if it's unknown. + const char* file_name() const { + return file_name_.empty() ? NULL : file_name_.c_str(); + } + + // Gets the line in the source file where the test part took place, + // or -1 if it's unknown. + int line_number() const { return line_number_; } + + // Gets the summary of the failure message. + const char* summary() const { return summary_.c_str(); } + + // Gets the message associated with the test part. + const char* message() const { return message_.c_str(); } + + // Returns true iff the test part passed. + bool passed() const { return type_ == kSuccess; } + + // Returns true iff the test part failed. + bool failed() const { return type_ != kSuccess; } + + // Returns true iff the test part non-fatally failed. + bool nonfatally_failed() const { return type_ == kNonFatalFailure; } + + // Returns true iff the test part fatally failed. + bool fatally_failed() const { return type_ == kFatalFailure; } + + private: + Type type_; + + // Gets the summary of the failure message by omitting the stack + // trace in it. + static std::string ExtractSummary(const char* message); + + // The name of the source file where the test part took place, or + // "" if the source file is unknown. + std::string file_name_; + // The line in the source file where the test part took place, or -1 + // if the line number is unknown. + int line_number_; + std::string summary_; // The test failure summary. 
+ std::string message_; // The test failure message. +}; + +// Prints a TestPartResult object. +std::ostream& operator<<(std::ostream& os, const TestPartResult& result); + +// An array of TestPartResult objects. +// +// Don't inherit from TestPartResultArray as its destructor is not +// virtual. +class GTEST_API_ TestPartResultArray { + public: + TestPartResultArray() {} + + // Appends the given TestPartResult to the array. + void Append(const TestPartResult& result); + + // Returns the TestPartResult at the given index (0-based). + const TestPartResult& GetTestPartResult(int index) const; + + // Returns the number of TestPartResult objects in the array. + int size() const; + + private: + std::vector array_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray); +}; + +// This interface knows how to report a test part result. +class TestPartResultReporterInterface { + public: + virtual ~TestPartResultReporterInterface() {} + + virtual void ReportTestPartResult(const TestPartResult& result) = 0; +}; + +namespace internal { + +// This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a +// statement generates new fatal failures. To do so it registers itself as the +// current test part result reporter. Besides checking if fatal failures were +// reported, it only delegates the reporting to the former result reporter. +// The original result reporter is restored in the destructor. +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. 
+class GTEST_API_ HasNewFatalFailureHelper + : public TestPartResultReporterInterface { + public: + HasNewFatalFailureHelper(); + virtual ~HasNewFatalFailureHelper(); + virtual void ReportTestPartResult(const TestPartResult& result); + bool has_new_fatal_failure() const { return has_new_fatal_failure_; } + private: + bool has_new_fatal_failure_; + TestPartResultReporterInterface* original_reporter_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper); +}; + +} // namespace internal + +} // namespace testing + +#endif // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ +// Copyright 2008 Google Inc. +// All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: wan@google.com (Zhanyong Wan) + +#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ +#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ + +// This header implements typed tests and type-parameterized tests. + +// Typed (aka type-driven) tests repeat the same test for types in a +// list. You must know which types you want to test with when writing +// typed tests. Here's how you do it: + +#if 0 + +// First, define a fixture class template. It should be parameterized +// by a type. Remember to derive it from testing::Test. +template +class FooTest : public testing::Test { + public: + ... + typedef std::list List; + static T shared_; + T value_; +}; + +// Next, associate a list of types with the test case, which will be +// repeated for each type in the list. The typedef is necessary for +// the macro to parse correctly. +typedef testing::Types MyTypes; +TYPED_TEST_CASE(FooTest, MyTypes); + +// If the type list contains only one type, you can write that type +// directly without Types<...>: +// TYPED_TEST_CASE(FooTest, int); + +// Then, use TYPED_TEST() instead of TEST_F() to define as many typed +// tests for this test case as you want. +TYPED_TEST(FooTest, DoesBlah) { + // Inside a test, refer to TypeParam to get the type parameter. + // Since we are inside a derived class template, C++ requires use to + // visit the members of FooTest via 'this'. 
+ TypeParam n = this->value_; + + // To visit static members of the fixture, add the TestFixture:: + // prefix. + n += TestFixture::shared_; + + // To refer to typedefs in the fixture, add the "typename + // TestFixture::" prefix. + typename TestFixture::List values; + values.push_back(n); + ... +} + +TYPED_TEST(FooTest, HasPropertyA) { ... } + +#endif // 0 + +// Type-parameterized tests are abstract test patterns parameterized +// by a type. Compared with typed tests, type-parameterized tests +// allow you to define the test pattern without knowing what the type +// parameters are. The defined pattern can be instantiated with +// different types any number of times, in any number of translation +// units. +// +// If you are designing an interface or concept, you can define a +// suite of type-parameterized tests to verify properties that any +// valid implementation of the interface/concept should have. Then, +// each implementation can easily instantiate the test suite to verify +// that it conforms to the requirements, without having to write +// similar tests repeatedly. Here's an example: + +#if 0 + +// First, define a fixture class template. It should be parameterized +// by a type. Remember to derive it from testing::Test. +template +class FooTest : public testing::Test { + ... +}; + +// Next, declare that you will define a type-parameterized test case +// (the _P suffix is for "parameterized" or "pattern", whichever you +// prefer): +TYPED_TEST_CASE_P(FooTest); + +// Then, use TYPED_TEST_P() to define as many type-parameterized tests +// for this type-parameterized test case as you want. +TYPED_TEST_P(FooTest, DoesBlah) { + // Inside a test, refer to TypeParam to get the type parameter. + TypeParam n = 0; + ... +} + +TYPED_TEST_P(FooTest, HasPropertyA) { ... } + +// Now the tricky part: you need to register all test patterns before +// you can instantiate them. 
The first argument of the macro is the +// test case name; the rest are the names of the tests in this test +// case. +REGISTER_TYPED_TEST_CASE_P(FooTest, + DoesBlah, HasPropertyA); + +// Finally, you are free to instantiate the pattern with the types you +// want. If you put the above code in a header file, you can #include +// it in multiple C++ source files and instantiate it multiple times. +// +// To distinguish different instances of the pattern, the first +// argument to the INSTANTIATE_* macro is a prefix that will be added +// to the actual test case name. Remember to pick unique prefixes for +// different instances. +typedef testing::Types MyTypes; +INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes); + +// If the type list contains only one type, you can write that type +// directly without Types<...>: +// INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int); + +#endif // 0 + + +// Implements typed tests. + +#if GTEST_HAS_TYPED_TEST + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Expands to the name of the typedef for the type parameters of the +// given test case. +# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_ + +// The 'Types' template argument below must have spaces around it +// since some compilers may choke on '>>' when passing a template +// instance (e.g. 
Types) +# define TYPED_TEST_CASE(CaseName, Types) \ + typedef ::testing::internal::TypeList< Types >::type \ + GTEST_TYPE_PARAMS_(CaseName) + +# define TYPED_TEST(CaseName, TestName) \ + template \ + class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \ + : public CaseName { \ + private: \ + typedef CaseName TestFixture; \ + typedef gtest_TypeParam_ TypeParam; \ + virtual void TestBody(); \ + }; \ + bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \ + ::testing::internal::TypeParameterizedTest< \ + CaseName, \ + ::testing::internal::TemplateSel< \ + GTEST_TEST_CLASS_NAME_(CaseName, TestName)>, \ + GTEST_TYPE_PARAMS_(CaseName)>::Register(\ + "", #CaseName, #TestName, 0); \ + template \ + void GTEST_TEST_CLASS_NAME_(CaseName, TestName)::TestBody() + +#endif // GTEST_HAS_TYPED_TEST + +// Implements type-parameterized tests. + +#if GTEST_HAS_TYPED_TEST_P + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Expands to the namespace name that the type-parameterized tests for +// the given type-parameterized test case are defined in. The exact +// name of the namespace is subject to change without notice. +# define GTEST_CASE_NAMESPACE_(TestCaseName) \ + gtest_case_##TestCaseName##_ + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// +// Expands to the name of the variable used to remember the names of +// the defined tests in the given test case. +# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \ + gtest_typed_test_case_p_state_##TestCaseName##_ + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY. +// +// Expands to the name of the variable used to remember the names of +// the registered tests in the given test case. 
+# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \ + gtest_registered_test_names_##TestCaseName##_ + +// The variables defined in the type-parameterized test macros are +// static as typically these macros are used in a .h file that can be +// #included in multiple translation units linked together. +# define TYPED_TEST_CASE_P(CaseName) \ + static ::testing::internal::TypedTestCasePState \ + GTEST_TYPED_TEST_CASE_P_STATE_(CaseName) + +# define TYPED_TEST_P(CaseName, TestName) \ + namespace GTEST_CASE_NAMESPACE_(CaseName) { \ + template \ + class TestName : public CaseName { \ + private: \ + typedef CaseName TestFixture; \ + typedef gtest_TypeParam_ TypeParam; \ + virtual void TestBody(); \ + }; \ + static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \ + GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\ + __FILE__, __LINE__, #CaseName, #TestName); \ + } \ + template \ + void GTEST_CASE_NAMESPACE_(CaseName)::TestName::TestBody() + +# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \ + namespace GTEST_CASE_NAMESPACE_(CaseName) { \ + typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \ + } \ + static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) = \ + GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames(\ + __FILE__, __LINE__, #__VA_ARGS__) + +// The 'Types' template argument below must have spaces around it +// since some compilers may choke on '>>' when passing a template +// instance (e.g. Types) +# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types) \ + bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ = \ + ::testing::internal::TypeParameterizedTestCase::type>::Register(\ + #Prefix, #CaseName, GTEST_REGISTERED_TEST_NAMES_(CaseName)) + +#endif // GTEST_HAS_TYPED_TEST_P + +#endif // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ + +// Depending on the platform, different string classes are available. 
+// On Linux, in addition to ::std::string, Google also makes use of +// class ::string, which has the same interface as ::std::string, but +// has a different implementation. +// +// The user can define GTEST_HAS_GLOBAL_STRING to 1 to indicate that +// ::string is available AND is a distinct type to ::std::string, or +// define it to 0 to indicate otherwise. +// +// If the user's ::std::string and ::string are the same class due to +// aliasing, he should define GTEST_HAS_GLOBAL_STRING to 0. +// +// If the user doesn't define GTEST_HAS_GLOBAL_STRING, it is defined +// heuristically. + +namespace testing { + +// Declares the flags. + +// This flag temporarily enables the disabled tests. +GTEST_DECLARE_bool_(also_run_disabled_tests); + +// This flag brings the debugger on an assertion failure. +GTEST_DECLARE_bool_(break_on_failure); + +// This flag controls whether Google Test catches all test-thrown exceptions +// and logs them as failures. +GTEST_DECLARE_bool_(catch_exceptions); + +// This flag enables using colors in terminal output. Available values are +// "yes" to enable colors, "no" (disable colors), or "auto" (the default) +// to let Google Test decide. +GTEST_DECLARE_string_(color); + +// This flag sets up the filter to select by name using a glob pattern +// the tests to run. If the filter is not given all tests are executed. +GTEST_DECLARE_string_(filter); + +// This flag causes Google Test to list tests. None of the tests listed +// are actually run if the flag is provided. +GTEST_DECLARE_bool_(list_tests); + +// This flag controls whether Google Test emits a detailed XML report to a file +// in addition to its normal textual output. +GTEST_DECLARE_string_(output); + +// This flag controls whether Google Test prints the elapsed time for each +// test. +GTEST_DECLARE_bool_(print_time); + +// This flag specifies the random number seed. +GTEST_DECLARE_int32_(random_seed); + +// This flag sets how many times the tests are repeated.
The default value +// is 1. If the value is -1 the tests are repeating forever. +GTEST_DECLARE_int32_(repeat); + +// This flag controls whether Google Test includes Google Test internal +// stack frames in failure stack traces. +GTEST_DECLARE_bool_(show_internal_stack_frames); + +// When this flag is specified, tests' order is randomized on every iteration. +GTEST_DECLARE_bool_(shuffle); + +// This flag specifies the maximum number of stack frames to be +// printed in a failure message. +GTEST_DECLARE_int32_(stack_trace_depth); + +// When this flag is specified, a failed assertion will throw an +// exception if exceptions are enabled, or exit the program with a +// non-zero code otherwise. +GTEST_DECLARE_bool_(throw_on_failure); + +// When this flag is set with a "host:port" string, on supported +// platforms test results are streamed to the specified port on +// the specified host machine. +GTEST_DECLARE_string_(stream_result_to); + +// The upper limit for valid stack trace depths. +const int kMaxStackTraceDepth = 100; + +namespace internal { + +class AssertHelper; +class DefaultGlobalTestPartResultReporter; +class ExecDeathTest; +class NoExecDeathTest; +class FinalSuccessChecker; +class GTestFlagSaver; +class StreamingListenerTest; +class TestResultAccessor; +class TestEventListenersAccessor; +class TestEventRepeater; +class UnitTestRecordPropertyTestHelper; +class WindowsDeathTest; +class UnitTestImpl* GetUnitTestImpl(); +void ReportFailureInUnknownLocation(TestPartResult::Type result_type, + const std::string& message); + +} // namespace internal + +// The friend relationship of some of these classes is cyclic. +// If we don't forward declare them the compiler might confuse the classes +// in friendship clauses with same named classes on the scope. +class Test; +class TestCase; +class TestInfo; +class UnitTest; + +// A class for indicating whether an assertion was successful. 
When +// the assertion wasn't successful, the AssertionResult object +// remembers a non-empty message that describes how it failed. +// +// To create an instance of this class, use one of the factory functions +// (AssertionSuccess() and AssertionFailure()). +// +// This class is useful for two purposes: +// 1. Defining predicate functions to be used with Boolean test assertions +// EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts +// 2. Defining predicate-format functions to be +// used with predicate assertions (ASSERT_PRED_FORMAT*, etc). +// +// For example, if you define IsEven predicate: +// +// testing::AssertionResult IsEven(int n) { +// if ((n % 2) == 0) +// return testing::AssertionSuccess(); +// else +// return testing::AssertionFailure() << n << " is odd"; +// } +// +// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5))) +// will print the message +// +// Value of: IsEven(Fib(5)) +// Actual: false (5 is odd) +// Expected: true +// +// instead of a more opaque +// +// Value of: IsEven(Fib(5)) +// Actual: false +// Expected: true +// +// in case IsEven is a simple Boolean predicate. +// +// If you expect your predicate to be reused and want to support informative +// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up +// about half as often as positive ones in our tests), supply messages for +// both success and failure cases: +// +// testing::AssertionResult IsEven(int n) { +// if ((n % 2) == 0) +// return testing::AssertionSuccess() << n << " is even"; +// else +// return testing::AssertionFailure() << n << " is odd"; +// } +// +// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print +// +// Value of: IsEven(Fib(6)) +// Actual: true (8 is even) +// Expected: false +// +// NB: Predicates that support negative Boolean assertions have reduced +// performance in positive ones so be careful not to use them in tests +// that have lots (tens of thousands) of positive Boolean assertions. 
+// +// To use this class with EXPECT_PRED_FORMAT assertions such as: +// +// // Verifies that Foo() returns an even number. +// EXPECT_PRED_FORMAT1(IsEven, Foo()); +// +// you need to define: +// +// testing::AssertionResult IsEven(const char* expr, int n) { +// if ((n % 2) == 0) +// return testing::AssertionSuccess(); +// else +// return testing::AssertionFailure() +// << "Expected: " << expr << " is even\n Actual: it's " << n; +// } +// +// If Foo() returns 5, you will see the following message: +// +// Expected: Foo() is even +// Actual: it's 5 +// +class GTEST_API_ AssertionResult { + public: + // Copy constructor. + // Used in EXPECT_TRUE/FALSE(assertion_result). + AssertionResult(const AssertionResult& other); + // Used in the EXPECT_TRUE/FALSE(bool_expression). + explicit AssertionResult(bool success) : success_(success) {} + + // Returns true iff the assertion succeeded. + operator bool() const { return success_; } // NOLINT + + // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE. + AssertionResult operator!() const; + + // Returns the text streamed into this AssertionResult. Test assertions + // use it when they fail (i.e., the predicate's outcome doesn't match the + // assertion's expectation). When nothing has been streamed into the + // object, returns an empty string. + const char* message() const { + return message_.get() != NULL ? message_->c_str() : ""; + } + // TODO(vladl@google.com): Remove this after making sure no clients use it. + // Deprecated; please use message() instead. + const char* failure_message() const { return message(); } + + // Streams a custom failure message into this object. + template <typename T> AssertionResult& operator<<(const T& value) { + AppendMessage(Message() << value); + return *this; + } + + // Allows streaming basic output manipulators such as endl or flush into + // this object.
+ AssertionResult& operator<<( + ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) { + AppendMessage(Message() << basic_manipulator); + return *this; + } + + private: + // Appends the contents of message to message_. + void AppendMessage(const Message& a_message) { + if (message_.get() == NULL) + message_.reset(new ::std::string); + message_->append(a_message.GetString().c_str()); + } + + // Stores result of the assertion predicate. + bool success_; + // Stores the message describing the condition in case the expectation + // construct is not satisfied with the predicate's outcome. + // Referenced via a pointer to avoid taking too much stack frame space + // with test assertions. + internal::scoped_ptr< ::std::string> message_; + + GTEST_DISALLOW_ASSIGN_(AssertionResult); +}; + +// Makes a successful assertion result. +GTEST_API_ AssertionResult AssertionSuccess(); + +// Makes a failed assertion result. +GTEST_API_ AssertionResult AssertionFailure(); + +// Makes a failed assertion result with the given failure message. +// Deprecated; use AssertionFailure() << msg. +GTEST_API_ AssertionResult AssertionFailure(const Message& msg); + +// The abstract class that all tests inherit from. +// +// In Google Test, a unit test program contains one or many TestCases, and +// each TestCase contains one or many Tests. +// +// When you define a test using the TEST macro, you don't need to +// explicitly derive from Test - the TEST macro automatically does +// this for you. +// +// The only time you derive from Test is when defining a test fixture +// to be used in a TEST_F. For example: +// +// class FooTest : public testing::Test { +// protected: +// virtual void SetUp() { ... } +// virtual void TearDown() { ... } +// ... +// }; +// +// TEST_F(FooTest, Bar) { ... } +// TEST_F(FooTest, Baz) { ... } +// +// Test is not copyable.
+class GTEST_API_ Test { + public: + friend class TestInfo; + + // Defines types for pointers to functions that set up and tear down + // a test case. + typedef internal::SetUpTestCaseFunc SetUpTestCaseFunc; + typedef internal::TearDownTestCaseFunc TearDownTestCaseFunc; + + // The d'tor is virtual as we intend to inherit from Test. + virtual ~Test(); + + // Sets up the stuff shared by all tests in this test case. + // + // Google Test will call Foo::SetUpTestCase() before running the first + // test in test case Foo. Hence a sub-class can define its own + // SetUpTestCase() method to shadow the one defined in the super + // class. + static void SetUpTestCase() {} + + // Tears down the stuff shared by all tests in this test case. + // + // Google Test will call Foo::TearDownTestCase() after running the last + // test in test case Foo. Hence a sub-class can define its own + // TearDownTestCase() method to shadow the one defined in the super + // class. + static void TearDownTestCase() {} + + // Returns true iff the current test has a fatal failure. + static bool HasFatalFailure(); + + // Returns true iff the current test has a non-fatal failure. + static bool HasNonfatalFailure(); + + // Returns true iff the current test has a (either fatal or + // non-fatal) failure. + static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); } + + // Logs a property for the current test, test case, or for the entire + // invocation of the test program when used outside of the context of a + // test case. Only the last value for a given key is remembered. These + // are public static so they can be called from utility functions that are + // not members of the test fixture. Calls to RecordProperty made during + // lifespan of the test (from the moment its constructor starts to the + // moment its destructor finishes) will be output in XML as attributes of + // the <testcase> element.
Properties recorded from fixture's + // SetUpTestCase or TearDownTestCase are logged as attributes of the + // corresponding <testsuite> element. Calls to RecordProperty made in the + // global context (before or after invocation of RUN_ALL_TESTS and from + // SetUp/TearDown method of Environment objects registered with Google + // Test) will be output as attributes of the <testsuites> element. + static void RecordProperty(const std::string& key, const std::string& value); + static void RecordProperty(const std::string& key, int value); + + protected: + // Creates a Test object. + Test(); + + // Sets up the test fixture. + virtual void SetUp(); + + // Tears down the test fixture. + virtual void TearDown(); + + private: + // Returns true iff the current test has the same fixture class as + // the first test in the current test case. + static bool HasSameFixtureClass(); + + // Runs the test after the test fixture has been set up. + // + // A sub-class must implement this to define the test logic. + // + // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM. + // Instead, use the TEST or TEST_F macro. + virtual void TestBody() = 0; + + // Sets up, executes, and tears down the test. + void Run(); + + // Deletes self. We deliberately pick an unusual name for this + // internal method to avoid clashing with names used in user TESTs. + void DeleteSelf_() { delete this; } + + // Uses a GTestFlagSaver to save and restore all Google Test flags. + const internal::GTestFlagSaver* const gtest_flag_saver_; + + // Often a user mis-spells SetUp() as Setup() and spends a long time + // wondering why it is never called by Google Test. The declaration of + // the following method is solely for catching such an error at + // compile time: + // + // - The return type is deliberately chosen to be not void, so it + // will be a conflict if a user declares void Setup() in his test + // fixture.
+ // + // - This method is private, so it will be another compiler error + // if a user calls it from his test fixture. + // + // DO NOT OVERRIDE THIS FUNCTION. + // + // If you see an error about overriding the following function or + // about it being private, you have mis-spelled SetUp() as Setup(). + struct Setup_should_be_spelled_SetUp {}; + virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; } + + // We disallow copying Tests. + GTEST_DISALLOW_COPY_AND_ASSIGN_(Test); +}; + +typedef internal::TimeInMillis TimeInMillis; + +// A copyable object representing a user specified test property which can be +// output as a key/value string pair. +// +// Don't inherit from TestProperty as its destructor is not virtual. +class TestProperty { + public: + // C'tor. TestProperty does NOT have a default constructor. + // Always use this constructor (with parameters) to create a + // TestProperty object. + TestProperty(const std::string& a_key, const std::string& a_value) : + key_(a_key), value_(a_value) { + } + + // Gets the user supplied key. + const char* key() const { + return key_.c_str(); + } + + // Gets the user supplied value. + const char* value() const { + return value_.c_str(); + } + + // Sets a new value, overriding the one supplied in the constructor. + void SetValue(const std::string& new_value) { + value_ = new_value; + } + + private: + // The key supplied by the user. + std::string key_; + // The value supplied by the user. + std::string value_; +}; + +// The result of a single Test. This includes a list of +// TestPartResults, a list of TestProperties, a count of how many +// death tests there are in the Test, and how much time it took to run +// the Test. +// +// TestResult is not copyable. +class GTEST_API_ TestResult { + public: + // Creates an empty TestResult. + TestResult(); + + // D'tor. Do not inherit from TestResult. + ~TestResult(); + + // Gets the number of all test parts. 
This is the sum of the number + // of successful test parts and the number of failed test parts. + int total_part_count() const; + + // Returns the number of the test properties. + int test_property_count() const; + + // Returns true iff the test passed (i.e. no test part failed). + bool Passed() const { return !Failed(); } + + // Returns true iff the test failed. + bool Failed() const; + + // Returns true iff the test fatally failed. + bool HasFatalFailure() const; + + // Returns true iff the test has a non-fatal failure. + bool HasNonfatalFailure() const; + + // Returns the elapsed time, in milliseconds. + TimeInMillis elapsed_time() const { return elapsed_time_; } + + // Returns the i-th test part result among all the results. i can range + // from 0 to total_part_count() - 1. If i is not in that range, aborts + // the program. + const TestPartResult& GetTestPartResult(int i) const; + + // Returns the i-th test property. i can range from 0 to + // test_property_count() - 1. If i is not in that range, aborts the + // program. + const TestProperty& GetTestProperty(int i) const; + + private: + friend class TestInfo; + friend class TestCase; + friend class UnitTest; + friend class internal::DefaultGlobalTestPartResultReporter; + friend class internal::ExecDeathTest; + friend class internal::TestResultAccessor; + friend class internal::UnitTestImpl; + friend class internal::WindowsDeathTest; + + // Gets the vector of TestPartResults. + const std::vector& test_part_results() const { + return test_part_results_; + } + + // Gets the vector of TestProperties. + const std::vector& test_properties() const { + return test_properties_; + } + + // Sets the elapsed time. + void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; } + + // Adds a test property to the list. The property is validated and may add + // a non-fatal failure if invalid (e.g., if it conflicts with reserved + // key names).
If a property is already recorded for the same key, the + // value will be updated, rather than storing multiple values for the same + // key. xml_element specifies the element for which the property is being + // recorded and is used for validation. + void RecordProperty(const std::string& xml_element, + const TestProperty& test_property); + + // Adds a failure if the key is a reserved attribute of Google Test + // testcase tags. Returns true if the property is valid. + // TODO(russr): Validate attribute names are legal and human readable. + static bool ValidateTestProperty(const std::string& xml_element, + const TestProperty& test_property); + + // Adds a test part result to the list. + void AddTestPartResult(const TestPartResult& test_part_result); + + // Returns the death test count. + int death_test_count() const { return death_test_count_; } + + // Increments the death test count, returning the new count. + int increment_death_test_count() { return ++death_test_count_; } + + // Clears the test part results. + void ClearTestPartResults(); + + // Clears the object. + void Clear(); + + // Protects mutable state of the property vector and of owned + // properties, whose values may be updated. + internal::Mutex test_properites_mutex_; + + // The vector of TestPartResults + std::vector test_part_results_; + // The vector of TestProperties + std::vector test_properties_; + // Running count of death tests. + int death_test_count_; + // The elapsed time, in milliseconds. + TimeInMillis elapsed_time_; + + // We disallow copying TestResult. 
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult); +}; // class TestResult + +// A TestInfo object stores the following information about a test: +// +// Test case name +// Test name +// Whether the test should be run +// A function pointer that creates the test object when invoked +// Test result +// +// The constructor of TestInfo registers itself with the UnitTest +// singleton such that the RUN_ALL_TESTS() macro knows which tests to +// run. +class GTEST_API_ TestInfo { + public: + // Destructs a TestInfo object. This function is not virtual, so + // don't inherit from TestInfo. + ~TestInfo(); + + // Returns the test case name. + const char* test_case_name() const { return test_case_name_.c_str(); } + + // Returns the test name. + const char* name() const { return name_.c_str(); } + + // Returns the name of the parameter type, or NULL if this is not a typed + // or a type-parameterized test. + const char* type_param() const { + if (type_param_.get() != NULL) + return type_param_->c_str(); + return NULL; + } + + // Returns the text representation of the value parameter, or NULL if this + // is not a value-parameterized test. + const char* value_param() const { + if (value_param_.get() != NULL) + return value_param_->c_str(); + return NULL; + } + + // Returns true if this test should run, that is if the test is not + // disabled (or it is disabled but the also_run_disabled_tests flag has + // been specified) and its full name matches the user-specified filter. + // + // Google Test allows the user to filter the tests by their full names. + // The full name of a test Bar in test case Foo is defined as + // "Foo.Bar". Only the tests that match the filter will run. + // + // A filter is a colon-separated list of glob (not regex) patterns, + // optionally followed by a '-' and a colon-separated list of + // negative patterns (tests to exclude). A test is run if it + // matches one of the positive patterns and does not match any of + // the negative patterns. 
+ // + // For example, *A*:Foo.* is a filter that matches any string that + // contains the character 'A' or starts with "Foo.". + bool should_run() const { return should_run_; } + + // Returns true iff this test will appear in the XML report. + bool is_reportable() const { + // For now, the XML report includes all tests matching the filter. + // In the future, we may trim tests that are excluded because of + // sharding. + return matches_filter_; + } + + // Returns the result of the test. + const TestResult* result() const { return &result_; } + + private: +#if GTEST_HAS_DEATH_TEST + friend class internal::DefaultDeathTestFactory; +#endif // GTEST_HAS_DEATH_TEST + friend class Test; + friend class TestCase; + friend class internal::UnitTestImpl; + friend class internal::StreamingListenerTest; + friend TestInfo* internal::MakeAndRegisterTestInfo( + const char* test_case_name, + const char* name, + const char* type_param, + const char* value_param, + internal::TypeId fixture_class_id, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc, + internal::TestFactoryBase* factory); + + // Constructs a TestInfo object. The newly constructed instance assumes + // ownership of the factory object. + TestInfo(const std::string& test_case_name, + const std::string& name, + const char* a_type_param, // NULL if not a type-parameterized test + const char* a_value_param, // NULL if not a value-parameterized test + internal::TypeId fixture_class_id, + internal::TestFactoryBase* factory); + + // Increments the number of death tests encountered in this test so + // far. + int increment_death_test_count() { + return result_.increment_death_test_count(); + } + + // Creates the test object, runs it, records its result, and then + // deletes it. + void Run(); + + static void ClearTestResult(TestInfo* test_info) { + test_info->result_.Clear(); + } + + // These fields are immutable properties of the test. 
+ const std::string test_case_name_; // Test case name + const std::string name_; // Test name + // Name of the parameter type, or NULL if this is not a typed or a + // type-parameterized test. + const internal::scoped_ptr type_param_; + // Text representation of the value parameter, or NULL if this is not a + // value-parameterized test. + const internal::scoped_ptr value_param_; + const internal::TypeId fixture_class_id_; // ID of the test fixture class + bool should_run_; // True iff this test should run + bool is_disabled_; // True iff this test is disabled + bool matches_filter_; // True if this test matches the + // user-specified filter. + internal::TestFactoryBase* const factory_; // The factory that creates + // the test object + + // This field is mutable and needs to be reset before running the + // test for the second time. + TestResult result_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo); +}; + +// A test case, which consists of a vector of TestInfos. +// +// TestCase is not copyable. +class GTEST_API_ TestCase { + public: + // Creates a TestCase with the given name. + // + // TestCase does NOT have a default constructor. Always use this + // constructor to create a TestCase object. + // + // Arguments: + // + // name: name of the test case + // a_type_param: the name of the test's type parameter, or NULL if + // this is not a type-parameterized test. + // set_up_tc: pointer to the function that sets up the test case + // tear_down_tc: pointer to the function that tears down the test case + TestCase(const char* name, const char* a_type_param, + Test::SetUpTestCaseFunc set_up_tc, + Test::TearDownTestCaseFunc tear_down_tc); + + // Destructor of TestCase. + virtual ~TestCase(); + + // Gets the name of the TestCase. + const char* name() const { return name_.c_str(); } + + // Returns the name of the parameter type, or NULL if this is not a + // type-parameterized test case. 
+ const char* type_param() const { + if (type_param_.get() != NULL) + return type_param_->c_str(); + return NULL; + } + + // Returns true if any test in this test case should run. + bool should_run() const { return should_run_; } + + // Gets the number of successful tests in this test case. + int successful_test_count() const; + + // Gets the number of failed tests in this test case. + int failed_test_count() const; + + // Gets the number of disabled tests that will be reported in the XML report. + int reportable_disabled_test_count() const; + + // Gets the number of disabled tests in this test case. + int disabled_test_count() const; + + // Gets the number of tests to be printed in the XML report. + int reportable_test_count() const; + + // Get the number of tests in this test case that should run. + int test_to_run_count() const; + + // Gets the number of all tests in this test case. + int total_test_count() const; + + // Returns true iff the test case passed. + bool Passed() const { return !Failed(); } + + // Returns true iff the test case failed. + bool Failed() const { return failed_test_count() > 0; } + + // Returns the elapsed time, in milliseconds. + TimeInMillis elapsed_time() const { return elapsed_time_; } + + // Returns the i-th test among all the tests. i can range from 0 to + // total_test_count() - 1. If i is not in that range, returns NULL. + const TestInfo* GetTestInfo(int i) const; + + // Returns the TestResult that holds test properties recorded during + // execution of SetUpTestCase and TearDownTestCase. + const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; } + + private: + friend class Test; + friend class internal::UnitTestImpl; + + // Gets the (mutable) vector of TestInfos in this TestCase. + std::vector& test_info_list() { return test_info_list_; } + + // Gets the (immutable) vector of TestInfos in this TestCase. 
+ const std::vector& test_info_list() const { + return test_info_list_; + } + + // Returns the i-th test among all the tests. i can range from 0 to + // total_test_count() - 1. If i is not in that range, returns NULL. + TestInfo* GetMutableTestInfo(int i); + + // Sets the should_run member. + void set_should_run(bool should) { should_run_ = should; } + + // Adds a TestInfo to this test case. Will delete the TestInfo upon + // destruction of the TestCase object. + void AddTestInfo(TestInfo * test_info); + + // Clears the results of all tests in this test case. + void ClearResult(); + + // Clears the results of all tests in the given test case. + static void ClearTestCaseResult(TestCase* test_case) { + test_case->ClearResult(); + } + + // Runs every test in this TestCase. + void Run(); + + // Runs SetUpTestCase() for this TestCase. This wrapper is needed + // for catching exceptions thrown from SetUpTestCase(). + void RunSetUpTestCase() { (*set_up_tc_)(); } + + // Runs TearDownTestCase() for this TestCase. This wrapper is + // needed for catching exceptions thrown from TearDownTestCase(). + void RunTearDownTestCase() { (*tear_down_tc_)(); } + + // Returns true iff test passed. + static bool TestPassed(const TestInfo* test_info) { + return test_info->should_run() && test_info->result()->Passed(); + } + + // Returns true iff test failed. + static bool TestFailed(const TestInfo* test_info) { + return test_info->should_run() && test_info->result()->Failed(); + } + + // Returns true iff the test is disabled and will be reported in the XML + // report. + static bool TestReportableDisabled(const TestInfo* test_info) { + return test_info->is_reportable() && test_info->is_disabled_; + } + + // Returns true iff test is disabled. + static bool TestDisabled(const TestInfo* test_info) { + return test_info->is_disabled_; + } + + // Returns true iff this test will appear in the XML report. 
+ static bool TestReportable(const TestInfo* test_info) { + return test_info->is_reportable(); + } + + // Returns true if the given test should run. + static bool ShouldRunTest(const TestInfo* test_info) { + return test_info->should_run(); + } + + // Shuffles the tests in this test case. + void ShuffleTests(internal::Random* random); + + // Restores the test order to before the first shuffle. + void UnshuffleTests(); + + // Name of the test case. + std::string name_; + // Name of the parameter type, or NULL if this is not a typed or a + // type-parameterized test. + const internal::scoped_ptr type_param_; + // The vector of TestInfos in their original order. It owns the + // elements in the vector. + std::vector test_info_list_; + // Provides a level of indirection for the test list to allow easy + // shuffling and restoring the test order. The i-th element in this + // vector is the index of the i-th test in the shuffled test list. + std::vector test_indices_; + // Pointer to the function that sets up the test case. + Test::SetUpTestCaseFunc set_up_tc_; + // Pointer to the function that tears down the test case. + Test::TearDownTestCaseFunc tear_down_tc_; + // True iff any test in this test case should run. + bool should_run_; + // Elapsed time, in milliseconds. + TimeInMillis elapsed_time_; + // Holds test properties recorded during execution of SetUpTestCase and + // TearDownTestCase. + TestResult ad_hoc_test_result_; + + // We disallow copying TestCases. + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase); +}; + +// An Environment object is capable of setting up and tearing down an +// environment. The user should subclass this to define his own +// environment(s). +// +// An Environment object does the set-up and tear-down in virtual +// methods SetUp() and TearDown() instead of the constructor and the +// destructor, as: +// +// 1. You cannot safely throw from a destructor. 
This is a problem +// as in some cases Google Test is used where exceptions are enabled, and +// we may want to implement ASSERT_* using exceptions where they are +// available. +// 2. You cannot use ASSERT_* directly in a constructor or +// destructor. +class Environment { + public: + // The d'tor is virtual as we need to subclass Environment. + virtual ~Environment() {} + + // Override this to define how to set up the environment. + virtual void SetUp() {} + + // Override this to define how to tear down the environment. + virtual void TearDown() {} + private: + // If you see an error about overriding the following function or + // about it being private, you have mis-spelled SetUp() as Setup(). + struct Setup_should_be_spelled_SetUp {}; + virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; } +}; + +// The interface for tracing execution of tests. The methods are organized in +// the order the corresponding events are fired. +class TestEventListener { + public: + virtual ~TestEventListener() {} + + // Fired before any test activity starts. + virtual void OnTestProgramStart(const UnitTest& unit_test) = 0; + + // Fired before each iteration of tests starts. There may be more than + // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration + // index, starting from 0. + virtual void OnTestIterationStart(const UnitTest& unit_test, + int iteration) = 0; + + // Fired before environment set-up for each iteration of tests starts. + virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0; + + // Fired after environment set-up for each iteration of tests ends. + virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0; + + // Fired before the test case starts. + virtual void OnTestCaseStart(const TestCase& test_case) = 0; + + // Fired before the test starts. + virtual void OnTestStart(const TestInfo& test_info) = 0; + + // Fired after a failed assertion or a SUCCEED() invocation. 
+ virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0; + + // Fired after the test ends. + virtual void OnTestEnd(const TestInfo& test_info) = 0; + + // Fired after the test case ends. + virtual void OnTestCaseEnd(const TestCase& test_case) = 0; + + // Fired before environment tear-down for each iteration of tests starts. + virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0; + + // Fired after environment tear-down for each iteration of tests ends. + virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0; + + // Fired after each iteration of tests finishes. + virtual void OnTestIterationEnd(const UnitTest& unit_test, + int iteration) = 0; + + // Fired after all test activities have ended. + virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0; +}; + +// The convenience class for users who need to override just one or two +// methods and are not concerned that a possible change to a signature of +// the methods they override will not be caught during the build. For +// comments about each method please see the definition of TestEventListener +// above. 
+class EmptyTestEventListener : public TestEventListener { + public: + virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {} + virtual void OnTestIterationStart(const UnitTest& /*unit_test*/, + int /*iteration*/) {} + virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {} + virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {} + virtual void OnTestCaseStart(const TestCase& /*test_case*/) {} + virtual void OnTestStart(const TestInfo& /*test_info*/) {} + virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {} + virtual void OnTestEnd(const TestInfo& /*test_info*/) {} + virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {} + virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {} + virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {} + virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/, + int /*iteration*/) {} + virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {} +}; + +// TestEventListeners lets users add listeners to track events in Google Test. +class GTEST_API_ TestEventListeners { + public: + TestEventListeners(); + ~TestEventListeners(); + + // Appends an event listener to the end of the list. Google Test assumes + // the ownership of the listener (i.e. it will delete the listener when + // the test program finishes). + void Append(TestEventListener* listener); + + // Removes the given event listener from the list and returns it. It then + // becomes the caller's responsibility to delete the listener. Returns + // NULL if the listener is not found in the list. + TestEventListener* Release(TestEventListener* listener); + + // Returns the standard listener responsible for the default console + // output. Can be removed from the listeners list to shut down default + // console output. 
Note that removing this object from the listener list + // with Release transfers its ownership to the caller and makes this + // function return NULL the next time. + TestEventListener* default_result_printer() const { + return default_result_printer_; + } + + // Returns the standard listener responsible for the default XML output + // controlled by the --gtest_output=xml flag. Can be removed from the + // listeners list by users who want to shut down the default XML output + // controlled by this flag and substitute it with custom one. Note that + // removing this object from the listener list with Release transfers its + // ownership to the caller and makes this function return NULL the next + // time. + TestEventListener* default_xml_generator() const { + return default_xml_generator_; + } + + private: + friend class TestCase; + friend class TestInfo; + friend class internal::DefaultGlobalTestPartResultReporter; + friend class internal::NoExecDeathTest; + friend class internal::TestEventListenersAccessor; + friend class internal::UnitTestImpl; + + // Returns repeater that broadcasts the TestEventListener events to all + // subscribers. + TestEventListener* repeater(); + + // Sets the default_result_printer attribute to the provided listener. + // The listener is also added to the listener list and previous + // default_result_printer is removed from it and deleted. The listener can + // also be NULL in which case it will not be added to the list. Does + // nothing if the previous and the current listener objects are the same. + void SetDefaultResultPrinter(TestEventListener* listener); + + // Sets the default_xml_generator attribute to the provided listener. The + // listener is also added to the listener list and previous + // default_xml_generator is removed from it and deleted. The listener can + // also be NULL in which case it will not be added to the list. Does + // nothing if the previous and the current listener objects are the same. 
+ void SetDefaultXmlGenerator(TestEventListener* listener); + + // Controls whether events will be forwarded by the repeater to the + // listeners in the list. + bool EventForwardingEnabled() const; + void SuppressEventForwarding(); + + // The actual list of listeners. + internal::TestEventRepeater* repeater_; + // Listener responsible for the standard result output. + TestEventListener* default_result_printer_; + // Listener responsible for the creation of the XML output file. + TestEventListener* default_xml_generator_; + + // We disallow copying TestEventListeners. + GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners); +}; + +// A UnitTest consists of a vector of TestCases. +// +// This is a singleton class. The only instance of UnitTest is +// created when UnitTest::GetInstance() is first called. This +// instance is never deleted. +// +// UnitTest is not copyable. +// +// This class is thread-safe as long as the methods are called +// according to their specification. +class GTEST_API_ UnitTest { + public: + // Gets the singleton UnitTest object. The first time this method + // is called, a UnitTest object is constructed and returned. + // Consecutive calls will return the same object. + static UnitTest* GetInstance(); + + // Runs all tests in this UnitTest object and prints the result. + // Returns 0 if successful, or 1 otherwise. + // + // This method can only be called from the main thread. + // + // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + int Run() GTEST_MUST_USE_RESULT_; + + // Returns the working directory when the first TEST() or TEST_F() + // was executed. The UnitTest object owns the string. + const char* original_working_dir() const; + + // Returns the TestCase object for the test that's currently running, + // or NULL if no test is running. + const TestCase* current_test_case() const + GTEST_LOCK_EXCLUDED_(mutex_); + + // Returns the TestInfo object for the test that's currently running, + // or NULL if no test is running. 
+ const TestInfo* current_test_info() const + GTEST_LOCK_EXCLUDED_(mutex_); + + // Returns the random seed used at the start of the current test run. + int random_seed() const; + +#if GTEST_HAS_PARAM_TEST + // Returns the ParameterizedTestCaseRegistry object used to keep track of + // value-parameterized tests and instantiate and register them. + // + // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + internal::ParameterizedTestCaseRegistry& parameterized_test_registry() + GTEST_LOCK_EXCLUDED_(mutex_); +#endif // GTEST_HAS_PARAM_TEST + + // Gets the number of successful test cases. + int successful_test_case_count() const; + + // Gets the number of failed test cases. + int failed_test_case_count() const; + + // Gets the number of all test cases. + int total_test_case_count() const; + + // Gets the number of all test cases that contain at least one test + // that should run. + int test_case_to_run_count() const; + + // Gets the number of successful tests. + int successful_test_count() const; + + // Gets the number of failed tests. + int failed_test_count() const; + + // Gets the number of disabled tests that will be reported in the XML report. + int reportable_disabled_test_count() const; + + // Gets the number of disabled tests. + int disabled_test_count() const; + + // Gets the number of tests to be printed in the XML report. + int reportable_test_count() const; + + // Gets the number of all tests. + int total_test_count() const; + + // Gets the number of tests that should run. + int test_to_run_count() const; + + // Gets the time of the test program start, in ms from the start of the + // UNIX epoch. + TimeInMillis start_timestamp() const; + + // Gets the elapsed time, in milliseconds. + TimeInMillis elapsed_time() const; + + // Returns true iff the unit test passed (i.e. all test cases passed). + bool Passed() const; + + // Returns true iff the unit test failed (i.e. some test case failed + // or something outside of all tests failed). 
+ bool Failed() const; + + // Gets the i-th test case among all the test cases. i can range from 0 to + // total_test_case_count() - 1. If i is not in that range, returns NULL. + const TestCase* GetTestCase(int i) const; + + // Returns the TestResult containing information on test failures and + // properties logged outside of individual test cases. + const TestResult& ad_hoc_test_result() const; + + // Returns the list of event listeners that can be used to track events + // inside Google Test. + TestEventListeners& listeners(); + + private: + // Registers and returns a global test environment. When a test + // program is run, all global test environments will be set-up in + // the order they were registered. After all tests in the program + // have finished, all global test environments will be torn-down in + // the *reverse* order they were registered. + // + // The UnitTest object takes ownership of the given environment. + // + // This method can only be called from the main thread. + Environment* AddEnvironment(Environment* env); + + // Adds a TestPartResult to the current TestResult object. All + // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) + // eventually call this to report their results. The user code + // should use the assertion macros instead of calling this directly. + void AddTestPartResult(TestPartResult::Type result_type, + const char* file_name, + int line_number, + const std::string& message, + const std::string& os_stack_trace) + GTEST_LOCK_EXCLUDED_(mutex_); + + // Adds a TestProperty to the current TestResult object when invoked from + // inside a test, to current TestCase's ad_hoc_test_result_ when invoked + // from SetUpTestCase or TearDownTestCase, or to the global property set + // when invoked elsewhere. If the result already contains a property with + // the same key, the value will be updated. 
+ void RecordProperty(const std::string& key, const std::string& value); + + // Gets the i-th test case among all the test cases. i can range from 0 to + // total_test_case_count() - 1. If i is not in that range, returns NULL. + TestCase* GetMutableTestCase(int i); + + // Accessors for the implementation object. + internal::UnitTestImpl* impl() { return impl_; } + const internal::UnitTestImpl* impl() const { return impl_; } + + // These classes and funcions are friends as they need to access private + // members of UnitTest. + friend class Test; + friend class internal::AssertHelper; + friend class internal::ScopedTrace; + friend class internal::StreamingListenerTest; + friend class internal::UnitTestRecordPropertyTestHelper; + friend Environment* AddGlobalTestEnvironment(Environment* env); + friend internal::UnitTestImpl* internal::GetUnitTestImpl(); + friend void internal::ReportFailureInUnknownLocation( + TestPartResult::Type result_type, + const std::string& message); + + // Creates an empty UnitTest. + UnitTest(); + + // D'tor + virtual ~UnitTest(); + + // Pushes a trace defined by SCOPED_TRACE() on to the per-thread + // Google Test trace stack. + void PushGTestTrace(const internal::TraceInfo& trace) + GTEST_LOCK_EXCLUDED_(mutex_); + + // Pops a trace from the per-thread Google Test trace stack. + void PopGTestTrace() + GTEST_LOCK_EXCLUDED_(mutex_); + + // Protects mutable state in *impl_. This is mutable as some const + // methods need to lock it too. + mutable internal::Mutex mutex_; + + // Opaque implementation object. This field is never changed once + // the object is constructed. We don't mark it as const here, as + // doing so will cause a warning in the constructor of UnitTest. + // Mutable state in *impl_ is protected by mutex_. + internal::UnitTestImpl* impl_; + + // We disallow copying UnitTest. + GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest); +}; + +// A convenient wrapper for adding an environment for the test +// program. 
+// +// You should call this before RUN_ALL_TESTS() is called, probably in +// main(). If you use gtest_main, you need to call this before main() +// starts for it to take effect. For example, you can define a global +// variable like this: +// +// testing::Environment* const foo_env = +// testing::AddGlobalTestEnvironment(new FooEnvironment); +// +// However, we strongly recommend you to write your own main() and +// call AddGlobalTestEnvironment() there, as relying on initialization +// of global variables makes the code harder to read and may cause +// problems when you register multiple environments from different +// translation units and the environments have dependencies among them +// (remember that the compiler doesn't guarantee the order in which +// global variables from different translation units are initialized). +inline Environment* AddGlobalTestEnvironment(Environment* env) { + return UnitTest::GetInstance()->AddEnvironment(env); +} + +// Initializes Google Test. This must be called before calling +// RUN_ALL_TESTS(). In particular, it parses a command line for the +// flags that Google Test recognizes. Whenever a Google Test flag is +// seen, it is removed from argv, and *argc is decremented. +// +// No value is returned. Instead, the Google Test flag variables are +// updated. +// +// Calling the function for the second time has no user-visible effect. +GTEST_API_ void InitGoogleTest(int* argc, char** argv); + +// This overloaded version can be used in Windows programs compiled in +// UNICODE mode. +GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv); + +namespace internal { + +// FormatForComparison::Format(value) formats a +// value of type ToPrint that is an operand of a comparison assertion +// (e.g. ASSERT_EQ). OtherOperand is the type of the other operand in +// the comparison, and is used to help determine the best way to +// format the value. 
In particular, when the value is a C string +// (char pointer) and the other operand is an STL string object, we +// want to format the C string as a string, since we know it is +// compared by value with the string object. If the value is a char +// pointer but the other operand is not an STL string object, we don't +// know whether the pointer is supposed to point to a NUL-terminated +// string, and thus want to print it as a pointer to be safe. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + +// The default case. +template +class FormatForComparison { + public: + static ::std::string Format(const ToPrint& value) { + return ::testing::PrintToString(value); + } +}; + +// Array. +template +class FormatForComparison { + public: + static ::std::string Format(const ToPrint* value) { + return FormatForComparison::Format(value); + } +}; + +// By default, print C string as pointers to be safe, as we don't know +// whether they actually point to a NUL-terminated string. + +#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType) \ + template \ + class FormatForComparison { \ + public: \ + static ::std::string Format(CharType* value) { \ + return ::testing::PrintToString(static_cast(value)); \ + } \ + } + +GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char); +GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char); +GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t); +GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t); + +#undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_ + +// If a C string is compared with an STL string object, we know it's meant +// to point to a NUL-terminated string, and thus can print it as a string. 
+ +#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType) \ + template <> \ + class FormatForComparison { \ + public: \ + static ::std::string Format(CharType* value) { \ + return ::testing::PrintToString(value); \ + } \ + } + +GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string); +GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string); + +#if GTEST_HAS_GLOBAL_STRING +GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::string); +GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::string); +#endif + +#if GTEST_HAS_GLOBAL_WSTRING +GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::wstring); +GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::wstring); +#endif + +#if GTEST_HAS_STD_WSTRING +GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring); +GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring); +#endif + +#undef GTEST_IMPL_FORMAT_C_STRING_AS_STRING_ + +// Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, and etc) +// operand to be used in a failure message. The type (but not value) +// of the other operand may affect the format. This allows us to +// print a char* as a raw pointer when it is compared against another +// char* or void*, and print it as a C string when it is compared +// against an std::string object, for example. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +template +std::string FormatForComparisonFailureMessage( + const T1& value, const T2& /* other_operand */) { + return FormatForComparison::Format(value); +} + +// The helper function for {ASSERT|EXPECT}_EQ. +template +AssertionResult CmpHelperEQ(const char* expected_expression, + const char* actual_expression, + const T1& expected, + const T2& actual) { +#ifdef _MSC_VER +# pragma warning(push) // Saves the current warning state. +# pragma warning(disable:4389) // Temporarily disables warning on + // signed/unsigned mismatch. 
+#endif + + if (expected == actual) { + return AssertionSuccess(); + } + +#ifdef _MSC_VER +# pragma warning(pop) // Restores the warning state. +#endif + + return EqFailure(expected_expression, + actual_expression, + FormatForComparisonFailureMessage(expected, actual), + FormatForComparisonFailureMessage(actual, expected), + false); +} + +// With this overloaded version, we allow anonymous enums to be used +// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums +// can be implicitly cast to BiggestInt. +GTEST_API_ AssertionResult CmpHelperEQ(const char* expected_expression, + const char* actual_expression, + BiggestInt expected, + BiggestInt actual); + +// The helper class for {ASSERT|EXPECT}_EQ. The template argument +// lhs_is_null_literal is true iff the first argument to ASSERT_EQ() +// is a null pointer literal. The following default implementation is +// for lhs_is_null_literal being false. +template +class EqHelper { + public: + // This templatized version is for the general case. + template + static AssertionResult Compare(const char* expected_expression, + const char* actual_expression, + const T1& expected, + const T2& actual) { + return CmpHelperEQ(expected_expression, actual_expression, expected, + actual); + } + + // With this overloaded version, we allow anonymous enums to be used + // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous + // enums can be implicitly cast to BiggestInt. + // + // Even though its body looks the same as the above version, we + // cannot merge the two, as it will make anonymous enums unhappy. + static AssertionResult Compare(const char* expected_expression, + const char* actual_expression, + BiggestInt expected, + BiggestInt actual) { + return CmpHelperEQ(expected_expression, actual_expression, expected, + actual); + } +}; + +// This specialization is used when the first argument to ASSERT_EQ() +// is a null pointer literal, like NULL, false, or 0. 
+template <> +class EqHelper { + public: + // We define two overloaded versions of Compare(). The first + // version will be picked when the second argument to ASSERT_EQ() is + // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or + // EXPECT_EQ(false, a_bool). + template + static AssertionResult Compare( + const char* expected_expression, + const char* actual_expression, + const T1& expected, + const T2& actual, + // The following line prevents this overload from being considered if T2 + // is not a pointer type. We need this because ASSERT_EQ(NULL, my_ptr) + // expands to Compare("", "", NULL, my_ptr), which requires a conversion + // to match the Secret* in the other overload, which would otherwise make + // this template match better. + typename EnableIf::value>::type* = 0) { + return CmpHelperEQ(expected_expression, actual_expression, expected, + actual); + } + + // This version will be picked when the second argument to ASSERT_EQ() is a + // pointer, e.g. ASSERT_EQ(NULL, a_pointer). + template + static AssertionResult Compare( + const char* expected_expression, + const char* actual_expression, + // We used to have a second template parameter instead of Secret*. That + // template parameter would deduce to 'long', making this a better match + // than the first overload even without the first overload's EnableIf. + // Unfortunately, gcc with -Wconversion-null warns when "passing NULL to + // non-pointer argument" (even a deduced integral argument), so the old + // implementation caused warnings in user code. + Secret* /* expected (NULL) */, + T* actual) { + // We already know that 'expected' is a null pointer. + return CmpHelperEQ(expected_expression, actual_expression, + static_cast(NULL), actual); + } +}; + +// A macro for implementing the helper functions needed to implement +// ASSERT_?? and EXPECT_??. It is here just to avoid copy-and-paste +// of similar code. 
+// +// For each templatized helper function, we also define an overloaded +// version for BiggestInt in order to reduce code bloat and allow +// anonymous enums to be used with {ASSERT|EXPECT}_?? when compiled +// with gcc 4. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +#define GTEST_IMPL_CMP_HELPER_(op_name, op)\ +template \ +AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \ + const T1& val1, const T2& val2) {\ + if (val1 op val2) {\ + return AssertionSuccess();\ + } else {\ + return AssertionFailure() \ + << "Expected: (" << expr1 << ") " #op " (" << expr2\ + << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\ + << " vs " << FormatForComparisonFailureMessage(val2, val1);\ + }\ +}\ +GTEST_API_ AssertionResult CmpHelper##op_name(\ + const char* expr1, const char* expr2, BiggestInt val1, BiggestInt val2) + +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. + +// Implements the helper function for {ASSERT|EXPECT}_NE +GTEST_IMPL_CMP_HELPER_(NE, !=); +// Implements the helper function for {ASSERT|EXPECT}_LE +GTEST_IMPL_CMP_HELPER_(LE, <=); +// Implements the helper function for {ASSERT|EXPECT}_LT +GTEST_IMPL_CMP_HELPER_(LT, <); +// Implements the helper function for {ASSERT|EXPECT}_GE +GTEST_IMPL_CMP_HELPER_(GE, >=); +// Implements the helper function for {ASSERT|EXPECT}_GT +GTEST_IMPL_CMP_HELPER_(GT, >); + +#undef GTEST_IMPL_CMP_HELPER_ + +// The helper function for {ASSERT|EXPECT}_STREQ. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression, + const char* actual_expression, + const char* expected, + const char* actual); + +// The helper function for {ASSERT|EXPECT}_STRCASEEQ. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. 
+GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* expected_expression, + const char* actual_expression, + const char* expected, + const char* actual); + +// The helper function for {ASSERT|EXPECT}_STRNE. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2); + +// The helper function for {ASSERT|EXPECT}_STRCASENE. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression, + const char* s2_expression, + const char* s1, + const char* s2); + + +// Helper function for *_STREQ on wide strings. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTREQ(const char* expected_expression, + const char* actual_expression, + const wchar_t* expected, + const wchar_t* actual); + +// Helper function for *_STRNE on wide strings. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression, + const char* s2_expression, + const wchar_t* s1, + const wchar_t* s2); + +} // namespace internal + +// IsSubstring() and IsNotSubstring() are intended to be used as the +// first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by +// themselves. They check whether needle is a substring of haystack +// (NULL is considered a substring of itself only), and return an +// appropriate error message when they fail. +// +// The {needle,haystack}_expr arguments are the stringified +// expressions that generated the two real arguments. 
+GTEST_API_ AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const char* needle, const char* haystack); +GTEST_API_ AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const wchar_t* needle, const wchar_t* haystack); +GTEST_API_ AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const char* needle, const char* haystack); +GTEST_API_ AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const wchar_t* needle, const wchar_t* haystack); +GTEST_API_ AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::string& needle, const ::std::string& haystack); +GTEST_API_ AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::string& needle, const ::std::string& haystack); + +#if GTEST_HAS_STD_WSTRING +GTEST_API_ AssertionResult IsSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::wstring& needle, const ::std::wstring& haystack); +GTEST_API_ AssertionResult IsNotSubstring( + const char* needle_expr, const char* haystack_expr, + const ::std::wstring& needle, const ::std::wstring& haystack); +#endif // GTEST_HAS_STD_WSTRING + +namespace internal { + +// Helper template function for comparing floating-points. +// +// Template parameter: +// +// RawType: the raw floating-point type (either float or double) +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. 
+template +AssertionResult CmpHelperFloatingPointEQ(const char* expected_expression, + const char* actual_expression, + RawType expected, + RawType actual) { + const FloatingPoint lhs(expected), rhs(actual); + + if (lhs.AlmostEquals(rhs)) { + return AssertionSuccess(); + } + + ::std::stringstream expected_ss; + expected_ss << std::setprecision(std::numeric_limits::digits10 + 2) + << expected; + + ::std::stringstream actual_ss; + actual_ss << std::setprecision(std::numeric_limits::digits10 + 2) + << actual; + + return EqFailure(expected_expression, + actual_expression, + StringStreamToString(&expected_ss), + StringStreamToString(&actual_ss), + false); +} + +// Helper function for implementing ASSERT_NEAR. +// +// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. +GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1, + const char* expr2, + const char* abs_error_expr, + double val1, + double val2, + double abs_error); + +// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. +// A class that enables one to stream messages to assertion macros +class GTEST_API_ AssertHelper { + public: + // Constructor. + AssertHelper(TestPartResult::Type type, + const char* file, + int line, + const char* message); + ~AssertHelper(); + + // Message assignment is a semantic trick to enable assertion + // streaming; see the GTEST_MESSAGE_ macro below. + void operator=(const Message& message) const; + + private: + // We put our data in a struct so that the size of the AssertHelper class can + // be as small as possible. This is important because gcc is incapable of + // re-using stack space even for temporary variables, so every EXPECT_EQ + // reserves stack space for another AssertHelper. 
+ struct AssertHelperData { + AssertHelperData(TestPartResult::Type t, + const char* srcfile, + int line_num, + const char* msg) + : type(t), file(srcfile), line(line_num), message(msg) { } + + TestPartResult::Type const type; + const char* const file; + int const line; + std::string const message; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData); + }; + + AssertHelperData* const data_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper); +}; + +} // namespace internal + +#if GTEST_HAS_PARAM_TEST +// The pure interface class that all value-parameterized tests inherit from. +// A value-parameterized class must inherit from both ::testing::Test and +// ::testing::WithParamInterface. In most cases that just means inheriting +// from ::testing::TestWithParam, but more complicated test hierarchies +// may need to inherit from Test and WithParamInterface at different levels. +// +// This interface has support for accessing the test parameter value via +// the GetParam() method. +// +// Use it with one of the parameter generator defining functions, like Range(), +// Values(), ValuesIn(), Bool(), and Combine(). +// +// class FooTest : public ::testing::TestWithParam { +// protected: +// FooTest() { +// // Can use GetParam() here. +// } +// virtual ~FooTest() { +// // Can use GetParam() here. +// } +// virtual void SetUp() { +// // Can use GetParam() here. +// } +// virtual void TearDown { +// // Can use GetParam() here. +// } +// }; +// TEST_P(FooTest, DoesBar) { +// // Can use GetParam() method here. +// Foo foo; +// ASSERT_TRUE(foo.DoesBar(GetParam())); +// } +// INSTANTIATE_TEST_CASE_P(OneToTenRange, FooTest, ::testing::Range(1, 10)); + +template +class WithParamInterface { + public: + typedef T ParamType; + virtual ~WithParamInterface() {} + + // The current parameter value. Is also available in the test fixture's + // constructor. 
This member function is non-static, even though it only + // references static data, to reduce the opportunity for incorrect uses + // like writing 'WithParamInterface::GetParam()' for a test that + // uses a fixture whose parameter type is int. + const ParamType& GetParam() const { + GTEST_CHECK_(parameter_ != NULL) + << "GetParam() can only be called inside a value-parameterized test " + << "-- did you intend to write TEST_P instead of TEST_F?"; + return *parameter_; + } + + private: + // Sets parameter value. The caller is responsible for making sure the value + // remains alive and unchanged throughout the current test. + static void SetParam(const ParamType* parameter) { + parameter_ = parameter; + } + + // Static value used for accessing parameter during a test lifetime. + static const ParamType* parameter_; + + // TestClass must be a subclass of WithParamInterface and Test. + template friend class internal::ParameterizedTestFactory; +}; + +template +const T* WithParamInterface::parameter_ = NULL; + +// Most value-parameterized classes can ignore the existence of +// WithParamInterface, and can just inherit from ::testing::TestWithParam. + +template +class TestWithParam : public Test, public WithParamInterface { +}; + +#endif // GTEST_HAS_PARAM_TEST + +// Macros for indicating success/failure in test code. + +// ADD_FAILURE unconditionally adds a failure to the current test. +// SUCCEED generates a success - it doesn't automatically make the +// current test successful, as a test is only successful when it has +// no failure. +// +// EXPECT_* verifies that a certain condition is satisfied. If not, +// it behaves like ADD_FAILURE. In particular: +// +// EXPECT_TRUE verifies that a Boolean condition is true. +// EXPECT_FALSE verifies that a Boolean condition is false. +// +// FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except +// that they will also abort the current function on failure. 
People +// usually want the fail-fast behavior of FAIL and ASSERT_*, but those +// writing data-driven tests often find themselves using ADD_FAILURE +// and EXPECT_* more. + +// Generates a nonfatal failure with a generic message. +#define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed") + +// Generates a nonfatal failure at the given source file location with +// a generic message. +#define ADD_FAILURE_AT(file, line) \ + GTEST_MESSAGE_AT_(file, line, "Failed", \ + ::testing::TestPartResult::kNonFatalFailure) + +// Generates a fatal failure with a generic message. +#define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed") + +// Define this macro to 1 to omit the definition of FAIL(), which is a +// generic name and clashes with some other libraries. +#if !GTEST_DONT_DEFINE_FAIL +# define FAIL() GTEST_FAIL() +#endif + +// Generates a success with a generic message. +#define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded") + +// Define this macro to 1 to omit the definition of SUCCEED(), which +// is a generic name and clashes with some other libraries. +#if !GTEST_DONT_DEFINE_SUCCEED +# define SUCCEED() GTEST_SUCCEED() +#endif + +// Macros for testing exceptions. +// +// * {ASSERT|EXPECT}_THROW(statement, expected_exception): +// Tests that the statement throws the expected exception. +// * {ASSERT|EXPECT}_NO_THROW(statement): +// Tests that the statement doesn't throw any exception. +// * {ASSERT|EXPECT}_ANY_THROW(statement): +// Tests that the statement throws an exception. 
+ +#define EXPECT_THROW(statement, expected_exception) \ + GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_) +#define EXPECT_NO_THROW(statement) \ + GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_) +#define EXPECT_ANY_THROW(statement) \ + GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_) +#define ASSERT_THROW(statement, expected_exception) \ + GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_) +#define ASSERT_NO_THROW(statement) \ + GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_) +#define ASSERT_ANY_THROW(statement) \ + GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_) + +// Boolean assertions. Condition can be either a Boolean expression or an +// AssertionResult. For more information on how to use AssertionResult with +// these macros see comments on that class. +#define EXPECT_TRUE(condition) \ + GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \ + GTEST_NONFATAL_FAILURE_) +#define EXPECT_FALSE(condition) \ + GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \ + GTEST_NONFATAL_FAILURE_) +#define ASSERT_TRUE(condition) \ + GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \ + GTEST_FATAL_FAILURE_) +#define ASSERT_FALSE(condition) \ + GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \ + GTEST_FATAL_FAILURE_) + +// Includes the auto-generated header that implements a family of +// generic predicate assertion macros. +// Copyright 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is AUTOMATICALLY GENERATED on 10/31/2011 by command +// 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND! +// +// Implements a family of generic predicate assertion macros. + +#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ +#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ + +// Makes sure this header is not included before gtest.h. +#ifndef GTEST_INCLUDE_GTEST_GTEST_H_ +# error Do not include gtest_pred_impl.h directly. Include gtest.h instead. +#endif // GTEST_INCLUDE_GTEST_GTEST_H_ + +// This header implements a family of generic predicate assertion +// macros: +// +// ASSERT_PRED_FORMAT1(pred_format, v1) +// ASSERT_PRED_FORMAT2(pred_format, v1, v2) +// ... 
+// +// where pred_format is a function or functor that takes n (in the +// case of ASSERT_PRED_FORMATn) values and their source expression +// text, and returns a testing::AssertionResult. See the definition +// of ASSERT_EQ in gtest.h for an example. +// +// If you don't care about formatting, you can use the more +// restrictive version: +// +// ASSERT_PRED1(pred, v1) +// ASSERT_PRED2(pred, v1, v2) +// ... +// +// where pred is an n-ary function or functor that returns bool, +// and the values v1, v2, ..., must support the << operator for +// streaming to std::ostream. +// +// We also define the EXPECT_* variations. +// +// For now we only support predicates whose arity is at most 5. +// Please email googletestframework@googlegroups.com if you need +// support for higher arities. + +// GTEST_ASSERT_ is the basic statement to which all of the assertions +// in this file reduce. Don't use this in your code. + +#define GTEST_ASSERT_(expression, on_failure) \ + GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ + if (const ::testing::AssertionResult gtest_ar = (expression)) \ + ; \ + else \ + on_failure(gtest_ar.failure_message()) + + +// Helper function for implementing {EXPECT|ASSERT}_PRED1. Don't use +// this in your code. +template +AssertionResult AssertPred1Helper(const char* pred_text, + const char* e1, + Pred pred, + const T1& v1) { + if (pred(v1)) return AssertionSuccess(); + + return AssertionFailure() << pred_text << "(" + << e1 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1; +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1. +// Don't use this in your code. +#define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\ + GTEST_ASSERT_(pred_format(#v1, v1), \ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED1. Don't use +// this in your code. 
+#define GTEST_PRED1_(pred, v1, on_failure)\ + GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \ + #v1, \ + pred, \ + v1), on_failure) + +// Unary predicate assertion macros. +#define EXPECT_PRED_FORMAT1(pred_format, v1) \ + GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_) +#define EXPECT_PRED1(pred, v1) \ + GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_) +#define ASSERT_PRED_FORMAT1(pred_format, v1) \ + GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_) +#define ASSERT_PRED1(pred, v1) \ + GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED2. Don't use +// this in your code. +template +AssertionResult AssertPred2Helper(const char* pred_text, + const char* e1, + const char* e2, + Pred pred, + const T1& v1, + const T2& v2) { + if (pred(v1, v2)) return AssertionSuccess(); + + return AssertionFailure() << pred_text << "(" + << e1 << ", " + << e2 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2; +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2. +// Don't use this in your code. +#define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\ + GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED2. Don't use +// this in your code. +#define GTEST_PRED2_(pred, v1, v2, on_failure)\ + GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \ + #v1, \ + #v2, \ + pred, \ + v1, \ + v2), on_failure) + +// Binary predicate assertion macros. 
+#define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \ + GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_) +#define EXPECT_PRED2(pred, v1, v2) \ + GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_) +#define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \ + GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_) +#define ASSERT_PRED2(pred, v1, v2) \ + GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED3. Don't use +// this in your code. +template +AssertionResult AssertPred3Helper(const char* pred_text, + const char* e1, + const char* e2, + const char* e3, + Pred pred, + const T1& v1, + const T2& v2, + const T3& v3) { + if (pred(v1, v2, v3)) return AssertionSuccess(); + + return AssertionFailure() << pred_text << "(" + << e1 << ", " + << e2 << ", " + << e3 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2 + << "\n" << e3 << " evaluates to " << v3; +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3. +// Don't use this in your code. +#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\ + GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED3. Don't use +// this in your code. +#define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\ + GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \ + #v1, \ + #v2, \ + #v3, \ + pred, \ + v1, \ + v2, \ + v3), on_failure) + +// Ternary predicate assertion macros. 
+#define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \ + GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_) +#define EXPECT_PRED3(pred, v1, v2, v3) \ + GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_) +#define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \ + GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_) +#define ASSERT_PRED3(pred, v1, v2, v3) \ + GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED4. Don't use +// this in your code. +template +AssertionResult AssertPred4Helper(const char* pred_text, + const char* e1, + const char* e2, + const char* e3, + const char* e4, + Pred pred, + const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4) { + if (pred(v1, v2, v3, v4)) return AssertionSuccess(); + + return AssertionFailure() << pred_text << "(" + << e1 << ", " + << e2 << ", " + << e3 << ", " + << e4 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2 + << "\n" << e3 << " evaluates to " << v3 + << "\n" << e4 << " evaluates to " << v4; +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4. +// Don't use this in your code. +#define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\ + GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED4. Don't use +// this in your code. +#define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\ + GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \ + #v1, \ + #v2, \ + #v3, \ + #v4, \ + pred, \ + v1, \ + v2, \ + v3, \ + v4), on_failure) + +// 4-ary predicate assertion macros. 
+#define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \ + GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_) +#define EXPECT_PRED4(pred, v1, v2, v3, v4) \ + GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_) +#define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \ + GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_) +#define ASSERT_PRED4(pred, v1, v2, v3, v4) \ + GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_) + + + +// Helper function for implementing {EXPECT|ASSERT}_PRED5. Don't use +// this in your code. +template +AssertionResult AssertPred5Helper(const char* pred_text, + const char* e1, + const char* e2, + const char* e3, + const char* e4, + const char* e5, + Pred pred, + const T1& v1, + const T2& v2, + const T3& v3, + const T4& v4, + const T5& v5) { + if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess(); + + return AssertionFailure() << pred_text << "(" + << e1 << ", " + << e2 << ", " + << e3 << ", " + << e4 << ", " + << e5 << ") evaluates to false, where" + << "\n" << e1 << " evaluates to " << v1 + << "\n" << e2 << " evaluates to " << v2 + << "\n" << e3 << " evaluates to " << v3 + << "\n" << e4 << " evaluates to " << v4 + << "\n" << e5 << " evaluates to " << v5; +} + +// Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5. +// Don't use this in your code. +#define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\ + GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \ + on_failure) + +// Internal macro for implementing {EXPECT|ASSERT}_PRED5. Don't use +// this in your code. +#define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\ + GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \ + #v1, \ + #v2, \ + #v3, \ + #v4, \ + #v5, \ + pred, \ + v1, \ + v2, \ + v3, \ + v4, \ + v5), on_failure) + +// 5-ary predicate assertion macros. 
+#define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \ + GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_) +#define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \ + GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_) +#define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \ + GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_) +#define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \ + GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_) + + + +#endif // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ + +// Macros for testing equalities and inequalities. +// +// * {ASSERT|EXPECT}_EQ(expected, actual): Tests that expected == actual +// * {ASSERT|EXPECT}_NE(v1, v2): Tests that v1 != v2 +// * {ASSERT|EXPECT}_LT(v1, v2): Tests that v1 < v2 +// * {ASSERT|EXPECT}_LE(v1, v2): Tests that v1 <= v2 +// * {ASSERT|EXPECT}_GT(v1, v2): Tests that v1 > v2 +// * {ASSERT|EXPECT}_GE(v1, v2): Tests that v1 >= v2 +// +// When they are not, Google Test prints both the tested expressions and +// their actual values. The values must be compatible built-in types, +// or you will get a compiler error. By "compatible" we mean that the +// values can be compared by the respective operator. +// +// Note: +// +// 1. It is possible to make a user-defined type work with +// {ASSERT|EXPECT}_??(), but that requires overloading the +// comparison operators and is thus discouraged by the Google C++ +// Usage Guide. Therefore, you are advised to use the +// {ASSERT|EXPECT}_TRUE() macro to assert that two objects are +// equal. +// +// 2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on +// pointers (in particular, C strings). Therefore, if you use it +// with two C strings, you are testing how their locations in memory +// are related, not how their content is related. To compare two C +// strings by content, use {ASSERT|EXPECT}_STR*(). +// +// 3. 
{ASSERT|EXPECT}_EQ(expected, actual) is preferred to +// {ASSERT|EXPECT}_TRUE(expected == actual), as the former tells you +// what the actual value is when it fails, and similarly for the +// other comparisons. +// +// 4. Do not depend on the order in which {ASSERT|EXPECT}_??() +// evaluate their arguments, which is undefined. +// +// 5. These macros evaluate their arguments exactly once. +// +// Examples: +// +// EXPECT_NE(5, Foo()); +// EXPECT_EQ(NULL, a_pointer); +// ASSERT_LT(i, array_size); +// ASSERT_GT(records.size(), 0) << "There is no record left."; + +#define EXPECT_EQ(expected, actual) \ + EXPECT_PRED_FORMAT2(::testing::internal:: \ + EqHelper::Compare, \ + expected, actual) +#define EXPECT_NE(expected, actual) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, expected, actual) +#define EXPECT_LE(val1, val2) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2) +#define EXPECT_LT(val1, val2) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2) +#define EXPECT_GE(val1, val2) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2) +#define EXPECT_GT(val1, val2) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2) + +#define GTEST_ASSERT_EQ(expected, actual) \ + ASSERT_PRED_FORMAT2(::testing::internal:: \ + EqHelper::Compare, \ + expected, actual) +#define GTEST_ASSERT_NE(val1, val2) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2) +#define GTEST_ASSERT_LE(val1, val2) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2) +#define GTEST_ASSERT_LT(val1, val2) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2) +#define GTEST_ASSERT_GE(val1, val2) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2) +#define GTEST_ASSERT_GT(val1, val2) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2) + +// Define macro GTEST_DONT_DEFINE_ASSERT_XY to 1 to omit the definition of +// ASSERT_XY(), which clashes with 
some users' own code. + +#if !GTEST_DONT_DEFINE_ASSERT_EQ +# define ASSERT_EQ(val1, val2) GTEST_ASSERT_EQ(val1, val2) +#endif + +#if !GTEST_DONT_DEFINE_ASSERT_NE +# define ASSERT_NE(val1, val2) GTEST_ASSERT_NE(val1, val2) +#endif + +#if !GTEST_DONT_DEFINE_ASSERT_LE +# define ASSERT_LE(val1, val2) GTEST_ASSERT_LE(val1, val2) +#endif + +#if !GTEST_DONT_DEFINE_ASSERT_LT +# define ASSERT_LT(val1, val2) GTEST_ASSERT_LT(val1, val2) +#endif + +#if !GTEST_DONT_DEFINE_ASSERT_GE +# define ASSERT_GE(val1, val2) GTEST_ASSERT_GE(val1, val2) +#endif + +#if !GTEST_DONT_DEFINE_ASSERT_GT +# define ASSERT_GT(val1, val2) GTEST_ASSERT_GT(val1, val2) +#endif + +// C-string Comparisons. All tests treat NULL and any non-NULL string +// as different. Two NULLs are equal. +// +// * {ASSERT|EXPECT}_STREQ(s1, s2): Tests that s1 == s2 +// * {ASSERT|EXPECT}_STRNE(s1, s2): Tests that s1 != s2 +// * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case +// * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case +// +// For wide or narrow string objects, you can use the +// {ASSERT|EXPECT}_??() macros. +// +// Don't depend on the order in which the arguments are evaluated, +// which is undefined. +// +// These macros evaluate their arguments exactly once. 
+ +#define EXPECT_STREQ(expected, actual) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual) +#define EXPECT_STRNE(s1, s2) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2) +#define EXPECT_STRCASEEQ(expected, actual) \ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual) +#define EXPECT_STRCASENE(s1, s2)\ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2) + +#define ASSERT_STREQ(expected, actual) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, expected, actual) +#define ASSERT_STRNE(s1, s2) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2) +#define ASSERT_STRCASEEQ(expected, actual) \ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, expected, actual) +#define ASSERT_STRCASENE(s1, s2)\ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2) + +// Macros for comparing floating-point numbers. +// +// * {ASSERT|EXPECT}_FLOAT_EQ(expected, actual): +// Tests that two float values are almost equal. +// * {ASSERT|EXPECT}_DOUBLE_EQ(expected, actual): +// Tests that two double values are almost equal. +// * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error): +// Tests that v1 and v2 are within the given distance to each other. +// +// Google Test uses ULP-based comparison to automatically pick a default +// error bound that is appropriate for the operands. See the +// FloatingPoint template class in gtest-internal.h if you are +// interested in the implementation details. 
+ +#define EXPECT_FLOAT_EQ(expected, actual)\ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ, \ + expected, actual) + +#define EXPECT_DOUBLE_EQ(expected, actual)\ + EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ, \ + expected, actual) + +#define ASSERT_FLOAT_EQ(expected, actual)\ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ, \ + expected, actual) + +#define ASSERT_DOUBLE_EQ(expected, actual)\ + ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ, \ + expected, actual) + +#define EXPECT_NEAR(val1, val2, abs_error)\ + EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \ + val1, val2, abs_error) + +#define ASSERT_NEAR(val1, val2, abs_error)\ + ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \ + val1, val2, abs_error) + +// These predicate format functions work on floating-point values, and +// can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g. +// +// EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0); + +// Asserts that val1 is less than, or almost equal to, val2. Fails +// otherwise. In particular, it fails if either val1 or val2 is NaN. +GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2, + float val1, float val2); +GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2, + double val1, double val2); + + +#if GTEST_OS_WINDOWS + +// Macros that test for HRESULT failure and success, these are only useful +// on Windows, and rely on Windows SDK macros and APIs to compile. +// +// * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr) +// +// When expr unexpectedly fails or succeeds, Google Test prints the +// expected result and the actual result with both a human-readable +// string representation of the error, if available, as well as the +// hex result code. 
+# define EXPECT_HRESULT_SUCCEEDED(expr) \ + EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr)) + +# define ASSERT_HRESULT_SUCCEEDED(expr) \ + ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr)) + +# define EXPECT_HRESULT_FAILED(expr) \ + EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr)) + +# define ASSERT_HRESULT_FAILED(expr) \ + ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr)) + +#endif // GTEST_OS_WINDOWS + +// Macros that execute statement and check that it doesn't generate new fatal +// failures in the current thread. +// +// * {ASSERT|EXPECT}_NO_FATAL_FAILURE(statement); +// +// Examples: +// +// EXPECT_NO_FATAL_FAILURE(Process()); +// ASSERT_NO_FATAL_FAILURE(Process()) << "Process() failed"; +// +#define ASSERT_NO_FATAL_FAILURE(statement) \ + GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_FATAL_FAILURE_) +#define EXPECT_NO_FATAL_FAILURE(statement) \ + GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_) + +// Causes a trace (including the source file path, the current line +// number, and the given message) to be included in every test failure +// message generated by code in the current scope. The effect is +// undone when the control leaves the current scope. +// +// The message argument can be anything streamable to std::ostream. +// +// In the implementation, we include the current line number as part +// of the dummy variable name, thus allowing multiple SCOPED_TRACE()s +// to appear in the same block - as long as they are on different +// lines. +#define SCOPED_TRACE(message) \ + ::testing::internal::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\ + __FILE__, __LINE__, ::testing::Message() << (message)) + +// Compile-time assertion for type equality. +// StaticAssertTypeEq() compiles iff type1 and type2 are +// the same type. The value it returns is not interesting. 
+// +// Instead of making StaticAssertTypeEq a class template, we make it a +// function template that invokes a helper class template. This +// prevents a user from misusing StaticAssertTypeEq by +// defining objects of that type. +// +// CAVEAT: +// +// When used inside a method of a class template, +// StaticAssertTypeEq() is effective ONLY IF the method is +// instantiated. For example, given: +// +// template class Foo { +// public: +// void Bar() { testing::StaticAssertTypeEq(); } +// }; +// +// the code: +// +// void Test1() { Foo foo; } +// +// will NOT generate a compiler error, as Foo::Bar() is never +// actually instantiated. Instead, you need: +// +// void Test2() { Foo foo; foo.Bar(); } +// +// to cause a compiler error. +template +bool StaticAssertTypeEq() { + (void)internal::StaticAssertTypeEqHelper(); + return true; +} + +// Defines a test. +// +// The first parameter is the name of the test case, and the second +// parameter is the name of the test within the test case. +// +// The convention is to end the test case name with "Test". For +// example, a test case for the Foo class can be named FooTest. +// +// The user should put his test code between braces after using this +// macro. Example: +// +// TEST(FooTest, InitializesCorrectly) { +// Foo foo; +// EXPECT_TRUE(foo.StatusIsOK()); +// } + +// Note that we call GetTestTypeId() instead of GetTypeId< +// ::testing::Test>() here to get the type ID of testing::Test. This +// is to work around a suspected linker bug when using Google Test as +// a framework on Mac OS X. The bug causes GetTypeId< +// ::testing::Test>() to return different values depending on whether +// the call is from the Google Test framework itself or from user test +// code. GetTestTypeId() is guaranteed to always return the same +// value, as it always calls GetTypeId<>() from the Google Test +// framework. 
+#define GTEST_TEST(test_case_name, test_name)\ + GTEST_TEST_(test_case_name, test_name, \ + ::testing::Test, ::testing::internal::GetTestTypeId()) + +// Define this macro to 1 to omit the definition of TEST(), which +// is a generic name and clashes with some other libraries. +#if !GTEST_DONT_DEFINE_TEST +# define TEST(test_case_name, test_name) GTEST_TEST(test_case_name, test_name) +#endif + +// Defines a test that uses a test fixture. +// +// The first parameter is the name of the test fixture class, which +// also doubles as the test case name. The second parameter is the +// name of the test within the test case. +// +// A test fixture class must be declared earlier. The user should put +// his test code between braces after using this macro. Example: +// +// class FooTest : public testing::Test { +// protected: +// virtual void SetUp() { b_.AddElement(3); } +// +// Foo a_; +// Foo b_; +// }; +// +// TEST_F(FooTest, InitializesCorrectly) { +// EXPECT_TRUE(a_.StatusIsOK()); +// } +// +// TEST_F(FooTest, ReturnsElementCountCorrectly) { +// EXPECT_EQ(0, a_.size()); +// EXPECT_EQ(1, b_.size()); +// } + +#define TEST_F(test_fixture, test_name)\ + GTEST_TEST_(test_fixture, test_name, test_fixture, \ + ::testing::internal::GetTypeId()) + +} // namespace testing + +// Use this function in main() to run all tests. It returns 0 if all +// tests are successful, or 1 otherwise. +// +// RUN_ALL_TESTS() should be invoked after the command line has been +// parsed by InitGoogleTest(). +// +// This function was formerly a macro; thus, it is in the global +// namespace and has an all-caps name. 
+int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_;
+
+inline int RUN_ALL_TESTS() {
+ return ::testing::UnitTest::GetInstance()->Run();
+}
+
+#endif // GTEST_INCLUDE_GTEST_GTEST_H_
diff --git a/test/gtest/common/main.cc b/test/gtest/common/main.cc
new file mode 100644
index 0000000..91bb6c3
--- /dev/null
+++ b/test/gtest/common/main.cc
@@ -0,0 +1,109 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED.
+* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED.
+* See file LICENSE for terms.
+*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include
+#include
+#include
+#include
+#include "test_helpers.h"
+#include "tap.h"
+
+
+/* Seed passed to srand() in main(). -1 means no seed was given with -s, in
+ * which case main() derives one from the current time. */
+static int ucs_gtest_random_seed = -1;
+int ucs::perf_retry_count = 0; /* 0 - don't check performance */
+double ucs::perf_retry_interval = 1.0; /* overridden by the -i option */
+
+
+/*
+ * Parse the options of the test harness itself with getopt():
+ *   -s <seed>      stored in ucs_gtest_random_seed (atoi)
+ *   -p <count>     stored in ucs::perf_retry_count (atoi)
+ *   -i <interval>  stored in ucs::perf_retry_interval (atof)
+ * Any other option prints the usage line to stderr and exits with status 1.
+ * main() calls this after InitGoogleTest() has processed argc/argv.
+ */
+void parse_test_opts(int argc, char **argv) {
+    int c;
+    while ((c = getopt(argc, argv, "s:p:i:")) != -1) {
+        switch (c) {
+        case 's':
+            ucs_gtest_random_seed = atoi(optarg);
+            break;
+        case 'p':
+            ucs::perf_retry_count = atoi(optarg);
+            break;
+        case 'i':
+            ucs::perf_retry_interval = atof(optarg);
+            break;
+        default:
+            fprintf(stderr, "Usage: gtest [ -s rand-seed ] [ -p count ] [ -i interval ]\n");
+            exit(1);
+        }
+    }
+}
+
+/*
+ * Set the environment variable UCS_CONFIG_PREFIX + name to the given value,
+ * but only if it is not already present in the environment, so a value chosen
+ * by the user is never overridden. The full name is built in a 128-byte
+ * buffer; snprintf() truncates longer names.
+ */
+static void modify_config_for_valgrind(const char *name, const char *value)
+{
+    char full_name[128];
+
+    snprintf(full_name, sizeof(full_name), "%s%s", UCS_CONFIG_PREFIX, name);
+
+    if (getenv(full_name) == NULL) {
+        UCS_TEST_MESSAGE << " Setting for valgrind: " << full_name << "=" << value;
+        setenv(full_name, value, 1);
+    }
+}
+
+/*
+ * Test program entry point. In order: initializes gtest, optionally installs
+ * the TAP listener (GTEST_TAP environment variable), parses the harness
+ * options, seeds rand(), adjusts the configuration when running on valgrind,
+ * starts the watchdog, runs all tests, stops the watchdog and analyzes the
+ * results. Returns the value of RUN_ALL_TESTS(), or the watchdog_start()
+ * error code if the watchdog could not be started.
+ */
+int main(int argc, char **argv) {
+    // coverity[fun_call_w_exception]: uncaught exceptions cause nonzero exit anyway, so don't warn.
+    ::testing::InitGoogleTest(&argc, argv);
+
+    char *str = getenv("GTEST_TAP");
+    int ret;
+
+    /* Append TAP Listener */
+    if (str) {
+        /* any positive GTEST_TAP value adds the TAP listener; the value 1
+         * additionally removes gtest's default result printer */
+        if (0 < strtol(str, NULL, 0)) {
+            testing::TestEventListeners& listeners = testing::UnitTest::GetInstance()->listeners();
+            if (1 == strtol(str, NULL, 0)) {
+                delete listeners.Release(listeners.default_result_printer());
+            }
+            listeners.Append(new tap::TapListener());
+        }
+    }
+
+    parse_test_opts(argc, argv);
+    if (ucs_gtest_random_seed == -1) {
+        /* no -s option: pick a seed in the range 0..32767 from the clock */
+        ucs_gtest_random_seed = time(NULL) % 32768;
+    }
+    /* the seed is printed so that a run can be repeated with -s */
+    UCS_TEST_MESSAGE << "Using random seed of " << ucs_gtest_random_seed;
+    srand(ucs_gtest_random_seed);
+    if (RUNNING_ON_VALGRIND) {
+        /* each value below applies only if the variable is not already set */
+        modify_config_for_valgrind("IB_RX_QUEUE_LEN", "512");
+        modify_config_for_valgrind("IB_RX_BUFS_GROW", "512");
+        modify_config_for_valgrind("MM_RX_BUFS_GROW", "128");
+        modify_config_for_valgrind("IB_TX_QUEUE_LEN", "128");
+        modify_config_for_valgrind("IB_TX_BUFS_GROW", "64");
+        modify_config_for_valgrind("RC_TX_CQ_LEN", "256");
+        modify_config_for_valgrind("CM_TIMEOUT", "600ms");
+        modify_config_for_valgrind("TCP_TX_BUFS_GROW", "512");
+        modify_config_for_valgrind("TCP_RX_BUFS_GROW", "512");
+        modify_config_for_valgrind("TCP_RX_SEG_SIZE", "16k");
+        ucm_global_opts.enable_malloc_reloc = 1; /* Test reloc hooks with valgrind,
+                                                    though it's generally unsafe. */
+    }
+    ucs_global_opts.warn_unused_env_vars = 0; /* Avoid warnings if not all
+                                                 config vars are being used */
+
+    ret = ucs::watchdog_start();
+    if (ret != 0) {
+        ADD_FAILURE() << "Unable to start watchdog - abort";
+        return ret;
+    }
+
+    ret = RUN_ALL_TESTS();
+
+    ucs::watchdog_stop();
+
+    ucs::analyze_test_results();
+
+    return ret;
+}
diff --git a/test/gtest/common/mem_buffer.cc b/test/gtest/common/mem_buffer.cc
new file mode 100644
index 0000000..fcad446
--- /dev/null
+++ b/test/gtest/common/mem_buffer.cc
@@ -0,0 +1,293 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+ * Copyright (C) Advanced Micro Devices, Inc. 2019.
ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "mem_buffer.h"
+
+#include
+#include
+#include
+
+#if HAVE_CUDA
+# include
+# include
+
+/* Evaluate a CUDA runtime call and abort the current test with the
+ * stringified call in the message if it does not return cudaSuccess */
+#define CUDA_CALL(_code) \
+    do { \
+        cudaError_t cerr = _code; \
+        if (cerr != cudaSuccess) { \
+            UCS_TEST_ABORT(# _code << " failed"); \
+        } \
+    } while (0)
+
+#endif
+
+#if HAVE_ROCM
+# include
+
+/* Same as CUDA_CALL, for HIP runtime calls: abort the current test if the
+ * call does not return hipSuccess */
+#define ROCM_CALL(_code) \
+    do { \
+        hipError_t cerr = _code; \
+        if (cerr != hipSuccess) { \
+            UCS_TEST_ABORT(# _code << " failed"); \
+        } \
+    } while (0)
+
+#endif
+
+
+/*
+ * Return the memory types this build can allocate: host memory always, plus
+ * the cuda / rocm types (plain and managed) when HAVE_CUDA / HAVE_ROCM are
+ * enabled. The list is built once in a function-local static vector and
+ * returned by value.
+ */
+std::vector mem_buffer::supported_mem_types()
+{
+    static std::vector vec;
+
+    if (vec.empty()) {
+        vec.push_back(UCS_MEMORY_TYPE_HOST);
+#if HAVE_CUDA
+        vec.push_back(UCS_MEMORY_TYPE_CUDA);
+        vec.push_back(UCS_MEMORY_TYPE_CUDA_MANAGED);
+#endif
+#if HAVE_ROCM
+        vec.push_back(UCS_MEMORY_TYPE_ROCM);
+        vec.push_back(UCS_MEMORY_TYPE_ROCM_MANAGED);
+#endif
+    }
+
+    return vec;
+}
+
+/*
+ * Allocate 'size' bytes of the given memory type and return the pointer.
+ * Host memory comes from malloc(); cuda / rocm memory from the matching
+ * runtime allocator. An allocation failure aborts the test. A memory type
+ * which is not handled in this build skips the test (UCS_TEST_SKIP_R).
+ */
+void *mem_buffer::allocate(size_t size, ucs_memory_type_t mem_type)
+{
+    void *ptr;
+
+    switch (mem_type) {
+    case UCS_MEMORY_TYPE_HOST:
+        ptr = malloc(size);
+        if (ptr == NULL) {
+            UCS_TEST_ABORT("malloc() failed");
+        }
+        return ptr;
+#if HAVE_CUDA
+    case UCS_MEMORY_TYPE_CUDA:
+        CUDA_CALL(cudaMalloc(&ptr, size));
+        return ptr;
+    case UCS_MEMORY_TYPE_CUDA_MANAGED:
+        CUDA_CALL(cudaMallocManaged(&ptr, size));
+        return ptr;
+#endif
+#if HAVE_ROCM
+    case UCS_MEMORY_TYPE_ROCM:
+        ROCM_CALL(hipMalloc(&ptr, size));
+        return ptr;
+    case UCS_MEMORY_TYPE_ROCM_MANAGED:
+        ROCM_CALL(hipMallocManaged(&ptr, size));
+        return ptr;
+#endif
+    default:
+        UCS_TEST_SKIP_R(std::string(ucs_memory_type_names[mem_type]) +
+                        " memory is not supported");
+    }
+}
+
+/*
+ * Release a buffer obtained from allocate() with the same memory type.
+ * Memory types which are not handled in this build are silently ignored.
+ */
+void mem_buffer::release(void *ptr, ucs_memory_type_t mem_type)
+{
+    switch (mem_type) {
+    case UCS_MEMORY_TYPE_HOST:
+        free(ptr);
+        break;
+#if HAVE_CUDA
+    case UCS_MEMORY_TYPE_CUDA:
+    case UCS_MEMORY_TYPE_CUDA_MANAGED:
+        CUDA_CALL(cudaFree(ptr));
+        break;
+#endif
+#if HAVE_ROCM
+    case UCS_MEMORY_TYPE_ROCM:
+    case UCS_MEMORY_TYPE_ROCM_MANAGED:
+        ROCM_CALL(hipFree(ptr));
+        break;
+#endif
+    default:
+        break;
+    }
+}
+
+/*
+ * Fill a CPU-accessible buffer with the pattern sequence: the first 64-bit
+ * word is 'seed', each following word is pat() of the previous one.
+ */
+void mem_buffer::pattern_fill(void *buffer, size_t length, uint64_t seed)
+{
+    uint64_t *ptr = (uint64_t*)buffer;
+    char *end = (char *)buffer + length;
+
+    /* write whole 64-bit words while a full word still fits */
+    while ((char*)(ptr + 1) <= end) {
+        *ptr = seed;
+        seed = pat(seed);
+        ++ptr;
+    }
+    /* tail shorter than a word: copy the leading bytes of the next pattern
+     * word (copies 0 bytes when length is a multiple of the word size) */
+    memcpy(ptr, &seed, end - (char*)ptr);
+}
+
+/*
+ * Verify that a CPU-accessible buffer holds the sequence written by
+ * pattern_fill() with the same seed. The first mismatch aborts the test
+ * with the offset and the expected / actual values.
+ */
+void mem_buffer::pattern_check(const void *buffer, size_t length, uint64_t seed)
+{
+    const char* end = (const char*)buffer + length;
+    const uint64_t *ptr = (const uint64_t*)buffer;
+
+    while ((const char*)(ptr + 1) <= end) {
+        if (*ptr != seed) {
+            UCS_TEST_ABORT("At offset " << ((const char*)ptr - (const char*)buffer) << ": " <<
+                           "Expected: 0x" << std::hex << seed << " " <<
+                           "Got: 0x" << std::hex << (*ptr) << std::dec);
+        }
+        seed = pat(seed);
+        ++ptr;
+    }
+
+    /* compare the partial last word, if any, under a mask of remainder*8 bits */
+    size_t remainder = (end - (const char*)ptr);
+    if (remainder > 0) {
+        ucs_assert(remainder < sizeof(*ptr));
+        uint64_t mask = UCS_MASK_SAFE(remainder * 8 * sizeof(char));
+        uint64_t value = 0;
+        memcpy(&value, ptr, remainder);
+        if (value != (seed & mask)) {
+            UCS_TEST_ABORT("At offset " << ((const char*)ptr - (const char*)buffer) <<
+                           " (remainder " << remainder << ") : " <<
+                           "Expected: 0x" << std::hex << (seed & mask) << " " <<
+                           "Mask: 0x" << std::hex << mask << " " <<
+                           "Got: 0x" << std::hex << value << std::dec);
+        }
+    }
+}
+
+/*
+ * Check the pattern using the first 64-bit word of the buffer as the seed.
+ * Buffers of sizeof(uint64_t) bytes or fewer are not checked at all.
+ */
+void mem_buffer::pattern_check(const void *buffer, size_t length)
+{
+    if (length > sizeof(uint64_t)) {
+        pattern_check(buffer, length, *(const uint64_t*)buffer);
+    }
+}
+
+/*
+ * Fill a buffer of any memory type. CPU-accessible memory is filled in
+ * place; otherwise the pattern is generated in a temporary ucs::auto_buffer
+ * and copied to the destination with copy_to().
+ */
+void mem_buffer::pattern_fill(void *buffer, size_t length, uint64_t seed,
+                              ucs_memory_type_t mem_type)
+{
+    if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type)) {
+        pattern_fill(buffer, length, seed);
+    } else {
+        ucs::auto_buffer temp(length);
+        pattern_fill(*temp, length, seed);
+        copy_to(buffer, *temp, length, mem_type);
+    }
+}
+
+/*
+ * Check a buffer of any memory type. CPU-accessible memory is checked in
+ * place; otherwise the content is first copied into a temporary
+ * ucs::auto_buffer with copy_from().
+ */
+void mem_buffer::pattern_check(const void *buffer, size_t length, uint64_t seed,
+                               ucs_memory_type_t mem_type)
+{
+    if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type)) {
+        pattern_check(buffer, length, seed);
+    } else {
+        ucs::auto_buffer temp(length);
+        copy_from(*temp, buffer, length, mem_type);
+        pattern_check(*temp, length, seed);
+    }
+}
+
+/*
+ * Copy 'length' bytes from host memory 'src' to 'dst' of the given memory
+ * type. Host and managed types use memcpy(); cuda / rocm device memory uses
+ * the runtime's host-to-device copy followed by a device synchronize. Any
+ * other memory type aborts the test.
+ */
+void mem_buffer::copy_to(void *dst, const void *src, size_t length,
+                         ucs_memory_type_t dst_mem_type)
+{
+    switch (dst_mem_type) {
+    case UCS_MEMORY_TYPE_HOST:
+    case UCS_MEMORY_TYPE_CUDA_MANAGED:
+    case UCS_MEMORY_TYPE_ROCM_MANAGED:
+        memcpy(dst, src, length);
+        break;
+#if HAVE_CUDA
+    case UCS_MEMORY_TYPE_CUDA:
+        CUDA_CALL(cudaMemcpy(dst, src, length, cudaMemcpyHostToDevice));
+        CUDA_CALL(cudaDeviceSynchronize());
+        break;
+#endif
+#if HAVE_ROCM
+    case UCS_MEMORY_TYPE_ROCM:
+        ROCM_CALL(hipMemcpy(dst, src, length, hipMemcpyHostToDevice));
+        ROCM_CALL(hipDeviceSynchronize());
+        break;
+#endif
+    default:
+        abort_wrong_mem_type(dst_mem_type);
+    }
+}
+
+/*
+ * Copy 'length' bytes from 'src' of the given memory type to host memory
+ * 'dst'. Mirror image of copy_to(), using device-to-host copies.
+ */
+void mem_buffer::copy_from(void *dst, const void *src, size_t length,
+                           ucs_memory_type_t src_mem_type)
+{
+    switch (src_mem_type) {
+    case UCS_MEMORY_TYPE_HOST:
+    case UCS_MEMORY_TYPE_CUDA_MANAGED:
+    case UCS_MEMORY_TYPE_ROCM_MANAGED:
+        memcpy(dst, src, length);
+        break;
+#if HAVE_CUDA
+    case UCS_MEMORY_TYPE_CUDA:
+        CUDA_CALL(cudaMemcpy(dst, src, length, cudaMemcpyDeviceToHost));
+        CUDA_CALL(cudaDeviceSynchronize());
+        break;
+#endif
+#if HAVE_ROCM
+    case UCS_MEMORY_TYPE_ROCM:
+        ROCM_CALL(hipMemcpy(dst, src, length, hipMemcpyDeviceToHost));
+        ROCM_CALL(hipDeviceSynchronize());
+        break;
+#endif
+    default:
+        abort_wrong_mem_type(src_mem_type);
+    }
+}
+
+/*
+ * Return true if the first 'length' bytes of 'buffer' (of the given memory
+ * type) equal those of the host buffer 'expected'. Memory which is not
+ * CPU-accessible is first copied into a temporary ucs::auto_buffer.
+ */
+bool mem_buffer::compare(const void *expected, const void *buffer,
+                         size_t length, ucs_memory_type_t mem_type)
+{
+    if (UCP_MEM_IS_ACCESSIBLE_FROM_CPU(mem_type)) {
+        return memcmp(expected, buffer, length) == 0;
+    } else {
+        ucs::auto_buffer temp(length);
+        copy_from(*temp, buffer, length, mem_type);
+        return memcmp(expected, *temp, length) == 0;
+    }
+}
+
+/* Look up the name of a memory type in ucs_memory_type_names; the index is
+ * not range-checked here */
+std::string mem_buffer::mem_type_name(ucs_memory_type_t mem_type)
+{
+    return ucs_memory_type_names[mem_type];
+}
+
+/* Abort the current test, reporting the unexpected memory type by name */
+void mem_buffer::abort_wrong_mem_type(ucs_memory_type_t mem_type) {
+    UCS_TEST_ABORT("Wrong buffer memory type " + mem_type_name(mem_type));
+}
+
+/* Next word of the pattern sequence: 'prev' shifted left by one bit, with
+ * the parity of (prev & polynom) as the new lowest bit */
+uint64_t mem_buffer::pat(uint64_t prev) {
+    /* LFSR pattern */
+    static const uint64_t polynom = 1337;
+    return (prev << 1) | (__builtin_parityl(prev & polynom) & 1);
+}
+
+/* Allocate a buffer of 'size' bytes of the given memory type; the buffer is
+ * released by the destructor */
+mem_buffer::mem_buffer(size_t size, ucs_memory_type_t mem_type) :
+    m_mem_type(mem_type), m_ptr(allocate(size, mem_type)), m_size(size) {
+}
+
+mem_buffer::~mem_buffer() {
+    release(ptr(), mem_type());
+}
+
+ucs_memory_type_t mem_buffer::mem_type() const {
+    return m_mem_type;
+}
+
+void *mem_buffer::ptr() const {
+    return m_ptr;
+}
+
+size_t mem_buffer::size() const {
+    return m_size;
+}
diff --git a/test/gtest/common/mem_buffer.h b/test/gtest/common/mem_buffer.h
new file mode 100644
index 0000000..134e366
--- /dev/null
+++ b/test/gtest/common/mem_buffer.h
@@ -0,0 +1,82 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#ifndef GTEST_MEM_BUFFER_H_
+#define GTEST_MEM_BUFFER_H_
+
+#include
+#include
+#include
+#include
+
+
+/**
+ * Wrapper and utility functions for memory type buffers, e.g. buffers which are
+ * not necessarily allocated on host memory, such as cuda, rocm, etc.
+ */
+class mem_buffer {
+public:
+    /* return the memory types this build can allocate: host memory always,
+     * cuda / rocm types only when HAVE_CUDA / HAVE_ROCM are enabled */
+    static std::vector supported_mem_types();
+
+    /* allocate buffer of a given memory type; aborts the test if a host
+     * allocation fails, skips it if the memory type is not handled */
+    static void *allocate(size_t size, ucs_memory_type_t mem_type);
+
+    /* release buffer of a given memory type; unhandled types are ignored */
+    static void release(void *ptr, ucs_memory_type_t mem_type);
+
+    /* fill pattern in a host-accessible buffer */
+    static void pattern_fill(void *buffer, size_t length, uint64_t seed);
+
+    /* check pattern in a host-accessible buffer; aborts the test on the
+     * first mismatch */
+    static void pattern_check(const void *buffer, size_t length, uint64_t seed);
+
+    /* check pattern in a host-accessible buffer, take seed from 1st word;
+     * buffers no longer than one 64-bit word are not checked */
+    static void pattern_check(const void *buffer, size_t length);
+
+    /* fill pattern in a memtype buffer */
+    static void pattern_fill(void *buffer, size_t length, uint64_t seed,
+                             ucs_memory_type_t mem_type);
+
+    /* check pattern in a memtype buffer */
+    static void pattern_check(const void *buffer, size_t length, uint64_t seed,
+                              ucs_memory_type_t mem_type);
+
+    /* copy from host memory to memtype buffer */
+    static void copy_to(void *dst, const void *src, size_t length,
+                        ucs_memory_type_t dst_mem_type);
+
+    /* copy from memtype buffer to host memory */
+    static void copy_from(void *dst, const void *src, size_t length,
+                          ucs_memory_type_t src_mem_type);
+
+    /* compare memtype buffer with host memory, return true if equal */
+    static bool compare(const void *expected, const void *buffer,
+                        size_t length, ucs_memory_type_t mem_type);
+
+    /* return the string name of a memory type */
+    static std::string mem_type_name(ucs_memory_type_t mem_type);
+
+    /* allocate 'size' bytes of 'mem_type' memory, owned by this object.
+     * NOTE(review): no copy constructor is declared while the destructor
+     * releases m_ptr, so copying an instance would presumably release the
+     * same pointer twice - confirm that no caller copies a mem_buffer */
+    mem_buffer(size_t size, ucs_memory_type_t mem_type);
+    /* release the buffer with release(ptr(), mem_type()) */
+    virtual ~mem_buffer();
+
+    /* memory type given to the constructor */
+    ucs_memory_type_t mem_type() const;
+
+    /* start of the allocated buffer */
+    void *ptr() const;
+
+    /* size in bytes given to the constructor */
+    size_t size() const;
+
+private:
+    /* abort the test, reporting an unexpected memory type by name */
+    static void abort_wrong_mem_type(ucs_memory_type_t mem_type);
+
+    /* next 64-bit word of the fill pattern, computed from the previous one */
+    static uint64_t pat(uint64_t prev);
+
+    /* all three members are fixed at construction time */
+    const ucs_memory_type_t m_mem_type;
+    void * const m_ptr;
+    const size_t m_size;
+};
+
+
+#endif
diff --git
a/test/gtest/common/tap.h b/test/gtest/common/tap.h new file mode 100644 index 0000000..771f323 --- /dev/null +++ b/test/gtest/common/tap.h @@ -0,0 +1,252 @@ +/* + * The MIT License + * + * Copyright (c) 2011 Bruno P. Kinoshita + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @author Bruno P. 
Kinoshita + * @since 0.1 + */ + +#ifndef TAP_H_ +#define TAP_H_ + +#include +#include +#include +#include +#include +#include + +namespace tap { + +#ifdef GTEST_TAP_13_DIAGNOSTIC +// based on http://stackoverflow.com/a/7724536/831180 +static std::string replace_all_copy( + std::string const& original, + std::string const& before, + std::string const& after +) { + using namespace std; + + if (before == after) return string(original); + + string retval; + if (before.length() == after.length()) retval.reserve(original.size()); + + basic_string ::const_iterator end = original.end(); + basic_string ::const_iterator current = original.begin(); + basic_string ::const_iterator next = + search(current, end, before.begin(), before.end()); + + while ( next != end ) { + retval.append( current, next ); + retval.append( after ); + current = next + before.size(); + next = search(current, end, before.begin(), before.end()); + } + retval.append( current, next ); + return retval; +} +#endif + +class TestResult { + + private: + int number; + std::string status; + std::string name; + std::string comment; + bool skip; + + public: + std::string getComment() const { + std::stringstream ss; + if (this->skip) { + ss << "# SKIP " << this->comment; + } else if (!this->comment.empty()) { + ss << "# " << this->comment; + } + return ss.str(); + } + + const std::string& getName() const { + return name; + } + + int getNumber() const { + return number; + } + + const std::string& getStatus() const { + return status; + } + + bool getSkip() const { + return skip; + } + + void setComment(const std::string& comment) { + this->comment = comment; + } + + void setName(const std::string& name) { + this->name = name; + } + + void setNumber(int number) { + this->number = number; + } + + void setStatus(const std::string& status) { + this->status = status; + } + + void setSkip(bool skip) { + this->skip = skip; + } + + std::string toString() const { + std::stringstream ss; + ss << this->status << " " << 
this->number << " " << this->name; +#ifdef GTEST_TAP_13_DIAGNOSTIC + std::string comment_text = this->getComment(); + if (!comment_text.empty()) { + ss << std::endl + << "# Diagnostic" << std::endl + << " ---" << std::endl + << " " << replace_all_copy(this->getComment(), "\n", "\n "); + } +#endif + return ss.str(); + } +}; + +class TestSet { + + private: + std::list testResults; + + public: + const std::list& getTestResults() const { + return testResults; + } + + void addTestResult(TestResult& testResult) { + testResult.setNumber((this->getNumberOfTests() + 1)); + this->testResults.push_back(testResult); + } + + int getNumberOfTests() const { + return this->testResults.size(); + } + + std::string toString() const { + std::stringstream ss; + ss << "1.." << this->getNumberOfTests() << std::endl; + for (std::list::const_iterator ci = this->testResults.begin(); + ci != this->testResults.end(); ++ci) { + TestResult testResult = *ci; + ss << testResult.toString() << std::endl; + } + return ss.str(); + } +}; + +class TapListener: public ::testing::EmptyTestEventListener { + + private: + std::map testCaseTestResultMap; + + void addTapTestResult(const testing::TestInfo& testInfo) { + tap::TestResult tapResult; + tapResult.setName(testInfo.name()); + tapResult.setSkip(!testInfo.should_run()); + + const testing::TestResult *testResult = testInfo.result(); + int number = testResult->total_part_count(); + tapResult.setNumber(number-1); + if (testResult->HasFatalFailure()) { + tapResult.setStatus("Bail out!"); + } else if (testResult->Failed()) { + tapResult.setStatus("not ok"); + tapResult.setComment(testResult->GetTestPartResult(number-1).summary()); + } else { + tapResult.setStatus("ok"); + } + + this->addNewOrUpdate(testInfo.test_case_name(), tapResult); + } + + std::string getCommentOrDirective(const std::string& comment, bool skip) { + std::stringstream commentText; + + if (skip) { + commentText << " # SKIP " << comment; + } else if (!comment.empty()) { + commentText << " 
# " << comment; + } + + return commentText.str(); + } + + void addNewOrUpdate(const std::string& testCaseName, tap::TestResult testResult) { + std::map::const_iterator ci = + this->testCaseTestResultMap.find(testCaseName); + if (ci != this->testCaseTestResultMap.end()) { + tap::TestSet testSet = ci->second; + testSet.addTestResult(testResult); + this->testCaseTestResultMap[testCaseName] = testSet; + } else { + tap::TestSet testSet; + testSet.addTestResult(testResult); + this->testCaseTestResultMap[testCaseName] = testSet; + } + } + +public: + virtual void OnTestEnd(const testing::TestInfo& testInfo) { + //printf("%s %d - %s\n", testInfo.result()->Passed() ? "ok" : "not ok", this->testNumber, testInfo.name()); + this->addTapTestResult(testInfo); + } + + virtual void OnTestProgramEnd(const testing::UnitTest& unit_test) { + //--- Write the count and the word. + std::map::const_iterator ci; + for (ci = this->testCaseTestResultMap.begin(); + ci != this->testCaseTestResultMap.end(); ++ci) { + const tap::TestSet& testSet = ci->second; +#ifdef GTEST_TAP_PRINT_TO_STDOUT + std::cout << "TAP version 13" << std::endl; + std::cout << testSet.toString(); +#else + std::string ext = ".tap"; + std::ofstream tapFile; + tapFile.open((ci->first + ext).c_str()); + tapFile << testSet.toString(); + tapFile.close(); +#endif + } + } +}; + +} // namespace tap + +#endif // TAP_H_ diff --git a/test/gtest/common/test.cc b/test/gtest/common/test.cc new file mode 100644 index 0000000..ec3e4d3 --- /dev/null +++ b/test/gtest/common/test.cc @@ -0,0 +1,361 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test.h" + +#include +#include +#include + +#include + +namespace ucs { + +pthread_mutex_t test_base::m_logger_mutex = PTHREAD_MUTEX_INITIALIZER; +unsigned test_base::m_total_warnings = 0; +unsigned test_base::m_total_errors = 0; +std::vector test_base::m_errors; +std::vector test_base::m_warnings; + +test_base::test_base() : + m_state(NEW), + m_initialized(false), + m_num_threads(1), + m_num_valgrind_errors_before(0), + m_num_errors_before(0), + m_num_warnings_before(0), + m_num_log_handlers_before(0) +{ + push_config(); +} + +test_base::~test_base() { + while (!m_config_stack.empty()) { + pop_config(); + } + ucs_assertv_always(m_state == FINISHED || + m_state == SKIPPED || + m_state == NEW || /* can be skipped from a class constructor */ + m_state == ABORTED, + "state=%d", m_state); +} + +void test_base::set_num_threads(unsigned num_threads) { + if (m_state != NEW) { + GTEST_FAIL() << "Cannot modify number of threads after test is started, " + << "it must be done in the constructor."; + } + m_num_threads = num_threads; +} + +unsigned test_base::num_threads() const { + return m_num_threads; +} + +void test_base::set_config(const std::string& config_str) +{ + std::string::size_type pos = config_str.find("="); + std::string name, value; + bool optional; + + if (pos == std::string::npos) { + name = config_str; + value = ""; + } else { + name = config_str.substr(0, pos); + value = config_str.substr(pos + 1); + } + + optional = false; + if ((name.length() > 0) && name.at(name.length() - 1) == '?') { + name = name.substr(0, name.length() - 1); + optional = true; + } + + modify_config(name, value, optional); +} + +void test_base::get_config(const std::string& name, std::string& value, size_t max) +{ + ucs_status_t status; + + value.resize(max, '\0'); + status = ucs_global_opts_get_value(name.c_str(), + const_cast(value.c_str()), + max); + if (status != UCS_OK) { + GTEST_FAIL() << "Invalid UCS configuration for " << name + << ": " << 
ucs_status_string(status) + << "(" << status << ")"; + } +} + +void test_base::modify_config(const std::string& name, const std::string& value, + bool optional) +{ + ucs_status_t status = ucs_global_opts_set_value(name.c_str(), value.c_str()); + if ((status == UCS_ERR_NO_ELEM) && optional) { + m_env_stack.push_back(new scoped_setenv(("UCX_" + name).c_str(), + value.c_str())); + } else if (status != UCS_OK) { + GTEST_FAIL() << "Invalid UCS configuration for " << name << " : " + << value << ", error message: " + << ucs_status_string(status) << "(" << status << ")"; + } +} + +void test_base::push_config() +{ + ucs_global_opts_t new_opts; + /* save current options to the vector + * it is important to keep the first original global options at the first + * vector element to release it at the end. Otherwise, memtrack will not work + */ + m_config_stack.push_back(ucs_global_opts); + ucs_global_opts_clone(&new_opts); + ucs_global_opts = new_opts; +} + +void test_base::pop_config() +{ + ucs_global_opts_release(); + ucs_global_opts = m_config_stack.back(); + m_config_stack.pop_back(); +} + +ucs_log_func_rc_t +test_base::count_warns_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) +{ + pthread_mutex_lock(&m_logger_mutex); + if (level == UCS_LOG_LEVEL_ERROR) { + ++m_total_errors; + } else if (level == UCS_LOG_LEVEL_WARN) { + ++m_total_warnings; + } + pthread_mutex_unlock(&m_logger_mutex); + return UCS_LOG_FUNC_RC_CONTINUE; +} + +std::string test_base::format_message(const char *message, va_list ap) +{ + const size_t buffer_size = ucs_log_get_buffer_size(); + std::string buf(buffer_size, '\0'); + vsnprintf(&buf[0], buffer_size, message, ap); + buf.resize(strlen(buf.c_str())); + return buf; +} + +void test_base::push_debug_message_with_limit(std::vector& vec, + const std::string& message, + const size_t limit) { + if (vec.size() >= limit) { + UCS_TEST_ABORT("aborting after " + ucs::to_string(vec.size()) + + " 
error messages (" + message + ")"); + } + + vec.push_back(message); +} + +ucs_log_func_rc_t +test_base::hide_errors_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) +{ + if (level == UCS_LOG_LEVEL_ERROR) { + pthread_mutex_lock(&m_logger_mutex); + va_list ap2; + va_copy(ap2, ap); + m_errors.push_back(format_message(message, ap2)); + va_end(ap2); + level = UCS_LOG_LEVEL_DEBUG; + pthread_mutex_unlock(&m_logger_mutex); + } + + ucs_log_default_handler(file, line, function, level, message, ap); + return UCS_LOG_FUNC_RC_STOP; +} + +ucs_log_func_rc_t +test_base::hide_warns_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) +{ + if (level == UCS_LOG_LEVEL_WARN) { + pthread_mutex_lock(&m_logger_mutex); + va_list ap2; + va_copy(ap2, ap); + m_warnings.push_back(format_message(message, ap2)); + va_end(ap2); + level = UCS_LOG_LEVEL_DEBUG; + pthread_mutex_unlock(&m_logger_mutex); + } + + ucs_log_default_handler(file, line, function, level, message, ap); + return UCS_LOG_FUNC_RC_STOP; +} + +ucs_log_func_rc_t +test_base::wrap_errors_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) +{ + /* Ignore warnings about empty memory pool */ + if (level == UCS_LOG_LEVEL_ERROR) { + pthread_mutex_lock(&m_logger_mutex); + std::istringstream iss(format_message(message, ap)); + std::string text; + while (getline(iss, text, '\n')) { + push_debug_message_with_limit(m_errors, text, 1000); + UCS_TEST_MESSAGE << "< " << text << " >"; + } + pthread_mutex_unlock(&m_logger_mutex); + return UCS_LOG_FUNC_RC_STOP; + } + + return UCS_LOG_FUNC_RC_CONTINUE; +} + +void test_base::SetUpProxy() { + ucs_assert(m_state == NEW); + m_num_valgrind_errors_before = VALGRIND_COUNT_ERRORS; + m_num_warnings_before = m_total_warnings; + m_num_errors_before = m_total_errors; + + m_errors.clear(); + m_warnings.clear(); 
+ m_num_log_handlers_before = ucs_log_num_handlers(); + ucs_log_push_handler(count_warns_logger); + + try { + check_skip_test(); + init(); + m_initialized = true; + m_state = RUNNING; + } catch (test_skip_exception& e) { + skipped(e); + } catch (test_abort_exception&) { + m_state = ABORTED; + } +} + +void test_base::TearDownProxy() { + ucs_assertv_always(m_state == FINISHED || + m_state == SKIPPED || + m_state == ABORTED, + "state=%d", m_state); + + watchdog_signal(); + + if (m_initialized) { + cleanup(); + } + + m_errors.clear(); + + ucs_log_pop_handler(); + + unsigned num_not_removed = ucs_log_num_handlers() - m_num_log_handlers_before; + if (num_not_removed != 0) { + ADD_FAILURE() << num_not_removed << " log handlers were not removed"; + } + + int num_valgrind_errors = VALGRIND_COUNT_ERRORS - m_num_valgrind_errors_before; + if (num_valgrind_errors > 0) { + ADD_FAILURE() << "Got " << num_valgrind_errors << " valgrind errors during the test"; + } + int num_errors = m_total_errors - m_num_errors_before; + if (num_errors > 0) { + ADD_FAILURE() << "Got " << num_errors << " errors during the test"; + } + int num_warnings = m_total_warnings - m_num_warnings_before; + if (num_warnings > 0) { + ADD_FAILURE() << "Got " << num_warnings << " warnings during the test"; + } +} + +void test_base::run() +{ + if (num_threads() == 1) { + test_body(); + } else { + pthread_t threads[num_threads()]; + pthread_barrier_init(&m_barrier, NULL, num_threads()); + for (unsigned i = 0; i < num_threads(); ++i) { + pthread_create(&threads[i], NULL, thread_func, reinterpret_cast(this)); + } + for (unsigned i = 0; i < num_threads(); ++i) { + void *retval; + pthread_join(threads[i], &retval); + } + pthread_barrier_destroy(&m_barrier); + } +} + +void *test_base::thread_func(void *arg) +{ + test_base *self = reinterpret_cast(arg); + self->barrier(); /* Let all threads start in the same time */ + self->test_body(); + return NULL; +} + +void test_base::TestBodyProxy() { + if (m_state == RUNNING) { + 
try { + run(); + m_state = FINISHED; + } catch (test_skip_exception& e) { + skipped(e); + } catch (test_abort_exception&) { + m_state = ABORTED; + } catch (exit_exception& e) { + if (RUNNING_ON_VALGRIND) { + /* When running with valgrind, exec true/false instead of just + * exiting, to avoid warnings about memory leaks of objects + * allocated inside gtest run loop. + */ + const char *program = e.failed() ? "false" : "true"; + execlp(program, program, NULL); + } + + /* If not running on valgrind / execp failed, use exit() */ + exit(e.failed() ? 1 : 0); + } catch (...) { + m_state = ABORTED; + throw; + } + } +} + +void test_base::skipped(const test_skip_exception& e) { + std::string reason = e.what(); + if (reason.empty()) { + detail::message_stream("SKIP"); + } else { + detail::message_stream("SKIP") << "(" << reason << ")"; + } + m_state = SKIPPED; + skipped_tests.insert(::testing::UnitTest:: + GetInstance()->current_test_info()); +} + +void test_base::init() { +} + +void test_base::cleanup() { +} + +bool test_base::barrier() { + int ret = pthread_barrier_wait(&m_barrier); + if (ret == 0) { + return false; + } else if (ret == PTHREAD_BARRIER_SERIAL_THREAD) { + return true; + } else { + UCS_TEST_ABORT("pthread_barrier_wait() failed"); + } + +} + +} diff --git a/test/gtest/common/test.h b/test/gtest/common/test.h new file mode 100644 index 0000000..a593e7f --- /dev/null +++ b/test/gtest/common/test.h @@ -0,0 +1,307 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCS_TEST_BASE_H +#define UCS_TEST_BASE_H + +#include "test_helpers.h" + +#include +#include +#include + +#include +#include +#include + +namespace ucs { + +/** + * Base class for tests + */ +class test_base { +public: + test_base(); + virtual ~test_base(); + + void set_num_threads(unsigned num_threads); + unsigned num_threads() const; + + void get_config(const std::string& name, std::string& value, + size_t max); + virtual void set_config(const std::string& config_str); + virtual void modify_config(const std::string& name, const std::string& value, + bool optional = false); + virtual void push_config(); + virtual void pop_config(); + +protected: + class scoped_log_handler { +public: + scoped_log_handler(ucs_log_func_t handler) { + ucs_log_push_handler(handler); + } + ~scoped_log_handler() { + ucs_log_pop_handler(); + } + }; + + typedef enum { + NEW, RUNNING, SKIPPED, ABORTED, FINISHED + } state_t; + + typedef std::vector config_stack_t; + + void SetUpProxy(); + void TearDownProxy(); + void TestBodyProxy(); + static std::string format_message(const char *message, va_list ap); + + virtual void cleanup(); + virtual void init(); + bool barrier(); + + virtual void check_skip_test() = 0; + + virtual void test_body() = 0; + + static ucs_log_func_rc_t + count_warns_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap); + + static ucs_log_func_rc_t + hide_errors_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap); + + static ucs_log_func_rc_t + hide_warns_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap); + + static ucs_log_func_rc_t + wrap_errors_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap); + + state_t m_state; + bool m_initialized; + unsigned m_num_threads; + config_stack_t 
m_config_stack; + ptr_vector m_env_stack; + int m_num_valgrind_errors_before; + unsigned m_num_errors_before; + unsigned m_num_warnings_before; + unsigned m_num_log_handlers_before; + + static pthread_mutex_t m_logger_mutex; + static unsigned m_total_errors; + static unsigned m_total_warnings; + static std::vector m_errors; + static std::vector m_warnings; + +private: + void skipped(const test_skip_exception& e); + void run(); + static void push_debug_message_with_limit(std::vector& vec, + const std::string& message, + const size_t limit); + + static void *thread_func(void *arg); + + pthread_barrier_t m_barrier; +}; + +#define UCS_TEST_BASE_IMPL \ + virtual void SetUp() { \ + test_base::SetUpProxy(); \ + } \ + \ + virtual void TearDown() { \ + test_base::TearDownProxy(); \ + } \ + virtual void TestBody() { \ + test_base::TestBodyProxy(); \ + } + +/* + * Base class from generic tests + */ +class test : public testing::Test, public test_base { +public: + UCS_TEST_BASE_IMPL; +}; + +/* + * Base class from generic tests with user-defined parameter + */ +template +class test_with_param : public testing::TestWithParam, public test_base { +public: + UCS_TEST_BASE_IMPL; +}; + +/** + * UCT/UCP tests common storage for tests entities + */ +template +class entities_storage { +public: + const ucs::ptr_vector& entities() const { + return m_entities; + } + + T& sender() { + return *m_entities.front(); + } + + T& receiver() { + return *m_entities.back(); + } + + T& e(size_t idx) { + return m_entities.at(idx); + } + + bool is_loopback() { + return &sender() == &receiver(); + } + + void skip_loopback() { + if (is_loopback()) { + UCS_TEST_SKIP_R("loopback"); + } + } + + ucs::ptr_vector m_entities; +}; + +} + +#define UCS_TEST_SET_CONFIG(_dummy, _config) \ + set_config(_config); + +/* + * Helper macro + */ +#define UCS_TEST_(test_case_name, test_name, parent_class, parent_id, \ + num_threads, skip_cond, skip_reason, ...) 
\ +class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class { \ + public: \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() { \ + set_num_threads(num_threads); \ + UCS_PP_FOREACH(UCS_TEST_SET_CONFIG, _, __VA_ARGS__) \ + } \ + private: \ + virtual void check_skip_test() { \ + if (skip_cond) { \ + UCS_TEST_SKIP_R(skip_reason); \ + } \ + } \ + virtual void test_body(); \ + static ::testing::TestInfo* const test_info_;\ + GTEST_DISALLOW_COPY_AND_ASSIGN_(\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\ +}; \ +\ +::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\ + ::test_info_ = \ + ::testing::internal::MakeAndRegisterTestInfo( \ + #test_case_name, \ + (num_threads == 1) ? #test_name : #test_name "/mt_" #num_threads, \ + "", "", \ + (parent_id), \ + parent_class::SetUpTestCase, \ + parent_class::TearDownTestCase, \ + new ::testing::internal::TestFactoryImpl< \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>); \ +void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::test_body() + + +/* + * Define test fixture with modified configuration + */ +#define UCS_TEST_F(test_fixture, test_name, ...)\ + UCS_TEST_(test_fixture, test_name, test_fixture, \ + ::testing::internal::GetTypeId(), \ + 1, 0, "", __VA_ARGS__) + + +/* + * Define test fixture with modified configuration and check skip condition + */ +#define UCS_TEST_SKIP_COND_F(test_fixture, test_name, skip_cond, ...) \ + UCS_TEST_(test_fixture, test_name, test_fixture, \ + ::testing::internal::GetTypeId(), \ + 1, skip_cond, #skip_cond, __VA_ARGS__) + + +/* + * Define test fixture with multiple threads + */ +#define UCS_MT_TEST_F(test_fixture, test_name, num_threads, ...) \ + UCS_TEST_(test_fixture, test_name, test_fixture, \ + ::testing::internal::GetTypeId(), \ + num_threads, 0, "", __VA_ARGS__) + + +/* + * Helper macro + */ +#define UCS_TEST_P_(test_case_name, test_name, num_threads, \ + skip_cond, skip_reason, ...) 
\ + class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ + : public test_case_name { \ + public: \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() { \ + set_num_threads(num_threads); \ + UCS_PP_FOREACH(UCS_TEST_SET_CONFIG, _, __VA_ARGS__); \ + } \ + virtual void test_body(); \ + private: \ + virtual void check_skip_test() { \ + if (skip_cond) { \ + UCS_TEST_SKIP_R(skip_reason); \ + } \ + } \ + static int AddToRegistry() { \ + ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ + GetTestCasePatternHolder( \ + #test_case_name, __FILE__, __LINE__)->AddTestPattern( \ + #test_case_name, \ + (num_threads == 1) ? #test_name : #test_name "/mt_" #num_threads, \ + new ::testing::internal::TestMetaFactory< \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>()); \ + return 0; \ + } \ + static int gtest_registering_dummy_; \ + GTEST_DISALLOW_COPY_AND_ASSIGN_(\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \ + }; \ + int GTEST_TEST_CLASS_NAME_(test_case_name, \ + test_name)::gtest_registering_dummy_ = \ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \ + void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::test_body() + + +/* + * Define parameterized test with modified configuration + */ +#define UCS_TEST_P(test_case_name, test_name, ...) \ + UCS_TEST_P_(test_case_name, test_name, 1, 0, "", __VA_ARGS__) + + +/* + * Define parameterized test with modified configuration and check skip condition + */ +#define UCS_TEST_SKIP_COND_P(test_case_name, test_name, skip_cond, ...) \ + UCS_TEST_P_(test_case_name, test_name, 1, skip_cond, #skip_cond, __VA_ARGS__) + + +/* + * Define parameterized test with multiple threads + */ +#define UCS_MT_TEST_P(test_case_name, test_name, num_threads, ...) 
\ + UCS_TEST_P_(test_case_name, test_name, num_threads, 0, "", __VA_ARGS__) + +#endif diff --git a/test/gtest/common/test_gtest_cmn.cc b/test/gtest/common/test_gtest_cmn.cc new file mode 100644 index 0000000..580dcca --- /dev/null +++ b/test/gtest/common/test_gtest_cmn.cc @@ -0,0 +1,18 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include "test.h" +#include "test_helpers.h" + + +class gtest_common : public ucs::test { +}; + + +UCS_TEST_F(gtest_common, auto_ptr) { + ucs::auto_ptr p(new int); +} + diff --git a/test/gtest/common/test_helpers.cc b/test/gtest/common/test_helpers.cc new file mode 100644 index 0000000..973851f --- /dev/null +++ b/test/gtest/common/test_helpers.cc @@ -0,0 +1,693 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2012. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "test_helpers.h" + +#include +#include +#include +#include + +#include +#include + +namespace ucs { + +typedef std::pair test_result_t; + +const double test_timeout_in_sec = 60.; + +const double watchdog_timeout_default = 900.; // 15 minutes + +static test_watchdog_t watchdog; + +std::set< const ::testing::TestInfo*> skipped_tests; + +void *watchdog_func(void *arg) +{ + int ret = 0; + double now; + struct timespec timeout; + + pthread_mutex_lock(&watchdog.mutex); + + // sync with the watched thread + pthread_barrier_wait(&watchdog.barrier); + + do { + now = ucs_get_accurate_time(); + ucs_sec_to_timespec(now + watchdog.timeout, &timeout); + + ret = pthread_cond_timedwait(&watchdog.cv, &watchdog.mutex, &timeout); + if (!ret) { + pthread_barrier_wait(&watchdog.barrier); + } else { + // something wrong happened - handle it + ADD_FAILURE() << strerror(ret) << " - abort testing"; + if (ret == ETIMEDOUT) { + pthread_kill(watchdog.watched_thread, watchdog.kill_signal); + } else { + abort(); + } + } + + switch (watchdog.state) { + case WATCHDOG_TEST: + watchdog.kill_signal = 
SIGTERM; + // reset when the test completed + watchdog.state = WATCHDOG_DEFAULT_SET; + break; + case WATCHDOG_RUN: + // yawn - nothing to do + break; + case WATCHDOG_STOP: + // force the end of the loop + ret = 1; + break; + case WATCHDOG_TIMEOUT_SET: + // reset when the test completed + watchdog.state = WATCHDOG_DEFAULT_SET; + break; + case WATCHDOG_DEFAULT_SET: + watchdog.timeout = watchdog_timeout_default; + watchdog.state = WATCHDOG_RUN; + watchdog.kill_signal = SIGABRT; + break; + } + } while (!ret); + + pthread_mutex_unlock(&watchdog.mutex); + + return NULL; +} + +void watchdog_signal(bool barrier) +{ + pthread_mutex_lock(&watchdog.mutex); + pthread_cond_signal(&watchdog.cv); + pthread_mutex_unlock(&watchdog.mutex); + + if (barrier) { + pthread_barrier_wait(&watchdog.barrier); + } +} + +void watchdog_set(test_watchdog_state_t new_state, double new_timeout) +{ + pthread_mutex_lock(&watchdog.mutex); + // change timeout value + watchdog.timeout = new_timeout; + watchdog.state = new_state; + // apply new value for timeout + watchdog_signal(0); + pthread_mutex_unlock(&watchdog.mutex); + + pthread_barrier_wait(&watchdog.barrier); +} + +void watchdog_set(test_watchdog_state_t new_state) +{ + watchdog_set(new_state, watchdog_timeout_default); +} + +void watchdog_set(double new_timeout) +{ + watchdog_set(WATCHDOG_TIMEOUT_SET, new_timeout); +} + +#define WATCHDOG_DEFINE_GETTER(_what, _what_type) \ + _what_type UCS_PP_TOKENPASTE(watchdog_get_, _what)() \ + { \ + _what_type value; \ + \ + pthread_mutex_lock(&watchdog.mutex); \ + value = watchdog._what; \ + pthread_mutex_unlock(&watchdog.mutex); \ + \ + return value; \ + } + +WATCHDOG_DEFINE_GETTER(timeout, double) +WATCHDOG_DEFINE_GETTER(state, test_watchdog_state_t) +WATCHDOG_DEFINE_GETTER(kill_signal, int) + +int watchdog_start() +{ + pthread_mutexattr_t mutex_attr; + int ret; + + ret = pthread_mutexattr_init(&mutex_attr); + if (ret != 0) { + return -1; + } + // create reentrant mutex + ret = 
pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_RECURSIVE); + if (ret != 0) { + goto err_destroy_mutex_attr; + } + + ret = pthread_mutex_init(&watchdog.mutex, &mutex_attr); + if (ret != 0) { + goto err_destroy_mutex_attr; + } + + ret = pthread_cond_init(&watchdog.cv, NULL); + if (ret != 0) { + goto err_destroy_mutex; + } + + // 2 - watched thread + watchdog + ret = pthread_barrier_init(&watchdog.barrier, NULL, 2); + if (ret != 0) { + goto err_destroy_cond; + } + + pthread_mutex_lock(&watchdog.mutex); + watchdog.state = WATCHDOG_RUN; + watchdog.timeout = watchdog_timeout_default; + watchdog.kill_signal = SIGABRT; + watchdog.watched_thread = pthread_self(); + pthread_mutex_unlock(&watchdog.mutex); + + ret = pthread_create(&watchdog.thread, NULL, watchdog_func, NULL); + if (ret != 0) { + goto err_destroy_barrier; + } + + pthread_mutexattr_destroy(&mutex_attr); + + // sync with the watchdog thread + pthread_barrier_wait(&watchdog.barrier); + + // test signaling + watchdog_signal(); + + return 0; + +err_destroy_barrier: + pthread_barrier_destroy(&watchdog.barrier); +err_destroy_cond: + pthread_cond_destroy(&watchdog.cv); +err_destroy_mutex: + pthread_mutex_destroy(&watchdog.mutex); +err_destroy_mutex_attr: + pthread_mutexattr_destroy(&mutex_attr); + return -1; +} + +void watchdog_stop() +{ + void *ret_val; + + pthread_mutex_lock(&watchdog.mutex); + watchdog.state = WATCHDOG_STOP; + watchdog_signal(0); + pthread_mutex_unlock(&watchdog.mutex); + + pthread_barrier_wait(&watchdog.barrier); + pthread_join(watchdog.thread, &ret_val); + + pthread_barrier_destroy(&watchdog.barrier); + pthread_cond_destroy(&watchdog.cv); + pthread_mutex_destroy(&watchdog.mutex); +} + +static bool test_results_cmp(const test_result_t &a, const test_result_t &b) +{ + return a.second > b.second; +} + +void analyze_test_results() +{ + // GTEST_REPORT_LONGEST_TESTS=100 will report TOP-100 longest tests + /* coverity[tainted_data_return] */ + char *env_p = getenv("GTEST_REPORT_LONGEST_TESTS"); + 
if (env_p == NULL) { + return; + } + + size_t total_skipped_cnt = skipped_tests.size(); + ::testing::TimeInMillis total_skipped_time = 0; + size_t max_name_size = 0; + std::set< const ::testing::TestInfo*>::iterator skipped_it; + int top_n; + + if (!strcmp(env_p, "*")) { + top_n = std::numeric_limits::max(); + } else { + top_n = atoi(env_p); + if (!top_n) { + return; + } + } + + ::testing::UnitTest *unit_test = ::testing::UnitTest::GetInstance(); + std::vector test_results; + + if (unit_test == NULL) { + ADD_FAILURE() << "Unable to get the Unit Test instance"; + return; + } + + for (int i = 0; i < unit_test->total_test_case_count(); i++) { + const ::testing::TestCase *test_case = unit_test->GetTestCase(i); + if (test_case == NULL) { + ADD_FAILURE() << "Unable to get the Test Case instance with index " + << i; + return; + } + + for (int i = 0; i < test_case->total_test_count(); i++) { + const ::testing::TestInfo *test = test_case->GetTestInfo(i); + if (test == NULL) { + ADD_FAILURE() << "Unable to get the Test Info instance with index " + << i; + return; + } + + if (test->should_run()) { + const ::testing::TestResult *result = test->result(); + std::string test_name = test->test_case_name(); + + test_name += "."; + test_name += test->name(); + + test_results.push_back(std::make_pair(test_name, + result->elapsed_time())); + + max_name_size = std::max(test_name.size(), max_name_size); + + skipped_it = skipped_tests.find(test); + if (skipped_it != skipped_tests.end()) { + total_skipped_time += result->elapsed_time(); + skipped_tests.erase(skipped_it); + } + } + } + } + + std::sort(test_results.begin(), test_results.end(), test_results_cmp); + + top_n = std::min((int)test_results.size(), top_n); + if (!top_n) { + return; + } + + // Print TOP- slowest tests + int max_index_size = ucs::to_string(top_n).size(); + std::cout << std::endl << "TOP-" << top_n << " longest tests:" << std::endl; + + for (int i = 0; i < top_n; i++) { + std::cout << std::setw(max_index_size - 
ucs::to_string(i + 1).size() + 1) + << (i + 1) << ". " << test_results[i].first + << std::setw(max_name_size - test_results[i].first.size() + 3) + << " - " << test_results[i].second << " ms" << std::endl; + } + + // Print skipped tests statistics + std::cout << std::endl << "Skipped tests: count - " + << total_skipped_cnt << ", time - " + << total_skipped_time << " ms" << std::endl; +} + +int test_time_multiplier() +{ + int factor = 1; /* 1 = normal speed; x10 in a Bullseye coverage build, x20 under valgrind */ +#if _BullseyeCoverage + factor *= 10; +#endif + if (RUNNING_ON_VALGRIND) { + factor *= 20; + } + return factor; +} + +ucs_time_t get_deadline(double timeout_in_sec) +{ + return ucs_get_time() + ucs_time_from_sec(timeout_in_sec * + test_time_multiplier()); +} + +int max_tcp_connections() +{ + static int max_conn = 0; /* computed once on first call, then cached */ + + if (!max_conn) { + max_conn = 65535 - 1024; /* limit on number of ports */ + + /* Limit number of endpoints to number of open files, for TCP */ + struct rlimit rlim; + int ret = getrlimit(RLIMIT_NOFILE, &rlim); + if (ret == 0) { + /* assume no more than 100 fd-s are already used */ + max_conn = ucs_min((static_cast(rlim.rlim_cur) - 100) / 2, max_conn); + } + } + + return max_conn; +} + +void fill_random(void *data, size_t size) +{ + if (ucs::test_time_multiplier() > 1) { /* slowed-down run (valgrind/coverage build): use a cheap zero fill */ + memset(data, 0, size); + return; + } + + uint64_t seed = rand(); + for (size_t i = 0; i < size / sizeof(uint64_t); ++i) { + ((uint64_t*)data)[i] = seed; + seed = seed * 10 + 17; + } + size_t remainder = size % sizeof(uint64_t); + memset((char*)data + size - remainder, 0xab, remainder); /* tail bytes that do not form a whole uint64_t get a fixed 0xab pattern */ +} + +scoped_setenv::scoped_setenv(const char *name, const char *value) : m_name(name) { + if (getenv(name)) { /* remember the previous value so the destructor can restore it */ + m_old_value = getenv(name); + } + setenv(m_name.c_str(), value, 1); +} + +scoped_setenv::~scoped_setenv() { + if (!m_old_value.empty()) { + setenv(m_name.c_str(), m_old_value.c_str(), 1); + } else { /* nothing was saved (variable was unset or empty): remove it */ + unsetenv(m_name.c_str()); + } +} + +ucx_env_cleanup::ucx_env_cleanup() { + const size_t prefix_len = strlen(UCS_CONFIG_PREFIX); + char **envp; + + for (envp = 
environ; *envp != NULL; ++envp) { + std::string env_var = *envp; + + if ((env_var.find("=") != std::string::npos) && + (env_var.find(UCS_CONFIG_PREFIX, 0, prefix_len) != std::string::npos)) { + ucx_env_storage.push_back(env_var); + } + } + + for (size_t i = 0; i < ucx_env_storage.size(); i++) { + std::string var_name = + ucx_env_storage[i].substr(0, ucx_env_storage[i].find("=")); + + unsetenv(var_name.c_str()); + } +} + +ucx_env_cleanup::~ucx_env_cleanup() { + while (!ucx_env_storage.empty()) { + std::string var_name = + ucx_env_storage.back().substr(0, ucx_env_storage.back().find("=")); + std::string var_value = + ucx_env_storage.back().substr(ucx_env_storage.back().find("=") + 1); + + setenv(var_name.c_str(), var_value.c_str(), 1); + ucx_env_storage.pop_back(); + } +} + +void safe_sleep(double sec) { + ucs_time_t current_time = ucs_get_time(); + ucs_time_t end_time = current_time + ucs_time_from_sec(sec); + + while (current_time < end_time) { + usleep((long)ucs_time_to_usec(end_time - current_time)); + current_time = ucs_get_time(); + } +} + +void safe_usleep(double usec) { + safe_sleep(usec * 1e-6); +} + +bool is_inet_addr(const struct sockaddr* ifa_addr) { + return (ifa_addr->sa_family == AF_INET) || + (ifa_addr->sa_family == AF_INET6); +} + +bool is_rdmacm_netdev(const char *ifa_name) { + struct dirent *entry; + char path[PATH_MAX]; + char dev_name[16]; + char guid_buf[32]; + DIR *dir; + + snprintf(path, PATH_MAX, "/sys/class/net/%s/device/infiniband", ifa_name); + dir = opendir(path); + if (dir == NULL) { + return false; + } + + /* read IB device name */ + for (;;) { + entry = readdir(dir); + if (entry == NULL) { + closedir(dir); + return false; + } else if (entry->d_name[0] != '.') { + ucs_strncpy_zero(dev_name, entry->d_name, sizeof(dev_name)); + break; + } + } + closedir(dir); + + /* read node guid */ + memset(guid_buf, 0, sizeof(guid_buf)); + ssize_t nread = ucs_read_file(guid_buf, sizeof(guid_buf), 1, + "/sys/class/infiniband/%s/node_guid", dev_name); + 
if (nread < 0) { + return false; + } + + /* use the device if node_guid != 0 */ + return strstr(guid_buf, "0000:0000:0000:0000") == NULL; +} + +uint16_t get_port() { /* returns, in host byte order, a port the kernel just handed out for a wildcard IPv4 bind */ + int sock_fd, ret; + ucs_status_t status; + struct sockaddr_in addr_in, ret_addr; + socklen_t len = sizeof(ret_addr); + uint16_t port; + + status = ucs_socket_create(AF_INET, SOCK_STREAM, &sock_fd); + ASSERT_UCS_OK(status); /* abort the test: without a socket the bind loop below would spin forever on an uninitialized fd */ + + memset(&addr_in, 0, sizeof(struct sockaddr_in)); + addr_in.sin_family = AF_INET; + addr_in.sin_addr.s_addr = INADDR_ANY; + + do { + addr_in.sin_port = htons(0); /* port 0: let the kernel choose a free port */ + /* Ports below 1024 are considered "privileged" (can be used only by + * user root). Ports above and including 1024 can be used by anyone */ + ret = bind(sock_fd, (struct sockaddr*)&addr_in, + sizeof(struct sockaddr_in)); + } while (ret); + + ret = getsockname(sock_fd, (struct sockaddr*)&ret_addr, &len); + EXPECT_EQ(ret, 0); + EXPECT_LT(1023, ntohs(ret_addr.sin_port)) ; + + port = ntohs(ret_addr.sin_port); + close(sock_fd); /* the port is released here, so the caller only gets a likely-free port */ + return port; +} + +void *mmap_fixed_address() { + return (void*)0xff0000000; +} + +sock_addr_storage::sock_addr_storage() : m_size(0), m_is_valid(false) { + memset(&m_storage, 0, sizeof(m_storage)); +} + +sock_addr_storage::sock_addr_storage(const ucs_sock_addr_t &ucs_sock_addr) { + if (sizeof(m_storage) < ucs_sock_addr.addrlen) { /* address does not fit: leave the object empty and invalid */ + memset(&m_storage, 0, sizeof(m_storage)); + m_size = 0; + m_is_valid = false; + } else { + set_sock_addr(*ucs_sock_addr.addr, ucs_sock_addr.addrlen); + } +} + +void sock_addr_storage::set_sock_addr(const struct sockaddr &addr, + const size_t size) { + ASSERT_GE(sizeof(m_storage), size); + ASSERT_TRUE(ucs::is_inet_addr(&addr)); + memcpy(&m_storage, &addr, size); + m_size = size; + m_is_valid = true; +} + +void sock_addr_storage::reset_to_any() { /* replace the stored address with the wildcard address of the same family, keeping the port */ + ASSERT_TRUE(m_is_valid); + + if (get_sock_addr_ptr()->sa_family == AF_INET) { + struct sockaddr_in sin = {0}; + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = INADDR_ANY; + sin.sin_port = htons(get_port()); /* member get_port() returns host byte order; sin_port is network order */ + + 
set_sock_addr(*(struct sockaddr*)&sin, sizeof(sin)); + } else { + ASSERT_EQ(get_sock_addr_ptr()->sa_family, AF_INET6); + struct sockaddr_in6 sin = {0}; + + sin.sin6_family = AF_INET6; + sin.sin6_addr = in6addr_any; + sin.sin6_port = htons(get_port()); /* member get_port() returns host byte order; sin6_port is network order */ + + set_sock_addr(*(struct sockaddr*)&sin, sizeof(sin)); + } +} + +bool +sock_addr_storage::operator==(const struct sockaddr_storage &sockaddr) const { + ucs_status_t status; + int result = ucs_sockaddr_cmp(get_sock_addr_ptr(), + (const struct sockaddr*)&sockaddr, &status); + ASSERT_UCS_OK(status); + return result == 0; +} + +void sock_addr_storage::set_port(uint16_t port) { /* port is given in host byte order */ + if (get_sock_addr_ptr()->sa_family == AF_INET) { + struct sockaddr_in *addr_in = (struct sockaddr_in *)&m_storage; + addr_in->sin_port = htons(port); + } else { + ASSERT_TRUE(get_sock_addr_ptr()->sa_family == AF_INET6); + struct sockaddr_in6 *addr_in = (struct sockaddr_in6 *)&m_storage; + addr_in->sin6_port = htons(port); + } +} + +uint16_t sock_addr_storage::get_port() const { /* returns the stored port in host byte order */ + if (get_sock_addr_ptr()->sa_family == AF_INET) { + struct sockaddr_in *addr_in = (struct sockaddr_in *)&m_storage; + return ntohs(addr_in->sin_port); + } else { + EXPECT_TRUE(get_sock_addr_ptr()->sa_family == AF_INET6); + + struct sockaddr_in6 *addr_in = (struct sockaddr_in6 *)&m_storage; + return ntohs(addr_in->sin6_port); + } +} + +size_t sock_addr_storage::get_addr_size() const { + return m_size; +} + +ucs_sock_addr_t sock_addr_storage::to_ucs_sock_addr() const { + ucs_sock_addr_t addr; + + addr.addr = get_sock_addr_ptr(); /* points into this object; valid only while it is alive and unchanged */ + addr.addrlen = m_size; + return addr; +} + +std::string sock_addr_storage::to_str() const { + char str[UCS_SOCKADDR_STRING_LEN]; + return ucs_sockaddr_str(get_sock_addr_ptr(), str, sizeof(str)); +} + +const struct sockaddr* sock_addr_storage::get_sock_addr_ptr() const { + return m_is_valid ? 
(struct sockaddr *)(&m_storage) : NULL; +} + +std::ostream& operator<<(std::ostream& os, const sock_addr_storage& sa_storage) +{ + return os << ucs::sockaddr_to_str(sa_storage.get_sock_addr_ptr()); +} + +auto_buffer::auto_buffer(size_t size) : m_ptr(malloc(size)) { + if (!m_ptr) { + UCS_TEST_ABORT("Failed to allocate memory"); + } +} + +auto_buffer::~auto_buffer() +{ + free(m_ptr); +} + +void* auto_buffer::operator*() const { + return m_ptr; +}; + +namespace detail { + +message_stream::message_stream(const std::string& title) { + static const char PADDING[] = " "; + static const size_t WIDTH = strlen(PADDING); + + msg << "["; + msg.write(PADDING, ucs_max(WIDTH - 1, title.length()) - title.length()); + msg << title << " ] "; +} + +message_stream::~message_stream() { + msg << std::endl; + std::cout << msg.str() << std::flush; +} + +} // detail + +template +void cartesian_product(std::vector > &final_output, + std::vector &cur_output, + typename std::vector > + ::const_iterator cur_input, + typename std::vector > + ::const_iterator end_input) { + if (cur_input == end_input) { + final_output.push_back(cur_output); + return; + } + + const std::vector &cur_vector = *cur_input; + + cur_input++; + + for (typename std::vector::const_iterator iter = + cur_vector.begin(); iter != cur_vector.end(); ++iter) { + cur_output.push_back(*iter); + ucs::cartesian_product(final_output, cur_output, + cur_input, end_input); + cur_output.pop_back(); + } +} + +template +void cartesian_product(std::vector > &output, + const std::vector > &input) { + std::vector cur_output; + cartesian_product(output, cur_output, input.begin(), input.end()); +} + +std::vector > supported_mem_type_pairs() { + static std::vector > result; + + if (result.empty()) { + std::vector > input; + + input.push_back(mem_buffer::supported_mem_types()); + input.push_back(mem_buffer::supported_mem_types()); + + ucs::cartesian_product(result, input); + } + + return result; +} + +} // ucs diff --git 
a/test/gtest/common/test_helpers.h b/test/gtest/common/test_helpers.h new file mode 100644 index 0000000..a272352 --- /dev/null +++ b/test/gtest/common/test_helpers.h @@ -0,0 +1,842 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (c) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCS_TEST_HELPERS_H +#define UCS_TEST_HELPERS_H + +#include "gtest.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifndef UINT16_MAX +#define UINT16_MAX (65535) +#endif /* UINT16_MAX */ + + +/* Test output */ +#define UCS_TEST_MESSAGE \ + ucs::detail::message_stream("INFO") + + +/* Skip test */ +#define UCS_TEST_SKIP \ + do { \ + throw ucs::test_skip_exception(); \ + } while(0) + + +#define UCS_TEST_SKIP_R(_reason) \ + do { \ + throw ucs::test_skip_exception(_reason); \ + } while(0) + + +/* Abort test */ +#define UCS_TEST_ABORT(_message) \ + do { \ + std::stringstream ss; \ + ss << _message; \ + GTEST_MESSAGE_(ss.str().c_str(), ::testing::TestPartResult::kFatalFailure); \ + throw ucs::test_abort_exception(); \ + } while(0) + + +/* UCS error check */ +#define EXPECT_UCS_OK(_expr) \ + do { \ + ucs_status_t _status = (_expr); \ + EXPECT_EQ(UCS_OK, _status) << "Error: " << ucs_status_string(_status); \ + } while (0) + + +#define ASSERT_UCS_OK(_expr, ...) 
\ + do { \ + ucs_status_t _status = (_expr); \ + if ((_status) != UCS_OK) { \ + UCS_TEST_ABORT("Error: " << ucs_status_string(_status) __VA_ARGS__); \ + } \ + } while (0) + + +#define ASSERT_UCS_OK_OR_INPROGRESS(_expr) \ + do { \ + ucs_status_t _status = (_expr); \ + if (((_status) != UCS_OK) && ((_status) != UCS_INPROGRESS)) { \ + UCS_TEST_ABORT("Error: " << ucs_status_string(_status)); \ + } \ + } while (0) + + +#define ASSERT_UCS_OK_OR_BUSY(_expr) \ + do { \ + ucs_status_t _status = (_expr); \ + if (((_status) != UCS_OK) && ((_status) != UCS_ERR_BUSY)) { \ + UCS_TEST_ABORT("Error: " << ucs_status_string(_status)); \ + } \ + } while (0) + + +#define ASSERT_UCS_PTR_OK(_expr) \ + do { \ + ucs_status_ptr_t _status = (_expr); \ + if (UCS_PTR_IS_ERR(_status)) { \ + UCS_TEST_ABORT("Error: " << ucs_status_string(UCS_PTR_STATUS(_status))); \ + } \ + } while (0) + + +#define EXPECT_UD_CHECK(_val1, _val2, _exp_ud, _exp_non_ud) \ + do { \ + if (has_ud()) { \ + EXPECT_##_exp_ud(_val1, _val2); \ + } else { \ + EXPECT_##_exp_non_ud(_val1, _val2); \ + } \ + } while (0) + + +/* Run code block with given time limit */ +#define UCS_TEST_TIME_LIMIT(_seconds) \ + for (ucs_time_t _start_time = ucs_get_time(), _elapsed = 0; \ + _start_time != 0; \ + ((ucs_time_to_sec(_elapsed = ucs_get_time() - _start_time) >= \ + (_seconds) * ucs::test_time_multiplier()) && \ + (ucs::perf_retry_count > 0)) \ + ? (GTEST_NONFATAL_FAILURE_("Time limit exceeded:") << \ + "Expected time: " << ((_seconds) * ucs::test_time_multiplier()) << " seconds\n" << \ + "Actual time: " << ucs_time_to_sec(_elapsed) << " seconds", 0) \ + : 0, \ + _start_time = 0) + + +/** + * Scoped exit for C++. Usage: + * + * UCS_TEST_SCOPE_EXIT() { } UCS_TEST_SCOPE_EXIT_END + */ +#define _UCS_TEST_SCOPE_EXIT(_classname, ...) \ + class _classname { \ + public: \ + _classname() {} \ + ~_classname() +#define UCS_TEST_SCOPE_EXIT(...) 
\ + _UCS_TEST_SCOPE_EXIT(UCS_PP_APPEND_UNIQUE_ID(onexit), ## __VA_ARGS__) + + +#define UCS_TEST_SCOPE_EXIT_END \ + } UCS_PP_APPEND_UNIQUE_ID(onexit_var); + + +/** + * Make uct_iov_t iov[iovcnt] array with pointer elements to original buffer + */ +#define UCS_TEST_GET_BUFFER_IOV(_name_iov, _name_iovcnt, _buffer_ptr, _buffer_length, _memh, _iovcnt) \ + uct_iov_t _name_iov[_iovcnt]; \ + const size_t _name_iovcnt = _iovcnt; \ + const size_t _buffer_iov_length = _buffer_length / _name_iovcnt; \ + size_t _buffer_iov_length_it = 0; \ + for (size_t iov_it = 0; iov_it < _name_iovcnt; ++iov_it) { \ + _name_iov[iov_it].buffer = (char *)(_buffer_ptr) + _buffer_iov_length_it; \ + _name_iov[iov_it].count = 1; \ + _name_iov[iov_it].stride = 0; \ + _name_iov[iov_it].memh = _memh; \ + if (iov_it == (_name_iovcnt - 1)) { /* Last iteration */ \ + _name_iov[iov_it].length = _buffer_length - _buffer_iov_length_it; \ + } else { \ + _name_iov[iov_it].length = _buffer_iov_length; \ + _buffer_iov_length_it += _buffer_iov_length; \ + } \ + } + + +namespace ucs { + +extern const double test_timeout_in_sec; +extern const double watchdog_timeout_default; + +extern std::set< const ::testing::TestInfo*> skipped_tests; + +typedef enum { + WATCHDOG_STOP, + WATCHDOG_RUN, + WATCHDOG_TIMEOUT_SET, + WATCHDOG_DEFAULT_SET, + WATCHDOG_TEST +} test_watchdog_state_t; + +typedef struct { + pthread_t thread; + pthread_mutex_t mutex; + pthread_cond_t cv; + double timeout; + pthread_t watched_thread; + pthread_barrier_t barrier; + test_watchdog_state_t state; + int kill_signal; +} test_watchdog_t; + +void *watchdog_func(void *arg); +void watchdog_signal(bool barrier = 1); +void watchdog_set(test_watchdog_state_t new_state, double new_timeout); +void watchdog_set(test_watchdog_state_t new_state); +void watchdog_set(double new_timeout); +test_watchdog_state_t watchdog_get_state(); +double watchdog_get_timeout(); +int watchdog_get_kill_signal(); +int watchdog_start(); +void watchdog_stop(); + +void 
analyze_test_results(); + +class test_abort_exception : public std::exception { +}; + + +class exit_exception : public std::exception { +public: + exit_exception(bool failed) : m_failed(failed) { + } + + virtual ~exit_exception() throw() { + } + + bool failed() const { + return m_failed; + } + +private: + const bool m_failed; +}; + + +class test_skip_exception : public std::exception { +public: + test_skip_exception(const std::string& reason = "") : m_reason(reason) { + } + virtual ~test_skip_exception() throw() { + } + + virtual const char* what() const throw() { + return m_reason.c_str(); + } + +private: + const std::string m_reason; +}; + + +/** + * @return Time multiplier for performance tests. + */ +int test_time_multiplier(); + + +/** + * Get current time + @a timeout_in_sec. + */ +ucs_time_t get_deadline(double timeout_in_sec = test_timeout_in_sec); + + +/** + * @return System limit on number of TCP connections. + */ +int max_tcp_connections(); + + +/** + * Signal-safe sleep. + */ +void safe_sleep(double sec); +void safe_usleep(double usec); + + +/** + * Check if the given interface has an IPv4 or an IPv6 address. + */ +bool is_inet_addr(const struct sockaddr* ifa_addr); + + +/** + * Check if the given network device is supported by rdmacm. + */ +bool is_rdmacm_netdev(const char *ifa_name); + + +/** + * Get an available port on the host. + */ +uint16_t get_port(); + + +/** + * Address to use for mmap(FIXED) + */ +void *mmap_fixed_address(); + + +/** + * Return the IP address of the given interface address. 
+ */ +template +std::string sockaddr_to_str(const S *saddr) { + static char buffer[UCS_SOCKADDR_STRING_LEN]; + return ::ucs_sockaddr_str(reinterpret_cast(saddr), + buffer, UCS_SOCKADDR_STRING_LEN); +} + +/** + * Wrapper for struct sockaddr_storage to unify work flow for IPv4 and IPv6 + */ +class sock_addr_storage { +public: + sock_addr_storage(); + + sock_addr_storage(const ucs_sock_addr_t &ucs_sock_addr); + + void set_sock_addr(const struct sockaddr &addr, const size_t size); + + void reset_to_any(); + + bool operator==(const struct sockaddr_storage &sockaddr) const; + + void set_port(uint16_t port); + + uint16_t get_port() const; + + size_t get_addr_size() const; + + ucs_sock_addr_t to_ucs_sock_addr() const; + + std::string to_str() const; + + const struct sockaddr* get_sock_addr_ptr() const; + +private: + struct sockaddr_storage m_storage; + size_t m_size; + bool m_is_valid; +}; + + +std::ostream& operator<<(std::ostream& os, const sock_addr_storage& sa_storage); + + +/* + * For gtest's EXPECT_EQ + */ +template +static std::ostream& operator<<(std::ostream& os, const std::vector& vec) { + static const size_t LIMIT = 2000; + size_t i = 0; + for (std::vector::const_iterator iter = vec.begin(); + iter != vec.end(); ++iter) { + if (i >= LIMIT) { + os << "..."; + break; + } + os << "[" << i << "]=" << *iter << " "; + ++i; + } + return os << std::endl; +} + +std::ostream& operator<<(std::ostream& os, const std::vector& vec); + +static inline int rand() { + /* coverity[dont_call] */ + return ::rand(); +} + +void fill_random(void *data, size_t size); + +/* C can be vector or string */ +template +static void fill_random(C& c) { + fill_random(&c[0], sizeof(c[0]) * c.size()); +} + +/* C can be vector or string */ +template +static void fill_random(C& c, size_t size) { + fill_random(&c[0], sizeof(c[0]) * size); +} + +template +static inline T random_upper() { + return static_cast((rand() / static_cast(RAND_MAX)) * + std::numeric_limits::max()); +} + +template +class hex_num 
{ +public: + hex_num(const T num) : m_num(num) { + } + + operator T() const { + return m_num; + } + + template + friend std::ostream& operator<<(std::ostream& os, const hex_num& h); +private: + const T m_num; +}; + +template +hex_num make_hex(const T num) { + return hex_num(num); +} + +template +std::ostream& operator<<(std::ostream& os, const hex_num& h) { + return os << std::hex << h.m_num << std::dec; +} +class scoped_setenv { +public: + scoped_setenv(const char *name, const char *value); + ~scoped_setenv(); +private: + scoped_setenv(const scoped_setenv&); + const std::string m_name; + std::string m_old_value; +}; + +class ucx_env_cleanup { +public: + ucx_env_cleanup(); + ~ucx_env_cleanup(); +private: + std::vector ucx_env_storage; +}; + +template +std::string to_string(const T& value) { + std::stringstream ss; + ss << value; + return ss.str(); +} + +template +std::string to_hex_string(const T& value) { + std::stringstream ss; + ss << std::hex << value; + return ss.str(); +} + +template +T from_string(const std::string& str) { + T value; + return (std::stringstream(str) >> value).fail() ? 
0 : value; +} + +template +class ptr_vector_base { +public: + typedef std::vector vec_type; + typedef typename vec_type::const_iterator const_iterator; + + ptr_vector_base() { + } + + virtual ~ptr_vector_base() { + clear(); + } + + /** Add and take ownership */ + void push_back(T* ptr) { + m_vec.push_back(ptr); + } + + void push_front(T* ptr) { + m_vec.insert(m_vec.begin(), ptr); + } + + virtual void clear() { + while (!m_vec.empty()) { + T* ptr = m_vec.back(); + m_vec.pop_back(); + release(ptr); + } + } + + const_iterator begin() const { + return m_vec.begin(); + } + + const_iterator end() const { + return m_vec.end(); + } + + T* front() { + return m_vec.front(); + } + + T* back() { + return m_vec.back(); + } + + size_t size() const { + return m_vec.size(); + } + +protected: + ptr_vector_base(const ptr_vector_base&); + vec_type m_vec; + + void release(T *ptr) { + delete ptr; + } +}; + +template<> inline void ptr_vector_base::release(void *ptr) { + free(ptr); +} + + +template +class ptr_vector : public ptr_vector_base { +public: + T& at(size_t index) const { + return *ptr_vector_base::m_vec.at(index); + } + + size_t remove(T *value) { + const size_t removed = std::distance(std::remove(this->m_vec.begin(), + this->m_vec.end(), + value), + this->m_vec.end()); + if (removed) { + this->m_vec.resize(this->m_vec.size() - removed); + this->release(value); + } + return removed; + } +}; + +template <> +class ptr_vector : public ptr_vector_base { +}; + + +/** + * Safely wraps C handles + */ +template +class handle { +public: + typedef T handle_type; + typedef void (*dtor_t)(T handle); + typedef void (*dtor2_t)(T handle, ArgT arg); + + handle() : m_initialized(false), m_value(NULL), m_dtor(NULL), + m_dtor_with_arg(NULL), m_dtor_arg(NULL) { + } + + handle(const T& value, dtor_t dtor) : m_initialized(true), m_value(value), + m_dtor(dtor), m_dtor_with_arg(NULL), + m_dtor_arg(NULL) { + EXPECT_TRUE(value != NULL); + } + + handle(const T& value, dtor2_t dtor, ArgT arg) : + 
m_initialized(true), m_value(value), m_dtor(NULL), + m_dtor_with_arg(dtor), m_dtor_arg(arg) + { + EXPECT_TRUE(value != NULL); + } + + handle(const handle& other) : m_initialized(false), m_value(NULL), + m_dtor(NULL), m_dtor_with_arg(NULL), + m_dtor_arg(NULL) { + *this = other; + } + + ~handle() { + reset(); + } + + void reset() { + if (m_initialized) { + release(); + } + } + + void revoke() const { + m_initialized = false; + } + + void reset(const T& value, dtor_t dtor) { + reset(); + if (value == NULL) { + throw std::invalid_argument("value cannot be NULL"); + } + m_value = value; + m_dtor = dtor; + m_dtor_with_arg = NULL; + m_dtor_arg = NULL; + m_initialized = true; + } + + void reset(const T& value, dtor2_t dtor, ArgT arg) { + reset(); + if (value == NULL) { + throw std::invalid_argument("value cannot be NULL"); + } + m_value = value; + m_dtor = NULL; + m_dtor_with_arg = dtor; + m_dtor_arg = arg; + m_initialized = true; + } + + const handle& operator=(const handle& other) { + reset(); + if (other.m_initialized) { + if (other.m_dtor) { + reset(other.m_value, other.m_dtor); + } else { + reset(other.m_value, other.m_dtor_with_arg, other.m_dtor_arg); + } + other.revoke(); + } + return *this; + } + + operator T() const { + return get(); + } + + operator bool() const { + return m_initialized; + } + + T get() const { + return m_initialized ? 
m_value : NULL; + } + +private: + + void release() { + if (m_dtor) { + m_dtor(m_value); + } else { + m_dtor_with_arg(m_value, m_dtor_arg); + } + + m_initialized = false; + } + + mutable bool m_initialized; + T m_value; + dtor_t m_dtor; + dtor2_t m_dtor_with_arg; + ArgT m_dtor_arg; +}; + +/* simplified version of std::auto_ptr which was deprecated in newer stdc++ + * versions in favor of unique_ptr */ +template +class auto_ptr { +public: + auto_ptr() : m_ptr(NULL) { + } + + auto_ptr(T* ptr) : m_ptr(NULL) { + reset(ptr); + } + + ~auto_ptr() { + reset(); + } + + void reset(T* ptr = NULL) { + if (m_ptr) { + delete m_ptr; + } + m_ptr = ptr; + } + + operator T*() const { + return m_ptr; + } + + T* operator->() const { + return m_ptr; + } + +private: + auto_ptr(const auto_ptr&); /* disable copy */ + auto_ptr operator=(const auto_ptr&); /* disable assign */ + + T* m_ptr; +}; + +#define UCS_TEST_TRY_CREATE_HANDLE(_t, _handle, _dtor, _ctor, ...) \ + ({ \ + _t h; \ + ucs_status_t status = _ctor(__VA_ARGS__, &h); \ + ASSERT_UCS_OK_OR_BUSY(status); \ + if (status == UCS_OK) { \ + _handle.reset(h, _dtor); \ + } \ + status; \ + }) + +#define UCS_TEST_CREATE_HANDLE(_t, _handle, _dtor, _ctor, ...) \ + { \ + _t h; \ + ucs_status_t status = _ctor(__VA_ARGS__, &h); \ + ASSERT_UCS_OK(status); \ + _handle.reset(h, _dtor); \ + } + +#define UCS_TEST_CREATE_HANDLE_IF_SUPPORTED(_t, _handle, _dtor, _ctor, ...) 
\ + { \ + _t h; \ + ucs_status_t status = _ctor(__VA_ARGS__, &h); \ + if (status == UCS_ERR_UNSUPPORTED) { \ + UCS_TEST_SKIP_R(std::string("Unsupported operation: ") + \ + UCS_PP_MAKE_STRING(_ctor)); \ + } \ + ASSERT_UCS_OK(status); \ + _handle.reset(h, _dtor); \ + } + +class size_value { +public: + explicit size_value(size_t value) : m_value(value) {} + + size_t value() const { + return m_value; + } +private: + size_t m_value; +}; + + +template +static inline O& operator<<(O& os, const size_value& sz) +{ + size_t v = sz.value(); + + std::iostream::fmtflags f(os.flags()); + + /* coverity[format_changed] */ + os << std::fixed << std::setprecision(1); + if (v < 1024) { + os << v; + } else if (v < 1024 * 1024) { + os << (v / 1024.0) << "k"; + } else if (v < 1024 * 1024 * 1024) { + os << (v / 1024.0 / 1024.0) << "m"; + } else { + os << (v / 1024.0 / 1024.0 / 1024.0) << "g"; + } + + os.flags(f); + return os; +} + + +class auto_buffer { +public: + auto_buffer(size_t size); + ~auto_buffer(); + void* operator*() const; +private: + void *m_ptr; +}; + + +template +static void deleter(T *ptr) { + delete ptr; +} + + +extern int perf_retry_count; +extern double perf_retry_interval; + +namespace detail { + +class message_stream { +public: + message_stream(const std::string& title); + ~message_stream(); + + template + message_stream& operator<<(const T& value) { + msg << value; + return *this; + } + + message_stream& operator<< (std::ostream&(*f)(std::ostream&)) { + if (f == (std::basic_ostream& (*)(std::basic_ostream&)) &std::flush) { + std::string s = msg.str(); + if (!s.empty()) { + std::cout << s << std::flush; + msg.str(""); + } + msg.clear(); + } else { + msg << f; + } + return *this; + } + + message_stream& operator<< (const size_value& value) { + msg << value.value(); + return *this; + } + + std::iostream::fmtflags flags() { + return msg.flags(); + } + + void flags(std::iostream::fmtflags f) { + msg.flags(f); + } +private: + std::ostringstream msg; +}; + +} // detail + 
+/** + * N-ary Cartesian product over the N vectors provided in the input vector + * The cardinality of the result vector: + * output.size = input[0].size * input[1].size * ... * input[input.size - 1].size + */ +template +void cartesian_product(std::vector > &output, + const std::vector > &input); + +std::vector > supported_mem_type_pairs(); + +} // ucs + +#endif /* UCS_TEST_HELPERS_H */ diff --git a/test/gtest/common/test_obj_size.cc b/test/gtest/common/test_obj_size.cc new file mode 100644 index 0000000..1c9b11d --- /dev/null +++ b/test/gtest/common/test_obj_size.cc @@ -0,0 +1,70 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include + +extern "C" { +#include +#include +#include +#include +#include +#include +#include +#if HAVE_TL_RC +# include +#endif +#if HAVE_TL_DC +# include +# include +#endif +#if HAVE_TL_UD +# include +# include +#endif +} + +class test_obj_size : public ucs::test { +}; + +#define EXPECTED_SIZE(_obj, _size) EXPECT_EQ((size_t)_size, sizeof(_obj)) + +UCS_TEST_F(test_obj_size, size) { + +#if ENABLE_DEBUG_DATA + UCS_TEST_SKIP_R("Debug data"); +#elif ENABLE_STATS + UCS_TEST_SKIP_R("Statistic enabled"); +#elif UCS_ENABLE_ASSERT + UCS_TEST_SKIP_R("Assert enabled"); +#else + EXPECTED_SIZE(ucp_ep_t, 64); + EXPECTED_SIZE(ucp_request_t, 232); + EXPECTED_SIZE(ucp_recv_desc_t, 48); + EXPECTED_SIZE(uct_ep_t, 8); + EXPECTED_SIZE(uct_base_ep_t, 8); + EXPECTED_SIZE(uct_rkey_bundle_t, 24); + EXPECTED_SIZE(uct_self_ep_t, 8); + EXPECTED_SIZE(uct_tcp_ep_t, 160); +# if HAVE_TL_RC + EXPECTED_SIZE(uct_rc_ep_t, 64); + EXPECTED_SIZE(uct_rc_verbs_ep_t, 80); +# endif +# if HAVE_TL_DC + EXPECTED_SIZE(uct_dc_mlx5_ep_t, 32); +# endif +# if HAVE_TL_UD + EXPECTED_SIZE(uct_ud_ep_t, 240); + EXPECTED_SIZE(uct_ud_verbs_ep_t, 256); +# endif +#endif +} + diff --git a/test/gtest/common/test_perf.cc b/test/gtest/common/test_perf.cc new file 
mode 100644 index 0000000..5b35217 --- /dev/null +++ b/test/gtest/common/test_perf.cc @@ -0,0 +1,306 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* Copyright (C) The University of Tennessee and The University +* of Tennessee Research Foundation. 2015. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include "test_perf.h" + +extern "C" { +#include +#include +} +#include +#include +#include + + +test_perf::rte_comm::rte_comm() { + pthread_mutex_init(&m_mutex, NULL); +} + +void test_perf::rte_comm::push(const void *data, size_t size) { + pthread_mutex_lock(&m_mutex); + m_queue.append((const char *)data, size); + pthread_mutex_unlock(&m_mutex); +} + +void test_perf::rte_comm::pop(void *data, size_t size, + void (*progress)(void *arg), void *arg) { + bool done = false; + do { + pthread_mutex_lock(&m_mutex); + if (m_queue.length() >= size) { + memcpy(data, &m_queue[0], size); + m_queue.erase(0, size); + done = true; + } + pthread_mutex_unlock(&m_mutex); + if (!done) { + progress(arg); + } + } while (!done); +} + + +test_perf::rte::rte(unsigned index, rte_comm& send, rte_comm& recv) : + m_index(index), m_send(send), m_recv(recv) { +} + +unsigned test_perf::rte::index() const { + return m_index; +} + +unsigned test_perf::rte::group_size(void *rte_group) { + return 2; +} + +unsigned test_perf::rte::group_index(void *rte_group) { + rte *self = reinterpret_cast(rte_group); + return self->index(); +} + +void test_perf::rte::barrier(void *rte_group, void (*progress)(void *arg), + void *arg) { + static const uint32_t magic = 0xdeadbeed; + rte *self = reinterpret_cast(rte_group); + uint32_t dummy = magic; + self->m_send.push(&dummy, sizeof(dummy)); + dummy = 0; + self->m_recv.pop(&dummy, sizeof(dummy), progress, arg); + ucs_assert_always(dummy == magic); +} + +void test_perf::rte::post_vec(void *rte_group, const 
struct iovec *iovec, + int iovcnt, void **req) +{ + rte *self = reinterpret_cast(rte_group); + size_t size; + int i; + + size = 0; + for (i = 0; i < iovcnt; ++i) { + size += iovec[i].iov_len; + } + + self->m_send.push(&size, sizeof(size)); + for (i = 0; i < iovcnt; ++i) { + self->m_send.push(iovec[i].iov_base, iovec[i].iov_len); + } +} + +void test_perf::rte::recv(void *rte_group, unsigned src, void *buffer, + size_t max, void *req) +{ + rte *self = reinterpret_cast(rte_group); + size_t size; + + if (src != 1 - self->m_index) { + return; + } + + self->m_recv.pop(&size, sizeof(size), (void(*)(void*))ucs_empty_function, NULL); + ucs_assert_always(size <= max); + self->m_recv.pop(buffer, size, (void(*)(void*))ucs_empty_function, NULL); +} + +void test_perf::rte::exchange_vec(void *rte_group, void * req) +{ +} + +void test_perf::rte::report(void *rte_group, const ucx_perf_result_t *result, + void *arg, int is_final) +{ +} + +ucx_perf_rte_t test_perf::rte::test_rte = { + rte::group_size, + rte::group_index, + rte::barrier, + rte::post_vec, + rte::recv, + rte::exchange_vec, + rte::report, +}; + +std::vector test_perf::get_affinity() { + std::vector cpus; + cpu_set_t affinity; + int ret, nr_cpus; + + ret = sched_getaffinity(getpid(), sizeof(affinity), &affinity); + if (ret != 0) { + ucs_error("Failed to get CPU affinity: %m"); + throw ucs::test_abort_exception(); + } + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + if (nr_cpus < 0) { + ucs_error("Failed to get CPU count: %m"); + throw ucs::test_abort_exception(); + } + + for (int cpu = 0; cpu < nr_cpus; ++cpu) { + if (CPU_ISSET(cpu, &affinity)) { + cpus.push_back(cpu); + } + } + + return cpus; +} + +void test_perf::set_affinity(int cpu) +{ + cpu_set_t affinity; + CPU_ZERO(&affinity); + CPU_SET(cpu , &affinity); + sched_setaffinity(ucs_get_tid(), sizeof(affinity), &affinity); +} + +void* test_perf::thread_func(void *arg) +{ + thread_arg *a = (thread_arg*)arg; + test_result *result; + + set_affinity(a->cpu); + result = new 
test_result(); + result->status = ucx_perf_run(&a->params, &result->result); + return result; +} + +test_perf::test_result test_perf::run_multi_threaded(const test_spec &test, unsigned flags, + const std::string &tl_name, + const std::string &dev_name, + const std::vector &cpus) +{ + rte_comm c0to1, c1to0; + + ucx_perf_params_t params; + memset(&params, 0, sizeof(params)); + params.api = test.api; + params.command = test.command; + params.test_type = test.test_type; + params.thread_mode = UCS_THREAD_MODE_SINGLE; + params.async_mode = UCS_ASYNC_THREAD_LOCK_TYPE; + params.thread_count = 1; + params.wait_mode = UCX_PERF_WAIT_MODE_LAST; + params.flags = test.test_flags | flags; + params.am_hdr_size = 8; + params.alignment = ucs_get_page_size(); + params.max_outstanding = test.max_outstanding; + if (ucs::test_time_multiplier() == 1) { + params.warmup_iter = test.iters / 10; + params.max_iter = test.iters; + } else { + params.warmup_iter = 0; + params.max_iter = ucs_min(20u, test.iters / ucs::test_time_multiplier()); + } + params.max_time = 0.0; + params.report_interval = 1.0; + params.rte_group = NULL; + params.rte = &rte::test_rte; + params.report_arg = NULL; + ucs_strncpy_zero(params.uct.dev_name, dev_name.c_str(), sizeof(params.uct.dev_name)); + ucs_strncpy_zero(params.uct.tl_name , tl_name.c_str(), sizeof(params.uct.tl_name)); + params.uct.data_layout = (uct_perf_data_layout_t)test.data_layout; + params.uct.fc_window = UCT_PERF_TEST_MAX_FC_WINDOW; + params.msg_size_cnt = test.msglencnt; + params.msg_size_list = (size_t *)test.msglen; + params.iov_stride = test.msg_stride; + params.ucp.send_datatype = (ucp_perf_datatype_t)test.data_layout; + params.ucp.recv_datatype = (ucp_perf_datatype_t)test.data_layout; + + thread_arg arg0; + arg0.params = params; + arg0.cpu = cpus[0]; + + rte rte0(0, c0to1, c1to0); + arg0.params.rte_group = &rte0; + + pthread_t thread0, thread1; + int ret = pthread_create(&thread0, NULL, thread_func, &arg0); + if (ret) { + UCS_TEST_MESSAGE << 
strerror(ret); /* pthread_create() returns the error number; it does not set errno */ + throw ucs::test_abort_exception(); + } + + thread_arg arg1; + arg1.params = params; + arg1.cpu = cpus[1]; + + rte rte1(1, c1to0, c0to1); + arg1.params.rte_group = &rte1; + + ret = pthread_create(&thread1, NULL, thread_func, &arg1); + if (ret) { + UCS_TEST_MESSAGE << strerror(ret); /* pthread_create() returns the error number; it does not set errno */ + throw ucs::test_abort_exception(); + } + + void *ptr0, *ptr1; + pthread_join(thread0, &ptr0); + pthread_join(thread1, &ptr1); + + test_result *result0 = reinterpret_cast(ptr0), + *result1 = reinterpret_cast(ptr1); + test_result result = *result1; + delete result0; + delete result1; + return result; +} + +void test_perf::run_test(const test_spec& test, unsigned flags, bool check_perf, + const std::string &tl_name, const std::string &dev_name) +{ + std::vector cpus = get_affinity(); + if (cpus.size() < 2) { + UCS_TEST_MESSAGE << "Need at least 2 CPUs (got: " << cpus.size() << " )"; + throw ucs::test_abort_exception(); + } + cpus.resize(2); + + check_perf = check_perf && + (ucs::test_time_multiplier() == 1) && + (ucs::perf_retry_count > 0); + for (int i = 0; i < (ucs::perf_retry_count + 1); ++i) { + test_result result = run_multi_threaded(test, flags, tl_name, dev_name, + cpus); + if ((result.status == UCS_ERR_UNSUPPORTED) || + (result.status == UCS_ERR_UNREACHABLE)) + { + return; /* Skipped */ + } + + ASSERT_UCS_OK(result.status); + + double value = *(double*)( ((char*)&result.result) + test.field_offset) * + test.norm; + char result_str[200] = {0}; + snprintf(result_str, sizeof(result_str) - 1, "%s %25s : %.3f %s", + dev_name.c_str(), test.title, value, test.units); + if (i == 0) { + if (check_perf) { + UCS_TEST_MESSAGE << result_str; + } else { + UCS_TEST_MESSAGE << result_str << " (performance not checked)"; + } + } else { + UCS_TEST_MESSAGE << result_str << " (attempt " << i << ")"; + } + + if (!check_perf) { + return; /* Skip */ + } 
else if ((value >= test.min) && (value <= test.max)) { + return; /* Success */ + } else { + 
ucs::safe_sleep(ucs::perf_retry_interval); + } + } + + ADD_FAILURE() << "Invalid " << test.title << " performance, expected: " << + std::setprecision(3) << test.min << ".." << test.max; +} + diff --git a/test/gtest/common/test_perf.h b/test/gtest/common/test_perf.h new file mode 100644 index 0000000..795000b --- /dev/null +++ b/test/gtest/common/test_perf.h @@ -0,0 +1,107 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef TEST_PERF_H_ +#define TEST_PERF_H_ + +#include +#include + + +class test_perf { +protected: + struct test_spec { + const char *title; + const char *units; + ucx_perf_api_t api; + ucx_perf_cmd_t command; + ucx_perf_test_type_t test_type; + int data_layout; + size_t msg_stride; + size_t msglencnt; + size_t msglen[3]; + unsigned max_outstanding; + size_t iters; + size_t field_offset; + double norm; + double min; /* TODO remove this field */ + double max; /* TODO remove this field */ + unsigned test_flags; + }; + + static std::vector get_affinity(); + + void run_test(const test_spec& test, unsigned flags, bool check_perf, + const std::string &tl_name, const std::string &dev_name); + +private: + class rte_comm { + public: + rte_comm(); + + void push(const void *data, size_t size); + + void pop(void *data, size_t size, void (*progress)(void *arg), void *arg); + + private: + pthread_mutex_t m_mutex; + std::string m_queue; + }; + + class rte { + public: + /* RTE functions */ + rte(unsigned index, rte_comm& send, rte_comm& recv); + + unsigned index() const; + + static unsigned group_size(void *rte_group); + + static unsigned group_index(void *rte_group); + + static void barrier(void *rte_group, void (*progress)(void *arg), + void *arg); + + static void post_vec(void *rte_group, const struct iovec *iovec, + int iovcnt, void **req); + + static void recv(void *rte_group, unsigned src, void *buffer, + size_t max, void *req); + + static void exchange_vec(void *rte_group, void * 
req); + + static void report(void *rte_group, const ucx_perf_result_t *result, + void *arg, int is_final); + + static ucx_perf_rte_t test_rte; + + private: + const unsigned m_index; + rte_comm &m_send; + rte_comm &m_recv; + }; + + struct thread_arg { + ucx_perf_params_t params; + int cpu; + }; + + struct test_result { + ucs_status_t status; + ucx_perf_result_t result; + }; + + static void set_affinity(int cpu); + + static void* thread_func(void *arg); + + test_result run_multi_threaded(const test_spec &test, unsigned flags, + const std::string &tl_name, + const std::string &dev_name, + const std::vector &cpus); +}; + +#endif diff --git a/test/gtest/common/test_watchdog.cc b/test/gtest/common/test_watchdog.cc new file mode 100644 index 0000000..9b7d6e2 --- /dev/null +++ b/test/gtest/common/test_watchdog.cc @@ -0,0 +1,99 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +class test_watchdog : public ucs::test { +public: + void reset_to_default() { + ucs::watchdog_signal(); + // all have to be set to their default values + EXPECT_EQ(ucs::WATCHDOG_RUN, ucs::watchdog_get_state()); + EXPECT_EQ(ucs::watchdog_timeout_default, ucs::watchdog_get_timeout()); + } +}; + +UCS_TEST_F(test_watchdog, watchdog_set) { + EXPECT_EQ(ucs::WATCHDOG_RUN, ucs::watchdog_get_state()); + EXPECT_EQ(ucs::watchdog_timeout_default, ucs::watchdog_get_timeout()); + EXPECT_EQ(SIGABRT, ucs::watchdog_get_kill_signal()); + + ucs::watchdog_set(ucs::WATCHDOG_TEST); + // when the test state is applied, the watchdog + // changes state to WATCHDOG_DEFAULT_SET + EXPECT_EQ(ucs::WATCHDOG_DEFAULT_SET, ucs::watchdog_get_state()); + EXPECT_EQ(ucs::watchdog_timeout_default, ucs::watchdog_get_timeout()); + EXPECT_EQ(SIGTERM, ucs::watchdog_get_kill_signal()); + + reset_to_default(); + + ucs::watchdog_set(500.); + EXPECT_EQ(ucs::WATCHDOG_DEFAULT_SET, ucs::watchdog_get_state()); + 
EXPECT_EQ(500., ucs::watchdog_get_timeout()); + EXPECT_EQ(SIGABRT, ucs::watchdog_get_kill_signal()); + + reset_to_default(); + + ucs::watchdog_set(ucs::WATCHDOG_TEST, 100.); + // when the test state and the timeout are applied, + // the watchdog changes state to WATCHDOG_DEFAULT_SET + EXPECT_EQ(ucs::WATCHDOG_DEFAULT_SET, ucs::watchdog_get_state()); + EXPECT_EQ(100., ucs::watchdog_get_timeout()); + EXPECT_EQ(SIGTERM, ucs::watchdog_get_kill_signal()); + + reset_to_default(); + + ucs::watchdog_set(ucs::WATCHDOG_DEFAULT_SET, 200.); + // when the default-set state and a timeout are applied, the watchdog + // is expected to stay in WATCHDOG_RUN with the default timeout + EXPECT_EQ(ucs::WATCHDOG_RUN, ucs::watchdog_get_state()); + EXPECT_EQ(ucs::watchdog_timeout_default, ucs::watchdog_get_timeout()); + EXPECT_EQ(SIGABRT, ucs::watchdog_get_kill_signal()); + + ucs::watchdog_set(ucs::WATCHDOG_DEFAULT_SET); + EXPECT_EQ(ucs::WATCHDOG_RUN, ucs::watchdog_get_state()); + EXPECT_EQ(ucs::watchdog_timeout_default, ucs::watchdog_get_timeout()); + EXPECT_EQ(SIGABRT, ucs::watchdog_get_kill_signal()); +} + +UCS_TEST_F(test_watchdog, watchdog_signal) { + for (int i = 0; i < 10; i++) { + ucs::watchdog_signal(); + } + + EXPECT_EQ(ucs::WATCHDOG_RUN, ucs::watchdog_get_state()); +} + +UCS_TEST_F(test_watchdog, watchdog_timeout) { + double timeout, sleep_time; + char *gtest_timeout, *gtest_sleep_time; + + /* This test can not be run with the other tests + * because it terminates testing due to timeout + */ + gtest_timeout = getenv("WATCHDOG_GTEST_TIMEOUT_"); + if (gtest_timeout == NULL) { + UCS_TEST_SKIP_R("WATCHDOG_GTEST_TIMEOUT_ is not set"); + } + timeout = atof(gtest_timeout); + + gtest_sleep_time = getenv("WATCHDOG_GTEST_SLEEP_TIME_"); + if (gtest_sleep_time == NULL) { + UCS_TEST_SKIP_R("WATCHDOG_GTEST_SLEEP_TIME_ is not set"); + } + sleep_time = atof(gtest_sleep_time); + + ucs::watchdog_set(ucs::WATCHDOG_TEST, timeout); + + sleep((int)ceil(sleep_time)); + + // shouldn't reach this statement + 
ASSERT_NE(timeout, timeout); 
+} diff --git a/test/gtest/configure.m4 b/test/gtest/configure.m4 new file mode 100644 index 0000000..2c0d045 --- /dev/null +++ b/test/gtest/configure.m4 @@ -0,0 +1,29 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +AC_LANG_PUSH([C++]) + +CHECK_COMPILER_FLAG([-fno-tree-vectorize], [-fno-tree-vectorize], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [GTEST_CXXFLAGS="$GTEST_CXXFLAGS -fno-tree-vectorize"], + []) + +# error #236: controlling expression is constant +CHECK_COMPILER_FLAG([--diag_suppress 236], [--diag_suppress 236], + [AC_LANG_SOURCE([[int main(){return 0;}]])], + [GTEST_CXXFLAGS="$GTEST_CXXFLAGS --diag_suppress 236"], + []) + +AC_LANG_POP([C++]) + +AC_SUBST([GTEST_CXXFLAGS], [$GTEST_CXXFLAGS]) + +test_modules="" +m4_include([test/gtest/ucm/test_dlopen/configure.m4]) +m4_include([test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4]) +m4_include([test/gtest/ucs/test_module/configure.m4]) +AC_DEFINE_UNQUOTED([test_MODULES], ["${test_modules}"], [Test loadable modules]) +AC_CONFIG_FILES([test/gtest/Makefile]) diff --git a/test/gtest/ucm/cuda_hooks.cc b/test/gtest/ucm/cuda_hooks.cc new file mode 100644 index 0000000..bc13d43 --- /dev/null +++ b/test/gtest/ucm/cuda_hooks.cc @@ -0,0 +1,235 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ +#include +#include +#include +#include + +static ucm_event_t alloc_event, free_event; + +static void cuda_mem_alloc_callback(ucm_event_type_t event_type, + ucm_event_t *event, void *arg) +{ + alloc_event.mem_type.address = event->mem_type.address; + alloc_event.mem_type.size = event->mem_type.size; + alloc_event.mem_type.mem_type = event->mem_type.mem_type; +} + +static void cuda_mem_free_callback(ucm_event_type_t event_type, + ucm_event_t *event, void *arg) +{ + free_event.mem_type.address = event->mem_type.address; + free_event.mem_type.size = event->mem_type.size; + free_event.mem_type.mem_type = event->mem_type.mem_type; +} + + +class cuda_hooks : public ucs::test { +protected: + + virtual void init() { + ucs_status_t result; + CUresult ret; + ucs::test::init(); + + /* initialize device context */ + if (cudaSetDevice(0) != cudaSuccess) { + UCS_TEST_SKIP_R("can't set cuda device"); + } + + ret = cuInit(0); + if (ret != CUDA_SUCCESS) { + UCS_TEST_SKIP_R("can't init cuda device"); + } + + ret = cuDeviceGet(&device, 0); + if (ret != CUDA_SUCCESS) { + UCS_TEST_SKIP_R("can't get cuda device"); + } + + ret = cuCtxCreate(&context, 0, device); + if (ret != CUDA_SUCCESS) { + UCS_TEST_SKIP_R("can't create cuda context"); + } + + /* install memory hooks */ + result = ucm_set_event_handler(UCM_EVENT_MEM_TYPE_ALLOC, 0, cuda_mem_alloc_callback, + reinterpret_cast(this)); + ASSERT_UCS_OK(result); + + result = ucm_set_event_handler(UCM_EVENT_MEM_TYPE_FREE, 0, cuda_mem_free_callback, + reinterpret_cast(this)); + ASSERT_UCS_OK(result); + } + + virtual void cleanup() { + CUresult ret; + + ucm_unset_event_handler(UCM_EVENT_MEM_TYPE_ALLOC, cuda_mem_alloc_callback, + reinterpret_cast(this)); + ucm_unset_event_handler(UCM_EVENT_MEM_TYPE_FREE, cuda_mem_free_callback, + reinterpret_cast(this)); + + ret = cuCtxDestroy(context); + EXPECT_EQ(ret, CUDA_SUCCESS); + + ucs::test::cleanup(); + } + + + void check_mem_alloc_events(void *ptr, size_t size, + int expect_mem_type = 
UCS_MEMORY_TYPE_CUDA) { + ASSERT_EQ(ptr, alloc_event.mem_type.address); + ASSERT_EQ(size, alloc_event.mem_type.size); + ASSERT_EQ(expect_mem_type, alloc_event.mem_type.mem_type); + } + + void check_mem_free_events(void *ptr, size_t size, + int expect_mem_type = UCS_MEMORY_TYPE_CUDA) { + ASSERT_EQ(ptr, free_event.mem_type.address); + ASSERT_EQ(expect_mem_type, free_event.mem_type.mem_type); + } + + CUdevice device; + CUcontext context; +}; + +UCS_TEST_F(cuda_hooks, test_cuMem_Alloc_Free) { + CUresult ret; + CUdeviceptr dptr, dptr1; + + /* small allocation */ + ret = cuMemAlloc(&dptr, 64); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_alloc_events((void *)dptr, 64); + + ret = cuMemFree(dptr); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_free_events((void *)dptr, 64); + + /* large allocation */ + ret = cuMemAlloc(&dptr, (256 * 1024 *1024)); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_alloc_events((void *)dptr, (256 * 1024 *1024)); + + ret = cuMemFree(dptr); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_free_events((void *)dptr, (256 * 1024 *1024)); + + /* multiple allocations, cudafree in reverse order */ + ret = cuMemAlloc(&dptr, (1 * 1024 *1024)); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_alloc_events((void *)dptr, (1 * 1024 *1024)); + + ret = cuMemAlloc(&dptr1, (1 * 1024 *1024)); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_alloc_events((void *)dptr1, (1 * 1024 *1024)); + + ret = cuMemFree(dptr1); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_free_events((void *)dptr1, (1 * 1024 *1024)); + + ret = cuMemFree(dptr); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_free_events((void *)dptr, (1 * 1024 *1024)); +} + +UCS_TEST_F(cuda_hooks, test_cuMemAllocManaged) { + CUresult ret; + CUdeviceptr dptr; + + ret = cuMemAllocManaged(&dptr, 64, CU_MEM_ATTACH_GLOBAL); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_alloc_events((void *)dptr, 64, UCS_MEMORY_TYPE_CUDA_MANAGED); + + ret = cuMemFree(dptr); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_free_events((void *)dptr, 0); +} + 
+UCS_TEST_F(cuda_hooks, test_cuMemAllocPitch) { + CUresult ret; + CUdeviceptr dptr; + size_t pitch; + + ret = cuMemAllocPitch(&dptr, &pitch, 4, 8, 4); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_alloc_events((void *)dptr, (4 * 8)); + + ret = cuMemFree(dptr); + ASSERT_EQ(ret, CUDA_SUCCESS); + check_mem_free_events((void *)dptr, 0); +} + +UCS_TEST_F(cuda_hooks, test_cuda_Malloc_Free) { + cudaError_t ret; + void *ptr, *ptr1; + + /* small allocation */ + ret = cudaMalloc(&ptr, 64); + ASSERT_EQ(ret, cudaSuccess); + check_mem_alloc_events(ptr, 64); + + ret = cudaFree(ptr); + ASSERT_EQ(ret, cudaSuccess); + check_mem_free_events(ptr, 64); + + /* large allocation */ + ret = cudaMalloc(&ptr, (256 * 1024 *1024)); + ASSERT_EQ(ret, cudaSuccess); + check_mem_alloc_events(ptr, (256 * 1024 *1024)); + + ret = cudaFree(ptr); + ASSERT_EQ(ret, cudaSuccess); + check_mem_free_events(ptr, (256 * 1024 *1024)); + + /* multiple allocations, cudafree in reverse order */ + ret = cudaMalloc(&ptr, (1 * 1024 *1024)); + ASSERT_EQ(ret, cudaSuccess); + check_mem_alloc_events(ptr, (1 * 1024 *1024)); + + ret = cudaMalloc(&ptr1, (1 * 1024 *1024)); + ASSERT_EQ(ret, cudaSuccess); + check_mem_alloc_events(ptr1, (1 * 1024 *1024)); + + ret = cudaFree(ptr1); + ASSERT_EQ(ret, cudaSuccess); + check_mem_free_events(ptr1, (1 * 1024 *1024)); + + ret = cudaFree(ptr); + ASSERT_EQ(ret, cudaSuccess); + check_mem_free_events(ptr, (1 * 1024 *1024)); + + /* cudaFree with NULL */ + ret = cudaFree(NULL); + ASSERT_EQ(ret, cudaSuccess); +} + +UCS_TEST_F(cuda_hooks, test_cudaMallocManaged) { + cudaError_t ret; + void *ptr; + + ret = cudaMallocManaged(&ptr, 64, cudaMemAttachGlobal); + ASSERT_EQ(ret, cudaSuccess); + check_mem_alloc_events(ptr, 64, UCS_MEMORY_TYPE_CUDA_MANAGED); + + ret = cudaFree(ptr); + ASSERT_EQ(ret, cudaSuccess); + check_mem_free_events(ptr, 0); +} + +UCS_TEST_F(cuda_hooks, test_cudaMallocPitch) { + cudaError_t ret; + void *devPtr; + size_t pitch; + + ret = cudaMallocPitch(&devPtr, &pitch, 4, 8); + 
ASSERT_EQ(ret, cudaSuccess); + check_mem_alloc_events(devPtr, (4 * 8)); + + ret = cudaFree(devPtr); + ASSERT_EQ(ret, cudaSuccess); + check_mem_free_events(devPtr, 0); +} diff --git a/test/gtest/ucm/malloc_hook.cc b/test/gtest/ucm/malloc_hook.cc new file mode 100644 index 0000000..3df965a --- /dev/null +++ b/test/gtest/ucm/malloc_hook.cc @@ -0,0 +1,1306 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#define __STDC_LIMIT_MACROS + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include +#include +#include +#include +#include +} + +#if HAVE_MALLOC_SET_STATE && HAVE_MALLOC_GET_STATE +# define HAVE_MALLOC_STATES 1 +#endif /* HAVE_MALLOC_SET_STATE && HAVE_MALLOC_GET_STATE */ + +#define EXPECT_INCREASED(_value, _prev, _size, _name) \ + { \ + EXPECT_GE(_value, (_prev) + (_size)) << _name; \ + _prev = _value; \ + } + +template +class mhook_thread { +public: + mhook_thread(T *test): m_test(test) + { + pthread_create(&m_thread, NULL, thread_func, reinterpret_cast(m_test)); + } + + ~mhook_thread() { + join(); + delete m_test; + } + + void join() { + void *retval; + pthread_join(m_thread, &retval); + } + +protected: + T *m_test; + pthread_t m_thread; + + static void *thread_func(void *arg) { + T *test = reinterpret_cast(arg); + test->test(); + return NULL; + } +}; + +template +class mmap_event { +public: + mmap_event(T *test): m_test(test), m_events(0) + { + } + + ~mmap_event() + { + unset(); + } + + ucs_status_t set(int events) + { + ucs_status_t status; + + status = ucm_set_event_handler(events, 0, mem_event_callback, + reinterpret_cast(m_test)); + ASSERT_UCS_OK(status); + m_events |= events; + return status; + } + + void unset() + { + if (m_events) { + ucm_unset_event_handler(m_events, mem_event_callback, + reinterpret_cast(m_test)); + m_events = 0; + } + } + +protected: + T *m_test; + int m_events; + + static void 
mem_event_callback(ucm_event_type_t event_type, + ucm_event_t *event, + void *arg) + { + T *test = reinterpret_cast(arg); + test->mem_event(event_type, event); + } +}; + + +class malloc_hook : public ucs::test { + friend class mmap_event; +protected: + /* use template argument to call/not call vm_unmap handler */ + /* GCC 4.4.7 doesn't allow to define template static function + * with integer template argument. using template inner class + * to define static function */ + template class bistro_hook { + public: + static int munmap(void *addr, size_t length) + { + UCM_BISTRO_PROLOGUE; + malloc_hook::bistro_call_counter++; + if (C) { + /* notify aggregate vm_munmap event only */ + ucm_vm_munmap(addr, length); + } + int res = (intptr_t)syscall(SYS_munmap, addr, length); + UCM_BISTRO_EPILOGUE; + return res; + } + + static int madvise(void *addr, size_t length, int advise) + { + UCM_BISTRO_PROLOGUE; + malloc_hook::bistro_call_counter++; + if (C) { + /* notify aggregate vm_munmap event only */ + ucm_vm_munmap(addr, length); + } + int res = (intptr_t)syscall(SYS_madvise, addr, length, advise); + UCM_BISTRO_EPILOGUE; + return res; + } + }; + + class bistro_patch { + public: + bistro_patch(const char* symbol, void *hook) + { + ucs_status_t status; + + status = ucm_bistro_patch(symbol, hook, &m_rp); + ASSERT_UCS_OK(status); + EXPECT_NE((intptr_t)m_rp, 0); + } + + ~bistro_patch() + { + ucm_bistro_restore(m_rp); + } + + protected: + ucm_bistro_restore_point_t *m_rp; + }; + + void mem_event(ucm_event_type_t event_type, ucm_event_t *event) + { + m_got_event = 1; + } + + virtual void init() { + ucs_status_t status; + mmap_event event(this); + + m_got_event = 0; + ucm_malloc_state_reset(128 * 1024, 128 * 1024); + malloc_trim(0); + status = event.set(UCM_EVENT_VM_MAPPED); + ASSERT_UCS_OK(status); + + for (;;) { + void *ptr = malloc(small_alloc_size); + if (m_got_event) { + /* If the heap grew, the minimal size is the previous one */ + free(ptr); + break; + } else { + 
m_pts.push_back(ptr); + } + } + event.unset(); + } + +public: + static int small_alloc_count; + static const size_t small_alloc_size = 10000; + ucs::ptr_vector m_pts; + int m_got_event; + static volatile int bistro_call_counter; +}; + +static bool skip_on_bistro() { + return (ucm_global_opts.mmap_hook_mode == UCM_MMAP_HOOK_BISTRO); +} + +static bool skip_on_bistro_without_valgrind() { + /* BISTRO is disabled under valgrind, we may run tests */ + return (skip_on_bistro() && !RUNNING_ON_VALGRIND); +} + +int malloc_hook::small_alloc_count = 1000 / ucs::test_time_multiplier(); +volatile int malloc_hook::bistro_call_counter = 0; + +class test_thread { +public: + test_thread(const std::string& name, int num_threads, pthread_barrier_t *barrier, + malloc_hook *test, void (test_thread::*test_func)() = &test_thread::test) : + m_name(name), m_num_threads(num_threads), m_barrier(barrier), + m_map_size(0), m_unmap_size(0), m_test(test), m_event(this) + { + pthread_mutex_init(&m_stats_lock, NULL); + } + + ~test_thread() { + pthread_mutex_destroy(&m_stats_lock); + } + + void test(); + void mem_event(ucm_event_type_t event_type, ucm_event_t *event); + +private: + typedef std::pair range; + + bool is_ptr_in_range(void *ptr, size_t size, const std::vector &ranges) { + for (std::vector::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) { + if ((ptr >= iter->first) && ((char*)ptr < iter->second)) { + return true; + } + } + return false; + } + + static pthread_mutex_t lock; + static pthread_barrier_t barrier; + + std::string m_name; + int m_num_threads; + pthread_barrier_t *m_barrier; + + pthread_mutex_t m_stats_lock; + size_t m_map_size; + size_t m_unmap_size; + std::vector m_map_ranges; + std::vector m_unmap_ranges; + + malloc_hook *m_test; + mmap_event m_event; +}; + +pthread_mutex_t test_thread::lock = PTHREAD_MUTEX_INITIALIZER; + +void test_thread::mem_event(ucm_event_type_t event_type, ucm_event_t *event) +{ + pthread_mutex_lock(&m_stats_lock); + switch 
(event_type) { + case UCM_EVENT_VM_MAPPED: + m_map_ranges.push_back(range(event->vm_mapped.address, + (char*)event->vm_mapped.address + event->vm_mapped.size)); + m_map_size += event->vm_mapped.size; + break; + case UCM_EVENT_VM_UNMAPPED: + m_unmap_ranges.push_back(range(event->vm_unmapped.address, + (char*)event->vm_unmapped.address + event->vm_unmapped.size)); + m_unmap_size += event->vm_unmapped.size; + break; + default: + break; + } + pthread_mutex_unlock(&m_stats_lock); +} + +void test_thread::test() { + static const size_t large_alloc_size = 40 * 1024 * 1024; + ucs_status_t result; + ucs::ptr_vector old_ptrs; + ucs::ptr_vector new_ptrs; + void *ptr_r; + size_t small_map_size; + const size_t small_alloc_size = malloc_hook::small_alloc_size; + int num_ptrs_in_range; + static volatile uint32_t total_ptrs_in_range = 0; + char *test_str; + + /* Allocate some pointers with old heap manager */ + for (unsigned i = 0; i < 10; ++i) { + old_ptrs.push_back(malloc(small_alloc_size)); + } + + ptr_r = malloc(small_alloc_size); + + m_map_ranges.reserve ((m_test->small_alloc_count * 8 + 10) * m_num_threads); + m_unmap_ranges.reserve((m_test->small_alloc_count * 8 + 10) * m_num_threads); + + total_ptrs_in_range = 0; + + pthread_barrier_wait(m_barrier); + + /* Install memory hooks */ + result = m_event.set(UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED); + ASSERT_UCS_OK(result); + + /* Allocate small pointers with new heap manager */ + for (int i = 0; i < m_test->small_alloc_count; ++i) { + new_ptrs.push_back(malloc(small_alloc_size)); + } + small_map_size = m_map_size; + + /* If this test runs more than once, then sbrk may not really allocate new + * memory + */ + EXPECT_GT(m_map_size, 0lu) << m_name; + + num_ptrs_in_range = 0; + for (ucs::ptr_vector::const_iterator iter = new_ptrs.begin(); + iter != new_ptrs.end(); ++iter) + { + if (is_ptr_in_range(*iter, small_alloc_size, m_map_ranges)) { + ++num_ptrs_in_range; + } + } + + /* Need at least one ptr in the mapped ranges in one the 
threads */ + ucs_atomic_add32(&total_ptrs_in_range, num_ptrs_in_range); + pthread_barrier_wait(m_barrier); + + EXPECT_GT(total_ptrs_in_range, 0u); + + /* Allocate large chunk */ + void *ptr = malloc(large_alloc_size); + EXPECT_GE(m_map_size, large_alloc_size + small_map_size) << m_name; + EXPECT_TRUE(is_ptr_in_range(ptr, large_alloc_size, m_map_ranges)) << m_name; + EXPECT_GE(malloc_usable_size(ptr), large_alloc_size); + + free(ptr); + EXPECT_GE(m_unmap_size, large_alloc_size) << m_name; + /* coverity[pass_freed_arg] */ + EXPECT_TRUE(is_ptr_in_range(ptr, large_alloc_size, m_unmap_ranges)) << m_name; + + /* Test strdup */ + void *s = strdup("test"); + free(s); + + /* Test setenv */ + pthread_mutex_lock(&lock); + setenv("TEST", "VALUE", 1); + test_str = getenv("TEST"); + if (test_str != NULL) { + EXPECT_EQ(std::string("VALUE"), test_str); + } else { + UCS_TEST_ABORT("getenv(\"TEST\") returned NULL"); + } + pthread_mutex_unlock(&lock); + + /* Test username */ + ucs_get_user_name(); + + /* Test usable size */ + EXPECT_GE(malloc_usable_size(ptr_r), small_alloc_size); + + /* Test realloc */ + ptr_r = realloc(ptr_r, small_alloc_size / 2); + free(ptr_r); + + /* Test C++ allocations */ + { + std::vector vec(large_alloc_size, 0); + ptr = &vec[0]; + EXPECT_TRUE(is_ptr_in_range(ptr, large_alloc_size, m_map_ranges)) << m_name; + } + + /* coverity[use_after_free] - we don't dereference ptr, just search it*/ + EXPECT_TRUE(is_ptr_in_range(ptr, large_alloc_size, m_unmap_ranges)) << m_name; + + /* Release old pointers (should not crash) */ + old_ptrs.clear(); + + m_map_ranges.clear(); + m_unmap_ranges.clear(); + + /* Don't release pointers before other threads exit, so they will map new memory + * and not reuse memory from other threads. 
+ */ + pthread_barrier_wait(m_barrier); + + /* This must be done when all other threads are inactive, otherwise we may leak */ +#if HAVE_MALLOC_STATES + if (!RUNNING_ON_VALGRIND) { + pthread_mutex_lock(&lock); + void *state = malloc_get_state(); + malloc_set_state(state); + free(state); + pthread_mutex_unlock(&lock); + } +#endif /* HAVE_MALLOC_STATES */ + + pthread_barrier_wait(m_barrier); + + /* Release new pointers */ + new_ptrs.clear(); + + /* Call several malloc routines */ + malloc_trim(0); + + ptr = malloc(large_alloc_size); + + free(ptr); + + /* shmat/shmdt */ + size_t shm_seg_size = ucs_get_page_size() * 2; + int shmid = shmget(IPC_PRIVATE, shm_seg_size, IPC_CREAT | SHM_R | SHM_W); + EXPECT_NE(-1, shmid) << strerror(errno); + + ptr = shmat(shmid, NULL, 0); + EXPECT_NE(MAP_FAILED, ptr) << strerror(errno); + + shmdt(ptr); + shmctl(shmid, IPC_RMID, NULL); + + EXPECT_TRUE(is_ptr_in_range(ptr, shm_seg_size, m_unmap_ranges)); + + ptr = mmap(NULL, shm_seg_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, ptr) << strerror(errno); + madvise(ptr, shm_seg_size, MADV_DONTNEED); + + EXPECT_TRUE(is_ptr_in_range(ptr, shm_seg_size, m_unmap_ranges)); + munmap(ptr, shm_seg_size); + + /* Print results */ + pthread_mutex_lock(&lock); + UCS_TEST_MESSAGE << m_name + << ": small mapped: " << small_map_size + << ", total mapped: " << m_map_size + << ", total unmapped: " << m_unmap_size; + std::cout.flush(); + pthread_mutex_unlock(&lock); + + m_event.unset(); +} + +UCS_TEST_SKIP_COND_F(malloc_hook, single_thread, + skip_on_bistro_without_valgrind()) { + pthread_barrier_t barrier; + pthread_barrier_init(&barrier, NULL, 1); + { + mhook_thread(new test_thread("single-thread", 1, &barrier, this)); + } + pthread_barrier_destroy(&barrier); +} + +UCS_TEST_SKIP_COND_F(malloc_hook, multi_threads, + skip_on_bistro_without_valgrind()) { + typedef mhook_thread thread_t; + + static const int num_threads = 8; + ucs::ptr_vector threads; + pthread_barrier_t 
barrier; + + malloc_trim(0); + + pthread_barrier_init(&barrier, NULL, num_threads); + for (int i = 0; i < num_threads; ++i) { + std::stringstream ss; + ss << "thread " << i << "/" << num_threads; + threads.push_back(new thread_t(new test_thread(ss.str(), num_threads, &barrier, this))); + } + + threads.clear(); + pthread_barrier_destroy(&barrier); +} + +UCS_TEST_F(malloc_hook, asprintf) { + /* Install memory hooks */ + (void)dlerror(); +} + +UCS_TEST_SKIP_COND_F(malloc_hook, fork, "broken") { + static const int num_processes = 4; + pthread_barrier_t barrier; + std::vector pids; + pid_t pid; + + for (int i = 0; i < num_processes; ++i) { + pid = fork(); + if (pid == 0) { + pthread_barrier_init(&barrier, NULL, 1); + { + std::stringstream ss; + ss << "process " << i << "/" << num_processes; + test_thread thread(ss.str(), 1, &barrier, this); + } + pthread_barrier_destroy(&barrier); + throw ucs::exit_exception(HasFailure()); + } + pids.push_back(pid); + } + + for (int i = 0; i < num_processes; ++i) { + int status; + waitpid(pids[i], &status, 0); + EXPECT_EQ(0, WEXITSTATUS(status)) << "Process " << i << " failed"; + } +} + +class malloc_hook_cplusplus : public malloc_hook { +public: + + malloc_hook_cplusplus() : + m_mapped_size(0), m_unmapped_size(0), + m_dynamic_mmap_config(ucm_global_opts.enable_dynamic_mmap_thresh), + m_event(this) { + } + + ~malloc_hook_cplusplus() { + ucm_global_opts.enable_dynamic_mmap_thresh = m_dynamic_mmap_config; + } + + void set() { + ucs_status_t status; + status = m_event.set(UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED); + ASSERT_UCS_OK(status); + } + + void unset() { + m_event.unset(); + } + + void mem_event(ucm_event_type_t event_type, ucm_event_t *event) + { + switch (event_type) { + case UCM_EVENT_VM_MAPPED: + m_mapped_size += event->vm_mapped.size; + break; + case UCM_EVENT_VM_UNMAPPED: + m_unmapped_size += event->vm_unmapped.size; + break; + default: + break; + } + } + +protected: + double measure_alloc_time(size_t size, unsigned iters) + 
{ + ucs_time_t start_time = ucs_get_time(); + for (unsigned i = 0; i < iters; ++i) { + void *ptr = malloc(size); + /* prevent the compiler from optimizing-out the memory allocation */ + *(volatile char*)ptr = '5'; + free(ptr); + } + return ucs_time_to_sec(ucs_get_time() - start_time); + } + + void test_dynamic_mmap_thresh() + { + const size_t size = 8 * UCS_MBYTE; + + set(); + + std::vector strs; + + m_mapped_size = 0; + while (m_mapped_size < size) { + strs.push_back(std::string(size, 't')); + } + + m_unmapped_size = 0; + strs.clear(); + EXPECT_GE(m_unmapped_size, size); + + m_mapped_size = 0; + while (m_mapped_size < size) { + strs.push_back(std::string()); + strs.back().resize(size); + } + + m_unmapped_size = 0; + strs.clear(); + if (ucm_global_opts.enable_dynamic_mmap_thresh) { + EXPECT_EQ(0ul, m_unmapped_size); + } else { + EXPECT_GE(m_unmapped_size, size); + } + + unset(); + } + + size_t m_mapped_size; + size_t m_unmapped_size; + int m_dynamic_mmap_config; + mmap_event m_event; +}; + + +class mmap_hooks { +public: + mmap_hooks(const std::string& name, int num_threads, pthread_barrier_t *barrier): + m_num_threads(num_threads), m_mapped_size(0), m_unmapped_size(0), + m_search_ptr(NULL), m_is_ptr_found(false), m_name(name), + m_barrier(barrier), m_event(this) + { + pthread_spin_init(&m_lock,0); + } + + void mem_event(ucm_event_type_t event_type, ucm_event_t *event) + { + pthread_spin_lock(&m_lock); + switch (event_type) { + case UCM_EVENT_VM_MAPPED: + m_mapped_size += event->vm_mapped.size; + break; + case UCM_EVENT_VM_UNMAPPED: + m_unmapped_size += event->vm_unmapped.size; + if (m_search_ptr == event->vm_unmapped.address) { + m_is_ptr_found = true; + } + break; + default: + break; + } + pthread_spin_unlock(&m_lock); + } + + void test() + { + /* + * Test memory mapping functions which override an existing mapping + */ + size_t size = ucs_get_page_size() * 800; + size_t mapped_size = 0; + size_t unmapped_size = 0; + void *buffer; + int shmid; + ucs_status_t 
status; + + EXPECT_EQ(0u, m_mapped_size) << m_name; + EXPECT_EQ(0u, m_unmapped_size) << m_name; + + status = m_event.set(UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED); + ASSERT_UCS_OK(status); + + pthread_barrier_wait(m_barrier); + + /* 1. Map a large buffer */ + { + buffer = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, buffer) << strerror(errno); + + EXPECT_INCREASED(m_mapped_size, mapped_size, size, m_name); + EXPECT_INCREASED(m_unmapped_size, unmapped_size, 0, m_name); + } + + /* + * 2. Map another buffer in the same place. + * Expected behavior: unmap event on the old buffer + */ + { + void *remap = mmap(buffer, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0); + ASSERT_EQ(buffer, remap); + + EXPECT_INCREASED(m_mapped_size, mapped_size, size, m_name); + EXPECT_INCREASED(m_unmapped_size, unmapped_size, size, m_name); + } + + /* 3. Create a shared memory segment */ + { + shmid = shmget(IPC_PRIVATE, size, IPC_CREAT | SHM_R | SHM_W); + ASSERT_NE(-1, shmid) << strerror(errno) << m_name; + } + + /* + * 4. Attach the segment at the same buffer address. + * Expected behavior: unmap event on the old buffer + */ + { + m_is_ptr_found = false; + m_search_ptr = buffer; + + /* Make sure every thread will have a unique value of 'buffer' - no + * thread will release its buffer before all others already + * allocated theirs */ + pthread_barrier_wait(m_barrier); + + /* adding 0x1 to 'buffer' with SHM_RND flag should still send event + * for 'buffer', because it aligns to SHMLBA + */ + void *shmaddr = shmat(shmid, (char*)buffer + 0x1, SHM_REMAP | SHM_RND); + ASSERT_EQ(buffer, shmaddr) << m_name; + + if (SHMLBA > 0x1) { + EXPECT_TRUE(m_is_ptr_found); + } + EXPECT_INCREASED(m_mapped_size, mapped_size, size, m_name); + EXPECT_INCREASED(m_unmapped_size, unmapped_size, size, m_name); + } + + /* 5. 
Detach the sysv segment */ + { + shmdt(buffer); + + EXPECT_INCREASED(m_unmapped_size, unmapped_size, size, m_name); + } + + /* 6. Remove the shared memory segment */ + { + int ret = shmctl(shmid, IPC_RMID, NULL); + ASSERT_NE(-1, ret) << strerror(errno); + } + + /* 7. Unmap the buffer */ + { + munmap(buffer, size); + + EXPECT_INCREASED(m_unmapped_size, unmapped_size, size, m_name); + } + + /* 8. sbrk call - single thread only */ + { + if (!RUNNING_ON_VALGRIND && m_num_threads < 2) { + /* valgrind failed when sbrk is called directly, + * also sbrk is not thread safe */ + + /* sbrk call is used to extend/cut memory heap, + * don't add any evaluations between calls sbrk+/sbrk- - it + * may break heap */ + sbrk(size); + sbrk(-size); + + EXPECT_INCREASED(m_mapped_size, mapped_size, size, m_name); + EXPECT_INCREASED(m_unmapped_size, unmapped_size, size, m_name); + } + } + pthread_barrier_wait(m_barrier); + } + +protected: + int m_num_threads; + size_t m_mapped_size; + size_t m_unmapped_size; + void *m_search_ptr; + bool m_is_ptr_found; + pthread_spinlock_t m_lock; + std::string m_name; + pthread_barrier_t *m_barrier; + mmap_event m_event; +}; + + +UCS_TEST_F(malloc_hook_cplusplus, new_delete) { + const size_t size = 8 * 1000 * 1000; + + set(); + + { + std::vector vec1(size, 0); + std::vector vec2(size, 0); + std::vector vec3(size, 0); + } + + { + std::vector vec1(size, 0); + std::vector vec2(size, 0); + std::vector vec3(size, 0); + } + + malloc_trim(0); + + EXPECT_GE(m_unmapped_size, size); + + unset(); +} + +UCS_TEST_SKIP_COND_F(malloc_hook_cplusplus, dynamic_mmap_enable, + RUNNING_ON_VALGRIND || skip_on_bistro()) { + EXPECT_TRUE(ucm_global_opts.enable_dynamic_mmap_thresh); + test_dynamic_mmap_thresh(); +} + +UCS_TEST_SKIP_COND_F(malloc_hook_cplusplus, dynamic_mmap_disable, + skip_on_bistro_without_valgrind()) { + ucm_global_opts.enable_dynamic_mmap_thresh = 0; + + test_dynamic_mmap_thresh(); +} + +extern "C" { + int ucm_dlmallopt_get(int); +}; + 
+UCS_TEST_SKIP_COND_F(malloc_hook_cplusplus, mallopt, + skip_on_bistro_without_valgrind()) { + + int v; + int trim_thresh, mmap_thresh; + char *p; + size_t size; + + /* This test can not be run with the other + * tests because it assumes that malloc hooks + * are not initialized + */ + p = getenv("MALLOC_TRIM_THRESHOLD_"); + if (p == NULL) { + UCS_TEST_SKIP_R("MALLOC_TRIM_THRESHOLD_ is not set"); + } + trim_thresh = atoi(p); + + p = getenv("MALLOC_MMAP_THRESHOLD_"); + if (p == NULL) { + UCS_TEST_SKIP_R("MALLOC_MMAP_THRESHOLD_ is not set"); + } + mmap_thresh = atoi(p); + + /* make sure that rcache is explicitly disabled so + * that the malloc hooks are installed after the setenv() + */ + p = getenv("UCX_IB_RCACHE"); + if ((p == NULL) || (p[0] != 'n')) { + UCS_TEST_SKIP_R("rcache must be disabled"); + } + + set(); + + v = ucm_dlmallopt_get(M_TRIM_THRESHOLD); + EXPECT_EQ(trim_thresh, v); + + v = ucm_dlmallopt_get(M_MMAP_THRESHOLD); + EXPECT_EQ(mmap_thresh, v); + + /* give a lot of extra space since the same block + * can be also used by other allocations + */ + if (trim_thresh > 0) { + size = trim_thresh/2; + } else if (mmap_thresh > 0) { + size = mmap_thresh/2; + } else { + size = 10 * 1024 * 1024; + } + + UCS_TEST_MESSAGE << "trim_thresh=" << trim_thresh << " mmap_thresh=" << mmap_thresh << + " allocating=" << size; + p = new char [size]; + ASSERT_TRUE(p != NULL); + delete [] p; + + EXPECT_EQ(m_unmapped_size, size_t(0)); + + unset(); +} + +UCS_TEST_SKIP_COND_F(malloc_hook_cplusplus, mmap_ptrs, RUNNING_ON_VALGRIND) { + ucm_global_opts.enable_dynamic_mmap_thresh = 0; + set(); + + const size_t size = ucm_dlmallopt_get(M_MMAP_THRESHOLD) * 2; + const size_t max_mem = ucs_min(ucs_get_phys_mem_size() / 4, 4 * UCS_GBYTE); + const unsigned count = ucs_min(400000ul, max_mem / size); + const unsigned iters = 100000; + + std::vector< std::vector > ptrs; + + size_t large_blocks = 0; + + /* Allocate until we get MMAP event + * Lock memory to avoid going to swap and ensure 
consistent test results. + */ + while (m_mapped_size == 0) { + std::vector str(size, 'r'); + ptrs.push_back(str); + ++large_blocks; + } + + /* Remove memory off the heap top, to ensure the following large allocations + * will use mmap() + */ + malloc_trim(0); + + /* Measure allocation time with "clear" heap state */ + double alloc_time = measure_alloc_time(size, iters); + UCS_TEST_MESSAGE << "With " << large_blocks << " large blocks:" + << " allocated " << iters << " buffers of " << size + << " bytes in " << alloc_time << " sec"; + + /* Allocate many large strings to trigger mmap() based allocation. */ + ptrs.resize(count); + for (unsigned i = 0; i < count; ++i) { + ptrs[i].resize(size, 't'); + ++large_blocks; + } + + /* Measure allocation time with many large blocks on the heap */ + bool success = false; + unsigned attempt = 0; + while (!success && (attempt < 5)) { + double alloc_time_with_ptrs = measure_alloc_time(size, iters); + UCS_TEST_MESSAGE << "With " << large_blocks << " large blocks:" + << " allocated " << iters << " buffers of " << size + << " bytes in " << alloc_time_with_ptrs << " sec"; + + /* Allow up to 75% difference */ + success = (alloc_time < 0.25) || (alloc_time_with_ptrs < (1.75 * alloc_time)); + ++attempt; + } + + if (!success) { + ADD_FAILURE() << "Failed after " << attempt << " attempts"; + } + + ptrs.clear(); + + unset(); + +} + +UCS_TEST_F(malloc_hook_cplusplus, remap_override_single_thread) { + pthread_barrier_t barrier; + pthread_barrier_init(&barrier, NULL, 1); + { + mhook_thread(new mmap_hooks("single-thread", 1, &barrier)); + } + pthread_barrier_destroy(&barrier); +} + +UCS_TEST_F(malloc_hook_cplusplus, remap_override_multi_threads) { + typedef mhook_thread thread_t; + + static const int num_threads = 8; + ucs::ptr_vector threads; + pthread_barrier_t barrier; + + pthread_barrier_init(&barrier, NULL, num_threads); + for (int i = 0; i < num_threads; ++i) { + std::stringstream ss; + ss << "thread " << i << "/" << num_threads; + 
threads.push_back(new thread_t(new mmap_hooks(ss.str(), num_threads, &barrier))); + } + + threads.clear(); + pthread_barrier_destroy(&barrier); +} + +typedef int (munmap_f_t)(void *addr, size_t len); + +UCS_TEST_SKIP_COND_F(malloc_hook, bistro_patch, RUNNING_ON_VALGRIND) { + const char *symbol = "munmap"; + ucm_bistro_restore_point_t *rp = NULL; + ucs_status_t status; + munmap_f_t *munmap_f; + void *ptr; + int res; + uint64_t UCS_V_UNUSED patched; + uint64_t UCS_V_UNUSED origin; + + /* set hook to munmap call */ + status = ucm_bistro_patch(symbol, (void*)bistro_hook<0>::munmap, &rp); + ASSERT_UCS_OK(status); + EXPECT_NE((intptr_t)rp, 0); + + munmap_f = (munmap_f_t*)ucm_bistro_restore_addr(rp); + EXPECT_NE((intptr_t)munmap_f, 0); + + /* save partial body of patched function */ + patched = *(uint64_t*)munmap_f; + + bistro_call_counter = 0; + ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + EXPECT_NE(ptr, MAP_FAILED); + + /* try to call munmap, we should jump into munmap_hook instead */ + res = munmap_f(ptr, 4096); + EXPECT_EQ(res, 0); + /* due to cache coherency issues on ARM systems could be executed + * original function body, so, skip counter evaluation */ + EXPECT_GT(bistro_call_counter, 0); + + /* restore original munmap body */ + status = ucm_bistro_restore(rp); + ASSERT_UCS_OK(status); + + bistro_call_counter = 0; + /* now try to call munmap, we should NOT jump into munmap_hook */ + ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + EXPECT_NE(ptr, MAP_FAILED); + res = munmap_f(ptr, 4096); + EXPECT_EQ(res, 0); + EXPECT_EQ(bistro_call_counter, 0); /* hook is not called */ + /* save partial body of restored function */ + origin = *(uint64_t*)munmap_f; + +#if !defined (__powerpc64__) + EXPECT_NE(patched, origin); +#endif +} + +UCS_TEST_SKIP_COND_F(malloc_hook, test_event, RUNNING_ON_VALGRIND) { + mmap_event event(this); + ucs_status_t status; + + status = event.set(UCM_EVENT_VM_MAPPED | + 
UCM_EVENT_VM_UNMAPPED); + ASSERT_UCS_OK(status); + + status = ucm_test_events(UCM_EVENT_VM_MAPPED | UCM_EVENT_VM_UNMAPPED); + ASSERT_UCS_OK(status); +} + +UCS_TEST_SKIP_COND_F(malloc_hook, test_event_failed, + RUNNING_ON_VALGRIND || !skip_on_bistro()) { + mmap_event event(this); + ucs_status_t status; + const char *symbol_munmap = "munmap"; + const char *symbol_madvise = "madvise"; + + status = event.set(UCM_EVENT_MUNMAP | UCM_EVENT_VM_UNMAPPED); + ASSERT_UCS_OK(status); + + /* set hook to munmap call */ + { + bistro_patch patch(symbol_munmap, (void*)bistro_hook<0>::munmap); + EXPECT_TRUE(ucm_test_events(UCM_EVENT_MUNMAP) == UCS_ERR_UNSUPPORTED); + EXPECT_TRUE(ucm_test_events(UCM_EVENT_VM_UNMAPPED) == UCS_ERR_UNSUPPORTED); + } + /* set hook to madvise call */ + { + bistro_patch patch(symbol_madvise, (void*)bistro_hook<0>::madvise); + EXPECT_TRUE(ucm_test_events(UCM_EVENT_MADVISE) == UCS_ERR_UNSUPPORTED); + EXPECT_TRUE(ucm_test_events(UCM_EVENT_VM_UNMAPPED) == UCS_ERR_UNSUPPORTED); + } +} + +UCS_TEST_SKIP_COND_F(malloc_hook, test_event_unmap, + RUNNING_ON_VALGRIND || !skip_on_bistro()) { + mmap_event event(this); + ucs_status_t status; + const char *symbol = "munmap"; + + status = event.set(UCM_EVENT_MMAP | UCM_EVENT_MUNMAP | UCM_EVENT_VM_UNMAPPED); + ASSERT_UCS_OK(status); + + /* set hook to munmap call; this hook variant still calls ucm_vm_munmap() */ + bistro_patch patch(symbol, (void*)bistro_hook<1>::munmap); + + /* munmap should be broken */ + status = ucm_test_events(UCM_EVENT_MUNMAP); + EXPECT_TRUE(status == UCS_ERR_UNSUPPORTED); + + /* vm_unmap should be broken as well, because munmap is broken */ + status = ucm_test_events(UCM_EVENT_VM_UNMAPPED); + EXPECT_TRUE(status == UCS_ERR_UNSUPPORTED); + + /* mmap should still work */ + status = ucm_test_events(UCM_EVENT_MMAP); + EXPECT_TRUE(status == UCS_OK); +} + +class malloc_hook_dlopen : public malloc_hook { +protected: + class library { + public: + typedef void* (*loader_t)(const char*, int); + + library(loader_t loader, const std::string &name = ""): + 
m_loader(loader), m_name(name), m_lib(NULL) + { + } + + ~library() + { + close(); + } + + void *open(const std::string name = "") + { + if (!name.empty()) { + m_name = name; + } + + close(); + + return (m_lib = m_loader(m_name.empty() ? NULL : m_name.c_str(), RTLD_NOW)); + } + + void attach(void *lib) + { + close(); + m_lib = lib; + } + + void close() + { + if (m_lib != NULL) { + dlclose(m_lib); + m_lib = NULL; + } + } + + operator bool() + { + return m_lib != NULL; + } + + void* sym(const char *name) + { + return dlsym(m_lib, name); + } + + protected: + loader_t m_loader; + std::string m_name; + void *m_lib; + }; + +public: + typedef library::loader_t loader_t; + + static std::string get_lib_dir() { +#ifndef GTEST_UCM_HOOK_LIB_DIR +# error "Missing build configuration" +#else + return GTEST_UCM_HOOK_LIB_DIR; +#endif + } + + static std::string get_lib_path_do_load() { + return get_lib_dir() + "/libdlopen_test_do_load.so"; + } + + static std::string get_lib_path_do_mmap() { + return get_lib_dir() + "/libdlopen_test_do_mmap.so"; + } + + static std::string get_lib_path_do_load_rpath() { + return get_lib_dir() + "/libdlopen_test_do_load_rpath.so"; + } + + static std::string get_lib_path_do_load_sub_rpath() { + return "libdlopen_test_rpath.so"; // library should be located using rpath + } + + /* test for mmap events are fired from non-direct load modules + * we are trying to load lib1, from lib1 load lib2, and + * fire mmap event from lib2 */ + void test_indirect_dlopen(loader_t loader) + { + typedef void (*fire_mmap_f)(void); + typedef void* (*load_lib_f)(const char *path, void* (*func)(const char*, int)); + + const char *load_lib = "load_lib"; + const char *fire_mmap = "fire_mmap"; + + library lib(loader, get_lib_path_do_load()); + library lib2(NULL); // lib2 is used for attach only + load_lib_f load; + fire_mmap_f fire; + ucs_status_t status; + mmap_event event(this); + + status = event.set(UCM_EVENT_VM_MAPPED); + ASSERT_UCS_OK(status); + + lib.open(); + 
ASSERT_TRUE(lib); + + load = (load_lib_f)lib.sym(load_lib); + ASSERT_TRUE(load != NULL); + + lib2.attach(load(get_lib_path_do_mmap().c_str(), loader)); + ASSERT_TRUE(lib2); + + fire = (fire_mmap_f)lib2.sym(fire_mmap); + ASSERT_TRUE(fire != NULL); + + m_got_event = 0; + fire(); + EXPECT_GT(m_got_event, 0); + } + + /* Test that the rpath section of the caller module is processed */ + void test_rpath_dlopen(loader_t loader) + { + typedef void* (*load_lib_f)(const char *path, void* (*func)(const char*, int)); + + const char *load_lib = "load_lib"; + + library lib(loader); + library lib2(NULL); // lib2 is used for attach only + load_lib_f load; + ucs_status_t status; + mmap_event event(this); + + /* in case reloc mode is used - this forces hooking of dlopen */ + status = event.set(UCM_EVENT_VM_MAPPED); + ASSERT_UCS_OK(status); + + /* first check that without rpath a library located in a subdirectory could not be loaded */ + lib.open(get_lib_path_do_load()); + ASSERT_TRUE(lib); + if (!lib) { + return; + } + + load = (load_lib_f)lib.sym(load_lib); + ASSERT_TRUE(load != NULL); + + lib2.attach(load(get_lib_path_do_load_sub_rpath().c_str(), loader)); + ASSERT_FALSE(lib2); + + /* next check that rpath helps to load a library located in a subdirectory */ + /* don't care about opened libs - they will be closed automatically */ + lib.open(get_lib_path_do_load_rpath()); + ASSERT_TRUE(lib); + if (!lib) { + return; + } + + load = (load_lib_f)lib.sym(load_lib); + ASSERT_TRUE(load != NULL); + + lib2.attach(load(get_lib_path_do_load_sub_rpath().c_str(), loader)); + ASSERT_TRUE(lib2); + } + + void test_dlopen_null(loader_t loader) + { + library lib(loader); + + lib.open(); + ASSERT_TRUE(lib); + } +}; + +UCS_TEST_F(malloc_hook_dlopen, indirect_dlopen) { + test_indirect_dlopen(dlopen); +} + +UCS_TEST_F(malloc_hook_dlopen, indirect_ucm_dlopen) { + test_indirect_dlopen(ucm_dlopen); +} + +UCS_TEST_F(malloc_hook_dlopen, rpath_dlopen) { + test_rpath_dlopen(dlopen); +} + +UCS_TEST_F(malloc_hook_dlopen, 
rpath_ucm_dlopen) { + test_rpath_dlopen(ucm_dlopen); +} + +UCS_TEST_F(malloc_hook_dlopen, ucm_dlopen_null_dlopen) { + test_dlopen_null(dlopen); +} + +UCS_TEST_F(malloc_hook_dlopen, ucm_dlopen_null_ucm_dlopen) { + test_dlopen_null(ucm_dlopen); +} + +UCS_MT_TEST_F(malloc_hook_dlopen, dlopen_mt_with_memtype, 2) { +#ifndef GTEST_UCM_HOOK_LIB_DIR +# error "Missing build configuration" +#endif + mmap_event event(this); + + ucs_status_t status = event.set(UCM_EVENT_VM_MAPPED | + UCM_EVENT_MEM_TYPE_ALLOC | + UCM_EVENT_MEM_TYPE_FREE); + ASSERT_UCS_OK(status); + + const std::string path = get_lib_path_do_mmap(); + static uint32_t count = 0; + + for (int i = 0; i < 100 / ucs::test_time_multiplier(); ++i) { + /* Tests that calling dlopen() from 2 threads does not deadlock, if for + * example we install memtype relocation patches and call dladdr() while + * iterating over loaded libraries. + */ + if (ucs_atomic_fadd32(&count, 1) % 2) { + void *lib1 = dlopen(get_lib_path_do_mmap().c_str(), RTLD_LAZY); + ASSERT_TRUE(lib1 != NULL); + dlclose(lib1); + } else { + void *lib2 = dlopen(get_lib_path_do_load().c_str(), RTLD_LAZY); + ASSERT_TRUE(lib2 != NULL); + dlclose(lib2); + } + + barrier(); + } + + event.unset(); +} diff --git a/test/gtest/ucm/rocm_hooks.cc b/test/gtest/ucm/rocm_hooks.cc new file mode 100644 index 0000000..dceb26f --- /dev/null +++ b/test/gtest/ucm/rocm_hooks.cc @@ -0,0 +1,193 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/
+/*
+ * NOTE(review): the header names on the three #include lines below were lost
+ * when this patch text was extracted (only bare "#include" survived). They are
+ * reconstructed from the symbols this file uses (ucm_*, ucs::test, hip*);
+ * confirm the exact paths against the upstream sources.
+ */
+#include <ucm/api/ucm.h>
+#include <common/test.h>
+#include <hip/hip_runtime.h>
+
+/*
+ * Most recent allocation / release event captured by the callbacks below.
+ * They are file-scope and nothing in this file resets them, so their contents
+ * carry over from one check (and one test case) to the next.
+ */
+static ucm_event_t alloc_event, free_event;
+
+/*
+ * UCM handler for UCM_EVENT_MEM_TYPE_ALLOC: copies the address, size and
+ * memory type of the reported event into alloc_event.
+ * event_type and arg are unused.
+ */
+static void rocm_mem_alloc_callback(ucm_event_type_t event_type,
+                                    ucm_event_t *event, void *arg)
+{
+    alloc_event.mem_type.address  = event->mem_type.address;
+    alloc_event.mem_type.size     = event->mem_type.size;
+    alloc_event.mem_type.mem_type = event->mem_type.mem_type;
+}
+
+/*
+ * UCM handler for UCM_EVENT_MEM_TYPE_FREE: copies the address, size and
+ * memory type of the reported event into free_event.
+ * event_type and arg are unused.
+ */
+static void rocm_mem_free_callback(ucm_event_type_t event_type,
+                                   ucm_event_t *event, void *arg)
+{
+    free_event.mem_type.address  = event->mem_type.address;
+    free_event.mem_type.size     = event->mem_type.size;
+    free_event.mem_type.mem_type = event->mem_type.mem_type;
+}
+
+/*
+ * Fixture for the ROCm memory hook tests: selects HIP device 0 and installs
+ * the two callbacks above for the duration of each test case.
+ */
+class rocm_hooks : public ucs::test {
+protected:
+
+    /* Skips the test when no usable ROCm device exists, otherwise installs
+     * the alloc/free event handlers. */
+    virtual void init() {
+        int dev_count;
+        ucs_status_t result;
+        hipError_t ret;
+        ucs::test::init();
+
+        ret = hipGetDeviceCount(&dev_count);
+        if ((ret != hipSuccess) || (dev_count < 1)) {
+            UCS_TEST_SKIP_R("no ROCm device detected");
+        }
+
+        if (hipSetDevice(0) != hipSuccess) {
+            UCS_TEST_SKIP_R("can't set ROCm device");
+        }
+
+        /* install memory hooks; the handler argument is an opaque void*,
+         * so the cast needs an explicit target type */
+        result = ucm_set_event_handler(UCM_EVENT_MEM_TYPE_ALLOC, 0,
+                                       rocm_mem_alloc_callback,
+                                       reinterpret_cast<void *>(this));
+        ASSERT_UCS_OK(result);
+
+        result = ucm_set_event_handler(UCM_EVENT_MEM_TYPE_FREE, 0,
+                                       rocm_mem_free_callback,
+                                       reinterpret_cast<void *>(this));
+        ASSERT_UCS_OK(result);
+    }
+
+    /* Removes both handlers, passing the same callback/argument pairs that
+     * init() registered, then runs the base-class cleanup. */
+    virtual void cleanup() {
+        ucm_unset_event_handler(UCM_EVENT_MEM_TYPE_ALLOC,
+                                rocm_mem_alloc_callback,
+                                reinterpret_cast<void *>(this));
+        ucm_unset_event_handler(UCM_EVENT_MEM_TYPE_FREE,
+                                rocm_mem_free_callback,
+                                reinterpret_cast<void *>(this));
+        ucs::test::cleanup();
+    }
+
+    /* Asserts that the last recorded allocation event matches the given
+     * address, size and memory type (UCS_MEMORY_TYPE_ROCM by default). */
+    void check_mem_alloc_events(void *ptr, size_t size,
+                                int expect_mem_type = UCS_MEMORY_TYPE_ROCM) {
+        ASSERT_EQ(ptr, alloc_event.mem_type.address);
+        ASSERT_EQ(size, alloc_event.mem_type.size);
+        ASSERT_EQ(expect_mem_type, alloc_event.mem_type.mem_type);
+    }
+
+    /* Asserts that the last recorded release event matches the given address
+     * and memory type. The size argument is accepted but NOT compared:
+     * callers in this file pass both the real size and 0. */
+    void check_mem_free_events(void *ptr, size_t size,
+                               int expect_mem_type = UCS_MEMORY_TYPE_ROCM) {
+        ASSERT_EQ(ptr, free_event.mem_type.address);
+        ASSERT_EQ(expect_mem_type, free_event.mem_type.mem_type);
+    }
+
+};
+
+/* hipMalloc/hipFree: small, large, and two live allocations freed in reverse
+ * order; every call must produce the matching alloc/free event. */
+UCS_TEST_F(rocm_hooks, test_hipMem_Alloc_Free) {
+    hipError_t ret;
+    void *dptr, *dptr1;
+
+    /* small allocation */
+    ret = hipMalloc(&dptr, 64);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_alloc_events((void *)dptr, 64);
+
+    ret = hipFree(dptr);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_free_events((void *)dptr, 64);
+
+    /* large allocation */
+    ret = hipMalloc(&dptr, (256 * UCS_MBYTE));
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_alloc_events((void *)dptr, (256 * UCS_MBYTE));
+
+    ret = hipFree(dptr);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_free_events((void *)dptr, (256 * UCS_MBYTE));
+
+    /* multiple allocations, hipFree in reverse order */
+    ret = hipMalloc(&dptr, (1 * UCS_MBYTE));
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_alloc_events((void *)dptr, (1 * UCS_MBYTE));
+
+    ret = hipMalloc(&dptr1, (1 * UCS_MBYTE));
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_alloc_events((void *)dptr1, (1 * UCS_MBYTE));
+
+    ret = hipFree(dptr1);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_free_events((void *)dptr1, (1 * UCS_MBYTE));
+
+    ret = hipFree(dptr);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_free_events((void *)dptr, (1 * UCS_MBYTE));
+}
+
+/* hipMallocManaged: events must carry UCS_MEMORY_TYPE_ROCM_MANAGED. */
+UCS_TEST_F(rocm_hooks, test_hipMallocManaged) {
+    hipError_t ret;
+    void *dptr;
+
+    ret = hipMallocManaged(&dptr, 64);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_alloc_events((void *)dptr, 64, UCS_MEMORY_TYPE_ROCM_MANAGED);
+
+    ret = hipFree(dptr);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_free_events((void *)dptr, 0, UCS_MEMORY_TYPE_ROCM_MANAGED);
+}
+
+/* hipMallocPitch: a 4-byte-wide, 8-row allocation. */
+UCS_TEST_F(rocm_hooks, test_hipMallocPitch) {
+    const size_t width  = 4;
+    const size_t height = 8;
+    hipError_t ret;
+    void *dptr;
+    size_t pitch;
+
+    ret = hipMallocPitch(&dptr, &pitch, width, height);
+    ASSERT_EQ(ret, hipSuccess);
+    /* Use the pitch HIP actually returned instead of assuming 128 bytes.
+     * NOTE(review): presumes the hook reports pitch * height, which is what
+     * the previous hard-coded (128 * 8) expectation implied - confirm. */
+    check_mem_alloc_events((void *)dptr, pitch * height);
+
+    ret = hipFree(dptr);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_free_events((void *)dptr, 0);
+}
+
+/* Same hipMalloc/hipFree sequence as test_hipMem_Alloc_Free, followed by a
+ * hipFree(NULL) call that must succeed. */
+UCS_TEST_F(rocm_hooks, test_hip_Malloc_Free) {
+    hipError_t ret;
+    void *ptr, *ptr1;
+
+    /* small allocation */
+    ret = hipMalloc(&ptr, 64);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_alloc_events(ptr, 64);
+
+    ret = hipFree(ptr);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_free_events(ptr, 64);
+
+    /* large allocation */
+    ret = hipMalloc(&ptr, (256 * UCS_MBYTE));
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_alloc_events(ptr, (256 * UCS_MBYTE));
+
+    ret = hipFree(ptr);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_free_events(ptr, (256 * UCS_MBYTE));
+
+    /* multiple allocations, hipFree in reverse order */
+    ret = hipMalloc(&ptr, (1 * UCS_MBYTE));
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_alloc_events(ptr, (1 * UCS_MBYTE));
+
+    ret = hipMalloc(&ptr1, (1 * UCS_MBYTE));
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_alloc_events(ptr1, (1 * UCS_MBYTE));
+
+    ret = hipFree(ptr1);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_free_events(ptr1, (1 * UCS_MBYTE));
+
+    ret = hipFree(ptr);
+    ASSERT_EQ(ret, hipSuccess);
+    check_mem_free_events(ptr, (1 * UCS_MBYTE));
+
+    /* hipFree with NULL */
+    ret = hipFree(NULL);
+    ASSERT_EQ(ret, hipSuccess);
+}
+
diff --git a/test/gtest/ucm/test_dlopen/Makefile.am b/test/gtest/ucm/test_dlopen/Makefile.am
new file mode 100644
index 0000000..3f729a6
--- /dev/null
+++ b/test/gtest/ucm/test_dlopen/Makefile.am
@@ -0,0 +1,22 @@
+#
+# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+#
+# See file LICENSE for terms.
+# + + +noinst_lib_LTLIBRARIES = \ + libdlopen_test_do_mmap.la \ + libdlopen_test_do_load.la \ + libdlopen_test_do_load_rpath.la + +libdlopen_test_do_mmap_la_SOURCES = dlopen_test_do_mmap.c +libdlopen_test_do_load_la_SOURCES = dlopen_test_do_load.c +libdlopen_test_do_load_rpath_la_SOURCES = dlopen_test_do_load.c +noinst_libdir = ${PWD}/.noinst + +libdlopen_test_do_load_rpath_la_CPPFLAGS = -I$(top_srcdir)/src +libdlopen_test_do_load_la_CPPFLAGS = -I$(top_srcdir)/src +libdlopen_test_do_load_rpath_la_LDFLAGS = -R=${PWD}/rpath-subdir/.libs + +SUBDIRS = rpath-subdir diff --git a/test/gtest/ucm/test_dlopen/Makefile.in b/test/gtest/ucm/test_dlopen/Makefile.in new file mode 100644 index 0000000..47f6915 --- /dev/null +++ b/test/gtest/ucm/test_dlopen/Makefile.in @@ -0,0 +1,972 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = test/gtest/ucm/test_dlopen +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + 
$(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = 
$(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(noinst_libdir)" +LTLIBRARIES = $(noinst_lib_LTLIBRARIES) +libdlopen_test_do_load_la_LIBADD = +am_libdlopen_test_do_load_la_OBJECTS = \ + libdlopen_test_do_load_la-dlopen_test_do_load.lo +libdlopen_test_do_load_la_OBJECTS = \ + $(am_libdlopen_test_do_load_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libdlopen_test_do_load_rpath_la_LIBADD = +am_libdlopen_test_do_load_rpath_la_OBJECTS = \ + libdlopen_test_do_load_rpath_la-dlopen_test_do_load.lo +libdlopen_test_do_load_rpath_la_OBJECTS = \ + $(am_libdlopen_test_do_load_rpath_la_OBJECTS) +libdlopen_test_do_load_rpath_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) \ + $(libdlopen_test_do_load_rpath_la_LDFLAGS) $(LDFLAGS) -o $@ +libdlopen_test_do_mmap_la_LIBADD = +am_libdlopen_test_do_mmap_la_OBJECTS = dlopen_test_do_mmap.lo +libdlopen_test_do_mmap_la_OBJECTS = \ + $(am_libdlopen_test_do_mmap_la_OBJECTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/dlopen_test_do_mmap.Plo \ + ./$(DEPDIR)/libdlopen_test_do_load_la-dlopen_test_do_load.Plo \ + ./$(DEPDIR)/libdlopen_test_do_load_rpath_la-dlopen_test_do_load.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libdlopen_test_do_load_la_SOURCES) \ + $(libdlopen_test_do_load_rpath_la_SOURCES) \ + $(libdlopen_test_do_mmap_la_SOURCES) +DIST_SOURCES = $(libdlopen_test_do_load_la_SOURCES) \ + $(libdlopen_test_do_load_rpath_la_SOURCES) \ + $(libdlopen_test_do_mmap_la_SOURCES) +RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ + ctags-recursive dvi-recursive html-recursive info-recursive \ + install-data-recursive install-dvi-recursive \ + install-exec-recursive install-html-recursive \ + install-info-recursive install-pdf-recursive \ + install-ps-recursive install-recursive installcheck-recursive \ + installdirs-recursive pdf-recursive ps-recursive \ + tags-recursive uninstall-recursive +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +am__recursive_targets = \ + $(RECURSIVE_TARGETS) \ + $(RECURSIVE_CLEAN_TARGETS) \ + $(am__extra_recursive_targets) +AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \ + distdir distdir-am +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. 
+am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = 
@CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = 
@LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ 
+build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +noinst_lib_LTLIBRARIES = \ + libdlopen_test_do_mmap.la \ + libdlopen_test_do_load.la \ + libdlopen_test_do_load_rpath.la + +libdlopen_test_do_mmap_la_SOURCES = dlopen_test_do_mmap.c +libdlopen_test_do_load_la_SOURCES = dlopen_test_do_load.c +libdlopen_test_do_load_rpath_la_SOURCES = dlopen_test_do_load.c +noinst_libdir = ${PWD}/.noinst +libdlopen_test_do_load_rpath_la_CPPFLAGS = -I$(top_srcdir)/src +libdlopen_test_do_load_la_CPPFLAGS = -I$(top_srcdir)/src +libdlopen_test_do_load_rpath_la_LDFLAGS = -R=${PWD}/rpath-subdir/.libs +SUBDIRS = rpath-subdir +all: all-recursive + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) 
$(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/gtest/ucm/test_dlopen/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign test/gtest/ucm/test_dlopen/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-noinst_libLTLIBRARIES: $(noinst_lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(noinst_lib_LTLIBRARIES)'; test -n "$(noinst_libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(noinst_libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(noinst_libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(noinst_libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(noinst_libdir)"; \ + } + +uninstall-noinst_libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(noinst_lib_LTLIBRARIES)'; test -n "$(noinst_libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(noinst_libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(noinst_libdir)/$$f"; \ + done + +clean-noinst_libLTLIBRARIES: + -test -z "$(noinst_lib_LTLIBRARIES)" || rm -f $(noinst_lib_LTLIBRARIES) + @list='$(noinst_lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libdlopen_test_do_load.la: $(libdlopen_test_do_load_la_OBJECTS) $(libdlopen_test_do_load_la_DEPENDENCIES) $(EXTRA_libdlopen_test_do_load_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) -rpath $(noinst_libdir) $(libdlopen_test_do_load_la_OBJECTS) $(libdlopen_test_do_load_la_LIBADD) $(LIBS) + +libdlopen_test_do_load_rpath.la: $(libdlopen_test_do_load_rpath_la_OBJECTS) $(libdlopen_test_do_load_rpath_la_DEPENDENCIES) $(EXTRA_libdlopen_test_do_load_rpath_la_DEPENDENCIES) + $(AM_V_CCLD)$(libdlopen_test_do_load_rpath_la_LINK) -rpath $(noinst_libdir) $(libdlopen_test_do_load_rpath_la_OBJECTS) $(libdlopen_test_do_load_rpath_la_LIBADD) $(LIBS) + +libdlopen_test_do_mmap.la: $(libdlopen_test_do_mmap_la_OBJECTS) $(libdlopen_test_do_mmap_la_DEPENDENCIES) $(EXTRA_libdlopen_test_do_mmap_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) -rpath $(noinst_libdir) $(libdlopen_test_do_mmap_la_OBJECTS) $(libdlopen_test_do_mmap_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dlopen_test_do_mmap.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libdlopen_test_do_load_la-dlopen_test_do_load.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libdlopen_test_do_load_rpath_la-dlopen_test_do_load.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' 
>$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libdlopen_test_do_load_la-dlopen_test_do_load.lo: dlopen_test_do_load.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libdlopen_test_do_load_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libdlopen_test_do_load_la-dlopen_test_do_load.lo -MD -MP -MF 
$(DEPDIR)/libdlopen_test_do_load_la-dlopen_test_do_load.Tpo -c -o libdlopen_test_do_load_la-dlopen_test_do_load.lo `test -f 'dlopen_test_do_load.c' || echo '$(srcdir)/'`dlopen_test_do_load.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libdlopen_test_do_load_la-dlopen_test_do_load.Tpo $(DEPDIR)/libdlopen_test_do_load_la-dlopen_test_do_load.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dlopen_test_do_load.c' object='libdlopen_test_do_load_la-dlopen_test_do_load.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libdlopen_test_do_load_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libdlopen_test_do_load_la-dlopen_test_do_load.lo `test -f 'dlopen_test_do_load.c' || echo '$(srcdir)/'`dlopen_test_do_load.c + +libdlopen_test_do_load_rpath_la-dlopen_test_do_load.lo: dlopen_test_do_load.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libdlopen_test_do_load_rpath_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libdlopen_test_do_load_rpath_la-dlopen_test_do_load.lo -MD -MP -MF $(DEPDIR)/libdlopen_test_do_load_rpath_la-dlopen_test_do_load.Tpo -c -o libdlopen_test_do_load_rpath_la-dlopen_test_do_load.lo `test -f 'dlopen_test_do_load.c' || echo '$(srcdir)/'`dlopen_test_do_load.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libdlopen_test_do_load_rpath_la-dlopen_test_do_load.Tpo $(DEPDIR)/libdlopen_test_do_load_rpath_la-dlopen_test_do_load.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dlopen_test_do_load.c' object='libdlopen_test_do_load_rpath_la-dlopen_test_do_load.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libdlopen_test_do_load_rpath_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libdlopen_test_do_load_rpath_la-dlopen_test_do_load.lo `test -f 'dlopen_test_do_load.c' || echo '$(srcdir)/'`dlopen_test_do_load.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run 'make' without going through this Makefile. +# To change the values of 'make' variables: instead of editing Makefiles, +# (1) if the variable is set in 'config.status', edit 'config.status' +# (which will cause the Makefiles to be regenerated when you run 'make'); +# (2) otherwise, pass the desired values on the 'make' command line. +$(am__recursive_targets): + @fail=; \ + if $(am__make_keepgoing); then \ + failcom='fail=yes'; \ + else \ + failcom='exit 1'; \ + fi; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-recursive +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + 
empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-recursive + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-recursive + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else 
d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + $(am__make_dryrun) \ + || test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile $(LTLIBRARIES) +installdirs: installdirs-recursive +installdirs-am: + for dir in "$(DESTDIR)$(noinst_libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) 
INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool clean-noinst_libLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-recursive + -rm -f ./$(DEPDIR)/dlopen_test_do_mmap.Plo + -rm -f ./$(DEPDIR)/libdlopen_test_do_load_la-dlopen_test_do_load.Plo + -rm -f ./$(DEPDIR)/libdlopen_test_do_load_rpath_la-dlopen_test_do_load.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: install-noinst_libLTLIBRARIES + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f ./$(DEPDIR)/dlopen_test_do_mmap.Plo + -rm -f ./$(DEPDIR)/libdlopen_test_do_load_la-dlopen_test_do_load.Plo + -rm -f ./$(DEPDIR)/libdlopen_test_do_load_rpath_la-dlopen_test_do_load.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: 
mostlyclean-recursive + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: uninstall-noinst_libLTLIBRARIES + +.MAKE: $(am__recursive_targets) install-am install-strip + +.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \ + am--depfiles check check-am clean clean-generic clean-libtool \ + clean-noinst_libLTLIBRARIES cscopelist-am ctags ctags-am \ + distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-noinst_libLTLIBRARIES \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-noinst_libLTLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/test/gtest/ucm/test_dlopen/configure.m4 b/test/gtest/ucm/test_dlopen/configure.m4 new file mode 100644 index 0000000..3af2b8a --- /dev/null +++ b/test/gtest/ucm/test_dlopen/configure.m4 @@ -0,0 +1,7 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + +AC_CONFIG_FILES([test/gtest/ucm/test_dlopen/Makefile]) diff --git a/test/gtest/ucm/test_dlopen/dlopen_test_do_load.c b/test/gtest/ucm/test_dlopen/dlopen_test_do_load.c new file mode 100644 index 0000000..63c7f07 --- /dev/null +++ b/test/gtest/ucm/test_dlopen/dlopen_test_do_load.c @@ -0,0 +1,15 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include <ucs/sys/compiler_def.h> + +#include <dlfcn.h> + +UCS_F_NOOPTIMIZE /* prevent using tail recursion unwind */ +void* load_lib(const char *path, void* (*load_func)(const char*, int)) +{ + return (load_func ? load_func : dlopen)(path, RTLD_NOW); +} diff --git a/test/gtest/ucm/test_dlopen/dlopen_test_do_mmap.c b/test/gtest/ucm/test_dlopen/dlopen_test_do_mmap.c new file mode 100644 index 0000000..91bead8 --- /dev/null +++ b/test/gtest/ucm/test_dlopen/dlopen_test_do_mmap.c @@ -0,0 +1,15 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include <sys/mman.h> + +void fire_mmap(void) +{ + void* map_ptr; + + map_ptr = mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + munmap(map_ptr, 4096); +} diff --git a/test/gtest/ucm/test_dlopen/rpath-subdir/Makefile.am b/test/gtest/ucm/test_dlopen/rpath-subdir/Makefile.am new file mode 100644 index 0000000..67ea306 --- /dev/null +++ b/test/gtest/ucm/test_dlopen/rpath-subdir/Makefile.am @@ -0,0 +1,14 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + + +noinst_lib_LTLIBRARIES = \ + libdlopen_test_rpath.la + +libdlopen_test_rpath_la_SOURCES = dlopen_test_rpath.c +noinst_libdir = ${PWD}/.noinst + + diff --git a/test/gtest/ucm/test_dlopen/rpath-subdir/Makefile.in b/test/gtest/ucm/test_dlopen/rpath-subdir/Makefile.in new file mode 100644 index 0000000..11b713d --- /dev/null +++ b/test/gtest/ucm/test_dlopen/rpath-subdir/Makefile.in @@ -0,0 +1,803 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = test/gtest/ucm/test_dlopen/rpath-subdir +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + 
$(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = 
$(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(noinst_libdir)" +LTLIBRARIES = $(noinst_lib_LTLIBRARIES) +libdlopen_test_rpath_la_LIBADD = +am_libdlopen_test_rpath_la_OBJECTS = dlopen_test_rpath.lo +libdlopen_test_rpath_la_OBJECTS = \ + $(am_libdlopen_test_rpath_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/dlopen_test_rpath.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libdlopen_test_rpath_la_SOURCES) +DIST_SOURCES = $(libdlopen_test_rpath_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = 
$(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ 
+DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ 
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ 
+localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +noinst_lib_LTLIBRARIES = \ + libdlopen_test_rpath.la + +libdlopen_test_rpath_la_SOURCES = dlopen_test_rpath.c +noinst_libdir = ${PWD}/.noinst +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/gtest/ucm/test_dlopen/rpath-subdir/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign test/gtest/ucm/test_dlopen/rpath-subdir/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-noinst_libLTLIBRARIES: $(noinst_lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(noinst_lib_LTLIBRARIES)'; test -n "$(noinst_libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(noinst_libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(noinst_libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(noinst_libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(noinst_libdir)"; \ + } + +uninstall-noinst_libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(noinst_lib_LTLIBRARIES)'; test -n "$(noinst_libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(noinst_libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(noinst_libdir)/$$f"; \ + done + +clean-noinst_libLTLIBRARIES: + -test -z "$(noinst_lib_LTLIBRARIES)" || rm -f $(noinst_lib_LTLIBRARIES) + @list='$(noinst_lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do 
echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libdlopen_test_rpath.la: $(libdlopen_test_rpath_la_OBJECTS) $(libdlopen_test_rpath_la_DEPENDENCIES) $(EXTRA_libdlopen_test_rpath_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) -rpath $(noinst_libdir) $(libdlopen_test_rpath_la_OBJECTS) $(libdlopen_test_rpath_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dlopen_test_rpath.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ 
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for 
file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: + for dir in "$(DESTDIR)$(noinst_libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinst_libLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/dlopen_test_rpath.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-noinst_libLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/dlopen_test_rpath.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-noinst_libLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinst_libLTLIBRARIES \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man \ + install-noinst_libLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am 
installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-noinst_libLTLIBRARIES + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 b/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 new file mode 100644 index 0000000..b121802 --- /dev/null +++ b/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 @@ -0,0 +1,7 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +AC_CONFIG_FILES([test/gtest/ucm/test_dlopen/rpath-subdir/Makefile]) diff --git a/test/gtest/ucm/test_dlopen/rpath-subdir/dlopen_test_rpath.c b/test/gtest/ucm/test_dlopen/rpath-subdir/dlopen_test_rpath.c new file mode 100644 index 0000000..335f6a6 --- /dev/null +++ b/test/gtest/ucm/test_dlopen/rpath-subdir/dlopen_test_rpath.c @@ -0,0 +1,10 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +int dummy() +{ + return 0; +} diff --git a/test/gtest/ucp/test_ucp_am.cc b/test/gtest/ucp/test_ucp_am.cc new file mode 100644 index 0000000..daa4e4c --- /dev/null +++ b/test/gtest/ucp/test_ucp_am.cc @@ -0,0 +1,285 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * Copyright (c) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. + * Copyright (C) Los Alamos National Security, LLC. 2018. ALL RIGHTS RESERVED. 
+ * + */ +#include +#include +#include +#include +#include + +#include "ucp_datatype.h" +#include "ucp_test.h" + +#define NUM_MESSAGES 17 + +#define UCP_REALLOC_ID 1000 +#define UCP_SEND_ID 0 +#define UCP_REPLY_ID 1 +#define UCP_RELEASE 1 + +class test_ucp_am_base : public ucp_test { +public: + int sent_ams; + int replies; + int recv_ams; + void *reply; + void *for_release[NUM_MESSAGES]; + int release; + + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.field_mask |= UCP_PARAM_FIELD_FEATURES; + params.features = UCP_FEATURE_AM; + return params; + } + + static void ucp_send_am_cb(void *request, ucs_status_t status); + + static ucs_status_t ucp_process_am_cb(void *arg, void *data, + size_t length, + ucp_ep_h reply_ep, + unsigned flags); + + static ucs_status_t ucp_process_reply_cb(void *arg, void *data, + size_t length, + ucp_ep_h reply_ep, + unsigned flags); + + ucs_status_t am_handler(test_ucp_am_base *me, void *data, + size_t length, unsigned flags); +}; + +ucs_status_t test_ucp_am_base::ucp_process_reply_cb(void *arg, void *data, + size_t length, + ucp_ep_h reply_ep, + unsigned flags) +{ + test_ucp_am_base *self = reinterpret_cast(arg); + self->replies++; + return UCS_OK; +} + +ucs_status_t test_ucp_am_base::ucp_process_am_cb(void *arg, void *data, + size_t length, + ucp_ep_h reply_ep, + unsigned flags) +{ + test_ucp_am_base *self = reinterpret_cast(arg); + + if (reply_ep) { + self->reply = ucp_am_send_nb(reply_ep, UCP_REPLY_ID, NULL, 1, + ucp_dt_make_contig(0), + (ucp_send_callback_t) ucs_empty_function, + 0); + EXPECT_FALSE(UCS_PTR_IS_ERR(self->reply)); + } + + return self->am_handler(self, data, length, flags); +} + +ucs_status_t test_ucp_am_base::am_handler(test_ucp_am_base *me, void *data, + size_t length, unsigned flags) +{ + ucs_status_t status; + std::vector cmp(length, (char)length); + std::vector databuf(length, 'r'); + + memcpy(&databuf[0], data, length); + + EXPECT_EQ(cmp, databuf); + if 
(me->release) { + me->for_release[me->recv_ams] = data; + status = UCS_INPROGRESS; + } else { + status = UCS_OK; + } + + me->recv_ams++; + return status; +} + +class test_ucp_am : public test_ucp_am_base { +public: + ucp_ep_params_t get_ep_params() { + ucp_ep_params_t params = test_ucp_am_base::get_ep_params(); + params.field_mask |= UCP_EP_PARAM_FIELD_FLAGS; + params.flags |= UCP_EP_PARAMS_FLAGS_NO_LOOPBACK; + return params; + } + + virtual void init() { + ucp_test::init(); + sender().connect(&receiver(), get_ep_params()); + receiver().connect(&sender(), get_ep_params()); + } + +protected: + void do_set_am_handler_realloc_test(); + void do_send_process_data_test(int test_release, uint16_t am_id, + int send_reply); + void do_send_process_data_iov_test(); + void set_handlers(uint16_t am_id); + void set_reply_handlers(); +}; + +void test_ucp_am::set_reply_handlers() +{ + ucp_worker_set_am_handler(sender().worker(), UCP_REPLY_ID, + ucp_process_reply_cb, this, + UCP_AM_FLAG_WHOLE_MSG); + ucp_worker_set_am_handler(receiver().worker(), UCP_REPLY_ID, + ucp_process_reply_cb, this, + UCP_AM_FLAG_WHOLE_MSG); +} + +void test_ucp_am::set_handlers(uint16_t am_id) +{ + ucp_worker_set_am_handler(sender().worker(), am_id, + ucp_process_am_cb, this, + UCP_AM_FLAG_WHOLE_MSG); + ucp_worker_set_am_handler(receiver().worker(), am_id, + ucp_process_am_cb, this, + UCP_AM_FLAG_WHOLE_MSG); +} + +void test_ucp_am::do_send_process_data_test(int test_release, uint16_t am_id, + int send_reply) +{ + size_t buf_size = pow(2, NUM_MESSAGES - 2); + ucs_status_ptr_t sstatus = NULL; + std::vector buf(buf_size); + + recv_ams = 0; + sent_ams = 0; + replies = 0; + this->release = test_release; + + for (size_t i = 0; i < buf_size + 1; i = i ? 
(i * 2) : 1) { + for (size_t j = 0; j < i; j++) { + buf[j] = i; + } + + reply = NULL; + sstatus = ucp_am_send_nb(receiver().ep(), am_id, + buf.data(), 1, ucp_dt_make_contig(i), + (ucp_send_callback_t) ucs_empty_function, + send_reply); + + EXPECT_FALSE(UCS_PTR_IS_ERR(sstatus)); + wait(sstatus); + sent_ams++; + + if (send_reply) { + while (sent_ams != replies) { + progress(); + } + + if (reply != NULL) { + ucp_request_release(reply); + } + } + } + + while (sent_ams != recv_ams) { + progress(); + } + + if (send_reply) { + while (sent_ams != replies) { + progress(); + } + } + + if (test_release) { + for(int i = 0; i < recv_ams; i++) { + if (for_release[i] != NULL) { + ucp_am_data_release(receiver().worker(), for_release[i]); + } + } + } +} + +void test_ucp_am::do_send_process_data_iov_test() +{ + ucs_status_ptr_t sstatus; + size_t iovcnt = 2; + size_t size = 8192; + size_t index; + size_t i; + + recv_ams = 0; + sent_ams = 0; + release = 0; + + std::vector b1(size); + std::vector b2(size); + ucp_dt_iov_t iovec[iovcnt]; + + set_handlers(0); + + for (i = 1; i < size; i *= 2) { + for (index = 0; index < i; index++) { + b1[index] = i * 2; + b2[index] = i * 2; + } + + iovec[0].buffer = b1.data(); + iovec[1].buffer = b2.data(); + + iovec[0].length = i; + iovec[1].length = i; + + sstatus = ucp_am_send_nb(receiver().ep(), 0, + iovec, 2, ucp_dt_make_iov(), + (ucp_send_callback_t) ucs_empty_function, 0); + wait(sstatus); + EXPECT_FALSE(UCS_PTR_IS_ERR(sstatus)); + sent_ams++; + } + + while (sent_ams != recv_ams) { + progress(); + } +} + +void test_ucp_am::do_set_am_handler_realloc_test() +{ + set_handlers(UCP_SEND_ID); + do_send_process_data_test(0, UCP_SEND_ID, 0); + + set_handlers(UCP_REALLOC_ID); + do_send_process_data_test(0, UCP_REALLOC_ID, 0); + + set_handlers(UCP_SEND_ID + 1); + do_send_process_data_test(0, UCP_SEND_ID + 1, 0); +} + +UCS_TEST_P(test_ucp_am, send_process_am) +{ + set_handlers(UCP_SEND_ID); + do_send_process_data_test(0, UCP_SEND_ID, 0); + + 
set_reply_handlers(); + do_send_process_data_test(0, UCP_SEND_ID, UCP_AM_SEND_REPLY); +} + +UCS_TEST_P(test_ucp_am, send_process_am_release) +{ + set_handlers(UCP_SEND_ID); + do_send_process_data_test(UCP_RELEASE, 0, 0); +} + +UCS_TEST_P(test_ucp_am, send_process_iov_am) +{ + do_send_process_data_iov_test(); +} + +UCS_TEST_P(test_ucp_am, set_am_handler_realloc) +{ + do_set_am_handler_realloc_test(); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_am) diff --git a/test/gtest/ucp/test_ucp_atomic.cc b/test/gtest/ucp/test_ucp_atomic.cc new file mode 100644 index 0000000..53322c6 --- /dev/null +++ b/test/gtest/ucp/test_ucp_atomic.cc @@ -0,0 +1,352 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "test_ucp_atomic.h" +extern "C" { +#include +} + +std::vector +test_ucp_atomic::enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) +{ + std::vector result; + generate_test_params_variant(ctx_params, name, + test_case_name, tls, UCP_ATOMIC_MODE_CPU, result); + generate_test_params_variant(ctx_params, name, + test_case_name, tls, UCP_ATOMIC_MODE_DEVICE, result); + generate_test_params_variant(ctx_params, name, + test_case_name, tls, UCP_ATOMIC_MODE_GUESS, result); + return result; +} + +void test_ucp_atomic::init() { + const char *atomic_mode = + (GetParam().variant == UCP_ATOMIC_MODE_CPU) ? "cpu" : + (GetParam().variant == UCP_ATOMIC_MODE_DEVICE) ? "device" : + (GetParam().variant == UCP_ATOMIC_MODE_GUESS) ? 
"guess" : + ""; + modify_config("ATOMIC_MODE", atomic_mode); + test_ucp_memheap::init(); +} + +template +void test_ucp_atomic::blocking_add(entity *e, size_t max_size, void *memheap_addr, + ucp_rkey_h rkey, std::string& expected_data) +{ + ucs_status_t status; + T add, prev; + + prev = *(T*)memheap_addr; + add = (T)ucs::rand() * (T)ucs::rand(); + + if (sizeof(T) == sizeof(uint32_t)) { + status = ucp_atomic_add32(e->ep(), add, (uintptr_t)memheap_addr, rkey); + } else if (sizeof(T) == sizeof(uint64_t)) { + status = ucp_atomic_add64(e->ep(), add, (uintptr_t)memheap_addr, rkey); + } else { + status = UCS_ERR_UNSUPPORTED; + } + ASSERT_UCS_OK(status); + + expected_data.resize(sizeof(T)); + *(T*)&expected_data[0] = add + prev; +} + +void test_ucp_atomic::unaligned_blocking_add64(entity *e, size_t max_size, + void *memheap_addr, ucp_rkey_h rkey, + std::string& expected_data) +{ + ucs_status_t status; + { + /* Test that unaligned addresses generate error */ + scoped_log_handler slh(hide_errors_logger); + status = ucp_atomic_add64(e->ep(), 0, (uintptr_t)memheap_addr + 1, rkey); + } + EXPECT_EQ(UCS_ERR_INVALID_PARAM, status); + expected_data.clear(); +} + +template +ucs_status_t test_ucp_atomic::ucp_atomic_post_nbi(ucp_ep_h ep, ucp_atomic_post_op_t opcode, + T value, void *remote_addr, + ucp_rkey_h rkey) +{ + return ucp_atomic_post(ep, opcode, value, sizeof(T), (uintptr_t)remote_addr, rkey); +} + +template +void test_ucp_atomic::nb_post(entity *e, size_t max_size, void *memheap_addr, + ucp_rkey_h rkey, std::string& expected_data) +{ + ucs_status_t status; + T val, prev; + + prev = *(T*)memheap_addr; + val = (T)ucs::rand() * (T)ucs::rand(); + + status = test_ucp_atomic::ucp_atomic_post_nbi(e->ep(), OP, val, memheap_addr, rkey); + + if (status == UCS_INPROGRESS) { + flush_worker(*e); + } else { + ASSERT_UCS_OK(status); + } + expected_data.resize(sizeof(T)); + *(T*)&expected_data[0] = atomic_op_val(val, prev); +} + +template +void test_ucp_atomic::unaligned_nb_post(entity *e, 
size_t max_size, + void *memheap_addr, ucp_rkey_h rkey, + std::string& expected_data) +{ + ucs_status_t status; + { + /* Test that unaligned addresses generate error */ + scoped_log_handler slh(hide_errors_logger); + status = test_ucp_atomic::ucp_atomic_post_nbi + (e->ep(), OP, 0, (void *)((uintptr_t)memheap_addr + 1), rkey); + } + EXPECT_EQ(UCS_ERR_INVALID_PARAM, status); + expected_data.clear(); +} + +template +ucs_status_ptr_t test_ucp_atomic::ucp_atomic_fetch(ucp_ep_h ep, + ucp_atomic_fetch_op_t opcode, + T value, T *result, + void *remote_addr, ucp_rkey_h rkey) +{ + return ucp_atomic_fetch_nb(ep, opcode, value, result, sizeof(T), + (uintptr_t)remote_addr, rkey, send_completion); +} + +template +void test_ucp_atomic::nb_fetch(entity *e, size_t max_size, + void *memheap_addr, ucp_rkey_h rkey, + std::string& expected_data) +{ + void *amo_req; + T val, prev, result; + + prev = *(T*)memheap_addr; + val = (T)ucs::rand() * (T)ucs::rand(); + + amo_req = test_ucp_atomic::ucp_atomic_fetch(e->ep(), FOP, + val, &result, memheap_addr, rkey); + if(UCS_PTR_IS_PTR(amo_req)){ + wait(amo_req); + } + + EXPECT_EQ(prev, result); + + expected_data.resize(sizeof(T)); + *(T*)&expected_data[0] = atomic_fop_val(val, prev); +} + +template +void test_ucp_atomic::nb_cswap(entity *e, size_t max_size, void *memheap_addr, + ucp_rkey_h rkey, std::string& expected_data) +{ + T compare, swap, prev, result; + void *amo_req; + + prev = *(T*)memheap_addr; + if ((ucs::rand() % 2) == 0) { + compare = prev; /* success mode */ + } else { + compare = ~prev; /* fail mode */ + } + swap = result = (T)ucs::rand() * (T)ucs::rand(); + + amo_req = test_ucp_atomic::ucp_atomic_fetch(e->ep(), UCP_ATOMIC_FETCH_OP_CSWAP, + compare, &result, + memheap_addr, rkey); + if(UCS_PTR_IS_PTR(amo_req)){ + wait(amo_req); + } + + EXPECT_EQ(prev, result); + + expected_data.resize(sizeof(T)); + if (compare == prev) { + *(T*)&expected_data[0] = swap; + } else { + *(T*)&expected_data[0] = prev; + } +} + +template +void 
test_ucp_atomic::test(F f, bool malloc_allocate) { + test_blocking_xfer(static_cast(f), + DEFAULT_SIZE, DEFAULT_ITERS, + sizeof(T), + malloc_allocate, false); +} + + +class test_ucp_atomic32 : public test_ucp_atomic { +public: + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.features |= UCP_FEATURE_AMO32; + return params; + } +}; + +UCS_TEST_P(test_ucp_atomic32, atomic_add) { + test(&test_ucp_atomic32::blocking_add, false); + test(&test_ucp_atomic32::blocking_add, true); +} + +UCS_TEST_P(test_ucp_atomic32, atomic_add_nb) { + test(&test_ucp_atomic32::nb_post, false); + test(&test_ucp_atomic32::nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic32, atomic_and_nb) { + test(&test_ucp_atomic32::nb_post, false); + test(&test_ucp_atomic32::nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic32, atomic_or_nb) { + test(&test_ucp_atomic32::nb_post, false); + test(&test_ucp_atomic32::nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic32, atomic_xor_nb) { + test(&test_ucp_atomic32::nb_post, false); + test(&test_ucp_atomic32::nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic32, atomic_fadd_nb) { + test(&test_ucp_atomic32::nb_fetch, false); + test(&test_ucp_atomic32::nb_fetch, true); +} + +UCS_TEST_P(test_ucp_atomic32, atomic_fand_nb) { + test(&test_ucp_atomic32::nb_fetch, false); + test(&test_ucp_atomic32::nb_fetch, true); +} + +UCS_TEST_P(test_ucp_atomic32, atomic_for_nb) { + test(&test_ucp_atomic32::nb_fetch, false); + test(&test_ucp_atomic32::nb_fetch, true); +} + +UCS_TEST_P(test_ucp_atomic32, atomic_fxor_nb) { + test(&test_ucp_atomic32::nb_fetch, false); + test(&test_ucp_atomic32::nb_fetch, true); +} + +UCS_TEST_P(test_ucp_atomic32, atomic_swap_nb) { + test(&test_ucp_atomic32::nb_fetch, false); + test(&test_ucp_atomic32::nb_fetch, true); +} + +UCS_TEST_P(test_ucp_atomic32, atomic_cswap_nb) { + test(&test_ucp_atomic32::nb_cswap, false); + test(&test_ucp_atomic32::nb_cswap, true); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_atomic32) + 
+class test_ucp_atomic64 : public test_ucp_atomic { +public: + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.features |= UCP_FEATURE_AMO64; + return params; + } +}; + +UCS_TEST_P(test_ucp_atomic64, atomic_add) { + test(&test_ucp_atomic64::blocking_add, false); + test(&test_ucp_atomic64::blocking_add, true); +} + +UCS_TEST_P(test_ucp_atomic64, atomic_add_nb) { + test(&test_ucp_atomic64::nb_post, false); + test(&test_ucp_atomic64::nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic64, atomic_and_nb) { + test(&test_ucp_atomic64::nb_post, false); + test(&test_ucp_atomic64::nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic64, atomic_or_nb) { + test(&test_ucp_atomic64::nb_post, false); + test(&test_ucp_atomic64::nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic64, atomic_xor_nb) { + test(&test_ucp_atomic64::nb_post, false); + test(&test_ucp_atomic64::nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic64, atomic_fadd_nb) { + test(&test_ucp_atomic64::nb_fetch, false); + test(&test_ucp_atomic64::nb_fetch, true); +} + +UCS_TEST_P(test_ucp_atomic64, atomic_fand_nb) { + test(&test_ucp_atomic64::nb_fetch, false); + test(&test_ucp_atomic64::nb_fetch, true); +} + +UCS_TEST_P(test_ucp_atomic64, atomic_for_nb) { + test(&test_ucp_atomic64::nb_fetch, false); + test(&test_ucp_atomic64::nb_fetch, true); +} + +UCS_TEST_P(test_ucp_atomic64, atomic_fxor_nb) { + test(&test_ucp_atomic64::nb_fetch, false); + test(&test_ucp_atomic64::nb_fetch, true); +} + +UCS_TEST_P(test_ucp_atomic64, atomic_swap_nb) { + test(&test_ucp_atomic64::nb_fetch, false); + test(&test_ucp_atomic64::nb_fetch, true); +} + +UCS_TEST_P(test_ucp_atomic64, atomic_cswap_nb) { + test(&test_ucp_atomic64::nb_cswap, false); + test(&test_ucp_atomic64::nb_cswap, true); +} + +#if ENABLE_PARAMS_CHECK +UCS_TEST_P(test_ucp_atomic64, unaligned_atomic_add) { + test(&test_ucp_atomic::unaligned_blocking_add64, false); + test(&test_ucp_atomic::unaligned_blocking_add64, true); +} + 
+UCS_TEST_P(test_ucp_atomic64, unaligned_atomic_add_nb) { + test(&test_ucp_atomic::unaligned_nb_post, false); + test(&test_ucp_atomic::unaligned_nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic64, unaligned_atomic_and_nb) { + test(&test_ucp_atomic::unaligned_nb_post, false); + test(&test_ucp_atomic::unaligned_nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic64, unaligned_atomic_or_nb) { + test(&test_ucp_atomic::unaligned_nb_post, false); + test(&test_ucp_atomic::unaligned_nb_post, true); +} + +UCS_TEST_P(test_ucp_atomic64, unaligned_atomic_xor_nb) { + test(&test_ucp_atomic::unaligned_nb_post, false); + test(&test_ucp_atomic::unaligned_nb_post, true); +} +#endif + +UCP_INSTANTIATE_TEST_CASE(test_ucp_atomic64) diff --git a/test/gtest/ucp/test_ucp_atomic.h b/test/gtest/ucp/test_ucp_atomic.h new file mode 100644 index 0000000..0c48efe --- /dev/null +++ b/test/gtest/ucp/test_ucp_atomic.h @@ -0,0 +1,102 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * Copyright (C) UT-Battelle, LLC. 2016-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#ifndef TEST_UCP_ATOMIC_H_ +#define TEST_UCP_ATOMIC_H_ + +#include "test_ucp_memheap.h" + + +class test_ucp_atomic : public test_ucp_memheap { +public: + static std::vector enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls); + + virtual void init(); + + template + void blocking_add(entity *e, size_t max_size, void *memheap_addr, + ucp_rkey_h rkey, std::string& expected_data); + + template void blocking_add(entity *e, size_t max_size, void *memheap_addr, + ucp_rkey_h rkey, std::string& expected_data); + + void unaligned_blocking_add64(entity *e, size_t max_size, void *memheap_addr, + ucp_rkey_h rkey, std::string& expected_data); + + template + void unaligned_nb_post(entity *e, size_t max_size, void *memheap_addr, + ucp_rkey_h rkey, std::string& expected_data); + + template + void nb_cswap(entity *e, size_t max_size, void *memheap_addr, + ucp_rkey_h rkey, std::string& expected_data); + + template + void test(F f, bool malloc_allocate); + + template + void nb_post(entity *e, size_t max_size, void *memheap_addr, + ucp_rkey_h rkey, std::string& expected_data); + + template + void nb_fetch(entity *e, size_t max_size, void *memheap_addr, + ucp_rkey_h rkey, std::string& expected_data); + + template + T atomic_op_val(T v1, T v2) + { + /* coverity[switch_selector_expr_is_constant] */ + switch (OP) { + case UCP_ATOMIC_POST_OP_ADD: + return v1 + v2; + case UCP_ATOMIC_POST_OP_AND: + return v1 & v2; + case UCP_ATOMIC_POST_OP_OR: + return v1 | v2; + case UCP_ATOMIC_POST_OP_XOR: + return v1 ^ v2; + default: + return 0; + } + } + + template + T atomic_fop_val(T v1, T v2) + { + /* coverity[switch_selector_expr_is_constant] */ + switch (OP) { + case UCP_ATOMIC_FETCH_OP_FADD: + return v1 + v2; + case UCP_ATOMIC_FETCH_OP_FAND: + return v1 & v2; + case UCP_ATOMIC_FETCH_OP_FOR: + return v1 | v2; + case UCP_ATOMIC_FETCH_OP_FXOR: + return v1 ^ v2; + case UCP_ATOMIC_FETCH_OP_SWAP: + return v1; + 
+ default: + return 0; + } + } + +private: + static void send_completion(void *request, ucs_status_t status){} + template + ucs_status_t ucp_atomic_post_nbi(ucp_ep_h ep, ucp_atomic_post_op_t opcode, + T value, void *remote_addr, + ucp_rkey_h rkey); + template + ucs_status_ptr_t ucp_atomic_fetch(ucp_ep_h ep, ucp_atomic_fetch_op_t opcode, + T value, T *result, + void *remote_addr, ucp_rkey_h rkey); +}; + +#endif diff --git a/test/gtest/ucp/test_ucp_context.cc b/test/gtest/ucp/test_ucp_context.cc new file mode 100644 index 0000000..694add3 --- /dev/null +++ b/test/gtest/ucp/test_ucp_context.cc @@ -0,0 +1,110 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "ucp_test.h" +extern "C" { +#include +} + + +class test_ucp_context : public ucp_test { +public: + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.features |= UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP; + return params; + } +}; + +UCS_TEST_P(test_ucp_context, minimal_field_mask) { + ucs::handle config; + UCS_TEST_CREATE_HANDLE(ucp_config_t*, config, ucp_config_release, + ucp_config_read, NULL, NULL); + + ucs::handle ucph; + ucs::handle worker; + + { + /* Features ONLY */ + ucp_params_t params; + VALGRIND_MAKE_MEM_UNDEFINED(&params, sizeof(params)); + params.field_mask = UCP_PARAM_FIELD_FEATURES; + params.features = get_ctx_params().features; + + UCS_TEST_CREATE_HANDLE(ucp_context_h, ucph, ucp_cleanup, + ucp_init, &params, config.get()); + } + + { + /* Empty set */ + ucp_worker_params_t params; + VALGRIND_MAKE_MEM_UNDEFINED(&params, sizeof(params)); + params.field_mask = 0; + + UCS_TEST_CREATE_HANDLE(ucp_worker_h, worker, ucp_worker_destroy, + ucp_worker_create, ucph.get(), &params); + } +} + +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_context, all, "all") + +class test_ucp_aliases : public test_ucp_context { +}; + +UCS_TEST_P(test_ucp_aliases, aliases) { + create_entity(); +} + 
+UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_aliases, rc_v, "rc_v") +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_aliases, rc_x, "rc_x") +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_aliases, ud, "ud") +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_aliases, ud_mlx5, "ud_mlx5") +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_aliases, ugni, "ugni") +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_aliases, shm, "shm") + + +class test_ucp_version : public test_ucp_context { +}; + +UCS_TEST_P(test_ucp_version, wrong_api_version) { + + ucs::handle config; + UCS_TEST_CREATE_HANDLE(ucp_config_t*, config, ucp_config_release, + ucp_config_read, NULL, NULL); + + ucp_params_t params = get_ctx_params(); + ucp_context_h ucph; + ucs_status_t status; + size_t warn_count; + { + scoped_log_handler slh(hide_warns_logger); + warn_count = m_warnings.size(); + status = ucp_init_version(99, 99, &params, config.get(), &ucph); + } + if (status != UCS_OK) { + ADD_FAILURE() << "Failed to create UCP with wrong version"; + } else { + if (m_warnings.size() == warn_count) { + ADD_FAILURE() << "Missing wrong version warning"; + } + ucp_cleanup(ucph); + } +} + +UCS_TEST_P(test_ucp_version, version_string) { + + unsigned major_version, minor_version, release_number; + + ucp_get_version(&major_version, &minor_version, &release_number); + + char buffer[256]; + snprintf(buffer, sizeof(buffer), "%d.%d.%d", major_version, minor_version, + release_number); + + EXPECT_EQ(std::string(buffer), std::string(ucp_get_version_string())); +} + +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_version, all, "all") diff --git a/test/gtest/ucp/test_ucp_dt.cc b/test/gtest/ucp/test_ucp_dt.cc new file mode 100644 index 0000000..49969b8 --- /dev/null +++ b/test/gtest/ucp/test_ucp_dt.cc @@ -0,0 +1,48 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#include +extern "C" { +#include +} + +class test_ucp_dt_iov : public ucs::test{ +protected: + size_t calc_iov_offset(const ucp_dt_iov_t *iov, size_t iov_indx, size_t iov_offs) { + size_t offset = iov_offs;; + for (size_t i = 0; i < iov_indx; ++i) { + offset += iov[i].length; + } + return offset; + } +}; + +UCS_TEST_F(test_ucp_dt_iov, seek) +{ + for (int count = 0; count < 100; ++count) { + size_t iovcnt = (ucs::rand() % 20) + 1; + std::vector iov(iovcnt); + + size_t total_size = 0; + for (size_t i = 0; i < iovcnt; ++i) { + iov[i].length = (ucs::rand() % 1000) + 1; + total_size += iov[i].length; + } + + ASSERT_EQ(total_size, calc_iov_offset(&iov[0], iovcnt, 0)); + + size_t offset = 0; + size_t iov_offs = 0, iov_indx = 0; + for (int j = 0; j < 100; ++j) { + size_t new_offset = ucs::rand() % total_size; + ucp_dt_iov_seek(&iov[0], iovcnt, + (ptrdiff_t)new_offset - (ptrdiff_t)offset, + &iov_offs, &iov_indx); + EXPECT_EQ(new_offset, calc_iov_offset(&iov[0], iov_indx, iov_offs)); + offset = new_offset; + } + } +} diff --git a/test/gtest/ucp/test_ucp_fence.cc b/test/gtest/ucp/test_ucp_fence.cc new file mode 100644 index 0000000..391fd4e --- /dev/null +++ b/test/gtest/ucp/test_ucp_fence.cc @@ -0,0 +1,176 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test_ucp_atomic.h" +#include "common/gtest.h" + +class test_ucp_fence : public test_ucp_atomic { +public: + typedef void (test_ucp_fence::* send_func_t)(entity *e, uint64_t *initial_buf, + uint64_t *result_buf, void *memheap_addr, + ucp_rkey_h rkey); + + static void send_cb(void *request, ucs_status_t status) + { + } + + template + void blocking_add(entity *e, uint64_t *initial_buf, uint64_t *result_buf, + void *memheap_addr, ucp_rkey_h rkey) { + ucs_status_t status = ucp_atomic_post(e->ep(), UCP_ATOMIC_POST_OP_ADD, + *initial_buf, sizeof(T), + (uintptr_t)memheap_addr, rkey); + ASSERT_UCS_OK(status); + } + + template + void blocking_fadd(entity *e, uint64_t *initial_buf, uint64_t *result_buf, + void *memheap_addr, ucp_rkey_h rkey) + { + void *request = ucp_atomic_fetch_nb(e->ep(), UCP_ATOMIC_FETCH_OP_FADD, + *initial_buf, (T*)result_buf, sizeof(T), + (uintptr_t)memheap_addr, rkey, send_cb); + wait(request); + } + + template + void test(F f1, F f2) { + test_fence(static_cast(f1), + static_cast(f2), sizeof(T)); + } + + class worker { + public: + worker(test_ucp_fence* test, send_func_t send1, send_func_t send2, + entity* entity, ucp_rkey_h rkey, void *memheap_ptr, + uint64_t initial_value, uint32_t* error): + test(test), value(initial_value), result(0), error(error), + running(true), m_rkey(rkey), m_memheap(memheap_ptr), + m_send_1(send1), m_send_2(send2), m_entity(entity) { + pthread_create(&m_thread, NULL, run, reinterpret_cast(this)); + } + + ~worker() { + assert(!running); + } + + static void *run(void *arg) { + worker *self = reinterpret_cast(arg); + self->run(); + return NULL; + } + + void join() { + void *retval; + pthread_join(m_thread, &retval); + running = false; + } + + test_ucp_fence* const test; + uint64_t value, result; + uint32_t* error; + bool running; + + private: + void run() { + uint64_t zero = 0; + + for (int i = 0; i < 500 / ucs::test_time_multiplier(); i++) { + (test->*m_send_1)(m_entity, &value, &result, + m_memheap, m_rkey); + 
+ m_entity->fence(); + + (test->*m_send_2)(m_entity, &zero, &result, + m_memheap, m_rkey); + + test->flush_worker(*m_entity); + + if (result != (uint64_t)(i+1)) + (*error)++; + + result = 0; /* reset for the next loop */ + } + } + + ucp_rkey_h m_rkey; + void *m_memheap; + send_func_t m_send_1, m_send_2; + entity* m_entity; + pthread_t m_thread; + }; + + void run_workers(send_func_t send1, send_func_t send2, entity* sender, + ucp_rkey_h rkey, void *memheap_ptr, + uint64_t initial_value, uint32_t* error) { + ucs::ptr_vector m_workers; + m_workers.clear(); + m_workers.push_back(new worker(this, send1, send2, sender, rkey, + memheap_ptr, initial_value, error)); + m_workers.at(0).join(); + m_workers.clear(); + } + +protected: + void test_fence(send_func_t send1, send_func_t send2, size_t alignment) { + static const size_t memheap_size = sizeof(uint64_t); + uint32_t error = 0; + + sender().connect(&receiver(), get_ep_params()); + flush_worker(sender()); /* avoid deadlock for blocking amo */ + + mapped_buffer buffer(memheap_size, receiver(), 0); + + EXPECT_LE(memheap_size, buffer.size()); + memset(buffer.ptr(), 0, memheap_size); + + run_workers(send1, send2, &sender(), buffer.rkey(sender()), + buffer.ptr(), 1, &error); + + EXPECT_EQ(error, (uint32_t)0); + + disconnect(sender()); + disconnect(receiver()); + } + + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.features |= UCP_FEATURE_RMA; + return params; + } +}; + +class test_ucp_fence32 : public test_ucp_fence { +public: + static ucp_params_t get_ctx_params() { + ucp_params_t params = test_ucp_fence::get_ctx_params(); + params.features |= UCP_FEATURE_AMO32; + return params; + } +}; + +UCS_TEST_P(test_ucp_fence32, atomic_add_fadd) { + test(&test_ucp_fence32::blocking_add, + &test_ucp_fence32::blocking_fadd); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_fence32) + +class test_ucp_fence64 : public test_ucp_fence { +public: + static ucp_params_t get_ctx_params() { + 
ucp_params_t params = test_ucp_fence::get_ctx_params(); + params.features |= UCP_FEATURE_AMO64; + return params; + } +}; + +UCS_TEST_P(test_ucp_fence64, atomic_add_fadd) { + test(&test_ucp_fence64::blocking_add, + &test_ucp_fence64::blocking_fadd); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_fence64) diff --git a/test/gtest/ucp/test_ucp_mem_type.cc b/test/gtest/ucp/test_ucp_mem_type.cc new file mode 100644 index 0000000..ea27a90 --- /dev/null +++ b/test/gtest/ucp/test_ucp_mem_type.cc @@ -0,0 +1,131 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "ucp_test.h" +#include + +extern "C" { +#include +#include +#include +} + + +#define UCP_INSTANTIATE_TEST_CASE_MEMTYPE(_test_case, _name, _mem_type) \ + INSTANTIATE_TEST_CASE_P(_name, _test_case, \ + testing::ValuesIn(_test_case::enum_test_params( \ + _test_case::get_ctx_params(), \ + #_test_case, _mem_type))); + +#define UCP_INSTANTIATE_TEST_CASE_MEMTYPES(_test_case) \ + UCP_INSTANTIATE_TEST_CASE_MEMTYPE(_test_case, host, UCS_MEMORY_TYPE_HOST) \ + UCP_INSTANTIATE_TEST_CASE_MEMTYPE(_test_case, cuda, UCS_MEMORY_TYPE_CUDA) \ + UCP_INSTANTIATE_TEST_CASE_MEMTYPE(_test_case, cuda_managed, UCS_MEMORY_TYPE_CUDA_MANAGED) \ + UCP_INSTANTIATE_TEST_CASE_MEMTYPE(_test_case, rocm, UCS_MEMORY_TYPE_ROCM) \ + UCP_INSTANTIATE_TEST_CASE_MEMTYPE(_test_case, rocm_managed, UCS_MEMORY_TYPE_ROCM_MANAGED) + +class test_ucp_mem_type : public ucp_test { +public: + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.features |= UCP_FEATURE_TAG; + return params; + } + + static std::vector + enum_test_params(const ucp_params_t& ctx_params, + const std::string& test_case_name, ucs_memory_type_t mem_type) + { + std::vector result; + + std::vector mem_types = + mem_buffer::supported_mem_types(); + if (std::find(mem_types.begin(), mem_types.end(), mem_type) != + mem_types.end()) { + generate_test_params_variant(ctx_params, 
"all", test_case_name, + "all", mem_type, result); + } + + return result; + } + +protected: + ucs_memory_type_t mem_type() const { + return static_cast(GetParam().variant); + } +}; + +UCS_TEST_P(test_ucp_mem_type, detect) { + + const size_t size = 256; + const ucs_memory_type_t alloc_mem_type = mem_type(); + + mem_buffer b(size, alloc_mem_type); + + ucs_memory_type_t detected_mem_type = + ucp_memory_type_detect(sender().ucph(), b.ptr(), size); + EXPECT_EQ(alloc_mem_type, detected_mem_type); +} + +UCP_INSTANTIATE_TEST_CASE_MEMTYPES(test_ucp_mem_type) + +class test_ucp_mem_type_alloc_before_init : public test_ucp_mem_type { +public: + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.features |= UCP_FEATURE_TAG; + return params; + } + + test_ucp_mem_type_alloc_before_init() { + m_size = 10000; + } + + virtual void init() { + m_send_buffer.reset(new mem_buffer(m_size, mem_type())); + m_recv_buffer.reset(new mem_buffer(m_size, mem_type())); + test_ucp_mem_type::init(); + } + + virtual void cleanup() { + test_ucp_mem_type::cleanup(); + m_send_buffer.reset(); + m_recv_buffer.reset(); + } + + static const uint64_t SEED = 0x1111111111111111lu; +protected: + size_t m_size; + ucs::auto_ptr m_send_buffer, m_recv_buffer; +}; + +UCS_TEST_P(test_ucp_mem_type_alloc_before_init, xfer) { + sender().connect(&receiver(), get_ep_params()); + + EXPECT_EQ(mem_type(), ucp_memory_type_detect(sender().ucph(), + m_send_buffer->ptr(), m_size)); + EXPECT_EQ(mem_type(), ucp_memory_type_detect(receiver().ucph(), + m_recv_buffer->ptr(), m_size)); + + mem_buffer::pattern_fill(m_send_buffer->ptr(), m_size, SEED, mem_type()); + + for (int i = 0; i < 3; ++i) { + mem_buffer::pattern_fill(m_recv_buffer->ptr(), m_size, 0, mem_type()); + + void *sreq = ucp_tag_send_nb(sender().ep(), m_send_buffer->ptr(), m_size, + ucp_dt_make_contig(1), 1, + (ucp_send_callback_t)ucs_empty_function); + void *rreq = ucp_tag_recv_nb(receiver().worker(), 
m_recv_buffer->ptr(), + m_size, ucp_dt_make_contig(1), 1, 1, + (ucp_tag_recv_callback_t)ucs_empty_function); + wait(sreq); + wait(rreq); + + mem_buffer::pattern_check(m_recv_buffer->ptr(), m_size, SEED, mem_type()); + } +} + +UCP_INSTANTIATE_TEST_CASE_MEMTYPES(test_ucp_mem_type_alloc_before_init) diff --git a/test/gtest/ucp/test_ucp_memheap.cc b/test/gtest/ucp/test_ucp_memheap.cc new file mode 100644 index 0000000..21f3df4 --- /dev/null +++ b/test/gtest/ucp/test_ucp_memheap.cc @@ -0,0 +1,254 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (c) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "test_ucp_memheap.h" + +#include +#include + + +std::vector +test_ucp_memheap::enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) +{ + std::vector result; + generate_test_params_variant(ctx_params, name, + test_case_name, tls, 0, result); + generate_test_params_variant(ctx_params, name, + test_case_name + "/map_nb", + tls, UCP_MEM_MAP_NONBLOCK, result); + return result; +} + +void test_ucp_memheap::test_nonblocking_implicit_stream_xfer(nonblocking_send_func_t send, + size_t size, int max_iter, + size_t alignment, + bool malloc_allocate, + bool is_ep_flush) +{ + void *memheap; + size_t memheap_size; + ucp_mem_map_params_t params; + ucp_mem_attr_t mem_attr; + ucs_status_t status; + + memheap = NULL; + memheap_size = max_iter * size + alignment; + + if (max_iter == DEFAULT_ITERS) { + max_iter = 300 / ucs::test_time_multiplier(); + } + + if (size == DEFAULT_SIZE) { + size = ucs_max((size_t)ucs::rand() % (12 * UCS_KBYTE), alignment); + } + memheap_size = max_iter * size + alignment; + + sender().connect(&receiver(), get_ep_params()); + + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.length = memheap_size; + params.flags 
= GetParam().variant; + if (malloc_allocate) { + memheap = malloc(memheap_size); + params.address = memheap; + params.flags = params.flags & (~(UCP_MEM_MAP_ALLOCATE|UCP_MEM_MAP_FIXED)); + } else if (params.flags & UCP_MEM_MAP_FIXED) { + params.address = ucs::mmap_fixed_address(); + } else { + params.address = NULL; + params.flags |= UCP_MEM_MAP_ALLOCATE; + } + + ucp_mem_h memh; + status = ucp_mem_map(receiver().ucph(), ¶ms, &memh); + ASSERT_UCS_OK(status); + + mem_attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS | + UCP_MEM_ATTR_FIELD_LENGTH; + status = ucp_mem_query(memh, &mem_attr); + ASSERT_UCS_OK(status); + + EXPECT_GE(mem_attr.length, memheap_size); + if (!malloc_allocate) { + memheap = mem_attr.address; + } + memset(memheap, 0, memheap_size); + + void *rkey_buffer; + size_t rkey_buffer_size; + status = ucp_rkey_pack(receiver().ucph(), memh, &rkey_buffer, &rkey_buffer_size); + ASSERT_UCS_OK(status); + + ucp_rkey_h rkey; + status = ucp_ep_rkey_unpack(sender().ep(), rkey_buffer, &rkey); + ASSERT_UCS_OK(status); + + std::string expected_data[300]; + assert (max_iter <= 300); + + for (int i = 0; i < max_iter; ++i) { + expected_data[i].resize(size); + + ucs::fill_random(expected_data[i]); + + ucs_assert(size * i + alignment <= memheap_size); + + char *ptr = (char*)memheap + alignment + i * size; + (this->*send)(&sender(), size, (void*)ptr, rkey, expected_data[i]); + + ASSERT_UCS_OK(status); + + } + + if (is_ep_flush) { + flush_ep(sender()); + } else { + flush_worker(sender()); + } + + for (int i = 0; i < max_iter; ++i) { + char *ptr = (char*)memheap + alignment + i * size; + EXPECT_EQ(expected_data[i].substr(0, 20), + std::string(ptr, expected_data[i].length()).substr(0, 20)) << + ((void*)ptr); + } + + ucp_rkey_destroy(rkey); + + disconnect(sender()); + + ucp_rkey_buffer_release(rkey_buffer); + status = ucp_mem_unmap(receiver().ucph(), memh); + ASSERT_UCS_OK(status); + + if (malloc_allocate) { + free(memheap); + } +} + +/* NOTE: alignment is ignored if memheap_size is 
not default */ +void test_ucp_memheap::test_blocking_xfer(blocking_send_func_t send, + size_t memheap_size, int max_iter, + size_t alignment, + bool malloc_allocate, + bool is_ep_flush) +{ + ucp_mem_map_params_t params; + ucp_mem_attr_t mem_attr; + ucs_status_t status; + size_t size; + int zero_offset = 0; + + if (max_iter == DEFAULT_ITERS) { + max_iter = 300 / ucs::test_time_multiplier(); + } + + if (memheap_size == DEFAULT_SIZE) { + memheap_size = 3 * UCS_KBYTE; + zero_offset = 1; + } + + sender().connect(&receiver(), get_ep_params()); + + /* avoid deadlock for blocking rma/amo */ + flush_worker(sender()); + + ucp_mem_h memh; + void *memheap = NULL; + + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.length = memheap_size; + params.flags = GetParam().variant; + if (malloc_allocate) { + memheap = malloc(memheap_size); + params.address = memheap; + params.flags = params.flags & (~(UCP_MEM_MAP_ALLOCATE|UCP_MEM_MAP_FIXED)); + } else if (params.flags & UCP_MEM_MAP_FIXED) { + params.address = ucs::mmap_fixed_address(); + params.flags |= UCP_MEM_MAP_ALLOCATE; + } else { + params.address = NULL; + params.flags |= UCP_MEM_MAP_ALLOCATE; + } + + status = ucp_mem_map(receiver().ucph(), ¶ms, &memh); + ASSERT_UCS_OK(status); + + mem_attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS | + UCP_MEM_ATTR_FIELD_LENGTH; + status = ucp_mem_query(memh, &mem_attr); + ASSERT_UCS_OK(status); + EXPECT_GE(mem_attr.length, memheap_size); + if (!memheap) { + memheap = mem_attr.address; + } + memset(memheap, 0, memheap_size); + + void *rkey_buffer; + size_t rkey_buffer_size; + status = ucp_rkey_pack(receiver().ucph(), memh, &rkey_buffer, &rkey_buffer_size); + ASSERT_UCS_OK(status); + + ucp_rkey_h rkey; + status = ucp_ep_rkey_unpack(sender().ep(), rkey_buffer, &rkey); + ASSERT_UCS_OK(status); + + ucp_rkey_buffer_release(rkey_buffer); + + for (int i = 0; i < max_iter; ++i) { + size_t offset; + + if (!zero_offset) { + 
size = ucs_max(ucs::rand() % (memheap_size - alignment - 1), alignment); + offset = ucs::rand() % (memheap_size - size - alignment); + } else { + size = memheap_size; + offset = 0; + } + + offset = ucs_align_up(offset, alignment); + + ucs_assert(((((uintptr_t)memheap + offset)) % alignment) == 0); + ucs_assert(size + offset <= memheap_size); + + std::string expected_data; + expected_data.resize(size); + + ucs::fill_random(expected_data); + (this->*send)(&sender(), size, (void*)((uintptr_t)memheap + offset), + rkey, expected_data); + + if (is_ep_flush) { + flush_ep(sender()); + } else { + flush_worker(sender()); + } + + EXPECT_EQ(expected_data, + std::string((char*)memheap + offset, expected_data.length())); + + expected_data.clear(); + } + + ucp_rkey_destroy(rkey); + + disconnect(sender()); + + status = ucp_mem_unmap(receiver().ucph(), memh); + ASSERT_UCS_OK(status); + + if (malloc_allocate) { + free(memheap); + } +} diff --git a/test/gtest/ucp/test_ucp_memheap.h b/test/gtest/ucp/test_ucp_memheap.h new file mode 100644 index 0000000..ec0f5b9 --- /dev/null +++ b/test/gtest/ucp/test_ucp_memheap.h @@ -0,0 +1,64 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (c) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef TEST_UCP_MEMHEAP_H +#define TEST_UCP_MEMHEAP_H + +#include "ucp_test.h" + + +class test_ucp_memheap : public ucp_test { +public: + /* + * @param [in] max_size Maximal size of data to send. + * @param [in] memheap_addr VA to perform the RMA operation to, + * @param [in] rkey Memheap remote key. + * @param [out] expected_data What should the memheap contain at the given + * address after the operation (also can be used + * as a source/destination data). 
+ */ + typedef void (test_ucp_memheap::* blocking_send_func_t)(entity *e, + size_t max_size, + void *memheap_addr, + ucp_rkey_h rkey, + std::string& expected_data); + + /* + * @param [in] max_size Maximal size of data to send. + * @param [in] memheap_addr VA to perform the RMA operation to, + * @param [in] rkey Memheap remote key. + * @param [out] expected_data What should the memheap contain at the given + * address after the operation (also can be used + * as a source/destination data). + */ + typedef void (test_ucp_memheap::* nonblocking_send_func_t)(entity *e, + size_t max_size, + void *memheap_addr, + ucp_rkey_h rkey, + std::string& expected_data); + + static std::vector enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls); + + +protected: + const static size_t DEFAULT_SIZE = 0; + const static int DEFAULT_ITERS = 0; + + void test_blocking_xfer(blocking_send_func_t send, size_t len, int max_iters, + size_t alignment, bool malloc_allocate, bool is_ep_flush); + + void test_nonblocking_implicit_stream_xfer(nonblocking_send_func_t send, + size_t len, int max_iters, + size_t alignment, bool malloc_allocate, + bool is_ep_flush); +}; + + +#endif diff --git a/test/gtest/ucp/test_ucp_mmap.cc b/test/gtest/ucp/test_ucp_mmap.cc new file mode 100644 index 0000000..04e2252 --- /dev/null +++ b/test/gtest/ucp/test_ucp_mmap.cc @@ -0,0 +1,390 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test_ucp_memheap.h" +extern "C" { +#include +#include +#include +} + +class test_ucp_mmap : public test_ucp_memheap { +public: + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.features |= UCP_FEATURE_RMA; + return params; + } + + static int rand_flags() { + if ((ucs::rand() % 2) == 0) { + return 0; + } else { + return UCP_MEM_MAP_NONBLOCK; + } + } + +protected: + bool resolve_rma(entity *e, ucp_rkey_h rkey); + bool resolve_amo(entity *e, ucp_rkey_h rkey); + bool resolve_rma_bw(entity *e, ucp_rkey_h rkey); + void test_length0(unsigned flags); + void test_rkey_management(entity *e, ucp_mem_h memh, bool is_dummy); +}; + +bool test_ucp_mmap::resolve_rma(entity *e, ucp_rkey_h rkey) +{ + ucs_status_t status; + + { + scoped_log_handler slh(hide_errors_logger); + status = UCP_RKEY_RESOLVE(rkey, e->ep(), rma); + } + + if (status == UCS_OK) { + EXPECT_NE(UCP_NULL_LANE, rkey->cache.rma_lane); + return true; + } else if (status == UCS_ERR_UNREACHABLE) { + EXPECT_EQ(UCP_NULL_LANE, rkey->cache.rma_lane); + return false; + } else { + UCS_TEST_ABORT("Invalid status from UCP_RKEY_RESOLVE"); + } +} + +bool test_ucp_mmap::resolve_amo(entity *e, ucp_rkey_h rkey) +{ + ucs_status_t status; + + { + scoped_log_handler slh(hide_errors_logger); + status = UCP_RKEY_RESOLVE(rkey, e->ep(), amo); + } + + if (status == UCS_OK) { + EXPECT_NE(UCP_NULL_LANE, rkey->cache.amo_lane); + return true; + } else if (status == UCS_ERR_UNREACHABLE) { + EXPECT_EQ(UCP_NULL_LANE, rkey->cache.amo_lane); + return false; + } else { + UCS_TEST_ABORT("Invalid status from UCP_RKEY_RESOLVE"); + } +} + +bool test_ucp_mmap::resolve_rma_bw(entity *e, ucp_rkey_h rkey) +{ + ucp_ep_config_t *ep_config = ucp_ep_config(e->ep()); + ucp_lane_index_t lane; + uct_rkey_t uct_rkey; + + lane = ucp_rkey_find_rma_lane(e->ucph(), ep_config, UCS_MEMORY_TYPE_HOST, + ep_config->tag.rndv.get_zcopy_lanes, rkey, 0, + &uct_rkey); + if (lane != UCP_NULL_LANE) { + return 
true; + } else { + return false; + } +} + +void test_ucp_mmap::test_rkey_management(entity *e, ucp_mem_h memh, bool is_dummy) +{ + size_t rkey_size; + void *rkey_buffer; + ucs_status_t status; + + /* Some transports don't support memory registration, so the memory + * can be inaccessible remotely. But it should always be possible + * to pack/unpack a key, even if empty. */ + status = ucp_rkey_pack(e->ucph(), memh, &rkey_buffer, &rkey_size); + if (status == UCS_ERR_UNSUPPORTED && !is_dummy) { + return; + } + ASSERT_UCS_OK(status); + + EXPECT_EQ(ucp_rkey_packed_size(e->ucph(), memh->md_map), rkey_size); + + /* Unpack remote key buffer */ + ucp_rkey_h rkey; + status = ucp_ep_rkey_unpack(e->ep(), rkey_buffer, &rkey); + if (status == UCS_ERR_UNREACHABLE && !is_dummy) { + ucp_rkey_buffer_release(rkey_buffer); + return; + } + ASSERT_UCS_OK(status); + + /* Test ucp_rkey_packed_md_map() */ + EXPECT_EQ(rkey->md_map, ucp_rkey_packed_md_map(rkey_buffer)); + + bool have_rma = resolve_rma(e, rkey); + bool have_amo = resolve_amo(e, rkey); + bool have_rma_bw = resolve_rma_bw(e, rkey); + + /* Test that lane resolution on the remote key returns consistent results */ + for (int i = 0; i < 10; ++i) { + switch (ucs::rand() % 3) { + case 0: + EXPECT_EQ(have_rma, resolve_rma(e, rkey)); + break; + case 1: + EXPECT_EQ(have_amo, resolve_amo(e, rkey)); + break; + case 2: + EXPECT_EQ(have_rma_bw, resolve_rma_bw(e, rkey)); + break; + } + } + + /* Test obtaining direct-access pointer */ + void *ptr; + status = ucp_rkey_ptr(rkey, (uint64_t)memh->address, &ptr); + if (status == UCS_OK) { + EXPECT_EQ(0, memcmp(memh->address, ptr, memh->length)); + } else { + EXPECT_EQ(UCS_ERR_UNREACHABLE, status); + } + + ucp_rkey_destroy(rkey); + ucp_rkey_buffer_release(rkey_buffer); +} + + +UCS_TEST_P(test_ucp_mmap, alloc) { + ucs_status_t status; + bool is_dummy; + + sender().connect(&sender(), get_ep_params()); + + for (int i = 0; i < 1000 / ucs::test_time_multiplier(); ++i) { + size_t size = ucs::rand() % 
(UCS_MBYTE); + + ucp_mem_h memh; + ucp_mem_map_params_t params; + + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.address = NULL; + params.length = size; + params.flags = rand_flags() | UCP_MEM_MAP_ALLOCATE; + + status = ucp_mem_map(sender().ucph(), ¶ms, &memh); + ASSERT_UCS_OK(status); + + is_dummy = (size == 0); + test_rkey_management(&sender(), memh, is_dummy); + + status = ucp_mem_unmap(sender().ucph(), memh); + ASSERT_UCS_OK(status); + } +} + +UCS_TEST_P(test_ucp_mmap, reg) { + + ucs_status_t status; + bool is_dummy; + + sender().connect(&sender(), get_ep_params()); + + for (int i = 0; i < 1000 / ucs::test_time_multiplier(); ++i) { + size_t size = ucs::rand() % (UCS_MBYTE); + + void *ptr = malloc(size); + ucs::fill_random(ptr, size); + + ucp_mem_h memh; + ucp_mem_map_params_t params; + + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.address = ptr; + params.length = size; + params.flags = rand_flags(); + + status = ucp_mem_map(sender().ucph(), ¶ms, &memh); + ASSERT_UCS_OK(status); + + is_dummy = (size == 0); + test_rkey_management(&sender(), memh, is_dummy); + + status = ucp_mem_unmap(sender().ucph(), memh); + ASSERT_UCS_OK(status); + + free(ptr); + } +} + +void test_ucp_mmap::test_length0(unsigned flags) +{ + ucs_status_t status; + int buf_num = 2; + ucp_mem_h memh[buf_num]; + int dummy[1]; + ucp_mem_map_params_t params; + int i; + + sender().connect(&sender(), get_ep_params()); + + /* Check that ucp_mem_map accepts any value for buffer if size is 0 and + * UCP_MEM_FLAG_ZERO_REG flag is passed to it. 
*/ + + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.address = NULL; + params.length = 0; + params.flags = rand_flags() | flags; + + status = ucp_mem_map(sender().ucph(), ¶ms, &memh[0]); + ASSERT_UCS_OK(status); + + params.address = dummy; + status = ucp_mem_map(sender().ucph(), ¶ms, &memh[1]); + ASSERT_UCS_OK(status); + + for (i = 0; i < buf_num; i++) { + test_rkey_management(&sender(), memh[i], true); + status = ucp_mem_unmap(sender().ucph(), memh[i]); + ASSERT_UCS_OK(status); + } +} + +UCS_TEST_P(test_ucp_mmap, reg0) { + test_length0(0); +} + +UCS_TEST_P(test_ucp_mmap, alloc0) { + test_length0(UCP_MEM_MAP_ALLOCATE); +} + +UCS_TEST_P(test_ucp_mmap, alloc_advise) { + ucs_status_t status; + bool is_dummy; + + sender().connect(&sender(), get_ep_params()); + + size_t size = 128 * UCS_MBYTE; + + ucp_mem_h memh; + ucp_mem_map_params_t params; + ucp_mem_attr_t attr; + ucp_mem_advise_params_t advise_params; + + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.address = NULL; + params.length = size; + params.flags = UCP_MEM_MAP_NONBLOCK | UCP_MEM_MAP_ALLOCATE; + + status = ucp_mem_map(sender().ucph(), ¶ms, &memh); + ASSERT_UCS_OK(status); + + attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS | UCP_MEM_ATTR_FIELD_LENGTH; + status = ucp_mem_query(memh, &attr); + ASSERT_UCS_OK(status); + EXPECT_GE(attr.length, size); + + advise_params.field_mask = UCP_MEM_ADVISE_PARAM_FIELD_ADDRESS | + UCP_MEM_ADVISE_PARAM_FIELD_LENGTH | + UCP_MEM_ADVISE_PARAM_FIELD_ADVICE; + advise_params.address = attr.address; + advise_params.length = size; + advise_params.advice = UCP_MADV_WILLNEED; + status = ucp_mem_advise(sender().ucph(), memh, &advise_params); + ASSERT_UCS_OK(status); + + is_dummy = (size == 0); + test_rkey_management(&sender(), memh, is_dummy); + + status = ucp_mem_unmap(sender().ucph(), memh); + ASSERT_UCS_OK(status); +} + 
+UCS_TEST_P(test_ucp_mmap, reg_advise) { + + ucs_status_t status; + bool is_dummy; + + sender().connect(&sender(), get_ep_params()); + + size_t size = 128 * UCS_MBYTE; + + void *ptr = malloc(size); + ucs::fill_random(ptr, size); + + ucp_mem_h memh; + ucp_mem_map_params_t params; + ucp_mem_attr_t mem_attr; + ucp_mem_advise_params_t advise_params; + + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.address = ptr; + params.length = size; + params.flags = UCP_MEM_MAP_NONBLOCK; + + status = ucp_mem_map(sender().ucph(), ¶ms, &memh); + ASSERT_UCS_OK(status); + + mem_attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS; + status = ucp_mem_query(memh, &mem_attr); + ASSERT_UCS_OK(status); + + advise_params.field_mask = UCP_MEM_ADVISE_PARAM_FIELD_ADDRESS | + UCP_MEM_ADVISE_PARAM_FIELD_LENGTH | + UCP_MEM_ADVISE_PARAM_FIELD_ADVICE; + advise_params.address = mem_attr.address; + advise_params.length = size; + advise_params.advice = UCP_MADV_WILLNEED; + status = ucp_mem_advise(sender().ucph(), memh, &advise_params); + ASSERT_UCS_OK(status); + is_dummy = (size == 0); + test_rkey_management(&sender(), memh, is_dummy); + + status = ucp_mem_unmap(sender().ucph(), memh); + ASSERT_UCS_OK(status); + + free(ptr); +} + +UCS_TEST_P(test_ucp_mmap, fixed) { + ucs_status_t status; + bool is_dummy; + + sender().connect(&sender(), get_ep_params()); + + for (int i = 0; i < 1000 / ucs::test_time_multiplier(); ++i) { + size_t size = (i + 1) * ((i % 2) ? 
1000 : 1); + void *ptr = ucs::mmap_fixed_address(); + + ucp_mem_h memh; + ucp_mem_map_params_t params; + + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.address = ptr; + params.length = size; + params.flags = UCP_MEM_MAP_FIXED | UCP_MEM_MAP_ALLOCATE; + + status = ucp_mem_map(sender().ucph(), ¶ms, &memh); + ASSERT_UCS_OK(status); + EXPECT_EQ(memh->address, ptr); + EXPECT_GE(memh->length, size); + + is_dummy = (size == 0); + test_rkey_management(&sender(), memh, is_dummy); + + status = ucp_mem_unmap(sender().ucph(), memh); + ASSERT_UCS_OK(status); + } +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_mmap) diff --git a/test/gtest/ucp/test_ucp_peer_failure.cc b/test/gtest/ucp/test_ucp_peer_failure.cc new file mode 100644 index 0000000..4ef68c3 --- /dev/null +++ b/test/gtest/ucp/test_ucp_peer_failure.cc @@ -0,0 +1,440 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test_ucp_tag.h" +#include "ucp_datatype.h" + +extern "C" { +#include /* for testing EP RNDV configuration */ +#include /* for debug */ +#include /* for testing memory consumption */ +} + +class test_ucp_peer_failure : public ucp_test { +public: + test_ucp_peer_failure(); + + static std::vector + enum_test_params(const ucp_params_t& ctx_params, const std::string& name, + const std::string& test_case_name, const std::string& tls); + + ucp_ep_params_t get_ep_params(); + +protected: + enum { + TEST_TAG = UCS_BIT(0), + TEST_RMA = UCS_BIT(1), + FAIL_IMM = UCS_BIT(2) + }; + + enum { + STABLE_EP_INDEX, + FAILING_EP_INDEX + }; + + typedef ucs::handle mem_handle_t; + + void set_timeouts(); + static void err_cb(void *arg, ucp_ep_h ep, ucs_status_t status); + ucp_ep_h stable_sender(); + ucp_ep_h failing_sender(); + entity& stable_receiver(); + entity& failing_receiver(); + void *send_nb(ucp_ep_h ep, ucp_rkey_h rkey); + void *recv_nb(entity& e); + void fail_receiver(); + void smoke_test(bool stable_pair); + static void unmap_memh(ucp_mem_h memh, ucp_context_h context); + void get_rkey(ucp_ep_h ep, entity& dst, mem_handle_t& memh, + ucs::handle& rkey); + void set_rkeys(); + static void send_cb(void *request, ucs_status_t status); + static void recv_cb(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info); + + virtual void cleanup(); + + void do_test(size_t msg_size, int pre_msg_count, bool force_close, + bool request_must_fail); + + size_t m_err_count; + ucs_status_t m_err_status; + std::string m_sbuf, m_rbuf; + mem_handle_t m_stable_memh, m_failing_memh; + ucs::handle m_stable_rkey, m_failing_rkey; + ucs::ptr_vector m_env; +}; + +UCP_INSTANTIATE_TEST_CASE(test_ucp_peer_failure) + + +test_ucp_peer_failure::test_ucp_peer_failure() : m_err_count(0), m_err_status(UCS_OK) { + ucs::fill_random(m_sbuf); + set_timeouts(); +} + +std::vector +test_ucp_peer_failure::enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& 
test_case_name, + const std::string& tls) +{ + std::vector result; + + ucp_params_t params = ucp_test::get_ctx_params(); + + params.field_mask |= UCP_PARAM_FIELD_FEATURES; + + params.features = UCP_FEATURE_TAG; + generate_test_params_variant(params, name, test_case_name + "/tag", tls, + TEST_TAG, result); + generate_test_params_variant(params, name, test_case_name + "/tag_fail_imm", + tls, TEST_TAG | FAIL_IMM, result); + + params.features = UCP_FEATURE_RMA; + generate_test_params_variant(params, name, test_case_name + "/rma", tls, + TEST_RMA, result); + generate_test_params_variant(params, name, test_case_name + "/rma_fail_imm", + tls, TEST_RMA | FAIL_IMM, result); + + return result; +} + +ucp_ep_params_t test_ucp_peer_failure::get_ep_params() { + ucp_ep_params_t params; + memset(¶ms, 0, sizeof(params)); + params.field_mask = UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE | + UCP_EP_PARAM_FIELD_ERR_HANDLER; + params.err_mode = UCP_ERR_HANDLING_MODE_PEER; + params.err_handler.cb = err_cb; + params.err_handler.arg = reinterpret_cast(this); + return params; +} + +void test_ucp_peer_failure::set_timeouts() { + /* Set small TL timeouts to reduce testing time */ + m_env.push_back(new ucs::scoped_setenv("UCX_RC_TIMEOUT", "10ms")); + m_env.push_back(new ucs::scoped_setenv("UCX_RC_RNR_TIMEOUT", "10ms")); + m_env.push_back(new ucs::scoped_setenv("UCX_RC_RETRY_COUNT", "2")); +} + +void test_ucp_peer_failure::err_cb(void *arg, ucp_ep_h ep, ucs_status_t status) { + test_ucp_peer_failure *self = reinterpret_cast(arg); + EXPECT_EQ(UCS_ERR_ENDPOINT_TIMEOUT, status); + self->m_err_status = status; + ++self->m_err_count; +} + +ucp_ep_h test_ucp_peer_failure::stable_sender() { + return sender().ep(0, STABLE_EP_INDEX); +} + +ucp_ep_h test_ucp_peer_failure::failing_sender() { + return sender().ep(0, FAILING_EP_INDEX); +} + +ucp_test::entity& test_ucp_peer_failure::stable_receiver() { + return m_entities.at(m_entities.size() - 2); +} + +ucp_test::entity& test_ucp_peer_failure::failing_receiver() 
{ + return m_entities.at(m_entities.size() - 1); +} + +void *test_ucp_peer_failure::send_nb(ucp_ep_h ep, ucp_rkey_h rkey) { + if (GetParam().variant & TEST_TAG) { + return ucp_tag_send_nb(ep, &m_sbuf[0], m_sbuf.size(), DATATYPE, 0, + send_cb); + } else if (GetParam().variant & TEST_RMA) { + return ucp_put_nb(ep, &m_sbuf[0], m_sbuf.size(), (uintptr_t)&m_rbuf[0], + rkey, send_cb); + } else { + ucs_fatal("invalid test case"); + } +} + +void *test_ucp_peer_failure::recv_nb(entity& e) { + ucs_assert(m_rbuf.size() >= m_sbuf.size()); + if (GetParam().variant & TEST_TAG) { + return ucp_tag_recv_nb(e.worker(), &m_rbuf[0], m_rbuf.size(), DATATYPE, 0, + 0, recv_cb); + } else if (GetParam().variant & TEST_RMA) { + return NULL; + } else { + ucs_fatal("invalid test case"); + } +} + +void test_ucp_peer_failure::fail_receiver() { + /* TODO: need to handle non-empty TX window in UD EP destructor", + * see debug message (ud_ep.c:220) + * ucs_debug("ep=%p id=%d conn_id=%d has %d unacked packets", + * self, self->ep_id, self->conn_id, + * (int)ucs_queue_length(&self->tx.window)); + */ + // TODO use force-close to close connections + flush_worker(failing_receiver()); + m_failing_memh.reset(); + failing_receiver().cleanup(); +} + +void test_ucp_peer_failure::smoke_test(bool stable_pair) { + void *rreq = recv_nb(stable_pair ? stable_receiver() : failing_receiver()); + void *sreq = send_nb(stable_pair ? stable_sender() : failing_sender(), + stable_pair ? 
m_stable_rkey : m_failing_rkey); + wait(sreq); + wait(rreq); + EXPECT_EQ(m_sbuf, m_rbuf); +} + +void test_ucp_peer_failure::unmap_memh(ucp_mem_h memh, ucp_context_h context) +{ + ucs_status_t status = ucp_mem_unmap(context, memh); + if (status != UCS_OK) { + ucs_warn("failed to unmap memory: %s", ucs_status_string(status)); + } +} + +void test_ucp_peer_failure::get_rkey(ucp_ep_h ep, entity& dst, mem_handle_t& memh, + ucs::handle& rkey) { + ucp_mem_map_params_t params; + + memset(¶ms, 0, sizeof(params)); + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH; + params.address = &m_rbuf[0]; + params.length = m_rbuf.size(); + + ucp_mem_h ucp_memh; + ucs_status_t status = ucp_mem_map(dst.ucph(), ¶ms, &ucp_memh); + ASSERT_UCS_OK(status); + memh.reset(ucp_memh, unmap_memh, dst.ucph()); + + void *rkey_buffer; + size_t rkey_buffer_size; + status = ucp_rkey_pack(dst.ucph(), memh, &rkey_buffer, &rkey_buffer_size); + ASSERT_UCS_OK(status); + + ucp_rkey_h ucp_rkey; + status = ucp_ep_rkey_unpack(ep, rkey_buffer, &ucp_rkey); + ASSERT_UCS_OK(status); + rkey.reset(ucp_rkey, ucp_rkey_destroy); + + ucp_rkey_buffer_release(rkey_buffer); +} + +void test_ucp_peer_failure::set_rkeys() { + + if (GetParam().variant & TEST_RMA) { + get_rkey(failing_sender(), failing_receiver(), m_failing_memh, + m_failing_rkey); + get_rkey(stable_sender(), stable_receiver(), m_stable_memh, + m_stable_rkey); + } +} + +void test_ucp_peer_failure::send_cb(void *request, ucs_status_t status) +{ +} + +void test_ucp_peer_failure::recv_cb(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info) +{ +} + +void test_ucp_peer_failure::cleanup() { + m_failing_rkey.reset(); + m_stable_rkey.reset(); + m_failing_memh.reset(); + m_stable_memh.reset(); + ucp_test::cleanup(); +} + +void test_ucp_peer_failure::do_test(size_t msg_size, int pre_msg_count, + bool force_close, bool request_must_fail) +{ + skip_loopback(); + + m_sbuf.resize(msg_size); + m_rbuf.resize(msg_size); + + /* 
connect 2 ep's from sender() to 2 receiver entities */ + create_entity(); + sender().connect(&stable_receiver(), get_ep_params(), STABLE_EP_INDEX); + sender().connect(&failing_receiver(), get_ep_params(), FAILING_EP_INDEX); + + set_rkeys(); + + /* Since we don't want to test peer failure on a stable pair + * and don't expect EP timeout error on those EPs, + * run traffic on a stable pair to connect it */ + smoke_test(true); + + if (!(GetParam().variant & FAIL_IMM)) { + /* if not fail immediately, run traffic on failing pair to connect it */ + smoke_test(false); + } + + /* put some sends on the failing pair */ + std::vector sreqs_pre; + for (int i = 0; i < pre_msg_count; ++i) { + progress(); + void *req = send_nb(failing_sender(), m_failing_rkey); + ASSERT_FALSE(UCS_PTR_IS_ERR(req)); + if (UCS_PTR_IS_PTR(req)) { + sreqs_pre.push_back(req); + } + } + + EXPECT_EQ(UCS_OK, m_err_status); + + /* Since UCT/UD EP has a SW implementation of reliablity on which peer + * failure mechanism is based, we should set small UCT/UD EP timeout + * for UCT/UD EPs for sender's UCP EP to reduce testing time */ + double prev_ib_ud_timeout = sender().set_ib_ud_timeout(3.); + + { + scoped_log_handler slh(wrap_errors_logger); + + fail_receiver(); + + void *sreq = send_nb(failing_sender(), m_failing_rkey); + + while (!m_err_count) { + progress(); + } + EXPECT_NE(UCS_OK, m_err_status); + + if (UCS_PTR_IS_PTR(sreq)) { + /* The request may either succeed or fail, even though the data is + * not * delivered - depends on when the error is detected on sender + * side and if zcopy/bcopy protocol is used. In any case, the + * request must complete, and all resources have to be released. 
+ */ + ucs_status_t status = ucp_request_check_status(sreq); + EXPECT_NE(UCS_INPROGRESS, status); + if (request_must_fail) { /* strict mode: the request status has to equal the recorded error status */ + EXPECT_EQ(m_err_status, status); + } else { + EXPECT_TRUE((m_err_status == status) || (UCS_OK == status)); + } + ucp_request_release(sreq); + } + + /* Additional sends must fail */ + void *sreq2 = send_nb(failing_sender(), m_failing_rkey); + EXPECT_FALSE(UCS_PTR_IS_PTR(sreq2)); + EXPECT_EQ(m_err_status, UCS_PTR_STATUS(sreq2)); + + if (force_close) { /* force-close the failing EP and compare the in-use EP count of the worker before and after */ + unsigned allocd_eps_before = + ucs_strided_alloc_inuse_count(&sender().worker()->ep_alloc); + + ucp_ep_h ep = sender().revoke_ep(0, FAILING_EP_INDEX); + + m_failing_rkey.reset(); /* the rkey is dropped before the EP it was unpacked on is closed */ + + void *creq = ucp_ep_close_nb(ep, UCP_EP_CLOSE_MODE_FORCE); + wait(creq); + + unsigned allocd_eps_after = + ucs_strided_alloc_inuse_count(&sender().worker()->ep_alloc); + + if (!(GetParam().variant & FAIL_IMM)) { /* the count is only required to drop when the variant is not FAIL_IMM */ + EXPECT_LT(allocd_eps_after, allocd_eps_before); + } + } + + /* release requests */ + while (!sreqs_pre.empty()) { + void *req = sreqs_pre.back(); + sreqs_pre.pop_back(); + EXPECT_NE(UCS_INPROGRESS, ucp_request_test(req, NULL)); + ucp_request_release(req); + } + } + + /* Since we won't test peer failure anymore, reset UCT/UD EP timeout to the + * default value to avoid possible UD EP timeout errors under high load */ + sender().set_ib_ud_timeout(prev_ib_ud_timeout); + + /* Check workability of stable pair */ + smoke_test(true); + + /* Check that TX polling is working well */ + while (sender().progress()); /* empty body: loops until progress() returns 0 */ + + /* Destroy rkeys before destroying the worker (which also destroys the + * endpoints) */ + m_failing_rkey.reset(); + m_stable_rkey.reset(); + + /* When all requests on sender are done we need to prevent LOCAL_FLUSH + * in test teardown. 
Receiver is killed and doesn't respond on FC requests + */ + sender().destroy_worker(); +} + +UCS_TEST_P(test_ucp_peer_failure, basic) { /* 1 KB message, no queued sends, request may succeed or fail */ + do_test(UCS_KBYTE, /* msg_size */ + 0, /* pre_msg_cnt */ + false, /* force_close */ + false /* must_fail */); +} + +UCS_TEST_P(test_ucp_peer_failure, rndv_disable) { /* checks that all four tag rendezvous thresholds of the sender EP config equal the maximum size_t value */ + const size_t size_max = std::numeric_limits<size_t>::max(); + + sender().connect(&receiver(), get_ep_params(), STABLE_EP_INDEX); + EXPECT_EQ(size_max, ucp_ep_config(sender().ep())->tag.rndv.am_thresh); + EXPECT_EQ(size_max, ucp_ep_config(sender().ep())->tag.rndv.rma_thresh); + EXPECT_EQ(size_max, ucp_ep_config(sender().ep())->tag.rndv_send_nbr.am_thresh); + EXPECT_EQ(size_max, ucp_ep_config(sender().ep())->tag.rndv_send_nbr.rma_thresh); +} + +UCS_TEST_P(test_ucp_peer_failure, zcopy, "ZCOPY_THRESH=1023") { /* same as basic, but the request is required to fail */ + do_test(UCS_KBYTE, /* msg_size */ + 0, /* pre_msg_cnt */ + false, /* force_close */ + true /* must_fail */); +} + +UCS_TEST_P(test_ucp_peer_failure, bcopy_multi, "SEG_SIZE?=512", "RC_TM_ENABLE?=n") { + do_test(UCS_KBYTE, /* msg_size */ + 0, /* pre_msg_cnt */ + false, /* force_close */ + false /* must_fail */); +} + +UCS_TEST_P(test_ucp_peer_failure, force_close, "RC_FC_ENABLE?=n") { /* queues 1000 sends first, then force-closes the failing EP */ + do_test(16000, /* msg_size */ + 1000, /* pre_msg_cnt */ + true, /* force_close */ + false /* must_fail */); +} + +UCS_TEST_SKIP_COND_P(test_ucp_peer_failure, disable_sync_send, + !(GetParam().variant & TEST_TAG)) { + const size_t max_size = UCS_MBYTE; + std::vector buf(max_size, 0); /* NOTE(review): the element type of this vector is missing in this copy of the patch - restore it from upstream */ + void *req; + + sender().connect(&receiver(), get_ep_params()); + + /* Make sure API is disabled for any size and data type */ + for (size_t size = 1; size <= max_size; size *= 2) { + req = ucp_tag_send_sync_nb(sender().ep(), buf.data(), size, DATATYPE, + 0x111337, NULL); + EXPECT_FALSE(UCS_PTR_IS_PTR(req)); + EXPECT_EQ(UCS_ERR_UNSUPPORTED, UCS_PTR_STATUS(req)); + + ucp::data_type_desc_t dt_desc(DATATYPE_IOV, buf.data(), size); + req = ucp_tag_send_sync_nb(sender().ep(), dt_desc.buf(), dt_desc.count(), + 
dt_desc.dt(), 0x111337, NULL); + EXPECT_FALSE(UCS_PTR_IS_PTR(req)); + EXPECT_EQ(UCS_ERR_UNSUPPORTED, UCS_PTR_STATUS(req)); + } +} diff --git a/test/gtest/ucp/test_ucp_perf.cc b/test/gtest/ucp/test_ucp_perf.cc new file mode 100644 index 0000000..d1ac194 --- /dev/null +++ b/test/gtest/ucp/test_ucp_perf.cc @@ -0,0 +1,195 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include "ucp_test.h" + +#include + + +#define MB pow(1024.0, -2) /* 1 / 1024^2; used in the bandwidth rows of tests[] */ +#define UCP_ARM_PERF_TEST_MULTIPLIER 2 /* widens the min/max bounds on AArch64, see the envelope test */ +class test_ucp_perf : public ucp_test, public test_perf { +protected: + virtual void init() { /* installs log_handler for the duration of the test */ + test_base::init(); /* Skip entities creation in ucp_test */ + ucs_log_push_handler(log_handler); + } + + virtual void cleanup() { /* removes the handler pushed by init(), then runs the base cleanup */ + ucs_log_pop_handler(); + test_base::cleanup(); + } + + static ucs_log_func_rc_t /* stops ERROR-level messages that contain the UCS_ERR_UNREACHABLE or UCS_ERR_UNSUPPORTED status string; everything else continues to the next handler */ + log_handler(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) { + // Ignore errors that transport cannot reach peer + if (level == UCS_LOG_LEVEL_ERROR) { + std::string err_str = format_message(message, ap); + if (strstr(err_str.c_str(), ucs_status_string(UCS_ERR_UNREACHABLE)) || + strstr(err_str.c_str(), ucs_status_string(UCS_ERR_UNSUPPORTED))) { + UCS_TEST_MESSAGE << err_str; + return UCS_LOG_FUNC_RC_STOP; + } + } + return UCS_LOG_FUNC_RC_CONTINUE; + } + + const static test_spec tests[]; +}; + + +const test_perf::test_spec test_ucp_perf::tests[] = +{ + { "tag latency", "usec", + UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 60.0, + 0 }, + + { "tag iov latency", "usec", + UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_PINGPONG, + UCP_PERF_DATATYPE_IOV, 8192, 3, { 1024, 1024, 1024 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, 
latency.total_average), 1e6, 0.001, 60.0, + 0 }, /* NOTE(review): every row is a positional test_perf::test_spec initializer; the struct is declared outside this chunk - confirm the field order there */ + + { "tag mr", "Mpps", + UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu, + ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 100.0, + 0 }, + + { "tag sync mr", "Mpps", + UCX_PERF_API_UCP, UCX_PERF_CMD_TAG_SYNC, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu, + ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.05, 100.0, 0}, + + { "tag wild mr", "Mpps", + UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu, + ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 100.0, + UCX_PERF_TEST_FLAG_TAG_WILDCARD }, + + { "tag bw", "MB/sec", /* NOTE(review): this row and the next use UCT_PERF_DATA_LAYOUT_LAST where the other rows use a UCP_PERF_DATATYPE_* value, and they omit the trailing flags entry - confirm this is intended */ + UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCT_PERF_DATA_LAYOUT_LAST, 0, 1, { 2048 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 100.0, 100000.0 }, + + { "tag bw_zcopy_multi", "MB/sec", + UCX_PERF_API_UCP, UCX_PERF_CMD_TAG, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCT_PERF_DATA_LAYOUT_LAST, 0, 1, { 2048 }, 16, 100000lu, + ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 100.0, 100000.0 }, + + { "put latency", "usec", + UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0, + 0 }, + + { "put rate", "Mpps", + UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 2000000lu, + ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.5, 100.0, + 0 }, + + { "put bw", "MB/sec", + UCX_PERF_API_UCP, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 2048 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0, + 0 }, + + { "get latency", 
"usec", + UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI, /* NOTE(review): the get and atomic latency rows use STREAM_UNI while the tag, put and stream latency rows use PINGPONG - confirm this is intended */ + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0, + 0 }, + + { "get bw", "MB/sec", + UCX_PERF_API_UCP, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu, + ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0, + 0 }, + + { "stream latency", "usec", + UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0, 0 }, + + { "stream bw", "MB/sec", + UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu, + ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0, 0 }, + + { "stream recv-data latency", "usec", /* same as "stream latency" except for the UCX_PERF_TEST_FLAG_STREAM_RECV_DATA flag */ + UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_PINGPONG, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0, + UCX_PERF_TEST_FLAG_STREAM_RECV_DATA }, + + { "stream recv-data bw", "MB/sec", + UCX_PERF_API_UCP, UCX_PERF_CMD_STREAM, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 16384 }, 1, 10000lu, + ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 200.0, 100000.0, + UCX_PERF_TEST_FLAG_STREAM_RECV_DATA }, + + { "atomic add rate", "Mpps", + UCX_PERF_API_UCP, UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 1000000lu, + ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.1, 500.0, + 0 }, + + { "atomic fadd latency", "usec", + UCX_PERF_API_UCP, UCX_PERF_CMD_FADD, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0, + 0 }, + + { 
"atomic swap latency", "usec", + UCX_PERF_API_UCP, UCX_PERF_CMD_SWAP, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0, + 0 }, + + { "atomic cswap latency", "usec", + UCX_PERF_API_UCP, UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI, + UCP_PERF_DATATYPE_CONTIG, 0, 1, { 8 }, 1, 100000lu, + ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.001, 30.0, + 0 }, + + { NULL } /* terminator: the envelope loop stops at the first row whose title is NULL */ +}; + + +UCS_TEST_P(test_ucp_perf, envelope) { /* runs every row of tests[] with UCX_TLS set to the test parameter */ + bool check_perf = true; + size_t max_iter = std::numeric_limits<size_t>::max(); + + if (has_transport("tcp")) { /* with tcp: no performance check and at most 1000 iterations per row */ + check_perf = false; + max_iter = 1000lu; + } + + std::stringstream ss; + ss << GetParam(); + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv tls("UCX_TLS", ss.str().c_str()); + ucs::scoped_setenv warn_invalid("UCX_WARN_INVALID_CONFIG", "no"); + + /* Run all tests */ + for (const test_spec *test_iter = tests; test_iter->title != NULL; ++test_iter) { + test_spec test = *test_iter; /* work on a copy so the shared table is never modified */ + + if (ucs_arch_get_cpu_model() == UCS_CPU_MODEL_ARM_AARCH64) { + test.max *= UCP_ARM_PERF_TEST_MULTIPLIER; + test.min /= UCP_ARM_PERF_TEST_MULTIPLIER; + } + test.iters = ucs_min(test.iters, max_iter); + run_test(test, 0, check_perf, "", ""); + } +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_perf) diff --git a/test/gtest/ucp/test_ucp_rma.cc b/test/gtest/ucp/test_ucp_rma.cc new file mode 100644 index 0000000..e0e0079 --- /dev/null +++ b/test/gtest/ucp/test_ucp_rma.cc @@ -0,0 +1,308 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* Copyright (c) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test_ucp_memheap.h" +#include + + +class test_ucp_rma : public test_ucp_memheap { +private: + static void send_completion(void *request, ucs_status_t status){} /* empty completion callback passed to ucp_put_nb and ucp_get_nb */ +public: + static ucp_params_t get_ctx_params() { /* base context parameters with UCP_FEATURE_RMA added */ + ucp_params_t params = ucp_test::get_ctx_params(); + params.features |= UCP_FEATURE_RMA; + return params; + } + + std::vector + static enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) + { /* two variants per transport list: variant 0, and "/map_nb" with UCP_MEM_MAP_NONBLOCK */ + std::vector result; + generate_test_params_variant(ctx_params, name, test_case_name, tls, 0, + result); + generate_test_params_variant(ctx_params, name, test_case_name + "/map_nb", + tls, UCP_MEM_MAP_NONBLOCK, result); + return result; + } + + void nonblocking_put_nbi(entity *e, size_t max_size, + void *memheap_addr, + ucp_rkey_h rkey, + std::string& expected_data) + { /* implicit non-blocking put of expected_data to memheap_addr; max_size is not used here */ + ucs_status_t status; + status = ucp_put_nbi(e->ep(), &expected_data[0], expected_data.length(), + (uintptr_t)memheap_addr, rkey); + ASSERT_UCS_OK_OR_INPROGRESS(status); + } + + void nonblocking_put_nb(entity *e, size_t max_size, + void *memheap_addr, + ucp_rkey_h rkey, + std::string& expected_data) + { /* non-blocking put that waits on the request when one is returned; max_size is not used here */ + void *status; + + status = ucp_put_nb(e->ep(), &expected_data[0], expected_data.length(), + (uintptr_t)memheap_addr, rkey, send_completion); + ASSERT_UCS_PTR_OK(status); + if (UCS_PTR_IS_PTR(status)) { + wait(status); + } + } + + void nonblocking_get_nbi(entity *e, size_t max_size, + void *memheap_addr, + ucp_rkey_h rkey, + std::string& expected_data) + { /* randomizes at most 16384 bytes of the remote buffer, then issues an implicit non-blocking get into expected_data */ + ucs_status_t status; + + ucs::fill_random(memheap_addr, ucs_min(max_size, 16384U)); + status = ucp_get_nbi(e->ep(), (void *)&expected_data[0], expected_data.length(), + (uintptr_t)memheap_addr, rkey); + ASSERT_UCS_OK_OR_INPROGRESS(status); + } + + void nonblocking_get_nb(entity *e, size_t max_size, + void *memheap_addr, + ucp_rkey_h rkey, + std::string& expected_data) + { /* same as nonblocking_get_nbi, but uses ucp_get_nb and waits on the returned request */ + void *status; + + ucs::fill_random(memheap_addr, ucs_min(max_size, 
16384U)); + status = ucp_get_nb(e->ep(), &expected_data[0], expected_data.length(), + (uintptr_t)memheap_addr, rkey, send_completion); + ASSERT_UCS_PTR_OK(status); + if (UCS_PTR_IS_PTR(status)) { + wait(status); + } + } + + void test_message_sizes(blocking_send_func_t func, size_t *msizes, int iters, int is_nbi); +}; + +void test_ucp_rma::test_message_sizes(blocking_send_func_t func, size_t *msizes, int iters, int is_nbi) /* runs func for every size in msizes, which must end with a 0 entry; is_nbi selects the implicit-stream transfer helper instead of the blocking one */ +{ + int i; + + for (i = 0; msizes[i] > 0; i++) { + if (is_nbi) { + test_nonblocking_implicit_stream_xfer(static_cast(func), /* NOTE(review): the cast target type is missing in this copy of the patch - restore it from upstream */ + msizes[i], iters, 1, false, false); /* pass the requested iteration count, as the blocking branch does, not the loop index */ + } else { + test_blocking_xfer(func, msizes[i], iters, 1, false, false); + } + } +} + +UCS_TEST_P(test_ucp_rma, nbi_small) { + size_t sizes[] = { 8, 24, 96, 120, 250, 0}; + + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_put_nbi), + sizes, 1000, 1); + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_get_nbi), + sizes, 1000, 1); +} + +UCS_TEST_P(test_ucp_rma, nbi_med) { + size_t sizes[] = { 1000, 3000, 9000, 17300, 31000, 99000, 130000, 0}; + + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_put_nbi), + sizes, 100, 1); + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_get_nbi), + sizes, 100, 1); +} + +UCS_TEST_SKIP_COND_P(test_ucp_rma, nbi_large, RUNNING_ON_VALGRIND) { + size_t sizes[] = { 1 * UCS_MBYTE, 3 * UCS_MBYTE, 9 * UCS_MBYTE, + 17 * UCS_MBYTE, 32 * UCS_MBYTE, 0}; + + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_put_nbi), + sizes, 3, 1); + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_get_nbi), + sizes, 3, 1); +} + +UCS_TEST_P(test_ucp_rma, nb_small) { /* NOTE(review): the nb_* tests also pass is_nbi = 1 - confirm this is intended */ + size_t sizes[] = { 8, 24, 96, 120, 250, 0}; + + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_put_nb), + sizes, 1000, 1); + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_get_nb), + sizes, 1000, 1); +} + +UCS_TEST_P(test_ucp_rma, nb_med) { + size_t sizes[] = { 1000, 3000, 9000, 17300, 31000, 99000, 130000, 0}; + + 
test_message_sizes(static_cast(&test_ucp_rma::nonblocking_put_nb), + sizes, 100, 1); + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_get_nb), + sizes, 100, 1); +} + +UCS_TEST_SKIP_COND_P(test_ucp_rma, nb_large, RUNNING_ON_VALGRIND) { /* 1 MB to 32 MB messages, 3 iterations requested; skipped under valgrind */ + size_t sizes[] = { 1 * UCS_MBYTE, 3 * UCS_MBYTE, 9 * UCS_MBYTE, + 17 * UCS_MBYTE, 32 * UCS_MBYTE, 0}; + + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_put_nb), + sizes, 3, 1); + test_message_sizes(static_cast(&test_ucp_rma::nonblocking_get_nb), + sizes, 3, 1); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_put_nbi_flush_worker) { /* NOTE(review): in the tests below the last argument is false in *_flush_worker and true in *_flush_ep, and each test runs with the fifth argument false then true; their meaning is defined in test_ucp_memheap - confirm there */ + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_put_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, false); + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_put_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, false); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_put_nbi_flush_ep) { + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_put_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, true); + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_put_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, true); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_stream_put_nbi_flush_worker) { /* same operation as above, driven through test_nonblocking_implicit_stream_xfer */ + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_put_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, false); + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_put_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, false); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_stream_put_nbi_flush_ep) { + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_put_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, true); + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_put_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, true); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_put_nb_flush_worker) { + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_put_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 
1, false, false); + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_put_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, false); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_put_nb_flush_ep) { /* put_nb through test_blocking_xfer, last argument true */ + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_put_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, true); + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_put_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, true); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_stream_put_nb_flush_worker) { /* put_nb through test_nonblocking_implicit_stream_xfer, last argument false */ + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_put_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, false); + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_put_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, false); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_stream_put_nb_flush_ep) { + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_put_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, true); + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_put_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, true); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_get_nbi_flush_worker) { /* the get tests mirror the put tests above with nonblocking_get_nbi as the operation */ + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_get_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, false); + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_get_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, false); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_get_nbi_flush_ep) { + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_get_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, true); + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_get_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, true); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_stream_get_nbi_flush_worker) { + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_get_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, false); + 
test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_get_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, false); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_stream_get_nbi_flush_ep) { /* get_nbi through test_nonblocking_implicit_stream_xfer, last argument true */ + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_get_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, true); + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_get_nbi), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, true); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_get_nb_flush_worker) { /* get_nb through test_blocking_xfer, last argument false */ + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_get_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, false); + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_get_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, false); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_get_nb_flush_ep) { + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_get_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, true); + test_blocking_xfer(static_cast(&test_ucp_rma::nonblocking_get_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, true); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_stream_get_nb_flush_worker) { /* get_nb through test_nonblocking_implicit_stream_xfer, last argument false */ + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_get_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, false); + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_get_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, false); +} + +UCS_TEST_P(test_ucp_rma, nonblocking_stream_get_nb_flush_ep) { + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_get_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, false, true); + test_nonblocking_implicit_stream_xfer(static_cast(&test_ucp_rma::nonblocking_get_nb), + DEFAULT_SIZE, DEFAULT_ITERS, + 1, true, true); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_rma) diff --git a/test/gtest/ucp/test_ucp_rma_mt.cc b/test/gtest/ucp/test_ucp_rma_mt.cc new file mode 100644 index 0000000..120b035 --- /dev/null +++ 
b/test/gtest/ucp/test_ucp_rma_mt.cc @@ -0,0 +1,216 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "ucp_test.h" + +#include + +#if _OPENMP +#include "omp.h" +#endif + +using namespace ucs; /* For vector serialization */ + +class test_ucp_rma_mt : public ucp_test { +public: + static ucp_params_t get_ctx_params() { /* base context parameters with features replaced by UCP_FEATURE_RMA only */ + ucp_params_t params = ucp_test::get_ctx_params(); + params.features = UCP_FEATURE_RMA; + return params; + } + + void init() + { /* connects sender to receiver and flushes every sender worker once */ + ucp_test::init(); + sender().connect(&receiver(), get_ep_params()); + for (int i = 0; i < sender().get_num_workers(); i++) { + /* avoid deadlock for blocking rma */ + flush_worker(sender(), i); + } + } + + static void send_cb(void *req, ucs_status_t status) + { /* empty completion callback */ + } + + static std::vector enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) + { /* one variant per threading mode: MULTI_THREAD_CONTEXT and MULTI_THREAD_WORKER */ + std::vector result; + + generate_test_params_variant(ctx_params, name, test_case_name, tls, 0, + result, MULTI_THREAD_CONTEXT); + generate_test_params_variant(ctx_params, name, test_case_name, tls, 0, + result, MULTI_THREAD_WORKER); + return result; + } +}; + +UCS_TEST_P(test_ucp_rma_mt, put_get) { /* maps target_data on the receiver, then exercises put and get from MT_TEST_NUM_THREADS threads */ + ucs_status_t st; + uint64_t orig_data[MT_TEST_NUM_THREADS] GTEST_ATTRIBUTE_UNUSED_; + uint64_t target_data[MT_TEST_NUM_THREADS] GTEST_ATTRIBUTE_UNUSED_; + + ucp_mem_map_params_t params; + ucp_mem_h memh; + void *memheap = target_data; + + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.address = memheap; + params.length = sizeof(uint64_t) * MT_TEST_NUM_THREADS; + params.flags = GetParam().variant; + + st = ucp_mem_map(receiver().ucph(), &params, &memh); + ASSERT_UCS_OK(st); + + void *rkey_buffer; + size_t rkey_buffer_size; + + st = ucp_rkey_pack(receiver().ucph(), memh, 
&rkey_buffer, &rkey_buffer_size); + ASSERT_UCS_OK(st); + + std::vector<ucp_rkey_h> rkey; /* one unpacked rkey per thread */ + rkey.resize(MT_TEST_NUM_THREADS); + + /* test parallel rkey unpack */ +#if _OPENMP && ENABLE_MT +#pragma omp parallel for + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + int worker_index = 0; /* thread i uses worker i only for MULTI_THREAD_CONTEXT, otherwise worker 0 */ + if (GetParam().thread_type == MULTI_THREAD_CONTEXT) { + worker_index = i; + } + ucs_status_t status = ucp_ep_rkey_unpack(sender().ep(worker_index), + rkey_buffer, &rkey[i]); + ASSERT_UCS_OK(status); + } +#endif + + ucp_rkey_buffer_release(rkey_buffer); + + /* test blocking PUT */ + + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + orig_data[i] = 0xdeadbeefdeadbeef + 10 * i; + target_data[i] = 0; + } + +#if _OPENMP && ENABLE_MT +#pragma omp parallel for + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + int worker_index = 0; + + if (GetParam().thread_type == MULTI_THREAD_CONTEXT) { + worker_index = i; + } + + void* req = ucp_put_nb(sender().ep(worker_index), &orig_data[i], + sizeof(uint64_t), (uintptr_t)((uint64_t*)memheap + i), + rkey[i], send_cb); + wait(req, worker_index); + + flush_worker(sender(), worker_index); /* the flush precedes the comparison of source and target below */ + + EXPECT_EQ(orig_data[i], target_data[i]); + } +#endif + + /* test nonblocking PUT */ + + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + orig_data[i] = 0xdeadbeefdeadbeef + 10 * i; + target_data[i] = 0; + } + +#if _OPENMP && ENABLE_MT +#pragma omp parallel for + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + ucs_status_t status; + int worker_index = 0; + + if (GetParam().thread_type == MULTI_THREAD_CONTEXT) + worker_index = i; + + status = ucp_put_nbi(sender().ep(worker_index), &orig_data[i], sizeof(uint64_t), + (uintptr_t)((uint64_t*)memheap + i), rkey[i]); + ASSERT_UCS_OK_OR_INPROGRESS(status); + + flush_worker(sender(), worker_index); + + EXPECT_EQ(orig_data[i], target_data[i]); + } +#endif + + /* test blocking GET */ + + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + orig_data[i] = 0; + target_data[i] = 0xdeadbeefdeadbeef + 10 * i; + } + +#if _OPENMP && ENABLE_MT 
+#pragma omp parallel for + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + int worker_index = 0; + + if (GetParam().thread_type == MULTI_THREAD_CONTEXT) { + worker_index = i; + } + + void *req = ucp_get_nb(sender().ep(worker_index), &orig_data[i], + sizeof(uint64_t), (uintptr_t)((uint64_t*)memheap + i), + rkey[i], send_cb); + wait(req, worker_index); + + flush_worker(sender(), worker_index); + + EXPECT_EQ(orig_data[i], target_data[i]); /* the get wrote into orig_data[i], which started as 0 */ + } +#endif + + /* test nonblocking GET */ + + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + orig_data[i] = 0; + target_data[i] = 0xdeadbeefdeadbeef + 10 * i; + } + +#if _OPENMP && ENABLE_MT +#pragma omp parallel for + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + ucs_status_t status; + int worker_index = 0; + + if (GetParam().thread_type == MULTI_THREAD_CONTEXT) + worker_index = i; + + status = ucp_get_nbi(sender().ep(worker_index), &orig_data[i], sizeof(uint64_t), + (uintptr_t)((uint64_t *)memheap + i), rkey[i]); + ASSERT_UCS_OK_OR_INPROGRESS(status); + + flush_worker(sender(), worker_index); + + EXPECT_EQ(orig_data[i], target_data[i]); + } +#endif + +#if _OPENMP && ENABLE_MT +#pragma omp parallel for + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + ucp_rkey_destroy(rkey[i]); /* rkeys are unpacked and destroyed under the same preprocessor guard */ + } +#endif + + st = ucp_mem_unmap(receiver().ucph(), memh); + ASSERT_UCS_OK(st); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_rma_mt) diff --git a/test/gtest/ucp/test_ucp_sockaddr.cc b/test/gtest/ucp/test_ucp_sockaddr.cc new file mode 100644 index 0000000..ea64441 --- /dev/null +++ b/test/gtest/ucp/test_ucp_sockaddr.cc @@ -0,0 +1,729 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "ucp_test.h" +#include "common/test.h" +#include "ucp/ucp_test.h" + +#include +#include +#include +#include + +extern "C" { +#include +} + +#define UCP_INSTANTIATE_ALL_TEST_CASE(_test_case) \ + UCP_INSTANTIATE_TEST_CASE (_test_case) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, shm, "shm") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, dc_ud, "dc_x,ud_v,ud_x,mm") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, no_ud_ud_x, "dc_x,mm") \ + /* dc_ud case is for testing handling of a large worker address on + * UCT_IFACE_FLAG_CONNECT_TO_IFACE transports (dc_x) */ + /* no_ud_ud_x case is for testing handling a large worker address + * but with the lack of ud/ud_x transports, which would return an error + * and skipped */ + +class test_ucp_sockaddr : public ucp_test { +public: + static ucp_params_t get_ctx_params() { /* base context parameters with the feature set replaced by TAG and STREAM */ + ucp_params_t params = ucp_test::get_ctx_params(); + params.field_mask |= UCP_PARAM_FIELD_FEATURES; + params.features = UCP_FEATURE_TAG | UCP_FEATURE_STREAM; + return params; + } + + enum { + CONN_REQ_TAG = DEFAULT_PARAM_VARIANT + 1, /* Accepting by ucp_conn_request_h, + send/recv by TAG API */ + CONN_REQ_STREAM /* Accepting by ucp_conn_request_h, + send/recv by STREAM API */ + }; + + enum { /* modifier bits OR-ed into the variant above bit 15; TEST_MODIFIER_MASK covers the low 16 bits */ + TEST_MODIFIER_MASK = UCS_MASK(16), + TEST_MODIFIER_MT = UCS_BIT(16), + TEST_MODIFIER_CM = UCS_BIT(17) + }; + + enum { + SEND_DIRECTION_C2S = UCS_BIT(0), /* send data from client to server */ + SEND_DIRECTION_S2C = UCS_BIT(1), /* send data from server to client */ + SEND_DIRECTION_BIDI = SEND_DIRECTION_C2S | SEND_DIRECTION_S2C /* bidirectional send */ + }; + + typedef enum { + SEND_RECV_TAG, + SEND_RECV_STREAM + } send_recv_type_t; + + ucs::sock_addr_storage m_test_addr; /* filled by get_sockaddr(); its port is updated by start_listener() */ + + void init() { /* enables SOCKADDR_CM_ENABLE for CM variants and picks the test address before the base init runs */ + if (GetParam().variant & TEST_MODIFIER_CM) { + modify_config("SOCKADDR_CM_ENABLE", "yes"); + } + get_sockaddr(); + ucp_test::init(); + skip_loopback(); + } + + static std::vector + enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const 
std::string& test_case_name, + const std::string& tls) + { /* base variants plus, for CONN_REQ_TAG and CONN_REQ_STREAM each: plain, MT, CM and MT with CM */ + std::vector result = + ucp_test::enum_test_params(ctx_params, name, test_case_name, tls); + + generate_test_params_variant(ctx_params, name, test_case_name, tls, + CONN_REQ_TAG, result); + generate_test_params_variant(ctx_params, name, test_case_name, tls, + CONN_REQ_TAG | TEST_MODIFIER_MT, result, + MULTI_THREAD_WORKER); /* every MT variant is generated with MULTI_THREAD_WORKER */ + generate_test_params_variant(ctx_params, name, test_case_name, tls, + CONN_REQ_TAG | TEST_MODIFIER_CM, result); + generate_test_params_variant(ctx_params, name, test_case_name, tls, + CONN_REQ_TAG | TEST_MODIFIER_MT | + TEST_MODIFIER_CM, result, + MULTI_THREAD_WORKER); + + generate_test_params_variant(ctx_params, name, test_case_name, tls, + CONN_REQ_STREAM, result); + generate_test_params_variant(ctx_params, name, test_case_name, tls, + CONN_REQ_STREAM | TEST_MODIFIER_MT, result, + MULTI_THREAD_WORKER); + generate_test_params_variant(ctx_params, name, test_case_name, tls, + CONN_REQ_STREAM | TEST_MODIFIER_CM, result); + generate_test_params_variant(ctx_params, name, test_case_name, tls, + CONN_REQ_STREAM | TEST_MODIFIER_MT | + TEST_MODIFIER_CM, result, + MULTI_THREAD_WORKER); + return result; + } + + static ucs_log_func_rc_t + detect_error_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) + { /* stops ERROR-level messages that contain any stop_list entry; everything else continues to the next handler */ + if (level == UCS_LOG_LEVEL_ERROR) { + static std::vector stop_list; /* filled on the first ERROR-level call only; NOTE(review): the element type is missing in this copy of the patch */ + if (stop_list.empty()) { + stop_list.push_back("no supported sockaddr auxiliary transports found for"); + stop_list.push_back("sockaddr aux resources addresses"); + stop_list.push_back("no peer failure handler"); + stop_list.push_back("connection request failed on listener"); + /* when the "peer failure" error happens, it is followed by: */ + stop_list.push_back("received event RDMA_CM_EVENT_UNREACHABLE"); + stop_list.push_back(ucs_status_string(UCS_ERR_UNREACHABLE)); + stop_list.push_back(ucs_status_string(UCS_ERR_UNSUPPORTED)); + } + + std::string err_str 
= format_message(message, ap); + for (size_t i = 0; i < stop_list.size(); ++i) { + if (err_str.find(stop_list[i]) != std::string::npos) { + UCS_TEST_MESSAGE << err_str; + return UCS_LOG_FUNC_RC_STOP; + } + } + } + return UCS_LOG_FUNC_RC_CONTINUE; + } + + void get_sockaddr() + { /* stores the first active inet address on an rdmacm-capable netdev in m_test_addr with port 0; skips the test when none is found */ + struct ifaddrs* ifaddrs; + ucs_status_t status; + size_t size; + int ret = getifaddrs(&ifaddrs); + ASSERT_EQ(ret, 0); + + for (struct ifaddrs *ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) { + if (ucs_netif_flags_is_active(ifa->ifa_flags) && + ucs::is_inet_addr(ifa->ifa_addr) && + ucs::is_rdmacm_netdev(ifa->ifa_name)) + { + status = ucs_sockaddr_sizeof(ifa->ifa_addr, &size); + ASSERT_UCS_OK(status); /* NOTE(review): if this assertion leaves the function, the ifaddrs list is not freed - confirm and release it on that path */ + m_test_addr.set_sock_addr(*ifa->ifa_addr, size); + m_test_addr.set_port(0); /* listen on any port then update */ + + freeifaddrs(ifaddrs); + return; + } + } + freeifaddrs(ifaddrs); + UCS_TEST_SKIP_R("No interface for testing"); + } + + void start_listener(ucp_test_base::entity::listen_cb_type_t cb_type) + { /* starts the receiver listener on m_test_addr, retrying while it reports UCS_ERR_BUSY until the deadline, then records the port that was bound */ + ucs_time_t deadline = ucs::get_deadline(); + ucs_status_t status; + + do { + status = receiver().listen(cb_type, m_test_addr.get_sock_addr_ptr(), + m_test_addr.get_addr_size(), + get_ep_params()); + } while ((status == UCS_ERR_BUSY) && (ucs_get_time() < deadline)); + + if (status == UCS_ERR_UNREACHABLE) { + UCS_TEST_SKIP_R("cannot listen to " + m_test_addr.to_str()); + } + + ASSERT_UCS_OK(status); + ucp_listener_attr_t attr; + uint16_t port; + + attr.field_mask = UCP_LISTENER_ATTR_FIELD_SOCKADDR; + ASSERT_UCS_OK(ucp_listener_query(receiver().listenerh(), &attr)); + ASSERT_UCS_OK(ucs_sockaddr_get_port( + (const struct sockaddr *)&attr.sockaddr, &port)); + m_test_addr.set_port(port); /* replaces the port 0 placeholder with the port reported by the listener */ + UCS_TEST_MESSAGE << "server listening on " << m_test_addr.to_str(); + } + + static void scomplete_cb(void *req, ucs_status_t status) + { /* send completion callback: OK, UNREACHABLE and REJECTED are accepted, any other status aborts the test */ + if ((status == UCS_OK) || + (status == UCS_ERR_UNREACHABLE) || + (status == UCS_ERR_REJECTED)) { + return; + } + UCS_TEST_ABORT("Error: " << 
ucs_status_string(status)); + } + + static void rtag_complete_cb(void *req, ucs_status_t status, + ucp_tag_recv_info_t *info) + { + EXPECT_UCS_OK(status); + } + + static void rstream_complete_cb(void *req, ucs_status_t status, + size_t length) + { + EXPECT_UCS_OK(status); + } + + static void wait_for_wakeup(ucp_worker_h send_worker, ucp_worker_h recv_worker) + { + int ret, send_efd, recv_efd; + ucs_status_t status; + + ASSERT_UCS_OK(ucp_worker_get_efd(send_worker, &send_efd)); + ASSERT_UCS_OK(ucp_worker_get_efd(recv_worker, &recv_efd)); + + status = ucp_worker_arm(recv_worker); + if (status == UCS_ERR_BUSY) { + return; + } + ASSERT_UCS_OK(status); + + status = ucp_worker_arm(send_worker); + if (status == UCS_ERR_BUSY) { + return; + } + ASSERT_UCS_OK(status); + + do { + struct pollfd pfd[2]; + pfd[0].fd = send_efd; + pfd[1].fd = recv_efd; + pfd[0].events = POLLIN; + pfd[1].events = POLLIN; + ret = poll(pfd, 2, -1); + } while ((ret < 0) && (errno == EINTR)); + if (ret < 0) { + UCS_TEST_MESSAGE << "poll() failed: " << strerror(errno); + } + + EXPECT_GE(ret, 1); + } + + void check_events(ucp_worker_h send_worker, ucp_worker_h recv_worker, + bool wakeup, void *req) + { + if (progress()) { + return; + } + + if ((req != NULL) && (ucp_request_check_status(req) == UCS_ERR_UNREACHABLE)) { + return; + } + + if (wakeup) { + wait_for_wakeup(send_worker, recv_worker); + } + } + + void send_recv(entity& from, entity& to, send_recv_type_t send_recv_type, + bool wakeup, ucp_test_base::entity::listen_cb_type_t cb_type) + { + const uint64_t send_data = ucs_generate_uuid(0); + void *send_req = NULL; + if (send_recv_type == SEND_RECV_TAG) { + send_req = ucp_tag_send_nb(from.ep(), &send_data, 1, + ucp_dt_make_contig(sizeof(send_data)), 1, + scomplete_cb); + } else if (send_recv_type == SEND_RECV_STREAM) { + send_req = ucp_stream_send_nb(from.ep(), &send_data, 1, + ucp_dt_make_contig(sizeof(send_data)), + scomplete_cb, 0); + } else { + ASSERT_TRUE(false) << "unsupported communication 
type"; + } + + ucs_status_t send_status; + if (send_req == NULL) { + send_status = UCS_OK; + } else if (UCS_PTR_IS_ERR(send_req)) { + send_status = UCS_PTR_STATUS(send_req); + ASSERT_UCS_OK(send_status); + } else { + while (!ucp_request_is_completed(send_req)) { + check_events(from.worker(), to.worker(), wakeup, send_req); + } + send_status = ucp_request_check_status(send_req); + ucp_request_free(send_req); + } + + if (send_status == UCS_ERR_UNREACHABLE) { + /* Check if the error was completed due to the error handling flow. + * If so, skip the test since a valid error occurred - the one expected + * from the error handling flow - cases of failure to handle long worker + * address or transport doesn't support the error handling requirement */ + UCS_TEST_SKIP_R("Skipping due an unreachable destination (unsupported " + "feature or too long worker address or no " + "supported transport to send partial worker " + "address)"); + } else if ((send_status == UCS_ERR_REJECTED) && + (cb_type == ucp_test_base::entity::LISTEN_CB_REJECT)) { + return; + } else { + ASSERT_UCS_OK(send_status); + } + + uint64_t recv_data = 0; + void *recv_req; + if (send_recv_type == SEND_RECV_TAG) { + recv_req = ucp_tag_recv_nb(to.worker(), &recv_data, 1, + ucp_dt_make_contig(sizeof(recv_data)), + 1, 0, rtag_complete_cb); + } else { + ASSERT_TRUE(send_recv_type == SEND_RECV_STREAM); + ucp_stream_poll_ep_t poll_eps; + ssize_t ep_count; + size_t recv_length; + do { + progress(); + ep_count = ucp_stream_worker_poll(to.worker(), &poll_eps, 1, 0); + } while (ep_count == 0); + ASSERT_EQ(1, ep_count); + EXPECT_EQ(to.ep(), poll_eps.ep); + EXPECT_EQ(&to, poll_eps.user_data); + + recv_req = ucp_stream_recv_nb(to.ep(), &recv_data, 1, + ucp_dt_make_contig(sizeof(recv_data)), + rstream_complete_cb, &recv_length, + UCP_STREAM_RECV_FLAG_WAITALL); + } + + if (recv_req != NULL) { + ASSERT_TRUE(UCS_PTR_IS_PTR(recv_req)); + while (!ucp_request_is_completed(recv_req)) { + check_events(from.worker(), to.worker(), 
wakeup, recv_req); + } + ucp_request_free(recv_req); + } + + EXPECT_EQ(send_data, recv_data); + } + + bool wait_for_server_ep(bool wakeup) + { + ucs_time_t deadline = ucs::get_deadline(); + + while ((receiver().get_num_eps() == 0) && + (sender().get_err_num() == 0) && (ucs_get_time() < deadline)) { + check_events(sender().worker(), receiver().worker(), wakeup, NULL); + } + + return (sender().get_err_num() == 0) && (receiver().get_num_eps() > 0); + } + + void wait_for_reject(entity &e, bool wakeup) + { + ucs_time_t deadline = ucs::get_deadline(); + + while ((e.get_err_num_rejected() == 0) && (ucs_get_time() < deadline)) { + check_events(sender().worker(), receiver().worker(), wakeup, NULL); + } + + EXPECT_GT(deadline, ucs_get_time()); + EXPECT_EQ(1ul, e.get_err_num_rejected()); + } + + virtual ucp_ep_params_t get_ep_params() + { + ucp_ep_params_t ep_params = ucp_test::get_ep_params(); + ep_params.field_mask |= UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE | + UCP_EP_PARAM_FIELD_ERR_HANDLER; + /* The error handling requirement is needed since we need to take + * care of a case where the client gets an error. 
In case ucp needs to + * handle a large worker address but neither ud nor ud_x are present */ + ep_params.err_mode = UCP_ERR_HANDLING_MODE_PEER; + ep_params.err_handler.cb = err_handler_cb; + ep_params.err_handler.arg = NULL; + return ep_params; + } + + void client_ep_connect() + { + ucp_ep_params_t ep_params = get_ep_params(); + ep_params.field_mask |= UCP_EP_PARAM_FIELD_FLAGS | + UCP_EP_PARAM_FIELD_SOCK_ADDR | + UCP_EP_PARAM_FIELD_USER_DATA; + ep_params.flags = UCP_EP_PARAMS_FLAGS_CLIENT_SERVER; + ep_params.sockaddr.addr = m_test_addr.get_sock_addr_ptr(); + ep_params.sockaddr.addrlen = m_test_addr.get_addr_size(); + ep_params.user_data = &sender(); + sender().connect(&receiver(), ep_params); + } + + void connect_and_send_recv(bool wakeup, uint64_t flags) + { + { + scoped_log_handler slh(detect_error_logger); + client_ep_connect(); + if (!wait_for_server_ep(wakeup)) { + UCS_TEST_SKIP_R("cannot connect to server"); + } + } + + if (flags & SEND_DIRECTION_C2S) { + send_recv(sender(), receiver(), send_recv_type(), wakeup, + cb_type()); + } + + if (flags & SEND_DIRECTION_S2C) { + send_recv(receiver(), sender(), send_recv_type(), wakeup, + cb_type()); + } + } + + void connect_and_reject(bool wakeup) + { + { + scoped_log_handler slh(detect_error_logger); + client_ep_connect(); + /* Check reachability with tagged send */ + send_recv(sender(), receiver(), SEND_RECV_TAG, wakeup, + ucp_test_base::entity::LISTEN_CB_REJECT); + } + wait_for_reject(receiver(), wakeup); + wait_for_reject(sender(), wakeup); + } + + void listen_and_communicate(bool wakeup, uint64_t flags) + { + UCS_TEST_MESSAGE << "Testing " << m_test_addr.to_str(); + + start_listener(cb_type()); + connect_and_send_recv(wakeup, flags); + } + + void listen_and_reject(bool wakeup) + { + UCS_TEST_MESSAGE << "Testing " << m_test_addr.to_str(); + + start_listener(ucp_test_base::entity::LISTEN_CB_REJECT); + connect_and_reject(wakeup); + } + + void one_sided_disconnect(entity &e) { + void *dreq = e.disconnect_nb(); + if 
(dreq == NULL) { + return; + } + + ASSERT_EQ(UCS_INPROGRESS, UCS_PTR_STATUS(dreq)); + + ucs_status_t status; + ucs_time_t loop_end_limit = ucs_time_from_sec(10.0) + ucs_get_time(); + do { + /* TODO: replace the progress() with e().progress() when + async progress is implemented. */ + progress(); + status = ucp_request_check_status(dreq); + if (status != UCS_INPROGRESS) { + break; + } + } while (ucs_get_time() < loop_end_limit); + EXPECT_EQ(UCS_OK, status); + ucp_request_release(dreq); + } + + void concurrent_disconnect() { + std::vector reqs; + + ASSERT_EQ(2ul, entities().size()); + ASSERT_EQ(1, sender().get_num_workers()); + ASSERT_EQ(1, sender().get_num_eps()); + ASSERT_EQ(1, receiver().get_num_workers()); + ASSERT_EQ(1, receiver().get_num_eps()); + + reqs.push_back(sender().disconnect_nb()); + reqs.push_back(receiver().disconnect_nb()); + while (!reqs.empty()) { + wait(reqs.back()); + reqs.pop_back(); + } + } + + static void err_handler_cb(void *arg, ucp_ep_h ep, ucs_status_t status) { + ucp_test::err_handler_cb(arg, ep, status); + + /* The current expected errors are only from the err_handle test + * and from transports where the worker address is too long but ud/ud_x + * are not present, or ud/ud_x are present but their addresses are too + * long as well, in addition we can get disconnect events during test + * teardown. 
+ */ + switch (status) { + case UCS_ERR_REJECTED: + case UCS_ERR_UNREACHABLE: + case UCS_ERR_CONNECTION_RESET: + UCS_TEST_MESSAGE << "ignoring error " < +#include +#include +#include + +#include "ucp_datatype.h" +#include "ucp_test.h" + + +class test_ucp_stream_base : public ucp_test { +public: + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.field_mask |= UCP_PARAM_FIELD_FEATURES; + params.features = UCP_FEATURE_STREAM; + return params; + } + + static void ucp_send_cb(void *request, ucs_status_t status) {} + static void ucp_recv_cb(void *request, ucs_status_t status, size_t length) {} + + size_t wait_stream_recv(void *request); + +protected: + ucs_status_ptr_t stream_send_nb(const ucp::data_type_desc_t& dt_desc); +}; + +size_t test_ucp_stream_base::wait_stream_recv(void *request) +{ + ucs_status_t status; + size_t length; + do { + progress(); + status = ucp_stream_recv_request_test(request, &length); + } while (status == UCS_INPROGRESS); + ASSERT_UCS_OK(status); + ucp_request_free(request); + + return length; +} + +ucs_status_ptr_t +test_ucp_stream_base::stream_send_nb(const ucp::data_type_desc_t& dt_desc) +{ + return ucp_stream_send_nb(sender().ep(), dt_desc.buf(), dt_desc.count(), + dt_desc.dt(), ucp_send_cb, 0); +} + +class test_ucp_stream_onesided : public test_ucp_stream_base { +public: + ucp_ep_params_t get_ep_params() { + ucp_ep_params_t params = test_ucp_stream_base::get_ep_params(); + params.field_mask |= UCP_EP_PARAM_FIELD_FLAGS; + params.flags |= UCP_EP_PARAMS_FLAGS_NO_LOOPBACK; + return params; + } +}; + +UCS_TEST_P(test_ucp_stream_onesided, send_recv_no_ep) { + + /* connect from sender side only and send */ + sender().connect(&receiver(), get_ep_params()); + uint64_t send_data = ucs::rand(); + ucp::data_type_desc_t dt_desc(ucp_dt_make_contig(sizeof(uint64_t)), + &send_data, sizeof(send_data)); + void *sreq = stream_send_nb(dt_desc); + wait(sreq); + + /* must not receive data before ep is created on 
receiver side */ + static const size_t max_eps = 10; + ucp_stream_poll_ep_t poll_eps[max_eps]; + ssize_t count = ucp_stream_worker_poll(receiver().worker(), poll_eps, + max_eps, 0); + EXPECT_EQ(0l, count) << "ucp_stream_worker_poll returned ep too early"; + + /* create receiver side ep */ + ucp_ep_params_t recv_ep_param = get_ep_params(); + recv_ep_param.field_mask |= UCP_EP_PARAM_FIELD_USER_DATA; + recv_ep_param.user_data = reinterpret_cast(static_cast(ucs::rand())); + receiver().connect(&sender(), recv_ep_param); + + /* expect ep to be ready */ + ucs_time_t deadline = ucs_get_time() + + (ucs_time_from_sec(10.0) * ucs::test_time_multiplier()); + do { + progress(); + count = ucp_stream_worker_poll(receiver().worker(), poll_eps, max_eps, 0); + } while ((count == 0) && (ucs_get_time() < deadline)); + EXPECT_EQ(1l, count); + EXPECT_EQ(recv_ep_param.user_data, poll_eps[0].user_data); + EXPECT_EQ(receiver().ep(0), poll_eps[0].ep); + + /* expect data to be received */ + uint64_t recv_data = 0; + size_t recv_length = 0; + void *rreq = ucp_stream_recv_nb(receiver().ep(), &recv_data, 1, + ucp_dt_make_contig(sizeof(uint64_t)), + ucp_recv_cb, &recv_length, 0); + ASSERT_UCS_PTR_OK(rreq); + if (rreq != NULL) { + recv_length = wait_stream_recv(rreq); + } + + EXPECT_EQ(sizeof(uint64_t), recv_length); + EXPECT_EQ(send_data, recv_data); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_stream_onesided) + +class test_ucp_stream : public test_ucp_stream_base +{ +public: + virtual void init() { + ucp_test::init(); + + sender().connect(&receiver(), get_ep_params()); + if (!is_loopback()) { + receiver().connect(&sender(), get_ep_params()); + } + } + +protected: + void do_send_recv_data_test(ucp_datatype_t datatype); + template + void do_send_recv_test(ucp_datatype_t datatype); + template + void do_send_exp_recv_test(ucp_datatype_t datatype); + void do_send_recv_data_recv_test(ucp_datatype_t datatype); + + /* for self-validation of generic datatype + * NOTE: it's tested only with byte array data 
since it's recv completion + * granularity without UCP_RECV_FLAG_WAITALL flag */ + std::vector context; +}; + +void test_ucp_stream::do_send_recv_data_test(ucp_datatype_t datatype) +{ + size_t ssize = 0; /* total send size in bytes */ + std::vector sbuf(16 * UCS_MBYTE, 's'); + std::vector check_pattern; + ucs_status_ptr_t sstatus; + + /* send all msg sizes*/ + for (size_t i = 3; i < sbuf.size(); + i *= (2 * ucs::test_time_multiplier())) { + if (UCP_DT_IS_GENERIC(datatype)) { + for (size_t j = 0; j < i; ++j) { + check_pattern.push_back(char(j)); + } + } else { + ucs::fill_random(sbuf, i); + check_pattern.insert(check_pattern.end(), sbuf.begin(), + sbuf.begin() + i); + } + ucp::data_type_desc_t dt_desc(datatype, sbuf.data(), i); + sstatus = stream_send_nb(dt_desc); + EXPECT_FALSE(UCS_PTR_IS_ERR(sstatus)); + wait(sstatus); + ssize += i; + } + + std::vector rbuf(ssize, 'r'); + size_t roffset = 0; + ucs_status_ptr_t rdata; + size_t length; + do { + progress(); + rdata = ucp_stream_recv_data_nb(receiver().ep(), &length); + if (rdata == NULL) { + continue; + } + + memcpy(&rbuf[roffset], rdata, length); + roffset += length; + ucp_stream_data_release(receiver().ep(), rdata); + } while (roffset < ssize); + + EXPECT_EQ(roffset, ssize); + EXPECT_EQ(check_pattern, rbuf); +} + +template +void test_ucp_stream::do_send_recv_test(ucp_datatype_t datatype) +{ + const size_t dt_elem_size = UCP_DT_IS_CONTIG(datatype) ? 
+ ucp_contig_dt_elem_size(datatype) : 1; + size_t ssize = 0; /* total send size */ + std::vector sbuf(16 * UCS_MBYTE, 's'); + ucs_status_ptr_t sstatus; + std::vector check_pattern; + + /* send all msg sizes in bytes*/ + for (size_t i = 3; i < sbuf.size(); i *= 2) { + ucp_datatype_t dt; + if (UCP_DT_IS_GENERIC(datatype)) { + dt = datatype; + for (size_t j = 0; j < i; ++j) { + context.push_back(uint8_t(j)); + } + } else { + dt = DATATYPE; + ucs::fill_random(sbuf, i); + check_pattern.insert(check_pattern.end(), sbuf.begin(), + sbuf.begin() + i); + } + ucp::data_type_desc_t dt_desc(dt, sbuf.data(), i); + sstatus = stream_send_nb(dt_desc); + EXPECT_FALSE(UCS_PTR_IS_ERR(sstatus)); + wait(sstatus); + ssize += i; + } + + size_t align_tail = UCP_DT_IS_GENERIC(datatype) ? 0 : + (dt_elem_size - ssize % dt_elem_size); + if (align_tail != 0) { + ucs::fill_random(sbuf, align_tail); + check_pattern.insert(check_pattern.end(), sbuf.begin(), sbuf.begin() + align_tail); + ucp::data_type_desc_t dt_desc(ucp_dt_make_contig(align_tail), + sbuf.data(), align_tail); + sstatus = stream_send_nb(dt_desc); + EXPECT_FALSE(UCS_PTR_IS_ERR(sstatus)); + wait(sstatus); + ssize += align_tail; + } + + EXPECT_EQ(size_t(0), (ssize % dt_elem_size)); + + std::vector rbuf(ssize / dt_elem_size, 'r'); + size_t roffset = 0; + size_t counter = 0; + do { + ucp::data_type_desc_t dt_desc(datatype, &rbuf[roffset / dt_elem_size], + ssize - roffset); + + size_t length; + void *rreq = ucp_stream_recv_nb(receiver().ep(), dt_desc.buf(), + dt_desc.count(), dt_desc.dt(), + ucp_recv_cb, &length, recv_flags); + ASSERT_TRUE(!UCS_PTR_IS_ERR(rreq)); + if (UCS_PTR_IS_PTR(rreq)) { + length = wait_stream_recv(rreq); + } + EXPECT_EQ(size_t(0), length % dt_elem_size); + roffset += length; + counter++; + } while (roffset < ssize); + + /* waitall flag requires completion by single request */ + if (recv_flags & UCP_STREAM_RECV_FLAG_WAITALL) { + EXPECT_EQ(size_t(1), counter); + } + + EXPECT_EQ(roffset, ssize); + if 
(!UCP_DT_IS_GENERIC(datatype)) { + const T *check_ptr = reinterpret_cast(check_pattern.data()); + const size_t check_size = check_pattern.size() / dt_elem_size; + EXPECT_EQ(std::vector(check_ptr, check_ptr + check_size), rbuf); + } +} + +template +void test_ucp_stream::do_send_exp_recv_test(ucp_datatype_t datatype) +{ + const size_t dt_elem_size = UCP_DT_IS_CONTIG(datatype) ? + ucp_contig_dt_elem_size(datatype) : 1; + const size_t msg_size = dt_elem_size * UCS_MBYTE; + const size_t n_msgs = 10; + + std::vector > rbufs(n_msgs, + std::vector(msg_size / dt_elem_size, 'r')); + std::vector dt_rdescs(n_msgs); + std::vector rreqs; + + /* post recvs */ + for (size_t i = 0; i < n_msgs; ++i) { + ucp::data_type_desc_t &rdesc = dt_rdescs[i].make(datatype, &rbufs[i][0], + msg_size); + size_t length; + + void *rreq = ucp_stream_recv_nb(receiver().ep(), rdesc.buf(), + rdesc.count(), rdesc.dt(), ucp_recv_cb, + &length, recv_flags); + EXPECT_TRUE(UCS_PTR_IS_PTR(rreq)); + rreqs.push_back(rreq); + } + + std::vector sbuf(msg_size, 's'); + size_t scount = 0; /* total send size */ + ucp::data_type_desc_t dt_desc(datatype, sbuf.data(), sbuf.size()); + + /* send all msgs */ + for (size_t i = 0; i < n_msgs; ++i) { + void *sreq = stream_send_nb(dt_desc); + EXPECT_FALSE(UCS_PTR_IS_ERR(sreq)); + wait(sreq); + scount += sbuf.size(); + } + + size_t rcount = 0; + for (size_t i = 0; i < rreqs.size(); ++i) { + size_t length = wait_stream_recv(rreqs[i]); + EXPECT_EQ(size_t(0), length % dt_elem_size); + rcount += length; + } + + size_t counter = 0; + while (rcount < scount) { + size_t length = std::numeric_limits::max(); + ucs_status_ptr_t rreq; + rreq = ucp_stream_recv_nb(receiver().ep(), dt_rdescs[0].buf(), + dt_rdescs[0].count(), dt_rdescs[0].dt(), + ucp_recv_cb, &length, 0); + if (UCS_PTR_IS_PTR(rreq)) { + length = wait_stream_recv(rreq); + } + ASSERT_GT(length, 0ul); + ASSERT_LE(length, msg_size); + EXPECT_EQ(size_t(0), length % dt_elem_size); + rcount += length; + counter++; + } + 
EXPECT_EQ(scount, rcount); + + /* waitall flag requires completion by single request */ + if (recv_flags & UCP_STREAM_RECV_FLAG_WAITALL) { + EXPECT_EQ(size_t(0), counter); + } + + /* double check, no data should be here */ + while (progress()); + + size_t s; + void *p; + while ((p = ucp_stream_recv_data_nb(receiver().ep(), &s)) != NULL) { + rcount += s; + ucp_stream_data_release(receiver().ep(), p); + progress(); + } + EXPECT_EQ(scount, rcount); +} + +void test_ucp_stream::do_send_recv_data_recv_test(ucp_datatype_t datatype) +{ + const size_t dt_elem_size = UCP_DT_IS_CONTIG(datatype) ? + ucp_contig_dt_elem_size(datatype) : 1; + size_t ssize = 0; /* total send size */ + size_t roffset = 0; + size_t send_i = dt_elem_size; + size_t recv_i = 0; + std::vector sbuf(16 * UCS_MBYTE, 's'); + ucs_status_ptr_t sstatus; + std::vector check_pattern; + std::vector rbuf; + ucs_status_ptr_t rdata; + size_t length; + + do { + if (send_i < sbuf.size()) { + rbuf.resize(rbuf.size() + send_i, 'r'); + ucs::fill_random(sbuf, send_i); + check_pattern.insert(check_pattern.end(), sbuf.begin(), + sbuf.begin() + send_i); + ucp::data_type_desc_t dt_desc(datatype, sbuf.data(), send_i); + sstatus = stream_send_nb(dt_desc); + EXPECT_FALSE(UCS_PTR_IS_ERR(sstatus)); + wait(sstatus); + ssize += send_i; + send_i *= 2; + } + + progress(); + + if ((++recv_i % 2) || ((ssize - roffset) < dt_elem_size)) { + rdata = ucp_stream_recv_data_nb(receiver().ep(), &length); + if (rdata == NULL) { + continue; + } + + memcpy(&rbuf[roffset], rdata, length); + ucp_stream_data_release(receiver().ep(), rdata); + } else { + ucp::data_type_desc_t dt_desc(datatype, &rbuf[roffset], ssize - roffset); + void *rreq = ucp_stream_recv_nb(receiver().ep(), dt_desc.buf(), + dt_desc.count(), dt_desc.dt(), + ucp_recv_cb, &length, 0); + ASSERT_TRUE(!UCS_PTR_IS_ERR(rreq)); + if (UCS_PTR_IS_PTR(rreq)) { + length = wait_stream_recv(rreq); + } + } + roffset += length; + } while (roffset < ssize); + + EXPECT_EQ(roffset, ssize); + 
EXPECT_EQ(check_pattern, rbuf); +} + +UCS_TEST_P(test_ucp_stream, send_recv_data) { + do_send_recv_data_test(DATATYPE); +} + +UCS_TEST_P(test_ucp_stream, send_iov_recv_data) { + do_send_recv_data_test(DATATYPE_IOV); +} + +UCS_TEST_P(test_ucp_stream, send_generic_recv_data) { + ucp_datatype_t dt; + ucs_status_t status; + + status = ucp_dt_create_generic(&ucp::test_dt_uint8_ops, NULL, &dt); + ASSERT_UCS_OK(status); + do_send_recv_data_test(dt); + ucp_dt_destroy(dt); +} + +UCS_TEST_P(test_ucp_stream, send_recv_8) { + ucp_datatype_t datatype = ucp_dt_make_contig(sizeof(uint8_t)); + + do_send_recv_test(datatype); + do_send_recv_test(datatype); +} + +UCS_TEST_P(test_ucp_stream, send_recv_16) { + ucp_datatype_t datatype = ucp_dt_make_contig(sizeof(uint16_t)); + + do_send_recv_test(datatype); + do_send_recv_test(datatype); +} + +UCS_TEST_P(test_ucp_stream, send_recv_32) { + ucp_datatype_t datatype = ucp_dt_make_contig(sizeof(uint32_t)); + + do_send_recv_test(datatype); + do_send_recv_test(datatype); +} + +UCS_TEST_P(test_ucp_stream, send_recv_64) { + ucp_datatype_t datatype = ucp_dt_make_contig(sizeof(uint64_t)); + + do_send_recv_test(datatype); + do_send_recv_test(datatype); +} + +UCS_TEST_P(test_ucp_stream, send_recv_iov) { + do_send_recv_test(DATATYPE_IOV); + do_send_recv_test(DATATYPE_IOV); +} + +UCS_TEST_P(test_ucp_stream, send_recv_generic) { + ucp_datatype_t dt; + ucs_status_t status; + + status = ucp_dt_create_generic(&ucp::test_dt_uint8_ops, &context, &dt); + ASSERT_UCS_OK(status); + do_send_recv_test(dt); + ucp_dt_destroy(dt); + +} + +UCS_TEST_P(test_ucp_stream, send_exp_recv_8) { + ucp_datatype_t datatype = ucp_dt_make_contig(sizeof(uint8_t)); + + do_send_exp_recv_test(datatype); + do_send_exp_recv_test(datatype); +} + +UCS_TEST_P(test_ucp_stream, send_exp_recv_16) { + ucp_datatype_t datatype = ucp_dt_make_contig(sizeof(uint16_t)); + + do_send_exp_recv_test(datatype); + do_send_exp_recv_test(datatype); +} + +UCS_TEST_P(test_ucp_stream, send_exp_recv_32) { + 
ucp_datatype_t datatype = ucp_dt_make_contig(sizeof(uint32_t)); + + do_send_exp_recv_test(datatype); + do_send_exp_recv_test(datatype); +} + +UCS_TEST_P(test_ucp_stream, send_exp_recv_64) { + ucp_datatype_t datatype = ucp_dt_make_contig(sizeof(uint64_t)); + + do_send_exp_recv_test(datatype); + do_send_exp_recv_test(datatype); +} + +UCS_TEST_P(test_ucp_stream, send_exp_recv_iov) { + do_send_exp_recv_test(DATATYPE_IOV); + do_send_exp_recv_test(DATATYPE_IOV); +} + +UCS_TEST_P(test_ucp_stream, send_recv_data_recv_8) { + do_send_recv_data_recv_test(ucp_dt_make_contig(sizeof(uint8_t))); +} + +UCS_TEST_P(test_ucp_stream, send_recv_data_recv_16) { + do_send_recv_data_recv_test(ucp_dt_make_contig(sizeof(uint16_t))); +} + +UCS_TEST_P(test_ucp_stream, send_recv_data_recv_32) { + do_send_recv_data_recv_test(ucp_dt_make_contig(sizeof(uint32_t))); +} + +UCS_TEST_P(test_ucp_stream, send_recv_data_recv_64) { + do_send_recv_data_recv_test(ucp_dt_make_contig(sizeof(uint64_t))); +} + +UCS_TEST_P(test_ucp_stream, send_recv_data_recv_iov) { + do_send_recv_data_recv_test(DATATYPE_IOV); +} + +UCS_TEST_P(test_ucp_stream, send_zero_ending_iov_recv_data) { + const size_t min_size = UCS_KBYTE; + const size_t max_size = min_size * 64; + const size_t iov_num = 8; /* must be divisible by 4 without a + * remainder, caught on mlx5 based TLs + * where max_iov = 3 for zcopy multi + * protocol, where every posting includes: + * 1 header + 2 nonempty IOVs */ + const size_t iov_num_nonempty = iov_num / 2; + + std::vector buf(max_size * 2); + ucs::fill_random(buf, buf.size()); + std::vector v(iov_num); + + for (size_t size = min_size; size < max_size; ++size) { + size_t slen = 0; + for (size_t j = 0; j < iov_num; ++j) { + if ((j % 2) == 0) { + uint8_t *ptr = buf.data(); + v[j].buffer = &(ptr[j * size / iov_num_nonempty]); + v[j].length = size / iov_num_nonempty; + slen += v[j].length; + } else { + v[j].buffer = NULL; + v[j].length = 0; + } + } + + void *sreq = ucp_stream_send_nb(sender().ep(), &v[0], 
iov_num, + DATATYPE_IOV, ucp_send_cb, 0); + + size_t rlen = 0; + while (rlen < slen) { + progress(); + size_t length; + void *rdata = ucp_stream_recv_data_nb(receiver().ep(), &length); + EXPECT_FALSE(UCS_PTR_IS_ERR(rdata)); + if (rdata != NULL) { + rlen += length; + ucp_stream_data_release(receiver().ep(), rdata); + } + } + wait(sreq); + } +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_stream) + +class test_ucp_stream_many2one : public test_ucp_stream_base { +protected: + struct request_wrapper_t { + request_wrapper_t(void *request, ucp::data_type_desc_t *dt_desc) + : m_req(request), m_dt_desc(dt_desc) {} + + void *m_req; + ucp::data_type_desc_t *m_dt_desc; + }; + +public: + test_ucp_stream_many2one() : m_receiver_idx(3), m_nsenders(3) { + m_recv_data.resize(m_nsenders); + } + + static ucp_params_t get_ctx_params() { + return test_ucp_stream::get_ctx_params(); + } + + virtual void init(); + static void ucp_send_cb(void *request, ucs_status_t status) {} + static void ucp_recv_cb(void *request, ucs_status_t status, size_t length) {} + + void do_send_worker_poll_test(ucp_datatype_t dt); + void do_send_recv_test(ucp_datatype_t dt); + +protected: + static void erase_completed_reqs(std::vector &reqs); + ucs_status_ptr_t stream_send_nb(size_t sender_idx, + const ucp::data_type_desc_t& dt_desc); + size_t send_all_nb(ucp_datatype_t datatype, size_t n_iter, + std::vector &sreqs); + size_t send_all(ucp_datatype_t datatype, size_t n_iter); + void check_no_data(); + std::set check_no_data(entity &e); + void check_recv_data(size_t n_iter, ucp_datatype_t dt); + + std::vector m_msgs; + std::vector > m_recv_data; + const size_t m_receiver_idx; + const size_t m_nsenders; +}; + +void test_ucp_stream_many2one::init() +{ + if (is_self()) { + UCS_TEST_SKIP_R("self"); + } + + /* Skip entities creation */ + test_base::init(); + + for (size_t i = 0; i < m_nsenders + 1; ++i) { + create_entity(); + } + + for (size_t i = 0; i < m_nsenders; ++i) { + e(i).connect(&e(m_receiver_idx), get_ep_params(), 
i); + + ucp_ep_params_t recv_ep_param = get_ep_params(); + recv_ep_param.field_mask |= UCP_EP_PARAM_FIELD_USER_DATA; + recv_ep_param.user_data = (void *)uintptr_t(i); + e(m_receiver_idx).connect(&e(i), recv_ep_param, i); + } + + for (size_t i = 0; i < m_nsenders; ++i) { + m_msgs.push_back(std::string("sender_") + ucs::to_string(i)); + } +} + +void test_ucp_stream_many2one::do_send_worker_poll_test(ucp_datatype_t dt) +{ + const size_t niter = 2018; + std::vector sreqs; + size_t total_len; + + total_len = send_all_nb(dt, niter, sreqs); + + /* Recv and progress all data */ + do { + ssize_t count; + do { + const size_t max_eps = 10; + ucp_stream_poll_ep_t poll_eps[max_eps]; + progress(); + count = ucp_stream_worker_poll(e(m_receiver_idx).worker(), + poll_eps, max_eps, 0); + EXPECT_LE(0, count); + + for (ssize_t i = 0; i < count; ++i) { + char *rdata; + size_t length; + while ((rdata = (char *)ucp_stream_recv_data_nb(poll_eps[i].ep, + &length)) != NULL) { + ASSERT_FALSE(UCS_PTR_IS_ERR(rdata)); + size_t senser_idx = uintptr_t(poll_eps[i].user_data); + std::vector &dst = m_recv_data[senser_idx]; + dst.insert(dst.end(), rdata, rdata + length); + total_len -= length; + ucp_stream_data_release(poll_eps[i].ep, rdata); + } + } + } while (count > 0); + + erase_completed_reqs(sreqs); + } while (!sreqs.empty() || (total_len != 0)); + + check_no_data(); + check_recv_data(niter, dt); +} + +void test_ucp_stream_many2one::do_send_recv_test(ucp_datatype_t dt) +{ + const size_t niter = 2018; + std::vector roffsets(m_nsenders, 0); + std::vector dt_rdescs(m_nsenders); + std::vector > rreqs; + std::vector sreqs; + size_t total_sdata; + + ASSERT_FALSE(m_msgs.empty()); + + /* Do preposts */ + for (size_t i = 0; i < m_nsenders; ++i) { + m_recv_data[i].resize(m_msgs[i].length() * niter + 1); + ucp::data_type_desc_t &rdesc = dt_rdescs[i].make(dt, + &m_recv_data[i][roffsets[i]], + m_recv_data[i].size()); + size_t length; + void *rreq = ucp_stream_recv_nb(e(m_receiver_idx).ep(0, i), + 
rdesc.buf(), rdesc.count(), rdesc.dt(), + ucp_recv_cb, &length, 0); + EXPECT_TRUE(UCS_PTR_IS_PTR(rreq)); + rreqs.push_back(std::make_pair(i, request_wrapper_t(rreq, &rdesc))); + } + + total_sdata = send_all_nb(dt, niter, sreqs); + + /* Recv and progress all the rest of data */ + do { + ssize_t count; + /* wait rreqs */ + for (size_t i = 0; i < rreqs.size(); ++i) { + roffsets[rreqs[i].first] += wait_stream_recv(rreqs[i].second.m_req); + } + rreqs.clear(); + progress(); + + const size_t max_eps = 10; + ucp_stream_poll_ep_t poll_eps[max_eps]; + count = ucp_stream_worker_poll(e(m_receiver_idx).worker(), + poll_eps, max_eps, 0); + EXPECT_LE(0, count); + EXPECT_LE(size_t(count), m_nsenders); + + for (ssize_t i = 0; i < count; ++i) { + bool again = true; + while (again) { + size_t sender_idx = uintptr_t(poll_eps[i].user_data); + size_t &roffset = roffsets[sender_idx]; + ucp::data_type_desc_t &dt_desc = + dt_rdescs[sender_idx].forward_to(roffset); + EXPECT_TRUE(dt_desc.is_valid()); + size_t length; + void *rreq = ucp_stream_recv_nb(poll_eps[i].ep, + dt_desc.buf(), + dt_desc.count(), + dt_desc.dt(), + ucp_recv_cb, &length, 0); + EXPECT_FALSE(UCS_PTR_IS_ERR(rreq)); + if (rreq == NULL) { + EXPECT_LT(size_t(0), length); + roffset += length; + if (ssize_t(length) < dt_desc.buf_length()) { + continue; /* Need to drain the EP */ + } + } else { + rreqs.push_back(std::make_pair(sender_idx, + request_wrapper_t(rreq, + &dt_desc))); + } + again = false; + } + } + + erase_completed_reqs(sreqs); + } while (!rreqs.empty() || !sreqs.empty() || + (total_sdata > std::accumulate(roffsets.begin(), + roffsets.end(), 0ul))); + + EXPECT_EQ(total_sdata, std::accumulate(roffsets.begin(), + roffsets.end(), 0ul)); + check_no_data(); + check_recv_data(niter, dt); +} + +ucs_status_ptr_t +test_ucp_stream_many2one::stream_send_nb(size_t sender_idx, + const ucp::data_type_desc_t& dt_desc) +{ + return ucp_stream_send_nb(m_entities.at(sender_idx).ep(), dt_desc.buf(), + dt_desc.count(), dt_desc.dt(), 
ucp_send_cb, 0); +} + +size_t +test_ucp_stream_many2one::send_all_nb(ucp_datatype_t datatype, size_t n_iter, + std::vector &sreqs) +{ + size_t total = 0; + /* Send many times in round robin */ + for (size_t i = 0; i < n_iter; ++i) { + for (size_t sender_idx = 0; sender_idx < m_nsenders; ++sender_idx) { + const void *buf = m_msgs[sender_idx].c_str(); + size_t len = m_msgs[sender_idx].length(); + if (i == (n_iter - 1)) { + ++len; + } + + ucp::data_type_desc_t *dt_desc = new ucp::data_type_desc_t(datatype, + buf, + len); + void *sreq = stream_send_nb(sender_idx, *dt_desc); + total += len; + if (UCS_PTR_IS_PTR(sreq)) { + sreqs.push_back(request_wrapper_t(sreq, dt_desc)); + } else { + EXPECT_FALSE(UCS_PTR_IS_ERR(sreq)); + delete dt_desc; + } + } + } + + return total; +} + +size_t +test_ucp_stream_many2one::send_all(ucp_datatype_t datatype, size_t n_iter) +{ + std::vector sreqs; + size_t total; + + total = send_all_nb(datatype, n_iter, sreqs); + while (!sreqs.empty()) { + progress(); + erase_completed_reqs(sreqs); + } + + return total; +} + +void test_ucp_stream_many2one::check_no_data() +{ + std::set check; + + for (size_t i = 0; i <= m_receiver_idx; ++i) { + std::set check_e = check_no_data(e(i)); + check.insert(check_e.begin(), check_e.end()); + } + + EXPECT_EQ(size_t(0), check.size()); +} + +std::set test_ucp_stream_many2one::check_no_data(entity &e) +{ + const size_t max_eps = 10; + ucp_stream_poll_ep_t poll_eps[max_eps]; + std::set ret; + std::list check_list; + + while (progress()); + + ssize_t count = ucp_stream_worker_poll(m_entities.at(m_receiver_idx).worker(), + poll_eps, max_eps, 0); + EXPECT_GE(count, ssize_t(0)); + + for (ssize_t i = 0; i < count; ++i) { + ret.insert(poll_eps[i].ep); + } + + for (int i = 0; i < e.get_num_workers(); ++i) { + for (int j = 0; j < e.get_num_eps(); ++j) { + check_list.push_back(e.ep(i, j)); + } + } + + std::list::const_iterator check_it = check_list.begin(); + while (check_it != check_list.end()) { + EXPECT_EQ(ret.end(), 
ret.find(*check_it)); + ++check_it; + } + + return ret; +} + +void test_ucp_stream_many2one::check_recv_data(size_t n_iter, ucp_datatype_t dt) +{ + for (size_t i = 0; i < m_nsenders; ++i) { + std::string test = std::string("sender_") + ucs::to_string(i); + const std::string str(&m_recv_data[i].front()); + if (UCP_DT_IS_GENERIC(dt)) { + std::vector test_gen; + for (size_t j = 0; j < test.length(); ++j) { + test_gen.push_back(char(j)); + } + test_gen.push_back('\0'); + test = std::string(test_gen.data()); + } + + size_t next = 0; + for (size_t j = 0; j < n_iter; ++j) { + size_t match = str.find(test, next); + EXPECT_NE(std::string::npos, match) << "failed on sender " << i + << " iteration " << j; + if (match == std::string::npos) { + break; + } + EXPECT_EQ(next, match); + next += test.length(); + } + EXPECT_EQ(next, str.length()); /* nothing more */ + } +} + +void +test_ucp_stream_many2one::erase_completed_reqs(std::vector &reqs) +{ + std::vector::iterator i = reqs.begin(); + + while (i != reqs.end()) { + ucs_status_t status = ucp_request_check_status(i->m_req); + if (status != UCS_INPROGRESS) { + EXPECT_EQ(UCS_OK, status); + ucp_request_free(i->m_req); + delete i->m_dt_desc; + i = reqs.erase(i); + } else { + ++i; + } + } +} + +UCS_TEST_P(test_ucp_stream_many2one, drop_data) { + send_all(DATATYPE, 10); + + ASSERT_EQ(m_receiver_idx, m_nsenders); + for (size_t i = 0; i <= m_receiver_idx; ++i) { + flush_worker(e(i)); + } + + /* destroy 1 connection */ + entity::ep_destructor(m_entities.at(0).ep(), + &m_entities.at(0)); + entity::ep_destructor(m_entities.at(m_receiver_idx).ep(), + &m_entities.at(0)); + m_entities.at(0).revoke_ep(); + m_entities.at(m_receiver_idx).revoke_ep(0, 0); + + /* wait for 1-st byte on the last EP to be sure the network packets have + been arrived */ + uint8_t check; + size_t check_length; + ucp_ep_h last_ep = m_entities.at(m_receiver_idx).ep(0, m_nsenders - 1); + void *check_req = ucp_stream_recv_nb(last_ep, &check, 1, DATATYPE, + ucp_recv_cb, 
&check_length, 0); + EXPECT_FALSE(UCS_PTR_IS_ERR(check_req)); + if (UCS_PTR_IS_PTR(check_req)) { + wait_stream_recv(check_req); + } + + /* data from disconnected EP should be dropped */ + std::set others = check_no_data(m_entities.at(0)); + /* since ordering between EPs is not guaranteed, some data may be still in + * the network or buffered by transport */ + EXPECT_LE(others.size(), m_nsenders - 1); + + /* reconnect */ + m_entities.at(0).connect(&m_entities.at(m_receiver_idx), get_ep_params(), 0); + ucp_ep_params_t recv_ep_param = get_ep_params(); + recv_ep_param.field_mask |= UCP_EP_PARAM_FIELD_USER_DATA; + recv_ep_param.user_data = (void *)uintptr_t(0xdeadbeef); + e(m_receiver_idx).connect(&e(0), recv_ep_param, 0); + + /* send again */ + send_all(DATATYPE, 10); + + for (size_t i = 0; i <= m_receiver_idx; ++i) { + flush_worker(e(i)); + } + + /* Need to poll out all incoming data from transport layer, see PR #2048 */ + while (progress() > 0); +} + +UCS_TEST_P(test_ucp_stream_many2one, send_worker_poll) { + do_send_worker_poll_test(DATATYPE); +} + +UCS_TEST_P(test_ucp_stream_many2one, send_worker_poll_iov) { + do_send_worker_poll_test(DATATYPE_IOV); +} + +UCS_TEST_P(test_ucp_stream_many2one, send_worker_poll_generic) { + ucp_datatype_t dt; + ucs_status_t status; + + status = ucp_dt_create_generic(&ucp::test_dt_uint8_ops, NULL, &dt); + ASSERT_UCS_OK(status); + do_send_worker_poll_test(dt); + ucp_dt_destroy(dt); +} + +UCS_TEST_P(test_ucp_stream_many2one, send_recv_nb) { + do_send_recv_test(DATATYPE); +} + +UCS_TEST_P(test_ucp_stream_many2one, send_recv_nb_iov) { + do_send_recv_test(DATATYPE_IOV); +} + +UCS_TEST_P(test_ucp_stream_many2one, send_recv_nb_generic) { + ucp_datatype_t dt; + ucs_status_t status; + + status = ucp_dt_create_generic(&ucp::test_dt_uint8_ops, NULL, &dt); + ASSERT_UCS_OK(status); + do_send_recv_test(dt); + ucp_dt_destroy(dt); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_stream_many2one) diff --git a/test/gtest/ucp/test_ucp_tag.cc 
b/test/gtest/ucp/test_ucp_tag.cc new file mode 100644 index 0000000..1dc9862 --- /dev/null +++ b/test/gtest/ucp/test_ucp_tag.cc @@ -0,0 +1,414 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "test_ucp_tag.h" + +#include "ucp_datatype.h" + +extern "C" { +#include +#include +#include +} + + +ucp_params_t test_ucp_tag::get_ctx_params() { /* base test params plus the TAG feature and the per-request test state (size and init hook) */ + ucp_params_t params = ucp_test::get_ctx_params(); + params.field_mask |= UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_REQUEST_INIT | + UCP_PARAM_FIELD_REQUEST_SIZE; + params.features = UCP_FEATURE_TAG; + params.request_size = sizeof(request); + params.request_init = request_init; + return params; +} + +void test_ucp_tag::init() +{ /* connect sender to receiver, cache request size and thread mode in ctx_attr, reset the generic-datatype counters */ + ucp_test::init(); + sender().connect(&receiver(), get_ep_params()); + + ctx_attr.field_mask = 0; + ctx_attr.field_mask |= UCP_ATTR_FIELD_REQUEST_SIZE; + ctx_attr.field_mask |= UCP_ATTR_FIELD_THREAD_MODE; + ASSERT_UCS_OK(ucp_context_query(receiver().ucph(), &ctx_attr)); /* ctx_attr.request_size sizes the buffer in request_alloc(), so a failed query must not pass silently */ + + ucp::dt_gen_start_count = 0; + ucp::dt_gen_finish_count = 0; +} + +void test_ucp_tag::enable_tag_mp_offload() +{ /* the scoped_setenv objects are stored in m_env */ + m_env.push_back(new ucs::scoped_setenv("UCX_RC_TM_ENABLE", "y")); + m_env.push_back(new ucs::scoped_setenv("UCX_RC_TM_NUM_STRIDES", "8")); + m_env.push_back(new ucs::scoped_setenv("UCX_IB_MLX5_DEVX_OBJECTS", + "dct,dcsrq,rcsrq,rcqp")); +} + +void test_ucp_tag::request_init(void *request) +{ /* reset the test-side request state; status and req_mem are left untouched */ + struct request *req = (struct request *)request; + req->completed = false; + req->external = false; + req->info.length = 0; + req->info.sender_tag = 0; +} + +void test_ucp_tag::request_release(struct request *req) +{ /* external requests own a malloc'ed buffer (req_mem); all others are handed to ucp_request_release() */ + if (req->external) { + free(req->req_mem); + } else { + req->completed = false; + ucp_request_release(req); + } +} + +void test_ucp_tag::request_free(struct request *req) +{ /* same as request_release() but non-external requests go through ucp_request_free() */ + if (req->external) { + free(req->req_mem); + } else { + req->completed = false; + ucp_request_free(req); + } +} + 
+test_ucp_tag::request* test_ucp_tag::request_alloc() +{ /* Allocate an "external" request: ctx_attr.request_size bytes followed by the test request struct. The returned pointer addresses the struct; req_mem keeps the malloc base for free(). NOTE(review): the malloc result is not checked for NULL. */ + void *mem = malloc(ctx_attr.request_size + sizeof(request)); + request *req = (request*)((char*)mem + ctx_attr.request_size); + request_init(req); + req->external = true; + req->req_mem = mem; + return req; +} + +void test_ucp_tag::send_callback(void *request, ucs_status_t status) +{ /* send completion: record the status and mark the request completed (asserts it was not completed before) */ + struct request *req = (struct request *)request; + ucs_assert(req->completed == false); + req->status = status; + req->completed = true; +} + +void test_ucp_tag::recv_callback(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info) +{ /* receive completion: like send_callback(), and additionally copies *info when status is UCS_OK */ + struct request *req = (struct request *)request; + ucs_assert(req->completed == false); + req->status = status; + req->completed = true; + if (status == UCS_OK) { + req->info = *info; + } +} + +void test_ucp_tag::wait(request *req, int buf_index) +{ /* Progress the worker selected by get_worker_index(buf_index) until req finishes. When is_external_request() is true, completion is polled with ucp_request_test() and recv_callback() is then invoked by hand for external requests; otherwise the loop spins on req->completed. */ + int worker_index = get_worker_index(buf_index); + + if (is_external_request()) { + ucp_tag_recv_info_t tag_info; + ucs_status_t status = ucp_request_test(req, &tag_info); + + while (status == UCS_INPROGRESS) { + progress(worker_index); + status = ucp_request_test(req, &tag_info); + } + if (req->external) { + recv_callback(req, status, &tag_info); + } + } else { + while (!req->completed) { + progress(worker_index); + if ((req->external) && + (ucp_request_check_status(req) == UCS_OK)) { + return; /* external request reports UCS_OK: stop waiting even though 'completed' is still false */ + } + } + } +} + +void test_ucp_tag::wait_and_validate(request *req) +{ /* NULL is a no-op; otherwise wait, expect completion with UCS_OK, then release the request */ + if (req == NULL) { + return; + } + + wait(req); + EXPECT_TRUE(req->completed); + EXPECT_EQ(UCS_OK, req->status); + request_release(req); +} + +void test_ucp_tag::wait_for_unexpected_msg(ucp_worker_h worker, double sec) +{ + /* Wait for some message to be added to unexpected queue */ + ucs_time_t timeout = ucs_get_time() + ucs_time_from_sec(sec); + + do { + short_progress_loop(); + } while (ucp_tag_unexp_is_empty(&worker->tm) && (ucs_get_time() < timeout)); /* gives up once 'sec' seconds have elapsed */ +} + +void test_ucp_tag::check_offload_support(bool offload_required) +{ + bool offload_supported = 
ucp_ep_is_tag_offload_enabled(ucp_ep_config(sender().ep())); + if (offload_supported != offload_required) { + cleanup(); + std::string reason = offload_supported ? "tag offload" : "no tag offload"; + UCS_TEST_SKIP_R(reason); + } +} + +int test_ucp_tag::get_worker_index(int buf_index) +{ + int worker_index = 0; + if (GetParam().thread_type == MULTI_THREAD_CONTEXT) { + worker_index = buf_index; + } else if (GetParam().thread_type == SINGLE_THREAD) { + ucs_assert((buf_index == 0) && (worker_index == 0)); + } + return worker_index; +} + +test_ucp_tag::request * +test_ucp_tag::send(entity &sender, send_type_t type, const void *buffer, + size_t count, ucp_datatype_t datatype, ucp_tag_t tag, + int buf_index) +{ + int worker_index = get_worker_index(buf_index); + request *req; + ucs_status_t status; + + switch (type) { + case SEND_B: + case SEND_NB: + req = (request*)ucp_tag_send_nb(sender.ep(worker_index), buffer, count, + datatype, tag, send_callback); + if ((req != NULL) && (type == SEND_B)) { + wait(req, get_worker_index(buf_index)); + request_release(req); + return NULL; + } + + if (UCS_PTR_IS_ERR(req)) { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } + break; + case SEND_NBR: + req = request_alloc(); + status = ucp_tag_send_nbr(sender.ep(worker_index), buffer, count, + datatype, tag, req); + ASSERT_UCS_OK_OR_INPROGRESS(status); + if (status == UCS_OK) { + request_free(req); + return (request*)UCS_STATUS_PTR(UCS_OK); + } + break; + case SEND_SYNC_NB: + return (request*)ucp_tag_send_sync_nb(sender.ep(worker_index), buffer, + count, datatype, tag, send_callback); + default: + return NULL; + } + + return req; +} + +test_ucp_tag::request * +test_ucp_tag::send_nb(const void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, int buf_index) +{ + return send(sender(), SEND_NB, buffer, count, datatype, tag, buf_index); +} + +test_ucp_tag::request * +test_ucp_tag::send_nbr(const void *buffer, size_t count, + ucp_datatype_t datatype, + ucp_tag_t tag, int buf_index) +{ + 
return send(sender(), SEND_NBR, buffer, count, datatype, tag, buf_index); +} + + +void test_ucp_tag::send_b(const void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, int buf_index) +{ + send(sender(), SEND_B, buffer, count, datatype, tag, buf_index); +} + +test_ucp_tag::request * +test_ucp_tag::send_sync_nb(const void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, int buf_index) +{ + return send(sender(), SEND_SYNC_NB, buffer, count, datatype, tag, buf_index); +} + +test_ucp_tag::request* +test_ucp_tag::recv(entity &receiver, recv_type_t type, void *buffer, + size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, ucp_tag_t tag_mask, + ucp_tag_recv_info_t *info, int buf_index) +{ + int worker_index = get_worker_index(buf_index); + request *req; + ucs_status_t status; + + switch (type) { + case RECV_B: + case RECV_NB: + req = (request*)ucp_tag_recv_nb(receiver.worker(worker_index), buffer, count, + datatype, tag, tag_mask, recv_callback); + if (type == RECV_NB) { + if (UCS_PTR_IS_ERR(req)) { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } else if (req == NULL) { + UCS_TEST_ABORT("ucp_tag_recv_nb returned NULL"); + } + } else { + if (UCS_PTR_IS_ERR(req)) { + return req; + } else if (req == NULL) { + UCS_TEST_ABORT("ucp_tag_recv_nb returned NULL"); + } else { + wait(req, worker_index); + status = req->status; + *info = req->info; + request_release(req); + return (request*)UCS_STATUS_PTR(status); + } + } + break; + case RECV_BR: + case RECV_NBR: + req = request_alloc(); + status = ucp_tag_recv_nbr(receiver.worker(worker_index), buffer, + count, datatype, tag, tag_mask, req); + if (type == RECV_NBR) { + if (UCS_STATUS_IS_ERR(status)) { + UCS_TEST_ABORT("ucp_tag_recv_nb returned status " << + ucs_status_string(status)); + } + } else { + if (!UCS_STATUS_IS_ERR(status)) { + wait(req, worker_index); + status = req->status; + *info = req->info; + request_release(req); + return (request*)UCS_STATUS_PTR(status); + } + } + break; + default: + 
return NULL; + } + + return req; +} + +test_ucp_tag::request* +test_ucp_tag::recv_nb(void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, ucp_tag_t tag_mask, int buf_index) +{ + recv_type_t type = is_external_request() ? RECV_NBR : RECV_NB; + return recv(receiver(), type, buffer, count, datatype, + tag, tag_mask, NULL, buf_index); +} + +ucs_status_t +test_ucp_tag::recv_b(void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, ucp_tag_t tag_mask, + ucp_tag_recv_info_t *info, int buf_index) +{ + recv_type_t type = is_external_request() ? RECV_BR : RECV_B; + request* req = recv(receiver(), type, buffer, count, datatype, + tag, tag_mask, info, buf_index); + return UCS_PTR_STATUS(req); +} + +bool test_ucp_tag::is_external_request() +{ + return false; +} + +ucp_context_attr_t test_ucp_tag::ctx_attr; + + +class test_ucp_tag_limits : public test_ucp_tag { +public: + test_ucp_tag_limits() { + m_test_offload = GetParam().variant; + m_env.push_back(new ucs::scoped_setenv("UCX_RC_TM_ENABLE", + ucs::to_string(m_test_offload).c_str())); + } + + void init() { + test_ucp_tag::init(); + check_offload_support(m_test_offload); + } + + std::vector + static enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) + { + std::vector result; + generate_test_params_variant(ctx_params, name, test_case_name, + tls, false, result); + generate_test_params_variant(ctx_params, name, test_case_name + "/offload", + tls, true, result); + return result; + } + +protected: + bool m_test_offload; +}; + +UCS_TEST_P(test_ucp_tag_limits, check_max_short_rndv_thresh_zero, "RNDV_THRESH=0") { + size_t max_short = + static_cast(ucp_ep_config(sender().ep())->tag.eager.max_short + 1); + + // (maximal short + 1) <= RNDV thresh + EXPECT_LE(max_short, + ucp_ep_config(sender().ep())->tag.rndv.am_thresh); + EXPECT_LE(max_short, + ucp_ep_config(sender().ep())->tag.rndv.rma_thresh); + + // (maximal short + 
1) <= RNDV send_nbr thresh + EXPECT_LE(max_short, + ucp_ep_config(sender().ep())->tag.rndv_send_nbr.am_thresh); + EXPECT_LE(max_short, + ucp_ep_config(sender().ep())->tag.rndv_send_nbr.rma_thresh); + + if (m_test_offload) { + // There is a lower bound for rndv threshold with tag offload. We should + // not send messages smaller than SW RNDV request size, because receiver + // may temporarily store this request in the user buffer (which will + // result in crash if the request does not fit user buffer). + size_t min_rndv = ucp_ep_tag_offload_min_rndv_thresh(ucp_ep_config(sender().ep())); + + EXPECT_GT(min_rndv, 0ul); // min_rndv should be RTS size at least + EXPECT_GE(min_rndv, + ucp_ep_config(sender().ep())->tag.rndv_send_nbr.am_thresh); + EXPECT_GE(min_rndv, + ucp_ep_config(sender().ep())->tag.rndv_send_nbr.rma_thresh); + } +} + +UCS_TEST_P(test_ucp_tag_limits, check_max_short_zcopy_thresh_zero, "ZCOPY_THRESH=0") { + size_t max_short = + static_cast(ucp_ep_config(sender().ep())->tag.eager.max_short + 1); + + // (maximal short + 1) <= ZCOPY thresh + EXPECT_LE(max_short, + ucp_ep_config(sender().ep())->tag.eager.zcopy_thresh[0]); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_tag_limits) diff --git a/test/gtest/ucp/test_ucp_tag.h b/test/gtest/ucp/test_ucp_tag.h new file mode 100644 index 0000000..c8149db --- /dev/null +++ b/test/gtest/ucp/test_ucp_tag.h @@ -0,0 +1,126 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef TEST_UCP_TAG_H_ +#define TEST_UCP_TAG_H_ + +#include "ucp_test.h" + + +class test_ucp_tag : public ucp_test { +public: + static ucp_params_t get_ctx_params(); + + enum send_type_t { + SEND_NB, + SEND_NBR, + SEND_B, + SEND_SYNC_NB + }; + + enum recv_type_t { + RECV_NB, + RECV_NBR, + RECV_B, + RECV_BR + }; + +protected: + enum { + RECV_REQ_INTERNAL = DEFAULT_PARAM_VARIANT, + RECV_REQ_EXTERNAL /* for a receive request that was allocated by + the upper layer and not by ucx */ + }; + + struct request { + bool completed; + bool external; + void *req_mem; + ucs_status_t status; + ucp_tag_recv_info_t info; + }; + + virtual void init(); + + void enable_tag_mp_offload(); + + static void request_init(void *request); + + static request* request_alloc(); + + static void request_release(struct request *req); + + static void request_free(struct request *req); + + static void send_callback(void *request, ucs_status_t status); + + static void recv_callback(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info); + + request* send(entity &sender, send_type_t type, const void *buffer, + size_t count, ucp_datatype_t datatype, ucp_tag_t tag, + int ep_index = 0); + + request* send_nb(const void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, int ep_index = 0); + + request* send_nbr(const void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, int ep_index = 0); + + void send_b(const void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, int buf_index = 0); + + request* send_sync_nb(const void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, int buf_index = 0); + + request* recv(entity &receiver, recv_type_t type, void *buffer, + size_t count, ucp_datatype_t dt, ucp_tag_t tag, + ucp_tag_t tag_mask, ucp_tag_recv_info_t *info, + int buf_index = 0); + + request* recv_nb(void *buffer, size_t count, ucp_datatype_t dt, + ucp_tag_t tag, ucp_tag_t tag_mask, int buf_index = 0); + + request* recv_req_nb(void 
*buffer, size_t count, ucp_datatype_t dt, + ucp_tag_t tag, ucp_tag_t tag_mask, int buf_index = 0); + + request* recv_cb_nb(void *buffer, size_t count, ucp_datatype_t dt, + ucp_tag_t tag, ucp_tag_t tag_mask, int buf_index = 0); + + ucs_status_t recv_b(void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, ucp_tag_t tag_mask, + ucp_tag_recv_info_t *info, int buf_index = 0); + + ucs_status_t recv_req_b(void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, ucp_tag_t tag_mask, + ucp_tag_recv_info_t *info, int buf_index = 0); + + ucs_status_t recv_cb_b(void *buffer, size_t count, ucp_datatype_t datatype, + ucp_tag_t tag, ucp_tag_t tag_mask, + ucp_tag_recv_info_t *info, int buf_index = 0); + + void wait(request *req, int buf_index = 0); + + void wait_and_validate(request *req); + + void wait_for_unexpected_msg(ucp_worker_h worker, double sec); + + void check_offload_support(bool offload_required); + + virtual bool is_external_request(); + + static ucp_context_attr_t ctx_attr; + ucs::ptr_vector m_env; + +private: + int get_worker_index(int buf_index); + +public: + int count; +}; + +#endif diff --git a/test/gtest/ucp/test_ucp_tag_cancel.cc b/test/gtest/ucp/test_ucp_tag_cancel.cc new file mode 100644 index 0000000..81e3aac --- /dev/null +++ b/test/gtest/ucp/test_ucp_tag_cancel.cc @@ -0,0 +1,70 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test_ucp_tag.h" + +#include + +extern "C" { +#include +} + +class test_ucp_tag_cancel : public test_ucp_tag { +}; + +UCS_TEST_P(test_ucp_tag_cancel, cancel_exp) { + uint64_t recv_data = 0; + request *req; + + req = recv_nb(&recv_data, sizeof(recv_data), DATATYPE, 1, 1); + if (UCS_PTR_IS_ERR(req)) { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } else if (req == NULL) { + UCS_TEST_ABORT("ucp_tag_recv_nb returned NULL"); + } + + ucp_request_cancel(receiver().worker(), req); + wait(req); + + EXPECT_EQ(UCS_ERR_CANCELED, req->status); + EXPECT_EQ(0ul, recv_data); + request_release(req); +} + +// Test that cancelling already matched (but not yet completed) request does +// not produce any error. GH bug #4490. +UCS_TEST_P(test_ucp_tag_cancel, cancel_matched, "RNDV_THRESH=32K") { + uint64_t small_data = 0; + ucp_tag_t tag = 0xfafa; + size_t size = 50000; + + std::vector sbuf(size, 0); + std::vector rbuf(size, 0); + + request *rreq1 = recv_nb(&rbuf[0], rbuf.size(), DATATYPE, tag, + UCP_TAG_MASK_FULL); + request *rreq2 = recv_nb(&small_data, sizeof(small_data), DATATYPE, tag, + UCP_TAG_MASK_FULL); + + request *sreq1 = send_nb(&sbuf[0], sbuf.size(), DATATYPE, tag); + request *sreq2 = send_nb(&small_data, sizeof(small_data), DATATYPE, tag); + + wait_and_validate(rreq2); + + if (!rreq1->completed) { + ucp_request_cancel(receiver().worker(), rreq1); + } else { + UCS_TEST_MESSAGE << "nothing to cancel"; + } + + wait_and_validate(rreq1); + wait_and_validate(sreq1); + wait_and_validate(sreq2); +} + + +UCP_INSTANTIATE_TEST_CASE(test_ucp_tag_cancel) diff --git a/test/gtest/ucp/test_ucp_tag_match.cc b/test/gtest/ucp/test_ucp_tag_match.cc new file mode 100644 index 0000000..e1e9e2d --- /dev/null +++ b/test/gtest/ucp/test_ucp_tag_match.cc @@ -0,0 +1,800 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test_ucp_tag.h" + +#include +extern "C" { +#include +#include +} + +using namespace ucs; /* For vector serialization */ + + +class test_ucp_tag_match : public test_ucp_tag { +public: + test_ucp_tag_match() { + // TODO: test offload and offload MP as different variants + enable_tag_mp_offload(); + if (RUNNING_ON_VALGRIND) { + m_env.push_back(new ucs::scoped_setenv("UCX_RC_TM_SEG_SIZE", "8k")); + m_env.push_back(new ucs::scoped_setenv("UCX_TCP_RX_SEG_SIZE", "8k")); + } + } + + virtual void init() + { + modify_config("TM_THRESH", "1"); + + test_ucp_tag::init(); + ucp_test_param param = GetParam(); + } + + static std::vector enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) + { + std::vector result; + generate_test_params_variant(ctx_params, name, test_case_name, tls, + RECV_REQ_INTERNAL, result); + generate_test_params_variant(ctx_params, name, test_case_name, tls, + RECV_REQ_EXTERNAL, result); + return result; + } + + virtual bool is_external_request() + { + return GetParam().variant == RECV_REQ_EXTERNAL; + } + +protected: + static void recv_callback_release_req(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info) + { + ucp_request_free(request); + m_req_status = status; + } + + static ucs_status_t m_req_status; +}; + +ucs_status_t test_ucp_tag_match::m_req_status = UCS_OK; + + +UCS_TEST_P(test_ucp_tag_match, send_recv_unexp) { + ucp_tag_recv_info_t info; + ucs_status_t status; + + uint64_t send_data = 0xdeadbeefdeadbeef; + uint64_t recv_data = 0; + + send_b(&send_data, sizeof(send_data), DATATYPE, 0x111337); + + short_progress_loop(); /* Receive messages as unexpected */ + + status = recv_b(&recv_data, sizeof(recv_data), DATATYPE, 0x1337, 0xffff, &info); + ASSERT_UCS_OK(status); + + EXPECT_EQ(sizeof(send_data), info.length); + EXPECT_EQ((ucp_tag_t)0x111337, info.sender_tag); + EXPECT_EQ(send_data, recv_data); +} + 
+UCS_TEST_SKIP_COND_P(test_ucp_tag_match, send_recv_unexp_rqfree, + /* request free cannot be used for external requests */ + (GetParam().variant == RECV_REQ_EXTERNAL)) { + request *my_recv_req; + uint64_t send_data = 0xdeadbeefdeadbeef; + uint64_t recv_data = 0; + + my_recv_req = recv_nb(&recv_data, sizeof(recv_data), DATATYPE, 0x1337, 0xffff); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + + request_free(my_recv_req); + + send_b(&send_data, sizeof(send_data), DATATYPE, 0x1337); + + wait_for_flag(&recv_data); + EXPECT_EQ(send_data, recv_data); +} + +UCS_TEST_P(test_ucp_tag_match, send_recv_exp_medium) { + static const size_t size = 50000; + request *my_recv_req; + + std::vector sendbuf(size, 0); + std::vector recvbuf(size, 0); + + ucs::fill_random(sendbuf); + + my_recv_req = recv_nb(&recvbuf[0], recvbuf.size(), DATATYPE, 0x1337, 0xffff); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + ASSERT_TRUE(my_recv_req != NULL); /* Couldn't be completed because didn't send yet */ + + send_b(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + + wait(my_recv_req); + + EXPECT_EQ(sendbuf.size(), my_recv_req->info.length); + EXPECT_EQ((ucp_tag_t)0x111337, my_recv_req->info.sender_tag); + EXPECT_EQ(sendbuf, recvbuf); + request_release(my_recv_req); +} + +UCS_TEST_P(test_ucp_tag_match, send2_nb_recv_exp_medium) { + static const size_t size = 50000; + request *my_recv_req; + + std::vector sendbuf(size, 0); + std::vector recvbuf(size, 0); + + /* 1st send */ + + my_recv_req = recv_nb(&recvbuf[0], recvbuf.size(), DATATYPE, 0x1337, 0xffff); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + ASSERT_TRUE(my_recv_req != NULL); /* Couldn't be completed because didn't send yet */ + + send_b(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + + wait(my_recv_req); + request_release(my_recv_req); + + /* 2nd send */ + + ucs::fill_random(sendbuf); + + my_recv_req = recv_nb(&recvbuf[0], recvbuf.size(), DATATYPE, 0x1337, 0xffff); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + ASSERT_TRUE(my_recv_req != 
NULL); /* Couldn't be completed because didn't send yet */ + + request *my_send_req; + my_send_req = send_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_send_req)); + + wait(my_recv_req); + + EXPECT_EQ(sendbuf.size(), my_recv_req->info.length); + EXPECT_EQ((ucp_tag_t)0x111337, my_recv_req->info.sender_tag); + EXPECT_EQ(sendbuf, recvbuf); + + short_progress_loop(); + + if (my_send_req != NULL) { + EXPECT_TRUE(my_send_req->completed); + EXPECT_EQ(UCS_OK, my_send_req->status); + request_release(my_send_req); + } + request_release(my_recv_req); +} + +UCS_TEST_P(test_ucp_tag_match, send2_nb_recv_medium_wildcard, "RNDV_THRESH=inf") { + static const size_t size = 3000000; + + entity &sender2 = sender(); + create_entity(true); + sender().connect(&receiver(), get_ep_params()); + + for (int is_exp = 0; is_exp <= 1; ++is_exp) { + + UCS_TEST_MESSAGE << "Testing " << (is_exp ? "" : "un") << "expected mode, size " << size; + + std::vector sendbuf1(size, 0); + std::vector sendbuf2(size, 0); + std::vector recvbuf1(size, 0); + std::vector recvbuf2(size, 0); + + ucs::fill_random(sendbuf1); + ucs::fill_random(sendbuf2); + + /* Two sends with different tags */ + + request *sreq1, *sreq2; + sreq1 = (request*)ucp_tag_send_nb(sender().ep(), &sendbuf1[0], sendbuf1.size(), + DATATYPE, 1, send_callback); + ASSERT_TRUE(!UCS_PTR_IS_ERR(sreq1)); + + sreq2 = (request*)ucp_tag_send_nb(sender2.ep(), &sendbuf2[0], sendbuf2.size(), + DATATYPE, 2, send_callback); + ASSERT_TRUE(!UCS_PTR_IS_ERR(sreq2)); + + + /* In unexpected mode, we progress all to put the messages on the + * unexpected queue + */ + if (!is_exp) { + short_progress_loop(); + } + + /* Two receives with any tag */ + + request *rreq1, *rreq2; + + rreq1 = recv_nb(&recvbuf1[0], recvbuf1.size(), DATATYPE, 0, 0); + ASSERT_TRUE(!UCS_PTR_IS_ERR(rreq1)); + + rreq2 = recv_nb(&recvbuf2[0], recvbuf2.size(), DATATYPE, 0, 0); + ASSERT_TRUE(!UCS_PTR_IS_ERR(rreq2)); + + + /* Wait for receives */ + wait(rreq1); + 
wait(rreq2); + + short_progress_loop(); + + /* Release sends */ + if (sreq1 != NULL) { + wait(sreq1); + EXPECT_TRUE(sreq1->completed); + request_release(sreq1); + } + if (sreq2 != NULL) { + wait(sreq2); + EXPECT_TRUE(sreq2->completed); + request_release(sreq2); + } + + /* Receives should be completed with correct length */ + ASSERT_TRUE(rreq1->completed); + ASSERT_TRUE(rreq2->completed); + + EXPECT_EQ(size, rreq1->info.length); + EXPECT_EQ(size, rreq2->info.length); + + /* The order may be any, but the messages have to be received correctly */ + if (rreq1->info.sender_tag == 1u) { + ASSERT_EQ(2u, rreq2->info.sender_tag); + EXPECT_EQ(sendbuf1, recvbuf1); + EXPECT_EQ(sendbuf2, recvbuf2); + } else { + ASSERT_EQ(2u, rreq1->info.sender_tag); + ASSERT_EQ(1u, rreq2->info.sender_tag); + EXPECT_EQ(sendbuf2, recvbuf1); + EXPECT_EQ(sendbuf1, recvbuf2); + } + + request_release(rreq1); + request_release(rreq2); + } +} + +UCS_TEST_P(test_ucp_tag_match, send_recv_nb_partial_exp_medium) { + static const size_t size = 50000; + + std::vector sendbuf(size, 0); + std::vector recvbuf(size, 0); + + ucs::fill_random(sendbuf); + + request *my_recv_req; + my_recv_req = recv_nb(&recvbuf[0], recvbuf.size(), DATATYPE, 0x1337, 0xffff); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + + send_b(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + + usleep(1000); + progress(); + + wait(my_recv_req); + + EXPECT_EQ(sendbuf.size(), my_recv_req->info.length); + EXPECT_EQ((ucp_tag_t)0x111337, my_recv_req->info.sender_tag); + EXPECT_EQ(sendbuf, recvbuf); + + request_release(my_recv_req); +} + +UCS_TEST_P(test_ucp_tag_match, send_nb_recv_unexp) { + ucp_tag_recv_info_t info; + ucs_status_t status; + + uint64_t send_data = 0xdeadbeefdeadbeef; + uint64_t recv_data = 0; + + request *my_send_req; + my_send_req = send_nb(&send_data, sizeof(send_data), DATATYPE, 0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_send_req)); + + ucp_worker_progress(receiver().worker()); + + status = recv_b(&recv_data, sizeof(recv_data), 
DATATYPE, 0x1337, 0xffff, &info); + ASSERT_UCS_OK(status); + + EXPECT_EQ(sizeof(send_data), info.length); + EXPECT_EQ((ucp_tag_t)0x111337, info.sender_tag); + EXPECT_EQ(send_data, recv_data); + + if (my_send_req != NULL) { + EXPECT_TRUE(my_send_req->completed); + EXPECT_EQ(UCS_OK, my_send_req->status); + request_release(my_send_req); + } +} + +UCS_TEST_P(test_ucp_tag_match, send_recv_cb_release) { + + uint64_t send_data = 0xdeadbeefdeadbeef; + + send_b(&send_data, sizeof(send_data), DATATYPE, 0x111337); + + short_progress_loop(); /* Receive messages as unexpected */ + + m_req_status = UCS_INPROGRESS; + + uint64_t recv_data; + request *recv_req = (request*)ucp_tag_recv_nb(receiver().worker(), &recv_data, + sizeof(recv_data), DATATYPE, 0, 0, + recv_callback_release_req); + if (UCS_PTR_IS_ERR(recv_req)) { + ASSERT_UCS_OK(UCS_PTR_STATUS(recv_req)); + } else if (recv_req == NULL) { + UCS_TEST_ABORT("ucp_tag_recv_nb returned NULL"); + } else { + /* request would be completed and released by the callback */ + while (m_req_status == UCS_INPROGRESS) { + progress(); + } + ASSERT_UCS_OK(m_req_status); + } +} + +UCS_TEST_P(test_ucp_tag_match, send_recv_truncated) { + ucp_tag_recv_info_t info; + ucs_status_t status; + + uint64_t send_data = 0xdeadbeefdeadbeef; + + send_b(&send_data, sizeof(send_data), DATATYPE, 0x111337); + + short_progress_loop(); /* Receive messages as unexpected */ + + status = recv_b(NULL, 0, DATATYPE, 0x1337, 0xffff, &info); + EXPECT_EQ(UCS_ERR_MESSAGE_TRUNCATED, status); +} + +UCS_TEST_P(test_ucp_tag_match, send_recv_nb_exp) { + + uint64_t send_data = 0xdeadbeefdeadbeef; + uint64_t recv_data = 0; + + request *my_recv_req; + my_recv_req = recv_nb(&recv_data, sizeof(recv_data), DATATYPE, 0x1337, 0xffff); + + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + ASSERT_TRUE(my_recv_req != NULL); /* Couldn't be completed because didn't send yet */ + + send_b(&send_data, sizeof(send_data), DATATYPE, 0x111337); + + wait(my_recv_req); + + 
EXPECT_TRUE(my_recv_req->completed); + EXPECT_EQ(UCS_OK, my_recv_req->status); + EXPECT_EQ(sizeof(send_data), my_recv_req->info.length); + EXPECT_EQ((ucp_tag_t)0x111337, my_recv_req->info.sender_tag); + EXPECT_EQ(send_data, recv_data); + request_release(my_recv_req); +} + +UCS_TEST_P(test_ucp_tag_match, send_nb_multiple_recv_unexp) { + const unsigned num_requests = 1000; + ucp_tag_recv_info_t info; + ucs_status_t status; + + uint64_t send_data = 0xdeadbeefdeadbeef; + uint64_t recv_data = 0; + + std::vector send_reqs(num_requests); + + skip_loopback(); + + for (unsigned i = 0; i < num_requests; ++i) { + send_reqs[i] = send_nb(&send_data, sizeof(send_data), DATATYPE, 0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(send_reqs[i])); + } + + ucp_worker_progress(receiver().worker()); + + for (unsigned i = 0; i < num_requests; ++i) { + status = recv_b(&recv_data, sizeof(recv_data), DATATYPE, 0x1337, 0xffff, + &info); + ASSERT_UCS_OK(status); + ASSERT_EQ(num_requests, send_reqs.size()); + + EXPECT_EQ(sizeof(send_data), info.length); + EXPECT_EQ((ucp_tag_t)0x111337, info.sender_tag); + EXPECT_EQ(send_data, recv_data); + } + + for (unsigned i = 0; i < num_requests; ++i) { + if (send_reqs[i] != NULL) { + EXPECT_TRUE(send_reqs[i]->completed); + EXPECT_EQ(UCS_OK, send_reqs[i]->status); + request_release(send_reqs[i]); + } + } +} + +UCS_TEST_P(test_ucp_tag_match, sync_send_unexp) { + ucp_tag_recv_info_t info; + ucs_status_t status; + + uint64_t send_data = 0x0102030405060708; + uint64_t recv_data = 0; + + request *my_send_req = send_sync_nb(&send_data, sizeof(send_data), DATATYPE, + 0x111337); + short_progress_loop(); + + ASSERT_TRUE(my_send_req != NULL); + EXPECT_FALSE(my_send_req->completed); + + ucp_worker_progress(receiver().worker()); + + status = recv_b(&recv_data, sizeof(recv_data), DATATYPE, 0x1337, 0xffff, &info); + ASSERT_UCS_OK(status); + + EXPECT_EQ(sizeof(send_data), info.length); + EXPECT_EQ((ucp_tag_t)0x111337, info.sender_tag); + EXPECT_EQ(send_data, recv_data); + + 
short_progress_loop(); + + EXPECT_TRUE(my_send_req->completed); + EXPECT_EQ(UCS_OK, my_send_req->status); + request_release(my_send_req); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_tag_match) + +class test_ucp_tag_match_rndv : public test_ucp_tag_match { +public: + enum { + RNDV_SCHEME_AUTO = 0, + RNDV_SCHEME_PUT_ZCOPY, + RNDV_SCHEME_GET_ZCOPY + }; + + static const std::string rndv_schemes[]; + + void init() { + ASSERT_LE(GetParam().variant, (int)RNDV_SCHEME_GET_ZCOPY); + modify_config("RNDV_SCHEME", rndv_schemes[GetParam().variant]); + + test_ucp_tag_match::init(); + } + + std::vector + static enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) + { + std::vector result; + generate_test_params_variant(ctx_params, name, + test_case_name + "/rndv_" + + rndv_schemes[RNDV_SCHEME_AUTO], + tls, RNDV_SCHEME_AUTO, result); + generate_test_params_variant(ctx_params, name, + test_case_name + "/rndv_" + + rndv_schemes[RNDV_SCHEME_PUT_ZCOPY], + tls, RNDV_SCHEME_PUT_ZCOPY, result); + generate_test_params_variant(ctx_params, name, + test_case_name + "/rndv_" + + rndv_schemes[RNDV_SCHEME_GET_ZCOPY], + tls, RNDV_SCHEME_GET_ZCOPY, result); + return result; + } +}; + +const std::string test_ucp_tag_match_rndv::rndv_schemes[] = { "auto", + "put_zcopy", + "get_zcopy" }; + +UCS_TEST_P(test_ucp_tag_match_rndv, sync_send_unexp, "RNDV_THRESH=1048576") { + static const size_t size = 1148576; + request *my_send_req; + ucp_tag_recv_info_t info; + ucs_status_t status; + + std::vector sendbuf(size, 0); + std::vector recvbuf(size, 0); + + ucs::fill_random(sendbuf); + + /* sender - send the rts*/ + my_send_req = send_sync_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + /* receiver - get the rts and put in unexpected */ + short_progress_loop(); + + ASSERT_TRUE(my_send_req != NULL); + EXPECT_FALSE(my_send_req->completed); + + /* receiver - issue a recv req, match the rts, perform rndv-get and send ats to 
sender */ + status = recv_b(&recvbuf[0], recvbuf.size(), DATATYPE, 0x1337, 0xffff, &info); + ASSERT_UCS_OK(status); + + EXPECT_EQ(sendbuf.size(), info.length); + EXPECT_EQ((ucp_tag_t)0x111337, info.sender_tag); + EXPECT_EQ(sendbuf, recvbuf); + + /* sender - get the ATS and set send request to completed */ + wait_for_flag(&my_send_req->completed); + + EXPECT_TRUE(my_send_req->completed); + EXPECT_EQ(UCS_OK, my_send_req->status); + request_release(my_send_req); +} + +UCS_TEST_P(test_ucp_tag_match_rndv, req_exp, "RNDV_THRESH=1048576") { + static const size_t size = 1148576; + request *my_send_req, *my_recv_req; + + std::vector sendbuf(size, 0); + std::vector recvbuf(size, 0); + + skip_loopback(); + + ucs::fill_random(sendbuf); + + /* receiver - put the receive request into expected */ + my_recv_req = recv_nb(&recvbuf[0], recvbuf.size(), DATATYPE, 0x1337, 0xffff); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + EXPECT_FALSE(my_recv_req->completed); + + /* sender - send the RTS */ + my_send_req = send_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_send_req)); + + /* receiver - match the rts, perform rndv get and send an ack upon finishing */ + short_progress_loop(); + /* for UCTs that cannot perform real rndv and may do eager send-recv bcopy instead */ + wait(my_recv_req); + + EXPECT_EQ(sendbuf.size(), my_recv_req->info.length); + EXPECT_EQ((ucp_tag_t)0x111337, my_recv_req->info.sender_tag); + EXPECT_TRUE(my_recv_req->completed); + EXPECT_EQ(sendbuf, recvbuf); + + wait_and_validate(my_send_req); + request_release(my_recv_req); +} + +UCS_TEST_P(test_ucp_tag_match_rndv, rts_unexp, "RNDV_THRESH=1048576") { + static const size_t size = 1148576; + request *my_send_req; + ucp_tag_recv_info_t info; + ucs_status_t status; + + std::vector sendbuf(size, 0); + std::vector recvbuf(size, 0); + + skip_loopback(); + + ucs::fill_random(sendbuf); + + /* sender - send the RTS */ + my_send_req = send_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 
0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_send_req)); + + /* receiver - get the RTS and put it into unexpected */ + short_progress_loop(); + + /* receiver - issue a receive request, match it with the RTS and perform rndv get */ + status = recv_b(&recvbuf[0], recvbuf.size(), DATATYPE, 0x1337, 0xffff, &info); + ASSERT_UCS_OK(status); + + /* sender - get the ATS and set send request to completed */ + wait_and_validate(my_send_req); + + EXPECT_EQ(sendbuf.size() , info.length); + EXPECT_EQ((ucp_tag_t)0x111337, info.sender_tag); + EXPECT_EQ(sendbuf, recvbuf); +} + +UCS_TEST_P(test_ucp_tag_match_rndv, truncated, "RNDV_THRESH=1048576") { + static const size_t size = 1148576; + request *my_send_req; + ucp_tag_recv_info_t info; + ucs_status_t status; + + std::vector sendbuf(size, 0); + + skip_loopback(); + + ucs::fill_random(sendbuf); + + /* sender - send the RTS */ + my_send_req = send_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_send_req)); + + /* receiver - get the RTS and put it into unexpected */ + short_progress_loop(); + + /* receiver - issue a receive request with zero length, + * no assertions should occur */ + status = recv_b(NULL, 0, DATATYPE, 0x1337, 0xffff, &info); + EXPECT_EQ(UCS_ERR_MESSAGE_TRUNCATED, status); + + /* sender - get the ATS and set send request to completed */ + wait_and_validate(my_send_req); +} + +UCS_TEST_P(test_ucp_tag_match_rndv, post_larger_recv, "RNDV_THRESH=0") { + /* small send size should probably be lower than minimum GET Zcopy + * size supported by IB TLs */ + static const size_t small_send_size = 16; + static const size_t small_recv_size = small_send_size * 2; + static const size_t large_send_size = 1148576; + static const size_t large_recv_size = large_send_size + 1 * UCS_KBYTE; + /* array of [send][recv] sizes */ + static const size_t sizes[][2] = { { small_send_size, small_recv_size }, + { large_send_size, large_recv_size } }; + request *my_send_req, *my_recv_req; + + for (unsigned i = 0; 
i < ucs_array_size(sizes); i++) { + size_t send_size = sizes[i][0]; + size_t recv_size = sizes[i][1]; + std::vector sendbuf(send_size, 0); + std::vector recvbuf(recv_size, 0); + + ucs::fill_random(sendbuf); + ucs::fill_random(recvbuf); + + my_recv_req = recv_nb(&recvbuf[0], recvbuf.size(), DATATYPE, 0x1337, 0xffff); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + EXPECT_FALSE(my_recv_req->completed); + + my_send_req = send_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_send_req)); + + wait(my_recv_req); + + EXPECT_EQ(sendbuf.size(), my_recv_req->info.length); + EXPECT_EQ(recvbuf.size(), ((ucp_request_t*)my_recv_req - 1)->recv.length); + EXPECT_EQ((ucp_tag_t)0x111337, my_recv_req->info.sender_tag); + EXPECT_TRUE(my_recv_req->completed); + EXPECT_NE(sendbuf, recvbuf); + EXPECT_TRUE(std::equal(sendbuf.begin(), sendbuf.end(), recvbuf.begin())); + + wait_and_validate(my_send_req); + request_release(my_recv_req); + } +} + +UCS_TEST_P(test_ucp_tag_match_rndv, req_exp_auto_thresh, "RNDV_THRESH=auto") { + static const size_t size = 1148576; + request *my_send_req, *my_recv_req; + + std::vector sendbuf(size, 0); + std::vector recvbuf(size, 0); + + skip_loopback(); + + ucs::fill_random(sendbuf); + + /* receiver - put the receive request into expected */ + my_recv_req = recv_nb(&recvbuf[0], recvbuf.size(), DATATYPE, 0x1337, 0xffff); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + EXPECT_FALSE(my_recv_req->completed); + + /* sender - send the RTS */ + my_send_req = send_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_send_req)); + + /* receiver - match the rts, perform rndv get and send an ack upon finishing */ + short_progress_loop(); + /* for UCTs that cannot perform real rndv and may do eager send-recv bcopy instead */ + wait(my_recv_req); + + EXPECT_EQ(sendbuf.size(), my_recv_req->info.length); + EXPECT_EQ((ucp_tag_t)0x111337, my_recv_req->info.sender_tag); + EXPECT_TRUE(my_recv_req->completed); + 
EXPECT_EQ(sendbuf, recvbuf); + + /* sender - get the ATS and set send request to completed */ + wait_and_validate(my_send_req); + request_release(my_recv_req); +} + +UCS_TEST_P(test_ucp_tag_match_rndv, exp_huge_mix) { + const size_t sizes[] = { 1000, 2000, 8000, 2500ul * UCS_MBYTE }; + + /* small sizes should warm-up tag cache */ + for (unsigned i = 0; i < ucs_array_size(sizes); ++i) { + const size_t size = sizes[i] / ucs::test_time_multiplier(); + request *my_send_req, *my_recv_req; + + std::vector sendbuf(size, 0); + std::vector recvbuf(size, 0); + + ucs::fill_random(sendbuf); + + my_recv_req = recv_nb(&recvbuf[0], recvbuf.size(), DATATYPE, 0x1337, 0xffff); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + EXPECT_FALSE(my_recv_req->completed); + + my_send_req = send_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_send_req)); + + wait(my_recv_req); + + EXPECT_EQ(sendbuf.size(), my_recv_req->info.length); + EXPECT_EQ((ucp_tag_t)0x111337, my_recv_req->info.sender_tag); + EXPECT_TRUE(my_recv_req->completed); + EXPECT_EQ(sendbuf, recvbuf); + + wait_and_validate(my_send_req); + request_free(my_recv_req); + } +} + +UCS_TEST_P(test_ucp_tag_match_rndv, bidir_multi_exp_post, "RNDV_THRESH=0") { + const size_t sizes[] = { 8 * UCS_KBYTE, 128 * UCS_KBYTE, 512 * UCS_KBYTE, + 8 * UCS_MBYTE, 128 * UCS_MBYTE, 512 * UCS_MBYTE }; + + receiver().connect(&sender(), get_ep_params()); + + for (unsigned i = 0; i < ucs_array_size(sizes); ++i) { + const size_t size = sizes[i] / + ucs::test_time_multiplier() / + ucs::test_time_multiplier(); + const size_t count = ucs_max((size_t)(5000.0 / sqrt(sizes[i]) / + ucs::test_time_multiplier()), 3lu); + std::vector sreqs; + std::vector rreqs; + std::vector > sbufs; + std::vector > rbufs; + + sbufs.resize(count * 2); + rbufs.resize(count * 2); + + for (size_t repeat = 0; repeat < count * 2; ++repeat) { + entity &send_e = repeat < count ? sender() : receiver(); + entity &recv_e = repeat < count ? 
receiver() : sender(); + request *my_send_req, *my_recv_req; + + sbufs[repeat].resize(size, 0); + rbufs[repeat].resize(size, 0); + ucs::fill_random(sbufs[repeat]); + + my_recv_req = recv(recv_e, RECV_NB, + &rbufs[repeat][0], rbufs[repeat].size(), + DATATYPE, 0x1337, 0xffff, NULL); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + EXPECT_FALSE(my_recv_req->completed); + + my_send_req = send(send_e, SEND_NB, + &sbufs[repeat][0], sbufs[repeat].size(), + DATATYPE, 0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_send_req)); + + sreqs.push_back(my_send_req); + rreqs.push_back(my_recv_req); + } + + for (size_t repeat = 0; repeat < count * 2; ++repeat) { + request *my_send_req, *my_recv_req; + + my_recv_req = rreqs[repeat]; + my_send_req = sreqs[repeat]; + + wait(my_recv_req); + + EXPECT_EQ(sbufs[repeat].size(), my_recv_req->info.length); + EXPECT_EQ((ucp_tag_t)0x111337, my_recv_req->info.sender_tag); + EXPECT_TRUE(my_recv_req->completed); + EXPECT_EQ(sbufs[repeat], rbufs[repeat]); + + wait_and_validate(my_send_req); + request_free(my_recv_req); + } + } +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_tag_match_rndv) diff --git a/test/gtest/ucp/test_ucp_tag_mem_type.cc b/test/gtest/ucp/test_ucp_tag_mem_type.cc new file mode 100644 index 0000000..bbac2a1 --- /dev/null +++ b/test/gtest/ucp/test_ucp_tag_mem_type.cc @@ -0,0 +1,210 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test_ucp_tag.h" +#include + +#include "ucp_datatype.h" + +extern "C" { +#include +#include +} + +#include + + +class test_ucp_tag_mem_type: public test_ucp_tag { +public: + enum { + VARIANT_DEFAULT = UCS_BIT(0), + VARIANT_GDR_OFF = UCS_BIT(1), + VARIANT_TAG_OFFLOAD = UCS_BIT(2), + VARIANT_MAX = UCS_BIT(3) + }; + + void init() { + int mem_type_pair_index = GetParam().variant % mem_type_pairs.size(); + int varient_index = GetParam().variant / mem_type_pairs.size(); + + if (varient_index & VARIANT_GDR_OFF) { + m_env.push_back(new ucs::scoped_setenv("UCX_IB_GPU_DIRECT_RDMA", "n")); + } + + if (varient_index & VARIANT_TAG_OFFLOAD) { + enable_tag_mp_offload(); + + if (RUNNING_ON_VALGRIND) { + m_env.push_back(new ucs::scoped_setenv("UCX_RC_TM_SEG_SIZE", "8k")); + m_env.push_back(new ucs::scoped_setenv("UCX_TCP_RX_SEG_SIZE", "8k")); + } + } + + m_send_mem_type = mem_type_pairs[mem_type_pair_index][0]; + m_recv_mem_type = mem_type_pairs[mem_type_pair_index][1]; + + modify_config("MAX_EAGER_LANES", "2"); + modify_config("MAX_RNDV_LANES", "2"); + + test_ucp_tag::init(); + } + + void cleanup() { + test_ucp_tag::cleanup(); + } + + std::vector + static enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) { + + std::vector result; + int count = 0; + + for (int i = 0; i < VARIANT_MAX; i++) { + for (std::vector >::const_iterator iter = + mem_type_pairs.begin(); iter != mem_type_pairs.end(); ++iter) { + generate_test_params_variant(ctx_params, name, test_case_name + "/" + + std::string(ucs_memory_type_names[(*iter)[0]]) + + "<->" + std::string(ucs_memory_type_names[(*iter)[1]]), + tls, count++, result); + } + } + + return result; + } + + static std::vector > mem_type_pairs; + +protected: + + size_t do_xfer(const void *sendbuf, void *recvbuf, size_t count, + ucp_datatype_t send_dt, ucp_datatype_t recv_dt, + bool expected, bool truncated, bool extended); + + ucs_memory_type_t 
m_send_mem_type; + ucs_memory_type_t m_recv_mem_type; + +private: + + static const uint64_t SENDER_TAG = 0x111337; + static const uint64_t RECV_MASK = 0xffff; + static const uint64_t RECV_TAG = 0x1337; +}; + +std::vector > +test_ucp_tag_mem_type::mem_type_pairs = ucs::supported_mem_type_pairs(); + +size_t test_ucp_tag_mem_type::do_xfer(const void *sendbuf, void *recvbuf, + size_t count, ucp_datatype_t send_dt, + ucp_datatype_t recv_dt, bool expected, + bool truncated, bool extended) +{ + size_t recv_count = count; + size_t send_count = count; + size_t recvd = 0; + request *rreq, *sreq; + + if (truncated) { + recv_count /= 2; + } + + if (extended) { + send_count /= 2; + } + + if (expected) { + rreq = recv_nb(recvbuf, recv_count, recv_dt, RECV_TAG, RECV_MASK); + sreq = send_nb(sendbuf, send_count, send_dt, SENDER_TAG); + } else { + sreq = send_nb(sendbuf, send_count, send_dt, SENDER_TAG); + + wait_for_unexpected_msg(receiver().worker(), 10.0); + + rreq = recv_nb(recvbuf, recv_count, recv_dt, RECV_TAG, RECV_MASK); + } + + /* progress both sender and receiver */ + wait(rreq); + if (sreq != NULL) { + wait(sreq); + request_release(sreq); + } + + recvd = rreq->info.length; + if (!truncated) { + EXPECT_UCS_OK(rreq->status); + EXPECT_EQ((ucp_tag_t)SENDER_TAG, rreq->info.sender_tag); + } else { + EXPECT_EQ(UCS_ERR_MESSAGE_TRUNCATED, rreq->status); + } + + request_release(rreq); + return recvd; +}; + +UCS_TEST_P(test_ucp_tag_mem_type, basic) +{ + ucp_datatype_t type = ucp_dt_make_contig(1); + + UCS_TEST_MESSAGE << "TEST: " + << ucs_memory_type_names[m_send_mem_type] << " <-> " + << ucs_memory_type_names[m_recv_mem_type]; + + for (unsigned i = 1; i <= 7; ++i) { + size_t max = (long)pow(10.0, i); + size_t length = ucs::rand() % max + 1; + + mem_buffer m_recv_mem_buf(length, m_recv_mem_type); + mem_buffer m_send_mem_buf(length, m_send_mem_type); + + mem_buffer::pattern_fill(m_recv_mem_buf.ptr(), m_recv_mem_buf.size(), + 1, m_recv_mem_buf.mem_type()); + + 
mem_buffer::pattern_fill(m_send_mem_buf.ptr(), m_send_mem_buf.size(), + 2, m_send_mem_buf.mem_type()); + + size_t recvd = do_xfer(m_send_mem_buf.ptr(), m_recv_mem_buf.ptr(), + length, type, type, true, false, false); + ASSERT_EQ(length, recvd); + mem_buffer::pattern_check(m_recv_mem_buf.ptr(), length, + 2, m_recv_mem_buf.mem_type()); + } +} + +UCS_TEST_P(test_ucp_tag_mem_type, xfer_mismatch_length) +{ + ucp_datatype_t type = ucp_dt_make_contig(1); + size_t length = ucs::rand() % ((ssize_t)pow(10.0, 7)); + + UCS_TEST_MESSAGE << "TEST: " + << ucs_memory_type_names[m_send_mem_type] << " <-> " + << ucs_memory_type_names[m_recv_mem_type] << " length :" + << length; + + mem_buffer m_recv_mem_buf(length, m_recv_mem_type); + mem_buffer m_send_mem_buf(length, m_send_mem_type); + + mem_buffer::pattern_fill(m_recv_mem_buf.ptr(), m_recv_mem_buf.size(), + 1, m_recv_mem_buf.mem_type()); + + mem_buffer::pattern_fill(m_send_mem_buf.ptr(), m_send_mem_buf.size(), + 2, m_send_mem_buf.mem_type()); + + /* truncated */ + do_xfer(m_send_mem_buf.ptr(), m_recv_mem_buf.ptr(), + length, type, type, true, true, false); + + /* extended recv buffer */ + size_t recvd = do_xfer(m_send_mem_buf.ptr(), m_recv_mem_buf.ptr(), + length, type, type, true, false, true); + ASSERT_EQ(length / 2, recvd); + +} + + +UCP_INSTANTIATE_TEST_CASE_GPU_AWARE(test_ucp_tag_mem_type); diff --git a/test/gtest/ucp/test_ucp_tag_mt.cc b/test/gtest/ucp/test_ucp_tag_mt.cc new file mode 100644 index 0000000..90e1c12 --- /dev/null +++ b/test/gtest/ucp/test_ucp_tag_mt.cc @@ -0,0 +1,90 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test_ucp_tag.h" + +#include + +#if _OPENMP +#include "omp.h" +#endif + +using namespace ucs; /* For vector serialization */ + + +class test_ucp_tag_mt : public test_ucp_tag { +public: + virtual void init() + { + test_ucp_tag::init(); + ucp_test_param param = GetParam(); + } + + static std::vector enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) + { + std::vector result; + + generate_test_params_variant(ctx_params, name, + test_case_name, tls, RECV_REQ_INTERNAL, + result, MULTI_THREAD_CONTEXT); + generate_test_params_variant(ctx_params, name, + test_case_name, tls, RECV_REQ_EXTERNAL, + result, MULTI_THREAD_CONTEXT); + generate_test_params_variant(ctx_params, name, + test_case_name, tls, RECV_REQ_INTERNAL, + result, MULTI_THREAD_WORKER); + generate_test_params_variant(ctx_params, name, + test_case_name, tls, RECV_REQ_EXTERNAL, + result, MULTI_THREAD_WORKER); + return result; + } + + virtual bool is_external_request() + { + return GetParam().variant == RECV_REQ_EXTERNAL; + } +}; + +UCS_TEST_P(test_ucp_tag_mt, send_recv) { + uint64_t send_data[MT_TEST_NUM_THREADS] GTEST_ATTRIBUTE_UNUSED_; + uint64_t recv_data[MT_TEST_NUM_THREADS] GTEST_ATTRIBUTE_UNUSED_; + ucp_tag_recv_info_t info[MT_TEST_NUM_THREADS] GTEST_ATTRIBUTE_UNUSED_; + + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + send_data[i] = 0xdeadbeefdeadbeef + 10 * i; + recv_data[i] = 0; + } + +#if _OPENMP && ENABLE_MT +#pragma omp parallel for + for (int i = 0; i < MT_TEST_NUM_THREADS; i++) { + ucs_status_t status; + int worker_index = 0; + + if (GetParam().thread_type == MULTI_THREAD_CONTEXT) { + worker_index = i; + } + + send_b(&(send_data[i]), sizeof(send_data[i]), DATATYPE, 0x111337+i, i); + + short_progress_loop(worker_index); /* Receive messages as unexpected */ + + status = recv_b(&(recv_data[i]), sizeof(recv_data[i]), DATATYPE, 0x1337+i, + 0xffff, &(info[i]), i); + ASSERT_UCS_OK(status); + + 
EXPECT_EQ(sizeof(send_data[i]), info[i].length); + EXPECT_EQ((ucp_tag_t)(0x111337+i), info[i].sender_tag); + EXPECT_EQ(send_data[i], recv_data[i]); + } +#endif +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_tag_mt) diff --git a/test/gtest/ucp/test_ucp_tag_offload.cc b/test/gtest/ucp/test_ucp_tag_offload.cc new file mode 100644 index 0000000..67216bd --- /dev/null +++ b/test/gtest/ucp/test_ucp_tag_offload.cc @@ -0,0 +1,755 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2017-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "test_ucp_tag.h" + +#include "ucp_datatype.h" + +extern "C" { +#include +#include +#include +} + +#define UCP_INSTANTIATE_TAG_OFFLOAD_TEST_CASE(_test_case) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, dcx, "dc_x") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, rcx, "rc_x") + +class test_ucp_tag_offload : public test_ucp_tag { +public: + test_ucp_tag_offload() { + // TODO: test offload and offload MP as different variants + enable_tag_mp_offload(); + } + + void init() + { + test_ucp_tag::init(); + check_offload_support(true); + } + + request* recv_nb_and_check(void *buffer, size_t count, ucp_datatype_t dt, + ucp_tag_t tag, ucp_tag_t tag_mask) + { + request *req = recv_nb(buffer, count, dt, tag, tag_mask); + EXPECT_TRUE(!UCS_PTR_IS_ERR(req)); + EXPECT_TRUE(req != NULL); + return req; + } + + request* recv_nb_exp(void *buffer, size_t count, ucp_datatype_t dt, + ucp_tag_t tag, ucp_tag_t tag_mask) + { + request *req1 = recv_nb_and_check(buffer, count, DATATYPE, tag, + UCP_TAG_MASK_FULL); + + // Post and cancel another receive to make sure the first one was offloaded + size_t size = receiver().worker()->context->config.ext.tm_thresh + 1; + std::vector tbuf(size, 0); + request *req2 = recv_nb_and_check(&tbuf[0], size, DATATYPE, tag, + UCP_TAG_MASK_FULL); + req_cancel(receiver(), req2); + + return req1; + } + + void send_recv(entity &se, ucp_tag_t tag, size_t length) + { + std::vector sendbuf(length); + std::vector 
recvbuf(length); + + request *rreq = recv_nb_exp(&recvbuf[0], length, DATATYPE, tag, + UCP_TAG_MASK_FULL); + + request *sreq = (request*)ucp_tag_send_nb(se.ep(), &sendbuf[0], length, + DATATYPE, tag, send_callback); + if (UCS_PTR_IS_ERR(sreq)) { + ASSERT_UCS_OK(UCS_PTR_STATUS(sreq)); + } else if (sreq != NULL) { + wait(sreq); + request_free(sreq); + } + + wait(rreq); + request_free(rreq); + } + + void activate_offload(entity &se, ucp_tag_t tag = 0x11) + { + send_recv(se, tag, receiver().worker()->context->config.ext.tm_thresh); + } + + void req_cancel(entity &e, request *req) + { + ucp_request_cancel(e.worker(), req); + wait(req); + request_free(req); + } +}; + +UCS_TEST_P(test_ucp_tag_offload, post_after_cancel) +{ + uint64_t small_val = 0xFAFA; + ucp_tag_t tag = 0x11; + std::vector recvbuf(2048, 0); + + activate_offload(sender()); + + request *req = recv_nb_and_check(&small_val, sizeof(small_val), DATATYPE, + tag, UCP_TAG_MASK_FULL); + + EXPECT_EQ(1u, receiver().worker()->tm.expected.sw_all_count); + req_cancel(receiver(), req); + EXPECT_EQ(0u, receiver().worker()->tm.expected.sw_all_count); + + req = recv_nb_and_check(&recvbuf, recvbuf.size(), DATATYPE, tag, + UCP_TAG_MASK_FULL); + + EXPECT_EQ(0u, receiver().worker()->tm.expected.sw_all_count); + req_cancel(receiver(), req); +} + +UCS_TEST_P(test_ucp_tag_offload, post_after_comp) +{ + uint64_t small_val = 0xFAFA; + ucp_tag_t tag = 0x11; + std::vector recvbuf(2048, 0); + + activate_offload(sender()); + + request *req = recv_nb_and_check(&small_val, sizeof(small_val), DATATYPE, + tag, UCP_TAG_MASK_FULL); + + EXPECT_EQ(1u, receiver().worker()->tm.expected.sw_all_count); + + send_b(&small_val, sizeof(small_val), DATATYPE, tag); + wait(req); + request_free(req); + EXPECT_EQ(0u, receiver().worker()->tm.expected.sw_all_count); + + req = recv_nb_and_check(&recvbuf, recvbuf.size(), DATATYPE, tag, + UCP_TAG_MASK_FULL); + + EXPECT_EQ(0u, receiver().worker()->tm.expected.sw_all_count); + req_cancel(receiver(), req); +} + 
+// A receive posted with tag mask 0 (wildcard) is counted in the SW expected
+// queue, and a following full-mask receive with a different tag is counted
+// there as well.
+UCS_TEST_P(test_ucp_tag_offload, post_wild)
+{
+    uint64_t small_val = 0xFAFA;
+    ucp_tag_t tag1     = 0x11; // these two tags should go to different
+    ucp_tag_t tag2     = 0x13; // hash buckets in the TM expected queue
+    std::vector<char> recvbuf(2048, 0);
+
+    activate_offload(sender());
+
+    request *req1 = recv_nb_and_check(&small_val, sizeof(small_val), DATATYPE,
+                                      tag1, 0);
+    EXPECT_EQ(1u, receiver().worker()->tm.expected.sw_all_count);
+
+    // Post the vector's storage (not the address of the vector object)
+    request *req2 = recv_nb_and_check(&recvbuf[0], recvbuf.size(), DATATYPE,
+                                      tag2, UCP_TAG_MASK_FULL);
+    // Second request should not be posted as well. Even though it has another
+    // tag, the first request is a wildcard, which needs to be handled in SW,
+    // so it blocks all other requests
+    EXPECT_EQ(2u, receiver().worker()->tm.expected.sw_all_count);
+    req_cancel(receiver(), req1);
+    req_cancel(receiver(), req2);
+}
+
+// Two receives with the same tag (a small one, then a 2048-byte one) are both
+// counted in the SW expected queue; a third receive with a different tag does
+// not increase that count.
+UCS_TEST_P(test_ucp_tag_offload, post_dif_buckets)
+{
+    uint64_t small_val = 0xFAFA;
+    ucp_tag_t tag1     = 0x11; // these two tags should go to different
+    ucp_tag_t tag2     = 0x13; // hash buckets in the TM expected queue
+    std::vector<request*> reqs;
+    request *req;
+
+    std::vector<char> recvbuf(2048, 0);
+
+    activate_offload(sender());
+
+    req = recv_nb_and_check(&small_val, sizeof(small_val), DATATYPE, tag1,
+                            UCP_TAG_MASK_FULL);
+    reqs.push_back(req);
+
+    req = recv_nb_and_check(&recvbuf[0], recvbuf.size(), DATATYPE, tag1,
+                            UCP_TAG_MASK_FULL);
+    reqs.push_back(req);
+
+    // The first request was not offloaded due to small size and the second
+    // is blocked by the first one.
+    EXPECT_EQ(2u, receiver().worker()->tm.expected.sw_all_count);
+
+    req = recv_nb_and_check(&recvbuf[0], recvbuf.size(), DATATYPE, tag2,
+                            UCP_TAG_MASK_FULL);
+    reqs.push_back(req);
+
+    // Check that another request with different tag is offloaded.
+    EXPECT_EQ(2u, receiver().worker()->tm.expected.sw_all_count);
+
+    for (std::vector<request*>::const_iterator iter = reqs.begin();
+         iter != reqs.end(); ++iter) {
+        req_cancel(receiver(), *iter);
+    }
+}
+
+// With TM_FORCE_THRESH=4k and TM_THRESH=1k: small receives stay in the SW
+// expected queue until a 5000-byte receive with the same tag is posted, after
+// which the SW count drops to zero.
+UCS_TEST_P(test_ucp_tag_offload, force_thresh_basic, "TM_FORCE_THRESH=4k",
+           "TM_THRESH=1k")
+{
+    uint64_t small_val    = 0xFAFA;
+    const size_t big_size = 5000;
+    int num_reqs          = 8;
+    int tag               = 0x11;
+    std::vector<request*> reqs;
+    request *req;
+
+    activate_offload(sender());
+
+    for (int i = 0; i < num_reqs - 1; ++i) {
+        req = recv_nb_and_check(&small_val, sizeof(small_val), DATATYPE,
+                                tag, UCP_TAG_MASK_FULL);
+        reqs.push_back(req);
+    }
+
+    // No requests should be posted to the transport, because their sizes are
+    // less than TM_THRESH
+    EXPECT_EQ((unsigned)(num_reqs - 1),
+              receiver().worker()->tm.expected.sw_all_count);
+
+    std::vector<char> recvbuf_big(big_size, 0);
+
+    req = recv_nb(&recvbuf_big[0], recvbuf_big.size(), DATATYPE, tag,
+                  UCP_TAG_MASK_FULL);
+    reqs.push_back(req);
+
+    // Now, all requests should be posted to the transport, because receive
+    // buffer bigger than FORCE_THRESH has been posted
+    EXPECT_EQ((unsigned)0, receiver().worker()->tm.expected.sw_all_count);
+
+    std::vector<request*>::const_iterator iter;
+    for (iter = reqs.begin(); iter != reqs.end(); ++iter) {
+        req_cancel(receiver(), *iter);
+    }
+}
+
+// Same thresholds as force_thresh_basic, but an IOV receive and a wildcard
+// receive with the same tag are pending: a big receive is counted in SW while
+// either of them is still posted, and the SW count drops to zero only after
+// both have been cancelled.
+UCS_TEST_P(test_ucp_tag_offload, force_thresh_blocked, "TM_FORCE_THRESH=4k",
+           "TM_THRESH=1k")
+{
+    uint64_t small_val    = 0xFAFA;
+    const size_t big_size = 5000;
+    int num_reqs          = 8;
+    int tag               = 0x11;
+    std::vector<request*> reqs;
+    request *req;
+    int i;
+
+    activate_offload(sender());
+
+    // num_reqs - 3 small receives; together with the IOV and wildcard receives
+    // below this leaves num_reqs - 1 requests pending before the loop
+    for (i = 0; i < num_reqs - 3; ++i) {
+        req = recv_nb_and_check(&small_val, sizeof(small_val), DATATYPE,
+                                tag, UCP_TAG_MASK_FULL);
+        reqs.push_back(req);
+    }
+
+    // Add request with noncontig dt
+    std::vector<char> buf(64, 0);
+    ucp::data_type_desc_t dt_desc(DATATYPE_IOV, buf.data(), buf.size(), 1);
+    req = recv_nb_and_check(dt_desc.buf(), dt_desc.count(), dt_desc.dt(),
+                            tag, UCP_TAG_MASK_FULL);
+    reqs.push_back(req);
+
+    // Add request with wildcard tag
+    req = recv_nb(&small_val, sizeof(small_val), DATATYPE, tag, 0);
+    reqs.push_back(req);
+
+    std::vector<char> recvbuf_big(big_size, 0);
+    // Check that offload is not forced while there are uncompleted blocking
+    // SW requests with the same tag
+    for (i = 0; i < 2; ++i) {
+        req = recv_nb_and_check(&recvbuf_big[0], recvbuf_big.size(), DATATYPE,
+                                tag, UCP_TAG_MASK_FULL);
+        EXPECT_EQ((unsigned)(num_reqs - i),
+                  receiver().worker()->tm.expected.sw_all_count);
+        req_cancel(receiver(), req);
+
+        // Drop the most recently added blocker (wildcard first, then IOV)
+        req_cancel(receiver(), reqs.back());
+        reqs.pop_back();
+    }
+
+    req = recv_nb(&recvbuf_big[0], recvbuf_big.size(), DATATYPE, tag,
+                  UCP_TAG_MASK_FULL);
+    reqs.push_back(req);
+
+    // Now, all requests should be posted to the transport, because receive
+    // buffer bigger than FORCE_THRESH has been posted
+    EXPECT_EQ((unsigned)0, receiver().worker()->tm.expected.sw_all_count);
+
+    std::vector<request*>::const_iterator iter;
+    for (iter = reqs.begin(); iter != reqs.end(); ++iter) {
+        req_cancel(receiver(), *iter);
+    }
+}
+
+// Check that worker will not try to connect tag offload capable iface with
+// the peer which does not support tag offload (e.g. CX-5 and CX-4). In this
+// case connection attempt should fail (due to peer unreachable) or some other
+// transport should be selected (if available). Otherwise connect can hang,
+// because some transports (e.g. rcx) have different ep address type for
+// interfaces which support tag_offload.
+UCS_TEST_P(test_ucp_tag_offload, connect) +{ + m_env.push_back(new ucs::scoped_setenv("UCX_RC_TM_ENABLE", "n")); + + entity *e = create_entity(true); + // Should be: + // - either complete ok + // - or force skipping the test (because peer is unreachable) + e->connect(&receiver(), get_ep_params()); +} + +UCS_TEST_P(test_ucp_tag_offload, small_rndv, "RNDV_THRESH=0", "TM_THRESH=0") +{ + activate_offload(sender()); + send_recv(sender(), 0x11ul, 0ul); + send_recv(sender(), 0x11ul, 1ul); +} + +UCS_TEST_P(test_ucp_tag_offload, small_sw_rndv, "RNDV_THRESH=0", "TM_THRESH=0", + "TM_SW_RNDV=y") +{ + activate_offload(sender()); + send_recv(sender(), 0x11ul, 0ul); + send_recv(sender(), 0x11ul, 1ul); +} + +UCP_INSTANTIATE_TAG_OFFLOAD_TEST_CASE(test_ucp_tag_offload) + + +class test_ucp_tag_offload_multi : public test_ucp_tag_offload { +public: + + static ucp_params_t get_ctx_params() + { + ucp_params_t params = test_ucp_tag::get_ctx_params(); + params.field_mask |= UCP_PARAM_FIELD_TAG_SENDER_MASK; + params.tag_sender_mask = TAG_SENDER; + return params; + } + + void init() + { + // The test checks that increase of active ifaces is handled + // correctly. It needs to start with a single active iface, therefore + // disable multi-rail. 
+ modify_config("MAX_EAGER_LANES", "1"); + modify_config("MAX_RNDV_LANES", "1"); + + test_ucp_tag_offload::init(); + + // TODO: add more tls which support tag offloading + std::vector tls; + tls.push_back("dc_x"); + tls.push_back("rc_x"); + ucp_test_param params = GetParam(); + + // Create new entity and add to to the end of vector + // (thus it will be receiver without any connections) + create_entity(false); + for (std::vector::const_iterator i = tls.begin(); + i != tls.end(); ++i) { + params.transports.clear(); + params.transports.push_back(*i); + create_entity(true, params); + sender().connect(&receiver(), get_ep_params()); + check_offload_support(true); + } + } + + ucp_tag_t make_tag(entity &e, ucp_tag_t t) + { + uint64_t i; + + for (i = 0; i < m_entities.size(); ++i) { + if (&m_entities.at(i) == &e) { + break; + } + } + return (i << 48) | t; + } + + void activate_offload_hashing(entity &se, ucp_tag_t tag) + { + se.connect(&receiver(), get_ep_params()); + // Need to send twice: + // 1. to ensure that wireup's UCT iface has been closed and + // it is not considered for num_active_iface on worker + // (message has to be less than `UCX_TM_THRESH` value) + // 2. to activate tag ofload + // (num_active_ifaces on worker is increased when any message + // is received on any iface. 
Tag hashing is done when we have + // more than 1 active ifaces and message has to be greater + // than `UCX_TM_THRESH` value) + send_recv(se, tag, 8); + send_recv(se, tag, 2048); + } + + void post_recv_and_check(entity &e, unsigned sw_count, ucp_tag_t tag, + ucp_tag_t tag_mask) + { + std::vector recvbuf(2048, 0); + request *req = recv_nb_and_check(&recvbuf, recvbuf.size(), DATATYPE, + make_tag(e, tag), UCP_TAG_MASK_FULL); + + EXPECT_EQ(sw_count, receiver().worker()->tm.expected.sw_all_count); + req_cancel(receiver(), req); + } + + +protected: + static const uint64_t TAG_SENDER = 0xFFFFFFFFFFFF0000; +}; + + +UCS_TEST_P(test_ucp_tag_offload_multi, recv_from_multi) +{ + ucp_tag_t tag = 0x11; + + // Activate first offload iface. Tag hashing is not done yet, since we + // have only one active iface so far. + activate_offload_hashing(e(0), make_tag(e(0), tag)); + EXPECT_EQ(0u, kh_size(&receiver().worker()->tm.offload.tag_hash)); + + // Activate second offload iface. The tag has been added to the hash. + // From now requests will be offloaded only for those tags which are + // in the hash. + activate_offload_hashing(e(1), make_tag(e(1), tag)); + EXPECT_EQ(1u, kh_size(&receiver().worker()->tm.offload.tag_hash)); + + // Need to send a message on the first iface again, for its 'tag_sender' + // part of the tag to be added to the hash. + send_recv(e(0), make_tag(e(0), tag), 2048); + EXPECT_EQ(2u, kh_size(&receiver().worker()->tm.offload.tag_hash)); + + // Now requests from first two senders should be always offloaded regardless + // of the tag value. Tag does not matter, because hashing is done with + // 'tag & tag_sender_mask' as a key. + for (int i = 0; i < 2; ++i) { + post_recv_and_check(e(i), 0u, tag + i, UCP_TAG_MASK_FULL); + } + + // This request should not be offloaded, because it is sent by the new + // sender and its 'tag_sender_mask' is not added to the hash yet. 
+ post_recv_and_check(e(2), 1u, tag, UCP_TAG_MASK_FULL); + + activate_offload_hashing(e(2), make_tag(e(2), tag)); + EXPECT_EQ(3u, kh_size(&receiver().worker()->tm.offload.tag_hash)); + + // Check that this sender was added as well + post_recv_and_check(e(2), 0u, tag + 1, UCP_TAG_MASK_FULL); +} + +// Do not include SM transports, because they would be selected for tag matching. +// And since they do not support TM offload, this test would be skipped. +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_tag_offload_multi, all_rcdc, "rc,dc") + + +class test_ucp_tag_offload_selection : public test_ucp_tag_offload { +public: + test_ucp_tag_offload_selection() { + m_env.push_back(new ucs::scoped_setenv("UCX_RC_TM_ENABLE", "y")); + } + + void init() + { + test_ucp_tag::init(); + } +}; + +UCS_TEST_P(test_ucp_tag_offload_selection, tag_lane) +{ + ucp_ep_h ep = sender().ep(); + bool has_tag_offload = false; + bool has_shm_or_self = false; + + for (ucp_rsc_index_t idx = 0; idx < sender().ucph()->num_tls; ++idx) { + if (ucp_wireup_is_rsc_self_or_shm(ep, idx)) { + has_shm_or_self = true; + } + + uct_iface_attr_t *attr = ucp_worker_iface_get_attr(sender().worker(), idx); + if (attr->cap.flags & UCT_IFACE_FLAG_TAG_EAGER_BCOPY) { + // We do not have transports with partial tag offload support + EXPECT_TRUE(attr->cap.flags & UCT_IFACE_FLAG_TAG_RNDV_ZCOPY); + has_tag_offload = true; + } + } + + ucp_ep_config_t *ep_config = ucp_ep_config(ep); + + if (has_tag_offload && !has_shm_or_self) { + EXPECT_TRUE(ucp_ep_is_tag_offload_enabled(ep_config)); + EXPECT_EQ(ep_config->key.tag_lane, ep_config->tag.lane); + } else { + // If shm or self transports exist they would be used for tag matching + // rather than network offload + EXPECT_FALSE(ucp_ep_is_tag_offload_enabled(ep_config)); + EXPECT_EQ(ep_config->key.am_lane, ep_config->tag.lane); + } +} + +UCP_INSTANTIATE_TAG_OFFLOAD_TEST_CASE(test_ucp_tag_offload_selection); +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_tag_offload_selection, self_rcx, + 
"self,rc_x"); + + +class test_ucp_tag_offload_cuda : public test_ucp_tag_offload { +public: + test_ucp_tag_offload_cuda() { + modify_config("RNDV_THRESH", "1024"); + } +}; + +// Test that expected SW RNDV request is handled properly when receive buffer +// is allocated on GPU memory. +UCS_TEST_P(test_ucp_tag_offload_cuda, sw_rndv_to_cuda_mem, "TM_SW_RNDV=y") +{ + activate_offload(sender()); + + size_t size = 2048; + ucp_tag_t tag = 0xCAFEBABEul; + // Test will be skipped here if CUDA mem is not supported + mem_buffer rbuf(size, UCS_MEMORY_TYPE_CUDA); + request *rreq = recv_nb_exp(rbuf.ptr(), size, DATATYPE, tag, + UCP_TAG_MASK_FULL); + + std::vector sendbuf(size); // can send from any memory + request *sreq = (request*)ucp_tag_send_nb(sender().ep(), &sendbuf[0], + size, DATATYPE, tag, + send_callback); + wait_and_validate(rreq); + wait_and_validate(sreq); +} + +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_tag_offload_cuda, rc_dc_cuda, + "dc_x,rc_x,cuda_copy") + + +#if ENABLE_STATS + +class test_ucp_tag_offload_stats : public test_ucp_tag_offload_multi { +public: + + void init() + { + stats_activate(); + test_ucp_tag_offload::init(); // No need for multi::init() + } + + void cleanup() + { + test_ucp_tag_offload::cleanup(); + stats_restore(); + } + + request* recv_nb_exp(void *buffer, size_t count, ucp_datatype_t dt, + ucp_tag_t tag, ucp_tag_t tag_mask) + { + request *req1 = recv_nb_and_check(buffer, count, DATATYPE, tag, + UCP_TAG_MASK_FULL); + + // Post and cancel another receive to make sure the first one was offloaded + size_t size = receiver().worker()->context->config.ext.tm_thresh + 1; + std::vector tbuf(size, 0); + request *req2 = recv_nb_and_check(&tbuf[0], size, DATATYPE, tag, + UCP_TAG_MASK_FULL); + req_cancel(receiver(), req2); + + return req1; + } + + ucs_stats_node_t* worker_offload_stats(entity &e) + { + return e.worker()->tm_offload_stats; + } + + void validate_offload_counter(uint64_t rx_cntr, uint64_t val) + { + uint64_t cnt; + cnt = 
UCS_STATS_GET_COUNTER(worker_offload_stats(receiver()), rx_cntr); + EXPECT_EQ(val, cnt); + } + + void wait_counter(ucs_stats_node_t *stats, uint64_t cntr, + double timeout = ucs::test_timeout_in_sec) + { + ucs_time_t deadline = ucs::get_deadline(timeout); + uint64_t v; + + do { + short_progress_loop(); + v = UCS_STATS_GET_COUNTER(stats, cntr); + } while ((ucs_get_time() < deadline) && !v); + + EXPECT_EQ(1ul, v); + } + + void test_send_recv(size_t count, bool send_iov, uint64_t cntr) + { + ucp_tag_t tag = 0x11; + + std::vector sbuf(count, 0); + std::vector rbuf(count, 0); + request *req = recv_nb_exp(rbuf.data(), rbuf.size(), DATATYPE, tag, + UCP_TAG_MASK_FULL); + + if (send_iov) { + ucp::data_type_desc_t dt_desc(DATATYPE_IOV, sbuf.data(), + sbuf.size(), 1); + send_b(dt_desc.buf(), dt_desc.count(), dt_desc.dt(), tag); + } else { + send_b(sbuf.data(), sbuf.size(), DATATYPE, tag); + } + wait(req); + request_free(req); + + validate_offload_counter(cntr, 1ul); + } +}; + +UCS_TEST_P(test_ucp_tag_offload_stats, post, "TM_THRESH=1") +{ + uint64_t dummy; + uint64_t tag = 0x11; + + activate_offload(sender()); + + request *rreq = recv_nb(&dummy, sizeof(dummy), DATATYPE, tag, + UCP_TAG_MASK_FULL); + + wait_counter(worker_offload_stats(receiver()), + UCP_WORKER_STAT_TAG_OFFLOAD_POSTED); + + req_cancel(receiver(), rreq); + + wait_counter(worker_offload_stats(receiver()), + UCP_WORKER_STAT_TAG_OFFLOAD_CANCELED); +} + +UCS_TEST_P(test_ucp_tag_offload_stats, block, "TM_THRESH=1") +{ + uint64_t tag = 0x11; + std::vector buf(64, 0); + + activate_offload(sender()); + + // Check BLOCK_NON_CONTIG + ucp::data_type_desc_t dt_desc(DATATYPE_IOV, buf.data(), buf.size(), 1); + request *rreq = recv_nb_and_check(dt_desc.buf(), dt_desc.count(), + dt_desc.dt(), tag, UCP_TAG_MASK_FULL); + + wait_counter(worker_offload_stats(receiver()), + UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_NON_CONTIG); + + req_cancel(receiver(), rreq); + + // Check BLOCK_WILDCARD + rreq = recv_nb_and_check(buf.data(), buf.size(), 
DATATYPE, tag, 0); + + wait_counter(worker_offload_stats(receiver()), + UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_WILDCARD); + + req_cancel(receiver(), rreq); + + // Check BLOCK_TAG_EXCEED + std::vector reqs; + uint64_t cnt; + unsigned limit = 1000; // Just a big value to avoid test hang + do { + rreq = recv_nb_and_check(buf.data(), buf.size(), DATATYPE, tag, + UCP_TAG_MASK_FULL); + cnt = UCS_STATS_GET_COUNTER(worker_offload_stats(receiver()), + UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_TAG_EXCEED); + reqs.push_back(rreq); + } while (!cnt && (--limit > 0)); + + validate_offload_counter(UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_TAG_EXCEED , 1ul); + + for (std::vector::const_iterator iter = reqs.begin(); + iter != reqs.end(); ++iter) { + req_cancel(receiver(), *iter); + } +} + +UCS_TEST_P(test_ucp_tag_offload_stats, eager, "RNDV_THRESH=1000", "TM_THRESH=64") +{ + size_t size = 512; // Size smaller than RNDV, but bigger than TM thresh + + // Offload is not activated, so the first message should arrive unexpectedly + test_send_recv(size, false, UCP_WORKER_STAT_TAG_OFFLOAD_RX_UNEXP_EGR); + test_send_recv(size, false, UCP_WORKER_STAT_TAG_OFFLOAD_MATCHED); +} + +UCS_TEST_P(test_ucp_tag_offload_stats, rndv, "RNDV_THRESH=1000") +{ + size_t size = 2048; // Size bigger than RNDV thresh + + // Offload is not activated, so the first message should arrive unexpectedly + test_send_recv(size, false, UCP_WORKER_STAT_TAG_OFFLOAD_RX_UNEXP_RNDV); + test_send_recv(size, false, UCP_WORKER_STAT_TAG_OFFLOAD_MATCHED); +} + +UCS_TEST_P(test_ucp_tag_offload_stats, sw_rndv, "RNDV_THRESH=1000") +{ + size_t size = 2048; // Size bigger than RNDV thresh + + // Offload is not activated, so the first message should arrive unexpectedly + test_send_recv(size, true, UCP_WORKER_STAT_TAG_OFFLOAD_RX_UNEXP_SW_RNDV); + test_send_recv(size, true, UCP_WORKER_STAT_TAG_OFFLOAD_MATCHED_SW_RNDV); +} + +UCS_TEST_P(test_ucp_tag_offload_stats, force_sw_rndv, "TM_SW_RNDV=y", + "RNDV_THRESH=1000") +{ + size_t size = 2048; // Size bigger 
than RNDV thresh + + // Offload is not activated, so the first message should arrive unexpectedly + test_send_recv(size, false, UCP_WORKER_STAT_TAG_OFFLOAD_RX_UNEXP_SW_RNDV); + test_send_recv(size, false, UCP_WORKER_STAT_TAG_OFFLOAD_MATCHED_SW_RNDV); +} + + +UCP_INSTANTIATE_TAG_OFFLOAD_TEST_CASE(test_ucp_tag_offload_stats) + + +class test_ucp_tag_offload_stats_cuda : public test_ucp_tag_offload_stats { +public: + test_ucp_tag_offload_stats_cuda() { + m_env.push_back(new ucs::scoped_setenv("UCX_IB_GPU_DIRECT_RDMA", "n")); + } +}; + +UCS_TEST_P(test_ucp_tag_offload_stats_cuda, block_cuda_no_gpu_direct, + "TM_THRESH=1") +{ + activate_offload(sender()); + + size_t size = 2048; + // Test will be skipped here if CUDA mem is not supported + mem_buffer rbuf(size, UCS_MEMORY_TYPE_CUDA); + request *rreq = recv_nb_and_check(rbuf.ptr(), size, DATATYPE, 0x11, + UCP_TAG_MASK_FULL); + + wait_counter(worker_offload_stats(receiver()), + UCP_WORKER_STAT_TAG_OFFLOAD_BLOCK_MEM_REG); + + validate_offload_counter(UCP_WORKER_STAT_TAG_OFFLOAD_POSTED, 0ul); + + req_cancel(receiver(), rreq); +} + +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_tag_offload_stats_cuda, rc_dc_cuda, + "dc_x,rc_x,cuda_copy") + +#endif diff --git a/test/gtest/ucp/test_ucp_tag_perf.cc b/test/gtest/ucp/test_ucp_tag_perf.cc new file mode 100644 index 0000000..60fcf6e --- /dev/null +++ b/test/gtest/ucp/test_ucp_tag_perf.cc @@ -0,0 +1,125 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/
+
+#include "test_ucp_tag.h"
+
+
+class test_ucp_tag_perf : public test_ucp_tag {
+public:
+    virtual void init() {
+        if (RUNNING_ON_VALGRIND) {
+            UCS_TEST_SKIP_R("valgrind");
+        }
+        test_ucp_tag::init();
+    }
+
+protected:
+    static const size_t    COUNT    = 8192;
+    static const ucp_tag_t TAG_MASK = 0xffffffffffffffffUL;
+
+    double check_perf(size_t count, bool is_exp);
+    void check_scalability(double max_growth, bool is_exp);
+    void do_sends(size_t count);
+};
+
+/* Returns the measured time in seconds divided by 'count', for matching
+ * 'count' zero-length messages tagged 0..count-1. When 'is_exp' is true the
+ * receives are posted before the sends; otherwise all sends are issued first
+ * and only the receive loop is timed. */
+double test_ucp_tag_perf::check_perf(size_t count, bool is_exp)
+{
+    ucs_time_t start_time;
+
+    if (is_exp) {
+        std::vector rreqs;
+
+        for (size_t i = 0; i < count; ++i) {
+            request *rreq = recv_nb(NULL, 0, DATATYPE, i, TAG_MASK);
+            assert(!UCS_PTR_IS_ERR(rreq));
+            EXPECT_FALSE(rreq->completed);
+            rreqs.push_back(rreq);
+        }
+
+        start_time = ucs_get_time();
+        do_sends(count);
+        while (!rreqs.empty()) {
+            request *rreq = rreqs.back();
+            rreqs.pop_back();
+            wait_and_validate(rreq);
+        }
+    } else {
+        ucp_tag_recv_info_t info;
+
+        send_b(NULL, 0, DATATYPE, 0xdeadbeef);
+        do_sends(count);
+        recv_b(NULL, 0, DATATYPE, 0xdeadbeef, TAG_MASK, &info);
+
+        start_time = ucs_get_time();
+        for (size_t i = 0; i < count; ++i) {
+            recv_b(NULL, 0, DATATYPE, i, TAG_MASK, &info);
+        }
+    }
+
+    return ucs_time_to_sec(ucs_get_time() - start_time) / count;
+}
+
+/* Sends 'count' zero-length messages with tags count-1 down to 0 */
+void test_ucp_tag_perf::do_sends(size_t count)
+{
+    size_t i = count;
+    while (i > 0) {
+        --i;
+        send_b(NULL, 0, DATATYPE, i);
+    }
+}
+
+/* Measures how the per-message matching time grows when the queue length is
+ * doubled, and adds a test failure if the average growth factor is not below
+ * 'max_growth' in any of the (ucs::perf_retry_count + 1) attempts. If
+ * ucs::perf_retry_count is 0, the result is only reported, not validated. */
+void test_ucp_tag_perf::check_scalability(double max_growth, bool is_exp)
+{
+    for (int retry = 0; retry < (ucs::perf_retry_count + 1); ++retry) {
+        /* Start every attempt from scratch, so that the measurements of a
+         * previous (noisy) attempt do not skew the average of a retry */
+        double prev_time    = 0.0;
+        double total_growth = 0.0;
+        size_t n            = 0;
+
+        /* Estimate by how much the tag matching time grows when the matching queue
+         * length grows by 2x. A result close to 1.0 means O(1) scalability (which
+         * is good), while a result of 2.0 or higher means O(n) or higher.
+         */
+        for (size_t count = 1; count <= COUNT; count *= 2) {
+            size_t iters      = 10 * ucs_max(1ul, COUNT / count);
+            double total_time = 0;
+            for (size_t iter = 0; iter < iters; ++iter) {
+                total_time += check_perf(count, is_exp);
+            }
+
+            double time = total_time / iters;
+            if (count >= 16) {
+                /* don't measure first few iterations - warmup */
+                total_growth += (time / prev_time);
+                ++n;
+            }
+            prev_time = time;
+        }
+
+        double avg_growth = total_growth / n;
+        UCS_TEST_MESSAGE << "Average growth: " << avg_growth;
+
+        if (!ucs::perf_retry_count) {
+            UCS_TEST_MESSAGE << "not validating performance";
+            return; /* Skip */
+        } else if (avg_growth < max_growth) {
+            return; /* Success */
+        } else {
+            ucs::safe_sleep(ucs::perf_retry_interval);
+        }
+    }
+
+    ADD_FAILURE() << "Tag matching is not scalable";
+}
+
+UCS_TEST_P(test_ucp_tag_perf, multi_exp) {
+    check_scalability(1.5, true);
+}
+
+UCS_TEST_P(test_ucp_tag_perf, multi_unexp) {
+    check_scalability(1.5, false);
+}
+
+UCP_INSTANTIATE_TEST_CASE(test_ucp_tag_perf)
diff --git a/test/gtest/ucp/test_ucp_tag_probe.cc b/test/gtest/ucp/test_ucp_tag_probe.cc
new file mode 100644
index 0000000..de7266d
--- /dev/null
+++ b/test/gtest/ucp/test_ucp_tag_probe.cc
@@ -0,0 +1,314 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014.  ALL RIGHTS RESERVED.
+* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "test_ucp_tag.h"
+
+#include
+
+
+class test_ucp_tag_probe : public test_ucp_tag {
+public:
+    test_ucp_tag_probe() {
+        if (has_transport("tcp")) {
+            /* Decrease `TX_SEG_SIZE` and `RX_SEG_SIZE` parameters
+             * for TCP transport to be able fully consume receive
+             * buffer by 100-byte messages */
+            m_env.push_back(new ucs::scoped_setenv("UCX_TCP_TX_SEG_SIZE", "4k"));
+            m_env.push_back(new ucs::scoped_setenv("UCX_TCP_RX_SEG_SIZE", "4k"));
+        }
+    }
+
+    /* The parameters mean the following:
+     * - s_size and r_size: send and recv buffer sizes.
+ * Can be different for checking message transaction error + * - is_sync: specifies the type of send function to be used + * (sync or not) + * - is_recv_msg: specifies whether probe function needs to remove + * matched message. If yes, then ucp_tag_msg_recv_nb is used for + * receive + * */ + void test_send_probe (size_t s_size, size_t r_size, bool is_sync, + int is_recv_msg) { + ucp_tag_recv_info_t info; + ucp_tag_message_h message; + request *send_req = NULL; + request *recv_req = NULL; + + std::vector sendbuf(s_size, 0); + std::vector recvbuf(r_size, 0); + + ucs::fill_random(sendbuf); + + message = ucp_tag_probe_nb(receiver().worker(), 0x1337, 0xffff, + is_recv_msg, &info); + EXPECT_TRUE(message == NULL); + + if (is_sync) { + send_req = send_sync_nb(&sendbuf[0], sendbuf.size(), DATATYPE, + 0x111337); + + } else { + send_req = send_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + } + + do { + progress(); + message = ucp_tag_probe_nb(receiver().worker(), 0x1337, 0xffff, + is_recv_msg, &info); + } while (message == NULL); + + EXPECT_EQ(sendbuf.size(), info.length); + EXPECT_EQ((ucp_tag_t)0x111337, info.sender_tag); + + if (is_recv_msg == 0) { + recv_req = recv_nb(&recvbuf[0], recvbuf.size(), DATATYPE, + 0x1337, 0xffff); + } else { + recv_req = (request*)ucp_tag_msg_recv_nb(receiver().worker(), + &recvbuf[0],recvbuf.size(), + DATATYPE, message, recv_callback); + ASSERT_TRUE(!UCS_PTR_IS_ERR(recv_req)); + } + + wait(recv_req); + EXPECT_TRUE(recv_req->completed); + if (s_size != r_size) { + /* Test for correct msg transaction handling */ + EXPECT_EQ(UCS_ERR_MESSAGE_TRUNCATED, recv_req->status); + } else { + /* Everything should be received correctly */ + EXPECT_EQ(UCS_OK, recv_req->status); + EXPECT_EQ(sendbuf.size(), recv_req->info.length); + EXPECT_EQ((ucp_tag_t)0x111337, recv_req->info.sender_tag); + EXPECT_EQ(sendbuf, recvbuf); + } + request_release(recv_req); + + if (UCS_PTR_IS_PTR(send_req)) { + wait(send_req); + EXPECT_TRUE(send_req->completed); + 
EXPECT_EQ(UCS_OK, send_req->status); + request_release(send_req); + } + } + + int probe_all(std::string &recvbuf) + { + ucp_tag_message_h message; + ucp_tag_recv_info_t info; + request *req; + + int count = 0; + for (;;) { + message = ucp_tag_probe_nb(receiver().worker(), 0, 0, 1, &info); + if (message == NULL) { + return count; + } + + req = (request*)ucp_tag_msg_recv_nb(receiver().worker(), + &recvbuf[0], recvbuf.size(), + DATATYPE, message, recv_callback); + wait(req); + request_release(req); + ++count; + } + } +}; + + +UCS_TEST_P(test_ucp_tag_probe, send_probe) { + test_send_probe (8, 8, false, 0); + test_send_probe (8, 8, true, 0); +} + +UCS_TEST_P(test_ucp_tag_probe, send_medium_msg_probe, "RNDV_THRESH=1048576") { + test_send_probe (50000, 50000, false, 1); + test_send_probe (50000, 50000, true, 1); +} + +UCS_TEST_P(test_ucp_tag_probe, send_medium_msg_probe_truncated, "RNDV_THRESH=1048576") { + test_send_probe (50000, 0, false, 1); + test_send_probe (50000, 0, true, 1); +} + +UCS_TEST_P(test_ucp_tag_probe, send_rndv_msg_probe, "RNDV_THRESH=1048576") { + static const size_t size = 1148576; + ucp_tag_recv_info_t info; + ucp_tag_message_h message; + request *my_send_req, *my_recv_req; + + skip_loopback(); + + std::vector sendbuf(size, 0); + std::vector recvbuf(size, 0); + + ucs::fill_random(sendbuf); + + message = ucp_tag_probe_nb(receiver().worker(), 0x1337, 0xffff, 1, &info); + EXPECT_TRUE(message == NULL); + + /* sender - send the RTS */ + my_send_req = send_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_send_req)); + + /* receiver - get the RTS and put it into unexpected */ + wait_for_unexpected_msg(receiver().worker(), 10.0); + + /* receiver - match the rts, remove it from unexpected and return it */ + message = ucp_tag_probe_nb(receiver().worker(), 0x1337, 0xffff, 1, &info); + /* make sure that there was a match (RTS) */ + ASSERT_TRUE(message != NULL); + EXPECT_EQ(sendbuf.size(), info.length); + 
EXPECT_EQ((ucp_tag_t)0x111337, info.sender_tag); + + /* receiver - process the rts and schedule a get operation */ + my_recv_req = (request*)ucp_tag_msg_recv_nb(receiver().worker(), &recvbuf[0], + recvbuf.size(), DATATYPE, message, + recv_callback); + ASSERT_TRUE(!UCS_PTR_IS_ERR(my_recv_req)); + + /* receiver - perform rndv get and send the ATS */ + wait(my_recv_req); + EXPECT_TRUE(my_recv_req->completed); + + /* sender - get the ATS and set send request to completed */ + short_progress_loop(); + + EXPECT_EQ(UCS_OK, my_recv_req->status); + EXPECT_EQ(sendbuf.size(), my_recv_req->info.length); + EXPECT_EQ((ucp_tag_t)0x111337, my_recv_req->info.sender_tag); + EXPECT_EQ(sendbuf, recvbuf); + + wait_and_validate(my_send_req); + request_release(my_recv_req); +} + +UCS_TEST_P(test_ucp_tag_probe, send_2_msg_probe, "RNDV_THRESH=inf") { + const ucp_datatype_t DT_INT = ucp_dt_make_contig(sizeof(int)); + const ucp_tag_t TAG = 0xaaa; + const size_t COUNT = 20000; + std::vector reqs; + + /* + * send in order: 1, 2 + */ + std::vector sdata1(COUNT, 1); + std::vector sdata2(COUNT, 2); + request *sreq1 = send_nb(&sdata1[0], COUNT, DT_INT, TAG); + if (sreq1 != NULL) { + reqs.push_back(sreq1); + } + request *sreq2 = send_nb(&sdata2[0], COUNT, DT_INT, TAG); + if (sreq2 != NULL) { + reqs.push_back(sreq2); + } + + /* + * probe in order: 1, 2 + */ + ucp_tag_message_h message1, message2; + ucp_tag_recv_info_t info; + do { + progress(); + message1 = ucp_tag_probe_nb(receiver().worker(), TAG, 0xffff, 1, &info); + } while (message1 == NULL); + do { + progress(); + message2 = ucp_tag_probe_nb(receiver().worker(), TAG, 0xffff, 1, &info); + } while (message2 == NULL); + + /* + * receive in **reverse** order: 2, 1 + */ + std::vector rdata2(COUNT); + request *rreq2 = (request*)ucp_tag_msg_recv_nb(receiver().worker(), &rdata2[0], + COUNT, DT_INT, message2, + recv_callback); + reqs.push_back(rreq2); + ASSERT_TRUE(!UCS_PTR_IS_ERR(rreq2)); + wait(rreq2); + + std::vector rdata1(COUNT); + request *rreq1 
= (request*)ucp_tag_msg_recv_nb(receiver().worker(), &rdata1[0], + COUNT, DT_INT, message1, + recv_callback); + reqs.push_back(rreq1); + ASSERT_TRUE(!UCS_PTR_IS_ERR(rreq1)); + wait(rreq1); + + if (sreq1 != NULL) { + wait(sreq1); + } + if (sreq2 != NULL) { + wait(sreq2); + } + + /* + * expect data to arrive in probe order (rather than recv order) + */ + EXPECT_EQ(sdata1, rdata1); + EXPECT_EQ(sdata2, rdata2); + while (!reqs.empty()) { + request *req = reqs.back(); + EXPECT_TRUE(req->completed); + EXPECT_EQ(UCS_OK, req->status); + request_release(req); + reqs.pop_back(); + } +} + +UCS_TEST_P(test_ucp_tag_probe, limited_probe_size) { + static const int COUNT = 1000; + std::string sendbuf, recvbuf; + std::vector reqs; + ucp_tag_recv_info_t info; + request *req; + int recvd; + + skip_loopback(); + + sendbuf.resize(100, '1'); + recvbuf.resize(100, '0'); + + send_b(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + recv_b(&recvbuf[0], recvbuf.size(), DATATYPE, 0x111337, 0xffffff, &info); + + /* send 1000 messages without calling progress */ + for (int i = 0; i < COUNT; ++i) { + req = send_nb(&sendbuf[0], sendbuf.size(), DATATYPE, 0x111337); + if (req != NULL) { + reqs.push_back(req); + } + + sender().progress(); /* progress only the sender */ + } + + for (int i = 0; i < 1000; ++i) { + ucs::safe_usleep(1000); + sender().progress(); + } + + /* progress once */ + ucp_worker_progress(receiver().worker()); + + /* probe should not have too many messages here because we poll once */ + recvd = probe_all(recvbuf); + EXPECT_LE(recvd, 128); + + /* receive all the rest */ + while (recvd < COUNT) { + progress(); + recvd += probe_all(recvbuf); + } + + while (!reqs.empty()) { + wait(reqs.back()); + request_release(reqs.back()); + reqs.pop_back(); + } +} +UCP_INSTANTIATE_TEST_CASE(test_ucp_tag_probe) diff --git a/test/gtest/ucp/test_ucp_tag_xfer.cc b/test/gtest/ucp/test_ucp_tag_xfer.cc new file mode 100644 index 0000000..a0e546d --- /dev/null +++ b/test/gtest/ucp/test_ucp_tag_xfer.cc @@ 
-0,0 +1,1128 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "test_ucp_tag.h" + +#include "ucp_datatype.h" + +extern "C" { +#include +#include +} + +#include + + +class test_ucp_tag_xfer : public test_ucp_tag { +public: + enum { + VARIANT_DEFAULT, + VARIANT_ERR_HANDLING, + VARIANT_RNDV_PUT_ZCOPY, + VARIANT_RNDV_GET_ZCOPY, + VARIANT_RNDV_AUTO, + VARIANT_SEND_NBR, + }; + + test_ucp_tag_xfer() { + // TODO: test offload and offload MP as different variants + enable_tag_mp_offload(); + + if (RUNNING_ON_VALGRIND) { + // Alow using TM MP offload for messages with a size of at least + // 10000 bytes by setting HW TM segment size to 10 kB, since each + // packet in TM MP offload is MTU-size buffer (i.e., in most cases + // it is 4 kB segments) + m_env.push_back(new ucs::scoped_setenv("UCX_RC_TM_SEG_SIZE", "10k")); + m_env.push_back(new ucs::scoped_setenv("UCX_TCP_RX_SEG_SIZE", "8k")); + } + } + + virtual void init() { + if (GetParam().variant == VARIANT_RNDV_PUT_ZCOPY) { + modify_config("RNDV_SCHEME", "put_zcopy"); + } else if (GetParam().variant == VARIANT_RNDV_GET_ZCOPY) { + modify_config("RNDV_SCHEME", "get_zcopy"); + } else if (GetParam().variant == VARIANT_RNDV_AUTO) { + modify_config("RNDV_SCHEME", "auto"); + } + modify_config("MAX_EAGER_LANES", "2"); + modify_config("MAX_RNDV_LANES", "2"); + + test_ucp_tag::init(); + } + + bool skip_on_ib_dc() { +#if HAVE_DC_DV + // skip due to DCI stuck bug + return has_transport("dc_x"); +#else + return false; +#endif + } + + std::vector + static enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) + { + std::vector result; + generate_test_params_variant(ctx_params, name, test_case_name, tls, + VARIANT_DEFAULT, result); + generate_test_params_variant(ctx_params, name, + test_case_name + 
"/err_handling_mode_peer", + tls, VARIANT_ERR_HANDLING, result); + generate_test_params_variant(ctx_params, name, + test_case_name + "/rndv_put_zcopy", tls, + VARIANT_RNDV_PUT_ZCOPY, result); + generate_test_params_variant(ctx_params, name, + test_case_name + "/rndv_get_zcopy", tls, + VARIANT_RNDV_GET_ZCOPY, result); + generate_test_params_variant(ctx_params, name, + test_case_name + "/rndv_auto", tls, + VARIANT_RNDV_AUTO, result); + generate_test_params_variant(ctx_params, name, + test_case_name + "/send_nbr", tls, + VARIANT_SEND_NBR, result); + return result; + } + + virtual ucp_ep_params_t get_ep_params() { + ucp_ep_params_t ep_params = test_ucp_tag::get_ep_params(); + if (GetParam().variant == VARIANT_ERR_HANDLING) { + ep_params.field_mask |= UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE; + ep_params.err_mode = UCP_ERR_HANDLING_MODE_PEER; + } + return ep_params; + } + + bool is_err_handling() const { + return GetParam().variant == VARIANT_ERR_HANDLING; + } + + void skip_err_handling() const { + if (is_err_handling()) { + UCS_TEST_SKIP_R("err_handling"); + } + } + + void test_xfer_contig(size_t size, bool expected, bool sync, bool truncated); + void test_xfer_generic(size_t size, bool expected, bool sync, bool truncated); + void test_xfer_iov(size_t size, bool expected, bool sync, bool truncated); + void test_xfer_generic_err(size_t size, bool expected, bool sync, bool truncated); + +protected: + typedef void (test_ucp_tag_xfer::* xfer_func_t)(size_t size, bool expected, + bool sync, bool truncated); + + size_t do_xfer(const void *sendbuf, void *recvbuf, size_t count, + ucp_datatype_t send_dt, ucp_datatype_t recv_dt, + bool expected, bool sync, bool truncated); + + void test_xfer(xfer_func_t func, bool expected, bool sync, bool truncated); + void test_run_xfer(bool send_contig, bool recv_contig, + bool expected, bool sync, bool truncated); + void test_xfer_prepare_bufs(uint8_t *sendbuf, uint8_t *recvbuf, size_t count, + bool send_contig, bool recv_contig, + 
ucp_datatype_t *send_dt, + ucp_datatype_t *recv_dt); + void test_xfer_probe(bool send_contig, bool recv_contig, + bool expected, bool sync); + + void test_xfer_len_offset(); + +private: + request* do_send(const void *sendbuf, size_t count, ucp_datatype_t dt, bool sync); + + static const uint64_t SENDER_TAG = 0x111337; + static const uint64_t RECV_MASK = 0xffff; + static const uint64_t RECV_TAG = 0x1337; + +}; + +int check_buffers(const std::vector &sendbuf, const std::vector &recvbuf, + size_t recvd, size_t send_iovcnt, size_t recv_iovcnt, + size_t size, bool expected, bool sync, const std::string datatype) +{ + int buffers_equal = memcmp(sendbuf.data(), recvbuf.data(), recvd); + if (buffers_equal) { + std::cout << "\n"; + ucs::detail::message_stream ms("INFO"); + for (size_t it = 0; it < recvd; ++it) { + if (sendbuf[it] != recvbuf[it]) { + ms << datatype << ':' + << " send_iovcnt=" << std::dec << send_iovcnt + << " recv_iovcnt=" << recv_iovcnt << " size=" << size + << " expected=" << expected << " sync=" << sync + << " Sendbuf[" << std::dec << it << "]=0x" + << std::hex << (static_cast(sendbuf[it]) & 0xff) << ',' + << " Recvbuf[" << std::dec << it << "]=0x" + << std::hex << (static_cast(recvbuf[it]) & 0xff) << std::endl; + break; + } + } + } + return buffers_equal; +} + +void test_ucp_tag_xfer::test_xfer(xfer_func_t func, bool expected, bool sync, + bool truncated) +{ + if (sync) { + skip_err_handling(); + } + + ucs::detail::message_stream ms("INFO"); + + ms << "0 " << std::flush; + (this->*func)(0, expected, sync, false); + + for (unsigned i = 1; i <= 7; ++i) { + size_t max = (long)pow(10.0, i); + + long count = ucs_max((long)(5000.0 / sqrt(max) / ucs::test_time_multiplier()), + 3); + if (!expected) { + count = ucs_min(count, 50); + } + ms << count << "x10^" << i << " " << std::flush; + for (long j = 0; j < count; ++j) { + size_t size = ucs::rand() % max + 1; + (this->*func)(size, expected, sync, truncated); + } + } +} + +void 
test_ucp_tag_xfer::test_xfer_prepare_bufs(uint8_t *sendbuf, uint8_t *recvbuf, + size_t count, bool send_contig, + bool recv_contig, + ucp_datatype_t *send_dt, + ucp_datatype_t *recv_dt) +{ + ucs_status_t status; + + if (send_contig) { + /* the sender has a contig datatype for the data buffer */ + for (unsigned i = 0; i < count; ++i) { + sendbuf[i] = i % 256; + } + *send_dt = DATATYPE; + } else { + /* the sender has a generic datatype */ + status = ucp_dt_create_generic(&ucp::test_dt_uint8_ops, NULL, send_dt); + ASSERT_UCS_OK(status); + } + + if (recv_contig) { + /* the recv has a contig datatype for the data buffer */ + *recv_dt = DATATYPE; + } else { + /* the receiver has a generic datatype */ + status = ucp_dt_create_generic(&ucp::test_dt_uint8_ops, NULL, recv_dt); + /* the recvbuf can be NULL because we only validate the received data in the + * unpack function - we don't copy it to the recvbuf */ + ASSERT_UCS_OK(status); + } +} + +void test_ucp_tag_xfer::test_run_xfer(bool send_contig, bool recv_contig, + bool expected, bool sync, bool truncated) +{ + static const size_t count = 1148544 / ucs::test_time_multiplier(); + uint8_t *sendbuf = NULL, *recvbuf = NULL; + ucp_datatype_t send_dt, recv_dt; + size_t recvd; + + if (sync) { + skip_err_handling(); + } + + ucp::dt_gen_start_count = 0; + ucp::dt_gen_finish_count = 0; + + if (send_contig) { + /* the sender has a contig datatype for the data buffer */ + sendbuf = (uint8_t*) malloc(count * sizeof(*sendbuf)); + } + if (recv_contig) { + /* the recv has a contig datatype for the data buffer */ + recvbuf = (uint8_t*) malloc(count * sizeof(*recvbuf)); + } + + test_xfer_prepare_bufs(sendbuf, recvbuf, count, send_contig, recv_contig, + &send_dt, &recv_dt); + + /* coverity[var_deref_model] */ + /* coverity[var_deref_op] */ + recvd = do_xfer(&sendbuf[0], &recvbuf[0], count, send_dt, recv_dt, expected, + sync, truncated); + if (!truncated) { + EXPECT_EQ(count * sizeof(uint8_t), recvd); + } + + if (send_contig) { + 
free(sendbuf);
+    } else {
+        ucp_dt_destroy(send_dt);
+    }
+
+    if (recv_contig) {
+        free(recvbuf);
+    } else {
+        ucp_dt_destroy(recv_dt);
+    }
+}
+
+/*
+ * Sends one large message, polls ucp_tag_probe_nb() on the receiver for up to
+ * 10 seconds until the message is matched, and then receives it with
+ * ucp_tag_msg_recv_nb().
+ * - send_contig / recv_contig: select a contiguous or a generic datatype for
+ *   the corresponding side (see test_xfer_prepare_bufs)
+ * - expected / sync: not used by this function
+ */
+void test_ucp_tag_xfer::test_xfer_probe(bool send_contig, bool recv_contig,
+                                        bool expected, bool sync)
+{
+    static const size_t count = 1148544 / ucs::test_time_multiplier();
+    uint8_t *sendbuf = NULL;
+    uint8_t *recvbuf = NULL;
+    ucp_datatype_t send_dt, recv_dt;
+    ucp_tag_message_h message;
+    ucp_tag_recv_info_t info;
+    request *rreq, *sreq;
+
+    /* the self transport doesn't do rndv and completes the send immediately */
+    skip_loopback();
+
+    ucp::dt_gen_start_count  = 0;
+    ucp::dt_gen_finish_count = 0;
+
+    sendbuf = (uint8_t*) malloc(count * sizeof(*sendbuf));
+    recvbuf = (uint8_t*) malloc(count * sizeof(*recvbuf));
+    if ((sendbuf == NULL) || (recvbuf == NULL)) {
+        /* free(NULL) is a no-op, so this releases whichever one succeeded */
+        free(sendbuf);
+        free(recvbuf);
+        FAIL() << "failed to allocate test buffers of " << count << " bytes";
+    }
+
+    test_xfer_prepare_bufs(sendbuf, recvbuf, count, send_contig, recv_contig,
+                           &send_dt, &recv_dt);
+
+    /* nothing was sent yet, so the probe must not match */
+    info.length = 0;
+    message = ucp_tag_probe_nb(receiver().worker(), RECV_TAG, RECV_MASK, 1, &info);
+    EXPECT_TRUE(message == NULL);
+
+    sreq = send_nb(&sendbuf[0], count, send_dt, SENDER_TAG);
+    EXPECT_TRUE(!UCS_PTR_IS_ERR(sreq));
+    if (sreq != NULL) {
+        EXPECT_FALSE(sreq->completed);
+    }
+
+    /* put RTS into the unexpected queue */
+    ucs_time_t loop_end_limit = ucs_get_time() + ucs_time_from_sec(10.0);
+    do {
+        short_progress_loop();
+        message = ucp_tag_probe_nb(receiver().worker(), RECV_TAG, RECV_MASK, 1, &info);
+    } while ((ucs_get_time() < loop_end_limit) && (message == NULL));
+
+    /* make sure that there was a match (RTS). This has to be fatal, because
+     * 'message' is handed to ucp_tag_msg_recv_nb() below. The buffers are
+     * deliberately not freed on this path: the send posted above has not been
+     * waited for and may still be using 'sendbuf'. */
+    ASSERT_TRUE(message != NULL);
+    EXPECT_EQ(count, info.length);
+    EXPECT_EQ((ucp_tag_t)SENDER_TAG, info.sender_tag);
+
+    /* coverity[var_deref_model] */
+    rreq = (request*) ucp_tag_msg_recv_nb(receiver().worker(), &recvbuf[0],
+                                          count, recv_dt, message, recv_callback);
+    ASSERT_TRUE(!UCS_PTR_IS_ERR(rreq));
+
+    wait(rreq);
+    if (sreq != NULL) {
+        wait(sreq);
+        request_release(sreq);
+    }
+    request_release(rreq);
+
+    free(sendbuf);
+    free(recvbuf);
+    if (!send_contig) {
+        ucp_dt_destroy(send_dt);
+    }
+    if (!recv_contig) {
+        ucp_dt_destroy(recv_dt);
+    }
+}
+
+/*
+ * Transfers 'size' random elements with the contiguous DATATYPE through
+ * do_xfer() and compares the first 'recvd' elements of both buffers.
+ */
+void test_ucp_tag_xfer::test_xfer_contig(size_t size, bool expected, bool sync,
+                                         bool truncated)
+{
+    std::vector sendbuf(size, 0);
+    std::vector recvbuf(size, 0);
+
+    ucs::fill_random(sendbuf);
+    /* use data() rather than &buf[0]: test_xfer() also calls this function
+     * with size == 0, and indexing an empty vector is undefined */
+    size_t recvd = do_xfer(sendbuf.data(), recvbuf.data(), size, DATATYPE,
+                           DATATYPE, expected, sync, truncated);
+    if (!truncated) {
+        ASSERT_EQ(sendbuf.size(), recvd);
+    }
+    EXPECT_TRUE(!check_buffers(sendbuf, recvbuf, recvd, 1, 1,
+                               size, expected, sync, "contig"));
+}
+
+void test_ucp_tag_xfer::test_xfer_generic(size_t size, bool expected, bool sync,
+                                          bool truncated)
+{
+    size_t count = size / sizeof(uint32_t);
+    ucp_datatype_t dt;
+    ucs_status_t status;
+    size_t recvd;
+
+    ucp::dt_gen_start_count  = 0;
+    ucp::dt_gen_finish_count = 0;
+
+    /* if count is zero, truncation has no effect */
+    if ((truncated) && (!count)) {
+        truncated = false;
+    }
+
+    status = ucp_dt_create_generic(&ucp::test_dt_uint32_ops, NULL, &dt);
+    ASSERT_UCS_OK(status);
+
+    recvd = do_xfer(NULL, NULL, count, dt, dt, expected, sync, truncated);
+    if (!truncated) {
+        EXPECT_EQ(count * sizeof(uint32_t), recvd);
+    }
+    EXPECT_EQ(2, ucp::dt_gen_start_count);
+    EXPECT_EQ(2, ucp::dt_gen_finish_count);
+
+    ucp_dt_destroy(dt);
+}
+
+void test_ucp_tag_xfer::test_xfer_iov(size_t size, bool expected, bool sync,
+                                      bool truncated)
+{
+    const size_t iovcnt = 20;
+    std::vector sendbuf(size, 0);
+    std::vector recvbuf(size, 0);
+
+    ucs::fill_random(sendbuf);
+
+    ucp::data_type_desc_t send_dt_desc(DATATYPE_IOV, sendbuf.data(),
+                                       sendbuf.size(), iovcnt);
+    ucp::data_type_desc_t recv_dt_desc(DATATYPE_IOV, recvbuf.data(),
+                                       recvbuf.size(), iovcnt);
+
+    size_t recvd = do_xfer(send_dt_desc.buf(), recv_dt_desc.buf(), iovcnt,
+                           DATATYPE_IOV, DATATYPE_IOV, expected, sync,
+                           truncated);
+    if (!truncated) {
+        ASSERT_EQ(sendbuf.size(), recvd);
+    }
+    EXPECT_TRUE(!check_buffers(sendbuf, recvbuf, recvd, send_dt_desc.count(),
+                               recv_dt_desc.count(),
size, expected, sync, + "IOV")); +} + +void test_ucp_tag_xfer::test_xfer_generic_err(size_t size, bool expected, + bool sync, bool truncated) +{ + size_t count = size / sizeof(uint32_t); + ucp_datatype_t dt; + ucs_status_t status; + request *rreq, *sreq; + + ucp::dt_gen_start_count = 0; + ucp::dt_gen_finish_count = 0; + + status = ucp_dt_create_generic(&ucp::test_dt_uint32_err_ops, this, &dt); + ASSERT_UCS_OK(status); + + if (expected) { + rreq = recv_nb(NULL, count, dt, RECV_TAG, RECV_MASK); + sreq = do_send(NULL, count, dt, sync); + } else { + sreq = do_send(NULL, count, dt, sync); + short_progress_loop(); + if (sync) { + EXPECT_FALSE(sreq->completed); + } + rreq = recv_nb(NULL, count, dt, RECV_TAG, RECV_MASK); + } + + /* progress both sender and receiver */ + wait(rreq); + if (sreq != NULL) { + wait(sreq); + request_release(sreq); + } + + /* the generic unpack function is expected to fail */ + EXPECT_EQ(UCS_ERR_NO_MEMORY, rreq->status); + request_release(rreq); + EXPECT_EQ(2, ucp::dt_gen_start_count); + EXPECT_EQ(2, ucp::dt_gen_finish_count); + ucp_dt_destroy(dt); +} + +test_ucp_tag_xfer::request* +test_ucp_tag_xfer::do_send(const void *sendbuf, size_t count, ucp_datatype_t dt, + bool sync) +{ + if (sync) { + return send_sync_nb(sendbuf, count, dt, SENDER_TAG); + } else { + if (GetParam().variant == VARIANT_SEND_NBR) { + return send_nbr(sendbuf, count, dt, SENDER_TAG); + } + return send_nb(sendbuf, count, dt, SENDER_TAG); + } +} + +size_t test_ucp_tag_xfer::do_xfer(const void *sendbuf, void *recvbuf, + size_t count, ucp_datatype_t send_dt, + ucp_datatype_t recv_dt, bool expected, + bool sync, bool truncated) +{ + request *rreq, *sreq; + size_t recvd = 0; + size_t recv_count = count; + + if (truncated) { + recv_count /= 2; + } + + if (expected) { + rreq = recv_nb(recvbuf, recv_count, recv_dt, RECV_TAG, RECV_MASK); + sreq = do_send(sendbuf, count, send_dt, sync); + } else { + sreq = do_send(sendbuf, count, send_dt, sync); + + 
wait_for_unexpected_msg(receiver().worker(), 10.0); + + if (sync) { + EXPECT_FALSE(sreq->completed); + } + rreq = recv_nb(recvbuf, recv_count, recv_dt, RECV_TAG, RECV_MASK); + } + + /* progress both sender and receiver */ + wait(rreq); + if (sreq != NULL) { + wait(sreq); + request_release(sreq); + } + + recvd = rreq->info.length; + if (!truncated) { + EXPECT_UCS_OK(rreq->status); + EXPECT_EQ((ucp_tag_t)SENDER_TAG, rreq->info.sender_tag); + } else { + EXPECT_EQ(UCS_ERR_MESSAGE_TRUNCATED, rreq->status); + } + + request_release(rreq); + return recvd; +} + +void test_ucp_tag_xfer::test_xfer_len_offset() +{ + const size_t max_offset = 128; + const size_t max_length = 64 * UCS_KBYTE; + const size_t min_length = UCS_KBYTE; + const size_t offset_step = 16; + const size_t length_step = 16; + const size_t buf_size = max_length + max_offset + 2; + ucp_datatype_t type = ucp_dt_make_contig(1); + void *send_buf = 0; + void *recv_buf = 0;; + size_t offset; + size_t length; + ucs::detail::message_stream *ms; + + skip_err_handling(); + + EXPECT_EQ(posix_memalign(&send_buf, 8192, buf_size), 0); + EXPECT_EQ(posix_memalign(&recv_buf, 8192, buf_size), 0); + + memset(send_buf, 0, buf_size); + memset(recv_buf, 0, buf_size); + + for (offset = 0; offset <= max_offset; offset += offset_step) { + if (!offset || ucs_is_pow2(offset)) { + ms = new ucs::detail::message_stream("INFO"); + *ms << "offset: " << offset << ": "; + } else { + ms = NULL; + } + for (length = min_length; length <= max_length; length += length_step) { + if (ms && ucs_is_pow2(length)) { + *ms << length << " "; + fflush(stdout); + } + + do_xfer((char*)send_buf + offset, (char*)recv_buf + offset, + length, type, type, true, true, false); + do_xfer((char*)send_buf + max_offset - offset, + (char*)recv_buf + max_offset - offset, + length, type, type, true, true, false); + } + if (ms) { + delete(ms); + } + } + + free(recv_buf); + free(send_buf); +} + +UCS_TEST_P(test_ucp_tag_xfer, contig_exp) { + 
test_xfer(&test_ucp_tag_xfer::test_xfer_contig, true, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, contig_exp_truncated) { + check_offload_support(false); + test_xfer(&test_ucp_tag_xfer::test_xfer_contig, true, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, contig_unexp) { + test_xfer(&test_ucp_tag_xfer::test_xfer_contig, false, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, generic_exp) { + test_xfer(&test_ucp_tag_xfer::test_xfer_generic, true, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, generic_exp_truncated) { + test_xfer(&test_ucp_tag_xfer::test_xfer_generic, true, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, generic_unexp) { + test_xfer(&test_ucp_tag_xfer::test_xfer_generic, false, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, generic_unexp_truncated) { + test_xfer(&test_ucp_tag_xfer::test_xfer_generic, false, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, iov_exp) { + test_xfer(&test_ucp_tag_xfer::test_xfer_iov, true, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, iov_exp_truncated) { + test_xfer(&test_ucp_tag_xfer::test_xfer_iov, true, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, iov_unexp) { + test_xfer(&test_ucp_tag_xfer::test_xfer_iov, false, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, generic_err_exp) { + test_xfer(&test_ucp_tag_xfer::test_xfer_generic_err, true, false, false); +} + +UCS_TEST_SKIP_COND_P(test_ucp_tag_xfer, generic_err_unexp, + skip_on_ib_dc()) { + test_xfer(&test_ucp_tag_xfer::test_xfer_generic_err, false, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, generic_err_exp_sync) { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_xfer(&test_ucp_tag_xfer::test_xfer_generic_err, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, generic_err_unexp_sync) { + test_xfer(&test_ucp_tag_xfer::test_xfer_generic_err, false, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, contig_exp_sync) { + /* because 
ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_xfer(&test_ucp_tag_xfer::test_xfer_contig, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, contig_exp_sync_zcopy, "ZCOPY_THRESH=1000") { + skip_loopback(); + test_xfer(&test_ucp_tag_xfer::test_xfer_contig, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, contig_unexp_sync) { + test_xfer(&test_ucp_tag_xfer::test_xfer_contig, false, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, generic_exp_sync) { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_xfer(&test_ucp_tag_xfer::test_xfer_generic, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, generic_unexp_sync) { + test_xfer(&test_ucp_tag_xfer::test_xfer_generic, false, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, iov_exp_sync) { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_xfer(&test_ucp_tag_xfer::test_xfer_iov, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, iov_unexp_sync) { + test_xfer(&test_ucp_tag_xfer::test_xfer_iov, false, true, false); +} + +/* send_contig_recv_contig */ + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_contig_exp, "RNDV_THRESH=1248576") { + test_run_xfer(true, true, true, false, false); +} + +/* send_generic_recv_generic */ + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_generic_exp, "RNDV_THRESH=1248576") { + test_run_xfer(false, false, true, false, false); +} + +/* send_contig_recv_generic */ + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp, "RNDV_THRESH=1248576") { + test_run_xfer(true, false, true, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_unexp_sync, "RNDV_THRESH=1248576") { + test_run_xfer(true, false, false, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_sync, 
"RNDV_THRESH=1248576") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(true, false, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_unexp, "RNDV_THRESH=1248576") { + test_run_xfer(true, false, false, false, false); +} + +/* send_generic_recv_contig */ + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_exp, "RNDV_THRESH=1248576") { + test_run_xfer(false, true, true, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_unexp_sync, "RNDV_THRESH=1248576") { + test_run_xfer(false, true, false, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_exp_sync, "RNDV_THRESH=1248576") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(false, true, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_unexp, "RNDV_THRESH=1248576", + "ZCOPY_THRESH=1248576") { + test_run_xfer(false, true, false, false, false); +} + +/* rndv send_config_recv_config am_rndv with bcopy on the sender side + * (zcopy is tested in the match tests) */ + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_contig_exp_rndv, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1248576") { + test_run_xfer(true, true, true, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_contig_exp_rndv_truncated, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1248576") { + check_offload_support(false); + test_run_xfer(true, true, true, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_contig_exp_sync_rndv, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1248576") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(true, true, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_contig_exp_sync_rndv_truncated, + 
"RNDV_THRESH=1000", "ZCOPY_THRESH=1248576") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(true, true, true, true, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_contig_unexp_rndv, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1248576") { + test_run_xfer(true, true, false, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_contig_unexp_rndv_truncated, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1248576") { + test_run_xfer(true, true, false, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_contig_unexp_sync_rndv, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1248576") { + test_run_xfer(true, true, false, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_contig_unexp_sync_rndv_truncated, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1248576") { + test_run_xfer(true, true, false, true, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_contig_exp_rndv_probe, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1248576") { + test_xfer_probe(true, true, true, false); +} + +/* rndv send_generic_recv_generic am_rndv with bcopy on the sender side */ + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_generic_exp_rndv, "RNDV_THRESH=1000") { + test_run_xfer(false, false, true, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_generic_exp_rndv_truncated, "RNDV_THRESH=1000") { + test_run_xfer(false, false, true, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_generic_exp_sync_rndv, "RNDV_THRESH=1000") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(false, false, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_generic_exp_sync_rndv_truncated, + "RNDV_THRESH=1000") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + 
test_run_xfer(false, false, true, true, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_generic_unexp_rndv, "RNDV_THRESH=1000") { + test_run_xfer(false, false, false, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_generic_unexp_rndv_truncated, "RNDV_THRESH=1000") { + test_run_xfer(false, false, false, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_generic_unexp_sync_rndv, "RNDV_THRESH=1000") { + test_run_xfer(false, false, false, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_generic_unexp_sync_rndv_truncated, + "RNDV_THRESH=1000") { + test_run_xfer(false, false, false, true, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_generic_exp_rndv_probe, "RNDV_THRESH=1000") { + test_xfer_probe(false, false, true, false); +} + +/* rndv send_generic_recv_contig am_rndv with bcopy on the sender side */ + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_exp_rndv, "RNDV_THRESH=1000") { + test_run_xfer(false, true, true, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_exp_rndv_truncated, "RNDV_THRESH=1000") { + test_run_xfer(false, true, true, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_exp_sync_rndv, "RNDV_THRESH=1000") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(false, true, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_exp_sync_rndv_truncated, + "RNDV_THRESH=1000") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(false, true, true, true, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_unexp_rndv, "RNDV_THRESH=1000") { + test_run_xfer(false, true, false, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_unexp_rndv_truncated, "RNDV_THRESH=1000") { + 
test_run_xfer(false, true, false, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_unexp_sync_rndv, "RNDV_THRESH=1000") { + test_run_xfer(false, true, false, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_unexp_sync_rndv_truncated, + "RNDV_THRESH=1000") { + test_run_xfer(false, true, false, true, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_generic_recv_contig_exp_rndv_probe, "RNDV_THRESH=1000") { + test_xfer_probe(false, true, true, false); +} + +/* rndv send_contig_recv_generic am_rndv with bcopy on the sender side */ + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_rndv, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1248576") { + test_run_xfer(true, false, true, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_rndv_truncated, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1248576") { + test_run_xfer(true, false, true, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_sync_rndv, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1248576") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(true, false, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_sync_rndv_truncated, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1248576") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(true, false, true, true, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_unexp_rndv, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1248576") { + test_run_xfer(true, false, false, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_unexp_rndv_truncated, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1248576") { + test_run_xfer(true, false, false, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_unexp_sync_rndv, + "RNDV_THRESH=1000", 
"ZCOPY_THRESH=1248576") { + test_run_xfer(true, false, false, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_unexp_sync_rndv_truncated, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1248576") { + test_run_xfer(true, false, false, true, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_rndv_probe, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1248576") { + test_xfer_probe(true, false, true, false); +} + +/* rndv send_contig_recv_generic am_rndv with zcopy on the sender side */ + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_rndv_zcopy, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1000") { + test_run_xfer(true, false, true, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_rndv_truncated_zcopy, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1000") { + test_run_xfer(true, false, true, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_sync_rndv_zcopy, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1000") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(true, false, true, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_sync_rndv_truncated_zcopy, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1000") { + /* because ucp_tag_send_req return status (instead request) if send operation + * completed immediately */ + skip_loopback(); + test_run_xfer(true, false, true, true, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_unexp_rndv_zcopy, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1000") { + test_run_xfer(true, false, false, false, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_unexp_rndv_truncated_zcopy, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1000") { + test_run_xfer(true, false, false, false, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_unexp_sync_rndv_zcopy, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1000") { + test_run_xfer(true, false, 
false, true, false); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_unexp_sync_rndv_truncated_zcopy, + "RNDV_THRESH=1000", "ZCOPY_THRESH=1000") { + test_run_xfer(true, false, false, true, true); +} + +UCS_TEST_P(test_ucp_tag_xfer, send_contig_recv_generic_exp_rndv_probe_zcopy, "RNDV_THRESH=1000", + "ZCOPY_THRESH=1000") { + test_xfer_probe(true, false, true, false); +} + +UCS_TEST_SKIP_COND_P(test_ucp_tag_xfer, test_xfer_len_offset, + RUNNING_ON_VALGRIND, "RNDV_THRESH=1000") { + test_xfer_len_offset(); +} + +UCS_TEST_P(test_ucp_tag_xfer, iov_with_empty_buffers, "ZCOPY_THRESH=512") { + const size_t iovcnt = ucp::data_type_desc_t::MAX_IOV; + const size_t size = UCS_KBYTE; + const int expected = 1; + const int sync = 0; + const int truncated = 0; + + std::vector sendbuf(size, 0); + std::vector recvbuf(size, 0); + ucp_dt_iov_t iovec[iovcnt]; + + ucs::fill_random(sendbuf); + + /* initialize iovec with MAX_IOV-1 empty buffers and one non-empty */ + for (size_t i = 0; i < iovcnt - 1; ++i) { + iovec[i].buffer = NULL; + iovec[i].length = 0; + } + + /* coverity[escape] */ + iovec[iovcnt - 1].buffer = &sendbuf[0]; + iovec[iovcnt - 1].length = size; + + ucp::data_type_desc_t recv_dt_desc(DATATYPE_IOV, recvbuf.data(), + recvbuf.size(), iovcnt); + + size_t recvd = do_xfer(iovec, recv_dt_desc.buf(), iovcnt, + DATATYPE_IOV, DATATYPE_IOV, expected, 0, + truncated); + + ASSERT_EQ(sendbuf.size(), recvd); + EXPECT_TRUE(!check_buffers(sendbuf, recvbuf, recvd, iovcnt, + recv_dt_desc.count(), size, expected, sync, + "IOV")); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_tag_xfer) + + +#if ENABLE_STATS + +class test_ucp_tag_stats : public test_ucp_tag_xfer { +public: + void init() { + stats_activate(); + test_ucp_tag_xfer::init(); + } + + void cleanup() { + test_ucp_tag_xfer::cleanup(); + stats_restore(); + } + + std::vector + static enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) { + + 
return ucp_test::enum_test_params(ctx_params, name, + test_case_name, tls); + } + + ucs_stats_node_t* ep_stats(entity &e) { + return e.ep()->stats; + } + + ucs_stats_node_t* worker_stats(entity &e) { + return e.worker()->stats; + } + + void validate_counters(uint64_t tx_cntr, uint64_t rx_cntr) { + uint64_t cnt; + cnt = UCS_STATS_GET_COUNTER(ep_stats(sender()), tx_cntr); + EXPECT_EQ(1ul, cnt); + cnt = UCS_STATS_GET_COUNTER(worker_stats(receiver()), rx_cntr); + EXPECT_EQ(1ul, cnt); + } + +}; + + +UCS_TEST_P(test_ucp_tag_stats, eager_expected, "RNDV_THRESH=1248576") { + check_offload_support(false); + test_run_xfer(true, true, true, false, false); + validate_counters(UCP_EP_STAT_TAG_TX_EAGER, + UCP_WORKER_STAT_TAG_RX_EAGER_MSG); + + uint64_t cnt; + cnt = UCS_STATS_GET_COUNTER(worker_stats(receiver()), + UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP); + EXPECT_EQ(cnt, 0ul); +} + +UCS_TEST_P(test_ucp_tag_stats, eager_unexpected, "RNDV_THRESH=1248576") { + check_offload_support(false); + test_run_xfer(true, true, false, false, false); + validate_counters(UCP_EP_STAT_TAG_TX_EAGER, + UCP_WORKER_STAT_TAG_RX_EAGER_MSG); + uint64_t cnt; + cnt = UCS_STATS_GET_COUNTER(worker_stats(receiver()), + UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP); + EXPECT_GT(cnt, 0ul); +} + +UCS_TEST_P(test_ucp_tag_stats, sync_expected, "RNDV_THRESH=1248576") { + check_offload_support(false); + skip_loopback(); + test_run_xfer(true, true, true, true, false); + validate_counters(UCP_EP_STAT_TAG_TX_EAGER_SYNC, + UCP_WORKER_STAT_TAG_RX_EAGER_SYNC_MSG); + + uint64_t cnt; + cnt = UCS_STATS_GET_COUNTER(worker_stats(receiver()), + UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP); + EXPECT_EQ(cnt, 0ul); +} + +UCS_TEST_P(test_ucp_tag_stats, sync_unexpected, "RNDV_THRESH=1248576") { + check_offload_support(false); + skip_loopback(); + test_run_xfer(true, true, false, true, false); + validate_counters(UCP_EP_STAT_TAG_TX_EAGER_SYNC, + UCP_WORKER_STAT_TAG_RX_EAGER_SYNC_MSG); + uint64_t cnt; + cnt = 
UCS_STATS_GET_COUNTER(worker_stats(receiver()), + UCP_WORKER_STAT_TAG_RX_EAGER_CHUNK_UNEXP); + EXPECT_GT(cnt, 0ul); +} + +UCS_TEST_P(test_ucp_tag_stats, rndv_expected, "RNDV_THRESH=1000") { + check_offload_support(false); + test_run_xfer(true, true, true, false, false); + validate_counters(UCP_EP_STAT_TAG_TX_RNDV, + UCP_WORKER_STAT_TAG_RX_RNDV_EXP); +} + +UCS_TEST_P(test_ucp_tag_stats, rndv_unexpected, "RNDV_THRESH=1000") { + check_offload_support(false); + test_run_xfer(true, true, false, false, false); + validate_counters(UCP_EP_STAT_TAG_TX_RNDV, + UCP_WORKER_STAT_TAG_RX_RNDV_UNEXP); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_tag_stats) + +#endif diff --git a/test/gtest/ucp/test_ucp_wakeup.cc b/test/gtest/ucp/test_ucp_wakeup.cc new file mode 100644 index 0000000..3c279d2 --- /dev/null +++ b/test/gtest/ucp/test_ucp_wakeup.cc @@ -0,0 +1,269 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "ucp_test.h" + +#include +#include +#include + + +class test_ucp_wakeup : public ucp_test { +public: + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.features |= UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP; + return params; + } + +protected: + static void send_completion(void *request, ucs_status_t status) { + ++comp_cntr; + } + + static void recv_completion(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info) { + ++comp_cntr; + } + + void wait(void *req) { + do { + progress(); + } while (!ucp_request_is_completed(req)); + ucp_request_release(req); + } + + void arm(ucp_worker_h worker) { + ucs_status_t status; + do { + status = ucp_worker_arm(worker); + } while (UCS_ERR_BUSY == status); + ASSERT_EQ(UCS_OK, status); + } + + static size_t comp_cntr; +}; + +size_t test_ucp_wakeup::comp_cntr = 0; + +UCS_TEST_P(test_ucp_wakeup, efd) +{ + const ucp_datatype_t DATATYPE = ucp_dt_make_contig(1); + const uint64_t TAG = 0xdeadbeef; + ucp_worker_h 
recv_worker; + int recv_efd; + void *req; + + sender().connect(&receiver(), get_ep_params()); + + recv_worker = receiver().worker(); + ASSERT_UCS_OK(ucp_worker_get_efd(recv_worker, &recv_efd)); + + uint64_t send_data = 0x12121212; + req = ucp_tag_send_nb(sender().ep(), &send_data, sizeof(send_data), DATATYPE, + TAG, send_completion); + if (UCS_PTR_IS_PTR(req)) { + wait(req); + } else { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } + + uint64_t recv_data = 0; + req = ucp_tag_recv_nb(receiver().worker(), &recv_data, sizeof(recv_data), + DATATYPE, TAG, (ucp_tag_t)-1, recv_completion); + while (!ucp_request_is_completed(req)) { + + if (ucp_worker_progress(recv_worker)) { + /* Got some receive events, check request */ + continue; + } + + ucs_status_t status = ucp_worker_arm(recv_worker); + if (status == UCS_ERR_BUSY) { + /* Could not arm, poll again */ + ucp_worker_progress(recv_worker); + continue; + } + ASSERT_UCS_OK(status); + + int ret; + do { + struct pollfd pollfd; + pollfd.events = POLLIN; + pollfd.fd = recv_efd; + ret = poll(&pollfd, 1, -1); + } while ((ret < 0) && (errno == EINTR)); + if (ret < 0) { + UCS_TEST_MESSAGE << "poll() failed: " << strerror(errno); + } + ASSERT_EQ(1, ret); + EXPECT_EQ(UCS_ERR_BUSY, ucp_worker_arm(recv_worker)); + } + + ucp_request_release(req); + + flush_worker(sender()); + EXPECT_EQ(send_data, recv_data); +} + +/* This test doesn't progress receiver's worker, while + * waiting for the events on a sender's worker fd. 
So, + * this causes the hang due to lack of the progress during + * TCP CM message exchange (TCP doesn't have an async progress + * for such events) + * TODO: add async progress for TCP connections */ +UCS_TEST_SKIP_COND_P(test_ucp_wakeup, tx_wait, + has_transport("tcp"), "ZCOPY_THRESH=10000") +{ + const ucp_datatype_t DATATYPE = ucp_dt_make_contig(1); + const size_t COUNT = 20000; + const uint64_t TAG = 0xdeadbeef; + std::string send_data(COUNT, '2'), recv_data(COUNT, '1'); + void *sreq, *rreq; + + sender().connect(&receiver(), get_ep_params()); + + rreq = ucp_tag_recv_nb(receiver().worker(), &recv_data[0], COUNT, DATATYPE, + TAG, (ucp_tag_t)-1, recv_completion); + + sreq = ucp_tag_send_nb(sender().ep(), &send_data[0], COUNT, DATATYPE, TAG, + send_completion); + + if (UCS_PTR_IS_PTR(sreq)) { + /* wait for send completion */ + do { + ucp_worker_wait(sender().worker()); + while (progress()); + } while (!ucp_request_is_completed(sreq)); + ucp_request_release(sreq); + } else { + ASSERT_UCS_OK(UCS_PTR_STATUS(sreq)); + } + + wait(rreq); + + EXPECT_EQ(send_data, recv_data); +} + +UCS_TEST_P(test_ucp_wakeup, signal) +{ + int efd; + ucp_worker_h worker; + struct pollfd polled; + + polled.events = POLLIN; + + worker = sender().worker(); + ASSERT_UCS_OK(ucp_worker_get_efd(worker, &efd)); + + polled.fd = efd; + EXPECT_EQ(0, poll(&polled, 1, 0)); + arm(worker); + ASSERT_UCS_OK(ucp_worker_signal(worker)); + EXPECT_EQ(1, poll(&polled, 1, 0)); + arm(worker); + EXPECT_EQ(0, poll(&polled, 1, 0)); + + ASSERT_UCS_OK(ucp_worker_signal(worker)); + ASSERT_UCS_OK(ucp_worker_signal(worker)); + EXPECT_EQ(1, poll(&polled, 1, 0)); + arm(worker); + EXPECT_EQ(0, poll(&polled, 1, 0)); + + ASSERT_UCS_OK(ucp_worker_signal(worker)); + EXPECT_EQ(UCS_ERR_BUSY, ucp_worker_arm(worker)); + EXPECT_EQ(UCS_OK, ucp_worker_arm(worker)); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_wakeup) + +class test_ucp_wakeup_external_epollfd : public test_ucp_wakeup { +public: + virtual ucp_worker_params_t 
get_worker_params() { + ucp_worker_params_t params = test_ucp_wakeup::get_worker_params(); + params.field_mask |= UCP_WORKER_PARAM_FIELD_EVENT_FD | + UCP_WORKER_PARAM_FIELD_USER_DATA; + params.event_fd = m_epfd; + params.user_data = USER_DATA; + return params; + } + +protected: + static void* const USER_DATA; + + virtual void init() { + m_epfd = epoll_create(1); + ASSERT_GE(m_epfd, 0); + test_ucp_wakeup::init(); + } + + virtual void cleanup() { + test_ucp_wakeup::cleanup(); + close(m_epfd); + } + + int m_epfd; +}; + +void* const test_ucp_wakeup_external_epollfd::USER_DATA = (void*)0x1337abcdef; + + +UCS_TEST_P(test_ucp_wakeup_external_epollfd, epoll_wait) +{ + const ucp_datatype_t DATATYPE = ucp_dt_make_contig(1); + const uint64_t TAG = 0xdeadbeef; + void *req; + + sender().connect(&receiver(), get_ep_params()); + + uint64_t send_data = 0x12121212; + req = ucp_tag_send_nb(sender().ep(), &send_data, sizeof(send_data), DATATYPE, + TAG, send_completion); + if (UCS_PTR_IS_PTR(req)) { + wait(req); + } else { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } + + uint64_t recv_data = 0; + req = ucp_tag_recv_nb(receiver().worker(), &recv_data, sizeof(recv_data), + DATATYPE, TAG, (ucp_tag_t)-1, recv_completion); + while (!ucp_request_is_completed(req)) { + + ucp_worker_h recv_worker = receiver().worker(); + + if (ucp_worker_progress(recv_worker)) { + /* Got some receive events, check request */ + continue; + } + + ucs_status_t status = ucp_worker_arm(recv_worker); + if (status == UCS_ERR_BUSY) { + /* Could not arm, poll again */ + ucp_worker_progress(recv_worker); + continue; + } + ASSERT_UCS_OK(status); + + struct epoll_event event; + int ret; + do { + ret = epoll_wait(m_epfd, &event, 1, -1); + } while ((ret < 0) && (errno == EINTR)); + if (ret < 0) { + UCS_TEST_MESSAGE << "epoll_wait() failed: " << strerror(errno); + } + ASSERT_EQ(1, ret); + EXPECT_EQ(USER_DATA, event.data.ptr); + } + + ucp_request_release(req); + + flush_worker(sender()); + EXPECT_EQ(send_data, recv_data); +} + 
+UCP_INSTANTIATE_TEST_CASE(test_ucp_wakeup_external_epollfd) diff --git a/test/gtest/ucp/test_ucp_wireup.cc b/test/gtest/ucp/test_ucp_wireup.cc new file mode 100644 index 0000000..012c445 --- /dev/null +++ b/test/gtest/ucp/test_ucp_wireup.cc @@ -0,0 +1,1380 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "ucp_test.h" +#include "common/test.h" +#include "ucp/ucp_test.h" + +#include +#include + +extern "C" { +#include +#include +} + +class test_ucp_wireup : public ucp_test { +public: + static std::vector + enum_test_params_features(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls, + uint64_t features, bool test_all = 0); + +protected: + enum { + TEST_RMA = UCS_BIT(0), + TEST_TAG = UCS_BIT(1), + TEST_STREAM = UCS_BIT(2), + UNIFIED_MODE = UCS_BIT(3), + TEST_AMO = UCS_BIT(4) + }; + + typedef uint64_t elem_type; + typedef std::vector vec_type; + + static const size_t BUFFER_LENGTH = 16384; + static const ucp_datatype_t DT_U64 = ucp_dt_make_contig(sizeof(elem_type)); + static const uint64_t TAG = 0xdeadbeef; + static const elem_type SEND_DATA = 0xdeadbeef12121212ull; + + virtual void init(); + virtual void cleanup(); + + void send_nb(ucp_ep_h ep, size_t length, int repeat, std::vector& reqs, + uint64_t send_data = SEND_DATA); + + void send_b(ucp_ep_h ep, size_t length, int repeat, + uint64_t send_data = SEND_DATA); + + void recv_b(ucp_worker_h worker, ucp_ep_h ep, size_t length, int repeat, + uint64_t recv_data = SEND_DATA); + + void send_recv(ucp_ep_h send_ep, ucp_worker_h recv_worker, ucp_ep_h recv_ep, + size_t vecsize, int repeat); + + void waitall(std::vector reqs); + + void disconnect(ucp_ep_h ep); + + void disconnect(ucp_test::entity &e); + + static void send_completion(void *request, ucs_status_t status); + + static void tag_recv_completion(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info); + + 
void rkeys_cleanup(); + + void memhs_cleanup(); + + void clear_recv_data(); + + void fill_send_data(); + + ucp_rkey_h get_rkey(ucp_ep_h ep, ucp_mem_h memh); + +protected: + vec_type m_send_data; + vec_type m_recv_data; + ucs::handle m_memh_sender; + ucs::handle m_memh_receiver; + std::vector< ucs::handle > m_rkeys; + +private: + static void stream_recv_completion(void *request, ucs_status_t status, + size_t length); + + static void unmap_memh(ucp_mem_h memh, ucp_context_h context); +}; + +std::vector +test_ucp_wireup::enum_test_params_features(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls, + uint64_t features, bool test_all) +{ + std::vector result; + ucp_params_t tmp_ctx_params = ctx_params; + + if (features & UCP_FEATURE_RMA) { + tmp_ctx_params.features = UCP_FEATURE_RMA; + generate_test_params_variant(tmp_ctx_params, name, test_case_name + "/rma", + tls, TEST_RMA, result); + + generate_test_params_variant(tmp_ctx_params, name, test_case_name + "/rma", + tls, TEST_RMA | UNIFIED_MODE, result); + } + + if (features & UCP_FEATURE_TAG) { + tmp_ctx_params.features = UCP_FEATURE_TAG; + generate_test_params_variant(tmp_ctx_params, name, test_case_name + "/tag", + tls, TEST_TAG, result); + + generate_test_params_variant(tmp_ctx_params, name, test_case_name + "/tag", + tls, TEST_TAG | UNIFIED_MODE, result); + } + + if (features & UCP_FEATURE_STREAM) { + tmp_ctx_params.features = UCP_FEATURE_STREAM; + generate_test_params_variant(tmp_ctx_params, name, test_case_name + "/stream", + tls, TEST_STREAM, result); + + generate_test_params_variant(tmp_ctx_params, name, test_case_name + "/stream", + tls, TEST_STREAM | UNIFIED_MODE, result); + } + + if (features & (UCP_FEATURE_AMO32 | UCP_FEATURE_AMO64)) { + tmp_ctx_params.features = (UCP_FEATURE_AMO32 | UCP_FEATURE_AMO64); + generate_test_params_variant(tmp_ctx_params, name, test_case_name + "/amo", + tls, TEST_AMO, result); + } + + if (test_all) { + 
uint64_t all_flags = (TEST_TAG | TEST_RMA | TEST_STREAM); + tmp_ctx_params.features = features; + generate_test_params_variant(tmp_ctx_params, name, test_case_name + "/all", + tls, all_flags, result); + + generate_test_params_variant(tmp_ctx_params, name, test_case_name + "/all", + tls, all_flags | UNIFIED_MODE, result); + } + + return result; +} + +void test_ucp_wireup::unmap_memh(ucp_mem_h memh, ucp_context_h context) +{ + ucs_status_t status = ucp_mem_unmap(context, memh); + if (status != UCS_OK) { + ucs_warn("failed to unmap memory: %s", ucs_status_string(status)); + } +} + +void test_ucp_wireup::init() +{ + if (GetParam().variant & UNIFIED_MODE) { + modify_config("UNIFIED_MODE", "y"); + } + + ucp_test::init(); + + m_send_data.resize(BUFFER_LENGTH, 0); + m_recv_data.resize(BUFFER_LENGTH, 0); + + if (GetParam().variant & (TEST_RMA | TEST_AMO)) { + ucs_status_t status; + ucp_mem_map_params_t params; + ucp_mem_h memh; + + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.address = &m_recv_data[0]; + params.length = m_recv_data.size() * sizeof(m_recv_data[0]); + params.flags = 0; + + status = ucp_mem_map(sender().ucph(), ¶ms, &memh); + ASSERT_UCS_OK(status); + m_memh_sender.reset(memh, unmap_memh, sender().ucph()); + + status = ucp_mem_map(receiver().ucph(), ¶ms, &memh); + ASSERT_UCS_OK(status); + m_memh_receiver.reset(memh, unmap_memh, receiver().ucph()); + } +} + +ucp_rkey_h test_ucp_wireup::get_rkey(ucp_ep_h ep, ucp_mem_h memh) +{ + void *rkey_buffer; + size_t rkey_size; + ucs_status_t status; + ucp_rkey_h rkey; + + if (memh == m_memh_receiver) { + status = ucp_rkey_pack(receiver().ucph(), memh, &rkey_buffer, &rkey_size); + } else if (memh == m_memh_sender) { + status = ucp_rkey_pack(sender().ucph(), memh, &rkey_buffer, &rkey_size); + } else { + status = UCS_ERR_INVALID_PARAM; + } + ASSERT_UCS_OK(status); + + status = ucp_ep_rkey_unpack(ep, rkey_buffer, &rkey); + 
ASSERT_UCS_OK(status); + + ucp_rkey_buffer_release(rkey_buffer); + + return rkey; +} + +void test_ucp_wireup::rkeys_cleanup() { + m_rkeys.clear(); +} + +void test_ucp_wireup::memhs_cleanup() { + m_memh_sender.reset(); + m_memh_receiver.reset(); +} + +void test_ucp_wireup::cleanup() { + rkeys_cleanup(); + memhs_cleanup(); + ucp_test::cleanup(); +} + +void test_ucp_wireup::clear_recv_data() { + std::fill(m_recv_data.begin(), m_recv_data.end(), 0); +} + +void test_ucp_wireup::send_nb(ucp_ep_h ep, size_t length, int repeat, + std::vector& reqs, uint64_t send_data) +{ + if (GetParam().variant & TEST_TAG) { + std::fill(m_send_data.begin(), m_send_data.end(), send_data); + for (int i = 0; i < repeat; ++i) { + void *req = ucp_tag_send_nb(ep, &m_send_data[0], length, + DT_U64, TAG, send_completion); + if (UCS_PTR_IS_PTR(req)) { + reqs.push_back(req); + } else { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } + } + } else if (GetParam().variant & TEST_STREAM) { + std::fill(m_send_data.begin(), m_send_data.end(), send_data); + for (int i = 0; i < repeat; ++i) { + void *req = ucp_stream_send_nb(ep, &m_send_data[0], length, DT_U64, + send_completion, 0); + if (UCS_PTR_IS_PTR(req)) { + reqs.push_back(req); + } else { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } + } + } else if (GetParam().variant & TEST_RMA) { + clear_recv_data(); + + ucp_mem_h memh = (sender().ucph() == ep->worker->context) ? 
+ m_memh_receiver : m_memh_sender; + ucp_rkey_h rkey = get_rkey(ep, memh); + + m_rkeys.push_back(ucs::handle(rkey, ucp_rkey_destroy)); + + for (int i = 0; i < repeat; ++i) { + std::fill(m_send_data.begin(), m_send_data.end(), send_data + i); + void *req = ucp_put_nb(ep, &m_send_data[0], + m_send_data.size() * sizeof(m_send_data[0]), + (uintptr_t)&m_recv_data[0], rkey, + send_completion); + if (UCS_PTR_IS_PTR(req)) { + reqs.push_back(req); + } else { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } + } + } +} + +void test_ucp_wireup::send_b(ucp_ep_h ep, size_t length, int repeat, + uint64_t send_data) +{ + std::vector reqs; + send_nb(ep, length, repeat, reqs, send_data); + waitall(reqs); +} + +void test_ucp_wireup::recv_b(ucp_worker_h worker, ucp_ep_h ep, size_t length, + int repeat, uint64_t recv_data) +{ + if (GetParam().variant & (TEST_TAG | TEST_STREAM)) { + for (int i = 0; i < repeat; ++i) { + size_t recv_length; + void *req; + + clear_recv_data(); + if (GetParam().variant & TEST_TAG) { + req = ucp_tag_recv_nb(worker, &m_recv_data[0], length, DT_U64, + TAG, (ucp_tag_t)-1, tag_recv_completion); + } else if (GetParam().variant & TEST_STREAM) { + req = ucp_stream_recv_nb(ep, &m_recv_data[0], length, DT_U64, + stream_recv_completion, &recv_length, + UCP_STREAM_RECV_FLAG_WAITALL); + } else { + req = NULL; + } + if (UCS_PTR_IS_PTR(req)) { + wait(req); + } else { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } + EXPECT_EQ(recv_data, m_recv_data[0]) + << "repeat " << i << "/" << repeat; + EXPECT_EQ(length, + (size_t)std::count(m_recv_data.begin(), + m_recv_data.begin() + length, + recv_data)); + } + } else if (GetParam().variant & TEST_RMA) { + for (size_t i = 0; i < length; ++i) { + while (m_recv_data[i] != recv_data + repeat - 1) { + progress(); + } + } + } +} + +void test_ucp_wireup::send_completion(void *request, ucs_status_t status) +{ +} + +void test_ucp_wireup::tag_recv_completion(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info) +{ +} + +void 
test_ucp_wireup::stream_recv_completion(void *request, ucs_status_t status, + size_t length) +{ +} + +void test_ucp_wireup::send_recv(ucp_ep_h send_ep, ucp_worker_h recv_worker, + ucp_ep_h recv_ep, size_t length, int repeat) +{ + std::vector send_reqs; + static uint64_t next_send_data = 0; + uint64_t send_data = next_send_data++; + + send_nb(send_ep, length, repeat, send_reqs, send_data); + recv_b (recv_worker, recv_ep, length, repeat, send_data); + waitall(send_reqs); + m_rkeys.clear(); +} + +void test_ucp_wireup::disconnect(ucp_ep_h ep) { + void *req = ucp_disconnect_nb(ep); + if (!UCS_PTR_IS_PTR(req)) { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } + wait(req); +} + +void test_ucp_wireup::disconnect(ucp_test::entity &e) { + disconnect(e.revoke_ep()); +} + +void test_ucp_wireup::waitall(std::vector reqs) +{ + while (!reqs.empty()) { + wait(reqs.back()); + reqs.pop_back(); + } +} + +class test_ucp_wireup_1sided : public test_ucp_wireup { +public: + static std::vector + enum_test_params(const ucp_params_t& ctx_params, const std::string& name, + const std::string& test_case_name, const std::string& tls) + { + return enum_test_params_features(ctx_params, name, test_case_name, tls, + UCP_FEATURE_RMA | UCP_FEATURE_TAG); + } +}; + +UCS_TEST_P(test_ucp_wireup_1sided, address) { + ucs_status_t status; + size_t size; + void *buffer; + ucp_lane_index_t lanes2remote[UCP_MAX_LANES]; + std::set packed_dev_priorities, unpacked_dev_priorities; + ucp_rsc_index_t tl; + + status = ucp_address_pack(sender().worker(), NULL, + std::numeric_limits::max(), + UCP_ADDRESS_PACK_FLAG_ALL, lanes2remote, &size, + &buffer); + ASSERT_UCS_OK(status); + ASSERT_TRUE(buffer != NULL); + ASSERT_GT(size, 0ul); + EXPECT_LE(size, 2048ul); /* Expect a reasonable address size */ + + ucs_for_each_bit(tl, sender().worker()->context->tl_bitmap) { + if (sender().worker()->context->tl_rscs[tl].flags & UCP_TL_RSC_FLAG_SOCKADDR) { + continue; + } + 
packed_dev_priorities.insert(ucp_worker_iface_get_attr(sender().worker(), tl)->priority); + } + + ucp_unpacked_address unpacked_address; + + status = ucp_address_unpack(sender().worker(), buffer, + std::numeric_limits::max(), + &unpacked_address); + ASSERT_UCS_OK(status); + + EXPECT_EQ(sender().worker()->uuid, unpacked_address.uuid); +#if ENABLE_DEBUG_DATA + EXPECT_EQ(std::string(ucp_worker_get_name(sender().worker())), + std::string(unpacked_address.name)); +#endif + EXPECT_LE(unpacked_address.address_count, + static_cast(sender().ucph()->num_tls)); + + const ucp_address_entry_t *ae; + ucp_unpacked_address_for_each(ae, &unpacked_address) { + unpacked_dev_priorities.insert(ae->iface_attr.priority); + } + + /* TODO test addresses */ + + ucs_free(unpacked_address.address_list); + ucs_free(buffer); + /* Make sure that the packed device priorities are equal to the unpacked + * device priorities */ + ASSERT_TRUE(packed_dev_priorities == unpacked_dev_priorities); +} + +UCS_TEST_P(test_ucp_wireup_1sided, empty_address) { + ucs_status_t status; + size_t size; + void *buffer; + ucp_lane_index_t lanes2remote[UCP_MAX_LANES]; + + status = ucp_address_pack(sender().worker(), NULL, 0, + UCP_ADDRESS_PACK_FLAG_ALL, lanes2remote, &size, + &buffer); + ASSERT_UCS_OK(status); + ASSERT_TRUE(buffer != NULL); + ASSERT_GT(size, 0ul); + + ucp_unpacked_address unpacked_address; + + status = ucp_address_unpack(sender().worker(), buffer, + std::numeric_limits::max(), + &unpacked_address); + ASSERT_UCS_OK(status); + + EXPECT_EQ(sender().worker()->uuid, unpacked_address.uuid); +#if ENABLE_DEBUG_DATA + EXPECT_EQ(std::string(ucp_worker_get_name(sender().worker())), + std::string(unpacked_address.name)); +#endif + EXPECT_EQ(0u, unpacked_address.address_count); + + ucs_free(unpacked_address.address_list); + ucs_free(buffer); +} + +UCS_TEST_P(test_ucp_wireup_1sided, one_sided_wireup) { + sender().connect(&receiver(), get_ep_params()); + send_recv(sender().ep(), receiver().worker(), receiver().ep(), 
1, 1); + flush_worker(sender()); +} + +UCS_TEST_P(test_ucp_wireup_1sided, one_sided_wireup_rndv, "RNDV_THRESH=1") { + sender().connect(&receiver(), get_ep_params()); + send_recv(sender().ep(), receiver().worker(), receiver().ep(), BUFFER_LENGTH, 1); + if (is_loopback() && (GetParam().variant & TEST_TAG)) { + /* expect the endpoint to be connected to itself */ + ucp_ep_h ep = sender().ep(); + EXPECT_EQ((uintptr_t)ep, ucp_ep_dest_ep_ptr(ep)); + } + flush_worker(sender()); +} + +UCS_TEST_P(test_ucp_wireup_1sided, multi_wireup) { + skip_loopback(); + + const size_t count = 10; + while (entities().size() < count) { + create_entity(); + } + + /* connect from sender() to all the rest */ + for (size_t i = 0; i < count; ++i) { + sender().connect(&entities().at(i), get_ep_params(), i); + } +} + +UCS_TEST_P(test_ucp_wireup_1sided, stress_connect) { + for (int i = 0; i < 30; ++i) { + sender().connect(&receiver(), get_ep_params()); + send_recv(sender().ep(), receiver().worker(), receiver().ep(), 1, + 10000 / (ucs::test_time_multiplier() * + ucs::test_time_multiplier())); + if (!is_loopback()) { + receiver().connect(&sender(), get_ep_params()); + } + + disconnect(sender()); + if (!is_loopback()) { + disconnect(receiver()); + } + } +} + +UCS_TEST_P(test_ucp_wireup_1sided, stress_connect2) { + int max_count = (int)ucs_max(10, + (1000.0 / (ucs::test_time_multiplier() * + ucs::test_time_multiplier()))); + int count = ucs_min(max_count, max_connections() / 2); + + for (int i = 0; i < count; ++i) { + sender().connect(&receiver(), get_ep_params()); + send_recv(sender().ep(), receiver().worker(), receiver().ep(), 1, 1); + if (!is_loopback()) { + receiver().connect(&sender(), get_ep_params()); + } + + disconnect(sender()); + if (!is_loopback()) { + disconnect(receiver()); + } + } +} + +UCS_TEST_P(test_ucp_wireup_1sided, disconnect_nonexistent) { + skip_loopback(); + sender().connect(&receiver(), get_ep_params()); + disconnect(sender()); + receiver().destroy_worker(); + 
sender().destroy_worker(); +} + +UCS_TEST_P(test_ucp_wireup_1sided, disconnect_reconnect) { + sender().connect(&receiver(), get_ep_params()); + send_b(sender().ep(), 1000, 1); + disconnect(sender()); + recv_b(receiver().worker(), receiver().ep(), 1000, 1); + + sender().connect(&receiver(), get_ep_params()); + send_b(sender().ep(), 1000, 1); + disconnect(sender()); + recv_b(receiver().worker(), receiver().ep(), 1000, 1); +} + +UCS_TEST_P(test_ucp_wireup_1sided, send_disconnect_onesided) { + sender().connect(&receiver(), get_ep_params()); + send_b(sender().ep(), 1000, 100); + disconnect(sender()); + recv_b(receiver().worker(), receiver().ep(), 1000, 100); +} + +UCS_TEST_P(test_ucp_wireup_1sided, send_disconnect_onesided_nozcopy, "ZCOPY_THRESH=-1") { + sender().connect(&receiver(), get_ep_params()); + send_b(sender().ep(), 1000, 100); + disconnect(sender()); + recv_b(receiver().worker(), receiver().ep(), 1000, 100); +} + +UCS_TEST_P(test_ucp_wireup_1sided, send_disconnect_onesided_wait) { + sender().connect(&receiver(), get_ep_params()); + send_recv(sender().ep(), receiver().worker(), receiver().ep(), 8, 1); + send_b(sender().ep(), 1000, 200); + disconnect(sender()); + recv_b(receiver().worker(), receiver().ep(), 1000, 200); +} + +UCS_TEST_P(test_ucp_wireup_1sided, send_disconnect_reply1) { + sender().connect(&receiver(), get_ep_params()); + if (!is_loopback()) { + receiver().connect(&sender(), get_ep_params()); + } + + send_b(sender().ep(), 8, 1); + if (!is_loopback()) { + disconnect(sender()); + } + + recv_b(receiver().worker(), receiver().ep(), 8, 1); + send_b(receiver().ep(), 8, 1); + disconnect(receiver()); + recv_b(sender().worker(), sender().ep(), 8, 1); +} + +UCS_TEST_P(test_ucp_wireup_1sided, send_disconnect_reply2) { + sender().connect(&receiver(), get_ep_params()); + + send_b(sender().ep(), 8, 1); + if (!is_loopback()) { + disconnect(sender()); + } + recv_b(receiver().worker(), receiver().ep(), 8, 1); + + if (!is_loopback()) { + 
receiver().connect(&sender(), get_ep_params()); + } + + send_b(receiver().ep(), 8, 1); + disconnect(receiver()); + recv_b(sender().worker(), receiver().ep(), 8, 1); +} + +UCS_TEST_P(test_ucp_wireup_1sided, disconnect_nb_onesided) { + sender().connect(&receiver(), get_ep_params()); + + std::vector sreqs; + send_nb(sender().ep(), 1000, 1000, sreqs); + + void *dreq = sender().disconnect_nb(); + if (!UCS_PTR_IS_PTR(dreq)) { + ASSERT_UCS_OK(UCS_PTR_STATUS(dreq)); + } + + wait(dreq); + recv_b(receiver().worker(), receiver().ep(), 1000, 1000); + + waitall(sreqs); +} + +UCS_TEST_P(test_ucp_wireup_1sided, multi_ep_1sided) { + const unsigned count = 10; + + for (unsigned i = 0; i < count; ++i) { + sender().connect(&receiver(), get_ep_params(), i); + } + + for (unsigned i = 0; i < count; ++i) { + send_recv(sender().ep(0, i), receiver().worker(), receiver().ep(), 8, 1); + } +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_wireup_1sided) + +class test_ucp_wireup_2sided : public test_ucp_wireup { +public: + static std::vector + enum_test_params(const ucp_params_t& ctx_params, const std::string& name, + const std::string& test_case_name, const std::string& tls) + { + return enum_test_params_features(ctx_params, name, test_case_name, tls, + UCP_FEATURE_RMA | UCP_FEATURE_TAG | + UCP_FEATURE_STREAM); + } + +protected: + void test_connect_loopback(bool delay_before_connect, bool enable_loopback); +}; + +UCS_TEST_P(test_ucp_wireup_2sided, two_sided_wireup) { + sender().connect(&receiver(), get_ep_params()); + if (!is_loopback()) { + receiver().connect(&sender(), get_ep_params()); + } + + send_recv(sender().ep(), receiver().worker(), receiver().ep(), 1, 1); + flush_worker(sender()); + send_recv(receiver().ep(), sender().worker(), sender().ep(), 1, 1); + flush_worker(receiver()); +} + +void test_ucp_wireup_2sided::test_connect_loopback(bool delay_before_connect, + bool enable_loopback) { + ucp_ep_params_t params = test_ucp_wireup::get_ep_params(); + if (!enable_loopback) { + params.field_mask 
|= UCP_EP_PARAM_FIELD_FLAGS; + params.flags |= UCP_EP_PARAMS_FLAGS_NO_LOOPBACK; + } + + for (int i = 0; i < 5; ++i) { + int base_index = i * 2; + sender().connect(&sender(), params, base_index); + ucp_ep_h ep1 = sender().ep(0, base_index); + + if (delay_before_connect) { + /* let one side create ep */ + short_progress_loop(0); + } + + sender().connect(&sender(), params, base_index + 1); + ucp_ep_h ep2 = sender().ep(0, base_index + 1); + + EXPECT_NE(ep1, ep2); + + if (GetParam().variant & TEST_STREAM) { + /* distinct payloads per endpoint, so the receive side can tell which ep the data was sent on */ + uint64_t data1 = (base_index * 10) + 1; + uint64_t data2 = (base_index * 10) + 2; + + send_b(ep1, 1, 1, data1); + send_b(ep2, 1, 1, data2); + + if (enable_loopback) { + /* self-send - each ep receives what was sent on it */ + recv_b(sender().worker(), ep1, 1, 1, data1); + recv_b(sender().worker(), ep2, 1, 1, data2); + } else { + /* cross-send - each ep receives what was sent on the other ep */ + recv_b(sender().worker(), ep1, 1, 1, data2); + recv_b(sender().worker(), ep2, 1, 1, data1); + } + } + } + flush_worker(sender()); +} + +UCS_TEST_P(test_ucp_wireup_2sided, loopback) { + test_connect_loopback(false, true); +} + +UCS_TEST_P(test_ucp_wireup_2sided, loopback_with_delay) { + test_connect_loopback(true, true); +} + +UCS_TEST_P(test_ucp_wireup_2sided, no_loopback) { + test_connect_loopback(false, false); +} + +UCS_TEST_P(test_ucp_wireup_2sided, no_loopback_with_delay) { + test_connect_loopback(true, false); +} + +/* Posts a zero-length tagged send, progresses only the sender until its ep reports UCP_EP_FLAG_LOCAL_CONNECTED (or the deadline passes), then posts the matching receive and waits for both requests. */ +UCS_TEST_SKIP_COND_P(test_ucp_wireup_2sided, async_connect, + !(GetParam().ctx_params.features & UCP_FEATURE_TAG)) { + sender().connect(&receiver(), get_ep_params()); + ucp_ep_h send_ep = sender().ep(); + std::vector reqs; + + reqs.push_back(ucp_tag_send_nb(send_ep, NULL, 0, DT_U64, 1, send_completion)); + EXPECT_FALSE(UCS_PTR_IS_ERR(reqs.back())); + + ucs_time_t deadline = ucs::get_deadline(); + /* wait for the asynchronous wireup reply without calling progress on the receiver */ + while(!(send_ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED) && + (ucs_get_time() <
deadline)) { + ucp_worker_progress(sender().worker()); + } + + reqs.push_back(ucp_tag_recv_nb(receiver().worker(), NULL, 0, DT_U64, 1, + (ucp_tag_t)-1, tag_recv_completion)); + EXPECT_FALSE(UCS_PTR_IS_ERR(reqs.back())); + waitall(reqs); +} + +UCS_TEST_P(test_ucp_wireup_2sided, connect_disconnect) { + sender().connect(&receiver(), get_ep_params()); + if (!is_loopback()) { + receiver().connect(&sender(), get_ep_params()); + } + disconnect(sender()); + if (!is_loopback()) { + disconnect(receiver()); + } +} + +UCS_TEST_P(test_ucp_wireup_2sided, multi_ep_2sided) { + const unsigned count = 10; + + for (unsigned j = 0; j < 4; ++j) { + + unsigned offset = j * count; + + for (unsigned i = 0; i < count; ++i) { + unsigned ep_idx = offset + i; + sender().connect(&receiver(), get_ep_params(), ep_idx); + if (!is_loopback()) { + receiver().connect(&sender(), get_ep_params(), ep_idx); + } + UCS_TEST_MESSAGE << "iteration " << j << " pair " << i << ": " << + sender().ep(0, ep_idx) << " <--> " << receiver().ep(0, ep_idx); + } + + for (unsigned i = 0; i < count; ++i) { + unsigned ep_idx = offset + i; + send_recv(sender().ep(0, ep_idx), receiver().worker(), + receiver().ep(0, ep_idx), 8, 1); + send_recv(receiver().ep(0, ep_idx), sender().worker(), + sender().ep(0, ep_idx), 8, 1); + } + + short_progress_loop(0); + } +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_wireup_2sided) + +class test_ucp_wireup_errh_peer : public test_ucp_wireup_1sided +{ +public: + virtual ucp_ep_params_t get_ep_params() { + ucp_ep_params_t params = test_ucp_wireup::get_ep_params(); + params.field_mask |= UCP_EP_PARAM_FIELD_ERR_HANDLING_MODE | + UCP_EP_PARAM_FIELD_ERR_HANDLER; + params.err_mode = UCP_ERR_HANDLING_MODE_PEER; + params.err_handler.cb = err_cb; + params.err_handler.arg = NULL; + return params; + } + + virtual void init() { + test_ucp_wireup::init(); + skip_loopback(); + } + + static void err_cb(void *, ucp_ep_h, ucs_status_t) {} +}; + +UCS_TEST_P(test_ucp_wireup_errh_peer, msg_after_ep_create) { + 
receiver().connect(&sender(), get_ep_params()); + + sender().connect(&receiver(), get_ep_params()); + send_recv(sender().ep(), receiver().worker(), receiver().ep(), 1, 1); + flush_worker(sender()); +} + +UCS_TEST_P(test_ucp_wireup_errh_peer, msg_before_ep_create) { + + sender().connect(&receiver(), get_ep_params()); + send_recv(sender().ep(), receiver().worker(), receiver().ep(), 1, 1); + flush_worker(sender()); + + receiver().connect(&sender(), get_ep_params()); + + send_recv(receiver().ep(), sender().worker(), receiver().ep(), 1, 1); + flush_worker(receiver()); +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_wireup_errh_peer) + +class test_ucp_wireup_fallback : public test_ucp_wireup { +public: + test_ucp_wireup_fallback() { + m_num_lanes = 0; + } + + static std::vector + enum_test_params(const ucp_params_t& ctx_params, const std::string& name, + const std::string& test_case_name, const std::string& tls) + { + return enum_test_params_features(ctx_params, name, test_case_name, tls, + UCP_FEATURE_TAG | UCP_FEATURE_RMA | + UCP_FEATURE_STREAM, 1); + } + + void init() { + /* do nothing */ + } + + void cleanup() { + /* do nothing */ + } + + bool test_est_num_eps_fallback(size_t est_num_eps, size_t &min_max_num_eps, + bool has_only_unscalable) { + size_t num_lanes = 0; + bool res = true; + + min_max_num_eps = UCS_ULUNITS_INF; + + UCS_TEST_MESSAGE << "Testing " << est_num_eps << " number of EPs"; + modify_config("NUM_EPS", ucs::to_string(est_num_eps).c_str()); + test_ucp_wireup::init(); + + sender().connect(&receiver(), get_ep_params()); + if (!is_loopback()) { + receiver().connect(&sender(), get_ep_params()); + } + send_recv(sender().ep(), receiver().worker(), receiver().ep(), 1, 1); + flush_worker(sender()); + + for (ucp_lane_index_t lane = 0; + lane < ucp_ep_num_lanes(sender().ep()); lane++) { + uct_ep_h uct_ep = sender().ep()->uct_eps[lane]; + if (uct_ep == NULL) { + continue; + } + + uct_iface_attr_t iface_attr; + ucs_status_t status = uct_iface_query(uct_ep->iface, 
&iface_attr); + ASSERT_UCS_OK(status); + + num_lanes++; + + if (!has_only_unscalable) { + if (iface_attr.max_num_eps < est_num_eps) { + res = false; + goto out; + } + } + + if (iface_attr.max_num_eps < min_max_num_eps) { + min_max_num_eps = iface_attr.max_num_eps; + } + } + +out: + test_ucp_wireup::cleanup(); + + if (est_num_eps == 1) { + m_num_lanes = num_lanes; + } else if (has_only_unscalable) { + /* If has only unscalable transports, check that the number of + * lanes is the same as for the case when "est_num_eps == 1" */ + res = (num_lanes == m_num_lanes); + } + + return res; + } + +private: + + /* The number of lanes activated for the case when "est_num_eps == 1" */ + size_t m_num_lanes; +}; + +UCS_TEST_P(test_ucp_wireup_fallback, est_num_eps_fallback) { + size_t test_min_max_eps, min_max_eps; + std::vector rc_tls; + + rc_tls.push_back("rc_v"); + rc_tls.push_back("rc_x"); + + /* If test is running with RC only (i.e. unscalable transport), + * check that a number of created lanes is the same for different + * number of estimated EPs values */ + bool has_only_rc = has_only_transports(rc_tls); + + test_est_num_eps_fallback(1, test_min_max_eps, has_only_rc); + + size_t prev_min_max_eps = 0; + while ((test_min_max_eps != UCS_ULUNITS_INF) && + /* number of EPs was changed between iterations */ + (test_min_max_eps != prev_min_max_eps)) { + if (test_min_max_eps > 1) { + EXPECT_TRUE(test_est_num_eps_fallback(test_min_max_eps - 1, + min_max_eps, has_only_rc)); + } + + EXPECT_TRUE(test_est_num_eps_fallback(test_min_max_eps, + min_max_eps, has_only_rc)); + + EXPECT_TRUE(test_est_num_eps_fallback(test_min_max_eps + 1, + min_max_eps, has_only_rc)); + prev_min_max_eps = test_min_max_eps; + test_min_max_eps = min_max_eps; + } +} + +/* Test fallback from RC to UD, since RC isn't scalable enough + * as its iface max_num_eps attribute = 256 by default */ +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_fallback, + rc_ud, "rc_x,rc_v,ud_x,ud_v") +/* Test two scalable enough 
transports */ +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_fallback, + dc_ud, "dc_x,ud_x,ud_v") +/* Test unscalable transports only */ +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_fallback, + rc, "rc_x,rc_v") +/* Test all available ib transports */ +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_fallback, + ib, "ib") + +/* Fixture that runs a single TAG variant with UNIFIED_MODE set and offers helpers to inspect which transport resources a context and a worker ended up with. */ +class test_ucp_wireup_unified : public test_ucp_wireup { +public: + static std::vector + enum_test_params(const ucp_params_t& ctx_params, const std::string& name, + const std::string& test_case_name, const std::string& tls) + { + std::vector result; + ucp_params_t tmp_ctx_params = ctx_params; + + tmp_ctx_params.features = UCP_FEATURE_TAG; + + generate_test_params_variant(tmp_ctx_params, name, test_case_name + "/uni", + tls, TEST_TAG | UNIFIED_MODE, result); + return result; + } + + /* Returns true if the context lists a resource whose transport name equals tl on memory domain md_idx. */ + bool context_has_tls(ucp_context_h ctx, const std::string& tl, + ucp_rsc_index_t md_idx) + { + for (ucp_rsc_index_t idx = 0; idx < ctx->num_tls; ++idx) { + if (ctx->tl_rscs[idx].md_index != md_idx) { + continue; + } + + if (!strcmp(ctx->tl_rscs[idx].tl_rsc.tl_name, tl.c_str())) { + return true; + } + } + + return false; + } + + /* Returns true if one of the worker's interfaces is backed by a resource whose transport name equals tl on memory domain md_idx. */ + bool worker_has_tls(ucp_worker_h worker, const std::string& tl, + ucp_rsc_index_t md_idx) + { + ucp_context_h ctx = worker->context; + + for (unsigned i = 0; i < worker->num_ifaces; ++i) { + ucp_worker_iface_t *wiface = worker->ifaces[i]; + ucp_rsc_index_t md_idx_it = ctx->tl_rscs[wiface->rsc_index].md_index; + + if (md_idx_it != md_idx) { + continue; + } + + char* name = ctx->tl_rscs[wiface->rsc_index].tl_rsc.tl_name; + if (!strcmp(name, tl.c_str())) { + return true; + } + } + return false; + } + + /* For every memory domain on which the context offers both better_tl and tl, checks that the worker opened fewer interfaces than the context has resources, uses better_tl and does not use tl. */ + void check_unified_ifaces(entity *e, + const std::string& better_tl, + const std::string& tl) + { + ucp_context_h ctx = e->ucph(); + ucp_worker_h worker = e->worker(); + + for (ucp_rsc_index_t i = 0; i < ctx->num_mds; ++i) { + if (!(context_has_tls(ctx, better_tl, i) && + context_has_tls(ctx, tl, i))) { + continue; + } + + ASSERT_TRUE(ctx->num_tls >
worker->num_ifaces); + EXPECT_TRUE(worker_has_tls(worker, better_tl, i)); + EXPECT_FALSE(worker_has_tls(worker, tl, i)); + } + } +}; + + +UCS_TEST_P(test_ucp_wireup_unified, select_best_ifaces) +{ + // Accelerated transports have better performance characteristics than their + // verbs counterparts. Check that corresponding verbs transports are not used + // by workers in unified mode. + check_unified_ifaces(&sender(), "rc_mlx5", "rc_verbs"); + check_unified_ifaces(&sender(), "ud_mlx5", "ud_verbs"); + + // RC and DC have similar capabilities, but RC has better latency while + // the estimated number of endpoints is relatively small. + // sender() is created with 1 ep, so RC should be selected over DC. + check_unified_ifaces(&sender(), "rc_mlx5", "dc_mlx5"); + + // Set some big enough number of endpoints for DC to be more performance + // efficient than RC. Now check that DC is selected over RC. + modify_config("NUM_EPS", "1000"); + entity *e = create_entity(); + check_unified_ifaces(e, "dc_mlx5", "rc_mlx5"); +} + +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_unified, rc, "rc") +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_unified, ud, "ud") +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_unified, rc_dc, "rc,dc") + +class test_ucp_wireup_fallback_amo : public test_ucp_wireup { + /* Probe run: brings the fixture up once, counts resources whose interface reports UCT_IFACE_FLAG_ATOMIC_DEVICE, tears the fixture down again and skips the test unless such a resource exists and the sender worker's atomic_tls is non-zero. */ + void init() { + size_t device_atomics_cnt = 0; + + test_ucp_wireup::init(); + + for (ucp_rsc_index_t idx = 0; idx < sender().ucph()->num_tls; ++idx) { + uct_iface_attr_t *attr = ucp_worker_iface_get_attr(sender().worker(), + idx); + if (attr->cap.flags & UCT_IFACE_FLAG_ATOMIC_DEVICE) { + device_atomics_cnt++; + } + } + bool device_atomics_supported = sender().worker()->atomic_tls != 0; + + test_ucp_wireup::cleanup(); + + if (!device_atomics_supported || !device_atomics_cnt) { + UCS_TEST_SKIP_R("there are no TLs that support device atomics"); + } + } + + void cleanup() { + /* do nothing */ + } + +protected: + + /* Reports whether the interface of the first lane with a non-null amo_lanes entry has UCT_IFACE_FLAG_ATOMIC_DEVICE set; false when there is no such lane. */ + bool use_device_amo(ucp_ep_h ep) { + ucp_ep_config_t *ep_config = ucp_ep_config(ep); +
for (ucp_lane_index_t lane = 0; lane < UCP_MAX_LANES; ++lane) { + /* NOTE(review): lane is used both as the index into amo_lanes[] and as the lane passed to ucp_ep_get_iface_attr(); confirm that amo_lanes[] is indexed by lane rather than holding lane indices. */ + if (ep_config->key.amo_lanes[lane] != UCP_NULL_LANE) { + return (ucp_ep_get_iface_attr(ep, lane)->cap.flags & + UCT_IFACE_FLAG_ATOMIC_DEVICE); + } + } + + return false; + } + + /* Returns the smallest max_num_eps reported by the interfaces of all lanes of ep (UCS_ULUNITS_INF when no lane reports a smaller value). */ + size_t get_min_max_num_eps(ucp_ep_h ep) { + size_t min_max_num_eps = UCS_ULUNITS_INF; + + for (ucp_lane_index_t lane = 0; lane < ucp_ep_num_lanes(ep); lane++) { + uct_iface_attr_t *iface_attr = ucp_ep_get_iface_attr(ep, lane); + + if (iface_attr->max_num_eps < min_max_num_eps) { + min_max_num_eps = iface_attr->max_num_eps; + } + } + + return min_max_num_eps; + } + + /* Sets NUM_EPS to est_num_eps, then for every transport list in tls creates an entity restricted to it, connects sender() to receiver() and compares use_device_amo() against should_use_device_amo. Cleans the fixture up and returns the smallest max_num_eps seen on the sender endpoints. */ + size_t test_wireup_fallback_amo(const std::vector &tls, + size_t est_num_eps, bool should_use_device_amo) { + size_t min_max_num_eps = UCS_ULUNITS_INF; + + UCS_TEST_MESSAGE << "Testing " << est_num_eps << " number of EPs"; + modify_config("NUM_EPS", ucs::to_string(est_num_eps).c_str()); + + // Create a new entity and add it to the end of the vector + // (thus it will be receiver without any connections) + create_entity(false); + + ucp_test_param params = GetParam(); + for (std::vector::const_iterator i = tls.begin(); + i != tls.end(); ++i) { + params.transports.clear(); + params.transports.push_back(*i); + create_entity(true, params); + sender().connect(&receiver(), get_ep_params()); + + EXPECT_EQ(should_use_device_amo, use_device_amo(sender().ep())); + + size_t max_num_eps = get_min_max_num_eps(sender().ep()); + if (max_num_eps < min_max_num_eps) { + min_max_num_eps = max_num_eps; + } + } + + test_ucp_wireup::cleanup(); + + return min_max_num_eps; + } + +public: + + static ucp_params_t get_ctx_params() { + ucp_params_t params = test_ucp_wireup::get_ctx_params(); + params.field_mask |= UCP_PARAM_FIELD_FEATURES; + params.features |= (UCP_FEATURE_AMO32 | + UCP_FEATURE_AMO64); + return params; + } +}; + +class test_ucp_wireup_amo : public test_ucp_wireup { +public: + typedef struct { + test_ucp_wireup_amo *test; + } request_t; + + static ucp_params_t get_ctx_params() { +
ucp_params_t params = test_ucp_wireup::get_ctx_params(); + params.field_mask |= UCP_PARAM_FIELD_REQUEST_SIZE; + params.request_size = sizeof(request_t); + return params; + } + + static std::vector + enum_test_params(const ucp_params_t& ctx_params, const std::string& name, + const std::string& test_case_name, const std::string& tls) + { + uint64_t amo_features; + + EXPECT_TRUE((sizeof(elem_type) == 4ul) || (sizeof(elem_type) == 8ul)); + amo_features = (sizeof(elem_type) == 4ul) ? UCP_FEATURE_AMO32 : + UCP_FEATURE_AMO64; + return enum_test_params_features(ctx_params, name, test_case_name, tls, + amo_features, false); + } + +protected: + ucp_rkey_h get_rkey(const entity &e) { + if (&sender() == &e) { + return test_ucp_wireup::get_rkey(e.ep(), m_memh_receiver); + } else if (&receiver() == &e) { + return test_ucp_wireup::get_rkey(e.ep(), m_memh_sender); + } + + return NULL; + } + + void add_rkey(ucp_rkey_h rkey) { + ASSERT_NE((ucp_rkey_h)NULL, rkey); + m_rkeys.push_back(ucs::handle(rkey, ucp_rkey_destroy)); + } + + void fill_send_data() { + m_send_data[0] = ucs_generate_uuid(0); + } + + static void flush_cb(void *req, ucs_status_t status) { + request_t *request = (request_t *)req; + + ASSERT_UCS_OK(status); + request->test->rkeys_cleanup(); + request->test->memhs_cleanup(); + } +}; + +UCS_TEST_P(test_ucp_wireup_amo, relese_key_after_flush) { + fill_send_data(); + clear_recv_data(); + + sender().connect(&receiver(), get_ep_params()); + + ucp_rkey_h rkey = get_rkey(sender()); + add_rkey(rkey); + + ucs_status_t status = ucp_atomic_post(sender().ep(), UCP_ATOMIC_POST_OP_ADD, + m_send_data[0], sizeof(elem_type), + (uint64_t)&m_recv_data[0], rkey); + ASSERT_UCS_OK(status); + request_t *req = (request_t *)ucp_ep_flush_nb(sender().ep(), + UCT_FLUSH_FLAG_LOCAL, + flush_cb); + if (UCS_PTR_IS_PTR(req)) { + req->test = this; + wait(req); + } else { + ASSERT_UCS_OK(UCS_PTR_STATUS(req)); + } +} + +UCP_INSTANTIATE_TEST_CASE(test_ucp_wireup_amo) + 
+UCS_TEST_P(test_ucp_wireup_fallback_amo, different_amo_types) { + std::vector tls; + + /* the 1st peer support RC only (device atomics) */ + tls.push_back("rc"); + /* the 2nd peer support RC and SHM (device and CPU atomics) */ + tls.push_back("rc,shm"); + + size_t min_max_num_eps = test_wireup_fallback_amo(tls, 1, 1); + test_wireup_fallback_amo(tls, min_max_num_eps + 1, 0); +} + +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_fallback_amo, + shm_rc, "shm,rc_x,rc_v") + +/* NOTE: this fixture is NOT inherited from test_ucp_wireup, because we want to + * create our own entities. + */ +class test_ucp_wireup_asymmetric : public ucp_test { +protected: + virtual void init() { + static const char *ibdev_sysfs_dir = "/sys/class/infiniband"; + + DIR *dir = opendir(ibdev_sysfs_dir); + if (dir == NULL) { + UCS_TEST_SKIP_R(std::string(ibdev_sysfs_dir) + " not found"); + } + + for (;;) { + struct dirent *entry = readdir(dir); + if (entry == NULL) { + break; + } + + if (entry->d_name[0] == '.') { + continue; + } + + m_ib_devices.push_back(entry->d_name); + } + + closedir(dir); + } + + void tag_sendrecv(size_t size) { + std::string send_data(size, 's'); + std::string recv_data(size, 'x'); + + ucs_status_ptr_t sreq = ucp_tag_send_nb( + sender().ep(0), &send_data[0], size, + ucp_dt_make_contig(1), 1, + (ucp_send_callback_t)ucs_empty_function); + ucs_status_ptr_t rreq = ucp_tag_recv_nb( + receiver().worker(), &recv_data[0], size, + ucp_dt_make_contig(1), 1, 1, + (ucp_tag_recv_callback_t)ucs_empty_function); + wait(sreq); + wait(rreq); + + EXPECT_EQ(send_data, recv_data); + } + + /* Generate a pci_bw configuration string for IB devices, which assigns + * the speed ai+b for device i. 
+ */ + std::string pci_bw_config(int a, int b) { + std::string config_str; + for (size_t i = 0; i < m_ib_devices.size(); ++i) { + config_str += m_ib_devices[i] + ":" + + ucs::to_string((a * i) + b) + "Gbps"; + if (i != (m_ib_devices.size() - 1)) { + config_str += ","; + } + } + return config_str; + } + + std::vector m_ib_devices; + +public: + static ucp_params_t get_ctx_params() { + ucp_params_t params = ucp_test::get_ctx_params(); + params.field_mask |= UCP_PARAM_FIELD_FEATURES; + params.features = UCP_FEATURE_TAG; + return params; + } +}; + +/* + * Force asymmetric configuration by different PCI_BW settings + */ +UCS_TEST_SKIP_COND_P(test_ucp_wireup_asymmetric, connect, is_self()) { + + /* Enable cross-dev connection */ + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv path_mtu_env("UCX_RC_PATH_MTU", "1024"); + + { + std::string config_str = pci_bw_config(20, 20); + UCS_TEST_MESSAGE << "creating sender: " << config_str; + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv pci_bw_env("UCX_IB_PCI_BW", config_str.c_str()); + create_entity(); + } + + { + std::string config_str = pci_bw_config(-20, m_ib_devices.size() * 20); + UCS_TEST_MESSAGE << "creating receiver: " << config_str; + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv pci_bw_env("UCX_IB_PCI_BW", config_str.c_str()); + create_entity(); + } + + sender().connect(&receiver(), get_ep_params()); + receiver().connect(&sender(), get_ep_params()); + + ucp_ep_print_info(sender().ep(), stdout); + ucp_ep_print_info(receiver().ep(), stdout); + + tag_sendrecv(1); + tag_sendrecv(100000); + tag_sendrecv(1000000); +} + +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_asymmetric, rcv, "rc_v") +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_asymmetric, rcx, "rc_x") +UCP_INSTANTIATE_TEST_CASE_TLS(test_ucp_wireup_asymmetric, ib, "ib") diff --git a/test/gtest/ucp/ucp_datatype.cc b/test/gtest/ucp/ucp_datatype.cc new file mode 100644 index 0000000..5c1cce7 --- /dev/null +++ 
b/test/gtest/ucp/ucp_datatype.cc @@ -0,0 +1,201 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include "ucp_datatype.h" +#include "ucp_test.h" + +#include + +namespace ucp { + + +data_type_desc_t & +data_type_desc_t::make(ucp_datatype_t datatype, const void *buf, size_t length, + size_t iov_cnt) +{ + EXPECT_FALSE(is_valid()); + + if (m_length == 0) { + m_length = length; + } + + if (m_origin == uintptr_t(NULL)) { + m_origin = uintptr_t(buf); + } + + m_dt = datatype; + memset(m_iov, 0, sizeof(m_iov)); + + switch (m_dt & UCP_DATATYPE_CLASS_MASK) { + case UCP_DATATYPE_CONTIG: + m_buf = buf; + m_count = length / ucp_contig_dt_elem_size(datatype); + break; + case UCP_DATATYPE_IOV: + { + const size_t iov_length = (length > iov_cnt) ? + ucs::rand() % (length / iov_cnt) : 0; + size_t iov_length_it = 0; + for (size_t iov_it = 0; iov_it < iov_cnt - 1; ++iov_it) { + m_iov[iov_it].buffer = (char *)(buf) + iov_length_it; + m_iov[iov_it].length = iov_length; + iov_length_it += iov_length; + } + + /* Last entry */ + m_iov[iov_cnt - 1].buffer = (char *)(buf) + iov_length_it; + m_iov[iov_cnt - 1].length = length - iov_length_it; + + m_buf = m_iov; + m_count = iov_cnt; + break; + } + case UCP_DATATYPE_GENERIC: + m_buf = buf; + m_count = length; + break; + default: + m_buf = NULL; + m_count = 0; + EXPECT_TRUE(false) << "Unsupported datatype"; + break; + } + + return *this; +} + +int dt_gen_start_count = 0; +int dt_gen_finish_count = 0; + +static void* dt_common_start(void *context, size_t count) +{ + dt_gen_state *dt_state = new dt_gen_state; + + dt_state->count = count; + dt_state->started = 1; + dt_state->magic = ucp::MAGIC; + dt_state->context = context; + dt_gen_start_count++; + + return dt_state; +} + +static void* dt_common_start_pack(void *context, const void *buffer, + size_t count) +{ + return dt_common_start(NULL, count); +} + +static void* dt_common_start_unpack(void *context, void *buffer, size_t 
count) +{ + return dt_common_start(context, count); +} + +template +size_t dt_packed_size(void *state) +{ + dt_gen_state *dt_state = (dt_gen_state*)state; + + return dt_state->count * sizeof(T); +} + +template +size_t dt_pack(void *state, size_t offset, void *dest, size_t max_length) +{ + dt_gen_state *dt_state = (dt_gen_state*)state; + T *p = reinterpret_cast (dest); + uint32_t count; + + EXPECT_GT(dt_gen_start_count, dt_gen_finish_count); + EXPECT_EQ(1, dt_state->started); + EXPECT_EQ(uint32_t(MAGIC), dt_state->magic); + + ucs_assert((offset % sizeof(T)) == 0); + + count = std::min(max_length / sizeof(T), + dt_state->count - (offset / sizeof(T))); + for (unsigned i = 0; i < count; ++i) { + p[i] = (offset / sizeof(T)) + i; + } + return count * sizeof(T); +} + +template +ucs_status_t dt_unpack(void *state, size_t offset, const void *src, + size_t length) +{ + dt_gen_state *dt_state = (dt_gen_state*)state; + std::vector *ctx; + uint32_t count; + + EXPECT_GT(dt_gen_start_count, dt_gen_finish_count); + EXPECT_EQ(1, dt_state->started); + EXPECT_EQ(uint32_t(MAGIC), dt_state->magic); + + ctx = reinterpret_cast*>(dt_state->context); + count = length / sizeof(T); + for (unsigned i = 0; i < count; ++i) { + T expected = ctx ? (*ctx)[offset / sizeof(T) + i] : + (offset / sizeof(T)) + i; + T actual = ((T*)src)[i]; + if (actual != expected) { + UCS_TEST_ABORT("Invalid data at index " << i << ". 
expected: " << + expected << " actual: " << actual << " offset: " << + offset << "."); + } + } + return UCS_OK; +} + +static ucs_status_t dt_err_unpack(void *state, size_t offset, const void *src, + size_t length) +{ + dt_gen_state *dt_state = (dt_gen_state*)state; + + EXPECT_GT(dt_gen_start_count, dt_gen_finish_count); + EXPECT_EQ(1, dt_state->started); + EXPECT_EQ(uint32_t(MAGIC), dt_state->magic); + + return UCS_ERR_NO_MEMORY; +} + +static void dt_common_finish(void *state) +{ + dt_gen_state *dt_state = (dt_gen_state*)state; + + --dt_state->started; + EXPECT_EQ(0, dt_state->started); + dt_gen_finish_count++; + delete dt_state; +} + +ucp_generic_dt_ops test_dt_uint32_ops = { + dt_common_start_pack, + dt_common_start_unpack, + dt_packed_size, + dt_pack, + dt_unpack, + dt_common_finish +}; + +ucp_generic_dt_ops test_dt_uint8_ops = { + dt_common_start_pack, + dt_common_start_unpack, + dt_packed_size, + dt_pack, + dt_unpack, + dt_common_finish +}; + +ucp_generic_dt_ops test_dt_uint32_err_ops = { + dt_common_start_pack, + dt_common_start_unpack, + dt_packed_size, + dt_pack, + dt_err_unpack, + dt_common_finish +}; + +} // ucp diff --git a/test/gtest/ucp/ucp_datatype.h b/test/gtest/ucp/ucp_datatype.h new file mode 100644 index 0000000..545c860 --- /dev/null +++ b/test/gtest/ucp/ucp_datatype.h @@ -0,0 +1,132 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#ifndef TEST_UCP_DATATYPE_H_ +#define TEST_UCP_DATATYPE_H_ + +#include + +#include +extern "C" { +#include +#include +#include +} + +#include + +namespace ucp { + +/* Can't be destroyed before related UCP request is completed */ +class data_type_desc_t { +public: + enum { + MAX_IOV = 40 + }; + + data_type_desc_t() + : m_origin(uintptr_t(NULL)), m_length(0), m_dt(0), m_buf(NULL), + m_count(0), m_iov_cnt_limit(sizeof(m_iov) / sizeof(m_iov[0])) { + memset(m_iov, 0, sizeof(m_iov)); + }; + + data_type_desc_t(ucp_datatype_t datatype, const void *buf, size_t length) + : m_origin(uintptr_t(buf)), m_length(length), m_dt(0), m_buf(NULL), + m_iov_cnt_limit(sizeof(m_iov) / sizeof(m_iov[0])) { + make(datatype, buf, length); + } + + data_type_desc_t(ucp_datatype_t datatype, const void *buf, size_t length, + size_t iov_count) + : m_origin(uintptr_t(buf)), m_length(length), m_dt(0), m_buf(NULL), + m_iov_cnt_limit(sizeof(m_iov) / sizeof(m_iov[0])) { + make(datatype, buf, length, iov_count); + }; + + data_type_desc_t &make(ucp_datatype_t datatype, const void *buf, + size_t length) { + return make(datatype, buf, length, m_iov_cnt_limit); + }; + + data_type_desc_t &forward_to(size_t offset) { + EXPECT_LE(offset, m_length); + invalidate(); + return make(m_dt, (const void *)(m_origin + offset), m_length - offset, + m_iov_cnt_limit); + }; + + ucp_datatype_t dt() const { + EXPECT_TRUE(is_valid()); + return m_dt; + }; + + void *buf() const { + EXPECT_TRUE(is_valid()); + return const_cast(m_buf); + }; + + ssize_t buf_length() const { + EXPECT_TRUE(is_valid()); + if (UCP_DT_IS_CONTIG(m_dt) || UCP_DT_IS_GENERIC(m_dt)) { + return m_length - (uintptr_t(m_buf) - m_origin); + } else if (UCP_DT_IS_IOV(m_dt)) { + size_t length = 0; + for (size_t i = 0; i < count(); ++i) { + length += m_iov[i].length; + } + return length; + } + ADD_FAILURE() << "Not supported datatype"; + return -1; + } + + size_t count() const { + EXPECT_TRUE(is_valid()); + return m_count; + }; + + bool is_valid() const { + 
return (m_buf != NULL) && (m_count != 0) && + (UCP_DT_IS_IOV(m_dt) ? (m_count <= m_iov_cnt_limit) : + (UCP_DT_IS_CONTIG(m_dt) || UCP_DT_IS_GENERIC(m_dt))); + } + +private: + data_type_desc_t &make(ucp_datatype_t datatype, const void *buf, + size_t length, size_t iov_count); + + void invalidate() { + EXPECT_TRUE(is_valid()); + m_buf = NULL; + m_count = 0; + } + + uintptr_t m_origin; + size_t m_length; + + ucp_datatype_t m_dt; + const void *m_buf; + size_t m_count; + + const size_t m_iov_cnt_limit; + ucp_dt_iov_t m_iov[MAX_IOV]; +}; + +struct dt_gen_state { + size_t count; + int started; + uint32_t magic; + void *context; +}; + +extern int dt_gen_start_count; +extern int dt_gen_finish_count; +extern ucp_generic_dt_ops test_dt_uint32_ops; +extern ucp_generic_dt_ops test_dt_uint32_err_ops; +extern ucp_generic_dt_ops test_dt_uint8_ops; + +} // ucp + +#endif /* TEST_UCP_DATATYPE_H_ */ diff --git a/test/gtest/ucp/ucp_test.cc b/test/gtest/ucp/ucp_test.cc new file mode 100644 index 0000000..123c38b --- /dev/null +++ b/test/gtest/ucp/ucp_test.cc @@ -0,0 +1,804 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#include "ucp_test.h" +#include + +extern "C" { +#include +#if HAVE_IB +#include +#endif +#include +#include +} + +#include + +namespace ucp { +const uint32_t MAGIC = 0xd7d7d7d7U; +} + +std::ostream& operator<<(std::ostream& os, const ucp_test_param& test_param) +{ + std::vector::const_iterator iter; + const std::vector& transports = test_param.transports; + for (iter = transports.begin(); iter != transports.end(); ++iter) { + if (iter != transports.begin()) { + os << ","; + } + os << *iter; + } + return os; +} + +const ucp_datatype_t ucp_test::DATATYPE = ucp_dt_make_contig(1); +const ucp_datatype_t ucp_test::DATATYPE_IOV = ucp_dt_make_iov(); + +ucp_test::ucp_test() { + ucs_status_t status; + status = ucp_config_read(NULL, NULL, &m_ucp_config); + ASSERT_UCS_OK(status); +} + +ucp_test::~ucp_test() { + + for (ucs::ptr_vector::const_iterator iter = entities().begin(); + iter != entities().end(); ++iter) + { + (*iter)->warn_existing_eps(); + } + ucp_config_release(m_ucp_config); +} + +void ucp_test::cleanup() { + /* disconnect before destroying the entities */ + for (ucs::ptr_vector::const_iterator iter = entities().begin(); + iter != entities().end(); ++iter) + { + disconnect(**iter); + } + + for (ucs::ptr_vector::const_iterator iter = entities().begin(); + iter != entities().end(); ++iter) + { + (*iter)->cleanup(); + } + + m_entities.clear(); +} + +void ucp_test::init() { + test_base::init(); + + create_entity(); + if (!is_self()) { + create_entity(); + } +} + +static bool check_transport(const std::string check_tl_name, + const std::vector& tl_names) { + return (std::find(tl_names.begin(), tl_names.end(), + check_tl_name) != tl_names.end()); +} + +bool ucp_test::has_transport(const std::string& tl_name) const { + return check_transport(tl_name, GetParam().transports); +} + +bool ucp_test::has_only_transports(const std::vector& tl_names) const { + const std::vector& transports = GetParam().transports; + size_t other_tls_count = 0; + std::vector::const_iterator 
iter; + + for(iter = transports.begin(); iter != transports.end(); ++iter) { + if (!check_transport(*iter, tl_names)) { + other_tls_count++; + } + } + + return !other_tls_count; +} + +bool ucp_test::is_self() const { + return "self" == GetParam().transports.front(); +} + +ucp_test_base::entity* ucp_test::create_entity(bool add_in_front) { + return create_entity(add_in_front, GetParam()); +} + +ucp_test_base::entity* +ucp_test::create_entity(bool add_in_front, const ucp_test_param &test_param) { + entity *e = new entity(test_param, m_ucp_config, get_worker_params(), this); + if (add_in_front) { + m_entities.push_front(e); + } else { + m_entities.push_back(e); + } + return e; +} + +ucp_params_t ucp_test::get_ctx_params() { + ucp_params_t params; + memset(¶ms, 0, sizeof(params)); + params.field_mask |= UCP_PARAM_FIELD_FEATURES; + return params; +} + +ucp_worker_params_t ucp_test::get_worker_params() { + ucp_worker_params_t params; + memset(¶ms, 0, sizeof(params)); + params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; + params.thread_mode = UCS_THREAD_MODE_MULTI; + return params; +} + +ucp_ep_params_t ucp_test::get_ep_params() { + ucp_ep_params_t params; + memset(¶ms, 0, sizeof(params)); + return params; +} + +unsigned ucp_test::progress(int worker_index) const { + unsigned count = 0; + for (ucs::ptr_vector::const_iterator iter = entities().begin(); + iter != entities().end(); ++iter) + { + count += (*iter)->progress(worker_index); + sched_yield(); + } + return count; +} + +void ucp_test::short_progress_loop(int worker_index) const { + for (unsigned i = 0; i < 100; ++i) { + progress(worker_index); + usleep(100); + } +} + +void ucp_test::flush_ep(const entity &e, int worker_index, int ep_index) +{ + void *request = e.flush_ep_nb(worker_index, ep_index); + wait(request, worker_index); +} + +void ucp_test::flush_worker(const entity &e, int worker_index) +{ + void *request = e.flush_worker_nb(worker_index); + wait(request, worker_index); +} + +void 
ucp_test::disconnect(const entity& e) { + bool has_failed_entity = false; + for (ucs::ptr_vector::const_iterator iter = entities().begin(); + !has_failed_entity && (iter != entities().end()); ++iter) { + has_failed_entity = ((*iter)->get_err_num() > 0); + } + + for (int i = 0; i < e.get_num_workers(); i++) { + enum ucp_ep_close_mode close_mode; + + if (has_failed_entity) { + close_mode = UCP_EP_CLOSE_MODE_FORCE; + } else { + flush_worker(e, i); + close_mode = UCP_EP_CLOSE_MODE_FLUSH; + } + + for (int j = 0; j < e.get_num_eps(i); j++) { + void *dreq = e.disconnect_nb(i, j, close_mode); + if (!UCS_PTR_IS_PTR(dreq)) { + ASSERT_UCS_OK(UCS_PTR_STATUS(dreq)); + } + wait(dreq, i); + } + } +} + +void ucp_test::wait(void *req, int worker_index) +{ + if (req == NULL) { + return; + } + + if (UCS_PTR_IS_ERR(req)) { + ucs_error("operation returned error: %s", + ucs_status_string(UCS_PTR_STATUS(req))); + return; + } + + ucs_status_t status; + do { + progress(worker_index); + status = ucp_request_check_status(req); + } while (status == UCS_INPROGRESS); + + if (status != UCS_OK) { + /* UCS errors are suppressed in case of error handling tests */ + ucs_error("request %p completed with error %s", req, + ucs_status_string(status)); + } + + ucp_request_release(req); +} + +void ucp_test::set_ucp_config(ucp_config_t *config) { + set_ucp_config(config, GetParam()); +} + +int ucp_test::max_connections() { + if (has_transport("tcp")) { + return ucs::max_tcp_connections(); + } else { + return std::numeric_limits::max(); + } +} + +std::vector +ucp_test::enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls) +{ + ucp_test_param test_param; + std::stringstream ss(tls); + + test_param.ctx_params = ctx_params; + test_param.variant = DEFAULT_PARAM_VARIANT; + test_param.thread_type = SINGLE_THREAD; + + while (ss.good()) { + std::string tl_name; + std::getline(ss, tl_name, ','); + 
test_param.transports.push_back(tl_name); + } + + if (check_test_param(name, test_case_name, test_param)) { + return std::vector(1, test_param); + } else { + return std::vector(); + } +} + +void ucp_test::generate_test_params_variant(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls, + int variant, + std::vector& test_params, + int thread_type) +{ + std::vector tmp_test_params; + + tmp_test_params = ucp_test::enum_test_params(ctx_params,name, + test_case_name, tls); + for (std::vector::iterator iter = tmp_test_params.begin(); + iter != tmp_test_params.end(); ++iter) + { + iter->variant = variant; + iter->thread_type = thread_type; + test_params.push_back(*iter); + } +} + +void ucp_test::set_ucp_config(ucp_config_t *config, + const ucp_test_param& test_param) +{ + std::stringstream ss; + ss << test_param; + ucp_config_modify(config, "TLS", ss.str().c_str()); + /* prevent configuration warnings in the UCP testing */ + ucp_config_modify(config, "WARN_INVALID_CONFIG", "no"); +} + +void ucp_test::modify_config(const std::string& name, const std::string& value, + bool optional) +{ + ucs_status_t status; + + status = ucp_config_modify(m_ucp_config, name.c_str(), value.c_str()); + if (status == UCS_ERR_NO_ELEM) { + test_base::modify_config(name, value, optional); + } else if (status != UCS_OK) { + UCS_TEST_ABORT("Couldn't modify ucp config parameter: " << + name.c_str() << " to " << value.c_str() << ": " << + ucs_status_string(status)); + } +} + +void ucp_test::stats_activate() +{ + ucs_stats_cleanup(); + push_config(); + modify_config("STATS_DEST", "file:/dev/null"); + modify_config("STATS_TRIGGER", "exit"); + ucs_stats_init(); + ASSERT_TRUE(ucs_stats_is_active()); +} + +void ucp_test::stats_restore() +{ + ucs_stats_cleanup(); + pop_config(); + ucs_stats_init(); +} + + +bool ucp_test::check_test_param(const std::string& name, + const std::string& test_case_name, + const ucp_test_param& test_param) +{ + 
typedef std::map cache_t; + static cache_t cache; + + if (test_param.transports.empty()) { + return false; + } + + cache_t::iterator iter = cache.find(name); + if (iter != cache.end()) { + return iter->second; + } + + ucs::handle config; + UCS_TEST_CREATE_HANDLE(ucp_config_t*, config, ucp_config_release, + ucp_config_read, NULL, NULL); + set_ucp_config(config, test_param); + + ucp_context_h ucph; + ucs_status_t status; + { + scoped_log_handler slh(hide_errors_logger); + status = ucp_init(&test_param.ctx_params, config, &ucph); + } + + bool result; + if (status == UCS_OK) { + ucp_cleanup(ucph); + result = true; + } else if (status == UCS_ERR_NO_DEVICE) { + result = false; + } else { + UCS_TEST_ABORT("Failed to create context (" << test_case_name << "): " + << ucs_status_string(status)); + } + + UCS_TEST_MESSAGE << "checking " << name << ": " << (result ? "yes" : "no"); + cache[name] = result; + return result; +} + +ucp_test_base::entity::entity(const ucp_test_param& test_param, + ucp_config_t* ucp_config, + const ucp_worker_params_t& worker_params, + const ucp_test_base *test_owner) + : m_err_cntr(0), m_rejected_cntr(0) +{ + ucp_test_param entity_param = test_param; + ucp_worker_params_t local_worker_params = worker_params; + int num_workers; + + if (test_param.thread_type == MULTI_THREAD_CONTEXT) { + num_workers = MT_TEST_NUM_THREADS; + entity_param.ctx_params.mt_workers_shared = 1; + local_worker_params.thread_mode = UCS_THREAD_MODE_SINGLE; + } else if (test_param.thread_type == MULTI_THREAD_WORKER) { + num_workers = 1; + entity_param.ctx_params.mt_workers_shared = 0; + local_worker_params.thread_mode = UCS_THREAD_MODE_MULTI; + } else { + num_workers = 1; + entity_param.ctx_params.mt_workers_shared = 0; + local_worker_params.thread_mode = UCS_THREAD_MODE_SINGLE; + } + + entity_param.ctx_params.field_mask |= UCP_PARAM_FIELD_MT_WORKERS_SHARED; + local_worker_params.field_mask |= UCP_WORKER_PARAM_FIELD_THREAD_MODE; + + ucp_test::set_ucp_config(ucp_config, 
entity_param); + + { + scoped_log_handler slh(hide_errors_logger); + UCS_TEST_CREATE_HANDLE(ucp_context_h, m_ucph, ucp_cleanup, ucp_init, + &entity_param.ctx_params, ucp_config); + } + + m_workers.resize(num_workers); + for (int i = 0; i < num_workers; i++) { + UCS_TEST_CREATE_HANDLE(ucp_worker_h, m_workers[i].first, + ucp_worker_destroy, ucp_worker_create, m_ucph, + &local_worker_params); + } +} + +ucp_test_base::entity::~entity() { + cleanup(); +} + +void ucp_test_base::entity::connect(const entity* other, + const ucp_ep_params_t& ep_params, + int ep_idx, int do_set_ep) { + assert(get_num_workers() == other->get_num_workers()); + for (unsigned i = 0; i < unsigned(get_num_workers()); i++) { + ucs_status_t status; + ucp_address_t *address; + size_t address_length; + ucp_ep_h ep; + + status = ucp_worker_get_address(other->worker(i), &address, &address_length); + ASSERT_UCS_OK(status); + + { + scoped_log_handler slh(hide_errors_logger); + + ucp_ep_params_t local_ep_params = ep_params; + local_ep_params.field_mask |= UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; + local_ep_params.address = address; + + status = ucp_ep_create(m_workers[i].first, &local_ep_params, &ep); + } + + if (status == UCS_ERR_UNREACHABLE) { + ucp_worker_release_address(other->worker(i), address); + UCS_TEST_SKIP_R(m_errors.empty() ? 
"Unreachable" : m_errors.back()); + } + + ASSERT_UCS_OK(status, << " (" << m_errors.back() << ")"); + + if (do_set_ep) { + set_ep(ep, i, ep_idx); + } + + ucp_worker_release_address(other->worker(i), address); + } +} + +ucp_ep_h ucp_test_base::entity::accept(ucp_worker_h worker, + ucp_conn_request_h conn_request) +{ + ucp_ep_params_t ep_params = *m_server_ep_params; + ucp_ep_h ep; + + ep_params.field_mask |= UCP_EP_PARAM_FIELD_CONN_REQUEST | + UCP_EP_PARAM_FIELD_USER_DATA; + ep_params.user_data = reinterpret_cast(this); + ep_params.conn_request = conn_request; + + ucs_status_t status = ucp_ep_create(worker, &ep_params, &ep); + if (status == UCS_ERR_UNREACHABLE) { + UCS_TEST_SKIP_R("Skipping due an unreachable destination (unsupported " + "feature or no supported transport to send partial " + "worker address)"); + } + ASSERT_UCS_OK(status); + return ep; +} + + +void* ucp_test_base::entity::modify_ep(const ucp_ep_params_t& ep_params, + int worker_idx, int ep_idx) { + return ucp_ep_modify_nb(ep(worker_idx, ep_idx), &ep_params); +} + + +void ucp_test_base::entity::set_ep(ucp_ep_h ep, int worker_index, int ep_index) +{ + if (ep_index < get_num_eps(worker_index)) { + m_workers[worker_index].second[ep_index].reset(ep, ep_destructor, this); + } else { + m_workers[worker_index].second.push_back( + ucs::handle(ep, ucp_ep_destroy)); + } +} + +void ucp_test_base::entity::empty_send_completion(void *r, ucs_status_t status) { +} + +void ucp_test_base::entity::accept_ep_cb(ucp_ep_h ep, void *arg) { + entity *self = reinterpret_cast(arg); + int worker_index = 0; /* TODO pass worker index in arg */ + + /* take error handler from test fixture and add user data */ + ucp_ep_params_t ep_params = *self->m_server_ep_params; + ep_params.field_mask &= UCP_EP_PARAM_FIELD_ERR_HANDLER; + ep_params.field_mask |= UCP_EP_PARAM_FIELD_USER_DATA; + ep_params.user_data = reinterpret_cast(self); + + void *req = ucp_ep_modify_nb(ep, &ep_params); + ASSERT_UCS_PTR_OK(req); /* don't expect this operation 
to block */ + + self->set_ep(ep, worker_index, self->get_num_eps(worker_index)); +} + +void ucp_test_base::entity::accept_conn_cb(ucp_conn_request_h conn_req, void* arg) +{ + entity *self = reinterpret_cast(arg); + self->m_conn_reqs.push(conn_req); +} + +void ucp_test_base::entity::reject_conn_cb(ucp_conn_request_h conn_req, void* arg) +{ + entity *self = reinterpret_cast(arg); + ucp_listener_reject(self->m_listener, conn_req); + self->m_rejected_cntr++; +} + +void* ucp_test_base::entity::flush_ep_nb(int worker_index, int ep_index) const { + return ucp_ep_flush_nb(ep(worker_index, ep_index), 0, empty_send_completion); +} + +void* ucp_test_base::entity::flush_worker_nb(int worker_index) const { + if (worker(worker_index) == NULL) { + return NULL; + } + return ucp_worker_flush_nb(worker(worker_index), 0, empty_send_completion); +} + +void ucp_test_base::entity::fence(int worker_index) const { + ucs_status_t status = ucp_worker_fence(worker(worker_index)); + ASSERT_UCS_OK(status); +} + +void* ucp_test_base::entity::disconnect_nb(int worker_index, int ep_index, + enum ucp_ep_close_mode mode) const { + ucp_ep_h ep = revoke_ep(worker_index, ep_index); + if (ep == NULL) { + return NULL; + } + return ucp_ep_close_nb(ep, mode); +} + +void ucp_test_base::entity::destroy_worker(int worker_index) { + for (size_t i = 0; i < m_workers[worker_index].second.size(); ++i) { + m_workers[worker_index].second[i].revoke(); + } + m_workers[worker_index].first.reset(); +} + +ucp_ep_h ucp_test_base::entity::ep(int worker_index, int ep_index) const { + if (size_t(worker_index) < m_workers.size()) { + if (size_t(ep_index) < m_workers[worker_index].second.size()) { + return m_workers[worker_index].second[ep_index]; + } + } + return NULL; +} + +ucp_ep_h ucp_test_base::entity::revoke_ep(int worker_index, int ep_index) const { + ucp_ep_h ucp_ep = ep(worker_index, ep_index); + + if (ucp_ep) { + m_workers[worker_index].second[ep_index].revoke(); + } + + return ucp_ep; +} + +ucs_status_t 
ucp_test_base::entity::listen(listen_cb_type_t cb_type, + const struct sockaddr* saddr, + socklen_t addrlen, + const ucp_ep_params_t& ep_params, + int worker_index) +{ + ucp_listener_params_t params; + ucp_listener_h listener; + + params.field_mask = UCP_LISTENER_PARAM_FIELD_SOCK_ADDR; + params.sockaddr.addr = saddr; + params.sockaddr.addrlen = addrlen; + + switch (cb_type) { + case LISTEN_CB_EP: + params.field_mask |= UCP_LISTENER_PARAM_FIELD_ACCEPT_HANDLER; + params.accept_handler.cb = accept_ep_cb; + params.accept_handler.arg = reinterpret_cast(this); + break; + case LISTEN_CB_CONN: + params.field_mask |= UCP_LISTENER_PARAM_FIELD_CONN_HANDLER; + params.conn_handler.cb = accept_conn_cb; + params.conn_handler.arg = reinterpret_cast(this); + break; + case LISTEN_CB_REJECT: + params.field_mask |= UCP_LISTENER_PARAM_FIELD_CONN_HANDLER; + params.conn_handler.cb = reject_conn_cb; + params.conn_handler.arg = reinterpret_cast(this); + break; + default: + UCS_TEST_ABORT("invalid test parameter"); + } + + m_server_ep_params.reset(new ucp_ep_params_t(ep_params), + ucs::deleter); + + ucs_status_t status; + { + scoped_log_handler wrap_err(wrap_errors_logger); + status = ucp_listener_create(worker(worker_index), ¶ms, &listener); + } + + if (status == UCS_OK) { + m_listener.reset(listener, ucp_listener_destroy); + } else { + /* throw error if status is not (UCS_OK or UCS_ERR_UNREACHABLE or + * UCS_ERR_BUSY). 
+ * UCS_ERR_INVALID_PARAM may also return but then the test should fail */ + EXPECT_TRUE((status == UCS_ERR_UNREACHABLE) || + (status == UCS_ERR_BUSY)) << ucs_status_string(status); + } + + return status; +} + +ucp_worker_h ucp_test_base::entity::worker(int worker_index) const { + if (worker_index < get_num_workers()) { + return m_workers[worker_index].first; + } else { + return NULL; + } +} + +ucp_context_h ucp_test_base::entity::ucph() const { + return m_ucph; +} + +ucp_listener_h ucp_test_base::entity::listenerh() const { + return m_listener; +} + +unsigned ucp_test_base::entity::progress(int worker_index) +{ + ucp_worker_h ucp_worker = worker(worker_index); + + if (ucp_worker == NULL) { + return 0; + } + + unsigned progress_count = 0; + if (!m_conn_reqs.empty()) { + ucp_conn_request_h conn_req = m_conn_reqs.back(); + m_conn_reqs.pop(); + ucp_ep_h ep = accept(ucp_worker, conn_req); + set_ep(ep, worker_index, std::numeric_limits::max()); + ++progress_count; + } + + return progress_count + ucp_worker_progress(ucp_worker); +} + +int ucp_test_base::entity::get_num_workers() const { + return m_workers.size(); +} + +int ucp_test_base::entity::get_num_eps(int worker_index) const { + return m_workers[worker_index].second.size(); +} + +void ucp_test_base::entity::add_err(ucs_status_t status) { + switch (status) { + case UCS_ERR_REJECTED: + ++m_rejected_cntr; + /* fall through */ + default: + ++m_err_cntr; + } +} + +const size_t &ucp_test_base::entity::get_err_num_rejected() const { + return m_rejected_cntr; +} + +const size_t &ucp_test_base::entity::get_err_num() const { + return m_err_cntr; +} + +void ucp_test_base::entity::warn_existing_eps() const { + for (size_t worker_index = 0; worker_index < m_workers.size(); ++worker_index) { + for (size_t ep_index = 0; ep_index < m_workers[worker_index].second.size(); + ++ep_index) { + ADD_FAILURE() << "ep(" << worker_index << "," << ep_index << + ")=" << m_workers[worker_index].second[ep_index].get() << + " was not destroyed 
during test cleanup()"; + } + } +} + +double ucp_test_base::entity::set_ib_ud_timeout(double timeout_sec) +{ + double prev_timeout_sec = 0.; +#if HAVE_IB + for (ucp_rsc_index_t rsc_index = 0; + rsc_index < ucph()->num_tls; ++rsc_index) { + ucp_worker_iface_t *wiface = ucp_worker_iface(worker(), rsc_index); + // check if the iface is ud transport + if (wiface->iface->ops.iface_flush == uct_ud_iface_flush) { + uct_ud_iface_t *iface = + ucs_derived_of(wiface->iface, uct_ud_iface_t); + + uct_ud_enter(iface); + if (!prev_timeout_sec) { + prev_timeout_sec = ucs_time_to_sec(iface->config.peer_timeout); + } + + iface->config.peer_timeout = ucs_time_from_sec(timeout_sec); + uct_ud_leave(iface); + } + } +#endif + return prev_timeout_sec; +} + +void ucp_test_base::entity::cleanup() { + m_listener.reset(); + m_workers.clear(); +} + +void ucp_test_base::entity::ep_destructor(ucp_ep_h ep, entity *e) +{ + ucs_status_ptr_t req = ucp_disconnect_nb(ep); + if (!UCS_PTR_IS_PTR(req)) { + return; + } + + ucs_status_t status; + ucp_tag_recv_info_t info; + do { + e->progress(); + status = ucp_request_test(req, &info); + } while (status == UCS_INPROGRESS); + EXPECT_EQ(UCS_OK, status); + ucp_request_release(req); +} + +ucp_test::mapped_buffer::mapped_buffer(size_t size, const entity& entity, + int flags, ucs_memory_type_t mem_type) : + mem_buffer(size, mem_type), m_entity(entity), m_memh(NULL), + m_rkey_buffer(NULL) +{ + ucs_status_t status; + + if (flags & (UCP_MEM_MAP_ALLOCATE|UCP_MEM_MAP_FIXED)) { + UCS_TEST_ABORT("mapped_buffer does not support allocation by UCP"); + } + + ucp_mem_map_params_t params; + params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | + UCP_MEM_MAP_PARAM_FIELD_LENGTH | + UCP_MEM_MAP_PARAM_FIELD_FLAGS; + params.flags = flags; + params.address = ptr(); + params.length = size; + + status = ucp_mem_map(m_entity.ucph(), ¶ms, &m_memh); + ASSERT_UCS_OK(status); + + size_t rkey_buffer_size; + status = ucp_rkey_pack(m_entity.ucph(), m_memh, &m_rkey_buffer, + 
&rkey_buffer_size); + ASSERT_UCS_OK(status); +} + +ucp_test::mapped_buffer::~mapped_buffer() +{ + ucp_rkey_buffer_release(m_rkey_buffer); + ucs_status_t status = ucp_mem_unmap(m_entity.ucph(), m_memh); + EXPECT_UCS_OK(status); +} + +ucs::handle ucp_test::mapped_buffer::rkey(const entity& entity) const +{ + ucp_rkey_h rkey; + + ucs_status_t status = ucp_ep_rkey_unpack(entity.ep(), m_rkey_buffer, &rkey); + ASSERT_UCS_OK(status); + return ucs::handle(rkey, ucp_rkey_destroy); +} + +ucp_mem_h ucp_test::mapped_buffer::memh() const +{ + return m_memh; +} diff --git a/test/gtest/ucp/ucp_test.h b/test/gtest/ucp/ucp_test.h new file mode 100644 index 0000000..ec2a160 --- /dev/null +++ b/test/gtest/ucp/ucp_test.h @@ -0,0 +1,298 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#ifndef UCP_TEST_H_ +#define UCP_TEST_H_ + +#include +#include +#include + +/* ucp version compile time test */ +#if (UCP_API_VERSION != UCP_VERSION(UCP_API_MAJOR,UCP_API_MINOR)) +#error possible bug in UCP version +#endif + +#include + +#include + +#define MT_TEST_NUM_THREADS 4 + + +namespace ucp { +extern const uint32_t MAGIC; +} + + +struct ucp_test_param { + ucp_params_t ctx_params; + std::vector transports; + int variant; + int thread_type; +}; + +class ucp_test_base : public ucs::test_base { +public: + enum { + SINGLE_THREAD = 42, + MULTI_THREAD_CONTEXT, /* workers are single-threaded, context is mt-shared */ + MULTI_THREAD_WORKER /* workers are multi-threaded, context is mt-single */ + }; + + class entity { + typedef std::vector > ep_vec_t; + typedef std::vector, + ep_vec_t> > worker_vec_t; + + public: + typedef enum { + LISTEN_CB_EP, /* User's callback accepts ucp_ep_h */ + LISTEN_CB_CONN, /* User's callback accepts ucp_conn_request_h */ + LISTEN_CB_REJECT /* User's callback rejects ucp_conn_request_h */ + } listen_cb_type_t; + + entity(const ucp_test_param& test_param, ucp_config_t* ucp_config, + const ucp_worker_params_t& 
worker_params, + const ucp_test_base* test_owner); + + ~entity(); + + void connect(const entity* other, const ucp_ep_params_t& ep_params, + int ep_idx = 0, int do_set_ep = 1); + + ucp_ep_h accept(ucp_worker_h worker, ucp_conn_request_h conn_request); + + void* modify_ep(const ucp_ep_params_t& ep_params, int worker_idx = 0, + int ep_idx = 0); + + void* flush_ep_nb(int worker_index = 0, int ep_index = 0) const; + + void* flush_worker_nb(int worker_index = 0) const; + + void fence(int worker_index = 0) const; + + void* disconnect_nb(int worker_index = 0, int ep_index = 0, + enum ucp_ep_close_mode mode = UCP_EP_CLOSE_MODE_FLUSH) const; + + void destroy_worker(int worker_index = 0); + + ucs_status_t listen(listen_cb_type_t cb_type, + const struct sockaddr *saddr, socklen_t addrlen, + const ucp_ep_params_t& ep_params, + int worker_index = 0); + + ucp_ep_h ep(int worker_index = 0, int ep_index = 0) const; + + ucp_ep_h revoke_ep(int worker_index = 0, int ep_index = 0) const; + + ucp_worker_h worker(int worker_index = 0) const; + + ucp_context_h ucph() const; + + ucp_listener_h listenerh() const; + + unsigned progress(int worker_index = 0); + + int get_num_workers() const; + + int get_num_eps(int worker_index = 0) const; + + void add_err(ucs_status_t status); + + const size_t &get_err_num_rejected() const; + + const size_t &get_err_num() const; + + void warn_existing_eps() const; + + double set_ib_ud_timeout(double timeout_sec); + + void cleanup(); + + static void ep_destructor(ucp_ep_h ep, entity *e); + + protected: + ucs::handle m_ucph; + worker_vec_t m_workers; + ucs::handle m_listener; + std::queue m_conn_reqs; + size_t m_err_cntr; + size_t m_rejected_cntr; + ucs::handle m_server_ep_params; + + private: + static void empty_send_completion(void *r, ucs_status_t status); + static void accept_ep_cb(ucp_ep_h ep, void *arg); + static void accept_conn_cb(ucp_conn_request_h conn_req, void *arg); + static void reject_conn_cb(ucp_conn_request_h conn_req, void *arg); + + void 
set_ep(ucp_ep_h ep, int worker_index, int ep_index); + }; +}; + +/** + * UCP test + */ +class ucp_test : public ucp_test_base, + public ::testing::TestWithParam, + public ucs::entities_storage { + + friend class ucp_test_base::entity; + +public: + enum { + DEFAULT_PARAM_VARIANT = 0 + }; + + UCS_TEST_BASE_IMPL; + + ucp_test(); + virtual ~ucp_test(); + + ucp_config_t* m_ucp_config; + + static std::vector + enum_test_params(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls); + + static ucp_params_t get_ctx_params(); + virtual ucp_worker_params_t get_worker_params(); + virtual ucp_ep_params_t get_ep_params(); + + static void + generate_test_params_variant(const ucp_params_t& ctx_params, + const std::string& name, + const std::string& test_case_name, + const std::string& tls, + int variant, + std::vector& test_params, + int thread_type = SINGLE_THREAD); + + virtual void modify_config(const std::string& name, const std::string& value, + bool optional = false); + void stats_activate(); + void stats_restore(); + +private: + static void set_ucp_config(ucp_config_t *config, + const ucp_test_param& test_param); + static bool check_test_param(const std::string& name, + const std::string& test_case_name, + const ucp_test_param& test_param); + +protected: + virtual void init(); + bool is_self() const; + virtual void cleanup(); + virtual bool has_transport(const std::string& tl_name) const; + bool has_only_transports(const std::vector& tl_names) const; + entity* create_entity(bool add_in_front = false); + entity* create_entity(bool add_in_front, const ucp_test_param& test_param); + unsigned progress(int worker_index = 0) const; + void short_progress_loop(int worker_index = 0) const; + void flush_ep(const entity &e, int worker_index = 0, int ep_index = 0); + void flush_worker(const entity &e, int worker_index = 0); + void disconnect(const entity& entity); + void wait(void *req, int worker_index = 0); + void 
set_ucp_config(ucp_config_t *config); + int max_connections(); + + static void err_handler_cb(void *arg, ucp_ep_h ep, ucs_status_t status) { + entity *e = reinterpret_cast(arg); + e->add_err(status); + } + + template + void wait_for_flag(volatile T *flag, double timeout = 10.0) { + ucs_time_t loop_end_limit = ucs_get_time() + ucs_time_from_sec(timeout); + while ((ucs_get_time() < loop_end_limit) && (!(*flag))) { + short_progress_loop(); + } + } + + static const ucp_datatype_t DATATYPE; + static const ucp_datatype_t DATATYPE_IOV; + +protected: + class mapped_buffer : public mem_buffer { + public: + mapped_buffer(size_t size, const entity& entity, int flags = 0, + ucs_memory_type_t mem_type = UCS_MEMORY_TYPE_HOST); + virtual ~mapped_buffer(); + + ucs::handle rkey(const entity& entity) const; + + ucp_mem_h memh() const; + + private: + const entity& m_entity; + ucp_mem_h m_memh; + void* m_rkey_buffer; + }; +}; + + +std::ostream& operator<<(std::ostream& os, const ucp_test_param& test_param); + +/** + * Instantiate the parameterized test case for a combination of transports. + * + * @param _test_case Test case class, derived from ucp_test. + * @param _name Instantiation name. + * @param _tls Transport names. + */ +#define UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, _name, _tls) \ + INSTANTIATE_TEST_CASE_P(_name, _test_case, \ + testing::ValuesIn(_test_case::enum_test_params(_test_case::get_ctx_params(), \ + #_name, \ + #_test_case, \ + _tls))); + + +/** + * Instantiate the parameterized test case for all transport combinations. + * + * @param _test_case Test case class, derived from ucp_test. 
+ */ +#define UCP_INSTANTIATE_TEST_CASE(_test_case) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, dcx, "dc_x") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, ud, "ud_v") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, udx, "ud_x") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, rc, "rc_v") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, rcx, "rc_x") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, shm_ib, "shm,ib") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, ugni, "ugni") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, self, "self") \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, tcp, "tcp") + + +/** + * The list of GPU copy TLs + */ +#define UCP_TEST_GPU_COPY_TLS "cuda_copy,rocm_copy" + + +/** + * Instantiate the parameterized test case for all transport combinations + * with GPU memory awareness + * + * @param _test_case Test case class, derived from ucp_test. + */ +#define UCP_INSTANTIATE_TEST_CASE_GPU_AWARE(_test_case) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, dcx, "dc_x," UCP_TEST_GPU_COPY_TLS) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, ud, "ud_v," UCP_TEST_GPU_COPY_TLS) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, udx, "ud_x," UCP_TEST_GPU_COPY_TLS) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, rc, "rc_v," UCP_TEST_GPU_COPY_TLS) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, rcx, "rc_x," UCP_TEST_GPU_COPY_TLS) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, shm_ib, "shm,ib," UCP_TEST_GPU_COPY_TLS) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, shm_ib_ipc, "shm,ib,cuda_ipc,rocm_ipc," \ + UCP_TEST_GPU_COPY_TLS) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, ugni, "ugni," UCP_TEST_GPU_COPY_TLS) \ + UCP_INSTANTIATE_TEST_CASE_TLS(_test_case, tcp, "tcp," UCP_TEST_GPU_COPY_TLS) + +#endif diff --git a/test/gtest/ucs/arch/test_x86_64.cc b/test/gtest/ucs/arch/test_x86_64.cc new file mode 100644 index 0000000..2e440dc --- /dev/null +++ b/test/gtest/ucs/arch/test_x86_64.cc @@ -0,0 +1,105 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. 
+* +* See file LICENSE for terms. +*/ + +#if defined(__x86_64__) + +#include +extern "C" { +#include +#include +#include +} + +#include + +class test_arch : public ucs::test { +protected: + /* have to add wrapper for ucs_memcpy_relaxed because pure "C" inline call could + * not be used as template argument */ + static inline void *memcpy_relaxed(void *dst, const void *src, size_t size) + { + return ucs_memcpy_relaxed(dst, src, size); + } + + template + double measure_memcpy_bandwidth(size_t size) + { + ucs_time_t start_time, end_time; + void *src, *dst; + double result = 0.0; + int iter; + + src = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (src == MAP_FAILED) { + goto out; + } + + dst = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (dst == MAP_FAILED) { + goto out_unmap_src; + } + + memset(dst, 0, size); + memset(src, 0, size); + memcpy(dst, src, size); + + iter = 0; + start_time = ucs_get_time(); + do { + C(dst, src, size); + end_time = ucs_get_time(); + ++iter; + } while (end_time < start_time + ucs_time_from_sec(0.5)); + + result = size * iter / ucs_time_to_sec(end_time - start_time); + + munmap(dst, size); + out_unmap_src: + munmap(src, size); + out: + return result; + } +}; + +UCS_TEST_SKIP_COND_F(test_arch, memcpy, RUNNING_ON_VALGRIND || !ucs::perf_retry_count) { + const double diff = 0.95; /* allow 5% fluctuations */ + const double timeout = 30; /* 30 seconds to complete test successfully */ + double memcpy_bw = 0; + double memcpy_relax_bw = 0; + double secs; + size_t size; + char memunits_str[256]; + char thresh_min_str[16]; + char thresh_max_str[16]; + int i; + + ucs_memunits_to_str(ucs_global_opts.arch.builtin_memcpy_min, + thresh_min_str, sizeof(thresh_min_str)); + ucs_memunits_to_str(ucs_global_opts.arch.builtin_memcpy_max, + thresh_max_str, sizeof(thresh_max_str)); + UCS_TEST_MESSAGE << "Using memcpy relaxed for size " << + thresh_min_str << ".." 
<< + thresh_max_str; + for (size = 4096; size <= 256 * UCS_MBYTE; size *= 2) { + secs = ucs_get_accurate_time(); + for (i = 0; ucs_get_accurate_time() - secs < timeout; i++) { + memcpy_bw = measure_memcpy_bandwidth(size); + memcpy_relax_bw = measure_memcpy_bandwidth(size); + if (memcpy_relax_bw / memcpy_bw >= diff) { + break; + } + usleep(1000); /* allow other tasks to complete */ + } + ucs_memunits_to_str(size, memunits_str, sizeof(memunits_str)); + UCS_TEST_MESSAGE << memunits_str << + " memcpy: " << (memcpy_bw / UCS_GBYTE) << + "GB/s memcpy relaxed: " << (memcpy_relax_bw / UCS_GBYTE) << + "GB/s iterations: " << i + 1; + EXPECT_GE(memcpy_relax_bw / memcpy_bw, diff); + } +} + +#endif diff --git a/test/gtest/ucs/test_algorithm.cc b/test/gtest/ucs/test_algorithm.cc new file mode 100644 index 0000000..2e4026d --- /dev/null +++ b/test/gtest/ucs/test_algorithm.cc @@ -0,0 +1,122 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#include +extern "C" { +#include +#include +} +#include + +class test_algorithm : public ucs::test { +protected: + + static int compare_func(const void *elem1, const void *elem2) + { + return *(const int*)elem1 - *(const int*)elem2; + } + + static int compare_func_r(const void *elem1, const void *elem2, void *arg) + { + EXPECT_TRUE(MAGIC == arg); + return compare_func(elem1, elem2); + } + + static void *MAGIC; +}; + +void *test_algorithm::MAGIC = (void*)0xdeadbeef1ee7a880ull; + +UCS_TEST_F(test_algorithm, qsort_r) { + for (int i = 0; i < 1000 / ucs::test_time_multiplier(); ++i) { + unsigned nmemb = ucs::rand() % 100; + + std::vector vec; + for (unsigned j = 0; j < nmemb; ++j) { + vec.push_back(ucs::rand() % 200); + } + + std::vector vec2 = vec; + qsort(&vec2[0], nmemb, sizeof(int), compare_func); + + ucs_qsort_r(&vec[0], nmemb, sizeof(int), compare_func_r, MAGIC); + ASSERT_EQ(vec2, vec); + } +} + +UCS_TEST_F(test_algorithm, crc16) { + std::string test_str; + + test_str = ""; + EXPECT_EQ(0u, ucs_crc16_string(test_str.c_str())); + + test_str = "0"; + EXPECT_EQ(0xc1fbu, ucs_crc16_string(test_str.c_str())); + + test_str = "01"; + EXPECT_EQ(0x99efu, ucs_crc16_string(test_str.c_str())); + + test_str = "012"; + EXPECT_EQ(0xfd89u, ucs_crc16_string(test_str.c_str())); + + test_str = "0123"; + EXPECT_EQ(0xea54u, ucs_crc16_string(test_str.c_str())); + + test_str = "01234"; + EXPECT_EQ(0x9394u, ucs_crc16_string(test_str.c_str())); + + test_str = "012345"; + EXPECT_EQ(0x4468u, ucs_crc16_string(test_str.c_str())); + + test_str = "0123456"; + EXPECT_EQ(0x4bc7u, ucs_crc16_string(test_str.c_str())); + + test_str = "01234567"; + EXPECT_EQ(0x07bcu, ucs_crc16_string(test_str.c_str())); + + test_str = "012345678"; + EXPECT_EQ(0x3253u, ucs_crc16_string(test_str.c_str())); + + test_str = "0123456789"; + EXPECT_EQ(0x3c16u, ucs_crc16_string(test_str.c_str())); +} + +UCS_TEST_F(test_algorithm, crc32) { + std::string test_str; + + test_str = ""; + EXPECT_EQ(0u, ucs_crc32(0, 
test_str.c_str(), test_str.size())); + + test_str = "0"; + EXPECT_EQ(0xf4dbdf21ul, ucs_crc32(0, test_str.c_str(), test_str.size())); + + test_str = "01"; + EXPECT_EQ(0xcf412436ul, ucs_crc32(0, test_str.c_str(), test_str.size())); + + test_str = "012"; + EXPECT_EQ(0xd5a06ab0ul, ucs_crc32(0, test_str.c_str(), test_str.size())); + + test_str = "0123"; + EXPECT_EQ(0xa6669d7dul, ucs_crc32(0, test_str.c_str(), test_str.size())); + + test_str = "01234"; + EXPECT_EQ(0xdda47024ul, ucs_crc32(0, test_str.c_str(), test_str.size())); + + test_str = "012345"; + EXPECT_EQ(0xb86f6b0ful, ucs_crc32(0, test_str.c_str(), test_str.size())); + + test_str = "0123456"; + EXPECT_EQ(0x8dbf08eeul, ucs_crc32(0, test_str.c_str(), test_str.size())); + + test_str = "01234567"; + EXPECT_EQ(0x2d803af5ul, ucs_crc32(0, test_str.c_str(), test_str.size())); + + test_str = "012345678"; + EXPECT_EQ(0x37fad1baul, ucs_crc32(0, test_str.c_str(), test_str.size())); + + test_str = "0123456789"; + EXPECT_EQ(0xa684c7c6ul, ucs_crc32(0, test_str.c_str(), test_str.size())); +} diff --git a/test/gtest/ucs/test_arbiter.cc b/test/gtest/ucs/test_arbiter.cc new file mode 100644 index 0000000..fbdaa2c --- /dev/null +++ b/test/gtest/ucs/test_arbiter.cc @@ -0,0 +1,725 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + + +#include +#include + +extern "C" { +#include +#include +} +#include + +class test_arbiter : public ucs::test { +protected: + + static ucs_arbiter_cb_result_t resched_groups(ucs_arbiter_t *arbitrer, + ucs_arbiter_elem_t *elem, + void *arg) + { + int *counter = (int*)arg; + if (*counter == 0) { + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } else { + --(*counter); + return UCS_ARBITER_CB_RESULT_RESCHED_GROUP; + } + } + + struct arb_elem { + unsigned group_idx; + unsigned elem_idx; + int count; + bool last; + bool release; + ucs_arbiter_elem_t elem; + }; + + void skip_empty_groups() + { + while (m_empty_groups.find(m_expected_group_idx) != m_empty_groups.end()) { + advance_expected_group(); + } + } + + void advance_expected_group() + { + ++m_expected_group_idx; + if (m_expected_group_idx >= m_num_groups) { + m_expected_group_idx = 0; + } + } + + static void release_element(arb_elem *e) + { + memset(e, 0xCC, sizeof(*e)); /* Invalidate memory to catch use-after-free bugs */ + delete e; + } + + void prepare_groups(ucs_arbiter_group_t *groups, ucs_arbiter_elem_t *elems, + const int N, const int nelems_per_group, bool push_head) + { + int i, j; + + for (i = 0; i < N; i++) { + ucs_arbiter_group_init(&groups[i]); + for (j = 0; j < nelems_per_group; j++) { + ucs_arbiter_elem_init(&elems[nelems_per_group*i+j]); + } + for (j = 0; j < nelems_per_group; j++) { + if (push_head) { + int rev_j = nelems_per_group - 1 - j; + ucs_arbiter_group_push_head_elem(NULL, &groups[i], + &elems[nelems_per_group*i+rev_j]); + } else { + ucs_arbiter_group_push_elem(&groups[i], + &elems[nelems_per_group*i+j]); + } + } + ucs_arbiter_group_schedule(&m_arb1, &groups[i]); + } + } + + ucs_arbiter_cb_result_t dispatch(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem) + { + arb_elem *e = ucs_container_of(elem, arb_elem, elem); + + skip_empty_groups(); + + EXPECT_EQ(m_expected_group_idx, e->group_idx); + EXPECT_EQ(m_expected_elem_idx[e->group_idx], e->elem_idx); + + advance_expected_group(); + + 
/* Sometimes we just move to the next group */ + if ((ucs::rand() % 5) == 0) { + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } + + /* Sometimes we want to detach the whole group */ + if ((ucs::rand() % 10) == 0) { + m_empty_groups.insert(e->group_idx); + m_detached_groups.insert(e->group_idx); + return UCS_ARBITER_CB_RESULT_DESCHED_GROUP; + } + + ++m_expected_elem_idx[e->group_idx]; + + if (e->last) { + m_empty_groups.insert(e->group_idx); + } + release_element(e); + + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } + + static ucs_arbiter_cb_result_t dispatch_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) + { + test_arbiter *self = (test_arbiter *)arg; + return self->dispatch(arbiter, elem); + } + + static ucs_arbiter_cb_result_t dispatch_dummy_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) + { + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } + + ucs_arbiter_cb_result_t desched_group(ucs_arbiter_elem_t *elem) + { + ucs_arbiter_group_t *g = ucs_arbiter_elem_group(elem); + //ucs_warn("desched group %d", m_count); + m_count++; + ucs_arbiter_group_schedule(&m_arb2, g); + return UCS_ARBITER_CB_RESULT_DESCHED_GROUP; + } + + ucs_arbiter_cb_result_t remove_elem(ucs_arbiter_elem_t *elem) + { + m_count++; + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } + + static ucs_arbiter_cb_result_t desched_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) + { + test_arbiter *self = (test_arbiter *)arg; + return self->desched_group(elem); + } + + static ucs_arbiter_cb_result_t remove_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) + { + test_arbiter *self = (test_arbiter *)arg; + return self->remove_elem(elem); + } + + static ucs_arbiter_cb_result_t stop_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) + { + return UCS_ARBITER_CB_RESULT_STOP; + } + + static ucs_arbiter_cb_result_t purge_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) + { + arb_elem *e = 
ucs_container_of(elem, arb_elem, elem); + release_element(e); + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } + + static ucs_arbiter_cb_result_t count_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) + { + test_arbiter *self = static_cast(arg); + arb_elem *e = ucs_container_of(elem, arb_elem, elem); + + ++e->count; + ++self->m_count; + return UCS_ARBITER_CB_RESULT_RESCHED_GROUP; + } + + static ucs_arbiter_cb_result_t purge_cond_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) + { + test_arbiter *self = static_cast(arg); + arb_elem *e = ucs_container_of(elem, arb_elem, elem); + + if (e->release) { + ++self->m_count; + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } + + return UCS_ARBITER_CB_RESULT_NEXT_GROUP; + } + + + static ucs_arbiter_cb_result_t purge_dummy_cb(ucs_arbiter_t *arbiter, + ucs_arbiter_elem_t *elem, + void *arg) + { + test_arbiter *self = static_cast(arg); + ++self->m_count; + return UCS_ARBITER_CB_RESULT_REMOVE_ELEM; + } + + void test_move_groups(int N, int nelems, bool push_head = false) + { + + ucs_arbiter_group_t *groups; + ucs_arbiter_elem_t *elems; + + ucs_arbiter_init(&m_arb1); + ucs_arbiter_init(&m_arb2); + + groups = new ucs_arbiter_group_t [N]; + elems = new ucs_arbiter_elem_t [nelems*N]; + + prepare_groups(groups, elems, N, nelems, push_head); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, desched_cb, this); + EXPECT_EQ(N, m_count); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb2, 1, remove_cb, this); + EXPECT_EQ(nelems*N, m_count); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, remove_cb, this); + EXPECT_EQ(0, m_count); + + delete [] groups; + delete [] elems; + + ucs_arbiter_cleanup(&m_arb1); + ucs_arbiter_cleanup(&m_arb2); + } + +protected: + std::set m_empty_groups; + std::set m_detached_groups; + std::vector m_expected_elem_idx; + unsigned m_expected_group_idx; + unsigned m_num_groups; + ucs_arbiter_t m_arb1; + ucs_arbiter_t m_arb2; + int m_count; +}; + + +UCS_TEST_F(test_arbiter, 
add_purge) { + + ucs_arbiter_t arbiter; + + ucs_arbiter_group_t group1; + ucs_arbiter_group_t group2; + + ucs_arbiter_init(&arbiter); + ucs_arbiter_group_init(&group1); + ucs_arbiter_group_init(&group2); + + + ucs_arbiter_elem_t elem1; + ucs_arbiter_elem_t elem2_1; + ucs_arbiter_elem_t elem2_2; + + /* test internal function */ + ucs_arbiter_group_push_elem_always(&group1, &elem1); + ucs_arbiter_group_push_elem_always(&group2, &elem2_1); + ucs_arbiter_group_push_elem_always(&group2, &elem2_2); + + ucs_arbiter_group_schedule(&arbiter, &group1); + ucs_arbiter_group_schedule(&arbiter, &group2); + + m_count = 0; + ucs_arbiter_dispatch_nonempty(&arbiter, 3, remove_cb, this); + + EXPECT_EQ(3, m_count); + + ucs_arbiter_group_cleanup(&group2); + ucs_arbiter_group_cleanup(&group1); + ucs_arbiter_cleanup(&arbiter); +} + +UCS_TEST_F(test_arbiter, purge_cond) { + + int num_elems = m_num_groups = 25; + ucs_arbiter_group_t groups[m_num_groups]; + int purged_count[m_num_groups]; + ucs::ptr_vector elems; + + ucs_arbiter_t arbiter; + ucs_arbiter_init(&arbiter); + memset(purged_count, 0, sizeof(int) * m_num_groups); + + for (unsigned i = 0; i < m_num_groups; ++i) { + ucs_arbiter_group_init(&groups[i]); + + for (int j = 0; j < num_elems; ++j) { + arb_elem *e = new arb_elem; + if (ucs::rand() % 2) { + e->release = true; + ++purged_count[i]; + } else { + e->release = false; + } + ucs_arbiter_elem_init(&e->elem); + elems.push_back(e); + ucs_arbiter_group_push_elem(&groups[i], &e->elem); + /* coverity[leaked_storage] */ + } + ucs_arbiter_group_schedule(&arbiter, &groups[i]); + } + + // All groups are scheduled, start purging them from some non-current group + // (purge just half of the groups, the rest will be dispatched) + unsigned start = ucs::rand() % m_num_groups; + for (unsigned i = 0; i < m_num_groups / 2; ++i) { + unsigned idx = (start + i) % m_num_groups; + m_count = 0; + ucs_arbiter_group_purge(&arbiter, &groups[idx], purge_cond_cb, this); + EXPECT_EQ(m_count, purged_count[idx]); 
+ + m_count = 0; + ucs_arbiter_group_purge(&arbiter, &groups[idx], purge_dummy_cb, this); + EXPECT_EQ(m_count, num_elems - purged_count[idx]); + + ucs_arbiter_group_cleanup(&groups[idx]); + } + + ucs_arbiter_dispatch(&arbiter, 1, dispatch_dummy_cb, NULL); + + ucs_arbiter_cleanup(&arbiter); +} + +UCS_TEST_F(test_arbiter, purge_corner) { + + ucs_arbiter_t arbiter; + ucs_arbiter_group_t group; + arb_elem elems[2]; + + ucs_arbiter_init(&arbiter); + ucs_arbiter_group_init(&group); + + for (int i = 0; i < 2; ++i) { + ucs_arbiter_elem_init(&elems[i].elem); + elems[i].release = !i; // try to purge first + ucs_arbiter_group_push_elem(&group, &elems[i].elem); + } + m_count = 0; + ucs_arbiter_group_purge(&arbiter, &group, purge_cond_cb, this); + EXPECT_EQ(1, m_count); + EXPECT_FALSE(ucs_arbiter_group_is_empty(&group)); + EXPECT_FALSE(ucs_arbiter_elem_is_scheduled(&elems[0].elem)); + EXPECT_TRUE(ucs_arbiter_elem_is_scheduled(&elems[1].elem)); + + // try to reuse 0-th element and purge it (now it is last element) + ucs_arbiter_group_push_elem(&group, &elems[0].elem); + EXPECT_EQ(true, elems[0].release); + m_count = 0; + ucs_arbiter_group_purge(&arbiter, &group, purge_cond_cb, this); + EXPECT_EQ(1, m_count); + EXPECT_FALSE(ucs_arbiter_elem_is_scheduled(&elems[0].elem)); + EXPECT_TRUE(ucs_arbiter_elem_is_scheduled(&elems[1].elem)); + + // clear the group + ucs_arbiter_group_purge(&arbiter, &group, purge_dummy_cb, this); + EXPECT_FALSE(ucs_arbiter_elem_is_scheduled(&elems[0].elem)); + EXPECT_FALSE(ucs_arbiter_elem_is_scheduled(&elems[1].elem)); + + ucs_arbiter_group_cleanup(&group); + ucs_arbiter_cleanup(&arbiter); +} + +UCS_TEST_F(test_arbiter, multiple_dispatch) { + m_num_groups = 20; + + ucs_arbiter_t arbiter; + ucs_arbiter_init(&arbiter); + + std::vector groups(m_num_groups); + for (unsigned i = 0; i < m_num_groups; ++i) { + ucs_arbiter_group_init(&groups[i]); + + unsigned num_elems = ucs::rand() % 9; + + for (unsigned j = 0; j < num_elems; ++j) { + arb_elem *e = new arb_elem; 
+ e->group_idx = i; + e->elem_idx = j; + e->release = true; + e->last = (j == num_elems - 1); + ucs_arbiter_elem_init(&e->elem); + ucs_arbiter_group_push_elem(&groups[i], &e->elem); + /* coverity[leaked_storage] */ + } + + if (num_elems == 0) { + m_empty_groups.insert(i); + } + + ucs_arbiter_group_schedule(&arbiter, &groups[i]); + } + + m_expected_group_idx = 0; + m_expected_elem_idx.resize(m_num_groups, 0); + std::fill(m_expected_elem_idx.begin(), m_expected_elem_idx.end(), 0); + + ucs_arbiter_dispatch(&arbiter, 1, dispatch_cb, this); + + ASSERT_TRUE(arbiter.current == NULL); + + /* Release detached groups */ + for (unsigned i = 0; i < m_num_groups; ++i) { + if (m_detached_groups.find(i) != m_detached_groups.end()) { + ucs_arbiter_group_purge(&arbiter, &groups[i], purge_cb, NULL); + } + ucs_arbiter_group_cleanup(&groups[i]); + } + + ucs_arbiter_cleanup(&arbiter); +} + +UCS_TEST_F(test_arbiter, resched) { + + ucs_arbiter_t arbiter; + + ucs_arbiter_group_t group1; + ucs_arbiter_group_t group2; + + ucs_arbiter_init(&arbiter); + ucs_arbiter_group_init(&group1); + ucs_arbiter_group_init(&group2); + + + ucs_arbiter_elem_t elem1; + ucs_arbiter_elem_t elem2_1; + + ucs_arbiter_elem_init(&elem1); + ucs_arbiter_elem_init(&elem2_1); + ucs_arbiter_group_push_elem(&group1, &elem1); + ucs_arbiter_group_push_elem(&group2, &elem2_1); + + ucs_arbiter_group_schedule(&arbiter, &group1); + ucs_arbiter_group_schedule(&arbiter, &group2); + + int count = 2; + ucs_arbiter_dispatch_nonempty(&arbiter, 1, resched_groups, &count); + + EXPECT_EQ(0, count); + + count = 1; + ucs_arbiter_dispatch_nonempty(&arbiter, 1, resched_groups, &count); + EXPECT_EQ(0, count); + + /* one group with one elem should be there */ + m_count = 0; + ucs_arbiter_dispatch_nonempty(&arbiter, 3, remove_cb, this); + EXPECT_EQ(1, m_count); + ASSERT_TRUE(arbiter.current == NULL); + + ucs_arbiter_group_cleanup(&group2); + ucs_arbiter_group_cleanup(&group1); + ucs_arbiter_cleanup(&arbiter); +} + +/* check that it is 
possible to reuse removed + * element + */ +UCS_TEST_F(test_arbiter, reuse_elem) { + int i; + ucs_arbiter_group_t group1; + ucs_arbiter_elem_t elem1; + ucs_arbiter_elem_t elem2; + + ucs_arbiter_init(&m_arb1); + ucs_arbiter_group_init(&group1); + ucs_arbiter_elem_init(&elem1); + ucs_arbiter_elem_init(&elem2); + + for (i = 0; i < 3; i++) { + ucs_arbiter_group_push_elem(&group1, &elem1); + ucs_arbiter_group_push_elem(&group1, &elem2); + ucs_arbiter_group_schedule(&m_arb1, &group1); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, remove_cb, this); + EXPECT_EQ(2, m_count); + } + + for (i = 0; i < 3; i++) { + ucs_arbiter_group_push_elem(&group1, &elem1); + ucs_arbiter_group_schedule(&m_arb1, &group1); + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, remove_cb, this); + EXPECT_EQ(1, m_count); + } +} + +UCS_TEST_F(test_arbiter, move_group) { + + ucs_arbiter_group_t group1; + ucs_arbiter_elem_t elem1; + ucs_arbiter_elem_t elem2; + + ucs_arbiter_init(&m_arb1); + ucs_arbiter_init(&m_arb2); + + ucs_arbiter_group_init(&group1); + ucs_arbiter_elem_init(&elem1); + ucs_arbiter_elem_init(&elem2); + ucs_arbiter_group_push_elem(&group1, &elem1); + ucs_arbiter_group_push_elem(&group1, &elem2); + ucs_arbiter_group_schedule(&m_arb1, &group1); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, desched_cb, this); + EXPECT_EQ(1, m_count); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb2, 1, remove_cb, this); + EXPECT_EQ(2, m_count); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, remove_cb, this); + EXPECT_EQ(0, m_count); + + ucs_arbiter_cleanup(&m_arb1); + ucs_arbiter_cleanup(&m_arb2); +} + +UCS_TEST_F(test_arbiter, push_head_scheduled) { + + ucs_arbiter_group_t group1; + ucs_arbiter_group_t group2; + arb_elem elem1; + arb_elem elem2; + arb_elem elem3; + + ucs_arbiter_init(&m_arb1); + + ucs_arbiter_group_init(&group1); + ucs_arbiter_group_init(&group2); + ucs_arbiter_elem_init(&elem1.elem); + ucs_arbiter_elem_init(&elem2.elem); + ucs_arbiter_elem_init(&elem3.elem); + elem1.count 
= elem2.count = elem3.count = 0; + + ucs_arbiter_group_push_head_elem(&m_arb1, &group1, &elem1.elem); + ucs_arbiter_group_push_head_elem(&m_arb1, &group2, &elem2.elem); + + ucs_arbiter_group_schedule(&m_arb1, &group1); + ucs_arbiter_group_schedule(&m_arb1, &group2); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, count_cb, this); + EXPECT_EQ(2, m_count); + EXPECT_EQ(1, elem1.count); + EXPECT_EQ(1, elem2.count); + EXPECT_EQ(0, elem3.count); + + /* Adding new head elem to group2 */ + ucs_arbiter_group_push_head_elem(&m_arb1, &group2, &elem3.elem); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, count_cb, this); + EXPECT_EQ(2, m_count); + EXPECT_EQ(2, elem1.count); + EXPECT_EQ(1, elem2.count); + EXPECT_EQ(1, elem3.count); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 2, remove_cb, this); + EXPECT_EQ(3, m_count); + + /* Add to single scheduled group */ + ucs_arbiter_group_push_head_elem(&m_arb1, &group2, &elem2.elem); + ucs_arbiter_group_schedule(&m_arb1, &group2); + ucs_arbiter_group_push_head_elem(&m_arb1, &group2, &elem3.elem); + + m_count = 0; + elem2.count = elem3.count = 0; + ucs_arbiter_dispatch(&m_arb1, 2, count_cb, this); + EXPECT_EQ(0, elem2.count); + EXPECT_EQ(1, elem3.count); + EXPECT_EQ(1, m_count); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 2, remove_cb, this); + EXPECT_EQ(2, m_count); + + ucs_arbiter_cleanup(&m_arb1); +} + +UCS_TEST_F(test_arbiter, move_groups1) { + test_move_groups(42, 3); +} + +UCS_TEST_F(test_arbiter, move_groups2) { + test_move_groups(42, 1); +} + +UCS_TEST_F(test_arbiter, move_groups1_push_head) { + test_move_groups(42, 3, true); +} + +UCS_TEST_F(test_arbiter, move_groups2_push_head) { + test_move_groups(42, 1, true); +} + +UCS_TEST_F(test_arbiter, desched_group) { + ucs_arbiter_group_t group1; + ucs_arbiter_elem_t elem1; + ucs_arbiter_elem_t elem2; + + ucs_arbiter_init(&m_arb1); + + ucs_arbiter_group_init(&group1); + ucs_arbiter_elem_init(&elem1); + ucs_arbiter_elem_init(&elem2); + 
ucs_arbiter_group_push_elem(&group1, &elem1); + ucs_arbiter_group_push_elem(&group1, &elem2); + + /* should do nothing */ + ucs_arbiter_group_desched(&m_arb1, &group1); + + ucs_arbiter_group_schedule(&m_arb1, &group1); + /* arbiter will be empty */ + ucs_arbiter_group_desched(&m_arb1, &group1); + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, remove_cb, this); + EXPECT_EQ(0, m_count); + + /* group must still have 2 elements */ + ucs_arbiter_group_schedule(&m_arb1, &group1); + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, remove_cb, this); + EXPECT_EQ(2, m_count); + + ucs_arbiter_cleanup(&m_arb1); +} + +UCS_TEST_F(test_arbiter, desched_groups) { + ucs_arbiter_group_t *groups; + ucs_arbiter_elem_t *elems; + const int N = 17; + + ucs_arbiter_init(&m_arb1); + ucs_arbiter_init(&m_arb2); + + groups = new ucs_arbiter_group_t [N]; + elems = new ucs_arbiter_elem_t [3*N]; + + prepare_groups(groups, elems, N, 3, false); + + ucs_arbiter_group_desched(&m_arb1, &groups[N-1]); + ucs_arbiter_group_desched(&m_arb1, &groups[0]); + ucs_arbiter_group_desched(&m_arb1, &groups[5]); + ucs_arbiter_group_desched(&m_arb1, &groups[11]); + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, remove_cb, this); + /* 4 groups with 3 elems each were descheduled */ + EXPECT_EQ(3*(N-4), m_count); + + ucs_arbiter_group_schedule(&m_arb1, &groups[N-1]); + ucs_arbiter_group_schedule(&m_arb1, &groups[0]); + ucs_arbiter_group_schedule(&m_arb1, &groups[5]); + ucs_arbiter_group_schedule(&m_arb1, &groups[11]); + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, remove_cb, this); + /* 4 groups with 3 elems each were scheduled */ + EXPECT_EQ(4*3, m_count); + + delete [] groups; + delete [] elems; + + ucs_arbiter_cleanup(&m_arb1); +} + +/* make sure that next arbiter dispatch + * continues from the group that stopped + */ +UCS_TEST_F(test_arbiter, result_stop) { + + const int N = 5; + const int nelems = 1; + ucs_arbiter_group_t *groups; + ucs_arbiter_elem_t *elems; + + groups = new ucs_arbiter_group_t [N]; + 
elems = new ucs_arbiter_elem_t [nelems*N]; + ucs_arbiter_init(&m_arb1); + + prepare_groups(groups, elems, N, nelems, false); + + for (int i = 0; i < N + 3; i++) { + ucs_arbiter_dispatch(&m_arb1, 1, stop_cb, this); + /* arbiter current position must not change on STOP */ + EXPECT_EQ(m_arb1.current, groups[0].tail->next); + } + + m_count = 0; + ucs_arbiter_dispatch(&m_arb1, 1, remove_cb, this); + EXPECT_EQ(N*nelems, m_count); + + ucs_arbiter_cleanup(&m_arb1); + + delete [] groups; + delete [] elems; +} diff --git a/test/gtest/ucs/test_async.cc b/test/gtest/ucs/test_async.cc new file mode 100644 index 0000000..3befe12 --- /dev/null +++ b/test/gtest/ucs/test_async.cc @@ -0,0 +1,841 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include +#include + +extern "C" { +#include +#include +#include +#include +} + +#include + + +class base { +public: + base(ucs_async_mode_t mode) : m_mode(mode), m_count(0), m_handler_set(0) { + } + + virtual ~base() { + } + + int count() const { + return m_count; + } + + void set_handler() { + ASSERT_FALSE(m_handler_set); + m_handler_set = 1; + } + + void unset_handler(bool sync = true) { + if (ucs_atomic_cswap32(&m_handler_set, 1, 0)) { + ucs_status_t status = ucs_async_remove_handler(event_id(), sync); + ASSERT_UCS_OK(status); + } + } + +private: + base(const base& other); + +protected: + virtual void ack_event() = 0; + virtual int event_id() = 0; + + static void cb(int id, void *arg) { + base *self = reinterpret_cast<base*>(arg); + self->handler(); + } + + ucs_async_mode_t mode() const { + return m_mode; + } + + virtual void handler() { + ++m_count; + ack_event(); + } + + const ucs_async_mode_t m_mode; + int m_count; + uint32_t m_handler_set; +}; + +class base_event : public base { +public: + base_event(ucs_async_mode_t mode) : base(mode) { + ucs_status_t status = ucs_async_pipe_create(&m_event_pipe); + ASSERT_UCS_OK(status); + } + + virtual ~base_event() { + 
ucs_async_pipe_destroy(&m_event_pipe); + } + + void set_handler(ucs_async_context_t *async) { + ucs_status_t status = + ucs_async_set_event_handler(mode(), event_fd(), + UCS_EVENT_SET_EVREAD, + cb, this, async); + ASSERT_UCS_OK(status); + base::set_handler(); + } + + virtual int event_id() { + return event_fd(); + } + + void push_event() { + ucs_async_pipe_push(&m_event_pipe); + } + + void reset() { + ucs_async_pipe_drain(&m_event_pipe); + } + +protected: + virtual void ack_event() { + reset(); + } + +private: + int event_fd() { + return ucs_async_pipe_rfd(&m_event_pipe); + } + + ucs_async_pipe_t m_event_pipe; +}; + +class base_timer : public base { +public: + base_timer(ucs_async_mode_t mode) : + base(mode), m_timer_id(-1) + { + } + + /* + * Cannot call this from constructor - vptr not ready! + */ + void set_timer(ucs_async_context_t *async, ucs_time_t interval) { + ucs_assert(m_timer_id == -1); + ucs_status_t status = ucs_async_add_timer(mode(), interval, cb, + this, async, &m_timer_id); + ASSERT_UCS_OK(status); + base::set_handler(); + } + + virtual int event_id() { + return m_timer_id; + } + +protected: + virtual void ack_event() { + } + +private: + int m_timer_id; +}; + + +class async_poll { +public: + virtual void poll() = 0; + virtual ~async_poll() { + } +}; + +class global : public async_poll { +public: + virtual void poll() { + ucs_async_poll(NULL); + } + + virtual ~global() { + } +}; + +class global_event : public global, public base_event { +public: + global_event(ucs_async_mode_t mode) : base_event(mode) { + set_handler(NULL); + } + + ~global_event() { + unset_handler(); + } +}; + +class global_timer : public global, public base_timer { +public: + global_timer(ucs_async_mode_t mode) : base_timer(mode) { + set_timer(NULL, ucs_time_from_usec(1000)); + } + + ~global_timer() { + unset_handler(); + } +}; + +class local : public async_poll { +public: + local(ucs_async_mode_t mode) { + ucs_status_t status = ucs_async_context_init(&m_async, mode); + 
ASSERT_UCS_OK(status); + } + + virtual ~local() { + ucs_async_context_cleanup(&m_async); + } + + void block() { + UCS_ASYNC_BLOCK(&m_async); + } + + void unblock() { + UCS_ASYNC_UNBLOCK(&m_async); + } + + void check_miss() { + ucs_async_check_miss(&m_async); + } + + virtual void poll() { + ucs_async_poll(&m_async); + } + +protected: + ucs_async_context_t m_async; +}; + +class local_event : public local, + public base_event +{ +public: + local_event(ucs_async_mode_t mode) : local(mode), base_event(mode) { + set_handler(&m_async); + } + + ~local_event() { + unset_handler(); + } +}; + +class local_timer : public local, + public base_timer +{ +public: + static const int TIMER_INTERVAL_USEC = 1000; + + local_timer(ucs_async_mode_t mode) : local(mode), base_timer(mode) { + set_timer(&m_async, ucs_time_from_usec(TIMER_INTERVAL_USEC)); + } + + ~local_timer() { + unset_handler(); + } +}; + +class test_async : public testing::TestWithParam<ucs_async_mode_t>, +public ucs::test_base { +public: + UCS_TEST_BASE_IMPL; + +protected: + static const int COUNT = 40; + static const unsigned SLEEP_USEC = 1000; + static const int EVENT_RETRIES = 10; + static const int TIMER_RETRIES = 100; + static const int TIMER_EXP_COUNT = COUNT / 4; + + void suspend(double scale = 1.0) { + ucs::safe_usleep(ucs_max(scale * SLEEP_USEC, 0) * + ucs::test_time_multiplier()); + } + + void suspend_and_poll(async_poll *p, double scale = 1.0) { + if (GetParam() == UCS_ASYNC_MODE_POLL) { + for (double t = 0; t < scale; t += 1.0) { + suspend(); + p->poll(); + } + } else { + suspend(scale); + } + } + + void suspend_and_poll2(async_poll *p1, async_poll *p2, double scale = 1.0) { + if (GetParam() == UCS_ASYNC_MODE_POLL) { + for (double t = 0; t < scale; t += 1.0) { + suspend(); + p1->poll(); + p2->poll(); + } + } else { + suspend(scale); + } + } +}; + +template<typename LOCAL> +class test_async_mt : public test_async { +protected: + static const unsigned NUM_THREADS = 32; + + test_async_mt() { + for (unsigned i = 0; i < NUM_THREADS; ++i) { + 
m_ev[i] = NULL; + } + } + + virtual void init() { + pthread_barrier_init(&m_barrier, NULL, NUM_THREADS + 1); + } + + int thread_run(unsigned index) { + LOCAL* le; + m_ev[index] = le = new LOCAL(GetParam()); + + barrier(); + + while (!m_stop[index]) { + le->block(); + unsigned before = le->count(); + suspend_and_poll(le, 1.0); + unsigned after = le->count(); + le->unblock(); + + EXPECT_EQ(before, after); /* Should not handle while blocked */ + le->check_miss(); + suspend_and_poll(le, 1.0); + } + + int result = le->count(); + delete le; + m_ev[index] = NULL; + return result; + } + + void spawn() { + for (unsigned i = 0; i < NUM_THREADS; ++i) { + m_stop[i] = false; + pthread_create(&m_threads[i], NULL, thread_func, (void*)this); + } + barrier(); + } + + void stop() { + for (unsigned i = 0; i < NUM_THREADS; ++i) { + m_stop[i] = true; + void *result; + pthread_join(m_threads[i], &result); + m_thread_counts[i] = (int)(uintptr_t)result; + } + } + + LOCAL* event(unsigned thread) { + return m_ev[thread]; + } + + int thread_count(unsigned thread) { + return m_thread_counts[thread]; + } + +private: + void barrier() { + pthread_barrier_wait(&m_barrier); + } + + static void *thread_func(void *arg) + { + test_async_mt *self = reinterpret_cast<test_async_mt*>(arg); + + for (unsigned index = 0; index < NUM_THREADS; ++index) { + if (pthread_equal(self->m_threads[index], pthread_self())) { + return (void*)(uintptr_t)self->thread_run(index); + } + } + + /* Not found */ + return (void*)-1; + } + + pthread_t m_threads[NUM_THREADS]; + pthread_barrier_t m_barrier; + int m_thread_counts[NUM_THREADS]; + bool m_stop[NUM_THREADS]; + LOCAL* m_ev[NUM_THREADS]; +}; + + +UCS_TEST_P(test_async, global_event) { + global_event ge(GetParam()); + ge.push_event(); + suspend_and_poll(&ge, COUNT); + EXPECT_GE(ge.count(), 1); +} + +UCS_TEST_P(test_async, global_timer) { + global_timer gt(GetParam()); + for (int i = 0; i < TIMER_RETRIES; ++i) { + suspend_and_poll(&gt, COUNT * 4); + if (gt.count() >= COUNT) { + break; + } + 
UCS_TEST_MESSAGE << "retry " << (i + 1); + } + EXPECT_GE(gt.count(), int(COUNT)); +} + +UCS_TEST_P(test_async, max_events, "ASYNC_MAX_EVENTS=4") { + ucs_status_t status; + ucs_async_context_t async; + + status = ucs_async_context_init(&async, GetParam()); + ASSERT_UCS_OK(status); + + /* 4 timers should be OK */ + std::vector<int> timers; + for (unsigned count = 0; count < 4; ++count) { + int timer_id; + status = ucs_async_add_timer(GetParam(), ucs_time_from_sec(1.0), + (ucs_async_event_cb_t)ucs_empty_function, + NULL, &async, &timer_id); + ASSERT_UCS_OK(status); + timers.push_back(timer_id); + } + + /* 5th timer should fail */ + int timer_id; + status = ucs_async_add_timer(GetParam(), ucs_time_from_sec(1.0), + (ucs_async_event_cb_t)ucs_empty_function, + NULL, &async, &timer_id); + EXPECT_EQ(UCS_ERR_EXCEEDS_LIMIT, status); + + if (status == UCS_OK) { + timers.push_back(timer_id); + } + + /* Release timers */ + for (std::vector<int>::iterator iter = timers.begin(); iter != timers.end(); ++iter) { + status = ucs_async_remove_handler(*iter, 1); + ASSERT_UCS_OK(status); + } + + ucs_async_context_cleanup(&async); +} + +UCS_TEST_P(test_async, many_timers) { + int max_iters = 4010 / ucs::test_time_multiplier(); + for (int count = 0; count < max_iters; ++count) { + std::vector<int> timers; + ucs_status_t status; + int timer_id; + + for (int count2 = 0; count2 < 250; ++count2) { + status = ucs_async_add_timer(GetParam(), ucs_time_from_sec(1.0), + (ucs_async_event_cb_t)ucs_empty_function, + NULL, NULL, &timer_id); + ASSERT_UCS_OK(status); + timers.push_back(timer_id); + } + + while (!timers.empty()) { + ucs_async_remove_handler(timers.back(), 0); + timers.pop_back(); + } + } +} + +UCS_TEST_P(test_async, ctx_event) { + local_event le(GetParam()); + for (int retry = 0; retry < EVENT_RETRIES; ++retry) { + le.push_event(); + suspend_and_poll(&le, COUNT); + if (le.count() >= 1) { + break; + } + UCS_TEST_MESSAGE << "retry " << (retry + 1); + } + EXPECT_GE(le.count(), 1); +} + 
+UCS_TEST_P(test_async, ctx_timer) { + local_timer lt(GetParam()); + for (int i = 0; i < TIMER_RETRIES; ++i) { + suspend_and_poll(&lt, COUNT * 4); + if (lt.count() >= TIMER_EXP_COUNT) { + break; + } + UCS_TEST_MESSAGE << "retry " << (i + 1); + } + EXPECT_GE(lt.count(), int(TIMER_EXP_COUNT)); +} + +UCS_TEST_P(test_async, two_timers) { + local_timer lt1(GetParam()); + local_timer lt2(GetParam()); + for (int i = 0; i < TIMER_RETRIES; ++i) { + suspend_and_poll2(&lt1, &lt2, COUNT * 4); + if ((lt1.count() >= TIMER_EXP_COUNT) && + (lt2.count() >= TIMER_EXP_COUNT)) { + break; + } + UCS_TEST_MESSAGE << "retry " << (i + 1); + } + EXPECT_GE(lt1.count(), int(TIMER_EXP_COUNT)); + EXPECT_GE(lt2.count(), int(TIMER_EXP_COUNT)); +} + +UCS_TEST_P(test_async, ctx_event_block) { + local_event le(GetParam()); + int count = 0; + + for (int i = 0; i < EVENT_RETRIES; ++i) { + le.block(); + count = le.count(); + le.push_event(); + suspend_and_poll(&le, COUNT); + EXPECT_EQ(count, le.count()); + le.unblock(); + + le.check_miss(); + if (le.count() > count) { + break; + } + UCS_TEST_MESSAGE << "retry " << (i + 1); + } + EXPECT_GT(le.count(), count); +} + +UCS_TEST_P(test_async, ctx_event_block_two_miss) { + local_event le(GetParam()); + + /* Step 1: While async is blocked, generate two events */ + + le.block(); + le.push_event(); + suspend_and_poll(&le, COUNT); + + le.push_event(); + suspend_and_poll(&le, COUNT); + EXPECT_EQ(0, le.count()); + le.unblock(); + + /* Step 2: When checking missed events, should get at least one event */ + + le.check_miss(); + EXPECT_GT(le.count(), 0); + int prev_count = le.count(); + + /* Step 3: Block the async again and generate an event */ + + le.block(); + le.push_event(); + suspend_and_poll(&le, COUNT); + le.unblock(); + + /* Step 4: Check missed events - another event should be found */ + + le.check_miss(); + EXPECT_GT(le.count(), prev_count); +} + +UCS_TEST_P(test_async, ctx_timer_block) { + local_timer lt(GetParam()); + int count = 0; + + for (int i = 0; i < 
TIMER_RETRIES; ++i) { + lt.block(); + count = lt.count(); + suspend_and_poll(<, COUNT); + EXPECT_EQ(count, lt.count()); + lt.unblock(); + + lt.check_miss(); + if (lt.count() > count) { + break; + } + UCS_TEST_MESSAGE << "retry " << (i + 1); + } + EXPECT_GT(lt.count(), count); /* Timer could expire again after unblock */ +} + +UCS_TEST_P(test_async, modify_event) { + local_event le(GetParam()); + int count; + + le.push_event(); + suspend_and_poll(&le, COUNT); + EXPECT_GE(le.count(), 1); + + ucs_async_modify_handler(le.event_id(), 0); + sleep(1); + count = le.count(); + le.push_event(); + suspend_and_poll(&le, COUNT); + EXPECT_EQ(le.count(), count); + le.reset(); + + ucs_async_modify_handler(le.event_id(), UCS_EVENT_SET_EVREAD); + count = le.count(); + le.push_event(); + for (int i = 0; i < TIMER_RETRIES; ++i) { + suspend_and_poll(&le, 1); + if (le.count() > count) { + break; + } + UCS_TEST_MESSAGE << "retry " << (i + 1); + } + EXPECT_GT(le.count(), count); + + ucs_async_modify_handler(le.event_id(), 0); + sleep(1); + count = le.count(); + le.push_event(); + suspend_and_poll(&le, COUNT); + EXPECT_EQ(le.count(), count); +} + +UCS_TEST_P(test_async, warn_block) { + { + scoped_log_handler slh(hide_warns_logger); + { + local_event le(GetParam()); + le.block(); + } + } + + int warn_count = m_warnings.size(); + for (int i = 0; i < warn_count; ++i) { + UCS_TEST_MESSAGE << "< " << m_warnings[i] << " >"; + } + + if (GetParam() != UCS_ASYNC_MODE_POLL) { + EXPECT_GE(warn_count, 1); + } +} + +class local_timer_long_handler : public local_timer { +public: + local_timer_long_handler(ucs_async_mode_t mode, int sleep_usec) : + local_timer(mode), m_sleep_usec(sleep_usec) { + } + + virtual void handler() { + /* The handler would sleep long enough to increment the counter after + * main thread already considers it removed - unless the main thread + * waits for handler completion properly. + * It sleeps only once to avoid timer overrun deadlock in signal mode. 
+ */ + ucs::safe_usleep(m_sleep_usec * 2); + m_sleep_usec = 0; + local_timer::handler(); + } + + int m_sleep_usec; +}; + +UCS_TEST_P(test_async, remove_sync) { + + /* create another handler so that removing the timer would not have to + * completely cleanup the async context, and race condition could happen + */ + local_timer le(GetParam()); + + for (int i = 0; i < EVENT_RETRIES; ++i) { + local_timer_long_handler lt(GetParam(), SLEEP_USEC * 2); + suspend_and_poll(&lt, 1); + lt.unset_handler(true); + int count = lt.count(); + suspend_and_poll(&lt, 1); + ASSERT_EQ(count, lt.count()); + } +} + +class local_timer_remove_handler : public local_timer { +public: + local_timer_remove_handler(ucs_async_mode_t mode) : local_timer(mode) { + } + +protected: + virtual void handler() { + base::handler(); + unset_handler(false); + } +}; + +UCS_TEST_P(test_async, timer_unset_from_handler) { + local_timer_remove_handler lt(GetParam()); + ucs_time_t deadline = ucs_get_time() + ucs_time_from_sec(10.0); + do { + suspend_and_poll(&lt, 1); + } while ((lt.count() == 0) && (ucs_get_time() < deadline)); + EXPECT_GE(lt.count(), 1); + suspend_and_poll(&lt, COUNT); + EXPECT_LE(lt.count(), 5); /* timer could fire multiple times before we remove it */ + int count = lt.count(); + suspend_and_poll(&lt, COUNT); + EXPECT_EQ(count, lt.count()); +} + +class local_event_remove_handler : public local_event { +public: + local_event_remove_handler(ucs_async_mode_t mode, bool sync) : + local_event(mode), m_sync(sync) { + } + +protected: + virtual void handler() { + base::handler(); + unset_handler(m_sync); + } + +private: + bool m_sync; +}; + +class test_async_event_unset_from_handler : public test_async { +protected: + void test_unset_from_handler(bool sync) { + local_event_remove_handler le(GetParam(), sync); + + for (int iter = 0; iter < 5; ++iter) { + for (int retry = 0; retry < EVENT_RETRIES; ++retry) { + le.push_event(); + suspend_and_poll(&le, COUNT); + if (le.count() >= 1) { + break; + } + UCS_TEST_MESSAGE 
<< "retry " << (retry + 1); + } + EXPECT_EQ(1, le.count()); + } + } +}; + +UCS_TEST_P(test_async_event_unset_from_handler, sync) { + test_unset_from_handler(true); +} + +UCS_TEST_P(test_async_event_unset_from_handler, async) { + test_unset_from_handler(false); +} + +class local_event_add_handler : public local_event { +public: + local_event_add_handler(ucs_async_mode_t mode) : + local_event(mode), m_event_set(false) + { + int ret = pipe(m_pipefd); + ucs_assertv_always(0 == ret, "%m"); + } + + ~local_event_add_handler() { + close(m_pipefd[0]); + close(m_pipefd[1]); + } + + void unset_handler(int sync) { + local_event::unset_handler(sync); + if (m_event_set) { + ucs_status_t status = ucs_async_remove_handler(m_pipefd[0], sync); + ASSERT_UCS_OK(status); + m_event_set = false; + } + } + +protected: + static void dummy_cb(int id, void *arg) { + } + + virtual void handler() { + base::handler(); + if (!m_event_set) { + ucs_status_t status = + ucs_async_set_event_handler(mode(), m_pipefd[0], + UCS_EVENT_SET_EVREAD, + dummy_cb, this, &m_async); + ASSERT_UCS_OK(status); + m_event_set = true; + } + } + + int m_pipefd[2]; + bool m_event_set; +}; + +UCS_TEST_P(test_async, event_add_from_handler) { + local_event_add_handler le(GetParam()); + + le.push_event(); + sched_yield(); /* let the async handler run, to provoke the race */ + le.unset_handler(1); +} + +typedef test_async_mt<local_event> test_async_event_mt; +typedef test_async_mt<local_timer> test_async_timer_mt; + +/* + * Run multiple threads which all process events independently. 
+ */ +UCS_TEST_SKIP_COND_P(test_async_event_mt, multithread, + !(HAVE_DECL_F_SETOWN_EX)) { + spawn(); + + for (int j = 0; j < COUNT; ++j) { + for (unsigned i = 0; i < NUM_THREADS; ++i) { + event(i)->push_event(); + suspend(); + } + } + + suspend(); + + stop(); + + for (unsigned i = 0; i < NUM_THREADS; ++i) { + int count = thread_count(i); + EXPECT_GE(count, (int)(COUNT * 0.4)); + } +} +UCS_TEST_P(test_async_timer_mt, multithread) { + const int exp_min_count = (int)(COUNT * 0.10); + int min_count = 0; + for (int r = 0; r < TIMER_RETRIES; ++r) { + spawn(); + suspend(2 * COUNT); + stop(); + + min_count = std::numeric_limits<int>::max(); + for (unsigned i = 0; i < NUM_THREADS; ++i) { + int count = thread_count(i); + min_count = ucs_min(count, min_count); + } + if (min_count >= exp_min_count) { + break; + } + } + EXPECT_GE(min_count, exp_min_count); +} + +std::ostream& operator<<(std::ostream& os, ucs_async_mode_t mode) +{ + return os << ucs_async_mode_names[mode]; +} + +#define INSTANTIATE_ASYNC_TEST_CASES(_test_fixture) \ + INSTANTIATE_TEST_CASE_P(signal, _test_fixture, ::testing::Values(UCS_ASYNC_MODE_SIGNAL)); \ + INSTANTIATE_TEST_CASE_P(thread_spinlock, _test_fixture, ::testing::Values(UCS_ASYNC_MODE_THREAD_SPINLOCK)); \ + INSTANTIATE_TEST_CASE_P(thread_mutex, _test_fixture, ::testing::Values(UCS_ASYNC_MODE_THREAD_MUTEX)); \ + INSTANTIATE_TEST_CASE_P(poll, _test_fixture, ::testing::Values(UCS_ASYNC_MODE_POLL)); + +INSTANTIATE_ASYNC_TEST_CASES(test_async); +INSTANTIATE_ASYNC_TEST_CASES(test_async_event_unset_from_handler); +INSTANTIATE_ASYNC_TEST_CASES(test_async_event_mt); +INSTANTIATE_ASYNC_TEST_CASES(test_async_timer_mt); diff --git a/test/gtest/ucs/test_callbackq.cc b/test/gtest/ucs/test_callbackq.cc new file mode 100644 index 0000000..705dbdc --- /dev/null +++ b/test/gtest/ucs/test_callbackq.cc @@ -0,0 +1,429 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#include +#include + +extern "C" { +#include +#include +#include +} + + +class test_callbackq : + public ucs::test_base, + public ::testing::TestWithParam { +protected: + + enum { + COMMAND_REMOVE_SELF, + COMMAND_ENQUEUE_KEY, + COMMAND_ADD_ANOTHER, + COMMAND_NONE + }; + + struct callback_ctx { + test_callbackq *test; + int callback_id; + uint32_t count; + int command; + callback_ctx *to_add; + int key; + }; + + test_callbackq() { + memset(&m_cbq, 0, sizeof(m_cbq)); /* Silence coverity */ + } + + virtual void init() { + ucs_status_t status = ucs_callbackq_init(&m_cbq); + ASSERT_UCS_OK(status); + } + + virtual void cleanup() { + ucs_callbackq_cleanup(&m_cbq); + ucs::test_base::cleanup(); + } + + UCS_TEST_BASE_IMPL; + + static unsigned callback_proxy(void *arg) + { + callback_ctx *ctx = reinterpret_cast(arg); + return ctx->test->callback(ctx); + } + + unsigned callback(callback_ctx *ctx) + { + ucs_atomic_add32(&ctx->count, 1); + + switch (ctx->command) { + case COMMAND_REMOVE_SELF: + remove(ctx); + break; + case COMMAND_ADD_ANOTHER: + add(ctx->to_add); + break; + case COMMAND_ENQUEUE_KEY: + m_keys_queue.push_back(ctx->key); + break; + case COMMAND_NONE: + default: + break; + } + return 1; + } + + void init_ctx(callback_ctx *ctx, int key = 0) + { + ctx->test = this; + ctx->count = 0; + ctx->command = COMMAND_NONE; + ctx->callback_id = UCS_CALLBACKQ_ID_NULL; + ctx->key = key; + } + + virtual unsigned cb_flags() { + return GetParam(); + } + + void add(callback_ctx *ctx, unsigned flags = 0) + { + ctx->callback_id = ucs_callbackq_add(&m_cbq, callback_proxy, + reinterpret_cast(ctx), + cb_flags() | flags); + } + + void remove(int callback_id) + { + ucs_callbackq_remove(&m_cbq, callback_id); + } + + void remove(callback_ctx *ctx) + { + remove(ctx->callback_id); + } + + void add_safe(callback_ctx *ctx, unsigned flags = 0) + { + ctx->callback_id = ucs_callbackq_add_safe(&m_cbq, callback_proxy, + reinterpret_cast(ctx), + cb_flags() | flags); + } + + void 
remove_safe(callback_ctx *ctx) + { + ucs_callbackq_remove_safe(&m_cbq, ctx->callback_id); + } + + static int remove_if_pred(const ucs_callbackq_elem_t *elem, void *arg) + { + callback_ctx *ctx = reinterpret_cast(elem->arg); + int key = *reinterpret_cast(arg); + + /* remove callbacks with the given key */ + return (elem->cb == callback_proxy) && (ctx->key == key); + } + + void remove_if(int key) + { + ucs_callbackq_remove_if(&m_cbq, remove_if_pred, + reinterpret_cast(&key)); + } + + unsigned dispatch(unsigned count = 1) + { + unsigned total = 0; + for (unsigned i = 0; i < count; ++i) { + total += ucs_callbackq_dispatch(&m_cbq); + } + return total; + } + + ucs_callbackq_t m_cbq; + std::deque m_keys_queue; +}; + +UCS_TEST_P(test_callbackq, single) { + callback_ctx ctx; + + init_ctx(&ctx); + add(&ctx); + dispatch(); + remove(&ctx); + EXPECT_EQ(1u, ctx.count); +} + +UCS_TEST_P(test_callbackq, count) { + callback_ctx ctx; + + init_ctx(&ctx); + add(&ctx); + unsigned count = dispatch(); + remove(&ctx); + EXPECT_EQ(1u, ctx.count); + EXPECT_EQ(1u, count); +} + +UCS_TEST_P(test_callbackq, multi) { + for (unsigned count = 0; count < 20; ++count) { + callback_ctx ctx[count]; + for (unsigned i = 0; i < count; ++i) { + init_ctx(&ctx[i]); + add(&ctx[i]); + } + + dispatch(10); + + for (unsigned i = 0; i < count; ++i) { + remove(&ctx[i]); + EXPECT_EQ(10u, ctx[i].count); + } + } +} + +UCS_TEST_P(test_callbackq, remove_self) { + callback_ctx ctx; + + init_ctx(&ctx); + ctx.command = COMMAND_REMOVE_SELF; + add(&ctx); + dispatch(); + EXPECT_EQ(1u, ctx.count); + + dispatch(); + dispatch(); + EXPECT_EQ(1u, ctx.count); +} + +UCS_TEST_P(test_callbackq, add_another) { + callback_ctx ctx, ctx2; + + init_ctx(&ctx); + init_ctx(&ctx2); + ctx.command = COMMAND_ADD_ANOTHER; + ctx.to_add = &ctx2; + + add(&ctx); + + dispatch(); + EXPECT_EQ(1u, ctx.count); + ctx.command = COMMAND_NONE; + + unsigned count = ctx.count; + + dispatch(); + EXPECT_EQ(2u, ctx.count); + EXPECT_EQ(count + 1, ctx2.count); + + 
remove(&ctx); + dispatch(); + EXPECT_EQ(2u, ctx.count); + EXPECT_EQ(count + 2, ctx2.count); + + remove(&ctx2); + dispatch(); + EXPECT_EQ(count + 2, ctx2.count); +} + +UCS_MT_TEST_P(test_callbackq, threads, 10) { + + static unsigned COUNT = 2000; + if (barrier()) { + for (unsigned i = 0; i < COUNT; ++i) { + /* part 1 */ + dispatch(100); /* simulate race */ + barrier(); /*1*/ + dispatch(5); + barrier(); /*2*/ + + /* part 2 */ + dispatch(100); /* simulate race */ + barrier(); /*3*/ + dispatch(5); + barrier(); /*4*/ + dispatch(100); + barrier(); /*5*/ + barrier(); /*6*/ /* Next loop barrier*/ + } + } else { + for (unsigned i = 0; i < COUNT; ++i) { + /* part 1 */ + callback_ctx ctx; + init_ctx(&ctx); + add_safe(&ctx); + barrier(); /*1*/ + barrier(); /*2*/ /* dispatch which seen the add command already called */ + EXPECT_GE(ctx.count, 1u); + + /* part 2 */ + remove_safe(&ctx); + barrier(); /*3*/ + barrier(); /*4*/ /* dispatch which seen the remove command already called */ + unsigned count = ctx.count; + barrier(); /*5*/ + EXPECT_EQ(count, ctx.count); + barrier(); /*6*/ /* Next loop barrier*/ + } + } +} + +UCS_MT_TEST_P(test_callbackq, remove, 10) { + static callback_ctx ctx1; + + init_ctx(&ctx1); + + if (barrier()) /*1*/ { + add_safe(&ctx1); + dispatch(100); + barrier(); /*2*/ + remove_safe(&ctx1); + dispatch(1); + + uint32_t count1 = ctx1.count; + dispatch(100); + EXPECT_EQ(count1, ctx1.count); + + barrier();/*3*/ + dispatch(1); /* will remove ctx2 on other threads */ + barrier();/*4*/ + + barrier();/*5*/ + dispatch(100); + barrier();/*6*/ + } else { + callback_ctx ctx2; + init_ctx(&ctx2); + add_safe(&ctx2); + barrier(); /*2*/ + + /* ask to ctx2 and wait until the main thread actually removes it */ + remove_safe(&ctx2); + barrier(); /*3*/ + barrier(); /*4*/ + + /* make sure ctx2 is not dispatched */ + uint32_t count1 = ctx2.count; + barrier();/*5*/ + barrier();/*6*/ + EXPECT_EQ(count1, ctx2.count); + } +} + +INSTANTIATE_TEST_CASE_P(fast_path, test_callbackq, :: + 
testing::Values(static_cast(UCS_CALLBACKQ_FLAG_FAST))); +INSTANTIATE_TEST_CASE_P(slow_path, test_callbackq, ::testing::Values(0)); + + +class test_callbackq_noflags : public test_callbackq { +protected: + virtual unsigned cb_flags() { + return 0; + } +}; + +UCS_TEST_F(test_callbackq_noflags, oneshot) { + callback_ctx ctx; + + init_ctx(&ctx); + ctx.command = COMMAND_NONE; + + add(&ctx, UCS_CALLBACKQ_FLAG_ONESHOT); + dispatch(100); + EXPECT_EQ(1u, ctx.count); +} + +UCS_TEST_F(test_callbackq_noflags, remove_if) { + const size_t count = 1000; + const int num_keys = 10; + std::vector ctx(count); + size_t key_counts[num_keys] = {0}; + + for (size_t i = 0; i < count; ++i) { + init_ctx(&ctx[i], ucs::rand() % num_keys); + add(&ctx[i], (i % 2) ? UCS_CALLBACKQ_FLAG_FAST : 0); + ++key_counts[ctx[i].key]; + } + + /* calculate how many callbacks expected to remain after removing each of + * the keys. + */ + size_t exp_count[num_keys] = {0}; + for (int key = num_keys - 2; key >= 0; --key) { + exp_count[key] = exp_count[key + 1] + key_counts[key + 1]; + } + + /* remove keys one after another and make sure the exact expected number + * of callbacks is being called after every removal. 
+ */ + for (int key = 0; key < num_keys; ++key) { + remove_if(key); + + /* count how many different callbacks were called */ + size_t num_cbs = 0; + dispatch(1000); + for (size_t i = 0; i < count; ++i) { + num_cbs += !!ctx[i].count; + ctx[i].count = 0; /* reset for next iteration */ + } + + EXPECT_EQ(exp_count[key], num_cbs) << "key=" << key; + } +} + +UCS_TEST_F(test_callbackq_noflags, ordering) { + static const int UNUSED_CB_KEY = -1; + static const int num_callbacks = 100; + std::vector ctxs(num_callbacks); + std::deque gc_list; + std::deque oneshot_callback_keys; + + for (int i = 0; i < num_callbacks; ++i) { + callback_ctx& r_ctx = ctxs[i]; + + // randomize: either permanent callback with key=i or oneshot callback + // with key=-1 + init_ctx(&r_ctx); + unsigned cb_flags = 0; + if (ucs::rand() % 2) { + // oneshot callback, which must stay in order + r_ctx.key = i; + r_ctx.command = COMMAND_ENQUEUE_KEY; + cb_flags = UCS_CALLBACKQ_FLAG_ONESHOT; + oneshot_callback_keys.push_back(i); + } else { + // permanent + r_ctx.key = UNUSED_CB_KEY; + if (ucs::rand() % 2) { + // do-nothing callback + r_ctx.command = COMMAND_NONE; + } else { + // non-one-shot callback which removes itself - for more fun + r_ctx.command = COMMAND_REMOVE_SELF; + } + } + + add(&r_ctx, cb_flags); + + if (r_ctx.command == COMMAND_NONE) { + // we need to remove callbacks which don't remove themselves in the + // end of the test + gc_list.push_back(r_ctx.callback_id); + } + } + + dispatch(10); + + // make sure the ONESHOT callbacks were executed in order + EXPECT_EQ(oneshot_callback_keys, m_keys_queue); + + // remove remaining callbacks + while (!gc_list.empty()) { + remove(gc_list.front()); + gc_list.pop_front(); + } +} diff --git a/test/gtest/ucs/test_class.cc b/test/gtest/ucs/test_class.cc new file mode 100644 index 0000000..c63f913 --- /dev/null +++ b/test/gtest/ucs/test_class.cc @@ -0,0 +1,207 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. 
+* +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +} + +class test_class : public ucs::test { +}; + + +typedef struct base { + int field1; +} base_t; +UCS_CLASS_DECLARE(base_t, int); + +typedef struct derived { + base_t super; + int field2; +} derived_t; +UCS_CLASS_DECLARE(derived_t, int, int); + +typedef struct derived2 { + base_t super; + int field2; +} derived2_t; +UCS_CLASS_DECLARE(derived2_t, int, int); + +static int base_init_count = 0; +static int derived_init_count = 0; + + +/* Base impl */ + +UCS_CLASS_INIT_FUNC(base_t, int param) +{ + if (param < 0) { + return UCS_ERR_INVALID_PARAM; + } + self->field1 = param; + ++base_init_count; + return UCS_OK; +} + +UCS_CLASS_CLEANUP_FUNC(base_t) +{ + --base_init_count; +} + +UCS_CLASS_DEFINE(base_t, void); + +/* Derived impl */ + +UCS_CLASS_INIT_FUNC(derived_t, int param1, int param2) +{ + UCS_CLASS_CALL_SUPER_INIT(base_t, param1); + + if (param2 < 0) { + return UCS_ERR_INVALID_PARAM; + } + self->field2 = param2; + ++derived_init_count; + return UCS_OK; +} + +UCS_CLASS_CLEANUP_FUNC(derived_t) +{ + --derived_init_count; +} + +UCS_CLASS_DEFINE(derived_t, base_t); + +UCS_CLASS_DEFINE_NEW_FUNC(derived_t, derived_t, int, int); +UCS_CLASS_DEFINE_DELETE_FUNC(derived_t, derived_t); + + +/* Derived2 impl */ + +UCS_CLASS_INIT_FUNC(derived2_t, int param1, int param2) +{ + if (param2 < 0) { + return UCS_ERR_INVALID_PARAM; + } + + UCS_CLASS_CALL_SUPER_INIT(base_t, param1); + + self->field2 = param2; + ++derived_init_count; + return UCS_OK; +} + +UCS_CLASS_CLEANUP_FUNC(derived2_t) +{ + --derived_init_count; +} + +UCS_CLASS_DEFINE(derived2_t, base_t); + + +UCS_TEST_F(test_class, basic) { + derived_t *derived; + ucs_status_t status; + + ASSERT_EQ(0, base_init_count); + ASSERT_EQ(0, derived_init_count); + + status = UCS_CLASS_NEW(derived_t, &derived, 1, 2); + ASSERT_UCS_OK(status); + + /* coverity[uninit_use] */ + EXPECT_EQ(2, derived->field2); + EXPECT_EQ(1, derived->super.field1); + + EXPECT_EQ(1, 
base_init_count); + EXPECT_EQ(1, derived_init_count); + + UCS_CLASS_DELETE(derived_t, derived); + + EXPECT_EQ(0, base_init_count); + EXPECT_EQ(0, derived_init_count); +} + +UCS_TEST_F(test_class, create_destroy) { + derived_t *derived; + ucs_status_t status; + + ASSERT_EQ(0, base_init_count); + ASSERT_EQ(0, derived_init_count); + + status = UCS_CLASS_NEW_FUNC_NAME(derived_t)(1, 2, &derived); + ASSERT_UCS_OK(status); + + EXPECT_EQ(2, derived->field2); + EXPECT_EQ(1, derived->super.field1); + + EXPECT_EQ(1, base_init_count); + EXPECT_EQ(1, derived_init_count); + + UCS_CLASS_DELETE_FUNC_NAME(derived_t)(derived); + + EXPECT_EQ(0, base_init_count); + EXPECT_EQ(0, derived_init_count); +} + +UCS_TEST_F(test_class, failure) { + derived_t *derived; + ucs_status_t status; + + ASSERT_EQ(0, base_init_count); + ASSERT_EQ(0, derived_init_count); + + /* Should fail on base */ + derived = NULL; + status = UCS_CLASS_NEW(derived_t, &derived, -1, 2); + /* coverity[leaked_storage] */ + ASSERT_EQ(UCS_ERR_INVALID_PARAM, status); + ASSERT_TRUE(NULL == derived); + + /* Should be properly cleaned up */ + EXPECT_EQ(0, base_init_count); + EXPECT_EQ(0, derived_init_count); + + /* Should fail on derived */ + derived = NULL; + status = UCS_CLASS_NEW(derived_t, &derived, 1, -2); + /* coverity[leaked_storage] */ + ASSERT_EQ(UCS_ERR_INVALID_PARAM, status); + ASSERT_TRUE(NULL == derived); + + /* Should be properly cleaned up */ + EXPECT_EQ(0, base_init_count); + EXPECT_EQ(0, derived_init_count); +} + +UCS_TEST_F(test_class, failure2) { + derived2_t *derived; + ucs_status_t status; + + ASSERT_EQ(0, base_init_count); + ASSERT_EQ(0, derived_init_count); + + /* Should fail on base */ + derived = NULL; + status = UCS_CLASS_NEW(derived2_t, &derived, -1, 2); + /* coverity[leaked_storage] */ + ASSERT_EQ(UCS_ERR_INVALID_PARAM, status); + ASSERT_TRUE(NULL == derived); + + /* Should be properly cleaned up */ + EXPECT_EQ(0, base_init_count); + EXPECT_EQ(0, derived_init_count); + + /* Should fail on derived */ 
+ derived = NULL; + status = UCS_CLASS_NEW(derived2_t, &derived, 1, -2); + /* coverity[leaked_storage] */ + ASSERT_EQ(UCS_ERR_INVALID_PARAM, status); + ASSERT_TRUE(NULL == derived); + + /* Should be properly cleaned up */ + EXPECT_EQ(0, base_init_count); + EXPECT_EQ(0, derived_init_count); +} diff --git a/test/gtest/ucs/test_config.cc b/test/gtest/ucs/test_config.cc new file mode 100644 index 0000000..8e951cf --- /dev/null +++ b/test/gtest/ucs/test_config.cc @@ -0,0 +1,578 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ +/* force older C++ version to have SIZE_MAX */ +#define __STDC_LIMIT_MACROS +#define __STDC_CONSTANT_MACROS +#include +extern "C" { +#include +#include +} + + +typedef enum { + COLOR_RED, + COLOR_BLUE, + COLOR_BLACK, + COLOR_YELLOW, + COLOR_WHITE, + COLOR_LAST +} color_t; + +typedef enum { + MATERIAL_LEATHER, + MATERIAL_ALCANTARA, + MATERIAL_TEXTILE, + MATERIAL_LAST +} material_t; + +const char *color_names[] = { + /* [COLOR_RED] = */ "red", + /* [COLOR_BLUE] = */ "blue", + /* [COLOR_BLACK] = */ "black", + /* [COLOR_YELLOW] = */ "yellow", + /* [COLOR_WHITE] = */ "white", + /* [COLOR_LAST] = */ NULL +}; + +const char *material_names[] = { + /* [MATERIAL_LEATHER] = */ "leather", + /* [MATERIAL_ALCANTARA] = */ "alcantara", + /* [MATERIAL_TEXTILE] = */ "textile", + /* [MATERIAL_LAST] = */ NULL +}; + +typedef struct { + color_t color; + material_t material; +} seat_opts_t; + +typedef struct { + seat_opts_t driver_seat; + seat_opts_t passenger_seat; + seat_opts_t rear_seat; +} coach_opts_t; + +typedef struct { + unsigned volume; + unsigned long power; +} engine_opts_t; + +typedef struct { + engine_opts_t engine; + coach_opts_t coach; + unsigned price; + const char *brand; + const char *model; + color_t color; + unsigned long vin; + + double bw_bytes; + double bw_kbytes; + double bw_mbytes; + double bw_gbytes; + double 
bw_tbytes; + double bw_bits; + double bw_kbits; + double bw_mbits; + double bw_gbits; + double bw_tbits; + double bw_auto; + + ucs_config_bw_spec_t can_pci_bw; /* CAN-bus */ + + int air_conditioning; + int abs; + int transmission; +} car_opts_t; + + +ucs_config_field_t seat_opts_table[] = { + {"COLOR", "black", "Seat color", + ucs_offsetof(seat_opts_t, color), UCS_CONFIG_TYPE_ENUM(color_names)}, + + {"COLOR_ALIAS", NULL, "Seat color", + ucs_offsetof(seat_opts_t, color), UCS_CONFIG_TYPE_ENUM(color_names)}, + + {"MATERIAL", "textile", "Cover seat material", + ucs_offsetof(seat_opts_t, material), UCS_CONFIG_TYPE_ENUM(material_names)}, + + {NULL} +}; + +ucs_config_field_t coach_opts_table[] = { + {"DRIVER_", "COLOR=red", "Driver seat options", + ucs_offsetof(coach_opts_t, driver_seat), UCS_CONFIG_TYPE_TABLE(seat_opts_table)}, + + {"PASSENGER_", "", "Passenger seat options", + ucs_offsetof(coach_opts_t, passenger_seat), UCS_CONFIG_TYPE_TABLE(seat_opts_table)}, + + {"REAR_", "", "Rear seat options", + ucs_offsetof(coach_opts_t, rear_seat), UCS_CONFIG_TYPE_TABLE(seat_opts_table)}, + + {NULL} +}; + +ucs_config_field_t engine_opts_table[] = { + {"VOLUME", "6000", "Engine volume", + ucs_offsetof(engine_opts_t, volume), UCS_CONFIG_TYPE_UINT}, + + {"POWER", "200", "Engine power", + ucs_offsetof(engine_opts_t, power), UCS_CONFIG_TYPE_ULUNITS}, + + {"POWER_ALIAS", NULL, "Engine power", + ucs_offsetof(engine_opts_t, power), UCS_CONFIG_TYPE_ULUNITS}, + + {"FUEL_LEVEL", "", "This is electric car", + UCS_CONFIG_DEPRECATED_FIELD_OFFSET, UCS_CONFIG_TYPE_DEPRECATED}, + + {NULL} +}; + +ucs_config_field_t car_opts_table[] = { + {"ENGINE_", "", "Engine options", + ucs_offsetof(car_opts_t, engine), UCS_CONFIG_TYPE_TABLE(engine_opts_table)}, + + {"COACH_", "PASSENGER_COLOR=blue", "Seats options", + ucs_offsetof(car_opts_t, coach), UCS_CONFIG_TYPE_TABLE(coach_opts_table)}, + + {"PRICE", "999", "Price", + ucs_offsetof(car_opts_t, price), UCS_CONFIG_TYPE_UINT}, + + {"PRICE_ALIAS", NULL, 
"Price", + ucs_offsetof(car_opts_t, price), UCS_CONFIG_TYPE_UINT}, + + {"DRIVER", "", "AI drives a car", + UCS_CONFIG_DEPRECATED_FIELD_OFFSET, UCS_CONFIG_TYPE_DEPRECATED}, + + {"BRAND", "Chevy", "Car brand", + ucs_offsetof(car_opts_t, brand), UCS_CONFIG_TYPE_STRING}, + + {"MODEL", "Corvette", "Car model", + ucs_offsetof(car_opts_t, model), UCS_CONFIG_TYPE_STRING}, + + {"COLOR", "red", "Car color", + ucs_offsetof(car_opts_t, color), UCS_CONFIG_TYPE_ENUM(color_names)}, + + {"VIN", "auto", "Vehicle identification number", + ucs_offsetof(car_opts_t, vin), UCS_CONFIG_TYPE_ULUNITS}, + + {"BW_BYTES", "1024Bs", "Bandwidth in bytes", + ucs_offsetof(car_opts_t, bw_bytes), UCS_CONFIG_TYPE_BW}, + + {"BW_KBYTES", "1024KB/s", "Bandwidth in kbytes", + ucs_offsetof(car_opts_t, bw_kbytes), UCS_CONFIG_TYPE_BW}, + + {"BW_MBYTES", "1024MBs", "Bandwidth in mbytes", + ucs_offsetof(car_opts_t, bw_mbytes), UCS_CONFIG_TYPE_BW}, + + {"BW_GBYTES", "1024GBps", "Bandwidth in gbytes", + ucs_offsetof(car_opts_t, bw_gbytes), UCS_CONFIG_TYPE_BW}, + + {"BW_TBYTES", "1024TB/s", "Bandwidth in tbytes", + ucs_offsetof(car_opts_t, bw_tbytes), UCS_CONFIG_TYPE_BW}, + + {"BW_BITS", "1024bps", "Bandwidth in bits", + ucs_offsetof(car_opts_t, bw_bits), UCS_CONFIG_TYPE_BW}, + + {"BW_KBITS", "1024Kb/s", "Bandwidth in kbits", + ucs_offsetof(car_opts_t, bw_kbits), UCS_CONFIG_TYPE_BW}, + + {"BW_MBITS", "1024Mbs", "Bandwidth in mbits", + ucs_offsetof(car_opts_t, bw_mbits), UCS_CONFIG_TYPE_BW}, + + {"BW_GBITS", "1024Gbps", "Bandwidth in gbits", + ucs_offsetof(car_opts_t, bw_gbits), UCS_CONFIG_TYPE_BW}, + + {"BW_TBITS", "1024Tbs", "Bandwidth in tbits", + ucs_offsetof(car_opts_t, bw_tbits), UCS_CONFIG_TYPE_BW}, + + {"BW_AUTO", "auto", "Auto bandwidth value", + ucs_offsetof(car_opts_t, bw_auto), UCS_CONFIG_TYPE_BW}, + + {"CAN_BUS_BW", "mlx5_0:1024Tbs", "Bandwidth in tbits of CAN-bus", + ucs_offsetof(car_opts_t, can_pci_bw), UCS_CONFIG_TYPE_BW_SPEC}, + + {"AIR_CONDITIONING", "on", "Air conditioning mode", + 
ucs_offsetof(car_opts_t, air_conditioning), UCS_CONFIG_TYPE_ON_OFF}, + + {"ABS", "off", "ABS mode", + ucs_offsetof(car_opts_t, abs), UCS_CONFIG_TYPE_ON_OFF}, + + {"TRANSMISSION", "auto", "Transmission mode", + ucs_offsetof(car_opts_t, transmission), UCS_CONFIG_TYPE_ON_OFF_AUTO}, + + {NULL} +}; + +static std::vector config_err_exp_str; + +class test_config : public ucs::test { +protected: + static ucs_log_func_rc_t + config_error_handler(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) + { + // Ignore errors that invalid input parameters as it is expected + if (level == UCS_LOG_LEVEL_WARN) { + std::string err_str = format_message(message, ap); + + for (size_t i = 0; i < config_err_exp_str.size(); i++) { + if (err_str.find(config_err_exp_str[i]) != std::string::npos) { + UCS_TEST_MESSAGE << err_str; + return UCS_LOG_FUNC_RC_STOP; + } + } + } + + return UCS_LOG_FUNC_RC_CONTINUE; + } + + /* + * Wrapper class for car options parser. 
+ */ + class car_opts { + public: + car_opts(const char *env_prefix, const char *table_prefix) : + m_opts(parse(env_prefix, table_prefix)), m_max(1024), m_value(NULL) + { + m_value = new char[m_max]; + m_value[0] = '\0'; + } + + car_opts(const car_opts& orig) : m_max(orig.m_max) + { + m_value = new char[m_max]; + strncpy(m_value, orig.m_value, m_max); + + ucs_status_t status = ucs_config_parser_clone_opts(&orig.m_opts, + &m_opts, + car_opts_table); + ASSERT_UCS_OK(status); + } + + ~car_opts() { + ucs_config_parser_release_opts(&m_opts, car_opts_table); + delete [] m_value; + } + + void set(const char *name, const char *value) { + ucs_config_parser_set_value(&m_opts, car_opts_table, name, value); + } + + const char* get(const char *name) { + ucs_status_t status = ucs_config_parser_get_value(&m_opts, + car_opts_table, + name, m_value, + m_max); + ASSERT_UCS_OK(status); + return m_value; + } + + car_opts_t* operator->() { + return &m_opts; + } + + car_opts_t* operator*() { + return &m_opts; + } + private: + + static car_opts_t parse(const char *env_prefix, + const char *table_prefix) { + car_opts_t tmp; + ucs_status_t status = ucs_config_parser_fill_opts(&tmp, + car_opts_table, + env_prefix, + table_prefix, + 0); + ASSERT_UCS_OK(status); + return tmp; + } + + car_opts_t m_opts; + const size_t m_max; + char *m_value; + }; + + static void test_config_print_opts(unsigned flags, + unsigned exp_num_lines, + const char *prefix = NULL) + { + char *dump_data; + size_t dump_size; + char line_buf[1024]; + char alias[128]; + car_opts opts(NULL, NULL); + + memset(alias, 0, sizeof(alias)); + + /* Dump configuration to a memory buffer */ + dump_data = NULL; + FILE *file = open_memstream(&dump_data, &dump_size); + ucs_config_parser_print_opts(file, "", *opts, car_opts_table, + prefix, + (ucs_config_print_flags_t)flags); + + /* Sanity check - all lines begin with UCS_ */ + unsigned num_lines = 0; + fseek(file, 0, SEEK_SET); + while (fgets(line_buf, sizeof(line_buf), file)) { + if 
(line_buf[0] == '\n') { + continue; + } + + if (line_buf[0] != '#') { + /* found the name of attribute */ + + if (alias[0] != '\0') { + /* the code below relies on the fact that all + * aliases has the name: "_ALIAS" */ + EXPECT_EQ(0, strncmp(alias, line_buf, + strlen(alias) - strlen("_ALIAS"))); + memset(alias, 0, sizeof(alias)); + } + + std::string exp_str = "UCX_"; + if (prefix) { + exp_str += prefix; + } + line_buf[exp_str.size()] = '\0'; + EXPECT_STREQ(exp_str.c_str(), line_buf); + ++num_lines; + } else if (strncmp(&line_buf[2], "alias of:", + strlen("alias of:")) == 0) { + /* found the alias name of attribute */ + + size_t cnt = 0; + for (size_t i = 2 + strlen("alias of: ") + 1; + line_buf[i] != '\n'; i++) { + alias[cnt++] = line_buf[i]; + } + } + } + + EXPECT_EQ(exp_num_lines, num_lines); + + fclose(file); + free(dump_data); + } +}; + +UCS_TEST_F(test_config, parse_default) { + car_opts opts(NULL, "TEST"); + + EXPECT_EQ(999U, opts->price); + EXPECT_EQ(std::string("Chevy"), opts->brand); + EXPECT_EQ(std::string("Corvette"), opts->model); + EXPECT_EQ(COLOR_RED, opts->color); + EXPECT_EQ(6000U, opts->engine.volume); + EXPECT_EQ(COLOR_RED, opts->coach.driver_seat.color); + EXPECT_EQ(COLOR_BLUE, opts->coach.passenger_seat.color); + EXPECT_EQ(COLOR_BLACK, opts->coach.rear_seat.color); + EXPECT_EQ(UCS_ULUNITS_AUTO, opts->vin); + EXPECT_EQ(200UL, opts->engine.power); + + EXPECT_EQ(1024.0, opts->bw_bytes); + EXPECT_EQ(UCS_KBYTE * 1024.0, opts->bw_kbytes); + EXPECT_EQ(UCS_MBYTE * 1024.0, opts->bw_mbytes); + EXPECT_EQ(UCS_GBYTE * 1024.0, opts->bw_gbytes); + EXPECT_EQ(UCS_TBYTE * 1024.0, opts->bw_tbytes); + + EXPECT_EQ(128.0, opts->bw_bits); + EXPECT_EQ(UCS_KBYTE * 128.0, opts->bw_kbits); + EXPECT_EQ(UCS_MBYTE * 128.0, opts->bw_mbits); + EXPECT_EQ(UCS_GBYTE * 128.0, opts->bw_gbits); + EXPECT_EQ(UCS_TBYTE * 128.0, opts->bw_tbits); + EXPECT_EQ(UCS_BANDWIDTH_AUTO, opts->bw_auto); + + EXPECT_EQ(UCS_TBYTE * 128.0, opts->can_pci_bw.bw); + EXPECT_EQ(std::string("mlx5_0"), 
opts->can_pci_bw.name); + + EXPECT_EQ(UCS_CONFIG_ON, opts->air_conditioning); + EXPECT_EQ(UCS_CONFIG_OFF, opts->abs); + EXPECT_EQ(UCS_CONFIG_AUTO, opts->transmission); +} + +UCS_TEST_F(test_config, clone) { + + car_opts *opts_clone_ptr; + + { + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv env1("UCX_COLOR", "white"); + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv env2("UCX_PRICE_ALIAS", "0"); + + car_opts opts(NULL, NULL); + EXPECT_EQ(COLOR_WHITE, opts->color); + EXPECT_EQ(0U, opts->price); + + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv env3("UCX_COLOR", "black"); + opts_clone_ptr = new car_opts(opts); + } + + EXPECT_EQ(COLOR_WHITE, (*opts_clone_ptr)->color); + delete opts_clone_ptr; +} + +UCS_TEST_F(test_config, set_get) { + car_opts opts(NULL, NULL); + EXPECT_EQ(COLOR_RED, opts->color); + EXPECT_EQ(std::string(color_names[COLOR_RED]), + std::string(opts.get("COLOR"))); + + opts.set("COLOR", "white"); + EXPECT_EQ(COLOR_WHITE, opts->color); + EXPECT_EQ(std::string(color_names[COLOR_WHITE]), + std::string(opts.get("COLOR"))); + + opts.set("DRIVER_COLOR_ALIAS", "black"); + EXPECT_EQ(COLOR_BLACK, opts->coach.driver_seat.color); + EXPECT_EQ(std::string(color_names[COLOR_BLACK]), + std::string(opts.get("COACH_DRIVER_COLOR_ALIAS"))); + + opts.set("VIN", "123456"); + EXPECT_EQ(123456UL, opts->vin); +} + +UCS_TEST_F(test_config, set_get_with_table_prefix) { + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv env1("UCX_COLOR", "black"); + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv env2("UCX_CARS_COLOR", "white"); + + car_opts opts(NULL, "CARS_"); + EXPECT_EQ(COLOR_WHITE, opts->color); + EXPECT_EQ(std::string(color_names[COLOR_WHITE]), + std::string(opts.get("COLOR"))); +} + +UCS_TEST_F(test_config, set_get_with_env_prefix) { + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv env1("UCX_COLOR", "black"); + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv 
env2("UCX_TEST_COLOR", "white"); + + car_opts opts("TEST", NULL); + EXPECT_EQ(COLOR_WHITE, opts->color); + EXPECT_EQ(std::string(color_names[COLOR_WHITE]), + std::string(opts.get("COLOR"))); +} + +UCS_TEST_F(test_config, performance) { + + /* Add stuff to env to presumably make getenv() slower */ + ucs::ptr_vector env; + for (unsigned i = 0; i < 300; ++i) { + env.push_back(new ucs::scoped_setenv( + (std::string("MTEST") + ucs::to_string(i)).c_str(), + "")); + } + + /* Now test the time */ + UCS_TEST_TIME_LIMIT(0.05) { + car_opts opts(NULL, NULL); + } +} + +UCS_TEST_F(test_config, unused) { + ucs::ucx_env_cleanup env_cleanup; + + /* set to warn about unused env vars */ + ucs_global_opts.warn_unused_env_vars = 1; + + const std::string warn_str = "unused env variable"; + const std::string unused_var1 = "UCX_UNUSED_VAR1"; + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv env1(unused_var1.c_str(), "unused"); + + { + config_err_exp_str.push_back(warn_str + ": " + unused_var1); + scoped_log_handler log_handler(config_error_handler); + car_opts opts(NULL, NULL); + + ucs_config_parser_warn_unused_env_vars(); + + config_err_exp_str.pop_back(); + } + + { + const std::string unused_var2 = "UCX_UNUSED_VAR2"; + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv env2(unused_var2.c_str(), "unused"); + + config_err_exp_str.push_back(warn_str + "s: " + + unused_var1 + ", " + unused_var2); + scoped_log_handler log_handler(config_error_handler); + car_opts opts(NULL, NULL); + + ucs_config_parser_warn_unused_env_vars(); + + config_err_exp_str.pop_back(); + } + + /* reset to not warn about unused env vars */ + ucs_global_opts.warn_unused_env_vars = 0; +} + +UCS_TEST_F(test_config, dump) { + /* aliases must not be counted here */ + test_config_print_opts(UCS_CONFIG_PRINT_CONFIG, 28u); +} + +UCS_TEST_F(test_config, dump_hidden) { + /* aliases must be counted here */ + test_config_print_opts((UCS_CONFIG_PRINT_CONFIG | + UCS_CONFIG_PRINT_HIDDEN), + 35u); +} + 
+UCS_TEST_F(test_config, dump_hidden_check_alias_name) { /* Dumps the car options with the CONFIG, HIDDEN and DOC print flags, first without a prefix and then with the "TEST_" prefix; test_config_print_opts is told to expect 35 attribute lines in both dumps */ + /* aliases must be counted here (UCS_CONFIG_PRINT_HIDDEN is set, so 35 attribute lines are expected) */ + test_config_print_opts((UCS_CONFIG_PRINT_CONFIG | + UCS_CONFIG_PRINT_HIDDEN | + UCS_CONFIG_PRINT_DOC), + 35u); + + test_config_print_opts((UCS_CONFIG_PRINT_CONFIG | + UCS_CONFIG_PRINT_HIDDEN | + UCS_CONFIG_PRINT_DOC), + 35u, "TEST_"); +} + +UCS_TEST_F(test_config, deprecated) { /* Parses the car options while deprecated variables are set (UCX_DRIVER alone, then UCX_DRIVER together with UCX_ENGINE_FUEL_LEVEL). config_error_handler stops WARN messages that contain one of the strings queued in config_err_exp_str. NOTE(review): nothing in this test asserts that the warning was actually emitted - confirm that is intended */ + /* set to warn about unused env vars */ + ucs_global_opts.warn_unused_env_vars = 1; + + const std::string warn_str = " is deprecated"; + const std::string deprecated_var1 = "UCX_DRIVER"; + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv env1(deprecated_var1.c_str(), "Taxi driver"); + config_err_exp_str.push_back(deprecated_var1 + warn_str); /* expected text: "UCX_DRIVER is deprecated"; popped again near the end of the test */ + + { + scoped_log_handler log_handler(config_error_handler); + car_opts opts(NULL, NULL); + } + + { + const std::string deprecated_var2 = "UCX_ENGINE_FUEL_LEVEL"; + /* coverity[tainted_string_argument] */ + ucs::scoped_setenv env2(deprecated_var2.c_str(), "58"); + config_err_exp_str.push_back(deprecated_var2 + warn_str); /* both expected strings are queued while this scope parses the options */ + + scoped_log_handler log_handler_vars(config_error_handler); + car_opts opts(NULL, NULL); + config_err_exp_str.pop_back(); + } + + config_err_exp_str.pop_back(); + + /* reset to not warn about unused env vars (forces 0; the value seen on entry is not saved) */ + ucs_global_opts.warn_unused_env_vars = 0; +} diff --git a/test/gtest/ucs/test_datatype.cc b/test/gtest/ucs/test_datatype.cc new file mode 100644 index 0000000..6580434 --- /dev/null +++ b/test/gtest/ucs/test_datatype.cc @@ -0,0 +1,525 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#include /* NOTE(review): the header names after every #include in this file are missing, presumably stripped together with their angle brackets - restore them from upstream before use */ +extern "C" { +#include +#include +#include +#include +} + +#include +#include + +class test_datatype : public ucs::test { +}; + +typedef struct { /* test element: i is the payload compared by the tests; list and queue are the link members handed to the ucs list and queue calls */ + int i; + ucs_list_link_t list; + ucs_queue_elem_t queue; +} elem_t; + +UCS_TEST_F(test_datatype, list_basic) { /* insert_after(head) followed by insert_before(head) must iterate as elem0, elem1; deleting every element inside for_each_safe must leave an empty list of length 0 */ + + ucs_list_link_t head; + elem_t elem0, elem1; + elem_t *iter, *tmp; + + ucs_list_head_init(&head); + ASSERT_EQ((unsigned long)0, ucs_list_length(&head)); + ucs_list_insert_after(&head, &elem0.list); + ucs_list_insert_before(&head, &elem1.list); + + std::vector vec; /* NOTE(review): template argument missing; the values pushed below are elem_t pointers */ + ucs_list_for_each(iter, &head, list) { + vec.push_back(iter); + } + ASSERT_EQ(2ul, vec.size()); + ASSERT_EQ(&elem0, vec[0]); + ASSERT_EQ(&elem1, vec[1]); + ASSERT_EQ((unsigned long)2, ucs_list_length(&head)); + + ucs_list_for_each_safe(iter, tmp, &head, list) { + ucs_list_del(&iter->list); + } + ASSERT_TRUE(ucs_list_is_empty(&head)); + ASSERT_EQ((unsigned long)0, ucs_list_length(&head)); +} + +UCS_TEST_F(test_datatype, list_splice) { /* head1 holds payloads 0,1,2 and head2 holds 3,4,5; after splice_tail(head1, head2) iterating head1 must yield increasing payloads starting at 0. NOTE(review): the final value of i is not checked, so a shorter list would still pass */ + + ucs_list_link_t head1, head2; + elem_t l1_elem0, l1_elem1, l1_elem2; + elem_t l2_elem0, l2_elem1, l2_elem2; + elem_t *iter; + + ucs_list_head_init(&head1); + ucs_list_head_init(&head2); + + l1_elem0.i = 0; + ucs_list_add_tail(&head1, &l1_elem0.list); + l1_elem1.i = 1; + ucs_list_add_tail(&head1, &l1_elem1.list); + l1_elem2.i = 2; + ucs_list_add_tail(&head1, &l1_elem2.list); + + l2_elem0.i = 3; + ucs_list_add_tail(&head2, &l2_elem0.list); + l2_elem1.i = 4; + ucs_list_add_tail(&head2, &l2_elem1.list); + l2_elem2.i = 5; + ucs_list_add_tail(&head2, &l2_elem2.list); + + ucs_list_splice_tail(&head1, &head2); + + int i = 0; + ucs_list_for_each(iter, &head1, list) { + EXPECT_EQ(i, iter->i); + ++i; + } +} + +UCS_TEST_F(test_datatype, queue) { /* checks FIFO order for push/pull and for push_head, with length and emptiness checks in between; the sequence runs 5 times on the same head, each pass ending with an empty queue */ + + ucs_queue_head_t head; + elem_t elem0, elem1, elem2; + elem_t *elem; + + ucs_queue_head_init(&head); + EXPECT_TRUE(ucs_queue_is_empty(&head)); + + elem0.i = 0; + elem1.i = 1; + elem2.i = 2; + + for (unsigned i = 0; i < 5; ++i) { + ucs_queue_push(&head, 
&elem0.queue); + EXPECT_FALSE(ucs_queue_is_empty(&head)); + EXPECT_EQ((unsigned long)1, ucs_queue_length(&head)); + + ucs_queue_push(&head, &elem1.queue); + EXPECT_EQ((unsigned long)2, ucs_queue_length(&head)); + + EXPECT_EQ(&elem1, ucs_queue_tail_elem_non_empty(&head, elem_t, queue)); + + elem = ucs_queue_pull_elem_non_empty(&head, elem_t, queue); + EXPECT_EQ(&elem0, elem); + EXPECT_EQ((unsigned long)1, ucs_queue_length(&head)); + + ucs_queue_push(&head, &elem2.queue); + EXPECT_EQ((unsigned long)2, ucs_queue_length(&head)); + + elem = ucs_queue_pull_elem_non_empty(&head, elem_t, queue); + EXPECT_EQ(&elem1, elem); + EXPECT_EQ((unsigned long)1, ucs_queue_length(&head)); + + elem = ucs_queue_pull_elem_non_empty(&head, elem_t, queue); + EXPECT_EQ(&elem2, elem); + EXPECT_TRUE(ucs_queue_is_empty(&head)); + EXPECT_TRUE(NULL == ucs_queue_pull(&head)); + + /* Push to head now: elem2, elem1, elem0 are pushed at the head in that order, so they must be pulled as elem0, elem1, elem2 */ + + ucs_queue_push_head(&head, &elem2.queue); + EXPECT_EQ((unsigned long)1, ucs_queue_length(&head)); + + ucs_queue_push_head(&head, &elem1.queue); + ucs_queue_push_head(&head, &elem0.queue); + EXPECT_EQ((unsigned long)3, ucs_queue_length(&head)); + + elem = ucs_queue_pull_elem_non_empty(&head, elem_t, queue); + EXPECT_EQ(&elem0, elem); + + elem = ucs_queue_pull_elem_non_empty(&head, elem_t, queue); + EXPECT_EQ(&elem1, elem); + + elem = ucs_queue_pull_elem_non_empty(&head, elem_t, queue); + EXPECT_EQ(&elem2, elem); + + EXPECT_TRUE(ucs_queue_is_empty(&head)); + } +} + +UCS_TEST_F(test_datatype, queue_iter) { /* three parts: plain for_each order over payloads 1..4, deletion through the iterator inside for_each_safe, and for_each_safe on a single element whose next pointer points at itself */ + + const int num_elems = 4; + ucs_queue_head_t head; + std::vector elems(num_elems); /* NOTE(review): template argument missing; the elements are used as elem_t below */ + + ucs_queue_head_init(&head); + EXPECT_TRUE(ucs_queue_is_empty(&head)); + + for (int i = 0; i < num_elems; ++i) { + elems[i].i = i + 1; + ucs_queue_push(&head, &elems[i].queue); + } + + { + std::vector vec; /* NOTE(review): template argument missing; holds the int payloads */ + elem_t *elem; + + ucs_queue_for_each(elem, &head, queue) { + vec.push_back(elem->i); + } + ASSERT_EQ(static_cast(num_elems), vec.size()); /* NOTE(review): the static_cast target type is missing here */ + EXPECT_EQ(1, vec[0]); + EXPECT_EQ(2, vec[1]); + EXPECT_EQ(3, 
vec[2]); + EXPECT_EQ(4, vec[3]); + } + + { + std::vector vec; /* NOTE(review): template argument missing; holds the int payloads */ + ucs_queue_iter_t iter; + elem_t *elem; + + ucs_queue_for_each_safe(elem, iter, &head, queue) + { + if (elem->i == 3 || elem->i == 4) { + ucs_queue_del_iter(&head, iter); + memset(elem, 0xff, sizeof(*elem)); /* overwrite the removed element; presumably so that a stale link into it would make the following iteration fail */ + } + } + ASSERT_EQ((unsigned long)2, ucs_queue_length(&head)); + + ucs_queue_for_each_safe(elem, iter, &head, queue) { + vec.push_back(elem->i); + ucs_queue_del_iter(&head, iter); + memset(elem, 0xff, sizeof(*elem)); + } + ASSERT_EQ(2u, vec.size()); + EXPECT_EQ(1, vec[0]); + EXPECT_EQ(2, vec[1]); + } + + /* foreach safe on a single-element queue whose next pointer is set to point back at the element itself */ + { + elem_t e1, *elem; + ucs_queue_iter_t iter; + + ucs_queue_push(&head, &e1.queue); + e1.queue.next = &e1.queue; + + int count1 = 0; + ucs_queue_for_each_safe(elem, iter, &head, queue) { + EXPECT_EQ(&elem->queue, *iter); + ++count1; + } + EXPECT_EQ(1, count1) << "Too many iterations on single element queue"; + + int count2 = 0; + ucs_queue_for_each_safe(elem, iter, &head, queue) { + EXPECT_EQ(&elem->queue, *iter); + ucs_queue_del_iter(&head, iter); + ++count2; + ASSERT_LE(count2, 2) << "Too many iterations on single element queue"; /* aborts the test instead of looping forever if deletion does not terminate the iteration */ + } + } +} + +UCS_TEST_SKIP_COND_F(test_datatype, queue_perf, + (ucs::test_time_multiplier() > 1)) { /* the condition argument is test_time_multiplier() > 1; measures the average time of one pull followed by one push on a one-element queue */ + const size_t count = 100000000ul; + ucs_queue_head_t head; + ucs_queue_elem_t elem; + + ucs_queue_head_init(&head); + ucs_queue_push(&head, &elem); + elem.next = NULL; + + ucs_time_t start_time = ucs_get_time(); + for (size_t i = 0; i < count; ++i) { + ucs_queue_pull(&head); + ucs_queue_push(&head, &elem); + } + ucs_time_t end_time = ucs_get_time(); + + double lat = ucs_time_to_nsec(end_time - start_time) / count; + UCS_TEST_MESSAGE << lat << " nsec per push+pull"; + + if (ucs::perf_retry_count) { /* the 15 nsec limit is only enforced when perf_retry_count is non-zero */ + EXPECT_LT(lat, 15.0 * ucs::test_time_multiplier()); + } else { + UCS_TEST_MESSAGE << "not validating performance"; + } + EXPECT_EQ((unsigned long)1, ucs_queue_length(&head)); +} + +UCS_TEST_F(test_datatype, queue_splice) { /* after ucs_queue_splice(newq, head) the source queue must be empty and newq must return elem0, elem1, elem2 in the original order */ + 
ucs_queue_head_t head; + elem_t elem0, elem1, elem2; + elem_t *elem; + + elem0.i = 0; + elem1.i = 1; + elem2.i = 2; + + ucs_queue_head_init(&head); + ucs_queue_push(&head, &elem0.queue); + ucs_queue_push(&head, &elem1.queue); + ucs_queue_push(&head, &elem2.queue); + + ucs_queue_head_t newq; + ucs_queue_head_init(&newq); + + EXPECT_EQ((unsigned long)3, ucs_queue_length(&head)); + EXPECT_EQ((unsigned long)0, ucs_queue_length(&newq)); + + ucs_queue_splice(&newq, &head); + + EXPECT_EQ((unsigned long)0, ucs_queue_length(&head)); + EXPECT_EQ((unsigned long)3, ucs_queue_length(&newq)); + + elem = ucs_queue_pull_elem_non_empty(&newq, elem_t, queue); + EXPECT_EQ(&elem0, elem); + + elem = ucs_queue_pull_elem_non_empty(&newq, elem_t, queue); + EXPECT_EQ(&elem1, elem); + + elem = ucs_queue_pull_elem_non_empty(&newq, elem_t, queue); + EXPECT_EQ(&elem2, elem); +} + +typedef struct { /* queue element tagged with a 16-bit sequence number, used by the extract test below */ + ucs_queue_elem_t queue; + uint16_t sn; +} ucs_test_callbackq_elem_t; + +static int ucs_test_callbackq_pull(ucs_queue_head_t *queue, uint16_t sn) +{ /* extracts elements for which UCS_CIRCULAR_COMPARE16(elem->sn, <=, sn) holds, zeroes the sn of each extracted element and returns how many were extracted */ + ucs_test_callbackq_elem_t *elem; + int count = 0; + + ucs_queue_for_each_extract(elem, queue, queue, + UCS_CIRCULAR_COMPARE16(elem->sn, <=, sn)) { + elem->sn = 0; + ++count; + } + + return count; +} + +UCS_TEST_F(test_datatype, queue_extract_if) { /* elements with sn 1, 2, 3 are queued; pulling with sn 0, 1, 2 and 10 must extract 0, 1, 1 and 1 elements respectively */ + ucs_queue_head_t queue; + ucs_test_callbackq_elem_t elem1, elem2, elem3; + unsigned count; + + ucs_queue_head_init(&queue); + + elem1.sn = 1; + elem2.sn = 2; + elem3.sn = 3; + + ucs_queue_push(&queue, &elem1.queue); + ucs_queue_push(&queue, &elem2.queue); + ucs_queue_push(&queue, &elem3.queue); + + count = ucs_test_callbackq_pull(&queue, 0); + EXPECT_EQ(0u, count); + + count = ucs_test_callbackq_pull(&queue, 1); + EXPECT_EQ(1u, count); + + count = ucs_test_callbackq_pull(&queue, 2); + EXPECT_EQ(1u, count); + EXPECT_EQ(0u, elem1.sn); /* should be removed */ + EXPECT_EQ(0u, elem2.sn); /* should be removed */ + + count = ucs_test_callbackq_pull(&queue, 10); + EXPECT_EQ(1u, count); + EXPECT_EQ(0u, elem3.sn); 
/* should be removed */ +} + +UCS_TEST_F(test_datatype, ptr_array_basic) { /* inserts four pointers and expects indices 0..3, each insert reporting 3 (the second argument given to init) through value; then checks lookup, replace and lookups of absent indices */ + ucs_ptr_array_t pa; + uint32_t value; + int a = 1, b = 2, c = 3, d = 4; + unsigned index; + + ucs_ptr_array_init(&pa, 3, "ptr_array test"); + + index = ucs_ptr_array_insert(&pa, &a, &value); + EXPECT_EQ(0u, index); + EXPECT_EQ(3u, value); + + index = ucs_ptr_array_insert(&pa, &b, &value); + EXPECT_EQ(1u, index); + EXPECT_EQ(3u, value); + + index = ucs_ptr_array_insert(&pa, &c, &value); + EXPECT_EQ(2u, index); + EXPECT_EQ(3u, value); + + index = ucs_ptr_array_insert(&pa, &d, &value); + EXPECT_EQ(3u, index); + EXPECT_EQ(3u, value); + + void *vc; + int present = ucs_ptr_array_lookup(&pa, 2, vc); /* vc is passed by name and read afterwards, so lookup presumably is a macro that assigns it - confirm in ptr_array.h */ + ASSERT_TRUE(present); + EXPECT_EQ(&c, vc); + + vc = ucs_ptr_array_replace(&pa, 2, &d); /* the test expects replace to return the pointer previously stored at the index */ + EXPECT_EQ(&c, vc); + + present = ucs_ptr_array_lookup(&pa, 2, vc); /* NOTE(review): present is not checked after this second lookup */ + EXPECT_EQ(&d, vc); + + EXPECT_FALSE(ucs_ptr_array_lookup(&pa, 5, vc)); + EXPECT_FALSE(ucs_ptr_array_lookup(&pa, 5005, vc)); + + ucs_ptr_array_remove(&pa, 0, 0); + ucs_ptr_array_remove(&pa, 1, 0); + ucs_ptr_array_remove(&pa, 2, 0); + ucs_ptr_array_remove(&pa, 3, 0); + + ucs_ptr_array_cleanup(&pa); +} + +UCS_TEST_F(test_datatype, ptr_array_random) { /* mirrors the array contents in a std::map while doing random removes and inserts, then removes everything through ucs_ptr_array_for_each */ + const unsigned count = 10000 / ucs::test_time_multiplier(); + ucs_ptr_array_t pa; + uint32_t value; + + ucs_ptr_array_init(&pa, 5, "ptr_array test"); + + std::map map; /* NOTE(review): template arguments missing; used below as index to pointer */ + + /* Insert phase */ + for (unsigned i = 0; i < count; ++i) { + void *ptr = malloc(0); + unsigned index = ucs_ptr_array_insert(&pa, ptr, &value); + + EXPECT_TRUE(map.end() == map.find(index)); + EXPECT_EQ(5u, value); + map[index] = ptr; + } + + /* Remove + insert */ + for (unsigned i = 0; i < count / 10; ++i) { + + int remove_count = ucs::rand() % 10; + for (int j = 0; j < remove_count; ++j) { + unsigned to_remove = ucs::rand() % map.size(); + std::map::iterator iter = map.begin(); + std::advance(iter, to_remove); + unsigned index = iter->first; + + void *ptr = NULL; + EXPECT_TRUE(ucs_ptr_array_lookup(&pa, index, ptr)); + 
EXPECT_EQ(ptr, map[index]); + free(ptr); + + ucs_ptr_array_remove(&pa, index, index * index); /* leaves index squared as the third argument so that a later insert into this slot can be recognised */ + EXPECT_FALSE(ucs_ptr_array_lookup(&pa, index, ptr)); + + map.erase(index); + } + + int insert_count = ucs::rand() % 10; + for (int j = 0; j < insert_count; ++j) { + void *ptr = malloc(0); + unsigned index = ucs_ptr_array_insert(&pa, ptr, &value); + + EXPECT_TRUE(map.end() == map.find(index)); + EXPECT_TRUE(index * index == value || 5u == value); /* either the value left by an earlier remove of this index, or the value given to init */ + map[index] = ptr; + } + } + + /* remove all */ + void *ptr; + unsigned index; + ucs_ptr_array_for_each(ptr, index, &pa) { + EXPECT_EQ(ptr, map[index]); + ucs_ptr_array_remove(&pa, index, 0); + free(ptr); + } + + ucs_ptr_array_cleanup(&pa); +} + +UCS_TEST_F(test_datatype, ptr_array_placeholder) { /* the value passed to remove (4) must be reported through value by the next insert that reuses index 0; the first insert reports the init value 3 */ + ucs_ptr_array_t pa; + uint32_t value; + int a = 1; + unsigned index; + + ucs_ptr_array_init(&pa, 3, "ptr_array test"); + + index = ucs_ptr_array_insert(&pa, &a, &value); + EXPECT_EQ(0u, index); + EXPECT_EQ(3u, value); + + ucs_ptr_array_remove(&pa, index, 4); + + index = ucs_ptr_array_insert(&pa, &a, &value); + EXPECT_EQ(0u, index); + EXPECT_EQ(4u, value); + + ucs_ptr_array_remove(&pa, index, 0); + + ucs_ptr_array_cleanup(&pa); +} + +UCS_TEST_SKIP_COND_F(test_datatype, ptr_array_perf, + (ucs::test_time_multiplier() > 1)) { /* the condition argument is test_time_multiplier() > 1; times 10000000 inserts, lookups and removes and reports the average nsec of each */ + const unsigned count = 10000000; + ucs_ptr_array_t pa; + uint32_t value; + + ucs_time_t insert_start_time = ucs_get_time(); /* taken before init, so the insert timing includes ucs_ptr_array_init */ + ucs_ptr_array_init(&pa, 0, "ptr_array test"); + for (unsigned i = 0; i < count; ++i) { + EXPECT_EQ(i, ucs_ptr_array_insert(&pa, NULL, &value)); + } + + ucs_time_t lookup_start_time = ucs_get_time(); + for (unsigned i = 0; i < count; ++i) { + void *ptr GTEST_ATTRIBUTE_UNUSED_; + int present = ucs_ptr_array_lookup(&pa, i, ptr); + ASSERT_TRUE(present); + } + + ucs_time_t remove_start_time = ucs_get_time(); + for (unsigned i = 0; i < count; ++i) { + ucs_ptr_array_remove(&pa, i, 0); + } + + ucs_time_t end_time = ucs_get_time(); + + ucs_ptr_array_cleanup(&pa); + + double insert_ns = 
ucs_time_to_nsec(lookup_start_time - insert_start_time) / count; + double lookup_ns = ucs_time_to_nsec(remove_start_time - lookup_start_time) / count; + double remove_ns = ucs_time_to_nsec(end_time - remove_start_time) / count; + + UCS_TEST_MESSAGE << "Timings (nsec): insert " << insert_ns << " lookup: " << + lookup_ns << " remove: " << remove_ns; + + if (ucs::perf_retry_count) { /* limits are only enforced when perf_retry_count is non-zero: 1000 nsec for insert and remove, 15 nsec for lookup on x86_64 and 100 nsec elsewhere */ + EXPECT_LT(insert_ns, 1000.0); + EXPECT_LT(remove_ns, 1000.0); +#ifdef __x86_64__ + EXPECT_LT(lookup_ns, 15.0); +#else + EXPECT_LT(lookup_ns, 100.0); +#endif + } +} + +UCS_TEST_F(test_datatype, ptr_status) { /* a value with bit 63 set plus 10 must classify as a pointer and not as an error; NULL must be neither; UCS_ERR_LAST plus 1 cast to a pointer must classify as an error */ + void *ptr1 = (void*)(UCS_BIT(63) + 10); /* NOTE(review): looks like this assumes 64-bit pointers - confirm */ + EXPECT_TRUE(UCS_PTR_IS_PTR(ptr1)); + EXPECT_FALSE(UCS_PTR_IS_PTR(NULL)); + EXPECT_FALSE(UCS_PTR_IS_ERR(NULL)); + EXPECT_FALSE(UCS_PTR_IS_ERR(ptr1)); + + void *ptr2 = (void*)(uintptr_t)(UCS_ERR_LAST + 1); + EXPECT_TRUE(UCS_PTR_IS_ERR(ptr2)); +} diff --git a/test/gtest/ucs/test_debug.cc b/test/gtest/ucs/test_debug.cc new file mode 100644 index 0000000..1ddc4ad --- /dev/null +++ b/test/gtest/ucs/test_debug.cc @@ -0,0 +1,91 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2012. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include +extern "C" { +#include +#include +#include +} + +#include + +extern "C" { + +void UCS_F_NOINLINE my_cool_function(unsigned *lineno) { *lineno = __LINE__; }; + +} + +class test_debug : public ucs::test { +}; + +std::string __basename(const std::string& path) { + char *p = strdup(path.c_str()); + std::string bn(::basename(p)); + free(p); + return bn; +} + +UCS_TEST_F(test_debug, lookup_ucs_func) { + const char sym[] = "ucs_log_flush"; + + ucs_debug_address_info info; + ucs_status_t status = ucs_debug_lookup_address(dlsym(RTLD_DEFAULT, sym), &info); + ASSERT_UCS_OK(status); + + EXPECT_NE(std::string::npos, std::string(info.file.path).find("libucs.so")); +#ifdef HAVE_DETAILED_BACKTRACE + EXPECT_EQ(sym, std::string(info.function)); +#endif +} + +UCS_TEST_F(test_debug, lookup_invalid) { + ucs_debug_address_info info; + ucs_status_t status = ucs_debug_lookup_address((void*)0xffffffffffff, &info); + EXPECT_EQ(UCS_ERR_NO_ELEM, status); +} + +UCS_TEST_SKIP_COND_F(test_debug, lookup_address, BULLSEYE_ON) { + unsigned lineno; + + my_cool_function(&lineno); + + ucs_debug_address_info info; + ucs_status_t status = ucs_debug_lookup_address((void*)&my_cool_function, + &info); + ASSERT_UCS_OK(status); + + UCS_TEST_MESSAGE << info.source_file << ":" << info.line_number << + " " << info.function << "()"; + + EXPECT_NE(std::string::npos, std::string(info.file.path).find("gtest")); + +#ifdef HAVE_DETAILED_BACKTRACE + EXPECT_EQ("my_cool_function", std::string(info.function)); + EXPECT_EQ(lineno, info.line_number); + EXPECT_EQ(__basename(__FILE__), __basename(info.source_file)); +#else + EXPECT_EQ(0u, info.line_number); + EXPECT_EQ("???", std::string(info.source_file)); +#endif +} + +UCS_TEST_F(test_debug, print_backtrace) { + char *data; + size_t size; + + FILE *f = open_memstream(&data, &size); + ucs_debug_print_backtrace(f, 0); + fclose(f); + + /* Some functions that should appear */ + EXPECT_TRUE(strstr(data, "print_backtrace") != NULL); +#ifdef 
HAVE_DETAILED_BACKTRACE + EXPECT_TRUE(strstr(data, "main") != NULL); +#endif + + free(data); +} diff --git a/test/gtest/ucs/test_event_set.cc b/test/gtest/ucs/test_event_set.cc new file mode 100644 index 0000000..1f1fa39 --- /dev/null +++ b/test/gtest/ucs/test_event_set.cc @@ -0,0 +1,284 @@ +/** +* Copyright (C) Hiroyuki Sato. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +#include +#include +} + +#define MAX_BUF_LEN 255 + +static const char *UCS_EVENT_SET_TEST_STRING = "ucs_event_set test string"; +static const char *UCS_EVENT_SET_EXTRA_STRING = "ucs_event_set extra string"; +static const int UCS_EVENT_SET_EXTRA_NUM = 0xFF; + +enum { + UCS_EVENT_SET_EXTERNAL_FD = UCS_BIT(0), +}; + +class test_event_set : public ucs::test_base, + public ::testing::TestWithParam { +public: + static const char *evfd_data; + static pthread_barrier_t barrier; + + typedef void* (*event_set_pthread_callback_t)(void *arg); + + enum event_set_op_t { + EVENT_SET_OP_ADD, + EVENT_SET_OP_MOD, + EVENT_SET_OP_DEL + }; + + UCS_TEST_BASE_IMPL; + +protected: + void init() { + if (GetParam() & UCS_EVENT_SET_EXTERNAL_FD) { + m_ext_fd = epoll_create(1); + ASSERT_TRUE(m_ext_fd > 0); + } else { + m_ext_fd = -1; + } + } + + void cleanup() { + if (GetParam() & UCS_EVENT_SET_EXTERNAL_FD) { + ASSERT_NE(-1, m_ext_fd); + close(m_ext_fd); + m_ext_fd = -1; + } + } + + static void* event_set_read_func(void *arg) { + int *fd = (int *)arg; + int n; + + n = write(fd[1], evfd_data, strlen(test_event_set::evfd_data)); + if (n == -1) { + ADD_FAILURE(); + } + + thread_barrier(); + return 0; + } + + static void* event_set_tmo_func(void *arg) { + thread_barrier(); + return 0; + } + + void event_set_init(event_set_pthread_callback_t func) { + ucs_status_t status; + int ret; + + if (pipe(m_pipefd) == -1) { + UCS_TEST_ABORT("pipe() failed with error - " << + strerror(errno)); + } + + ret = pthread_barrier_init(&barrier, NULL, 2); + if (ret) { + 
UCS_TEST_ABORT("pthread_barrier_init() failed with error - " << + strerror(errno)); + } + + ret = pthread_create(&m_tid, NULL, func, (void *)&m_pipefd); + if (ret) { + UCS_TEST_ABORT("pthread_create() failed with error - " << + strerror(errno)); + } + + if (GetParam() & UCS_EVENT_SET_EXTERNAL_FD) { + status = ucs_event_set_create_from_fd(&m_event_set, m_ext_fd); + } else { + status = ucs_event_set_create(&m_event_set); + } + ASSERT_UCS_OK(status); + EXPECT_TRUE(m_event_set != NULL); + } + + void event_set_cleanup() { + ucs_event_set_cleanup(m_event_set); + + pthread_join(m_tid, NULL); + pthread_barrier_destroy(&barrier); + + close(m_pipefd[0]); + close(m_pipefd[1]); + } + + void event_set_ctl(event_set_op_t op, int fd, int events) { + ucs_status_t status = UCS_OK; + + switch (op) { + case EVENT_SET_OP_ADD: + status = ucs_event_set_add(m_event_set, fd, + (ucs_event_set_type_t)events, + (void *)(uintptr_t)fd); + break; + case EVENT_SET_OP_MOD: + status = ucs_event_set_mod(m_event_set, fd, + (ucs_event_set_type_t)events, + (void *)(uintptr_t)fd); + break; + case EVENT_SET_OP_DEL: + status = ucs_event_set_del(m_event_set, fd); + break; + default: + UCS_TEST_ABORT("unknown event set operation - " << op); + } + + EXPECT_UCS_OK(status); + } + + void event_set_wait(unsigned exp_event, int timeout_ms, + ucs_event_set_handler_t handler, void *arg) { + unsigned nread = ucs_sys_event_set_max_wait_events; + ucs_status_t status; + + /* Check for events on pipe fd */ + status = ucs_event_set_wait(m_event_set, &nread, 0, handler, arg); + EXPECT_EQ(exp_event, nread); + EXPECT_UCS_OK(status); + } + + static void thread_barrier() { + int ret = pthread_barrier_wait(&barrier); + EXPECT_TRUE((ret == 0) || (ret == PTHREAD_BARRIER_SERIAL_THREAD)); + } + + int m_pipefd[2]; + int m_ext_fd; + pthread_t m_tid; + ucs_sys_event_set_t *m_event_set; +}; + +const char *test_event_set::evfd_data = UCS_EVENT_SET_TEST_STRING; + +pthread_barrier_t test_event_set::barrier; + +static void 
event_set_func1(void *callback_data, int events, void *arg) +{ + char buf[MAX_BUF_LEN]; + char *extra_str = (char *)((void**)arg)[0]; + int *extra_num = (int *)((void**)arg)[1]; + int n; + int fd = (int)(uintptr_t)callback_data; + memset(buf, 0, MAX_BUF_LEN); + + EXPECT_EQ(UCS_EVENT_SET_EVREAD, events); + + n = read(fd, buf, MAX_BUF_LEN); + if (n == -1) { + ADD_FAILURE(); + return; + } + EXPECT_EQ(0, strcmp(UCS_EVENT_SET_TEST_STRING, buf)); + EXPECT_EQ(0, strcmp(UCS_EVENT_SET_EXTRA_STRING, extra_str)); + EXPECT_EQ(UCS_EVENT_SET_EXTRA_NUM, *extra_num); +} + +static void event_set_func2(void *callback_data, int events, void *arg) +{ + EXPECT_EQ(UCS_EVENT_SET_EVWRITE, events); +} + +static void event_set_func3(void *callback_data, int events, void *arg) +{ + ADD_FAILURE(); +} + +static void event_set_func4(void *callback_data, int events, void *arg) +{ + EXPECT_EQ(UCS_EVENT_SET_EVREAD, events); +} + +UCS_TEST_P(test_event_set, ucs_event_set_read_thread) { + void *arg[] = { (void*)UCS_EVENT_SET_EXTRA_STRING, + (void*)&UCS_EVENT_SET_EXTRA_NUM }; + + event_set_init(event_set_read_func); + event_set_ctl(EVENT_SET_OP_ADD, m_pipefd[0], + UCS_EVENT_SET_EVREAD); + + thread_barrier(); + + event_set_wait(1u, -1, event_set_func1, arg); + + event_set_ctl(EVENT_SET_OP_DEL, m_pipefd[0], 0); + event_set_cleanup(); +} + +UCS_TEST_P(test_event_set, ucs_event_set_write_thread) { + event_set_init(event_set_read_func); + event_set_ctl(EVENT_SET_OP_ADD, m_pipefd[1], + UCS_EVENT_SET_EVWRITE); + + thread_barrier(); + + event_set_wait(1u, -1, event_set_func2, NULL); + + event_set_ctl(EVENT_SET_OP_DEL, m_pipefd[1], 0); + event_set_cleanup(); +} + +UCS_TEST_P(test_event_set, ucs_event_set_tmo_thread) { + event_set_init(event_set_tmo_func); + event_set_ctl(EVENT_SET_OP_ADD, m_pipefd[0], + UCS_EVENT_SET_EVREAD); + + thread_barrier(); + + event_set_wait(0u, 0, event_set_func3, NULL); + + event_set_ctl(EVENT_SET_OP_DEL, m_pipefd[0], 0); + event_set_cleanup(); +} + +UCS_TEST_P(test_event_set, 
ucs_event_set_trig_modes) { + void *arg[] = { (void*)UCS_EVENT_SET_EXTRA_STRING, + (void*)&UCS_EVENT_SET_EXTRA_NUM }; + + event_set_init(event_set_read_func); + event_set_ctl(EVENT_SET_OP_ADD, m_pipefd[0], + UCS_EVENT_SET_EVREAD); + + thread_barrier(); + + /* Test level-triggered mode (default) */ + for (int i = 0; i < 10; i++) { + event_set_wait(1u, 0, event_set_func4, NULL); + } + + /* Test edge-triggered mode */ + /* Set edge-triggered mode */ + event_set_ctl(EVENT_SET_OP_MOD, m_pipefd[0], + (ucs_event_set_type_t)(UCS_EVENT_SET_EVREAD | + UCS_EVENT_SET_EDGE_TRIGGERED)); + + /* Should have only one event to read */ + event_set_wait(1u, 0, event_set_func4, NULL); + + /* Should not read nothing */ + for (int i = 0; i < 10; i++) { + event_set_wait(0u, 0, event_set_func1, arg); + } + + /* Call the function below directly to read + * all outstanding data from pipe fd */ + event_set_func1((void*)(uintptr_t)m_pipefd[0], UCS_EVENT_SET_EVREAD, arg); + + event_set_ctl(EVENT_SET_OP_DEL, m_pipefd[0], 0); + event_set_cleanup(); +} + +INSTANTIATE_TEST_CASE_P(ext_fd, test_event_set, + ::testing::Values(static_cast( + UCS_EVENT_SET_EXTERNAL_FD))); +INSTANTIATE_TEST_CASE_P(int_fd, test_event_set, ::testing::Values(0)); diff --git a/test/gtest/ucs/test_frag_list.cc b/test/gtest/ucs/test_frag_list.cc new file mode 100644 index 0000000..b902935 --- /dev/null +++ b/test/gtest/ucs/test_frag_list.cc @@ -0,0 +1,336 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#include +extern "C" { +#include +} + +#include + +class frag_list : public ucs::test { +protected: + struct pkt { + uint32_t sn; + ucs_frag_list_elem_t elem; + }; + ucs_frag_list_t m_frags; + // @override + virtual void init(); + virtual void cleanup(); + + void init_pkts(pkt *packets, int n); + void permute_array(int *arr, int n); + +}; + +void frag_list::permute_array(int *arr, int n) +{ + + int i; + int idx; + int tmp; + + for (i = 0; i < n; i++) { + arr[i] = i; + } + + for (i = 0; i < n - 1; i++) { + idx = i + ucs::rand() % (n - i); + tmp = arr[i]; + arr[i] = arr[idx]; + arr[idx] = tmp; + } +} + +void frag_list::init_pkts(pkt *packets, int n) +{ + int i; + + for (i = 0; i < n; i++) { + packets[i].sn = i; + } +} + +void frag_list::init() +{ + ucs_stats_cleanup(); +#if ENABLE_STATS + push_config(); + modify_config("STATS_DEST", "stdout"); + modify_config("STATS_TRIGGER", ""); +#endif + ucs_stats_init(); + ucs_frag_list_init(0, &m_frags, + -1 UCS_STATS_ARG(ucs_stats_get_root())); +} + +void frag_list::cleanup() +{ + ucs_frag_list_cleanup(&m_frags); + ucs_stats_cleanup(); +#if ENABLE_STATS + pop_config(); +#endif + ucs_stats_init(); +} + + +/* next four tests cover all possible insertions and removals. 
*/
+
+/**
+ * rcv in order
+ *
+ * Sequence numbers 0 and (ucs_frag_list_sn_t)(-1) are expected to be rejected
+ * as duplicates; 1..9 inserted in ascending order are expected to all take
+ * the fast path.
+ */
+UCS_TEST_F(frag_list, in_order_rcv) {
+    ucs_frag_list_elem_t pkt;
+    unsigned i;
+    int err;
+
+    err = ucs_frag_list_insert(&m_frags, &pkt, 0);
+    EXPECT_EQ(UCS_FRAG_LIST_INSERT_DUP, err);
+    err = ucs_frag_list_insert(&m_frags, &pkt, (ucs_frag_list_sn_t)(-1));
+    EXPECT_EQ(UCS_FRAG_LIST_INSERT_DUP, err);
+
+    for (i = 1; i < 10; i++) {
+        err = ucs_frag_list_insert(&m_frags, &pkt, i);
+        EXPECT_EQ(UCS_FRAG_LIST_INSERT_FAST, err);
+    }
+#if ENABLE_STATS
+    EXPECT_EQ((ucs_stats_counter_t)1, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_BURSTS));
+    EXPECT_EQ((ucs_stats_counter_t)9, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_BURST_LEN));
+    EXPECT_EQ((ucs_stats_counter_t)0, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_GAPS));
+    EXPECT_EQ((ucs_stats_counter_t)0, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_GAP_LEN));
+    EXPECT_EQ((ucs_stats_counter_t)0, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_GAP_OUT));
+#endif
+}
+
+/**
+ * one hole in front
+ *
+ * Packets 5..9 arrive first, then 1..4 fill the hole, after which 5..9 must
+ * be pulled back in order.
+ */
+UCS_TEST_F(frag_list, one_hole) {
+    pkt pkts[10], *out;
+    ucs_frag_list_elem_t *elem;
+    unsigned i;
+    int err;
+
+    init_pkts(pkts, 10);
+
+    for (i = 5; i < 10; i++) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        EXPECT_EQ(UCS_FRAG_LIST_INSERT_SLOW, err);
+    }
+
+    /* try to pull - should fail */
+    elem = ucs_frag_list_pull(&m_frags);
+    EXPECT_EQ((void *)elem, (void *)NULL);
+
+    /* insert 1-3: no need to pull more elems from list
+     * insert 4: more elems can be pulled
+     */
+    for (i = 1; i < 5; i++) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        if (i < 4) {
+            EXPECT_EQ(UCS_FRAG_LIST_INSERT_FAST, err);
+        }
+        else {
+            EXPECT_EQ(UCS_FRAG_LIST_INSERT_FIRST, err);
+        }
+    }
+
+    /* sn 5 already in - next one must fail */
+    /* (i is 5 here, left over from the loop above) */
+    err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, 5);
+    EXPECT_EQ(UCS_FRAG_LIST_INSERT_DUP, err);
+
+
+    i = 0;
+    /* elem 5..9 must be on the list now */
+    while((elem = ucs_frag_list_pull(&m_frags)) != NULL) {
+        out = ucs_container_of(elem, pkt, elem);
+        EXPECT_EQ(out->sn, i+5);
+        i++;
+    }
+    EXPECT_EQ((unsigned)5, i);
+#if ENABLE_STATS
+    EXPECT_EQ((ucs_stats_counter_t)2, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_BURSTS));
+    EXPECT_EQ((ucs_stats_counter_t)10, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_BURST_LEN));
+    EXPECT_EQ((ucs_stats_counter_t)1, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_GAPS));
+    EXPECT_EQ((ucs_stats_counter_t)5, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_GAP_LEN));
+    EXPECT_EQ((ucs_stats_counter_t)9, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_GAP_OUT));
+#endif
+}
+
+/**
+ * Two separate ranges (15..19, then 5..9) arrive ahead of time. The first
+ * hole is filled backwards (4..2, then 1), 2..9 are pulled, then 10..14 are
+ * inserted and the remaining packets up to 19 are pulled.
+ */
+UCS_TEST_F(frag_list, two_holes_basic) {
+    pkt pkts[20], *out;
+    ucs_frag_list_elem_t *elem;
+    unsigned i;
+    int err;
+
+    init_pkts(pkts, 20);
+
+
+    for (i = 15; i < 20; i++) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        EXPECT_EQ(UCS_FRAG_LIST_INSERT_SLOW, err);
+    }
+
+    /* try to pull - should fail */
+    elem = ucs_frag_list_pull(&m_frags);
+    EXPECT_EQ((void *)NULL, (void *)elem);
+
+    for (i = 5; i < 10; i++) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        EXPECT_EQ(err, UCS_FRAG_LIST_INSERT_SLOW);
+    }
+
+    /* try to pull - should fail */
+    elem = ucs_frag_list_pull(&m_frags);
+    EXPECT_EQ((void *)NULL, (void *)elem);
+
+    for (i = 4; i > 1; i--) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        EXPECT_EQ(err, UCS_FRAG_LIST_INSERT_SLOW);
+    }
+
+    err = ucs_frag_list_insert(&m_frags, &pkts[1].elem, 1);
+    EXPECT_EQ(err, UCS_FRAG_LIST_INSERT_FIRST);
+
+    i = 2;
+    while((elem = ucs_frag_list_pull(&m_frags)) != NULL) {
+        out = ucs_container_of(elem, pkt, elem);
+        EXPECT_EQ(out->sn, i);
+        i++;
+    }
+    EXPECT_EQ(i, (unsigned)10);
+
+    for (i = 10; i < 15; i++) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        if (i < 14) {
+            EXPECT_EQ(UCS_FRAG_LIST_INSERT_FAST, err);
+        }
+        else {
+            EXPECT_EQ(UCS_FRAG_LIST_INSERT_FIRST, err);
+        }
+    }
+
+    /* i is 15 here, which is the next sequence number expected from pull */
+    while((elem = ucs_frag_list_pull(&m_frags)) != NULL) {
+        out = ucs_container_of(elem, pkt, elem);
+        EXPECT_EQ(out->sn, i);
+        i++;
+    }
+    EXPECT_EQ((unsigned)20, i);
+#if ENABLE_STATS
+    EXPECT_EQ((ucs_stats_counter_t)7, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_BURSTS));
+    EXPECT_EQ((ucs_stats_counter_t)19, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_BURST_LEN));
+    EXPECT_EQ((ucs_stats_counter_t)2, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_GAPS));
+    EXPECT_EQ((ucs_stats_counter_t)20, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_GAP_LEN));
+    EXPECT_EQ((ucs_stats_counter_t)28, UCS_STATS_GET_COUNTER(m_frags.stats, UCS_FRAG_LIST_STAT_GAP_OUT));
+#endif
+}
+
+/**
+ * two holes
+ *
+ * Ranges are inserted out of order and partly backwards (5..9, 13..17,
+ * 19..18, 12..10, 4..2) before packet 1 arrives; afterwards 2..19 must be
+ * pulled in order.
+ */
+UCS_TEST_F(frag_list, two_holes_advanced) {
+    pkt pkts[20], *out;
+    ucs_frag_list_elem_t *elem;
+    unsigned i;
+    int err;
+
+    init_pkts(pkts, 20);
+
+    for (i = 5; i < 10; i++) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        EXPECT_EQ(UCS_FRAG_LIST_INSERT_SLOW, err);
+    }
+
+    /* try to pull - should fail */
+    elem = ucs_frag_list_pull(&m_frags);
+    EXPECT_EQ((void *)NULL, (void *)elem);
+
+    for (i = 13; i < 18; i++) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        EXPECT_EQ(UCS_FRAG_LIST_INSERT_SLOW, err);
+    }
+
+    for (i = 19; i >= 18; i--) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        EXPECT_EQ(UCS_FRAG_LIST_INSERT_SLOW, err);
+    }
+
+    for (i = 12; i >= 10; i--) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        EXPECT_EQ(UCS_FRAG_LIST_INSERT_SLOW, err);
+    }
+
+    for (i = 4; i > 1; i--) {
+        err = ucs_frag_list_insert(&m_frags, &pkts[i].elem, i);
+        EXPECT_EQ(UCS_FRAG_LIST_INSERT_SLOW, err);
+    }
+
+    err = ucs_frag_list_insert(&m_frags, &pkts[1].elem, 1);
+    EXPECT_EQ(UCS_FRAG_LIST_INSERT_FIRST, err);
+
+    i = 2;
+    while((elem = ucs_frag_list_pull(&m_frags)) != NULL) {
+        out = ucs_container_of(elem, pkt, elem);
+        EXPECT_EQ(out->sn, i);
+        i++;
+    }
+    EXPECT_EQ((unsigned)20, i);
+}
+
+/**
+ *
+ * random arrival.
Send/recv 10k packets in random order + */ +#define FRAG_LIST_N_PKTS 10000 + +UCS_TEST_F(frag_list, random_arrival) { + std::vector pkts(FRAG_LIST_N_PKTS + 1); + pkt *out; + ucs_frag_list_elem_t *elem; + unsigned i; + std::vector idx(FRAG_LIST_N_PKTS); + int err; + int fast_inserts, slow_inserts, pulled; + uint32_t last_sn = 0; + uint32_t max_holes=0, max_elems=0; + + + init_pkts(&pkts[0], FRAG_LIST_N_PKTS+1); + permute_array(&idx[0], FRAG_LIST_N_PKTS); + + fast_inserts = slow_inserts = pulled = 0; + for (i = 0; i < FRAG_LIST_N_PKTS; i++) { + err = ucs_frag_list_insert(&m_frags, &pkts[idx[i]+1].elem, idx[i]+1); + EXPECT_NE(err, UCS_FRAG_LIST_INSERT_DUP); + if (err == UCS_FRAG_LIST_INSERT_FAST || err == UCS_FRAG_LIST_INSERT_FIRST) { + fast_inserts++; + EXPECT_EQ(last_sn+1, (uint32_t)idx[i]+1); + last_sn = idx[i]+1; + } + else { + slow_inserts++; + } + max_holes = ucs_max(m_frags.list_count, max_holes); + max_elems = ucs_max(m_frags.elem_count, max_elems); + while((elem = ucs_frag_list_pull(&m_frags)) != NULL) { + out = ucs_container_of(elem, pkt, elem); + pulled++; + EXPECT_EQ(last_sn+1, out->sn); + last_sn = out->sn; + } + } + ucs_frag_list_dump(&m_frags, 0); + UCS_TEST_MESSAGE << "max_holes=" << max_holes << " max_elems=" << max_elems; + UCS_TEST_MESSAGE << "fast_ins=" << fast_inserts <<" slow_ins=" << slow_inserts << " pulled=" << pulled; + while((elem = ucs_frag_list_pull(&m_frags)) != NULL) { + out = ucs_container_of(elem, pkt, elem); + EXPECT_EQ(last_sn+1, out->sn); + last_sn = out->sn; + } +} + diff --git a/test/gtest/ucs/test_log.cc b/test/gtest/ucs/test_log.cc new file mode 100644 index 0000000..03d35ef --- /dev/null +++ b/test/gtest/ucs/test_log.cc @@ -0,0 +1,131 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#include +#include + +extern "C" { +#include +} + +class log_test : public ucs::test { + +public: + virtual void init() { + /* skip because logger does not support file + * output on valgrind + */ + if (RUNNING_ON_VALGRIND) { + UCS_TEST_SKIP_R("skipping on valgrind"); + } + + char ucs_log_spec[70]; + const char *default_tmp_dir = "/tmp"; + const char *tmp_dir; + + ucs::test::init(); + + ucs_log_cleanup(); + push_config(); + tmp_dir = getenv("TMPDIR"); + if (tmp_dir == NULL) { + tmp_dir = default_tmp_dir; + } + snprintf(logfile, sizeof(logfile), "%s/gtest_ucs_log.%d", tmp_dir, getpid()); + /* coverity[tainted_string] */ + unlink(logfile); + snprintf(ucs_log_spec, sizeof(ucs_log_spec), "file:%s", logfile); + modify_config("LOG_FILE", ucs_log_spec); + modify_config("LOG_LEVEL", "info"); + ucs_log_init(); + } + + virtual void cleanup() { + ucs_log_cleanup(); + m_num_log_handlers_before = 0; + pop_config(); + check_log_file(); + unlink(logfile); + ucs_log_init(); + ucs::test::cleanup(); + } + + virtual void check_log_file() { + ADD_FAILURE() << read_logfile(); + } + + int do_grep(const char *needle) { + char cmd[128]; + + snprintf(cmd, sizeof(cmd), "grep '%s' %s", needle, logfile); + return system(cmd); + } + + std::string read_logfile() { + std::stringstream ss; + std::ifstream ifs(logfile); + ss << ifs.rdbuf(); + return ss.str(); + } + +protected: + char logfile[64]; +}; + +class log_test_info : public log_test { + virtual void check_log_file() { + if (do_grep("UCX INFO hello world")) { + ADD_FAILURE() << read_logfile(); + } + } +}; + +UCS_TEST_F(log_test_info, hello) { + ucs_info("hello world"); +} + + +class log_test_print : public log_test { + virtual void check_log_file() { + if (do_grep("UCX PRINT debug message")) { + if (ucs_global_opts.log_print_enable) { + /* not found but it should be there */ + ADD_FAILURE() << read_logfile(); + } + } else { + if (!ucs_global_opts.log_print_enable) { + /* found but prints disabled!!! 
*/ + ADD_FAILURE() << read_logfile(); + } + } + } +}; + +UCS_TEST_F(log_test_print, print_on, "LOG_PRINT_ENABLE=y") { + ucs_print("debug message"); +} + +UCS_TEST_F(log_test_print, print_off) { + ucs_print("debug message"); +} + + +class log_test_backtrace : public log_test { + virtual void check_log_file() { + if (do_grep("print_backtrace")) { + ADD_FAILURE() << read_logfile(); + } + +#ifdef HAVE_DETAILED_BACKTRACE + if (do_grep("main")) { + ADD_FAILURE() << read_logfile(); + } +#endif + } +}; + +UCS_TEST_F(log_test_backtrace, backtrace) { + ucs_log_print_backtrace(UCS_LOG_LEVEL_INFO); +} diff --git a/test/gtest/ucs/test_math.cc b/test/gtest/ucs/test_math.cc new file mode 100644 index 0000000..b06e8fc --- /dev/null +++ b/test/gtest/ucs/test_math.cc @@ -0,0 +1,222 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2012. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include + +#include +#include +#include +#include +#include + +#include + +#define FLAG1 0x100 +#define FLAG2 0x200 +#define FLAG3 0x400 + +class test_math : public ucs::test { +protected: + static const unsigned ATOMIC_COUNT = 50; +}; + +UCS_TEST_F(test_math, convert_flag) { + volatile uint32_t value = FLAG1 | FLAG3; + volatile uint32_t tmp = ucs_convert_flag(value, FLAG1, 0x1); + + EXPECT_EQ(0x1u, tmp); + EXPECT_EQ(0x0u, ucs_convert_flag(value, FLAG2, 0x2u)); + EXPECT_EQ(0x4u, ucs_convert_flag(value, FLAG3, 0x4u)); + + EXPECT_EQ(0x10000u, ucs_convert_flag(value, FLAG1, 0x10000u)); + EXPECT_EQ(0x00000u, ucs_convert_flag(value, FLAG2, 0x20000u)); + EXPECT_EQ(0x40000u, ucs_convert_flag(value, FLAG3, 0x40000u)); +} + +UCS_TEST_F(test_math, test_flag) { + uint32_t value = FLAG2; + EXPECT_TRUE( ucs_test_flags(value, FLAG1, FLAG2) ); + EXPECT_TRUE( ucs_test_flags(value, FLAG2, FLAG3) ); + EXPECT_FALSE( ucs_test_flags(value, FLAG1, FLAG3) ); +} + +UCS_TEST_F(test_math, circular_compare) { + EXPECT_TRUE( 
UCS_CIRCULAR_COMPARE32(0x000000001, <, 0x000000002) ); + EXPECT_TRUE( UCS_CIRCULAR_COMPARE32(0x000000001, ==, 0x000000001) ); + EXPECT_TRUE( UCS_CIRCULAR_COMPARE32(0xffffffffU, >, 0xfffffffeU) ); + EXPECT_TRUE( UCS_CIRCULAR_COMPARE32(0xffffffffU, <, 0x00000000U) ); + EXPECT_TRUE( UCS_CIRCULAR_COMPARE32(0xffffffffU, <, 0x00000001U) ); + EXPECT_TRUE( UCS_CIRCULAR_COMPARE32(0xffffffffU, <, 0x00000001U) ); + EXPECT_TRUE( UCS_CIRCULAR_COMPARE32(0x80000000U, >, 0x7fffffffU) ); + EXPECT_TRUE( UCS_CIRCULAR_COMPARE32(0xffffffffU, <, 0x7fffffffU) ); +} + +UCS_TEST_F(test_math, bitops) { + EXPECT_EQ(0u, ucs_ffs64(0xfffff)); + EXPECT_EQ(16u, ucs_ffs64(0xf0000)); + EXPECT_EQ(1u, ucs_ffs64(0x4002)); + EXPECT_EQ(41u, ucs_ffs64(1ull<<41)); + + EXPECT_EQ(0u, ucs_ilog2(1)); + EXPECT_EQ(2u, ucs_ilog2(4)); + EXPECT_EQ(2u, ucs_ilog2(5)); + EXPECT_EQ(2u, ucs_ilog2(7)); + EXPECT_EQ(14u, ucs_ilog2(17000)); + EXPECT_EQ(40u, ucs_ilog2(1ull<<40)); + + EXPECT_EQ(0, ucs_popcount(0)); + EXPECT_EQ(2, ucs_popcount(5)); + EXPECT_EQ(16, ucs_popcount(0xffff)); + EXPECT_EQ(48, ucs_popcount(0xffffffffffffUL)); + + EXPECT_EQ(0, ucs_count_trailing_zero_bits(1)); + EXPECT_EQ(28, ucs_count_trailing_zero_bits(0x10000000)); + EXPECT_EQ(32, ucs_count_trailing_zero_bits(0x100000000UL)); +} + +#define TEST_ATOMIC_ADD(_bitsize) \ + { \ + typedef uint##_bitsize##_t inttype; \ + const inttype var_value = ucs::random_upper(); \ + const inttype add_value = ucs::random_upper(); \ + inttype var = var_value; \ + ucs_atomic_add##_bitsize(&var, add_value); \ + EXPECT_EQ(static_cast(var_value + add_value), var); \ + } + +#define TEST_ATOMIC_FADD(_bitsize) \ + { \ + typedef uint##_bitsize##_t inttype; \ + const inttype var_value = ucs::random_upper(); \ + const inttype add_value = ucs::random_upper(); \ + inttype var = var_value; \ + inttype oldvar = ucs_atomic_fadd##_bitsize(&var, add_value); \ + EXPECT_EQ(static_cast(var_value + add_value), var); \ + EXPECT_EQ(var_value, oldvar); \ + } + +#define 
TEST_ATOMIC_SWAP(_bitsize) \ + { \ + typedef uint##_bitsize##_t inttype; \ + const inttype var_value = ucs::random_upper(); \ + const inttype swap_value = ucs::random_upper(); \ + inttype var = var_value; \ + inttype oldvar = ucs_atomic_swap##_bitsize(&var, swap_value); \ + EXPECT_EQ(var_value, oldvar); \ + EXPECT_EQ(swap_value, var); \ + } + +#define TEST_ATOMIC_CSWAP(_bitsize, is_eq) \ + { \ + typedef uint##_bitsize##_t inttype; \ + const inttype var_value = ucs::random_upper(); \ + const inttype cmp_value = (is_eq) ? var_value : (var_value + 10); \ + const inttype swap_value = ucs::random_upper(); \ + inttype var = var_value; \ + inttype oldvar = ucs_atomic_cswap##_bitsize(&var, cmp_value, swap_value); \ + EXPECT_EQ(var_value, oldvar); \ + if (is_eq) { \ + EXPECT_EQ(swap_value, var); \ + } else { \ + EXPECT_EQ(var_value, var); \ + } \ + } + +UCS_TEST_F(test_math, atomic_add) { + for (unsigned count = 0; count < ATOMIC_COUNT; ++count) { + TEST_ATOMIC_ADD(8); + TEST_ATOMIC_ADD(16); + TEST_ATOMIC_ADD(32); + TEST_ATOMIC_ADD(64); + } +} + +UCS_TEST_F(test_math, atomic_fadd) { + for (unsigned count = 0; count < ATOMIC_COUNT; ++count) { + TEST_ATOMIC_FADD(8); + TEST_ATOMIC_FADD(16); + TEST_ATOMIC_FADD(32); + TEST_ATOMIC_FADD(64); + } +} + +UCS_TEST_F(test_math, atomic_swap) { + for (unsigned count = 0; count < ATOMIC_COUNT; ++count) { + TEST_ATOMIC_SWAP(8); + TEST_ATOMIC_SWAP(16); + TEST_ATOMIC_SWAP(32); + TEST_ATOMIC_SWAP(64); + } +} + +UCS_TEST_F(test_math, atomic_cswap_success) { + for (unsigned count = 0; count < ATOMIC_COUNT; ++count) { + TEST_ATOMIC_CSWAP(8, 0); + TEST_ATOMIC_CSWAP(16, 0); + TEST_ATOMIC_CSWAP(32, 0); + TEST_ATOMIC_CSWAP(64, 0); + } +} + +UCS_TEST_F(test_math, atomic_cswap_fail) { + for (unsigned count = 0; count < ATOMIC_COUNT; ++count) { + TEST_ATOMIC_CSWAP(8, 1); + TEST_ATOMIC_CSWAP(16, 1); + TEST_ATOMIC_CSWAP(32, 1); + TEST_ATOMIC_CSWAP(64, 1); + } +} + +UCS_TEST_F(test_math, for_each_bit) { + uint64_t gen_mask = 0; + uint64_t mask; + int idx; 
+ + mask = ucs_generate_uuid(0); + + ucs_for_each_bit (idx, mask) { + EXPECT_EQ(gen_mask & UCS_BIT(idx), 0ull); + gen_mask |= UCS_BIT(idx); + } + + EXPECT_EQ(mask, gen_mask); + + ucs_for_each_bit(idx, 0) { + EXPECT_EQ(1, 0); /* should not be here */ + } + + gen_mask = 0; + ucs_for_each_bit(idx, UCS_BIT(0)) { + EXPECT_EQ(gen_mask & UCS_BIT(idx), 0ull); + gen_mask |= UCS_BIT(idx); + } + EXPECT_EQ(1ull, gen_mask); + + gen_mask = 0; + ucs_for_each_bit(idx, UCS_BIT(63)) { + EXPECT_EQ(gen_mask & UCS_BIT(idx), 0ull); + gen_mask |= UCS_BIT(idx); + } + EXPECT_EQ(UCS_BIT(63), gen_mask); +} + +UCS_TEST_F(test_math, linear_func) { + ucs_linear_func_t func[2]; + double x, y[2]; + + x = ucs::rand(); + for (unsigned i = 0; i < 2; ++i) { + func[i].m = ucs::rand() / (double)RAND_MAX; + func[i].c = ucs::rand() / (double)RAND_MAX; + y[i] = ucs_linear_func_apply(&func[i], x); + } + + ucs_linear_func_t sum_func; + ucs_linear_func_add(&sum_func, &func[0], &func[1]); + double y_sum = ucs_linear_func_apply(&sum_func, x); + + EXPECT_NEAR(y[0] + y[1], y_sum, fabs(y_sum * 1e-6)); +} diff --git a/test/gtest/ucs/test_memtrack.cc b/test/gtest/ucs/test_memtrack.cc new file mode 100644 index 0000000..788aa40 --- /dev/null +++ b/test/gtest/ucs/test_memtrack.cc @@ -0,0 +1,209 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#include + +extern "C" { +#include +#include +} + +#include +#include +#include +#include +#include +#include + + +#if ENABLE_MEMTRACK + +class test_memtrack : public ucs::test { +protected: + static const size_t ALLOC_SIZE = 10000; + static const char ALLOC_NAME[]; + + void init() { + ucs_memtrack_cleanup(); + push_config(); + modify_config("MEMTRACK_DEST", "/dev/null"); + ucs_memtrack_init(); + } + + void cleanup() { + ucs_memtrack_cleanup(); + pop_config(); + ucs_memtrack_init(); + } + + void test_total(size_t peak_count, size_t peak_size) { + ucs_memtrack_entry_t total; + + ucs_memtrack_total(&total); + EXPECT_EQ(0lu, total.count); + EXPECT_EQ(peak_count, total.peak_count); + EXPECT_EQ(peak_size, total.peak_size); + } +}; + +const char test_memtrack::ALLOC_NAME[] = "memtrack_test"; + + +UCS_TEST_F(test_memtrack, sanity) { + ucs_memtrack_entry_t entry; + void *a, *b; + int i; + + ucs_memtrack_total(&entry); + i = entry.count; + + b = ucs_malloc(1, ALLOC_NAME); + ucs_free(b); + + b = ucs_malloc(1, ALLOC_NAME); + a = ucs_malloc(3, ALLOC_NAME); + ucs_free(b); + ucs_memtrack_total(&entry); + if (ucs_memtrack_is_enabled()) { + EXPECT_EQ((size_t)(i + 1), entry.count); + } + + b = ucs_malloc(4, ALLOC_NAME); + ucs_free(b); + ucs_memtrack_total( &entry); + if (ucs_memtrack_is_enabled()) { + EXPECT_EQ((size_t)1, entry.count); + } + ucs_free(a); + + for (i = 0; i < 101; i++) { + a = ucs_malloc(i, ALLOC_NAME); + ucs_free(a); + } +} + +UCS_TEST_F(test_memtrack, parse_dump) { + char *buf; + size_t size; + + /* Dump */ + { + FILE* tempf = open_memstream(&buf, &size); + ucs_memtrack_dump(tempf); + fclose(tempf); + } + + /* Parse */ + ASSERT_NE((void *)NULL, strstr(buf, "TOTAL")); + free(buf); +} + +UCS_TEST_F(test_memtrack, malloc_realloc) { + void* ptr; + + ptr = ucs_malloc(ALLOC_SIZE, ALLOC_NAME); + ASSERT_NE((void *)NULL, ptr); + + ptr = ucs_realloc(ptr, 2 * ALLOC_SIZE, ALLOC_NAME); + ASSERT_NE((void *)NULL, ptr); + ucs_free(ptr); + + test_total(1, 2 * ALLOC_SIZE); +} 
+ +UCS_TEST_F(test_memtrack, realloc_null) { + void* ptr; + + ptr = ucs_realloc(NULL, ALLOC_SIZE, ALLOC_NAME); + ASSERT_NE((void *)NULL, ptr); + ucs_free(ptr); + + test_total(1, ALLOC_SIZE); +} + +UCS_TEST_F(test_memtrack, calloc) { + void* ptr; + + ptr = ucs_calloc(1, ALLOC_SIZE, ALLOC_NAME); + ASSERT_NE((void *)NULL, ptr); + ucs_free(ptr); + + ptr = ucs_calloc(ALLOC_SIZE, 1, ALLOC_NAME); + ASSERT_NE((void *)NULL, ptr); + ucs_free(ptr); + + test_total(1, ALLOC_SIZE); +} + +UCS_TEST_F(test_memtrack, sysv) { + ucs_status_t status; + void* ptr = NULL; + int shmid; + size_t size; + + size = ALLOC_SIZE; + + status = ucs_sysv_alloc(&size, std::numeric_limits::max(), &ptr, 0, + ALLOC_NAME, &shmid); + ASSERT_UCS_OK(status); + ASSERT_NE((void *)NULL, ptr); + + memset(ptr, 0xAA, size); + ucs_sysv_free(ptr); + + test_total(1, size); +} + +UCS_TEST_F(test_memtrack, memalign_realloc) { + void* ptr; + int ret; + + ret = ucs_posix_memalign(&ptr, 8, ALLOC_SIZE, ALLOC_NAME); + ASSERT_EQ(0, ret); + ASSERT_NE((void *)NULL, ptr); + ucs_free(ptr); + /* Silence coverity warning. 
*/ + ptr = NULL; + + ret = ucs_posix_memalign(&ptr, 1024, ALLOC_SIZE, ALLOC_NAME); + ASSERT_EQ(0, ret); + ASSERT_NE((void *)NULL, ptr); + + ptr = ucs_realloc(ptr, 2*ALLOC_SIZE, ALLOC_NAME); + ASSERT_NE((void *)NULL, ptr); + + ucs_free(ptr); + + test_total(1, 2 * ALLOC_SIZE); +} + +UCS_TEST_F(test_memtrack, mmap) { + void* ptr; + + ptr = ucs_mmap(NULL, ALLOC_SIZE, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0, ALLOC_NAME); + ASSERT_NE((void *)NULL, ptr); + ucs_munmap(ptr, ALLOC_SIZE); + + test_total(1, ALLOC_SIZE); +} + +UCS_TEST_F(test_memtrack, custom) { + void *ptr, *initial_ptr; + + initial_ptr = ptr = malloc(ALLOC_SIZE); + ucs_memtrack_allocated(ptr, ALLOC_SIZE, ALLOC_NAME); + + memset(ptr, 0, ALLOC_SIZE); + + ucs_memtrack_releasing(ptr); + ASSERT_EQ(initial_ptr, ptr); + free(ptr); + + test_total(1, ALLOC_SIZE); +} + +#endif diff --git a/test/gtest/ucs/test_memtype_cache.cc b/test/gtest/ucs/test_memtype_cache.cc new file mode 100644 index 0000000..0bd5432 --- /dev/null +++ b/test/gtest/ucs/test_memtype_cache.cc @@ -0,0 +1,506 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#include +#include + +#include +#include +#include + +extern "C" { +#include +} + +class test_memtype_cache : public ucs::test_with_param { +protected: + test_memtype_cache() : m_memtype_cache(NULL) { + } + + virtual void init() { + ucs::test_with_param::init(); + ucs_status_t status = ucs_memtype_cache_create(&m_memtype_cache); + ASSERT_UCS_OK(status); + } + + virtual void cleanup() { + ucs_memtype_cache_destroy(m_memtype_cache); + ucs::test_with_param::cleanup(); + } + + void check_lookup(const void *ptr, size_t size, + bool expect_found, + ucs_memory_type_t expected_type = UCS_MEMORY_TYPE_LAST) const { + if (!size) { + return; + } + + ucs_memory_type_t mem_type; + ucs_status_t status = ucs_memtype_cache_lookup(m_memtype_cache, ptr, + size, &mem_type); + + if (!expect_found || (expected_type == UCS_MEMORY_TYPE_HOST)) { + /* memory type should be not found or unknown */ + EXPECT_TRUE((status == UCS_ERR_NO_ELEM) || + ((status == UCS_OK) && (mem_type == UCS_MEMORY_TYPE_LAST))) + << "ptr=" << ptr << " size=" << size << ": " + << ucs_status_string(status) + << " memtype=" << mem_buffer::mem_type_name(mem_type); + } else { + EXPECT_UCS_OK(status); + EXPECT_EQ(expected_type, mem_type) << "ptr=" << ptr << " size=" << size; + } + } + + void test_lookup_found(const void *ptr, size_t size, + ucs_memory_type_t expected_type) const { + check_lookup(ptr, size, true, expected_type); + } + + void test_lookup_notfound(const void *ptr, size_t size) const { + check_lookup(ptr, size, false); + } + + void test_ptr_found(const void *ptr, size_t size, + ucs_memory_type_t expected_type) const { + test_lookup_found(ptr, size, expected_type); + test_lookup_found(ptr, size / 2, expected_type); + test_lookup_found(ptr, 1, expected_type); + test_lookup_found(UCS_PTR_BYTE_OFFSET(ptr, size - 1), + 1, expected_type); + test_lookup_found(ptr, 0, expected_type); + } + + void test_region_found(const mem_buffer &b) const { + test_ptr_found(b.ptr(), b.size(), b.mem_type()); + } + + void 
test_region_not_found(const mem_buffer &b) const { + test_ptr_not_found(b.ptr(), b.size()); + } + + void test_ptr_not_found(const void *ptr, size_t size) const { + /* memtype cache is aligned by Page Table defined constant, + * so need to step by this value to make something not found */ + test_lookup_notfound(ptr, size + UCS_PGT_ADDR_ALIGN); + test_lookup_notfound(UCS_PTR_BYTE_OFFSET(ptr, size), 1 + UCS_PGT_ADDR_ALIGN); + } + + void test_ptr_released(const void *ptr, size_t size) const { + test_lookup_notfound(ptr, size); + test_lookup_notfound(ptr, 1); + } + + mem_buffer* allocate_mem_buffer(size_t size, ucs_memory_type_t mem_type, + std::vector *allocated_buffers = NULL, + bool test_not_found = true) const { + mem_buffer *buf = new mem_buffer(size, mem_type); + + if (allocated_buffers != NULL) { + allocated_buffers->push_back(buf); + } + + test_region_found(*buf); + + if (test_not_found) { + test_region_not_found(*buf); + } + + return buf; + } + + void release_mem_buffer(mem_buffer *buf, + std::vector > *released_ptrs, + std::vector *allocated_buffers = NULL) const { + if (allocated_buffers != NULL) { + allocated_buffers->pop_back(); + } + + released_ptrs->push_back(std::make_pair(buf->ptr(), buf->size())); + + delete buf; + } + + void test_ptrs_released(std::vector > *released_ptrs) const { + while (!released_ptrs->empty()) { + void *ptr = released_ptrs->back().first; + size_t size = released_ptrs->back().second; + + test_ptr_released(ptr, size); + test_ptr_not_found(ptr, size); + + released_ptrs->pop_back(); + } + } + + void release_buffers(std::vector *allocated_buffers) const { + std::vector > released_ptrs; + + while (!allocated_buffers->empty()) { + release_mem_buffer(allocated_buffers->back(), + &released_ptrs, allocated_buffers); + } + + test_ptrs_released(&released_ptrs); + } + + size_t get_test_step(size_t portions = 64) const { + return (RUNNING_ON_VALGRIND ? 
+ (ucs_get_page_size() / 2 - 1) : + (ucs_get_page_size() / portions)); + } + + void test_memtype_cache_alloc_diff_mem_types(bool keep_buffers, + bool same_size_buffers) { + const size_t step = get_test_step(); + const size_t inner_step = (same_size_buffers ? + ucs_get_page_size() : step); + std::vector > released_ptrs; + std::vector allocated_buffers; + + const std::vector supported_mem_types = + mem_buffer::supported_mem_types(); + + /* The tests try to allocate two buffers with different memory types */ + for (std::vector::const_iterator iter = + supported_mem_types.begin(); + iter != supported_mem_types.end(); ++iter) { + for (size_t i = 1; i <= ucs_get_page_size(); i += step) { + mem_buffer *buf1 = allocate_mem_buffer(i, GetParam(), + &allocated_buffers, 0); + + for (size_t j = 1; j <= ucs_get_page_size(); j += inner_step) { + mem_buffer *buf2 = allocate_mem_buffer(j, *iter, + &allocated_buffers, + 0); + if (!keep_buffers) { + release_mem_buffer(buf2, &released_ptrs); + } + } + + if (!keep_buffers) { + release_mem_buffer(buf1, &released_ptrs); + } + } + + if (keep_buffers) { + /* release allocated buffers */ + release_buffers(&allocated_buffers); + } else { + /* test released buffers */ + test_ptrs_released(&released_ptrs); + } + } + } + + struct region_info { + void *start; + void *end; + ucs_memory_type_t mem_type; + + region_info(size_t start, size_t end, + ucs_memory_type_t mem_type) : + start(reinterpret_cast(start)), + end(reinterpret_cast(end)), + mem_type(mem_type) {} + }; + + void generate_test_remove_subintervals( + const std::vector &insert_regions, + size_t interval_start_offset, size_t interval_end_offset, + std::vector &remove_regions) { + // add regions that will be removed as intervals + for (std::vector::const_iterator iter = + insert_regions.begin(); iter != insert_regions.end(); ++iter) { + remove_regions.push_back(region_info(reinterpret_cast(iter->start) + + interval_start_offset, + reinterpret_cast(iter->end) - + interval_end_offset, + 
UCS_MEMORY_TYPE_LAST)); + } + + // add regions that will be removed as remaining intervals + for (std::vector::const_iterator iter = + insert_regions.begin(); iter != insert_regions.end(); ++iter) { + if (interval_start_offset) { + remove_regions.push_back(region_info(reinterpret_cast(iter->start), + reinterpret_cast(iter->start) + + interval_start_offset, + UCS_MEMORY_TYPE_LAST)); + } + + if (interval_end_offset) { + remove_regions.push_back(region_info(reinterpret_cast(iter->end) - + interval_end_offset, + reinterpret_cast(iter->end), + UCS_MEMORY_TYPE_LAST)); + } + } + } + + void test_region_insert_and_remove_subintervals(const std::vector ®ions, + size_t interval_start_offset, + size_t interval_end_offset, + std::vector &remove_regions) { + generate_test_remove_subintervals(regions, interval_start_offset, + interval_end_offset, remove_regions); + + // insert new regions + for (std::vector::const_iterator iter = + regions.begin(); iter != regions.end(); ++iter) { + size_t size = UCS_PTR_BYTE_DIFF(iter->start, iter->end); + memtype_cache_update(iter->start, size, iter->mem_type); + test_ptr_found(iter->start, size, iter->mem_type); + } + + // remove subintervals + for (std::vector::const_iterator iter = + remove_regions.begin(); iter != remove_regions.end(); ++iter) { + size_t size = UCS_PTR_BYTE_DIFF(iter->start, iter->end); + memtype_cache_remove(iter->start, size); + test_ptr_released(iter->start, size); + } + + // now all buffers released, check that can't find them + for (std::vector::const_iterator iter = + regions.begin(); iter != regions.end(); ++iter) { + size_t size = UCS_PTR_BYTE_DIFF(iter->start, iter->end); + test_ptr_released(iter->start, size); + test_ptr_not_found(iter->start, size); + } + } + + void memtype_cache_update(const void *ptr, size_t size, + ucs_memory_type_t mem_type) { + if (mem_type == UCS_MEMORY_TYPE_HOST) { + return; + } + + ucs_memtype_cache_update(m_memtype_cache, ptr, size, mem_type); + } + + void memtype_cache_update(const 
mem_buffer &b) { + memtype_cache_update(b.ptr(), b.size(), b.mem_type()); + } + + void memtype_cache_remove(const void *ptr, size_t size) { + ucs_memtype_cache_remove(m_memtype_cache, ptr, size); + } + +private: + ucs_memtype_cache_t *m_memtype_cache; +}; + +UCS_TEST_P(test_memtype_cache, basic) { + const size_t size = 64; + void *ptr; + + { + mem_buffer b(size, GetParam()); + + test_region_found(b); + test_region_not_found(b); + + ptr = b.ptr(); + } + + /* buffer is released */ + test_ptr_released(ptr, size); + test_ptr_not_found(ptr, size); +} + +UCS_TEST_P(test_memtype_cache, update_non_contig_regions_and_remove_subintervals) { + std::vector insert_regions; + std::vector remove_regions; + size_t start, end; + + const size_t region_size = UCS_BIT(28); + const size_t interval_start_offset = UCS_BIT(27); + + // insert [0x7f6ef0000000 .. 0x7f6f00000000] + start = 0x7f6ef0000000; + end = start + region_size; + test_memtype_cache::region_info region_info1(start, end, GetParam()); + insert_regions.push_back(region_info1); + + // insert [0x7f6f2c021000 .. 0x7f6f3c021000] + start = 0x7f6f2c021000; + end = start + region_size; + test_memtype_cache::region_info region_info2(start, end, + UCS_MEMORY_TYPE_LAST); + insert_regions.push_back(region_info2); + + // insert [0x7f6f42000000 .. 0x7f6f52000000] + start = 0x7f6f42000000; + end = start + region_size; + test_memtype_cache::region_info region_info3(start, end, + UCS_MEMORY_TYPE_LAST); + insert_regions.push_back(region_info3); + + test_region_insert_and_remove_subintervals(insert_regions, + interval_start_offset, + 0, remove_regions); +} + +UCS_TEST_P(test_memtype_cache, update_adjacent_regions_and_remove_subintervals) { + std::vector insert_regions; + std::vector remove_regions; + size_t start, end; + + const size_t region_size = UCS_BIT(28); + const size_t interval_start_offset = UCS_BIT(27); + + // insert [0x7f6ef0000000 .. 
0x7f6f00000000] + start = 0x7f6ef0000000; + end = start + region_size; + test_memtype_cache::region_info region_info1(0x7f6ef0000000, 0x7f6f00000000, + GetParam()); + insert_regions.push_back(region_info1); + + // insert [0x7f6f00000000 .. 0x7f6f10000000] + start = end; + end = start + region_size; + test_memtype_cache::region_info region_info2(reinterpret_cast + (region_info1.end), + 0x7f6f40000000, GetParam()); + insert_regions.push_back(region_info2); + + // insert [0x7f6f10000000 .. 0x7f6f20000000] + start = end; + end = start + region_size; + test_memtype_cache::region_info region_info3(reinterpret_cast + (region_info2.end), + 0x7f6f48000000, GetParam()); + insert_regions.push_back(region_info3); + + test_region_insert_and_remove_subintervals(insert_regions, + interval_start_offset, + 0, remove_regions); +} + +UCS_TEST_P(test_memtype_cache, shared_page_regions) { + const std::vector supported_mem_types = + mem_buffer::supported_mem_types(); + const size_t size = 1000000; + + for (std::vector::const_iterator iter = + supported_mem_types.begin(); + iter != supported_mem_types.end(); ++iter) { + + std::vector > released_ptrs; + + /* Create two buffers that possibly will share one page + * + * < shared page > + * || || + * \/ || + * +----------------------+ || + * buf1: | | | | | | \/ + * +----------------------+----------------------+ + * buf2: | | | | | | + * +----------------------+ + */ + mem_buffer *buf1 = allocate_mem_buffer(size, GetParam()); + mem_buffer *buf2 = allocate_mem_buffer(size, *iter); + + test_region_found(*buf1); + test_region_found(*buf2); + + release_mem_buffer(buf2, &released_ptrs); + + /* check that `buf1` was not released accidentally + * after releasing `buf2` */ + test_region_found(*buf1); + + release_mem_buffer(buf1, &released_ptrs); + + /* buffer `buf1` and `buf2` are released */ + test_ptrs_released(&released_ptrs); + } +} + +UCS_TEST_P(test_memtype_cache, diff_mem_types_same_bufs) { + test_memtype_cache_alloc_diff_mem_types(false, 
true); +} + +UCS_TEST_P(test_memtype_cache, diff_mem_types_same_bufs_keep_mem) { + test_memtype_cache_alloc_diff_mem_types(true, true); +} + +UCS_TEST_P(test_memtype_cache, diff_mem_types_diff_bufs) { + test_memtype_cache_alloc_diff_mem_types(false, false); +} + +UCS_TEST_P(test_memtype_cache, diff_mem_types_diff_bufs_keep_mem) { + test_memtype_cache_alloc_diff_mem_types(true, false); +} + +INSTANTIATE_TEST_CASE_P(mem_type, test_memtype_cache, + ::testing::ValuesIn(mem_buffer::supported_mem_types())); + +class test_memtype_cache_deferred_create : public test_memtype_cache { +protected: + virtual void init() { + /* do nothing */ + } + + void test_unknown_region_found(const mem_buffer &b) const { + test_ptr_found(b.ptr(), b.size(), + ((b.mem_type() == UCS_MEMORY_TYPE_HOST) ? + UCS_MEMORY_TYPE_HOST : + UCS_MEMORY_TYPE_LAST)); + } + + void test_alloc_before_init(size_t buf_size, bool test_adjacent, + size_t overlap_size) { + void *ptr; + + { + mem_buffer b(buf_size, GetParam()); + + test_memtype_cache::init(); + + test_unknown_region_found(b); + test_region_not_found(b); + + if (test_adjacent) { + /* add two adjacent regions: */ + memtype_cache_update(b.ptr(), b.size() / 2, b.mem_type()); + test_ptr_found(b.ptr(), b.size() / 2, b.mem_type()); + memtype_cache_update(UCS_PTR_BYTE_OFFSET(b.ptr(), + b.size() / 2 - overlap_size), + b.size() / 2 + 1, b.mem_type()); + test_ptr_found(b.ptr(), b.size() / 2, b.mem_type()); + } else { + memtype_cache_update(b); + } + + /* check that able to find the entire region */ + test_region_found(b); + + ptr = b.ptr(); + } + + /* buffer is released */ + test_ptr_released(ptr, buf_size); + test_ptr_not_found(ptr, buf_size); + } +}; + +UCS_TEST_P(test_memtype_cache_deferred_create, allocate_and_update) { + test_alloc_before_init(1000000, false, 0); +} + +UCS_TEST_P(test_memtype_cache_deferred_create, lookup_adjacent_regions) { + test_alloc_before_init(1000000, true, 0); +} + +UCS_TEST_P(test_memtype_cache_deferred_create, 
lookup_overlapped_regions) { + test_alloc_before_init(1000000, true, 1); +} + +INSTANTIATE_TEST_CASE_P(mem_type, test_memtype_cache_deferred_create, + ::testing::ValuesIn(mem_buffer::supported_mem_types())); diff --git a/test/gtest/ucs/test_module/Makefile.am b/test/gtest/ucs/test_module/Makefile.am new file mode 100644 index 0000000..ccdc000 --- /dev/null +++ b/test/gtest/ucs/test_module/Makefile.am @@ -0,0 +1,16 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +module_LTLIBRARIES = libtest_module.la +libtest_module_la_CPPFLAGS = $(BASE_CPPFLAGS) +libtest_module_la_CFLAGS = $(BASE_CFLAGS) +libtest_module_la_LDFLAGS = -version-info $(SOVERSION) +libtest_module_la_SOURCES = test_module.c + +include $(top_srcdir)/config/module.am + +# disable install +install: diff --git a/test/gtest/ucs/test_module/Makefile.in b/test/gtest/ucs/test_module/Makefile.in new file mode 100644 index 0000000..973e054 --- /dev/null +++ b/test/gtest/ucs/test_module/Makefile.in @@ -0,0 +1,843 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. 
+# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : 
+NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = test/gtest/ucs/test_module +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + 
$(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(moduledir)" +LTLIBRARIES = $(module_LTLIBRARIES) +libtest_module_la_LIBADD = +am_libtest_module_la_OBJECTS = libtest_module_la-test_module.lo +libtest_module_la_OBJECTS = $(am_libtest_module_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libtest_module_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libtest_module_la_CFLAGS) $(CFLAGS) \ + $(libtest_module_la_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/libtest_module_la-test_module.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libtest_module_la_SOURCES) +DIST_SOURCES = 
$(libtest_module_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/module.am \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ 
+DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = 
@MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = @ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = 
@host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +top_top_srcdir = @top_top_srcdir@ +module_LTLIBRARIES = libtest_module.la +libtest_module_la_CPPFLAGS = $(BASE_CPPFLAGS) +libtest_module_la_CFLAGS = $(BASE_CFLAGS) +libtest_module_la_LDFLAGS = -version-info $(SOVERSION) +libtest_module_la_SOURCES = test_module.c + +# Automake silent rules +AM_V_LN = $(AM_V_LN_@AM_V@) +AM_V_LN_ = $(AM_V_LN_@AM_DEFAULT_V@) +AM_V_LN_0 = echo " LN " +AM_V_LN_1 = true +local_la_modules = $(patsubst %, $(localmoduledir)/%, $(module_LTLIBRARIES)) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/module.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/gtest/ucs/test_module/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign test/gtest/ucs/test_module/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; +$(top_srcdir)/config/module.am $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +install-moduleLTLIBRARIES: $(module_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(moduledir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(moduledir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(moduledir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(moduledir)"; \ + } + +uninstall-moduleLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(module_LTLIBRARIES)'; test -n "$(moduledir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(moduledir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(moduledir)/$$f"; \ + done + +clean-moduleLTLIBRARIES: + -test -z "$(module_LTLIBRARIES)" || rm -f $(module_LTLIBRARIES) + @list='$(module_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 
's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libtest_module.la: $(libtest_module_la_OBJECTS) $(libtest_module_la_DEPENDENCIES) $(EXTRA_libtest_module_la_DEPENDENCIES) + $(AM_V_CCLD)$(libtest_module_la_LINK) -rpath $(moduledir) $(libtest_module_la_OBJECTS) $(libtest_module_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libtest_module_la-test_module.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo 
$$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +libtest_module_la-test_module.lo: test_module.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtest_module_la_CPPFLAGS) $(CPPFLAGS) $(libtest_module_la_CFLAGS) $(CFLAGS) -MT libtest_module_la-test_module.lo -MD -MP -MF $(DEPDIR)/libtest_module_la-test_module.Tpo -c -o libtest_module_la-test_module.lo `test -f 'test_module.c' || echo '$(srcdir)/'`test_module.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libtest_module_la-test_module.Tpo $(DEPDIR)/libtest_module_la-test_module.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='test_module.c' object='libtest_module_la-test_module.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libtest_module_la_CPPFLAGS) $(CPPFLAGS) $(libtest_module_la_CFLAGS) $(CFLAGS) -c -o libtest_module_la-test_module.lo `test -f 'test_module.c' || echo '$(srcdir)/'`test_module.c + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ 
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) all-local +installdirs: + for dir in "$(DESTDIR)$(moduledir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-moduleLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/libtest_module_la-test_module.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-moduleLTLIBRARIES + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/libtest_module_la-test_module.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-moduleLTLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am all-local am--depfiles check \ + check-am clean clean-generic clean-libtool \ + clean-moduleLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-moduleLTLIBRARIES install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am \ + uninstall-moduleLTLIBRARIES + +.PRECIOUS: 
Makefile + + +all-local: $(local_la_modules) + +# Create symbolic links for the built modules under $(localmoduledir) +# Link also *.la files to create proper makefile dependencies +$(local_la_modules): $(module_LTLIBRARIES) + $(AM_V_at)$(MKDIR_P) $(localmoduledir) + $(AM_V_at)for lib in *.la $(objdir)/*$(shrext)*; do \ + (cd $(localmoduledir) && $(LN_RS) -nf $(shell pwd)/$$lib); \ + done + @for lib in *.la $(objdir)/*$(shrext)*; do \ + $(AM_V_LN) $$lib; \ + done + +# disable install +install: + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/test/gtest/ucs/test_module/configure.m4 b/test/gtest/ucs/test_module/configure.m4 new file mode 100644 index 0000000..2e594d2 --- /dev/null +++ b/test/gtest/ucs/test_module/configure.m4 @@ -0,0 +1,8 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2019 ALL RIGHTS RESERVED. +# +# See file LICENSE for terms. +# + +test_modules="${test_modules}:module" +AC_CONFIG_FILES([test/gtest/ucs/test_module/Makefile]) diff --git a/test/gtest/ucs/test_module/test_module.c b/test/gtest/ucs/test_module/test_module.c new file mode 100644 index 0000000..e914586 --- /dev/null +++ b/test/gtest/ucs/test_module/test_module.c @@ -0,0 +1,13 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include + +extern int test_module_loaded; + +UCS_STATIC_INIT { + ++test_module_loaded; +} diff --git a/test/gtest/ucs/test_mpmc.cc b/test/gtest/ucs/test_mpmc.cc new file mode 100644 index 0000000..548075c --- /dev/null +++ b/test/gtest/ucs/test_mpmc.cc @@ -0,0 +1,131 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include + +extern "C" { +#include +} +#include + + +class test_mpmc : public ucs::test { +protected: + static const unsigned MPMC_SIZE = 100; + static const uint32_t SENTINEL = 0x7fffffffu; + static const unsigned NUM_THREADS = 4; + + + static long elem_count() { + return ucs_max((long)(100000.0 / (pow(ucs::test_time_multiplier(), NUM_THREADS))), + 500l); + } + + static void * producer_thread_func(void *arg) { + ucs_mpmc_queue_t *mpmc = reinterpret_cast(arg); + long count = elem_count(); + ucs_status_t status; + + for (uint32_t i = 0; i < count; ++i) { + do { + status = ucs_mpmc_queue_push(mpmc, i); + } while (status == UCS_ERR_EXCEEDS_LIMIT); + ASSERT_UCS_OK(status); + } + do { + status = ucs_mpmc_queue_push(mpmc, SENTINEL); + } while (status == UCS_ERR_EXCEEDS_LIMIT); + return NULL; + } + + static void * consumer_thread_func(void *arg) { + ucs_mpmc_queue_t *mpmc = reinterpret_cast(arg); + ucs_status_t status; + uint32_t value; + size_t count; + + count = 0; + do { + do { + status = ucs_mpmc_queue_pull(mpmc, &value); + } while (status == UCS_ERR_NO_PROGRESS); + ASSERT_UCS_OK(status); + ++count; + } while (value != SENTINEL); + + return (void*)((uintptr_t)count - 1); /* return count except sentinel */ + } + +}; + +UCS_TEST_F(test_mpmc, basic) { + ucs_mpmc_queue_t mpmc; + ucs_status_t status; + + status = ucs_mpmc_queue_init(&mpmc, MPMC_SIZE); + ASSERT_UCS_OK(status); + + EXPECT_TRUE(ucs_mpmc_queue_is_empty(&mpmc)); + + status = ucs_mpmc_queue_push(&mpmc, 124); + ASSERT_UCS_OK(status); + + status = ucs_mpmc_queue_push(&mpmc, 125); + ASSERT_UCS_OK(status); + + status = ucs_mpmc_queue_push(&mpmc, 126); + ASSERT_UCS_OK(status); + + EXPECT_FALSE(ucs_mpmc_queue_is_empty(&mpmc)); + + uint32_t value; + + status = ucs_mpmc_queue_pull(&mpmc, &value); + ASSERT_UCS_OK(status); + EXPECT_EQ(124u, value); + + status = ucs_mpmc_queue_pull(&mpmc, &value); + ASSERT_UCS_OK(status); + EXPECT_EQ(125u, value); + + status = ucs_mpmc_queue_pull(&mpmc, &value); + 
ASSERT_UCS_OK(status); + EXPECT_EQ(126u, value); + + EXPECT_TRUE(ucs_mpmc_queue_is_empty(&mpmc)); + + ucs_mpmc_queue_cleanup(&mpmc); +} + + +UCS_TEST_F(test_mpmc, multi_threaded) { + pthread_t producers[NUM_THREADS]; + pthread_t consumers[NUM_THREADS]; + + ucs_mpmc_queue_t mpmc; + ucs_status_t status; + size_t total; + void *retval; + + status = ucs_mpmc_queue_init(&mpmc, MPMC_SIZE); + ASSERT_UCS_OK(status); + + for (unsigned i = 0; i < NUM_THREADS; ++i) { + pthread_create(&producers[i], NULL, producer_thread_func, &mpmc); + pthread_create(&consumers[i], NULL, consumer_thread_func, &mpmc); + } + + total = 0; + for (unsigned i = 0; i < NUM_THREADS; ++i) { + pthread_join(producers[i], &retval); + pthread_join(consumers[i], &retval); + total += (uintptr_t)retval; + } + + EXPECT_EQ(NUM_THREADS * elem_count(), (long)total); + EXPECT_TRUE(ucs_mpmc_queue_is_empty(&mpmc)); + ucs_mpmc_queue_cleanup(&mpmc); +} diff --git a/test/gtest/ucs/test_mpool.cc b/test/gtest/ucs/test_mpool.cc new file mode 100644 index 0000000..d27e841 --- /dev/null +++ b/test/gtest/ucs/test_mpool.cc @@ -0,0 +1,203 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +} + +#include +#include +#include + +class test_mpool : public ucs::test { +protected: + static ucs_status_t test_alloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p) { + *chunk_p = malloc(*size_p); + return (*chunk_p == NULL) ? 
UCS_ERR_NO_MEMORY : UCS_OK; + } + + static void test_free(ucs_mpool_t *mp, void *chunk) { + free(chunk); + } + + static ucs_log_func_rc_t + mpool_log_handler(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) + { + // Ignore errors that invalid input parameters as it is expected + if (level == UCS_LOG_LEVEL_ERROR) { + std::string err_str = format_message(message, ap); + std::string exp_str = "Invalid memory pool parameter(s)"; + + if (err_str == exp_str) { + UCS_TEST_MESSAGE << err_str; + return UCS_LOG_FUNC_RC_STOP; + } + } + + return UCS_LOG_FUNC_RC_CONTINUE; + } + + static const size_t header_size = 30; + static const size_t data_size = 152; + static const size_t align = 128; +}; + +UCS_TEST_F(test_mpool, no_allocs) { + ucs_mpool_t mp; + ucs_status_t status; + + ucs_mpool_ops_t ops = { + ucs_mpool_chunk_malloc, + ucs_mpool_chunk_free, + NULL, + NULL + }; + + status = ucs_mpool_init(&mp, 0, header_size + data_size, header_size, align, + 6, 18, &ops, "test"); + ASSERT_UCS_OK(status); + ucs_mpool_cleanup(&mp, 1); +} + +UCS_TEST_F(test_mpool, wrong_ops) { + ucs_mpool_t mp; + ucs_status_t status; + ucs_mpool_ops_t ops = { 0 }; + scoped_log_handler log_handler(mpool_log_handler); + + status = ucs_mpool_init(&mp, 0, header_size + data_size, header_size, align, + 6, 18, &ops, "test"); + EXPECT_TRUE(status == UCS_ERR_INVALID_PARAM); +} + +UCS_TEST_F(test_mpool, basic) { + ucs_status_t status; + ucs_mpool_t mp; + + ucs_mpool_ops_t ops = { + ucs_mpool_chunk_malloc, + ucs_mpool_chunk_free, + NULL, + NULL + }; + + push_config(); + + for (int mpool_fifo = 0; mpool_fifo <= 1; ++mpool_fifo) { +#if ENABLE_DEBUG_DATA + modify_config("MPOOL_FIFO", ucs::to_string(mpool_fifo).c_str()); +#else + if (mpool_fifo == 1) { + continue; + } +#endif + status = ucs_mpool_init(&mp, 0, header_size + data_size, header_size, align, + 6, 18, &ops, "test"); + ASSERT_UCS_OK(status); + + for (unsigned loop = 0; loop < 10; ++loop) { + 
std::vector objs; + for (unsigned i = 0; i < 18; ++i) { + void *ptr = ucs_mpool_get(&mp); + ASSERT_TRUE(ptr != NULL); + ASSERT_EQ(0ul, ((uintptr_t)ptr + header_size) % align) << ptr; + memset(ptr, 0xAA, header_size + data_size); + objs.push_back(ptr); + } + + ASSERT_TRUE(NULL == ucs_mpool_get(&mp)); + + for (std::vector::iterator iter = objs.begin(); iter != objs.end(); ++iter) { + ucs_mpool_put(*iter); + } + } + + ucs_mpool_cleanup(&mp, 1); + } + + pop_config(); +} + +UCS_TEST_F(test_mpool, custom_alloc) { + ucs_status_t status; + ucs_mpool_t mp; + + ucs_mpool_ops_t ops = { + test_alloc, + test_free, + NULL, + NULL + }; + + status = ucs_mpool_init(&mp, 0, header_size + data_size, header_size, align, + 5, 18, &ops, "test"); + ASSERT_UCS_OK(status); + + void *obj = ucs_mpool_get(&mp); + EXPECT_TRUE(obj != NULL); + + ucs_mpool_put(obj); + + ucs_mpool_cleanup(&mp, 1); +} + +UCS_TEST_F(test_mpool, grow) { + ucs_status_t status; + ucs_mpool_t mp; + + ucs_mpool_ops_t ops = { + ucs_mpool_chunk_malloc, + ucs_mpool_chunk_free, + NULL, + NULL + }; + + status = ucs_mpool_init(&mp, 0, header_size + data_size, header_size, align, + 1000, 2000, &ops, "test"); + ASSERT_UCS_OK(status); + + ucs_mpool_grow(&mp, 1); + + void *obj = ucs_mpool_get(&mp); + EXPECT_TRUE(obj != NULL); + + ucs_mpool_put(obj); + + ucs_mpool_cleanup(&mp, 1); +} + +UCS_TEST_F(test_mpool, infinite) { + const unsigned NUM_ELEMS = 1000000 / ucs::test_time_multiplier(); + ucs_status_t status; + ucs_mpool_t mp; + + ucs_mpool_ops_t ops = { + ucs_mpool_chunk_malloc, + ucs_mpool_chunk_free, + NULL, + NULL + }; + + status = ucs_mpool_init(&mp, 0, header_size + data_size, header_size, align, + 10000, UINT_MAX, &ops, "test"); + ASSERT_UCS_OK(status); + + std::queue q; + for (unsigned i = 0; i < NUM_ELEMS; ++i) { + void *obj = ucs_mpool_get(&mp); + ASSERT_TRUE(obj != NULL); + q.push(obj); + } + + while (!q.empty()) { + ucs_mpool_put(q.front()); + q.pop(); + } + + ucs_mpool_cleanup(&mp, 1); +} diff --git 
a/test/gtest/ucs/test_pgtable.cc b/test/gtest/ucs/test_pgtable.cc new file mode 100644 index 0000000..1a4669a --- /dev/null +++ b/test/gtest/ucs/test_pgtable.cc @@ -0,0 +1,614 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include +extern "C" { +#include +#include +} +#include +#include +#include +#include + +class test_pgtable : public ucs::test { +protected: + + typedef std::vector search_result_t; + + virtual void init() { + ucs::test::init(); + ucs_status_t status = ucs_pgtable_init(&m_pgtable, pgd_alloc, pgd_free); + ASSERT_UCS_OK(status); + } + + virtual void cleanup() { + ucs_pgtable_cleanup(&m_pgtable); + ucs::test::cleanup(); + } + + void insert(ucs_pgt_region_t *region, ucs_status_t exp_status = UCS_OK, + const std::string& message = "") + { + ucs_status_t status = ucs_pgtable_insert(&m_pgtable, region); + if (exp_status == UCS_OK) { + ASSERT_UCS_OK(status, << " inserting 0x" << std::hex << + region->start << "..0x" <end); + } else { + EXPECT_EQ(exp_status, status) << message; + } + } + + void remove(ucs_pgt_region_t *region, ucs_status_t exp_status = UCS_OK, + const std::string& message = "") + { + ucs_status_t status = ucs_pgtable_remove(&m_pgtable, region); + if (exp_status == UCS_OK) { + ASSERT_UCS_OK(status); + } else { + EXPECT_EQ(exp_status, status) << message; + } + } + + ucs_pgt_region_t *lookup(ucs_pgt_addr_t address) { + return ucs_pgtable_lookup(&m_pgtable, address); + } + + unsigned num_regions() { + return ucs_pgtable_num_regions(&m_pgtable); + } + + void dump() { + ucs_pgtable_dump(&m_pgtable, UCS_LOG_LEVEL_DEBUG); + } + + void purge() { + ucs_pgtable_purge(&m_pgtable, pgd_purge_cb, reinterpret_cast(this)); + } + + search_result_t search(ucs_pgt_addr_t from, ucs_pgt_addr_t to) + { + search_result_t result; + ucs_pgtable_search_range(&m_pgtable, from, to, pgd_search_cb, + reinterpret_cast(&result)); + return result; + } + + static ucs_pgt_region_t* 
make_region(ucs_pgt_addr_t start, ucs_pgt_addr_t end) { + ucs_pgt_region_t r = {start, end}; + return new ucs_pgt_region_t(r); + } + + static bool is_overlap(const ucs_pgt_region_t *region, ucs_pgt_addr_t from, + ucs_pgt_addr_t to) + { + return ucs_max(from, region->start) <= ucs_min(to, region->end); + } + + static unsigned count_overlap(const ucs::ptr_vector& regions, + ucs_pgt_addr_t from, ucs_pgt_addr_t to) + { + unsigned count = 0; + for (ucs::ptr_vector::const_iterator iter = regions.begin(); + iter != regions.end(); ++iter) + { + if (is_overlap(*iter, from, to)) { + ++count; + } + } + return count; + } + + void test_search_region(const ucs_pgt_region_t ®ion) + { + search_result_t result; + + result = search(region.start, region.end - 1); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(®ion, result.front()); + + result = search(region.start, region.end); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(®ion, result.front()); + + result = search(region.start, region.end + 1); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(®ion, result.front()); + } + +private: + static ucs_pgt_dir_t *pgd_alloc(const ucs_pgtable_t *pgtable) { + return new ucs_pgt_dir_t; + } + + static void pgd_free(const ucs_pgtable_t *pgtable, ucs_pgt_dir_t *pgdir) { + delete pgdir; + } + + static void pgd_purge_cb(const ucs_pgtable_t *pgtable, + ucs_pgt_region_t *region, void *arg) { + } + + static void pgd_search_cb(const ucs_pgtable_t *pgtable, + ucs_pgt_region_t *region, void *arg) + { + search_result_t *result = reinterpret_cast(arg); + result->push_back(region); + } + +protected: + ucs_pgtable_t m_pgtable; +}; + + +UCS_TEST_F(test_pgtable, basic) { + ucs_pgt_region_t region; + + region.start = 0x400800; + region.end = 0x403400; + insert(®ion); + + dump(); + + EXPECT_EQ(®ion, lookup(0x400800)); + EXPECT_EQ(®ion, lookup(0x402020)); + EXPECT_EQ(®ion, lookup(0x4033ff)); + EXPECT_TRUE(NULL == lookup(0x403400)); + EXPECT_TRUE(NULL == lookup(0x0)); + EXPECT_TRUE(NULL == lookup(std::numeric_limits::max())); 
+ EXPECT_EQ(1u, num_regions()); + + remove(®ion); + + insert(®ion); + + dump(); + + purge(); /* region goes out of scope so we must remove it */ +} + +UCS_TEST_F(test_pgtable, lookup_adjacent) { + ucs_pgt_region_t region1 = {0xc500000, 0xc500400}; + ucs_pgt_region_t region2 = {0xc500400, 0xc500800}; + insert(®ion1); + insert(®ion2); + dump(); + EXPECT_EQ(®ion2, lookup(0xc500400)); + EXPECT_EQ(®ion1, lookup(0xc500000)); + purge(); +} + +UCS_TEST_F(test_pgtable, multi_search) { + for (int count = 0; count < 10; ++count) { + ucs::ptr_vector regions; + ucs_pgt_addr_t min = std::numeric_limits::max(); + ucs_pgt_addr_t max = 0; + + /* generate random regions */ + unsigned num_regions = 0; + for (int i = 0; i < 200 / ucs::test_time_multiplier(); ++i) { + ucs_pgt_addr_t start = (ucs::rand() & 0x7fffffff) << 24; + size_t size = ucs_min((size_t)ucs::rand(), + std::numeric_limits::max() - start); + ucs_pgt_addr_t end = start + ucs_align_down(size, UCS_PGT_ADDR_ALIGN); + if (count_overlap(regions, start, end)) { + /* Make sure regions do not overlap */ + continue; + } + + min = ucs_min(start, min); + max = ucs_max(start, max); + regions.push_back(make_region(start, end)); + ++num_regions; + } + + /* Insert regions */ + for (ucs::ptr_vector::const_iterator iter = regions.begin(); + iter != regions.end(); ++iter) + { + insert(*iter); + } + + /* Count how many fall in the [1/4, 3/4] range */ + ucs_pgt_addr_t from = ((min * 90) + (max * 10)) / 100; + ucs_pgt_addr_t to = ((min * 10) + (max * 90)) / 100; + unsigned num_in_range = count_overlap(regions, from, to); + + /* Search in page table */ + search_result_t result = search(from, to); + UCS_TEST_MESSAGE << "found " << result.size() << "/" << num_in_range << + " regions in the range 0x" << std::hex << from << + "..0x" << to << std::dec; + EXPECT_EQ(num_in_range, result.size()); + + purge(); + } +} + +UCS_TEST_SKIP_COND_F(test_pgtable, invalid_param, + (UCS_PGT_ADDR_ALIGN == 1)) { + ucs_pgt_region_t region1 = {0x4000, 0x4001}; + 
insert(®ion1, UCS_ERR_INVALID_PARAM); + + ucs_pgt_region_t region2 = {0x4001, 0x5000}; + insert(®ion2, UCS_ERR_INVALID_PARAM); + + ucs_pgt_region_t region3 = {0x5000, 0x4000}; + insert(®ion3, UCS_ERR_INVALID_PARAM); +} + +UCS_TEST_F(test_pgtable, overlap_insert) { + ucs_pgt_region_t region1 = {0x4000, 0x6000}; + insert(®ion1); + + ucs_pgt_region_t region2 = {0x5000, 0x7000}; + insert(®ion2, UCS_ERR_ALREADY_EXISTS, "overlap"); + + ucs_pgt_region_t region3 = {0x3000, 0x5000}; + insert(®ion3, UCS_ERR_ALREADY_EXISTS, "overlap"); + + remove(®ion1); +} + +UCS_TEST_F(test_pgtable, nonexist_remove) { + ucs_pgt_region_t region1 = {0x4000, 0x6000}; + remove(®ion1, UCS_ERR_NO_ELEM); + + ucs_pgt_region_t region2 = {0x5000, 0x7000}; + insert(®ion2); + + remove(®ion1, UCS_ERR_NO_ELEM); + + region1.start = 0x5000; + region1.end = 0x5000; + remove(®ion1, UCS_ERR_NO_ELEM); + + region1 = region2; + remove(®ion1, UCS_ERR_NO_ELEM); /* Fail - should be pointer-equal */ + + remove(®ion2); +} + +UCS_TEST_F(test_pgtable, search_large_region) { + ucs_pgt_region_t region = {0x3c03cb00, 0x3c03f600}; + insert(®ion, UCS_OK); + + search_result_t result; + + result = search(0x36990000, 0x3c810000); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(®ion, result.front()); + + result = search(region.start - 1, region.start); + EXPECT_EQ(1u, result.size()); + + result = search(region.start, region.start + 1); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(®ion, result.front()); + + result = search(region.end - 1, region.end); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(®ion, result.front()); + + result = search(region.end, region.end + 1); + EXPECT_EQ(0u, result.size()); + + remove(®ion); +} + +UCS_TEST_F(test_pgtable, search_non_contig_regions) { + const size_t region_size = UCS_BIT(28); + size_t start, end; + + // insert [0x7f6ef0000000 .. 
0x7f6f00000000] + start = 0x7f6ef0000000; + end = start + region_size; + ucs_pgt_region_t region1 = {start, end}; + insert(&region1, UCS_OK); + + // insert [0x7f6f2c021000 .. 0x7f6f3c021000] + start = 0x7f6f2c021000; + end = start + region_size; + ucs_pgt_region_t region2 = {start, end}; + insert(&region2, UCS_OK); + + // insert [0x7f6f42000000 .. 0x7f6f52000000] + start = 0x7f6f42000000; + end = start + region_size; + ucs_pgt_region_t region3 = {start, end}; + insert(&region3, UCS_OK); + + search_result_t result; + + // search the 1st region + test_search_region(region1); + + // search the 2nd region + test_search_region(region2); + + // search the 3rd region + test_search_region(region3); + + remove(&region1); + remove(&region2); + remove(&region3); +} + +UCS_TEST_F(test_pgtable, search_adjacent_regions) { + const size_t region_size = UCS_BIT(28); + size_t start, end; + + // insert [0x7f6ef0000000 .. 0x7f6f00000000] + start = 0x7f6ef0000000; + end = start + region_size; + ucs_pgt_region_t region1 = {start, end}; + insert(&region1, UCS_OK); + + // insert [0x7f6f00000000 .. 0x7f6f40000000] + start = end; + end = start + region_size; + ucs_pgt_region_t region2 = {region1.end, 0x7f6f40000000}; + insert(&region2, UCS_OK); + + // insert [0x7f6f40000000 .. 
0x7f6f48000000] + start = end; + end = start + region_size; + ucs_pgt_region_t region3 = {region2.end, 0x7f6f48000000}; + insert(&region3, UCS_OK); + + search_result_t result; + + // search the 1st region + result = search(region1.start, region1.end - 1); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(&region1, result.front()); + + result = search(region1.start, region1.end); + EXPECT_EQ(2u, result.size()); + EXPECT_EQ(&region1, result.front()); + + result = search(region1.start, region1.end + 1); + EXPECT_EQ(2u, result.size()); + EXPECT_EQ(&region1, result.front()); + + // search the 2nd region + result = search(region2.start, region2.end - 1); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(&region2, result.front()); + + result = search(region2.start, region2.end); + EXPECT_EQ(2u, result.size()); + EXPECT_EQ(&region2, result.front()); + + result = search(region2.start, region2.end + 1); + EXPECT_EQ(2u, result.size()); + EXPECT_EQ(&region2, result.front()); + + // search the 3rd region + result = search(region3.start, region3.end - 1); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(&region3, result.front()); + + result = search(region3.start, region3.end); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(&region3, result.front()); + + result = search(region3.start, region3.end + 1); + EXPECT_EQ(1u, result.size()); + EXPECT_EQ(&region3, result.front()); + + remove(&region1); + remove(&region2); + remove(&region3); +} + +class test_pgtable_perf : public test_pgtable { +protected: + + void insert(ucs_pgt_region_t *region) { + /* Insert to both */ + test_pgtable::insert(region); + m_stl_pgt.insert(region); + } + + void purge() { + test_pgtable::purge(); + m_stl_pgt.clear(); + } + + ucs_pgt_region_t* lookup_in_stl(ucs_pgt_addr_t address) { + ucs_pgt_region_t search = {address, address + 1}; + stl_pgtable_t::iterator iter = m_stl_pgt.lower_bound(&search); + if (iter == m_stl_pgt.end()) { + return NULL; + } else { + ucs_pgt_region_t *region = *iter; + EXPECT_LT(address, region->end) << std::hex << "address=" + << address << 
" region " + << region->start << ".." + << region->end << std::dec; + return (address >= region->start) ? region : NULL; + } + } + + ucs_pgt_region_t* lookup_in_pgt(ucs_pgt_addr_t address) { + return test_pgtable::lookup(address); + } + + void measure_workload(ucs_pgt_addr_t max_addr, + size_t block_size, /* Basic block size */ + unsigned blocks_per_superblock, /* Number of consecutive basic blocks per big block */ + unsigned num_superblocks, /* Number of big blocks */ + unsigned num_lookups, /* How many lookups to generate */ + bool random_access, /* Whether access pattern is random or ordered */ + double hit_ratio) /* Probability of lookup hit */ + { + block_size = ucs_align_up_pow2(block_size, UCS_PGT_ADDR_ALIGN); + + const size_t superblock_size = block_size * blocks_per_superblock; + const size_t max_start = max_addr - superblock_size; + ucs::ptr_vector regions; + std::vector lookups; + lookups.clear(); + + /* Generate random superblocks */ + ucs_pgt_addr_t start = 0; + std::vector superblocks; + for (unsigned i = 0; i < num_superblocks; ++i) { + ucs_pgt_addr_t addr = random_address(start, max_start); + superblocks.push_back(addr); + start = addr + superblock_size * 2; /* minimal gap */ + if (start >= max_start) { + break; + } + } + + num_superblocks = superblocks.size(); + + /* Insert them */ + for (unsigned i = 0; i < num_superblocks; ++i) { + for (unsigned j = 0; j < blocks_per_superblock; ++j) { + ucs_pgt_region_t *region = new ucs_pgt_region_t; + region->start = superblocks[i] + (j * block_size); + region->end = region->start + block_size; + regions.push_back(region); + insert(region); + } + } + + EXPECT_EQ(num_superblocks * blocks_per_superblock, num_regions()); + + /* Create workload */ + unsigned sb_idx = 0; + unsigned block_idx = 0; + for (unsigned n = 0; n < num_lookups; ++n) { + ucs_pgt_addr_t addr = superblocks[sb_idx] + block_idx * block_size; + if (ucs::rand() > (RAND_MAX * hit_ratio)) { + addr += superblock_size; /* make it miss by falling to 
inter-block gap */ + } + lookups.push_back(addr); + if (random_access) { + sb_idx = ucs::rand() % num_superblocks; + block_idx = ucs::rand() % blocks_per_superblock; + } else { + block_idx = (block_idx + 1) % blocks_per_superblock; + if (block_idx == 0) + sb_idx = (sb_idx + 1) % num_superblocks; + } + } + + invalidate_cache(); + + std::pair result_stl = + measure(lookups, true); + + invalidate_cache(); + + std::pair result_pgt = + measure(lookups, false); + + EXPECT_EQ(result_stl.second, result_pgt.second); + + UCS_TEST_MESSAGE << std::dec << num_superblocks << " areas of " << + blocks_per_superblock << "x" << block_size << " bytes, " << + (random_access ? "random" : "ordered") << ": " << + "stl: " << (ucs_time_to_nsec(result_stl.first) / num_lookups) << " ns, " + "ucs: " << (ucs_time_to_nsec(result_pgt.first) / num_lookups) << " ns " << + (result_pgt.second * 100) / lookups.size() << "% hit" + ; + purge(); + } + +private: + struct region_comparator { + bool operator()(ucs_pgt_region_t* region1, ucs_pgt_region_t* region2) { + return region1->end <= region2->start; + } + }; + + typedef std::set stl_pgtable_t; + + std::pair + inline measure(const std::vector& lookups, bool use_stl) + { + unsigned hit_count = 0; + + ucs_time_t start_time = ucs_get_time(); + ucs_compiler_fence(); + for (std::vector::const_iterator iter = lookups.begin(); + iter != lookups.end(); ++iter) + { + ucs_pgt_region_t *region = + use_stl ? 
lookup_in_stl(*iter) : lookup_in_pgt(*iter); + if (region != NULL) { + ++hit_count; + } + } + ucs_compiler_fence(); + return std::make_pair(ucs_get_time() - start_time, hit_count); + } + + ucs_pgt_addr_t random_address(ucs_pgt_addr_t start, ucs_pgt_addr_t max) { + ucs_pgt_addr_t r = (ucs_pgt_addr_t)ucs::rand() * (max / 1000) / RAND_MAX; + return ucs_align_up_pow2((r % (max - start)) + start, + UCS_PGT_ADDR_ALIGN); + } + + void invalidate_cache() { + size_t size = 30 * 1024 * 1024; + void *ptr = malloc(size); + memset(ptr, 0xbb, size); + free(ptr); + } + + stl_pgtable_t m_stl_pgt; +}; + +/* + * Compare out lookup performance to STL's + */ +UCS_TEST_F(test_pgtable_perf, basic) { + ucs_pgt_region_t region = {0x4000, 0x5000}; + insert(®ion); + EXPECT_EQ(®ion, lookup_in_stl(0x4500)); + EXPECT_EQ(®ion, lookup_in_stl(0x4000)); + EXPECT_EQ(®ion, lookup_in_pgt(0x4500)); + EXPECT_TRUE(NULL == lookup_in_stl(0x5000)); + purge(); +} + +UCS_TEST_SKIP_COND_F(test_pgtable_perf, workloads, + (ucs::test_time_multiplier() != 1)) { + + measure_workload(UCS_MASK(28), + 1024, + 10000, + 20, + 5000000, + false, + 0.8); + measure_workload(UCS_MASK(28), + 1024, + 10000, + 20, + 500000, + true, + 0.8); + measure_workload(UCS_MASK(28), + 1024, + 10000, + 2, + 10000000, + false, + 0.8); + measure_workload(UCS_MASK(28), + 1024 * 256, + 1, + 4, + 10000000, + false, + 0.8); +} diff --git a/test/gtest/ucs/test_profile.cc b/test/gtest/ucs/test_profile.cc new file mode 100644 index 0000000..47cd9c7 --- /dev/null +++ b/test/gtest/ucs/test_profile.cc @@ -0,0 +1,398 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2012. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#include +extern "C" { +#include +#include +#include +} + +#include +#include + +#if HAVE_PROFILING + +class scoped_profile { +public: + scoped_profile(ucs::test_base& test, const std::string &file_name, + const char *mode) : m_test(test), m_file_name(file_name) +{ + ucs_profile_global_cleanup(); + ucs_profile_reset_locations(); + m_test.push_config(); + m_test.modify_config("PROFILE_MODE", mode); + m_test.modify_config("PROFILE_FILE", m_file_name.c_str()); + ucs_profile_global_init(); + } + + std::string read() { + ucs_profile_dump(); + std::ifstream f(m_file_name.c_str()); + return std::string(std::istreambuf_iterator(f), + std::istreambuf_iterator()); + } + + ~scoped_profile() { + ucs_profile_global_cleanup(); + unlink(m_file_name.c_str()); + m_test.pop_config(); + ucs_profile_global_init(); + } +private: + ucs::test_base& m_test; + const std::string m_file_name; +}; + +class test_profile : public testing::TestWithParam, + public ucs::test_base { +public: + test_profile(); + ~test_profile(); + + UCS_TEST_BASE_IMPL; + +protected: + static const int MIN_LINE; + static const int MAX_LINE; + static const unsigned NUM_LOCAITONS; + static const char* PROFILE_FILENAME; + + + std::set m_tids; + pthread_spinlock_t m_tids_lock; + + struct thread_param { + test_profile *test; + int iters; + }; + + void add_tid(int tid); + + static void *profile_thread_func(void *arg); + + int num_threads() const; + + void run_profiled_code(int num_iters); + + void test_header(const ucs_profile_header_t *hdr, unsigned exp_mode, + const void **ptr); + void test_locations(const ucs_profile_location_t *locations, + unsigned num_locations, const void **ptr); + void test_thread_locations(const ucs_profile_thread_header_t *thread_hdr, + unsigned num_locations, uint64_t exp_count, + unsigned exp_num_records, const void **ptr); + + void do_test(unsigned int_mode, const std::string& str_mode); +}; + +static int sum(int a, int b) +{ + return a + b; +} + +const int test_profile::MIN_LINE = 
__LINE__; + +static void *test_request = &test_request; + +UCS_PROFILE_FUNC_VOID(profile_test_func1, ()) +{ + UCS_PROFILE_REQUEST_NEW(test_request, "allocate", 10); + UCS_PROFILE_REQUEST_EVENT(test_request, "work", 0); + UCS_PROFILE_REQUEST_FREE(test_request); + UCS_PROFILE_CODE("code") { + UCS_PROFILE_SAMPLE("sample"); + } +} + +UCS_PROFILE_FUNC(int, profile_test_func2, (a, b), int a, int b) +{ + return UCS_PROFILE_CALL(sum, a, b); +} + +const int test_profile::MAX_LINE = __LINE__; +const unsigned test_profile::NUM_LOCAITONS = 12u; +const char* test_profile::PROFILE_FILENAME = "test.prof"; + +test_profile::test_profile() +{ + pthread_spin_init(&m_tids_lock, 0); +} + +test_profile::~test_profile() +{ + pthread_spin_destroy(&m_tids_lock); +} + +void test_profile::add_tid(int tid) +{ + pthread_spin_lock(&m_tids_lock); + m_tids.insert(tid); + pthread_spin_unlock(&m_tids_lock); +} + +void *test_profile::profile_thread_func(void *arg) +{ + const thread_param *param = (const thread_param*)arg; + + param->test->add_tid(ucs_get_tid()); + + for (int i = 0; i < param->iters; ++i) { + profile_test_func1(); + profile_test_func2(1, 2); + } + + return NULL; +} + +int test_profile::num_threads() const +{ + return GetParam(); +} + +void test_profile::run_profiled_code(int num_iters) +{ + int ret; + thread_param param; + + param.iters = num_iters; + param.test = this; + + if (num_threads() == 1) { + profile_thread_func(¶m); + } else { + std::vector threads; + + for (int i = 0; i < num_threads(); ++i) { + pthread_t profile_thread; + ret = pthread_create(&profile_thread, NULL, profile_thread_func, + (void*)¶m); + if (ret < 0) { + ADD_FAILURE() << "pthread_create failed: " << strerror(errno); + break; + } + + threads.push_back(profile_thread); + } + + while (!threads.empty()) { + void *result; + ret = pthread_join(threads.back(), &result); + if (ret < 0) { + ADD_FAILURE() << "pthread_join failed: " << strerror(errno); + } + + threads.pop_back(); + } + } +} + +void 
test_profile::test_header(const ucs_profile_header_t *hdr, unsigned exp_mode, + const void **ptr) +{ + EXPECT_EQ(UCS_PROFILE_FILE_VERSION, hdr->version); + EXPECT_EQ(std::string(ucs_get_host_name()), std::string(hdr->hostname)); + EXPECT_EQ(getpid(), (pid_t)hdr->pid); + EXPECT_EQ(exp_mode, hdr->mode); + EXPECT_EQ(NUM_LOCAITONS, hdr->num_locations); + EXPECT_EQ((uint32_t)num_threads(), hdr->num_threads); + EXPECT_NEAR(hdr->one_second / ucs_time_from_sec(1.0), 1.0, 0.01); + + *ptr = hdr + 1; +} + +void test_profile::test_locations(const ucs_profile_location_t *locations, + unsigned num_locations, const void **ptr) +{ + std::set loc_names; + for (unsigned i = 0; i < num_locations; ++i) { + const ucs_profile_location_t *loc = &locations[i]; + EXPECT_EQ(std::string(basename(__FILE__)), std::string(loc->file)); + EXPECT_GE(loc->line, MIN_LINE); + EXPECT_LE(loc->line, MAX_LINE); + loc_names.insert(loc->name); + } + + EXPECT_NE(loc_names.end(), loc_names.find("profile_test_func1")); + EXPECT_NE(loc_names.end(), loc_names.find("profile_test_func2")); + EXPECT_NE(loc_names.end(), loc_names.find("code")); + EXPECT_NE(loc_names.end(), loc_names.find("sample")); + EXPECT_NE(loc_names.end(), loc_names.find("sum")); + EXPECT_NE(loc_names.end(), loc_names.find("allocate")); + EXPECT_NE(loc_names.end(), loc_names.find("work")); + + *ptr = locations + num_locations; +} + +void test_profile::test_thread_locations( + const ucs_profile_thread_header_t *thread_hdr, + unsigned num_locations, uint64_t exp_count, + unsigned exp_num_records, const void **ptr) +{ + const ucs_profile_thread_location_t *loc; + + EXPECT_NE(m_tids.end(), m_tids.find(thread_hdr->tid)); + EXPECT_EQ(exp_num_records, thread_hdr->num_records); + + EXPECT_LE(thread_hdr->end_time, ucs_get_time()); + EXPECT_LE(thread_hdr->start_time, thread_hdr->end_time); + EXPECT_LE(thread_hdr->end_time - thread_hdr->start_time, + ucs_time_from_sec(1.0) * ucs::test_time_multiplier() * (1 + exp_count)); + + for (unsigned i = 0; i < 
num_locations; ++i) { + loc = &reinterpret_cast + (thread_hdr + 1)[i]; + EXPECT_EQ(exp_count, loc->count); + EXPECT_LE(loc->total_time, + ucs_time_from_sec(1.0) * ucs::test_time_multiplier() * exp_count); + } + + *ptr = reinterpret_cast(thread_hdr + 1) + + num_locations; +} + +void test_profile::do_test(unsigned int_mode, const std::string& str_mode) +{ + const int ITER = 5; + uint64_t exp_count = (int_mode & UCS_BIT(UCS_PROFILE_MODE_ACCUM)) ? + ITER : 0; + uint64_t exp_num_records = (int_mode & UCS_BIT(UCS_PROFILE_MODE_LOG)) ? + (NUM_LOCAITONS * ITER) : 0; + + + scoped_profile p(*this, PROFILE_FILENAME, str_mode.c_str()); + run_profiled_code(ITER); + + std::string data = p.read(); + const void *ptr = &data[0]; + + /* Read and test file header */ + const ucs_profile_header_t *hdr = + reinterpret_cast(ptr); + test_header(hdr, int_mode, &ptr); + + /* Read and test global locations */ + const ucs_profile_location_t *locations = + reinterpret_cast(ptr); + test_locations(locations, hdr->num_locations, &ptr); + + /* Read and test threads */ + for (int i = 0; i < num_threads(); ++i) { + const ucs_profile_thread_header_t *thread_hdr = + reinterpret_cast(ptr); + + test_thread_locations(thread_hdr, hdr->num_locations, exp_count, + exp_num_records, &ptr); + + const ucs_profile_record_t *records = + reinterpret_cast(ptr); + uint64_t prev_ts = records[0].timestamp; + for (uint64_t i = 0; i < thread_hdr->num_records; ++i) { + const ucs_profile_record_t *rec = &records[i]; + + /* test location index */ + EXPECT_GE(rec->location, 0u); + EXPECT_LT(rec->location, uint32_t(NUM_LOCAITONS)); + + /* test timestamp */ + EXPECT_GE(rec->timestamp, prev_ts); + prev_ts = rec->timestamp; + + /* test param64 */ + const ucs_profile_location_t *loc = &locations[rec->location]; + if ((loc->type == UCS_PROFILE_TYPE_REQUEST_NEW) || + (loc->type == UCS_PROFILE_TYPE_REQUEST_EVENT) || + (loc->type == UCS_PROFILE_TYPE_REQUEST_FREE)) + { + EXPECT_EQ((uintptr_t)&test_request, rec->param64); + } + } + + 
ptr = records + thread_hdr->num_records; + } + + EXPECT_EQ(&data[data.size()], ptr) << data.size(); +} + +UCS_TEST_P(test_profile, accum) { + do_test(UCS_BIT(UCS_PROFILE_MODE_ACCUM), "accum"); +} + +UCS_TEST_P(test_profile, log) { + do_test(UCS_BIT(UCS_PROFILE_MODE_LOG), "log"); +} + +UCS_TEST_P(test_profile, log_accum) { + do_test(UCS_BIT(UCS_PROFILE_MODE_LOG) | UCS_BIT(UCS_PROFILE_MODE_ACCUM), + "log,accum"); +} + +INSTANTIATE_TEST_CASE_P(st, test_profile, ::testing::Values(1)); +INSTANTIATE_TEST_CASE_P(mt, test_profile, ::testing::Values(2, 4, 8)); + +class test_profile_perf : public test_profile { +}; + +UCS_TEST_SKIP_COND_P(test_profile_perf, overhead, RUNNING_ON_VALGRIND) { + +#if defined(__x86_64__) || defined(__powerpc64__) + const double EXP_OVERHEAD_NSEC = 100.0; +#else + const double EXP_OVERHEAD_NSEC = 150.0; +#endif + const int ITERS = 100; + const int WARMUP_ITERS = 5; + const int COUNT = 100000; + double overhead_nsec = 0.0; + + scoped_profile p(*this, PROFILE_FILENAME, "accum"); + + for (int retry = 0; retry < (ucs::perf_retry_count + 1); ++retry) { + ucs_time_t time_profile_on = 0; + ucs_time_t time_profile_off = 0; + + for (int i = 0; i < WARMUP_ITERS + ITERS; ++i) { + ucs_time_t t; + + t = ucs_get_time(); + for (volatile int j = 0; j < COUNT;) { + ++j; + } + if (i > WARMUP_ITERS) { + time_profile_off += ucs_get_time() - t; + } + + t = ucs_get_time(); + for (volatile int j = 0; j < COUNT;) { + UCS_PROFILE_CODE("test") { + ++j; + } + } + if (i > WARMUP_ITERS) { + time_profile_on += ucs_get_time() - t; + } + } + + overhead_nsec = ucs_time_to_nsec(time_profile_on - time_profile_off) / + COUNT / ITERS; + UCS_TEST_MESSAGE << "overhead: " << overhead_nsec << " nsec"; + + if (!ucs::perf_retry_count) { + UCS_TEST_MESSAGE << "not validating performance"; + return; /* Success */ + } else if (overhead_nsec < EXP_OVERHEAD_NSEC) { + return; /* Success */ + } else { + ucs::safe_sleep(ucs::perf_retry_interval); + } + } + + EXPECT_LT(overhead_nsec, 
EXP_OVERHEAD_NSEC) << "Profiling overhead is too high"; +} + +INSTANTIATE_TEST_CASE_P(st, test_profile_perf, ::testing::Values(1)); + +#endif diff --git a/test/gtest/ucs/test_rcache.cc b/test/gtest/ucs/test_rcache.cc new file mode 100644 index 0000000..bab6b2b --- /dev/null +++ b/test/gtest/ucs/test_rcache.cc @@ -0,0 +1,799 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include +extern "C" { +#include +#include +#include +#include +#include +#include +#include +} + + +class test_rcache_basic : public ucs::test { +}; + +UCS_TEST_F(test_rcache_basic, create_fail) { + static const ucs_rcache_ops_t ops = { + NULL, NULL, NULL + }; + ucs_rcache_params_t params = { + sizeof(ucs_rcache_region_t), + UCS_PGT_ADDR_ALIGN, + ucs_get_page_size(), + UCS_BIT(30), /* non-existing event */ + 1000, + &ops, + NULL + }; + + ucs_rcache_t *rcache; + ucs_status_t status = ucs_rcache_create(¶ms, "test", + ucs_stats_get_root(), &rcache); + EXPECT_NE(UCS_OK, status); /* should fail */ + if (status == UCS_OK) { + ucs_rcache_destroy(rcache); + } +} + + +class test_rcache : public ucs::test { +protected: + + struct region { + ucs_rcache_region_t super; + uint32_t magic; + uint32_t id; + }; + + test_rcache() : m_reg_count(0), m_ptr(NULL) { + } + + virtual void init() { + ucs::test::init(); + static const ucs_rcache_ops_t ops = { + mem_reg_cb, + mem_dereg_cb, + dump_region_cb + }; + ucs_rcache_params_t params = { + sizeof(region), + UCS_PGT_ADDR_ALIGN, + ucs_get_page_size(), + UCM_EVENT_VM_UNMAPPED, + 1000, + &ops, + reinterpret_cast(this) + }; + UCS_TEST_CREATE_HANDLE(ucs_rcache_t*, m_rcache, ucs_rcache_destroy, + ucs_rcache_create, ¶ms, "test", ucs_stats_get_root()); + } + + virtual void cleanup() { + m_rcache.reset(); + EXPECT_EQ(0u, m_reg_count); + ucs::test::cleanup(); + } + + region *get(void *address, size_t length, int prot = PROT_READ|PROT_WRITE) { + ucs_status_t status; + ucs_rcache_region_t *r; + status 
= ucs_rcache_get(m_rcache, address, length, prot, NULL, &r); + ASSERT_UCS_OK(status); + EXPECT_TRUE(r != NULL); + struct region *region = ucs_derived_of(r, struct region); + EXPECT_EQ(uint32_t(MAGIC), region->magic); + EXPECT_TRUE(ucs_test_all_flags(region->super.prot, prot)); + return region; + } + + void put(region *r) { + ucs_rcache_region_put(m_rcache, &r->super); + } + + virtual ucs_status_t mem_reg(region *region) + { + int mem_prot = ucs_get_mem_prot(region->super.super.start, region->super.super.end); + if (!ucs_test_all_flags(mem_prot, region->super.prot)) { + ucs_debug("protection error mem_prot " UCS_RCACHE_PROT_FMT " wanted " UCS_RCACHE_PROT_FMT, + UCS_RCACHE_PROT_ARG(mem_prot), + UCS_RCACHE_PROT_ARG(region->super.prot)); + return UCS_ERR_IO_ERROR; + } + + mlock((const void*)region->super.super.start, + region->super.super.end - region->super.super.start); + EXPECT_NE(uint32_t(MAGIC), region->magic); + region->magic = MAGIC; + region->id = ucs_atomic_fadd32(&next_id, 1); + + ucs_atomic_add32(&m_reg_count, +1); + return UCS_OK; + } + + virtual void mem_dereg(region *region) + { + munlock((const void*)region->super.super.start, + region->super.super.end - region->super.super.start); + EXPECT_EQ(uint32_t(MAGIC), region->magic); + region->magic = 0; + uint32_t prev = ucs_atomic_fsub32(&m_reg_count, 1); + EXPECT_GT(prev, 0u); + } + + virtual void dump_region(region *region, char *buf, size_t max) + { + snprintf(buf, max, "magic 0x%x id %u", region->magic, region->id); + } + + void* shared_malloc(size_t size) + { + if (barrier()) { + m_ptr = malloc(size); + } + barrier(); + return m_ptr; + } + + void shared_free(void *ptr) + { + if (barrier()) { + free(ptr); + } + } + + static void* alloc_pages(size_t size, int prot) + { + void *ptr = mmap(NULL, size, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + EXPECT_NE(MAP_FAILED, ptr) << strerror(errno); + return ptr; + } + + static const uint32_t MAGIC = 0x05e905e9; + static volatile uint32_t next_id; + volatile uint32_t 
m_reg_count; + ucs::handle m_rcache; + void * volatile m_ptr; + +private: + + static ucs_status_t mem_reg_cb(void *context, ucs_rcache_t *rcache, + void *arg, ucs_rcache_region_t *r, + uint16_t rcache_mem_reg_flags) + { + return reinterpret_cast(context)->mem_reg( + ucs_derived_of(r, struct region)); + } + + static void mem_dereg_cb(void *context, ucs_rcache_t *rcache, + ucs_rcache_region_t *r) + { + reinterpret_cast(context)->mem_dereg( + ucs_derived_of(r, struct region)); + } + + static void dump_region_cb(void *context, ucs_rcache_t *rcache, + ucs_rcache_region_t *r, char *buf, size_t max) + { + reinterpret_cast(context)->dump_region( + ucs_derived_of(r, struct region), buf, max); + } +}; + +volatile uint32_t test_rcache::next_id = 1; + + +static uintptr_t virt_to_phys(uintptr_t address) +{ + static const char *pagemap_file = "/proc/self/pagemap"; + const size_t page_size = ucs_get_page_size(); + uint64_t entry, pfn; + ssize_t offset, ret; + uintptr_t pa; + int fd; + + /* See https://www.kernel.org/doc/Documentation/vm/pagemap.txt */ + fd = open(pagemap_file, O_RDONLY); + if (fd < 0) { + ucs_error("failed to open %s: %m", pagemap_file); + pa = std::numeric_limits::max(); + goto out; + } + + offset = (address / page_size) * sizeof(entry); + ret = lseek(fd, offset, SEEK_SET); + if (ret != offset) { + ucs_error("failed to seek in %s to offset %zu: %m", pagemap_file, offset); + pa = std::numeric_limits::max(); + goto out_close; + } + + ret = read(fd, &entry, sizeof(entry)); + if (ret != sizeof(entry)) { + ucs_error("read from %s at offset %zu returned %ld: %m", pagemap_file, + offset, ret); + pa = std::numeric_limits::max(); + goto out_close; + } + + if (entry & (1ULL << 63)) { + pfn = entry & ((1ULL << 54) - 1); + pa = (pfn * page_size) | (address & (page_size - 1)); + } else { + pa = std::numeric_limits::max(); /* Page not present */ + } + +out_close: + close(fd); +out: + return pa; +} + +UCS_MT_TEST_F(test_rcache, basic, 10) { + static const size_t size = 1 * 
1024 * 1024; + void *ptr = malloc(size); + region *region = get(ptr, size); + put(region); + free(ptr); +} + +UCS_MT_TEST_F(test_rcache, get_unmapped, 6) { + /* + * - allocate, get, put, get again -> should be same id + * - release, get again -> should be different id + */ + static const size_t size = 1 * 1024 * 1024; + region *region; + uintptr_t pa, new_pa; + uint32_t id; + void *ptr; + + ptr = malloc(size); + region = get(ptr, size); + id = region->id; + pa = virt_to_phys(region->super.super.start); + put(region); + + region = get(ptr, size); + put(region); + free(ptr); + + ptr = malloc(size); + region = get(ptr, size); + ucs_debug("got region id %d", region->id); + new_pa = virt_to_phys(region->super.super.start); + if (pa != new_pa) { + ucs_debug("physical address changed (0x%lx->0x%lx)", + pa, new_pa); + ucs_debug("id=%d region->id=%d", id, region->id); + EXPECT_NE(id, region->id); + } else { + ucs_debug("physical address not changed (0x%lx)", pa); + } + put(region); + free(ptr); +} + +UCS_MT_TEST_F(test_rcache, merge, 6) { + /* + * +---------+-----+---------+ + * | region1 | pad | region2 | + * +---+-----+-----+----+----+ + * | region3 | + * +----------------+ + */ + static const size_t size1 = 256 * ucs_get_page_size(); + static const size_t size2 = 512 * ucs_get_page_size(); + static const size_t pad = 64 * ucs_get_page_size(); + region *region1, *region2, *region3, *region1_2; + void *ptr1, *ptr2, *ptr3, *mem; + size_t size3; + + mem = alloc_pages(size1 + pad + size2, PROT_READ|PROT_WRITE); + + /* Create region1 */ + ptr1 = (char*)mem; + region1 = get(ptr1, size1); + + /* Get same region as region1 - should be same one */ + region1_2 = get(ptr1, size1); + EXPECT_EQ(region1, region1_2); + put(region1_2); + + /* Create region2 */ + ptr2 = (char*)mem + pad + size1; + region2 = get(ptr2, size2); + + /* Create region3 which should merge region1 and region2 */ + ptr3 = (char*)mem + pad; + size3 = size1 + size2 - pad; + region3 = get(ptr3, size3); + + /* Get the 
same area as region1 - should be a different region now */ + region1_2 = get(ptr1, size1); + EXPECT_NE(region1, region1_2); /* should be different region because was merged */ + EXPECT_EQ(region3, region1_2); /* it should be the merged region */ + put(region1_2); + + put(region1); + put(region2); + put(region3); + + munmap(mem, size1 + pad + size2); +} + +UCS_MT_TEST_F(test_rcache, merge_inv, 6) { + /* + * Merge with another region which causes immediate invalidation of the + * other region. + * +---------+ + * | region1 | + * +---+-----+----------+ + * | region2 | + * +----------------+ + */ + static const size_t size1 = 256 * 1024; + static const size_t size2 = 512 * 1024; + static const size_t pad = 64 * 1024; + region *region1, *region2; + void *ptr1, *ptr2, *mem; + uint32_t id1; + + mem = alloc_pages(pad + size2, PROT_READ|PROT_WRITE); + + /* Create region1 */ + ptr1 = (char*)mem; + region1 = get(ptr1, size1); + id1 = region1->id; + put(region1); + + /* Create overlapping region - should destroy region1 */ + ptr2 = (char*)mem + pad; + region2 = get(ptr2, size2); + EXPECT_NE(id1, region2->id); + put(region2); + + munmap(mem, pad + size2); +} + +UCS_MT_TEST_F(test_rcache, release_inuse, 6) { + static const size_t size = 1 * 1024 * 1024; + + void *ptr1 = malloc(size); + region *region1 = get(ptr1, size); + free(ptr1); + + void *ptr2 = malloc(size); + region *region2 = get(ptr2, size); + put(region2); + free(ptr2); + + /* key should still be valid */ + EXPECT_EQ(uint32_t(MAGIC), region1->magic); + + put(region1); +} + +/* + * +-------------+-------------+ + * | region1 -r | region2 -w | + * +---+---------+------+------+ + * | region3 r | + * +----------------+ + * + * don't merge with inaccessible pages + */ +UCS_MT_TEST_F(test_rcache, merge_with_unwritable, 6) { + static const size_t size1 = 10 * ucs_get_page_size(); + static const size_t size2 = 8 * ucs_get_page_size(); + + void *mem = alloc_pages(size1 + size2, PROT_READ); + void *ptr1 = mem; + + /* Set region1 
to map all of the 1-st part and half of the 2-nd */ + region *region1 = get(ptr1, size1 + size2 / 2, PROT_READ); + EXPECT_EQ(PROT_READ, region1->super.prot); + + /* Set 2-nd part as write-only */ + void *ptr2 = (char*)mem + size1; + int ret = mprotect(ptr2, size2, PROT_WRITE); + ASSERT_EQ(0, ret) << strerror(errno); + + /* Get 2-nd part - should not merge with region1 */ + region *region2 = get(ptr2, size2, PROT_WRITE); + EXPECT_GE(region2->super.super.start, (uintptr_t)ptr2); + EXPECT_EQ(PROT_WRITE, region2->super.prot); + + EXPECT_TRUE(!(region1->super.flags & UCS_RCACHE_REGION_FLAG_PGTABLE)); + put(region1); + + put(region2); + munmap(mem, size1 + size2); +} + +/* don't expand prot of our region if our pages can't support it */ +UCS_MT_TEST_F(test_rcache, merge_merge_unwritable, 6) { + static const size_t size1 = 10 * ucs_get_page_size(); + static const size_t size2 = 8 * ucs_get_page_size(); + + void *mem = alloc_pages(size1 + size2, PROT_READ|PROT_WRITE); + ASSERT_NE(MAP_FAILED, mem) << strerror(errno); + + void *ptr1 = mem; + + /* Set region1 to map all of the 1-st part and half of the 2-nd */ + region *region1 = get(ptr1, size1 + size2 / 2, PROT_READ|PROT_WRITE); + EXPECT_EQ(PROT_READ|PROT_WRITE, region1->super.prot); + + /* Set 2-nd part as read-only */ + void *ptr2 = (char*)mem + size1; + int ret = mprotect(ptr2, size2, PROT_READ); + ASSERT_EQ(0, ret) << strerror(errno); + + /* Get 2-nd part - should not merge because we are read-only */ + region *region2 = get(ptr2, size2, PROT_READ); + EXPECT_GE(region2->super.super.start, (uintptr_t)ptr2); + EXPECT_EQ(PROT_READ, region2->super.prot); + + put(region1); + put(region2); + munmap(mem, size1 + size2); +} + +/* expand prot of new region to support existing regions */ +UCS_MT_TEST_F(test_rcache, merge_expand_prot, 6) { + static const size_t size1 = 10 * ucs_get_page_size(); + static const size_t size2 = 8 * ucs_get_page_size(); + + void *mem = alloc_pages(size1 + size2, PROT_READ|PROT_WRITE); + ASSERT_NE(MAP_FAILED, mem) << 
strerror(errno); + + void *ptr1 = mem; + + /* Set region1 to map all of 1-st part of the 2-nd */ + region *region1 = get(ptr1, size1 + size2 / 2, PROT_READ); + EXPECT_EQ(PROT_READ, region1->super.prot); + + /* Get 2-nd part - should merge with region1 with full protection */ + void *ptr2 = (char*)mem + size1; + region *region2 = get(ptr2, size2, PROT_WRITE); + if (region1->super.flags & UCS_RCACHE_REGION_FLAG_PGTABLE) { + EXPECT_LE(region2->super.super.start, (uintptr_t)ptr1); + EXPECT_TRUE(region2->super.prot & PROT_READ); + } + EXPECT_TRUE(region2->super.prot & PROT_WRITE); + EXPECT_GE(region2->super.super.end, (uintptr_t)ptr2 + size2); + + put(region1); + put(region2); + munmap(mem, size1 + size2); +} + +/* + * Test flow: + * +---------------------+ + * | r+w | 1. memory allocated with R+W prot + * +---------+-----------+ + * | region1 | | 2. region1 is created in part of the memory + * +-----+---+-----------+ + * | r | r+w | 3. region1 is freed, some of the region memory changed to R + * +-----+---------------+ + * | | region2 | 4. region2 is created. region1 must be invalidated and + * +-----+---------------+ kicked out of pagetable. 
+ */ +UCS_MT_TEST_F(test_rcache, merge_invalid_prot, 6) +{ + static const size_t size1 = 10 * ucs_get_page_size(); + static const size_t size2 = 8 * ucs_get_page_size(); + int ret; + + void *mem = alloc_pages(size1+size2, PROT_READ|PROT_WRITE); + void *ptr1 = mem; + + region *region1 = get(ptr1, size1, PROT_READ|PROT_WRITE); + EXPECT_EQ(PROT_READ|PROT_WRITE, region1->super.prot); + put(region1); + + ret = mprotect(ptr1, ucs_get_page_size(), PROT_READ); + ASSERT_EQ(0, ret) << strerror(errno); + + void *ptr2 = (char*)mem+size1 - 1024 ; + region *region2 = get(ptr2, size2, PROT_READ|PROT_WRITE); + + /* check permissions and that the region is not merged */ + EXPECT_EQ(PROT_READ|PROT_WRITE, region2->super.prot); + EXPECT_EQ(region2->super.super.start, (uintptr_t)ptr2); + + barrier(); + EXPECT_EQ(6u, m_reg_count); + barrier(); + put(region2); + munmap(mem, size1+size2); +} + +UCS_MT_TEST_F(test_rcache, shared_region, 6) { + static const size_t size = 1 * 1024 * 1024; + + void *mem = shared_malloc(size); + + void *ptr1 = mem; + size_t size1 = size * 2 / 3; + + void *ptr2 = (char*)mem + size - size1; + size_t size2 = size1; + + region *region1 = get(ptr1, size1); + usleep(100); + put(region1); + + region *region2 = get(ptr2, size2); + usleep(100); + put(region2); + + shared_free(mem); +} + +class test_rcache_no_register : public test_rcache { +protected: + bool m_fail_reg; + virtual ucs_status_t mem_reg(region *region) { + if (m_fail_reg) { + return UCS_ERR_IO_ERROR; + } + return test_rcache::mem_reg(region); + } + + virtual void init() { + test_rcache::init(); + ucs_log_push_handler(log_handler); + m_fail_reg = true; + } + + virtual void cleanup() { + ucs_log_pop_handler(); + test_rcache::cleanup(); + } + + static ucs_log_func_rc_t + log_handler(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) + { + /* Ignore warnings about empty memory pool */ + if ((level == UCS_LOG_LEVEL_WARN) && strstr(message, "failed 
to register")) { + UCS_TEST_MESSAGE << format_message(message, ap); + return UCS_LOG_FUNC_RC_STOP; + } + + return UCS_LOG_FUNC_RC_CONTINUE; + } +}; + +UCS_MT_TEST_F(test_rcache_no_register, register_failure, 10) { + static const size_t size = 1 * 1024 * 1024; + void *ptr = malloc(size); + + ucs_status_t status; + ucs_rcache_region_t *r; + status = ucs_rcache_get(m_rcache, ptr, size, PROT_READ|PROT_WRITE, NULL, &r); + EXPECT_EQ(UCS_ERR_IO_ERROR, status); + EXPECT_EQ(0u, m_reg_count); + + free(ptr); +} + +/* The region overlaps an old region with different + * protection and memory protection does not fit one of + * the region. + * This should trigger an error during merge. + * + * Test flow: + * +---------------------+ + * | r+w | 1. memory allocated with R+W prot + * +---------+-----------+ + * | region1 | | 2. region1 is created in part of the memory + * +-----+---+-----------+ + * | r | 3. region1 is freed, all memory changed to R + * +-----+---------------+ + * | | region2(w) | 4. region2 is created. region1 must be invalidated and + * +-----+---------------+ kicked out of pagetable. Creation of region2 + * must fail. 
+ */ +UCS_MT_TEST_F(test_rcache_no_register, merge_invalid_prot_slow, 5) +{ + static const size_t size1 = 10 * ucs_get_page_size(); + static const size_t size2 = 8 * ucs_get_page_size(); + int ret; + + void *mem = alloc_pages(size1+size2, PROT_READ|PROT_WRITE); + void *ptr1 = mem; + + m_fail_reg = false; + region *region1 = get(ptr1, size1, PROT_READ|PROT_WRITE); + EXPECT_EQ(PROT_READ|PROT_WRITE, region1->super.prot); + put(region1); + + void *ptr2 = (char*)mem+size1 - 1024 ; + ret = mprotect(mem, size1, PROT_READ); + ASSERT_EQ(0, ret) << strerror(errno); + + + ucs_status_t status; + ucs_rcache_region_t *r; + + status = ucs_rcache_get(m_rcache, ptr2, size2, PROT_WRITE, NULL, &r); + EXPECT_EQ(UCS_ERR_IO_ERROR, status); + + barrier(); + EXPECT_EQ(0u, m_reg_count); + + munmap(mem, size1+size2); +} + +#if ENABLE_STATS +class test_rcache_stats : public test_rcache { +protected: + + virtual void init() { + ucs_stats_cleanup(); + push_config(); + modify_config("STATS_DEST", "file:/dev/null"); + modify_config("STATS_TRIGGER", "exit"); + ucs_stats_init(); + ASSERT_TRUE(ucs_stats_is_active()); + test_rcache::init(); + } + + virtual void cleanup() { + test_rcache::cleanup(); + ucs_stats_cleanup(); + pop_config(); + ucs_stats_init(); + } + + int get_counter(int stat) { + return (int)UCS_STATS_GET_COUNTER(m_rcache.get()->stats, stat); + } + + /* a helper function for stats tests debugging */ + void dump_stats() { + printf("gets %d hf %d hs %d misses %d merges %d unmaps %d" + " unmaps_inv %d puts %d regs %d deregs %d\n", + get_counter(UCS_RCACHE_GETS), + get_counter(UCS_RCACHE_HITS_FAST), + get_counter(UCS_RCACHE_HITS_SLOW), + get_counter(UCS_RCACHE_MISSES), + get_counter(UCS_RCACHE_MERGES), + get_counter(UCS_RCACHE_UNMAPS), + get_counter(UCS_RCACHE_UNMAP_INVALIDATES), + get_counter(UCS_RCACHE_PUTS), + get_counter(UCS_RCACHE_REGS), + get_counter(UCS_RCACHE_DEREGS)); + } +}; + +UCS_TEST_F(test_rcache_stats, basic) { + static const size_t size = 4096; + void *ptr = malloc(size); + 
region *r1, *r2; + + r1 = get(ptr, size); + EXPECT_EQ(1, get_counter(UCS_RCACHE_GETS)); + EXPECT_EQ(1, get_counter(UCS_RCACHE_MISSES)); + EXPECT_EQ(1, get_counter(UCS_RCACHE_REGS)); + + r2 = get(ptr, size); + EXPECT_EQ(2, get_counter(UCS_RCACHE_GETS)); + EXPECT_EQ(1, get_counter(UCS_RCACHE_HITS_FAST)); + EXPECT_EQ(1, get_counter(UCS_RCACHE_MISSES)); + + put(r1); + EXPECT_EQ(2, get_counter(UCS_RCACHE_GETS)); + EXPECT_EQ(1, get_counter(UCS_RCACHE_PUTS)); + + put(r2); + EXPECT_EQ(2, get_counter(UCS_RCACHE_GETS)); + EXPECT_EQ(2, get_counter(UCS_RCACHE_PUTS)); + + free(ptr); + EXPECT_EQ(2, get_counter(UCS_RCACHE_GETS)); + EXPECT_EQ(2, get_counter(UCS_RCACHE_PUTS)); + EXPECT_EQ(0, get_counter(UCS_RCACHE_DEREGS)); + EXPECT_EQ(0, get_counter(UCS_RCACHE_UNMAPS)); +} + +UCS_TEST_F(test_rcache_stats, unmap_dereg) { + static const size_t size1 = 1024 * 1024; + void *mem = alloc_pages(size1, PROT_READ|PROT_WRITE); + region *r1; + + r1 = get(mem, size1); + put(r1); + + /* Should generate umap event but no dereg or unmap invalidation. + * We can have more unmap events if releasing the region structure triggers + * releasing memory back to the OS. 
+ */ + munmap(mem, size1); + EXPECT_GE(get_counter(UCS_RCACHE_UNMAPS), 1); + EXPECT_EQ(0, get_counter(UCS_RCACHE_UNMAP_INVALIDATES)); + EXPECT_EQ(0, get_counter(UCS_RCACHE_DEREGS)); + + mem = alloc_pages(size1, PROT_READ|PROT_WRITE); + + /* + * Adding a new region shall force a processing of invalidation queue and dereg + */ + r1 = get(mem, size1); + EXPECT_GE(get_counter(UCS_RCACHE_UNMAPS), 1); + EXPECT_EQ(1, get_counter(UCS_RCACHE_UNMAP_INVALIDATES)); + EXPECT_EQ(1, get_counter(UCS_RCACHE_DEREGS)); + + /* cleanup */ + put(r1); + munmap(mem, size1); +} + +UCS_TEST_F(test_rcache_stats, merge) { + static const size_t size1 = 1024 * 1024; + void *mem = alloc_pages(size1, PROT_READ|PROT_WRITE); + region *r1, *r2; + + r1 = get(mem, 8192); + /* should trigger merge of the two regions */ + r2 = get((char *)mem + 4096, 8192); + EXPECT_EQ(1, get_counter(UCS_RCACHE_MERGES)); + + EXPECT_EQ(2, get_counter(UCS_RCACHE_GETS)); + EXPECT_EQ(2, get_counter(UCS_RCACHE_MISSES)); + + put(r1); + put(r2); + munmap(mem, size1); +} + +UCS_TEST_F(test_rcache_stats, hits_slow) { + static const size_t size1 = 1024 * 1024; + region *r1, *r2; + void *mem1, *mem2; + + mem1 = alloc_pages(size1, PROT_READ|PROT_WRITE); + r1 = get(mem1, size1); + put(r1); + + mem2 = alloc_pages(size1, PROT_READ|PROT_WRITE); + r1 = get(mem2, size1); + + /* generate unmap event */ + munmap(mem1, size1); + EXPECT_EQ(1, get_counter(UCS_RCACHE_UNMAPS)); + + EXPECT_EQ(2, get_counter(UCS_RCACHE_GETS)); + EXPECT_EQ(1, get_counter(UCS_RCACHE_PUTS)); + EXPECT_EQ(2, get_counter(UCS_RCACHE_MISSES)); + EXPECT_EQ(0, get_counter(UCS_RCACHE_UNMAP_INVALIDATES)); + EXPECT_EQ(0, get_counter(UCS_RCACHE_DEREGS)); + /* it should produce a slow hit because there is + * a pending unmap event + */ + r2 = get(mem2, size1); + EXPECT_EQ(1, get_counter(UCS_RCACHE_HITS_SLOW)); + + EXPECT_EQ(3, get_counter(UCS_RCACHE_GETS)); + EXPECT_EQ(1, get_counter(UCS_RCACHE_PUTS)); + EXPECT_EQ(2, get_counter(UCS_RCACHE_MISSES)); + EXPECT_EQ(1, 
get_counter(UCS_RCACHE_UNMAPS)); + /* unmap event processed */ + EXPECT_EQ(1, get_counter(UCS_RCACHE_UNMAP_INVALIDATES)); + EXPECT_EQ(1, get_counter(UCS_RCACHE_DEREGS)); + + put(r1); + put(r2); + munmap(mem2, size1); +} +#endif diff --git a/test/gtest/ucs/test_sock.cc b/test/gtest/ucs/test_sock.cc new file mode 100644 index 0000000..fb5d6b0 --- /dev/null +++ b/test/gtest/ucs/test_sock.cc @@ -0,0 +1,422 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +} + +#include + + +static std::string socket_err_exp_str; + +class test_socket : public ucs::test { +public: +protected: + + static ucs_log_func_rc_t + socket_error_handler(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) + { + // Ignore errors that invalid input parameters as it is expected + if (level == UCS_LOG_LEVEL_ERROR) { + std::string err_str = format_message(message, ap); + + if (err_str.find(socket_err_exp_str) != std::string::npos) { + UCS_TEST_MESSAGE << err_str; + return UCS_LOG_FUNC_RC_STOP; + } + } + + return UCS_LOG_FUNC_RC_CONTINUE; + } +}; + +UCS_TEST_F(test_socket, sockaddr_sizeof) { + struct sockaddr_in sa_in; + struct sockaddr_in6 sa_in6; + struct sockaddr_un sa_un; + size_t size; + + sa_in.sin_family = AF_INET; + sa_in6.sin6_family = AF_INET6; + sa_un.sun_family = AF_UNIX; + + /* Check with wrong IPv4 */ + { + size = 0; + EXPECT_UCS_OK(ucs_sockaddr_sizeof((const struct sockaddr*)&sa_in, &size)); + EXPECT_EQ(sizeof(struct sockaddr_in), size); + } + + /* Check with wrong IPv6 */ + { + size = 0; + EXPECT_UCS_OK(ucs_sockaddr_sizeof((const struct sockaddr*)&sa_in6, &size)); + EXPECT_EQ(sizeof(struct sockaddr_in6), size); + } + + /* Check with wrong address family */ + { + socket_err_exp_str = "unknown address family:"; + scoped_log_handler log_handler(socket_error_handler); + + size = 0; + EXPECT_EQ(UCS_ERR_INVALID_PARAM, + 
ucs_sockaddr_sizeof((const struct sockaddr*)&sa_un, &size)); + /* Check that doesn't touch provided memory in error case */ + EXPECT_EQ(0ULL, size); + } +} + +UCS_TEST_F(test_socket, sockaddr_get_port) { + const uint16_t sin_port = 5555; + struct sockaddr_in sa_in; + struct sockaddr_in6 sa_in6; + struct sockaddr_un sa_un; + uint16_t port = 0; + + sa_in.sin_family = AF_INET; + sa_in.sin_port = htons(sin_port); + sa_in6.sin6_family = AF_INET6; + sa_in6.sin6_port = htons(sin_port); + sa_un.sun_family = AF_UNIX; + + /* Check with wrong IPv4 */ + { + port = 0; + EXPECT_UCS_OK(ucs_sockaddr_get_port((const struct sockaddr*)&sa_in, &port)); + EXPECT_EQ(sin_port, port); + } + + /* Check with wrong IPv6 */ + { + port = 0; + EXPECT_UCS_OK(ucs_sockaddr_get_port((const struct sockaddr*)&sa_in6, &port)); + EXPECT_EQ(sin_port, port); + } + + /* Check with wrong address family */ + { + socket_err_exp_str = "unknown address family:"; + scoped_log_handler log_handler(socket_error_handler); + + port = sin_port; + EXPECT_EQ(UCS_ERR_INVALID_PARAM, + ucs_sockaddr_get_port((const struct sockaddr*)&sa_un, &port)); + /* Check that doesn't touch provided memory in error case */ + EXPECT_EQ(sin_port, port); + } +} + +UCS_TEST_F(test_socket, sockaddr_get_inet_addr) { + struct sockaddr_in sa_in; + struct sockaddr_in6 sa_in6; + struct sockaddr_un sa_un; + struct in_addr sin_addr; + struct in6_addr sin6_addr; + + sa_in.sin_family = AF_INET; + sa_in6.sin6_family = AF_INET6; + sa_un.sun_family = AF_UNIX; + + sin_addr.s_addr = sa_in.sin_addr.s_addr = htonl(INADDR_ANY); + sin6_addr = sa_in6.sin6_addr = in6addr_any; + + /* Check with wrong IPv4 */ + { + EXPECT_EQ(&sa_in.sin_addr, + ucs_sockaddr_get_inet_addr((const struct sockaddr*)&sa_in)); + EXPECT_EQ(0, memcmp(&sa_in.sin_addr, &sin_addr, + sizeof(sa_in.sin_addr))); + } + + /* Check with wrong IPv6 */ + { + EXPECT_EQ(&sa_in6.sin6_addr, + ucs_sockaddr_get_inet_addr((const struct sockaddr*)&sa_in6)); + EXPECT_EQ(0, memcmp(&sa_in6.sin6_addr, 
&sin6_addr, + sizeof(sa_in6.sin6_addr))); + } + + /* Check with wrong address family */ + { + socket_err_exp_str = "unknown address family:"; + scoped_log_handler log_handler(socket_error_handler); + + EXPECT_EQ(NULL, ucs_sockaddr_get_inet_addr((const struct sockaddr*)&sa_un)); + } +} + +UCS_TEST_F(test_socket, sockaddr_str) { + const uint16_t port = 65534; + const char *ipv4_addr = "192.168.122.157"; + const char *ipv6_addr = "fe80::218:e7ff:fe16:fb97"; + struct sockaddr_in sa_in; + struct sockaddr_in6 sa_in6; + char ipv4_addr_out[128], ipv6_addr_out[128], *str, test_str[1024]; + + sa_in.sin_family = AF_INET; + sa_in.sin_port = htons(port); + sa_in6.sin6_family = AF_INET6; + sa_in6.sin6_port = htons(port); + + sprintf(ipv4_addr_out, "%s:%d", ipv4_addr, port); + sprintf(ipv6_addr_out, "%s:%d", ipv6_addr, port); + + inet_pton(AF_INET, ipv4_addr, &(sa_in.sin_addr)); + inet_pton(AF_INET6, ipv6_addr, &(sa_in6.sin6_addr)); + + /* Check with short `str_len` to fit IP address only */ + { + str = (char*)ucs_sockaddr_str((const struct sockaddr*)&sa_in, + test_str, INET_ADDRSTRLEN); + EXPECT_EQ(str, test_str); + EXPECT_EQ(0, strncmp(test_str, ipv4_addr_out, + INET_ADDRSTRLEN - 1)); + + str = (char*)ucs_sockaddr_str((const struct sockaddr*)&sa_in6, + test_str, INET6_ADDRSTRLEN); + EXPECT_EQ(str, test_str); + EXPECT_EQ(0, strncmp(test_str, ipv6_addr_out, + INET6_ADDRSTRLEN - 1)); + } + + /* Check with big enough `str_len` */ + { + str = (char*)ucs_sockaddr_str((const struct sockaddr*)&sa_in, + test_str, 1024); + EXPECT_EQ(str, test_str); + EXPECT_EQ(0, strcmp(test_str, ipv4_addr_out)); + + str = (char*)ucs_sockaddr_str((const struct sockaddr*)&sa_in6, + test_str, 1024); + EXPECT_TRUE(str == test_str); + EXPECT_EQ(0, strcmp(test_str, ipv6_addr_out)); + } + + /* Check with wrong sa_family */ + { + struct sockaddr_un sa_un; + sa_un.sun_family = AF_UNIX; + + /* with big enough string */ + { + str = (char*)ucs_sockaddr_str((const struct sockaddr*)&sa_un, + test_str, 1024); + 
EXPECT_EQ(test_str, str); + EXPECT_EQ(0, strcmp(str, "")); + } + + /* without string */ + { + str = (char*)ucs_sockaddr_str((const struct sockaddr*)&sa_un, + NULL, 0); + EXPECT_EQ(NULL, str); + } + } +} + +UCS_TEST_F(test_socket, socket_setopt) { + socklen_t optlen; + int optname; + int optval; + int level; + ucs_status_t status; + int fd; + + optlen = sizeof(optval); + + status = ucs_socket_create(AF_INET, SOCK_STREAM, &fd); + EXPECT_UCS_OK(status); + EXPECT_GE(fd, 0); + + /* with acceptable parameters */ + { + level = SOL_SOCKET; + optname = SO_REUSEADDR; + optval = 1; + + status = ucs_socket_setopt(fd, level, optname, &optval, optlen); + EXPECT_UCS_OK(status); + } + + /* with bad parameters */ + { + level = IPPROTO_TCP; + optname = SO_REUSEADDR; + optval = 1; + + socket_err_exp_str = "failed to set " + ucs::to_string(optname) + " option for " + + ucs::to_string(level) + " level on fd " + ucs::to_string(fd) + + + ": " + strerror(EINVAL); + scoped_log_handler log_handler(socket_error_handler); + status = ucs_socket_setopt(fd, level, optname, &optval, optlen); + EXPECT_EQ(status, UCS_ERR_IO_ERROR); + } + + close(fd); +} + +static void sockaddr_cmp_test(int sa_family, const char *ip_addr1, + const char *ip_addr2, unsigned port1, + unsigned port2, struct sockaddr *sa1, + struct sockaddr *sa2) +{ + int cmp_res1, cmp_res2; + ucs_status_t status; + + sa1->sa_family = sa_family; + sa2->sa_family = sa_family; + + inet_pton(sa_family, ip_addr1, + const_cast(ucs_sockaddr_get_inet_addr(sa1))); + inet_pton(sa_family, ip_addr2, + const_cast(ucs_sockaddr_get_inet_addr(sa2))); + + status = ucs_sockaddr_set_port(sa1, port1); + ASSERT_UCS_OK(status); + status = ucs_sockaddr_set_port(sa2, port2); + ASSERT_UCS_OK(status); + + const void *addr1 = ucs_sockaddr_get_inet_addr(sa1); + const void *addr2 = ucs_sockaddr_get_inet_addr(sa2); + + ASSERT_TRUE(addr1 != NULL); + ASSERT_TRUE(addr2 != NULL); + + size_t addr_size = ((sa_family == AF_INET) ? 
+ sizeof(UCS_SOCKET_INET_ADDR(sa1)) : + sizeof(UCS_SOCKET_INET6_ADDR(sa1))); + + // `sa1` vs `sa2` + { + int addr_cmp_res = memcmp(addr1, addr2, addr_size); + int port_cmp_res = + (port1 == port2) ? 0 : ((port1 < port2) ? -1 : 1); + int expected_cmp_res = + addr_cmp_res ? addr_cmp_res : port_cmp_res; + + cmp_res1 = ucs_sockaddr_cmp(sa1, sa2, &status); + EXPECT_UCS_OK(status); + EXPECT_EQ(expected_cmp_res, cmp_res1); + + // Call w/o `status` provided (NULL): the result must be the same + cmp_res2 = ucs_sockaddr_cmp(sa1, sa2, NULL); + EXPECT_EQ(cmp_res1, cmp_res2); + } + + // `sa2` vs `sa1` + { + int addr_cmp_res = memcmp(addr2, addr1, addr_size); + int port_cmp_res = + (port2 == port1) ? 0 : ((port2 < port1) ? -1 : 1); + int expected_cmp_res = + addr_cmp_res ? addr_cmp_res : port_cmp_res; + + cmp_res1 = ucs_sockaddr_cmp(sa2, sa1, &status); + EXPECT_UCS_OK(status); + EXPECT_EQ(expected_cmp_res, cmp_res1); + + // Call w/o `status` provided (NULL): the result must be the same + cmp_res2 = ucs_sockaddr_cmp(sa2, sa1, NULL); + EXPECT_EQ(cmp_res1, cmp_res2); + } +} + +UCS_TEST_F(test_socket, sockaddr_cmp) { + const unsigned port1 = 65534; + const unsigned port2 = 65533; + const char *ipv4_addr1 = "192.168.122.157"; + const char *ipv4_addr2 = "192.168.123.157"; + const char *ipv6_addr1 = "fe80::218:e7ff:fe16:fb97"; + const char *ipv6_addr2 = "fe80::219:e7ff:fe16:fb97"; + struct sockaddr_in sa_in_1 = { 0 }; + struct sockaddr_in sa_in_2 = { 0 }; + struct sockaddr_in6 sa_in6_1 = { 0 }; + struct sockaddr_in6 sa_in6_2 = { 0 }; + + // Same addresses; same ports + sockaddr_cmp_test(AF_INET, ipv4_addr1, ipv4_addr1, + port1, port1, + (struct sockaddr*)&sa_in_1, + (struct sockaddr*)&sa_in_2); + sockaddr_cmp_test(AF_INET6, ipv6_addr1, ipv6_addr1, + port1, port1, + (struct sockaddr*)&sa_in6_1, + (struct sockaddr*)&sa_in6_2); + + // Same addresses; different ports + sockaddr_cmp_test(AF_INET, ipv4_addr1, ipv4_addr1, + port1, port2, + (struct sockaddr*)&sa_in_1, + (struct sockaddr*)&sa_in_2); + sockaddr_cmp_test(AF_INET6, ipv6_addr1, ipv6_addr1, + 
port1, port2, + (struct sockaddr*)&sa_in6_1, + (struct sockaddr*)&sa_in6_2); + + // Different addresses; same ports + sockaddr_cmp_test(AF_INET, ipv4_addr1, ipv4_addr2, + port1, port1, + (struct sockaddr*)&sa_in_1, + (struct sockaddr*)&sa_in_2); + sockaddr_cmp_test(AF_INET6, ipv6_addr1, ipv6_addr2, + port1, port1, + (struct sockaddr*)&sa_in6_1, + (struct sockaddr*)&sa_in6_2); + + // Different addresses; different ports + sockaddr_cmp_test(AF_INET, ipv4_addr1, ipv4_addr2, + port1, port2, + (struct sockaddr*)&sa_in_1, + (struct sockaddr*)&sa_in_2); + sockaddr_cmp_test(AF_INET6, ipv6_addr1, ipv6_addr2, + port1, port2, + (struct sockaddr*)&sa_in6_1, + (struct sockaddr*)&sa_in6_2); +} + +static void sockaddr_cmp_err_test(const struct sockaddr *sa1, + const struct sockaddr *sa2) +{ + ucs_status_t status; + int result; + + result = ucs_sockaddr_cmp((const struct sockaddr*)sa1, + (const struct sockaddr*)sa2, + &status); + EXPECT_EQ(UCS_ERR_INVALID_PARAM, status); + EXPECT_TRUE(result > 0); + + // Call w/o `status` provided + result = ucs_sockaddr_cmp((const struct sockaddr*)sa1, + (const struct sockaddr*)sa2, + NULL); + EXPECT_TRUE(result > 0); +} + +UCS_TEST_F(test_socket, sockaddr_cmp_err) { + // Check with wrong sa_family + struct sockaddr_un sa_un; + struct sockaddr_in sa_in; + + sa_un.sun_family = AF_UNIX; + sa_in.sin_family = AF_INET; + + socket_err_exp_str = "unknown address family: "; + scoped_log_handler log_handler(socket_error_handler); + + sockaddr_cmp_err_test((const struct sockaddr*)&sa_un, + (const struct sockaddr*)&sa_un); + + sockaddr_cmp_err_test((const struct sockaddr*)&sa_in, + (const struct sockaddr*)&sa_un); + + sockaddr_cmp_err_test((const struct sockaddr*)&sa_un, + (const struct sockaddr*)&sa_in); +} diff --git a/test/gtest/ucs/test_stats.cc b/test/gtest/ucs/test_stats.cc new file mode 100644 index 0000000..d29dfa0 --- /dev/null +++ b/test/gtest/ucs/test_stats.cc @@ -0,0 +1,338 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. 
ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +} + +#include +#include + +#if ENABLE_STATS +#define NUM_DATA_NODES 20 + +class stats_test : public ucs::test { +public: + + stats_test() { + size_t size = sizeof(ucs_stats_class_t) + + NUM_COUNTERS * sizeof(m_data_stats_class->counter_names[0]); + m_data_stats_class = (ucs_stats_class_t*)malloc(size); + m_data_stats_class->name = "data"; + m_data_stats_class->num_counters = NUM_COUNTERS; + m_data_stats_class->counter_names[0] = "counter0"; + m_data_stats_class->counter_names[1] = "counter1"; + m_data_stats_class->counter_names[2] = "counter2"; + m_data_stats_class->counter_names[3] = "counter3"; + } + + ~stats_test() { + free(m_data_stats_class); + } + + virtual void init() { + ucs::test::init(); + ucs_stats_cleanup(); + push_config(); + modify_config("STATS_DEST", stats_dest_config().c_str()); + modify_config("STATS_TRIGGER", stats_trigger_config().c_str()); + ucs_stats_init(); + ASSERT_TRUE(ucs_stats_is_active()); + } + + virtual void cleanup() { + ucs_stats_cleanup(); + pop_config(); + ucs_stats_init(); + ucs::test::cleanup(); + } + + virtual std::string stats_dest_config() = 0; + virtual std::string stats_trigger_config() = 0; + + void prepare_nodes(ucs_stats_node_t **cat_node, + ucs_stats_node_t *data_nodes[NUM_DATA_NODES]) { + static ucs_stats_class_t category_stats_class = { + "category", 0 + }; + + ucs_status_t status = UCS_STATS_NODE_ALLOC(cat_node, + &category_stats_class, + ucs_stats_get_root()); + ASSERT_UCS_OK(status); + for (unsigned i = 0; i < NUM_DATA_NODES; ++i) { + status = UCS_STATS_NODE_ALLOC(&data_nodes[i], m_data_stats_class, + *cat_node, "-%d", i); + ASSERT_UCS_OK(status); + + UCS_STATS_UPDATE_COUNTER(data_nodes[i], 0, 10); + UCS_STATS_UPDATE_COUNTER(data_nodes[i], 1, 20); + UCS_STATS_UPDATE_COUNTER(data_nodes[i], 2, 30); + UCS_STATS_UPDATE_COUNTER(data_nodes[i], 3, 40); + } + + /* make 
sure our original node is ok */ + check_cat_node(*cat_node, data_nodes); + } + + void free_nodes(ucs_stats_node_t *cat_node, + ucs_stats_node_t *data_nodes[NUM_DATA_NODES]) { + for (unsigned i = 0; i < NUM_DATA_NODES; ++i) { + UCS_STATS_NODE_FREE(data_nodes[i]); + } + UCS_STATS_NODE_FREE(cat_node); + } + + void check_tree(ucs_stats_node_t *root, + ucs_stats_node_t *data_nodes[NUM_DATA_NODES]) { + EXPECT_EQ(1ul, ucs_list_length(&root->children[UCS_STATS_ACTIVE_CHILDREN])); + check_cat_node(ucs_list_head(&root->children[UCS_STATS_ACTIVE_CHILDREN], + ucs_stats_node_t, list), data_nodes); + } + + void check_cat_node(ucs_stats_node_t *cat_node, + ucs_stats_node_t *data_nodes[NUM_DATA_NODES]) { + EXPECT_EQ(std::string("category"), std::string(cat_node->cls->name)); + EXPECT_EQ((unsigned)0, cat_node->cls->num_counters); + + ucs_stats_node_t *data_node; + ucs_list_for_each(data_node, &cat_node->children[UCS_STATS_ACTIVE_CHILDREN], list) { + EXPECT_EQ(std::string("data"), std::string(data_node->cls->name)); + EXPECT_EQ(unsigned(NUM_COUNTERS), data_node->cls->num_counters); + EXPECT_EQ(std::string("counter0"), std::string(data_node->cls->counter_names[0])); + + EXPECT_EQ((unsigned)10, data_node->counters[0]); + EXPECT_EQ((unsigned)20, data_node->counters[1]); + EXPECT_EQ((unsigned)30, data_node->counters[2]); + EXPECT_EQ((unsigned)40, data_node->counters[3]); + } + } + +protected: + static const unsigned NUM_COUNTERS = 4; + + ucs_stats_class_t *m_data_stats_class; +}; + +class stats_udp_test : public stats_test { +public: + virtual void init() { + ucs_status_t status = ucs_stats_server_start(0, &m_server); + ASSERT_UCS_OK(status); + stats_test::init(); + } + + virtual void cleanup() { + stats_test::cleanup(); + ucs_stats_server_destroy(m_server); + } + + void wait_for_stats() { + do { + usleep(1000 * ucs::test_time_multiplier()); + } while (ucs_stats_server_rcvd_packets(m_server) == 0); + } + + virtual std::string stats_dest_config() { + int port = 
ucs_stats_server_get_port(m_server); + EXPECT_GT(port, 0); + return "udp:localhost:" + ucs::to_string(port); + } + + virtual std::string stats_trigger_config() { + return "timer:0.1s"; + } + + void read_and_check_stats(ucs_stats_node_t *data_nodes[NUM_DATA_NODES]) { + ucs_list_link_t *list = ucs_stats_server_get_stats(m_server); + ASSERT_EQ(1ul, ucs_list_length(list)); + check_tree(ucs_list_head(list, ucs_stats_node_t, list), data_nodes); + ucs_stats_server_purge_stats(m_server); + } + +protected: + ucs_stats_server_h m_server; +}; + +class stats_file_test : public stats_test { +public: + stats_file_test() { + m_pipefds[0] = -1; + m_pipefds[1] = -1; + } + + virtual void init() { + /* Note: this test assumes data <64k, o/w stats dump will block forever */ + int ret = pipe(m_pipefds); + ASSERT_EQ(0, ret); + stats_test::init(); + } + + void close_pipes() + { + close(m_pipefds[0]); + close(m_pipefds[1]); + m_pipefds[0] = -1; + m_pipefds[1] = -1; + } + + virtual void cleanup() { + stats_test::cleanup(); + close_pipes(); + } + + virtual std::string stats_dest_config() { + return "file:/dev/fd/" + ucs::to_string(m_pipefds[1]) + ":bin"; + } + + std::string get_data() { + std::string data(65536, '\0'); + ssize_t ret = read(m_pipefds[0], &data[0], data.size()); + EXPECT_GE(ret, 0); + data.resize(ret); + return data; + } + + virtual std::string stats_trigger_config() { + return ""; + } + +protected: + int m_pipefds[2]; +}; + +class stats_on_demand_test : public stats_udp_test { +public: + virtual std::string stats_trigger_config() { + return ""; + } +}; + +class stats_on_signal_test : public stats_udp_test { +public: + virtual std::string stats_trigger_config() { + return "signal:USR1"; + } +}; + +class stats_on_exit_test : public stats_file_test { +public: + virtual std::string stats_dest_config() { + return "file:/dev/fd/" + ucs::to_string(m_pipefds[1]); + } + + /* + * we check the dump-on-exit in cleanup method . 
+ */ + virtual void cleanup() { + stats_test::cleanup(); + std::string data = get_data(); + size_t pos = 0; + for (unsigned i = 0; i < NUM_DATA_NODES; ++i) { + std::string node_name = " data-" + ucs::to_string(i) + ":"; + pos = data.find(node_name, pos); + EXPECT_NE(pos, std::string::npos) << node_name << " not found"; + for (unsigned j = 0; j < NUM_COUNTERS; ++j) { + std::string value = "counter" + + ucs::to_string(j) + + ": " + + ucs::to_string((j + 1) * 10); + pos = data.find(value, pos); + EXPECT_NE(pos, std::string::npos) << value << " not found"; + } + } + close_pipes(); + } + + virtual std::string stats_trigger_config() { + return "exit"; + } +}; + +UCS_TEST_F(stats_on_demand_test, null_root) { + ucs_stats_node_t *cat_node; + + static ucs_stats_class_t category_stats_class = { + "category", 0 + }; + ucs_status_t status = UCS_STATS_NODE_ALLOC(&cat_node, &category_stats_class, + NULL); + + EXPECT_GE(status, UCS_ERR_INVALID_PARAM); +} + +UCS_TEST_F(stats_udp_test, report) { + ucs_stats_node_t *cat_node; + ucs_stats_node_t *data_nodes[NUM_DATA_NODES] = {NULL}; + + prepare_nodes(&cat_node, data_nodes); + wait_for_stats(); + read_and_check_stats(data_nodes); + free_nodes(cat_node, data_nodes); +} + +UCS_TEST_F(stats_file_test, report) { + ucs_stats_node_t *cat_node; + ucs_stats_node_t *data_nodes[NUM_DATA_NODES] = {NULL}; + + prepare_nodes(&cat_node, data_nodes); + ucs_stats_dump(); + free_nodes(cat_node, data_nodes); + + std::string data = get_data(); + FILE *f = fmemopen(&data[0], data.size(), "rb"); + ucs_stats_node_t *root; + ucs_status_t status = ucs_stats_deserialize(f, &root); + ASSERT_UCS_OK(status); + fclose(f); + + check_tree(root, data_nodes); + ucs_stats_free(root); +} + +UCS_TEST_F(stats_on_demand_test, report) { + ucs_stats_node_t *cat_node; + ucs_stats_node_t *data_nodes[NUM_DATA_NODES] = {NULL}; + + prepare_nodes(&cat_node, data_nodes); + ucs_stats_dump(); + wait_for_stats(); + read_and_check_stats(data_nodes); + free_nodes(cat_node, data_nodes); 
+} + +UCS_TEST_F(stats_on_signal_test, report) { + ucs_stats_node_t *cat_node; + ucs_stats_node_t *data_nodes[NUM_DATA_NODES] = {NULL}; + + prepare_nodes(&cat_node, data_nodes); + kill(getpid(), SIGUSR1); + wait_for_stats(); + read_and_check_stats(data_nodes); + free_nodes(cat_node, data_nodes); +} + +UCS_TEST_F(stats_on_exit_test, dump) { + ucs_stats_node_t *cat_node; + ucs_stats_node_t *data_nodes[NUM_DATA_NODES] = {NULL}; + + prepare_nodes(&cat_node, data_nodes); + free_nodes(cat_node, data_nodes); +} + +UCS_MT_TEST_F(stats_file_test, mt_add_remove, 10) { + ucs_stats_node_t *cat_node; + ucs_stats_node_t *data_nodes[NUM_DATA_NODES] = {NULL}; + unsigned i; + + for (i = 0; i < 100; i++) { + prepare_nodes(&cat_node, data_nodes); + free_nodes(cat_node, data_nodes); + } +} + +#endif diff --git a/test/gtest/ucs/test_stats_filter.cc b/test/gtest/ucs/test_stats_filter.cc new file mode 100644 index 0000000..833412a --- /dev/null +++ b/test/gtest/ucs/test_stats_filter.cc @@ -0,0 +1,284 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2013. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +#include +extern "C" { +#include +#include +#include +} + +#include +#include + +#if ENABLE_STATS + +class stats_filter_test : public ucs::test { +public: + + stats_filter_test() { + size_t size = sizeof(ucs_stats_class_t) + + NUM_COUNTERS * sizeof(m_data_stats_class->counter_names[0]); + m_data_stats_class = (ucs_stats_class_t*)malloc(size); + m_data_stats_class->name = "data"; + m_data_stats_class->num_counters = NUM_COUNTERS; + m_data_stats_class->counter_names[0] = "counter0"; + m_data_stats_class->counter_names[1] = "counter1"; + m_data_stats_class->counter_names[2] = "counter2"; + m_data_stats_class->counter_names[3] = "counter3"; + + cat_node = NULL; + data_nodes[0] = data_nodes[1] = data_nodes[2] = NULL; + } + + ~stats_filter_test() { + free(m_data_stats_class); + } + + virtual void init() { + ucs::test::init(); + ucs_stats_cleanup(); + push_config(); + modify_config("STATS_DEST", stats_dest_config().c_str()); + modify_config("STATS_TRIGGER", stats_trigger_config().c_str()); + modify_config("STATS_FORMAT", stats_format_config().c_str()); + ucs_stats_init(); + ASSERT_TRUE(ucs_stats_is_active()); + } + + virtual void cleanup() { + ucs_stats_cleanup(); + pop_config(); + ucs_stats_init(); + ucs::test::cleanup(); + } + + virtual std::string stats_dest_config() = 0; + virtual std::string stats_trigger_config() = 0; + virtual std::string stats_format_config() = 0; + + void prepare_nodes() { + static ucs_stats_class_t category_stats_class = { + "category", 0 + }; + + ucs_status_t status = UCS_STATS_NODE_ALLOC(&cat_node, &category_stats_class, + ucs_stats_get_root()); + ASSERT_UCS_OK(status); + for (unsigned i = 0; i < NUM_DATA_NODES; ++i) { + status = UCS_STATS_NODE_ALLOC(&data_nodes[i], m_data_stats_class, + cat_node, "-%d", i); + ASSERT_UCS_OK(status); + + UCS_STATS_UPDATE_COUNTER(data_nodes[i], 0, 10); + UCS_STATS_UPDATE_COUNTER(data_nodes[i], 1, 20); + UCS_STATS_UPDATE_COUNTER(data_nodes[i], 2, 30); + UCS_STATS_UPDATE_COUNTER(data_nodes[i], 3, 40); + } + 
} + + void free_nodes() { + for (unsigned i = 0; i < NUM_DATA_NODES; ++i) { + UCS_STATS_NODE_FREE(data_nodes[i]); + } + UCS_STATS_NODE_FREE(cat_node); + } + +protected: + static const unsigned NUM_DATA_NODES = 3; + static const unsigned NUM_COUNTERS = 4; + + ucs_stats_class_t *m_data_stats_class; + ucs_stats_node_t *cat_node; + ucs_stats_node_t *data_nodes[NUM_DATA_NODES]; +}; + + +class stats_filter_text_test : public stats_filter_test { +public: + stats_filter_text_test() { + m_pipefds[0] = -1; + m_pipefds[1] = -1; + } + + virtual void init() { + /* Note: this test assumes data <64k, o/w stats dump will block forever */ + int ret = pipe(m_pipefds); + ASSERT_EQ(0, ret); + modify_config("STATS_FILTER", "*counter*"); + stats_filter_test::init(); + } + + void close_pipes() + { + close(m_pipefds[0]); + close(m_pipefds[1]); + m_pipefds[0] = -1; + m_pipefds[1] = -1; + } + + virtual void cleanup() { + stats_filter_test::cleanup(); + close_pipes(); + } + + virtual std::string stats_dest_config() { + return "file:/dev/fd/" + ucs::to_string(m_pipefds[1]) + ""; + } + + std::string get_data() { + std::string data(65536, '\0'); + ssize_t ret = read(m_pipefds[0], &data[0], data.size()); + EXPECT_GE(ret, 0); + data.resize(ret); + return data; + } + + virtual std::string stats_trigger_config() { + return ""; + } + +protected: + int m_pipefds[2]; +}; + + +class stats_filter_report : public stats_filter_text_test { +public: + + virtual std::string stats_format_config() { + return "full"; + } + +}; + +class stats_filter_agg : public stats_filter_text_test { +public: + + virtual std::string stats_format_config() { + return "agg"; + } + +}; + +class stats_filter_summary : public stats_filter_text_test { +public: + + virtual std::string stats_format_config() { + return "summary"; + } + +}; + + +UCS_TEST_F(stats_filter_report, report) { + prepare_nodes(); + ucs_stats_dump(); + free_nodes(); + + std::string data = get_data(); + FILE *f = fmemopen(&data[0], data.size(), "rb"); + 
std::string output = ""; + char s[80]; + while (!feof(f)) { + int term = fread(&s, 1, sizeof(s) - 1, f); + if (term > 0) { + s[term]=0; + output += std::string(s); + } else { + break; + } + } + + std::string header = std::string(ucs_get_host_name()) + ":" + + ucs::to_string(getpid()); + + std::string compared_string = header.substr(0, UCS_STAT_NAME_MAX - 1) + ":" + + "\n category:\n" + + " data-0:\n" + + " counter0: 10\n" + + " counter1: 20\n" + + " counter2: 30\n" + + " counter3: 40\n" + + " data-1:\n" + + " counter0: 10\n" + + " counter1: 20\n" + + " counter2: 30\n" + + " counter3: 40\n" + + " data-2:\n" + + " counter0: 10\n" + + " counter1: 20\n" + + " counter2: 30\n" + + " counter3: 40\n\n"; + EXPECT_EQ(compared_string, output); + fclose(f); +} + +UCS_TEST_F(stats_filter_agg, report_agg) { + + prepare_nodes(); + ucs_stats_dump(); + free_nodes(); + + std::string data = get_data(); + FILE *f = fmemopen(&data[0], data.size(), "rb"); + std::string output = ""; + char s[80]; + while (!feof(f)) { + int term = fread(&s, 1, sizeof(s) - 1, f); + if (term > 0) { + s[term]=0; + output += std::string(s); + } else { + break; + } + } + + std::string header = std::string(ucs_get_host_name()) + ":" + + ucs::to_string(getpid()); + + std::string compared_string = header.substr(0, UCS_STAT_NAME_MAX - 1) + + ":" + + "\n category:\n" + " data*:\n" + " counter0: 30\n" + " counter1: 60\n" + " counter2: 90\n" + " counter3: 120\n\n"; + EXPECT_EQ(compared_string, output); + fclose(f); +} + +UCS_TEST_F(stats_filter_summary, summary) { + prepare_nodes(); + ucs_stats_dump(); + free_nodes(); + + std::string data = get_data(); + FILE *f = fmemopen(&data[0], data.size(), "rb"); + std::string output = ""; + char s[80]; + while (!feof(f)) { + int term = fread(&s, 1, sizeof(s) - 1, f); + if (term > 0) { + s[term]=0; + output += std::string(s); + } else { + break; + } + } + + std::string node_name = std::string(ucs_get_host_name()) + ":" + + ucs::to_string(getpid()); + 
node_name.resize(std::min(node_name.length(), UCS_STAT_NAME_MAX - 1)); + std::string compared_string = node_name + + ":data*:{counter0:30 counter1:60 " + + "counter2:90 counter3:120} \n"; + EXPECT_EQ(compared_string, output); + fclose(f); +} + +#endif diff --git a/test/gtest/ucs/test_strided_alloc.cc b/test/gtest/ucs/test_strided_alloc.cc new file mode 100644 index 0000000..e309b5a --- /dev/null +++ b/test/gtest/ucs/test_strided_alloc.cc @@ -0,0 +1,63 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2018. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +} + +#include +#include +#include + +class test_strided_alloc : public ucs::test { +protected: + static const size_t area_size = 64; + static const unsigned num_areas = 3; +}; + + +UCS_TEST_F(test_strided_alloc, basic) { + + ucs_strided_alloc_t sa; + + ucs_strided_alloc_init(&sa, area_size, num_areas); + + std::vector objs; + for (size_t i = 0; i < 2; ++i) { + /* allocate */ + void *base = ucs_strided_alloc_get(&sa, "test"); + + for (unsigned j = 0; j < num_areas; ++j) { + void *area = ucs_strided_elem_get(base, 0, j); + memset(area, i*j, area_size); + } + + /* save in a vector */ + objs.push_back(base); + } + + /* check data integrity */ + char buf[area_size]; + for (size_t i = 0; i < objs.size(); ++i) { + void *base = objs[i]; + + for (unsigned j = 0; j < num_areas; ++j) { + void *area = ucs_strided_elem_get(base, 0, j); + memset(buf, i*j, area_size); + EXPECT_EQ(0, memcmp(area, buf, area_size)); + } + } + + /* release */ + while (!objs.empty()) { + void *base = objs.back(); + objs.pop_back(); + ucs_strided_alloc_put(&sa, base); + } + + ucs_strided_alloc_cleanup(&sa); +} diff --git a/test/gtest/ucs/test_string.cc b/test/gtest/ucs/test_string.cc new file mode 100644 index 0000000..ec34073 --- /dev/null +++ b/test/gtest/ucs/test_string.cc @@ -0,0 +1,87 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. 
+* +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +#include +#include +} + +class test_string : public ucs::test { +}; + +UCS_TEST_F(test_string, trim) { + char str1[] = " foo "; + EXPECT_EQ("foo", std::string(ucs_strtrim(str1))); + + char str2[] = " foo foo "; + EXPECT_EQ("foo foo", std::string(ucs_strtrim(str2))); +} + +class test_string_buffer : public ucs::test { +}; + + +UCS_TEST_F(test_string_buffer, appendf) { + ucs_string_buffer_t strb; + + ucs_string_buffer_init(&strb); + + ucs_string_buffer_appendf(&strb, "%s", "We,"); + ucs_string_buffer_appendf(&strb, "%s", " Created,"); + ucs_string_buffer_appendf(&strb, "%s-%s", " The", "Monster"); + + EXPECT_EQ("We, Created, The-Monster", + std::string(ucs_string_buffer_cstr(&strb))); + + ucs_string_buffer_cleanup(&strb); +} + +UCS_TEST_F(test_string_buffer, rtrim) { + static const char *test_string = "wabbalubbadabdab"; + ucs_string_buffer_t strb; + + ucs_string_buffer_init(&strb); + ucs_string_buffer_appendf(&strb, "%s%s", test_string, ",,"); + ucs_string_buffer_rtrim(&strb, ","); + EXPECT_EQ(std::string(test_string), ucs_string_buffer_cstr(&strb)); + ucs_string_buffer_cleanup(&strb); + + ucs_string_buffer_init(&strb); + ucs_string_buffer_appendf(&strb, "%s%s", test_string, " \t \n \r "); + ucs_string_buffer_rtrim(&strb, NULL); + EXPECT_EQ(std::string(test_string), ucs_string_buffer_cstr(&strb)); + ucs_string_buffer_cleanup(&strb); +} + +class test_string_set : public ucs::test { +}; + +UCS_TEST_F(test_string_set, add) { + ucs_string_set_t sset; + + ucs_string_set_init(&sset); + + ucs_string_set_add(&sset, "We"); + ucs_string_set_addf(&sset, "%s", "Created"); + ucs_string_set_addf(&sset, "%s-%s", "The", "Monster"); + + EXPECT_TRUE (ucs_string_set_contains(&sset, "We")); + EXPECT_FALSE(ucs_string_set_contains(&sset, "Created ")); + EXPECT_TRUE (ucs_string_set_contains(&sset, "Created")); + + ucs_string_buffer_t strb; + ucs_string_buffer_init(&strb); + ucs_string_set_print_sorted(&sset, &strb, 
","); + + EXPECT_EQ("Created,The-Monster,We", + std::string(ucs_string_buffer_cstr(&strb))); + + ucs_string_buffer_cleanup(&strb); + + ucs_string_set_cleanup(&sset); +} diff --git a/test/gtest/ucs/test_sys.cc b/test/gtest/ucs/test_sys.cc new file mode 100644 index 0000000..386f469 --- /dev/null +++ b/test/gtest/ucs/test_sys.cc @@ -0,0 +1,158 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +#include +#include +#include +#include +#include +} + +#include +#include + +class test_sys : public ucs::test { +protected: + static int get_mem_prot(void *address, size_t size) { + return ucs_get_mem_prot((uintptr_t)address, (uintptr_t)address + size); + } + + void test_memunits(size_t size, const char *expected) { + char buf[256]; + + ucs_memunits_to_str(size, buf, sizeof(buf)); + EXPECT_EQ(std::string(expected), buf); + } + + static void check_cache_type(ucs_cpu_cache_type_t type, const char *name) + { + size_t cache; + char memunits[32]; + + cache = ucs_cpu_get_cache_size(type); + + ucs_memunits_to_str(cache, memunits, sizeof(memunits)); + UCS_TEST_MESSAGE << name << " cache: " << memunits; + } +}; + +UCS_TEST_F(test_sys, uuid) { + std::set uuids; + for (unsigned i = 0; i < 10000; ++i) { + uint64_t uuid = ucs_generate_uuid(0); + std::pair::iterator, bool> ret = uuids.insert(uuid); + ASSERT_TRUE(ret.second); + } +} + +UCS_TEST_F(test_sys, machine_guid) { + uint64_t guid1 = ucs_machine_guid(); + uint64_t guid2 = ucs_machine_guid(); + EXPECT_EQ(guid1, guid2); +} + +UCS_TEST_F(test_sys, spinlock) { + ucs_spinlock_t lock; + pthread_t self; + + self = pthread_self(); + + ucs_spinlock_init(&lock); + + ucs_spin_lock(&lock); + EXPECT_TRUE(ucs_spin_is_owner(&lock, self)); + + /* coverity[double_lock] */ + ucs_spin_lock(&lock); + EXPECT_TRUE(ucs_spin_is_owner(&lock, self)); + + ucs_spin_unlock(&lock); + EXPECT_TRUE(ucs_spin_is_owner(&lock, self)); + + /* coverity[double_unlock] 
*/ + ucs_spin_unlock(&lock); + EXPECT_FALSE(ucs_spin_is_owner(&lock, self)); +} + +UCS_TEST_F(test_sys, get_mem_prot) { + int x; + + ASSERT_TRUE( get_mem_prot(&x, sizeof(x)) & PROT_READ ); + ASSERT_TRUE( get_mem_prot(&x, sizeof(x)) & PROT_WRITE ); + ASSERT_TRUE( get_mem_prot((void*)&get_mem_prot, 1) & PROT_EXEC ); + + ucs_time_t start_time = ucs_get_time(); + get_mem_prot(&x, sizeof(x)); + ucs_time_t duration = ucs_get_time() - start_time; + UCS_TEST_MESSAGE << "Time: " << ucs_time_to_usec(duration) << " us"; +} + +UCS_TEST_F(test_sys, fcntl) { + ucs_status_t status; + int fd, fl; + + fd = open("/dev/null", O_RDONLY); + if (fd < 0) { + FAIL(); + } + + /* Add */ + status = ucs_sys_fcntl_modfl(fd, O_NONBLOCK, 0); + EXPECT_TRUE(status == UCS_OK); + + fl = fcntl(fd, F_GETFL); + EXPECT_GE(fl, 0); + EXPECT_TRUE(fl & O_NONBLOCK); + + /* Remove */ + status = ucs_sys_fcntl_modfl(fd, 0, O_NONBLOCK); + EXPECT_TRUE(status == UCS_OK); + + fl = fcntl(fd, F_GETFL); + EXPECT_GE(fl, 0); + EXPECT_FALSE(fl & O_NONBLOCK); + + close(fd); +} + +UCS_TEST_F(test_sys, memory) { + size_t phys_size = ucs_get_phys_mem_size(); + UCS_TEST_MESSAGE << "Physical memory size: " << ucs::size_value(phys_size); + EXPECT_GT(phys_size, 1ul * 1024 * 1024); +} + +extern "C" { +int test_module_loaded = 0; +} + +UCS_TEST_F(test_sys, module) { + UCS_MODULE_FRAMEWORK_DECLARE(test); + + EXPECT_EQ(0, test_module_loaded); + UCS_MODULE_FRAMEWORK_LOAD(test, 0); + EXPECT_EQ(1, test_module_loaded); +} + +UCS_TEST_F(test_sys, memunits_to_str) { + test_memunits(256, "256"); + test_memunits(1256, "1256"); + test_memunits(UCS_KBYTE, "1K"); + test_memunits(UCS_MBYTE + UCS_KBYTE, "1025K"); + test_memunits(UCS_GBYTE, "1G"); + test_memunits(2 * UCS_GBYTE, "2G"); + test_memunits(UCS_TBYTE, "1T"); + test_memunits(UCS_TBYTE * 1024, "1024T"); +} + +UCS_TEST_F(test_sys, cpu_cache) { + check_cache_type(UCS_CPU_CACHE_L1d, "L1d"); + check_cache_type(UCS_CPU_CACHE_L1i, "L1i"); + check_cache_type(UCS_CPU_CACHE_L2, "L2"); + 
check_cache_type(UCS_CPU_CACHE_L3, "L3"); +} diff --git a/test/gtest/ucs/test_time.cc b/test/gtest/ucs/test_time.cc new file mode 100644 index 0000000..904b36c --- /dev/null +++ b/test/gtest/ucs/test_time.cc @@ -0,0 +1,145 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +} + +#include + +class test_time : public ucs::test { +}; + +UCS_TEST_F(test_time, time_calc) { + double value = ucs::rand() % UCS_USEC_PER_SEC; + + EXPECT_NEAR(value * 1000, ucs_time_to_msec(ucs_time_from_sec (value)), 0.000001); + EXPECT_NEAR(value * 1000, ucs_time_to_usec(ucs_time_from_msec(value)), 0.001); + EXPECT_NEAR(value * 1000, ucs_time_to_nsec(ucs_time_from_usec(value)), 10.0); +} + +/* This test is only useful when used with high-precision timers */ +#if HAVE_HW_TIMER +UCS_TEST_SKIP_COND_F(test_time, get_time, + (ucs::test_time_multiplier() > 1)) { + ucs_time_t time1 = ucs_get_time(); + ucs_time_t time2 = ucs_get_time(); + EXPECT_GE(time2, time1); + + ucs_time_t start_time = ucs_get_time(); + ucs_time_t end_time = start_time + ucs_time_from_sec(1); + ucs_time_t current_time; + + time_t system_start_time = time(NULL); + + uint64_t count = 0; + do { + current_time = ucs_get_time(); + ++count; + } while (current_time <= end_time); + + /* Check the sleep interval is correct */ + if (ucs::perf_retry_count) { + ASSERT_NEAR(1.0, time(NULL) - system_start_time, 1.00001); + + double nsec = (ucs_time_to_nsec(current_time - start_time)) / count; + EXPECT_LT(nsec, 40.0) << "ucs_get_time() performance is too bad"; + } +} +#endif + +UCS_TEST_F(test_time, timerq) { + static const int TIMER_ID_1 = 100; + static const int TIMER_ID_2 = 200; + + ucs_timer_queue_t timerq; + ucs_status_t status; + + for (unsigned test_count = 0; test_count < 500; ++test_count) { + + const ucs_time_t 
interval1 = (ucs::rand() % 20) + 1; + const ucs_time_t interval2 = (ucs::rand() % 20) + 1; + const ucs_time_t test_time = ucs::rand() % 10000; + const ucs_time_t time_base = ucs::rand(); + ucs_timer_t *timer; + unsigned counter1, counter2; + + status = ucs_timerq_init(&timerq); + ASSERT_UCS_OK(status); + + EXPECT_TRUE(ucs_timerq_is_empty(&timerq)); + EXPECT_EQ(UCS_TIME_INFINITY, ucs_timerq_min_interval(&timerq)); + + ucs_time_t current_time = time_base; + + ucs_timerq_add(&timerq, TIMER_ID_1, interval1); + ucs_timerq_add(&timerq, TIMER_ID_2, interval2); + + EXPECT_FALSE(ucs_timerq_is_empty(&timerq)); + EXPECT_EQ(std::min(interval1, interval2), ucs_timerq_min_interval(&timerq)); + + /* + * Check that both timers are invoked + */ + counter1 = 0; + counter2 = 0; + for (unsigned count = 0; count < test_time; ++count) { + ++current_time; + ucs_timerq_for_each_expired(timer, &timerq, current_time, { + if (timer->id == TIMER_ID_1) ++counter1; + if (timer->id == TIMER_ID_2) ++counter2; + }) + } + EXPECT_NEAR(test_time / interval1, counter1, 1); + EXPECT_NEAR(test_time / interval2, counter2, 1); + + /* + * Check that after canceling, only one timer is invoked + */ + counter1 = 0; + counter2 = 0; + status = ucs_timerq_remove(&timerq, TIMER_ID_1); + ASSERT_UCS_OK(status); + for (unsigned count = 0; count < test_time; ++count) { + ++current_time; + ucs_timerq_for_each_expired(timer, &timerq, current_time, { + if (timer->id == TIMER_ID_1) ++counter1; + if (timer->id == TIMER_ID_2) ++counter2; + }) + } + EXPECT_EQ(0u, counter1); + EXPECT_NEAR(test_time / interval2, counter2, 1); + EXPECT_EQ(interval2, ucs_timerq_min_interval(&timerq)); + + /* + * Check that after rescheduling, both timers are invoked again + */ + ucs_timerq_add(&timerq, TIMER_ID_1, interval1); + + counter1 = 0; + counter2 = 0; + for (unsigned count = 0; count < test_time; ++count) { + ++current_time; + ucs_timerq_for_each_expired(timer, &timerq, current_time, { + if (timer->id == TIMER_ID_1) ++counter1; + if 
(timer->id == TIMER_ID_2) ++counter2; + }) + } + EXPECT_NEAR(test_time / interval1, counter1, 1); + EXPECT_NEAR(test_time / interval2, counter2, 1); + + status = ucs_timerq_remove(&timerq, TIMER_ID_1); + ASSERT_UCS_OK(status); + status = ucs_timerq_remove(&timerq, TIMER_ID_2); + ASSERT_UCS_OK(status); + + ucs_timerq_cleanup(&timerq); + } +} + + diff --git a/test/gtest/ucs/test_twheel.cc b/test/gtest/ucs/test_twheel.cc new file mode 100644 index 0000000..3c8b89a --- /dev/null +++ b/test/gtest/ucs/test_twheel.cc @@ -0,0 +1,241 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +} + +#include + +/** + * note: the fast timer precision is dependent on context switch latency!!! + * expected timer precision 2x wheel resolution plus some time for processing and + * context switching + */ +class twheel : public ucs::test { +protected: + + struct hr_timer { + ucs_wtimer_t timer; + int tid; + ucs_time_t start_time; + ucs_time_t end_time; + ucs_time_t d; + ucs_time_t total_time; + twheel *self; + }; + + ucs_twheel_t m_wheel; + + // @override + virtual void init(); + + // @override + virtual void cleanup(); + + static void timer_func(ucs_wtimer_t *self); + void timer_expired(struct hr_timer *t); + void add_timer(struct hr_timer *t); + void init_timer(struct hr_timer *t, int id); + void init_timerv(struct hr_timer *v, int n); + void set_timer_delta(struct hr_timer *t, int how); +}; + +void twheel::init() +{ + ucs_twheel_init(&m_wheel, ucs_time_from_usec(32) * ucs::test_time_multiplier(), + ucs_get_time()); +} + +void twheel::cleanup() +{ + ucs_twheel_cleanup(&m_wheel); +} + +void twheel::timer_func(ucs_wtimer_t *self) +{ + struct hr_timer *t = ucs_container_of(self, struct hr_timer, timer); + t->self->timer_expired(t); +} + +void twheel::timer_expired(struct hr_timer *t) +{ + t->total_time += (m_wheel.now - 
t->start_time); + t->end_time = m_wheel.now; +} + +void twheel::add_timer(struct hr_timer *t) +{ + t->end_time = 0; + ASSERT_EQ(ucs_wtimer_add(&m_wheel, &t->timer, t->d), UCS_OK); + t->start_time = ucs_get_time(); +} + +void twheel::init_timer(struct hr_timer *t, int id) +{ + t->tid = id; + t->total_time = 0; + t->self = this; + ucs_wtimer_init(&t->timer, timer_func); +} + +void twheel::init_timerv(struct hr_timer *v, int n) +{ + for (int i = 0; i < n; i++) { + init_timer(&v[i], i); + } +} + +void twheel::set_timer_delta(struct hr_timer *t, int how) +{ + int slot; + + switch (how) { + case 0: + /* first */ + slot = 1; + break; + case 1: + /* last */ + slot = m_wheel.num_slots - 1; + break; + case 2: + /* middle */ + slot = m_wheel.num_slots / 2; + break; + case -2: + /* overflow */ + slot = m_wheel.num_slots + (ucs::rand() % 1000000); + break; + default: + slot = 1 + ucs::rand() % (m_wheel.num_slots - 2); + break; + } + + if (how == -2) { + t->d = m_wheel.res + m_wheel.res * (m_wheel.num_slots - 1) / 2; + } else { + t->d = m_wheel.res + m_wheel.res * slot / 2; + } +} + +#define N_LOOPS 20 + +UCS_TEST_SKIP_COND_F(twheel, precision_single, true) { + // Test is broken +#if 0 + struct hr_timer t; + ucs_time_t now; + int i, k; + int fail_count; + + init_timer(&t, 0); + for (k = 0; k < 10; k++ ) { + set_timer_delta(&t, k); + fail_count = 0; + for (i = 0; i < N_LOOPS; i++) { + t.total_time = 0; + add_timer(&t); + do { + now = ucs_get_time(); + ucs_twheel_sweep(&m_wheel, now); + } while (t.end_time == 0); + + if ((ucs_time_t)::abs(t.total_time - t.d) > 2 * m_wheel.res) { + ++fail_count; + } + } + EXPECT_LE(fail_count, N_LOOPS / 3); + } +#endif +} + +#define N_TIMERS 10000 + +UCS_TEST_SKIP_COND_F(twheel, precision_multi, true) { + // Test is broken +#if 0 + std::vector t(N_TIMERS); + ucs_time_t start, now, eps; + init_timerv(&t[0], N_TIMERS); + for (int i = 0; i < N_TIMERS; i++) { + set_timer_delta(&t[i], i); + add_timer(&t[i]); + } + + start = ucs_get_time(); + /* all 
timers were delayed by at most eps */ + eps = start - m_wheel.now; + do { + now = ucs_get_time(); + ucs_twheel_sweep(&m_wheel, now); + } while (now < start + m_wheel.res * m_wheel.num_slots); + + /* all timers should ve been triggered + * correct delta + */ + for (int i = 0; i < N_TIMERS; i++) { + EXPECT_NE(t[i].end_time, (ucs_time_t)0); + EXPECT_NEAR(t[i].total_time, t[i].d, 2 * m_wheel.res + eps); + } +#endif +} + +UCS_TEST_F(twheel, add_twice) { + struct hr_timer t; + + init_timer(&t, 0); + + set_timer_delta(&t, -1); + add_timer(&t); + + set_timer_delta(&t, -1); + EXPECT_EQ(ucs_wtimer_add(&m_wheel, &t.timer, t.d), UCS_ERR_BUSY); + do { + ucs_twheel_sweep(&m_wheel, ucs_get_time()); + /* coverity[loop_condition] */ + } while(t.end_time == 0); +} + + +UCS_TEST_SKIP_COND_F(twheel, add_overflow, true) { + // Test is broken +#if 0 + struct hr_timer t; + init_timer(&t, 0); + + t.total_time = 0; + set_timer_delta(&t, -2); + for (int i = 0; i < N_LOOPS; i++) { + add_timer(&t); + do { + ucs_twheel_sweep(&m_wheel, ucs_get_time()); + } while (t.end_time == 0); + } + EXPECT_NEAR(t.total_time , t.d * N_LOOPS, 4 * N_LOOPS * m_wheel.res); +#endif +} + +UCS_TEST_F(twheel, delayed_sweep) { + std::vector t(N_TIMERS); + + init_timerv(&t[0], N_TIMERS); + for (int i = 0; i < N_TIMERS; i++) { + set_timer_delta(&t[i], i); + add_timer(&t[i]); + } + + sleep(1); + + ucs_twheel_sweep(&m_wheel, ucs_get_time()); + + /* all timers should have been triggered */ + for (int i = 0; i < N_TIMERS; i++) { + EXPECT_NE(t[i].end_time, (ucs_time_t)0); + } +} + diff --git a/test/gtest/ucs/test_type.cc b/test/gtest/ucs/test_type.cc new file mode 100644 index 0000000..333fea4 --- /dev/null +++ b/test/gtest/ucs/test_type.cc @@ -0,0 +1,75 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */ + +#include +extern "C" { +#include +#include +#include +} + +#include + +class test_type : public ucs::test { +}; + +UCS_TEST_F(test_type, cpu_set) { + ucs_cpu_set_t cpu_mask; + + UCS_CPU_ZERO(&cpu_mask); + EXPECT_FALSE(ucs_cpu_is_set(117, &cpu_mask)); + EXPECT_FALSE(ucs_cpu_is_set(127, &cpu_mask)); + EXPECT_EQ(0, ucs_cpu_set_find_lcs(&cpu_mask)); + + UCS_CPU_SET(127, &cpu_mask); + UCS_CPU_SET(117, &cpu_mask); + EXPECT_TRUE(ucs_cpu_is_set(117, &cpu_mask)); + EXPECT_TRUE(ucs_cpu_is_set(127, &cpu_mask)); + EXPECT_EQ(117, ucs_cpu_set_find_lcs(&cpu_mask)); + + UCS_CPU_CLR(117, &cpu_mask); + EXPECT_FALSE(ucs_cpu_is_set(117, &cpu_mask)); + EXPECT_TRUE(ucs_cpu_is_set(127, &cpu_mask)); + EXPECT_EQ(127, ucs_cpu_set_find_lcs(&cpu_mask)); + + UCS_CPU_CLR(127, &cpu_mask); + EXPECT_FALSE(ucs_cpu_is_set(117, &cpu_mask)); + EXPECT_FALSE(ucs_cpu_is_set(127, &cpu_mask)); + EXPECT_EQ(0, ucs_cpu_set_find_lcs(&cpu_mask)); +} + +UCS_TEST_F(test_type, status) { + void *ptr = (void*)0xff00000000ul; + EXPECT_TRUE(UCS_PTR_IS_PTR(ptr)); + EXPECT_FALSE(UCS_PTR_IS_PTR(NULL)); + EXPECT_NE(UCS_OK, UCS_PTR_STATUS(ptr)); +} + +class test_init_once: public test_type { +protected: + test_init_once() : m_once(INIT_ONCE_INIT), m_count(0) {}; + + /* counter is not atomic, we expect the lock of init_once will protect it */ + ucs_init_once_t m_once; + int m_count; + +private: + static const ucs_init_once_t INIT_ONCE_INIT; +}; + +const ucs_init_once_t test_init_once::INIT_ONCE_INIT = UCS_INIT_ONCE_INITIALIZER; + +UCS_MT_TEST_F(test_init_once, init_once, 10) { + + for (int i = 0; i < 100; ++i) { + UCS_INIT_ONCE(&m_once) { + ++m_count; + } + } + + EXPECT_EQ(1, m_count); +} + diff --git a/test/gtest/uct/ib/test_cq_moderation.cc b/test/gtest/uct/ib/test_cq_moderation.cc new file mode 100644 index 0000000..aa916cd --- /dev/null +++ b/test/gtest/uct/ib/test_cq_moderation.cc @@ -0,0 +1,180 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. 
+ * + * See file LICENSE for terms. + */ + +extern "C" { +#include +} +#include +#include +#include +#include + +/* wait for 1 sec to get statistics */ +static const unsigned long test_period_usec = (1ul * UCS_USEC_PER_SEC); +static const unsigned moderation_period_usec = 1000; /* usecs */ +/* use multiplier 2 because we have same iface to send/recv which may produce 2x events */ +static const unsigned event_limit = (2 * test_period_usec / moderation_period_usec); +static const unsigned max_repeats = 60; /* max 3 minutes per test */ + +class test_uct_cq_moderation : public uct_test { +protected: + + void init() { + if (RUNNING_ON_VALGRIND) { + UCS_TEST_SKIP_R("skipping on valgrind"); + } + + if (!has_rc() && !has_ud()) { + UCS_TEST_SKIP_R("unsupported"); + } + + uct_test::init(); + + if (has_rc()) { + set_config("RC_FC_ENABLE=n"); + } + + set_config(std::string("IB_TX_EVENT_MOD_PERIOD=") + ucs::to_string(moderation_period_usec) + "us"); + set_config(std::string("IB_RX_EVENT_MOD_PERIOD=") + ucs::to_string(moderation_period_usec) + "us"); + + m_sender = uct_test::create_entity(0); + m_entities.push_back(m_sender); + + check_skip_test(); + + m_receiver = uct_test::create_entity(0); + m_entities.push_back(m_receiver); + } + + void connect() { + m_sender->connect(0, *m_receiver, 0); + short_progress_loop(10); /* Some transports need time to become ready */ + } + + void disconnect() { + flush(); + if (m_receiver->iface_attr().cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { + m_receiver->destroy_ep(0); + } + m_sender->destroy_ep(0); + } + + void iface_arm(uct_iface_h iface) { + struct pollfd pfd; + int fd; + + /* wait for all messages are arrived */ + while (m_recv < m_send) { + progress(); + } + + uct_iface_event_fd_get(iface, &fd); + + pfd.fd = fd; + pfd.events = POLLIN; + + do { + /* arm all event types */ + while (1) { + if (uct_iface_event_arm(iface, + UCT_EVENT_SEND_COMP | + UCT_EVENT_RECV | + UCT_EVENT_RECV_SIG) != UCS_ERR_BUSY) { + break; + } + progress(); + } + /* 
repeat till FD is in active state */ + } while (poll(&pfd, 1, 0) > 0); + } + + static ucs_status_t am_cb(void *arg, void *data, size_t len, unsigned flags) { + ucs_assert_always(arg != NULL); + test_uct_cq_moderation *self = static_cast(arg); + + self->m_recv++; + + return UCS_OK; + } + + void run_test(uct_iface_h iface); + + entity * m_sender; + entity * m_receiver; + + unsigned m_send; + unsigned m_recv; +}; + +void test_uct_cq_moderation::run_test(uct_iface_h iface) { + unsigned events; + int fd; + unsigned i; + int polled; + struct pollfd pfd; + ucs_status_t status; + + uct_iface_set_am_handler(m_receiver->iface(), 0, am_cb, this, 0); + + connect(); + + m_send = 0; + m_recv = 0; + + uct_iface_event_fd_get(iface, &fd); + pfd.fd = fd; + pfd.events = POLLIN; + + /* repeat test till at least one iteration is successful + * to exclude random fluctuations */ + for (i = 0; i < max_repeats; i++) { + events = 0; + iface_arm(iface); + + ucs_time_t tm = ucs_get_time(); + + while ((ucs_time_to_usec(ucs_get_time()) - ucs_time_to_usec(tm)) < test_period_usec) { + polled = poll(&pfd, 1, 0); + if (polled > 0) { + events++; + iface_arm(iface); + } + + do { + status = uct_ep_am_short(m_sender->ep(0), 0, 0, NULL, 0); + progress(); + } while (status == UCS_ERR_NO_RESOURCE); + m_send++; + ASSERT_UCS_OK(status); + } + m_sender->flush(); + UCS_TEST_MESSAGE << "iteration: " << i + 1 << ", events: " << events + << ", limit: " << event_limit; + if (events <= event_limit) { + break; + } + } + + disconnect(); + + EXPECT_LE(events, event_limit); +} + +UCS_TEST_SKIP_COND_P(test_uct_cq_moderation, send_period, + !check_caps(UCT_IFACE_FLAG_EVENT_SEND_COMP | + UCT_IFACE_FLAG_EVENT_RECV)) { + run_test(m_sender->iface()); +} + +UCS_TEST_SKIP_COND_P(test_uct_cq_moderation, recv_period, + !check_caps(UCT_IFACE_FLAG_EVENT_SEND_COMP | + UCT_IFACE_FLAG_EVENT_RECV)) { + run_test(m_receiver->iface()); +} + +#if HAVE_DECL_IBV_EXP_CQ_MODERATION +UCT_INSTANTIATE_IB_TEST_CASE(test_uct_cq_moderation) 
+#endif /* HAVE_DECL_IBV_EXP_CQ_MODERATION */ diff --git a/test/gtest/uct/ib/test_dc.cc b/test/gtest/uct/ib/test_dc.cc new file mode 100644 index 0000000..c4690c6 --- /dev/null +++ b/test/gtest/uct/ib/test_dc.cc @@ -0,0 +1,748 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2016. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016.All rights reserved. +* See file LICENSE for terms. +*/ + +#include "test_rc.h" + +#include +#include + +extern "C" { +#include +#include +#include +} + + +#define UCT_DC_INSTANTIATE_TEST_CASE(_test_case) \ + _UCT_INSTANTIATE_TEST_CASE(_test_case, dc_mlx5) + + +class test_dc : public test_rc { +public: + virtual void init() { + uct_test::init(); + + m_e1 = uct_test::create_entity(0); + m_entities.push_back(m_e1); + + m_e2 = uct_test::create_entity(0); + m_entities.push_back(m_e2); + + uct_iface_set_am_handler(m_e1->iface(), 0, am_dummy_handler, NULL, 0); + uct_iface_set_am_handler(m_e2->iface(), 0, am_dummy_handler, NULL, 0); + } + + entity* create_rand_entity() { + if (UCS_OK != uct_config_modify(m_iface_config, "DC_TX_POLICY", "rand")) { + UCS_TEST_ABORT("Error: cannot enable random DCI policy"); + } + entity *rand_e = uct_test::create_entity(0); + m_entities.push_back(rand_e); + return rand_e; + } + + static uct_dc_mlx5_iface_t* dc_iface(entity *e) { + return ucs_derived_of(e->iface(), uct_dc_mlx5_iface_t); + } + + static uct_dc_mlx5_ep_t* dc_ep(entity *e, int idx) { + return ucs_derived_of(e->ep(idx), uct_dc_mlx5_ep_t); + } + + virtual void cleanup() { + uct_test::cleanup(); + } + + static int n_warnings; + + static ucs_log_func_rc_t + log_ep_destroy(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) + { + if (level != UCS_LOG_LEVEL_WARN) { + /* debug messages are ignored */ + return UCS_LOG_FUNC_RC_CONTINUE; + } + if (strcmp("ep (%p) is destroyed with %d outstanding ops", message) == 0) { + 
n_warnings++; + } + return UCS_LOG_FUNC_RC_STOP; + } + +protected: + static uint32_t m_am_rx_count; + static int m_purge_count; + + struct dcs_comp { + uct_completion_t uct_comp; + entity *e; + } comp; + + static void uct_comp_cb(uct_completion_t *uct_comp, ucs_status_t status) + { + struct dcs_comp *comp = (struct dcs_comp *)uct_comp; + uct_dc_mlx5_ep_t *ep; + + EXPECT_UCS_OK(status); + + ep = dc_ep(comp->e, 0); + /* dci must be released before completion cb is called */ + EXPECT_EQ(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + comp->e->destroy_eps(); + } + + static ucs_status_t am_handler(void *arg, void *data, size_t length, + unsigned flags) + { + ++m_am_rx_count; + return UCS_OK; + } + + static ucs_status_t am_dummy_handler(void *arg, void *data, size_t length, + unsigned flags) { + return UCS_OK; + } + + struct dcs_pending { + uct_pending_req_t uct_req; + entity *e; + int is_done; + } preq; + + static ucs_status_t uct_pending_flush(uct_pending_req_t *uct_req) + { + struct dcs_pending *preq = (struct dcs_pending *)uct_req; + ucs_status_t status; + uct_dc_mlx5_ep_t *ep; + + ep = dc_ep(preq->e, 0); + EXPECT_NE(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + + status = uct_ep_flush(preq->e->ep(0), 0, NULL); + if (status == UCS_OK) { + preq->is_done = 1; + } + return status; + } + + static ucs_status_t uct_pending_dummy(uct_pending_req_t *uct_req) + { + struct dcs_pending *preq = (struct dcs_pending *)uct_req; + uct_dc_mlx5_ep_t *ep; + + ep = dc_ep(preq->e, 0); + + EXPECT_NE(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + + /* simulate arbiter stop because lack of global resorce + * operation still stands on pending + */ + preq->is_done = 1; + return UCS_INPROGRESS; + } + + static void purge_cb(uct_pending_req_t *uct_req, void *arg) + { + struct dcs_pending *preq = (struct dcs_pending *)uct_req; + + EXPECT_NE(UCT_DC_MLX5_EP_NO_DCI, dc_ep(preq->e, 0)->dci); + } + + static void purge_count_cb(uct_pending_req_t *uct_req, void *arg) + { + ++m_purge_count; + } + +}; + +int test_dc::n_warnings = 0; +int 
test_dc::m_purge_count = 0; +uint32_t test_dc::m_am_rx_count = 0; + +UCS_TEST_P(test_dc, dcs_single) { + ucs_status_t status; + uct_dc_mlx5_ep_t *ep; + uct_dc_mlx5_iface_t *iface; + + m_e1->connect_to_iface(0, *m_e2); + ep = dc_ep(m_e1, 0); + iface = dc_iface(m_e1); + EXPECT_EQ(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + status = uct_ep_am_short(m_e1->ep(0), 0, 0, NULL, 0); + EXPECT_UCS_OK(status); + /* dci 0 must be assigned to the ep */ + EXPECT_EQ(iface->tx.dcis_stack[0], ep->dci); + EXPECT_EQ(1, iface->tx.stack_top); + EXPECT_EQ(ep, iface->tx.dcis[ep->dci].ep); + + flush(); + + /* after the flush dci must be released */ + EXPECT_EQ(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + EXPECT_EQ(0, iface->tx.stack_top); + EXPECT_EQ(0, iface->tx.dcis_stack[0]); +} + +UCS_TEST_P(test_dc, dcs_multi) { + ucs_status_t status; + uct_dc_mlx5_ep_t *ep; + uct_dc_mlx5_iface_t *iface; + unsigned i; + + iface = dc_iface(m_e1); + for (i = 0; i <= iface->tx.ndci; i++) { + m_e1->connect_to_iface(i, *m_e2); + } + + for (i = 0; i < iface->tx.ndci; i++) { + ep = dc_ep(m_e1, i); + EXPECT_EQ(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + status = uct_ep_am_short(m_e1->ep(i), 0, 0, NULL, 0); + EXPECT_UCS_OK(status); + + /* dci on free LIFO must be assigned to the ep */ + EXPECT_EQ(iface->tx.dcis_stack[i], ep->dci); + EXPECT_EQ(i+1, iface->tx.stack_top); + EXPECT_EQ(ep, iface->tx.dcis[ep->dci].ep); + } + + /* this should fail because there are no free dci */ + status = uct_ep_am_short(m_e1->ep(i), 0, 0, NULL, 0); + EXPECT_EQ(UCS_ERR_NO_RESOURCE, status); + + flush(); + + /* after the flush dci must be released */ + + EXPECT_EQ(0, iface->tx.stack_top); + for (i = 0; i < iface->tx.ndci; i++) { + ep = dc_ep(m_e1, i); + EXPECT_EQ(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + } +} + +/** + * send message, destroy ep while it is still holding dci. + * Do not crash. 
+ */ +UCS_TEST_P(test_dc, dcs_ep_destroy) { + + ucs_status_t status; + uct_dc_mlx5_ep_t *ep; + uct_dc_mlx5_iface_t *iface; + + + ucs_log_push_handler(log_ep_destroy); + UCS_TEST_SCOPE_EXIT() { ucs_log_pop_handler(); } UCS_TEST_SCOPE_EXIT_END + + m_e1->connect_to_iface(0, *m_e2); + ep = dc_ep(m_e1, 0); + iface = dc_iface(m_e1); + n_warnings = 0; + EXPECT_EQ(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + status = uct_ep_am_short(m_e1->ep(0), 0, 0, NULL, 0); + EXPECT_UCS_OK(status); + /* dci 0 must be assigned to the ep */ + EXPECT_EQ(iface->tx.dcis_stack[0], ep->dci); + EXPECT_EQ(1, iface->tx.stack_top); + EXPECT_EQ(ep, iface->tx.dcis[ep->dci].ep); + + m_e1->destroy_eps(); + EXPECT_EQ(1, iface->tx.stack_top); + + flush(); + EXPECT_EQ(0, iface->tx.stack_top); +} + +/** + * destroy ep from the flush completion. It may not work in general but + * it must work with dc ep + */ +UCS_TEST_P(test_dc, dcs_ep_flush_destroy) { + + ucs_status_t status; + uct_dc_mlx5_ep_t *ep; + uct_dc_mlx5_iface_t *iface; + + m_e1->connect_to_iface(0, *m_e2); + ep = dc_ep(m_e1, 0); + iface = dc_iface(m_e1); + EXPECT_EQ(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + status = uct_ep_am_short(m_e1->ep(0), 0, 0, NULL, 0); + EXPECT_UCS_OK(status); + + EXPECT_EQ(iface->tx.dcis_stack[0], ep->dci); + EXPECT_EQ(1, iface->tx.stack_top); + EXPECT_EQ(ep, iface->tx.dcis[ep->dci].ep); + + comp.uct_comp.count = 1; + comp.uct_comp.func = uct_comp_cb; + comp.e = m_e1; + + status = uct_ep_flush(m_e1->ep(0), 0, &comp.uct_comp); + do { + progress(); + } while (comp.uct_comp.count > 0); + + EXPECT_EQ(0, iface->tx.stack_top); +} + +UCS_TEST_P(test_dc, dcs_ep_flush_pending, "DC_NUM_DCI=1") { + ucs_status_t status; + uct_dc_mlx5_iface_t *iface; + + m_e1->connect_to_iface(0, *m_e2); + m_e1->connect_to_iface(1, *m_e2); + + iface = dc_iface(m_e1); + + /* shorten test time by reducing dci QP resources */ + iface->tx.dcis[0].txqp.available = 8; + do { + status = uct_ep_am_short(m_e1->ep(1), 0, 0, NULL, 0); + } while (status == UCS_OK); + + 
EXPECT_EQ(UCS_ERR_NO_RESOURCE, status); + + /* flush another ep. Flush fails because there is no free dci */ + status = uct_ep_flush(m_e1->ep(0), 0, NULL); + EXPECT_EQ(UCS_ERR_NO_RESOURCE, status); + + /* put flush op on pending */ + preq.is_done = 0; + preq.e = m_e1; + preq.uct_req.func = uct_pending_flush; + status = uct_ep_pending_add(m_e1->ep(0), &preq.uct_req, 0); + EXPECT_UCS_OK(status); + + status = uct_ep_am_short(m_e1->ep(0), 0, 0, NULL, 0); + EXPECT_EQ(UCS_ERR_NO_RESOURCE, status); + + /* progress till ep is flushed */ + do { + progress(); + } while (!preq.is_done); + + /* flush the other active ep */ + flush(); + + status = uct_ep_am_short(m_e1->ep(0), 0, 0, NULL, 0); + EXPECT_EQ(UCS_OK, status); + flush(); + + /* check that ep does not hold dci */ + EXPECT_EQ(0, iface->tx.stack_top); +} + +/* check that ep does not hold dci after purge + */ +UCS_TEST_P(test_dc, dcs_ep_purge_pending, "DC_NUM_DCI=1") { + + ucs_status_t status; + uct_dc_mlx5_iface_t *iface; + uct_dc_mlx5_ep_t *ep; + + m_e1->connect_to_iface(0, *m_e2); + + iface = dc_iface(m_e1); + ep = dc_ep(m_e1, 0); + iface->tx.dcis[0].txqp.available = 8; + + do { + status = uct_ep_am_short(m_e1->ep(0), 0, 0, NULL, 0); + } while (status == UCS_OK); + + EXPECT_EQ(UCS_ERR_NO_RESOURCE, status); + + status = uct_ep_flush(m_e1->ep(0), 0, NULL); + EXPECT_EQ(UCS_ERR_NO_RESOURCE, status); + + /* put flush op on pending */ + preq.is_done = 0; + preq.e = m_e1; + preq.uct_req.func = uct_pending_flush; + status = uct_ep_pending_add(m_e1->ep(0), &preq.uct_req, 0); + EXPECT_UCS_OK(status); + + uct_ep_pending_purge(m_e1->ep(0), purge_cb, NULL); + flush(); + EXPECT_EQ(UCT_DC_MLX5_EP_NO_DCI, ep->dci); +} + +UCS_TEST_P(test_dc, rand_dci_many_eps) { + uct_dc_mlx5_ep_t *ep; + + m_am_rx_count = 0; + entity *rand_e = create_rand_entity(); + uct_iface_set_am_handler(m_e2->iface(), 0, am_handler, NULL, 0); + + uct_dc_mlx5_iface_t *iface = dc_iface(rand_e); + int num_eps = 2 * iface->tx.ndci; + + /* Create more eps than we have 
dcis, all eps should have a valid dci */ + for (int i = 0; i < num_eps; i++) { + rand_e->connect_to_iface(i, *m_e2); + ep = dc_ep(rand_e, i); + EXPECT_NE(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + } + + /* Try to send on all eps (taking into account available resources) */ + uint32_t num_sends = num_eps; + + for (unsigned i = 0; i < num_sends; i++) { + ucs_status_t status = uct_ep_am_short(rand_e->ep(i), 0, 0, NULL, 0); + EXPECT_UCS_OK(status); + } + wait_for_value(&m_am_rx_count, num_sends, true); + + EXPECT_EQ(m_am_rx_count, num_sends); + + flush(); +} + +UCS_TEST_P(test_dc, rand_dci_pending_purge) { + entity *rand_e = create_rand_entity(); + uct_dc_mlx5_iface_t *iface = dc_iface(rand_e); + int num_eps = 5; + int ndci = iface->tx.ndci; + int num_reqs = 10; + int idx = 0; + uct_pending_req_t preq[num_eps * num_reqs * ndci]; + int dci_id; + uct_dc_mlx5_ep_t *ep; + + for (dci_id = 0; dci_id < ndci; ++dci_id) { + for (int i = 0; i < num_eps; i++) { + int ep_id = i + dci_id*ndci; + rand_e->connect_to_iface(ep_id, *m_e2); + ep = dc_ep(rand_e, ep_id); + EXPECT_NE(UCT_DC_MLX5_EP_NO_DCI, ep->dci); + int available = iface->tx.dcis[ep->dci].txqp.available; + iface->tx.dcis[ep->dci].txqp.available = 0; + for (int j = 0; j < num_reqs; ++j, ++idx) { + preq[idx].func = NULL; + ASSERT_UCS_OK(uct_ep_pending_add(rand_e->ep(ep_id), + &preq[idx], 0)); + } + iface->tx.dcis[ep->dci].txqp.available = available; + } + } + + for (dci_id = 0; dci_id < ndci; ++dci_id) { + for (int i = 0; i < num_eps; i++) { + m_purge_count = 0; + uct_ep_pending_purge(rand_e->ep(i + dci_id*ndci), + purge_count_cb, NULL); + EXPECT_EQ(num_reqs, m_purge_count); + } + } + + flush(); +} + +UCS_TEST_SKIP_COND_P(test_dc, stress_iface_ops, + !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY), "DC_NUM_DCI=1") { + + test_iface_ops(dc_iface(m_e1)->tx.dcis[0].txqp.available); +} + +UCT_DC_INSTANTIATE_TEST_CASE(test_dc) + + +class test_dc_flow_control : public test_rc_flow_control { +public: + + /* virtual */ + uct_rc_fc_t* 
get_fc_ptr(entity *e, int ep_idx = 0) { + return &ucs_derived_of(e->ep(ep_idx), uct_dc_mlx5_ep_t)->fc; + } + + virtual void disable_entity(entity *e) { + uct_dc_mlx5_iface_t *iface = ucs_derived_of(e->iface(), + uct_dc_mlx5_iface_t); + + for (int i = 0; i < iface->tx.ndci; ++i) { + uct_rc_txqp_available_set(&iface->tx.dcis[i].txqp, 0); + } + iface->tx.stack_top = iface->tx.ndci; + } + + virtual void enable_entity(entity *e, unsigned cq_num = 128) { + uct_dc_mlx5_iface_t *iface = ucs_derived_of(e->iface(), + uct_dc_mlx5_iface_t); + + for (int i = 0; i < iface->tx.ndci; ++i) { + uct_rc_txqp_available_set(&iface->tx.dcis[i].txqp, + iface->tx.dcis[i].txwq.bb_max); + } + iface->tx.stack_top = 0; + } +}; + +UCS_TEST_P(test_dc_flow_control, general_enabled) +{ + /* Do not set FC hard thresh bigger than 1, because DC decreases + * the window by one when it sends fc grant request. So some checks + * may fail if threshold is bigger than 1. */ + test_general(8, 4, 1, true); +} + +UCS_TEST_P(test_dc_flow_control, general_disabled) +{ + test_general(8, 4, 1, false); +} + +UCS_TEST_P(test_dc_flow_control, pending_grant) +{ + test_pending_grant(5); + flush(); +} + +UCS_TEST_P(test_dc_flow_control, fc_disabled_flush) +{ + test_flush_fc_disabled(); +} + +UCS_TEST_P(test_dc_flow_control, fc_disabled_pending_no_dci) { + + pending_send_request_t pending_req; + pending_req.uct.func = pending_cb; + pending_req.cb_count = 0; + + set_fc_disabled(m_e1); + + /* Send on new endpoints until out of DCIs */ + for (int ep_index = 0; ep_index < 20; ++ep_index) { + m_e1->connect(ep_index, *m_e2, ep_index); + + ucs_status_t status = uct_ep_am_short(m_e1->ep(ep_index), 0, 0, NULL, 0); + if (status == UCS_ERR_NO_RESOURCE) { + /* if FC is disabled, it should be OK to set fc_wnd to 0 */ + get_fc_ptr(m_e1, ep_index)->fc_wnd = 0; + + /* Add to pending */ + status = uct_ep_pending_add(m_e1->ep(ep_index), &pending_req.uct, 0); + ASSERT_UCS_OK(status); + + wait_for_flag(&pending_req.cb_count); + 
EXPECT_EQ(1, pending_req.cb_count); + break; + } + + ASSERT_UCS_OK(status); + } +} + +/* Check that soft request is not handled by DC */ +UCS_TEST_P(test_dc_flow_control, soft_request) +{ + int wnd = 8; + int s_thresh = 4; + int h_thresh = 1; + + set_fc_attributes(m_e1, true, wnd, s_thresh, h_thresh); + + send_am_and_flush(m_e1, wnd - (s_thresh - 1)); + + set_tx_moderation(m_e2, 0); + send_am_and_flush(m_e2, 1); + + /* Check that window is not updated */ + EXPECT_EQ(get_fc_ptr(m_e1)->fc_wnd, s_thresh - 1); +} + +/* Check that: + * 1) flush returns UCS_OK even if there is an outgoing grant request + * 2) No crash when grant for destroyed ep arrives */ +UCS_TEST_P(test_dc_flow_control, flush_destroy) +{ + int wnd = 5; + ucs_status_t status; + + disable_entity(m_e2); + + set_fc_attributes(m_e1, true, wnd, + ucs_max((int)(wnd*0.5), 1), + ucs_max((int)(wnd*0.25), 1)); + + send_am_and_flush(m_e1, wnd); + + /* At this point m_e1 sent grant request to m_e2, m_e2 received all + * messages and added grant to m_e1 to pending queue + * (because it does not have tx resources yet) */ + + /* Invoke flush in a loop, because some send completions may not be polled yet */ + ucs_time_t timeout = ucs_get_time() + ucs_time_from_sec(DEFAULT_TIMEOUT_SEC); + do { + short_progress_loop(); + status = uct_ep_flush(m_e1->ep(0), 0, NULL); + } while (((status == UCS_ERR_NO_RESOURCE) || (status == UCS_INPROGRESS)) && + (ucs_get_time() < timeout)); + ASSERT_UCS_OK(status); + + m_e1->destroy_eps(); + + /* Enable send capabilities of m_e2 and send AM message to force pending queue + * dispatch. Thus, pending grant will be sent to m_e1. There should not be + * any warning/error and/or crash. */ + enable_entity(m_e2); + set_tx_moderation(m_e2, 0); + send_am_and_flush(m_e2, 1); +} + +/* Check that there is no dci leak when just one (out of several) ep gets + * grant. The leak can happen if some other ep has not got grant yet, but + * is scheduled for dci allocation. 
*/ +UCS_TEST_P(test_dc_flow_control, dci_leak) +{ + disable_entity(m_e2); + int wnd = 5; + set_fc_attributes(m_e1, true, wnd, + ucs_max((int)(wnd*0.5), 1), + ucs_max((int)(wnd*0.25), 1)); + send_am_messages(m_e1, wnd, UCS_OK); + send_am_messages(m_e1, 1, UCS_ERR_NO_RESOURCE); + uct_pending_req_t req; + req.func = reinterpret_cast + (ucs_empty_function_return_no_resource); + EXPECT_UCS_OK(uct_ep_pending_add(m_e1->ep(0), &req, 0)); + + /* Make sure that ep does not hold dci when sends completed */ + uct_dc_mlx5_iface_t *iface = ucs_derived_of(m_e1->iface(), uct_dc_mlx5_iface_t); + ucs_time_t deadline = ucs::get_deadline(); + while (iface->tx.stack_top && (ucs_get_time() < deadline)) { + progress(); + } + EXPECT_EQ(0, iface->tx.stack_top); + + /* Clean up FC and pending to avoid assetions during tear down */ + uct_ep_pending_purge(m_e1->ep(0), + reinterpret_cast (ucs_empty_function), + NULL); + enable_entity(m_e2); + set_tx_moderation(m_e2, 0); + send_am_messages(m_e2, 1, UCS_OK); + validate_grant(m_e1); +} + +UCT_DC_INSTANTIATE_TEST_CASE(test_dc_flow_control) + + +class test_dc_fc_deadlock : public test_dc_flow_control { +public: + test_dc_fc_deadlock() { + modify_config("IB_TX_QUEUE_LEN", "8"); + modify_config("RC_FC_WND_SIZE", "128"); + modify_config("DC_TX_POLICY", "rand"); + } + +protected: + struct dc_pending { + uct_pending_req_t uct_req; + entity *e; + }; + + static ucs_status_t am_pending(uct_pending_req_t *req) { + struct dc_pending *pr = reinterpret_cast(req); + return uct_ep_am_short(pr->e->ep(0), 0, 0, NULL, 0); + } +}; + +UCS_TEST_P(test_dc_fc_deadlock, basic, "DC_NUM_DCI=1") +{ + // Send to m_e2 until dci resources are exhausted. 
+ // Also set FC window to 0 emulating lack of all TX resources + ucs_status_t status; + do { + status = uct_ep_am_short(m_e1->ep(0), 0, 0, NULL, 0); + } while (status == UCS_OK); + send_am_messages(m_e1, 1, UCS_ERR_NO_RESOURCE); + get_fc_ptr(m_e1)->fc_wnd = 0; + + // Add am send to pending + struct dc_pending preq; + preq.e = m_e1; + preq.uct_req.func = am_pending; + EXPECT_UCS_OK(uct_ep_pending_add(m_e1->ep(0), &preq.uct_req, 0)); + + // Send whole FC window to m_e1, which will force sending grant request. + // This grant request will be added to pending on m_e1, because it has no + // resources. + int wnd = 5; + set_fc_attributes(m_e2, true, wnd, + ucs_max((int)(wnd*0.5), 1), + ucs_max((int)(wnd*0.25), 1)); + send_am_and_flush(m_e2, wnd); + + // Now, make sure that m_e1 will send grant to m_e2 even though FC window + // is still empty (dci resources will be restored during progression). + // If grant was not sent, this would be a deadlock situation due to lack + // of FC resources. + validate_grant(m_e2); + + // Restore m_e1 for proper cleanup + ucs_derived_of(m_e1->iface(), uct_dc_mlx5_iface_t)->tx.fc_grants = 0; + uct_ep_pending_purge(m_e1->ep(0), NULL, NULL); +} + +UCT_DC_INSTANTIATE_TEST_CASE(test_dc_fc_deadlock) + + +#if ENABLE_STATS + +class test_dc_flow_control_stats : public test_rc_flow_control_stats { +public: + /* virtual */ + void init() { + if (UCS_OK != uct_config_modify(m_iface_config, "RC_FC_ENABLE", "y")) { + UCS_TEST_ABORT("Error: cannot enable flow control"); + } + test_rc_flow_control_stats::init(); + } + + uct_rc_fc_t* get_fc_ptr(entity *e, int ep_idx = 0) { + return &ucs_derived_of(e->ep(ep_idx), uct_dc_mlx5_ep_t)->fc; + } + + uct_rc_fc_t* fake_ep_fc_ptr(entity *e) { + return &ucs_derived_of(e->iface(), uct_dc_mlx5_iface_t)->tx.fc_ep->fc; + } +}; + +UCS_TEST_P(test_dc_flow_control_stats, general) +{ + test_general(5, 2, 1); +} + +UCS_TEST_P(test_dc_flow_control_stats, fc_ep) +{ + uint64_t v; + int wnd = 5; + + set_fc_attributes(m_e1, true, 
wnd, + ucs_max((int)(wnd*0.5), 1), + ucs_max((int)(wnd*0.25), 1)); + + send_am_messages(m_e1, wnd, UCS_OK); + validate_grant(m_e1); + + v = UCS_STATS_GET_COUNTER(get_fc_ptr(m_e1)->stats, UCT_RC_FC_STAT_TX_HARD_REQ); + EXPECT_EQ(1ul, v); + v = UCS_STATS_GET_COUNTER(fake_ep_fc_ptr(m_e2)->stats, UCT_RC_FC_STAT_RX_HARD_REQ); + EXPECT_EQ(1ul, v); + + v = UCS_STATS_GET_COUNTER(get_fc_ptr(m_e1)->stats, UCT_RC_FC_STAT_RX_PURE_GRANT); + EXPECT_EQ(1ul, v); + v = UCS_STATS_GET_COUNTER(fake_ep_fc_ptr(m_e2)->stats, UCT_RC_FC_STAT_TX_PURE_GRANT); + EXPECT_EQ(1ul, v); + flush(); +} + + +UCT_DC_INSTANTIATE_TEST_CASE(test_dc_flow_control_stats) + +#endif diff --git a/test/gtest/uct/ib/test_devx.cc b/test/gtest/uct/ib/test_devx.cc new file mode 100644 index 0000000..09ba4cb --- /dev/null +++ b/test/gtest/uct/ib/test_devx.cc @@ -0,0 +1,63 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include +extern "C" { +#include +} +#include +#include +#include + +class test_devx : public uct_test { +public: + entity* m_e; + + void init() { + uct_test::init(); + + m_e = create_entity(0); + m_entities.push_back(m_e); + + if (!(md()->super.dev.flags & UCT_IB_DEVICE_FLAG_MLX5_PRM && + md()->flags & UCT_IB_MLX5_MD_FLAG_DEVX)) { + std::stringstream ss; + ss << "DEVX is not supported by " << GetParam(); + UCS_TEST_SKIP_R(ss.str()); + } + } + + uct_ib_mlx5_md_t *md() { + return ucs_derived_of(m_e->md(), uct_ib_mlx5_md_t); + } + + uct_priv_worker_t *worker() { + return ucs_derived_of(m_e->worker(), uct_priv_worker_t); + } +}; + +UCS_TEST_P(test_devx, dbrec) +{ + uct_ib_mlx5_dbrec_t *dbrec; + + dbrec = (uct_ib_mlx5_dbrec_t *)ucs_mpool_get_inline(&md()->dbrec_pool); + ASSERT_FALSE(dbrec == NULL); + ucs_mpool_put_inline(dbrec); +} + +UCS_TEST_P(test_devx, uar) +{ + uct_ib_mlx5_txwq_t txwq; + ucs_status_t status; + + status = uct_ib_mlx5_txwq_init_devx(worker(), md(), &txwq, + UCT_IB_MLX5_MMIO_MODE_BF_POST); + ASSERT_UCS_OK(status); + 
ASSERT_TRUE(txwq.super.type == UCT_IB_MLX5_OBJ_TYPE_DEVX); + uct_ib_mlx5_txwq_cleanup(&txwq); +} + +UCT_INSTANTIATE_IB_TEST_CASE(test_devx); diff --git a/test/gtest/uct/ib/test_ib.cc b/test/gtest/uct/ib/test_ib.cc new file mode 100644 index 0000000..515ea8d --- /dev/null +++ b/test/gtest/uct/ib/test_ib.cc @@ -0,0 +1,601 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include + +test_uct_ib::test_uct_ib() : m_e1(NULL), m_e2(NULL) { } + +void test_uct_ib::create_connected_entities() { + m_e1 = uct_test::create_entity(0); + m_e2 = uct_test::create_entity(0); + + m_entities.push_back(m_e1); + m_entities.push_back(m_e2); + + m_e1->connect(0, *m_e2, 0); + m_e2->connect(0, *m_e1, 0); +} + +void test_uct_ib::init() { + uct_test::init(); + create_connected_entities(); + test_uct_ib::m_ib_am_handler_counter = 0; +} + +ucs_status_t test_uct_ib::ib_am_handler(void *arg, void *data, + size_t length, unsigned flags) { + recv_desc_t *my_desc = (recv_desc_t *) arg; + uint64_t *test_ib_hdr = (uint64_t *) data; + uint64_t *actual_data = (uint64_t *) test_ib_hdr + 1; + unsigned data_length = length - sizeof(test_ib_hdr); + + my_desc->length = data_length; + if (*test_ib_hdr == 0xbeef) { + memcpy(my_desc + 1, actual_data , data_length); + } + ++test_uct_ib::m_ib_am_handler_counter; + return UCS_OK; +} + +void test_uct_ib::send_recv_short() { + size_t start_am_counter = test_uct_ib::m_ib_am_handler_counter; + uint64_t send_data = 0xdeadbeef; + uint64_t test_ib_hdr = 0xbeef; + recv_desc_t *recv_buffer; + ucs_status_t status; + + check_caps_skip(UCT_IFACE_FLAG_AM_SHORT); + + recv_buffer = (recv_desc_t *) malloc(sizeof(*recv_buffer) + sizeof(uint64_t)); + recv_buffer->length = 0; /* Initialize length to 0 */ + + /* set a callback for the uct to invoke for receiving the data */ + uct_iface_set_am_handler(m_e2->iface(), 0, ib_am_handler, recv_buffer, 0); + 
+ /* send the data */ + status = uct_ep_am_short(m_e1->ep(0), 0, test_ib_hdr, + &send_data, sizeof(send_data)); + EXPECT_TRUE((status == UCS_OK) || (status == UCS_INPROGRESS)); + + flush(); + wait_for_value(&test_uct_ib::m_ib_am_handler_counter, + start_am_counter + 1, true); + + ASSERT_EQ(sizeof(send_data), recv_buffer->length); + EXPECT_EQ(send_data, *(uint64_t*)(recv_buffer+1)); + + free(recv_buffer); +} + +size_t test_uct_ib::m_ib_am_handler_counter = 0; + +class test_uct_ib_addr : public test_uct_ib { +public: + uct_ib_iface_config_t *ib_config() { + return ucs_derived_of(m_iface_config, uct_ib_iface_config_t); + } + + void test_address_pack(uint64_t subnet_prefix) { + uct_ib_iface_t *iface = ucs_derived_of(m_e1->iface(), uct_ib_iface_t); + static const uint16_t lid_in = 0x1ee7; + union ibv_gid gid_in, gid_out; + uct_ib_address_t *ib_addr; + uint16_t lid_out; + + ib_addr = (uct_ib_address_t*)malloc(uct_ib_iface_address_size(iface)); + + gid_in.global.subnet_prefix = subnet_prefix; + gid_in.global.interface_id = 0xdeadbeef; + uct_ib_iface_address_pack(iface, &gid_in, lid_in, ib_addr); + + uct_ib_address_unpack(ib_addr, &lid_out, &gid_out); + + if (uct_ib_iface_is_roce(iface)) { + EXPECT_TRUE(iface->config.force_global_addr); + } else { + EXPECT_EQ(lid_in, lid_out); + } + + if (ib_config()->is_global) { + EXPECT_EQ(gid_in.global.subnet_prefix, gid_out.global.subnet_prefix); + EXPECT_EQ(gid_in.global.interface_id, gid_out.global.interface_id); + } + + free(ib_addr); + } + + void test_fill_ah_attr(uint64_t subnet_prefix) { + uct_ib_iface_t *iface = ucs_derived_of(m_e1->iface(), uct_ib_iface_t); + static const uint16_t lid = 0x1ee7; + union ibv_gid gid; + struct ibv_ah_attr ah_attr; + + ASSERT_EQ(iface->config.force_global_addr, + ib_config()->is_global || uct_ib_iface_is_roce(iface)); + + gid.global.subnet_prefix = subnet_prefix ?: iface->gid.global.subnet_prefix; + gid.global.interface_id = 0xdeadbeef; + + uct_ib_iface_fill_ah_attr_from_gid_lid(iface, lid, &gid, 
&ah_attr); + + if (uct_ib_iface_is_roce(iface)) { + /* in case of roce, should be global */ + EXPECT_TRUE(ah_attr.is_global); + } else if (ib_config()->is_global) { + /* in case of global address is forced - ah_attr should use GRH */ + EXPECT_TRUE(ah_attr.is_global); + } else if (iface->gid.global.subnet_prefix == gid.global.subnet_prefix) { + /* in case of subnets are same - ah_attr depend from forced/nonforced GRH */ + EXPECT_FALSE(ah_attr.is_global); + } else if (iface->gid.global.subnet_prefix != gid.global.subnet_prefix) { + /* in case of subnets are different - ah_attr should use GRH */ + EXPECT_TRUE(ah_attr.is_global); + } + } +}; + +UCS_TEST_P(test_uct_ib_addr, address_pack) { + test_address_pack(UCT_IB_LINK_LOCAL_PREFIX); + test_address_pack(UCT_IB_SITE_LOCAL_PREFIX | htobe64(0x7200)); + test_address_pack(0xdeadfeedbeefa880ul); +} + +UCS_TEST_P(test_uct_ib_addr, fill_ah_attr) { + test_fill_ah_attr(UCT_IB_LINK_LOCAL_PREFIX); + test_fill_ah_attr(UCT_IB_SITE_LOCAL_PREFIX | htobe64(0x7200)); + test_fill_ah_attr(0xdeadfeedbeefa880ul); + test_fill_ah_attr(0l); +} + +UCS_TEST_P(test_uct_ib_addr, address_pack_global, "IB_IS_GLOBAL=y") { + test_address_pack(UCT_IB_LINK_LOCAL_PREFIX); + test_address_pack(UCT_IB_SITE_LOCAL_PREFIX | htobe64(0x7200)); + test_address_pack(0xdeadfeedbeefa880ul); +} + +UCS_TEST_P(test_uct_ib_addr, fill_ah_attr_global, "IB_IS_GLOBAL=y") { + test_fill_ah_attr(UCT_IB_LINK_LOCAL_PREFIX); + test_fill_ah_attr(UCT_IB_SITE_LOCAL_PREFIX | htobe64(0x7200)); + test_fill_ah_attr(0xdeadfeedbeefa880ul); + test_fill_ah_attr(0l); +} + +UCT_INSTANTIATE_IB_TEST_CASE(test_uct_ib_addr); + + +test_uct_ib_with_specific_port::test_uct_ib_with_specific_port() { + m_ibctx = NULL; + m_port = 0; + m_dev_name = ""; + + memset(&m_port_attr, 0, sizeof(m_port_attr)); +} + +void test_uct_ib_with_specific_port::init() { + size_t colon_pos = GetParam()->dev_name.find(":"); + std::string port_num_str; + + m_dev_name = GetParam()->dev_name.substr(0, colon_pos); + 
port_num_str = GetParam()->dev_name.substr(colon_pos + 1); + + /* port number */ + if (sscanf(port_num_str.c_str(), "%d", &m_port) != 1) { + UCS_TEST_ABORT("Failed to get the port number on device: " << m_dev_name); + } + + std::string abort_reason = + "The requested device " + m_dev_name + + " wasn't found in the device list."; + struct ibv_device **device_list; + int i, num_devices; + + /* get device list */ + device_list = ibv_get_device_list(&num_devices); + if (device_list == NULL) { + abort_reason = "Failed to get the device list."; + num_devices = 0; + } + + /* search for the given device in the device list */ + for (i = 0; i < num_devices; ++i) { + if (strcmp(device_list[i]->name, m_dev_name.c_str())) { + continue; + } + + /* found this dev_name on the host - open it */ + m_ibctx = ibv_open_device(device_list[i]); + if (m_ibctx == NULL) { + abort_reason = "Failed to open the device."; + } + break; + } + + ibv_free_device_list(device_list); + if (m_ibctx == NULL) { + UCS_TEST_ABORT(abort_reason); + } + + if (ibv_query_port(m_ibctx, m_port, &m_port_attr) != 0) { + UCS_TEST_ABORT("Failed to query port " << m_port << + "on device: " << m_dev_name); + } + + try { + check_port_attr(); + } catch (...) 
{ + test_uct_ib_with_specific_port::cleanup(); + throw; + } +} + +void test_uct_ib_with_specific_port::cleanup() { + if (m_ibctx != NULL) { + ibv_close_device(m_ibctx); + m_ibctx = NULL; + } +} + +class test_uct_ib_lmc : public test_uct_ib_with_specific_port { +public: + void init() { + test_uct_ib_with_specific_port::init(); + test_uct_ib::init(); + } + + void cleanup() { + test_uct_ib::cleanup(); + test_uct_ib_with_specific_port::cleanup(); + } + + void check_port_attr() { + /* check if a non zero lmc is set on the port */ + if (!m_port_attr.lmc) { + UCS_TEST_SKIP_R("lmc is set to zero on an IB port"); + } + } +}; + +UCS_TEST_P(test_uct_ib_lmc, non_default_lmc, "IB_LID_PATH_BITS=1") { + send_recv_short(); +} + +UCT_INSTANTIATE_IB_TEST_CASE(test_uct_ib_lmc); + +class test_uct_ib_gid_idx : public test_uct_ib_with_specific_port { +public: + void init() { + test_uct_ib_with_specific_port::init(); + test_uct_ib::init(); + } + + void cleanup() { + test_uct_ib::cleanup(); + test_uct_ib_with_specific_port::cleanup(); + } + + void check_port_attr() { + std::stringstream device_str; + device_str << ibv_get_device_name(m_ibctx->device) << ":" << m_port; + + if (!IBV_PORT_IS_LINK_LAYER_ETHERNET(&m_port_attr)) { + UCS_TEST_SKIP_R(device_str.str() + " is not Ethernet"); + } + + union ibv_gid gid; + uct_ib_md_config_t *md_config = + ucs_derived_of(m_md_config, uct_ib_md_config_t); + ucs::handle uct_md; + uct_ib_md_t *ib_md; + ucs_status_t status; + uint8_t gid_index; + + UCS_TEST_CREATE_HANDLE(uct_md_h, uct_md, uct_ib_md_close, uct_ib_md_open, + &uct_ib_component, + ibv_get_device_name(m_ibctx->device), m_md_config); + + ib_md = ucs_derived_of(uct_md, uct_ib_md_t); + status = uct_ib_device_select_gid_index(&ib_md->dev, m_port, + md_config->ext.gid_index, + &gid_index); + ASSERT_UCS_OK(status); + + device_str << " gid index " << static_cast(gid_index); + + /* check the gid index */ + if (ibv_query_gid(m_ibctx, m_port, gid_index, &gid) != 0) { + UCS_TEST_ABORT("failed to query " 
+ device_str.str()); + } + + /* check if the gid is valid to use */ + if (uct_ib_device_is_gid_raw_empty(gid.raw)) { + UCS_TEST_SKIP_R(device_str.str() + " is empty"); + } + + if (!uct_ib_device_test_roce_gid_index(&ib_md->dev, m_port, &gid, + gid_index)) { + UCS_TEST_SKIP_R("failed to create address handle on " + + device_str.str()); + } + } +}; + +UCS_TEST_P(test_uct_ib_gid_idx, non_default_gid_idx, "GID_INDEX=1") { + send_recv_short(); +} + +UCT_INSTANTIATE_IB_TEST_CASE(test_uct_ib_gid_idx); + +class test_uct_ib_utils : public ucs::test { +}; + +UCS_TEST_F(test_uct_ib_utils, sec_to_qp_time) { + double avg; + uint8_t qp_val; + + // 0 sec + qp_val = uct_ib_to_qp_fabric_time(0); + EXPECT_EQ(1, qp_val); + + // the average time defined for the [0, 1st element] + qp_val = uct_ib_to_qp_fabric_time(4.096 * pow(2, 0) / UCS_USEC_PER_SEC); + EXPECT_EQ(1, qp_val); + + // the time defined for the 1st element + qp_val = uct_ib_to_qp_fabric_time(4.096 * pow(2, 1) / UCS_USEC_PER_SEC); + EXPECT_EQ(1, qp_val); + + for (uint8_t index = 2; index <= UCT_IB_FABRIC_TIME_MAX; index++) { + uint8_t prev_index = index - 1; + + // the time defined for the (i)th element + qp_val = uct_ib_to_qp_fabric_time(4.096 * pow(2, index) / UCS_USEC_PER_SEC); + EXPECT_EQ(index % UCT_IB_FABRIC_TIME_MAX, qp_val); + + // avg = (the average time defined for the [(i - 1)th element, (i)th element]) + avg = (4.096 * pow(2, prev_index) + 4.096 * pow(2, index)) * 0.5; + qp_val = uct_ib_to_qp_fabric_time(avg / UCS_USEC_PER_SEC); + EXPECT_EQ(index % UCT_IB_FABRIC_TIME_MAX, qp_val); + + // the average time defined for the [(i - 1)th element, avg] + qp_val = uct_ib_to_qp_fabric_time((4.096 * pow(2, prev_index) + avg) * 0.5 / UCS_USEC_PER_SEC); + EXPECT_EQ(prev_index, qp_val); + + // the average time defined for the [avg, (i)th element] + qp_val = uct_ib_to_qp_fabric_time((avg + 4.096 * pow(2, index)) * 0.5 / UCS_USEC_PER_SEC); + EXPECT_EQ(index % UCT_IB_FABRIC_TIME_MAX, qp_val); + } +} + 
+UCS_TEST_F(test_uct_ib_utils, sec_to_rnr_time) { + double avg; + uint8_t rnr_val; + + // 0 sec + rnr_val = uct_ib_to_rnr_fabric_time(0); + EXPECT_EQ(1, rnr_val); + + // the average time defined for the [0, 1st element] + avg = uct_ib_qp_rnr_time_ms[1] * 0.5; + rnr_val = uct_ib_to_rnr_fabric_time(avg / UCS_MSEC_PER_SEC); + EXPECT_EQ(1, rnr_val); + + for (uint8_t index = 1; index < UCT_IB_FABRIC_TIME_MAX; index++) { + uint8_t next_index = (index + 1) % UCT_IB_FABRIC_TIME_MAX; + + // the time defined for the (i)th element + rnr_val = uct_ib_to_rnr_fabric_time(uct_ib_qp_rnr_time_ms[index] / UCS_MSEC_PER_SEC); + EXPECT_EQ(index, rnr_val); + + // avg = (the average time defined for the [(i)th element, (i + 1)th element]) + avg = (uct_ib_qp_rnr_time_ms[index] + uct_ib_qp_rnr_time_ms[next_index]) * 0.5; + rnr_val = uct_ib_to_rnr_fabric_time(avg / UCS_MSEC_PER_SEC); + EXPECT_EQ(next_index, rnr_val); + + // the average time defined for the [(i)th element, avg] + rnr_val = uct_ib_to_rnr_fabric_time((uct_ib_qp_rnr_time_ms[index] + avg) * 0.5 / UCS_MSEC_PER_SEC); + EXPECT_EQ(index, rnr_val); + + // the average time defined for the [avg, (i + 1)th element] + rnr_val = uct_ib_to_rnr_fabric_time((avg + uct_ib_qp_rnr_time_ms[next_index]) * + 0.5 / UCS_MSEC_PER_SEC); + EXPECT_EQ(next_index, rnr_val); + } + + // the time defined for the biggest value + rnr_val = uct_ib_to_rnr_fabric_time(uct_ib_qp_rnr_time_ms[0] / UCS_MSEC_PER_SEC); + EXPECT_EQ(0, rnr_val); + + // 1 sec + rnr_val = uct_ib_to_rnr_fabric_time(1.); + EXPECT_EQ(0, rnr_val); +} + + +class test_uct_event_ib : public test_uct_ib { +public: + test_uct_event_ib() { + length = 8; + wakeup_fd.revents = 0; + wakeup_fd.events = POLLIN; + wakeup_fd.fd = 0; + test_ib_hdr = 0xbeef; + m_buf1 = NULL; + m_buf2 = NULL; + } + + void init() { + ucs_status_t status; + + test_uct_ib::init(); + + try { + check_caps_skip(UCT_IFACE_FLAG_PUT_SHORT | UCT_IFACE_FLAG_CB_SYNC | + UCT_IFACE_FLAG_EVENT_SEND_COMP | + UCT_IFACE_FLAG_EVENT_RECV); + } 
catch (...) { + test_uct_ib::cleanup(); + throw; + } + + /* create receiver wakeup */ + status = uct_iface_event_fd_get(m_e1->iface(), &wakeup_fd.fd); + ASSERT_EQ(status, UCS_OK); + + EXPECT_EQ(0, poll(&wakeup_fd, 1, 0)); + + m_buf1 = new mapped_buffer(length, 0x1, *m_e1); + m_buf2 = new mapped_buffer(length, 0x2, *m_e2); + + /* set a callback for the uct to invoke for receiving the data */ + uct_iface_set_am_handler(m_e1->iface(), 0, ib_am_handler, m_buf1->ptr(), + 0); + + test_uct_event_ib::bcopy_pack_count = 0; + } + + static size_t pack_cb(void *dest, void *arg) { + const mapped_buffer *buf = (const mapped_buffer *)arg; + memcpy(dest, buf->ptr(), buf->length()); + ++test_uct_event_ib::bcopy_pack_count; + return buf->length(); + } + + /* Use put_bcopy here to provide send_cq entry */ + void send_msg_e1_e2(size_t count = 1) { + for (size_t i = 0; i < count; ++i) { + ssize_t status = uct_ep_put_bcopy(m_e1->ep(0), pack_cb, (void *)m_buf1, + m_buf2->addr(), m_buf2->rkey()); + if (status < 0) { + ASSERT_UCS_OK((ucs_status_t)status); + } + } + } + + void send_msg_e2_e1(size_t count = 1) { + for (size_t i = 0; i < count; ++i) { + ucs_status_t status = uct_ep_am_short(m_e2->ep(0), 0, test_ib_hdr, + m_buf2->ptr(), m_buf2->length()); + ASSERT_UCS_OK(status); + } + } + + void check_send_cq(uct_iface_t *iface, size_t val) { + uct_ib_iface_t *ib_iface = ucs_derived_of(iface, uct_ib_iface_t); + struct ibv_cq *send_cq = ib_iface->cq[UCT_IB_DIR_TX]; + + if (val != send_cq->comp_events_completed) { + uint32_t completed_evt = send_cq->comp_events_completed; + /* need this call to acknowledge the completion to prevent iface dtor hung*/ + ibv_ack_cq_events(ib_iface->cq[UCT_IB_DIR_TX], 1); + UCS_TEST_ABORT("send_cq->comp_events_completed have to be 1 but the value " + << completed_evt); + } + } + + void check_recv_cq(uct_iface_t *iface, size_t val) { + uct_ib_iface_t *ib_iface = ucs_derived_of(iface, uct_ib_iface_t); + struct ibv_cq *recv_cq = ib_iface->cq[UCT_IB_DIR_RX]; + + if 
(val != recv_cq->comp_events_completed) { + uint32_t completed_evt = recv_cq->comp_events_completed; + /* need this call to acknowledge the completion to prevent iface dtor hung*/ + ibv_ack_cq_events(ib_iface->cq[UCT_IB_DIR_RX], 1); + UCS_TEST_ABORT("recv_cq->comp_events_completed have to be 1 but the value " + << completed_evt); + } + } + + void cleanup() { + delete(m_buf1); + delete(m_buf2); + test_uct_ib::cleanup(); + } + +protected: + static const unsigned EVENTS = UCT_EVENT_RECV | UCT_EVENT_SEND_COMP; + + struct pollfd wakeup_fd; + size_t length; + uint64_t test_ib_hdr; + mapped_buffer *m_buf1, *m_buf2; + static size_t bcopy_pack_count; +}; + +size_t test_uct_event_ib::bcopy_pack_count = 0; + + +UCS_TEST_P(test_uct_event_ib, tx_cq) +{ + ucs_status_t status; + + status = uct_iface_event_arm(m_e1->iface(), EVENTS); + ASSERT_EQ(status, UCS_OK); + + /* check initial state of the fd and [send|recv]_cq */ + EXPECT_EQ(0, poll(&wakeup_fd, 1, 0)); + check_send_cq(m_e1->iface(), 0); + check_recv_cq(m_e1->iface(), 0); + + /* send the data */ + send_msg_e1_e2(); + + /* make sure the file descriptor is signaled once */ + ASSERT_EQ(1, poll(&wakeup_fd, 1, 1000*ucs::test_time_multiplier())); + + status = uct_iface_event_arm(m_e1->iface(), EVENTS); + ASSERT_EQ(status, UCS_ERR_BUSY); + + /* make sure [send|recv]_cq handled properly */ + check_send_cq(m_e1->iface(), 1); + check_recv_cq(m_e1->iface(), 0); + + m_e1->flush(); +} + + +UCS_TEST_P(test_uct_event_ib, txrx_cq) +{ + const size_t msg_count = 1; + ucs_status_t status; + + status = uct_iface_event_arm(m_e1->iface(), EVENTS); + ASSERT_EQ(UCS_OK, status); + + /* check initial state of the fd and [send|recv]_cq */ + EXPECT_EQ(0, poll(&wakeup_fd, 1, 0)); + check_send_cq(m_e1->iface(), 0); + check_recv_cq(m_e1->iface(), 0); + + /* send the data */ + send_msg_e1_e2(msg_count); + send_msg_e2_e1(msg_count); + + twait(150); /* Let completion to be generated */ + + /* Make sure all messages delivered */ + while 
((test_uct_ib::m_ib_am_handler_counter < msg_count) || + (test_uct_event_ib::bcopy_pack_count < msg_count)) { + progress(); + } + + /* make sure the file descriptor is signaled */ + ASSERT_EQ(1, poll(&wakeup_fd, 1, 1000*ucs::test_time_multiplier())); + + /* Acknowledge all the requests */ + short_progress_loop(); + status = uct_iface_event_arm(m_e1->iface(), EVENTS); + ASSERT_EQ(UCS_ERR_BUSY, status); + + /* make sure [send|recv]_cq handled properly */ + check_send_cq(m_e1->iface(), 1); + check_recv_cq(m_e1->iface(), 1); + + m_e1->flush(); + m_e2->flush(); +} + + +UCT_INSTANTIATE_IB_TEST_CASE(test_uct_event_ib); diff --git a/test/gtest/uct/ib/test_ib.h b/test/gtest/uct/ib/test_ib.h new file mode 100644 index 0000000..1a3f72f --- /dev/null +++ b/test/gtest/uct/ib/test_ib.h @@ -0,0 +1,50 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include +#include + +extern "C" { +#include +#include +#include +#include +#include +#include +} + + +class test_uct_ib : public uct_test { +public: + typedef struct { + unsigned length; + /* data follows */ + } recv_desc_t; + + test_uct_ib(); + void init(); + void create_connected_entities(); + static ucs_status_t ib_am_handler(void *arg, void *data, + size_t length, unsigned flags); + virtual void send_recv_short(); + +protected: + entity *m_e1, *m_e2; + static size_t m_ib_am_handler_counter; +}; + +class test_uct_ib_with_specific_port : public test_uct_ib { +public: + test_uct_ib_with_specific_port(); + void init(); + void cleanup(); + virtual void check_port_attr() = 0; + +protected: + std::string m_dev_name; + unsigned m_port; + struct ibv_context *m_ibctx; + struct ibv_port_attr m_port_attr; +}; diff --git a/test/gtest/uct/ib/test_ib_md.cc b/test/gtest/uct/ib/test_ib_md.cc new file mode 100644 index 0000000..4e1da6f --- /dev/null +++ b/test/gtest/uct/ib/test_ib_md.cc @@ -0,0 +1,164 @@ + +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. 
ALL RIGHTS RESERVED.
+* Copyright (C) Advanced Micro Devices, Inc. 2016 - 2017. ALL RIGHTS RESERVED.
+* See file LICENSE for terms.
+*/
+
+#include
+#include
+#include
+#if HAVE_MLX5_HW
+#include
+#endif
+
+#include
+#include
+
+/* IB memory-domain tests around registration flags and the atomic (UMR) key */
+class test_ib_md : public test_md
+{
+protected:
+    void ib_md_umr_check(void *rkey_buffer,
+                         bool amo_access,
+                         size_t size = 8192);
+    bool has_ksm() const;
+    bool check_umr(uct_ib_md_t *ib_md) const;
+};
+
+
+/*
+ * Test that ib md does not create umr region if
+ * UCT_MD_MEM_ACCESS_REMOTE_ATOMIC is not set
+ *
+ * Allocates 'size' bytes, registers them with either atomic or RMA access
+ * (per 'amo_access'), packs the rkey into 'rkey_buffer' and checks the
+ * ATOMIC_MR flag and atomic_rkey of the memory handle before and after the
+ * pack. Skips when physical or free memory is too small for the buffer.
+ */
+
+void test_ib_md::ib_md_umr_check(void *rkey_buffer,
+                                 bool amo_access,
+                                 size_t size)
+{
+    ucs_status_t status;
+    size_t alloc_size;
+    void *buffer;
+
+    if (ucs_get_phys_mem_size() < size * 8) {
+        UCS_TEST_SKIP_R("not enough physical memory");
+    }
+    if (ucs_get_memfree_size() < size * 4) {
+        UCS_TEST_SKIP_R("not enough free memory");
+    }
+
+    buffer = NULL;
+    alloc_size = size;
+    status = ucs_mmap_alloc(&alloc_size, &buffer, 0
+                            UCS_MEMTRACK_NAME("test_umr"));
+    ASSERT_UCS_OK(status);
+
+    uct_mem_h memh;
+    status = uct_md_mem_reg(md(), buffer, size,
+                            amo_access ? UCT_MD_MEM_ACCESS_REMOTE_ATOMIC :
+                                         UCT_MD_MEM_ACCESS_RMA,
+                            &memh);
+    ASSERT_UCS_OK(status, << " buffer=" << buffer << " size=" << size);
+    ASSERT_TRUE(memh != UCT_MEM_HANDLE_NULL);
+
+    uct_ib_mem_t *ib_memh = (uct_ib_mem_t *)memh;
+
+    if (amo_access) {
+        EXPECT_TRUE(ib_memh->flags & UCT_IB_MEM_ACCESS_REMOTE_ATOMIC);
+    } else {
+        EXPECT_FALSE(ib_memh->flags & UCT_IB_MEM_ACCESS_REMOTE_ATOMIC);
+    }
+
+#if HAVE_MLX5_HW
+    /* right after registration the atomic MR flag must not be set yet */
+    EXPECT_FALSE(ib_memh->flags & UCT_IB_MEM_FLAG_ATOMIC_MR);
+#endif
+
+    status = uct_md_mkey_pack(md(), memh, rkey_buffer);
+    EXPECT_UCS_OK(status);
+
+#if HAVE_MLX5_HW
+    uct_ib_md_t *ib_md = (uct_ib_md_t *)md();
+
+    /* after packing, the atomic MR is expected only for atomic access on an
+     * MD for which check_umr() reports UMR support */
+    if (amo_access) {
+        if (check_umr(ib_md)) {
+            EXPECT_TRUE(ib_memh->flags & UCT_IB_MEM_FLAG_ATOMIC_MR);
+            EXPECT_TRUE(ib_memh->atomic_rkey != 0);
+        } else {
+            EXPECT_FALSE(ib_memh->flags & UCT_IB_MEM_FLAG_ATOMIC_MR);
+            EXPECT_TRUE(ib_memh->atomic_rkey == 0);
+        }
+    } else {
+        EXPECT_FALSE(ib_memh->flags & UCT_IB_MEM_FLAG_ATOMIC_MR);
+        EXPECT_TRUE(ib_memh->atomic_rkey == 0);
+    }
+#endif
+
+    status = uct_md_mem_dereg(md(), memh);
+    EXPECT_UCS_OK(status);
+
+    ucs_mmap_free(buffer, alloc_size);
+}
+
+/* Whether the MD reports KSM support; the source of the answer depends on
+ * the build configuration (DEVX flags, experimental verbs caps, or none). */
+bool test_ib_md::has_ksm() const {
+#if HAVE_DEVX
+    return (ucs_derived_of(md(), uct_ib_md_t)->dev.flags & UCT_IB_DEVICE_FLAG_MLX5_PRM) &&
+           (ucs_derived_of(md(), uct_ib_mlx5_md_t)->flags & UCT_IB_MLX5_MD_FLAG_KSM);
+#elif defined(HAVE_EXP_UMR_KSM)
+    return ucs_derived_of(md(), uct_ib_md_t)->dev.dev_attr.exp_device_cap_flags &
+           IBV_EXP_DEVICE_UMR_FIXED_SIZE;
+#else
+    return false;
+#endif
+}
+
+/* Whether an atomic (UMR) key is expected for 'ib_md': follows has_ksm()
+ * with DEVX, otherwise requires an MLX5_PRM device with a UMR QP. */
+bool test_ib_md::check_umr(uct_ib_md_t *ib_md) const {
+#if HAVE_DEVX
+    return has_ksm();
+#elif HAVE_MLX5_HW
+    if (ib_md->dev.flags & UCT_IB_DEVICE_FLAG_MLX5_PRM) {
+        uct_ib_mlx5_md_t *mlx5_md = ucs_derived_of(ib_md, uct_ib_mlx5_md_t);
+        return mlx5_md->umr_qp != NULL;
+    }
+    return false;
+#else
+    return false;
+#endif
+}
+
+UCS_TEST_P(test_ib_md, ib_md_umr_rcache, "REG_METHODS=rcache") {
+    std::string rkey_buffer(md_attr().rkey_packed_size, '\0');
+
+    /* The order is important here because
+     * of registration cache. A cached region will
+     * be promoted to atomic access but it will never be demoted
+     */
+    ib_md_umr_check(&rkey_buffer[0], false);
+    ib_md_umr_check(&rkey_buffer[0], true);
+}
+
+UCS_TEST_P(test_ib_md, ib_md_umr_direct, "REG_METHODS=direct") {
+    std::string rkey_buffer(md_attr().rkey_packed_size, '\0');
+
+    /* without rcache the order is not really important */
+    ib_md_umr_check(&rkey_buffer[0], true);
+    ib_md_umr_check(&rkey_buffer[0], false);
+    ib_md_umr_check(&rkey_buffer[0], true);
+    ib_md_umr_check(&rkey_buffer[0], false);
+}
+
+/* Same check with a buffer one page larger than UCT_IB_MD_MAX_MR_SIZE;
+ * atomic access is requested only when has_ksm() is true */
+UCS_TEST_P(test_ib_md, ib_md_umr_ksm) {
+    std::string rkey_buffer(md_attr().rkey_packed_size, '\0');
+    ib_md_umr_check(&rkey_buffer[0], has_ksm(), UCT_IB_MD_MAX_MR_SIZE + 0x1000);
+}
+
+#if HAVE_UMR_KSM
+UCS_TEST_P(test_ib_md, umr_noninline_klm, "MAX_INLINE_KLM_LIST=1") {
+
+    /* KLM list size would be 2, and setting MAX_INLINE_KLM_LIST=1 would force
+     * using non-inline UMR post_send.
+     */
+    std::string rkey_buffer(md_attr().rkey_packed_size, '\0');
+    ib_md_umr_check(&rkey_buffer[0], has_ksm(), UCT_IB_MD_MAX_MR_SIZE + 0x1000);
+}
+#endif
+
+_UCT_MD_INSTANTIATE_TEST_CASE(test_ib_md, ib)
diff --git a/test/gtest/uct/ib/test_ib_pkey.cc b/test/gtest/uct/ib/test_ib_pkey.cc
new file mode 100644
index 0000000..9b502f0
--- /dev/null
+++ b/test/gtest/uct/ib/test_ib_pkey.cc
@@ -0,0 +1,101 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED.
+* See file LICENSE for terms.
+*/
+
+#include
+
+
+/* Partition-key tests: run send_recv_short over freshly created entities and
+ * verify that both ifaces picked up the expected pkey value and table index. */
+class test_uct_ib_pkey : public test_uct_ib_with_specific_port {
+public:
+    test_uct_ib_pkey() : m_pkey_value(0), m_pkey_index(0) {
+    }
+
+    void check_port_attr() {
+        /* no pkeys for Ethernet */
+        if (IBV_PORT_IS_LINK_LAYER_ETHERNET(&m_port_attr)) {
+            UCS_TEST_SKIP_R("skip pkey test for port with Ethernet link type");
+        }
+    }
+
+    /* Create a connected pair, validate its pkey settings, exchange one
+     * message and tear the pair down again. */
+    void send_recv_short() {
+        create_connected_entities();
+
+        EXPECT_TRUE(check_pkey(m_e1->iface(), m_pkey_value, m_pkey_index));
+        EXPECT_TRUE(check_pkey(m_e2->iface(), m_pkey_value, m_pkey_index));
+
+        test_uct_ib::send_recv_short();
+
+        m_e1->destroy_eps();
+        m_e2->destroy_eps();
+        m_entities.remove(m_e1);
+        m_entities.remove(m_e2);
+    }
+
+    /* Read entry 'pkey_idx' of the port's pkey table, converted with ntohs */
+    uint16_t query_pkey(uint16_t pkey_idx) const {
+        uint16_t raw_pkey;
+
+        if (ibv_query_pkey(m_ibctx, m_port, pkey_idx, &raw_pkey) != 0) {
+            UCS_TEST_ABORT("Failed to query pkey on port " << m_port <<
+                           " on device: " << m_dev_name);
+        }
+
+        return ntohs(raw_pkey);
+    }
+
+    /* True when 'iface' uses exactly the given pkey value and table index */
+    bool check_pkey(const uct_iface_t *iface, uint16_t pkey_value,
+                    uint16_t pkey_index) const {
+        const uct_ib_iface_t *ib_iface = ucs_derived_of(iface, uct_ib_iface_t);
+
+        if (pkey_value != ib_iface->pkey_value) {
+            return false;
+        }
+
+        return pkey_index == ib_iface->pkey_index;
+    }
+
+    /* Report the first usable table entry through the out-parameters;
+     * returns false (leaving them untouched) when there is none */
+    bool find_default_pkey(uint16_t &pkey_value, uint16_t &pkey_index) const {
+        for (uint16_t idx = 0; idx < m_port_attr.pkey_tbl_len; idx++) {
+            uint16_t candidate = query_pkey(idx);
+            if (!can_use_pkey(candidate)) {
+                continue;
+            }
+
+            /* found the first valid pkey with full membership */
+            pkey_value = candidate;
+            pkey_index = idx;
+            return true;
+        }
+
+        return false;
+    }
+
+    /* A pkey is usable when its membership bit is set (which also implies
+     * the value is non-zero) */
+    bool can_use_pkey(uint16_t pkey_value) const {
+        return (pkey_value & UCT_IB_PKEY_MEMBERSHIP_MASK) != 0;
+    }
+
+public:
+    uint16_t m_pkey_value;
+    uint16_t m_pkey_index;
+};
+
+UCS_TEST_P(test_uct_ib_pkey, default_pkey) {
+    if (!find_default_pkey(m_pkey_value, m_pkey_index)) {
+        UCS_TEST_SKIP_R("unable to find a valid pkey with full membership");
+    }
+
+    send_recv_short();
+}
+
+/* For every usable entry of the port's pkey table: configure IB_PKEY to the
+ * entry's partition bits and run send_recv_short with that value/index. */
+UCS_TEST_P(test_uct_ib_pkey, all_avail_pkeys) {
+    /* test all pkeys that are configured for the device */
+    for (uint16_t table_idx = 0; table_idx < m_port_attr.pkey_tbl_len; table_idx++) {
+        m_pkey_value = query_pkey(table_idx);
+        if (!can_use_pkey(m_pkey_value)) {
+            continue;
+        }
+        modify_config("IB_PKEY", "0x" +
+                      ucs::to_hex_string(m_pkey_value &
+                                         UCT_IB_PKEY_PARTITION_MASK));
+        m_pkey_index = table_idx;
+        send_recv_short();
+    }
+}
+
+
+UCT_INSTANTIATE_IB_TEST_CASE(test_uct_ib_pkey);
diff --git a/test/gtest/uct/ib/test_ib_xfer.cc b/test/gtest/uct/ib/test_ib_xfer.cc
new file mode 100644
index 0000000..95be2b7
--- /dev/null
+++ b/test/gtest/uct/ib/test_ib_xfer.cc
@@ -0,0 +1,120 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include
+#include
+
+
+/* get_bcopy transfers with IB_TX_INLINE_RESP set to 0 and to 64 */
+class uct_p2p_rma_test_inlresp : public uct_p2p_rma_test {};
+
+UCS_TEST_SKIP_COND_P(uct_p2p_rma_test_inlresp, get_bcopy_inlresp0,
+                     !check_caps(UCT_IFACE_FLAG_GET_BCOPY),
+                     "IB_TX_INLINE_RESP=0") {
+    test_xfer_multi(static_cast(&uct_p2p_rma_test::get_bcopy),
+                    1ul, sender().iface_attr().cap.get.max_bcopy,
+                    TEST_UCT_FLAG_RECV_ZCOPY);
+}
+
+UCS_TEST_SKIP_COND_P(uct_p2p_rma_test_inlresp, get_bcopy_inlresp64,
+                     !check_caps(UCT_IFACE_FLAG_GET_BCOPY),
+                     "IB_TX_INLINE_RESP=64") {
+    test_xfer_multi(static_cast(&uct_p2p_rma_test::get_bcopy),
+                    1ul, sender().iface_attr().cap.get.max_bcopy,
+                    TEST_UCT_FLAG_RECV_ZCOPY);
+}
+
+UCT_INSTANTIATE_IB_TEST_CASE(uct_p2p_rma_test_inlresp)
+
+
+/* put_zcopy/get_zcopy transfer helpers shared by the registration-method
+ * tests; size ranges come from the sender's iface attributes */
+class uct_p2p_rma_test_alloc_methods : public uct_p2p_rma_test {
+protected:
+    void test_put_zcopy() {
+        test_xfer_multi(static_cast(&uct_p2p_rma_test::put_zcopy),
+                        0, sender().iface_attr().cap.put.max_zcopy,
+                        TEST_UCT_FLAG_SEND_ZCOPY);
+    }
+
+    void test_get_zcopy() {
+        test_xfer_multi(static_cast(&uct_p2p_rma_test::get_zcopy),
+                        sender().iface_attr().cap.get.min_zcopy,
+                        sender().iface_attr().cap.get.max_zcopy,
+                        TEST_UCT_FLAG_RECV_ZCOPY);
+    }
+};
+
+/* The tests below run the same put/get zcopy transfers under different
+ * registration settings; each is skipped unless both zcopy caps are present. */
+UCS_TEST_SKIP_COND_P(uct_p2p_rma_test_alloc_methods, xfer_reg_odp,
+                     !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY |
+                                 UCT_IFACE_FLAG_GET_ZCOPY),
+                     "REG_METHODS=odp,direct",
+                     "MLX5_DEVX_OBJECTS=dct,dcsrq")
+{
+    test_put_zcopy();
+    test_get_zcopy();
+}
+
+UCS_TEST_SKIP_COND_P(uct_p2p_rma_test_alloc_methods, xfer_reg_rcache,
+                     !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY |
+                                 UCT_IFACE_FLAG_GET_ZCOPY),
+                     "REG_METHODS=rcache,direct")
+{
+    test_put_zcopy();
+    test_get_zcopy();
+}
+
+UCS_TEST_SKIP_COND_P(uct_p2p_rma_test_alloc_methods, xfer_reg_direct,
+                     !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY |
+                                 UCT_IFACE_FLAG_GET_ZCOPY),
+                     "REG_METHODS=direct")
+{
+    test_put_zcopy();
+    test_get_zcopy();
+}
+
+UCS_TEST_SKIP_COND_P(uct_p2p_rma_test_alloc_methods, xfer_reg_multithreaded,
+                     !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY |
+                                 UCT_IFACE_FLAG_GET_ZCOPY),
+                     "REG_MT_THRESH=1", "REG_MT_CHUNK=1G", "REG_MT_BIND=y")
+{
+    test_put_zcopy();
+    test_get_zcopy();
+}
+
+UCT_INSTANTIATE_IB_TEST_CASE(uct_p2p_rma_test_alloc_methods)
+
+
+/* Mixed-operation test (run(1000)) under different registration settings */
+class uct_p2p_mix_test_alloc_methods : public uct_p2p_mix_test {};
+
+UCS_TEST_P(uct_p2p_mix_test_alloc_methods, mix1000_odp,
+           "REG_METHODS=odp,direct", "MLX5_DEVX_OBJECTS=dct,dcsrq")
+{
+    run(1000);
+}
+
+UCS_TEST_P(uct_p2p_mix_test_alloc_methods, mix1000_rcache,
+           "REG_METHODS=rcache,direct")
+{
+    run(1000);
+}
+
+UCS_TEST_P(uct_p2p_mix_test_alloc_methods, mix1000_multithreaded,
+           "REG_MT_THRESH=1", "REG_MT_CHUNK=1K", "REG_MT_BIND=y")
+{
+    run(1000);
+}
+
+UCT_INSTANTIATE_IB_TEST_CASE(uct_p2p_mix_test_alloc_methods)
+
+
+/* Mixed-operation test with INDIRECT_ATOMIC=n */
+class uct_p2p_mix_test_indirect_atomic : public uct_p2p_mix_test {};
+
+UCS_TEST_P(uct_p2p_mix_test_indirect_atomic, mix1000_indirect_atomic,
+           "INDIRECT_ATOMIC=n")
+{
+    run(1000);
+}
+
+UCT_INSTANTIATE_IB_TEST_CASE(uct_p2p_mix_test_indirect_atomic)
+
diff --git a/test/gtest/uct/ib/test_rc.cc b/test/gtest/uct/ib/test_rc.cc
new file mode 100644
index 0000000..72d5f85
--- /dev/null
+++ b/test/gtest/uct/ib/test_rc.cc
@@ -0,0 +1,370 @@
+/**
+* Copyright (C) Mellanox
Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+* Copyright (C) UT-Battelle, LLC. 2016. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2016.All rights reserved.
+* See file LICENSE for terms.
+*/
+
+#include "test_rc.h"
+
+
+/* Instantiate a test case for both the rc_verbs and the rc_mlx5 transports */
+#define UCT_INSTANTIATE_RC_TEST_CASE(_test_case) \
+    _UCT_INSTANTIATE_TEST_CASE(_test_case, rc_verbs) \
+    _UCT_INSTANTIATE_TEST_CASE(_test_case, rc_mlx5)
+
+
+/* Create two entities (the skip check runs after the first one exists) and
+ * connect them */
+void test_rc::init()
+{
+    uct_test::init();
+
+    m_e1 = uct_test::create_entity(0);
+    m_entities.push_back(m_e1);
+
+    check_skip_test();
+
+    m_e2 = uct_test::create_entity(0);
+    m_entities.push_back(m_e2);
+
+    connect();
+}
+
+/* Connect endpoint 0 of each entity to the other and install the dummy AM
+ * handler for id 0 on both ifaces */
+void test_rc::connect()
+{
+    m_e1->connect(0, *m_e2, 0);
+    m_e2->connect(0, *m_e1, 0);
+
+    uct_iface_set_am_handler(m_e1->iface(), 0, am_dummy_handler, NULL, 0);
+    uct_iface_set_am_handler(m_e2->iface(), 0, am_dummy_handler, NULL, 0);
+}
+
+// Check that iface tx ops buffer and flush comp memory pool are moderated
+// properly when we have communication ops + lots of flushes
+void test_rc::test_iface_ops(int cq_len)
+{
+    entity *e = uct_test::create_entity(0);
+    m_entities.push_back(e);
+    e->connect(0, *m_e2, 0);
+
+    mapped_buffer sendbuf(1024, 0ul, *e);
+    mapped_buffer recvbuf(1024, 0ul, *m_e2);
+    uct_completion_t comp;
+    comp.count = cq_len * 512; // some big value to avoid func invocation
+    comp.func = NULL;
+
+    UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, sendbuf.ptr(), sendbuf.length(),
+                            sendbuf.memh(), m_e1->iface_attr().cap.put.max_iov);
+    // For _x transports several CQEs can be consumed per WQE, post less put zcopy
+    // ops, so that flush would be successful (otherwise flush will return
+    // NO_RESOURCES and completion will not be added for it).
+    for (int i = 0; i < cq_len / 5; i++) {
+        ASSERT_UCS_OK_OR_INPROGRESS(uct_ep_put_zcopy(e->ep(0), iov, iovcnt,
+                                                     recvbuf.addr(),
+                                                     recvbuf.rkey(), &comp));
+
+        // Create some stress on iface (flush mp):
+        // post 10 flushes per every put.
+        for (int j = 0; j < 10; j++) {
+            ASSERT_UCS_OK_OR_INPROGRESS(uct_ep_flush(e->ep(0), 0, &comp));
+        }
+    }
+
+    flush();
+}
+
+UCS_TEST_SKIP_COND_P(test_rc, stress_iface_ops,
+                     !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY)) {
+    int cq_len = 16;
+
+    if (UCS_OK != uct_config_modify(m_iface_config, "RC_TX_CQ_LEN",
+                                    ucs::to_string(cq_len).c_str())) {
+        UCS_TEST_ABORT("Error: cannot modify RC_TX_CQ_LEN");
+    }
+
+    test_iface_ops(cq_len);
+}
+
+/* The test expects txqp.available to stay unchanged across a progress loop
+ * after a small batch of sends, and to return to its initial value after a
+ * flush. */
+UCS_TEST_P(test_rc, tx_cq_moderation) {
+    unsigned tx_mod = ucs_min(rc_iface(m_e1)->config.tx_moderation / 4, 8);
+    int16_t init_rsc = rc_ep(m_e1)->txqp.available;
+
+    send_am_messages(m_e1, tx_mod, UCS_OK);
+
+    int16_t rsc = rc_ep(m_e1)->txqp.available;
+
+    EXPECT_LE(rsc, init_rsc);
+
+    short_progress_loop(100);
+
+    EXPECT_EQ(rsc, rc_ep(m_e1)->txqp.available);
+
+    flush();
+
+    EXPECT_EQ(init_rsc, rc_ep(m_e1)->txqp.available);
+}
+
+UCT_INSTANTIATE_RC_TEST_CASE(test_rc)
+
+
+/* Fixture limiting the send queue to 32 entries: sets TX_MAX_WR and
+ * RC_TX_MAX_BB and aborts only when neither setting could be applied */
+class test_rc_max_wr : public test_rc {
+protected:
+    virtual void init() {
+        ucs_status_t status1, status2;
+        status1 = uct_config_modify(m_iface_config, "TX_MAX_WR", "32");
+        status2 = uct_config_modify(m_iface_config, "RC_TX_MAX_BB", "32");
+        if (status1 != UCS_OK && status2 != UCS_OK) {
+            UCS_TEST_ABORT("Error: cannot set rc max wr/bb");
+        }
+        test_rc::init();
+    }
+};
+
+/* Check that max_wr stops from sending */
+UCS_TEST_P(test_rc_max_wr, send_limit)
+{
+    /* first 32 messages should be OK */
+    send_am_messages(m_e1, 32, UCS_OK);
+
+    /* next message - should fail */
+    send_am_messages(m_e1, 1, UCS_ERR_NO_RESOURCE);
+
+    progress_loop();
+    send_am_messages(m_e1, 1, UCS_OK);
+}
+
+UCT_INSTANTIATE_RC_TEST_CASE(test_rc_max_wr)
+
+uint32_t test_rc_flow_control::m_am_rx_count = 0;
+
+void test_rc_flow_control::init()
+{
+    /* For correct testing FC needs to be initialized during interface creation */
+    if (UCS_OK != uct_config_modify(m_iface_config, "RC_FC_ENABLE", "y")) {
+        UCS_TEST_ABORT("Error: cannot enable flow control");
+    }
+    test_rc::init();
+
+    ucs_assert(rc_iface(m_e1)->config.fc_enabled);
+    ucs_assert(rc_iface(m_e2)->config.fc_enabled);
+
+    uct_iface_set_am_handler(m_e1->iface(), FLUSH_AM_ID, am_handler, NULL, 0);
+    uct_iface_set_am_handler(m_e2->iface(), FLUSH_AM_ID, am_handler, NULL, 0);
+
+}
+
+void test_rc_flow_control::cleanup()
+{
+    /* Restore FC state to enabled, so iface cleanup will destroy the grant mpool */
+    rc_iface(m_e1)->config.fc_enabled = 1;
+    rc_iface(m_e2)->config.fc_enabled = 1;
+    test_rc::cleanup();
+}
+
+/* Send 'num_msg' messages from 'e', the last one with FLUSH_AM_ID, and wait
+ * until m_am_rx_count (reset to 0 beforehand) becomes non-zero */
+void test_rc_flow_control::send_am_and_flush(entity *e, int num_msg)
+{
+    m_am_rx_count = 0;
+
+    send_am_messages(e, num_msg - 1, UCS_OK);
+    send_am_messages(e, 1, UCS_OK, FLUSH_AM_ID); /* send last msg with FLUSH id */
+    wait_for_flag(&m_am_rx_count);
+    EXPECT_EQ(m_am_rx_count, 1ul);
+}
+
+/* Wait for the FC window of 'e' to become non-zero and check it is positive */
+void test_rc_flow_control::validate_grant(entity *e)
+{
+    wait_for_flag(&get_fc_ptr(e)->fc_wnd);
+    EXPECT_GT(get_fc_ptr(e)->fc_wnd, 0);
+}
+
+/* Check that FC window works as expected:
+ * - If FC enabled, only 'wnd' messages can be sent in a row
+ * - If FC is disabled 'wnd' does not limit senders flow */
+void test_rc_flow_control::test_general(int wnd, int soft_thresh,
+                                        int hard_thresh, bool is_fc_enabled)
+{
+    set_fc_attributes(m_e1, is_fc_enabled, wnd, soft_thresh, hard_thresh);
+
+    send_am_messages(m_e1, wnd, UCS_OK);
+    send_am_messages(m_e1, 1, is_fc_enabled ? UCS_ERR_NO_RESOURCE : UCS_OK);
+
+    validate_grant(m_e1);
+    send_am_messages(m_e1, 1, UCS_OK);
+
+    if (!is_fc_enabled) {
+        /* Make valgrind happy, need to enable FC for proper cleanup */
+        set_fc_attributes(m_e1, true, wnd, wnd, 1);
+    }
+    flush();
+}
+
+void test_rc_flow_control::test_pending_grant(int wnd)
+{
+    /* Block send capabilities of m_e2 for fc grant to be
+     * added to the pending queue. */
+    disable_entity(m_e2);
+    set_fc_attributes(m_e1, true, wnd, wnd, 1);
+
+    send_am_and_flush(m_e1, wnd);
+
+    /* Now m_e1 should be blocked by FC window and FC grant
+     * should be in pending queue of m_e2.
*/ + send_am_messages(m_e1, 1, UCS_ERR_NO_RESOURCE); + EXPECT_LE(get_fc_ptr(m_e1)->fc_wnd, 0); + + /* Enable send capabilities of m_e2 and send AM message + * to force pending queue dispatch */ + enable_entity(m_e2); + set_tx_moderation(m_e2, 0); + send_am_messages(m_e2, 1, UCS_OK); + + /* Check that m_e1 got grant */ + validate_grant(m_e1); + send_am_messages(m_e1, 1, UCS_OK); +} + +void test_rc_flow_control::test_flush_fc_disabled() +{ + set_fc_disabled(m_e1); + ucs_status_t status; + + /* If FC is disabled, wnd=0 should not prevent the flush */ + get_fc_ptr(m_e1)->fc_wnd = 0; + status = uct_ep_flush(m_e1->ep(0), 0, NULL); + EXPECT_EQ(UCS_OK, status); + + /* send active message should be OK */ + get_fc_ptr(m_e1)->fc_wnd = 1; + send_am_messages(m_e1, 1, UCS_OK); + EXPECT_EQ(0, get_fc_ptr(m_e1)->fc_wnd); + + /* flush must have resources */ + status = uct_ep_flush(m_e1->ep(0), 0, NULL); + EXPECT_FALSE(UCS_STATUS_IS_ERR(status)) << ucs_status_string(status); +} + +void test_rc_flow_control::test_pending_purge(int wnd, int num_pend_sends) +{ + pending_send_request_t reqs[num_pend_sends]; + + disable_entity(m_e2); + set_fc_attributes(m_e1, true, wnd, wnd, 1); + + send_am_and_flush(m_e1, wnd); + + /* Now m2 ep should have FC grant message in the pending queue. 
+ * Add some user pending requests as well */ + for (int i = 0; i < num_pend_sends; i++) { + reqs[i].uct.func = NULL; /* make valgrind happy */ + reqs[i].purge_count = 0; + EXPECT_EQ(uct_ep_pending_add(m_e2->ep(0), &reqs[i].uct, 0), UCS_OK); + } + uct_ep_pending_purge(m_e2->ep(0), purge_cb, NULL); + + for (int i = 0; i < num_pend_sends; i++) { + EXPECT_EQ(1, reqs[i].purge_count); + } +} + + +/* Check that FC window works as expected */ +UCS_TEST_P(test_rc_flow_control, general_enabled) +{ + test_general(8, 4, 2, true); +} + +UCS_TEST_P(test_rc_flow_control, general_disabled) +{ + test_general(8, 4, 2, false); +} + +/* Test the scenario when ep is being destroyed while there is + * FC grant message in the pending queue */ +UCS_TEST_P(test_rc_flow_control, pending_only_fc) +{ + int wnd = 2; + + disable_entity(m_e2); + set_fc_attributes(m_e1, true, wnd, wnd, 1); + + send_am_and_flush(m_e1, wnd); + + m_e2->destroy_ep(0); + ASSERT_TRUE(rc_iface(m_e2)->tx.arbiter.current == NULL); +} + +/* Check that user callback passed to uct_ep_pending_purge is not + * invoked for FC grant message */ +UCS_TEST_P(test_rc_flow_control, pending_purge) +{ + test_pending_purge(2, 5); +} + +UCS_TEST_P(test_rc_flow_control, pending_grant) +{ + test_pending_grant(5); +} + +UCS_TEST_P(test_rc_flow_control, fc_disabled_flush) +{ + test_flush_fc_disabled(); +} + +UCT_INSTANTIATE_RC_TEST_CASE(test_rc_flow_control) + + +#if ENABLE_STATS + +void test_rc_flow_control_stats::test_general(int wnd, int soft_thresh, + int hard_thresh) +{ + uint64_t v; + + set_fc_attributes(m_e1, true, wnd, soft_thresh, hard_thresh); + + send_am_messages(m_e1, wnd, UCS_OK); + send_am_messages(m_e1, 1, UCS_ERR_NO_RESOURCE); + + v = UCS_STATS_GET_COUNTER(get_fc_ptr(m_e1)->stats, UCT_RC_FC_STAT_NO_CRED); + EXPECT_EQ(1ul, v); + + validate_grant(m_e1); + send_am_messages(m_e1, 1, UCS_OK); + + v = UCS_STATS_GET_COUNTER(get_fc_ptr(m_e1)->stats, UCT_RC_FC_STAT_TX_HARD_REQ); + EXPECT_EQ(1ul, v); + + v = 
UCS_STATS_GET_COUNTER(get_fc_ptr(m_e1)->stats, UCT_RC_FC_STAT_RX_PURE_GRANT); + EXPECT_EQ(1ul, v); + flush(); +} + + +UCS_TEST_P(test_rc_flow_control_stats, general) +{ + test_general(5, 2, 1); +} + +UCS_TEST_P(test_rc_flow_control_stats, soft_request) +{ + uint64_t v; + int wnd = 8; + int s_thresh = 4; + int h_thresh = 1; + + set_fc_attributes(m_e1, true, wnd, s_thresh, h_thresh); + send_am_and_flush(m_e1, wnd - (s_thresh - 1)); + + v = UCS_STATS_GET_COUNTER(get_fc_ptr(m_e1)->stats, UCT_RC_FC_STAT_TX_SOFT_REQ); + EXPECT_EQ(1ul, v); + v = UCS_STATS_GET_COUNTER(get_fc_ptr(m_e2)->stats, UCT_RC_FC_STAT_RX_SOFT_REQ); + EXPECT_EQ(1ul, v); + + send_am_and_flush(m_e2, wnd - (s_thresh - 1)); + v = UCS_STATS_GET_COUNTER(get_fc_ptr(m_e1)->stats, UCT_RC_FC_STAT_RX_GRANT); + EXPECT_EQ(1ul, v); + v = UCS_STATS_GET_COUNTER(get_fc_ptr(m_e2)->stats, UCT_RC_FC_STAT_TX_GRANT); + EXPECT_EQ(1ul, v); +} + +UCT_INSTANTIATE_RC_TEST_CASE(test_rc_flow_control_stats) + +#endif diff --git a/test/gtest/uct/ib/test_rc.h b/test/gtest/uct/ib/test_rc.h new file mode 100644 index 0000000..cac6695 --- /dev/null +++ b/test/gtest/uct/ib/test_rc.h @@ -0,0 +1,161 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCT_RC_TEST_H_ +#define UCT_RC_TEST_H_ + +#include +#include + +extern "C" { +#include +#include +#include +} + + +class test_rc : public uct_test { +public: + virtual void init(); + void connect(); + + uct_rc_iface_t* rc_iface(entity *e) { + return ucs_derived_of(e->iface(), uct_rc_iface_t); + } + + uct_rc_ep_t* rc_ep(entity *e, int ep_idx = 0) { + return ucs_derived_of(e->ep(ep_idx), uct_rc_ep_t); + } + + void send_am_messages(entity *e, int wnd, ucs_status_t expected, + uint8_t am_id = 0, int ep_idx = 0) { + for (int i = 0; i < wnd; i++) { + EXPECT_EQ(expected, send_am_message(e, am_id, ep_idx)); + } + } + + void progress_loop(double delta_ms=10.0) { + uct_test::short_progress_loop(delta_ms); + } + + void test_iface_ops(int cq_len); + + static ucs_status_t am_dummy_handler(void *arg, void *data, size_t length, + unsigned flags) { + return UCS_OK; + } + +protected: + entity *m_e1, *m_e2; + +}; + +class test_rc_flow_control : public test_rc { +public: + typedef struct pending_send_request { + uct_pending_req_t uct; + int cb_count; + int purge_count; + } pending_send_request_t; + + void init(); + void cleanup(); + + virtual uct_rc_fc_t* get_fc_ptr(entity *e, int ep_idx = 0) { + return &rc_ep(e, ep_idx)->fc; + } + + virtual void disable_entity(entity *e) { + rc_iface(e)->tx.cq_available = 0; + } + + virtual void enable_entity(entity *e, unsigned cq_num = 128) { + rc_iface(e)->tx.cq_available = cq_num; + } + + virtual void set_tx_moderation(entity *e, int val) { + rc_iface(e)->config.tx_moderation = val; + } + + void set_fc_attributes(entity *e, bool enabled, int wnd, int s_thresh, int h_thresh) { + rc_iface(e)->config.fc_enabled = enabled; + rc_iface(e)->config.fc_wnd_size = get_fc_ptr(e)->fc_wnd = wnd; + rc_iface(e)->config.fc_soft_thresh = s_thresh; + rc_iface(e)->config.fc_hard_thresh = h_thresh; + + } + + void set_fc_disabled(entity *e) { + /* same as default settings in rc_iface_init */ + set_fc_attributes(e, false, std::numeric_limits::max(), 0, 
0); + } + + void send_am_and_flush(entity *e, int num_msg); + + void progress_loop(double delta_ms=10.0) { + uct_test::short_progress_loop(delta_ms); + } + + static ucs_status_t am_dummy_handler(void *arg, void *data, size_t length, void *desc) { + return UCS_OK; + } + + static ucs_status_t am_handler(void *arg, void *data, size_t length, + unsigned flags) + { + ++m_am_rx_count; + return UCS_OK; + } + + static void purge_cb(uct_pending_req_t *self, void *arg) { + pending_send_request_t *req = ucs_container_of(self, + pending_send_request_t, + uct); + ++req->purge_count; + } + + static ucs_status_t pending_cb(uct_pending_req_t *self) { + pending_send_request_t *req = ucs_container_of(self, + pending_send_request_t, + uct); + ++req->cb_count; + return UCS_OK; + } + + void validate_grant(entity *e); + + void test_general(int wnd, int s_thresh, int h_thresh, bool is_fc_enabled); + + void test_pending_grant(int wnd); + + void test_pending_purge(int wnd, int num_pend_sends); + + void test_flush_fc_disabled(); + +protected: + static const uint8_t FLUSH_AM_ID = 1; + static uint32_t m_am_rx_count; +}; + + +#if ENABLE_STATS +class test_rc_flow_control_stats : public test_rc_flow_control { +public: + void init() { + stats_activate(); + test_rc_flow_control::init(); + } + + void cleanup() { + test_rc_flow_control::cleanup(); + stats_restore(); + } + + void test_general(int wnd, int s_thresh, int h_thresh); +}; +#endif + +#endif diff --git a/test/gtest/uct/ib/test_sockaddr.cc b/test/gtest/uct/ib/test_sockaddr.cc new file mode 100644 index 0000000..b7aa07a --- /dev/null +++ b/test/gtest/uct/ib/test_sockaddr.cc @@ -0,0 +1,1080 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include +#include + +extern "C" { +#include +#include +#include +#include +} + +#include + +class test_uct_sockaddr : public uct_test { +public: + struct completion : public uct_completion_t { + volatile bool m_flag; + + completion() : m_flag(false), m_status(UCS_INPROGRESS) { + count = 1; + func = completion_cb; + } + + ucs_status_t status() const { + return m_status; + } + private: + static void completion_cb(uct_completion_t *self, ucs_status_t status) + { + completion *c = static_cast(self); + c->m_status = status; + c->m_flag = true; + } + + ucs_status_t m_status; + }; + + test_uct_sockaddr() : server(NULL), client(NULL), err_count(0), + server_recv_req(0), delay_conn_reply(false) { + } + + void check_md_usability() { + uct_md_attr_t md_attr; + uct_md_config_t *md_config; + ucs_status_t status; + uct_md_h md; + + status = uct_md_config_read(GetParam()->component, NULL, NULL, &md_config); + EXPECT_TRUE(status == UCS_OK); + + status = uct_md_open(GetParam()->component, GetParam()->md_name.c_str(), + md_config, &md); + EXPECT_TRUE(status == UCS_OK); + uct_config_release(md_config); + + status = uct_md_query(md, &md_attr); + ASSERT_UCS_OK(status); + + uct_md_close(md); + + if (!(md_attr.cap.flags & UCT_MD_FLAG_SOCKADDR)) { + UCS_TEST_SKIP_R(GetParam()->md_name.c_str() + + std::string(" does not support client-server " + "connection establishment via sockaddr " + "without a cm")); + } + } + + void init() { + check_md_usability(); + + uct_iface_params_t server_params, client_params; + uint16_t port; + + uct_test::init(); + + /* This address is accessible, as it was tested at the resource creation */ + m_listen_addr = GetParam()->listen_sock_addr; + m_connect_addr = GetParam()->connect_sock_addr; + + port = ucs::get_port(); + m_listen_addr.set_port(port); + m_connect_addr.set_port(port); + + /* open iface for the server side */ + server_params.field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER | + 
UCT_IFACE_PARAM_FIELD_ERR_HANDLER_ARG | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS | + UCT_IFACE_PARAM_FIELD_SOCKADDR; + server_params.open_mode = UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER; + server_params.err_handler = err_handler; + server_params.err_handler_arg = reinterpret_cast(this); + server_params.err_handler_flags = 0; + server_params.mode.sockaddr.listen_sockaddr = m_listen_addr.to_ucs_sock_addr(); + server_params.mode.sockaddr.cb_flags = UCT_CB_FLAG_ASYNC; + server_params.mode.sockaddr.conn_request_cb = conn_request_cb; + server_params.mode.sockaddr.conn_request_arg = reinterpret_cast(this); + + /* if origin port is busy, create_entity will retry with another one */ + server = uct_test::create_entity(server_params); + m_entities.push_back(server); + + check_skip_test(); + + port = ucs::sock_addr_storage(server->iface_params().mode.sockaddr + .listen_sockaddr) + .get_port(); + m_listen_addr.set_port(port); + m_connect_addr.set_port(port); + + /* open iface for the client side */ + client_params.field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_ARG | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS; + client_params.open_mode = UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT; + client_params.err_handler = err_handler; + client_params.err_handler_arg = reinterpret_cast(this); + client_params.err_handler_flags = 0; + + client = uct_test::create_entity(client_params); + m_entities.push_back(client); + + /* initiate the client's private data callback argument */ + client->max_conn_priv = server->iface_attr().max_conn_priv; + + UCS_TEST_MESSAGE << "Testing " << m_listen_addr + << " Interface: " << GetParam()->dev_name; + } + + static ssize_t client_iface_priv_data_cb(void *arg, const char *dev_name, + void *priv_data) + { + size_t *max_conn_priv = (size_t*)arg; + size_t priv_data_len; + + priv_data_len = uct_test::entity::priv_data_do_pack(priv_data); + EXPECT_LE(priv_data_len, (*max_conn_priv)); + + return 
priv_data_len; + } + + static void conn_request_cb(uct_iface_h iface, void *arg, + uct_conn_request_h conn_request, + const void *conn_priv_data, size_t length) + { + test_uct_sockaddr *self = reinterpret_cast(arg); + + EXPECT_EQ(std::string(reinterpret_cast + (uct_test::entity::client_priv_data.c_str())), + std::string(reinterpret_cast(conn_priv_data))); + + EXPECT_EQ(1 + uct_test::entity::client_priv_data.length(), length); + if (self->delay_conn_reply) { + self->delayed_conn_reqs.push(conn_request); + } else { + uct_iface_accept(iface, conn_request); + } + ucs_memory_cpu_store_fence(); + self->server_recv_req++; + } + + static ucs_status_t err_handler(void *arg, uct_ep_h ep, ucs_status_t status) + { + test_uct_sockaddr *self = reinterpret_cast(arg); + ucs_atomic_add32(&self->err_count, 1); + return UCS_OK; + } + +protected: + entity *server, *client; + ucs::sock_addr_storage m_listen_addr, m_connect_addr; + volatile uint32_t err_count; + volatile int server_recv_req; + std::queue delayed_conn_reqs; + bool delay_conn_reply; +}; + +UCS_TEST_P(test_uct_sockaddr, connect_client_to_server) +{ + client->connect(0, *server, 0, m_connect_addr, client_iface_priv_data_cb, + NULL, NULL, &client->max_conn_priv); + + + /* wait for the server to connect */ + while (server_recv_req == 0) { + progress(); + } + ASSERT_TRUE(server_recv_req == 1); + /* since the transport may support a graceful exit in case of an error, + * make sure that the error handling flow wasn't invoked (there were no + * errors) */ + EXPECT_EQ(0ul, err_count); + /* the test may end before the client's ep got connected. 
+ * it should also pass in this case as well - the client's + * ep shouldn't be accessed (for connection reply from the server) after the + * test ends and the client's ep was destroyed */ +} + +UCS_TEST_P(test_uct_sockaddr, connect_client_to_server_with_delay) +{ + delay_conn_reply = true; + client->connect(0, *server, 0, m_connect_addr, client_iface_priv_data_cb, + NULL, NULL, &client->max_conn_priv); + + /* wait for the server to connect */ + while (server_recv_req == 0) { + progress(); + } + ASSERT_EQ(1, server_recv_req); + ucs_memory_cpu_load_fence(); + ASSERT_EQ(1ul, delayed_conn_reqs.size()); + EXPECT_EQ(0ul, err_count); + while (!delayed_conn_reqs.empty()) { + uct_iface_accept(server->iface(), delayed_conn_reqs.front()); + delayed_conn_reqs.pop(); + } + + completion comp; + ucs_status_t status = uct_ep_flush(client->ep(0), 0, &comp); + if (status == UCS_INPROGRESS) { + wait_for_flag(&comp.m_flag); + EXPECT_EQ(UCS_OK, comp.status()); + } else { + EXPECT_EQ(UCS_OK, status); + } + EXPECT_EQ(0ul, err_count); +} + +UCS_TEST_P(test_uct_sockaddr, connect_client_to_server_reject_with_delay) +{ + delay_conn_reply = true; + client->connect(0, *server, 0, m_connect_addr, client_iface_priv_data_cb, + NULL, NULL, &client->max_conn_priv); + + /* wait for the server to connect */ + while (server_recv_req == 0) { + progress(); + } + ASSERT_EQ(1, server_recv_req); + ucs_memory_cpu_load_fence(); + ASSERT_EQ(1ul, delayed_conn_reqs.size()); + EXPECT_EQ(0ul, err_count); + while (!delayed_conn_reqs.empty()) { + uct_iface_reject(server->iface(), delayed_conn_reqs.front()); + delayed_conn_reqs.pop(); + } + while (err_count == 0) { + progress(); + } + EXPECT_EQ(1ul, err_count); +} + +UCS_TEST_P(test_uct_sockaddr, many_clients_to_one_server) +{ + int num_clients = ucs_max(2, 100 / ucs::test_time_multiplier()); + uct_iface_params_t client_params; + entity *client_test; + + /* multiple clients, each on an iface of its own, connecting to the same server */ + for (int i = 0; i < 
num_clients; ++i) { + /* open iface for the client side */ + client_params.field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_ARG | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS; + client_params.open_mode = UCT_IFACE_OPEN_MODE_SOCKADDR_CLIENT; + client_params.err_handler = err_handler; + client_params.err_handler_arg = reinterpret_cast(this); + client_params.err_handler_flags = 0; + + client_test = uct_test::create_entity(client_params); + m_entities.push_back(client_test); + + client_test->max_conn_priv = server->iface_attr().max_conn_priv; + client_test->connect(i, *server, 0, m_connect_addr, + client_iface_priv_data_cb, NULL, NULL, + &client_test->max_conn_priv); + } + + while (server_recv_req < num_clients){ + progress(); + } + ASSERT_TRUE(server_recv_req == num_clients); + EXPECT_EQ(0ul, err_count); +} + +UCS_TEST_P(test_uct_sockaddr, many_conns_on_client) +{ + int num_conns_on_client = ucs_max(2, 100 / ucs::test_time_multiplier()); + + /* multiple clients, on the same iface, connecting to the same server */ + for (int i = 0; i < num_conns_on_client; ++i) { + client->connect(i, *server, 0, m_connect_addr, client_iface_priv_data_cb, + NULL, NULL, &client->max_conn_priv); + } + + while (server_recv_req < num_conns_on_client) { + progress(); + } + ASSERT_TRUE(server_recv_req == num_conns_on_client); + EXPECT_EQ(0ul, err_count); +} + +UCS_TEST_SKIP_COND_P(test_uct_sockaddr, err_handle, + !check_caps(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) +{ + client->connect(0, *server, 0, m_connect_addr, client_iface_priv_data_cb, + NULL, NULL, &client->max_conn_priv); + + scoped_log_handler slh(wrap_errors_logger); + /* kill the server */ + m_entities.remove(server); + + /* If the server didn't receive a connection request from the client yet, + * test error handling */ + if (server_recv_req == 0) { + wait_for_flag(&err_count); + /* Double check for server_recv_req if it's not delivered from NIC to + * host 
memory under high load */ + EXPECT_TRUE((err_count == 1) || (server_recv_req == 1)); + } +} + +UCS_TEST_SKIP_COND_P(test_uct_sockaddr, conn_to_non_exist_server, + !check_caps(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) +{ + m_connect_addr.set_port(htons(1)); + err_count = 0; + + /* wrap errors now since the client will try to connect to a non-existing port */ + { + scoped_log_handler slh(wrap_errors_logger); + /* client - try to connect to a non-existing port on the server side */ + client->connect(0, *server, 0, m_connect_addr, client_iface_priv_data_cb, + NULL, NULL, &client->max_conn_priv); + completion comp; + ucs_status_t status = uct_ep_flush(client->ep(0), 0, &comp); + if (status == UCS_INPROGRESS) { + wait_for_flag(&comp.m_flag); + EXPECT_EQ(UCS_ERR_UNREACHABLE, comp.status()); + } else { + EXPECT_EQ(UCS_ERR_UNREACHABLE, status); + } + /* destroy the client's ep. this ep shouldn't be accessed anymore */ + client->destroy_ep(0); + } +} + +UCT_INSTANTIATE_SOCKADDR_TEST_CASE(test_uct_sockaddr) + +class test_uct_cm_sockaddr : public uct_test { + friend class uct_test::entity; +protected: + enum { + TEST_CM_STATE_CONNECT_REQUESTED = UCS_BIT(0), + TEST_CM_STATE_CLIENT_CONNECTED = UCS_BIT(1), + TEST_CM_STATE_SERVER_CONNECTED = UCS_BIT(2), + TEST_CM_STATE_CLIENT_DISCONNECTED = UCS_BIT(3), + TEST_CM_STATE_SERVER_DISCONNECTED = UCS_BIT(4), + TEST_CM_STATE_SERVER_REJECTED = UCS_BIT(5), + TEST_CM_STATE_CLIENT_GOT_REJECT = UCS_BIT(6), + TEST_CM_STATE_CLIENT_GOT_ERROR = UCS_BIT(7) + }; + +public: + test_uct_cm_sockaddr() : m_cm_state(0), m_server(NULL), m_client(NULL), + m_server_recv_req_cnt(0), m_client_connect_cb_cnt(0), + m_server_connect_cb_cnt(0), + m_server_disconnect_cnt(0), m_client_disconnect_cnt(0), + m_reject_conn_request(false), + m_server_start_disconnect(false), + m_delay_conn_reply(false) { + } + + void init() { + uct_test::init(); + + /* This address is accessible, as it was tested at the resource creation */ + m_listen_addr = GetParam()->listen_sock_addr; 
+ m_connect_addr = GetParam()->connect_sock_addr; + + uint16_t port = ucs::get_port(); + m_listen_addr.set_port(port); + m_connect_addr.set_port(port); + + m_server = uct_test::create_entity(); + m_entities.push_back(m_server); + m_client = uct_test::create_entity(); + m_entities.push_back(m_client); + + /* initiate the client's private data callback argument */ + m_client->max_conn_priv = m_client->cm_attr().max_conn_priv; + + UCS_TEST_MESSAGE << "Testing " << m_listen_addr + << " Interface: " << GetParam()->dev_name; + } + +protected: + + void skip_tcp_sockcm() { + uct_component_attr_t cmpt_attr = {0}; + ucs_status_t status; + + cmpt_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME; + /* coverity[var_deref_model] */ + status = uct_component_query(GetParam()->component, &cmpt_attr); + ASSERT_UCS_OK(status); + + if (!strcmp(cmpt_attr.name, "tcp")) { + UCS_TEST_SKIP_R("tcp cm is not fully implemented"); + } + } + + void cm_start_listen() { + uct_listener_params_t params; + + params.field_mask = UCT_LISTENER_PARAM_FIELD_CONN_REQUEST_CB | + UCT_LISTENER_PARAM_FIELD_USER_DATA; + params.conn_request_cb = cm_conn_request_cb; + params.user_data = static_cast(this); + /* if origin port set in init() is busy, listen() will retry with another one */ + m_server->listen(m_listen_addr, params); + + /* the listen function may have changed the initial port on the listener's + * address. 
update this port for the address to connect to */ + m_connect_addr.set_port(m_listen_addr.get_port()); + } + + static ssize_t client_cm_priv_data_cb(void *arg, const char *dev_name, + void *priv_data) + { + test_uct_cm_sockaddr *self = reinterpret_cast(arg); + size_t priv_data_len; + + priv_data_len = uct_test::entity::priv_data_do_pack(priv_data); + EXPECT_LE(priv_data_len, self->m_client->max_conn_priv); + return priv_data_len; + } + + void cm_listen_and_connect() { + skip_tcp_sockcm(); + cm_start_listen(); + m_client->connect(0, *m_server, 0, m_connect_addr, client_cm_priv_data_cb, + client_connect_cb, client_disconnect_cb, this); + + wait_for_bits(&m_cm_state, TEST_CM_STATE_CONNECT_REQUESTED); + EXPECT_TRUE(m_cm_state & TEST_CM_STATE_CONNECT_REQUESTED); + } + + virtual void server_accept(entity *server, uct_conn_request_h conn_request, + uct_ep_server_connect_cb_t connect_cb, + uct_ep_disconnect_cb_t disconnect_cb, + void *user_data) + { + server->accept(server->cm(), conn_request, connect_cb, disconnect_cb, + user_data); + } + + static void + cm_conn_request_cb(uct_listener_h listener, void *arg, + const char *local_dev_name, + uct_conn_request_h conn_request, + const uct_cm_remote_data_t *remote_data) { + test_uct_cm_sockaddr *self = reinterpret_cast(arg); + ucs_status_t status; + + EXPECT_EQ(entity::client_priv_data.length() + 1, remote_data->conn_priv_data_length); + EXPECT_EQ(entity::client_priv_data, + std::string(static_cast(remote_data->conn_priv_data))); + + self->m_cm_state |= TEST_CM_STATE_CONNECT_REQUESTED; + + if (self->m_delay_conn_reply) { + self->m_delayed_conn_reqs.push(conn_request); + } else if (self->m_reject_conn_request) { + status = uct_listener_reject(listener, conn_request); + ASSERT_UCS_OK(status); + self->m_cm_state |= TEST_CM_STATE_SERVER_REJECTED; + } else { + self->server_accept(self->m_server, conn_request, + server_connect_cb, server_disconnect_cb, self); + } + + ucs_memory_cpu_store_fence(); + self->m_server_recv_req_cnt++; + } 
+ + static void + server_connect_cb(uct_ep_h ep, void *arg, ucs_status_t status) { + test_uct_cm_sockaddr *self = reinterpret_cast(arg); + + EXPECT_EQ(UCS_OK, status); + self->m_cm_state |= TEST_CM_STATE_SERVER_CONNECTED; + self->m_server_connect_cb_cnt++; + } + + static void + server_disconnect_cb(uct_ep_h ep, void *arg) { + test_uct_cm_sockaddr *self = reinterpret_cast(arg); + + if (!(self->m_server_start_disconnect)) { + self->m_server->disconnect(ep); + } + self->m_cm_state |= TEST_CM_STATE_SERVER_DISCONNECTED; + self->m_server_disconnect_cnt++; + } + + static void + client_connect_cb(uct_ep_h ep, void *arg, + const uct_cm_remote_data_t *remote_data, + ucs_status_t status) { + test_uct_cm_sockaddr *self = reinterpret_cast(arg); + + if (status == UCS_ERR_REJECTED) { + self->m_cm_state |= TEST_CM_STATE_CLIENT_GOT_REJECT; + } else if (status != UCS_OK) { + self->m_cm_state |= TEST_CM_STATE_CLIENT_GOT_ERROR; + } else { + EXPECT_TRUE(ucs_test_all_flags(remote_data->field_mask, + (UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA_LENGTH | + UCT_CM_REMOTE_DATA_FIELD_CONN_PRIV_DATA))); + EXPECT_EQ(entity::server_priv_data.length() + 1, remote_data->conn_priv_data_length); + EXPECT_EQ(entity::server_priv_data, + std::string(static_cast(remote_data->conn_priv_data))); + self->m_cm_state |= TEST_CM_STATE_CLIENT_CONNECTED; + self->m_client_connect_cb_cnt++; + } + } + + static void + client_disconnect_cb(uct_ep_h ep, void *arg) { + test_uct_cm_sockaddr *self; + + self = reinterpret_cast(arg); + + if (self->m_server_start_disconnect) { + /* if the server was the one who initiated the disconnect flow, + * the client should also disconnect its ep from the server in + * its disconnect cb */ + self->m_client->disconnect(ep); + } + + self->m_cm_state |= TEST_CM_STATE_CLIENT_DISCONNECTED; + self->m_client_disconnect_cnt++; + } + + void cm_disconnect(entity *ent) { + size_t i; + + /* Disconnect all the existing endpoints */ + for (i = 0; i < ent->num_eps(); ++i) { + 
ent->disconnect(ent->ep(i)); + } + + wait_for_bits(&m_cm_state, TEST_CM_STATE_CLIENT_DISCONNECTED | + TEST_CM_STATE_SERVER_DISCONNECTED); + EXPECT_TRUE(ucs_test_all_flags(m_cm_state, (TEST_CM_STATE_SERVER_DISCONNECTED | + TEST_CM_STATE_CLIENT_DISCONNECTED))); + } + + void wait_for_client_server_counters(volatile int *server_cnt, + volatile int *client_cnt, int val, + double timeout = 10 * DEFAULT_TIMEOUT_SEC) { + ucs_time_t deadline; + + deadline = ucs_get_time() + ucs_time_from_sec(timeout) * + ucs::test_time_multiplier(); + + while (((*server_cnt < val) || (*client_cnt < val)) && + (ucs_get_time() < deadline)) { + progress(); + } + } + + void test_delayed_server_response(bool reject) + { + ucs_status_t status; + ucs_time_t deadline; + + m_delay_conn_reply = true; + + cm_listen_and_connect(); + + EXPECT_FALSE(m_cm_state & + (TEST_CM_STATE_SERVER_CONNECTED | TEST_CM_STATE_CLIENT_CONNECTED | + TEST_CM_STATE_CLIENT_GOT_REJECT | TEST_CM_STATE_CLIENT_GOT_ERROR)); + + deadline = ucs_get_time() + ucs_time_from_sec(DEFAULT_TIMEOUT_SEC) * + ucs::test_time_multiplier(); + + while ((m_server_recv_req_cnt == 0) && (ucs_get_time() < deadline)) { + progress(); + } + ASSERT_EQ(1, m_server_recv_req_cnt); + ucs_memory_cpu_load_fence(); + + if (reject) { + /* wrap errors since a reject is expected */ + scoped_log_handler slh(detect_reject_error_logger); + + status = uct_listener_reject(m_server->listener(), + m_delayed_conn_reqs.front()); + ASSERT_UCS_OK(status); + + wait_for_bits(&m_cm_state, TEST_CM_STATE_CLIENT_GOT_REJECT); + EXPECT_TRUE(m_cm_state & TEST_CM_STATE_CLIENT_GOT_REJECT); + } else { + server_accept(m_server, m_delayed_conn_reqs.front(), + server_connect_cb, server_disconnect_cb, this); + + wait_for_bits(&m_cm_state, TEST_CM_STATE_SERVER_CONNECTED | + TEST_CM_STATE_CLIENT_CONNECTED); + EXPECT_TRUE(ucs_test_all_flags(m_cm_state, TEST_CM_STATE_SERVER_CONNECTED | + TEST_CM_STATE_CLIENT_CONNECTED)); + } + + m_delayed_conn_reqs.pop(); + } + + static ucs_log_func_rc_t + 
detect_addr_route_error_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) + { + if (level == UCS_LOG_LEVEL_ERROR) { + std::string err_str = format_message(message, ap); + if ((strstr(err_str.c_str(), "client: got error event RDMA_CM_EVENT_ADDR_ERROR")) || + (strstr(err_str.c_str(), "client: got error event RDMA_CM_EVENT_ROUTE_ERROR")) || + (strstr(err_str.c_str(), "rdma_resolve_route(to addr=240.0.0.0"))) { + UCS_TEST_MESSAGE << err_str; + return UCS_LOG_FUNC_RC_STOP; + } + } + return UCS_LOG_FUNC_RC_CONTINUE; + } + + static ucs_log_func_rc_t + detect_reject_error_logger(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) + { + if (level == UCS_LOG_LEVEL_ERROR) { + std::string err_str = format_message(message, ap); + if (strstr(err_str.c_str(), "client: got error event RDMA_CM_EVENT_REJECTED")) { + UCS_TEST_MESSAGE << err_str; + return UCS_LOG_FUNC_RC_STOP; + } + } + return UCS_LOG_FUNC_RC_CONTINUE; + } + + static ucs_log_func_rc_t + detect_double_disconnect_error_logger(const char *file, unsigned line, + const char *function, ucs_log_level_t level, + const char *message, va_list ap) + { + if (level == UCS_LOG_LEVEL_ERROR) { + std::string err_str = format_message(message, ap); + if (err_str.find("duplicate call of uct_ep_disconnect") != + std::string::npos) { + UCS_TEST_MESSAGE << err_str; + return UCS_LOG_FUNC_RC_STOP; + } + } + return UCS_LOG_FUNC_RC_CONTINUE; + } + +protected: + ucs::sock_addr_storage m_listen_addr, m_connect_addr; + uint64_t m_cm_state; + entity *m_server; + entity *m_client; + volatile int m_server_recv_req_cnt, m_client_connect_cb_cnt, + m_server_connect_cb_cnt; + volatile int m_server_disconnect_cnt, m_client_disconnect_cnt; + bool m_reject_conn_request; + bool m_server_start_disconnect; + bool m_delay_conn_reply; + std::queue m_delayed_conn_reqs; +}; + +UCS_TEST_P(test_uct_cm_sockaddr, cm_query) +{ + 
ucs_status_t status; + size_t i; + + for (i = 0; i < m_entities.size(); ++i) { + uct_cm_attr_t attr; + attr.field_mask = UCT_CM_ATTR_FIELD_MAX_CONN_PRIV; + status = uct_cm_query(m_entities.at(i).cm(), &attr); + ASSERT_UCS_OK(status); + EXPECT_LT(0ul, attr.max_conn_priv); + } +} + +UCS_TEST_P(test_uct_cm_sockaddr, listener_query) +{ + uct_listener_attr_t attr; + ucs_status_t status; + uint16_t port; + char m_listener_ip_port_str[UCS_SOCKADDR_STRING_LEN]; + char attr_addr_ip_port_str[UCS_SOCKADDR_STRING_LEN]; + + cm_start_listen(); + + attr.field_mask = UCT_LISTENER_ATTR_FIELD_SOCKADDR; + status = uct_listener_query(m_server->listener(), &attr); + ASSERT_UCS_OK(status); + + ucs_sockaddr_str(m_listen_addr.get_sock_addr_ptr(), m_listener_ip_port_str, + UCS_SOCKADDR_STRING_LEN); + ucs_sockaddr_str((struct sockaddr*)&attr.sockaddr, attr_addr_ip_port_str, + UCS_SOCKADDR_STRING_LEN); + EXPECT_EQ(strcmp(m_listener_ip_port_str, attr_addr_ip_port_str), 0); + + status = ucs_sockaddr_get_port((struct sockaddr*)&attr.sockaddr, &port); + ASSERT_UCS_OK(status); + + EXPECT_EQ(m_listen_addr.get_port(), port); +} + +UCS_TEST_P(test_uct_cm_sockaddr, cm_open_listen_close) +{ + cm_listen_and_connect(); + + wait_for_bits(&m_cm_state, TEST_CM_STATE_SERVER_CONNECTED | + TEST_CM_STATE_CLIENT_CONNECTED); + EXPECT_TRUE(ucs_test_all_flags(m_cm_state, (TEST_CM_STATE_SERVER_CONNECTED | + TEST_CM_STATE_CLIENT_CONNECTED))); + + cm_disconnect(m_client); +} + +UCS_TEST_P(test_uct_cm_sockaddr, cm_open_listen_kill_server) +{ + cm_listen_and_connect(); + + wait_for_bits(&m_cm_state, TEST_CM_STATE_SERVER_CONNECTED | + TEST_CM_STATE_CLIENT_CONNECTED); + EXPECT_TRUE(ucs_test_all_flags(m_cm_state, (TEST_CM_STATE_SERVER_CONNECTED | + TEST_CM_STATE_CLIENT_CONNECTED))); + + EXPECT_EQ(1ul, m_entities.remove(m_server)); + m_server = NULL; + + wait_for_bits(&m_cm_state, TEST_CM_STATE_CLIENT_DISCONNECTED); + EXPECT_TRUE(m_cm_state & TEST_CM_STATE_CLIENT_DISCONNECTED); +} + +UCS_TEST_P(test_uct_cm_sockaddr, 
cm_server_reject) +{ + m_reject_conn_request = true; + + /* wrap errors since a reject is expected */ + scoped_log_handler slh(detect_reject_error_logger); + cm_listen_and_connect(); + + wait_for_bits(&m_cm_state, TEST_CM_STATE_SERVER_REJECTED | + TEST_CM_STATE_CLIENT_GOT_REJECT); + EXPECT_TRUE(ucs_test_all_flags(m_cm_state, (TEST_CM_STATE_SERVER_REJECTED | + TEST_CM_STATE_CLIENT_GOT_REJECT))); + + EXPECT_FALSE((m_cm_state & + (TEST_CM_STATE_SERVER_CONNECTED | TEST_CM_STATE_CLIENT_CONNECTED))); +} + +UCS_TEST_P(test_uct_cm_sockaddr, many_clients_to_one_server) +{ + int num_clients = ucs_max(2, 100 / ucs::test_time_multiplier());; + entity *client_test; + + skip_tcp_sockcm(); + /* Listen */ + cm_start_listen(); + + /* Connect */ + /* multiple clients, each on a cm of its own, connecting to the same server */ + for (int i = 0; i < num_clients; ++i) { + client_test = uct_test::create_entity(); + m_entities.push_back(client_test); + client_test->max_conn_priv = client_test->cm_attr().max_conn_priv; + client_test->connect(0, *m_server, 0, m_connect_addr, client_cm_priv_data_cb, + client_connect_cb, client_disconnect_cb, this); + } + + /* wait for the server to connect to all the clients */ + wait_for_client_server_counters(&m_server_connect_cb_cnt, + &m_client_connect_cb_cnt, num_clients); + + EXPECT_EQ(num_clients, m_server_recv_req_cnt); + EXPECT_EQ(num_clients, m_client_connect_cb_cnt); + EXPECT_EQ(num_clients, m_server_connect_cb_cnt); + EXPECT_EQ(num_clients, (int)m_server->num_eps()); + + /* Disconnect */ + for (int i = 0; i < num_clients; ++i) { + /* first 2 entities are m_server and m_client */ + client_test = &m_entities.at(2 + i); + ASSERT_TRUE(client_test != m_client); + + cm_disconnect(client_test); + } + + /* don't remove the ep, i.e. 
don't call uct_ep_destroy on the client's ep, + * before the client finished disconnecting so that a disconnect event won't + * arrive on a destroyed endpoint on the client side */ + + wait_for_client_server_counters(&m_server_disconnect_cnt, + &m_client_disconnect_cnt, num_clients); + + EXPECT_EQ(num_clients, m_server_disconnect_cnt); + EXPECT_EQ(num_clients, m_client_disconnect_cnt); + + for (int i = 0; i < num_clients; ++i) { + client_test = m_entities.back(); + m_entities.remove(client_test); + } +} + +UCS_TEST_P(test_uct_cm_sockaddr, many_conns_on_client) +{ + int num_conns_on_client = ucs_max(2, 100 / ucs::test_time_multiplier()); + + m_server_start_disconnect = true; + + skip_tcp_sockcm(); + /* Listen */ + cm_start_listen(); + + /* Connect */ + /* multiple clients, on the same cm, connecting to the same server */ + for (int i = 0; i < num_conns_on_client; ++i) { + m_client->connect(i, *m_server, 0, m_connect_addr, client_cm_priv_data_cb, + client_connect_cb, client_disconnect_cb, this); + } + + /* wait for the server to connect to all the endpoints on the cm */ + wait_for_client_server_counters(&m_server_connect_cb_cnt, + &m_client_connect_cb_cnt, + num_conns_on_client); + + EXPECT_EQ(num_conns_on_client, m_server_recv_req_cnt); + EXPECT_EQ(num_conns_on_client, m_client_connect_cb_cnt); + EXPECT_EQ(num_conns_on_client, m_server_connect_cb_cnt); + EXPECT_EQ(num_conns_on_client, (int)m_client->num_eps()); + EXPECT_EQ(num_conns_on_client, (int)m_server->num_eps()); + + /* Disconnect */ + cm_disconnect(m_server); + + /* wait for disconnect to complete */ + wait_for_client_server_counters(&m_server_disconnect_cnt, + &m_client_disconnect_cnt, + num_conns_on_client); + + EXPECT_EQ(num_conns_on_client, m_server_disconnect_cnt); + EXPECT_EQ(num_conns_on_client, m_client_disconnect_cnt); +} + +UCS_TEST_P(test_uct_cm_sockaddr, err_handle) +{ + skip_tcp_sockcm(); + + /* wrap errors since a reject is expected */ + scoped_log_handler slh(detect_reject_error_logger); + + 
/* client - try to connect to a server that isn't listening */ + m_client->connect(0, *m_server, 0, m_connect_addr, client_cm_priv_data_cb, + client_connect_cb, client_disconnect_cb, this); + + EXPECT_FALSE(m_cm_state & TEST_CM_STATE_CONNECT_REQUESTED); + + /* with the TCP port space (which is currently tested with rdmacm), + * in this case, a REJECT event will be generated on the client side */ + wait_for_bits(&m_cm_state, TEST_CM_STATE_CLIENT_GOT_REJECT); + EXPECT_TRUE(ucs_test_all_flags(m_cm_state, TEST_CM_STATE_CLIENT_GOT_REJECT)); +} + +UCS_TEST_P(test_uct_cm_sockaddr, conn_to_non_exist_server_port) +{ + skip_tcp_sockcm(); + /* Listen */ + cm_start_listen(); + + m_connect_addr.set_port(htons(1)); + + /* wrap errors since a reject is expected */ + scoped_log_handler slh(detect_reject_error_logger); + + /* client - try to connect to a non-existing port on the server side. */ + m_client->connect(0, *m_server, 0, m_connect_addr, client_cm_priv_data_cb, + client_connect_cb, client_disconnect_cb, this); + + /* with the TCP port space (which is currently tested with rdmacm), + * in this case, a REJECT event will be generated on the client side */ + wait_for_bits(&m_cm_state, TEST_CM_STATE_CLIENT_GOT_REJECT); + EXPECT_TRUE(ucs_test_all_flags(m_cm_state, TEST_CM_STATE_CLIENT_GOT_REJECT)); +} + +UCS_TEST_P(test_uct_cm_sockaddr, conn_to_non_exist_ip) +{ + struct sockaddr_in addr; + ucs_status_t status; + size_t size; + + skip_tcp_sockcm(); + /* Listen */ + cm_start_listen(); + + /* 240.0.0.0/4 - This block, formerly known as the Class E address + space, is reserved for future use; see [RFC1112], Section 4. 
+ therefore, this value can be used as a non-existing IP for this test */ + memset(&addr, 0, sizeof(struct sockaddr_in)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = inet_addr("240.0.0.0"); + addr.sin_port = m_listen_addr.get_port(); + + status = ucs_sockaddr_sizeof((struct sockaddr*)&addr, &size); + ASSERT_UCS_OK(status); + + m_connect_addr.set_sock_addr(*(struct sockaddr*)&addr, size); + + /* wrap errors now since the client will try to connect to a non existing IP */ + { + scoped_log_handler slh(detect_addr_route_error_logger); + /* client - try to connect to a non-existing IP */ + m_client->connect(0, *m_server, 0, m_connect_addr, client_cm_priv_data_cb, + client_connect_cb, client_disconnect_cb, this); + + wait_for_bits(&m_cm_state, TEST_CM_STATE_CLIENT_GOT_ERROR); + EXPECT_TRUE(ucs_test_all_flags(m_cm_state, TEST_CM_STATE_CLIENT_GOT_ERROR)); + + EXPECT_FALSE(m_cm_state & TEST_CM_STATE_CONNECT_REQUESTED); + EXPECT_FALSE(m_cm_state & + (TEST_CM_STATE_SERVER_CONNECTED | TEST_CM_STATE_CLIENT_CONNECTED)); + } +} + +UCS_TEST_P(test_uct_cm_sockaddr, connect_client_to_server_with_delay) +{ + test_delayed_server_response(false); + + cm_disconnect(m_client); +} + +UCS_TEST_P(test_uct_cm_sockaddr, connect_client_to_server_reject_with_delay) +{ + test_delayed_server_response(true); +} + +UCS_TEST_P(test_uct_cm_sockaddr, ep_disconnect_err_codes) +{ + bool disconnecting = false; + + cm_listen_and_connect(); + + { + entity::scoped_async_lock lock(*m_client); + if (m_cm_state & TEST_CM_STATE_CLIENT_CONNECTED) { + UCS_TEST_MESSAGE << "EXP: " << ucs_status_string(UCS_OK); + EXPECT_EQ(UCS_OK, uct_ep_disconnect(m_client->ep(0), 0)); + disconnecting = true; + } else { + UCS_TEST_MESSAGE << "EXP: " << ucs_status_string(UCS_ERR_BUSY); + EXPECT_EQ(UCS_ERR_BUSY, uct_ep_disconnect(m_client->ep(0), 0)); + } + } + + wait_for_bits(&m_cm_state, TEST_CM_STATE_SERVER_CONNECTED | + TEST_CM_STATE_CLIENT_CONNECTED); + EXPECT_TRUE(ucs_test_all_flags(m_cm_state, 
(TEST_CM_STATE_SERVER_CONNECTED | + TEST_CM_STATE_CLIENT_CONNECTED))); + + { + entity::scoped_async_lock lock(*m_client); + if (disconnecting) { + scoped_log_handler slh(detect_double_disconnect_error_logger); + if (m_cm_state & TEST_CM_STATE_CLIENT_DISCONNECTED) { + UCS_TEST_MESSAGE << "EXP: " + << ucs_status_string(UCS_ERR_NOT_CONNECTED); + EXPECT_EQ(UCS_ERR_NOT_CONNECTED, + uct_ep_disconnect(m_client->ep(0), 0)); + } else { + UCS_TEST_MESSAGE << "EXP: " + << ucs_status_string(UCS_INPROGRESS); + EXPECT_EQ(UCS_INPROGRESS, + uct_ep_disconnect(m_client->ep(0), 0)); + } + } else { + UCS_TEST_MESSAGE << "EXP: " << ucs_status_string(UCS_OK); + ASSERT_UCS_OK(uct_ep_disconnect(m_client->ep(0), 0)); + disconnecting = true; + } + } + + ASSERT_TRUE(disconnecting); + wait_for_bits(&m_cm_state, TEST_CM_STATE_CLIENT_DISCONNECTED); + EXPECT_TRUE(m_cm_state & TEST_CM_STATE_CLIENT_DISCONNECTED); + + /* wrap errors since the client will call uct_ep_disconnect the second time + * on the same endpoint. this ep may not be disconnected yet */ + { + scoped_log_handler slh(detect_double_disconnect_error_logger); + UCS_TEST_MESSAGE << "EXP: " << ucs_status_string(UCS_ERR_NOT_CONNECTED); + EXPECT_EQ(UCS_ERR_NOT_CONNECTED, uct_ep_disconnect(m_client->ep(0), 0)); + } +} + +UCT_INSTANTIATE_SOCKADDR_TEST_CASE(test_uct_cm_sockaddr) + + +class test_uct_cm_sockaddr_multiple_cms : public test_uct_cm_sockaddr { +public: + void init() { + ucs_status_t status; + + test_uct_cm_sockaddr::init(); + + status = ucs_async_context_create(UCS_ASYNC_MODE_THREAD_SPINLOCK, + &m_test_async); + ASSERT_UCS_OK(status); + + status = uct_cm_config_read(GetParam()->component, NULL, NULL, &m_test_config); + ASSERT_UCS_OK(status); + + UCS_TEST_CREATE_HANDLE(uct_worker_h, m_test_worker, uct_worker_destroy, + uct_worker_create, m_test_async, + UCS_THREAD_MODE_SINGLE) + + UCS_TEST_CREATE_HANDLE(uct_cm_h, m_test_cm, uct_cm_close, + uct_cm_open, GetParam()->component, + m_test_worker, m_test_config); + } + + void cleanup() 
{ + m_test_cm.reset(); + uct_config_release(m_test_config); + m_test_worker.reset(); + ucs_async_context_destroy(m_test_async); + test_uct_cm_sockaddr::cleanup(); + } + + void server_accept(entity *server, uct_conn_request_h conn_request, + uct_ep_server_connect_cb_t connect_cb, + uct_ep_disconnect_cb_t disconnect_cb, + void *user_data) + { + server->accept(m_test_cm, conn_request, connect_cb, disconnect_cb, + user_data); + } + +protected: + ucs::handle<uct_worker_h> m_test_worker; /* worker the additional cm is opened on */ + ucs::handle<uct_cm_h> m_test_cm; /* additional cm, handed to entity::accept() by server_accept() */ + ucs_async_context_t *m_test_async; + uct_cm_config_t *m_test_config; +}; + +UCS_TEST_P(test_uct_cm_sockaddr_multiple_cms, server_switch_cm) +{ + cm_listen_and_connect(); + + wait_for_bits(&m_cm_state, TEST_CM_STATE_SERVER_CONNECTED | + TEST_CM_STATE_CLIENT_CONNECTED); + EXPECT_TRUE(ucs_test_all_flags(m_cm_state, (TEST_CM_STATE_SERVER_CONNECTED | + TEST_CM_STATE_CLIENT_CONNECTED))); + + cm_disconnect(m_client); + + /* destroy the server's ep here so that it would be destroyed before the cm + * it is using */ + m_server->destroy_ep(0); +} + +UCT_INSTANTIATE_SOCKADDR_TEST_CASE(test_uct_cm_sockaddr_multiple_cms) diff --git a/test/gtest/uct/ib/test_ud.cc b/test/gtest/uct/ib/test_ud.cc new file mode 100644 index 0000000..7100db5 --- /dev/null +++ b/test/gtest/uct/ib/test_ud.cc @@ -0,0 +1,940 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms.
+*/ + +#include "ud_base.h" + +#include + +extern "C" { +#include +#include +#include +#include +} + + +class test_ud : public ud_base_test { +public: + + static ucs_status_t clear_ack_req(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + neth->packet_type &= ~UCT_UD_PACKET_FLAG_ACK_REQ; + return UCS_OK; + } + + static ucs_status_t drop_ctl(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + if (neth->packet_type & UCT_UD_PACKET_FLAG_CTL) { + return UCS_ERR_BUSY; + } + return UCS_OK; + } + + static int rx_ack_count; + static int tx_ackreq_psn; + + static ucs_status_t count_rx_acks(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + if (UCT_UD_PSN_COMPARE(neth->ack_psn, >, ep->tx.acked_psn)) { + rx_ack_count++; + } + return UCS_OK; + } + + static ucs_status_t save_tx_ackreqs(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + if (neth->packet_type & UCT_UD_PACKET_FLAG_ACK_REQ) { + tx_ackreq_psn = neth->psn; + } + return UCS_OK; + } + + static int rx_drop_count; + + static ucs_status_t drop_rx(uct_ud_ep_t *ep, uct_ud_neth_t *neth) { + rx_drop_count++; + if (neth->packet_type & UCT_UD_PACKET_FLAG_ACK_REQ) { + tx_ack_psn = neth->psn; + ack_req_tx_cnt++; + ucs_debug("RX: psn %u ack_req", neth->psn); + } + return UCS_ERR_BUSY; + } + + static int ack_req_tx_cnt; + + static uct_ud_psn_t tx_ack_psn; + + static ucs_status_t ack_req_count_tx(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + if (neth->packet_type & UCT_UD_PACKET_FLAG_ACK_REQ) { + tx_ack_psn = neth->psn; + ack_req_tx_cnt++; + } + return UCS_OK; + } + + static int tx_count; + + static ucs_status_t count_tx(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + tx_count++; + return UCS_OK; + } + + static ucs_status_t invalidate_creq_tx(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + if ((neth->packet_type & UCT_UD_PACKET_FLAG_CTL) && + (uct_ud_neth_get_dest_id(neth) == UCT_UD_EP_NULL_ID)) { + uct_ud_neth_set_dest_id(neth, 0xbeef); + } + return UCS_OK; + } + + static ucs_status_t drop_ack(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + if (!(neth->packet_type & 
(UCT_UD_PACKET_FLAG_CTL|UCT_UD_PACKET_FLAG_AM))) { + return UCS_ERR_BUSY; + } + return UCS_OK; + } + + static ucs_status_t drop_creq(uct_ud_iface_t *iface, uct_ud_neth_t *neth) + { + if ((neth->packet_type & UCT_UD_PACKET_FLAG_CTL) && + ((uct_ud_ctl_hdr_t *)(neth + 1))->type == UCT_UD_PACKET_CREQ) + { + return UCS_ERR_BUSY; + } + + return UCS_OK; + } + + void connect_to_iface(unsigned index = 0) + { + m_e1->connect_to_iface(index, *m_e2); + m_e2->connect_to_iface(index, *m_e1); + } + + void validate_connect(uct_ud_ep_t *ep, unsigned value, + double timeout_sec=TEST_UD_TIMEOUT_IN_SEC) { + ucs_time_t timeout = ucs_get_time() + ucs_time_from_sec(timeout_sec); + while ((ep->dest_ep_id != value) && (ucs_get_time() < timeout)) { + progress(); + } + EXPECT_EQ(value, ep->dest_ep_id); + EXPECT_EQ(value, ep->conn_id); + EXPECT_EQ(value, ep->ep_id); + } + + unsigned no_creq_cnt(uct_ud_ep_t *ep) { + return (ep->flags & UCT_UD_EP_FLAG_CREQ_NOTSENT) ? 1 : 0; + } + + void validate_send(uct_ud_ep_t *ep, unsigned value) { + EXPECT_GE(ep->tx.acked_psn, value - no_creq_cnt(ep)); + } + + void validate_recv(uct_ud_ep_t *ep, unsigned value, + double timeout_sec=TEST_UD_TIMEOUT_IN_SEC) { + ucs_time_t timeout = ucs_get_time() + ucs_time_from_sec(timeout_sec); + while ((ucs_frag_list_sn(&ep->rx.ooo_pkts) < value - no_creq_cnt(ep)) && + (ucs_get_time() < timeout)) { + progress(); + } + EXPECT_EQ(value - no_creq_cnt(ep), ucs_frag_list_sn(&ep->rx.ooo_pkts)); + } + + void validate_flush() { + /* 1 packets transmitted, 1 packets received */ + EXPECT_EQ(2, ep(m_e1)->tx.psn); + EXPECT_EQ(1, ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts)); + + /* no data transmitted back */ + EXPECT_EQ(1, ep(m_e2)->tx.psn); + + /* one packet was acked */ + EXPECT_EQ(0U, ucs_queue_length(&ep(m_e1)->tx.window)); + EXPECT_EQ(1, ep(m_e1)->tx.acked_psn); + EXPECT_EQ(1, ep(m_e2)->rx.acked_psn); + } + + void check_connection() { + /* make sure that connection is good */ + EXPECT_UCS_OK(tx(m_e1)); + EXPECT_UCS_OK(tx(m_e1)); + 
flush(); + EXPECT_EQ(4, ep(m_e1, 0)->tx.psn); + EXPECT_EQ(3, ep(m_e1)->tx.acked_psn); + } +}; + +int test_ud::ack_req_tx_cnt = 0; +int test_ud::rx_ack_count = 0; +int test_ud::tx_ackreq_psn = 0; +int test_ud::rx_drop_count = 0; +int test_ud::tx_count = 0; + +uct_ud_psn_t test_ud::tx_ack_psn = 0; + +UCS_TEST_SKIP_COND_P(test_ud, basic_tx, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + unsigned i, N = 13; + + disable_async(m_e1); + disable_async(m_e2); + connect(); + set_tx_win(m_e1, 1024); + for (i = 0; i < N; i++) { + EXPECT_UCS_OK(tx(m_e1)); + } + short_progress_loop(); + + /* N packets transmitted, N packets received */ + EXPECT_EQ(N+1, ep(m_e1)->tx.psn); + validate_recv(ep(m_e2), N); + + /* no data transmitted back */ + EXPECT_EQ(1, ep(m_e2)->tx.psn); + + /* nothing was acked */ + EXPECT_EQ(N, ucs_queue_length(&ep(m_e1)->tx.window)); + EXPECT_EQ(0, ep(m_e1)->tx.acked_psn); + EXPECT_EQ(0, ep(m_e2)->rx.acked_psn); +} + +UCS_TEST_SKIP_COND_P(test_ud, duplex_tx, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + unsigned i, N = 5; + + disable_async(m_e1); + disable_async(m_e2); + connect(); + set_tx_win(m_e1, 1024); + set_tx_win(m_e2, 1024); + for (i = 0; i < N; i++) { + EXPECT_UCS_OK(tx(m_e1)); + short_progress_loop(); + EXPECT_UCS_OK(tx(m_e2)); + short_progress_loop(); + } + + /* N packets transmitted, N packets received */ + EXPECT_EQ(N+1, ep(m_e1)->tx.psn); + validate_recv(ep(m_e2), N); + + EXPECT_EQ(N+1, ep(m_e2)->tx.psn); + validate_recv(ep(m_e1), N); + + /* everything but last packet from e2 is acked */ + EXPECT_EQ(N, ep(m_e1)->tx.acked_psn); + EXPECT_EQ(N-1, ep(m_e2)->tx.acked_psn); + EXPECT_EQ(N-1, ep(m_e1)->rx.acked_psn); + EXPECT_EQ(N, ep(m_e2)->rx.acked_psn); + EXPECT_EQ(1U, ucs_queue_length(&ep(m_e2)->tx.window)); + EXPECT_TRUE(ucs_queue_is_empty(&ep(m_e1)->tx.window)); +} + +/* send full window, rcv ack after progreess, send some more */ +UCS_TEST_SKIP_COND_P(test_ud, tx_window1, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + unsigned i, N = 13; + + 
disable_async(m_e1); + disable_async(m_e2); + connect(); + set_tx_win(m_e1, N+1); + for (i = 0; i < N; i++) { + EXPECT_UCS_OK(tx(m_e1)); + } + EXPECT_EQ(UCS_ERR_NO_RESOURCE, tx(m_e1)); + + /* wait for ack */ + ucs_time_t timeout = ucs_get_time() + ucs_time_from_sec(TEST_UD_TIMEOUT_IN_SEC); + while ((ucs_get_time() < timeout) && + uct_ud_ep_no_window(ep(m_e1))) { + short_progress_loop(); + } + EXPECT_UCS_OK(tx(m_e1)); + EXPECT_UCS_OK(tx(m_e1)); + EXPECT_UCS_OK(tx(m_e1)); +} + +/* basic flush */ +/* send packet, flush, wait till flush ended */ + +UCS_TEST_SKIP_COND_P(test_ud, flush_ep, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + connect(); + EXPECT_UCS_OK(tx(m_e1)); + EXPECT_UCS_OK(ep_flush_b(m_e1)); + + validate_flush(); +} + +UCS_TEST_SKIP_COND_P(test_ud, flush_iface, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + connect(); + EXPECT_UCS_OK(tx(m_e1)); + EXPECT_UCS_OK(iface_flush_b(m_e1)); + + validate_flush(); +} + +#if UCT_UD_EP_DEBUG_HOOKS + +/* disable ack req, + * send full window, + * should not be able to send some more + */ +UCS_TEST_SKIP_COND_P(test_ud, tx_window2, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + unsigned i, N = 13; + + disable_async(m_e1); + disable_async(m_e2); + connect(); + set_tx_win(m_e1, N+1); + ep(m_e1)->tx.tx_hook = clear_ack_req; + + for (i = 0; i < N; i++) { + EXPECT_UCS_OK(tx(m_e1)); + } + EXPECT_EQ(UCS_ERR_NO_RESOURCE, tx(m_e1)); + short_progress_loop(); + EXPECT_EQ(UCS_ERR_NO_RESOURCE, tx(m_e1)); + EXPECT_EQ(UCS_ERR_NO_RESOURCE, tx(m_e1)); + EXPECT_EQ(UCS_ERR_NO_RESOURCE, tx(m_e1)); + EXPECT_EQ(N, ucs_queue_length(&ep(m_e1)->tx.window)); +} + + +/* last packet in window must have ack_req + * answered with ack control message + */ +UCS_TEST_SKIP_COND_P(test_ud, ack_req_single, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + connect(); + disable_async(m_e1); + disable_async(m_e2); + set_tx_win(m_e1, 2); + ack_req_tx_cnt = 0; + tx_ack_psn = 0; + rx_ack_count = 0; + ep(m_e1)->tx.tx_hook = ack_req_count_tx; + ep(m_e1)->rx.rx_hook = 
count_rx_acks; + ep(m_e2)->rx.rx_hook = ack_req_count_tx; + + EXPECT_UCS_OK(tx(m_e1)); + EXPECT_EQ(1, ack_req_tx_cnt); + EXPECT_EQ(1, tx_ack_psn); + + wait_for_flag(&rx_ack_count); + EXPECT_EQ(2, ack_req_tx_cnt); + EXPECT_EQ(1, tx_ack_psn); + EXPECT_TRUE(ucs_queue_is_empty(&ep(m_e1)->tx.window)); +} + +/* test that ack request is sent on 1/4 of window */ +UCS_TEST_SKIP_COND_P(test_ud, ack_req_window, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + unsigned i, N = 16; + + disable_async(m_e1); + disable_async(m_e2); + connect(); + set_tx_win(m_e1, N); + ack_req_tx_cnt = 0; + tx_ack_psn = 0; + rx_ack_count = 0; + ep(m_e1)->tx.tx_hook = ack_req_count_tx; + ep(m_e1)->rx.rx_hook = count_rx_acks; + ep(m_e2)->rx.rx_hook = ack_req_count_tx; + + for (i = 0; i < N/4; i++) { + EXPECT_UCS_OK(tx(m_e1)); + } + EXPECT_EQ(1, ack_req_tx_cnt); + EXPECT_EQ(N/4, tx_ack_psn); + + wait_for_flag(&rx_ack_count); + EXPECT_EQ(2, ack_req_tx_cnt); + EXPECT_EQ(N/4, tx_ack_psn); + EXPECT_TRUE(ucs_queue_is_empty(&ep(m_e1)->tx.window)); +} + +/* simulate retransmission of the CREQ packet */ +UCS_TEST_SKIP_COND_P(test_ud, crep_drop1, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + m_e1->connect_to_iface(0, *m_e2); + /* setup filter to drop crep */ + ep(m_e1, 0)->rx.rx_hook = drop_ctl; + short_progress_loop(50); + /* remove filter. Go to sleep. CREQ will be retransmitted */ + ep(m_e1, 0)->rx.rx_hook = uct_ud_ep_null_hook; + twait(500); + + /* CREQ resend and connection shall be fully functional */ + validate_connect(ep(m_e1), 0U); + + EXPECT_EQ(2, ep(m_e1, 0)->tx.psn); + EXPECT_EQ(1, ucs_frag_list_sn(&ep(m_e1, 0)->rx.ooo_pkts)); + + check_connection(); +} + +/* check that creq is not left on tx window if + * both sides connect simultaniously. 
+ */ +UCS_TEST_SKIP_COND_P(test_ud, crep_drop2, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + connect_to_iface(); + + ep(m_e1)->rx.rx_hook = drop_ctl; + ep(m_e2)->rx.rx_hook = drop_ctl; + + short_progress_loop(100); + + /* Remove filter for CREP to be handled and TX win to be freed. */ + ep(m_e1)->rx.rx_hook = uct_ud_ep_null_hook; + ep(m_e2)->rx.rx_hook = uct_ud_ep_null_hook; + + validate_connect(ep(m_e1), 0U); + validate_connect(ep(m_e2), 0U); + + /* Expect that creq (and maybe crep already) are sent */ + validate_send(ep(m_e1), 1); + validate_send(ep(m_e2), 1); + EXPECT_GE(ep(m_e1)->tx.psn, 2); + EXPECT_GE(ep(m_e2)->tx.psn, 2); + + /* Wait for TX win to be empty (which means that all + * CONN packets are handled) */ + ucs_time_t timeout = ucs_get_time() + ucs_time_from_sec(TEST_UD_TIMEOUT_IN_SEC); + while (ucs_get_time() < timeout) { + if(ucs_queue_is_empty(&ep(m_e1)->tx.window) && + ucs_queue_is_empty(&ep(m_e2)->tx.window)) { + break; + } + short_progress_loop(); + } + EXPECT_TRUE(ucs_queue_is_empty(&ep(m_e1)->tx.window)); + EXPECT_TRUE(ucs_queue_is_empty(&ep(m_e2)->tx.window)); +} + +UCS_TEST_P(test_ud, crep_ack_drop) { + ucs_status_t status; + + connect_to_iface(); + + /* drop ACK from CERQ/CREP */ + ep(m_e1, 0)->rx.rx_hook = drop_ack; + ep(m_e2, 0)->rx.rx_hook = drop_ack; + + short_progress_loop(); + + status = uct_iface_set_am_handler(m_e2->iface(), 0, + (uct_am_callback_t)ucs_empty_function_return_success, + NULL, UCT_CB_FLAG_ASYNC); + ASSERT_UCS_OK(status); + + /* allow sending the active message, in case the congestion window is + * already reduced to minimum (=2) by the slow timer, since CREP ACK + * was not received. 
+ */ + set_tx_win(m_e1, 10); + + do { + status = send_am_message(m_e1); + progress(); + } while (status == UCS_ERR_NO_RESOURCE); + ASSERT_UCS_OK(status); + + validate_recv(ep(m_e2), 3u - no_creq_cnt(ep(m_e1))); + + ep(m_e1, 0)->rx.rx_hook = uct_ud_ep_null_hook; + ep(m_e2, 0)->rx.rx_hook = uct_ud_ep_null_hook; + + /* Should receive both CREP and the active message */ + + short_progress_loop(); + twait(500); + short_progress_loop(); + + status = send_am_message(m_e1); + ASSERT_UCS_OK(status); + + short_progress_loop(); + + m_e1->flush(); + m_e2->flush(); +} + +UCS_TEST_P(test_ud, creq_flush) { + ucs_status_t status; + + m_e1->connect_to_iface(0, *m_e2); + /* Setup filter to drop all packets. We have to drop CREP + * and ACK_REQ packets. */ + ep(m_e1, 0)->rx.rx_hook = drop_rx; + short_progress_loop(); + /* do flush while ep is being connected it must return in progress */ + status = uct_iface_flush(m_e1->iface(), 0, NULL); + EXPECT_EQ(UCS_INPROGRESS, status); +} + +UCS_TEST_SKIP_COND_P(test_ud, ca_ai, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + ucs_status_t status; + int prev_cwnd; + int max_window; + + /* check initial window */ + disable_async(m_e1); + disable_async(m_e2); + /* only test up to 'small' window when on valgrind + * valgrind drops rx packets when window is too big and resends are disabled in this test + */ + max_window = RUNNING_ON_VALGRIND ? 
128 : UCT_UD_CA_MAX_WINDOW; + connect(); + EXPECT_EQ(UCT_UD_CA_MIN_WINDOW, ep(m_e1)->ca.cwnd); + EXPECT_EQ(UCT_UD_CA_MIN_WINDOW, ep(m_e2)->ca.cwnd); + + ep(m_e1, 0)->rx.rx_hook = count_rx_acks; + ep(m_e1, 0)->tx.tx_hook = save_tx_ackreqs; + prev_cwnd = ep(m_e1)->ca.cwnd; + rx_ack_count = 0; + + /* window increase upto max window should + * happen when we receive acks */ + while (ep(m_e1)->ca.cwnd < max_window) { + status = tx(m_e1); + if (status != UCS_OK) { + + /* progress until getting all acks for our requests */ + do { + progress(); + } while (UCT_UD_PSN_COMPARE(ep(m_e1)->tx.acked_psn, <, tx_ackreq_psn)); + + /* it is possible to get no acks if tx queue is full. + * But no more than 2 acks per window. + * One at 1/4 and one at the end + * + * every new ack should cause window increase + */ + EXPECT_LE(rx_ack_count, 2); + EXPECT_EQ(rx_ack_count, + UCT_UD_CA_AI_VALUE * (ep(m_e1)->ca.cwnd - prev_cwnd)); + prev_cwnd = ep(m_e1)->ca.cwnd; + rx_ack_count = 0; + } + } +} + +/* skip valgrind for now */ +UCS_TEST_SKIP_COND_P(test_ud, ca_md, + (RUNNING_ON_VALGRIND || + !check_caps(UCT_IFACE_FLAG_AM_SHORT)), + "IB_TX_QUEUE_LEN=" UCS_PP_MAKE_STRING(UCT_UD_CA_MAX_WINDOW)) { + + ucs_status_t status; + int prev_cwnd, new_cwnd; + int i; + + connect(); + + validate_connect(ep(m_e1), 0U); + + /* assume we are at the max window + * on receive drop all packets. 
After several retransmission + * attempts the window will be reduced to the minimum + */ + set_tx_win(m_e1, UCT_UD_CA_MAX_WINDOW); + ep(m_e2, 0)->rx.rx_hook = drop_rx; + for (i = 1; i < UCT_UD_CA_MAX_WINDOW; i++) { + status = tx(m_e1); + if (status == UCS_ERR_NO_RESOURCE) { + // the congestion window can shrink by async timer if ACKs are + // not received fast enough + EXPECT_GT(i, 1); /* at least one packet should be sent */ + break; + } + EXPECT_UCS_OK(status); + progress(); + } + short_progress_loop(); + + ep(m_e1)->tx.tx_hook = count_tx; + do { + prev_cwnd = ep(m_e1, 0)->ca.cwnd; + tx_count = 0; + do { + progress(); + } while (ep(m_e1, 0)->ca.cwnd > (prev_cwnd / UCT_UD_CA_MD_FACTOR)); + short_progress_loop(); + + new_cwnd = ep(m_e1, 0)->ca.cwnd; + EXPECT_GE(tx_count, new_cwnd - 1); + if (new_cwnd > UCT_UD_CA_MIN_WINDOW) { + /* up to 3 additional ack_reqs per each resend */ + EXPECT_LE(tx_count, (prev_cwnd - new_cwnd) + + (int)(3 * ucs_ilog2(prev_cwnd/new_cwnd))); + } + + } while (ep(m_e1, 0)->ca.cwnd > UCT_UD_CA_MIN_WINDOW); +} + +UCS_TEST_SKIP_COND_P(test_ud, ca_resend, + (RUNNING_ON_VALGRIND || + !check_caps(UCT_IFACE_FLAG_AM_SHORT))) { + + int max_window = 10; + int i; + ucs_status_t status; + + connect(); + set_tx_win(m_e1, max_window); + + ep(m_e2, 0)->rx.rx_hook = drop_rx; + for (i = 1; i < max_window; i++) { + status = tx(m_e1); + EXPECT_UCS_OK(status); + } + short_progress_loop(); + rx_drop_count = 0; + ack_req_tx_cnt = 0; + do { + progress(); + } while(ep(m_e1)->ca.cwnd > max_window/2); + /* expect that: + * 4 packets will be retransmitted + * first packet will have ack_req, + * there will 2 ack_reqs + * in addition there may be up to two + * standalone ack_reqs + */ + disable_async(m_e1); + disable_async(m_e2); + short_progress_loop(100); + EXPECT_LE(0, rx_drop_count); + EXPECT_GE(4+2, rx_drop_count); + EXPECT_LE(0, ack_req_tx_cnt); + EXPECT_GE(2+2, ack_req_tx_cnt); +} + +UCS_TEST_P(test_ud, connect_iface_single_drop_creq) { + /* single connect */ + 
iface(m_e2)->rx.hook = drop_creq; + + connect_to_iface(); + short_progress_loop(50); + + iface(m_e2)->rx.hook = uct_ud_iface_null_hook; + + validate_connect(ep(m_e2), 0U); +} +#endif + +UCS_TEST_SKIP_COND_P(test_ud, connect_iface_single, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + /* single connect */ + m_e1->connect_to_iface(0, *m_e2); + short_progress_loop(TEST_UD_PROGRESS_TIMEOUT); + validate_connect(ep(m_e1), 0U); + + EXPECT_EQ(2, ep(m_e1, 0)->tx.psn); + EXPECT_EQ(1, ep(m_e1, 0)->tx.acked_psn); + EXPECT_EQ(1, ucs_frag_list_sn(&ep(m_e1, 0)->rx.ooo_pkts)); + + check_connection(); +} + +UCS_TEST_P(test_ud, connect_iface_2to1) { + /* 2 to 1 connect */ + m_e1->connect_to_iface(0, *m_e2); + m_e1->connect_to_iface(1, *m_e2); + + validate_connect(ep(m_e1), 0U); + EXPECT_EQ(2, ep(m_e1,0)->tx.psn); + EXPECT_EQ(1, ucs_frag_list_sn(&ep(m_e1, 0)->rx.ooo_pkts)); + + validate_connect(ep(m_e1, 1), 1U); + EXPECT_EQ(2, ep(m_e1,1)->tx.psn); + EXPECT_EQ(1, ucs_frag_list_sn(&ep(m_e1, 1)->rx.ooo_pkts)); +} + +UCS_TEST_SKIP_COND_P(test_ud, connect_iface_seq, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + /* sequential connect from both sides */ + m_e1->connect_to_iface(0, *m_e2); + validate_connect(ep(m_e1), 0U); + EXPECT_EQ(2, ep(m_e1)->tx.psn); + /* one because of crep */ + EXPECT_EQ(1, ucs_frag_list_sn(&ep(m_e1)->rx.ooo_pkts)); + + /* now side two connects.
existing ep will be reused */ + m_e2->connect_to_iface(0, *m_e1); + validate_connect(ep(m_e2), 0U); + EXPECT_EQ(2, ep(m_e2)->tx.psn); + /* one because creq sets initial psn */ + EXPECT_EQ(1, ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts)); + + check_connection(); +} + +UCS_TEST_P(test_ud, connect_iface_sim) { + /* simultaneous connect from both sides */ + connect_to_iface(); + + validate_connect(ep(m_e1), 0U); + validate_connect(ep(m_e2), 0U); + + /* psns are not checked because they depend on scheduling */ +} + +UCS_TEST_P(test_ud, connect_iface_sim2v2) { + /* simultaneous connect from both sides */ + connect_to_iface(0); + connect_to_iface(1); + + validate_connect(ep(m_e1), 0U); + validate_connect(ep(m_e2), 0U); + validate_connect(ep(m_e1, 1), 1U); + validate_connect(ep(m_e2, 1), 1U); + /* psns are not checked because they depend on scheduling */ +} + +/* + * check that: + * - connect is not blocking when we run out of iface resources + * - flush() will also progress pending CREQs + */ +UCS_TEST_P(test_ud, connect_iface_2k) { + + unsigned i; + unsigned cids[2000]; + unsigned count = 2000 / ucs::test_time_multiplier(); + + /* create 2k connections */ + for (i = 0; i < count; i++) { + m_e1->connect_to_iface(i, *m_e2); + cids[i] = UCT_UD_EP_NULL_ID; + } + + flush(); + + for (i = 0; i < count; i++) { + ASSERT_EQ(cids[i], (unsigned)UCT_UD_EP_NULL_ID); + cids[i] = ep(m_e1,i)->dest_ep_id; + ASSERT_NE((unsigned)UCT_UD_EP_NULL_ID, ep(m_e1,i)->dest_ep_id); + EXPECT_EQ(i, ep(m_e1,i)->conn_id); + EXPECT_EQ(i, ep(m_e1,i)->ep_id); + } +} + +UCS_TEST_P(test_ud, ep_destroy_simple) { + uct_ep_h ep; + ucs_status_t status; + uct_ud_ep_t *ud_ep1, *ud_ep2; + uct_ep_params_t ep_params; + + ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + ep_params.iface = m_e1->iface(); + + status = uct_ep_create(&ep_params, &ep); + EXPECT_UCS_OK(status); + ud_ep1 = ucs_derived_of(ep, uct_ud_ep_t); + uct_ep_destroy(ep); + + ep_params.iface = m_e1->iface(); + status = uct_ep_create(&ep_params, 
&ep); + EXPECT_UCS_OK(status); + /* coverity[use_after_free] */ + ud_ep2 = ucs_derived_of(ep, uct_ud_ep_t); + uct_ep_destroy(ep); + + EXPECT_EQ(0U, ud_ep1->ep_id); + EXPECT_EQ(1U, ud_ep2->ep_id); +} + +UCS_TEST_SKIP_COND_P(test_ud, ep_destroy_flush, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + uct_ep_h ep; + ucs_status_t status; + uct_ud_ep_t *ud_ep1; + uct_ep_params_t ep_params; + + connect(); + EXPECT_UCS_OK(tx(m_e1)); + short_progress_loop(); + uct_ep_destroy(m_e1->ep(0)); + /* ep destroy should try to flush outstanding packets */ + short_progress_loop(); + validate_flush(); + + /* next created ep must not reuse old id */ + ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + ep_params.iface = m_e1->iface(); + status = uct_ep_create(&ep_params, &ep); + EXPECT_UCS_OK(status); + ud_ep1 = ucs_derived_of(ep, uct_ud_ep_t); + EXPECT_EQ(1U, ud_ep1->ep_id); + uct_ep_destroy(ep); +} + +UCS_TEST_SKIP_COND_P(test_ud, ep_destroy_passive, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + connect(); + uct_ep_destroy(m_e2->ep(0)); + /* destroyed ep must still accept data */ + EXPECT_UCS_OK(tx(m_e1)); + EXPECT_UCS_OK(ep_flush_b(m_e1)); + + validate_flush(); +} + +UCS_TEST_P(test_ud, ep_destroy_creq) { + uct_ep_h ep; + ucs_status_t status; + uct_ud_ep_t *ud_ep; + uct_ep_params ep_params; + + /* single connect */ + m_e1->connect_to_iface(0, *m_e2); + short_progress_loop(TEST_UD_PROGRESS_TIMEOUT); + + uct_ep_destroy(m_e1->ep(0)); + + /* check that ep id are not reused on both sides */ + ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + ep_params.iface = m_e1->iface(); + status = uct_ep_create(&ep_params, &ep); + EXPECT_UCS_OK(status); + ud_ep = ucs_derived_of(ep, uct_ud_ep_t); + uct_ep_destroy(ep); + EXPECT_EQ(1U, ud_ep->ep_id); + + ep_params.iface = m_e2->iface(); + status = uct_ep_create(&ep_params, &ep); + EXPECT_UCS_OK(status); + /* coverity[use_after_free] */ + ud_ep = ucs_derived_of(ep, uct_ud_ep_t); + uct_ep_destroy(ep); + EXPECT_EQ(1U, ud_ep->ep_id); +} + +/* check that the amount 
of reserved skbs is not less than + * iface tx queue len + */ +UCS_TEST_P(test_ud, res_skb_basic) { + uct_ud_send_skb_t *skb; + uct_ud_iface_t *ud_if; + int i, tx_qlen; + + connect(); + + ud_if = iface(m_e1); + tx_qlen = ud_if->tx.available; + + uct_ud_send_skb_t *used_skbs[tx_qlen]; + + for (i = 0; i < tx_qlen; i++) { + skb = uct_ud_iface_resend_skb_get(ud_if); + ASSERT_TRUE(skb); + used_skbs[i] = skb; + } + + for (i = 0; i < tx_qlen; i++) { + uct_ud_iface_resend_skb_put(ud_if, used_skbs[i]); + } +} + +/* test that reserved skb is not being reused while it is still in flight + */ +UCS_TEST_SKIP_COND_P(test_ud, res_skb_tx, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + uct_ud_iface_t *ud_if; + int poll_sn; + uct_ud_send_skb_t *skb; + int n, tx_count; + + disable_async(m_e1); + disable_async(m_e2); + connect(); + EXPECT_UCS_OK(tx(m_e1)); + short_progress_loop(); + + ud_if = iface(m_e1); + n = tx_count = 0; + poll_sn = 1; + while(n < 100) { + while(uct_ud_iface_can_tx(ud_if)) { + uct_ud_put_hdr_t *put_hdr; + uct_ud_neth_t *neth; + + skb = uct_ud_iface_resend_skb_get(ud_if); + ASSERT_TRUE(skb); + VALGRIND_MAKE_MEM_DEFINED(skb, sizeof *skb); + ASSERT_LT(skb->flags, poll_sn); + skb->flags = poll_sn; + + /* simulate put */ + neth = skb->neth; + uct_ud_neth_init_data(ep(m_e1), neth); + uct_ud_neth_set_type_put(ep(m_e1), neth); + uct_ud_neth_ack_req(ep(m_e1), neth); + + put_hdr = (uct_ud_put_hdr_t *)(neth+1); + put_hdr->rva = (uint64_t)&m_dummy; + memcpy(put_hdr+1, &m_dummy, sizeof(m_dummy)); + skb->len = sizeof(*neth) + sizeof(*put_hdr) + sizeof(m_dummy); + + ucs_derived_of(ud_if->super.ops, uct_ud_iface_ops_t)->tx_skb(ep(m_e1), + skb, 0); + uct_ud_iface_resend_skb_put(ud_if, skb); + tx_count++; + } + short_progress_loop(1); + poll_sn++; + n++; + } +} + +#if UCT_UD_EP_DEBUG_HOOKS +/* Simulate loss of ctl packets during simultaneous CREQs. + * Use-case: CREQ and CREP packets from m_e2 to m_e1 are lost. 
+ * Check: that both eps (m_e1 and m_e2) are connected finally */ +UCS_TEST_SKIP_COND_P(test_ud, ctls_loss, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + iface(m_e2)->tx.available = 0; + + connect_to_iface(); + + /* Simulate loss of CREQ to m_e1 */ + ep(m_e2)->tx.tx_hook = invalidate_creq_tx; + iface(m_e2)->tx.available = 128; + iface(m_e1)->tx.available = 128; + + /* Simulate loss of CREP to m_e1 */ + ep(m_e1)->rx.rx_hook = drop_ctl; + short_progress_loop(300); + + /* m_e2 ep should be in connected state now, as it received CREQ which is + * counter to its own CREQ. So, send a packet to m_e1 (which is not in + * connected state yet) */ + ep(m_e2)->tx.tx_hook = uct_ud_ep_null_hook; + set_tx_win(m_e2, 128); + EXPECT_UCS_OK(tx(m_e2)); + short_progress_loop(); + ep(m_e1)->rx.rx_hook = uct_ud_ep_null_hook; + twait(500); + + validate_connect(ep(m_e1), 0U); + validate_connect(ep(m_e2), 0U); +} +#endif + +UCT_INSTANTIATE_UD_TEST_CASE(test_ud) diff --git a/test/gtest/uct/ib/test_ud_ds.cc b/test/gtest/uct/ib/test_ud_ds.cc new file mode 100644 index 0000000..35a0350 --- /dev/null +++ b/test/gtest/uct/ib/test_ud_ds.cc @@ -0,0 +1,157 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "ud_base.h" + +#include + +extern "C" { +#include +#include +#include +#include +#include +} + + +/* test ud connect data structures */ +class test_ud_ds : public uct_test { +public: + virtual void init() { + uct_test::init(); + + m_e1 = create_entity(0); + m_entities.push_back(m_e1); + + m_e2 = create_entity(0); + m_entities.push_back(m_e2); + + uct_iface_get_address(m_e1->iface(), (uct_iface_addr_t*)(void *)&if_adr1); + uct_iface_get_address(m_e2->iface(), (uct_iface_addr_t*)(void *)&if_adr2); + + ib_adr1 = (uct_ib_address_t*)malloc(ucs_derived_of(m_e1->iface(), uct_ib_iface_t)->addr_size); + ib_adr2 = (uct_ib_address_t*)malloc(ucs_derived_of(m_e2->iface(), uct_ib_iface_t)->addr_size); + + uct_iface_get_device_address(m_e1->iface(), (uct_device_addr_t*)ib_adr1); + uct_iface_get_device_address(m_e2->iface(), (uct_device_addr_t*)ib_adr2); + } + + uct_ud_iface_t *iface(entity *e) { + return ucs_derived_of(e->iface(), uct_ud_iface_t); + } + + uct_ud_ep_t *ep(entity *e, int i) { + return ucs_derived_of(e->ep(i), uct_ud_ep_t); + } + + void cleanup() { + free(ib_adr2); + free(ib_adr1); + uct_test::cleanup(); + } + + void test_cep_insert(entity *e, uct_ib_address_t *ib_addr, uct_ud_iface_addr_t *if_addr, unsigned base); + +protected: + entity *m_e1, *m_e2; + uct_ib_address_t *ib_adr1, *ib_adr2; + uct_ud_iface_addr_t if_adr1, if_adr2; + static unsigned N; +}; + +unsigned test_ud_ds::N = 1000; + +UCS_TEST_P(test_ud_ds, if_addr) { + union ibv_gid gid1, gid2; + uint16_t lid1, lid2; + uct_ib_address_unpack(ib_adr1, &lid1, &gid1); + uct_ib_address_unpack(ib_adr2, &lid2, &gid2); + EXPECT_EQ(lid1, lid2); + EXPECT_EQ(gid1.global.subnet_prefix, gid2.global.subnet_prefix); + EXPECT_EQ(gid1.global.interface_id, gid2.global.interface_id); + EXPECT_NE(uct_ib_unpack_uint24(if_adr1.qp_num), + uct_ib_unpack_uint24(if_adr2.qp_num)); +} + +void test_ud_ds::test_cep_insert(entity *e, uct_ib_address_t *ib_addr, + uct_ud_iface_addr_t *if_addr, unsigned base) +{ + unsigned i; + 
uct_ud_ep_t *my_ep; + + for (i = 0; i < N; i++) { + e->create_ep(i + base); + EXPECT_EQ(i+base, ep(e, i + base)->ep_id); + EXPECT_EQ((unsigned)UCT_UD_EP_NULL_ID, ep(e, i + base)->dest_ep_id); + EXPECT_UCS_OK(uct_ud_iface_cep_insert(iface(e), ib_addr, if_addr, ep(e, i + base), UCT_UD_EP_CONN_ID_MAX)); + EXPECT_EQ(i, ep(e, i + base)->conn_id); + } + /* lookup non existing ep */ + my_ep = uct_ud_iface_cep_lookup(iface(e), ib_addr, if_addr, 3333); + EXPECT_TRUE(my_ep == NULL); + for (i = 0; i < N; i++) { + my_ep = uct_ud_iface_cep_lookup(iface(e), ib_addr, if_addr, i); + EXPECT_TRUE(my_ep != NULL); + EXPECT_EQ(i+base, ep(e, i + base)->ep_id); + EXPECT_EQ(i, ep(e, i + base)->conn_id); + } +} + +/* simulate creq send */ +UCS_TEST_P(test_ud_ds, cep_insert) { + test_cep_insert(m_e1, ib_adr1, &if_adr1, 0); + test_cep_insert(m_e1, ib_adr2, &if_adr2, N); +} + +UCS_TEST_P(test_ud_ds, cep_rollback) { + + m_e1->create_ep(0); + EXPECT_EQ(0U, ep(m_e1, 0)->ep_id); + EXPECT_EQ((unsigned)UCT_UD_EP_NULL_ID, ep(m_e1, 0)->dest_ep_id); + EXPECT_UCS_OK(uct_ud_iface_cep_insert(iface(m_e1), ib_adr1, &if_adr1, ep(m_e1, 0), UCT_UD_EP_CONN_ID_MAX)); + EXPECT_EQ(0U, ep(m_e1, 0)->conn_id); + + uct_ud_iface_cep_rollback(iface(m_e1), ib_adr1, &if_adr1, ep(m_e1, 0)); + + EXPECT_UCS_OK(uct_ud_iface_cep_insert(iface(m_e1), ib_adr1, &if_adr1, ep(m_e1, 0), UCT_UD_EP_CONN_ID_MAX)); + EXPECT_EQ(0U, ep(m_e1, 0)->conn_id); +} + +UCS_TEST_P(test_ud_ds, cep_replace) { + + uct_ud_ep_t *my_ep; + + /* add N connections */ + test_cep_insert(m_e1, ib_adr1, &if_adr1, 0); + + /* Assume that we have 5 connections pending and 3 CREQs received */ + m_e1->create_ep(N); + EXPECT_UCS_OK(uct_ud_iface_cep_insert(iface(m_e1), ib_adr1, &if_adr1, ep(m_e1, N), N+1)); + EXPECT_EQ(N+1, ep(m_e1, N)->conn_id); + + m_e1->create_ep(N+1); + EXPECT_UCS_OK(uct_ud_iface_cep_insert(iface(m_e1), ib_adr1, &if_adr1, ep(m_e1, N+1), N+4)); + EXPECT_EQ(N+4, ep(m_e1, N+1)->conn_id); + + m_e1->create_ep(N+2); + 
EXPECT_UCS_OK(uct_ud_iface_cep_insert(iface(m_e1), ib_adr1, &if_adr1, ep(m_e1, N+2), N+5)); + EXPECT_EQ(N+5, ep(m_e1, N+2)->conn_id); + + /* we initiate 2 connections */ + my_ep = uct_ud_iface_cep_lookup(iface(m_e1), ib_adr1, &if_adr1, UCT_UD_EP_CONN_ID_MAX); + EXPECT_TRUE(my_ep == NULL); + m_e1->create_ep(N+3); + /* slot N must be free. conn_id will be N+1 when inserting ep with no id */ + EXPECT_UCS_OK(uct_ud_iface_cep_insert(iface(m_e1), ib_adr1, &if_adr1, ep(m_e1, N+3), UCT_UD_EP_CONN_ID_MAX)); + EXPECT_EQ(N, ep(m_e1, N+3)->conn_id); + + /* slot N+1 already occupied */ + my_ep = uct_ud_iface_cep_lookup(iface(m_e1), ib_adr1, &if_adr1, UCT_UD_EP_CONN_ID_MAX); + EXPECT_TRUE(my_ep != NULL); + EXPECT_EQ(N+1, my_ep->conn_id); +} + +UCT_INSTANTIATE_UD_TEST_CASE(test_ud_ds) diff --git a/test/gtest/uct/ib/test_ud_pending.cc b/test/gtest/uct/ib/test_ud_pending.cc new file mode 100644 index 0000000..3b444dc --- /dev/null +++ b/test/gtest/uct/ib/test_ud_pending.cc @@ -0,0 +1,227 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "ud_base.h" + +#include + +extern "C" { +#include +#include +#include +#include +} + + +class test_ud_pending : public ud_base_test { +public: + uct_pending_req_t m_r[64]; + + void dispatch_req(uct_pending_req_t *r) { + EXPECT_UCS_OK(tx(m_e1)); + } + + void post_pending_reqs(void) + { + int i; + + req_count = 0; + me = this; + m_e1->connect_to_iface(0, *m_e2); + set_tx_win(m_e1, UCT_UD_CA_MAX_WINDOW); + /* ep is not connected yet */ + EXPECT_EQ(UCS_ERR_NO_RESOURCE, tx(m_e1)); + + /* queue some work */ + for(i = 0; i < N; i++) { + m_r[i].func = pending_cb_dispatch; + EXPECT_EQ(UCS_OK, uct_ep_pending_add(m_e1->ep(0), &m_r[i], 0)); + } + } + + void check_pending_reqs(bool wait) + { + /* wait for all work to be complete */ + ucs_time_t start_time = ucs_get_time(); + while (wait && (req_count < N) && + (ucs_get_time() < start_time + ucs_time_from_sec(10.0))) + { + progress(); + } + EXPECT_EQ(N, req_count); + uct_ep_pending_purge(m_e1->ep(0), purge_cb, NULL); + } + + static const int N; + static const int W; + static int req_count; + static test_ud_pending *me; + + static ucs_status_t pending_cb_dispatch(uct_pending_req_t *r) + { + req_count++; + me->dispatch_req(r); + return UCS_OK; + } + + static ucs_status_t pending_cb(uct_pending_req_t *r) + { + req_count++; + return UCS_OK; + } + + static void purge_cb(uct_pending_req_t *r, void *arg) + { + req_count++; + } + + static ucs_status_t pending_cb_busy(uct_pending_req_t *r) + { + return UCS_ERR_BUSY; + } + +}; + +const int test_ud_pending::N = 13; +const int test_ud_pending::W = 6; +int test_ud_pending::req_count = 0; +test_ud_pending *test_ud_pending::me = 0; + +/* add/purge requests */ +UCS_TEST_SKIP_COND_P(test_ud_pending, async_progress, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + uct_pending_req_t r[N]; + int i; + + req_count = 0; + connect(); + + set_tx_win(m_e1, 2); + EXPECT_UCS_OK(tx(m_e1)); + + for(i = 0; i < N; i++) { + EXPECT_EQ(UCS_OK, uct_ep_pending_add(m_e1->ep(0), &r[i], 0)); + } + 
twait(300); + /* requests must not be dispatched from async progress */ + EXPECT_EQ(0, req_count); + uct_ep_pending_purge(m_e1->ep(0), purge_cb, NULL); + EXPECT_EQ(N, req_count); +} + +UCS_TEST_SKIP_COND_P(test_ud_pending, sync_progress, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + uct_pending_req_t r[N]; + int i; + + req_count = 0; + connect(); + + set_tx_win(m_e1, 2); + EXPECT_UCS_OK(tx(m_e1)); + + for(i = 0; i < N; i++) { + r[i].func = pending_cb; + EXPECT_EQ(UCS_OK, uct_ep_pending_add(m_e1->ep(0), &r[i], 0)); + } + wait_for_value(&req_count, N, true); + /* requests must be dispatched from progress */ + EXPECT_EQ(N, req_count); + uct_ep_pending_purge(m_e1->ep(0), purge_cb, NULL); + EXPECT_EQ(N, req_count); +} + +UCS_TEST_SKIP_COND_P(test_ud_pending, err_busy, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + uct_pending_req_t r[N]; + int i; + + req_count = 0; + connect(); + + set_tx_win(m_e1, 2); + EXPECT_UCS_OK(tx(m_e1)); + + for(i = 0; i < N; i++) { + r[i].func = pending_cb_busy; + EXPECT_EQ(UCS_OK, uct_ep_pending_add(m_e1->ep(0), &r[i], 0)); + } + short_progress_loop(); + /* requests will not be dispatched from progress */ + EXPECT_EQ(0, req_count); + uct_ep_pending_purge(m_e1->ep(0), purge_cb, NULL); + EXPECT_EQ(N, req_count); +} + +UCS_TEST_SKIP_COND_P(test_ud_pending, connect, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) +{ + disable_async(m_e1); + disable_async(m_e2); + post_pending_reqs(); + check_pending_reqs(true); +} + +UCS_TEST_SKIP_COND_P(test_ud_pending, flush, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) +{ + disable_async(m_e1); + disable_async(m_e2); + post_pending_reqs(); + flush(); + check_pending_reqs(false); +} + +UCS_TEST_SKIP_COND_P(test_ud_pending, window, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) +{ + int i; + uct_pending_req_t r; + + req_count = 0; + me = this; + connect(); + set_tx_win(m_e1, W+1); + for (i = 0; i < W; i ++) { + EXPECT_UCS_OK(tx(m_e1)); + } + EXPECT_EQ(UCS_ERR_NO_RESOURCE, tx(m_e1)); + r.func = pending_cb_dispatch; + 
EXPECT_EQ(UCS_OK, uct_ep_pending_add(m_e1->ep(0), &r, 0)); + wait_for_value(&req_count, 1, true); + EXPECT_EQ(1, req_count); + uct_ep_pending_purge(m_e1->ep(0), purge_cb, NULL); +} + +UCS_TEST_SKIP_COND_P(test_ud_pending, tx_wqe, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) +{ + int i; + uct_pending_req_t r; + ucs_status_t status; + + req_count = 0; + me = this; + disable_async(m_e1); + disable_async(m_e2); + connect(); + /* set big window */ + set_tx_win(m_e1, 8192); + i = 0; + do { + status = tx(m_e1); + i++; + } while (status == UCS_OK); + + r.func = pending_cb_dispatch; + EXPECT_EQ(UCS_OK, uct_ep_pending_add(m_e1->ep(0), &r, 0)); + wait_for_value(&req_count, 1, true); + EXPECT_EQ(1, req_count); + short_progress_loop(); + uct_ep_pending_purge(m_e1->ep(0), purge_cb, NULL); +} + +UCT_INSTANTIATE_UD_TEST_CASE(test_ud_pending) diff --git a/test/gtest/uct/ib/test_ud_slow_timer.cc b/test/gtest/uct/ib/test_ud_slow_timer.cc new file mode 100644 index 0000000..634f8d6 --- /dev/null +++ b/test/gtest/uct/ib/test_ud_slow_timer.cc @@ -0,0 +1,235 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "ud_base.h" + +#include + +extern "C" { +#include +#include +#include +#include +#include +} + + +class test_ud_slow_timer : public ud_base_test { +public: + /* ack while doing retransmit */ + static int packet_count, rx_limit; + static ucs_status_t rx_npackets(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + if (packet_count++ < rx_limit) { + return UCS_OK; + } + else { + return UCS_ERR_INVALID_PARAM; + } + } + /* test slow timer and retransmit */ + static int tick_count; + + static ucs_status_t tick_counter(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + uct_ud_iface_t *iface = ucs_derived_of(ep->super.super.iface, + uct_ud_iface_t); + + /* hack to disable retransmit */ + ep->tx.send_time = ucs_twheel_get_time(&iface->async.slow_timer); + tick_count++; + return UCS_OK; + } + + static ucs_status_t drop_packet(uct_ud_ep_t *ep, uct_ud_neth_t *neth) + { + return UCS_ERR_INVALID_PARAM; + } + + void wait_for_rx_sn(unsigned sn) + { + ucs_time_t deadline = ucs_get_time() + + ucs_time_from_sec(10) * ucs::test_time_multiplier(); + while ((ucs_get_time() < deadline) && (ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts) < sn)) { + usleep(1000); + } + } + + void wait_for_ep_destroyed(uct_ud_iface_t *iface, uint32_t ep_idx) + { + ucs_time_t deadline = ucs_get_time() + + ucs_time_from_sec(60) * ucs::test_time_multiplier(); + void *ud_ep_tmp GTEST_ATTRIBUTE_UNUSED_; + + while ((ucs_get_time() < deadline) && + ucs_ptr_array_lookup(&iface->eps, ep_idx, ud_ep_tmp)) { + usleep(1000); + } + } +}; + +int test_ud_slow_timer::rx_limit = 10; +int test_ud_slow_timer::packet_count = 0; +int test_ud_slow_timer::tick_count = 0; + + +/* single packet received without progress */ +UCS_TEST_SKIP_COND_P(test_ud_slow_timer, tx1, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + connect(); + EXPECT_UCS_OK(tx(m_e1)); + wait_for_rx_sn(1); + EXPECT_EQ(2, ep(m_e1)->tx.psn); + EXPECT_EQ(1, ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts)); +} + +/* multiple packets received without progress */ 
+UCS_TEST_SKIP_COND_P(test_ud_slow_timer, txn, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + unsigned i, N = 42; + + connect(); + set_tx_win(m_e1, 1024); + for (i = 0; i < N; i++) { + EXPECT_UCS_OK(tx(m_e1)); + } + wait_for_rx_sn(N); + EXPECT_EQ(N+1, ep(m_e1)->tx.psn); + EXPECT_EQ(N, ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts)); +} + +UCS_TEST_P(test_ud_slow_timer, ep_destroy, "UD_TIMEOUT=1s") { + void *ud_ep_tmp GTEST_ATTRIBUTE_UNUSED_; + connect(); + + uct_ud_ep_t *ud_ep = ep(m_e1); + uct_ud_iface_t *iface = ucs_derived_of(ud_ep->super.super.iface, + uct_ud_iface_t); + uint32_t ep_idx = ud_ep->ep_id; + EXPECT_TRUE(ucs_ptr_array_lookup(&iface->eps, ep_idx, ud_ep_tmp)); + + m_e1->destroy_eps(); + wait_for_ep_destroyed(iface, ep_idx); + EXPECT_FALSE(ucs_ptr_array_lookup(&iface->eps, ep_idx, ud_ep_tmp)); +} + +UCS_TEST_P(test_ud_slow_timer, backoff_config) { + /* check minimum allowed value */ + ASSERT_UCS_OK(uct_config_modify(m_iface_config, + "UD_SLOW_TIMER_BACKOFF", + ucs::to_string(UCT_UD_MIN_TIMER_TIMER_BACKOFF).c_str())); + entity *e = uct_test::create_entity(0); + m_entities.push_back(e); + + { + /* iface creation should fail with back off value less than + * UCT_UD_MIN_TIMER_TIMER_BACKOFF */ + ASSERT_UCS_OK(uct_config_modify(m_iface_config, + "UD_SLOW_TIMER_BACKOFF", + ucs::to_string(UCT_UD_MIN_TIMER_TIMER_BACKOFF - 0.1).c_str())); + scoped_log_handler wrap_err(wrap_errors_logger); + uct_iface_h iface; + ucs_status_t status = uct_iface_open(e->md(), e->worker(), + &e->iface_params(), + m_iface_config, &iface); + EXPECT_EQ(UCS_ERR_INVALID_PARAM, status); + EXPECT_EQ(NULL, iface); + } +} + +#if UCT_UD_EP_DEBUG_HOOKS +/* no traffic - no ticks */ +UCS_TEST_P(test_ud_slow_timer, tick1) { + connect(); + tick_count = 0; + ep(m_e1)->timer_hook = tick_counter; + twait(500); + EXPECT_EQ(0, tick_count); +} + +/* ticks while tx window is not empty */ +UCS_TEST_SKIP_COND_P(test_ud_slow_timer, tick2, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + connect(); + tick_count = 0; + 
ep(m_e1)->timer_hook = tick_counter; + EXPECT_UCS_OK(tx(m_e1)); + twait(500); + EXPECT_LT(0, tick_count); +} + +/* retransmit one packet */ + +UCS_TEST_SKIP_COND_P(test_ud_slow_timer, retransmit1, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + connect(); + ep(m_e2)->rx.rx_hook = drop_packet; + EXPECT_UCS_OK(tx(m_e1)); + short_progress_loop(); + EXPECT_EQ(0, ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts)); + ep(m_e2)->rx.rx_hook = uct_ud_ep_null_hook; + EXPECT_EQ(2, ep(m_e1)->tx.psn); + wait_for_rx_sn(1); + EXPECT_EQ(2, ep(m_e1)->tx.psn); + EXPECT_EQ(1, ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts)); +} + +/* retransmit many packets */ +UCS_TEST_SKIP_COND_P(test_ud_slow_timer, retransmitn, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + + unsigned i, N = 42; + + connect(); + set_tx_win(m_e1, 1024); + ep(m_e2)->rx.rx_hook = drop_packet; + for (i = 0; i < N; i++) { + EXPECT_UCS_OK(tx(m_e1)); + } + short_progress_loop(); + EXPECT_EQ(0, ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts)); + ep(m_e2)->rx.rx_hook = uct_ud_ep_null_hook; + EXPECT_EQ(N+1, ep(m_e1)->tx.psn); + wait_for_rx_sn(N); + EXPECT_EQ(N+1, ep(m_e1)->tx.psn); + EXPECT_EQ(N, ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts)); +} + + +UCS_TEST_SKIP_COND_P(test_ud_slow_timer, partial_drop, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + + unsigned i, N = 24; + int orig_avail; + + connect(); + set_tx_win(m_e1, 1024); + packet_count = 0; + rx_limit = 10; + ep(m_e2)->rx.rx_hook = rx_npackets; + for (i = 0; i < N; i++) { + EXPECT_UCS_OK(tx(m_e1)); + } + short_progress_loop(); + EXPECT_EQ(rx_limit, ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts)); + ep(m_e2)->rx.rx_hook = uct_ud_ep_null_hook; + EXPECT_EQ(N+1, ep(m_e1)->tx.psn); + orig_avail = iface(m_e1)->tx.available; + /* allow only 6 outgoing packets. 
It will allow to get ack + * from receiver + */ + iface(m_e1)->tx.available = 6; + twait(500); + iface(m_e1)->tx.available = orig_avail-6; + short_progress_loop(); + + EXPECT_EQ(N+1, ep(m_e1)->tx.psn); + wait_for_rx_sn(N); + EXPECT_EQ(N, ucs_frag_list_sn(&ep(m_e2)->rx.ooo_pkts)); +} +#endif + +UCT_INSTANTIATE_UD_TEST_CASE(test_ud_slow_timer) diff --git a/test/gtest/uct/ib/ud_base.cc b/test/gtest/uct/ib/ud_base.cc new file mode 100644 index 0000000..6a3c754 --- /dev/null +++ b/test/gtest/uct/ib/ud_base.cc @@ -0,0 +1,93 @@ +#include "ud_base.h" + + + +void ud_base_test::init() +{ + uct_test::init(); + + m_e1 = uct_test::create_entity(0); + m_entities.push_back(m_e1); + + check_skip_test(); + + m_e2 = uct_test::create_entity(0); + m_entities.push_back(m_e2); +} + +uct_ud_ep_t *ud_base_test::ep(entity *e) +{ + return ucs_derived_of(e->ep(0), uct_ud_ep_t); +} + +uct_ud_ep_t *ud_base_test::ep(entity *e, int i) +{ + return ucs_derived_of(e->ep(i), uct_ud_ep_t); +} + +uct_ud_iface_t *ud_base_test::iface(entity *e) +{ + return ucs_derived_of(e->iface(), uct_ud_iface_t); +} + +void ud_base_test::short_progress_loop(double delta_ms) const +{ + uct_test::short_progress_loop(delta_ms); +} + +void ud_base_test::connect() +{ + m_e1->connect(0, *m_e2, 0); + m_e2->connect(0, *m_e1, 0); +} + +void ud_base_test::cleanup() +{ + uct_test::cleanup(); +} + +ucs_status_t ud_base_test::tx(entity *e) +{ + ucs_status_t err; + err = uct_ep_put_short(e->ep(0), &m_dummy, sizeof(m_dummy), (uint64_t)&m_dummy, 0); + return err; +} + +ucs_status_t ud_base_test::ep_flush_b(entity *e) +{ + ucs_status_t status; + + do { + short_progress_loop(); + status = uct_ep_flush(e->ep(0), 0, NULL); + } while (status == UCS_INPROGRESS || status == UCS_ERR_NO_RESOURCE); + + return status; +} + +ucs_status_t ud_base_test::iface_flush_b(entity *e) +{ + ucs_status_t status; + + do { + short_progress_loop(); + status = uct_iface_flush(e->iface(), 0, NULL); + } while (status == UCS_INPROGRESS || status == 
UCS_ERR_NO_RESOURCE); + + return status; +} + + +void ud_base_test::set_tx_win(entity *e, uct_ud_psn_t size) +{ + /* force window */ + ep(e)->tx.max_psn = ep(e)->tx.acked_psn + size; + ep(e)->ca.cwnd = size; +} + +void ud_base_test::disable_async(entity *e) +{ + ucs_async_remove_handler(iface(e)->async.timer_id, 1); +} + + diff --git a/test/gtest/uct/ib/ud_base.h b/test/gtest/uct/ib/ud_base.h new file mode 100644 index 0000000..a6e00a2 --- /dev/null +++ b/test/gtest/uct/ib/ud_base.h @@ -0,0 +1,60 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ +#ifndef _UD_BASE_TEST +#define _UD_BASE_TEST + +#include + +#include +#include +extern "C" { +#include +#include +} + + +#define TEST_UD_PROGRESS_TIMEOUT 300.0 +#define TEST_UD_TIMEOUT_IN_SEC 10.0 + + +class ud_base_test : public uct_test { +public: + virtual void init(); + + uct_ud_ep_t *ep(entity *e); + + uct_ud_ep_t *ep(entity *e, int i); + + uct_ud_iface_t *iface(entity *e); + + void connect(); + + void cleanup(); + + ucs_status_t tx(entity *e); + + ucs_status_t ep_flush_b(entity *e); + + ucs_status_t iface_flush_b(entity *e); + + void set_tx_win(entity *e, uct_ud_psn_t size); + + void disable_async(entity *e); + + virtual void short_progress_loop(double delta_ms=10.0) const; + +protected: + entity *m_e1, *m_e2; + uint64_t m_dummy; +}; + + +#define UCT_INSTANTIATE_UD_TEST_CASE(_test_case) \ + _UCT_INSTANTIATE_TEST_CASE(_test_case, ud_verbs) \ + _UCT_INSTANTIATE_TEST_CASE(_test_case, ud_mlx5) + + +#endif diff --git a/test/gtest/uct/tcp/test_tcp.cc b/test/gtest/uct/tcp/test_tcp.cc new file mode 100644 index 0000000..c7db746 --- /dev/null +++ b/test/gtest/uct/tcp/test_tcp.cc @@ -0,0 +1,258 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED. + * See file LICENSE for terms. 
+ */ + +#include +#include + +extern "C" { +#include +#include +} + +class test_uct_tcp : public uct_test { +public: + void init() { + if (RUNNING_ON_VALGRIND) { + modify_config("TX_SEG_SIZE", "1kb"); + modify_config("RX_SEG_SIZE", "1kb"); + } + + uct_test::init(); + m_ent = uct_test::create_entity(0); + m_entities.push_back(m_ent); + m_tcp_iface = (uct_tcp_iface*)m_ent->iface(); + } + + size_t get_accepted_conn_num(entity& ent) { + size_t num = 0; + uct_tcp_ep_t *ep; + + ucs_list_for_each(ep, &m_tcp_iface->ep_list, list) { + num += (ep->conn_state == UCT_TCP_EP_CONN_STATE_RECV_MAGIC_NUMBER); + } + + return num; + } + + ucs_status_t post_recv(int fd, bool nb = false) { + uint8_t msg; + size_t msg_size = sizeof(msg); + ucs_status_t status; + + scoped_log_handler slh(wrap_errors_logger); + if (nb) { + status = ucs_socket_recv_nb(fd, &msg, &msg_size, NULL, NULL); + } else { + status = ucs_socket_recv(fd, &msg, msg_size, NULL, NULL); + } + + return status; + } + + void post_send(int fd, const std::vector &buf) { + scoped_log_handler slh(wrap_errors_logger); + ucs_status_t status = ucs_socket_send(fd, &buf[0], + buf.size(), NULL, NULL); + // send can be OK or fail when a connection was closed by a peer + // before all data were sent + ASSERT_TRUE((status == UCS_OK) || + (status == UCS_ERR_IO_ERROR)); + } + + void detect_conn_reset(int fd) { + // Try to receive something on this socket fd - it has to be failed + ucs_status_t status = post_recv(fd); + ASSERT_TRUE((status == UCS_ERR_IO_ERROR) || + (status == UCS_ERR_CANCELED)); + EXPECT_EQ(0, ucs_socket_is_connected(fd)); + } + + void test_listener_flood(entity& test_entity, size_t max_conn, + size_t msg_size) { + std::vector fds; + std::vector buf; + + if (msg_size > 0) { + buf.resize(msg_size + sizeof(uct_tcp_am_hdr_t)); + std::fill(buf.begin(), buf.end(), 0); + init_data(&buf[0], buf.size()); + } + + setup_conns_to_entity(test_entity, max_conn, fds); + + size_t handled = 0; + for (std::vector::const_iterator iter = 
fds.begin(); + iter != fds.end(); ++iter) { + size_t sent_length = 0; + do { + if (msg_size > 0) { + post_send(*iter, buf); + sent_length += buf.size(); + } else { + close(*iter); + } + + // If it was sent >= the length of the magic number or sending + // is not required by the current test, wait until connection + // is destroyed. Otherwise, need to send more data + if ((msg_size == 0) || (sent_length >= sizeof(uint64_t))) { + handled++; + + while (get_accepted_conn_num(test_entity) != (max_conn - handled)) { + sched_yield(); + progress(); + } + } else { + // Peers still have to be connected + ucs_status_t status = post_recv(*iter, true); + EXPECT_TRUE((status == UCS_OK) || + (status == UCS_ERR_NO_PROGRESS)); + EXPECT_EQ(1, ucs_socket_is_connected(*iter)); + } + } while ((msg_size != 0) && (sent_length < sizeof(uint64_t))); + } + + // give a chance to close all connections + while (!ucs_list_is_empty(&m_tcp_iface->ep_list)) { + sched_yield(); + progress(); + } + + // TCP has to reject all connections and forget EPs that were + // created after accept(): + // - EP list has to be empty + EXPECT_EQ(1, ucs_list_is_empty(&m_tcp_iface->ep_list)); + // - all connections have to be destroyed (if wasn't closed + // yet by the clients) + if (msg_size > 0) { + // if we sent data during the test, close socket fd here + while (!fds.empty()) { + int fd = fds.back(); + fds.pop_back(); + detect_conn_reset(fd); + close(fd); + } + } + } + + void setup_conns_to_entity(entity& to, size_t max_conn, + std::vector &fds) { + for (size_t i = 0; i < max_conn; i++) { + int fd = setup_conn_to_entity(to, i + 1lu); + fds.push_back(fd); + + // give a chance to finish all connections + while (get_accepted_conn_num(to) != (i + 1lu)) { + sched_yield(); + progress(); + } + + EXPECT_EQ(1, ucs_socket_is_connected(fd)); + } + } + +private: + void init_data(void *buf, size_t msg_size) { + uct_tcp_am_hdr_t *tcp_am_hdr; + ASSERT_TRUE(msg_size >= sizeof(*tcp_am_hdr)); + tcp_am_hdr = static_cast(buf); + 
tcp_am_hdr->am_id = std::numeric_limits::max(); + tcp_am_hdr->length = msg_size; + } + + int connect_to_entity(entity& to) { + uct_device_addr_t *dev_addr; + uct_iface_addr_t *iface_addr; + ucs_status_t status; + + dev_addr = (uct_device_addr_t*)malloc(to.iface_attr().device_addr_len); + iface_addr = (uct_iface_addr_t*)malloc(to.iface_attr().iface_addr_len); + + status = uct_iface_get_device_address(to.iface(), dev_addr); + ASSERT_UCS_OK(status); + + status = uct_iface_get_address(to.iface(), iface_addr); + ASSERT_UCS_OK(status); + + struct sockaddr_in dest_addr; + dest_addr.sin_family = AF_INET; + dest_addr.sin_port = *(in_port_t*)iface_addr; + dest_addr.sin_addr = *(struct in_addr*)dev_addr; + + int fd; + status = ucs_socket_create(AF_INET, SOCK_STREAM, &fd); + ASSERT_UCS_OK(status); + + status = ucs_socket_connect(fd, (const struct sockaddr*)&dest_addr); + ASSERT_UCS_OK(status); + + status = ucs_sys_fcntl_modfl(fd, O_NONBLOCK, 0); + ASSERT_UCS_OK(status); + + free(iface_addr); + free(dev_addr); + + return fd; + } + + int setup_conn_to_entity(entity &to, size_t sn = 1) { + int fd = -1; + + do { + if (fd != -1) { + close(fd); + } + + fd = connect_to_entity(to); + EXPECT_NE(-1, fd); + + // give a chance to finish the connection + while (get_accepted_conn_num(to) != sn) { + sched_yield(); + progress(); + + ucs_status_t status = post_recv(fd, true); + if ((status != UCS_OK) && + (status != UCS_ERR_NO_PROGRESS)) { + break; + } + } + } while (!ucs_socket_is_connected(fd)); + + EXPECT_EQ(1, ucs_socket_is_connected(fd)); + + return fd; + } + +protected: + uct_tcp_iface *m_tcp_iface; + entity *m_ent; +}; + +UCS_TEST_P(test_uct_tcp, listener_flood_connect_and_send_large) { + const size_t max_conn = + ucs_min(static_cast(max_connections()), 128lu) / + ucs::test_time_multiplier(); + const size_t msg_size = m_tcp_iface->config.rx_seg_size * 4; + test_listener_flood(*m_ent, max_conn, msg_size); +} + +UCS_TEST_P(test_uct_tcp, listener_flood_connect_and_send_small) { + const 
size_t max_conn = + ucs_min(static_cast(max_connections()), 128lu) / + ucs::test_time_multiplier(); + // It should be less than length of the expected magic number by TCP + const size_t msg_size = 1; + test_listener_flood(*m_ent, max_conn, msg_size); +} + +UCS_TEST_P(test_uct_tcp, listener_flood_connect_and_close) { + const size_t max_conn = + ucs_min(static_cast(max_connections()), 128lu) / + ucs::test_time_multiplier(); + test_listener_flood(*m_ent, max_conn, 0); +} + +_UCT_INSTANTIATE_TEST_CASE(test_uct_tcp, tcp) diff --git a/test/gtest/uct/test_amo.cc b/test/gtest/uct/test_amo.cc new file mode 100644 index 0000000..5dce707 --- /dev/null +++ b/test/gtest/uct/test_amo.cc @@ -0,0 +1,200 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "test_amo.h" + +#include + + +uct_amo_test::uct_amo_test() { + pthread_spin_init(&m_replies_lock, 0); +} + +void uct_amo_test::init() { + uct_test::init(); + + srand48(ucs::rand()); + + entity *receiver = uct_test::create_entity(0); + m_entities.push_back(receiver); + + check_skip_test(); + + for (unsigned i = 0; i < num_senders(); ++i) { + entity *sender = uct_test::create_entity(0); + m_entities.push_back(sender); + sender->connect(0, *receiver, i); + receiver->connect(i, *sender, 0); + } +} + +void uct_amo_test::cleanup() { + uct_test::cleanup(); +} + +uint64_t uct_amo_test::rand64() { + /* coverity[dont_call] */ + return (mrand48() << 32) | (uint32_t)mrand48(); +} + +uint64_t uct_amo_test::hash64(uint64_t value) { + return value * 171711717; +} + +void uct_amo_test::atomic_reply_cb(uct_completion_t *self, ucs_status_t status) { + completion *comp = ucs_container_of(self, completion, uct); + comp->self->add_reply_safe(comp->result); +} + +void uct_amo_test::add_reply_safe(uint64_t data) { + pthread_spin_lock(&m_replies_lock); + m_replies.push_back(data); + pthread_spin_unlock(&m_replies_lock); +} + +const uct_amo_test::entity& 
uct_amo_test::receiver() { + return m_entities.at(0); +} + +const uct_amo_test::entity& uct_amo_test::sender(unsigned index) { + return m_entities.at(1 + index); +} + +void uct_amo_test::validate_replies(const std::vector& exp_replies) { + + /* Count histogram of expected replies */ + std::map exp_h; + for (std::vector::const_iterator iter = exp_replies.begin(); + iter != exp_replies.end(); ++iter) { + ++exp_h[*iter]; + } + + for (ucs::ptr_vector::const_iterator iter = m_workers.begin(); + iter != m_workers.end(); ++iter) + { + ucs_assert(!(*iter)->running); + } + + /* Workers should not be running now. + * Count a histogram of actual replies. + */ + unsigned count = 0; + std::map h; + + while (count < exp_replies.size()) { + while (m_replies.empty()) { + progress(); + } + + ++h[m_replies.back()]; + m_replies.pop_back(); + ++count; + } + + /* Destroy workers only after getting all replies, because reply callback + * may use the worker object (e.g CSWAP test). */ + m_workers.clear(); + + /* Every reply should be present exactly once */ + for (std::map::const_iterator iter = exp_h.begin(); + iter != exp_h.end(); ++iter) + { + if (h[iter->first] != iter->second) { + UCS_TEST_ABORT("Reply " << iter->first << " appeared " << h[iter->first] << + " times; expected: " << iter->second); + } + h.erase(iter->first); + } + + if (!h.empty()) { + UCS_TEST_ABORT("Got some unexpected replies, e.g: " << h.begin()->first << + " (" << h.begin()->second << " times)"); + } +} + +void uct_amo_test::wait_for_remote() { + for (unsigned i = 0; i < num_senders(); ++i) { + sender(i).flush(); + } +} + +void uct_amo_test::run_workers(send_func_t send, const mapped_buffer& recvbuf, + std::vector initial_values, bool advance) +{ + m_workers.clear(); + + for (unsigned i = 0; i < num_senders(); ++i) { + m_workers.push_back(new worker(this, send, recvbuf, sender(i), + initial_values[i], advance)); + } + + for (unsigned i = 0; i < num_senders(); ++i) { + m_workers.at(i).join(); + } +} + 
+uct_amo_test::worker::worker(uct_amo_test* test, send_func_t send, /* stores the arguments and starts the sender thread */
+                             const mapped_buffer& recvbuf, const entity& entity,
+                             uint64_t initial_value, bool advance) :
+    test(test), value(initial_value), count(0), running(true),
+    m_send(send), m_advance(advance), m_recvbuf(recvbuf), m_entity(entity)
+
+{
+    m_completions.resize(uct_amo_test::count()); /* one completion slot per iteration of run() */
+    pthread_create(&m_thread, NULL, run, reinterpret_cast(this)); /* NOTE(review): return value is not checked */
+}
+
+uct_amo_test::worker::~worker() { /* a worker may only be destroyed after join() cleared 'running' */
+    ucs_assert(!running);
+}
+
+uct_amo_test::completion *uct_amo_test::worker::get_completion(unsigned index) /* no bounds check on 'index' */
+{
+    return &m_completions[index];
+}
+
+void* uct_amo_test::worker::run(void *arg) { /* thread entry point; 'arg' is the worker handed to pthread_create */
+    worker *self = reinterpret_cast(arg);
+    self->run();
+    return NULL;
+}
+
+void uct_amo_test::worker::run() { /* issues uct_amo_test::count() operations through m_send */
+    for (unsigned i = 0; i < uct_amo_test::count(); ++i) {
+        ucs_status_t status;
+        completion *comp;
+
+        comp = get_completion(i); /* each operation gets its own completion slot */
+        comp->result = 0;
+        comp->uct.func = NULL; /* m_send may install a callback; NULL means nothing to call on UCS_OK */
+        status = (test->*m_send)(m_entity.ep(0), *this, m_recvbuf,
+                                 &comp->result, comp);
+        while (status == UCS_ERR_NO_RESOURCE) { /* progress the entity and retry until the send is accepted */
+            m_entity.progress();
+            status = (test->*m_send)(m_entity.ep(0), *this, m_recvbuf,
+                                     &comp->result, comp);
+        }
+        if (status == UCS_OK) { /* completed immediately: invoke the callback, if any, from here */
+            if (comp->uct.func != NULL) {
+                comp->uct.func(&comp->uct, UCS_OK);
+            }
+        } else if (status == UCS_INPROGRESS) {
+            comp->uct.count = 1; /* NOTE(review): assigned only after the send call has returned */
+        } else {
+            UCS_TEST_ABORT(ucs_status_string(status)); /* any other status aborts the test */
+        }
+        ++count;
+        if (m_advance) { /* step 'value' along the hash64 chain for the next operation */
+            value = hash64(value);
+        }
+    }
+}
+
+void uct_amo_test::worker::join() { /* waits for the thread to finish, then clears 'running' */
+    void *retval;
+    pthread_join(m_thread, &retval);
+    running = false;
+}
diff --git a/test/gtest/uct/test_amo.h b/test/gtest/uct/test_amo.h
new file mode 100644
index 0000000..9e58c8d
--- /dev/null
+++ b/test/gtest/uct/test_amo.h
@@ -0,0 +1,168 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_TEST_AMO_H
+#define UCT_TEST_AMO_H
+
+#include "uct_test.h"
+
+#include
+
+class uct_amo_test : public uct_test { /* common fixture for the atomic-operation tests */
+public:
+    class worker;
+    struct completion;
+    typedef ucs_status_t (uct_amo_test::* send_func_t)(uct_ep_h ep, worker& worker, /* member function that posts one operation */
+                                                       const mapped_buffer& recvbuf,
+                                                       uint64_t *result, completion *comp);
+
+    static inline unsigned num_senders() { /* 2 senders when RUNNING_ON_VALGRIND is true, 4 otherwise */
+        return (RUNNING_ON_VALGRIND) ? 2 : 4;
+    }
+    static inline unsigned count() { /* operations per worker: 1000 divided by the test time multiplier */
+        return 1000 / ucs::test_time_multiplier();
+    }
+
+    uct_amo_test();
+    virtual void init();
+    virtual void cleanup();
+
+    const entity& receiver();
+    const entity& sender(unsigned index);
+    void validate_replies(const std::vector& exp_replies);
+    void wait_for_remote();
+    void add_reply_safe(uint64_t data);
+
+    static uint64_t rand64();
+    static uint64_t hash64(uint64_t value);
+
+    static void atomic_reply_cb(uct_completion_t *self, ucs_status_t status);
+
+    void run_workers(send_func_t send, const mapped_buffer& recvbuf,
+                     std::vector initial_values, bool advance);
+
+    struct completion { /* per-operation state handed to the transport together with 'uct' */
+        union { /* the two owner pointers share storage; a send function sets exactly one of them */
+            uct_amo_test *self;
+            worker *w;
+        };
+        uint64_t result; /* fetched value is written here by the fetching operations */
+        uct_completion_t uct;
+    };
+
+    class worker { /* one sender thread; the thread is created in the constructor */
+    public:
+        worker(uct_amo_test* test, send_func_t send, const mapped_buffer& recvbuf,
+               const entity& entity, uint64_t initial_value, bool advance);
+        ~worker();
+
+        completion *get_completion(unsigned index);
+        static void* run(void *arg);
+        void join();
+
+        uct_amo_test* const test;
+        uint64_t value; /* operand used for the next operation */
+        unsigned count; /* incremented by run() after each posted operation */
+        bool running; /* cleared by join() */
+
+    private:
+        void run();
+
+        send_func_t m_send;
+        const bool m_advance; /* when true, run() replaces 'value' with hash64(value) after each operation */
+        const mapped_buffer& m_recvbuf;
+        const entity& m_entity;
+        pthread_t m_thread;
+        std::vector m_completions;
+    };
+
+    ucs_status_t atomic_post(uct_ep_h ep, uct_atomic_op_t opcode, /* 32-bit overload, forwards to uct_ep_atomic32_post */
+                             uint32_t value, uint64_t remote_addr,
+                             uct_rkey_t rkey) {
+        return uct_ep_atomic32_post(ep, opcode, value, remote_addr, rkey);
+    }
+
+    ucs_status_t atomic_post(uct_ep_h ep, uct_atomic_op_t opcode, /* 64-bit overload, forwards to uct_ep_atomic64_post */
+                             uint64_t value, uint64_t remote_addr,
+                             uct_rkey_t rkey) {
+        return uct_ep_atomic64_post(ep, opcode, value, remote_addr, rkey);
+    }
+
+    ucs_status_t atomic_fetch_nb(uct_ep_h ep, uct_atomic_op_t opcode, /* 32-bit overload, forwards to uct_ep_atomic32_fetch */
+                                 uint32_t value, uint32_t *result,
+                                 uint64_t remote_addr, uct_rkey_t rkey,
+                                 uct_completion_t *comp) {
+        return uct_ep_atomic32_fetch(ep, opcode, value, result, remote_addr, rkey, comp);
+    }
+
+    ucs_status_t atomic_fetch_nb(uct_ep_h ep, uct_atomic_op_t opcode, /* 64-bit overload, forwards to uct_ep_atomic64_fetch */
+                                 uint64_t value, uint64_t *result,
+                                 uint64_t remote_addr, uct_rkey_t rkey,
+                                 uct_completion_t *comp) {
+        return uct_ep_atomic64_fetch(ep, opcode, value, result, remote_addr, rkey, comp);
+    }
+
+    template
+    ucs_status_t atomic_op(uct_ep_h ep, worker& worker, const mapped_buffer& recvbuf, /* send_func_t for posted (non-fetching) atomics */
+                           uint64_t *result, completion *comp) {
+        return atomic_post(ep, opcode, (T)worker.value, recvbuf.addr(), recvbuf.rkey()); /* 'result' and 'comp' are unused here */
+    }
+
+    template
+    ucs_status_t atomic_fop(uct_ep_h ep, worker& worker, const mapped_buffer& recvbuf, /* send_func_t for fetching atomics */
+                            uint64_t *result, completion *comp) {
+        comp->self = this; /* atomic_reply_cb reports the fetched value to this fixture */
+        comp->uct.func = atomic_reply_cb;
+        return atomic_fetch_nb(ep, opcode, (T)worker.value,
+                               (T*)result, recvbuf.addr(), recvbuf.rkey(),
+                               &comp->uct);
+    }
+
+    template
+    static T and_op(T v1, T v2) /* reference implementation: bitwise AND */
+    {
+        return v1 & v2;
+    }
+
+    template
+    static T or_op(T v1, T v2) /* reference implementation: bitwise OR */
+    {
+        return v1 | v2;
+    }
+
+    template
+    static T add_op(T v1, T v2) /* reference implementation: addition */
+    {
+        return v1 + v2;
+    }
+
+    template
+    static T xor_op(T v1, T v2) /* reference implementation: bitwise XOR */
+    {
+        return v1 ^ v2;
+    }
+
+    template
+    static T and_val(unsigned i) /* AND operand for sender i: all bits set except bits 2*i and i+16 */
+    {
+        return ~(UCS_BIT(i * 2) | UCS_BIT(i + 16));
+    }
+
+    template
+    static T or_val(unsigned i) /* OR operand for sender i: only bits 2*i and i+16 set */
+    {
+        return UCS_BIT(i * 2) | UCS_BIT(i + 16);
+    }
+
+protected:
+
+    ucs::ptr_vector m_workers;
+    pthread_spinlock_t m_replies_lock; /* taken by add_reply_safe() around m_replies.push_back() */
+    std::vector m_replies;
+};
+
+
+#endif
diff --git a/test/gtest/uct/test_amo_add_xor.cc b/test/gtest/uct/test_amo_add_xor.cc
new file mode 100644
index 0000000..f09ab0d
--- /dev/null
+++ b/test/gtest/uct/test_amo_add_xor.cc
@@ -0,0 +1,66 @@
+/**
+* Copyright (C) Mellanox
Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "test_amo.h"
+
+
+class uct_amo_add_xor_test : public uct_amo_test {
+public:
+
+    template
+    void test_op(T (*op)(T, T)) { /* 'op' is the local reference implementation of the atomic under test */
+        /*
+         * Method: Add/xor many random values from multiple workers running at the same
+         * time. We expect the final result to be the sum/xor of all these values.
+         */
+
+        mapped_buffer recvbuf(sizeof(T), 0, receiver());
+
+        T value = rand64();
+        *(T*)recvbuf.ptr() = value; /* seed the target variable with a random initial value */
+
+        T exp_result = value;
+        std::vector add_vec;
+        for (unsigned i = 0; i < num_senders(); ++i) {
+            value = rand64();
+            add_vec.push_back(value); /* initial operand of sender i */
+
+            for (unsigned j = 0; j < count(); ++j) { /* replay the operand chain sender i will use */
+                exp_result = op(exp_result, value);
+                value = hash64(value);
+            }
+        }
+
+        run_workers(static_cast(&uct_amo_test::atomic_op),
+                    recvbuf, add_vec, true); /* advance=true: workers step their operand with hash64, as above */
+
+        wait_for_remote();
+        EXPECT_EQ(exp_result, *(T*)recvbuf.ptr());
+    }
+};
+
+UCS_TEST_SKIP_COND_P(uct_amo_add_xor_test, add32, /* skipped when the ADD / OP32 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), OP32)) {
+    test_op(add_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_add_xor_test, add64, /* skipped when the ADD / OP64 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), OP64)) {
+    test_op(add_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_add_xor_test, xor32, /* skipped when the XOR / OP32 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), OP32)) {
+    test_op(xor_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_add_xor_test, xor64, /* skipped when the XOR / OP64 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), OP64)) {
+    test_op(xor_op);
+}
+
+UCT_INSTANTIATE_TEST_CASE(uct_amo_add_xor_test)
+
diff --git a/test/gtest/uct/test_amo_and_or.cc b/test/gtest/uct/test_amo_and_or.cc
new file mode 100644
index 0000000..f410d1b
--- /dev/null
+++ b/test/gtest/uct/test_amo_and_or.cc
@@ -0,0 +1,67 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "test_amo.h"
+
+
+class uct_amo_and_or_test : public uct_amo_test {
+public:
+
+    template
+    void test_op(T (*op)(T, T), T (*val)(unsigned)) { /* 'op' is the reference and/or, 'val' yields the operand of sender i */
+        /*
+         * Method: Apply and/or with many values from multiple workers running at the same
+         * time. We expect the final result to be the and/or of all these values.
+         * This is a simplified version of the add/xor test: the operand is constant
+         * for every worker, so the result does not degenerate to 0 or MAX_INT
+         */
+
+        mapped_buffer recvbuf(sizeof(T), 0, receiver());
+
+        T value = 0x0ff0f00f;
+        *(T*)recvbuf.ptr() = value; /* fixed initial bit pattern of the target variable */
+
+        T exp_result = value;
+        std::vector op_vec;
+        for (unsigned i = 0; i < num_senders(); ++i) {
+            value = val(i);
+            op_vec.push_back(value); /* constant operand of sender i */
+
+            for (unsigned j = 0; j < count(); ++j) { /* same operand for all count() operations */
+                exp_result = op(exp_result, value);
+            }
+        }
+
+        run_workers(static_cast(&uct_amo_test::atomic_op),
+                    recvbuf, op_vec, false); /* advance=false: workers keep their operand unchanged */
+
+        wait_for_remote();
+        EXPECT_EQ(exp_result, *(T*)recvbuf.ptr());
+    }
+};
+
+UCS_TEST_SKIP_COND_P(uct_amo_and_or_test, and32, /* skipped when the AND / OP32 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), OP32)) {
+    test_op(and_op, and_val);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_and_or_test, and64, /* was registered as "add64" although it exercises AND / OP64 */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), OP64)) {
+    test_op(and_op, and_val);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_and_or_test, or32, /* skipped when the OR / OP32 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), OP32)) {
+    test_op(or_op, or_val);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_and_or_test, or64, /* skipped when the OR / OP64 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), OP64)) {
+    test_op(or_op, or_val);
+}
+
+UCT_INSTANTIATE_TEST_CASE(uct_amo_and_or_test)
+
diff --git a/test/gtest/uct/test_amo_cswap.cc b/test/gtest/uct/test_amo_cswap.cc
new file mode 100644
index 0000000..cb7df33
--- /dev/null
+++ b/test/gtest/uct/test_amo_cswap.cc
@@ -0,0 +1,107 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "test_amo.h"
+
+
+class uct_amo_cswap_test : public uct_amo_test {
+public:
+
+    static const uint64_t MISS = 0; /* reply value recorded when a compare-swap did not match */
+
+    template
+    static void cswap_reply_cb(uct_completion_t *self, ucs_status_t status) { /* completion callback installed by cswap32/cswap64 */
+        completion *comp = ucs_container_of(self, completion, uct);
+        worker* w = comp->w;
+        T dataval = comp->result;
+
+        /* Compare after casting to T, since w->value is always 64 bit */
+        if (dataval == (T)w->value) {
+            w->test->add_reply_safe(dataval); /* Swapped */
+        } else {
+            w->test->add_reply_safe(MISS); /* Miss value */
+        }
+
+        w->value = (T)hash64(w->value); /* Move to next value */
+        --w->count; /* Allow one more operation */
+    }
+
+    ucs_status_t cswap32(uct_ep_h ep, worker& worker, const mapped_buffer& recvbuf, /* send_func_t: 32-bit compare-swap of value -> hash64(value) */
+                         uint64_t *result, completion *comp) {
+        if (worker.count > 0) {
+            return UCS_ERR_NO_RESOURCE; /* Don't proceed until got a reply */
+        }
+        comp->uct.func = cswap_reply_cb;
+        comp->w = &worker;
+        // TODO will not work if completes immediately
+        return uct_ep_atomic_cswap32(ep, worker.value, hash64(worker.value),
+                                     recvbuf.addr(), recvbuf.rkey(),
+                                     (uint32_t*)result, &comp->uct);
+    }
+
+    ucs_status_t cswap64(uct_ep_h ep, worker& worker, const mapped_buffer& recvbuf, /* send_func_t: 64-bit compare-swap of value -> hash64(value) */
+                         uint64_t *result, completion *comp) {
+        if (worker.count > 0) {
+            return UCS_ERR_NO_RESOURCE; /* Don't proceed until got a reply */
+        }
+        comp->uct.func = cswap_reply_cb;
+        comp->w = &worker;
+        // TODO will not work if completes immediately
+        return uct_ep_atomic_cswap64(ep, worker.value, hash64(worker.value),
+                                     recvbuf.addr(), recvbuf.rkey(),
+                                     result, &comp->uct);
+    }
+
+    template
+    void test_cswap(send_func_t send) {
+        /*
+         * Method: All workers try to create a swap chain using the same series of
+         * values. But only one worker should be able to advance to the next
+         * value every time.
+         * This test is different because it sends the next request only after
+         * getting a reply.
+         */
+
+        mapped_buffer recvbuf(sizeof(T), 0, receiver());
+
+        /* Set random initial value */
+        T initial_value = rand64();
+        *(T*)recvbuf.ptr() = initial_value;
+
+        T value = initial_value;
+        std::vector exp_replies;
+        for (unsigned i = 0; i < count(); ++i) { /* one successful swap expected for every value of the chain */
+            exp_replies.push_back(value);
+            value = hash64(value);
+        }
+
+        /* Expect N-1 cswap misses for each value */
+        for (unsigned i = 0; i < count() * (num_senders() - 1); ++i) {
+            exp_replies.push_back(static_cast(MISS));
+        }
+
+        run_workers(send, recvbuf, std::vector(num_senders(), initial_value),
+                    false); /* advance=false: the reply callback moves each worker to the next value */
+
+        validate_replies(exp_replies);
+
+        wait_for_remote();
+        EXPECT_EQ(value, *(T*)recvbuf.ptr());
+    }
+};
+
+
+UCS_TEST_SKIP_COND_P(uct_amo_cswap_test, cswap32, /* skipped when the CSWAP / FOP32 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_CSWAP), FOP32)) {
+    test_cswap(static_cast(&uct_amo_cswap_test::cswap32));
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_cswap_test, cswap64, /* skipped when the CSWAP / FOP64 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_CSWAP), FOP64)) {
+    test_cswap(static_cast(&uct_amo_cswap_test::cswap64));
+}
+
+UCT_INSTANTIATE_TEST_CASE(uct_amo_cswap_test)
diff --git a/test/gtest/uct/test_amo_fadd_fxor.cc b/test/gtest/uct/test_amo_fadd_fxor.cc
new file mode 100644
index 0000000..86b6143
--- /dev/null
+++ b/test/gtest/uct/test_amo_fadd_fxor.cc
@@ -0,0 +1,104 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "test_amo.h"
+
+
+class uct_amo_fadd_fxor_test : public uct_amo_test {
+public:
+
+    template
+    void test_fop(T (*op)(T, T)) { /* 'op' is the local reference implementation of the fetching atomic */
+        /*
+         * Method: Do concurrent atomic fetch-and-add/xor of constant random value
+         * to a single atomic variable. Check that every sender gets a unique reply
+         * and the final value of atomic variable is the sum/xor of all.
+         */
+
+        mapped_buffer recvbuf(sizeof(T), 0, receiver());
+
+        T value = rand64();
+        T add = rand64(); /* the constant operand every sender applies */
+        *(T*)recvbuf.ptr() = value;
+
+        std::vector exp_replies;
+        for (unsigned i = 0; i < count() * num_senders(); ++i) { /* each operation fetches the value before it is applied */
+            exp_replies.push_back(value);
+            value = op(value, add);
+        }
+
+        run_workers(static_cast(&uct_amo_test::atomic_fop),
+                    recvbuf, std::vector(num_senders(), add), false);
+
+        validate_replies(exp_replies);
+
+        wait_for_remote();
+        EXPECT_EQ(value, *(T*)recvbuf.ptr());
+    }
+};
+
+UCS_TEST_SKIP_COND_P(uct_amo_fadd_fxor_test, fadd32, /* skipped when the ADD / FOP32 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), FOP32)) {
+    test_fop(add_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fadd_fxor_test, fadd64, /* skipped when the ADD / FOP64 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), FOP64)) {
+    test_fop(add_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fadd_fxor_test, fxor32, /* skipped when the XOR / FOP32 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), FOP32)) {
+    test_fop(xor_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fadd_fxor_test, fxor64, /* skipped when the XOR / FOP64 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), FOP64)) {
+    test_fop(xor_op);
+}
+
+UCT_INSTANTIATE_TEST_CASE(uct_amo_fadd_fxor_test)
+
+class uct_amo_fadd_fxor_test_inlresp : public uct_amo_fadd_fxor_test {}; /* same tests, instantiated for IB only (see below) */
+
+UCS_TEST_SKIP_COND_P(uct_amo_fadd_fxor_test_inlresp, fadd64_inlresp0, /* NOTE(review): the string argument is presumably a config override - confirm in the macro */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), FOP64),
+                     "IB_TX_INLINE_RESP=0") {
+    test_fop(add_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fadd_fxor_test_inlresp, fadd64_inlresp32,
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), FOP64),
+                     "IB_TX_INLINE_RESP=32") {
+    test_fop(add_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fadd_fxor_test_inlresp, fadd64_inlresp64,
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), FOP64),
+                     "IB_TX_INLINE_RESP=64") {
+    test_fop(add_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fadd_fxor_test_inlresp, fxor64_inlresp0,
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), FOP64),
+                     "IB_TX_INLINE_RESP=0") {
+    test_fop(xor_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fadd_fxor_test_inlresp, fxor64_inlresp32,
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), FOP64),
+                     "IB_TX_INLINE_RESP=32") {
+    test_fop(xor_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fadd_fxor_test_inlresp, fxor64_inlresp64,
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), FOP64),
+                     "IB_TX_INLINE_RESP=64") {
+    test_fop(xor_op);
+}
+
+UCT_INSTANTIATE_IB_TEST_CASE(uct_amo_fadd_fxor_test_inlresp)
+
diff --git a/test/gtest/uct/test_amo_fand_for.cc b/test/gtest/uct/test_amo_fand_for.cc
new file mode 100644
index 0000000..b4778e1
--- /dev/null
+++ b/test/gtest/uct/test_amo_fand_for.cc
@@ -0,0 +1,104 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2018. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "test_amo.h"
+
+
+class uct_amo_fand_for_test : public uct_amo_test {
+public:
+
+    template
+    void test_fop(T (*op)(T, T)) { /* 'op' is the local reference implementation of the fetching atomic */
+        /*
+         * Method: Do concurrent atomic fetch-and-and/or of constant random value
+         * to a single atomic variable. Check that the fetched replies match the expected
+         * sequence and the final value of atomic variable is the and/or of all.
+         */
+
+        mapped_buffer recvbuf(sizeof(T), 0, receiver());
+
+        T value = rand64();
+        T add = rand64(); /* the constant and/or operand every sender applies (name kept from the add test) */
+        *(T*)recvbuf.ptr() = value;
+
+        std::vector exp_replies;
+        for (unsigned i = 0; i < count() * num_senders(); ++i) { /* each operation fetches the value before it is applied */
+            exp_replies.push_back(value);
+            value = op(value, add);
+        }
+
+        run_workers(static_cast(&uct_amo_test::atomic_fop),
+                    recvbuf, std::vector(num_senders(), add), false);
+
+        validate_replies(exp_replies);
+
+        wait_for_remote();
+        EXPECT_EQ(value, *(T*)recvbuf.ptr());
+    }
+};
+
+UCS_TEST_SKIP_COND_P(uct_amo_fand_for_test, fand32, /* skipped when the AND / FOP32 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), FOP32)) {
+    test_fop(and_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fand_for_test, fand64, /* skipped when the AND / FOP64 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), FOP64)) {
+    test_fop(and_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fand_for_test, for32, /* skipped when the OR / FOP32 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), FOP32)) {
+    test_fop(or_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fand_for_test, for64, /* skipped when the OR / FOP64 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), FOP64)) {
+    test_fop(or_op);
+}
+
+UCT_INSTANTIATE_TEST_CASE(uct_amo_fand_for_test)
+
+class uct_amo_fand_for_test_inlresp : public uct_amo_fand_for_test {}; /* same tests, instantiated for IB only (see below) */
+
+UCS_TEST_SKIP_COND_P(uct_amo_fand_for_test_inlresp, fand64_inlresp0, /* NOTE(review): the string argument is presumably a config override - confirm in the macro */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), FOP64),
+                     "IB_TX_INLINE_RESP=0") {
+    test_fop(and_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fand_for_test_inlresp, fand64_inlresp32, /* fetch-AND 64 with IB_TX_INLINE_RESP=32 */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), FOP64),
+                     "IB_TX_INLINE_RESP=32") {
+    test_fop(and_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fand_for_test_inlresp, fand64_inlresp64, /* fetch-AND 64 with IB_TX_INLINE_RESP=64 */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), FOP64),
+                     "IB_TX_INLINE_RESP=64") {
+    test_fop(and_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fand_for_test_inlresp, for64_inlresp0, /* fetch-OR 64 with IB_TX_INLINE_RESP=0 */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), FOP64),
+                     "IB_TX_INLINE_RESP=0") {
+    test_fop(or_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fand_for_test_inlresp, for64_inlresp32, /* fetch-OR 64 with IB_TX_INLINE_RESP=32 */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), FOP64),
+                     "IB_TX_INLINE_RESP=32") {
+    test_fop(or_op);
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_fand_for_test_inlresp, for64_inlresp64, /* fetch-OR 64 with IB_TX_INLINE_RESP=64 */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), FOP64),
+                     "IB_TX_INLINE_RESP=64") {
+    test_fop(or_op);
+}
+
+UCT_INSTANTIATE_IB_TEST_CASE(uct_amo_fand_for_test_inlresp)
+
diff --git a/test/gtest/uct/test_amo_swap.cc b/test/gtest/uct/test_amo_swap.cc
new file mode 100644
index 0000000..7367dc8
--- /dev/null
+++ b/test/gtest/uct/test_amo_swap.cc
@@ -0,0 +1,99 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "test_amo.h"
+
+
+class uct_amo_swap_test : public uct_amo_test {
+public:
+
+    ucs_status_t swap32(uct_ep_h ep, worker& worker, const mapped_buffer& recvbuf, /* send_func_t: 32-bit fetching SWAP with worker.value */
+                        uint64_t *result, completion *comp) {
+        comp->self = this; /* atomic_reply_cb reports the fetched value to this fixture */
+        comp->uct.func = atomic_reply_cb;
+        return uct_ep_atomic32_fetch(ep, UCT_ATOMIC_OP_SWAP, worker.value, (uint32_t*)result,
+                                     recvbuf.addr(), recvbuf.rkey(), &comp->uct);
+    }
+
+    ucs_status_t swap64(uct_ep_h ep, worker& worker, const mapped_buffer& recvbuf, /* send_func_t: 64-bit fetching SWAP with worker.value */
+                        uint64_t *result, completion *comp) {
+        comp->self = this; /* atomic_reply_cb reports the fetched value to this fixture */
+        comp->uct.func = atomic_reply_cb;
+        return uct_ep_atomic64_fetch(ep, UCT_ATOMIC_OP_SWAP, worker.value, (uint64_t*)result,
+                                     recvbuf.addr(), recvbuf.rkey(), &comp->uct);
+    }
+
+    template
+    void test_swap(send_func_t send) {
+        /*
+         * Method: Initialize the buffer to random value, and then have each
+         * worker thread swap it to a series of generated values. We expect that
+         * the replies will include the first value, and all other values, except
+         * the final value of the buffer.
+         */
+
+        mapped_buffer recvbuf(sizeof(T), 0, receiver());
+
+        /* Set random initial value */
+        T value = rand64();
+        *(T*)recvbuf.ptr() = value;
+
+        std::vector exp_replies;
+        exp_replies.push_back(value);
+
+        std::vector swap_vec;
+        for (unsigned i = 0; i < num_senders(); ++i) {
+            value = rand64();
+            swap_vec.push_back(value); /* initial operand of sender i */
+
+            for (unsigned j = 0; j < count(); ++j) { /* every value a sender swaps in is expected back as a reply */
+                exp_replies.push_back(value);
+                value = hash64(value);
+            }
+        }
+
+        run_workers(send, recvbuf, swap_vec, true);
+
+        wait_for_remote();
+        add_reply_safe(*(T*)recvbuf.ptr()); /* the value left in the buffer is never fetched, so count it here */
+        validate_replies(exp_replies);
+    }
+};
+
+
+UCS_TEST_SKIP_COND_P(uct_amo_swap_test, swap32, /* skipped when the SWAP / FOP32 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_SWAP), FOP32)) {
+    test_swap(static_cast(&uct_amo_swap_test::swap32));
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_swap_test, swap64, /* skipped when the SWAP / FOP64 atomics check fails */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_SWAP), FOP64)) {
+    test_swap(static_cast(&uct_amo_swap_test::swap64));
+}
+
+UCT_INSTANTIATE_TEST_CASE(uct_amo_swap_test)
+
+class uct_amo_swap_test_inlresp : public uct_amo_swap_test {}; /* same tests, instantiated for IB only (see below) */
+
+UCS_TEST_SKIP_COND_P(uct_amo_swap_test_inlresp, swap32_inlresp0, /* NOTE(review): the string argument is presumably a config override - confirm in the macro */
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_SWAP), FOP32),
+                     "IB_TX_INLINE_RESP=0") {
+    test_swap(static_cast(&uct_amo_swap_test::swap32));
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_swap_test_inlresp, swap32_inlresp32,
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_SWAP), FOP32),
+                     "IB_TX_INLINE_RESP=32") {
+    test_swap(static_cast(&uct_amo_swap_test::swap32));
+}
+
+UCS_TEST_SKIP_COND_P(uct_amo_swap_test_inlresp, swap32_inlresp64,
+                     !check_atomics(UCS_BIT(UCT_ATOMIC_OP_SWAP), FOP32),
+                     "IB_TX_INLINE_RESP=64") {
+    test_swap(static_cast(&uct_amo_swap_test::swap32));
+}
+
+UCT_INSTANTIATE_IB_TEST_CASE(uct_amo_swap_test_inlresp)
diff --git a/test/gtest/uct/test_event.cc b/test/gtest/uct/test_event.cc
new file mode 100644
index 0000000..d040bb9
--- /dev/null
+++ b/test/gtest/uct/test_event.cc
@@ -0,0 +1,171 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2016.All rights reserved.
+* See file LICENSE for terms.
+*/
+
+extern "C" {
+#include
+#include
+#include
+}
+#include
+#include "uct_test.h"
+
+class test_uct_event_fd : public uct_test { /* checks that receiving an active message signals the interface event fd */
+public:
+    void init() { /* creates two entities and connects them in both directions */
+        uct_test::init();
+
+        m_e1 = uct_test::create_entity(0);
+        m_entities.push_back(m_e1);
+
+        check_skip_test();
+
+        m_e2 = uct_test::create_entity(0);
+        m_entities.push_back(m_e2);
+
+        m_e1->connect(0, *m_e2, 0);
+        m_e2->connect(0, *m_e1, 0);
+
+        m_am_count = 0;
+    }
+
+    typedef struct {
+        unsigned length;
+        /* data follows */
+    } recv_desc_t;
+
+    static ucs_status_t am_handler(void *arg, void *data, size_t length, /* 'arg' is the recv_desc_t registered with the handler */
+                                   unsigned flags) {
+        recv_desc_t *my_desc = (recv_desc_t *) arg;
+        uint64_t *test_ib_hdr = (uint64_t *) data;
+        uint64_t *actual_data = (uint64_t *) test_ib_hdr + 1;
+        unsigned data_length = length - sizeof(*test_ib_hdr); /* size of the 8-byte header, not of the pointer to it */
+
+        my_desc->length = data_length;
+        if (*test_ib_hdr == 0xbeef) { /* NOTE(review): the copy is not bounded by the size of the descriptor buffer */
+            memcpy(my_desc + 1, actual_data , data_length);
+        }
+
+        ++m_am_count;
+        return UCS_OK;
+    }
+
+    void cleanup() {
+        uct_test::cleanup();
+    }
+
+    void test_recv_am(unsigned arm_flags, unsigned send_flags);
+
+    static size_t pack_u64(void *dest, void *arg) /* bcopy pack callback: copies one 64-bit value from 'arg' to 'dest' */
+    {
+        *reinterpret_cast(dest) = *reinterpret_cast(arg);
+        return sizeof(uint64_t);
+    }
+
+    void arm(entity *e, unsigned arm_flags) { /* arms the event fd, retrying up to 10 times; asserts UCS_OK at the end */
+        ucs_status_t status;
+        for (int i = 0; i < 10; ++i) {
+            /* have several retries for arming, in case a transport has spurious
+             * events */
+            status = uct_iface_event_arm(e->iface(), arm_flags);
+            if (status == UCS_OK) {
+                break;
+            }
+        }
+        ASSERT_EQ(UCS_OK, status);
+    }
+
+protected:
+    entity *m_e1, *m_e2; /* m_e1 sends, m_e2 receives and owns the event fd under test */
+    static int m_am_count; /* number of active messages seen by am_handler */
+};
+
+int test_uct_event_fd::m_am_count = 0;
+
+void test_uct_event_fd::test_recv_am(unsigned arm_flags, unsigned send_flags) /* sends two messages and expects the fd to be signaled for each */
+{
+    uint64_t send_data = 0xdeadbeef;
+    int am_send_count = 0;
+    ssize_t res;
+    recv_desc_t *recv_buffer;
+    struct pollfd wakeup_fd;
+    ucs_status_t status;
+
+    recv_buffer = (recv_desc_t *)malloc(sizeof(*recv_buffer) + /* NOTE(review): not freed if an ASSERT below leaves the function early */
+                                        sizeof(send_data));
+    recv_buffer->length = 0; /* Initialize length to 0 */
+
+    /* give a chance to finish connection for some transports (ib/ud, tcp) */
+    flush();
+
+    /* set a callback for the uct to invoke for receiving the data */
+    uct_iface_set_am_handler(m_e2->iface(), 0, am_handler, recv_buffer, 0);
+
+    /* create receiver wakeup */
+    status = uct_iface_event_fd_get(m_e2->iface(), &wakeup_fd.fd);
+    ASSERT_EQ(UCS_OK, status);
+
+    wakeup_fd.events = POLLIN;
+    EXPECT_EQ(0, poll(&wakeup_fd, 1, 0)); /* nothing was sent yet, so the fd must not be readable */
+
+    arm(m_e2, arm_flags);
+
+    EXPECT_EQ(0, poll(&wakeup_fd, 1, 0)); /* arming alone must not signal the fd */
+
+    /* send the data */
+    res = uct_ep_am_bcopy(m_e1->ep(0), 0, pack_u64, &send_data, send_flags);
+    ASSERT_EQ((ssize_t)sizeof(send_data), res);
+    ++am_send_count;
+
+    /* make sure the file descriptor IS signaled ONCE */
+    ASSERT_EQ(1, poll(&wakeup_fd, 1, 1000*ucs::test_time_multiplier()));
+
+    for (;;) { /* drain all events, then re-arm until arming no longer reports UCS_ERR_BUSY */
+        if ((progress() == 0) && (m_am_count == am_send_count)) {
+            status = uct_iface_event_arm(m_e2->iface(), arm_flags);
+            if (status != UCS_ERR_BUSY) {
+                break;
+            }
+        }
+    }
+    ASSERT_EQ(UCS_OK, status);
+
+    arm(m_e2, arm_flags);
+
+    /* send the data again */
+    res = uct_ep_am_bcopy(m_e1->ep(0), 0, pack_u64, &send_data, send_flags);
+    ASSERT_EQ((ssize_t)sizeof(send_data), res);
+    ++am_send_count;
+
+    /* make sure the file descriptor IS signaled */
+    ASSERT_EQ(1, poll(&wakeup_fd, 1, 1000*ucs::test_time_multiplier()));
+
+    while (m_am_count < am_send_count) { /* wait until the second message reached am_handler */
+        progress();
+    }
+
+    m_e1->flush();
+
+    free(recv_buffer);
+}
+
+UCS_TEST_SKIP_COND_P(test_uct_event_fd, am, /* skipped unless all three capability flags are reported by check_caps */
+                     !check_caps(UCT_IFACE_FLAG_EVENT_RECV |
+                                 UCT_IFACE_FLAG_CB_SYNC |
+                                 UCT_IFACE_FLAG_AM_BCOPY))
+{
+    test_recv_am(UCT_EVENT_RECV, 0);
+}
+
+UCS_TEST_SKIP_COND_P(test_uct_event_fd, sig_am, /* same scenario with signaled sends and the RECV_SIG event */
+                     !check_caps(UCT_IFACE_FLAG_EVENT_RECV_SIG |
+                                 UCT_IFACE_FLAG_CB_SYNC |
+                                 UCT_IFACE_FLAG_AM_BCOPY))
+{
+    test_recv_am(UCT_EVENT_RECV_SIG, UCT_SEND_FLAG_SIGNALED);
+}
+
+UCT_INSTANTIATE_NO_SELF_TEST_CASE(test_uct_event_fd);
diff --git a/test/gtest/uct/test_fence.cc b/test/gtest/uct/test_fence.cc
new file mode 100644
index 0000000..8644a72
--- /dev/null
+++ b/test/gtest/uct/test_fence.cc
@@ -0,0 +1,248 @@
+/**
+ * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED.
+ *
+ * See file LICENSE for terms.
+ */
+
+#include "uct_test.h"
+#include
+#include
+#include
+
+using namespace std;
+
+extern "C" {
+#include
+}
+
+class uct_fence_test : public uct_test { /* posts an atomic, fences, fetches, and compares the fetched value */
+public:
+    class worker;
+    typedef ucs_status_t (uct_fence_test::* send_func_t)(uct_ep_h ep, worker& worker, /* posts the atomic operation */
+                                                         const mapped_buffer& recvbuf);
+    typedef ucs_status_t (uct_fence_test::* recv_func_t)(uct_ep_h ep, worker& worker, /* fetches the resulting value */
+                                                         const mapped_buffer& recvbuf,
+                                                         uct_completion_t *comp);
+    static inline unsigned count() { /* iterations per worker: 1000 divided by the test time multiplier */
+        return 1000 / ucs::test_time_multiplier();
+    }
+
+    virtual void init() { /* entity 0 is the receiver, entity 1 the sender */
+        uct_test::init();
+
+        entity *receiver = uct_test::create_entity(0);
+        m_entities.push_back(receiver);
+
+        check_skip_test();
+
+        entity *sender = uct_test::create_entity(0);
+        m_entities.push_back(sender);
+
+        sender->connect(0, *receiver, 1);
+        receiver->connect(1, *sender, 0);
+    }
+
+    virtual void cleanup() {
+        uct_test::cleanup();
+    }
+
+    const entity& sender() {
+        return m_entities.at(1);
+    }
+
+    const entity& receiver() {
+        return m_entities.at(0);
+    }
+
+    static void completion_cb(uct_completion_t *self, ucs_status_t status) {} /* intentionally empty */
+
+    class worker { /* one sender thread; the thread is created in the constructor */
+    public:
+        worker(uct_fence_test* test, send_func_t send, recv_func_t recv,
+               const mapped_buffer& recvbuf,
+               const entity& entity, uct_atomic_op_t op, uint32_t* error) :
+            test(test), value(0), result32(0), result64(0),
+            error(error), running(true), op(op), m_send(send), m_recv(recv),
+            m_recvbuf(recvbuf), m_entity(entity) {
+            pthread_create(&m_thread, NULL, run, reinterpret_cast(this)); /* NOTE(review): return value is not checked */
+        }
+
+        ~worker() { /* a worker may only be destroyed after join() cleared 'running' */
+            ucs_assert(!running);
+        }
+
+        static void *run(void *arg) { /* thread entry point; 'arg' is the worker handed to pthread_create */
+            worker *self = reinterpret_cast(arg);
+            self->run();
+            return NULL;
+        }
+
+        void join() { /* waits for the thread to finish, then clears 'running' */
+            void *retval;
+            pthread_join(m_thread, &retval);
+            running = false;
+        }
+
+        uint64_t atomic_op_val(uct_atomic_op_t op, uint64_t v1, uint64_t v2) /* local reference result of 'op'; 0 for any other opcode */
+        {
+            switch (op) {
+            case UCT_ATOMIC_OP_ADD:
+                return v1 + v2;
+            case UCT_ATOMIC_OP_AND:
+                return v1 & v2;
+            case UCT_ATOMIC_OP_OR:
+                return v1 | v2;
+            case UCT_ATOMIC_OP_XOR:
+                return v1 ^ v2;
+            default:
+                return 0;
+            }
+        }
+
+        uct_fence_test* const test;
+        uint64_t value; /* operand of the posted atomic */
+        uint32_t result32; /* fetch target for 32-bit runs */
+        uint64_t result64; /* fetch target for 64-bit runs */
+        uint32_t* error; /* mismatch counter owned by the caller */
+        bool running;
+        uct_atomic_op_t op;
+
+    private:
+        void run() { /* per iteration: seed the buffer, post, fence, fetch, flush, compare */
+            uct_completion_t uct_comp;
+            uct_comp.func = completion_cb;
+            for (unsigned i = 0; i < uct_fence_test::count(); i++) {
+                uint64_t local_val = ucs::rand();
+                uint64_t remote_val = ucs::rand();
+                uct_comp.count = 1;
+
+                if (m_recvbuf.length() == sizeof(uint32_t)) { /* buffer length selects the 32- or 64-bit variant */
+                    *(uint32_t*)m_recvbuf.ptr() = remote_val;
+                } else {
+                    *(uint64_t*)m_recvbuf.ptr() = remote_val;
+                }
+                value = local_val;
+
+                (test->*m_send)(m_entity.ep(0), *this, m_recvbuf); /* NOTE(review): returned status is discarded */
+                uct_ep_fence(m_entity.ep(0), 0); /* NOTE(review): returned status is discarded */
+                (test->*m_recv)(m_entity.ep(0), *this, /* NOTE(review): returned status is discarded */
+                                m_recvbuf, &uct_comp);
+                m_entity.flush();
+
+                uint64_t result = (m_recvbuf.length() == sizeof(uint32_t)) ?
+                                  result32 : result64;
+
+                if (result != atomic_op_val(op, local_val, remote_val)) /* the fetch must observe the posted atomic */
+                    (*error)++;
+
+                // reset for next loop
+                result32 = 0;
+                result64 = 0;
+            }
+        }
+
+        send_func_t m_send;
+        recv_func_t m_recv;
+        const mapped_buffer& m_recvbuf;
+        const entity& m_entity;
+        pthread_t m_thread;
+    };
+
+    template
+    void run_workers(send_func_t send, recv_func_t recv, /* runs a single worker on the sender entity and joins it */
+                     const mapped_buffer& recvbuf, uint32_t* error) {
+        ucs::ptr_vector m_workers;
+        m_workers.clear();
+        m_workers.push_back(new worker(this, send, recv, recvbuf,
+                                       sender(), OP, error));
+        m_workers.at(0).join();
+        m_workers.clear();
+    }
+
+    template
+    ucs_status_t atomic_op(uct_ep_h ep, worker& worker, const mapped_buffer& recvbuf) { /* send_func_t: posts OP with worker.value */
+        if (sizeof(T) == sizeof(uint32_t)) {
+            return uct_ep_atomic32_post(ep, OP, worker.value, recvbuf.addr(), recvbuf.rkey());
+        } else {
+            return uct_ep_atomic64_post(ep, OP, worker.value, recvbuf.addr(), recvbuf.rkey());
+        }
+    }
+
+    template
+    ucs_status_t atomic_fop(uct_ep_h ep, worker& worker, /* recv_func_t: fetching OP with operand 0 into result32/result64 */
+                            const mapped_buffer& recvbuf, uct_completion_t *comp) {
+        if (sizeof(T) == sizeof(uint32_t)) {
+            return uct_ep_atomic32_fetch(ep, OP, 0, &worker.result32,
+                                         recvbuf.addr(), recvbuf.rkey(), comp);
+        } else {
+            return uct_ep_atomic64_fetch(ep, OP, 0, &worker.result64,
+                                         recvbuf.addr(), recvbuf.rkey(), comp);
+        }
+    }
+
+    template
+    void test_fence() { /* passes when the worker counted no mismatches */
+
+        mapped_buffer recvbuf(sizeof(T), 0, receiver());
+
+        uint32_t error = 0;
+
+        *(T*)recvbuf.ptr() = 0;
+
+        run_workers(static_cast(&uct_fence_test::atomic_op),
+                    static_cast(&uct_fence_test::atomic_fop),
+                    recvbuf, &error);
+
+        EXPECT_EQ(error, (uint32_t)0);
+    }
+};
+
+UCS_TEST_SKIP_COND_P(uct_fence_test, add32, /* skipped unless both the OP32 and FOP32 checks pass for ADD */
+                     (!check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), OP32) ||
+                      !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), FOP32))) {
+    test_fence();
+}
+
+UCS_TEST_SKIP_COND_P(uct_fence_test, add64, /* skipped unless both the OP64 and FOP64 checks pass for ADD */
+                     (!check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), OP64) ||
+                      !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ADD), FOP64))) {
+    test_fence();
+}
+
+UCS_TEST_SKIP_COND_P(uct_fence_test, and32, + (!check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), OP32) || + !check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), FOP32))) { + test_fence(); +} + +UCS_TEST_SKIP_COND_P(uct_fence_test, and64, + (!check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), OP64) || + !check_atomics(UCS_BIT(UCT_ATOMIC_OP_AND), FOP64))) { + test_fence(); +} + +UCS_TEST_SKIP_COND_P(uct_fence_test, or32, + (!check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), OP32) || + !check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), FOP32))) { + test_fence(); +} + +UCS_TEST_SKIP_COND_P(uct_fence_test, or64, + (!check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), OP64) || + !check_atomics(UCS_BIT(UCT_ATOMIC_OP_OR), FOP64))) { + test_fence(); +} + +UCS_TEST_SKIP_COND_P(uct_fence_test, xor32, + (!check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), OP32) || + !check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), FOP32))) { + test_fence(); +} + +UCS_TEST_SKIP_COND_P(uct_fence_test, xor64, + (!check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), OP64) || + !check_atomics(UCS_BIT(UCT_ATOMIC_OP_XOR), FOP64))) { + test_fence(); +} + +UCT_INSTANTIATE_TEST_CASE(uct_fence_test) diff --git a/test/gtest/uct/test_flush.cc b/test/gtest/uct/test_flush.cc new file mode 100644 index 0000000..eb5aff5 --- /dev/null +++ b/test/gtest/uct/test_flush.cc @@ -0,0 +1,537 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "uct_p2p_test.h" +extern "C" { +#include +} + +class uct_flush_test : public uct_test { +public: + static const uint64_t SEED1 = 0x1111111111111111lu; + static const uint64_t SEED2 = 0x2222222222222222lu; + static const uint64_t SEED3 = 0x3333333333333333lu; + static const int AM_ID = 1; + static const int AM_ID_CANCEL = 2; + + typedef void (uct_flush_test::* flush_func_t)(); + + struct test_req_t { + uct_pending_req_t uct; + uct_completion_t comp; + mapped_buffer *sendbuf; + uct_flush_test *test; + }; + + void init() { + uct_test::init(); + + entity *m_sender = uct_test::create_entity(0); + m_entities.push_back(m_sender); + + check_skip_test(); + + if (UCT_DEVICE_TYPE_SELF == GetParam()->dev_type) { + m_sender->connect(0, *m_sender, 0); + } else { + entity *m_receiver = uct_test::create_entity(0); + m_entities.push_back(m_receiver); + + m_sender->connect(0, *m_receiver, 0); + } + am_rx_count = 0; + m_flush_flags = 0; + } + + static size_t pack_cb(void *dest, void *arg) + { + const mapped_buffer *sendbuf = (const mapped_buffer *)arg; + memcpy(dest, sendbuf->ptr(), sendbuf->length()); + return sendbuf->length(); + } + + void blocking_put_bcopy(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf) + { + ssize_t status; + for (;;) { + status = uct_ep_put_bcopy(sender().ep(0), pack_cb, (void*)&sendbuf, + recvbuf.addr(), recvbuf.rkey()); + if (status >= 0) { + return; + } else if (status == UCS_ERR_NO_RESOURCE) { + progress(); + continue; + } else { + ASSERT_UCS_OK((ucs_status_t)status); + } + } + } + + void blocking_am_bcopy(const mapped_buffer &sendbuf) + { + ssize_t status; + for (;;) { + status = uct_ep_am_bcopy(sender().ep(0), get_am_id(), pack_cb, + (void*)&sendbuf, 0); + if (status >= 0) { + return; + } else if (status == UCS_ERR_NO_RESOURCE) { + progress(); + continue; + } else { + ASSERT_UCS_OK((ucs_status_t)status); + } + } + } + + static ucs_status_t am_handler(void *arg, void *data, size_t length, + unsigned flags) + { + if (arg == 
NULL) { + /* This is not completely canceled message, drop it */ + return UCS_OK; + } + const mapped_buffer *recvbuf = (const mapped_buffer *)arg; + memcpy(recvbuf->ptr(), data, ucs_min(length, recvbuf->length())); + ucs_atomic_add32(&am_rx_count, 1); + return UCS_OK; + } + + ucs_status_t am_send_pending(test_req_t *am_req) + { + ssize_t status; + + status = uct_ep_am_bcopy(sender().ep(0), get_am_id(), pack_cb, + (void*)am_req->sendbuf, 0); + if (status >= 0) { + --am_req->comp.count; + return UCS_OK; + } else { + return (ucs_status_t)status; + } + } + + static ucs_status_t am_progress(uct_pending_req_t *req) + { + test_req_t *am_req = ucs_container_of(req, test_req_t, uct); + return am_req->test->am_send_pending(am_req); + } + + static ucs_status_t flush_progress(uct_pending_req_t *req) + { + test_req_t *flush_req = ucs_container_of(req, test_req_t, uct); + ucs_status_t status; + + status = uct_ep_flush(flush_req->test->sender().ep(0), 0, + &flush_req->comp); + if (status == UCS_OK) { + --flush_req->comp.count; + return UCS_OK; + } else if (status == UCS_INPROGRESS) { + return UCS_OK; + } else if (status == UCS_ERR_NO_RESOURCE) { + return UCS_ERR_NO_RESOURCE; + } else { + UCS_TEST_ABORT("Error: " << ucs_status_string(status)); + } + } + + void test_flush_put_bcopy(flush_func_t flush) { + const size_t length = 8; + mapped_buffer sendbuf(length, SEED1, sender()); + mapped_buffer recvbuf(length, SEED2, receiver()); + sendbuf.pattern_fill(SEED3); + blocking_put_bcopy(sendbuf, recvbuf); + (this->*flush)(); + + if (is_flush_cancel()) { + return; + } + + recvbuf.pattern_check(SEED3); + } + + void wait_am(unsigned count) { + while (am_rx_count < count) { + progress(); + sched_yield(); + } + } + + void test_flush_am_zcopy(flush_func_t flush, bool destroy_ep) { + const size_t length = 8; + if (is_flush_cancel()) { + ASSERT_TRUE(destroy_ep); + } + mapped_buffer sendbuf(length, SEED1, sender()); + mapped_buffer recvbuf(length, SEED2, receiver()); + 
sendbuf.pattern_fill(SEED3); + + uct_iface_set_am_handler(receiver().iface(), get_am_id(), am_handler, + is_flush_cancel() ? NULL : &recvbuf, + UCT_CB_FLAG_ASYNC); + + uct_completion_t zcomp; + zcomp.count = 2; + zcomp.func = NULL; + + ucs_status_t status; + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, sendbuf.ptr(), sendbuf.length(), + sendbuf.memh(), + sender().iface_attr().cap.am.max_iov); + do { + status = uct_ep_am_zcopy(sender().ep(0), get_am_id(), NULL, 0, iov, + iovcnt, 0, &zcomp); + progress(); + } while (status == UCS_ERR_NO_RESOURCE); + ASSERT_UCS_OK_OR_INPROGRESS(status); + if (status == UCS_OK) { + --zcomp.count; + } + + (this->*flush)(); + + EXPECT_EQ(1, zcomp.count); /* Zero copy op should be already completed + since flush returned */ + + if (destroy_ep) { + sender().destroy_ep(0); + } + + if (is_flush_cancel()) { + return; + } + + wait_am(1); + + uct_iface_set_am_handler(receiver().iface(), get_am_id(), NULL, NULL, 0); + + recvbuf.pattern_check(SEED3); + } + + void test_flush_am_disconnect(flush_func_t flush, bool destroy_ep) { + const size_t length = 8; + if (is_flush_cancel()) { + ASSERT_TRUE(destroy_ep); + } + mapped_buffer sendbuf(length, SEED1, sender()); + mapped_buffer recvbuf(length, SEED2, receiver()); + sendbuf.pattern_fill(SEED3); + + uct_iface_set_am_handler(receiver().iface(), get_am_id(), am_handler, + is_flush_cancel() ? 
NULL : &recvbuf, + UCT_CB_FLAG_ASYNC); + blocking_am_bcopy(sendbuf); + (this->*flush)(); + + if (destroy_ep) { + sender().destroy_ep(0); + } + + if (is_flush_cancel()) { + return; + } + + wait_am(1); + uct_iface_set_am_handler(receiver().iface(), get_am_id(), NULL, NULL, 0); + + recvbuf.pattern_check(SEED3); + } + + void flush_ep_no_comp() { + ucs_status_t status; + do { + progress(); + status = uct_ep_flush(sender().ep(0), m_flush_flags, NULL); + } while ((status == UCS_ERR_NO_RESOURCE) || (status == UCS_INPROGRESS)); + ASSERT_UCS_OK(status); + } + + void flush_iface_no_comp() { + ucs_status_t status; + do { + progress(); + status = uct_iface_flush(sender().iface(), m_flush_flags, NULL); + } while ((status == UCS_ERR_NO_RESOURCE) || (status == UCS_INPROGRESS)); + ASSERT_UCS_OK(status); + } + + void flush_ep_nb() { + uct_completion_t comp; + ucs_status_t status; + comp.count = 2; + comp.func = NULL; + do { + progress(); + status = uct_ep_flush(sender().ep(0), m_flush_flags, &comp); + } while (status == UCS_ERR_NO_RESOURCE); + ASSERT_UCS_OK_OR_INPROGRESS(status); + if (status == UCS_OK) { + return; + } + /* coverity[loop_condition] */ + while (comp.count != 1) { + progress(); + } + } + + void test_flush_am_pending(flush_func_t flush, bool destroy_ep); + +protected: + uct_test::entity& sender() { + return **m_entities.begin(); + } + + uct_test::entity& receiver() { + return **(m_entities.end() - 1); + } + + bool is_flush_cancel() const { + return (m_flush_flags & UCT_FLUSH_FLAG_CANCEL); + } + + uint8_t get_am_id() const { + return is_flush_cancel() ? 
AM_ID_CANCEL : AM_ID; + } + + static uint32_t am_rx_count; + unsigned m_flush_flags; +}; + +uint32_t uct_flush_test::am_rx_count = 0; + +void uct_flush_test::test_flush_am_pending(flush_func_t flush, bool destroy_ep) +{ + if (is_flush_cancel()) { + ASSERT_TRUE(destroy_ep); + } + const size_t length = 8; + mapped_buffer sendbuf(length, SEED1, sender()); + mapped_buffer recvbuf(length, SEED2, receiver()); + sendbuf.pattern_fill(SEED3); + + uct_iface_set_am_handler(receiver().iface(), get_am_id(), am_handler, + is_flush_cancel() ? NULL : &recvbuf, + UCT_CB_FLAG_ASYNC); + + /* Send until resources are exhausted or timeout in 1sec*/ + unsigned count = 0; + ucs_time_t loop_end_limit = ucs_get_time() + ucs_time_from_sec(1.0); + ssize_t packed_len; + for (;;) { + packed_len = uct_ep_am_bcopy(sender().ep(0), get_am_id(), pack_cb, + (void*)&sendbuf, 0); + if (packed_len == UCS_ERR_NO_RESOURCE) { + break; + } + if (ucs_get_time() > loop_end_limit) { + ++count; + break; + } + + if (packed_len >= 0) { + ++count; + } else { + ASSERT_UCS_OK((ucs_status_t)packed_len); + } + } + + /* Queue some pending AMs */ + ucs_status_t status; + std::vector reqs; + reqs.resize(10); + for (std::vector::iterator it = reqs.begin(); it != reqs.end();) { + it->sendbuf = &sendbuf; + it->test = this; + it->uct.func = am_progress; + it->comp.count = 2; + it->comp.func = NULL; + status = uct_ep_pending_add(sender().ep(0), &it->uct, 0); + if (UCS_ERR_BUSY == status) { + /* User advised to retry the send. 
It means no requests added + * to the queue + */ + it = reqs.erase(it); + status = UCS_OK; + } else { + ++count; + ++it; + } + ASSERT_UCS_OK(status); + } + + /* Try to start a flush */ + test_req_t flush_req; + flush_req.comp.count = 2; + flush_req.comp.func = NULL; + + for (;;) { + status = uct_ep_flush(sender().ep(0), m_flush_flags, &flush_req.comp); + if (status == UCS_OK) { + --flush_req.comp.count; + } else if (status == UCS_ERR_NO_RESOURCE) { + /* If flush returned NO_RESOURCE, add to pending must succeed */ + flush_req.test = this; + flush_req.uct.func = flush_progress; + status = uct_ep_pending_add(sender().ep(0), &flush_req.uct, 0); + if (status == UCS_ERR_BUSY) { + continue; + } + EXPECT_EQ(UCS_OK, status); + } else if (status == UCS_INPROGRESS) { + } else { + UCS_TEST_ABORT("failed to flush ep: " << ucs_status_string(status)); + } + break; + } + + /* timeout used to prevent test hung */ + wait_for_value(&flush_req.comp.count, 1, true, 60.0); + EXPECT_EQ(1, flush_req.comp.count); + + while (!reqs.empty()) { + if (is_flush_cancel()) { + EXPECT_EQ(2, reqs.back().comp.count); + } else { + EXPECT_EQ(1, reqs.back().comp.count); + } + reqs.pop_back(); + } + + if (!is_flush_cancel()) { + wait_am(count); + } + + if (destroy_ep) { + sender().destroy_ep(0); + } + + if (is_flush_cancel()) { + return; + } + + uct_iface_set_am_handler(receiver().iface(), get_am_id(), NULL, NULL, 0); + + recvbuf.pattern_check(SEED3); +} + +UCS_TEST_SKIP_COND_P(uct_flush_test, put_bcopy_flush_ep_no_comp, + !check_caps(UCT_IFACE_FLAG_PUT_BCOPY)) { + am_rx_count = 0; + m_flush_flags = UCT_FLUSH_FLAG_LOCAL; + + test_flush_put_bcopy(&uct_flush_test::flush_ep_no_comp); + + if (is_caps_supported(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) { + am_rx_count = 0; + m_flush_flags |= UCT_FLUSH_FLAG_CANCEL; + test_flush_put_bcopy(&uct_flush_test::flush_ep_no_comp); + } +} + +UCS_TEST_SKIP_COND_P(uct_flush_test, put_bcopy_flush_iface_no_comp, + !check_caps(UCT_IFACE_FLAG_PUT_BCOPY)) { + 
test_flush_put_bcopy(&uct_flush_test::flush_iface_no_comp); +} + +UCS_TEST_SKIP_COND_P(uct_flush_test, put_bcopy_flush_ep_nb, + !check_caps(UCT_IFACE_FLAG_PUT_BCOPY)) { + am_rx_count = 0; + m_flush_flags = UCT_FLUSH_FLAG_LOCAL; + + test_flush_put_bcopy(&uct_flush_test::flush_ep_nb); + + if (is_caps_supported(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) { + am_rx_count = 0; + m_flush_flags |= UCT_FLUSH_FLAG_CANCEL; + test_flush_put_bcopy(&uct_flush_test::flush_ep_nb); + } +} + +UCS_TEST_SKIP_COND_P(uct_flush_test, am_zcopy_flush_ep_no_comp, + !check_caps(UCT_IFACE_FLAG_AM_ZCOPY)) { + am_rx_count = 0; + m_flush_flags = UCT_FLUSH_FLAG_LOCAL; + + test_flush_am_zcopy(&uct_flush_test::flush_ep_no_comp, false); + + if (is_caps_supported(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) { + am_rx_count = 0; + m_flush_flags |= UCT_FLUSH_FLAG_CANCEL; + test_flush_am_zcopy(&uct_flush_test::flush_ep_no_comp, true); + } +} + +UCS_TEST_SKIP_COND_P(uct_flush_test, am_zcopy_flush_iface_no_comp, + !check_caps(UCT_IFACE_FLAG_AM_ZCOPY)) { + test_flush_am_zcopy(&uct_flush_test::flush_iface_no_comp, true); +} + +UCS_TEST_SKIP_COND_P(uct_flush_test, am_zcopy_flush_ep_nb, + !check_caps(UCT_IFACE_FLAG_AM_ZCOPY)) { + am_rx_count = 0; + m_flush_flags = UCT_FLUSH_FLAG_LOCAL; + + test_flush_am_zcopy(&uct_flush_test::flush_ep_nb, false); + + if (is_caps_supported(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) { + am_rx_count = 0; + m_flush_flags |= UCT_FLUSH_FLAG_CANCEL; + test_flush_am_zcopy(&uct_flush_test::flush_ep_nb, true); + } +} + +UCS_TEST_SKIP_COND_P(uct_flush_test, am_flush_ep_no_comp, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY)) { + am_rx_count = 0; + m_flush_flags = UCT_FLUSH_FLAG_LOCAL; + + test_flush_am_disconnect(&uct_flush_test::flush_ep_no_comp, false); + + if (is_caps_supported(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) { + am_rx_count = 0; + m_flush_flags |= UCT_FLUSH_FLAG_CANCEL; + test_flush_am_disconnect(&uct_flush_test::flush_ep_no_comp, true); + } +} + +UCS_TEST_SKIP_COND_P(uct_flush_test, 
am_flush_iface_no_comp, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY)) { + m_flush_flags = UCT_FLUSH_FLAG_LOCAL; + test_flush_am_disconnect(&uct_flush_test::flush_iface_no_comp, true); +} + +UCS_TEST_SKIP_COND_P(uct_flush_test, am_flush_ep_nb, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY)) { + am_rx_count = 0; + m_flush_flags = UCT_FLUSH_FLAG_LOCAL; + + test_flush_am_disconnect(&uct_flush_test::flush_ep_nb, false); + + if (is_caps_supported(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) { + am_rx_count = 0; + m_flush_flags |= UCT_FLUSH_FLAG_CANCEL; + test_flush_am_disconnect(&uct_flush_test::flush_ep_nb, true); + } +} + +UCS_TEST_SKIP_COND_P(uct_flush_test, am_pending_flush_nb, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_PENDING)) { + am_rx_count = 0; + m_flush_flags = UCT_FLUSH_FLAG_LOCAL; + + test_flush_am_pending(&uct_flush_test::flush_ep_nb, false); + + if (is_caps_supported(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)) { + am_rx_count = 0; + m_flush_flags |= UCT_FLUSH_FLAG_CANCEL; + test_flush_am_pending(&uct_flush_test::flush_ep_nb, true); + } +} + +UCT_INSTANTIATE_TEST_CASE(uct_flush_test) diff --git a/test/gtest/uct/test_many2one_am.cc b/test/gtest/uct/test_many2one_am.cc new file mode 100644 index 0000000..652d8e2 --- /dev/null +++ b/test/gtest/uct/test_many2one_am.cc @@ -0,0 +1,173 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "uct_test.h" + +extern "C" { +#include +} + +class test_many2one_am : public uct_test { +public: + static const uint8_t AM_ID = 15; + static const uint64_t MAGIC_DESC = 0xdeadbeef12345678ul; + static const uint64_t MAGIC_ALLOC = 0xbaadf00d12345678ul; + + typedef struct { + uint64_t magic; + unsigned length; + } receive_desc_t; + + test_many2one_am() : m_am_count(0), m_receiver(NULL) { + } + + void init() { + std::string val = "16k"; + std::string tx_name, rx_name; + + if (has_ib()) { + tx_name = "IB_SEG_SIZE"; + } else if (has_transport("tcp")) { + tx_name = "TX_SEG_SIZE"; + rx_name = "RX_SEG_SIZE"; + } else if (has_transport("mm") || + has_transport("self")) { + tx_name = "SEG_SIZE"; + } + + if (!tx_name.empty()) { + modify_config(tx_name, val); + } + + if (!rx_name.empty()) { + modify_config(rx_name, val); + } + + uct_test::init(); + + m_receiver = create_entity(sizeof(receive_desc_t)); + m_entities.push_back(m_receiver); + + check_skip_test(); + } + + static ucs_status_t am_handler(void *arg, void *data, size_t length, + unsigned flags) { + test_many2one_am *self = reinterpret_cast(arg); + return self->am_handler(data, length, flags); + } + + ucs_status_t am_handler(void *data, size_t length, unsigned flags) { + if (ucs::rand() % 4 == 0) { + receive_desc_t *my_desc; + if (flags & UCT_CB_PARAM_FLAG_DESC) { + my_desc = (receive_desc_t *)data - 1; + my_desc->magic = MAGIC_DESC; + } else { + my_desc = (receive_desc_t *)ucs_malloc(sizeof(*my_desc) + length, + "TODO: remove allocation"); + my_desc->magic = MAGIC_ALLOC; + } + my_desc->length = length; + if (data != my_desc + 1) { + memcpy(my_desc + 1, data, length); + } + m_backlog.push_back(my_desc); + ucs_atomic_add32(&m_am_count, 1); + return (flags & UCT_CB_PARAM_FLAG_DESC) ? 
UCS_INPROGRESS : UCS_OK; + } + mem_buffer::pattern_check(data, length); + ucs_atomic_add32(&m_am_count, 1); + return UCS_OK; + } + + void check_backlog() { + while (!m_backlog.empty()) { + receive_desc_t *my_desc = m_backlog.back(); + m_backlog.pop_back(); + mem_buffer::pattern_check(my_desc + 1, my_desc->length); + if (my_desc->magic == MAGIC_DESC) { + uct_iface_release_desc(my_desc); + } else { + EXPECT_EQ(uint64_t(MAGIC_ALLOC), my_desc->magic); + ucs_free(my_desc); + } + } + } + + static const size_t NUM_SENDERS = 10; + +protected: + volatile uint32_t m_am_count; + std::vector m_backlog; + entity *m_receiver; +}; + + +UCS_TEST_SKIP_COND_P(test_many2one_am, am_bcopy, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_CB_SYNC)) +{ + const unsigned num_sends = 1000 / ucs::test_time_multiplier(); + ucs_status_t status; + + ucs::ptr_vector buffers; + for (unsigned i = 0; i < NUM_SENDERS; ++i) { + entity *sender = create_entity(0); + mapped_buffer *buffer = new mapped_buffer( + sender->iface_attr().cap.am.max_bcopy, 0, *sender); + sender->connect(0, *m_receiver, i); + m_entities.push_back(sender); + buffers.push_back(buffer); + } + + m_am_count = 0; + + status = uct_iface_set_am_handler(m_receiver->iface(), AM_ID, am_handler, + (void*)this, 0); + ASSERT_UCS_OK(status); + + for (unsigned i = 0; i < num_sends; ++i) { + unsigned sender_num = ucs::rand() % NUM_SENDERS; + + mapped_buffer& buffer = buffers.at(sender_num); + buffer.pattern_fill(i); + + ssize_t packed_len; + for (;;) { + const entity& sender = ent(sender_num + 1); + packed_len = uct_ep_am_bcopy(sender.ep(0), AM_ID, + mapped_buffer::pack, + (void*)&buffer, 0); + if (packed_len != UCS_ERR_NO_RESOURCE) { + break; + } + sender.progress(); + m_receiver->progress(); + } + if (packed_len < 0) { + ASSERT_UCS_OK((ucs_status_t)packed_len); + } + } + + while (m_am_count < num_sends) { + progress(); + } + + status = uct_iface_set_am_handler(m_receiver->iface(), AM_ID, + NULL, NULL, 0); + ASSERT_UCS_OK(status); + + 
check_backlog(); + + for (unsigned i = 0; i < NUM_SENDERS; ++i) { + ent(i + 1).flush(); + } + + buffers.clear(); +} + +UCT_INSTANTIATE_NO_SELF_TEST_CASE(test_many2one_am) diff --git a/test/gtest/uct/test_md.cc b/test/gtest/uct/test_md.cc new file mode 100644 index 0000000..03ab61f --- /dev/null +++ b/test/gtest/uct/test_md.cc @@ -0,0 +1,513 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* Copyright (C) Advanced Micro Devices, Inc. 2016 - 2017. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ + +#include "test_md.h" + +#include + +#include +extern "C" { +#include +#include +#include +#include +#include +#include +} +#include +#include +#include +#include +#include + + +void* test_md::alloc_thread(void *arg) +{ + volatile int *stop_flag = (int*)arg; + + while (!*stop_flag) { + int count = ucs::rand() % 100; + std::vector buffers; + for (int i = 0; i < count; ++i) { + buffers.push_back(malloc(ucs::rand() % (256 * UCS_KBYTE))); + } + std::for_each(buffers.begin(), buffers.end(), free); + } + return NULL; +} + +std::vector test_md::enum_mds(const std::string& cmpt_name) { + + std::vector md_resources = enum_md_resources(); + + std::vector result; + for (std::vector::iterator iter = md_resources.begin(); + iter != md_resources.end(); ++iter) { + if (iter->cmpt_attr.name == cmpt_name) { + result.push_back(test_md_param()); + result.back().component = iter->cmpt; + result.back().md_name = iter->rsc_desc.md_name; + } + } + + return result; +} + +test_md::test_md() +{ + UCS_TEST_CREATE_HANDLE(uct_md_config_t*, m_md_config, + (void (*)(uct_md_config_t*))uct_config_release, + uct_md_config_read, GetParam().component, NULL, NULL); + memset(&m_md_attr, 0, sizeof(m_md_attr)); +} + +void test_md::init() +{ + ucs::test_base::init(); + UCS_TEST_CREATE_HANDLE(uct_md_h, m_md, uct_md_close, uct_md_open, + GetParam().component, GetParam().md_name.c_str(), + m_md_config); + + ucs_status_t status = uct_md_query(m_md, &m_md_attr); + 
ASSERT_UCS_OK(status); + + check_skip_test(); +} + +void test_md::cleanup() +{ + m_md.reset(); + ucs::test_base::cleanup(); +} + +void test_md::modify_config(const std::string& name, const std::string& value, + bool optional) +{ + ucs_status_t status = uct_config_modify(m_md_config, name.c_str(), value.c_str()); + if (status == UCS_ERR_NO_ELEM) { + return ucs::test_base::modify_config(name, value, optional); + } else { + ASSERT_UCS_OK(status); + } +} + +bool test_md::check_caps(uint64_t flags) +{ + return ((md() == NULL) || ucs_test_all_flags(m_md_attr.cap.flags, flags)); +} + +void test_md::alloc_memory(void **address, size_t size, char *fill_buffer, + ucs_memory_type_t mem_type) +{ + *address = mem_buffer::allocate(size, mem_type); + if (fill_buffer) { + mem_buffer::copy_to(*address, fill_buffer, size, mem_type); + } +} + +void test_md::check_memory(void *address, void *expect, size_t size, + ucs_memory_type_t mem_type) +{ + EXPECT_TRUE(mem_buffer::compare(expect, address, size, mem_type)); +} + +void test_md::free_memory(void *address, ucs_memory_type_t mem_type) +{ + mem_buffer::release(address, mem_type); +} + +UCS_TEST_SKIP_COND_P(test_md, rkey_ptr, + !check_caps(UCT_MD_FLAG_ALLOC | + UCT_MD_FLAG_RKEY_PTR)) { + size_t size; + uct_md_attr_t md_attr; + void *rkey_buffer; + ucs_status_t status; + unsigned *rva, *lva; + uct_mem_h memh; + uct_rkey_bundle_t rkey_bundle; + unsigned i; + + // alloc (should work with both sysv and xpmem + size = sizeof(unsigned) * UCS_MBYTE; + rva = NULL; + status = uct_md_mem_alloc(md(), &size, (void **)&rva, + UCT_MD_MEM_ACCESS_ALL, + "test", &memh); + ASSERT_UCS_OK(status); + EXPECT_LE(sizeof(unsigned) * UCS_MBYTE, size); + + // pack + status = uct_md_query(md(), &md_attr); + ASSERT_UCS_OK(status); + rkey_buffer = malloc(md_attr.rkey_packed_size); + if (rkey_buffer == NULL) { + // make coverity happy + uct_md_mem_free(md(), memh); + GTEST_FAIL(); + } + + status = uct_md_mkey_pack(md(), memh, rkey_buffer); + + // unpack + status = 
uct_rkey_unpack(GetParam().component, rkey_buffer, &rkey_bundle); + ASSERT_UCS_OK(status); + + // get direct ptr + status = uct_rkey_ptr(GetParam().component, &rkey_bundle, (uintptr_t)rva, + (void **)&lva); + ASSERT_UCS_OK(status); + // check direct access + // read + for (i = 0; i < size/sizeof(unsigned); i++) { + rva[i] = i; + } + EXPECT_EQ(memcmp(lva, rva, size), 0); + + // write + for (i = 0; i < size/sizeof(unsigned); i++) { + lva[i] = size - i; + } + EXPECT_EQ(memcmp(lva, rva, size), 0); + + // check bounds + // + status = uct_rkey_ptr(GetParam().component, &rkey_bundle, (uintptr_t)(rva-1), + (void **)&lva); + UCS_TEST_MESSAGE << "rkey_ptr of invalid address returned " + << ucs_status_string(status); + + status = uct_rkey_ptr(GetParam().component, &rkey_bundle, (uintptr_t)rva+size, + (void **)&lva); + UCS_TEST_MESSAGE << "rkey_ptr of invalid address returned " + << ucs_status_string(status); + + free(rkey_buffer); + uct_md_mem_free(md(), memh); + uct_rkey_release(GetParam().component, &rkey_bundle); +} + +UCS_TEST_SKIP_COND_P(test_md, alloc, + !check_caps(UCT_MD_FLAG_ALLOC)) { + size_t size, orig_size; + ucs_status_t status; + void *address; + uct_mem_h memh; + + for (unsigned i = 0; i < 300; ++i) { + size = orig_size = ucs::rand() % 65536; + if (size == 0) { + continue; + } + + address = NULL; + status = uct_md_mem_alloc(md(), &size, &address, + UCT_MD_MEM_ACCESS_ALL, "test", &memh); + EXPECT_GT(size, 0ul); + + ASSERT_UCS_OK(status); + EXPECT_GE(size, orig_size); + EXPECT_TRUE(address != NULL); + EXPECT_TRUE(memh != UCT_MEM_HANDLE_NULL); + + memset(address, 0xBB, size); + uct_md_mem_free(md(), memh); + } +} + +UCS_TEST_P(test_md, mem_type_detect_mds) { + + uct_md_attr_t md_attr; + ucs_status_t status; + ucs_memory_type_t mem_type; + int mem_type_id; + void *address; + + status = uct_md_query(md(), &md_attr); + ASSERT_UCS_OK(status); + + if (!md_attr.cap.detect_mem_types) { + UCS_TEST_SKIP_R("MD can't detect any memory types"); + } + + 
ucs_for_each_bit(mem_type_id, md_attr.cap.detect_mem_types) { + alloc_memory(&address, UCS_KBYTE, NULL, + static_cast(mem_type_id)); + status = uct_md_detect_memory_type(md(), address, 1024, &mem_type); + ASSERT_UCS_OK(status); + EXPECT_TRUE(mem_type == mem_type_id); + } +} + +UCS_TEST_SKIP_COND_P(test_md, reg, + !check_caps(UCT_MD_FLAG_REG)) { + size_t size; + uct_md_attr_t md_attr; + ucs_status_t status; + void *address; + uct_mem_h memh; + + status = uct_md_query(md(), &md_attr); + ASSERT_UCS_OK(status); + for (unsigned mem_type_id = 0; mem_type_id < UCS_MEMORY_TYPE_LAST; mem_type_id++) { + ucs_memory_type_t mem_type = static_cast(mem_type_id); + + if (!(md_attr.cap.reg_mem_types & UCS_BIT(mem_type_id))) { + UCS_TEST_MESSAGE << mem_buffer::mem_type_name(mem_type) << " memory " + << "registration is not supported by " + << GetParam().md_name; + continue; + } + + for (unsigned i = 0; i < 300; ++i) { + size = ucs::rand() % 65536; + if (size == 0) { + continue; + } + + std::vector fill_buffer(size, 0); + ucs::fill_random(fill_buffer); + + alloc_memory(&address, size, &fill_buffer[0], mem_type); + + status = uct_md_mem_reg(md(), address, size, UCT_MD_MEM_ACCESS_ALL, &memh); + + ASSERT_UCS_OK(status); + ASSERT_TRUE(memh != UCT_MEM_HANDLE_NULL); + check_memory(address, &fill_buffer[0], size, mem_type); + + status = uct_md_mem_dereg(md(), memh); + ASSERT_UCS_OK(status); + check_memory(address, &fill_buffer[0], size, mem_type); + + free_memory(address, mem_type); + + } + } +} + +UCS_TEST_SKIP_COND_P(test_md, reg_perf, + !check_caps(UCT_MD_FLAG_REG)) { + static const unsigned count = 10000; + ucs_status_t status; + uct_md_attr_t md_attr; + void *ptr; + + status = uct_md_query(md(), &md_attr); + ASSERT_UCS_OK(status); + for (unsigned mem_type_id = 0; mem_type_id < UCS_MEMORY_TYPE_LAST; mem_type_id++) { + ucs_memory_type_t mem_type = static_cast(mem_type_id); + if (!(md_attr.cap.reg_mem_types & UCS_BIT(mem_type_id))) { + UCS_TEST_MESSAGE << 
mem_buffer::mem_type_name(mem_type) << " memory " + << " registration is not supported by " + << GetParam().md_name; + continue; + } + for (size_t size = 4 * UCS_KBYTE; size <= 4 * UCS_MBYTE; size *= 2) { + alloc_memory(&ptr, size, NULL, + static_cast(mem_type_id)); + + ucs_time_t start_time = ucs_get_time(); + ucs_time_t end_time = start_time; + + unsigned n = 0; + while (n < count) { + uct_mem_h memh; + status = uct_md_mem_reg(md(), ptr, size, UCT_MD_MEM_ACCESS_ALL, + &memh); + ASSERT_UCS_OK(status); + ASSERT_TRUE(memh != UCT_MEM_HANDLE_NULL); + + status = uct_md_mem_dereg(md(), memh); + ASSERT_UCS_OK(status); + + ++n; + end_time = ucs_get_time(); + + if (end_time - start_time > ucs_time_from_sec(1.0)) { + break; + } + } + + UCS_TEST_MESSAGE << GetParam().md_name << ": Registration time for " << + ucs_memory_type_names[mem_type] << " memory " << size << " bytes: " << + long(ucs_time_to_nsec(end_time - start_time) / n) << " ns"; + + free_memory(ptr, mem_type); + } + } +} + +UCS_TEST_SKIP_COND_P(test_md, reg_advise, + !check_caps(UCT_MD_FLAG_REG | + UCT_MD_FLAG_ADVISE)) { + size_t size; + ucs_status_t status; + void *address; + uct_mem_h memh; + + size = 128 * UCS_MBYTE; + address = malloc(size); + ASSERT_TRUE(address != NULL); + + status = uct_md_mem_reg(md(), address, size, + UCT_MD_MEM_FLAG_NONBLOCK|UCT_MD_MEM_ACCESS_ALL, + &memh); + ASSERT_UCS_OK(status); + ASSERT_TRUE(memh != UCT_MEM_HANDLE_NULL); + + status = uct_md_mem_advise(md(), memh, (char *)address + 7, + 32 * UCS_KBYTE, UCT_MADV_WILLNEED); + EXPECT_UCS_OK(status); + + status = uct_md_mem_dereg(md(), memh); + EXPECT_UCS_OK(status); + free(address); +} + +UCS_TEST_SKIP_COND_P(test_md, alloc_advise, + !check_caps(UCT_MD_FLAG_ALLOC | + UCT_MD_FLAG_ADVISE)) { + size_t size, orig_size; + ucs_status_t status; + void *address; + uct_mem_h memh; + + orig_size = size = 128 * UCS_MBYTE; + address = NULL; + + status = uct_md_mem_alloc(md(), &size, &address, + UCT_MD_MEM_FLAG_NONBLOCK| + UCT_MD_MEM_ACCESS_ALL, + 
"test", &memh); + ASSERT_UCS_OK(status); + EXPECT_GE(size, orig_size); + EXPECT_TRUE(address != NULL); + EXPECT_TRUE(memh != UCT_MEM_HANDLE_NULL); + + status = uct_md_mem_advise(md(), memh, (char *)address + 7, + 32 * UCS_KBYTE, UCT_MADV_WILLNEED); + EXPECT_UCS_OK(status); + + memset(address, 0xBB, size); + uct_md_mem_free(md(), memh); +} + +/* + * reproduce issue #1284, main thread is registering memory while another thread + * allocates and releases memory. + */ +UCS_TEST_SKIP_COND_P(test_md, reg_multi_thread, + !check_caps(UCT_MD_FLAG_REG)) { + ucs_status_t status; + uct_md_attr_t md_attr; + + status = uct_md_query(md(), &md_attr); + ASSERT_UCS_OK(status); + + if (!(md_attr.cap.reg_mem_types & UCS_BIT(UCS_MEMORY_TYPE_HOST))) { + UCS_TEST_SKIP_R("not host memory type"); + } + + pthread_t thread_id; + int stop_flag = 0; + pthread_create(&thread_id, NULL, alloc_thread, &stop_flag); + + ucs_time_t start_time = ucs_get_time(); + while (ucs_get_time() - start_time < ucs_time_from_sec(0.5)) { + const size_t size = (ucs::rand() % 65536) + 1; + + void *buffer = malloc(size); + ASSERT_TRUE(buffer != NULL); + + uct_mem_h memh; + status = uct_md_mem_reg(md(), buffer, size, + UCT_MD_MEM_FLAG_NONBLOCK| + UCT_MD_MEM_ACCESS_ALL, + &memh); + ASSERT_UCS_OK(status, << " buffer=" << buffer << " size=" << size); + ASSERT_TRUE(memh != UCT_MEM_HANDLE_NULL); + + sched_yield(); + + status = uct_md_mem_dereg(md(), memh); + EXPECT_UCS_OK(status); + free(buffer); + } + + stop_flag = 1; + pthread_join(thread_id, NULL); +} + +UCS_TEST_SKIP_COND_P(test_md, sockaddr_accessibility, + !check_caps(UCT_MD_FLAG_SOCKADDR)) { + ucs_sock_addr_t sock_addr; + struct ifaddrs *ifaddr, *ifa; + bool found_rdma = false; + bool found_ip = false; + + ASSERT_TRUE(getifaddrs(&ifaddr) != -1); + + /* go through a linked list of available interfaces */ + for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { + if (ucs::is_inet_addr(ifa->ifa_addr) && + ucs_netif_flags_is_active(ifa->ifa_flags)) { + sock_addr.addr = 
ifa->ifa_addr; + + found_ip = true; + + if (GetParam().md_name == "rdmacm") { + if (ucs::is_rdmacm_netdev(ifa->ifa_name)) { + UCS_TEST_MESSAGE << "Testing " << ifa->ifa_name << " with " << + ucs::sockaddr_to_str(ifa->ifa_addr); + ASSERT_TRUE(uct_md_is_sockaddr_accessible(md(), &sock_addr, + UCT_SOCKADDR_ACC_LOCAL)); + ASSERT_TRUE(uct_md_is_sockaddr_accessible(md(), &sock_addr, + UCT_SOCKADDR_ACC_REMOTE)); + found_rdma = true; + } + } else { + UCS_TEST_MESSAGE << "Testing " << ifa->ifa_name << " with " << + ucs::sockaddr_to_str(ifa->ifa_addr); + ASSERT_TRUE(uct_md_is_sockaddr_accessible(md(), &sock_addr, + UCT_SOCKADDR_ACC_LOCAL)); + ASSERT_TRUE(uct_md_is_sockaddr_accessible(md(), &sock_addr, + UCT_SOCKADDR_ACC_REMOTE)); + } + } + } + + if (GetParam().md_name == "rdmacm") { + if (!found_rdma) { + UCS_TEST_MESSAGE << + "Cannot find an IPoIB/RoCE interface with an IPv4 address on the host"; + } + } else if (!found_ip) { + UCS_TEST_MESSAGE << "Cannot find an IPv4/IPv6 interface on the host"; + } + + freeifaddrs(ifaddr); +} + +#define UCT_MD_INSTANTIATE_TEST_CASE(_test_case) \ + UCS_PP_FOREACH(_UCT_MD_INSTANTIATE_TEST_CASE, _test_case, \ + knem, \ + cma, \ + posix, \ + sysv, \ + xpmem, \ + cuda_cpy, \ + cuda_ipc, \ + rocm_cpy, \ + rocm_ipc, \ + ib, \ + ugni, \ + sockcm, \ + rdmacm \ + ) + +UCT_MD_INSTANTIATE_TEST_CASE(test_md) + diff --git a/test/gtest/uct/test_md.h b/test/gtest/uct/test_md.h new file mode 100644 index 0000000..7bd0a36 --- /dev/null +++ b/test/gtest/uct/test_md.h @@ -0,0 +1,68 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/
+
+
+#ifndef UCT_TEST_MD
+#define UCT_TEST_MD
+
+#include "uct_test.h"
+
+
+/* Test parameter: a component handle together with a memory-domain name. */
+struct test_md_param {
+    uct_component_h component;
+    std::string md_name;
+};
+
+/* Streams only the memory-domain name of the parameter. */
+static std::ostream& operator<<(std::ostream& os, const test_md_param& md_param) {
+    return os << md_param.md_name;
+}
+
+/*
+ * Parameterized fixture for memory-domain tests. Only md() and md_attr() are
+ * defined here; the remaining members are declared and implemented elsewhere.
+ * NOTE(review): the template arguments of TestWithParam, std::vector and
+ * ucs::handle look stripped in this copy of the patch - confirm upstream.
+ */
+class test_md : public testing::TestWithParam,
+                public uct_test_base
+{
+public:
+    UCS_TEST_BASE_IMPL;
+
+    /* Used by _UCT_MD_INSTANTIATE_TEST_CASE to produce the test parameters
+     * for the component named cmpt_name. */
+    static std::vector enum_mds(const std::string& cmpt_name);
+
+    test_md();
+
+protected:
+    virtual void init();
+    virtual void cleanup();
+    virtual void modify_config(const std::string& name, const std::string& value,
+                               bool optional);
+    bool check_caps(uint64_t flags);
+    void alloc_memory(void **address, size_t size, char *fill,
+                      ucs_memory_type_t mem_type);
+    void check_memory(void *address, void *expect, size_t size,
+                      ucs_memory_type_t mem_type);
+    void free_memory(void *address, ucs_memory_type_t mem_type);
+
+    void test_registration();
+
+    /* Accessor for the stored memory-domain handle. */
+    uct_md_h md() const {
+        return m_md;
+    }
+
+    /* Accessor for the stored memory-domain attributes. */
+    const uct_md_attr_t& md_attr() const {
+        return m_md_attr;
+    }
+
+
+    /* Has the void*(void*) signature of a pthread start routine. */
+    static void* alloc_thread(void *arg);
+
+private:
+    ucs::handle m_md_config;
+    ucs::handle m_md;
+    uct_md_attr_t m_md_attr;
+};
+
+
+/*
+ * Instantiates _test_case under the name _cmpt_name, with the values returned
+ * by _test_case::enum_mds() for the stringified component name.
+ */
+#define _UCT_MD_INSTANTIATE_TEST_CASE(_test_case, _cmpt_name) \
+    INSTANTIATE_TEST_CASE_P(_cmpt_name, _test_case, \
+                            testing::ValuesIn(_test_case::enum_mds(#_cmpt_name)));
+#endif
diff --git a/test/gtest/uct/test_mem.cc b/test/gtest/uct/test_mem.cc
new file mode 100644
index 0000000..b125510
--- /dev/null
+++ b/test/gtest/uct/test_mem.cc
@@ -0,0 +1,207 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/ + +#include "uct_test.h" + +#include + + +class test_mem : public testing::TestWithParam, + public uct_test_base { +public: + UCS_TEST_BASE_IMPL; + +protected: + + void check_mem(const uct_allocated_memory &mem, size_t min_length) { + EXPECT_TRUE(mem.address != 0); + EXPECT_GE(mem.length, min_length); + if (mem.method == UCT_ALLOC_METHOD_MD) { + EXPECT_TRUE(mem.md != NULL); + EXPECT_TRUE(mem.memh != UCT_MEM_HANDLE_NULL); + } else { + EXPECT_TRUE((mem.method == GetParam()) || + (mem.method == UCT_ALLOC_METHOD_HEAP)); + } + } + + static const size_t min_length = 1234557; +}; + + +UCS_TEST_P(test_mem, nomd_alloc) { + uct_alloc_method_t methods[2]; + uct_allocated_memory mem; + ucs_status_t status; + + methods[0] = GetParam(); + methods[1] = UCT_ALLOC_METHOD_HEAP; + + status = uct_mem_alloc(NULL, min_length, UCT_MD_MEM_ACCESS_ALL, methods, + 2, NULL, 0, "test", &mem); + ASSERT_UCS_OK(status); + + check_mem(mem, min_length); + + uct_mem_free(&mem); +} + +UCS_TEST_P(test_mem, md_alloc) { + uct_alloc_method_t methods[3]; + uct_allocated_memory mem; + std::vector md_resources; + uct_md_attr_t md_attr; + ucs_status_t status; + uct_md_h md; + uct_md_config_t *md_config; + int nonblock; + + methods[0] = UCT_ALLOC_METHOD_MD; + methods[1] = GetParam(); + methods[2] = UCT_ALLOC_METHOD_HEAP; + + md_resources = enum_md_resources(); + for (std::vector::iterator iter = md_resources.begin(); + iter != md_resources.end(); ++iter) { + + status = uct_md_config_read(iter->cmpt, NULL, NULL, &md_config); + ASSERT_UCS_OK(status); + + status = uct_md_open(iter->cmpt, iter->rsc_desc.md_name, md_config, &md); + uct_config_release(md_config); + ASSERT_UCS_OK(status); + + status = uct_md_query(md, &md_attr); + ASSERT_UCS_OK(status); + + for (nonblock = 0; nonblock <= 1; ++nonblock) { + int flags = nonblock ? 
UCT_MD_MEM_FLAG_NONBLOCK : 0; + flags |= UCT_MD_MEM_ACCESS_ALL; + status = uct_mem_alloc(NULL, min_length, flags, methods, 3, &md, 1, + "test", &mem); + ASSERT_UCS_OK(status); + + if (md_attr.cap.flags & UCT_MD_FLAG_ALLOC) { + EXPECT_EQ(UCT_ALLOC_METHOD_MD, mem.method); + } else { + EXPECT_NE(UCT_ALLOC_METHOD_MD, mem.method); + } + + check_mem(mem, min_length); + + uct_mem_free(&mem); + } + + uct_md_close(md); + } +} + +UCS_TEST_P(test_mem, md_fixed) { + std::vector md_resources; + uct_md_attr_t md_attr; + uct_md_config_t *md_config; + uct_md_h md; + unsigned j; + + const size_t page_size = ucs_get_page_size(); + const size_t n_tryes = 101; + uct_alloc_method_t meth; + void* p_addr = ucs::mmap_fixed_address(); + size_t n_success; + + uct_allocated_memory_t uct_mem; + ucs_status_t status; + + md_resources = enum_md_resources(); + for (std::vector::iterator iter = md_resources.begin(); + iter != md_resources.end(); ++iter) { + + status = uct_md_config_read(iter->cmpt, NULL, NULL, &md_config); + ASSERT_UCS_OK(status); + + status = uct_md_open(iter->cmpt, iter->rsc_desc.md_name, md_config, &md); + uct_config_release(md_config); + ASSERT_UCS_OK(status); + + status = uct_md_query(md, &md_attr); + ASSERT_UCS_OK(status); + + if ((md_attr.cap.flags & UCT_MD_FLAG_ALLOC) && + (md_attr.cap.flags & UCT_MD_FLAG_FIXED)) { + n_success = 0; + + for (j = 0; j < n_tryes; ++j) { + meth = UCT_ALLOC_METHOD_MD; + + status = uct_mem_alloc(p_addr, 1, + UCT_MD_MEM_FLAG_FIXED| + UCT_MD_MEM_ACCESS_ALL, + &meth, 1, &md, 1, "test", &uct_mem); + if (status == UCS_OK) { + ++n_success; + EXPECT_EQ(meth, uct_mem.method); + EXPECT_EQ(p_addr, uct_mem.address); + EXPECT_GE(uct_mem.length, (size_t)1); + /* touch the page*/ + memset(uct_mem.address, 'c', uct_mem.length); + EXPECT_EQ(*(char*)p_addr, 'c'); + status = uct_mem_free(&uct_mem); + } else { + EXPECT_EQ(status, UCS_ERR_NO_MEMORY); + } + + p_addr = (char*)p_addr + (2 * page_size); + } + + EXPECT_GT(n_success, (size_t)0); + } + + uct_md_close(md); 
+ } +} + + +UCS_TEST_P(test_mem, mmap_fixed) { + unsigned i; + + const size_t page_size = ucs_get_page_size(); + const size_t n_tryes = 101; + uct_alloc_method_t meth; + void* p_addr = ucs::mmap_fixed_address(); + size_t n_success; + + uct_allocated_memory_t uct_mem; + ucs_status_t status; + + n_success = 0; + + for (i = 0; i < n_tryes; ++i) { + meth = (i % 2) ? UCT_ALLOC_METHOD_MMAP : UCT_ALLOC_METHOD_HUGE; + + status = uct_mem_alloc(p_addr, 1, + UCT_MD_MEM_FLAG_FIXED|UCT_MD_MEM_ACCESS_ALL, + &meth, 1, NULL, 0, "test", &uct_mem); + if (status == UCS_OK) { + ++n_success; + EXPECT_EQ(meth, uct_mem.method); + EXPECT_EQ(p_addr, uct_mem.address); + EXPECT_GE(uct_mem.length, (size_t)1); + /* touch the page*/ + memset(uct_mem.address, 'c', uct_mem.length); + EXPECT_EQ(*(char*)p_addr, 'c'); + status = uct_mem_free(&uct_mem); + } else { + EXPECT_EQ(status, UCS_ERR_NO_MEMORY); + } + p_addr = (char*)p_addr + (2 * page_size); + } +} + +INSTANTIATE_TEST_CASE_P(alloc_methods, test_mem, + ::testing::Values(UCT_ALLOC_METHOD_THP, + UCT_ALLOC_METHOD_HEAP, + UCT_ALLOC_METHOD_MMAP, + UCT_ALLOC_METHOD_HUGE)); diff --git a/test/gtest/uct/test_mm.cc b/test/gtest/uct/test_mm.cc new file mode 100644 index 0000000..49ab93f --- /dev/null +++ b/test/gtest/uct/test_mm.cc @@ -0,0 +1,260 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +extern "C" { +#include +#include +#include +} +#include "uct_p2p_test.h" +#include +#include "uct_test.h" + + +class test_uct_mm : public uct_test { +public: + + struct mm_resource : public resource { + std::string shm_dir; + + mm_resource(const resource& res, const std::string& shm_dir = "") : + resource(res.component, res.md_name, res.local_cpus, res.tl_name, + res.dev_name, res.dev_type), + shm_dir(shm_dir) + { + } + + virtual std::string name() const { + std::string name = resource::name(); + if (!shm_dir.empty()) { + name += ",dir=" + shm_dir; + } + return name; + } + }; + + typedef struct { + unsigned length; + /* data follows */ + } recv_desc_t; + + static std::vector enum_resources(const std::string& tl_name) { + static std::vector all_resources; + + if (all_resources.empty()) { + std::vector r = uct_test::enum_resources(""); + for (std::vector::iterator iter = r.begin(); + iter != r.end(); ++iter) { + if ((*iter)->tl_name == "posix") { + enum_posix_variants(**iter, all_resources); + } else { + all_resources.push_back(mm_resource(**iter)); + } + } + } + + return filter_resources(all_resources, tl_name); + } + + test_uct_mm() : m_e1(NULL), m_e2(NULL) { + if (GetParam()->tl_name == "posix") { + set_posix_config(); + } + } + + const mm_resource* GetParam() { + return dynamic_cast(uct_test::GetParam()); + } + + static void enum_posix_variants(const resource &res, + std::vector &variants) { + variants.push_back(mm_resource(res, "." 
)); + variants.push_back(mm_resource(res, "/dev/shm")); + } + + void set_posix_config() { + set_config("DIR=" + GetParam()->shm_dir); + } + + virtual void init() { + uct_test::init(); + + m_e1 = uct_test::create_entity(0); + m_entities.push_back(m_e1); + + check_skip_test(); + + m_e2 = uct_test::create_entity(0); + m_entities.push_back(m_e2); + + m_e1->connect(0, *m_e2, 0); + m_e2->connect(0, *m_e1, 0); + } + + static ucs_status_t mm_am_handler(void *arg, void *data, size_t length, + unsigned flags) { + recv_desc_t *my_desc = (recv_desc_t *) arg; + uint64_t *test_mm_hdr = (uint64_t *) data; + uint64_t *actual_data = (uint64_t *) test_mm_hdr + 1; + unsigned data_length = length - sizeof(test_mm_hdr); + + my_desc->length = data_length; + if (*test_mm_hdr == 0xbeef) { + memcpy(my_desc + 1, actual_data, data_length); + } + + return UCS_OK; + } + + bool check_md_caps(uint64_t flags) { + FOR_EACH_ENTITY(iter) { + if (!(ucs_test_all_flags((*iter)->md_attr().cap.flags, flags))) { + return false; + } + } + return true; + } + + void test_attach_ptr(void *ptr, void *attach_ptr, uint64_t magic) + { + *(uint64_t*)attach_ptr = 0; + ucs_memory_cpu_store_fence(); + + *(uint64_t*)ptr = magic; + ucs_memory_cpu_load_fence(); + + /* Writing to *ptr should also update *attach_ptr */ + EXPECT_EQ(magic, *(uint64_t*)attach_ptr) + << "ptr=" << ptr << " attach_ptr=" << attach_ptr; + + UCS_TEST_MESSAGE << std::hex << *(uint64_t*)attach_ptr; + } + + uct_mm_md_t *md(entity *e) { + return ucs_derived_of(e->md(), uct_mm_md_t); + } + + void test_attach(void *ptr, uct_mem_h memh, size_t size) + { + uct_mm_seg_t *seg = (uct_mm_seg_t*)memh; + ucs_status_t status; + + size_t iface_addr_len = uct_mm_md_mapper_call(md(m_e1), iface_addr_length); + std::vector iface_addr(iface_addr_len); + + status = uct_mm_md_mapper_call(md(m_e1), iface_addr_pack, &iface_addr[0]); + ASSERT_UCS_OK(status); + + uct_mm_remote_seg_t rseg; + status = uct_mm_md_mapper_call(md(m_e2), mem_attach, seg->seg_id, size, + 
&iface_addr[0], &rseg); + ASSERT_UCS_OK(status); + + test_attach_ptr(ptr, rseg.address, 0xdeadbeef11111); + + uct_mm_md_mapper_call(md(m_e2), mem_detach, &rseg); + } + + void test_rkey(void *ptr, uct_mem_h memh, size_t size) + { + ucs_status_t status; + + std::vector rkey_buffer(m_e1->md_attr().rkey_packed_size); + + status = uct_md_mkey_pack(m_e1->md(), memh, &rkey_buffer[0]); + ASSERT_UCS_OK(status); + + uct_rkey_bundle_t rkey_ob; + status = uct_rkey_unpack(GetParam()->component, &rkey_buffer[0], &rkey_ob); + ASSERT_UCS_OK(status); + + /* For shared memory transports, rkey is the offset between local and + * remote pointers. + */ + test_attach_ptr(ptr, UCS_PTR_BYTE_OFFSET(ptr, rkey_ob.rkey), + 0xdeadbeef22222); + + uct_rkey_release(GetParam()->component, &rkey_ob); + } + + void test_memh(void *ptr, uct_mem_h memh, size_t size) { + test_attach(ptr, memh, size); + test_attach(ptr, memh, size); + test_rkey(ptr, memh, size); + } + +protected: + entity *m_e1, *m_e2; +}; + +UCS_TEST_SKIP_COND_P(test_uct_mm, open_for_posix, + check_caps(UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CB_SYNC)) +{ + uint64_t send_data = 0xdeadbeef; + uint64_t test_mm_hdr = 0xbeef; + recv_desc_t *recv_buffer; + + recv_buffer = (recv_desc_t *)malloc(sizeof(*recv_buffer) + + sizeof(uint64_t)); + recv_buffer->length = 0; /* Initialize length to 0 */ + + /* set a callback for the uct to invoke for receiving the data */ + uct_iface_set_am_handler(m_e2->iface(), 0, mm_am_handler , recv_buffer, + 0); + + /* send the data */ + uct_ep_am_short(m_e1->ep(0), 0, test_mm_hdr, &send_data, sizeof(send_data)); + + /* progress sender and receiver until the receiver gets the message */ + wait_for_flag(&recv_buffer->length); + + ASSERT_EQ(sizeof(send_data), recv_buffer->length); + EXPECT_EQ(send_data, *(uint64_t*)(recv_buffer+1)); + + free(recv_buffer); +} + +UCS_TEST_SKIP_COND_P(test_uct_mm, alloc, + !check_md_caps(UCT_MD_FLAG_ALLOC)) { + + size_t size = ucs_min(100000u, m_e1->md_attr().cap.max_alloc); + 
ucs_status_t status; + + void *address = NULL; + size_t alloc_length = size; + uct_mem_h memh; + status = uct_md_mem_alloc(m_e1->md(), &alloc_length, &address, + UCT_MD_MEM_ACCESS_ALL, "test_mm", &memh); + ASSERT_UCS_OK(status); + + test_memh(address, memh, size); + + status = uct_md_mem_free(m_e1->md(), memh); + ASSERT_UCS_OK(status); +} + +UCS_TEST_SKIP_COND_P(test_uct_mm, reg, + !check_md_caps(UCT_MD_FLAG_REG)) { + + size_t size = ucs_min(100000u, m_e1->md_attr().cap.max_reg); + ucs_status_t status; + + std::vector buffer(size); + + uct_mem_h memh; + status = uct_md_mem_reg(m_e1->md(), &buffer[0], size, UCT_MD_MEM_ACCESS_ALL, + &memh); + ASSERT_UCS_OK(status); + + test_memh(&buffer[0], memh, size); + + status = uct_md_mem_dereg(m_e1->md(), memh); + ASSERT_UCS_OK(status); +} + +_UCT_INSTANTIATE_TEST_CASE(test_uct_mm, posix) +_UCT_INSTANTIATE_TEST_CASE(test_uct_mm, sysv) +_UCT_INSTANTIATE_TEST_CASE(test_uct_mm, xpmem) diff --git a/test/gtest/uct/test_p2p_am.cc b/test/gtest/uct/test_p2p_am.cc new file mode 100644 index 0000000..a855989 --- /dev/null +++ b/test/gtest/uct/test_p2p_am.cc @@ -0,0 +1,706 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "uct_p2p_test.h" + +#include +#include + +class uct_p2p_am_test : public uct_p2p_test +{ +public: + static const uint8_t AM_ID = 11; + static const uint8_t AM_ID_RESP = 12; + static const uint64_t SEED1 = 0xa1a1a1a1a1a1a1a1ul; + static const uint64_t SEED2 = 0xa2a2a2a2a2a2a2a2ul; + static const uint64_t MAGIC_DESC = 0xdeadbeef12345678ul; + static const uint64_t MAGIC_ALLOC = 0xbaadf00d12345678ul; + + typedef struct { + uint64_t magic; + unsigned length; + /* data follows */ + } receive_desc_t; + + typedef struct { + unsigned count; + } tracer_ctx_t; + + uct_p2p_am_test() : + uct_p2p_test(sizeof(receive_desc_t)), m_am_count(0), m_am_posted(0), + m_keep_data(false) + { + m_pending_req.sendbuf = NULL; + m_pending_req.test = NULL; + m_pending_req.posted = false; + memset(&m_pending_req.uct, 0, sizeof(m_pending_req.uct)); + + m_send_tracer.count = 0; + m_recv_tracer.count = 0; + pthread_mutex_init(&m_lock, NULL); + } + + virtual void init() { + uct_p2p_test::init(); + m_am_count = 0; + uct_iface_set_am_tracer(sender().iface(), am_tracer, &m_send_tracer); + if (&sender() != &receiver()) { + uct_iface_set_am_tracer(receiver().iface(), am_tracer, &m_recv_tracer); + } + } + + virtual void cleanup() { + uct_iface_set_am_tracer(receiver().iface(), NULL, NULL); + uct_iface_set_am_tracer(sender().iface(), NULL, NULL); + uct_p2p_test::cleanup(); + } + + static ucs_status_t am_handler(void *arg, void *data, size_t length, + unsigned flags) { + uct_p2p_am_test *self = reinterpret_cast(arg); + return self->am_handler(data, length, flags); + } + + static ucs_status_t resp_progress(uct_pending_req_t *req) + { + test_req_t *resp_req = ucs_container_of(req, test_req_t, uct); + uct_p2p_am_test *test = resp_req->test; + mapped_buffer dummy_bufer(0, 0, test->receiver()); + ucs_status_t status; + + uint64_t hdr = *(uint64_t*)resp_req->sendbuf->ptr(); + status = uct_ep_am_short(test->receiver().ep(0), AM_ID_RESP, hdr, + (char*)resp_req->sendbuf->ptr() + sizeof(hdr), + 
resp_req->sendbuf->length() - sizeof(hdr)); + if (status == UCS_OK) { + pthread_mutex_lock(&test->m_lock); + ++test->m_am_posted; + resp_req->posted = true; + delete resp_req->sendbuf; + pthread_mutex_unlock(&test->m_lock); + } + return status; + } + + static ucs_status_t am_handler_resp(void *arg, void *data, size_t length, + unsigned flags) { + uct_p2p_am_test *self = reinterpret_cast(arg); + + ucs_assert(self->receiver().iface_attr().cap.flags & + UCT_IFACE_FLAG_AM_SHORT); + + ucs_status_t ret = self->am_handler(data, length, flags); + + pthread_mutex_lock(&self->m_lock); + + self->m_pending_req.uct.func = resp_progress; + self->m_pending_req.sendbuf = new mapped_buffer(sizeof(SEED1), SEED1, + self->receiver()); + self->m_pending_req.test = self; + + ucs_status_t status; + do { + status = uct_ep_am_short(self->receiver().ep(0), AM_ID_RESP, SEED1, + NULL, 0); + self->m_am_posted += (status == UCS_OK) ? 1 : 0; + } while (status == UCS_OK); + + EXPECT_EQ(UCS_ERR_NO_RESOURCE, status); + status = uct_ep_pending_add(self->receiver().ep(0), + &self->m_pending_req.uct, + UCT_CB_FLAG_ASYNC); + EXPECT_EQ(UCS_OK, status); + + pthread_mutex_unlock(&self->m_lock); + return ret; + } + + static void am_tracer(void *arg, uct_am_trace_type_t type, uint8_t id, + const void *data, size_t length, char *buffer, + size_t max) + { + tracer_ctx_t *ctx = (tracer_ctx_t *)arg; + + EXPECT_EQ(uint8_t(AM_ID), id); + mem_buffer::pattern_check(data, length, SEED1); + *buffer = '\0'; + ++ctx->count; + } + + ucs_status_t am_handler(void *data, size_t length, unsigned flags) { + pthread_mutex_lock(&m_lock); + ++m_am_count; + pthread_mutex_unlock(&m_lock); + + if (m_keep_data) { + receive_desc_t *my_desc; + if (flags & UCT_CB_PARAM_FLAG_DESC) { + my_desc = (receive_desc_t *)data - 1; + my_desc->magic = MAGIC_DESC; + } else { + my_desc = (receive_desc_t *)ucs_malloc(sizeof(*my_desc) + length, + "TODO: remove allocation"); + memcpy(my_desc + 1, data, length); + my_desc->magic = MAGIC_ALLOC; + } + 
my_desc->length = length; + pthread_mutex_lock(&m_lock); + m_backlog.push_back(my_desc); + pthread_mutex_unlock(&m_lock); + return (my_desc->magic == MAGIC_DESC) ? UCS_INPROGRESS : UCS_OK; + } + mem_buffer::pattern_check(data, length, SEED1); + return UCS_OK; + } + + void check_backlog() { + pthread_mutex_lock(&m_lock); + while (!m_backlog.empty()) { + receive_desc_t *my_desc = m_backlog.back(); + m_backlog.pop_back(); + mem_buffer::pattern_check(my_desc + 1, my_desc->length, SEED1); + pthread_mutex_unlock(&m_lock); + if (my_desc->magic == MAGIC_DESC) { + uct_iface_release_desc(my_desc); + } else { + EXPECT_EQ(uint64_t(MAGIC_ALLOC), my_desc->magic); + ucs_free(my_desc); + } + pthread_mutex_lock(&m_lock); + } + pthread_mutex_unlock(&m_lock); + } + + ucs_status_t am_short(uct_ep_h ep, const mapped_buffer& sendbuf, + const mapped_buffer& recvbuf) + { + uint64_t hdr = *(uint64_t*)sendbuf.ptr(); + return uct_ep_am_short(ep, AM_ID, hdr, (char*)sendbuf.ptr() + sizeof(hdr), + sendbuf.length() - sizeof(hdr)); + } + + ucs_status_t am_bcopy(uct_ep_h ep, const mapped_buffer& sendbuf, + const mapped_buffer& recvbuf) + { + ssize_t packed_len; + packed_len = uct_ep_am_bcopy(ep, AM_ID, mapped_buffer::pack, + (void*)&sendbuf, 0); + if (packed_len >= 0) { + EXPECT_EQ(sendbuf.length(), (size_t)packed_len); + return UCS_OK; + } else { + return (ucs_status_t)packed_len; + } + } + + ucs_status_t am_zcopy(uct_ep_h ep, const mapped_buffer& sendbuf, + const mapped_buffer& recvbuf) + { + size_t max_hdr = ucs_min(sender().iface_attr().cap.am.max_hdr, + sendbuf.length()); + size_t hdr_size = ucs::rand() % (max_hdr + 1); + + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, ((char*)sendbuf.ptr() + hdr_size), + (sendbuf.length() - hdr_size), sendbuf.memh(), + sender().iface_attr().cap.am.max_iov); + + return uct_ep_am_zcopy(ep, + AM_ID, + sendbuf.ptr(), + hdr_size, + iov, + iovcnt, + 0, + comp()); + } + + void test_xfer_do(send_func_t send, size_t length, unsigned flags, + uint32_t am_mode, 
ucs_memory_type_t mem_type) + { + ucs_status_t status; + + m_am_count = 0; + m_send_tracer.count = 0; + m_recv_tracer.count = 0; + + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, am_handler, + this, am_mode); + ASSERT_UCS_OK(status); + + mapped_buffer sendbuf(length, SEED1, sender(), 0, mem_type); + mapped_buffer recvbuf(0, 0, sender(), 0, mem_type); /* dummy */ + + blocking_send(send, sender_ep(), sendbuf, recvbuf, true); + sendbuf.pattern_fill(SEED2); + + while (m_am_count == 0) { + short_progress_loop(); + } + + if (!(receiver().iface_attr().cap.flags & UCT_IFACE_FLAG_AM_DUP)) { + flush(); + EXPECT_EQ(1u, m_am_count); + } else { + EXPECT_GE(m_am_count, 1u); + } + + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, NULL, NULL, + am_mode); + ASSERT_UCS_OK(status); + + check_backlog(); + + if (ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) { + if (&sender() == &receiver()) { + EXPECT_UD_CHECK(2u, m_send_tracer.count, LE, EQ); + } else { + EXPECT_UD_CHECK(1u, m_send_tracer.count, LE, EQ); + EXPECT_UD_CHECK(1u, m_recv_tracer.count, LE, EQ); + } + } + } + + virtual void test_xfer(send_func_t send, size_t length, unsigned flags, + ucs_memory_type_t mem_type) { + + if (receiver().iface_attr().cap.flags & UCT_IFACE_FLAG_CB_SYNC) { + test_xfer_do(send, length, flags, 0, mem_type); + } + if (receiver().iface_attr().cap.flags & UCT_IFACE_FLAG_CB_ASYNC) { + test_xfer_do(send, length, flags, UCT_CB_FLAG_ASYNC, mem_type); + } + } + + void set_keep_data(bool keep) { + m_keep_data = keep; + } + + void am_sync_finish(unsigned prev_am_count) { + /* am message handler must be only invoked from progress */ + twait(500); + EXPECT_EQ(prev_am_count, m_am_count); + wait_for_value(&m_am_count, prev_am_count + 1, true); + EXPECT_EQ(prev_am_count+1, m_am_count); + } + + void am_async_finish(unsigned prev_am_count) { + /* am message handler must be only invoked within reasonable time if + * progress is not called */ + wait_for_value(&m_am_count, prev_am_count + 1, 
false); + EXPECT_EQ(prev_am_count+1, m_am_count); + } + +protected: + inline size_t backlog_size() const { + return m_backlog.size(); + } + +protected: + unsigned m_am_count; + unsigned m_am_posted; + + struct test_req_t { + uct_pending_req_t uct; + mapped_buffer *sendbuf; + uct_p2p_am_test *test; + bool posted; + } m_pending_req; + pthread_mutex_t m_lock; + +private: + bool m_keep_data; + std::vector m_backlog; + tracer_ctx_t m_send_tracer; + tracer_ctx_t m_recv_tracer; +}; + +UCS_TEST_SKIP_COND_P(uct_p2p_am_test, am_sync, + ((UCT_DEVICE_TYPE_SELF == GetParam()->dev_type) || + !check_caps(UCT_IFACE_FLAG_CB_SYNC, + UCT_IFACE_FLAG_AM_DUP))) { + + ucs_status_t status; + mapped_buffer recvbuf(0, 0, sender()); /* dummy */ + unsigned am_count = m_am_count = 0; + + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, am_handler, + this, 0); + ASSERT_UCS_OK(status); + + if (receiver().iface_attr().cap.flags & UCT_IFACE_FLAG_AM_SHORT) { + mapped_buffer sendbuf_short(sender().iface_attr().cap.am.max_short, + SEED1, sender()); + am_count = m_am_count; + blocking_send(static_cast(&uct_p2p_am_test::am_short), + sender_ep(), sendbuf_short, recvbuf, false); + am_sync_finish(am_count); + } + + if (receiver().iface_attr().cap.flags & UCT_IFACE_FLAG_AM_BCOPY) { + mapped_buffer sendbuf_bcopy(sender().iface_attr().cap.am.max_bcopy, + SEED1, sender()); + am_count = m_am_count; + blocking_send(static_cast(&uct_p2p_am_test::am_bcopy), + sender_ep(), sendbuf_bcopy, recvbuf, false); + am_sync_finish(am_count); + } + + if (receiver().iface_attr().cap.flags & UCT_IFACE_FLAG_AM_ZCOPY) { + mapped_buffer sendbuf_zcopy(sender().iface_attr().cap.am.max_zcopy, + SEED1, sender()); + am_count = m_am_count; + blocking_send(static_cast(&uct_p2p_am_test::am_zcopy), + sender_ep(), sendbuf_zcopy, recvbuf, false); + am_sync_finish(am_count); + } + + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, NULL, NULL, 0); + ASSERT_UCS_OK(status); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_am_test, 
am_async, + !check_caps(UCT_IFACE_FLAG_CB_ASYNC, + UCT_IFACE_FLAG_AM_DUP)) { + ucs_status_t status; + + mapped_buffer recvbuf(0, 0, sender()); /* dummy */ + unsigned am_count = m_am_count = 0; + + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, am_handler, + this, UCT_CB_FLAG_ASYNC); + ASSERT_UCS_OK(status); + + if (receiver().iface_attr().cap.flags & UCT_IFACE_FLAG_AM_SHORT) { + mapped_buffer sendbuf_short(sender().iface_attr().cap.am.max_short, + SEED1, sender()); + am_count = m_am_count; + blocking_send(static_cast(&uct_p2p_am_test::am_short), + sender_ep(), sendbuf_short, recvbuf, false); + am_async_finish(am_count); + } + + if (receiver().iface_attr().cap.flags & UCT_IFACE_FLAG_AM_BCOPY) { + mapped_buffer sendbuf_bcopy(sender().iface_attr().cap.am.max_bcopy, + SEED1, sender()); + am_count = m_am_count; + blocking_send(static_cast(&uct_p2p_am_test::am_bcopy), + sender_ep(), sendbuf_bcopy, recvbuf, false); + am_async_finish(am_count); + } + + if (receiver().iface_attr().cap.flags & UCT_IFACE_FLAG_AM_ZCOPY) { + mapped_buffer sendbuf_zcopy(sender().iface_attr().cap.am.max_zcopy, + SEED1, sender()); + am_count = m_am_count; + blocking_send(static_cast(&uct_p2p_am_test::am_zcopy), + sender_ep(), sendbuf_zcopy, recvbuf, false); + am_async_finish(am_count); + } + + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, NULL, NULL, + UCT_CB_FLAG_ASYNC); + ASSERT_UCS_OK(status); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_am_test, am_async_response, + !check_caps(UCT_IFACE_FLAG_CB_SYNC | + UCT_IFACE_FLAG_CB_ASYNC, + UCT_IFACE_FLAG_AM_DUP)) { + ucs_status_t status; + mapped_buffer recvbuf(0, 0, sender()); /* dummy */ + + m_am_posted = m_am_count = 0; + m_pending_req.posted = false; + + status = uct_iface_set_am_handler(sender().iface(), AM_ID_RESP, am_handler, + this, 0); + ASSERT_UCS_OK(status); + + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, + am_handler_resp, this, UCT_CB_FLAG_ASYNC); + ASSERT_UCS_OK(status); + + if 
(receiver().iface_attr().cap.flags & UCT_IFACE_FLAG_AM_SHORT) { + mapped_buffer sendbuf_short(sender().iface_attr().cap.am.max_short, + SEED1, sender()); + + const double timeout = 60. * ucs::test_time_multiplier(); + ucs_time_t deadline = ucs_get_time() + ucs_time_from_sec(timeout); + do { + sender().progress(); + status = am_short(sender_ep(), sendbuf_short, recvbuf); + } while ((status == UCS_ERR_NO_RESOURCE) && (ucs_get_time() < deadline)); + EXPECT_EQ(UCS_OK, status); + ++m_am_posted; + + deadline = ucs_get_time() + ucs_time_from_sec(timeout); + pthread_mutex_lock(&m_lock); + while ((!m_pending_req.posted || (m_am_count != m_am_posted)) && + (ucs_get_time() < deadline)) { + pthread_mutex_unlock(&m_lock); + sender().progress(); + pthread_mutex_lock(&m_lock); + } + UCS_TEST_MESSAGE << "posted: " << m_am_posted << " am_count: " << m_am_count; + EXPECT_TRUE(m_pending_req.posted); + EXPECT_EQ(m_am_posted, m_am_count); + pthread_mutex_unlock(&m_lock); + } + + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, NULL, NULL, 0); + ASSERT_UCS_OK(status); +} + +class uct_p2p_am_misc : public uct_p2p_am_test +{ +public: + static const unsigned RX_MAX_BUFS; + static const unsigned RX_QUEUE_LEN; + + uct_p2p_am_misc() : uct_p2p_am_test() { + ucs_status_t status_ib_bufs, status_ib_qlen, status_bufs; + m_rx_buf_limit_failed = false; + status_ib_bufs = uct_config_modify(m_iface_config, "IB_RX_MAX_BUFS", + ucs::to_string(RX_MAX_BUFS).c_str()); + status_ib_qlen = uct_config_modify(m_iface_config, "IB_RX_QUEUE_LEN", + ucs::to_string(RX_QUEUE_LEN).c_str()); + status_bufs = uct_config_modify(m_iface_config, "RX_MAX_BUFS", + ucs::to_string(RX_MAX_BUFS).c_str()); + if ((status_ib_bufs != UCS_OK) && (status_ib_qlen != UCS_OK) && + (status_bufs != UCS_OK)) { + /* none of the above environment parameters were set successfully + * (for UCTs that don't have them) */ + m_rx_buf_limit_failed = true; + } + } + + ucs_status_t send_with_timeout(uct_ep_h ep, const mapped_buffer& 
sendbuf, + const mapped_buffer& recvbuf, double timeout) + { + ucs_time_t loop_end_limit; + ucs_status_t status = UCS_ERR_NO_RESOURCE; + + loop_end_limit = ucs_get_time() + ucs_time_from_sec(timeout); + + while ((ucs_get_time() < loop_end_limit) && (status != UCS_OK) && + (backlog_size() < 1000000)) { + status = am_short(sender_ep(), sendbuf, recvbuf); + progress(); + } + return status; + } + + static ucs_log_func_rc_t + no_rx_buffs_log_handler(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) + { + /* Ignore warnings about empty memory pool */ + if ((level == UCS_LOG_LEVEL_WARN) && + !strcmp(function, UCS_PP_QUOTE(uct_iface_mpool_empty_warn))) + { + UCS_TEST_MESSAGE << file << ":" << line << ": " + << format_message(message, ap); + return UCS_LOG_FUNC_RC_STOP; + } + + return UCS_LOG_FUNC_RC_CONTINUE; + } + + bool m_rx_buf_limit_failed; +}; + +UCS_TEST_SKIP_COND_P(uct_p2p_am_test, am_bcopy, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY, + UCT_IFACE_FLAG_AM_DUP)) { + test_xfer_multi(static_cast(&uct_p2p_am_test::am_bcopy), + 0ul, + sender().iface_attr().cap.am.max_bcopy, + TEST_UCT_FLAG_DIR_SEND_TO_RECV); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_am_test, am_short_keep_data, + !check_caps(UCT_IFACE_FLAG_AM_SHORT, + UCT_IFACE_FLAG_AM_DUP)) { + set_keep_data(true); + test_xfer_multi(static_cast(&uct_p2p_am_test::am_short), + sizeof(uint64_t), + sender().iface_attr().cap.am.max_short, + TEST_UCT_FLAG_DIR_SEND_TO_RECV); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_am_test, am_bcopy_keep_data, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY, + UCT_IFACE_FLAG_AM_DUP)) { + set_keep_data(true); + test_xfer_multi(static_cast(&uct_p2p_am_test::am_bcopy), + sizeof(uint64_t), + sender().iface_attr().cap.am.max_bcopy, + TEST_UCT_FLAG_DIR_SEND_TO_RECV); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_am_test, am_zcopy, + !check_caps(UCT_IFACE_FLAG_AM_ZCOPY, + UCT_IFACE_FLAG_AM_DUP)) { + test_xfer_multi(static_cast(&uct_p2p_am_test::am_zcopy), + 0ul, + 
sender().iface_attr().cap.am.max_zcopy, + TEST_UCT_FLAG_DIR_SEND_TO_RECV); +} + +UCT_INSTANTIATE_TEST_CASE(uct_p2p_am_test) + +const unsigned uct_p2p_am_misc::RX_MAX_BUFS = 1024; /* due to hard coded 'grow' + parameter in uct_ib_iface_recv_mpool_init */ +const unsigned uct_p2p_am_misc::RX_QUEUE_LEN = 64; + +UCS_TEST_SKIP_COND_P(uct_p2p_am_misc, no_rx_buffs, + (RUNNING_ON_VALGRIND || m_rx_buf_limit_failed || + !check_caps(UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_CB_SYNC))) +{ + mapped_buffer sendbuf(ucs_min(sender().iface_attr().cap.am.max_short, + 10 * sizeof(uint64_t)), + SEED1, sender()); + mapped_buffer recvbuf(0, 0, sender()); /* dummy */ + ucs_status_t status; + + if (&sender() == &receiver()) { + UCS_TEST_SKIP_R("skipping on loopback"); + } + + /* set a callback for the uct to invoke for receiving the data */ + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, am_handler, + (void*)this, 0); + ASSERT_UCS_OK(status); + + /* send many messages and progress the receiver. the receiver will keep getting + * UCS_INPROGRESS from the recv-handler and will keep consuming its rx memory pool. + * the goal is to make the receiver's rx memory pool run out. 
+ * once this happens, the sender shouldn't be able to send */ + ucs_log_push_handler(no_rx_buffs_log_handler); + set_keep_data(true); + status = send_with_timeout(sender_ep(), sendbuf, recvbuf, 1); + while (status != UCS_ERR_NO_RESOURCE) { + ASSERT_UCS_OK(status); + status = send_with_timeout(sender_ep(), sendbuf, recvbuf, 3); + } + set_keep_data(false); + check_backlog(); + short_progress_loop(); + ucs_log_pop_handler(); + + /* check that now the sender is able to send */ + EXPECT_EQ(UCS_OK, send_with_timeout(sender_ep(), sendbuf, recvbuf, 6)); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_am_misc, am_max_short_multi, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + ucs_status_t status; + + m_am_count = 0; + set_keep_data(false); + + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, am_handler, + this, UCT_CB_FLAG_ASYNC); + ASSERT_UCS_OK(status); + + size_t size = ucs_min(sender().iface_attr().cap.am.max_short, 8192ul); + std::string sendbuf(size, 0); + mem_buffer::pattern_fill(&sendbuf[0], sendbuf.size(), SEED1); + ucs_assert(SEED1 == *(uint64_t*)&sendbuf[0]); + + /* exhaust all resources or time out 1sec */ + ucs_time_t loop_end_limit = ucs_get_time() + ucs_time_from_sec(1.0); + do { + status = uct_ep_am_short(sender_ep(), AM_ID, SEED1, + ((uint64_t*)&sendbuf[0]) + 1, + sendbuf.size() - sizeof(uint64_t)); + } while ((ucs_get_time() < loop_end_limit) && (status == UCS_OK)); + if (status != UCS_ERR_NO_RESOURCE) { + ASSERT_UCS_OK(status); + } + + /* should be able to send again after a while */ + ucs_time_t deadline = ucs_get_time() + + (ucs::test_time_multiplier() * + ucs_time_from_sec(DEFAULT_TIMEOUT_SEC)); + do { + progress(); + status = uct_ep_am_short(sender_ep(), AM_ID, SEED1, NULL, 0); + } while ((status == UCS_ERR_NO_RESOURCE) && (ucs_get_time() < deadline)); + EXPECT_EQ(UCS_OK, status); +} + +UCT_INSTANTIATE_TEST_CASE(uct_p2p_am_misc) + +class uct_p2p_am_tx_bufs : public uct_p2p_am_test +{ +public: + uct_p2p_am_tx_bufs() : uct_p2p_am_test() { + std::string 
cfg_prefix = ""; + ucs_status_t status1, status2; + + /* can not reduce mpool size below retransmission window for ud */ + if (has_ud()) { + m_inited = false; + return; + } + + if (has_ib()) { + cfg_prefix = "IB_"; + } + + status1 = uct_config_modify(m_iface_config, + (cfg_prefix + "TX_MAX_BUFS").c_str() , "32"); + status2 = uct_config_modify(m_iface_config, + (cfg_prefix + "TX_BUFS_GROW").c_str(), "32"); + if ((status1 != UCS_OK) || (status2 != UCS_OK)) { + m_inited = false; + } else { + m_inited = true; + } + } + bool m_inited; +}; + +UCS_TEST_P(uct_p2p_am_tx_bufs, am_tx_max_bufs) { + ucs_status_t status; + mapped_buffer recvbuf(0, 0, sender()); /* dummy */ + mapped_buffer sendbuf_bcopy(sender().iface_attr().cap.am.max_bcopy, + SEED1, sender()); + + status = uct_iface_set_am_handler(receiver().iface(), AM_ID, am_handler, + this, UCT_CB_FLAG_ASYNC); + ASSERT_UCS_OK(status); + /* skip on cm, ud */ + if (!m_inited) { + UCS_TEST_SKIP_R("Test does not apply to the current transport"); + } + if (has_transport("cm")) { + UCS_TEST_SKIP_R("Test does not work with IB CM transport"); + } + if (has_rc()) { + UCS_TEST_SKIP_R("Test does not work with IB RC transports"); + } + do { + status = am_bcopy(sender_ep(), sendbuf_bcopy, recvbuf); + } while (status == UCS_OK); + + /* short progress shall release tx buffers and + * the next message shall go out */ + ucs_time_t loop_end_limit = ucs_get_time() + ucs_time_from_sec(1.0); + do { + progress(); + status = am_bcopy(sender_ep(), sendbuf_bcopy, recvbuf); + if (status == UCS_OK) { + break; + } + } while (ucs_get_time() < loop_end_limit); + + EXPECT_EQ(UCS_OK, status); +} + +UCT_INSTANTIATE_TEST_CASE(uct_p2p_am_tx_bufs) diff --git a/test/gtest/uct/test_p2p_err.cc b/test/gtest/uct/test_p2p_err.cc new file mode 100644 index 0000000..fb8bc59 --- /dev/null +++ b/test/gtest/uct/test_p2p_err.cc @@ -0,0 +1,300 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "uct_p2p_test.h" + +#include + +class uct_p2p_err_test : public uct_p2p_test { +public: + + enum operation { + OP_PUT_SHORT, + OP_PUT_BCOPY, + OP_PUT_ZCOPY, + OP_AM_SHORT, + OP_AM_BCOPY, + OP_AM_ZCOPY + }; + + struct pack_arg { + void *buffer; + size_t length; + }; + + uct_p2p_err_test() : + uct_p2p_test(0, error_handler) { + } + + static size_t pack_cb(void *dest, void *arg) + { + pack_arg *pa = (pack_arg*)arg; + memcpy(dest, pa->buffer, pa->length); + return pa->length; + } + + void test_error_run(enum operation op, uint8_t am_id, + void *buffer, size_t length, uct_mem_h memh, + uint64_t remote_addr, uct_rkey_t rkey, + const std::string& error_pattern) + { + pack_arg arg; + + scoped_log_handler slh(wrap_errors_logger); + + ucs_status_t status = UCS_OK; + ssize_t packed_len; + do { + switch (op) { + case OP_PUT_SHORT: + status = uct_ep_put_short(sender_ep(), buffer, length, + remote_addr, rkey); + break; + case OP_PUT_BCOPY: + arg.buffer = buffer; + arg.length = length; + packed_len = uct_ep_put_bcopy(sender_ep(), pack_cb, &arg, remote_addr, + rkey); + status = (packed_len >= 0) ? UCS_OK : (ucs_status_t)packed_len; + break; + case OP_PUT_ZCOPY: + { + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, buffer, length, memh, + sender().iface_attr().cap.put.max_iov); + status = uct_ep_put_zcopy(sender_ep(), iov, iovcnt, + remote_addr, rkey, NULL); + } + break; + case OP_AM_SHORT: + status = uct_ep_am_short(sender_ep(), am_id, 0, buffer, length); + break; + case OP_AM_BCOPY: + arg.buffer = buffer; + arg.length = length; + packed_len = uct_ep_am_bcopy(sender_ep(), am_id, pack_cb, &arg, 0); + status = (packed_len >= 0) ? 
UCS_OK : (ucs_status_t)packed_len; + break; + case OP_AM_ZCOPY: + { + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, buffer, 1, memh, 1); + status = uct_ep_am_zcopy(sender_ep(), am_id, buffer, length, + iov, iovcnt, 0, NULL); + } + break; + } + + progress(); + } while (status == UCS_ERR_NO_RESOURCE); + + if (status != UCS_OK && status != UCS_INPROGRESS) { + last_error = status; + } else { + /* Flush async events */ + wait_for_remote(); + ucs::safe_usleep(1e4); + } + + /* Count how many error messages match/don't match the given pattern */ + size_t num_matched = 0; + size_t num_unmatched = 0; + for (std::vector::iterator iter = m_errors.begin(); + iter != m_errors.end(); ++iter) { + if (iter->find(error_pattern) != iter->npos) { + ++num_matched; + } else { + ++num_unmatched; + } + } + + EXPECT_GT(num_matched, 0ul) << + "No error which contains the string '" << error_pattern << + "' has occurred during the test"; + EXPECT_EQ(0ul, num_unmatched) << + "Unexpected error(s) occurred during the test"; + } + + static void* get_unused_address(size_t length) + { + void *address = NULL; + ucs_status_t status = ucs_mmap_alloc(&length, &address, 0 + UCS_MEMTRACK_NAME("test_dummy")); + ASSERT_UCS_OK(status, << "length = " << length); + status = ucs_mmap_free(address, length); + ASSERT_UCS_OK(status); + /* coverity[use_after_free] */ + return address; + } + + static ucs_status_t last_error; + +private: + static ucs_status_t + error_handler(void *arg, uct_ep_h ep, ucs_status_t status) { + uct_p2p_err_test *self = static_cast(arg); + const p2p_resource *r = dynamic_cast(self->GetParam()); + ucs_assert_always(r != NULL); + if (r->loopback) { + /* In loop back IB TLs can generate QP flush error before remote + * access error. 
*/ + ucs_log(UCS_LOG_LEVEL_ERROR, "Error on ep %p with status %s is handled", + ep, ucs_status_string(status)); + } + return UCS_OK; + } +}; + +ucs_status_t uct_p2p_err_test::last_error = UCS_OK; + + +UCS_TEST_SKIP_COND_P(uct_p2p_err_test, local_access_error, + !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY | + UCT_IFACE_FLAG_ERRHANDLE_ZCOPY_BUF)) { + mapped_buffer sendbuf(16, 1, sender()); + mapped_buffer recvbuf(16, 2, receiver()); + + test_error_run(OP_PUT_ZCOPY, 0, + get_unused_address(sendbuf.length()), sendbuf.length(), + sendbuf.memh(), recvbuf.addr(), recvbuf.rkey(), + ""); + + recvbuf.pattern_check(2); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_err_test, remote_access_error, + !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY | + UCT_IFACE_FLAG_ERRHANDLE_REMOTE_MEM)) { + mapped_buffer sendbuf(16, 1, sender()); + mapped_buffer recvbuf(16, 2, receiver()); + + test_error_run(OP_PUT_ZCOPY, 0, + sendbuf.ptr(), sendbuf.length(), sendbuf.memh(), + (uintptr_t)get_unused_address(recvbuf.length()), recvbuf.rkey(), + ""); + + recvbuf.pattern_check(2); +} + +#if ENABLE_PARAMS_CHECK +UCS_TEST_SKIP_COND_P(uct_p2p_err_test, invalid_put_short_length, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + size_t max_short = sender().iface_attr().cap.put.max_short; + if (max_short > (2 * UCS_MBYTE)) { + UCS_TEST_SKIP_R("max_short too large"); + } + + mapped_buffer sendbuf(max_short + 1, 1, sender()); + mapped_buffer recvbuf(max_short + 1, 2, receiver()); + + test_error_run(OP_PUT_SHORT, 0, sendbuf.ptr(), sendbuf.length(), + UCT_MEM_HANDLE_NULL, recvbuf.addr(), recvbuf.rkey(), + "length"); + + recvbuf.pattern_check(2); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_err_test, invalid_put_bcopy_length, + !check_caps(UCT_IFACE_FLAG_PUT_BCOPY | + UCT_IFACE_FLAG_ERRHANDLE_BCOPY_LEN)) { + size_t max_bcopy = sender().iface_attr().cap.put.max_bcopy; + if (max_bcopy > (2 * UCS_MBYTE)) { + UCS_TEST_SKIP_R("max_bcopy too large"); + } + + mapped_buffer sendbuf(max_bcopy + 1, 1, sender()); + mapped_buffer recvbuf(max_bcopy + 1, 2, 
receiver()); + + test_error_run(OP_PUT_BCOPY, 0, sendbuf.ptr(), sendbuf.length(), + UCT_MEM_HANDLE_NULL, recvbuf.addr(), recvbuf.rkey(), + "length"); + + recvbuf.pattern_check(2); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_err_test, invalid_am_short_length, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + size_t max_short = sender().iface_attr().cap.am.max_short; + if (max_short > (2 * UCS_MBYTE)) { + UCS_TEST_SKIP_R("max_short too large"); + } + + mapped_buffer sendbuf(max_short + 1 - sizeof(uint64_t), 1, sender()); + mapped_buffer recvbuf(max_short + 1, 2, receiver()); + + test_error_run(OP_AM_SHORT, 0, sendbuf.ptr(), sendbuf.length(), + UCT_MEM_HANDLE_NULL, recvbuf.addr(), recvbuf.rkey(), + "length"); + + recvbuf.pattern_check(2); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_err_test, invalid_am_bcopy_length, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_ERRHANDLE_BCOPY_LEN)) { + size_t max_bcopy = sender().iface_attr().cap.am.max_bcopy; + if (max_bcopy > (2 * UCS_MBYTE)) { + UCS_TEST_SKIP_R("max_bcopy too large"); + } + + mapped_buffer sendbuf(max_bcopy + 1, 1, sender()); + mapped_buffer recvbuf(max_bcopy + 1, 2, receiver()); + + test_error_run(OP_AM_BCOPY, 0, sendbuf.ptr(), sendbuf.length(), + UCT_MEM_HANDLE_NULL, recvbuf.addr(), recvbuf.rkey(), + "length"); + + recvbuf.pattern_check(2); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_err_test, invalid_am_zcopy_hdr_length, + !check_caps(UCT_IFACE_FLAG_AM_ZCOPY)) { + size_t max_hdr = sender().iface_attr().cap.am.max_hdr; + if (max_hdr > (2 * UCS_MBYTE)) { + UCS_TEST_SKIP_R("max_hdr too large"); + } + if (max_hdr + 2 > sender().iface_attr().cap.am.max_bcopy) { + UCS_TEST_SKIP_R("max_hdr + 2 exceeds maximal bcopy size"); + } + + /* Send header of (max_hdr+1) and payload length 1 */ + mapped_buffer sendbuf(max_hdr + 1, 1, sender()); + mapped_buffer recvbuf(max_hdr + 2, 2, receiver()); + + test_error_run(OP_AM_ZCOPY, 0, sendbuf.ptr(), sendbuf.length(), + sendbuf.memh(), recvbuf.addr(), recvbuf.rkey(), + "length"); + + 
recvbuf.pattern_check(2); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_err_test, short_invalid_am_id, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) { + mapped_buffer sendbuf(4, 2, sender()); + + test_error_run(OP_AM_SHORT, UCT_AM_ID_MAX, sendbuf.ptr(), sendbuf.length(), + UCT_MEM_HANDLE_NULL, 0, UCT_INVALID_RKEY, + "active message id"); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_err_test, bcopy_invalid_am_id, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY)) { + mapped_buffer sendbuf(4, 2, sender()); + + test_error_run(OP_AM_BCOPY, UCT_AM_ID_MAX, sendbuf.ptr(), sendbuf.length(), + UCT_MEM_HANDLE_NULL, 0, UCT_INVALID_RKEY, + "active message id"); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_err_test, zcopy_invalid_am_id, + !check_caps(UCT_IFACE_FLAG_AM_ZCOPY)) { + mapped_buffer sendbuf(4, 2, sender()); + + test_error_run(OP_AM_ZCOPY, UCT_AM_ID_MAX, sendbuf.ptr(), sendbuf.length(), + UCT_MEM_HANDLE_NULL, 0, UCT_INVALID_RKEY, + "active message id"); +} +#endif + +UCT_INSTANTIATE_TEST_CASE(uct_p2p_err_test) diff --git a/test/gtest/uct/test_p2p_mix.cc b/test/gtest/uct/test_p2p_mix.cc new file mode 100644 index 0000000..4eb51a7 --- /dev/null +++ b/test/gtest/uct/test_p2p_mix.cc @@ -0,0 +1,234 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test_p2p_mix.h" + +extern "C" { +#include +} +#include + + +uct_p2p_mix_test::uct_p2p_mix_test() : uct_p2p_test(0), m_send_size(0) { +} + +ucs_status_t uct_p2p_mix_test::am_callback(void *arg, void *data, size_t length, + unsigned flags) +{ + ucs_atomic_sub32(&am_pending, 1); + return UCS_OK; +} + +void uct_p2p_mix_test::completion_callback(uct_completion_t *comp, ucs_status_t status) +{ + EXPECT_UCS_OK(status); +} + +template +ucs_status_t uct_p2p_mix_test::uct_p2p_mix_test::atomic_fop(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp) +{ + if (sizeof(T) == sizeof(uint32_t)) { + return uct_ep_atomic32_fetch(sender().ep(0), OP, 1, (uint32_t*)sendbuf.ptr(), + recvbuf.addr(), recvbuf.rkey(), comp); + } else { + return uct_ep_atomic64_fetch(sender().ep(0), OP, 1, (uint64_t*)sendbuf.ptr(), + recvbuf.addr(), recvbuf.rkey(), comp); + } +} + +ucs_status_t uct_p2p_mix_test::cswap64(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp) +{ + return uct_ep_atomic_cswap64(sender().ep(0), 0, 1, recvbuf.addr(), + recvbuf.rkey(), (uint64_t*)sendbuf.ptr(), + comp); +} + +ucs_status_t uct_p2p_mix_test::put_short(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp) +{ + return uct_ep_put_short(sender().ep(0), sendbuf.ptr(), + sendbuf.length(), recvbuf.addr(), + recvbuf.rkey()); +} + +ucs_status_t uct_p2p_mix_test::put_bcopy(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp) +{ + ssize_t packed_len; + packed_len = uct_ep_put_bcopy(sender().ep(0), mapped_buffer::pack, + (void*)&sendbuf, recvbuf.addr(), recvbuf.rkey()); + if (packed_len >= 0) { + EXPECT_EQ(sendbuf.length(), (size_t)packed_len); + return UCS_OK; + } else { + return (ucs_status_t)packed_len; + } +} + +ucs_status_t uct_p2p_mix_test::am_short(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp) +{ + ucs_status_t status; + status = 
uct_ep_am_short(sender().ep(0), AM_ID, *(uint64_t*)sendbuf.ptr(), + (uint64_t*)sendbuf.ptr() + 1, + sendbuf.length() - sizeof(uint64_t)); + if (status == UCS_OK) { + ucs_atomic_add32(&am_pending, +1); + } + return status; +} + +ucs_status_t uct_p2p_mix_test::am_zcopy(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp) +{ + ucs_status_t status; + size_t header_length; + uct_iov_t iov; + + header_length = ucs_min(ucs::rand() % sender().iface_attr().cap.am.max_hdr, + sendbuf.length()); + + iov.buffer = (char*)sendbuf.ptr() + header_length; + iov.count = 1; + iov.length = sendbuf.length() - header_length; + iov.memh = sendbuf.memh(); + status = uct_ep_am_zcopy(sender().ep(0), AM_ID, sendbuf.ptr(), header_length, + &iov, 1, 0, comp); + if (status == UCS_OK || status == UCS_INPROGRESS) { + ucs_atomic_add32(&am_pending, +1); + } + return status; +} + +void uct_p2p_mix_test::random_op(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf) +{ + uct_completion_t comp; + ucs_status_t status; + int op; + + op = ucs::rand() % m_avail_send_funcs.size(); + comp.count = 1; + comp.func = completion_callback; + + for (;;) { + status = (this->*m_avail_send_funcs[op])(sendbuf, recvbuf, &comp); + if (status == UCS_INPROGRESS) { + /* coverity[loop_condition] */ + while (comp.count > 0) { + progress(); + } + break; + } else if (status == UCS_ERR_NO_RESOURCE) { + progress(); + continue; + } else { + ASSERT_UCS_OK(status); + break; + } + } +} + +void uct_p2p_mix_test::run(unsigned count) { + if (m_avail_send_funcs.size() == 0) { + UCS_TEST_SKIP_R("unsupported"); + } + if (sender().md_attr().cap.access_mem_type != UCS_MEMORY_TYPE_HOST) { + UCS_TEST_SKIP_R("skipping on non-host memory"); + } + + mapped_buffer sendbuf(m_send_size, 0, sender()); + mapped_buffer recvbuf(m_send_size, 0, receiver()); + + for (unsigned i = 0; i < count; ++i) { + random_op(sendbuf, recvbuf); + } + + flush(); +} + +void uct_p2p_mix_test::init() { + uct_p2p_test::init(); + 
ucs_status_t status = uct_iface_set_am_handler(receiver().iface(), AM_ID, + am_callback, NULL, + UCT_CB_FLAG_ASYNC); + ASSERT_UCS_OK(status); + + m_send_size = MAX_SIZE; + if (sender().iface_attr().cap.flags & UCT_IFACE_FLAG_AM_SHORT) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::am_short); + m_send_size = ucs_min(m_send_size, sender().iface_attr().cap.am.max_short); + } + if (sender().iface_attr().cap.flags & UCT_IFACE_FLAG_AM_ZCOPY) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::am_zcopy); + m_send_size = ucs_min(m_send_size, sender().iface_attr().cap.am.max_zcopy); + } + if (sender().iface_attr().cap.flags & UCT_IFACE_FLAG_PUT_SHORT) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::put_short); + m_send_size = ucs_min(m_send_size, sender().iface_attr().cap.put.max_short); + } + if (sender().iface_attr().cap.flags & UCT_IFACE_FLAG_PUT_BCOPY) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::put_bcopy); + m_send_size = ucs_min(m_send_size, sender().iface_attr().cap.put.max_bcopy); + } + if (sender().iface_attr().cap.atomic64.fop_flags & UCS_BIT(UCT_ATOMIC_OP_CSWAP)) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::cswap64); + } + if (sender().iface_attr().cap.atomic64.fop_flags & UCS_BIT(UCT_ATOMIC_OP_ADD)) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::atomic_fop); + } + if (sender().iface_attr().cap.atomic32.fop_flags & UCS_BIT(UCT_ATOMIC_OP_ADD)) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::atomic_fop); + } + if (sender().iface_attr().cap.atomic64.fop_flags & UCS_BIT(UCT_ATOMIC_OP_AND)) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::atomic_fop); + } + if (sender().iface_attr().cap.atomic32.fop_flags & UCS_BIT(UCT_ATOMIC_OP_AND)) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::atomic_fop); + } + if (sender().iface_attr().cap.atomic64.fop_flags & UCS_BIT(UCT_ATOMIC_OP_OR)) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::atomic_fop); + } + if (sender().iface_attr().cap.atomic32.fop_flags & UCS_BIT(UCT_ATOMIC_OP_OR)) { + 
m_avail_send_funcs.push_back(&uct_p2p_mix_test::atomic_fop); + } + if (sender().iface_attr().cap.atomic64.fop_flags & UCS_BIT(UCT_ATOMIC_OP_XOR)) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::atomic_fop); + } + if (sender().iface_attr().cap.atomic32.fop_flags & UCS_BIT(UCT_ATOMIC_OP_XOR)) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::atomic_fop); + } + if (sender().iface_attr().cap.atomic64.fop_flags & UCS_BIT(UCT_ATOMIC_OP_SWAP)) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::atomic_fop); + } + if (sender().iface_attr().cap.atomic32.fop_flags & UCS_BIT(UCT_ATOMIC_OP_SWAP)) { + m_avail_send_funcs.push_back(&uct_p2p_mix_test::atomic_fop); + } +} + +void uct_p2p_mix_test::cleanup() { + while (am_pending) { + progress(); + } + uct_iface_set_am_handler(receiver().iface(), AM_ID, NULL, NULL, 0); + uct_p2p_test::cleanup(); +} + +uint32_t uct_p2p_mix_test::am_pending = 0; + +UCS_TEST_P(uct_p2p_mix_test, mix_10000) { + run(10000); +} + +UCT_INSTANTIATE_TEST_CASE(uct_p2p_mix_test) diff --git a/test/gtest/uct/test_p2p_mix.h b/test/gtest/uct/test_p2p_mix.h new file mode 100644 index 0000000..4588095 --- /dev/null +++ b/test/gtest/uct/test_p2p_mix.h @@ -0,0 +1,70 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#ifndef UCT_TEST_P2P_MIX_H +#define UCT_TEST_P2P_MIX_H + +#include "uct_p2p_test.h" + +class uct_p2p_mix_test : public uct_p2p_test { +public: + + typedef ucs_status_t + (uct_p2p_mix_test::* send_func_t)(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp); + + static const uint8_t AM_ID = 1; + static const size_t MAX_SIZE = 256; + + uct_p2p_mix_test(); + +protected: + static ucs_status_t am_callback(void *arg, void *data, size_t length, + unsigned flags); + + static void completion_callback(uct_completion_t *comp, ucs_status_t status); + + template + ucs_status_t atomic_fop(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp); + + ucs_status_t cswap64(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp); + + ucs_status_t put_short(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp); + + ucs_status_t put_bcopy(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp); + + ucs_status_t am_short(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp); + + ucs_status_t am_zcopy(const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, + uct_completion_t *comp); + + void random_op(const mapped_buffer &sendbuf, const mapped_buffer &recvbuf); + + void run(unsigned count); + + virtual void init(); + + virtual void cleanup(); + +private: + std::vector m_avail_send_funcs; + size_t m_send_size; + static uint32_t am_pending; +}; + +#endif diff --git a/test/gtest/uct/test_p2p_rma.cc b/test/gtest/uct/test_p2p_rma.cc new file mode 100644 index 0000000..a418fff --- /dev/null +++ b/test/gtest/uct/test_p2p_rma.cc @@ -0,0 +1,136 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +#include "test_p2p_rma.h" + +#include + + +uct_p2p_rma_test::uct_p2p_rma_test() : uct_p2p_test(0) { +} + +ucs_status_t uct_p2p_rma_test::put_short(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf) +{ + return uct_ep_put_short(ep, sendbuf.ptr(), sendbuf.length(), + recvbuf.addr(), recvbuf.rkey()); +} + +ucs_status_t uct_p2p_rma_test::put_bcopy(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf) +{ + ssize_t packed_len; + packed_len = uct_ep_put_bcopy(ep, mapped_buffer::pack, (void*)&sendbuf, + recvbuf.addr(), recvbuf.rkey()); + if (packed_len >= 0) { + EXPECT_EQ(sendbuf.length(), (size_t)packed_len); + return UCS_OK; + } else { + return (ucs_status_t)packed_len; + } +} + +ucs_status_t uct_p2p_rma_test::put_zcopy(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf) +{ + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, sendbuf.ptr(), sendbuf.length(), + sendbuf.memh(), sender().iface_attr().cap.put.max_iov); + + return uct_ep_put_zcopy(ep, iov, iovcnt, recvbuf.addr(), recvbuf.rkey(), comp()); +} + +ucs_status_t uct_p2p_rma_test::get_short(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf) +{ + return uct_ep_get_short(ep, sendbuf.ptr(), sendbuf.length(), + recvbuf.addr(), recvbuf.rkey()); +} + +ucs_status_t uct_p2p_rma_test::get_bcopy(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf) +{ + return uct_ep_get_bcopy(ep, (uct_unpack_callback_t)memcpy, sendbuf.ptr(), + sendbuf.length(), recvbuf.addr(), + recvbuf.rkey(), comp()); +} + +ucs_status_t uct_p2p_rma_test::get_zcopy(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf) +{ + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, sendbuf.ptr(), sendbuf.length(), + sendbuf.memh(), sender().iface_attr().cap.get.max_iov); + + return uct_ep_get_zcopy(ep, iov, iovcnt, recvbuf.addr(), recvbuf.rkey(), comp()); +} + +void uct_p2p_rma_test::test_xfer(send_func_t send, size_t length, + unsigned flags, 
ucs_memory_type_t mem_type) +{ + ucs_memory_type_t src_mem_type = UCS_MEMORY_TYPE_HOST; + + if (has_transport("cuda_ipc")) { + src_mem_type = mem_type; + } + + mapped_buffer sendbuf(length, SEED1, sender(), 1, src_mem_type); + mapped_buffer recvbuf(length, SEED2, receiver(), 3, mem_type); + + blocking_send(send, sender_ep(), sendbuf, recvbuf, true); + if (flags & TEST_UCT_FLAG_SEND_ZCOPY) { + sendbuf.pattern_fill(SEED3); + wait_for_remote(); + recvbuf.pattern_check(SEED1); + } else if (flags & TEST_UCT_FLAG_RECV_ZCOPY) { + recvbuf.pattern_fill(SEED3); + sendbuf.pattern_check(SEED2); + wait_for_remote(); + } +} + +UCS_TEST_SKIP_COND_P(uct_p2p_rma_test, put_short, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) { + test_xfer_multi(static_cast(&uct_p2p_rma_test::put_short), + 0ul, sender().iface_attr().cap.put.max_short, + TEST_UCT_FLAG_SEND_ZCOPY); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_rma_test, put_bcopy, + !check_caps(UCT_IFACE_FLAG_PUT_BCOPY)) { + test_xfer_multi(static_cast(&uct_p2p_rma_test::put_bcopy), + 0ul, sender().iface_attr().cap.put.max_bcopy, + TEST_UCT_FLAG_SEND_ZCOPY); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_rma_test, put_zcopy, + !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY)) { + test_xfer_multi(static_cast(&uct_p2p_rma_test::put_zcopy), + 0ul, sender().iface_attr().cap.put.max_zcopy, + TEST_UCT_FLAG_SEND_ZCOPY); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_rma_test, get_short, + !check_caps(UCT_IFACE_FLAG_GET_SHORT)) { + test_xfer_multi(static_cast(&uct_p2p_rma_test::get_short), + 0ul, sender().iface_attr().cap.get.max_short, + TEST_UCT_FLAG_RECV_ZCOPY); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_rma_test, get_bcopy, + !check_caps(UCT_IFACE_FLAG_GET_BCOPY)) { + test_xfer_multi(static_cast(&uct_p2p_rma_test::get_bcopy), + 1ul, sender().iface_attr().cap.get.max_bcopy, + TEST_UCT_FLAG_RECV_ZCOPY); +} + +UCS_TEST_SKIP_COND_P(uct_p2p_rma_test, get_zcopy, + !check_caps(UCT_IFACE_FLAG_GET_ZCOPY)) { + test_xfer_multi(static_cast(&uct_p2p_rma_test::get_zcopy), + ucs_max(1ull, 
sender().iface_attr().cap.get.min_zcopy), + sender().iface_attr().cap.get.max_zcopy, + TEST_UCT_FLAG_RECV_ZCOPY); +} + +UCT_INSTANTIATE_TEST_CASE(uct_p2p_rma_test) diff --git a/test/gtest/uct/test_p2p_rma.h b/test/gtest/uct/test_p2p_rma.h new file mode 100644 index 0000000..e9b0062 --- /dev/null +++ b/test/gtest/uct/test_p2p_rma.h @@ -0,0 +1,42 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#ifndef UCT_TEST_P2P_RMA +#define UCT_TEST_P2P_RMA + +#include "uct_p2p_test.h" + +class uct_p2p_rma_test : public uct_p2p_test { +public: + static const uint64_t SEED1 = 0x1111111111111111lu; + static const uint64_t SEED2 = 0x2222222222222222lu; + static const uint64_t SEED3 = 0x3333333333333333lu; + + uct_p2p_rma_test(); + + ucs_status_t put_short(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf); + + ucs_status_t put_bcopy(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf); + + ucs_status_t put_zcopy(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf); + + ucs_status_t get_short(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf); + + ucs_status_t get_bcopy(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf); + + ucs_status_t get_zcopy(uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf); + + virtual void test_xfer(send_func_t send, size_t length, + unsigned flags, ucs_memory_type_t mem_type); +}; + +#endif diff --git a/test/gtest/uct/test_peer_failure.cc b/test/gtest/uct/test_peer_failure.cc new file mode 100644 index 0000000..a29af98 --- /dev/null +++ b/test/gtest/uct/test_peer_failure.cc @@ -0,0 +1,485 @@ + +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2016. ALL RIGHTS RESERVED. +* Copyright (C) ARM Ltd. 2016.All rights reserved. +* See file LICENSE for terms. 
+*/ + +extern "C" { +#include +} +#include +#include "uct_test.h" + + +class test_uct_peer_failure : public uct_test { +private: + struct am_handler_setter + { + am_handler_setter(test_uct_peer_failure *test) : m_test(test) {} + + void operator() (test_uct_peer_failure::entity *e) { + uct_iface_set_am_handler(e->iface(), 0, + am_dummy_handler, + reinterpret_cast(m_test), 0); + } + + test_uct_peer_failure* m_test; + }; + +public: + + test_uct_peer_failure() : m_sender(NULL), m_nreceivers(2), + m_tx_window(100), m_err_count(0), m_am_count(0) {} + + virtual void init(); + + inline uct_iface_params_t entity_params() { + static uct_iface_params_t params; + params.field_mask = UCT_IFACE_PARAM_FIELD_ERR_HANDLER | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_ARG | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS; + params.err_handler = get_err_handler(); + params.err_handler_arg = reinterpret_cast(this); + params.err_handler_flags = 0; + return params; + } + + virtual uct_error_handler_t get_err_handler() const { + return err_cb; + } + + static ucs_status_t am_dummy_handler(void *arg, void *data, size_t length, + unsigned flags) { + reinterpret_cast(arg)->m_am_count++; + return UCS_OK; + } + + static ucs_status_t pending_cb(uct_pending_req_t *self) + { + m_req_count++; + return UCS_OK; + } + + static void purge_cb(uct_pending_req_t *self, void *arg) + { + m_req_count++; + } + + static ucs_status_t err_cb(void *arg, uct_ep_h ep, ucs_status_t status) + { + EXPECT_EQ(UCS_ERR_ENDPOINT_TIMEOUT, status); + reinterpret_cast(arg)->m_err_count++; + return UCS_OK; + } + + void kill_receiver() + { + ucs_assert(!m_receivers.empty()); + m_entities.remove(m_receivers.front()); + ucs_assert(m_entities.size() == m_receivers.size()); + m_receivers.erase(m_receivers.begin()); + } + + void new_receiver() + { + uct_iface_params_t p = entity_params(); + p.field_mask |= UCT_IFACE_PARAM_FIELD_OPEN_MODE; + p.open_mode = UCT_IFACE_OPEN_MODE_DEVICE; + m_receivers.push_back(uct_test::create_entity(p)); + 
m_entities.push_back(m_receivers.back()); + m_sender->connect(m_receivers.size() - 1, *m_receivers.back(), 0); + + am_handler_setter(this)(m_receivers.back()); + /* Make sure that TL is up and has resources */ + send_recv_am(m_receivers.size() - 1); + } + + void set_am_handlers() + { + check_caps_skip(UCT_IFACE_FLAG_CB_SYNC); + std::for_each(m_receivers.begin(), m_receivers.end(), + am_handler_setter(this)); + } + + ucs_status_t send_am(int index) + { + ucs_status_t status; + while ((status = uct_ep_am_short(m_sender->ep(index), 0, 0, NULL, 0)) == + UCS_ERR_NO_RESOURCE) { + progress(); + }; + return status; + } + + void send_recv_am(int index, ucs_status_t exp_status = UCS_OK) + { + m_am_count = 0; + + ucs_status_t status = send_am(index); + EXPECT_EQ(exp_status, status); + + if (exp_status == UCS_OK) { + wait_for_flag(&m_am_count); + EXPECT_EQ(m_am_count, 1ul); + } + } + + uct_ep_h ep0() { + return m_sender->ep(0); + } + + ucs_status_t flush_ep(size_t index, ucs_time_t deadline = ULONG_MAX) { + uct_completion_t comp; + ucs_status_t status; + int is_time_out; + + comp.count = 2; + comp.func = NULL; + do { + progress(); + status = uct_ep_flush(m_sender->ep(index), 0, &comp); + is_time_out = (ucs_get_time() > deadline); + } while ((status == UCS_ERR_NO_RESOURCE) && !is_time_out); + + if (!is_time_out) { + ASSERT_UCS_OK_OR_INPROGRESS(status); + } + + if (status == UCS_OK) { + return UCS_OK; + } else if (is_time_out) { + return UCS_ERR_TIMED_OUT; + } + + /* coverity[loop_condition] */ + while ((comp.count == 2) && !is_time_out) { + progress(); + is_time_out = (ucs_get_time() > deadline); + } + + return (comp.count == 1) ? UCS_OK : + (is_time_out ? 
UCS_ERR_TIMED_OUT : UCS_ERR_OUT_OF_RANGE); + } + +protected: + entity *m_sender; + std::vector m_receivers; + size_t m_nreceivers; + size_t m_tx_window; + size_t m_err_count; + size_t m_am_count; + static size_t m_req_count; + static const uint64_t m_required_caps; +}; + +size_t test_uct_peer_failure::m_req_count = 0ul; +const uint64_t test_uct_peer_failure::m_required_caps = UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_PENDING | + UCT_IFACE_FLAG_CB_SYNC | + UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE; +/* Shorten transport timeouts, create the sender and the first two receivers, then reset the counters */ +void test_uct_peer_failure::init() +{ + uct_test::init(); + + /* To reduce test execution time, decrease retransmission timeouts + * where it is relevant */ + if (has_rc_or_dc()) { + set_config("RC_TIMEOUT=100us"); /* 100 us should be enough */ + set_config("RC_RETRY_COUNT=4"); + } else if (has_ud()) { + set_config("UD_TIMEOUT=3s"); + } + + uct_iface_params_t p = entity_params(); + p.field_mask |= UCT_IFACE_PARAM_FIELD_OPEN_MODE; + p.open_mode = UCT_IFACE_OPEN_MODE_DEVICE; + m_sender = uct_test::create_entity(p); + m_entities.push_back(m_sender); + + check_skip_test(); + for (size_t i = 0; i < 2; ++i) { + new_receiver(); + } + + m_err_count = 0; + m_req_count = 0; + m_am_count = 0; +} + +UCS_TEST_SKIP_COND_P(test_uct_peer_failure, peer_failure, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT | + m_required_caps)) +{ + { + scoped_log_handler slh(wrap_errors_logger); + + kill_receiver(); + EXPECT_EQ(UCS_OK, uct_ep_put_short(ep0(), NULL, 0, 0, 0)); + + flush(); + } + + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, NULL, 0, NULL, 1); + + /* Check that all ep operations return pre-defined error code */ + EXPECT_EQ(uct_ep_am_short(ep0(), 0, 0, NULL, 0), UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_am_bcopy(ep0(), 0, NULL, NULL, 0), UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_am_zcopy(ep0(), 0, NULL, 0, iov, iovcnt, 0, NULL), + UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_put_short(ep0(), NULL, 0, 0, 0), UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_put_bcopy(ep0(), NULL, NULL, 0, 0), 
UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_put_zcopy(ep0(), iov, iovcnt, 0, 0, NULL), + UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_get_bcopy(ep0(), NULL, NULL, 0, 0, 0, NULL), + UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_get_zcopy(ep0(), iov, iovcnt, 0, 0, NULL), + UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_atomic64_post(ep0(), UCT_ATOMIC_OP_ADD, 0, 0, 0), UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_atomic32_post(ep0(), UCT_ATOMIC_OP_ADD, 0, 0, 0), UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_atomic64_fetch(ep0(), UCT_ATOMIC_OP_ADD, 0, NULL, 0, 0, NULL), + UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_atomic32_fetch(ep0(), UCT_ATOMIC_OP_ADD, 0, NULL, 0, 0, NULL), + UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_atomic_cswap64(ep0(), 0, 0, 0, 0, NULL, NULL), + UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_atomic_cswap32(ep0(), 0, 0, 0, 0, NULL, NULL), + UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_flush(ep0(), 0, NULL), UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_get_address(ep0(), NULL), UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_pending_add(ep0(), NULL, 0), UCS_ERR_ENDPOINT_TIMEOUT); + EXPECT_EQ(uct_ep_connect_to_ep(ep0(), NULL, NULL), UCS_ERR_ENDPOINT_TIMEOUT); + + EXPECT_GT(m_err_count, 0ul); +} + +UCS_TEST_SKIP_COND_P(test_uct_peer_failure, purge_failed_peer, + !check_caps(m_required_caps)) +{ + set_am_handlers(); + + send_recv_am(0); + send_recv_am(1); + + const size_t num_pend_sends = 3ul; + uct_pending_req_t reqs[num_pend_sends]; + { + scoped_log_handler slh(wrap_errors_logger); + + kill_receiver(); + + ucs_status_t status; + do { + status = uct_ep_am_short(ep0(), 0, 0, NULL, 0); + } while (status == UCS_OK); + + for (size_t i = 0; i < num_pend_sends; i ++) { + reqs[i].func = pending_cb; + EXPECT_EQ(uct_ep_pending_add(ep0(), &reqs[i], 0), UCS_OK); + } + + flush(); + } + + EXPECT_EQ(uct_ep_am_short(ep0(), 0, 0, NULL, 0), UCS_ERR_ENDPOINT_TIMEOUT); + + uct_ep_pending_purge(ep0(), purge_cb, NULL); + EXPECT_EQ(num_pend_sends, m_req_count); + 
EXPECT_GE(m_err_count, 0ul); +} + +UCS_TEST_SKIP_COND_P(test_uct_peer_failure, two_pairs_send, + !check_caps(m_required_caps)) +{ + set_am_handlers(); + + /* queue sends on 1st pair */ + for (size_t i = 0; i < m_tx_window; ++i) { + send_am(0); + } + + /* kill the 1st receiver while sending on 2nd pair */ + { + scoped_log_handler slh(wrap_errors_logger); + kill_receiver(); + send_am(0); + send_recv_am(1); + flush(); + } + + /* test flushing one operations */ + send_recv_am(0, UCS_ERR_ENDPOINT_TIMEOUT); + send_recv_am(1, UCS_OK); + flush(); + + /* test flushing many operations */ + for (size_t i = 0; i < (m_tx_window * 10 / ucs::test_time_multiplier()); ++i) { + send_recv_am(0, UCS_ERR_ENDPOINT_TIMEOUT); + send_recv_am(1, UCS_OK); + } + flush(); +} + + +UCS_TEST_SKIP_COND_P(test_uct_peer_failure, two_pairs_send_after, + !check_caps(m_required_caps)) +{ + set_am_handlers(); + + { + scoped_log_handler slh(wrap_errors_logger); + kill_receiver(); + for (int i = 0; i < 100; ++i) { + send_am(0); + } + flush(); + } + + send_recv_am(0, UCS_ERR_ENDPOINT_TIMEOUT); + + m_am_count = 0; + send_am(1); + ucs_debug("flushing"); + flush_ep(1); + ucs_debug("flushed"); + wait_for_flag(&m_am_count); + EXPECT_EQ(m_am_count, 1ul); +} + +UCT_INSTANTIATE_TEST_CASE(test_uct_peer_failure) + +class test_uct_peer_failure_cb : public test_uct_peer_failure { +public: + virtual uct_error_handler_t get_err_handler() const { + return err_cb_ep_destroy; + } + + static ucs_status_t err_cb_ep_destroy(void *arg, uct_ep_h ep, ucs_status_t status) { + test_uct_peer_failure_cb *self(reinterpret_cast(arg)); + EXPECT_EQ(self->ep0(), ep); + self->m_sender->destroy_ep(0); + return UCS_OK; + } +}; + +UCS_TEST_SKIP_COND_P(test_uct_peer_failure_cb, desproy_ep_cb, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT | + m_required_caps)) +{ + scoped_log_handler slh(wrap_errors_logger); + kill_receiver(); + EXPECT_EQ(uct_ep_put_short(ep0(), NULL, 0, 0, 0), UCS_OK); + flush(); +} + 
+UCT_INSTANTIATE_TEST_CASE(test_uct_peer_failure_cb) + +class test_uct_peer_failure_multiple : public test_uct_peer_failure +{ +public: + virtual void init(); + +protected: + size_t get_tx_queue_len() const; +}; + +void test_uct_peer_failure_multiple::init() +{ + size_t tx_queue_len = get_tx_queue_len(); + + if (ucs_get_page_size() > 4096) { + /* NOTE: Too much receivers may cause failure of ibv_open_device */ + test_uct_peer_failure::m_nreceivers = 10; + } else { + test_uct_peer_failure::m_nreceivers = tx_queue_len; + } + + test_uct_peer_failure::m_tx_window = tx_queue_len / 3; + + test_uct_peer_failure::init(); + + m_receivers.reserve(m_nreceivers); + while (m_receivers.size() < m_nreceivers) { + new_receiver(); + } +} + +size_t test_uct_peer_failure_multiple::get_tx_queue_len() const +{ + bool set = true; + std::string name, val; + size_t tx_queue_len; + + if (has_rc()) { + name = "RC_RC_IB_TX_QUEUE_LEN"; + } else if (has_transport("dc_mlx5")) { + name = "DC_RC_IB_TX_QUEUE_LEN"; + } else if (has_ud()) { + name = "UD_IB_TX_QUEUE_LEN"; + } else { + set = false; + name = "TX_QUEUE_LEN"; + } + + if (get_config(name, val)) { + tx_queue_len = ucs::from_string(val); + EXPECT_LT(0ul, tx_queue_len); + } else { + tx_queue_len = 256; + UCS_TEST_MESSAGE << name << " setting not found, " + << "taken test default value: " << tx_queue_len; + if (set) { + UCS_TEST_ABORT(name + " config name must be found for %s transport" + + GetParam()->tl_name); + } + } + + return tx_queue_len; +} + +/* Skip under valgrind due to brk segment overflow. 
+ * See https://bugs.kde.org/show_bug.cgi?id=352742 */ +UCS_TEST_SKIP_COND_P(test_uct_peer_failure_multiple, test, + (RUNNING_ON_VALGRIND || + !check_caps(m_required_caps)), + "RC_TM_ENABLE?=n") +{ + ucs_time_t timeout = ucs_get_time() + + ucs_time_from_sec(200 * ucs::test_time_multiplier()); + + { + scoped_log_handler slh(wrap_errors_logger); + for (size_t idx = 0; idx < m_nreceivers - 1; ++idx) { + for (size_t i = 0; i < m_tx_window; ++i) { + send_am(idx); + } + kill_receiver(); + } + flush(timeout); + + /* if EPs are not failed yet, these ops should trigger that */ + for (size_t idx = 0; idx < m_nreceivers - 1; ++idx) { + for (size_t i = 0; i < m_tx_window; ++i) { + send_am(idx); + } + } + + flush(timeout); + } + + for (size_t idx = 0; idx < m_nreceivers - 1; ++idx) { + send_recv_am(idx, UCS_ERR_ENDPOINT_TIMEOUT); + } + + m_am_count = 0; + send_am(m_nreceivers - 1); + ucs_debug("flushing"); + flush_ep(m_nreceivers - 1); + ucs_debug("flushed"); + wait_for_flag(&m_am_count); + EXPECT_EQ(m_am_count, 1ul); +} + +UCT_INSTANTIATE_TEST_CASE(test_uct_peer_failure_multiple) diff --git a/test/gtest/uct/test_pending.cc b/test/gtest/uct/test_pending.cc new file mode 100644 index 0000000..a98a805 --- /dev/null +++ b/test/gtest/uct/test_pending.cc @@ -0,0 +1,579 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. +* Copyright (C) UT-Battelle, LLC. 2014. ALL RIGHTS RESERVED. +* See file LICENSE for terms. 
+*/ + +extern "C" { +#include +#include +#include +} +#include +#include "uct_test.h" + +class test_uct_pending : public uct_test { +public: + test_uct_pending() : uct_test() { + m_e1 = NULL; + m_e2 = NULL; + + if (has_transport("tcp")) { + /* Set `SO_SNDBUF` and `SO_RCVBUF` socket options to minimum + * values to reduce the testing time for `pending_fairness` test */ + modify_config("SNDBUF", "1k"); + modify_config("RCVBUF", "128"); + } + } + + virtual void init() { + uct_test::init(); + + m_e1 = uct_test::create_entity(0); + m_entities.push_back(m_e1); + + m_e2 = uct_test::create_entity(0); + m_entities.push_back(m_e2); + + check_skip_test(); + } + + void initialize() { + + m_e1->connect(0, *m_e2, 0); + m_e2->connect(0, *m_e1, 0); + } + + typedef struct pending_send_request { + uct_ep_h ep; + uint64_t data; + int countdown; /* Actually send after X calls */ + uct_pending_req_t uct; + int active; + int id; + mapped_buffer *buf; + } pending_send_request_t; + + void send_am_fill_resources(uct_ep_h ep) { + uint64_t send_data = 0xdeadbeef; + ucs_time_t loop_end_limit = ucs_get_time() + ucs_time_from_sec(2); + ucs_status_t status; + + do { + status = uct_ep_am_short(ep, 0, test_pending_hdr, &send_data, + sizeof(send_data)); + if (status == UCS_ERR_NO_RESOURCE) { + break; + } + } while (ucs_get_time() < loop_end_limit); + + if (status != UCS_ERR_NO_RESOURCE) { + UCS_TEST_SKIP_R("Can't fill UCT resources in the given time."); + } + } + + static ucs_status_t am_handler(void *arg, void *data, size_t length, + unsigned flags) { + + volatile unsigned *counter = (volatile unsigned*) arg; + uint64_t test_hdr = *(uint64_t *) data; + uint64_t actual_data = *(unsigned*)((char*)data + sizeof(test_hdr)); + + if ((test_hdr == 0xabcd) && (actual_data == (0xdeadbeef + *counter))) { + ucs_atomic_add32(counter, 1); + } else { + UCS_TEST_ABORT("Error in comparison in pending_am_handler. 
Counter: " << counter); + } + + return UCS_OK; + } + + static ucs_status_t am_handler_simple(void *arg, void *data, size_t length, + unsigned flags) { + return UCS_OK; + } + + static ucs_status_t pending_send_op(uct_pending_req_t *self) { + + pending_send_request_t *req = ucs_container_of(self, pending_send_request_t, uct); + ucs_status_t status; + + if (req->countdown > 0) { + --req->countdown; + return UCS_INPROGRESS; + } + + status = uct_ep_am_short(req->ep, 0, test_pending_hdr, &req->data, + sizeof(req->data)); + if (status == UCS_OK) { + pending_delete(req); + } + return status; + } + + static ucs_status_t pending_send_op_simple(uct_pending_req_t *self) { + + pending_send_request_t *req = ucs_container_of(self, pending_send_request_t, uct); + ucs_status_t status; + + status = uct_ep_am_short(req->ep, 0, test_pending_hdr, &req->data, + sizeof(req->data)); + if (status == UCS_OK) { + req->countdown ++; + n_pending--; + req->active = 0; + //ucs_warn("dispatched %p idx %d total %d", req->ep, req->id, req->countdown); + } + return status; + } + + static ucs_status_t pending_send_op_bcopy(uct_pending_req_t *self) { + + pending_send_request_t *req = ucs_container_of(self, pending_send_request_t, uct); + ssize_t packed_len; + + packed_len = uct_ep_am_bcopy(req->ep, 0, mapped_buffer::pack, req->buf, 0); + if (packed_len > 0) { + req->countdown ++; + n_pending--; + req->active = 0; + return UCS_OK; + } + return (ucs_status_t)packed_len; + } + + static ucs_status_t pending_send_op_ok(uct_pending_req_t *self) { + pending_send_request_t *req = ucs_container_of(self, pending_send_request_t, uct); + + pending_delete(req); + n_pending--; + return UCS_OK; + } + + static void purge_cb(uct_pending_req_t *uct_req, void *arg) + { + ++n_purge; + } + + pending_send_request_t* pending_alloc(uint64_t send_data) { + pending_send_request_t *req = new pending_send_request_t(); + req->ep = m_e1->ep(0); + req->data = send_data; + req->countdown = 5; + req->uct.func = pending_send_op; + 
return req; + } + + pending_send_request_t* pending_alloc_simple(uint64_t send_data, int idx) { + pending_send_request_t *req = new pending_send_request_t(); + req->ep = m_e1->ep(idx); + req->data = send_data; + req->countdown = 0; + req->uct.func = pending_send_op_simple; + req->active = 0; + req->id = idx; + return req; + } + + pending_send_request_t* pending_alloc_simple(mapped_buffer *sbuf, int idx) { + pending_send_request_t *req = new pending_send_request_t(); + req->ep = m_e1->ep(idx); + req->buf = sbuf; + req->countdown = 0; + req->uct.func = pending_send_op_bcopy; + req->active = 0; + req->id = idx; + return req; + } + + static void pending_delete(pending_send_request_t *req) { + delete req; + } + +protected: + static const uint64_t test_pending_hdr = 0xabcd; + entity *m_e1, *m_e2; + static int n_pending; + static int n_purge; +}; + +int test_uct_pending::n_pending = 0; +int test_uct_pending::n_purge = 0; + +void install_handler_sync_or_async(uct_iface_t *iface, uint8_t id, uct_am_callback_t cb, void *arg) +{ + ucs_status_t status; + uct_iface_attr_t attr; + + status = uct_iface_query(iface, &attr); + ASSERT_UCS_OK(status); + + if (attr.cap.flags & UCT_IFACE_FLAG_CB_SYNC) { + uct_iface_set_am_handler(iface, id, cb, arg, 0); + } else { + ASSERT_TRUE(attr.cap.flags & UCT_IFACE_FLAG_CB_ASYNC); + uct_iface_set_am_handler(iface, id, cb, arg, UCT_CB_FLAG_ASYNC); + } +} + +UCS_TEST_SKIP_COND_P(test_uct_pending, pending_op, + !check_caps(UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_PENDING)) +{ + uint64_t send_data = 0xdeadbeef; + ucs_status_t status; + unsigned i, iters, counter = 0; + + initialize(); + + iters = 1000000 / ucs::test_time_multiplier(); + + /* set a callback for the uct to invoke for receiving the data */ + install_handler_sync_or_async(m_e2->iface(), 0, am_handler, &counter); + + /* send the data until the resources run out */ + i = 0; + while (i < iters) { + status = uct_ep_am_short(m_e1->ep(0), 0, test_pending_hdr, &send_data, + 
sizeof(send_data)); + if (status != UCS_OK) { + if (status == UCS_ERR_NO_RESOURCE) { + + pending_send_request_t *req = pending_alloc(send_data); + + status = uct_ep_pending_add(m_e1->ep(0), &req->uct, 0); + if (status != UCS_OK) { + /* the request wasn't added to the pending data structure + * since resources became available. retry sending this message */ + pending_delete(req); + } else { + /* the request was added to the pending data structure */ + send_data += 1; + i++; + } + /* coverity[leaked_storage] */ + } else { + UCS_TEST_ABORT("Error: " << ucs_status_string(status)); + } + } else { + send_data += 1; + i++; + } + } + /* coverity[loop_condition] */ + while (counter != iters) { + progress(); + } + flush(); + + ASSERT_EQ(counter, iters); +} + +UCS_TEST_SKIP_COND_P(test_uct_pending, send_ooo_with_pending, + !check_caps(UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_PENDING)) +{ + uint64_t send_data = 0xdeadbeef; + ucs_status_t status_send, status_pend = UCS_ERR_LAST; + ucs_time_t loop_end_limit; + unsigned i, counter = 0; + + initialize(); + + /* set a callback for the uct to invoke when receiving the data */ + install_handler_sync_or_async(m_e2->iface(), 0, am_handler, &counter); + + loop_end_limit = ucs_get_time() + ucs_time_from_sec(2); + /* send while resources are available. try to add a request to pending */ + do { + status_send = uct_ep_am_short(m_e1->ep(0), 0, test_pending_hdr, &send_data, + sizeof(send_data)); + if (status_send == UCS_ERR_NO_RESOURCE) { + + pending_send_request_t *req = pending_alloc(send_data); + + status_pend = uct_ep_pending_add(m_e1->ep(0), &req->uct, 0); + if (status_pend == UCS_ERR_BUSY) { + pending_delete(req); + } else { + /* coverity[leaked_storage] */ + ++send_data; + break; + } + } else { + ASSERT_UCS_OK(status_send); + ++send_data; + } + } while (ucs_get_time() < loop_end_limit); + + if ((status_send == UCS_OK) || (status_pend == UCS_ERR_BUSY)) { + /* got here due to reaching the time limit in the above loop. 
+ * couldn't add a request to pending. all sends were successful. */ + UCS_TEST_MESSAGE << "Can't create out-of-order in the given time."; + return; + } + /* there is one pending request */ + EXPECT_EQ(UCS_OK, status_pend); + + /* progress the receiver a bit to release resources */ + for (i = 0; i < 1000; i++) { + m_e2->progress(); + } + + /* send a new message. the transport should make sure that this new message + * isn't sent before the one in pending, thus preventing out-of-order in sending. */ + do { + status_send = uct_ep_am_short(m_e1->ep(0), 0, test_pending_hdr, + &send_data, sizeof(send_data)); + short_progress_loop(); + } while (status_send == UCS_ERR_NO_RESOURCE); + ASSERT_UCS_OK(status_send); + ++send_data; + + /* the receive side checks that the messages were received in order. + * check the last message here. (counter was raised by one for next iteration) */ + unsigned exp_counter = send_data - 0xdeadbeefUL; + wait_for_value(&counter, exp_counter, true); + EXPECT_EQ(exp_counter, counter); +} + +UCS_TEST_SKIP_COND_P(test_uct_pending, pending_purge, + !check_caps(UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_PENDING)) +{ + const int num_eps = 5; + uct_pending_req_t reqs[num_eps]; + + /* set a callback for the uct to invoke when receiving the data */ + install_handler_sync_or_async(m_e2->iface(), 0, am_handler_simple, NULL); + + for (int i = 0; i < num_eps; ++i) { + m_e1->connect(i, *m_e2, i); + send_am_fill_resources(m_e1->ep(i)); + reqs[i].func = NULL; + EXPECT_UCS_OK(uct_ep_pending_add(m_e1->ep(i), &reqs[i], 0)); + } + + for (int i = 0; i < num_eps; ++i) { + n_purge = 0; + uct_ep_pending_purge(m_e1->ep(i), purge_cb, NULL); + EXPECT_EQ(1, n_purge); + } +} + +/* + * test that the pending op callback is only called from the progress() + */ +UCS_TEST_SKIP_COND_P(test_uct_pending, pending_async, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_PENDING)) +{ + pending_send_request_t *req = NULL; + ucs_status_t status; + ssize_t packed_len; + + 
initialize(); + + mapped_buffer sbuf(ucs_min(64ul, m_e1->iface_attr().cap.am.max_bcopy), 0, + *m_e1); + + req = pending_alloc_simple(&sbuf, 0); + + /* set a callback for the uct to invoke when receiving the data */ + install_handler_sync_or_async(m_e2->iface(), 0, am_handler_simple, 0); + + /* send while resources are available */ + n_pending = 0; + do { + packed_len = uct_ep_am_bcopy(m_e1->ep(0), 0, mapped_buffer::pack, + &sbuf, 0); + } while (packed_len >= 0); + + EXPECT_TRUE(packed_len == UCS_ERR_NO_RESOURCE); + + status = uct_ep_pending_add(m_e1->ep(0), &req->uct, 0); + EXPECT_UCS_OK(status); + n_pending++; + + /* pending op must not be called either asynchronously or from the + * uct_ep_am_bcopy() */ + twait(300); + EXPECT_EQ(1, n_pending); + + packed_len = uct_ep_am_bcopy(m_e1->ep(0), 0, mapped_buffer::pack, &sbuf, 0); + EXPECT_EQ(1, n_pending); + EXPECT_GT(0, packed_len); + + wait_for_value(&n_pending, 0, true); + EXPECT_EQ(0, n_pending); + pending_delete(req); +} + +/* + * test that arbiter does not block when ucs_ok is returned + * The issue is a dc transport specific but test may be also useful + * for other transports + */ +UCS_TEST_SKIP_COND_P(test_uct_pending, pending_ucs_ok_dc_arbiter_bug, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_PENDING) || + has_transport("cm")) +{ + ucs_status_t status; + ssize_t packed_len; + int N, max_listen_conn; + + initialize(); + + mapped_buffer sbuf(ucs_min(64ul, m_e1->iface_attr().cap.am.max_bcopy), 0, + *m_e1); + + /* set a callback for the uct to invoke when receiving the data */ + install_handler_sync_or_async(m_e2->iface(), 0, am_handler_simple, 0); + + if (RUNNING_ON_VALGRIND) { + N = 64; + } else if (m_e1->iface_attr().cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + N = 2048; + } else { + N = 128; + } + + N = ucs_min(N, max_connections()); + + /* idx 0 is setup in initialize(). 
only need to alloc request */ + for (int j, i = 1; i < N; i += j) { + max_listen_conn = ucs_min(max_connect_batch(), N - i); + + for (j = 0; j < max_listen_conn; j++) { + int idx = i + j; + m_e1->connect(idx, *m_e2, idx); + } + /* give a chance to finish connection for some transports (ib/ud, tcp) */ + flush(); + } + + n_pending = 0; + + /* try to exaust global resources and create a pending queue */ + for (int i = 0; i < N; i++) { + packed_len = uct_ep_am_bcopy(m_e1->ep(i), 0, mapped_buffer::pack, + &sbuf, 0); + + if (packed_len == UCS_ERR_NO_RESOURCE) { + pending_send_request_t *req = pending_alloc(i); + + req->uct.func = pending_send_op_ok; + status = uct_ep_pending_add(m_e1->ep(i), &req->uct, 0); + EXPECT_UCS_OK(status); + n_pending++; + /* coverity[leaked_storage] */ + } + } + + UCS_TEST_MESSAGE << "pending queue len: " << n_pending; + + wait_for_value(&n_pending, 0, true); + EXPECT_EQ(0, n_pending); +} + +UCS_TEST_SKIP_COND_P(test_uct_pending, pending_fairness, + (RUNNING_ON_VALGRIND || + !check_caps(UCT_IFACE_FLAG_AM_SHORT | + UCT_IFACE_FLAG_PENDING))) +{ + int N = 16; + uint64_t send_data = 0xdeadbeef; + int i, iters; + ucs_status_t status; + + initialize(); + + if (m_e1->iface_attr().cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + N = ucs_min(128, max_connect_batch()); + } + pending_send_request_t *reqs[N]; + install_handler_sync_or_async(m_e2->iface(), 0, am_handler_simple, 0); + + /* idx 0 is setup in initialize(). 
only need to alloc request */ + reqs[0] = pending_alloc_simple(send_data, 0); + for (i = 1; i < N; i++) { + m_e1->connect(i, *m_e2, i); + reqs[i] = pending_alloc_simple(send_data, i); + } + + /* give a chance to finish connection for some transports (ib/ud, tcp) */ + flush(); + + n_pending = 0; + for (iters = 0; iters < 10000; iters++) { + /* send until resources of all eps are exausted */ + while (n_pending < N) { + for (i = 0; i < N; ++i) { /* TODO: change to list */ + if (reqs[i]->active) { + continue; + } + for (;;) { + status = uct_ep_am_short(m_e1->ep(i), 0, test_pending_hdr, + &send_data, sizeof(send_data)); + if (status == UCS_ERR_NO_RESOURCE) { + /* schedule pending */ + status = uct_ep_pending_add(m_e1->ep(i), &reqs[i]->uct, + 0); + if (status == UCS_ERR_BUSY) { + continue; /* retry */ + } + ASSERT_UCS_OK(status); + + n_pending++; + reqs[i]->active = 1; + break; + } else { + ASSERT_UCS_OK(status); + /* sent */ + reqs[i]->countdown++; + break; + } + } + } + } + /* progress until it is possible to send more */ + while(n_pending == N) { + progress(); + } + /* repeat the cycle. + * it is expected that every ep will send about + * the same number of messages. 
+ */ + } + + /* check fairness: */ + int min_sends = INT_MAX; + int max_sends = 0; + for (i = 0; i < N; i++) { + min_sends = ucs_min(min_sends, reqs[i]->countdown); + max_sends = ucs_max(max_sends, reqs[i]->countdown); + } + UCS_TEST_MESSAGE << " min_sends: " << min_sends + << " max_sends: " << max_sends + << " still pending: " << n_pending; + + while(n_pending > 0) { + progress(); + } + + flush(); + + for (i = 0; i < N; i++) { + pending_delete(reqs[i]); + } + + /* there must be no starvation */ + EXPECT_LT(0, min_sends); + /* TODO: add stricter fairness criteria */ + if (min_sends < max_sends /2) { + UCS_TEST_MESSAGE << " CHECK: pending queue is not fair"; + } +} + +UCT_INSTANTIATE_NO_SELF_TEST_CASE(test_uct_pending); diff --git a/test/gtest/uct/test_progress.cc b/test/gtest/uct/test_progress.cc new file mode 100644 index 0000000..e51d088 --- /dev/null +++ b/test/gtest/uct/test_progress.cc @@ -0,0 +1,43 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +extern "C" { +#include +} +#include +#include "uct_test.h" + + +class test_uct_progress : public uct_test { +public: + virtual void init() { + uct_test::init(); + m_entities.push_back(create_entity(0)); + } +}; + + +UCS_TEST_P(test_uct_progress, random_enable_disable) { + for (int i = 0; i < 100; ++i) { + unsigned flags = 0; + if (ucs::rand() % 2) { + flags |= UCT_PROGRESS_SEND; + } + if (ucs::rand() % 2) { + flags |= UCT_PROGRESS_RECV; + } + if (ucs::rand() % 2) { + uct_iface_progress_enable(ent(0).iface(), flags); + } else { + uct_iface_progress_disable(ent(0).iface(), flags); + } + progress(); + } + +} + + +UCT_INSTANTIATE_TEST_CASE(test_uct_progress); diff --git a/test/gtest/uct/test_stats.cc b/test/gtest/uct/test_stats.cc new file mode 100644 index 0000000..196b21b --- /dev/null +++ b/test/gtest/uct/test_stats.cc @@ -0,0 +1,611 @@ + +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED. 
+* Copyright (C) UT-Battelle, LLC. 2014-2015. ALL RIGHTS RESERVED. +* See file LICENSE for terms. +*/ +extern "C" { +#include +#include + +#include +} +#include +#include "uct_test.h" +#include "uct_p2p_test.h" + +#if ENABLE_STATS + +#define EXPECT_STAT(_side, _uct_obj, _stat, _exp_val) \ + do { \ + uint64_t v = UCS_STATS_GET_COUNTER(_uct_obj(_side())->stats, _stat); \ + EXPECT_EQ(get_cntr_init(UCS_PP_MAKE_STRING(_side), \ + UCS_PP_MAKE_STRING(_stat)) + (_exp_val), v); \ + } while (0) + + +class test_uct_stats : public uct_p2p_test { +public: + test_uct_stats() : uct_p2p_test(0), lbuf(NULL), rbuf(NULL) { + m_comp.func = NULL; + m_comp.count = 0; + } + + virtual void init() { + stats_activate(); + uct_p2p_test::init(); + + // Sender EP + collect_cntr_init("sender", uct_ep(sender())->stats, + UCS_PP_MAKE_STRING(UCT_EP_STAT_FLUSH), + UCT_EP_STAT_FLUSH); + collect_cntr_init("sender", uct_ep(sender())->stats, + UCS_PP_MAKE_STRING(UCT_EP_STAT_FLUSH_WAIT), + UCT_EP_STAT_FLUSH_WAIT); + collect_cntr_init("sender", uct_ep(sender())->stats, + UCS_PP_MAKE_STRING(UCT_EP_STAT_FENCE), + UCT_EP_STAT_FENCE); + collect_cntr_init("sender", uct_ep(sender())->stats, + UCS_PP_MAKE_STRING(UCT_EP_STAT_AM), + UCT_EP_STAT_AM); + collect_cntr_init("sender", uct_ep(sender())->stats, + UCS_PP_MAKE_STRING(UCT_EP_STAT_NO_RES), + UCT_EP_STAT_NO_RES); + collect_cntr_init("sender", uct_ep(sender())->stats, + UCS_PP_MAKE_STRING(UCT_EP_STAT_PENDING), + UCT_EP_STAT_PENDING); + collect_cntr_init("sender", uct_ep(sender())->stats, + UCS_PP_MAKE_STRING(UCT_EP_STAT_ATOMIC), + UCT_EP_STAT_ATOMIC); + + // Sender IFACE + collect_cntr_init("sender", uct_iface(sender())->stats, + UCS_PP_MAKE_STRING(UCT_IFACE_STAT_FLUSH), + UCT_IFACE_STAT_FLUSH); + collect_cntr_init("sender", uct_iface(sender())->stats, + UCS_PP_MAKE_STRING(UCT_IFACE_STAT_FLUSH_WAIT), + UCT_IFACE_STAT_FLUSH_WAIT); + collect_cntr_init("sender", uct_iface(sender())->stats, + UCS_PP_MAKE_STRING(UCT_IFACE_STAT_FENCE), + UCT_IFACE_STAT_FENCE); + + 
// Receiver IFACE + collect_cntr_init("receiver", uct_iface(receiver())->stats, + UCS_PP_MAKE_STRING(UCT_IFACE_STAT_RX_AM), + UCT_IFACE_STAT_RX_AM); + collect_cntr_init("receiver", uct_iface(receiver())->stats, + UCS_PP_MAKE_STRING(UCT_IFACE_STAT_RX_AM_BYTES), + UCT_IFACE_STAT_RX_AM_BYTES); + } + + void collect_cntr_init(std::string side, ucs_stats_node_t *stats_node, + std::string stat_name, unsigned stat) { + cntr_init[side][stat_name] = UCS_STATS_GET_COUNTER(stats_node, stat); + } + + size_t get_cntr_init(std::string side, std::string stat_name) { + return cntr_init[side][stat_name]; + } + + void init_bufs(size_t min, size_t max) + { + size_t size = ucs_max(min, ucs_min(64ul, max)); + lbuf = new mapped_buffer(size, 0, sender(), 0, sender().md_attr().cap.access_mem_type); + rbuf = new mapped_buffer(size, 0, receiver(), 0, sender().md_attr().cap.access_mem_type); + } + + virtual void cleanup() { + flush(); + delete lbuf; + delete rbuf; + uct_p2p_test::cleanup(); + stats_restore(); + } + + uct_base_ep_t *uct_ep(const entity &e) + { + return ucs_derived_of(e.ep(0), uct_base_ep_t); + } + + uct_base_iface_t *uct_iface(const entity &e) + { + return ucs_derived_of(e.iface(), uct_base_iface_t); + } + + static ucs_status_t am_handler(void *arg, void *data, size_t length, + unsigned flags) { + return UCS_OK; + } + + static void purge_cb(uct_pending_req_t *r, void *arg) + { + } + + void check_am_rx_counters(size_t len) { + uint64_t iface_rx_am_init = get_cntr_init("receiver", + UCS_PP_MAKE_STRING(UCT_IFACE_STAT_RX_AM)); + uint64_t v; + + ucs_time_t deadline = ucs::get_deadline(); + do { + short_progress_loop(); + v = UCS_STATS_GET_COUNTER(uct_iface(receiver())->stats, UCT_IFACE_STAT_RX_AM); + } while ((ucs_get_time() < deadline) && (v == iface_rx_am_init)); + + EXPECT_STAT(receiver, uct_iface, UCT_IFACE_STAT_RX_AM, 1UL); + EXPECT_STAT(receiver, uct_iface, UCT_IFACE_STAT_RX_AM_BYTES, len); + } + + void check_atomic_counters() { + EXPECT_STAT(sender, uct_ep, 
UCT_EP_STAT_ATOMIC, 1UL); + /* give atomic chance to complete */ + short_progress_loop(); + } + + int fill_tx_q(int n) { + int count_wait; + int i, max; + size_t len; + + max = (n == 0) ? 1024 : n; + + for (count_wait = i = 0; i < max; i++) { + len = uct_ep_am_bcopy(sender_ep(), 0, mapped_buffer::pack, lbuf, 0); + if (len != lbuf->length()) { + if (n == 0) { + return 1; + } + count_wait++; + } + } + return count_wait; + } + + void init_completion() { + m_comp.count = 2; + m_comp.func = NULL; + } + + void wait_for_completion(ucs_status_t status) { + + EXPECT_TRUE(UCS_INPROGRESS == status || UCS_OK == status); + if (status == UCS_OK) { + --m_comp.count; + } + + ucs_time_t deadline = ucs::get_deadline(); + do { + short_progress_loop(); + } while ((ucs_get_time() < deadline) && (m_comp.count > 1)); + EXPECT_EQ(1, m_comp.count); + } + +protected: + mapped_buffer *lbuf, *rbuf; + uct_completion_t m_comp; + std::map< std::string, std::map< std::string, uint64_t > > cntr_init; +}; + + +/* test basic stat counters: + * am, put, get, amo, flush and fence + */ +UCS_TEST_SKIP_COND_P(test_uct_stats, am_short, + !check_caps(UCT_IFACE_FLAG_AM_SHORT)) +{ + uint64_t hdr=0xdeadbeef, send_data=0xfeedf00d; + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.am.max_short); + + status = uct_iface_set_am_handler(receiver().iface(), 0, am_handler, + 0, UCT_CB_FLAG_ASYNC); + EXPECT_UCS_OK(status); + + UCT_TEST_CALL_AND_TRY_AGAIN(uct_ep_am_short(sender_ep(), 0, hdr, &send_data, + sizeof(send_data)), status); + EXPECT_UCS_OK(status); + + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_AM, 1UL); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_BYTES_SHORT, + sizeof(hdr) + sizeof(send_data)); + check_am_rx_counters(sizeof(hdr) + sizeof(send_data)); +} + +UCS_TEST_SKIP_COND_P(test_uct_stats, am_bcopy, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY)) +{ + ssize_t v; + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.am.max_bcopy); + + status = uct_iface_set_am_handler(receiver().iface(), 0, 
am_handler, 0, UCT_CB_FLAG_ASYNC); + EXPECT_UCS_OK(status); + + UCT_TEST_CALL_AND_TRY_AGAIN(uct_ep_am_bcopy(sender_ep(), 0, mapped_buffer::pack, + lbuf, 0), v); + EXPECT_EQ((ssize_t)lbuf->length(), v); + + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_AM, 1UL); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_BYTES_BCOPY, + lbuf->length()); + check_am_rx_counters(lbuf->length()); +} + +UCS_TEST_SKIP_COND_P(test_uct_stats, am_zcopy, + !check_caps(UCT_IFACE_FLAG_AM_ZCOPY)) +{ + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.am.max_zcopy); + + status = uct_iface_set_am_handler(receiver().iface(), 0, am_handler, 0, UCT_CB_FLAG_ASYNC); + EXPECT_UCS_OK(status); + + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, lbuf->ptr(), lbuf->length(), lbuf->memh(), + sender().iface_attr().cap.am.max_iov); + + UCT_TEST_CALL_AND_TRY_AGAIN(uct_ep_am_zcopy(sender_ep(), 0, 0, 0, + iov, iovcnt, 0, NULL), status); + EXPECT_TRUE(UCS_INPROGRESS == status || UCS_OK == status); + + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_AM, 1UL); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_BYTES_ZCOPY, + lbuf->length()); + check_am_rx_counters(lbuf->length()); +} + + +UCS_TEST_SKIP_COND_P(test_uct_stats, put_short, + !check_caps(UCT_IFACE_FLAG_PUT_SHORT)) +{ + uint64_t send_data=0xfeedf00d; + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.put.max_short); + + UCT_TEST_CALL_AND_TRY_AGAIN(uct_ep_put_short(sender_ep(), &send_data, sizeof(send_data), + rbuf->addr(), rbuf->rkey()), status); + EXPECT_UCS_OK(status); + + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_PUT, 1UL); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_BYTES_SHORT, + sizeof(send_data)); +} + +UCS_TEST_SKIP_COND_P(test_uct_stats, put_bcopy, + !check_caps(UCT_IFACE_FLAG_PUT_BCOPY)) +{ + ssize_t v; + + init_bufs(0, sender().iface_attr().cap.put.max_bcopy); + + UCT_TEST_CALL_AND_TRY_AGAIN(uct_ep_put_bcopy(sender_ep(), mapped_buffer::pack, lbuf, + rbuf->addr(), rbuf->rkey()), v); + EXPECT_EQ((ssize_t)lbuf->length(), v); + + EXPECT_STAT(sender, uct_ep, 
UCT_EP_STAT_PUT, 1UL); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_BYTES_BCOPY, + lbuf->length()); +} + +UCS_TEST_SKIP_COND_P(test_uct_stats, put_zcopy, + !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY)) +{ + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.put.max_zcopy); + + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, lbuf->ptr(), lbuf->length(), lbuf->memh(), + sender().iface_attr().cap.put.max_iov); + + UCT_TEST_CALL_AND_TRY_AGAIN( + uct_ep_put_zcopy(sender_ep(), iov, iovcnt, rbuf->addr(), + rbuf->rkey(), 0), status); + EXPECT_TRUE(UCS_INPROGRESS == status || UCS_OK == status); + + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_PUT, 1UL); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_BYTES_ZCOPY, + lbuf->length()); +} + + +UCS_TEST_SKIP_COND_P(test_uct_stats, get_bcopy, + !check_caps(UCT_IFACE_FLAG_GET_BCOPY)) +{ + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.get.max_bcopy); + + init_completion(); + UCT_TEST_CALL_AND_TRY_AGAIN( + uct_ep_get_bcopy(sender_ep(), (uct_unpack_callback_t)memcpy, + lbuf->ptr(), lbuf->length(), + rbuf->addr(), rbuf->rkey(), &m_comp), status); + wait_for_completion(status); + + short_progress_loop(); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_GET, 1UL); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_BYTES_BCOPY, + lbuf->length()); +} + +UCS_TEST_SKIP_COND_P(test_uct_stats, get_zcopy, + !check_caps(UCT_IFACE_FLAG_GET_ZCOPY)) +{ + ucs_status_t status; + + init_bufs(sender().iface_attr().cap.get.min_zcopy, + sender().iface_attr().cap.get.max_zcopy); + + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, lbuf->ptr(), lbuf->length(), lbuf->memh(), + sender().iface_attr().cap.get.max_iov); + + init_completion(); + UCT_TEST_CALL_AND_TRY_AGAIN( + uct_ep_get_zcopy(sender_ep(), iov, iovcnt, rbuf->addr(), + rbuf->rkey(), &m_comp), status); + wait_for_completion(status); + + short_progress_loop(); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_GET, 1UL); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_BYTES_ZCOPY, + lbuf->length()); +} + +#define TEST_STATS_ATOMIC_POST(_op, 
_val) \ +UCS_TEST_SKIP_COND_P(test_uct_stats, atomic_post_ ## _op ## _val, \ + !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ ## _op), OP ## _val)) \ +{ \ + ucs_status_t status; \ + init_bufs(sizeof(uint##_val##_t), sizeof(uint##_val##_t)); \ + status = uct_ep_atomic ##_val##_post(sender_ep(), (UCT_ATOMIC_OP_ ## _op), \ + 1, rbuf->addr(), rbuf->rkey()); \ + EXPECT_UCS_OK(status); \ + check_atomic_counters(); \ +} + +TEST_STATS_ATOMIC_POST(ADD, 32) +TEST_STATS_ATOMIC_POST(ADD, 64) +TEST_STATS_ATOMIC_POST(AND, 32) +TEST_STATS_ATOMIC_POST(AND, 64) +TEST_STATS_ATOMIC_POST(OR, 32) +TEST_STATS_ATOMIC_POST(OR, 64) +TEST_STATS_ATOMIC_POST(XOR, 32) +TEST_STATS_ATOMIC_POST(XOR, 64) + + +#define TEST_STATS_ATOMIC_FETCH(_op, _val) \ +UCS_TEST_SKIP_COND_P(test_uct_stats, atomic_fetch_## _op ## _val, \ + !check_atomics(UCS_BIT(UCT_ATOMIC_OP_ ## _op), FOP ## _val)) \ +{ \ + ucs_status_t status; \ + uint##_val##_t result; \ + \ + init_bufs(sizeof(result), sizeof(result)); \ + \ + init_completion(); \ + status = uct_ep_atomic##_val##_fetch(sender_ep(), (UCT_ATOMIC_OP_ ## _op), 1, \ + &result, rbuf->addr(), rbuf->rkey(), &m_comp); \ + wait_for_completion(status); \ + \ + check_atomic_counters(); \ +} + +TEST_STATS_ATOMIC_FETCH(ADD, 32) +TEST_STATS_ATOMIC_FETCH(ADD, 64) +TEST_STATS_ATOMIC_FETCH(AND, 32) +TEST_STATS_ATOMIC_FETCH(AND, 64) +TEST_STATS_ATOMIC_FETCH(OR, 32) +TEST_STATS_ATOMIC_FETCH(OR, 64) +TEST_STATS_ATOMIC_FETCH(XOR, 32) +TEST_STATS_ATOMIC_FETCH(XOR, 64) +TEST_STATS_ATOMIC_FETCH(SWAP, 32) +TEST_STATS_ATOMIC_FETCH(SWAP, 64) + +#define TEST_STATS_ATOMIC_CSWAP(val) \ +UCS_TEST_SKIP_COND_P(test_uct_stats, atomic_cswap##val, \ + !check_atomics(UCS_BIT(UCT_ATOMIC_OP_CSWAP), FOP ## val)) \ +{ \ + ucs_status_t status; \ + uint##val##_t result; \ + \ + init_bufs(sizeof(result), sizeof(result)); \ + \ + init_completion(); \ + UCT_TEST_CALL_AND_TRY_AGAIN( \ + uct_ep_atomic_cswap##val (sender_ep(), 1, 2, rbuf->addr(), \ + rbuf->rkey(), &result, &m_comp), \ + status); \ + 
wait_for_completion(status); \ + \ + check_atomic_counters(); \ +} + +TEST_STATS_ATOMIC_CSWAP(32) +TEST_STATS_ATOMIC_CSWAP(64) + +UCS_TEST_P(test_uct_stats, flush) +{ + ucs_status_t status; + + if (sender_ep()) { + status = uct_ep_flush(sender_ep(), 0, NULL); + EXPECT_UCS_OK(status); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_FLUSH, 1Ul); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_FLUSH_WAIT, 0UL); + } + + status = uct_iface_flush(sender().iface(), 0, NULL); + EXPECT_UCS_OK(status); + EXPECT_STAT(sender, uct_iface, UCT_IFACE_STAT_FLUSH, 1UL); + EXPECT_STAT(sender, uct_iface, UCT_IFACE_STAT_FLUSH_WAIT, 0UL); +} + +UCS_TEST_P(test_uct_stats, fence) +{ + ucs_status_t status; + + if (sender_ep()) { + status = uct_ep_fence(sender_ep(), 0); + EXPECT_UCS_OK(status); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_FENCE, 1UL); + } + + status = uct_iface_fence(sender().iface(), 0); + EXPECT_UCS_OK(status); + EXPECT_STAT(sender, uct_iface, UCT_IFACE_STAT_FENCE, 1UL); +} + +/* flush test only check stats on tls with am_bcopy + * TODO: full test matrix + */ +UCS_TEST_SKIP_COND_P(test_uct_stats, flush_wait_iface, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY)) +{ + uint64_t count_wait; + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.am.max_bcopy); + + status = uct_iface_set_am_handler(receiver().iface(), 0, am_handler, 0, UCT_CB_FLAG_ASYNC); + EXPECT_UCS_OK(status); + + fill_tx_q(0); + count_wait = 0; + do { + status = uct_iface_flush(sender().iface(), 0, NULL); + if (status == UCS_INPROGRESS) { + count_wait++; + } + progress(); + } while (status != UCS_OK); + + EXPECT_STAT(sender, uct_iface, UCT_IFACE_STAT_FLUSH, 1UL); + EXPECT_STAT(sender, uct_iface, UCT_IFACE_STAT_FLUSH_WAIT, count_wait); +} + +UCS_TEST_SKIP_COND_P(test_uct_stats, flush_wait_ep, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY)) +{ + uint64_t count_wait; + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.am.max_bcopy); + + status = uct_iface_set_am_handler(receiver().iface(), 0, am_handler, 0, 
UCT_CB_FLAG_ASYNC); + EXPECT_UCS_OK(status); + + fill_tx_q(0); + count_wait = 0; + do { + status = uct_ep_flush(sender_ep(), 0, NULL); + if (status == UCS_INPROGRESS) { + count_wait++; + } + progress(); + } while (status != UCS_OK); + + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_FLUSH, 1UL); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_FLUSH_WAIT, count_wait); +} + +/* fence test only check stats on tls with am_bcopy + * TODO: full test matrix + */ +UCS_TEST_SKIP_COND_P(test_uct_stats, fence_iface, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY)) +{ + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.am.max_bcopy); + + status = uct_iface_set_am_handler(receiver().iface(), 0, am_handler, 0, UCT_CB_FLAG_ASYNC); + EXPECT_UCS_OK(status); + + fill_tx_q(0); + + status = uct_iface_fence(sender().iface(), 0); + EXPECT_UCS_OK(status); + + fill_tx_q(0); + + EXPECT_STAT(sender, uct_iface, UCT_IFACE_STAT_FENCE, 1UL); +} + +UCS_TEST_SKIP_COND_P(test_uct_stats, fence_ep, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY)) +{ + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.am.max_bcopy); + + status = uct_iface_set_am_handler(receiver().iface(), 0, am_handler, 0, UCT_CB_FLAG_ASYNC); + EXPECT_UCS_OK(status); + + fill_tx_q(0); + + status = uct_ep_fence(sender_ep(), 0); + EXPECT_UCS_OK(status); + + fill_tx_q(0); + + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_FENCE, 1UL); +} + +UCS_TEST_SKIP_COND_P(test_uct_stats, tx_no_res, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY)) +{ + uint64_t count; + ucs_status_t status; + + init_bufs(0, sender().iface_attr().cap.am.max_bcopy); + + status = uct_iface_set_am_handler(receiver().iface(), 0, am_handler, 0, UCT_CB_FLAG_ASYNC); + EXPECT_UCS_OK(status); + count = fill_tx_q(1024); + + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_NO_RES, count); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_AM, 1024 - count); +} + +UCS_TEST_SKIP_COND_P(test_uct_stats, pending_add, + !check_caps(UCT_IFACE_FLAG_AM_BCOPY | + UCT_IFACE_FLAG_PENDING)) +{ + const size_t num_reqs = 5; + 
uct_pending_req_t p_reqs[num_reqs]; + ssize_t len; + + init_bufs(0, sender().iface_attr().cap.am.max_bcopy); + + EXPECT_UCS_OK(uct_iface_set_am_handler(receiver().iface(), 0, am_handler, 0, + UCT_CB_FLAG_ASYNC)); + + // Check that counter is not increased if pending_add returns NOT_OK + EXPECT_EQ(uct_ep_pending_add(sender().ep(0), &p_reqs[0], 0), + UCS_ERR_BUSY); + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_PENDING, 0UL); + + // Check that counter gets increased on every successfull pending_add returns NOT_OK + fill_tx_q(0); + + UCT_TEST_CALL_AND_TRY_AGAIN( + uct_ep_am_bcopy(sender_ep(), 0, mapped_buffer::pack, + lbuf, 0), len); + if (len == (ssize_t)lbuf->length()) { + UCS_TEST_SKIP_R("Can't add to pending"); + } + + for (size_t i = 0; i < num_reqs; ++i) { + p_reqs[i].func = NULL; + EXPECT_UCS_OK(uct_ep_pending_add(sender().ep(0), &p_reqs[i], 0)); + } + uct_ep_pending_purge(sender().ep(0), purge_cb, NULL); + + EXPECT_STAT(sender, uct_ep, UCT_EP_STAT_PENDING, num_reqs); +} + +UCT_INSTANTIATE_TEST_CASE(test_uct_stats); +#endif diff --git a/test/gtest/uct/test_tag.cc b/test/gtest/uct/test_tag.cc new file mode 100644 index 0000000..8269538 --- /dev/null +++ b/test/gtest/uct/test_tag.cc @@ -0,0 +1,1312 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. 
+*/ + +extern "C" { +#include +} +#include +#include "uct_test.h" + +#define UCT_TAG_INSTANTIATE_TEST_CASE(_test_case) \ + _UCT_INSTANTIATE_TEST_CASE(_test_case, rc_mlx5) \ + _UCT_INSTANTIATE_TEST_CASE(_test_case, dc_mlx5) + +class test_tag : public uct_test { +public: + static const uint64_t SEND_SEED = 0xa1a1a1a1a1a1a1a1ul; + static const uint64_t RECV_SEED = 0xb2b2b2b2b2b2b2b2ul; + static const uint64_t MASK = 0xfffffffffffffffful; + + struct rndv_hdr { + uint64_t priv[2]; + uint16_t tail; + } UCS_S_PACKED; + + struct recv_ctx { + mapped_buffer *mbuf; + uct_tag_t tag; + uct_tag_t tmask; + bool take_uct_desc; + bool comp; + bool unexp; + bool consumed; + bool sw_rndv; + uct_tag_context_t uct_ctx; + ucs_status_t status; + }; + + struct send_ctx { + mapped_buffer *mbuf; + void *rndv_op; + uct_tag_t tag; + uint64_t imm_data; + uct_completion_t uct_comp; + ucs_status_t status; + bool sw_rndv; + bool comp; + bool unexp; + }; + + typedef ucs_status_t (test_tag::*send_func)(entity&, send_ctx&); + + void init() + { + ucs_status_t status = uct_config_modify(m_iface_config, + "RC_TM_ENABLE", "y"); + ASSERT_TRUE((status == UCS_OK) || (status == UCS_ERR_NO_ELEM)); + + status = uct_config_modify(m_iface_config, "RC_TM_MP_NUM_STRIDES", "1"); + ASSERT_TRUE((status == UCS_OK) || (status == UCS_ERR_NO_ELEM)); + + uct_test::init(); + + uct_iface_params params; + params.field_mask = UCT_IFACE_PARAM_FIELD_RX_HEADROOM | + UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_CB | + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_ARG | + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_CB | + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_ARG; + + // tl and dev names are taken from resources via GetParam, no need + // to fill it here + params.rx_headroom = 0; + params.open_mode = UCT_IFACE_OPEN_MODE_DEVICE; + params.eager_cb = unexp_eager; + params.eager_arg = reinterpret_cast(this); + params.rndv_cb = unexp_rndv; + params.rndv_arg = reinterpret_cast(this); + + entity *sender = uct_test::create_entity(params); + 
m_entities.push_back(sender); + + check_skip_test(); + + if (UCT_DEVICE_TYPE_SELF == GetParam()->dev_type) { + sender->connect(0, *sender, 0); + } else { + entity *receiver = uct_test::create_entity(params); + m_entities.push_back(receiver); + + sender->connect(0, *receiver, 0); + } + } + + void init_send_ctx(send_ctx &s,mapped_buffer *b, uct_tag_t t, uint64_t i, + bool unexp_flow = true) + { + s.mbuf = b; + s.rndv_op = NULL; + s.tag = t; + s.imm_data = i; + s.uct_comp.count = 1; + s.uct_comp.func = send_completion; + s.sw_rndv = s.comp = false; + s.unexp = unexp_flow; + s.status = UCS_ERR_NO_PROGRESS; + } + + void init_recv_ctx(recv_ctx &r, mapped_buffer *b, uct_tag_t t, + uct_tag_t m = MASK, bool uct_d = false) + { + r.mbuf = b; + r.tag = t; + r.tmask = m; + r.uct_ctx.completed_cb = completed; + r.uct_ctx.tag_consumed_cb = tag_consumed; + r.uct_ctx.rndv_cb = sw_rndv_completed; + r.take_uct_desc = uct_d; + r.status = UCS_ERR_NO_PROGRESS; + r.comp = r.unexp = r.consumed = r.sw_rndv = false; + } + + ucs_status_t tag_eager_short(entity &e, send_ctx &ctx) + { + ctx.status = uct_ep_tag_eager_short(e.ep(0), ctx.tag, ctx.mbuf->ptr(), + ctx.mbuf->length()); + ctx.comp = true; + + return ctx.status; + } + + ucs_status_t tag_eager_bcopy(entity &e, send_ctx &ctx) + { + ssize_t status = uct_ep_tag_eager_bcopy(e.ep(0), ctx.tag, + ctx.imm_data, mapped_buffer::pack, + reinterpret_cast(ctx.mbuf), + 0); + ctx.status = (status >= 0) ? 
UCS_OK : static_cast(status); + ctx.comp = true; + + return ctx.status; + } + + ucs_status_t tag_eager_zcopy(entity &e, send_ctx &ctx) + { + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, ctx.mbuf->ptr(), + ctx.mbuf->length(), ctx.mbuf->memh(), + e.iface_attr().cap.tag.eager.max_iov); + + ucs_status_t status = uct_ep_tag_eager_zcopy(e.ep(0), ctx.tag, + ctx.imm_data, iov, iovcnt, + 0, &ctx.uct_comp); + if (status == UCS_INPROGRESS) { + status = UCS_OK; + } + return status; + } + + ucs_status_t tag_rndv_zcopy(entity &e, send_ctx &ctx) + { + rndv_hdr hdr = {{ctx.imm_data, + reinterpret_cast(&ctx) + }, + 0xFAFA + }; + + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, ctx.mbuf->ptr(), + ctx.mbuf->length(), ctx.mbuf->memh(), 1); + + ctx.rndv_op = uct_ep_tag_rndv_zcopy(e.ep(0), ctx.tag, &hdr, + sizeof(hdr), iov, iovcnt, 0, + &ctx.uct_comp); + + return (UCS_PTR_IS_ERR(ctx.rndv_op)) ? UCS_PTR_STATUS(ctx.rndv_op) : + UCS_OK; + } + + ucs_status_t tag_rndv_cancel(entity &e, void *op) + { + return uct_ep_tag_rndv_cancel(e.ep(0), op); + } + + ucs_status_t tag_rndv_request(entity &e, send_ctx &ctx) + { + ctx.sw_rndv = true; + + if (ctx.unexp) { + // Unexpected flow, will need to analyze ctx data on the receiver + rndv_hdr hdr = {{ctx.imm_data, + reinterpret_cast(&ctx) + }, + 0xFAFA + }; + ctx.status = uct_ep_tag_rndv_request(e.ep(0), ctx.tag, &hdr, + sizeof(hdr), 0); + } else { + // Expected flow, send just plain data (will be stored in rx buf by HCA) + ctx.status = uct_ep_tag_rndv_request(e.ep(0), ctx.tag, ctx.mbuf->ptr(), + ctx.mbuf->length(), 0); + } + ctx.comp = true; + + return ctx.status; + } + + ucs_status_t tag_post(entity &e, recv_ctx &ctx) + { + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, ctx.mbuf->ptr(), + ctx.mbuf->length(), ctx.mbuf->memh(), 1); + return uct_iface_tag_recv_zcopy(e.iface(), ctx.tag, ctx.tmask, + iov, iovcnt, &ctx.uct_ctx); + } + + ucs_status_t tag_cancel(entity &e, recv_ctx &ctx, int force) + { + return uct_iface_tag_recv_cancel(e.iface(), &ctx.uct_ctx, force); + } + + + // If 
expected message arrives, two callbacks should be called: + // tag_consumed and completed (unexpected callback should not be + // called). And it is vice versa if message arrives unexpectedly. + // If expected SW RNDV request arrives tag_consumed and sw_rndv_cb + // should be called. + void check_rx_completion(recv_ctx &ctx, bool is_expected, uint64_t seed, + ucs_status_t status = UCS_OK, bool is_sw_rndv = false) + { + EXPECT_EQ(ctx.consumed, is_expected); + EXPECT_EQ(ctx.comp, (is_expected && !is_sw_rndv)); + EXPECT_EQ(ctx.unexp, (!is_expected && !is_sw_rndv)); + EXPECT_EQ(ctx.sw_rndv, is_sw_rndv); + EXPECT_EQ(ctx.status, status); + if (is_expected) { + ctx.mbuf->pattern_check(seed); + } + } + + void check_tx_completion(send_ctx &ctx) + { + wait_for_flag(&ctx.comp); + EXPECT_TRUE(ctx.comp); + EXPECT_EQ(ctx.status, UCS_OK); + } + + void test_tag_expected(send_func sfunc, size_t length = 75, + bool is_sw_rndv = false) { + uct_tag_t tag = 11; + + if (RUNNING_ON_VALGRIND) { + length = ucs_min(length, 128U); + } + + mapped_buffer recvbuf(length, RECV_SEED, receiver()); + recv_ctx r_ctx; + init_recv_ctx(r_ctx, &recvbuf, tag); + ASSERT_UCS_OK(tag_post(receiver(), r_ctx)); + + short_progress_loop(); + + mapped_buffer sendbuf(length, SEND_SEED, sender()); + send_ctx s_ctx; + init_send_ctx(s_ctx, &sendbuf, tag, reinterpret_cast(&r_ctx), + false); + ASSERT_UCS_OK((this->*sfunc)(sender(), s_ctx)); + + // max rndv can be quite big, use increased timeout + wait_for_flag(is_sw_rndv ? 
&r_ctx.sw_rndv : &r_ctx.comp, + 3 * DEFAULT_TIMEOUT_SEC); + + check_rx_completion(r_ctx, true, SEND_SEED, UCS_OK, is_sw_rndv); + + // If it was RNDV send, need to wait send completion as well + check_tx_completion(s_ctx); + + flush(); + } + + void test_tag_unexpected(send_func sfunc, size_t length = 75, + bool take_uct_desc = false) + { + uct_tag_t tag = 11; + + if (RUNNING_ON_VALGRIND) { + length = ucs_min(length, 128U); + } + + mapped_buffer recvbuf(length, RECV_SEED, receiver()); + mapped_buffer sendbuf(length, SEND_SEED, sender()); + recv_ctx r_ctx; + init_recv_ctx(r_ctx, &recvbuf, tag, MASK, take_uct_desc); + send_ctx s_ctx; + init_send_ctx(s_ctx, &sendbuf, tag, reinterpret_cast(&r_ctx)); + ASSERT_UCS_OK((this->*sfunc)(sender(), s_ctx)); + + wait_for_flag(&r_ctx.unexp); + if (static_cast(&test_tag::tag_rndv_zcopy) == sfunc) { + // Need to cancel origin RNDV operation, beacuse no RNDV_COMP + // will be received (as it arrived unexpectedly and should be + // handled by SW). + ASSERT_UCS_OK(tag_rndv_cancel(sender(), s_ctx.rndv_op)); + } + + check_rx_completion(r_ctx, false, SEND_SEED); + flush(); + } + + void test_tag_wrong_tag(send_func sfunc) + { + const size_t length = 65; + uct_tag_t tag = 11; + + mapped_buffer sendbuf(length, SEND_SEED, sender()); + mapped_buffer recvbuf(length, RECV_SEED, receiver()); + + // Post modified tag for incoming message to be reported as unexpected + // and not to be macthed. 
+ recv_ctx r_ctx; + init_recv_ctx(r_ctx, &recvbuf, tag + 1); + send_ctx s_ctx; + init_send_ctx(s_ctx, &sendbuf, tag, reinterpret_cast(&r_ctx)); + + ASSERT_UCS_OK((this->*sfunc)(sender(), s_ctx)); + + short_progress_loop(); + + ASSERT_UCS_OK(tag_post(receiver(), r_ctx)); + + wait_for_flag(&r_ctx.unexp); + + // Message should be reported as unexpected and filled with + // recv seed (unchanged), as the incoming tag does not match the expected + check_rx_completion(r_ctx, false, RECV_SEED); + ASSERT_UCS_OK(tag_cancel(receiver(), r_ctx, 1)); + flush(); + } + + void test_tag_mask(send_func sfunc) + { + const size_t length = 65; + + mapped_buffer recvbuf(length, RECV_SEED, receiver()); + + // Post tag and tag mask in a way that it matches sender tag with + // tag_mask applied, but is not exactly the same. + recv_ctx r_ctx; + init_recv_ctx(r_ctx, &recvbuf, 0xff, 0xff); + ASSERT_UCS_OK(tag_post(receiver(), r_ctx)); + + short_progress_loop(); + + mapped_buffer sendbuf(length, SEND_SEED, sender()); + send_ctx s_ctx; + init_send_ctx(s_ctx, &sendbuf, 0xffff, reinterpret_cast(&r_ctx)); + ASSERT_UCS_OK((this->*sfunc)(sender(), s_ctx)); + wait_for_flag(&r_ctx.comp); + + // Should be matched because tags are equal with tag mask applied. 
+ check_rx_completion(r_ctx, true, SEND_SEED); + + // If it was RNDV send, need to wait send completion as well + check_tx_completion(s_ctx); + flush(); + } + + ucs_status_t unexpected_handler(recv_ctx *ctx, void *data, unsigned flags) + { + if (ctx->take_uct_desc && (flags & UCT_CB_PARAM_FLAG_DESC)) { + m_uct_descs.push_back(data); + return UCS_INPROGRESS; + } else { + return UCS_OK; + } + } + + static void tag_consumed(uct_tag_context_t *self) + { + recv_ctx *user_ctx = ucs_container_of(self, recv_ctx, uct_ctx); + user_ctx->consumed = true; + } + + static void completed(uct_tag_context_t *self, uct_tag_t stag, uint64_t imm, + size_t length, ucs_status_t status) + { + recv_ctx *user_ctx = ucs_container_of(self, recv_ctx, uct_ctx); + user_ctx->comp = true; + user_ctx->status = status; + EXPECT_EQ(user_ctx->tag, (stag & user_ctx->tmask)); + EXPECT_EQ(user_ctx->mbuf->length(), length); + } + + static void sw_rndv_completed(uct_tag_context_t *self, uct_tag_t stag, + const void *header, unsigned header_length, + ucs_status_t status) + { + recv_ctx *user_ctx = ucs_container_of(self, recv_ctx, uct_ctx); + user_ctx->sw_rndv = true; + user_ctx->status = status; + EXPECT_EQ(user_ctx->tag, (stag & user_ctx->tmask)); + EXPECT_EQ(user_ctx->mbuf->length(), header_length); + } + + static ucs_status_t unexp_eager(void *arg, void *data, size_t length, + unsigned flags, uct_tag_t stag, + uint64_t imm, void **context) + { + recv_ctx *user_ctx = reinterpret_cast(imm); + user_ctx->unexp = true; + user_ctx->status = UCS_OK; + if (user_ctx->tag == stag) { + memcpy(user_ctx->mbuf->ptr(), data, ucs_min(length, + user_ctx->mbuf->length())); + user_ctx->mbuf->pattern_check(SEND_SEED); + } + + test_tag *self = reinterpret_cast(arg); + return self->unexpected_handler(user_ctx, data, flags); + } + + static ucs_status_t unexp_rndv(void *arg, unsigned flags, uint64_t stag, + const void *header, unsigned header_length, + uint64_t remote_addr, size_t length, + const void *rkey_buf) + { + rndv_hdr 
*rhdr = const_cast(static_cast(header)); + recv_ctx *r_ctx = reinterpret_cast(rhdr->priv[0]); + send_ctx *s_ctx = reinterpret_cast(rhdr->priv[1]); + uint16_t tail = rhdr->tail; + r_ctx->unexp = true; + r_ctx->status = UCS_OK; + + EXPECT_EQ(tail, 0xFAFA); + EXPECT_EQ(s_ctx->tag, stag); + EXPECT_EQ(length, s_ctx->sw_rndv ? 0 : s_ctx->mbuf->length()); + EXPECT_EQ(remote_addr, s_ctx->sw_rndv ? 0ul : + reinterpret_cast(s_ctx->mbuf->ptr())); + + test_tag *self = reinterpret_cast(arg); + return self->unexpected_handler(r_ctx, const_cast(header), flags); + } + + static ucs_status_t am_handler(void *arg, void *data, size_t length, + unsigned flags) + { + is_am_received = true; + return UCS_OK; + } + + static ucs_log_func_rc_t + log_ep_destroy(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *message, va_list ap) + { + if (level == UCS_LOG_LEVEL_WARN) { + // Ignore warnings about uncompleted operations during ep destroy + return UCS_LOG_FUNC_RC_STOP; + } + return UCS_LOG_FUNC_RC_CONTINUE; + } + + static void send_completion(uct_completion_t *self, ucs_status_t status) + { + send_ctx *user_ctx = ucs_container_of(self, send_ctx, uct_comp); + user_ctx->comp = true; + user_ctx->status = status; + } + + +protected: + uct_test::entity& sender() { + return **m_entities.begin(); + } + + uct_test::entity& receiver() { + return **(m_entities.end() - 1); + } + + std::vector m_uct_descs; + + static bool is_am_received; +}; + +bool test_tag::is_am_received = false; + +UCS_TEST_SKIP_COND_P(test_tag, tag_eager_short_expected, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_SHORT)) +{ + test_tag_expected(static_cast(&test_tag::tag_eager_short), + sender().iface_attr().cap.tag.eager.max_short); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_eager_bcopy_expected, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + test_tag_expected(static_cast(&test_tag::tag_eager_bcopy), + sender().iface_attr().cap.tag.eager.max_bcopy); +} + +UCS_TEST_SKIP_COND_P(test_tag, 
tag_eager_zcopy_expected, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_ZCOPY)) +{ + test_tag_expected(static_cast(&test_tag::tag_eager_zcopy), + sender().iface_attr().cap.tag.eager.max_zcopy); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_rndv_zcopy_expected, + !check_caps(UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)) +{ + test_tag_expected(static_cast(&test_tag::tag_rndv_zcopy), + sender().iface_attr().cap.tag.rndv.max_zcopy); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_eager_bcopy_unexpected, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + test_tag_unexpected(static_cast(&test_tag::tag_eager_bcopy), + sender().iface_attr().cap.tag.eager.max_bcopy); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_eager_zcopy_unexpected, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_ZCOPY)) +{ + test_tag_unexpected(static_cast(&test_tag::tag_eager_zcopy), + sender().iface_attr().cap.tag.eager.max_bcopy); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_rndv_zcopy_unexpected, + !check_caps(UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)) +{ + test_tag_unexpected(static_cast(&test_tag::tag_rndv_zcopy)); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_eager_bcopy_wrong_tag, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + test_tag_wrong_tag(static_cast(&test_tag::tag_eager_bcopy)); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_eager_zcopy_wrong_tag, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_ZCOPY)) +{ + test_tag_wrong_tag(static_cast(&test_tag::tag_eager_zcopy)); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_eager_short_tag_mask, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_SHORT)) +{ + test_tag_mask(static_cast(&test_tag::tag_eager_short)); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_eager_bcopy_tag_mask, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + test_tag_mask(static_cast(&test_tag::tag_eager_bcopy)); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_eager_zcopy_tag_mask, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_ZCOPY)) +{ + test_tag_mask(static_cast(&test_tag::tag_eager_zcopy)); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_rndv_zcopy_tag_mask, + 
!check_caps(UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)) +{ + test_tag_mask(static_cast(&test_tag::tag_rndv_zcopy)); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_hold_uct_desc, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY | + UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)) +{ + int n = 10; + int msg_size = ucs_min(sender().iface_attr().cap.tag.eager.max_bcopy, + sender().iface_attr().cap.tag.rndv.max_zcopy); + for (int i = 0; i < n; ++i) { + test_tag_unexpected(static_cast(&test_tag::tag_eager_bcopy), + msg_size, true); + + test_tag_unexpected(static_cast(&test_tag::tag_rndv_zcopy), + msg_size, true); + } + + for (ucs::ptr_vector::const_iterator iter = m_uct_descs.begin(); + iter != m_uct_descs.end(); ++iter) + { + uct_iface_release_desc(*iter); + } +} + + +UCS_TEST_SKIP_COND_P(test_tag, tag_send_no_tag, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + uct_iface_set_am_handler(receiver().iface(), 0, am_handler, NULL, 0); + mapped_buffer lbuf(200, SEND_SEED, sender()); + ssize_t len = uct_ep_am_bcopy(sender().ep(0), 0, mapped_buffer::pack, + reinterpret_cast(&lbuf), 0); + EXPECT_EQ(lbuf.length(), static_cast(len)); + wait_for_flag(&is_am_received); + EXPECT_TRUE(is_am_received); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_cancel_force, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + const size_t length = 128; + mapped_buffer recvbuf(length, RECV_SEED, receiver()); + recv_ctx r_ctx; + init_recv_ctx(r_ctx, &recvbuf, 1); + + ASSERT_UCS_OK(tag_post(receiver(), r_ctx)); + short_progress_loop(200); + ASSERT_UCS_OK(tag_cancel(receiver(), r_ctx, 1)); + + short_progress_loop(); + + mapped_buffer sendbuf(length, SEND_SEED, sender()); + send_ctx s_ctx; + init_send_ctx(s_ctx, &sendbuf, 1, reinterpret_cast(&r_ctx)); + ASSERT_UCS_OK(tag_eager_bcopy(sender(), s_ctx)); + + // Message should arrive unexpected, since tag was cancelled + // on the receiver. 
+ wait_for_flag(&r_ctx.unexp); + check_rx_completion(r_ctx, false, SEND_SEED); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_cancel_noforce, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + const size_t length = 128; + mapped_buffer recvbuf(length, RECV_SEED, receiver()); + recv_ctx r_ctx; + init_recv_ctx(r_ctx, &recvbuf, 1); + + ASSERT_UCS_OK(tag_post(receiver(), r_ctx)); + short_progress_loop(200); + ASSERT_UCS_OK(tag_cancel(receiver(), r_ctx, 0)); + + wait_for_flag(&r_ctx.comp); + + // Check that completed callback has been called with CANCELED status + // (because 0 was passed as force parameter to cancel). + EXPECT_TRUE(r_ctx.comp); + EXPECT_EQ(r_ctx.status, UCS_ERR_CANCELED); +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_limit, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + const size_t length = 32; + ucs::ptr_vector rctxs; + ucs::ptr_vector rbufs; + ucs_status_t status; + + do { + recv_ctx *rctx_p = new recv_ctx(); + mapped_buffer *buf_p = new mapped_buffer(length, RECV_SEED, receiver()); + init_recv_ctx(*rctx_p, buf_p, 1); + rctxs.push_back(rctx_p); + rbufs.push_back(buf_p); + status = tag_post(receiver(), *rctx_p); + // Make sure send resources are acknowledged, as we + // awaiting for tag space exhaustion. 
+ short_progress_loop(); + } while (status == UCS_OK); + + EXPECT_EQ(status, UCS_ERR_EXCEEDS_LIMIT); + + // Cancel one of the postings + ASSERT_UCS_OK(tag_cancel(receiver(), rctxs.at(0), 1)); + short_progress_loop(); + + // Check we can post again within a reasonable time + ucs_time_t deadline = ucs_get_time() + ucs_time_from_sec(20.0); + do { + status = tag_post(receiver(), rctxs.at(0)); + } while ((ucs_get_time() < deadline) && (status == UCS_ERR_EXCEEDS_LIMIT)); + ASSERT_UCS_OK(status); + + // remove posted tags from HW + for (ucs::ptr_vector::const_iterator iter = rctxs.begin(); + iter != rctxs.end() - 1; ++iter) { + ASSERT_UCS_OK(tag_cancel(receiver(), **iter, 1)); + } +} + +UCS_TEST_SKIP_COND_P(test_tag, tag_post_same, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + const size_t length = 128; + mapped_buffer recvbuf(length, RECV_SEED, receiver()); + recv_ctx r_ctx; + init_recv_ctx(r_ctx, &recvbuf, 1); + + ASSERT_UCS_OK(tag_post(receiver(), r_ctx)); + + // Can't post the same buffer until it is completed/cancelled + ucs_status_t status = tag_post(receiver(), r_ctx); + EXPECT_EQ(status, UCS_ERR_ALREADY_EXISTS); + + // Cancel with force, should be able to re-post immediately + ASSERT_UCS_OK(tag_cancel(receiver(), r_ctx, 1)); + ASSERT_UCS_OK(tag_post(receiver(), r_ctx)); + + // Cancel without force, should be able to re-post when receive completion + ASSERT_UCS_OK(tag_cancel(receiver(), r_ctx, 0)); + status = tag_post(receiver(), r_ctx); + EXPECT_EQ(status, UCS_ERR_ALREADY_EXISTS); // no completion yet + + wait_for_flag(&r_ctx.comp); // cancel completed, should be able to post + ASSERT_UCS_OK(tag_post(receiver(), r_ctx)); + + // Now send something to trigger rx completion + init_recv_ctx(r_ctx, &recvbuf, 1); // reinit rx to clear completed states + mapped_buffer sendbuf(length, SEND_SEED, sender()); + send_ctx s_ctx; + init_send_ctx(s_ctx, &sendbuf, 1, reinterpret_cast(&r_ctx)); + ASSERT_UCS_OK(tag_eager_bcopy(sender(), s_ctx)); + + 
wait_for_flag(&r_ctx.comp); // message consumed, should be able to post + ASSERT_UCS_OK(tag_post(receiver(), r_ctx)); + + ASSERT_UCS_OK(tag_cancel(receiver(), r_ctx, 1)); +} + +UCS_TEST_SKIP_COND_P(test_tag, sw_rndv_expected, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY | + UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)) +{ + test_tag_expected(static_cast(&test_tag::tag_rndv_request), + sender().iface_attr().cap.tag.rndv.max_hdr, true); +} + +UCS_TEST_SKIP_COND_P(test_tag, rndv_limit, + !check_caps(UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)) +{ + mapped_buffer sendbuf(8, SEND_SEED, sender()); + ucs::ptr_vector sctxs; + ucs_status_t status; + send_ctx *sctx_p; + void *op; + + do { + sctx_p = new send_ctx; + init_send_ctx(*sctx_p, &sendbuf, 0xffff, 0); + status = tag_rndv_zcopy(sender(), *sctx_p); + sctxs.push_back(sctx_p); + } while (status == UCS_OK); + + EXPECT_EQ(status, UCS_ERR_NO_RESOURCE); + + for (ucs::ptr_vector::const_iterator iter = sctxs.begin(); + iter != sctxs.end(); ++iter) + { + op = (*iter)->rndv_op; + if (!UCS_PTR_IS_ERR(op)) { + tag_rndv_cancel(sender(), op); + } + } + + ucs_log_push_handler(log_ep_destroy); + sender().destroy_eps(); + ucs_log_pop_handler(); +} + +UCS_TEST_SKIP_COND_P(test_tag, sw_rndv_unexpected, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY | + UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)) +{ + test_tag_unexpected(static_cast(&test_tag::tag_rndv_request)); +} + +UCT_TAG_INSTANTIATE_TEST_CASE(test_tag) + + +#if ENABLE_STATS && IBV_HW_TM +extern "C" { +#include +#include +#include +} + +class test_tag_stats : public test_tag { +public: + void init() { + stats_activate(); + test_tag::init(); + } + + void cleanup() { + test_tag::cleanup(); + stats_restore(); + } + + ucs_stats_node_t *ep_stats(const entity &e) + { + return ucs_derived_of(e.ep(0), uct_base_ep_t)->stats; + } + + ucs_stats_node_t *iface_stats(const entity &e) + { + return ucs_derived_of(e.iface(), uct_rc_mlx5_iface_common_t)->tm.stats; + } + + void provoke_sync(const entity &e) + { + uct_rc_mlx5_iface_common_t 
*iface; + + iface = ucs_derived_of(e.iface(), uct_rc_mlx5_iface_common_t); + + // Counters are synced every IBV_DEVICE_MAX_UNEXP_COUNT ops, set + // it one op before, so that any following unexpected message would + // cause HW ans SW counters sync. + iface->tm.unexpected_cnt = IBV_DEVICE_MAX_UNEXP_COUNT - 1; + } + + void check_tx_counters(int op, uint64_t op_val, int type, size_t len) + { + uint64_t v; + + v = UCS_STATS_GET_COUNTER(ep_stats(sender()), op); + EXPECT_EQ(op_val, v); + + // With valgrind reduced messages is sent + if (!RUNNING_ON_VALGRIND) { + v = UCS_STATS_GET_COUNTER(ep_stats(sender()), type); + EXPECT_EQ(len, v); + } + } + + void check_rx_counter(int op, uint64_t val, entity &e) + { + EXPECT_EQ(val, UCS_STATS_GET_COUNTER(iface_stats(e), op)); + } +}; + +UCS_TEST_SKIP_COND_P(test_tag_stats, tag_expected_eager, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_SHORT | + UCT_IFACE_FLAG_TAG_EAGER_BCOPY | + UCT_IFACE_FLAG_TAG_EAGER_ZCOPY)) +{ + std::pair > sfuncs[3] = { + std::make_pair(static_cast(&test_tag::tag_eager_short), + std::make_pair(sender().iface_attr().cap.tag.eager.max_short, + static_cast(UCT_EP_STAT_BYTES_SHORT))), + + std::make_pair(static_cast(&test_tag::tag_eager_bcopy), + std::make_pair(sender().iface_attr().cap.tag.eager.max_bcopy, + static_cast(UCT_EP_STAT_BYTES_BCOPY))), + + std::make_pair(static_cast(&test_tag::tag_eager_zcopy), + std::make_pair(sender().iface_attr().cap.tag.eager.max_zcopy, + static_cast(UCT_EP_STAT_BYTES_ZCOPY))) + }; + + for (int i = 0; i < 3; ++i) { + test_tag_expected(sfuncs[i].first, sfuncs[i].second.first); + check_tx_counters(UCT_EP_STAT_TAG, i + 1, + sfuncs[i].second.second, + sfuncs[i].second.first); + check_rx_counter(UCT_RC_MLX5_STAT_TAG_RX_EXP, i + 1, receiver()); + } +} + +UCS_TEST_SKIP_COND_P(test_tag_stats, tag_unexpected_eager, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY | + UCT_IFACE_FLAG_TAG_EAGER_ZCOPY)) +{ + std::pair > sfuncs[2] = { + std::make_pair(static_cast(&test_tag::tag_eager_bcopy), + 
std::make_pair(sender().iface_attr().cap.tag.eager.max_bcopy, + static_cast(UCT_EP_STAT_BYTES_BCOPY))), + + std::make_pair(static_cast(&test_tag::tag_eager_zcopy), + std::make_pair(sender().iface_attr().cap.tag.eager.max_zcopy, + static_cast(UCT_EP_STAT_BYTES_ZCOPY))) + }; + + for (int i = 0; i < 2; ++i) { + test_tag_unexpected(sfuncs[i].first, sfuncs[i].second.first); + check_tx_counters(UCT_EP_STAT_TAG, i + 1, + sfuncs[i].second.second, + sfuncs[i].second.first); + check_rx_counter(UCT_RC_MLX5_STAT_TAG_RX_EAGER_UNEXP, i + 1, receiver()); + } +} + +UCS_TEST_SKIP_COND_P(test_tag_stats, tag_list_ops, + !check_caps(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + mapped_buffer recvbuf(32, RECV_SEED, receiver()); + recv_ctx rctx; + + init_recv_ctx(rctx, &recvbuf, 1); + + ASSERT_UCS_OK(tag_post(receiver(), rctx)); + check_rx_counter(UCT_RC_MLX5_STAT_TAG_LIST_ADD, 1ul, receiver()); + + ASSERT_UCS_OK(tag_cancel(receiver(), rctx, 1)); + check_rx_counter(UCT_RC_MLX5_STAT_TAG_LIST_DEL, 1ul, receiver()); + + // Every ADD and DEL is paired with SYNC, but stats counter is increased + // when separate SYNC op is issued only. So, we expect it to be 0 after + // ADD and DEL operations. 
+ check_rx_counter(UCT_RC_MLX5_STAT_TAG_LIST_SYNC, 0ul, receiver()); + + // Provoke real SYNC op and send a message unexpectedly + provoke_sync(receiver()); + test_tag_unexpected(static_cast(&test_tag::tag_eager_bcopy)); + check_rx_counter(UCT_RC_MLX5_STAT_TAG_LIST_SYNC, 1ul, receiver()); +} + + +UCS_TEST_SKIP_COND_P(test_tag_stats, tag_rndv, + !check_caps(UCT_IFACE_FLAG_TAG_RNDV_ZCOPY | + UCT_IFACE_FLAG_TAG_EAGER_BCOPY)) +{ + size_t len = sender().iface_attr().cap.tag.rndv.max_zcopy / 8; + + // Check UNEXP_RNDV on the receiver + test_tag_unexpected(static_cast(&test_tag::tag_rndv_zcopy), len); + check_rx_counter(UCT_RC_MLX5_STAT_TAG_RX_RNDV_UNEXP, 1ul, receiver()); + + // Check that sender receives RNDV_FIN in case of expected rndv message + test_tag_expected(static_cast(&test_tag::tag_rndv_zcopy), len); + check_rx_counter(UCT_RC_MLX5_STAT_TAG_RX_RNDV_FIN, 1ul, sender()); + + + // Check UNEXP_RNDV_REQ on the receiver + test_tag_unexpected(static_cast(&test_tag::tag_rndv_request)); + check_rx_counter(UCT_RC_MLX5_STAT_TAG_RX_RNDV_REQ_UNEXP, 1ul, receiver()); + + // Check NEXP_RNDV_REQ on the receiver + test_tag_expected(static_cast(&test_tag::tag_rndv_request), + sender().iface_attr().cap.tag.rndv.max_hdr, true); + check_rx_counter(UCT_RC_MLX5_STAT_TAG_RX_RNDV_REQ_EXP, 1ul, receiver()); +} + +UCT_TAG_INSTANTIATE_TEST_CASE(test_tag_stats) + +#endif + + +#if IBV_HW_TM + +extern "C" { +#include +} + +// TODO: Unite with test_tag + add GRH testing for DC +class test_tag_mp_xrq : public uct_test { +public: + static const uint64_t SEND_SEED = 0xa1a1a1a1a1a1a1a1ul; + static const uint64_t AM_ID = 1; + typedef void (test_tag_mp_xrq::*send_func)(mapped_buffer*); + + virtual void init(); + test_tag_mp_xrq(); + uct_rc_mlx5_iface_common_t* rc_mlx5_iface(entity &e); + void send_eager_bcopy(mapped_buffer *buf); + void send_eager_zcopy(mapped_buffer *buf); + void send_rndv_zcopy(mapped_buffer *buf); + void send_rndv_request(mapped_buffer *buf); + void send_am_bcopy(mapped_buffer 
*buf); + void test_common(send_func sfunc, size_t num_segs, size_t exp_segs = 1, + bool is_eager = true); + + static ucs_status_t am_handler(void *arg, void *data, size_t length, + unsigned flags); + + static ucs_status_t unexp_eager(void *arg, void *data, size_t length, + unsigned flags, uct_tag_t stag, + uint64_t imm, void **context); + + static ucs_status_t unexp_rndv(void *arg, unsigned flags, uint64_t stag, + const void *header, unsigned header_length, + uint64_t remote_addr, size_t length, + const void *rkey_buf); + +protected: + static size_t m_rx_counter; + std::vector m_uct_descs; + bool m_hold_uct_desc; + + uct_test::entity& sender() { + return **m_entities.begin(); + } + + uct_test::entity& receiver() { + return **(m_entities.end() - 1); + } + +private: + ucs_status_t unexp_handler(void *data, unsigned flags, uint64_t imm, + void **context); + ucs_status_t handle_uct_desc(void *data, unsigned flags); + void set_env_var_or_skip(void *config, const char *var, const char *val); + size_t m_max_hdr; + bool m_first_received; + bool m_last_received; + uct_completion_t m_uct_comp; +}; + +size_t test_tag_mp_xrq::m_rx_counter = 0; + +test_tag_mp_xrq::test_tag_mp_xrq() : m_hold_uct_desc(false), + m_first_received(false), + m_last_received(false) +{ + m_max_hdr = sizeof(ibv_tmh) + sizeof(ibv_rvh); + m_uct_comp.count = 512; // We do not need completion func to be invoked + m_uct_comp.func = NULL; +} + +uct_rc_mlx5_iface_common_t* test_tag_mp_xrq::rc_mlx5_iface(entity &e) +{ + return ucs_derived_of(e.iface(), uct_rc_mlx5_iface_common_t); +} + +void test_tag_mp_xrq::set_env_var_or_skip(void *config, const char *var, + const char *val) +{ + ucs_status_t status = uct_config_modify(config, var, val); + if (status != UCS_OK) { + ucs_warn("%s", ucs_status_string(status)); + UCS_TEST_SKIP_R(std::string("Can't set ") + var); + } +} + +void test_tag_mp_xrq::init() +{ + set_env_var_or_skip(m_iface_config, "RC_TM_NUM_STRIDES", "8"); + set_env_var_or_skip(m_iface_config, 
"RC_TM_ENABLE", "y"); + set_env_var_or_skip(m_md_config, "MLX5_DEVX_OBJECTS", "dct,dcsrq,rcsrq,rcqp"); + + uct_test::init(); + + uct_iface_params params; + params.field_mask = UCT_IFACE_PARAM_FIELD_RX_HEADROOM | + UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_CB | + UCT_IFACE_PARAM_FIELD_HW_TM_EAGER_ARG | + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_CB | + UCT_IFACE_PARAM_FIELD_HW_TM_RNDV_ARG; + + // tl and dev names are taken from resources via GetParam, no need + // to fill it here + params.rx_headroom = 0; + params.open_mode = UCT_IFACE_OPEN_MODE_DEVICE; + params.eager_cb = unexp_eager; + params.eager_arg = reinterpret_cast(this); + params.rndv_cb = unexp_rndv; + params.rndv_arg = reinterpret_cast(this); + + entity *sender = uct_test::create_entity(params); + m_entities.push_back(sender); + + entity *receiver = uct_test::create_entity(params); + m_entities.push_back(receiver); + + if (!UCT_RC_MLX5_MP_ENABLED(rc_mlx5_iface(test_tag_mp_xrq::sender()))) { + UCS_TEST_SKIP_R("No MP XRQ support"); + } + + sender->connect(0, *receiver, 0); + + uct_iface_set_am_handler(receiver->iface(), AM_ID, am_handler, this, 0); +} + +void test_tag_mp_xrq::send_eager_bcopy(mapped_buffer *buf) +{ + ssize_t len = uct_ep_tag_eager_bcopy(sender().ep(0), 0x11, + reinterpret_cast(this), + mapped_buffer::pack, + reinterpret_cast(buf), 0); + + EXPECT_EQ(buf->length(), static_cast(len)); +} + +void test_tag_mp_xrq::send_eager_zcopy(mapped_buffer *buf) +{ + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, buf->ptr(), buf->length(), buf->memh(), + sender().iface_attr().cap.tag.eager.max_iov); + + ucs_status_t status = uct_ep_tag_eager_zcopy(sender().ep(0), 0x11, + reinterpret_cast(this), + iov, iovcnt, 0, &m_uct_comp); + ASSERT_UCS_OK_OR_INPROGRESS(status); +} + +void test_tag_mp_xrq::send_rndv_zcopy(mapped_buffer *buf) +{ + UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, buf->ptr(), buf->length(), buf->memh(), + sender().iface_attr().cap.tag.rndv.max_iov); + + uint64_t dummy_hdr = 0xFAFA; + 
ucs_status_ptr_t rndv_op = uct_ep_tag_rndv_zcopy(sender().ep(0), 0x11, &dummy_hdr, + sizeof(dummy_hdr), iov, + iovcnt, 0, &m_uct_comp); + ASSERT_FALSE(UCS_PTR_IS_ERR(rndv_op)); + + // There will be no real RNDV performed, cancel the op to avoid mpool + // warning on exit + ASSERT_UCS_OK(uct_ep_tag_rndv_cancel(sender().ep(0),rndv_op)); +} + +void test_tag_mp_xrq::send_rndv_request(mapped_buffer *buf) +{ + size_t size = sender().iface_attr().cap.tag.rndv.max_hdr; + void *hdr = alloca(size); + + ASSERT_UCS_OK(uct_ep_tag_rndv_request(sender().ep(0), 0x11, hdr, size, 0)); +} + +void test_tag_mp_xrq::send_am_bcopy(mapped_buffer *buf) +{ + ssize_t len = uct_ep_am_bcopy(sender().ep(0), AM_ID, mapped_buffer::pack, + reinterpret_cast(buf), 0); + + EXPECT_EQ(buf->length(), static_cast(len)); +} + +void test_tag_mp_xrq::test_common(send_func sfunc, size_t num_segs, + size_t exp_segs, bool is_eager) +{ + size_t seg_size = rc_mlx5_iface(sender())->super.super.config.seg_size; + size_t seg_num = is_eager ? num_segs : 1; + size_t exp_val = is_eager ? 
exp_segs : 1; + size_t size = (seg_size * seg_num) - m_max_hdr; + m_rx_counter = 0; + m_first_received = m_last_received = false; + + EXPECT_TRUE(size <= sender().iface_attr().cap.tag.eager.max_bcopy); + mapped_buffer buf(size, SEND_SEED, sender()); + + (this->*sfunc)(&buf); + + wait_for_value(&m_rx_counter, exp_val, true); + EXPECT_EQ(exp_val, m_rx_counter); + EXPECT_EQ(is_eager, m_first_received); // relevant for eager only + EXPECT_EQ(is_eager, m_last_received); // relevant for eager only +} + +ucs_status_t test_tag_mp_xrq::handle_uct_desc(void *data, unsigned flags) +{ + if ((flags & UCT_CB_PARAM_FLAG_DESC) && m_hold_uct_desc) { + m_uct_descs.push_back(data); + return UCS_INPROGRESS; + } + + return UCS_OK; +} + +ucs_status_t test_tag_mp_xrq::am_handler(void *arg, void *data, size_t length, + unsigned flags) +{ + EXPECT_TRUE(flags & UCT_CB_PARAM_FLAG_FIRST); + EXPECT_FALSE(flags & UCT_CB_PARAM_FLAG_MORE); + + m_rx_counter++; + + test_tag_mp_xrq *self = reinterpret_cast(arg); + return self->handle_uct_desc(data, flags); +} + +ucs_status_t test_tag_mp_xrq::unexp_handler(void *data, unsigned flags, + uint64_t imm, void **context) +{ + void *self = reinterpret_cast(this); + + if (flags & UCT_CB_PARAM_FLAG_FIRST) { + // Set the message context which will be passed back with the rest of + // message fragments + *context = self; + m_first_received = true; + + } else { + // Check that the correct message context is passed with all fragments + EXPECT_EQ(self, *context); + } + + if (!(flags & UCT_CB_PARAM_FLAG_MORE)) { + // Last message should contain valid immediate value + EXPECT_EQ(reinterpret_cast(this), imm); + m_last_received = true; + } else { + // Immediate value is passed with the last message only + EXPECT_EQ(0ul, imm); + } + + + return handle_uct_desc(data, flags); +} + +ucs_status_t test_tag_mp_xrq::unexp_eager(void *arg, void *data, size_t length, + unsigned flags, uct_tag_t stag, + uint64_t imm, void **context) +{ + test_tag_mp_xrq *self = 
reinterpret_cast(arg); + + m_rx_counter++; + + return self->unexp_handler(data, flags, imm, context); +} + +ucs_status_t test_tag_mp_xrq::unexp_rndv(void *arg, unsigned flags, + uint64_t stag, const void *header, + unsigned header_length, + uint64_t remote_addr, size_t length, + const void *rkey_buf) +{ + EXPECT_FALSE(flags & UCT_CB_PARAM_FLAG_FIRST); + EXPECT_FALSE(flags & UCT_CB_PARAM_FLAG_MORE); + + m_rx_counter++; + + return UCS_OK; +} + +UCS_TEST_P(test_tag_mp_xrq, config) +{ + uct_rc_mlx5_iface_common_t *iface = rc_mlx5_iface(sender()); + + // MP XRQ is supported with tag offload only + EXPECT_TRUE(UCT_RC_MLX5_TM_ENABLED(iface)); + + // With MP XRQ segment size should be equal to MTU, because HW generates + // CQE per each received MTU + size_t mtu = uct_ib_mtu_value(uct_ib_iface_port_attr(&(iface)->super.super)->active_mtu); + EXPECT_EQ(mtu, iface->super.super.config.seg_size); + + const uct_iface_attr *attrs = &sender().iface_attr(); + + // Max tag bcopy is limited by tag tx memory pool + EXPECT_EQ(iface->tm.max_bcopy - sizeof(ibv_tmh), + attrs->cap.tag.eager.max_bcopy); + EXPECT_GT(attrs->cap.tag.eager.max_bcopy, + iface->super.super.config.seg_size); + + // Max tag zcopy is limited by maximal IB message size + EXPECT_EQ(uct_ib_iface_port_attr(&iface->super.super)->max_msg_sz - sizeof(ibv_tmh), + attrs->cap.tag.eager.max_zcopy); + + // Maximal AM size should not exceed segment size, so it would always + // arrive in one-fragment packet (with header it should be strictly less) + EXPECT_LT(attrs->cap.am.max_bcopy, iface->super.super.config.seg_size); + EXPECT_LT(attrs->cap.am.max_zcopy, iface->super.super.config.seg_size); +} + +UCS_TEST_P(test_tag_mp_xrq, desc_release) +{ + m_hold_uct_desc = true; // We want to "hold" UCT memory descriptors + std::pair sfuncs[5] = { + std::make_pair(&test_tag_mp_xrq::send_eager_bcopy, true), + std::make_pair(&test_tag_mp_xrq::send_eager_zcopy, true), + std::make_pair(&test_tag_mp_xrq::send_rndv_zcopy, false), + 
std::make_pair(&test_tag_mp_xrq::send_rndv_request, false), + std::make_pair(&test_tag_mp_xrq::send_am_bcopy, false) + }; + + for (int i = 0; i < 5; ++i) { + test_common(sfuncs[i].first, 3, 3, sfuncs[i].second); + } + + for (ucs::ptr_vector::const_iterator iter = m_uct_descs.begin(); + iter != m_uct_descs.end(); ++iter) + { + uct_iface_release_desc(*iter); + } +} + +UCS_TEST_P(test_tag_mp_xrq, am) +{ + test_common(&test_tag_mp_xrq::send_am_bcopy, 1, 1, false); +} + +UCS_TEST_P(test_tag_mp_xrq, bcopy_eager_only) +{ + test_common(&test_tag_mp_xrq::send_eager_bcopy, 1); +} + +UCS_TEST_P(test_tag_mp_xrq, zcopy_eager_only) +{ + test_common(&test_tag_mp_xrq::send_eager_zcopy, 1); +} + +UCS_TEST_P(test_tag_mp_xrq, bcopy_eager) +{ + test_common(&test_tag_mp_xrq::send_eager_bcopy, 5, 5); +} + +UCS_TEST_P(test_tag_mp_xrq, zcopy_eager) +{ + test_common(&test_tag_mp_xrq::send_eager_zcopy, 5, 5); +} + +UCS_TEST_P(test_tag_mp_xrq, rndv_zcopy) +{ + test_common(&test_tag_mp_xrq::send_rndv_zcopy, 1, 1, false); +} + +UCS_TEST_P(test_tag_mp_xrq, rndv_request) +{ + test_common(&test_tag_mp_xrq::send_rndv_request, 1, 1, false); +} + +UCT_TAG_INSTANTIATE_TEST_CASE(test_tag_mp_xrq) + +#endif diff --git a/test/gtest/uct/test_uct_ep.cc b/test/gtest/uct/test_uct_ep.cc new file mode 100644 index 0000000..81ba407 --- /dev/null +++ b/test/gtest/uct/test_uct_ep.cc @@ -0,0 +1,88 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2016. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. 
+ */
+
+extern "C" {
+#include
+}
+#include "uct_test.h"
+
+/* Fixture with one sender and one receiver entity whose endpoints are connected and torn down on demand. */
+class test_uct_ep : public uct_test {
+protected:
+    /* Setup: builds both entities and installs a handler for active message id 1 on the receiver. */
+    void init() {
+        uct_test::init();
+        m_sender = uct_test::create_entity(0);
+        m_entities.push_back(m_sender);
+        /* Runs once the sender exists and before the receiver is created (helper is not defined in this file). */
+        check_skip_test();
+
+        m_receiver = uct_test::create_entity(0);
+        m_entities.push_back(m_receiver);
+        /* Handler is ucs_empty_function_return_success, registered with a NULL argument and UCT_CB_FLAG_ASYNC. */
+        uct_iface_set_am_handler(m_receiver->iface(), 1,
+                                 (uct_am_callback_t)ucs_empty_function_return_success,
+                                 NULL, UCT_CB_FLAG_ASYNC);
+    }
+    /* Calls m_sender->connect(0, *m_receiver, 0); the zeros are presumably endpoint indices (ep(0) is used to send). */
+    void connect() {
+        m_sender->connect(0, *m_receiver, 0);
+        short_progress_loop(10); /* Some transports need time to become ready */
+    }
+    /* Flushes, then destroys endpoint 0: on the receiver only if it reports CONNECT_TO_EP, on the sender always. */
+    void disconnect() {
+        flush();
+        if (m_receiver->iface_attr().cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) {
+            m_receiver->destroy_ep(0);
+        }
+        m_sender->destroy_ep(0);
+    }
+    /* True only when built with HAVE_DC_DV and has_transport("dc_mlx5") holds; part of the skip condition below. */
+    bool skip_on_ib_dc() {
+#ifdef HAVE_DC_DV /* skip due to DCI stuck bug */
+        return has_transport("dc_mlx5");
+#else
+        return false;
+#endif
+    }
+    /* Both pointers are also stored in m_entities by init(); ownership is not visible from this file. */
+    entity * m_sender;
+    entity * m_receiver;
+};
+/* Each cycle: connect, post zero-copy AMs (id 1, NULL completion) until the budget or resources run out, disconnect. */
+UCS_TEST_SKIP_COND_P(test_uct_ep, disconnect_after_send,
+                     (!check_caps(UCT_IFACE_FLAG_AM_ZCOPY) ||
+                      skip_on_ib_dc())) {
+    ucs_status_t status;
+    /* 256-byte mapped buffer; the length handed to the IOV macro is capped by the sender's am.max_zcopy. */
+    mapped_buffer buffer(256, 0, *m_sender);
+    UCS_TEST_GET_BUFFER_IOV(iov, iovcnt, buffer.ptr(),
+                            (ucs_min(buffer.length(), m_sender->iface_attr().cap.am.max_zcopy)),
+                            buffer.memh(),
+                            m_sender->iface_attr().cap.am.max_iov);
+    /* max_iter bounds both the number of connect cycles and the number of sends per cycle. */
+    unsigned max_iter = 300 / ucs::test_time_multiplier();
+    for (unsigned i = 0; i < max_iter; ++i) {
+        connect();
+        for (unsigned count = 0; count < max_iter; ) {
+            status = uct_ep_am_zcopy(m_sender->ep(0), 1, NULL, 0, iov, iovcnt,
+                                     0, NULL);
+            if (status == UCS_ERR_NO_RESOURCE) {
+                if (count > 0) { /* at least one send was posted: end this cycle */
+                    break;
+                }
+                progress(); /* nothing posted yet: progress and retry */
+            } else {
+                ASSERT_UCS_OK_OR_INPROGRESS(status);
+                ++count;
+            }
+        }
+        disconnect();
+        short_progress_loop();
+    }
+}
+
+UCT_INSTANTIATE_NO_SELF_TEST_CASE(test_uct_ep)
diff --git a/test/gtest/uct/test_uct_perf.cc b/test/gtest/uct/test_uct_perf.cc
new file mode 100644
index 0000000..8b87703
--- /dev/null
+++ 
b/test/gtest/uct/test_uct_perf.cc
@@ -0,0 +1,174 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+*
+* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+* See file LICENSE for terms.
+*/
+
+#include "uct_test.h"
+
+#include
+extern "C" {
+#include
+}
+/* MB is 1024^-2 (used by the "MB/sec" rows); the two multipliers widen the [min, max] window in the envelope test. */
+#define MB pow(1024, -2)
+#define UCT_PERF_TEST_MULTIPLIER 5
+#define UCT_ARM_PERF_TEST_MULTIPLIER 15
+/* Fixture deriving from both uct_test and test_perf; it only adds the static scenario table. */
+class test_uct_perf : public uct_test, public test_perf {
+protected:
+    const static test_spec tests[];
+};
+/* Scenario table walked by the envelope test until the entry whose title is NULL. */
+/* test_spec is declared elsewhere; this file reads only its title, iters, min and max members by name. */
+const test_perf::test_spec test_uct_perf::tests[] =
+{
+  { "am latency", "usec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_PINGPONG,
+    UCT_PERF_DATA_LAYOUT_SHORT, 0, 1, { 8 }, 1, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.01, 2.5,
+    0 },
+
+  { "am rate", "Mpps",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_SHORT, 0, 1, { 8 }, 1, 2000000lu,
+    ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.8, 80.0,
+    0 },
+
+  { "am rate64", "Mpps",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_SHORT, 0, 1, { 64 }, 1, 2000000lu,
+    ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.8, 80.0,
+    0 },
+
+  { "am bcopy latency", "usec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_PINGPONG,
+    UCT_PERF_DATA_LAYOUT_BCOPY, 0, 1, { 8 }, 1, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.01, 2.5}, /* no trailing 0: the rest is value-initialized */
+
+  { "am bcopy bw", "MB/sec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_BCOPY, 0, 1, { 1000 }, 1, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 620.0, 15000.0,
+    0 },
+
+  { "am zcopy bw", "MB/sec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_ZCOPY, 0, 1, { 1000 }, 32, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 600.0, 15000.0,
+    0 },
+
+  { "put latency", "usec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_PINGPONG,
+    UCT_PERF_DATA_LAYOUT_SHORT, 0, 1, { 8 }, 1, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.01, 1.5,
+    0 },
+
+  { "put rate", "Mpps",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_SHORT, 0, 1, { 8 }, 1, 2000000lu,
+    ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.8, 80.0,
+    0 },
+
+  { "put bcopy bw", "MB/sec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_BCOPY, 0, 1, { 2048 }, 1, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 620.0, 50000.0,
+    0 },
+
+  { "put zcopy bw", "MB/sec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_PUT, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_ZCOPY, 0, 1, { 2048 }, 32, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 620.0, 50000.0,
+    0 },
+
+  { "get latency", "usec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_GET, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_ZCOPY, 0, 1, { 8 }, 1, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.01, 3.5,
+    0 },
+
+  { "atomic add latency", "usec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_PINGPONG,
+    UCT_PERF_DATA_LAYOUT_SHORT, 0, 1, { 8 }, 1, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.01, 3.5,
+    0 },
+
+  { "atomic add rate", "Mpps",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_ADD, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_SHORT, 0, 1, { 8 }, 1, 2000000lu,
+    ucs_offsetof(ucx_perf_result_t, msgrate.total_average), 1e-6, 0.5, 50.0,
+    0 },
+
+  { "atomic fadd latency", "usec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_FADD, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_SHORT, 0, 1, { 8 }, 1, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.01, 3.5,
+    0 },
+
+  { "atomic cswap latency", "usec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_CSWAP, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_SHORT, 0, 1, { 8 }, 1, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.01, 3.5,
+    0 },
+
+  { "atomic swap latency", "usec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_SWAP, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_SHORT, 0, 1, { 8 }, 1, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, latency.total_average), 1e6, 0.01, 3.5,
+    0 },
+
+  { "am iov bw", "MB/sec",
+    UCX_PERF_API_UCT, UCX_PERF_CMD_AM, UCX_PERF_TEST_TYPE_STREAM_UNI,
+    UCT_PERF_DATA_LAYOUT_ZCOPY, 8192, 3, { 256, 256, 512 }, 32, 100000lu,
+    ucs_offsetof(ucx_perf_result_t, bandwidth.total_average), MB, 600.0, 15000.0,
+    0 },
+
+  { NULL }
+};
+
+/* Runs every entry of tests[] on the parameterized transport/device; skipped outright for "cm" and "ugni_udt". */
+UCS_TEST_P(test_uct_perf, envelope) {
+    if (has_transport("cm") ||
+        has_transport("ugni_udt")) {
+        UCS_TEST_SKIP;
+    }
+
+    /* For SandyBridge CPUs, don't check performance of far-socket devices */
+    std::vector cpus = get_affinity();
+    bool check_perf = true;
+    size_t max_iter = std::numeric_limits::max();
+    /* check_perf is cleared as soon as one CPU returned by get_affinity() is not set in the resource's local_cpus. */
+    if (ucs_arch_get_cpu_model() == UCS_CPU_MODEL_INTEL_SANDYBRIDGE) {
+        for (std::vector::iterator iter = cpus.begin(); iter != cpus.end(); ++iter) {
+            if (!ucs_cpu_is_set(*iter, &GetParam()->local_cpus)) {
+                UCS_TEST_MESSAGE << "Not enforcing performance on SandyBridge far socket";
+                check_perf = false;
+                break;
+            }
+        }
+    }
+    /* TCP: cap every scenario at 1000 iterations and pass check_perf = false to run_test(). */
+    if (has_transport("tcp")) {
+        check_perf = false; /* TODO calibrate expected performance based on transport */
+        max_iter = 1000lu;
+    }
+
+    /* Run all tests */
+    for (const test_spec *test_iter = tests; test_iter->title != NULL; ++test_iter) {
+        test_spec test = *test_iter;
+        /* Work on a copy: max is multiplied and min divided by 15 on AArch64, by 5 on every other CPU model. */
+        if (ucs_arch_get_cpu_model() == UCS_CPU_MODEL_ARM_AARCH64) {
+            test.max *= UCT_ARM_PERF_TEST_MULTIPLIER;
+            test.min /= UCT_ARM_PERF_TEST_MULTIPLIER;
+        } else {
+            test.max *= UCT_PERF_TEST_MULTIPLIER;
+            test.min /= UCT_PERF_TEST_MULTIPLIER;
+        }
+        test.iters = ucs_min(test.iters, max_iter);
+        run_test(test, 0, check_perf, GetParam()->tl_name, GetParam()->dev_name);
+    }
+}
+
+UCT_INSTANTIATE_NO_SELF_TEST_CASE(test_uct_perf);
diff --git a/test/gtest/uct/test_zcopy_comp.cc b/test/gtest/uct/test_zcopy_comp.cc
new file mode 100644
index 0000000..f53ad9f
--- /dev/null
+++ b/test/gtest/uct/test_zcopy_comp.cc
@@ -0,0 +1,88 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "uct_test.h"
+
+/* Fixture for zero-copy completion tests: init() creates only the sender, each test adds its own receivers. */
+class test_zcopy_comp : public uct_test {
+    void init() { /* declared before any access specifier, i.e. in the class's default (private) section */
+        uct_test::init();
+
+        m_sender = create_entity(0);
+        m_entities.push_back(m_sender);
+        /* Runs after the sender has been created (helper is not defined in this file). */
+        check_skip_test();
+    }
+
+protected:
+    entity *m_sender;
+};
+
+/* Interleaves many small and fewer large PUT zcopy sends to two different receivers (test name: issue1440). */
+UCS_TEST_SKIP_COND_P(test_zcopy_comp, issue1440,
+                     !check_caps(UCT_IFACE_FLAG_PUT_ZCOPY))
+{
+    entity *receiver_small = create_entity(0);
+    m_entities.push_back(receiver_small);
+
+    entity *receiver_large = create_entity(0);
+    m_entities.push_back(receiver_large);
+    /* Sender endpoint 0 targets the "small" receiver, sender endpoint 1 the "large" one. */
+    m_sender->connect(0, *receiver_small, 0);
+    m_sender->connect(1, *receiver_large, 0);
+    /* Sizes follow the sender's put limits: small = max(8, min_zcopy), large = min(65536, max_zcopy). */
+    size_t size_small = ucs_max(8ul, m_sender->iface_attr().cap.put.min_zcopy);
+    size_t size_large = ucs_min(65536ul, m_sender->iface_attr().cap.put.max_zcopy);
+    ucs_assert(size_large > size_small); /* NOTE(review): evaluated before the memory-type skip below */
+    /* Skip unless the memory domain reports UCS_MEMORY_TYPE_HOST as its access memory type. */
+    if (m_sender->md_attr().cap.access_mem_type != UCS_MEMORY_TYPE_HOST) {
+        std::stringstream ss;
+        ss << "test_zcopy_comp is not supported by " << GetParam();
+        UCS_TEST_SKIP_R(ss.str());
+    }
+    /* Arguments are (size, 0, entity); the 0 is presumably the fill seed, as in other fixtures - confirm. */
+    mapped_buffer sendbuf_small(size_small, 0, *m_sender);
+    mapped_buffer sendbuf_large(size_large, 0, *m_sender);
+    mapped_buffer recvbuf_small(size_small, 0, *receiver_small);
+    mapped_buffer recvbuf_large(size_large, 0, *receiver_large);
+
+    /*
+     * Send a mix of small messages to one destination and large messages to
+     * another destination. This can trigger overriding RC/DC send completions.
+     */
+    uct_completion_t dummy_comp = { NULL, INT_MAX }; /* presumably func = NULL, count = INT_MAX - confirm field order */
+    int num_small_sends = 1000000 / ucs::test_time_multiplier();
+    int num_large_sends = 1000 / ucs::test_time_multiplier();
+    while (num_small_sends || num_large_sends) { /* a send is counted only when it returns UCS_OK or UCS_INPROGRESS */
+        if (num_small_sends) {
+            ucs_status_t status;
+            status = uct_ep_put_zcopy(m_sender->ep(0), sendbuf_small.iov(), 1,
+                                      (uintptr_t)recvbuf_small.ptr(),
+                                      recvbuf_small.rkey(), &dummy_comp);
+            if ((status == UCS_OK) || (status == UCS_INPROGRESS)) {
+                --num_small_sends;
+            }
+        }
+        if (num_large_sends) {
+            ucs_status_t status;
+            status = uct_ep_put_zcopy(m_sender->ep(1), sendbuf_large.iov(), 1,
+                                      (uintptr_t)recvbuf_large.ptr(),
+                                      recvbuf_large.rkey(), &dummy_comp);
+            if ((status == UCS_OK) || (status == UCS_INPROGRESS)) {
+                --num_large_sends;
+            }
+        }
+        progress(); /* any other status is not checked: the same send is attempted again on the next pass */
+    }
+
+    /* Call flush on local and remote ifaces to progress data
+     * (e.g. if call flush only on local iface, a target side may
+     * not be able to send PUT ACK to an initiator in case of TCP) */
+    flush();
+}
+
+
+UCT_INSTANTIATE_NO_SELF_TEST_CASE(test_zcopy_comp)
diff --git a/test/gtest/uct/uct_p2p_test.cc b/test/gtest/uct/uct_p2p_test.cc
new file mode 100644
index 0000000..4e7f8a4
--- /dev/null
+++ b/test/gtest/uct/uct_p2p_test.cc
@@ -0,0 +1,314 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#include "uct_p2p_test.h"
+extern "C" {
+#include
+}
+
+/* Shared by log_handler() and test_xfer_print(): TRACE_DATA message counter and the log level saved while it is raised. */
+int             uct_p2p_test::log_data_count = 0;
+ucs_log_level_t uct_p2p_test::orig_log_level;
+
+/* Base resource name, with "/loopback" appended when the loopback flag is set. */
+std::string uct_p2p_test::p2p_resource::name() const {
+    std::stringstream ss;
+    ss << resource::name();
+    if (loopback) {
+        ss << "/loopback";
+    }
+    return ss.str();
+}
+/* Builds the p2p resource list once (function-local static) and returns filter_resources(all_resources, tl_name). */
+std::vector uct_p2p_test::enum_resources(const std::string& tl_name)
+{
+    static std::vector all_resources;
+
+    if (all_resources.empty()) {
+        std::vector r = uct_test::enum_resources("");
+        for (std::vector::iterator iter = r.begin(); iter != r.end(); ++iter) {
+            p2p_resource res(**iter);
+            /* Devices other than UCT_DEVICE_TYPE_SELF get a non-loopback entry; every device gets a loopback entry. */
+            if (UCT_DEVICE_TYPE_SELF != res.dev_type) {
+                res.loopback = false;
+                all_resources.push_back(res);
+            }
+
+            res.loopback = true;
+            all_resources.push_back(res);
+        }
+    }
+
+    return filter_resources(all_resources, tl_name);
+}
+/* Stores rx_headroom/err_handler for later entity creation and points m_completion at completion_cb with count 0. */
+uct_p2p_test::uct_p2p_test(size_t rx_headroom,
+                           uct_error_handler_t err_handler) :
+    m_rx_headroom(rx_headroom),
+    m_err_handler(err_handler),
+    m_completion_count(0)
+{
+    m_null_completion     = false;
+    m_completion.self     = this;
+    m_completion.uct.func = completion_cb;
+    m_completion.uct.count = 0;
+}
+/* Creates one entity connected to itself (loopback resource) or two entities connected to each other, then flushes. */
+void uct_p2p_test::init() {
+    uct_test::init();
+
+    const p2p_resource *r = dynamic_cast(GetParam());
+    ucs_assert_always(r != NULL);
+
+    /* Create 2 connected endpoints */
+    entity *e1 = uct_test::create_entity(m_rx_headroom, m_err_handler);
+    m_entities.push_back(e1);
+
+    check_skip_test();
+
+    if (r->loopback) {
+        e1->connect(0, *e1, 0);
+    } else {
+        entity *e2 = uct_test::create_entity(m_rx_headroom, m_err_handler);
+        m_entities.push_back(e2);
+
+        e1->connect(0, *e2, 0);
+        e2->connect(0, *e1, 0);
+    }
+
+    /* Allocate completion handle and set the callback */
+    m_completion_count = 0;
+
+    /* Give a chance to finish connection for some transports (ib/ud, tcp) */
+    flush();
+}
+/* Flushes before delegating to the base-class cleanup. */
+void uct_p2p_test::cleanup() {
+    flush();
+    uct_test::cleanup();
+}
+/* Default transfer body: skips the test (uct_p2p_test.h declares this method virtual). */
+void uct_p2p_test::test_xfer(send_func_t send, size_t length, unsigned flags,
+                             ucs_memory_type_t mem_type) {
+    UCS_TEST_SKIP;
+}
+/* Log hook: counts TRACE_DATA messages and lets a message continue only if its level is within the saved level. */
+ucs_log_func_rc_t
+uct_p2p_test::log_handler(const char *file, unsigned line, const char *function,
+                          ucs_log_level_t level, const char *message, va_list ap) /* NOTE(review): header names this parameter "prefix" */
+{
+    if (level == UCS_LOG_LEVEL_TRACE_DATA) {
+        ++log_data_count;
+    }
+
+    /* Continue to next log handler if original log level would have allowed it */
+    return (level <= orig_log_level) ? UCS_LOG_FUNC_RC_CONTINUE
+                                     : UCS_LOG_FUNC_RC_STOP;
+}
+/* Runs one transfer with the log level forced to TRACE_DATA and expects at least one data-trace message from it. */
+template
+void uct_p2p_test::test_xfer_print(O& os, send_func_t send, size_t length,
+                                   unsigned flags, ucs_memory_type_t mem_type)
+{
+    if (!ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) {
+        os << ucs::size_value(length) << " " << std::flush;
+    }
+
+    /*
+     * Set our own log handler, and raise log level, to test that the transport
+     * prints log messages for the transfers.
+     */
+    int count_before = log_data_count;
+    ucs_log_push_handler(log_handler);
+    orig_log_level = ucs_global_opts.log_level;
+    ucs_global_opts.log_level = UCS_LOG_LEVEL_TRACE_DATA;
+    bool expect_log = ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA);
+    /* Presumably executed when this scope is left (macro is defined elsewhere) - confirm. */
+    UCS_TEST_SCOPE_EXIT() {
+        /* Restore logging */
+        ucs_global_opts.log_level = orig_log_level;
+        ucs_log_pop_handler();
+    } UCS_TEST_SCOPE_EXIT_END
+
+    test_xfer(send, length, flags, mem_type);
+
+    if (expect_log) {
+        EXPECT_GE(log_data_count - count_before, 1);
+    }
+}
+/* Calls test_xfer_multi_mem_type() once for each memory type that passes the filters in the loop body. */
+void uct_p2p_test::test_xfer_multi(send_func_t send, size_t min_length,
+                                   size_t max_length, unsigned flags)
+{
+    /* UCS_MEMORY_TYPE_CUDA is additionally skipped unless a send- or recv-zcopy flag is set. */
+    for (int mem_type = 0; mem_type < UCS_MEMORY_TYPE_LAST; mem_type++) {
+        /* test mem type if md supports mem type
+         * (or) if HOST MD can register mem type
+         */
+        if (!((sender().md_attr().cap.access_mem_type == mem_type) ||
+            (sender().md_attr().cap.access_mem_type == UCS_MEMORY_TYPE_HOST &&
+		sender().md_attr().cap.reg_mem_types & UCS_BIT(mem_type)))) {
+            continue;
+        }
+        if (mem_type == UCS_MEMORY_TYPE_CUDA) {
+            if (!(flags & (TEST_UCT_FLAG_RECV_ZCOPY | TEST_UCT_FLAG_SEND_ZCOPY))) {
+                continue;
+            }
+        }
+        test_xfer_multi_mem_type(send, min_length, max_length, flags,
+                                 (ucs_memory_type_t) mem_type);
+    }
+}
+/* Size sweep for one memory type: min and max lengths, repeat_count random lengths, then one NULL-completion run. */
+void uct_p2p_test::test_xfer_multi_mem_type(send_func_t send, size_t min_length,
+                                            size_t max_length, unsigned flags,
+                                            ucs_memory_type_t mem_type) {
+    /* Progress text (memory type, sizes, repeat count) is written to a message stream tagged "INFO". */
+    ucs::detail::message_stream ms("INFO");
+
+    ms << "memory_type:" << ucs_memory_type_names[mem_type] << " " << std::flush;
+
+    /* Trim at 4.1 GB */
+    max_length = ucs_min(max_length, (size_t)(4.1 * (double)UCS_GBYTE));
+
+    /* Trim at max. phys memory */
+    max_length = ucs_min(max_length, ucs_get_phys_mem_size() / 8);
+
+    /* Trim at max. shared memory */
+    max_length = ucs_min(max_length, ucs_get_shmmax() * 0.8);
+
+    /* Trim when short of available memory */
+    max_length = ucs_min(max_length, ucs_get_memfree_size() / 4);
+
+    /* For large size, slow down if needed */
+    if (max_length > UCS_MBYTE) {
+        max_length = max_length / ucs::test_time_multiplier();
+        if (RUNNING_ON_VALGRIND) {
+            max_length = ucs_min(max_length, 20u * UCS_MBYTE);
+        }
+    }
+    /* Nothing to sweep once trimming leaves max_length <= min_length. */
+    if (max_length <= min_length) {
+        UCS_TEST_SKIP;
+    }
+
+    m_null_completion = false;
+
+    /* Run with min and max values */
+    test_xfer_print(ms, send, min_length, flags, mem_type);
+    test_xfer_print(ms, send, max_length, flags, mem_type);
+    /* Random sweep: repeat_count lengths (computed below), each 2^exp with exp scaled between log_min and log_max. */
+    /*
+     * Generate SQRT( log(max/min) ) random sizes
+     */
+    double log_min = log2(min_length + 1);
+    double log_max = log2(max_length - 1);
+    /* NOTE(review): max_length == 1 gives log2(0) above; the skip check only guarantees max_length > min_length. */
+    /* How many times to repeat */
+    int repeat_count;
+    repeat_count = (256 * UCS_KBYTE) / ((max_length + min_length) / 2); /* NOTE(review): divisor is 0 if max_length + min_length < 2 */
+    if (repeat_count > 1000) {
+        repeat_count = 1000;
+    }
+    repeat_count /= ucs::test_time_multiplier();
+    if (repeat_count == 0) {
+        repeat_count = 1;
+    }
+
+    if (!ucs_log_is_enabled(UCS_LOG_LEVEL_TRACE_DATA)) {
+        ms << repeat_count << "x{" << ucs::size_value(min_length) << ".."
+           << ucs::size_value(max_length) << "} " << std::flush;
+    }
+    /* exp lies in [log_min, log_max] provided ucs::rand() stays within [0, RAND_MAX] - confirm. */
+    for (int i = 0; i < repeat_count; ++i) {
+        double exp = (ucs::rand() * (log_max - log_min)) / RAND_MAX + log_min;
+        size_t length = (ssize_t)pow(2.0, exp);
+        ucs_assert(length >= min_length && length <= max_length);
+        test_xfer(send, length, flags, mem_type);
+    }
+
+    /* Run a test with implicit non-blocking mode */
+    m_null_completion = true;
+    ms << "nocomp ";
+    test_xfer_print(ms, send, (long)sqrt((min_length + 1.0) * max_length),
+                    flags, mem_type); /* length used here: sqrt((min_length + 1) * max_length), truncated to long */
+
+    sender().flush();
+}
+/* Posts one send, retrying with progress() on UCS_ERR_NO_RESOURCE; optionally waits for an in-progress operation. */
+void uct_p2p_test::blocking_send(send_func_t send, uct_ep_h ep,
+                                 const mapped_buffer &sendbuf,
+                                 const mapped_buffer &recvbuf,
+                                 bool wait_for_completion)
+{
+    unsigned prev_comp_count = m_completion_count;
+    /* UCS_OK returns at once, UCS_INPROGRESS falls through to the wait below, any other error aborts the test. */
+    ucs_status_t status;
+    do {
+        status = (this->*send)(ep, sendbuf, recvbuf);
+        if (status == UCS_OK) {
+            return;
+        } else if (status == UCS_ERR_NO_RESOURCE) {
+            progress();
+        } else if (status == UCS_INPROGRESS) {
+            break;
+        } else {
+            UCS_TEST_ABORT(ucs_status_string(status));
+        }
+    } while (status == UCS_ERR_NO_RESOURCE);
+
+    /* Operation in progress, wait for completion */
+    ucs_assert(status == UCS_INPROGRESS);
+    if (wait_for_completion) {
+        if (comp() == NULL) {
+            /* implicit non-blocking mode */
+            /* Call flush on local and remote ifaces to progress data
+             * (e.g. if call flush only on local iface, a target side may
+             * not be able to send PUT ACK to an initiator in case of TCP) */
+            flush();
+        } else {
+            /* explicit non-blocking mode */
+            ++m_completion.uct.count; /* raised after the send was posted; expected back at 0 once completion_cb has run */
+            while (m_completion_count <= prev_comp_count) {
+                progress();
+            }
+            EXPECT_EQ(0, m_completion.uct.count);
+        }
+    }
+}
+
+void uct_p2p_test::wait_for_remote() {
+    /* Call flush on local and remote ifaces to progress data
+     * (e.g. if call flush only on local iface, a target side may
+     * not be able to send PUT ACK to an initiator in case of TCP) */
+    flush();
+}
+/* The first element of m_entities acts as the sender. */
+uct_test::entity& uct_p2p_test::sender() {
+    return **m_entities.begin();
+}
+/* Endpoint 0 of the sender. */
+uct_ep_h uct_p2p_test::sender_ep() {
+    return sender().ep(0);
+}
+/* The last element of m_entities acts as the receiver; with a single entity it is the same object as sender(). */
+uct_test::entity& uct_p2p_test::receiver() {
+    return **(m_entities.end() - 1);
+}
+/* Completion handle for sends: NULL while m_null_completion is set, otherwise the fixture's embedded completion. */
+uct_completion_t *uct_p2p_test::comp() {
+    if (m_null_completion) {
+        return NULL;
+    } else {
+        return &m_completion.uct;
+    }
+}
+/* Recovers the enclosing completion struct from the uct member and increments the owning fixture's counter. */
+void uct_p2p_test::completion_cb(uct_completion_t *self, ucs_status_t status) {
+    completion *comp = ucs_container_of(self, completion, uct);
+    ++comp->self->m_completion_count;
+}
diff --git a/test/gtest/uct/uct_p2p_test.h b/test/gtest/uct/uct_p2p_test.h
new file mode 100644
index 0000000..aa91f3e
--- /dev/null
+++ b/test/gtest/uct/uct_p2p_test.h
@@ -0,0 +1,87 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2014. ALL RIGHTS RESERVED.
+*
+* See file LICENSE for terms.
+*/
+
+#ifndef UCT_P2P_TEST_H_
+#define UCT_P2P_TEST_H_
+
+#include "uct_test.h"
+
+/**
+ * Point-to-point UCT test.
+ */ +class uct_p2p_test : public uct_test { +public: + uct_p2p_test(size_t rx_headroom, uct_error_handler_t err_handler = NULL); + + static std::vector enum_resources(const std::string& tl_name); + + virtual void init(); + virtual void cleanup(); + + UCS_TEST_BASE_IMPL; +protected: + typedef ucs_status_t (uct_p2p_test::* send_func_t)(uct_ep_h ep, + const mapped_buffer &, + const mapped_buffer &); + + enum uct_p2p_test_flags { + TEST_UCT_FLAG_DIR_SEND_TO_RECV = UCS_BIT(0), + TEST_UCT_FLAG_SEND_ZCOPY = UCS_BIT(1), + TEST_UCT_FLAG_RECV_ZCOPY = UCS_BIT(2), + }; + + struct completion { + uct_p2p_test *self; + uct_completion_t uct; + }; + + struct p2p_resource : public resource { + virtual std::string name() const; + bool loopback; + + p2p_resource(const resource& res) : + resource(res.component, res.md_name, res.local_cpus, + res.tl_name, res.dev_name, res.dev_type), + loopback(false) { } + }; + + virtual void test_xfer(send_func_t send, size_t length, unsigned flags, + ucs_memory_type_t mem_type); + void test_xfer_multi(send_func_t send, size_t min_length, size_t max_length, + unsigned flags); + void test_xfer_multi_mem_type(send_func_t send, size_t min_length, size_t max_length, + unsigned flags, ucs_memory_type_t mem_type); + void blocking_send(send_func_t send, uct_ep_h ep, const mapped_buffer &sendbuf, + const mapped_buffer &recvbuf, bool wait_for_completion); + void wait_for_remote(); + entity& sender(); + uct_ep_h sender_ep(); + entity& receiver(); + uct_completion_t *comp(); + +private: + template + void test_xfer_print(O& os, send_func_t send, size_t length, + unsigned flags, ucs_memory_type_t mem_type); + + static void completion_cb(uct_completion_t *self, ucs_status_t status); + + static ucs_log_func_rc_t + log_handler(const char *file, unsigned line, const char *function, + ucs_log_level_t level, const char *prefix, va_list ap); + + static int log_data_count; + static ucs_log_level_t orig_log_level; + + const size_t m_rx_headroom; + uct_error_handler_t 
m_err_handler; + bool m_null_completion; + completion m_completion; + unsigned m_completion_count; +}; + + +#endif diff --git a/test/gtest/uct/uct_test.cc b/test/gtest/uct/uct_test.cc new file mode 100644 index 0000000..bac57fd --- /dev/null +++ b/test/gtest/uct/uct_test.cc @@ -0,0 +1,1419 @@ +/** +* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. +* +* See file LICENSE for terms. +*/ + +#include "uct_test.h" +#include "uct/api/uct_def.h" + +#include +#include +#include +#include +#include +#include +#include + + +std::string resource::name() const { + std::stringstream ss; + ss << tl_name << "/" << dev_name; + if (!variant_name.empty()) { + ss << "/" << variant_name; + } + return ss.str(); +} + +resource::resource() : component(NULL), md_name(""), tl_name(""), dev_name(""), + variant_name(""), dev_type(UCT_DEVICE_TYPE_LAST), + variant(DEFAULT_VARIANT) +{ + CPU_ZERO(&local_cpus); +} + +resource::resource(uct_component_h component, const std::string& md_name, + const ucs_cpu_set_t& local_cpus, const std::string& tl_name, + const std::string& dev_name, uct_device_type_t dev_type) : + component(component), md_name(md_name), local_cpus(local_cpus), + tl_name(tl_name), dev_name(dev_name), variant_name(""), + dev_type(dev_type), variant(DEFAULT_VARIANT) +{ +} + +resource::resource(uct_component_h component, const uct_md_attr_t& md_attr, + const uct_md_resource_desc_t& md_resource, + const uct_tl_resource_desc_t& tl_resource) : + component(component), + md_name(md_resource.md_name), + local_cpus(md_attr.local_cpus), + tl_name(tl_resource.tl_name), + dev_name(tl_resource.dev_name), + variant_name(""), + dev_type(tl_resource.dev_type), + variant(DEFAULT_VARIANT) +{ +} + +resource_speed::resource_speed(uct_component_h component, const uct_worker_h& worker, + const uct_md_h& md, const uct_md_attr_t& md_attr, + const uct_md_resource_desc_t& md_resource, + const uct_tl_resource_desc_t& tl_resource) : + resource(component, md_attr, md_resource, + 
tl_resource) { + ucs_status_t status; + uct_iface_params_t iface_params = { 0 }; + uct_iface_config_t *iface_config; + uct_iface_attr_t iface_attr; + uct_iface_h iface; + + status = uct_md_iface_config_read(md, tl_name.c_str(), NULL, + NULL, &iface_config); + ASSERT_UCS_OK(status); + + iface_params.field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_DEVICE; + iface_params.open_mode = UCT_IFACE_OPEN_MODE_DEVICE; + iface_params.mode.device.tl_name = tl_name.c_str(); + iface_params.mode.device.dev_name = dev_name.c_str(); + + status = uct_iface_open(md, worker, &iface_params, iface_config, &iface); + ASSERT_UCS_OK(status); + + status = uct_iface_query(iface, &iface_attr); + ASSERT_UCS_OK(status); + + bw = ucs_max(iface_attr.bandwidth.dedicated, iface_attr.bandwidth.shared); + + uct_iface_close(iface); + uct_config_release(iface_config); +} + +std::vector uct_test_base::enum_md_resources() { + + static std::vector all_md_resources; + + if (all_md_resources.empty()) { + uct_component_h *uct_components; + unsigned num_components; + ucs_status_t status; + + status = uct_query_components(&uct_components, &num_components); + ASSERT_UCS_OK(status); + + /* for RAII */ + ucs::handle cmpt_list(uct_components, + uct_release_component_list); + + for (unsigned cmpt_index = 0; cmpt_index < num_components; ++cmpt_index) { + uct_component_attr_t component_attr = {0}; + + component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME | + UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT | + UCT_COMPONENT_ATTR_FIELD_FLAGS; + /* coverity[var_deref_model] */ + status = uct_component_query(uct_components[cmpt_index], &component_attr); + ASSERT_UCS_OK(status); + + /* Save attributes before asking for MD resource list */ + md_resource md_rsc; + md_rsc.cmpt = uct_components[cmpt_index]; + md_rsc.cmpt_attr = component_attr; + + std::vector md_resources; + uct_component_attr_t component_attr_resouces = {0}; + md_resources.resize(md_rsc.cmpt_attr.md_resource_count); + 
component_attr_resouces.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES; + component_attr_resouces.md_resources = &md_resources[0]; + status = uct_component_query(uct_components[cmpt_index], + &component_attr_resouces); + ASSERT_UCS_OK(status); + + for (unsigned md_index = 0; + md_index < md_rsc.cmpt_attr.md_resource_count; ++md_index) { + md_rsc.rsc_desc = md_resources[md_index]; + all_md_resources.push_back(md_rsc); + } + } + } + + return all_md_resources; +} + +uct_test::uct_test() { + uct_component_attr_t component_attr = {0}; + ucs_status_t status; + uct_md_attr_t md_attr; + uct_md_h md; + + status = uct_md_config_read(GetParam()->component, NULL, NULL, &m_md_config); + ASSERT_UCS_OK(status); + + status = uct_md_open(GetParam()->component, GetParam()->md_name.c_str(), + m_md_config, &md); + ASSERT_UCS_OK(status); + + status = uct_md_query(md, &md_attr); + ASSERT_UCS_OK(status); + + if (md_attr.cap.flags & UCT_MD_FLAG_SOCKADDR) { + status = uct_md_iface_config_read(md, NULL, NULL, NULL, &m_iface_config); + } else if (!strcmp(GetParam()->tl_name.c_str(), "sockaddr")) { + m_iface_config = NULL; + } else { + status = uct_md_iface_config_read(md, GetParam()->tl_name.c_str(), NULL, + NULL, &m_iface_config); + } + + ASSERT_UCS_OK(status); + uct_md_close(md); + + component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME | + UCT_COMPONENT_ATTR_FIELD_FLAGS; + /* coverity[var_deref_model] */ + status = uct_component_query(GetParam()->component, &component_attr); + ASSERT_UCS_OK(status); + + if (component_attr.flags & UCT_COMPONENT_FLAG_CM) { + status = uct_cm_config_read(GetParam()->component, NULL, NULL, &m_cm_config); + ASSERT_UCS_OK(status); + } else { + m_cm_config = NULL; + } +} + +uct_test::~uct_test() { + if (m_cm_config != NULL) { + uct_config_release(m_cm_config); + } + if (m_iface_config != NULL) { + uct_config_release(m_iface_config); + } + uct_config_release(m_md_config); +} + +void uct_test::init_sockaddr_rsc(resource *rsc, struct sockaddr *listen_addr, + 
struct sockaddr *connect_addr, size_t size) +{ + rsc->listen_sock_addr.set_sock_addr(*listen_addr, size); + rsc->connect_sock_addr.set_sock_addr(*connect_addr, size); +} + +void uct_test::set_interface_rscs(uct_component_h cmpt, const char *name, + ucs_cpu_set_t local_cpus, struct ifaddrs *ifa, + std::vector& all_resources) +{ + int i; + + /* Create two resources on the same interface. the first one will have the + * ip of the interface and the second one will have INADDR_ANY */ + for (i = 0; i < 2; i++) { + resource rsc(cmpt, std::string(name), local_cpus, "sockaddr", + std::string(ifa->ifa_name), UCT_DEVICE_TYPE_NET); + + if (i == 0) { + /* first rsc */ + if (ifa->ifa_addr->sa_family == AF_INET) { + uct_test::init_sockaddr_rsc(&rsc, ifa->ifa_addr, ifa->ifa_addr, + sizeof(struct sockaddr_in)); + } else if (ifa->ifa_addr->sa_family == AF_INET6) { + uct_test::init_sockaddr_rsc(&rsc, ifa->ifa_addr, ifa->ifa_addr, + sizeof(struct sockaddr_in6)); + } else { + UCS_TEST_ABORT("Unknown sa_family " << ifa->ifa_addr->sa_family); + } + all_resources.push_back(rsc); + } else { + /* second rsc */ + if (ifa->ifa_addr->sa_family == AF_INET) { + struct sockaddr_in sin; + memset(&sin, 0, sizeof(struct sockaddr_in)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = INADDR_ANY; + uct_test::init_sockaddr_rsc(&rsc, (struct sockaddr*)&sin, + ifa->ifa_addr, sizeof(struct sockaddr_in)); + } else if (ifa->ifa_addr->sa_family == AF_INET6) { + struct sockaddr_in6 sin; + memset(&sin, 0, sizeof(struct sockaddr_in6)); + sin.sin6_family = AF_INET6; + sin.sin6_addr = in6addr_any; + uct_test::init_sockaddr_rsc(&rsc, (struct sockaddr*)&sin, + ifa->ifa_addr, sizeof(struct sockaddr_in6)); + } else { + UCS_TEST_ABORT("Unknown sa_family " << ifa->ifa_addr->sa_family); + } + all_resources.push_back(rsc); + } + } +} + +bool uct_test::is_interface_usable(struct ifaddrs *ifa, const char *name) { + if (!(ucs_netif_flags_is_active(ifa->ifa_flags)) || + !(ucs::is_inet_addr(ifa->ifa_addr))) { + return 
false; + } + + /* If rdmacm is tested, make sure that this is an IPoIB or RoCE interface */ + if (!strcmp(name, "rdmacm") && !ucs::is_rdmacm_netdev(ifa->ifa_name)) { + return false; + } + + return true; +} + +void uct_test::set_md_sockaddr_resources(const md_resource& md_rsc, uct_md_h md, + ucs_cpu_set_t local_cpus, + std::vector& all_resources) { + + struct ifaddrs *ifaddr, *ifa; + ucs_sock_addr_t sock_addr; + + EXPECT_EQ(0, getifaddrs(&ifaddr)) << strerror(errno); + + for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { + sock_addr.addr = ifa->ifa_addr; + + if (!uct_test::is_interface_usable(ifa, md_rsc.rsc_desc.md_name)) { + continue; + } + + if (uct_md_is_sockaddr_accessible(md, &sock_addr, UCT_SOCKADDR_ACC_LOCAL) && + uct_md_is_sockaddr_accessible(md, &sock_addr, UCT_SOCKADDR_ACC_REMOTE)) + { + uct_test::set_interface_rscs(md_rsc.cmpt, md_rsc.rsc_desc.md_name, + local_cpus, ifa, all_resources); + } + } + + freeifaddrs(ifaddr); +} + +void uct_test::set_cm_sockaddr_resources(uct_component_h cmpt, const char *cmpt_name, + ucs_cpu_set_t local_cpus, + std::vector& all_resources) { + + struct ifaddrs *ifaddr, *ifa; + + EXPECT_EQ(0, getifaddrs(&ifaddr)) << strerror(errno); + + for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { + if (!uct_test::is_interface_usable(ifa, cmpt_name)) { + continue; + } + + uct_test::set_interface_rscs(cmpt, cmpt_name, local_cpus, ifa, all_resources); + } + + freeifaddrs(ifaddr); +} + +void uct_test::set_cm_resources(std::vector& all_resources) +{ + uct_component_h *uct_components; + unsigned num_components; + ucs_status_t status; + + status = uct_query_components(&uct_components, &num_components); + ASSERT_UCS_OK(status); + + for (unsigned cmpt_index = 0; cmpt_index < num_components; ++cmpt_index) { + uct_component_attr_t component_attr = {0}; + + component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME | + UCT_COMPONENT_ATTR_FIELD_FLAGS; + /* coverity[var_deref_model] */ + status = uct_component_query(uct_components[cmpt_index], 
&component_attr); + ASSERT_UCS_OK(status); + + if (component_attr.flags & UCT_COMPONENT_FLAG_CM) { + ucs_cpu_set_t local_cpus; + CPU_ZERO(&local_cpus); + uct_test::set_cm_sockaddr_resources(uct_components[cmpt_index], + component_attr.name, local_cpus, + all_resources); + } + } + + uct_release_component_list(uct_components); +} + +std::vector uct_test::enum_resources(const std::string& tl_name) +{ + static bool tcp_fastest_dev = (getenv("GTEST_UCT_TCP_FASTEST_DEV") != NULL); + static std::vector all_resources; + + if (all_resources.empty()) { + ucs_async_context_t *async; + uct_worker_h worker; + ucs_status_t status; + + status = ucs_async_context_create(UCS_ASYNC_MODE_THREAD_SPINLOCK, &async); + ASSERT_UCS_OK(status); + + status = uct_worker_create(async, UCS_THREAD_MODE_SINGLE, &worker); + ASSERT_UCS_OK(status); + + std::vector md_resources = enum_md_resources(); + + for (std::vector::iterator iter = md_resources.begin(); + iter != md_resources.end(); ++iter) { + uct_md_h md; + uct_md_config_t *md_config; + status = uct_md_config_read(iter->cmpt, NULL, NULL, &md_config); + ASSERT_UCS_OK(status); + + { + scoped_log_handler slh(hide_errors_logger); + status = uct_md_open(iter->cmpt, iter->rsc_desc.md_name, + md_config, &md); + } + uct_config_release(md_config); + if (status != UCS_OK) { + continue; + } + + uct_md_attr_t md_attr; + status = uct_md_query(md, &md_attr); + ASSERT_UCS_OK(status); + + uct_tl_resource_desc_t *tl_resources; + unsigned num_tl_resources; + status = uct_md_query_tl_resources(md, &tl_resources, &num_tl_resources); + ASSERT_UCS_OK(status); + + resource_speed tcp_fastest_rsc; + + for (unsigned j = 0; j < num_tl_resources; ++j) { + if (tcp_fastest_dev && (std::string("tcp") == tl_resources[j].tl_name)) { + resource_speed rsc(iter->cmpt, worker, md, md_attr, + iter->rsc_desc, tl_resources[j]); + if (!tcp_fastest_rsc.bw || (rsc.bw > tcp_fastest_rsc.bw)) { + tcp_fastest_rsc = rsc; + } + } else { + resource rsc(iter->cmpt, md_attr, iter->rsc_desc, + 
tl_resources[j]); + all_resources.push_back(rsc); + } + } + + if (tcp_fastest_dev && tcp_fastest_rsc.bw) { + all_resources.push_back(tcp_fastest_rsc); + } + + if ((md_attr.cap.flags & UCT_MD_FLAG_SOCKADDR) && + !(iter->cmpt_attr.flags & UCT_COMPONENT_FLAG_CM)) { + uct_test::set_md_sockaddr_resources(*iter, md, md_attr.local_cpus, + all_resources); + } + + uct_release_tl_resource_list(tl_resources); + uct_md_close(md); + } + + uct_worker_destroy(worker); + ucs_async_context_destroy(async); + + set_cm_resources(all_resources); + } + + return filter_resources(all_resources, tl_name); +} + +void uct_test::generate_test_variant(int variant, + const std::string &variant_name, + std::vector& test_res, + const std::string &tl_name) +{ + std::vector r = uct_test::enum_resources(""); + + for (std::vector::iterator iter = r.begin(); + iter != r.end(); ++iter) { + if (tl_name.empty() || ((*iter)->tl_name == tl_name)) { + resource rsc((*iter)->component, (*iter)->md_name, + (*iter)->local_cpus, (*iter)->tl_name, + (*iter)->dev_name, (*iter)->dev_type); + rsc.variant = variant; + rsc.variant_name = variant_name; + test_res.push_back(rsc); + } + } +} + +void uct_test::init() { +} + +void uct_test::cleanup() { + FOR_EACH_ENTITY(iter) { + (*iter)->destroy_eps(); + } + m_entities.clear(); +} + +bool uct_test::is_caps_supported(uint64_t required_flags) { + bool ret = true; + + FOR_EACH_ENTITY(iter) { + ret &= (*iter)->is_caps_supported(required_flags); + } + + return ret; +} + +bool uct_test::check_caps(uint64_t required_flags, uint64_t invalid_flags) { + FOR_EACH_ENTITY(iter) { + if (!(*iter)->check_caps(required_flags, invalid_flags)) { + return false; + } + } + return true; +} + +void uct_test::check_caps_skip(uint64_t required_flags, uint64_t invalid_flags) { + if (!check_caps(required_flags, invalid_flags)) { + UCS_TEST_SKIP_R("unsupported"); + } +} + +bool uct_test::check_atomics(uint64_t required_ops, atomic_mode mode) { + FOR_EACH_ENTITY(iter) { + if 
(!(*iter)->check_atomics(required_ops, mode)) { + return false; + } + } + return true; +} + +void uct_test::modify_config(const std::string& name, const std::string& value, + bool optional) { + ucs_status_t status = UCS_OK; + + if (m_cm_config != NULL) { + status = uct_config_modify(m_cm_config, name.c_str(), value.c_str()); + if (status == UCS_OK) { + return; + } else if (status != UCS_ERR_NO_ELEM) { + UCS_TEST_ABORT("Couldn't modify cm config parameter: " << name.c_str() << + " to " << value.c_str() << ": " << ucs_status_string(status)); + } + } + + if (m_iface_config != NULL) { + status = uct_config_modify(m_iface_config, name.c_str(), value.c_str()); + if (status == UCS_OK) { + return; + } else if (status != UCS_ERR_NO_ELEM) { + UCS_TEST_ABORT("Couldn't modify iface config parameter: " << name.c_str() << + " to " << value.c_str() << ": " << ucs_status_string(status)); + } + } + + ucs_assert(status == UCS_ERR_NO_ELEM); + + status = uct_config_modify(m_md_config, name.c_str(), value.c_str()); + if (status == UCS_ERR_NO_ELEM) { + test_base::modify_config(name, value, optional); + } else if (status != UCS_OK) { + UCS_TEST_ABORT("Couldn't modify md config parameter: " << name.c_str() << + " to " << value.c_str() << ": " << ucs_status_string(status)); + } +} + +bool uct_test::get_config(const std::string& name, std::string& value) const +{ + ucs_status_t status; + const size_t max = 1024; + + value.resize(max); + + if (m_cm_config != NULL) { + status = uct_config_get(m_cm_config, name.c_str(), + const_cast(value.c_str()), max); + if (status == UCS_OK) { + return true; + } + } + + if (m_iface_config != NULL) { + status = uct_config_get(m_iface_config, name.c_str(), + const_cast(value.c_str()), max); + if (status == UCS_OK) { + return true; + } + } + + status = uct_config_get(m_md_config, name.c_str(), + const_cast(value.c_str()), max); + if (status == UCS_OK) { + return true; + } + + return false; +} + +bool uct_test::has_transport(const std::string& tl_name) const { 
+ return (GetParam()->tl_name == tl_name); +} + +bool uct_test::has_ud() const { + return (has_transport("ud_verbs") || has_transport("ud_mlx5")); +} + +bool uct_test::has_rc() const { + return (has_transport("rc_verbs") || has_transport("rc_mlx5")); +} + +bool uct_test::has_rc_or_dc() const { + return (has_rc() || has_transport("dc_mlx5")); +} + +bool uct_test::has_ib() const { + return (has_rc_or_dc() || has_ud()); +} + +void uct_test::stats_activate() +{ + ucs_stats_cleanup(); + push_config(); + modify_config("STATS_DEST", "file:/dev/null"); + modify_config("STATS_TRIGGER", "exit"); + ucs_stats_init(); + ASSERT_TRUE(ucs_stats_is_active()); +} + +void uct_test::stats_restore() +{ + ucs_stats_cleanup(); + pop_config(); + ucs_stats_init(); +} + +uct_test::entity* uct_test::create_entity(size_t rx_headroom, + uct_error_handler_t err_handler) { + uct_iface_params_t iface_params; + + iface_params.field_mask = UCT_IFACE_PARAM_FIELD_RX_HEADROOM | + UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_ARG | + UCT_IFACE_PARAM_FIELD_ERR_HANDLER_FLAGS; + iface_params.rx_headroom = rx_headroom; + iface_params.open_mode = UCT_IFACE_OPEN_MODE_DEVICE; + iface_params.err_handler = err_handler; + iface_params.err_handler_arg = this; + iface_params.err_handler_flags = 0; + entity *new_ent = new entity(*GetParam(), m_iface_config, &iface_params, + m_md_config); + return new_ent; +} + +uct_test::entity* uct_test::create_entity(uct_iface_params_t &params) { + entity *new_ent = new entity(*GetParam(), m_iface_config, &params, + m_md_config); + return new_ent; +} + +uct_test::entity* uct_test::create_entity() { + return new entity(*GetParam(), m_md_config, m_cm_config); +} + +const uct_test::entity& uct_test::ent(unsigned index) const { + return m_entities.at(index); +} + +unsigned uct_test::progress() const { + unsigned count = 0; + FOR_EACH_ENTITY(iter) { + count += (*iter)->progress(); + } + return count; +} + +void 
uct_test::flush(ucs_time_t deadline) const { + + bool flushed; + do { + flushed = true; + FOR_EACH_ENTITY(iter) { + (*iter)->progress(); + ucs_status_t status = uct_iface_flush((*iter)->iface(), 0, NULL); + if ((status == UCS_ERR_NO_RESOURCE) || (status == UCS_INPROGRESS)) { + flushed = false; + } else { + ASSERT_UCS_OK(status); + } + } + } while (!flushed && (ucs_get_time() < deadline)); + + EXPECT_TRUE(flushed) << "Timed out"; +} + +void uct_test::short_progress_loop(double delay_ms) const { + ucs_time_t end_time = ucs_get_time() + ucs_time_from_msec(delay_ms * ucs::test_time_multiplier()); + while (ucs_get_time() < end_time) { + progress(); + } +} + +void uct_test::twait(int delta_ms) const { + ucs_time_t now, t1, t2; + int left; + + now = ucs_get_time(); + left = delta_ms; + do { + t1 = ucs_get_time(); + usleep(1000 * left); + t2 = ucs_get_time(); + left -= (int)ucs_time_to_msec(t2-t1); + } while (now + ucs_time_from_msec(delta_ms) > ucs_get_time()); +} + +int uct_test::max_connections() +{ + if (has_transport("tcp")) { + return ucs::max_tcp_connections(); + } else { + return std::numeric_limits::max(); + } +} + +int uct_test::max_connect_batch() +{ + if (has_transport("tcp")) { + /* TCP connection listener is limited by Accept queue */ + return ucs_socket_max_conn(); + } else { + return std::numeric_limits::max(); + } +} + +const std::string uct_test::entity::server_priv_data = "Server private data"; +std::string uct_test::entity::client_priv_data = ""; + +uct_test::entity::entity(const resource& resource, uct_iface_config_t *iface_config, + uct_iface_params_t *params, uct_md_config_t *md_config) : + m_resource(resource) +{ + ucs_status_t status; + + if (params->open_mode == UCT_IFACE_OPEN_MODE_DEVICE) { + params->field_mask |= UCT_IFACE_PARAM_FIELD_DEVICE; + params->mode.device.tl_name = resource.tl_name.c_str(); + params->mode.device.dev_name = resource.dev_name.c_str(); + } + + params->field_mask |= UCT_IFACE_PARAM_FIELD_STATS_ROOT | + 
UCT_IFACE_PARAM_FIELD_CPU_MASK; + params->stats_root = ucs_stats_get_root(); + UCS_CPU_ZERO(&params->cpu_mask); + + UCS_TEST_CREATE_HANDLE(uct_worker_h, m_worker, uct_worker_destroy, + uct_worker_create, &m_async.m_async, + UCS_THREAD_MODE_SINGLE); + + UCS_TEST_CREATE_HANDLE(uct_md_h, m_md, uct_md_close, uct_md_open, + resource.component, resource.md_name.c_str(), + md_config); + + status = uct_md_query(m_md, &m_md_attr); + ASSERT_UCS_OK(status); + + for (;;) { + { + scoped_log_handler slh(wrap_errors_logger); + status = UCS_TEST_TRY_CREATE_HANDLE(uct_iface_h, m_iface, + uct_iface_close, uct_iface_open, + m_md, m_worker, params, + iface_config); + if (status == UCS_OK) { + break; + } + } + EXPECT_EQ(UCS_ERR_BUSY, status); + if (params->open_mode != UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER) { + UCS_TEST_ABORT("any mode different from UCT_IFACE_OPEN_MODE_SOCKADDR_SERVER must go with status UCS_OK"); + } + + const struct sockaddr* c_ifa_addr = + params->mode.sockaddr.listen_sockaddr.addr; + struct sockaddr* ifa_addr = const_cast<struct sockaddr*>(c_ifa_addr); + if (ifa_addr->sa_family == AF_INET) { + struct sockaddr_in *addr = + reinterpret_cast<struct sockaddr_in*>(ifa_addr); + addr->sin_port = ntohs(ucs::get_port()); + } else { + struct sockaddr_in6 *addr = + reinterpret_cast<struct sockaddr_in6*>(ifa_addr); + addr->sin6_port = ntohs(ucs::get_port()); + } + } + + status = uct_iface_query(m_iface, &m_iface_attr); + ASSERT_UCS_OK(status); + + uct_iface_progress_enable(m_iface, UCT_PROGRESS_SEND | UCT_PROGRESS_RECV); + m_iface_params = *params; + + memset(&m_cm_attr, 0, sizeof(m_cm_attr)); + max_conn_priv = 0; +} + +uct_test::entity::entity(const resource& resource, uct_md_config_t *md_config, + uct_cm_config_t *cm_config) { + ucs_status_t status; + uct_component_attr_t comp_attr; + + memset(&m_iface_attr, 0, sizeof(m_iface_attr)); + memset(&m_iface_params, 0, sizeof(m_iface_params)); + + UCS_TEST_CREATE_HANDLE(uct_worker_h, m_worker, uct_worker_destroy, + uct_worker_create, &m_async.m_async, + UCS_THREAD_MODE_SINGLE); + + 
UCS_TEST_CREATE_HANDLE(uct_md_h, m_md, uct_md_close, + uct_md_open, resource.component, + resource.md_name.c_str(), md_config); + + status = uct_md_query(m_md, &m_md_attr); + ASSERT_UCS_OK(status); + + + comp_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME | + UCT_COMPONENT_ATTR_FIELD_FLAGS; + status = uct_component_query(resource.component, &comp_attr); + ASSERT_UCS_OK(status); + + if (comp_attr.flags & UCT_COMPONENT_FLAG_CM) { + UCS_TEST_CREATE_HANDLE(uct_cm_h, m_cm, uct_cm_close, uct_cm_open, + resource.component, m_worker, cm_config); + + m_cm_attr.field_mask = UCT_CM_ATTR_FIELD_MAX_CONN_PRIV; + status = uct_cm_query(m_cm, &m_cm_attr); + ASSERT_UCS_OK(status); + + max_conn_priv = 0; + } else { + UCS_TEST_SKIP_R(std::string("cm is not supported on component ") + + comp_attr.name ); + } +} + +void uct_test::entity::mem_alloc_host(size_t length, + uct_allocated_memory_t *mem) const { + + ucs_status_t status; + + if (md_attr().cap.flags & (UCT_MD_FLAG_ALLOC|UCT_MD_FLAG_REG)) { + status = uct_iface_mem_alloc(m_iface, length, UCT_MD_MEM_ACCESS_ALL, + "uct_test", mem); + ASSERT_UCS_OK(status); + } else { + uct_alloc_method_t method = UCT_ALLOC_METHOD_MMAP; + status = uct_mem_alloc(NULL, length, UCT_MD_MEM_ACCESS_ALL, &method, 1, + NULL, 0, "uct_test", mem); + ASSERT_UCS_OK(status); + ucs_assert(mem->memh == UCT_MEM_HANDLE_NULL); + } + ucs_assert(mem->mem_type == UCS_MEMORY_TYPE_HOST); +} + +void uct_test::entity::mem_free_host(const uct_allocated_memory_t *mem) const { + if (mem->method != UCT_ALLOC_METHOD_LAST) { + uct_iface_mem_free(mem); + } +} + +void uct_test::entity::mem_type_reg(uct_allocated_memory_t *mem) const { + if (md_attr().cap.reg_mem_types & UCS_BIT(mem->mem_type)) { + ucs_status_t status = uct_md_mem_reg(m_md, mem->address, mem->length, + UCT_MD_MEM_ACCESS_ALL, &mem->memh); + ASSERT_UCS_OK(status); + mem->md = m_md; + } +} + +void uct_test::entity::mem_type_dereg(uct_allocated_memory_t *mem) const { + if ((mem->memh != UCT_MEM_HANDLE_NULL) && + 
(md_attr().cap.reg_mem_types & UCS_BIT(mem->mem_type))) { + ucs_status_t status = uct_md_mem_dereg(m_md, mem->memh); + ASSERT_UCS_OK(status); + mem->memh = UCT_MEM_HANDLE_NULL; + mem->md = NULL; + } +} + +void uct_test::entity::rkey_unpack(const uct_allocated_memory_t *mem, + uct_rkey_bundle *rkey_bundle) const +{ + if ((mem->memh != UCT_MEM_HANDLE_NULL) && + (md_attr().cap.flags & UCT_MD_FLAG_NEED_RKEY)) { + + void *rkey_buffer = malloc(md_attr().rkey_packed_size); + if (rkey_buffer == NULL) { + UCS_TEST_ABORT("Failed to allocate rkey buffer"); + } + + ucs_status_t status = uct_md_mkey_pack(m_md, mem->memh, rkey_buffer); + ASSERT_UCS_OK(status); + + status = uct_rkey_unpack(m_resource.component, rkey_buffer, + rkey_bundle); + ASSERT_UCS_OK(status); + + free(rkey_buffer); + } else { + rkey_bundle->handle = NULL; + rkey_bundle->rkey = UCT_INVALID_RKEY; + } +} + +void uct_test::entity::rkey_release(const uct_rkey_bundle *rkey_bundle) const +{ + if (rkey_bundle->rkey != UCT_INVALID_RKEY) { + ucs_status_t status = uct_rkey_release(m_resource.component, rkey_bundle); + ASSERT_UCS_OK(status); + } +} + +unsigned uct_test::entity::progress() const { + unsigned count = uct_worker_progress(m_worker); + m_async.check_miss(); + return count; +} + +bool uct_test::entity::is_caps_supported(uint64_t required_flags) { + uint64_t iface_flags = iface_attr().cap.flags; + return ucs_test_all_flags(iface_flags, required_flags); +} + +bool uct_test::entity::check_caps(uint64_t required_flags, + uint64_t invalid_flags) +{ + uint64_t iface_flags = iface_attr().cap.flags; + return (ucs_test_all_flags(iface_flags, required_flags) && + !(iface_flags & invalid_flags)); +} + +bool uct_test::entity::check_atomics(uint64_t required_ops, atomic_mode mode) +{ + uint64_t amo; + + switch (mode) { + case OP32: + amo = iface_attr().cap.atomic32.op_flags; + break; + case OP64: + amo = iface_attr().cap.atomic64.op_flags; + break; + case FOP32: + amo = iface_attr().cap.atomic32.fop_flags; + break; + case 
FOP64: + amo = iface_attr().cap.atomic64.fop_flags; + break; + default: + UCS_TEST_ABORT("Incorrect atomic mode: " << mode); + } + + return ucs_test_all_flags(amo, required_ops); +} + +uct_md_h uct_test::entity::md() const { + return m_md; +} + +const uct_md_attr& uct_test::entity::md_attr() const { + return m_md_attr; +} + +uct_worker_h uct_test::entity::worker() const { + return m_worker; +} + +uct_cm_h uct_test::entity::cm() const { + return m_cm; +} + +const uct_cm_attr_t& uct_test::entity::cm_attr() const { + return m_cm_attr; +} + +uct_listener_h uct_test::entity::listener() const { + return m_listener; +} + +uct_iface_h uct_test::entity::iface() const { + return m_iface; +} + +const uct_iface_attr& uct_test::entity::iface_attr() const { + return m_iface_attr; +} + +const uct_iface_params& uct_test::entity::iface_params() const { + return m_iface_params; +} + +uct_ep_h uct_test::entity::ep(unsigned index) const { + return m_eps.at(index); +} + +size_t uct_test::entity::num_eps() const { + return m_eps.size(); +} + +void uct_test::entity::reserve_ep(unsigned index) { + if (index >= m_eps.size()) { + m_eps.resize(index + 1); + } +} + +void uct_test::entity::connect_p2p_ep(uct_ep_h from, uct_ep_h to) +{ + uct_iface_attr_t iface_attr; + uct_device_addr_t *dev_addr; + uct_ep_addr_t *ep_addr; + ucs_status_t status; + + status = uct_iface_query(to->iface, &iface_attr); + ASSERT_UCS_OK(status); + + dev_addr = (uct_device_addr_t*)malloc(iface_attr.device_addr_len); + ep_addr = (uct_ep_addr_t*)malloc(iface_attr.ep_addr_len); + + status = uct_iface_get_device_address(to->iface, dev_addr); + ASSERT_UCS_OK(status); + + status = uct_ep_get_address(to, ep_addr); + ASSERT_UCS_OK(status); + + status = uct_ep_connect_to_ep(from, dev_addr, ep_addr); + ASSERT_UCS_OK(status); + + free(ep_addr); + free(dev_addr); +} + +void uct_test::entity::create_ep(unsigned index) { + uct_ep_h ep = NULL; + uct_ep_params_t ep_params; + ucs_status_t status; + + reserve_ep(index); + + if 
(m_eps[index]) { + UCS_TEST_ABORT("ep[" << index << "] already exists"); + } + + ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + ep_params.iface = m_iface; + status = uct_ep_create(&ep_params, &ep); + ASSERT_UCS_OK(status); + m_eps[index].reset(ep, uct_ep_destroy); +} + +void uct_test::entity::destroy_ep(unsigned index) { + if (!m_eps[index]) { + UCS_TEST_ABORT("ep[" << index << "] does not exist"); + } + + m_eps[index].reset(); +} + +void uct_test::entity::destroy_eps() { + for (unsigned index = 0; index < m_eps.size(); ++index) { + if (!m_eps[index]) { + continue; + } + m_eps[index].reset(); + } +} + +size_t uct_test::entity::priv_data_do_pack(void *priv_data) +{ + size_t priv_data_len; + + client_priv_data = "Client private data"; + priv_data_len = 1 + client_priv_data.length(); + + memcpy(priv_data, client_priv_data.c_str(), priv_data_len); + return priv_data_len; +} + +ssize_t uct_test::entity::server_priv_data_cb(void *arg, const char *dev_name, + void *priv_data) +{ + const size_t priv_data_len = server_priv_data.length() + 1; + + memcpy(priv_data, server_priv_data.c_str(), priv_data_len); + return priv_data_len; +} + +void +uct_test::entity::connect_to_sockaddr(unsigned index, entity& other, + const ucs::sock_addr_storage &remote_addr, + uct_sockaddr_priv_pack_callback_t pack_cb, + uct_ep_client_connect_cb_t connect_cb, + uct_ep_disconnect_cb_t disconnect_cb, + void *user_data) +{ + ucs_sock_addr_t ucs_remote_addr = remote_addr.to_ucs_sock_addr(); + uct_ep_params_t params; + uct_ep_h ep; + ucs_status_t status; + + reserve_ep(index); + if (m_eps[index]) { + return; /* Already connected */ + } + + /* Connect to the server */ + if (m_cm) { + params.field_mask = UCT_EP_PARAM_FIELD_CM | + UCT_EP_PARAM_FIELD_SOCKADDR_CONNECT_CB | + UCT_EP_PARAM_FIELD_SOCKADDR_DISCONNECT_CB | + UCT_EP_PARAM_FIELD_USER_DATA; + params.cm = m_cm; + params.sockaddr_connect_cb.client = connect_cb; + params.disconnect_cb = disconnect_cb; + } else { + params.field_mask = 
UCT_EP_PARAM_FIELD_IFACE; + params.iface = m_iface; + } + + params.field_mask |= UCT_EP_PARAM_FIELD_USER_DATA | + UCT_EP_PARAM_FIELD_SOCKADDR | + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS | + UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB; + params.user_data = user_data; + params.sockaddr = &ucs_remote_addr; + params.sockaddr_cb_flags = UCT_CB_FLAG_ASYNC; + params.sockaddr_pack_cb = pack_cb; + status = uct_ep_create(&params, &ep); + ASSERT_UCS_OK(status); + + m_eps[index].reset(ep, uct_ep_destroy); +} + +void uct_test::entity::connect_to_ep(unsigned index, entity& other, + unsigned other_index) +{ + ucs_status_t status; + uct_ep_h ep, remote_ep; + uct_ep_params_t ep_params; + + reserve_ep(index); + if (m_eps[index]) { + return; /* Already connected */ + } + + other.reserve_ep(other_index); + ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE; + ep_params.iface = other.m_iface; + status = uct_ep_create(&ep_params, &remote_ep); + ASSERT_UCS_OK(status); + other.m_eps[other_index].reset(remote_ep, uct_ep_destroy); + + if (&other == this) { + connect_p2p_ep(remote_ep, remote_ep); + } else { + ep_params.iface = m_iface; + ucs_status_t status = uct_ep_create(&ep_params, &ep); + ASSERT_UCS_OK(status); + + connect_p2p_ep(ep, remote_ep); + connect_p2p_ep(remote_ep, ep); + + m_eps[index].reset(ep, uct_ep_destroy); + } +} + +void uct_test::entity::connect_to_iface(unsigned index, entity& other) { + uct_device_addr_t *dev_addr; + uct_iface_addr_t *iface_addr; + uct_ep_params_t ep_params; + ucs_status_t status; + uct_ep_h ep; + + reserve_ep(index); + if (m_eps[index]) { + return; /* Already connected */ + } + + dev_addr = (uct_device_addr_t*)malloc(other.iface_attr().device_addr_len); + iface_addr = (uct_iface_addr_t*) malloc(other.iface_attr().iface_addr_len); + + status = uct_iface_get_device_address(other.iface(), dev_addr); + ASSERT_UCS_OK(status); + + status = uct_iface_get_address(other.iface(), iface_addr); + ASSERT_UCS_OK(status); + + ep_params.field_mask = UCT_EP_PARAM_FIELD_IFACE | + 
UCT_EP_PARAM_FIELD_DEV_ADDR | + UCT_EP_PARAM_FIELD_IFACE_ADDR; + ep_params.iface = iface(); + ep_params.dev_addr = dev_addr; + ep_params.iface_addr = iface_addr; + + status = uct_ep_create(&ep_params, &ep); + ASSERT_UCS_OK(status); + + m_eps[index].reset(ep, uct_ep_destroy); + + free(iface_addr); + free(dev_addr); +} + +void uct_test::entity::connect(unsigned index, entity& other, + unsigned other_index, + const ucs::sock_addr_storage &remote_addr, + uct_sockaddr_priv_pack_callback_t pack_cb, + uct_ep_client_connect_cb_t connect_cb, + uct_ep_disconnect_cb_t disconnect_cb, + void *user_data) +{ + if (m_cm || + iface_attr().cap.flags & UCT_IFACE_FLAG_CONNECT_TO_SOCKADDR) { + connect_to_sockaddr(index, other, remote_addr, pack_cb, connect_cb, + disconnect_cb, user_data); + } else { + UCS_TEST_SKIP_R("cannot connect"); + } +} + +void uct_test::entity::connect(unsigned index, entity& other, unsigned other_index) +{ + if (iface_attr().cap.flags & UCT_IFACE_FLAG_CONNECT_TO_EP) { + connect_to_ep(index, other, other_index); + } else if (iface_attr().cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + connect_to_iface(index, other); + } else { + UCS_TEST_SKIP_R("cannot connect"); + } +} + +void uct_test::entity::accept(uct_cm_h cm, uct_conn_request_h conn_request, + uct_ep_server_connect_cb_t connect_cb, + uct_ep_disconnect_cb_t disconnect_cb, + void *user_data) +{ + uct_ep_params_t ep_params; + ucs_status_t status; + uct_ep_h ep; + + ASSERT_TRUE(m_listener); + reserve_ep(m_eps.size()); + + ep_params.field_mask = UCT_EP_PARAM_FIELD_CM | + UCT_EP_PARAM_FIELD_CONN_REQUEST | + UCT_EP_PARAM_FIELD_USER_DATA | + UCT_EP_PARAM_FIELD_SOCKADDR_CONNECT_CB | + UCT_EP_PARAM_FIELD_SOCKADDR_DISCONNECT_CB | + UCT_EP_PARAM_FIELD_SOCKADDR_CB_FLAGS | + UCT_EP_PARAM_FIELD_SOCKADDR_PACK_CB; + + ep_params.cm = cm; + ep_params.conn_request = conn_request; + ep_params.sockaddr_cb_flags = UCT_CB_FLAG_ASYNC; + ep_params.sockaddr_pack_cb = server_priv_data_cb; + ep_params.sockaddr_connect_cb.server = 
connect_cb; + ep_params.disconnect_cb = disconnect_cb; + ep_params.user_data = user_data; + + status = uct_ep_create(&ep_params, &ep); + ASSERT_UCS_OK(status); + m_eps.back().reset(ep, uct_ep_destroy); +} + +void uct_test::entity::listen(const ucs::sock_addr_storage &listen_addr, + const uct_listener_params_t ¶ms) +{ + ucs_status_t status; + + for (;;) { + { + scoped_log_handler slh(wrap_errors_logger); + status = UCS_TEST_TRY_CREATE_HANDLE(uct_listener_h, m_listener, + uct_listener_destroy, + uct_listener_create, m_cm, + listen_addr.get_sock_addr_ptr(), + listen_addr.get_addr_size(), + ¶ms); + if (status == UCS_OK) { + break; + } + } + EXPECT_EQ(UCS_ERR_BUSY, status); + + const struct sockaddr* c_ifa_addr = listen_addr.get_sock_addr_ptr(); + struct sockaddr* ifa_addr = const_cast(c_ifa_addr); + if (ifa_addr->sa_family == AF_INET) { + struct sockaddr_in *addr = + reinterpret_cast(ifa_addr); + addr->sin_port = ntohs(ucs::get_port()); + } else { + struct sockaddr_in6 *addr = + reinterpret_cast(ifa_addr); + addr->sin6_port = ntohs(ucs::get_port()); + } + } +} + +void uct_test::entity::disconnect(uct_ep_h ep) { + ASSERT_UCS_OK(uct_ep_disconnect(ep, 0)); +} + +void uct_test::entity::flush() const { + ucs_status_t status; + do { + progress(); + status = uct_iface_flush(m_iface, 0, NULL); + } while (status == UCS_INPROGRESS); + ASSERT_UCS_OK(status); +} + +std::ostream& operator<<(std::ostream& os, const uct_tl_resource_desc_t& resource) { + return os << resource.tl_name << "/" << resource.dev_name; +} + +uct_test::mapped_buffer::mapped_buffer(size_t size, uint64_t seed, + const entity& entity, size_t offset, + ucs_memory_type_t mem_type) : + m_entity(entity) +{ + if (size > 0) { + size_t alloc_size = size + offset; + if (mem_type == UCS_MEMORY_TYPE_HOST) { + m_entity.mem_alloc_host(alloc_size, &m_mem); + } else { + m_mem.method = UCT_ALLOC_METHOD_LAST; + m_mem.address = mem_buffer::allocate(alloc_size, mem_type); + m_mem.length = alloc_size; + m_mem.mem_type = mem_type; 
+            m_mem.memh     = UCT_MEM_HANDLE_NULL;
+            m_mem.md       = NULL;
+            m_entity.mem_type_reg(&m_mem); /* register the non-host allocation with the entity */
+        }
+        m_buf = (char*)m_mem.address + offset; /* usable region starts 'offset' bytes in */
+        m_end = (char*)m_buf         + size;
+        pattern_fill(seed);
+    } else {
+        m_mem.method  = UCT_ALLOC_METHOD_LAST; /* size == 0: describe an empty buffer */
+        m_mem.address = NULL;
+        m_mem.md      = NULL;
+        m_mem.memh    = UCT_MEM_HANDLE_NULL;
+        m_mem.mem_type= UCS_MEMORY_TYPE_HOST;
+        m_mem.length  = 0;
+        m_buf         = NULL;
+        m_end         = NULL;
+        m_rkey.rkey   = UCT_INVALID_RKEY;
+        m_rkey.handle = NULL;
+    }
+    m_iov.buffer = ptr(); /* single-entry IOV describing the usable region */
+    m_iov.length = length();
+    m_iov.count  = 1;
+    m_iov.stride = 0;
+    m_iov.memh   = memh();
+
+    m_entity.rkey_unpack(&m_mem, &m_rkey); /* called for both the allocated and the empty case */
+    m_rkey.type  = NULL;
+}
+/**
+ * Release the remote key, then free the memory: host memory through the
+ * entity's mem_free_host(); any other memory type is deregistered from the
+ * entity and released with mem_buffer::release().
+ */
+uct_test::mapped_buffer::~mapped_buffer() {
+    m_entity.rkey_release(&m_rkey);
+    if (m_mem.mem_type == UCS_MEMORY_TYPE_HOST) {
+        m_entity.mem_free_host(&m_mem);
+    } else {
+        ucs_assert(m_mem.method == UCT_ALLOC_METHOD_LAST); /* the constructor sets this for non-host memory */
+        m_entity.mem_type_dereg(&m_mem);
+        mem_buffer::release(m_mem.address, m_mem.mem_type);
+    }
+}
+/** Fill the usable region with the pattern generated from @a seed. */
+void uct_test::mapped_buffer::pattern_fill(uint64_t seed) {
+    mem_buffer::pattern_fill(ptr(), length(), seed, m_mem.mem_type);
+}
+/** Check the usable region against the pattern generated from @a seed. */
+void uct_test::mapped_buffer::pattern_check(uint64_t seed) {
+    mem_buffer::pattern_check(ptr(), length(), seed, m_mem.mem_type);
+}
+/** @return Start of the usable region (NULL for an empty buffer). */
+void *uct_test::mapped_buffer::ptr() const {
+    return m_buf;
+}
+/** @return Start of the usable region as an integer address. */
+uintptr_t uct_test::mapped_buffer::addr() const {
+    return (uintptr_t)m_buf;
+}
+/** @return Size of the usable region in bytes (m_end - m_buf). */
+size_t uct_test::mapped_buffer::length() const {
+    return (char*)m_end - (char*)m_buf;
+}
+/** @return Memory handle stored in the allocation descriptor. */
+uct_mem_h uct_test::mapped_buffer::memh() const {
+    return m_mem.memh;
+}
+/** @return Remote key stored in the rkey bundle. */
+uct_rkey_t uct_test::mapped_buffer::rkey() const {
+    return m_rkey.rkey;
+}
+/** @return Pointer to the single-entry IOV built by the constructor. */
+const uct_iov_t* uct_test::mapped_buffer::iov() const {
+    return &m_iov;
+}
+/**
+ * Pack callback: @a arg is a mapped_buffer; its whole usable region is copied
+ * into @a dest with mem_buffer::copy_from(). Returns the number of bytes copied.
+ */
+size_t uct_test::mapped_buffer::pack(void *dest, void *arg) {
+    const mapped_buffer* buf = (const mapped_buffer*)arg;
+    mem_buffer::copy_from(dest, buf->ptr(), buf->length(), buf->m_mem.mem_type);
+    return buf->length();
+}
+
+std::ostream& operator<<(std::ostream& os, const
resource* resource) {
+    return os << resource->name(); /* a test resource is printed by its name() */
+}
+/**
+ * Initialize the wrapped async context with UCS_ASYNC_THREAD_LOCK_TYPE.
+ * A failure is reported on stderr and then asserted.
+ */
+uct_test::entity::async_wrapper::async_wrapper()
+{
+    ucs_status_t status;
+
+    /* Initialize context */
+    status = ucs_async_context_init(&m_async, UCS_ASYNC_THREAD_LOCK_TYPE);
+    if (UCS_OK != status) {
+        fprintf(stderr, "Failed to init async context.\n");fflush(stderr);
+    }
+    ASSERT_UCS_OK(status);
+}
+/** Clean up the wrapped async context. */
+uct_test::entity::async_wrapper::~async_wrapper()
+{
+    ucs_async_context_cleanup(&m_async);
+}
+/** Forward to ucs_async_check_miss() on the wrapped async context. */
+void uct_test::entity::async_wrapper::check_miss()
+{
+    ucs_async_check_miss(&m_async);
+}
+/** Block the async context of entity @a e; the destructor unblocks it. */
+uct_test::entity::scoped_async_lock::scoped_async_lock(entity &e) : m_entity(e) {
+    UCS_ASYNC_BLOCK(&m_entity.m_async.m_async);
+}
+/** Unblock the async context that the constructor blocked. */
+uct_test::entity::scoped_async_lock::~scoped_async_lock() {
+    UCS_ASYNC_UNBLOCK(&m_entity.m_async.m_async);
+}
+/**
+ * Send an empty active message with id @a am_id on endpoint @a ep_idx of @a e.
+ *
+ * Uses uct_ep_am_short() with a zero header and no payload when
+ * UCT_IFACE_FLAG_AM_SHORT is supported; otherwise uct_ep_am_bcopy() with
+ * ucs_empty_function_return_zero_int64 as the pack callback. A non-negative
+ * bcopy result is reported as UCS_OK; a negative one is returned as the status.
+ */
+ucs_status_t uct_test::send_am_message(entity *e, uint8_t am_id, int ep_idx)
+{
+    ssize_t res;
+
+    if (is_caps_supported(UCT_IFACE_FLAG_AM_SHORT)) {
+        return uct_ep_am_short(e->ep(ep_idx), am_id, 0, NULL, 0);
+    } else {
+        res = uct_ep_am_bcopy(e->ep(ep_idx), am_id,
+                              (uct_pack_callback_t)ucs_empty_function_return_zero_int64,
+                              NULL, 0);
+        return (ucs_status_t)(res >= 0 ? UCS_OK : res);
+    }
+}
diff --git a/test/gtest/uct/uct_test.h b/test/gtest/uct/uct_test.h
new file mode 100644
index 0000000..58468b1
--- /dev/null
+++ b/test/gtest/uct/uct_test.h
@@ -0,0 +1,457 @@
+/**
+* Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED.
+*
+* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
+* Copyright (C) ARM Ltd. 2017. ALL RIGHTS RESERVED
+* Copyright (C) Advanced Micro Devices, Inc. 2016 - 2017. ALL RIGHTS RESERVED.
+* See file LICENSE for terms.
+*/ + +#ifndef UCT_TEST_H_ +#define UCT_TEST_H_ + +#include +#include +#include +#include +#include +#include + + + +#define DEFAULT_DELAY_MS 1.0 +#define DEFAULT_TIMEOUT_SEC 10.0 +#define DEFAULT_VARIANT 0 + +#define UCT_TEST_CALL_AND_TRY_AGAIN(_func, _res) \ + do { \ + _res = _func; \ + if (_res == UCS_ERR_NO_RESOURCE) { \ + short_progress_loop(); \ + } \ + } while (_res == UCS_ERR_NO_RESOURCE) + + +#define FOR_EACH_ENTITY(_iter) \ + for (ucs::ptr_vector::const_iterator _iter = m_entities.begin(); \ + _iter != m_entities.end(); ++_iter) \ + + +/* Testing resource */ +struct resource { + virtual ~resource() {}; + virtual std::string name() const; + uct_component_h component; + std::string md_name; + ucs_cpu_set_t local_cpus; + std::string tl_name; + std::string dev_name; + std::string variant_name; + uct_device_type_t dev_type; + ucs::sock_addr_storage listen_sock_addr; /* sockaddr to listen on */ + ucs::sock_addr_storage connect_sock_addr; /* sockaddr to connect to */ + int variant; + + resource(); + resource(uct_component_h component, const std::string& md_name, + const ucs_cpu_set_t& local_cpus, const std::string& tl_name, + const std::string& dev_name, uct_device_type_t dev_type); + resource(uct_component_h component, const uct_md_attr_t& md_attr, + const uct_md_resource_desc_t& md_resource, + const uct_tl_resource_desc_t& tl_resource); +}; + +struct resource_speed : public resource { + double bw; + + resource_speed() : resource(), bw(0) { } + resource_speed(uct_component_h component, const uct_worker_h& worker, + const uct_md_h& md, const uct_md_attr_t& md_attr, + const uct_md_resource_desc_t& md_resource, + const uct_tl_resource_desc_t& tl_resource); +}; + + +/** + * UCT test, without parameterization + */ +class uct_test_base : public ucs::test_base { +protected: + struct md_resource { + uct_component_h cmpt; + uct_component_attr_t cmpt_attr; + uct_md_resource_desc_t rsc_desc; + }; + + static std::vector enum_md_resources(); +}; + + +/** + * UCT test, 
parametrized on a transport/device. + */ +class uct_test : public testing::TestWithParam, + public uct_test_base { +public: + UCS_TEST_BASE_IMPL; + + /* we return a vector of pointers to allow test fixtures to extend the + * resource structure. + */ + static std::vector enum_resources(const std::string& tl_name); + + /* By default generate test variant for all tls. If variant is specific to + * the particular transport tl_name need to be specified accordingly */ + static void generate_test_variant(int variant, + const std::string &variant_name, + std::vector& test_res, + const std::string &tl_name=""); + uct_test(); + virtual ~uct_test(); + + enum atomic_mode { + OP32, + OP64, + FOP32, + FOP64 + }; + +protected: + + class entity { + public: + typedef uct_test::atomic_mode atomic_mode; + + entity(const resource& resource, uct_iface_config_t *iface_config, + uct_iface_params_t *params, uct_md_config_t *md_config); + + entity(const resource& resource, uct_md_config_t *md_config, + uct_cm_config_t *cm_config); + + void mem_alloc_host(size_t length, uct_allocated_memory_t *mem) const; + + void mem_free_host(const uct_allocated_memory_t *mem) const; + + void mem_type_reg(uct_allocated_memory_t *mem) const; + + void mem_type_dereg(uct_allocated_memory_t *mem) const; + + void rkey_unpack(const uct_allocated_memory_t *mem, + uct_rkey_bundle *rkey_bundle) const; + + void rkey_release(const uct_rkey_bundle *rkey_bundle) const; + + unsigned progress() const; + + bool is_caps_supported(uint64_t required_flags); + bool check_caps(uint64_t required_flags, uint64_t invalid_flags = 0); + bool check_atomics(uint64_t required_ops, atomic_mode mode); + + uct_md_h md() const; + + const uct_md_attr& md_attr() const; + + uct_worker_h worker() const; + + uct_cm_h cm() const; + + const uct_cm_attr_t& cm_attr() const; + + uct_listener_h listener() const; + + uct_iface_h iface() const; + + const uct_iface_attr& iface_attr() const; + + const uct_iface_params& iface_params() const; + + 
uct_ep_h ep(unsigned index) const;
+
+        size_t num_eps() const;
+
+        void create_ep(unsigned index);
+        void destroy_ep(unsigned index);
+        void destroy_eps();
+        /* Connect by endpoint or interface address, depending on the
+         * capability flags of the interface */
+        void connect(unsigned index, entity& other, unsigned other_index);
+        /* Connect to a socket address (client side) */
+        void connect(unsigned index, entity& other, unsigned other_index,
+                     const ucs::sock_addr_storage &remote_addr,
+                     uct_sockaddr_priv_pack_callback_t pack_cb,
+                     uct_ep_client_connect_cb_t connect_cb,
+                     uct_ep_disconnect_cb_t disconnect_cb,
+                     void *user_data);
+        void connect_to_iface(unsigned index, entity& other);
+        void connect_to_ep(unsigned index, entity& other,
+                           unsigned other_index);
+        void connect_to_sockaddr(unsigned index, entity& other,
+                                 const ucs::sock_addr_storage &remote_addr,
+                                 uct_sockaddr_priv_pack_callback_t pack_cb,
+                                 uct_ep_client_connect_cb_t connect_cb,
+                                 uct_ep_disconnect_cb_t disconnect_cb,
+                                 void *user_data);
+
+        static size_t priv_data_do_pack(void *priv_data);
+        /* Create a server-side endpoint for an incoming connection request */
+        void accept(uct_cm_h cm, uct_conn_request_h conn_request,
+                    uct_ep_server_connect_cb_t connect_cb,
+                    uct_ep_disconnect_cb_t disconnect_cb,
+                    void *user_data);
+        /* Create the listener on listen_addr */
+        void listen(const ucs::sock_addr_storage &listen_addr,
+                    const uct_listener_params_t &params);
+        void disconnect(uct_ep_h ep);
+
+        void flush() const;
+
+        static const std::string server_priv_data;
+        static std::string       client_priv_data;
+        size_t                   max_conn_priv;
+
+        /* Blocks the entity's async context for the lifetime of the object */
+        class scoped_async_lock {
+        public:
+            scoped_async_lock(entity &e);
+            ~scoped_async_lock();
+        private:
+            entity &m_entity;
+        };
+
+    private:
+        /* Owns a ucs_async_context_t: initialized in the constructor and
+         * cleaned up in the destructor */
+        class async_wrapper {
+        public:
+            ucs_async_context_t   m_async;
+            async_wrapper();
+            ~async_wrapper();
+            void check_miss();
+        private:
+            /* declared but private: copying is not allowed */
+            async_wrapper(const async_wrapper &);
+        };
+        /* Endpoint slots; each handle is reset with uct_ep_destroy as its
+         * release function */
+        typedef std::vector< ucs::handle<uct_ep_h> > eps_vec_t;
+
+        /* declared but private: copying is not allowed */
+        entity(const entity&);
+
+        void reserve_ep(unsigned index);
+
+        void connect_p2p_ep(uct_ep_h from, uct_ep_h to);
+        void cuda_mem_alloc(size_t length, uct_allocated_memory_t *mem) const;
+        void cuda_mem_free(const uct_allocated_memory_t *mem) const;
+        static ssize_t
server_priv_data_cb(void *arg, const char *dev_name, + void *priv_data); + + + const resource m_resource; + ucs::handle m_md; + uct_md_attr_t m_md_attr; + mutable async_wrapper m_async; + ucs::handle m_worker; + ucs::handle m_cm; + uct_cm_attr_t m_cm_attr; + ucs::handle m_listener; + ucs::handle m_iface; + eps_vec_t m_eps; + uct_iface_attr_t m_iface_attr; + uct_iface_params_t m_iface_params; + }; + + class mapped_buffer { + public: + mapped_buffer(size_t size, uint64_t seed, const entity& entity, + size_t offset = 0, + ucs_memory_type_t mem_type = UCS_MEMORY_TYPE_HOST); + virtual ~mapped_buffer(); + + void *ptr() const; + uintptr_t addr() const; + size_t length() const; + uct_mem_h memh() const; + uct_rkey_t rkey() const; + const uct_iov_t* iov() const; + + void pattern_fill(uint64_t seed); + void pattern_check(uint64_t seed); + + static size_t pack(void *dest, void *arg); + + private: + + const uct_test::entity& m_entity; + + void *m_buf; + void *m_end; + uct_rkey_bundle_t m_rkey; + uct_allocated_memory_t m_mem; + uct_iov_t m_iov; + }; + + template + static std::vector filter_resources(const std::vector& resources, + const std::string& tl_name) + { + std::vector result; + for (typename std::vector::const_iterator iter = resources.begin(); + iter != resources.end(); ++iter) + { + if (tl_name.empty() || (iter->tl_name == tl_name)) { + result.push_back(&*iter); + } + } + return result; + } + + template + void wait_for_flag(volatile T *flag, double timeout = DEFAULT_TIMEOUT_SEC) const + { + ucs_time_t deadline = ucs_get_time() + + ucs_time_from_sec(timeout) * ucs::test_time_multiplier(); + while ((ucs_get_time() < deadline) && (!(*flag))) { + short_progress_loop(); + } + } + + void wait_for_bits(volatile uint64_t *flag, uint64_t mask, + double timeout = DEFAULT_TIMEOUT_SEC) const + { + ucs_time_t deadline = ucs_get_time() + + ucs_time_from_sec(timeout) * + ucs::test_time_multiplier(); + while ((ucs_get_time() < deadline) && (!ucs_test_all_flags(*flag, mask))) { + /* 
Don't do short_progress_loop() to avoid extra timings */ + progress(); + } + } + + template + void wait_for_value(volatile T *var, T value, bool progress, + double timeout = DEFAULT_TIMEOUT_SEC) const + { + ucs_time_t deadline = ucs_get_time() + + ucs_time_from_sec(timeout) * ucs::test_time_multiplier(); + while ((ucs_get_time() < deadline) && (*var != value)) { + if (progress) { + short_progress_loop(); + } else { + twait(); + } + } + } + + virtual void init(); + virtual void cleanup(); + virtual void modify_config(const std::string& name, const std::string& value, + bool optional = false); + bool get_config(const std::string& name, std::string& value) const; + void stats_activate(); + void stats_restore(); + + virtual bool has_transport(const std::string& tl_name) const; + virtual bool has_ud() const; + virtual bool has_rc() const; + virtual bool has_rc_or_dc() const; + virtual bool has_ib() const; + + bool is_caps_supported(uint64_t required_flags); + bool check_caps(uint64_t required_flags, uint64_t invalid_flags = 0); + void check_caps_skip(uint64_t required_flags, uint64_t invalid_flags = 0); + bool check_atomics(uint64_t required_ops, atomic_mode mode); + const entity& ent(unsigned index) const; + unsigned progress() const; + void flush(ucs_time_t deadline = ULONG_MAX) const; + virtual void short_progress_loop(double delay_ms = DEFAULT_DELAY_MS) const; + virtual void twait(int delta_ms = DEFAULT_DELAY_MS) const; + static void set_cm_resources(std::vector& all_resources); + static bool is_interface_usable(struct ifaddrs *ifa, const char *name); + static void set_md_sockaddr_resources(const md_resource& md_rsc, uct_md_h pm, + ucs_cpu_set_t local_cpus, + std::vector& all_resources); + static void set_cm_sockaddr_resources(uct_component_h cmpt, const char *cmpt_name, + ucs_cpu_set_t local_cpus, + std::vector& all_resources); + static void set_interface_rscs(uct_component_h comt, const char * name, + ucs_cpu_set_t local_cpus, struct ifaddrs *ifa, + std::vector& 
all_resources); + static void init_sockaddr_rsc(resource *rsc, struct sockaddr *listen_addr, + struct sockaddr *connect_addr, size_t size); + uct_test::entity* create_entity(size_t rx_headroom, + uct_error_handler_t err_handler = NULL); + uct_test::entity* create_entity(uct_iface_params_t ¶ms); + uct_test::entity* create_entity(); + int max_connections(); + int max_connect_batch(); + + ucs_status_t send_am_message(entity *e, uint8_t am_id = 0, int ep_idx = 0); + + ucs::ptr_vector m_entities; + uct_iface_config_t *m_iface_config; + uct_md_config_t *m_md_config; + uct_cm_config_t *m_cm_config; +}; + +std::ostream& operator<<(std::ostream& os, const resource* resource); + + +#define UCT_TEST_IB_TLS \ + rc_mlx5, \ + rc_verbs, \ + dc_mlx5, \ + ud_verbs, \ + ud_mlx5, \ + cm + +#define UCT_TEST_SOCKADDR_TLS \ + sockaddr + +#define UCT_TEST_NO_SELF_TLS \ + UCT_TEST_IB_TLS, \ + ugni_rdma, \ + ugni_udt, \ + ugni_smsg, \ + tcp, \ + posix, \ + sysv, \ + xpmem, \ + cma, \ + knem + +#define UCT_TEST_CUDA_MEM_TYPE_TLS \ + cuda_copy, \ + gdr_copy + +#define UCT_TEST_ROCM_MEM_TYPE_TLS \ + rocm_copy + +#define UCT_TEST_TLS \ + UCT_TEST_NO_SELF_TLS, \ + UCT_TEST_CUDA_MEM_TYPE_TLS, \ + UCT_TEST_ROCM_MEM_TYPE_TLS, \ + self + +/** + * Instantiate the parametrized test case for all transports. + * + * @param _test_case Test case class, derived from uct_test. + */ +#define UCT_INSTANTIATE_TEST_CASE(_test_case) \ + UCS_PP_FOREACH(_UCT_INSTANTIATE_TEST_CASE, _test_case, UCT_TEST_TLS) +#define _UCT_INSTANTIATE_TEST_CASE(_test_case, _tl_name) \ + INSTANTIATE_TEST_CASE_P(_tl_name, _test_case, \ + testing::ValuesIn(_test_case::enum_resources(UCS_PP_QUOTE(_tl_name)))); + + +/** + * Instantiate the parametrized test case for the IB transports. + * + * @param _test_case Test case class, derived from uct_test. 
+ */ +#define UCT_INSTANTIATE_IB_TEST_CASE(_test_case) \ + UCS_PP_FOREACH(_UCT_INSTANTIATE_TEST_CASE, _test_case, UCT_TEST_IB_TLS) + +/** + * Instantiate the parametrized test case for all transports excluding SELF. + * + * @param _test_case Test case class, derived from uct_test. + */ +#define UCT_INSTANTIATE_NO_SELF_TEST_CASE(_test_case) \ + UCS_PP_FOREACH(_UCT_INSTANTIATE_TEST_CASE, _test_case, UCT_TEST_NO_SELF_TLS) + +#define UCT_INSTANTIATE_SOCKADDR_TEST_CASE(_test_case) \ + UCS_PP_FOREACH(_UCT_INSTANTIATE_TEST_CASE, _test_case, UCT_TEST_SOCKADDR_TLS) + +std::ostream& operator<<(std::ostream& os, const uct_tl_resource_desc_t& resource); + +#endif diff --git a/test/mpi/Makefile.am b/test/mpi/Makefile.am new file mode 100644 index 0000000..0bb8d5f --- /dev/null +++ b/test/mpi/Makefile.am @@ -0,0 +1,35 @@ +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +# +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + +CC = $(MPICC) +LD = $(MPICC) + +if HAVE_MPICC + +# Test application for memory hooks when running with MPI +# (some MPIs have hooks of their own and we make sure ours still work) +noinst_PROGRAMS = test_memhooks shmem_pingpong +AM_CPPFLAGS = \ + $(BASE_CPPFLAGS) \ + -DUCM_LIB_DIR="$(abs_top_builddir)/src/ucm/.libs" \ + -DTEST_LIB_DIR="$(abs_builddir)/.libs" +AM_CFLAGS = $(BASE_CFLAGS) +test_memhooks_SOURCES = test_memhooks.c + + +# A library we use for testing that memory hooks work in libraries loaded +# after the hooks were installed +noinst_LTLIBRARIES = libtest_memhooks.la +libtest_memhooks_la_SOURCES = test_memhooks_lib.c +libtest_memhooks_la_LDFLAGS = -rpath /nowhere # Force shared library + + +# SHMEM ping-pong test +shmem_pingpong_LDFLAGS = -loshmem +shmem_pingpong_SOURCES = shmem_pingpong.c + +endif diff --git a/test/mpi/Makefile.in b/test/mpi/Makefile.in new file mode 100644 index 0000000..b443713 --- /dev/null +++ b/test/mpi/Makefile.in @@ -0,0 +1,811 @@ +# Makefile.in 
generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. +# +# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED. +# See file LICENSE for terms. +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@HAVE_MPICC_TRUE@noinst_PROGRAMS = test_memhooks$(EXEEXT) \ +@HAVE_MPICC_TRUE@ shmem_pingpong$(EXEEXT) +subdir = test/mpi +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = 
$(top_srcdir)/config/m4/gtest.m4 \ + $(top_srcdir)/config/m4/libtool.m4 \ + $(top_srcdir)/config/m4/ltoptions.m4 \ + $(top_srcdir)/config/m4/ltsugar.m4 \ + $(top_srcdir)/config/m4/ltversion.m4 \ + $(top_srcdir)/config/m4/lt~obsolete.m4 \ + $(top_srcdir)/config/m4/ax_prog_doxygen.m4 \ + $(top_srcdir)/config/m4/graphviz.m4 \ + $(top_srcdir)/config/m4/compiler.m4 \ + $(top_srcdir)/config/m4/sysdep.m4 \ + $(top_srcdir)/config/m4/ucs.m4 $(top_srcdir)/config/m4/ucm.m4 \ + $(top_srcdir)/config/m4/mpi.m4 $(top_srcdir)/config/m4/rte.m4 \ + $(top_srcdir)/config/m4/java.m4 \ + $(top_srcdir)/config/m4/cuda.m4 \ + $(top_srcdir)/config/m4/rocm.m4 \ + $(top_srcdir)/config/m4/gdrcopy.m4 \ + $(top_srcdir)/src/ucm/configure.m4 \ + $(top_srcdir)/src/ucm/cuda/configure.m4 \ + $(top_srcdir)/src/ucm/rocm/configure.m4 \ + $(top_srcdir)/src/uct/configure.m4 \ + $(top_srcdir)/src/uct/cuda/configure.m4 \ + $(top_srcdir)/src/uct/cuda/gdr_copy/configure.m4 \ + $(top_srcdir)/src/uct/ib/configure.m4 \ + $(top_srcdir)/src/uct/ib/cm/configure.m4 \ + $(top_srcdir)/src/uct/ib/rdmacm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/configure.m4 \ + $(top_srcdir)/src/uct/rocm/gdr/configure.m4 \ + $(top_srcdir)/src/uct/sm/configure.m4 \ + $(top_srcdir)/src/uct/sm/cma/configure.m4 \ + $(top_srcdir)/src/uct/sm/knem/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/configure.m4 \ + $(top_srcdir)/src/uct/sm/mm/xpmem/configure.m4 \ + $(top_srcdir)/src/uct/ugni/configure.m4 \ + $(top_srcdir)/src/tools/perf/configure.m4 \ + $(top_srcdir)/src/tools/perf/lib/configure.m4 \ + $(top_srcdir)/src/tools/perf/cuda/configure.m4 \ + $(top_srcdir)/src/tools/perf/rocm/configure.m4 \ + $(top_srcdir)/test/gtest/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/configure.m4 \ + $(top_srcdir)/test/gtest/ucm/test_dlopen/rpath-subdir/configure.m4 \ + $(top_srcdir)/test/gtest/ucs/test_module/configure.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) 
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = run_mpi.sh +CONFIG_CLEAN_VPATH_FILES = +PROGRAMS = $(noinst_PROGRAMS) +LTLIBRARIES = $(noinst_LTLIBRARIES) +libtest_memhooks_la_LIBADD = +am__libtest_memhooks_la_SOURCES_DIST = test_memhooks_lib.c +@HAVE_MPICC_TRUE@am_libtest_memhooks_la_OBJECTS = \ +@HAVE_MPICC_TRUE@ test_memhooks_lib.lo +libtest_memhooks_la_OBJECTS = $(am_libtest_memhooks_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libtest_memhooks_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(libtest_memhooks_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@HAVE_MPICC_TRUE@am_libtest_memhooks_la_rpath = +am__shmem_pingpong_SOURCES_DIST = shmem_pingpong.c +@HAVE_MPICC_TRUE@am_shmem_pingpong_OBJECTS = shmem_pingpong.$(OBJEXT) +shmem_pingpong_OBJECTS = $(am_shmem_pingpong_OBJECTS) +shmem_pingpong_LDADD = $(LDADD) +shmem_pingpong_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(shmem_pingpong_LDFLAGS) $(LDFLAGS) -o \ + $@ +am__test_memhooks_SOURCES_DIST = test_memhooks.c +@HAVE_MPICC_TRUE@am_test_memhooks_OBJECTS = test_memhooks.$(OBJEXT) +test_memhooks_OBJECTS = $(am_test_memhooks_OBJECTS) +test_memhooks_LDADD = $(LDADD) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/shmem_pingpong.Po \ + 
./$(DEPDIR)/test_memhooks.Po ./$(DEPDIR)/test_memhooks_lib.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libtest_memhooks_la_SOURCES) $(shmem_pingpong_SOURCES) \ + $(test_memhooks_SOURCES) +DIST_SOURCES = $(am__libtest_memhooks_la_SOURCES_DIST) \ + $(am__shmem_pingpong_SOURCES_DIST) \ + $(am__test_memhooks_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/run_mpi.sh.in \ + $(top_srcdir)/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_CFLAGS = @BASE_CFLAGS@ +BASE_CPPFLAGS = @BASE_CPPFLAGS@ +BASE_CXXFLAGS = @BASE_CXXFLAGS@ +CC = $(MPICC) +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAGS_NO_DEPRECATED = @CFLAGS_NO_DEPRECATED@ +CFLAGS_PEDANTIC = @CFLAGS_PEDANTIC@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CRAY_UGNI_CFLAGS = @CRAY_UGNI_CFLAGS@ +CRAY_UGNI_LIBS = @CRAY_UGNI_LIBS@ +CUDA_CPPFLAGS = @CUDA_CPPFLAGS@ +CUDA_LDFLAGS = @CUDA_LDFLAGS@ +CXX = @CXX@ +CXX11FLAGS = @CXX11FLAGS@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_BIBTEX = @DX_BIBTEX@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +ECHO_C = @ECHO_C@ +ECHO_N = 
@ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GDR_COPY_CPPFLAGS = @GDR_COPY_CPPFLAGS@ +GDR_COPY_LDFLAGS = @GDR_COPY_LDFLAGS@ +GITBIN = @GITBIN@ +GRAPHVIZ_DOT = @GRAPHVIZ_DOT@ +GREP = @GREP@ +GTEST_CXXFLAGS = @GTEST_CXXFLAGS@ +HIP_CPPFLAGS = @HIP_CPPFLAGS@ +HIP_CXXFLAGS = @HIP_CXXFLAGS@ +HIP_LDFLAGS = @HIP_LDFLAGS@ +HIP_LIBS = @HIP_LIBS@ +IBCM_LIBS = @IBCM_LIBS@ +IBVERBS_CFLAGS = @IBVERBS_CFLAGS@ +IBVERBS_CPPFLAGS = @IBVERBS_CPPFLAGS@ +IBVERBS_DIR = @IBVERBS_DIR@ +IBVERBS_LDFLAGS = @IBVERBS_LDFLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JAVA = @JAVA@ +JAVABIN = @JAVABIN@ +JAVA_HOME = @JAVA_HOME@ +JDK = @JDK@ +KNEM_CPPFLAGS = @KNEM_CPPFLAGS@ +LD = $(MPICC) +LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_MLX5 = @LIB_MLX5@ +LIPO = @LIPO@ +LN_RS = @LN_RS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAJOR_VERSION = @MAJOR_VERSION@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINOR_VERSION = @MINOR_VERSION@ +MKDIR_P = @MKDIR_P@ +MPICC = @MPICC@ +MPIRUN = @MPIRUN@ +MVN = @MVN@ +MVNBIN = @MVNBIN@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OPENMP_CFLAGS = @OPENMP_CFLAGS@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATCH_VERSION = @PATCH_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERF_LIB_CXXFLAGS = @PERF_LIB_CXXFLAGS@ +PKG_CONFIG = @PKG_CONFIG@ +RANLIB = @RANLIB@ +RDMACM_CPPFLAGS = @RDMACM_CPPFLAGS@ +RDMACM_LDFLAGS = @RDMACM_LDFLAGS@ +RDMACM_LIBS = @RDMACM_LIBS@ +READLINK = @READLINK@ +ROCM_CPPFLAGS = 
@ROCM_CPPFLAGS@ +ROCM_LDFLAGS = @ROCM_LDFLAGS@ +ROCM_LIBS = @ROCM_LIBS@ +RTE_CPPFLAGS = @RTE_CPPFLAGS@ +RTE_LDFLAGS = @RTE_LDFLAGS@ +SCM_BRANCH = @SCM_BRANCH@ +SCM_VERSION = @SCM_VERSION@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SOVERSION = @SOVERSION@ +STRIP = @STRIP@ +UCM_MODULE_LDFLAGS = @UCM_MODULE_LDFLAGS@ +UCX_PERFTEST_CC = @UCX_PERFTEST_CC@ +VALGRIND_LIBPATH = @VALGRIND_LIBPATH@ +VERSION = @VERSION@ +XPMEM_CFLAGS = @XPMEM_CFLAGS@ +XPMEM_LIBS = @XPMEM_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_bindings = @build_bindings@ +build_cpu = @build_cpu@ +build_modules = @build_modules@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localmoduledir = @localmoduledir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +moduledir = @moduledir@ +modulesubdir = @modulesubdir@ +objdir = @objdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +shrext = @shrext@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = 
# Everything in this section is guarded by the HAVE_MPICC automake conditional:
# each line carries the @HAVE_MPICC_TRUE@ marker, so the definitions are only
# meant to take effect when an MPI compiler was detected.

# Preprocessor flags for the MPI tests.  UCM_LIB_DIR and TEST_LIB_DIR are
# turned into string literals by test_memhooks.c, which uses them to locate
# libucm.so and libtest_memhooks.so in the build tree.
@HAVE_MPICC_TRUE@AM_CPPFLAGS = \
@HAVE_MPICC_TRUE@	$(BASE_CPPFLAGS) \
@HAVE_MPICC_TRUE@	-DUCM_LIB_DIR="$(abs_top_builddir)/src/ucm/.libs" \
@HAVE_MPICC_TRUE@	-DTEST_LIB_DIR="$(abs_builddir)/.libs"

@HAVE_MPICC_TRUE@AM_CFLAGS = $(BASE_CFLAGS)
@HAVE_MPICC_TRUE@test_memhooks_SOURCES = test_memhooks.c

# A library we use for testing that memory hooks work in libraries loaded
# after the hooks were installed
@HAVE_MPICC_TRUE@noinst_LTLIBRARIES = libtest_memhooks.la
@HAVE_MPICC_TRUE@libtest_memhooks_la_SOURCES = test_memhooks_lib.c
@HAVE_MPICC_TRUE@libtest_memhooks_la_LDFLAGS = -rpath /nowhere # Force shared library

# SHMEM ping-pong test
# NOTE(review): -loshmem is supplied through LDFLAGS rather than LDADD, so it
# presumably lands before the object files on the link line; linkers that
# default to --as-needed may then drop it.  Confirm against Makefile.am before
# changing, since this file is regenerated from it.
@HAVE_MPICC_TRUE@shmem_pingpong_LDFLAGS = -loshmem
@HAVE_MPICC_TRUE@shmem_pingpong_SOURCES = shmem_pingpong.c
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +run_mpi.sh: $(top_builddir)/config.status $(srcdir)/run_mpi.sh.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libtest_memhooks.la: $(libtest_memhooks_la_OBJECTS) $(libtest_memhooks_la_DEPENDENCIES) $(EXTRA_libtest_memhooks_la_DEPENDENCIES) + $(AM_V_CCLD)$(libtest_memhooks_la_LINK) $(am_libtest_memhooks_la_rpath) $(libtest_memhooks_la_OBJECTS) $(libtest_memhooks_la_LIBADD) $(LIBS) + +shmem_pingpong$(EXEEXT): $(shmem_pingpong_OBJECTS) $(shmem_pingpong_DEPENDENCIES) $(EXTRA_shmem_pingpong_DEPENDENCIES) + @rm -f shmem_pingpong$(EXEEXT) + $(AM_V_CCLD)$(shmem_pingpong_LINK) $(shmem_pingpong_OBJECTS) $(shmem_pingpong_LDADD) 
$(LIBS) + +test_memhooks$(EXEEXT): $(test_memhooks_OBJECTS) $(test_memhooks_DEPENDENCIES) $(EXTRA_test_memhooks_DEPENDENCIES) + @rm -f test_memhooks$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(test_memhooks_OBJECTS) $(test_memhooks_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shmem_pingpong.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_memhooks.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_memhooks_lib.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ 
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for 
file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/shmem_pingpong.Po + -rm -f ./$(DEPDIR)/test_memhooks.Po + -rm -f ./$(DEPDIR)/test_memhooks_lib.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/shmem_pingpong.Po + -rm -f ./$(DEPDIR)/test_memhooks.Po + -rm -f ./$(DEPDIR)/test_memhooks_lib.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstLTLIBRARIES \ + clean-noinstPROGRAMS cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf 
#!/bin/bash
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

#
# Convenience script to run MPI applications with UCX
#
# Usage: run_mpi.sh [options] <exe> [exe-args...] [-- <extra-mpirun-args...>]
#
# The interpreter must be bash: the script uses [[ ]] tests and arrays, which
# a plain POSIX /bin/sh (e.g. dash) does not provide.
#

# Echo the arguments, but only when -v/--verbose was given.
verbose()
{
    if [ "$VERBOSE" -ne 0 ]
    then
        echo "$@"
    fi
}

# If a SLURM allocation is available (either $SLURM_JOBID is set or the current
# user has a queued/running job), derive HOSTS, NNODES and PPN from it.
# Otherwise leave the defaults set by initialize() untouched.
check_slurm_env()
{
    if [ -z "$SLURM_JOBID" ]
    then
        # Search for jobs of the current user
        SLURM_JOBID=$(squeue -h -u "$USER" -o "%i"|head -1)
    fi

    if [ -z "$SLURM_JOBID" ]
    then
        # Skip slurm
        return
    fi

    # Nodes to run on
    export HOSTS=$(hostlist -e $(squeue -j ${SLURM_JOBID} -h -o "%N"))
    SLURM_NNODES=$(squeue -j ${SLURM_JOBID} -h -o "%D")
    NNODES=$SLURM_NNODES

    if [ -n "$SLURM_JOB_CPUS_PER_NODE" ]
    then
        # Values look like "16(x2)"; keep only the part before the parenthesis
        export PPN=$(echo $SLURM_JOB_CPUS_PER_NODE|cut -d'(' -f1)
    else
        TOTAL_CPUS=$(squeue -j ${SLURM_JOBID} -h -o "%C")
        export PPN=$((${TOTAL_CPUS} / ${SLURM_NNODES}))
    fi

}

# Print the command-line help. The values in parentheses are the current
# defaults (possibly already adjusted by check_slurm_env).
usage()
{
    echo "Usage: run_mpi.sh [options] <exe> [exe-args...] [-- <extra-mpirun-args...>]"
    echo
    echo "  -h|--help                      Show this help message"
    echo "  -v|--verbose                   Turn on verbosity"
    echo "  -c|--config <name>=<value>     Set UCX configuration"
    echo "  -N|--nnodes <count>            Number of nodes to run on ($NNODES)"
    echo "  --ppn <count>                  Number of processes per node ($PPN)"
    echo "  --mpi-log-level <level>        Log level for MPI UCX component ($MPI_LOG_LEVEL)"
    echo "  --valgrind                     Run with valgrind"
    echo "  --valgrind-args \"<args>\"       Extra arguments to valgrind"
    echo
}

# Set the default value of every knob the script uses.
initialize()
{
    export MPIRUN=@MPIRUN@
    export LIBUCS=@abs_top_builddir@/src/ucs/.libs/libucs.so
    export LIBUCT=@abs_top_builddir@/src/uct/.libs/libuct.so
    export LIBUCP=@abs_top_builddir@/src/ucp/.libs/libucp.so
    export VERBOSE=0
    export EXE=""
    # EXE_ARGS is an array. It must be reset as an (empty) array: assigning
    # the scalar "" would create a one-element array holding an empty string,
    # and the launched program would receive a spurious empty first argument.
    EXE_ARGS=()
    export EXTRA_MPI_ARGS=""
    export NNODES=1
    export PPN=1
    export CONFIG=""
    export MPI_LOG_LEVEL=0
    export VALGRIND=0
    export VALGRIND_ARGS=""
}

# Parse the command line:
#   1. options, up to and including the first non-option word (the executable);
#   2. arguments of the executable, up to an optional "--";
#   3. everything after "--" is handed to mpirun verbatim.
parse_args()
{
    while [[ $# -gt 0 ]]
    do
        key="$1"
        case $key in
        -h|--help)
            usage
            exit 0
            ;;
        -v|--verbose)
            export VERBOSE=1
            ;;
        -c|--config)
            # May be given several times; settings accumulate
            export CONFIG="$CONFIG $2"
            shift
            ;;
        -N|--nnodes)
            export NNODES=$2
            shift
            ;;
        --ppn)
            export PPN=$2
            shift
            ;;
        --mpi-log-level)
            export MPI_LOG_LEVEL=$2
            shift
            ;;
        --valgrind)
            export VALGRIND=1
            ;;
        --valgrind-args)
            export VALGRIND_ARGS="$2"
            shift
            ;;
        [^-]*)
            # First word that is not an option: the executable to launch
            export EXE=$key
            shift
            break
            ;;
        *)
            usage
            exit -2
            ;;
        esac
        shift
    done

    while [[ $# -gt 0 ]]
    do
        key="$1"
        case $key in
        --)
            shift
            # "$*" joins the remaining words into one space-separated string
            export EXTRA_MPI_ARGS="$*"
            break
            ;;
        *)
            EXE_ARGS+=("$key")
            ;;
        esac
        shift
    done
}

# Compute the total process count and the comma-separated list of the first
# NNODES hosts.
adjust_run_params()
{
    export NP=$((${NNODES} * ${PPN}))
    export HOSTLIST=$(echo $HOSTS|cut -d' ' -f 1-$NNODES|tr ' ' ',')
}

# Build the Open MPI command line and launch the executable.
run_open_mpi()
{
    OMPI_ARGS=""
    OMPI_ARGS="$OMPI_ARGS -mca pml ucx"
    OMPI_ARGS="$OMPI_ARGS -mca pml_ucx_verbose $MPI_LOG_LEVEL"
    OMPI_ARGS="$OMPI_ARGS -mca spml ucx"
    OMPI_ARGS="$OMPI_ARGS -mca spml_ucx_verbose $MPI_LOG_LEVEL"
    OMPI_ARGS="$OMPI_ARGS -H $HOSTLIST"
    OMPI_ARGS="$OMPI_ARGS -n $NP"
    OMPI_ARGS="$OMPI_ARGS --map-by node"
    OMPI_ARGS="$OMPI_ARGS -mca ess_base_stream_buffering 0"
    OMPI_ARGS="$OMPI_ARGS -mca mpi_abort_delay -1"

    OMPI_ARGS="$OMPI_ARGS -x LD_PRELOAD=$LD_PRELOAD:$LIBUCP"
    if [ "$VALGRIND" -ne 0 ]
    then
        # Preload valgrind-enabled libraries. Each one is appended with ':'
        # directly to the LD_PRELOAD value that ends OMPI_ARGS at this point,
        # so nothing else may be added to OMPI_ARGS in between.
        for lib in /usr/lib64/mlnx_ofed/valgrind/*.so
        do
            [ -f "$lib" ] && OMPI_ARGS="$OMPI_ARGS:$lib"
        done
    fi

    OMPI_ARGS="$OMPI_ARGS -x UCX_HANDLE_ERRORS=freeze"
    # CONFIG is deliberately unquoted: it is a space-separated list of
    # NAME=VALUE settings, each forwarded with its own -x
    for c in $CONFIG
    do
        OMPI_ARGS="$OMPI_ARGS -x $c"
    done

    if [ "$VALGRIND" -ne 0 ]
    then
        MPI_HOME=$(cd "$(dirname "${MPIRUN}")/.." && pwd)
        EXE="valgrind \
                 --fair-sched=try \
                 --track-origins=yes \
                 --leak-check=yes \
                 --suppressions=${MPI_HOME}/share/openmpi/openmpi-valgrind.supp \
                 --suppressions=@abs_srcdir@/ompi.supp \
                 $VALGRIND_ARGS \
                 $EXE"
        LD_LIBRARY_PATH="$LD_LIBRARY_PATH:@VALGRIND_LIBPATH@"
    fi

    OMPI_ARGS="$OMPI_ARGS -x LD_LIBRARY_PATH"

    export LD_LIBRARY_PATH
    # OMPI_ARGS, EXTRA_MPI_ARGS and EXE are left unquoted on purpose so that
    # they split into separate words
    verbose $MPIRUN $OMPI_ARGS $EXTRA_MPI_ARGS $EXE "${EXE_ARGS[@]}"
    $MPIRUN $OMPI_ARGS $EXTRA_MPI_ARGS $EXE "${EXE_ARGS[@]}"
}

# Entry point: set defaults, pick up the SLURM allocation, parse the command
# line, and launch with the detected MPI flavor (only Open MPI is supported).
main()
{
    EXE_ARGS=()
    initialize
    check_slurm_env
    parse_args "$@"
    adjust_run_params

    if (strings $MPIRUN|grep -qi orte) && ($MPIRUN -h|grep -q "Open MPI")
    then
        run_open_mpi
    else
        echo "Unrecognized MPI flavor ($MPIRUN)"
        exit -3
    fi
}

main "$@"
+*/ + +#include +#include +#include +#include +#include +#include +#include + + +#define GLOBAL_DATA_SIZE 1024 + + +static int show_result(const struct timeval *tv_prev, + const struct timeval *tv_curr, + long iters, size_t msg_size, int force) +{ + double elapsed; + + elapsed = (tv_curr->tv_sec + tv_curr->tv_usec * 1e-6) - + (tv_prev->tv_sec + tv_prev->tv_usec * 1e-6); + + if (((elapsed >= 1.0) || force) && (shmem_my_pe() == 0)) { + printf("%ld iterations, %lu bytes, latency: %.3f usec\n", + iters, msg_size, elapsed * 1e6 / iters / 2.0); + return 1; + } + + return 0; +} + +static void run_pingpong(char *mem, size_t msg_size, long num_iters, int use_wait, + int do_quiet, int use_flag) +{ + struct timeval tv_prev, tv_curr; + int my_pe, dst_pe; + volatile int *rsn; + char *msg; + int *ssn; + int sn; + long i, prev_i; + + msg = malloc(msg_size); + if (msg == NULL) { + return; + } + + memset(msg, 0, msg_size); + + gettimeofday(&tv_prev, NULL); + prev_i = 0; + my_pe = shmem_my_pe(); + dst_pe = 1 - my_pe; + rsn = (int*)&mem[msg_size - sizeof(int)]; + ssn = (int*)&msg[msg_size - sizeof(int)]; + + for (i = 0; i < num_iters; ++i) { + sn = i & 127; + *ssn = sn; + if (my_pe == 0) { + shmem_putmem(mem, msg, msg_size, dst_pe); + if (do_quiet) { + shmem_quiet(); + } + } + if (use_wait) { + shmem_int_wait_until(rsn, SHMEM_CMP_EQ, sn); + } else { + while (*rsn != sn); + } + if (my_pe == 1) { + if (use_flag) { + shmem_putmem(mem, msg, msg_size - sizeof(int), dst_pe); + shmem_fence(); + shmem_int_put((int*)rsn, ssn, 1, dst_pe); + } else { + shmem_putmem(mem, msg, msg_size, dst_pe); + } + if (do_quiet) { + shmem_quiet(); + } + } + if ((i % 1000) == 0) { + gettimeofday(&tv_curr, NULL); + if (show_result(&tv_prev, &tv_curr, i - prev_i, msg_size, 0)) { + prev_i = i; + tv_prev = tv_curr; + } + } + } + + gettimeofday(&tv_curr, NULL); + show_result(&tv_prev, &tv_curr, num_iters - prev_i, msg_size, 1); + free(msg); +} + +static void usage() +{ + printf("Usage: shmem_pingpong [options]\n"); 
+ printf("\n"); + printf("Options are:\n"); + printf(" -n Specify number of iterations to run (default: 10000).\n"); + printf(" -s Specify message size (default: 4 bytes).\n"); + printf(" -w Wait for data using shmem_wait_until() (default: poll on memory).\n"); + printf(" -f Send data and flag separately with shmem_fence() in-between.\n"); + printf(" -g Use global data (default: heap).\n"); + printf(" -q call shmem_quiet() after every shmem_put().\n"); + printf("\n"); +} + +int main(int argc, char **argv) +{ + static char global_buffer[GLOBAL_DATA_SIZE]; + int use_wait, use_global, do_quiet, use_flag; + size_t msg_size; + long num_iters; + int my_pe; + char *mem; + int c; + + start_pes(0); + + my_pe = shmem_my_pe(); + + if (shmem_n_pes() != 2) { + fprintf(stderr, "This test requires exactly 2 processes\n"); + return -1; + } + + num_iters = 10000; + use_global = 0; + use_wait = 0; + do_quiet = 0; + use_flag = 0; + msg_size = 8; + while ((c = getopt (argc, argv, "n:s:wgqfh")) != -1) { + switch (c) { + break; + case 'n': + num_iters = atol(optarg); + if (num_iters == 0) { + num_iters = LONG_MAX; + } + break; + case 'w': + use_wait = 1; + break; + case 'g': + use_global = 1; + break; + case 'q': + do_quiet = 1; + break; + case 'f': + use_flag = 1; + break; + case 's': + msg_size = atol(optarg); + break; + case 'h': + default: + if (my_pe == 0) { + usage(); + } + return 0; + } + } + + if (msg_size < sizeof(int)) { + fprintf(stderr, "message size must be at least %lu\n", sizeof(int)); + return -1; + } + + if (use_global) { + if (msg_size <= GLOBAL_DATA_SIZE) { + mem = global_buffer; + } else { + fprintf(stderr, "global data can be used only up to %lu bytes\n", + (size_t)GLOBAL_DATA_SIZE); + return -1; + } + } else { + mem = shmalloc(msg_size); + } + + memset(mem, 0xff, msg_size); + + shmem_barrier_all(); + + run_pingpong(mem, msg_size, num_iters, use_wait, do_quiet, use_flag); + + shmem_barrier_all(); + + if (!use_global) { + shfree(mem); + } + + shmem_finalize(); + 
return 0; +} diff --git a/test/mpi/test_memhooks.c b/test/mpi/test_memhooks.c new file mode 100644 index 0000000..e6366b1 --- /dev/null +++ b/test/mpi/test_memhooks.c @@ -0,0 +1,458 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ +#define _GNU_SOURCE /* For basename */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CHKERR_JUMP(cond, msg, label) \ + do { \ + if (cond) { \ + printf("%s:%d: %s\n", basename(__FILE__), __LINE__, msg); \ + goto label; \ + } \ + } while (0) + +#define DL_FIND_FUNC(dl, func_name, func, err_action) \ + do { \ + char *error; \ + dlerror(); /* clear existing errors */ \ + func = dlsym(dl, func_name); \ + if (((error = dlerror()) != NULL) || (func == NULL)) { \ + error = error ? error : "not found"; \ + fprintf(stderr, "Failed to resolve symbol '%s': %s\n", \ + func_name, error); \ + err_action; \ + } \ + } while (0); + +#define SHMAT_FAILED ((void*)-1) + +void* open_dyn_lib(const char *lib_path); +void* flag_no_install_init(const char *path); +int malloc_hooks_run_all(void *dl); +int malloc_hooks_run_unmapped(void *dl); +int ext_event_run(void *dl); +void *ext_event_init(const char *path); + +typedef struct memtest_type { + const char *name; + void* (*init)(const char *path); + int (*run) (void *arg); +} memtest_type_t; + +memtest_type_t tests[] = { + {"malloc_hooks", open_dyn_lib, malloc_hooks_run_all}, + {"malloc_hooks_unmapped", open_dyn_lib, malloc_hooks_run_unmapped}, + {"external_events", ext_event_init, ext_event_run}, + {"flag_no_install", flag_no_install_init, ext_event_run}, + {NULL} +}; + +static volatile size_t total_mapped = 0; +static volatile size_t total_unmapped = 0; + +static void usage() { + printf("Usage: test_memhooks [options]\n"); + printf("Options are:\n"); + printf(" -h Print this info.\n"); + printf(" -t Test name to execute (malloc_hooks)\n"); + printf(" malloc_hooks : 
General UCM test for VM_MAPPED and VM_UNMAPPED\n"); + printf(" malloc_hooks_unmapped : Test VM_UNMAPPED event only\n"); + printf(" external_events : Test of ucm_set_external_event() API\n"); + printf(" flag_no_install : Test of UCM_EVENT_FLAG_NO_INSTALL flag\n"); + printf("\n"); +} + +static void event_callback(ucm_event_type_t event_type, ucm_event_t *event, + void *arg) +{ + if (event_type == UCM_EVENT_VM_MAPPED) { + total_mapped += event->vm_mapped.size; + } else if (event_type == UCM_EVENT_VM_UNMAPPED) { + total_unmapped += event->vm_unmapped.size; + } +} + +static ucs_status_t set_event_handler(void *dl, int events) +{ + ucs_status_t (*set_handler)(int events, int priority, + ucm_event_callback_t cb, void *arg); + + DL_FIND_FUNC(dl, "ucm_set_event_handler", set_handler, + return UCS_ERR_UNSUPPORTED); + + return set_handler(events, 0, event_callback, NULL); +} + +static ucs_status_t disable_memory_hooks(void *dl) +{ + setenv("UCX_MEM_MALLOC_HOOKS", "n", 1); + setenv("UCX_MEM_MMAP_RELOC", "n", 1); + return UCS_OK; +} + +void* open_dyn_lib(const char *lib_path) +{ + void *dl = dlopen(lib_path, RTLD_LAZY); + char *error; + + if (dl == NULL) { + error = dlerror(); + error = error ? 
error : "unknown error"; + fprintf(stderr, "Failed to load '%s': %s\n", lib_path, error); + } + return dl; +} + + +void *ext_event_init(const char *path) +{ + void (*set_ext_event)(int events); + ucs_status_t status; + void *dl_ucm; + + dl_ucm = open_dyn_lib(path); + if (dl_ucm == NULL) { + return NULL; + } + + status = disable_memory_hooks(dl_ucm); + CHKERR_JUMP(status != UCS_OK, "Failed to disable memory hooks", fail); + + DL_FIND_FUNC(dl_ucm, "ucm_set_external_event", set_ext_event, goto fail); + set_ext_event(UCM_EVENT_VM_MAPPED | UCM_EVENT_VM_UNMAPPED); + + status = set_event_handler(dl_ucm, UCM_EVENT_VM_MAPPED | + UCM_EVENT_VM_UNMAPPED); + CHKERR_JUMP(status != UCS_OK, "Failed to set event handler", fail); + + return dl_ucm; + +fail: + dlclose(dl_ucm); + return NULL; +} + +void* flag_no_install_init(const char *path) +{ + void *dl_ucm; + ucs_status_t status; + + dl_ucm = open_dyn_lib(path); + if (dl_ucm == NULL) { + return NULL; + } + + status = disable_memory_hooks(dl_ucm); + CHKERR_JUMP(status != UCS_OK, "Failed to disable memory hooks", fail); + + status = set_event_handler(dl_ucm, UCM_EVENT_VM_MAPPED | + UCM_EVENT_VM_UNMAPPED | + UCM_EVENT_FLAG_NO_INSTALL); + CHKERR_JUMP(status != UCS_OK, "Failed to set event handler", fail); + return dl_ucm; + +fail: + dlclose(dl_ucm); + return NULL; +} + +int malloc_hooks_run_flags(void *dl, ucm_event_type_t events) +{ + ucs_status_t status; + void *ptr_malloc_core = NULL; + void *ptr_malloc_mmap = NULL; + void *ptr_direct_mmap = MAP_FAILED; + int shmid = -1; + void *ptr_shmat = SHMAT_FAILED; + void *dl_test; + const size_t size = 1024 * 1024; + const char *lib_path = UCS_PP_MAKE_STRING(TEST_LIB_DIR) "/" "libtest_memhooks.so"; + const char *cust_mmap_name = "memhook_test_lib_call_mmap"; + void * (*cust_mmap)(size_t size); + + status = set_event_handler(dl, events); + CHKERR_JUMP(status != UCS_OK, "Failed to set event handler", fail_close_ucm); + + printf("Allocating memory\n"); + + /* Create SysV segment */ + shmid = 
shmget(IPC_PRIVATE, size, IPC_CREAT|SHM_R|SHM_W); + CHKERR_JUMP(shmid == -1, "Failed to create shared memory segment: %m", + fail_close_ucm); + + /* + * Test shmat/shmdt before malloc() because shmat() add entires to an internal + * hash of pointers->size, which makes previous pointers un-releasable + */ + + /* Attach SysV segment */ + total_mapped = 0; + ptr_shmat = shmat(shmid, NULL, 0); + CHKERR_JUMP(ptr_shmat == SHMAT_FAILED, "Failed to attach shared memory segment", + fail_close_ucm); + if (events & UCM_EVENT_VM_MAPPED) { + CHKERR_JUMP(total_mapped < size, "No callback for shmat", fail_close_ucm); + } + printf("After shmat: reported mapped=%zu\n", total_mapped); + + /* Detach SysV segment */ + total_unmapped = 0; + shmdt(ptr_shmat); + ptr_shmat = SHMAT_FAILED; + if (events & UCM_EVENT_VM_UNMAPPED) { + CHKERR_JUMP(total_unmapped < size, "No callback for shmdt", fail_close_ucm); + } + printf("After shmdt: reported unmapped=%zu\n", total_unmapped); + + /* Attach SysV segment at fixed address */ + total_mapped = 0; + total_unmapped = 0; + ptr_shmat = shmat(shmid, (void*)0xff000000, SHM_REMAP); + CHKERR_JUMP(ptr_shmat == SHMAT_FAILED, "Failed to attach shared memory segment", + fail_close_ucm); + if (events & UCM_EVENT_VM_MAPPED) { + CHKERR_JUMP(total_mapped < size, "No map callback for shmat(REMAP)", fail_close_ucm); + } + if (events & UCM_EVENT_VM_UNMAPPED) { + CHKERR_JUMP(total_unmapped < size, "No unmap callback for shmat(REMAP)", + fail_close_ucm); + } + printf("After shmat(REMAP): reported mapped=%zu unmapped=%zu\n", total_mapped, + total_unmapped); + + /* Detach SysV segment */ + total_unmapped = 0; + shmdt(ptr_shmat); + ptr_shmat = SHMAT_FAILED; + if (events & UCM_EVENT_VM_UNMAPPED) { + CHKERR_JUMP(total_unmapped < size, "No callback for shmdt", fail_close_ucm); + } + printf("After shmdt: reported unmapped=%zu\n", total_unmapped); + + /* Destroy SysV segment */ + shmctl(shmid, IPC_RMID, NULL); + shmid = -1; + + /* Allocate using morecore */ + 
mallopt(M_MMAP_THRESHOLD, size * 2); + mallopt(M_TRIM_THRESHOLD, size / 2); + total_mapped = 0; + ptr_malloc_core = malloc(1024 * 1024); + if (events & UCM_EVENT_VM_MAPPED) { + CHKERR_JUMP(total_mapped == 0, "No callback for core malloc", + fail_close_ucm); + } + printf("After core malloc: reported mapped=%zu\n", total_mapped); + + /* Allocate using mmap */ + mallopt(M_MMAP_THRESHOLD, size / 2); + total_mapped = 0; + ptr_malloc_mmap = malloc(2 * 1024 * 1024); + if (events & UCM_EVENT_VM_MAPPED) { + CHKERR_JUMP(total_mapped == 0, "No callback for mmap malloc", + fail_close_ucm); + } + printf("After mmap malloc: reported mapped=%zu\n", total_mapped); + + /* Allocate directly with mmap */ + total_mapped = 0; + ptr_direct_mmap = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (events & UCM_EVENT_VM_MAPPED) { + CHKERR_JUMP(total_mapped < size, "No callback for mmap", fail_close_ucm); + } + printf("After mmap: reported mapped=%zu\n", total_mapped); + + /* Remap */ + total_unmapped = 0; + ptr_direct_mmap = mmap(ptr_direct_mmap, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON|MAP_FIXED, -1, 0); + if (events & UCM_EVENT_VM_UNMAPPED) { + CHKERR_JUMP(total_unmapped < size, "No unmap callback for mmap(FIXED)", + fail_close_ucm); + } + printf("After mmap(FIXED): reported unmapped=%zu\n", total_unmapped); + + /* Call munmap directly */ + total_unmapped = 0; + munmap(ptr_direct_mmap, size); + if (events & UCM_EVENT_VM_UNMAPPED) { + CHKERR_JUMP(total_unmapped == 0, "No callback for munmap", fail_close_ucm); + } + printf("After munmap: reported unmapped=%zu\n", total_unmapped); + + /* Release indirectly */ + total_unmapped = 0; + free(ptr_malloc_mmap); + ptr_malloc_mmap = NULL; + malloc_trim(0); + if (events & UCM_EVENT_VM_UNMAPPED) { + CHKERR_JUMP(total_unmapped == 0, "No callback for munmap from free", + fail_close_ucm); + } + printf("After mmap free + trim: reported unmapped=%zu\n", total_unmapped); + + /* Call mmap from a library we load after 
hooks are installed */ + dl_test = open_dyn_lib(lib_path); + CHKERR_JUMP(dl_test == NULL, "Failed to load test lib", fail_close_ucm); + + DL_FIND_FUNC(dl_test, cust_mmap_name, cust_mmap, goto fail_close_all); + total_mapped = 0; + ptr_direct_mmap = cust_mmap(size); + CHKERR_JUMP(ptr_direct_mmap == MAP_FAILED, "Failed to mmap from dynamic lib", + fail_close_all); + if (events & UCM_EVENT_VM_MAPPED) { + CHKERR_JUMP(total_mapped == 0,"No callback for mmap from dynamic lib", + fail_close_all); + } + printf("After another mmap from dynamic lib: reported mapped=%zu\n", total_mapped); + munmap(ptr_direct_mmap, size); + ptr_direct_mmap = MAP_FAILED; + + /* + * Test closing UCM. + * The library should not really be unloaded, because the memory hooks still + * point to functions inside it. + */ + total_unmapped = 0; + dlclose(dl); + dlclose(dl_test); + free(ptr_malloc_core); /* This should still work */ + ptr_malloc_core = NULL; + if (events & UCM_EVENT_VM_UNMAPPED) { + CHKERR_JUMP(total_unmapped == 0, "No callback for munmap from malloc", fail); + } + printf("After core malloc free: reported unmapped=%zu\n", total_unmapped); + + return 0; + +fail_close_all: + dlclose(dl_test); +fail_close_ucm: + dlclose(dl); +fail: + if (ptr_shmat != SHMAT_FAILED) { + shmdt(ptr_shmat); + } + if (shmid != -1) { + shmctl(shmid, IPC_RMID, NULL); + } + free(ptr_malloc_mmap); + free(ptr_malloc_core); + if (ptr_direct_mmap != MAP_FAILED) { + munmap(ptr_direct_mmap, size); + } + + return -1; +} + +int malloc_hooks_run_all(void *dl) +{ + return malloc_hooks_run_flags(dl, UCM_EVENT_VM_MAPPED | UCM_EVENT_VM_UNMAPPED); +} + +int malloc_hooks_run_unmapped(void *dl) +{ + return malloc_hooks_run_flags(dl, UCM_EVENT_VM_UNMAPPED); +} + +int ext_event_run(void *dl) +{ + void *ptr_direct_mmap; + void (*ucm_event)(void *addr, size_t length); + const size_t size = 1024 * 1024; + int ret = -1; + + /* Allocate directly with mmap */ + total_mapped = 0; + ptr_direct_mmap = mmap(NULL, size, PROT_READ|PROT_WRITE, + 
MAP_PRIVATE|MAP_ANON, -1, 0); + /* A failed mapping must not be passed to the event hooks or munmap below */ + CHKERR_JUMP(ptr_direct_mmap == MAP_FAILED, "Failed to mmap", fail); + printf("totmapped %zu\n", total_mapped); + /* No callback should be called as we registered events to be external */ + CHKERR_JUMP(total_mapped != 0, + "Callback for mmap invoked, while hooks were not set", fail); + DL_FIND_FUNC(dl, "ucm_vm_mmap", ucm_event, goto fail); + ucm_event(ptr_direct_mmap, size); + CHKERR_JUMP(total_mapped == 0, "Callback for mmap is not called", fail); + printf("After ucm_vm_mmap called: mapped=%zu\n", total_mapped); + + /* Call munmap directly */ + total_unmapped = 0; + munmap(ptr_direct_mmap, size); + CHKERR_JUMP(total_unmapped != 0, + "Callback for munmap invoked, while hooks were not set", fail); + + DL_FIND_FUNC(dl, "ucm_vm_munmap", ucm_event, goto fail); + ucm_event(ptr_direct_mmap, size); + CHKERR_JUMP(total_unmapped == 0, "Callback for munmap is not called", fail); + printf("After ucm_vm_munmap: unmapped=%zu\n", total_unmapped); + + ret = 0; + +fail: + dlclose(dl); + return ret; +} + +/* + * Select a test with "-t <name>" (default: the first entry of tests), call its + * init hook before MPI_Init, then run it and print PASS or FAIL. + * "-h" or an unknown option prints the usage and returns -1. + */ +int main(int argc, char **argv) +{ + const char *ucm_path = UCS_PP_MAKE_STRING(UCM_LIB_DIR) "/" "libucm.so"; + memtest_type_t *test = tests; + void *dl; + int ret; + int c; + + while ((c = getopt(argc, argv, "t:h")) != -1) { + switch (c) { + case 't': + for (test = tests; test->name != NULL; ++test) { + if (!strcmp(test->name, optarg)){ + break; + } + } + if (test->name == NULL) { + fprintf(stderr, "Wrong test name %s\n", optarg); + return -1; + } + break; + case 'h': + default: + usage(); + return -1; + } + } + + /* Some tests need to modify UCM config before calling ucp_init, + * which may be called by MPI_Init */ + dl = test->init(ucm_path); + if (dl == NULL) { + return -1; + } + + printf("%s: initialized\n", test->name); + + MPI_Init(&argc, &argv); + + ret = test->run(dl); + + printf("%s: %s\n", test->name, ret == 0 ?
"PASS" : "FAIL"); + + MPI_Finalize(); + return ret; +} + + + diff --git a/test/mpi/test_memhooks_lib.c b/test/mpi/test_memhooks_lib.c new file mode 100644 index 0000000..a080ba0 --- /dev/null +++ b/test/mpi/test_memhooks_lib.c @@ -0,0 +1,16 @@ +/** + * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED. + * + * See file LICENSE for terms. + */ + +#include <sys/mman.h> +#include <stdlib.h> + +/* Map size bytes of anonymous private read/write memory; returns the result of mmap() */ +void *memhook_test_lib_call_mmap(size_t size) +{ + return mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); +} + + diff --git a/ucx.pc.in b/ucx.pc.in new file mode 100644 index 0000000..d3486fc --- /dev/null +++ b/ucx.pc.in @@ -0,0 +1,11 @@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ +bindir = @exec_prefix@/bin +libdir = @libdir@ +includedir = @includedir@ + +Name: @PACKAGE@ +Description: Unified Communication X Library +Version: @MAJOR_VERSION@.@MINOR_VERSION@ +Cflags: -I${includedir} +Libs: -L${libdir} -lucs -luct -lucp diff --git a/ucx.spec.in b/ucx.spec.in new file mode 100644 index 0000000..1d7f112 --- /dev/null +++ b/ucx.spec.in @@ -0,0 +1,336 @@ +%{!?configure_options: %global configure_options %{nil}} +%bcond_without cma +%bcond_with cuda +%bcond_with gdrcopy +%bcond_without ib +%if 0%{?fedora} >= 30 || 0%{?rhel} >= 7 +%bcond_with ib_cm +%else +%bcond_without ib_cm +%endif +%bcond_with knem +%bcond_without rdmacm +%bcond_with rocm +%bcond_with ugni +%bcond_with xpmem +%bcond_without java + +Name: ucx +Version: @VERSION@ +Release: 1%{?dist} +Summary: UCX is a communication library implementing high-performance messaging +Group: System Environment/Libraries + +License: BSD +URL: http://www.openucx.org +Source: https://github.com/openucx/%{name}/releases/download/v@MAJOR_VERSION@.@MINOR_VERSION@.@PATCH_VERSION@/ucx-@MAJOR_VERSION@.@MINOR_VERSION@.@PATCH_VERSION@.tar.gz + +BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) + +# UCX currently supports only the following architectures +ExclusiveArch: aarch64 ppc64le x86_64 +
+BuildRequires: automake autoconf libtool gcc-c++ +# The NUMA development package has a different name on SUSE +%if "%{_vendor}" == "suse" +BuildRequires: libnuma-devel +%else +BuildRequires: numactl-devel +%endif +# The dependencies below are required only when the matching build condition is enabled +%if %{with cma} +BuildRequires: glibc-devel >= 2.15 +%endif +%if %{with gdrcopy} +BuildRequires: gdrcopy +%endif +%if %{with ib} +BuildRequires: libibverbs-devel +%endif +%if %{with ib_cm} +BuildRequires: libibcm-devel +%endif +%if %{with knem} +BuildRequires: knem +%endif +%if %{with rdmacm} +BuildRequires: librdmacm-devel +%endif +%if %{with rocm} +BuildRequires: hsa-rocr-dev +%endif +%if %{with xpmem} +BuildRequires: xpmem-devel +%endif +%if %{with java} +BuildRequires: maven +%endif + +%description +UCX stands for Unified Communication X. UCX provides an optimized communication +layer for Message Passing (MPI), PGAS/OpenSHMEM libraries and RPC/data-centric +applications. UCX utilizes high-speed networks, such as RDMA (InfiniBand, RoCE, +etc), Cray Gemini or Aries, for inter-node communication. If no such network is +available, TCP is used instead. UCX supports efficient transfer of data in +either main memory (RAM) or GPU memory (through CUDA and ROCm libraries). +In addition, UCX provides efficient intra-node communication, by leveraging the +following shared memory mechanisms: posix, sysv, cma, knem, and xpmem. +This package was built from '@SCM_BRANCH@' branch, commit @SCM_VERSION@. + +%if "%{_vendor}" == "suse" +%debug_package +%endif + +%package devel +Requires: %{name}%{?_isa} = %{version}-%{release} +Summary: Header files required for developing with UCX +Group: Development/Libraries + +%description devel +Provides header files and examples for developing with UCX.
+ +%prep +%setup -q + +%build +# Helper macros: argument 1 names a build condition, argument 2 a configure option. +# They expand to the with/without (or enable/disable) flag for that option, +# depending on whether the with_<argument 1> macro is defined. +%define _with_arg() %{expand:%%{?with_%{1}:--with-%{2}}%%{!?with_%{1}:--without-%{2}}} +%define _enable_arg() %{expand:%%{?with_%{1}:--enable-%{2}}%%{!?with_%{1}:--disable-%{2}}} +%configure --disable-optimizations \ + --disable-logging \ + --disable-debug \ + --disable-assertions \ + --disable-params-check \ + %_enable_arg cma cma \ + %_with_arg cuda cuda \ + %_with_arg gdrcopy gdrcopy \ + %_with_arg ib verbs \ + %_with_arg ib_cm cm \ + %_with_arg knem knem \ + %_with_arg rdmacm rdmacm \ + %_with_arg rocm rocm \ + %_with_arg xpmem xpmem \ + %_with_arg ugni ugni \ + %_with_arg java java \ + %{?configure_options} +make %{?_smp_mflags} V=1 + +%install +make DESTDIR=%{buildroot} install +# Remove libtool archives, static libraries and the unversioned lib*.so files +# under the ucx subdirectory from the buildroot +rm -f %{buildroot}%{_libdir}/*.la +rm -f %{buildroot}%{_libdir}/*.a +rm -f %{buildroot}%{_libdir}/ucx/*.la +rm -f %{buildroot}%{_libdir}/ucx/lib*.so +rm -f %{buildroot}%{_libdir}/ucx/lib*.a + +%files +%{_libdir}/lib*.so.* +%{_bindir}/uc* +%{_datadir}/ucx +%exclude %{_datadir}/ucx/examples +%doc README AUTHORS NEWS +%{!?_licensedir:%global license %%doc} +%license LICENSE + +%files devel +%{_includedir}/uc* +%{_libdir}/lib*.so +%{_libdir}/pkgconfig/ucx.pc +%{_datadir}/ucx/examples + +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig + +%if %{with cma} +%package cma +Requires: %{name}%{?_isa} = %{version}-%{release} +Summary: UCX CMA support +Group: System Environment/Libraries + +%description cma +Provides CMA (Linux cross-memory-attach) transport for UCX. It utilizes the +system calls process_vm_readv/writev() for one-shot memory copy from another +process. + +%files cma +%{_libdir}/ucx/libuct_cma.so.* +%endif + +%if %{with cuda} +%package cuda +Requires: %{name}%{?_isa} = %{version}-%{release} +Summary: UCX CUDA support +Group: System Environment/Libraries + +%description cuda +Provide CUDA (NVIDIA GPU) support for UCX.
Enables passing GPU memory pointers +to UCX communication routines, and transports taking advantage of GPUDirect +technology for direct data transfer between GPU and RDMA devices. + +%files cuda +%{_libdir}/ucx/libucx_perftest_cuda.so.* +%{_libdir}/ucx/libucm_cuda.so.* +%{_libdir}/ucx/libuct_cuda.so.* +%endif + +%if %{with gdrcopy} +%package gdrcopy +Requires: %{name}-cuda%{?_isa} = %{version}-%{release} +Summary: UCX GDRCopy support +Group: System Environment/Libraries + +%description gdrcopy +Provides GDRCopy support for UCX. GDRCopy is a low-latency GPU memory copy +library, built on top of the NVIDIA GPUDirect RDMA technology. + +%files gdrcopy +%{_libdir}/ucx/libuct_cuda_gdrcopy.so.* +%endif + +%if %{with ib} +%package ib +Requires: %{name}%{?_isa} = %{version}-%{release} +Summary: UCX RDMA support +Group: System Environment/Libraries + +%description ib +Provides support for IBTA-compliant transports for UCX. This includes RoCE, +InfiniBand, OmniPath, and any other transport supported by the IB Verbs API. +Typically these transports provide RDMA support, which enables fast, +hardware-offloaded data transfer. + +%files ib +%{_libdir}/ucx/libuct_ib.so.* +%endif + +%if %{with ib_cm} +%package ib-cm +Requires: %{name}-ib%{?_isa} = %{version}-%{release} +Summary: UCX InfiniBand connection-manager support +Group: System Environment/Libraries + +%description ib-cm +Provides InfiniBand Connection Manager (also known as ibcm) support for UCX. + +%files ib-cm +%{_libdir}/ucx/libuct_ib_cm.so.* +%endif + +%if %{with knem} +%package knem +Requires: %{name}%{?_isa} = %{version}-%{release} +Summary: UCX KNEM transport support +Group: System Environment/Libraries + +%description knem +Provides KNEM (fast inter-process copy) transport for UCX. KNEM is a Linux +kernel module that enables high-performance intra-node MPI communication +for large messages.
+ +%files knem +%{_libdir}/ucx/libuct_knem.so.* +%endif + +%if %{with rdmacm} +%package rdmacm +Requires: %{name}%{?_isa} = %{version}-%{release} +Summary: UCX RDMA connection manager support +Group: System Environment/Libraries + +%description rdmacm +Provides RDMA connection-manager support to UCX, which enables client/server +based connection establishment for RDMA-capable transports. + +%files rdmacm +%{_libdir}/ucx/libuct_rdmacm.so.* +%endif + +%if %{with rocm} +%package rocm +Requires: %{name}%{?_isa} = %{version}-%{release} +Summary: UCX ROCm GPU support +Group: System Environment/Libraries + +%description rocm +Provides Radeon Open Compute (ROCm) Runtime support for UCX. + +%files rocm +%{_libdir}/ucx/libuct_rocm.so.* +%{_libdir}/ucx/libucm_rocm.so.* + +%if %{with gdrcopy} +%package rocmgdr +Requires: %{name}-rocm%{?_isa} = %{version}-%{release} +Summary: UCX GDRCopy support for ROCm +Group: System Environment/Libraries + +%description rocmgdr +Provides GDRCopy support for UCX ROCm. GDRCopy is a low-latency GPU memory copy +library, built on top of the NVIDIA GPUDirect RDMA technology. + +%files rocmgdr +%{_libdir}/ucx/libuct_rocm_gdr.so.* +%endif +%endif + +%if %{with ugni} +%package ugni +Requires: %{name}%{?_isa} = %{version}-%{release} +Summary: UCX Gemini/Aries transport support +Group: System Environment/Libraries + +%description ugni +Provides Gemini/Aries transport for UCX. + +%files ugni +%{_libdir}/ucx/libuct_ugni.so.* +%endif + +%if %{with xpmem} +%package xpmem +Requires: %{name}%{?_isa} = %{version}-%{release} +Summary: UCX XPMEM transport support +Group: System Environment/Libraries + +%description xpmem +Provides XPMEM transport for UCX. XPMEM is a Linux kernel module that enables a +process to map the memory of another process into its virtual address space.
+ +%files xpmem +%{_libdir}/ucx/libuct_xpmem.so.* +%endif + +%if %{with java} +%package java +Requires: %{name}%{?_isa} = %{version}-%{release} +Summary: UCX Java bindings +Group: System Environment/Libraries + +%description java +Provides Java bindings for UCX. + +%files java +%{_libdir}/jucx-*.jar +%endif + +%changelog +* Sun Sep 22 2019 Yossi Itigin 1.8.0-1 +- Bump version to 1.8.0 +* Sun Mar 24 2019 Yossi Itigin 1.7.0-1 +- Bump version to 1.7.0 +* Thu Jan 24 2019 Yossi Itigin 1.6.0-1 +- Add cma, knem, and xpmem sub-packages +* Tue Nov 20 2018 Yossi Itigin 1.6.0-1 +- Bump version to 1.6.0 +* Tue Nov 6 2018 Andrey Maslennikov 1.5.0-1 +- Bump version to 1.5.0 +- See NEWS for details +* Tue Oct 30 2018 Andrey Maslennikov 1.4.0-1 +- See NEWS for details +* Mon Aug 20 2018 Andrey Maslennikov 1.3.1-1 +- See NEWS for details +* Thu Aug 16 2018 Andrey Maslennikov 1.3.0-1 +- Explicitly set gcc-c++ as requirements +* Wed Mar 7 2018 Andrey Maslennikov 1.3.0-1 +- See NEWS for details +* Mon Aug 21 2017 Andrey Maslennikov 1.2.1-1 +- Spec file now complies with Fedora guidelines +* Mon Jul 3 2017 Andrey Maslennikov 1.2.0-1 +- Fedora package created